49 double* restrict
zeta1;
51 double* restrict
chi1;
67 if ( (
unsigned long ) a % 16 != 0 )
69 if ( (
unsigned long ) u % 16 == 0 ||
70 (
unsigned long ) z % 16 == 0 ||
71 (
unsigned long ) x % 16 == 0 ||
72 (
unsigned long ) w % 16 == 0 )
bl1_abort();
77 n_run = ( n -
n_pre ) / 4;
78 n_left = ( n -
n_pre ) % 4;
96 double zeta1_c = *
zeta1;
98 double chi1_c = *
chi1;
101 alpha1_c += beta_c * upsilon1_c + gamma_c * zeta1_c;
102 rho_c += alpha1_c * chi1_c;
115 b1v.
v = _mm_loaddup_pd( (
double* )beta );
116 g1v.
v = _mm_loaddup_pd( (
double* )gamma );
117 k1v.
v = _mm_loaddup_pd( (
double* )kappa );
119 rhov.
v = _mm_setzero_pd();
121 for ( i = 0; i <
n_run; ++
i )
123 u1v.
v = _mm_load_pd( (
double* )upsilon1 );
124 z1v.
v = _mm_load_pd( (
double* )zeta1 );
125 a1v.
v = _mm_load_pd( (
double* )alpha1 );
127 a1v.
v += b1v.
v * u1v.
v + g1v.
v * z1v.
v;
129 u2v.
v = _mm_load_pd( (
double* )(upsilon1 + 2) );
130 z2v.
v = _mm_load_pd( (
double* )(zeta1 + 2) );
131 a2v.
v = _mm_load_pd( (
double* )(alpha1 + 2) );
133 a2v.
v += b1v.
v * u2v.
v + g1v.
v * z2v.
v;
135 x1v.
v = _mm_load_pd( (
double* )chi1 );
136 x2v.
v = _mm_load_pd( (
double* )(chi1 + 2) );
138 w1v.
v = _mm_load_pd( (
double* )omega1 );
139 w2v.
v = _mm_load_pd( (
double* )(omega1 + 2) );
141 rhov.
v += a1v.
v * x1v.
v;
142 rhov.
v += a2v.
v * x2v.
v;
144 w1v.
v += k1v.
v * a1v.
v;
145 w2v.
v += k1v.
v * a2v.
v;
147 _mm_store_pd( (
double* )alpha1, a1v.
v );
148 _mm_store_pd( (
double* )(alpha1 + 2), a2v.
v );
150 _mm_store_pd( (
double* )omega1, w1v.
v );
151 _mm_store_pd( (
double* )(omega1 + 2), w2v.
v );
161 rho_c += rhov.
d[0] + rhov.
d[1];
172 double zeta1_c = *
zeta1;
174 double chi1_c = *
chi1;
175 double omega1_c = *
omega1;
177 alpha1_c += beta_c * upsilon1_c + gamma_c * zeta1_c;
178 rho_c += alpha1_c * chi1_c;
int n_pre
Definition: bl1_axpyv2bdotaxpy.c:207
* rho
Definition: bl1_axpyv2bdotaxpy.c:322
double kappa_c
Definition: bl1_axpyv2bdotaxpy.c:203
double beta_c
Definition: bl1_axpyv2bdotaxpy.c:201
double *restrict alpha1
Definition: bl1_axpyv2bdotaxpy.c:198
upsilon1
Definition: bl1_axpyv2bdotaxpy.c:225
double d[2]
Definition: blis_type_defs.h:119
double *restrict chi1
Definition: bl1_axpyv2bdotaxpy.c:199
double alpha1_c
Definition: bl1_axpyv2b.c:144
double gamma_c
Definition: bl1_axpyv2bdotaxpy.c:202
int i
Definition: bl1_axpyv2bdotaxpy.c:205
int n_run
Definition: bl1_axpyv2bdotaxpy.c:208
double rho_c
Definition: bl1_axpyv2bdotaxpy.c:204
double *restrict zeta1
Definition: bl1_axpyv2bdotaxpy.c:195
__m128d v
Definition: blis_type_defs.h:118
Definition: blis_type_defs.h:116
int n_left
Definition: bl1_axpyv2bdotaxpy.c:209
double *restrict omega1
Definition: bl1_axpyv2bdotaxpy.c:200
void bl1_abort(void)
Definition: bl1_abort.c:13