49 double* restrict
chi1;
61 v2df_t a11v, a12v, x1v, w1v;
62 v2df_t a21v, a22v, x2v, w2v;
70 if ( (
unsigned long ) a1 % 16 != 0 )
72 if ( (
unsigned long ) a2 % 16 == 0 ||
73 (
unsigned long ) x % 16 == 0 ||
74 (
unsigned long ) w % 16 == 0 )
bl1_abort();
79 n_run = ( n -
n_pre ) / 4;
80 n_left = ( n -
n_pre ) % 4;
96 double chi1_c = *
chi1;
99 rho1_c += alpha1_c * chi1_c;
102 rho2_c += alpha2_c * chi1_c;
113 rho1v.
v = _mm_setzero_pd();
114 rho2v.
v = _mm_setzero_pd();
116 k1v.
v = _mm_loaddup_pd( (
double* )kappa1 );
117 k2v.
v = _mm_loaddup_pd( (
double* )kappa2 );
119 for ( i = 0; i <
n_run; ++
i )
121 a11v.
v = _mm_load_pd( (
double* )alpha1 );
122 a12v.
v = _mm_load_pd( (
double* )alpha2 );
123 x1v.
v = _mm_load_pd( (
double* )chi1 );
124 w1v.
v = _mm_load_pd( (
double* )omega1 );
126 rho1v.
v += a11v.
v * x1v.
v;
127 w1v.
v += k1v.
v * a11v.
v;
129 rho2v.
v += a12v.
v * x1v.
v;
130 w1v.
v += k2v.
v * a12v.
v;
132 _mm_store_pd( (
double* )omega1, w1v.
v );
134 a21v.
v = _mm_load_pd( (
double* )(alpha1 + 2) );
135 a22v.
v = _mm_load_pd( (
double* )(alpha2 + 2) );
136 x2v.
v = _mm_load_pd( (
double* )(chi1 + 2) );
137 w2v.
v = _mm_load_pd( (
double* )(omega1 + 2) );
139 rho1v.
v += a21v.
v * x2v.
v;
140 w2v.
v += k1v.
v * a21v.
v;
142 rho2v.
v += a22v.
v * x2v.
v;
143 w2v.
v += k2v.
v * a22v.
v;
145 _mm_store_pd( (
double* )(omega1 + 2), w2v.
v );
161 double chi1_c = *
chi1;
162 double omega1_c = *
omega1;
164 rho1_c += alpha1_c * chi1_c;
167 rho2_c += alpha2_c * chi1_c;
179 rho1_c += rho1v.
d[0] + rho1v.
d[1];
180 rho2_c += rho2v.
d[0] + rho2v.
d[1];
double rho2_c
Definition: bl1_dotv2axpyv2b.c:194
double *restrict chi1
Definition: bl1_dotv2axpyv2b.c:189
double d[2]
Definition: blis_type_defs.h:119
* rho1
Definition: bl1_dotv2axpyv2b.c:311
double alpha1_c
Definition: bl1_axpyv2b.c:144
int n_run
Definition: bl1_dotv2axpyv2b.c:198
double *restrict alpha2
Definition: bl1_dotv2axpyv2b.c:186
int n_pre
Definition: bl1_dotv2axpyv2b.c:197
double rho1_c
Definition: bl1_dotv2axpyv2b.c:193
double kappa1_c
Definition: bl1_dotv2axpyv2b.c:191
double alpha2_c
Definition: bl1_axpyv2b.c:145
__m128d v
Definition: blis_type_defs.h:118
double *restrict omega1
Definition: bl1_dotv2axpyv2b.c:190
double kappa2_c
Definition: bl1_dotv2axpyv2b.c:192
Definition: blis_type_defs.h:116
int i
Definition: bl1_dotv2axpyv2b.c:195
int n_left
Definition: bl1_dotv2axpyv2b.c:199
* rho2
Definition: bl1_dotv2axpyv2b.c:312
alpha1
Definition: bl1_dotv2axpyv2b.c:456
void bl1_abort(void)
Definition: bl1_abort.c:13