44 double* restrict
chi1;
45 double* restrict
chi2;
46 double* restrict chi3;
47 double* restrict
psi1;
66 if ( (
unsigned long ) y % 16 != 0 )
68 if ( (
unsigned long )
x1 % 16 == 0 ||
69 (
unsigned long ) x2 % 16 == 0 ||
70 (
unsigned long ) x3 % 16 == 0 )
bl1_abort();
75 n_run = ( n -
n_pre ) / 4;
76 n_left = ( n -
n_pre ) % 4;
87 double alpha3_c = *alpha3;
88 double chi11_c = *
chi1;
89 double chi12_c = *
chi2;
90 double chi13_c = *chi3;
92 *psi1 += alpha1_c * chi11_c + alpha2_c * chi12_c + alpha3_c * chi13_c;
100 a1v.
v = _mm_loaddup_pd( (
double* )
alpha1 );
101 a2v.
v = _mm_loaddup_pd( (
double* )
alpha2 );
102 a3v.
v = _mm_loaddup_pd( (
double* )alpha3 );
104 for ( i = 0; i <
n_run; ++
i )
106 x11v.
v = _mm_load_pd( (
double* )chi1 );
107 x12v.
v = _mm_load_pd( (
double* )chi2 );
108 x13v.
v = _mm_load_pd( (
double* )chi3 );
109 y1v.
v = _mm_load_pd( (
double* )psi1 );
111 y1v.
v += a1v.
v * x11v.
v + a2v.
v * x12v.
v + a3v.
v * x13v.
v;
113 _mm_store_pd( (
double* )psi1, y1v.
v );
115 x21v.
v = _mm_load_pd( (
double* )(chi1 + 2) );
116 x22v.
v = _mm_load_pd( (
double* )(chi2 + 2) );
117 x23v.
v = _mm_load_pd( (
double* )(chi3 + 2) );
118 y2v.
v = _mm_load_pd( (
double* )(psi1 + 2) );
120 y2v.
v += a1v.
v * x21v.
v + a2v.
v * x22v.
v + a3v.
v * x23v.
v;
122 _mm_store_pd( (
double* )(psi1 + 2), y2v.
v );
134 double alpha3_c = *alpha3;
138 double chi11_c = *
chi1;
139 double chi12_c = *
chi2;
140 double chi13_c = *chi3;
142 *psi1 += alpha1_c * chi11_c + alpha2_c * chi12_c + alpha3_c * chi13_c;
double *restrict psi1
Definition: bl1_axpyv3b.c:155
double *restrict alpha1
Definition: bl1_axpyv2bdotaxpy.c:198
chi1
Definition: bl1_axpyv3b.c:168
double *restrict alpha2
Definition: bl1_dotv2axpyv2b.c:186
double *restrict chi2
Definition: bl1_axpyv3b.c:152
__m128d v
Definition: blis_type_defs.h:118
Definition: blis_type_defs.h:116
double alpha2_c
Definition: bl1_axpyv3b.c:157
int n_run
Definition: bl1_axpyv3b.c:162
x1
Definition: bl1_dotsv2.c:374
int i
Definition: bl1_axpyv3b.c:160
double alpha1_c
Definition: bl1_axpyv3b.c:156
void bl1_abort(void)
Definition: bl1_abort.c:13
int n_pre
Definition: bl1_axmyv2.c:147
int n_left
Definition: bl1_axpyv3b.c:163