40 double* restrict
chi1;
58 if ( (
unsigned long ) a % 16 != 0 )
60 if ( (
unsigned long ) x % 16 == 0 ||
61 (
unsigned long ) w % 16 == 0 )
bl1_abort();
66 n_run = ( n -
n_pre ) / 4;
67 n_left = ( n -
n_pre ) % 4;
79 double chi1_c = *
chi1;
82 rho_c += alpha1_c * chi1_c;
92 rho1v.
v = _mm_setzero_pd();
94 k1v.
v = _mm_loaddup_pd( (
double* )kappa );
96 for ( i = 0; i <
n_run; ++
i )
98 a1v.
v = _mm_load_pd( (
double* )alpha1 );
99 x1v.
v = _mm_load_pd( (
double* )chi1 );
100 w1v.
v = _mm_load_pd( (
double* )omega1 );
102 a2v.
v = _mm_load_pd( (
double* )(alpha1 + 2) );
103 x2v.
v = _mm_load_pd( (
double* )(chi1 + 2) );
104 w2v.
v = _mm_load_pd( (
double* )(omega1 + 2) );
106 rho1v.
v += a1v.
v * x1v.
v;
107 w1v.
v += k1v.
v * a1v.
v;
109 _mm_store_pd( (
double* )omega1, w1v.
v );
111 rho1v.
v += a2v.
v * x2v.
v;
112 w2v.
v += k1v.
v * a2v.
v;
114 _mm_store_pd( (
double* )(omega1 + 2), w2v.
v );
127 double chi1_c = *
chi1;
128 double omega1_c = *
omega1;
130 rho_c += alpha1_c * chi1_c;
141 rho_c += rho1v.
d[0] + rho1v.
d[1];
* rho
Definition: bl1_dotaxpy.c:242
int i
Definition: bl1_dotaxpy.c:152
double d[2]
Definition: blis_type_defs.h:119
int n_pre
Definition: bl1_dotaxpy.c:154
double *restrict chi1
Definition: bl1_dotaxpy.c:146
double alpha1_c
Definition: bl1_axpyv2b.c:144
alpha1
Definition: bl1_dotaxpy.c:338
__m128d v
Definition: blis_type_defs.h:118
int n_left
Definition: bl1_dotaxpy.c:156
Definition: blis_type_defs.h:116
double rho_c
Definition: bl1_dotaxpy.c:151
double *restrict omega1
Definition: bl1_dotaxpy.c:149
double kappa_c
Definition: bl1_dotaxpy.c:150
int n_run
Definition: bl1_dotaxpy.c:155
void bl1_abort(void)
Definition: bl1_abort.c:13