44 double* restrict
chi1;
46 double* restrict
psi1;
47 double* restrict
zeta1;
65 if ( (
unsigned long ) z % 16 != 0 )
67 if ( (
unsigned long ) x % 16 == 0 ||
68 (
unsigned long ) u % 16 == 0 ||
69 (
unsigned long ) y % 16 == 0 )
bl1_abort();
74 n_run = ( n -
n_pre ) / 2;
75 n_left = ( n -
n_pre ) % 2;
88 double chi1_c = *
chi1;
91 rho_c += chi1_c * upsilon_c;
92 *psi1 -= alpha_c * chi1_c;
93 *zeta1 -= beta_c * chi1_c;
101 a1v.
v = _mm_loaddup_pd( (
double* )alpha );
102 b1v.
v = _mm_loaddup_pd( (
double* )beta );
104 rho1v.
v = _mm_setzero_pd();
106 for ( i = 0; i <
n_run; ++
i )
108 x1v.
v = _mm_load_pd( (
double* )chi1 );
109 u1v.
v = _mm_load_pd( (
double* )upsilon1 );
110 y1v.
v = _mm_load_pd( (
double* )psi1 );
111 z1v.
v = _mm_load_pd( (
double* )zeta1 );
113 rho1v.
v += x1v.
v * u1v.
v;
114 y1v.
v -= a1v.
v * x1v.
v;
115 z1v.
v -= b1v.
v * x1v.
v;
117 _mm_store_pd( (
double* )psi1, y1v.
v );
118 _mm_store_pd( (
double* )zeta1, z1v.
v );
126 rho_c += rho1v.
d[0] + rho1v.
d[1];
135 double chi1_c = *
chi1;
138 rho_c += chi1_c * upsilon_c;
139 *psi1 -= alpha_c * chi1_c;
140 *zeta1 -= beta_c * chi1_c;
* rho
Definition: bl1_dotaxmyv2.c:258
double *restrict psi1
Definition: bl1_dotaxmyv2.c:155
chi1
Definition: bl1_axmyv2.c:366
double *restrict upsilon1
Definition: bl1_dotaxmyv2.c:152
double d[2]
Definition: blis_type_defs.h:119
double *restrict zeta1
Definition: bl1_dotaxmyv2.c:156
double beta_c
Definition: bl1_dotaxmyv2.c:158
int n_pre
Definition: bl1_dotaxmyv2.c:162
int n_left
Definition: bl1_dotaxmyv2.c:164
double alpha_c
Definition: bl1_dotaxmyv2.c:157
__m128d v
Definition: blis_type_defs.h:118
Definition: blis_type_defs.h:116
double rho_c
Definition: bl1_dotaxmyv2.c:159
int i
Definition: bl1_dotaxmyv2.c:160
void bl1_abort(void)
Definition: bl1_abort.c:13
int n_run
Definition: bl1_dotaxmyv2.c:163