libflame  revision_anchor
Functions | Variables
bl1_dotaxmyv2.c File Reference

(r)

Functions

void bl1_sdotaxmyv2 (int n, float *alpha, float *beta, float *x, int inc_x, float *u, int inc_u, float *rho, float *y, int inc_y, float *z, int inc_z)
 
void bl1_ddotaxmyv2 (int n, double *alpha, double *beta, double *x, int inc_x, double *u, int inc_u, double *rho, double *y, int inc_y, double *z, int inc_z)
 
 if (inc_x !=1||inc_u !=1||inc_y !=1||inc_z !=1)
 
 for (i=0;i< n_run;++i)
 
 if (n_left > 0)
 
void bl1_cdotaxmyv2 (int n, scomplex *alpha, scomplex *beta, scomplex *x, int inc_x, scomplex *u, int inc_u, scomplex *rho, scomplex *y, int inc_y, scomplex *z, int inc_z)
 
void bl1_zdotaxmyv2 (int n, dcomplex *alpha, dcomplex *beta, dcomplex *x, int inc_x, dcomplex *u, int inc_u, dcomplex *rho, dcomplex *y, int inc_y, dcomplex *z, int inc_z)
 

Variables

double *restrict upsilon1
 
double *restrict psi1
 
double *restrict zeta1
 
double alpha_c
 
double beta_c
 
double rho_c
 
int i
 
int n_pre
 
int n_run
 
int n_left
 
rho = rho_c
 

Function Documentation

◆ bl1_cdotaxmyv2()

void bl1_cdotaxmyv2 ( int  n,
scomplex alpha,
scomplex beta,
scomplex x,
int  inc_x,
scomplex u,
int  inc_u,
scomplex rho,
scomplex y,
int  inc_y,
scomplex z,
int  inc_z 
)

References bl1_abort().

271 {
272  bl1_abort();
273 }
void bl1_abort(void)
Definition: bl1_abort.c:13

◆ bl1_ddotaxmyv2()

void bl1_ddotaxmyv2 ( int  n,
double *  alpha,
double *  beta,
double *  x,
int  inc_x,
double *  u,
int  inc_u,
double *  rho,
double *  y,
int  inc_y,
double *  z,
int  inc_z 
)

References alpha_c, beta_c, bl1_abort(), chi1, v2df_t::d, i, n_left, n_pre, n_run, psi1, rho_c, upsilon1, v2df_t::v, and zeta1.

Referenced by FLA_Fused_Uhu_Yhu_Zhu_opd_var1().

43 {
44  double* restrict chi1;
45  double* restrict upsilon1;
46  double* restrict psi1;
47  double* restrict zeta1;
48  double rho_c;
49  int i;
50 
51  int n_pre;
52  int n_run;
53  int n_left;
54 
55  v2df_t a1v, b1v;
56  v2df_t rho1v;
57  v2df_t x1v, u1v, y1v, z1v;
58 
59  if ( inc_x != 1 ||
60  inc_u != 1 ||
61  inc_y != 1 ||
62  inc_z != 1 ) bl1_abort();
63 
64  n_pre = 0;
65  if ( ( unsigned long ) z % 16 != 0 )
66  {
67  if ( ( unsigned long ) x % 16 == 0 ||
68  ( unsigned long ) u % 16 == 0 ||
69  ( unsigned long ) y % 16 == 0 ) bl1_abort();
70 
71  n_pre = 1;
72  }
73 
74  n_run = ( n - n_pre ) / 2;
75  n_left = ( n - n_pre ) % 2;
76 
77  chi1 = x;
78  upsilon1 = u;
79  psi1 = y;
80  zeta1 = z;
81 
82  rho_c = 0.0;
83 
84  if ( n_pre == 1 )
85  {
86  double alpha_c = *alpha;
87  double beta_c = *beta;
88  double chi1_c = *chi1;
89  double upsilon_c = *upsilon1;
90 
91  rho_c += chi1_c * upsilon_c;
92  *psi1 -= alpha_c * chi1_c;
93  *zeta1 -= beta_c * chi1_c;
94 
95  chi1 += inc_x;
96  upsilon1 += inc_u;
97  psi1 += inc_y;
98  zeta1 += inc_z;
99  }
100 
101  a1v.v = _mm_loaddup_pd( ( double* )alpha );
102  b1v.v = _mm_loaddup_pd( ( double* )beta );
103 
104  rho1v.v = _mm_setzero_pd();
105 
106  for ( i = 0; i < n_run; ++i )
107  {
108  x1v.v = _mm_load_pd( ( double* )chi1 );
109  u1v.v = _mm_load_pd( ( double* )upsilon1 );
110  y1v.v = _mm_load_pd( ( double* )psi1 );
111  z1v.v = _mm_load_pd( ( double* )zeta1 );
112 
113  rho1v.v += x1v.v * u1v.v;
114  y1v.v -= a1v.v * x1v.v;
115  z1v.v -= b1v.v * x1v.v;
116 
117  _mm_store_pd( ( double* )psi1, y1v.v );
118  _mm_store_pd( ( double* )zeta1, z1v.v );
119 
120  chi1 += 2;
121  upsilon1 += 2;
122  psi1 += 2;
123  zeta1 += 2;
124  }
125 
126  rho_c += rho1v.d[0] + rho1v.d[1];
127 
128  if ( n_left > 0 )
129  {
130  double alpha_c = *alpha;
131  double beta_c = *beta;
132 
133  for( i = 0; i < n_left; ++i )
134  {
135  double chi1_c = *chi1;
136  double upsilon_c = *upsilon1;
137 
138  rho_c += chi1_c * upsilon_c;
139  *psi1 -= alpha_c * chi1_c;
140  *zeta1 -= beta_c * chi1_c;
141 
142  chi1 += inc_x;
143  upsilon1 += inc_u;
144  psi1 += inc_y;
145  zeta1 += inc_z;
146  }
147  }
148 
149  *rho = rho_c;
150 }
* rho
Definition: bl1_dotaxmyv2.c:258
double *restrict psi1
Definition: bl1_dotaxmyv2.c:155
chi1
Definition: bl1_axmyv2.c:366
double *restrict upsilon1
Definition: bl1_dotaxmyv2.c:152
double d[2]
Definition: blis_type_defs.h:119
double *restrict zeta1
Definition: bl1_dotaxmyv2.c:156
double beta_c
Definition: bl1_dotaxmyv2.c:158
int n_pre
Definition: bl1_dotaxmyv2.c:162
int n_left
Definition: bl1_dotaxmyv2.c:164
double alpha_c
Definition: bl1_dotaxmyv2.c:157
__m128d v
Definition: blis_type_defs.h:118
Definition: blis_type_defs.h:116
double rho_c
Definition: bl1_dotaxmyv2.c:159
int i
Definition: bl1_dotaxmyv2.c:160
void bl1_abort(void)
Definition: bl1_abort.c:13
int n_run
Definition: bl1_dotaxmyv2.c:163

◆ bl1_sdotaxmyv2()

void bl1_sdotaxmyv2 ( int  n,
float *  alpha,
float *  beta,
float *  x,
int  inc_x,
float *  u,
int  inc_u,
float *  rho,
float *  y,
int  inc_y,
float *  z,
int  inc_z 
)

References bl1_abort().

29 {
30  bl1_abort();
31 }
void bl1_abort(void)
Definition: bl1_abort.c:13

◆ bl1_zdotaxmyv2()

void bl1_zdotaxmyv2 ( int  n,
dcomplex alpha,
dcomplex beta,
dcomplex x,
int  inc_x,
dcomplex u,
int  inc_u,
dcomplex rho,
dcomplex y,
int  inc_y,
dcomplex z,
int  inc_z 
)

References alpha_c, beta_c, bl1_abort(), chi1, v2df_t::d, i, dcomplex::imag, psi1, dcomplex::real, rho_c, upsilon1, v2df_t::v, and zeta1.

285 {
286  dcomplex* restrict chi1;
287  dcomplex* restrict upsilon1;
288  dcomplex* restrict psi1;
289  dcomplex* restrict zeta1;
290  int i;
291 
292  v2df_t alpha11v, alpha12v;
293  v2df_t beta11v, beta12v;
294  v2df_t rho1v;
295  v2df_t x1v, x1rv;
296  v2df_t y1v;
297  v2df_t z1v;
298  v2df_t u11v, u12v;
299  v2df_t acad, bdbc;
300  v2df_t bcac, adbd;
301 
302  if ( inc_x != 1 ||
303  inc_u != 1 ||
304  inc_y != 1 ||
305  inc_z != 1 ) bl1_abort();
306 
307  chi1 = x;
308  upsilon1 = u;
309  psi1 = y;
310  zeta1 = z;
311 
312  //rho_c.real = 0.0;
313  //rho_c.imag = 0.0;
314  rho1v.v = _mm_setzero_pd();
315 
316  //alpha_c = *alpha;
317  //beta_c = *beta;
318  alpha11v.v = _mm_loaddup_pd( ( double* )&(alpha->real) );
319  alpha12v.v = _mm_loaddup_pd( ( double* )&(alpha->imag) );
320  beta11v.v = _mm_loaddup_pd( ( double* )&(beta->real) );
321  beta12v.v = _mm_loaddup_pd( ( double* )&(beta->imag) );
322 
323  for ( i = 0; i < n; ++i )
324  {
325  //dcomplex chi1_c = *chi1;
326  x1v.v = _mm_load_pd( ( double* )chi1 );
327 
328  //psi1->real -= alpha_c.real * chi1_c.real - alpha_c.imag * chi1_c.imag;
329  //psi1->imag -= alpha_c.real * chi1_c.imag + alpha_c.imag * chi1_c.real;
330  x1rv.v = _mm_shuffle_pd( x1v.v, x1v.v, _MM_SHUFFLE2 (0,1) );
331  acad.v = alpha11v.v * x1v.v;
332  bdbc.v = alpha12v.v * x1rv.v;
333  y1v.v = _mm_load_pd( ( double* )psi1 );
334  y1v.v = y1v.v - _mm_addsub_pd( acad.v, bdbc.v );
335  _mm_store_pd( ( double* )psi1, y1v.v );
336 
337  //zeta1->real -= beta_c.real * chi1_c.real - beta_c.imag * chi1_c.imag;
338  //zeta1->imag -= beta_c.real * chi1_c.imag + beta_c.imag * chi1_c.real;
339  x1rv.v = _mm_shuffle_pd( x1v.v, x1v.v, _MM_SHUFFLE2 (0,1) );
340  acad.v = beta11v.v * x1v.v;
341  bdbc.v = beta12v.v * x1rv.v;
342  z1v.v = _mm_load_pd( ( double* )zeta1 );
343  z1v.v = z1v.v - _mm_addsub_pd( acad.v, bdbc.v );
344  _mm_store_pd( ( double* )zeta1, z1v.v );
345 
346  //rho_c.real = chi1_c.real * upsilon1_c.real - -chi1_c.imag * upsilon1_c.imag;
347  //rho_c.imag = chi1_c.real * upsilon1_c.imag + -chi1_c.imag * upsilon1_c.real;
348  x1rv.v = _mm_shuffle_pd( x1v.v, x1v.v, _MM_SHUFFLE2 (0,1) );
349  u11v.v = _mm_loaddup_pd( ( double* )&(upsilon1->real) );
350  u12v.v = _mm_loaddup_pd( ( double* )&(upsilon1->imag) );
351  bcac.v = x1rv.v * u11v.v;
352  adbd.v = x1v.v * u12v.v;
353  rho1v.v = rho1v.v + _mm_addsub_pd( bcac.v, adbd.v );
354 
355  chi1 += 1;
356  upsilon1 += 1;
357  psi1 += 1;
358  zeta1 += 1;
359  }
360 
361  rho1v.v = _mm_shuffle_pd( rho1v.v, rho1v.v, _MM_SHUFFLE2 (0,1) );
362 
363  rho1v.d[1] = -rho1v.d[1];
364 
365  _mm_store_pd( ( double* )rho, rho1v.v );
366 }
double *restrict psi1
Definition: bl1_dotaxmyv2.c:155
chi1
Definition: bl1_axmyv2.c:366
double *restrict upsilon1
Definition: bl1_dotaxmyv2.c:152
double imag
Definition: blis_type_defs.h:139
double d[2]
Definition: blis_type_defs.h:119
double *restrict zeta1
Definition: bl1_dotaxmyv2.c:156
double real
Definition: blis_type_defs.h:139
__m128d v
Definition: blis_type_defs.h:118
Definition: blis_type_defs.h:116
int i
Definition: bl1_dotaxmyv2.c:160
void bl1_abort(void)
Definition: bl1_abort.c:13
Definition: blis_type_defs.h:137

◆ for()

for ( )

References chi1, psi1, upsilon1, and zeta1.

210  {
211  double chi1_c = *chi1;
212  double chi2_c = *(chi1 + 1);
213  double upsilon1_c = *upsilon1;
214  double upsilon2_c = *(upsilon1 + 1);
215  double psi1_c = *psi1;
216  double psi2_c = *(psi1 + 1);
217  double zeta1_c = *zeta1;
218  double zeta2_c = *(zeta1 + 1);
219 
220  rho_c += chi1_c * upsilon1_c;
221  rho_c += chi2_c * upsilon2_c;
222 
223  psi1_c -= alpha_c * chi1_c;
224  psi2_c -= alpha_c * chi2_c;
225 
226  zeta1_c -= beta_c * chi1_c;
227  zeta2_c -= beta_c * chi2_c;
228 
229  *psi1 = psi1_c;
230  *(psi1 + 1) = psi2_c;
231  *zeta1 = zeta1_c;
232  *(zeta1 + 1) = zeta2_c;
233 
234  chi1 += 2*inc_x;
235  upsilon1 += 2*inc_u;
236  psi1 += 2*inc_y;
237  zeta1 += 2*inc_z;
238  }
double *restrict psi1
Definition: bl1_dotaxmyv2.c:155
chi1
Definition: bl1_axmyv2.c:366
double *restrict upsilon1
Definition: bl1_dotaxmyv2.c:152
double *restrict zeta1
Definition: bl1_dotaxmyv2.c:156
double beta_c
Definition: bl1_dotaxmyv2.c:158
double alpha_c
Definition: bl1_dotaxmyv2.c:157
double rho_c
Definition: bl1_dotaxmyv2.c:159

◆ if() [1/2]

if ( inc_x !  = 1 || inc_u != 1 || inc_y != 1 || inc_z != 1)
195  {
196  double chi1_c = *chi1;
197  double upsilon_c = *upsilon1;
198 
199  rho_c += chi1_c * upsilon_c;
200  *psi1 -= alpha_c * chi1_c;
201  *zeta1 -= beta_c * chi1_c;
202 
203  chi1 += inc_x;
204  upsilon1 += inc_u;
205  psi1 += inc_y;
206  zeta1 += inc_z;
207  }
double *restrict psi1
Definition: bl1_dotaxmyv2.c:155
chi1
Definition: bl1_axmyv2.c:366
double *restrict upsilon1
Definition: bl1_dotaxmyv2.c:152
double *restrict zeta1
Definition: bl1_dotaxmyv2.c:156
double beta_c
Definition: bl1_dotaxmyv2.c:158
double alpha_c
Definition: bl1_dotaxmyv2.c:157
double rho_c
Definition: bl1_dotaxmyv2.c:159

◆ if() [2/2]

if ( n_left  ,
 
)

References chi1, i, n_left, and upsilon1.

241  {
242  for( i = 0; i < n_left; ++i )
243  {
244  double chi1_c = *chi1;
245  double upsilon_c = *upsilon1;
246 
247  rho_c += chi1_c * upsilon_c;
248  *psi1 -= alpha_c * chi1_c;
249  *zeta1 -= beta_c * chi1_c;
250 
251  chi1 += inc_x;
252  upsilon1 += inc_u;
253  psi1 += inc_y;
254  zeta1 += inc_z;
255  }
256  }
double *restrict psi1
Definition: bl1_dotaxmyv2.c:155
chi1
Definition: bl1_axmyv2.c:366
double *restrict upsilon1
Definition: bl1_dotaxmyv2.c:152
double *restrict zeta1
Definition: bl1_dotaxmyv2.c:156
double beta_c
Definition: bl1_dotaxmyv2.c:158
int n_left
Definition: bl1_dotaxmyv2.c:164
double alpha_c
Definition: bl1_dotaxmyv2.c:157
double rho_c
Definition: bl1_dotaxmyv2.c:159
int i
Definition: bl1_dotaxmyv2.c:160

Variable Documentation

◆ alpha_c

dcomplex alpha_c

Referenced by bl1_ddotaxmyv2(), and bl1_zdotaxmyv2().

◆ beta_c

dcomplex beta_c

Referenced by bl1_ddotaxmyv2(), and bl1_zdotaxmyv2().

◆ i

int i

Referenced by bl1_ddotaxmyv2(), bl1_zdotaxmyv2(), and if().

◆ n_left

int n_left

Referenced by bl1_ddotaxmyv2(), and if().

◆ n_pre

int n_pre

Referenced by bl1_ddotaxmyv2().

◆ n_run

int n_run

Referenced by bl1_ddotaxmyv2().

◆ psi1

dcomplex* restrict psi1

Referenced by bl1_ddotaxmyv2(), bl1_zdotaxmyv2(), and for().

◆ rho

* rho = rho_c

◆ rho_c

dcomplex rho_c

Referenced by bl1_ddotaxmyv2(), and bl1_zdotaxmyv2().

◆ upsilon1

dcomplex* restrict upsilon1
Initial value:
{
double* restrict chi1
chi1
Definition: bl1_axmyv2.c:366

Referenced by bl1_ddotaxmyv2(), bl1_zdotaxmyv2(), for(), and if().

◆ zeta1

dcomplex* restrict zeta1

Referenced by bl1_ddotaxmyv2(), bl1_zdotaxmyv2(), and for().