libflame  revision_anchor
Functions
FLA_Fused_Ahx_Axpy_Ax_opt_var1.c File Reference

(r)

Functions

FLA_Error FLA_Fused_Ahx_Axpy_Ax_opt_var1 (FLA_Obj A, FLA_Obj u, FLA_Obj tau, FLA_Obj a, FLA_Obj beta, FLA_Obj y, FLA_Obj w)
 
FLA_Error FLA_Fused_Ahx_Axpy_Ax_ops_var1 (int m_A, int n_A, float *buff_tau, float *buff_beta, float *buff_A, int rs_A, int cs_A, float *buff_u, int inc_u, float *buff_a, int inc_a, float *buff_y, int inc_y, float *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opd_var1 (int m_A, int n_A, double *buff_tau, double *buff_beta, double *buff_A, int rs_A, int cs_A, double *buff_u, int inc_u, double *buff_a, int inc_a, double *buff_y, int inc_y, double *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opc_var1 (int m_A, int n_A, scomplex *buff_tau, scomplex *buff_beta, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_u, int inc_u, scomplex *buff_a, int inc_a, scomplex *buff_y, int inc_y, scomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opz_var1 (int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_beta, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_u, int inc_u, dcomplex *buff_a, int inc_a, dcomplex *buff_y, int inc_y, dcomplex *buff_w, int inc_w)
 

Function Documentation

◆ FLA_Fused_Ahx_Axpy_Ax_opc_var1()

FLA_Error FLA_Fused_Ahx_Axpy_Ax_opc_var1 ( int  m_A,
int  n_A,
scomplex *  buff_tau,
scomplex *  buff_beta,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_u,
int  inc_u,
scomplex *  buff_a,
int  inc_a,
scomplex *  buff_y,
int  inc_y,
scomplex *  buff_w,
int  inc_w 
)

References alpha1, bl1_caxpyv(), bl1_cdots(), bl1_csetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, i, and psi1.

Referenced by FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofc_var4(), and FLA_Fused_Ahx_Axpy_Ax_opt_var1().

331 {
332  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
333  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
334  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
335  scomplex minus_inv_tau;
336  scomplex conj_psi1;
337  scomplex conj_alpha1;
338  int i;
339 
340  bl1_csetv( m_A,
341  buff_0,
342  buff_w, inc_w );
343 
344  bl1_cdiv3( buff_m1, buff_tau, &minus_inv_tau );
345 
346  for ( i = 0; i < n_A; ++i )
347  {
348  scomplex* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
349  scomplex* psi1 = buff_y + (i )*inc_y;
350  scomplex* alpha1 = buff_a + (i )*inc_a;
351  scomplex* u = buff_u;
352  scomplex* w = buff_w;
353 
354  /*------------------------------------------------------------*/
355 
356  bl1_cdots( BLIS1_CONJUGATE,
357  m_A,
358  buff_1,
359  a1, rs_A,
360  u, inc_u,
361  buff_beta,
362  psi1 );
363 
364  bl1_ccopyconj( psi1, &conj_psi1 );
365  bl1_cmult4( &minus_inv_tau, &conj_psi1, alpha1, alpha1 );
366 
367  bl1_ccopyconj( alpha1, &conj_alpha1 );
368 
369  bl1_caxpyv( BLIS1_NO_CONJUGATE,
370  m_A,
371  &conj_alpha1,
372  a1, rs_A,
373  w, inc_w );
374 /*
375  F77_caxpy( &m_A,
376  &conj_alpha1,
377  a1, &rs_A,
378  w, &inc_w );
379 */
380 
381  /*------------------------------------------------------------*/
382 
383  }
384 
385  return FLA_SUCCESS;
386 }
double *restrict psi1
Definition: bl1_axmyv2.c:139
void bl1_csetv(int m, scomplex *sigma, scomplex *x, int incx)
Definition: bl1_setv.c:52
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:81
double *restrict alpha1
Definition: bl1_axpyv2bdotaxpy.c:198
void bl1_cdots(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho)
Definition: bl1_dots.c:39
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
Definition: blis_type_defs.h:132
int i
Definition: bl1_axmyv2.c:145
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Fused_Ahx_Axpy_Ax_opd_var1()

FLA_Error FLA_Fused_Ahx_Axpy_Ax_opd_var1 ( int  m_A,
int  n_A,
double *  buff_tau,
double *  buff_beta,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_u,
int  inc_u,
double *  buff_a,
int  inc_a,
double *  buff_y,
int  inc_y,
double *  buff_w,
int  inc_w 
)

References alpha1, alpha2, bl1_d0(), bl1_daxpyv(), bl1_daxpyv2b(), bl1_ddot(), bl1_ddotsv2(), bl1_dm1(), bl1_dsetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, n_left, n_run, psi1, and rho1.

Referenced by FLA_Bidiag_UT_u_step_ofd_var3(), FLA_Bidiag_UT_u_step_ofd_var4(), and FLA_Fused_Ahx_Axpy_Ax_opt_var1().

216 {
217  double zero = bl1_d0();
218  double minus_one = bl1_dm1();
219  double* restrict u = buff_u;
220  double* restrict w = buff_w;
221  double* restrict beta = buff_beta;
222  double* restrict a1;
223  double* restrict a2;
224  double* restrict psi1;
225  double* restrict psi2;
226  double* restrict alpha1;
227  double* restrict alpha2;
228 
229  double minus_inv_tau;
230  int i;
231 
232  int n_run = n_A / 2;
233  int n_left = n_A % 2;
234  int stepcs_A = 2*cs_A;
235  int stepinc_y = 2*inc_y;
236  int stepinc_a = 2*inc_a;
237 
238 
239  bl1_dsetv( m_A,
240  &zero,
241  buff_w, inc_w );
242 
243  bl1_ddiv3( &minus_one, buff_tau, &minus_inv_tau );
244 
245  a1 = buff_A;
246  a2 = buff_A + cs_A;
247  psi1 = buff_y;
248  psi2 = buff_y + inc_y;
249  alpha1 = buff_a;
250  alpha2 = buff_a + inc_a;
251 
252  for ( i = 0; i < n_run; ++i )
253  {
254 /*
255  Effective computation:
256  y = beta * y + A' * u;
257  a = a - conj(y) / tau;
258  w = A * conj(a);
259 */
260  /*------------------------------------------------------------*/
261 
262  bl1_ddotsv2( BLIS1_CONJUGATE,
263  m_A,
264  a1, rs_A,
265  a2, rs_A,
266  u, inc_u,
267  beta,
268  psi1,
269  psi2 );
270 
271  bl1_dmult4( &minus_inv_tau, psi1, alpha1, alpha1 );
272  bl1_dmult4( &minus_inv_tau, psi2, alpha2, alpha2 );
273 
274  bl1_daxpyv2b( m_A,
275  alpha1,
276  alpha2,
277  a1, rs_A,
278  a2, rs_A,
279  w, inc_w );
280 
281  /*------------------------------------------------------------*/
282 
283  a1 += stepcs_A;
284  a2 += stepcs_A;
285  psi1 += stepinc_y;
286  psi2 += stepinc_y;
287  alpha1 += stepinc_a;
288  alpha2 += stepinc_a;
289  }
290 
291  if ( n_left == 1 )
292  //for ( i = 0; i < n_left; ++i )
293  {
294  double rho1;
295 
296  bl1_ddot( BLIS1_CONJUGATE,
297  m_A,
298  a1, rs_A,
299  u, inc_u,
300  &rho1 );
301  bl1_dscals( buff_beta, psi1 );
302  bl1_dadd3( psi1, &rho1, psi1 );
303 
304  bl1_dmult4( &minus_inv_tau, psi1, alpha1, alpha1 );
305 
306  bl1_daxpyv( BLIS1_NO_CONJUGATE,
307  m_A,
308  alpha1,
309  a1, rs_A,
310  w, inc_w );
311 
312  //a1 += cs_A;
313  //psi1 += inc_y;
314  //alpha1 += inc_a;
315  }
316 
317  return FLA_SUCCESS;
318 }
double rho1
Definition: bl1_dotsv2.c:149
double *restrict psi1
Definition: bl1_axmyv2.c:139
Definition: blis_type_defs.h:81
double *restrict alpha1
Definition: bl1_axpyv2bdotaxpy.c:198
void bl1_ddotsv2(conj1_t conjxy, int n, double *x, int inc_x, double *y, int inc_y, double *z, int inc_z, double *beta, double *rho_xz, double *rho_yz)
Definition: bl1_dotsv2.c:35
double bl1_d0(void)
Definition: bl1_constants.c:118
void bl1_daxpyv2b(int n, double *alpha1, double *alpha2, double *x1, int inc_x1, double *x2, int inc_x2, double *y, int inc_y)
Definition: bl1_axpyv2b.c:31
double *restrict alpha2
Definition: bl1_dotv2axpyv2b.c:186
Definition: blis_type_defs.h:82
void bl1_dsetv(int m, double *sigma, double *x, int incx)
Definition: bl1_setv.c:39
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
int n_left
Definition: bl1_axmyv2.c:149
double bl1_dm1(void)
Definition: bl1_constants.c:182
int i
Definition: bl1_axmyv2.c:145
int n_run
Definition: bl1_axmyv2.c:148
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21

◆ FLA_Fused_Ahx_Axpy_Ax_ops_var1()

FLA_Error FLA_Fused_Ahx_Axpy_Ax_ops_var1 ( int  m_A,
int  n_A,
float *  buff_tau,
float *  buff_beta,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_u,
int  inc_u,
float *  buff_a,
int  inc_a,
float *  buff_y,
int  inc_y,
float *  buff_w,
int  inc_w 
)

References alpha1, bl1_saxpyv(), bl1_sdots(), bl1_ssetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, i, and psi1.

Referenced by FLA_Bidiag_UT_u_step_ofs_var3(), FLA_Bidiag_UT_u_step_ofs_var4(), and FLA_Fused_Ahx_Axpy_Ax_opt_var1().

152 {
153  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
154  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
155  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
156  float minus_inv_tau;
157  int i;
158 
159  bl1_ssetv( m_A,
160  buff_0,
161  buff_w, inc_w );
162 
163  minus_inv_tau = *buff_m1 / *buff_tau;
164 
165  for ( i = 0; i < n_A; ++i )
166  {
167  float* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
168  float* psi1 = buff_y + (i )*inc_y;
169  float* alpha1 = buff_a + (i )*inc_a;
170  float* u = buff_u;
171  float* w = buff_w;
172 
173  /*------------------------------------------------------------*/
174 
175  bl1_sdots( BLIS1_CONJUGATE,
176  m_A,
177  buff_1,
178  a1, rs_A,
179  u, inc_u,
180  buff_beta,
181  psi1 );
182 
183  // bl1_dmult4( &minus_inv_tau, conj_psi1, alpha1, alpha1 );
184  *alpha1 = *alpha1 + minus_inv_tau * *psi1;
185 
186  bl1_saxpyv( BLIS1_NO_CONJUGATE,
187  m_A,
188  alpha1,
189  a1, rs_A,
190  w, inc_w );
191 /*
192  F77_saxpy( &m_A,
193  alpha1,
194  a1, &rs_A,
195  w, &inc_w );
196 */
197 
198  /*------------------------------------------------------------*/
199 
200  }
201 
202  return FLA_SUCCESS;
203 }
double *restrict psi1
Definition: bl1_axmyv2.c:139
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:81
double *restrict alpha1
Definition: bl1_axpyv2bdotaxpy.c:198
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_ssetv(int m, float *sigma, float *x, int incx)
Definition: bl1_setv.c:26
void bl1_sdots(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho)
Definition: bl1_dots.c:13
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
int i
Definition: bl1_axmyv2.c:145
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Fused_Ahx_Axpy_Ax_opt_var1()

FLA_Error FLA_Fused_Ahx_Axpy_Ax_opt_var1 ( FLA_Obj  A,
FLA_Obj  u,
FLA_Obj  tau,
FLA_Obj  a,
FLA_Obj  beta,
FLA_Obj  y,
FLA_Obj  w 
)

References FLA_Fused_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

14 {
15 /*
16  Effective computation:
17  y = beta * y + A' * u;
18  a = a - conj(y) / tau;
19  w = A * conj(a);
20 */
21  FLA_Datatype datatype;
22  int m_A, n_A;
23  int rs_A, cs_A;
24  int inc_u, inc_a, inc_y, inc_w;
25 
26  datatype = FLA_Obj_datatype( A );
27 
28  m_A = FLA_Obj_length( A );
29  n_A = FLA_Obj_width( A );
30 
31  rs_A = FLA_Obj_row_stride( A );
32  cs_A = FLA_Obj_col_stride( A );
33 
34  inc_u = FLA_Obj_vector_inc( u );
35 
36  inc_a = FLA_Obj_vector_inc( a );
37 
38  inc_y = FLA_Obj_vector_inc( y );
39 
40  inc_w = FLA_Obj_vector_inc( w );
41 
42 
43  switch ( datatype )
44  {
45  case FLA_FLOAT:
46  {
47  float* buff_A = FLA_FLOAT_PTR( A );
48  float* buff_u = FLA_FLOAT_PTR( u );
49  float* buff_a = FLA_FLOAT_PTR( a );
50  float* buff_y = FLA_FLOAT_PTR( y );
51  float* buff_w = FLA_FLOAT_PTR( w );
52  float* buff_tau = FLA_FLOAT_PTR( tau );
53  float* buff_beta = FLA_FLOAT_PTR( beta );
54 
55  FLA_Fused_Ahx_Axpy_Ax_ops_var1( m_A,
56  n_A,
57  buff_tau,
58  buff_beta,
59  buff_A, rs_A, cs_A,
60  buff_u, inc_u,
61  buff_a, inc_a,
62  buff_y, inc_y,
63  buff_w, inc_w );
64 
65  break;
66  }
67 
68  case FLA_DOUBLE:
69  {
70  double* buff_A = FLA_DOUBLE_PTR( A );
71  double* buff_u = FLA_DOUBLE_PTR( u );
72  double* buff_a = FLA_DOUBLE_PTR( a );
73  double* buff_y = FLA_DOUBLE_PTR( y );
74  double* buff_w = FLA_DOUBLE_PTR( w );
75  double* buff_tau = FLA_DOUBLE_PTR( tau );
76  double* buff_beta = FLA_DOUBLE_PTR( beta );
77 
78  FLA_Fused_Ahx_Axpy_Ax_opd_var1( m_A,
79  n_A,
80  buff_tau,
81  buff_beta,
82  buff_A, rs_A, cs_A,
83  buff_u, inc_u,
84  buff_a, inc_a,
85  buff_y, inc_y,
86  buff_w, inc_w );
87 
88  break;
89  }
90 
91  case FLA_COMPLEX:
92  {
93  scomplex* buff_A = FLA_COMPLEX_PTR( A );
94  scomplex* buff_u = FLA_COMPLEX_PTR( u );
95  scomplex* buff_a = FLA_COMPLEX_PTR( a );
96  scomplex* buff_y = FLA_COMPLEX_PTR( y );
97  scomplex* buff_w = FLA_COMPLEX_PTR( w );
98  scomplex* buff_tau = FLA_COMPLEX_PTR( tau );
99  scomplex* buff_beta = FLA_COMPLEX_PTR( beta );
100 
101  FLA_Fused_Ahx_Axpy_Ax_opc_var1( m_A,
102  n_A,
103  buff_tau,
104  buff_beta,
105  buff_A, rs_A, cs_A,
106  buff_u, inc_u,
107  buff_a, inc_a,
108  buff_y, inc_y,
109  buff_w, inc_w );
110 
111  break;
112  }
113 
114  case FLA_DOUBLE_COMPLEX:
115  {
116  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
117  dcomplex* buff_u = FLA_DOUBLE_COMPLEX_PTR( u );
118  dcomplex* buff_a = FLA_DOUBLE_COMPLEX_PTR( a );
119  dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( y );
120  dcomplex* buff_w = FLA_DOUBLE_COMPLEX_PTR( w );
121  dcomplex* buff_tau = FLA_DOUBLE_COMPLEX_PTR( tau );
122  dcomplex* buff_beta = FLA_DOUBLE_COMPLEX_PTR( beta );
123 
124  FLA_Fused_Ahx_Axpy_Ax_opz_var1( m_A,
125  n_A,
126  buff_tau,
127  buff_beta,
128  buff_A, rs_A, cs_A,
129  buff_u, inc_u,
130  buff_a, inc_a,
131  buff_y, inc_y,
132  buff_w, inc_w );
133 
134  break;
135  }
136  }
137 
138  return FLA_SUCCESS;
139 }
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opc_var1(int m_A, int n_A, scomplex *buff_tau, scomplex *buff_beta, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_u, int inc_u, scomplex *buff_a, int inc_a, scomplex *buff_y, int inc_y, scomplex *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:322
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opd_var1(int m_A, int n_A, double *buff_tau, double *buff_beta, double *buff_A, int rs_A, int cs_A, double *buff_u, int inc_u, double *buff_a, int inc_a, double *buff_y, int inc_y, double *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:207
Definition: blis_type_defs.h:132
FLA_Error FLA_Fused_Ahx_Axpy_Ax_ops_var1(int m_A, int n_A, float *buff_tau, float *buff_beta, float *buff_A, int rs_A, int cs_A, float *buff_u, int inc_u, float *buff_a, int inc_a, float *buff_y, int inc_y, float *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:143
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_beta, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_u, int inc_u, dcomplex *buff_a, int inc_a, dcomplex *buff_y, int inc_y, dcomplex *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:390
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
Definition: blis_type_defs.h:137

◆ FLA_Fused_Ahx_Axpy_Ax_opz_var1()

FLA_Error FLA_Fused_Ahx_Axpy_Ax_opz_var1 ( int  m_A,
int  n_A,
dcomplex *  buff_tau,
dcomplex *  buff_beta,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_u,
int  inc_u,
dcomplex *  buff_a,
int  inc_a,
dcomplex *  buff_y,
int  inc_y,
dcomplex *  buff_w,
int  inc_w 
)

References alpha1, alpha2, bl1_z0(), bl1_zaxpyv(), bl1_zaxpyv2b(), bl1_zdot(), bl1_zdotsv2(), bl1_zm1(), bl1_zscals(), bl1_zsetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, n_left, n_run, psi1, rho1, and twoinc_y.

Referenced by FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Bidiag_UT_u_step_ofz_var4(), and FLA_Fused_Ahx_Axpy_Ax_opt_var1().

399 {
400  dcomplex zero = bl1_z0();
401  dcomplex minus_one = bl1_zm1();
402  dcomplex* restrict u = buff_u;
403  dcomplex* restrict w = buff_w;
404  dcomplex* restrict beta = buff_beta;
405  dcomplex* restrict a1;
406  dcomplex* restrict a2;
407  dcomplex* restrict psi1;
408  dcomplex* restrict psi2;
409  dcomplex* restrict alpha1;
410  dcomplex* restrict alpha2;
411 
412  dcomplex minus_inv_tau;
413  dcomplex conj_psi1;
414  dcomplex conj_psi2;
415  dcomplex conj_alpha1;
416  dcomplex conj_alpha2;
417  int i;
418  int n_run = n_A / 2;
419  int n_left = n_A % 2;
420  int twocs_A = 2*cs_A;
421  int twoinc_y = 2*inc_y;
422  int twoinc_a = 2*inc_a;
423 
424 
425  bl1_zsetv( m_A,
426  &zero,
427  buff_w, inc_w );
428 
429  bl1_zdiv3( &minus_one, buff_tau, &minus_inv_tau );
430 
431  a1 = buff_A;
432  a2 = buff_A + cs_A;
433  psi1 = buff_y;
434  psi2 = buff_y + inc_y;
435  alpha1 = buff_a;
436  alpha2 = buff_a + inc_a;
437 
438  for ( i = 0; i < n_run; ++i )
439  {
440 /*
441  Effective computation:
442  y = beta * y + A' * u;
443  a = a - conj(y) / tau;
444  w = A * conj(a);
445 */
446  /*------------------------------------------------------------*/
447 
448  bl1_zdotsv2( BLIS1_CONJUGATE,
449  m_A,
450  a1, rs_A,
451  a2, rs_A,
452  u, inc_u,
453  beta,
454  psi1,
455  psi2 );
456 
457  bl1_zcopyconj( psi1, &conj_psi1 );
458  bl1_zcopyconj( psi2, &conj_psi2 );
459  bl1_zmult4( &minus_inv_tau, &conj_psi1, alpha1, alpha1 );
460  bl1_zmult4( &minus_inv_tau, &conj_psi2, alpha2, alpha2 );
461  bl1_zcopyconj( alpha1, &conj_alpha1 );
462  bl1_zcopyconj( alpha2, &conj_alpha2 );
463 
464  bl1_zaxpyv2b( m_A,
465  &conj_alpha1,
466  &conj_alpha2,
467  a1, rs_A,
468  a2, rs_A,
469  w, inc_w );
470 
471  /*------------------------------------------------------------*/
472 
473  a1 += twocs_A;
474  a2 += twocs_A;
475  psi1 += twoinc_y;
476  psi2 += twoinc_y;
477  alpha1 += twoinc_a;
478  alpha2 += twoinc_a;
479  }
480 
481  if ( n_left == 1 )
482  {
483  dcomplex rho1;
484 
485  bl1_zdot( BLIS1_CONJUGATE,
486  m_A,
487  a1, rs_A,
488  u, inc_u,
489  &rho1 );
490  bl1_zscals( buff_beta, psi1 );
491  bl1_zadd3( psi1, &rho1, psi1 );
492 
493  bl1_zcopyconj( psi1, &conj_psi1 );
494  bl1_zmult4( &minus_inv_tau, &conj_psi1, alpha1, alpha1 );
495  bl1_zcopyconj( alpha1, &conj_alpha1 );
496 
497  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
498  m_A,
499  &conj_alpha1,
500  a1, rs_A,
501  w, inc_w );
502  }
503 
504  return FLA_SUCCESS;
505 }
double rho1
Definition: bl1_dotsv2.c:149
double *restrict psi1
Definition: bl1_axmyv2.c:139
void bl1_zsetv(int m, dcomplex *sigma, dcomplex *x, int incx)
Definition: bl1_setv.c:66
dcomplex bl1_z0(void)
Definition: bl1_constants.c:133
Definition: blis_type_defs.h:81
double *restrict alpha1
Definition: bl1_axpyv2bdotaxpy.c:198
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
double *restrict alpha2
Definition: bl1_dotv2axpyv2b.c:186
Definition: blis_type_defs.h:82
int n_left
Definition: bl1_axmyv2.c:149
void bl1_zdotsv2(conj1_t conjxy, int n, dcomplex *x, int inc_x, dcomplex *y, int inc_y, dcomplex *z, int inc_z, dcomplex *beta, dcomplex *rho_xz, dcomplex *rho_yz)
Definition: bl1_dotsv2.c:248
int i
Definition: bl1_axmyv2.c:145
dcomplex bl1_zm1(void)
Definition: bl1_constants.c:197
void bl1_zaxpyv2b(int n, dcomplex *alpha1, dcomplex *alpha2, dcomplex *x1, int inc_x1, dcomplex *x2, int inc_x2, dcomplex *y, int inc_y)
Definition: bl1_axpyv2b.c:210
int n_run
Definition: bl1_axmyv2.c:148
int twoinc_y
Definition: bl1_axpyv2b.c:154
Definition: blis_type_defs.h:137
bl1_zscals(beta, rho_yz)
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60