libflame  revision_anchor
Functions
FLA_Fused_Her2_Ax_l_opt_var1.c File Reference

(r)

Functions

FLA_Error FLA_Fused_Her2_Ax_l_opt_var1 (FLA_Obj beta, FLA_Obj u, FLA_Obj z, FLA_Obj A, FLA_Obj x, FLA_Obj w)
 
FLA_Error FLA_Fused_Her2_Ax_l_ops_var1 (int m_A, float *buff_beta, float *buff_u, int inc_u, float *buff_z, int inc_z, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Her2_Ax_l_opd_var1 (int m_A, double *buff_beta, double *buff_u, int inc_u, double *buff_z, int inc_z, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Her2_Ax_l_opc_var1 (int m_A, scomplex *buff_beta, scomplex *buff_u, int inc_u, scomplex *buff_z, int inc_z, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Her2_Ax_l_opz_var1 (int m_A, dcomplex *buff_beta, dcomplex *buff_u, int inc_u, dcomplex *buff_z, int inc_z, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_w, int inc_w)
 

Function Documentation

◆ FLA_Fused_Her2_Ax_l_opc_var1()

FLA_Error FLA_Fused_Her2_Ax_l_opc_var1 ( int  m_A,
scomplex *  buff_beta,
scomplex *  buff_u,
int  inc_u,
scomplex *  buff_z,
int  inc_z,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_x,
int  inc_x,
scomplex *  buff_w,
int  inc_w 
)

References bl1_caxpyv(), bl1_cdot(), bl1_csetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, chi1, FLA_ZERO, i, scomplex::imag, omega1, scomplex::real, temp, upsilon1, and zeta1.

Referenced by FLA_Fused_Her2_Ax_l_opt_var1(), and FLA_Tridiag_UT_l_step_ofc_var2().

336 {
337  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
338  int i;
339 
340  bl1_csetv( m_A,
341  buff_0,
342  buff_w, inc_w );
343 
344  for ( i = 0; i < m_A; ++i )
345  {
346  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
347  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
348 
349  scomplex* upsilon1 = buff_u + (i )*inc_u;
350  scomplex* u2 = buff_u + (i+1)*inc_u;
351 
352  scomplex* zeta1 = buff_z + (i )*inc_z;
353  scomplex* z2 = buff_z + (i+1)*inc_z;
354 
355  scomplex* chi1 = buff_x + (i )*inc_x;
356  scomplex* x2 = buff_x + (i+1)*inc_x;
357 
358  scomplex* omega1 = buff_w + (i )*inc_w;
359  scomplex* w2 = buff_w + (i+1)*inc_w;
360 
361  // scomplex* beta = buff_beta;
362 
363  scomplex minus_conj_upsilon1;
364  scomplex minus_conj_zeta1;
365  scomplex temp;
366 
367  int m_ahead = m_A - i - 1;
368 
369  /*------------------------------------------------------------*/
370 
371  // bl1_ccopyconj( zeta1, &conj_zeta1 );
372  // bl1_cmult3( beta, &conj_zeta1, &minus_conj_zeta1 );
373  // bl1_cmult3( &minus_conj_zeta1, upsilon1, &temp );
374  // bl1_cadd3( &temp, alpha11, alpha11 );
375 
376  //bl1_ccopyconj( upsilon1, &conj_upsilon1 );
377  //bl1_cmult3( beta, &conj_upsilon1, &minus_conj_upsilon1 );
378  //bl1_cmult3( &minus_conj_upsilon1, zeta1, &temp );
379  //bl1_cadd3( &temp, alpha11, alpha11 );
380  minus_conj_zeta1.real = - zeta1->real;
381  minus_conj_zeta1.imag = - -zeta1->imag;
382  minus_conj_upsilon1.real = - upsilon1->real;
383  minus_conj_upsilon1.imag = - -upsilon1->imag;
384 
385  alpha11->real -= zeta1->real * upsilon1->real - -zeta1->imag * upsilon1->imag +
386  zeta1->real * upsilon1->real - zeta1->imag * -upsilon1->imag;
387  alpha11->imag -= -zeta1->imag * upsilon1->real + zeta1->real * upsilon1->imag +
388  zeta1->imag * upsilon1->real + zeta1->real * -upsilon1->imag;
389 
390  bl1_caxpyv( BLIS1_NO_CONJUGATE,
391  m_ahead,
392  &minus_conj_zeta1,
393  u2, inc_u,
394  a21, rs_A );
395 /*
396  F77_caxpy( &m_ahead,
397  &minus_conj_zeta1,
398  u2, &inc_u,
399  a21, &rs_A );
400 */
401 
402 
403  bl1_caxpyv( BLIS1_NO_CONJUGATE,
404  m_ahead,
405  &minus_conj_upsilon1,
406  z2, inc_z,
407  a21, rs_A );
408 /*
409  F77_caxpy( &m_ahead,
410  &minus_conj_upsilon1,
411  z2, &inc_z,
412  a21, &rs_A );
413 */
414 
415  // bl1_cmult3( alpha11, chi1, &temp );
416  // bl1_cadd3( &temp, omega1, omega1 );
417  omega1->real += alpha11->real * chi1->real - alpha11->imag * chi1->imag;
418  omega1->imag += alpha11->imag * chi1->real + alpha11->real * chi1->imag;
419 
420  bl1_cdot( BLIS1_CONJUGATE,
421  m_ahead,
422  a21, rs_A,
423  x2, inc_x,
424  &temp );
425  // bl1_cadd3( &temp, omega1, omega1 );
426  omega1->real += temp.real;
427  omega1->imag += temp.imag;
428 
429  bl1_caxpyv( BLIS1_NO_CONJUGATE,
430  m_ahead,
431  chi1,
432  a21, rs_A,
433  w2, inc_w );
434 /*
435  F77_caxpy( &m_ahead,
436  chi1,
437  a21, &rs_A,
438  w2, &inc_w );
439 */
440 
441  /*------------------------------------------------------------*/
442 
443  }
444 
445  return FLA_SUCCESS;
446 }
float real
Definition: blis_type_defs.h:134
chi1
Definition: bl1_axmyv2.c:366
double *restrict zeta1
Definition: bl1_axmyv2.c:142
void bl1_csetv(int m, scomplex *sigma, scomplex *x, int incx)
Definition: bl1_setv.c:52
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
Definition: blis_type_defs.h:81
upsilon1
Definition: bl1_axpyv2bdotaxpy.c:225
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
dcomplex temp
Definition: bl1_axpyv2b.c:301
Definition: blis_type_defs.h:82
scomplex
Definition: blis_type_defs.h:132
int i
Definition: bl1_axmyv2.c:145
float imag
Definition: blis_type_defs.h:134
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
double *restrict omega1
Definition: bl1_axpyv2bdotaxpy.c:200

◆ FLA_Fused_Her2_Ax_l_opd_var1()

FLA_Error FLA_Fused_Her2_Ax_l_opd_var1 ( int  m_A,
double *  buff_beta,
double *  buff_u,
int  inc_u,
double *  buff_z,
int  inc_z,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_x,
int  inc_x,
double *  buff_w,
int  inc_w 
)

References bl1_daxpyv2bdotaxpy(), bl1_dsetv(), chi1, FLA_ZERO, i, omega1, temp, upsilon1, and zeta1.

Referenced by FLA_Fused_Her2_Ax_l_opt_var1(), and FLA_Tridiag_UT_l_step_ofd_var2().

253 {
254  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
255  int i;
256 
257  bl1_dsetv( m_A,
258  buff_0,
259  buff_w, inc_w );
260 
261  for ( i = 0; i < m_A; ++i )
262  {
263  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
264  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
265 
266  double* upsilon1 = buff_u + (i )*inc_u;
267  double* u2 = buff_u + (i+1)*inc_u;
268 
269  double* zeta1 = buff_z + (i )*inc_z;
270  double* z2 = buff_z + (i+1)*inc_z;
271 
272  double* chi1 = buff_x + (i )*inc_x;
273  double* x2 = buff_x + (i+1)*inc_x;
274 
275  double* omega1 = buff_w + (i )*inc_w;
276  double* w2 = buff_w + (i+1)*inc_w;
277 
278  // double* beta = buff_beta;
279 
280  double minus_conj_upsilon1;
281  double minus_conj_zeta1;
282  double temp;
283 
284  int m_ahead = m_A - i - 1;
285 
286  /*------------------------------------------------------------*/
287 
288  // bl1_dcopyconj( zeta1, &conj_zeta1 );
289  // bl1_dmult3( beta, &conj_zeta1, &minus_conj_zeta1 );
290  // bl1_dmult3( &minus_conj_zeta1, upsilon1, &temp );
291  // bl1_dadd3( &temp, alpha11, alpha11 );
292 
293  //bl1_dcopyconj( upsilon1, &conj_upsilon1 );
294  //bl1_dmult3( beta, &conj_upsilon1, &minus_conj_upsilon1 );
295  //bl1_dmult3( &minus_conj_upsilon1, zeta1, &temp );
296  //bl1_dadd3( &temp, alpha11, alpha11 );
297  minus_conj_zeta1 = - *zeta1;
298  minus_conj_upsilon1 = - *upsilon1;
299 
300  *alpha11 -= 2.0 * *zeta1 * *upsilon1;
301 
302  // bl1_dmult3( alpha11, chi1, &temp );
303  // bl1_dadd3( &temp, omega1, omega1 );
304  *omega1 += *alpha11 * *chi1;
305 
306  bl1_daxpyv2bdotaxpy( m_ahead,
307  &minus_conj_zeta1,
308  u2, inc_u,
309  &minus_conj_upsilon1,
310  z2, inc_z,
311  a21, rs_A,
312  x2, inc_x,
313  chi1,
314  &temp,
315  w2, inc_w );
316 
317  // bl1_dadd3( &temp, omega1, omega1 );
318  *omega1 += temp;
319 
320  /*------------------------------------------------------------*/
321 
322  }
323 
324  return FLA_SUCCESS;
325 }
chi1
Definition: bl1_axmyv2.c:366
double *restrict zeta1
Definition: bl1_axmyv2.c:142
upsilon1
Definition: bl1_axpyv2bdotaxpy.c:225
dcomplex temp
Definition: bl1_axpyv2b.c:301
void bl1_daxpyv2bdotaxpy(int n, double *beta, double *u, int inc_u, double *gamma, double *z, int inc_z, double *a, int inc_a, double *x, int inc_x, double *kappa, double *rho, double *w, int inc_w)
Definition: bl1_axpyv2bdotaxpy.c:36
void bl1_dsetv(int m, double *sigma, double *x, int incx)
Definition: bl1_setv.c:39
int i
Definition: bl1_axmyv2.c:145
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
double *restrict omega1
Definition: bl1_axpyv2bdotaxpy.c:200

◆ FLA_Fused_Her2_Ax_l_ops_var1()

FLA_Error FLA_Fused_Her2_Ax_l_ops_var1 ( int  m_A,
float *  buff_beta,
float *  buff_u,
int  inc_u,
float *  buff_z,
int  inc_z,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_x,
int  inc_x,
float *  buff_w,
int  inc_w 
)

References bl1_saxpyv(), bl1_sdot(), bl1_ssetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, chi1, FLA_ZERO, i, omega1, temp, upsilon1, and zeta1.

Referenced by FLA_Fused_Her2_Ax_l_opt_var1(), and FLA_Tridiag_UT_l_step_ofs_var2().

133 {
134  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
135  int i;
136 
137  bl1_ssetv( m_A,
138  buff_0,
139  buff_w, inc_w );
140 
141  for ( i = 0; i < m_A; ++i )
142  {
143  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
144  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
145 
146  float* upsilon1 = buff_u + (i )*inc_u;
147  float* u2 = buff_u + (i+1)*inc_u;
148 
149  float* zeta1 = buff_z + (i )*inc_z;
150  float* z2 = buff_z + (i+1)*inc_z;
151 
152  float* chi1 = buff_x + (i )*inc_x;
153  float* x2 = buff_x + (i+1)*inc_x;
154 
155  float* omega1 = buff_w + (i )*inc_w;
156  float* w2 = buff_w + (i+1)*inc_w;
157 
158  // float* beta = buff_beta;
159 
160  float minus_conj_upsilon1;
161  float minus_conj_zeta1;
162  float temp;
163 
164  int m_ahead = m_A - i - 1;
165 
166  /*------------------------------------------------------------*/
167 
168  // bl1_scopyconj( zeta1, &conj_zeta1 );
169  // bl1_smult3( beta, &conj_zeta1, &minus_conj_zeta1 );
170  // bl1_smult3( &minus_conj_zeta1, upsilon1, &temp );
171  // bl1_sadd3( &temp, alpha11, alpha11 );
172 
173  //bl1_scopyconj( upsilon1, &conj_upsilon1 );
174  //bl1_smult3( beta, &conj_upsilon1, &minus_conj_upsilon1 );
175  //bl1_smult3( &minus_conj_upsilon1, zeta1, &temp );
176  //bl1_sadd3( &temp, alpha11, alpha11 );
177  minus_conj_zeta1 = - *zeta1;
178  minus_conj_upsilon1 = - *upsilon1;
179 
180  *alpha11 -= 2.0F * *zeta1 * *upsilon1;
181 
182  bl1_saxpyv( BLIS1_NO_CONJUGATE,
183  m_ahead,
184  &minus_conj_zeta1,
185  u2, inc_u,
186  a21, rs_A );
187 /*
188  F77_saxpy( &m_ahead,
189  &minus_conj_zeta1,
190  u2, &inc_u,
191  a21, &rs_A );
192 */
193 
194 
195  bl1_saxpyv( BLIS1_NO_CONJUGATE,
196  m_ahead,
197  &minus_conj_upsilon1,
198  z2, inc_z,
199  a21, rs_A );
200 /*
201  F77_saxpy( &m_ahead,
202  &minus_conj_upsilon1,
203  z2, &inc_z,
204  a21, &rs_A );
205 */
206 
207  // bl1_smult3( alpha11, chi1, &temp );
208  // bl1_sadd3( &temp, omega1, omega1 );
209  *omega1 += *alpha11 * *chi1;
210 
211  bl1_sdot( BLIS1_CONJUGATE,
212  m_ahead,
213  a21, rs_A,
214  x2, inc_x,
215  &temp );
216 /*
217  temp = F77_sdot( &m_ahead,
218  a21, &rs_A,
219  x2, &inc_x );
220 */
221 
222  // bl1_sadd3( &temp, omega1, omega1 );
223  *omega1 += temp;
224 
225  bl1_saxpyv( BLIS1_NO_CONJUGATE,
226  m_ahead,
227  chi1,
228  a21, rs_A,
229  w2, inc_w );
230 /*
231  F77_saxpy( &m_ahead,
232  chi1,
233  a21, &rs_A,
234  w2, &inc_w );
235 */
236 
237  /*------------------------------------------------------------*/
238 
239  }
240 
241  return FLA_SUCCESS;
242 }
chi1
Definition: bl1_axmyv2.c:366
double *restrict zeta1
Definition: bl1_axmyv2.c:142
Definition: blis_type_defs.h:81
upsilon1
Definition: bl1_axpyv2bdotaxpy.c:225
dcomplex temp
Definition: bl1_axpyv2b.c:301
Definition: blis_type_defs.h:82
void bl1_ssetv(int m, float *sigma, float *x, int incx)
Definition: bl1_setv.c:26
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
int i
Definition: bl1_axmyv2.c:145
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
double *restrict omega1
Definition: bl1_axpyv2bdotaxpy.c:200

◆ FLA_Fused_Her2_Ax_l_opt_var1()

FLA_Error FLA_Fused_Her2_Ax_l_opt_var1 ( FLA_Obj  beta,
FLA_Obj  u,
FLA_Obj  z,
FLA_Obj  A,
FLA_Obj  x,
FLA_Obj  w 
)

References FLA_Fused_Her2_Ax_l_opc_var1(), FLA_Fused_Her2_Ax_l_opd_var1(), FLA_Fused_Her2_Ax_l_ops_var1(), FLA_Fused_Her2_Ax_l_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_vector_inc().

14 {
15 /*
16  Effective computation:
17  A = A + beta * ( u * z' + z * u' );
18  w = A * x;
19 */
20  FLA_Datatype datatype;
21  int m_A;
22  int rs_A, cs_A;
23  int inc_u, inc_z, inc_x, inc_w;
24 
25  datatype = FLA_Obj_datatype( A );
26 
27  m_A = FLA_Obj_length( A );
28 
29  rs_A = FLA_Obj_row_stride( A );
30  cs_A = FLA_Obj_col_stride( A );
31 
32  inc_u = FLA_Obj_vector_inc( u );
33  inc_z = FLA_Obj_vector_inc( z );
34  inc_x = FLA_Obj_vector_inc( x );
35  inc_w = FLA_Obj_vector_inc( w );
36 
37 
38  switch ( datatype )
39  {
40  case FLA_FLOAT:
41  {
42  float* buff_A = FLA_FLOAT_PTR( A );
43  float* buff_u = FLA_FLOAT_PTR( u );
44  float* buff_z = FLA_FLOAT_PTR( z );
45  float* buff_x = FLA_FLOAT_PTR( x );
46  float* buff_w = FLA_FLOAT_PTR( w );
47  float* buff_beta = FLA_FLOAT_PTR( beta );
48 
49  FLA_Fused_Her2_Ax_l_ops_var1( m_A,
50  buff_beta,
51  buff_u, inc_u,
52  buff_z, inc_z,
53  buff_A, rs_A, cs_A,
54  buff_x, inc_x,
55  buff_w, inc_w );
56 
57  break;
58  }
59 
60  case FLA_DOUBLE:
61  {
62  double* buff_A = FLA_DOUBLE_PTR( A );
63  double* buff_u = FLA_DOUBLE_PTR( u );
64  double* buff_z = FLA_DOUBLE_PTR( z );
65  double* buff_x = FLA_DOUBLE_PTR( x );
66  double* buff_w = FLA_DOUBLE_PTR( w );
67  double* buff_beta = FLA_DOUBLE_PTR( beta );
68 
69  FLA_Fused_Her2_Ax_l_opd_var1( m_A,
70  buff_beta,
71  buff_u, inc_u,
72  buff_z, inc_z,
73  buff_A, rs_A, cs_A,
74  buff_x, inc_x,
75  buff_w, inc_w );
76 
77  break;
78  }
79 
80  case FLA_COMPLEX:
81  {
82  scomplex* buff_A = FLA_COMPLEX_PTR( A );
83  scomplex* buff_u = FLA_COMPLEX_PTR( u );
84  scomplex* buff_z = FLA_COMPLEX_PTR( z );
85  scomplex* buff_x = FLA_COMPLEX_PTR( x );
86  scomplex* buff_w = FLA_COMPLEX_PTR( w );
87  scomplex* buff_beta = FLA_COMPLEX_PTR( beta );
88 
89  FLA_Fused_Her2_Ax_l_opc_var1( m_A,
90  buff_beta,
91  buff_u, inc_u,
92  buff_z, inc_z,
93  buff_A, rs_A, cs_A,
94  buff_x, inc_x,
95  buff_w, inc_w );
96 
97  break;
98  }
99 
100  case FLA_DOUBLE_COMPLEX:
101  {
102  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
103  dcomplex* buff_u = FLA_DOUBLE_COMPLEX_PTR( u );
104  dcomplex* buff_z = FLA_DOUBLE_COMPLEX_PTR( z );
105  dcomplex* buff_x = FLA_DOUBLE_COMPLEX_PTR( x );
106  dcomplex* buff_w = FLA_DOUBLE_COMPLEX_PTR( w );
107  dcomplex* buff_beta = FLA_DOUBLE_COMPLEX_PTR( beta );
108 
109  FLA_Fused_Her2_Ax_l_opz_var1( m_A,
110  buff_beta,
111  buff_u, inc_u,
112  buff_z, inc_z,
113  buff_A, rs_A, cs_A,
114  buff_x, inc_x,
115  buff_w, inc_w );
116 
117  break;
118  }
119  }
120 
121  return FLA_SUCCESS;
122 }
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Error FLA_Fused_Her2_Ax_l_opz_var1(int m_A, dcomplex *buff_beta, dcomplex *buff_u, int inc_u, dcomplex *buff_z, int inc_z, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_w, int inc_w)
Definition: FLA_Fused_Her2_Ax_l_opt_var1.c:450
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
scomplex
Definition: blis_type_defs.h:132
int FLA_Datatype
Definition: FLA_type_defs.h:49
FLA_Error FLA_Fused_Her2_Ax_l_opc_var1(int m_A, scomplex *buff_beta, scomplex *buff_u, int inc_u, scomplex *buff_z, int inc_z, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_w, int inc_w)
Definition: FLA_Fused_Her2_Ax_l_opt_var1.c:329
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Error FLA_Fused_Her2_Ax_l_opd_var1(int m_A, double *buff_beta, double *buff_u, int inc_u, double *buff_z, int inc_z, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_w, int inc_w)
Definition: FLA_Fused_Her2_Ax_l_opt_var1.c:246
FLA_Error FLA_Fused_Her2_Ax_l_ops_var1(int m_A, float *buff_beta, float *buff_u, int inc_u, float *buff_z, int inc_z, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_w, int inc_w)
Definition: FLA_Fused_Her2_Ax_l_opt_var1.c:126
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dcomplex
Definition: blis_type_defs.h:137

◆ FLA_Fused_Her2_Ax_l_opz_var1()

FLA_Error FLA_Fused_Her2_Ax_l_opz_var1 ( int  m_A,
dcomplex *  buff_beta,
dcomplex *  buff_u,
int  inc_u,
dcomplex *  buff_z,
int  inc_z,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_x,
int  inc_x,
dcomplex *  buff_w,
int  inc_w 
)

References bl1_z0(), bl1_zaxpyv2b(), bl1_zdotaxpy(), bl1_zsetv(), chi1, i, dcomplex::imag, omega1, dcomplex::real, temp, upsilon1, and zeta1.

Referenced by FLA_Fused_Her2_Ax_l_opt_var1(), and FLA_Tridiag_UT_l_step_ofz_var2().

457 {
458  dcomplex zero = bl1_z0();
459  int i;
460 
461  bl1_zsetv( m_A,
462  &zero,
463  buff_w, inc_w );
464 
465  for ( i = 0; i < m_A; ++i )
466  {
467  dcomplex* restrict alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
468  dcomplex* restrict a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
469 
470  dcomplex* restrict upsilon1 = buff_u + (i )*inc_u;
471  dcomplex* restrict u2 = buff_u + (i+1)*inc_u;
472 
473  dcomplex* restrict zeta1 = buff_z + (i )*inc_z;
474  dcomplex* restrict z2 = buff_z + (i+1)*inc_z;
475 
476  dcomplex* restrict chi1 = buff_x + (i )*inc_x;
477  dcomplex* restrict x2 = buff_x + (i+1)*inc_x;
478 
479  dcomplex* restrict omega1 = buff_w + (i )*inc_w;
480  dcomplex* restrict w2 = buff_w + (i+1)*inc_w;
481 
482  //dcomplex* restrict beta = buff_beta;
483 
484  dcomplex minus_conj_upsilon1;
485  dcomplex minus_conj_zeta1;
486  dcomplex temp;
487 
488  dcomplex ze1;
489  dcomplex up1;
490  dcomplex a11;
491  dcomplex om1;
492  dcomplex ch1;
493 
494  int m_ahead = m_A - i - 1;
495 
496  /*------------------------------------------------------------*/
497 
498  // bl1_zcopyconj( zeta1, &conj_zeta1 );
499  // bl1_zmult3( beta, &conj_zeta1, &minus_conj_zeta1 );
500  // bl1_zmult3( &minus_conj_zeta1, upsilon1, &temp );
501  // bl1_zadd3( &temp, alpha11, alpha11 );
502 
503  //bl1_zcopyconj( upsilon1, &conj_upsilon1 );
504  //bl1_zmult3( beta, &conj_upsilon1, &minus_conj_upsilon1 );
505  //bl1_zmult3( &minus_conj_upsilon1, zeta1, &temp );
506  //bl1_zadd3( &temp, alpha11, alpha11 );
507  minus_conj_zeta1.real = - zeta1->real;
508  minus_conj_zeta1.imag = - -zeta1->imag;
509  minus_conj_upsilon1.real = - upsilon1->real;
510  minus_conj_upsilon1.imag = - -upsilon1->imag;
511 
512  ze1 = *zeta1;
513  up1 = *upsilon1;
514  a11 = *alpha11;
515  om1 = *omega1;
516  ch1 = *chi1;
517 
518  //alpha11->real -= zeta1->real * upsilon1->real - -zeta1->imag * upsilon1->imag +
519  // zeta1->real * upsilon1->real - zeta1->imag * -upsilon1->imag;
520  //alpha11->imag -= -zeta1->imag * upsilon1->real + zeta1->real * upsilon1->imag +
521  // zeta1->imag * upsilon1->real + zeta1->real * -upsilon1->imag;
522  a11.real -= ze1.real * up1.real - -ze1.imag * up1.imag +
523  up1.real * ze1.real - -up1.imag * ze1.imag;
524  a11.imag -= ze1.real * up1.imag + -ze1.imag * up1.real +
525  up1.real * ze1.imag + -up1.imag * ze1.real;
526 
527  // bl1_zmult3( alpha11, chi1, &temp );
528  // bl1_zadd3( &temp, omega1, omega1 );
529  //omega1->real += alpha11->real * chi1->real - alpha11->imag * chi1->imag;
530  //omega1->imag += alpha11->imag * chi1->real + alpha11->real * chi1->imag;
531  om1.real += a11.real * ch1.real - a11.imag * ch1.imag;
532  om1.imag += a11.imag * ch1.real + a11.real * ch1.imag;
533 
534  *alpha11 = a11;
535  *omega1 = om1;
536 
537 /*
538  bl1_zaxpyv2bdotaxpy( m_ahead,
539  &minus_conj_zeta1,
540  u2, inc_u,
541  &minus_conj_upsilon1,
542  z2, inc_z,
543  a21, rs_A,
544  x2, inc_x,
545  chi1,
546  &temp,
547  w2, inc_w );
548 */
549 
550  bl1_zaxpyv2b( m_ahead,
551  &minus_conj_zeta1,
552  &minus_conj_upsilon1,
553  u2, inc_u,
554  z2, inc_z,
555  a21, rs_A );
556 
557  bl1_zdotaxpy( m_ahead,
558  a21, rs_A,
559  x2, inc_x,
560  chi1,
561  &temp,
562  w2, inc_w );
563 
564 
565  // bl1_zadd3( &temp, omega1, omega1 );
566  omega1->real += temp.real;
567  omega1->imag += temp.imag;
568 
569  /*------------------------------------------------------------*/
570 
571  }
572 
573  return FLA_SUCCESS;
574 }
chi1
Definition: bl1_axmyv2.c:366
double *restrict zeta1
Definition: bl1_axmyv2.c:142
void bl1_zsetv(int m, dcomplex *sigma, dcomplex *x, int incx)
Definition: bl1_setv.c:66
dcomplex bl1_z0(void)
Definition: bl1_constants.c:133
double imag
Definition: blis_type_defs.h:139
upsilon1
Definition: bl1_axpyv2bdotaxpy.c:225
dcomplex temp
Definition: bl1_axpyv2b.c:301
void bl1_zdotaxpy(int n, dcomplex *a, int inc_a, dcomplex *x, int inc_x, dcomplex *kappa, dcomplex *rho, dcomplex *w, int inc_w)
Definition: bl1_dotaxpy.c:258
double real
Definition: blis_type_defs.h:139
int i
Definition: bl1_axmyv2.c:145
void bl1_zaxpyv2b(int n, dcomplex *alpha1, dcomplex *alpha2, dcomplex *x1, int inc_x1, dcomplex *x2, int inc_x2, dcomplex *y, int inc_y)
Definition: bl1_axpyv2b.c:210
double *restrict omega1
Definition: bl1_axpyv2bdotaxpy.c:200
dcomplex
Definition: blis_type_defs.h:137