libflame  revision_anchor
Functions
FLA_Fused_UYx_ZVx_opt_var1.c File Reference

(r)

Functions

FLA_Error FLA_Fused_UYx_ZVx_opt_var1 (FLA_Obj delta, FLA_Obj a, FLA_Obj U, FLA_Obj Y, FLA_Obj Z, FLA_Obj V, FLA_Obj A, FLA_Obj temp, FLA_Obj t, FLA_Obj w, FLA_Obj al)
 
FLA_Error FLA_Fused_UYx_ZVx_ops_var1 (int m_U, int n_U, int m_V, int n_V, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_V, int rs_V, int cs_V, float *buff_A, int rs_A, int cs_A, float *buff_temp, int inc_temp, float *buff_t, int inc_t, float *buff_a, int inc_a, float *buff_w, int inc_w, float *buff_al, int inc_al)
 
FLA_Error FLA_Fused_UYx_ZVx_opd_var1 (int m_U, int n_U, int m_V, int n_V, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_V, int rs_V, int cs_V, double *buff_A, int rs_A, int cs_A, double *buff_temp, int inc_temp, double *buff_t, int inc_t, double *buff_a, int inc_a, double *buff_w, int inc_w, double *buff_al, int inc_al)
 
FLA_Error FLA_Fused_UYx_ZVx_opc_var1 (int m_U, int n_U, int m_V, int n_V, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_V, int rs_V, int cs_V, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_temp, int inc_temp, scomplex *buff_t, int inc_t, scomplex *buff_a, int inc_a, scomplex *buff_w, int inc_w, scomplex *buff_al, int inc_al)
 
FLA_Error FLA_Fused_UYx_ZVx_opz_var1 (int m_U, int n_U, int m_V, int n_V, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_V, int rs_V, int cs_V, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_temp, int inc_temp, dcomplex *buff_t, int inc_t, dcomplex *buff_a, int inc_a, dcomplex *buff_w, int inc_w, dcomplex *buff_al, int inc_al)
 

Function Documentation

◆ FLA_Fused_UYx_ZVx_opc_var1()

FLA_Error FLA_Fused_UYx_ZVx_opc_var1 ( int  m_U,
int  n_U,
int  m_V,
int  n_V,
scomplex *  buff_delta,
scomplex *  buff_U,
int  rs_U,
int  cs_U,
scomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
scomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *  buff_V,
int  rs_V,
int  cs_V,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_temp,
int  inc_temp,
scomplex *  buff_t,
int  inc_t,
scomplex *  buff_a,
int  inc_a,
scomplex *  buff_w,
int  inc_w,
scomplex *  buff_al,
int  inc_al 
)

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), BLIS1_NO_CONJUGATE, i, y1, and z1.

Referenced by FLA_Bidiag_UT_u_step_ofc_var4(), and FLA_Fused_UYx_ZVx_opt_var1().

439 {
440  int i;
441  int m_A = m_U;
442  int m_Z = m_U;
443
444  bl1_ccopyv( BLIS1_NO_CONJUGATE,
445  m_A,
446  buff_A, rs_A,
447  buff_al, inc_al );
448 
449  for ( i = 0; i < n_U; ++i )
450  {
451  scomplex* u1 = buff_U + (i )*cs_U + (0 )*rs_U;
452  scomplex* y1 = buff_Y + (i )*cs_Y + (0 )*rs_Y;
453  scomplex* z1 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
454  scomplex* v1 = buff_V + (0 )*cs_V + (i )*rs_V;
455  scomplex* tau1 = buff_t + (i )*inc_t;
456  scomplex* delta = buff_delta;
457  scomplex* a = buff_a;
458  scomplex* w = buff_w;
459  scomplex* al = buff_al;
460  scomplex* psi20_l = buff_Y + (i )*cs_Y + (0 )*rs_Y;
461  scomplex* nu20_l = buff_V + (0 )*cs_V + (i )*rs_V;
462  scomplex alpha;
463  scomplex beta;
464  scomplex gamma;
465  scomplex kappa;
466 
467  /*------------------------------------------------------------*/
468
469  bl1_cdot( BLIS1_NO_CONJUGATE,
470  n_V,
471  y1, rs_Y,
472  a, inc_a,
473  &alpha );
474
475  bl1_cdot( BLIS1_NO_CONJUGATE,
476  n_V,
477  v1, cs_V,
478  a, inc_a,
479  &beta );
480 
481  bl1_cconjs( &alpha );
482  bl1_cconjs( &beta );
483  bl1_ccopyconj( psi20_l, &gamma );
484  bl1_ccopyconj( nu20_l, &kappa );
485 
486  *tau1 = beta;
487 
488  bl1_cscals( delta, &alpha );
489  bl1_cscals( delta, &beta );
490  bl1_cscals( delta, &gamma );
491  bl1_cscals( delta, &kappa );
492
493  bl1_caxpyv( BLIS1_NO_CONJUGATE,
494  m_U,
495  &alpha,
496  u1, rs_U,
497  w, inc_w );
498  //F77_caxpy( &m_U,
499  // &alpha,
500  // u1, &rs_U,
501  // w, &inc_w );
502
503  bl1_caxpyv( BLIS1_NO_CONJUGATE,
504  m_Z,
505  &beta,
506  z1, rs_Z,
507  w, inc_w );
508  //F77_caxpy( &m_Z,
509  // &beta,
510  // z1, &rs_Z,
511  // w, &inc_w );
512
513  bl1_caxpyv( BLIS1_NO_CONJUGATE,
514  m_U,
515  &gamma,
516  u1, rs_U,
517  al, inc_al );
518  //F77_caxpy( &m_U,
519  // &gamma,
520  // u1, &rs_U,
521  // al, &inc_al );
522
523  bl1_caxpyv( BLIS1_NO_CONJUGATE,
524  m_Z,
525  &kappa,
526  z1, rs_Z,
527  al, inc_al);
528  //F77_caxpy( &m_Z,
529  // &kappa,
530  // z1, &rs_Z,
531  // al, &inc_al );
532 
533  /*------------------------------------------------------------*/
534 
535  }
536 
537  return FLA_SUCCESS;
538 }
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
Definition: blis_type_defs.h:81
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
double *restrict z1
Definition: bl1_dotsv2.c:148
Definition: blis_type_defs.h:132
int i
Definition: bl1_axmyv2.c:145
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
double *restrict y1
Definition: bl1_dotsv2.c:145

◆ FLA_Fused_UYx_ZVx_opd_var1()

FLA_Error FLA_Fused_UYx_ZVx_opd_var1 ( int  m_U,
int  n_U,
int  m_V,
int  n_V,
double *  buff_delta,
double *  buff_U,
int  rs_U,
int  cs_U,
double *  buff_Y,
int  rs_Y,
int  cs_Y,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_V,
int  rs_V,
int  cs_V,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_temp,
int  inc_temp,
double *  buff_t,
int  inc_t,
double *  buff_a,
int  inc_a,
double *  buff_w,
int  inc_w,
double *  buff_al,
int  inc_al 
)

References bl1_d0(), bl1_daxmyv2(), bl1_dcopyv(), bl1_ddotsv2(), BLIS1_NO_CONJUGATE, i, y1, and z1.

Referenced by FLA_Bidiag_UT_u_step_ofd_var4(), and FLA_Fused_UYx_ZVx_opt_var1().

346 {
347  double zero = bl1_d0();
348  int i;
349  int m_A = m_U;
350  int m_Z = m_U;
351
352  bl1_dcopyv( BLIS1_NO_CONJUGATE,
353  m_A,
354  buff_A, rs_A,
355  buff_al, inc_al );
356 
357  if ( m_U == 0 || n_U == 0 ) return 0;
358  if ( m_V == 0 || n_V == 0 ) return 0;
359 
360  for ( i = 0; i < n_U; ++i )
361  {
362  double* restrict u1 = buff_U + (i )*cs_U + (0 )*rs_U;
363  double* restrict y1 = buff_Y + (i )*cs_Y + (0 )*rs_Y;
364  double* restrict z1 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
365  double* restrict v1 = buff_V + (0 )*cs_V + (i )*rs_V;
366  double* restrict tau1 = buff_t + (i )*inc_t;
367  double* restrict t1 = buff_temp;
368  double* restrict a = buff_a;
369  double* restrict w = buff_w;
370  double* restrict al = buff_al;
371  double* restrict psi20_l = buff_Y + (i )*cs_Y + (0 )*rs_Y;
372  double* restrict nu20_l = buff_V + (0 )*cs_V + (i )*rs_V;
373  double alpha;
374  double beta;
375  double gamma;
376  double kappa;
377 
378  /*------------------------------------------------------------*/
379
380  bl1_dcopyv( BLIS1_NO_CONJUGATE,
381  n_V,
382  v1, cs_V,
383  t1, inc_t );
384
385  bl1_ddotsv2( BLIS1_NO_CONJUGATE,
386  n_V,
387  y1, rs_Y,
388  t1, inc_t,
389  a, inc_a,
390  &zero,
391  &alpha,
392  &beta );
393 
394  *tau1 = beta;
395 
396  bl1_dcopyconj( psi20_l, &gamma );
397  bl1_dcopyconj( nu20_l, &kappa );
398
399  bl1_daxmyv2( BLIS1_NO_CONJUGATE,
400  m_U,
401  &alpha,
402  &gamma,
403  u1, rs_U,
404  w, inc_w,
405  al, inc_al );
406
407  bl1_daxmyv2( BLIS1_NO_CONJUGATE,
408  m_Z,
409  &beta,
410  &kappa,
411  z1, rs_U,
412  w, inc_w,
413  al, inc_al );
414 
415  /*------------------------------------------------------------*/
416 
417  }
418 
419  return FLA_SUCCESS;
420 }
Definition: blis_type_defs.h:81
void bl1_ddotsv2(conj1_t conjxy, int n, double *x, int inc_x, double *y, int inc_y, double *z, int inc_z, double *beta, double *rho_xz, double *rho_yz)
Definition: bl1_dotsv2.c:35
double bl1_d0(void)
Definition: bl1_constants.c:118
double *restrict z1
Definition: bl1_dotsv2.c:148
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42
void bl1_daxmyv2(conj1_t conjx, int n, double *alpha, double *beta, double *x, int inc_x, double *y, int inc_y, double *z, int inc_z)
Definition: bl1_axmyv2.c:34
int i
Definition: bl1_axmyv2.c:145
double *restrict y1
Definition: bl1_dotsv2.c:145

◆ FLA_Fused_UYx_ZVx_ops_var1()

FLA_Error FLA_Fused_UYx_ZVx_ops_var1 ( int  m_U,
int  n_U,
int  m_V,
int  n_V,
float *  buff_delta,
float *  buff_U,
int  rs_U,
int  cs_U,
float *  buff_Y,
int  rs_Y,
int  cs_Y,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_V,
int  rs_V,
int  cs_V,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_temp,
int  inc_temp,
float *  buff_t,
int  inc_t,
float *  buff_a,
int  inc_a,
float *  buff_w,
int  inc_w,
float *  buff_al,
int  inc_al 
)

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), BLIS1_NO_CONJUGATE, i, y1, and z1.

Referenced by FLA_Bidiag_UT_u_step_ofs_var4(), and FLA_Fused_UYx_ZVx_opt_var1().

216 {
217  int i;
218  int m_A = m_U;
219  int m_Z = m_U;
220
221  bl1_scopyv( BLIS1_NO_CONJUGATE,
222  m_A,
223  buff_A, rs_A,
224  buff_al, inc_al );
225 
226  for ( i = 0; i < n_U; ++i )
227  {
228  float* u1 = buff_U + (i )*cs_U + (0 )*rs_U;
229  float* y1 = buff_Y + (i )*cs_Y + (0 )*rs_Y;
230  float* z1 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
231  float* v1 = buff_V + (0 )*cs_V + (i )*rs_V;
232  float* tau1 = buff_t + (i )*inc_t;
233  float* delta = buff_delta;
234  float* a = buff_a;
235  float* w = buff_w;
236  float* al = buff_al;
237  float* psi20_l = buff_Y + (i )*cs_Y + (0 )*rs_Y;
238  float* nu20_l = buff_V + (0 )*cs_V + (i )*rs_V;
239  float alpha;
240  float beta;
241  float gamma;
242  float kappa;
243 
244  /*------------------------------------------------------------*/
245
246  bl1_sdot( BLIS1_NO_CONJUGATE,
247  n_V,
248  y1, rs_Y,
249  a, inc_a,
250  &alpha );
251  //alpha = F77_sdot( &n_V,
252  // y1, &rs_Y,
253  // a, &inc_a );
254
255  bl1_sdot( BLIS1_NO_CONJUGATE,
256  n_V,
257  v1, cs_V,
258  a, inc_a,
259  &beta );
260  //beta = F77_sdot( &n_V,
261  // v1, &cs_V,
262  // a, &inc_a );
263 
264  *tau1 = beta;
265 
266  // bl1_sconjs( &alpha );
267  // bl1_sconjs( &beta );
268  // bl1_scopyconj( psi20_l, &gamma );
269  // bl1_scopyconj( nu20_l, &kappa );
270  gamma = *psi20_l;
271  kappa = *nu20_l;
272 
273  // bl1_dscals( delta, &alpha );
274  // bl1_dscals( delta, &beta );
275  // bl1_dscals( delta, &gamma );
276  // bl1_dscals( delta, &kappa );
277  alpha *= *delta;
278  beta *= *delta;
279  gamma *= *delta;
280  kappa *= *delta;
281
282  bl1_saxpyv( BLIS1_NO_CONJUGATE,
283  m_U,
284  &alpha,
285  u1, rs_U,
286  w, inc_w );
287  //F77_saxpy( &m_U,
288  // &alpha,
289  // u1, &rs_U,
290  // w, &inc_w );
291
292  bl1_saxpyv( BLIS1_NO_CONJUGATE,
293  m_Z,
294  &beta,
295  z1, rs_Z,
296  w, inc_w );
297  //F77_saxpy( &m_Z,
298  // &beta,
299  // z1, &rs_Z,
300  // w, &inc_w );
301
302  bl1_saxpyv( BLIS1_NO_CONJUGATE,
303  m_U,
304  &gamma,
305  u1, rs_U,
306  al, inc_al );
307  //F77_saxpy( &m_U,
308  // &gamma,
309  // u1, &rs_U,
310  // al, &inc_al );
311
312  bl1_saxpyv( BLIS1_NO_CONJUGATE,
313  m_Z,
314  &kappa,
315  z1, rs_Z,
316  al, inc_al );
317  //F77_saxpy( &m_Z,
318  // &kappa,
319  // z1, &rs_Z,
320  // al, &inc_al );
321 
322  /*------------------------------------------------------------*/
323 
324  }
325 
326  return FLA_SUCCESS;
327 }
Definition: blis_type_defs.h:81
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
double *restrict z1
Definition: bl1_dotsv2.c:148
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35
int i
Definition: bl1_axmyv2.c:145
double *restrict y1
Definition: bl1_dotsv2.c:145

◆ FLA_Fused_UYx_ZVx_opt_var1()

FLA_Error FLA_Fused_UYx_ZVx_opt_var1 ( FLA_Obj  delta,
FLA_Obj  a,
FLA_Obj  U,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  V,
FLA_Obj  A,
FLA_Obj  temp,
FLA_Obj  t,
FLA_Obj  w,
FLA_Obj  al 
)

References FLA_Fused_UYx_ZVx_opc_var1(), FLA_Fused_UYx_ZVx_opd_var1(), FLA_Fused_UYx_ZVx_ops_var1(), FLA_Fused_UYx_ZVx_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

14 {
15 /*
16  Effective computation:
17  w = w + delta * ( U ( Y' conj(a) ) + Z ( V' conj(a) ) );
18  al = A * e0 + delta * ( U ( Y' e0 ) + Z ( V' e0 ) );
19  t = V' conj(a);
20 */
21  FLA_Datatype datatype;
22  int m_U, n_U;
23  int m_V, n_V;
24  int rs_A, cs_A;
25  int rs_U, cs_U;
26  int rs_Y, cs_Y;
27  int rs_Z, cs_Z;
28  int rs_V, cs_V;
29  int inc_a, inc_temp, inc_t, inc_w, inc_al;
30 
31  datatype = FLA_Obj_datatype( A );
32 
33  m_U = FLA_Obj_length( U );
34  n_U = FLA_Obj_width( U );
35 
36  m_V = FLA_Obj_length( V );
37  n_V = FLA_Obj_width( V );
38 
39  rs_U = FLA_Obj_row_stride( U );
40  cs_U = FLA_Obj_col_stride( U );
41 
42  rs_Y = FLA_Obj_row_stride( Y );
43  cs_Y = FLA_Obj_col_stride( Y );
44 
45  rs_Z = FLA_Obj_row_stride( Z );
46  cs_Z = FLA_Obj_col_stride( Z );
47 
48  rs_V = FLA_Obj_row_stride( V );
49  cs_V = FLA_Obj_col_stride( V );
50 
51  rs_A = FLA_Obj_row_stride( A );
52  cs_A = FLA_Obj_col_stride( A );
53 
54  inc_temp = FLA_Obj_vector_inc( temp );
55  inc_t = FLA_Obj_vector_inc( t );
56  inc_a = FLA_Obj_vector_inc( a );
57  inc_w = FLA_Obj_vector_inc( w );
58  inc_al = FLA_Obj_vector_inc( al );
59 
60 
61  switch ( datatype )
62  {
63  case FLA_FLOAT:
64  {
65  float* buff_A = FLA_FLOAT_PTR( A );
66  float* buff_U = FLA_FLOAT_PTR( U );
67  float* buff_Y = FLA_FLOAT_PTR( Y );
68  float* buff_Z = FLA_FLOAT_PTR( Z );
69  float* buff_V = FLA_FLOAT_PTR( V );
70  float* buff_temp = FLA_FLOAT_PTR( temp );
71  float* buff_t = FLA_FLOAT_PTR( t );
72  float* buff_a = FLA_FLOAT_PTR( a );
73  float* buff_w = FLA_FLOAT_PTR( w );
74  float* buff_al = FLA_FLOAT_PTR( al );
75  float* buff_delta = FLA_FLOAT_PTR( delta );
76
77  FLA_Fused_UYx_ZVx_ops_var1( m_U,
78  n_U,
79  m_V,
80  n_V,
81  buff_delta,
82  buff_U, rs_U, cs_U,
83  buff_Y, rs_Y, cs_Y,
84  buff_Z, rs_Z, cs_Z,
85  buff_V, rs_V, cs_V,
86  buff_A, rs_A, cs_A,
87  buff_temp, inc_temp,
88  buff_t, inc_t,
89  buff_a, inc_a,
90  buff_w, inc_w,
91  buff_al, inc_al );
92 
93  break;
94  }
95 
96  case FLA_DOUBLE:
97  {
98  double* buff_A = FLA_DOUBLE_PTR( A );
99  double* buff_U = FLA_DOUBLE_PTR( U );
100  double* buff_Y = FLA_DOUBLE_PTR( Y );
101  double* buff_Z = FLA_DOUBLE_PTR( Z );
102  double* buff_V = FLA_DOUBLE_PTR( V );
103  double* buff_temp = FLA_DOUBLE_PTR( temp );
104  double* buff_t = FLA_DOUBLE_PTR( t );
105  double* buff_a = FLA_DOUBLE_PTR( a );
106  double* buff_w = FLA_DOUBLE_PTR( w );
107  double* buff_al = FLA_DOUBLE_PTR( al );
108  double* buff_delta = FLA_DOUBLE_PTR( delta );
109
110  FLA_Fused_UYx_ZVx_opd_var1( m_U,
111  n_U,
112  m_V,
113  n_V,
114  buff_delta,
115  buff_U, rs_U, cs_U,
116  buff_Y, rs_Y, cs_Y,
117  buff_Z, rs_Z, cs_Z,
118  buff_V, rs_V, cs_V,
119  buff_A, rs_A, cs_A,
120  buff_temp, inc_temp,
121  buff_t, inc_t,
122  buff_a, inc_a,
123  buff_w, inc_w,
124  buff_al, inc_al );
125 
126  break;
127  }
128 
129  case FLA_COMPLEX:
130  {
131  scomplex* buff_A = FLA_COMPLEX_PTR( A );
132  scomplex* buff_U = FLA_COMPLEX_PTR( U );
133  scomplex* buff_Y = FLA_COMPLEX_PTR( Y );
134  scomplex* buff_Z = FLA_COMPLEX_PTR( Z );
135  scomplex* buff_V = FLA_COMPLEX_PTR( V );
136  scomplex* buff_temp = FLA_COMPLEX_PTR( temp );
137  scomplex* buff_t = FLA_COMPLEX_PTR( t );
138  scomplex* buff_a = FLA_COMPLEX_PTR( a );
139  scomplex* buff_w = FLA_COMPLEX_PTR( w );
140  scomplex* buff_al = FLA_COMPLEX_PTR( al );
141  scomplex* buff_delta = FLA_COMPLEX_PTR( delta );
142
143  FLA_Fused_UYx_ZVx_opc_var1( m_U,
144  n_U,
145  m_V,
146  n_V,
147  buff_delta,
148  buff_U, rs_U, cs_U,
149  buff_Y, rs_Y, cs_Y,
150  buff_Z, rs_Z, cs_Z,
151  buff_V, rs_V, cs_V,
152  buff_A, rs_A, cs_A,
153  buff_temp, inc_temp,
154  buff_t, inc_t,
155  buff_a, inc_a,
156  buff_w, inc_w,
157  buff_al, inc_al );
158 
159  break;
160  }
161 
162  case FLA_DOUBLE_COMPLEX:
163  {
164  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
165  dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );
166  dcomplex* buff_Y = FLA_DOUBLE_COMPLEX_PTR( Y );
167  dcomplex* buff_Z = FLA_DOUBLE_COMPLEX_PTR( Z );
168  dcomplex* buff_V = FLA_DOUBLE_COMPLEX_PTR( V );
169  dcomplex* buff_temp = FLA_DOUBLE_COMPLEX_PTR( temp );
170  dcomplex* buff_t = FLA_DOUBLE_COMPLEX_PTR( t );
171  dcomplex* buff_a = FLA_DOUBLE_COMPLEX_PTR( a );
172  dcomplex* buff_w = FLA_DOUBLE_COMPLEX_PTR( w );
173  dcomplex* buff_al = FLA_DOUBLE_COMPLEX_PTR( al );
174  dcomplex* buff_delta = FLA_DOUBLE_COMPLEX_PTR( delta );
175
176  FLA_Fused_UYx_ZVx_opz_var1( m_U,
177  n_U,
178  m_V,
179  n_V,
180  buff_delta,
181  buff_U, rs_U, cs_U,
182  buff_Y, rs_Y, cs_Y,
183  buff_Z, rs_Z, cs_Z,
184  buff_V, rs_V, cs_V,
185  buff_A, rs_A, cs_A,
186  buff_temp, inc_temp,
187  buff_t, inc_t,
188  buff_a, inc_a,
189  buff_w, inc_w,
190  buff_al, inc_al );
191 
192  break;
193  }
194  }
195 
196  return FLA_SUCCESS;
197 }
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Error FLA_Fused_UYx_ZVx_opd_var1(int m_U, int n_U, int m_V, int n_V, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_V, int rs_V, int cs_V, double *buff_A, int rs_A, int cs_A, double *buff_temp, int inc_temp, double *buff_t, int inc_t, double *buff_a, int inc_a, double *buff_w, int inc_w, double *buff_al, int inc_al)
Definition: FLA_Fused_UYx_ZVx_opt_var1.c:331
FLA_Error FLA_Fused_UYx_ZVx_ops_var1(int m_U, int n_U, int m_V, int n_V, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_V, int rs_V, int cs_V, float *buff_A, int rs_A, int cs_A, float *buff_temp, int inc_temp, float *buff_t, int inc_t, float *buff_a, int inc_a, float *buff_w, int inc_w, float *buff_al, int inc_al)
Definition: FLA_Fused_UYx_ZVx_opt_var1.c:201
FLA_Error FLA_Fused_UYx_ZVx_opc_var1(int m_U, int n_U, int m_V, int n_V, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_V, int rs_V, int cs_V, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_temp, int inc_temp, scomplex *buff_t, int inc_t, scomplex *buff_a, int inc_a, scomplex *buff_w, int inc_w, scomplex *buff_al, int inc_al)
Definition: FLA_Fused_UYx_ZVx_opt_var1.c:424
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
Definition: blis_type_defs.h:132
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Error FLA_Fused_UYx_ZVx_opz_var1(int m_U, int n_U, int m_V, int n_V, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_V, int rs_V, int cs_V, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_temp, int inc_temp, dcomplex *buff_t, int inc_t, dcomplex *buff_a, int inc_a, dcomplex *buff_w, int inc_w, dcomplex *buff_al, int inc_al)
Definition: FLA_Fused_UYx_ZVx_opt_var1.c:542
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
Definition: blis_type_defs.h:137

◆ FLA_Fused_UYx_ZVx_opz_var1()

FLA_Error FLA_Fused_UYx_ZVx_opz_var1 ( int  m_U,
int  n_U,
int  m_V,
int  n_V,
dcomplex *  buff_delta,
dcomplex *  buff_U,
int  rs_U,
int  cs_U,
dcomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *  buff_V,
int  rs_V,
int  cs_V,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_temp,
int  inc_temp,
dcomplex *  buff_t,
int  inc_t,
dcomplex *  buff_a,
int  inc_a,
dcomplex *  buff_w,
int  inc_w,
dcomplex *  buff_al,
int  inc_al 
)

References bl1_z0(), bl1_zaxmyv2(), bl1_zcopyv(), bl1_zdotsv2(), BLIS1_NO_CONJUGATE, i, y1, and z1.

Referenced by FLA_Bidiag_UT_u_step_ofz_var4(), and FLA_Fused_UYx_ZVx_opt_var1().

557 {
558  dcomplex zero = bl1_z0();
559  int i;
560  int m_A = m_U;
561  int m_Z = m_U;
562
563  bl1_zcopyv( BLIS1_NO_CONJUGATE,
564  m_A,
565  buff_A, rs_A,
566  buff_al, inc_al );
567 
568  if ( m_U == 0 || n_U == 0 ) return 0;
569  if ( m_V == 0 || n_V == 0 ) return 0;
570 
571  for ( i = 0; i < n_U; ++i )
572  {
573  dcomplex* restrict u1 = buff_U + (i )*cs_U + (0 )*rs_U;
574  dcomplex* restrict y1 = buff_Y + (i )*cs_Y + (0 )*rs_Y;
575  dcomplex* restrict z1 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
576  dcomplex* restrict v1 = buff_V + (0 )*cs_V + (i )*rs_V;
577  dcomplex* restrict tau1 = buff_t + (i )*inc_t;
578  dcomplex* restrict a = buff_a;
579  dcomplex* restrict w = buff_w;
580  dcomplex* restrict al = buff_al;
581  dcomplex* restrict psi20_l = buff_Y + (i )*cs_Y + (0 )*rs_Y;
582  dcomplex* restrict nu20_l = buff_V + (0 )*cs_V + (i )*rs_V;
583  dcomplex alpha;
584  dcomplex beta;
585  dcomplex gamma;
586  dcomplex kappa;
587 
588  /*------------------------------------------------------------*/
589
590  bl1_zdotsv2( BLIS1_NO_CONJUGATE,
591  n_V,
592  y1, rs_Y,
593  v1, cs_V,
594  a, inc_a,
595  &zero,
596  &alpha,
597  &beta );
598 
599  bl1_zconjs( &alpha );
600  bl1_zconjs( &beta );
601 
602  *tau1 = beta;
603 
604  bl1_zcopyconj( psi20_l, &gamma );
605  bl1_zcopyconj( nu20_l, &kappa );
606
607  bl1_zaxmyv2( BLIS1_NO_CONJUGATE,
608  m_U,
609  &alpha,
610  &gamma,
611  u1, rs_U,
612  w, inc_w,
613  al, inc_al );
614
615  bl1_zaxmyv2( BLIS1_NO_CONJUGATE,
616  m_Z,
617  &beta,
618  &kappa,
619  z1, rs_U,
620  w, inc_w,
621  al, inc_al );
622 
623  /*------------------------------------------------------------*/
624 
625  }
626 
627  return FLA_SUCCESS;
628 }
void bl1_zaxmyv2(conj1_t conjx, int n, dcomplex *alpha, dcomplex *beta, dcomplex *x, int inc_x, dcomplex *y, int inc_y, dcomplex *z, int inc_z)
Definition: bl1_axmyv2.c:250
dcomplex bl1_z0(void)
Definition: bl1_constants.c:133
Definition: blis_type_defs.h:81
double *restrict z1
Definition: bl1_dotsv2.c:148
void bl1_zdotsv2(conj1_t conjxy, int n, dcomplex *x, int inc_x, dcomplex *y, int inc_y, dcomplex *z, int inc_z, dcomplex *beta, dcomplex *rho_xz, dcomplex *rho_yz)
Definition: bl1_dotsv2.c:248
int i
Definition: bl1_axmyv2.c:145
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
double *restrict y1
Definition: bl1_dotsv2.c:145
Definition: blis_type_defs.h:137