libflame  revision_anchor
Functions
FLA_Tridiag_UT_l_fus_var3.c File Reference

(r)

Functions

FLA_Error FLA_Tridiag_UT_l_ofu_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofu_var3 (FLA_Obj A, FLA_Obj Z, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofs_var3 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofd_var3 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofc_var3 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofz_var3 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
 

Function Documentation

◆ FLA_Tridiag_UT_l_ofu_var3()

FLA_Error FLA_Tridiag_UT_l_ofu_var3 ( FLA_Obj  A,
FLA_Obj  T 
)

References FLA_Obj_create_conf_to(), FLA_Obj_free(), and FLA_Tridiag_UT_l_step_ofu_var3().

14 {
15  FLA_Error r_val;
16  FLA_Obj Z;
17 
18  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Z );
19 
20  r_val = FLA_Tridiag_UT_l_step_ofu_var3( A, Z, T );
21 
22  FLA_Obj_free( &Z );
23 
24  return r_val;
25 }
FLA_Error FLA_Tridiag_UT_l_step_ofu_var3(FLA_Obj A, FLA_Obj Z, FLA_Obj T)
Definition: FLA_Tridiag_UT_l_fus_var3.c:27
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
int FLA_Error
Definition: FLA_type_defs.h:47
Definition: FLA_type_defs.h:158
FLA_Error FLA_Obj_create_conf_to(FLA_Trans trans, FLA_Obj old, FLA_Obj *obj)
Definition: FLA_Obj.c:286

◆ FLA_Tridiag_UT_l_step_ofc_var3()

FLA_Error FLA_Tridiag_UT_l_step_ofc_var3 ( int  m_A,
int  m_T,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *  buff_T,
int  rs_T,
int  cs_T 
)

References bl1_caxpyv(), bl1_cdot(), bl1_cgemv(), bl1_chemv(), bl1_cscalv(), bl1_csetm(), BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Fused_UZhu_ZUhu_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var3().

469 {
470  scomplex* buff_2 = FLA_COMPLEX_PTR( FLA_TWO );
471  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
472  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
473  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
474 
475  scomplex first_elem, last_elem;
476  scomplex beta;
477  scomplex inv_tau11;
478  scomplex minus_inv_tau11;
479  int i;
480 
481  // b_alg = FLA_Obj_length( T );
482  int b_alg = m_T;
483 
484  // FLA_Set( FLA_ZERO, Z );
485  bl1_csetm( m_A,
486  b_alg,
487  buff_0,
488  buff_Z, rs_Z, cs_Z );
489 
490  for ( i = 0; i < b_alg; ++i )
491  {
492  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
493  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
494  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
495  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
496  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
497 
498  scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
499  scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
500  scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
501 
502  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
503  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
504 
505  scomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
506 
507  scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
508  scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
509 
510  scomplex* ABL = a10t;
511  scomplex* ZBL = z10t;
512 
513  scomplex* a2 = alpha11;
514 
515  int m_ahead = m_A - i - 1;
516  int m_behind = i;
517  int n_behind = i;
518 
519  /*------------------------------------------------------------*/
520 
521  if ( m_behind > 0 )
522  {
523  // FLA_Copy( a10t_r, last_elem );
524  // FLA_Set( FLA_ONE, a10t_r );
525  last_elem = *a10t_r;
526  *a10t_r = *buff_1;
527  }
528 
529  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
530  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
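531  bl1_cgemv( BLIS1_NO_TRANSPOSE,
532  BLIS1_CONJUGATE,
533  m_ahead + 1,
534  n_behind,
535  buff_m1,
536  ABL, rs_A, cs_A,
537  z10t, cs_Z,
538  buff_1,
539  a2, rs_A );
540  bl1_cgemv( BLIS1_NO_TRANSPOSE,
541  BLIS1_CONJUGATE,
542  m_ahead + 1,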
543  n_behind,
544  buff_m1,
545  ZBL, rs_Z, cs_Z,
546  a10t, cs_A,
547  buff_1,
548  a2, rs_A );
549 
550  if ( m_behind > 0 )
551  {
552  // FLA_Copy( last_elem, a10t_r );
553  *a10t_r = last_elem;
554  }
555 
556  if ( m_ahead > 0 )
557  {
558  // FLA_Househ2_UT( FLA_LEFT,
559  // a21_t,
560  // a21_b, tau11 );
561  FLA_Househ2_UT_l_opc( m_ahead - 1,
562  a21_t,
563  a21_b, rs_A,
564  tau11 );
565 
566  // FLA_Set( FLA_ONE, inv_tau11 );
567  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
568  // FLA_Copy( inv_tau11, minus_inv_tau11 );
569  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
570  bl1_cdiv3( buff_1, tau11, &inv_tau11 );
571  bl1_cneg2( &inv_tau11, &minus_inv_tau11 );
572 
573  // FLA_Copy( a21_t, first_elem );
574  // FLA_Set( FLA_ONE, a21_t );
575  first_elem = *a21_t;
576  *a21_t = *buff_1;
577 
578  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
579  bl1_chemv( BLIS1_LOWER_TRIANGULAR,
580  BLIS1_NO_CONJUGATE,
581  m_ahead,
582  buff_1,
583  A22, rs_A, cs_A,
584  a21, rs_A,
585  buff_0,
586  z21, rs_Z );
587 
588  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
589  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
590  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
591  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
592  // FLA_Copy( d01, t01 );
593  FLA_Fused_UZhu_ZUhu_opc_var1( m_ahead,
594  n_behind,
595  buff_m1,
596  A20, rs_A, cs_A,
597  Z20, rs_Z, cs_Z,
598  t01, rs_T,
599  a21, rs_A,
600  z21, rs_Z );
601 
602  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
603  // FLA_Inv_scal( FLA_TWO, beta );
604  bl1_cdot( BLIS1_CONJUGATE,
605  m_ahead,
606  a21, rs_A,
607  z21, rs_Z,
608  &beta );
609  bl1_cinvscals( buff_2, &beta );
610 
611  // FLA_Scal( minus_inv_tau11, beta );
612  // FLA_Axpy( beta, a21, z21 );
613  // FLA_Scal( inv_tau11, z21 );
614  bl1_cscals( &minus_inv_tau11, &beta );
615  bl1_caxpyv( BLIS1_NO_CONJUGATE,
616  m_ahead,
617  &beta,
618  a21, rs_A,
619  z21, rs_Z );
620  bl1_cscalv( BLIS1_NO_CONJUGATE,
621  m_ahead,
622  &inv_tau11,
623  z21, rs_Z );
624 
625  // FLA_Copy( first_elem, a21_t );
626  *a21_t = first_elem;
627  }
628 
629  /*------------------------------------------------------------*/
630 
631  }
632 
633  return FLA_SUCCESS;
634 }
void bl1_cscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_scalv.c:46
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:81
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:61
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
void bl1_chemv(uplo1_t uplo, conj1_t conj, int m, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_hemv.c:35
FLA_Error FLA_Fused_UZhu_ZUhu_opc_var1(int m_U, int n_U, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_t, int inc_t, scomplex *buff_u, int inc_u, scomplex *buff_w, int inc_w)
Definition: FLA_Fused_UZhu_ZUhu_opt_var1.c:411
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
Definition: blis_type_defs.h:54
Definition: blis_type_defs.h:132
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
int i
Definition: bl1_axmyv2.c:145
Definition: blis_type_defs.h:62
FLA_Obj FLA_TWO
Definition: FLA_Init.c:17
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Tridiag_UT_l_step_ofd_var3()

FLA_Error FLA_Tridiag_UT_l_step_ofd_var3 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_T,
int  rs_T,
int  cs_T 
)

References bl1_daxpyv(), bl1_ddot(), bl1_dgemv(), bl1_dscalv(), bl1_dsetm(), bl1_dsymv(), BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Fused_UZhu_ZUhu_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var3().

296 {
297  double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
298  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
299  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
300  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
301 
302  double first_elem, last_elem;
303  double beta;
304  double inv_tau11;
305  double minus_inv_tau11;
306  int i;
307 
308  // b_alg = FLA_Obj_length( T );
309  int b_alg = m_T;
310 
311  // FLA_Set( FLA_ZERO, Z );
312  bl1_dsetm( m_A,
313  b_alg,
314  buff_0,
315  buff_Z, rs_Z, cs_Z );
316 
317  for ( i = 0; i < b_alg; ++i )
318  {
319  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
320  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
321  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
322  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
323  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
324 
325  double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
326  double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
327  double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
328 
329  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
330  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
331 
332  double* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
333 
334  double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
335  double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
336 
337  double* ABL = a10t;
338  double* ZBL = z10t;
339 
340  double* a2 = alpha11;
341 
342  int m_ahead = m_A - i - 1;
343  int m_behind = i;
344  int n_behind = i;
345 
346  /*------------------------------------------------------------*/
347 
348  if ( m_behind > 0 )
349  {
350  // FLA_Copy( a10t_r, last_elem );
351  // FLA_Set( FLA_ONE, a10t_r );
352  last_elem = *a10t_r;
353  *a10t_r = *buff_1;
354  }
355 
356  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
357  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
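358  bl1_dgemv( BLIS1_NO_TRANSPOSE,
359  BLIS1_CONJUGATE,
360  m_ahead + 1,
361  n_behind,
362  buff_m1,
363  ABL, rs_A, cs_A,
364  z10t, cs_Z,
365  buff_1,
366  a2, rs_A );
367  bl1_dgemv( BLIS1_NO_TRANSPOSE,
368  BLIS1_CONJUGATE,
369  m_ahead + 1,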
370  n_behind,
371  buff_m1,
372  ZBL, rs_Z, cs_Z,
373  a10t, cs_A,
374  buff_1,
375  a2, rs_A );
376 
377  if ( m_behind > 0 )
378  {
379  // FLA_Copy( last_elem, a10t_r );
380  *a10t_r = last_elem;
381  }
382 
383  if ( m_ahead > 0 )
384  {
385  // FLA_Househ2_UT( FLA_LEFT,
386  // a21_t,
387  // a21_b, tau11 );
388  FLA_Househ2_UT_l_opd( m_ahead - 1,
389  a21_t,
390  a21_b, rs_A,
391  tau11 );
392 
393  // FLA_Set( FLA_ONE, inv_tau11 );
394  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
395  // FLA_Copy( inv_tau11, minus_inv_tau11 );
396  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
397  bl1_ddiv3( buff_1, tau11, &inv_tau11 );
398  bl1_dneg2( &inv_tau11, &minus_inv_tau11 );
399 
400  // FLA_Copy( a21_t, first_elem );
401  // FLA_Set( FLA_ONE, a21_t );
402  first_elem = *a21_t;
403  *a21_t = *buff_1;
404 
405  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
406  bl1_dsymv( BLIS1_LOWER_TRIANGULAR,
407  m_ahead,
408  buff_1,
409  A22, rs_A, cs_A,
410  a21, rs_A,
411  buff_0,
412  z21, rs_Z );
413 
414  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
415  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
416  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
417  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
418  // FLA_Copy( d01, t01 );
419  FLA_Fused_UZhu_ZUhu_opd_var1( m_ahead,
420  n_behind,
421  buff_m1,
422  A20, rs_A, cs_A,
423  Z20, rs_Z, cs_Z,
424  t01, rs_T,
425  a21, rs_A,
426  z21, rs_Z );
427 
428  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
429  // FLA_Inv_scal( FLA_TWO, beta );
430  bl1_ddot( BLIS1_CONJUGATE,
431  m_ahead,
432  a21, rs_A,
433  z21, rs_Z,
434  &beta );
435  bl1_dinvscals( buff_2, &beta );
436 
437  // FLA_Scal( minus_inv_tau11, beta );
438  // FLA_Axpy( beta, a21, z21 );
439  // FLA_Scal( inv_tau11, z21 );
440  bl1_dscals( &minus_inv_tau11, &beta );
441  bl1_daxpyv( BLIS1_NO_CONJUGATE,
442  m_ahead,
443  &beta,
444  a21, rs_A,
445  z21, rs_Z );
446  bl1_dscalv( BLIS1_NO_CONJUGATE,
447  m_ahead,
448  &inv_tau11,
449  z21, rs_Z );
450 
451  // FLA_Copy( first_elem, a21_t );
452  *a21_t = first_elem;
453  }
454 
455  /*------------------------------------------------------------*/
456 
457  }
458 
459  return FLA_SUCCESS;
460 }
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
Definition: blis_type_defs.h:81
void bl1_dsetm(int m, int n, double *sigma, double *a, int a_rs, int a_cs)
Definition: bl1_setm.c:45
FLA_Error FLA_Fused_UZhu_ZUhu_opd_var1(int m_U, int n_U, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Z, int rs_Z, int cs_Z, double *buff_t, int inc_t, double *buff_u, int inc_u, double *buff_w, int inc_w)
Definition: FLA_Fused_UZhu_ZUhu_opt_var1.c:222
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_scalv.c:24
Definition: blis_type_defs.h:54
void bl1_dsymv(uplo1_t uplo, int m, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_symv.c:56
int i
Definition: bl1_axmyv2.c:145
Definition: blis_type_defs.h:62
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
FLA_Obj FLA_TWO
Definition: FLA_Init.c:17
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Tridiag_UT_l_step_ofs_var3()

FLA_Error FLA_Tridiag_UT_l_step_ofs_var3 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_T,
int  rs_T,
int  cs_T 
)

References bl1_saxpyv(), bl1_sdot(), bl1_sgemv(), bl1_sscalv(), bl1_ssetm(), bl1_ssymv(), BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Fused_UZhu_ZUhu_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var3().

123 {
124  float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
125  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
126  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
127  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
128 
129  float first_elem, last_elem;
130  float beta;
131  float inv_tau11;
132  float minus_inv_tau11;
133  int i;
134 
135  // b_alg = FLA_Obj_length( T );
136  int b_alg = m_T;
137 
138  // FLA_Set( FLA_ZERO, Z );
139  bl1_ssetm( m_A,
140  b_alg,
141  buff_0,
142  buff_Z, rs_Z, cs_Z );
143 
144  for ( i = 0; i < b_alg; ++i )
145  {
146  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
147  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
148  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
149  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
150  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
151 
152  float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
153  float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
154  float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
155 
156  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
157  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
158 
159  float* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
160 
161  float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
162  float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
163 
164  float* ABL = a10t;
165  float* ZBL = z10t;
166 
167  float* a2 = alpha11;
168 
169  int m_ahead = m_A - i - 1;
170  int m_behind = i;
171  int n_behind = i;
172 
173  /*------------------------------------------------------------*/
174 
175  if ( m_behind > 0 )
176  {
177  // FLA_Copy( a10t_r, last_elem );
178  // FLA_Set( FLA_ONE, a10t_r );
179  last_elem = *a10t_r;
180  *a10t_r = *buff_1;
181  }
182 
183  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
184  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
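185  bl1_sgemv( BLIS1_NO_TRANSPOSE,
186  BLIS1_CONJUGATE,
187  m_ahead + 1,
188  n_behind,
189  buff_m1,
190  ABL, rs_A, cs_A,
191  z10t, cs_Z,
192  buff_1,
193  a2, rs_A );
194  bl1_sgemv( BLIS1_NO_TRANSPOSE,
195  BLIS1_CONJUGATE,
196  m_ahead + 1,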
197  n_behind,
198  buff_m1,
199  ZBL, rs_Z, cs_Z,
200  a10t, cs_A,
201  buff_1,
202  a2, rs_A );
203 
204  if ( m_behind > 0 )
205  {
206  // FLA_Copy( last_elem, a10t_r );
207  *a10t_r = last_elem;
208  }
209 
210  if ( m_ahead > 0 )
211  {
212  // FLA_Househ2_UT( FLA_LEFT,
213  // a21_t,
214  // a21_b, tau11 );
215  FLA_Househ2_UT_l_ops( m_ahead - 1,
216  a21_t,
217  a21_b, rs_A,
218  tau11 );
219 
220  // FLA_Set( FLA_ONE, inv_tau11 );
221  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
222  // FLA_Copy( inv_tau11, minus_inv_tau11 );
223  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
224  bl1_sdiv3( buff_1, tau11, &inv_tau11 );
225  bl1_sneg2( &inv_tau11, &minus_inv_tau11 );
226 
227  // FLA_Copy( a21_t, first_elem );
228  // FLA_Set( FLA_ONE, a21_t );
229  first_elem = *a21_t;
230  *a21_t = *buff_1;
231 
232  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
233  bl1_ssymv( BLIS1_LOWER_TRIANGULAR,
234  m_ahead,
235  buff_1,
236  A22, rs_A, cs_A,
237  a21, rs_A,
238  buff_0,
239  z21, rs_Z );
240 
241  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
242  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
243  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
244  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
245  // FLA_Copy( d01, t01 );
246  FLA_Fused_UZhu_ZUhu_ops_var1( m_ahead,
247  n_behind,
248  buff_m1,
249  A20, rs_A, cs_A,
250  Z20, rs_Z, cs_Z,
251  t01, rs_T,
252  a21, rs_A,
253  z21, rs_Z );
254 
255  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
256  // FLA_Inv_scal( FLA_TWO, beta );
257  bl1_sdot( BLIS1_CONJUGATE,
258  m_ahead,
259  a21, rs_A,
260  z21, rs_Z,
261  &beta );
262  bl1_sinvscals( buff_2, &beta );
263 
264  // FLA_Scal( minus_inv_tau11, beta );
265  // FLA_Axpy( beta, a21, z21 );
266  // FLA_Scal( inv_tau11, z21 );
267  bl1_sscals( &minus_inv_tau11, &beta );
268  bl1_saxpyv( BLIS1_NO_CONJUGATE,
269  m_ahead,
270  &beta,
271  a21, rs_A,
272  z21, rs_Z );
273  bl1_sscalv( BLIS1_NO_CONJUGATE,
274  m_ahead,
275  &inv_tau11,
276  z21, rs_Z );
277 
278  // FLA_Copy( first_elem, a21_t );
279  *a21_t = first_elem;
280  }
281 
282  /*------------------------------------------------------------*/
283 
284  }
285 
286  return FLA_SUCCESS;
287 }
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:81
void bl1_ssymv(uplo1_t uplo, int m, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_symv.c:13
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Error FLA_Fused_UZhu_ZUhu_ops_var1(int m_U, int n_U, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Z, int rs_Z, int cs_Z, float *buff_t, int inc_t, float *buff_u, int inc_u, float *buff_w, int inc_w)
Definition: FLA_Fused_UZhu_ZUhu_opt_var1.c:136
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
Definition: blis_type_defs.h:54
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_scalv.c:13
int i
Definition: bl1_axmyv2.c:145
Definition: blis_type_defs.h:62
FLA_Obj FLA_TWO
Definition: FLA_Init.c:17
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
void bl1_ssetm(int m, int n, float *sigma, float *a, int a_rs, int a_cs)
Definition: bl1_setm.c:29

◆ FLA_Tridiag_UT_l_step_ofu_var3()

FLA_Error FLA_Tridiag_UT_l_step_ofu_var3 ( FLA_Obj  A,
FLA_Obj  Z,
FLA_Obj  T 
)

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Tridiag_UT_l_step_ofc_var3(), FLA_Tridiag_UT_l_step_ofd_var3(), FLA_Tridiag_UT_l_step_ofs_var3(), and FLA_Tridiag_UT_l_step_ofz_var3().

Referenced by FLA_Tridiag_UT_l_blf_var3(), and FLA_Tridiag_UT_l_ofu_var3().

28 {
29  FLA_Datatype datatype;
30  int m_A, m_T;
31  int rs_A, cs_A;
32  int rs_Z, cs_Z;
33  int rs_T, cs_T;
34 
35  datatype = FLA_Obj_datatype( A );
36 
37  m_A = FLA_Obj_length( A );
38  m_T = FLA_Obj_length( T );
39 
40  rs_A = FLA_Obj_row_stride( A );
41  cs_A = FLA_Obj_col_stride( A );
42 
43  rs_Z = FLA_Obj_row_stride( Z );
44  cs_Z = FLA_Obj_col_stride( Z );
45 
46  rs_T = FLA_Obj_row_stride( T );
47  cs_T = FLA_Obj_col_stride( T );
48 
49 
50  switch ( datatype )
51  {
52  case FLA_FLOAT:
53  {
54  float* buff_A = FLA_FLOAT_PTR( A );
55  float* buff_Z = FLA_FLOAT_PTR( Z );
56  float* buff_T = FLA_FLOAT_PTR( T );
57 
58  FLA_Tridiag_UT_l_step_ofs_var3( m_A,
59  m_T,
60  buff_A, rs_A, cs_A,
61  buff_Z, rs_Z, cs_Z,
62  buff_T, rs_T, cs_T );
63 
64  break;
65  }
66 
67  case FLA_DOUBLE:
68  {
69  double* buff_A = FLA_DOUBLE_PTR( A );
70  double* buff_Z = FLA_DOUBLE_PTR( Z );
71  double* buff_T = FLA_DOUBLE_PTR( T );
72 
73  FLA_Tridiag_UT_l_step_ofd_var3( m_A,
74  m_T,
75  buff_A, rs_A, cs_A,
76  buff_Z, rs_Z, cs_Z,
77  buff_T, rs_T, cs_T );
78 
79  break;
80  }
81 
82  case FLA_COMPLEX:
83  {
84  scomplex* buff_A = FLA_COMPLEX_PTR( A );
85  scomplex* buff_Z = FLA_COMPLEX_PTR( Z );
86  scomplex* buff_T = FLA_COMPLEX_PTR( T );
87 
88  FLA_Tridiag_UT_l_step_ofc_var3( m_A,
89  m_T,
90  buff_A, rs_A, cs_A,
91  buff_Z, rs_Z, cs_Z,
92  buff_T, rs_T, cs_T );
93 
94  break;
95  }
96 
97  case FLA_DOUBLE_COMPLEX:
98  {
99  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
100  dcomplex* buff_Z = FLA_DOUBLE_COMPLEX_PTR( Z );
101  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
102 
103  FLA_Tridiag_UT_l_step_ofz_var3( m_A,
104  m_T,
105  buff_A, rs_A, cs_A,
106  buff_Z, rs_Z, cs_Z,
107  buff_T, rs_T, cs_T );
108 
109  break;
110  }
111  }
112 
113  return FLA_SUCCESS;
114 }
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
FLA_Error FLA_Tridiag_UT_l_step_ofz_var3(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_fus_var3.c:638
FLA_Error FLA_Tridiag_UT_l_step_ofd_var3(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_fus_var3.c:291
FLA_Error FLA_Tridiag_UT_l_step_ofc_var3(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_fus_var3.c:464
Definition: blis_type_defs.h:132
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Error FLA_Tridiag_UT_l_step_ofs_var3(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_fus_var3.c:118
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
Definition: blis_type_defs.h:137

◆ FLA_Tridiag_UT_l_step_ofz_var3()

FLA_Error FLA_Tridiag_UT_l_step_ofz_var3 ( int  m_A,
int  m_T,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)

References bl1_zaxpyv(), bl1_zdot(), bl1_zgemv(), bl1_zhemv(), bl1_zscals(), bl1_zscalv(), bl1_zsetm(), BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Fused_UZhu_ZUhu_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var3().

643 {
644  dcomplex* buff_2 = FLA_DOUBLE_COMPLEX_PTR( FLA_TWO );
645  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
646  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
647  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
648 
649  dcomplex first_elem, last_elem;
650  dcomplex beta;
651  dcomplex inv_tau11;
652  dcomplex minus_inv_tau11;
653  int i;
654 
655  // b_alg = FLA_Obj_length( T );
656  int b_alg = m_T;
657 
658  // FLA_Set( FLA_ZERO, Z );
659  bl1_zsetm( m_A,
660  b_alg,
661  buff_0,
662  buff_Z, rs_Z, cs_Z );
663 
664  for ( i = 0; i < b_alg; ++i )
665  {
666  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
667  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
668  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
669  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
670  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
671 
672  dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
673  dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
674  dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
675 
676  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
677  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
678 
679  dcomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
680 
681  dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
682  dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
683 
684  dcomplex* ABL = a10t;
685  dcomplex* ZBL = z10t;
686 
687  dcomplex* a2 = alpha11;
688 
689  int m_ahead = m_A - i - 1;
690  int m_behind = i;
691  int n_behind = i;
692 
693  /*------------------------------------------------------------*/
694 
695  if ( m_behind > 0 )
696  {
697  // FLA_Copy( a10t_r, last_elem );
698  // FLA_Set( FLA_ONE, a10t_r );
699  last_elem = *a10t_r;
700  *a10t_r = *buff_1;
701  }
702 
703  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
704  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
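705  bl1_zgemv( BLIS1_NO_TRANSPOSE,
706  BLIS1_CONJUGATE,
707  m_ahead + 1,
708  n_behind,
709  buff_m1,
710  ABL, rs_A, cs_A,
711  z10t, cs_Z,
712  buff_1,
713  a2, rs_A );
714  bl1_zgemv( BLIS1_NO_TRANSPOSE,
715  BLIS1_CONJUGATE,
716  m_ahead + 1,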
717  n_behind,
718  buff_m1,
719  ZBL, rs_Z, cs_Z,
720  a10t, cs_A,
721  buff_1,
722  a2, rs_A );
723 
724  if ( m_behind > 0 )
725  {
726  // FLA_Copy( last_elem, a10t_r );
727  *a10t_r = last_elem;
728  }
729 
730  if ( m_ahead > 0 )
731  {
732  // FLA_Househ2_UT( FLA_LEFT,
733  // a21_t,
734  // a21_b, tau11 );
735  FLA_Househ2_UT_l_opz( m_ahead - 1,
736  a21_t,
737  a21_b, rs_A,
738  tau11 );
739 
740  // FLA_Set( FLA_ONE, inv_tau11 );
741  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
742  // FLA_Copy( inv_tau11, minus_inv_tau11 );
743  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
744  bl1_zdiv3( buff_1, tau11, &inv_tau11 );
745  bl1_zneg2( &inv_tau11, &minus_inv_tau11 );
746 
747  // FLA_Copy( a21_t, first_elem );
748  // FLA_Set( FLA_ONE, a21_t );
749  first_elem = *a21_t;
750  *a21_t = *buff_1;
751 
752  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
753  bl1_zhemv( BLIS1_LOWER_TRIANGULAR,
754  BLIS1_NO_CONJUGATE,
755  m_ahead,
756  buff_1,
757  A22, rs_A, cs_A,
758  a21, rs_A,
759  buff_0,
760  z21, rs_Z );
761 
762  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
763  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
764  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
765  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
766  // FLA_Copy( d01, t01 );
767  FLA_Fused_UZhu_ZUhu_opz_var1( m_ahead,
768  n_behind,
769  buff_m1,
770  A20, rs_A, cs_A,
771  Z20, rs_Z, cs_Z,
772  t01, rs_T,
773  a21, rs_A,
774  z21, rs_Z );
775 
776  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
777  // FLA_Inv_scal( FLA_TWO, beta );
778  bl1_zdot( BLIS1_CONJUGATE,
779  m_ahead,
780  a21, rs_A,
781  z21, rs_Z,
782  &beta );
783  bl1_zinvscals( buff_2, &beta );
784 
785  // FLA_Scal( minus_inv_tau11, beta );
786  // FLA_Axpy( beta, a21, z21 );
787  // FLA_Scal( inv_tau11, z21 );
788  bl1_zscals( &minus_inv_tau11, &beta );
789  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
790  m_ahead,
791  &beta,
792  a21, rs_A,
793  z21, rs_Z );
794  bl1_zscalv( BLIS1_NO_CONJUGATE,
795  m_ahead,
796  &inv_tau11,
797  z21, rs_Z );
798 
799  // FLA_Copy( first_elem, a21_t );
800  *a21_t = first_elem;
801  }
802 
803  /*------------------------------------------------------------*/
804 
805  }
806 
807  return FLA_SUCCESS;
808 }
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
void bl1_zscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_scalv.c:72
Definition: blis_type_defs.h:81
FLA_Error FLA_Fused_UZhu_ZUhu_opz_var1(int m_U, int n_U, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_t, int inc_t, dcomplex *buff_u, int inc_u, dcomplex *buff_w, int inc_w)
Definition: FLA_Fused_UZhu_ZUhu_opt_var1.c:485
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
Definition: blis_type_defs.h:54
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:78
int i
Definition: bl1_axmyv2.c:145
void bl1_zhemv(uplo1_t uplo, conj1_t conj, int m, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_hemv.c:134
Definition: blis_type_defs.h:62
FLA_Obj FLA_TWO
Definition: FLA_Init.c:17
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
Definition: blis_type_defs.h:137
bl1_zscals(beta, rho_yz)
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60