libflame  revision_anchor
Functions
FLA_Hess_UT_opt_var5.c File Reference

(r)

Functions

FLA_Error FLA_Hess_UT_opt_var5 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_opt_var5 (FLA_Obj A, FLA_Obj U, FLA_Obj Z, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ops_var5 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_U, int rs_U, int cs_U, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opd_var5 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_U, int rs_U, int cs_U, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opc_var5 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opz_var5 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
 

Function Documentation

◆ FLA_Hess_UT_opt_var5()

FLA_Error FLA_Hess_UT_opt_var5 ( FLA_Obj  A,
FLA_Obj  T 
)

References FLA_Hess_UT_step_opt_var5(), FLA_Obj_create_conf_to(), and FLA_Obj_free().

Referenced by FLA_Hess_UT_internal().

14 {
15  FLA_Error r_val;
16  FLA_Obj U, Z;
17 
18  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &U );
19  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Z );
20 
21  r_val = FLA_Hess_UT_step_opt_var5( A, U, Z, T );
22 
23  FLA_Obj_free( &U );
24  FLA_Obj_free( &Z );
25 
26  return r_val;
27 }
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
int FLA_Error
Definition: FLA_type_defs.h:47
Definition: FLA_type_defs.h:158
FLA_Error FLA_Obj_create_conf_to(FLA_Trans trans, FLA_Obj old, FLA_Obj *obj)
Definition: FLA_Obj.c:286
FLA_Error FLA_Hess_UT_step_opt_var5(FLA_Obj A, FLA_Obj U, FLA_Obj Z, FLA_Obj T)
Definition: FLA_Hess_UT_opt_var5.c:29

◆ FLA_Hess_UT_step_opc_var5()

FLA_Error FLA_Hess_UT_step_opc_var5 ( int  m_A,
int  m_T,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_U,
int  rs_U,
int  cs_U,
scomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *  buff_T,
int  rs_T,
int  cs_T 
)

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cdots(), bl1_cgemv(), bl1_csetm(), bl1_ctrmv(), bl1_ctrmvsx(), bl1_ctrsv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_NONUNIT_DIAG, BLIS1_UPPER_TRIANGULAR, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var5().

644 {
645  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
646  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
647  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
648  int i;
649 
650  // b_alg = FLA_Obj_length( T );
651  int b_alg = m_T;
652 
653  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
654  scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
655  int inc_w = 1;
656 
657  // FLA_Set( FLA_ZERO, U );
658  // FLA_Set( FLA_ZERO, Z );
659  bl1_csetm( m_A,
660  b_alg,
661  buff_0,
662  buff_U, rs_U, cs_U );
663  bl1_csetm( m_A,
664  b_alg,
665  buff_0,
666  buff_Z, rs_Z, cs_Z );
667 
668  for ( i = 0; i < b_alg; ++i )
669  {
670  scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
671  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
672  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
673  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
674  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
675  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
676 
677  scomplex* U00 = buff_U + (0 )*cs_U + (0 )*rs_U;
678  scomplex* u10t = buff_U + (0 )*cs_U + (i )*rs_U;
679  scomplex* U20 = buff_U + (0 )*cs_U + (i+1)*rs_U;
680  scomplex* u21 = buff_U + (i )*cs_U + (i+1)*rs_U;
681 
682  scomplex* Z00 = buff_Z + (0 )*cs_Z + (0 )*rs_Z;
683  scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
684  scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
685  scomplex* z01 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
686  scomplex* zeta11 = buff_Z + (i )*cs_Z + (i )*rs_Z;
687  scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
688 
689  scomplex* T00 = buff_T + (0 )*cs_T + (0 )*rs_T;
690  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
691  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
692 
693  scomplex* w0 = buff_w + (0 )*inc_w;
694 
695  scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
696  scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
697 
698  scomplex* u21_t = u21 + (0 )*cs_U + (0 )*rs_U;
699 
700  int m_ahead = m_A - i - 1;
701  int n_ahead = m_A - i - 1;
702  int m_behind = i;
703  int n_behind = i;
704 
705  /*------------------------------------------------------------*/
706 
707  if ( m_behind > 0 )
708  {
709  // FLA_Copyt( FLA_CONJ_TRANSPOSE, u10t, w0 );
710  // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
711  // T00, w0 );
713  m_behind,
714  u10t, cs_U,
715  w0, inc_w );
719  m_behind,
720  T00, rs_T, cs_T,
721  w0, inc_w );
722 
723  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z00, w0, FLA_ONE, a01 );
724  // FLA_Dots( FLA_MINUS_ONE, z10t, w0, FLA_ONE, alpha11 );
725  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, w0, FLA_ONE, a21 );
728  m_behind,
729  n_behind,
730  buff_m1,
731  Z00, rs_Z, cs_Z,
732  w0, inc_w,
733  buff_1,
734  a01, rs_A );
736  m_behind,
737  buff_m1,
738  z10t, cs_Z,
739  w0, inc_w,
740  buff_1,
741  alpha11 );
744  m_ahead,
745  n_behind,
746  buff_m1,
747  Z20, rs_Z, cs_Z,
748  w0, inc_w,
749  buff_1,
750  a21, rs_A );
751 
752  // FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
753  // FLA_ONE, U00, a01, FLA_ZERO, w0 );
754  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, alpha11, u10t, w0 );
755  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, a21, FLA_ONE, w0 );
757  m_behind,
758  a01, rs_A,
759  w0, inc_w );
763  m_behind,
764  U00, rs_U, cs_U,
765  w0, inc_w );
767  m_behind,
768  alpha11,
769  u10t, cs_U,
770  w0, inc_w );
773  m_ahead,
774  n_behind,
775  buff_1,
776  U20, rs_U, cs_U,
777  a21, rs_A,
778  buff_1,
779  w0, inc_w );
780 
781  // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
782  // T00, w0 );
786  m_behind,
787  T00, rs_T, cs_T,
788  w0, inc_w );
789 
790  // FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
791  // FLA_MINUS_ONE, U00, w0, FLA_ONE, a01 );
792  // FLA_Dots( FLA_MINUS_ONE, u10t, w0, FLA_ONE, alpha11 );
793  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, U20, w0, FLA_ONE, a21 );
797  m_behind,
798  buff_m1,
799  U00, rs_U, cs_U,
800  w0, inc_w,
801  buff_1,
802  a01, rs_A );
804  m_behind,
805  buff_m1,
806  u10t, cs_U,
807  w0, inc_w,
808  buff_1,
809  alpha11 );
812  m_ahead,
813  n_behind,
814  buff_m1,
815  U20, rs_U, cs_U,
816  w0, inc_w,
817  buff_1,
818  a21, rs_A );
819  }
820 
821  if ( m_ahead > 0 )
822  {
823  // FLA_Househ2_UT( FLA_LEFT,
824  // a21_t,
825  // a21_b, tau11 );
826  FLA_Househ2_UT_l_opc( m_ahead - 1,
827  a21_t,
828  a21_b, rs_A,
829  tau11 );
830 
831  // FLA_Copy( a21, u21 );
833  m_ahead,
834  a21, rs_A,
835  u21, rs_U );
836 
837  // FLA_Set( FLA_ONE, u21_t );
838  *u21_t = *buff_1;
839 
840  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, u21, FLA_ZERO, z01 );
841  // FLA_Dot( a12t, u21, zeta11 );
842  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, u21, FLA_ZERO, z21 );
845  m_behind,
846  n_ahead,
847  buff_1,
848  A02, rs_A, cs_A,
849  u21, rs_U,
850  buff_0,
851  z01, rs_Z );
853  m_ahead,
854  a12t, cs_A,
855  u21, rs_U,
856  zeta11 );
859  m_ahead,
860  n_ahead,
861  buff_1,
862  A22, rs_A, cs_A,
863  u21, rs_U,
864  buff_0,
865  z21, rs_Z );
866 
867  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, u21, FLA_ZERO, t01 );
870  m_ahead,
871  n_behind,
872  buff_1,
873  U20, rs_U, cs_U,
874  u21, rs_U,
875  buff_0,
876  t01, rs_T );
877  }
878 
879  /*------------------------------------------------------------*/
880 
881  }
882 
883  // FLA_Obj_free( &w );
884  FLA_free( buff_w );
885 
886  return FLA_SUCCESS;
887 }
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
void bl1_ctrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx)
Definition: bl1_trsv.c:99
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:61
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
void bl1_cdots(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho)
Definition: bl1_dots.c:39
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:54
Definition: blis_type_defs.h:132
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
int i
Definition: bl1_axmyv2.c:145
Definition: blis_type_defs.h:74
void bl1_ctrmvsx(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_trmvsx.c:129
void bl1_ctrmv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx)
Definition: bl1_trmv.c:99
Definition: blis_type_defs.h:62
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
Definition: blis_type_defs.h:63

◆ FLA_Hess_UT_step_opd_var5()

FLA_Error FLA_Hess_UT_step_opd_var5 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_U,
int  rs_U,
int  cs_U,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_T,
int  rs_T,
int  cs_T 
)

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_ddots(), bl1_dgemv(), bl1_dsetm(), bl1_dtrmv(), bl1_dtrmvsx(), bl1_dtrsv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_NONUNIT_DIAG, BLIS1_UPPER_TRIANGULAR, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var5().

391 {
392  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
393  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
394  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
395  int i;
396 
397  // b_alg = FLA_Obj_length( T );
398  int b_alg = m_T;
399 
400  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
401  double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
402  int inc_w = 1;
403 
404  // FLA_Set( FLA_ZERO, U );
405  // FLA_Set( FLA_ZERO, Z );
406  bl1_dsetm( m_A,
407  b_alg,
408  buff_0,
409  buff_U, rs_U, cs_U );
410  bl1_dsetm( m_A,
411  b_alg,
412  buff_0,
413  buff_Z, rs_Z, cs_Z );
414 
415  for ( i = 0; i < b_alg; ++i )
416  {
417  double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
418  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
419  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
420  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
421  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
422  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
423 
424  double* U00 = buff_U + (0 )*cs_U + (0 )*rs_U;
425  double* u10t = buff_U + (0 )*cs_U + (i )*rs_U;
426  double* U20 = buff_U + (0 )*cs_U + (i+1)*rs_U;
427  double* u21 = buff_U + (i )*cs_U + (i+1)*rs_U;
428 
429  double* Z00 = buff_Z + (0 )*cs_Z + (0 )*rs_Z;
430  double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
431  double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
432  double* z01 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
433  double* zeta11 = buff_Z + (i )*cs_Z + (i )*rs_Z;
434  double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
435 
436  double* T00 = buff_T + (0 )*cs_T + (0 )*rs_T;
437  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
438  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
439 
440  double* w0 = buff_w + (0 )*inc_w;
441 
442  double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
443  double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
444 
445  double* u21_t = u21 + (0 )*cs_U + (0 )*rs_U;
446 
447  int m_ahead = m_A - i - 1;
448  int n_ahead = m_A - i - 1;
449  int m_behind = i;
450  int n_behind = i;
451 
452  /*------------------------------------------------------------*/
453 
454  if ( m_behind > 0 )
455  {
456  // FLA_Copyt( FLA_CONJ_TRANSPOSE, u10t, w0 );
457  // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
458  // T00, w0 );
460  m_behind,
461  u10t, cs_U,
462  w0, inc_w );
466  m_behind,
467  T00, rs_T, cs_T,
468  w0, inc_w );
469 
470  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z00, w0, FLA_ONE, a01 );
471  // FLA_Dots( FLA_MINUS_ONE, z10t, w0, FLA_ONE, alpha11 );
472  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, w0, FLA_ONE, a21 );
475  m_behind,
476  n_behind,
477  buff_m1,
478  Z00, rs_Z, cs_Z,
479  w0, inc_w,
480  buff_1,
481  a01, rs_A );
483  m_behind,
484  buff_m1,
485  z10t, cs_Z,
486  w0, inc_w,
487  buff_1,
488  alpha11 );
491  m_ahead,
492  n_behind,
493  buff_m1,
494  Z20, rs_Z, cs_Z,
495  w0, inc_w,
496  buff_1,
497  a21, rs_A );
498 
499  // FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
500  // FLA_ONE, U00, a01, FLA_ZERO, w0 );
501  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, alpha11, u10t, w0 );
502  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, a21, FLA_ONE, w0 );
504  m_behind,
505  a01, rs_A,
506  w0, inc_w );
510  m_behind,
511  U00, rs_U, cs_U,
512  w0, inc_w );
514  m_behind,
515  alpha11,
516  u10t, cs_U,
517  w0, inc_w );
520  m_ahead,
521  n_behind,
522  buff_1,
523  U20, rs_U, cs_U,
524  a21, rs_A,
525  buff_1,
526  w0, inc_w );
527 
528  // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
529  // T00, w0 );
533  m_behind,
534  T00, rs_T, cs_T,
535  w0, inc_w );
536 
537  // FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
538  // FLA_MINUS_ONE, U00, w0, FLA_ONE, a01 );
539  // FLA_Dots( FLA_MINUS_ONE, u10t, w0, FLA_ONE, alpha11 );
540  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, U20, w0, FLA_ONE, a21 );
544  m_behind,
545  buff_m1,
546  U00, rs_U, cs_U,
547  w0, inc_w,
548  buff_1,
549  a01, rs_A );
551  m_behind,
552  buff_m1,
553  u10t, cs_U,
554  w0, inc_w,
555  buff_1,
556  alpha11 );
559  m_ahead,
560  n_behind,
561  buff_m1,
562  U20, rs_U, cs_U,
563  w0, inc_w,
564  buff_1,
565  a21, rs_A );
566  }
567 
568  if ( m_ahead > 0 )
569  {
570  // FLA_Househ2_UT( FLA_LEFT,
571  // a21_t,
572  // a21_b, tau11 );
573  FLA_Househ2_UT_l_opd( m_ahead - 1,
574  a21_t,
575  a21_b, rs_A,
576  tau11 );
577 
578  // FLA_Copy( a21, u21 );
580  m_ahead,
581  a21, rs_A,
582  u21, rs_U );
583 
584  // FLA_Set( FLA_ONE, u21_t );
585  *u21_t = *buff_1;
586 
587  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, u21, FLA_ZERO, z01 );
588  // FLA_Dot( a12t, u21, zeta11 );
589  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, u21, FLA_ZERO, z21 );
592  m_behind,
593  n_ahead,
594  buff_1,
595  A02, rs_A, cs_A,
596  u21, rs_U,
597  buff_0,
598  z01, rs_Z );
600  m_ahead,
601  a12t, cs_A,
602  u21, rs_U,
603  zeta11 );
606  m_ahead,
607  n_ahead,
608  buff_1,
609  A22, rs_A, cs_A,
610  u21, rs_U,
611  buff_0,
612  z21, rs_Z );
613 
614  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, u21, FLA_ZERO, t01 );
617  m_ahead,
618  n_behind,
619  buff_1,
620  U20, rs_U, cs_U,
621  u21, rs_U,
622  buff_0,
623  t01, rs_T );
624  }
625 
626  /*------------------------------------------------------------*/
627 
628  }
629 
630  // FLA_Obj_free( &w );
631  FLA_free( buff_w );
632 
633  return FLA_SUCCESS;
634 }
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_dsetm(int m, int n, double *sigma, double *a, int a_rs, int a_cs)
Definition: bl1_setm.c:45
void bl1_dtrmvsx(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_trmvsx.c:71
void bl1_dtrmv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, double *a, int a_rs, int a_cs, double *x, int incx)
Definition: bl1_trmv.c:56
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:54
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42
void bl1_dtrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, double *a, int a_rs, int a_cs, double *x, int incx)
Definition: bl1_trsv.c:56
void bl1_ddots(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy, double *beta, double *rho)
Definition: bl1_dots.c:26
int i
Definition: bl1_axmyv2.c:145
Definition: blis_type_defs.h:74
Definition: blis_type_defs.h:62
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
Definition: blis_type_defs.h:63

◆ FLA_Hess_UT_step_ops_var5()

FLA_Error FLA_Hess_UT_step_ops_var5 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_U,
int  rs_U,
int  cs_U,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_T,
int  rs_T,
int  cs_T 
)

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sdots(), bl1_sgemv(), bl1_ssetm(), bl1_strmv(), bl1_strmvsx(), bl1_strsv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_NONUNIT_DIAG, BLIS1_UPPER_TRIANGULAR, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var5().

138 {
139  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
140  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
141  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
142  int i;
143 
144  // b_alg = FLA_Obj_length( T );
145  int b_alg = m_T;
146 
147  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
148  float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
149  int inc_w = 1;
150 
151  // FLA_Set( FLA_ZERO, U );
152  // FLA_Set( FLA_ZERO, Z );
153  bl1_ssetm( m_A,
154  b_alg,
155  buff_0,
156  buff_U, rs_U, cs_U );
157  bl1_ssetm( m_A,
158  b_alg,
159  buff_0,
160  buff_Z, rs_Z, cs_Z );
161 
162  for ( i = 0; i < b_alg; ++i )
163  {
164  float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
165  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
166  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
167  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
168  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
169  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
170 
171  float* U00 = buff_U + (0 )*cs_U + (0 )*rs_U;
172  float* u10t = buff_U + (0 )*cs_U + (i )*rs_U;
173  float* U20 = buff_U + (0 )*cs_U + (i+1)*rs_U;
174  float* u21 = buff_U + (i )*cs_U + (i+1)*rs_U;
175 
176  float* Z00 = buff_Z + (0 )*cs_Z + (0 )*rs_Z;
177  float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
178  float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
179  float* z01 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
180  float* zeta11 = buff_Z + (i )*cs_Z + (i )*rs_Z;
181  float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
182 
183  float* T00 = buff_T + (0 )*cs_T + (0 )*rs_T;
184  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
185  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
186 
187  float* w0 = buff_w + (0 )*inc_w;
188 
189  float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
190  float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
191 
192  float* u21_t = u21 + (0 )*cs_U + (0 )*rs_U;
193 
194  int m_ahead = m_A - i - 1;
195  int n_ahead = m_A - i - 1;
196  int m_behind = i;
197  int n_behind = i;
198 
199  /*------------------------------------------------------------*/
200 
201  if ( m_behind > 0 )
202  {
203  // FLA_Copyt( FLA_CONJ_TRANSPOSE, u10t, w0 );
204  // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
205  // T00, w0 );
207  m_behind,
208  u10t, cs_U,
209  w0, inc_w );
213  m_behind,
214  T00, rs_T, cs_T,
215  w0, inc_w );
216 
217  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z00, w0, FLA_ONE, a01 );
218  // FLA_Dots( FLA_MINUS_ONE, z10t, w0, FLA_ONE, alpha11 );
219  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, w0, FLA_ONE, a21 );
222  m_behind,
223  n_behind,
224  buff_m1,
225  Z00, rs_Z, cs_Z,
226  w0, inc_w,
227  buff_1,
228  a01, rs_A );
230  m_behind,
231  buff_m1,
232  z10t, cs_Z,
233  w0, inc_w,
234  buff_1,
235  alpha11 );
238  m_ahead,
239  n_behind,
240  buff_m1,
241  Z20, rs_Z, cs_Z,
242  w0, inc_w,
243  buff_1,
244  a21, rs_A );
245 
246  // FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
247  // FLA_ONE, U00, a01, FLA_ZERO, w0 );
248  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, alpha11, u10t, w0 );
249  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, a21, FLA_ONE, w0 );
251  m_behind,
252  a01, rs_A,
253  w0, inc_w );
257  m_behind,
258  U00, rs_U, cs_U,
259  w0, inc_w );
261  m_behind,
262  alpha11,
263  u10t, cs_U,
264  w0, inc_w );
267  m_ahead,
268  n_behind,
269  buff_1,
270  U20, rs_U, cs_U,
271  a21, rs_A,
272  buff_1,
273  w0, inc_w );
274 
275  // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
276  // T00, w0 );
280  m_behind,
281  T00, rs_T, cs_T,
282  w0, inc_w );
283 
284  // FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
285  // FLA_MINUS_ONE, U00, w0, FLA_ONE, a01 );
286  // FLA_Dots( FLA_MINUS_ONE, u10t, w0, FLA_ONE, alpha11 );
287  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, U20, w0, FLA_ONE, a21 );
291  m_behind,
292  buff_m1,
293  U00, rs_U, cs_U,
294  w0, inc_w,
295  buff_1,
296  a01, rs_A );
298  m_behind,
299  buff_m1,
300  u10t, cs_U,
301  w0, inc_w,
302  buff_1,
303  alpha11 );
306  m_ahead,
307  n_behind,
308  buff_m1,
309  U20, rs_U, cs_U,
310  w0, inc_w,
311  buff_1,
312  a21, rs_A );
313  }
314 
315  if ( m_ahead > 0 )
316  {
317  // FLA_Househ2_UT( FLA_LEFT,
318  // a21_t,
319  // a21_b, tau11 );
320  FLA_Househ2_UT_l_ops( m_ahead - 1,
321  a21_t,
322  a21_b, rs_A,
323  tau11 );
324 
325  // FLA_Copy( a21, u21 );
327  m_ahead,
328  a21, rs_A,
329  u21, rs_U );
330 
331  // FLA_Set( FLA_ONE, u21_t );
332  *u21_t = *buff_1;
333 
334  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, u21, FLA_ZERO, z01 );
335  // FLA_Dot( a12t, u21, zeta11 );
336  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, u21, FLA_ZERO, z21 );
339  m_behind,
340  n_ahead,
341  buff_1,
342  A02, rs_A, cs_A,
343  u21, rs_U,
344  buff_0,
345  z01, rs_Z );
347  m_ahead,
348  a12t, cs_A,
349  u21, rs_U,
350  zeta11 );
353  m_ahead,
354  n_ahead,
355  buff_1,
356  A22, rs_A, cs_A,
357  u21, rs_U,
358  buff_0,
359  z21, rs_Z );
360 
361  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, u21, FLA_ZERO, t01 );
364  m_ahead,
365  n_behind,
366  buff_1,
367  U20, rs_U, cs_U,
368  u21, rs_U,
369  buff_0,
370  t01, rs_T );
371  }
372 
373  /*------------------------------------------------------------*/
374 
375  }
376 
377  // FLA_Obj_free( &w );
378  FLA_free( buff_w );
379 
380  return FLA_SUCCESS;
381 }
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_strsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, float *a, int a_rs, int a_cs, float *x, int incx)
Definition: bl1_trsv.c:13
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
void bl1_sdots(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho)
Definition: bl1_dots.c:13
Definition: blis_type_defs.h:54
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35
void bl1_strmv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, float *a, int a_rs, int a_cs, float *x, int incx)
Definition: bl1_trmv.c:13
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
int i
Definition: bl1_axmyv2.c:145
Definition: blis_type_defs.h:74
Definition: blis_type_defs.h:62
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
Definition: blis_type_defs.h:63
void bl1_strmvsx(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_trmvsx.c:13
void bl1_ssetm(int m, int n, float *sigma, float *a, int a_rs, int a_cs)
Definition: bl1_setm.c:29

◆ FLA_Hess_UT_step_opt_var5()

FLA_Error FLA_Hess_UT_step_opt_var5 ( FLA_Obj  A,
FLA_Obj  U,
FLA_Obj  Z,
FLA_Obj  T 
)

References FLA_Hess_UT_step_opc_var5(), FLA_Hess_UT_step_opd_var5(), FLA_Hess_UT_step_ops_var5(), FLA_Hess_UT_step_opz_var5(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), and FLA_Obj_row_stride().

Referenced by FLA_Hess_UT_blk_var5(), and FLA_Hess_UT_opt_var5().

30 {
31  FLA_Datatype datatype;
32  int m_A, m_T;
33  int rs_A, cs_A;
34  int rs_U, cs_U;
35  int rs_Z, cs_Z;
36  int rs_T, cs_T;
37 
38  datatype = FLA_Obj_datatype( A );
39 
40  m_A = FLA_Obj_length( A );
41  m_T = FLA_Obj_length( T );
42 
43  rs_A = FLA_Obj_row_stride( A );
44  cs_A = FLA_Obj_col_stride( A );
45 
46  rs_U = FLA_Obj_row_stride( U );
47  cs_U = FLA_Obj_col_stride( U );
48 
49  rs_Z = FLA_Obj_row_stride( Z );
50  cs_Z = FLA_Obj_col_stride( Z );
51 
52  rs_T = FLA_Obj_row_stride( T );
53  cs_T = FLA_Obj_col_stride( T );
54 
55 
56  switch ( datatype )
57  {
58  case FLA_FLOAT:
59  {
60  float* buff_A = FLA_FLOAT_PTR( A );
61  float* buff_U = FLA_FLOAT_PTR( U );
62  float* buff_Z = FLA_FLOAT_PTR( Z );
63  float* buff_T = FLA_FLOAT_PTR( T );
64 
65  FLA_Hess_UT_step_ops_var5( m_A,
66  m_T,
67  buff_A, rs_A, cs_A,
68  buff_U, rs_U, cs_U,
69  buff_Z, rs_Z, cs_Z,
70  buff_T, rs_T, cs_T );
71 
72  break;
73  }
74 
75  case FLA_DOUBLE:
76  {
77  double* buff_A = FLA_DOUBLE_PTR( A );
78  double* buff_U = FLA_DOUBLE_PTR( U );
79  double* buff_Z = FLA_DOUBLE_PTR( Z );
80  double* buff_T = FLA_DOUBLE_PTR( T );
81 
82  FLA_Hess_UT_step_opd_var5( m_A,
83  m_T,
84  buff_A, rs_A, cs_A,
85  buff_U, rs_U, cs_U,
86  buff_Z, rs_Z, cs_Z,
87  buff_T, rs_T, cs_T );
88 
89  break;
90  }
91 
92  case FLA_COMPLEX:
93  {
94  scomplex* buff_A = FLA_COMPLEX_PTR( A );
95  scomplex* buff_U = FLA_COMPLEX_PTR( U );
96  scomplex* buff_Z = FLA_COMPLEX_PTR( Z );
97  scomplex* buff_T = FLA_COMPLEX_PTR( T );
98 
99  FLA_Hess_UT_step_opc_var5( m_A,
100  m_T,
101  buff_A, rs_A, cs_A,
102  buff_U, rs_U, cs_U,
103  buff_Z, rs_Z, cs_Z,
104  buff_T, rs_T, cs_T );
105 
106  break;
107  }
108 
109  case FLA_DOUBLE_COMPLEX:
110  {
111  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
112  dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );
113  dcomplex* buff_Z = FLA_DOUBLE_COMPLEX_PTR( Z );
114  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
115 
116  FLA_Hess_UT_step_opz_var5( m_A,
117  m_T,
118  buff_A, rs_A, cs_A,
119  buff_U, rs_U, cs_U,
120  buff_Z, rs_Z, cs_Z,
121  buff_T, rs_T, cs_T );
122 
123  break;
124  }
125  }
126 
127  return FLA_SUCCESS;
128 }
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Error FLA_Hess_UT_step_opc_var5(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var5.c:638
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
FLA_Error FLA_Hess_UT_step_opd_var5(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_U, int rs_U, int cs_U, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var5.c:385
Definition: blis_type_defs.h:132
int FLA_Datatype
Definition: FLA_type_defs.h:49
FLA_Error FLA_Hess_UT_step_opz_var5(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var5.c:891
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Error FLA_Hess_UT_step_ops_var5(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_U, int rs_U, int cs_U, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var5.c:132
Definition: blis_type_defs.h:137

◆ FLA_Hess_UT_step_opz_var5()

FLA_Error FLA_Hess_UT_step_opz_var5 ( int  m_A,
int  m_T,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_U,
int  rs_U,
int  cs_U,
dcomplex * buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex * buff_T,
int  rs_T,
int  cs_T 
)

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zdots(), bl1_zgemv(), bl1_zsetm(), bl1_ztrmv(), bl1_ztrmvsx(), bl1_ztrsv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_NONUNIT_DIAG, BLIS1_UPPER_TRIANGULAR, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var5().

897 {
898  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
899  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
900  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
901  int i;
902 
903  // b_alg = FLA_Obj_length( T );
904  int b_alg = m_T;
905 
906  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
907  dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
908  int inc_w = 1;
909 
910  // FLA_Set( FLA_ZERO, U );
911  // FLA_Set( FLA_ZERO, Z );
912  bl1_zsetm( m_A,
913  b_alg,
914  buff_0,
915  buff_U, rs_U, cs_U );
916  bl1_zsetm( m_A,
917  b_alg,
918  buff_0,
919  buff_Z, rs_Z, cs_Z );
920 
921  for ( i = 0; i < b_alg; ++i )
922  {
923  dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
924  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
925  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
926  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
927  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
928  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
929 
930  dcomplex* U00 = buff_U + (0 )*cs_U + (0 )*rs_U;
931  dcomplex* u10t = buff_U + (0 )*cs_U + (i )*rs_U;
932  dcomplex* U20 = buff_U + (0 )*cs_U + (i+1)*rs_U;
933  dcomplex* u21 = buff_U + (i )*cs_U + (i+1)*rs_U;
934 
935  dcomplex* Z00 = buff_Z + (0 )*cs_Z + (0 )*rs_Z;
936  dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
937  dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
938  dcomplex* z01 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
939  dcomplex* zeta11 = buff_Z + (i )*cs_Z + (i )*rs_Z;
940  dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
941 
942  dcomplex* T00 = buff_T + (0 )*cs_T + (0 )*rs_T;
943  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
944  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
945 
946  dcomplex* w0 = buff_w + (0 )*inc_w;
947 
948  dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
949  dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
950 
951  dcomplex* u21_t = u21 + (0 )*cs_U + (0 )*rs_U;
952 
953  int m_ahead = m_A - i - 1;
954  int n_ahead = m_A - i - 1;
955  int m_behind = i;
956  int n_behind = i;
957 
958  /*------------------------------------------------------------*/
959 
960  if ( m_behind > 0 )
961  {
962  // FLA_Copyt( FLA_CONJ_TRANSPOSE, u10t, w0 );
963  // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
964  // T00, w0 );
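965  bl1_zcopyv( BLIS1_CONJUGATE,
966  m_behind,
967  u10t, cs_U,
968  w0, inc_w );
969  bl1_ztrsv( BLIS1_UPPER_TRIANGULAR,
970  BLIS1_NO_TRANSPOSE,
971  BLIS1_NONUNIT_DIAG,
972  m_behind,
973  T00, rs_T, cs_T,
974  w0, inc_w );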
975 
976  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z00, w0, FLA_ONE, a01 );
977  // FLA_Dots( FLA_MINUS_ONE, z10t, w0, FLA_ONE, alpha11 );
978  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, w0, FLA_ONE, a21 );
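979  bl1_zgemv( BLIS1_NO_TRANSPOSE,
980  BLIS1_NO_CONJUGATE,
981  m_behind,
982  n_behind,
983  buff_m1,
984  Z00, rs_Z, cs_Z,
985  w0, inc_w,
986  buff_1,
987  a01, rs_A );
988  bl1_zdots( BLIS1_NO_CONJUGATE,
989  m_behind,
990  buff_m1,
991  z10t, cs_Z,
992  w0, inc_w,
993  buff_1,
994  alpha11 );
995  bl1_zgemv( BLIS1_NO_TRANSPOSE,
996  BLIS1_NO_CONJUGATE,
997  m_ahead,
998  n_behind,
999  buff_m1,
1000  Z20, rs_Z, cs_Z,
1001  w0, inc_w,
1002  buff_1,
1003  a21, rs_A );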
1004 
1005  // FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
1006  // FLA_ONE, U00, a01, FLA_ZERO, w0 );
1007  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, alpha11, u10t, w0 );
1008  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, a21, FLA_ONE, w0 );
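1009  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1010  m_behind,
1011  a01, rs_A,
1012  w0, inc_w );
1013  bl1_ztrmv( BLIS1_LOWER_TRIANGULAR,
1014  BLIS1_CONJ_TRANSPOSE,
1015  BLIS1_NONUNIT_DIAG,
1016  m_behind,
1017  U00, rs_U, cs_U,
1018  w0, inc_w );
1019  bl1_zaxpyv( BLIS1_CONJUGATE,
1020  m_behind,
1021  alpha11,
1022  u10t, cs_U,
1023  w0, inc_w );
1024  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
1025  BLIS1_NO_CONJUGATE,
1026  m_ahead,
1027  n_behind,
1028  buff_1,
1029  U20, rs_U, cs_U,
1030  a21, rs_A,
1031  buff_1,
1032  w0, inc_w );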
1033 
1034  // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
1035  // T00, w0 );
1036  bl1_ztrsv( BLIS1_UPPER_TRIANGULAR,
1037  BLIS1_CONJ_TRANSPOSE,
1038  BLIS1_NONUNIT_DIAG,
1039  m_behind,
1040  T00, rs_T, cs_T,
1041  w0, inc_w );
1042 
1043  // FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
1044  // FLA_MINUS_ONE, U00, w0, FLA_ONE, a01 );
1045  // FLA_Dots( FLA_MINUS_ONE, u10t, w0, FLA_ONE, alpha11 );
1046  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, U20, w0, FLA_ONE, a21 );
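1047  bl1_ztrmvsx( BLIS1_LOWER_TRIANGULAR,
1048  BLIS1_NO_TRANSPOSE,
1049  BLIS1_NONUNIT_DIAG,
1050  m_behind,
1051  buff_m1,
1052  U00, rs_U, cs_U,
1053  w0, inc_w,
1054  buff_1,
1055  a01, rs_A );
1056  bl1_zdots( BLIS1_NO_CONJUGATE,
1057  m_behind,
1058  buff_m1,
1059  u10t, cs_U,
1060  w0, inc_w,
1061  buff_1,
1062  alpha11 );
1063  bl1_zgemv( BLIS1_NO_TRANSPOSE,
1064  BLIS1_NO_CONJUGATE,
1065  m_ahead,
1066  n_behind,
1067  buff_m1,
1068  U20, rs_U, cs_U,
1069  w0, inc_w,
1070  buff_1,
1071  a21, rs_A );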
1072  }
1073 
1074  if ( m_ahead > 0 )
1075  {
1076  // FLA_Househ2_UT( FLA_LEFT,
1077  // a21_t,
1078  // a21_b, tau11 );
1079  FLA_Househ2_UT_l_opz( m_ahead - 1,
1080  a21_t,
1081  a21_b, rs_A,
1082  tau11 );
1083 
1084  // FLA_Copy( a21, u21 );
1085  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1086  m_ahead,
1087  a21, rs_A,
1088  u21, rs_U );
1089 
1090  // FLA_Set( FLA_ONE, u21_t );
1091  *u21_t = *buff_1;
1092 
1093  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, u21, FLA_ZERO, z01 );
1094  // FLA_Dot( a12t, u21, zeta11 );
1095  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, u21, FLA_ZERO, z21 );
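1096  bl1_zgemv( BLIS1_NO_TRANSPOSE,
1097  BLIS1_NO_CONJUGATE,
1098  m_behind,
1099  n_ahead,
1100  buff_1,
1101  A02, rs_A, cs_A,
1102  u21, rs_U,
1103  buff_0,
1104  z01, rs_Z );
1105  bl1_zdot( BLIS1_NO_CONJUGATE,
1106  m_ahead,
1107  a12t, cs_A,
1108  u21, rs_U,
1109  zeta11 );
1110  bl1_zgemv( BLIS1_NO_TRANSPOSE,
1111  BLIS1_NO_CONJUGATE,
1112  m_ahead,
1113  n_ahead,
1114  buff_1,
1115  A22, rs_A, cs_A,
1116  u21, rs_U,
1117  buff_0,
1118  z21, rs_Z );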
1119 
1120  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, u21, FLA_ZERO, t01 );
1121  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
1122  BLIS1_NO_CONJUGATE,
1123  m_ahead,
1124  n_behind,
1125  buff_1,
1126  U20, rs_U, cs_U,
1127  u21, rs_U,
1128  buff_0,
1129  t01, rs_T );
1130  }
1131 
1132  /*------------------------------------------------------------*/
1133 
1134  }
1135 
1136  // FLA_Obj_free( &w );
1137  FLA_free( buff_w );
1138 
1139  return FLA_SUCCESS;
1140 }
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_ztrmvsx(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_trmvsx.c:187
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:54
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
void bl1_ztrmv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx)
Definition: bl1_trmv.c:177
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:78
int i
Definition: bl1_axmyv2.c:145
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
Definition: blis_type_defs.h:74
Definition: blis_type_defs.h:62
void bl1_zdots(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *beta, dcomplex *rho)
Definition: bl1_dots.c:56
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
Definition: blis_type_defs.h:63
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
Definition: blis_type_defs.h:137
void bl1_ztrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx)
Definition: bl1_trsv.c:177
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60