libflame  revision_anchor
Functions
FLA_Bidiag_UT_u_opt_var3.c File Reference

(r)

Functions

FLA_Error FLA_Bidiag_UT_u_opt_var3 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_step_opt_var3 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ops_var3 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opd_var3 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opc_var3 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opz_var3 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 

Function Documentation

◆ FLA_Bidiag_UT_u_opt_var3()

FLA_Error FLA_Bidiag_UT_u_opt_var3 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Bidiag_UT_u_step_opt_var3().

Referenced by FLA_Bidiag_UT_u().

14 {
15  return FLA_Bidiag_UT_u_step_opt_var3( A, TU, TV );
16 }
FLA_Error FLA_Bidiag_UT_u_step_opt_var3(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_opt_var3.c:18

◆ FLA_Bidiag_UT_u_step_opc_var3()

FLA_Error FLA_Bidiag_UT_u_step_opc_var3 ( int  m_A,
int  n_A,
int  m_TS,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_T,
int  rs_T,
int  cs_T,
scomplex *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_caxpyv(), bl1_cconjv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_Househ2s_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var3().

956 {
957  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
958  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
959  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
960 
961  scomplex alpha12;
962  scomplex minus_conj_alpha12;
963  scomplex psi11_minus_alpha12;
964  scomplex minus_inv_tau11;
965  scomplex minus_upsilon11;
966  scomplex minus_conj_nu11;
967  scomplex minus_conj_psi11;
968  scomplex minus_zeta11;
969  scomplex beta;
970  int i;
971 
972  // b_alg = FLA_Obj_length( T );
973  int b_alg = m_TS;
974 
975  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
976  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
977  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
978  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
979  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
980  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
981  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
982  scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
983  scomplex* buff_ap = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
984  scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
985  scomplex* buff_up = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
986  scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
987  scomplex* buff_y = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
988  scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
989  int inc_w = 1;
990  int inc_ap = 1;
991  int inc_u = 1;
992  int inc_up = 1;
993  int inc_v = 1;
994  int inc_y = 1;
995  int inc_z = 1;
996 
997  for ( i = 0; i < b_alg; ++i )
998  {
999  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1000  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1001  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1002  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1003  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1004  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1005  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1006 
1007  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1008  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1009 
1010  scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1011  scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1012 
1013  scomplex* w21 = buff_w + (i+1)*inc_w;
1014 
1015  scomplex* a12p = buff_ap + (i+1)*inc_ap;
1016 
1017  scomplex* upsilon11 = buff_u + (i )*inc_u;
1018  scomplex* u21 = buff_u + (i+1)*inc_u;
1019 
1020  scomplex* u21p = buff_up + (i+1)*inc_up;
1021 
1022  scomplex* nu11 = buff_v + (i )*inc_v;
1023  scomplex* v21 = buff_v + (i+1)*inc_v;
1024 
1025  scomplex* psi11 = buff_y + (i )*inc_y;
1026  scomplex* y21 = buff_y + (i+1)*inc_y;
1027 
1028  scomplex* zeta11 = buff_z + (i )*inc_z;
1029  scomplex* z21 = buff_z + (i+1)*inc_z;
1030 
1031  scomplex* a12p_t = a12p + (0 )*inc_ap;
1032  scomplex* a12p_b = a12p + (1 )*inc_ap;
1033 
1034  scomplex* v21_t = v21 + (0 )*inc_v;
1035  scomplex* v21_b = v21 + (1 )*inc_v;
1036 
1037  scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1038  scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1039 
1040  scomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
1041 
1042  int m_ahead = m_A - i - 1;
1043  int n_ahead = n_A - i - 1;
1044  int m_behind = i;
1045  int n_behind = i;
1046 
1047  /*------------------------------------------------------------*/
1048 
1049  if ( m_behind > 0 )
1050  {
1051  // FLA_Copy( upsilon11, minus_upsilon11 );
1052  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
1053  bl1_cmult3( buff_m1, upsilon11, &minus_upsilon11 );
1054 
1055  // FLA_Copy( zeta11, minus_zeta11 );
1056  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
1057  bl1_cmult3( buff_m1, zeta11, &minus_zeta11 );
1058 
1059  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
1060  // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
1061  bl1_ccopyconj( psi11, &minus_conj_psi11 );
1062  bl1_cscals( buff_m1, &minus_conj_psi11 );
1063 
1064  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
1065  // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
1066  bl1_ccopyconj( nu11, &minus_conj_nu11 );
1067  bl1_cscals( buff_m1, &minus_conj_nu11 );
1068 
1069  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
1070  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, zeta11, alpha11 );
1071  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1072  1,
1073  &minus_conj_psi11,
1074  upsilon11, 1,
1075  alpha11, 1 );
1076  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1077  1,
1078  &minus_conj_nu11,
1079  zeta11, 1,
1080  alpha11, 1 );
1081 
1082  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
1083  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
1084  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1085  m_ahead,
1086  &minus_conj_psi11,
1087  u21, inc_u,
1088  a21, rs_A );
1089  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1090  m_ahead,
1091  &minus_conj_nu11,
1092  z21, inc_z,
1093  a21, rs_A );
1094 
1095  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
1096  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11, v21, a12t );
1097  bl1_caxpyv( BLIS1_CONJUGATE,
1098  n_ahead,
1099  &minus_upsilon11,
1100  y21, inc_y,
1101  a12t, cs_A );
1102  bl1_caxpyv( BLIS1_CONJUGATE,
1103  n_ahead,
1104  &minus_zeta11,
1105  v21, inc_v,
1106  a12t, cs_A );
1107  }
1108 
1109  // FLA_Househ2_UT( FLA_LEFT,
1110  // alpha11,
1111  // a21, tau11 );
1112  // FLA_Copy( a21, u21p );
1113  FLA_Househ2_UT_l_opc( m_ahead,
1114  alpha11,
1115  a21, rs_A,
1116  tau11 );
1117  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1118  m_ahead,
1119  a21, rs_A,
1120  u21p, inc_up );
1121 
1122  if ( n_ahead > 0 )
1123  {
1124  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1125  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1126  bl1_cdiv3( buff_m1, tau11, &minus_inv_tau11 );
1127 
1128  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1129  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1130  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1131  n_ahead,
1132  a12t, cs_A,
1133  a12p, inc_ap );
1134  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1135  n_ahead,
1136  &minus_inv_tau11,
1137  a12t, cs_A,
1138  a12p, inc_ap );
1139  }
1140 
1141  if ( m_behind > 0 )
1142  {
1143  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1144  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
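1145  bl1_cger( BLIS1_NO_CONJUGATE,
1146  BLIS1_CONJUGATE,
1147  m_ahead,
1148  n_ahead,
1149  buff_m1,
1150  u21, inc_u,
1151  y21, inc_y,
1152  A22, rs_A, cs_A );
1153  bl1_cger( BLIS1_NO_CONJUGATE,
1154  BLIS1_CONJUGATE,
1155  m_ahead,
1156  n_ahead,
1157  buff_m1,
1158  z21, inc_z,
1159  v21, inc_v,
1160  A22, rs_A, cs_A );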
1161  }
1162 
1163  if ( n_ahead > 0 )
1164  {
1165  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
1166  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1167  BLIS1_NO_CONJUGATE,
1168  m_ahead,
1169  n_ahead,
1170  buff_1,
1171  A22, rs_A, cs_A,
1172  u21p, inc_up,
1173  buff_0,
1174  y21, inc_y );
1175 
1176  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1177  bl1_caxpyv( BLIS1_CONJUGATE,
1178  n_ahead,
1179  &minus_inv_tau11,
1180  y21, inc_y,
1181  a12p, inc_ap );
1182 
1183  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1184  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1185  BLIS1_CONJUGATE,
1186  m_ahead,
1187  n_ahead,
1188  buff_1,
1189  A22, rs_A, cs_A,
1190  a12p, inc_ap,
1191  buff_0,
1192  w21, inc_w );
1193 
1194  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1195  bl1_caxpyv( BLIS1_CONJUGATE,
1196  n_ahead,
1197  buff_1,
1198  a12t, cs_A,
1199  y21, inc_y );
1200 
1201  // FLA_Househ2s_UT( FLA_RIGHT,
1202  // a12p_t,
1203  // a12p_b,
1204  // alpha12, psi11_minus_alpha12, sigma11 );
1205  FLA_Househ2s_UT_r_opc( n_ahead - 1,
1206  a12p_t,
1207  a12p_b, inc_ap,
1208  &alpha12,
1209  &psi11_minus_alpha12,
1210  sigma11 );
1211 
1212  // FLA_Copy( a12p, v21 );
1213  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1214  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1215  // FLA_Conjugate( v21_b );
1216  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1217  n_ahead,
1218  a12p, inc_ap,
1219  v21, inc_v );
1220  bl1_cmult4( buff_m1, &alpha12, v21_t, v21_t );
1221  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
1222  n_ahead,
1223  &psi11_minus_alpha12,
1224  v21, inc_v );
1225  bl1_cconjv( n_ahead - 1,
1226  v21_b, inc_v );
1227 
1228  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
1229  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1230  *a12t_l = alpha12;
1231  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1232  n_ahead - 1,
1233  v21_b, inc_v,
1234  a12t_r, cs_A );
1235  }
1236 
1237  // FLA_Copy( u21p, u21 );
1238  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1239  m_ahead,
1240  u21p, inc_up,
1241  u21, inc_u );
1242 
1243  if ( n_ahead > 0 )
1244  {
1245  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1246  // FLA_Scal( FLA_MINUS_ONE, beta );
1247  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1248  bl1_cdot( BLIS1_CONJUGATE,
1249  n_ahead,
1250  y21, inc_y,
1251  v21, inc_v,
1252  &beta );
1253  bl1_cscals( &minus_inv_tau11, &beta );
1254 
1255  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1256  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1257  bl1_ccopyconj( &alpha12, &minus_conj_alpha12 );
1258  bl1_cneg1( &minus_conj_alpha12 );
1259 
1260  // FLA_Copy( w21, z21 );
1261  // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
1262  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1263  // FLA_Axpy( beta, u21, z21 );
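1264  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1265  m_ahead,
1266  w21, inc_w,
1267  z21, inc_z );
1268  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1269  m_ahead,
1270  &minus_conj_alpha12,
1271  A22_l, rs_A,
1272  z21, inc_z );
1273  bl1_cinvscalv( BLIS1_CONJUGATE,
1274  m_ahead,
1275  &psi11_minus_alpha12,
1276  z21, inc_z );
1277  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1278  m_ahead,
1279  &beta,
1280  u21, inc_u,
1281  z21, inc_z );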
1282 
1283  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1284  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1285  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
1286  n_ahead,
1287  tau11,
1288  y21, inc_y );
1289  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
1290  m_ahead,
1291  sigma11,
1292  z21, inc_z );
1293 
1294  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
1295  bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
1296  BLIS1_NO_CONJUGATE,
1297  m_behind,
1298  n_ahead,
1299  buff_1,
1300  A02, rs_A, cs_A,
1301  v21, inc_v,
1302  buff_0,
1303  s01, rs_S );
1304  }
1305 
1306  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1307  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1308  bl1_ccopyv( BLIS1_CONJUGATE,
1309  n_behind,
1310  a10t, cs_A,
1311  t01, rs_T );
1312  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1313  BLIS1_NO_CONJUGATE,
1314  m_ahead,
1315  n_behind,
1316  buff_1,
1317  A20, rs_A, cs_A,
1318  u21, inc_u,
1319  buff_1,
1320  t01, rs_T );
1321 
1322  if ( m_behind + 1 == b_alg && n_ahead > 0 )
1323  {
1324  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1325  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
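1326  bl1_cger( BLIS1_NO_CONJUGATE,
1327  BLIS1_CONJUGATE,
1328  m_ahead,
1329  n_ahead,
1330  buff_m1,
1331  u21, inc_u,
1332  y21, inc_y,
1333  A22, rs_A, cs_A );
1334  bl1_cger( BLIS1_NO_CONJUGATE,
1335  BLIS1_CONJUGATE,
1336  m_ahead,
1337  n_ahead,
1338  buff_m1,
1339  z21, inc_z,
1340  v21, inc_v,
1341  A22, rs_A, cs_A );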
1342  }
1343 
1344  /*------------------------------------------------------------*/
1345 
1346  }
1347 
1348  // FLA_Obj_free( &w );
1349  // FLA_Obj_free( &ap );
1350  // FLA_Obj_free( &u );
1351  // FLA_Obj_free( &up );
1352  // FLA_Obj_free( &v );
1353  // FLA_Obj_free( &y );
1354  // FLA_Obj_free( &z );
1355  FLA_free( buff_w );
1356  FLA_free( buff_ap );
1357  FLA_free( buff_u );
1358  FLA_free( buff_up );
1359  FLA_free( buff_v );
1360  FLA_free( buff_y );
1361  FLA_free( buff_z );
1362 
1363  return FLA_SUCCESS;
1364 }
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void bl1_cconjv(int m, scomplex *x, int incx)
Definition: bl1_conjv.c:23
FLA_Error FLA_Househ2s_UT_r_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *alpha, scomplex *chi_1_minus_alpha, scomplex *tau)
Definition: FLA_Househ2s_UT.c:589
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
Definition: blis_type_defs.h:132
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_invscalv.c:52
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:111
int i
Definition: bl1_axmyv2.c:145
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_opd_var3()

FLA_Error FLA_Bidiag_UT_u_step_opd_var3 ( int  m_A,
int  n_A,
int  m_TS,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T,
double *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_daxpyv(), bl1_dconjv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_Househ2s_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var3().

538 {
539  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
540  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
541  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
542 
543  double alpha12;
544  double minus_conj_alpha12;
545  double psi11_minus_alpha12;
546  double minus_inv_tau11;
547  double minus_upsilon11;
548  double minus_conj_nu11;
549  double minus_conj_psi11;
550  double minus_zeta11;
551  double beta;
552  int i;
553 
554  // b_alg = FLA_Obj_length( T );
555  int b_alg = m_TS;
556 
557  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
558  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
559  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
560  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
561  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
562  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
563  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
564  double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
565  double* buff_ap = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
566  double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
567  double* buff_up = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
568  double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
569  double* buff_y = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
570  double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
571  int inc_w = 1;
572  int inc_ap = 1;
573  int inc_u = 1;
574  int inc_up = 1;
575  int inc_v = 1;
576  int inc_y = 1;
577  int inc_z = 1;
578 
579  for ( i = 0; i < b_alg; ++i )
580  {
581  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
582  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
583  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
584  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
585  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
586  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
587  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
588 
589  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
590  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
591 
592  double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
593  double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
594 
595  double* w21 = buff_w + (i+1)*inc_w;
596 
597  double* a12p = buff_ap + (i+1)*inc_ap;
598 
599  double* upsilon11 = buff_u + (i )*inc_u;
600  double* u21 = buff_u + (i+1)*inc_u;
601 
602  double* u21p = buff_up + (i+1)*inc_up;
603 
604  double* nu11 = buff_v + (i )*inc_v;
605  double* v21 = buff_v + (i+1)*inc_v;
606 
607  double* psi11 = buff_y + (i )*inc_y;
608  double* y21 = buff_y + (i+1)*inc_y;
609 
610  double* zeta11 = buff_z + (i )*inc_z;
611  double* z21 = buff_z + (i+1)*inc_z;
612 
613  double* a12p_t = a12p + (0 )*inc_ap;
614  double* a12p_b = a12p + (1 )*inc_ap;
615 
616  double* v21_t = v21 + (0 )*inc_v;
617  double* v21_b = v21 + (1 )*inc_v;
618 
619  double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
620  double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
621 
622  double* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
623 
624  int m_ahead = m_A - i - 1;
625  int n_ahead = n_A - i - 1;
626  int m_behind = i;
627  int n_behind = i;
628 
629  /*------------------------------------------------------------*/
630 
631  if ( m_behind > 0 )
632  {
633  // FLA_Copy( upsilon11, minus_upsilon11 );
634  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
635  bl1_dmult3( buff_m1, upsilon11, &minus_upsilon11 );
636 
637  // FLA_Copy( zeta11, minus_zeta11 );
638  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
639  bl1_dmult3( buff_m1, zeta11, &minus_zeta11 );
640 
641  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
642  // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
643  bl1_dcopyconj( psi11, &minus_conj_psi11 );
644  bl1_dscals( buff_m1, &minus_conj_psi11 );
645 
646  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
647  // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
648  bl1_dcopyconj( nu11, &minus_conj_nu11 );
649  bl1_dscals( buff_m1, &minus_conj_nu11 );
650 
651  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
652  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, zeta11, alpha11 );
653  bl1_daxpyv( BLIS1_NO_CONJUGATE,
654  1,
655  &minus_conj_psi11,
656  upsilon11, 1,
657  alpha11, 1 );
658  bl1_daxpyv( BLIS1_NO_CONJUGATE,
659  1,
660  &minus_conj_nu11,
661  zeta11, 1,
662  alpha11, 1 );
663 
664  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
665  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
666  bl1_daxpyv( BLIS1_NO_CONJUGATE,
667  m_ahead,
668  &minus_conj_psi11,
669  u21, inc_u,
670  a21, rs_A );
671  bl1_daxpyv( BLIS1_NO_CONJUGATE,
672  m_ahead,
673  &minus_conj_nu11,
674  z21, inc_z,
675  a21, rs_A );
676 
677  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
678  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11, v21, a12t );
679  bl1_daxpyv( BLIS1_CONJUGATE,
680  n_ahead,
681  &minus_upsilon11,
682  y21, inc_y,
683  a12t, cs_A );
684  bl1_daxpyv( BLIS1_CONJUGATE,
685  n_ahead,
686  &minus_zeta11,
687  v21, inc_v,
688  a12t, cs_A );
689  }
690 
691  // FLA_Househ2_UT( FLA_LEFT,
692  // alpha11,
693  // a21, tau11 );
694  // FLA_Copy( a21, u21p );
695  FLA_Househ2_UT_l_opd( m_ahead,
696  alpha11,
697  a21, rs_A,
698  tau11 );
699  bl1_dcopyv( BLIS1_NO_CONJUGATE,
700  m_ahead,
701  a21, rs_A,
702  u21p, inc_up );
703 
704  if ( n_ahead > 0 )
705  {
706  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
707  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
708  bl1_ddiv3( buff_m1, tau11, &minus_inv_tau11 );
709 
710  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
711  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
712  bl1_dcopyv( BLIS1_NO_CONJUGATE,
713  n_ahead,
714  a12t, cs_A,
715  a12p, inc_ap );
716  bl1_daxpyv( BLIS1_NO_CONJUGATE,
717  n_ahead,
718  &minus_inv_tau11,
719  a12t, cs_A,
720  a12p, inc_ap );
721  }
722 
723  if ( m_behind > 0 )
724  {
725  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
726  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
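727  bl1_dger( BLIS1_NO_CONJUGATE,
728  BLIS1_CONJUGATE,
729  m_ahead,
730  n_ahead,
731  buff_m1,
732  u21, inc_u,
733  y21, inc_y,
734  A22, rs_A, cs_A );
735  bl1_dger( BLIS1_NO_CONJUGATE,
736  BLIS1_CONJUGATE,
737  m_ahead,
738  n_ahead,
739  buff_m1,
740  z21, inc_z,
741  v21, inc_v,
742  A22, rs_A, cs_A );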
743  }
744 
745  if ( n_ahead > 0 )
746  {
747  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
748  bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
749  BLIS1_NO_CONJUGATE,
750  m_ahead,
751  n_ahead,
752  buff_1,
753  A22, rs_A, cs_A,
754  u21p, inc_up,
755  buff_0,
756  y21, inc_y );
757 
758  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
759  bl1_daxpyv( BLIS1_CONJUGATE,
760  n_ahead,
761  &minus_inv_tau11,
762  y21, inc_y,
763  a12p, inc_ap );
764 
765  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
766  bl1_dgemv( BLIS1_NO_TRANSPOSE,
767  BLIS1_CONJUGATE,
768  m_ahead,
769  n_ahead,
770  buff_1,
771  A22, rs_A, cs_A,
772  a12p, inc_ap,
773  buff_0,
774  w21, inc_w );
775 
776  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
777  bl1_daxpyv( BLIS1_CONJUGATE,
778  n_ahead,
779  buff_1,
780  a12t, cs_A,
781  y21, inc_y );
782 
783  // FLA_Househ2s_UT( FLA_RIGHT,
784  // a12p_t,
785  // a12p_b,
786  // alpha12, psi11_minus_alpha12, sigma11 );
787  FLA_Househ2s_UT_r_opd( n_ahead - 1,
788  a12p_t,
789  a12p_b, inc_ap,
790  &alpha12,
791  &psi11_minus_alpha12,
792  sigma11 );
793 
794  // FLA_Copy( a12p, v21 );
795  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
796  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
797  // FLA_Conjugate( v21_b );
798  bl1_dcopyv( BLIS1_NO_CONJUGATE,
799  n_ahead,
800  a12p, inc_ap,
801  v21, inc_v );
802  bl1_dmult4( buff_m1, &alpha12, v21_t, v21_t );
803  bl1_dinvscalv( BLIS1_NO_CONJUGATE,
804  n_ahead,
805  &psi11_minus_alpha12,
806  v21, inc_v );
807  bl1_dconjv( n_ahead - 1,
808  v21_b, inc_v );
809 
810  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
811  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
812  *a12t_l = alpha12;
813  bl1_dcopyv( BLIS1_NO_CONJUGATE,
814  n_ahead - 1,
815  v21_b, inc_v,
816  a12t_r, cs_A );
817  }
818 
819  // FLA_Copy( u21p, u21 );
820  bl1_dcopyv( BLIS1_NO_CONJUGATE,
821  m_ahead,
822  u21p, inc_up,
823  u21, inc_u );
824 
825  if ( n_ahead > 0 )
826  {
827  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
828  // FLA_Scal( FLA_MINUS_ONE, beta );
829  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
830  bl1_ddot( BLIS1_CONJUGATE,
831  n_ahead,
832  y21, inc_y,
833  v21, inc_v,
834  &beta );
835  bl1_dscals( &minus_inv_tau11, &beta );
836 
837  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
838  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
839  bl1_dcopyconj( &alpha12, &minus_conj_alpha12 );
840  bl1_dneg1( &minus_conj_alpha12 );
841 
842  // FLA_Copy( w21, z21 );
843  // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
844  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
845  // FLA_Axpy( beta, u21, z21 );
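846  bl1_dcopyv( BLIS1_NO_CONJUGATE,
847  m_ahead,
848  w21, inc_w,
849  z21, inc_z );
850  bl1_daxpyv( BLIS1_NO_CONJUGATE,
851  m_ahead,
852  &minus_conj_alpha12,
853  A22_l, rs_A,
854  z21, inc_z );
855  bl1_dinvscalv( BLIS1_CONJUGATE,
856  m_ahead,
857  &psi11_minus_alpha12,
858  z21, inc_z );
859  bl1_daxpyv( BLIS1_NO_CONJUGATE,
860  m_ahead,
861  &beta,
862  u21, inc_u,
863  z21, inc_z );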
864 
865  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
866  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
867  bl1_dinvscalv( BLIS1_NO_CONJUGATE,
868  n_ahead,
869  tau11,
870  y21, inc_y );
871  bl1_dinvscalv( BLIS1_NO_CONJUGATE,
872  m_ahead,
873  sigma11,
874  z21, inc_z );
875 
876  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
877  bl1_dgemv( BLIS1_CONJ_NO_TRANSPOSE,
878  BLIS1_NO_CONJUGATE,
879  m_behind,
880  n_ahead,
881  buff_1,
882  A02, rs_A, cs_A,
883  v21, inc_v,
884  buff_0,
885  s01, rs_S );
886  }
887 
888  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
889  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
890  bl1_dcopyv( BLIS1_CONJUGATE,
891  n_behind,
892  a10t, cs_A,
893  t01, rs_T );
894  bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
895  BLIS1_NO_CONJUGATE,
896  m_ahead,
897  n_behind,
898  buff_1,
899  A20, rs_A, cs_A,
900  u21, inc_u,
901  buff_1,
902  t01, rs_T );
903 
904  if ( m_behind + 1 == b_alg && n_ahead > 0 )
905  {
906  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
907  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
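908  bl1_dger( BLIS1_NO_CONJUGATE,
909  BLIS1_CONJUGATE,
910  m_ahead,
911  n_ahead,
912  buff_m1,
913  u21, inc_u,
914  y21, inc_y,
915  A22, rs_A, cs_A );
916  bl1_dger( BLIS1_NO_CONJUGATE,
917  BLIS1_CONJUGATE,
918  m_ahead,
919  n_ahead,
920  buff_m1,
921  z21, inc_z,
922  v21, inc_v,
923  A22, rs_A, cs_A );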
924  }
925 
926  /*------------------------------------------------------------*/
927 
928  }
929 
930  // FLA_Obj_free( &w );
931  // FLA_Obj_free( &ap );
932  // FLA_Obj_free( &u );
933  // FLA_Obj_free( &up );
934  // FLA_Obj_free( &v );
935  // FLA_Obj_free( &y );
936  // FLA_Obj_free( &z );
937  FLA_free( buff_w );
938  FLA_free( buff_ap );
939  FLA_free( buff_u );
940  FLA_free( buff_up );
941  FLA_free( buff_v );
942  FLA_free( buff_y );
943  FLA_free( buff_z );
944 
945  return FLA_SUCCESS;
946 }
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
FLA_Error FLA_Househ2s_UT_r_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *alpha, double *chi_1_minus_alpha, double *tau)
Definition: FLA_Househ2s_UT.c:572
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition: bl1_ger.c:62
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_invscalv.c:26
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void bl1_dconjv(int m, double *x, int incx)
Definition: bl1_conjv.c:18
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42
int i
Definition: bl1_axmyv2.c:145
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_ops_var3()

FLA_Error FLA_Bidiag_UT_u_step_ops_var3 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_saxpyv(), bl1_sconjv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_Househ2s_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var3().

/*
 * Single-precision real kernel for the FLA_Bidiag_UT_u variant-3 step.
 *
 * Runs b_alg = m_TS iterations over the m_A x n_A matrix stored in buff_A
 * (row stride rs_A, column stride cs_A).  In iteration i it:
 *   - applies the rank-1 corrections held in u/y and z/v from the previous
 *     iteration to alpha11, a21 and a12t (only when i > 0);
 *   - generates the left Householder transform from ( alpha11, a21 ) and
 *     stores its scalar in tau11, the i-th diagonal element of T;
 *   - when columns remain to the right (n_ahead > 0), generates the right
 *     Householder transform from the row a12t and stores its scalar in
 *     sigma11, the i-th diagonal element of S;
 *   - fills t01 (column i of T above the diagonal) and s01 (same for S);
 *   - on the last iteration applies the two pending rank-1 updates to A22.
 *
 * Seven work vectors (w, ap, u, up, v, y, z) are obtained from FLA_malloc
 * and released with FLA_free before the function returns FLA_SUCCESS.
 */
120 {
121  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
122  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
123  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
124 
125  float alpha12;
126  float minus_conj_alpha12;
127  float psi11_minus_alpha12;
128  float minus_inv_tau11;
129  float minus_upsilon11;
130  float minus_conj_nu11;
131  float minus_conj_psi11;
132  float minus_zeta11;
133  float beta;
134  int i;
135 
136  // b_alg = FLA_Obj_length( T );
137  int b_alg = m_TS;
138 
139  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
140  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
141  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
142  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
143  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
144  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
145  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
146  float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
147  float* buff_ap = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
148  float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
149  float* buff_up = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
150  float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
151  float* buff_y = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
152  float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
153  int inc_w = 1;
154  int inc_ap = 1;
155  int inc_u = 1;
156  int inc_up = 1;
157  int inc_v = 1;
158  int inc_y = 1;
159  int inc_z = 1;
160 
161  for ( i = 0; i < b_alg; ++i )
162  {
163  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
164  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
165  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
166  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
167  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
168  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
169  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
170 
171  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
172  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
173 
174  float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
175  float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
176 
177  float* w21 = buff_w + (i+1)*inc_w;
178 
179  float* a12p = buff_ap + (i+1)*inc_ap;
180 
181  float* upsilon11 = buff_u + (i )*inc_u;
182  float* u21 = buff_u + (i+1)*inc_u;
183 
184  float* u21p = buff_up + (i+1)*inc_up;
185 
186  float* nu11 = buff_v + (i )*inc_v;
187  float* v21 = buff_v + (i+1)*inc_v;
188 
189  float* psi11 = buff_y + (i )*inc_y;
190  float* y21 = buff_y + (i+1)*inc_y;
191 
192  float* zeta11 = buff_z + (i )*inc_z;
193  float* z21 = buff_z + (i+1)*inc_z;
194 
195  float* a12p_t = a12p + (0 )*inc_ap;
196  float* a12p_b = a12p + (1 )*inc_ap;
197 
198  float* v21_t = v21 + (0 )*inc_v;
199  float* v21_b = v21 + (1 )*inc_v;
200 
201  float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
202  float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
203 
204  float* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
205 
206  int m_ahead = m_A - i - 1;
207  int n_ahead = n_A - i - 1;
208  int m_behind = i;
209  int n_behind = i;
210 
211  /*------------------------------------------------------------*/
212 
213  if ( m_behind > 0 )
214  {
215  // FLA_Copy( upsilon11, minus_upsilon11 );
216  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
217  bl1_smult3( buff_m1, upsilon11, &minus_upsilon11 );
218 
219  // FLA_Copy( zeta11, minus_zeta11 );
220  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
221  bl1_smult3( buff_m1, zeta11, &minus_zeta11 );
222 
223  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
224  // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
225  bl1_scopyconj( psi11, &minus_conj_psi11 );
226  bl1_sscals( buff_m1, &minus_conj_psi11 );
227 
228  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
229  // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
230  bl1_scopyconj( nu11, &minus_conj_nu11 );
231  bl1_sscals( buff_m1, &minus_conj_nu11 );
232 
233  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
234  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, zeta11, alpha11 );
235  bl1_saxpyv( BLIS1_NO_CONJUGATE,
236  1,
237  &minus_conj_psi11,
238  upsilon11, 1,
239  alpha11, 1 );
240  bl1_saxpyv( BLIS1_NO_CONJUGATE,
241  1,
242  &minus_conj_nu11,
243  zeta11, 1,
244  alpha11, 1 );
245 
246  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
247  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
248  bl1_saxpyv( BLIS1_NO_CONJUGATE,
249  m_ahead,
250  &minus_conj_psi11,
251  u21, inc_u,
252  a21, rs_A );
253  bl1_saxpyv( BLIS1_NO_CONJUGATE,
254  m_ahead,
255  &minus_conj_nu11,
256  z21, inc_z,
257  a21, rs_A );
258 
259  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
260  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11, v21, a12t );
261  bl1_saxpyv( BLIS1_CONJUGATE,
262  n_ahead,
263  &minus_upsilon11,
264  y21, inc_y,
265  a12t, cs_A );
266  bl1_saxpyv( BLIS1_CONJUGATE,
267  n_ahead,
268  &minus_zeta11,
269  v21, inc_v,
270  a12t, cs_A );
271  }
272 
273  // FLA_Househ2_UT( FLA_LEFT,
274  // alpha11,
275  // a21, tau11 );
276  // FLA_Copy( a21, u21p );
277  FLA_Househ2_UT_l_ops( m_ahead,
278  alpha11,
279  a21, rs_A,
280  tau11 );
281  bl1_scopyv( BLIS1_NO_CONJUGATE,
282  m_ahead,
283  a21, rs_A,
284  u21p, inc_up );
285 
286  if ( n_ahead > 0 )
287  {
288  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
289  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
290  bl1_sdiv3( buff_m1, tau11, &minus_inv_tau11 );
291 
292  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
293  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
294  bl1_scopyv( BLIS1_NO_CONJUGATE,
295  n_ahead,
296  a12t, cs_A,
297  a12p, inc_ap );
298  bl1_saxpyv( BLIS1_NO_CONJUGATE,
299  n_ahead,
300  &minus_inv_tau11,
301  a12t, cs_A,
302  a12p, inc_ap );
303  }
304 
305  if ( m_behind > 0 )
306  {
307  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
308  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
309  bl1_sger( BLIS1_NO_CONJUGATE,
310  BLIS1_CONJUGATE,
311  m_ahead,
312  n_ahead,
313  buff_m1,
314  u21, inc_u,
315  y21, inc_y,
316  A22, rs_A, cs_A );
317  bl1_sger( BLIS1_NO_CONJUGATE,
318  BLIS1_CONJUGATE,
319  m_ahead,
320  n_ahead,
321  buff_m1,
322  z21, inc_z,
323  v21, inc_v,
324  A22, rs_A, cs_A );
325  }
326 
327  if ( n_ahead > 0 )
328  {
329  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
330  bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
331  BLIS1_NO_CONJUGATE,
332  m_ahead,
333  n_ahead,
334  buff_1,
335  A22, rs_A, cs_A,
336  u21p, inc_up,
337  buff_0,
338  y21, inc_y );
339 
340  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
341  bl1_saxpyv( BLIS1_CONJUGATE,
342  n_ahead,
343  &minus_inv_tau11,
344  y21, inc_y,
345  a12p, inc_ap );
346 
347  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
348  bl1_sgemv( BLIS1_NO_TRANSPOSE,
349  BLIS1_CONJUGATE,
350  m_ahead,
351  n_ahead,
352  buff_1,
353  A22, rs_A, cs_A,
354  a12p, inc_ap,
355  buff_0,
356  w21, inc_w );
357 
358  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
359  bl1_saxpyv( BLIS1_CONJUGATE,
360  n_ahead,
361  buff_1,
362  a12t, cs_A,
363  y21, inc_y );
364 
365  // FLA_Househ2s_UT( FLA_RIGHT,
366  // a12p_t,
367  // a12p_b,
368  // alpha12, psi11_minus_alpha12, sigma11 );
369  FLA_Househ2s_UT_r_ops( n_ahead - 1,
370  a12p_t,
371  a12p_b, inc_ap,
372  &alpha12,
373  &psi11_minus_alpha12,
374  sigma11 );
375 
376  // FLA_Copy( a12p, v21 );
377  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
378  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
379  // FLA_Conjugate( v21_b );
380  bl1_scopyv( BLIS1_NO_CONJUGATE,
381  n_ahead,
382  a12p, inc_ap,
383  v21, inc_v );
384  bl1_smult4( buff_m1, &alpha12, v21_t, v21_t );
385  bl1_sinvscalv( BLIS1_NO_CONJUGATE,
386  n_ahead,
387  &psi11_minus_alpha12,
388  v21, inc_v );
389  bl1_sconjv( n_ahead - 1,
390  v21_b, inc_v );
391 
392  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
393  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
394  *a12t_l = alpha12;
395  bl1_scopyv( BLIS1_NO_CONJUGATE,
396  n_ahead - 1,
397  v21_b, inc_v,
398  a12t_r, cs_A );
399  }
400 
401  // FLA_Copy( u21p, u21 );
402  bl1_scopyv( BLIS1_NO_CONJUGATE,
403  m_ahead,
404  u21p, inc_up,
405  u21, inc_u );
406 
407  if ( n_ahead > 0 )
408  {
409  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
410  // FLA_Scal( FLA_MINUS_ONE, beta );
411  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
412  bl1_sdot( BLIS1_CONJUGATE,
413  n_ahead,
414  y21, inc_y,
415  v21, inc_v,
416  &beta );
417  bl1_sscals( &minus_inv_tau11, &beta );
418 
419  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
420  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
421  bl1_scopyconj( &alpha12, &minus_conj_alpha12 );
422  bl1_sneg1( &minus_conj_alpha12 );
423 
424  // FLA_Copy( w21, z21 );
425  // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
426  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
427  // FLA_Axpy( beta, u21, z21 );
428  bl1_scopyv( BLIS1_NO_CONJUGATE,
429  m_ahead,
430  w21, inc_w,
431  z21, inc_z );
432  bl1_saxpyv( BLIS1_NO_CONJUGATE,
433  m_ahead,
434  &minus_conj_alpha12,
435  A22_l, rs_A,
436  z21, inc_z );
437  bl1_sinvscalv( BLIS1_CONJUGATE,
438  m_ahead,
439  &psi11_minus_alpha12,
440  z21, inc_z );
441  bl1_saxpyv( BLIS1_NO_CONJUGATE,
442  m_ahead,
443  &beta,
444  u21, inc_u,
445  z21, inc_z );
446 
447  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
448  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
449  bl1_sinvscalv( BLIS1_NO_CONJUGATE,
450  n_ahead,
451  tau11,
452  y21, inc_y );
453  bl1_sinvscalv( BLIS1_NO_CONJUGATE,
454  m_ahead,
455  sigma11,
456  z21, inc_z );
457 
458  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
459  bl1_sgemv( BLIS1_CONJ_NO_TRANSPOSE,
460  BLIS1_NO_CONJUGATE,
461  m_behind,
462  n_ahead,
463  buff_1,
464  A02, rs_A, cs_A,
465  v21, inc_v,
466  buff_0,
467  s01, rs_S );
468  }
469 
470  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
471  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
472  bl1_scopyv( BLIS1_CONJUGATE,
473  n_behind,
474  a10t, cs_A,
475  t01, rs_T );
476  bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
477  BLIS1_NO_CONJUGATE,
478  m_ahead,
479  n_behind,
480  buff_1,
481  A20, rs_A, cs_A,
482  u21, inc_u,
483  buff_1,
484  t01, rs_T );
485 
486  if ( m_behind + 1 == b_alg && n_ahead > 0 )
487  {
488  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
489  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
490  bl1_sger( BLIS1_NO_CONJUGATE,
491  BLIS1_CONJUGATE,
492  m_ahead,
493  n_ahead,
494  buff_m1,
495  u21, inc_u,
496  y21, inc_y,
497  A22, rs_A, cs_A );
498  bl1_sger( BLIS1_NO_CONJUGATE,
499  BLIS1_CONJUGATE,
500  m_ahead,
501  n_ahead,
502  buff_m1,
503  z21, inc_z,
504  v21, inc_v,
505  A22, rs_A, cs_A );
506  }
507 
508  /*------------------------------------------------------------*/
509 
510  }
511 
512  // FLA_Obj_free( &w );
513  // FLA_Obj_free( &ap );
514  // FLA_Obj_free( &u );
515  // FLA_Obj_free( &up );
516  // FLA_Obj_free( &v );
517  // FLA_Obj_free( &y );
518  // FLA_Obj_free( &z );
519  FLA_free( buff_w );
520  FLA_free( buff_ap );
521  FLA_free( buff_u );
522  FLA_free( buff_up );
523  FLA_free( buff_v );
524  FLA_free( buff_y );
525  FLA_free( buff_z );
526 
527  return FLA_SUCCESS;
528 }
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_invscalv.c:13
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_sconjv(int m, float *x, int incx)
Definition: bl1_conjv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2s_UT_r_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *alpha, float *chi_1_minus_alpha, float *tau)
Definition: FLA_Househ2s_UT.c:555
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition: bl1_ger.c:13
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
int i
Definition: bl1_axmyv2.c:145
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_opt_var3()

FLA_Error FLA_Bidiag_UT_u_step_opt_var3 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)

References FLA_Bidiag_UT_u_step_opc_var3(), FLA_Bidiag_UT_u_step_opd_var3(), FLA_Bidiag_UT_u_step_ops_var3(), FLA_Bidiag_UT_u_step_opz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u_blk_var3(), and FLA_Bidiag_UT_u_opt_var3().

/*
 * Datatype dispatcher for the FLA_Bidiag_UT_u variant-3 step.
 *
 * Queries the datatype, dimensions and strides of A, the length of T, and
 * the strides of T and S, then forwards the raw buffers to the kernel that
 * matches the datatype of A: _ops_ (FLA_FLOAT), _opd_ (FLA_DOUBLE),
 * _opc_ (FLA_COMPLEX) or _opz_ (FLA_DOUBLE_COMPLEX).  The switch has no
 * default case, so any other datatype performs no work.  The kernel's
 * return value is not inspected; the function always returns FLA_SUCCESS.
 */
19 {
20  FLA_Datatype datatype;
21  int m_A, n_A, m_TS;
22  int rs_A, cs_A;
23  int rs_T, cs_T;
24  int rs_S, cs_S;
25 
26  datatype = FLA_Obj_datatype( A );
27 
28  m_A = FLA_Obj_length( A );
29  n_A = FLA_Obj_width( A );
30  m_TS = FLA_Obj_length( T );
31 
32  rs_A = FLA_Obj_row_stride( A );
33  cs_A = FLA_Obj_col_stride( A );
34 
35  rs_T = FLA_Obj_row_stride( T );
36  cs_T = FLA_Obj_col_stride( T );
37 
38  rs_S = FLA_Obj_row_stride( S );
39  cs_S = FLA_Obj_col_stride( S );
40 
41 
42  switch ( datatype )
43  {
44  case FLA_FLOAT:
45  {
46  float* buff_A = FLA_FLOAT_PTR( A );
47  float* buff_T = FLA_FLOAT_PTR( T );
48  float* buff_S = FLA_FLOAT_PTR( S );
49 
50  FLA_Bidiag_UT_u_step_ops_var3( m_A,
51  n_A,
52  m_TS,
53  buff_A, rs_A, cs_A,
54  buff_T, rs_T, cs_T,
55  buff_S, rs_S, cs_S );
56 
57  break;
58  }
59 
60  case FLA_DOUBLE:
61  {
62  double* buff_A = FLA_DOUBLE_PTR( A );
63  double* buff_T = FLA_DOUBLE_PTR( T );
64  double* buff_S = FLA_DOUBLE_PTR( S );
65 
66  FLA_Bidiag_UT_u_step_opd_var3( m_A,
67  n_A,
68  m_TS,
69  buff_A, rs_A, cs_A,
70  buff_T, rs_T, cs_T,
71  buff_S, rs_S, cs_S );
72 
73  break;
74  }
75 
76  case FLA_COMPLEX:
77  {
78  scomplex* buff_A = FLA_COMPLEX_PTR( A );
79  scomplex* buff_T = FLA_COMPLEX_PTR( T );
80  scomplex* buff_S = FLA_COMPLEX_PTR( S );
81 
82  FLA_Bidiag_UT_u_step_opc_var3( m_A,
83  n_A,
84  m_TS,
85  buff_A, rs_A, cs_A,
86  buff_T, rs_T, cs_T,
87  buff_S, rs_S, cs_S );
88 
89  break;
90  }
91 
92  case FLA_DOUBLE_COMPLEX:
93  {
94  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
95  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
96  dcomplex* buff_S = FLA_DOUBLE_COMPLEX_PTR( S );
97 
98  FLA_Bidiag_UT_u_step_opz_var3( m_A,
99  n_A,
100  m_TS,
101  buff_A, rs_A, cs_A,
102  buff_T, rs_T, cs_T,
103  buff_S, rs_S, cs_S );
104 
105  break;
106  }
107  }
108 
109  return FLA_SUCCESS;
110 }
FLA_Error FLA_Bidiag_UT_u_step_opd_var3(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var3.c:532
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Bidiag_UT_u_step_opc_var3(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var3.c:950
Definition: blis_type_defs.h:132
int FLA_Datatype
Definition: FLA_type_defs.h:49
FLA_Error FLA_Bidiag_UT_u_step_ops_var3(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var3.c:114
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Error FLA_Bidiag_UT_u_step_opz_var3(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var3.c:1368
Definition: blis_type_defs.h:137

◆ FLA_Bidiag_UT_u_step_opz_var3()

FLA_Error FLA_Bidiag_UT_u_step_opz_var3 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T,
dcomplex *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_zaxpyv(), bl1_zconjv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zinvscalv(), bl1_zscals(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_Househ2s_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var3().

/*
 * Double-precision complex kernel for the FLA_Bidiag_UT_u variant-3 step.
 *
 * Runs b_alg = m_TS iterations over the m_A x n_A matrix stored in buff_A
 * (row stride rs_A, column stride cs_A).  In iteration i it:
 *   - applies the rank-1 corrections held in u/y and z/v from the previous
 *     iteration to alpha11, a21 and a12t (only when i > 0);
 *   - generates the left Householder transform from ( alpha11, a21 ) and
 *     stores its scalar in tau11, the i-th diagonal element of T;
 *   - when columns remain to the right (n_ahead > 0), generates the right
 *     Householder transform from the row a12t and stores its scalar in
 *     sigma11, the i-th diagonal element of S;
 *   - fills t01 (column i of T above the diagonal) and s01 (same for S);
 *   - on the last iteration applies the two pending rank-1 updates to A22.
 *
 * Seven work vectors (w, ap, u, up, v, y, z) are obtained from FLA_malloc
 * and released with FLA_free before the function returns FLA_SUCCESS.
 */
1374 {
1375  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
1376  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
1377  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
1378 
1379  dcomplex alpha12;
1380  dcomplex minus_conj_alpha12;
1381  dcomplex psi11_minus_alpha12;
1382  dcomplex minus_inv_tau11;
1383  dcomplex minus_upsilon11;
1384  dcomplex minus_conj_nu11;
1385  dcomplex minus_conj_psi11;
1386  dcomplex minus_zeta11;
1387  dcomplex beta;
1388  int i;
1389 
1390  // b_alg = FLA_Obj_length( T );
1391  int b_alg = m_TS;
1392 
1393  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1394  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
1395  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1396  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
1397  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1398  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
1399  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
1400  dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1401  dcomplex* buff_ap = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1402  dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1403  dcomplex* buff_up = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1404  dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1405  dcomplex* buff_y = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1406  dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1407  int inc_w = 1;
1408  int inc_ap = 1;
1409  int inc_u = 1;
1410  int inc_up = 1;
1411  int inc_v = 1;
1412  int inc_y = 1;
1413  int inc_z = 1;
1414 
1415  for ( i = 0; i < b_alg; ++i )
1416  {
1417  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1418  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1419  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1420  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1421  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1422  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1423  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1424 
1425  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1426  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1427 
1428  dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1429  dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1430 
1431  dcomplex* w21 = buff_w + (i+1)*inc_w;
1432 
1433  dcomplex* a12p = buff_ap + (i+1)*inc_ap;
1434 
1435  dcomplex* upsilon11 = buff_u + (i )*inc_u;
1436  dcomplex* u21 = buff_u + (i+1)*inc_u;
1437 
1438  dcomplex* u21p = buff_up + (i+1)*inc_up;
1439 
1440  dcomplex* nu11 = buff_v + (i )*inc_v;
1441  dcomplex* v21 = buff_v + (i+1)*inc_v;
1442 
1443  dcomplex* psi11 = buff_y + (i )*inc_y;
1444  dcomplex* y21 = buff_y + (i+1)*inc_y;
1445 
1446  dcomplex* zeta11 = buff_z + (i )*inc_z;
1447  dcomplex* z21 = buff_z + (i+1)*inc_z;
1448 
1449  dcomplex* a12p_t = a12p + (0 )*inc_ap;
1450  dcomplex* a12p_b = a12p + (1 )*inc_ap;
1451 
1452  dcomplex* v21_t = v21 + (0 )*inc_v;
1453  dcomplex* v21_b = v21 + (1 )*inc_v;
1454 
1455  dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1456  dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1457 
1458  dcomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
1459 
1460  int m_ahead = m_A - i - 1;
1461  int n_ahead = n_A - i - 1;
1462  int m_behind = i;
1463  int n_behind = i;
1464 
1465  /*------------------------------------------------------------*/
1466 
1467  if ( m_behind > 0 )
1468  {
1469  // FLA_Copy( upsilon11, minus_upsilon11 );
1470  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
1471  bl1_zmult3( buff_m1, upsilon11, &minus_upsilon11 );
1472 
1473  // FLA_Copy( zeta11, minus_zeta11 );
1474  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
1475  bl1_zmult3( buff_m1, zeta11, &minus_zeta11 );
1476 
1477  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
1478  // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
1479  bl1_zcopyconj( psi11, &minus_conj_psi11 );
1480  bl1_zscals( buff_m1, &minus_conj_psi11 );
1481 
1482  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
1483  // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
1484  bl1_zcopyconj( nu11, &minus_conj_nu11 );
1485  bl1_zscals( buff_m1, &minus_conj_nu11 );
1486 
1487  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
1488  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, zeta11, alpha11 );
1489  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1490  1,
1491  &minus_conj_psi11,
1492  upsilon11, 1,
1493  alpha11, 1 );
1494  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1495  1,
1496  &minus_conj_nu11,
1497  zeta11, 1,
1498  alpha11, 1 );
1499 
1500  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
1501  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
1502  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1503  m_ahead,
1504  &minus_conj_psi11,
1505  u21, inc_u,
1506  a21, rs_A );
1507  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1508  m_ahead,
1509  &minus_conj_nu11,
1510  z21, inc_z,
1511  a21, rs_A );
1512 
1513  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
1514  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11, v21, a12t );
1515  bl1_zaxpyv( BLIS1_CONJUGATE,
1516  n_ahead,
1517  &minus_upsilon11,
1518  y21, inc_y,
1519  a12t, cs_A );
1520  bl1_zaxpyv( BLIS1_CONJUGATE,
1521  n_ahead,
1522  &minus_zeta11,
1523  v21, inc_v,
1524  a12t, cs_A );
1525  }
1526 
1527  // FLA_Househ2_UT( FLA_LEFT,
1528  // alpha11,
1529  // a21, tau11 );
1530  // FLA_Copy( a21, u21p );
1531  FLA_Househ2_UT_l_opz( m_ahead,
1532  alpha11,
1533  a21, rs_A,
1534  tau11 );
1535  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1536  m_ahead,
1537  a21, rs_A,
1538  u21p, inc_up );
1539 
1540  if ( n_ahead > 0 )
1541  {
1542  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1543  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1544  bl1_zdiv3( buff_m1, tau11, &minus_inv_tau11 );
1545 
1546  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1547  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1548  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1549  n_ahead,
1550  a12t, cs_A,
1551  a12p, inc_ap );
1552  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1553  n_ahead,
1554  &minus_inv_tau11,
1555  a12t, cs_A,
1556  a12p, inc_ap );
1557  }
1558 
1559  if ( m_behind > 0 )
1560  {
1561  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1562  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
1563  bl1_zger( BLIS1_NO_CONJUGATE,
1564  BLIS1_CONJUGATE,
1565  m_ahead,
1566  n_ahead,
1567  buff_m1,
1568  u21, inc_u,
1569  y21, inc_y,
1570  A22, rs_A, cs_A );
1571  bl1_zger( BLIS1_NO_CONJUGATE,
1572  BLIS1_CONJUGATE,
1573  m_ahead,
1574  n_ahead,
1575  buff_m1,
1576  z21, inc_z,
1577  v21, inc_v,
1578  A22, rs_A, cs_A );
1579  }
1580 
1581  if ( n_ahead > 0 )
1582  {
1583  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
1584  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
1585  BLIS1_NO_CONJUGATE,
1586  m_ahead,
1587  n_ahead,
1588  buff_1,
1589  A22, rs_A, cs_A,
1590  u21p, inc_up,
1591  buff_0,
1592  y21, inc_y );
1593 
1594  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1595  bl1_zaxpyv( BLIS1_CONJUGATE,
1596  n_ahead,
1597  &minus_inv_tau11,
1598  y21, inc_y,
1599  a12p, inc_ap );
1600 
1601  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1602  bl1_zgemv( BLIS1_NO_TRANSPOSE,
1603  BLIS1_CONJUGATE,
1604  m_ahead,
1605  n_ahead,
1606  buff_1,
1607  A22, rs_A, cs_A,
1608  a12p, inc_ap,
1609  buff_0,
1610  w21, inc_w );
1611 
1612  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1613  bl1_zaxpyv( BLIS1_CONJUGATE,
1614  n_ahead,
1615  buff_1,
1616  a12t, cs_A,
1617  y21, inc_y );
1618 
1619  // FLA_Househ2s_UT( FLA_RIGHT,
1620  // a12p_t,
1621  // a12p_b,
1622  // alpha12, psi11_minus_alpha12, sigma11 );
1623  FLA_Househ2s_UT_r_opz( n_ahead - 1,
1624  a12p_t,
1625  a12p_b, inc_ap,
1626  &alpha12,
1627  &psi11_minus_alpha12,
1628  sigma11 );
1629 
1630  // FLA_Copy( a12p, v21 );
1631  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1632  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1633  // FLA_Conjugate( v21_b );
1634  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1635  n_ahead,
1636  a12p, inc_ap,
1637  v21, inc_v );
1638  bl1_zmult4( buff_m1, &alpha12, v21_t, v21_t );
1639  bl1_zinvscalv( BLIS1_NO_CONJUGATE,
1640  n_ahead,
1641  &psi11_minus_alpha12,
1642  v21, inc_v );
1643  bl1_zconjv( n_ahead - 1,
1644  v21_b, inc_v );
1645 
1646  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
1647  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1648  *a12t_l = alpha12;
1649  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1650  n_ahead - 1,
1651  v21_b, inc_v,
1652  a12t_r, cs_A );
1653  }
1654 
1655  // FLA_Copy( u21p, u21 );
1656  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1657  m_ahead,
1658  u21p, inc_up,
1659  u21, inc_u );
1660 
1661  if ( n_ahead > 0 )
1662  {
1663  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1664  // FLA_Scal( FLA_MINUS_ONE, beta );
1665  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1666  bl1_zdot( BLIS1_CONJUGATE,
1667  n_ahead,
1668  y21, inc_y,
1669  v21, inc_v,
1670  &beta );
1671  bl1_zscals( &minus_inv_tau11, &beta );
1672 
1673  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1674  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1675  bl1_zcopyconj( &alpha12, &minus_conj_alpha12 );
1676  bl1_zneg1( &minus_conj_alpha12 );
1677 
1678  // FLA_Copy( w21, z21 );
1679  // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
1680  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1681  // FLA_Axpy( beta, u21, z21 );
1682  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1683  m_ahead,
1684  w21, inc_w,
1685  z21, inc_z );
1686  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1687  m_ahead,
1688  &minus_conj_alpha12,
1689  A22_l, rs_A,
1690  z21, inc_z );
1691  bl1_zinvscalv( BLIS1_CONJUGATE,
1692  m_ahead,
1693  &psi11_minus_alpha12,
1694  z21, inc_z );
1695  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1696  m_ahead,
1697  &beta,
1698  u21, inc_u,
1699  z21, inc_z );
1700 
1701  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1702  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1703  bl1_zinvscalv( BLIS1_NO_CONJUGATE,
1704  n_ahead,
1705  tau11,
1706  y21, inc_y );
1707  bl1_zinvscalv( BLIS1_NO_CONJUGATE,
1708  m_ahead,
1709  sigma11,
1710  z21, inc_z );
1711 
1712  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
1713  bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
1714  BLIS1_NO_CONJUGATE,
1715  m_behind,
1716  n_ahead,
1717  buff_1,
1718  A02, rs_A, cs_A,
1719  v21, inc_v,
1720  buff_0,
1721  s01, rs_S );
1722  }
1723 
1724  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1725  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1726  bl1_zcopyv( BLIS1_CONJUGATE,
1727  n_behind,
1728  a10t, cs_A,
1729  t01, rs_T );
1730  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
1731  BLIS1_NO_CONJUGATE,
1732  m_ahead,
1733  n_behind,
1734  buff_1,
1735  A20, rs_A, cs_A,
1736  u21, inc_u,
1737  buff_1,
1738  t01, rs_T );
1739 
1740  if ( m_behind + 1 == b_alg && n_ahead > 0 )
1741  {
1742  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1743  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
1744  bl1_zger( BLIS1_NO_CONJUGATE,
1745  BLIS1_CONJUGATE,
1746  m_ahead,
1747  n_ahead,
1748  buff_m1,
1749  u21, inc_u,
1750  y21, inc_y,
1751  A22, rs_A, cs_A );
1752  bl1_zger( BLIS1_NO_CONJUGATE,
1753  BLIS1_CONJUGATE,
1754  m_ahead,
1755  n_ahead,
1756  buff_m1,
1757  z21, inc_z,
1758  v21, inc_v,
1759  A22, rs_A, cs_A );
1760  }
1761 
1762  /*------------------------------------------------------------*/
1763 
1764  }
1765 
1766  // FLA_Obj_free( &w );
1767  // FLA_Obj_free( &ap );
1768  // FLA_Obj_free( &u );
1769  // FLA_Obj_free( &up );
1770  // FLA_Obj_free( &v );
1771  // FLA_Obj_free( &y );
1772  // FLA_Obj_free( &z );
1773  FLA_free( buff_w );
1774  FLA_free( buff_ap );
1775  FLA_free( buff_u );
1776  FLA_free( buff_up );
1777  FLA_free( buff_v );
1778  FLA_free( buff_y );
1779  FLA_free( buff_z );
1780 
1781  return FLA_SUCCESS;
1782 }
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:194
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_invscalv.c:78
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
FLA_Error FLA_Househ2s_UT_r_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *alpha, dcomplex *chi_1_minus_alpha, dcomplex *tau)
Definition: FLA_Househ2s_UT.c:610
int i
Definition: bl1_axmyv2.c:145
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
Definition: blis_type_defs.h:137
bl1_zscals(beta, rho_yz)
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60
void bl1_zconjv(int m, dcomplex *x, int incx)
Definition: bl1_conjv.c:34