libflame  revision_anchor
Functions
FLA_Bidiag_UT_u_opt_var5.c File Reference

(r)

Functions

FLA_Error FLA_Bidiag_UT_u_opt_var5 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_step_opt_var5 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ops_var5 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opd_var5 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opc_var5 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opz_var5 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 

Function Documentation

◆ FLA_Bidiag_UT_u_opt_var5()

FLA_Error FLA_Bidiag_UT_u_opt_var5 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Bidiag_UT_u_step_opt_var5(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u().

// Body of FLA_Bidiag_UT_u_opt_var5( A, TU, TV ) as shown in the source listing
// (the leading numbers are the listing's source line numbers; the signature
// line itself is not part of this listing).
// It creates two temporary objects Y and Z with the datatype of A, forwards the
// work to FLA_Bidiag_UT_u_step_opt_var5(), frees the temporaries, and returns
// the value produced by that call.
14 {
15  FLA_Error r_val;
16  FLA_Obj Y, Z;
17  FLA_Datatype datatype_A;
18  dim_t m_A, n_A;
19 
    // Query the datatype, length (m_A) and width (n_A) of A.
20  datatype_A = FLA_Obj_datatype( A );
21  m_A = FLA_Obj_length( A );
22  n_A = FLA_Obj_width( A );
23 
    // Y is created n_A x n_A and Z is created m_A x n_A, both with strides
    // passed as 0, 0. The return values of FLA_Obj_create() are not inspected.
24  FLA_Obj_create( datatype_A, n_A, n_A, 0, 0, &Y );
25  FLA_Obj_create( datatype_A, m_A, n_A, 0, 0, &Z );
26 
    // TU and TV are passed in the T and S positions of the step routine.
27  r_val = FLA_Bidiag_UT_u_step_opt_var5( A, Y, Z, TU, TV );
28 
    // Release the temporaries before returning the step routine's result.
29  FLA_Obj_free( &Y );
30  FLA_Obj_free( &Z );
31 
32  return r_val;
33 }
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition: FLA_Obj.c:55
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
FLA_Error FLA_Bidiag_UT_u_step_opt_var5(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_opt_var5.c:35
int FLA_Error
Definition: FLA_type_defs.h:47
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116

◆ FLA_Bidiag_UT_u_step_opc_var5()

FLA_Error FLA_Bidiag_UT_u_step_opc_var5 ( int  m_A,
int  n_A,
int  m_TS,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
scomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *  buff_T,
int  rs_T,
int  cs_T,
scomplex *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cinvscalv(), bl1_csetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_Househ2_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var5().

// Body of FLA_Bidiag_UT_u_step_opc_var5() (scomplex buffers) as shown in the
// source listing; the leading numbers are the listing's source line numbers.
//
// Visible structure:
//   - fetches scomplex pointers to the FLA_ONE / FLA_ZERO / FLA_MINUS_ONE constants;
//   - allocates six work vectors with FLA_malloc() (u, f, g: m_A elements;
//     v, d, e: n_A elements), all with unit increment;
//   - zeroes an n_A x b_alg region of Y and an m_A x b_alg region of Z;
//   - runs b_alg = m_TS iterations, each computing pointers into A, Y, Z, T, S
//     and the work vectors at index i and then applying a sequence of updates;
//   - frees the work vectors and returns FLA_SUCCESS.
//
// NOTE(review): this listing is lossy. The source line numbers jump in many
// places (e.g. 1061 -> 1064, 1079 -> 1081 is fine but 1083/1084 are absent),
// so the routine name and leading arguments of most multi-line calls are not
// shown. The commented-out FLA_* calls above each argument list presumably
// describe the intended operation -- confirm against the original .c file.
953 {
954  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
955  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
956  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
957 
958  scomplex beta;
959  scomplex last_elem;
960  int i;
961 
962  // b_alg = FLA_Obj_length( T );
963  int b_alg = m_TS;
964 
965  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
966  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
967  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
968  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
969  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
970  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
     // NOTE(review): none of the six FLA_malloc() results is checked before use
     // in this function; whether FLA_malloc() can return NULL is not visible here.
971  scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
972  scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
973  scomplex* buff_d = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
974  scomplex* buff_e = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
975  scomplex* buff_f = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
976  scomplex* buff_g = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
977  int inc_u = 1;
978  int inc_v = 1;
979  int inc_d = 1;
980  int inc_e = 1;
981  int inc_f = 1;
982  int inc_g = 1;
983 
984  // FLA_Set( FLA_ZERO, Y );
985  // FLA_Set( FLA_ZERO, Z );
986  bl1_csetm( n_A,
987  b_alg,
988  buff_0,
989  buff_Y, rs_Y, cs_Y );
990  bl1_csetm( m_A,
991  b_alg,
992  buff_0,
993  buff_Z, rs_Z, cs_Z );
994 
995  for ( i = 0; i < b_alg; ++i )
996  {
     // Pointers into A relative to the current index i: offsets are
     // (column index)*cs_A + (row index)*rs_A.
997  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
998  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
999  scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
1000  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1001  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1002  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1003  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1004  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1005 
1006  scomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1007  scomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1008  scomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1009 
1010  scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1011  scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1012  scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1013 
1014  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1015  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1016 
1017  scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1018  scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1019 
1020  scomplex* u21 = buff_u + (i+1)*inc_u;
1021 
1022  scomplex* v21 = buff_v + (i+1)*inc_v;
1023 
1024  scomplex* d0 = buff_d + (0 )*inc_d;
1025 
1026  scomplex* e0 = buff_e + (0 )*inc_e;
1027 
1028  scomplex* f0 = buff_f + (0 )*inc_f;
1029 
1030  scomplex* g0 = buff_g + (0 )*inc_g;
1031 
1032  scomplex* v21_t = v21 + (0 )*inc_v;
1033  scomplex* v21_b = v21 + (1 )*inc_v;
1034 
     // NOTE(review): for i == 0 this offset is (i-1)*rs_A = -rs_A, i.e. the
     // pointer is formed before a01; it is only dereferenced below under
     // `m_behind > 0` (m_behind == i).
1035  scomplex* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
1036 
1037  scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1038  scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1039 
1040  scomplex* ABL = a10t;
1041  scomplex* ZBL = z10t;
1042 
1043  scomplex* a2 = alpha11;
1044 
1045  int m_ahead = m_A - i - 1;
1046  int n_ahead = n_A - i - 1;
1047  int m_behind = i;
1048  int n_behind = i;
1049 
1050  /*------------------------------------------------------------*/
1051 
1052  if ( m_behind > 0 )
1053  {
1054  // FLA_Copy( a01_b, last_elem );
1055  // FLA_Set( FLA_ONE, a01_b );
     // Save the element at a01_b and overwrite it with one; it is restored
     // from last_elem after the updates below.
1056  last_elem = *a01_b;
1057  *a01_b = *buff_1;
1058  }
1059 
1060  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1061  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
     // NOTE(review): listing lines 1062-1063 and 1071-1072 (the call heads for
     // the two argument lists below) are absent from this extract. The same
     // kind of gap precedes most argument lists in the rest of this loop.
1064  m_ahead + 1,
1065  n_behind,
1066  buff_m1,
1067  ABL, rs_A, cs_A,
1068  y10t, cs_Y,
1069  buff_1,
1070  a2, rs_A );
1073  m_ahead + 1,
1074  n_behind,
1075  buff_m1,
1076  ZBL, rs_Z, cs_Z,
1077  a01, rs_A,
1078  buff_1,
1079  a2, rs_A );
1080 
1081  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1082  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
1085  n_ahead,
1086  n_behind,
1087  buff_m1,
1088  Y20, rs_Y, cs_Y,
1089  a10t, cs_A,
1090  buff_1,
1091  a12t, cs_A );
1094  m_behind,
1095  n_ahead,
1096  buff_m1,
1097  A02, rs_A, cs_A,
1098  z10t, cs_Z,
1099  buff_1,
1100  a12t, cs_A );
1101 
1102  if ( m_behind > 0 )
1103  {
1104  // FLA_Copy( last_elem, a01_b );
1105  *a01_b = last_elem;
1106  }
1107 
1108  // FLA_Househ2_UT( FLA_LEFT,
1109  // alpha11,
1110  // a21, tau11 );
1111  // FLA_Copy( a21, u21 );
1112  FLA_Househ2_UT_l_opc( m_ahead,
1113  alpha11,
1114  a21, rs_A,
1115  tau11 );
1117  m_ahead,
1118  a21, rs_A,
1119  u21, inc_u );
1120 
1121  if ( n_ahead > 0 )
1122  {
1123  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
1124  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21, FLA_ONE, y21 );
1126  n_ahead,
1127  a12t, cs_A,
1128  y21, rs_Y );
1131  m_ahead,
1132  n_ahead,
1133  buff_1,
1134  A22, rs_A, cs_A,
1135  u21, inc_u,
1136  buff_1,
1137  y21, rs_Y );
1138 
1139  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ZERO, d0 );
1140  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21, FLA_ZERO, e0 );
1143  m_ahead,
1144  n_behind,
1145  buff_1,
1146  A20, rs_A, cs_A,
1147  u21, inc_u,
1148  buff_0,
1149  d0, inc_d );
1152  m_ahead,
1153  n_behind,
1154  buff_1,
1155  Z20, rs_Z, cs_Z,
1156  u21, inc_u,
1157  buff_0,
1158  e0, inc_e );
1159 
1160  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1161  // FLA_Axpy( FLA_ONE, d0, t01 );
1163  n_behind,
1164  a10t, cs_A,
1165  t01, rs_T );
1167  n_behind,
1168  buff_1,
1169  d0, inc_d,
1170  t01, rs_T );
1171 
1172  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1173  // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
1176  n_ahead,
1177  n_behind,
1178  buff_m1,
1179  Y20, rs_Y, cs_Y,
1180  d0, inc_d,
1181  buff_1,
1182  y21, rs_Y );
1185  m_behind,
1186  n_ahead,
1187  buff_m1,
1188  A02, rs_A, cs_A,
1189  e0, inc_e,
1190  buff_1,
1191  y21, rs_Y );
1192 
1193  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1195  n_ahead,
1196  tau11,
1197  y21, rs_Y );
1198 
1199  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
1201  n_ahead,
1202  buff_m1,
1203  y21, rs_Y,
1204  a12t, cs_A );
1205 
1206  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
1207  FLA_Househ2_UT_r_opc( n_ahead - 1,
1208  a12t_l,
1209  a12t_r, cs_A,
1210  sigma11 );
1211 
1212  // FLA_Set( FLA_ONE, v21_t );
1213  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
1214  *v21_t = *buff_1;
1216  n_ahead - 1,
1217  a12t_r, cs_A,
1218  v21_b, inc_v );
1219 
1220  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1221  // FLA_Scal( FLA_MINUS_ONE, beta );
1223  n_ahead,
1224  y21, rs_Y,
1225  v21, inc_v,
1226  &beta );
1227  bl1_cscals( buff_m1, &beta );
1228 
1229  // FLA_Copy( u21, z21 );
1230  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, v21, beta, z21 );
1232  m_ahead,
1233  u21, inc_u,
1234  z21, rs_Z );
1237  m_ahead,
1238  n_ahead,
1239  buff_1,
1240  A22, rs_A, cs_A,
1241  v21, inc_v,
1242  &beta,
1243  z21, rs_Z );
1244 
1245  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, v21, FLA_ZERO, f0 );
1246  // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, g0 );
1249  n_ahead,
1250  m_behind,
1251  buff_1,
1252  Y20, rs_Y, cs_Y,
1253  v21, inc_v,
1254  buff_0,
1255  f0, inc_f );
1258  m_behind,
1259  n_ahead,
1260  buff_1,
1261  A02, rs_A, cs_A,
1262  v21, inc_v,
1263  buff_0,
1264  g0, inc_g );
1265 
1266  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, z21 );
1267  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, z21 );
1270  m_ahead,
1271  n_behind,
1272  buff_m1,
1273  A20, rs_A, cs_A,
1274  f0, inc_f,
1275  buff_1,
1276  z21, rs_Z );
1279  m_ahead,
1280  n_behind,
1281  buff_m1,
1282  Z20, rs_Z, cs_Z,
1283  g0, inc_g,
1284  buff_1,
1285  z21, rs_Z );
1286 
1287  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1289  m_ahead,
1290  sigma11,
1291  z21, rs_Z );
1292 
1293  // FLA_Copy( g0, s01 );
1295  n_behind,
1296  g0, inc_g,
1297  s01, rs_S );
1298  }
1299  else // if ( n_ahead == 0 )
1300  {
1301  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1302  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1304  n_behind,
1305  a10t, cs_A,
1306  t01, rs_T );
1309  m_ahead,
1310  n_behind,
1311  buff_1,
1312  A20, rs_A, cs_A,
1313  u21, inc_u,
1314  buff_1,
1315  t01, rs_T );
1316  }
1317 
1318  /*------------------------------------------------------------*/
1319 
1320  }
1321 
1322  // FLA_Obj_free( &u );
1323  // FLA_Obj_free( &v );
1324  // FLA_Obj_free( &d );
1325  // FLA_Obj_free( &e );
1326  // FLA_Obj_free( &f );
1327  // FLA_Obj_free( &g );
     // Release the six work vectors allocated at the top of the function.
1328  FLA_free( buff_u );
1329  FLA_free( buff_v );
1330  FLA_free( buff_d );
1331  FLA_free( buff_e );
1332  FLA_free( buff_f );
1333  FLA_free( buff_g );
1334 
1335  return FLA_SUCCESS;
1336 }
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:61
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
FLA_Error FLA_Househ2_UT_r_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:677
Definition: blis_type_defs.h:55
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
Definition: blis_type_defs.h:132
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_invscalv.c:52
int i
Definition: bl1_axmyv2.c:145
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_opd_var5()

FLA_Error FLA_Bidiag_UT_u_step_opd_var5 ( int  m_A,
int  n_A,
int  m_TS,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_Y,
int  rs_Y,
int  cs_Y,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_T,
int  rs_T,
int  cs_T,
double *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dinvscalv(), bl1_dsetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_Househ2_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var5().

// Body of FLA_Bidiag_UT_u_step_opd_var5() (double buffers) as shown in the
// source listing; the leading numbers are the listing's source line numbers.
//
// Visible structure:
//   - fetches double pointers to the FLA_ONE / FLA_ZERO / FLA_MINUS_ONE constants;
//   - allocates six work vectors with FLA_malloc() (u, f, g: m_A elements;
//     v, d, e: n_A elements), all with unit increment;
//   - zeroes an n_A x b_alg region of Y and an m_A x b_alg region of Z;
//   - runs b_alg = m_TS iterations, each computing pointers into A, Y, Z, T, S
//     and the work vectors at index i and then applying a sequence of updates;
//   - frees the work vectors and returns FLA_SUCCESS.
//
// NOTE(review): this listing is lossy. The source line numbers jump in many
// places (e.g. 666 -> 669), so the routine name and leading arguments of most
// multi-line calls are not shown. The commented-out FLA_* calls above each
// argument list presumably describe the intended operation -- confirm against
// the original .c file.
558 {
559  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
560  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
561  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
562 
563  double beta;
564  double last_elem;
565  int i;
566 
567  // b_alg = FLA_Obj_length( T );
568  int b_alg = m_TS;
569 
570  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
571  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
572  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
573  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
574  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
575  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
     // NOTE(review): none of the six FLA_malloc() results is checked before use
     // in this function; whether FLA_malloc() can return NULL is not visible here.
576  double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
577  double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
578  double* buff_d = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
579  double* buff_e = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
580  double* buff_f = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
581  double* buff_g = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
582  int inc_u = 1;
583  int inc_v = 1;
584  int inc_d = 1;
585  int inc_e = 1;
586  int inc_f = 1;
587  int inc_g = 1;
588 
589  // FLA_Set( FLA_ZERO, Y );
590  // FLA_Set( FLA_ZERO, Z );
591  bl1_dsetm( n_A,
592  b_alg,
593  buff_0,
594  buff_Y, rs_Y, cs_Y );
595  bl1_dsetm( m_A,
596  b_alg,
597  buff_0,
598  buff_Z, rs_Z, cs_Z );
599 
600  for ( i = 0; i < b_alg; ++i )
601  {
     // Pointers into A relative to the current index i: offsets are
     // (column index)*cs_A + (row index)*rs_A.
602  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
603  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
604  double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
605  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
606  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
607  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
608  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
609  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
610 
611  double* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
612  double* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
613  double* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
614 
615  double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
616  double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
617  double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
618 
619  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
620  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
621 
622  double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
623  double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
624 
625  double* u21 = buff_u + (i+1)*inc_u;
626 
627  double* v21 = buff_v + (i+1)*inc_v;
628 
629  double* d0 = buff_d + (0 )*inc_d;
630 
631  double* e0 = buff_e + (0 )*inc_e;
632 
633  double* f0 = buff_f + (0 )*inc_f;
634 
635  double* g0 = buff_g + (0 )*inc_g;
636 
637  double* v21_t = v21 + (0 )*inc_v;
638  double* v21_b = v21 + (1 )*inc_v;
639 
     // NOTE(review): for i == 0 this offset is (i-1)*rs_A = -rs_A, i.e. the
     // pointer is formed before a01; it is only dereferenced below under
     // `m_behind > 0` (m_behind == i).
640  double* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
641 
642  double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
643  double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
644 
645  double* ABL = a10t;
646  double* ZBL = z10t;
647 
648  double* a2 = alpha11;
649 
650  int m_ahead = m_A - i - 1;
651  int n_ahead = n_A - i - 1;
652  int m_behind = i;
653  int n_behind = i;
654 
655  /*------------------------------------------------------------*/
656 
657  if ( m_behind > 0 )
658  {
659  // FLA_Copy( a01_b, last_elem );
660  // FLA_Set( FLA_ONE, a01_b );
     // Save the element at a01_b and overwrite it with one; it is restored
     // from last_elem after the updates below.
661  last_elem = *a01_b;
662  *a01_b = *buff_1;
663  }
664 
665  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
666  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
     // NOTE(review): listing lines 667-668 and 676-677 (the call heads for the
     // two argument lists below) are absent from this extract. The same kind
     // of gap precedes most argument lists in the rest of this loop.
669  m_ahead + 1,
670  n_behind,
671  buff_m1,
672  ABL, rs_A, cs_A,
673  y10t, cs_Y,
674  buff_1,
675  a2, rs_A );
678  m_ahead + 1,
679  n_behind,
680  buff_m1,
681  ZBL, rs_Z, cs_Z,
682  a01, rs_A,
683  buff_1,
684  a2, rs_A );
685 
686  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
687  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
690  n_ahead,
691  n_behind,
692  buff_m1,
693  Y20, rs_Y, cs_Y,
694  a10t, cs_A,
695  buff_1,
696  a12t, cs_A );
699  m_behind,
700  n_ahead,
701  buff_m1,
702  A02, rs_A, cs_A,
703  z10t, cs_Z,
704  buff_1,
705  a12t, cs_A );
706 
707  if ( m_behind > 0 )
708  {
709  // FLA_Copy( last_elem, a01_b );
710  *a01_b = last_elem;
711  }
712 
713  // FLA_Househ2_UT( FLA_LEFT,
714  // alpha11,
715  // a21, tau11 );
716  // FLA_Copy( a21, u21 );
717  FLA_Househ2_UT_l_opd( m_ahead,
718  alpha11,
719  a21, rs_A,
720  tau11 );
722  m_ahead,
723  a21, rs_A,
724  u21, inc_u );
725 
726  if ( n_ahead > 0 )
727  {
728  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
729  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21, FLA_ONE, y21 );
731  n_ahead,
732  a12t, cs_A,
733  y21, rs_Y );
736  m_ahead,
737  n_ahead,
738  buff_1,
739  A22, rs_A, cs_A,
740  u21, inc_u,
741  buff_1,
742  y21, rs_Y );
743 
744  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ZERO, d0 );
745  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21, FLA_ZERO, e0 );
748  m_ahead,
749  n_behind,
750  buff_1,
751  A20, rs_A, cs_A,
752  u21, inc_u,
753  buff_0,
754  d0, inc_d );
757  m_ahead,
758  n_behind,
759  buff_1,
760  Z20, rs_Z, cs_Z,
761  u21, inc_u,
762  buff_0,
763  e0, inc_e );
764 
765  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
766  // FLA_Axpy( FLA_ONE, d0, t01 );
768  n_behind,
769  a10t, cs_A,
770  t01, rs_T );
772  n_behind,
773  buff_1,
774  d0, inc_d,
775  t01, rs_T );
776 
777  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
778  // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
781  n_ahead,
782  n_behind,
783  buff_m1,
784  Y20, rs_Y, cs_Y,
785  d0, inc_d,
786  buff_1,
787  y21, rs_Y );
790  m_behind,
791  n_ahead,
792  buff_m1,
793  A02, rs_A, cs_A,
794  e0, inc_e,
795  buff_1,
796  y21, rs_Y );
797 
798  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
800  n_ahead,
801  tau11,
802  y21, rs_Y );
803 
804  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
806  n_ahead,
807  buff_m1,
808  y21, rs_Y,
809  a12t, cs_A );
810 
811  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
812  FLA_Househ2_UT_r_opd( n_ahead - 1,
813  a12t_l,
814  a12t_r, cs_A,
815  sigma11 );
816 
817  // FLA_Set( FLA_ONE, v21_t );
818  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
819  *v21_t = *buff_1;
821  n_ahead - 1,
822  a12t_r, cs_A,
823  v21_b, inc_v );
824 
825  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
826  // FLA_Scal( FLA_MINUS_ONE, beta );
828  n_ahead,
829  y21, rs_Y,
830  v21, inc_v,
831  &beta );
832  bl1_dscals( buff_m1, &beta );
833 
834  // FLA_Copy( u21, z21 );
835  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, v21, beta, z21 );
837  m_ahead,
838  u21, inc_u,
839  z21, rs_Z );
842  m_ahead,
843  n_ahead,
844  buff_1,
845  A22, rs_A, cs_A,
846  v21, inc_v,
847  &beta,
848  z21, rs_Z );
849 
850  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, v21, FLA_ZERO, f0 );
851  // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, g0 );
854  n_ahead,
855  m_behind,
856  buff_1,
857  Y20, rs_Y, cs_Y,
858  v21, inc_v,
859  buff_0,
860  f0, inc_f );
863  m_behind,
864  n_ahead,
865  buff_1,
866  A02, rs_A, cs_A,
867  v21, inc_v,
868  buff_0,
869  g0, inc_g );
870 
871  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, z21 );
872  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, z21 );
875  m_ahead,
876  n_behind,
877  buff_m1,
878  A20, rs_A, cs_A,
879  f0, inc_f,
880  buff_1,
881  z21, rs_Z );
884  m_ahead,
885  n_behind,
886  buff_m1,
887  Z20, rs_Z, cs_Z,
888  g0, inc_g,
889  buff_1,
890  z21, rs_Z );
891 
892  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
894  m_ahead,
895  sigma11,
896  z21, rs_Z );
897 
898  // FLA_Copy( g0, s01 );
900  n_behind,
901  g0, inc_g,
902  s01, rs_S );
903  }
904  else // if ( n_ahead == 0 )
905  {
906  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
907  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
909  n_behind,
910  a10t, cs_A,
911  t01, rs_T );
914  m_ahead,
915  n_behind,
916  buff_1,
917  A20, rs_A, cs_A,
918  u21, inc_u,
919  buff_1,
920  t01, rs_T );
921  }
922 
923  /*------------------------------------------------------------*/
924 
925  }
926 
927  // FLA_Obj_free( &u );
928  // FLA_Obj_free( &v );
929  // FLA_Obj_free( &d );
930  // FLA_Obj_free( &e );
931  // FLA_Obj_free( &f );
932  // FLA_Obj_free( &g );
     // Release the six work vectors allocated at the top of the function.
933  FLA_free( buff_u );
934  FLA_free( buff_v );
935  FLA_free( buff_d );
936  FLA_free( buff_e );
937  FLA_free( buff_f );
938  FLA_free( buff_g );
939 
940  return FLA_SUCCESS;
941 }
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_dsetm(int m, int n, double *sigma, double *a, int a_rs, int a_cs)
Definition: bl1_setm.c:45
FLA_Error FLA_Househ2_UT_r_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:664
Definition: blis_type_defs.h:55
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_invscalv.c:26
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42
int i
Definition: bl1_axmyv2.c:145
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_ops_var5()

FLA_Error FLA_Bidiag_UT_u_step_ops_var5 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_Y,
int  rs_Y,
int  cs_Y,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sinvscalv(), bl1_ssetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_Househ2_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var5().

// Body of FLA_Bidiag_UT_u_step_ops_var5() (float buffers) as shown in the
// source listing; the leading numbers are the listing's source line numbers.
//
// Visible structure:
//   - fetches float pointers to the FLA_ONE / FLA_ZERO / FLA_MINUS_ONE constants;
//   - allocates six work vectors with FLA_malloc() (u, f, g: m_A elements;
//     v, d, e: n_A elements), all with unit increment;
//   - zeroes an n_A x b_alg region of Y and an m_A x b_alg region of Z;
//   - runs b_alg = m_TS iterations, each computing pointers into A, Y, Z, T, S
//     and the work vectors at index i and then applying a sequence of updates;
//   - frees the work vectors and returns FLA_SUCCESS.
//
// NOTE(review): this listing is lossy. The source line numbers jump in many
// places (e.g. 271 -> 274), so the routine name and leading arguments of most
// multi-line calls are not shown. The commented-out FLA_* calls above each
// argument list presumably describe the intended operation -- confirm against
// the original .c file.
163 {
164  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
165  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
166  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
167 
168  float beta;
169  float last_elem;
170  int i;
171 
172  // b_alg = FLA_Obj_length( T );
173  int b_alg = m_TS;
174 
175  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
176  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
177  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
178  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
179  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
180  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
     // NOTE(review): none of the six FLA_malloc() results is checked before use
     // in this function; whether FLA_malloc() can return NULL is not visible here.
181  float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
182  float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
183  float* buff_d = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
184  float* buff_e = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
185  float* buff_f = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
186  float* buff_g = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
187  int inc_u = 1;
188  int inc_v = 1;
189  int inc_d = 1;
190  int inc_e = 1;
191  int inc_f = 1;
192  int inc_g = 1;
193 
194  // FLA_Set( FLA_ZERO, Y );
195  // FLA_Set( FLA_ZERO, Z );
196  bl1_ssetm( n_A,
197  b_alg,
198  buff_0,
199  buff_Y, rs_Y, cs_Y );
200  bl1_ssetm( m_A,
201  b_alg,
202  buff_0,
203  buff_Z, rs_Z, cs_Z );
204 
205  for ( i = 0; i < b_alg; ++i )
206  {
     // Pointers into A relative to the current index i: offsets are
     // (column index)*cs_A + (row index)*rs_A.
207  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
208  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
209  float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
210  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
211  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
212  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
213  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
214  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
215 
216  float* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
217  float* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
218  float* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
219 
220  float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
221  float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
222  float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
223 
224  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
225  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
226 
227  float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
228  float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
229 
230  float* u21 = buff_u + (i+1)*inc_u;
231 
232  float* v21 = buff_v + (i+1)*inc_v;
233 
234  float* d0 = buff_d + (0 )*inc_d;
235 
236  float* e0 = buff_e + (0 )*inc_e;
237 
238  float* f0 = buff_f + (0 )*inc_f;
239 
240  float* g0 = buff_g + (0 )*inc_g;
241 
242  float* v21_t = v21 + (0 )*inc_v;
243  float* v21_b = v21 + (1 )*inc_v;
244 
     // NOTE(review): for i == 0 this offset is (i-1)*rs_A = -rs_A, i.e. the
     // pointer is formed before a01; it is only dereferenced below under
     // `m_behind > 0` (m_behind == i).
245  float* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
246 
247  float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
248  float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
249 
250  float* ABL = a10t;
251  float* ZBL = z10t;
252 
253  float* a2 = alpha11;
254 
255  int m_ahead = m_A - i - 1;
256  int n_ahead = n_A - i - 1;
257  int m_behind = i;
258  int n_behind = i;
259 
260  /*------------------------------------------------------------*/
261 
262  if ( m_behind > 0 )
263  {
264  // FLA_Copy( a01_b, last_elem );
265  // FLA_Set( FLA_ONE, a01_b );
     // Save the element at a01_b and overwrite it with one; it is restored
     // from last_elem after the updates below.
266  last_elem = *a01_b;
267  *a01_b = *buff_1;
268  }
269 
270  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
271  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
     // NOTE(review): listing lines 272-273 and 281-282 (the call heads for the
     // two argument lists below) are absent from this extract. The same kind
     // of gap precedes most argument lists in the rest of this loop.
274  m_ahead + 1,
275  n_behind,
276  buff_m1,
277  ABL, rs_A, cs_A,
278  y10t, cs_Y,
279  buff_1,
280  a2, rs_A );
283  m_ahead + 1,
284  n_behind,
285  buff_m1,
286  ZBL, rs_Z, cs_Z,
287  a01, rs_A,
288  buff_1,
289  a2, rs_A );
290 
291  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
292  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
295  n_ahead,
296  n_behind,
297  buff_m1,
298  Y20, rs_Y, cs_Y,
299  a10t, cs_A,
300  buff_1,
301  a12t, cs_A );
304  m_behind,
305  n_ahead,
306  buff_m1,
307  A02, rs_A, cs_A,
308  z10t, cs_Z,
309  buff_1,
310  a12t, cs_A );
311 
312  if ( m_behind > 0 )
313  {
314  // FLA_Copy( last_elem, a01_b );
315  *a01_b = last_elem;
316  }
317 
318  // FLA_Househ2_UT( FLA_LEFT,
319  // alpha11,
320  // a21, tau11 );
321  // FLA_Copy( a21, u21 );
322  FLA_Househ2_UT_l_ops( m_ahead,
323  alpha11,
324  a21, rs_A,
325  tau11 );
327  m_ahead,
328  a21, rs_A,
329  u21, inc_u );
330 
331  if ( n_ahead > 0 )
332  {
333  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
334  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21, FLA_ONE, y21 );
336  n_ahead,
337  a12t, cs_A,
338  y21, rs_Y );
341  m_ahead,
342  n_ahead,
343  buff_1,
344  A22, rs_A, cs_A,
345  u21, inc_u,
346  buff_1,
347  y21, rs_Y );
348 
349  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ZERO, d0 );
350  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21, FLA_ZERO, e0 );
353  m_ahead,
354  n_behind,
355  buff_1,
356  A20, rs_A, cs_A,
357  u21, inc_u,
358  buff_0,
359  d0, inc_d );
362  m_ahead,
363  n_behind,
364  buff_1,
365  Z20, rs_Z, cs_Z,
366  u21, inc_u,
367  buff_0,
368  e0, inc_e );
369 
370  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
371  // FLA_Axpy( FLA_ONE, d0, t01 );
373  n_behind,
374  a10t, cs_A,
375  t01, rs_T );
377  n_behind,
378  buff_1,
379  d0, inc_d,
380  t01, rs_T );
381 
382  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
383  // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
386  n_ahead,
387  n_behind,
388  buff_m1,
389  Y20, rs_Y, cs_Y,
390  d0, inc_d,
391  buff_1,
392  y21, rs_Y );
395  m_behind,
396  n_ahead,
397  buff_m1,
398  A02, rs_A, cs_A,
399  e0, inc_e,
400  buff_1,
401  y21, rs_Y );
402 
403  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
405  n_ahead,
406  tau11,
407  y21, rs_Y );
408 
409  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
411  n_ahead,
412  buff_m1,
413  y21, rs_Y,
414  a12t, cs_A );
415 
416  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
417  FLA_Househ2_UT_r_ops( n_ahead - 1,
418  a12t_l,
419  a12t_r, cs_A,
420  sigma11 );
421 
422  // FLA_Set( FLA_ONE, v21_t );
423  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
424  *v21_t = *buff_1;
426  n_ahead - 1,
427  a12t_r, cs_A,
428  v21_b, inc_v );
429 
430  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
431  // FLA_Scal( FLA_MINUS_ONE, beta );
433  n_ahead,
434  y21, rs_Y,
435  v21, inc_v,
436  &beta );
437  bl1_sscals( buff_m1, &beta );
438 
439  // FLA_Copy( u21, z21 );
440  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, v21, beta, z21 );
442  m_ahead,
443  u21, inc_u,
444  z21, rs_Z );
447  m_ahead,
448  n_ahead,
449  buff_1,
450  A22, rs_A, cs_A,
451  v21, inc_v,
452  &beta,
453  z21, rs_Z );
454 
455  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, v21, FLA_ZERO, f0 );
456  // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, g0 );
459  n_ahead,
460  m_behind,
461  buff_1,
462  Y20, rs_Y, cs_Y,
463  v21, inc_v,
464  buff_0,
465  f0, inc_f );
468  m_behind,
469  n_ahead,
470  buff_1,
471  A02, rs_A, cs_A,
472  v21, inc_v,
473  buff_0,
474  g0, inc_g );
475 
476  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, z21 );
477  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, z21 );
480  m_ahead,
481  n_behind,
482  buff_m1,
483  A20, rs_A, cs_A,
484  f0, inc_f,
485  buff_1,
486  z21, rs_Z );
489  m_ahead,
490  n_behind,
491  buff_m1,
492  Z20, rs_Z, cs_Z,
493  g0, inc_g,
494  buff_1,
495  z21, rs_Z );
496 
497  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
499  m_ahead,
500  sigma11,
501  z21, rs_Z );
502 
503  // FLA_Copy( g0, s01 );
505  n_behind,
506  g0, inc_g,
507  s01, rs_S );
508  }
509  else // if ( n_ahead == 0 )
510  {
511  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
512  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
514  n_behind,
515  a10t, cs_A,
516  t01, rs_T );
519  m_ahead,
520  n_behind,
521  buff_1,
522  A20, rs_A, cs_A,
523  u21, inc_u,
524  buff_1,
525  t01, rs_T );
526  }
527 
528  /*------------------------------------------------------------*/
529 
530  }
531 
532  // FLA_Obj_free( &u );
533  // FLA_Obj_free( &v );
534  // FLA_Obj_free( &d );
535  // FLA_Obj_free( &e );
536  // FLA_Obj_free( &f );
537  // FLA_Obj_free( &g );
     // Release the six work vectors allocated at the top of the function.
538  FLA_free( buff_u );
539  FLA_free( buff_v );
540  FLA_free( buff_d );
541  FLA_free( buff_e );
542  FLA_free( buff_f );
543  FLA_free( buff_g );
544 
545  return FLA_SUCCESS;
546 }
FLA_Error FLA_Househ2_UT_r_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:651
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:55
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_invscalv.c:13
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
int i
Definition: bl1_axmyv2.c:145
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
void bl1_ssetm(int m, int n, float *sigma, float *a, int a_rs, int a_cs)
Definition: bl1_setm.c:29

◆ FLA_Bidiag_UT_u_step_opt_var5()

FLA_Error FLA_Bidiag_UT_u_step_opt_var5 ( FLA_Obj  A,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  T,
FLA_Obj  S 
)

References FLA_Bidiag_UT_u_step_opc_var5(), FLA_Bidiag_UT_u_step_opd_var5(), FLA_Bidiag_UT_u_step_ops_var5(), FLA_Bidiag_UT_u_step_opz_var5(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u_blk_var5(), and FLA_Bidiag_UT_u_opt_var5().

// Datatype dispatcher for one FLA_Bidiag_UT_u_step_*_var5 call.
//
// Reads the datatype and dimensions of A, the length of T (forwarded as
// m_TS), and the row/column strides of A, Y, Z, T and S, then hands the raw
// typed buffers to the matching FLA_Bidiag_UT_u_step_op{s,d,c,z}_var5
// routine.
//
// Returns FLA_SUCCESS unconditionally: the value returned by the typed
// routine is not captured, and a datatype that matches none of the four
// cases leaves the switch without doing any work.
36 {
37  FLA_Datatype datatype;
38  int m_A, n_A, m_TS;
39  int rs_A, cs_A;
40  int rs_Y, cs_Y;
41  int rs_Z, cs_Z;
42  int rs_T, cs_T;
43  int rs_S, cs_S;
44 
// Only A's datatype is inspected; Y, Z, T and S are accessed with the same
// element type below.
45  datatype = FLA_Obj_datatype( A );
46 
47  m_A = FLA_Obj_length( A );
48  n_A = FLA_Obj_width( A );
// m_TS is taken from T alone; S is not queried for its length.
49  m_TS = FLA_Obj_length( T );
50 
51  rs_A = FLA_Obj_row_stride( A );
52  cs_A = FLA_Obj_col_stride( A );
53 
54  rs_Y = FLA_Obj_row_stride( Y );
55  cs_Y = FLA_Obj_col_stride( Y );
56 
57  rs_Z = FLA_Obj_row_stride( Z );
58  cs_Z = FLA_Obj_col_stride( Z );
59 
60  rs_T = FLA_Obj_row_stride( T );
61  cs_T = FLA_Obj_col_stride( T );
62 
63  rs_S = FLA_Obj_row_stride( S );
64  cs_S = FLA_Obj_col_stride( S );
65 
66 
67  switch ( datatype )
68  {
// Single-precision real.
69  case FLA_FLOAT:
70  {
71  float* buff_A = FLA_FLOAT_PTR( A );
72  float* buff_Y = FLA_FLOAT_PTR( Y );
73  float* buff_Z = FLA_FLOAT_PTR( Z );
74  float* buff_T = FLA_FLOAT_PTR( T );
75  float* buff_S = FLA_FLOAT_PTR( S );
76 
77  FLA_Bidiag_UT_u_step_ops_var5( m_A,
78  n_A,
79  m_TS,
80  buff_A, rs_A, cs_A,
81  buff_Y, rs_Y, cs_Y,
82  buff_Z, rs_Z, cs_Z,
83  buff_T, rs_T, cs_T,
84  buff_S, rs_S, cs_S );
85 
86  break;
87  }
88 
// Double-precision real.
89  case FLA_DOUBLE:
90  {
91  double* buff_A = FLA_DOUBLE_PTR( A );
92  double* buff_Y = FLA_DOUBLE_PTR( Y );
93  double* buff_Z = FLA_DOUBLE_PTR( Z );
94  double* buff_T = FLA_DOUBLE_PTR( T );
95  double* buff_S = FLA_DOUBLE_PTR( S );
96 
97  FLA_Bidiag_UT_u_step_opd_var5( m_A,
98  n_A,
99  m_TS,
100  buff_A, rs_A, cs_A,
101  buff_Y, rs_Y, cs_Y,
102  buff_Z, rs_Z, cs_Z,
103  buff_T, rs_T, cs_T,
104  buff_S, rs_S, cs_S );
105 
106  break;
107  }
108 
// Single-precision complex.
109  case FLA_COMPLEX:
110  {
111  scomplex* buff_A = FLA_COMPLEX_PTR( A );
112  scomplex* buff_Y = FLA_COMPLEX_PTR( Y );
113  scomplex* buff_Z = FLA_COMPLEX_PTR( Z );
114  scomplex* buff_T = FLA_COMPLEX_PTR( T );
115  scomplex* buff_S = FLA_COMPLEX_PTR( S );
116 
117  FLA_Bidiag_UT_u_step_opc_var5( m_A,
118  n_A,
119  m_TS,
120  buff_A, rs_A, cs_A,
121  buff_Y, rs_Y, cs_Y,
122  buff_Z, rs_Z, cs_Z,
123  buff_T, rs_T, cs_T,
124  buff_S, rs_S, cs_S );
125 
126  break;
127  }
128 
// Double-precision complex.
129  case FLA_DOUBLE_COMPLEX:
130  {
131  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
132  dcomplex* buff_Y = FLA_DOUBLE_COMPLEX_PTR( Y );
133  dcomplex* buff_Z = FLA_DOUBLE_COMPLEX_PTR( Z );
134  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
135  dcomplex* buff_S = FLA_DOUBLE_COMPLEX_PTR( S );
136 
137  FLA_Bidiag_UT_u_step_opz_var5( m_A,
138  n_A,
139  m_TS,
140  buff_A, rs_A, cs_A,
141  buff_Y, rs_Y, cs_Y,
142  buff_Z, rs_Z, cs_Z,
143  buff_T, rs_T, cs_T,
144  buff_S, rs_S, cs_S );
145 
146  break;
147  }
148  }
149 
150  return FLA_SUCCESS;
151 }
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Error FLA_Bidiag_UT_u_step_ops_var5(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var5.c:155
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Bidiag_UT_u_step_opz_var5(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var5.c:1340
FLA_Error FLA_Bidiag_UT_u_step_opd_var5(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var5.c:550
Definition: blis_type_defs.h:132
int FLA_Datatype
Definition: FLA_type_defs.h:49
FLA_Error FLA_Bidiag_UT_u_step_opc_var5(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var5.c:945
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
Definition: blis_type_defs.h:137

◆ FLA_Bidiag_UT_u_step_opz_var5()

FLA_Error FLA_Bidiag_UT_u_step_opz_var5 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex * buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex * buff_T,
int  rs_T,
int  cs_T,
dcomplex * buff_S,
int  rs_S,
int  cs_S 
)

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zinvscalv(), bl1_zscals(), bl1_zsetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_Househ2_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var5().

// Double-precision complex body of the var5 bidiagonalization step.
//
// Works directly on raw buffers: A (m_A x n_A) is updated in place, Y and Z
// are zeroed and then filled column by column, and T and S receive one
// column (t01/tau11 and s01/sigma11) per iteration.  The loop runs
// b_alg = m_TS iterations.  Six temporary vectors (u, v, d, e, f, g) are
// allocated on entry and released on exit.  Always returns FLA_SUCCESS.
//
// NOTE(review): this listing is incomplete.  The listing numbers skip
// wherever a call was opened (e.g. 1456 -> 1459, 1510 -> 1512), so the
// callee name and any leading trans/conj arguments of most calls are not
// shown; only the trailing argument lists remain.  Each dangling argument
// list presumably belongs to the operation described by the FLA_* comment
// directly above it -- confirm against the original source file.
1348 {
1349  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
1350  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
1351  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
1352 
1353  dcomplex beta;
1354  dcomplex last_elem;
1355  int i;
1356 
1357  // b_alg = FLA_Obj_length( T );
1358  int b_alg = m_TS;
1359 
1360  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1361  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1362  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
1363  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
1364  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
1365  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
// Work vectors: u, f, g hold m_A elements; v, d, e hold n_A elements.
// NOTE(review): the returned pointers are used without a NULL check here --
// confirm that FLA_malloc handles allocation failure itself.
1366  dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1367  dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1368  dcomplex* buff_d = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1369  dcomplex* buff_e = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1370  dcomplex* buff_f = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1371  dcomplex* buff_g = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
// All work vectors are contiguous (unit stride).
1372  int inc_u = 1;
1373  int inc_v = 1;
1374  int inc_d = 1;
1375  int inc_e = 1;
1376  int inc_f = 1;
1377  int inc_g = 1;
1378 
1379  // FLA_Set( FLA_ZERO, Y );
1380  // FLA_Set( FLA_ZERO, Z );
// Only the leading n_A x b_alg part of Y and m_A x b_alg part of Z are zeroed.
1381  bl1_zsetm( n_A,
1382  b_alg,
1383  buff_0,
1384  buff_Y, rs_Y, cs_Y );
1385  bl1_zsetm( m_A,
1386  b_alg,
1387  buff_0,
1388  buff_Z, rs_Z, cs_Z );
1389 
1390  for ( i = 0; i < b_alg; ++i )
1391  {
// Views into A relative to the current diagonal element (i,i): row i to the
// left (a10t) and right (a12t), column i above (a01) and below (a21), and
// the blocks below-left (A20), above-right (A02) and below-right (A22).
1392  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1393  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1394  dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
1395  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1396  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1397  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1398  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1399  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1400 
// Matching views into Y and Z.
1401  dcomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1402  dcomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1403  dcomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1404 
1405  dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1406  dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1407  dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1408 
// Column i of T and S: the part above the diagonal and the diagonal entry.
1409  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1410  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1411 
1412  dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1413  dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1414 
1415  dcomplex* u21 = buff_u + (i+1)*inc_u;
1416 
1417  dcomplex* v21 = buff_v + (i+1)*inc_v;
1418 
1419  dcomplex* d0 = buff_d + (0 )*inc_d;
1420 
1421  dcomplex* e0 = buff_e + (0 )*inc_e;
1422 
1423  dcomplex* f0 = buff_f + (0 )*inc_f;
1424 
1425  dcomplex* g0 = buff_g + (0 )*inc_g;
1426 
// v21 split into its first element and the remainder.
1427  dcomplex* v21_t = v21 + (0 )*inc_v;
1428  dcomplex* v21_b = v21 + (1 )*inc_v;
1429 
// Last element of a01 (row i-1 of column i).  For i == 0 this address lies
// one row before a01; it is only dereferenced under `m_behind > 0` below.
1430  dcomplex* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
1431 
// a12t split into its first element and the remainder.
1432  dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1433  dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1434 
// ABL/ZBL start at row i, so with m_ahead + 1 rows they cover a10t stacked
// on A20 (resp. z10t on Z20); a2 likewise covers alpha11 stacked on a21.
1435  dcomplex* ABL = a10t;
1436  dcomplex* ZBL = z10t;
1437 
1438  dcomplex* a2 = alpha11;
1439 
1440  int m_ahead = m_A - i - 1;
1441  int n_ahead = n_A - i - 1;
1442  int m_behind = i;
1443  int n_behind = i;
1444 
1445  /*------------------------------------------------------------*/
1446 
// Temporarily overwrite the last element of a01 with 1 for the updates
// below; the saved value is put back after them.
1447  if ( m_behind > 0 )
1448  {
1449  // FLA_Copy( a01_b, last_elem );
1450  // FLA_Set( FLA_ONE, a01_b );
1451  last_elem = *a01_b;
1452  *a01_b = *buff_1;
1453  }
1454 
1455  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1456  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
// NOTE(review): listing lines 1457-1458 and 1466-1467 (call openers) are
// missing from this extract.
1459  m_ahead + 1,
1460  n_behind,
1461  buff_m1,
1462  ABL, rs_A, cs_A,
1463  y10t, cs_Y,
1464  buff_1,
1465  a2, rs_A );
1468  m_ahead + 1,
1469  n_behind,
1470  buff_m1,
1471  ZBL, rs_Z, cs_Z,
1472  a01, rs_A,
1473  buff_1,
1474  a2, rs_A );
1475 
1476  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1477  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
1480  n_ahead,
1481  n_behind,
1482  buff_m1,
1483  Y20, rs_Y, cs_Y,
1484  a10t, cs_A,
1485  buff_1,
1486  a12t, cs_A );
1489  m_behind,
1490  n_ahead,
1491  buff_m1,
1492  A02, rs_A, cs_A,
1493  z10t, cs_Z,
1494  buff_1,
1495  a12t, cs_A );
1496 
// Restore the element of a01 that was replaced by 1 above.
1497  if ( m_behind > 0 )
1498  {
1499  // FLA_Copy( last_elem, a01_b );
1500  *a01_b = last_elem;
1501  }
1502 
1503  // FLA_Househ2_UT( FLA_LEFT,
1504  // alpha11,
1505  // a21, tau11 );
1506  // FLA_Copy( a21, u21 );
1507  FLA_Househ2_UT_l_opz( m_ahead,
1508  alpha11,
1509  a21, rs_A,
1510  tau11 );
1512  m_ahead,
1513  a21, rs_A,
1514  u21, inc_u );
1515 
// The right-hand reflector and the Y/Z/S updates are only computed while
// columns remain to the right of column i.
1516  if ( n_ahead > 0 )
1517  {
1518  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
1519  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21, FLA_ONE, y21 );
1521  n_ahead,
1522  a12t, cs_A,
1523  y21, rs_Y );
1526  m_ahead,
1527  n_ahead,
1528  buff_1,
1529  A22, rs_A, cs_A,
1530  u21, inc_u,
1531  buff_1,
1532  y21, rs_Y );
1533 
1534  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ZERO, d0 );
1535  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21, FLA_ZERO, e0 );
1538  m_ahead,
1539  n_behind,
1540  buff_1,
1541  A20, rs_A, cs_A,
1542  u21, inc_u,
1543  buff_0,
1544  d0, inc_d );
1547  m_ahead,
1548  n_behind,
1549  buff_1,
1550  Z20, rs_Z, cs_Z,
1551  u21, inc_u,
1552  buff_0,
1553  e0, inc_e );
1554 
1555  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1556  // FLA_Axpy( FLA_ONE, d0, t01 );
1558  n_behind,
1559  a10t, cs_A,
1560  t01, rs_T );
1562  n_behind,
1563  buff_1,
1564  d0, inc_d,
1565  t01, rs_T );
1566 
1567  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1568  // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
1571  n_ahead,
1572  n_behind,
1573  buff_m1,
1574  Y20, rs_Y, cs_Y,
1575  d0, inc_d,
1576  buff_1,
1577  y21, rs_Y );
1580  m_behind,
1581  n_ahead,
1582  buff_m1,
1583  A02, rs_A, cs_A,
1584  e0, inc_e,
1585  buff_1,
1586  y21, rs_Y );
1587 
1588  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1590  n_ahead,
1591  tau11,
1592  y21, rs_Y );
1593 
1594  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
1596  n_ahead,
1597  buff_m1,
1598  y21, rs_Y,
1599  a12t, cs_A );
1600 
1601  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
// a12t_l is the first element of a12t, so the remainder a12t_r has
// n_ahead - 1 elements.
1602  FLA_Househ2_UT_r_opz( n_ahead - 1,
1603  a12t_l,
1604  a12t_r, cs_A,
1605  sigma11 );
1606 
1607  // FLA_Set( FLA_ONE, v21_t );
1608  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
1609  *v21_t = *buff_1;
1611  n_ahead - 1,
1612  a12t_r, cs_A,
1613  v21_b, inc_v );
1614 
1615  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1616  // FLA_Scal( FLA_MINUS_ONE, beta );
1618  n_ahead,
1619  y21, rs_Y,
1620  v21, inc_v,
1621  &beta );
1622  bl1_zscals( buff_m1, &beta );
1623 
1624  // FLA_Copy( u21, z21 );
1625  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, v21, beta, z21 );
1627  m_ahead,
1628  u21, inc_u,
1629  z21, rs_Z );
1632  m_ahead,
1633  n_ahead,
1634  buff_1,
1635  A22, rs_A, cs_A,
1636  v21, inc_v,
1637  &beta,
1638  z21, rs_Z );
1639 
1640  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, v21, FLA_ZERO, f0 );
1641  // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, g0 );
1644  n_ahead,
1645  m_behind,
1646  buff_1,
1647  Y20, rs_Y, cs_Y,
1648  v21, inc_v,
1649  buff_0,
1650  f0, inc_f );
1653  m_behind,
1654  n_ahead,
1655  buff_1,
1656  A02, rs_A, cs_A,
1657  v21, inc_v,
1658  buff_0,
1659  g0, inc_g );
1660 
1661  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, z21 );
1662  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, z21 );
1665  m_ahead,
1666  n_behind,
1667  buff_m1,
1668  A20, rs_A, cs_A,
1669  f0, inc_f,
1670  buff_1,
1671  z21, rs_Z );
1674  m_ahead,
1675  n_behind,
1676  buff_m1,
1677  Z20, rs_Z, cs_Z,
1678  g0, inc_g,
1679  buff_1,
1680  z21, rs_Z );
1681 
1682  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1684  m_ahead,
1685  sigma11,
1686  z21, rs_Z );
1687 
1688  // FLA_Copy( g0, s01 );
1690  n_behind,
1691  g0, inc_g,
1692  s01, rs_S );
1693  }
1694  else // if ( n_ahead == 0 )
1695  {
// No columns remain to the right: only t01 is formed; Y, Z and S are not
// touched in this iteration.
1696  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1697  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1699  n_behind,
1700  a10t, cs_A,
1701  t01, rs_T );
1704  m_ahead,
1705  n_behind,
1706  buff_1,
1707  A20, rs_A, cs_A,
1708  u21, inc_u,
1709  buff_1,
1710  t01, rs_T );
1711  }
1712 
1713  /*------------------------------------------------------------*/
1714 
1715  }
1716 
1717  // FLA_Obj_free( &u );
1718  // FLA_Obj_free( &v );
1719  // FLA_Obj_free( &d );
1720  // FLA_Obj_free( &e );
1721  // FLA_Obj_free( &f );
1722  // FLA_Obj_free( &g );
// Release the six work vectors allocated on entry.
1723  FLA_free( buff_u );
1724  FLA_free( buff_v );
1725  FLA_free( buff_d );
1726  FLA_free( buff_e );
1727  FLA_free( buff_f );
1728  FLA_free( buff_g );
1729 
1730  return FLA_SUCCESS;
1731 }
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_invscalv.c:78
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
Definition: blis_type_defs.h:55
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:78
int i
Definition: bl1_axmyv2.c:145
FLA_Error FLA_Househ2_UT_r_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:693
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
Definition: blis_type_defs.h:137
bl1_zscals(beta, rho_yz)
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60