libflame  revision_anchor
Functions
FLA_Bidiag_UT_u_fus_var4.c File Reference

(r)

Functions

FLA_Error FLA_Bidiag_UT_u_ofu_var4 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_step_ofu_var4 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofs_var4 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofd_var4 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofc_var4 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofz_var4 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 

Function Documentation

◆ FLA_Bidiag_UT_u_ofu_var4()

FLA_Error FLA_Bidiag_UT_u_ofu_var4 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Bidiag_UT_u_step_ofu_var4(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), and FLA_Obj_width().

// Driver for the step routine: builds two temporary objects Y and Z with A's
// datatype, calls FLA_Bidiag_UT_u_step_ofu_var4( A, Y, Z, TU, TV ), releases
// the temporaries, and returns whatever status the step routine produced.
14 {
15  FLA_Error r_val;
16  FLA_Obj Y, Z;
17  FLA_Datatype datatype_A;
18  dim_t m_A, n_A;
19 
20  datatype_A = FLA_Obj_datatype( A );
21  m_A = FLA_Obj_length( A );
22  n_A = FLA_Obj_width( A );
23 
// Y is created n_A x n_A and Z is created m_A x n_A. The return values of
// FLA_Obj_create() are not inspected here.
// NOTE(review): presumably FLA_Obj_create() aborts internally on failure -- confirm.
24  FLA_Obj_create( datatype_A, n_A, n_A, 0, 0, &Y );
25  FLA_Obj_create( datatype_A, m_A, n_A, 0, 0, &Z );
26 
// TU and TV are passed in the step routine's T and S parameter positions.
27  r_val = FLA_Bidiag_UT_u_step_ofu_var4( A, Y, Z, TU, TV );
28 
29  FLA_Obj_free( &Y );
30  FLA_Obj_free( &Z );
31 
32  return r_val;
33 }
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition: FLA_Obj.c:55
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
int FLA_Error
Definition: FLA_type_defs.h:47
FLA_Error FLA_Bidiag_UT_u_step_ofu_var4(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_fus_var4.c:35
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116

◆ FLA_Bidiag_UT_u_step_ofc_var4()

FLA_Error FLA_Bidiag_UT_u_step_ofc_var4 ( int  m_A,
int  n_A,
int  m_TS,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
scomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *  buff_T,
int  rs_T,
int  cs_T,
scomplex *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_caxpyv(), bl1_cconjv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cinvscalv(), bl1_csetm(), bl1_csetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_UYx_ZVx_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_Househ2s_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var4().

// scomplex variant of the step routine, operating on raw buffers with
// explicit row/column strides (rs_*, cs_*).
//
// Visible structure:
//   1. allocate nine unit-stride scratch vectors with FLA_malloc();
//   2. zero the leading n_A x m_TS part of Y and m_A x m_TS part of Z;
//   3. run m_TS iterations (i = 0 .. m_TS-1), each updating A, Y, Z and
//      column i of T and S;
//   4. free the scratch vectors and return FLA_SUCCESS unconditionally.
//
// NOTE(review): this listing's line numbering skips in many places
// (e.g. 1234 -> 1237, 1288 -> 1290). The skipped lines are the opening
// line(s) of calls, so several argument lists below appear without a callee.
// The commented FLAME-level call above each group names the intended
// operation; presumably the callees are the bl1_c* / FLA_Fused_*_opc routines
// named in this page's "References" list -- confirm against the source file.
1103 {
1104  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
1105  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
1106  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
1107 
1108  scomplex alpha12;
1109  scomplex minus_conj_alpha12;
1110  scomplex psi11_minus_alpha12;
1111  scomplex minus_inv_tau11;
1112  scomplex beta;
1113  scomplex last_elem;
1114  int i;
1115 
1116  // b_alg = FLA_Obj_length( T );
1117  int b_alg = m_TS;
1118 
1119  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1120  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
1121  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
1122  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1123  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
1124  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1125  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
1126  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
1127  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
1128  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
// Scratch vectors holding m_A or n_A elements each. The FLA_malloc() results
// are used without a NULL check in this function.
// NOTE(review): presumably FLA_malloc() aborts on allocation failure -- confirm.
1129  scomplex* buff_tmp = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1130  scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1131  scomplex* buff_al = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1132  scomplex* buff_ap = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1133  scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1134  scomplex* buff_up = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1135  scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1136  scomplex* buff_d = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1137  scomplex* buff_e = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1138  int inc_tmp = 1;
1139  int inc_w = 1;
1140  int inc_al = 1;
1141  int inc_ap = 1;
1142  int inc_u = 1;
1143  int inc_up = 1;
1144  int inc_v = 1;
1145  int inc_d = 1;
1146  int inc_e = 1;
1147 
1148  // FLA_Set( FLA_ZERO, Y );
1149  // FLA_Set( FLA_ZERO, Z );
1150  bl1_csetm( n_A,
1151  b_alg,
1152  buff_0,
1153  buff_Y, rs_Y, cs_Y );
1154  bl1_csetm( m_A,
1155  b_alg,
1156  buff_0,
1157  buff_Z, rs_Z, cs_Z );
1158 
1159  for ( i = 0; i < b_alg; ++i )
1160  {
// Views into A, Y, Z, T, S and the scratch vectors for iteration i. They are
// plain pointer offsets using the caller-supplied strides.
1161  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1162  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1163  scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
1164  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1165  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1166  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1167  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1168  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1169 
1170  scomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1171  scomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1172  scomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1173 
1174  scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1175  scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1176  scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1177 
1178  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1179  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1180 
1181  scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1182  scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1183 
1184  scomplex* tmp21 = buff_tmp + (i+1)*inc_tmp;
1185 
1186  scomplex* w21 = buff_w + (i+1)*inc_w;
1187 
1188  scomplex* a22l = buff_al + (i+1)*inc_al;
1189 
1190  scomplex* a12p = buff_ap + (i+1)*inc_ap;
1191 
1192  scomplex* u21 = buff_u + (i+1)*inc_u;
1193 
1194  scomplex* u21p = buff_up + (i+1)*inc_up;
1195 
1196  scomplex* v21 = buff_v + (i+1)*inc_v;
1197 
1198  scomplex* d0 = buff_d + (0 )*inc_d;
1199 
1200  scomplex* e0 = buff_e + (0 )*inc_e;
1201 
1202  scomplex* a12p_t = a12p + (0 )*inc_ap;
1203  scomplex* a12p_b = a12p + (1 )*inc_ap;
1204 
1205  scomplex* v21_t = v21 + (0 )*inc_v;
1206  scomplex* v21_b = v21 + (1 )*inc_v;
1207 
// Last element of a01. When i == 0 the offset is (i-1) == -1, so the address
// lies before a01; it is only dereferenced inside the m_behind > 0 guards.
1208  scomplex* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
1209 
1210  scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1211  scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1212 
1213  scomplex* ABL = a10t;
1214  scomplex* ZBL = z10t;
1215 
1216  scomplex* a2 = alpha11;
1217 
1218  int m_ahead = m_A - i - 1;
1219  int n_ahead = n_A - i - 1;
1220  int m_behind = i;
1221  int n_behind = i;
1222 
1223  /*------------------------------------------------------------*/
1224 
// Temporarily overwrite the last element of a01 with one for the updates
// below; the saved value is restored after them.
1225  if ( m_behind > 0 )
1226  {
1227  // FLA_Copy( a01_b, last_elem );
1228  // FLA_Set( FLA_ONE, a01_b );
1229  last_elem = *a01_b;
1230  *a01_b = *buff_1;
1231  }
1232 
1233  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1234  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
// NOTE(review): listing lines 1235-1236 and 1244-1245 (the call openings) are
// not shown; only the trailing arguments of the two calls remain.
1237  m_ahead + 1,
1238  n_behind,
1239  buff_m1,
1240  ABL, rs_A, cs_A,
1241  y10t, cs_Y,
1242  buff_1,
1243  a2, rs_A );
1246  m_ahead + 1,
1247  n_behind,
1248  buff_m1,
1249  ZBL, rs_Z, cs_Z,
1250  a01, rs_A,
1251  buff_1,
1252  a2, rs_A );
1253 
1254  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1255  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
1258  n_ahead,
1259  n_behind,
1260  buff_m1,
1261  Y20, rs_Y, cs_Y,
1262  a10t, cs_A,
1263  buff_1,
1264  a12t, cs_A );
1267  m_behind,
1268  n_ahead,
1269  buff_m1,
1270  A02, rs_A, cs_A,
1271  z10t, cs_Z,
1272  buff_1,
1273  a12t, cs_A );
1274 
1275  if ( m_behind > 0 )
1276  {
1277  // FLA_Copy( last_elem, a01_b );
1278  *a01_b = last_elem;
1279  }
1280 
1281  // FLA_Househ2_UT( FLA_LEFT,
1282  // alpha11,
1283  // a21, tau11 );
1284  // FLA_Copy( a21, u21p );
1285  FLA_Househ2_UT_l_opc( m_ahead,
1286  alpha11,
1287  a21, rs_A,
1288  tau11 );
1290  m_ahead,
1291  a21, rs_A,
1292  u21p, inc_up );
1293 
1294  if ( n_ahead > 0 )
1295  {
1296  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1297  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1298  bl1_cdiv3( buff_m1, tau11, &minus_inv_tau11 );
1299 
1300  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1301  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1303  n_ahead,
1304  a12t, cs_A,
1305  a12p, inc_ap );
1307  n_ahead,
1308  &minus_inv_tau11,
1309  a12t, cs_A,
1310  a12p, inc_ap );
1311 
1312  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
1313  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
1316  m_ahead,
1317  n_behind,
1318  buff_1,
1319  A20, rs_A, cs_A,
1320  u21p, inc_up,
1321  buff_0,
1322  d0, inc_d );
1325  m_ahead,
1326  n_behind,
1327  buff_1,
1328  Z20, rs_Z, cs_Z,
1329  u21p, inc_up,
1330  buff_0,
1331  e0, inc_e );
1332 
1333  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1334  // FLA_Axpy( FLA_ONE, d0, t01 );
1336  n_behind,
1337  a10t, cs_A,
1338  t01, rs_T );
1340  n_behind,
1341  buff_1,
1342  d0, inc_d,
1343  t01, rs_T );
1344 
1345  // FLA_Set( FLA_ZERO, y21 );
1346  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1347  // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
1348  bl1_csetv( n_ahead,
1349  buff_0,
1350  y21, rs_Y );
1353  n_ahead,
1354  n_behind,
1355  buff_m1,
1356  Y20, rs_Y, cs_Y,
1357  d0, inc_d,
1358  buff_1,
1359  y21, rs_Y );
1362  m_behind,
1363  n_ahead,
1364  buff_m1,
1365  A02, rs_A, cs_A,
1366  e0, inc_e,
1367  buff_1,
1368  y21, rs_Y );
1369 
1370  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
1371  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1372  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1374  n_ahead,
1375  tau11,
1376  buff_1,
1377  A22, rs_A, cs_A,
1378  u21p, inc_up,
1379  a12p, inc_ap,
1380  y21, rs_Y,
1381  w21, inc_w );
1382 
1383  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
1384  // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
1385  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
1386  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
1387  // FLA_Copy( A22_l, a22l );
1388  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
1389  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
1390  // FLA_Copy( g0, s01 );
1391  FLA_Fused_UYx_ZVx_opc_var1( m_ahead,
1392  n_behind,
1393  m_behind,
1394  n_ahead,
1395  buff_m1,
1396  A20, rs_A, cs_A,
1397  Y20, rs_Y, cs_Y,
1398  Z20, rs_Z, cs_Z,
1399  A02, rs_A, cs_A,
1400  A22, rs_A, cs_A,
1401  tmp21, inc_tmp,
1402  s01, rs_S,
1403  a12p, inc_ap,
1404  w21, inc_w,
1405  a22l, inc_al );
1406 
1407  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1409  n_ahead,
1410  buff_1,
1411  a12t, cs_A,
1412  y21, rs_Y );
1413 
1414  // FLA_Househ2s_UT( FLA_RIGHT,
1415  // a12p_t,
1416  // a12p_b,
1417  // alpha12, psi11_minus_alpha12, sigma11 );
1418  FLA_Househ2s_UT_r_opc( n_ahead - 1,
1419  a12p_t,
1420  a12p_b, inc_ap,
1421  &alpha12,
1422  &psi11_minus_alpha12,
1423  sigma11 );
1424 
1425  // FLA_Copy( a12p, v21 );
1426  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1427  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1428  // FLA_Conjugate( v21_b );
1430  n_ahead,
1431  a12p, inc_ap,
1432  v21, inc_v );
1433  bl1_cmult4( buff_m1, &alpha12, v21_t, v21_t );
1435  n_ahead,
1436  &psi11_minus_alpha12,
1437  v21, inc_v );
1438  bl1_cconjv( n_ahead - 1,
1439  v21_b, inc_v );
1440 
1441  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1442  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1443  bl1_ccopyconj( &alpha12, &minus_conj_alpha12 );
1444  bl1_cneg1( &minus_conj_alpha12 );
1445 
1446  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
1447  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
1449  n_behind,
1450  &minus_conj_alpha12,
1451  A02, rs_A,
1452  s01, rs_S );
1454  n_behind,
1455  &psi11_minus_alpha12,
1456  s01, rs_S );
1457 
1458  // FLA_Copy( alpha12, a12t_l );
1459  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1460  *a12t_l = alpha12;
1462  n_ahead - 1,
1463  v21_b, inc_v,
1464  a12t_r, cs_A );
1465  }
1466 
1467  // FLA_Copy( u21p, u21 );
1469  m_ahead,
1470  u21p, inc_up,
1471  u21, inc_u );
1472 
1473  if ( n_ahead > 0 )
1474  {
1475  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1476  // FLA_Scal( FLA_MINUS_ONE, beta );
1477  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1479  n_ahead,
1480  y21, rs_Y,
1481  v21, inc_v,
1482  &beta );
// minus_inv_tau11 was set in the earlier n_ahead > 0 branch of this same
// iteration, so it is initialised whenever this branch runs.
1483  bl1_cscals( &minus_inv_tau11, &beta );
1484 
1485  // FLA_Copy( w21, z21 );
1486  // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
1487  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1488  // FLA_Axpy( beta, u21, z21 );
1490  m_ahead,
1491  w21, inc_w,
1492  z21, rs_Z );
1494  m_ahead,
1495  &minus_conj_alpha12,
1496  a22l, inc_al,
1497  z21, rs_Z );
1499  m_ahead,
1500  &psi11_minus_alpha12,
1501  z21, rs_Z );
1503  m_ahead,
1504  &beta,
1505  u21, inc_u,
1506  z21, rs_Z );
1507 
1508  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1509  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1511  n_ahead,
1512  tau11,
1513  y21, rs_Y );
1515  m_ahead,
1516  sigma11,
1517  z21, rs_Z );
1518  }
1519  else // if ( n_ahead == 0 )
1520  {
1521  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1522  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1524  n_behind,
1525  a10t, cs_A,
1526  t01, rs_T );
1529  m_ahead,
1530  n_behind,
1531  buff_1,
1532  A20, rs_A, cs_A,
1533  u21, inc_u,
1534  buff_1,
1535  t01, rs_T );
1536  }
1537 
1538  /*------------------------------------------------------------*/
1539 
1540  }
1541 
1542  // FLA_Obj_free( &w );
1543  // FLA_Obj_free( &al );
1544  // FLA_Obj_free( &ap );
1545  // FLA_Obj_free( &u );
1546  // FLA_Obj_free( &up );
1547  // FLA_Obj_free( &v );
1548  // FLA_Obj_free( &d );
1549  // FLA_Obj_free( &e );
// Release all nine scratch vectors allocated at the top of the function.
1550  FLA_free( buff_tmp );
1551  FLA_free( buff_w );
1552  FLA_free( buff_al );
1553  FLA_free( buff_ap );
1554  FLA_free( buff_u );
1555  FLA_free( buff_up );
1556  FLA_free( buff_v );
1557  FLA_free( buff_d );
1558  FLA_free( buff_e );
1559 
1560  return FLA_SUCCESS;
1561 }
void bl1_csetv(int m, scomplex *sigma, scomplex *x, int incx)
Definition: bl1_setv.c:52
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:61
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
FLA_Error FLA_Fused_UYx_ZVx_opc_var1(int m_U, int n_U, int m_V, int n_V, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_V, int rs_V, int cs_V, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_temp, int inc_temp, scomplex *buff_t, int inc_t, scomplex *buff_a, int inc_a, scomplex *buff_w, int inc_w, scomplex *buff_al, int inc_al)
Definition: FLA_Fused_UYx_ZVx_opt_var1.c:424
Definition: blis_type_defs.h:55
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void bl1_cconjv(int m, scomplex *x, int incx)
Definition: bl1_conjv.c:23
FLA_Error FLA_Househ2s_UT_r_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *alpha, scomplex *chi_1_minus_alpha, scomplex *tau)
Definition: FLA_Househ2s_UT.c:589
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opc_var1(int m_A, int n_A, scomplex *buff_tau, scomplex *buff_beta, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_u, int inc_u, scomplex *buff_a, int inc_a, scomplex *buff_y, int inc_y, scomplex *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:322
Definition: blis_type_defs.h:132
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_invscalv.c:52
int i
Definition: bl1_axmyv2.c:145
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_ofd_var4()

FLA_Error FLA_Bidiag_UT_u_step_ofd_var4 ( int  m_A,
int  n_A,
int  m_TS,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_Y,
int  rs_Y,
int  cs_Y,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_T,
int  rs_T,
int  cs_T,
double *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_daxpyv(), bl1_dconjv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dinvscalv(), bl1_dsetm(), bl1_dsetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_UYx_ZVx_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_Househ2s_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var4().

// double variant of the step routine, operating on raw buffers with explicit
// row/column strides (rs_*, cs_*).
//
// Visible structure:
//   1. allocate nine unit-stride scratch vectors with FLA_malloc();
//   2. zero the leading n_A x m_TS part of Y and m_A x m_TS part of Z;
//   3. run m_TS iterations (i = 0 .. m_TS-1), each updating A, Y, Z and
//      column i of T and S;
//   4. free the scratch vectors and return FLA_SUCCESS unconditionally.
//
// NOTE(review): this listing's line numbering skips in many places
// (e.g. 764 -> 767, 818 -> 820, 920 -> 922). The skipped lines are the opening
// line(s) of calls, so several argument lists below appear without a callee.
// The commented FLAME-level call above each group names the intended
// operation; presumably the callees are the bl1_d* / FLA_Fused_*_opd routines
// named in this page's "References" list -- confirm against the source file.
633 {
634  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
635  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
636  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
637 
638  double alpha12;
639  double minus_conj_alpha12;
640  double psi11_minus_alpha12;
641  double minus_inv_tau11;
642  double beta;
643  double last_elem;
644  int i;
645 
646  // b_alg = FLA_Obj_length( T );
647  int b_alg = m_TS;
648 
649  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
650  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
651  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
652  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
653  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
654  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
655  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
656  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
657  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
658  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
// Scratch vectors holding m_A or n_A elements each. The FLA_malloc() results
// are used without a NULL check in this function.
// NOTE(review): presumably FLA_malloc() aborts on allocation failure -- confirm.
659  double* buff_tmp = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
660  double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
661  double* buff_al = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
662  double* buff_ap = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
663  double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
664  double* buff_up = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
665  double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
666  double* buff_d = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
667  double* buff_e = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
668  int inc_tmp = 1;
669  int inc_w = 1;
670  int inc_al = 1;
671  int inc_ap = 1;
672  int inc_u = 1;
673  int inc_up = 1;
674  int inc_v = 1;
675  int inc_d = 1;
676  int inc_e = 1;
677 
678  // FLA_Set( FLA_ZERO, Y );
679  // FLA_Set( FLA_ZERO, Z );
680  bl1_dsetm( n_A,
681  b_alg,
682  buff_0,
683  buff_Y, rs_Y, cs_Y );
684  bl1_dsetm( m_A,
685  b_alg,
686  buff_0,
687  buff_Z, rs_Z, cs_Z );
688 
689  for ( i = 0; i < b_alg; ++i )
690  {
// Views into A, Y, Z, T, S and the scratch vectors for iteration i. They are
// plain pointer offsets using the caller-supplied strides.
691  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
692  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
693  double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
694  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
695  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
696  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
697  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
698  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
699 
700  double* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
701  double* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
702  double* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
703 
704  double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
705  double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
706  double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
707 
708  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
709  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
710 
711  double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
712  double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
713 
714  double* tmp21 = buff_tmp + (i+1)*inc_tmp;
715 
716  double* w21 = buff_w + (i+1)*inc_w;
717 
718  double* a22l = buff_al + (i+1)*inc_al;
719 
720  double* a12p = buff_ap + (i+1)*inc_ap;
721 
722  double* u21 = buff_u + (i+1)*inc_u;
723 
724  double* u21p = buff_up + (i+1)*inc_up;
725 
726  double* v21 = buff_v + (i+1)*inc_v;
727 
728  double* d0 = buff_d + (0 )*inc_d;
729 
730  double* e0 = buff_e + (0 )*inc_e;
731 
732  double* a12p_t = a12p + (0 )*inc_ap;
733  double* a12p_b = a12p + (1 )*inc_ap;
734 
735  double* v21_t = v21 + (0 )*inc_v;
736  double* v21_b = v21 + (1 )*inc_v;
737 
// Last element of a01. When i == 0 the offset is (i-1) == -1, so the address
// lies before a01; it is only dereferenced inside the m_behind > 0 guards.
738  double* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
739 
740  double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
741  double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
742 
743  double* ABL = a10t;
744  double* ZBL = z10t;
745 
746  double* a2 = alpha11;
747 
748  int m_ahead = m_A - i - 1;
749  int n_ahead = n_A - i - 1;
750  int m_behind = i;
751  int n_behind = i;
752 
753  /*------------------------------------------------------------*/
754 
// Temporarily overwrite the last element of a01 with one for the updates
// below; the saved value is restored after them.
755  if ( m_behind > 0 )
756  {
757  // FLA_Copy( a01_b, last_elem );
758  // FLA_Set( FLA_ONE, a01_b );
759  last_elem = *a01_b;
760  *a01_b = *buff_1;
761  }
762 
763  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
764  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
// NOTE(review): listing lines 765-766 and 774-775 (the call openings) are not
// shown; only the trailing arguments of the two calls remain.
767  m_ahead + 1,
768  n_behind,
769  buff_m1,
770  ABL, rs_A, cs_A,
771  y10t, cs_Y,
772  buff_1,
773  a2, rs_A );
776  m_ahead + 1,
777  n_behind,
778  buff_m1,
779  ZBL, rs_Z, cs_Z,
780  a01, rs_A,
781  buff_1,
782  a2, rs_A );
783 
784  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
785  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
788  n_ahead,
789  n_behind,
790  buff_m1,
791  Y20, rs_Y, cs_Y,
792  a10t, cs_A,
793  buff_1,
794  a12t, cs_A );
797  m_behind,
798  n_ahead,
799  buff_m1,
800  A02, rs_A, cs_A,
801  z10t, cs_Z,
802  buff_1,
803  a12t, cs_A );
804 
805  if ( m_behind > 0 )
806  {
807  // FLA_Copy( last_elem, a01_b );
808  *a01_b = last_elem;
809  }
810 
811  // FLA_Househ2_UT( FLA_LEFT,
812  // alpha11,
813  // a21, tau11 );
814  // FLA_Copy( a21, u21p );
815  FLA_Househ2_UT_l_opd( m_ahead,
816  alpha11,
817  a21, rs_A,
818  tau11 );
820  m_ahead,
821  a21, rs_A,
822  u21p, inc_up );
823 
824  if ( n_ahead > 0 )
825  {
826  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
827  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
828  bl1_ddiv3( buff_m1, tau11, &minus_inv_tau11 );
829 
830  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
831  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
833  n_ahead,
834  a12t, cs_A,
835  a12p, inc_ap );
837  n_ahead,
838  &minus_inv_tau11,
839  a12t, cs_A,
840  a12p, inc_ap );
841 
842  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
843  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
846  m_ahead,
847  n_behind,
848  buff_1,
849  A20, rs_A, cs_A,
850  u21p, inc_up,
851  buff_0,
852  d0, inc_d );
855  m_ahead,
856  n_behind,
857  buff_1,
858  Z20, rs_Z, cs_Z,
859  u21p, inc_up,
860  buff_0,
861  e0, inc_e );
862 
863  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
864  // FLA_Axpy( FLA_ONE, d0, t01 );
866  n_behind,
867  a10t, cs_A,
868  t01, rs_T );
870  n_behind,
871  buff_1,
872  d0, inc_d,
873  t01, rs_T );
874 
875  // FLA_Set( FLA_ZERO, y21 );
876  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
877  // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
878  bl1_dsetv( n_ahead,
879  buff_0,
880  y21, rs_Y );
883  n_ahead,
884  n_behind,
885  buff_m1,
886  Y20, rs_Y, cs_Y,
887  d0, inc_d,
888  buff_1,
889  y21, rs_Y );
892  m_behind,
893  n_ahead,
894  buff_m1,
895  A02, rs_A, cs_A,
896  e0, inc_e,
897  buff_1,
898  y21, rs_Y );
899 
900  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
901  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
902  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
904  n_ahead,
905  tau11,
906  buff_1,
907  A22, rs_A, cs_A,
908  u21p, inc_up,
909  a12p, inc_ap,
910  y21, rs_Y,
911  w21, inc_w );
912 
913  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
914  // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
915  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
916  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
917  // FLA_Copy( A22_l, a22l );
918  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
919  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
920  // FLA_Copy( g0, s01 );
// NOTE(review): listing line 921 (the call opening and its first argument) is
// not shown here.
922  n_behind,
923  m_behind,
924  n_ahead,
925  buff_m1,
926  A20, rs_A, cs_A,
927  Y20, rs_Y, cs_Y,
928  Z20, rs_Z, cs_Z,
929  A02, rs_A, cs_A,
930  A22, rs_A, cs_A,
931  tmp21, inc_tmp,
932  s01, rs_S,
933  a12p, inc_ap,
934  w21, inc_w,
935  a22l, inc_al );
936 
937  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
939  n_ahead,
940  buff_1,
941  a12t, cs_A,
942  y21, rs_Y );
943 
944  // FLA_Househ2s_UT( FLA_RIGHT,
945  // a12p_t,
946  // a12p_b,
947  // alpha12, psi11_minus_alpha12, sigma11 );
948  FLA_Househ2s_UT_r_opd( n_ahead - 1,
949  a12p_t,
950  a12p_b, inc_ap,
951  &alpha12,
952  &psi11_minus_alpha12,
953  sigma11 );
954 
955  // FLA_Copy( a12p, v21 );
956  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
957  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
958  // FLA_Conjugate( v21_b );
960  n_ahead,
961  a12p, inc_ap,
962  v21, inc_v );
963  bl1_dmult4( buff_m1, &alpha12, v21_t, v21_t );
965  n_ahead,
966  &psi11_minus_alpha12,
967  v21, inc_v );
968  bl1_dconjv( n_ahead - 1,
969  v21_b, inc_v );
970 
971  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
972  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
973  bl1_dcopyconj( &alpha12, &minus_conj_alpha12 );
974  bl1_dneg1( &minus_conj_alpha12 );
975 
976  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
977  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
979  n_behind,
980  &minus_conj_alpha12,
981  A02, rs_A,
982  s01, rs_S );
984  n_behind,
985  &psi11_minus_alpha12,
986  s01, rs_S );
987 
988  // FLA_Copy( alpha12, a12t_l );
989  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
990  *a12t_l = alpha12;
992  n_ahead - 1,
993  v21_b, inc_v,
994  a12t_r, cs_A );
995  }
996 
997  // FLA_Copy( u21p, u21 );
999  m_ahead,
1000  u21p, inc_up,
1001  u21, inc_u );
1002 
1003  if ( n_ahead > 0 )
1004  {
1005  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1006  // FLA_Scal( FLA_MINUS_ONE, beta );
1007  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1009  n_ahead,
1010  y21, rs_Y,
1011  v21, inc_v,
1012  &beta );
// minus_inv_tau11 was set in the earlier n_ahead > 0 branch of this same
// iteration, so it is initialised whenever this branch runs.
1013  bl1_dscals( &minus_inv_tau11, &beta );
1014 
1015  // FLA_Copy( w21, z21 );
1016  // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
1017  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1018  // FLA_Axpy( beta, u21, z21 );
1020  m_ahead,
1021  w21, inc_w,
1022  z21, rs_Z );
1024  m_ahead,
1025  &minus_conj_alpha12,
1026  a22l, inc_al,
1027  z21, rs_Z );
1029  m_ahead,
1030  &psi11_minus_alpha12,
1031  z21, rs_Z );
1033  m_ahead,
1034  &beta,
1035  u21, inc_u,
1036  z21, rs_Z );
1037 
1038  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1039  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1041  n_ahead,
1042  tau11,
1043  y21, rs_Y );
1045  m_ahead,
1046  sigma11,
1047  z21, rs_Z );
1048  }
1049  else // if ( n_ahead == 0 )
1050  {
1051  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1052  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1054  n_behind,
1055  a10t, cs_A,
1056  t01, rs_T );
1059  m_ahead,
1060  n_behind,
1061  buff_1,
1062  A20, rs_A, cs_A,
1063  u21, inc_u,
1064  buff_1,
1065  t01, rs_T );
1066  }
1067 
1068  /*------------------------------------------------------------*/
1069 
1070  }
1071 
1072  // FLA_Obj_free( &w );
1073  // FLA_Obj_free( &al );
1074  // FLA_Obj_free( &ap );
1075  // FLA_Obj_free( &u );
1076  // FLA_Obj_free( &up );
1077  // FLA_Obj_free( &v );
1078  // FLA_Obj_free( &d );
1079  // FLA_Obj_free( &e );
// Release all nine scratch vectors allocated at the top of the function.
1080  FLA_free( buff_tmp );
1081  FLA_free( buff_w );
1082  FLA_free( buff_al );
1083  FLA_free( buff_ap );
1084  FLA_free( buff_u );
1085  FLA_free( buff_up );
1086  FLA_free( buff_v );
1087  FLA_free( buff_d );
1088  FLA_free( buff_e );
1089 
1090  return FLA_SUCCESS;
1091 }
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_dsetm(int m, int n, double *sigma, double *a, int a_rs, int a_cs)
Definition: bl1_setm.c:45
FLA_Error FLA_Househ2s_UT_r_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *alpha, double *chi_1_minus_alpha, double *tau)
Definition: FLA_Househ2s_UT.c:572
Definition: blis_type_defs.h:55
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_dsetv(int m, double *sigma, double *x, int incx)
Definition: bl1_setv.c:39
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_invscalv.c:26
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void bl1_dconjv(int m, double *x, int incx)
Definition: bl1_conjv.c:18
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opd_var1(int m_A, int n_A, double *buff_tau, double *buff_beta, double *buff_A, int rs_A, int cs_A, double *buff_u, int inc_u, double *buff_a, int inc_a, double *buff_y, int inc_y, double *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:207
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42
FLA_Error FLA_Fused_UYx_ZVx_opd_var1(int m_U, int n_U, int m_V, int n_V, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_V, int rs_V, int cs_V, double *buff_A, int rs_A, int cs_A, double *buff_temp, int inc_temp, double *buff_t, int inc_t, double *buff_a, int inc_a, double *buff_w, int inc_w, double *buff_al, int inc_al)
Definition: FLA_Fused_UYx_ZVx_opt_var1.c:331
int i
Definition: bl1_axmyv2.c:145
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_ofs_var4()

FLA_Error FLA_Bidiag_UT_u_step_ofs_var4 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_Y,
int  rs_Y,
int  cs_Y,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_saxpyv(), bl1_sconjv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sinvscalv(), bl1_ssetm(), bl1_ssetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_UYx_ZVx_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_Househ2s_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var4().

163 {
164  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
165  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
166  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
167 
168  float alpha12;
169  float minus_conj_alpha12;
170  float psi11_minus_alpha12;
171  float minus_inv_tau11;
172  float beta;
173  float last_elem;
174  int i;
175 
176  // b_alg = FLA_Obj_length( T );
177  int b_alg = m_TS;
178 
179  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
180  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
181  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
182  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
183  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
184  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
185  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
186  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
187  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
188  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
189  float* buff_tmp = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
190  float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
191  float* buff_al = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
192  float* buff_ap = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
193  float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
194  float* buff_up = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
195  float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
196  float* buff_d = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
197  float* buff_e = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
198  int inc_tmp = 1;
199  int inc_w = 1;
200  int inc_al = 1;
201  int inc_ap = 1;
202  int inc_u = 1;
203  int inc_up = 1;
204  int inc_v = 1;
205  int inc_d = 1;
206  int inc_e = 1;
207 
208  // FLA_Set( FLA_ZERO, Y );
209  // FLA_Set( FLA_ZERO, Z );
210  bl1_ssetm( n_A,
211  b_alg,
212  buff_0,
213  buff_Y, rs_Y, cs_Y );
214  bl1_ssetm( m_A,
215  b_alg,
216  buff_0,
217  buff_Z, rs_Z, cs_Z );
218 
219  for ( i = 0; i < b_alg; ++i )
220  {
221  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
222  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
223  float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
224  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
225  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
226  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
227  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
228  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
229 
230  float* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
231  float* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
232  float* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
233 
234  float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
235  float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
236  float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
237 
238  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
239  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
240 
241  float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
242  float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
243 
244  float* tmp21 = buff_tmp + (i+1)*inc_tmp;
245 
246  float* w21 = buff_w + (i+1)*inc_w;
247 
248  float* a22l = buff_al + (i+1)*inc_al;
249 
250  float* a12p = buff_ap + (i+1)*inc_ap;
251 
252  float* u21 = buff_u + (i+1)*inc_u;
253 
254  float* u21p = buff_up + (i+1)*inc_up;
255 
256  float* v21 = buff_v + (i+1)*inc_v;
257 
258  float* d0 = buff_d + (0 )*inc_d;
259 
260  float* e0 = buff_e + (0 )*inc_e;
261 
262  float* a12p_t = a12p + (0 )*inc_ap;
263  float* a12p_b = a12p + (1 )*inc_ap;
264 
265  float* v21_t = v21 + (0 )*inc_v;
266  float* v21_b = v21 + (1 )*inc_v;
267 
268  float* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
269 
270  float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
271  float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
272 
273  float* ABL = a10t;
274  float* ZBL = z10t;
275 
276  float* a2 = alpha11;
277 
278  int m_ahead = m_A - i - 1;
279  int n_ahead = n_A - i - 1;
280  int m_behind = i;
281  int n_behind = i;
282 
283  /*------------------------------------------------------------*/
284 
285  if ( m_behind > 0 )
286  {
287  // FLA_Copy( a01_b, last_elem );
288  // FLA_Set( FLA_ONE, a01_b );
289  last_elem = *a01_b;
290  *a01_b = *buff_1;
291  }
292 
293  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
294  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
297  m_ahead + 1,
298  n_behind,
299  buff_m1,
300  ABL, rs_A, cs_A,
301  y10t, cs_Y,
302  buff_1,
303  a2, rs_A );
306  m_ahead + 1,
307  n_behind,
308  buff_m1,
309  ZBL, rs_Z, cs_Z,
310  a01, rs_A,
311  buff_1,
312  a2, rs_A );
313 
314  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
315  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
318  n_ahead,
319  n_behind,
320  buff_m1,
321  Y20, rs_Y, cs_Y,
322  a10t, cs_A,
323  buff_1,
324  a12t, cs_A );
327  m_behind,
328  n_ahead,
329  buff_m1,
330  A02, rs_A, cs_A,
331  z10t, cs_Z,
332  buff_1,
333  a12t, cs_A );
334 
335  if ( m_behind > 0 )
336  {
337  // FLA_Copy( last_elem, a01_b );
338  *a01_b = last_elem;
339  }
340 
341  // FLA_Househ2_UT( FLA_LEFT,
342  // alpha11,
343  // a21, tau11 );
344  // FLA_Copy( a21, u21p );
345  FLA_Househ2_UT_l_ops( m_ahead,
346  alpha11,
347  a21, rs_A,
348  tau11 );
350  m_ahead,
351  a21, rs_A,
352  u21p, inc_up );
353 
354  if ( n_ahead > 0 )
355  {
356  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
357  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
358  bl1_sdiv3( buff_m1, tau11, &minus_inv_tau11 );
359 
360  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
361  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
363  n_ahead,
364  a12t, cs_A,
365  a12p, inc_ap );
367  n_ahead,
368  &minus_inv_tau11,
369  a12t, cs_A,
370  a12p, inc_ap );
371 
372  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
373  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
376  m_ahead,
377  n_behind,
378  buff_1,
379  A20, rs_A, cs_A,
380  u21p, inc_up,
381  buff_0,
382  d0, inc_d );
385  m_ahead,
386  n_behind,
387  buff_1,
388  Z20, rs_Z, cs_Z,
389  u21p, inc_up,
390  buff_0,
391  e0, inc_e );
392 
393  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
394  // FLA_Axpy( FLA_ONE, d0, t01 );
396  n_behind,
397  a10t, cs_A,
398  t01, rs_T );
400  n_behind,
401  buff_1,
402  d0, inc_d,
403  t01, rs_T );
404 
405  // FLA_Set( FLA_ZERO, y21 );
406  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
407  // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
408  bl1_ssetv( n_ahead,
409  buff_0,
410  y21, rs_Y );
413  n_ahead,
414  n_behind,
415  buff_m1,
416  Y20, rs_Y, cs_Y,
417  d0, inc_d,
418  buff_1,
419  y21, rs_Y );
422  m_behind,
423  n_ahead,
424  buff_m1,
425  A02, rs_A, cs_A,
426  e0, inc_e,
427  buff_1,
428  y21, rs_Y );
429 
430  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
431  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
432  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
433  FLA_Fused_Ahx_Axpy_Ax_ops_var1( m_ahead,
434  n_ahead,
435  tau11,
436  buff_1,
437  A22, rs_A, cs_A,
438  u21p, inc_up,
439  a12p, inc_ap,
440  y21, rs_Y,
441  w21, inc_w );
442 
443  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
444  // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
445  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
446  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
447  // FLA_Copy( A22_l, a22l );
448  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
449  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
450  // FLA_Copy( g0, s01 );
451  FLA_Fused_UYx_ZVx_ops_var1( m_ahead,
452  n_behind,
453  m_behind,
454  n_ahead,
455  buff_m1,
456  A20, rs_A, cs_A,
457  Y20, rs_Y, cs_Y,
458  Z20, rs_Z, cs_Z,
459  A02, rs_A, cs_A,
460  A22, rs_A, cs_A,
461  tmp21, inc_tmp,
462  s01, rs_S,
463  a12p, inc_ap,
464  w21, inc_w,
465  a22l, inc_al );
466 
467  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
469  n_ahead,
470  buff_1,
471  a12t, cs_A,
472  y21, rs_Y );
473 
474  // FLA_Househ2s_UT( FLA_RIGHT,
475  // a12p_t,
476  // a12p_b,
477  // alpha12, psi11_minus_alpha12, sigma11 );
478  FLA_Househ2s_UT_r_ops( n_ahead - 1,
479  a12p_t,
480  a12p_b, inc_ap,
481  &alpha12,
482  &psi11_minus_alpha12,
483  sigma11 );
484 
485  // FLA_Copy( a12p, v21 );
486  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
487  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
488  // FLA_Conjugate( v21_b );
490  n_ahead,
491  a12p, inc_ap,
492  v21, inc_v );
493  bl1_smult4( buff_m1, &alpha12, v21_t, v21_t );
495  n_ahead,
496  &psi11_minus_alpha12,
497  v21, inc_v );
498  bl1_sconjv( n_ahead - 1,
499  v21_b, inc_v );
500 
501  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
502  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
503  bl1_scopyconj( &alpha12, &minus_conj_alpha12 );
504  bl1_sneg1( &minus_conj_alpha12 );
505 
506  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
507  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
509  n_behind,
510  &minus_conj_alpha12,
511  A02, rs_A,
512  s01, rs_S );
514  n_behind,
515  &psi11_minus_alpha12,
516  s01, rs_S );
517 
518  // FLA_Copy( alpha12, a12t_l );
519  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
520  *a12t_l = alpha12;
522  n_ahead - 1,
523  v21_b, inc_v,
524  a12t_r, cs_A );
525  }
526 
527  // FLA_Copy( u21p, u21 );
529  m_ahead,
530  u21p, inc_up,
531  u21, inc_u );
532 
533  if ( n_ahead > 0 )
534  {
535  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
536  // FLA_Scal( FLA_MINUS_ONE, beta );
537  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
539  n_ahead,
540  y21, rs_Y,
541  v21, inc_v,
542  &beta );
543  bl1_sscals( &minus_inv_tau11, &beta );
544 
545  // FLA_Copy( w21, z21 );
546  // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
547  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
548  // FLA_Axpy( beta, u21, z21 );
550  m_ahead,
551  w21, inc_w,
552  z21, rs_Z );
554  m_ahead,
555  &minus_conj_alpha12,
556  a22l, inc_al,
557  z21, rs_Z );
559  m_ahead,
560  &psi11_minus_alpha12,
561  z21, rs_Z );
563  m_ahead,
564  &beta,
565  u21, inc_u,
566  z21, rs_Z );
567 
568  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
569  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
571  n_ahead,
572  tau11,
573  y21, rs_Y );
575  m_ahead,
576  sigma11,
577  z21, rs_Z );
578  }
579  else // if ( n_ahead == 0 )
580  {
581  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
582  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
584  n_behind,
585  a10t, cs_A,
586  t01, rs_T );
589  m_ahead,
590  n_behind,
591  buff_1,
592  A20, rs_A, cs_A,
593  u21, inc_u,
594  buff_1,
595  t01, rs_T );
596  }
597 
598  /*------------------------------------------------------------*/
599 
600  }
601 
602  // FLA_Obj_free( &w );
603  // FLA_Obj_free( &al );
604  // FLA_Obj_free( &ap );
605  // FLA_Obj_free( &u );
606  // FLA_Obj_free( &up );
607  // FLA_Obj_free( &v );
608  // FLA_Obj_free( &d );
609  // FLA_Obj_free( &e );
610  FLA_free( buff_tmp );
611  FLA_free( buff_w );
612  FLA_free( buff_al );
613  FLA_free( buff_ap );
614  FLA_free( buff_u );
615  FLA_free( buff_up );
616  FLA_free( buff_v );
617  FLA_free( buff_d );
618  FLA_free( buff_e );
619 
620  return FLA_SUCCESS;
621 }
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:55
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_invscalv.c:13
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void bl1_ssetv(int m, float *sigma, float *x, int incx)
Definition: bl1_setv.c:26
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_sconjv(int m, float *x, int incx)
Definition: bl1_conjv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Fused_UYx_ZVx_ops_var1(int m_U, int n_U, int m_V, int n_V, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_V, int rs_V, int cs_V, float *buff_A, int rs_A, int cs_A, float *buff_temp, int inc_temp, float *buff_t, int inc_t, float *buff_a, int inc_a, float *buff_w, int inc_w, float *buff_al, int inc_al)
Definition: FLA_Fused_UYx_ZVx_opt_var1.c:201
FLA_Error FLA_Househ2s_UT_r_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *alpha, float *chi_1_minus_alpha, float *tau)
Definition: FLA_Househ2s_UT.c:555
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
int i
Definition: bl1_axmyv2.c:145
FLA_Error FLA_Fused_Ahx_Axpy_Ax_ops_var1(int m_A, int n_A, float *buff_tau, float *buff_beta, float *buff_A, int rs_A, int cs_A, float *buff_u, int inc_u, float *buff_a, int inc_a, float *buff_y, int inc_y, float *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:143
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
void bl1_ssetm(int m, int n, float *sigma, float *a, int a_rs, int a_cs)
Definition: bl1_setm.c:29

◆ FLA_Bidiag_UT_u_step_ofu_var4()

FLA_Error FLA_Bidiag_UT_u_step_ofu_var4 ( FLA_Obj  A,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  T,
FLA_Obj  S 
)

References FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_ofd_var4(), FLA_Bidiag_UT_u_step_ofs_var4(), FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u_blf_var4(), and FLA_Bidiag_UT_u_ofu_var4().

36 {
37  FLA_Datatype datatype;
38  int m_A, n_A, m_TS;
39  int rs_A, cs_A;
40  int rs_Y, cs_Y;
41  int rs_Z, cs_Z;
42  int rs_T, cs_T;
43  int rs_S, cs_S;
44 
45  datatype = FLA_Obj_datatype( A );
46 
47  m_A = FLA_Obj_length( A );
48  n_A = FLA_Obj_width( A );
49  m_TS = FLA_Obj_length( T );
50 
51  rs_A = FLA_Obj_row_stride( A );
52  cs_A = FLA_Obj_col_stride( A );
53 
54  rs_Y = FLA_Obj_row_stride( Y );
55  cs_Y = FLA_Obj_col_stride( Y );
56 
57  rs_Z = FLA_Obj_row_stride( Z );
58  cs_Z = FLA_Obj_col_stride( Z );
59 
60  rs_T = FLA_Obj_row_stride( T );
61  cs_T = FLA_Obj_col_stride( T );
62 
63  rs_S = FLA_Obj_row_stride( S );
64  cs_S = FLA_Obj_col_stride( S );
65 
66 
67  switch ( datatype )
68  {
69  case FLA_FLOAT:
70  {
71  float* buff_A = FLA_FLOAT_PTR( A );
72  float* buff_Y = FLA_FLOAT_PTR( Y );
73  float* buff_Z = FLA_FLOAT_PTR( Z );
74  float* buff_T = FLA_FLOAT_PTR( T );
75  float* buff_S = FLA_FLOAT_PTR( S );
76 
77  FLA_Bidiag_UT_u_step_ofs_var4( m_A,
78  n_A,
79  m_TS,
80  buff_A, rs_A, cs_A,
81  buff_Y, rs_Y, cs_Y,
82  buff_Z, rs_Z, cs_Z,
83  buff_T, rs_T, cs_T,
84  buff_S, rs_S, cs_S );
85 
86  break;
87  }
88 
89  case FLA_DOUBLE:
90  {
91  double* buff_A = FLA_DOUBLE_PTR( A );
92  double* buff_Y = FLA_DOUBLE_PTR( Y );
93  double* buff_Z = FLA_DOUBLE_PTR( Z );
94  double* buff_T = FLA_DOUBLE_PTR( T );
95  double* buff_S = FLA_DOUBLE_PTR( S );
96 
97  FLA_Bidiag_UT_u_step_ofd_var4( m_A,
98  n_A,
99  m_TS,
100  buff_A, rs_A, cs_A,
101  buff_Y, rs_Y, cs_Y,
102  buff_Z, rs_Z, cs_Z,
103  buff_T, rs_T, cs_T,
104  buff_S, rs_S, cs_S );
105 
106  break;
107  }
108 
109  case FLA_COMPLEX:
110  {
111  scomplex* buff_A = FLA_COMPLEX_PTR( A );
112  scomplex* buff_Y = FLA_COMPLEX_PTR( Y );
113  scomplex* buff_Z = FLA_COMPLEX_PTR( Z );
114  scomplex* buff_T = FLA_COMPLEX_PTR( T );
115  scomplex* buff_S = FLA_COMPLEX_PTR( S );
116 
117  FLA_Bidiag_UT_u_step_ofc_var4( m_A,
118  n_A,
119  m_TS,
120  buff_A, rs_A, cs_A,
121  buff_Y, rs_Y, cs_Y,
122  buff_Z, rs_Z, cs_Z,
123  buff_T, rs_T, cs_T,
124  buff_S, rs_S, cs_S );
125 
126  break;
127  }
128 
129  case FLA_DOUBLE_COMPLEX:
130  {
131  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
132  dcomplex* buff_Y = FLA_DOUBLE_COMPLEX_PTR( Y );
133  dcomplex* buff_Z = FLA_DOUBLE_COMPLEX_PTR( Z );
134  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
135  dcomplex* buff_S = FLA_DOUBLE_COMPLEX_PTR( S );
136 
137  FLA_Bidiag_UT_u_step_ofz_var4( m_A,
138  n_A,
139  m_TS,
140  buff_A, rs_A, cs_A,
141  buff_Y, rs_Y, cs_Y,
142  buff_Z, rs_Z, cs_Z,
143  buff_T, rs_T, cs_T,
144  buff_S, rs_S, cs_S );
145 
146  break;
147  }
148  }
149 
150  return FLA_SUCCESS;
151 }
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Error FLA_Bidiag_UT_u_step_ofd_var4(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_fus_var4.c:625
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Bidiag_UT_u_step_ofz_var4(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_fus_var4.c:1565
Definition: blis_type_defs.h:132
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Error FLA_Bidiag_UT_u_step_ofs_var4(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_fus_var4.c:155
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Error FLA_Bidiag_UT_u_step_ofc_var4(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_fus_var4.c:1095
Definition: blis_type_defs.h:137

◆ FLA_Bidiag_UT_u_step_ofz_var4()

FLA_Error FLA_Bidiag_UT_u_step_ofz_var4 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *  buff_T,
int  rs_T,
int  cs_T,
dcomplex *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_zaxpyv(), bl1_zconjv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zinvscalv(), bl1_zscals(), bl1_zsetm(), bl1_zsetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_UYx_ZVx_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_Househ2s_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var4().

1573 {
1574  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
1575  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
1576  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
1577 
1578  dcomplex alpha12;
1579  dcomplex minus_conj_alpha12;
1580  dcomplex psi11_minus_alpha12;
1581  dcomplex minus_inv_tau11;
1582  dcomplex beta;
1583  dcomplex last_elem;
1584  int i;
1585 
1586  // b_alg = FLA_Obj_length( T );
1587  int b_alg = m_TS;
1588 
1589  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1590  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
1591  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
1592  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1593  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
1594  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1595  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
1596  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
1597  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
1598  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
1599  dcomplex* buff_tmp = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1600  dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1601  dcomplex* buff_al = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1602  dcomplex* buff_ap = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1603  dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1604  dcomplex* buff_up = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1605  dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1606  dcomplex* buff_d = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1607  dcomplex* buff_e = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1608  int inc_tmp = 1;
1609  int inc_w = 1;
1610  int inc_al = 1;
1611  int inc_ap = 1;
1612  int inc_u = 1;
1613  int inc_up = 1;
1614  int inc_v = 1;
1615  int inc_d = 1;
1616  int inc_e = 1;
1617 
1618  // FLA_Set( FLA_ZERO, Y );
1619  // FLA_Set( FLA_ZERO, Z );
1620  bl1_zsetm( n_A,
1621  b_alg,
1622  buff_0,
1623  buff_Y, rs_Y, cs_Y );
1624  bl1_zsetm( m_A,
1625  b_alg,
1626  buff_0,
1627  buff_Z, rs_Z, cs_Z );
1628 
1629  for ( i = 0; i < b_alg; ++i )
1630  {
1631  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1632  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1633  dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
1634  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1635  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1636  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1637  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1638  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1639 
1640  dcomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1641  dcomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1642  dcomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1643 
1644  dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1645  dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1646  dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1647 
1648  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1649  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1650 
1651  dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1652  dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1653 
1654  dcomplex* tmp21 = buff_tmp + (i+1)*inc_tmp;
1655 
1656  dcomplex* w21 = buff_w + (i+1)*inc_w;
1657 
1658  dcomplex* a22l = buff_al + (i+1)*inc_al;
1659 
1660  dcomplex* a12p = buff_ap + (i+1)*inc_ap;
1661 
1662  dcomplex* u21 = buff_u + (i+1)*inc_u;
1663 
1664  dcomplex* u21p = buff_up + (i+1)*inc_up;
1665 
1666  dcomplex* v21 = buff_v + (i+1)*inc_v;
1667 
1668  dcomplex* d0 = buff_d + (0 )*inc_d;
1669 
1670  dcomplex* e0 = buff_e + (0 )*inc_e;
1671 
1672  dcomplex* a12p_t = a12p + (0 )*inc_ap;
1673  dcomplex* a12p_b = a12p + (1 )*inc_ap;
1674 
1675  dcomplex* v21_t = v21 + (0 )*inc_v;
1676  dcomplex* v21_b = v21 + (1 )*inc_v;
1677 
1678  dcomplex* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
1679 
1680  dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1681  dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1682 
1683  dcomplex* ABL = a10t;
1684  dcomplex* ZBL = z10t;
1685 
1686  dcomplex* a2 = alpha11;
1687 
1688  int m_ahead = m_A - i - 1;
1689  int n_ahead = n_A - i - 1;
1690  int m_behind = i;
1691  int n_behind = i;
1692 
1693  /*------------------------------------------------------------*/
1694 
1695  if ( m_behind > 0 )
1696  {
1697  // FLA_Copy( a01_b, last_elem );
1698  // FLA_Set( FLA_ONE, a01_b );
1699  last_elem = *a01_b;
1700  *a01_b = *buff_1;
1701  }
1702 
1703  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1704  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
1707  m_ahead + 1,
1708  n_behind,
1709  buff_m1,
1710  ABL, rs_A, cs_A,
1711  y10t, cs_Y,
1712  buff_1,
1713  a2, rs_A );
1716  m_ahead + 1,
1717  n_behind,
1718  buff_m1,
1719  ZBL, rs_Z, cs_Z,
1720  a01, rs_A,
1721  buff_1,
1722  a2, rs_A );
1723 
1724  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1725  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
1728  n_ahead,
1729  n_behind,
1730  buff_m1,
1731  Y20, rs_Y, cs_Y,
1732  a10t, cs_A,
1733  buff_1,
1734  a12t, cs_A );
1737  m_behind,
1738  n_ahead,
1739  buff_m1,
1740  A02, rs_A, cs_A,
1741  z10t, cs_Z,
1742  buff_1,
1743  a12t, cs_A );
1744 
1745  if ( m_behind > 0 )
1746  {
1747  // FLA_Copy( last_elem, a01_b );
1748  *a01_b = last_elem;
1749  }
1750 
1751  // FLA_Househ2_UT( FLA_LEFT,
1752  // alpha11,
1753  // a21, tau11 );
1754  // FLA_Copy( a21, u21p );
1755  FLA_Househ2_UT_l_opz( m_ahead,
1756  alpha11,
1757  a21, rs_A,
1758  tau11 );
1760  m_ahead,
1761  a21, rs_A,
1762  u21p, inc_up );
1763 
1764  if ( n_ahead > 0 )
1765  {
1766  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1767  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1768  bl1_zdiv3( buff_m1, tau11, &minus_inv_tau11 );
1769 
1770  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1771  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1773  n_ahead,
1774  a12t, cs_A,
1775  a12p, inc_ap );
1777  n_ahead,
1778  &minus_inv_tau11,
1779  a12t, cs_A,
1780  a12p, inc_ap );
1781 
1782  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
1783  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
1786  m_ahead,
1787  n_behind,
1788  buff_1,
1789  A20, rs_A, cs_A,
1790  u21p, inc_up,
1791  buff_0,
1792  d0, inc_d );
1795  m_ahead,
1796  n_behind,
1797  buff_1,
1798  Z20, rs_Z, cs_Z,
1799  u21p, inc_up,
1800  buff_0,
1801  e0, inc_e );
1802 
1803  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1804  // FLA_Axpy( FLA_ONE, d0, t01 );
1806  n_behind,
1807  a10t, cs_A,
1808  t01, rs_T );
1810  n_behind,
1811  buff_1,
1812  d0, inc_d,
1813  t01, rs_T );
1814 
1815  // FLA_Set( FLA_ZERO, y21 );
1816  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1817  // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
1818  bl1_zsetv( n_ahead,
1819  buff_0,
1820  y21, rs_Y );
1823  n_ahead,
1824  n_behind,
1825  buff_m1,
1826  Y20, rs_Y, cs_Y,
1827  d0, inc_d,
1828  buff_1,
1829  y21, rs_Y );
1832  m_behind,
1833  n_ahead,
1834  buff_m1,
1835  A02, rs_A, cs_A,
1836  e0, inc_e,
1837  buff_1,
1838  y21, rs_Y );
1839 
1840  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
1841  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1842  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1843  FLA_Fused_Ahx_Axpy_Ax_opz_var1( m_ahead,
1844  n_ahead,
1845  tau11,
1846  buff_1,
1847  A22, rs_A, cs_A,
1848  u21p, inc_up,
1849  a12p, inc_ap,
1850  y21, rs_Y,
1851  w21, inc_w );
1852 
1853  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
1854  // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
1855  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
1856  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
1857  // FLA_Copy( A22_l, a22l );
1858  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
1859  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
1860  // FLA_Copy( g0, s01 );
1861  FLA_Fused_UYx_ZVx_opz_var1( m_ahead,
1862  n_behind,
1863  m_behind,
1864  n_ahead,
1865  buff_m1,
1866  A20, rs_A, cs_A,
1867  Y20, rs_Y, cs_Y,
1868  Z20, rs_Z, cs_Z,
1869  A02, rs_A, cs_A,
1870  A22, rs_A, cs_A,
1871  tmp21, inc_tmp,
1872  s01, rs_S,
1873  a12p, inc_ap,
1874  w21, inc_w,
1875  a22l, inc_al );
1876 
1877  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1879  n_ahead,
1880  buff_1,
1881  a12t, cs_A,
1882  y21, rs_Y );
1883 
1884  // FLA_Househ2s_UT( FLA_RIGHT,
1885  // a12p_t,
1886  // a12p_b,
1887  // alpha12, psi11_minus_alpha12, sigma11 );
1888  FLA_Househ2s_UT_r_opz( n_ahead - 1,
1889  a12p_t,
1890  a12p_b, inc_ap,
1891  &alpha12,
1892  &psi11_minus_alpha12,
1893  sigma11 );
1894 
1895  // FLA_Copy( a12p, v21 );
1896  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1897  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1898  // FLA_Conjugate( v21_b );
1900  n_ahead,
1901  a12p, inc_ap,
1902  v21, inc_v );
1903  bl1_zmult4( buff_m1, &alpha12, v21_t, v21_t );
1905  n_ahead,
1906  &psi11_minus_alpha12,
1907  v21, inc_v );
1908  bl1_zconjv( n_ahead - 1,
1909  v21_b, inc_v );
1910 
1911  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1912  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1913  bl1_zcopyconj( &alpha12, &minus_conj_alpha12 );
1914  bl1_zneg1( &minus_conj_alpha12 );
1915 
1916  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
1917  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
1919  n_behind,
1920  &minus_conj_alpha12,
1921  A02, rs_A,
1922  s01, rs_S );
1924  n_behind,
1925  &psi11_minus_alpha12,
1926  s01, rs_S );
1927 
1928  // FLA_Copy( alpha12, a12t_l );
1929  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1930  *a12t_l = alpha12;
1932  n_ahead - 1,
1933  v21_b, inc_v,
1934  a12t_r, cs_A );
1935  }
1936 
1937  // FLA_Copy( u21p, u21 );
1939  m_ahead,
1940  u21p, inc_up,
1941  u21, inc_u );
1942 
1943  if ( n_ahead > 0 )
1944  {
1945  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1946  // FLA_Scal( FLA_MINUS_ONE, beta );
1947  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1949  n_ahead,
1950  y21, rs_Y,
1951  v21, inc_v,
1952  &beta );
1953  bl1_zscals( &minus_inv_tau11, &beta );
1954 
1955  // FLA_Copy( w21, z21 );
1956  // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
1957  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1958  // FLA_Axpy( beta, u21, z21 );
1960  m_ahead,
1961  w21, inc_w,
1962  z21, rs_Z );
1964  m_ahead,
1965  &minus_conj_alpha12,
1966  a22l, inc_al,
1967  z21, rs_Z );
1969  m_ahead,
1970  &psi11_minus_alpha12,
1971  z21, rs_Z );
1973  m_ahead,
1974  &beta,
1975  u21, inc_u,
1976  z21, rs_Z );
1977 
1978  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1979  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1981  n_ahead,
1982  tau11,
1983  y21, rs_Y );
1985  m_ahead,
1986  sigma11,
1987  z21, rs_Z );
1988  }
1989  else // if ( n_ahead == 0 )
1990  {
1991  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1992  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1994  n_behind,
1995  a10t, cs_A,
1996  t01, rs_T );
1999  m_ahead,
2000  n_behind,
2001  buff_1,
2002  A20, rs_A, cs_A,
2003  u21, inc_u,
2004  buff_1,
2005  t01, rs_T );
2006  }
2007 
2008  /*------------------------------------------------------------*/
2009 
2010  }
2011 
2012  // FLA_Obj_free( &w );
2013  // FLA_Obj_free( &al );
2014  // FLA_Obj_free( &ap );
2015  // FLA_Obj_free( &u );
2016  // FLA_Obj_free( &up );
2017  // FLA_Obj_free( &v );
2018  // FLA_Obj_free( &d );
2019  // FLA_Obj_free( &e );
2020  FLA_free( buff_tmp );
2021  FLA_free( buff_w );
2022  FLA_free( buff_al );
2023  FLA_free( buff_ap );
2024  FLA_free( buff_u );
2025  FLA_free( buff_up );
2026  FLA_free( buff_v );
2027  FLA_free( buff_d );
2028  FLA_free( buff_e );
2029 
2030  return FLA_SUCCESS;
2031 }
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_invscalv.c:78
void bl1_zsetv(int m, dcomplex *sigma, dcomplex *x, int incx)
Definition: bl1_setv.c:66
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
Definition: blis_type_defs.h:55
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_beta, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_u, int inc_u, dcomplex *buff_a, int inc_a, dcomplex *buff_y, int inc_y, dcomplex *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:390
FLA_Error FLA_Fused_UYx_ZVx_opz_var1(int m_U, int n_U, int m_V, int n_V, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_V, int rs_V, int cs_V, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_temp, int inc_temp, dcomplex *buff_t, int inc_t, dcomplex *buff_a, int inc_a, dcomplex *buff_w, int inc_w, dcomplex *buff_al, int inc_al)
Definition: FLA_Fused_UYx_ZVx_opt_var1.c:542
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
FLA_Error FLA_Househ2s_UT_r_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *alpha, dcomplex *chi_1_minus_alpha, dcomplex *tau)
Definition: FLA_Househ2s_UT.c:610
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:78
int i
Definition: bl1_axmyv2.c:145
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
Definition: blis_type_defs.h:137
bl1_zscals(beta, rho_yz)
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60
void bl1_zconjv(int m, dcomplex *x, int incx)
Definition: bl1_conjv.c:34