libflame  revision_anchor
Functions
FLA_Apply_G_rf_opt_var3.c File Reference

(r)

Functions

FLA_Error FLA_Apply_G_rf_opt_var3 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ops_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opd_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opc_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opz_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 

Function Documentation

◆ FLA_Apply_G_rf_opc_var3()

FLA_Error FLA_Apply_G_rf_opc_var3 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_opc_var1(), i, scomplex::imag, n_left, and scomplex::real.

Referenced by FLA_Apply_G_rf_blc_var3(), and FLA_Apply_G_rf_opt_var3().

1023 {
1024  float one = bl1_s1();
1025  float zero = bl1_s0();
1026  float gamma23_k1;
1027  float sigma23_k1;
1028  float gamma34_k1;
1029  float sigma34_k1;
1030  float gamma12_k2;
1031  float sigma12_k2;
1032  float gamma23_k2;
1033  float sigma23_k2;
1034  scomplex* a1;
1035  scomplex* a2;
1036  scomplex* a3;
1037  scomplex* a4;
1038  scomplex* g23_k1;
1039  scomplex* g34_k1;
1040  scomplex* g12_k2;
1041  scomplex* g23_k2;
1042  int i, j, g, k;
1043  int nG, nG_app;
1044  int n_iter;
1045  int n_left;
1046  int k_minus_1;
1047  int n_fuse;
1048  int k_fuse;
1049  int is_ident23_k1, is_ident34_k1;
1050  int is_ident12_k2, is_ident23_k2;
1051  int has_ident;
1052 
1053  k_minus_1 = k_G - 1;
1054  nG = n_A - 1;
1055  n_fuse = 2;
1056  k_fuse = 2;
1057 
1058  // Use the simple variant for nG < 2*(k - 1) or k == 1.
1059  if ( nG < 2*k_minus_1 || k_G == 1 )
1060  {
1061  FLA_Apply_G_rf_opc_var1( k_G,
1062  m_A,
1063  n_A,
1064  buff_G, rs_G, cs_G,
1065  buff_A, rs_A, cs_A );
1066  return FLA_SUCCESS;
1067  }
1068 
1069 
1070  // Start-up phase.
1071 
1072  for ( j = -1; j < k_minus_1; j += n_fuse )
1073  {
1074  nG_app = j + 2;
1075  n_iter = nG_app / k_fuse;
1076  n_left = 1;
1077 
1078  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1079  {
1080  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1081  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1082  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1083  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1084  a1 = buff_A + (g - 1)*cs_A;
1085  a2 = buff_A + (g )*cs_A;
1086  a3 = buff_A + (g + 1)*cs_A;
1087  a4 = buff_A + (g + 2)*cs_A;
1088 
1089  gamma23_k1 = g23_k1->real;
1090  sigma23_k1 = g23_k1->imag;
1091  gamma34_k1 = g34_k1->real;
1092  sigma34_k1 = g34_k1->imag;
1093  gamma12_k2 = g12_k2->real;
1094  sigma12_k2 = g12_k2->imag;
1095  gamma23_k2 = g23_k2->real;
1096  sigma23_k2 = g23_k2->imag;
1097 
1098  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1099  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1100  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
1101  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
1102  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
1103  is_ident12_k2 || is_ident23_k2 );
1104 
1105  if ( has_ident )
1106  {
1107  // Apply to pairs of columns as needed.
1108 
1109  if ( !is_ident23_k1 )
1110  MAC_Apply_G_mx2_opc( m_A,
1111  &gamma23_k1,
1112  &sigma23_k1,
1113  a2, rs_A,
1114  a3, rs_A );
1115 
1116  if ( !is_ident34_k1 )
1117  MAC_Apply_G_mx2_opc( m_A,
1118  &gamma34_k1,
1119  &sigma34_k1,
1120  a3, rs_A,
1121  a4, rs_A );
1122 
1123  if ( !is_ident12_k2 )
1124  MAC_Apply_G_mx2_opc( m_A,
1125  &gamma12_k2,
1126  &sigma12_k2,
1127  a1, rs_A,
1128  a2, rs_A );
1129 
1130  if ( !is_ident23_k2 )
1131  MAC_Apply_G_mx2_opc( m_A,
1132  &gamma23_k2,
1133  &sigma23_k2,
1134  a2, rs_A,
1135  a3, rs_A );
1136  }
1137  else
1138  {
1139  // Apply to all four columns.
1140 
1141  MAC_Apply_G_mx4s_opc( m_A,
1142  &gamma23_k1,
1143  &sigma23_k1,
1144  &gamma34_k1,
1145  &sigma34_k1,
1146  &gamma12_k2,
1147  &sigma12_k2,
1148  &gamma23_k2,
1149  &sigma23_k2,
1150  a1, rs_A,
1151  a2, rs_A,
1152  a3, rs_A,
1153  a4, rs_A );
1154  }
1155  }
1156 
1157  if ( n_left == 1 )
1158  {
1159  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1160  a3 = buff_A + (g + 1)*cs_A;
1161  a4 = buff_A + (g + 2)*cs_A;
1162 
1163  gamma34_k1 = g34_k1->real;
1164  sigma34_k1 = g34_k1->imag;
1165 
1166  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1167 
1168  if ( !is_ident34_k1 )
1169  MAC_Apply_G_mx2_opc( m_A,
1170  &gamma34_k1,
1171  &sigma34_k1,
1172  a3, rs_A,
1173  a4, rs_A );
1174  }
1175  }
1176 
1177  // Pipeline stage
1178 
1179  for ( ; j < nG - 1; j += n_fuse )
1180  {
1181  nG_app = k_G;
1182  n_iter = nG_app / k_fuse;
1183  n_left = nG_app % k_fuse;
1184 
1185  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1186  {
1187  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1188  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1189  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1190  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1191  a1 = buff_A + (g - 1)*cs_A;
1192  a2 = buff_A + (g )*cs_A;
1193  a3 = buff_A + (g + 1)*cs_A;
1194  a4 = buff_A + (g + 2)*cs_A;
1195 
1196  gamma23_k1 = g23_k1->real;
1197  sigma23_k1 = g23_k1->imag;
1198  gamma34_k1 = g34_k1->real;
1199  sigma34_k1 = g34_k1->imag;
1200  gamma12_k2 = g12_k2->real;
1201  sigma12_k2 = g12_k2->imag;
1202  gamma23_k2 = g23_k2->real;
1203  sigma23_k2 = g23_k2->imag;
1204 
1205  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1206  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1207  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
1208  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
1209  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
1210  is_ident12_k2 || is_ident23_k2 );
1211 
1212  if ( has_ident )
1213  {
1214  // Apply to pairs of columns as needed.
1215 
1216  if ( !is_ident23_k1 )
1217  MAC_Apply_G_mx2_opc( m_A,
1218  &gamma23_k1,
1219  &sigma23_k1,
1220  a2, rs_A,
1221  a3, rs_A );
1222 
1223  if ( !is_ident34_k1 )
1224  MAC_Apply_G_mx2_opc( m_A,
1225  &gamma34_k1,
1226  &sigma34_k1,
1227  a3, rs_A,
1228  a4, rs_A );
1229 
1230  if ( !is_ident12_k2 )
1231  MAC_Apply_G_mx2_opc( m_A,
1232  &gamma12_k2,
1233  &sigma12_k2,
1234  a1, rs_A,
1235  a2, rs_A );
1236 
1237  if ( !is_ident23_k2 )
1238  MAC_Apply_G_mx2_opc( m_A,
1239  &gamma23_k2,
1240  &sigma23_k2,
1241  a2, rs_A,
1242  a3, rs_A );
1243  }
1244  else
1245  {
1246  // Apply to all four columns.
1247 
1248  MAC_Apply_G_mx4s_opc( m_A,
1249  &gamma23_k1,
1250  &sigma23_k1,
1251  &gamma34_k1,
1252  &sigma34_k1,
1253  &gamma12_k2,
1254  &sigma12_k2,
1255  &gamma23_k2,
1256  &sigma23_k2,
1257  a1, rs_A,
1258  a2, rs_A,
1259  a3, rs_A,
1260  a4, rs_A );
1261  }
1262  }
1263 
1264  if ( n_left == 1 )
1265  {
1266  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1267  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1268  a2 = buff_A + (g )*cs_A;
1269  a3 = buff_A + (g + 1)*cs_A;
1270  a4 = buff_A + (g + 2)*cs_A;
1271 
1272  gamma23_k1 = g23_k1->real;
1273  sigma23_k1 = g23_k1->imag;
1274  gamma34_k1 = g34_k1->real;
1275  sigma34_k1 = g34_k1->imag;
1276 
1277  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1278  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1279 
1280  if ( !is_ident23_k1 && is_ident34_k1 )
1281  {
1282  MAC_Apply_G_mx2_opc( m_A,
1283  &gamma23_k1,
1284  &sigma23_k1,
1285  a2, rs_A,
1286  a3, rs_A );
1287  }
1288  else if ( is_ident23_k1 && !is_ident34_k1 )
1289  {
1290  MAC_Apply_G_mx2_opc( m_A,
1291  &gamma34_k1,
1292  &sigma34_k1,
1293  a3, rs_A,
1294  a4, rs_A );
1295  }
1296  else
1297  {
1298  MAC_Apply_G_mx3_opc( m_A,
1299  &gamma23_k1,
1300  &sigma23_k1,
1301  &gamma34_k1,
1302  &sigma34_k1,
1303  a2, rs_A,
1304  a3, rs_A,
1305  a4, rs_A );
1306  }
1307  }
1308  }
1309 
1310  // Shutdown stage
1311 
1312  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
1313  {
1314  g = nG - 1;
1315  k = j;
1316 
1317  //n_left = 1;
1318  //if ( n_left == 1 )
1319  {
1320  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1321  a2 = buff_A + (g )*cs_A;
1322  a3 = buff_A + (g + 1)*cs_A;
1323 
1324  gamma23_k1 = g23_k1->real;
1325  sigma23_k1 = g23_k1->imag;
1326 
1327  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1328 
1329  if ( !is_ident23_k1 )
1330  MAC_Apply_G_mx2_opc( m_A,
1331  &gamma23_k1,
1332  &sigma23_k1,
1333  a2, rs_A,
1334  a3, rs_A );
1335  ++k;
1336  --g;
1337  }
1338 
1339  nG_app = k_minus_1 - j;
1340  n_iter = nG_app / k_fuse;
1341  n_left = nG_app % k_fuse;
1342 
1343  for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1344  {
1345  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1346  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1347  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1348  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1349  a1 = buff_A + (g - 1)*cs_A;
1350  a2 = buff_A + (g )*cs_A;
1351  a3 = buff_A + (g + 1)*cs_A;
1352  a4 = buff_A + (g + 2)*cs_A;
1353 
1354  gamma23_k1 = g23_k1->real;
1355  sigma23_k1 = g23_k1->imag;
1356  gamma34_k1 = g34_k1->real;
1357  sigma34_k1 = g34_k1->imag;
1358  gamma12_k2 = g12_k2->real;
1359  sigma12_k2 = g12_k2->imag;
1360  gamma23_k2 = g23_k2->real;
1361  sigma23_k2 = g23_k2->imag;
1362 
1363  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1364  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1365  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
1366  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
1367  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
1368  is_ident12_k2 || is_ident23_k2 );
1369 
1370  if ( has_ident )
1371  {
1372  // Apply to pairs of columns as needed.
1373 
1374  if ( !is_ident23_k1 )
1375  MAC_Apply_G_mx2_opc( m_A,
1376  &gamma23_k1,
1377  &sigma23_k1,
1378  a2, rs_A,
1379  a3, rs_A );
1380 
1381  if ( !is_ident34_k1 )
1382  MAC_Apply_G_mx2_opc( m_A,
1383  &gamma34_k1,
1384  &sigma34_k1,
1385  a3, rs_A,
1386  a4, rs_A );
1387 
1388  if ( !is_ident12_k2 )
1389  MAC_Apply_G_mx2_opc( m_A,
1390  &gamma12_k2,
1391  &sigma12_k2,
1392  a1, rs_A,
1393  a2, rs_A );
1394 
1395  if ( !is_ident23_k2 )
1396  MAC_Apply_G_mx2_opc( m_A,
1397  &gamma23_k2,
1398  &sigma23_k2,
1399  a2, rs_A,
1400  a3, rs_A );
1401  }
1402  else
1403  {
1404  // Apply to all four columns.
1405 
1406  MAC_Apply_G_mx4s_opc( m_A,
1407  &gamma23_k1,
1408  &sigma23_k1,
1409  &gamma34_k1,
1410  &sigma34_k1,
1411  &gamma12_k2,
1412  &sigma12_k2,
1413  &gamma23_k2,
1414  &sigma23_k2,
1415  a1, rs_A,
1416  a2, rs_A,
1417  a3, rs_A,
1418  a4, rs_A );
1419  }
1420  }
1421 
1422  if ( n_left == 1 )
1423  {
1424  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1425  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1426  a2 = buff_A + (g )*cs_A;
1427  a3 = buff_A + (g + 1)*cs_A;
1428  a4 = buff_A + (g + 2)*cs_A;
1429 
1430  gamma23_k1 = g23_k1->real;
1431  sigma23_k1 = g23_k1->imag;
1432  gamma34_k1 = g34_k1->real;
1433  sigma34_k1 = g34_k1->imag;
1434 
1435  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1436  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1437 
1438  if ( !is_ident23_k1 && is_ident34_k1 )
1439  {
1440  MAC_Apply_G_mx2_opc( m_A,
1441  &gamma23_k1,
1442  &sigma23_k1,
1443  a2, rs_A,
1444  a3, rs_A );
1445  }
1446  else if ( is_ident23_k1 && !is_ident34_k1 )
1447  {
1448  MAC_Apply_G_mx2_opc( m_A,
1449  &gamma34_k1,
1450  &sigma34_k1,
1451  a3, rs_A,
1452  a4, rs_A );
1453  }
1454  else
1455  {
1456  MAC_Apply_G_mx3_opc( m_A,
1457  &gamma23_k1,
1458  &sigma23_k1,
1459  &gamma34_k1,
1460  &sigma34_k1,
1461  a2, rs_A,
1462  a3, rs_A,
1463  a4, rs_A );
1464  }
1465  }
1466  }
1467 
1468  return FLA_SUCCESS;
1469 }
float real
Definition: blis_type_defs.h:134
float bl1_s1(void)
Definition: bl1_constants.c:47
int n_left
Definition: bl1_axmyv2.c:149
scomplex
Definition: blis_type_defs.h:132
FLA_Error FLA_Apply_G_rf_opc_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var1.c:215
int i
Definition: bl1_axmyv2.c:145
float bl1_s0(void)
Definition: bl1_constants.c:111
float imag
Definition: blis_type_defs.h:134

◆ FLA_Apply_G_rf_opd_var3()

FLA_Error FLA_Apply_G_rf_opd_var3 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opd_var1(), i, dcomplex::imag, n_left, and dcomplex::real.

Referenced by FLA_Apply_G_rf_bld_var3(), and FLA_Apply_G_rf_opt_var3().

570 {
571  double one = bl1_d1();
572  double zero = bl1_d0();
573  double gamma23_k1;
574  double sigma23_k1;
575  double gamma34_k1;
576  double sigma34_k1;
577  double gamma12_k2;
578  double sigma12_k2;
579  double gamma23_k2;
580  double sigma23_k2;
581  double* a1;
582  double* a2;
583  double* a3;
584  double* a4;
585  dcomplex* g23_k1;
586  dcomplex* g34_k1;
587  dcomplex* g12_k2;
588  dcomplex* g23_k2;
589  int i, j, g, k;
590  int nG, nG_app;
591  int n_iter;
592  int n_left;
593  int k_minus_1;
594  int n_fuse;
595  int k_fuse;
596  int is_ident23_k1, is_ident34_k1;
597  int is_ident12_k2, is_ident23_k2;
598  int has_ident;
599 
600  k_minus_1 = k_G - 1;
601  nG = n_A - 1;
602  n_fuse = 2;
603  k_fuse = 2;
604 
605  // Use the simple variant for nG < 2*(k - 1) or k == 1.
606  if ( nG < 2*k_minus_1 || k_G == 1 )
607  {
608  FLA_Apply_G_rf_opd_var1( k_G,
609  m_A,
610  n_A,
611  buff_G, rs_G, cs_G,
612  buff_A, rs_A, cs_A );
613  return FLA_SUCCESS;
614  }
615 
616 
617  // Start-up phase.
618 
619  for ( j = -1; j < k_minus_1; j += n_fuse )
620  {
621  nG_app = j + 2;
622  n_iter = nG_app / k_fuse;
623  n_left = 1;
624 
625  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
626  {
627  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
628  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
629  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
630  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
631  a1 = buff_A + (g - 1)*cs_A;
632  a2 = buff_A + (g )*cs_A;
633  a3 = buff_A + (g + 1)*cs_A;
634  a4 = buff_A + (g + 2)*cs_A;
635 
636  gamma23_k1 = g23_k1->real;
637  sigma23_k1 = g23_k1->imag;
638  gamma34_k1 = g34_k1->real;
639  sigma34_k1 = g34_k1->imag;
640  gamma12_k2 = g12_k2->real;
641  sigma12_k2 = g12_k2->imag;
642  gamma23_k2 = g23_k2->real;
643  sigma23_k2 = g23_k2->imag;
644 
645  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
646  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
647  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
648  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
649  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
650  is_ident12_k2 || is_ident23_k2 );
651 
652  if ( has_ident )
653  {
654  // Apply to pairs of columns as needed.
655 
656  if ( !is_ident23_k1 )
657  MAC_Apply_G_mx2_opd( m_A,
658  &gamma23_k1,
659  &sigma23_k1,
660  a2, rs_A,
661  a3, rs_A );
662 
663  if ( !is_ident34_k1 )
664  MAC_Apply_G_mx2_opd( m_A,
665  &gamma34_k1,
666  &sigma34_k1,
667  a3, rs_A,
668  a4, rs_A );
669 
670  if ( !is_ident12_k2 )
671  MAC_Apply_G_mx2_opd( m_A,
672  &gamma12_k2,
673  &sigma12_k2,
674  a1, rs_A,
675  a2, rs_A );
676 
677  if ( !is_ident23_k2 )
678  MAC_Apply_G_mx2_opd( m_A,
679  &gamma23_k2,
680  &sigma23_k2,
681  a2, rs_A,
682  a3, rs_A );
683  }
684  else
685  {
686  // Apply to all four columns.
687 
688  MAC_Apply_G_mx4s_opd( m_A,
689  &gamma23_k1,
690  &sigma23_k1,
691  &gamma34_k1,
692  &sigma34_k1,
693  &gamma12_k2,
694  &sigma12_k2,
695  &gamma23_k2,
696  &sigma23_k2,
697  a1, rs_A,
698  a2, rs_A,
699  a3, rs_A,
700  a4, rs_A );
701  }
702  }
703 
704  if ( n_left == 1 )
705  {
706  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
707  a3 = buff_A + (g + 1)*cs_A;
708  a4 = buff_A + (g + 2)*cs_A;
709 
710  gamma34_k1 = g34_k1->real;
711  sigma34_k1 = g34_k1->imag;
712 
713  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
714 
715  if ( !is_ident34_k1 )
716  MAC_Apply_G_mx2_opd( m_A,
717  &gamma34_k1,
718  &sigma34_k1,
719  a3, rs_A,
720  a4, rs_A );
721  }
722  }
723 
724  // Pipeline stage
725 
726  for ( ; j < nG - 1; j += n_fuse )
727  {
728  nG_app = k_G;
729  n_iter = nG_app / k_fuse;
730  n_left = nG_app % k_fuse;
731 
732  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
733  {
734  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
735  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
736  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
737  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
738  a1 = buff_A + (g - 1)*cs_A;
739  a2 = buff_A + (g )*cs_A;
740  a3 = buff_A + (g + 1)*cs_A;
741  a4 = buff_A + (g + 2)*cs_A;
742 
743  gamma23_k1 = g23_k1->real;
744  sigma23_k1 = g23_k1->imag;
745  gamma34_k1 = g34_k1->real;
746  sigma34_k1 = g34_k1->imag;
747  gamma12_k2 = g12_k2->real;
748  sigma12_k2 = g12_k2->imag;
749  gamma23_k2 = g23_k2->real;
750  sigma23_k2 = g23_k2->imag;
751 
752  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
753  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
754  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
755  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
756  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
757  is_ident12_k2 || is_ident23_k2 );
758 
759  if ( has_ident )
760  {
761  // Apply to pairs of columns as needed.
762 
763  if ( !is_ident23_k1 )
764  MAC_Apply_G_mx2_opd( m_A,
765  &gamma23_k1,
766  &sigma23_k1,
767  a2, rs_A,
768  a3, rs_A );
769 
770  if ( !is_ident34_k1 )
771  MAC_Apply_G_mx2_opd( m_A,
772  &gamma34_k1,
773  &sigma34_k1,
774  a3, rs_A,
775  a4, rs_A );
776 
777  if ( !is_ident12_k2 )
778  MAC_Apply_G_mx2_opd( m_A,
779  &gamma12_k2,
780  &sigma12_k2,
781  a1, rs_A,
782  a2, rs_A );
783 
784  if ( !is_ident23_k2 )
785  MAC_Apply_G_mx2_opd( m_A,
786  &gamma23_k2,
787  &sigma23_k2,
788  a2, rs_A,
789  a3, rs_A );
790  }
791  else
792  {
793  // Apply to all four columns.
794 
795  MAC_Apply_G_mx4s_opd( m_A,
796  &gamma23_k1,
797  &sigma23_k1,
798  &gamma34_k1,
799  &sigma34_k1,
800  &gamma12_k2,
801  &sigma12_k2,
802  &gamma23_k2,
803  &sigma23_k2,
804  a1, rs_A,
805  a2, rs_A,
806  a3, rs_A,
807  a4, rs_A );
808  }
809  }
810 
811  if ( n_left == 1 )
812  {
813  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
814  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
815  a2 = buff_A + (g )*cs_A;
816  a3 = buff_A + (g + 1)*cs_A;
817  a4 = buff_A + (g + 2)*cs_A;
818 
819  gamma23_k1 = g23_k1->real;
820  sigma23_k1 = g23_k1->imag;
821  gamma34_k1 = g34_k1->real;
822  sigma34_k1 = g34_k1->imag;
823 
824  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
825  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
826 
827  if ( !is_ident23_k1 && is_ident34_k1 )
828  {
829  MAC_Apply_G_mx2_opd( m_A,
830  &gamma23_k1,
831  &sigma23_k1,
832  a2, rs_A,
833  a3, rs_A );
834  }
835  else if ( is_ident23_k1 && !is_ident34_k1 )
836  {
837  MAC_Apply_G_mx2_opd( m_A,
838  &gamma34_k1,
839  &sigma34_k1,
840  a3, rs_A,
841  a4, rs_A );
842  }
843  else
844  {
845  MAC_Apply_G_mx3_opd( m_A,
846  &gamma23_k1,
847  &sigma23_k1,
848  &gamma34_k1,
849  &sigma34_k1,
850  a2, rs_A,
851  a3, rs_A,
852  a4, rs_A );
853  }
854  }
855  }
856 
857  // Shutdown stage
858 
859  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
860  {
861  g = nG - 1;
862  k = j;
863 
864  //n_left = 1;
865  //if ( n_left == 1 )
866  {
867  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
868  a2 = buff_A + (g )*cs_A;
869  a3 = buff_A + (g + 1)*cs_A;
870 
871  gamma23_k1 = g23_k1->real;
872  sigma23_k1 = g23_k1->imag;
873 
874  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
875 
876  if ( !is_ident23_k1 )
877  MAC_Apply_G_mx2_opd( m_A,
878  &gamma23_k1,
879  &sigma23_k1,
880  a2, rs_A,
881  a3, rs_A );
882  ++k;
883  --g;
884  }
885 
886  nG_app = k_minus_1 - j;
887  n_iter = nG_app / k_fuse;
888  n_left = nG_app % k_fuse;
889 
890  for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
891  {
892  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
893  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
894  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
895  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
896  a1 = buff_A + (g - 1)*cs_A;
897  a2 = buff_A + (g )*cs_A;
898  a3 = buff_A + (g + 1)*cs_A;
899  a4 = buff_A + (g + 2)*cs_A;
900 
901  gamma23_k1 = g23_k1->real;
902  sigma23_k1 = g23_k1->imag;
903  gamma34_k1 = g34_k1->real;
904  sigma34_k1 = g34_k1->imag;
905  gamma12_k2 = g12_k2->real;
906  sigma12_k2 = g12_k2->imag;
907  gamma23_k2 = g23_k2->real;
908  sigma23_k2 = g23_k2->imag;
909 
910  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
911  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
912  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
913  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
914  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
915  is_ident12_k2 || is_ident23_k2 );
916 
917  if ( has_ident )
918  {
919  // Apply to pairs of columns as needed.
920 
921  if ( !is_ident23_k1 )
922  MAC_Apply_G_mx2_opd( m_A,
923  &gamma23_k1,
924  &sigma23_k1,
925  a2, rs_A,
926  a3, rs_A );
927 
928  if ( !is_ident34_k1 )
929  MAC_Apply_G_mx2_opd( m_A,
930  &gamma34_k1,
931  &sigma34_k1,
932  a3, rs_A,
933  a4, rs_A );
934 
935  if ( !is_ident12_k2 )
936  MAC_Apply_G_mx2_opd( m_A,
937  &gamma12_k2,
938  &sigma12_k2,
939  a1, rs_A,
940  a2, rs_A );
941 
942  if ( !is_ident23_k2 )
943  MAC_Apply_G_mx2_opd( m_A,
944  &gamma23_k2,
945  &sigma23_k2,
946  a2, rs_A,
947  a3, rs_A );
948  }
949  else
950  {
951  // Apply to all four columns.
952 
953  MAC_Apply_G_mx4s_opd( m_A,
954  &gamma23_k1,
955  &sigma23_k1,
956  &gamma34_k1,
957  &sigma34_k1,
958  &gamma12_k2,
959  &sigma12_k2,
960  &gamma23_k2,
961  &sigma23_k2,
962  a1, rs_A,
963  a2, rs_A,
964  a3, rs_A,
965  a4, rs_A );
966  }
967  }
968 
969  if ( n_left == 1 )
970  {
971  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
972  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
973  a2 = buff_A + (g )*cs_A;
974  a3 = buff_A + (g + 1)*cs_A;
975  a4 = buff_A + (g + 2)*cs_A;
976 
977  gamma23_k1 = g23_k1->real;
978  sigma23_k1 = g23_k1->imag;
979  gamma34_k1 = g34_k1->real;
980  sigma34_k1 = g34_k1->imag;
981 
982  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
983  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
984 
985  if ( !is_ident23_k1 && is_ident34_k1 )
986  {
987  MAC_Apply_G_mx2_opd( m_A,
988  &gamma23_k1,
989  &sigma23_k1,
990  a2, rs_A,
991  a3, rs_A );
992  }
993  else if ( is_ident23_k1 && !is_ident34_k1 )
994  {
995  MAC_Apply_G_mx2_opd( m_A,
996  &gamma34_k1,
997  &sigma34_k1,
998  a3, rs_A,
999  a4, rs_A );
1000  }
1001  else
1002  {
1003  MAC_Apply_G_mx3_opd( m_A,
1004  &gamma23_k1,
1005  &sigma23_k1,
1006  &gamma34_k1,
1007  &sigma34_k1,
1008  a2, rs_A,
1009  a3, rs_A,
1010  a4, rs_A );
1011  }
1012  }
1013  }
1014 
1015  return FLA_SUCCESS;
1016 }
double imag
Definition: blis_type_defs.h:139
double bl1_d0(void)
Definition: bl1_constants.c:118
double real
Definition: blis_type_defs.h:139
int n_left
Definition: bl1_axmyv2.c:149
int i
Definition: bl1_axmyv2.c:145
double bl1_d1(void)
Definition: bl1_constants.c:54
dcomplex
Definition: blis_type_defs.h:137
FLA_Error FLA_Apply_G_rf_opd_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var1.c:164

◆ FLA_Apply_G_rf_ops_var3()

FLA_Error FLA_Apply_G_rf_ops_var3 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_ops_var1(), i, scomplex::imag, n_left, and scomplex::real.

Referenced by FLA_Apply_G_rf_bls_var3(), and FLA_Apply_G_rf_opt_var3().

117 {
118  float one = bl1_s1();
119  float zero = bl1_s0();
120  float gamma23_k1;
121  float sigma23_k1;
122  float gamma34_k1;
123  float sigma34_k1;
124  float gamma12_k2;
125  float sigma12_k2;
126  float gamma23_k2;
127  float sigma23_k2;
128  float* a1;
129  float* a2;
130  float* a3;
131  float* a4;
132  scomplex* g23_k1;
133  scomplex* g34_k1;
134  scomplex* g12_k2;
135  scomplex* g23_k2;
136  int i, j, g, k;
137  int nG, nG_app;
138  int n_iter;
139  int n_left;
140  int k_minus_1;
141  int n_fuse;
142  int k_fuse;
143  int is_ident23_k1, is_ident34_k1;
144  int is_ident12_k2, is_ident23_k2;
145  int has_ident;
146 
147  k_minus_1 = k_G - 1;
148  nG = n_A - 1;
149  n_fuse = 2;
150  k_fuse = 2;
151 
152  // Use the simple variant for nG < 2*(k - 1) or k == 1.
153  if ( nG < 2*k_minus_1 || k_G == 1 )
154  {
155  FLA_Apply_G_rf_ops_var1( k_G,
156  m_A,
157  n_A,
158  buff_G, rs_G, cs_G,
159  buff_A, rs_A, cs_A );
160  return FLA_SUCCESS;
161  }
162 
163 
164  // Start-up phase.
165 
166  for ( j = -1; j < k_minus_1; j += n_fuse )
167  {
168  nG_app = j + 2;
169  n_iter = nG_app / k_fuse;
170  n_left = 1;
171 
172  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
173  {
174  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
175  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
176  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
177  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
178  a1 = buff_A + (g - 1)*cs_A;
179  a2 = buff_A + (g )*cs_A;
180  a3 = buff_A + (g + 1)*cs_A;
181  a4 = buff_A + (g + 2)*cs_A;
182 
183  gamma23_k1 = g23_k1->real;
184  sigma23_k1 = g23_k1->imag;
185  gamma34_k1 = g34_k1->real;
186  sigma34_k1 = g34_k1->imag;
187  gamma12_k2 = g12_k2->real;
188  sigma12_k2 = g12_k2->imag;
189  gamma23_k2 = g23_k2->real;
190  sigma23_k2 = g23_k2->imag;
191 
192  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
193  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
194  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
195  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
196  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
197  is_ident12_k2 || is_ident23_k2 );
198 
199  if ( has_ident )
200  {
201  // Apply to pairs of columns as needed.
202 
203  if ( !is_ident23_k1 )
204  MAC_Apply_G_mx2_ops( m_A,
205  &gamma23_k1,
206  &sigma23_k1,
207  a2, rs_A,
208  a3, rs_A );
209 
210  if ( !is_ident34_k1 )
211  MAC_Apply_G_mx2_ops( m_A,
212  &gamma34_k1,
213  &sigma34_k1,
214  a3, rs_A,
215  a4, rs_A );
216 
217  if ( !is_ident12_k2 )
218  MAC_Apply_G_mx2_ops( m_A,
219  &gamma12_k2,
220  &sigma12_k2,
221  a1, rs_A,
222  a2, rs_A );
223 
224  if ( !is_ident23_k2 )
225  MAC_Apply_G_mx2_ops( m_A,
226  &gamma23_k2,
227  &sigma23_k2,
228  a2, rs_A,
229  a3, rs_A );
230  }
231  else
232  {
233  // Apply to all four columns.
234 
235  MAC_Apply_G_mx4s_ops( m_A,
236  &gamma23_k1,
237  &sigma23_k1,
238  &gamma34_k1,
239  &sigma34_k1,
240  &gamma12_k2,
241  &sigma12_k2,
242  &gamma23_k2,
243  &sigma23_k2,
244  a1, rs_A,
245  a2, rs_A,
246  a3, rs_A,
247  a4, rs_A );
248  }
249  }
250 
251  if ( n_left == 1 )
252  {
253  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
254  a3 = buff_A + (g + 1)*cs_A;
255  a4 = buff_A + (g + 2)*cs_A;
256 
257  gamma34_k1 = g34_k1->real;
258  sigma34_k1 = g34_k1->imag;
259 
260  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
261 
262  if ( !is_ident34_k1 )
263  MAC_Apply_G_mx2_ops( m_A,
264  &gamma34_k1,
265  &sigma34_k1,
266  a3, rs_A,
267  a4, rs_A );
268  }
269  }
270 
271  // Pipeline stage
272 
273  for ( ; j < nG - 1; j += n_fuse )
274  {
275  nG_app = k_G;
276  n_iter = nG_app / k_fuse;
277  n_left = nG_app % k_fuse;
278 
279  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
280  {
281  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
282  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
283  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
284  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
285  a1 = buff_A + (g - 1)*cs_A;
286  a2 = buff_A + (g )*cs_A;
287  a3 = buff_A + (g + 1)*cs_A;
288  a4 = buff_A + (g + 2)*cs_A;
289 
290  gamma23_k1 = g23_k1->real;
291  sigma23_k1 = g23_k1->imag;
292  gamma34_k1 = g34_k1->real;
293  sigma34_k1 = g34_k1->imag;
294  gamma12_k2 = g12_k2->real;
295  sigma12_k2 = g12_k2->imag;
296  gamma23_k2 = g23_k2->real;
297  sigma23_k2 = g23_k2->imag;
298 
299  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
300  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
301  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
302  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
303  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
304  is_ident12_k2 || is_ident23_k2 );
305 
306  if ( has_ident )
307  {
308  // Apply to pairs of columns as needed.
309 
310  if ( !is_ident23_k1 )
311  MAC_Apply_G_mx2_ops( m_A,
312  &gamma23_k1,
313  &sigma23_k1,
314  a2, rs_A,
315  a3, rs_A );
316 
317  if ( !is_ident34_k1 )
318  MAC_Apply_G_mx2_ops( m_A,
319  &gamma34_k1,
320  &sigma34_k1,
321  a3, rs_A,
322  a4, rs_A );
323 
324  if ( !is_ident12_k2 )
325  MAC_Apply_G_mx2_ops( m_A,
326  &gamma12_k2,
327  &sigma12_k2,
328  a1, rs_A,
329  a2, rs_A );
330 
331  if ( !is_ident23_k2 )
332  MAC_Apply_G_mx2_ops( m_A,
333  &gamma23_k2,
334  &sigma23_k2,
335  a2, rs_A,
336  a3, rs_A );
337  }
338  else
339  {
340  // Apply to all four columns.
341 
342  MAC_Apply_G_mx4s_ops( m_A,
343  &gamma23_k1,
344  &sigma23_k1,
345  &gamma34_k1,
346  &sigma34_k1,
347  &gamma12_k2,
348  &sigma12_k2,
349  &gamma23_k2,
350  &sigma23_k2,
351  a1, rs_A,
352  a2, rs_A,
353  a3, rs_A,
354  a4, rs_A );
355  }
356  }
357 
358  if ( n_left == 1 )
359  {
360  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
361  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
362  a2 = buff_A + (g )*cs_A;
363  a3 = buff_A + (g + 1)*cs_A;
364  a4 = buff_A + (g + 2)*cs_A;
365 
366  gamma23_k1 = g23_k1->real;
367  sigma23_k1 = g23_k1->imag;
368  gamma34_k1 = g34_k1->real;
369  sigma34_k1 = g34_k1->imag;
370 
371  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
372  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
373 
374  if ( !is_ident23_k1 && is_ident34_k1 )
375  {
376  MAC_Apply_G_mx2_ops( m_A,
377  &gamma23_k1,
378  &sigma23_k1,
379  a2, rs_A,
380  a3, rs_A );
381  }
382  else if ( is_ident23_k1 && !is_ident34_k1 )
383  {
384  MAC_Apply_G_mx2_ops( m_A,
385  &gamma34_k1,
386  &sigma34_k1,
387  a3, rs_A,
388  a4, rs_A );
389  }
390  else
391  {
392  MAC_Apply_G_mx3_ops( m_A,
393  &gamma23_k1,
394  &sigma23_k1,
395  &gamma34_k1,
396  &sigma34_k1,
397  a2, rs_A,
398  a3, rs_A,
399  a4, rs_A );
400  }
401  }
402  }
403 
404  // Shutdown stage
405 
406  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
407  {
408  g = nG - 1;
409  k = j;
410 
411  //n_left = 1;
412  //if ( n_left == 1 )
413  {
414  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
415  a2 = buff_A + (g )*cs_A;
416  a3 = buff_A + (g + 1)*cs_A;
417 
418  gamma23_k1 = g23_k1->real;
419  sigma23_k1 = g23_k1->imag;
420 
421  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
422 
423  if ( !is_ident23_k1 )
424  MAC_Apply_G_mx2_ops( m_A,
425  &gamma23_k1,
426  &sigma23_k1,
427  a2, rs_A,
428  a3, rs_A );
429  ++k;
430  --g;
431  }
432 
433  nG_app = k_minus_1 - j;
434  n_iter = nG_app / k_fuse;
435  n_left = nG_app % k_fuse;
436 
437  for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
438  {
439  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
440  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
441  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
442  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
443  a1 = buff_A + (g - 1)*cs_A;
444  a2 = buff_A + (g )*cs_A;
445  a3 = buff_A + (g + 1)*cs_A;
446  a4 = buff_A + (g + 2)*cs_A;
447 
448  gamma23_k1 = g23_k1->real;
449  sigma23_k1 = g23_k1->imag;
450  gamma34_k1 = g34_k1->real;
451  sigma34_k1 = g34_k1->imag;
452  gamma12_k2 = g12_k2->real;
453  sigma12_k2 = g12_k2->imag;
454  gamma23_k2 = g23_k2->real;
455  sigma23_k2 = g23_k2->imag;
456 
457  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
458  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
459  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
460  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
461  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
462  is_ident12_k2 || is_ident23_k2 );
463 
464  if ( has_ident )
465  {
466  // Apply to pairs of columns as needed.
467 
468  if ( !is_ident23_k1 )
469  MAC_Apply_G_mx2_ops( m_A,
470  &gamma23_k1,
471  &sigma23_k1,
472  a2, rs_A,
473  a3, rs_A );
474 
475  if ( !is_ident34_k1 )
476  MAC_Apply_G_mx2_ops( m_A,
477  &gamma34_k1,
478  &sigma34_k1,
479  a3, rs_A,
480  a4, rs_A );
481 
482  if ( !is_ident12_k2 )
483  MAC_Apply_G_mx2_ops( m_A,
484  &gamma12_k2,
485  &sigma12_k2,
486  a1, rs_A,
487  a2, rs_A );
488 
489  if ( !is_ident23_k2 )
490  MAC_Apply_G_mx2_ops( m_A,
491  &gamma23_k2,
492  &sigma23_k2,
493  a2, rs_A,
494  a3, rs_A );
495  }
496  else
497  {
498  // Apply to all four columns.
499 
500  MAC_Apply_G_mx4s_ops( m_A,
501  &gamma23_k1,
502  &sigma23_k1,
503  &gamma34_k1,
504  &sigma34_k1,
505  &gamma12_k2,
506  &sigma12_k2,
507  &gamma23_k2,
508  &sigma23_k2,
509  a1, rs_A,
510  a2, rs_A,
511  a3, rs_A,
512  a4, rs_A );
513  }
514  }
515 
516  if ( n_left == 1 )
517  {
518  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
519  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
520  a2 = buff_A + (g )*cs_A;
521  a3 = buff_A + (g + 1)*cs_A;
522  a4 = buff_A + (g + 2)*cs_A;
523 
524  gamma23_k1 = g23_k1->real;
525  sigma23_k1 = g23_k1->imag;
526  gamma34_k1 = g34_k1->real;
527  sigma34_k1 = g34_k1->imag;
528 
529  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
530  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
531 
532  if ( !is_ident23_k1 && is_ident34_k1 )
533  {
534  MAC_Apply_G_mx2_ops( m_A,
535  &gamma23_k1,
536  &sigma23_k1,
537  a2, rs_A,
538  a3, rs_A );
539  }
540  else if ( is_ident23_k1 && !is_ident34_k1 )
541  {
542  MAC_Apply_G_mx2_ops( m_A,
543  &gamma34_k1,
544  &sigma34_k1,
545  a3, rs_A,
546  a4, rs_A );
547  }
548  else
549  {
550  MAC_Apply_G_mx3_ops( m_A,
551  &gamma23_k1,
552  &sigma23_k1,
553  &gamma34_k1,
554  &sigma34_k1,
555  a2, rs_A,
556  a3, rs_A,
557  a4, rs_A );
558  }
559  }
560  }
561 
562  return FLA_SUCCESS;
563 }
float real
Definition: blis_type_defs.h:134
float bl1_s1(void)
Definition: bl1_constants.c:47
int n_left
Definition: bl1_axmyv2.c:149
FLA_Error FLA_Apply_G_rf_ops_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var1.c:113
scomplex
Definition: blis_type_defs.h:132
int i
Definition: bl1_axmyv2.c:145
float bl1_s0(void)
Definition: bl1_constants.c:111
float imag
Definition: blis_type_defs.h:134

◆ FLA_Apply_G_rf_opt_var3()

FLA_Error FLA_Apply_G_rf_opt_var3 ( FLA_Obj  G,
FLA_Obj  A 
)

References FLA_Apply_G_rf_opc_var3(), FLA_Apply_G_rf_opd_var3(), FLA_Apply_G_rf_ops_var3(), FLA_Apply_G_rf_opz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

32 {
33  FLA_Datatype datatype;
34  int k_G, m_A, n_A;
35  int rs_G, cs_G;
36  int rs_A, cs_A;
37 
38  datatype = FLA_Obj_datatype( A );
39 
40  k_G = FLA_Obj_width( G );
41  m_A = FLA_Obj_length( A );
42  n_A = FLA_Obj_width( A );
43 
44  rs_G = FLA_Obj_row_stride( G );
45  cs_G = FLA_Obj_col_stride( G );
46 
47  rs_A = FLA_Obj_row_stride( A );
48  cs_A = FLA_Obj_col_stride( A );
49 
50  switch ( datatype )
51  {
52  case FLA_FLOAT:
53  {
54  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
55  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56 
57  FLA_Apply_G_rf_ops_var3( k_G,
58  m_A,
59  n_A,
60  buff_G, rs_G, cs_G,
61  buff_A, rs_A, cs_A );
62 
63  break;
64  }
65 
66  case FLA_DOUBLE:
67  {
68  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
69  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70 
71  FLA_Apply_G_rf_opd_var3( k_G,
72  m_A,
73  n_A,
74  buff_G, rs_G, cs_G,
75  buff_A, rs_A, cs_A );
76 
77  break;
78  }
79 
80  case FLA_COMPLEX:
81  {
82  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
83  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
84 
85  FLA_Apply_G_rf_opc_var3( k_G,
86  m_A,
87  n_A,
88  buff_G, rs_G, cs_G,
89  buff_A, rs_A, cs_A );
90 
91  break;
92  }
93 
94  case FLA_DOUBLE_COMPLEX:
95  {
96  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
97  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
98 
99  FLA_Apply_G_rf_opz_var3( k_G,
100  m_A,
101  n_A,
102  buff_G, rs_G, cs_G,
103  buff_A, rs_A, cs_A );
104 
105  break;
106  }
107  }
108 
109  return FLA_SUCCESS;
110 }
FLA_Error FLA_Apply_G_rf_opd_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var3.c:565
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Apply_G_rf_ops_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var3.c:112
scomplex
Definition: blis_type_defs.h:132
FLA_Error FLA_Apply_G_rf_opz_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var3.c:1471
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Error FLA_Apply_G_rf_opc_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var3.c:1018
dcomplex
Definition: blis_type_defs.h:137

◆ FLA_Apply_G_rf_opz_var3()

FLA_Error FLA_Apply_G_rf_opz_var3 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
dcomplex * buff_A,
int  rs_A,
int  cs_A 
)

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opz_var1(), i, dcomplex::imag, n_left, and dcomplex::real.

Referenced by FLA_Apply_G_rf_blz_var3(), and FLA_Apply_G_rf_opt_var3().

1476 {
1477  double one = bl1_d1();
1478  double zero = bl1_d0();
1479  double gamma23_k1;
1480  double sigma23_k1;
1481  double gamma34_k1;
1482  double sigma34_k1;
1483  double gamma12_k2;
1484  double sigma12_k2;
1485  double gamma23_k2;
1486  double sigma23_k2;
1487  dcomplex* a1;
1488  dcomplex* a2;
1489  dcomplex* a3;
1490  dcomplex* a4;
1491  dcomplex* g23_k1;
1492  dcomplex* g34_k1;
1493  dcomplex* g12_k2;
1494  dcomplex* g23_k2;
1495  int i, j, g, k;
1496  int nG, nG_app;
1497  int n_iter;
1498  int n_left;
1499  int k_minus_1;
1500  int n_fuse;
1501  int k_fuse;
1502  int is_ident23_k1, is_ident34_k1;
1503  int is_ident12_k2, is_ident23_k2;
1504  int has_ident;
1505 
1506  k_minus_1 = k_G - 1;
1507  nG = n_A - 1;
1508  n_fuse = 2;
1509  k_fuse = 2;
1510 
1511  // Use the simple variant for nG < 2*(k - 1) or k == 1.
1512  if ( nG < 2*k_minus_1 || k_G == 1 )
1513  {
1514  FLA_Apply_G_rf_opz_var1( k_G,
1515  m_A,
1516  n_A,
1517  buff_G, rs_G, cs_G,
1518  buff_A, rs_A, cs_A );
1519  return FLA_SUCCESS;
1520  }
1521 
1522 
1523  // Start-up phase.
1524 
1525  for ( j = -1; j < k_minus_1; j += n_fuse )
1526  {
1527  nG_app = j + 2;
1528  n_iter = nG_app / k_fuse;
1529  n_left = 1;
1530 
1531  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1532  {
1533  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1534  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1535  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1536  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1537  a1 = buff_A + (g - 1)*cs_A;
1538  a2 = buff_A + (g )*cs_A;
1539  a3 = buff_A + (g + 1)*cs_A;
1540  a4 = buff_A + (g + 2)*cs_A;
1541 
1542  gamma23_k1 = g23_k1->real;
1543  sigma23_k1 = g23_k1->imag;
1544  gamma34_k1 = g34_k1->real;
1545  sigma34_k1 = g34_k1->imag;
1546  gamma12_k2 = g12_k2->real;
1547  sigma12_k2 = g12_k2->imag;
1548  gamma23_k2 = g23_k2->real;
1549  sigma23_k2 = g23_k2->imag;
1550 
1551  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1552  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1553  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
1554  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
1555  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
1556  is_ident12_k2 || is_ident23_k2 );
1557 
1558  if ( has_ident )
1559  {
1560  // Apply to pairs of columns as needed.
1561 
1562  if ( !is_ident23_k1 )
1563  MAC_Apply_G_mx2_opz( m_A,
1564  &gamma23_k1,
1565  &sigma23_k1,
1566  a2, rs_A,
1567  a3, rs_A );
1568 
1569  if ( !is_ident34_k1 )
1570  MAC_Apply_G_mx2_opz( m_A,
1571  &gamma34_k1,
1572  &sigma34_k1,
1573  a3, rs_A,
1574  a4, rs_A );
1575 
1576  if ( !is_ident12_k2 )
1577  MAC_Apply_G_mx2_opz( m_A,
1578  &gamma12_k2,
1579  &sigma12_k2,
1580  a1, rs_A,
1581  a2, rs_A );
1582 
1583  if ( !is_ident23_k2 )
1584  MAC_Apply_G_mx2_opz( m_A,
1585  &gamma23_k2,
1586  &sigma23_k2,
1587  a2, rs_A,
1588  a3, rs_A );
1589  }
1590  else
1591  {
1592  // Apply to all four columns.
1593 
1594  MAC_Apply_G_mx4s_opz( m_A,
1595  &gamma23_k1,
1596  &sigma23_k1,
1597  &gamma34_k1,
1598  &sigma34_k1,
1599  &gamma12_k2,
1600  &sigma12_k2,
1601  &gamma23_k2,
1602  &sigma23_k2,
1603  a1, rs_A,
1604  a2, rs_A,
1605  a3, rs_A,
1606  a4, rs_A );
1607  }
1608  }
1609 
1610  if ( n_left == 1 )
1611  {
1612  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1613  a3 = buff_A + (g + 1)*cs_A;
1614  a4 = buff_A + (g + 2)*cs_A;
1615 
1616  gamma34_k1 = g34_k1->real;
1617  sigma34_k1 = g34_k1->imag;
1618 
1619  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1620 
1621  if ( !is_ident34_k1 )
1622  MAC_Apply_G_mx2_opz( m_A,
1623  &gamma34_k1,
1624  &sigma34_k1,
1625  a3, rs_A,
1626  a4, rs_A );
1627  }
1628  }
1629 
1630  // Pipeline stage
1631 
1632  for ( ; j < nG - 1; j += n_fuse )
1633  {
1634  nG_app = k_G;
1635  n_iter = nG_app / k_fuse;
1636  n_left = nG_app % k_fuse;
1637 
1638  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1639  {
1640  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1641  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1642  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1643  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1644  a1 = buff_A + (g - 1)*cs_A;
1645  a2 = buff_A + (g )*cs_A;
1646  a3 = buff_A + (g + 1)*cs_A;
1647  a4 = buff_A + (g + 2)*cs_A;
1648 
1649  gamma23_k1 = g23_k1->real;
1650  sigma23_k1 = g23_k1->imag;
1651  gamma34_k1 = g34_k1->real;
1652  sigma34_k1 = g34_k1->imag;
1653  gamma12_k2 = g12_k2->real;
1654  sigma12_k2 = g12_k2->imag;
1655  gamma23_k2 = g23_k2->real;
1656  sigma23_k2 = g23_k2->imag;
1657 
1658  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1659  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1660  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
1661  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
1662  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
1663  is_ident12_k2 || is_ident23_k2 );
1664 
1665  if ( has_ident )
1666  {
1667  // Apply to pairs of columns as needed.
1668 
1669  if ( !is_ident23_k1 )
1670  MAC_Apply_G_mx2_opz( m_A,
1671  &gamma23_k1,
1672  &sigma23_k1,
1673  a2, rs_A,
1674  a3, rs_A );
1675 
1676  if ( !is_ident34_k1 )
1677  MAC_Apply_G_mx2_opz( m_A,
1678  &gamma34_k1,
1679  &sigma34_k1,
1680  a3, rs_A,
1681  a4, rs_A );
1682 
1683  if ( !is_ident12_k2 )
1684  MAC_Apply_G_mx2_opz( m_A,
1685  &gamma12_k2,
1686  &sigma12_k2,
1687  a1, rs_A,
1688  a2, rs_A );
1689 
1690  if ( !is_ident23_k2 )
1691  MAC_Apply_G_mx2_opz( m_A,
1692  &gamma23_k2,
1693  &sigma23_k2,
1694  a2, rs_A,
1695  a3, rs_A );
1696  }
1697  else
1698  {
1699  // Apply to all four columns.
1700 
1701  MAC_Apply_G_mx4s_opz( m_A,
1702  &gamma23_k1,
1703  &sigma23_k1,
1704  &gamma34_k1,
1705  &sigma34_k1,
1706  &gamma12_k2,
1707  &sigma12_k2,
1708  &gamma23_k2,
1709  &sigma23_k2,
1710  a1, rs_A,
1711  a2, rs_A,
1712  a3, rs_A,
1713  a4, rs_A );
1714  }
1715  }
1716 
1717  if ( n_left == 1 )
1718  {
1719  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1720  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1721  a2 = buff_A + (g )*cs_A;
1722  a3 = buff_A + (g + 1)*cs_A;
1723  a4 = buff_A + (g + 2)*cs_A;
1724 
1725  gamma23_k1 = g23_k1->real;
1726  sigma23_k1 = g23_k1->imag;
1727  gamma34_k1 = g34_k1->real;
1728  sigma34_k1 = g34_k1->imag;
1729 
1730  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1731  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1732 
1733  if ( !is_ident23_k1 && is_ident34_k1 )
1734  {
1735  MAC_Apply_G_mx2_opz( m_A,
1736  &gamma23_k1,
1737  &sigma23_k1,
1738  a2, rs_A,
1739  a3, rs_A );
1740  }
1741  else if ( is_ident23_k1 && !is_ident34_k1 )
1742  {
1743  MAC_Apply_G_mx2_opz( m_A,
1744  &gamma34_k1,
1745  &sigma34_k1,
1746  a3, rs_A,
1747  a4, rs_A );
1748  }
1749  else
1750  {
1751  MAC_Apply_G_mx3_opz( m_A,
1752  &gamma23_k1,
1753  &sigma23_k1,
1754  &gamma34_k1,
1755  &sigma34_k1,
1756  a2, rs_A,
1757  a3, rs_A,
1758  a4, rs_A );
1759  }
1760  }
1761  }
1762 
1763  // Shutdown stage
1764 
1765  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
1766  {
1767  g = nG - 1;
1768  k = j;
1769 
1770  //n_left = 1;
1771  //if ( n_left == 1 )
1772  {
1773  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1774  a2 = buff_A + (g )*cs_A;
1775  a3 = buff_A + (g + 1)*cs_A;
1776 
1777  gamma23_k1 = g23_k1->real;
1778  sigma23_k1 = g23_k1->imag;
1779 
1780  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1781 
1782  if ( !is_ident23_k1 )
1783  MAC_Apply_G_mx2_opz( m_A,
1784  &gamma23_k1,
1785  &sigma23_k1,
1786  a2, rs_A,
1787  a3, rs_A );
1788  ++k;
1789  --g;
1790  }
1791 
1792  nG_app = k_minus_1 - j;
1793  n_iter = nG_app / k_fuse;
1794  n_left = nG_app % k_fuse;
1795 
1796  for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1797  {
1798  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1799  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1800  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1801  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1802  a1 = buff_A + (g - 1)*cs_A;
1803  a2 = buff_A + (g )*cs_A;
1804  a3 = buff_A + (g + 1)*cs_A;
1805  a4 = buff_A + (g + 2)*cs_A;
1806 
1807  gamma23_k1 = g23_k1->real;
1808  sigma23_k1 = g23_k1->imag;
1809  gamma34_k1 = g34_k1->real;
1810  sigma34_k1 = g34_k1->imag;
1811  gamma12_k2 = g12_k2->real;
1812  sigma12_k2 = g12_k2->imag;
1813  gamma23_k2 = g23_k2->real;
1814  sigma23_k2 = g23_k2->imag;
1815 
1816  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1817  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1818  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
1819  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
1820  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
1821  is_ident12_k2 || is_ident23_k2 );
1822 
1823  if ( has_ident )
1824  {
1825  // Apply to pairs of columns as needed.
1826 
1827  if ( !is_ident23_k1 )
1828  MAC_Apply_G_mx2_opz( m_A,
1829  &gamma23_k1,
1830  &sigma23_k1,
1831  a2, rs_A,
1832  a3, rs_A );
1833 
1834  if ( !is_ident34_k1 )
1835  MAC_Apply_G_mx2_opz( m_A,
1836  &gamma34_k1,
1837  &sigma34_k1,
1838  a3, rs_A,
1839  a4, rs_A );
1840 
1841  if ( !is_ident12_k2 )
1842  MAC_Apply_G_mx2_opz( m_A,
1843  &gamma12_k2,
1844  &sigma12_k2,
1845  a1, rs_A,
1846  a2, rs_A );
1847 
1848  if ( !is_ident23_k2 )
1849  MAC_Apply_G_mx2_opz( m_A,
1850  &gamma23_k2,
1851  &sigma23_k2,
1852  a2, rs_A,
1853  a3, rs_A );
1854  }
1855  else
1856  {
1857  // Apply to all four columns.
1858 
1859  MAC_Apply_G_mx4s_opz( m_A,
1860  &gamma23_k1,
1861  &sigma23_k1,
1862  &gamma34_k1,
1863  &sigma34_k1,
1864  &gamma12_k2,
1865  &sigma12_k2,
1866  &gamma23_k2,
1867  &sigma23_k2,
1868  a1, rs_A,
1869  a2, rs_A,
1870  a3, rs_A,
1871  a4, rs_A );
1872  }
1873  }
1874 
1875  if ( n_left == 1 )
1876  {
1877  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1878  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1879  a2 = buff_A + (g )*cs_A;
1880  a3 = buff_A + (g + 1)*cs_A;
1881  a4 = buff_A + (g + 2)*cs_A;
1882 
1883  gamma23_k1 = g23_k1->real;
1884  sigma23_k1 = g23_k1->imag;
1885  gamma34_k1 = g34_k1->real;
1886  sigma34_k1 = g34_k1->imag;
1887 
1888  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1889  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1890 
1891  if ( !is_ident23_k1 && is_ident34_k1 )
1892  {
1893  MAC_Apply_G_mx2_opz( m_A,
1894  &gamma23_k1,
1895  &sigma23_k1,
1896  a2, rs_A,
1897  a3, rs_A );
1898  }
1899  else if ( is_ident23_k1 && !is_ident34_k1 )
1900  {
1901  MAC_Apply_G_mx2_opz( m_A,
1902  &gamma34_k1,
1903  &sigma34_k1,
1904  a3, rs_A,
1905  a4, rs_A );
1906  }
1907  else
1908  {
1909  MAC_Apply_G_mx3_opz( m_A,
1910  &gamma23_k1,
1911  &sigma23_k1,
1912  &gamma34_k1,
1913  &sigma34_k1,
1914  a2, rs_A,
1915  a3, rs_A,
1916  a4, rs_A );
1917  }
1918  }
1919  }
1920 
1921  return FLA_SUCCESS;
1922 }
double imag
Definition: blis_type_defs.h:139
double bl1_d0(void)
Definition: bl1_constants.c:118
FLA_Error FLA_Apply_G_rf_opz_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var1.c:267
double real
Definition: blis_type_defs.h:139
int n_left
Definition: bl1_axmyv2.c:149
int i
Definition: bl1_axmyv2.c:145
double bl1_d1(void)
Definition: bl1_constants.c:54
dcomplex
Definition: blis_type_defs.h:137