libflame  revision_anchor
Functions | Variables
FLASH_Queue.c File Reference

(r)

Functions

void FLASH_Queue_begin (void)
 
void FLASH_Queue_end (void)
 
unsigned int FLASH_Queue_stack_depth (void)
 
FLA_Error FLASH_Queue_enable (void)
 
FLA_Error FLASH_Queue_disable (void)
 
FLA_Bool FLASH_Queue_get_enabled (void)
 
void FLASH_Queue_set_num_threads (unsigned int n_threads)
 
unsigned int FLASH_Queue_get_num_threads (void)
 
void FLASH_Queue_init (void)
 
void FLASH_Queue_finalize (void)
 
unsigned int FLASH_Queue_get_num_tasks (void)
 
void FLASH_Queue_set_verbose_output (FLASH_Verbose verbose)
 
FLASH_Verbose FLASH_Queue_get_verbose_output (void)
 
void FLASH_Queue_set_sorting (FLA_Bool sorting)
 
FLA_Bool FLASH_Queue_get_sorting (void)
 
void FLASH_Queue_set_caching (FLA_Bool caching)
 
FLA_Bool FLASH_Queue_get_caching (void)
 
void FLASH_Queue_set_work_stealing (FLA_Bool work_stealing)
 
FLA_Bool FLASH_Queue_get_work_stealing (void)
 
void FLASH_Queue_set_data_affinity (FLASH_Data_aff data_affinity)
 
FLASH_Data_aff FLASH_Queue_get_data_affinity (void)
 
double FLASH_Queue_get_total_time (void)
 
double FLASH_Queue_get_parallel_time (void)
 
void FLASH_Queue_set_parallel_time (double dtime)
 
void FLASH_Queue_set_block_size (dim_t size)
 
dim_t FLASH_Queue_get_block_size (void)
 
void FLASH_Queue_set_cache_size (dim_t size)
 
dim_t FLASH_Queue_get_cache_size (void)
 
void FLASH_Queue_set_cache_line_size (dim_t size)
 
dim_t FLASH_Queue_get_cache_line_size (void)
 
void FLASH_Queue_set_cores_per_cache (int cores)
 
int FLASH_Queue_get_cores_per_cache (void)
 
void FLASH_Queue_set_cores_per_queue (int cores)
 
int FLASH_Queue_get_cores_per_queue (void)
 
void FLASH_Queue_reset (void)
 
FLASH_Task * FLASH_Queue_get_head_task (void)
 
FLASH_Task * FLASH_Queue_get_tail_task (void)
 
void FLASH_Queue_push (void *func, void *cntl, char *name, FLA_Bool enabled_gpu, int n_int_args, int n_fla_args, int n_input_args, int n_output_args,...)
 
void FLASH_Queue_push_input (FLA_Obj obj, FLASH_Task *t)
 
void FLASH_Queue_push_output (FLA_Obj obj, FLASH_Task *t)
 
FLASH_Task * FLASH_Task_alloc (void *func, void *cntl, char *name, FLA_Bool enabled_gpu, int n_int_args, int n_fla_args, int n_input_args, int n_output_args)
 
void FLASH_Task_free (FLASH_Task *t)
 
void FLASH_Queue_exec_task (FLASH_Task *t)
 
void FLASH_Queue_verbose_output (void)
 

Variables

FLASH_Queue _tq
 

Function Documentation

◆ FLASH_Queue_begin()

void FLASH_Queue_begin ( void  )

Referenced by FLASH_Apply_CAQ_UT_inc(), FLASH_Apply_Q2_UT(), FLASH_Apply_Q_UT(), FLASH_Apply_Q_UT_inc(), FLASH_Apply_QUD_UT_inc(), FLASH_CAQR_UT_inc_noopt(), FLASH_Chol(), FLASH_Copy(), FLASH_Copyr(), FLASH_Eig_gest(), FLASH_Gemm(), FLASH_Hemm(), FLASH_Her2k(), FLASH_Herk(), FLASH_LQ_UT(), FLASH_LU_incpiv_noopt(), FLASH_LU_incpiv_opt1(), FLASH_LU_nopiv(), FLASH_LU_piv(), FLASH_Lyap(), FLASH_QR2_UT(), FLASH_QR_UT(), FLASH_QR_UT_inc_noopt(), FLASH_QR_UT_inc_opt1(), FLASH_SPDinv(), FLASH_Sylv(), FLASH_Symm(), FLASH_Syr2k(), FLASH_Syrk(), FLASH_Trinv(), FLASH_Trmm(), FLASH_Trsm(), FLASH_Ttmm(), and FLASH_UDdate_UT_inc().

65 {
66 #ifdef FLA_ENABLE_SUPERMATRIX
67  if ( flash_queue_stack == 0 )
68  {
69  // Save the starting time for the total execution time.
70  flash_queue_total_time = FLA_Clock();
71  }
72 #endif
73 
74  // Push onto the stack.
75  flash_queue_stack++;
76 
77  return;
78 }
double FLA_Clock(void)
Definition: FLA_Clock.c:20

◆ FLASH_Queue_disable()

FLA_Error FLASH_Queue_disable ( void  )

Referenced by FLASH_Apply_pivots(), FLASH_Axpy(), FLASH_Axpyt(), FLASH_Copyt(), FLASH_FS_incpiv(), FLASH_Gemv(), FLASH_Scal(), FLASH_Scalr(), and FLASH_Trsv().

150 {
151 #ifdef FLA_ENABLE_SUPERMATRIX
152  if ( flash_queue_stack == 0 )
153  {
154  // Disable if not begin parallel region yet.
155  flash_queue_enabled = FALSE;
156  return FLA_SUCCESS;
157  }
158  else
159  {
160  // Cannot change status during parallel region.
161  return FLA_FAILURE;
162  }
163 #else
164  // Allow disabling enqueuing even when SuperMatrix is not configured.
165  flash_queue_enabled = FALSE;
166  return FLA_SUCCESS;
167 #endif
168 }

◆ FLASH_Queue_enable()

FLA_Error FLASH_Queue_enable ( void  )

Referenced by FLASH_Apply_pivots(), FLASH_Axpy(), FLASH_Axpyt(), FLASH_Copyt(), FLASH_FS_incpiv(), FLASH_Gemv(), FLASH_Scal(), FLASH_Scalr(), and FLASH_Trsv().

123 {
124 #ifdef FLA_ENABLE_SUPERMATRIX
125  if ( flash_queue_stack == 0 )
126  {
127  // Enable if not begin parallel region yet.
128  flash_queue_enabled = TRUE;
129  return FLA_SUCCESS;
130  }
131  else
132  {
133  // Cannot change status during parallel region.
134  return FLA_FAILURE;
135  }
136 #else
137  // Raise an exception when SuperMatrix is not configured.
138  FLA_Check_error_code( FLA_SUPERMATRIX_NOT_ENABLED );
139  return FLA_FAILURE;
140 #endif
141 }

◆ FLASH_Queue_end()

void FLASH_Queue_end ( void  )

Referenced by FLASH_Apply_CAQ_UT_inc(), FLASH_Apply_Q2_UT(), FLASH_Apply_Q_UT(), FLASH_Apply_Q_UT_inc(), FLASH_Apply_QUD_UT_inc(), FLASH_CAQR_UT_inc_noopt(), FLASH_Chol(), FLASH_Copy(), FLASH_Copyr(), FLASH_Eig_gest(), FLASH_Gemm(), FLASH_Hemm(), FLASH_Her2k(), FLASH_Herk(), FLASH_LQ_UT(), FLASH_LU_incpiv_noopt(), FLASH_LU_incpiv_opt1(), FLASH_LU_nopiv(), FLASH_LU_piv(), FLASH_Lyap(), FLASH_QR2_UT(), FLASH_QR_UT(), FLASH_QR_UT_inc_noopt(), FLASH_QR_UT_inc_opt1(), FLASH_SPDinv(), FLASH_Sylv(), FLASH_Symm(), FLASH_Syr2k(), FLASH_Syrk(), FLASH_Trinv(), FLASH_Trmm(), FLASH_Trsm(), FLASH_Ttmm(), and FLASH_UDdate_UT_inc().

87 {
88  // Pop off the stack.
89  flash_queue_stack--;
90 
91 #ifdef FLA_ENABLE_SUPERMATRIX
92  if ( flash_queue_stack == 0 )
93  {
94  // Execute tasks if encounter the outermost parallel region.
95  FLASH_Queue_exec();
96 
97  // Find the total execution time.
98  flash_queue_total_time = FLA_Clock() - flash_queue_total_time;
99  }
100 #endif
101 
102  return;
103 }
void FLASH_Queue_exec(void)
Definition: FLASH_Queue_exec.c:2756
double FLA_Clock(void)
Definition: FLA_Clock.c:20

◆ FLASH_Queue_exec_task()

void FLASH_Queue_exec_task ( FLASH_Task * t)

References FLASH_Task_s::cntl, FLA_Apply_CAQ2_UT_task(), FLA_Apply_pivots_macro_task(), FLA_Apply_Q2_UT_task(), FLA_Apply_Q_UT_task(), FLA_Apply_QUD_UT_task(), FLASH_Task_s::fla_arg, FLA_Axpy_task(), FLA_Axpyt_task(), FLA_CAQR2_UT_task(), FLA_Chol_task(), FLA_Copy_task(), FLA_Copyr_task(), FLA_Copyt_task(), FLA_Eig_gest_task(), FLA_Gemm_task(), FLA_Gemv_task(), FLA_Hemm_task(), FLA_Her2k_task(), FLA_Herk_task(), FLA_LQ_UT_macro_task(), FLA_LU_nopiv_task(), FLA_LU_piv_copy_task(), FLA_LU_piv_macro_task(), FLA_LU_piv_task(), FLA_Lyap_task(), FLA_Obj_create_buffer_task(), FLA_Obj_free_buffer_task(), FLA_QR2_UT_task(), FLA_QR_UT_copy_task(), FLA_QR_UT_macro_task(), FLA_QR_UT_task(), FLA_SA_FS_task(), FLA_SA_LU_task(), FLA_Scal_task(), FLA_Scalr_task(), FLA_Sylv_task(), FLA_Symm_task(), FLA_Syr2k_task(), FLA_Syrk_task(), FLA_Trinv_task(), FLA_Trmm_task(), FLA_Trsm_piv_task(), FLA_Trsm_task(), FLA_Trsv_task(), FLA_Ttmm_task(), FLA_UDdate_UT_task(), FLASH_Task_s::func, FLASH_Task_s::input_arg, FLASH_Task_s::int_arg, and FLASH_Task_s::output_arg.

Referenced by FLASH_Queue_exec_gpu(), FLASH_Queue_exec_parallel_function(), and FLASH_Queue_exec_simulation().

1147 {
1148  // Define local function pointer types.
1149 
1150  // LAPACK-level
1151  typedef FLA_Error(*flash_lu_piv_macro_p)(FLA_Obj A, FLA_Obj p, fla_lu_t* cntl );
1152  typedef FLA_Error(*flash_apply_pivots_macro_p)(FLA_Side side, FLA_Trans trans, FLA_Obj p, FLA_Obj A, fla_appiv_t* cntl);
1153  typedef FLA_Error(*flash_lu_piv_p)(FLA_Obj A, FLA_Obj p, fla_lu_t* cntl);
1154  typedef FLA_Error(*flash_lu_piv_copy_p)(FLA_Obj A, FLA_Obj p, FLA_Obj U, fla_lu_t* cntl);
1155  typedef FLA_Error(*flash_trsm_piv_p)(FLA_Obj A, FLA_Obj C, FLA_Obj p, fla_trsm_t* cntl);
1156  typedef FLA_Error(*flash_sa_lu_p)(FLA_Obj U, FLA_Obj D, FLA_Obj p, FLA_Obj L, int nb_alg, fla_lu_t* cntl);
1157  typedef FLA_Error(*flash_sa_fs_p)(FLA_Obj L, FLA_Obj D, FLA_Obj p, FLA_Obj C, FLA_Obj E, int nb_alg, fla_gemm_t* cntl);
1158  typedef FLA_Error(*flash_lu_nopiv_p)(FLA_Obj A, fla_lu_t* cntl);
1159  typedef FLA_Error(*flash_trinv_p)(FLA_Uplo uplo, FLA_Diag diag, FLA_Obj A, fla_trinv_t* cntl);
1160  typedef FLA_Error(*flash_ttmm_p)(FLA_Uplo uplo, FLA_Obj A, fla_ttmm_t* cntl);
1161  typedef FLA_Error(*flash_chol_p)(FLA_Uplo uplo, FLA_Obj A, fla_chol_t* cntl);
1162  typedef FLA_Error(*flash_sylv_p)(FLA_Trans transa, FLA_Trans transb, FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t* cntl);
1163  typedef FLA_Error(*flash_lyap_p)(FLA_Trans trans, FLA_Obj isgn, FLA_Obj A, FLA_Obj C, FLA_Obj scale, fla_lyap_t* cntl);
1164  typedef FLA_Error(*flash_qrut_macro_p)(FLA_Obj A, FLA_Obj T, fla_qrut_t* cntl);
1165  typedef FLA_Error(*flash_qrut_p)(FLA_Obj A, FLA_Obj T, fla_qrut_t* cntl);
1166  typedef FLA_Error(*flash_qrutc_p)(FLA_Obj A, FLA_Obj T, FLA_Obj U, fla_qrut_t* cntl);
1167  typedef FLA_Error(*flash_qr2ut_p)(FLA_Obj B, FLA_Obj D, FLA_Obj T, fla_qr2ut_t* cntl);
1168  typedef FLA_Error(*flash_lqut_macro_p)(FLA_Obj A, FLA_Obj T, fla_lqut_t* cntl);
1169  typedef FLA_Error(*flash_caqr2ut_p)(FLA_Obj B, FLA_Obj D, FLA_Obj T, fla_caqr2ut_t* cntl);
1170  typedef FLA_Error(*flash_uddateut_p)(FLA_Obj R, FLA_Obj C, FLA_Obj D, FLA_Obj T, fla_uddateut_t* cntl);
1171  typedef FLA_Error(*flash_apqut_p)(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj A, FLA_Obj T, FLA_Obj W, FLA_Obj B, fla_apqut_t* cntl);
1172  typedef FLA_Error(*flash_apq2ut_p)(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj D, FLA_Obj T, FLA_Obj W, FLA_Obj C, FLA_Obj E, fla_apq2ut_t* cntl);
1173  typedef FLA_Error(*flash_apcaq2ut_p)(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj D, FLA_Obj T, FLA_Obj W, FLA_Obj C, FLA_Obj E, fla_apcaq2ut_t* cntl);
1174  typedef FLA_Error(*flash_apqudut_p)(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj T, FLA_Obj W, FLA_Obj R, FLA_Obj U, FLA_Obj C, FLA_Obj V, FLA_Obj D, fla_apqudut_t* cntl);
1175  typedef FLA_Error(*flash_eig_gest_p)(FLA_Inv inv, FLA_Uplo uplo, FLA_Obj A, FLA_Obj Y, FLA_Obj B, fla_eig_gest_t* cntl);
1176 
1177  // Level-3 BLAS
1178  typedef FLA_Error(*flash_gemm_p)(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t* cntl);
1179  typedef FLA_Error(*flash_hemm_p)(FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_hemm_t* cntl);
1180  typedef FLA_Error(*flash_herk_p)(FLA_Uplo uplo, FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj beta, FLA_Obj C, fla_herk_t* cntl);
1181  typedef FLA_Error(*flash_her2k_p)(FLA_Uplo uplo, FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_her2k_t* cntl);
1182  typedef FLA_Error(*flash_symm_p)(FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_symm_t* cntl);
1183  typedef FLA_Error(*flash_syrk_p)(FLA_Uplo uplo, FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj beta, FLA_Obj C, fla_syrk_t* cntl);
1184  typedef FLA_Error(*flash_syr2k_p)(FLA_Uplo uplo, FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_syr2k_t* cntl);
1185  typedef FLA_Error(*flash_trmm_p)(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj C, fla_trmm_t* cntl);
1186  typedef FLA_Error(*flash_trsm_p)(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj C, fla_trsm_t* cntl);
1187 
1188  // Level-2 BLAS
1189  typedef FLA_Error(*flash_gemv_p)(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y, fla_gemv_t* cntl);
1190  typedef FLA_Error(*flash_trsv_p)(FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj A, FLA_Obj x, fla_trsv_t* cntl);
1191 
1192  // Level-1 BLAS
1193  typedef FLA_Error(*flash_axpy_p)(FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_axpy_t* cntl);
1194  typedef FLA_Error(*flash_axpyt_p)(FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_axpyt_t* cntl);
1195  typedef FLA_Error(*flash_copy_p)(FLA_Obj A, FLA_Obj B, fla_copy_t* cntl);
1196  typedef FLA_Error(*flash_copyt_p)(FLA_Trans trans, FLA_Obj A, FLA_Obj B, fla_copyt_t* cntl);
1197  typedef FLA_Error(*flash_copyr_p)(FLA_Uplo uplo, FLA_Obj A, FLA_Obj B, fla_copyr_t* cntl);
1198  typedef FLA_Error(*flash_scal_p)(FLA_Obj alpha, FLA_Obj A, fla_scal_t* cntl);
1199  typedef FLA_Error(*flash_scalr_p)(FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, fla_scalr_t* cntl);
1200 
1201  // Base
1202  typedef FLA_Error(*flash_obj_create_buffer_p)(dim_t rs, dim_t cs, FLA_Obj A, void* cntl);
1203  typedef FLA_Error(*flash_obj_free_buffer_p)(FLA_Obj A, void* cntl);
1204 
1205  // Only execute task if it is not NULL.
1206  if ( t == NULL )
1207  return;
1208 
1209  // Now "switch" between the various possible task functions.
1210 
1211  // FLA_LU_piv_macro
1212  if ( t->func == (void *) FLA_LU_piv_macro_task )
1213  {
1214  flash_lu_piv_macro_p func;
1215  func = (flash_lu_piv_macro_p) t->func;
1216 
1217  func( t->output_arg[0],
1218  t->output_arg[1],
1219  ( fla_lu_t* ) t->cntl );
1220  }
1221  // FLA_Apply_pivots_macro
1222  else if ( t->func == (void *) FLA_Apply_pivots_macro_task )
1223  {
1224  flash_apply_pivots_macro_p func;
1225  func = (flash_apply_pivots_macro_p) t->func;
1226 
1227  func( ( FLA_Side ) t->int_arg[0],
1228  ( FLA_Trans ) t->int_arg[1],
1229  t->input_arg[0],
1230  t->output_arg[0],
1231  ( fla_appiv_t* ) t->cntl );
1232  }
1233  // FLA_LU_piv
1234  else if ( t->func == (void *) FLA_LU_piv_task )
1235  {
1236  flash_lu_piv_p func;
1237  func = (flash_lu_piv_p) t->func;
1238 
1239  func( t->output_arg[0],
1240  t->fla_arg[0],
1241  ( fla_lu_t* ) t->cntl );
1242  }
1243  // FLA_LU_piv_copy
1244  else if ( t->func == (void *) FLA_LU_piv_copy_task )
1245  {
1246  flash_lu_piv_copy_p func;
1247  func = (flash_lu_piv_copy_p) t->func;
1248 
1249  func( t->output_arg[0],
1250  t->fla_arg[0],
1251  t->output_arg[1],
1252  ( fla_lu_t* ) t->cntl );
1253  }
1254  // FLA_Trsm_piv
1255  else if ( t->func == (void *) FLA_Trsm_piv_task )
1256  {
1257  flash_trsm_piv_p func;
1258  func = (flash_trsm_piv_p) t->func;
1259 
1260  func( t->input_arg[0],
1261  t->output_arg[0],
1262  t->fla_arg[0],
1263  ( fla_trsm_t* ) t->cntl );
1264  }
1265  // FLA_SA_LU
1266  else if ( t->func == (void *) FLA_SA_LU_task )
1267  {
1268  flash_sa_lu_p func;
1269  func = (flash_sa_lu_p) t->func;
1270 
1271  func( t->output_arg[1],
1272  t->output_arg[0],
1273  t->fla_arg[0],
1274  t->fla_arg[1],
1275  t->int_arg[0],
1276  ( fla_lu_t* ) t->cntl );
1277  }
1278  // FLA_SA_FS
1279  else if ( t->func == (void *) FLA_SA_FS_task )
1280  {
1281  flash_sa_fs_p func;
1282  func = (flash_sa_fs_p) t->func;
1283 
1284  func( t->fla_arg[0],
1285  t->input_arg[0],
1286  t->fla_arg[1],
1287  t->output_arg[1],
1288  t->output_arg[0],
1289  t->int_arg[0],
1290  ( fla_gemm_t* ) t->cntl );
1291  }
1292  // FLA_LU_nopiv
1293  else if ( t->func == (void *) FLA_LU_nopiv_task )
1294  {
1295  flash_lu_nopiv_p func;
1296  func = (flash_lu_nopiv_p) t->func;
1297 
1298  func( t->output_arg[0],
1299  ( fla_lu_t* ) t->cntl );
1300  }
1301  // FLA_Trinv
1302  else if ( t->func == (void *) FLA_Trinv_task )
1303  {
1304  flash_trinv_p func;
1305  func = (flash_trinv_p) t->func;
1306 
1307  func( ( FLA_Uplo ) t->int_arg[0],
1308  ( FLA_Diag ) t->int_arg[1],
1309  t->output_arg[0],
1310  ( fla_trinv_t* ) t->cntl );
1311  }
1312  // FLA_Ttmm
1313  else if ( t->func == (void *) FLA_Ttmm_task )
1314  {
1315  flash_ttmm_p func;
1316  func = (flash_ttmm_p) t->func;
1317 
1318  func( ( FLA_Uplo ) t->int_arg[0],
1319  t->output_arg[0],
1320  ( fla_ttmm_t* ) t->cntl );
1321  }
1322  // FLA_Chol
1323  else if ( t->func == (void *) FLA_Chol_task )
1324  {
1325  flash_chol_p func;
1326  func = (flash_chol_p) t->func;
1327 
1328  func( ( FLA_Uplo ) t->int_arg[0],
1329  t->output_arg[0],
1330  ( fla_chol_t* ) t->cntl );
1331  }
1332  // FLA_Sylv
1333  else if ( t->func == (void *) FLA_Sylv_task )
1334  {
1335  flash_sylv_p func;
1336  func = (flash_sylv_p) t->func;
1337 
1338  func( ( FLA_Trans ) t->int_arg[0],
1339  ( FLA_Trans ) t->int_arg[1],
1340  t->fla_arg[0],
1341  t->input_arg[0],
1342  t->input_arg[1],
1343  t->output_arg[0],
1344  t->fla_arg[1],
1345  ( fla_sylv_t* ) t->cntl );
1346  }
1347  // FLA_Lyap
1348  else if ( t->func == (void *) FLA_Lyap_task )
1349  {
1350  flash_lyap_p func;
1351  func = (flash_lyap_p) t->func;
1352 
1353  func( ( FLA_Trans ) t->int_arg[0],
1354  t->fla_arg[0],
1355  t->input_arg[0],
1356  t->output_arg[0],
1357  t->fla_arg[1],
1358  ( fla_lyap_t* ) t->cntl );
1359  }
1360  // FLA_QR_UT_macro
1361  else if ( t->func == (void *) FLA_QR_UT_macro_task )
1362  {
1363  flash_qrut_macro_p func;
1364  func = (flash_qrut_macro_p) t->func;
1365 
1366  func( t->output_arg[0],
1367  t->output_arg[1],
1368  ( fla_qrut_t* ) t->cntl );
1369  }
1370  // FLA_QR_UT
1371  else if ( t->func == (void *) FLA_QR_UT_task )
1372  {
1373  flash_qrut_p func;
1374  func = (flash_qrut_p) t->func;
1375 
1376  func( t->output_arg[0],
1377  t->fla_arg[0],
1378  ( fla_qrut_t* ) t->cntl );
1379  }
1380  // FLA_QR_UT_copy
1381  else if ( t->func == (void *) FLA_QR_UT_copy_task )
1382  {
1383  flash_qrutc_p func;
1384  func = (flash_qrutc_p) t->func;
1385 
1386  func( t->output_arg[0],
1387  t->fla_arg[0],
1388  t->output_arg[1],
1389  ( fla_qrut_t* ) t->cntl );
1390  }
1391  // FLA_QR2_UT
1392  else if ( t->func == (void *) FLA_QR2_UT_task )
1393  {
1394  flash_qr2ut_p func;
1395  func = (flash_qr2ut_p) t->func;
1396 
1397  func( t->output_arg[1],
1398  t->output_arg[0],
1399  t->fla_arg[0],
1400  ( fla_qr2ut_t* ) t->cntl );
1401  }
1402  // FLA_LQ_UT_macro
1403  else if ( t->func == (void *) FLA_LQ_UT_macro_task )
1404  {
1405  flash_lqut_macro_p func;
1406  func = (flash_lqut_macro_p) t->func;
1407 
1408  func( t->output_arg[0],
1409  t->output_arg[1],
1410  ( fla_lqut_t* ) t->cntl );
1411  }
1412  // FLA_CAQR2_UT
1413  else if ( t->func == (void *) FLA_CAQR2_UT_task )
1414  {
1415  flash_caqr2ut_p func;
1416  func = (flash_caqr2ut_p) t->func;
1417 
1418  func( t->output_arg[1],
1419  t->output_arg[0],
1420  t->fla_arg[0],
1421  ( fla_caqr2ut_t* ) t->cntl );
1422  }
1423  // FLA_UDdate_UT
1424  else if ( t->func == (void *) FLA_UDdate_UT_task )
1425  {
1426  flash_uddateut_p func;
1427  func = (flash_uddateut_p) t->func;
1428 
1429  func( t->output_arg[0],
1430  t->output_arg[1],
1431  t->output_arg[2],
1432  t->output_arg[3],
1433  ( fla_uddateut_t* ) t->cntl );
1434  }
1435  // FLA_Apply_Q_UT
1436  else if ( t->func == (void *) FLA_Apply_Q_UT_task )
1437  {
1438  flash_apqut_p func;
1439  func = (flash_apqut_p) t->func;
1440 
1441  func( ( FLA_Side ) t->int_arg[0],
1442  ( FLA_Trans ) t->int_arg[1],
1443  ( FLA_Direct ) t->int_arg[2],
1444  ( FLA_Store ) t->int_arg[3],
1445  t->input_arg[0],
1446  t->fla_arg[0],
1447  t->output_arg[1],
1448  t->output_arg[0],
1449  ( fla_apqut_t* ) t->cntl );
1450  }
1451  // FLA_Apply_Q2_UT
1452  else if ( t->func == (void *) FLA_Apply_Q2_UT_task )
1453  {
1454  flash_apq2ut_p func;
1455  func = (flash_apq2ut_p) t->func;
1456 
1457  func( ( FLA_Side ) t->int_arg[0],
1458  ( FLA_Trans ) t->int_arg[1],
1459  ( FLA_Direct ) t->int_arg[2],
1460  ( FLA_Store ) t->int_arg[3],
1461  t->input_arg[0],
1462  t->fla_arg[0],
1463  t->output_arg[2],
1464  t->output_arg[1],
1465  t->output_arg[0],
1466  ( fla_apq2ut_t* ) t->cntl );
1467  }
1468  // FLA_Apply_CAQ2_UT
1469  else if ( t->func == (void *) FLA_Apply_CAQ2_UT_task )
1470  {
1471  flash_apcaq2ut_p func;
1472  func = (flash_apcaq2ut_p) t->func;
1473 
1474  func( ( FLA_Side ) t->int_arg[0],
1475  ( FLA_Trans ) t->int_arg[1],
1476  ( FLA_Direct ) t->int_arg[2],
1477  ( FLA_Store ) t->int_arg[3],
1478  t->input_arg[0],
1479  t->fla_arg[0],
1480  t->output_arg[2],
1481  t->output_arg[1],
1482  t->output_arg[0],
1483  ( fla_apcaq2ut_t* ) t->cntl );
1484  }
1485  // FLA_Apply_QUD_UT
1486  else if ( t->func == (void *) FLA_Apply_QUD_UT_task )
1487  {
1488  flash_apqudut_p func;
1489  func = (flash_apqudut_p) t->func;
1490 
1491  func( ( FLA_Side ) t->int_arg[0],
1492  ( FLA_Trans ) t->int_arg[1],
1493  ( FLA_Direct ) t->int_arg[2],
1494  ( FLA_Store ) t->int_arg[3],
1495  t->input_arg[0],
1496  t->output_arg[0],
1497  t->output_arg[1],
1498  t->input_arg[1],
1499  t->output_arg[2],
1500  t->input_arg[2],
1501  t->output_arg[3],
1502  ( fla_apqudut_t* ) t->cntl );
1503  }
1504  // FLA_Eig_gest
1505  else if ( t->func == (void *) FLA_Eig_gest_task )
1506  {
1507  flash_eig_gest_p func;
1508  func = (flash_eig_gest_p) t->func;
1509 
1510  func( ( FLA_Inv ) t->int_arg[0],
1511  ( FLA_Uplo ) t->int_arg[1],
1512  t->output_arg[1],
1513  t->output_arg[0],
1514  t->input_arg[0],
1515  ( fla_eig_gest_t* ) t->cntl );
1516  }
1517  // FLA_Gemm
1518  else if ( t->func == (void *) FLA_Gemm_task )
1519  {
1520  flash_gemm_p func;
1521  func = (flash_gemm_p) t->func;
1522 
1523  func( ( FLA_Trans ) t->int_arg[0],
1524  ( FLA_Trans ) t->int_arg[1],
1525  t->fla_arg[0],
1526  t->input_arg[0],
1527  t->input_arg[1],
1528  t->fla_arg[1],
1529  t->output_arg[0],
1530  ( fla_gemm_t* ) t->cntl );
1531  }
1532  // FLA_Hemm
1533  else if ( t->func == (void *) FLA_Hemm_task )
1534  {
1535  flash_hemm_p func;
1536  func = (flash_hemm_p) t->func;
1537 
1538  func( ( FLA_Side ) t->int_arg[0],
1539  ( FLA_Uplo ) t->int_arg[1],
1540  t->fla_arg[0],
1541  t->input_arg[0],
1542  t->input_arg[1],
1543  t->fla_arg[1],
1544  t->output_arg[0],
1545  ( fla_hemm_t* ) t->cntl );
1546  }
1547  // FLA_Herk
1548  else if ( t->func == (void *) FLA_Herk_task )
1549  {
1550  flash_herk_p func;
1551  func = (flash_herk_p) t->func;
1552 
1553  func( ( FLA_Uplo ) t->int_arg[0],
1554  ( FLA_Trans ) t->int_arg[1],
1555  t->fla_arg[0],
1556  t->input_arg[0],
1557  t->fla_arg[1],
1558  t->output_arg[0],
1559  ( fla_herk_t* ) t->cntl );
1560  }
1561  // FLA_Her2k
1562  else if ( t->func == (void *) FLA_Her2k_task )
1563  {
1564  flash_her2k_p func;
1565  func = (flash_her2k_p) t->func;
1566 
1567  func( ( FLA_Uplo ) t->int_arg[0],
1568  ( FLA_Trans ) t->int_arg[1],
1569  t->fla_arg[0],
1570  t->input_arg[0],
1571  t->input_arg[1],
1572  t->fla_arg[1],
1573  t->output_arg[0],
1574  ( fla_her2k_t* ) t->cntl );
1575  }
1576  // FLA_Symm
1577  else if ( t->func == (void *) FLA_Symm_task )
1578  {
1579  flash_symm_p func;
1580  func = (flash_symm_p) t->func;
1581 
1582  func( ( FLA_Side ) t->int_arg[0],
1583  ( FLA_Uplo ) t->int_arg[1],
1584  t->fla_arg[0],
1585  t->input_arg[0],
1586  t->input_arg[1],
1587  t->fla_arg[1],
1588  t->output_arg[0],
1589  ( fla_symm_t* ) t->cntl );
1590  }
1591  // FLA_Syrk
1592  else if ( t->func == (void *) FLA_Syrk_task )
1593  {
1594  flash_syrk_p func;
1595  func = (flash_syrk_p) t->func;
1596 
1597  func( ( FLA_Uplo ) t->int_arg[0],
1598  ( FLA_Trans ) t->int_arg[1],
1599  t->fla_arg[0],
1600  t->input_arg[0],
1601  t->fla_arg[1],
1602  t->output_arg[0],
1603  ( fla_syrk_t* ) t->cntl );
1604  }
1605  // FLA_Syr2k
1606  else if ( t->func == (void *) FLA_Syr2k_task )
1607  {
1608  flash_syr2k_p func;
1609  func = (flash_syr2k_p) t->func;
1610 
1611  func( ( FLA_Uplo ) t->int_arg[0],
1612  ( FLA_Trans ) t->int_arg[1],
1613  t->fla_arg[0],
1614  t->input_arg[0],
1615  t->input_arg[1],
1616  t->fla_arg[1],
1617  t->output_arg[0],
1618  ( fla_syr2k_t* ) t->cntl );
1619  }
1620  // FLA_Trmm
1621  else if ( t->func == (void *) FLA_Trmm_task )
1622  {
1623  flash_trmm_p func;
1624  func = (flash_trmm_p) t->func;
1625 
1626  func( ( FLA_Side ) t->int_arg[0],
1627  ( FLA_Uplo ) t->int_arg[1],
1628  ( FLA_Trans ) t->int_arg[2],
1629  ( FLA_Diag ) t->int_arg[3],
1630  t->fla_arg[0],
1631  t->input_arg[0],
1632  t->output_arg[0],
1633  ( fla_trmm_t* ) t->cntl );
1634  }
1635  // FLA_Trsm
1636  else if ( t->func == (void *) FLA_Trsm_task )
1637  {
1638  flash_trsm_p func;
1639  func = (flash_trsm_p) t->func;
1640 
1641  func( ( FLA_Side ) t->int_arg[0],
1642  ( FLA_Uplo ) t->int_arg[1],
1643  ( FLA_Trans ) t->int_arg[2],
1644  ( FLA_Diag ) t->int_arg[3],
1645  t->fla_arg[0],
1646  t->input_arg[0],
1647  t->output_arg[0],
1648  ( fla_trsm_t* ) t->cntl );
1649  }
1650  // FLA_Gemv
1651  else if ( t->func == (void *) FLA_Gemv_task )
1652  {
1653  flash_gemv_p func;
1654  func = (flash_gemv_p) t->func;
1655 
1656  func( ( FLA_Trans ) t->int_arg[0],
1657  t->fla_arg[0],
1658  t->input_arg[0],
1659  t->input_arg[1],
1660  t->fla_arg[1],
1661  t->output_arg[0],
1662  ( fla_gemv_t* ) t->cntl );
1663  }
1664  // FLA_Trsv
1665  else if ( t->func == (void *) FLA_Trsv_task )
1666  {
1667  flash_trsv_p func;
1668  func = (flash_trsv_p) t->func;
1669 
1670  func( ( FLA_Uplo ) t->int_arg[0],
1671  ( FLA_Trans ) t->int_arg[1],
1672  ( FLA_Diag ) t->int_arg[2],
1673  t->input_arg[0],
1674  t->output_arg[0],
1675  ( fla_trsv_t* ) t->cntl );
1676  }
1677  // FLA_Axpy
1678  else if ( t->func == (void *) FLA_Axpy_task )
1679  {
1680  flash_axpy_p func;
1681  func = (flash_axpy_p) t->func;
1682 
1683  func( t->fla_arg[0],
1684  t->input_arg[0],
1685  t->output_arg[0],
1686  ( fla_axpy_t* ) t->cntl );
1687  }
1688  // FLA_Axpyt
1689  else if ( t->func == (void *) FLA_Axpyt_task )
1690  {
1691  flash_axpyt_p func;
1692  func = (flash_axpyt_p) t->func;
1693 
1694  func( ( FLA_Trans ) t->int_arg[0],
1695  t->fla_arg[0],
1696  t->input_arg[0],
1697  t->output_arg[0],
1698  ( fla_axpyt_t* ) t->cntl );
1699  }
1700  // FLA_Copy
1701  else if ( t->func == (void *) FLA_Copy_task )
1702  {
1703  flash_copy_p func;
1704  func = (flash_copy_p) t->func;
1705 
1706  func( t->input_arg[0],
1707  t->output_arg[0],
1708  ( fla_copy_t* ) t->cntl );
1709  }
1710  // FLA_Copyt
1711  else if ( t->func == (void *) FLA_Copyt_task )
1712  {
1713  flash_copyt_p func;
1714  func = (flash_copyt_p) t->func;
1715 
1716  func( ( FLA_Trans ) t->int_arg[0],
1717  t->input_arg[0],
1718  t->output_arg[0],
1719  ( fla_copyt_t* ) t->cntl );
1720  }
1721  // FLA_Copyr
1722  else if ( t->func == (void *) FLA_Copyr_task )
1723  {
1724  flash_copyr_p func;
1725  func = (flash_copyr_p) t->func;
1726 
1727  func( ( FLA_Uplo ) t->int_arg[0],
1728  t->input_arg[0],
1729  t->output_arg[0],
1730  ( fla_copyr_t* ) t->cntl );
1731  }
1732  // FLA_Scal
1733  else if ( t->func == (void *) FLA_Scal_task )
1734  {
1735  flash_scal_p func;
1736  func = (flash_scal_p) t->func;
1737 
1738  func( t->fla_arg[0],
1739  t->output_arg[0],
1740  ( fla_scal_t* ) t->cntl );
1741  }
1742  // FLA_Scalr
1743  else if ( t->func == (void *) FLA_Scalr_task )
1744  {
1745  flash_scalr_p func;
1746  func = (flash_scalr_p) t->func;
1747 
1748  func( ( FLA_Uplo ) t->int_arg[0],
1749  t->fla_arg[0],
1750  t->output_arg[0],
1751  ( fla_scalr_t* ) t->cntl );
1752  }
1753  // FLA_Obj_create_buffer
1754  else if ( t->func == (void *) FLA_Obj_create_buffer_task )
1755  {
1756  flash_obj_create_buffer_p func;
1757  func = (flash_obj_create_buffer_p) t->func;
1758 
1759  func( ( dim_t ) t->int_arg[0],
1760  ( dim_t ) t->int_arg[1],
1761  t->output_arg[0],
1762  t->cntl );
1763  }
1764  // FLA_Obj_free_buffer
1765  else if ( t->func == (void *) FLA_Obj_free_buffer_task )
1766  {
1767  flash_obj_free_buffer_p func;
1768  func = (flash_obj_free_buffer_p) t->func;
1769 
1770  func( t->output_arg[0],
1771  t->cntl );
1772  }
1773  else
1774  {
1775  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
1776  }
1777 
1778  return;
1779 }
FLA_Error FLA_Copyt_task(FLA_Trans trans, FLA_Obj A, FLA_Obj B, fla_copyt_t *cntl)
Definition: FLA_Copyt_task.c:13
FLA_Error FLA_Gemm_task(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t *cntl)
Definition: FLA_Gemm_task.c:13
int * int_arg
Definition: FLA_type_defs.h:210
Definition: FLA_Cntl_lapack.h:42
FLA_Error FLA_SA_LU_task(FLA_Obj U, FLA_Obj D, FLA_Obj p, FLA_Obj L, dim_t nb_alg, fla_lu_t *cntl)
Definition: FLA_SA_LU_task.c:13
FLA_Error FLA_Gemv_task(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y, fla_gemv_t *cntl)
Definition: FLA_Gemv_task.c:13
Definition: FLA_Cntl_blas1.h:77
Definition: FLA_Cntl_lapack.h:105
FLA_Error FLA_Apply_pivots_macro_task(FLA_Side side, FLA_Trans trans, FLA_Obj p, FLA_Obj A, fla_appiv_t *cntl)
Definition: FLA_Apply_pivots_macro_task.c:15
FLA_Error FLA_QR_UT_task(FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
Definition: FLA_QR_UT_task.c:15
Definition: FLA_Cntl_blas3.h:65
Definition: FLA_Cntl_blas3.h:90
unsigned long dim_t
Definition: FLA_type_defs.h:71
Definition: FLA_Cntl_lapack.h:162
FLA_Error FLA_Lyap_task(FLA_Trans trans, FLA_Obj isgn, FLA_Obj A, FLA_Obj C, FLA_Obj scale, fla_lyap_t *cntl)
Definition: FLA_Lyap_task.c:15
Definition: FLA_Cntl_lapack.h:80
Definition: FLA_Cntl_blas3.h:27
Definition: FLA_Cntl_blas3.h:115
Definition: FLA_Cntl_blas3.h:40
FLA_Error FLA_SA_FS_task(FLA_Obj L, FLA_Obj D, FLA_Obj p, FLA_Obj C, FLA_Obj E, dim_t nb_alg, fla_gemm_t *cntl)
Definition: FLA_SA_FS_task.c:13
FLA_Error FLA_Eig_gest_task(FLA_Inv inv, FLA_Uplo uplo, FLA_Obj A, FLA_Obj Y, FLA_Obj B, fla_eig_gest_t *cntl)
Definition: FLA_Eig_gest_task.c:16
Definition: FLA_Cntl_blas3.h:103
FLA_Error FLA_LU_piv_copy_task(FLA_Obj A, FLA_Obj p, FLA_Obj U, fla_lu_t *cntl)
Definition: FLA_LU_piv_copy_task.c:13
FLA_Error FLA_LU_piv_task(FLA_Obj A, FLA_Obj p, fla_lu_t *cntl)
Definition: FLA_LU_piv_task.c:15
FLA_Error FLA_CAQR2_UT_task(FLA_Obj B, FLA_Obj D, FLA_Obj T, fla_caqr2ut_t *cntl)
Definition: FLA_CAQR2_UT_task.c:15
Definition: FLA_Cntl_blas1.h:26
FLA_Error FLA_Axpyt_task(FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_axpyt_t *cntl)
Definition: FLA_Axpyt_task.c:13
FLA_Error FLA_Herk_task(FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj beta, FLA_Obj C, fla_herk_t *cntl)
Definition: FLA_Herk_task.c:13
FLA_Error FLA_Hemm_task(FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_hemm_t *cntl)
Definition: FLA_Hemm_task.c:13
Definition: FLA_Cntl_lapack.h:16
FLA_Error FLA_Symm_task(FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_symm_t *cntl)
Definition: FLA_Symm_task.c:13
FLA_Error FLA_UDdate_UT_task(FLA_Obj R, FLA_Obj C, FLA_Obj D, FLA_Obj T, fla_uddateut_t *cntl)
Definition: FLA_UDdate_UT_task.c:15
void * cntl
Definition: FLA_type_defs.h:200
Definition: FLA_Cntl_blas1.h:46
FLA_Error FLA_QR_UT_copy_task(FLA_Obj A, FLA_Obj T, FLA_Obj U, fla_qrut_t *cntl)
Definition: FLA_QR_UT_copy_task.c:15
int FLA_Direct
Definition: FLA_type_defs.h:58
FLA_Error FLA_LU_nopiv_task(FLA_Obj A, fla_lu_t *cntl)
Definition: FLA_LU_nopiv_task.c:15
int FLA_Diag
Definition: FLA_type_defs.h:55
Definition: FLA_Cntl_blas1.h:56
Definition: FLA_Cntl_lapack.h:317
FLA_Error FLA_Syr2k_task(FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_syr2k_t *cntl)
Definition: FLA_Syr2k_task.c:13
int FLA_Error
Definition: FLA_type_defs.h:47
FLA_Error FLA_Apply_Q_UT_task(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj A, FLA_Obj T, FLA_Obj W, FLA_Obj B, fla_apqut_t *cntl)
Definition: FLA_Apply_Q_UT_task.c:15
FLA_Error FLA_Trsm_task(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_trsm_t *cntl)
Definition: FLA_Trsm_task.c:13
FLA_Error FLA_Scal_task(FLA_Obj alpha, FLA_Obj A, fla_scal_t *cntl)
Definition: FLA_Scal_task.c:13
Definition: FLA_Cntl_lapack.h:210
Definition: FLA_Cntl_lapack.h:95
FLA_Error FLA_Trinv_task(FLA_Uplo uplo, FLA_Diag diag, FLA_Obj A, fla_trinv_t *cntl)
Definition: FLA_Trinv_task.c:15
Definition: FLA_Cntl_blas1.h:67
Definition: FLA_type_defs.h:158
Definition: FLA_Cntl_blas2.h:26
int FLA_Store
Definition: FLA_type_defs.h:59
FLA_Error FLA_Trsm_piv_task(FLA_Obj A, FLA_Obj B, FLA_Obj p, fla_trsm_t *cntl)
Definition: FLA_Trsm_piv_task.c:13
FLA_Error FLA_LU_piv_macro_task(FLA_Obj A, FLA_Obj p, fla_lu_t *cntl)
Definition: FLA_LU_piv_macro_task.c:13
FLA_Error FLA_Sylv_task(FLA_Trans transa, FLA_Trans transb, FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
Definition: FLA_Sylv_task.c:15
Definition: FLA_Cntl_lapack.h:227
FLA_Error FLA_Copyr_task(FLA_Uplo uplo, FLA_Obj A, FLA_Obj B, fla_copyr_t *cntl)
Definition: FLA_Copyr_task.c:13
Definition: FLA_Cntl_blas3.h:78
FLA_Error FLA_Apply_CAQ2_UT_task(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj D, FLA_Obj T, FLA_Obj W, FLA_Obj C, FLA_Obj E, fla_apcaq2ut_t *cntl)
Definition: FLA_Apply_CAQ2_UT_task.c:15
FLA_Error FLA_Obj_free_buffer_task(FLA_Obj obj, void *cntl)
Definition: FLA_Obj_free_buffer_task.c:13
FLA_Error FLA_Apply_Q2_UT_task(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj D, FLA_Obj T, FLA_Obj W, FLA_Obj C, FLA_Obj E, fla_apq2ut_t *cntl)
Definition: FLA_Apply_Q2_UT_task.c:15
FLA_Error FLA_Axpy_task(FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_axpy_t *cntl)
Definition: FLA_Axpy_task.c:13
FLA_Error FLA_QR_UT_macro_task(FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
Definition: FLA_QR_UT_macro_task.c:15
FLA_Error FLA_QR2_UT_task(FLA_Obj B, FLA_Obj D, FLA_Obj T, fla_qr2ut_t *cntl)
Definition: FLA_QR2_UT_task.c:15
FLA_Error FLA_Trsv_task(FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj A, FLA_Obj x, fla_trsv_t *cntl)
Definition: FLA_Trsv_task.c:13
Definition: FLA_Cntl_lapack.h:148
Definition: FLA_Cntl_lapack.h:263
FLA_Obj * output_arg
Definition: FLA_type_defs.h:222
Definition: FLA_Cntl_lapack.h:182
int FLA_Trans
Definition: FLA_type_defs.h:53
FLA_Error FLA_Apply_QUD_UT_task(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj T, FLA_Obj W, FLA_Obj R, FLA_Obj U, FLA_Obj C, FLA_Obj V, FLA_Obj D, fla_apqudut_t *cntl)
Definition: FLA_Apply_QUD_UT_task.c:15
FLA_Error FLA_Obj_create_buffer_task(dim_t rs, dim_t cs, FLA_Obj obj, void *cntl)
Definition: FLA_Obj_create_buffer_task.c:13
int FLA_Uplo
Definition: FLA_type_defs.h:52
int FLA_Side
Definition: FLA_type_defs.h:51
int FLA_Inv
Definition: FLA_type_defs.h:63
Definition: FLA_Cntl_lapack.h:29
Definition: FLA_Cntl_blas3.h:52
Definition: FLA_Cntl_blas1.h:16
FLA_Obj * input_arg
Definition: FLA_type_defs.h:218
Definition: FLA_Cntl_lapack.h:69
Definition: FLA_Cntl_lapack.h:306
FLA_Error FLA_Copy_task(FLA_Obj A, FLA_Obj B, fla_copy_t *cntl)
Definition: FLA_Copy_task.c:13
Definition: FLA_Cntl_blas1.h:36
Definition: FLA_Cntl_blas3.h:16
FLA_Error FLA_Syrk_task(FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj beta, FLA_Obj C, fla_syrk_t *cntl)
Definition: FLA_Syrk_task.c:13
Definition: FLA_Cntl_blas2.h:16
FLA_Obj * fla_arg
Definition: FLA_type_defs.h:214
FLA_Error FLA_Scalr_task(FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, fla_scalr_t *cntl)
Definition: FLA_Scalr_task.c:13
FLA_Error FLA_LQ_UT_macro_task(FLA_Obj A, FLA_Obj T, fla_lqut_t *cntl)
Definition: FLA_LQ_UT_macro_task.c:15
FLA_Error FLA_Trmm_task(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_trmm_t *cntl)
Definition: FLA_Trmm_task.c:13
void * func
Definition: FLA_type_defs.h:197
FLA_Error FLA_Ttmm_task(FLA_Uplo uplo, FLA_Obj A, fla_ttmm_t *cntl)
Definition: FLA_Ttmm_task.c:15
FLA_Error FLA_Chol_task(FLA_Uplo uplo, FLA_Obj A, fla_chol_t *cntl)
Definition: FLA_Chol_task.c:15
Definition: FLA_Cntl_lapack.h:52
Definition: FLA_Cntl_lapack.h:355
FLA_Error FLA_Her2k_task(FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_her2k_t *cntl)
Definition: FLA_Her2k_task.c:13

◆ FLASH_Queue_finalize()

void FLASH_Queue_finalize ( void  )
268 {
269  // Exit early if we're not already initialized.
270  if ( flash_queue_initialized == FALSE )
271  return;
272 
273  // Clear the initialized flag.
274  flash_queue_initialized = FALSE;
275 
276 #ifdef FLA_ENABLE_GPU
277  FLASH_Queue_finalize_gpu();
278 #endif
279 
280  return;
281 }
void FLASH_Queue_finalize_gpu(void)
Definition: FLASH_Queue_gpu.c:36

◆ FLASH_Queue_get_block_size()

dim_t FLASH_Queue_get_block_size ( void  )

Referenced by FLASH_Queue_exec().

482 {
483  return flash_queue_block_size;
484 }

◆ FLASH_Queue_get_cache_line_size()

dim_t FLASH_Queue_get_cache_line_size ( void  )

Referenced by FLASH_Queue_prefetch_block().

530 {
531  return flash_queue_cache_line_size;
532 }

◆ FLASH_Queue_get_cache_size()

dim_t FLASH_Queue_get_cache_size ( void  )

Referenced by FLASH_Queue_exec().

506 {
507  return flash_queue_cache_size;
508 }

◆ FLASH_Queue_get_caching()

FLA_Bool FLASH_Queue_get_caching ( void  )

Referenced by FLASH_Queue_exec(), FLASH_Queue_exec_parallel_function(), FLASH_Queue_wait_dequeue(), and FLASH_Task_update_dependencies().

362 {
363  return flash_queue_caching;
364 }

◆ FLASH_Queue_get_cores_per_cache()

int FLASH_Queue_get_cores_per_cache ( void  )

Referenced by FLASH_Queue_exec(), FLASH_Queue_exec_parallel_function(), and FLASH_Queue_exec_simulation().

554 {
555  return flash_queue_cores_per_cache;
556 }

◆ FLASH_Queue_get_cores_per_queue()

int FLASH_Queue_get_cores_per_queue ( void  )

Referenced by FLASH_Queue_exec().

578 {
579  return flash_queue_cores_per_queue;
580 }

◆ FLASH_Queue_get_data_affinity()

FLASH_Data_aff FLASH_Queue_get_data_affinity ( void  )

Referenced by FLASH_Queue_exec(), FLASH_Queue_init_tasks(), and FLASH_Queue_verbose_output().

410 {
411  return flash_queue_data_affinity;
412 }

◆ FLASH_Queue_get_enabled()

FLA_Bool FLASH_Queue_get_enabled ( void  )

◆ FLASH_Queue_get_head_task()

FLASH_Task* FLASH_Queue_get_head_task ( void  )

References FLASH_Queue_s::head.

Referenced by FLASH_Queue_init_tasks(), and FLASH_Queue_verbose_output().

609 {
610  return _tq.head;
611 }
FLASH_Queue _tq
Definition: FLASH_Queue.c:27
FLASH_Task * head
Definition: FLA_type_defs.h:179

◆ FLASH_Queue_get_num_tasks()

unsigned int FLASH_Queue_get_num_tasks ( void  )

References FLASH_Queue_s::n_tasks.

Referenced by FLASH_Queue_exec(), FLASH_Queue_exec_parallel_function(), FLASH_Queue_exec_simulation(), FLASH_Queue_init_tasks(), and FLASH_Queue_verbose_output().

290 {
291  return _tq.n_tasks;
292 }
FLASH_Queue _tq
Definition: FLASH_Queue.c:27
unsigned int n_tasks
Definition: FLA_type_defs.h:176

◆ FLASH_Queue_get_num_threads()

unsigned int FLASH_Queue_get_num_threads ( void  )

◆ FLASH_Queue_get_parallel_time()

double FLASH_Queue_get_parallel_time ( void  )
436 {
437  // Only return time if out of parallel region.
438  if ( flash_queue_stack == 0 )
439  return flash_queue_parallel_time;
440 
441  return 0.0;
442 }

◆ FLASH_Queue_get_sorting()

FLA_Bool FLASH_Queue_get_sorting ( void  )

Referenced by FLASH_Queue_wait_enqueue(), and FLASH_Task_update_binding().

338 {
339  return flash_queue_sorting;
340 }

◆ FLASH_Queue_get_tail_task()

FLASH_Task* FLASH_Queue_get_tail_task ( void  )

References FLASH_Queue_s::tail.

Referenced by FLASH_Queue_init_tasks().

620 {
621  return _tq.tail;
622 }
FLASH_Queue _tq
Definition: FLASH_Queue.c:27
FLASH_Task * tail
Definition: FLA_type_defs.h:180

◆ FLASH_Queue_get_total_time()

double FLASH_Queue_get_total_time ( void  )
421 {
422  // Only return time if out of parallel region.
423  if ( flash_queue_stack == 0 )
424  return flash_queue_total_time;
425 
426  return 0.0;
427 }

◆ FLASH_Queue_get_verbose_output()

FLASH_Verbose FLASH_Queue_get_verbose_output ( void  )

Referenced by FLASH_Queue_exec(), FLASH_Queue_exec_simulation(), and FLASH_Queue_verbose_output().

314 {
315  return flash_queue_verbose;
316 }

◆ FLASH_Queue_get_work_stealing()

FLA_Bool FLASH_Queue_get_work_stealing ( void  )

Referenced by FLASH_Queue_exec(), FLASH_Queue_exec_parallel_function(), and FLASH_Task_update_dependencies().

386 {
387  return flash_queue_work_stealing;
388 }

◆ FLASH_Queue_init()

void FLASH_Queue_init ( void  )
243 {
244  // Exit early if we're already initialized.
245  if ( flash_queue_initialized == TRUE )
246  return;
247 
248  // Reset all the initial values.
249  FLASH_Queue_reset();
250 
251  // Set the initialized flag.
252  flash_queue_initialized = TRUE;
253 
254 #ifdef FLA_ENABLE_GPU
255  FLASH_Queue_init_gpu();
256 #endif
257 
258  return;
259 }
void FLASH_Queue_reset(void)
Definition: FLASH_Queue.c:583
void FLASH_Queue_init_gpu(void)
Definition: FLASH_Queue_gpu.c:23

◆ FLASH_Queue_push()

void FLASH_Queue_push ( void *  func,
void *  cntl,
char *  name,
FLA_Bool  enabled_gpu,
int  n_int_args,
int  n_fla_args,
int  n_input_args,
int  n_output_args,
  ... 
)

References FLA_Obj_view::base, FLASH_Task_s::fla_arg, FLA_Obj_col_stride(), FLA_Obj_elemtype(), FLA_Obj_length(), FLA_Obj_width(), FLASH_Queue_push_input(), FLASH_Task_alloc(), i, FLASH_Task_s::input_arg, FLASH_Task_s::int_arg, FLASH_Task_s::n_macro_args, FLASH_Task_s::output_arg, FLASH_Task_s::queue, and FLA_Obj_struct::write_task.

639 {
640  int i;
641  va_list var_arg_list;
642  FLASH_Task* t;
643  FLA_Obj obj;
644 
645  // Allocate a new FLA_Task and populate its fields with appropriate values.
646  t = FLASH_Task_alloc( func, cntl, name, enabled_gpu,
647  n_int_args, n_fla_args,
648  n_input_args, n_output_args );
649 
650  // Initialize variable argument environment. In case you're wondering, the
651  // second argument in this macro invocation of va_start() is supposed to be
652  // the parameter that immediately precedes the variable argument list
653  // (i.e., the ... above).
654  va_start( var_arg_list, n_output_args );
655 
656  // Extract the integer arguments.
657  for ( i = 0; i < n_int_args; i++ )
658  t->int_arg[i] = va_arg( var_arg_list, int );
659 
660  // Extract the FLA_Obj arguments.
661  for ( i = 0; i < n_fla_args; i++ )
662  t->fla_arg[i] = va_arg( var_arg_list, FLA_Obj );
663 
664  // Extract the input FLA_Obj arguments.
665  for ( i = 0; i < n_input_args; i++ )
666  {
667  obj = va_arg( var_arg_list, FLA_Obj );
668  t->input_arg[i] = obj;
669 
670  // Macroblock is used.
671  if ( FLA_Obj_elemtype( obj ) == FLA_MATRIX )
672  {
673  dim_t jj, kk;
674  dim_t m = FLA_Obj_length( obj );
675  dim_t n = FLA_Obj_width( obj );
676  dim_t cs = FLA_Obj_col_stride( obj );
677  FLA_Obj* buf = FLASH_OBJ_PTR_AT( obj );
678 
679  // Dependence analysis for each input block in macroblock.
680  for ( jj = 0; jj < n; jj++ )
681  for ( kk = 0; kk < m; kk++ )
682  FLASH_Queue_push_input( *( buf + jj * cs + kk ), t );
683 
684  // Add the number of blocks in the macroblock, less one, to n_macro_args
685  // since we do not want to recount the operand already counted as an input arg.
686  t->n_macro_args += m * n - 1;
687  }
688  else // Regular block.
689  {
690  // Dependence analysis for input operand.
691  FLASH_Queue_push_input( obj, t );
692  }
693  }
694 
695  // Extract the output FLA_Obj arguments.
696  for ( i = 0; i < n_output_args; i++ )
697  {
698  obj = va_arg( var_arg_list, FLA_Obj );
699  t->output_arg[i] = obj;
700 
701  // Only assign data affinity to the first output block.
702  if ( i == 0 )
703  {
704  FLA_Obj buf = obj;
705 
706  // Use the top left block of the macroblock.
707  if ( FLA_Obj_elemtype( obj ) == FLA_MATRIX )
708  buf = *FLASH_OBJ_PTR_AT( obj );
709 
710  if ( buf.base->write_task == NULL )
711  t->queue = flash_queue_n_write_blocks;
712  else
713  t->queue = buf.base->write_task->queue;
714  }
715 
716  // Macroblock is used.
717  if ( FLA_Obj_elemtype( obj ) == FLA_MATRIX )
718  {
719  dim_t jj, kk;
720  dim_t m = FLA_Obj_length( obj );
721  dim_t n = FLA_Obj_width( obj );
722  dim_t cs = FLA_Obj_col_stride( obj );
723  FLA_Obj* buf = FLASH_OBJ_PTR_AT( obj );
724 
725  // Dependence analysis for each output block in macroblock.
726  for ( jj = 0; jj < n; jj++ )
727  for ( kk = 0; kk < m; kk++ )
728  FLASH_Queue_push_output( *( buf + jj * cs + kk ), t );
729 
730  // Add the number of blocks in the macroblock, less one, to n_macro_args
731  // since we do not want to recount the operand already counted as an output arg.
732  t->n_macro_args += m * n - 1;
733  }
734  else // Regular block.
735  {
736  // Dependence analysis for output operand.
737  FLASH_Queue_push_output( obj, t );
738  }
739  }
740 
741  // Finalize the variable argument environment.
742  va_end( var_arg_list );
743 
744  // Add the task to the tail of the queue (and the head if queue is empty).
745  if ( _tq.n_tasks == 0 )
746  {
747  _tq.head = t;
748  _tq.tail = t;
749  }
750  else
751  {
752  t->prev_task = _tq.tail;
753  _tq.tail->next_task = t;
754  _tq.tail = t;
755 
756  // Determine the index of the task in the task queue.
757  t->order = t->prev_task->order + 1;
758  }
759 
760  // Increment the number of tasks.
761  _tq.n_tasks++;
762 
763  return;
764 }
int * int_arg
Definition: FLA_type_defs.h:210
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLASH_Queue _tq
Definition: FLASH_Queue.c:27
FLASH_Task * next_task
Definition: FLA_type_defs.h:237
FLA_Base_obj * base
Definition: FLA_type_defs.h:168
void FLASH_Queue_push_output(FLA_Obj obj, FLASH_Task *t)
Definition: FLASH_Queue.c:842
Definition: FLA_type_defs.h:183
FLASH_Task * tail
Definition: FLA_type_defs.h:180
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLASH_Task * head
Definition: FLA_type_defs.h:179
int queue
Definition: FLA_type_defs.h:190
FLA_Obj * output_arg
Definition: FLA_type_defs.h:222
int n_macro_args
Definition: FLA_type_defs.h:225
int order
Definition: FLA_type_defs.h:189
FLASH_Task * prev_task
Definition: FLA_type_defs.h:236
FLASH_Task * write_task
Definition: FLA_type_defs.h:154
void FLASH_Queue_push_input(FLA_Obj obj, FLASH_Task *t)
Definition: FLASH_Queue.c:767
FLA_Obj * input_arg
Definition: FLA_type_defs.h:218
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
int i
Definition: bl1_axmyv2.c:145
FLA_Obj * fla_arg
Definition: FLA_type_defs.h:214
unsigned int n_tasks
Definition: FLA_type_defs.h:176
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLASH_Task * FLASH_Task_alloc(void *func, void *cntl, char *name, FLA_Bool enabled_gpu, int n_int_args, int n_fla_args, int n_input_args, int n_output_args)
Definition: FLASH_Queue.c:956
FLA_Elemtype FLA_Obj_elemtype(FLA_Obj obj)
Definition: FLA_Query.c:51

◆ FLASH_Queue_push_input()

void FLASH_Queue_push_input ( FLA_Obj  obj,
FLASH_Task *  t 
)

References FLA_Obj_view::base, FLA_Obj_struct::n_read_blocks, FLA_Obj_struct::n_read_tasks, FLASH_Task_s::n_ready, and FLA_Obj_struct::write_task.

Referenced by FLASH_Queue_push().

774 {
775  FLASH_Task* task;
776  FLASH_Dep* d;
777 
778  // Find dependence information.
779  if ( obj.base->write_task == NULL )
780  {
781  t->n_ready--;
782 
783  // Add to number of blocks read if not written and not read before.
784  if ( obj.base->n_read_tasks == 0 )
785  {
786  // Identify each read block with an id for freeing.
787  obj.base->n_read_blocks = flash_queue_n_read_blocks;
788 
789  flash_queue_n_read_blocks++;
790  }
791  }
792  else
793  { // Flow dependence.
794  task = obj.base->write_task;
795 
796  d = (FLASH_Dep *) FLA_malloc( sizeof(FLASH_Dep) );
797 
798  d->task = t;
799  d->next_dep = NULL;
800 
801  if ( task->n_dep_args == 0 )
802  {
803  task->dep_arg_head = d;
804  task->dep_arg_tail = d;
805  }
806  else
807  {
808  task->dep_arg_tail->next_dep = d;
809  task->dep_arg_tail = d;
810  }
811 
812  task->n_dep_args++;
813  }
814 
815  // Add task to the read task in the object if not already there.
816  if ( obj.base->n_read_tasks == 0 ||
817  obj.base->read_task_tail->task != t )
818  { // Anti-dependence potentially.
819  d = (FLASH_Dep *) FLA_malloc( sizeof(FLASH_Dep) );
820 
821  d->task = t;
822  d->next_dep = NULL;
823 
824  if ( obj.base->n_read_tasks == 0 )
825  {
826  obj.base->read_task_head = d;
827  obj.base->read_task_tail = d;
828  }
829  else
830  {
831  obj.base->read_task_tail->next_dep = d;
832  obj.base->read_task_tail = d;
833  }
834 
835  obj.base->n_read_tasks++;
836  }
837 
838  return;
839 }
FLASH_Dep * dep_arg_head
Definition: FLA_type_defs.h:232
FLASH_Dep * read_task_tail
Definition: FLA_type_defs.h:151
Definition: FLA_type_defs.h:244
FLA_Base_obj * base
Definition: FLA_type_defs.h:168
int n_ready
Definition: FLA_type_defs.h:186
int n_read_tasks
Definition: FLA_type_defs.h:149
FLASH_Dep * dep_arg_tail
Definition: FLA_type_defs.h:233
Definition: FLA_type_defs.h:183
FLASH_Task * task
Definition: FLA_type_defs.h:247
FLASH_Dep * next_dep
Definition: FLA_type_defs.h:250
int n_dep_args
Definition: FLA_type_defs.h:231
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLASH_Task * write_task
Definition: FLA_type_defs.h:154
int n_read_blocks
Definition: FLA_type_defs.h:145
FLASH_Dep * read_task_head
Definition: FLA_type_defs.h:150

◆ FLASH_Queue_push_output()

void FLASH_Queue_push_output ( FLA_Obj  obj,
FLASH_Task *  t 
)

References FLA_Obj_view::base, i, FLASH_Task_s::n_ready, FLA_Obj_struct::n_write_blocks, and FLA_Obj_struct::write_task.

849 {
850  int i;
851  FLASH_Task* task;
852  FLASH_Dep* d;
853  FLASH_Dep* next_dep;
854 
855  // Assign tasks to threads with data affinity.
856  if ( obj.base->write_task == NULL )
857  {
858  t->n_ready--;
859 
860  // Save index in which this output block is first encountered.
861  obj.base->n_write_blocks = flash_queue_n_write_blocks;
862 
863  // Number of blocks written if not written before.
864  flash_queue_n_write_blocks++;
865 
866  // Add to number of blocks read if not written or read before.
867  if ( obj.base->n_read_tasks == 0 )
868  {
869  // Identify each read block with an id for freeing.
870  obj.base->n_read_blocks = flash_queue_n_read_blocks;
871 
872  flash_queue_n_read_blocks++;
873  }
874  }
875  else
876  { // Flow dependence potentially.
877  // The last task to overwrite this block is not itself.
878  if ( obj.base->write_task != t )
879  {
880  // Create dependency from task that last wrote the block.
881  task = obj.base->write_task;
882 
883  d = (FLASH_Dep *) FLA_malloc( sizeof(FLASH_Dep) );
884 
885  d->task = t;
886  d->next_dep = NULL;
887 
888  if ( task->n_dep_args == 0 )
889  {
890  task->dep_arg_head = d;
891  task->dep_arg_tail = d;
892  }
893  else
894  {
895  task->dep_arg_tail->next_dep = d;
896  task->dep_arg_tail = d;
897  }
898 
899  task->n_dep_args++;
900  }
901  else
902  {
903  // No need to notify task twice for output block already seen.
904  t->n_ready--;
905  }
906  }
907 
908  // Clear read task for next set of reads and record the anti-dependence.
909  d = obj.base->read_task_head;
910 
911  for ( i = 0; i < obj.base->n_read_tasks; i++ )
912  {
913  task = d->task;
914  next_dep = d->next_dep;
915 
916  // If the last task to read is not the current task, add dependence.
917  if ( task != t )
918  {
919  d->task = t;
920  d->next_dep = NULL;
921 
922  if ( task->n_dep_args == 0 )
923  {
924  task->dep_arg_head = d;
925  task->dep_arg_tail = d;
926  }
927  else
928  {
929  task->dep_arg_tail->next_dep = d;
930  task->dep_arg_tail = d;
931  }
932 
933  task->n_dep_args++;
934 
935  t->n_war_args++;
936  }
937  else
938  {
939  FLA_free( d );
940  }
941 
942  d = next_dep;
943  }
944 
945  obj.base->n_read_tasks = 0;
946  obj.base->read_task_head = NULL;
947  obj.base->read_task_tail = NULL;
948 
949  // Record this task as the last to write to this block.
950  obj.base->write_task = t;
951 
952  return;
953 }
FLASH_Dep * dep_arg_head
Definition: FLA_type_defs.h:232
FLASH_Dep * read_task_tail
Definition: FLA_type_defs.h:151
Definition: FLA_type_defs.h:244
FLA_Base_obj * base
Definition: FLA_type_defs.h:168
int n_ready
Definition: FLA_type_defs.h:186
int n_read_tasks
Definition: FLA_type_defs.h:149
FLASH_Dep * dep_arg_tail
Definition: FLA_type_defs.h:233
Definition: FLA_type_defs.h:183
FLASH_Task * task
Definition: FLA_type_defs.h:247
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
FLASH_Dep * next_dep
Definition: FLA_type_defs.h:250
int n_dep_args
Definition: FLA_type_defs.h:231
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
int n_war_args
Definition: FLA_type_defs.h:228
FLASH_Task * write_task
Definition: FLA_type_defs.h:154
int n_write_blocks
Definition: FLA_type_defs.h:146
int i
Definition: bl1_axmyv2.c:145
int n_read_blocks
Definition: FLA_type_defs.h:145
FLASH_Dep * read_task_head
Definition: FLA_type_defs.h:150

◆ FLASH_Queue_reset()

void FLASH_Queue_reset ( void  )

References FLASH_Queue_s::head, FLASH_Queue_s::n_tasks, and FLASH_Queue_s::tail.

Referenced by FLASH_Queue_exec().

589 {
590  // Clear the other fields of the FLASH_Queue structure.
591  _tq.n_tasks = 0;
592  _tq.head = NULL;
593  _tq.tail = NULL;
594 
595  // Reset the number of blocks.
596  flash_queue_n_read_blocks = 0;
597  flash_queue_n_write_blocks = 0;
598 
599  return;
600 }
FLASH_Queue _tq
Definition: FLASH_Queue.c:27
FLASH_Task * tail
Definition: FLA_type_defs.h:180
FLASH_Task * head
Definition: FLA_type_defs.h:179
unsigned int n_tasks
Definition: FLA_type_defs.h:176

◆ FLASH_Queue_set_block_size()

void FLASH_Queue_set_block_size ( dim_t  size)

Referenced by FLASH_Obj_create_hierarchy().

467 {
468  // Only adjust the block size if the new block is larger.
469  if ( flash_queue_block_size < size )
470  flash_queue_block_size = size;
471 
472  return;
473 }

◆ FLASH_Queue_set_cache_line_size()

void FLASH_Queue_set_cache_line_size ( dim_t  size)
517 {
518  flash_queue_cache_line_size = size;
519 
520  return;
521 }

◆ FLASH_Queue_set_cache_size()

void FLASH_Queue_set_cache_size ( dim_t  size)
493 {
494  flash_queue_cache_size = size;
495 
496  return;
497 }

◆ FLASH_Queue_set_caching()

void FLASH_Queue_set_caching ( FLA_Bool  caching)

Referenced by FLASH_Queue_exec().

349 {
350  flash_queue_caching = caching;
351 
352  return;
353 }

◆ FLASH_Queue_set_cores_per_cache()

void FLASH_Queue_set_cores_per_cache ( int  cores)
541 {
542  flash_queue_cores_per_cache = cores;
543 
544  return;
545 }

◆ FLASH_Queue_set_cores_per_queue()

void FLASH_Queue_set_cores_per_queue ( int  cores)
565 {
566  flash_queue_cores_per_queue = cores;
567 
568  return;
569 }

◆ FLASH_Queue_set_data_affinity()

void FLASH_Queue_set_data_affinity ( FLASH_Data_aff  data_affinity)

Referenced by FLASH_Queue_exec().

397 {
398  flash_queue_data_affinity = data_affinity;
399 
400  return;
401 }

◆ FLASH_Queue_set_num_threads()

void FLASH_Queue_set_num_threads ( unsigned int  n_threads)

References FLA_Check_num_threads().

193 {
194  FLA_Error e_val;
195 
196  // Verify that the number of threads is positive.
197  e_val = FLA_Check_num_threads( n_threads );
198  FLA_Check_error_code( e_val );
199 
200  // Keep track of the number of threads internally.
201  flash_queue_n_threads = n_threads;
202 
203 #if FLA_MULTITHREADING_MODEL == FLA_OPENMP
204 
205  // No additional action is necessary to set the number of OpenMP threads
206  // since setting the number of threads is handled at the parallel for loop
207  // with a num_threads() clause. This gives the user more flexibility since
208  // he can use the OMP_NUM_THREADS environment variable or the
209  // omp_set_num_threads() function to set the global number of OpenMP threads
210  // independently of the number of SuperMatrix threads.
211 
212 #elif FLA_MULTITHREADING_MODEL == FLA_PTHREADS
213 
214  // No additional action is necessary to set the number of pthreads
215  // since setting the number of threads is handled entirely on our end.
216 
217 #endif
218 
219  return;
220 }
FLA_Error FLA_Check_num_threads(unsigned int n_threads)
Definition: FLA_Check.c:884
int FLA_Error
Definition: FLA_type_defs.h:47

◆ FLASH_Queue_set_parallel_time()

void FLASH_Queue_set_parallel_time ( double  dtime)

Referenced by FLASH_Queue_exec().

454 {
455  flash_queue_parallel_time = dtime;
456 
457  return;
458 }

◆ FLASH_Queue_set_sorting()

void FLASH_Queue_set_sorting ( FLA_Bool  sorting)
325 {
326  flash_queue_sorting = sorting;
327 
328  return;
329 }

◆ FLASH_Queue_set_verbose_output()

void FLASH_Queue_set_verbose_output ( FLASH_Verbose  verbose)
301 {
302  flash_queue_verbose = verbose;
303 
304  return;
305 }

◆ FLASH_Queue_set_work_stealing()

void FLASH_Queue_set_work_stealing ( FLA_Bool  work_stealing)

Referenced by FLASH_Queue_exec().

373 {
374  flash_queue_work_stealing = work_stealing;
375 
376  return;
377 }

◆ FLASH_Queue_stack_depth()

unsigned int FLASH_Queue_stack_depth ( void  )

Referenced by FLASH_Eig_gest(), FLASH_LU_incpiv(), FLASH_QR_UT_inc(), FLASH_Queue_disable_gpu(), and FLASH_Queue_enable_gpu().

112 {
113  return flash_queue_stack;
114 }

◆ FLASH_Queue_verbose_output()

void FLASH_Queue_verbose_output ( void  )

References FLA_Obj_view::base, FLASH_Task_s::dep_arg_head, FLASH_Queue_get_data_affinity(), FLASH_Queue_get_head_task(), FLASH_Queue_get_num_tasks(), FLASH_Queue_get_num_threads(), FLASH_Queue_get_verbose_output(), i, FLA_Obj_struct::id, FLASH_Task_s::input_arg, FLA_Obj_struct::m_index, FLASH_Task_s::n_dep_args, FLA_Obj_struct::n_index, FLASH_Task_s::n_input_args, FLASH_Task_s::n_output_args, FLASH_Task_s::name, FLASH_Dep_s::next_dep, FLASH_Task_s::next_task, FLASH_Task_s::order, FLASH_Task_s::output_arg, FLASH_Task_s::queue, and FLASH_Dep_s::task.

Referenced by FLASH_Queue_exec().

1788 {
1789  int i, j, k;
1790  int n_threads = FLASH_Queue_get_num_threads();
1791  int n_tasks = FLASH_Queue_get_num_tasks();
1792  FLASH_Verbose verbose = FLASH_Queue_get_verbose_output();
1793  FLASH_Task* t;
1794  FLASH_Dep* d;
1795 
1796  // Grab the head of the task queue.
1797  t = FLASH_Queue_get_head_task();
1798 
1799  if ( verbose == FLASH_QUEUE_VERBOSE_READABLE )
1800  {
1801  // Iterate over linked list of tasks.
1802  for ( i = 0; i < n_tasks; i++ )
1803  {
1804  printf( "%d\t%s\t", t->order, t->name );
1805 
1806  for ( j = 0; j < t->n_output_args; j++ )
1807  printf( "%lu[%lu,%lu] ", t->output_arg[j].base->id,
1808  t->output_arg[j].base->m_index,
1809  t->output_arg[j].base->n_index );
1810 
1811  printf( ":= " );
1812 
1813  for ( j = 0; j < t->n_output_args; j++ )
1814  printf( "%lu[%lu,%lu] ", t->output_arg[j].base->id,
1815  t->output_arg[j].base->m_index,
1816  t->output_arg[j].base->n_index );
1817 
1818  for ( j = 0; j < t->n_input_args; j++ )
1819  printf( "%lu[%lu,%lu] ", t->input_arg[j].base->id,
1820  t->input_arg[j].base->m_index,
1821  t->input_arg[j].base->n_index );
1822 
1823  printf( "\n" );
1824 
1825  // Go to the next task.
1826  t = t->next_task;
1827  }
1828 
1829  printf( "\n" );
1830  }
1831  else
1832  {
1833  printf( "digraph SuperMatrix {\n" );
1834 
1835  if ( FLASH_Queue_get_data_affinity() == FLASH_QUEUE_AFFINITY_NONE )
1836  {
1837  // Iterate over linked list of tasks.
1838  for ( i = 0; i < n_tasks; i++ )
1839  {
1840  printf( "%d [label=\"%s\"]; %d -> {", t->order, t->name, t->order);
1841 
1842  d = t->dep_arg_head;
1843  for ( j = 0; j < t->n_dep_args; j++ )
1844  {
1845  printf( "%d;", d->task->order );
1846  d = d->next_dep;
1847  }
1848 
1849  printf( "};\n" );
1850 
1851  // Go to the next task.
1852  t = t->next_task;
1853  }
1854  }
1855  else
1856  {
1857  // Iterate over all the threads.
1858  for ( k = 0; k < n_threads; k++ )
1859  {
1860  printf( "subgraph cluster%d {\nlabel=\"%d\"\n", k, k );
1861 
1862  // Iterate over linked list of tasks.
1863  for ( i = 0; i < n_tasks; i++ )
1864  {
1865  if ( t->queue == k )
1866  printf( "%d [label=\"%s\"];\n", t->order, t->name );
1867 
1868  // Go to the next task.
1869  t = t->next_task;
1870  }
1871 
1872  printf( "}\n" );
1873 
1874  // Grab the head of the task queue.
1875  t = FLASH_Queue_get_head_task();
1876  }
1877 
1878  // Iterate over linked list of tasks.
1879  for ( i = 0; i < n_tasks; i++ )
1880  {
1881  printf( "%d -> {", t->order );
1882 
1883  d = t->dep_arg_head;
1884  for ( j = 0; j < t->n_dep_args; j++ )
1885  {
1886  printf( "%d;", d->task->order );
1887  d = d->next_dep;
1888  }
1889 
1890  printf( "};\n" );
1891 
1892  // Go to the next task.
1893  t = t->next_task;
1894  }
1895  }
1896 
1897  printf( "}\n\n" );
1898  }
1899 
1900  return;
1901 }
FLASH_Dep * dep_arg_head
Definition: FLA_type_defs.h:232
FLASH_Task * next_task
Definition: FLA_type_defs.h:237
FLASH_Verbose FLASH_Queue_get_verbose_output(void)
Definition: FLASH_Queue.c:308
Definition: FLA_type_defs.h:244
FLA_Base_obj * base
Definition: FLA_type_defs.h:168
dim_t n_index
Definition: FLA_type_defs.h:135
unsigned long id
Definition: FLA_type_defs.h:133
Definition: FLA_type_defs.h:183
int FLASH_Verbose
Definition: FLA_type_defs.h:113
FLASH_Task * task
Definition: FLA_type_defs.h:247
FLASH_Dep * next_dep
Definition: FLA_type_defs.h:250
int n_dep_args
Definition: FLA_type_defs.h:231
int n_input_args
Definition: FLA_type_defs.h:217
FLASH_Task * FLASH_Queue_get_head_task(void)
Definition: FLASH_Queue.c:603
int n_output_args
Definition: FLA_type_defs.h:221
int queue
Definition: FLA_type_defs.h:190
FLA_Obj * output_arg
Definition: FLA_type_defs.h:222
int order
Definition: FLA_type_defs.h:189
FLA_Obj * input_arg
Definition: FLA_type_defs.h:218
int i
Definition: bl1_axmyv2.c:145
FLASH_Data_aff FLASH_Queue_get_data_affinity(void)
Definition: FLASH_Queue.c:404
dim_t m_index
Definition: FLA_type_defs.h:134
unsigned int FLASH_Queue_get_num_threads(void)
Definition: FLASH_Queue.c:223
unsigned int FLASH_Queue_get_num_tasks(void)
Definition: FLASH_Queue.c:284
char * name
Definition: FLA_type_defs.h:203

◆ FLASH_Task_alloc()

FLASH_Task* FLASH_Task_alloc ( void *  func,
void *  cntl,
char *  name,
FLA_Bool  enabled_gpu,
int  n_int_args,
int  n_fla_args,
int  n_input_args,
int  n_output_args 
)

References FLASH_Task_s::cache, FLASH_Task_s::cntl, FLASH_Task_s::dep_arg_head, FLASH_Task_s::dep_arg_tail, FLASH_Task_s::enabled_gpu, FLASH_Task_s::fla_arg, FLA_malloc(), FLASH_Task_s::func, FLASH_Task_s::height, FLASH_Task_s::hit, FLASH_Task_s::input_arg, FLASH_Task_s::int_arg, FLASH_Task_s::n_dep_args, FLASH_Task_s::n_fla_args, FLASH_Task_s::n_input_args, FLASH_Task_s::n_int_args, FLASH_Task_s::n_macro_args, FLASH_Task_s::n_output_args, FLASH_Task_s::n_ready, FLASH_Task_s::n_war_args, FLASH_Task_s::name, FLASH_Task_s::next_task, FLASH_Task_s::next_wait, FLASH_Task_s::order, FLASH_Task_s::output_arg, FLASH_Task_s::prev_task, FLASH_Task_s::prev_wait, FLASH_Task_s::queue, and FLASH_Task_s::thread.

Referenced by FLASH_Queue_push().

969 {
970  FLASH_Task* t;
971 
972  // Allocate space for the task structure t.
973  t = (FLASH_Task *) FLA_malloc( sizeof(FLASH_Task) );
974 
975  // Allocate space for the task's integer arguments.
976  t->int_arg = (int *) FLA_malloc( n_int_args * sizeof(int) );
977 
978  // Allocate space for the task's FLA_Obj arguments.
979  t->fla_arg = (FLA_Obj *) FLA_malloc( n_fla_args * sizeof(FLA_Obj) );
980 
981  // Allocate space for the task's input FLA_Obj arguments.
982  t->input_arg = (FLA_Obj *) FLA_malloc( n_input_args * sizeof(FLA_Obj) );
983 
984  // Allocate space for the task's output FLA_Obj arguments.
985  t->output_arg = (FLA_Obj *) FLA_malloc( n_output_args * sizeof(FLA_Obj) );
986 
987  // Initialize other fields of the structure.
988  t->n_ready = 0;
989  t->order = 0;
990  t->queue = 0;
991  t->height = 0;
992  t->thread = 0;
993  t->cache = 0;
994  t->hit = FALSE;
995 
996  t->func = func;
997  t->cntl = cntl;
998  t->name = name;
999  t->enabled_gpu = enabled_gpu;
1000  t->n_int_args = n_int_args;
1001  t->n_fla_args = n_fla_args;
1002  t->n_input_args = n_input_args;
1003  t->n_output_args = n_output_args;
1004 
1005  t->n_macro_args = 0;
1006  t->n_war_args = 0;
1007  t->n_dep_args = 0;
1008  t->dep_arg_head = NULL;
1009  t->dep_arg_tail = NULL;
1010  t->prev_task = NULL;
1011  t->next_task = NULL;
1012  t->prev_wait = NULL;
1013  t->next_wait = NULL;
1014 
1015  // Return a pointer to the initialized structure.
1016  return t;
1017 }
int height
Definition: FLA_type_defs.h:191
int * int_arg
Definition: FLA_type_defs.h:210
FLASH_Dep * dep_arg_head
Definition: FLA_type_defs.h:232
FLASH_Task * next_task
Definition: FLA_type_defs.h:237
FLASH_Task * prev_wait
Definition: FLA_type_defs.h:240
int n_fla_args
Definition: FLA_type_defs.h:213
int n_ready
Definition: FLA_type_defs.h:186
void * cntl
Definition: FLA_type_defs.h:200
FLASH_Dep * dep_arg_tail
Definition: FLA_type_defs.h:233
Definition: FLA_type_defs.h:183
FLA_Bool enabled_gpu
Definition: FLA_type_defs.h:206
FLASH_Task * next_wait
Definition: FLA_type_defs.h:241
Definition: FLA_type_defs.h:158
int n_int_args
Definition: FLA_type_defs.h:209
int n_dep_args
Definition: FLA_type_defs.h:231
int n_input_args
Definition: FLA_type_defs.h:217
int n_output_args
Definition: FLA_type_defs.h:221
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
int n_war_args
Definition: FLA_type_defs.h:228
int queue
Definition: FLA_type_defs.h:190
FLA_Obj * output_arg
Definition: FLA_type_defs.h:222
int n_macro_args
Definition: FLA_type_defs.h:225
int order
Definition: FLA_type_defs.h:189
FLA_Bool hit
Definition: FLA_type_defs.h:194
FLASH_Task * prev_task
Definition: FLA_type_defs.h:236
FLA_Obj * input_arg
Definition: FLA_type_defs.h:218
int cache
Definition: FLA_type_defs.h:193
int thread
Definition: FLA_type_defs.h:192
FLA_Obj * fla_arg
Definition: FLA_type_defs.h:214
void * func
Definition: FLA_type_defs.h:197
char * name
Definition: FLA_type_defs.h:203

◆ FLASH_Task_free()

void FLASH_Task_free ( FLASH_Task * t)

References FLA_Obj_view::base, FLASH_Task_s::dep_arg_head, FLASH_Task_s::fla_arg, FLA_free(), FLA_Obj_col_stride(), FLA_Obj_elemtype(), FLA_Obj_length(), FLA_Obj_width(), i, FLASH_Task_s::input_arg, FLASH_Task_s::int_arg, FLASH_Task_s::n_dep_args, FLASH_Task_s::n_input_args, FLASH_Task_s::n_output_args, FLA_Obj_struct::n_read_tasks, FLASH_Dep_s::next_dep, FLASH_Task_s::output_arg, FLA_Obj_struct::read_task_head, FLA_Obj_struct::read_task_tail, and FLA_Obj_struct::write_task.

Referenced by FLASH_Queue_exec(), and FLASH_Queue_exec_simulation().

1026 {
1027  int i, j, k;
1028  FLA_Obj obj;
1029  FLASH_Dep* d;
1030  FLASH_Dep* next_dep;
1031  // Purpose: detach task t from every block it wrote or read, then release t and the arrays/lists it owns.
1032  // Step 1: clear the write_task pointer stored in the base of each output block.
1033  for ( i = 0; i < t->n_output_args; i++ )
1034  {
1035  obj = t->output_arg[i];
1036 
1037  // An element type of FLA_MATRIX means obj is a macroblock, i.e. its buffer holds FLA_Obj blocks.
1038  if ( FLA_Obj_elemtype( obj ) == FLA_MATRIX )
1039  {
1040  dim_t jj, kk;
1041  dim_t m = FLA_Obj_length( obj );
1042  dim_t n = FLA_Obj_width( obj );
1043  dim_t cs = FLA_Obj_col_stride( obj );
1044  FLA_Obj* buf = FLASH_OBJ_PTR_AT( obj );
1045 
1046  // Visit all n * m inner blocks; the block at (kk, jj) is addressed as buf + jj * cs + kk.
1047  for ( jj = 0; jj < n; jj++ )
1048  for ( kk = 0; kk < m; kk++ )
1049  ( buf + jj * cs + kk )->base->write_task = NULL;
1050  }
1051  else // Regular (non-macro) block: clear the pointer on obj's own base.
1052  {
1053  obj.base->write_task = NULL;
1054  }
1055  }
1056 
1057  // Step 2: empty the read-task list of each input block and free its FLASH_Dep nodes.
1058  for ( i = 0; i < t->n_input_args; i++ )
1059  {
1060  obj = t->input_arg[i];
1061 
1062  // Macroblock: tear down the read-task list of every block it contains.
1063  if ( FLA_Obj_elemtype( obj ) == FLA_MATRIX )
1064  {
1065  dim_t jj, kk;
1066  dim_t m = FLA_Obj_length( obj );
1067  dim_t n = FLA_Obj_width( obj );
1068  dim_t cs = FLA_Obj_col_stride( obj );
1069  FLA_Obj* buf = FLASH_OBJ_PTR_AT( obj );
1070 
1071  // Visit all n * m inner blocks. m, n, cs and buf were read above, so overwriting obj below is safe.
1072  for ( jj = 0; jj < n; jj++ )
1073  {
1074  for ( kk = 0; kk < m; kk++ )
1075  {
1076  obj = *( buf + jj * cs + kk ); // obj now refers to the inner block, not the macroblock
1077 
1078  k = obj.base->n_read_tasks;
1079  d = obj.base->read_task_head;
1080  // Detach the list from the block first; k and d still describe it.
1081  obj.base->n_read_tasks = 0;
1082  obj.base->read_task_head = NULL;
1083  obj.base->read_task_tail = NULL;
1084  // Free exactly k nodes; the saved count, not a NULL link, ends the walk.
1085  for ( j = 0; j < k; j++ )
1086  {
1087  next_dep = d->next_dep; // read the link before d is freed
1088  FLA_free( d );
1089  d = next_dep;
1090  }
1091  }
1092  }
1093  }
1094  else // Regular block: the same list teardown, applied to obj itself.
1095  {
1096  k = obj.base->n_read_tasks;
1097  d = obj.base->read_task_head;
1098  // Detach the list from the block first; k and d still describe it.
1099  obj.base->n_read_tasks = 0;
1100  obj.base->read_task_head = NULL;
1101  obj.base->read_task_tail = NULL;
1102  // Free exactly k nodes; the saved count, not a NULL link, ends the walk.
1103  for ( j = 0; j < k; j++ )
1104  {
1105  next_dep = d->next_dep; // read the link before d is freed
1106  FLA_free( d );
1107  d = next_dep;
1108  }
1109  }
1110  }
1111 
1112  // Step 3: free the t->n_dep_args nodes of t's dep_arg list, starting at dep_arg_head.
1113  d = t->dep_arg_head;
1114 
1115  for ( i = 0; i < t->n_dep_args; i++ )
1116  {
1117  next_dep = d->next_dep; // read the link before d is freed
1118  FLA_free( d );
1119  d = next_dep;
1120  }
1121 
1122  // Step 4: free the argument arrays owned by t, beginning with int_arg.
1123  FLA_free( t->int_arg );
1124 
1125  // Free the fla_arg array of t.
1126  FLA_free( t->fla_arg );
1127 
1128  // Free the input_arg array of t.
1129  FLA_free( t->input_arg );
1130 
1131  // Free the output_arg array of t.
1132  FLA_free( t->output_arg );
1133 
1134  // Finally, free the task structure itself; t is released here and should not be used by the caller afterwards.
1135  FLA_free( t );
1136 
1137  return;
1138 }
int * int_arg
Definition: FLA_type_defs.h:210
FLASH_Dep * dep_arg_head
Definition: FLA_type_defs.h:232
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLASH_Dep * read_task_tail
Definition: FLA_type_defs.h:151
Definition: FLA_type_defs.h:244
FLA_Base_obj * base
Definition: FLA_type_defs.h:168
int n_read_tasks
Definition: FLA_type_defs.h:149
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
FLASH_Dep * next_dep
Definition: FLA_type_defs.h:250
int n_dep_args
Definition: FLA_type_defs.h:231
int n_input_args
Definition: FLA_type_defs.h:217
int n_output_args
Definition: FLA_type_defs.h:221
FLA_Obj * output_arg
Definition: FLA_type_defs.h:222
FLASH_Task * write_task
Definition: FLA_type_defs.h:154
FLA_Obj * input_arg
Definition: FLA_type_defs.h:218
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
int i
Definition: bl1_axmyv2.c:145
FLA_Obj * fla_arg
Definition: FLA_type_defs.h:214
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Elemtype FLA_Obj_elemtype(FLA_Obj obj)
Definition: FLA_Query.c:51
FLASH_Dep * read_task_head
Definition: FLA_type_defs.h:150

Variable Documentation

◆ _tq