libflame  revision_anchor
Functions
FLASH_Queue_gpu.h File Reference

(r)

Go to the source code of this file.

Functions

void FLASH_Queue_init_gpu (void)
 
void FLASH_Queue_finalize_gpu (void)
 
FLA_Error FLASH_Queue_enable_gpu (void)
 
FLA_Error FLASH_Queue_disable_gpu (void)
 
FLA_Bool FLASH_Queue_get_enabled_gpu (void)
 
void FLASH_Queue_set_gpu_num_blocks (dim_t n_blocks)
 
dim_t FLASH_Queue_get_gpu_num_blocks (void)
 
FLA_Error FLASH_Queue_bind_gpu (int thread)
 
FLA_Error FLASH_Queue_alloc_gpu (dim_t size, FLA_Datatype datatype, void **buffer_gpu)
 
FLA_Error FLASH_Queue_free_gpu (void *buffer_gpu)
 
FLA_Error FLASH_Queue_write_gpu (FLA_Obj obj, void *buffer_gpu)
 
FLA_Error FLASH_Queue_read_gpu (FLA_Obj obj, void *buffer_gpu)
 
void FLASH_Queue_exec_task_gpu (FLASH_Task *t, void **input_arg, void **output_arg)
 

Function Documentation

◆ FLASH_Queue_alloc_gpu()

FLA_Error FLASH_Queue_alloc_gpu ( dim_t  size,
FLA_Datatype  datatype,
void **  buffer_gpu 
)

References FLA_Obj_datatype_size().

Referenced by FLASH_Queue_create_gpu().

155 {
156  cublasStatus status;
157 
158  // Allocate memory for a block on GPU.
159  status = cublasAlloc( size,
160  FLA_Obj_datatype_size( datatype ),
161  buffer_gpu );
162 
163  // Check to see if the allocation was successful.
164  if ( status != CUBLAS_STATUS_SUCCESS )
165  FLA_Check_error_code( FLA_MALLOC_GPU_RETURNED_NULL_POINTER );
166 
167  return FLA_SUCCESS;
168 }
dim_t FLA_Obj_datatype_size(FLA_Datatype datatype)
Definition: FLA_Query.c:61

◆ FLASH_Queue_bind_gpu()

FLA_Error FLASH_Queue_bind_gpu ( int  thread)

Referenced by FLASH_Queue_create_gpu().

139 {
140  // Bind a GPU to this thread.
141  cudaSetDevice( thread );
142 
143  return FLA_SUCCESS;
144 }

◆ FLASH_Queue_disable_gpu()

FLA_Error FLASH_Queue_disable_gpu ( void  )

References FLASH_Queue_stack_depth().

76 {
77  if ( FLASH_Queue_stack_depth() == 0 )
78  {
79  // Disable if not begin parallel region yet.
80  flash_queue_enabled_gpu = FALSE;
81  return FLA_SUCCESS;
82  }
83  else
84  {
85  // Cannot change status during parallel region.
86  return FLA_FAILURE;
87  }
88 }
unsigned int FLASH_Queue_stack_depth(void)
Definition: FLASH_Queue.c:106

◆ FLASH_Queue_enable_gpu()

FLA_Error FLASH_Queue_enable_gpu ( void  )

References FLASH_Queue_get_enabled(), and FLASH_Queue_stack_depth().

55 {
56  if ( FLASH_Queue_stack_depth() == 0 && FLASH_Queue_get_enabled() )
57  {
58  // Enable if not begin parallel region yet and SuperMatrix is enabled.
59  flash_queue_enabled_gpu = TRUE;
60  return FLA_SUCCESS;
61  }
62  else
63  {
64  // Cannot change status during parallel region.
65  return FLA_FAILURE;
66  }
67 }
FLA_Bool FLASH_Queue_get_enabled(void)
Definition: FLASH_Queue.c:171
unsigned int FLASH_Queue_stack_depth(void)
Definition: FLASH_Queue.c:106

◆ FLASH_Queue_exec_task_gpu()

void FLASH_Queue_exec_task_gpu ( FLASH_Task *  t,
void **  input_arg,
void **  output_arg 
)

References FLASH_Task_s::fla_arg, FLA_Axpy_external_gpu(), FLA_Axpy_task(), FLA_Copy_external_gpu(), FLA_Copy_task(), FLA_Gemm_external_gpu(), FLA_Gemm_task(), FLA_Gemv_external_gpu(), FLA_Gemv_task(), FLA_Hemm_external_gpu(), FLA_Hemm_task(), FLA_Her2k_external_gpu(), FLA_Her2k_task(), FLA_Herk_external_gpu(), FLA_Herk_task(), FLA_Scal_external_gpu(), FLA_Scal_task(), FLA_Scalr_external_gpu(), FLA_Scalr_task(), FLA_Symm_external_gpu(), FLA_Symm_task(), FLA_Syr2k_external_gpu(), FLA_Syr2k_task(), FLA_Syrk_external_gpu(), FLA_Syrk_task(), FLA_Trmm_external_gpu(), FLA_Trmm_task(), FLA_Trsm_external_gpu(), FLA_Trsm_task(), FLA_Trsv_external_gpu(), FLA_Trsv_task(), FLASH_Task_s::func, FLASH_Task_s::input_arg, FLASH_Task_s::int_arg, and FLASH_Task_s::output_arg.

Referenced by FLASH_Queue_exec_gpu().

233 {
234  // Define local function pointer types.
235 
236  // Level-3 BLAS
237  typedef FLA_Error(*flash_gemm_gpu_p)(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, void* A_gpu, FLA_Obj B, void* B_gpu, FLA_Obj beta, FLA_Obj C, void* C_gpu);
238  typedef FLA_Error(*flash_hemm_gpu_p)(FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, void* A_gpu, FLA_Obj B, void* B_gpu, FLA_Obj beta, FLA_Obj C, void* C_gpu);
239  typedef FLA_Error(*flash_herk_gpu_p)(FLA_Uplo uplo, FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, void* A_gpu, FLA_Obj beta, FLA_Obj C, void* C_gpu);
240  typedef FLA_Error(*flash_her2k_gpu_p)(FLA_Uplo uplo, FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, void* A_gpu, FLA_Obj B, void* B_gpu, FLA_Obj beta, FLA_Obj C, void* C_gpu);
241  typedef FLA_Error(*flash_symm_gpu_p)(FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, void* A_gpu, FLA_Obj B, void* B_gpu, FLA_Obj beta, FLA_Obj C, void* C_gpu);
242  typedef FLA_Error(*flash_syrk_gpu_p)(FLA_Uplo uplo, FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, void* A_gpu, FLA_Obj beta, FLA_Obj C, void* C_gpu);
243  typedef FLA_Error(*flash_syr2k_gpu_p)(FLA_Uplo uplo, FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, void* A_gpu, FLA_Obj B, void* B_gpu, FLA_Obj beta, FLA_Obj C, void* C_gpu);
244  typedef FLA_Error(*flash_trmm_gpu_p)(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, void* A_gpu, FLA_Obj C, void* C_gpu);
245  typedef FLA_Error(*flash_trsm_gpu_p)(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, void* A_gpu, FLA_Obj C, void* C_gpu);
246 
247  // Level-2 BLAS
248  typedef FLA_Error(*flash_gemv_gpu_p)(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, void* A_gpu, FLA_Obj x, void* x_gpu, FLA_Obj beta, FLA_Obj y, void* y_gpu);
249  typedef FLA_Error(*flash_trsv_gpu_p)(FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj A, void* A_gpu, FLA_Obj x, void* x_gpu);
250 
251  // Level-1 BLAS
252  typedef FLA_Error(*flash_axpy_gpu_p)(FLA_Obj alpha, FLA_Obj A, void* A_gpu, FLA_Obj B, void* B_gpu);
253  typedef FLA_Error(*flash_copy_gpu_p)(FLA_Obj A, void* A_gpu, FLA_Obj B, void* B_gpu);
254  typedef FLA_Error(*flash_scal_gpu_p)(FLA_Obj alpha, FLA_Obj A, void* A_gpu);
255  typedef FLA_Error(*flash_scalr_gpu_p)(FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, void* A_gpu);
256 
257  // Only execute task if it is not NULL.
258  if ( t == NULL )
259  return;
260 
261  // Now "switch" between the various possible task functions.
262 
263  // FLA_Gemm
264  if ( t->func == (void *) FLA_Gemm_task )
265  {
266  flash_gemm_gpu_p func;
267  func = (flash_gemm_gpu_p) FLA_Gemm_external_gpu;
268 
269  func( ( FLA_Trans ) t->int_arg[0],
270  ( FLA_Trans ) t->int_arg[1],
271  t->fla_arg[0],
272  t->input_arg[0],
273  input_arg[0],
274  t->input_arg[1],
275  input_arg[1],
276  t->fla_arg[1],
277  t->output_arg[0],
278  output_arg[0] );
279  }
280  // FLA_Hemm
281  else if ( t->func == (void *) FLA_Hemm_task )
282  {
283  flash_hemm_gpu_p func;
284  func = (flash_hemm_gpu_p) FLA_Hemm_external_gpu;
285 
286  func( ( FLA_Side ) t->int_arg[0],
287  ( FLA_Uplo ) t->int_arg[1],
288  t->fla_arg[0],
289  t->input_arg[0],
290  input_arg[0],
291  t->input_arg[1],
292  input_arg[1],
293  t->fla_arg[1],
294  t->output_arg[0],
295  output_arg[0] );
296  }
297  // FLA_Herk
298  else if ( t->func == (void *) FLA_Herk_task )
299  {
300  flash_herk_gpu_p func;
301  func = (flash_herk_gpu_p) FLA_Herk_external_gpu;
302 
303  func( ( FLA_Uplo ) t->int_arg[0],
304  ( FLA_Trans ) t->int_arg[1],
305  t->fla_arg[0],
306  t->input_arg[0],
307  input_arg[0],
308  t->fla_arg[1],
309  t->output_arg[0],
310  output_arg[0] );
311  }
312  // FLA_Her2k
313  else if ( t->func == (void *) FLA_Her2k_task )
314  {
315  flash_her2k_gpu_p func;
316  func = (flash_her2k_gpu_p) FLA_Her2k_external_gpu;
317 
318  func( ( FLA_Uplo ) t->int_arg[0],
319  ( FLA_Trans ) t->int_arg[1],
320  t->fla_arg[0],
321  t->input_arg[0],
322  input_arg[0],
323  t->input_arg[1],
324  input_arg[1],
325  t->fla_arg[1],
326  t->output_arg[0],
327  output_arg[0] );
328  }
329  // FLA_Symm
330  else if ( t->func == (void *) FLA_Symm_task )
331  {
332  flash_symm_gpu_p func;
333  func = (flash_symm_gpu_p) FLA_Symm_external_gpu;
334 
335  func( ( FLA_Side ) t->int_arg[0],
336  ( FLA_Uplo ) t->int_arg[1],
337  t->fla_arg[0],
338  t->input_arg[0],
339  input_arg[0],
340  t->input_arg[1],
341  input_arg[1],
342  t->fla_arg[1],
343  t->output_arg[0],
344  output_arg[0] );
345  }
346  // FLA_Syrk
347  else if ( t->func == (void *) FLA_Syrk_task )
348  {
349  flash_syrk_gpu_p func;
350  func = (flash_syrk_gpu_p) FLA_Syrk_external_gpu;
351 
352  func( ( FLA_Uplo ) t->int_arg[0],
353  ( FLA_Trans ) t->int_arg[1],
354  t->fla_arg[0],
355  t->input_arg[0],
356  input_arg[0],
357  t->fla_arg[1],
358  t->output_arg[0],
359  output_arg[0] );
360  }
361  // FLA_Syr2k
362  else if ( t->func == (void *) FLA_Syr2k_task )
363  {
364  flash_syr2k_gpu_p func;
365  func = (flash_syr2k_gpu_p) FLA_Syr2k_external_gpu;
366 
367  func( ( FLA_Uplo ) t->int_arg[0],
368  ( FLA_Trans ) t->int_arg[1],
369  t->fla_arg[0],
370  t->input_arg[0],
371  input_arg[0],
372  t->input_arg[1],
373  input_arg[1],
374  t->fla_arg[1],
375  t->output_arg[0],
376  output_arg[0] );
377  }
378  // FLA_Trmm
379  else if ( t->func == (void *) FLA_Trmm_task )
380  {
381  flash_trmm_gpu_p func;
382  func = (flash_trmm_gpu_p) FLA_Trmm_external_gpu;
383 
384  func( ( FLA_Side ) t->int_arg[0],
385  ( FLA_Uplo ) t->int_arg[1],
386  ( FLA_Trans ) t->int_arg[2],
387  ( FLA_Diag ) t->int_arg[3],
388  t->fla_arg[0],
389  t->input_arg[0],
390  input_arg[0],
391  t->output_arg[0],
392  output_arg[0] );
393  }
394  // FLA_Trsm
395  else if ( t->func == (void *) FLA_Trsm_task )
396  {
397  flash_trsm_gpu_p func;
398  func = (flash_trsm_gpu_p) FLA_Trsm_external_gpu;
399 
400  func( ( FLA_Side ) t->int_arg[0],
401  ( FLA_Uplo ) t->int_arg[1],
402  ( FLA_Trans ) t->int_arg[2],
403  ( FLA_Diag ) t->int_arg[3],
404  t->fla_arg[0],
405  t->input_arg[0],
406  input_arg[0],
407  t->output_arg[0],
408  output_arg[0] );
409  }
410  // FLA_Gemv
411  else if ( t->func == (void *) FLA_Gemv_task )
412  {
413  flash_gemv_gpu_p func;
414  func = (flash_gemv_gpu_p) FLA_Gemv_external_gpu;
415 
416  func( ( FLA_Trans ) t->int_arg[0],
417  t->fla_arg[0],
418  t->input_arg[0],
419  input_arg[0],
420  t->input_arg[1],
421  input_arg[1],
422  t->fla_arg[1],
423  t->output_arg[0],
424  output_arg[0] );
425  }
426  // FLA_Trsv
427  else if ( t->func == (void *) FLA_Trsv_task )
428  {
429  flash_trsv_gpu_p func;
430  func = (flash_trsv_gpu_p) FLA_Trsv_external_gpu;
431 
432  func( ( FLA_Uplo ) t->int_arg[0],
433  ( FLA_Trans ) t->int_arg[1],
434  ( FLA_Diag ) t->int_arg[2],
435  t->input_arg[0],
436  input_arg[0],
437  t->output_arg[0],
438  output_arg[0] );
439  }
440  // FLA_Axpy
441  else if ( t->func == (void *) FLA_Axpy_task )
442  {
443  flash_axpy_gpu_p func;
444  func = (flash_axpy_gpu_p) FLA_Axpy_external_gpu;
445 
446  func( t->fla_arg[0],
447  t->input_arg[0],
448  input_arg[0],
449  t->output_arg[0],
450  output_arg[0] );
451  }
452  // FLA_Copy
453  else if ( t->func == (void *) FLA_Copy_task )
454  {
455  flash_copy_gpu_p func;
456  func = (flash_copy_gpu_p) FLA_Copy_external_gpu;
457 
458  func( t->input_arg[0],
459  input_arg[0],
460  t->output_arg[0],
461  output_arg[0] );
462  }
463  // FLA_Scal
464  else if ( t->func == (void *) FLA_Scal_task )
465  {
466  flash_scal_gpu_p func;
467  func = (flash_scal_gpu_p) FLA_Scal_external_gpu;
468 
469  func( t->fla_arg[0],
470  t->output_arg[0],
471  output_arg[0] );
472  }
473  // FLA_Scalr
474  else if ( t->func == (void *) FLA_Scalr_task )
475  {
476  flash_scalr_gpu_p func;
477  func = (flash_scalr_gpu_p) FLA_Scalr_external_gpu;
478 
479  func( ( FLA_Uplo ) t->int_arg[0],
480  t->fla_arg[0],
481  t->output_arg[0],
482  output_arg[0] );
483  }
484  else
485  {
486  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
487  }
488 
489  return;
490 }
FLA_Error FLA_Gemm_task(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t *cntl)
Definition: FLA_Gemm_task.c:13
int * int_arg
Definition: FLA_type_defs.h:210
FLA_Error FLA_Gemv_task(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y, fla_gemv_t *cntl)
Definition: FLA_Gemv_task.c:13
FLA_Error FLA_Trsv_external_gpu(FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj A, void *A_gpu, FLA_Obj x, void *x_gpu)
Definition: FLA_Trsv_external_gpu.c:17
FLA_Error FLA_Trmm_external_gpu(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, void *A_gpu, FLA_Obj B, void *B_gpu)
Definition: FLA_Trmm_external_gpu.c:17
FLA_Error FLA_Her2k_external_gpu(FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, void *A_gpu, FLA_Obj B, void *B_gpu, FLA_Obj beta, FLA_Obj C, void *C_gpu)
Definition: FLA_Her2k_external_gpu.c:17
FLA_Error FLA_Herk_task(FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj beta, FLA_Obj C, fla_herk_t *cntl)
Definition: FLA_Herk_task.c:13
FLA_Error FLA_Hemm_task(FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_hemm_t *cntl)
Definition: FLA_Hemm_task.c:13
FLA_Error FLA_Symm_task(FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_symm_t *cntl)
Definition: FLA_Symm_task.c:13
int FLA_Diag
Definition: FLA_type_defs.h:55
FLA_Error FLA_Syr2k_task(FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_syr2k_t *cntl)
Definition: FLA_Syr2k_task.c:13
int FLA_Error
Definition: FLA_type_defs.h:47
FLA_Error FLA_Trsm_task(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_trsm_t *cntl)
Definition: FLA_Trsm_task.c:13
FLA_Error FLA_Scal_task(FLA_Obj alpha, FLA_Obj A, fla_scal_t *cntl)
Definition: FLA_Scal_task.c:13
FLA_Error FLA_Gemv_external_gpu(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, void *A_gpu, FLA_Obj x, void *x_gpu, FLA_Obj beta, FLA_Obj y, void *y_gpu)
Definition: FLA_Gemv_external_gpu.c:17
FLA_Error FLA_Scal_external_gpu(FLA_Obj alpha, FLA_Obj A, void *A_gpu)
Definition: FLA_Scal_external_gpu.c:17
FLA_Error FLA_Syr2k_external_gpu(FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, void *A_gpu, FLA_Obj B, void *B_gpu, FLA_Obj beta, FLA_Obj C, void *C_gpu)
Definition: FLA_Syr2k_external_gpu.c:17
Definition: FLA_type_defs.h:158
FLA_Error FLA_Axpy_task(FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_axpy_t *cntl)
Definition: FLA_Axpy_task.c:13
FLA_Error FLA_Scalr_external_gpu(FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, void *A_gpu)
Definition: FLA_Scalr_external_gpu.c:17
FLA_Error FLA_Trsv_task(FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj A, FLA_Obj x, fla_trsv_t *cntl)
Definition: FLA_Trsv_task.c:13
FLA_Error FLA_Symm_external_gpu(FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, void *A_gpu, FLA_Obj B, void *B_gpu, FLA_Obj beta, FLA_Obj C, void *C_gpu)
Definition: FLA_Symm_external_gpu.c:17
FLA_Error FLA_Hemm_external_gpu(FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, void *A_gpu, FLA_Obj B, void *B_gpu, FLA_Obj beta, FLA_Obj C, void *C_gpu)
Definition: FLA_Hemm_external_gpu.c:17
FLA_Obj * output_arg
Definition: FLA_type_defs.h:222
int FLA_Trans
Definition: FLA_type_defs.h:53
int FLA_Uplo
Definition: FLA_type_defs.h:52
int FLA_Side
Definition: FLA_type_defs.h:51
FLA_Error FLA_Syrk_external_gpu(FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, void *A_gpu, FLA_Obj beta, FLA_Obj C, void *C_gpu)
Definition: FLA_Syrk_external_gpu.c:17
FLA_Error FLA_Gemm_external_gpu(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, void *A_gpu, FLA_Obj B, void *B_gpu, FLA_Obj beta, FLA_Obj C, void *C_gpu)
Definition: FLA_Gemm_external_gpu.c:17
FLA_Error FLA_Herk_external_gpu(FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, void *A_gpu, FLA_Obj beta, FLA_Obj C, void *C_gpu)
Definition: FLA_Herk_external_gpu.c:17
FLA_Obj * input_arg
Definition: FLA_type_defs.h:218
FLA_Error FLA_Copy_task(FLA_Obj A, FLA_Obj B, fla_copy_t *cntl)
Definition: FLA_Copy_task.c:13
FLA_Error FLA_Trsm_external_gpu(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, void *A_gpu, FLA_Obj B, void *B_gpu)
Definition: FLA_Trsm_external_gpu.c:17
FLA_Error FLA_Copy_external_gpu(FLA_Obj A, void *A_gpu, FLA_Obj B, void *B_gpu)
Definition: FLA_Copy_external_gpu.c:17
FLA_Error FLA_Syrk_task(FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj beta, FLA_Obj C, fla_syrk_t *cntl)
Definition: FLA_Syrk_task.c:13
FLA_Obj * fla_arg
Definition: FLA_type_defs.h:214
FLA_Error FLA_Scalr_task(FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, fla_scalr_t *cntl)
Definition: FLA_Scalr_task.c:13
FLA_Error FLA_Trmm_task(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_trmm_t *cntl)
Definition: FLA_Trmm_task.c:13
void * func
Definition: FLA_type_defs.h:197
FLA_Error FLA_Axpy_external_gpu(FLA_Obj alpha, FLA_Obj A, void *A_gpu, FLA_Obj B, void *B_gpu)
Definition: FLA_Axpy_external_gpu.c:17
FLA_Error FLA_Her2k_task(FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_her2k_t *cntl)
Definition: FLA_Her2k_task.c:13

◆ FLASH_Queue_finalize_gpu()

void FLASH_Queue_finalize_gpu ( void  )
42 {
43  cublasShutdown();
44 
45  return;
46 }

◆ FLASH_Queue_free_gpu()

FLA_Error FLASH_Queue_free_gpu ( void *  buffer_gpu)

Referenced by FLASH_Queue_destroy_gpu().

177 {
178  // Free memory for a block on GPU.
179  cublasFree( buffer_gpu );
180 
181  return FLA_SUCCESS;
182 }

◆ FLASH_Queue_get_enabled_gpu()

FLA_Bool FLASH_Queue_get_enabled_gpu ( void  )

References FLASH_Queue_get_enabled().

Referenced by FLASH_Queue_create_gpu(), FLASH_Queue_destroy_gpu(), FLASH_Queue_exec_gpu(), FLASH_Queue_exec_parallel_function(), FLASH_Queue_flush_gpu(), FLASH_Queue_wait_dequeue(), and FLASH_Queue_wait_dequeue_block().

97 {
98  // Return if SuperMatrix is enabled, but always false if not.
99  if ( FLASH_Queue_get_enabled() )
100  return flash_queue_enabled_gpu;
101  else
102  return FALSE;
103 }
FLA_Bool FLASH_Queue_get_enabled(void)
Definition: FLASH_Queue.c:171

◆ FLASH_Queue_get_gpu_num_blocks()

dim_t FLASH_Queue_get_gpu_num_blocks ( void  )

◆ FLASH_Queue_init_gpu()

void FLASH_Queue_init_gpu ( void  )
29 {
30  cublasInit();
31 
32  return;
33 }

◆ FLASH_Queue_read_gpu()

FLA_Error FLASH_Queue_read_gpu ( FLA_Obj  obj,
void *  buffer_gpu 
)

References FLA_Obj_buffer_at_view(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_datatype_size(), FLA_Obj_length(), and FLA_Obj_width().

Referenced by FLASH_Queue_destroy_gpu(), FLASH_Queue_flush_block_gpu(), FLASH_Queue_flush_gpu(), and FLASH_Queue_update_block_gpu().

211 {
212  // Read the memory of a block on GPU to main memory.
213  cublasGetMatrix( FLA_Obj_length( obj ),
214  FLA_Obj_width( obj ),
215  FLA_Obj_datatype_size( FLA_Obj_datatype( obj ) ),
216  buffer_gpu,
217  FLA_Obj_length( obj ),
218  FLA_Obj_buffer_at_view( obj ),
219  FLA_Obj_col_stride( obj ) );
220 
221  return FLA_SUCCESS;
222 }
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
void * FLA_Obj_buffer_at_view(FLA_Obj obj)
Definition: FLA_Query.c:215
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_datatype_size(FLA_Datatype datatype)
Definition: FLA_Query.c:61
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116

◆ FLASH_Queue_set_gpu_num_blocks()

void FLASH_Queue_set_gpu_num_blocks ( dim_t  n_blocks)
112 {
113  flash_queue_gpu_n_blocks = n_blocks;
114 
115  return;
116 }

◆ FLASH_Queue_write_gpu()

FLA_Error FLASH_Queue_write_gpu ( FLA_Obj  obj,
void *  buffer_gpu 
)

References FLA_Obj_buffer_at_view(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_datatype_size(), FLA_Obj_length(), and FLA_Obj_width().

Referenced by FLASH_Queue_update_block_gpu().

191 {
192  // Write the contents of a block in main memory to GPU.
193  cublasSetMatrix( FLA_Obj_length( obj ),
194  FLA_Obj_width( obj ),
195  FLA_Obj_datatype_size( FLA_Obj_datatype( obj ) ),
196  FLA_Obj_buffer_at_view( obj ),
197  FLA_Obj_col_stride( obj ),
198  buffer_gpu,
199  FLA_Obj_length( obj ) );
200 
201  return FLA_SUCCESS;
202 }
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
void * FLA_Obj_buffer_at_view(FLA_Obj obj)
Definition: FLA_Query.c:215
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_datatype_size(FLA_Datatype datatype)
Definition: FLA_Query.c:61
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116