libflame  revision_anchor
Functions
FLA_Copy_external_gpu.c File Reference

(r)

Functions

FLA_Error FLA_Copy_external_gpu (FLA_Obj A, void *A_gpu, FLA_Obj B, void *B_gpu)
 

Function Documentation

◆ FLA_Copy_external_gpu()

FLA_Error FLA_Copy_external_gpu ( FLA_Obj  A,
void *  A_gpu,
FLA_Obj  B,
void *  B_gpu 
)

References FLA_Check_error_level(), FLA_Copy_check(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), and FLA_Obj_width().

Referenced by FLASH_Queue_exec_task_gpu().

18 { // Copies the contents of A_gpu into B_gpu using one cuBLAS copy call per i in [0, width of B); every return path visible here yields FLA_SUCCESS.
19  FLA_Datatype datatype;
20  int m_B, n_B;
21  int ldim_A, inc_A;
22  int ldim_B, inc_B;
23  int i;
24  // Argument checking runs only when the error level is FLA_FULL_ERROR_CHECKING.
25  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
26  FLA_Copy_check( A, B );
27  // Nothing is copied when A has a zero dimension.
28  if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS;
29 
30  // It is important that we get the datatype of B and not A, since A could
31  // be an FLA_CONSTANT.
32  datatype = FLA_Obj_datatype( B );
33  // Each buffer is addressed with unit element stride and a leading dimension equal to its object's length; presumably the GPU buffers are packed column-major copies -- TODO confirm against the caller.
34  ldim_A = FLA_Obj_length( A );
35  inc_A = 1;
36  // The copy extent (m_B elements per call, n_B calls) is taken from B, not A.
37  m_B = FLA_Obj_length( B );
38  n_B = FLA_Obj_width( B );
39  ldim_B = FLA_Obj_length( B );
40  inc_B = 1;
41  // Dispatch on B's datatype. There is no default case, so any other datatype skips the copy and still reaches the FLA_SUCCESS return below.
42  switch ( datatype ){
43  // FLA_INT shares the single-precision path (buffers cast to float*); presumably this relies on int and float having the same size -- TODO confirm.
44  case FLA_INT:
45  case FLA_FLOAT:
46  {
47  float* buff_A_gpu = ( float* ) A_gpu;
48  float* buff_B_gpu = ( float* ) B_gpu;
49  // Call i copies m_B unit-stride elements starting at element offset i * ldim_A in A_gpu to offset i * ldim_B in B_gpu.
50  for ( i = 0; i < n_B; i++ )
51  cublasScopy( m_B,
52  buff_A_gpu + i * ldim_A, inc_A,
53  buff_B_gpu + i * ldim_B, inc_B );
54 
55  break;
56  }
57 
58  case FLA_DOUBLE:
59  {
60  double* buff_A_gpu = ( double* ) A_gpu;
61  double* buff_B_gpu = ( double* ) B_gpu;
62  // Same per-i copy loop as the float case, using cublasDcopy on double elements.
63  for ( i = 0; i < n_B; i++ )
64  cublasDcopy( m_B,
65  buff_A_gpu + i * ldim_A, inc_A,
66  buff_B_gpu + i * ldim_B, inc_B );
67 
68  break;
69  }
70 
71  case FLA_COMPLEX:
72  {
73  cuComplex* buff_A_gpu = ( cuComplex* ) A_gpu;
74  cuComplex* buff_B_gpu = ( cuComplex* ) B_gpu;
75  // Same per-i copy loop, using cublasCcopy on cuComplex elements.
76  for ( i = 0; i < n_B; i++ )
77  cublasCcopy( m_B,
78  buff_A_gpu + i * ldim_A, inc_A,
79  buff_B_gpu + i * ldim_B, inc_B );
80 
81  break;
82  }
83 
84  case FLA_DOUBLE_COMPLEX:
85  {
86  cuDoubleComplex* buff_A_gpu = ( cuDoubleComplex* ) A_gpu;
87  cuDoubleComplex* buff_B_gpu = ( cuDoubleComplex* ) B_gpu;
88  // Same per-i copy loop, using cublasZcopy on cuDoubleComplex elements.
89  for ( i = 0; i < n_B; i++ )
90  cublasZcopy( m_B,
91  buff_A_gpu + i * ldim_A, inc_A,
92  buff_B_gpu + i * ldim_B, inc_B );
93 
94  break;
95  }
96 
97  }
98  // No status from the cuBLAS calls is inspected in this function; success is reported unconditionally.
99  return FLA_SUCCESS;
100 }
FLA_Error FLA_Copy_check(FLA_Obj A, FLA_Obj B)
Definition: FLA_Copy_check.c:13
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
FLA_Bool FLA_Obj_has_zero_dim(FLA_Obj A)
Definition: FLA_Query.c:400
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
unsigned int FLA_Check_error_level(void)
Definition: FLA_Check.c:18
int FLA_Datatype
Definition: FLA_type_defs.h:49
int i
Definition: bl1_axmyv2.c:145
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116