52 if ( transa == FLA_NO_TRANSPOSE || transa == FLA_CONJ_NO_TRANSPOSE )
65 float *buff_alpha = (
float * ) FLA_FLOAT_PTR( alpha );
66 float *buff_beta = (
float * ) FLA_FLOAT_PTR( beta );
68 cublasSgemm( blas_transa,
74 (
float * ) A_gpu, ldim_A,
75 (
float * ) B_gpu, ldim_B,
77 (
float * ) C_gpu, ldim_C );
84 double *buff_alpha = (
double * ) FLA_DOUBLE_PTR( alpha );
85 double *buff_beta = (
double * ) FLA_DOUBLE_PTR( beta );
87 cublasDgemm( blas_transa,
93 (
double * ) A_gpu, ldim_A,
94 (
double * ) B_gpu, ldim_B,
96 (
double * ) C_gpu, ldim_C );
103 cuComplex *buff_alpha = ( cuComplex * ) FLA_COMPLEX_PTR( alpha );
104 cuComplex *buff_beta = ( cuComplex * ) FLA_COMPLEX_PTR( beta );
106 cublasCgemm( blas_transa,
112 ( cuComplex * ) A_gpu, ldim_A,
113 ( cuComplex * ) B_gpu, ldim_B,
115 ( cuComplex * ) C_gpu, ldim_C );
120 case FLA_DOUBLE_COMPLEX:
122 cuDoubleComplex *buff_alpha = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
123 cuDoubleComplex *buff_beta = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );
125 cublasZgemm( blas_transa,
131 ( cuDoubleComplex * ) A_gpu, ldim_A,
132 ( cuDoubleComplex * ) B_gpu, ldim_B,
134 ( cuDoubleComplex * ) C_gpu, ldim_C );
void FLA_Param_map_flame_to_netlib_trans(FLA_Trans trans, void *blas_trans)
Definition: FLA_Param.c:15
FLA_Error FLA_Gemm_check(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C)
Definition: FLA_Gemm_check.c:13
FLA_Error FLA_Scal_external_gpu(FLA_Obj alpha, FLA_Obj A, void *A_gpu)
Definition: FLA_Scal_external_gpu.c:17
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
FLA_Bool FLA_Obj_has_zero_dim(FLA_Obj A)
Definition: FLA_Query.c:400
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
unsigned int FLA_Check_error_level(void)
Definition: FLA_Check.c:18
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116