libflame  revision_anchor
Functions
bl1_gemv.c File Reference

(r)

Functions

void bl1_sgemv (trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
 
void bl1_dgemv (trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
 
void bl1_cgemv (trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
 
void bl1_zgemv (trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
 
void bl1_sgemv_blas (trans1_t transa, int m, int n, float *alpha, float *a, int lda, float *x, int incx, float *beta, float *y, int incy)
 
void bl1_dgemv_blas (trans1_t transa, int m, int n, double *alpha, double *a, int lda, double *x, int incx, double *beta, double *y, int incy)
 
void bl1_cgemv_blas (trans1_t transa, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
 
void bl1_zgemv_blas (trans1_t transa, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
 

Function Documentation

◆ bl1_cgemv()

void bl1_cgemv ( trans1_t  transa,
conj1_t  conjx,
int  m,
int  n,
scomplex alpha,
scomplex a,
int  a_rs,
int  a_cs,
scomplex x,
int  incx,
scomplex beta,
scomplex y,
int  incy 
)

References bl1_c0(), bl1_c1(), bl1_callocv(), bl1_caxpyv(), bl1_cconjv(), bl1_ccopyv(), bl1_ccreate_contigm(), bl1_cfree(), bl1_cfree_contigm(), bl1_cgemv_blas(), bl1_cscalv(), bl1_does_trans(), bl1_is_conj(), bl1_is_conjnotrans(), bl1_is_row_storage(), bl1_zero_dim2(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Accum_T_UT_fc_opc_var1(), FLA_Accum_T_UT_fr_opc_var1(), FLA_Apply_H2_UT_l_opc_var1(), FLA_Apply_H2_UT_r_opc_var1(), FLA_Apply_HUD_UT_l_opc_var1(), FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_opc_var1(), FLA_Bidiag_UT_u_step_opc_var2(), FLA_Bidiag_UT_u_step_opc_var3(), FLA_Bidiag_UT_u_step_opc_var4(), FLA_Bidiag_UT_u_step_opc_var5(), FLA_CAQR2_UT_opc_var1(), FLA_Chol_l_opc_var2(), FLA_Chol_u_opc_var2(), FLA_Eig_gest_il_opc_var2(), FLA_Eig_gest_il_opc_var3(), FLA_Eig_gest_iu_opc_var2(), FLA_Eig_gest_iu_opc_var3(), FLA_Eig_gest_nl_opc_var2(), FLA_Eig_gest_nu_opc_var2(), FLA_Gemv_external(), FLA_Gemvc_external(), FLA_Hess_UT_step_ofc_var2(), FLA_Hess_UT_step_ofc_var3(), FLA_Hess_UT_step_ofc_var4(), FLA_Hess_UT_step_opc_var1(), FLA_Hess_UT_step_opc_var2(), FLA_Hess_UT_step_opc_var3(), FLA_Hess_UT_step_opc_var4(), FLA_Hess_UT_step_opc_var5(), FLA_LQ_UT_opc_var2(), FLA_LU_nopiv_opc_var2(), FLA_LU_nopiv_opc_var3(), FLA_LU_nopiv_opc_var4(), FLA_LU_piv_opc_var3(), FLA_LU_piv_opc_var4(), FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_n_opc_var2(), FLA_Lyap_n_opc_var3(), FLA_QR2_UT_opc_var1(), FLA_QR_UT_opc_var2(), FLA_Tridiag_UT_l_step_ofc_var2(), FLA_Tridiag_UT_l_step_ofc_var3(), FLA_Tridiag_UT_l_step_opc_var1(), FLA_Tridiag_UT_l_step_opc_var2(), FLA_Tridiag_UT_l_step_opc_var3(), FLA_Ttmm_l_opc_var2(), and FLA_Ttmm_u_opc_var2().

126 {
127  scomplex* a_save = a;
128  int a_rs_save = a_rs;
129  int a_cs_save = a_cs;
130  scomplex zero = bl1_c0();
131  scomplex one = bl1_c1();
132  scomplex* x_conj;
133  scomplex* ax;
134  int lda, inca;
135  int n_x;
136  int incx_conj;
137  int incax;
138 
139  // Return early if possible.
140  if ( bl1_zero_dim2( m, n ) )
141  {
142  int n_elem;
143 
144  if ( bl1_does_trans( transa ) ) n_elem = n;
145  else n_elem = m;
146 
148  n_elem,
149  beta,
150  y, incy );
151  return;
152  }
153 
154  // If necessary, allocate, initialize, and use a temporary contiguous
155  // copy of the matrix rather than the original matrix.
157  n,
158  a_save, a_rs_save, a_cs_save,
159  &a, &a_rs, &a_cs );
160 
161  // Initialize with values assuming column-major storage.
162  lda = a_cs;
163  inca = a_rs;
164 
165  // If A is a row-major matrix, then we can use the underlying column-major
166  // BLAS implementation by fiddling with the parameters.
167  if ( bl1_is_row_storage( a_rs, a_cs ) )
168  {
169  bl1_swap_ints( m, n );
170  bl1_swap_ints( lda, inca );
171  bl1_toggle_trans( transa );
172  }
173 
174  // Initialize with values assuming no conjugation of x.
175  x_conj = x;
176  incx_conj = incx;
177 
178  // We need a temporary vector for the cases when x is conjugated, and
179  // also for the cases where A is conjugated.
180  if ( bl1_is_conj( conjx ) || bl1_is_conjnotrans( transa ) )
181  {
182  if ( bl1_does_trans( transa ) ) n_x = m;
183  else n_x = n;
184 
185  x_conj = bl1_callocv( n_x );
186  incx_conj = 1;
187 
188  bl1_ccopyv( conjx,
189  n_x,
190  x, incx,
191  x_conj, incx_conj );
192  }
193 
194  // We want to handle the conjnotrans case, but without explicitly
195  // conjugating A. To do so, we leverage the fact that computing the
196  // product conj(A) * x is equivalent to computing conj( A * conj(x) ).
197  if ( bl1_is_conjnotrans( transa ) )
198  {
199  // We need a temporary vector for the product A * conj(x), which is
200  // conformal to y. We know we are not transposing, so y is length m.
201  ax = bl1_callocv( m );
202  incax = 1;
203 
204  // Start by conjugating the contents of the temporary copy of x.
205  bl1_cconjv( n,
206  x_conj, incx_conj );
207 
208  // Compute A * conj(x) where x is the temporary copy of x created above.
210  m,
211  n,
212  &one,
213  a, lda,
214  x_conj, incx_conj,
215  &zero,
216  ax, incax );
217 
218  // Scale y by beta.
220  m,
221  beta,
222  y, incy );
223 
224  // And finally, accumulate alpha * conj( A * conj(x) ) into y.
226  m,
227  alpha,
228  ax, incax,
229  y, incy);
230 
231  // Free the temporary vector for Ax.
232  bl1_cfree( ax );
233  }
234  else // notrans, trans, or conjtrans
235  {
236  bl1_cgemv_blas( transa,
237  m,
238  n,
239  alpha,
240  a, lda,
241  x_conj, incx_conj,
242  beta,
243  y, incy );
244  }
245 
246  // Free the temporary conjugated x vector.
247  if ( bl1_is_conj( conjx ) || bl1_is_conjnotrans( transa ) )
248  bl1_cfree( x_conj );
249 
250  // Free the temporary contiguous matrix.
251  bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
252  &a, &a_rs, &a_cs );
253 }
void bl1_cscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_scalv.c:46
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
void bl1_cgemv_blas(trans1_t transa, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:453
int bl1_is_conj(conj1_t conj)
Definition: bl1_is.c:42
Definition: blis_type_defs.h:81
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
scomplex bl1_c1(void)
Definition: bl1_constants.c:61
void bl1_ccreate_contigm(int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:81
scomplex bl1_c0(void)
Definition: bl1_constants.c:125
int bl1_is_conjnotrans(trans1_t trans)
Definition: bl1_is.c:25
Definition: blis_type_defs.h:82
int bl1_is_row_storage(int rs, int cs)
Definition: bl1_is.c:95
void bl1_cconjv(int m, scomplex *x, int incx)
Definition: bl1_conjv.c:23
Definition: blis_type_defs.h:54
Definition: blis_type_defs.h:132
void bl1_cfree(scomplex *p)
Definition: bl1_free.c:40
scomplex * bl1_callocv(unsigned int n_elem)
Definition: bl1_allocv.c:40
int bl1_does_trans(trans1_t trans)
Definition: bl1_does.c:13
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
void bl1_cfree_contigm(scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:45

◆ bl1_cgemv_blas()

void bl1_cgemv_blas ( trans1_t  transa,
int  m,
int  n,
scomplex alpha,
scomplex a,
int  lda,
scomplex x,
int  incx,
scomplex beta,
scomplex y,
int  incy 
)

References bl1_param_map_to_netlib_trans(), cblas_cgemv(), CblasColMajor, and F77_cgemv().

Referenced by bl1_cgemv().

454 {
455 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
456  enum CBLAS_ORDER cblas_order = CblasColMajor;
457  enum CBLAS_TRANSPOSE cblas_transa;
458 
459  bl1_param_map_to_netlib_trans( transa, &cblas_transa );
460 
461  cblas_cgemv( cblas_order,
462  cblas_transa,
463  m,
464  n,
465  alpha,
466  a, lda,
467  x, incx,
468  beta,
469  y, incy );
470 #else
471  char blas_transa;
472 
473  bl1_param_map_to_netlib_trans( transa, &blas_transa );
474 
475  F77_cgemv( &blas_transa,
476  &m,
477  &n,
478  alpha,
479  a, &lda,
480  x, &incx,
481  beta,
482  y, &incy );
483 #endif
484 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void cblas_cgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY)
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
Definition: blis_prototypes_cblas.h:17
void F77_cgemv(char *transa, int *m, int *n, scomplex *alpha, scomplex *a, int *lda, scomplex *x, int *incx, scomplex *beta, scomplex *y, int *incy)

◆ bl1_dgemv()

void bl1_dgemv ( trans1_t  transa,
conj1_t  conjx,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  x,
int  incx,
double *  beta,
double *  y,
int  incy 
)

References bl1_dcreate_contigm(), bl1_dfree_contigm(), bl1_dgemv_blas(), bl1_does_trans(), bl1_dscalv(), bl1_is_row_storage(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Accum_T_UT_fc_opd_var1(), FLA_Accum_T_UT_fr_opd_var1(), FLA_Apply_H2_UT_l_opd_var1(), FLA_Apply_H2_UT_r_opd_var1(), FLA_Apply_HUD_UT_l_opd_var1(), FLA_Bidiag_UT_u_step_ofd_var2(), FLA_Bidiag_UT_u_step_ofd_var3(), FLA_Bidiag_UT_u_step_ofd_var4(), FLA_Bidiag_UT_u_step_opd_var1(), FLA_Bidiag_UT_u_step_opd_var2(), FLA_Bidiag_UT_u_step_opd_var3(), FLA_Bidiag_UT_u_step_opd_var4(), FLA_Bidiag_UT_u_step_opd_var5(), FLA_CAQR2_UT_opd_var1(), FLA_Chol_l_opd_var2(), FLA_Chol_u_opd_var2(), FLA_Eig_gest_il_opd_var2(), FLA_Eig_gest_il_opd_var3(), FLA_Eig_gest_iu_opd_var2(), FLA_Eig_gest_iu_opd_var3(), FLA_Eig_gest_nl_opd_var2(), FLA_Eig_gest_nu_opd_var2(), FLA_Gemv_external(), FLA_Gemvc_external(), FLA_Hess_UT_step_ofd_var2(), FLA_Hess_UT_step_ofd_var3(), FLA_Hess_UT_step_ofd_var4(), FLA_Hess_UT_step_opd_var1(), FLA_Hess_UT_step_opd_var2(), FLA_Hess_UT_step_opd_var3(), FLA_Hess_UT_step_opd_var4(), FLA_Hess_UT_step_opd_var5(), FLA_LQ_UT_opd_var2(), FLA_LU_nopiv_opd_var2(), FLA_LU_nopiv_opd_var3(), FLA_LU_nopiv_opd_var4(), FLA_LU_piv_opd_var3(), FLA_LU_piv_opd_var4(), FLA_Lyap_h_opd_var2(), FLA_Lyap_h_opd_var3(), FLA_Lyap_n_opd_var2(), FLA_Lyap_n_opd_var3(), FLA_QR2_UT_opd_var1(), FLA_QR_UT_opd_var2(), FLA_Tridiag_UT_l_step_ofd_var2(), FLA_Tridiag_UT_l_step_ofd_var3(), FLA_Tridiag_UT_l_step_opd_var1(), FLA_Tridiag_UT_l_step_opd_var2(), FLA_Tridiag_UT_l_step_opd_var3(), FLA_Ttmm_l_opd_var2(), and FLA_Ttmm_u_opd_var2().

70 {
71  double* a_save = a;
72  int a_rs_save = a_rs;
73  int a_cs_save = a_cs;
74  int lda, inca;
75 
76  // Return early if possible.
77  if ( bl1_zero_dim2( m, n ) )
78  {
79  int n_elem;
80 
81  if ( bl1_does_trans( transa ) ) n_elem = n;
82  else n_elem = m;
83 
85  n_elem,
86  beta,
87  y, incy );
88  return;
89  }
90 
91  // If necessary, allocate, initialize, and use a temporary contiguous
92  // copy of the matrix rather than the original matrix.
94  n,
95  a_save, a_rs_save, a_cs_save,
96  &a, &a_rs, &a_cs );
97 
98  // Initialize with values assuming column-major storage.
99  lda = a_cs;
100  inca = a_rs;
101 
102  // If A is a row-major matrix, then we can use the underlying column-major
103  // BLAS implementation by fiddling with the parameters.
104  if ( bl1_is_row_storage( a_rs, a_cs ) )
105  {
106  bl1_swap_ints( m, n );
107  bl1_swap_ints( lda, inca );
108  bl1_toggle_trans( transa );
109  }
110 
111  bl1_dgemv_blas( transa,
112  m,
113  n,
114  alpha,
115  a, lda,
116  x, incx,
117  beta,
118  y, incy );
119 
120  // Free the temporary contiguous matrix.
121  bl1_dfree_contigm( a_save, a_rs_save, a_cs_save,
122  &a, &a_rs, &a_cs );
123 }
void bl1_dgemv_blas(trans1_t transa, int m, int n, double *alpha, double *a, int lda, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:420
Definition: blis_type_defs.h:81
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
void bl1_dcreate_contigm(int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:47
int bl1_is_row_storage(int rs, int cs)
Definition: bl1_is.c:95
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_scalv.c:24
void bl1_dfree_contigm(double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:29
int bl1_does_trans(trans1_t trans)
Definition: bl1_does.c:13

◆ bl1_dgemv_blas()

void bl1_dgemv_blas ( trans1_t  transa,
int  m,
int  n,
double *  alpha,
double *  a,
int  lda,
double *  x,
int  incx,
double *  beta,
double *  y,
int  incy 
)

References bl1_param_map_to_netlib_trans(), cblas_dgemv(), CblasColMajor, and F77_dgemv().

Referenced by bl1_dgemv().

421 {
422 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
423  enum CBLAS_ORDER cblas_order = CblasColMajor;
424  enum CBLAS_TRANSPOSE cblas_transa;
425 
426  bl1_param_map_to_netlib_trans( transa, &cblas_transa );
427 
428  cblas_dgemv( cblas_order,
429  cblas_transa,
430  m,
431  n,
432  *alpha,
433  a, lda,
434  x, incx,
435  *beta,
436  y, incy );
437 #else
438  char blas_transa;
439 
440  bl1_param_map_to_netlib_trans( transa, &blas_transa );
441 
442  F77_dgemv( &blas_transa,
443  &m,
444  &n,
445  alpha,
446  a, &lda,
447  x, &incx,
448  beta,
449  y, &incy );
450 #endif
451 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
Definition: blis_prototypes_cblas.h:17
void cblas_dgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const double alpha, const double *A, const int lda, const double *X, const int incX, const double beta, double *Y, const int incY)
void F77_dgemv(char *transa, int *m, int *n, double *alpha, double *a, int *lda, double *x, int *incx, double *beta, double *y, int *incy)

◆ bl1_sgemv()

void bl1_sgemv ( trans1_t  transa,
conj1_t  conjx,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  x,
int  incx,
float *  beta,
float *  y,
int  incy 
)

References bl1_does_trans(), bl1_is_row_storage(), bl1_screate_contigm(), bl1_sfree_contigm(), bl1_sgemv_blas(), bl1_sscalv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Accum_T_UT_fc_ops_var1(), FLA_Accum_T_UT_fr_ops_var1(), FLA_Apply_H2_UT_l_ops_var1(), FLA_Apply_H2_UT_r_ops_var1(), FLA_Apply_HUD_UT_l_ops_var1(), FLA_Bidiag_UT_u_step_ofs_var2(), FLA_Bidiag_UT_u_step_ofs_var3(), FLA_Bidiag_UT_u_step_ofs_var4(), FLA_Bidiag_UT_u_step_ops_var1(), FLA_Bidiag_UT_u_step_ops_var2(), FLA_Bidiag_UT_u_step_ops_var3(), FLA_Bidiag_UT_u_step_ops_var4(), FLA_Bidiag_UT_u_step_ops_var5(), FLA_CAQR2_UT_ops_var1(), FLA_Chol_l_ops_var2(), FLA_Chol_u_ops_var2(), FLA_Eig_gest_il_ops_var2(), FLA_Eig_gest_il_ops_var3(), FLA_Eig_gest_iu_ops_var2(), FLA_Eig_gest_iu_ops_var3(), FLA_Eig_gest_nl_ops_var2(), FLA_Eig_gest_nu_ops_var2(), FLA_Gemv_external(), FLA_Gemvc_external(), FLA_Hess_UT_step_ofs_var2(), FLA_Hess_UT_step_ofs_var3(), FLA_Hess_UT_step_ofs_var4(), FLA_Hess_UT_step_ops_var1(), FLA_Hess_UT_step_ops_var2(), FLA_Hess_UT_step_ops_var3(), FLA_Hess_UT_step_ops_var4(), FLA_Hess_UT_step_ops_var5(), FLA_LQ_UT_ops_var2(), FLA_LU_nopiv_ops_var2(), FLA_LU_nopiv_ops_var3(), FLA_LU_nopiv_ops_var4(), FLA_LU_piv_ops_var3(), FLA_LU_piv_ops_var4(), FLA_Lyap_h_ops_var2(), FLA_Lyap_h_ops_var3(), FLA_Lyap_n_ops_var2(), FLA_Lyap_n_ops_var3(), FLA_QR2_UT_ops_var1(), FLA_QR_UT_ops_var2(), FLA_Tridiag_UT_l_step_ofs_var2(), FLA_Tridiag_UT_l_step_ofs_var3(), FLA_Tridiag_UT_l_step_ops_var1(), FLA_Tridiag_UT_l_step_ops_var2(), FLA_Tridiag_UT_l_step_ops_var3(), FLA_Ttmm_l_ops_var2(), and FLA_Ttmm_u_ops_var2().

14 {
15  float* a_save = a;
16  int a_rs_save = a_rs;
17  int a_cs_save = a_cs;
18  int lda, inca;
19 
20  // Return early if possible.
21  if ( bl1_zero_dim2( m, n ) )
22  {
23  int n_elem;
24 
25  if ( bl1_does_trans( transa ) ) n_elem = n;
26  else n_elem = m;
27 
29  n_elem,
30  beta,
31  y, incy );
32  return;
33  }
34 
35  // If necessary, allocate, initialize, and use a temporary contiguous
36  // copy of the matrix rather than the original matrix.
38  n,
39  a_save, a_rs_save, a_cs_save,
40  &a, &a_rs, &a_cs );
41 
42  // Initialize with values assuming column-major storage.
43  lda = a_cs;
44  inca = a_rs;
45 
46  // If A is a row-major matrix, then we can use the underlying column-major
47  // BLAS implementation by fiddling with the parameters.
48  if ( bl1_is_row_storage( a_rs, a_cs ) )
49  {
50  bl1_swap_ints( m, n );
51  bl1_swap_ints( lda, inca );
52  bl1_toggle_trans( transa );
53  }
54 
55  bl1_sgemv_blas( transa,
56  m,
57  n,
58  alpha,
59  a, lda,
60  x, incx,
61  beta,
62  y, incy );
63 
64  // Free the temporary contiguous matrix.
65  bl1_sfree_contigm( a_save, a_rs_save, a_cs_save,
66  &a, &a_rs, &a_cs );
67 }
Definition: blis_type_defs.h:81
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
void bl1_screate_contigm(int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:13
void bl1_sgemv_blas(trans1_t transa, int m, int n, float *alpha, float *a, int lda, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:387
int bl1_is_row_storage(int rs, int cs)
Definition: bl1_is.c:95
void bl1_sfree_contigm(float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:13
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_scalv.c:13
int bl1_does_trans(trans1_t trans)
Definition: bl1_does.c:13

◆ bl1_sgemv_blas()

void bl1_sgemv_blas ( trans1_t  transa,
int  m,
int  n,
float *  alpha,
float *  a,
int  lda,
float *  x,
int  incx,
float *  beta,
float *  y,
int  incy 
)

References bl1_param_map_to_netlib_trans(), cblas_sgemv(), CblasColMajor, and F77_sgemv().

Referenced by bl1_sgemv().

388 {
389 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
390  enum CBLAS_ORDER cblas_order = CblasColMajor;
391  enum CBLAS_TRANSPOSE cblas_transa;
392 
393  bl1_param_map_to_netlib_trans( transa, &cblas_transa );
394 
395  cblas_sgemv( cblas_order,
396  cblas_transa,
397  m,
398  n,
399  *alpha,
400  a, lda,
401  x, incx,
402  *beta,
403  y, incy );
404 #else
405  char blas_transa;
406 
407  bl1_param_map_to_netlib_trans( transa, &blas_transa );
408 
409  F77_sgemv( &blas_transa,
410  &m,
411  &n,
412  alpha,
413  a, &lda,
414  x, &incx,
415  beta,
416  y, &incy );
417 #endif
418 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
Definition: blis_prototypes_cblas.h:17
void cblas_sgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const float alpha, const float *A, const int lda, const float *X, const int incX, const float beta, float *Y, const int incY)
void F77_sgemv(char *transa, int *m, int *n, float *alpha, float *a, int *lda, float *x, int *incx, float *beta, float *y, int *incy)

◆ bl1_zgemv()

void bl1_zgemv ( trans1_t  transa,
conj1_t  conjx,
int  m,
int  n,
dcomplex alpha,
dcomplex a,
int  a_rs,
int  a_cs,
dcomplex x,
int  incx,
dcomplex beta,
dcomplex y,
int  incy 
)

References bl1_does_trans(), bl1_is_conj(), bl1_is_conjnotrans(), bl1_is_row_storage(), bl1_z0(), bl1_z1(), bl1_zallocv(), bl1_zaxpyv(), bl1_zconjv(), bl1_zcopyv(), bl1_zcreate_contigm(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zgemv_blas(), bl1_zscalv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Accum_T_UT_fc_opz_var1(), FLA_Accum_T_UT_fr_opz_var1(), FLA_Apply_H2_UT_l_opz_var1(), FLA_Apply_H2_UT_r_opz_var1(), FLA_Apply_HUD_UT_l_opz_var1(), FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Bidiag_UT_u_step_opz_var1(), FLA_Bidiag_UT_u_step_opz_var2(), FLA_Bidiag_UT_u_step_opz_var3(), FLA_Bidiag_UT_u_step_opz_var4(), FLA_Bidiag_UT_u_step_opz_var5(), FLA_CAQR2_UT_opz_var1(), FLA_Chol_l_opz_var2(), FLA_Chol_u_opz_var2(), FLA_Eig_gest_il_opz_var2(), FLA_Eig_gest_il_opz_var3(), FLA_Eig_gest_iu_opz_var2(), FLA_Eig_gest_iu_opz_var3(), FLA_Eig_gest_nl_opz_var2(), FLA_Eig_gest_nu_opz_var2(), FLA_Gemv_external(), FLA_Gemvc_external(), FLA_Hess_UT_step_ofz_var2(), FLA_Hess_UT_step_ofz_var3(), FLA_Hess_UT_step_ofz_var4(), FLA_Hess_UT_step_opz_var1(), FLA_Hess_UT_step_opz_var2(), FLA_Hess_UT_step_opz_var3(), FLA_Hess_UT_step_opz_var4(), FLA_Hess_UT_step_opz_var5(), FLA_LQ_UT_opz_var2(), FLA_LU_nopiv_opz_var2(), FLA_LU_nopiv_opz_var3(), FLA_LU_nopiv_opz_var4(), FLA_LU_piv_opz_var3(), FLA_LU_piv_opz_var4(), FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_n_opz_var2(), FLA_Lyap_n_opz_var3(), FLA_QR2_UT_opz_var1(), FLA_QR_UT_opz_var2(), FLA_Tridiag_UT_l_step_ofz_var2(), FLA_Tridiag_UT_l_step_ofz_var3(), FLA_Tridiag_UT_l_step_opz_var1(), FLA_Tridiag_UT_l_step_opz_var2(), FLA_Tridiag_UT_l_step_opz_var3(), FLA_Ttmm_l_opz_var2(), and FLA_Ttmm_u_opz_var2().

256 {
257  dcomplex* a_save = a;
258  int a_rs_save = a_rs;
259  int a_cs_save = a_cs;
260  dcomplex zero = bl1_z0();
261  dcomplex one = bl1_z1();
262  dcomplex* x_conj;
263  dcomplex* ax;
264  int lda, inca;
265  int n_x;
266  int incx_conj;
267  int incax;
268 
269  // Return early if possible.
270  if ( bl1_zero_dim2( m, n ) )
271  {
272  int n_elem;
273 
274  if ( bl1_does_trans( transa ) ) n_elem = n;
275  else n_elem = m;
276 
278  n_elem,
279  beta,
280  y, incy );
281  return;
282  }
283 
284  // If necessary, allocate, initialize, and use a temporary contiguous
285  // copy of the matrix rather than the original matrix.
287  n,
288  a_save, a_rs_save, a_cs_save,
289  &a, &a_rs, &a_cs );
290 
291  // Initialize with values assuming column-major storage.
292  lda = a_cs;
293  inca = a_rs;
294 
295  // If A is a row-major matrix, then we can use the underlying column-major
296  // BLAS implementation by fiddling with the parameters.
297  if ( bl1_is_row_storage( a_rs, a_cs ) )
298  {
299  bl1_swap_ints( m, n );
300  bl1_swap_ints( lda, inca );
301  bl1_toggle_trans( transa );
302  }
303 
304  // Initialize with values assuming no conjugation of x.
305  x_conj = x;
306  incx_conj = incx;
307 
308  // We need a temporary vector for the cases when x is conjugated, and
309  // also for the cases where A is conjugated.
310  if ( bl1_is_conj( conjx ) || bl1_is_conjnotrans( transa ) )
311  {
312  if ( bl1_does_trans( transa ) ) n_x = m;
313  else n_x = n;
314 
315  x_conj = bl1_zallocv( n_x );
316  incx_conj = 1;
317 
318  bl1_zcopyv( conjx,
319  n_x,
320  x, incx,
321  x_conj, incx_conj );
322  }
323 
324  // We want to handle the conjnotrans case, but without explicitly
325  // conjugating A. To do so, we leverage the fact that computing the
326  // product conj(A) * x is equivalent to computing conj( A * conj(x) ).
327  if ( bl1_is_conjnotrans( transa ) )
328  {
329  // We need a temporary vector for the product A * conj(x), which is
330  // conformal to y. We know we are not transposing, so y is length m.
331  ax = bl1_zallocv( m );
332  incax = 1;
333 
334  // Start by conjugating the contents of the temporary copy of x.
335  bl1_zconjv( n,
336  x_conj, incx_conj );
337 
338  // Compute A * conj(x) where x is the temporary copy of x created above.
340  m,
341  n,
342  &one,
343  a, lda,
344  x_conj, incx_conj,
345  &zero,
346  ax, incax );
347 
348  // Scale y by beta.
350  m,
351  beta,
352  y, incy );
353 
354  // And finally, accumulate alpha * conj( A * conj(x) ) into y.
356  m,
357  alpha,
358  ax, incax,
359  y, incy);
360 
361  // Free the temporary vector for Ax.
362  bl1_zfree( ax );
363  }
364  else // notrans, trans, or conjtrans
365  {
366  bl1_zgemv_blas( transa,
367  m,
368  n,
369  alpha,
370  a, lda,
371  x_conj, incx_conj,
372  beta,
373  y, incy );
374  }
375 
376  // Free the temporary conjugated x vector.
377  if ( bl1_is_conj( conjx ) || bl1_is_conjnotrans( transa ) )
378  bl1_zfree( x_conj );
379 
380  // Free the temporary contiguous matrix.
381  bl1_zfree_contigm( a_save, a_rs_save, a_cs_save,
382  &a, &a_rs, &a_cs );
383 }
void bl1_zcreate_contigm(int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:115
int bl1_is_conj(conj1_t conj)
Definition: bl1_is.c:42
dcomplex bl1_z0(void)
Definition: bl1_constants.c:133
void bl1_zscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_scalv.c:72
Definition: blis_type_defs.h:81
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
void bl1_zfree(dcomplex *p)
Definition: bl1_free.c:45
int bl1_is_conjnotrans(trans1_t trans)
Definition: bl1_is.c:25
Definition: blis_type_defs.h:82
int bl1_is_row_storage(int rs, int cs)
Definition: bl1_is.c:95
void bl1_zgemv_blas(trans1_t transa, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:486
Definition: blis_type_defs.h:54
dcomplex * bl1_zallocv(unsigned int n_elem)
Definition: bl1_allocv.c:45
void bl1_zfree_contigm(dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:61
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
int bl1_does_trans(trans1_t trans)
Definition: bl1_does.c:13
Definition: blis_type_defs.h:137
dcomplex bl1_z1(void)
Definition: bl1_constants.c:69
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60
void bl1_zconjv(int m, dcomplex *x, int incx)
Definition: bl1_conjv.c:34

◆ bl1_zgemv_blas()

void bl1_zgemv_blas ( trans1_t  transa,
int  m,
int  n,
dcomplex alpha,
dcomplex a,
int  lda,
dcomplex x,
int  incx,
dcomplex beta,
dcomplex y,
int  incy 
)

References bl1_param_map_to_netlib_trans(), cblas_zgemv(), CblasColMajor, and F77_zgemv().

Referenced by bl1_zgemv().

487 {
488 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
489  enum CBLAS_ORDER cblas_order = CblasColMajor;
490  enum CBLAS_TRANSPOSE cblas_transa;
491 
492  bl1_param_map_to_netlib_trans( transa, &cblas_transa );
493 
494  cblas_zgemv( cblas_order,
495  cblas_transa,
496  m,
497  n,
498  alpha,
499  a, lda,
500  x, incx,
501  beta,
502  y, incy );
503 #else
504  char blas_transa;
505 
506  bl1_param_map_to_netlib_trans( transa, &blas_transa );
507 
508  F77_zgemv( &blas_transa,
509  &m,
510  &n,
511  alpha,
512  a, &lda,
513  x, &incx,
514  beta,
515  y, &incy );
516 #endif
517 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
void cblas_zgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY)
void F77_zgemv(char *transa, int *m, int *n, dcomplex *alpha, dcomplex *a, int *lda, dcomplex *x, int *incx, dcomplex *beta, dcomplex *y, int *incy)
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
Definition: blis_prototypes_cblas.h:17