libflame  revision_anchor
Functions
blis_prototypes_level3.h File Reference

(r)

Go to the source code of this file.

Functions

void bl1_sgemm (trans1_t transa, trans1_t transb, int m, int k, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
 
void bl1_dgemm (trans1_t transa, trans1_t transb, int m, int k, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
 
void bl1_cgemm (trans1_t transa, trans1_t transb, int m, int k, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs)
 
void bl1_zgemm (trans1_t transa, trans1_t transb, int m, int k, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs)
 
void bl1_sgemm_blas (trans1_t transa, trans1_t transb, int m, int n, int k, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc)
 
void bl1_dgemm_blas (trans1_t transa, trans1_t transb, int m, int n, int k, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc)
 
void bl1_cgemm_blas (trans1_t transa, trans1_t transb, int m, int n, int k, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc)
 
void bl1_zgemm_blas (trans1_t transa, trans1_t transb, int m, int n, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc)
 
void bl1_shemm (side1_t side, uplo1_t uplo, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
 
void bl1_dhemm (side1_t side, uplo1_t uplo, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
 
void bl1_chemm (side1_t side, uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs)
 
void bl1_zhemm (side1_t side, uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs)
 
void bl1_chemm_blas (side1_t side, uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc)
 
void bl1_zhemm_blas (side1_t side, uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc)
 
void bl1_sherk (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *beta, float *c, int c_rs, int c_cs)
 
void bl1_dherk (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *beta, double *c, int c_rs, int c_cs)
 
void bl1_cherk (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, scomplex *a, int a_rs, int a_cs, float *beta, scomplex *c, int c_rs, int c_cs)
 
void bl1_zherk (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, dcomplex *a, int a_rs, int a_cs, double *beta, dcomplex *c, int c_rs, int c_cs)
 
void bl1_cherk_blas (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, scomplex *a, int lda, float *beta, scomplex *c, int ldc)
 
void bl1_zherk_blas (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, dcomplex *a, int lda, double *beta, dcomplex *c, int ldc)
 
void bl1_sher2k (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
 
void bl1_dher2k (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
 
void bl1_cher2k (uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, float *beta, scomplex *c, int c_rs, int c_cs)
 
void bl1_zher2k (uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, double *beta, dcomplex *c, int c_rs, int c_cs)
 
void bl1_cher2k_blas (uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, float *beta, scomplex *c, int ldc)
 
void bl1_zher2k_blas (uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, double *beta, dcomplex *c, int ldc)
 
void bl1_ssymm (side1_t side, uplo1_t uplo, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
 
void bl1_dsymm (side1_t side, uplo1_t uplo, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
 
void bl1_csymm (side1_t side, uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs)
 
void bl1_zsymm (side1_t side, uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs)
 
void bl1_ssymm_blas (side1_t side, uplo1_t uplo, int m, int n, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc)
 
void bl1_dsymm_blas (side1_t side, uplo1_t uplo, int m, int n, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc)
 
void bl1_csymm_blas (side1_t side, uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc)
 
void bl1_zsymm_blas (side1_t side, uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc)
 
void bl1_ssyrk (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *beta, float *c, int c_rs, int c_cs)
 
void bl1_dsyrk (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *beta, double *c, int c_rs, int c_cs)
 
void bl1_csyrk (uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs)
 
void bl1_zsyrk (uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs)
 
void bl1_ssyrk_blas (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int lda, float *beta, float *c, int ldc)
 
void bl1_dsyrk_blas (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int lda, double *beta, double *c, int ldc)
 
void bl1_csyrk_blas (uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int lda, scomplex *beta, scomplex *c, int ldc)
 
void bl1_zsyrk_blas (uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *beta, dcomplex *c, int ldc)
 
void bl1_ssyr2k (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
 
void bl1_dsyr2k (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
 
void bl1_csyr2k (uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs)
 
void bl1_zsyr2k (uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs)
 
void bl1_ssyr2k_blas (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc)
 
void bl1_dsyr2k_blas (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc)
 
void bl1_csyr2k_blas (uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc)
 
void bl1_zsyr2k_blas (uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc)
 
void bl1_strmm (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_dtrmm (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_ctrmm (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_ztrmm (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_strmm_blas (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int lda, float *b, int ldb)
 
void bl1_dtrmm_blas (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int lda, double *b, int ldb)
 
void bl1_ctrmm_blas (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb)
 
void bl1_ztrmm_blas (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb)
 
void bl1_strsm (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_dtrsm (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_ctrsm (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_ztrsm (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_strsm_blas (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int lda, float *b, int ldb)
 
void bl1_dtrsm_blas (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int lda, double *b, int ldb)
 
void bl1_ctrsm_blas (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb)
 
void bl1_ztrsm_blas (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb)
 
void bl1_strmmsx (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
 
void bl1_dtrmmsx (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
 
void bl1_ctrmmsx (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs)
 
void bl1_ztrmmsx (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs)
 
void bl1_strsmsx (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
 
void bl1_dtrsmsx (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
 
void bl1_ctrsmsx (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs)
 
void bl1_ztrsmsx (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs)
 

Function Documentation

◆ bl1_cgemm()

void bl1_cgemm ( trans1_t  transa,
trans1_t  transb,
int  m,
int  k,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs,
scomplex *  beta,
scomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_c0(), bl1_c1(), bl1_callocm(), bl1_caxpymt(), bl1_cconjm(), bl1_ccopymt(), bl1_ccreate_contigm(), bl1_ccreate_contigmt(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigm(), bl1_cgemm_blas(), bl1_cscalm(), bl1_is_col_storage(), bl1_is_conjnotrans(), bl1_zero_dim3(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_NO_CONJUGATE, and BLIS1_TRANSPOSE.

Referenced by FLA_Gemm_external().

536 {
537  int m_save = m;
538  int n_save = n;
539  scomplex* a_save = a;
540  scomplex* b_save = b;
541  scomplex* c_save = c;
542  int a_rs_save = a_rs;
543  int a_cs_save = a_cs;
544  int b_rs_save = b_rs;
545  int b_cs_save = b_cs;
546  int c_rs_save = c_rs;
547  int c_cs_save = c_cs;
548  scomplex zero = bl1_c0();
549  scomplex one = bl1_c1();
550  scomplex* a_unswap;
551  scomplex* b_unswap;
552  scomplex* a_conj;
553  scomplex* b_conj;
554  scomplex* c_trans;
555  int lda, inca;
556  int ldb, incb;
557  int ldc, incc;
558  int lda_conj, inca_conj;
559  int ldb_conj, incb_conj;
560  int ldc_trans, incc_trans;
561  int m_gemm, n_gemm;
562  int gemm_needs_axpyt = FALSE;
563  int a_was_copied;
564  int b_was_copied;
565 
566  // Return early if possible.
567  if ( bl1_zero_dim3( m, k, n ) )
568  {
569  bl1_cscalm( BLIS1_NO_CONJUGATE,
570  m,
571  n,
572  beta,
573  c, c_rs, c_cs );
574  return;
575  }
576 
577  // If necessary, allocate, initialize, and use a temporary contiguous
578  // copy of each matrix rather than the original matrices.
579  bl1_ccreate_contigmt( transa,
580  m,
581  k,
582  a_save, a_rs_save, a_cs_save,
583  &a, &a_rs, &a_cs );
584 
585  bl1_ccreate_contigmt( transb,
586  k,
587  n,
588  b_save, b_rs_save, b_cs_save,
589  &b, &b_rs, &b_cs );
590 
591  bl1_ccreate_contigm( m,
592  n,
593  c_save, c_rs_save, c_cs_save,
594  &c, &c_rs, &c_cs );
595 
596  // Figure out whether A and/or B was copied to contiguous memory. This
597  // is used later to prevent redundant copying.
598  a_was_copied = ( a != a_save );
599  b_was_copied = ( b != b_save );
600 
601  // These are used to track the original values of a and b prior to any
602  // operand swapping that might take place. This is necessary for proper
603  // freeing of memory when one is a temporary contiguous matrix.
604  a_unswap = a;
605  b_unswap = b;
606 
607  // These are used to track the dimensions of the product of the
608  // A and B operands to the BLAS invocation of gemm. These differ
609  // from m and n when the operands need to be swapped.
610  m_gemm = m;
611  n_gemm = n;
612 
613  // Initialize with values assuming column-major storage.
614  lda = a_cs;
615  inca = a_rs;
616  ldb = b_cs;
617  incb = b_rs;
618  ldc = c_cs;
619  incc = c_rs;
620 
621  // Adjust the parameters based on the storage of each matrix.
622  if ( bl1_is_col_storage( c_rs, c_cs ) )
623  {
624  if ( bl1_is_col_storage( a_rs, a_cs ) )
625  {
626  if ( bl1_is_col_storage( b_rs, b_cs ) )
627  {
628  // requested operation: C_c += tr( A_c ) * tr( B_c )
629  // effective operation: C_c += tr( A_c ) * tr( B_c )
630  }
631  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
632  {
633 
634  // requested operation: C_c += tr( A_c ) * tr( B_r )
635  // effective operation: C_c += tr( A_c ) * tr( B_c )^T
636  bl1_swap_ints( ldb, incb );
637 
638  bl1_toggle_trans( transb );
639  }
640  }
641  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
642  {
643  if ( bl1_is_col_storage( b_rs, b_cs ) )
644  {
645  // requested operation: C_c += tr( A_r ) * tr( B_c )
646  // effective operation: C_c += tr( A_r )^T * tr( B_c )
647  bl1_swap_ints( lda, inca );
648 
649  bl1_toggle_trans( transa );
650  }
651  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
652  {
653  // requested operation: C_c += tr( A_r ) * tr( B_r )
654  // effective operation: C_c += ( tr( B_c ) * tr( A_c ) )^T
655  bl1_swap_ints( lda, inca );
656  bl1_swap_ints( ldb, incb );
657 
658  bl1_cswap_pointers( a, b );
659  bl1_swap_ints( a_was_copied, b_was_copied );
660  bl1_swap_ints( lda, ldb );
661  bl1_swap_ints( inca, incb );
662  bl1_swap_trans( transa, transb );
663 
664  gemm_needs_axpyt = TRUE;
665  bl1_swap_ints( m_gemm, n_gemm );
666  }
667  }
668  }
669  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
670  {
671  if ( bl1_is_col_storage( a_rs, a_cs ) )
672  {
673  if ( bl1_is_col_storage( b_rs, b_cs ) )
674  {
675  // requested operation: C_r += tr( A_c ) * tr( B_c )
676  // effective operation: C_c += ( tr( A_c ) * tr( B_c ) )^T
677  bl1_swap_ints( ldc, incc );
678 
679  bl1_swap_ints( m, n );
680 
681  gemm_needs_axpyt = TRUE;
682  }
683  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
684  {
685  // requested operation: C_r += tr( A_c ) * tr( B_r )
686  // effective operation: C_c += tr( B_c ) * tr( A_c )^T
687  bl1_swap_ints( ldc, incc );
688  bl1_swap_ints( ldb, incb );
689 
690  bl1_toggle_trans( transa );
691 
692  bl1_swap_ints( m, n );
693  bl1_swap_ints( m_gemm, n_gemm );
694  bl1_cswap_pointers( a, b );
695  bl1_swap_ints( a_was_copied, b_was_copied );
696  bl1_swap_ints( lda, ldb );
697  bl1_swap_ints( inca, incb );
698  bl1_swap_trans( transa, transb );
699  }
700  }
701  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
702  {
703  if ( bl1_is_col_storage( b_rs, b_cs ) )
704  {
705  // requested operation: C_r += tr( A_r ) * tr( B_c )
706  // effective operation: C_c += tr( B_c )^T * tr( A_c )
707  bl1_swap_ints( ldc, incc );
708  bl1_swap_ints( lda, inca );
709 
710  bl1_toggle_trans( transb );
711 
712  bl1_swap_ints( m, n );
713  bl1_swap_ints( m_gemm, n_gemm );
714  bl1_cswap_pointers( a, b );
715  bl1_swap_ints( a_was_copied, b_was_copied );
716  bl1_swap_ints( lda, ldb );
717  bl1_swap_ints( inca, incb );
718  bl1_swap_trans( transa, transb );
719  }
720  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
721  {
722  // requested operation: C_r += tr( A_r ) * tr( B_r )
723  // effective operation: C_c += tr( B_c ) * tr( A_c )
724  bl1_swap_ints( lda, inca );
725  bl1_swap_ints( ldb, incb );
726  bl1_swap_ints( ldc, incc );
727 
728  bl1_swap_ints( m, n );
729  bl1_swap_ints( m_gemm, n_gemm );
730  bl1_cswap_pointers( a, b );
731  bl1_swap_ints( a_was_copied, b_was_copied );
732  bl1_swap_ints( lda, ldb );
733  bl1_swap_ints( inca, incb );
734  bl1_swap_trans( transa, transb );
735  }
736  }
737  }
738 
739  // We need a temporary matrix for the case where A is conjugated.
740  a_conj = a;
741  lda_conj = lda;
742  inca_conj = inca;
743 
744  // If transa indicates conjugate-no-transpose and A was not already
745  // copied, then copy and conjugate it to a temporary matrix. Otherwise,
746  // if transa indicates conjugate-no-transpose and A was already copied,
747  // just conjugate it.
748  if ( bl1_is_conjnotrans( transa ) && !a_was_copied )
749  {
750  a_conj = bl1_callocm( m_gemm, k );
751  lda_conj = m_gemm;
752  inca_conj = 1;
753 
754  bl1_ccopymt( BLIS1_CONJ_NO_TRANSPOSE,
755  m_gemm,
756  k,
757  a, inca, lda,
758  a_conj, inca_conj, lda_conj );
759  }
760  else if ( bl1_is_conjnotrans( transa ) && a_was_copied )
761  {
762  bl1_cconjm( m_gemm,
763  k,
764  a_conj, inca_conj, lda_conj );
765  }
766 
767  // We need a temporary matrix for the case where B is conjugated.
768  b_conj = b;
769  ldb_conj = ldb;
770  incb_conj = incb;
771 
772  // If transb indicates conjugate-no-transpose and B was not already
773  // copied, then copy and conjugate it to a temporary matrix. Otherwise,
774  // if transb indicates conjugate-no-transpose and B was already copied,
775  // just conjugate it.
776  if ( bl1_is_conjnotrans( transb ) && !b_was_copied )
777  {
778  b_conj = bl1_callocm( k, n_gemm );
779  ldb_conj = k;
780  incb_conj = 1;
781 
782  bl1_ccopymt( BLIS1_CONJ_NO_TRANSPOSE,
783  k,
784  n_gemm,
785  b, incb, ldb,
786  b_conj, incb_conj, ldb_conj );
787  }
788  else if ( bl1_is_conjnotrans( transb ) && b_was_copied )
789  {
790  bl1_cconjm( k,
791  n_gemm,
792  b_conj, incb_conj, ldb_conj );
793  }
794 
795  // There are two cases where we need to perform the gemm and then axpy
796  // the result into C with a transposition. We handle those cases here.
797  if ( gemm_needs_axpyt )
798  {
799  // We need a temporary matrix for holding C^T. Notice that m and n
800  // represent the dimensions of C, while m_gemm and n_gemm are the
801  // dimensions of the actual product op(A)*op(B), which may be n-by-m
802  // since the operands may have been swapped.
803  c_trans = bl1_callocm( m_gemm, n_gemm );
804  ldc_trans = m_gemm;
805  incc_trans = 1;
806 
807  // Compute tr( A ) * tr( B ), where A and B may have been swapped
808  // to reference the other, and store the result in C_trans.
809  bl1_cgemm_blas( transa,
810  transb,
811  m_gemm,
812  n_gemm,
813  k,
814  alpha,
815  a_conj, lda_conj,
816  b_conj, ldb_conj,
817  &zero,
818  c_trans, ldc_trans );
819 
820  // Scale C by beta.
821  bl1_cscalm( BLIS1_NO_CONJUGATE,
822  m,
823  n,
824  beta,
825  c, incc, ldc );
826 
827  // And finally, accumulate the matrix product in C_trans into C
828  // with a transpose.
829  bl1_caxpymt( BLIS1_TRANSPOSE,
830  m,
831  n,
832  &one,
833  c_trans, incc_trans, ldc_trans,
834  c, incc, ldc );
835 
836  // Free the temporary matrix for C.
837  bl1_cfree( c_trans );
838  }
839  else // no extra axpyt step needed
840  {
841  bl1_cgemm_blas( transa,
842  transb,
843  m_gemm,
844  n_gemm,
845  k,
846  alpha,
847  a_conj, lda_conj,
848  b_conj, ldb_conj,
849  beta,
850  c, ldc );
851  }
852 
853  if ( bl1_is_conjnotrans( transa ) && !a_was_copied )
854  bl1_cfree( a_conj );
855 
856  if ( bl1_is_conjnotrans( transb ) && !b_was_copied )
857  bl1_cfree( b_conj );
858 
859  // Free any temporary contiguous matrices, copying the result back to
860  // the original matrix.
861  bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
862  &a_unswap, &a_rs, &a_cs );
863 
864  bl1_cfree_contigm( b_save, b_rs_save, b_cs_save,
865  &b_unswap, &b_rs, &b_cs );
866 
867  bl1_cfree_saved_contigm( m_save,
868  n_save,
869  c_save, c_rs_save, c_cs_save,
870  &c, &c_rs, &c_cs );
871 }
void bl1_cfree_saved_contigm(int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:59
Definition: blis_type_defs.h:81
scomplex bl1_c1(void)
Definition: bl1_constants.c:61
void bl1_ccreate_contigm(int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:81
scomplex bl1_c0(void)
Definition: bl1_constants.c:125
int bl1_is_conjnotrans(trans1_t trans)
Definition: bl1_is.c:25
Definition: blis_type_defs.h:55
void bl1_ccreate_contigmt(trans1_t trans_dims, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmt.c:89
void bl1_caxpymt(trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_axpymt.c:149
int bl1_zero_dim3(int m, int k, int n)
Definition: bl1_is.c:123
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_cconjm(int m, int n, scomplex *a, int a_rs, int a_cs)
Definition: bl1_conjm.c:23
Definition: blis_type_defs.h:56
void bl1_ccopymt(trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:215
Definition: blis_type_defs.h:132
void bl1_cscalm(conj1_t conj, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs)
Definition: bl1_scalm.c:169
scomplex * bl1_callocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:40
void bl1_cgemm_blas(trans1_t transa, trans1_t transb, int m, int n, int k, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc)
Definition: bl1_gemm.c:1295
void bl1_cfree(scomplex *p)
Definition: bl1_free.c:40
void bl1_cfree_contigm(scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:45

◆ bl1_cgemm_blas()

void bl1_cgemm_blas ( trans1_t  transa,
trans1_t  transb,
int  m,
int  n,
int  k,
scomplex *  alpha,
scomplex *  a,
int  lda,
scomplex *  b,
int  ldb,
scomplex *  beta,
scomplex *  c,
int  ldc 
)

References bl1_param_map_to_netlib_trans(), cblas_cgemm(), CblasColMajor, and F77_cgemm().

Referenced by bl1_cgemm().

1296 {
1297 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1298  enum CBLAS_ORDER cblas_order = CblasColMajor;
1299  enum CBLAS_TRANSPOSE cblas_transa;
1300  enum CBLAS_TRANSPOSE cblas_transb;
1301 
1302  bl1_param_map_to_netlib_trans( transa, &cblas_transa );
1303  bl1_param_map_to_netlib_trans( transb, &cblas_transb );
1304 
1305  cblas_cgemm( cblas_order,
1306  cblas_transa,
1307  cblas_transb,
1308  m,
1309  n,
1310  k,
1311  alpha,
1312  a, lda,
1313  b, ldb,
1314  beta,
1315  c, ldc );
1316 #else
1317  char blas_transa;
1318  char blas_transb;
1319 
1320  bl1_param_map_to_netlib_trans( transa, &blas_transa );
1321  bl1_param_map_to_netlib_trans( transb, &blas_transb );
1322 
1323  F77_cgemm( &blas_transa,
1324  &blas_transb,
1325  &m,
1326  &n,
1327  &k,
1328  alpha,
1329  a, &lda,
1330  b, &ldb,
1331  beta,
1332  c, &ldc );
1333 #endif
1334 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc)
void F77_cgemm(char *transa, char *transb, int *m, int *n, int *k, scomplex *alpha, scomplex *a, int *lda, scomplex *b, int *ldb, scomplex *beta, scomplex *c, int *ldc)
Definition: blis_prototypes_cblas.h:17

◆ bl1_chemm()

void bl1_chemm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs,
scomplex *  beta,
scomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_c0(), bl1_c1(), bl1_callocm(), bl1_caxpymt(), bl1_cconjmr(), bl1_ccopymrt(), bl1_ccopymt(), bl1_ccreate_contigm(), bl1_ccreate_contigmr(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigm(), bl1_chemm_blas(), bl1_cscalm(), bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_zero_dim2(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by FLA_Hemm_external().

39 {
40  int m_save = m;
41  int n_save = n;
42  scomplex* a_save = a;
43  scomplex* b_save = b;
44  scomplex* c_save = c;
45  int a_rs_save = a_rs;
46  int a_cs_save = a_cs;
47  int b_rs_save = b_rs;
48  int b_cs_save = b_cs;
49  int c_rs_save = c_rs;
50  int c_cs_save = c_cs;
51  scomplex zero = bl1_c0();
52  scomplex one = bl1_c1();
53  scomplex* a_conj;
54  scomplex* b_copy;
55  scomplex* c_trans;
56  int dim_a;
57  int lda, inca;
58  int ldb, incb;
59  int ldc, incc;
60  int lda_conj, inca_conj;
61  int ldb_copy, incb_copy;
62  int ldc_trans, incc_trans;
63  int hemm_needs_conja = FALSE;
64  int hemm_needs_copyb = FALSE;
65  int hemm_needs_transb = FALSE;
66  int hemm_needs_axpyt = FALSE;
67  int a_was_copied;
68 
69  // Return early if possible.
70  if ( bl1_zero_dim2( m, n ) ) return;
71 
72  // If necessary, allocate, initialize, and use a temporary contiguous
73  // copy of each matrix rather than the original matrices.
74  bl1_set_dim_with_side( side, m, n, &dim_a );
75  bl1_ccreate_contigmr( uplo,
76  dim_a,
77  dim_a,
78  a_save, a_rs_save, a_cs_save,
79  &a, &a_rs, &a_cs );
80 
81  bl1_ccreate_contigm( m,
82  n,
83  b_save, b_rs_save, b_cs_save,
84  &b, &b_rs, &b_cs );
85 
86  bl1_ccreate_contigm( m,
87  n,
88  c_save, c_rs_save, c_cs_save,
89  &c, &c_rs, &c_cs );
90 
91  // Figure out whether A was copied to contiguous memory. This is used to
92  // prevent redundant copying.
93  a_was_copied = ( a != a_save );
94 
95  // Initialize with values assuming column-major storage.
96  lda = a_cs;
97  inca = a_rs;
98  ldb = b_cs;
99  incb = b_rs;
100  ldc = c_cs;
101  incc = c_rs;
102 
103  // Adjust the parameters based on the storage of each matrix.
104  if ( bl1_is_col_storage( c_rs, c_cs ) )
105  {
106  if ( bl1_is_col_storage( a_rs, a_cs ) )
107  {
108  if ( bl1_is_col_storage( b_rs, b_cs ) )
109  {
110  // requested operation: C_c += uplo( A_c ) * B_c
111  // effective operation: C_c += uplo( A_c ) * B_c
112  }
113  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
114  {
115  // requested operation: C_c += uplo( A_c ) * B_r
116  // effective operation: C_c += uplo( A_c ) * B_c
117  hemm_needs_copyb = TRUE;
118  }
119  }
120  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
121  {
122  if ( bl1_is_col_storage( b_rs, b_cs ) )
123  {
124  // requested operation: C_c += uplo( A_r ) * B_c
125  // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
126  bl1_swap_ints( lda, inca );
127 
128  bl1_toggle_uplo( uplo );
129 
130  hemm_needs_conja = TRUE;
131  }
132  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
133  {
134  // requested operation: C_c += uplo( A_r ) * B_r
135  // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
136  bl1_swap_ints( lda, inca );
137  bl1_swap_ints( ldb, incb );
138 
139  bl1_toggle_side( side );
140  bl1_toggle_uplo( uplo );
141 
142  hemm_needs_axpyt = TRUE;
143  }
144  }
145  }
146  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
147  {
148  if ( bl1_is_col_storage( a_rs, a_cs ) )
149  {
150  if ( bl1_is_col_storage( b_rs, b_cs ) )
151  {
152  // requested operation: C_r += uplo( A_c ) * B_c
153  // effective operation: C_c += ( uplo( A_c ) * B_c )^T
154  bl1_swap_ints( ldc, incc );
155 
156  bl1_swap_ints( m, n );
157 
158  hemm_needs_axpyt = TRUE;
159  }
160  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
161  {
162  // requested operation: C_r += uplo( A_c ) * B_r
163  // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
164  bl1_swap_ints( ldc, incc );
165  bl1_swap_ints( ldb, incb );
166 
167  bl1_swap_ints( m, n );
168 
169  bl1_toggle_side( side );
170 
171  hemm_needs_conja = TRUE;
172  }
173  }
174  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
175  {
176  if ( bl1_is_col_storage( b_rs, b_cs ) )
177  {
178  // requested operation: C_r += uplo( A_r ) * B_c
179  // effective operation: C_c += B_c^T * ~uplo( A_c )
180  bl1_swap_ints( ldc, incc );
181  bl1_swap_ints( lda, inca );
182 
183  bl1_swap_ints( m, n );
184 
185  bl1_toggle_side( side );
186  bl1_toggle_uplo( uplo );
187 
188  hemm_needs_copyb = TRUE;
189  hemm_needs_transb = TRUE;
190  }
191  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
192  {
193  // requested operation: C_r += uplo( A_r ) * B_r
194  // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
195  bl1_swap_ints( ldc, incc );
196  bl1_swap_ints( lda, inca );
197  bl1_swap_ints( ldb, incb );
198 
199  bl1_swap_ints( m, n );
200 
201  bl1_toggle_uplo( uplo );
202  bl1_toggle_side( side );
203  }
204  }
205  }
206 
207  // We need a temporary matrix for the cases where A is conjugated.
208  a_conj = a;
209  lda_conj = lda;
210  inca_conj = inca;
211 
212  if ( hemm_needs_conja && !a_was_copied )
213  {
214  int dim_a;
215 
216  bl1_set_dim_with_side( side, m, n, &dim_a );
217 
218  a_conj = bl1_callocm( dim_a, dim_a );
219  lda_conj = dim_a;
220  inca_conj = 1;
221 
222  bl1_ccopymrt( uplo,
223  BLIS1_CONJ_NO_TRANSPOSE,
224  dim_a,
225  dim_a,
226  a, inca, lda,
227  a_conj, inca_conj, lda_conj );
228  }
229  else if ( hemm_needs_conja && a_was_copied )
230  {
231  int dim_a;
232 
233  bl1_set_dim_with_side( side, m, n, &dim_a );
234 
235  bl1_cconjmr( uplo,
236  dim_a,
237  dim_a,
238  a_conj, inca_conj, lda_conj );
239  }
240 
241  // We need a temporary matrix for the cases where B needs to be copied.
242  b_copy = b;
243  ldb_copy = ldb;
244  incb_copy = incb;
245 
246  // There are two cases where we need to make a copy of B: one where the
247  // copy's dimensions are transposed from the original B, and one where
248  // the dimensions are not swapped.
249  if ( hemm_needs_copyb )
250  {
251  trans1_t transb;
252 
253  // Set transb, which determines whether or not we need to copy from B
254  // as if it needs a transposition. If a transposition is needed, then
255  // m and n have already been swapped. So in either case m
256  // represents the leading dimension of the copy.
257  if ( hemm_needs_transb ) transb = BLIS1_TRANSPOSE;
258  else transb = BLIS1_NO_TRANSPOSE;
259 
260  b_copy = bl1_callocm( m, n );
261  ldb_copy = m;
262  incb_copy = 1;
263 
264  bl1_ccopymt( transb,
265  m,
266  n,
267  b, incb, ldb,
268  b_copy, incb_copy, ldb_copy );
269  }
270 
271  // There are two cases where we need to perform the hemm and then axpy
272  // the result into C with a transposition. We handle those cases here.
273  if ( hemm_needs_axpyt )
274  {
275  // We need a temporary matrix for holding C^T. Notice that m and n
276  // represent the dimensions of C, and thus C_trans is n-by-m
277  // (interpreting both as column-major matrices). So the leading
278  // dimension of the temporary matrix holding C^T is n.
279  c_trans = bl1_callocm( n, m );
280  ldc_trans = n;
281  incc_trans = 1;
282 
283  // Compute A * B (or B * A) and store the result in C_trans.
284  // Note that there is no overlap between the axpyt cases and
285  // the conja/copyb cases, hence the use of a, b, lda, and ldb.
286  bl1_chemm_blas( side,
287  uplo,
288  n,
289  m,
290  alpha,
291  a, lda,
292  b, ldb,
293  &zero,
294  c_trans, ldc_trans );
295 
296  // Scale C by beta.
297  bl1_cscalm( BLIS1_NO_CONJUGATE,
298  m,
299  n,
300  beta,
301  c, incc, ldc );
302 
303  // And finally, accumulate the matrix product in C_trans into C
304  // with a transpose.
305  bl1_caxpymt( BLIS1_TRANSPOSE,
306  m,
307  n,
308  &one,
309  c_trans, incc_trans, ldc_trans,
310  c, incc, ldc );
311 
312  // Free the temporary matrix for C.
313  bl1_cfree( c_trans );
314  }
315  else // no extra axpyt step needed
316  {
317  bl1_chemm_blas( side,
318  uplo,
319  m,
320  n,
321  alpha,
322  a_conj, lda_conj,
323  b_copy, ldb_copy,
324  beta,
325  c, ldc );
326  }
327 
328  if ( hemm_needs_conja && !a_was_copied )
329  bl1_cfree( a_conj );
330 
331  if ( hemm_needs_copyb )
332  bl1_cfree( b_copy );
333 
334  // Free any temporary contiguous matrices, copying the result back to
335  // the original matrix.
336  bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
337  &a, &a_rs, &a_cs );
338 
339  bl1_cfree_contigm( b_save, b_rs_save, b_cs_save,
340  &b, &b_rs, &b_cs );
341 
342  bl1_cfree_saved_contigm( m_save,
343  n_save,
344  c_save, c_rs_save, c_cs_save,
345  &c, &c_rs, &c_cs );
346 }
void bl1_cfree_saved_contigm(int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:59
Definition: blis_type_defs.h:81
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
scomplex bl1_c1(void)
Definition: bl1_constants.c:61
void bl1_ccreate_contigm(int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:81
void bl1_ccreate_contigmr(uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:77
scomplex bl1_c0(void)
Definition: bl1_constants.c:125
void bl1_chemm_blas(side1_t side, uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc)
Definition: bl1_hemm.c:660
trans1_t
Definition: blis_type_defs.h:52
Definition: blis_type_defs.h:55
void bl1_caxpymt(trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_axpymt.c:149
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_cconjmr(uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs)
Definition: bl1_conjmr.c:23
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void bl1_ccopymt(trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:215
Definition: blis_type_defs.h:132
void bl1_cscalm(conj1_t conj, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs)
Definition: bl1_scalm.c:169
scomplex * bl1_callocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:40
void bl1_cfree(scomplex *p)
Definition: bl1_free.c:40
void bl1_ccopymrt(uplo1_t uplo, trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_copymrt.c:223
void bl1_cfree_contigm(scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:45

◆ bl1_chemm_blas()

void bl1_chemm_blas ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  lda,
scomplex *  b,
int  ldb,
scomplex *  beta,
scomplex *  c,
int  ldc 
)

References bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(), cblas_chemm(), CblasColMajor, and F77_chemm().

Referenced by bl1_chemm().

// bl1_chemm_blas: thin dispatch layer that forwards a single-precision
// complex hemm call, with its operands unchanged, to whichever BLAS binding
// was selected at build time. Only side and uplo are translated (via the
// bl1_param_map_to_netlib_*() helpers) before the call.
661 {
662 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
// CBLAS binding: the storage order is fixed to CblasColMajor, and the
// dimensions and leading dimensions are passed by value.
663  enum CBLAS_ORDER cblas_order = CblasColMajor;
664  enum CBLAS_SIDE cblas_side;
665  enum CBLAS_UPLO cblas_uplo;
666 
// Translate the BLIS side/uplo values into their CBLAS enum counterparts.
667  bl1_param_map_to_netlib_side( side, &cblas_side );
668  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
669 
670  cblas_chemm( cblas_order,
671  cblas_side,
672  cblas_uplo,
673  m,
674  n,
675  alpha,
676  a, lda,
677  b, ldb,
678  beta,
679  c, ldc );
680 #else
// Fortran-77 binding: side/uplo are mapped into single chars, and every
// integer argument is passed by address.
681  char blas_side;
682  char blas_uplo;
683 
684  bl1_param_map_to_netlib_side( side, &blas_side );
685  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
686 
687  F77_chemm( &blas_side,
688  &blas_uplo,
689  &m,
690  &n,
691  alpha,
692  a, &lda,
693  b, &ldb,
694  beta,
695  c, &ldc );
696 #endif
697 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
void cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc)
void bl1_param_map_to_netlib_side(side1_t blis_side, void *blas_side)
Definition: bl1_param_map.c:71
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
Definition: blis_prototypes_cblas.h:17
CBLAS_SIDE
Definition: blis_prototypes_cblas.h:21
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47
void F77_chemm(char *side, char *uplo, int *m, int *n, scomplex *alpha, scomplex *a, int *lda, scomplex *b, int *ldb, scomplex *beta, scomplex *c, int *ldc)

◆ bl1_cher2k()

void bl1_cher2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs,
float *  beta,
scomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_c1(), bl1_callocm(), bl1_caxpymrt(), bl1_ccopymt(), bl1_ccreate_contigmr(), bl1_ccreate_contigmt(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigmr(), bl1_cher2k_blas(), bl1_csscalmr(), bl1_is_col_storage(), bl1_s0(), bl1_set_dims_with_trans(), bl1_zero_dim2(), BLIS1_CONJ_NO_TRANSPOSE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Her2k_external().

// bl1_cher2k: storage-aware front end for the single-precision complex
// rank-2k update. From the row/column strides of A, B and C it decides which
// mix of stride swaps, uplo/trans toggles, temporary copies of A or B, and a
// conjugating accumulation into C is required, calls bl1_cher2k_blas(), and
// then frees every temporary it allocated.
40 {
// The caller's uplo, m, pointers and strides are kept because the working
// copies may be replaced or toggled below; the saved values are what the
// final free/copy-back calls receive.
41  uplo1_t uplo_save = uplo;
42  int m_save = m;
43  scomplex* a_save = a;
44  scomplex* b_save = b;
45  scomplex* c_save = c;
46  int a_rs_save = a_rs;
47  int a_cs_save = a_cs;
48  int b_rs_save = b_rs;
49  int b_cs_save = b_cs;
50  int c_rs_save = c_rs;
51  int c_cs_save = c_cs;
52  float zero_r = bl1_s0();
53  scomplex one = bl1_c1();
54  scomplex alpha_copy;
55  scomplex* a_copy;
56  scomplex* b_copy;
57  scomplex* c_conj;
58  int lda, inca;
59  int ldb, incb;
60  int ldc, incc;
61  int lda_copy, inca_copy;
62  int ldb_copy, incb_copy;
63  int ldc_conj, incc_conj;
64  int her2k_needs_copya = FALSE;
65  int her2k_needs_copyb = FALSE;
66  int her2k_needs_conj = FALSE;
67  int her2k_needs_alpha_conj = FALSE;
68 
69  // Return early if possible.
70  if ( bl1_zero_dim2( m, k ) ) return;
71 
72  // If necessary, allocate, initialize, and use a temporary contiguous
73  // copy of each matrix rather than the original matrices.
74  bl1_ccreate_contigmt( trans,
75  m,
76  k,
77  a_save, a_rs_save, a_cs_save,
78  &a, &a_rs, &a_cs );
79 
80  bl1_ccreate_contigmt( trans,
81  m,
82  k,
83  b_save, b_rs_save, b_cs_save,
84  &b, &b_rs, &b_cs );
85 
// NOTE(review): listing line 86 (the opening of the call whose arguments
// follow) is missing from this rendering. The reference list and the argument
// shape suggest bl1_ccreate_contigmr( uplo, ... ) -- confirm against the
// original source file.
87  m,
88  m,
89  c_save, c_rs_save, c_cs_save,
90  &c, &c_rs, &c_cs );
91 
92  // Initialize with values assuming column-major storage.
93  lda = a_cs;
94  inca = a_rs;
95  ldb = b_cs;
96  incb = b_rs;
97  ldc = c_cs;
98  incc = c_rs;
99 
100  // Adjust the parameters based on the storage of each matrix.
// Each leaf below pairs the operation the caller requested (subscripts _c/_r
// denote column/row storage) with the operation actually issued.
101  if ( bl1_is_col_storage( c_rs, c_cs ) )
102  {
103  if ( bl1_is_col_storage( a_rs, a_cs ) )
104  {
105  if ( bl1_is_col_storage( b_rs, b_cs ) )
106  {
107  // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
108  // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
109  }
110  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
111  {
112  // requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
113  // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
114  her2k_needs_copyb = TRUE;
115  }
116  }
117  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
118  {
119  if ( bl1_is_col_storage( b_rs, b_cs ) )
120  {
121  // requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
122  // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
123  her2k_needs_copya = TRUE;
124  }
125  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
126  {
127  // requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
128  // effective operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
129  bl1_swap_ints( lda, inca );
130  bl1_swap_ints( ldb, incb );
131 
132  bl1_toggle_conjtrans( trans );
133 
134  her2k_needs_conj = TRUE;
135  her2k_needs_alpha_conj = TRUE;
136  }
137  }
138  }
139  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
140  {
141  if ( bl1_is_col_storage( a_rs, a_cs ) )
142  {
143  if ( bl1_is_col_storage( b_rs, b_cs ) )
144  {
145  // requested operation: uplo( C_r ) += A_c * B_c' + B_c * A_c'
146  // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
147  bl1_swap_ints( ldc, incc );
148 
149  bl1_toggle_uplo( uplo );
150 
151  her2k_needs_conj = TRUE;
152  }
153  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
154  {
155  // requested operation: uplo( C_r ) += A_c * B_r' + B_r * A_c'
156  // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
157  her2k_needs_copyb = TRUE;
158 
159  bl1_swap_ints( ldc, incc );
160 
161  bl1_toggle_uplo( uplo );
162 
163  her2k_needs_conj = TRUE;
164  }
165  }
166  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
167  {
168  if ( bl1_is_col_storage( b_rs, b_cs ) )
169  {
170  // requested operation: uplo( C_r ) += A_r * B_c' + B_c * A_r'
171  // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
172  her2k_needs_copya = TRUE;
173 
174  bl1_swap_ints( ldc, incc );
175 
176  bl1_toggle_uplo( uplo );
177 
178  her2k_needs_conj = TRUE;
179  }
180  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
181  {
182  // requested operation: uplo( C_r ) += A_r * B_r' + B_r * A_r'
183  // effective operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
184  bl1_swap_ints( ldc, incc );
185  bl1_swap_ints( lda, inca );
186  bl1_swap_ints( ldb, incb );
187 
188  bl1_toggle_uplo( uplo );
189  bl1_toggle_conjtrans( trans );
190 
191  her2k_needs_alpha_conj = TRUE;
192  }
193  }
194  }
195 
196  // Make a copy of alpha and conjugate if necessary.
197  alpha_copy = *alpha;
198  if ( her2k_needs_alpha_conj )
199  {
// NOTE(review): alpha_copy is an scomplex, but the z-prefixed (presumably
// double-complex) conjugation helper is applied to it. Confirm this is
// intentional and that bl1_zconjs is safe on a single-precision operand.
200  bl1_zconjs( &alpha_copy );
201  }
202 
// By default the "copy" of A is A itself; it is only replaced when
// her2k_needs_copya was set above.
203  a_copy = a;
204  lda_copy = lda;
205  inca_copy = inca;
206 
207  // There are two cases where we need to copy A to column-major storage.
208  // We handle those two cases here.
209  if ( her2k_needs_copya )
210  {
211  int m_a;
212  int n_a;
213 
214  // Determine the dimensions of A according to the value of trans. We
215  // need this in order to set the leading dimension of the copy of A.
216  bl1_set_dims_with_trans( trans, m, k, &m_a, &n_a );
217 
218  // We need a temporary matrix to hold a column-major copy of A.
219  a_copy = bl1_callocm( m, k );
220  lda_copy = m_a;
221  inca_copy = 1;
222 
223  // Copy the contents of A into A_copy.
// NOTE(review): listing line 224 (the opening of this copy call, including
// its first argument) is missing from this rendering. The reference list
// suggests bl1_ccopymt -- confirm the callee and its trans argument.
225  m_a,
226  n_a,
227  a, inca, lda,
228  a_copy, inca_copy, lda_copy );
229  }
230 
// Same default-then-override scheme for B.
231  b_copy = b;
232  ldb_copy = ldb;
233  incb_copy = incb;
234 
235  // There are two cases where we need to copy B to column-major storage.
236  // We handle those two cases here.
237  if ( her2k_needs_copyb )
238  {
239  int m_b;
240  int n_b;
241 
242  // Determine the dimensions of B according to the value of trans. We
243  // need this in order to set the leading dimension of the copy of B.
244  bl1_set_dims_with_trans( trans, m, k, &m_b, &n_b );
245 
246  // We need a temporary matrix to hold a column-major copy of B.
247  b_copy = bl1_callocm( m, k );
248  ldb_copy = m_b;
249  incb_copy = 1;
250 
251  // Copy the contents of B into B_copy.
// NOTE(review): listing line 252 (the opening of this copy call, including
// its first argument) is missing from this rendering. The reference list
// suggests bl1_ccopymt -- confirm the callee and its trans argument.
253  m_b,
254  n_b,
255  b, incb, ldb,
256  b_copy, incb_copy, ldb_copy );
257  }
258 
259  // There are two cases where we need to perform the rank-2k product and
260  // then axpy the result into C with a conjugation. We handle those two
261  // cases here.
262  if ( her2k_needs_conj )
263  {
264  // We need a temporary matrix for holding the rank-2k product.
265  c_conj = bl1_callocm( m, m );
266  ldc_conj = m;
267  incc_conj = 1;
268 
269  // Compute the rank-2k product.
// beta is passed as zero here so the temporary receives only the product;
// the caller's beta is applied to C separately below.
270  bl1_cher2k_blas( uplo,
271  trans,
272  m,
273  k,
274  &alpha_copy,
275  a_copy, lda_copy,
276  b_copy, ldb_copy,
277  &zero_r,
278  c_conj, ldc_conj );
279 
280  // Scale C by beta.
281  bl1_csscalmr( uplo,
282  m,
283  m,
284  beta,
285  c, incc, ldc );
286 
287  // And finally, accumulate the rank-2k product in C_conj into C
288  // with a conjugation.
289  bl1_caxpymrt( uplo,
// NOTE(review): listing line 290 (the trans argument of this call) is missing
// from this rendering; the reference list suggests BLIS1_CONJ_NO_TRANSPOSE --
// confirm.
291  m,
292  m,
293  &one,
294  c_conj, incc_conj, ldc_conj,
295  c, incc, ldc );
296 
297  // Free the temporary matrix for C.
298  bl1_cfree( c_conj );
299  }
300  else
301  {
// No conjugating accumulation needed: update C directly with the caller's beta.
302  bl1_cher2k_blas( uplo,
303  trans,
304  m,
305  k,
306  &alpha_copy,
307  a_copy, lda_copy,
308  b_copy, ldb_copy,
309  beta,
310  c, ldc );
311  }
312 
// Release the column-major copies of A and B, if they were made.
313  if ( her2k_needs_copya )
314  bl1_cfree( a_copy );
315 
316  if ( her2k_needs_copyb )
317  bl1_cfree( b_copy );
318 
319  // Free any temporary contiguous matrices, copying the result back to
320  // the original matrix.
321  bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
322  &a, &a_rs, &a_cs );
323 
324  bl1_cfree_contigm( b_save, b_rs_save, b_cs_save,
325  &b, &b_rs, &b_cs );
326 
// The saved (untoggled) uplo and m are used here, not the working values.
327  bl1_cfree_saved_contigmr( uplo_save,
328  m_save,
329  m_save,
330  c_save, c_rs_save, c_cs_save,
331  &c, &c_rs, &c_cs );
332 }
void bl1_csscalmr(uplo1_t uplo, int m, int n, float *alpha, scomplex *a, int a_rs, int a_cs)
Definition: bl1_scalmr.c:125
uplo1_t
Definition: blis_type_defs.h:60
void bl1_cher2k_blas(uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, float *beta, scomplex *c, int ldc)
Definition: bl1_her2k.c:631
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
scomplex bl1_c1(void)
Definition: bl1_constants.c:61
void bl1_ccreate_contigmr(uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:77
void bl1_ccreate_contigmt(trans1_t trans_dims, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmt.c:89
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_caxpymrt(uplo1_t uplo, trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_axpymrt.c:227
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void bl1_set_dims_with_trans(trans1_t trans, int m, int n, int *m_new, int *n_new)
Definition: bl1_set_dims.c:13
void bl1_ccopymt(trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:215
Definition: blis_type_defs.h:132
void bl1_cfree_saved_contigmr(uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigmr.c:59
scomplex * bl1_callocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:40
void bl1_cfree(scomplex *p)
Definition: bl1_free.c:40
void bl1_cfree_contigm(scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:45
float bl1_s0(void)
Definition: bl1_constants.c:111

◆ bl1_cher2k_blas()

void bl1_cher2k_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
scomplex *  alpha,
scomplex *  a,
int  lda,
scomplex *  b,
int  ldb,
float *  beta,
scomplex *  c,
int  ldc 
)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_cher2k(), CblasColMajor, and F77_cher2k().

Referenced by bl1_cher2k().

// bl1_cher2k_blas: thin dispatch layer that forwards a single-precision
// complex her2k call to whichever BLAS binding was selected at build time.
// uplo and trans are translated with the bl1_param_map_to_netlib_*() helpers;
// all other operands are forwarded as received.
632 {
633 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
// CBLAS binding: column-major order is fixed, and integer arguments are
// passed by value.
634  enum CBLAS_ORDER cblas_order = CblasColMajor;
635  enum CBLAS_UPLO cblas_uplo;
636  enum CBLAS_TRANSPOSE cblas_trans;
637 
638  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
639  bl1_param_map_to_netlib_trans( trans, &cblas_trans );
640 
// Note that the real-valued beta is dereferenced here, because cblas_cher2k
// takes it by value, whereas alpha is forwarded as a pointer.
641  cblas_cher2k( cblas_order,
642  cblas_uplo,
643  cblas_trans,
644  m,
645  k,
646  alpha,
647  a, lda,
648  b, ldb,
649  *beta,
650  c, ldc );
651 #else
// Fortran-77 binding: uplo/trans are mapped into single chars, and every
// integer argument, as well as beta, is passed by address.
652  char blas_uplo;
653  char blas_trans;
654 
655  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
656  bl1_param_map_to_netlib_trans( trans, &blas_trans );
657 
658  F77_cher2k( &blas_uplo,
659  &blas_trans,
660  &m,
661  &k,
662  alpha,
663  a, &lda,
664  b, &ldb,
665  beta,
666  c, &ldc );
667 #endif
668 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
void cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const float beta, void *C, const int ldc)
Definition: blis_prototypes_cblas.h:17
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47
void F77_cher2k(char *uplo, char *transa, int *n, int *k, scomplex *alpha, scomplex *a, int *lda, scomplex *b, int *ldb, float *beta, scomplex *c, int *ldc)

◆ bl1_cherk()

void bl1_cherk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
float *  beta,
scomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_c1(), bl1_callocm(), bl1_caxpymrt(), bl1_ccreate_contigmr(), bl1_ccreate_contigmt(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigmr(), bl1_cherk_blas(), bl1_csscalmr(), bl1_is_col_storage(), bl1_s0(), bl1_zero_dim2(), and BLIS1_CONJ_NO_TRANSPOSE.

Referenced by FLA_Herk_external(), and FLA_UDdate_UT_opc_var1().

// bl1_cherk: storage-aware front end for the single-precision complex rank-k
// update. From the row/column strides of A and C it decides which stride
// swaps, uplo/trans toggles, and (in two cases) a conjugating accumulation
// into C are required, calls bl1_cherk_blas(), and then frees every
// temporary it allocated.
37 {
// The caller's uplo, m, pointers and strides are kept because the working
// copies may be replaced or toggled below; the saved values are what the
// final free/copy-back calls receive.
38  uplo1_t uplo_save = uplo;
39  int m_save = m;
40  scomplex* a_save = a;
41  scomplex* c_save = c;
42  int a_rs_save = a_rs;
43  int a_cs_save = a_cs;
44  int c_rs_save = c_rs;
45  int c_cs_save = c_cs;
46  float zero_r = bl1_s0();
47  scomplex one = bl1_c1();
48  scomplex* c_conj;
49  int lda, inca;
50  int ldc, incc;
51  int ldc_conj, incc_conj;
52  int herk_needs_conj = FALSE;
53 
54  // Return early if possible.
55  if ( bl1_zero_dim2( m, k ) ) return;
56 
57  // If necessary, allocate, initialize, and use a temporary contiguous
58  // copy of each matrix rather than the original matrices.
59  bl1_ccreate_contigmt( trans,
60  m,
61  k,
62  a_save, a_rs_save, a_cs_save,
63  &a, &a_rs, &a_cs );
64 
// NOTE(review): listing line 65 (the opening of the call whose arguments
// follow) is missing from this rendering. The reference list and the argument
// shape suggest bl1_ccreate_contigmr( uplo, ... ) -- confirm against the
// original source file.
66  m,
67  m,
68  c_save, c_rs_save, c_cs_save,
69  &c, &c_rs, &c_cs );
70 
71  // Initialize with values assuming column-major storage.
72  lda = a_cs;
73  inca = a_rs;
74  ldc = c_cs;
75  incc = c_rs;
76 
77  // Adjust the parameters based on the storage of each matrix.
// Each leaf below pairs the operation the caller requested (subscripts _c/_r
// denote column/row storage) with the operation actually issued.
78  if ( bl1_is_col_storage( c_rs, c_cs ) )
79  {
80  if ( bl1_is_col_storage( a_rs, a_cs ) )
81  {
82  // requested operation: uplo( C_c ) += A_c * A_c'
83  // effective operation: uplo( C_c ) += A_c * A_c'
84  }
85  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
86  {
87  // requested operation: uplo( C_c ) += A_r * A_r'
88  // effective operation: uplo( C_c ) += conj( A_c' * A_c )
89  bl1_swap_ints( lda, inca );
90 
91  bl1_toggle_conjtrans( trans );
92 
93  herk_needs_conj = TRUE;
94  }
95  }
96  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
97  {
98  if ( bl1_is_col_storage( a_rs, a_cs ) )
99  {
100  // requested operation: uplo( C_r ) += A_c * A_c'
101  // effective operation: ~uplo( C_c ) += conj( A_c * A_c' )
102  bl1_swap_ints( ldc, incc );
103 
104  bl1_toggle_uplo( uplo );
105 
106  herk_needs_conj = TRUE;
107  }
108  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
109  {
110  // requested operation: uplo( C_r ) += A_r * A_r'
111  // effective operation: ~uplo( C_c ) += A_c' * A_c
112  bl1_swap_ints( ldc, incc );
113  bl1_swap_ints( lda, inca );
114 
115  bl1_toggle_uplo( uplo );
116  bl1_toggle_conjtrans( trans );
117  }
118  }
119 
120  // There are two cases where we need to perform the rank-k product and
121  // then axpy the result into C with a conjugation. We handle those two
122  // cases here.
123  if ( herk_needs_conj )
124  {
125  // We need a temporary matrix for holding the rank-k product.
126  c_conj = bl1_callocm( m, m );
127  ldc_conj = m;
128  incc_conj = 1;
129 
130  // Compute the rank-k product.
// beta is passed as zero here so the temporary receives only the product;
// the caller's beta is applied to C separately below.
131  bl1_cherk_blas( uplo,
132  trans,
133  m,
134  k,
135  alpha,
136  a, lda,
137  &zero_r,
138  c_conj, ldc_conj );
139 
140  // Scale C by beta.
141  bl1_csscalmr( uplo,
142  m,
143  m,
144  beta,
145  c, incc, ldc );
146 
147  // And finally, accumulate the rank-k product in C_conj into C
148  // with a conjugation.
149  bl1_caxpymrt( uplo,
// NOTE(review): listing line 150 (the trans argument of this call) is missing
// from this rendering; the reference list suggests BLIS1_CONJ_NO_TRANSPOSE --
// confirm.
151  m,
152  m,
153  &one,
154  c_conj, incc_conj, ldc_conj,
155  c, incc, ldc );
156 
157  // Free the temporary matrix for C.
158  bl1_cfree( c_conj );
159  }
160  else
161  {
// No conjugating accumulation needed: update C directly with the caller's beta.
162  bl1_cherk_blas( uplo,
163  trans,
164  m,
165  k,
166  alpha,
167  a, lda,
168  beta,
169  c, ldc );
170  }
171 
172  // Free any temporary contiguous matrices, copying the result back to
173  // the original matrix.
174  bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
175  &a, &a_rs, &a_cs );
176 
// The saved (untoggled) uplo and m are used here, not the working values.
177  bl1_cfree_saved_contigmr( uplo_save,
178  m_save,
179  m_save,
180  c_save, c_rs_save, c_cs_save,
181  &c, &c_rs, &c_cs );
182 }
void bl1_csscalmr(uplo1_t uplo, int m, int n, float *alpha, scomplex *a, int a_rs, int a_cs)
Definition: bl1_scalmr.c:125
uplo1_t
Definition: blis_type_defs.h:60
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
scomplex bl1_c1(void)
Definition: bl1_constants.c:61
void bl1_ccreate_contigmr(uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:77
void bl1_ccreate_contigmt(trans1_t trans_dims, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmt.c:89
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_caxpymrt(uplo1_t uplo, trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_axpymrt.c:227
void bl1_cherk_blas(uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, scomplex *a, int lda, float *beta, scomplex *c, int ldc)
Definition: bl1_herk.c:334
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:132
void bl1_cfree_saved_contigmr(uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigmr.c:59
scomplex * bl1_callocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:40
void bl1_cfree(scomplex *p)
Definition: bl1_free.c:40
void bl1_cfree_contigm(scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:45
float bl1_s0(void)
Definition: bl1_constants.c:111

◆ bl1_cherk_blas()

void bl1_cherk_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float *  alpha,
scomplex *  a,
int  lda,
float *  beta,
scomplex *  c,
int  ldc 
)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_cherk(), CblasColMajor, and F77_cherk().

Referenced by bl1_cherk().

// bl1_cherk_blas: thin dispatch layer that forwards a single-precision
// complex herk call to whichever BLAS binding was selected at build time.
// uplo and trans are translated with the bl1_param_map_to_netlib_*() helpers;
// all other operands are forwarded as received.
335 {
336 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
// CBLAS binding: column-major order is fixed, and integer arguments are
// passed by value.
337  enum CBLAS_ORDER cblas_order = CblasColMajor;
338  enum CBLAS_UPLO cblas_uplo;
339  enum CBLAS_TRANSPOSE cblas_trans;
340 
341  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
342  bl1_param_map_to_netlib_trans( trans, &cblas_trans );
343 
// Both real scalars are dereferenced, because cblas_cherk takes alpha and
// beta by value.
344  cblas_cherk( cblas_order,
345  cblas_uplo,
346  cblas_trans,
347  m,
348  k,
349  *alpha,
350  a, lda,
351  *beta,
352  c, ldc );
353 #else
// Fortran-77 binding: uplo/trans are mapped into single chars, and every
// scalar argument is passed by address.
354  char blas_uplo;
355  char blas_trans;
356 
357  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
358  bl1_param_map_to_netlib_trans( trans, &blas_trans );
359 
360  F77_cherk( &blas_uplo,
361  &blas_trans,
362  &m,
363  &k,
364  alpha,
365  a, &lda,
366  beta,
367  c, &ldc );
368 #endif
369 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
void cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const void *A, const int lda, const float beta, void *C, const int ldc)
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
void F77_cherk(char *uplo, char *transa, int *n, int *k, float *alpha, scomplex *a, int *lda, float *beta, scomplex *c, int *ldc)
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
Definition: blis_prototypes_cblas.h:17
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47

◆ bl1_csymm()

void bl1_csymm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs,
scomplex *  beta,
scomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_c0(), bl1_c1(), bl1_callocm(), bl1_caxpymt(), bl1_ccopymt(), bl1_ccreate_contigm(), bl1_ccreate_contigmr(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigm(), bl1_cscalm(), bl1_csymm_blas(), bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by FLA_Symm_external().

// bl1_csymm: storage-aware front end for the single-precision complex symm
// operation. From the row/column strides of A, B and C it decides which
// stride swaps, side/uplo toggles, m/n swap, temporary copy of B, or
// transposed accumulation into C is required, calls bl1_csymm_blas(), and
// then frees every temporary it allocated.
536 {
// The caller's m, n, pointers and strides are kept because the working
// copies may be replaced or swapped below; the saved values are what the
// final free/copy-back calls receive.
537  int m_save = m;
538  int n_save = n;
539  scomplex* a_save = a;
540  scomplex* b_save = b;
541  scomplex* c_save = c;
542  int a_rs_save = a_rs;
543  int a_cs_save = a_cs;
544  int b_rs_save = b_rs;
545  int b_cs_save = b_cs;
546  int c_rs_save = c_rs;
547  int c_cs_save = c_cs;
548  scomplex zero = bl1_c0();
549  scomplex one = bl1_c1();
550  scomplex* b_copy;
551  scomplex* c_trans;
552  int dim_a;
553  int lda, inca;
554  int ldb, incb;
555  int ldc, incc;
556  int ldb_copy, incb_copy;
557  int ldc_trans, incc_trans;
558  int symm_needs_copyb = FALSE;
559  int symm_needs_transb = FALSE;
560  int symm_needs_axpyt = FALSE;
561 
562  // Return early if possible.
563  if ( bl1_zero_dim2( m, n ) ) return;
564 
565  // If necessary, allocate, initialize, and use a temporary contiguous
566  // copy of each matrix rather than the original matrices.
// A is square; its order (dim_a) is chosen from m and n according to side.
567  bl1_set_dim_with_side( side, m, n, &dim_a );
568  bl1_ccreate_contigmr( uplo,
569  dim_a,
570  dim_a,
571  a_save, a_rs_save, a_cs_save,
572  &a, &a_rs, &a_cs );
573 
// NOTE(review): listing line 574 (the opening of the call whose arguments
// follow) is missing from this rendering. The reference list and the argument
// shape suggest bl1_ccreate_contigm( m, ... ) -- confirm against the original
// source file.
575  n,
576  b_save, b_rs_save, b_cs_save,
577  &b, &b_rs, &b_cs );
578 
// NOTE(review): listing line 579 is likewise missing; presumably a second
// bl1_ccreate_contigm( m, ... ) call, this time for C -- confirm.
580  n,
581  c_save, c_rs_save, c_cs_save,
582  &c, &c_rs, &c_cs );
583 
584  // Initialize with values assuming column-major storage.
585  lda = a_cs;
586  inca = a_rs;
587  ldb = b_cs;
588  incb = b_rs;
589  ldc = c_cs;
590  incc = c_rs;
591 
// NOTE(review): several "effective operation" comments below mention
// conj( A_c ), yet no conjugation flag is declared in this function and no
// conjugation of A is visible in it. The wording may have been carried over
// from the hemm variant -- confirm.
592  // Adjust the parameters based on the storage of each matrix.
593  if ( bl1_is_col_storage( c_rs, c_cs ) )
594  {
595  if ( bl1_is_col_storage( a_rs, a_cs ) )
596  {
597  if ( bl1_is_col_storage( b_rs, b_cs ) )
598  {
599  // requested operation: C_c += uplo( A_c ) * B_c
600  // effective operation: C_c += uplo( A_c ) * B_c
601  }
602  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
603  {
604  // requested operation: C_c += uplo( A_c ) * B_r
605  // effective operation: C_c += uplo( A_c ) * B_c
606  symm_needs_copyb = TRUE;
607  }
608  }
609  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
610  {
611  if ( bl1_is_col_storage( b_rs, b_cs ) )
612  {
613  // requested operation: C_c += uplo( A_r ) * B_c
614  // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
615  bl1_swap_ints( lda, inca );
616 
617  bl1_toggle_uplo( uplo );
618  }
619  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
620  {
621  // requested operation: C_c += uplo( A_r ) * B_r
622  // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
623  bl1_swap_ints( lda, inca );
624  bl1_swap_ints( ldb, incb );
625 
626  bl1_toggle_side( side );
627  bl1_toggle_uplo( uplo );
628 
629  symm_needs_axpyt = TRUE;
630  }
631  }
632  }
633  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
634  {
635  if ( bl1_is_col_storage( a_rs, a_cs ) )
636  {
637  if ( bl1_is_col_storage( b_rs, b_cs ) )
638  {
639  // requested operation: C_r += uplo( A_c ) * B_c
640  // effective operation: C_c += ( uplo( A_c ) * B_c )^T
641  bl1_swap_ints( ldc, incc );
642 
643  bl1_swap_ints( m, n );
644 
645  symm_needs_axpyt = TRUE;
646  }
647  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
648  {
649  // requested operation: C_r += uplo( A_c ) * B_r
650  // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
651  bl1_swap_ints( ldc, incc );
652  bl1_swap_ints( ldb, incb );
653 
654  bl1_swap_ints( m, n );
655 
656  bl1_toggle_side( side );
657  }
658  }
659  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
660  {
661  if ( bl1_is_col_storage( b_rs, b_cs ) )
662  {
663  // requested operation: C_r += uplo( A_r ) * B_c
664  // effective operation: C_c += B_c^T * ~uplo( A_c )
665  bl1_swap_ints( ldc, incc );
666  bl1_swap_ints( lda, inca );
667 
668  bl1_swap_ints( m, n );
669 
670  bl1_toggle_side( side );
671  bl1_toggle_uplo( uplo );
672 
673  symm_needs_copyb = TRUE;
674  symm_needs_transb = TRUE;
675  }
676  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
677  {
678  // requested operation: C_r += uplo( A_r ) * B_r
679  // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
680  bl1_swap_ints( ldc, incc );
681  bl1_swap_ints( lda, inca );
682  bl1_swap_ints( ldb, incb );
683 
684  bl1_swap_ints( m, n );
685 
686  bl1_toggle_uplo( uplo );
687  bl1_toggle_side( side );
688  }
689  }
690  }
691 
692  // We need a temporary matrix for the cases where B needs to be copied.
// By default the "copy" of B is B itself; it is only replaced when
// symm_needs_copyb was set above.
693  b_copy = b;
694  ldb_copy = ldb;
695  incb_copy = incb;
696 
697  // There are two cases where we need to make a copy of B: one where the
698  // copy's dimensions are transposed from the original B, and one where
699  // the dimensions are not swapped.
700  if ( symm_needs_copyb )
701  {
702  trans1_t transb;
703 
704  // Set transb, which determines whether or not we need to copy from B
705  // as if it needs a transposition. If a transposition is needed, then
706  // m and n have already been swapped. So in either case m
707  // represents the leading dimension of the copy.
708  if ( symm_needs_transb ) transb = BLIS1_TRANSPOSE;
709  else transb = BLIS1_NO_TRANSPOSE;
710 
711  b_copy = bl1_callocm( m, n );
712  ldb_copy = m;
713  incb_copy = 1;
714 
715  bl1_ccopymt( transb,
716  m,
717  n,
718  b, incb, ldb,
719  b_copy, incb_copy, ldb_copy );
720  }
721 
722  // There are two cases where we need to perform the symm and then axpy
723  // the result into C with a transposition. We handle those cases here.
724  if ( symm_needs_axpyt )
725  {
726  // We need a temporary matrix for holding C^T. Notice that m and n
727  // represent the dimensions of C, and thus C_trans is n-by-m
728  // (interpreting both as column-major matrices). So the leading
729  // dimension of the temporary matrix holding C^T is n.
730  c_trans = bl1_callocm( n, m );
731  ldc_trans = n;
732  incc_trans = 1;
733 
734  // Compute A * B (or B * A) and store the result in C_trans.
735  // Note that there is no overlap between the axpyt cases and
736  // the copyb cases, hence the use of a, b, lda, and ldb.
// beta is passed as zero here so the temporary receives only the product;
// the caller's beta is applied to C separately below.
737  bl1_csymm_blas( side,
738  uplo,
739  n,
740  m,
741  alpha,
742  a, lda,
743  b, ldb,
744  &zero,
745  c_trans, ldc_trans );
746 
747  // Scale C by beta.
// NOTE(review): listing line 748 (the opening of this scaling call, including
// its first argument) is missing from this rendering. The reference list
// suggests bl1_cscalm with BLIS1_NO_CONJUGATE -- confirm.
749  m,
750  n,
751  beta,
752  c, incc, ldc );
753 
754  // And finally, accumulate the matrix product in C_trans into C
755  // with a transpose.
// NOTE(review): listing line 756 (the opening of this accumulation call,
// including its first argument) is missing from this rendering. The reference
// list suggests bl1_caxpymt with BLIS1_TRANSPOSE -- confirm.
757  m,
758  n,
759  &one,
760  c_trans, incc_trans, ldc_trans,
761  c, incc, ldc );
762 
763  // Free the temporary matrix for C.
764  bl1_cfree( c_trans );
765  }
766  else // no extra axpyt step needed
767  {
768  bl1_csymm_blas( side,
769  uplo,
770  m,
771  n,
772  alpha,
773  a, lda,
774  b_copy, ldb_copy,
775  beta,
776  c, ldc );
777  }
778 
// Release the copy of B, if one was made.
779  if ( symm_needs_copyb )
780  bl1_cfree( b_copy );
781 
782  // Free any temporary contiguous matrices, copying the result back to
783  // the original matrix.
784  bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
785  &a, &a_rs, &a_cs );
786 
787  bl1_cfree_contigm( b_save, b_rs_save, b_cs_save,
788  &b, &b_rs, &b_cs );
789 
// The saved (unswapped) m and n are used here, not the working values.
790  bl1_cfree_saved_contigm( m_save,
791  n_save,
792  c_save, c_rs_save, c_cs_save,
793  &c, &c_rs, &c_cs );
794 }
void bl1_cfree_saved_contigm(int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:59
Definition: blis_type_defs.h:81
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
scomplex bl1_c1(void)
Definition: bl1_constants.c:61
void bl1_ccreate_contigm(int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:81
void bl1_ccreate_contigmr(uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:77
scomplex bl1_c0(void)
Definition: bl1_constants.c:125
trans1_t
Definition: blis_type_defs.h:52
Definition: blis_type_defs.h:55
void bl1_caxpymt(trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_axpymt.c:149
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
Definition: blis_type_defs.h:54
void bl1_ccopymt(trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:215
Definition: blis_type_defs.h:132
void bl1_cscalm(conj1_t conj, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs)
Definition: bl1_scalm.c:169
scomplex * bl1_callocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:40
void bl1_cfree(scomplex *p)
Definition: bl1_free.c:40
void bl1_csymm_blas(side1_t side, uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc)
Definition: bl1_symm.c:1137
void bl1_cfree_contigm(scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:45

◆ bl1_csymm_blas()

void bl1_csymm_blas ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  lda,
scomplex *  b,
int  ldb,
scomplex *  beta,
scomplex *  c,
int  ldc 
)

References bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(), cblas_csymm(), CblasColMajor, and F77_csymm().

Referenced by bl1_csymm().

1138 {
1139 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1140  enum CBLAS_ORDER cblas_order = CblasColMajor;
1141  enum CBLAS_SIDE cblas_side;
1142  enum CBLAS_UPLO cblas_uplo;
1143 
1144  bl1_param_map_to_netlib_side( side, &cblas_side );
1145  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
1146 
1147  cblas_csymm( cblas_order,
1148  cblas_side,
1149  cblas_uplo,
1150  m,
1151  n,
1152  alpha,
1153  a, lda,
1154  b, ldb,
1155  beta,
1156  c, ldc );
1157 #else
1158  char blas_side;
1159  char blas_uplo;
1160 
1161  bl1_param_map_to_netlib_side( side, &blas_side );
1162  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
1163 
1164  F77_csymm( &blas_side,
1165  &blas_uplo,
1166  &m,
1167  &n,
1168  alpha,
1169  a, &lda,
1170  b, &ldb,
1171  beta,
1172  c, &ldc );
1173 #endif
1174 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
void cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc)
void F77_csymm(char *side, char *uplo, int *m, int *n, scomplex *alpha, scomplex *a, int *lda, scomplex *b, int *ldb, scomplex *beta, scomplex *c, int *ldc)
void bl1_param_map_to_netlib_side(side1_t blis_side, void *blas_side)
Definition: bl1_param_map.c:71
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
Definition: blis_prototypes_cblas.h:17
CBLAS_SIDE
Definition: blis_prototypes_cblas.h:21
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47

◆ bl1_csyr2k()

void bl1_csyr2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs,
scomplex *  beta,
scomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_callocm(), bl1_ccopymt(), bl1_ccreate_contigmr(), bl1_ccreate_contigmt(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigmr(), bl1_csyr2k_blas(), bl1_is_col_storage(), bl1_set_dims_with_trans(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Syr2k_external().

466 {
467  uplo1_t uplo_save = uplo;
468  int m_save = m;
469  scomplex* a_save = a;
470  scomplex* b_save = b;
471  scomplex* c_save = c;
472  int a_rs_save = a_rs;
473  int a_cs_save = a_cs;
474  int b_rs_save = b_rs;
475  int b_cs_save = b_cs;
476  int c_rs_save = c_rs;
477  int c_cs_save = c_cs;
478  scomplex* a_copy;
479  scomplex* b_copy;
480  int lda, inca;
481  int ldb, incb;
482  int ldc, incc;
483  int lda_copy, inca_copy;
484  int ldb_copy, incb_copy;
485  int syr2k_needs_copya = FALSE;
486  int syr2k_needs_copyb = FALSE;
487 
488  // Return early if possible.
489  if ( bl1_zero_dim2( m, k ) ) return;
490 
491  // If necessary, allocate, initialize, and use a temporary contiguous
492  // copy of each matrix rather than the original matrices.
493  bl1_ccreate_contigmt( trans,
494  m,
495  k,
496  a_save, a_rs_save, a_cs_save,
497  &a, &a_rs, &a_cs );
498 
499  bl1_ccreate_contigmt( trans,
500  m,
501  k,
502  b_save, b_rs_save, b_cs_save,
503  &b, &b_rs, &b_cs );
504 
505  bl1_ccreate_contigmr( uplo,
506  m,
507  m,
508  c_save, c_rs_save, c_cs_save,
509  &c, &c_rs, &c_cs );
510 
511  // Initialize with values assuming column-major storage.
512  lda = a_cs;
513  inca = a_rs;
514  ldb = b_cs;
515  incb = b_rs;
516  ldc = c_cs;
517  incc = c_rs;
518 
519  // Adjust the parameters based on the storage of each matrix.
520  if ( bl1_is_col_storage( c_rs, c_cs ) )
521  {
522  if ( bl1_is_col_storage( a_rs, a_cs ) )
523  {
524  if ( bl1_is_col_storage( b_rs, b_cs ) )
525  {
526  // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
527  // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
528  }
529  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
530  {
531  // requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
532  // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
533  syr2k_needs_copyb = TRUE;
534  }
535  }
536  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
537  {
538  if ( bl1_is_col_storage( b_rs, b_cs ) )
539  {
540  // requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
541  // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
542  syr2k_needs_copya = TRUE;
543  }
544  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
545  {
546  // requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
547  // effective operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
548  bl1_swap_ints( lda, inca );
549  bl1_swap_ints( ldb, incb );
550 
551  bl1_toggle_trans( trans );
552  }
553  }
554  }
555  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
556  {
557  if ( bl1_is_col_storage( a_rs, a_cs ) )
558  {
559  if ( bl1_is_col_storage( b_rs, b_cs ) )
560  {
561  // requested operation: uplo( C_r ) += A_c * B_c' + B_c * A_c'
562  // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
563  bl1_swap_ints( ldc, incc );
564 
565  bl1_toggle_uplo( uplo );
566  }
567  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
568  {
569  // requested operation: uplo( C_r ) += A_c * B_r' + B_r * A_c'
570  // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
571  syr2k_needs_copyb = TRUE;
572 
573  bl1_swap_ints( ldc, incc );
574 
575  bl1_toggle_uplo( uplo );
576  }
577  }
578  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
579  {
580  if ( bl1_is_col_storage( b_rs, b_cs ) )
581  {
582  // requested operation: uplo( C_r ) += A_r * B_c' + B_c * A_r'
583  // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
584  syr2k_needs_copya = TRUE;
585 
586  bl1_swap_ints( ldc, incc );
587 
588  bl1_toggle_uplo( uplo );
589  }
590  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
591  {
592  // requested operation: uplo( C_r ) += A_r * B_r' + B_r * A_r'
593  // effective operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
594  bl1_swap_ints( ldc, incc );
595  bl1_swap_ints( lda, inca );
596  bl1_swap_ints( ldb, incb );
597 
598  bl1_toggle_uplo( uplo );
599  bl1_toggle_trans( trans );
600  }
601  }
602  }
603 
604  a_copy = a;
605  lda_copy = lda;
606  inca_copy = inca;
607 
608  // There are two cases where we need to copy A to column-major storage.
609  // We handle those two cases here.
610  if ( syr2k_needs_copya )
611  {
612  int m_a;
613  int n_a;
614 
615  // Determine the dimensions of A according to the value of trans. We
616  // need this in order to set the leading dimension of the copy of A.
617  bl1_set_dims_with_trans( trans, m, k, &m_a, &n_a );
618 
619  // We need a temporary matrix to hold a column-major copy of A.
620  a_copy = bl1_callocm( m, k );
621  lda_copy = m_a;
622  inca_copy = 1;
623 
624  // Copy the contents of A into A_copy.
625  bl1_ccopymt( BLIS1_NO_TRANSPOSE,
626  m_a,
627  n_a,
628  a, inca, lda,
629  a_copy, inca_copy, lda_copy );
630  }
631 
632  b_copy = b;
633  ldb_copy = ldb;
634  incb_copy = incb;
635 
636  // There are two cases where we need to copy B to column-major storage.
637  // We handle those two cases here.
638  if ( syr2k_needs_copyb )
639  {
640  int m_b;
641  int n_b;
642 
643  // Determine the dimensions of B according to the value of trans. We
644  // need this in order to set the leading dimension of the copy of B.
645  bl1_set_dims_with_trans( trans, m, k, &m_b, &n_b );
646 
647  // We need a temporary matrix to hold a column-major copy of B.
648  b_copy = bl1_callocm( m, k );
649  ldb_copy = m_b;
650  incb_copy = 1;
651 
652  // Copy the contents of B into B_copy.
653  bl1_ccopymt( BLIS1_NO_TRANSPOSE,
654  m_b,
655  n_b,
656  b, incb, ldb,
657  b_copy, incb_copy, ldb_copy );
658  }
659 
660  bl1_csyr2k_blas( uplo,
661  trans,
662  m,
663  k,
664  alpha,
665  a_copy, lda_copy,
666  b_copy, ldb_copy,
667  beta,
668  c, ldc );
669 
670  if ( syr2k_needs_copya )
671  bl1_cfree( a_copy );
672 
673  if ( syr2k_needs_copyb )
674  bl1_cfree( b_copy );
675 
676  // Free any temporary contiguous matrices, copying the result back to
677  // the original matrix.
678  bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
679  &a, &a_rs, &a_cs );
680 
681  bl1_cfree_contigm( b_save, b_rs_save, b_cs_save,
682  &b, &b_rs, &b_cs );
683 
684  bl1_cfree_saved_contigmr( uplo_save,
685  m_save,
686  m_save,
687  c_save, c_rs_save, c_cs_save,
688  &c, &c_rs, &c_cs );
689 }
uplo1_t
Definition: blis_type_defs.h:60
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
void bl1_ccreate_contigmr(uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:77
void bl1_ccreate_contigmt(trans1_t trans_dims, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmt.c:89
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
Definition: blis_type_defs.h:54
void bl1_set_dims_with_trans(trans1_t trans, int m, int n, int *m_new, int *n_new)
Definition: bl1_set_dims.c:13
void bl1_ccopymt(trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:215
Definition: blis_type_defs.h:132
void bl1_cfree_saved_contigmr(uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigmr.c:59
scomplex * bl1_callocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:40
void bl1_csyr2k_blas(uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc)
Definition: bl1_syr2k.c:1013
void bl1_cfree(scomplex *p)
Definition: bl1_free.c:40
void bl1_cfree_contigm(scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:45

◆ bl1_csyr2k_blas()

void bl1_csyr2k_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
scomplex *  alpha,
scomplex *  a,
int  lda,
scomplex *  b,
int  ldb,
scomplex *  beta,
scomplex *  c,
int  ldc 
)

References bl1_is_conjtrans(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), BLIS1_TRANSPOSE, cblas_csyr2k(), CblasColMajor, and F77_csyr2k().

Referenced by bl1_csyr2k().

1014 {
1015 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1016  enum CBLAS_ORDER cblas_order = CblasColMajor;
1017  enum CBLAS_UPLO cblas_uplo;
1018  enum CBLAS_TRANSPOSE cblas_trans;
1019 
1020  // BLAS doesn't recognize the conjugate-transposition constant for syr2k,
1021  // so we have to map it down to regular transposition.
1022  if ( bl1_is_conjtrans( trans ) ) trans = BLIS1_TRANSPOSE;
1023 
1024  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
1025  bl1_param_map_to_netlib_trans( trans, &cblas_trans );
1026 
1027  cblas_csyr2k( cblas_order,
1028  cblas_uplo,
1029  cblas_trans,
1030  m,
1031  k,
1032  alpha,
1033  a, lda,
1034  b, ldb,
1035  beta,
1036  c, ldc );
1037 #else
1038  char blas_uplo;
1039  char blas_trans;
1040 
1041  // BLAS doesn't recognize the conjugate-transposition constant for syr2k,
1042  // so we have to map it down to regular transposition.
1043  if ( bl1_is_conjtrans( trans ) ) trans = BLIS1_TRANSPOSE;
1044 
1045  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
1046  bl1_param_map_to_netlib_trans( trans, &blas_trans );
1047 
1048  F77_csyr2k( &blas_uplo,
1049  &blas_trans,
1050  &m,
1051  &k,
1052  alpha,
1053  a, &lda,
1054  b, &ldb,
1055  beta,
1056  c, &ldc );
1057 #endif
1058 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
int bl1_is_conjtrans(trans1_t trans)
Definition: bl1_is.c:30
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
Definition: blis_type_defs.h:55
void cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc)
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
Definition: blis_prototypes_cblas.h:17
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47
void F77_csyr2k(char *uplo, char *transa, int *n, int *k, scomplex *alpha, scomplex *a, int *lda, scomplex *b, int *ldb, scomplex *beta, scomplex *c, int *ldc)

◆ bl1_csyrk()

void bl1_csyrk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  beta,
scomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_ccreate_contigmr(), bl1_ccreate_contigmt(), bl1_cfree_contigm(), bl1_cfree_saved_contigmr(), bl1_csyrk_blas(), bl1_is_col_storage(), and bl1_zero_dim2().

Referenced by FLA_Syrk_external().

206 {
207  uplo1_t uplo_save = uplo;
208  int m_save = m;
209  scomplex* a_save = a;
210  scomplex* c_save = c;
211  int a_rs_save = a_rs;
212  int a_cs_save = a_cs;
213  int c_rs_save = c_rs;
214  int c_cs_save = c_cs;
215  int lda, inca;
216  int ldc, incc;
217 
218  // Return early if possible.
219  if ( bl1_zero_dim2( m, k ) ) return;
220 
221  // If necessary, allocate, initialize, and use a temporary contiguous
222  // copy of each matrix rather than the original matrices.
223  bl1_ccreate_contigmt( trans,
224  m,
225  k,
226  a_save, a_rs_save, a_cs_save,
227  &a, &a_rs, &a_cs );
228 
229  bl1_ccreate_contigmr( uplo,
230  m,
231  m,
232  c_save, c_rs_save, c_cs_save,
233  &c, &c_rs, &c_cs );
234 
235  // Initialize with values assuming column-major storage.
236  lda = a_cs;
237  inca = a_rs;
238  ldc = c_cs;
239  incc = c_rs;
240 
241  // Adjust the parameters based on the storage of each matrix.
242  if ( bl1_is_col_storage( c_rs, c_cs ) )
243  {
244  if ( bl1_is_col_storage( a_rs, a_cs ) )
245  {
246  // requested operation: uplo( C_c ) += A_c * A_c^T
247  // effective operation: uplo( C_c ) += A_c * A_c^T
248  }
249  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
250  {
251  // requested operation: uplo( C_c ) += A_r * A_r^T
252  // effective operation: uplo( C_c ) += A_c^T * A_c
253  bl1_swap_ints( lda, inca );
254 
255  bl1_toggle_trans( trans );
256  }
257  }
258  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
259  {
260  if ( bl1_is_col_storage( a_rs, a_cs ) )
261  {
262  // requested operation: uplo( C_r ) += A_c * A_c^T
263  // effective operation: ~uplo( C_c ) += A_c * A_c^T
264  bl1_swap_ints( ldc, incc );
265 
266  bl1_toggle_uplo( uplo );
267  }
268  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
269  {
270  // requested operation: uplo( C_r ) += A_r * A_r^T
271  // effective operation: ~uplo( C_c ) += A_c^T * A_c
272  bl1_swap_ints( ldc, incc );
273  bl1_swap_ints( lda, inca );
274 
275  bl1_toggle_uplo( uplo );
276  bl1_toggle_trans( trans );
277  }
278  }
279 
280  bl1_csyrk_blas( uplo,
281  trans,
282  m,
283  k,
284  alpha,
285  a, lda,
286  beta,
287  c, ldc );
288 
289  // Free any temporary contiguous matrices, copying the result back to
290  // the original matrix.
291  bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
292  &a, &a_rs, &a_cs );
293 
294  bl1_cfree_saved_contigmr( uplo_save,
295  m_save,
296  m_save,
297  c_save, c_rs_save, c_cs_save,
298  &c, &c_rs, &c_cs );
299 }
uplo1_t
Definition: blis_type_defs.h:60
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
void bl1_ccreate_contigmr(uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:77
void bl1_ccreate_contigmt(trans1_t trans_dims, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmt.c:89
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_csyrk_blas(uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int lda, scomplex *beta, scomplex *c, int ldc)
Definition: bl1_syrk.c:473
Definition: blis_type_defs.h:132
void bl1_cfree_saved_contigmr(uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigmr.c:59
void bl1_cfree_contigm(scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:45

◆ bl1_csyrk_blas()

void bl1_csyrk_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
scomplex *  alpha,
scomplex *  a,
int  lda,
scomplex *  beta,
scomplex *  c,
int  ldc 
)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_csyrk(), CblasColMajor, and F77_csyrk().

Referenced by bl1_csyrk().

474 {
475 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
476  enum CBLAS_ORDER cblas_order = CblasColMajor;
477  enum CBLAS_UPLO cblas_uplo;
478  enum CBLAS_TRANSPOSE cblas_trans;
479 
480  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
481  bl1_param_map_to_netlib_trans( trans, &cblas_trans );
482 
483  cblas_csyrk( cblas_order,
484  cblas_uplo,
485  cblas_trans,
486  m,
487  k,
488  alpha,
489  a, lda,
490  beta,
491  c, ldc );
492 #else
493  char blas_uplo;
494  char blas_trans;
495 
496  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
497  bl1_param_map_to_netlib_trans( trans, &blas_trans );
498 
499  F77_csyrk( &blas_uplo,
500  &blas_trans,
501  &m,
502  &k,
503  alpha,
504  a, &lda,
505  beta,
506  c, &ldc );
507 #endif
508 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
void F77_csyrk(char *uplo, char *transa, int *n, int *k, scomplex *alpha, scomplex *a, int *lda, scomplex *beta, scomplex *c, int *ldc)
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
Definition: blis_prototypes_cblas.h:17
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47
void cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *beta, void *C, const int ldc)

◆ bl1_ctrmm()

void bl1_ctrmm ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_callocm(), bl1_cconjmr(), bl1_ccopymrt(), bl1_ccreate_contigm(), bl1_ccreate_contigmr(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigm(), bl1_ctrmm_blas(), bl1_is_col_storage(), bl1_is_conjnotrans(), bl1_set_dim_with_side(), bl1_zero_dim2(), and BLIS1_CONJ_NO_TRANSPOSE.

Referenced by bl1_ctrmmsx(), and FLA_Trmm_external().

220 {
221  int m_save = m;
222  int n_save = n;
223  scomplex* a_save = a;
224  scomplex* b_save = b;
225  int a_rs_save = a_rs;
226  int a_cs_save = a_cs;
227  int b_rs_save = b_rs;
228  int b_cs_save = b_cs;
229  scomplex* a_conj;
230  int dim_a;
231  int lda, inca;
232  int ldb, incb;
233  int lda_conj, inca_conj;
234  int a_was_copied;
235 
236  // Return early if possible.
237  if ( bl1_zero_dim2( m, n ) ) return;
238 
239  // If necessary, allocate, initialize, and use a temporary contiguous
240  // copy of each matrix rather than the original matrices.
241  bl1_set_dim_with_side( side, m, n, &dim_a );
242  bl1_ccreate_contigmr( uplo,
243  dim_a,
244  dim_a,
245  a_save, a_rs_save, a_cs_save,
246  &a, &a_rs, &a_cs );
247 
248  bl1_ccreate_contigm( m,
249  n,
250  b_save, b_rs_save, b_cs_save,
251  &b, &b_rs, &b_cs );
252 
253  // Figure out whether A was copied to contiguous memory. This is used to
254  // prevent redundant copying.
255  a_was_copied = ( a != a_save );
256 
257  // Initialize with values assuming column-major storage.
258  lda = a_cs;
259  inca = a_rs;
260  ldb = b_cs;
261  incb = b_rs;
262 
263  // Adjust the parameters based on the storage of each matrix.
264  if ( bl1_is_col_storage( b_rs, b_cs ) )
265  {
266  if ( bl1_is_col_storage( a_rs, a_cs ) )
267  {
268  // requested operation: B_c := tr( uplo( A_c ) ) * B_c
269  // effective operation: B_c := tr( uplo( A_c ) ) * B_c
270  }
271  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
272  {
273  // requested operation: B_c := tr( uplo( A_r ) ) * B_c
274  // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c
275  bl1_swap_ints( lda, inca );
276 
277  bl1_toggle_uplo( uplo );
278  bl1_toggle_trans( trans );
279  }
280  }
281  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
282  {
283  if ( bl1_is_col_storage( a_rs, a_cs ) )
284  {
285  // requested operation: B_r := tr( uplo( A_c ) ) * B_r
286  // effective operation: B_c := B_c * tr( uplo( A_c ) )^T
287  bl1_swap_ints( ldb, incb );
288 
289  bl1_swap_ints( m, n );
290 
291  bl1_toggle_side( side );
292  bl1_toggle_trans( trans );
293  }
294  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
295  {
296  // requested operation: B_r := tr( uplo( A_r ) ) * B_r
297  // effective operation: B_c := B_c * tr( ~uplo( A_c ) )
298  bl1_swap_ints( ldb, incb );
299  bl1_swap_ints( lda, inca );
300 
301  bl1_swap_ints( m, n );
302 
303  bl1_toggle_uplo( uplo );
304  bl1_toggle_side( side );
305  }
306  }
307 
308  // Initialize with values assuming that trans is not conjnotrans.
309  a_conj = a;
310  lda_conj = lda;
311  inca_conj = inca;
312 
313  // We want to handle the conjnotrans case. The easiest way to do so is
314  // by making a conjugated copy of A.
315  if ( bl1_is_conjnotrans( trans ) && !a_was_copied )
316  {
317  int dim_a;
318 
319  bl1_set_dim_with_side( side, m, n, &dim_a );
320 
321  a_conj = bl1_callocm( dim_a, dim_a );
322  lda_conj = dim_a;
323  inca_conj = 1;
324 
325  bl1_ccopymrt( uplo,
326  BLIS1_CONJ_NO_TRANSPOSE,
327  dim_a,
328  dim_a,
329  a, inca, lda,
330  a_conj, inca_conj, lda_conj );
331  }
332  else if ( bl1_is_conjnotrans( trans ) && a_was_copied )
333  {
334  int dim_a;
335 
336  bl1_set_dim_with_side( side, m, n, &dim_a );
337 
338  bl1_cconjmr( uplo,
339  dim_a,
340  dim_a,
341  a_conj, inca_conj, lda_conj );
342  }
343 
344 
345  bl1_ctrmm_blas( side,
346  uplo,
347  trans,
348  diag,
349  m,
350  n,
351  alpha,
352  a_conj, lda_conj,
353  b, ldb );
354 
355  if ( bl1_is_conjnotrans( trans ) && !a_was_copied )
356  bl1_cfree( a_conj );
357 
358  // Free any temporary contiguous matrices, copying the result back to
359  // the original matrix.
360  bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
361  &a, &a_rs, &a_cs );
362 
363  bl1_cfree_saved_contigm( m_save,
364  n_save,
365  b_save, b_rs_save, b_cs_save,
366  &b, &b_rs, &b_cs );
367 }
void bl1_cfree_saved_contigm(int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:59
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
void bl1_ccreate_contigm(int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:81
void bl1_ccreate_contigmr(uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:77
int bl1_is_conjnotrans(trans1_t trans)
Definition: bl1_is.c:25
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_cconjmr(uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs)
Definition: bl1_conjmr.c:23
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:132
scomplex * bl1_callocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:40
void bl1_cfree(scomplex *p)
Definition: bl1_free.c:40
void bl1_ctrmm_blas(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb)
Definition: bl1_trmm.c:614
void bl1_ccopymrt(uplo1_t uplo, trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_copymrt.c:223
void bl1_cfree_contigm(scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:45

◆ bl1_ctrmm_blas()

void bl1_ctrmm_blas ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  lda,
scomplex *  b,
int  ldb 
)

References bl1_param_map_to_netlib_diag(), bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_ctrmm(), CblasColMajor, and F77_ctrmm().

Referenced by bl1_ctrmm().

615 {
616 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
617  enum CBLAS_ORDER cblas_order = CblasColMajor;
618  enum CBLAS_SIDE cblas_side;
619  enum CBLAS_UPLO cblas_uplo;
620  enum CBLAS_TRANSPOSE cblas_trans;
621  enum CBLAS_DIAG cblas_diag;
622 
623  bl1_param_map_to_netlib_side( side, &cblas_side );
624  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
625  bl1_param_map_to_netlib_trans( trans, &cblas_trans );
626  bl1_param_map_to_netlib_diag( diag, &cblas_diag );
627 
628  cblas_ctrmm( cblas_order,
629  cblas_side,
630  cblas_uplo,
631  cblas_trans,
632  cblas_diag,
633  m,
634  n,
635  alpha,
636  a, lda,
637  b, ldb );
638 #else
639  char blas_side;
640  char blas_uplo;
641  char blas_trans;
642  char blas_diag;
643 
644  bl1_param_map_to_netlib_side( side, &blas_side );
645  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
646  bl1_param_map_to_netlib_trans( trans, &blas_trans );
647  bl1_param_map_to_netlib_diag( diag, &blas_diag );
648 
649  F77_ctrmm( &blas_side,
650  &blas_uplo,
651  &blas_trans,
652  &blas_diag,
653  &m,
654  &n,
655  alpha,
656  a, &lda,
657  b, &ldb );
658 #endif
659 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
CBLAS_DIAG
Definition: blis_prototypes_cblas.h:20
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
void bl1_param_map_to_netlib_side(side1_t blis_side, void *blas_side)
Definition: bl1_param_map.c:71
void cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb)
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
Definition: blis_prototypes_cblas.h:17
CBLAS_SIDE
Definition: blis_prototypes_cblas.h:21
void F77_ctrmm(char *side, char *uplo, char *transa, char *diag, int *m, int *n, scomplex *alpha, scomplex *a, int *lda, scomplex *b, int *ldb)
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47
void bl1_param_map_to_netlib_diag(diag1_t blis_diag, void *blas_diag)
Definition: bl1_param_map.c:95

◆ bl1_ctrmmsx()

void bl1_ctrmmsx ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs,
scomplex *  beta,
scomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_c1(), bl1_callocm(), bl1_caxpymt(), bl1_ccopymt(), bl1_ccreate_contigm(), bl1_ccreate_contigmr(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigm(), bl1_cscalm(), bl1_ctrmm(), bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Trmmsx_external().

226 {
227  int m_save = m;
228  int n_save = n;
229  scomplex* a_save = a;
230  scomplex* b_save = b;
231  scomplex* c_save = c;
232  int a_rs_save = a_rs;
233  int a_cs_save = a_cs;
234  int b_rs_save = b_rs;
235  int b_cs_save = b_cs;
236  int c_rs_save = c_rs;
237  int c_cs_save = c_cs;
238  scomplex one = bl1_c1();
239  scomplex* b_copy;
240  int dim_a;
241  int b_copy_rs, b_copy_cs;
242 
243  // Return early if possible.
244  if ( bl1_zero_dim2( m, n ) ) return;
245 
246  // If necessary, allocate, initialize, and use a temporary contiguous
247  // copy of each matrix rather than the original matrices.
248  bl1_set_dim_with_side( side, m, n, &dim_a );
249  bl1_ccreate_contigmr( uplo,
250  dim_a,
251  dim_a,
252  a_save, a_rs_save, a_cs_save,
253  &a, &a_rs, &a_cs );
254 
255  bl1_ccreate_contigm( m,
256  n,
257  b_save, b_rs_save, b_cs_save,
258  &b, &b_rs, &b_cs );
259 
260  bl1_ccreate_contigm( m,
261  n,
262  c_save, c_rs_save, c_cs_save,
263  &c, &c_rs, &c_cs );
264 
265  // Create a copy of B to use in the computation so the original matrix is
266  // left untouched.
267  b_copy = bl1_callocm( m, n );
268 
269  // Match the strides of B_copy to that of B.
270  if ( bl1_is_col_storage( b_rs, b_cs ) )
271  {
272  b_copy_rs = 1;
273  b_copy_cs = m;
274  }
275  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
276  {
277  b_copy_rs = n;
278  b_copy_cs = 1;
279  }
280 
281  // Copy the contents of B to B_copy.
282  bl1_ccopymt( BLIS1_NO_TRANSPOSE,
283  m,
284  n,
285  b, b_rs, b_cs,
286  b_copy, b_copy_rs, b_copy_cs );
287 
288  // Perform the operation on B_copy.
289  bl1_ctrmm( side,
290  uplo,
291  trans,
292  diag,
293  m,
294  n,
295  alpha,
296  a, a_rs, a_cs,
297  b_copy, b_copy_rs, b_copy_cs );
298 
299  // Scale C by beta.
300  bl1_cscalm( BLIS1_NO_CONJUGATE,
301  m,
302  n,
303  beta,
304  c, c_rs, c_cs );
305 
306  // Add B_copy into C.
307  bl1_caxpymt( BLIS1_NO_TRANSPOSE,
308  m,
309  n,
310  &one,
311  b_copy, b_copy_rs, b_copy_cs,
312  c, c_rs, c_cs );
313 
314  // Free the copy of B.
315  bl1_cfree( b_copy );
316 
317  // Free any temporary contiguous matrices, copying the result back to
318  // the original matrix.
319  bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
320  &a, &a_rs, &a_cs );
321 
322  bl1_cfree_contigm( b_save, b_rs_save, b_cs_save,
323  &b, &b_rs, &b_cs );
324 
325  bl1_cfree_saved_contigm( m_save,
326  n_save,
327  c_save, c_rs_save, c_cs_save,
328  &c, &c_rs, &c_cs );
329 }
void bl1_cfree_saved_contigm(int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:59
Definition: blis_type_defs.h:81
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
scomplex bl1_c1(void)
Definition: bl1_constants.c:61
void bl1_ccreate_contigm(int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:81
void bl1_ccreate_contigmr(uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:77
void bl1_caxpymt(trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_axpymt.c:149
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
Definition: blis_type_defs.h:54
void bl1_ccopymt(trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:215
Definition: blis_type_defs.h:132
void bl1_cscalm(conj1_t conj, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs)
Definition: bl1_scalm.c:169
scomplex * bl1_callocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:40
void bl1_ctrmm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_trmm.c:219
void bl1_cfree(scomplex *p)
Definition: bl1_free.c:40
void bl1_cfree_contigm(scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:45

◆ bl1_ctrsm()

void bl1_ctrsm ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_callocm(), bl1_cconjmr(), bl1_ccopymrt(), bl1_ccreate_contigm(), bl1_ccreate_contigmr(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigm(), bl1_ctrsm_blas(), bl1_is_col_storage(), bl1_is_conjnotrans(), bl1_set_dim_with_side(), bl1_zero_dim2(), and BLIS1_CONJ_NO_TRANSPOSE.

Referenced by bl1_ctrsmsx(), FLA_LU_nopiv_opc_var1(), FLA_LU_nopiv_opc_var2(), FLA_LU_nopiv_opc_var3(), FLA_LU_piv_opc_var3(), and FLA_Trsm_external().

220 {
221  int m_save = m;
222  int n_save = n;
223  scomplex* a_save = a;
224  scomplex* b_save = b;
225  int a_rs_save = a_rs;
226  int a_cs_save = a_cs;
227  int b_rs_save = b_rs;
228  int b_cs_save = b_cs;
229  scomplex* a_conj;
230  int dim_a;
231  int lda, inca;
232  int ldb, incb;
233  int lda_conj, inca_conj;
234  int a_was_copied;
235 
236  // Return early if possible.
237  if ( bl1_zero_dim2( m, n ) ) return;
238 
239  // If necessary, allocate, initialize, and use a temporary contiguous
240  // copy of each matrix rather than the original matrices.
241  bl1_set_dim_with_side( side, m, n, &dim_a );
242  bl1_ccreate_contigmr( uplo,
243  dim_a,
244  dim_a,
245  a_save, a_rs_save, a_cs_save,
246  &a, &a_rs, &a_cs );
247 
248  bl1_ccreate_contigm( m,
249  n,
250  b_save, b_rs_save, b_cs_save,
251  &b, &b_rs, &b_cs );
252 
253  // Figure out whether A was copied to contiguous memory. This is used to
254  // prevent redundant copying.
255  a_was_copied = ( a != a_save );
256 
257  // Initialize with values assuming column-major storage.
258  lda = a_cs;
259  inca = a_rs;
260  ldb = b_cs;
261  incb = b_rs;
262 
263  // Adjust the parameters based on the storage of each matrix.
264  if ( bl1_is_col_storage( b_rs, b_cs ) )
265  {
266  if ( bl1_is_col_storage( a_rs, a_cs ) )
267  {
268  // requested operation: B_c := tr( uplo( A_c ) ) * B_c
269  // effective operation: B_c := tr( uplo( A_c ) ) * B_c
270  }
271  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
272  {
273  // requested operation: B_c := tr( uplo( A_r ) ) * B_c
274  // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c
275  bl1_swap_ints( lda, inca );
276 
277  bl1_toggle_uplo( uplo );
278  bl1_toggle_trans( trans );
279  }
280  }
281  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
282  {
283  if ( bl1_is_col_storage( a_rs, a_cs ) )
284  {
285  // requested operation: B_r := tr( uplo( A_c ) ) * B_r
286  // effective operation: B_c := B_c * tr( uplo( A_c ) )^T
287  bl1_swap_ints( ldb, incb );
288 
289  bl1_swap_ints( m, n );
290 
291  bl1_toggle_side( side );
292  bl1_toggle_trans( trans );
293  }
294  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
295  {
296  // requested operation: B_r := tr( uplo( A_r ) ) * B_r
297  // effective operation: B_c := B_c * tr( ~uplo( A_c ) )
298  bl1_swap_ints( ldb, incb );
299  bl1_swap_ints( lda, inca );
300 
301  bl1_swap_ints( m, n );
302 
303  bl1_toggle_uplo( uplo );
304  bl1_toggle_side( side );
305  }
306  }
307 
308  // Initialize with values assuming that trans is not conjnotrans.
309  a_conj = a;
310  lda_conj = lda;
311  inca_conj = inca;
312 
313  // We want to handle the conjnotrans case. The easiest way to do so is
314  // by making a conjugated copy of A.
315  if ( bl1_is_conjnotrans( trans ) && !a_was_copied )
316  {
317  int dim_a;
318 
319  bl1_set_dim_with_side( side, m, n, &dim_a );
320 
321  a_conj = bl1_callocm( dim_a, dim_a );
322  lda_conj = dim_a;
323  inca_conj = 1;
324 
325  bl1_ccopymrt( uplo,
326  BLIS1_CONJ_NO_TRANSPOSE,
327  dim_a,
328  dim_a,
329  a, inca, lda,
330  a_conj, inca_conj, lda_conj );
331  }
332  else if ( bl1_is_conjnotrans( trans ) && a_was_copied )
333  {
334  int dim_a;
335 
336  bl1_set_dim_with_side( side, m, n, &dim_a );
337 
338  bl1_cconjmr( uplo,
339  dim_a,
340  dim_a,
341  a_conj, inca_conj, lda_conj );
342  }
343 
344 
345  bl1_ctrsm_blas( side,
346  uplo,
347  trans,
348  diag,
349  m,
350  n,
351  alpha,
352  a_conj, lda_conj,
353  b, ldb );
354 
355  if ( bl1_is_conjnotrans( trans ) && !a_was_copied )
356  bl1_cfree( a_conj );
357 
358  // Free any temporary contiguous matrices, copying the result back to
359  // the original matrix.
360  bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
361  &a, &a_rs, &a_cs );
362 
363  bl1_cfree_saved_contigm( m_save,
364  n_save,
365  b_save, b_rs_save, b_cs_save,
366  &b, &b_rs, &b_cs );
367 }
void bl1_cfree_saved_contigm(int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:59
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
void bl1_ccreate_contigm(int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:81
void bl1_ccreate_contigmr(uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:77
int bl1_is_conjnotrans(trans1_t trans)
Definition: bl1_is.c:25
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_cconjmr(uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs)
Definition: bl1_conjmr.c:23
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:132
scomplex * bl1_callocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:40
void bl1_cfree(scomplex *p)
Definition: bl1_free.c:40
void bl1_ctrsm_blas(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb)
Definition: bl1_trsm.c:614
void bl1_ccopymrt(uplo1_t uplo, trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_copymrt.c:223
void bl1_cfree_contigm(scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:45

◆ bl1_ctrsm_blas()

void bl1_ctrsm_blas ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  lda,
scomplex *  b,
int  ldb 
)

References bl1_param_map_to_netlib_diag(), bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_ctrsm(), CblasColMajor, and F77_ctrsm().

Referenced by bl1_ctrsm().

615 {
616 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
617  enum CBLAS_ORDER cblas_order = CblasColMajor;
618  enum CBLAS_SIDE cblas_side;
619  enum CBLAS_UPLO cblas_uplo;
620  enum CBLAS_TRANSPOSE cblas_trans;
621  enum CBLAS_DIAG cblas_diag;
622 
623  bl1_param_map_to_netlib_side( side, &cblas_side );
624  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
625  bl1_param_map_to_netlib_trans( trans, &cblas_trans );
626  bl1_param_map_to_netlib_diag( diag, &cblas_diag );
627 
628  cblas_ctrsm( cblas_order,
629  cblas_side,
630  cblas_uplo,
631  cblas_trans,
632  cblas_diag,
633  m,
634  n,
635  alpha,
636  a, lda,
637  b, ldb );
638 #else
639  char blas_side;
640  char blas_uplo;
641  char blas_trans;
642  char blas_diag;
643 
644  bl1_param_map_to_netlib_side( side, &blas_side );
645  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
646  bl1_param_map_to_netlib_trans( trans, &blas_trans );
647  bl1_param_map_to_netlib_diag( diag, &blas_diag );
648 
649  F77_ctrsm( &blas_side,
650  &blas_uplo,
651  &blas_trans,
652  &blas_diag,
653  &m,
654  &n,
655  alpha,
656  a, &lda,
657  b, &ldb );
658 #endif
659 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
CBLAS_DIAG
Definition: blis_prototypes_cblas.h:20
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
void bl1_param_map_to_netlib_side(side1_t blis_side, void *blas_side)
Definition: bl1_param_map.c:71
void F77_ctrsm(char *side, char *uplo, char *transa, char *diag, int *m, int *n, scomplex *alpha, scomplex *a, int *lda, scomplex *b, int *ldb)
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
Definition: blis_prototypes_cblas.h:17
CBLAS_SIDE
Definition: blis_prototypes_cblas.h:21
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47
void cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb)
void bl1_param_map_to_netlib_diag(diag1_t blis_diag, void *blas_diag)
Definition: bl1_param_map.c:95

◆ bl1_ctrsmsx()

void bl1_ctrsmsx ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs,
scomplex *  beta,
scomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_c1(), bl1_callocm(), bl1_caxpymt(), bl1_ccopymt(), bl1_ccreate_contigm(), bl1_ccreate_contigmr(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigm(), bl1_cscalm(), bl1_ctrsm(), bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Trsmsx_external().

226 {
227  int m_save = m;
228  int n_save = n;
229  scomplex* a_save = a;
230  scomplex* b_save = b;
231  scomplex* c_save = c;
232  int a_rs_save = a_rs;
233  int a_cs_save = a_cs;
234  int b_rs_save = b_rs;
235  int b_cs_save = b_cs;
236  int c_rs_save = c_rs;
237  int c_cs_save = c_cs;
238  scomplex one = bl1_c1();
239  scomplex* b_copy;
240  int dim_a;
241  int b_copy_rs, b_copy_cs;
242 
243  // Return early if possible.
244  if ( bl1_zero_dim2( m, n ) ) return;
245 
246  // If necessary, allocate, initialize, and use a temporary contiguous
247  // copy of each matrix rather than the original matrices.
248  bl1_set_dim_with_side( side, m, n, &dim_a );
249  bl1_ccreate_contigmr( uplo,
250  dim_a,
251  dim_a,
252  a_save, a_rs_save, a_cs_save,
253  &a, &a_rs, &a_cs );
254 
255  bl1_ccreate_contigm( m,
256  n,
257  b_save, b_rs_save, b_cs_save,
258  &b, &b_rs, &b_cs );
259 
260  bl1_ccreate_contigm( m,
261  n,
262  c_save, c_rs_save, c_cs_save,
263  &c, &c_rs, &c_cs );
264 
265  // Create a copy of B to use in the computation so the original matrix is
266  // left untouched.
267  b_copy = bl1_callocm( m, n );
268 
269  // Match the strides of B_copy to that of B.
270  if ( bl1_is_col_storage( b_rs, b_cs ) )
271  {
272  b_copy_rs = 1;
273  b_copy_cs = m;
274  }
275  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
276  {
277  b_copy_rs = n;
278  b_copy_cs = 1;
279  }
280 
281  // Copy the contents of B to B_copy.
282  bl1_ccopymt( BLIS1_NO_TRANSPOSE,
283  m,
284  n,
285  b, b_rs, b_cs,
286  b_copy, b_copy_rs, b_copy_cs );
287 
288  // Perform the operation on B_copy.
289  bl1_ctrsm( side,
290  uplo,
291  trans,
292  diag,
293  m,
294  n,
295  alpha,
296  a, a_rs, a_cs,
297  b_copy, b_copy_rs, b_copy_cs );
298 
299  // Scale C by beta.
300  bl1_cscalm( BLIS1_NO_CONJUGATE,
301  m,
302  n,
303  beta,
304  c, c_rs, c_cs );
305 
306  // Add B_copy into C.
307  bl1_caxpymt( BLIS1_NO_TRANSPOSE,
308  m,
309  n,
310  &one,
311  b_copy, b_copy_rs, b_copy_cs,
312  c, c_rs, c_cs );
313 
314  // Free the copy of B.
315  bl1_cfree( b_copy );
316 
317  // Free any temporary contiguous matrices, copying the result back to
318  // the original matrix.
319  bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
320  &a, &a_rs, &a_cs );
321 
322  bl1_cfree_contigm( b_save, b_rs_save, b_cs_save,
323  &b, &b_rs, &b_cs );
324 
325  bl1_cfree_saved_contigm( m_save,
326  n_save,
327  c_save, c_rs_save, c_cs_save,
328  &c, &c_rs, &c_cs );
329 }
void bl1_cfree_saved_contigm(int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:59
Definition: blis_type_defs.h:81
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
scomplex bl1_c1(void)
Definition: bl1_constants.c:61
void bl1_ccreate_contigm(int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:81
void bl1_ccreate_contigmr(uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:77
void bl1_caxpymt(trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_axpymt.c:149
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_ctrsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_trsm.c:219
Definition: blis_type_defs.h:54
void bl1_ccopymt(trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:215
Definition: blis_type_defs.h:132
void bl1_cscalm(conj1_t conj, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs)
Definition: bl1_scalm.c:169
scomplex * bl1_callocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:40
void bl1_cfree(scomplex *p)
Definition: bl1_free.c:40
void bl1_cfree_contigm(scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:45

◆ bl1_dgemm()

void bl1_dgemm ( trans1_t  transa,
trans1_t  transb,
int  m,
int  k,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs,
double *  beta,
double *  c,
int  c_rs,
int  c_cs 
)

References bl1_d0(), bl1_d1(), bl1_dallocm(), bl1_daxpymt(), bl1_dcreate_contigm(), bl1_dcreate_contigmt(), bl1_dfree(), bl1_dfree_contigm(), bl1_dfree_saved_contigm(), bl1_dgemm_blas(), bl1_dscalm(), bl1_is_col_storage(), bl1_zero_dim3(), BLIS1_NO_CONJUGATE, and BLIS1_TRANSPOSE.

Referenced by FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_opz_var2(), FLA_Gemm_external(), FLA_Tevd_v_opd_var2(), and FLA_Tevd_v_opz_var2().

275 {
276  int m_save = m;
277  int n_save = n;
278  double* a_save = a;
279  double* b_save = b;
280  double* c_save = c;
281  int a_rs_save = a_rs;
282  int a_cs_save = a_cs;
283  int b_rs_save = b_rs;
284  int b_cs_save = b_cs;
285  int c_rs_save = c_rs;
286  int c_cs_save = c_cs;
287  double zero = bl1_d0();
288  double one = bl1_d1();
289  double* a_unswap;
290  double* b_unswap;
291  double* c_trans;
292  int lda, inca;
293  int ldb, incb;
294  int ldc, incc;
295  int ldc_trans, incc_trans;
296  int m_gemm, n_gemm;
297  int gemm_needs_axpyt = FALSE;
298 
299  // Return early if possible.
300  if ( bl1_zero_dim3( m, k, n ) )
301  {
302  bl1_dscalm( BLIS1_NO_CONJUGATE,
303  m,
304  n,
305  beta,
306  c, c_rs, c_cs );
307  return;
308  }
309 
310  // If necessary, allocate, initialize, and use a temporary contiguous
311  // copy of each matrix rather than the original matrices.
312  bl1_dcreate_contigmt( transa,
313  m,
314  k,
315  a_save, a_rs_save, a_cs_save,
316  &a, &a_rs, &a_cs );
317 
318  bl1_dcreate_contigmt( transb,
319  k,
320  n,
321  b_save, b_rs_save, b_cs_save,
322  &b, &b_rs, &b_cs );
323 
324  bl1_dcreate_contigm( m,
325  n,
326  c_save, c_rs_save, c_cs_save,
327  &c, &c_rs, &c_cs );
328 
329  // These are used to track the original values of a and b prior to any
330  // operand swapping that might take place. This is necessary for proper
331  // freeing of memory when one is a temporary contiguous matrix.
332  a_unswap = a;
333  b_unswap = b;
334 
335  // These are used to track the dimensions of the product of the
336  // A and B operands to the BLAS invocation of gemm. These differ
337  // from m and n when the operands need to be swapped.
338  m_gemm = m;
339  n_gemm = n;
340 
341  // Initialize with values assuming column-major storage.
342  lda = a_cs;
343  inca = a_rs;
344  ldb = b_cs;
345  incb = b_rs;
346  ldc = c_cs;
347  incc = c_rs;
348 
349  // Adjust the parameters based on the storage of each matrix.
350  if ( bl1_is_col_storage( c_rs, c_cs ) )
351  {
352  if ( bl1_is_col_storage( a_rs, a_cs ) )
353  {
354  if ( bl1_is_col_storage( b_rs, b_cs ) )
355  {
356  // requested operation: C_c += tr( A_c ) * tr( B_c )
357  // effective operation: C_c += tr( A_c ) * tr( B_c )
358  }
359  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
360  {
361 
362  // requested operation: C_c += tr( A_c ) * tr( B_r )
363  // effective operation: C_c += tr( A_c ) * tr( B_c )^T
364  bl1_swap_ints( ldb, incb );
365 
366  bl1_toggle_trans( transb );
367  }
368  }
369  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
370  {
371  if ( bl1_is_col_storage( b_rs, b_cs ) )
372  {
373  // requested operation: C_c += tr( A_r ) * tr( B_c )
374  // effective operation: C_c += tr( A_r )^T * tr( B_c )
375  bl1_swap_ints( lda, inca );
376 
377  bl1_toggle_trans( transa );
378  }
379  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
380  {
381  // requested operation: C_c += tr( A_r ) * tr( B_r )
382  // effective operation: C_c += ( tr( B_c ) * tr( A_c ) )^T
383  bl1_swap_ints( lda, inca );
384  bl1_swap_ints( ldb, incb );
385 
386  bl1_dswap_pointers( a, b );
387  bl1_swap_ints( lda, ldb );
388  bl1_swap_ints( inca, incb );
389  bl1_swap_trans( transa, transb );
390 
391  gemm_needs_axpyt = TRUE;
392  bl1_swap_ints( m_gemm, n_gemm );
393  }
394  }
395  }
396  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
397  {
398  if ( bl1_is_col_storage( a_rs, a_cs ) )
399  {
400  if ( bl1_is_col_storage( b_rs, b_cs ) )
401  {
402  // requested operation: C_r += tr( A_c ) * tr( B_c )
403  // effective operation: C_c += ( tr( A_c ) * tr( B_c ) )^T
404  bl1_swap_ints( ldc, incc );
405 
406  bl1_swap_ints( m, n );
407 
408  gemm_needs_axpyt = TRUE;
409  }
410  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
411  {
412  // requested operation: C_r += tr( A_c ) * tr( B_r )
413  // effective operation: C_c += tr( B_c ) * tr( A_c )^T
414  bl1_swap_ints( ldc, incc );
415  bl1_swap_ints( ldb, incb );
416 
417  bl1_toggle_trans( transa );
418 
419  bl1_swap_ints( m, n );
420  bl1_swap_ints( m_gemm, n_gemm );
421  bl1_dswap_pointers( a, b );
422  bl1_swap_ints( lda, ldb );
423  bl1_swap_ints( inca, incb );
424  bl1_swap_trans( transa, transb );
425  }
426  }
427  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
428  {
429  if ( bl1_is_col_storage( b_rs, b_cs ) )
430  {
431  // requested operation: C_r += tr( A_r ) * tr( B_c )
432  // effective operation: C_c += tr( B_c )^T * tr( A_c )
433  bl1_swap_ints( ldc, incc );
434  bl1_swap_ints( lda, inca );
435 
436  bl1_toggle_trans( transb );
437 
438  bl1_swap_ints( m, n );
439  bl1_swap_ints( m_gemm, n_gemm );
440  bl1_dswap_pointers( a, b );
441  bl1_swap_ints( lda, ldb );
442  bl1_swap_ints( inca, incb );
443  bl1_swap_trans( transa, transb );
444  }
445  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
446  {
447  // requested operation: C_r += tr( A_r ) * tr( B_r )
448  // effective operation: C_c += tr( B_c ) * tr( A_c )
449  bl1_swap_ints( lda, inca );
450  bl1_swap_ints( ldb, incb );
451  bl1_swap_ints( ldc, incc );
452 
453  bl1_swap_ints( m, n );
454  bl1_swap_ints( m_gemm, n_gemm );
455  bl1_dswap_pointers( a, b );
456  bl1_swap_ints( lda, ldb );
457  bl1_swap_ints( inca, incb );
458  bl1_swap_trans( transa, transb );
459  }
460  }
461  }
462 
463  // There are two cases where we need to perform the gemm and then axpy
464  // the result into C with a transposition. We handle those cases here.
465  if ( gemm_needs_axpyt )
466  {
467  // We need a temporary matrix for holding C^T. Notice that m and n
468  // represent the dimensions of C, while m_gemm and n_gemm are the
469  // dimensions of the actual product op(A)*op(B), which may be n-by-m
470  // since the operands may have been swapped.
471  c_trans = bl1_dallocm( m_gemm, n_gemm );
472  ldc_trans = m_gemm;
473  incc_trans = 1;
474 
475  // Compute tr( A ) * tr( B ), where A and B may have been swapped
476  // to reference the other, and store the result in C_trans.
477  bl1_dgemm_blas( transa,
478  transb,
479  m_gemm,
480  n_gemm,
481  k,
482  alpha,
483  a, lda,
484  b, ldb,
485  &zero,
486  c_trans, ldc_trans );
487 
488  // Scale C by beta.
489  bl1_dscalm( BLIS1_NO_CONJUGATE,
490  m,
491  n,
492  beta,
493  c, incc, ldc );
494 
495  // And finally, accumulate the matrix product in C_trans into C
496  // with a transpose.
497  bl1_daxpymt( BLIS1_TRANSPOSE,
498  m,
499  n,
500  &one,
501  c_trans, incc_trans, ldc_trans,
502  c, incc, ldc );
503 
504  // Free the temporary matrix for C.
505  bl1_dfree( c_trans );
506  }
507  else // no extra axpyt step needed
508  {
509  bl1_dgemm_blas( transa,
510  transb,
511  m_gemm,
512  n_gemm,
513  k,
514  alpha,
515  a, lda,
516  b, ldb,
517  beta,
518  c, ldc );
519  }
520 
521  // Free any temporary contiguous matrices, copying the result back to
522  // the original matrix.
523  bl1_dfree_contigm( a_save, a_rs_save, a_cs_save,
524  &a_unswap, &a_rs, &a_cs );
525 
526  bl1_dfree_contigm( b_save, b_rs_save, b_cs_save,
527  &b_unswap, &b_rs, &b_cs );
528 
529  bl1_dfree_saved_contigm( m_save,
530  n_save,
531  c_save, c_rs_save, c_cs_save,
532  &c, &c_rs, &c_cs );
533 }
Definition: blis_type_defs.h:81
double bl1_d0(void)
Definition: bl1_constants.c:118
void bl1_dfree(double *p)
Definition: bl1_free.c:35
void bl1_dcreate_contigm(int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:47
Definition: blis_type_defs.h:55
int bl1_zero_dim3(int m, int k, int n)
Definition: bl1_is.c:123
void bl1_dcreate_contigmt(trans1_t trans_dims, int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmt.c:51
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_dfree_saved_contigm(int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:36
void bl1_dfree_contigm(double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:29
double * bl1_dallocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:35
void bl1_dgemm_blas(trans1_t transa, trans1_t transb, int m, int n, int k, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc)
Definition: bl1_gemm.c:1254
void bl1_daxpymt(trans1_t trans, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition: bl1_axpymt.c:81
double bl1_d1(void)
Definition: bl1_constants.c:54
void bl1_dscalm(conj1_t conj, int m, int n, double *alpha, double *a, int a_rs, int a_cs)
Definition: bl1_scalm.c:65

◆ bl1_dgemm_blas()

void bl1_dgemm_blas ( trans1_t  transa,
trans1_t  transb,
int  m,
int  n,
int  k,
double *  alpha,
double *  a,
int  lda,
double *  b,
int  ldb,
double *  beta,
double *  c,
int  ldc 
)

References bl1_param_map_to_netlib_trans(), cblas_dgemm(), CblasColMajor, and F77_dgemm().

Referenced by bl1_dgemm().

1255 {
1256 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1257  enum CBLAS_ORDER cblas_order = CblasColMajor;
1258  enum CBLAS_TRANSPOSE cblas_transa;
1259  enum CBLAS_TRANSPOSE cblas_transb;
1260 
1261  bl1_param_map_to_netlib_trans( transa, &cblas_transa );
1262  bl1_param_map_to_netlib_trans( transb, &cblas_transb );
1263 
1264  cblas_dgemm( cblas_order,
1265  cblas_transa,
1266  cblas_transb,
1267  m,
1268  n,
1269  k,
1270  *alpha,
1271  a, lda,
1272  b, ldb,
1273  *beta,
1274  c, ldc );
1275 #else
1276  char blas_transa;
1277  char blas_transb;
1278 
1279  bl1_param_map_to_netlib_trans( transa, &blas_transa );
1280  bl1_param_map_to_netlib_trans( transb, &blas_transb );
1281 
1282  F77_dgemm( &blas_transa,
1283  &blas_transb,
1284  &m,
1285  &n,
1286  &k,
1287  alpha,
1288  a, &lda,
1289  b, &ldb,
1290  beta,
1291  c, &ldc );
1292 #endif
1293 }
void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc)
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
Definition: blis_prototypes_cblas.h:17
void F77_dgemm(char *transa, char *transb, int *m, int *n, int *k, double *alpha, double *a, int *lda, double *b, int *ldb, double *beta, double *c, int *ldc)

◆ bl1_dhemm()

void bl1_dhemm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs,
double *  beta,
double *  c,
int  c_rs,
int  c_cs 
)

References bl1_dsymm().

26 {
27  bl1_dsymm( side,
28  uplo,
29  m,
30  n,
31  alpha,
32  a, a_rs, a_cs,
33  b, b_rs, b_cs,
34  beta,
35  c, c_rs, c_cs );
36 }
void bl1_dsymm(side1_t side, uplo1_t uplo, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
Definition: bl1_symm.c:274

◆ bl1_dher2k()

void bl1_dher2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs,
double *  beta,
double *  c,
int  c_rs,
int  c_cs 
)

References bl1_dsyr2k().

27 {
28  bl1_dsyr2k( uplo,
29  trans,
30  m,
31  k,
32  alpha,
33  a, a_rs, a_cs,
34  b, b_rs, b_cs,
35  beta,
36  c, c_rs, c_cs );
37 }
void bl1_dsyr2k(uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
Definition: bl1_syr2k.c:239

◆ bl1_dherk()

void bl1_dherk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  beta,
double *  c,
int  c_rs,
int  c_cs 
)

References bl1_dsyrk().

25 {
26  bl1_dsyrk( uplo,
27  trans,
28  m,
29  k,
30  alpha,
31  a, a_rs, a_cs,
32  beta,
33  c, c_rs, c_cs );
34 }
void bl1_dsyrk(uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *beta, double *c, int c_rs, int c_cs)
Definition: bl1_syrk.c:109

◆ bl1_dsymm()

void bl1_dsymm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs,
double *  beta,
double *  c,
int  c_rs,
int  c_cs 
)

References bl1_d0(), bl1_d1(), bl1_dallocm(), bl1_daxpymt(), bl1_dcopymt(), bl1_dcreate_contigm(), bl1_dcreate_contigmr(), bl1_dfree(), bl1_dfree_contigm(), bl1_dfree_saved_contigm(), bl1_dscalm(), bl1_dsymm_blas(), bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by bl1_dhemm(), FLA_Hemm_external(), and FLA_Symm_external().

275 {
276  int m_save = m;
277  int n_save = n;
278  double* a_save = a;
279  double* b_save = b;
280  double* c_save = c;
281  int a_rs_save = a_rs;
282  int a_cs_save = a_cs;
283  int b_rs_save = b_rs;
284  int b_cs_save = b_cs;
285  int c_rs_save = c_rs;
286  int c_cs_save = c_cs;
287  double zero = bl1_d0();
288  double one = bl1_d1();
289  double* b_copy;
290  double* c_trans;
291  int dim_a;
292  int lda, inca;
293  int ldb, incb;
294  int ldc, incc;
295  int ldb_copy, incb_copy;
296  int ldc_trans, incc_trans;
297  int symm_needs_copyb = FALSE;
298  int symm_needs_transb = FALSE;
299  int symm_needs_axpyt = FALSE;
300 
301  // Return early if possible.
302  if ( bl1_zero_dim2( m, n ) ) return;
303 
304  // If necessary, allocate, initialize, and use a temporary contiguous
305  // copy of each matrix rather than the original matrices.
306  bl1_set_dim_with_side( side, m, n, &dim_a );
307  bl1_dcreate_contigmr( uplo,
308  dim_a,
309  dim_a,
310  a_save, a_rs_save, a_cs_save,
311  &a, &a_rs, &a_cs );
312 
313  bl1_dcreate_contigm( m,
314  n,
315  b_save, b_rs_save, b_cs_save,
316  &b, &b_rs, &b_cs );
317 
318  bl1_dcreate_contigm( m,
319  n,
320  c_save, c_rs_save, c_cs_save,
321  &c, &c_rs, &c_cs );
322 
323  // Initialize with values assuming column-major storage.
324  lda = a_cs;
325  inca = a_rs;
326  ldb = b_cs;
327  incb = b_rs;
328  ldc = c_cs;
329  incc = c_rs;
330 
331  // Adjust the parameters based on the storage of each matrix.
332  if ( bl1_is_col_storage( c_rs, c_cs ) )
333  {
334  if ( bl1_is_col_storage( a_rs, a_cs ) )
335  {
336  if ( bl1_is_col_storage( b_rs, b_cs ) )
337  {
338  // requested operation: C_c += uplo( A_c ) * B_c
339  // effective operation: C_c += uplo( A_c ) * B_c
340  }
341  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
342  {
343  // requested operation: C_c += uplo( A_c ) * B_r
344  // effective operation: C_c += uplo( A_c ) * B_c
345  symm_needs_copyb = TRUE;
346  }
347  }
348  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
349  {
350  if ( bl1_is_col_storage( b_rs, b_cs ) )
351  {
352  // requested operation: C_c += uplo( A_r ) * B_c
353  // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
354  bl1_swap_ints( lda, inca );
355 
356  bl1_toggle_uplo( uplo );
357  }
358  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
359  {
360  // requested operation: C_c += uplo( A_r ) * B_r
361  // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
362  bl1_swap_ints( lda, inca );
363  bl1_swap_ints( ldb, incb );
364 
365  bl1_toggle_side( side );
366  bl1_toggle_uplo( uplo );
367 
368  symm_needs_axpyt = TRUE;
369  }
370  }
371  }
372  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
373  {
374  if ( bl1_is_col_storage( a_rs, a_cs ) )
375  {
376  if ( bl1_is_col_storage( b_rs, b_cs ) )
377  {
378  // requested operation: C_r += uplo( A_c ) * B_c
379  // effective operation: C_c += ( uplo( A_c ) * B_c )^T
380  bl1_swap_ints( ldc, incc );
381 
382  bl1_swap_ints( m, n );
383 
384  symm_needs_axpyt = TRUE;
385  }
386  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
387  {
388  // requested operation: C_r += uplo( A_c ) * B_r
389  // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
390  bl1_swap_ints( ldc, incc );
391  bl1_swap_ints( ldb, incb );
392 
393  bl1_swap_ints( m, n );
394 
395  bl1_toggle_side( side );
396  }
397  }
398  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
399  {
400  if ( bl1_is_col_storage( b_rs, b_cs ) )
401  {
402  // requested operation: C_r += uplo( A_r ) * B_c
403  // effective operation: C_c += B_c^T * ~uplo( A_c )
404  bl1_swap_ints( ldc, incc );
405  bl1_swap_ints( lda, inca );
406 
407  bl1_swap_ints( m, n );
408 
409  bl1_toggle_side( side );
410  bl1_toggle_uplo( uplo );
411 
412  symm_needs_copyb = TRUE;
413  symm_needs_transb = TRUE;
414  }
415  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
416  {
417  // requested operation: C_r += uplo( A_r ) * B_r
418  // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
419  bl1_swap_ints( ldc, incc );
420  bl1_swap_ints( lda, inca );
421  bl1_swap_ints( ldb, incb );
422 
423  bl1_swap_ints( m, n );
424 
425  bl1_toggle_uplo( uplo );
426  bl1_toggle_side( side );
427  }
428  }
429  }
430 
431  // We need a temporary matrix for the cases where B needs to be copied.
432  b_copy = b;
433  ldb_copy = ldb;
434  incb_copy = incb;
435 
436  // There are two cases where we need to make a copy of B: one where the
437  // copy's dimensions are transposed from the original B, and one where
438  // the dimensions are not swapped.
439  if ( symm_needs_copyb )
440  {
441  trans1_t transb;
442 
443  // Set transb, which determines whether or not we need to copy from B
444  // as if it needs a transposition. If a transposition is needed, then
445  // m and n have already been swapped. So in either case m
446  // represents the leading dimension of the copy.
447  if ( symm_needs_transb ) transb = BLIS1_TRANSPOSE;
448  else transb = BLIS1_NO_TRANSPOSE;
449 
450  b_copy = bl1_dallocm( m, n );
451  ldb_copy = m;
452  incb_copy = 1;
453 
454  bl1_dcopymt( transb,
455  m,
456  n,
457  b, incb, ldb,
458  b_copy, incb_copy, ldb_copy );
459  }
460 
461  // There are two cases where we need to perform the symm and then axpy
462  // the result into C with a transposition. We handle those cases here.
463  if ( symm_needs_axpyt )
464  {
465  // We need a temporary matrix for holding C^T. Notice that m and n
466  // represent the dimensions of C, and thus C_trans is n-by-m
467  // (interpreting both as column-major matrices). So the leading
468  // dimension of the temporary matrix holding C^T is n.
469  c_trans = bl1_dallocm( n, m );
470  ldc_trans = n;
471  incc_trans = 1;
472 
473  // Compute A * B (or B * A) and store the result in C_trans.
474  // Note that there is no overlap between the axpyt cases and
475  // the conja/copyb cases, hence the use of a, b, lda, and ldb.
476  bl1_dsymm_blas( side,
477  uplo,
478  n,
479  m,
480  alpha,
481  a, lda,
482  b, ldb,
483  &zero,
484  c_trans, ldc_trans );
485 
486  // Scale C by beta.
487  bl1_dscalm( BLIS1_NO_CONJUGATE,
488  m,
489  n,
490  beta,
491  c, incc, ldc );
492 
493  // And finally, accumulate the matrix product in C_trans into C
494  // with a transpose.
495  bl1_daxpymt( BLIS1_TRANSPOSE,
496  m,
497  n,
498  &one,
499  c_trans, incc_trans, ldc_trans,
500  c, incc, ldc );
501 
502  // Free the temporary matrix for C.
503  bl1_dfree( c_trans );
504  }
505  else // no extra axpyt step needed
506  {
507  bl1_dsymm_blas( side,
508  uplo,
509  m,
510  n,
511  alpha,
512  a, lda,
513  b_copy, ldb_copy,
514  beta,
515  c, ldc );
516  }
517 
518  if ( symm_needs_copyb )
519  bl1_dfree( b_copy );
520 
521  // Free any temporary contiguous matrices, copying the result back to
522  // the original matrix.
523  bl1_dfree_contigm( a_save, a_rs_save, a_cs_save,
524  &a, &a_rs, &a_cs );
525 
526  bl1_dfree_contigm( b_save, b_rs_save, b_cs_save,
527  &b, &b_rs, &b_cs );
528 
529  bl1_dfree_saved_contigm( m_save,
530  n_save,
531  c_save, c_rs_save, c_cs_save,
532  &c, &c_rs, &c_cs );
533 }
Definition: blis_type_defs.h:81
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
double bl1_d0(void)
Definition: bl1_constants.c:118
void bl1_dfree(double *p)
Definition: bl1_free.c:35
void bl1_dsymm_blas(side1_t side, uplo1_t uplo, int m, int n, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc)
Definition: bl1_symm.c:1098
void bl1_dcreate_contigm(int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:47
void bl1_dcopymt(trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:148
trans1_t
Definition: blis_type_defs.h:52
Definition: blis_type_defs.h:55
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
Definition: blis_type_defs.h:54
void bl1_dfree_saved_contigm(int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:36
void bl1_dfree_contigm(double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:29
double * bl1_dallocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:35
void bl1_daxpymt(trans1_t trans, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition: bl1_axpymt.c:81
void bl1_dcreate_contigmr(uplo1_t uplo, int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:45
double bl1_d1(void)
Definition: bl1_constants.c:54
void bl1_dscalm(conj1_t conj, int m, int n, double *alpha, double *a, int a_rs, int a_cs)
Definition: bl1_scalm.c:65

◆ bl1_dsymm_blas()

void bl1_dsymm_blas ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
double *  alpha,
double *  a,
int  lda,
double *  b,
int  ldb,
double *  beta,
double *  c,
int  ldc 
)

References bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(), cblas_dsymm(), CblasColMajor, and F77_dsymm().

Referenced by bl1_dsymm().

1099 {
1100 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1101  enum CBLAS_ORDER cblas_order = CblasColMajor;
1102  enum CBLAS_SIDE cblas_side;
1103  enum CBLAS_UPLO cblas_uplo;
1104 
1105  bl1_param_map_to_netlib_side( side, &cblas_side );
1106  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
1107 
1108  cblas_dsymm( cblas_order,
1109  cblas_side,
1110  cblas_uplo,
1111  m,
1112  n,
1113  *alpha,
1114  a, lda,
1115  b, ldb,
1116  *beta,
1117  c, ldc );
1118 #else
1119  char blas_side;
1120  char blas_uplo;
1121 
1122  bl1_param_map_to_netlib_side( side, &blas_side );
1123  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
1124 
1125  F77_dsymm( &blas_side,
1126  &blas_uplo,
1127  &m,
1128  &n,
1129  alpha,
1130  a, &lda,
1131  b, &ldb,
1132  beta,
1133  c, &ldc );
1134 #endif
1135 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
void cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc)
void bl1_param_map_to_netlib_side(side1_t blis_side, void *blas_side)
Definition: bl1_param_map.c:71
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
Definition: blis_prototypes_cblas.h:17
CBLAS_SIDE
Definition: blis_prototypes_cblas.h:21
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47
void F77_dsymm(char *side, char *uplo, int *m, int *n, double *alpha, double *a, int *lda, double *b, int *ldb, double *beta, double *c, int *ldc)

◆ bl1_dsyr2k()

void bl1_dsyr2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs,
double *  beta,
double *  c,
int  c_rs,
int  c_cs 
)

References bl1_dallocm(), bl1_dcopymt(), bl1_dcreate_contigmr(), bl1_dcreate_contigmt(), bl1_dfree(), bl1_dfree_contigm(), bl1_dfree_saved_contigmr(), bl1_dsyr2k_blas(), bl1_is_col_storage(), bl1_set_dims_with_trans(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_dher2k(), FLA_Her2k_external(), and FLA_Syr2k_external().

240 {
241  uplo1_t uplo_save = uplo;
242  int m_save = m;
243  double* a_save = a;
244  double* b_save = b;
245  double* c_save = c;
246  int a_rs_save = a_rs;
247  int a_cs_save = a_cs;
248  int b_rs_save = b_rs;
249  int b_cs_save = b_cs;
250  int c_rs_save = c_rs;
251  int c_cs_save = c_cs;
252  double* a_copy;
253  double* b_copy;
254  int lda, inca;
255  int ldb, incb;
256  int ldc, incc;
257  int lda_copy, inca_copy;
258  int ldb_copy, incb_copy;
259  int syr2k_needs_copya = FALSE;
260  int syr2k_needs_copyb = FALSE;
261 
262  // Return early if possible.
263  if ( bl1_zero_dim2( m, k ) ) return;
264 
265  // If necessary, allocate, initialize, and use a temporary contiguous
266  // copy of each matrix rather than the original matrices.
267  bl1_dcreate_contigmt( trans,
268  m,
269  k,
270  a_save, a_rs_save, a_cs_save,
271  &a, &a_rs, &a_cs );
272 
273  bl1_dcreate_contigmt( trans,
274  m,
275  k,
276  b_save, b_rs_save, b_cs_save,
277  &b, &b_rs, &b_cs );
278 
279  bl1_dcreate_contigmr( uplo,
280  m,
281  m,
282  c_save, c_rs_save, c_cs_save,
283  &c, &c_rs, &c_cs );
284 
285  // Initialize with values assuming column-major storage.
286  lda = a_cs;
287  inca = a_rs;
288  ldb = b_cs;
289  incb = b_rs;
290  ldc = c_cs;
291  incc = c_rs;
292 
293  // Adjust the parameters based on the storage of each matrix.
294  if ( bl1_is_col_storage( c_rs, c_cs ) )
295  {
296  if ( bl1_is_col_storage( a_rs, a_cs ) )
297  {
298  if ( bl1_is_col_storage( b_rs, b_cs ) )
299  {
300  // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
301  // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
302  }
303  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
304  {
305  // requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
306  // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
307  syr2k_needs_copyb = TRUE;
308  }
309  }
310  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
311  {
312  if ( bl1_is_col_storage( b_rs, b_cs ) )
313  {
314  // requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
315  // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
316  syr2k_needs_copya = TRUE;
317  }
318  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
319  {
320  // requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
321  // effective operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
322  bl1_swap_ints( lda, inca );
323  bl1_swap_ints( ldb, incb );
324 
325  bl1_toggle_trans( trans );
326  }
327  }
328  }
329  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
330  {
331  if ( bl1_is_col_storage( a_rs, a_cs ) )
332  {
333  if ( bl1_is_col_storage( b_rs, b_cs ) )
334  {
335  // requested operation: uplo( C_r ) += A_c * B_c' + B_c * A_c'
336  // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
337  bl1_swap_ints( ldc, incc );
338 
339  bl1_toggle_uplo( uplo );
340  }
341  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
342  {
343  // requested operation: uplo( C_r ) += A_c * B_r' + B_r * A_c'
344  // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
345  syr2k_needs_copyb = TRUE;
346 
347  bl1_swap_ints( ldc, incc );
348 
349  bl1_toggle_uplo( uplo );
350  }
351  }
352  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
353  {
354  if ( bl1_is_col_storage( b_rs, b_cs ) )
355  {
356  // requested operation: uplo( C_r ) += A_r * B_c' + B_c * A_r'
357  // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
358  syr2k_needs_copya = TRUE;
359 
360  bl1_swap_ints( ldc, incc );
361 
362  bl1_toggle_uplo( uplo );
363  }
364  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
365  {
366  // requested operation: uplo( C_r ) += A_r * B_r' + B_r * A_r'
367  // effective operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
368  bl1_swap_ints( ldc, incc );
369  bl1_swap_ints( lda, inca );
370  bl1_swap_ints( ldb, incb );
371 
372  bl1_toggle_uplo( uplo );
373  bl1_toggle_trans( trans );
374  }
375  }
376  }
377 
378  a_copy = a;
379  lda_copy = lda;
380  inca_copy = inca;
381 
382  // There are two cases where we need to copy A to column-major storage.
383  // We handle those two cases here.
384  if ( syr2k_needs_copya )
385  {
386  int m_a;
387  int n_a;
388 
389  // Determine the dimensions of A according to the value of trans. We
390  // need this in order to set the leading dimension of the copy of A.
391  bl1_set_dims_with_trans( trans, m, k, &m_a, &n_a );
392 
393  // We need a temporary matrix to hold a column-major copy of A.
394  a_copy = bl1_dallocm( m, k );
395  lda_copy = m_a;
396  inca_copy = 1;
397 
398  // Copy the contents of A into A_copy.
399  bl1_dcopymt( BLIS1_NO_TRANSPOSE,
400  m_a,
401  n_a,
402  a, inca, lda,
403  a_copy, inca_copy, lda_copy );
404  }
405 
406  b_copy = b;
407  ldb_copy = ldb;
408  incb_copy = incb;
409 
410  // There are two cases where we need to copy B to column-major storage.
411  // We handle those two cases here.
412  if ( syr2k_needs_copyb )
413  {
414  int m_b;
415  int n_b;
416 
417  // Determine the dimensions of B according to the value of trans. We
418  // need this in order to set the leading dimension of the copy of B.
419  bl1_set_dims_with_trans( trans, m, k, &m_b, &n_b );
420 
421  // We need a temporary matrix to hold a column-major copy of B.
422  b_copy = bl1_dallocm( m, k );
423  ldb_copy = m_b;
424  incb_copy = 1;
425 
426  // Copy the contents of B into B_copy.
427  bl1_dcopymt( BLIS1_NO_TRANSPOSE,
428  m_b,
429  n_b,
430  b, incb, ldb,
431  b_copy, incb_copy, ldb_copy );
432  }
433 
434  bl1_dsyr2k_blas( uplo,
435  trans,
436  m,
437  k,
438  alpha,
439  a_copy, lda_copy,
440  b_copy, ldb_copy,
441  beta,
442  c, ldc );
443 
444  if ( syr2k_needs_copya )
445  bl1_dfree( a_copy );
446 
447  if ( syr2k_needs_copyb )
448  bl1_dfree( b_copy );
449 
450  // Free any temporary contiguous matrices, copying the result back to
451  // the original matrix.
452  bl1_dfree_contigm( a_save, a_rs_save, a_cs_save,
453  &a, &a_rs, &a_cs );
454 
455  bl1_dfree_contigm( b_save, b_rs_save, b_cs_save,
456  &b, &b_rs, &b_cs );
457 
458  bl1_dfree_saved_contigmr( uplo_save,
459  m_save,
460  m_save,
461  c_save, c_rs_save, c_cs_save,
462  &c, &c_rs, &c_cs );
463 }
uplo1_t
Definition: blis_type_defs.h:60
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
void bl1_dfree(double *p)
Definition: bl1_free.c:35
void bl1_dfree_saved_contigmr(uplo1_t uplo, int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigmr.c:36
void bl1_dcopymt(trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:148
void bl1_dsyr2k_blas(uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc)
Definition: bl1_syr2k.c:966
void bl1_dcreate_contigmt(trans1_t trans_dims, int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmt.c:51
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
Definition: blis_type_defs.h:54
void bl1_set_dims_with_trans(trans1_t trans, int m, int n, int *m_new, int *n_new)
Definition: bl1_set_dims.c:13
void bl1_dfree_contigm(double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:29
double * bl1_dallocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:35
void bl1_dcreate_contigmr(uplo1_t uplo, int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:45

◆ bl1_dsyr2k_blas()

void bl1_dsyr2k_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double *  alpha,
double *  a,
int  lda,
double *  b,
int  ldb,
double *  beta,
double *  c,
int  ldc 
)

References bl1_is_conjtrans(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), BLIS1_TRANSPOSE, cblas_dsyr2k(), CblasColMajor, and F77_dsyr2k().

Referenced by bl1_dsyr2k().

967 {
968 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
969  enum CBLAS_ORDER cblas_order = CblasColMajor;
970  enum CBLAS_UPLO cblas_uplo;
971  enum CBLAS_TRANSPOSE cblas_trans;
972 
973  // BLAS doesn't recognize the conjugate-transposition constant for syr2k,
974  // so we have to map it down to regular transposition.
975  if ( bl1_is_conjtrans( trans ) ) trans = BLIS1_TRANSPOSE;
976 
977  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
978  bl1_param_map_to_netlib_trans( trans, &cblas_trans );
979 
980  cblas_dsyr2k( cblas_order,
981  cblas_uplo,
982  cblas_trans,
983  m,
984  k,
985  *alpha,
986  a, lda,
987  b, ldb,
988  *beta,
989  c, ldc );
990 #else
991  char blas_uplo;
992  char blas_trans;
993 
994  // BLAS doesn't recognize the conjugate-transposition constant for syr2k,
995  // so we have to map it down to regular transposition.
996  if ( bl1_is_conjtrans( trans ) ) trans = BLIS1_TRANSPOSE;
997 
998  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
999  bl1_param_map_to_netlib_trans( trans, &blas_trans );
1000 
1001  F77_dsyr2k( &blas_uplo,
1002  &blas_trans,
1003  &m,
1004  &k,
1005  alpha,
1006  a, &lda,
1007  b, &ldb,
1008  beta,
1009  c, &ldc );
1010 #endif
1011 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
int bl1_is_conjtrans(trans1_t trans)
Definition: bl1_is.c:30
void cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc)
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
Definition: blis_type_defs.h:55
void F77_dsyr2k(char *uplo, char *transa, int *n, int *k, double *alpha, double *a, int *lda, double *b, int *ldb, double *beta, double *c, int *ldc)
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
Definition: blis_prototypes_cblas.h:17
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47

◆ bl1_dsyrk()

void bl1_dsyrk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  beta,
double *  c,
int  c_rs,
int  c_cs 
)

References bl1_dcreate_contigmr(), bl1_dcreate_contigmt(), bl1_dfree_contigm(), bl1_dfree_saved_contigmr(), bl1_dsyrk_blas(), bl1_is_col_storage(), and bl1_zero_dim2().

Referenced by bl1_dherk(), FLA_Herk_external(), FLA_Syrk_external(), and FLA_UDdate_UT_opd_var1().

110 {
111  uplo1_t uplo_save = uplo;
112  int m_save = m;
113  double* a_save = a;
114  double* c_save = c;
115  int a_rs_save = a_rs;
116  int a_cs_save = a_cs;
117  int c_rs_save = c_rs;
118  int c_cs_save = c_cs;
119  int lda, inca;
120  int ldc, incc;
121 
122  // Return early if possible.
123  if ( bl1_zero_dim2( m, k ) ) return;
124 
125  // If necessary, allocate, initialize, and use a temporary contiguous
126  // copy of each matrix rather than the original matrices.
127  bl1_dcreate_contigmt( trans,
128  m,
129  k,
130  a_save, a_rs_save, a_cs_save,
131  &a, &a_rs, &a_cs );
132 
133  bl1_dcreate_contigmr( uplo,
134  m,
135  m,
136  c_save, c_rs_save, c_cs_save,
137  &c, &c_rs, &c_cs );
138 
139  // Initialize with values assuming column-major storage.
140  lda = a_cs;
141  inca = a_rs;
142  ldc = c_cs;
143  incc = c_rs;
144 
145  // Adjust the parameters based on the storage of each matrix.
146  if ( bl1_is_col_storage( c_rs, c_cs ) )
147  {
148  if ( bl1_is_col_storage( a_rs, a_cs ) )
149  {
150  // requested operation: uplo( C_c ) += A_c * A_c^T
151  // effective operation: uplo( C_c ) += A_c * A_c^T
152  }
153  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
154  {
155  // requested operation: uplo( C_c ) += A_r * A_r^T
156  // effective operation: uplo( C_c ) += A_c^T * A_c
157  bl1_swap_ints( lda, inca );
158 
159  bl1_toggle_trans( trans );
160  }
161  }
162  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
163  {
164  if ( bl1_is_col_storage( a_rs, a_cs ) )
165  {
166  // requested operation: uplo( C_r ) += A_c * A_c^T
167  // effective operation: ~uplo( C_c ) += A_c * A_c^T
168  bl1_swap_ints( ldc, incc );
169 
170  bl1_toggle_uplo( uplo );
171  }
172  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
173  {
174  // requested operation: uplo( C_r ) += A_r * A_r^T
175  // effective operation: ~uplo( C_c ) += A_c^T * A_c
176  bl1_swap_ints( ldc, incc );
177  bl1_swap_ints( lda, inca );
178 
179  bl1_toggle_uplo( uplo );
180  bl1_toggle_trans( trans );
181  }
182  }
183 
184  bl1_dsyrk_blas( uplo,
185  trans,
186  m,
187  k,
188  alpha,
189  a, lda,
190  beta,
191  c, ldc );
192 
193  // Free any temporary contiguous matrices, copying the result back to
194  // the original matrix.
195  bl1_dfree_contigm( a_save, a_rs_save, a_cs_save,
196  &a, &a_rs, &a_cs );
197 
198  bl1_dfree_saved_contigmr( uplo_save,
199  m_save,
200  m_save,
201  c_save, c_rs_save, c_cs_save,
202  &c, &c_rs, &c_cs );
203 }
uplo1_t
Definition: blis_type_defs.h:60
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
void bl1_dfree_saved_contigmr(uplo1_t uplo, int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigmr.c:36
void bl1_dcreate_contigmt(trans1_t trans_dims, int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmt.c:51
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_dsyrk_blas(uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int lda, double *beta, double *c, int ldc)
Definition: bl1_syrk.c:436
void bl1_dfree_contigm(double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:29
void bl1_dcreate_contigmr(uplo1_t uplo, int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:45

◆ bl1_dsyrk_blas()

void bl1_dsyrk_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double *  alpha,
double *  a,
int  lda,
double *  beta,
double *  c,
int  ldc 
)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_dsyrk(), CblasColMajor, and F77_dsyrk().

Referenced by bl1_dsyrk().

437 {
438 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
439  enum CBLAS_ORDER cblas_order = CblasColMajor;
440  enum CBLAS_UPLO cblas_uplo;
441  enum CBLAS_TRANSPOSE cblas_trans;
442 
443  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
444  bl1_param_map_to_netlib_trans( trans, &cblas_trans );
445 
446  cblas_dsyrk( cblas_order,
447  cblas_uplo,
448  cblas_trans,
449  m,
450  k,
451  *alpha,
452  a, lda,
453  *beta,
454  c, ldc );
455 #else
456  char blas_uplo;
457  char blas_trans;
458 
459  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
460  bl1_param_map_to_netlib_trans( trans, &blas_trans );
461 
462  F77_dsyrk( &blas_uplo,
463  &blas_trans,
464  &m,
465  &k,
466  alpha,
467  a, &lda,
468  beta,
469  c, &ldc );
470 #endif
471 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const double *A, const int lda, const double beta, double *C, const int ldc)
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
Definition: blis_prototypes_cblas.h:17
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47
void F77_dsyrk(char *uplo, char *transa, int *n, int *k, double *alpha, double *a, int *lda, double *beta, double *c, int *ldc)

◆ bl1_dtrmm()

void bl1_dtrmm ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bl1_dcreate_contigm(), bl1_dcreate_contigmr(), bl1_dfree_contigm(), bl1_dfree_saved_contigm(), bl1_dtrmm_blas(), bl1_is_col_storage(), bl1_set_dim_with_side(), and bl1_zero_dim2().

Referenced by bl1_dtrmmsx(), and FLA_Trmm_external().

117 {
118  int m_save = m;
119  int n_save = n;
120  double* a_save = a;
121  double* b_save = b;
122  int a_rs_save = a_rs;
123  int a_cs_save = a_cs;
124  int b_rs_save = b_rs;
125  int b_cs_save = b_cs;
126  int dim_a;
127  int lda, inca;
128  int ldb, incb;
129 
130  // Return early if possible.
131  if ( bl1_zero_dim2( m, n ) ) return;
132 
133  // If necessary, allocate, initialize, and use a temporary contiguous
134  // copy of each matrix rather than the original matrices.
135  bl1_set_dim_with_side( side, m, n, &dim_a );
136  bl1_dcreate_contigmr( uplo,
137  dim_a,
138  dim_a,
139  a_save, a_rs_save, a_cs_save,
140  &a, &a_rs, &a_cs );
141 
142  bl1_dcreate_contigm( m,
143  n,
144  b_save, b_rs_save, b_cs_save,
145  &b, &b_rs, &b_cs );
146 
147  // Initialize with values assuming column-major storage.
148  lda = a_cs;
149  inca = a_rs;
150  ldb = b_cs;
151  incb = b_rs;
152 
153  // Adjust the parameters based on the storage of each matrix.
154  if ( bl1_is_col_storage( b_rs, b_cs ) )
155  {
156  if ( bl1_is_col_storage( a_rs, a_cs ) )
157  {
158  // requested operation: B_c := tr( uplo( A_c ) ) * B_c
159  // effective operation: B_c := tr( uplo( A_c ) ) * B_c
160  }
161  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
162  {
163  // requested operation: B_c := tr( uplo( A_r ) ) * B_c
164  // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c
165  bl1_swap_ints( lda, inca );
166 
167  bl1_toggle_uplo( uplo );
168  bl1_toggle_trans( trans );
169  }
170  }
171  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
172  {
173  if ( bl1_is_col_storage( a_rs, a_cs ) )
174  {
175  // requested operation: B_r := tr( uplo( A_c ) ) * B_r
176  // effective operation: B_c := B_c * tr( uplo( A_c ) )^T
177  bl1_swap_ints( ldb, incb );
178 
179  bl1_swap_ints( m, n );
180 
181  bl1_toggle_side( side );
182  bl1_toggle_trans( trans );
183  }
184  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
185  {
186  // requested operation: B_r := tr( uplo( A_r ) ) * B_r
187  // effective operation: B_c := B_c * tr( ~uplo( A_c ) )
188  bl1_swap_ints( ldb, incb );
189  bl1_swap_ints( lda, inca );
190 
191  bl1_swap_ints( m, n );
192 
193  bl1_toggle_uplo( uplo );
194  bl1_toggle_side( side );
195  }
196  }
197 
198  bl1_dtrmm_blas( side,
199  uplo,
200  trans,
201  diag,
202  m,
203  n,
204  alpha,
205  a, lda,
206  b, ldb );
207 
208  // Free any temporary contiguous matrices, copying the result back to
209  // the original matrix.
210  bl1_dfree_contigm( a_save, a_rs_save, a_cs_save,
211  &a, &a_rs, &a_cs );
212 
213  bl1_dfree_saved_contigm( m_save,
214  n_save,
215  b_save, b_rs_save, b_cs_save,
216  &b, &b_rs, &b_cs );
217 }
void bl1_dtrmm_blas(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int lda, double *b, int ldb)
Definition: bl1_trmm.c:567
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
void bl1_dcreate_contigm(int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:47
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_dfree_saved_contigm(int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:36
void bl1_dfree_contigm(double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:29
void bl1_dcreate_contigmr(uplo1_t uplo, int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:45

◆ bl1_dtrmm_blas()

void bl1_dtrmm_blas ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
double *  alpha,
double *  a,
int  lda,
double *  b,
int  ldb 
)

References bl1_param_map_to_netlib_diag(), bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_dtrmm(), CblasColMajor, and F77_dtrmm().

Referenced by bl1_dtrmm().

568 {
569 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
570  enum CBLAS_ORDER cblas_order = CblasColMajor;
571  enum CBLAS_SIDE cblas_side;
572  enum CBLAS_UPLO cblas_uplo;
573  enum CBLAS_TRANSPOSE cblas_trans;
574  enum CBLAS_DIAG cblas_diag;
575 
576  bl1_param_map_to_netlib_side( side, &cblas_side );
577  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
578  bl1_param_map_to_netlib_trans( trans, &cblas_trans );
579  bl1_param_map_to_netlib_diag( diag, &cblas_diag );
580 
581  cblas_dtrmm( cblas_order,
582  cblas_side,
583  cblas_uplo,
584  cblas_trans,
585  cblas_diag,
586  m,
587  n,
588  *alpha,
589  a, lda,
590  b, ldb );
591 #else
592  char blas_side;
593  char blas_uplo;
594  char blas_trans;
595  char blas_diag;
596 
597  bl1_param_map_to_netlib_side( side, &blas_side );
598  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
599  bl1_param_map_to_netlib_trans( trans, &blas_trans );
600  bl1_param_map_to_netlib_diag( diag, &blas_diag );
601 
602  F77_dtrmm( &blas_side,
603  &blas_uplo,
604  &blas_trans,
605  &blas_diag,
606  &m,
607  &n,
608  alpha,
609  a, &lda,
610  b, &ldb );
611 #endif
612 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
CBLAS_DIAG
Definition: blis_prototypes_cblas.h:20
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
void bl1_param_map_to_netlib_side(side1_t blis_side, void *blas_side)
Definition: bl1_param_map.c:71
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, const double *A, const int lda, double *B, const int ldb)
Definition: blis_prototypes_cblas.h:17
CBLAS_SIDE
Definition: blis_prototypes_cblas.h:21
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47
void F77_dtrmm(char *side, char *uplo, char *transa, char *diag, int *m, int *n, double *alpha, double *a, int *lda, double *b, int *ldb)
void bl1_param_map_to_netlib_diag(diag1_t blis_diag, void *blas_diag)
Definition: bl1_param_map.c:95

◆ bl1_dtrmmsx()

void bl1_dtrmmsx ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs,
double *  beta,
double *  c,
int  c_rs,
int  c_cs 
)

References bl1_d1(), bl1_dallocm(), bl1_daxpymt(), bl1_dcopymt(), bl1_dcreate_contigm(), bl1_dcreate_contigmr(), bl1_dfree(), bl1_dfree_contigm(), bl1_dfree_saved_contigm(), bl1_dscalm(), bl1_dtrmm(), bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Trmmsx_external().

120 {
121  int m_save = m;
122  int n_save = n;
123  double* a_save = a;
124  double* b_save = b;
125  double* c_save = c;
126  int a_rs_save = a_rs;
127  int a_cs_save = a_cs;
128  int b_rs_save = b_rs;
129  int b_cs_save = b_cs;
130  int c_rs_save = c_rs;
131  int c_cs_save = c_cs;
132  double one = bl1_d1();
133  double* b_copy;
134  int dim_a;
135  int b_copy_rs, b_copy_cs;
136 
137  // Return early if possible.
138  if ( bl1_zero_dim2( m, n ) ) return;
139 
140  // If necessary, allocate, initialize, and use a temporary contiguous
141  // copy of each matrix rather than the original matrices.
142  bl1_set_dim_with_side( side, m, n, &dim_a );
143  bl1_dcreate_contigmr( uplo,
144  dim_a,
145  dim_a,
146  a_save, a_rs_save, a_cs_save,
147  &a, &a_rs, &a_cs );
148 
149  bl1_dcreate_contigm( m,
150  n,
151  b_save, b_rs_save, b_cs_save,
152  &b, &b_rs, &b_cs );
153 
154  bl1_dcreate_contigm( m,
155  n,
156  c_save, c_rs_save, c_cs_save,
157  &c, &c_rs, &c_cs );
158 
159  // Create a copy of B to use in the computation so the original matrix is
160  // left untouched.
161  b_copy = bl1_dallocm( m, n );
162 
163  // Match the strides of B_copy to that of B.
164  if ( bl1_is_col_storage( b_rs, b_cs ) )
165  {
166  b_copy_rs = 1;
167  b_copy_cs = m;
168  }
169  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
170  {
171  b_copy_rs = n;
172  b_copy_cs = 1;
173  }
174 
175  // Copy the contents of B to B_copy.
176  bl1_dcopymt( BLIS1_NO_TRANSPOSE,
177  m,
178  n,
179  b, b_rs, b_cs,
180  b_copy, b_copy_rs, b_copy_cs );
181 
182  // Perform the operation on B_copy.
183  bl1_dtrmm( side,
184  uplo,
185  trans,
186  diag,
187  m,
188  n,
189  alpha,
190  a, a_rs, a_cs,
191  b_copy, b_copy_rs, b_copy_cs );
192 
193  // Scale C by beta.
194  bl1_dscalm( BLIS1_NO_CONJUGATE,
195  m,
196  n,
197  beta,
198  c, c_rs, c_cs );
199 
200  // Add B_copy into C.
201  bl1_daxpymt( BLIS1_NO_TRANSPOSE,
202  m,
203  n,
204  &one,
205  b_copy, b_copy_rs, b_copy_cs,
206  c, c_rs, c_cs );
207 
208  // Free the copy of B.
209  bl1_dfree( b_copy );
210 
211  // Free any temporary contiguous matrices, copying the result back to
212  // the original matrix.
213  bl1_dfree_contigm( a_save, a_rs_save, a_cs_save,
214  &a, &a_rs, &a_cs );
215 
216  bl1_dfree_contigm( b_save, b_rs_save, b_cs_save,
217  &b, &b_rs, &b_cs );
218 
219  bl1_dfree_saved_contigm( m_save,
220  n_save,
221  c_save, c_rs_save, c_cs_save,
222  &c, &c_rs, &c_cs );
223 }
Definition: blis_type_defs.h:81
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
void bl1_dfree(double *p)
Definition: bl1_free.c:35
void bl1_dcreate_contigm(int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:47
void bl1_dcopymt(trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:148
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
Definition: blis_type_defs.h:54
void bl1_dtrmm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition: bl1_trmm.c:116
void bl1_dfree_saved_contigm(int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:36
void bl1_dfree_contigm(double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:29
double * bl1_dallocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:35
void bl1_daxpymt(trans1_t trans, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition: bl1_axpymt.c:81
void bl1_dcreate_contigmr(uplo1_t uplo, int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:45
double bl1_d1(void)
Definition: bl1_constants.c:54
void bl1_dscalm(conj1_t conj, int m, int n, double *alpha, double *a, int a_rs, int a_cs)
Definition: bl1_scalm.c:65

◆ bl1_dtrsm()

void bl1_dtrsm ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bl1_dcreate_contigm(), bl1_dcreate_contigmr(), bl1_dfree_contigm(), bl1_dfree_saved_contigm(), bl1_dtrsm_blas(), bl1_is_col_storage(), bl1_set_dim_with_side(), and bl1_zero_dim2().

Referenced by bl1_dtrsmsx(), FLA_LU_nopiv_opd_var1(), FLA_LU_nopiv_opd_var2(), FLA_LU_nopiv_opd_var3(), FLA_LU_piv_opd_var3(), and FLA_Trsm_external().

117 {
118  int m_save = m;
119  int n_save = n;
120  double* a_save = a;
121  double* b_save = b;
122  int a_rs_save = a_rs;
123  int a_cs_save = a_cs;
124  int b_rs_save = b_rs;
125  int b_cs_save = b_cs;
126  int dim_a;
127  int lda, inca;
128  int ldb, incb;
129 
130  // Return early if possible.
131  if ( bl1_zero_dim2( m, n ) ) return;
132 
133  // If necessary, allocate, initialize, and use a temporary contiguous
134  // copy of each matrix rather than the original matrices.
135  bl1_set_dim_with_side( side, m, n, &dim_a );
136  bl1_dcreate_contigmr( uplo,
137  dim_a,
138  dim_a,
139  a_save, a_rs_save, a_cs_save,
140  &a, &a_rs, &a_cs );
141 
142  bl1_dcreate_contigm( m,
143  n,
144  b_save, b_rs_save, b_cs_save,
145  &b, &b_rs, &b_cs );
146 
147  // Initialize with values assuming column-major storage.
148  lda = a_cs;
149  inca = a_rs;
150  ldb = b_cs;
151  incb = b_rs;
152 
153  // Adjust the parameters based on the storage of each matrix.
154  if ( bl1_is_col_storage( b_rs, b_cs ) )
155  {
156  if ( bl1_is_col_storage( a_rs, a_cs ) )
157  {
158  // requested operation: B_c := tr( uplo( A_c ) ) * B_c
159  // effective operation: B_c := tr( uplo( A_c ) ) * B_c
160  }
161  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
162  {
163  // requested operation: B_c := tr( uplo( A_r ) ) * B_c
164  // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c
165  bl1_swap_ints( lda, inca );
166 
167  bl1_toggle_uplo( uplo );
168  bl1_toggle_trans( trans );
169  }
170  }
171  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
172  {
173  if ( bl1_is_col_storage( a_rs, a_cs ) )
174  {
175  // requested operation: B_r := tr( uplo( A_c ) ) * B_r
176  // effective operation: B_c := B_c * tr( uplo( A_c ) )^T
177  bl1_swap_ints( ldb, incb );
178 
179  bl1_swap_ints( m, n );
180 
181  bl1_toggle_side( side );
182  bl1_toggle_trans( trans );
183  }
184  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
185  {
186  // requested operation: B_r := tr( uplo( A_r ) ) * B_r
187  // effective operation: B_c := B_c * tr( ~uplo( A_c ) )
188  bl1_swap_ints( ldb, incb );
189  bl1_swap_ints( lda, inca );
190 
191  bl1_swap_ints( m, n );
192 
193  bl1_toggle_uplo( uplo );
194  bl1_toggle_side( side );
195  }
196  }
197 
198  bl1_dtrsm_blas( side,
199  uplo,
200  trans,
201  diag,
202  m,
203  n,
204  alpha,
205  a, lda,
206  b, ldb );
207 
208  // Free any temporary contiguous matrices, copying the result back to
209  // the original matrix.
210  bl1_dfree_contigm( a_save, a_rs_save, a_cs_save,
211  &a, &a_rs, &a_cs );
212 
213  bl1_dfree_saved_contigm( m_save,
214  n_save,
215  b_save, b_rs_save, b_cs_save,
216  &b, &b_rs, &b_cs );
217 }
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
void bl1_dcreate_contigm(int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:47
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_dtrsm_blas(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int lda, double *b, int ldb)
Definition: bl1_trsm.c:567
void bl1_dfree_saved_contigm(int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:36
void bl1_dfree_contigm(double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:29
void bl1_dcreate_contigmr(uplo1_t uplo, int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:45

◆ bl1_dtrsm_blas()

void bl1_dtrsm_blas ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
double *  alpha,
double *  a,
int  lda,
double *  b,
int  ldb 
)

References bl1_param_map_to_netlib_diag(), bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_dtrsm(), CblasColMajor, and F77_dtrsm().

Referenced by bl1_dtrsm().

568 {
569 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
570  enum CBLAS_ORDER cblas_order = CblasColMajor;
571  enum CBLAS_SIDE cblas_side;
572  enum CBLAS_UPLO cblas_uplo;
573  enum CBLAS_TRANSPOSE cblas_trans;
574  enum CBLAS_DIAG cblas_diag;
575 
576  bl1_param_map_to_netlib_side( side, &cblas_side );
577  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
578  bl1_param_map_to_netlib_trans( trans, &cblas_trans );
579  bl1_param_map_to_netlib_diag( diag, &cblas_diag );
580 
581  cblas_dtrsm( cblas_order,
582  cblas_side,
583  cblas_uplo,
584  cblas_trans,
585  cblas_diag,
586  m,
587  n,
588  *alpha,
589  a, lda,
590  b, ldb );
591 #else
592  char blas_side;
593  char blas_uplo;
594  char blas_trans;
595  char blas_diag;
596 
597  bl1_param_map_to_netlib_side( side, &blas_side );
598  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
599  bl1_param_map_to_netlib_trans( trans, &blas_trans );
600  bl1_param_map_to_netlib_diag( diag, &blas_diag );
601 
602  F77_dtrsm( &blas_side,
603  &blas_uplo,
604  &blas_trans,
605  &blas_diag,
606  &m,
607  &n,
608  alpha,
609  a, &lda,
610  b, &ldb );
611 #endif
612 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
CBLAS_DIAG
Definition: blis_prototypes_cblas.h:20
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
void bl1_param_map_to_netlib_side(side1_t blis_side, void *blas_side)
Definition: bl1_param_map.c:71
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
CblasColMajor
Definition: blis_prototypes_cblas.h:17
CBLAS_SIDE
Definition: blis_prototypes_cblas.h:21
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47
void cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, const double *A, const int lda, double *B, const int ldb)
void F77_dtrsm(char *side, char *uplo, char *transa, char *diag, int *m, int *n, double *alpha, double *a, int *lda, double *b, int *ldb)
void bl1_param_map_to_netlib_diag(diag1_t blis_diag, void *blas_diag)
Definition: bl1_param_map.c:95

◆ bl1_dtrsmsx()

void bl1_dtrsmsx ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs,
double *  beta,
double *  c,
int  c_rs,
int  c_cs 
)

References bl1_d1(), bl1_dallocm(), bl1_daxpymt(), bl1_dcopymt(), bl1_dcreate_contigm(), bl1_dcreate_contigmr(), bl1_dfree(), bl1_dfree_contigm(), bl1_dfree_saved_contigm(), bl1_dscalm(), bl1_dtrsm(), bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Trsmsx_external().

120 {
121  int m_save = m;
122  int n_save = n;
123  double* a_save = a;
124  double* b_save = b;
125  double* c_save = c;
126  int a_rs_save = a_rs;
127  int a_cs_save = a_cs;
128  int b_rs_save = b_rs;
129  int b_cs_save = b_cs;
130  int c_rs_save = c_rs;
131  int c_cs_save = c_cs;
132  double one = bl1_d1();
133  double* b_copy;
134  int dim_a;
135  int b_copy_rs, b_copy_cs;
136 
137  // Return early if possible.
138  if ( bl1_zero_dim2( m, n ) ) return;
139 
140  // If necessary, allocate, initialize, and use a temporary contiguous
141  // copy of each matrix rather than the original matrices.
142  bl1_set_dim_with_side( side, m, n, &dim_a );
143  bl1_dcreate_contigmr( uplo,
144  dim_a,
145  dim_a,
146  a_save, a_rs_save, a_cs_save,
147  &a, &a_rs, &a_cs );
148 
149  bl1_dcreate_contigm( m,
150  n,
151  b_save, b_rs_save, b_cs_save,
152  &b, &b_rs, &b_cs );
153 
154  bl1_dcreate_contigm( m,
155  n,
156  c_save, c_rs_save, c_cs_save,
157  &c, &c_rs, &c_cs );
158 
159  // Create a copy of B to use in the computation so the original matrix is
160  // left untouched.
161  b_copy = bl1_dallocm( m, n );
162 
163  // Match the strides of B_copy to that of B.
164  if ( bl1_is_col_storage( b_rs, b_cs ) )
165  {
166  b_copy_rs = 1;
167  b_copy_cs = m;
168  }
169  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
170  {
171  b_copy_rs = n;
172  b_copy_cs = 1;
173  }
174 
175  // Copy the contents of B to B_copy.
176  bl1_dcopymt( BLIS1_NO_TRANSPOSE,
177  m,
178  n,
179  b, b_rs, b_cs,
180  b_copy, b_copy_rs, b_copy_cs );
181 
182  // Perform the operation on B_copy.
183  bl1_dtrsm( side,
184  uplo,
185  trans,
186  diag,
187  m,
188  n,
189  alpha,
190  a, a_rs, a_cs,
191  b_copy, b_copy_rs, b_copy_cs );
192 
193  // Scale C by beta.
194  bl1_dscalm( BLIS1_NO_CONJUGATE,
195  m,
196  n,
197  beta,
198  c, c_rs, c_cs );
199 
200  // Add B_copy into C.
201  bl1_daxpymt( BLIS1_NO_TRANSPOSE,
202  m,
203  n,
204  &one,
205  b_copy, b_copy_rs, b_copy_cs,
206  c, c_rs, c_cs );
207 
208  // Free the copy of B.
209  bl1_dfree( b_copy );
210 
211  // Free any temporary contiguous matrices, copying the result back to
212  // the original matrix.
213  bl1_dfree_contigm( a_save, a_rs_save, a_cs_save,
214  &a, &a_rs, &a_cs );
215 
216  bl1_dfree_contigm( b_save, b_rs_save, b_cs_save,
217  &b, &b_rs, &b_cs );
218 
219  bl1_dfree_saved_contigm( m_save,
220  n_save,
221  c_save, c_rs_save, c_cs_save,
222  &c, &c_rs, &c_cs );
223 }
BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
void bl1_dfree(double *p)
Definition: bl1_free.c:35
void bl1_dcreate_contigm(int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:47
void bl1_dcopymt(trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:148
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
void bl1_dfree_saved_contigm(int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:36
void bl1_dfree_contigm(double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:29
double * bl1_dallocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:35
void bl1_dtrsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition: bl1_trsm.c:116
void bl1_daxpymt(trans1_t trans, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition: bl1_axpymt.c:81
void bl1_dcreate_contigmr(uplo1_t uplo, int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:45
double bl1_d1(void)
Definition: bl1_constants.c:54
void bl1_dscalm(conj1_t conj, int m, int n, double *alpha, double *a, int a_rs, int a_cs)
Definition: bl1_scalm.c:65

◆ bl1_sgemm()

void bl1_sgemm ( trans1_t  transa,
trans1_t  transb,
int  m,
int  k,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs,
float *  beta,
float *  c,
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_s0(), bl1_s1(), bl1_sallocm(), bl1_saxpymt(), bl1_screate_contigm(), bl1_screate_contigmt(), bl1_sfree(), bl1_sfree_contigm(), bl1_sfree_saved_contigm(), bl1_sgemm_blas(), bl1_sscalm(), bl1_zero_dim3(), BLIS1_NO_CONJUGATE, and BLIS1_TRANSPOSE.

Referenced by FLA_Gemm_external().

14 {
15  int m_save = m;
16  int n_save = n;
17  float* a_save = a;
18  float* b_save = b;
19  float* c_save = c;
20  int a_rs_save = a_rs;
21  int a_cs_save = a_cs;
22  int b_rs_save = b_rs;
23  int b_cs_save = b_cs;
24  int c_rs_save = c_rs;
25  int c_cs_save = c_cs;
26  float zero = bl1_s0();
27  float one = bl1_s1();
28  float* a_unswap;
29  float* b_unswap;
30  float* c_trans;
31  int lda, inca;
32  int ldb, incb;
33  int ldc, incc;
34  int ldc_trans, incc_trans;
35  int m_gemm, n_gemm;
36  int gemm_needs_axpyt = FALSE;
37 
38  // Return early if possible.
39  if ( bl1_zero_dim3( m, k, n ) )
40  {
41  bl1_sscalm( BLIS1_NO_CONJUGATE,
42  m,
43  n,
44  beta,
45  c, c_rs, c_cs );
46  return;
47  }
48 
49  // If necessary, allocate, initialize, and use a temporary contiguous
50  // copy of each matrix rather than the original matrices.
51  bl1_screate_contigmt( transa,
52  m,
53  k,
54  a_save, a_rs_save, a_cs_save,
55  &a, &a_rs, &a_cs );
56 
57  bl1_screate_contigmt( transb,
58  k,
59  n,
60  b_save, b_rs_save, b_cs_save,
61  &b, &b_rs, &b_cs );
62 
63  bl1_screate_contigm( m,
64  n,
65  c_save, c_rs_save, c_cs_save,
66  &c, &c_rs, &c_cs );
67 
68  // These are used to track the original values of a and b prior to any
69  // operand swapping that might take place. This is necessary for proper
70  // freeing of memory when one is a temporary contiguous matrix.
71  a_unswap = a;
72  b_unswap = b;
73 
74  // These are used to track the dimensions of the product of the
75  // A and B operands to the BLAS invocation of gemm. These differ
76  // from m and n when the operands need to be swapped.
77  m_gemm = m;
78  n_gemm = n;
79 
80  // Initialize with values assuming column-major storage.
81  lda = a_cs;
82  inca = a_rs;
83  ldb = b_cs;
84  incb = b_rs;
85  ldc = c_cs;
86  incc = c_rs;
87 
88  // Adjust the parameters based on the storage of each matrix.
89  if ( bl1_is_col_storage( c_rs, c_cs ) )
90  {
91  if ( bl1_is_col_storage( a_rs, a_cs ) )
92  {
93  if ( bl1_is_col_storage( b_rs, b_cs ) )
94  {
95  // requested operation: C_c += tr( A_c ) * tr( B_c )
96  // effective operation: C_c += tr( A_c ) * tr( B_c )
97  }
98  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
99  {
100 
101  // requested operation: C_c += tr( A_c ) * tr( B_r )
102  // effective operation: C_c += tr( A_c ) * tr( B_c )^T
103  bl1_swap_ints( ldb, incb );
104 
105  bl1_toggle_trans( transb );
106  }
107  }
108  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
109  {
110  if ( bl1_is_col_storage( b_rs, b_cs ) )
111  {
112  // requested operation: C_c += tr( A_r ) * tr( B_c )
113  // effective operation: C_c += tr( A_r )^T * tr( B_c )
114  bl1_swap_ints( lda, inca );
115 
116  bl1_toggle_trans( transa );
117  }
118  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
119  {
120  // requested operation: C_c += tr( A_r ) * tr( B_r )
121  // effective operation: C_c += ( tr( B_c ) * tr( A_c ) )^T
122  bl1_swap_ints( lda, inca );
123  bl1_swap_ints( ldb, incb );
124 
125  bl1_sswap_pointers( a, b );
126  bl1_swap_ints( lda, ldb );
127  bl1_swap_ints( inca, incb );
128  bl1_swap_trans( transa, transb );
129 
130  gemm_needs_axpyt = TRUE;
131  bl1_swap_ints( m_gemm, n_gemm );
132  }
133  }
134  }
135  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
136  {
137  if ( bl1_is_col_storage( a_rs, a_cs ) )
138  {
139  if ( bl1_is_col_storage( b_rs, b_cs ) )
140  {
141  // requested operation: C_r += tr( A_c ) * tr( B_c )
142  // effective operation: C_c += ( tr( A_c ) * tr( B_c ) )^T
143  bl1_swap_ints( ldc, incc );
144 
145  bl1_swap_ints( m, n );
146 
147  gemm_needs_axpyt = TRUE;
148  }
149  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
150  {
151  // requested operation: C_r += tr( A_c ) * tr( B_r )
152  // effective operation: C_c += tr( B_c ) * tr( A_c )^T
153  bl1_swap_ints( ldc, incc );
154  bl1_swap_ints( ldb, incb );
155 
156  bl1_toggle_trans( transa );
157 
158  bl1_swap_ints( m, n );
159  bl1_swap_ints( m_gemm, n_gemm );
160  bl1_sswap_pointers( a, b );
161  bl1_swap_ints( lda, ldb );
162  bl1_swap_ints( inca, incb );
163  bl1_swap_trans( transa, transb );
164  }
165  }
166  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
167  {
168  if ( bl1_is_col_storage( b_rs, b_cs ) )
169  {
170  // requested operation: C_r += tr( A_r ) * tr( B_c )
171  // effective operation: C_c += tr( B_c )^T * tr( A_c )
172  bl1_swap_ints( ldc, incc );
173  bl1_swap_ints( lda, inca );
174 
175  bl1_toggle_trans( transb );
176 
177  bl1_swap_ints( m, n );
178  bl1_swap_ints( m_gemm, n_gemm );
179  bl1_sswap_pointers( a, b );
180  bl1_swap_ints( lda, ldb );
181  bl1_swap_ints( inca, incb );
182  bl1_swap_trans( transa, transb );
183  }
184  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
185  {
186  // requested operation: C_r += tr( A_r ) * tr( B_r )
187  // effective operation: C_c += tr( B_c ) * tr( A_c )
188  bl1_swap_ints( lda, inca );
189  bl1_swap_ints( ldb, incb );
190  bl1_swap_ints( ldc, incc );
191 
192  bl1_swap_ints( m, n );
193  bl1_swap_ints( m_gemm, n_gemm );
194  bl1_sswap_pointers( a, b );
195  bl1_swap_ints( lda, ldb );
196  bl1_swap_ints( inca, incb );
197  bl1_swap_trans( transa, transb );
198  }
199  }
200  }
201 
202  // There are two cases where we need to perform the gemm and then axpy
203  // the result into C with a transposition. We handle those cases here.
204  if ( gemm_needs_axpyt )
205  {
206  // We need a temporary matrix for holding C^T. Notice that m and n
207  // represent the dimensions of C, while m_gemm and n_gemm are the
208  // dimensions of the actual product op(A)*op(B), which may be n-by-m
209  // since the operands may have been swapped.
210  c_trans = bl1_sallocm( m_gemm, n_gemm );
211  ldc_trans = m_gemm;
212  incc_trans = 1;
213 
214  // Compute tr( A ) * tr( B ), where A and B may have been swapped
215  // to reference the other, and store the result in C_trans.
216  bl1_sgemm_blas( transa,
217  transb,
218  m_gemm,
219  n_gemm,
220  k,
221  alpha,
222  a, lda,
223  b, ldb,
224  &zero,
225  c_trans, ldc_trans );
226 
227  // Scale C by beta.
228  bl1_sscalm( BLIS1_NO_CONJUGATE,
229  m,
230  n,
231  beta,
232  c, incc, ldc );
233 
234  // And finally, accumulate the matrix product in C_trans into C
235  // with a transpose.
236  bl1_saxpymt( BLIS1_TRANSPOSE,
237  m,
238  n,
239  &one,
240  c_trans, incc_trans, ldc_trans,
241  c, incc, ldc );
242 
243  // Free the temporary matrix for C.
244  bl1_sfree( c_trans );
245  }
246  else // no extra axpyt step needed
247  {
248  bl1_sgemm_blas( transa,
249  transb,
250  m_gemm,
251  n_gemm,
252  k,
253  alpha,
254  a, lda,
255  b, ldb,
256  beta,
257  c, ldc );
258  }
259 
260  // Free any temporary contiguous matrices, copying the result back to
261  // the original matrix.
262  bl1_sfree_contigm( a_save, a_rs_save, a_cs_save,
263  &a_unswap, &a_rs, &a_cs );
264 
265  bl1_sfree_contigm( b_save, b_rs_save, b_cs_save,
266  &b_unswap, &b_rs, &b_cs );
267 
268  bl1_sfree_saved_contigm( m_save,
269  n_save,
270  c_save, c_rs_save, c_cs_save,
271  &c, &c_rs, &c_cs );
272 }
void bl1_sfree_saved_contigm(int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:13
float * bl1_sallocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:30
float bl1_s1(void)
Definition: bl1_constants.c:47
BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
void bl1_sfree(float *p)
Definition: bl1_free.c:30
void bl1_screate_contigm(int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:13
BLIS1_TRANSPOSE
Definition: blis_type_defs.h:55
void bl1_sgemm_blas(trans1_t transa, trans1_t transb, int m, int n, int k, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc)
Definition: bl1_gemm.c:1213
int bl1_zero_dim3(int m, int k, int n)
Definition: bl1_is.c:123
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_sfree_contigm(float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:13
void bl1_sscalm(conj1_t conj, int m, int n, float *alpha, float *a, int a_rs, int a_cs)
Definition: bl1_scalm.c:13
void bl1_saxpymt(trans1_t trans, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
Definition: bl1_axpymt.c:13
void bl1_screate_contigmt(trans1_t trans_dims, int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmt.c:13
float bl1_s0(void)
Definition: bl1_constants.c:111

◆ bl1_sgemm_blas()

void bl1_sgemm_blas ( trans1_t  transa,
trans1_t  transb,
int  m,
int  n,
int  k,
float *  alpha,
float *  a,
int  lda,
float *  b,
int  ldb,
float *  beta,
float *  c,
int  ldc 
)

References bl1_param_map_to_netlib_trans(), cblas_sgemm(), CblasColMajor, and F77_sgemm().

Referenced by bl1_sgemm().

1214 {
1215 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1216  enum CBLAS_ORDER cblas_order = CblasColMajor;
1217  enum CBLAS_TRANSPOSE cblas_transa;
1218  enum CBLAS_TRANSPOSE cblas_transb;
1219 
1220  bl1_param_map_to_netlib_trans( transa, &cblas_transa );
1221  bl1_param_map_to_netlib_trans( transb, &cblas_transb );
1222 
1223  cblas_sgemm( cblas_order,
1224  cblas_transa,
1225  cblas_transb,
1226  m,
1227  n,
1228  k,
1229  *alpha,
1230  a, lda,
1231  b, ldb,
1232  *beta,
1233  c, ldc );
1234 #else
1235  char blas_transa;
1236  char blas_transb;
1237 
1238  bl1_param_map_to_netlib_trans( transa, &blas_transa );
1239  bl1_param_map_to_netlib_trans( transb, &blas_transb );
1240 
1241  F77_sgemm( &blas_transa,
1242  &blas_transb,
1243  &m,
1244  &n,
1245  &k,
1246  alpha,
1247  a, &lda,
1248  b, &ldb,
1249  beta,
1250  c, &ldc );
1251 #endif
1252 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void F77_sgemm(char *transa, char *transb, int *m, int *n, int *k, float *alpha, float *a, int *lda, float *b, int *ldb, float *beta, float *c, int *ldc)
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
CblasColMajor
Definition: blis_prototypes_cblas.h:17
void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc)

◆ bl1_shemm()

void bl1_shemm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs,
float *  beta,
float *  c,
int  c_rs,
int  c_cs 
)

References bl1_ssymm().

14 {
15  bl1_ssymm( side,
16  uplo,
17  m,
18  n,
19  alpha,
20  a, a_rs, a_cs,
21  b, b_rs, b_cs,
22  beta,
23  c, c_rs, c_cs );
24 }
void bl1_ssymm(side1_t side, uplo1_t uplo, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
Definition: bl1_symm.c:13

◆ bl1_sher2k()

void bl1_sher2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs,
float *  beta,
float *  c,
int  c_rs,
int  c_cs 
)

References bl1_ssyr2k().

14 {
15  bl1_ssyr2k( uplo,
16  trans,
17  m,
18  k,
19  alpha,
20  a, a_rs, a_cs,
21  b, b_rs, b_cs,
22  beta,
23  c, c_rs, c_cs );
24 }
void bl1_ssyr2k(uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
Definition: bl1_syr2k.c:13

◆ bl1_sherk()

void bl1_sherk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  beta,
float *  c,
int  c_rs,
int  c_cs 
)

References bl1_ssyrk().

14 {
15  bl1_ssyrk( uplo,
16  trans,
17  m,
18  k,
19  alpha,
20  a, a_rs, a_cs,
21  beta,
22  c, c_rs, c_cs );
23 }
void bl1_ssyrk(uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *beta, float *c, int c_rs, int c_cs)
Definition: bl1_syrk.c:13

◆ bl1_ssymm()

void bl1_ssymm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs,
float *  beta,
float *  c,
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_s0(), bl1_s1(), bl1_sallocm(), bl1_saxpymt(), bl1_scopymt(), bl1_screate_contigm(), bl1_screate_contigmr(), bl1_set_dim_with_side(), bl1_sfree(), bl1_sfree_contigm(), bl1_sfree_saved_contigm(), bl1_sscalm(), bl1_ssymm_blas(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by bl1_shemm(), FLA_Hemm_external(), and FLA_Symm_external().

14 {
15  int m_save = m;
16  int n_save = n;
17  float* a_save = a;
18  float* b_save = b;
19  float* c_save = c;
20  int a_rs_save = a_rs;
21  int a_cs_save = a_cs;
22  int b_rs_save = b_rs;
23  int b_cs_save = b_cs;
24  int c_rs_save = c_rs;
25  int c_cs_save = c_cs;
26  float zero = bl1_s0();
27  float one = bl1_s1();
28  float* b_copy;
29  float* c_trans;
30  int dim_a;
31  int lda, inca;
32  int ldb, incb;
33  int ldc, incc;
34  int ldb_copy, incb_copy;
35  int ldc_trans, incc_trans;
36  int symm_needs_copyb = FALSE;
37  int symm_needs_transb = FALSE;
38  int symm_needs_axpyt = FALSE;
39 
40  // Return early if possible.
41  if ( bl1_zero_dim2( m, n ) ) return;
42 
43  // If necessary, allocate, initialize, and use a temporary contiguous
44  // copy of each matrix rather than the original matrices.
45  bl1_set_dim_with_side( side, m, n, &dim_a );
46  bl1_screate_contigmr( uplo,
47  dim_a,
48  dim_a,
49  a_save, a_rs_save, a_cs_save,
50  &a, &a_rs, &a_cs );
51 
52  bl1_screate_contigm( m,
53  n,
54  b_save, b_rs_save, b_cs_save,
55  &b, &b_rs, &b_cs );
56 
57  bl1_screate_contigm( m,
58  n,
59  c_save, c_rs_save, c_cs_save,
60  &c, &c_rs, &c_cs );
61 
62  // Initialize with values assuming column-major storage.
63  lda = a_cs;
64  inca = a_rs;
65  ldb = b_cs;
66  incb = b_rs;
67  ldc = c_cs;
68  incc = c_rs;
69 
70  // Adjust the parameters based on the storage of each matrix.
71  if ( bl1_is_col_storage( c_rs, c_cs ) )
72  {
73  if ( bl1_is_col_storage( a_rs, a_cs ) )
74  {
75  if ( bl1_is_col_storage( b_rs, b_cs ) )
76  {
77  // requested operation: C_c += uplo( A_c ) * B_c
78  // effective operation: C_c += uplo( A_c ) * B_c
79  }
80  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
81  {
82  // requested operation: C_c += uplo( A_c ) * B_r
83  // effective operation: C_c += uplo( A_c ) * B_c
84  symm_needs_copyb = TRUE;
85  }
86  }
87  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
88  {
89  if ( bl1_is_col_storage( b_rs, b_cs ) )
90  {
91  // requested operation: C_c += uplo( A_r ) * B_c
92  // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
93  bl1_swap_ints( lda, inca );
94 
95  bl1_toggle_uplo( uplo );
96  }
97  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
98  {
99  // requested operation: C_c += uplo( A_r ) * B_r
100  // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
101  bl1_swap_ints( lda, inca );
102  bl1_swap_ints( ldb, incb );
103 
104  bl1_toggle_side( side );
105  bl1_toggle_uplo( uplo );
106 
107  symm_needs_axpyt = TRUE;
108  }
109  }
110  }
111  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
112  {
113  if ( bl1_is_col_storage( a_rs, a_cs ) )
114  {
115  if ( bl1_is_col_storage( b_rs, b_cs ) )
116  {
117  // requested operation: C_r += uplo( A_c ) * B_c
118  // effective operation: C_c += ( uplo( A_c ) * B_c )^T
119  bl1_swap_ints( ldc, incc );
120 
121  bl1_swap_ints( m, n );
122 
123  symm_needs_axpyt = TRUE;
124  }
125  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
126  {
127  // requested operation: C_r += uplo( A_c ) * B_r
128  // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
129  bl1_swap_ints( ldc, incc );
130  bl1_swap_ints( ldb, incb );
131 
132  bl1_swap_ints( m, n );
133 
134  bl1_toggle_side( side );
135  }
136  }
137  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
138  {
139  if ( bl1_is_col_storage( b_rs, b_cs ) )
140  {
141  // requested operation: C_r += uplo( A_r ) * B_c
142  // effective operation: C_c += B_c^T * ~uplo( A_c )
143  bl1_swap_ints( ldc, incc );
144  bl1_swap_ints( lda, inca );
145 
146  bl1_swap_ints( m, n );
147 
148  bl1_toggle_side( side );
149  bl1_toggle_uplo( uplo );
150 
151  symm_needs_copyb = TRUE;
152  symm_needs_transb = TRUE;
153  }
154  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
155  {
156  // requested operation: C_r += uplo( A_r ) * B_r
157  // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
158  bl1_swap_ints( ldc, incc );
159  bl1_swap_ints( lda, inca );
160  bl1_swap_ints( ldb, incb );
161 
162  bl1_swap_ints( m, n );
163 
164  bl1_toggle_uplo( uplo );
165  bl1_toggle_side( side );
166  }
167  }
168  }
169 
170  // We need a temporary matrix for the cases where B needs to be copied.
171  b_copy = b;
172  ldb_copy = ldb;
173  incb_copy = incb;
174 
175  // There are two cases where we need to make a copy of B: one where the
176  // copy's dimensions are transposed from the original B, and one where
177  // the dimensions are not swapped.
178  if ( symm_needs_copyb )
179  {
180  trans1_t transb;
181 
182  // Set transb, which determines whether or not we need to copy from B
183  // as if it needs a transposition. If a transposition is needed, then
184  // m and n have already been swapped. So in either case m
185  // represents the leading dimension of the copy.
186  if ( symm_needs_transb ) transb = BLIS1_TRANSPOSE;
187  else transb = BLIS1_NO_TRANSPOSE;
188 
189  b_copy = bl1_sallocm( m, n );
190  ldb_copy = m;
191  incb_copy = 1;
192 
193  bl1_scopymt( transb,
194  m,
195  n,
196  b, incb, ldb,
197  b_copy, incb_copy, ldb_copy );
198  }
199 
200  // There are two cases where we need to perform the symm and then axpy
201  // the result into C with a transposition. We handle those cases here.
202  if ( symm_needs_axpyt )
203  {
204  // We need a temporary matrix for holding C^T. Notice that m and n
205  // represent the dimensions of C, and thus C_trans is n-by-m
206  // (interpreting both as column-major matrices). So the leading
207  // dimension of the temporary matrix holding C^T is n.
208  c_trans = bl1_sallocm( n, m );
209  ldc_trans = n;
210  incc_trans = 1;
211 
212  // Compute A * B (or B * A) and store the result in C_trans.
213  // Note that there is no overlap between the axpyt cases and
214  // the conja/copyb cases, hence the use of a, b, lda, and ldb.
215  bl1_ssymm_blas( side,
216  uplo,
217  n,
218  m,
219  alpha,
220  a, lda,
221  b, ldb,
222  &zero,
223  c_trans, ldc_trans );
224 
225  // Scale C by beta.
226  bl1_sscalm( BLIS1_NO_CONJUGATE,
227  m,
228  n,
229  beta,
230  c, incc, ldc );
231 
232  // And finally, accumulate the matrix product in C_trans into C
233  // with a transpose.
234  bl1_saxpymt( BLIS1_TRANSPOSE,
235  m,
236  n,
237  &one,
238  c_trans, incc_trans, ldc_trans,
239  c, incc, ldc );
240 
241  // Free the temporary matrix for C.
242  bl1_sfree( c_trans );
243  }
244  else // no extra axpyt step needed
245  {
246  bl1_ssymm_blas( side,
247  uplo,
248  m,
249  n,
250  alpha,
251  a, lda,
252  b_copy, ldb_copy,
253  beta,
254  c, ldc );
255  }
256 
257  if ( symm_needs_copyb )
258  bl1_sfree( b_copy );
259 
260  // Free any temporary contiguous matrices, copying the result back to
261  // the original matrix.
262  bl1_sfree_contigm( a_save, a_rs_save, a_cs_save,
263  &a, &a_rs, &a_cs );
264 
265  bl1_sfree_contigm( b_save, b_rs_save, b_cs_save,
266  &b, &b_rs, &b_cs );
267 
268  bl1_sfree_saved_contigm( m_save,
269  n_save,
270  c_save, c_rs_save, c_cs_save,
271  &c, &c_rs, &c_cs );
272 }
void bl1_sfree_saved_contigm(int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:13
float * bl1_sallocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:30
float bl1_s1(void)
Definition: bl1_constants.c:47
BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
void bl1_sfree(float *p)
Definition: bl1_free.c:30
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
void bl1_screate_contigm(int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:13
trans1_t
Definition: blis_type_defs.h:52
BLIS1_TRANSPOSE
Definition: blis_type_defs.h:55
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_ssymm_blas(side1_t side, uplo1_t uplo, int m, int n, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc)
Definition: bl1_symm.c:1059
void bl1_sfree_contigm(float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:13
void bl1_sscalm(conj1_t conj, int m, int n, float *alpha, float *a, int a_rs, int a_cs)
Definition: bl1_scalm.c:13
void bl1_saxpymt(trans1_t trans, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
Definition: bl1_axpymt.c:13
BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
void bl1_scopymt(trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:81
void bl1_screate_contigmr(uplo1_t uplo, int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:13
float bl1_s0(void)
Definition: bl1_constants.c:111

◆ bl1_ssymm_blas()

void bl1_ssymm_blas ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
float *  alpha,
float *  a,
int  lda,
float *  b,
int  ldb,
float *  beta,
float *  c,
int  ldc 
)

References bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(), cblas_ssymm(), CblasColMajor, and F77_ssymm().

Referenced by bl1_ssymm().

1060 {
1061 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1062  enum CBLAS_ORDER cblas_order = CblasColMajor;
1063  enum CBLAS_SIDE cblas_side;
1064  enum CBLAS_UPLO cblas_uplo;
1065 
1066  bl1_param_map_to_netlib_side( side, &cblas_side );
1067  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
1068 
1069  cblas_ssymm( cblas_order,
1070  cblas_side,
1071  cblas_uplo,
1072  m,
1073  n,
1074  *alpha,
1075  a, lda,
1076  b, ldb,
1077  *beta,
1078  c, ldc );
1079 #else
1080  char blas_side;
1081  char blas_uplo;
1082 
1083  bl1_param_map_to_netlib_side( side, &blas_side );
1084  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
1085 
1086  F77_ssymm( &blas_side,
1087  &blas_uplo,
1088  &m,
1089  &n,
1090  alpha,
1091  a, &lda,
1092  b, &ldb,
1093  beta,
1094  c, &ldc );
1095 #endif
1096 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
void cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc)
void bl1_param_map_to_netlib_side(side1_t blis_side, void *blas_side)
Definition: bl1_param_map.c:71
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
CblasColMajor
Definition: blis_prototypes_cblas.h:17
CBLAS_SIDE
Definition: blis_prototypes_cblas.h:21
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47
void F77_ssymm(char *side, char *uplo, int *m, int *n, float *alpha, float *a, int *lda, float *b, int *ldb, float *beta, float *c, int *ldc)

◆ bl1_ssyr2k()

void bl1_ssyr2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs,
float *  beta,
float *  c,
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_sallocm(), bl1_scopymt(), bl1_screate_contigmr(), bl1_screate_contigmt(), bl1_set_dims_with_trans(), bl1_sfree(), bl1_sfree_contigm(), bl1_sfree_saved_contigmr(), bl1_ssyr2k_blas(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_sher2k(), FLA_Her2k_external(), and FLA_Syr2k_external().

14 {
15  uplo1_t uplo_save = uplo;
16  int m_save = m;
17  float* a_save = a;
18  float* b_save = b;
19  float* c_save = c;
20  int a_rs_save = a_rs;
21  int a_cs_save = a_cs;
22  int b_rs_save = b_rs;
23  int b_cs_save = b_cs;
24  int c_rs_save = c_rs;
25  int c_cs_save = c_cs;
26  float* a_copy;
27  float* b_copy;
28  int lda, inca;
29  int ldb, incb;
30  int ldc, incc;
31  int lda_copy, inca_copy;
32  int ldb_copy, incb_copy;
33  int syr2k_needs_copya = FALSE;
34  int syr2k_needs_copyb = FALSE;
35 
36  // Return early if possible.
37  if ( bl1_zero_dim2( m, k ) ) return;
38 
39  // If necessary, allocate, initialize, and use a temporary contiguous
40  // copy of each matrix rather than the original matrices.
41  bl1_screate_contigmt( trans,
42  m,
43  k,
44  a_save, a_rs_save, a_cs_save,
45  &a, &a_rs, &a_cs );
46 
47  bl1_screate_contigmt( trans,
48  m,
49  k,
50  b_save, b_rs_save, b_cs_save,
51  &b, &b_rs, &b_cs );
52 
53  bl1_screate_contigmr( uplo,
54  m,
55  m,
56  c_save, c_rs_save, c_cs_save,
57  &c, &c_rs, &c_cs );
58 
59  // Initialize with values assuming column-major storage.
60  lda = a_cs;
61  inca = a_rs;
62  ldb = b_cs;
63  incb = b_rs;
64  ldc = c_cs;
65  incc = c_rs;
66 
67  // Adjust the parameters based on the storage of each matrix.
68  if ( bl1_is_col_storage( c_rs, c_cs ) )
69  {
70  if ( bl1_is_col_storage( a_rs, a_cs ) )
71  {
72  if ( bl1_is_col_storage( b_rs, b_cs ) )
73  {
74  // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
75  // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
76  }
77  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
78  {
79  // requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
80  // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
81  syr2k_needs_copyb = TRUE;
82  }
83  }
84  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
85  {
86  if ( bl1_is_col_storage( b_rs, b_cs ) )
87  {
88  // requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
89  // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
90  syr2k_needs_copya = TRUE;
91  }
92  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
93  {
94  // requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
95  // effective operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
96  bl1_swap_ints( lda, inca );
97  bl1_swap_ints( ldb, incb );
98 
99  bl1_toggle_trans( trans );
100  }
101  }
102  }
103  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
104  {
105  if ( bl1_is_col_storage( a_rs, a_cs ) )
106  {
107  if ( bl1_is_col_storage( b_rs, b_cs ) )
108  {
109  // requested operation: uplo( C_r ) += A_c * B_c' + B_c * A_c'
110  // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
111  bl1_swap_ints( ldc, incc );
112 
113  bl1_toggle_uplo( uplo );
114  }
115  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
116  {
117  // requested operation: uplo( C_r ) += A_c * B_r' + B_r * A_c'
118  // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
119  syr2k_needs_copyb = TRUE;
120 
121  bl1_swap_ints( ldc, incc );
122 
123  bl1_toggle_uplo( uplo );
124  }
125  }
126  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
127  {
128  if ( bl1_is_col_storage( b_rs, b_cs ) )
129  {
130  // requested operation: uplo( C_r ) += A_r * B_c' + B_c * A_r'
131  // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
132  syr2k_needs_copya = TRUE;
133 
134  bl1_swap_ints( ldc, incc );
135 
136  bl1_toggle_uplo( uplo );
137  }
138  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
139  {
140  // requested operation: uplo( C_r ) += A_r * B_r' + B_r * A_r'
141  // effective operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
142  bl1_swap_ints( ldc, incc );
143  bl1_swap_ints( lda, inca );
144  bl1_swap_ints( ldb, incb );
145 
146  bl1_toggle_uplo( uplo );
147  bl1_toggle_trans( trans );
148  }
149  }
150  }
151 
152  a_copy = a;
153  lda_copy = lda;
154  inca_copy = inca;
155 
156  // There are two cases where we need to copy A to column-major storage.
157  // We handle those two cases here.
158  if ( syr2k_needs_copya )
159  {
160  int m_a;
161  int n_a;
162 
163  // Determine the dimensions of A according to the value of trans. We
164  // need this in order to set the leading dimension of the copy of A.
165  bl1_set_dims_with_trans( trans, m, k, &m_a, &n_a );
166 
167  // We need a temporary matrix to hold a column-major copy of A.
168  a_copy = bl1_sallocm( m, k );
169  lda_copy = m_a;
170  inca_copy = 1;
171 
172  // Copy the contents of A into A_copy.
173  bl1_scopymt( BLIS1_NO_TRANSPOSE,
174  m_a,
175  n_a,
176  a, inca, lda,
177  a_copy, inca_copy, lda_copy );
178  }
179 
180  b_copy = b;
181  ldb_copy = ldb;
182  incb_copy = incb;
183 
184  // There are two cases where we need to copy B to column-major storage.
185  // We handle those two cases here.
186  if ( syr2k_needs_copyb )
187  {
188  int m_b;
189  int n_b;
190 
191  // Determine the dimensions of B according to the value of trans. We
192  // need this in order to set the leading dimension of the copy of B.
193  bl1_set_dims_with_trans( trans, m, k, &m_b, &n_b );
194 
195  // We need a temporary matrix to hold a column-major copy of B.
196  b_copy = bl1_sallocm( m, k );
197  ldb_copy = m_b;
198  incb_copy = 1;
199 
200  // Copy the contents of B into B_copy.
201  bl1_scopymt( BLIS1_NO_TRANSPOSE,
202  m_b,
203  n_b,
204  b, incb, ldb,
205  b_copy, incb_copy, ldb_copy );
206  }
207 
208  bl1_ssyr2k_blas( uplo,
209  trans,
210  m,
211  k,
212  alpha,
213  a_copy, lda_copy,
214  b_copy, ldb_copy,
215  beta,
216  c, ldc );
217 
218  if ( syr2k_needs_copya )
219  bl1_sfree( a_copy );
220 
221  if ( syr2k_needs_copyb )
222  bl1_sfree( b_copy );
223 
224  // Free any temporary contiguous matrices, copying the result back to
225  // the original matrix.
226  bl1_sfree_contigm( a_save, a_rs_save, a_cs_save,
227  &a, &a_rs, &a_cs );
228 
229  bl1_sfree_contigm( b_save, b_rs_save, b_cs_save,
230  &b, &b_rs, &b_cs );
231 
232  bl1_sfree_saved_contigmr( uplo_save,
233  m_save,
234  m_save,
235  c_save, c_rs_save, c_cs_save,
236  &c, &c_rs, &c_cs );
237 }
void bl1_sfree_saved_contigmr(uplo1_t uplo, int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigmr.c:13
uplo1_t
Definition: blis_type_defs.h:60
float * bl1_sallocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:30
void bl1_sfree(float *p)
Definition: bl1_free.c:30
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_sfree_contigm(float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:13
BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
void bl1_set_dims_with_trans(trans1_t trans, int m, int n, int *m_new, int *n_new)
Definition: bl1_set_dims.c:13
void bl1_ssyr2k_blas(uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc)
Definition: bl1_syr2k.c:919
void bl1_screate_contigmt(trans1_t trans_dims, int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmt.c:13
void bl1_scopymt(trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:81
void bl1_screate_contigmr(uplo1_t uplo, int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:13

◆ bl1_ssyr2k_blas()

void bl1_ssyr2k_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float *  alpha,
float *  a,
int  lda,
float *  b,
int  ldb,
float *  beta,
float *  c,
int  ldc 
)

References bl1_is_conjtrans(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), BLIS1_TRANSPOSE, cblas_ssyr2k(), CblasColMajor, and F77_ssyr2k().

Referenced by bl1_ssyr2k().

920 {
921 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
922  enum CBLAS_ORDER cblas_order = CblasColMajor;
923  enum CBLAS_UPLO cblas_uplo;
924  enum CBLAS_TRANSPOSE cblas_trans;
925 
926  // BLAS doesn't recognize the conjugate-transposition constant for syr2k,
927  // so we have to map it down to regular transposition.
928  if ( bl1_is_conjtrans( trans ) ) trans = BLIS1_TRANSPOSE;
929 
930  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
931  bl1_param_map_to_netlib_trans( trans, &cblas_trans );
932 
933  cblas_ssyr2k( cblas_order,
934  cblas_uplo,
935  cblas_trans,
936  m,
937  k,
938  *alpha,
939  a, lda,
940  b, ldb,
941  *beta,
942  c, ldc );
943 #else
944  char blas_uplo;
945  char blas_trans;
946 
947  // BLAS doesn't recognize the conjugate-transposition constant for syr2k,
948  // so we have to map it down to regular transposition.
949  if ( bl1_is_conjtrans( trans ) ) trans = BLIS1_TRANSPOSE;
950 
951  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
952  bl1_param_map_to_netlib_trans( trans, &blas_trans );
953 
954  F77_ssyr2k( &blas_uplo,
955  &blas_trans,
956  &m,
957  &k,
958  alpha,
959  a, &lda,
960  b, &ldb,
961  beta,
962  c, &ldc );
963 #endif
964 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
int bl1_is_conjtrans(trans1_t trans)
Definition: bl1_is.c:30
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void F77_ssyr2k(char *uplo, char *transa, int *n, int *k, float *alpha, float *a, int *lda, float *b, int *ldb, float *beta, float *c, int *ldc)
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
BLIS1_TRANSPOSE
Definition: blis_type_defs.h:55
void cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc)
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
CblasColMajor
Definition: blis_prototypes_cblas.h:17
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47

◆ bl1_ssyrk()

void bl1_ssyrk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  beta,
float *  c,
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_screate_contigmr(), bl1_screate_contigmt(), bl1_sfree_contigm(), bl1_sfree_saved_contigmr(), bl1_ssyrk_blas(), and bl1_zero_dim2().

Referenced by bl1_sherk(), FLA_Herk_external(), FLA_Syrk_external(), and FLA_UDdate_UT_ops_var1().

14 {
15  uplo1_t uplo_save = uplo;
16  int m_save = m;
17  float* a_save = a;
18  float* c_save = c;
19  int a_rs_save = a_rs;
20  int a_cs_save = a_cs;
21  int c_rs_save = c_rs;
22  int c_cs_save = c_cs;
23  int lda, inca;
24  int ldc, incc;
25 
26  // Return early if possible.
27  if ( bl1_zero_dim2( m, k ) ) return;
28 
29  // If necessary, allocate, initialize, and use a temporary contiguous
30  // copy of each matrix rather than the original matrices.
31  bl1_screate_contigmt( trans,
32  m,
33  k,
34  a_save, a_rs_save, a_cs_save,
35  &a, &a_rs, &a_cs );
36 
37  bl1_screate_contigmr( uplo,
38  m,
39  m,
40  c_save, c_rs_save, c_cs_save,
41  &c, &c_rs, &c_cs );
42 
43  // Initialize with values assuming column-major storage.
44  lda = a_cs;
45  inca = a_rs;
46  ldc = c_cs;
47  incc = c_rs;
48 
49  // Adjust the parameters based on the storage of each matrix.
50  if ( bl1_is_col_storage( c_rs, c_cs ) )
51  {
52  if ( bl1_is_col_storage( a_rs, a_cs ) )
53  {
54  // requested operation: uplo( C_c ) += A_c * A_c^T
55  // effective operation: uplo( C_c ) += A_c * A_c^T
56  }
57  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
58  {
59  // requested operation: uplo( C_c ) += A_r * A_r^T
60  // effective operation: uplo( C_c ) += A_c^T * A_c
61  bl1_swap_ints( lda, inca );
62 
63  bl1_toggle_trans( trans );
64  }
65  }
66  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
67  {
68  if ( bl1_is_col_storage( a_rs, a_cs ) )
69  {
70  // requested operation: uplo( C_r ) += A_c * A_c^T
71  // effective operation: ~uplo( C_c ) += A_c * A_c^T
72  bl1_swap_ints( ldc, incc );
73 
74  bl1_toggle_uplo( uplo );
75  }
76  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
77  {
78  // requested operation: uplo( C_r ) += A_r * A_r^T
79  // effective operation: ~uplo( C_c ) += A_c^T * A_c
80  bl1_swap_ints( ldc, incc );
81  bl1_swap_ints( lda, inca );
82 
83  bl1_toggle_uplo( uplo );
84  bl1_toggle_trans( trans );
85  }
86  }
87 
88  bl1_ssyrk_blas( uplo,
89  trans,
90  m,
91  k,
92  alpha,
93  a, lda,
94  beta,
95  c, ldc );
96 
97  // Free any temporary contiguous matrices, copying the result back to
98  // the original matrix.
99  bl1_sfree_contigm( a_save, a_rs_save, a_cs_save,
100  &a, &a_rs, &a_cs );
101 
102  bl1_sfree_saved_contigmr( uplo_save,
103  m_save,
104  m_save,
105  c_save, c_rs_save, c_cs_save,
106  &c, &c_rs, &c_cs );
107 }
void bl1_sfree_saved_contigmr(uplo1_t uplo, int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigmr.c:13
uplo1_t
Definition: blis_type_defs.h:60
void bl1_ssyrk_blas(uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int lda, float *beta, float *c, int ldc)
Definition: bl1_syrk.c:399
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_sfree_contigm(float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:13
void bl1_screate_contigmt(trans1_t trans_dims, int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmt.c:13
void bl1_screate_contigmr(uplo1_t uplo, int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:13

◆ bl1_ssyrk_blas()

void bl1_ssyrk_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float *  alpha,
float *  a,
int  lda,
float *  beta,
float *  c,
int  ldc 
)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_ssyrk(), CblasColMajor, and F77_ssyrk().

Referenced by bl1_ssyrk().

400 {
401 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
402  enum CBLAS_ORDER cblas_order = CblasColMajor;
403  enum CBLAS_UPLO cblas_uplo;
404  enum CBLAS_TRANSPOSE cblas_trans;
405 
406  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
407  bl1_param_map_to_netlib_trans( trans, &cblas_trans );
408 
409  cblas_ssyrk( cblas_order,
410  cblas_uplo,
411  cblas_trans,
412  m,
413  k,
414  *alpha,
415  a, lda,
416  *beta,
417  c, ldc );
418 #else
419  char blas_uplo;
420  char blas_trans;
421 
422  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
423  bl1_param_map_to_netlib_trans( trans, &blas_trans );
424 
425  F77_ssyrk( &blas_uplo,
426  &blas_trans,
427  &m,
428  &k,
429  alpha,
430  a, &lda,
431  beta,
432  c, &ldc );
433 #endif
434 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
CblasColMajor
Definition: blis_prototypes_cblas.h:17
void F77_ssyrk(char *uplo, char *transa, int *n, int *k, float *alpha, float *a, int *lda, float *beta, float *c, int *ldc)
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47
void cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const float *A, const int lda, const float beta, float *C, const int ldc)

◆ bl1_strmm()

void bl1_strmm ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bl1_is_col_storage(), bl1_screate_contigm(), bl1_screate_contigmr(), bl1_set_dim_with_side(), bl1_sfree_contigm(), bl1_sfree_saved_contigm(), bl1_strmm_blas(), and bl1_zero_dim2().

Referenced by bl1_strmmsx(), and FLA_Trmm_external().

14 {
15  int m_save = m;
16  int n_save = n;
17  float* a_save = a;
18  float* b_save = b;
19  int a_rs_save = a_rs;
20  int a_cs_save = a_cs;
21  int b_rs_save = b_rs;
22  int b_cs_save = b_cs;
23  int dim_a;
24  int lda, inca;
25  int ldb, incb;
26 
27  // Return early if possible.
28  if ( bl1_zero_dim2( m, n ) ) return;
29 
30  // If necessary, allocate, initialize, and use a temporary contiguous
31  // copy of each matrix rather than the original matrices.
32  bl1_set_dim_with_side( side, m, n, &dim_a );
33  bl1_screate_contigmr( uplo,
34  dim_a,
35  dim_a,
36  a_save, a_rs_save, a_cs_save,
37  &a, &a_rs, &a_cs );
38 
39  bl1_screate_contigm( m,
40  n,
41  b_save, b_rs_save, b_cs_save,
42  &b, &b_rs, &b_cs );
43 
44  // Initialize with values assuming column-major storage.
45  lda = a_cs;
46  inca = a_rs;
47  ldb = b_cs;
48  incb = b_rs;
49 
50  // Adjust the parameters based on the storage of each matrix.
51  if ( bl1_is_col_storage( b_rs, b_cs ) )
52  {
53  if ( bl1_is_col_storage( a_rs, a_cs ) )
54  {
55  // requested operation: B_c := tr( uplo( A_c ) ) * B_c
56  // effective operation: B_c := tr( uplo( A_c ) ) * B_c
57  }
58  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
59  {
60  // requested operation: B_c := tr( uplo( A_r ) ) * B_c
61  // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c
62  bl1_swap_ints( lda, inca );
63 
64  bl1_toggle_uplo( uplo );
65  bl1_toggle_trans( trans );
66  }
67  }
68  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
69  {
70  if ( bl1_is_col_storage( a_rs, a_cs ) )
71  {
72  // requested operation: B_r := tr( uplo( A_c ) ) * B_r
73  // effective operation: B_c := B_c * tr( uplo( A_c ) )^T
74  bl1_swap_ints( ldb, incb );
75 
76  bl1_swap_ints( m, n );
77 
78  bl1_toggle_side( side );
79  bl1_toggle_trans( trans );
80  }
81  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
82  {
83  // requested operation: B_r := tr( uplo( A_r ) ) * B_r
84  // effective operation: B_c := B_c * tr( ~uplo( A_c ) )
85  bl1_swap_ints( ldb, incb );
86  bl1_swap_ints( lda, inca );
87 
88  bl1_swap_ints( m, n );
89 
90  bl1_toggle_uplo( uplo );
91  bl1_toggle_side( side );
92  }
93  }
94 
95  bl1_strmm_blas( side,
96  uplo,
97  trans,
98  diag,
99  m,
100  n,
101  alpha,
102  a, lda,
103  b, ldb );
104 
105  // Free any temporary contiguous matrices, copying the result back to
106  // the original matrix.
107  bl1_sfree_contigm( a_save, a_rs_save, a_cs_save,
108  &a, &a_rs, &a_cs );
109 
110  bl1_sfree_saved_contigm( m_save,
111  n_save,
112  b_save, b_rs_save, b_cs_save,
113  &b, &b_rs, &b_cs );
114 }
void bl1_sfree_saved_contigm(int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:13
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
void bl1_screate_contigm(int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:13
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_strmm_blas(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int lda, float *b, int ldb)
Definition: bl1_trmm.c:520
void bl1_sfree_contigm(float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:13
void bl1_screate_contigmr(uplo1_t uplo, int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:13

◆ bl1_strmm_blas()

void bl1_strmm_blas ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
float *  alpha,
float *  a,
int  lda,
float *  b,
int  ldb 
)

References bl1_param_map_to_netlib_diag(), bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_strmm(), CblasColMajor, and F77_strmm().

Referenced by bl1_strmm().

521 {
522 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
523  enum CBLAS_ORDER cblas_order = CblasColMajor;
524  enum CBLAS_SIDE cblas_side;
525  enum CBLAS_UPLO cblas_uplo;
526  enum CBLAS_TRANSPOSE cblas_trans;
527  enum CBLAS_DIAG cblas_diag;
528 
529  bl1_param_map_to_netlib_side( side, &cblas_side );
530  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
531  bl1_param_map_to_netlib_trans( trans, &cblas_trans );
532  bl1_param_map_to_netlib_diag( diag, &cblas_diag );
533 
534  cblas_strmm( cblas_order,
535  cblas_side,
536  cblas_uplo,
537  cblas_trans,
538  cblas_diag,
539  m,
540  n,
541  *alpha,
542  a, lda,
543  b, ldb );
544 #else
545  char blas_side;
546  char blas_uplo;
547  char blas_trans;
548  char blas_diag;
549 
550  bl1_param_map_to_netlib_side( side, &blas_side );
551  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
552  bl1_param_map_to_netlib_trans( trans, &blas_trans );
553  bl1_param_map_to_netlib_diag( diag, &blas_diag );
554 
555  F77_strmm( &blas_side,
556  &blas_uplo,
557  &blas_trans,
558  &blas_diag,
559  &m,
560  &n,
561  alpha,
562  a, &lda,
563  b, &ldb );
564 #endif
565 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
CBLAS_DIAG
Definition: blis_prototypes_cblas.h:20
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
void bl1_param_map_to_netlib_side(side1_t blis_side, void *blas_side)
Definition: bl1_param_map.c:71
void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, const float *A, const int lda, float *B, const int ldb)
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
CblasColMajor
Definition: blis_prototypes_cblas.h:17
void F77_strmm(char *side, char *uplo, char *transa, char *diag, int *m, int *n, float *alpha, float *a, int *lda, float *b, int *ldb)
CBLAS_SIDE
Definition: blis_prototypes_cblas.h:21
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47
void bl1_param_map_to_netlib_diag(diag1_t blis_diag, void *blas_diag)
Definition: bl1_param_map.c:95

◆ bl1_strmmsx()

void bl1_strmmsx ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs,
float *  beta,
float *  c,
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_s1(), bl1_sallocm(), bl1_saxpymt(), bl1_scopymt(), bl1_screate_contigm(), bl1_screate_contigmr(), bl1_set_dim_with_side(), bl1_sfree(), bl1_sfree_contigm(), bl1_sfree_saved_contigm(), bl1_sscalm(), bl1_strmm(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Trmmsx_external().

14 {
15  int m_save = m;
16  int n_save = n;
17  float* a_save = a;
18  float* b_save = b;
19  float* c_save = c;
20  int a_rs_save = a_rs;
21  int a_cs_save = a_cs;
22  int b_rs_save = b_rs;
23  int b_cs_save = b_cs;
24  int c_rs_save = c_rs;
25  int c_cs_save = c_cs;
26  float one = bl1_s1();
27  float* b_copy;
28  int dim_a;
29  int b_copy_rs, b_copy_cs;
30 
31  // Return early if possible.
32  if ( bl1_zero_dim2( m, n ) ) return;
33 
34  // If necessary, allocate, initialize, and use a temporary contiguous
35  // copy of each matrix rather than the original matrices.
36  bl1_set_dim_with_side( side, m, n, &dim_a );
37  bl1_screate_contigmr( uplo,
38  dim_a,
39  dim_a,
40  a_save, a_rs_save, a_cs_save,
41  &a, &a_rs, &a_cs );
42 
43  bl1_screate_contigm( m,
44  n,
45  b_save, b_rs_save, b_cs_save,
46  &b, &b_rs, &b_cs );
47 
48  bl1_screate_contigm( m,
49  n,
50  c_save, c_rs_save, c_cs_save,
51  &c, &c_rs, &c_cs );
52 
53  // Create a copy of B to use in the computation so the original matrix is
54  // left untouched.
55  b_copy = bl1_sallocm( m, n );
56 
57  // Match the strides of B_copy to that of B.
58  if ( bl1_is_col_storage( b_rs, b_cs ) )
59  {
60  b_copy_rs = 1;
61  b_copy_cs = m;
62  }
63  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
64  {
65  b_copy_rs = n;
66  b_copy_cs = 1;
67  }
68 
69  // Copy the contents of B to B_copy.
70  bl1_scopymt( BLIS1_NO_TRANSPOSE,
71  m,
72  n,
73  b, b_rs, b_cs,
74  b_copy, b_copy_rs, b_copy_cs );
75 
76  // Perform the operation on B_copy.
77  bl1_strmm( side,
78  uplo,
79  trans,
80  diag,
81  m,
82  n,
83  alpha,
84  a, a_rs, a_cs,
85  b_copy, b_copy_rs, b_copy_cs );
86 
87  // Scale C by beta.
88  bl1_sscalm( BLIS1_NO_CONJUGATE,
89  m,
90  n,
91  beta,
92  c, c_rs, c_cs );
93 
94  // Add B_copy into C.
95  bl1_saxpymt( BLIS1_NO_TRANSPOSE,
96  m,
97  n,
98  &one,
99  b_copy, b_copy_rs, b_copy_cs,
100  c, c_rs, c_cs );
101 
102  // Free the copy of B.
103  bl1_sfree( b_copy );
104 
105  // Free any temporary contiguous matrices, copying the result back to
106  // the original matrix.
107  bl1_sfree_contigm( a_save, a_rs_save, a_cs_save,
108  &a, &a_rs, &a_cs );
109 
110  bl1_sfree_contigm( b_save, b_rs_save, b_cs_save,
111  &b, &b_rs, &b_cs );
112 
113  bl1_sfree_saved_contigm( m_save,
114  n_save,
115  c_save, c_rs_save, c_cs_save,
116  &c, &c_rs, &c_cs );
117 }
void bl1_sfree_saved_contigm(int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:13
void bl1_strmm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
Definition: bl1_trmm.c:13
float * bl1_sallocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:30
float bl1_s1(void)
Definition: bl1_constants.c:47
BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
void bl1_sfree(float *p)
Definition: bl1_free.c:30
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
void bl1_screate_contigm(int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:13
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_sfree_contigm(float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:13
void bl1_sscalm(conj1_t conj, int m, int n, float *alpha, float *a, int a_rs, int a_cs)
Definition: bl1_scalm.c:13
void bl1_saxpymt(trans1_t trans, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
Definition: bl1_axpymt.c:13
BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
void bl1_scopymt(trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:81
void bl1_screate_contigmr(uplo1_t uplo, int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:13

◆ bl1_strsm()

void bl1_strsm ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bl1_is_col_storage(), bl1_screate_contigm(), bl1_screate_contigmr(), bl1_set_dim_with_side(), bl1_sfree_contigm(), bl1_sfree_saved_contigm(), bl1_strsm_blas(), and bl1_zero_dim2().

Referenced by bl1_strsmsx(), FLA_LU_nopiv_ops_var1(), FLA_LU_nopiv_ops_var2(), FLA_LU_nopiv_ops_var3(), FLA_LU_piv_ops_var3(), and FLA_Trsm_external().

14 {
15  int m_save = m;
16  int n_save = n;
17  float* a_save = a;
18  float* b_save = b;
19  int a_rs_save = a_rs;
20  int a_cs_save = a_cs;
21  int b_rs_save = b_rs;
22  int b_cs_save = b_cs;
23  int dim_a;
24  int lda, inca;
25  int ldb, incb;
26 
27  // Return early if possible.
28  if ( bl1_zero_dim2( m, n ) ) return;
29 
30  // If necessary, allocate, initialize, and use a temporary contiguous
31  // copy of each matrix rather than the original matrices.
32  bl1_set_dim_with_side( side, m, n, &dim_a );
33  bl1_screate_contigmr( uplo,
34  dim_a,
35  dim_a,
36  a_save, a_rs_save, a_cs_save,
37  &a, &a_rs, &a_cs );
38 
39  bl1_screate_contigm( m,
40  n,
41  b_save, b_rs_save, b_cs_save,
42  &b, &b_rs, &b_cs );
43 
44  // Initialize with values assuming column-major storage.
45  lda = a_cs;
46  inca = a_rs;
47  ldb = b_cs;
48  incb = b_rs;
49 
50  // Adjust the parameters based on the storage of each matrix.
51  if ( bl1_is_col_storage( b_rs, b_cs ) )
52  {
53  if ( bl1_is_col_storage( a_rs, a_cs ) )
54  {
55  // requested operation: B_c := tr( uplo( A_c ) ) * B_c
56  // effective operation: B_c := tr( uplo( A_c ) ) * B_c
57  }
58  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
59  {
60  // requested operation: B_c := tr( uplo( A_r ) ) * B_c
61  // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c
62  bl1_swap_ints( lda, inca );
63 
64  bl1_toggle_uplo( uplo );
65  bl1_toggle_trans( trans );
66  }
67  }
68  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
69  {
70  if ( bl1_is_col_storage( a_rs, a_cs ) )
71  {
72  // requested operation: B_r := tr( uplo( A_c ) ) * B_r
73  // effective operation: B_c := B_c * tr( uplo( A_c ) )^T
74  bl1_swap_ints( ldb, incb );
75 
76  bl1_swap_ints( m, n );
77 
78  bl1_toggle_side( side );
79  bl1_toggle_trans( trans );
80  }
81  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
82  {
83  // requested operation: B_r := tr( uplo( A_r ) ) * B_r
84  // effective operation: B_c := B_c * tr( ~uplo( A_c ) )
85  bl1_swap_ints( ldb, incb );
86  bl1_swap_ints( lda, inca );
87 
88  bl1_swap_ints( m, n );
89 
90  bl1_toggle_uplo( uplo );
91  bl1_toggle_side( side );
92  }
93  }
94 
95  bl1_strsm_blas( side,
96  uplo,
97  trans,
98  diag,
99  m,
100  n,
101  alpha,
102  a, lda,
103  b, ldb );
104 
105  // Free any temporary contiguous matrices, copying the result back to
106  // the original matrix.
107  bl1_sfree_contigm( a_save, a_rs_save, a_cs_save,
108  &a, &a_rs, &a_cs );
109 
110  bl1_sfree_saved_contigm( m_save,
111  n_save,
112  b_save, b_rs_save, b_cs_save,
113  &b, &b_rs, &b_cs );
114 }
void bl1_sfree_saved_contigm(int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:13
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
void bl1_screate_contigm(int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:13
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_sfree_contigm(float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:13
void bl1_strsm_blas(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int lda, float *b, int ldb)
Definition: bl1_trsm.c:520
void bl1_screate_contigmr(uplo1_t uplo, int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:13

◆ bl1_strsm_blas()

void bl1_strsm_blas ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
float *  alpha,
float *  a,
int  lda,
float *  b,
int  ldb 
)

References bl1_param_map_to_netlib_diag(), bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_strsm(), CblasColMajor, and F77_strsm().

Referenced by bl1_strsm().

521 {
522 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
523  enum CBLAS_ORDER cblas_order = CblasColMajor;
524  enum CBLAS_SIDE cblas_side;
525  enum CBLAS_UPLO cblas_uplo;
526  enum CBLAS_TRANSPOSE cblas_trans;
527  enum CBLAS_DIAG cblas_diag;
528 
529  bl1_param_map_to_netlib_side( side, &cblas_side );
530  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
531  bl1_param_map_to_netlib_trans( trans, &cblas_trans );
532  bl1_param_map_to_netlib_diag( diag, &cblas_diag );
533 
534  cblas_strsm( cblas_order,
535  cblas_side,
536  cblas_uplo,
537  cblas_trans,
538  cblas_diag,
539  m,
540  n,
541  *alpha,
542  a, lda,
543  b, ldb );
544 #else
545  char blas_side;
546  char blas_uplo;
547  char blas_trans;
548  char blas_diag;
549 
550  bl1_param_map_to_netlib_side( side, &blas_side );
551  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
552  bl1_param_map_to_netlib_trans( trans, &blas_trans );
553  bl1_param_map_to_netlib_diag( diag, &blas_diag );
554 
555  F77_strsm( &blas_side,
556  &blas_uplo,
557  &blas_trans,
558  &blas_diag,
559  &m,
560  &n,
561  alpha,
562  a, &lda,
563  b, &ldb );
564 #endif
565 }
void F77_strsm(char *side, char *uplo, char *transa, char *diag, int *m, int *n, float *alpha, float *a, int *lda, float *b, int *ldb)
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
CBLAS_DIAG
Definition: blis_prototypes_cblas.h:20
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
void bl1_param_map_to_netlib_side(side1_t blis_side, void *blas_side)
Definition: bl1_param_map.c:71
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
Definition: blis_prototypes_cblas.h:17
CBLAS_SIDE
Definition: blis_prototypes_cblas.h:21
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47
void bl1_param_map_to_netlib_diag(diag1_t blis_diag, void *blas_diag)
Definition: bl1_param_map.c:95
void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, const float *A, const int lda, float *B, const int ldb)

◆ bl1_strsmsx()

void bl1_strsmsx ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs,
float *  beta,
float *  c,
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_s1(), bl1_sallocm(), bl1_saxpymt(), bl1_scopymt(), bl1_screate_contigm(), bl1_screate_contigmr(), bl1_set_dim_with_side(), bl1_sfree(), bl1_sfree_contigm(), bl1_sfree_saved_contigm(), bl1_sscalm(), bl1_strsm(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Trsmsx_external().

14 {
15  int m_save = m;
16  int n_save = n;
17  float* a_save = a;
18  float* b_save = b;
19  float* c_save = c;
20  int a_rs_save = a_rs;
21  int a_cs_save = a_cs;
22  int b_rs_save = b_rs;
23  int b_cs_save = b_cs;
24  int c_rs_save = c_rs;
25  int c_cs_save = c_cs;
26  float one = bl1_s1();
27  float* b_copy;
28  int dim_a;
29  int b_copy_rs, b_copy_cs;
30 
31  // Return early if possible.
32  if ( bl1_zero_dim2( m, n ) ) return;
33 
34  // If necessary, allocate, initialize, and use a temporary contiguous
35  // copy of each matrix rather than the original matrices.
36  bl1_set_dim_with_side( side, m, n, &dim_a );
37  bl1_screate_contigmr( uplo,
38  dim_a,
39  dim_a,
40  a_save, a_rs_save, a_cs_save,
41  &a, &a_rs, &a_cs );
42 
43  bl1_screate_contigm( m,
44  n,
45  b_save, b_rs_save, b_cs_save,
46  &b, &b_rs, &b_cs );
47 
48  bl1_screate_contigm( m,
49  n,
50  c_save, c_rs_save, c_cs_save,
51  &c, &c_rs, &c_cs );
52 
53  // Create a copy of B to use in the computation so the original matrix is
54  // left untouched.
55  b_copy = bl1_sallocm( m, n );
56 
57  // Match the strides of B_copy to that of B.
58  if ( bl1_is_col_storage( b_rs, b_cs ) )
59  {
60  b_copy_rs = 1;
61  b_copy_cs = m;
62  }
63  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
64  {
65  b_copy_rs = n;
66  b_copy_cs = 1;
67  }
68 
69  // Copy the contents of B to B_copy.
70  bl1_scopymt( BLIS1_NO_TRANSPOSE,
71  m,
72  n,
73  b, b_rs, b_cs,
74  b_copy, b_copy_rs, b_copy_cs );
75 
76  // Perform the operation on B_copy.
77  bl1_strsm( side,
78  uplo,
79  trans,
80  diag,
81  m,
82  n,
83  alpha,
84  a, a_rs, a_cs,
85  b_copy, b_copy_rs, b_copy_cs );
86 
87  // Scale C by beta.
88  bl1_sscalm( BLIS1_NO_CONJUGATE,
89  m,
90  n,
91  beta,
92  c, c_rs, c_cs );
93 
94  // Add B_copy into C.
95  bl1_saxpymt( BLIS1_NO_TRANSPOSE,
96  m,
97  n,
98  &one,
99  b_copy, b_copy_rs, b_copy_cs,
100  c, c_rs, c_cs );
101 
102  // Free the copy of B.
103  bl1_sfree( b_copy );
104 
105  // Free any temporary contiguous matrices, copying the result back to
106  // the original matrix.
107  bl1_sfree_contigm( a_save, a_rs_save, a_cs_save,
108  &a, &a_rs, &a_cs );
109 
110  bl1_sfree_contigm( b_save, b_rs_save, b_cs_save,
111  &b, &b_rs, &b_cs );
112 
113  bl1_sfree_saved_contigm( m_save,
114  n_save,
115  c_save, c_rs_save, c_cs_save,
116  &c, &c_rs, &c_cs );
117 }
void bl1_sfree_saved_contigm(int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:13
float * bl1_sallocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:30
float bl1_s1(void)
Definition: bl1_constants.c:47
Definition: blis_type_defs.h:81
void bl1_sfree(float *p)
Definition: bl1_free.c:30
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
void bl1_screate_contigm(int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:13
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_sfree_contigm(float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:13
void bl1_sscalm(conj1_t conj, int m, int n, float *alpha, float *a, int a_rs, int a_cs)
Definition: bl1_scalm.c:13
void bl1_strsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
Definition: bl1_trsm.c:13
void bl1_saxpymt(trans1_t trans, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
Definition: bl1_axpymt.c:13
Definition: blis_type_defs.h:54
void bl1_scopymt(trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:81
void bl1_screate_contigmr(uplo1_t uplo, int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:13

◆ bl1_zgemm()

void bl1_zgemm ( trans1_t  transa,
trans1_t  transb,
int  m,
int  k,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs,
dcomplex *  beta,
dcomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_is_conjnotrans(), bl1_z0(), bl1_z1(), bl1_zallocm(), bl1_zaxpymt(), bl1_zconjm(), bl1_zcopymt(), bl1_zcreate_contigm(), bl1_zcreate_contigmt(), bl1_zero_dim3(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigm(), bl1_zgemm_blas(), bl1_zscalm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_NO_CONJUGATE, and BLIS1_TRANSPOSE.

Referenced by FLA_Gemm_external().

874 {
875  int m_save = m;
876  int n_save = n;
877  dcomplex* a_save = a;
878  dcomplex* b_save = b;
879  dcomplex* c_save = c;
880  int a_rs_save = a_rs;
881  int a_cs_save = a_cs;
882  int b_rs_save = b_rs;
883  int b_cs_save = b_cs;
884  int c_rs_save = c_rs;
885  int c_cs_save = c_cs;
886  dcomplex zero = bl1_z0();
887  dcomplex one = bl1_z1();
888  dcomplex* a_unswap;
889  dcomplex* b_unswap;
890  dcomplex* a_conj;
891  dcomplex* b_conj;
892  dcomplex* c_trans;
893  int lda, inca;
894  int ldb, incb;
895  int ldc, incc;
896  int lda_conj, inca_conj;
897  int ldb_conj, incb_conj;
898  int ldc_trans, incc_trans;
899  int m_gemm, n_gemm;
900  int gemm_needs_axpyt = FALSE;
901  int a_was_copied;
902  int b_was_copied;
903 
904  // Return early if possible.
905  if ( bl1_zero_dim3( m, k, n ) )
906  {
907  bl1_zscalm( BLIS1_NO_CONJUGATE,
908  m,
909  n,
910  beta,
911  c, c_rs, c_cs );
912  return;
913  }
914 
915  // If necessary, allocate, initialize, and use a temporary contiguous
916  // copy of each matrix rather than the original matrices.
917  bl1_zcreate_contigmt( transa,
918  m,
919  k,
920  a_save, a_rs_save, a_cs_save,
921  &a, &a_rs, &a_cs );
922 
923  bl1_zcreate_contigmt( transb,
924  k,
925  n,
926  b_save, b_rs_save, b_cs_save,
927  &b, &b_rs, &b_cs );
928 
929  bl1_zcreate_contigm( m,
930  n,
931  c_save, c_rs_save, c_cs_save,
932  &c, &c_rs, &c_cs );
933 
934  // Figure out whether A and/or B was copied to contiguous memory. This
935  // is used later to prevent redundant copying.
936  a_was_copied = ( a != a_save );
937  b_was_copied = ( b != b_save );
938 
939  // These are used to track the original values of a and b prior to any
940  // operand swapping that might take place. This is necessary for proper
941  // freeing of memory when one is a temporary contiguous matrix.
942  a_unswap = a;
943  b_unswap = b;
944 
945  // These are used to track the dimensions of the product of the
946  // A and B operands to the BLAS invocation of gemm. These differ
947  // from m and n when the operands need to be swapped.
948  m_gemm = m;
949  n_gemm = n;
950 
951  // Initialize with values assuming column-major storage.
952  lda = a_cs;
953  inca = a_rs;
954  ldb = b_cs;
955  incb = b_rs;
956  ldc = c_cs;
957  incc = c_rs;
958 
959  // Adjust the parameters based on the storage of each matrix.
960  if ( bl1_is_col_storage( c_rs, c_cs ) )
961  {
962  if ( bl1_is_col_storage( a_rs, a_cs ) )
963  {
964  if ( bl1_is_col_storage( b_rs, b_cs ) )
965  {
966  // requested operation: C_c += tr( A_c ) * tr( B_c )
967  // effective operation: C_c += tr( A_c ) * tr( B_c )
968  }
969  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
970  {
971 
972  // requested operation: C_c += tr( A_c ) * tr( B_r )
973  // effective operation: C_c += tr( A_c ) * tr( B_c )^T
974  bl1_swap_ints( ldb, incb );
975 
976  bl1_toggle_trans( transb );
977  }
978  }
979  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
980  {
981  if ( bl1_is_col_storage( b_rs, b_cs ) )
982  {
983  // requested operation: C_c += tr( A_r ) * tr( B_c )
984  // effective operation: C_c += tr( A_r )^T * tr( B_c )
985  bl1_swap_ints( lda, inca );
986 
987  bl1_toggle_trans( transa );
988  }
989  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
990  {
991  // requested operation: C_c += tr( A_r ) * tr( B_r )
992  // effective operation: C_c += ( tr( B_c ) * tr( A_c ) )^T
993  bl1_swap_ints( lda, inca );
994  bl1_swap_ints( ldb, incb );
995 
996  bl1_zswap_pointers( a, b );
997  bl1_swap_ints( a_was_copied, b_was_copied );
998  bl1_swap_ints( lda, ldb );
999  bl1_swap_ints( inca, incb );
1000  bl1_swap_trans( transa, transb );
1001 
1002  gemm_needs_axpyt = TRUE;
1003  bl1_swap_ints( m_gemm, n_gemm );
1004  }
1005  }
1006  }
1007  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
1008  {
1009  if ( bl1_is_col_storage( a_rs, a_cs ) )
1010  {
1011  if ( bl1_is_col_storage( b_rs, b_cs ) )
1012  {
1013  // requested operation: C_r += tr( A_c ) * tr( B_c )
1014  // effective operation: C_c += ( tr( A_c ) * tr( B_c ) )^T
1015  bl1_swap_ints( ldc, incc );
1016 
1017  bl1_swap_ints( m, n );
1018 
1019  gemm_needs_axpyt = TRUE;
1020  }
1021  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
1022  {
1023  // requested operation: C_r += tr( A_c ) * tr( B_r )
1024  // effective operation: C_c += tr( B_c ) * tr( A_c )^T
1025  bl1_swap_ints( ldc, incc );
1026  bl1_swap_ints( ldb, incb );
1027 
1028  bl1_toggle_trans( transa );
1029 
1030  bl1_swap_ints( m, n );
1031  bl1_swap_ints( m_gemm, n_gemm );
1032  bl1_zswap_pointers( a, b );
1033  bl1_swap_ints( a_was_copied, b_was_copied );
1034  bl1_swap_ints( lda, ldb );
1035  bl1_swap_ints( inca, incb );
1036  bl1_swap_trans( transa, transb );
1037  }
1038  }
1039  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
1040  {
1041  if ( bl1_is_col_storage( b_rs, b_cs ) )
1042  {
1043  // requested operation: C_r += tr( A_r ) * tr( B_c )
1044  // effective operation: C_c += tr( B_c )^T * tr( A_c )
1045  bl1_swap_ints( ldc, incc );
1046  bl1_swap_ints( lda, inca );
1047 
1048  bl1_toggle_trans( transb );
1049 
1050  bl1_swap_ints( m, n );
1051  bl1_swap_ints( m_gemm, n_gemm );
1052  bl1_zswap_pointers( a, b );
1053  bl1_swap_ints( a_was_copied, b_was_copied );
1054  bl1_swap_ints( lda, ldb );
1055  bl1_swap_ints( inca, incb );
1056  bl1_swap_trans( transa, transb );
1057  }
1058  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
1059  {
1060  // requested operation: C_r += tr( A_r ) * tr( B_r )
1061  // effective operation: C_c += tr( B_c ) * tr( A_c )
1062  bl1_swap_ints( lda, inca );
1063  bl1_swap_ints( ldb, incb );
1064  bl1_swap_ints( ldc, incc );
1065 
1066  bl1_swap_ints( m, n );
1067  bl1_swap_ints( m_gemm, n_gemm );
1068  bl1_zswap_pointers( a, b );
1069  bl1_swap_ints( a_was_copied, b_was_copied );
1070  bl1_swap_ints( lda, ldb );
1071  bl1_swap_ints( inca, incb );
1072  bl1_swap_trans( transa, transb );
1073  }
1074  }
1075  }
1076 
1077  // We need a temporary matrix for the case where A is conjugated.
1078  a_conj = a;
1079  lda_conj = lda;
1080  inca_conj = inca;
1081 
1082  // If transa indicates conjugate-no-transpose and A was not already
1083  // copied, then copy and conjugate it to a temporary matrix. Otherwise,
1084  // if transa indicates conjugate-no-transpose and A was already copied,
1085  // just conjugate it.
1086  if ( bl1_is_conjnotrans( transa ) && !a_was_copied )
1087  {
1088  a_conj = bl1_zallocm( m_gemm, k );
1089  lda_conj = m_gemm;
1090  inca_conj = 1;
1091 
1092  bl1_zcopymt( BLIS1_CONJ_NO_TRANSPOSE,
1093  m_gemm,
1094  k,
1095  a, inca, lda,
1096  a_conj, inca_conj, lda_conj );
1097  }
1098  else if ( bl1_is_conjnotrans( transa ) && a_was_copied )
1099  {
1100  bl1_zconjm( m_gemm,
1101  k,
1102  a_conj, inca_conj, lda_conj );
1103  }
1104 
1105  // We need a temporary matrix for the case where B is conjugated.
1106  b_conj = b;
1107  ldb_conj = ldb;
1108  incb_conj = incb;
1109 
1110  // If transb indicates conjugate-no-transpose and B was not already
1111  // copied, then copy and conjugate it to a temporary matrix. Otherwise,
1112  // if transb indicates conjugate-no-transpose and B was already copied,
1113  // just conjugate it.
1114  if ( bl1_is_conjnotrans( transb ) && !b_was_copied )
1115  {
1116  b_conj = bl1_zallocm( k, n_gemm );
1117  ldb_conj = k;
1118  incb_conj = 1;
1119 
1120  bl1_zcopymt( BLIS1_CONJ_NO_TRANSPOSE,
1121  k,
1122  n_gemm,
1123  b, incb, ldb,
1124  b_conj, incb_conj, ldb_conj );
1125  }
1126  else if ( bl1_is_conjnotrans( transb ) && b_was_copied )
1127  {
1128  bl1_zconjm( k,
1129  n_gemm,
1130  b_conj, incb_conj, ldb_conj );
1131  }
1132 
1133  // There are two cases where we need to perform the gemm and then axpy
1134  // the result into C with a transposition. We handle those cases here.
1135  if ( gemm_needs_axpyt )
1136  {
1137  // We need a temporary matrix for holding C^T. Notice that m and n
1138  // represent the dimensions of C, while m_gemm and n_gemm are the
1139  // dimensions of the actual product op(A)*op(B), which may be n-by-m
1140  // since the operands may have been swapped.
1141  c_trans = bl1_zallocm( m_gemm, n_gemm );
1142  ldc_trans = m_gemm;
1143  incc_trans = 1;
1144 
1145  // Compute tr( A ) * tr( B ), where A and B may have been swapped
1146  // to reference the other, and store the result in C_trans.
1147  bl1_zgemm_blas( transa,
1148  transb,
1149  m_gemm,
1150  n_gemm,
1151  k,
1152  alpha,
1153  a_conj, lda_conj,
1154  b_conj, ldb_conj,
1155  &zero,
1156  c_trans, ldc_trans );
1157 
1158  // Scale C by beta.
1159  bl1_zscalm( BLIS1_NO_CONJUGATE,
1160  m,
1161  n,
1162  beta,
1163  c, incc, ldc );
1164 
1165  // And finally, accumulate the matrix product in C_trans into C
1166  // with a transpose.
1167  bl1_zaxpymt( BLIS1_TRANSPOSE,
1168  m,
1169  n,
1170  &one,
1171  c_trans, incc_trans, ldc_trans,
1172  c, incc, ldc );
1173 
1174  // Free the temporary matrix for C.
1175  bl1_zfree( c_trans );
1176  }
1177  else // no extra axpyt step needed
1178  {
1179  bl1_zgemm_blas( transa,
1180  transb,
1181  m_gemm,
1182  n_gemm,
1183  k,
1184  alpha,
1185  a_conj, lda_conj,
1186  b_conj, ldb_conj,
1187  beta,
1188  c, ldc );
1189  }
1190 
1191  if ( bl1_is_conjnotrans( transa ) && !a_was_copied )
1192  bl1_zfree( a_conj );
1193 
1194  if ( bl1_is_conjnotrans( transb ) && !b_was_copied )
1195  bl1_zfree( b_conj );
1196 
1197  // Free any temporary contiguous matrices, copying the result back to
1198  // the original matrix.
1199  bl1_zfree_contigm( a_save, a_rs_save, a_cs_save,
1200  &a_unswap, &a_rs, &a_cs );
1201 
1202  bl1_zfree_contigm( b_save, b_rs_save, b_cs_save,
1203  &b_unswap, &b_rs, &b_cs );
1204 
1205  bl1_zfree_saved_contigm( m_save,
1206  n_save,
1207  c_save, c_rs_save, c_cs_save,
1208  &c, &c_rs, &c_cs );
1209 }
void bl1_zcreate_contigm(int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:115
void bl1_zcreate_contigmt(trans1_t trans_dims, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmt.c:127
dcomplex bl1_z0(void)
Definition: bl1_constants.c:133
Definition: blis_type_defs.h:81
dcomplex * bl1_zallocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:45
void bl1_zscalm(conj1_t conj, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_scalm.c:273
void bl1_zfree(dcomplex *p)
Definition: bl1_free.c:45
int bl1_is_conjnotrans(trans1_t trans)
Definition: bl1_is.c:25
Definition: blis_type_defs.h:55
int bl1_zero_dim3(int m, int k, int n)
Definition: bl1_is.c:123
void bl1_zaxpymt(trans1_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_axpymt.c:248
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_zfree_saved_contigm(int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:82
Definition: blis_type_defs.h:56
void bl1_zcopymt(trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:286
void bl1_zfree_contigm(dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:61
void bl1_zconjm(int m, int n, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_conjm.c:72
void bl1_zgemm_blas(trans1_t transa, trans1_t transb, int m, int n, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc)
Definition: bl1_gemm.c:1336
Definition: blis_type_defs.h:137
dcomplex bl1_z1(void)
Definition: bl1_constants.c:69

◆ bl1_zgemm_blas()

void bl1_zgemm_blas ( trans1_t  transa,
trans1_t  transb,
int  m,
int  n,
int  k,
dcomplex *  alpha,
dcomplex *  a,
int  lda,
dcomplex *  b,
int  ldb,
dcomplex *  beta,
dcomplex *  c,
int  ldc 
)

References bl1_param_map_to_netlib_trans(), cblas_zgemm(), CblasColMajor, and F77_zgemm().

Referenced by bl1_zgemm().

1337 {
1338 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1339  enum CBLAS_ORDER cblas_order = CblasColMajor;
1340  enum CBLAS_TRANSPOSE cblas_transa;
1341  enum CBLAS_TRANSPOSE cblas_transb;
1342 
1343  bl1_param_map_to_netlib_trans( transa, &cblas_transa );
1344  bl1_param_map_to_netlib_trans( transb, &cblas_transb );
1345 
1346  cblas_zgemm( cblas_order,
1347  cblas_transa,
1348  cblas_transb,
1349  m,
1350  n,
1351  k,
1352  alpha,
1353  a, lda,
1354  b, ldb,
1355  beta,
1356  c, ldc );
1357 #else
1358  char blas_transa;
1359  char blas_transb;
1360 
1361  bl1_param_map_to_netlib_trans( transa, &blas_transa );
1362  bl1_param_map_to_netlib_trans( transb, &blas_transb );
1363 
1364  F77_zgemm( &blas_transa,
1365  &blas_transb,
1366  &m,
1367  &n,
1368  &k,
1369  alpha,
1370  a, &lda,
1371  b, &ldb,
1372  beta,
1373  c, &ldc );
1374 #endif
1375 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
void F77_zgemm(char *transa, char *transb, int *m, int *n, int *k, dcomplex *alpha, dcomplex *a, int *lda, dcomplex *b, int *ldb, dcomplex *beta, dcomplex *c, int *ldc)
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
Definition: blis_prototypes_cblas.h:17
void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc)

◆ bl1_zhemm()

void bl1_zhemm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs,
dcomplex *  beta,
dcomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_z0(), bl1_z1(), bl1_zallocm(), bl1_zaxpymt(), bl1_zconjmr(), bl1_zcopymrt(), bl1_zcopymt(), bl1_zcreate_contigm(), bl1_zcreate_contigmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigm(), bl1_zhemm_blas(), bl1_zscalm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by FLA_Hemm_external().

349 {
350  int m_save = m;
351  int n_save = n;
352  dcomplex* a_save = a;
353  dcomplex* b_save = b;
354  dcomplex* c_save = c;
355  int a_rs_save = a_rs;
356  int a_cs_save = a_cs;
357  int b_rs_save = b_rs;
358  int b_cs_save = b_cs;
359  int c_rs_save = c_rs;
360  int c_cs_save = c_cs;
361  dcomplex zero = bl1_z0();
362  dcomplex one = bl1_z1();
363  dcomplex* a_conj;
364  dcomplex* b_copy;
365  dcomplex* c_trans;
366  int dim_a;
367  int lda, inca;
368  int ldb, incb;
369  int ldc, incc;
370  int lda_conj, inca_conj;
371  int ldb_copy, incb_copy;
372  int ldc_trans, incc_trans;
373  int hemm_needs_conja = FALSE;
374  int hemm_needs_copyb = FALSE;
375  int hemm_needs_transb = FALSE;
376  int hemm_needs_axpyt = FALSE;
377  int a_was_copied;
378 
379  // Return early if possible.
380  if ( bl1_zero_dim2( m, n ) ) return;
381 
382  // If necessary, allocate, initialize, and use a temporary contiguous
383  // copy of each matrix rather than the original matrices.
384  bl1_set_dim_with_side( side, m, n, &dim_a );
385  bl1_zcreate_contigmr( uplo,
386  dim_a,
387  dim_a,
388  a_save, a_rs_save, a_cs_save,
389  &a, &a_rs, &a_cs );
390 
391  bl1_zcreate_contigm( m,
392  n,
393  b_save, b_rs_save, b_cs_save,
394  &b, &b_rs, &b_cs );
395 
396  bl1_zcreate_contigm( m,
397  n,
398  c_save, c_rs_save, c_cs_save,
399  &c, &c_rs, &c_cs );
400 
401  // Figure out whether A was copied to contiguous memory. This is used to
402  // prevent redundant copying.
403  a_was_copied = ( a != a_save );
404 
405  // Initialize with values assuming column-major storage.
406  lda = a_cs;
407  inca = a_rs;
408  ldb = b_cs;
409  incb = b_rs;
410  ldc = c_cs;
411  incc = c_rs;
412 
413  // Adjust the parameters based on the storage of each matrix.
414  if ( bl1_is_col_storage( c_rs, c_cs ) )
415  {
416  if ( bl1_is_col_storage( a_rs, a_cs ) )
417  {
418  if ( bl1_is_col_storage( b_rs, b_cs ) )
419  {
420  // requested operation: C_c += uplo( A_c ) * B_c
421  // effective operation: C_c += uplo( A_c ) * B_c
422  }
423  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
424  {
425  // requested operation: C_c += uplo( A_c ) * B_r
426  // effective operation: C_c += uplo( A_c ) * B_c
427  hemm_needs_copyb = TRUE;
428  }
429  }
430  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
431  {
432  if ( bl1_is_col_storage( b_rs, b_cs ) )
433  {
434  // requested operation: C_c += uplo( A_r ) * B_c
435  // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
436  bl1_swap_ints( lda, inca );
437 
438  bl1_toggle_uplo( uplo );
439 
440  hemm_needs_conja = TRUE;
441  }
442  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
443  {
444  // requested operation: C_c += uplo( A_r ) * B_r
445  // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
446  bl1_swap_ints( lda, inca );
447  bl1_swap_ints( ldb, incb );
448 
449  bl1_toggle_side( side );
450  bl1_toggle_uplo( uplo );
451 
452  hemm_needs_axpyt = TRUE;
453  }
454  }
455  }
456  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
457  {
458  if ( bl1_is_col_storage( a_rs, a_cs ) )
459  {
460  if ( bl1_is_col_storage( b_rs, b_cs ) )
461  {
462  // requested operation: C_r += uplo( A_c ) * B_c
463  // effective operation: C_c += ( uplo( A_c ) * B_c )^T
464  bl1_swap_ints( ldc, incc );
465 
466  bl1_swap_ints( m, n );
467 
468  hemm_needs_axpyt = TRUE;
469  }
470  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
471  {
472  // requested operation: C_r += uplo( A_c ) * B_r
473  // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
474  bl1_swap_ints( ldc, incc );
475  bl1_swap_ints( ldb, incb );
476 
477  bl1_swap_ints( m, n );
478 
479  bl1_toggle_side( side );
480 
481  hemm_needs_conja = TRUE;
482  }
483  }
484  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
485  {
486  if ( bl1_is_col_storage( b_rs, b_cs ) )
487  {
488  // requested operation: C_r += uplo( A_r ) * B_c
489  // effective operation: C_c += B_c^T * ~uplo( A_c )
490  bl1_swap_ints( ldc, incc );
491  bl1_swap_ints( lda, inca );
492 
493  bl1_swap_ints( m, n );
494 
495  bl1_toggle_side( side );
496  bl1_toggle_uplo( uplo );
497 
498  hemm_needs_copyb = TRUE;
499  hemm_needs_transb = TRUE;
500  }
501  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
502  {
503  // requested operation: C_r += uplo( A_r ) * B_r
504  // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
505  bl1_swap_ints( ldc, incc );
506  bl1_swap_ints( lda, inca );
507  bl1_swap_ints( ldb, incb );
508 
509  bl1_swap_ints( m, n );
510 
511  bl1_toggle_uplo( uplo );
512  bl1_toggle_side( side );
513  }
514  }
515  }
516 
517  // We need a temporary matrix for the cases where A is conjugated.
518  a_conj = a;
519  lda_conj = lda;
520  inca_conj = inca;
521 
522  if ( hemm_needs_conja && !a_was_copied )
523  {
524  int dim_a;
525 
526  bl1_set_dim_with_side( side, m, n, &dim_a );
527 
528  a_conj = bl1_zallocm( dim_a, dim_a );
529  lda_conj = dim_a;
530  inca_conj = 1;
531 
532  bl1_zcopymrt( uplo,
533  BLIS1_CONJ_NO_TRANSPOSE,
534  dim_a,
535  dim_a,
536  a, inca, lda,
537  a_conj, inca_conj, lda_conj );
538  }
539  else if ( hemm_needs_conja && a_was_copied )
540  {
541  int dim_a;
542 
543  bl1_set_dim_with_side( side, m, n, &dim_a );
544 
545  bl1_zconjmr( uplo,
546  dim_a,
547  dim_a,
548  a_conj, inca_conj, lda_conj );
549  }
550 
551  // We need a temporary matrix for the cases where B needs to be copied.
552  b_copy = b;
553  ldb_copy = ldb;
554  incb_copy = incb;
555 
556  // There are two cases where we need to make a copy of B: one where the
557  // copy's dimensions are transposed from the original B, and one where
558  // the dimensions are not swapped.
559  if ( hemm_needs_copyb )
560  {
561  trans1_t transb;
562 
563  // Set transb, which determines whether or not we need to copy from B
564  // as if it needs a transposition. If a transposition is needed, then
565  // m and n and have already been swapped. So in either case m
566  // represents the leading dimension of the copy.
567  if ( hemm_needs_transb ) transb = BLIS1_TRANSPOSE;
568  else transb = BLIS1_NO_TRANSPOSE;
569 
570  b_copy = bl1_zallocm( m, n );
571  ldb_copy = m;
572  incb_copy = 1;
573 
574  bl1_zcopymt( transb,
575  m,
576  n,
577  b, incb, ldb,
578  b_copy, incb_copy, ldb_copy );
579  }
580 
581  // There are two cases where we need to perform the hemm and then axpy
582  // the result into C with a transposition. We handle those cases here.
583  if ( hemm_needs_axpyt )
584  {
585  // We need a temporary matrix for holding C^T. Notice that m and n
586  // represent the dimensions of C, and thus C_trans is n-by-m
587  // (interpreting both as column-major matrices). So the leading
588  // dimension of the temporary matrix holding C^T is n.
589  c_trans = bl1_zallocm( n, m );
590  ldc_trans = n;
591  incc_trans = 1;
592 
593  // Compute A * B (or B * A) and store the result in C_trans.
594  // Note that there is no overlap between the axpyt cases and
595  // the conja/copyb cases, hence the use of a, b, lda, and ldb.
596  bl1_zhemm_blas( side,
597  uplo,
598  n,
599  m,
600  alpha,
601  a, lda,
602  b, ldb,
603  &zero,
604  c_trans, ldc_trans );
605 
606  // Scale C by beta.
607  bl1_zscalm( BLIS1_NO_CONJUGATE,
608  m,
609  n,
610  beta,
611  c, incc, ldc );
612 
613  // And finally, accumulate the matrix product in C_trans into C
614  // with a transpose.
615  bl1_zaxpymt( BLIS1_TRANSPOSE,
616  m,
617  n,
618  &one,
619  c_trans, incc_trans, ldc_trans,
620  c, incc, ldc );
621 
622  // Free the temporary matrix for C.
623  bl1_zfree( c_trans );
624  }
625  else // no extra axpyt step needed
626  {
627  bl1_zhemm_blas( side,
628  uplo,
629  m,
630  n,
631  alpha,
632  a_conj, lda_conj,
633  b_copy, ldb_copy,
634  beta,
635  c, ldc );
636  }
637 
638  if ( hemm_needs_conja && !a_was_copied )
639  bl1_zfree( a_conj );
640 
641  if ( hemm_needs_copyb )
642  bl1_zfree( b_copy );
643 
644  // Free any temporary contiguous matrices, copying the result back to
645  // the original matrix.
646  bl1_zfree_contigm( a_save, a_rs_save, a_cs_save,
647  &a, &a_rs, &a_cs );
648 
649  bl1_zfree_contigm( b_save, b_rs_save, b_cs_save,
650  &b, &b_rs, &b_cs );
651 
652  bl1_zfree_saved_contigm( m_save,
653  n_save,
654  c_save, c_rs_save, c_cs_save,
655  &c, &c_rs, &c_cs );
656 }
void bl1_zcreate_contigm(int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:115
void bl1_zcreate_contigmr(uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:109
dcomplex bl1_z0(void)
Definition: bl1_constants.c:133
Definition: blis_type_defs.h:81
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
dcomplex * bl1_zallocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:45
void bl1_zhemm_blas(side1_t side, uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc)
Definition: bl1_hemm.c:699
void bl1_zscalm(conj1_t conj, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_scalm.c:273
trans1_t
Definition: blis_type_defs.h:52
void bl1_zfree(dcomplex *p)
Definition: bl1_free.c:45
Definition: blis_type_defs.h:55
void bl1_zaxpymt(trans1_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_axpymt.c:248
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_zfree_saved_contigm(int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:82
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void bl1_zcopymt(trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:286
void bl1_zfree_contigm(dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:61
void bl1_zcopymrt(uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_copymrt.c:328
void bl1_zconjmr(uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_conjmr.c:79
Definition: blis_type_defs.h:137
dcomplex bl1_z1(void)
Definition: bl1_constants.c:69

◆ bl1_zhemm_blas()

void bl1_zhemm_blas ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  lda,
dcomplex *  b,
int  ldb,
dcomplex *  beta,
dcomplex *  c,
int  ldc 
)

References bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(), cblas_zhemm(), CblasColMajor, and F77_zhemm().

Referenced by bl1_zhemm().

700 {
701 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
702  enum CBLAS_ORDER cblas_order = CblasColMajor;
703  enum CBLAS_SIDE cblas_side;
704  enum CBLAS_UPLO cblas_uplo;
705 
706  bl1_param_map_to_netlib_side( side, &cblas_side );
707  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
708 
709  cblas_zhemm( cblas_order,
710  cblas_side,
711  cblas_uplo,
712  m,
713  n,
714  alpha,
715  a, lda,
716  b, ldb,
717  beta,
718  c, ldc );
719 #else
720  char blas_side;
721  char blas_uplo;
722 
723  bl1_param_map_to_netlib_side( side, &blas_side );
724  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
725 
726  F77_zhemm( &blas_side,
727  &blas_uplo,
728  &m,
729  &n,
730  alpha,
731  a, &lda,
732  b, &ldb,
733  beta,
734  c, &ldc );
735 #endif
736 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
void F77_zhemm(char *side, char *uplo, int *m, int *n, dcomplex *alpha, dcomplex *a, int *lda, dcomplex *b, int *ldb, dcomplex *beta, dcomplex *c, int *ldc)
void bl1_param_map_to_netlib_side(side1_t blis_side, void *blas_side)
Definition: bl1_param_map.c:71
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
Definition: blis_prototypes_cblas.h:17
CBLAS_SIDE
Definition: blis_prototypes_cblas.h:21
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47
void cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc)

◆ bl1_zher2k()

void bl1_zher2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs,
double *  beta,
dcomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_d0(), bl1_is_col_storage(), bl1_set_dims_with_trans(), bl1_z1(), bl1_zallocm(), bl1_zaxpymrt(), bl1_zcopymt(), bl1_zcreate_contigmr(), bl1_zcreate_contigmt(), bl1_zdscalmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigmr(), bl1_zher2k_blas(), BLIS1_CONJ_NO_TRANSPOSE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Her2k_external().

335 {
336  uplo1_t uplo_save = uplo;
337  int m_save = m;
338  dcomplex* a_save = a;
339  dcomplex* b_save = b;
340  dcomplex* c_save = c;
341  int a_rs_save = a_rs;
342  int a_cs_save = a_cs;
343  int b_rs_save = b_rs;
344  int b_cs_save = b_cs;
345  int c_rs_save = c_rs;
346  int c_cs_save = c_cs;
347  double zero_r = bl1_d0();
348  dcomplex one = bl1_z1();
349  dcomplex alpha_copy;
350  dcomplex* a_copy;
351  dcomplex* b_copy;
352  dcomplex* c_conj;
353  int lda, inca;
354  int ldb, incb;
355  int ldc, incc;
356  int lda_copy, inca_copy;
357  int ldb_copy, incb_copy;
358  int ldc_conj, incc_conj;
359  int her2k_needs_copya = FALSE;
360  int her2k_needs_copyb = FALSE;
361  int her2k_needs_conj = FALSE;
362  int her2k_needs_alpha_conj = FALSE;
363 
364  // Return early if possible.
365  if ( bl1_zero_dim2( m, k ) ) return;
366 
367  // If necessary, allocate, initialize, and use a temporary contiguous
368  // copy of each matrix rather than the original matrices.
369  bl1_zcreate_contigmt( trans,
370  m,
371  k,
372  a_save, a_rs_save, a_cs_save,
373  &a, &a_rs, &a_cs );
374 
375  bl1_zcreate_contigmt( trans,
376  m,
377  k,
378  b_save, b_rs_save, b_cs_save,
379  &b, &b_rs, &b_cs );
380 
381  bl1_zcreate_contigmr( uplo,
382  m,
383  m,
384  c_save, c_rs_save, c_cs_save,
385  &c, &c_rs, &c_cs );
386 
387  // Initialize with values assuming column-major storage.
388  lda = a_cs;
389  inca = a_rs;
390  ldb = b_cs;
391  incb = b_rs;
392  ldc = c_cs;
393  incc = c_rs;
394 
395  // Adjust the parameters based on the storage of each matrix.
396  if ( bl1_is_col_storage( c_rs, c_cs ) )
397  {
398  if ( bl1_is_col_storage( a_rs, a_cs ) )
399  {
400  if ( bl1_is_col_storage( b_rs, b_cs ) )
401  {
402  // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
403  // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
404  }
405  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
406  {
407  // requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
408  // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
409  her2k_needs_copyb = TRUE;
410  }
411  }
412  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
413  {
414  if ( bl1_is_col_storage( b_rs, b_cs ) )
415  {
416  // requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
417  // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
418  her2k_needs_copya = TRUE;
419  }
420  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
421  {
422  // requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
423  // effective operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
424  bl1_swap_ints( lda, inca );
425  bl1_swap_ints( ldb, incb );
426 
427  bl1_toggle_conjtrans( trans );
428 
429  her2k_needs_conj = TRUE;
430  her2k_needs_alpha_conj = TRUE;
431  }
432  }
433  }
434  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
435  {
436  if ( bl1_is_col_storage( a_rs, a_cs ) )
437  {
438  if ( bl1_is_col_storage( b_rs, b_cs ) )
439  {
440  // requested operation: uplo( C_r ) += A_c * B_c' + B_c * A_c'
441  // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
442  bl1_swap_ints( ldc, incc );
443 
444  bl1_toggle_uplo( uplo );
445 
446  her2k_needs_conj = TRUE;
447  }
448  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
449  {
450  // requested operation: uplo( C_r ) += A_c * B_r' + B_r * A_c'
451  // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
452  her2k_needs_copyb = TRUE;
453 
454  bl1_swap_ints( ldc, incc );
455 
456  bl1_toggle_uplo( uplo );
457 
458  her2k_needs_conj = TRUE;
459  }
460  }
461  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
462  {
463  if ( bl1_is_col_storage( b_rs, b_cs ) )
464  {
465  // requested operation: uplo( C_r ) += A_r * B_c' + B_c * A_r'
466  // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
467  her2k_needs_copya = TRUE;
468 
469  bl1_swap_ints( ldc, incc );
470 
471  bl1_toggle_uplo( uplo );
472 
473  her2k_needs_conj = TRUE;
474  }
475  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
476  {
477  // requested operation: uplo( C_r ) += A_r * B_r' + B_r * A_r'
478  // effective operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
479  bl1_swap_ints( ldc, incc );
480  bl1_swap_ints( lda, inca );
481  bl1_swap_ints( ldb, incb );
482 
483  bl1_toggle_uplo( uplo );
484  bl1_toggle_conjtrans( trans );
485 
486  her2k_needs_alpha_conj = TRUE;
487  }
488  }
489  }
490 
491  // Make a copy of alpha and conjugate if necessary.
492  alpha_copy = *alpha;
493  if ( her2k_needs_alpha_conj )
494  {
495  bl1_zconjs( &alpha_copy );
496  }
497 
498  a_copy = a;
499  lda_copy = lda;
500  inca_copy = inca;
501 
502  // There are two cases where we need to copy A to column-major storage.
503  // We handle those two cases here.
504  if ( her2k_needs_copya )
505  {
506  int m_a;
507  int n_a;
508 
509  // Determine the dimensions of A according to the value of trans. We
510  // need this in order to set the leading dimension of the copy of A.
511  bl1_set_dims_with_trans( trans, m, k, &m_a, &n_a );
512 
513  // We need a temporary matrix to hold a column-major copy of A.
514  a_copy = bl1_zallocm( m, k );
515  lda_copy = m_a;
516  inca_copy = 1;
517 
518  // Copy the contents of A into A_copy.
519  bl1_zcopymt( BLIS1_NO_TRANSPOSE,
520  m_a,
521  n_a,
522  a, inca, lda,
523  a_copy, inca_copy, lda_copy );
524  }
525 
526  b_copy = b;
527  ldb_copy = ldb;
528  incb_copy = incb;
529 
530  // There are two cases where we need to copy B to column-major storage.
531  // We handle those two cases here.
532  if ( her2k_needs_copyb )
533  {
534  int m_b;
535  int n_b;
536 
537  // Determine the dimensions of B according to the value of trans. We
538  // need this in order to set the leading dimension of the copy of B.
539  bl1_set_dims_with_trans( trans, m, k, &m_b, &n_b );
540 
541  // We need a temporary matrix to hold a column-major copy of B.
542  b_copy = bl1_zallocm( m, k );
543  ldb_copy = m_b;
544  incb_copy = 1;
545 
546  // Copy the contents of B into B_copy.
547  bl1_zcopymt( BLIS1_NO_TRANSPOSE,
548  m_b,
549  n_b,
550  b, incb, ldb,
551  b_copy, incb_copy, ldb_copy );
552  }
553 
554  // There are two cases where we need to perform the rank-2k product and
555  // then axpy the result into C with a conjugation. We handle those two
556  // cases here.
557  if ( her2k_needs_conj )
558  {
559  // We need a temporary matrix for holding the rank-2k product.
560  c_conj = bl1_zallocm( m, m );
561  ldc_conj = m;
562  incc_conj = 1;
563 
564  // Compute the rank-2k product.
565  bl1_zher2k_blas( uplo,
566  trans,
567  m,
568  k,
569  &alpha_copy,
570  a_copy, lda_copy,
571  b_copy, ldb_copy,
572  &zero_r,
573  c_conj, ldc_conj );
574 
575  // Scale C by beta.
576  bl1_zdscalmr( uplo,
577  m,
578  m,
579  beta,
580  c, incc, ldc );
581 
582  // And finally, accumulate the rank-2k product in C_conj into C
583  // with a conjugation.
584  bl1_zaxpymrt( uplo,
585  BLIS1_CONJ_NO_TRANSPOSE,
586  m,
587  m,
588  &one,
589  c_conj, incc_conj, ldc_conj,
590  c, incc, ldc );
591 
592  // Free the temporary matrix for C.
593  bl1_zfree( c_conj );
594  }
595  else
596  {
597  bl1_zher2k_blas( uplo,
598  trans,
599  m,
600  k,
601  &alpha_copy,
602  a_copy, lda_copy,
603  b_copy, ldb_copy,
604  beta,
605  c, ldc );
606  }
607 
608  if ( her2k_needs_copya )
609  bl1_zfree( a_copy );
610 
611  if ( her2k_needs_copyb )
612  bl1_zfree( b_copy );
613 
614  // Free any temporary contiguous matrices, copying the result back to
615  // the original matrix.
616  bl1_zfree_contigm( a_save, a_rs_save, a_cs_save,
617  &a, &a_rs, &a_cs );
618 
619  bl1_zfree_contigm( b_save, b_rs_save, b_cs_save,
620  &b, &b_rs, &b_cs );
621 
622  bl1_zfree_saved_contigmr( uplo_save,
623  m_save,
624  m_save,
625  c_save, c_rs_save, c_cs_save,
626  &c, &c_rs, &c_cs );
627 }
void bl1_zdscalmr(uplo1_t uplo, int m, int n, double *alpha, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_scalmr.c:237
void bl1_zher2k_blas(uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, double *beta, dcomplex *c, int ldc)
Definition: bl1_her2k.c:670
void bl1_zcreate_contigmr(uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:109
uplo1_t
Definition: blis_type_defs.h:60
void bl1_zaxpymrt(uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_axpymrt.c:334
void bl1_zcreate_contigmt(trans1_t trans_dims, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmt.c:127
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
double bl1_d0(void)
Definition: bl1_constants.c:118
dcomplex * bl1_zallocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:45
void bl1_zfree_saved_contigmr(uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigmr.c:82
void bl1_zfree(dcomplex *p)
Definition: bl1_free.c:45
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void bl1_set_dims_with_trans(trans1_t trans, int m, int n, int *m_new, int *n_new)
Definition: bl1_set_dims.c:13
void bl1_zcopymt(trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:286
void bl1_zfree_contigm(dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:61
Definition: blis_type_defs.h:137
dcomplex bl1_z1(void)
Definition: bl1_constants.c:69

◆ bl1_zher2k_blas()

void bl1_zher2k_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
dcomplex *  alpha,
dcomplex *  a,
int  lda,
dcomplex *  b,
int  ldb,
double *  beta,
dcomplex *  c,
int  ldc 
)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_zher2k(), CblasColMajor, and F77_zher2k().

Referenced by bl1_zher2k().

671 {
672 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
673  enum CBLAS_ORDER cblas_order = CblasColMajor;
674  enum CBLAS_UPLO cblas_uplo;
675  enum CBLAS_TRANSPOSE cblas_trans;
676 
677  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
678  bl1_param_map_to_netlib_trans( trans, &cblas_trans );
679 
680  cblas_zher2k( cblas_order,
681  cblas_uplo,
682  cblas_trans,
683  m,
684  k,
685  alpha,
686  a, lda,
687  b, ldb,
688  *beta,
689  c, ldc );
690 #else
691  char blas_uplo;
692  char blas_trans;
693 
694  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
695  bl1_param_map_to_netlib_trans( trans, &blas_trans );
696 
697  F77_zher2k( &blas_uplo,
698  &blas_trans,
699  &m,
700  &k,
701  alpha,
702  a, &lda,
703  b, &ldb,
704  beta,
705  c, &ldc );
706 #endif
707 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
Definition: blis_prototypes_cblas.h:17
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47
void cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const double beta, void *C, const int ldc)
void F77_zher2k(char *uplo, char *transa, int *n, int *k, dcomplex *alpha, dcomplex *a, int *lda, dcomplex *b, int *ldb, double *beta, dcomplex *c, int *ldc)

◆ bl1_zherk()

void bl1_zherk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
double *  beta,
dcomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_d0(), bl1_is_col_storage(), bl1_z1(), bl1_zallocm(), bl1_zaxpymrt(), bl1_zcreate_contigmr(), bl1_zcreate_contigmt(), bl1_zdscalmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigmr(), bl1_zherk_blas(), and BLIS1_CONJ_NO_TRANSPOSE.

Referenced by FLA_Herk_external(), and FLA_UDdate_UT_opz_var1().

185 {
186  uplo1_t uplo_save = uplo;
187  int m_save = m;
188  dcomplex* a_save = a;
189  dcomplex* c_save = c;
190  int a_rs_save = a_rs;
191  int a_cs_save = a_cs;
192  int c_rs_save = c_rs;
193  int c_cs_save = c_cs;
194  double zero_r = bl1_d0();
195  dcomplex one = bl1_z1();
196  dcomplex* c_conj;
197  int lda, inca;
198  int ldc, incc;
199  int ldc_conj, incc_conj;
200  int herk_needs_conj = FALSE;
201 
202  // Return early if possible.
203  if ( bl1_zero_dim2( m, k ) ) return;
204 
205  // If necessary, allocate, initialize, and use a temporary contiguous
206  // copy of each matrix rather than the original matrices.
207  bl1_zcreate_contigmt( trans,
208  m,
209  k,
210  a_save, a_rs_save, a_cs_save,
211  &a, &a_rs, &a_cs );
212 
213  bl1_zcreate_contigmr( uplo,
214  m,
215  m,
216  c_save, c_rs_save, c_cs_save,
217  &c, &c_rs, &c_cs );
218 
219  // Initialize with values assuming column-major storage.
220  lda = a_cs;
221  inca = a_rs;
222  ldc = c_cs;
223  incc = c_rs;
224 
225  // Adjust the parameters based on the storage of each matrix.
226  if ( bl1_is_col_storage( c_rs, c_cs ) )
227  {
228  if ( bl1_is_col_storage( a_rs, a_cs ) )
229  {
230  // requested operation: uplo( C_c ) += A_c * A_c'
231  // effective operation: uplo( C_c ) += A_c * A_c'
232  }
233  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
234  {
235  // requested operation: uplo( C_c ) += A_r * A_r'
236  // effective operation: uplo( C_c ) += conj( A_c' * A_c )
237  bl1_swap_ints( lda, inca );
238 
239  bl1_toggle_conjtrans( trans );
240 
241  herk_needs_conj = TRUE;
242  }
243  }
244  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
245  {
246  if ( bl1_is_col_storage( a_rs, a_cs ) )
247  {
248  // requested operation: uplo( C_r ) += A_c * A_c'
249  // effective operation: ~uplo( C_c ) += conj( A_c * A_c' )
250  bl1_swap_ints( ldc, incc );
251 
252  bl1_toggle_uplo( uplo );
253 
254  herk_needs_conj = TRUE;
255  }
256  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
257  {
258  // requested operation: uplo( C_r ) += A_r * A_r'
259  // effective operation: ~uplo( C_c ) += A_c' * A_c
260  bl1_swap_ints( ldc, incc );
261  bl1_swap_ints( lda, inca );
262 
263  bl1_toggle_uplo( uplo );
264  bl1_toggle_conjtrans( trans );
265  }
266  }
267 
268  // There are two cases where we need to perform the rank-k product and
269  // then axpy the result into C with a conjugation. We handle those two
270  // cases here.
271  if ( herk_needs_conj )
272  {
273  // We need a temporary matrix for holding the rank-k product.
274  c_conj = bl1_zallocm( m, m );
275  ldc_conj = m;
276  incc_conj = 1;
277 
278  // Compute the rank-k product.
279  bl1_zherk_blas( uplo,
280  trans,
281  m,
282  k,
283  alpha,
284  a, lda,
285  &zero_r,
286  c_conj, ldc_conj );
287 
288  // Scale C by beta.
289  bl1_zdscalmr( uplo,
290  m,
291  m,
292  beta,
293  c, incc, ldc );
294 
295  // And finally, accumulate the rank-k product in C_conj into C
296  // with a conjugation.
297  bl1_zaxpymrt( uplo,
298  BLIS1_CONJ_NO_TRANSPOSE,
299  m,
300  m,
301  &one,
302  c_conj, incc_conj, ldc_conj,
303  c, incc, ldc );
304 
305  // Free the temporary matrix for C.
306  bl1_zfree( c_conj );
307  }
308  else
309  {
310  bl1_zherk_blas( uplo,
311  trans,
312  m,
313  k,
314  alpha,
315  a, lda,
316  beta,
317  c, ldc );
318  }
319 
320  // Free any temporary contiguous matrices, copying the result back to
321  // the original matrix.
322  bl1_zfree_contigm( a_save, a_rs_save, a_cs_save,
323  &a, &a_rs, &a_cs );
324 
325  bl1_zfree_saved_contigmr( uplo_save,
326  m_save,
327  m_save,
328  c_save, c_rs_save, c_cs_save,
329  &c, &c_rs, &c_cs );
330 }
void bl1_zdscalmr(uplo1_t uplo, int m, int n, double *alpha, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_scalmr.c:237
void bl1_zcreate_contigmr(uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:109
uplo1_t
Definition: blis_type_defs.h:60
void bl1_zaxpymrt(uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_axpymrt.c:334
void bl1_zcreate_contigmt(trans1_t trans_dims, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmt.c:127
void bl1_zherk_blas(uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, dcomplex *a, int lda, double *beta, dcomplex *c, int ldc)
Definition: bl1_herk.c:371
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
double bl1_d0(void)
Definition: bl1_constants.c:118
dcomplex * bl1_zallocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:45
void bl1_zfree_saved_contigmr(uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigmr.c:82
void bl1_zfree(dcomplex *p)
Definition: bl1_free.c:45
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
Definition: blis_type_defs.h:56
void bl1_zfree_contigm(dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:61
Definition: blis_type_defs.h:137
dcomplex bl1_z1(void)
Definition: bl1_constants.c:69

◆ bl1_zherk_blas()

void bl1_zherk_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double *  alpha,
dcomplex *  a,
int  lda,
double *  beta,
dcomplex *  c,
int  ldc 
)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_zherk(), CblasColMajor, and F77_zherk().

Referenced by bl1_zherk().

372 {
373 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
374  enum CBLAS_ORDER cblas_order = CblasColMajor;
375  enum CBLAS_UPLO cblas_uplo;
376  enum CBLAS_TRANSPOSE cblas_trans;
377 
378  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
379  bl1_param_map_to_netlib_trans( trans, &cblas_trans );
380 
381  cblas_zherk( cblas_order,
382  cblas_uplo,
383  cblas_trans,
384  m,
385  k,
386  *alpha,
387  a, lda,
388  *beta,
389  c, ldc );
390 #else
391  char blas_uplo;
392  char blas_trans;
393 
394  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
395  bl1_param_map_to_netlib_trans( trans, &blas_trans );
396 
397  F77_zherk( &blas_uplo,
398  &blas_trans,
399  &m,
400  &k,
401  alpha,
402  a, &lda,
403  beta,
404  c, &ldc );
405 #endif
406 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
void F77_zherk(char *uplo, char *transa, int *n, int *k, double *alpha, dcomplex *a, int *lda, double *beta, dcomplex *c, int *ldc)
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
Definition: blis_prototypes_cblas.h:17
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47
void cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const void *A, const int lda, const double beta, void *C, const int ldc)

◆ bl1_zsymm()

void bl1_zsymm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs,
dcomplex *  beta,
dcomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_z0(), bl1_z1(), bl1_zallocm(), bl1_zaxpymt(), bl1_zcopymt(), bl1_zcreate_contigm(), bl1_zcreate_contigmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigm(), bl1_zscalm(), bl1_zsymm_blas(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by FLA_Symm_external().

797 {
798  int m_save = m;
799  int n_save = n;
800  dcomplex* a_save = a;
801  dcomplex* b_save = b;
802  dcomplex* c_save = c;
803  int a_rs_save = a_rs;
804  int a_cs_save = a_cs;
805  int b_rs_save = b_rs;
806  int b_cs_save = b_cs;
807  int c_rs_save = c_rs;
808  int c_cs_save = c_cs;
809  dcomplex zero = bl1_z0();
810  dcomplex one = bl1_z1();
811  dcomplex* b_copy;
812  dcomplex* c_trans;
813  int dim_a;
814  int lda, inca;
815  int ldb, incb;
816  int ldc, incc;
817  int ldb_copy, incb_copy;
818  int ldc_trans, incc_trans;
819  int symm_needs_copyb = FALSE;
820  int symm_needs_transb = FALSE;
821  int symm_needs_axpyt = FALSE;
822 
823  // Return early if possible.
824  if ( bl1_zero_dim2( m, n ) ) return;
825 
826  // If necessary, allocate, initialize, and use a temporary contiguous
827  // copy of each matrix rather than the original matrices.
828  bl1_set_dim_with_side( side, m, n, &dim_a );
829  bl1_zcreate_contigmr( uplo,
830  dim_a,
831  dim_a,
832  a_save, a_rs_save, a_cs_save,
833  &a, &a_rs, &a_cs );
834 
835  bl1_zcreate_contigm( m,
836  n,
837  b_save, b_rs_save, b_cs_save,
838  &b, &b_rs, &b_cs );
839 
840  bl1_zcreate_contigm( m,
841  n,
842  c_save, c_rs_save, c_cs_save,
843  &c, &c_rs, &c_cs );
844 
845  // Initialize with values assuming column-major storage.
846  lda = a_cs;
847  inca = a_rs;
848  ldb = b_cs;
849  incb = b_rs;
850  ldc = c_cs;
851  incc = c_rs;
852 
853  // Adjust the parameters based on the storage of each matrix.
854  if ( bl1_is_col_storage( c_rs, c_cs ) )
855  {
856  if ( bl1_is_col_storage( a_rs, a_cs ) )
857  {
858  if ( bl1_is_col_storage( b_rs, b_cs ) )
859  {
860  // requested operation: C_c += uplo( A_c ) * B_c
861  // effective operation: C_c += uplo( A_c ) * B_c
862  }
863  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
864  {
865  // requested operation: C_c += uplo( A_c ) * B_r
866  // effective operation: C_c += uplo( A_c ) * B_c
867  symm_needs_copyb = TRUE;
868  }
869  }
870  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
871  {
872  if ( bl1_is_col_storage( b_rs, b_cs ) )
873  {
874  // requested operation: C_c += uplo( A_r ) * B_c
875  // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
876  bl1_swap_ints( lda, inca );
877 
878  bl1_toggle_uplo( uplo );
879  }
880  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
881  {
882  // requested operation: C_c += uplo( A_r ) * B_r
883  // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
884  bl1_swap_ints( lda, inca );
885  bl1_swap_ints( ldb, incb );
886 
887  bl1_toggle_side( side );
888  bl1_toggle_uplo( uplo );
889 
890  symm_needs_axpyt = TRUE;
891  }
892  }
893  }
894  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
895  {
896  if ( bl1_is_col_storage( a_rs, a_cs ) )
897  {
898  if ( bl1_is_col_storage( b_rs, b_cs ) )
899  {
900  // requested operation: C_r += uplo( A_c ) * B_c
901  // effective operation: C_c += ( uplo( A_c ) * B_c )^T
902  bl1_swap_ints( ldc, incc );
903 
904  bl1_swap_ints( m, n );
905 
906  symm_needs_axpyt = TRUE;
907  }
908  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
909  {
910  // requested operation: C_r += uplo( A_c ) * B_r
911  // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
912  bl1_swap_ints( ldc, incc );
913  bl1_swap_ints( ldb, incb );
914 
915  bl1_swap_ints( m, n );
916 
917  bl1_toggle_side( side );
918  }
919  }
920  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
921  {
922  if ( bl1_is_col_storage( b_rs, b_cs ) )
923  {
924  // requested operation: C_r += uplo( A_r ) * B_c
925  // effective operation: C_c += B_c^T * ~uplo( A_c )
926  bl1_swap_ints( ldc, incc );
927  bl1_swap_ints( lda, inca );
928 
929  bl1_swap_ints( m, n );
930 
931  bl1_toggle_side( side );
932  bl1_toggle_uplo( uplo );
933 
934  symm_needs_copyb = TRUE;
935  symm_needs_transb = TRUE;
936  }
937  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
938  {
939  // requested operation: C_r += uplo( A_r ) * B_r
940  // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
941  bl1_swap_ints( ldc, incc );
942  bl1_swap_ints( lda, inca );
943  bl1_swap_ints( ldb, incb );
944 
945  bl1_swap_ints( m, n );
946 
947  bl1_toggle_uplo( uplo );
948  bl1_toggle_side( side );
949  }
950  }
951  }
952 
953  // We need a temporary matrix for the cases where B needs to be copied.
954  b_copy = b;
955  ldb_copy = ldb;
956  incb_copy = incb;
957 
958  // There are two cases where we need to make a copy of B: one where the
959  // copy's dimensions are transposed from the original B, and one where
960  // the dimensions are not swapped.
961  if ( symm_needs_copyb )
962  {
963  trans1_t transb;
964 
965  // Set transb, which determines whether or not we need to copy from B
966  // as if it needs a transposition. If a transposition is needed, then
967  // m and n have already been swapped. So in either case m
968  // represents the leading dimension of the copy.
969  if ( symm_needs_transb ) transb = BLIS1_TRANSPOSE;
970  else transb = BLIS1_NO_TRANSPOSE;
971 
972  b_copy = bl1_zallocm( m, n );
973  ldb_copy = m;
974  incb_copy = 1;
975 
976  bl1_zcopymt( transb,
977  m,
978  n,
979  b, incb, ldb,
980  b_copy, incb_copy, ldb_copy );
981  }
982 
983  // There are two cases where we need to perform the symm and then axpy
984  // the result into C with a transposition. We handle those cases here.
985  if ( symm_needs_axpyt )
986  {
987  // We need a temporary matrix for holding C^T. Notice that m and n
988  // represent the dimensions of C, and thus C_trans is n-by-m
989  // (interpreting both as column-major matrices). So the leading
990  // dimension of the temporary matrix holding C^T is n.
991  c_trans = bl1_zallocm( n, m );
992  ldc_trans = n;
993  incc_trans = 1;
994 
995  // Compute A * B (or B * A) and store the result in C_trans.
996  // Note that there is no overlap between the axpyt cases and
997  // the conja/copyb cases, hence the use of a, b, lda, and ldb.
998  bl1_zsymm_blas( side,
999  uplo,
1000  n,
1001  m,
1002  alpha,
1003  a, lda,
1004  b, ldb,
1005  &zero,
1006  c_trans, ldc_trans );
1007 
1008  // Scale C by beta.
1009  bl1_zscalm( BLIS1_NO_CONJUGATE,
1010  m,
1011  n,
1012  beta,
1013  c, incc, ldc );
1014 
1015  // And finally, accumulate the matrix product in C_trans into C
1016  // with a transpose.
1017  bl1_zaxpymt( BLIS1_TRANSPOSE,
1018  m,
1019  n,
1020  &one,
1021  c_trans, incc_trans, ldc_trans,
1022  c, incc, ldc );
1023 
1024  // Free the temporary matrix for C.
1025  bl1_zfree( c_trans );
1026  }
1027  else // no extra axpyt step needed
1028  {
1029  bl1_zsymm_blas( side,
1030  uplo,
1031  m,
1032  n,
1033  alpha,
1034  a, lda,
1035  b_copy, ldb_copy,
1036  beta,
1037  c, ldc );
1038  }
1039 
1040  if ( symm_needs_copyb )
1041  bl1_zfree( b_copy );
1042 
1043  // Free any temporary contiguous matrices, copying the result back to
1044  // the original matrix.
1045  bl1_zfree_contigm( a_save, a_rs_save, a_cs_save,
1046  &a, &a_rs, &a_cs );
1047 
1048  bl1_zfree_contigm( b_save, b_rs_save, b_cs_save,
1049  &b, &b_rs, &b_cs );
1050 
1051  bl1_zfree_saved_contigm( m_save,
1052  n_save,
1053  c_save, c_rs_save, c_cs_save,
1054  &c, &c_rs, &c_cs );
1055 }
void bl1_zcreate_contigm(int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:115
void bl1_zcreate_contigmr(uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:109
dcomplex bl1_z0(void)
Definition: bl1_constants.c:133
Definition: blis_type_defs.h:81
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
dcomplex * bl1_zallocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:45
void bl1_zscalm(conj1_t conj, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_scalm.c:273
trans1_t
Definition: blis_type_defs.h:52
void bl1_zfree(dcomplex *p)
Definition: bl1_free.c:45
Definition: blis_type_defs.h:55
void bl1_zaxpymt(trans1_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_axpymt.c:248
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_zfree_saved_contigm(int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:82
Definition: blis_type_defs.h:54
void bl1_zsymm_blas(side1_t side, uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc)
Definition: bl1_symm.c:1176
void bl1_zcopymt(trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:286
void bl1_zfree_contigm(dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:61
Definition: blis_type_defs.h:137
dcomplex bl1_z1(void)
Definition: bl1_constants.c:69

◆ bl1_zsymm_blas()

void bl1_zsymm_blas ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  lda,
dcomplex *  b,
int  ldb,
dcomplex *  beta,
dcomplex *  c,
int  ldc 
)

References bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(), cblas_zsymm(), CblasColMajor, and F77_zsymm().

Referenced by bl1_zsymm().

1177 {
1178 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1179  enum CBLAS_ORDER cblas_order = CblasColMajor;
1180  enum CBLAS_SIDE cblas_side;
1181  enum CBLAS_UPLO cblas_uplo;
1182 
1183  bl1_param_map_to_netlib_side( side, &cblas_side );
1184  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
1185 
1186  cblas_zsymm( cblas_order,
1187  cblas_side,
1188  cblas_uplo,
1189  m,
1190  n,
1191  alpha,
1192  a, lda,
1193  b, ldb,
1194  beta,
1195  c, ldc );
1196 #else
1197  char blas_side;
1198  char blas_uplo;
1199 
1200  bl1_param_map_to_netlib_side( side, &blas_side );
1201  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
1202 
1203  F77_zsymm( &blas_side,
1204  &blas_uplo,
1205  &m,
1206  &n,
1207  alpha,
1208  a, &lda,
1209  b, &ldb,
1210  beta,
1211  c, &ldc );
1212 #endif
1213 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
void cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc)
void bl1_param_map_to_netlib_side(side1_t blis_side, void *blas_side)
Definition: bl1_param_map.c:71
void F77_zsymm(char *side, char *uplo, int *m, int *n, dcomplex *alpha, dcomplex *a, int *lda, dcomplex *b, int *ldb, dcomplex *beta, dcomplex *c, int *ldc)
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
Definition: blis_prototypes_cblas.h:17
CBLAS_SIDE
Definition: blis_prototypes_cblas.h:21
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47

◆ bl1_zsyr2k()

void bl1_zsyr2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs,
dcomplex *  beta,
dcomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_set_dims_with_trans(), bl1_zallocm(), bl1_zcopymt(), bl1_zcreate_contigmr(), bl1_zcreate_contigmt(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigmr(), bl1_zsyr2k_blas(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Syr2k_external().

692 {
693  uplo1_t uplo_save = uplo;
694  int m_save = m;
695  dcomplex* a_save = a;
696  dcomplex* b_save = b;
697  dcomplex* c_save = c;
698  int a_rs_save = a_rs;
699  int a_cs_save = a_cs;
700  int b_rs_save = b_rs;
701  int b_cs_save = b_cs;
702  int c_rs_save = c_rs;
703  int c_cs_save = c_cs;
704  dcomplex* a_copy;
705  dcomplex* b_copy;
706  int lda, inca;
707  int ldb, incb;
708  int ldc, incc;
709  int lda_copy, inca_copy;
710  int ldb_copy, incb_copy;
711  int syr2k_needs_copya = FALSE;
712  int syr2k_needs_copyb = FALSE;
713 
714  // Return early if possible.
715  if ( bl1_zero_dim2( m, k ) ) return;
716 
717  // If necessary, allocate, initialize, and use a temporary contiguous
718  // copy of each matrix rather than the original matrices.
719  bl1_zcreate_contigmt( trans,
720  m,
721  k,
722  a_save, a_rs_save, a_cs_save,
723  &a, &a_rs, &a_cs );
724 
725  bl1_zcreate_contigmt( trans,
726  m,
727  k,
728  b_save, b_rs_save, b_cs_save,
729  &b, &b_rs, &b_cs );
730 
731  bl1_zcreate_contigmr( uplo,
732  m,
733  m,
734  c_save, c_rs_save, c_cs_save,
735  &c, &c_rs, &c_cs );
736 
737  // Initialize with values assuming column-major storage.
738  lda = a_cs;
739  inca = a_rs;
740  ldb = b_cs;
741  incb = b_rs;
742  ldc = c_cs;
743  incc = c_rs;
744 
745  // Adjust the parameters based on the storage of each matrix.
746  if ( bl1_is_col_storage( c_rs, c_cs ) )
747  {
748  if ( bl1_is_col_storage( a_rs, a_cs ) )
749  {
750  if ( bl1_is_col_storage( b_rs, b_cs ) )
751  {
752  // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
753  // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
754  }
755  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
756  {
757  // requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
758  // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
759  syr2k_needs_copyb = TRUE;
760  }
761  }
762  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
763  {
764  if ( bl1_is_col_storage( b_rs, b_cs ) )
765  {
766  // requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
767  // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
768  syr2k_needs_copya = TRUE;
769  }
770  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
771  {
772  // requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
773  // effective operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
774  bl1_swap_ints( lda, inca );
775  bl1_swap_ints( ldb, incb );
776 
777  bl1_toggle_trans( trans );
778  }
779  }
780  }
781  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
782  {
783  if ( bl1_is_col_storage( a_rs, a_cs ) )
784  {
785  if ( bl1_is_col_storage( b_rs, b_cs ) )
786  {
787  // requested operation: uplo( C_r ) += A_c * B_c' + B_c * A_c'
788  // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
789  bl1_swap_ints( ldc, incc );
790 
791  bl1_toggle_uplo( uplo );
792  }
793  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
794  {
795  // requested operation: uplo( C_r ) += A_c * B_r' + B_r * A_c'
796  // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
797  syr2k_needs_copyb = TRUE;
798 
799  bl1_swap_ints( ldc, incc );
800 
801  bl1_toggle_uplo( uplo );
802  }
803  }
804  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
805  {
806  if ( bl1_is_col_storage( b_rs, b_cs ) )
807  {
808  // requested operation: uplo( C_r ) += A_r * B_c' + B_c * A_r'
809  // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
810  syr2k_needs_copya = TRUE;
811 
812  bl1_swap_ints( ldc, incc );
813 
814  bl1_toggle_uplo( uplo );
815  }
816  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
817  {
818  // requested operation: uplo( C_r ) += A_r * B_r' + B_r * A_r'
819  // effective operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
820  bl1_swap_ints( ldc, incc );
821  bl1_swap_ints( lda, inca );
822  bl1_swap_ints( ldb, incb );
823 
824  bl1_toggle_uplo( uplo );
825  bl1_toggle_trans( trans );
826  }
827  }
828  }
829 
830  a_copy = a;
831  lda_copy = lda;
832  inca_copy = inca;
833 
834  // There are two cases where we need to copy A to column-major storage.
835  // We handle those two cases here.
836  if ( syr2k_needs_copya )
837  {
838  int m_a;
839  int n_a;
840 
841  // Determine the dimensions of A according to the value of trans. We
842  // need this in order to set the leading dimension of the copy of A.
843  bl1_set_dims_with_trans( trans, m, k, &m_a, &n_a );
844 
845  // We need a temporary matrix to hold a column-major copy of A.
846  a_copy = bl1_zallocm( m, k );
847  lda_copy = m_a;
848  inca_copy = 1;
849 
850  // Copy the contents of A into A_copy.
851  bl1_zcopymt( BLIS1_NO_TRANSPOSE,
852  m_a,
853  n_a,
854  a, inca, lda,
855  a_copy, inca_copy, lda_copy );
856  }
857 
858  b_copy = b;
859  ldb_copy = ldb;
860  incb_copy = incb;
861 
862  // There are two cases where we need to copy B to column-major storage.
863  // We handle those two cases here.
864  if ( syr2k_needs_copyb )
865  {
866  int m_b;
867  int n_b;
868 
869  // Determine the dimensions of B according to the value of trans. We
870  // need this in order to set the leading dimension of the copy of B.
871  bl1_set_dims_with_trans( trans, m, k, &m_b, &n_b );
872 
873  // We need a temporary matrix to hold a column-major copy of B.
874  b_copy = bl1_zallocm( m, k );
875  ldb_copy = m_b;
876  incb_copy = 1;
877 
878  // Copy the contents of B into B_copy.
879  bl1_zcopymt( BLIS1_NO_TRANSPOSE,
880  m_b,
881  n_b,
882  b, incb, ldb,
883  b_copy, incb_copy, ldb_copy );
884  }
885 
886  bl1_zsyr2k_blas( uplo,
887  trans,
888  m,
889  k,
890  alpha,
891  a_copy, lda_copy,
892  b_copy, ldb_copy,
893  beta,
894  c, ldc );
895 
896  if ( syr2k_needs_copya )
897  bl1_zfree( a_copy );
898 
899  if ( syr2k_needs_copyb )
900  bl1_zfree( b_copy );
901 
902  // Free any temporary contiguous matrices, copying the result back to
903  // the original matrix.
904  bl1_zfree_contigm( a_save, a_rs_save, a_cs_save,
905  &a, &a_rs, &a_cs );
906 
907  bl1_zfree_contigm( b_save, b_rs_save, b_cs_save,
908  &b, &b_rs, &b_cs );
909 
910  bl1_zfree_saved_contigmr( uplo_save,
911  m_save,
912  m_save,
913  c_save, c_rs_save, c_cs_save,
914  &c, &c_rs, &c_cs );
915 }
void bl1_zsyr2k_blas(uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc)
Definition: bl1_syr2k.c:1060
void bl1_zcreate_contigmr(uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:109
uplo1_t
Definition: blis_type_defs.h:60
void bl1_zcreate_contigmt(trans1_t trans_dims, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmt.c:127
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
dcomplex * bl1_zallocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:45
void bl1_zfree_saved_contigmr(uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigmr.c:82
void bl1_zfree(dcomplex *p)
Definition: bl1_free.c:45
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
Definition: blis_type_defs.h:54
void bl1_set_dims_with_trans(trans1_t trans, int m, int n, int *m_new, int *n_new)
Definition: bl1_set_dims.c:13
void bl1_zcopymt(trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:286
void bl1_zfree_contigm(dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:61
Definition: blis_type_defs.h:137

◆ bl1_zsyr2k_blas()

void bl1_zsyr2k_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
dcomplex *  alpha,
dcomplex *  a,
int  lda,
dcomplex *  b,
int  ldb,
dcomplex *  beta,
dcomplex *  c,
int  ldc 
)

References bl1_is_conjtrans(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), BLIS1_TRANSPOSE, cblas_zsyr2k(), CblasColMajor, and F77_zsyr2k().

Referenced by bl1_zsyr2k().

1061 {
1062 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1063  enum CBLAS_ORDER cblas_order = CblasColMajor;
1064  enum CBLAS_UPLO cblas_uplo;
1065  enum CBLAS_TRANSPOSE cblas_trans;
1066 
1067  // BLAS doesn't recognize the conjugate-transposition constant for syr2k,
1068  // so we have to map it down to regular transposition.
1069  if ( bl1_is_conjtrans( trans ) ) trans = BLIS1_TRANSPOSE;
1070 
1071  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
1072  bl1_param_map_to_netlib_trans( trans, &cblas_trans );
1073 
1074  cblas_zsyr2k( cblas_order,
1075  cblas_uplo,
1076  cblas_trans,
1077  m,
1078  k,
1079  alpha,
1080  a, lda,
1081  b, ldb,
1082  beta,
1083  c, ldc );
1084 #else
1085  char blas_uplo;
1086  char blas_trans;
1087 
1088  // BLAS doesn't recognize the conjugate-transposition constant for syr2k,
1089  // so we have to map it down to regular transposition.
1090  if ( bl1_is_conjtrans( trans ) ) trans = BLIS1_TRANSPOSE;
1091 
1092  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
1093  bl1_param_map_to_netlib_trans( trans, &blas_trans );
1094 
1095  F77_zsyr2k( &blas_uplo,
1096  &blas_trans,
1097  &m,
1098  &k,
1099  alpha,
1100  a, &lda,
1101  b, &ldb,
1102  beta,
1103  c, &ldc );
1104 #endif
1105 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
int bl1_is_conjtrans(trans1_t trans)
Definition: bl1_is.c:30
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
Definition: blis_type_defs.h:55
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
Definition: blis_prototypes_cblas.h:17
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47
void F77_zsyr2k(char *uplo, char *transa, int *n, int *k, dcomplex *alpha, dcomplex *a, int *lda, dcomplex *b, int *ldb, dcomplex *beta, dcomplex *c, int *ldc)
void cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc)

◆ bl1_zsyrk()

void bl1_zsyrk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  beta,
dcomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_zcreate_contigmr(), bl1_zcreate_contigmt(), bl1_zero_dim2(), bl1_zfree_contigm(), bl1_zfree_saved_contigmr(), and bl1_zsyrk_blas().

Referenced by FLA_Syrk_external().

302 {
303  uplo1_t uplo_save = uplo;
304  int m_save = m;
305  dcomplex* a_save = a;
306  dcomplex* c_save = c;
307  int a_rs_save = a_rs;
308  int a_cs_save = a_cs;
309  int c_rs_save = c_rs;
310  int c_cs_save = c_cs;
311  int lda, inca;
312  int ldc, incc;
313 
314  // Return early if possible.
315  if ( bl1_zero_dim2( m, k ) ) return;
316 
317  // If necessary, allocate, initialize, and use a temporary contiguous
318  // copy of each matrix rather than the original matrices.
319  bl1_zcreate_contigmt( trans,
320  m,
321  k,
322  a_save, a_rs_save, a_cs_save,
323  &a, &a_rs, &a_cs );
324 
325  bl1_zcreate_contigmr( uplo,
326  m,
327  m,
328  c_save, c_rs_save, c_cs_save,
329  &c, &c_rs, &c_cs );
330 
331  // Initialize with values assuming column-major storage.
332  lda = a_cs;
333  inca = a_rs;
334  ldc = c_cs;
335  incc = c_rs;
336 
337  // Adjust the parameters based on the storage of each matrix.
338  if ( bl1_is_col_storage( c_rs, c_cs ) )
339  {
340  if ( bl1_is_col_storage( a_rs, a_cs ) )
341  {
342  // requested operation: uplo( C_c ) += A_c * A_c^T
343  // effective operation: uplo( C_c ) += A_c * A_c^T
344  }
345  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
346  {
347  // requested operation: uplo( C_c ) += A_r * A_r^T
348  // effective operation: uplo( C_c ) += A_c^T * A_c
349  bl1_swap_ints( lda, inca );
350 
351  bl1_toggle_trans( trans );
352  }
353  }
354  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
355  {
356  if ( bl1_is_col_storage( a_rs, a_cs ) )
357  {
358  // requested operation: uplo( C_r ) += A_c * A_c^T
359  // effective operation: ~uplo( C_c ) += A_c * A_c^T
360  bl1_swap_ints( ldc, incc );
361 
362  bl1_toggle_uplo( uplo );
363  }
364  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
365  {
366  // requested operation: uplo( C_r ) += A_r * A_r^T
367  // effective operation: ~uplo( C_c ) += A_c^T * A_c
368  bl1_swap_ints( ldc, incc );
369  bl1_swap_ints( lda, inca );
370 
371  bl1_toggle_uplo( uplo );
372  bl1_toggle_trans( trans );
373  }
374  }
375 
376  bl1_zsyrk_blas( uplo,
377  trans,
378  m,
379  k,
380  alpha,
381  a, lda,
382  beta,
383  c, ldc );
384 
385  // Free any temporary contiguous matrices, copying the result back to
386  // the original matrix.
387  bl1_zfree_contigm( a_save, a_rs_save, a_cs_save,
388  &a, &a_rs, &a_cs );
389 
390  bl1_zfree_saved_contigmr( uplo_save,
391  m_save,
392  m_save,
393  c_save, c_rs_save, c_cs_save,
394  &c, &c_rs, &c_cs );
395 }
void bl1_zcreate_contigmr(uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:109
uplo1_t
Definition: blis_type_defs.h:60
void bl1_zcreate_contigmt(trans1_t trans_dims, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmt.c:127
void bl1_zsyrk_blas(uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *beta, dcomplex *c, int ldc)
Definition: bl1_syrk.c:510
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
void bl1_zfree_saved_contigmr(uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigmr.c:82
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_zfree_contigm(dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:61
Definition: blis_type_defs.h:137

◆ bl1_zsyrk_blas()

void bl1_zsyrk_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
dcomplex *  alpha,
dcomplex *  a,
int  lda,
dcomplex *  beta,
dcomplex *  c,
int  ldc 
)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_zsyrk(), CblasColMajor, and F77_zsyrk().

Referenced by bl1_zsyrk().

511 {
512 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
513  enum CBLAS_ORDER cblas_order = CblasColMajor;
514  enum CBLAS_UPLO cblas_uplo;
515  enum CBLAS_TRANSPOSE cblas_trans;
516 
517  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
518  bl1_param_map_to_netlib_trans( trans, &cblas_trans );
519 
520  cblas_zsyrk( cblas_order,
521  cblas_uplo,
522  cblas_trans,
523  m,
524  k,
525  alpha,
526  a, lda,
527  beta,
528  c, ldc );
529 #else
530  char blas_uplo;
531  char blas_trans;
532 
533  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
534  bl1_param_map_to_netlib_trans( trans, &blas_trans );
535 
536  F77_zsyrk( &blas_uplo,
537  &blas_trans,
538  &m,
539  &k,
540  alpha,
541  a, &lda,
542  beta,
543  c, &ldc );
544 #endif
545 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void F77_zsyrk(char *uplo, char *transa, int *n, int *k, dcomplex *alpha, dcomplex *a, int *lda, dcomplex *beta, dcomplex *c, int *ldc)
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
Definition: blis_prototypes_cblas.h:17
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47
void cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *beta, void *C, const int ldc)

◆ bl1_ztrmm()

void bl1_ztrmm ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_is_col_storage(), bl1_is_conjnotrans(), bl1_set_dim_with_side(), bl1_zallocm(), bl1_zconjmr(), bl1_zcopymrt(), bl1_zcreate_contigm(), bl1_zcreate_contigmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigm(), bl1_ztrmm_blas(), and BLIS1_CONJ_NO_TRANSPOSE.

Referenced by bl1_ztrmmsx(), and FLA_Trmm_external().

370 {
371  int m_save = m;
372  int n_save = n;
373  dcomplex* a_save = a;
374  dcomplex* b_save = b;
375  int a_rs_save = a_rs;
376  int a_cs_save = a_cs;
377  int b_rs_save = b_rs;
378  int b_cs_save = b_cs;
379  dcomplex* a_conj;
380  int dim_a;
381  int lda, inca;
382  int ldb, incb;
383  int lda_conj, inca_conj;
384  int a_was_copied;
385 
386  // Return early if possible.
387  if ( bl1_zero_dim2( m, n ) ) return;
388 
389  // If necessary, allocate, initialize, and use a temporary contiguous
390  // copy of each matrix rather than the original matrices.
391  bl1_set_dim_with_side( side, m, n, &dim_a );
392  bl1_zcreate_contigmr( uplo,
393  dim_a,
394  dim_a,
395  a_save, a_rs_save, a_cs_save,
396  &a, &a_rs, &a_cs );
397 
398  bl1_zcreate_contigm( m,
399  n,
400  b_save, b_rs_save, b_cs_save,
401  &b, &b_rs, &b_cs );
402 
403  // Figure out whether A was copied to contiguous memory. This is used to
404  // prevent redundant copying.
405  a_was_copied = ( a != a_save );
406 
407  // Initialize with values assuming column-major storage.
408  lda = a_cs;
409  inca = a_rs;
410  ldb = b_cs;
411  incb = b_rs;
412 
413  // Adjust the parameters based on the storage of each matrix.
414  if ( bl1_is_col_storage( b_rs, b_cs ) )
415  {
416  if ( bl1_is_col_storage( a_rs, a_cs ) )
417  {
418  // requested operation: B_c := tr( uplo( A_c ) ) * B_c
419  // effective operation: B_c := tr( uplo( A_c ) ) * B_c
420  }
421  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
422  {
423  // requested operation: B_c := tr( uplo( A_r ) ) * B_c
424  // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c
425  bl1_swap_ints( lda, inca );
426 
427  bl1_toggle_uplo( uplo );
428  bl1_toggle_trans( trans );
429  }
430  }
431  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
432  {
433  if ( bl1_is_col_storage( a_rs, a_cs ) )
434  {
435  // requested operation: B_r := tr( uplo( A_c ) ) * B_r
436  // effective operation: B_c := B_c * tr( uplo( A_c ) )^T
437  bl1_swap_ints( ldb, incb );
438 
439  bl1_swap_ints( m, n );
440 
441  bl1_toggle_side( side );
442  bl1_toggle_trans( trans );
443  }
444  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
445  {
446  // requested operation: B_r := tr( uplo( A_r ) ) * B_r
447  // effective operation: B_c := B_c * tr( ~uplo( A_c ) )
448  bl1_swap_ints( ldb, incb );
449  bl1_swap_ints( lda, inca );
450 
451  bl1_swap_ints( m, n );
452 
453  bl1_toggle_side( side );
454  bl1_toggle_uplo( uplo );
455  }
456  }
457 
458  // Initialize with values assuming that trans is not conjnotrans.
459  a_conj = a;
460  lda_conj = lda;
461  inca_conj = inca;
462 
463  // We want to handle the conjnotrans case. The easiest way to do so is
464  // by making a conjugated copy of A.
465  if ( bl1_is_conjnotrans( trans ) && !a_was_copied )
466  {
467  int dim_a;
468 
469  bl1_set_dim_with_side( side, m, n, &dim_a );
470 
471  a_conj = bl1_zallocm( dim_a, dim_a );
472  lda_conj = dim_a;
473  inca_conj = 1;
474 
475  bl1_zcopymrt( uplo,
476  BLIS1_CONJ_NO_TRANSPOSE,
477  dim_a,
478  dim_a,
479  a, inca, lda,
480  a_conj, inca_conj, lda_conj );
481  }
482  else if ( bl1_is_conjnotrans( trans ) && a_was_copied )
483  {
484  int dim_a;
485 
486  bl1_set_dim_with_side( side, m, n, &dim_a );
487 
488  bl1_zconjmr( uplo,
489  dim_a,
490  dim_a,
491  a_conj, inca_conj, lda_conj );
492  }
493 
494  bl1_ztrmm_blas( side,
495  uplo,
496  trans,
497  diag,
498  m,
499  n,
500  alpha,
501  a_conj, lda_conj,
502  b, ldb );
503 
504  if ( bl1_is_conjnotrans( trans ) && !a_was_copied )
505  bl1_zfree( a_conj );
506 
507  // Free any temporary contiguous matrices, copying the result back to
508  // the original matrix.
509  bl1_zfree_contigm( a_save, a_rs_save, a_cs_save,
510  &a, &a_rs, &a_cs );
511 
512  bl1_zfree_saved_contigm( m_save,
513  n_save,
514  b_save, b_rs_save, b_cs_save,
515  &b, &b_rs, &b_cs );
516 }
void bl1_zcreate_contigm(int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:115
void bl1_zcreate_contigmr(uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:109
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
dcomplex * bl1_zallocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:45
void bl1_zfree(dcomplex *p)
Definition: bl1_free.c:45
int bl1_is_conjnotrans(trans1_t trans)
Definition: bl1_is.c:25
void bl1_ztrmm_blas(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb)
Definition: bl1_trmm.c:661
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_zfree_saved_contigm(int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:82
Definition: blis_type_defs.h:56
void bl1_zfree_contigm(dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:61
void bl1_zcopymrt(uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_copymrt.c:328
void bl1_zconjmr(uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_conjmr.c:79
Definition: blis_type_defs.h:137

◆ bl1_ztrmm_blas()

void bl1_ztrmm_blas ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  lda,
dcomplex *  b,
int  ldb 
)

References bl1_param_map_to_netlib_diag(), bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_ztrmm(), CblasColMajor, and F77_ztrmm().

Referenced by bl1_ztrmm().

662 {
663 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
664  enum CBLAS_ORDER cblas_order = CblasColMajor;
665  enum CBLAS_SIDE cblas_side;
666  enum CBLAS_UPLO cblas_uplo;
667  enum CBLAS_TRANSPOSE cblas_trans;
668  enum CBLAS_DIAG cblas_diag;
669 
670  bl1_param_map_to_netlib_side( side, &cblas_side );
671  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
672  bl1_param_map_to_netlib_trans( trans, &cblas_trans );
673  bl1_param_map_to_netlib_diag( diag, &cblas_diag );
674 
675  cblas_ztrmm( cblas_order,
676  cblas_side,
677  cblas_uplo,
678  cblas_trans,
679  cblas_diag,
680  m,
681  n,
682  alpha,
683  a, lda,
684  b, ldb );
685 #else
686  char blas_side;
687  char blas_uplo;
688  char blas_trans;
689  char blas_diag;
690 
691  bl1_param_map_to_netlib_side( side, &blas_side );
692  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
693  bl1_param_map_to_netlib_trans( trans, &blas_trans );
694  bl1_param_map_to_netlib_diag( diag, &blas_diag );
695 
696  F77_ztrmm( &blas_side,
697  &blas_uplo,
698  &blas_trans,
699  &blas_diag,
700  &m,
701  &n,
702  alpha,
703  a, &lda,
704  b, &ldb );
705 #endif
706 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
void F77_ztrmm(char *side, char *uplo, char *transa, char *diag, int *m, int *n, dcomplex *alpha, dcomplex *a, int *lda, dcomplex *b, int *ldb)
CBLAS_DIAG
Definition: blis_prototypes_cblas.h:20
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
void cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb)
void bl1_param_map_to_netlib_side(side1_t blis_side, void *blas_side)
Definition: bl1_param_map.c:71
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
Definition: blis_prototypes_cblas.h:17
CBLAS_SIDE
Definition: blis_prototypes_cblas.h:21
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47
void bl1_param_map_to_netlib_diag(diag1_t blis_diag, void *blas_diag)
Definition: bl1_param_map.c:95

◆ bl1_ztrmmsx()

void bl1_ztrmmsx ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs,
dcomplex *  beta,
dcomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_z1(), bl1_zallocm(), bl1_zaxpymt(), bl1_zcopymt(), bl1_zcreate_contigm(), bl1_zcreate_contigmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigm(), bl1_zscalm(), bl1_ztrmm(), BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Trmmsx_external().

332 {
333  int m_save = m;
334  int n_save = n;
335  dcomplex* a_save = a;
336  dcomplex* b_save = b;
337  dcomplex* c_save = c;
338  int a_rs_save = a_rs;
339  int a_cs_save = a_cs;
340  int b_rs_save = b_rs;
341  int b_cs_save = b_cs;
342  int c_rs_save = c_rs;
343  int c_cs_save = c_cs;
344  dcomplex one = bl1_z1();
345  dcomplex* b_copy;
346  int dim_a;
347  int b_copy_rs, b_copy_cs;
348 
349  // Return early if possible.
350  if ( bl1_zero_dim2( m, n ) ) return;
351 
352  // If necessary, allocate, initialize, and use a temporary contiguous
353  // copy of each matrix rather than the original matrices.
354  bl1_set_dim_with_side( side, m, n, &dim_a );
355  bl1_zcreate_contigmr( uplo,
356  dim_a,
357  dim_a,
358  a_save, a_rs_save, a_cs_save,
359  &a, &a_rs, &a_cs );
360 
361  bl1_zcreate_contigm( m,
362  n,
363  b_save, b_rs_save, b_cs_save,
364  &b, &b_rs, &b_cs );
365 
366  bl1_zcreate_contigm( m,
367  n,
368  c_save, c_rs_save, c_cs_save,
369  &c, &c_rs, &c_cs );
370 
371  // Create a copy of B to use in the computation so the original matrix is
372  // left untouched.
373  b_copy = bl1_zallocm( m, n );
374 
375  // Match the strides of B_copy to that of B.
376  if ( bl1_is_col_storage( b_rs, b_cs ) )
377  {
378  b_copy_rs = 1;
379  b_copy_cs = m;
380  }
381  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
382  {
383  b_copy_rs = n;
384  b_copy_cs = 1;
385  }
386 
387  // Copy the contents of B to B_copy.
388  bl1_zcopymt( BLIS1_NO_TRANSPOSE,
389  m,
390  n,
391  b, b_rs, b_cs,
392  b_copy, b_copy_rs, b_copy_cs );
393 
394  // Perform the operation on B_copy.
395  bl1_ztrmm( side,
396  uplo,
397  trans,
398  diag,
399  m,
400  n,
401  alpha,
402  a, a_rs, a_cs,
403  b_copy, b_copy_rs, b_copy_cs );
404 
405  // Scale C by beta.
406  bl1_zscalm( BLIS1_NO_CONJUGATE,
407  m,
408  n,
409  beta,
410  c, c_rs, c_cs );
411 
412  // Add B_copy into C.
413  bl1_zaxpymt( BLIS1_NO_TRANSPOSE,
414  m,
415  n,
416  &one,
417  b_copy, b_copy_rs, b_copy_cs,
418  c, c_rs, c_cs );
419 
420  // Free the copy of B.
421  bl1_zfree( b_copy );
422 
423  // Free any temporary contiguous matrices, copying the result back to
424  // the original matrix.
425  bl1_zfree_contigm( a_save, a_rs_save, a_cs_save,
426  &a, &a_rs, &a_cs );
427 
428  bl1_zfree_contigm( b_save, b_rs_save, b_cs_save,
429  &b, &b_rs, &b_cs );
430 
431  bl1_zfree_saved_contigm( m_save,
432  n_save,
433  c_save, c_rs_save, c_cs_save,
434  &c, &c_rs, &c_cs );
435 }
void bl1_zcreate_contigm(int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:115
void bl1_zcreate_contigmr(uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:109
Definition: blis_type_defs.h:81
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
dcomplex * bl1_zallocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:45
void bl1_zscalm(conj1_t conj, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_scalm.c:273
void bl1_zfree(dcomplex *p)
Definition: bl1_free.c:45
void bl1_zaxpymt(trans1_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_axpymt.c:248
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_zfree_saved_contigm(int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:82
Definition: blis_type_defs.h:54
void bl1_zcopymt(trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:286
void bl1_zfree_contigm(dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:61
Definition: blis_type_defs.h:137
dcomplex bl1_z1(void)
Definition: bl1_constants.c:69
void bl1_ztrmm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_trmm.c:369

◆ bl1_ztrsm()

void bl1_ztrsm ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_is_col_storage(), bl1_is_conjnotrans(), bl1_set_dim_with_side(), bl1_zallocm(), bl1_zconjmr(), bl1_zcopymrt(), bl1_zcreate_contigm(), bl1_zcreate_contigmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigm(), bl1_ztrsm_blas(), and BLIS1_CONJ_NO_TRANSPOSE.

Referenced by bl1_ztrsmsx(), FLA_LU_nopiv_opz_var1(), FLA_LU_nopiv_opz_var2(), FLA_LU_nopiv_opz_var3(), FLA_LU_piv_opz_var3(), and FLA_Trsm_external().

370 {
371  int m_save = m;
372  int n_save = n;
373  dcomplex* a_save = a;
374  dcomplex* b_save = b;
375  int a_rs_save = a_rs;
376  int a_cs_save = a_cs;
377  int b_rs_save = b_rs;
378  int b_cs_save = b_cs;
379  dcomplex* a_conj;
380  int dim_a;
381  int lda, inca;
382  int ldb, incb;
383  int lda_conj, inca_conj;
384  int a_was_copied;
385 
386  // Return early if possible.
387  if ( bl1_zero_dim2( m, n ) ) return;
388 
389  // If necessary, allocate, initialize, and use a temporary contiguous
390  // copy of each matrix rather than the original matrices.
391  bl1_set_dim_with_side( side, m, n, &dim_a );
392  bl1_zcreate_contigmr( uplo,
393  dim_a,
394  dim_a,
395  a_save, a_rs_save, a_cs_save,
396  &a, &a_rs, &a_cs );
397 
398  bl1_zcreate_contigm( m,
399  n,
400  b_save, b_rs_save, b_cs_save,
401  &b, &b_rs, &b_cs );
402 
403  // Figure out whether A was copied to contiguous memory. This is used to
404  // prevent redundant copying.
405  a_was_copied = ( a != a_save );
406 
407  // Initialize with values assuming column-major storage.
408  lda = a_cs;
409  inca = a_rs;
410  ldb = b_cs;
411  incb = b_rs;
412 
413  // Adjust the parameters based on the storage of each matrix.
414  if ( bl1_is_col_storage( b_rs, b_cs ) )
415  {
416  if ( bl1_is_col_storage( a_rs, a_cs ) )
417  {
418  // requested operation: B_c := tr( uplo( A_c ) ) * B_c
419  // effective operation: B_c := tr( uplo( A_c ) ) * B_c
420  }
421  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
422  {
423  // requested operation: B_c := tr( uplo( A_r ) ) * B_c
424  // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c
425  bl1_swap_ints( lda, inca );
426 
427  bl1_toggle_uplo( uplo );
428  bl1_toggle_trans( trans );
429  }
430  }
431  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
432  {
433  if ( bl1_is_col_storage( a_rs, a_cs ) )
434  {
435  // requested operation: B_r := tr( uplo( A_c ) ) * B_r
436  // effective operation: B_c := B_c * tr( uplo( A_c ) )^T
437  bl1_swap_ints( ldb, incb );
438 
439  bl1_swap_ints( m, n );
440 
441  bl1_toggle_side( side );
442  bl1_toggle_trans( trans );
443  }
444  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
445  {
446  // requested operation: B_r := tr( uplo( A_r ) ) * B_r
447  // effective operation: B_c := B_c * tr( ~uplo( A_c ) )
448  bl1_swap_ints( ldb, incb );
449  bl1_swap_ints( lda, inca );
450 
451  bl1_swap_ints( m, n );
452 
453  bl1_toggle_side( side );
454  bl1_toggle_uplo( uplo );
455  }
456  }
457 
458  // Initialize with values assuming that trans is not conjnotrans.
459  a_conj = a;
460  lda_conj = lda;
461  inca_conj = inca;
462 
463  // We want to handle the conjnotrans case. The easiest way to do so is
464  // by making a conjugated copy of A.
465  if ( bl1_is_conjnotrans( trans ) && !a_was_copied )
466  {
467  int dim_a;
468 
469  bl1_set_dim_with_side( side, m, n, &dim_a );
470 
471  a_conj = bl1_zallocm( dim_a, dim_a );
472  lda_conj = dim_a;
473  inca_conj = 1;
474 
475  bl1_zcopymrt( uplo,
476  BLIS1_CONJ_NO_TRANSPOSE,
477  dim_a,
478  dim_a,
479  a, inca, lda,
480  a_conj, inca_conj, lda_conj );
481  }
482  else if ( bl1_is_conjnotrans( trans ) && a_was_copied )
483  {
484  int dim_a;
485 
486  bl1_set_dim_with_side( side, m, n, &dim_a );
487 
488  bl1_zconjmr( uplo,
489  dim_a,
490  dim_a,
491  a_conj, inca_conj, lda_conj );
492  }
493 
494  bl1_ztrsm_blas( side,
495  uplo,
496  trans,
497  diag,
498  m,
499  n,
500  alpha,
501  a_conj, lda_conj,
502  b, ldb );
503 
504  if ( bl1_is_conjnotrans( trans ) && !a_was_copied )
505  bl1_zfree( a_conj );
506 
507  // Free any temporary contiguous matrices, copying the result back to
508  // the original matrix.
509  bl1_zfree_contigm( a_save, a_rs_save, a_cs_save,
510  &a, &a_rs, &a_cs );
511 
512  bl1_zfree_saved_contigm( m_save,
513  n_save,
514  b_save, b_rs_save, b_cs_save,
515  &b, &b_rs, &b_cs );
516 }
void bl1_zcreate_contigm(int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:115
void bl1_zcreate_contigmr(uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:109
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
dcomplex * bl1_zallocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:45
void bl1_zfree(dcomplex *p)
Definition: bl1_free.c:45
int bl1_is_conjnotrans(trans1_t trans)
Definition: bl1_is.c:25
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_zfree_saved_contigm(int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:82
Definition: blis_type_defs.h:56
void bl1_zfree_contigm(dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:61
void bl1_zcopymrt(uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_copymrt.c:328
void bl1_zconjmr(uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_conjmr.c:79
Definition: blis_type_defs.h:137
void bl1_ztrsm_blas(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb)
Definition: bl1_trsm.c:661

◆ bl1_ztrsm_blas()

void bl1_ztrsm_blas ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  lda,
dcomplex *  b,
int  ldb 
)

References bl1_param_map_to_netlib_diag(), bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_ztrsm(), CblasColMajor, and F77_ztrsm().

Referenced by bl1_ztrsm().

662 {
663 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
664  enum CBLAS_ORDER cblas_order = CblasColMajor;
665  enum CBLAS_SIDE cblas_side;
666  enum CBLAS_UPLO cblas_uplo;
667  enum CBLAS_TRANSPOSE cblas_trans;
668  enum CBLAS_DIAG cblas_diag;
669 
670  bl1_param_map_to_netlib_side( side, &cblas_side );
671  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
672  bl1_param_map_to_netlib_trans( trans, &cblas_trans );
673  bl1_param_map_to_netlib_diag( diag, &cblas_diag );
674 
675  cblas_ztrsm( cblas_order,
676  cblas_side,
677  cblas_uplo,
678  cblas_trans,
679  cblas_diag,
680  m,
681  n,
682  alpha,
683  a, lda,
684  b, ldb );
685 #else
686  char blas_side;
687  char blas_uplo;
688  char blas_trans;
689  char blas_diag;
690 
691  bl1_param_map_to_netlib_side( side, &blas_side );
692  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
693  bl1_param_map_to_netlib_trans( trans, &blas_trans );
694  bl1_param_map_to_netlib_diag( diag, &blas_diag );
695 
696  F77_ztrsm( &blas_side,
697  &blas_uplo,
698  &blas_trans,
699  &blas_diag,
700  &m,
701  &n,
702  alpha,
703  a, &lda,
704  b, &ldb );
705 #endif
706 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
CBLAS_DIAG
Definition: blis_prototypes_cblas.h:20
void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb)
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void F77_ztrsm(char *side, char *uplo, char *transa, char *diag, int *m, int *n, dcomplex *alpha, dcomplex *a, int *lda, dcomplex *b, int *ldb)
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
void bl1_param_map_to_netlib_side(side1_t blis_side, void *blas_side)
Definition: bl1_param_map.c:71
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
Definition: blis_prototypes_cblas.h:17
CBLAS_SIDE
Definition: blis_prototypes_cblas.h:21
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47
void bl1_param_map_to_netlib_diag(diag1_t blis_diag, void *blas_diag)
Definition: bl1_param_map.c:95

◆ bl1_ztrsmsx()

void bl1_ztrsmsx ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs,
dcomplex *  beta,
dcomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_z1(), bl1_zallocm(), bl1_zaxpymt(), bl1_zcopymt(), bl1_zcreate_contigm(), bl1_zcreate_contigmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigm(), bl1_zscalm(), bl1_ztrsm(), BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Trsmsx_external().

332 {
333  int m_save = m;
334  int n_save = n;
335  dcomplex* a_save = a;
336  dcomplex* b_save = b;
337  dcomplex* c_save = c;
338  int a_rs_save = a_rs;
339  int a_cs_save = a_cs;
340  int b_rs_save = b_rs;
341  int b_cs_save = b_cs;
342  int c_rs_save = c_rs;
343  int c_cs_save = c_cs;
344  dcomplex one = bl1_z1();
345  dcomplex* b_copy;
346  int dim_a;
347  int b_copy_rs, b_copy_cs;
348 
349  // Return early if possible.
350  if ( bl1_zero_dim2( m, n ) ) return;
351 
352  // If necessary, allocate, initialize, and use a temporary contiguous
353  // copy of each matrix rather than the original matrices.
354  bl1_set_dim_with_side( side, m, n, &dim_a );
355  bl1_zcreate_contigmr( uplo,
356  dim_a,
357  dim_a,
358  a_save, a_rs_save, a_cs_save,
359  &a, &a_rs, &a_cs );
360 
361  bl1_zcreate_contigm( m,
362  n,
363  b_save, b_rs_save, b_cs_save,
364  &b, &b_rs, &b_cs );
365 
366  bl1_zcreate_contigm( m,
367  n,
368  c_save, c_rs_save, c_cs_save,
369  &c, &c_rs, &c_cs );
370 
371  // Create a copy of B to use in the computation so the original matrix is
372  // left untouched.
373  b_copy = bl1_zallocm( m, n );
374 
375  // Match the strides of B_copy to that of B.
376  if ( bl1_is_col_storage( b_rs, b_cs ) )
377  {
378  b_copy_rs = 1;
379  b_copy_cs = m;
380  }
381  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
382  {
383  b_copy_rs = n;
384  b_copy_cs = 1;
385  }
386 
387  // Copy the contents of B to B_copy.
388  bl1_zcopymt( BLIS1_NO_TRANSPOSE,
389  m,
390  n,
391  b, b_rs, b_cs,
392  b_copy, b_copy_rs, b_copy_cs );
393 
394  // Perform the operation on B_copy.
395  bl1_ztrsm( side,
396  uplo,
397  trans,
398  diag,
399  m,
400  n,
401  alpha,
402  a, a_rs, a_cs,
403  b_copy, b_copy_rs, b_copy_cs );
404 
405  // Scale C by beta.
406  bl1_zscalm( BLIS1_NO_CONJUGATE,
407  m,
408  n,
409  beta,
410  c, c_rs, c_cs );
411 
412  // Add B_copy into C.
413  bl1_zaxpymt( BLIS1_NO_TRANSPOSE,
414  m,
415  n,
416  &one,
417  b_copy, b_copy_rs, b_copy_cs,
418  c, c_rs, c_cs );
419 
420  // Free the copy of B.
421  bl1_zfree( b_copy );
422 
423  // Free any temporary contiguous matrices, copying the result back to
424  // the original matrix.
425  bl1_zfree_contigm( a_save, a_rs_save, a_cs_save,
426  &a, &a_rs, &a_cs );
427 
428  bl1_zfree_contigm( b_save, b_rs_save, b_cs_save,
429  &b, &b_rs, &b_cs );
430 
431  bl1_zfree_saved_contigm( m_save,
432  n_save,
433  c_save, c_rs_save, c_cs_save,
434  &c, &c_rs, &c_cs );
435 }
void bl1_zcreate_contigm(int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:115
void bl1_ztrsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_trsm.c:369
void bl1_zcreate_contigmr(uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:109
Definition: blis_type_defs.h:81
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
dcomplex * bl1_zallocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:45
void bl1_zscalm(conj1_t conj, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_scalm.c:273
void bl1_zfree(dcomplex *p)
Definition: bl1_free.c:45
void bl1_zaxpymt(trans1_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_axpymt.c:248
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_zfree_saved_contigm(int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:82
Definition: blis_type_defs.h:54
void bl1_zcopymt(trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:286
void bl1_zfree_contigm(dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:61
Definition: blis_type_defs.h:137
dcomplex bl1_z1(void)
Definition: bl1_constants.c:69