libflame  revision_anchor
Functions
bl1_symm.c File Reference

(r)

Functions

void bl1_ssymm (side1_t side, uplo1_t uplo, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
 
void bl1_dsymm (side1_t side, uplo1_t uplo, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
 
void bl1_csymm (side1_t side, uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs)
 
void bl1_zsymm (side1_t side, uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs)
 
void bl1_ssymm_blas (side1_t side, uplo1_t uplo, int m, int n, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc)
 
void bl1_dsymm_blas (side1_t side, uplo1_t uplo, int m, int n, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc)
 
void bl1_csymm_blas (side1_t side, uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc)
 
void bl1_zsymm_blas (side1_t side, uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc)
 

Function Documentation

◆ bl1_csymm()

void bl1_csymm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs,
scomplex *  beta,
scomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_c0(), bl1_c1(), bl1_callocm(), bl1_caxpymt(), bl1_ccopymt(), bl1_ccreate_contigm(), bl1_ccreate_contigmr(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigm(), bl1_cscalm(), bl1_csymm_blas(), bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by FLA_Symm_external().

536 {
537  int m_save = m;
538  int n_save = n;
539  scomplex* a_save = a;
540  scomplex* b_save = b;
541  scomplex* c_save = c;
542  int a_rs_save = a_rs;
543  int a_cs_save = a_cs;
544  int b_rs_save = b_rs;
545  int b_cs_save = b_cs;
546  int c_rs_save = c_rs;
547  int c_cs_save = c_cs;
548  scomplex zero = bl1_c0();
549  scomplex one = bl1_c1();
550  scomplex* b_copy;
551  scomplex* c_trans;
552  int dim_a;
553  int lda, inca;
554  int ldb, incb;
555  int ldc, incc;
556  int ldb_copy, incb_copy;
557  int ldc_trans, incc_trans;
558  int symm_needs_copyb = FALSE;
559  int symm_needs_transb = FALSE;
560  int symm_needs_axpyt = FALSE;
561 
562  // Return early if possible.
563  if ( bl1_zero_dim2( m, n ) ) return;
564 
565  // If necessary, allocate, initialize, and use a temporary contiguous
566  // copy of each matrix rather than the original matrices.
567  bl1_set_dim_with_side( side, m, n, &dim_a );
568  bl1_ccreate_contigmr( uplo,
569  dim_a,
570  dim_a,
571  a_save, a_rs_save, a_cs_save,
572  &a, &a_rs, &a_cs );
573 
574  bl1_ccreate_contigm( m,
575  n,
576  b_save, b_rs_save, b_cs_save,
577  &b, &b_rs, &b_cs );
578 
579  bl1_ccreate_contigm( m,
580  n,
581  c_save, c_rs_save, c_cs_save,
582  &c, &c_rs, &c_cs );
583 
584  // Initialize with values assuming column-major storage.
585  lda = a_cs;
586  inca = a_rs;
587  ldb = b_cs;
588  incb = b_rs;
589  ldc = c_cs;
590  incc = c_rs;
591 
592  // Adjust the parameters based on the storage of each matrix.
593  if ( bl1_is_col_storage( c_rs, c_cs ) )
594  {
595  if ( bl1_is_col_storage( a_rs, a_cs ) )
596  {
597  if ( bl1_is_col_storage( b_rs, b_cs ) )
598  {
599  // requested operation: C_c += uplo( A_c ) * B_c
600  // effective operation: C_c += uplo( A_c ) * B_c
601  }
602  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
603  {
604  // requested operation: C_c += uplo( A_c ) * B_r
605  // effective operation: C_c += uplo( A_c ) * B_c
606  symm_needs_copyb = TRUE;
607  }
608  }
609  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
610  {
611  if ( bl1_is_col_storage( b_rs, b_cs ) )
612  {
613  // requested operation: C_c += uplo( A_r ) * B_c
614  // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
615  bl1_swap_ints( lda, inca );
616 
617  bl1_toggle_uplo( uplo );
618  }
619  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
620  {
621  // requested operation: C_c += uplo( A_r ) * B_r
622  // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
623  bl1_swap_ints( lda, inca );
624  bl1_swap_ints( ldb, incb );
625 
626  bl1_toggle_side( side );
627  bl1_toggle_uplo( uplo );
628 
629  symm_needs_axpyt = TRUE;
630  }
631  }
632  }
633  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
634  {
635  if ( bl1_is_col_storage( a_rs, a_cs ) )
636  {
637  if ( bl1_is_col_storage( b_rs, b_cs ) )
638  {
639  // requested operation: C_r += uplo( A_c ) * B_c
640  // effective operation: C_c += ( uplo( A_c ) * B_c )^T
641  bl1_swap_ints( ldc, incc );
642 
643  bl1_swap_ints( m, n );
644 
645  symm_needs_axpyt = TRUE;
646  }
647  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
648  {
649  // requested operation: C_r += uplo( A_c ) * B_r
650  // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
651  bl1_swap_ints( ldc, incc );
652  bl1_swap_ints( ldb, incb );
653 
654  bl1_swap_ints( m, n );
655 
656  bl1_toggle_side( side );
657  }
658  }
659  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
660  {
661  if ( bl1_is_col_storage( b_rs, b_cs ) )
662  {
663  // requested operation: C_r += uplo( A_r ) * B_c
664  // effective operation: C_c += B_c^T * ~uplo( A_c )
665  bl1_swap_ints( ldc, incc );
666  bl1_swap_ints( lda, inca );
667 
668  bl1_swap_ints( m, n );
669 
670  bl1_toggle_side( side );
671  bl1_toggle_uplo( uplo );
672 
673  symm_needs_copyb = TRUE;
674  symm_needs_transb = TRUE;
675  }
676  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
677  {
678  // requested operation: C_r += uplo( A_r ) * B_r
679  // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
680  bl1_swap_ints( ldc, incc );
681  bl1_swap_ints( lda, inca );
682  bl1_swap_ints( ldb, incb );
683 
684  bl1_swap_ints( m, n );
685 
686  bl1_toggle_uplo( uplo );
687  bl1_toggle_side( side );
688  }
689  }
690  }
691 
692  // We need a temporary matrix for the cases where B needs to be copied.
693  b_copy = b;
694  ldb_copy = ldb;
695  incb_copy = incb;
696 
697  // There are two cases where we need to make a copy of B: one where the
698  // copy's dimensions are transposed from the original B, and one where
699  // the dimensions are not swapped.
700  if ( symm_needs_copyb )
701  {
702  trans1_t transb;
703 
704  // Set transb, which determines whether or not we need to copy from B
705  // as if it needs a transposition. If a transposition is needed, then
706  // m and n have already been swapped. So in either case m
707  // represents the leading dimension of the copy.
708  if ( symm_needs_transb ) transb = BLIS1_TRANSPOSE;
709  else transb = BLIS1_NO_TRANSPOSE;
710 
711  b_copy = bl1_callocm( m, n );
712  ldb_copy = m;
713  incb_copy = 1;
714 
715  bl1_ccopymt( transb,
716  m,
717  n,
718  b, incb, ldb,
719  b_copy, incb_copy, ldb_copy );
720  }
721 
722  // There are two cases where we need to perform the symm and then axpy
723  // the result into C with a transposition. We handle those cases here.
724  if ( symm_needs_axpyt )
725  {
726  // We need a temporary matrix for holding C^T. Notice that m and n
727  // represent the dimensions of C, and thus C_trans is n-by-m
728  // (interpreting both as column-major matrices). So the leading
729  // dimension of the temporary matrix holding C^T is n.
730  c_trans = bl1_callocm( n, m );
731  ldc_trans = n;
732  incc_trans = 1;
733 
734  // Compute A * B (or B * A) and store the result in C_trans.
735  // Note that there is no overlap between the axpyt cases and
736  // the conja/copyb cases, hence the use of a, b, lda, and ldb.
737  bl1_csymm_blas( side,
738  uplo,
739  n,
740  m,
741  alpha,
742  a, lda,
743  b, ldb,
744  &zero,
745  c_trans, ldc_trans );
746 
747  // Scale C by beta.
748  bl1_cscalm( BLIS1_NO_CONJUGATE,
749  m,
750  n,
751  beta,
752  c, incc, ldc );
753 
754  // And finally, accumulate the matrix product in C_trans into C
755  // with a transpose.
756  bl1_caxpymt( BLIS1_TRANSPOSE,
757  m,
758  n,
759  &one,
760  c_trans, incc_trans, ldc_trans,
761  c, incc, ldc );
762 
763  // Free the temporary matrix for C.
764  bl1_cfree( c_trans );
765  }
766  else // no extra axpyt step needed
767  {
768  bl1_csymm_blas( side,
769  uplo,
770  m,
771  n,
772  alpha,
773  a, lda,
774  b_copy, ldb_copy,
775  beta,
776  c, ldc );
777  }
778 
779  if ( symm_needs_copyb )
780  bl1_cfree( b_copy );
781 
782  // Free any temporary contiguous matrices, copying the result back to
783  // the original matrix.
784  bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
785  &a, &a_rs, &a_cs );
786 
787  bl1_cfree_contigm( b_save, b_rs_save, b_cs_save,
788  &b, &b_rs, &b_cs );
789 
790  bl1_cfree_saved_contigm( m_save,
791  n_save,
792  c_save, c_rs_save, c_cs_save,
793  &c, &c_rs, &c_cs );
794 }
void bl1_cfree_saved_contigm(int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:59
Definition: blis_type_defs.h:81
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
scomplex bl1_c1(void)
Definition: bl1_constants.c:61
void bl1_ccreate_contigm(int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:81
void bl1_ccreate_contigmr(uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:77
scomplex bl1_c0(void)
Definition: bl1_constants.c:125
trans1_t
Definition: blis_type_defs.h:52
Definition: blis_type_defs.h:55
void bl1_caxpymt(trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_axpymt.c:149
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
Definition: blis_type_defs.h:54
void bl1_ccopymt(trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:215
Definition: blis_type_defs.h:132
void bl1_cscalm(conj1_t conj, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs)
Definition: bl1_scalm.c:169
scomplex * bl1_callocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:40
void bl1_cfree(scomplex *p)
Definition: bl1_free.c:40
void bl1_csymm_blas(side1_t side, uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc)
Definition: bl1_symm.c:1137
void bl1_cfree_contigm(scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:45

◆ bl1_csymm_blas()

void bl1_csymm_blas ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  lda,
scomplex *  b,
int  ldb,
scomplex *  beta,
scomplex *  c,
int  ldc 
)

References bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(), cblas_csymm(), CblasColMajor, and F77_csymm().

Referenced by bl1_csymm().

1138 {
1139 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1140  enum CBLAS_ORDER cblas_order = CblasColMajor;
1141  enum CBLAS_SIDE cblas_side;
1142  enum CBLAS_UPLO cblas_uplo;
1143 
1144  bl1_param_map_to_netlib_side( side, &cblas_side );
1145  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
1146 
1147  cblas_csymm( cblas_order,
1148  cblas_side,
1149  cblas_uplo,
1150  m,
1151  n,
1152  alpha,
1153  a, lda,
1154  b, ldb,
1155  beta,
1156  c, ldc );
1157 #else
1158  char blas_side;
1159  char blas_uplo;
1160 
1161  bl1_param_map_to_netlib_side( side, &blas_side );
1162  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
1163 
1164  F77_csymm( &blas_side,
1165  &blas_uplo,
1166  &m,
1167  &n,
1168  alpha,
1169  a, &lda,
1170  b, &ldb,
1171  beta,
1172  c, &ldc );
1173 #endif
1174 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
void cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc)
void F77_csymm(char *side, char *uplo, int *m, int *n, scomplex *alpha, scomplex *a, int *lda, scomplex *b, int *ldb, scomplex *beta, scomplex *c, int *ldc)
void bl1_param_map_to_netlib_side(side1_t blis_side, void *blas_side)
Definition: bl1_param_map.c:71
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
Definition: blis_prototypes_cblas.h:17
CBLAS_SIDE
Definition: blis_prototypes_cblas.h:21
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47

◆ bl1_dsymm()

void bl1_dsymm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs,
double *  beta,
double *  c,
int  c_rs,
int  c_cs 
)

References bl1_d0(), bl1_d1(), bl1_dallocm(), bl1_daxpymt(), bl1_dcopymt(), bl1_dcreate_contigm(), bl1_dcreate_contigmr(), bl1_dfree(), bl1_dfree_contigm(), bl1_dfree_saved_contigm(), bl1_dscalm(), bl1_dsymm_blas(), bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by bl1_dhemm(), FLA_Hemm_external(), and FLA_Symm_external().

275 {
276  int m_save = m;
277  int n_save = n;
278  double* a_save = a;
279  double* b_save = b;
280  double* c_save = c;
281  int a_rs_save = a_rs;
282  int a_cs_save = a_cs;
283  int b_rs_save = b_rs;
284  int b_cs_save = b_cs;
285  int c_rs_save = c_rs;
286  int c_cs_save = c_cs;
287  double zero = bl1_d0();
288  double one = bl1_d1();
289  double* b_copy;
290  double* c_trans;
291  int dim_a;
292  int lda, inca;
293  int ldb, incb;
294  int ldc, incc;
295  int ldb_copy, incb_copy;
296  int ldc_trans, incc_trans;
297  int symm_needs_copyb = FALSE;
298  int symm_needs_transb = FALSE;
299  int symm_needs_axpyt = FALSE;
300 
301  // Return early if possible.
302  if ( bl1_zero_dim2( m, n ) ) return;
303 
304  // If necessary, allocate, initialize, and use a temporary contiguous
305  // copy of each matrix rather than the original matrices.
306  bl1_set_dim_with_side( side, m, n, &dim_a );
307  bl1_dcreate_contigmr( uplo,
308  dim_a,
309  dim_a,
310  a_save, a_rs_save, a_cs_save,
311  &a, &a_rs, &a_cs );
312 
313  bl1_dcreate_contigm( m,
314  n,
315  b_save, b_rs_save, b_cs_save,
316  &b, &b_rs, &b_cs );
317 
318  bl1_dcreate_contigm( m,
319  n,
320  c_save, c_rs_save, c_cs_save,
321  &c, &c_rs, &c_cs );
322 
323  // Initialize with values assuming column-major storage.
324  lda = a_cs;
325  inca = a_rs;
326  ldb = b_cs;
327  incb = b_rs;
328  ldc = c_cs;
329  incc = c_rs;
330 
331  // Adjust the parameters based on the storage of each matrix.
332  if ( bl1_is_col_storage( c_rs, c_cs ) )
333  {
334  if ( bl1_is_col_storage( a_rs, a_cs ) )
335  {
336  if ( bl1_is_col_storage( b_rs, b_cs ) )
337  {
338  // requested operation: C_c += uplo( A_c ) * B_c
339  // effective operation: C_c += uplo( A_c ) * B_c
340  }
341  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
342  {
343  // requested operation: C_c += uplo( A_c ) * B_r
344  // effective operation: C_c += uplo( A_c ) * B_c
345  symm_needs_copyb = TRUE;
346  }
347  }
348  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
349  {
350  if ( bl1_is_col_storage( b_rs, b_cs ) )
351  {
352  // requested operation: C_c += uplo( A_r ) * B_c
353  // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
354  bl1_swap_ints( lda, inca );
355 
356  bl1_toggle_uplo( uplo );
357  }
358  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
359  {
360  // requested operation: C_c += uplo( A_r ) * B_r
361  // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
362  bl1_swap_ints( lda, inca );
363  bl1_swap_ints( ldb, incb );
364 
365  bl1_toggle_side( side );
366  bl1_toggle_uplo( uplo );
367 
368  symm_needs_axpyt = TRUE;
369  }
370  }
371  }
372  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
373  {
374  if ( bl1_is_col_storage( a_rs, a_cs ) )
375  {
376  if ( bl1_is_col_storage( b_rs, b_cs ) )
377  {
378  // requested operation: C_r += uplo( A_c ) * B_c
379  // effective operation: C_c += ( uplo( A_c ) * B_c )^T
380  bl1_swap_ints( ldc, incc );
381 
382  bl1_swap_ints( m, n );
383 
384  symm_needs_axpyt = TRUE;
385  }
386  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
387  {
388  // requested operation: C_r += uplo( A_c ) * B_r
389  // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
390  bl1_swap_ints( ldc, incc );
391  bl1_swap_ints( ldb, incb );
392 
393  bl1_swap_ints( m, n );
394 
395  bl1_toggle_side( side );
396  }
397  }
398  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
399  {
400  if ( bl1_is_col_storage( b_rs, b_cs ) )
401  {
402  // requested operation: C_r += uplo( A_r ) * B_c
403  // effective operation: C_c += B_c^T * ~uplo( A_c )
404  bl1_swap_ints( ldc, incc );
405  bl1_swap_ints( lda, inca );
406 
407  bl1_swap_ints( m, n );
408 
409  bl1_toggle_side( side );
410  bl1_toggle_uplo( uplo );
411 
412  symm_needs_copyb = TRUE;
413  symm_needs_transb = TRUE;
414  }
415  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
416  {
417  // requested operation: C_r += uplo( A_r ) * B_r
418  // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
419  bl1_swap_ints( ldc, incc );
420  bl1_swap_ints( lda, inca );
421  bl1_swap_ints( ldb, incb );
422 
423  bl1_swap_ints( m, n );
424 
425  bl1_toggle_uplo( uplo );
426  bl1_toggle_side( side );
427  }
428  }
429  }
430 
431  // We need a temporary matrix for the cases where B needs to be copied.
432  b_copy = b;
433  ldb_copy = ldb;
434  incb_copy = incb;
435 
436  // There are two cases where we need to make a copy of B: one where the
437  // copy's dimensions are transposed from the original B, and one where
438  // the dimensions are not swapped.
439  if ( symm_needs_copyb )
440  {
441  trans1_t transb;
442 
443  // Set transb, which determines whether or not we need to copy from B
444  // as if it needs a transposition. If a transposition is needed, then
445  // m and n have already been swapped. So in either case m
446  // represents the leading dimension of the copy.
447  if ( symm_needs_transb ) transb = BLIS1_TRANSPOSE;
448  else transb = BLIS1_NO_TRANSPOSE;
449 
450  b_copy = bl1_dallocm( m, n );
451  ldb_copy = m;
452  incb_copy = 1;
453 
454  bl1_dcopymt( transb,
455  m,
456  n,
457  b, incb, ldb,
458  b_copy, incb_copy, ldb_copy );
459  }
460 
461  // There are two cases where we need to perform the symm and then axpy
462  // the result into C with a transposition. We handle those cases here.
463  if ( symm_needs_axpyt )
464  {
465  // We need a temporary matrix for holding C^T. Notice that m and n
466  // represent the dimensions of C, and thus C_trans is n-by-m
467  // (interpreting both as column-major matrices). So the leading
468  // dimension of the temporary matrix holding C^T is n.
469  c_trans = bl1_dallocm( n, m );
470  ldc_trans = n;
471  incc_trans = 1;
472 
473  // Compute A * B (or B * A) and store the result in C_trans.
474  // Note that there is no overlap between the axpyt cases and
475  // the conja/copyb cases, hence the use of a, b, lda, and ldb.
476  bl1_dsymm_blas( side,
477  uplo,
478  n,
479  m,
480  alpha,
481  a, lda,
482  b, ldb,
483  &zero,
484  c_trans, ldc_trans );
485 
486  // Scale C by beta.
487  bl1_dscalm( BLIS1_NO_CONJUGATE,
488  m,
489  n,
490  beta,
491  c, incc, ldc );
492 
493  // And finally, accumulate the matrix product in C_trans into C
494  // with a transpose.
495  bl1_daxpymt( BLIS1_TRANSPOSE,
496  m,
497  n,
498  &one,
499  c_trans, incc_trans, ldc_trans,
500  c, incc, ldc );
501 
502  // Free the temporary matrix for C.
503  bl1_dfree( c_trans );
504  }
505  else // no extra axpyt step needed
506  {
507  bl1_dsymm_blas( side,
508  uplo,
509  m,
510  n,
511  alpha,
512  a, lda,
513  b_copy, ldb_copy,
514  beta,
515  c, ldc );
516  }
517 
518  if ( symm_needs_copyb )
519  bl1_dfree( b_copy );
520 
521  // Free any temporary contiguous matrices, copying the result back to
522  // the original matrix.
523  bl1_dfree_contigm( a_save, a_rs_save, a_cs_save,
524  &a, &a_rs, &a_cs );
525 
526  bl1_dfree_contigm( b_save, b_rs_save, b_cs_save,
527  &b, &b_rs, &b_cs );
528 
529  bl1_dfree_saved_contigm( m_save,
530  n_save,
531  c_save, c_rs_save, c_cs_save,
532  &c, &c_rs, &c_cs );
533 }
Definition: blis_type_defs.h:81
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
double bl1_d0(void)
Definition: bl1_constants.c:118
void bl1_dfree(double *p)
Definition: bl1_free.c:35
void bl1_dsymm_blas(side1_t side, uplo1_t uplo, int m, int n, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc)
Definition: bl1_symm.c:1098
void bl1_dcreate_contigm(int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:47
void bl1_dcopymt(trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:148
trans1_t
Definition: blis_type_defs.h:52
Definition: blis_type_defs.h:55
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
Definition: blis_type_defs.h:54
void bl1_dfree_saved_contigm(int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:36
void bl1_dfree_contigm(double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:29
double * bl1_dallocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:35
void bl1_daxpymt(trans1_t trans, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition: bl1_axpymt.c:81
void bl1_dcreate_contigmr(uplo1_t uplo, int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:45
double bl1_d1(void)
Definition: bl1_constants.c:54
void bl1_dscalm(conj1_t conj, int m, int n, double *alpha, double *a, int a_rs, int a_cs)
Definition: bl1_scalm.c:65

◆ bl1_dsymm_blas()

void bl1_dsymm_blas ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
double *  alpha,
double *  a,
int  lda,
double *  b,
int  ldb,
double *  beta,
double *  c,
int  ldc 
)

References bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(), cblas_dsymm(), CblasColMajor, and F77_dsymm().

Referenced by bl1_dsymm().

1099 {
1100 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1101  enum CBLAS_ORDER cblas_order = CblasColMajor;
1102  enum CBLAS_SIDE cblas_side;
1103  enum CBLAS_UPLO cblas_uplo;
1104 
1105  bl1_param_map_to_netlib_side( side, &cblas_side );
1106  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
1107 
1108  cblas_dsymm( cblas_order,
1109  cblas_side,
1110  cblas_uplo,
1111  m,
1112  n,
1113  *alpha,
1114  a, lda,
1115  b, ldb,
1116  *beta,
1117  c, ldc );
1118 #else
1119  char blas_side;
1120  char blas_uplo;
1121 
1122  bl1_param_map_to_netlib_side( side, &blas_side );
1123  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
1124 
1125  F77_dsymm( &blas_side,
1126  &blas_uplo,
1127  &m,
1128  &n,
1129  alpha,
1130  a, &lda,
1131  b, &ldb,
1132  beta,
1133  c, &ldc );
1134 #endif
1135 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
void cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc)
void bl1_param_map_to_netlib_side(side1_t blis_side, void *blas_side)
Definition: bl1_param_map.c:71
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
Definition: blis_prototypes_cblas.h:17
CBLAS_SIDE
Definition: blis_prototypes_cblas.h:21
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47
void F77_dsymm(char *side, char *uplo, int *m, int *n, double *alpha, double *a, int *lda, double *b, int *ldb, double *beta, double *c, int *ldc)

◆ bl1_ssymm()

void bl1_ssymm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs,
float *  beta,
float *  c,
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_s0(), bl1_s1(), bl1_sallocm(), bl1_saxpymt(), bl1_scopymt(), bl1_screate_contigm(), bl1_screate_contigmr(), bl1_set_dim_with_side(), bl1_sfree(), bl1_sfree_contigm(), bl1_sfree_saved_contigm(), bl1_sscalm(), bl1_ssymm_blas(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by bl1_shemm(), FLA_Hemm_external(), and FLA_Symm_external().

14 {
15  int m_save = m;
16  int n_save = n;
17  float* a_save = a;
18  float* b_save = b;
19  float* c_save = c;
20  int a_rs_save = a_rs;
21  int a_cs_save = a_cs;
22  int b_rs_save = b_rs;
23  int b_cs_save = b_cs;
24  int c_rs_save = c_rs;
25  int c_cs_save = c_cs;
26  float zero = bl1_s0();
27  float one = bl1_s1();
28  float* b_copy;
29  float* c_trans;
30  int dim_a;
31  int lda, inca;
32  int ldb, incb;
33  int ldc, incc;
34  int ldb_copy, incb_copy;
35  int ldc_trans, incc_trans;
36  int symm_needs_copyb = FALSE;
37  int symm_needs_transb = FALSE;
38  int symm_needs_axpyt = FALSE;
39 
40  // Return early if possible.
41  if ( bl1_zero_dim2( m, n ) ) return;
42 
43  // If necessary, allocate, initialize, and use a temporary contiguous
44  // copy of each matrix rather than the original matrices.
45  bl1_set_dim_with_side( side, m, n, &dim_a );
46  bl1_screate_contigmr( uplo,
47  dim_a,
48  dim_a,
49  a_save, a_rs_save, a_cs_save,
50  &a, &a_rs, &a_cs );
51 
52  bl1_screate_contigm( m,
53  n,
54  b_save, b_rs_save, b_cs_save,
55  &b, &b_rs, &b_cs );
56 
57  bl1_screate_contigm( m,
58  n,
59  c_save, c_rs_save, c_cs_save,
60  &c, &c_rs, &c_cs );
61 
62  // Initialize with values assuming column-major storage.
63  lda = a_cs;
64  inca = a_rs;
65  ldb = b_cs;
66  incb = b_rs;
67  ldc = c_cs;
68  incc = c_rs;
69 
70  // Adjust the parameters based on the storage of each matrix.
71  if ( bl1_is_col_storage( c_rs, c_cs ) )
72  {
73  if ( bl1_is_col_storage( a_rs, a_cs ) )
74  {
75  if ( bl1_is_col_storage( b_rs, b_cs ) )
76  {
77  // requested operation: C_c += uplo( A_c ) * B_c
78  // effective operation: C_c += uplo( A_c ) * B_c
79  }
80  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
81  {
82  // requested operation: C_c += uplo( A_c ) * B_r
83  // effective operation: C_c += uplo( A_c ) * B_c
84  symm_needs_copyb = TRUE;
85  }
86  }
87  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
88  {
89  if ( bl1_is_col_storage( b_rs, b_cs ) )
90  {
91  // requested operation: C_c += uplo( A_r ) * B_c
92  // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
93  bl1_swap_ints( lda, inca );
94 
95  bl1_toggle_uplo( uplo );
96  }
97  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
98  {
99  // requested operation: C_c += uplo( A_r ) * B_r
100  // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
101  bl1_swap_ints( lda, inca );
102  bl1_swap_ints( ldb, incb );
103 
104  bl1_toggle_side( side );
105  bl1_toggle_uplo( uplo );
106 
107  symm_needs_axpyt = TRUE;
108  }
109  }
110  }
111  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
112  {
113  if ( bl1_is_col_storage( a_rs, a_cs ) )
114  {
115  if ( bl1_is_col_storage( b_rs, b_cs ) )
116  {
117  // requested operation: C_r += uplo( A_c ) * B_c
118  // effective operation: C_c += ( uplo( A_c ) * B_c )^T
119  bl1_swap_ints( ldc, incc );
120 
121  bl1_swap_ints( m, n );
122 
123  symm_needs_axpyt = TRUE;
124  }
125  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
126  {
127  // requested operation: C_r += uplo( A_c ) * B_r
128  // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
129  bl1_swap_ints( ldc, incc );
130  bl1_swap_ints( ldb, incb );
131 
132  bl1_swap_ints( m, n );
133 
134  bl1_toggle_side( side );
135  }
136  }
137  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
138  {
139  if ( bl1_is_col_storage( b_rs, b_cs ) )
140  {
141  // requested operation: C_r += uplo( A_r ) * B_c
142  // effective operation: C_c += B_c^T * ~uplo( A_c )
143  bl1_swap_ints( ldc, incc );
144  bl1_swap_ints( lda, inca );
145 
146  bl1_swap_ints( m, n );
147 
148  bl1_toggle_side( side );
149  bl1_toggle_uplo( uplo );
150 
151  symm_needs_copyb = TRUE;
152  symm_needs_transb = TRUE;
153  }
154  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
155  {
156  // requested operation: C_r += uplo( A_r ) * B_r
157  // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
158  bl1_swap_ints( ldc, incc );
159  bl1_swap_ints( lda, inca );
160  bl1_swap_ints( ldb, incb );
161 
162  bl1_swap_ints( m, n );
163 
164  bl1_toggle_uplo( uplo );
165  bl1_toggle_side( side );
166  }
167  }
168  }
169 
170  // We need a temporary matrix for the cases where B needs to be copied.
171  b_copy = b;
172  ldb_copy = ldb;
173  incb_copy = incb;
174 
175  // There are two cases where we need to make a copy of B: one where the
176  // copy's dimensions are transposed from the original B, and one where
177  // the dimensions are not swapped.
178  if ( symm_needs_copyb )
179  {
180  trans1_t transb;
181 
182  // Set transb, which determines whether or not we need to copy from B
183  // as if it needs a transposition. If a transposition is needed, then
184  // m and n have already been swapped. So in either case m
185  // represents the leading dimension of the copy.
186  if ( symm_needs_transb ) transb = BLIS1_TRANSPOSE;
187  else transb = BLIS1_NO_TRANSPOSE;
188 
189  b_copy = bl1_sallocm( m, n );
190  ldb_copy = m;
191  incb_copy = 1;
192 
193  bl1_scopymt( transb,
194  m,
195  n,
196  b, incb, ldb,
197  b_copy, incb_copy, ldb_copy );
198  }
199 
200  // There are two cases where we need to perform the symm and then axpy
201  // the result into C with a transposition. We handle those cases here.
202  if ( symm_needs_axpyt )
203  {
204  // We need a temporary matrix for holding C^T. Notice that m and n
205  // represent the dimensions of C, and thus C_trans is n-by-m
206  // (interpreting both as column-major matrices). So the leading
207  // dimension of the temporary matrix holding C^T is n.
208  c_trans = bl1_sallocm( n, m );
209  ldc_trans = n;
210  incc_trans = 1;
211 
212  // Compute A * B (or B * A) and store the result in C_trans.
213  // Note that there is no overlap between the axpyt cases and
214  // the conja/copyb cases, hence the use of a, b, lda, and ldb.
215  bl1_ssymm_blas( side,
216  uplo,
217  n,
218  m,
219  alpha,
220  a, lda,
221  b, ldb,
222  &zero,
223  c_trans, ldc_trans );
224 
225  // Scale C by beta.
226  bl1_sscalm( BLIS1_NO_CONJUGATE,
227  m,
228  n,
229  beta,
230  c, incc, ldc );
231 
232  // And finally, accumulate the matrix product in C_trans into C
233  // with a transpose.
234  bl1_saxpymt( BLIS1_TRANSPOSE,
235  m,
236  n,
237  &one,
238  c_trans, incc_trans, ldc_trans,
239  c, incc, ldc );
240 
241  // Free the temporary matrix for C.
242  bl1_sfree( c_trans );
243  }
244  else // no extra axpyt step needed
245  {
246  bl1_ssymm_blas( side,
247  uplo,
248  m,
249  n,
250  alpha,
251  a, lda,
252  b_copy, ldb_copy,
253  beta,
254  c, ldc );
255  }
256 
257  if ( symm_needs_copyb )
258  bl1_sfree( b_copy );
259 
260  // Free any temporary contiguous matrices, copying the result back to
261  // the original matrix.
262  bl1_sfree_contigm( a_save, a_rs_save, a_cs_save,
263  &a, &a_rs, &a_cs );
264 
265  bl1_sfree_contigm( b_save, b_rs_save, b_cs_save,
266  &b, &b_rs, &b_cs );
267 
268  bl1_sfree_saved_contigm( m_save,
269  n_save,
270  c_save, c_rs_save, c_cs_save,
271  &c, &c_rs, &c_cs );
272 }
void bl1_sfree_saved_contigm(int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:13
float * bl1_sallocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:30
float bl1_s1(void)
Definition: bl1_constants.c:47
Definition: blis_type_defs.h:81
void bl1_sfree(float *p)
Definition: bl1_free.c:30
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
void bl1_screate_contigm(int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:13
trans1_t
Definition: blis_type_defs.h:52
Definition: blis_type_defs.h:55
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_ssymm_blas(side1_t side, uplo1_t uplo, int m, int n, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc)
Definition: bl1_symm.c:1059
void bl1_sfree_contigm(float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:13
void bl1_sscalm(conj1_t conj, int m, int n, float *alpha, float *a, int a_rs, int a_cs)
Definition: bl1_scalm.c:13
void bl1_saxpymt(trans1_t trans, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
Definition: bl1_axpymt.c:13
Definition: blis_type_defs.h:54
void bl1_scopymt(trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:81
void bl1_screate_contigmr(uplo1_t uplo, int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:13
float bl1_s0(void)
Definition: bl1_constants.c:111

◆ bl1_ssymm_blas()

void bl1_ssymm_blas ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
float *  alpha,
float *  a,
int  lda,
float *  b,
int  ldb,
float *  beta,
float *  c,
int  ldc 
)

References bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(), cblas_ssymm(), CblasColMajor, and F77_ssymm().

Referenced by bl1_ssymm().

1060 {
     // Thin dispatch layer for the single-precision real symm operation.
     // Depending on BLIS1_ENABLE_CBLAS_INTERFACES, the call is forwarded to
     // either cblas_ssymm() or the Fortran-77 symbol F77_ssymm(). Both
     // branches pass m, n, a/lda, b/ldb and c/ldc through unchanged; they
     // differ only in how side/uplo are represented and in how scalars
     // are passed.
1061 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
     // CBLAS branch: the matrix order is fixed to column-major.
1062  enum CBLAS_ORDER cblas_order = CblasColMajor;
1063  enum CBLAS_SIDE cblas_side;
1064  enum CBLAS_UPLO cblas_uplo;
1065 
     // Fill in the CBLAS enum values corresponding to side and uplo.
1066  bl1_param_map_to_netlib_side( side, &cblas_side );
1067  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
1068 
     // cblas_ssymm() takes alpha and beta by value, so they are
     // dereferenced here.
1069  cblas_ssymm( cblas_order,
1070  cblas_side,
1071  cblas_uplo,
1072  m,
1073  n,
1074  *alpha,
1075  a, lda,
1076  b, ldb,
1077  *beta,
1078  c, ldc );
1079 #else
     // Fortran-77 branch: side and uplo are carried as single chars.
1080  char blas_side;
1081  char blas_uplo;
1082 
1083  bl1_param_map_to_netlib_side( side, &blas_side );
1084  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
1085 
     // F77_ssymm() takes every argument by address, including the
     // integer dimensions and leading dimensions.
1086  F77_ssymm( &blas_side,
1087  &blas_uplo,
1088  &m,
1089  &n,
1090  alpha,
1091  a, &lda,
1092  b, &ldb,
1093  beta,
1094  c, &ldc );
1095 #endif
1096 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
void cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc)
void bl1_param_map_to_netlib_side(side1_t blis_side, void *blas_side)
Definition: bl1_param_map.c:71
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
Definition: blis_prototypes_cblas.h:17
CBLAS_SIDE
Definition: blis_prototypes_cblas.h:21
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47
void F77_ssymm(char *side, char *uplo, int *m, int *n, float *alpha, float *a, int *lda, float *b, int *ldb, float *beta, float *c, int *ldc)

◆ bl1_zsymm()

void bl1_zsymm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs,
dcomplex *  beta,
dcomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_z0(), bl1_z1(), bl1_zallocm(), bl1_zaxpymt(), bl1_zcopymt(), bl1_zcreate_contigm(), bl1_zcreate_contigmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigm(), bl1_zscalm(), bl1_zsymm_blas(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by FLA_Symm_external().

797 {
     // Double-precision complex symm wrapper that accepts A, B and C with
     // independent row/column strides and maps the request onto the
     // column-major bl1_zsymm_blas().
     //
     // Outline:
     //   1. Return immediately if bl1_zero_dim2( m, n ) reports a zero
     //      dimension.
     //   2. Obtain contiguous working views of A (dim_a-by-dim_a, chosen
     //      from side), B (m-by-n) and C (m-by-n) via the create_contig
     //      helpers; the *_save variables remember the caller's originals.
     //   3. Start from column-major assumptions (ld = column stride,
     //      inc = row stride) and, per storage combination, swap strides
     //      and/or m and n and toggle side/uplo so that a column-major
     //      symm yields the requested result.
     //   4. Two combinations need a packed copy of B (optionally
     //      transposed); two others need the product formed in a
     //      temporary and then accumulated into C with a transpose.
     //   5. Release all temporaries; the C helper receives the saved
     //      (pre-swap) m and n.
798  int m_save = m;
799  int n_save = n;
800  dcomplex* a_save = a;
801  dcomplex* b_save = b;
802  dcomplex* c_save = c;
803  int a_rs_save = a_rs;
804  int a_cs_save = a_cs;
805  int b_rs_save = b_rs;
806  int b_cs_save = b_cs;
807  int c_rs_save = c_rs;
808  int c_cs_save = c_cs;
809  dcomplex zero = bl1_z0();
810  dcomplex one = bl1_z1();
811  dcomplex* b_copy;
812  dcomplex* c_trans;
813  int dim_a;
814  int lda, inca;
815  int ldb, incb;
816  int ldc, incc;
817  int ldb_copy, incb_copy;
818  int ldc_trans, incc_trans;
819  int symm_needs_copyb = FALSE;
820  int symm_needs_transb = FALSE;
821  int symm_needs_axpyt = FALSE;
822 
823  // Return early if possible.
824  if ( bl1_zero_dim2( m, n ) ) return;
825 
826  // If necessary, allocate, initialize, and use a temporary contiguous
827  // copy of each matrix rather than the original matrices.
828  bl1_set_dim_with_side( side, m, n, &dim_a );
829  bl1_zcreate_contigmr( uplo,
830  dim_a,
831  dim_a,
832  a_save, a_rs_save, a_cs_save,
833  &a, &a_rs, &a_cs );
834 
835  bl1_zcreate_contigm( m,
836  n,
837  b_save, b_rs_save, b_cs_save,
838  &b, &b_rs, &b_cs );
839 
840  bl1_zcreate_contigm( m,
841  n,
842  c_save, c_rs_save, c_cs_save,
843  &c, &c_rs, &c_cs );
844 
845  // Initialize with values assuming column-major storage.
846  lda = a_cs;
847  inca = a_rs;
848  ldb = b_cs;
849  incb = b_rs;
850  ldc = c_cs;
851  incc = c_rs;
852 
853  // Adjust the parameters based on the storage of each matrix.
854  if ( bl1_is_col_storage( c_rs, c_cs ) )
855  {
856  if ( bl1_is_col_storage( a_rs, a_cs ) )
857  {
858  if ( bl1_is_col_storage( b_rs, b_cs ) )
859  {
860  // requested operation: C_c += uplo( A_c ) * B_c
861  // effective operation: C_c += uplo( A_c ) * B_c
862  }
863  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
864  {
865  // requested operation: C_c += uplo( A_c ) * B_r
866  // effective operation: C_c += uplo( A_c ) * B_c
867  symm_needs_copyb = TRUE;
868  }
869  }
870  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
871  {
872  if ( bl1_is_col_storage( b_rs, b_cs ) )
873  {
874  // requested operation: C_c += uplo( A_r ) * B_c
875  // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
876  bl1_swap_ints( lda, inca );
877 
878  bl1_toggle_uplo( uplo );
879  }
880  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
881  {
882  // requested operation: C_c += uplo( A_r ) * B_r
883  // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
884  bl1_swap_ints( lda, inca );
885  bl1_swap_ints( ldb, incb );
886 
887  bl1_toggle_side( side );
888  bl1_toggle_uplo( uplo );
889 
890  symm_needs_axpyt = TRUE;
891  }
892  }
893  }
894  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
895  {
896  if ( bl1_is_col_storage( a_rs, a_cs ) )
897  {
898  if ( bl1_is_col_storage( b_rs, b_cs ) )
899  {
900  // requested operation: C_r += uplo( A_c ) * B_c
901  // effective operation: C_c += ( uplo( A_c ) * B_c )^T
902  bl1_swap_ints( ldc, incc );
903 
904  bl1_swap_ints( m, n );
905 
906  symm_needs_axpyt = TRUE;
907  }
908  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
909  {
910  // requested operation: C_r += uplo( A_c ) * B_r
911  // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
912  bl1_swap_ints( ldc, incc );
913  bl1_swap_ints( ldb, incb );
914 
915  bl1_swap_ints( m, n );
916 
917  bl1_toggle_side( side );
918  }
919  }
920  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
921  {
922  if ( bl1_is_col_storage( b_rs, b_cs ) )
923  {
924  // requested operation: C_r += uplo( A_r ) * B_c
925  // effective operation: C_c += B_c^T * ~uplo( A_c )
926  bl1_swap_ints( ldc, incc );
927  bl1_swap_ints( lda, inca );
928 
929  bl1_swap_ints( m, n );
930 
931  bl1_toggle_side( side );
932  bl1_toggle_uplo( uplo );
933 
934  symm_needs_copyb = TRUE;
935  symm_needs_transb = TRUE;
936  }
937  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
938  {
939  // requested operation: C_r += uplo( A_r ) * B_r
940  // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
941  bl1_swap_ints( ldc, incc );
942  bl1_swap_ints( lda, inca );
943  bl1_swap_ints( ldb, incb );
944 
945  bl1_swap_ints( m, n );
946 
947  bl1_toggle_uplo( uplo );
948  bl1_toggle_side( side );
949  }
950  }
951  }
952 
953  // We need a temporary matrix for the cases where B needs to be copied.
954  b_copy = b;
955  ldb_copy = ldb;
956  incb_copy = incb;
957 
958  // There are two cases where we need to make a copy of B: one where the
959  // copy's dimensions are transposed from the original B, and one where
960  // the dimensions are not swapped.
961  if ( symm_needs_copyb )
962  {
963  trans1_t transb;
964 
965  // Set transb, which determines whether or not we need to copy from B
966  // as if it needs a transposition. If a transposition is needed, then
967  // m and n have already been swapped. So in either case m
968  // represents the leading dimension of the copy.
969  if ( symm_needs_transb ) transb = BLIS1_TRANSPOSE;
970  else transb = BLIS1_NO_TRANSPOSE;
971 
972  b_copy = bl1_zallocm( m, n );
973  ldb_copy = m;
974  incb_copy = 1;
975 
976  bl1_zcopymt( transb,
977  m,
978  n,
979  b, incb, ldb,
980  b_copy, incb_copy, ldb_copy );
981  }
982 
983  // There are two cases where we need to perform the symm and then axpy
984  // the result into C with a transposition. We handle those cases here.
985  if ( symm_needs_axpyt )
986  {
987  // We need a temporary matrix for holding C^T. Notice that m and n
988  // represent the dimensions of C, and thus C_trans is n-by-m
989  // (interpreting both as column-major matrices). So the leading
990  // dimension of the temporary matrix holding C^T is n.
991  c_trans = bl1_zallocm( n, m );
992  ldc_trans = n;
993  incc_trans = 1;
994 
995  // Compute A * B (or B * A) and store the result in C_trans.
996  // Note that there is no overlap between the axpyt cases and
997  // the conja/copyb cases, hence the use of a, b, lda, and ldb.
998  bl1_zsymm_blas( side,
999  uplo,
1000  n,
1001  m,
1002  alpha,
1003  a, lda,
1004  b, ldb,
1005  &zero,
1006  c_trans, ldc_trans );
1007 
1008  // Scale C by beta.
1009  bl1_zscalm( BLIS1_NO_CONJUGATE,
1010  m,
1011  n,
1012  beta,
1013  c, incc, ldc );
1014 
1015  // And finally, accumulate the matrix product in C_trans into C
1016  // with a transpose.
1017  bl1_zaxpymt( BLIS1_TRANSPOSE,
1018  m,
1019  n,
1020  &one,
1021  c_trans, incc_trans, ldc_trans,
1022  c, incc, ldc );
1023 
1024  // Free the temporary matrix for C.
1025  bl1_zfree( c_trans );
1026  }
1027  else // no extra axpyt step needed
1028  {
1029  bl1_zsymm_blas( side,
1030  uplo,
1031  m,
1032  n,
1033  alpha,
1034  a, lda,
1035  b_copy, ldb_copy,
1036  beta,
1037  c, ldc );
1038  }
1039 
1040  if ( symm_needs_copyb )
1041  bl1_zfree( b_copy );
1042 
1043  // Free any temporary contiguous matrices, copying the result back to
1044  // the original matrix.
1045  bl1_zfree_contigm( a_save, a_rs_save, a_cs_save,
1046  &a, &a_rs, &a_cs );
1047 
1048  bl1_zfree_contigm( b_save, b_rs_save, b_cs_save,
1049  &b, &b_rs, &b_cs );
1050 
1051  bl1_zfree_saved_contigm( m_save,
1052  n_save,
1053  c_save, c_rs_save, c_cs_save,
1054  &c, &c_rs, &c_cs );
1055 }
void bl1_zcreate_contigm(int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigm.c:115
void bl1_zcreate_contigmr(uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:109
dcomplex bl1_z0(void)
Definition: bl1_constants.c:133
Definition: blis_type_defs.h:81
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
dcomplex * bl1_zallocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:45
void bl1_zscalm(conj1_t conj, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_scalm.c:273
trans1_t
Definition: blis_type_defs.h:52
void bl1_zfree(dcomplex *p)
Definition: bl1_free.c:45
Definition: blis_type_defs.h:55
void bl1_zaxpymt(trans1_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_axpymt.c:248
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition: bl1_set_dims.c:27
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_zfree_saved_contigm(int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigm.c:82
Definition: blis_type_defs.h:54
void bl1_zsymm_blas(side1_t side, uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc)
Definition: bl1_symm.c:1176
void bl1_zcopymt(trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:286
void bl1_zfree_contigm(dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:61
Definition: blis_type_defs.h:137
dcomplex bl1_z1(void)
Definition: bl1_constants.c:69

◆ bl1_zsymm_blas()

void bl1_zsymm_blas ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  lda,
dcomplex *  b,
int  ldb,
dcomplex *  beta,
dcomplex *  c,
int  ldc 
)

References bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(), cblas_zsymm(), CblasColMajor, and F77_zsymm().

Referenced by bl1_zsymm().

1177 {
     // Thin dispatch layer for the double-precision complex symm
     // operation. Depending on BLIS1_ENABLE_CBLAS_INTERFACES, the call is
     // forwarded to either cblas_zsymm() or the Fortran-77 symbol
     // F77_zsymm(). Both branches pass m, n, a/lda, b/ldb and c/ldc
     // through unchanged; they differ in how side/uplo are represented
     // and in whether integers are passed by value or by address.
1178 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
     // CBLAS branch: the matrix order is fixed to column-major.
1179  enum CBLAS_ORDER cblas_order = CblasColMajor;
1180  enum CBLAS_SIDE cblas_side;
1181  enum CBLAS_UPLO cblas_uplo;
1182 
     // Fill in the CBLAS enum values corresponding to side and uplo.
1183  bl1_param_map_to_netlib_side( side, &cblas_side );
1184  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
1185 
     // cblas_zsymm() takes alpha and beta as pointers (const void *), so
     // they are forwarded as-is rather than dereferenced.
1186  cblas_zsymm( cblas_order,
1187  cblas_side,
1188  cblas_uplo,
1189  m,
1190  n,
1191  alpha,
1192  a, lda,
1193  b, ldb,
1194  beta,
1195  c, ldc );
1196 #else
     // Fortran-77 branch: side and uplo are carried as single chars.
1197  char blas_side;
1198  char blas_uplo;
1199 
1200  bl1_param_map_to_netlib_side( side, &blas_side );
1201  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
1202 
     // F77_zsymm() takes every argument by address, including the
     // integer dimensions and leading dimensions.
1203  F77_zsymm( &blas_side,
1204  &blas_uplo,
1205  &m,
1206  &n,
1207  alpha,
1208  a, &lda,
1209  b, &ldb,
1210  beta,
1211  c, &ldc );
1212 #endif
1213 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
void cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc)
void bl1_param_map_to_netlib_side(side1_t blis_side, void *blas_side)
Definition: bl1_param_map.c:71
void F77_zsymm(char *side, char *uplo, int *m, int *n, dcomplex *alpha, dcomplex *a, int *lda, dcomplex *b, int *ldb, dcomplex *beta, dcomplex *c, int *ldc)
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
Definition: blis_prototypes_cblas.h:17
CBLAS_SIDE
Definition: blis_prototypes_cblas.h:21
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47