libflame  revision_anchor
Functions
bl1_her2k.c File Reference

(r)

Functions

void bl1_sher2k (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
 
void bl1_dher2k (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
 
void bl1_cher2k (uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, float *beta, scomplex *c, int c_rs, int c_cs)
 
void bl1_zher2k (uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, double *beta, dcomplex *c, int c_rs, int c_cs)
 
void bl1_cher2k_blas (uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, float *beta, scomplex *c, int ldc)
 
void bl1_zher2k_blas (uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, double *beta, dcomplex *c, int ldc)
 

Function Documentation

◆ bl1_cher2k()

void bl1_cher2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
scomplex alpha,
scomplex a,
int  a_rs,
int  a_cs,
scomplex b,
int  b_rs,
int  b_cs,
float *  beta,
scomplex c,
int  c_rs,
int  c_cs 
)

References bl1_c1(), bl1_callocm(), bl1_caxpymrt(), bl1_ccopymt(), bl1_ccreate_contigmr(), bl1_ccreate_contigmt(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigmr(), bl1_cher2k_blas(), bl1_csscalmr(), bl1_is_col_storage(), bl1_s0(), bl1_set_dims_with_trans(), bl1_zero_dim2(), BLIS1_CONJ_NO_TRANSPOSE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Her2k_external().

40 {
41  uplo1_t uplo_save = uplo;
42  int m_save = m;
43  scomplex* a_save = a;
44  scomplex* b_save = b;
45  scomplex* c_save = c;
46  int a_rs_save = a_rs;
47  int a_cs_save = a_cs;
48  int b_rs_save = b_rs;
49  int b_cs_save = b_cs;
50  int c_rs_save = c_rs;
51  int c_cs_save = c_cs;
52  float zero_r = bl1_s0();
53  scomplex one = bl1_c1();
54  scomplex alpha_copy;
55  scomplex* a_copy;
56  scomplex* b_copy;
57  scomplex* c_conj;
58  int lda, inca;
59  int ldb, incb;
60  int ldc, incc;
61  int lda_copy, inca_copy;
62  int ldb_copy, incb_copy;
63  int ldc_conj, incc_conj;
64  int her2k_needs_copya = FALSE;
65  int her2k_needs_copyb = FALSE;
66  int her2k_needs_conj = FALSE;
67  int her2k_needs_alpha_conj = FALSE;
68 
69  // Return early if possible.
70  if ( bl1_zero_dim2( m, k ) ) return;
71 
72  // If necessary, allocate, initialize, and use a temporary contiguous
73  // copy of each matrix rather than the original matrices.
74  bl1_ccreate_contigmt( trans,
75  m,
76  k,
77  a_save, a_rs_save, a_cs_save,
78  &a, &a_rs, &a_cs );
79 
80  bl1_ccreate_contigmt( trans,
81  m,
82  k,
83  b_save, b_rs_save, b_cs_save,
84  &b, &b_rs, &b_cs );
85 
87  m,
88  m,
89  c_save, c_rs_save, c_cs_save,
90  &c, &c_rs, &c_cs );
91 
92  // Initialize with values assuming column-major storage.
93  lda = a_cs;
94  inca = a_rs;
95  ldb = b_cs;
96  incb = b_rs;
97  ldc = c_cs;
98  incc = c_rs;
99 
100  // Adjust the parameters based on the storage of each matrix.
101  if ( bl1_is_col_storage( c_rs, c_cs ) )
102  {
103  if ( bl1_is_col_storage( a_rs, a_cs ) )
104  {
105  if ( bl1_is_col_storage( b_rs, b_cs ) )
106  {
107  // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
108  // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
109  }
110  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
111  {
112  // requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
113  // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
114  her2k_needs_copyb = TRUE;
115  }
116  }
117  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
118  {
119  if ( bl1_is_col_storage( b_rs, b_cs ) )
120  {
121  // requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
122  // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
123  her2k_needs_copya = TRUE;
124  }
125  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
126  {
127  // requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
128  // requested operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
129  bl1_swap_ints( lda, inca );
130  bl1_swap_ints( ldb, incb );
131 
132  bl1_toggle_conjtrans( trans );
133 
134  her2k_needs_conj = TRUE;
135  her2k_needs_alpha_conj = TRUE;
136  }
137  }
138  }
139  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
140  {
141  if ( bl1_is_col_storage( a_rs, a_cs ) )
142  {
143  if ( bl1_is_col_storage( b_rs, b_cs ) )
144  {
145  // requested operation: uplo( C_r ) += A_c * B_c' + B_c * A_c'
146  // requested operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
147  bl1_swap_ints( ldc, incc );
148 
149  bl1_toggle_uplo( uplo );
150 
151  her2k_needs_conj = TRUE;
152  }
153  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
154  {
155  // requested operation: uplo( C_r ) += A_c * B_r' + B_r * A_c'
156  // requested operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
157  her2k_needs_copyb = TRUE;
158 
159  bl1_swap_ints( ldc, incc );
160 
161  bl1_toggle_uplo( uplo );
162 
163  her2k_needs_conj = TRUE;
164  }
165  }
166  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
167  {
168  if ( bl1_is_col_storage( b_rs, b_cs ) )
169  {
170  // requested operation: uplo( C_r ) += A_r * B_c' + B_c * A_r'
171  // requested operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
172  her2k_needs_copya = TRUE;
173 
174  bl1_swap_ints( ldc, incc );
175 
176  bl1_toggle_uplo( uplo );
177 
178  her2k_needs_conj = TRUE;
179  }
180  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
181  {
182  // requested operation: uplo( C_r ) += A_r * B_r' + B_r * A_r'
183  // requested operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
184  bl1_swap_ints( ldc, incc );
185  bl1_swap_ints( lda, inca );
186  bl1_swap_ints( ldb, incb );
187 
188  bl1_toggle_uplo( uplo );
189  bl1_toggle_conjtrans( trans );
190 
191  her2k_needs_alpha_conj = TRUE;
192  }
193  }
194  }
195 
196  // Make a copy of alpha and conjugate if necessary.
197  alpha_copy = *alpha;
198  if ( her2k_needs_alpha_conj )
199  {
200  bl1_zconjs( &alpha_copy );
201  }
202 
203  a_copy = a;
204  lda_copy = lda;
205  inca_copy = inca;
206 
207  // There are two cases where we need to copy A column-major storage.
208  // We handle those two cases here.
209  if ( her2k_needs_copya )
210  {
211  int m_a;
212  int n_a;
213 
214  // Determine the dimensions of A according to the value of trans. We
215  // need this in order to set the leading dimension of the copy of A.
216  bl1_set_dims_with_trans( trans, m, k, &m_a, &n_a );
217 
218  // We need a temporary matrix to hold a column-major copy of A.
219  a_copy = bl1_callocm( m, k );
220  lda_copy = m_a;
221  inca_copy = 1;
222 
223  // Copy the contents of A into A_copy.
225  m_a,
226  n_a,
227  a, inca, lda,
228  a_copy, inca_copy, lda_copy );
229  }
230 
231  b_copy = b;
232  ldb_copy = ldb;
233  incb_copy = incb;
234 
235  // There are two cases where we need to copy B column-major storage.
236  // We handle those two cases here.
237  if ( her2k_needs_copyb )
238  {
239  int m_b;
240  int n_b;
241 
242  // Determine the dimensions of B according to the value of trans. We
243  // need this in order to set the leading dimension of the copy of B.
244  bl1_set_dims_with_trans( trans, m, k, &m_b, &n_b );
245 
246  // We need a temporary matrix to hold a column-major copy of B.
247  b_copy = bl1_callocm( m, k );
248  ldb_copy = m_b;
249  incb_copy = 1;
250 
251  // Copy the contents of B into B_copy.
253  m_b,
254  n_b,
255  b, incb, ldb,
256  b_copy, incb_copy, ldb_copy );
257  }
258 
259  // There are two cases where we need to perform the rank-2k product and
260  // then axpy the result into C with a conjugation. We handle those two
261  // cases here.
262  if ( her2k_needs_conj )
263  {
264  // We need a temporary matrix for holding the rank-k product.
265  c_conj = bl1_callocm( m, m );
266  ldc_conj = m;
267  incc_conj = 1;
268 
269  // Compute the rank-2k product.
270  bl1_cher2k_blas( uplo,
271  trans,
272  m,
273  k,
274  &alpha_copy,
275  a_copy, lda_copy,
276  b_copy, ldb_copy,
277  &zero_r,
278  c_conj, ldc_conj );
279 
280  // Scale C by beta.
281  bl1_csscalmr( uplo,
282  m,
283  m,
284  beta,
285  c, incc, ldc );
286 
287  // And finally, accumulate the rank-2k product in C_conj into C
288  // with a conjugation.
289  bl1_caxpymrt( uplo,
291  m,
292  m,
293  &one,
294  c_conj, incc_conj, ldc_conj,
295  c, incc, ldc );
296 
297  // Free the temporary matrix for C.
298  bl1_cfree( c_conj );
299  }
300  else
301  {
302  bl1_cher2k_blas( uplo,
303  trans,
304  m,
305  k,
306  &alpha_copy,
307  a_copy, lda_copy,
308  b_copy, ldb_copy,
309  beta,
310  c, ldc );
311  }
312 
313  if ( her2k_needs_copya )
314  bl1_cfree( a_copy );
315 
316  if ( her2k_needs_copyb )
317  bl1_cfree( b_copy );
318 
319  // Free any temporary contiguous matrices, copying the result back to
320  // the original matrix.
321  bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
322  &a, &a_rs, &a_cs );
323 
324  bl1_cfree_contigm( b_save, b_rs_save, b_cs_save,
325  &b, &b_rs, &b_cs );
326 
327  bl1_cfree_saved_contigmr( uplo_save,
328  m_save,
329  m_save,
330  c_save, c_rs_save, c_cs_save,
331  &c, &c_rs, &c_cs );
332 }
void bl1_csscalmr(uplo1_t uplo, int m, int n, float *alpha, scomplex *a, int a_rs, int a_cs)
Definition: bl1_scalmr.c:125
uplo1_t
Definition: blis_type_defs.h:60
void bl1_cher2k_blas(uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, float *beta, scomplex *c, int ldc)
Definition: bl1_her2k.c:631
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
scomplex bl1_c1(void)
Definition: bl1_constants.c:61
void bl1_ccreate_contigmr(uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:77
void bl1_ccreate_contigmt(trans1_t trans_dims, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmt.c:89
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
void bl1_caxpymrt(uplo1_t uplo, trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_axpymrt.c:227
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void bl1_set_dims_with_trans(trans1_t trans, int m, int n, int *m_new, int *n_new)
Definition: bl1_set_dims.c:13
void bl1_ccopymt(trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:215
Definition: blis_type_defs.h:132
void bl1_cfree_saved_contigmr(uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigmr.c:59
scomplex * bl1_callocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:40
void bl1_cfree(scomplex *p)
Definition: bl1_free.c:40
void bl1_cfree_contigm(scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:45
float bl1_s0(void)
Definition: bl1_constants.c:111

◆ bl1_cher2k_blas()

void bl1_cher2k_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
scomplex alpha,
scomplex a,
int  lda,
scomplex b,
int  ldb,
float *  beta,
scomplex c,
int  ldc 
)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_cher2k(), CblasColMajor, and F77_cher2k().

Referenced by bl1_cher2k().

632 {
633 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
634  enum CBLAS_ORDER cblas_order = CblasColMajor;
635  enum CBLAS_UPLO cblas_uplo;
636  enum CBLAS_TRANSPOSE cblas_trans;
637 
638  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
639  bl1_param_map_to_netlib_trans( trans, &cblas_trans );
640 
641  cblas_cher2k( cblas_order,
642  cblas_uplo,
643  cblas_trans,
644  m,
645  k,
646  alpha,
647  a, lda,
648  b, ldb,
649  *beta,
650  c, ldc );
651 #else
652  char blas_uplo;
653  char blas_trans;
654 
655  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
656  bl1_param_map_to_netlib_trans( trans, &blas_trans );
657 
658  F77_cher2k( &blas_uplo,
659  &blas_trans,
660  &m,
661  &k,
662  alpha,
663  a, &lda,
664  b, &ldb,
665  beta,
666  c, &ldc );
667 #endif
668 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
void cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const float beta, void *C, const int ldc)
Definition: blis_prototypes_cblas.h:17
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47
void F77_cher2k(char *uplo, char *transa, int *n, int *k, scomplex *alpha, scomplex *a, int *lda, scomplex *b, int *ldb, float *beta, scomplex *c, int *ldc)

◆ bl1_dher2k()

void bl1_dher2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs,
double *  beta,
double *  c,
int  c_rs,
int  c_cs 
)

References bl1_dsyr2k().

27 {
28  bl1_dsyr2k( uplo,
29  trans,
30  m,
31  k,
32  alpha,
33  a, a_rs, a_cs,
34  b, b_rs, b_cs,
35  beta,
36  c, c_rs, c_cs );
37 }
void bl1_dsyr2k(uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
Definition: bl1_syr2k.c:239

◆ bl1_sher2k()

void bl1_sher2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs,
float *  beta,
float *  c,
int  c_rs,
int  c_cs 
)

References bl1_ssyr2k().

14 {
15  bl1_ssyr2k( uplo,
16  trans,
17  m,
18  k,
19  alpha,
20  a, a_rs, a_cs,
21  b, b_rs, b_cs,
22  beta,
23  c, c_rs, c_cs );
24 }
void bl1_ssyr2k(uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
Definition: bl1_syr2k.c:13

◆ bl1_zher2k()

void bl1_zher2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
dcomplex alpha,
dcomplex a,
int  a_rs,
int  a_cs,
dcomplex b,
int  b_rs,
int  b_cs,
double *  beta,
dcomplex c,
int  c_rs,
int  c_cs 
)

References bl1_d0(), bl1_is_col_storage(), bl1_set_dims_with_trans(), bl1_z1(), bl1_zallocm(), bl1_zaxpymrt(), bl1_zcopymt(), bl1_zcreate_contigmr(), bl1_zcreate_contigmt(), bl1_zdscalmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigmr(), bl1_zher2k_blas(), BLIS1_CONJ_NO_TRANSPOSE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Her2k_external().

335 {
336  uplo1_t uplo_save = uplo;
337  int m_save = m;
338  dcomplex* a_save = a;
339  dcomplex* b_save = b;
340  dcomplex* c_save = c;
341  int a_rs_save = a_rs;
342  int a_cs_save = a_cs;
343  int b_rs_save = b_rs;
344  int b_cs_save = b_cs;
345  int c_rs_save = c_rs;
346  int c_cs_save = c_cs;
347  double zero_r = bl1_d0();
348  dcomplex one = bl1_z1();
349  dcomplex alpha_copy;
350  dcomplex* a_copy;
351  dcomplex* b_copy;
352  dcomplex* c_conj;
353  int lda, inca;
354  int ldb, incb;
355  int ldc, incc;
356  int lda_copy, inca_copy;
357  int ldb_copy, incb_copy;
358  int ldc_conj, incc_conj;
359  int her2k_needs_copya = FALSE;
360  int her2k_needs_copyb = FALSE;
361  int her2k_needs_conj = FALSE;
362  int her2k_needs_alpha_conj = FALSE;
363 
364  // Return early if possible.
365  if ( bl1_zero_dim2( m, k ) ) return;
366 
367  // If necessary, allocate, initialize, and use a temporary contiguous
368  // copy of each matrix rather than the original matrices.
369  bl1_zcreate_contigmt( trans,
370  m,
371  k,
372  a_save, a_rs_save, a_cs_save,
373  &a, &a_rs, &a_cs );
374 
375  bl1_zcreate_contigmt( trans,
376  m,
377  k,
378  b_save, b_rs_save, b_cs_save,
379  &b, &b_rs, &b_cs );
380 
381  bl1_zcreate_contigmr( uplo,
382  m,
383  m,
384  c_save, c_rs_save, c_cs_save,
385  &c, &c_rs, &c_cs );
386 
387  // Initialize with values assuming column-major storage.
388  lda = a_cs;
389  inca = a_rs;
390  ldb = b_cs;
391  incb = b_rs;
392  ldc = c_cs;
393  incc = c_rs;
394 
395  // Adjust the parameters based on the storage of each matrix.
396  if ( bl1_is_col_storage( c_rs, c_cs ) )
397  {
398  if ( bl1_is_col_storage( a_rs, a_cs ) )
399  {
400  if ( bl1_is_col_storage( b_rs, b_cs ) )
401  {
402  // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
403  // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
404  }
405  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
406  {
407  // requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
408  // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
409  her2k_needs_copyb = TRUE;
410  }
411  }
412  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
413  {
414  if ( bl1_is_col_storage( b_rs, b_cs ) )
415  {
416  // requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
417  // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
418  her2k_needs_copya = TRUE;
419  }
420  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
421  {
422  // requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
423  // requested operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
424  bl1_swap_ints( lda, inca );
425  bl1_swap_ints( ldb, incb );
426 
427  bl1_toggle_conjtrans( trans );
428 
429  her2k_needs_conj = TRUE;
430  her2k_needs_alpha_conj = TRUE;
431  }
432  }
433  }
434  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
435  {
436  if ( bl1_is_col_storage( a_rs, a_cs ) )
437  {
438  if ( bl1_is_col_storage( b_rs, b_cs ) )
439  {
440  // requested operation: uplo( C_r ) += A_c * B_c' + B_c * A_c'
441  // requested operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
442  bl1_swap_ints( ldc, incc );
443 
444  bl1_toggle_uplo( uplo );
445 
446  her2k_needs_conj = TRUE;
447  }
448  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
449  {
450  // requested operation: uplo( C_r ) += A_c * B_r' + B_r * A_c'
451  // requested operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
452  her2k_needs_copyb = TRUE;
453 
454  bl1_swap_ints( ldc, incc );
455 
456  bl1_toggle_uplo( uplo );
457 
458  her2k_needs_conj = TRUE;
459  }
460  }
461  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
462  {
463  if ( bl1_is_col_storage( b_rs, b_cs ) )
464  {
465  // requested operation: uplo( C_r ) += A_r * B_c' + B_c * A_r'
466  // requested operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
467  her2k_needs_copya = TRUE;
468 
469  bl1_swap_ints( ldc, incc );
470 
471  bl1_toggle_uplo( uplo );
472 
473  her2k_needs_conj = TRUE;
474  }
475  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
476  {
477  // requested operation: uplo( C_r ) += A_r * B_r' + B_r * A_r'
478  // requested operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
479  bl1_swap_ints( ldc, incc );
480  bl1_swap_ints( lda, inca );
481  bl1_swap_ints( ldb, incb );
482 
483  bl1_toggle_uplo( uplo );
484  bl1_toggle_conjtrans( trans );
485 
486  her2k_needs_alpha_conj = TRUE;
487  }
488  }
489  }
490 
491  // Make a copy of alpha and conjugate if necessary.
492  alpha_copy = *alpha;
493  if ( her2k_needs_alpha_conj )
494  {
495  bl1_zconjs( &alpha_copy );
496  }
497 
498  a_copy = a;
499  lda_copy = lda;
500  inca_copy = inca;
501 
502  // There are two cases where we need to copy A column-major storage.
503  // We handle those two cases here.
504  if ( her2k_needs_copya )
505  {
506  int m_a;
507  int n_a;
508 
509  // Determine the dimensions of A according to the value of trans. We
510  // need this in order to set the leading dimension of the copy of A.
511  bl1_set_dims_with_trans( trans, m, k, &m_a, &n_a );
512 
513  // We need a temporary matrix to hold a column-major copy of A.
514  a_copy = bl1_zallocm( m, k );
515  lda_copy = m_a;
516  inca_copy = 1;
517 
518  // Copy the contents of A into A_copy.
520  m_a,
521  n_a,
522  a, inca, lda,
523  a_copy, inca_copy, lda_copy );
524  }
525 
526  b_copy = b;
527  ldb_copy = ldb;
528  incb_copy = incb;
529 
530  // There are two cases where we need to copy B column-major storage.
531  // We handle those two cases here.
532  if ( her2k_needs_copyb )
533  {
534  int m_b;
535  int n_b;
536 
537  // Determine the dimensions of B according to the value of trans. We
538  // need this in order to set the leading dimension of the copy of B.
539  bl1_set_dims_with_trans( trans, m, k, &m_b, &n_b );
540 
541  // We need a temporary matrix to hold a column-major copy of B.
542  b_copy = bl1_zallocm( m, k );
543  ldb_copy = m_b;
544  incb_copy = 1;
545 
546  // Copy the contents of B into B_copy.
548  m_b,
549  n_b,
550  b, incb, ldb,
551  b_copy, incb_copy, ldb_copy );
552  }
553 
554  // There are two cases where we need to perform the rank-2k product and
555  // then axpy the result into C with a conjugation. We handle those two
556  // cases here.
557  if ( her2k_needs_conj )
558  {
559  // We need a temporary matrix for holding the rank-k product.
560  c_conj = bl1_zallocm( m, m );
561  ldc_conj = m;
562  incc_conj = 1;
563 
564  // Compute the rank-2k product.
565  bl1_zher2k_blas( uplo,
566  trans,
567  m,
568  k,
569  &alpha_copy,
570  a_copy, lda_copy,
571  b_copy, ldb_copy,
572  &zero_r,
573  c_conj, ldc_conj );
574 
575  // Scale C by beta.
576  bl1_zdscalmr( uplo,
577  m,
578  m,
579  beta,
580  c, incc, ldc );
581 
582  // And finally, accumulate the rank-2k product in C_conj into C
583  // with a conjugation.
584  bl1_zaxpymrt( uplo,
586  m,
587  m,
588  &one,
589  c_conj, incc_conj, ldc_conj,
590  c, incc, ldc );
591 
592  // Free the temporary matrix for C.
593  bl1_zfree( c_conj );
594  }
595  else
596  {
597  bl1_zher2k_blas( uplo,
598  trans,
599  m,
600  k,
601  &alpha_copy,
602  a_copy, lda_copy,
603  b_copy, ldb_copy,
604  beta,
605  c, ldc );
606  }
607 
608  if ( her2k_needs_copya )
609  bl1_zfree( a_copy );
610 
611  if ( her2k_needs_copyb )
612  bl1_zfree( b_copy );
613 
614  // Free any temporary contiguous matrices, copying the result back to
615  // the original matrix.
616  bl1_zfree_contigm( a_save, a_rs_save, a_cs_save,
617  &a, &a_rs, &a_cs );
618 
619  bl1_zfree_contigm( b_save, b_rs_save, b_cs_save,
620  &b, &b_rs, &b_cs );
621 
622  bl1_zfree_saved_contigmr( uplo_save,
623  m_save,
624  m_save,
625  c_save, c_rs_save, c_cs_save,
626  &c, &c_rs, &c_cs );
627 }
void bl1_zdscalmr(uplo1_t uplo, int m, int n, double *alpha, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_scalmr.c:237
void bl1_zher2k_blas(uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, double *beta, dcomplex *c, int ldc)
Definition: bl1_her2k.c:670
void bl1_zcreate_contigmr(uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:109
uplo1_t
Definition: blis_type_defs.h:60
void bl1_zaxpymrt(uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_axpymrt.c:334
void bl1_zcreate_contigmt(trans1_t trans_dims, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmt.c:127
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
double bl1_d0(void)
Definition: bl1_constants.c:118
dcomplex * bl1_zallocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:45
void bl1_zfree_saved_contigmr(uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigmr.c:82
void bl1_zfree(dcomplex *p)
Definition: bl1_free.c:45
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void bl1_set_dims_with_trans(trans1_t trans, int m, int n, int *m_new, int *n_new)
Definition: bl1_set_dims.c:13
void bl1_zcopymt(trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:286
void bl1_zfree_contigm(dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:61
Definition: blis_type_defs.h:137
dcomplex bl1_z1(void)
Definition: bl1_constants.c:69

◆ bl1_zher2k_blas()

void bl1_zher2k_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
dcomplex alpha,
dcomplex a,
int  lda,
dcomplex b,
int  ldb,
double *  beta,
dcomplex c,
int  ldc 
)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_zher2k(), CblasColMajor, and F77_zher2k().

Referenced by bl1_zher2k().

671 {
672 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
673  enum CBLAS_ORDER cblas_order = CblasColMajor;
674  enum CBLAS_UPLO cblas_uplo;
675  enum CBLAS_TRANSPOSE cblas_trans;
676 
677  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
678  bl1_param_map_to_netlib_trans( trans, &cblas_trans );
679 
680  cblas_zher2k( cblas_order,
681  cblas_uplo,
682  cblas_trans,
683  m,
684  k,
685  alpha,
686  a, lda,
687  b, ldb,
688  *beta,
689  c, ldc );
690 #else
691  char blas_uplo;
692  char blas_trans;
693 
694  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
695  bl1_param_map_to_netlib_trans( trans, &blas_trans );
696 
697  F77_zher2k( &blas_uplo,
698  &blas_trans,
699  &m,
700  &k,
701  alpha,
702  a, &lda,
703  b, &ldb,
704  beta,
705  c, &ldc );
706 #endif
707 }
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
Definition: blis_prototypes_cblas.h:17
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47
void cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const double beta, void *C, const int ldc)
void F77_zher2k(char *uplo, char *transa, int *n, int *k, dcomplex *alpha, dcomplex *a, int *lda, dcomplex *b, int *ldb, double *beta, dcomplex *c, int *ldc)