libflame  revision_anchor
Functions
FLA_Fused_Gerc2_opt_var1.c File Reference

(r)

Functions

FLA_Error FLA_Fused_Gerc2_opt_var1 (FLA_Obj alpha, FLA_Obj u, FLA_Obj y, FLA_Obj z, FLA_Obj v, FLA_Obj A)
 
FLA_Error FLA_Fused_Gerc2_ops_var1 (int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Fused_Gerc2_opd_var1 (int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Fused_Gerc2_opc_var1 (int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Fused_Gerc2_opz_var1 (int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A)
 

Function Documentation

◆ FLA_Fused_Gerc2_opc_var1()

FLA_Error FLA_Fused_Gerc2_opc_var1 ( int  m_A,
int  n_A,
scomplex *  buff_alpha,
scomplex *  buff_u,
int  inc_u,
scomplex *  buff_y,
int  inc_y,
scomplex *  buff_z,
int  inc_z,
scomplex *  buff_v,
int  inc_v,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)

References bl1_caxpyv(), BLIS1_NO_CONJUGATE, i, psi1, temp1, and temp2.

Referenced by FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Fused_Gerc2_opt_var1(), FLA_Hess_UT_step_ofc_var2(), and FLA_Hess_UT_step_ofc_var3().

249 {
250  int i;
251 
252  for ( i = 0; i < n_A; ++i )
253  {
254  scomplex* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
255  scomplex* u = buff_u;
256  scomplex* psi1 = buff_y + (i )*inc_y;
257  scomplex* z = buff_z;
258  scomplex* nu1 = buff_v + (i )*inc_v;
259  scomplex* alpha = buff_alpha;
260  scomplex psi1_conj;
261  scomplex nu1_conj;
262  scomplex temp1;
263  scomplex temp2;
264 
265  /*------------------------------------------------------------*/
266 
267  bl1_ccopyconj( psi1, &psi1_conj );
268  bl1_cmult3( alpha, &psi1_conj, &temp1 );
269 
270  bl1_ccopyconj( nu1, &nu1_conj );
271  bl1_cmult3( alpha, &nu1_conj, &temp2 );
272 
273  bl1_caxpyv( BLIS1_NO_CONJUGATE,
274  m_A,
275  &temp1,
276  u, inc_u,
277  a1, rs_A );
278 /*
279  F77_caxpy( &m_A,
280  &temp1,
281  u, &inc_u,
282  a1, &rs_A );
283 */
284 
285  bl1_caxpyv( BLIS1_NO_CONJUGATE,
286  m_A,
287  &temp2,
288  z, inc_z,
289  a1, rs_A );
290 /*
291  F77_caxpy( &m_A,
292  &temp2,
293  z, &inc_z,
294  a1, &rs_A );
295 */
296 
297  /*------------------------------------------------------------*/
298 
299  }
300 
301  return FLA_SUCCESS;
302 }
double *restrict psi1
Definition: bl1_axmyv2.c:139
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
double temp2
Definition: bl1_axpyv2b.c:147
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:132
int i
Definition: bl1_axmyv2.c:145
double temp1
Definition: bl1_axpyv2b.c:146

◆ FLA_Fused_Gerc2_opd_var1()

FLA_Error FLA_Fused_Gerc2_opd_var1 ( int  m_A,
int  n_A,
double *  buff_alpha,
double *  buff_u,
int  inc_u,
double *  buff_y,
int  inc_y,
double *  buff_z,
int  inc_z,
double *  buff_v,
int  inc_v,
double *  buff_A,
int  rs_A,
int  cs_A 
)

References bl1_daxpyv2b(), i, and psi1.

Referenced by FLA_Bidiag_UT_u_step_ofd_var2(), FLA_Fused_Gerc2_opt_var1(), FLA_Hess_UT_step_ofd_var2(), and FLA_Hess_UT_step_ofd_var3().

201 {
202  int i;
203 
204  for ( i = 0; i < n_A; ++i )
205  {
206 /*
207  Effective computation:
208  A = A + alpha * ( u * y' + z * v' );
209 */
210  double* restrict a1 = buff_A + (i )*cs_A + (0 )*rs_A;
211  double* restrict u = buff_u;
212  double* restrict psi1 = buff_y + (i )*inc_y;
213  double* restrict z = buff_z;
214  double* restrict nu1 = buff_v + (i )*inc_v;
215  double* restrict alpha = buff_alpha;
216  double alpha_conj_psi1;
217  double alpha_conj_nu1;
218 
219  /*------------------------------------------------------------*/
220 
221  bl1_dmult3( alpha, psi1, &alpha_conj_psi1 );
222 
223  bl1_dmult3( alpha, nu1, &alpha_conj_nu1 );
224 
225  bl1_daxpyv2b( m_A,
226  &alpha_conj_psi1,
227  &alpha_conj_nu1,
228  u, inc_u,
229  z, inc_z,
230  a1, rs_A );
231 
232  /*------------------------------------------------------------*/
233 
234  }
235 
236  return FLA_SUCCESS;
237 }
double *restrict psi1
Definition: bl1_axmyv2.c:139
void bl1_daxpyv2b(int n, double *alpha1, double *alpha2, double *x1, int inc_x1, double *x2, int inc_x2, double *y, int inc_y)
Definition: bl1_axpyv2b.c:31
int i
Definition: bl1_axmyv2.c:145

◆ FLA_Fused_Gerc2_ops_var1()

FLA_Error FLA_Fused_Gerc2_ops_var1 ( int  m_A,
int  n_A,
float *  buff_alpha,
float *  buff_u,
int  inc_u,
float *  buff_y,
int  inc_y,
float *  buff_z,
int  inc_z,
float *  buff_v,
int  inc_v,
float *  buff_A,
int  rs_A,
int  cs_A 
)

References bl1_saxpyv(), BLIS1_NO_CONJUGATE, i, psi1, temp1, and temp2.

Referenced by FLA_Bidiag_UT_u_step_ofs_var2(), FLA_Fused_Gerc2_opt_var1(), FLA_Hess_UT_step_ofs_var2(), and FLA_Hess_UT_step_ofs_var3().

138 {
139  int i;
140 
141  for ( i = 0; i < n_A; ++i )
142  {
143  float* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
144  float* u = buff_u;
145  float* psi1 = buff_y + (i )*inc_y;
146  float* z = buff_z;
147  float* nu1 = buff_v + (i )*inc_v;
148  float* alpha = buff_alpha;
149  float temp1;
150  float temp2;
151 
152  /*------------------------------------------------------------*/
153 
154  // bl1_smult3( alpha, psi1, &temp1 );
155  temp1 = *alpha * *psi1;
156 
157  // bl1_smult3( alpha, nu1, &temp2 );
158  temp2 = *alpha * *nu1;
159 
160  bl1_saxpyv( BLIS1_NO_CONJUGATE,
161  m_A,
162  &temp1,
163  u, inc_u,
164  a1, rs_A );
165 /*
166  F77_saxpy( &m_A,
167  &temp1,
168  u, &inc_u,
169  a1, &rs_A );
170 */
171 
172  bl1_saxpyv( BLIS1_NO_CONJUGATE,
173  m_A,
174  &temp2,
175  z, inc_z,
176  a1, rs_A );
177 /*
178  F77_saxpy( &m_A,
179  &temp2,
180  z, &inc_z,
181  a1, &rs_A );
182 */
183 
184  /*------------------------------------------------------------*/
185 
186  }
187 
188  return FLA_SUCCESS;
189 }
double *restrict psi1
Definition: bl1_axmyv2.c:139
double temp2
Definition: bl1_axpyv2b.c:147
Definition: blis_type_defs.h:81
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
int i
Definition: bl1_axmyv2.c:145
double temp1
Definition: bl1_axpyv2b.c:146

◆ FLA_Fused_Gerc2_opt_var1()

FLA_Error FLA_Fused_Gerc2_opt_var1 ( FLA_Obj  alpha,
FLA_Obj  u,
FLA_Obj  y,
FLA_Obj  z,
FLA_Obj  v,
FLA_Obj  A 
)

References FLA_Fused_Gerc2_opc_var1(), FLA_Fused_Gerc2_opd_var1(), FLA_Fused_Gerc2_ops_var1(), FLA_Fused_Gerc2_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

14 {
15 /*
16  Effective computation:
17  A = A + alpha * ( u * y' + z * v' );
18 */
19  FLA_Datatype datatype;
20  int m_A, n_A;
21  int rs_A, cs_A;
22  int inc_u, inc_y, inc_z, inc_v;
23 
24  datatype = FLA_Obj_datatype( A );
25 
26  m_A = FLA_Obj_length( A );
27  n_A = FLA_Obj_width( A );
28 
29  rs_A = FLA_Obj_row_stride( A );
30  cs_A = FLA_Obj_col_stride( A );
31 
32  inc_u = FLA_Obj_vector_inc( u );
33  inc_y = FLA_Obj_vector_inc( y );
34  inc_z = FLA_Obj_vector_inc( z );
35  inc_v = FLA_Obj_vector_inc( v );
36 
37 
38  switch ( datatype )
39  {
40  case FLA_FLOAT:
41  {
42  float* buff_A = FLA_FLOAT_PTR( A );
43  float* buff_u = FLA_FLOAT_PTR( u );
44  float* buff_y = FLA_FLOAT_PTR( y );
45  float* buff_z = FLA_FLOAT_PTR( z );
46  float* buff_v = FLA_FLOAT_PTR( v );
47  float* buff_alpha = FLA_FLOAT_PTR( alpha );
48 
49  FLA_Fused_Gerc2_ops_var1( m_A,
50  n_A,
51  buff_alpha,
52  buff_u, inc_u,
53  buff_y, inc_y,
54  buff_z, inc_z,
55  buff_v, inc_v,
56  buff_A, rs_A, cs_A );
57 
58  break;
59  }
60 
61  case FLA_DOUBLE:
62  {
63  double* buff_A = FLA_DOUBLE_PTR( A );
64  double* buff_u = FLA_DOUBLE_PTR( u );
65  double* buff_y = FLA_DOUBLE_PTR( y );
66  double* buff_z = FLA_DOUBLE_PTR( z );
67  double* buff_v = FLA_DOUBLE_PTR( v );
68  double* buff_alpha = FLA_DOUBLE_PTR( alpha );
69 
70  FLA_Fused_Gerc2_opd_var1( m_A,
71  n_A,
72  buff_alpha,
73  buff_u, inc_u,
74  buff_y, inc_y,
75  buff_z, inc_z,
76  buff_v, inc_v,
77  buff_A, rs_A, cs_A );
78 
79  break;
80  }
81 
82  case FLA_COMPLEX:
83  {
84  scomplex* buff_A = FLA_COMPLEX_PTR( A );
85  scomplex* buff_u = FLA_COMPLEX_PTR( u );
86  scomplex* buff_y = FLA_COMPLEX_PTR( y );
87  scomplex* buff_z = FLA_COMPLEX_PTR( z );
88  scomplex* buff_v = FLA_COMPLEX_PTR( v );
89  scomplex* buff_alpha = FLA_COMPLEX_PTR( alpha );
90 
91  FLA_Fused_Gerc2_opc_var1( m_A,
92  n_A,
93  buff_alpha,
94  buff_u, inc_u,
95  buff_y, inc_y,
96  buff_z, inc_z,
97  buff_v, inc_v,
98  buff_A, rs_A, cs_A );
99 
100  break;
101  }
102 
103  case FLA_DOUBLE_COMPLEX:
104  {
105  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
106  dcomplex* buff_u = FLA_DOUBLE_COMPLEX_PTR( u );
107  dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( y );
108  dcomplex* buff_z = FLA_DOUBLE_COMPLEX_PTR( z );
109  dcomplex* buff_v = FLA_DOUBLE_COMPLEX_PTR( v );
110  dcomplex* buff_alpha = FLA_DOUBLE_COMPLEX_PTR( alpha );
111 
112  FLA_Fused_Gerc2_opz_var1( m_A,
113  n_A,
114  buff_alpha,
115  buff_u, inc_u,
116  buff_y, inc_y,
117  buff_z, inc_z,
118  buff_v, inc_v,
119  buff_A, rs_A, cs_A );
120 
121  break;
122  }
123  }
124 
125  return FLA_SUCCESS;
126 }
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Error FLA_Fused_Gerc2_opd_var1(int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Fused_Gerc2_opt_var1.c:193
FLA_Error FLA_Fused_Gerc2_opc_var1(int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Fused_Gerc2_opt_var1.c:241
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
Definition: blis_type_defs.h:132
FLA_Error FLA_Fused_Gerc2_opz_var1(int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Fused_Gerc2_opt_var1.c:306
int FLA_Datatype
Definition: FLA_type_defs.h:49
FLA_Error FLA_Fused_Gerc2_ops_var1(int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Fused_Gerc2_opt_var1.c:130
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
Definition: blis_type_defs.h:137

◆ FLA_Fused_Gerc2_opz_var1()

FLA_Error FLA_Fused_Gerc2_opz_var1 ( int  m_A,
int  n_A,
dcomplex *  buff_alpha,
dcomplex *  buff_u,
int  inc_u,
dcomplex *  buff_y,
int  inc_y,
dcomplex *  buff_z,
int  inc_z,
dcomplex *  buff_v,
int  inc_v,
dcomplex *  buff_A,
int  rs_A,
int  cs_A 
)

References bl1_zaxpyv2b(), i, and psi1.

Referenced by FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Fused_Gerc2_opt_var1(), FLA_Hess_UT_step_ofz_var2(), and FLA_Hess_UT_step_ofz_var3().

314 {
315  int i;
316 
317  for ( i = 0; i < n_A; ++i )
318  {
319  dcomplex* restrict a1 = buff_A + (i )*cs_A + (0 )*rs_A;
320  dcomplex* restrict u = buff_u;
321  dcomplex* restrict psi1 = buff_y + (i )*inc_y;
322  dcomplex* restrict z = buff_z;
323  dcomplex* restrict nu1 = buff_v + (i )*inc_v;
324  dcomplex* restrict alpha = buff_alpha;
325  dcomplex conj_psi1;
326  dcomplex conj_nu1;
327  dcomplex alpha_conj_psi1;
328  dcomplex alpha_conj_nu1;
329 
330  /*------------------------------------------------------------*/
331 
332  bl1_zcopyconj( psi1, &conj_psi1 );
333  bl1_zmult3( alpha, &conj_psi1, &alpha_conj_psi1 );
334 
335  bl1_zcopyconj( nu1, &conj_nu1 );
336  bl1_zmult3( alpha, &conj_nu1, &alpha_conj_nu1 );
337 
338  bl1_zaxpyv2b( m_A,
339  &alpha_conj_psi1,
340  &alpha_conj_nu1,
341  u, inc_u,
342  z, inc_z,
343  a1, rs_A );
344 
345  /*------------------------------------------------------------*/
346 
347  }
348 
349  return FLA_SUCCESS;
350 }
double *restrict psi1
Definition: bl1_axmyv2.c:139
int i
Definition: bl1_axmyv2.c:145
void bl1_zaxpyv2b(int n, dcomplex *alpha1, dcomplex *alpha2, dcomplex *x1, int inc_x1, dcomplex *x2, int inc_x2, dcomplex *y, int inc_y)
Definition: bl1_axpyv2b.c:210
Definition: blis_type_defs.h:137