libflame  revision_anchor
Functions
FLA_Apply_G_rf_asm_var6b.c File Reference

(r)

Functions

FLA_Error FLA_Apply_G_rf_asm_var6b (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var6b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var6b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var6b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var6b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 

Function Documentation

◆ FLA_Apply_G_rf_asc_var6b()

FLA_Error FLA_Apply_G_rf_asc_var6b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)

Referenced by FLA_Apply_G_rf_asm_var6b().

437 {
438  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
439 
440  return FLA_SUCCESS;
441 }

◆ FLA_Apply_G_rf_asd_var6b()

FLA_Error FLA_Apply_G_rf_asd_var6b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asd_var1(), i, dcomplex::imag, n_left, and dcomplex::real.

Referenced by FLA_Apply_G_rf_asm_var6b(), and FLA_Apply_G_rf_bld_var6b().

138 {
139  double one = bl1_d1();
140  double zero = bl1_d0();
141  double gamma12;
142  double sigma12;
143  double gamma23;
144  double sigma23;
145  double* a1;
146  double* a2;
147  double* a3;
148  dcomplex* g12;
149  dcomplex* g23;
150  int i, j, g, k;
151  int nG, nG_app;
152  int n_iter;
153  int n_left;
154  int k_minus_1;
155  int n_fuse;
156  int is_ident12, is_ident23;
157  int m_app;
158 
159 
160  k_minus_1 = k_G - 1;
161  nG = n_A - 1;
162  n_fuse = 2;
163 
164  // Use the simple variant for nG < (k - 1) or k == 1.
165  if ( nG < k_minus_1 || k_G == 1 )
166  {
167  FLA_Apply_G_rf_asd_var1( k_G,
168  m_A,
169  n_A,
170  buff_G, rs_G, cs_G,
171  buff_A, rs_A, cs_A );
172  return FLA_SUCCESS;
173  }
174 
175 
176  // Start-up phase.
177 
178  for ( j = 0; j < k_minus_1; ++j )
179  {
180  nG_app = j + 1;
181  n_iter = nG_app / n_fuse;
182  n_left = nG_app % n_fuse;
183 
184  for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
185  {
186  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
187  g23 = buff_G + (g )*rs_G + (k )*cs_G;
188  a1 = buff_A + (g - 1)*cs_A;
189  a2 = buff_A + (g )*cs_A;
190  a3 = buff_A + (g + 1)*cs_A;
191 
192  gamma12 = g12->real;
193  sigma12 = g12->imag;
194  gamma23 = g23->real;
195  sigma23 = g23->imag;
196 
197  is_ident12 = ( gamma12 == one && sigma12 == zero );
198  is_ident23 = ( gamma23 == one && sigma23 == zero );
199 
200  m_app = min( i_k + 2 + j - iTL, m_A );
201  m_app = max( m_app, 0 );
202 
203  if ( !is_ident12 && is_ident23 )
204  {
205  // Apply only to columns 1 and 2.
206 
207  MAC_Apply_G_mx2_asd( m_app,
208  &gamma12,
209  &sigma12,
210  a1, 1,
211  a2, 1 );
212  }
213  else if ( is_ident12 && !is_ident23 )
214  {
215  // Apply only to columns 2 and 3.
216 
217  MAC_Apply_G_mx2_asd( m_app,
218  &gamma23,
219  &sigma23,
220  a2, 1,
221  a3, 1 );
222  }
223  else if ( !is_ident12 && !is_ident23 )
224  {
225  // Apply to all three columns.
226 
227  MAC_Apply_G_mx3b_asd( m_app,
228  &gamma12,
229  &sigma12,
230  &gamma23,
231  &sigma23,
232  a1, 1,
233  a2, 1,
234  a3, 1 );
235  }
236  }
237  if ( n_left == 1 )
238  {
239  g23 = buff_G + (g )*rs_G + (k )*cs_G;
240  a2 = buff_A + (g )*cs_A;
241  a3 = buff_A + (g + 1)*cs_A;
242 
243  gamma23 = g23->real;
244  sigma23 = g23->imag;
245 
246  is_ident23 = ( gamma23 == one && sigma23 == zero );
247 
248  m_app = min( i_k + 2 + j - iTL, m_A );
249  m_app = max( m_app, 0 );
250 
251  if ( !is_ident23 )
252  MAC_Apply_G_mx2_asd( m_app,
253  &gamma23,
254  &sigma23,
255  a2, 1,
256  a3, 1 );
257  }
258  }
259 
260  // Pipeline stage
261 
262  for ( j = k_minus_1; j < nG; ++j )
263  {
264  nG_app = k_G;
265  n_iter = nG_app / n_fuse;
266  n_left = nG_app % n_fuse;
267 
268  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
269  {
270  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
271  g23 = buff_G + (g )*rs_G + (k )*cs_G;
272  a1 = buff_A + (g - 1)*cs_A;
273  a2 = buff_A + (g )*cs_A;
274  a3 = buff_A + (g + 1)*cs_A;
275 
276  gamma12 = g12->real;
277  sigma12 = g12->imag;
278  gamma23 = g23->real;
279  sigma23 = g23->imag;
280 
281  is_ident12 = ( gamma12 == one && sigma12 == zero );
282  is_ident23 = ( gamma23 == one && sigma23 == zero );
283 
284  m_app = min( i_k + 2 + j - iTL, m_A );
285  m_app = max( m_app, 0 );
286 
287  if ( !is_ident12 && is_ident23 )
288  {
289  // Apply only to columns 1 and 2.
290 
291  MAC_Apply_G_mx2_asd( m_app,
292  &gamma12,
293  &sigma12,
294  a1, 1,
295  a2, 1 );
296  }
297  else if ( is_ident12 && !is_ident23 )
298  {
299  // Apply only to columns 2 and 3.
300 
301  MAC_Apply_G_mx2_asd( m_app,
302  &gamma23,
303  &sigma23,
304  a2, 1,
305  a3, 1 );
306  }
307  else if ( !is_ident12 && !is_ident23 )
308  {
309  // Apply to all three columns.
310 
311  MAC_Apply_G_mx3b_asd( m_app,
312  &gamma12,
313  &sigma12,
314  &gamma23,
315  &sigma23,
316  a1, 1,
317  a2, 1,
318  a3, 1 );
319  }
320  }
321  if ( n_left == 1 )
322  {
323  g23 = buff_G + (g )*rs_G + (k )*cs_G;
324  a2 = buff_A + (g )*cs_A;
325  a3 = buff_A + (g + 1)*cs_A;
326 
327  gamma23 = g23->real;
328  sigma23 = g23->imag;
329 
330  is_ident23 = ( gamma23 == one && sigma23 == zero );
331 
332  m_app = min( i_k + 2 + j - iTL, m_A );
333  m_app = max( m_app, 0 );
334 
335  if ( !is_ident23 )
336  MAC_Apply_G_mx2_asd( m_app,
337  &gamma23,
338  &sigma23,
339  a2, 1,
340  a3, 1 );
341  }
342  }
343 
344  // Shutdown stage
345 
346  for ( j = 1; j < k_G; ++j )
347  {
348  nG_app = k_G - j;
349  n_iter = nG_app / n_fuse;
350  n_left = nG_app % n_fuse;
351 
352  for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
353  {
354  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
355  g23 = buff_G + (g )*rs_G + (k )*cs_G;
356  a1 = buff_A + (g - 1)*cs_A;
357  a2 = buff_A + (g )*cs_A;
358  a3 = buff_A + (g + 1)*cs_A;
359 
360  gamma12 = g12->real;
361  sigma12 = g12->imag;
362  gamma23 = g23->real;
363  sigma23 = g23->imag;
364 
365  is_ident12 = ( gamma12 == one && sigma12 == zero );
366  is_ident23 = ( gamma23 == one && sigma23 == zero );
367 
368  m_app = m_A;
369 
370  if ( !is_ident12 && is_ident23 )
371  {
372  // Apply only to columns 1 and 2.
373 
374  MAC_Apply_G_mx2_asd( m_app,
375  &gamma12,
376  &sigma12,
377  a1, 1,
378  a2, 1 );
379  }
380  else if ( is_ident12 && !is_ident23 )
381  {
382  // Apply only to columns 2 and 3.
383 
384  MAC_Apply_G_mx2_asd( m_app,
385  &gamma23,
386  &sigma23,
387  a2, 1,
388  a3, 1 );
389  }
390  else if ( !is_ident12 && !is_ident23 )
391  {
392  // Apply to all three columns.
393 
394  MAC_Apply_G_mx3b_asd( m_app,
395  &gamma12,
396  &sigma12,
397  &gamma23,
398  &sigma23,
399  a1, 1,
400  a2, 1,
401  a3, 1 );
402  }
403  }
404  //for ( k = 0; k < nG_app_left; k += 1, g -= 1 )
405  if ( n_left == 1 )
406  {
407  g23 = buff_G + (g )*rs_G + (k )*cs_G;
408  a2 = buff_A + (g )*cs_A;
409  a3 = buff_A + (g + 1)*cs_A;
410 
411  gamma23 = g23->real;
412  sigma23 = g23->imag;
413 
414  is_ident23 = ( gamma23 == one && sigma23 == zero );
415 
416  m_app = m_A;
417 
418  if ( !is_ident23 )
419  MAC_Apply_G_mx2_asd( m_app,
420  &gamma23,
421  &sigma23,
422  a2, 1,
423  a3, 1 );
424  }
425  }
426 
427  return FLA_SUCCESS;
428 }
FLA_Error FLA_Apply_G_rf_asd_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var1.c:164
double imag
Definition: blis_type_defs.h:139
double bl1_d0(void)
Definition: bl1_constants.c:118
double real
Definition: blis_type_defs.h:139
int n_left
Definition: bl1_axmyv2.c:149
int i
Definition: bl1_axmyv2.c:145
double bl1_d1(void)
Definition: bl1_constants.c:54
dcomplex
Definition: blis_type_defs.h:137

◆ FLA_Apply_G_rf_asm_var6b()

FLA_Error FLA_Apply_G_rf_asm_var6b ( FLA_Obj  G,
FLA_Obj  A 
)

References FLA_Apply_G_rf_asc_var6b(), FLA_Apply_G_rf_asd_var6b(), FLA_Apply_G_rf_ass_var6b(), FLA_Apply_G_rf_asz_var6b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

29 {
30  FLA_Datatype datatype;
31  int k_G, m_A, n_A;
32  int rs_G, cs_G;
33  int rs_A, cs_A;
34 
35  datatype = FLA_Obj_datatype( A );
36 
37  k_G = FLA_Obj_width( G );
38  m_A = FLA_Obj_length( A );
39  n_A = FLA_Obj_width( A );
40 
41  rs_G = FLA_Obj_row_stride( G );
42  cs_G = FLA_Obj_col_stride( G );
43 
44  rs_A = FLA_Obj_row_stride( A );
45  cs_A = FLA_Obj_col_stride( A );
46 
47  switch ( datatype )
48  {
49  case FLA_FLOAT:
50  {
51  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
52  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
53 
54  FLA_Apply_G_rf_ass_var6b( k_G,
55  m_A,
56  n_A,
57  0,
58  0,
59  buff_G, rs_G, cs_G,
60  buff_A, rs_A, cs_A );
61 
62  break;
63  }
64 
65  case FLA_DOUBLE:
66  {
67  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
68  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
69 
70  FLA_Apply_G_rf_asd_var6b( k_G,
71  m_A,
72  n_A,
73  0,
74  0,
75  buff_G, rs_G, cs_G,
76  buff_A, rs_A, cs_A );
77 
78  break;
79  }
80 
81  case FLA_COMPLEX:
82  {
83  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
84  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
85 
86  FLA_Apply_G_rf_asc_var6b( k_G,
87  m_A,
88  n_A,
89  0,
90  0,
91  buff_G, rs_G, cs_G,
92  buff_A, rs_A, cs_A );
93 
94  break;
95  }
96 
97  case FLA_DOUBLE_COMPLEX:
98  {
99  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
100  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
101 
102  FLA_Apply_G_rf_asz_var6b( k_G,
103  m_A,
104  n_A,
105  0,
106  0,
107  buff_G, rs_G, cs_G,
108  buff_A, rs_A, cs_A );
109 
110  break;
111  }
112  }
113 
114  return FLA_SUCCESS;
115 }
FLA_Error FLA_Apply_G_rf_asd_var6b(int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var6b.c:131
FLA_Error FLA_Apply_G_rf_asz_var6b(int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var6b.c:443
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Error FLA_Apply_G_rf_ass_var6b(int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var6b.c:118
FLA_Error FLA_Apply_G_rf_asc_var6b(int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var6b.c:430
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
scomplex
Definition: blis_type_defs.h:132
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dcomplex
Definition: blis_type_defs.h:137

◆ FLA_Apply_G_rf_ass_var6b()

FLA_Error FLA_Apply_G_rf_ass_var6b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)

Referenced by FLA_Apply_G_rf_asm_var6b(), and FLA_Apply_G_rf_bls_var6b().

125 {
126  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
127 
128  return FLA_SUCCESS;
129 }

◆ FLA_Apply_G_rf_asz_var6b()

FLA_Error FLA_Apply_G_rf_asz_var6b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A 
)

Referenced by FLA_Apply_G_rf_asm_var6b().

450 {
451  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
452 
453  return FLA_SUCCESS;
454 }