libflame  revision_anchor
Functions
FLA_Apply_G_rf_asm_var9b.c File Reference

(r)

Functions

FLA_Error FLA_Apply_G_rf_asm_var9b (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var9b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var9b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var9b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var9b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 

Function Documentation

◆ FLA_Apply_G_rf_asc_var9b()

FLA_Error FLA_Apply_G_rf_asc_var9b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)

Referenced by FLA_Apply_G_rf_asm_var9b().

422 {
423  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
424 
425  return FLA_SUCCESS;
426 }

◆ FLA_Apply_G_rf_asd_var9b()

FLA_Error FLA_Apply_G_rf_asd_var9b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asd_var1(), i, dcomplex::imag, n_left, and dcomplex::real.

Referenced by FLA_Apply_G_rf_asm_var9b(), and FLA_Apply_G_rf_bld_var9b().

138 {
139  double one = bl1_d1();
140  double zero = bl1_d0();
141  double gamma12;
142  double sigma12;
143  double gamma23;
144  double sigma23;
145  double* a1;
146  double* a2;
147  double* a3;
148  dcomplex* g12;
149  dcomplex* g23;
150  int i, j, g, k;
151  int nG, nG_app;
152  int n_iter;
153  int n_left;
154  int k_minus_1;
155  int n_fuse;
156  int is_ident12, is_ident23;
157  int m_app;
158 
159 
160  k_minus_1 = k_G - 1;
161  nG = n_A - 1;
162  n_fuse = 2;
163 
164  // Use the simple variant for nG < 2*(k_G - 1) or k_G == 1.
165  if ( nG < 2*k_minus_1 || k_G == 1 )
166  {
167  FLA_Apply_G_rf_asd_var1( k_G,
168  m_A,
169  n_A,
170  buff_G, rs_G, cs_G,
171  buff_A, rs_A, cs_A );
172  return FLA_SUCCESS;
173  }
174 
175 
176  // Start-up phase.
177 
178  for ( j = -1; j < k_minus_1; j += n_fuse )
179  {
180  nG_app = j + 1;
181  n_iter = nG_app;
182  n_left = 1;
183 
184  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
185  {
186  g12 = buff_G + (g )*rs_G + (k )*cs_G;
187  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
188  a1 = buff_A + (g )*cs_A;
189  a2 = buff_A + (g + 1)*cs_A;
190  a3 = buff_A + (g + 2)*cs_A;
191 
192  gamma12 = g12->real;
193  sigma12 = g12->imag;
194  gamma23 = g23->real;
195  sigma23 = g23->imag;
196 
197  is_ident12 = ( gamma12 == one && sigma12 == zero );
198  is_ident23 = ( gamma23 == one && sigma23 == zero );
199 
200  m_app = min( i_k + 3 + j - iTL, m_A );
201  m_app = max( m_app, 0 );
202 
203  if ( !is_ident12 && is_ident23 )
204  {
205  // Apply only to columns 1 and 2.
206 
207  MAC_Apply_G_mx2_asd( m_app,
208  &gamma12,
209  &sigma12,
210  a1, 1,
211  a2, 1 );
212  }
213  else if ( is_ident12 && !is_ident23 )
214  {
215  // Apply only to columns 2 and 3.
216 
217  MAC_Apply_G_mx2_asd( m_app,
218  &gamma23,
219  &sigma23,
220  a2, 1,
221  a3, 1 );
222  }
223  else if ( !is_ident12 && !is_ident23 )
224  {
225  // Apply to all three columns.
226 
227  MAC_Apply_G_mx3_asd( m_app,
228  &gamma12,
229  &sigma12,
230  &gamma23,
231  &sigma23,
232  a1, 1,
233  a2, 1,
234  a3, 1 );
235  }
236  }
237 
238  if ( n_left == 1 )
239  {
240  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
241  a2 = buff_A + (g + 1)*cs_A;
242  a3 = buff_A + (g + 2)*cs_A;
243 
244  gamma23 = g23->real;
245  sigma23 = g23->imag;
246 
247  is_ident23 = ( gamma23 == one && sigma23 == zero );
248 
249  m_app = min( i_k + 3 + j - iTL, m_A );
250  m_app = max( m_app, 0 );
251 
252  if ( !is_ident23 )
253  MAC_Apply_G_mx2_asd( m_app,
254  &gamma23,
255  &sigma23,
256  a2, 1,
257  a3, 1 );
258  }
259  }
260 
261  // Pipeline stage
262 
263  for ( ; j < nG - 1; j += n_fuse )
264  {
265  nG_app = k_G;
266  n_iter = nG_app;
267  n_left = 0;
268 
269  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
270  {
271  g12 = buff_G + (g )*rs_G + (k )*cs_G;
272  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
273  a1 = buff_A + (g )*cs_A;
274  a2 = buff_A + (g + 1)*cs_A;
275  a3 = buff_A + (g + 2)*cs_A;
276 
277  gamma12 = g12->real;
278  sigma12 = g12->imag;
279  gamma23 = g23->real;
280  sigma23 = g23->imag;
281 
282  is_ident12 = ( gamma12 == one && sigma12 == zero );
283  is_ident23 = ( gamma23 == one && sigma23 == zero );
284 
285  m_app = min( i_k + 3 + j - iTL, m_A );
286  m_app = max( m_app, 0 );
287 
288  if ( !is_ident12 && is_ident23 )
289  {
290  // Apply only to columns 1 and 2.
291 
292  MAC_Apply_G_mx2_asd( m_app,
293  &gamma12,
294  &sigma12,
295  a1, 1,
296  a2, 1 );
297  }
298  else if ( is_ident12 && !is_ident23 )
299  {
300  // Apply only to columns 2 and 3.
301 
302  MAC_Apply_G_mx2_asd( m_app,
303  &gamma23,
304  &sigma23,
305  a2, 1,
306  a3, 1 );
307  }
308  else if ( !is_ident12 && !is_ident23 )
309  {
310  // Apply to all three columns.
311 
312  MAC_Apply_G_mx3_asd( m_app,
313  &gamma12,
314  &sigma12,
315  &gamma23,
316  &sigma23,
317  a1, 1,
318  a2, 1,
319  a3, 1 );
320  }
321  }
322  }
323 
324  // Shutdown stage
325 
326  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
327  {
328  g = nG - 1;
329  k = j;
330 
331  n_left = 1;
332  if ( n_left == 1 )
333  {
334  g12 = buff_G + (g )*rs_G + (k )*cs_G;
335  a1 = buff_A + (g )*cs_A;
336  a2 = buff_A + (g + 1)*cs_A;
337 
338  gamma12 = g12->real;
339  sigma12 = g12->imag;
340 
341  is_ident12 = ( gamma12 == one && sigma12 == zero );
342 
343  m_app = m_A;
344 
345  if ( !is_ident12 )
346  MAC_Apply_G_mx2_asd( m_app,
347  &gamma12,
348  &sigma12,
349  a1, 1,
350  a2, 1 );
351  ++k;
352  --g;
353  }
354 
355  nG_app = k_minus_1 - j;
356  n_iter = nG_app;
357 
358  for ( i = 0; i < n_iter; ++i, ++k, --g )
359  {
360  g12 = buff_G + (g )*rs_G + (k )*cs_G;
361  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
362  a1 = buff_A + (g )*cs_A;
363  a2 = buff_A + (g + 1)*cs_A;
364  a3 = buff_A + (g + 2)*cs_A;
365 
366  gamma12 = g12->real;
367  sigma12 = g12->imag;
368  gamma23 = g23->real;
369  sigma23 = g23->imag;
370 
371  is_ident12 = ( gamma12 == one && sigma12 == zero );
372  is_ident23 = ( gamma23 == one && sigma23 == zero );
373 
374  m_app = m_A;
375 
376  if ( !is_ident12 && is_ident23 )
377  {
378  // Apply only to columns 1 and 2.
379 
380  MAC_Apply_G_mx2_asd( m_app,
381  &gamma12,
382  &sigma12,
383  a1, 1,
384  a2, 1 );
385  }
386  else if ( is_ident12 && !is_ident23 )
387  {
388  // Apply only to columns 2 and 3.
389 
390  MAC_Apply_G_mx2_asd( m_app,
391  &gamma23,
392  &sigma23,
393  a2, 1,
394  a3, 1 );
395  }
396  else if ( !is_ident12 && !is_ident23 )
397  {
398  // Apply to all three columns.
399 
400  MAC_Apply_G_mx3_asd( m_app,
401  &gamma12,
402  &sigma12,
403  &gamma23,
404  &sigma23,
405  a1, 1,
406  a2, 1,
407  a3, 1 );
408  }
409  }
410  }
411 
412  return FLA_SUCCESS;
413 }
FLA_Error FLA_Apply_G_rf_asd_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var1.c:164
double imag
Definition: blis_type_defs.h:139
double bl1_d0(void)
Definition: bl1_constants.c:118
double real
Definition: blis_type_defs.h:139
int n_left
Definition: bl1_axmyv2.c:149
int i
Definition: bl1_axmyv2.c:145
double bl1_d1(void)
Definition: bl1_constants.c:54
dcomplex
Definition: blis_type_defs.h:137

◆ FLA_Apply_G_rf_asm_var9b()

FLA_Error FLA_Apply_G_rf_asm_var9b ( FLA_Obj  G,
FLA_Obj  A 
)

References FLA_Apply_G_rf_asc_var9b(), FLA_Apply_G_rf_asd_var9b(), FLA_Apply_G_rf_ass_var9b(), FLA_Apply_G_rf_asz_var9b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

29 {
30  FLA_Datatype datatype;
31  int k_G, m_A, n_A;
32  int rs_G, cs_G;
33  int rs_A, cs_A;
34 
35  datatype = FLA_Obj_datatype( A );
36 
37  k_G = FLA_Obj_width( G );
38  m_A = FLA_Obj_length( A );
39  n_A = FLA_Obj_width( A );
40 
41  rs_G = FLA_Obj_row_stride( G );
42  cs_G = FLA_Obj_col_stride( G );
43 
44  rs_A = FLA_Obj_row_stride( A );
45  cs_A = FLA_Obj_col_stride( A );
46 
47  switch ( datatype )
48  {
49  case FLA_FLOAT:
50  {
51  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
52  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
53 
54  FLA_Apply_G_rf_ass_var9b( k_G,
55  m_A,
56  n_A,
57  0,
58  0,
59  buff_G, rs_G, cs_G,
60  buff_A, rs_A, cs_A );
61 
62  break;
63  }
64 
65  case FLA_DOUBLE:
66  {
67  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
68  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
69 
70  FLA_Apply_G_rf_asd_var9b( k_G,
71  m_A,
72  n_A,
73  0,
74  0,
75  buff_G, rs_G, cs_G,
76  buff_A, rs_A, cs_A );
77 
78  break;
79  }
80 
81  case FLA_COMPLEX:
82  {
83  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
84  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
85 
86  FLA_Apply_G_rf_asc_var9b( k_G,
87  m_A,
88  n_A,
89  0,
90  0,
91  buff_G, rs_G, cs_G,
92  buff_A, rs_A, cs_A );
93 
94  break;
95  }
96 
97  case FLA_DOUBLE_COMPLEX:
98  {
99  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
100  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
101 
102  FLA_Apply_G_rf_asz_var9b( k_G,
103  m_A,
104  n_A,
105  0,
106  0,
107  buff_G, rs_G, cs_G,
108  buff_A, rs_A, cs_A );
109 
110  break;
111  }
112  }
113 
114  return FLA_SUCCESS;
115 }
FLA_Error FLA_Apply_G_rf_asz_var9b(int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var9b.c:428
FLA_Error FLA_Apply_G_rf_ass_var9b(int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var9b.c:118
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
scomplex
Definition: blis_type_defs.h:132
FLA_Error FLA_Apply_G_rf_asc_var9b(int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var9b.c:415
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Error FLA_Apply_G_rf_asd_var9b(int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var9b.c:131
dcomplex
Definition: blis_type_defs.h:137

◆ FLA_Apply_G_rf_ass_var9b()

FLA_Error FLA_Apply_G_rf_ass_var9b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)

Referenced by FLA_Apply_G_rf_asm_var9b(), and FLA_Apply_G_rf_bls_var9b().

125 {
126  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
127 
128  return FLA_SUCCESS;
129 }

◆ FLA_Apply_G_rf_asz_var9b()

FLA_Error FLA_Apply_G_rf_asz_var9b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A 
)

Referenced by FLA_Apply_G_rf_asm_var9b().

435 {
436  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
437 
438  return FLA_SUCCESS;
439 }