libflame  revision_anchor
Functions
FLA_Tridiag_UT_l_opt_var1.c File Reference

(r)

Functions

FLA_Error FLA_Tridiag_UT_l_opt_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_opt_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ops_var1 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opd_var1 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opc_var1 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opz_var1 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 

Function Documentation

◆ FLA_Tridiag_UT_l_opt_var1()

FLA_Error FLA_Tridiag_UT_l_opt_var1 ( FLA_Obj  A,
FLA_Obj  T 
)

References FLA_Tridiag_UT_l_step_opt_var1().

Referenced by FLA_Tridiag_UT_l().

14 {
15  return FLA_Tridiag_UT_l_step_opt_var1( A, T );
16 }
FLA_Error FLA_Tridiag_UT_l_step_opt_var1(FLA_Obj A, FLA_Obj T)
Definition: FLA_Tridiag_UT_l_opt_var1.c:18

◆ FLA_Tridiag_UT_l_step_opc_var1()

FLA_Error FLA_Tridiag_UT_l_step_opc_var1 ( int  m_A,
int  m_T,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_T,
int  rs_T,
int  cs_T 
)

References bl1_caxpyv(), bl1_cdot(), bl1_cgemv(), bl1_chemv(), bl1_cher2(), bl1_cscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var1().

363 {
364  scomplex* buff_2 = FLA_COMPLEX_PTR( FLA_TWO );
365  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
366  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
367  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
368 
369  scomplex first_elem;
370  scomplex beta;
371  scomplex inv_tau11;
372  scomplex minus_inv_tau11;
373  int i;
374 
375  // b_alg = FLA_Obj_length( T );
376  int b_alg = m_T;
377 
378  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
379  scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
380  int inc_z = 1;
381 
382  for ( i = 0; i < b_alg; ++i )
383  {
384  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
385  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
386  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
387 
388  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
389  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
390 
391  scomplex* z21 = buff_z + (i+1)*inc_z;
392 
393  scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
394  scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
395 
396  int m_ahead = m_A - i - 1;
397  int n_behind = i;
398 
399  /*------------------------------------------------------------*/
400 
401  if ( m_ahead > 0 )
402  {
403  // FLA_Househ2_UT( FLA_LEFT,
404  // a21_t,
405  // a21_b, tau11 );
406  FLA_Househ2_UT_l_opc( m_ahead - 1,
407  a21_t,
408  a21_b, rs_A,
409  tau11 );
410 
411  // FLA_Set( FLA_ONE, inv_tau11 );
412  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
413  // FLA_Copy( inv_tau11, minus_inv_tau11 );
414  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
415  bl1_cdiv3( buff_1, tau11, &inv_tau11 );
416  bl1_cneg2( &inv_tau11, &minus_inv_tau11 );
417 
418  // FLA_Copy( a21_t, first_elem );
419  // FLA_Set( FLA_ONE, a21_t );
420  first_elem = *a21_t;
421  *a21_t = *buff_1;
422 
423  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
426  m_ahead,
427  buff_1,
428  A22, rs_A, cs_A,
429  a21, rs_A,
430  buff_0,
431  z21, inc_z );
432 
433  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
434  // FLA_Inv_scal( FLA_TWO, beta );
436  m_ahead,
437  a21, rs_A,
438  z21, inc_z,
439  &beta );
440  bl1_cinvscals( buff_2, &beta );
441 
442  // FLA_Scal( minus_inv_tau11, beta );
443  // FLA_Axpy( beta, a21, z21 );
444  // FLA_Scal( inv_tau11, z21 );
445  bl1_cscals( &minus_inv_tau11, &beta );
447  m_ahead,
448  &beta,
449  a21, rs_A,
450  z21, inc_z );
452  m_ahead,
453  &inv_tau11,
454  z21, inc_z );
455 
456  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, a21, z21, A22 );
459  m_ahead,
460  buff_m1,
461  a21, rs_A,
462  z21, inc_z,
463  A22, rs_A, cs_A );
464 
465  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
468  m_ahead,
469  n_behind,
470  buff_1,
471  A20, rs_A, cs_A,
472  a21, rs_A,
473  buff_0,
474  t01, rs_T );
475 
476  // FLA_Copy( first_elem, a21_t );
477  *a21_t = first_elem;
478  }
479 
480  /*------------------------------------------------------------*/
481 
482  }
483 
484  // FLA_Obj_free( &z );
485  FLA_free( buff_z );
486 
487  return FLA_SUCCESS;
488 }
void bl1_cscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_scalv.c:46
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
void bl1_chemv(uplo1_t uplo, conj1_t conj, int m, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_hemv.c:35
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_cher2(uplo1_t uplo, conj1_t conj, int m, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition: bl1_her2.c:33
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:54
Definition: blis_type_defs.h:132
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
int i
Definition: bl1_axmyv2.c:145
Definition: blis_type_defs.h:62
FLA_Obj FLA_TWO
Definition: FLA_Init.c:17
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Tridiag_UT_l_step_opd_var1()

FLA_Error FLA_Tridiag_UT_l_step_opd_var1 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T 
)

References bl1_daxpyv(), bl1_ddot(), bl1_dgemv(), bl1_dscalv(), bl1_dsymv(), bl1_dsyr2(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var1().

232 {
233  double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
234  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
235  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
236  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
237 
238  double first_elem;
239  double beta;
240  double inv_tau11;
241  double minus_inv_tau11;
242  int i;
243 
244  // b_alg = FLA_Obj_length( T );
245  int b_alg = m_T;
246 
247  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
248  double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
249  int inc_z = 1;
250 
251  for ( i = 0; i < b_alg; ++i )
252  {
253  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
254  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
255  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
256 
257  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
258  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
259 
260  double* z21 = buff_z + (i+1)*inc_z;
261 
262  double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
263  double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
264 
265  int m_ahead = m_A - i - 1;
266  int n_behind = i;
267 
268  /*------------------------------------------------------------*/
269 
270  if ( m_ahead > 0 )
271  {
272  // FLA_Househ2_UT( FLA_LEFT,
273  // a21_t,
274  // a21_b, tau11 );
275  FLA_Househ2_UT_l_opd( m_ahead - 1,
276  a21_t,
277  a21_b, rs_A,
278  tau11 );
279 
280  // FLA_Set( FLA_ONE, inv_tau11 );
281  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
282  // FLA_Copy( inv_tau11, minus_inv_tau11 );
283  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
284  bl1_ddiv3( buff_1, tau11, &inv_tau11 );
285  bl1_dneg2( &inv_tau11, &minus_inv_tau11 );
286 
287  // FLA_Copy( a21_t, first_elem );
288  // FLA_Set( FLA_ONE, a21_t );
289  first_elem = *a21_t;
290  *a21_t = *buff_1;
291 
292  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
293  bl1_dsymv( BLIS1_LOWER_TRIANGULAR,
294  m_ahead,
295  buff_1,
296  A22, rs_A, cs_A,
297  a21, rs_A,
298  buff_0,
299  z21, inc_z );
300 
301  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
302  // FLA_Inv_scal( FLA_TWO, beta );
303  bl1_ddot( BLIS1_CONJUGATE,
304  m_ahead,
305  a21, rs_A,
306  z21, inc_z,
307  &beta );
308  bl1_dinvscals( buff_2, &beta );
309 
310  // FLA_Scal( minus_inv_tau11, beta );
311  // FLA_Axpy( beta, a21, z21 );
312  // FLA_Scal( inv_tau11, z21 );
313  bl1_dscals( &minus_inv_tau11, &beta );
315  m_ahead,
316  &beta,
317  a21, rs_A,
318  z21, inc_z );
320  m_ahead,
321  &inv_tau11,
322  z21, inc_z );
323 
324  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, a21, z21, A22 );
325  bl1_dsyr2( BLIS1_LOWER_TRIANGULAR,
326  m_ahead,
327  buff_m1,
328  a21, rs_A,
329  z21, inc_z,
330  A22, rs_A, cs_A );
331 
332  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
335  m_ahead,
336  n_behind,
337  buff_1,
338  A20, rs_A, cs_A,
339  a21, rs_A,
340  buff_0,
341  t01, rs_T );
342 
343  // FLA_Copy( first_elem, a21_t );
344  *a21_t = first_elem;
345  }
346 
347  /*------------------------------------------------------------*/
348 
349  }
350 
351  // FLA_Obj_free( &z );
352  FLA_free( buff_z );
353 
354  return FLA_SUCCESS;
355 }
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_dsyr2(uplo1_t uplo, int m, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition: bl1_syr2.c:58
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_scalv.c:24
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_dsymv(uplo1_t uplo, int m, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_symv.c:56
int i
Definition: bl1_axmyv2.c:145
Definition: blis_type_defs.h:62
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
FLA_Obj FLA_TWO
Definition: FLA_Init.c:17
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Tridiag_UT_l_step_ops_var1()

FLA_Error FLA_Tridiag_UT_l_step_ops_var1 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T 
)

References bl1_saxpyv(), bl1_sdot(), bl1_sgemv(), bl1_sscalv(), bl1_ssymv(), bl1_ssyr2(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var1().

101 {
102  float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
103  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
104  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
105  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
106 
107  float first_elem;
108  float beta;
109  float inv_tau11;
110  float minus_inv_tau11;
111  int i;
112 
113  // b_alg = FLA_Obj_length( T );
114  int b_alg = m_T;
115 
116  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
117  float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
118  int inc_z = 1;
119 
120  for ( i = 0; i < b_alg; ++i )
121  {
122  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
123  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
124  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
125 
126  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
127  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
128 
129  float* z21 = buff_z + (i+1)*inc_z;
130 
131  float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
132  float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
133 
134  int m_ahead = m_A - i - 1;
135  int n_behind = i;
136 
137  /*------------------------------------------------------------*/
138 
139  if ( m_ahead > 0 )
140  {
141  // FLA_Househ2_UT( FLA_LEFT,
142  // a21_t,
143  // a21_b, tau11 );
144  FLA_Househ2_UT_l_ops( m_ahead - 1,
145  a21_t,
146  a21_b, rs_A,
147  tau11 );
148 
149  // FLA_Set( FLA_ONE, inv_tau11 );
150  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
151  // FLA_Copy( inv_tau11, minus_inv_tau11 );
152  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
153  bl1_sdiv3( buff_1, tau11, &inv_tau11 );
154  bl1_sneg2( &inv_tau11, &minus_inv_tau11 );
155 
156  // FLA_Copy( a21_t, first_elem );
157  // FLA_Set( FLA_ONE, a21_t );
158  first_elem = *a21_t;
159  *a21_t = *buff_1;
160 
161  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
162  bl1_ssymv( BLIS1_LOWER_TRIANGULAR,
163  m_ahead,
164  buff_1,
165  A22, rs_A, cs_A,
166  a21, rs_A,
167  buff_0,
168  z21, inc_z );
169 
170  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
171  // FLA_Inv_scal( FLA_TWO, beta );
172  bl1_sdot( BLIS1_CONJUGATE,
173  m_ahead,
174  a21, rs_A,
175  z21, inc_z,
176  &beta );
177  bl1_sinvscals( buff_2, &beta );
178 
179  // FLA_Scal( minus_inv_tau11, beta );
180  // FLA_Axpy( beta, a21, z21 );
181  // FLA_Scal( inv_tau11, z21 );
182  bl1_sscals( &minus_inv_tau11, &beta );
184  m_ahead,
185  &beta,
186  a21, rs_A,
187  z21, inc_z );
189  m_ahead,
190  &inv_tau11,
191  z21, inc_z );
192 
193  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, a21, z21, A22 );
194  bl1_ssyr2( BLIS1_LOWER_TRIANGULAR,
195  m_ahead,
196  buff_m1,
197  a21, rs_A,
198  z21, inc_z,
199  A22, rs_A, cs_A );
200 
201  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
204  m_ahead,
205  n_behind,
206  buff_1,
207  A20, rs_A, cs_A,
208  a21, rs_A,
209  buff_0,
210  t01, rs_T );
211 
212  // FLA_Copy( first_elem, a21_t );
213  *a21_t = first_elem;
214  }
215 
216  /*------------------------------------------------------------*/
217 
218  }
219 
220  // FLA_Obj_free( &z );
221  FLA_free( buff_z );
222 
223  return FLA_SUCCESS;
224 }
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_ssymv(uplo1_t uplo, int m, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_symv.c:13
void bl1_ssyr2(uplo1_t uplo, int m, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition: bl1_syr2.c:13
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_scalv.c:13
int i
Definition: bl1_axmyv2.c:145
Definition: blis_type_defs.h:62
FLA_Obj FLA_TWO
Definition: FLA_Init.c:17
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Tridiag_UT_l_step_opt_var1()

FLA_Error FLA_Tridiag_UT_l_step_opt_var1 ( FLA_Obj  A,
FLA_Obj  T 
)

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Tridiag_UT_l_step_opc_var1(), FLA_Tridiag_UT_l_step_opd_var1(), FLA_Tridiag_UT_l_step_ops_var1(), and FLA_Tridiag_UT_l_step_opz_var1().

Referenced by FLA_Tridiag_UT_l_blk_var1(), and FLA_Tridiag_UT_l_opt_var1().

19 {
20  FLA_Datatype datatype;
21  int m_A, m_T;
22  int rs_A, cs_A;
23  int rs_T, cs_T;
24 
25  datatype = FLA_Obj_datatype( A );
26 
27  m_A = FLA_Obj_length( A );
28  m_T = FLA_Obj_length( T );
29 
30  rs_A = FLA_Obj_row_stride( A );
31  cs_A = FLA_Obj_col_stride( A );
32 
33  rs_T = FLA_Obj_row_stride( T );
34  cs_T = FLA_Obj_col_stride( T );
35 
36 
37  switch ( datatype )
38  {
39  case FLA_FLOAT:
40  {
41  float* buff_A = FLA_FLOAT_PTR( A );
42  float* buff_T = FLA_FLOAT_PTR( T );
43 
44  FLA_Tridiag_UT_l_step_ops_var1( m_A,
45  m_T,
46  buff_A, rs_A, cs_A,
47  buff_T, rs_T, cs_T );
48 
49  break;
50  }
51 
52  case FLA_DOUBLE:
53  {
54  double* buff_A = FLA_DOUBLE_PTR( A );
55  double* buff_T = FLA_DOUBLE_PTR( T );
56 
57  FLA_Tridiag_UT_l_step_opd_var1( m_A,
58  m_T,
59  buff_A, rs_A, cs_A,
60  buff_T, rs_T, cs_T );
61 
62  break;
63  }
64 
65  case FLA_COMPLEX:
66  {
67  scomplex* buff_A = FLA_COMPLEX_PTR( A );
68  scomplex* buff_T = FLA_COMPLEX_PTR( T );
69 
70  FLA_Tridiag_UT_l_step_opc_var1( m_A,
71  m_T,
72  buff_A, rs_A, cs_A,
73  buff_T, rs_T, cs_T );
74 
75  break;
76  }
77 
78  case FLA_DOUBLE_COMPLEX:
79  {
80  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
81  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
82 
83  FLA_Tridiag_UT_l_step_opz_var1( m_A,
84  m_T,
85  buff_A, rs_A, cs_A,
86  buff_T, rs_T, cs_T );
87 
88  break;
89  }
90  }
91 
92  return FLA_SUCCESS;
93 }
FLA_Error FLA_Tridiag_UT_l_step_ops_var1(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_opt_var1.c:97
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Error FLA_Tridiag_UT_l_step_opc_var1(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_opt_var1.c:359
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
FLA_Error FLA_Tridiag_UT_l_step_opz_var1(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_opt_var1.c:492
FLA_Error FLA_Tridiag_UT_l_step_opd_var1(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_opt_var1.c:228
Definition: blis_type_defs.h:132
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
Definition: blis_type_defs.h:137

◆ FLA_Tridiag_UT_l_step_opz_var1()

FLA_Error FLA_Tridiag_UT_l_step_opz_var1 ( int  m_A,
int  m_T,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)

References bl1_zaxpyv(), bl1_zdot(), bl1_zgemv(), bl1_zhemv(), bl1_zher2(), bl1_zscals(), bl1_zscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var1().

496 {
497  dcomplex* buff_2 = FLA_DOUBLE_COMPLEX_PTR( FLA_TWO );
498  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
499  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
500  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
501 
502  dcomplex first_elem;
503  dcomplex beta;
504  dcomplex inv_tau11;
505  dcomplex minus_inv_tau11;
506  int i;
507 
508  // b_alg = FLA_Obj_length( T );
509  int b_alg = m_T;
510 
511  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
512  dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
513  int inc_z = 1;
514 
515  for ( i = 0; i < b_alg; ++i )
516  {
517  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
518  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
519  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
520 
521  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
522  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
523 
524  dcomplex* z21 = buff_z + (i+1)*inc_z;
525 
526  dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
527  dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
528 
529  int m_ahead = m_A - i - 1;
530  int n_behind = i;
531 
532  /*------------------------------------------------------------*/
533 
534  if ( m_ahead > 0 )
535  {
536  // FLA_Househ2_UT( FLA_LEFT,
537  // a21_t,
538  // a21_b, tau11 );
539  FLA_Househ2_UT_l_opz( m_ahead - 1,
540  a21_t,
541  a21_b, rs_A,
542  tau11 );
543 
544  // FLA_Set( FLA_ONE, inv_tau11 );
545  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
546  // FLA_Copy( inv_tau11, minus_inv_tau11 );
547  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
548  bl1_zdiv3( buff_1, tau11, &inv_tau11 );
549  bl1_zneg2( &inv_tau11, &minus_inv_tau11 );
550 
551  // FLA_Copy( a21_t, first_elem );
552  // FLA_Set( FLA_ONE, a21_t );
553  first_elem = *a21_t;
554  *a21_t = *buff_1;
555 
556  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
559  m_ahead,
560  buff_1,
561  A22, rs_A, cs_A,
562  a21, rs_A,
563  buff_0,
564  z21, inc_z );
565 
566  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
567  // FLA_Inv_scal( FLA_TWO, beta );
569  m_ahead,
570  a21, rs_A,
571  z21, inc_z,
572  &beta );
573  bl1_zinvscals( buff_2, &beta );
574 
575  // FLA_Scal( minus_inv_tau11, beta );
576  // FLA_Axpy( beta, a21, z21 );
577  // FLA_Scal( inv_tau11, z21 );
578  bl1_zscals( &minus_inv_tau11, &beta );
580  m_ahead,
581  &beta,
582  a21, rs_A,
583  z21, inc_z );
585  m_ahead,
586  &inv_tau11,
587  z21, inc_z );
588 
589  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, a21, z21, A22 );
592  m_ahead,
593  buff_m1,
594  a21, rs_A,
595  z21, inc_z,
596  A22, rs_A, cs_A );
597 
598  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
601  m_ahead,
602  n_behind,
603  buff_1,
604  A20, rs_A, cs_A,
605  a21, rs_A,
606  buff_0,
607  t01, rs_T );
608 
609  // FLA_Copy( first_elem, a21_t );
610  *a21_t = first_elem;
611  }
612 
613  /*------------------------------------------------------------*/
614 
615  }
616 
617  // FLA_Obj_free( &z );
618  FLA_free( buff_z );
619 
620  return FLA_SUCCESS;
621 }
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
void bl1_zscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_scalv.c:72
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:54
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
int i
Definition: bl1_axmyv2.c:145
void bl1_zhemv(uplo1_t uplo, conj1_t conj, int m, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_hemv.c:134
void bl1_zher2(uplo1_t uplo, conj1_t conj, int m, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_her2.c:121
Definition: blis_type_defs.h:62
FLA_Obj FLA_TWO
Definition: FLA_Init.c:17
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
Definition: blis_type_defs.h:137
bl1_zscals(beta, rho_yz)
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60