libflame  revision_anchor
Functions
FLA_Bidiag_UT_realify.c File Reference

(r)

Functions

FLA_Error FLA_Bidiag_UT_realify (FLA_Obj A, FLA_Obj d, FLA_Obj e)
 
FLA_Error FLA_Bidiag_UT_l_realify_unb (FLA_Obj A, FLA_Obj d, FLA_Obj e)
 
FLA_Error FLA_Bidiag_UT_l_realify_opt (FLA_Obj A, FLA_Obj d, FLA_Obj e)
 
FLA_Error FLA_Bidiag_UT_u_realify_unb (FLA_Obj A, FLA_Obj d, FLA_Obj e)
 
FLA_Error FLA_Bidiag_UT_u_realify_opt (FLA_Obj A, FLA_Obj d, FLA_Obj e)
 

Function Documentation

◆ FLA_Bidiag_UT_l_realify_opt()

FLA_Error FLA_Bidiag_UT_l_realify_opt ( FLA_Obj  A,
FLA_Obj  d,
FLA_Obj  e 
)

References bl1_dsetv(), bl1_ssetv(), bl1_zscals(), BLIS1_CONJUGATE, FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), FLA_ONE, FLA_ZERO, i, scomplex::imag, and dcomplex::imag.

Referenced by FLA_Bidiag_UT_realify().

/*
 * FLA_Bidiag_UT_l_realify_opt
 *
 * Real datatypes (FLA_FLOAT, FLA_DOUBLE): fills the first min(m,n) entries of
 * d and e with one; A is not touched.
 *
 * Complex datatypes (FLA_COMPLEX, FLA_DOUBLE_COMPLEX): walks the diagonal of A
 * through raw buffers and strides. For each index i it
 *   - sets d[i] to one when i == 0, otherwise to conj(s)/abs(s) where s is the
 *     subdiagonal entry A(i,i-1); s and A(i,i) are then scaled by d[i] and the
 *     imaginary part of s is overwritten with zero;
 *   - sets e[i] to conj(a)/abs(a) where a is the (already scaled) diagonal
 *     entry A(i,i); a is scaled by e[i], its imaginary part is overwritten
 *     with zero, and A(i+1,i), if that row exists, is scaled by e[i] too.
 * The formulas above are read from the commented-out FLA_* calls that sit next
 * to each bl1_* call.
 *
 * Returns FLA_SUCCESS in every case; a datatype outside the four cases falls
 * through the switch without doing any work.
 *
 * NOTE(review): the bl1_* scalar helpers are not visible here. If an entry
 * used as s or a is exactly zero the quotient is presumably 0/0 -- confirm
 * how callers guarantee nonzero entries.
 * NOTE(review): n_A is assigned below but not read anywhere in this listing.
 */
160 {
161  FLA_Datatype datatype;
162  int m_A, n_A;
163  int min_m_n;
164  int rs_A, cs_A;
165  int inc_d;
166  int inc_e;
167  int i;
168 
169  datatype = FLA_Obj_datatype( A );
170 
171  m_A = FLA_Obj_length( A );
172  n_A = FLA_Obj_width( A );
173  min_m_n = FLA_Obj_min_dim( A );
174 
175  rs_A = FLA_Obj_row_stride( A );
176  cs_A = FLA_Obj_col_stride( A );
177 
178  inc_d = FLA_Obj_vector_inc( d );
179 
180  inc_e = FLA_Obj_vector_inc( e );
181 
182 
183  switch ( datatype )
184  {
185  case FLA_FLOAT:
186  {
187  float* buff_d = FLA_FLOAT_PTR( d );
188  float* buff_e = FLA_FLOAT_PTR( e );
189  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
190  // Real case: d and e are filled with ones; A is not modified.
191  bl1_ssetv( min_m_n,
192  buff_1,
193  buff_d, inc_d );
194 
195  bl1_ssetv( min_m_n,
196  buff_1,
197  buff_e, inc_e );
198 
199  break;
200  }
201 
202  case FLA_DOUBLE:
203  {
204  double* buff_d = FLA_DOUBLE_PTR( d );
205  double* buff_e = FLA_DOUBLE_PTR( e );
206  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
207  // Real case: d and e are filled with ones; A is not modified.
208  bl1_dsetv( min_m_n,
209  buff_1,
210  buff_d, inc_d );
211 
212  bl1_dsetv( min_m_n,
213  buff_1,
214  buff_e, inc_e );
215 
216  break;
217  }
218 
219  case FLA_COMPLEX:
220  {
221  scomplex* buff_A = FLA_COMPLEX_PTR( A );
222  scomplex* buff_d = FLA_COMPLEX_PTR( d );
223  scomplex* buff_e = FLA_COMPLEX_PTR( e );
224  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
225  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
226 
227  for ( i = 0; i < min_m_n; ++i )
228  {
229  // alpha11 addresses A(i,i); delta1 and epsilon1 address d[i] and e[i].
230  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
231  scomplex* delta1 = buff_d + (i )*inc_d;
232  scomplex* epsilon1 = buff_e + (i )*inc_e;
233  scomplex absv;
234  // m_ahead: number of rows below row i; m_behind: number of rows above it.
235  int m_ahead = m_A - i - 1;
236  int m_behind = i;
237 
238  if ( m_behind == 0 )
239  {
240  // FLA_Set( FLA_ONE, delta1 );
241  *delta1 = *buff_1;
242  }
243  else
244  {
245  scomplex* a10t_r = buff_A + (i-1)*cs_A + (i )*rs_A; // A(i,i-1), the subdiagonal entry
246  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a10t_r, delta1 );
247  // FLA_Copyt( FLA_NO_TRANSPOSE, a10t_r, absv );
248  // FLA_Absolute_value( absv );
249  // FLA_Inv_scal( absv, delta1 );
250  bl1_ccopys( BLIS1_CONJUGATE, a10t_r, delta1 );
251  bl1_cabsval2( a10t_r, &absv );
252  bl1_cinvscals( &absv, delta1 );
253 
254  // FLA_Scalc( FLA_NO_CONJUGATE, delta1, a10t_r );
255  // FLA_Obj_set_imag_part( FLA_ZERO, a10t_r );
256  bl1_cscals( delta1, a10t_r );
257  a10t_r->imag = *buff_0;
258 
259  // FLA_Scalc( FLA_NO_CONJUGATE, delta1, alpha11 );
260  bl1_cscals( delta1, alpha11 );
261  }
262  // From here on alpha11 already carries the delta1 scaling (when i > 0).
263  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, epsilon1 );
264  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha11, absv );
265  // FLA_Absolute_value( absv );
266  // FLA_Inv_scal( absv, epsilon1 );
267  bl1_ccopys( BLIS1_CONJUGATE, alpha11, epsilon1 );
268  bl1_cabsval2( alpha11, &absv );
269  bl1_cinvscals( &absv, epsilon1 );
270 
271  // FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, alpha11 );
272  // FLA_Obj_set_imag_part( FLA_ZERO, alpha11 );
273  bl1_cscals( epsilon1, alpha11 );
274  alpha11->imag = *buff_0;
275  // If a row exists below, scale A(i+1,i) by the same epsilon1.
276  if ( m_ahead > 0 )
277  {
278  scomplex* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
279  // FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, a21_t );
280  bl1_cscals( epsilon1, a21_t );
281  }
282  }
283 
284  break;
285  }
286 
287  case FLA_DOUBLE_COMPLEX:
288  {
289  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
290  dcomplex* buff_d = FLA_DOUBLE_COMPLEX_PTR( d );
291  dcomplex* buff_e = FLA_DOUBLE_COMPLEX_PTR( e );
292  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
293  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
294 
295  for ( i = 0; i < min_m_n; ++i )
296  {
297  // alpha11 addresses A(i,i); delta1 and epsilon1 address d[i] and e[i].
298  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
299  dcomplex* delta1 = buff_d + (i )*inc_d;
300  dcomplex* epsilon1 = buff_e + (i )*inc_e;
301  dcomplex absv;
302  // m_ahead: number of rows below row i; m_behind: number of rows above it.
303  int m_ahead = m_A - i - 1;
304  int m_behind = i;
305 
306  if ( m_behind == 0 )
307  {
308  // FLA_Set( FLA_ONE, delta1 );
309  *delta1 = *buff_1;
310  }
311  else
312  {
313  dcomplex* a10t_r = buff_A + (i-1)*cs_A + (i )*rs_A; // A(i,i-1), the subdiagonal entry
314  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a10t_r, delta1 );
315  // FLA_Copyt( FLA_NO_TRANSPOSE, a10t_r, absv );
316  // FLA_Absolute_value( absv );
317  // FLA_Inv_scal( absv, delta1 );
318  bl1_zcopys( BLIS1_CONJUGATE, a10t_r, delta1 );
319  bl1_zabsval2( a10t_r, &absv );
320  bl1_zinvscals( &absv, delta1 );
321 
322  // FLA_Scalc( FLA_NO_CONJUGATE, delta1, a10t_r );
323  // FLA_Obj_set_imag_part( FLA_ZERO, a10t_r );
324  bl1_zscals( delta1, a10t_r );
325  a10t_r->imag = *buff_0;
326 
327  // FLA_Scalc( FLA_NO_CONJUGATE, delta1, alpha11 );
328  bl1_zscals( delta1, alpha11 );
329  }
330  // From here on alpha11 already carries the delta1 scaling (when i > 0).
331  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, epsilon1 );
332  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha11, absv );
333  // FLA_Absolute_value( absv );
334  // FLA_Inv_scal( absv, epsilon1 );
335  bl1_zcopys( BLIS1_CONJUGATE, alpha11, epsilon1 );
336  bl1_zabsval2( alpha11, &absv );
337  bl1_zinvscals( &absv, epsilon1 );
338 
339  // FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, alpha11 );
340  // FLA_Obj_set_imag_part( FLA_ZERO, alpha11 );
341  bl1_zscals( epsilon1, alpha11 );
342  alpha11->imag = *buff_0;
343  // If a row exists below, scale A(i+1,i) by the same epsilon1.
344  if ( m_ahead > 0 )
345  {
346  dcomplex* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
347  // FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, a21_t );
348  bl1_zscals( epsilon1, a21_t );
349  }
350  }
351 
352  break;
353  }
354  }
355 
356  return FLA_SUCCESS;
357 }
double imag
Definition: blis_type_defs.h:139
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_dsetv(int m, double *sigma, double *x, int incx)
Definition: bl1_setv.c:39
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
void bl1_ssetv(int m, float *sigma, float *x, int incx)
Definition: bl1_setv.c:26
Definition: blis_type_defs.h:132
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
int i
Definition: bl1_axmyv2.c:145
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
float imag
Definition: blis_type_defs.h:134
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
Definition: blis_type_defs.h:137
bl1_zscals(beta, rho_yz)
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153

◆ FLA_Bidiag_UT_l_realify_unb()

FLA_Error FLA_Bidiag_UT_l_realify_unb ( FLA_Obj  A,
FLA_Obj  d,
FLA_Obj  e 
)

References FLA_Absolute_value(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt(), FLA_Inv_scal(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_set_imag_part(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scalc(), FLA_Set(), and FLA_ZERO.

/*
 * FLA_Bidiag_UT_l_realify_unb
 *
 * Object-API (unblocked) form of the "l" realify step. A is traversed from the
 * top-left corner one diagonal element at a time, with d and e traversed in
 * step from the top. In each iteration:
 *   - delta1 is set to one when a10t is empty (first iteration); otherwise it
 *     is set to conj(a10t_r)/abs(a10t_r), where a10t_r is the rightmost entry
 *     of a10t, and both a10t_r and alpha11 are scaled by delta1, with the
 *     imaginary part of a10t_r overwritten by zero;
 *   - epsilon1 is set to conj(alpha11)/abs(alpha11); alpha11 is scaled by it
 *     and its imaginary part overwritten by zero; the top entry of a21, when
 *     a21 is non-empty, is scaled by epsilon1 as well.
 *
 * A 1x1 scratch object (absv) with A's datatype is created on entry and freed
 * before returning. Always returns FLA_SUCCESS.
 *
 * NOTE(review): there is no guard for abs(...) == 0 before FLA_Inv_scal --
 * confirm that callers never pass zero diagonal/subdiagonal entries.
 */
39 {
40  FLA_Obj ATL, ATR, A00, a01, A02,
41  ABL, ABR, a10t, alpha11, a12t,
42  A20, a21, A22;
43 
44  FLA_Obj dT, d0,
45  dB, delta1,
46  d2;
47 
48  FLA_Obj eT, e0,
49  eB, epsilon1,
50  e2;
51 
52  FLA_Obj a10t_l, a10t_r;
53 
54  FLA_Obj a21_t,
55  a21_b;
56 
57  FLA_Obj absv;
58 
59  // 1x1 scratch object of A's datatype; receives the value passed to FLA_Absolute_value.
60  FLA_Obj_create( FLA_Obj_datatype( A ), 1, 1, 0, 0, &absv );
61 
62  FLA_Part_2x2( A, &ATL, &ATR,
63  &ABL, &ABR, 0, 0, FLA_TL );
64 
65  FLA_Part_2x1( d, &dT,
66  &dB, 0, FLA_TOP );
67 
68  FLA_Part_2x1( e, &eT,
69  &eB, 0, FLA_TOP );
70  // Iterate until the bottom-right quadrant has no rows or no columns left.
71  while ( FLA_Obj_min_dim( ABR ) > 0 )
72  {
73  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
74  /* ************* */ /* ************************** */
75  &a10t, /**/ &alpha11, &a12t,
76  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
77  1, 1, FLA_BR );
78 
79  FLA_Repart_2x1_to_3x1( dT, &d0,
80  /* ** */ /* ****** */
81  &delta1,
82  dB, &d2, 1, FLA_BOTTOM );
83 
84  FLA_Repart_2x1_to_3x1( eT, &e0,
85  /* ** */ /* ******** */
86  &epsilon1,
87  eB, &e2, 1, FLA_BOTTOM );
88 
89  /*------------------------------------------------------------*/
90  // a10t is empty only in the first iteration: there is no subdiagonal entry yet.
91  if ( FLA_Obj_width( a10t ) == 0 )
92  {
93  // delta1 = 1;
94  FLA_Set( FLA_ONE, delta1 );
95  }
96  else
97  {
98  FLA_Part_1x2( a10t, &a10t_l, &a10t_r, 1, FLA_RIGHT );
99 
100  // delta1 = conj(a10t_r) / abs(a10t_r);
101  FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a10t_r, delta1 );
102  FLA_Copyt( FLA_NO_TRANSPOSE, a10t_r, absv );
103  FLA_Absolute_value( absv );
104  FLA_Inv_scal( absv, delta1 );
105 
106  // a10t_r = delta1 * a10t_r;
107  // a10t_r.imag = 0;
108  FLA_Scalc( FLA_NO_CONJUGATE, delta1, a10t_r );
109  FLA_Obj_set_imag_part( FLA_ZERO, a10t_r );
110 
111  // alpha11 = delta1 * alpha11;
112  FLA_Scalc( FLA_NO_CONJUGATE, delta1, alpha11 );
113  }
114 
115  // epsilon1 = conj(alpha11) / abs(alpha11);
116  FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, epsilon1 );
117  FLA_Copyt( FLA_NO_TRANSPOSE, alpha11, absv );
118  FLA_Absolute_value( absv );
119  FLA_Inv_scal( absv, epsilon1 );
120 
121  // alpha11 = epsilon1 * alpha11;
122  // alpha11.imag = 0;
123  FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, alpha11 );
124  FLA_Obj_set_imag_part( FLA_ZERO, alpha11 );
125  // Only the top entry of a21 (directly below alpha11) is scaled.
126  if ( FLA_Obj_length( a21 ) > 0 )
127  {
128  FLA_Part_2x1( a21, &a21_t,
129  &a21_b, 1, FLA_TOP );
130 
131  // a21_t = epsilon1 * a21_t;
132  FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, a21_t );
133  }
134 
135  /*------------------------------------------------------------*/
136 
137  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
138  a10t, alpha11, /**/ a12t,
139  /* ************** */ /* ************************ */
140  &ABL, /**/ &ABR, A20, a21, /**/ A22,
141  FLA_TL );
142 
143  FLA_Cont_with_3x1_to_2x1( &dT, d0,
144  delta1,
145  /* ** */ /* ****** */
146  &dB, d2, FLA_TOP );
147 
148  FLA_Cont_with_3x1_to_2x1( &eT, e0,
149  epsilon1,
150  /* ** */ /* ******** */
151  &eB, e2, FLA_TOP );
152  }
153 
154  FLA_Obj_free( &absv );
155 
156  return FLA_SUCCESS;
157 }
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition: FLA_Obj.c:55
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:226
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Copyt(FLA_Trans trans, FLA_Obj A, FLA_Obj B)
Definition: FLA_Copyt.c:15
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:428
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Set(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Set.c:13
void FLA_Obj_set_imag_part(FLA_Obj alpha, FLA_Obj beta)
Definition: FLA_Misc.c:229
FLA_Error FLA_Inv_scal(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Inv_scal.c:13
FLA_Error FLA_Absolute_value(FLA_Obj alpha)
Definition: FLA_Absolute_value.c:13
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
FLA_Error FLA_Scalc(FLA_Conj conjalpha, FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Scalc.c:13
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153

◆ FLA_Bidiag_UT_realify()

FLA_Error FLA_Bidiag_UT_realify ( FLA_Obj  A,
FLA_Obj  d,
FLA_Obj  e 
)

References FLA_Bidiag_UT_l_realify_opt(), FLA_Bidiag_UT_realify_check(), FLA_Bidiag_UT_u_realify_opt(), FLA_Check_error_level(), FLA_Obj_is_real(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, and FLA_Set().

Referenced by FLA_Svd_ext_u_unb_var1(), FLA_Svd_uv_unb_var1(), and FLA_Svd_uv_unb_var2().

/*
 * FLA_Bidiag_UT_realify
 *
 * Front end for the realify step. After optional argument checking:
 *   - if A is real, d and e are set to one and FLA_SUCCESS is returned;
 *   - otherwise, when A has fewer rows than columns the "_l_" optimized
 *     variant is called, and in all other cases (rows >= columns) the "_u_"
 *     optimized variant is called.
 * Returns the value produced by the selected variant. The commented-out lines
 * show the unblocked variants that could be called in their place.
 */
14 {
15  FLA_Error r_val = FLA_SUCCESS;
16  // Argument checking runs only when the library's error-checking level enables it.
17  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
18  FLA_Bidiag_UT_realify_check( A, d, e );
19  // Real A: both vectors are set to one and A is left as is.
20  if ( FLA_Obj_is_real( A ) )
21  {
22  FLA_Set( FLA_ONE, d );
23  FLA_Set( FLA_ONE, e );
24  return FLA_SUCCESS;
25  }
26  // Complex A: pick the variant from the shape of A (m < n versus m >= n).
27  if ( FLA_Obj_length( A ) < FLA_Obj_width( A ) )
28  //r_val = FLA_Bidiag_UT_l_realify_unb( A, d, e );
29  r_val = FLA_Bidiag_UT_l_realify_opt( A, d, e );
30  else
31  //r_val = FLA_Bidiag_UT_u_realify_unb( A, d, e );
32  r_val = FLA_Bidiag_UT_u_realify_opt( A, d, e );
33 
34  return r_val;
35 }
FLA_Error FLA_Bidiag_UT_realify_check(FLA_Obj A, FLA_Obj d, FLA_Obj e)
Definition: FLA_Bidiag_UT_realify_check.c:13
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
int FLA_Error
Definition: FLA_type_defs.h:47
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Set(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Set.c:13
FLA_Error FLA_Bidiag_UT_l_realify_opt(FLA_Obj A, FLA_Obj d, FLA_Obj e)
Definition: FLA_Bidiag_UT_realify.c:159
FLA_Error FLA_Bidiag_UT_u_realify_opt(FLA_Obj A, FLA_Obj d, FLA_Obj e)
Definition: FLA_Bidiag_UT_realify.c:482
unsigned int FLA_Check_error_level(void)
Definition: FLA_Check.c:18
FLA_Bool FLA_Obj_is_real(FLA_Obj A)
Definition: FLA_Query.c:307
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116

◆ FLA_Bidiag_UT_u_realify_opt()

FLA_Error FLA_Bidiag_UT_u_realify_opt ( FLA_Obj  A,
FLA_Obj  d,
FLA_Obj  e 
)

References bl1_dsetv(), bl1_ssetv(), bl1_zscals(), BLIS1_CONJUGATE, FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), FLA_ONE, FLA_ZERO, i, scomplex::imag, and dcomplex::imag.

Referenced by FLA_Bidiag_UT_realify().

/*
 * FLA_Bidiag_UT_u_realify_opt
 *
 * Real datatypes (FLA_FLOAT, FLA_DOUBLE): fills the first min(m,n) entries of
 * d and e with one; A is not touched.
 *
 * Complex datatypes (FLA_COMPLEX, FLA_DOUBLE_COMPLEX): walks the diagonal of A
 * through raw buffers and strides. For each index i it
 *   - sets e[i] to one when i == 0, otherwise to conj(s)/abs(s) where s is the
 *     superdiagonal entry A(i-1,i); s and A(i,i) are then scaled by e[i] and
 *     the imaginary part of s is overwritten with zero;
 *   - sets d[i] to conj(a)/abs(a) where a is the (already scaled) diagonal
 *     entry A(i,i); a is scaled by d[i], its imaginary part is overwritten
 *     with zero, and A(i,i+1), if that column exists, is scaled by d[i] too.
 * The formulas above are read from the commented-out FLA_* calls that sit next
 * to each bl1_* call.
 *
 * Returns FLA_SUCCESS in every case; a datatype outside the four cases falls
 * through the switch without doing any work.
 *
 * NOTE(review): the bl1_* scalar helpers are not visible here. If an entry
 * used as s or a is exactly zero the quotient is presumably 0/0 -- confirm
 * how callers guarantee nonzero entries.
 * NOTE(review): m_A is assigned below but not read anywhere in this listing.
 */
483 {
484  FLA_Datatype datatype;
485  int m_A, n_A;
486  int min_m_n;
487  int rs_A, cs_A;
488  int inc_d;
489  int inc_e;
490  int i;
491 
492  datatype = FLA_Obj_datatype( A );
493 
494  m_A = FLA_Obj_length( A );
495  n_A = FLA_Obj_width( A );
496  min_m_n = FLA_Obj_min_dim( A );
497 
498  rs_A = FLA_Obj_row_stride( A );
499  cs_A = FLA_Obj_col_stride( A );
500 
501  inc_d = FLA_Obj_vector_inc( d );
502 
503  inc_e = FLA_Obj_vector_inc( e );
504 
505 
506  switch ( datatype )
507  {
508  case FLA_FLOAT:
509  {
510  float* buff_d = FLA_FLOAT_PTR( d );
511  float* buff_e = FLA_FLOAT_PTR( e );
512  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
513  // Real case: d and e are filled with ones; A is not modified.
514  bl1_ssetv( min_m_n,
515  buff_1,
516  buff_d, inc_d );
517 
518  bl1_ssetv( min_m_n,
519  buff_1,
520  buff_e, inc_e );
521 
522  break;
523  }
524 
525  case FLA_DOUBLE:
526  {
527  double* buff_d = FLA_DOUBLE_PTR( d );
528  double* buff_e = FLA_DOUBLE_PTR( e );
529  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
530  // Real case: d and e are filled with ones; A is not modified.
531  bl1_dsetv( min_m_n,
532  buff_1,
533  buff_d, inc_d );
534 
535  bl1_dsetv( min_m_n,
536  buff_1,
537  buff_e, inc_e );
538 
539  break;
540  }
541 
542  case FLA_COMPLEX:
543  {
544  scomplex* buff_A = FLA_COMPLEX_PTR( A );
545  scomplex* buff_d = FLA_COMPLEX_PTR( d );
546  scomplex* buff_e = FLA_COMPLEX_PTR( e );
547  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
548  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
549 
550  for ( i = 0; i < min_m_n; ++i )
551  {
552  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
553  scomplex* delta1 = buff_d + (i )*inc_d;
554  scomplex* epsilon1 = buff_e + (i )*inc_e;
555  scomplex absv;
556  // n_ahead: number of columns right of column i; n_behind: number left of it.
557  int n_ahead = n_A - i - 1;
558  int n_behind = i;
559 
560  if ( n_behind == 0 )
561  {
562  // FLA_Set( FLA_ONE, epsilon1 );
563  *epsilon1 = *buff_1;
564  }
565  else
566  {
567  scomplex* a01_b = buff_A + (i )*cs_A + (i-1)*rs_A; // A(i-1,i), the superdiagonal entry
568  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a01_b, epsilon1 );
569  // FLA_Copyt( FLA_NO_TRANSPOSE, a01_b, absv );
570  // FLA_Absolute_value( absv );
571  // FLA_Inv_scal( absv, epsilon1 );
572  bl1_ccopys( BLIS1_CONJUGATE, a01_b, epsilon1 );
573  bl1_cabsval2( a01_b, &absv );
574  bl1_cinvscals( &absv, epsilon1 );
575 
576  // FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, a01_b );
577  // FLA_Obj_set_imag_part( FLA_ZERO, a01_b );
578  bl1_cscals( epsilon1, a01_b );
579  a01_b->imag = *buff_0;
580 
581  // FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, alpha11 );
582  bl1_cscals( epsilon1, alpha11 );
583  }
584  // From here on alpha11 already carries the epsilon1 scaling (when i > 0).
585  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, delta1 );
586  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha11, absv );
587  // FLA_Absolute_value( absv );
588  // FLA_Inv_scal( absv, delta1 );
589  bl1_ccopys( BLIS1_CONJUGATE, alpha11, delta1 );
590  bl1_cabsval2( alpha11, &absv );
591  bl1_cinvscals( &absv, delta1 );
592 
593  // FLA_Scalc( FLA_NO_CONJUGATE, delta1, alpha11 );
594  // FLA_Obj_set_imag_part( FLA_ZERO, alpha11 );
595  bl1_cscals( delta1, alpha11 );
596  alpha11->imag = *buff_0;
597  // If a column exists to the right, scale A(i,i+1) by the same delta1.
598  if ( n_ahead > 0 )
599  {
600  scomplex* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A;
601  // FLA_Scalc( FLA_NO_CONJUGATE, delta1, a12t_l );
602  bl1_cscals( delta1, a12t_l );
603  }
604  }
605 
606  break;
607  }
608 
609  case FLA_DOUBLE_COMPLEX:
610  {
611  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
612  dcomplex* buff_d = FLA_DOUBLE_COMPLEX_PTR( d );
613  dcomplex* buff_e = FLA_DOUBLE_COMPLEX_PTR( e );
614  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
615  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
616 
617  for ( i = 0; i < min_m_n; ++i )
618  {
619  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
620  dcomplex* delta1 = buff_d + (i )*inc_d;
621  dcomplex* epsilon1 = buff_e + (i )*inc_e;
622  dcomplex absv;
623  // n_ahead: number of columns right of column i; n_behind: number left of it.
624  int n_ahead = n_A - i - 1;
625  int n_behind = i;
626 
627  if ( n_behind == 0 )
628  {
629  // FLA_Set( FLA_ONE, epsilon1 );
630  *epsilon1 = *buff_1;
631  }
632  else
633  {
634  dcomplex* a01_b = buff_A + (i )*cs_A + (i-1)*rs_A; // A(i-1,i), the superdiagonal entry
635  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a01_b, epsilon1 );
636  // FLA_Copyt( FLA_NO_TRANSPOSE, a01_b, absv );
637  // FLA_Absolute_value( absv );
638  // FLA_Inv_scal( absv, epsilon1 );
639  bl1_zcopys( BLIS1_CONJUGATE, a01_b, epsilon1 );
640  bl1_zabsval2( a01_b, &absv );
641  bl1_zinvscals( &absv, epsilon1 );
642 
643  // FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, a01_b );
644  // FLA_Obj_set_imag_part( FLA_ZERO, a01_b );
645  bl1_zscals( epsilon1, a01_b );
646  a01_b->imag = *buff_0;
647 
648  // FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, alpha11 );
649  bl1_zscals( epsilon1, alpha11 );
650  }
651  // From here on alpha11 already carries the epsilon1 scaling (when i > 0).
652  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, delta1 );
653  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha11, absv );
654  // FLA_Absolute_value( absv );
655  // FLA_Inv_scal( absv, delta1 );
656  bl1_zcopys( BLIS1_CONJUGATE, alpha11, delta1 );
657  bl1_zabsval2( alpha11, &absv );
658  bl1_zinvscals( &absv, delta1 );
659 
660  // FLA_Scalc( FLA_NO_CONJUGATE, delta1, alpha11 );
661  // FLA_Obj_set_imag_part( FLA_ZERO, alpha11 );
662  bl1_zscals( delta1, alpha11 );
663  alpha11->imag = *buff_0;
664  // If a column exists to the right, scale A(i,i+1) by the same delta1.
665  if ( n_ahead > 0 )
666  {
667  dcomplex* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A;
668  // FLA_Scalc( FLA_NO_CONJUGATE, delta1, a12t_l );
669  bl1_zscals( delta1, a12t_l );
670  }
671  }
672 
673  break;
674  }
675  }
676 
677  return FLA_SUCCESS;
678 }
double imag
Definition: blis_type_defs.h:139
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_dsetv(int m, double *sigma, double *x, int incx)
Definition: bl1_setv.c:39
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
void bl1_ssetv(int m, float *sigma, float *x, int incx)
Definition: bl1_setv.c:26
Definition: blis_type_defs.h:132
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
int i
Definition: bl1_axmyv2.c:145
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
float imag
Definition: blis_type_defs.h:134
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
Definition: blis_type_defs.h:137
bl1_zscals(beta, rho_yz)
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153

◆ FLA_Bidiag_UT_u_realify_unb()

FLA_Error FLA_Bidiag_UT_u_realify_unb ( FLA_Obj  A,
FLA_Obj  d,
FLA_Obj  e 
)

References FLA_Absolute_value(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt(), FLA_Inv_scal(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_set_imag_part(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scalc(), FLA_Set(), and FLA_ZERO.

/*
 * FLA_Bidiag_UT_u_realify_unb
 *
 * Object-API (unblocked) form of the "u" realify step. A is traversed from the
 * top-left corner one diagonal element at a time, with d and e traversed in
 * step from the top. In each iteration:
 *   - epsilon1 is set to one when a01 is empty (first iteration); otherwise it
 *     is set to conj(a01_b)/abs(a01_b), where a01_b is the bottom entry of
 *     a01, and both a01_b and alpha11 are scaled by epsilon1, with the
 *     imaginary part of a01_b overwritten by zero;
 *   - delta1 is set to conj(alpha11)/abs(alpha11); alpha11 is scaled by it and
 *     its imaginary part overwritten by zero; the leftmost entry of a12t, when
 *     a12t is non-empty, is scaled by delta1 as well.
 *
 * A 1x1 scratch object (absv) with A's datatype is created on entry and freed
 * before returning. Always returns FLA_SUCCESS.
 *
 * NOTE(review): there is no guard for abs(...) == 0 before FLA_Inv_scal --
 * confirm that callers never pass zero diagonal/superdiagonal entries.
 */
362 {
363  FLA_Obj ATL, ATR, A00, a01, A02,
364  ABL, ABR, a10t, alpha11, a12t,
365  A20, a21, A22;
366 
367  FLA_Obj dT, d0,
368  dB, delta1,
369  d2;
370 
371  FLA_Obj eT, e0,
372  eB, epsilon1,
373  e2;
374 
375  FLA_Obj a01_t,
376  a01_b;
377 
378  FLA_Obj a12t_l, a12t_r;
379 
380  FLA_Obj absv;
381 
382  // 1x1 scratch object of A's datatype; receives the value passed to FLA_Absolute_value.
383  FLA_Obj_create( FLA_Obj_datatype( A ), 1, 1, 0, 0, &absv );
384 
385  FLA_Part_2x2( A, &ATL, &ATR,
386  &ABL, &ABR, 0, 0, FLA_TL );
387 
388  FLA_Part_2x1( d, &dT,
389  &dB, 0, FLA_TOP );
390 
391  FLA_Part_2x1( e, &eT,
392  &eB, 0, FLA_TOP );
393  // Iterate until the bottom-right quadrant has no rows or no columns left.
394  while ( FLA_Obj_min_dim( ABR ) > 0 )
395  {
396  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
397  /* ************* */ /* ************************** */
398  &a10t, /**/ &alpha11, &a12t,
399  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
400  1, 1, FLA_BR );
401 
402  FLA_Repart_2x1_to_3x1( dT, &d0,
403  /* ** */ /* ****** */
404  &delta1,
405  dB, &d2, 1, FLA_BOTTOM );
406 
407  FLA_Repart_2x1_to_3x1( eT, &e0,
408  /* ** */ /* ******** */
409  &epsilon1,
410  eB, &e2, 1, FLA_BOTTOM );
411 
412  /*------------------------------------------------------------*/
413  // a01 is empty only in the first iteration: there is no superdiagonal entry yet.
414  if ( FLA_Obj_length( a01 ) == 0 )
415  {
416  // epsilon1 = 1;
417  FLA_Set( FLA_ONE, epsilon1 );
418  }
419  else
420  {
421  FLA_Part_2x1( a01, &a01_t,
422  &a01_b, 1, FLA_BOTTOM );
423 
424  // epsilon1 = conj(a01_b) / abs(a01_b);
425  FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a01_b, epsilon1 );
426  FLA_Copyt( FLA_NO_TRANSPOSE, a01_b, absv );
427  FLA_Absolute_value( absv );
428  FLA_Inv_scal( absv, epsilon1 );
429 
430  // a01_b = epsilon1 * a01_b;
431  // a01_b.imag = 0;
432  FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, a01_b );
433  FLA_Obj_set_imag_part( FLA_ZERO, a01_b ); // overwrite the imaginary part left by the scaling with zero
434 
435  // alpha11 = epsilon1 * alpha11;
436  FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, alpha11 );
437  }
438 
439  // delta1 = conj(alpha11) / abs(alpha11);
440  FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, delta1 );
441  FLA_Copyt( FLA_NO_TRANSPOSE, alpha11, absv );
442  FLA_Absolute_value( absv );
443  FLA_Inv_scal( absv, delta1 );
444 
445  // alpha11 = delta1 * alpha11;
446  // alpha11.imag = 0;
447  FLA_Scalc( FLA_NO_CONJUGATE, delta1, alpha11 );
448  FLA_Obj_set_imag_part( FLA_ZERO, alpha11 );
449  // Only the leftmost entry of a12t (directly right of alpha11) is scaled.
450  if ( FLA_Obj_width( a12t ) > 0 )
451  {
452  FLA_Part_1x2( a12t, &a12t_l, &a12t_r, 1, FLA_LEFT );
453 
454  // a12t_l = delta1 * a12t_l;
455  FLA_Scalc( FLA_NO_CONJUGATE, delta1, a12t_l );
456  }
457 
458  /*------------------------------------------------------------*/
459 
460  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
461  a10t, alpha11, /**/ a12t,
462  /* ************** */ /* ************************ */
463  &ABL, /**/ &ABR, A20, a21, /**/ A22,
464  FLA_TL );
465 
466  FLA_Cont_with_3x1_to_2x1( &dT, d0,
467  delta1,
468  /* ** */ /* ****** */
469  &dB, d2, FLA_TOP );
470 
471  FLA_Cont_with_3x1_to_2x1( &eT, e0,
472  epsilon1,
473  /* ** */ /* ******** */
474  &eB, e2, FLA_TOP );
475  }
476 
477  FLA_Obj_free( &absv );
478 
479  return FLA_SUCCESS;
480 }
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition: FLA_Obj.c:55
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:226
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Copyt(FLA_Trans trans, FLA_Obj A, FLA_Obj B)
Definition: FLA_Copyt.c:15
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:428
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Set(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Set.c:13
void FLA_Obj_set_imag_part(FLA_Obj alpha, FLA_Obj beta)
Definition: FLA_Misc.c:229
FLA_Error FLA_Inv_scal(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Inv_scal.c:13
FLA_Error FLA_Absolute_value(FLA_Obj alpha)
Definition: FLA_Absolute_value.c:13
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
FLA_Error FLA_Scalc(FLA_Conj conjalpha, FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Scalc.c:13
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153