libflame  revision_anchor
Functions
FLA_SA_LU_unb.c File Reference

(r)

Functions

FLA_Error FLA_SA_LU_unb (FLA_Obj U, FLA_Obj D, FLA_Obj p, FLA_Obj L)
 

Function Documentation

◆ FLA_SA_LU_unb()

FLA_Error FLA_SA_LU_unb ( FLA_Obj  U,
FLA_Obj  D,
FLA_Obj  p,
FLA_Obj  L 
)

References bl1_camax(), bl1_ccopy(), bl1_cger(), bl1_cscal(), bl1_cswap(), bl1_damax(), bl1_dcopy(), bl1_dger(), bl1_dscal(), bl1_dswap(), bl1_samax(), bl1_scopy(), bl1_sger(), bl1_sscal(), bl1_sswap(), bl1_zamax(), bl1_zcopy(), bl1_zger(), bl1_zscal(), bl1_zswap(), BLIS1_NO_CONJUGATE, FLA_Copy_external(), FLA_MINUS_ONE, FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Triangularize(), scomplex::imag, dcomplex::imag, scomplex::real, dcomplex::real, and temp.

Referenced by FLA_SA_LU_blk().

14 {
15  FLA_Datatype datatype;
16  int m_U, cs_U;
17  int m_D, cs_D;
18  int cs_L;
19  // int rs_U;
20  int rs_D;
21  // int rs_L;
22  int m_U_min_j, m_U_min_j_min_1;
23  int j, ipiv;
24  int* buff_p;
25 
26  if ( FLA_Obj_has_zero_dim( U ) ) return FLA_SUCCESS;
27 
28  datatype = FLA_Obj_datatype( U );
29 
30  m_U = FLA_Obj_length( U );
31  // rs_U = FLA_Obj_row_stride( U );
32  cs_U = FLA_Obj_col_stride( U );
33 
34  m_D = FLA_Obj_length( D );
35  rs_D = FLA_Obj_row_stride( D );
36  cs_D = FLA_Obj_col_stride( D );
37 
38  // rs_L = FLA_Obj_row_stride( L );
39  cs_L = FLA_Obj_col_stride( L );
40 
41  FLA_Copy_external( U, L );
42  FLA_Triangularize( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, L );
43 
44  buff_p = ( int * ) FLA_INT_PTR( p );
45 
46  switch ( datatype ){
47 
48  case FLA_FLOAT:
49  {
50  float* buff_U = ( float * ) FLA_FLOAT_PTR( U );
51  float* buff_D = ( float * ) FLA_FLOAT_PTR( D );
52  float* buff_L = ( float * ) FLA_FLOAT_PTR( L );
53  float* buff_minus1 = ( float * ) FLA_FLOAT_PTR( FLA_MINUS_ONE );
54  float L_tmp;
55  float D_tmp;
56  float d_inv_Ljj;
57 
58  for ( j = 0; j < m_U; ++j )
59  {
60  bl1_samax( m_D,
61  buff_D + j*cs_D + 0*rs_D,
62  rs_D,
63  &ipiv );
64 
65  L_tmp = buff_L[ j*cs_L + j ];
66  D_tmp = buff_D[ j*cs_D + ipiv ];
67 
68  if ( fabsf( L_tmp ) < fabsf( D_tmp ) )
69  {
70  bl1_sswap( m_U,
71  buff_L + 0*cs_L + j, cs_L,
72  buff_D + 0*cs_D + ipiv, cs_D );
73 
74  buff_p[ j ] = ipiv + m_U - j;
75  }
76  else
77  {
78  buff_p[ j ] = 0;
79  }
80 
81  d_inv_Ljj = 1.0F / buff_L[ j*cs_L + j ];
82 
83  bl1_sscal( m_D,
84  &d_inv_Ljj,
85  buff_D + j*cs_D + 0, rs_D );
86 
87  m_U_min_j_min_1 = m_U - j - 1;
88 
89  if ( m_U_min_j_min_1 > 0 )
90  {
93  m_D,
94  m_U_min_j_min_1,
95  buff_minus1,
96  buff_D + (j+0)*cs_D + 0, rs_D,
97  buff_L + (j+1)*cs_L + j, cs_L,
98  buff_D + (j+1)*cs_D + 0, rs_D, cs_D );
99  }
100 
101  m_U_min_j = m_U - j;
102 
103  if ( m_U_min_j > 0 )
104  {
105  bl1_scopy( m_U_min_j,
106  buff_L + j*cs_L + j, cs_L,
107  buff_U + j*cs_U + j, cs_U );
108  }
109  }
110  break;
111  }
112 
113  case FLA_DOUBLE:
114  {
115  double* buff_U = ( double * ) FLA_DOUBLE_PTR( U );
116  double* buff_D = ( double * ) FLA_DOUBLE_PTR( D );
117  double* buff_L = ( double * ) FLA_DOUBLE_PTR( L );
118  double* buff_minus1 = ( double * ) FLA_DOUBLE_PTR( FLA_MINUS_ONE );
119  double L_tmp;
120  double D_tmp;
121  double d_inv_Ljj;
122 
123  for ( j = 0; j < m_U; ++j )
124  {
125  bl1_damax( m_D,
126  buff_D + j*cs_D + 0*rs_D,
127  rs_D,
128  &ipiv );
129 
130  L_tmp = buff_L[ j*cs_L + j ];
131  D_tmp = buff_D[ j*cs_D + ipiv ];
132 
133  if ( fabs( L_tmp ) < fabs( D_tmp ) )
134  {
135  bl1_dswap( m_U,
136  buff_L + 0*cs_L + j, cs_L,
137  buff_D + 0*cs_D + ipiv, cs_D );
138 
139  buff_p[ j ] = ipiv + m_U - j;
140  }
141  else
142  {
143  buff_p[ j ] = 0;
144  }
145 
146  d_inv_Ljj = 1.0 / buff_L[ j*cs_L + j ];
147 
148  bl1_dscal( m_D,
149  &d_inv_Ljj,
150  buff_D + j*cs_D + 0, rs_D );
151 
152  m_U_min_j_min_1 = m_U - j - 1;
153 
154  if ( m_U_min_j_min_1 > 0 )
155  {
158  m_D,
159  m_U_min_j_min_1,
160  buff_minus1,
161  buff_D + (j+0)*cs_D + 0, rs_D,
162  buff_L + (j+1)*cs_L + j, cs_L,
163  buff_D + (j+1)*cs_D + 0, rs_D, cs_D );
164  }
165 
166  m_U_min_j = m_U - j;
167 
168  if ( m_U_min_j > 0 )
169  {
170  bl1_dcopy( m_U_min_j,
171  buff_L + j*cs_L + j, cs_L,
172  buff_U + j*cs_U + j, cs_U );
173  }
174  }
175  break;
176  }
177 
178  case FLA_COMPLEX:
179  {
180  scomplex* buff_U = ( scomplex * ) FLA_COMPLEX_PTR( U );
181  scomplex* buff_D = ( scomplex * ) FLA_COMPLEX_PTR( D );
182  scomplex* buff_L = ( scomplex * ) FLA_COMPLEX_PTR( L );
183  scomplex* buff_minus1 = ( scomplex * ) FLA_COMPLEX_PTR( FLA_MINUS_ONE );
184  scomplex L_tmp;
185  scomplex D_tmp;
186  scomplex d_inv_Ljj;
187  scomplex Ljj;
188  float temp;
189 
190  for ( j = 0; j < m_U; ++j )
191  {
192  bl1_camax( m_D,
193  buff_D + j*cs_D + 0*rs_D,
194  rs_D,
195  &ipiv );
196 
197  L_tmp = buff_L[ j*cs_L + j ];
198  D_tmp = buff_D[ j*cs_D + ipiv ];
199 
200  if ( fabsf( L_tmp.real + L_tmp.imag ) < fabsf( D_tmp.real + D_tmp.imag ) )
201  {
202  bl1_cswap( m_U,
203  buff_L + 0*cs_L + j, cs_L,
204  buff_D + 0*cs_D + ipiv, cs_D );
205 
206  buff_p[ j ] = ipiv + m_U - j;
207  }
208  else
209  {
210  buff_p[ j ] = 0;
211  }
212 
213  Ljj = buff_L[ j*cs_L + j ];
214 
215  // d_inv_Ljj = 1.0 / Ljj
216  temp = 1.0F / ( Ljj.real * Ljj.real +
217  Ljj.imag * Ljj.imag );
218  d_inv_Ljj.real = Ljj.real * temp;
219  d_inv_Ljj.imag = Ljj.imag * -temp;
220 
221  bl1_cscal( m_D,
222  &d_inv_Ljj,
223  buff_D + j*cs_D + 0, rs_D );
224 
225  m_U_min_j_min_1 = m_U - j - 1;
226 
227  if ( m_U_min_j_min_1 > 0 )
228  {
231  m_D,
232  m_U_min_j_min_1,
233  buff_minus1,
234  buff_D + (j+0)*cs_D + 0, rs_D,
235  buff_L + (j+1)*cs_L + j, cs_L,
236  buff_D + (j+1)*cs_D + 0, rs_D, cs_D );
237  }
238 
239  m_U_min_j = m_U - j;
240 
241  if ( m_U_min_j > 0 )
242  {
243  bl1_ccopy( m_U_min_j,
244  buff_L + j*cs_L + j, cs_L,
245  buff_U + j*cs_U + j, cs_U );
246  }
247  }
248  break;
249  }
250 
251  case FLA_DOUBLE_COMPLEX:
252  {
253  dcomplex* buff_U = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( U );
254  dcomplex* buff_D = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( D );
255  dcomplex* buff_L = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( L );
256  dcomplex* buff_minus1 = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
257  dcomplex L_tmp;
258  dcomplex D_tmp;
259  dcomplex d_inv_Ljj;
260  dcomplex Ljj;
261  double temp;
262 
263  for ( j = 0; j < m_U; ++j )
264  {
265  bl1_zamax( m_D,
266  buff_D + j*cs_D + 0*rs_D,
267  rs_D,
268  &ipiv );
269 
270  L_tmp = buff_L[ j*cs_L + j ];
271  D_tmp = buff_D[ j*cs_D + ipiv ];
272 
273  if ( fabs( L_tmp.real + L_tmp.imag ) < fabs( D_tmp.real + D_tmp.imag ) )
274  {
275  bl1_zswap( m_U,
276  buff_L + 0*cs_L + j, cs_L,
277  buff_D + 0*cs_D + ipiv, cs_D );
278 
279  buff_p[ j ] = ipiv + m_U - j;
280  }
281  else
282  {
283  buff_p[ j ] = 0;
284  }
285 
286  Ljj = buff_L[ j*cs_L + j ];
287 
288  // d_inv_Ljj = 1.0 / Ljj
289  temp = 1.0 / ( Ljj.real * Ljj.real +
290  Ljj.imag * Ljj.imag );
291  d_inv_Ljj.real = Ljj.real * temp;
292  d_inv_Ljj.imag = Ljj.imag * -temp;
293 
294  bl1_zscal( m_D,
295  &d_inv_Ljj,
296  buff_D + j*cs_D + 0, rs_D );
297 
298  m_U_min_j_min_1 = m_U - j - 1;
299 
300  if ( m_U_min_j_min_1 > 0 )
301  {
304  m_D,
305  m_U_min_j_min_1,
306  buff_minus1,
307  buff_D + (j+0)*cs_D + 0, rs_D,
308  buff_L + (j+1)*cs_L + j, cs_L,
309  buff_D + (j+1)*cs_D + 0, rs_D, cs_D );
310  }
311 
312  m_U_min_j = m_U - j;
313 
314  if ( m_U_min_j > 0 )
315  {
316  bl1_zcopy( m_U_min_j,
317  buff_L + j*cs_L + j, cs_L,
318  buff_U + j*cs_U + j, cs_U );
319  }
320  }
321  break;
322  }
323 
324  }
325 
326  return FLA_SUCCESS;
327 }
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:194
float real
Definition: blis_type_defs.h:134
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
double imag
Definition: blis_type_defs.h:139
Definition: blis_type_defs.h:81
void bl1_zamax(int n, dcomplex *x, int incx, int *index)
Definition: bl1_amax.c:46
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dcomplex temp
Definition: bl1_axpyv2b.c:301
void bl1_camax(int n, scomplex *x, int incx, int *index)
Definition: bl1_amax.c:35
double real
Definition: blis_type_defs.h:139
FLA_Error FLA_Copy_external(FLA_Obj A, FLA_Obj B)
Definition: FLA_Copy_external.c:13
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
void bl1_damax(int n, double *x, int incx, int *index)
Definition: bl1_amax.c:24
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition: bl1_ger.c:62
FLA_Bool FLA_Obj_has_zero_dim(FLA_Obj A)
Definition: FLA_Query.c:400
void bl1_dswap(int n, double *x, int incx, double *y, int incy)
Definition: bl1_swap.c:26
Definition: blis_type_defs.h:132
void bl1_sscal(int n, float *alpha, float *x, int incx)
Definition: bl1_scal.c:13
void bl1_dscal(int n, double *alpha, double *x, int incx)
Definition: bl1_scal.c:26
void bl1_zscal(int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_scal.c:78
void bl1_scopy(int m, float *x, int incx, float *y, int incy)
Definition: bl1_copy.c:13
void bl1_ccopy(int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copy.c:39
void bl1_cswap(int n, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_swap.c:39
FLA_Error FLA_Triangularize(FLA_Uplo uplo, FLA_Diag diag, FLA_Obj A)
Definition: FLA_Triangularize.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition: bl1_ger.c:13
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
void bl1_zcopy(int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copy.c:52
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:111
void bl1_samax(int n, float *x, int incx, int *index)
Definition: bl1_amax.c:13
void bl1_zswap(int n, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_swap.c:52
void bl1_dcopy(int m, double *x, int incx, double *y, int incy)
Definition: bl1_copy.c:26
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
void bl1_sswap(int n, float *x, int incx, float *y, int incy)
Definition: bl1_swap.c:13
void bl1_cscal(int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_scal.c:52
float imag
Definition: blis_type_defs.h:134
Definition: blis_type_defs.h:137