libflame  revision_anchor
Functions
FLA_QR_UT.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_QR_UT (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_QR_UT_internal (FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
 
FLA_Error FLA_QR_UT_copy_internal (FLA_Obj A, FLA_Obj T, FLA_Obj U, fla_qrut_t *cntl)
 
FLA_Error FLA_QR_UT_create_T (FLA_Obj A, FLA_Obj *T)
 
FLA_Error FLA_QR_UT_recover_tau (FLA_Obj T, FLA_Obj tau)
 
FLA_Error FLA_QR_UT_solve (FLA_Obj A, FLA_Obj T, FLA_Obj B, FLA_Obj X)
 
FLA_Error FLASH_QR_UT (FLA_Obj A, FLA_Obj TW)
 
FLA_Error FLASH_QR_UT_create_hier_matrices (FLA_Obj A_flat, dim_t depth, dim_t *b_flash, FLA_Obj *A, FLA_Obj *TW)
 
FLA_Error FLASH_QR_UT_solve (FLA_Obj A, FLA_Obj T, FLA_Obj B, FLA_Obj X)
 
FLA_Error FLA_QR_UT_form_Q (FLA_Obj A, FLA_Obj T, FLA_Obj Q)
 
FLA_Error FLA_QR_UT_form_Q_blk_var1 (FLA_Obj A, FLA_Obj T, FLA_Obj W)
 
FLA_Error FLA_QR_UT_form_Q_opt_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_QR_UT_form_Q_ops_var1 (int m_A, int n_AT, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_QR_UT_form_Q_opd_var1 (int m_A, int n_AT, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_QR_UT_form_Q_opc_var1 (int m_A, int n_AT, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_QR_UT_form_Q_opz_var1 (int m_A, int n_AT, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 

Function Documentation

◆ FLA_QR_UT()

FLA_Error FLA_QR_UT ( FLA_Obj  A,
FLA_Obj  T 
)

References FLA_Check_error_level(), FLA_QR_UT_check(), and FLA_QR_UT_internal().

16 {
17  FLA_Error r_val;
18 
19  // Check parameters.
20  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
21  FLA_QR_UT_check( A, T );
22 
23  // Invoke FLA_QR_UT_internal() with the standard control tree.
24  //r_val = FLA_QR_UT_internal( A, T, fla_qrut_cntl2 );
25  r_val = FLA_QR_UT_internal( A, T, fla_qrut_cntl_leaf );
26 
27  return r_val;
28 }
fla_qrut_t * fla_qrut_cntl_leaf
Definition: FLA_QR_UT_cntl_init.c:16
FLA_Error FLA_QR_UT_check(FLA_Obj A, FLA_Obj T)
Definition: FLA_QR_UT_check.c:13
int FLA_Error
Definition: FLA_type_defs.h:47
unsigned int FLA_Check_error_level(void)
Definition: FLA_Check.c:18
FLA_Error FLA_QR_UT_internal(FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
Definition: FLA_QR_UT_internal.c:17

◆ FLA_QR_UT_copy_internal()

FLA_Error FLA_QR_UT_copy_internal ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  U,
fla_qrut_t *  cntl 
)

References FLA_Check_error_level(), FLA_QR_UT_copy_internal_check(), FLA_QR_UT_copy_task(), and FLASH_Queue_get_enabled().

Referenced by FLA_QR_UT_inc_blk_var2().

14 {
15  FLA_Error r_val = FLA_SUCCESS;
16 
17  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
18  FLA_QR_UT_copy_internal_check( A, T, U, cntl );
19 
20  if ( FLASH_Queue_get_enabled() )
21  {
22  // Enqueue task.
23  ENQUEUE_FLASH_QR_UT_copy( *FLASH_OBJ_PTR_AT( A ),
24  *FLASH_OBJ_PTR_AT( T ),
25  *FLASH_OBJ_PTR_AT( U ),
26  NULL );
27  }
28  else
29  {
30  // Execute task immediately.
31  FLA_QR_UT_copy_task( *FLASH_OBJ_PTR_AT( A ),
32  *FLASH_OBJ_PTR_AT( T ),
33  *FLASH_OBJ_PTR_AT( U ),
34  NULL );
35  }
36 
37  return r_val;
38 }
FLA_Error FLA_QR_UT_copy_task(FLA_Obj A, FLA_Obj T, FLA_Obj U, fla_qrut_t *cntl)
Definition: FLA_QR_UT_copy_task.c:15
int FLA_Error
Definition: FLA_type_defs.h:47
FLA_Bool FLASH_Queue_get_enabled(void)
Definition: FLASH_Queue.c:171
unsigned int FLA_Check_error_level(void)
Definition: FLA_Check.c:18
FLA_Error FLA_QR_UT_copy_internal_check(FLA_Obj A, FLA_Obj T, FLA_Obj U, fla_qrut_t *cntl)
Definition: FLA_QR_UT_copy_internal_check.c:13

◆ FLA_QR_UT_create_T()

FLA_Error FLA_QR_UT_create_T ( FLA_Obj  A,
FLA_Obj *  T 
)

References FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_min_dim(), FLA_Obj_row_stride(), FLA_Obj_width(), and FLA_Query_blocksize().

Referenced by FLA_Random_unitary_matrix(), FLA_Svd_ext_u_unb_var1(), FLA_Svd_uv_unb_var1(), and FLA_Svd_uv_unb_var2().

14 {
15  FLA_Datatype datatype;
16  dim_t b_alg, k;
17  dim_t rs_T, cs_T;
18 
19  // Query the datatype of A.
20  datatype = FLA_Obj_datatype( A );
21 
22  // Query the blocksize from the library.
23  b_alg = FLA_Query_blocksize( datatype, FLA_DIMENSION_MIN );
24 
25  // Scale the blocksize by a pre-set global constant.
26  b_alg = ( dim_t )( ( ( double ) b_alg ) * FLA_QR_INNER_TO_OUTER_B_RATIO );
27 
28  // Adjust the blocksize with respect to the min-dim of A.
29  b_alg = min(b_alg, FLA_Obj_min_dim( A ));
30 
31  // Query the width of A.
32  k = FLA_Obj_width( A );
33 
34  // Figure out whether T should be row-major or column-major.
35  if ( FLA_Obj_row_stride( A ) == 1 )
36  {
37  rs_T = 1;
38  cs_T = b_alg;
39  }
40  else // if ( FLA_Obj_col_stride( A ) == 1 )
41  {
42  rs_T = k;
43  cs_T = 1;
44  }
45 
46  // Create a b_alg x k matrix to hold the block Householder transforms that
47  // will be accumulated within the QR factorization algorithm.
48  FLA_Obj_create( datatype, b_alg, k, rs_T, cs_T, T );
49 
50  return FLA_SUCCESS;
51 }
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition: FLA_Obj.c:55
unsigned long dim_t
Definition: FLA_type_defs.h:71
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Query_blocksize(FLA_Datatype dt, FLA_Dimension dim)
Definition: FLA_Blocksize.c:161
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153

◆ FLA_QR_UT_form_Q()

FLA_Error FLA_QR_UT_form_Q ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  Q 
)

References FLA_Apply_Q_UT(), FLA_Apply_Q_UT_create_workspace_side(), FLA_Check_error_level(), FLA_Obj_free(), FLA_Obj_is_overlapped(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x2(), FLA_QR_UT_form_Q_blk_var1(), FLA_QR_UT_form_Q_check(), FLA_Set_diag(), FLA_Set_to_identity(), FLA_Setr(), and FLA_ZERO.

Referenced by FLA_Bidiag_UT_form_U_ext(), FLA_LQ_UT_form_Q(), FLA_Random_unitary_matrix(), FLA_Svd_ext_u_unb_var1(), FLA_Svd_uv_unb_var1(), FLA_Svd_uv_unb_var2(), and FLA_Tridiag_UT_form_Q().

14 {
15  FLA_Error r_val = FLA_SUCCESS;
16  FLA_Obj QTL, QTR,
17  QBL, QBR;
18  FLA_Obj W;
19  dim_t b;
20 
21  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
22  FLA_QR_UT_form_Q_check( A, T, Q );
23 
24  if ( FLA_Obj_is_overlapped( A, Q ) == FALSE )
25  {
26  // If A and Q are different objects, Q is explicitly formed with A.
27 
28  // Set Q to identity
29  FLA_Set_to_identity( Q );
30 
31  // Q = H_{0} H_{1} ... H_{k-1}
32  FLA_Apply_Q_UT_create_workspace_side( FLA_LEFT, T, Q, &W );
33  r_val = FLA_Apply_Q_UT( FLA_LEFT, FLA_NO_TRANSPOSE,
34  FLA_FORWARD, FLA_COLUMNWISE,
35  A, T, W, Q );
36  FLA_Obj_free( &W );
37 
38  }
39  else
40  {
41  // If A and Q are the same objects, Q is formed in-place.
42  // - even if A and Q has the same base, they may have different
43  // dimensions.
44  // - width of T controls the loop in FLA_QR_UT_form_Q_blk_var1.
45 
46  // Zero out the upper triangle of Q.
47  FLA_Setr( FLA_UPPER_TRIANGULAR, FLA_ZERO, Q );
48 
49  // Adjust T w.r.t A; W is a place holder.
50  if ( FLA_Obj_width( T ) > FLA_Obj_width( A ) )
51  FLA_Part_1x2( T, &T, &W,
52  FLA_Obj_width( A ),
53  FLA_LEFT );
54 
55  // Zero out the lower triangle of QBR
56  if ( FLA_Obj_width( Q ) > FLA_Obj_width( T ) )
57  {
58  b = FLA_Obj_width( T );
59  FLA_Part_2x2( Q, &QTL, &QTR,
60  &QBL, &QBR, b, b, FLA_TL );
61  FLA_Setr( FLA_LOWER_TRIANGULAR, FLA_ZERO, QBR );
62  }
63 
64  // Set the diagonal to one.
65  FLA_Set_diag( FLA_ONE, Q );
66 
67  // Create workspace for applying the block Householder transforms.
68  FLA_Apply_Q_UT_create_workspace_side( FLA_LEFT, T, Q, &W );
69 
70  // Overwrite Q, which currently contains Householder vectors in the
71  // strictly lower triangle and identity in the upper triangle, with
72  // the unitary matrix associated with those Householder transforms.
73  r_val = FLA_QR_UT_form_Q_blk_var1( Q, T, W );
74 
75  // Free the temporary workspace.
76  FLA_Obj_free( &W );
77  }
78  /*
79  FLA_Apply_Q_UT_create_workspace( T, Q, &W );
80  FLA_Set_to_identity( Q );
81  FLA_Apply_Q_UT( FLA_LEFT, FLA_NO_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
82  A, T, W, Q );
83  FLA_Obj_free( &W );
84  FLA_Obj_show( "Q", Q, "%8.1e %8.1e ", "" );
85  */
86 
87  return r_val;
88 }
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
FLA_Error FLA_Setr(FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Setr.c:13
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Error FLA_QR_UT_form_Q_blk_var1(FLA_Obj A, FLA_Obj T, FLA_Obj W)
Definition: FLA_QR_UT_form_Q.c:90
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
int FLA_Error
Definition: FLA_type_defs.h:47
FLA_Bool FLA_Obj_is_overlapped(FLA_Obj A, FLA_Obj B)
Definition: FLA_Query.c:488
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Apply_Q_UT_create_workspace_side(FLA_Side side, FLA_Obj T, FLA_Obj B, FLA_Obj *W)
Definition: FLA_Apply_Q_UT_create_workspace.c:28
FLA_Error FLA_Set_diag(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Set_diag.c:13
FLA_Error FLA_Apply_Q_UT(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj A, FLA_Obj T, FLA_Obj W, FLA_Obj B)
Definition: FLA_Apply_Q_UT.c:16
unsigned int FLA_Check_error_level(void)
Definition: FLA_Check.c:18
FLA_Error FLA_QR_UT_form_Q_check(FLA_Obj A, FLA_Obj T, FLA_Obj Q)
Definition: FLA_QR_UT_form_Q_check.c:13
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Error FLA_Set_to_identity(FLA_Obj A)
Definition: FLA_Set_to_identity.c:13

◆ FLA_QR_UT_form_Q_blk_var1()

FLA_Error FLA_QR_UT_form_Q_blk_var1 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  W 
)

References FLA_Apply_Q_UT(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Merge_2x1(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_QR_UT_form_Q_opt_var1(), FLA_Repart_1x2_to_1x3(), and FLA_Repart_2x2_to_3x3().

Referenced by FLA_QR_UT_form_Q().

91 {
92  FLA_Obj ATL, ATR, A00, A01, A02,
93  ABL, ABR, A10, A11, A12,
94  A20, A21, A22;
95 
96  FLA_Obj TL, TR, T0, T1, T2;
97 
98  FLA_Obj T1T,
99  T2B;
100 
101  FLA_Obj WTL, WTR,
102  WBL, WBR;
103 
104  FLA_Obj AB1, AB2;
105 
106  dim_t b, b_alg;
107  dim_t m_BR, n_BR;
108 
109  b_alg = FLA_Obj_length( T );
110 
111 
112  // If A is wider than T, then we need to position ourselves carefully
113  // within the matrix for the initial partitioning.
114  if ( FLA_Obj_width( A ) > FLA_Obj_width( T ) )
115  {
116  m_BR = FLA_Obj_length( A ) - FLA_Obj_width( T );
117  n_BR = FLA_Obj_width( A ) - FLA_Obj_width( T );
118  }
119  else
120  {
121  m_BR = FLA_Obj_length( A ) - FLA_Obj_width( A );
122  n_BR = 0;
123  }
124 
125  FLA_Part_2x2( A, &ATL, &ATR,
126  &ABL, &ABR, m_BR, n_BR, FLA_BR );
127 
128  FLA_Part_1x2( T, &TL, &TR, 0, FLA_RIGHT );
129 
130  while ( /* FLA_Obj_min_dim( ATL ) > 0 && */ FLA_Obj_width( TL ) > 0 )
131  {
132  b = min( b_alg, FLA_Obj_min_dim( ATL ) );
133 
134  // Since T was filled from left to right, and since we need to access them
135  // in reverse order, we need to handle the case where the last block is
136  // smaller than the other b x b blocks.
137  if ( FLA_Obj_width( TR ) == 0 && FLA_Obj_width( T ) % b_alg > 0 )
138  b = FLA_Obj_width( T ) % b_alg;
139 
140  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02,
141  &A10, &A11, /**/ &A12,
142  /* ************* */ /* ******************** */
143  ABL, /**/ ABR, &A20, &A21, /**/ &A22,
144  b, b, FLA_TL );
145 
146  FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, &T1, /**/ &T2,
147  b, FLA_LEFT );
148 
149  /*------------------------------------------------------------*/
150 
151  FLA_Part_2x1( T1, &T1T,
152  &T2B, b, FLA_TOP );
153 
154  FLA_Part_2x2( W, &WTL, &WTR,
155  &WBL, &WBR, b, FLA_Obj_width( A12 ), FLA_TL );
156 
157  // Use an unblocked algorithm for the first (or only) block.
158  if ( FLA_Obj_length( ABR ) == 0 )
159  {
160  FLA_QR_UT_form_Q_opt_var1( A11, T1T );
161  }
162  else
163  {
164  FLA_Merge_2x1( A11,
165  A21, &AB1 );
166  FLA_Merge_2x1( A12,
167  A22, &AB2 );
168 
169  // Apply the block Householder transforms to A12 and A22.
170  FLA_Apply_Q_UT( FLA_LEFT, FLA_NO_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
171  AB1, T1T, WTL, AB2 );
172 
173  // Apply H to the current block panel consisting of A11 and A21.
174  FLA_QR_UT_form_Q_opt_var1( AB1, T1T );
175  }
176 
177  /*------------------------------------------------------------*/
178 
179  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02,
180  /* ************** */ /* ****************** */
181  A10, /**/ A11, A12,
182  &ABL, /**/ &ABR, A20, /**/ A21, A22,
183  FLA_BR );
184 
185  FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, /**/ T1, T2,
186  FLA_RIGHT );
187  }
188 
189  return FLA_SUCCESS;
190 }
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:267
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:475
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
FLA_Error FLA_Apply_Q_UT(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj A, FLA_Obj T, FLA_Obj W, FLA_Obj B)
Definition: FLA_Apply_Q_UT.c:16
FLA_Error FLA_QR_UT_form_Q_opt_var1(FLA_Obj A, FLA_Obj T)
Definition: FLA_QR_UT_form_Q.c:193
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Error FLA_Merge_2x1(FLA_Obj AT, FLA_Obj AB, FLA_Obj *A)
Definition: FLA_View.c:541
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153

◆ FLA_QR_UT_form_Q_opc_var1()

FLA_Error FLA_QR_UT_form_Q_opc_var1 ( int  m_A,
int  n_AT,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_T,
int  rs_T,
int  cs_T 
)

References bl1_c0(), bl1_c1(), bl1_cscalv(), BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opc_var1(), i, scomplex::imag, and scomplex::real.

Referenced by FLA_QR_UT_form_Q_opt_var1().

373 {
374  scomplex zero = bl1_c0();
375  scomplex one = bl1_c1();
376  int min_m_n = min( m_A, n_A );
377  int i;
378 
379  for ( i = min_m_n - 1; i >= 0; --i )
380  {
381  //scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
382  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
383  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
384  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
385  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
386 
387  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
388 
389  scomplex minus_inv_tau11;
390 
391  //int m_behind = i;
392  int n_ahead = n_A - i - 1;
393  int m_ahead = m_A - i - 1;
394 
395  FLA_Apply_H2_UT_l_opc_var1( m_ahead,
396  n_ahead,
397  tau11,
398  a21, rs_A,
399  a12t, cs_A,
400  A22, rs_A, cs_A );
401 
402  minus_inv_tau11.real = -one.real / tau11->real;
403  minus_inv_tau11.imag = zero.imag;
404 
405  alpha11->real = one.real + minus_inv_tau11.real;
406  alpha11->imag = zero.imag;
407 
408  bl1_cscalv( BLIS1_NO_CONJUGATE,
409  m_ahead,
410  &minus_inv_tau11,
411  a21, rs_A );
412 
413  // Not necessary if upper triangle of A is initialized to identity.
414  //bl1_csetv( m_behind,
415  // &zero,
416  // a01, rs_A );
417  }
418 
419  return FLA_SUCCESS;
420 }
void bl1_cscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_scalv.c:46
float real
Definition: blis_type_defs.h:134
Definition: blis_type_defs.h:81
scomplex bl1_c1(void)
Definition: bl1_constants.c:61
scomplex bl1_c0(void)
Definition: bl1_constants.c:125
Definition: blis_type_defs.h:132
FLA_Error FLA_Apply_H2_UT_l_opc_var1(int m_u2_A2, int n_a1t, scomplex *tau, scomplex *u2, int inc_u2, scomplex *a1t, int inc_a1t, scomplex *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:269
int i
Definition: bl1_axmyv2.c:145
float imag
Definition: blis_type_defs.h:134

◆ FLA_QR_UT_form_Q_opd_var1()

FLA_Error FLA_QR_UT_form_Q_opd_var1 ( int  m_A,
int  n_AT,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T 
)

References bl1_d1(), bl1_dscalv(), BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opd_var1(), and i.

Referenced by FLA_QR_UT_form_Q_opt_var1().

322 {
323  double one = bl1_d1();
324  int min_m_n = min( m_A, n_A );
325  int i;
326 
327  for ( i = min_m_n - 1; i >= 0; --i )
328  {
329  //double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
330  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
331  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
332  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
333  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
334 
335  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
336 
337  double minus_inv_tau11;
338 
339  //int m_behind = i;
340  int n_ahead = n_A - i - 1;
341  int m_ahead = m_A - i - 1;
342 
343  FLA_Apply_H2_UT_l_opd_var1( m_ahead,
344  n_ahead,
345  tau11,
346  a21, rs_A,
347  a12t, cs_A,
348  A22, rs_A, cs_A );
349 
350  minus_inv_tau11 = -one / *tau11;
351 
352  *alpha11 = one + minus_inv_tau11;
353 
354  bl1_dscalv( BLIS1_NO_CONJUGATE,
355  m_ahead,
356  &minus_inv_tau11,
357  a21, rs_A );
358 
359  // Not necessary if upper triangle of A is initialized to identity.
360  //bl1_dsetv( m_behind,
361  // &zero,
362  // a01, rs_A );
363  }
364 
365  return FLA_SUCCESS;
366 }
Definition: blis_type_defs.h:81
FLA_Error FLA_Apply_H2_UT_l_opd_var1(int m_u2_A2, int n_a1t, double *tau, double *u2, int inc_u2, double *a1t, int inc_a1t, double *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:195
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_scalv.c:24
int i
Definition: bl1_axmyv2.c:145
double bl1_d1(void)
Definition: bl1_constants.c:54

◆ FLA_QR_UT_form_Q_ops_var1()

FLA_Error FLA_QR_UT_form_Q_ops_var1 ( int  m_A,
int  n_AT,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T 
)

References bl1_d1(), bl1_sscalv(), BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_ops_var1(), and i.

Referenced by FLA_QR_UT_form_Q_opt_var1().

272 {
273  float one = bl1_d1();
274  int min_m_n = min( m_A, n_A );
275  int i;
276 
277  for ( i = min_m_n - 1; i >= 0; --i )
278  {
279  //float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
280  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
281  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
282  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
283  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
284 
285  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
286 
287  float minus_inv_tau11;
288 
289  //int m_behind = i;
290  int n_ahead = n_A - i - 1;
291  int m_ahead = m_A - i - 1;
292 
293  FLA_Apply_H2_UT_l_ops_var1( m_ahead,
294  n_ahead,
295  tau11,
296  a21, rs_A,
297  a12t, cs_A,
298  A22, rs_A, cs_A );
299 
300  minus_inv_tau11 = -one / *tau11;
301 
302  *alpha11 = one + minus_inv_tau11;
303 
304  bl1_sscalv( BLIS1_NO_CONJUGATE,
305  m_ahead,
306  &minus_inv_tau11,
307  a21, rs_A );
308 
309  // Not necessary if upper triangle of A is initialized to identity.
310  //bl1_ssetv( m_behind,
311  // &zero,
312  // a01, rs_A );
313  }
314 
315  return FLA_SUCCESS;
316 }
Definition: blis_type_defs.h:81
FLA_Error FLA_Apply_H2_UT_l_ops_var1(int m_u2_A2, int n_a1t, float *tau, float *u2, int inc_u2, float *a1t, int inc_a1t, float *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:121
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_scalv.c:13
int i
Definition: bl1_axmyv2.c:145
double bl1_d1(void)
Definition: bl1_constants.c:54

◆ FLA_QR_UT_form_Q_opt_var1()

FLA_Error FLA_QR_UT_form_Q_opt_var1 ( FLA_Obj  A,
FLA_Obj  T 
)

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_QR_UT_form_Q_opc_var1(), FLA_QR_UT_form_Q_opd_var1(), FLA_QR_UT_form_Q_ops_var1(), and FLA_QR_UT_form_Q_opz_var1().

Referenced by FLA_QR_UT_form_Q_blk_var1().

194 {
195  FLA_Datatype datatype;
196  int m_A, n_A;
197  int rs_A, cs_A;
198  int rs_T, cs_T;
199 
200  datatype = FLA_Obj_datatype( A );
201 
202  m_A = FLA_Obj_length( A );
203  n_A = FLA_Obj_width( A );
204  rs_A = FLA_Obj_row_stride( A );
205  cs_A = FLA_Obj_col_stride( A );
206 
207  rs_T = FLA_Obj_row_stride( T );
208  cs_T = FLA_Obj_col_stride( T );
209 
210  switch ( datatype )
211  {
212  case FLA_FLOAT:
213  {
214  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
215  float* buff_T = ( float* ) FLA_FLOAT_PTR( T );
216 
217  FLA_QR_UT_form_Q_ops_var1( m_A,
218  n_A,
219  buff_A, rs_A, cs_A,
220  buff_T, rs_T, cs_T );
221 
222  break;
223  }
224 
225  case FLA_DOUBLE:
226  {
227  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
228  double* buff_T = ( double* ) FLA_DOUBLE_PTR( T );
229 
230  FLA_QR_UT_form_Q_opd_var1( m_A,
231  n_A,
232  buff_A, rs_A, cs_A,
233  buff_T, rs_T, cs_T );
234 
235  break;
236  }
237 
238  case FLA_COMPLEX:
239  {
240  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
241  scomplex* buff_T = ( scomplex* ) FLA_COMPLEX_PTR( T );
242 
243  FLA_QR_UT_form_Q_opc_var1( m_A,
244  n_A,
245  buff_A, rs_A, cs_A,
246  buff_T, rs_T, cs_T );
247 
248  break;
249  }
250 
251  case FLA_DOUBLE_COMPLEX:
252  {
253  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
254  dcomplex* buff_T = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( T );
255 
256  FLA_QR_UT_form_Q_opz_var1( m_A,
257  n_A,
258  buff_A, rs_A, cs_A,
259  buff_T, rs_T, cs_T );
260 
261  break;
262  }
263  }
264 
265  return FLA_SUCCESS;
266 }
FLA_Error FLA_QR_UT_form_Q_opd_var1(int m_A, int n_A, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition: FLA_QR_UT_form_Q.c:318
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Error FLA_QR_UT_form_Q_opz_var1(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_QR_UT_form_Q.c:422
FLA_Error FLA_QR_UT_form_Q_opc_var1(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_QR_UT_form_Q.c:369
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
Definition: blis_type_defs.h:132
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Error FLA_QR_UT_form_Q_ops_var1(int m_A, int n_A, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition: FLA_QR_UT_form_Q.c:268
Definition: blis_type_defs.h:137

◆ FLA_QR_UT_form_Q_opz_var1()

FLA_Error FLA_QR_UT_form_Q_opz_var1 ( int  m_A,
int  n_AT,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)

References bl1_z0(), bl1_z1(), bl1_zscalv(), BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opz_var1(), i, dcomplex::imag, and dcomplex::real.

Referenced by FLA_QR_UT_form_Q_opt_var1().

426 {
427  dcomplex zero = bl1_z0();
428  dcomplex one = bl1_z1();
429  int min_m_n = min( m_A, n_A );
430  int i;
431 
432  for ( i = min_m_n - 1; i >= 0; --i )
433  {
434  //dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
435  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
436  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
437  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
438  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
439 
440  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
441 
442  dcomplex minus_inv_tau11;
443 
444  //int m_behind = i;
445  int n_ahead = n_A - i - 1;
446  int m_ahead = m_A - i - 1;
447 
448  FLA_Apply_H2_UT_l_opz_var1( m_ahead,
449  n_ahead,
450  tau11,
451  a21, rs_A,
452  a12t, cs_A,
453  A22, rs_A, cs_A );
454 
455  minus_inv_tau11.real = -one.real / tau11->real;
456  minus_inv_tau11.imag = zero.imag;
457 
458  alpha11->real = one.real + minus_inv_tau11.real;
459  alpha11->imag = zero.imag;
460 
461  bl1_zscalv( BLIS1_NO_CONJUGATE,
462  m_ahead,
463  &minus_inv_tau11,
464  a21, rs_A );
465 
466  // Not necessary if upper triangle of A is initialized to identity.
467  //bl1_zsetv( m_behind,
468  // &zero,
469  // a01, rs_A );
470  }
471 
472  return FLA_SUCCESS;
473 }
FLA_Error FLA_Apply_H2_UT_l_opz_var1(int m_u2_A2, int n_a1t, dcomplex *tau, dcomplex *u2, int inc_u2, dcomplex *a1t, int inc_a1t, dcomplex *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:343
dcomplex bl1_z0(void)
Definition: bl1_constants.c:133
void bl1_zscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_scalv.c:72
double imag
Definition: blis_type_defs.h:139
Definition: blis_type_defs.h:81
double real
Definition: blis_type_defs.h:139
int i
Definition: bl1_axmyv2.c:145
Definition: blis_type_defs.h:137
dcomplex bl1_z1(void)
Definition: bl1_constants.c:69

◆ FLA_QR_UT_internal()

FLA_Error FLA_QR_UT_internal ( FLA_Obj  A,
FLA_Obj  T,
fla_qrut_t *  cntl 
)

References FLA_Check_error_level(), FLA_QR_UT_blk_var1(), FLA_QR_UT_blk_var2(), FLA_QR_UT_blk_var3(), FLA_QR_UT_internal_check(), FLA_QR_UT_macro_task(), FLA_QR_UT_opt_var1(), FLA_QR_UT_opt_var2(), FLA_QR_UT_unb_var1(), FLA_QR_UT_unb_var2(), and FLASH_Queue_get_enabled().

Referenced by FLA_QR_UT(), FLA_QR_UT_blk_var1(), FLA_QR_UT_blk_var2(), FLA_QR_UT_blk_var3(), FLA_QR_UT_copy_task(), FLA_QR_UT_inc_blk_var1(), FLA_QR_UT_macro_task(), FLA_QR_UT_task(), and FLASH_QR_UT().

18 {
19  FLA_Error r_val = FLA_SUCCESS;
20 
21  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
22  FLA_QR_UT_internal_check( A, T, cntl );
23 
24  if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER &&
25  FLA_Cntl_variant( cntl ) == FLA_SUBPROBLEM )
26  {
27  if ( FLASH_Queue_get_enabled( ) )
28  {
29  // Enqueue
30  ENQUEUE_FLASH_QR_UT_macro( A, *FLASH_OBJ_PTR_AT( T ), cntl );
31  }
32  else
33  {
34  // Execute
35  r_val = FLA_QR_UT_macro_task( A, *FLASH_OBJ_PTR_AT( T ), cntl );
36  }
37  }
38  else
39  {
40  if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT1 )
41  {
42  r_val = FLA_QR_UT_unb_var1( A, T );
43  }
44  else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT1 )
45  {
46  r_val = FLA_QR_UT_opt_var1( A, T );
47  }
48  else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT1 )
49  {
50  r_val = FLA_QR_UT_blk_var1( A, T, cntl );
51  }
52  else if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT2 )
53  {
54  r_val = FLA_QR_UT_unb_var2( A, T );
55  }
56  else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT2 )
57  {
58  r_val = FLA_QR_UT_opt_var2( A, T );
59  }
60  else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT2 )
61  {
62  r_val = FLA_QR_UT_blk_var2( A, T, cntl );
63  }
64  else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT3 )
65  {
66  r_val = FLA_QR_UT_blk_var3( A, T, cntl );
67  }
68  else
69  {
70  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
71  }
72  }
73 
74  return r_val;
75 }
FLA_Error FLA_QR_UT_internal_check(FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
Definition: FLA_QR_UT_internal_check.c:13
FLA_Error FLA_QR_UT_opt_var1(FLA_Obj A, FLA_Obj t)
Definition: FLA_QR_UT_opt_var1.c:13
FLA_Error FLA_QR_UT_unb_var2(FLA_Obj A, FLA_Obj T)
Definition: FLA_QR_UT_unb_var2.c:13
int FLA_Error
Definition: FLA_type_defs.h:47
FLA_Error FLA_QR_UT_unb_var1(FLA_Obj A, FLA_Obj t)
Definition: FLA_QR_UT_unb_var1.c:13
FLA_Bool FLASH_Queue_get_enabled(void)
Definition: FLASH_Queue.c:171
FLA_Error FLA_QR_UT_blk_var1(FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
Definition: FLA_QR_UT_blk_var1.c:13
FLA_Error FLA_QR_UT_macro_task(FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
Definition: FLA_QR_UT_macro_task.c:15
FLA_Error FLA_QR_UT_opt_var2(FLA_Obj A, FLA_Obj T)
Definition: FLA_QR_UT_opt_var2.c:13
FLA_Error FLA_QR_UT_blk_var3(FLA_Obj A, FLA_Obj TW, fla_qrut_t *cntl)
Definition: FLA_QR_UT_blk_var3.c:13
unsigned int FLA_Check_error_level(void)
Definition: FLA_Check.c:18
FLA_Error FLA_QR_UT_blk_var2(FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
Definition: FLA_QR_UT_blk_var2.c:13

◆ FLA_QR_UT_recover_tau()

FLA_Error FLA_QR_UT_recover_tau ( FLA_Obj  T,
FLA_Obj  tau 
)

References FLA_Check_error_level(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Obj_length(), FLA_Part_1x2(), FLA_Part_2x1(), FLA_QR_UT_recover_tau_check(), FLA_QR_UT_recover_tau_submatrix(), FLA_Repart_1x2_to_1x3(), and FLA_Repart_2x1_to_3x1().

Referenced by FLA_LQ_UT_recover_tau().

16 {
17  FLA_Obj TL, TR, T0, T1, T2;
18 
19  FLA_Obj tT, t0,
20  tB, t1,
21  t2;
22 
23  dim_t b_alg, b;
24 
25  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
26  FLA_QR_UT_recover_tau_check( T, t );
27 
28  b_alg = FLA_Obj_length( T );
29 
30  FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
31 
32  FLA_Part_2x1( t, &tT,
33  &tB, 0, FLA_TOP );
34 
35  // T matrix may include workspace; thus, T should not be placed as a loop guard.
36  while ( FLA_Obj_length( tB ) > 0 ) {
37 
38  // The blocksize should be determined that the T matrix is square.
39  b = min( FLA_Obj_length( tB ), b_alg );
40 
41  FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
42  b, FLA_RIGHT );
43 
44  FLA_Repart_2x1_to_3x1( tT, &t0,
45  /* ** */ /* ** */
46  &t1,
47  tB, &t2, b, FLA_BOTTOM );
48 
49  /*------------------------------------------------------------*/
50 
51  FLA_QR_UT_recover_tau_submatrix( T1, t1 );
52 
53  /*------------------------------------------------------------*/
54 
55  FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
56  FLA_LEFT );
57 
58  FLA_Cont_with_3x1_to_2x1( &tT, t0,
59  t1,
60  /* ** */ /* ** */
61  &tB, t2, FLA_TOP );
62  }
63 
64  return FLA_SUCCESS;
65 }
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:226
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:267
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:428
Definition: FLA_type_defs.h:158
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:475
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
FLA_Error FLA_QR_UT_recover_tau_check(FLA_Obj T, FLA_Obj tau)
Definition: FLA_QR_UT_recover_tau_check.c:13
unsigned int FLA_Check_error_level(void)
Definition: FLA_Check.c:18
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
FLA_Error FLA_QR_UT_recover_tau_submatrix(FLA_Obj T, FLA_Obj t)
Definition: FLA_QR_UT_recover_tau.c:68
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116

◆ FLA_QR_UT_solve()

FLA_Error FLA_QR_UT_solve ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  B,
FLA_Obj  X 
)

References FLA_Apply_Q_UT(), FLA_Apply_Q_UT_create_workspace(), FLA_Check_error_level(), FLA_Copy_external(), FLA_Obj_create_copy_of(), FLA_Obj_free(), FLA_Obj_width(), FLA_ONE, FLA_Part_2x1(), FLA_QR_UT_solve_check(), and FLA_Trsm_external().

14 {
15  FLA_Obj W, Y;
16  FLA_Obj AT, AB;
17  FLA_Obj YT, YB;
18 
19  // Check parameters.
20  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
21  FLA_QR_UT_solve_check( A, T, B, X );
22 
23  FLA_Apply_Q_UT_create_workspace( T, B, &W );
24 
25  FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, B, &Y );
26 
27  FLA_Apply_Q_UT( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
28  A, T, W, Y );
29 
30  FLA_Part_2x1( A, &AT,
31  &AB, FLA_Obj_width( A ), FLA_TOP );
32  FLA_Part_2x1( Y, &YT,
33  &YB, FLA_Obj_width( A ), FLA_TOP );
34 
35  FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
36  FLA_NONUNIT_DIAG, FLA_ONE, AT, YT );
37 
38  FLA_Copy_external( YT, X );
39 
40  FLA_Obj_free( &Y );
41  FLA_Obj_free( &W );
42 
43  return FLA_SUCCESS;
44 }
FLA_Error FLA_QR_UT_solve_check(FLA_Obj A, FLA_Obj T, FLA_Obj B, FLA_Obj X)
Definition: FLA_QR_UT_solve_check.c:13
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
FLA_Error FLA_Obj_create_copy_of(FLA_Trans trans, FLA_Obj old, FLA_Obj *obj)
Definition: FLA_Obj.c:345
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Error FLA_Copy_external(FLA_Obj A, FLA_Obj B)
Definition: FLA_Copy_external.c:13
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
FLA_Error FLA_Apply_Q_UT(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj A, FLA_Obj T, FLA_Obj W, FLA_Obj B)
Definition: FLA_Apply_Q_UT.c:16
unsigned int FLA_Check_error_level(void)
Definition: FLA_Check.c:18
FLA_Error FLA_Trsm_external(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLA_Trsm_external.c:13
FLA_Error FLA_Apply_Q_UT_create_workspace(FLA_Obj T, FLA_Obj B, FLA_Obj *W)
Definition: FLA_Apply_Q_UT_create_workspace.c:13

◆ FLASH_QR_UT()

FLA_Error FLASH_QR_UT ( FLA_Obj  A,
FLA_Obj  TW 
)

References FLA_Abort(), FLA_Check_error_level(), FLA_Print_message(), FLA_QR_UT_check(), FLA_QR_UT_internal(), FLASH_Obj_depth(), FLASH_Obj_scalar_length_tl(), FLASH_Obj_scalar_min_dim(), FLASH_Obj_scalar_width_tl(), FLASH_Queue_begin(), and FLASH_Queue_end().

17 {
18  FLA_Error r_val;
19  dim_t b_alg, b_flash;
20 
21  // Check parameters.
22  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
23  FLA_QR_UT_check( A, TW );
24 
25  // *** The current hierarchical QR_UT algorithm assumes that the matrix
26  // has a hierarchical depth of 1. We check for that here, because we
27  // anticipate that we'll use a more general algorithm in the future, and
28  // we don't want to forget to remove the constraint. ***
29  if ( FLASH_Obj_depth( A ) != 1 )
30  {
31  FLA_Print_message( "FLASH_QR_UT() currently only supports matrices of depth 1",
32  __FILE__, __LINE__ );
33  FLA_Abort();
34  }
35 
36  // Inspect the length of TTL to get the blocksize used by the QR
37  // factorization, which will be our inner blocksize for Apply_Q_UT.
38  b_alg = FLASH_Obj_scalar_length_tl( TW );
39  b_flash = FLASH_Obj_scalar_width_tl( TW );
40 
41  // The traditional (non-incremental) QR_UT algorithm-by-blocks requires
42  // that the algorithmic blocksize be equal to the storage blocksize.
43  if ( b_alg != b_flash )
44  {
45  FLA_Print_message( "FLASH_QR_UT() requires that b_alg == b_store",
46  __FILE__, __LINE__ );
47  FLA_Abort();
48  }
49 
50  // The traditional (non-incremental) QR_UT algorithm-by-blocks requires
51  // that min_dim(A) % b_flash == 0.
52  if ( FLASH_Obj_scalar_min_dim( A ) % b_flash != 0 )
53  {
54  FLA_Print_message( "FLASH_QR_UT() requires that min_dim( A ) %% b_store == 0",
55  __FILE__, __LINE__ );
56  FLA_Abort();
57  }
58 
59  // Begin a parallel region.
60  FLASH_Queue_begin();
61 
62  // Invoke FLA_QR_UT_internal() with hierarchical control tree.
63  r_val = FLA_QR_UT_internal( A, TW, flash_qrut_cntl );
64 
65  // End the parallel region.
66  FLASH_Queue_end();
67 
68  return r_val;
69 }
void FLASH_Queue_end(void)
Definition: FLASH_Queue.c:81
unsigned long dim_t
Definition: FLA_type_defs.h:71
dim_t FLASH_Obj_depth(FLA_Obj H)
Definition: FLASH_Obj.c:20
FLA_Error FLA_QR_UT_check(FLA_Obj A, FLA_Obj T)
Definition: FLA_QR_UT_check.c:13
int FLA_Error
Definition: FLA_type_defs.h:47
fla_qrut_t * flash_qrut_cntl
Definition: FLASH_QR_UT_cntl_init.c:16
void FLASH_Queue_begin(void)
Definition: FLASH_Queue.c:59
dim_t FLASH_Obj_scalar_width_tl(FLA_Obj H)
Definition: FLASH_View.c:737
void FLA_Abort(void)
Definition: FLA_Error.c:248
void FLA_Print_message(char *str, char *file, int line)
Definition: FLA_Error.c:234
unsigned int FLA_Check_error_level(void)
Definition: FLA_Check.c:18
dim_t FLASH_Obj_scalar_min_dim(FLA_Obj H)
Definition: FLASH_View.c:675
dim_t FLASH_Obj_scalar_length_tl(FLA_Obj H)
Definition: FLASH_View.c:723
FLA_Error FLA_QR_UT_internal(FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
Definition: FLA_QR_UT_internal.c:17

◆ FLASH_QR_UT_create_hier_matrices()

FLA_Error FLASH_QR_UT_create_hier_matrices ( FLA_Obj  A_flat,
dim_t  depth,
dim_t * b_flash,
FLA_Obj * A,
FLA_Obj * TW 
)

References FLA_Abort(), FLA_Obj_datatype(), FLA_Obj_min_dim(), FLA_Print_message(), FLASH_Obj_create_ext(), and FLASH_Obj_create_hier_copy_of_flat().

14 {
15  FLA_Datatype datatype;
16  dim_t m, n;
17  dim_t min_m_n;
18 
19  // *** The current QR_UT algorithm implemented assumes that
20  // the matrix has a hierarchical depth of 1. We check for that here
21  // because we anticipate that we'll use a more general algorithm in the
22  // future, and we don't want to forget to remove the constraint. ***
23  if ( depth != 1 )
24  {
25  FLA_Print_message( "FLASH_QR_UT() currently only supports matrices of depth 1",
26  __FILE__, __LINE__ );
27  FLA_Abort();
28  }
29 
30  // Create hierarchical copy of matrix A_flat.
31  FLASH_Obj_create_hier_copy_of_flat( A_flat, depth, b_flash, A );
32 
33  // Query the datatype of matrix A_flat.
34  datatype = FLA_Obj_datatype( A_flat );
35 
36  // Query the minimum dimension of A_flat.
37  min_m_n = FLA_Obj_min_dim( A_flat );
38 
39  // Set the m and n dimensions of TW to be min_m_n.
40  m = min_m_n;
41  n = min_m_n;
42 
43  // Create hierarchical matrices T and W.
44  FLASH_Obj_create_ext( datatype, m, n,
45  depth, b_flash, b_flash,
46  TW );
47 
48  return FLA_SUCCESS;
49 }
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLASH_Obj_create_ext(FLA_Datatype datatype, dim_t m, dim_t n, dim_t depth, dim_t *b_m, dim_t *b_n, FLA_Obj *H)
Definition: FLASH_Obj.c:151
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
void FLA_Abort(void)
Definition: FLA_Error.c:248
void FLA_Print_message(char *str, char *file, int line)
Definition: FLA_Error.c:234
FLA_Error FLASH_Obj_create_hier_copy_of_flat(FLA_Obj F, dim_t depth, dim_t *b_mn, FLA_Obj *H)
Definition: FLASH_Obj.c:591
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153

◆ FLASH_QR_UT_solve()

FLA_Error FLASH_QR_UT_solve ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  B,
FLA_Obj  X 
)

References FLA_Check_error_level(), FLA_Obj_width(), FLA_ONE, FLA_Part_2x1(), FLA_QR_UT_solve_check(), FLASH_Apply_Q_UT(), FLASH_Apply_Q_UT_create_workspace(), FLASH_Copy(), FLASH_Obj_create_copy_of(), FLASH_Obj_free(), and FLASH_Trsm().

14 {
15  FLA_Obj W, Y;
16  FLA_Obj AT, AB;
17  FLA_Obj YT, YB;
18 
19  // Check parameters.
20  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
21  FLA_QR_UT_solve_check( A, TW, B, X );
22 
23  FLASH_Apply_Q_UT_create_workspace( TW, B, &W );
24 
25  FLASH_Obj_create_copy_of( FLA_NO_TRANSPOSE, B, &Y );
26 
27  FLASH_Apply_Q_UT( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
28  A, TW, W, Y );
29 
30  FLA_Part_2x1( A, &AT,
31  &AB, FLA_Obj_width( A ), FLA_TOP );
32  FLA_Part_2x1( Y, &YT,
33  &YB, FLA_Obj_width( A ), FLA_TOP );
34 
35  FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
36  FLA_ONE, AT, YT );
37 
38  FLASH_Copy( YT, X );
39 
40  FLASH_Obj_free( &Y );
41  FLASH_Obj_free( &W );
42 
43  return FLA_SUCCESS;
44 }
FLA_Error FLA_QR_UT_solve_check(FLA_Obj A, FLA_Obj T, FLA_Obj B, FLA_Obj X)
Definition: FLA_QR_UT_solve_check.c:13
FLA_Error FLASH_Obj_create_copy_of(FLA_Trans trans, FLA_Obj H_cur, FLA_Obj *H_new)
Definition: FLASH_Obj.c:561
void FLASH_Obj_free(FLA_Obj *H)
Definition: FLASH_Obj.c:638
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLASH_Apply_Q_UT_create_workspace(FLA_Obj TW, FLA_Obj B, FLA_Obj *W)
Definition: FLASH_Apply_Q_UT_create_workspace.c:13
FLA_Error FLASH_Apply_Q_UT(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj A, FLA_Obj T, FLA_Obj W, FLA_Obj B)
Definition: FLASH_Apply_Q_UT.c:16
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
unsigned int FLA_Check_error_level(void)
Definition: FLA_Check.c:18
FLA_Error FLASH_Copy(FLA_Obj A, FLA_Obj B)
Definition: FLASH_Copy.c:15
FLA_Error FLASH_Trsm(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLASH_Trsm.c:15