libflame  revision_anchor
Functions
FLA_QR_UT_vars.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_QR_UT_unb_var1 (FLA_Obj A, FLA_Obj t)
 
FLA_Error FLA_QR_UT_blk_var1 (FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
 
FLA_Error FLA_QR_UT_opt_var1 (FLA_Obj A, FLA_Obj t)
 
FLA_Error FLA_QR_UT_ops_var1 (int m_A, int n_A, float *A, int rs_A, int cs_A, float *t, int inc_t)
 
FLA_Error FLA_QR_UT_opd_var1 (int m_A, int n_A, double *A, int rs_A, int cs_A, double *t, int inc_t)
 
FLA_Error FLA_QR_UT_opc_var1 (int m_A, int n_A, scomplex *A, int rs_A, int cs_A, scomplex *t, int inc_t)
 
FLA_Error FLA_QR_UT_opz_var1 (int m_A, int n_A, dcomplex *A, int rs_A, int cs_A, dcomplex *t, int inc_t)
 
FLA_Error FLA_QR_UT_unb_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_QR_UT_blk_var2 (FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
 
FLA_Error FLA_QR_UT_opt_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_QR_UT_ops_var2 (int m_A, int n_A, float *A, int rs_A, int cs_A, float *T, int rs_T, int cs_T)
 
FLA_Error FLA_QR_UT_opd_var2 (int m_A, int n_A, double *A, int rs_A, int cs_A, double *T, int rs_T, int cs_T)
 
FLA_Error FLA_QR_UT_opc_var2 (int m_A, int n_A, scomplex *A, int rs_A, int cs_A, scomplex *T, int rs_T, int cs_T)
 
FLA_Error FLA_QR_UT_opz_var2 (int m_A, int n_A, dcomplex *A, int rs_A, int cs_A, dcomplex *T, int rs_T, int cs_T)
 
FLA_Error FLA_QR_UT_blk_var3 (FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
 

Function Documentation

◆ FLA_QR_UT_blk_var1()

FLA_Error FLA_QR_UT_blk_var1 ( FLA_Obj  A,
FLA_Obj  T,
fla_qrut_t *  cntl 
)

References FLA_Apply_Q_UT_internal(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Merge_2x1(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_QR_UT_internal(), FLA_Repart_1x2_to_1x3(), and FLA_Repart_2x2_to_3x3().

Referenced by FLA_QR_UT_internal().

14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18 
19  FLA_Obj TL, TR, T0, T1, W12;
20 
21  FLA_Obj T1T, T2B;
22 
23  FLA_Obj AB1, AB2;
24 
25  dim_t b_alg, b;
26 
27  // Query the algorithmic blocksize by inspecting the length of T.
28  b_alg = FLA_Obj_length( T );
29 
30  FLA_Part_2x2( A, &ATL, &ATR,
31  &ABL, &ABR, 0, 0, FLA_TL );
32 
33  FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
34 
35  while ( FLA_Obj_min_dim( ABR ) > 0 ){
36 
37  b = min( b_alg, FLA_Obj_min_dim( ABR ) );
38 
39  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
40  /* ************* */ /* ******************** */
41  &A10, /**/ &A11, &A12,
42  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
43  b, b, FLA_BR );
44 
45  FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &W12,
46  b, FLA_RIGHT );
47 
48  /*------------------------------------------------------------*/
49 
50  FLA_Part_2x1( T1, &T1T,
51  &T2B, b, FLA_TOP );
52 
53  FLA_Merge_2x1( A11,
54  A21, &AB1 );
55 
56  // Perform a QR factorization via the UT transform on AB1:
57  //
58  // / A11 \ -> QB1 R11
59  // \ A21 /
60  //
61  // where:
62  // - QB1 is formed from UB1 (which is stored column-wise below the
63  // diagonal of AB1) and T11 (which is stored to the upper triangle
64  // of T11).
65  // - R11 is stored to the upper triangle of AB1.
66 
67  FLA_QR_UT_internal( AB1, T1T,
68  FLA_Cntl_sub_qrut( cntl ) );
69 
70 
71  if ( FLA_Obj_width( A12 ) > 0 )
72  {
73  FLA_Merge_2x1( A12,
74  A22, &AB2 );
75 
76  // Apply the Householder transforms associated with UB1 and T11 to
77  // AB2:
78  //
79  // / A12 \ := QB1' / A12 \
80  // \ A22 / \ A22 /
81  //
82  // where QB1 is formed from UB1 and T11.
83 
84  FLA_Apply_Q_UT_internal( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
85  AB1, T1T, W12, AB2,
86  FLA_Cntl_sub_apqut( cntl ) );
87  }
88 
89  /*------------------------------------------------------------*/
90 
91  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
92  A10, A11, /**/ A12,
93  /* ************** */ /* ****************** */
94  &ABL, /**/ &ABR, A20, A21, /**/ A22,
95  FLA_TL );
96 
97  FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ W12,
98  FLA_LEFT );
99  }
100 
101  return FLA_SUCCESS;
102 }
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:267
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Error FLA_Apply_Q_UT_internal(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj A, FLA_Obj T, FLA_Obj W, FLA_Obj B, fla_apqut_t *cntl)
Definition: FLA_Apply_Q_UT_internal.c:17
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:475
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Error FLA_QR_UT_internal(FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
Definition: FLA_QR_UT_internal.c:17
FLA_Error FLA_Merge_2x1(FLA_Obj AT, FLA_Obj AB, FLA_Obj *A)
Definition: FLA_View.c:541
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153

◆ FLA_QR_UT_blk_var2()

FLA_Error FLA_QR_UT_blk_var2 ( FLA_Obj  A,
FLA_Obj  T,
fla_qrut_t *  cntl 
)

References FLA_Apply_Q_UT_internal(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt_external(), FLA_Determine_blocksize(), FLA_Gemm_external(), FLA_Merge_2x1(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_2x2(), FLA_QR_UT_internal(), FLA_Repart_2x2_to_3x3(), and FLA_Trmm_external().

Referenced by FLA_QR_UT_internal().

14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18 
19  FLA_Obj TTL, TTR, T00, T01, T02,
20  TBL, TBR, T10, T11, W12,
21  T20, T21, T22;
22 
23  FLA_Obj AB1, AB2;
24 
25  dim_t b;
26 
27  FLA_Part_2x2( A, &ATL, &ATR,
28  &ABL, &ABR, 0, 0, FLA_TL );
29 
30  FLA_Part_2x2( T, &TTL, &TTR,
31  &TBL, &TBR, 0, 0, FLA_TL );
32 
33  while ( FLA_Obj_min_dim( ABR ) > 0 ){
34 
35  b = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) );
36 
37  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
38  /* ************* */ /* ******************** */
39  &A10, /**/ &A11, &A12,
40  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
41  b, b, FLA_BR );
42 
43  FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &T01, &T02,
44  /* ************* */ /* ******************** */
45  &T10, /**/ &T11, &W12,
46  TBL, /**/ TBR, &T20, /**/ &T21, &T22,
47  b, b, FLA_BR );
48 
49  /*------------------------------------------------------------*/
50 
51  FLA_Merge_2x1( A11,
52  A21, &AB1 );
53 
54  // Perform a QR factorization via the UT transform on AB1:
55  //
56  // / A11 \ -> QB1 R11
57  // \ A21 /
58  //
59  // where:
60  // - QB1 is formed from UB1 (which is stored column-wise below the
61  // diagonal of AB1) and T11 (which is stored to the upper triangle
62  // of T11).
63  // - R11 is stored to the upper triangle of AB1.
64 
65  FLA_QR_UT_internal( AB1, T11,
66  FLA_Cntl_sub_qrut( cntl ) );
67 
68 
69  if ( FLA_Obj_width( A12 ) > 0 )
70  {
71  FLA_Merge_2x1( A12,
72  A22, &AB2 );
73 
74  // Apply the Householder transforms associated with UB1 and T11 to
75  // AB2:
76  //
77  // / A12 \ := QB1' / A12 \
78  // \ A22 / \ A22 /
79  //
80  // where QB1 is formed from UB1 and T11.
81 
82  FLA_Apply_Q_UT_internal( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
83  AB1, T11, W12, AB2,
84  FLA_Cntl_sub_apqut( cntl ) );
85  }
86 
87 
88  // Update T
89  //
90  // T01 = A10' * U11 + A20' * U21;
91  //
92  // Recall: U11 = trilu( A11 );
93  // U21 = A21;
94 
95  FLA_Copyt_external( FLA_CONJ_TRANSPOSE, A10, T01 );
96  FLA_Trmm_external( FLA_RIGHT, FLA_LOWER_TRIANGULAR,
97  FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
98  FLA_ONE, A11, T01 );
99  FLA_Gemm_external( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
100  FLA_ONE, A20, A21, FLA_ONE, T01 );
101 
102  /*------------------------------------------------------------*/
103 
104  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
105  A10, A11, /**/ A12,
106  /* ************** */ /* ****************** */
107  &ABL, /**/ &ABR, A20, A21, /**/ A22,
108  FLA_TL );
109 
110  FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, T01, /**/ T02,
111  T10, T11, /**/ W12,
112  /* ************** */ /* ****************** */
113  &TBL, /**/ &TBR, T20, T21, /**/ T22,
114  FLA_TL );
115  }
116 
117  return FLA_SUCCESS;
118 }
FLA_Error FLA_Trmm_external(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLA_Trmm_external.c:13
unsigned long dim_t
Definition: FLA_type_defs.h:71
dim_t FLA_Determine_blocksize(FLA_Obj A_unproc, FLA_Quadrant to_dir, fla_blocksize_t *cntl_blocksizes)
Definition: FLA_Blocksize.c:234
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Copyt_external(FLA_Trans trans, FLA_Obj A, FLA_Obj B)
Definition: FLA_Copyt_external.c:13
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Error FLA_Apply_Q_UT_internal(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj A, FLA_Obj T, FLA_Obj W, FLA_Obj B, fla_apqut_t *cntl)
Definition: FLA_Apply_Q_UT_internal.c:17
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
FLA_Error FLA_Gemm_external(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C)
Definition: FLA_Gemm_external.c:13
FLA_Error FLA_QR_UT_internal(FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
Definition: FLA_QR_UT_internal.c:17
FLA_Error FLA_Merge_2x1(FLA_Obj AT, FLA_Obj AB, FLA_Obj *A)
Definition: FLA_View.c:541
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153

◆ FLA_QR_UT_blk_var3()

FLA_Error FLA_QR_UT_blk_var3 ( FLA_Obj  A,
FLA_Obj  T,
fla_qrut_t *  cntl 
)

References FLA_Apply_Q_UT_internal(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Merge_2x1(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_Part_2x2(), FLA_QR_UT_internal(), and FLA_Repart_2x2_to_3x3().

Referenced by FLA_QR_UT_internal().

14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18 
19  FLA_Obj TWTL, TWTR, TW00, TW01, TW02,
20  TWBL, TWBR, TW10, T11, W12,
21  TW20, TW21, TW22;
22 
23  FLA_Obj AB1, AB2;
24 
25  dim_t b;
26 
27  FLA_Part_2x2( A, &ATL, &ATR,
28  &ABL, &ABR, 0, 0, FLA_TL );
29 
30  FLA_Part_2x2( TW, &TWTL, &TWTR,
31  &TWBL, &TWBR, 0, 0, FLA_TL );
32 
33  while ( FLA_Obj_min_dim( ABR ) > 0 ){
34 
35  b = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) );
36 
37  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
38  /* ************* */ /* ******************** */
39  &A10, /**/ &A11, &A12,
40  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
41  b, b, FLA_BR );
42 
43  FLA_Repart_2x2_to_3x3( TWTL, /**/ TWTR, &TW00, /**/ &TW01, &TW02,
44  /* ************* */ /* ******************** */
45  &TW10, /**/ &T11, &W12,
46  TWBL, /**/ TWBR, &TW20, /**/ &TW21, &TW22,
47  b, b, FLA_BR );
48 
49  /*------------------------------------------------------------*/
50 
51  FLA_Merge_2x1( A11,
52  A21, &AB1 );
53 
54  // Perform a QR factorization via the UT transform on AB1:
55  //
56  // / A11 \ -> QB1 R11
57  // \ A21 /
58  //
59  // where:
60  // - QB1 is formed from UB1 (which is stored column-wise below the
61  // diagonal of AB1) and T11 (which is stored to the upper triangle
62  // of T11).
63  // - R11 is stored to the upper triangle of AB1.
64 
65  FLA_QR_UT_internal( AB1, T11,
66  FLA_Cntl_sub_qrut( cntl ) );
67 
68 
69  if ( FLA_Obj_width( A12 ) > 0 )
70  {
71  FLA_Merge_2x1( A12,
72  A22, &AB2 );
73 
74  // Apply the Householder transforms associated with UB1 and T11 to
75  // AB2:
76  //
77  // / A12 \ := QB1' / A12 \
78  // \ A22 / \ A22 /
79  //
80  // where QB1 is formed from UB1 and T11.
81 
82  FLA_Apply_Q_UT_internal( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
83  AB1, T11, W12, AB2,
84  FLA_Cntl_sub_apqut( cntl ) );
85  }
86 
87  /*------------------------------------------------------------*/
88 
89  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
90  A10, A11, /**/ A12,
91  /* ************** */ /* ****************** */
92  &ABL, /**/ &ABR, A20, A21, /**/ A22,
93  FLA_TL );
94 
95  FLA_Cont_with_3x3_to_2x2( &TWTL, /**/ &TWTR, TW00, TW01, /**/ TW02,
96  TW10, T11, /**/ W12,
97  /* ************** */ /* ****************** */
98  &TWBL, /**/ &TWBR, TW20, TW21, /**/ TW22,
99  FLA_TL );
100  }
101 
102  return FLA_SUCCESS;
103 }
unsigned long dim_t
Definition: FLA_type_defs.h:71
dim_t FLA_Determine_blocksize(FLA_Obj A_unproc, FLA_Quadrant to_dir, fla_blocksize_t *cntl_blocksizes)
Definition: FLA_Blocksize.c:234
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Error FLA_Apply_Q_UT_internal(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj A, FLA_Obj T, FLA_Obj W, FLA_Obj B, fla_apqut_t *cntl)
Definition: FLA_Apply_Q_UT_internal.c:17
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
FLA_Error FLA_QR_UT_internal(FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
Definition: FLA_QR_UT_internal.c:17
FLA_Error FLA_Merge_2x1(FLA_Obj AT, FLA_Obj AB, FLA_Obj *A)
Definition: FLA_View.c:541
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153

◆ FLA_QR_UT_opc_var1()

FLA_Error FLA_QR_UT_opc_var1 ( int  m_A,
int  n_A,
scomplex *  A,
int  rs_A,
int  cs_A,
scomplex *  t,
int  inc_t 
)

References FLA_Apply_H2_UT_l_opc_var1(), FLA_Househ2_UT_l_opc(), and i.

Referenced by FLA_QR_UT_opt_var1().

190 {
191  int min_m_n = min( m_A, n_A );
192  int i;
193 
194  for ( i = 0; i < min_m_n; ++i )
195  {
196  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
197  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
198  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
199  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
200 
201  scomplex* tau1 = buff_t + (i )*inc_t;
202 
203  int m_ahead = m_A - i - 1;
204  int n_ahead = n_A - i - 1;
205 
206  /*------------------------------------------------------------*/
207 
208  // FLA_Househ2_UT( FLA_LEFT,
209  // alpha11,
210  // a21, tau1 );
211  FLA_Househ2_UT_l_opc( m_ahead,
212  alpha11,
213  a21, rs_A,
214  tau1 );
215 
216  // FLA_Apply_H2_UT( FLA_LEFT, tau1, a21, a12t,
217  // A22 );
218  FLA_Apply_H2_UT_l_opc_var1( m_ahead,
219  n_ahead,
220  tau1,
221  a21, rs_A,
222  a12t, cs_A,
223  A22, rs_A, cs_A );
224 
225  /*------------------------------------------------------------*/
226 
227  }
228 
229  return FLA_SUCCESS;
230 }
Definition: blis_type_defs.h:132
FLA_Error FLA_Apply_H2_UT_l_opc_var1(int m_u2_A2, int n_a1t, scomplex *tau, scomplex *u2, int inc_u2, scomplex *a1t, int inc_a1t, scomplex *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:269
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
int i
Definition: bl1_axmyv2.c:145

◆ FLA_QR_UT_opc_var2()

FLA_Error FLA_QR_UT_opc_var2 ( int  m_A,
int  n_A,
scomplex *  A,
int  rs_A,
int  cs_A,
scomplex *  T,
int  rs_T,
int  cs_T 
)

References bl1_ccopyv(), bl1_cgemv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_ONE, and i.

Referenced by FLA_QR_UT_opt_var2().

235 {
236  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
237  int min_m_n = min( m_A, n_A );
238  int i;
239 
240  for ( i = 0; i < min_m_n; ++i )
241  {
242  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
243  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
244  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
245  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
246  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
247  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
248 
249  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
250  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
251 
252  int m_ahead = m_A - i - 1;
253  int n_ahead = n_A - i - 1;
254  int n_behind = i;
255 
256  /*------------------------------------------------------------*/
257 
258  // FLA_Househ2_UT( FLA_LEFT,
259  // alpha11,
260  // a21, tau11 );
261  FLA_Househ2_UT_l_opc( m_ahead,
262  alpha11,
263  a21, rs_A,
264  tau11 );
265 
266  // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t,
267  // A22 );
268  FLA_Apply_H2_UT_l_opc_var1( m_ahead,
269  n_ahead,
270  tau11,
271  a21, rs_A,
272  a12t, cs_A,
273  A22, rs_A, cs_A );
274 
275  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
276  bl1_ccopyv( BLIS1_CONJUGATE,
277  n_behind,
278  a10t, cs_A,
279  t01, rs_T );
280 
281  // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
282  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
283  BLIS1_NO_CONJUGATE,
284  m_ahead,
285  n_behind,
286  buff_1,
287  A20, rs_A, cs_A,
288  a21, rs_A,
289  buff_1,
290  t01, rs_T );
291 
292  /*------------------------------------------------------------*/
293 
294  }
295 
296  return FLA_SUCCESS;
297 }
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
Definition: blis_type_defs.h:132
FLA_Error FLA_Apply_H2_UT_l_opc_var1(int m_u2_A2, int n_a1t, scomplex *tau, scomplex *u2, int inc_u2, scomplex *a1t, int inc_a1t, scomplex *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:269
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
int i
Definition: bl1_axmyv2.c:145
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49

◆ FLA_QR_UT_opd_var1()

FLA_Error FLA_QR_UT_opd_var1 ( int  m_A,
int  n_A,
double *  A,
int  rs_A,
int  cs_A,
double *  t,
int  inc_t 
)

References FLA_Apply_H2_UT_l_opd_var1(), FLA_Househ2_UT_l_opd(), and i.

Referenced by FLA_QR_UT_opt_var1().

142 {
143  int min_m_n = min( m_A, n_A );
144  int i;
145 
146  for ( i = 0; i < min_m_n; ++i )
147  {
148  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
149  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
150  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
151  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
152 
153  double* tau1 = buff_t + (i )*inc_t;
154 
155  int m_ahead = m_A - i - 1;
156  int n_ahead = n_A - i - 1;
157 
158  /*------------------------------------------------------------*/
159 
160  // FLA_Househ2_UT( FLA_LEFT,
161  // alpha11,
162  // a21, tau1 );
163  FLA_Househ2_UT_l_opd( m_ahead,
164  alpha11,
165  a21, rs_A,
166  tau1 );
167 
168  // FLA_Apply_H2_UT( FLA_LEFT, tau1, a21, a12t,
169  // A22 );
170  FLA_Apply_H2_UT_l_opd_var1( m_ahead,
171  n_ahead,
172  tau1,
173  a21, rs_A,
174  a12t, cs_A,
175  A22, rs_A, cs_A );
176 
177  /*------------------------------------------------------------*/
178 
179  }
180 
181  return FLA_SUCCESS;
182 }
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
FLA_Error FLA_Apply_H2_UT_l_opd_var1(int m_u2_A2, int n_a1t, double *tau, double *u2, int inc_u2, double *a1t, int inc_a1t, double *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:195
int i
Definition: bl1_axmyv2.c:145

◆ FLA_QR_UT_opd_var2()

FLA_Error FLA_QR_UT_opd_var2 ( int  m_A,
int  n_A,
double *  A,
int  rs_A,
int  cs_A,
double *  T,
int  rs_T,
int  cs_T 
)

References bl1_dcopyv(), bl1_dgemv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_ONE, and i.

Referenced by FLA_QR_UT_opt_var2().

165 {
166  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
167  int min_m_n = min( m_A, n_A );
168  int i;
169 
170  for ( i = 0; i < min_m_n; ++i )
171  {
172  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
173  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
174  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
175  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
176  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
177  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
178 
179  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
180  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
181 
182  int m_ahead = m_A - i - 1;
183  int n_ahead = n_A - i - 1;
184  int n_behind = i;
185 
186  /*------------------------------------------------------------*/
187 
188  // FLA_Househ2_UT( FLA_LEFT,
189  // alpha11,
190  // a21, tau11 );
191  FLA_Househ2_UT_l_opd( m_ahead,
192  alpha11,
193  a21, rs_A,
194  tau11 );
195 
196  // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t,
197  // A22 );
198  FLA_Apply_H2_UT_l_opd_var1( m_ahead,
199  n_ahead,
200  tau11,
201  a21, rs_A,
202  a12t, cs_A,
203  A22, rs_A, cs_A );
204 
205  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
206  bl1_dcopyv( BLIS1_CONJUGATE,
207  n_behind,
208  a10t, cs_A,
209  t01, rs_T );
210 
211  // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
212  bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
213  BLIS1_NO_CONJUGATE,
214  m_ahead,
215  n_behind,
216  buff_1,
217  A20, rs_A, cs_A,
218  a21, rs_A,
219  buff_1,
220  t01, rs_T );
221 
222  /*------------------------------------------------------------*/
223 
224  }
225 
226  return FLA_SUCCESS;
227 }
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
FLA_Error FLA_Apply_H2_UT_l_opd_var1(int m_u2_A2, int n_a1t, double *tau, double *u2, int inc_u2, double *a1t, int inc_a1t, double *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:195
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42
int i
Definition: bl1_axmyv2.c:145

◆ FLA_QR_UT_ops_var1()

FLA_Error FLA_QR_UT_ops_var1 ( int  m_A,
int  n_A,
float *  A,
int  rs_A,
int  cs_A,
float *  t,
int  inc_t 
)

References FLA_Apply_H2_UT_l_ops_var1(), FLA_Househ2_UT_l_ops(), and i.

Referenced by FLA_QR_UT_opt_var1().

94 {
95  int min_m_n = min( m_A, n_A );
96  int i;
97 
98  for ( i = 0; i < min_m_n; ++i )
99  {
100  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
101  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
102  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
103  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
104 
105  float* tau1 = buff_t + (i )*inc_t;
106 
107  int m_ahead = m_A - i - 1;
108  int n_ahead = n_A - i - 1;
109 
110  /*------------------------------------------------------------*/
111 
112  // FLA_Househ2_UT( FLA_LEFT,
113  // alpha11,
114  // a21, tau1 );
115  FLA_Househ2_UT_l_ops( m_ahead,
116  alpha11,
117  a21, rs_A,
118  tau1 );
119 
120  // FLA_Apply_H2_UT( FLA_LEFT, tau1, a21, a12t,
121  // A22 );
122  FLA_Apply_H2_UT_l_ops_var1( m_ahead,
123  n_ahead,
124  tau1,
125  a21, rs_A,
126  a12t, cs_A,
127  A22, rs_A, cs_A );
128 
129  /*------------------------------------------------------------*/
130 
131  }
132 
133  return FLA_SUCCESS;
134 }
FLA_Error FLA_Apply_H2_UT_l_ops_var1(int m_u2_A2, int n_a1t, float *tau, float *u2, int inc_u2, float *a1t, int inc_a1t, float *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:121
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
int i
Definition: bl1_axmyv2.c:145

◆ FLA_QR_UT_ops_var2()

FLA_Error FLA_QR_UT_ops_var2 ( int  m_A,
int  n_A,
float *  A,
int  rs_A,
int  cs_A,
float *  T,
int  rs_T,
int  cs_T 
)

References bl1_scopyv(), bl1_sgemv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_ONE, and i.

Referenced by FLA_QR_UT_opt_var2().

95 {
96  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
97  int min_m_n = min( m_A, n_A );
98  int i;
99 
100  for ( i = 0; i < min_m_n; ++i )
101  {
102  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
103  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
104  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
105  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
106  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
107  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
108 
109  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
110  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
111 
112  int m_ahead = m_A - i - 1;
113  int n_ahead = n_A - i - 1;
114  int n_behind = i;
115 
116  /*------------------------------------------------------------*/
117 
118  // FLA_Househ2_UT( FLA_LEFT,
119  // alpha11,
120  // a21, tau11 );
121  FLA_Househ2_UT_l_ops( m_ahead,
122  alpha11,
123  a21, rs_A,
124  tau11 );
125 
126  // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t,
127  // A22 );
128  FLA_Apply_H2_UT_l_ops_var1( m_ahead,
129  n_ahead,
130  tau11,
131  a21, rs_A,
132  a12t, cs_A,
133  A22, rs_A, cs_A );
134 
135  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
136  bl1_scopyv( BLIS1_CONJUGATE,
137  n_behind,
138  a10t, cs_A,
139  t01, rs_T );
140 
141  // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
142  bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
143  BLIS1_NO_CONJUGATE,
144  m_ahead,
145  n_behind,
146  buff_1,
147  A20, rs_A, cs_A,
148  a21, rs_A,
149  buff_1,
150  t01, rs_T );
151 
152  /*------------------------------------------------------------*/
153 
154  }
155 
156  return FLA_SUCCESS;
157 }
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35
FLA_Error FLA_Apply_H2_UT_l_ops_var1(int m_u2_A2, int n_a1t, float *tau, float *u2, int inc_u2, float *a1t, int inc_a1t, float *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:121
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
int i
Definition: bl1_axmyv2.c:145

◆ FLA_QR_UT_opt_var1()

FLA_Error FLA_QR_UT_opt_var1 ( FLA_Obj  A,
FLA_Obj  t 
)

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), FLA_QR_UT_opc_var1(), FLA_QR_UT_opd_var1(), FLA_QR_UT_ops_var1(), and FLA_QR_UT_opz_var1().

Referenced by FLA_QR_UT_internal().

14 {
15  FLA_Datatype datatype;
16  int m_A, n_A;
17  int rs_A, cs_A;
18  int inc_t;
19 
20  datatype = FLA_Obj_datatype( A );
21 
22  m_A = FLA_Obj_length( A );
23  n_A = FLA_Obj_width( A );
24  rs_A = FLA_Obj_row_stride( A );
25  cs_A = FLA_Obj_col_stride( A );
26 
27  inc_t = FLA_Obj_vector_inc( t );
28 
29 
30  switch ( datatype )
31  {
32  case FLA_FLOAT:
33  {
34  float* buff_A = FLA_FLOAT_PTR( A );
35  float* buff_t = FLA_FLOAT_PTR( t );
36 
37  FLA_QR_UT_ops_var1( m_A,
38  n_A,
39  buff_A, rs_A, cs_A,
40  buff_t, inc_t );
41 
42  break;
43  }
44 
45  case FLA_DOUBLE:
46  {
47  double* buff_A = FLA_DOUBLE_PTR( A );
48  double* buff_t = FLA_DOUBLE_PTR( t );
49 
50  FLA_QR_UT_opd_var1( m_A,
51  n_A,
52  buff_A, rs_A, cs_A,
53  buff_t, inc_t );
54 
55  break;
56  }
57 
58  case FLA_COMPLEX:
59  {
60  scomplex* buff_A = FLA_COMPLEX_PTR( A );
61  scomplex* buff_t = FLA_COMPLEX_PTR( t );
62 
63  FLA_QR_UT_opc_var1( m_A,
64  n_A,
65  buff_A, rs_A, cs_A,
66  buff_t, inc_t );
67 
68  break;
69  }
70 
71  case FLA_DOUBLE_COMPLEX:
72  {
73  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
74  dcomplex* buff_t = FLA_DOUBLE_COMPLEX_PTR( t );
75 
76  FLA_QR_UT_opz_var1( m_A,
77  n_A,
78  buff_A, rs_A, cs_A,
79  buff_t, inc_t );
80 
81  break;
82  }
83  }
84 
85  return FLA_SUCCESS;
86 }
FLA_Error FLA_QR_UT_opz_var1(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_t, int inc_t)
Definition: FLA_QR_UT_opt_var1.c:234
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_QR_UT_opd_var1(int m_A, int n_A, double *buff_A, int rs_A, int cs_A, double *buff_t, int inc_t)
Definition: FLA_QR_UT_opt_var1.c:138
Definition: blis_type_defs.h:132
FLA_Error FLA_QR_UT_ops_var1(int m_A, int n_A, float *buff_A, int rs_A, int cs_A, float *buff_t, int inc_t)
Definition: FLA_QR_UT_opt_var1.c:90
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
Definition: blis_type_defs.h:137
FLA_Error FLA_QR_UT_opc_var1(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_t, int inc_t)
Definition: FLA_QR_UT_opt_var1.c:186

◆ FLA_QR_UT_opt_var2()

FLA_Error FLA_QR_UT_opt_var2 ( FLA_Obj  A,
FLA_Obj  T 
)

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_QR_UT_opc_var2(), FLA_QR_UT_opd_var2(), FLA_QR_UT_ops_var2(), and FLA_QR_UT_opz_var2().

Referenced by FLA_QR_UT_internal().

14 {
15  FLA_Datatype datatype;
16  int m_A, n_A;
17  int rs_A, cs_A;
18  int rs_T, cs_T;
19 
20  datatype = FLA_Obj_datatype( A );
21 
22  m_A = FLA_Obj_length( A );
23  n_A = FLA_Obj_width( A );
24  rs_A = FLA_Obj_row_stride( A );
25  cs_A = FLA_Obj_col_stride( A );
26 
27  rs_T = FLA_Obj_row_stride( T );
28  cs_T = FLA_Obj_col_stride( T );
29 
30 
31  switch ( datatype )
32  {
33  case FLA_FLOAT:
34  {
35  float* buff_A = FLA_FLOAT_PTR( A );
36  float* buff_T = FLA_FLOAT_PTR( T );
37 
38  FLA_QR_UT_ops_var2( m_A,
39  n_A,
40  buff_A, rs_A, cs_A,
41  buff_T, rs_T, cs_T );
42 
43  break;
44  }
45 
46  case FLA_DOUBLE:
47  {
48  double* buff_A = FLA_DOUBLE_PTR( A );
49  double* buff_T = FLA_DOUBLE_PTR( T );
50 
51  FLA_QR_UT_opd_var2( m_A,
52  n_A,
53  buff_A, rs_A, cs_A,
54  buff_T, rs_T, cs_T );
55 
56  break;
57  }
58 
59  case FLA_COMPLEX:
60  {
61  scomplex* buff_A = FLA_COMPLEX_PTR( A );
62  scomplex* buff_T = FLA_COMPLEX_PTR( T );
63 
64  FLA_QR_UT_opc_var2( m_A,
65  n_A,
66  buff_A, rs_A, cs_A,
67  buff_T, rs_T, cs_T );
68 
69  break;
70  }
71 
72  case FLA_DOUBLE_COMPLEX:
73  {
74  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
75  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
76 
77  FLA_QR_UT_opz_var2( m_A,
78  n_A,
79  buff_A, rs_A, cs_A,
80  buff_T, rs_T, cs_T );
81 
82  break;
83  }
84  }
85 
86  return FLA_SUCCESS;
87 }
FLA_Error FLA_QR_UT_opz_var2(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_QR_UT_opt_var2.c:301
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Error FLA_QR_UT_opc_var2(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_QR_UT_opt_var2.c:231
FLA_Error FLA_QR_UT_opd_var2(int m_A, int n_A, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition: FLA_QR_UT_opt_var2.c:161
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_QR_UT_ops_var2(int m_A, int n_A, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition: FLA_QR_UT_opt_var2.c:91
Definition: blis_type_defs.h:132
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dcomplex
Definition: blis_type_defs.h:137

◆ FLA_QR_UT_opz_var1()

FLA_Error FLA_QR_UT_opz_var1 ( int  m_A,
int  n_A,
dcomplex *  A,
int  rs_A,
int  cs_A,
dcomplex *  t,
int  inc_t 
)

References FLA_Apply_H2_UT_l_opz_var1(), FLA_Househ2_UT_l_opz(), and i.

Referenced by FLA_QR_UT_opt_var1().

238 {
239  int min_m_n = min( m_A, n_A );
240  int i;
241 
242  for ( i = 0; i < min_m_n; ++i )
243  {
244  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
245  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
246  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
247  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
248 
249  dcomplex* tau1 = buff_t + (i )*inc_t;
250 
251  int m_ahead = m_A - i - 1;
252  int n_ahead = n_A - i - 1;
253 
254  /*------------------------------------------------------------*/
255 
256  // FLA_Househ2_UT( FLA_LEFT,
257  // alpha11,
258  // a21, tau1 );
259  FLA_Househ2_UT_l_opz( m_ahead,
260  alpha11,
261  a21, rs_A,
262  tau1 );
263 
264  // FLA_Apply_H2_UT( FLA_LEFT, tau1, a21, a12t,
265  // A22 );
266  FLA_Apply_H2_UT_l_opz_var1( m_ahead,
267  n_ahead,
268  tau1,
269  a21, rs_A,
270  a12t, cs_A,
271  A22, rs_A, cs_A );
272 
273  /*------------------------------------------------------------*/
274 
275  }
276 
277  return FLA_SUCCESS;
278 }
FLA_Error FLA_Apply_H2_UT_l_opz_var1(int m_u2_A2, int n_a1t, dcomplex *tau, dcomplex *u2, int inc_u2, dcomplex *a1t, int inc_a1t, dcomplex *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:343
int i
Definition: bl1_axmyv2.c:145
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
dcomplex
Definition: blis_type_defs.h:137

◆ FLA_QR_UT_opz_var2()

FLA_Error FLA_QR_UT_opz_var2 ( int  m_A,
int  n_A,
dcomplex *  A,
int  rs_A,
int  cs_A,
dcomplex *  T,
int  rs_T,
int  cs_T 
)

References bl1_zcopyv(), bl1_zgemv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_ONE, and i.

Referenced by FLA_QR_UT_opt_var2().

305 {
306  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
307  int min_m_n = min( m_A, n_A );
308  int i;
309 
310  for ( i = 0; i < min_m_n; ++i )
311  {
312  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
313  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
314  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
315  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
316  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
317  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
318 
319  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
320  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
321 
322  int m_ahead = m_A - i - 1;
323  int n_ahead = n_A - i - 1;
324  int n_behind = i;
325 
326  /*------------------------------------------------------------*/
327 
328  // FLA_Househ2_UT( FLA_LEFT,
329  // alpha11,
330  // a21, tau11 );
331  FLA_Househ2_UT_l_opz( m_ahead,
332  alpha11,
333  a21, rs_A,
334  tau11 );
335 
336  // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t,
337  // A22 );
338  FLA_Apply_H2_UT_l_opz_var1( m_ahead,
339  n_ahead,
340  tau11,
341  a21, rs_A,
342  a12t, cs_A,
343  A22, rs_A, cs_A );
344 
345  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
346  bl1_zcopyv( BLIS1_CONJUGATE,
347  n_behind,
348  a10t, cs_A,
349  t01, rs_T );
350 
351  // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
352  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
353  BLIS1_NO_CONJUGATE,
354  m_ahead,
355  n_behind,
356  buff_1,
357  A20, rs_A, cs_A,
358  a21, rs_A,
359  buff_1,
360  t01, rs_T );
361 
362  /*------------------------------------------------------------*/
363 
364  }
365 
366  return FLA_SUCCESS;
367 }
FLA_Error FLA_Apply_H2_UT_l_opz_var1(int m_u2_A2, int n_a1t, dcomplex *tau, dcomplex *u2, int inc_u2, dcomplex *a1t, int inc_a1t, dcomplex *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:343
BLIS1_CONJ_TRANSPOSE
Definition: blis_type_defs.h:57
BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
BLIS1_CONJUGATE
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
int i
Definition: bl1_axmyv2.c:145
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
dcomplex
Definition: blis_type_defs.h:137

◆ FLA_QR_UT_unb_var1()

FLA_Error FLA_QR_UT_unb_var1 ( FLA_Obj  A,
FLA_Obj  t 
)

References FLA_Apply_H2_UT(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Househ2_UT(), FLA_Obj_min_dim(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), and FLA_Repart_2x2_to_3x3().

Referenced by FLA_QR_UT_internal().

14 {
15  FLA_Obj ATL, ATR, A00, a01, A02,
16  ABL, ABR, a10t, alpha11, a12t,
17  A20, a21, A22;
18 
19  FLA_Obj tLt, tRt, t0t, tau1, t2t;
20 
21 
22  FLA_Part_2x2( A, &ATL, &ATR,
23  &ABL, &ABR, 0, 0, FLA_TL );
24 
25  FLA_Part_1x2( t, &tLt, &tRt, 0, FLA_LEFT );
26 
27  while ( FLA_Obj_min_dim( ABR ) > 0 ){
28 
29  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
30  /* ************* */ /* ************************** */
31  &a10t, /**/ &alpha11, &a12t,
32  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
33  1, 1, FLA_BR );
34 
35  FLA_Repart_1x2_to_1x3( tLt, /**/ tRt, &t0t, /**/ &tau1, &t2t,
36  1, FLA_RIGHT );
37 
38  /*------------------------------------------------------------*/
39 
40  // Compute tau1 and u21 from alpha11 and a21 such that tau1 and u21
41  // determine a Householder transform H such that applying H from the
42  // left to the column vector consisting of alpha11 and a21 annihilates
43  // the entries in a21 (and updates alpha11).
44  FLA_Househ2_UT( FLA_LEFT,
45  alpha11,
46  a21, tau1 );
47 
48  // / a12t \ = H / a12t \
49  // \ A22 / \ A22 /
50  //
51  // where H is formed from tau1 and u21.
52  FLA_Apply_H2_UT( FLA_LEFT, tau1, a21, a12t,
53  A22 );
54 
55  /*------------------------------------------------------------*/
56 
57  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
58  a10t, alpha11, /**/ a12t,
59  /* ************** */ /* ************************ */
60  &ABL, /**/ &ABR, A20, a21, /**/ A22,
61  FLA_TL );
62 
63  FLA_Cont_with_1x3_to_1x2( &tLt, /**/ &tRt, t0t, tau1, /**/ t2t,
64  FLA_LEFT );
65  }
66 
67  return FLA_SUCCESS;
68 }
FLA_Error FLA_Househ2_UT(FLA_Side side, FLA_Obj chi_1, FLA_Obj x2, FLA_Obj tau)
Definition: FLA_Househ2_UT.c:16
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:267
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Apply_H2_UT(FLA_Side side, FLA_Obj tau, FLA_Obj u2, FLA_Obj a1, FLA_Obj A2)
Definition: FLA_Apply_H2_UT.c:13
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
Definition: FLA_type_defs.h:158
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:475
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153

◆ FLA_QR_UT_unb_var2()

FLA_Error FLA_QR_UT_unb_var2 ( FLA_Obj  A,
FLA_Obj  T 
)

References FLA_Apply_H2_UT(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt_external(), FLA_Gemv_external(), FLA_Househ2_UT(), FLA_Obj_min_dim(), FLA_ONE, FLA_Part_2x2(), and FLA_Repart_2x2_to_3x3().

Referenced by FLA_QR_UT_internal().

14 {
15  FLA_Obj ATL, ATR, A00, a01, A02,
16  ABL, ABR, a10t, alpha11, a12t,
17  A20, a21, A22;
18 
19  FLA_Obj TTL, TTR, T00, t01, T02,
20  TBL, TBR, t10t, tau11, t12t,
21  T20, t21, T22;
22 
23 
24  FLA_Part_2x2( A, &ATL, &ATR,
25  &ABL, &ABR, 0, 0, FLA_TL );
26 
27  FLA_Part_2x2( T, &TTL, &TTR,
28  &TBL, &TBR, 0, 0, FLA_TL );
29 
30  while ( FLA_Obj_min_dim( ABR ) > 0 ){
31 
32  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
33  /* ************* */ /* ************************** */
34  &a10t, /**/ &alpha11, &a12t,
35  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
36  1, 1, FLA_BR );
37 
38  FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
39  /* ************* */ /* ************************ */
40  &t10t, /**/ &tau11, &t12t,
41  TBL, /**/ TBR, &T20, /**/ &t21, &T22,
42  1, 1, FLA_BR );
43 
44  /*------------------------------------------------------------*/
45 
46  // Compute tau11 and u21 from alpha11 and a21 such that tau11 and u21
47  // determine a Householder transform H such that applying H from the
48  // left to the column vector consisting of alpha11 and a21 annihilates
49  // the entries in a21 (and updates alpha11).
50  FLA_Househ2_UT( FLA_LEFT,
51  alpha11,
52  a21, tau11 );
53 
54  // / a12t \ = H / a12t \
55  // \ A22 / \ A22 /
56  //
57  // where H is formed from tau11 and u21.
58  FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t,
59  A22 );
60 
61  // t01 = a10t' + A20' * u21;
62  FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
63  FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
64 
65  /*------------------------------------------------------------*/
66 
67  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
68  a10t, alpha11, /**/ a12t,
69  /* ************** */ /* ************************ */
70  &ABL, /**/ &ABR, A20, a21, /**/ A22,
71  FLA_TL );
72 
73  FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
74  t10t, tau11, /**/ t12t,
75  /* ************** */ /* ********************** */
76  &TBL, /**/ &TBR, T20, t21, /**/ T22,
77  FLA_TL );
78 
79  }
80 
81  return FLA_SUCCESS;
82 }
FLA_Error FLA_Househ2_UT(FLA_Side side, FLA_Obj chi_1, FLA_Obj x2, FLA_Obj tau)
Definition: FLA_Househ2_UT.c:16
FLA_Error FLA_Gemv_external(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Gemv_external.c:13
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Apply_H2_UT(FLA_Side side, FLA_Obj tau, FLA_Obj u2, FLA_Obj a1, FLA_Obj A2)
Definition: FLA_Apply_H2_UT.c:13
FLA_Error FLA_Copyt_external(FLA_Trans trans, FLA_Obj A, FLA_Obj B)
Definition: FLA_Copyt_external.c:13
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
Definition: FLA_type_defs.h:158
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153