libflame  revision_anchor
Functions
FLA_QR_UT_piv_vars.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_QR_UT_piv_unb_var1 (FLA_Obj A, FLA_Obj T, FLA_Obj w, FLA_Obj p)
 
FLA_Error FLA_QR_UT_piv_blk_var1 (FLA_Obj A, FLA_Obj T, FLA_Obj w, FLA_Obj p, fla_qrut_t *cntl)
 
FLA_Error FLA_QR_UT_piv_unb_var2 (FLA_Obj A, FLA_Obj T, FLA_Obj w, FLA_Obj p)
 
FLA_Error FLA_QR_UT_piv_blk_var2 (FLA_Obj A, FLA_Obj T, FLA_Obj w, FLA_Obj p, fla_qrut_t *cntl)
 
FLA_Error FLA_Apply_H2_UT_piv_row (FLA_Obj tau, FLA_Obj a1t, FLA_Obj u1t, FLA_Obj W, FLA_Obj u2, FLA_Obj A2, FLA_Obj U2, FLA_Obj w1t, FLA_Obj vt)
 

Function Documentation

◆ FLA_Apply_H2_UT_piv_row()

FLA_Error FLA_Apply_H2_UT_piv_row ( FLA_Obj  tau,
FLA_Obj  a1t,
FLA_Obj  u1t,
FLA_Obj  W,
FLA_Obj  u2,
FLA_Obj  A2,
FLA_Obj  U2,
FLA_Obj  w1t,
FLA_Obj  vt 
)

References FLA_Axpy_external(), FLA_Copy_external(), FLA_Gemvc_external(), FLA_Inv_scalc_external(), FLA_MINUS_ONE, FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), and FLA_ZERO.

Referenced by FLA_QR_UT_piv_unb_var2().

50 {
51  // a1t -= u1t W = 1 a1t -1 W^T u1t;
52  FLA_Gemvc_external( FLA_TRANSPOSE, FLA_NO_CONJUGATE,
53  FLA_MINUS_ONE, W, u1t, FLA_ONE, a1t );
54 
55  // w1t := a1t;
56  FLA_Copy_external( a1t, w1t );
57 
58  // w1t += u2' A2 = 1 w1t + 1 A2^T conj(u2);
59  FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A2, u2, FLA_ONE, w1t );
60 
61  if ( FLA_Obj_min_dim( U2 ) > 0 )
62  {
63  FLA_Obj vtR;
64 
65  // Partition the workspace (a row vector matching the width of a1t)
66  FLA_Part_1x2( vt, &vt, &vtR, FLA_Obj_width( U2 ), FLA_LEFT );
67 
68  // vt := u2'U2 = 0 vt + 1 U2^T conj(u2);
69  FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE,
70  FLA_ONE, U2, u2, FLA_ZERO, vt );
71 
72  // w1t -= vt W = 1 w1t -1 W^T vt;
73  FLA_Gemvc_external( FLA_TRANSPOSE, FLA_NO_CONJUGATE,
74  FLA_MINUS_ONE, W, vt, FLA_ONE, w1t );
75  }
76 
77  // w1t = w1t / tau;
78  FLA_Inv_scalc_external( FLA_NO_CONJUGATE, tau, w1t );
79 
80  // a1t = a1t - w1t;
81  FLA_Axpy_external( FLA_MINUS_ONE, w1t, a1t );
82 
83  return FLA_SUCCESS;
84 }
FLA_Error FLA_Axpy_external(FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLA_Axpy_external.c:13
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Error FLA_Copy_external(FLA_Obj A, FLA_Obj B)
Definition: FLA_Copy_external.c:13
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Gemvc_external(FLA_Trans transa, FLA_Conj conjx, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Gemvc_external.c:13
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
FLA_Error FLA_Inv_scalc_external(FLA_Conj conjalpha, FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Inv_scalc_external.c:13
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153

◆ FLA_QR_UT_piv_blk_var1()

FLA_Error FLA_QR_UT_piv_blk_var1 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  w,
FLA_Obj  p,
fla_qrut_t * cntl 
)

References FLA_Apply_pivots(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_QR_UT_piv_internal(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), and w1.

Referenced by FLA_QR_UT_piv_internal().

14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18 
19  FLA_Obj TL, TR, T0, T1, W12;
20  FLA_Obj T1T, T2B;
21 
22  FLA_Obj pT, p0,
23  pB, p1,
24  p2;
25 
26  FLA_Obj wT, w0,
27  wB, w1,
28  w2;
29 
30  dim_t b_alg, b;
31 
32  // Query the algorithmic blocksize by inspecting the length of T.
33  b_alg = FLA_Obj_length( T );
34 
35  FLA_Part_2x2( A, &ATL, &ATR,
36  &ABL, &ABR, 0, 0, FLA_TL );
37 
38  FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
39 
40  FLA_Part_2x1( p, &pT,
41  &pB, 0, FLA_TOP );
42 
43  FLA_Part_2x1( w, &wT,
44  &wB, 0, FLA_TOP );
45 
46  // Loop over A; T is properly truncated.
47  while ( FLA_Obj_min_dim( ABR ) > 0 ){
48 
49  b = min( b_alg, FLA_Obj_min_dim( ABR ) );
50 
51  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
52  /* ************* */ /* ******************** */
53  &A10, /**/ &A11, &A12,
54  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
55  b, b, FLA_BR );
56 
57  FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &W12,
58  b, FLA_RIGHT );
59 
60  FLA_Repart_2x1_to_3x1( pT, &p0,
61  /* ** */ /* ** */
62  &p1,
63  pB, &p2, b, FLA_BOTTOM );
64 
65  FLA_Repart_2x1_to_3x1( wT, &w0,
66  /* ** */ /* ** */
67  &w1,
68  wB, &w2, b, FLA_BOTTOM );
69 
70  /*------------------------------------------------------------*/
71 
72  FLA_Part_2x1( T1, &T1T,
73  &T2B, b, FLA_TOP );
74 
75  // Perform an unblocked (BLAS2-oriented) QR factorization
76  // with pivoting via the UT transform on ABR:
77  //
78  // ABR -> QB1 R11
79  //
80  // where:
81  // - QB1 is formed from UB1 (which is stored column-wise below the
82  // diagonal of ( A11 A21 )^T) and T1T (which is stored to the upper triangle
83  // of T11).
84  // - R11 is stored to ( A11 A12 ).
85  FLA_QR_UT_piv_internal( ABR, T1T, wB, p1,
86  FLA_Cntl_sub_qrut( cntl ) );
87 
88  // Apply pivots to previous columns.
89  FLA_Apply_pivots( FLA_RIGHT, FLA_TRANSPOSE, p1, ATR );
90 
91  /*------------------------------------------------------------*/
92 
93  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
94  A10, A11, /**/ A12,
95  /* ************** */ /* ****************** */
96  &ABL, /**/ &ABR, A20, A21, /**/ A22,
97  FLA_TL );
98 
99  FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ W12,
100  FLA_LEFT );
101 
102  FLA_Cont_with_3x1_to_2x1( &pT, p0,
103  p1,
104  /* ** */ /* ** */
105  &pB, p2, FLA_TOP );
106 
107  FLA_Cont_with_3x1_to_2x1( &wT, w0,
108  w1,
109  /* ** */ /* ** */
110  &wB, w2, FLA_TOP );
111  }
112 
113  return FLA_SUCCESS;
114 }
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:226
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:267
unsigned long dim_t
Definition: FLA_type_defs.h:71
double *restrict w1
Definition: bl1_dotsv3.c:172
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:428
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
Definition: FLA_type_defs.h:158
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:475
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
FLA_Error FLA_QR_UT_piv_internal(FLA_Obj A, FLA_Obj T, FLA_Obj w, FLA_Obj p, fla_qrut_t *cntl)
Definition: FLA_QR_UT_piv_internal.c:13
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
FLA_Error FLA_Apply_pivots(FLA_Side side, FLA_Trans trans, FLA_Obj p, FLA_Obj A)
Definition: FLA_Apply_pivots.c:15
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153

◆ FLA_QR_UT_piv_blk_var2()

FLA_Error FLA_QR_UT_piv_blk_var2 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  w,
FLA_Obj  p,
fla_qrut_t * cntl 
)

References FLA_Apply_pivots(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Gemm_external(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_QR_UT_piv_internal(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), and w1.

Referenced by FLA_QR_UT_piv_internal().

14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18 
19  FLA_Obj TL, TR, T0, T1, W12;
20  FLA_Obj TT, TB;
21 
22  FLA_Obj pT, p0,
23  pB, p1,
24  p2;
25 
26  FLA_Obj wT, w0,
27  wB, w1,
28  w2;
29 
30  dim_t b_alg, b;
31 
32  // Query the algorithmic blocksize by inspecting the length of T.
33  b_alg = FLA_Obj_length( T );
34 
35  FLA_Part_2x2( A, &ATL, &ATR,
36  &ABL, &ABR, 0, 0, FLA_TL );
37 
38  FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
39 
40  FLA_Part_2x1( p, &pT,
41  &pB, 0, FLA_TOP );
42 
43  FLA_Part_2x1( w, &wT,
44  &wB, 0, FLA_TOP );
45 
46  while ( FLA_Obj_min_dim( ABR ) > 0 ){
47 
48  b = min( b_alg, FLA_Obj_min_dim( ABR ) );
49 
50  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
51  /* ************* */ /* ******************** */
52  &A10, /**/ &A11, &A12,
53  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
54  b, b, FLA_BR );
55 
56  FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &W12,
57  b, FLA_RIGHT );
58 
59  FLA_Repart_2x1_to_3x1( pT, &p0,
60  /* ** */ /* ** */
61  &p1,
62  pB, &p2, b, FLA_BOTTOM );
63 
64  FLA_Repart_2x1_to_3x1( wT, &w0,
65  /* ** */ /* ** */
66  &w1,
67  wB, &w2, b, FLA_BOTTOM );
68 
69  /*------------------------------------------------------------*/
70 
71  // ** Reshape T matrices to match the blocksize b
72  FLA_Part_2x1( TR, &TT,
73  &TB, b, FLA_TOP );
74 
75  // ** Perform an unblocked (BLAS2-oriented) QR factorization
76  // with pivoting via the UT transform on ABR:
77  //
78  // ABR -> QB1 R11
79  //
80  // where:
81  // - QB1 is formed from UB1 (which is stored column-wise below the
82  // diagonal of ( A11 A21 )^T) and the upper-triangle of T1.
83  // - R11 is stored to ( A11 A12 ).
84  // - W12 stores T and partial updates for FLA_Apply_Q_UT_piv_var.
85  FLA_QR_UT_piv_internal( ABR, TT, wB, p1,
86  FLA_Cntl_sub_qrut( cntl ) );
87 
88  if ( FLA_Obj_width( A12 ) > 0 )
89  {
90  // ** Block update
91  FLA_Part_2x1( W12, &TT,
92  &TB, b, FLA_TOP );
93 
94  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
95  FLA_MINUS_ONE, A21, TT, FLA_ONE, A22 );
96  }
97 
98  // ** Apply pivots to previous columns.
99  FLA_Apply_pivots( FLA_RIGHT, FLA_TRANSPOSE, p1, ATR );
100 
101  /*------------------------------------------------------------*/
102 
103  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
104  A10, A11, /**/ A12,
105  /* ************** */ /* ****************** */
106  &ABL, /**/ &ABR, A20, A21, /**/ A22,
107  FLA_TL );
108 
109  FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ W12,
110  FLA_LEFT );
111 
112  FLA_Cont_with_3x1_to_2x1( &pT, p0,
113  p1,
114  /* ** */ /* ** */
115  &pB, p2, FLA_TOP );
116 
117  FLA_Cont_with_3x1_to_2x1( &wT, w0,
118  w1,
119  /* ** */ /* ** */
120  &wB, w2, FLA_TOP );
121  }
122 
123  return FLA_SUCCESS;
124 }
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:226
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:267
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
double *restrict w1
Definition: bl1_dotsv3.c:172
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:428
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:475
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
FLA_Error FLA_QR_UT_piv_internal(FLA_Obj A, FLA_Obj T, FLA_Obj w, FLA_Obj p, fla_qrut_t *cntl)
Definition: FLA_QR_UT_piv_internal.c:13
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
FLA_Error FLA_Apply_pivots(FLA_Side side, FLA_Trans trans, FLA_Obj p, FLA_Obj A)
Definition: FLA_Apply_pivots.c:15
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Error FLA_Gemm_external(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C)
Definition: FLA_Gemm_external.c:13
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153

◆ FLA_QR_UT_piv_unb_var1()

FLA_Error FLA_QR_UT_piv_unb_var1 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  w,
FLA_Obj  p 
)

References FLA_Amax_external(), FLA_Apply_H2_UT(), FLA_Apply_pivots(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt_external(), FLA_Gemv_external(), FLA_Househ2_UT(), FLA_Merge_1x2(), FLA_MINUS_ONE, FLA_Obj_le(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_QR_UT_piv_colnorm(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_ZERO, and omega1.

Referenced by FLA_QR_UT_piv_internal().

15 {
16  FLA_Obj AL, AR;
17 
18  FLA_Obj ATL, ATR, A00, a01, A02,
19  ABL, ABR, a10t, alpha11, a12t,
20  A20, a21, A22;
21 
22  FLA_Obj AT, A0,
23  AB, a1t,
24  A2;
25  FLA_Obj AB1, AT1, at1;
26 
27  FLA_Obj TTL, TTR, T00, t01, T02,
28  TBL, TBR, t10t, tau11, t12t,
29  T20, t21, T22;
30 
31  FLA_Obj pT, p0,
32  pB, pi1,
33  p2;
34 
35  FLA_Obj wT, w0,
36  wB, omega1,
37  w2;
38 
39  dim_t nb = FLA_Obj_width ( A ) - FLA_Obj_width( T );
40  //dim_t mb = FLA_Obj_length( A ) - FLA_Obj_width( T );
41 
42  FLA_Part_1x2( A, &AL, &AR, nb, FLA_RIGHT );
43 
44  FLA_Part_2x2( AL, &ATL, &ATR,
45  &ABL, &ABR, 0, 0, FLA_TL );
46 
47  FLA_Part_2x1( AR, &AT,
48  &AB, 0, FLA_TOP );
49 
50  FLA_Part_2x2( T, &TTL, &TTR,
51  &TBL, &TBR, 0, 0, FLA_TL );
52 
53  FLA_Part_2x1( p, &pT,
54  &pB, 0, FLA_TOP );
55 
56  FLA_Part_2x1( w, &wT,
57  &wB, 0, FLA_TOP );
58 
59  while ( FLA_Obj_min_dim( ABR ) > 0 ){
60 
61  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
62  /* ************* */ /* ************************** */
63  &a10t, /**/ &alpha11, &a12t,
64  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
65  1, 1, FLA_BR );
66 
67  FLA_Repart_2x1_to_3x1( AT, &A0,
68  /* ** */ /* *** */
69  &a1t,
70  AB, &A2, 1, FLA_BOTTOM );
71 
72  FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
73  /* ************* */ /* ************************ */
74  &t10t, /**/ &tau11, &t12t,
75  TBL, /**/ TBR, &T20, /**/ &t21, &T22,
76  1, 1, FLA_BR );
77 
78  FLA_Repart_2x1_to_3x1( pT, &p0,
79  /* ** */ /* *** */
80  &pi1,
81  pB, &p2, 1, FLA_BOTTOM );
82 
83  FLA_Repart_2x1_to_3x1( wT, &w0,
84  /* ** */ /* *** */
85  &omega1,
86  wB, &w2, 1, FLA_BOTTOM );
87 
88  /*------------------------------------------------------------*/
89 
90 
91  // Ignore negative inputs for LAPACK compatibility.
92  if ( FLA_Obj_le( pi1, FLA_ZERO ) == FALSE )
93  {
94  // Determine pivot index
95  FLA_Amax_external( wB, pi1 );
96  FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, wB );
97 
98  // Apply pivots
99  FLA_Merge_1x2( ABR, AB, &AB1 );
100  FLA_Apply_pivots( FLA_RIGHT, FLA_TRANSPOSE, pi1, AB1 );
101  }
102  else
103  {
104  // Do not pivot.
105  FLA_Set( FLA_ZERO, pi1 );
106  }
107 
108  // Compute tau11 and u21 from alpha11 and a21 such that tau11 and u21
109  // determine a Householder transform H such that applying H from the
110  // left to the column vector consisting of alpha11 and a21 annihilates
111  // the entries in a21 (and updates alpha11).
112  FLA_Househ2_UT( FLA_LEFT,
113  alpha11,
114  a21, tau11 );
115 
116  // Apply H to (a12t A22)^T
117  // / a12t \ = H / a12t \
118  // \ A22 / \ A22 /
119  //
120  // where H is formed from tau11 and u21.
121  FLA_Merge_1x2( A22, A2, &AB1 );
122  FLA_Merge_1x2( a12t, a1t, &at1 );
123 
124  FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, at1,
125  AB1 );
126 
127  // t01 = a10t' + A20' * u21;
128  FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
129  FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
130 
131  // Apply pivots to previous rows
132  if ( FLA_Obj_le( pi1, FLA_ZERO ) == FALSE )
133  {
134  FLA_Merge_1x2( ATR, AT, &AT1 );
135  FLA_Apply_pivots( FLA_RIGHT, FLA_TRANSPOSE, pi1, AT1 );
136  }
137 
138  // Norm downdate w2 = alpha w2 + beta columnwisenorm2(a12t)
139  FLA_QR_UT_piv_colnorm( FLA_MINUS_ONE, a12t, w2 );
140 
141  /*------------------------------------------------------------*/
142 
143  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
144  a10t, alpha11, /**/ a12t,
145  /* ************** */ /* ************************ */
146  &ABL, /**/ &ABR, A20, a21, /**/ A22,
147  FLA_TL );
148 
149  FLA_Cont_with_3x1_to_2x1( &AT, A0,
150  a1t,
151  /* ** */ /* *** */
152  &AB, A2, FLA_TOP );
153 
154  FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
155  t10t, tau11, /**/ t12t,
156  /* ************** */ /* ********************** */
157  &TBL, /**/ &TBR, T20, t21, /**/ T22,
158  FLA_TL );
159 
160  FLA_Cont_with_3x1_to_2x1( &pT, p0,
161  pi1,
162  /* ** */ /* *** */
163  &pB, p2, FLA_TOP );
164 
165  FLA_Cont_with_3x1_to_2x1( &wT, w0,
166  omega1,
167  /* ** */ /* *** */
168  &wB, w2, FLA_TOP );
169  }
170 
171  return FLA_SUCCESS;
172 }
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:226
FLA_Error FLA_Househ2_UT(FLA_Side side, FLA_Obj chi_1, FLA_Obj x2, FLA_Obj tau)
Definition: FLA_Househ2_UT.c:16
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Gemv_external(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Gemv_external.c:13
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Error FLA_QR_UT_piv_colnorm(FLA_Obj alpha, FLA_Obj A, FLA_Obj b)
Definition: FLA_QR_UT_piv_colnorm.c:13
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:428
FLA_Error FLA_Apply_H2_UT(FLA_Side side, FLA_Obj tau, FLA_Obj u2, FLA_Obj a1, FLA_Obj A2)
Definition: FLA_Apply_H2_UT.c:13
FLA_Error FLA_Copyt_external(FLA_Trans trans, FLA_Obj A, FLA_Obj B)
Definition: FLA_Copyt_external.c:13
FLA_Error FLA_Merge_1x2(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A)
Definition: FLA_View.c:562
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Set(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Set.c:13
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
FLA_Bool FLA_Obj_le(FLA_Obj A, FLA_Obj B)
Definition: FLA_Query.c:890
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
FLA_Error FLA_Apply_pivots(FLA_Side side, FLA_Trans trans, FLA_Obj p, FLA_Obj A)
Definition: FLA_Apply_pivots.c:15
FLA_Error FLA_Amax_external(FLA_Obj x, FLA_Obj index)
Definition: FLA_Amax_external.c:13
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
double *restrict omega1
Definition: bl1_axpyv2bdotaxpy.c:200
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153

◆ FLA_QR_UT_piv_unb_var2()

FLA_Error FLA_QR_UT_piv_unb_var2 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  w,
FLA_Obj  p 
)

References FLA_Amax_external(), FLA_Apply_H2_UT_piv_row(), FLA_Apply_pivots(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt_external(), FLA_Gemv_external(), FLA_Househ2_UT(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_lt(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_QR_UT_piv_colnorm(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_ZERO, and omega1.

Referenced by FLA_QR_UT_piv_internal().

14 {
15  FLA_Obj ATL, ATR, A00, a01, A02,
16  ABL, ABR, a10t, alpha11, a12t,
17  A20, a21, A22;
18 
19  FLA_Obj TTL, TTR, T00, t01, T02,
20  TBL, TBR, t10t, tau11, t12t,
21  T20, t21, T22;
22 
23  FLA_Obj pT, p0,
24  pB, pi1,
25  p2;
26 
27  FLA_Obj wT, w0,
28  wB, omega1,
29  w2;
30 
31  FLA_Obj ab1, v;
32 
33  // Create workspace
34  FLA_Obj_create( FLA_Obj_datatype( T ), 1, FLA_Obj_width( T ), 0, 0, &v );
35 
36  FLA_Part_2x2( A, &ATL, &ATR,
37  &ABL, &ABR, 0, 0, FLA_TL );
38 
39  FLA_Part_2x2( T, &TTL, &TTR,
40  &TBL, &TBR, 0, 0, FLA_TL );
41 
42  FLA_Part_2x1( p, &pT,
43  &pB, 0, FLA_TOP );
44 
45  FLA_Part_2x1( w, &wT,
46  &wB, 0, FLA_TOP );
47 
48  while ( FLA_Obj_min_dim( pB ) > 0 ) {
49 
50  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
51  /* ************* */ /* ************************** */
52  &a10t, /**/ &alpha11, &a12t,
53  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
54  1, 1, FLA_BR );
55 
56  FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
57  /* ************* */ /* ************************ */
58  &t10t, /**/ &tau11, &t12t,
59  TBL, /**/ TBR, &T20, /**/ &t21, &T22,
60  1, 1, FLA_BR );
61 
62  FLA_Repart_2x1_to_3x1( pT, &p0,
63  /* ** */ /* *** */
64  &pi1,
65  pB, &p2, 1, FLA_BOTTOM );
66 
67  FLA_Repart_2x1_to_3x1( wT, &w0,
68  /* ** */ /* *** */
69  &omega1,
70  wB, &w2, 1, FLA_BOTTOM );
71 
72  /*------------------------------------------------------------*/
73 
74 
75  // ** Ignore negative inputs for LAPACK compatibility.
76  if ( FLA_Obj_lt( pi1, FLA_ZERO ) == FALSE )
77  {
78  // ** Determine pivot index
79  FLA_Amax_external( wB, pi1 );
80 
81  // ** BLIS returns -1 if it fails to find the maximum value
82  if ( FLA_Obj_lt( pi1, FLA_ZERO ) == TRUE )
83  FLA_Set( FLA_ZERO, pi1 );
84 
85  // ** Apply a pivot on column norms
86  FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, wB );
87 
88  // ** Apply a pivot on ABR
89  FLA_Apply_pivots( FLA_RIGHT, FLA_TRANSPOSE, pi1, ABR );
90 
91  // ** Apply a pivot on TTR
92  FLA_Apply_pivots( FLA_RIGHT, FLA_TRANSPOSE, pi1, TTR );
93  }
94  else
95  {
96  // ** Do not pivot.
97  FLA_Set( FLA_ZERO, pi1 );
98  }
99 
100  // ** Update the pivot column
101  FLA_Merge_2x1( alpha11,
102  a21, &ab1 );
103 
104  // ab1 = ab1 - ABL t01
105  FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, ABL, t01, FLA_ONE, ab1 );
106 
107  // ** Find the householder reflector on that column
108  FLA_Househ2_UT( FLA_LEFT, alpha11,
109  a21, tau11 );
110 
111  // ** Update the pivot row
112  FLA_Apply_H2_UT_piv_row( tau11, a12t, a10t, T02,
113  a21, A22, A20, t12t,
114  v );
115 
116  // ** Apply pivots on ATR
117  FLA_Apply_pivots( FLA_RIGHT, FLA_TRANSPOSE, pi1, ATR );
118 
119  // ** Norm downdate w2 = w2 - columnwisenorm2(a12t)
120  FLA_QR_UT_piv_colnorm( FLA_MINUS_ONE, a12t, w2 );
121 
122  // ** Update T matrix
123  // t01 = a10t' + A20' * u21;
124  FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
125  FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
126 
127  /*------------------------------------------------------------*/
128 
129  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
130  a10t, alpha11, /**/ a12t,
131  /* ************** */ /* ************************ */
132  &ABL, /**/ &ABR, A20, a21, /**/ A22,
133  FLA_TL );
134 
135  FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
136  t10t, tau11, /**/ t12t,
137  /* ************** */ /* ********************** */
138  &TBL, /**/ &TBR, T20, t21, /**/ T22,
139  FLA_TL );
140 
141  FLA_Cont_with_3x1_to_2x1( &pT, p0,
142  pi1,
143  /* ** */ /* *** */
144  &pB, p2, FLA_TOP );
145 
146  FLA_Cont_with_3x1_to_2x1( &wT, w0,
147  omega1,
148  /* ** */ /* *** */
149  &wB, w2, FLA_TOP );
150  }
151 
152  // Free the workspace
153  FLA_Obj_free( &v);
154 
155  return FLA_SUCCESS;
156 }
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition: FLA_Obj.c:55
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:226
FLA_Error FLA_Househ2_UT(FLA_Side side, FLA_Obj chi_1, FLA_Obj x2, FLA_Obj tau)
Definition: FLA_Househ2_UT.c:16
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
FLA_Error FLA_Gemv_external(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Gemv_external.c:13
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Error FLA_QR_UT_piv_colnorm(FLA_Obj alpha, FLA_Obj A, FLA_Obj b)
Definition: FLA_QR_UT_piv_colnorm.c:13
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Apply_H2_UT_piv_row(FLA_Obj tau, FLA_Obj a1t, FLA_Obj u1t, FLA_Obj W, FLA_Obj u2, FLA_Obj A2, FLA_Obj U2, FLA_Obj w1t, FLA_Obj vt)
Definition: FLA_Apply_H2_UT_piv_row.c:14
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:428
FLA_Error FLA_Copyt_external(FLA_Trans trans, FLA_Obj A, FLA_Obj B)
Definition: FLA_Copyt_external.c:13
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
FLA_Bool FLA_Obj_lt(FLA_Obj A, FLA_Obj B)
Definition: FLA_Query.c:813
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Set(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Set.c:13
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
FLA_Error FLA_Apply_pivots(FLA_Side side, FLA_Trans trans, FLA_Obj p, FLA_Obj A)
Definition: FLA_Apply_pivots.c:15
FLA_Error FLA_Amax_external(FLA_Obj x, FLA_Obj index)
Definition: FLA_Amax_external.c:13
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
double *restrict omega1
Definition: bl1_axpyv2bdotaxpy.c:200
FLA_Error FLA_Merge_2x1(FLA_Obj AT, FLA_Obj AB, FLA_Obj *A)
Definition: FLA_View.c:541
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153