libflame  revision_anchor
Functions
FLA_Hess_UT_unb_var3.c File Reference

(r)

Functions

FLA_Error FLA_Hess_UT_unb_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_unb_var3 (FLA_Obj A, FLA_Obj T)
 

Function Documentation

◆ FLA_Hess_UT_step_unb_var3()

FLA_Error FLA_Hess_UT_step_unb_var3 ( FLA_Obj  A,
FLA_Obj  T 
)

References FLA_Axpy(), FLA_Axpyt(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Dot(), FLA_Dotc(), FLA_Gemv(), FLA_Gerc(), FLA_Househ2_UT(), FLA_Inv_scal(), FLA_Inv_scalc(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), FLA_Set(), FLA_TWO, FLA_ZERO, omega1, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_unb_var3().

/*
 * Listing of the body of FLA_Hess_UT_step_unb_var3( A, T ); the number that
 * starts each line is the line number in FLA_Hess_UT_unb_var3.c.
 *
 * The loop below runs FLA_Obj_length( T ) times, exposing one diagonal
 * element of A (alpha11) and of T (tau11) per iteration. Each iteration:
 *   1. applies the update left pending by the previous iteration
 *      (A22 = A22 - u21 * y21' - z21 * u21', and the matching pieces for
 *      alpha11, a12t and a21);
 *   2. calls FLA_Househ2_UT on a21, which also receives tau11;
 *   3. builds u21, y21 and z21 for the next iteration's pending update;
 *   4. updates a12t and A02 using the new u21 and tau11, and fills t01.
 * Results are written through the partitions of A and T that the FLA_* calls
 * receive as outputs (alpha11, a12t, a21, A22, A02, tau11, t01). The only
 * return statement yields FLA_SUCCESS.
 */
19 {
20  FLA_Obj ATL, ATR, A00, a01, A02,
21  ABL, ABR, a10t, alpha11, a12t,
22  A20, a21, A22;
23  FLA_Obj TTL, TTR, T00, t01, T02,
24  TBL, TBR, t10t, tau11, t12t,
25  T20, t21, T22;
26  FLA_Obj uT, u0,
27  uB, upsilon1,
28  u2;
29  FLA_Obj yT, y0,
30  yB, psi1,
31  y2;
32  FLA_Obj zT, z0,
33  zB, zeta1,
34  z2;
35  FLA_Obj vT, v0,
36  vB, nu1,
37  v2;
38  FLA_Obj wT, w0,
39  wB, omega1,
40  w2;
41  FLA_Obj u, y, z, v, w;
42 
43  FLA_Obj inv_tau11;
44  FLA_Obj minus_inv_tau11;
45  FLA_Obj first_elem;
46  FLA_Obj beta;
47  FLA_Obj conj_beta;
48  FLA_Obj dot_product;
49  FLA_Obj minus_upsilon1;
50  FLA_Obj minus_conj_upsilon1;
51  FLA_Obj minus_psi1;
52  FLA_Obj minus_conj_psi1;
53  FLA_Obj minus_zeta1;
54 
55  FLA_Obj a21_t,
56  a21_b;
57 
58  FLA_Datatype datatype_A;
59  dim_t m_A;
60  dim_t b_alg;
61 
62 
    // The number of rows of T fixes how many iterations the loop performs.
63  b_alg = FLA_Obj_length( T );
64 
65  datatype_A = FLA_Obj_datatype( A );
66  m_A = FLA_Obj_length( A );
67 
    // Temporaries: eleven 1x1 scalars and five m_A x 1 vectors, all created
    // with A's datatype and released by the FLA_Obj_free calls at the end.
68  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &inv_tau11 );
69  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_inv_tau11 );
70  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &first_elem );
71  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &beta );
72  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &conj_beta );
73  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &dot_product );
74  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_upsilon1 );
75  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_conj_upsilon1 );
76  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_psi1 );
77  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_conj_psi1 );
78  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_zeta1 );
79  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
80  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
81  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
82  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
83  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
84 
    // Every partitioning is requested with a size of 0 for its top-left / top
    // part; A, T and the five work vectors are then advanced in lock step.
85  FLA_Part_2x2( A, &ATL, &ATR,
86  &ABL, &ABR, 0, 0, FLA_TL );
87  FLA_Part_2x2( T, &TTL, &TTR,
88  &TBL, &TBR, 0, 0, FLA_TL );
89  FLA_Part_2x1( u, &uT,
90  &uB, 0, FLA_TOP );
91  FLA_Part_2x1( y, &yT,
92  &yB, 0, FLA_TOP );
93  FLA_Part_2x1( z, &zT,
94  &zB, 0, FLA_TOP );
95  FLA_Part_2x1( v, &vT,
96  &vB, 0, FLA_TOP );
97  FLA_Part_2x1( w, &wT,
98  &wB, 0, FLA_TOP );
99 
100  while ( FLA_Obj_length( ATL ) < b_alg )
101  {
    // Expose the current 1x1 diagonal element of A and T and the matching
    // single element of each work vector (block size 1 in every call).
102  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
103  /* ************* */ /* ************************** */
104  &a10t, /**/ &alpha11, &a12t,
105  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
106  1, 1, FLA_BR );
107  FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
108  /* ************* */ /* ************************** */
109  &t10t, /**/ &tau11, &t12t,
110  TBL, /**/ TBR, &T20, /**/ &t21, &T22,
111  1, 1, FLA_BR );
112  FLA_Repart_2x1_to_3x1( uT, &u0,
113  /* ** */ /* ******** */
114  &upsilon1,
115  uB, &u2, 1, FLA_BOTTOM );
116  FLA_Repart_2x1_to_3x1( yT, &y0,
117  /* ** */ /* **** */
118  &psi1,
119  yB, &y2, 1, FLA_BOTTOM );
120  FLA_Repart_2x1_to_3x1( zT, &z0,
121  /* ** */ /* ***** */
122  &zeta1,
123  zB, &z2, 1, FLA_BOTTOM );
124  FLA_Repart_2x1_to_3x1( vT, &v0,
125  /* ** */ /* *** */
126  &nu1,
127  vB, &v2, 1, FLA_BOTTOM );
128  FLA_Repart_2x1_to_3x1( wT, &w0,
129  /* ** */ /* ****** */
130  &omega1,
131  wB, &w2, 1, FLA_BOTTOM );
132 
133  /*------------------------------------------------------------*/
134 
    // Skipped on the first iteration: ATL is still empty then, and u, y and z
    // have not been written yet (that happens further down in this loop body).
135  if ( FLA_Obj_length( ATL ) > 0 )
136  {
137  FLA_Copy( upsilon1, minus_upsilon1 );
138  FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
139  FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
140 
141  FLA_Copy( psi1, minus_psi1 );
142  FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
143  FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
144 
145  FLA_Copy( zeta1, minus_zeta1 );
146  FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
147 
148  // alpha11 = alpha11 - upsilon11 * conj(psi11) - zeta11 * conj(upsilon11);
149  FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
150  FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
151 
152  // a12t = a12t - upsilon11 * y21' - zeta11 * u21';
153  FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
154  FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
155 
156  // a21 = a21 - conj(psi11) * u21 - conj(upsilon11) * z21;
157  FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
158  FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
159  }
160 
161  if ( FLA_Obj_length( A22 ) > 0 )
162  {
    // Split a21 into its first element (a21_t) and the remainder (a21_b),
    // the two pieces FLA_Househ2_UT takes as chi_1 and x2.
163  FLA_Part_2x1( a21, &a21_t,
164  &a21_b, 1, FLA_TOP );
165 
166  // [ x21, tau11, a21 ] = House( a21 );
167  FLA_Househ2_UT( FLA_LEFT,
168  a21_t,
169  a21_b, tau11 );
170 
171  // inv_tau11 = 1 / tau11;
172  // minus_inv_tau11 = -1 / tau11;
173  FLA_Set( FLA_ONE, inv_tau11 );
174  FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
175  FLA_Copy( inv_tau11, minus_inv_tau11 );
176  FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
177 
178  // Save first element of a21_t and set it to one so we can use a21 as
179  // u21 in subsequent computations. We will restore a21_t later on.
180  FLA_Copy( a21_t, first_elem );
181  FLA_Set( FLA_ONE, a21_t );
182  }
183 
    // Uses u2, y2 and z2 as left by the previous iteration; they are only
    // overwritten with this iteration's vectors in the block that follows.
184  if ( FLA_Obj_length( ATL ) > 0 )
185  {
186  // A22 = A22 - u21 * y21' - z21 * u21';
187  FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
188  FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
189  }
190 
191  if ( FLA_Obj_length( A22 ) > 0 )
192  {
    // Throughout this block a21 still has its first element set to one, so
    // it stands in for u21 (x21 in the comments). v2 and w2 are scratch:
    // their contents are copied into y2 and z2 just below.
193  // v2 = A22' * x21;
194  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
195 
196  // w2 = A22 * x21;
197  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
198 
199  // u21 = x21;
200  // y21 = v2;
201  // z21 = w2;
202  FLA_Copy( a21, u2 );
203  FLA_Copy( v2, y2 );
204  FLA_Copy( w2, z2 );
205 
206  // beta = u21' * z21 / 2;
207  // conj_beta = conj(beta);
208  FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
209  FLA_Inv_scal( FLA_TWO, beta );
210  FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
211 
    // Note: conj_beta and beta are scaled in place by -1/tau11 here, so after
    // these calls they no longer hold the plain values computed above.
212  // y21' = ( y21' - beta / tau * u21' ) / tau;
213  // y21 = ( y21 - conj(beta) / tau * u21 ) / tau;
214  FLA_Scal( minus_inv_tau11, conj_beta );
215  FLA_Axpy( conj_beta, a21, y2 );
216  FLA_Scal( inv_tau11, y2 );
217 
218  // z21 = ( z21 - beta / tau * u21 ) / tau;
219  FLA_Scal( minus_inv_tau11, beta );
220  FLA_Axpy( beta, a21, z2 );
221  FLA_Scal( inv_tau11, z2 );
222 
223  // a12t = a12t * ( I - u21 * u21' / tau );
224  // = a12t - ( a12t * u21 ) * u21' / tau;
225  FLA_Dot( a12t, a21, dot_product );
226  FLA_Scal( minus_inv_tau11, dot_product );
227  FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
228 
    // y0 is used as scratch for A02 * u21; nothing else in this function
    // reads it.
229  // A02 = A02 * ( I - u21 * u21' / tau );
230  // = A02 - ( A02 * u21 ) * u21' / tau;
231  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
232  FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
233 
234  // t01 = U20' * u21;
235  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
236 
237  // Restore first element of a21.
238  FLA_Copy( first_elem, a21_t );
239  }
240 
241  // Update A22 if this is the last iteration; this is needed when we're
242  // being called from the blocked routine so A22 is left in a valid state.
243  if ( FLA_Obj_length( ATL ) + 1 == b_alg &&
244  FLA_Obj_length( A22 ) > 0 )
245  {
246  // A22 = A22 - u21 * y21' - z21 * u21';
247  FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
248  FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
249  }
250 
251  /*------------------------------------------------------------*/
252 
    // Fold the exposed middle row/column (or element) back into the top-left /
    // top part of every partitioning before the next loop test.
253  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
254  a10t, alpha11, /**/ a12t,
255  /* ************** */ /* ************************ */
256  &ABL, /**/ &ABR, A20, a21, /**/ A22,
257  FLA_TL );
258  FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
259  t10t, tau11, /**/ t12t,
260  /* ************** */ /* ************************ */
261  &TBL, /**/ &TBR, T20, t21, /**/ T22,
262  FLA_TL );
263  FLA_Cont_with_3x1_to_2x1( &uT, u0,
264  upsilon1,
265  /* ** */ /* ******** */
266  &uB, u2, FLA_TOP );
267  FLA_Cont_with_3x1_to_2x1( &yT, y0,
268  psi1,
269  /* ** */ /* **** */
270  &yB, y2, FLA_TOP );
271  FLA_Cont_with_3x1_to_2x1( &zT, z0,
272  zeta1,
273  /* ** */ /* ***** */
274  &zB, z2, FLA_TOP );
275  FLA_Cont_with_3x1_to_2x1( &vT, v0,
276  nu1,
277  /* ** */ /* *** */
278  &vB, v2, FLA_TOP );
279  FLA_Cont_with_3x1_to_2x1( &wT, w0,
280  omega1,
281  /* ** */ /* ****** */
282  &wB, w2, FLA_TOP );
283  }
284 
    // Release every temporary created with FLA_Obj_create above.
285  FLA_Obj_free( &inv_tau11 );
286  FLA_Obj_free( &minus_inv_tau11 );
287  FLA_Obj_free( &first_elem );
288  FLA_Obj_free( &beta );
289  FLA_Obj_free( &conj_beta );
290  FLA_Obj_free( &dot_product );
291  FLA_Obj_free( &minus_upsilon1 );
292  FLA_Obj_free( &minus_conj_upsilon1 );
293  FLA_Obj_free( &minus_psi1 );
294  FLA_Obj_free( &minus_conj_psi1 );
295  FLA_Obj_free( &minus_zeta1 );
296  FLA_Obj_free( &u );
297  FLA_Obj_free( &y );
298  FLA_Obj_free( &z );
299  FLA_Obj_free( &v );
300  FLA_Obj_free( &w );
301 
302  return FLA_SUCCESS;
303 }
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition: FLA_Obj.c:55
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:226
FLA_Error FLA_Househ2_UT(FLA_Side side, FLA_Obj chi_1, FLA_Obj x2, FLA_Obj tau)
Definition: FLA_Househ2_UT.c:16
double *restrict psi1
Definition: bl1_axmyv2.c:139
double *restrict zeta1
Definition: bl1_axmyv2.c:142
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
FLA_Error FLA_Axpyt(FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLA_Axpyt.c:15
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Error FLA_Copy(FLA_Obj A, FLA_Obj B)
Definition: FLA_Copy.c:15
upsilon1
Definition: bl1_axpyv2bdotaxpy.c:225
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Inv_scalc(FLA_Conj conjalpha, FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Inv_scalc.c:13
FLA_Error FLA_Copyt(FLA_Trans trans, FLA_Obj A, FLA_Obj B)
Definition: FLA_Copyt.c:15
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:428
FLA_Error FLA_Gerc(FLA_Conj conjx, FLA_Conj conjy, FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj A)
Definition: FLA_Gerc.c:13
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Error FLA_Gemv(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Gemv.c:15
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
Definition: FLA_type_defs.h:158
FLA_Error FLA_Set(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Set.c:13
FLA_Error FLA_Scal(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Scal.c:15
FLA_Error FLA_Dot(FLA_Obj x, FLA_Obj y, FLA_Obj rho)
Definition: FLA_Dot.c:13
FLA_Error FLA_Inv_scal(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Inv_scal.c:13
FLA_Error FLA_Axpy(FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLA_Axpy.c:15
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Obj FLA_TWO
Definition: FLA_Init.c:17
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
double *restrict omega1
Definition: bl1_axpyv2bdotaxpy.c:200
FLA_Error FLA_Dotc(FLA_Conj conj, FLA_Obj x, FLA_Obj y, FLA_Obj rho)
Definition: FLA_Dotc.c:13

◆ FLA_Hess_UT_unb_var3()

FLA_Error FLA_Hess_UT_unb_var3 ( FLA_Obj  A,
FLA_Obj  T 
)

References FLA_Hess_UT_step_unb_var3().

Referenced by FLA_Hess_UT_internal().

/*
 * Listing of the body of FLA_Hess_UT_unb_var3( A, T ); the leading numbers
 * are line numbers in FLA_Hess_UT_unb_var3.c. The function forwards both
 * arguments unchanged to FLA_Hess_UT_step_unb_var3 and returns its result.
 */
14 {
15  return FLA_Hess_UT_step_unb_var3( A, T );
16 }
FLA_Error FLA_Hess_UT_step_unb_var3(FLA_Obj A, FLA_Obj T)
Definition: FLA_Hess_UT_unb_var3.c:18