libflame  revision_anchor
Functions
FLA_Hess_UT_unb_var4.c File Reference

(r)

Functions

FLA_Error FLA_Hess_UT_unb_var4 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_unb_var4 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T)
 

Function Documentation

◆ FLA_Hess_UT_step_unb_var4()

FLA_Error FLA_Hess_UT_step_unb_var4 ( FLA_Obj  A,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  T 
)

References FLA_Axpy(), FLA_Axpyt(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Dot(), FLA_Dotc(), FLA_Gemv(), FLA_Gemvc(), FLA_Gerc(), FLA_Househ2_UT(), FLA_Inv_scal(), FLA_Inv_scalc(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), FLA_Set(), FLA_TWO, and FLA_ZERO.

Referenced by FLA_Hess_UT_unb_var4().

30 {
31  FLA_Obj ATL, ATR, A00, a01, A02,
32  ABL, ABR, a10t, alpha11, a12t,
33  A20, a21, A22;
34  FLA_Obj YTL, YTR, Y00, y01, Y02,
35  YBL, YBR, y10t, psi11, y12t,
36  Y20, y21, Y22;
37  FLA_Obj ZTL, ZTR, Z00, z01, Z02,
38  ZBL, ZBR, z10t, zeta11, z12t,
39  Z20, z21, Z22;
40  FLA_Obj TTL, TTR, T00, t01, T02,
41  TBL, TBR, t10t, tau11, t12t,
42  T20, t21, T22;
43  FLA_Obj dT, d0,
44  dB, delta1,
45  d2;
46  FLA_Obj eT, e0,
47  eB, epsilon1,
48  e2;
49  FLA_Obj fT, f0,
50  fB, phi1,
51  f2;
52  FLA_Obj d, e, f;
53 
54  FLA_Obj inv_tau11;
55  FLA_Obj minus_inv_tau11;
56  FLA_Obj first_elem;
57  FLA_Obj last_elem;
58  FLA_Obj beta;
59  FLA_Obj conj_beta;
60  FLA_Obj dot_product;
61 
62  FLA_Obj a10t_l, a10t_r;
63  FLA_Obj a21_t,
64  a21_b;
65  FLA_Obj a2;
66 
67  FLA_Datatype datatype_A;
68  dim_t m_A;
69  dim_t b_alg;
70 
71 
72  b_alg = FLA_Obj_length( T );
73 
74  datatype_A = FLA_Obj_datatype( A );
75  m_A = FLA_Obj_length( A );
76 
77  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &inv_tau11 );
78  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_inv_tau11 );
79  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &first_elem );
80  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &last_elem );
81  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &beta );
82  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &conj_beta );
83  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &dot_product );
84  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
85  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
86  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
87 
88  FLA_Set( FLA_ZERO, Y );
89  FLA_Set( FLA_ZERO, Z );
90 
91  FLA_Part_2x2( A, &ATL, &ATR,
92  &ABL, &ABR, 0, 0, FLA_TL );
93  FLA_Part_2x2( Y, &YTL, &YTR,
94  &YBL, &YBR, 0, 0, FLA_TL );
95  FLA_Part_2x2( Z, &ZTL, &ZTR,
96  &ZBL, &ZBR, 0, 0, FLA_TL );
97  FLA_Part_2x2( T, &TTL, &TTR,
98  &TBL, &TBR, 0, 0, FLA_TL );
99  FLA_Part_2x1( d, &dT,
100  &dB, 0, FLA_TOP );
101  FLA_Part_2x1( e, &eT,
102  &eB, 0, FLA_TOP );
103  FLA_Part_2x1( f, &fT,
104  &fB, 0, FLA_TOP );
105 
106  while ( FLA_Obj_length( ATL ) < b_alg )
107  {
108  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
109  /* ************* */ /* ************************** */
110  &a10t, /**/ &alpha11, &a12t,
111  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
112  1, 1, FLA_BR );
113  FLA_Repart_2x2_to_3x3( YTL, /**/ YTR, &Y00, /**/ &y01, &Y02,
114  /* ************* */ /* ************************ */
115  &y10t, /**/ &psi11, &y12t,
116  YBL, /**/ YBR, &Y20, /**/ &y21, &Y22,
117  1, 1, FLA_BR );
118  FLA_Repart_2x2_to_3x3( ZTL, /**/ ZTR, &Z00, /**/ &z01, &Z02,
119  /* ************* */ /* ************************* */
120  &z10t, /**/ &zeta11, &z12t,
121  ZBL, /**/ ZBR, &Z20, /**/ &z21, &Z22,
122  1, 1, FLA_BR );
123  FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
124  /* ************* */ /* ************************** */
125  &t10t, /**/ &tau11, &t12t,
126  TBL, /**/ TBR, &T20, /**/ &t21, &T22,
127  1, 1, FLA_BR );
128  FLA_Repart_2x1_to_3x1( dT, &d0,
129  /* ** */ /* ****** */
130  &delta1,
131  dB, &d2, 1, FLA_BOTTOM );
132  FLA_Repart_2x1_to_3x1( eT, &e0,
133  /* ** */ /* ******** */
134  &epsilon1,
135  eB, &e2, 1, FLA_BOTTOM );
136  FLA_Repart_2x1_to_3x1( fT, &f0,
137  /* ** */ /* **** */
138  &phi1,
139  fB, &f2, 1, FLA_BOTTOM );
140 
141  /*------------------------------------------------------------*/
142 
143  // Save first element of a10_r and set it to one so we can use a10t as
144  // u10t in subsequent computations. We will restore a10_r later on.
145  if ( FLA_Obj_length( ATL ) > 0 )
146  {
147  FLA_Part_1x2( a10t, &a10t_l, &a10t_r, 1, FLA_RIGHT );
148  FLA_Copy( a10t_r, last_elem );
149  FLA_Set( FLA_ONE, a10t_r );
150  }
151 
152  FLA_Merge_2x1( alpha11,
153  a21, &a2 );
154 
155  // alpha11 = alpha11 - u10t * y10t' - z10t * u10t';
156  // a21 = a21 - U20 * y10t' - Z20 * u10t';
157  FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
158  FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
159 
160  // a12t = a12t - u10t * Y20' - z10t * U20';
161  FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
162  FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
163 
164  // Restore last element of a10t.
165  if ( FLA_Obj_length( ATL ) > 0 )
166  {
167  FLA_Copy( last_elem, a10t_r );
168  }
169 
170  if ( FLA_Obj_length( A22 ) > 0 )
171  {
172  FLA_Part_2x1( a21, &a21_t,
173  &a21_b, 1, FLA_TOP );
174 
175  // [ u21, tau11, a21 ] = House( a21 );
176  FLA_Househ2_UT( FLA_LEFT,
177  a21_t,
178  a21_b, tau11 );
179 
180  // inv_tau11 = 1 / tau11;
181  // minus_inv_tau11 = -1 / tau11;
182  FLA_Set( FLA_ONE, inv_tau11 );
183  FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
184  FLA_Copy( inv_tau11, minus_inv_tau11 );
185  FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
186 
187  // Save first element of a21_t and set it to one.
188  FLA_Copy( a21_t, first_elem );
189  FLA_Set( FLA_ONE, a21_t );
190 
191  // y21 = A22' * u21;
192  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
193 
194  // z21 = A22 * u21;
195  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
196 
197  // y21 = y21 - Y20 * ( U20' * u21 ) - U20 * ( Z20' * u21 );
198  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
199  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
200  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
201 
202  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
203  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
204 
205  // t01 = U20' * u21;
206  FLA_Copy( d0, t01 );
207 
208  // z21 = z21 - U20 * ( Y20' * u21 ) - Z20 * ( U20' * u21 );
209  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
210  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
211 
212  // beta = u21' * z21 / 2;
213  // conj_beta = conj(beta);
214  FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
215  FLA_Inv_scal( FLA_TWO, beta );
216  FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
217 
218  // y21' = ( y21' - beta / tau * u21' ) / tau;
219  // y21 = ( y21 - conj(beta) / tau * u21 ) / tau;
220  FLA_Scal( minus_inv_tau11, conj_beta );
221  FLA_Axpy( conj_beta, a21, y21 );
222  FLA_Scal( inv_tau11, y21 );
223 
224  // z21 = ( z21 - beta / tau * u21 ) / tau;
225  FLA_Scal( minus_inv_tau11, beta );
226  FLA_Axpy( beta, a21, z21 );
227  FLA_Scal( inv_tau11, z21 );
228 
229  // a12t = a12t * ( I - u21 * u21' / tau );
230  // = a12t - ( a12t * u21 ) * u21' / tau;
231  FLA_Dot( a12t, a21, dot_product );
232  FLA_Scal( minus_inv_tau11, dot_product );
233  FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
234 
235  // A02 = A02 * ( I - u21 * u21' / tau );
236  // = A02 - ( A02 * u21 ) * u21' / tau;
237  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
238  FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
239 
240  // Restore first element of a21.
241  FLA_Copy( first_elem, a21_t );
242  }
243 
244  /*------------------------------------------------------------*/
245 
246  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
247  a10t, alpha11, /**/ a12t,
248  /* ************** */ /* ************************ */
249  &ABL, /**/ &ABR, A20, a21, /**/ A22,
250  FLA_TL );
251  FLA_Cont_with_3x3_to_2x2( &YTL, /**/ &YTR, Y00, y01, /**/ Y02,
252  y10t, psi11, /**/ y12t,
253  /* ************** */ /* ********************** */
254  &YBL, /**/ &YBR, Y20, y21, /**/ Y22,
255  FLA_TL );
256  FLA_Cont_with_3x3_to_2x2( &ZTL, /**/ &ZTR, Z00, z01, /**/ Z02,
257  z10t, zeta11, /**/ z12t,
258  /* ************** */ /* *********************** */
259  &ZBL, /**/ &ZBR, Z20, z21, /**/ Z22,
260  FLA_TL );
261  FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
262  t10t, tau11, /**/ t12t,
263  /* ************** */ /* ************************ */
264  &TBL, /**/ &TBR, T20, t21, /**/ T22,
265  FLA_TL );
266  FLA_Cont_with_3x1_to_2x1( &dT, d0,
267  delta1,
268  /* ** */ /* ****** */
269  &dB, d2, FLA_TOP );
270  FLA_Cont_with_3x1_to_2x1( &eT, e0,
271  epsilon1,
272  /* ** */ /* ******** */
273  &eB, e2, FLA_TOP );
274  FLA_Cont_with_3x1_to_2x1( &fT, f0,
275  phi1,
276  /* ** */ /* **** */
277  &fB, f2, FLA_TOP );
278  }
279 
280  FLA_Obj_free( &inv_tau11 );
281  FLA_Obj_free( &minus_inv_tau11 );
282  FLA_Obj_free( &first_elem );
283  FLA_Obj_free( &last_elem );
284  FLA_Obj_free( &beta );
285  FLA_Obj_free( &conj_beta );
286  FLA_Obj_free( &dot_product );
287  FLA_Obj_free( &d );
288  FLA_Obj_free( &e );
289  FLA_Obj_free( &f );
290 
291  return FLA_SUCCESS;
292 }
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition: FLA_Obj.c:55
FLA_Error FLA_Gemvc(FLA_Trans transa, FLA_Conj conjx, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Gemvc.c:13
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:226
FLA_Error FLA_Househ2_UT(FLA_Side side, FLA_Obj chi_1, FLA_Obj x2, FLA_Obj tau)
Definition: FLA_Househ2_UT.c:16
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
FLA_Error FLA_Axpyt(FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLA_Axpyt.c:15
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Error FLA_Copy(FLA_Obj A, FLA_Obj B)
Definition: FLA_Copy.c:15
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Inv_scalc(FLA_Conj conjalpha, FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Inv_scalc.c:13
FLA_Error FLA_Copyt(FLA_Trans trans, FLA_Obj A, FLA_Obj B)
Definition: FLA_Copyt.c:15
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:428
FLA_Error FLA_Gerc(FLA_Conj conjx, FLA_Conj conjy, FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj A)
Definition: FLA_Gerc.c:13
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Error FLA_Gemv(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Gemv.c:15
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
Definition: FLA_type_defs.h:158
FLA_Error FLA_Set(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Set.c:13
FLA_Error FLA_Scal(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Scal.c:15
FLA_Error FLA_Dot(FLA_Obj x, FLA_Obj y, FLA_Obj rho)
Definition: FLA_Dot.c:13
FLA_Error FLA_Inv_scal(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Inv_scal.c:13
FLA_Error FLA_Axpy(FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLA_Axpy.c:15
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
int FLA_Datatype
Definition: FLA_type_defs.h:49
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Obj FLA_TWO
Definition: FLA_Init.c:17
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Error FLA_Merge_2x1(FLA_Obj AT, FLA_Obj AB, FLA_Obj *A)
Definition: FLA_View.c:541
FLA_Error FLA_Dotc(FLA_Conj conj, FLA_Obj x, FLA_Obj y, FLA_Obj rho)
Definition: FLA_Dotc.c:13

◆ FLA_Hess_UT_unb_var4()

FLA_Error FLA_Hess_UT_unb_var4 ( FLA_Obj  A,
FLA_Obj  T 
)

References FLA_Hess_UT_step_unb_var4(), FLA_Obj_create_conf_to(), and FLA_Obj_free().

Referenced by FLA_Hess_UT_internal().

14 {
15  FLA_Error r_val;
16  FLA_Obj Y, Z;
17 
18  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Y );
19  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Z );
20 
21  r_val = FLA_Hess_UT_step_unb_var4( A, Y, Z, T );
22 
23  FLA_Obj_free( &Y );
24  FLA_Obj_free( &Z );
25 
26  return r_val;
27 }
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
FLA_Error FLA_Hess_UT_step_unb_var4(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T)
Definition: FLA_Hess_UT_unb_var4.c:29
int FLA_Error
Definition: FLA_type_defs.h:47
Definition: FLA_type_defs.h:158
FLA_Error FLA_Obj_create_conf_to(FLA_Trans trans, FLA_Obj old, FLA_Obj *obj)
Definition: FLA_Obj.c:286