libflame  revision_anchor
Functions
FLA_UDdate_UT_inc.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLASH_UDdate_UT_inc (FLA_Obj R, FLA_Obj C, FLA_Obj D, FLA_Obj T, FLA_Obj W)
 
FLA_Error FLA_UDdate_UT_inc_blk_var1 (FLA_Obj R, FLA_Obj C, FLA_Obj D, FLA_Obj T, FLA_Obj W, fla_uddateutinc_t *cntl)
 
FLA_Error FLASH_UDdate_UT_inc_create_hier_matrices (FLA_Obj R_flat, FLA_Obj C_flat, FLA_Obj D_flat, dim_t depth, dim_t *b_flash, dim_t b_alg, FLA_Obj *R, FLA_Obj *C, FLA_Obj *D, FLA_Obj *T, FLA_Obj *W)
 
dim_t FLASH_UDdate_UT_inc_determine_alg_blocksize (FLA_Obj R)
 
FLA_Error FLASH_UDdate_UT_inc_update_rhs (FLA_Obj T, FLA_Obj bR, FLA_Obj C, FLA_Obj bC, FLA_Obj D, FLA_Obj bD)
 
FLA_Error FLASH_UDdate_UT_inc_solve (FLA_Obj R, FLA_Obj bR, FLA_Obj x)
 

Function Documentation

◆ FLA_UDdate_UT_inc_blk_var1()

FLA_Error FLA_UDdate_UT_inc_blk_var1 ( FLA_Obj  R,
FLA_Obj  C,
FLA_Obj  D,
FLA_Obj  T,
FLA_Obj  W,
fla_uddateutinc_t cntl 
)

References FLA_Apply_QUD_UT_internal(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), and FLA_UDdate_UT_internal().

Referenced by FLASH_UDdate_UT_inc().

16 {
17  FLA_Obj RTL, RTR, R00, R01, R02,
18  RBL, RBR, R10, R11, R12,
19  R20, R21, R22;
20 
21  FLA_Obj CL, CR, C0, C1, C2;
22 
23  FLA_Obj DL, DR, D0, D1, D2;
24 
25  FLA_Obj TL, TR, T0, T1, T2;
26 
27  FLA_Obj WTL, WTR, W00, W01, W02,
28  WBL, WBR, W10, W11, W12,
29  W20, W21, W22;
30 
31  dim_t b;
32 
33  FLA_Part_2x2( R, &RTL, &RTR,
34  &RBL, &RBR, 0, 0, FLA_TL );
35 
36  FLA_Part_1x2( C, &CL, &CR, 0, FLA_LEFT );
37 
38  FLA_Part_1x2( D, &DL, &DR, 0, FLA_LEFT );
39 
40  FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
41 
42  FLA_Part_2x2( W, &WTL, &WTR,
43  &WBL, &WBR, 0, 0, FLA_TL );
44 
45  while ( FLA_Obj_min_dim( RBR ) > 0 ){
46 
47  b = FLA_Determine_blocksize( RBR, FLA_BR, FLA_Cntl_blocksize( cntl ) );
48 
49  FLA_Repart_2x2_to_3x3( RTL, /**/ RTR, &R00, /**/ &R01, &R02,
50  /* ************* */ /* ******************** */
51  &R10, /**/ &R11, &R12,
52  RBL, /**/ RBR, &R20, /**/ &R21, &R22,
53  b, b, FLA_BR );
54 
55  FLA_Repart_1x2_to_1x3( CL, /**/ CR, &C0, /**/ &C1, &C2,
56  b, FLA_RIGHT );
57 
58  FLA_Repart_1x2_to_1x3( DL, /**/ DR, &D0, /**/ &D1, &D2,
59  b, FLA_RIGHT );
60 
61  FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
62  b, FLA_RIGHT );
63 
64  FLA_Repart_2x2_to_3x3( WTL, /**/ WTR, &W00, /**/ &W01, &W02,
65  /* ************* */ /* ******************** */
66  &W10, /**/ &W11, &W12,
67  WBL, /**/ WBR, &W20, /**/ &W21, &W22,
68  b, b, FLA_BR );
69 
70  /*------------------------------------------------------------*/
71 
72  /*
73  Perform an up/downdate of the upper triangular factor R11 via
74  up/downdating UT Householder transformations:
75 
76  [ R11, ...
77  C1, ...
78  D1, T1 ] = FLA_UDdate_UT( R11, ...
79  C1, ...
80  D1, T1 );
81 
82  by updating R11 in such a way that removes the contributions of
83  the rows in D1 while simultaneously adding new contributions to
84  the factorization from the rows of C1. Note that C1 and D1 are
85  also updated in the process.
86  */
87 
89  C1,
90  D1, T1,
91  FLA_Cntl_sub_uddateut( cntl ) );
92 
93 
94 
95  if ( FLA_Obj_width( R12 ) > 0 )
96  {
97  /*
98  Apply Q' to R12, C2, and D2 from the left:
99 
100  / R12 \ / R12 \
101  | C2 | = Q' * | C2 |
102  \ D2 / \ D2 /
103 
104  where Q is formed from C1, D1, and T1.
105  */
106 
107  FLA_Apply_QUD_UT_internal( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
108  T1, W12,
109  R12,
110  C1, C2,
111  D1, D2, FLA_Cntl_sub_apqudut( cntl ) );
112  }
113 
114 
115  /*------------------------------------------------------------*/
116 
117  FLA_Cont_with_3x3_to_2x2( &RTL, /**/ &RTR, R00, R01, /**/ R02,
118  R10, R11, /**/ R12,
119  /* ************** */ /* ****************** */
120  &RBL, /**/ &RBR, R20, R21, /**/ R22,
121  FLA_TL );
122 
123  FLA_Cont_with_1x3_to_1x2( &CL, /**/ &CR, C0, C1, /**/ C2,
124  FLA_LEFT );
125 
126  FLA_Cont_with_1x3_to_1x2( &DL, /**/ &DR, D0, D1, /**/ D2,
127  FLA_LEFT );
128 
129  FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
130  FLA_LEFT );
131 
132  FLA_Cont_with_3x3_to_2x2( &WTL, /**/ &WTR, W00, W01, /**/ W02,
133  W10, W11, /**/ W12,
134  /* ************** */ /* ****************** */
135  &WBL, /**/ &WBR, W20, W21, /**/ W22,
136  FLA_TL );
137  }
138 
139  return FLA_SUCCESS;
140 }
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:267
unsigned long dim_t
Definition: FLA_type_defs.h:71
dim_t FLA_Determine_blocksize(FLA_Obj A_unproc, FLA_Quadrant to_dir, fla_blocksize_t *cntl_blocksizes)
Definition: FLA_Blocksize.c:234
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Error FLA_UDdate_UT_internal(FLA_Obj R, FLA_Obj C, FLA_Obj D, FLA_Obj T, fla_uddateut_t *cntl)
Definition: FLA_UDdate_UT_internal.c:16
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:475
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
FLA_Error FLA_Apply_QUD_UT_internal(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj T, FLA_Obj W, FLA_Obj R, FLA_Obj U, FLA_Obj C, FLA_Obj V, FLA_Obj D, fla_apqudut_t *cntl)
Definition: FLA_Apply_QUD_UT_internal.c:17
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153

◆ FLASH_UDdate_UT_inc()

FLA_Error FLASH_UDdate_UT_inc ( FLA_Obj  R,
FLA_Obj  C,
FLA_Obj  D,
FLA_Obj  T,
FLA_Obj  W 
)

References FLA_Check_error_level(), FLA_UDdate_UT_inc_blk_var1(), FLA_UDdate_UT_inc_check(), FLASH_Queue_begin(), and FLASH_Queue_end().

19 {
20  FLA_Error r_val;
21 
22  // Check parameters.
23  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
24  FLA_UDdate_UT_inc_check( R, C, D, T, W );
25 
26  // Begin a parallel region.
28 
29  // Invoke FLA_QR_UT_inc_blk_var1() with the standard control tree.
30  r_val = FLA_UDdate_UT_inc_blk_var1( R, C, D, T, W, flash_uddateutinc_cntl );
31 
32  // End the parallel region.
34 
35  return r_val;
36 }
void FLASH_Queue_end(void)
Definition: FLASH_Queue.c:81
FLA_Error FLA_UDdate_UT_inc_blk_var1(FLA_Obj R, FLA_Obj C, FLA_Obj D, FLA_Obj T, FLA_Obj W, fla_uddateutinc_t *cntl)
Definition: FLA_UDdate_UT_inc_blk_var1.c:13
fla_uddateutinc_t * flash_uddateutinc_cntl
Definition: FLASH_UDdate_UT_inc_cntl_init.c:16
int FLA_Error
Definition: FLA_type_defs.h:47
void FLASH_Queue_begin(void)
Definition: FLASH_Queue.c:59
FLA_Error FLA_UDdate_UT_inc_check(FLA_Obj R, FLA_Obj C, FLA_Obj D, FLA_Obj T, FLA_Obj W)
Definition: FLA_UDdate_UT_inc_check.c:13
unsigned int FLA_Check_error_level(void)
Definition: FLA_Check.c:18

◆ FLASH_UDdate_UT_inc_create_hier_matrices()

FLA_Error FLASH_UDdate_UT_inc_create_hier_matrices ( FLA_Obj  R_flat,
FLA_Obj  C_flat,
FLA_Obj  D_flat,
dim_t  depth,
dim_t b_flash,
dim_t  b_alg,
FLA_Obj R,
FLA_Obj C,
FLA_Obj D,
FLA_Obj T,
FLA_Obj W 
)

References FLA_Abort(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_width(), FLA_Print_message(), FLASH_Obj_create_ext(), FLASH_Obj_create_hier_copy_of_flat(), and FLASH_UDdate_UT_inc_determine_alg_blocksize().

14 {
15  FLA_Datatype datatype;
16  dim_t m_T, n_T;
17  dim_t m_W, n_W;
18  dim_t m_C;
19  dim_t m_D;
20 
21  // *** The current UDdate_UT_inc algorithm implemented assumes that
22  // the matrix has a hierarchical depth of 1. We check for that here
23  // because we anticipate that we'll use a more general algorithm in the
24  // future, and we don't want to forget to remove the constraint. ***
25  if ( depth != 1 )
26  {
27  FLA_Print_message( "FLASH_UDdate_UT_inc() currently only supports matrices of depth 1",
28  __FILE__, __LINE__ );
29  FLA_Abort();
30  }
31 
32  // Create hierarchical copy of matrices R_flat, C_flat, and D_flat.
33  FLASH_Obj_create_hier_copy_of_flat( R_flat, depth, b_flash, R );
34  FLASH_Obj_create_hier_copy_of_flat( C_flat, depth, b_flash, C );
35  FLASH_Obj_create_hier_copy_of_flat( D_flat, depth, b_flash, D );
36 
37  // Query the datatype of matrix R_flat.
38  datatype = FLA_Obj_datatype( R_flat );
39 
40  // If the user passed in zero for b_alg, then we need to set the
41  // algorithmic (inner) blocksize to a reasonable default value.
42  if ( b_alg == 0 )
43  {
45  }
46 
47  // Determine the element (not scalar) dimensions of the new hierarchical
48  // matrix T. By using the element dimensions, we will probably allocate
49  // more storage than we actually need (at the bottom and right edge cases)
50  // but this is simpler than computing the exact amount and the excess
51  // storage is usually small in practice.
52  n_T = FLA_Obj_width( *R );
53  m_C = FLA_Obj_length( *C );
54  m_D = FLA_Obj_length( *D );
55  m_T = max( m_C, m_D );
56 
57  // Create hierarchical matrix T, with element dimensions conformal to the
58  // the larger of C and D, where each block is b_alg-by-b_flash.
59  FLASH_Obj_create_ext( datatype, m_T * b_alg, n_T * b_flash[0],
60  depth, &b_alg, b_flash,
61  T );
62 
63  // Determine the element (not scalar) dimensions of the new hierarchical
64  // matrix W. The element length and width will be identical to that of R.
65  // Once again, we will probably allocate excess storage, but we consider
66  // this to be small.
67  m_W = FLA_Obj_length( *R );
68  n_W = FLA_Obj_width( *R );
69 
70  // Create hierarchical matrix W, with element dimensions conformal to R,
71  // where each block is b_alg-by-b_flash.
72  FLASH_Obj_create_ext( datatype, m_W * b_alg, n_W * b_flash[0],
73  depth, &b_alg, b_flash,
74  W );
75 
76  return FLA_SUCCESS;
77 }
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLASH_Obj_create_ext(FLA_Datatype datatype, dim_t m, dim_t n, dim_t depth, dim_t *b_m, dim_t *b_n, FLA_Obj *H)
Definition: FLASH_Obj.c:151
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
void FLA_Abort(void)
Definition: FLA_Error.c:248
void FLA_Print_message(char *str, char *file, int line)
Definition: FLA_Error.c:234
FLA_Error FLASH_Obj_create_hier_copy_of_flat(FLA_Obj F, dim_t depth, dim_t *b_mn, FLA_Obj *H)
Definition: FLASH_Obj.c:591
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLASH_UDdate_UT_inc_determine_alg_blocksize(FLA_Obj R)
Definition: FLASH_UDdate_UT_inc_create_hier_matrices.c:80
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116

◆ FLASH_UDdate_UT_inc_determine_alg_blocksize()

dim_t FLASH_UDdate_UT_inc_determine_alg_blocksize ( FLA_Obj  R)

References FLA_Obj_length().

Referenced by FLASH_UDdate_UT_inc_create_hier_matrices().

81 {
82  dim_t b_alg;
83  dim_t b_flash;
84 
85  // Acquire the storage blocksize.
86  b_flash = FLA_Obj_length( *FLASH_OBJ_PTR_AT( R ) );
87 
88  // Scale the storage blocksize by a pre-defined scalar to arrive at a
89  // reasonable algorithmic blocksize, but make sure it's at least 1.
90  b_alg = ( dim_t ) max( ( double ) b_flash * FLA_UDDATE_INNER_TO_OUTER_B_RATIO, 1 );
91 
92  return b_alg;
93 }
unsigned long dim_t
Definition: FLA_type_defs.h:71
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116

◆ FLASH_UDdate_UT_inc_solve()

FLA_Error FLASH_UDdate_UT_inc_solve ( FLA_Obj  R,
FLA_Obj  bR,
FLA_Obj  x 
)

References FLA_Check_error_level(), FLA_ONE, FLA_UDdate_UT_inc_solve_check(), FLASH_Copy(), and FLASH_Trsm().

14 {
15  // Check parameters.
16  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
18 
19  // Copy the contents of bR to x so that after the triangular solve, the
20  // solution resides in x (and bR is preserved).
21  FLASH_Copy( bR, x );
22 
23  // Perform a triangular solve with R the right-hand side.
24  FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR,
25  FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
26  FLA_ONE, R, x );
27 
28  return FLA_SUCCESS;
29 }
FLA_Error FLA_UDdate_UT_inc_solve_check(FLA_Obj R, FLA_Obj bR, FLA_Obj x)
Definition: FLA_UDdate_UT_inc_solve_check.c:13
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
unsigned int FLA_Check_error_level(void)
Definition: FLA_Check.c:18
FLA_Error FLASH_Copy(FLA_Obj A, FLA_Obj B)
Definition: FLASH_Copy.c:15
FLA_Error FLASH_Trsm(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLASH_Trsm.c:15

◆ FLASH_UDdate_UT_inc_update_rhs()

FLA_Error FLASH_UDdate_UT_inc_update_rhs ( FLA_Obj  T,
FLA_Obj  bR,
FLA_Obj  C,
FLA_Obj  bC,
FLA_Obj  D,
FLA_Obj  bD 
)

References FLA_Check_error_level(), FLA_UDdate_UT_inc_update_rhs_check(), FLASH_Apply_QUD_UT_inc(), FLASH_Apply_QUD_UT_inc_create_workspace(), FLASH_Obj_create_copy_of(), and FLASH_Obj_free().

16 {
17  FLA_Obj W;
18  FLA_Obj bC_copy;
19  FLA_Obj bD_copy;
20 
21  // Check parameters.
22  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
23  FLA_UDdate_UT_inc_update_rhs_check( T, bR, C, bC, D, bD );
24 
25  // Create hierarchical workspace.
27 
28  // Make temporary copies of the bC and bD right-hand side objects so we
29  // don't destory their original contents.
30  FLASH_Obj_create_copy_of( FLA_NO_TRANSPOSE, bC, &bC_copy );
31  FLASH_Obj_create_copy_of( FLA_NO_TRANSPOSE, bD, &bD_copy );
32 
33  // Apply the updowndating Q' incrementally to the right-hand sides.
34  FLASH_Apply_QUD_UT_inc( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
35  T, W,
36  bR,
37  C, bC_copy,
38  D, bD_copy );
39 
40  // Free the temporary objects.
41  FLASH_Obj_free( &bC_copy );
42  FLASH_Obj_free( &bD_copy );
43 
44  // Free the workspace object.
45  FLASH_Obj_free( &W );
46 
47  return FLA_SUCCESS;
48 }
FLA_Error FLASH_Apply_QUD_UT_inc(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj T, FLA_Obj W, FLA_Obj R, FLA_Obj U, FLA_Obj C, FLA_Obj V, FLA_Obj D)
Definition: FLASH_Apply_QUD_UT_inc.c:16
FLA_Error FLASH_Obj_create_copy_of(FLA_Trans trans, FLA_Obj H_cur, FLA_Obj *H_new)
Definition: FLASH_Obj.c:561
void FLASH_Obj_free(FLA_Obj *H)
Definition: FLASH_Obj.c:638
FLA_Error FLASH_Apply_QUD_UT_inc_create_workspace(FLA_Obj T, FLA_Obj R, FLA_Obj *W)
Definition: FLASH_Apply_QUD_UT_inc_create_workspace.c:13
FLA_Error FLA_UDdate_UT_inc_update_rhs_check(FLA_Obj T, FLA_Obj bR, FLA_Obj C, FLA_Obj bC, FLA_Obj D, FLA_Obj bD)
Definition: FLA_UDdate_UT_inc_update_rhs_check.c:13
Definition: FLA_type_defs.h:158
unsigned int FLA_Check_error_level(void)
Definition: FLA_Check.c:18