libflame  revision_anchor
Functions
FLA_LU_incpiv.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLASH_LU_incpiv_create_hier_matrices (FLA_Obj A_flat, dim_t depth, dim_t *b_flash, dim_t b_alg, FLA_Obj *A, FLA_Obj *p, FLA_Obj *L)
 
dim_t FLASH_LU_incpiv_determine_alg_blocksize (FLA_Obj A)
 
FLA_Error FLASH_LU_incpiv_noopt (FLA_Obj A, FLA_Obj p, FLA_Obj L)
 
FLA_Error FLASH_LU_incpiv_opt1 (FLA_Obj A, FLA_Obj p, FLA_Obj L)
 
FLA_Error FLASH_LU_incpiv_solve (FLA_Obj A, FLA_Obj p, FLA_Obj L, FLA_Obj B, FLA_Obj X)
 

Function Documentation

◆ FLASH_LU_incpiv_create_hier_matrices()

FLA_Error FLASH_LU_incpiv_create_hier_matrices ( FLA_Obj  A_flat,
dim_t  depth,
dim_t *  b_flash,
dim_t  b_alg,
FLA_Obj *  A,
FLA_Obj *  p,
FLA_Obj *  L 
)

References FLA_Abort(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_width(), FLA_Print_message(), FLASH_LU_incpiv_determine_alg_blocksize(), FLASH_Obj_create_ext(), and FLASH_Obj_create_hier_copy_of_flat().

14 {
15  FLA_Datatype datatype;
16  dim_t m, n;
17  dim_t one = 1;
18 
19  // *** The current LU_incpiv algorithm implemented assumes that
20  // the matrix has a hierarchical depth of 1. We check for that here, because
21  // we anticipate that we'll use a more general algorithm in the future, and
22  // we don't want to forget to remove the constraint. ***
23  if ( depth != 1 )
24  {
25  FLA_Print_message( "FLASH_LU_incpiv() currently only supports matrices of depth 1",
26  __FILE__, __LINE__ );
27  FLA_Abort();
28  }
29 
30  // Create hierarchical copy of matrix A_flat.
31  FLASH_Obj_create_hier_copy_of_flat( A_flat, depth, b_flash, A );
32 
33  // Query the datatype of matrix A_flat.
34  datatype = FLA_Obj_datatype( A_flat );
35 
36  // If the user passed in zero for b_alg, then we need to set the algorithmic
37  // (inner) blocksize to a reasonable default value.
38  if ( b_alg == 0 )
39  {
40  b_alg = FLASH_LU_incpiv_determine_alg_blocksize( *A );
41  }
42 
43  // Query the element (not scalar) dimensions of the new hierarchical matrix.
44  // This is done so we can create p and L with full blocks for the bottom
45  // and right "edge cases" of A.
46  m = FLA_Obj_length( *A );
47  n = FLA_Obj_width ( *A );
48 
49  // Create hierarchical matrices p and L.
50  FLASH_Obj_create_ext( FLA_INT, m * b_flash[0], n,
51  depth, b_flash, &one,
52  p );
53 
54  FLASH_Obj_create_ext( datatype, m * b_flash[0], n * b_alg,
55  depth, b_flash, &b_alg,
56  L );
57 
58  return FLA_SUCCESS;
59 }
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLASH_Obj_create_ext(FLA_Datatype datatype, dim_t m, dim_t n, dim_t depth, dim_t *b_m, dim_t *b_n, FLA_Obj *H)
Definition: FLASH_Obj.c:151
dim_t FLASH_LU_incpiv_determine_alg_blocksize(FLA_Obj A)
Definition: FLASH_LU_incpiv_create_hier_matrices.c:62
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
void FLA_Abort(void)
Definition: FLA_Error.c:248
void FLA_Print_message(char *str, char *file, int line)
Definition: FLA_Error.c:234
FLA_Error FLASH_Obj_create_hier_copy_of_flat(FLA_Obj F, dim_t depth, dim_t *b_mn, FLA_Obj *H)
Definition: FLASH_Obj.c:591
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116

◆ FLASH_LU_incpiv_determine_alg_blocksize()

dim_t FLASH_LU_incpiv_determine_alg_blocksize ( FLA_Obj  A)

References FLA_Obj_length().

Referenced by FLASH_LU_incpiv_create_hier_matrices().

63 {
64  dim_t b_alg;
65  dim_t b_flash;
66 
67  // Acquire the storage blocksize.
68  b_flash = FLA_Obj_length( *FLASH_OBJ_PTR_AT( A ) );
69 
70  // Scale the storage blocksize by a pre-defined scalar to arrive at a
71  // reasonable algorithmic blocksize, but make sure it's at least 1.
72  b_alg = ( dim_t ) max( ( double ) b_flash * FLA_LU_INNER_TO_OUTER_B_RATIO, 1 );
73 
74  return b_alg;
75 }
unsigned long dim_t
Definition: FLA_type_defs.h:71
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116

◆ FLASH_LU_incpiv_noopt()

FLA_Error FLASH_LU_incpiv_noopt ( FLA_Obj  A,
FLA_Obj  p,
FLA_Obj  L 
)

References FLASH_LU_incpiv_var1(), FLASH_Obj_scalar_width_tl(), FLASH_Queue_begin(), and FLASH_Queue_end().

Referenced by FLASH_LU_incpiv().

16 {
17  dim_t nb_alg;
18  FLA_Error r_val;
19 
20  // Inspect the width of the top-left element of L to get the algorithmic
21  // blocksize we'll use throughout the LU_incpiv algorithm.
22  nb_alg = FLASH_Obj_scalar_width_tl( L );
23 
24  // Begin a parallel region.
25  FLASH_Queue_begin();
26 
27  // Enqueue tasks via a SuperMatrix-aware control tree.
28  r_val = FLASH_LU_incpiv_var1( A, p, L, nb_alg, flash_lu_incpiv_cntl );
29 
30  // End the parallel region.
31  FLASH_Queue_end();
32 
33  return r_val;
34 }
void FLASH_Queue_end(void)
Definition: FLASH_Queue.c:81
unsigned long dim_t
Definition: FLA_type_defs.h:71
fla_lu_t * flash_lu_incpiv_cntl
Definition: FLASH_LU_incpiv_cntl_init.c:18
int FLA_Error
Definition: FLA_type_defs.h:47
void FLASH_Queue_begin(void)
Definition: FLASH_Queue.c:59
dim_t FLASH_Obj_scalar_width_tl(FLA_Obj H)
Definition: FLASH_View.c:737
FLA_Error FLASH_LU_incpiv_var1(FLA_Obj A, FLA_Obj p, FLA_Obj L, dim_t nb_alg, fla_lu_t *cntl)
Definition: FLASH_LU_incpiv_var1.c:13

◆ FLASH_LU_incpiv_opt1()

FLA_Error FLASH_LU_incpiv_opt1 ( FLA_Obj  A,
FLA_Obj  p,
FLA_Obj  L 
)

References FLASH_LU_incpiv_var2(), FLASH_Obj_create_diag_panel(), FLASH_Obj_free(), FLASH_Obj_scalar_width_tl(), FLASH_Queue_begin(), and FLASH_Queue_end().

Referenced by FLASH_LU_incpiv().

16 {
17  dim_t nb_alg;
18  FLA_Error r_val;
19  FLA_Obj U;
20 
21  // Inspect the width of the top-left element of L to get the algorithmic
22  // blocksize we'll use throughout the LU_incpiv algorithm.
23  nb_alg = FLASH_Obj_scalar_width_tl( L );
24 
25  // Create a temporary matrix to hold copies of all of the blocks along the
26  // diagonal of A.
27  FLASH_Obj_create_diag_panel( A, &U );
28 
29  // Begin a parallel region.
30  FLASH_Queue_begin();
31 
32  // Enqueue tasks via a SuperMatrix-aware control tree.
33  r_val = FLASH_LU_incpiv_var2( A, p, L, U, nb_alg, flash_lu_incpiv_cntl );
34 
35  // End the parallel region.
36  FLASH_Queue_end();
37 
38  // Free the temporary matrix.
39  FLASH_Obj_free( &U );
40 
41  return r_val;
42 }
void FLASH_Queue_end(void)
Definition: FLASH_Queue.c:81
fla_lu_t * flash_lu_incpiv_cntl
Definition: FLASH_LU_incpiv_cntl_init.c:18
unsigned long dim_t
Definition: FLA_type_defs.h:71
void FLASH_Obj_free(FLA_Obj *H)
Definition: FLASH_Obj.c:638
int FLA_Error
Definition: FLA_type_defs.h:47
FLA_Error FLASH_Obj_create_diag_panel(FLA_Obj A, FLA_Obj *U)
Definition: FLASH_Obj_create_diag_panel.c:13
void FLASH_Queue_begin(void)
Definition: FLASH_Queue.c:59
FLA_Obj_view
Definition: FLA_type_defs.h:158
dim_t FLASH_Obj_scalar_width_tl(FLA_Obj H)
Definition: FLASH_View.c:737
FLA_Error FLASH_LU_incpiv_var2(FLA_Obj A, FLA_Obj p, FLA_Obj L, FLA_Obj U, dim_t nb_alg, fla_lu_t *cntl)
Definition: FLASH_LU_incpiv_var2.c:13

◆ FLASH_LU_incpiv_solve()

FLA_Error FLASH_LU_incpiv_solve ( FLA_Obj  A,
FLA_Obj  p,
FLA_Obj  L,
FLA_Obj  B,
FLA_Obj  X 
)

References FLA_Check_error_level(), FLA_LU_incpiv_solve_check(), FLA_ONE, FLASH_Copy(), FLASH_FS_incpiv(), and FLASH_Trsm().

14 {
15  // Check parameters.
16  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
17  FLA_LU_incpiv_solve_check( A, p, L, B, X );
18 
19  FLASH_Copy( B, X );
20 
21  FLASH_FS_incpiv( A, p, L, X );
22  FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
23  FLA_NONUNIT_DIAG, FLA_ONE, A, X );
24 
25  return FLA_SUCCESS;
26 }
FLA_Error FLA_LU_incpiv_solve_check(FLA_Obj A, FLA_Obj p, FLA_Obj L, FLA_Obj B, FLA_Obj X)
Definition: FLA_LU_incpiv_solve_check.c:13
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Error FLASH_FS_incpiv(FLA_Obj A, FLA_Obj p, FLA_Obj L, FLA_Obj b)
Definition: FLASH_FS_incpiv.c:13
unsigned int FLA_Check_error_level(void)
Definition: FLA_Check.c:18
FLA_Error FLASH_Copy(FLA_Obj A, FLA_Obj B)
Definition: FLASH_Copy.c:15
FLA_Error FLASH_Trsm(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLASH_Trsm.c:15