libflame  revision_anchor
Functions
FLA_Tevd_v.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_Tevd_compute_scaling_ops (int m_A, float *buff_d, int inc_d, float *buff_e, int inc_e, float *sigma)
 
FLA_Error FLA_Tevd_compute_scaling_opd (int m_A, double *buff_d, int inc_d, double *buff_e, int inc_e, double *sigma)
 
FLA_Error FLA_Tevd_find_submatrix_ops (int m_A, int ij_begin, float *buff_d, int inc_d, float *buff_e, int inc_e, int *ijTL, int *ijBR)
 
FLA_Error FLA_Tevd_find_submatrix_opd (int m_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR)
 
FLA_Error FLA_Tevd_find_perfshift_ops (int m_d, int m_l, float *buff_d, int inc_d, float *buff_e, int inc_e, float *buff_l, int inc_l, int *buff_lstat, int inc_lstat, float *buff_pu, int inc_pu, int *ij_shift)
 
FLA_Error FLA_Tevd_find_perfshift_opd (int m_d, int m_l, double *buff_d, int inc_d, double *buff_e, int inc_e, double *buff_l, int inc_l, int *buff_lstat, int inc_lstat, double *buff_pu, int inc_pu, int *ij_shift)
 
FLA_Error FLA_Norm1_tridiag (FLA_Obj d, FLA_Obj e, FLA_Obj norm)
 
FLA_Error FLA_Norm1_tridiag_ops (int m_A, float *buff_d, int inc_d, float *buff_e, int inc_e, float *norm)
 
FLA_Error FLA_Norm1_tridiag_opd (int m_A, double *buff_d, int inc_d, double *buff_e, int inc_e, double *norm)
 
FLA_Error FLA_Tevd_v_opt_var1 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj U, dim_t b_alg)
 
FLA_Error FLA_Tevd_v_ops_var1 (int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_U, int rs_U, int cs_U, int b_alg)
 
FLA_Error FLA_Tevd_v_opd_var1 (int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_U, int rs_U, int cs_U, int b_alg)
 
FLA_Error FLA_Tevd_v_opc_var1 (int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_U, int rs_U, int cs_U, int b_alg)
 
FLA_Error FLA_Tevd_v_opz_var1 (int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_U, int rs_U, int cs_U, int b_alg)
 
FLA_Error FLA_Tevd_v_opt_var2 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj R, FLA_Obj W, FLA_Obj U, dim_t b_alg)
 
FLA_Error FLA_Tevd_v_ops_var2 (int m_A, int m_U, int n_G, int n_G_extra, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_R, int rs_R, int cs_R, float *buff_W, int rs_W, int cs_W, float *buff_U, int rs_U, int cs_U, int b_alg)
 
FLA_Error FLA_Tevd_v_opd_var2 (int m_A, int m_U, int n_G, int n_G_extra, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_R, int rs_R, int cs_R, double *buff_W, int rs_W, int cs_W, double *buff_U, int rs_U, int cs_U, int b_alg)
 
FLA_Error FLA_Tevd_v_opc_var2 (int m_A, int m_U, int n_G, int n_G_extra, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_R, int rs_R, int cs_R, scomplex *buff_W, int rs_W, int cs_W, scomplex *buff_U, int rs_U, int cs_U, int b_alg)
 
FLA_Error FLA_Tevd_v_opz_var2 (int m_A, int m_U, int n_G, int n_G_extra, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_R, int rs_R, int cs_R, dcomplex *buff_W, int rs_W, int cs_W, dcomplex *buff_U, int rs_U, int cs_U, int b_alg)
 

Function Documentation

◆ FLA_Norm1_tridiag()

FLA_Error FLA_Norm1_tridiag ( FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  norm 
)

References FLA_Norm1_tridiag_opd(), FLA_Norm1_tridiag_ops(), FLA_Obj_datatype(), FLA_Obj_vector_dim(), and FLA_Obj_vector_inc().

14 {
15  FLA_Datatype datatype;
16  int m_A;
17  int inc_d;
18  int inc_e;
19 
20  datatype = FLA_Obj_datatype( d );
21 
22  m_A = FLA_Obj_vector_dim( d );
23 
24  inc_d = FLA_Obj_vector_inc( d );
25  inc_e = FLA_Obj_vector_inc( e );
26 
27 
28  switch ( datatype )
29  {
30  case FLA_FLOAT:
31  {
32  float* buff_d = FLA_FLOAT_PTR( d );
33  float* buff_e = FLA_FLOAT_PTR( e );
34  float* buff_norm = FLA_FLOAT_PTR( norm );
35 
37  buff_d, inc_d,
38  buff_e, inc_e,
39  buff_norm );
40 
41  break;
42  }
43 
44  case FLA_DOUBLE:
45  {
46  double* buff_d = FLA_DOUBLE_PTR( d );
47  double* buff_e = FLA_DOUBLE_PTR( e );
48  double* buff_norm = FLA_DOUBLE_PTR( norm );
49 
51  buff_d, inc_d,
52  buff_e, inc_e,
53  buff_norm );
54 
55  break;
56  }
57  }
58 
59  return FLA_SUCCESS;
60 }
FLA_Error FLA_Norm1_tridiag_opd(int m_A, double *buff_d, int inc_d, double *buff_e, int inc_e, double *norm)
Definition: FLA_Norm1_tridiag.c:111
FLA_Error FLA_Norm1_tridiag_ops(int m_A, float *buff_d, int inc_d, float *buff_e, int inc_e, float *norm)
Definition: FLA_Norm1_tridiag.c:64
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_vector_dim(FLA_Obj obj)
Definition: FLA_Query.c:137
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145

◆ FLA_Norm1_tridiag_opd()

FLA_Error FLA_Norm1_tridiag_opd ( int  m_A,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
double *  norm 
)

References i.

Referenced by FLA_Norm1_tridiag(), and FLA_Tevd_compute_scaling_opd().

115 {
116  double* d = buff_d;
117  double* e = buff_e;
118  double nm;
119  int i;
120 
121  if ( m_A == 1 )
122  {
123  nm = fabs( *d );
124  }
125  else
126  {
127  double d_first = d[ (0 )*inc_d ];
128  double e_first = e[ (0 )*inc_e ];
129  double e_last = e[ (m_A-2)*inc_e ];
130  double d_last = d[ (m_A-1)*inc_d ];
131 
132  // Record the maximum of the absolute row/column sums for the
133  // first and last row/columns.
134  nm = max( fabs( d_first ) + fabs( e_first ),
135  fabs( e_last ) + fabs( d_last ) );
136 
137  for ( i = 1; i < m_A - 2; ++i )
138  {
139  double e0 = e[ (i-1)*inc_e ];
140  double e1 = e[ (i )*inc_e ];
141  double d1 = d[ (i )*inc_d ];
142 
143  // Update nm with the absolute row/column sum for the ith
144  // row/column.
145  nm = max( nm, fabs( e0 ) +
146  fabs( d1 ) +
147  fabs( e1 ) );
148  }
149  }
150 
151  *norm = nm;
152 
153  return FLA_SUCCESS;
154 }
int i
Definition: bl1_axmyv2.c:145

◆ FLA_Norm1_tridiag_ops()

FLA_Error FLA_Norm1_tridiag_ops ( int  m_A,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
float *  norm 
)

References i.

Referenced by FLA_Norm1_tridiag(), and FLA_Tevd_compute_scaling_ops().

68 {
69  float* d = buff_d;
70  float* e = buff_e;
71  float nm;
72  int i;
73 
74  if ( m_A == 1 )
75  {
76  nm = fabs( *d );
77  }
78  else
79  {
80  float d_first = d[ (0 )*inc_d ];
81  float e_first = e[ (0 )*inc_e ];
82  float e_last = e[ (m_A-2)*inc_e ];
83  float d_last = d[ (m_A-1)*inc_d ];
84 
85  // Record the maximum of the absolute row/column sums for the
86  // first and last row/columns.
87  nm = max( fabs( d_first ) + fabs( e_first ),
88  fabs( e_last ) + fabs( d_last ) );
89 
90  for ( i = 1; i < m_A - 2; ++i )
91  {
92  float e0 = e[ (i-1)*inc_e ];
93  float e1 = e[ (i )*inc_e ];
94  float d1 = d[ (i )*inc_d ];
95 
96  // Update nm with the absolute row/column sum for the ith
97  // row/column.
98  nm = max( nm, fabs( e0 ) +
99  fabs( d1 ) +
100  fabs( e1 ) );
101  }
102  }
103 
104  *norm = nm;
105 
106  return FLA_SUCCESS;
107 }
int i
Definition: bl1_axmyv2.c:145

◆ FLA_Tevd_compute_scaling_opd()

FLA_Error FLA_Tevd_compute_scaling_opd ( int  m_A,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
double *  sigma 
)

References bl1_d1(), FLA_Mach_params_opd(), and FLA_Norm1_tridiag_opd().

63 {
64  double one = bl1_d1();
65  double three = 3.0;
66  double norm;
67  double eps2;
68  double safmin;
69  double safmax;
70  double ssfmin;
71  double ssfmax;
72 
73  // Query some constants.
74  eps2 = FLA_Mach_params_opd( FLA_MACH_EPS2 );
75  safmin = FLA_Mach_params_opd( FLA_MACH_SFMIN );
76  safmax = one / safmin;
77 
78  // Compute the acceptable range for the 1-norm;
79  ssfmax = sqrt( safmax ) / three;
80  ssfmin = sqrt( safmin ) / eps2;
81 
82  // Compute the 1-norm of the tridiagonal matrix.
84  buff_d, inc_d,
85  buff_e, inc_e,
86  &norm );
87 
88  // Compute sigma accordingly if norm is outside of the range.
89  if ( norm > ssfmax )
90  {
91  *sigma = ssfmax / norm;
92  }
93  else if ( norm < ssfmin )
94  {
95  *sigma = ssfmin / norm;
96  }
97  else
98  {
99  *sigma = one;
100  }
101 
102  return FLA_SUCCESS;
103 }
double FLA_Mach_params_opd(FLA_Machval machval)
Definition: FLA_Mach_params.c:74
FLA_Error FLA_Norm1_tridiag_opd(int m_A, double *buff_d, int inc_d, double *buff_e, int inc_e, double *norm)
Definition: FLA_Norm1_tridiag.c:111
double bl1_d1(void)
Definition: bl1_constants.c:54

◆ FLA_Tevd_compute_scaling_ops()

FLA_Error FLA_Tevd_compute_scaling_ops ( int  m_A,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
float *  sigma 
)

References bl1_s1(), FLA_Mach_params_ops(), and FLA_Norm1_tridiag_ops().

17 {
18  float one = bl1_s1();
19  float three = 3.0F;
20  float norm;
21  float eps2;
22  float safmin;
23  float safmax;
24  float ssfmin;
25  float ssfmax;
26 
27  // Query some constants.
28  eps2 = FLA_Mach_params_ops( FLA_MACH_EPS2 );
29  safmin = FLA_Mach_params_ops( FLA_MACH_SFMIN );
30  safmax = one / safmin;
31 
32  // Compute the acceptable range for the 1-norm;
33  ssfmax = sqrt( safmax ) / three;
34  ssfmin = sqrt( safmin ) / eps2;
35 
36  // Compute the 1-norm of the tridiagonal matrix.
38  buff_d, inc_d,
39  buff_e, inc_e,
40  &norm );
41 
42  // Compute sigma accordingly if norm is outside of the range.
43  if ( norm > ssfmax )
44  {
45  *sigma = ssfmax / norm;
46  }
47  else if ( norm < ssfmin )
48  {
49  *sigma = ssfmin / norm;
50  }
51  else
52  {
53  *sigma = one;
54  }
55 
56  return FLA_SUCCESS;
57 }
float bl1_s1(void)
Definition: bl1_constants.c:47
FLA_Error FLA_Norm1_tridiag_ops(int m_A, float *buff_d, int inc_d, float *buff_e, int inc_e, float *norm)
Definition: FLA_Norm1_tridiag.c:64
float FLA_Mach_params_ops(FLA_Machval machval)
Definition: FLA_Mach_params.c:47

◆ FLA_Tevd_find_perfshift_opd()

FLA_Error FLA_Tevd_find_perfshift_opd ( int  m_d,
int  m_l,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
double *  buff_l,
int  inc_l,
int *  buff_lstat,
int  inc_lstat,
double *  buff_pu,
int  inc_pu,
int *  ij_shift 
)

References FLA_Wilkshift_tridiag_opd(), and i.

Referenced by FLA_Tevd_eigval_v_opd_var3().

38 {
39  double* d1p;
40  double* e1p;
41  double* d2p;
42  double wilkshift;
43  int i;
44  int ij_cand;
45  double dist_cand;
46  double pshift_cand;
47 
48  d1p = buff_d + (m_d-2)*inc_d;
49  e1p = buff_e + (m_d-2)*inc_e;
50  d2p = buff_d + (m_d-1)*inc_d;
51 
52  if ( *buff_ls == -1 )
53  {
54  *ij_shift = -1;
55  return FLA_FAILURE;
56  }
57 
59  *e1p,
60  *d2p,
61  &wilkshift );
62 
63 /*
64  // If we have shifted here previously, use a Wilkinson shfit.
65  prev_shift = buff_pu[ (m_d-1)*inc_pu ];
66 
67  if ( prev_shift != 0.0 )
68  {
69  // *shift = prev_shift;
70  *shift = wilkshift;
71  return FLA_SUCCESS;
72  }
73 */
74  ij_cand = -1;
75 
76  // Find an available (unused) shift.
77  for ( i = 0; i < m_l; ++i )
78  {
79  int* status = buff_ls + (i )*inc_ls;
80 
81  if ( *status == 0 )
82  {
83  double* lambda1 = buff_l + (i )*inc_l;
84  ij_cand = i;
85  pshift_cand = *lambda1;
86  dist_cand = fabs( wilkshift - pshift_cand );
87  }
88  }
89 
90  if ( ij_cand == -1 )
91  {
92  *ij_shift = -1;
93  *buff_ls = -1;
94  return FLA_FAILURE;
95  }
96 
97  // Now try to find a shift closer to wilkshift than the
98  // first one we found.
99  for ( i = 0; i < m_l; ++i )
100  {
101  double* lambda1 = buff_l + (i )*inc_l;
102  int* status = buff_ls + (i )*inc_ls;
103  double dist = fabs( wilkshift - *lambda1 );
104 
105  if ( *status == 1 ) continue;
106 
107  if ( dist < dist_cand )
108  {
109  ij_cand = i;
110  pshift_cand = *lambda1;
111  dist_cand = dist;
112  }
113  }
114 
115  *ij_shift = ij_cand;
116 
117  return FLA_SUCCESS;
118 }
FLA_Error FLA_Wilkshift_tridiag_opd(double delta1, double epsilon, double delta2, double *kappa)
Definition: FLA_Wilkshift_tridiag.c:155
int i
Definition: bl1_axmyv2.c:145

◆ FLA_Tevd_find_perfshift_ops()

FLA_Error FLA_Tevd_find_perfshift_ops ( int  m_d,
int  m_l,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
float *  buff_l,
int  inc_l,
int *  buff_lstat,
int  inc_lstat,
float *  buff_pu,
int  inc_pu,
int *  ij_shift 
)
22 {
23  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
24 
25  return FLA_SUCCESS;
26 }

◆ FLA_Tevd_find_submatrix_opd()

FLA_Error FLA_Tevd_find_submatrix_opd ( int  m_A,
int  ij_begin,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
int *  ijTL,
int *  ijBR 
)

References bl1_d0(), and FLA_Mach_params_opd().

34 {
35  double rzero = bl1_d0();
36  double eps;
37  int ij_tl;
38  int ij_br;
39 
40  // Initialize some numerical constants.
41  eps = FLA_Mach_params_opd( FLA_MACH_EPS );
42 
43  // Search for the first non-zero subdiagonal element starting at
44  // the index specified by ij_begin.
45  for ( ij_tl = ij_begin; ij_tl < m_A - 1; ++ij_tl )
46  {
47  double* d1 = buff_d + (ij_tl )*inc_d;
48  double* d2 = buff_d + (ij_tl+1)*inc_d;
49  double* e1 = buff_e + (ij_tl )*inc_e;
50  double abs_e1 = fabs( *e1 );
51 
52  // If we encounter a non-zero subdiagonal element that is close
53  // enough to zero, set it to zero.
54  if ( abs_e1 != rzero )
55  {
56  if ( abs_e1 <= eps * sqrt( fabs( *d1 ) ) *
57  sqrt( fabs( *d2 ) ) )
58  {
59 #ifdef PRINTF
60 printf( "FLA_Tevd_find_submatrix_opd: nudging non-zero subdiagonal element (e1) to zero.\n" );
61 printf( " d[%3d] = %22.19e\n", ij_tl, *d1 );
62 printf( " e[%3d] d[%3d] = %22.19e %22.19e\n", ij_tl, ij_tl+1, *e1, *d2 );
63 #endif
64  *e1 = rzero;
65  }
66  }
67 
68  // If we find a non-zero element, record it and break out of this
69  // loop.
70  if ( *e1 != rzero )
71  {
72 #ifdef PRINTF
73 printf( "FLA_Tevd_find_submatrix_opd: found non-zero subdiagonal element\n" );
74 printf( " e[%3d] = %22.19e\n", ij_tl, *e1 );
75 #endif
76  *ijTL = ij_tl;
77  break;
78  }
79  }
80 
81  // If ij_tl was incremented all the way up to m_A - 1, then we didn't
82  // find any non-zeros.
83  if ( ij_tl == m_A - 1 )
84  {
85 #ifdef PRINTF
86 printf( "FLA_Tevd_find_submatrix_opd: no submatrices found.\n" );
87 #endif
88  return FLA_FAILURE;
89  }
90 
91  // If we've gotten this far, then a non-zero subdiagonal element was
92  // found. Now we must walk the remaining portion of the subdiagonal
93  // to find the first zero element, or if one is not found, we simply
94  // use the last element of the subdiagonal.
95  for ( ij_br = ij_tl; ij_br < m_A - 1; ++ij_br )
96  {
97  double* d1 = buff_d + (ij_br )*inc_d;
98  double* d2 = buff_d + (ij_br+1)*inc_d;
99  double* e1 = buff_e + (ij_br )*inc_e;
100  double abs_e1 = fabs( *e1 );
101 
102  // If we encounter a non-zero subdiagonal element that is close
103  // enough to zero, set it to zero.
104  if ( abs_e1 != rzero )
105  {
106  if ( abs_e1 <= eps * sqrt( fabs( *d1 ) ) *
107  sqrt( fabs( *d2 ) ) )
108  {
109 #ifdef PRINTF
110 printf( "FLA_Tevd_find_submatrix_opd: nudging non-zero subdiagonal element (e1) to zero.\n" );
111 printf( " d[%3d] = %22.19e\n", ij_br, *d1 );
112 printf( " e[%3d] d[%3d] = %22.19e %22.19e\n", ij_br, ij_br+1, *e1, *d2 );
113 #endif
114  *e1 = rzero;
115  }
116  }
117 
118  // If we find a zero element, record it and break out of this
119  // loop.
120  if ( *e1 == rzero )
121  {
122 #ifdef PRINTF
123 printf( "FLA_Tevd_find_submatrix_opd: found zero subdiagonal element\n" );
124 printf( " e[%3d] = %22.19e\n", ij_br, *e1 );
125 #endif
126  break;
127  }
128  }
129 
130  // If a zero element was found, then ij_br should hold the index of
131  // that element. If a zero element was not found, then ij_br should
132  // hold m_A - 1. Either way, we save the value and return success.
133  *ijBR = ij_br;
134 
135  return FLA_SUCCESS;
136 }
double FLA_Mach_params_opd(FLA_Machval machval)
Definition: FLA_Mach_params.c:74
double bl1_d0(void)
Definition: bl1_constants.c:118

◆ FLA_Tevd_find_submatrix_ops()

FLA_Error FLA_Tevd_find_submatrix_ops ( int  m_A,
int  ij_begin,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
int *  ijTL,
int *  ijBR 
)
20 {
21  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
22 
23  return FLA_SUCCESS;
24 }

◆ FLA_Tevd_v_opc_var1()

FLA_Error FLA_Tevd_v_opc_var1 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)

Referenced by FLA_Tevd_v_opt_var1().

374 {
375  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
376 
377  return FLA_SUCCESS;
378 }

◆ FLA_Tevd_v_opc_var2()

FLA_Error FLA_Tevd_v_opc_var2 ( int  m_A,
int  m_U,
int  n_G,
int  n_G_extra,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_R,
int  rs_R,
int  cs_R,
scomplex buff_W,
int  rs_W,
int  cs_W,
scomplex buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)

Referenced by FLA_Tevd_v_opt_var2().

416 {
417  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
418 
419  return FLA_SUCCESS;
420 }

◆ FLA_Tevd_v_opd_var1()

FLA_Error FLA_Tevd_v_opd_var1 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double *  buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)

References bl1_z1(), bl1_zsetm(), FLA_Abort(), FLA_Apply_G_rf_bld_var3(), FLA_Tevd_find_submatrix_opd(), and FLA_Tevd_iteracc_v_opd_var1().

Referenced by FLA_Tevd_v_opt_var1().

153 {
154  dcomplex one = bl1_z1();
155 
156  dcomplex* G;
157  double* d1;
158  double* e1;
159  int r_val;
160  int done;
161  int m_G_sweep_max;
162  int ij_begin;
163  int ijTL, ijBR;
164  int m_A11;
165  int n_iter_perf;
166  int n_U_apply;
167  int total_deflations;
168  int n_deflations;
169  int n_iter_prev;
170  int n_iter_perf_sweep_max;
171 
172  // Initialize our completion flag.
173  done = FALSE;
174 
175  // Initialize a counter that holds the maximum number of rows of G
176  // that we would need to initialize for the next sweep.
177  m_G_sweep_max = m_A - 1;
178 
179  // Initialize a counter for the total number of iterations performed.
180  n_iter_prev = 0;
181 
182  // Iterate until the matrix has completely deflated.
183  for ( total_deflations = 0; done != TRUE; )
184  {
185  // Initialize G to contain only identity rotations.
186  bl1_zsetm( m_G_sweep_max,
187  n_G,
188  &one,
189  buff_G, rs_G, cs_G );
190 
191  // Keep track of the maximum number of iterations performed in the
192  // current sweep. This is used when applying the sweep's Givens
193  // rotations.
194  n_iter_perf_sweep_max = 0;
195 
196  // Perform a sweep: Move through the matrix and perform a tridiagonal
197  // EVD on each non-zero submatrix that is encountered. During the
198  // first time through, ijTL will be 0 and ijBR will be m_A - 1.
199  for ( ij_begin = 0; ij_begin < m_A; )
200  {
201 
202 #ifdef PRINTF
203 if ( ij_begin == 0 )
204 printf( "FLA_Tevd_v_opd_var1: beginning new sweep (ij_begin = %d)\n", ij_begin );
205 #endif
206 
207  // Search for the first submatrix along the diagonal that is
208  // bounded by zeroes (or endpoints of the matrix). If no
209  // submatrix is found (ie: if the entire subdiagonal is zero
210  // then FLA_FAILURE is returned. This function also inspects
211  // subdiagonal elements for proximity to zero. If a given
212  // element is close enough to zero, then it is deemed
213  // converged and manually set to zero.
214  r_val = FLA_Tevd_find_submatrix_opd( m_A,
215  ij_begin,
216  buff_d, inc_d,
217  buff_e, inc_e,
218  &ijTL,
219  &ijBR );
220 
221  // Verify that a submatrix was found. If one was not found,
222  // then we are done with the current sweep. Furthermore, if
223  // a submatrix was not found AND we began our search at the
224  // beginning of the matrix (ie: ij_begin == 0), then the
225  // matrix has completely deflated and so we are done with
226  // Francis step iteration.
227  if ( r_val == FLA_FAILURE )
228  {
229  if ( ij_begin == 0 )
230  {
231 #ifdef PRINTF
232 printf( "FLA_Tevd_v_opd_var1: subdiagonal is completely zero.\n" );
233 printf( "FLA_Tevd_v_opd_var1: Francis iteration is done!\n" );
234 #endif
235  done = TRUE;
236  }
237 
238  // Break out of the current sweep so we can apply the last
239  // remaining Givens rotations.
240  break;
241  }
242 
243  // If we got this far, then:
244  // (a) ijTL refers to the index of the first non-zero
245  // subdiagonal along the diagonal, and
246  // (b) ijBR refers to either:
247  // - the first zero element that occurs after ijTL, or
248  // - the the last diagonal element.
249  // Note that ijTL and ijBR also correspond to the first and
250  // last diagonal elements of the submatrix of interest. Thus,
251  // we may compute the dimension of this submatrix as:
252  m_A11 = ijBR - ijTL + 1;
253 
254 #ifdef PRINTF
255 printf( "FLA_Tevd_v_opd_var1: ij_begin = %d\n", ij_begin );
256 printf( "FLA_Tevd_v_opd_var1: ijTL = %d\n", ijTL );
257 printf( "FLA_Tevd_v_opd_var1: ijBR = %d\n", ijBR );
258 printf( "FLA_Tevd_v_opd_var1: m_A11 = %d\n", m_A11 );
259 #endif
260 
261  // Adjust ij_begin, which gets us ready for the next submatrix
262  // search in the current sweep.
263  ij_begin = ijBR + 1;
264 
265  // Index to the submatrices upon which we will operate.
266  d1 = buff_d + ijTL * inc_d;
267  e1 = buff_e + ijTL * inc_e;
268  G = buff_G + ijTL * rs_G;
269 
270  // Search for a batch of eigenvalues, recursing on deflated
271  // subproblems whenever a split occurs. Iteration continues
272  // as long as:
273  // (a) there is still matrix left to operate on, and
274  // (b) the number of iterations performed in this batch is
275  // less than n_G.
276  // If/when either of the two above conditions fails to hold,
277  // the function returns.
278  n_deflations = FLA_Tevd_iteracc_v_opd_var1( m_A11,
279  n_G,
280  ijTL,
281  d1, inc_d,
282  e1, inc_e,
283  G, rs_G, cs_G,
284  &n_iter_perf );
285 
286  // Record the number of deflations that were observed.
287  total_deflations += n_deflations;
288 
289  // Update the maximum number of iterations performed in the
290  // current sweep.
291  n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
292 
293 #ifdef PRINTF
294 printf( "FLA_Tevd_v_opd_var1: deflations observed = %d\n", n_deflations );
295 printf( "FLA_Tevd_v_opd_var1: total deflations observed = %d\n", total_deflations );
296 printf( "FLA_Tevd_v_opd_var1: num iterations performed = %d\n", n_iter_perf );
297 #endif
298 
299  // Store the most recent value of ijBR in m_G_sweep_max.
300  // When the sweep is done, this value will contain the minimum
301  // number of rows of G we can apply and safely include all
302  // non-identity rotations that were computed during the
303  // eigenvalue searches.
304  m_G_sweep_max = ijBR;
305 
306  // Make sure we haven't exceeded our maximum iteration count.
307  if ( n_iter_prev >= m_A * n_iter_max )
308  {
309 #ifdef PRINTF
310 printf( "FLA_Tevd_v_opd_var1: reached maximum total number of iterations: %d\n", n_iter_prev );
311 #endif
312  FLA_Abort();
313  //return FLA_FAILURE;
314  }
315  }
316 
317  // The sweep is complete. Now we must apply the Givens rotations
318  // that were accumulated during the sweep.
319 
320  // Recall that the number of columns of U to which we apply
321  // rotations is one more than the number of rotations.
322  n_U_apply = m_G_sweep_max + 1;
323 
324 #ifdef PRINTF
325 printf( "FLA_Tevd_v_opd_var1: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
326 #endif
327 
328  // Apply the Givens rotations. Note that we optimize the scope
329  // of the operation in two ways:
330  // 1. We only apply k sets of Givens rotations, where
331  // k = n_iter_perf_sweep_max. We could simply always apply
332  // n_G sets of rotations since G is initialized to contain
333  // identity rotations in every element, but we do this to
334  // save a little bit of time.
335  // 2. We only apply to the first n_U_apply columns of A since
336  // this is the most we need to touch given the ijBR index
337  // bound of the last submatrix found in the previous sweep.
338  // Similar to above, we could simply always perform the
339  // application on all m_A columns of A, but instead we apply
340  // only to the first n_U_apply columns to save time.
341  //FLA_Apply_G_rf_bld_var1( n_iter_perf_sweep_max,
342  //FLA_Apply_G_rf_bld_var2( n_iter_perf_sweep_max,
343  FLA_Apply_G_rf_bld_var3( n_iter_perf_sweep_max,
344  //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max,
345  //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max,
346  m_U,
347  n_U_apply,
348  buff_G, rs_G, cs_G,
349  buff_U, rs_U, cs_U,
350  b_alg );
351 
352 
353 
354  // Increment the total number of iterations previously performed.
355  n_iter_prev += n_iter_perf_sweep_max;
356 
357 #ifdef PRINTF
358 printf( "FLA_Tevd_v_opd_var1: total number of iterations performed: %d\n", n_iter_prev );
359 #endif
360  }
361 
362  return n_iter_prev;
363 }
FLA_Error FLA_Tevd_iteracc_v_opd_var1(int m_A, int n_G, int ijTL, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, int *n_iter_perf)
Definition: FLA_Tevd_iteracc_v_opt_var1.c:26
FLA_Error FLA_Tevd_find_submatrix_opd(int m_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR)
Definition: FLA_Tevd_find_submatrix.c:28
FLA_Error FLA_Apply_G_rf_bld_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
Definition: FLA_Apply_G_rf_blk_var3.c:128
void FLA_Abort(void)
Definition: FLA_Error.c:248
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:78
Definition: blis_type_defs.h:137
dcomplex bl1_z1(void)
Definition: bl1_constants.c:69

◆ FLA_Tevd_v_opd_var2()

FLA_Error FLA_Tevd_v_opd_var2 ( int  m_A,
int  m_U,
int  n_G,
int  n_G_extra,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double *  buff_R,
int  rs_R,
int  cs_R,
double *  buff_W,
int  rs_W,
int  cs_W,
double *  buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)

References bl1_d0(), bl1_d1(), bl1_dcopymt(), bl1_dgemm(), bl1_dident(), bl1_z1(), bl1_zsetm(), BLIS1_NO_TRANSPOSE, FLA_Abort(), FLA_Apply_G_rf_bld_var3b(), FLA_Tevd_find_submatrix_opd(), and FLA_Tevd_iteracc_v_opd_var1().

Referenced by FLA_Tevd_v_opt_var2().

181 {
182  dcomplex one = bl1_z1();
183  double rone = bl1_d1();
184  double rzero = bl1_d0();
185 
186  dcomplex* G;
187  double* d1;
188  double* e1;
189  int r_val;
190  int done;
191  int m_G_sweep_max;
192  int ij_begin;
193  int ijTL, ijBR;
194  int m_A11;
195  int n_iter_perf;
196  int n_U_apply;
197  int total_deflations;
198  int n_deflations;
199  int n_iter_prev;
200  int n_iter_perf_sweep_max;
201 
202  // Initialize our completion flag.
203  done = FALSE;
204 
205  // Initialize a counter that holds the maximum number of rows of G
206  // that we would need to initialize for the next sweep.
207  m_G_sweep_max = m_A - 1;
208 
209  // Initialize a counter for the total number of iterations performed.
210  n_iter_prev = 0;
211 
212  // Initialize R to identity.
213  bl1_dident( m_A,
214  buff_R, rs_R, cs_R );
215 
216  // Iterate until the matrix has completely deflated.
217  for ( total_deflations = 0; done != TRUE; )
218  {
219 
220  // Initialize G to contain only identity rotations.
221  bl1_zsetm( m_G_sweep_max,
222  n_G,
223  &one,
224  buff_G, rs_G, cs_G );
225 
226  // Keep track of the maximum number of iterations performed in the
227  // current sweep. This is used when applying the sweep's Givens
228  // rotations.
229  n_iter_perf_sweep_max = 0;
230 
231  // Perform a sweep: Move through the matrix and perform a tridiagonal
232  // EVD on each non-zero submatrix that is encountered. During the
233  // first time through, ijTL will be 0 and ijBR will be m_A - 1.
234  for ( ij_begin = 0; ij_begin < m_A; )
235  {
236 
237 #ifdef PRINTF
238 if ( ij_begin == 0 )
239 printf( "FLA_Tevd_v_opd_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
240 #endif
241 
242  // Search for the first submatrix along the diagonal that is
243  // bounded by zeroes (or endpoints of the matrix). If no
244  // submatrix is found (ie: if the entire subdiagonal is zero
245  // then FLA_FAILURE is returned. This function also inspects
246  // subdiagonal elements for proximity to zero. If a given
247  // element is close enough to zero, then it is deemed
248  // converged and manually set to zero.
249  r_val = FLA_Tevd_find_submatrix_opd( m_A,
250  ij_begin,
251  buff_d, inc_d,
252  buff_e, inc_e,
253  &ijTL,
254  &ijBR );
255 
256  // Verify that a submatrix was found. If one was not found,
257  // then we are done with the current sweep. Furthermore, if
258  // a submatrix was not found AND we began our search at the
259  // beginning of the matrix (ie: ij_begin == 0), then the
260  // matrix has completely deflated and so we are done with
261  // Francis step iteration.
262  if ( r_val == FLA_FAILURE )
263  {
264  if ( ij_begin == 0 )
265  {
266 #ifdef PRINTF
267 printf( "FLA_Tevd_v_opd_var2: subdiagonal is completely zero.\n" );
268 printf( "FLA_Tevd_v_opd_var2: Francis iteration is done!\n" );
269 #endif
270  done = TRUE;
271  }
272 
273  // Break out of the current sweep so we can apply the last
274  // remaining Givens rotations.
275  break;
276  }
277 
278  // If we got this far, then:
279  // (a) ijTL refers to the index of the first non-zero
280  // subdiagonal along the diagonal, and
281  // (b) ijBR refers to either:
282  // - the first zero element that occurs after ijTL, or
283  // - the the last diagonal element.
284  // Note that ijTL and ijBR also correspond to the first and
285  // last diagonal elements of the submatrix of interest. Thus,
286  // we may compute the dimension of this submatrix as:
287  m_A11 = ijBR - ijTL + 1;
288 
289 #ifdef PRINTF
290 printf( "FLA_Tevd_v_opd_var2: ij_begin = %d\n", ij_begin );
291 printf( "FLA_Tevd_v_opd_var2: ijTL = %d\n", ijTL );
292 printf( "FLA_Tevd_v_opd_var2: ijBR = %d\n", ijBR );
293 printf( "FLA_Tevd_v_opd_var2: m_A11 = %d\n", m_A11 );
294 #endif
295 
296  // Adjust ij_begin, which gets us ready for the next subproblem, if
297  // there is one.
298  ij_begin = ijBR + 1;
299 
300  // Index to the submatrices upon which we will operate.
301  d1 = buff_d + ijTL * inc_d;
302  e1 = buff_e + ijTL * inc_e;
303  G = buff_G + ijTL * rs_G;
304 
305  // Search for a batch of eigenvalues, recursing on deflated
306  // subproblems whenever a split occurs. Iteration continues
307  // as long as
308  // (a) there is still matrix left to operate on, and
309  // (b) the number of iterations performed in this batch is
310  // less than n_G.
311  // If/when either of the two above conditions fails to hold,
312  // the function returns.
313  n_deflations = FLA_Tevd_iteracc_v_opd_var1( m_A11,
314  n_G,
315  ijTL,
316  d1, inc_d,
317  e1, inc_e,
318  G, rs_G, cs_G,
319  &n_iter_perf );
320 
321  // Record the number of deflations that we observed.
322  total_deflations += n_deflations;
323 
324  // Update the maximum number of iterations performed in the
325  // current sweep.
326  n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
327 
328 #ifdef PRINTF
329 printf( "FLA_Tevd_v_opd_var2: deflations observed = %d\n", n_deflations );
330 printf( "FLA_Tevd_v_opd_var2: total deflations observed = %d\n", total_deflations );
331 printf( "FLA_Tevd_v_opd_var2: num iterations = %d\n", n_iter_perf );
332 #endif
333 
334  // Store the most recent value of ijBR in m_G_sweep_max.
335  // When the sweep is done, this value will contain the minimum
336  // number of rows of G we can apply and safely include all
337  // non-identity rotations that were computed during the
338  // eigenvalue searches.
339  m_G_sweep_max = ijBR;
340 
341  // Make sure we haven't exceeded our maximum iteration count.
342  if ( n_iter_prev >= m_A * n_iter_max )
343  {
344 #ifdef PRINTF
345 printf( "FLA_Tevd_v_opd_var2: reached maximum total number of iterations: %d\n", n_iter_prev );
346 #endif
347  FLA_Abort();
348  //return FLA_FAILURE;
349  }
350  }
351 
352  // The sweep is complete. Now we must apply the Givens rotations
353  // that were accumulated during the sweep.
354 
355 
356  // Recall that the number of columns of U to which we apply
357  // rotations is one more than the number of rotations.
358  n_U_apply = m_G_sweep_max + 1;
359 
360  // Apply the Givens rotations that were computed as part of
361  // the previous batch of iterations.
362  //FLA_Apply_G_rf_bld_var8b( n_iter_perf_sweep_max,
363  //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
364  FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
365  //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
366  //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
367  m_U,
368  n_U_apply,
369  n_iter_prev,
370  buff_G, rs_G, cs_G,
371  buff_R, rs_R, cs_R,
372  b_alg );
373 
374 #ifdef PRINTF
375 printf( "FLA_Tevd_v_opd_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
376 #endif
377 
378  // Increment the total number of iterations previously performed.
379  n_iter_prev += n_iter_perf_sweep_max;
380  }
381 
382  // Copy the contents of Q to temporary storage.
384  m_A,
385  m_A,
386  buff_U, rs_U, cs_U,
387  buff_W, rs_W, cs_W );
388 
389 
390  // Multiply Q by R, overwriting U.
393  m_A,
394  m_A,
395  m_A,
396  &rone,
397  ( double* )buff_W, rs_W, cs_W,
398  buff_R, rs_R, cs_R,
399  &rzero,
400  ( double* )buff_U, rs_U, cs_U );
401 
402  return n_iter_prev;
403 }
FLA_Error FLA_Tevd_iteracc_v_opd_var1(int m_A, int n_G, int ijTL, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, int *n_iter_perf)
Definition: FLA_Tevd_iteracc_v_opt_var1.c:26
void bl1_dident(int m, double *a, int a_rs, int a_cs)
Definition: bl1_ident.c:32
FLA_Error FLA_Tevd_find_submatrix_opd(int m_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR)
Definition: FLA_Tevd_find_submatrix.c:28
void bl1_dgemm(trans1_t transa, trans1_t transb, int m, int k, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
Definition: bl1_gemm.c:274
double bl1_d0(void)
Definition: bl1_constants.c:118
void bl1_dcopymt(trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:148
void FLA_Abort(void)
Definition: FLA_Error.c:248
Definition: blis_type_defs.h:54
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:78
double bl1_d1(void)
Definition: bl1_constants.c:54
FLA_Error FLA_Apply_G_rf_bld_var3b(int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
Definition: FLA_Apply_G_rf_blk_var3b.c:135
Definition: blis_type_defs.h:137
dcomplex bl1_z1(void)
Definition: bl1_constants.c:69

◆ FLA_Tevd_v_ops_var1()

FLA_Error FLA_Tevd_v_ops_var1 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)

Referenced by FLA_Tevd_v_opt_var1().

136 {
137  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
138 
139  return FLA_SUCCESS;
140 }

◆ FLA_Tevd_v_ops_var2()

FLA_Error FLA_Tevd_v_ops_var2 ( int  m_A,
int  m_U,
int  n_G,
int  n_G_extra,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_R,
int  rs_R,
int  cs_R,
float *  buff_W,
int  rs_W,
int  cs_W,
float *  buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)

Referenced by FLA_Tevd_v_opt_var2().

162 {
163  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
164 
165  return FLA_SUCCESS;
166 }

◆ FLA_Tevd_v_opt_var1()

FLA_Error FLA_Tevd_v_opt_var1 ( dim_t  n_iter_max,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  G,
FLA_Obj  U,
dim_t  b_alg 
)

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_dim(), FLA_Obj_vector_inc(), FLA_Obj_width(), FLA_Tevd_v_opc_var1(), FLA_Tevd_v_opd_var1(), FLA_Tevd_v_ops_var1(), and FLA_Tevd_v_opz_var1().

Referenced by FLA_Hevd_lv_unb_var1().

14 {
15  FLA_Error r_val = FLA_SUCCESS;
16  FLA_Datatype datatype;
17  int m_A, m_U, n_G;
18  int inc_d;
19  int inc_e;
20  int rs_G, cs_G;
21  int rs_U, cs_U;
22 
23  datatype = FLA_Obj_datatype( U );
24 
25  m_A = FLA_Obj_vector_dim( d );
26  m_U = FLA_Obj_length( U );
27  n_G = FLA_Obj_width( G );
28 
29  inc_d = FLA_Obj_vector_inc( d );
30  inc_e = FLA_Obj_vector_inc( e );
31 
32  rs_G = FLA_Obj_row_stride( G );
33  cs_G = FLA_Obj_col_stride( G );
34 
35  rs_U = FLA_Obj_row_stride( U );
36  cs_U = FLA_Obj_col_stride( U );
37 
38 
39  switch ( datatype )
40  {
41  case FLA_FLOAT:
42  {
43  float* buff_d = FLA_FLOAT_PTR( d );
44  float* buff_e = FLA_FLOAT_PTR( e );
45  scomplex* buff_G = FLA_COMPLEX_PTR( G );
46  float* buff_U = FLA_FLOAT_PTR( U );
47 
48  r_val = FLA_Tevd_v_ops_var1( m_A,
49  m_U,
50  n_G,
51  n_iter_max,
52  buff_d, inc_d,
53  buff_e, inc_e,
54  buff_G, rs_G, cs_G,
55  buff_U, rs_U, cs_U,
56  b_alg );
57 
58  break;
59  }
60 
61  case FLA_DOUBLE:
62  {
63  double* buff_d = FLA_DOUBLE_PTR( d );
64  double* buff_e = FLA_DOUBLE_PTR( e );
65  dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
66  double* buff_U = FLA_DOUBLE_PTR( U );
67 
68  r_val = FLA_Tevd_v_opd_var1( m_A,
69  m_U,
70  n_G,
71  n_iter_max,
72  buff_d, inc_d,
73  buff_e, inc_e,
74  buff_G, rs_G, cs_G,
75  buff_U, rs_U, cs_U,
76  b_alg );
77 
78  break;
79  }
80 
81  case FLA_COMPLEX:
82  {
83  float* buff_d = FLA_FLOAT_PTR( d );
84  float* buff_e = FLA_FLOAT_PTR( e );
85  scomplex* buff_G = FLA_COMPLEX_PTR( G );
86  scomplex* buff_U = FLA_COMPLEX_PTR( U );
87 
88  r_val = FLA_Tevd_v_opc_var1( m_A,
89  m_U,
90  n_G,
91  n_iter_max,
92  buff_d, inc_d,
93  buff_e, inc_e,
94  buff_G, rs_G, cs_G,
95  buff_U, rs_U, cs_U,
96  b_alg );
97 
98  break;
99  }
100 
101  case FLA_DOUBLE_COMPLEX:
102  {
103  double* buff_d = FLA_DOUBLE_PTR( d );
104  double* buff_e = FLA_DOUBLE_PTR( e );
105  dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
106  dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );
107 
108  r_val = FLA_Tevd_v_opz_var1( m_A,
109  m_U,
110  n_G,
111  n_iter_max,
112  buff_d, inc_d,
113  buff_e, inc_e,
114  buff_G, rs_G, cs_G,
115  buff_U, rs_U, cs_U,
116  b_alg );
117 
118  break;
119  }
120  }
121 
122  return r_val;
123 }
FLA_Error FLA_Tevd_v_opz_var1(int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_U, int rs_U, int cs_U, int b_alg)
Definition: FLA_Tevd_v_opt_var1.c:380
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Error FLA_Tevd_v_ops_var1(int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_U, int rs_U, int cs_U, int b_alg)
Definition: FLA_Tevd_v_opt_var1.c:127
int FLA_Error
Definition: FLA_type_defs.h:47
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
Definition: blis_type_defs.h:132
FLA_Error FLA_Tevd_v_opc_var1(int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_U, int rs_U, int cs_U, int b_alg)
Definition: FLA_Tevd_v_opt_var1.c:365
dim_t FLA_Obj_vector_dim(FLA_Obj obj)
Definition: FLA_Query.c:137
int FLA_Datatype
Definition: FLA_type_defs.h:49
FLA_Error FLA_Tevd_v_opd_var1(int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_U, int rs_U, int cs_U, int b_alg)
Definition: FLA_Tevd_v_opt_var1.c:144
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
Definition: blis_type_defs.h:137

◆ FLA_Tevd_v_opt_var2()

FLA_Error FLA_Tevd_v_opt_var2 ( dim_t  n_iter_max,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  G,
FLA_Obj  R,
FLA_Obj  W,
FLA_Obj  U,
dim_t  b_alg 
)

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_dim(), FLA_Obj_vector_inc(), FLA_Obj_width(), FLA_Tevd_v_opc_var2(), FLA_Tevd_v_opd_var2(), FLA_Tevd_v_ops_var2(), and FLA_Tevd_v_opz_var2().

Referenced by FLA_Hevd_lv_unb_var2().

14 {
15  FLA_Error r_val = FLA_SUCCESS;
16  FLA_Datatype datatype;
17  int m_A, m_U, n_G;
18  int inc_d;
19  int inc_e;
20  int rs_G, cs_G;
21  int rs_R, cs_R;
22  int rs_U, cs_U;
23  int rs_W, cs_W;
24 
25  datatype = FLA_Obj_datatype( U );
26 
27  m_A = FLA_Obj_vector_dim( d );
28  m_U = FLA_Obj_length( U );
29  n_G = FLA_Obj_width( G );
30 
31  inc_d = FLA_Obj_vector_inc( d );
32  inc_e = FLA_Obj_vector_inc( e );
33 
34  rs_G = FLA_Obj_row_stride( G );
35  cs_G = FLA_Obj_col_stride( G );
36 
37  rs_R = FLA_Obj_row_stride( R );
38  cs_R = FLA_Obj_col_stride( R );
39 
40  rs_W = FLA_Obj_row_stride( W );
41  cs_W = FLA_Obj_col_stride( W );
42 
43  rs_U = FLA_Obj_row_stride( U );
44  cs_U = FLA_Obj_col_stride( U );
45 
46 
47  switch ( datatype )
48  {
49  case FLA_FLOAT:
50  {
51  float* buff_d = FLA_FLOAT_PTR( d );
52  float* buff_e = FLA_FLOAT_PTR( e );
53  scomplex* buff_G = FLA_COMPLEX_PTR( G );
54  float* buff_R = FLA_FLOAT_PTR( R );
55  float* buff_W = FLA_FLOAT_PTR( W );
56  float* buff_U = FLA_FLOAT_PTR( U );
57 
58  r_val = FLA_Tevd_v_ops_var2( m_A,
59  m_U,
60  n_G,
61  n_iter_max,
62  buff_d, inc_d,
63  buff_e, inc_e,
64  buff_G, rs_G, cs_G,
65  buff_R, rs_R, cs_R,
66  buff_W, rs_W, cs_W,
67  buff_U, rs_U, cs_U,
68  b_alg );
69 
70  break;
71  }
72 
73  case FLA_DOUBLE:
74  {
75  double* buff_d = FLA_DOUBLE_PTR( d );
76  double* buff_e = FLA_DOUBLE_PTR( e );
77  dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
78  double* buff_R = FLA_DOUBLE_PTR( R );
79  double* buff_W = FLA_DOUBLE_PTR( W );
80  double* buff_U = FLA_DOUBLE_PTR( U );
81 
82  r_val = FLA_Tevd_v_opd_var2( m_A,
83  m_U,
84  n_G,
85  n_iter_max,
86  buff_d, inc_d,
87  buff_e, inc_e,
88  buff_G, rs_G, cs_G,
89  buff_R, rs_R, cs_R,
90  buff_W, rs_W, cs_W,
91  buff_U, rs_U, cs_U,
92  b_alg );
93 
94  break;
95  }
96 
97  case FLA_COMPLEX:
98  {
99  float* buff_d = FLA_FLOAT_PTR( d );
100  float* buff_e = FLA_FLOAT_PTR( e );
101  scomplex* buff_G = FLA_COMPLEX_PTR( G );
102  float* buff_R = FLA_FLOAT_PTR( R );
103  scomplex* buff_W = FLA_COMPLEX_PTR( W );
104  scomplex* buff_U = FLA_COMPLEX_PTR( U );
105 
106  r_val = FLA_Tevd_v_opc_var2( m_A,
107  m_U,
108  n_G,
109  n_iter_max,
110  buff_d, inc_d,
111  buff_e, inc_e,
112  buff_G, rs_G, cs_G,
113  buff_R, rs_R, cs_R,
114  buff_W, rs_W, cs_W,
115  buff_U, rs_U, cs_U,
116  b_alg );
117 
118  break;
119  }
120 
121  case FLA_DOUBLE_COMPLEX:
122  {
123  double* buff_d = FLA_DOUBLE_PTR( d );
124  double* buff_e = FLA_DOUBLE_PTR( e );
125  dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
126  double* buff_R = FLA_DOUBLE_PTR( R );
127  dcomplex* buff_W = FLA_DOUBLE_COMPLEX_PTR( W );
128  dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );
129 
130  r_val = FLA_Tevd_v_opz_var2( m_A,
131  m_U,
132  n_G,
133  n_iter_max,
134  buff_d, inc_d,
135  buff_e, inc_e,
136  buff_G, rs_G, cs_G,
137  buff_R, rs_R, cs_R,
138  buff_W, rs_W, cs_W,
139  buff_U, rs_U, cs_U,
140  b_alg );
141 
142  break;
143  }
144  }
145 
146  return r_val;
147 }
FLA_Error FLA_Tevd_v_ops_var2(int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_R, int rs_R, int cs_R, float *buff_W, int rs_W, int cs_W, float *buff_U, int rs_U, int cs_U, int b_alg)
Definition: FLA_Tevd_v_opt_var2.c:151
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
int FLA_Error
Definition: FLA_type_defs.h:47
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Tevd_v_opd_var2(int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_R, int rs_R, int cs_R, double *buff_W, int rs_W, int cs_W, double *buff_U, int rs_U, int cs_U, int b_alg)
Definition: FLA_Tevd_v_opt_var2.c:170
FLA_Error FLA_Tevd_v_opc_var2(int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_R, int rs_R, int cs_R, scomplex *buff_W, int rs_W, int cs_W, scomplex *buff_U, int rs_U, int cs_U, int b_alg)
Definition: FLA_Tevd_v_opt_var2.c:405
Definition: blis_type_defs.h:132
dim_t FLA_Obj_vector_dim(FLA_Obj obj)
Definition: FLA_Query.c:137
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
FLA_Error FLA_Tevd_v_opz_var2(int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_R, int rs_R, int cs_R, dcomplex *buff_W, int rs_W, int cs_W, dcomplex *buff_U, int rs_U, int cs_U, int b_alg)
Definition: FLA_Tevd_v_opt_var2.c:424
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
Definition: blis_type_defs.h:137

◆ FLA_Tevd_v_opz_var1()

FLA_Error FLA_Tevd_v_opz_var1 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)

References bl1_z1(), bl1_zsetm(), FLA_Abort(), FLA_Apply_G_rf_blz_var3(), FLA_Tevd_find_submatrix_opd(), and FLA_Tevd_iteracc_v_opd_var1().

Referenced by FLA_Tevd_v_opt_var1().

389 {
390  dcomplex one = bl1_z1();
391 
392  dcomplex* G;
393  double* d1;
394  double* e1;
395  int r_val;
396  int done;
397  int m_G_sweep_max;
398  int ij_begin;
399  int ijTL, ijBR;
400  int m_A11;
401  int n_iter_perf;
402  int n_U_apply;
403  int total_deflations;
404  int n_deflations;
405  int n_iter_prev;
406  int n_iter_perf_sweep_max;
407 
408  // Initialize our completion flag.
409  done = FALSE;
410 
411  // Initialize a counter that holds the maximum number of rows of G
412  // that we would need to initialize for the next sweep.
413  m_G_sweep_max = m_A - 1;
414 
415  // Initialize a counter for the total number of iterations performed.
416  n_iter_prev = 0;
417 
418  // Iterate until the matrix has completely deflated.
419  for ( total_deflations = 0; done != TRUE; )
420  {
421 
422  // Initialize G to contain only identity rotations.
423  bl1_zsetm( m_G_sweep_max,
424  n_G,
425  &one,
426  buff_G, rs_G, cs_G );
427 
428  // Keep track of the maximum number of iterations performed in the
429  // current sweep. This is used when applying the sweep's Givens
430  // rotations.
431  n_iter_perf_sweep_max = 0;
432 
433  // Perform a sweep: Move through the matrix and perform a tridiagonal
434  // EVD on each non-zero submatrix that is encountered. During the
435  // first time through, ijTL will be 0 and ijBR will be m_A - 1.
436  for ( ij_begin = 0; ij_begin < m_A; )
437  {
438 
439 #ifdef PRINTF
440 if ( ij_begin == 0 )
441 printf( "FLA_Tevd_v_opz_var1: beginning new sweep (ij_begin = %d)\n", ij_begin );
442 #endif
443 
444  // Search for the first submatrix along the diagonal that is
445  // bounded by zeroes (or endpoints of the matrix). If no
446  // submatrix is found (ie: if the entire subdiagonal is zero
447  // then FLA_FAILURE is returned. This function also inspects
448  // subdiagonal elements for proximity to zero. If a given
449  // element is close enough to zero, then it is deemed
450  // converged and manually set to zero.
451  r_val = FLA_Tevd_find_submatrix_opd( m_A,
452  ij_begin,
453  buff_d, inc_d,
454  buff_e, inc_e,
455  &ijTL,
456  &ijBR );
457 
458  // Verify that a submatrix was found. If one was not found,
459  // then we are done with the current sweep. Furthermore, if
460  // a submatrix was not found AND we began our search at the
461  // beginning of the matrix (ie: ij_begin == 0), then the
462  // matrix has completely deflated and so we are done with
463  // Francis step iteration.
464  if ( r_val == FLA_FAILURE )
465  {
466  if ( ij_begin == 0 )
467  {
468 #ifdef PRINTF
469 printf( "FLA_Tevd_v_opz_var1: subdiagonal is completely zero.\n" );
470 printf( "FLA_Tevd_v_opz_var1: Francis iteration is done!\n" );
471 #endif
472  done = TRUE;
473  }
474 
475  // Break out of the current sweep so we can apply the last
476  // remaining Givens rotations.
477  break;
478  }
479 
480  // If we got this far, then:
481  // (a) ijTL refers to the index of the first non-zero
482  // subdiagonal along the diagonal, and
483  // (b) ijBR refers to either:
484  // - the first zero element that occurs after ijTL, or
485  // - the the last diagonal element.
486  // Note that ijTL and ijBR also correspond to the first and
487  // last diagonal elements of the submatrix of interest. Thus,
488  // we may compute the dimension of this submatrix as:
489  m_A11 = ijBR - ijTL + 1;
490 
491 #ifdef PRINTF
492 printf( "FLA_Tevd_v_opz_var1: ij_begin = %d\n", ij_begin );
493 printf( "FLA_Tevd_v_opz_var1: ijTL = %d\n", ijTL );
494 printf( "FLA_Tevd_v_opz_var1: ijBR = %d\n", ijBR );
495 printf( "FLA_Tevd_v_opz_var1: m_A11 = %d\n", m_A11 );
496 #endif
497 
498  // Adjust ij_begin, which gets us ready for the next submatrix
499  // search in the current sweep.
500  ij_begin = ijBR + 1;
501 
502  // Index to the submatrices upon which we will operate.
503  d1 = buff_d + ijTL * inc_d;
504  e1 = buff_e + ijTL * inc_e;
505  G = buff_G + ijTL * rs_G;
506 
507  // Search for a batch of eigenvalues, recursing on deflated
508  // subproblems whenever a split occurs. Iteration continues
509  // as long as:
510  // (a) there is still matrix left to operate on, and
511  // (b) the number of iterations performed in this batch is
512  // less than n_G.
513  // If/when either of the two above conditions fails to hold,
514  // the function returns.
515  n_deflations = FLA_Tevd_iteracc_v_opd_var1( m_A11,
516  n_G,
517  ijTL,
518  d1, inc_d,
519  e1, inc_e,
520  G, rs_G, cs_G,
521  &n_iter_perf );
522 
523  // Record the number of deflations that were observed.
524  total_deflations += n_deflations;
525 
526  // Update the maximum number of iterations performed in the
527  // current sweep.
528  n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
529 
530 #ifdef PRINTF
531 printf( "FLA_Tevd_v_opz_var1: deflations observed = %d\n", n_deflations );
532 printf( "FLA_Tevd_v_opz_var1: total deflations observed = %d\n", total_deflations );
533 printf( "FLA_Tevd_v_opz_var1: num iterations performed = %d\n", n_iter_perf );
534 #endif
535 
536  // Store the most recent value of ijBR in m_G_sweep_max.
537  // When the sweep is done, this value will contain the minimum
538  // number of rows of G we can apply and safely include all
539  // non-identity rotations that were computed during the
540  // eigenvalue searches.
541  m_G_sweep_max = ijBR;
542 
543  // Make sure we haven't exceeded our maximum iteration count.
544  if ( n_iter_prev >= m_A * n_iter_max )
545  {
546 #ifdef PRINTF
547 printf( "FLA_Tevd_v_opz_var1: reached maximum total number of iterations: %d\n", n_iter_prev );
548 #endif
549  FLA_Abort();
550  //return FLA_FAILURE;
551  }
552  }
553 
554  // The sweep is complete. Now we must apply the Givens rotations
555  // that were accumulated during the sweep.
556 
557  // Recall that the number of columns of U to which we apply
558  // rotations is one more than the number of rotations.
559  n_U_apply = m_G_sweep_max + 1;
560 
561 #ifdef PRINTF
562 printf( "FLA_Tevd_v_opz_var1: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
563 #endif
564 
565  // Apply the Givens rotations. Note that we optimize the scope
566  // of the operation in two ways:
567  // 1. We only apply k sets of Givens rotations, where
568  // k = n_iter_perf_sweep_max. We could simply always apply
569  // n_G sets of rotations since G is initialized to contain
570  // identity rotations in every element, but we do this to
571  // save a little bit of time.
572  // 2. We only apply to the first n_U_apply columns of A since
573  // this is the most we need to touch given the ijBR index
574  // bound of the last submatrix found in the previous sweep.
575  // Similar to above, we could simply always perform the
576  // application on all m_A columns of A, but instead we apply
577  // only to the first n_U_apply columns to save time.
578  //FLA_Apply_G_rf_blz_var5( n_iter_perf_sweep_max,
579  FLA_Apply_G_rf_blz_var3( n_iter_perf_sweep_max,
580  //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max,
581  //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max,
582  m_U,
583  n_U_apply,
584  buff_G, rs_G, cs_G,
585  buff_U, rs_U, cs_U,
586  b_alg );
587 
588  // Increment the total number of iterations previously performed.
589  n_iter_prev += n_iter_perf_sweep_max;
590 
591 #ifdef PRINTF
592 printf( "FLA_Tevd_v_opz_var1: total number of iterations performed: %d\n", n_iter_prev );
593 #endif
594  }
595 
596  return n_iter_prev;
597 }
FLA_Error FLA_Tevd_iteracc_v_opd_var1(int m_A, int n_G, int ijTL, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, int *n_iter_perf)
Definition: FLA_Tevd_iteracc_v_opt_var1.c:26
FLA_Error FLA_Tevd_find_submatrix_opd(int m_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR)
Definition: FLA_Tevd_find_submatrix.c:28
FLA_Error FLA_Apply_G_rf_blz_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition: FLA_Apply_G_rf_blk_var3.c:186
void FLA_Abort(void)
Definition: FLA_Error.c:248
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:78
Definition: blis_type_defs.h:137
dcomplex bl1_z1(void)
Definition: bl1_constants.c:69

◆ FLA_Tevd_v_opz_var2()

FLA_Error FLA_Tevd_v_opz_var2 ( int  m_A,
int  m_U,
int  n_G,
int  n_G_extra,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double *  buff_R,
int  rs_R,
int  cs_R,
dcomplex buff_W,
int  rs_W,
int  cs_W,
dcomplex buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)

References bl1_d0(), bl1_d1(), bl1_dgemm(), bl1_dident(), bl1_z1(), bl1_zcopymt(), bl1_zsetm(), BLIS1_NO_TRANSPOSE, FLA_Abort(), FLA_Apply_G_rf_bld_var3b(), FLA_Tevd_find_submatrix_opd(), and FLA_Tevd_iteracc_v_opd_var1().

Referenced by FLA_Tevd_v_opt_var2().

435 {
436  dcomplex one = bl1_z1();
437  double rone = bl1_d1();
438  double rzero = bl1_d0();
439 
440  dcomplex* G;
441  double* d1;
442  double* e1;
443  int r_val;
444  int done;
445  int m_G_sweep_max;
446  int ij_begin;
447  int ijTL, ijBR;
448  int m_A11;
449  int n_iter_perf;
450  int n_U_apply;
451  int total_deflations;
452  int n_deflations;
453  int n_iter_prev;
454  int n_iter_perf_sweep_max;
455 
456  // Initialize our completion flag.
457  done = FALSE;
458 
459  // Initialize a counter that holds the maximum number of rows of G
460  // that we would need to initialize for the next sweep.
461  m_G_sweep_max = m_A - 1;
462 
463  // Initialize a counter for the total number of iterations performed.
464  n_iter_prev = 0;
465 
466  // Initialize R to identity.
467  bl1_dident( m_A,
468  buff_R, rs_R, cs_R );
469 
470  // Iterate until the matrix has completely deflated.
471  for ( total_deflations = 0; done != TRUE; )
472  {
473 
474  // Initialize G to contain only identity rotations.
475  bl1_zsetm( m_G_sweep_max,
476  n_G,
477  &one,
478  buff_G, rs_G, cs_G );
479 
480  // Keep track of the maximum number of iterations performed in the
481  // current sweep. This is used when applying the sweep's Givens
482  // rotations.
483  n_iter_perf_sweep_max = 0;
484 
485  // Perform a sweep: Move through the matrix and perform a tridiagonal
486  // EVD on each non-zero submatrix that is encountered. During the
487  // first time through, ijTL will be 0 and ijBR will be m_A - 1.
488  for ( ij_begin = 0; ij_begin < m_A; )
489  {
490 
491 #ifdef PRINTF
492 if ( ij_begin == 0 )
493 printf( "FLA_Tevd_v_opz_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
494 #endif
495 
496  // Search for the first submatrix along the diagonal that is
497  // bounded by zeroes (or endpoints of the matrix). If no
498  // submatrix is found (ie: if the entire subdiagonal is zero
499  // then FLA_FAILURE is returned. This function also inspects
500  // subdiagonal elements for proximity to zero. If a given
501  // element is close enough to zero, then it is deemed
502  // converged and manually set to zero.
503  r_val = FLA_Tevd_find_submatrix_opd( m_A,
504  ij_begin,
505  buff_d, inc_d,
506  buff_e, inc_e,
507  &ijTL,
508  &ijBR );
509 
510  // Verify that a submatrix was found. If one was not found,
511  // then we are done with the current sweep. Furthermore, if
512  // a submatrix was not found AND we began our search at the
513  // beginning of the matrix (ie: ij_begin == 0), then the
514  // matrix has completely deflated and so we are done with
515  // Francis step iteration.
516  if ( r_val == FLA_FAILURE )
517  {
518  if ( ij_begin == 0 )
519  {
520 #ifdef PRINTF
521 printf( "FLA_Tevd_v_opz_var2: subdiagonal is completely zero.\n" );
522 printf( "FLA_Tevd_v_opz_var2: Francis iteration is done!\n" );
523 #endif
524  done = TRUE;
525  }
526 
527  // Break out of the current sweep so we can apply the last
528  // remaining Givens rotations.
529  break;
530  }
531 
532  // If we got this far, then:
533  // (a) ijTL refers to the index of the first non-zero
534  // subdiagonal along the diagonal, and
535  // (b) ijBR refers to either:
536  // - the first zero element that occurs after ijTL, or
537  // - the the last diagonal element.
538  // Note that ijTL and ijBR also correspond to the first and
539  // last diagonal elements of the submatrix of interest. Thus,
540  // we may compute the dimension of this submatrix as:
541  m_A11 = ijBR - ijTL + 1;
542 
543 #ifdef PRINTF
544 printf( "FLA_Tevd_v_opz_var2: ij_begin = %d\n", ij_begin );
545 printf( "FLA_Tevd_v_opz_var2: ijTL = %d\n", ijTL );
546 printf( "FLA_Tevd_v_opz_var2: ijBR = %d\n", ijBR );
547 printf( "FLA_Tevd_v_opz_var2: m_A11 = %d\n", m_A11 );
548 #endif
549 
550  // Adjust ij_begin, which gets us ready for the next subproblem, if
551  // there is one.
552  ij_begin = ijBR + 1;
553 
554  // Index to the submatrices upon which we will operate.
555  d1 = buff_d + ijTL * inc_d;
556  e1 = buff_e + ijTL * inc_e;
557  G = buff_G + ijTL * rs_G;
558 
559  // Search for a batch of eigenvalues, recursing on deflated
560  // subproblems whenever a split occurs. Iteration continues
561  // as long as
562  // (a) there is still matrix left to operate on, and
563  // (b) the number of iterations performed in this batch is
564  // less than n_G.
565  // If/when either of the two above conditions fails to hold,
566  // the function returns.
567  n_deflations = FLA_Tevd_iteracc_v_opd_var1( m_A11,
568  n_G,
569  ijTL,
570  d1, inc_d,
571  e1, inc_e,
572  G, rs_G, cs_G,
573  &n_iter_perf );
574 
575  // Record the number of deflations that we observed.
576  total_deflations += n_deflations;
577 
578  // Update the maximum number of iterations performed in the
579  // current sweep.
580  n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
581 
582 #ifdef PRINTF
583 printf( "FLA_Tevd_v_opz_var2: deflations observed = %d\n", n_deflations );
584 printf( "FLA_Tevd_v_opz_var2: total deflations observed = %d\n", total_deflations );
585 printf( "FLA_Tevd_v_opz_var2: num iterations = %d\n", n_iter_perf );
586 #endif
587 
588  // Store the most recent value of ijBR in m_G_sweep_max.
589  // When the sweep is done, this value will contain the minimum
590  // number of rows of G we can apply and safely include all
591  // non-identity rotations that were computed during the
592  // eigenvalue searches.
593  m_G_sweep_max = ijBR;
594 
595  // Make sure we haven't exceeded our maximum iteration count.
596  if ( n_iter_prev >= m_A * n_iter_max )
597  {
598 #ifdef PRINTF
599 printf( "FLA_Tevd_v_opz_var2: reached maximum total number of iterations: %d\n", n_iter_prev );
600 #endif
601  FLA_Abort();
602  //return FLA_FAILURE;
603  }
604  }
605 
606  // The sweep is complete. Now we must apply the Givens rotations
607  // that were accumulated during the sweep.
608 
609 
610  // Recall that the number of columns of U to which we apply
611  // rotations is one more than the number of rotations.
612  n_U_apply = m_G_sweep_max + 1;
613 
614  // Apply the Givens rotations that were computed as part of
615  // the previous batch of iterations.
616  //FLA_Apply_G_rf_bld_var8b( n_iter_perf_sweep_max,
617  //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
618  FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
619  //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
620  //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
621  m_U,
622  n_U_apply,
623  n_iter_prev,
624  buff_G, rs_G, cs_G,
625  buff_R, rs_R, cs_R,
626  b_alg );
627 
628 #ifdef PRINTF
629 printf( "FLA_Tevd_v_opz_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
630 #endif
631 
632  // Increment the total number of iterations previously performed.
633  n_iter_prev += n_iter_perf_sweep_max;
634  }
635 
636  // Copy the contents of Q to temporary storage.
638  m_A,
639  m_A,
640  buff_U, rs_U, cs_U,
641  buff_W, rs_W, cs_W );
642 
643 
644  // Multiply Q by R, overwriting U.
647  2*m_A,
648  m_A,
649  m_A,
650  &rone,
651  ( double* )buff_W, rs_W, 2*cs_W,
652  buff_R, rs_R, cs_R,
653  &rzero,
654  ( double* )buff_U, rs_U, 2*cs_U );
655 
656  return n_iter_prev;
657 }
FLA_Error FLA_Tevd_iteracc_v_opd_var1(int m_A, int n_G, int ijTL, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, int *n_iter_perf)
Definition: FLA_Tevd_iteracc_v_opt_var1.c:26
void bl1_dident(int m, double *a, int a_rs, int a_cs)
Definition: bl1_ident.c:32
FLA_Error FLA_Tevd_find_submatrix_opd(int m_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR)
Definition: FLA_Tevd_find_submatrix.c:28
void bl1_dgemm(trans1_t transa, trans1_t transb, int m, int k, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
Definition: bl1_gemm.c:274
double bl1_d0(void)
Definition: bl1_constants.c:118
void FLA_Abort(void)
Definition: FLA_Error.c:248
Definition: blis_type_defs.h:54
void bl1_zcopymt(trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:286
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:78
double bl1_d1(void)
Definition: bl1_constants.c:54
FLA_Error FLA_Apply_G_rf_bld_var3b(int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
Definition: FLA_Apply_G_rf_blk_var3b.c:135
Definition: blis_type_defs.h:137
dcomplex bl1_z1(void)
Definition: bl1_constants.c:69