libflame  revision_anchor
Functions
FLA_Bsvd_ext_opt_var1.c File Reference

(r)

Functions

FLA_Error FLA_Bsvd_ext_opt_var1 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj H, FLA_Svd_type jobu, FLA_Obj U, FLA_Svd_type jobv, FLA_Obj V, FLA_Bool apply_Uh2C, FLA_Obj C, dim_t b_alg)
 
FLA_Error FLA_Bsvd_ext_ops_var1 (int m_d, int m_U, int m_V, int m_C, int n_C, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, float *buff_C, int rs_C, int cs_C, int b_alg)
 
FLA_Error FLA_Bsvd_ext_opd_var1 (int m_d, int m_U, int m_V, int m_C, int n_C, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, double *buff_C, int rs_C, int cs_C, int b_alg)
 
FLA_Error FLA_Bsvd_ext_opc_var1 (int m_d, int m_U, int m_V, int m_C, int n_C, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, scomplex *buff_C, int rs_C, int cs_C, int b_alg)
 
FLA_Error FLA_Bsvd_ext_opz_var1 (int m_d, int m_U, int m_V, int m_C, int n_C, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, dcomplex *buff_C, int rs_C, int cs_C, int b_alg)
 

Function Documentation

◆ FLA_Bsvd_ext_opc_var1()

FLA_Error FLA_Bsvd_ext_opc_var1 ( int  m_d,
int  m_U,
int  m_V,
int  m_C,
int  n_C,
int  n_GH,
int  n_iter_max,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_H,
int  rs_H,
int  cs_H,
scomplex buff_U,
int  rs_U,
int  cs_U,
scomplex buff_V,
int  rs_V,
int  cs_V,
scomplex buff_C,
int  rs_C,
int  cs_C,
int  b_alg 
)

References bl1_c1(), bl1_csetm(), bl1_csscalv(), bl1_s0(), bl1_sm1(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_blc_var3(), FLA_Bsvd_compute_tol_thresh_ops(), FLA_Bsvd_find_submatrix_ops(), FLA_Bsvd_iteracc_v_ops_var1(), FLA_Mach_params_ops(), and i.

Referenced by FLA_Bsvd_ext_opt_var1().

753 {
754  scomplex one = bl1_c1();
755  float rzero = bl1_s0();
756 
757  int maxitr = 6;
758 
759  float eps;
760  float tolmul;
761  float tol;
762  float thresh;
763 
764  scomplex* G;
765  scomplex* H;
766  float* d1;
767  float* e1;
768  int r_val;
769  int done;
770  int m_GH_sweep_max;
771  int ij_begin;
772  int ijTL, ijBR;
773  int m_A11;
774  int n_iter_perf;
775  int n_UV_apply;
776  int total_deflations;
777  int n_deflations;
778  int n_iter_prev;
779  int n_iter_perf_sweep_max;
780 
781  // Compute some convergence constants.
782  eps = FLA_Mach_params_ops( FLA_MACH_EPS );
783  tolmul = max( 10.0F, min( 100.0F, powf( eps, -0.125F ) ) );
785  tolmul,
786  maxitr,
787  buff_d, inc_d,
788  buff_e, inc_e,
789  &tol,
790  &thresh );
791 
792  // Initialize our completion flag.
793  done = FALSE;
794 
795  // Initialize a counter that holds the maximum number of rows of G
796  // and H that we would need to initialize for the next sweep.
797  m_GH_sweep_max = m_d - 1;
798 
799  // Initialize a counter for the total number of iterations performed.
800  n_iter_prev = 0;
801 
802  // Iterate until the matrix has completely deflated.
803  for ( total_deflations = 0; done != TRUE; )
804  {
805 
806  // Initialize G and H to contain only identity rotations.
807  bl1_csetm( m_GH_sweep_max,
808  n_GH,
809  &one,
810  buff_G, rs_G, cs_G );
811  bl1_csetm( m_GH_sweep_max,
812  n_GH,
813  &one,
814  buff_H, rs_H, cs_H );
815 
816  // Keep track of the maximum number of iterations performed in the
817  // current sweep. This is used when applying the sweep's Givens
818  // rotations.
819  n_iter_perf_sweep_max = 0;
820 
821  // Perform a sweep: Move through the matrix and perform a bidiagonal
822  // SVD on each non-zero submatrix that is encountered. During the
823  // first time through, ijTL will be 0 and ijBR will be m_d - 1.
824  for ( ij_begin = 0; ij_begin < m_d; )
825  {
826  // Search for the first submatrix along the diagonal that is
827  // bounded by zeroes (or endpoints of the matrix). If no
828  // submatrix is found (ie: if the entire superdiagonal is zero
829  // then FLA_FAILURE is returned. This function also inspects
830  // superdiagonal elements for proximity to zero. If a given
831  // element is close enough to zero, then it is deemed
832  // converged and manually set to zero.
833  r_val = FLA_Bsvd_find_submatrix_ops( m_d,
834  ij_begin,
835  buff_d, inc_d,
836  buff_e, inc_e,
837  &ijTL,
838  &ijBR );
839 
840  // Verify that a submatrix was found. If one was not found,
841  // then we are done with the current sweep. Furthermore, if
842  // a submatrix was not found AND we began our search at the
843  // beginning of the matrix (ie: ij_begin == 0), then the
844  // matrix has completely deflated and so we are done with
845  // Francis step iteration.
846  if ( r_val == FLA_FAILURE )
847  {
848  if ( ij_begin == 0 )
849  {
850  done = TRUE;
851  }
852 
853  // Break out of the current sweep so we can apply the last
854  // remaining Givens rotations.
855  break;
856  }
857 
858  // If we got this far, then:
859  // (a) ijTL refers to the index of the first non-zero
860  // superdiagonal along the diagonal, and
861  // (b) ijBR refers to either:
862  // - the first zero element that occurs after ijTL, or
863  // - the the last diagonal element.
864  // Note that ijTL and ijBR also correspond to the first and
865  // last diagonal elements of the submatrix of interest. Thus,
866  // we may compute the dimension of this submatrix as:
867  m_A11 = ijBR - ijTL + 1;
868 
869  // Adjust ij_begin, which gets us ready for the next submatrix
870  // search in the current sweep.
871  ij_begin = ijBR + 1;
872 
873  // Index to the submatrices upon which we will operate.
874  d1 = buff_d + ijTL * inc_d;
875  e1 = buff_e + ijTL * inc_e;
876  G = buff_G + ijTL * rs_G;
877  H = buff_H + ijTL * rs_H;
878 
879  // Search for a batch of singular values, recursing on deflated
880  // subproblems whenever a split occurs. Iteration continues as
881  // long as
882  // (a) there is still matrix left to operate on, and
883  // (b) the number of iterations performed in this batch is
884  // less than n_G.
885  // If/when either of the two above conditions fails to hold,
886  // the function returns.
887  n_deflations = FLA_Bsvd_iteracc_v_ops_var1( m_A11,
888  n_GH,
889  ijTL,
890  tol,
891  thresh,
892  d1, inc_d,
893  e1, inc_e,
894  G, rs_G, cs_G,
895  H, rs_H, cs_H,
896  &n_iter_perf );
897 
898  // Record the number of deflations that were observed.
899  total_deflations += n_deflations;
900 
901  // Update the maximum number of iterations performed in the
902  // current sweep.
903  n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
904 
905  // Store the most recent value of ijBR in m_G_sweep_max.
906  // When the sweep is done, this value will contain the minimum
907  // number of rows of G and H we can apply and safely include all
908  // non-identity rotations that were computed during the
909  // singular value searches.
910  m_GH_sweep_max = ijBR;
911 
912  // Make sure we haven't exceeded our maximum iteration count.
913  if ( n_iter_prev >= n_iter_max * m_d )
914  {
915  FLA_Abort();
916  //return FLA_FAILURE;
917  }
918  }
919 
920  // The sweep is complete. Now we must apply the Givens rotations
921  // that were accumulated during the sweep.
922 
923  // Recall that the number of columns of U and V to which we apply
924  // rotations is one more than the number of rotations.
925  n_UV_apply = m_GH_sweep_max + 1;
926 
927  // Apply the Givens rotations. Note that we only apply k sets of
928  // rotations, where k = n_iter_perf_sweep_max. Also note that we only
929  // apply to n_UV_apply columns of U and V since this is the most we
930  // need to touch given the most recent value stored to m_GH_sweep_max.
931 
932  if ( buff_U != NULL ) // ( rs_U == 0 && cs_U == 0 ) // if U is requested
933  FLA_Apply_G_rf_blc_var3( n_iter_perf_sweep_max,
934  m_U,
935  n_UV_apply,
936  buff_G, rs_G, cs_G,
937  buff_U, rs_U, cs_U,
938  b_alg );
939 
940  if ( buff_V != NULL ) // ( rs_V == 0 && cs_V == 0 ) // if V is requested
941  FLA_Apply_G_rf_blc_var3( n_iter_perf_sweep_max,
942  m_V,
943  n_UV_apply,
944  buff_H, rs_H, cs_H,
945  buff_V, rs_V, cs_V,
946  b_alg );
947 
948  // lf with non-flipped cs/rs m/n should be used;
949  // but the same operation can be achieved by using rf
950  // ( U^H C )^H = C^H U where U is G_0 G_1 G_2 .... G_{k-1}
951  if ( buff_C != NULL ) // ( rs_C == 0 && cs_C == 0 ) // if C is requested, flip
952  {
953  FLA_Apply_G_rf_blc_var3( n_iter_perf_sweep_max,
954  n_C,
955  m_C,
956  buff_G, rs_G, cs_G,
957  buff_C, cs_C, rs_C,
958  b_alg );
959  }
960 
961  // Increment the total number of iterations previously performed.
962  n_iter_prev += n_iter_perf_sweep_max;
963  }
964 
965  // Make all the singular values positive.
966  {
967  int i;
968  float minus_one = bl1_sm1();
969 
970  for ( i = 0; i < m_d; ++i )
971  {
972  if ( buff_d[ (i )*inc_d ] < rzero )
973  {
974  buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
975 
976  // Scale the right singular vectors.
977  if ( buff_V != NULL )
979  m_V,
980  &minus_one,
981  buff_V + (i )*cs_V, rs_V );
982  }
983  }
984  }
985 
986  return FLA_SUCCESS;
987 }
FLA_Error FLA_Bsvd_iteracc_v_ops_var1(int m_A, int n_GH, int ijTL, float tol, float thresh, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, int *n_iter_perf)
Definition: FLA_Bsvd_iteracc_v_opt_var1.c:13
Definition: blis_type_defs.h:81
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:61
FLA_Error FLA_Bsvd_find_submatrix_ops(int mn_A, int ij_begin, float *buff_d, int inc_d, float *buff_e, int inc_e, int *ijTL, int *ijBR)
Definition: FLA_Bsvd_find_submatrix.c:14
scomplex bl1_c1(void)
Definition: bl1_constants.c:61
void bl1_csscalv(conj1_t conj, int n, float *alpha, scomplex *x, int incx)
Definition: bl1_scalv.c:35
void FLA_Abort(void)
Definition: FLA_Error.c:248
Definition: blis_type_defs.h:132
FLA_Error FLA_Apply_G_rf_blc_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition: FLA_Apply_G_rf_blk_var3.c:157
FLA_Error FLA_Bsvd_compute_tol_thresh_ops(int n_A, float tolmul, float maxitr, float *buff_d, int inc_d, float *buff_e, int inc_e, float *tol, float *thresh)
Definition: FLA_Bsvd_compute_tol_thresh.c:78
float bl1_sm1(void)
Definition: bl1_constants.c:175
int i
Definition: bl1_axmyv2.c:145
float FLA_Mach_params_ops(FLA_Machval machval)
Definition: FLA_Mach_params.c:47
float bl1_s0(void)
Definition: bl1_constants.c:111

◆ FLA_Bsvd_ext_opd_var1()

FLA_Error FLA_Bsvd_ext_opd_var1 ( int  m_d,
int  m_U,
int  m_V,
int  m_C,
int  n_C,
int  n_GH,
int  n_iter_max,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_H,
int  rs_H,
int  cs_H,
double *  buff_U,
int  rs_U,
int  cs_U,
double *  buff_V,
int  rs_V,
int  cs_V,
double *  buff_C,
int  rs_C,
int  cs_C,
int  b_alg 
)

References bl1_d0(), bl1_dm1(), bl1_dscalv(), bl1_z1(), bl1_zsetm(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_bld_var3(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), FLA_Mach_params_opd(), and i.

Referenced by FLA_Bsvd_ext_opt_var1().

501 {
502  dcomplex one = bl1_z1();
503  double rzero = bl1_d0();
504 
505  int maxitr = 6;
506 
507  double eps;
508  double tolmul;
509  double tol;
510  double thresh;
511 
512  dcomplex* G;
513  dcomplex* H;
514  double* d1;
515  double* e1;
516  int r_val;
517  int done;
518  int m_GH_sweep_max;
519  int ij_begin;
520  int ijTL, ijBR;
521  int m_A11;
522  int n_iter_perf;
523  int n_UV_apply;
524  int total_deflations;
525  int n_deflations;
526  int n_iter_prev;
527  int n_iter_perf_sweep_max;
528 
529  // Compute some convergence constants.
530  eps = FLA_Mach_params_opd( FLA_MACH_EPS );
531  tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
533  tolmul,
534  maxitr,
535  buff_d, inc_d,
536  buff_e, inc_e,
537  &tol,
538  &thresh );
539 
540  // Initialize our completion flag.
541  done = FALSE;
542 
543  // Initialize a counter that holds the maximum number of rows of G
544  // and H that we would need to initialize for the next sweep.
545  m_GH_sweep_max = m_d - 1;
546 
547  // Initialize a counter for the total number of iterations performed.
548  n_iter_prev = 0;
549 
550  // Iterate until the matrix has completely deflated.
551  for ( total_deflations = 0; done != TRUE; )
552  {
553 
554  // Initialize G and H to contain only identity rotations.
555  bl1_zsetm( m_GH_sweep_max,
556  n_GH,
557  &one,
558  buff_G, rs_G, cs_G );
559  bl1_zsetm( m_GH_sweep_max,
560  n_GH,
561  &one,
562  buff_H, rs_H, cs_H );
563 
564  // Keep track of the maximum number of iterations performed in the
565  // current sweep. This is used when applying the sweep's Givens
566  // rotations.
567  n_iter_perf_sweep_max = 0;
568 
569  // Perform a sweep: Move through the matrix and perform a bidiagonal
570  // SVD on each non-zero submatrix that is encountered. During the
571  // first time through, ijTL will be 0 and ijBR will be m_d - 1.
572  for ( ij_begin = 0; ij_begin < m_d; )
573  {
574  // Search for the first submatrix along the diagonal that is
575  // bounded by zeroes (or endpoints of the matrix). If no
576  // submatrix is found (ie: if the entire superdiagonal is zero
577  // then FLA_FAILURE is returned. This function also inspects
578  // superdiagonal elements for proximity to zero. If a given
579  // element is close enough to zero, then it is deemed
580  // converged and manually set to zero.
581  r_val = FLA_Bsvd_find_submatrix_opd( m_d,
582  ij_begin,
583  buff_d, inc_d,
584  buff_e, inc_e,
585  &ijTL,
586  &ijBR );
587 
588  // Verify that a submatrix was found. If one was not found,
589  // then we are done with the current sweep. Furthermore, if
590  // a submatrix was not found AND we began our search at the
591  // beginning of the matrix (ie: ij_begin == 0), then the
592  // matrix has completely deflated and so we are done with
593  // Francis step iteration.
594  if ( r_val == FLA_FAILURE )
595  {
596  if ( ij_begin == 0 )
597  {
598  done = TRUE;
599  }
600 
601  // Break out of the current sweep so we can apply the last
602  // remaining Givens rotations.
603  break;
604  }
605 
606  // If we got this far, then:
607  // (a) ijTL refers to the index of the first non-zero
608  // superdiagonal along the diagonal, and
609  // (b) ijBR refers to either:
610  // - the first zero element that occurs after ijTL, or
611  // - the the last diagonal element.
612  // Note that ijTL and ijBR also correspond to the first and
613  // last diagonal elements of the submatrix of interest. Thus,
614  // we may compute the dimension of this submatrix as:
615  m_A11 = ijBR - ijTL + 1;
616 
617  // Adjust ij_begin, which gets us ready for the next submatrix
618  // search in the current sweep.
619  ij_begin = ijBR + 1;
620 
621  // Index to the submatrices upon which we will operate.
622  d1 = buff_d + ijTL * inc_d;
623  e1 = buff_e + ijTL * inc_e;
624  G = buff_G + ijTL * rs_G;
625  H = buff_H + ijTL * rs_H;
626 
627  // Search for a batch of singular values, recursing on deflated
628  // subproblems whenever a split occurs. Iteration continues as
629  // long as
630  // (a) there is still matrix left to operate on, and
631  // (b) the number of iterations performed in this batch is
632  // less than n_G.
633  // If/when either of the two above conditions fails to hold,
634  // the function returns.
635  n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11,
636  n_GH,
637  ijTL,
638  tol,
639  thresh,
640  d1, inc_d,
641  e1, inc_e,
642  G, rs_G, cs_G,
643  H, rs_H, cs_H,
644  &n_iter_perf );
645 
646  // Record the number of deflations that were observed.
647  total_deflations += n_deflations;
648 
649  // Update the maximum number of iterations performed in the
650  // current sweep.
651  n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
652 
653  // Store the most recent value of ijBR in m_G_sweep_max.
654  // When the sweep is done, this value will contain the minimum
655  // number of rows of G and H we can apply and safely include all
656  // non-identity rotations that were computed during the
657  // singular value searches.
658  m_GH_sweep_max = ijBR;
659 
660  // Make sure we haven't exceeded our maximum iteration count.
661  if ( n_iter_prev >= n_iter_max * m_d )
662  {
663  FLA_Abort();
664  //return FLA_FAILURE;
665  }
666  }
667 
668  // The sweep is complete. Now we must apply the Givens rotations
669  // that were accumulated during the sweep.
670 
671  // Recall that the number of columns of U and V to which we apply
672  // rotations is one more than the number of rotations.
673  n_UV_apply = m_GH_sweep_max + 1;
674 
675  // Apply the Givens rotations. Note that we only apply k sets of
676  // rotations, where k = n_iter_perf_sweep_max. Also note that we only
677  // apply to n_UV_apply columns of U and V since this is the most we
678  // need to touch given the most recent value stored to m_GH_sweep_max.
679 
680  if ( buff_U != NULL ) // ( rs_U == 0 && cs_U == 0 ) // if U is requested
681  FLA_Apply_G_rf_bld_var3( n_iter_perf_sweep_max,
682  m_U,
683  n_UV_apply,
684  buff_G, rs_G, cs_G,
685  buff_U, rs_U, cs_U,
686  b_alg );
687 
688  if ( buff_V != NULL ) // ( rs_V == 0 && cs_V == 0 ) // if V is requested
689  FLA_Apply_G_rf_bld_var3( n_iter_perf_sweep_max,
690  m_V,
691  n_UV_apply,
692  buff_H, rs_H, cs_H,
693  buff_V, rs_V, cs_V,
694  b_alg );
695 
696 
697  // lf with non-flipped cs/rs m/n should be used;
698  // but the same operation can be achieved by using rf
699  // ( U^H C )^H = C^H U where U is G_0 G_1 G_2 .... G_{k-1}
700  if ( buff_C != NULL ) // ( rs_C == 0 && cs_C == 0 ) // if C is requested, flip
701  {
702  FLA_Apply_G_rf_bld_var3( n_iter_perf_sweep_max,
703  n_C,
704  m_C,
705  buff_G, rs_G, cs_G,
706  buff_C, cs_C, rs_C,
707  b_alg );
708  }
709 
710  // Increment the total number of iterations previously performed.
711  n_iter_prev += n_iter_perf_sweep_max;
712  }
713 
714  // Make all the singular values positive.
715  {
716  int i;
717  double minus_one = bl1_dm1();
718 
719  for ( i = 0; i < m_d; ++i )
720  {
721  if ( buff_d[ (i )*inc_d ] < rzero )
722  {
723  buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
724 
725  // Scale the right singular vectors.
726  if ( buff_V != NULL )
728  m_V,
729  &minus_one,
730  buff_V + (i )*cs_V, rs_V );
731  }
732  }
733  }
734 
735  return n_iter_prev;
736 }
Definition: blis_type_defs.h:81
FLA_Error FLA_Bsvd_iteracc_v_opd_var1(int m_A, int n_GH, int ijTL, double tol, double thresh, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, int *n_iter_perf)
Definition: FLA_Bsvd_iteracc_v_opt_var1.c:176
double FLA_Mach_params_opd(FLA_Machval machval)
Definition: FLA_Mach_params.c:74
double bl1_d0(void)
Definition: bl1_constants.c:118
FLA_Error FLA_Apply_G_rf_bld_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
Definition: FLA_Apply_G_rf_blk_var3.c:128
FLA_Error FLA_Bsvd_compute_tol_thresh_opd(int n_A, double tolmul, double maxitr, double *buff_d, int inc_d, double *buff_e, int inc_e, double *tol, double *thresh)
Definition: FLA_Bsvd_compute_tol_thresh.c:137
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_scalv.c:24
void FLA_Abort(void)
Definition: FLA_Error.c:248
FLA_Error FLA_Bsvd_find_submatrix_opd(int mn_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR)
Definition: FLA_Bsvd_find_submatrix.c:73
double bl1_dm1(void)
Definition: bl1_constants.c:182
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:78
int i
Definition: bl1_axmyv2.c:145
Definition: blis_type_defs.h:137
dcomplex bl1_z1(void)
Definition: bl1_constants.c:69

◆ FLA_Bsvd_ext_ops_var1()

FLA_Error FLA_Bsvd_ext_ops_var1 ( int  m_d,
int  m_U,
int  m_V,
int  m_C,
int  n_C,
int  n_GH,
int  n_iter_max,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_H,
int  rs_H,
int  cs_H,
float *  buff_U,
int  rs_U,
int  cs_U,
float *  buff_V,
int  rs_V,
int  cs_V,
float *  buff_C,
int  rs_C,
int  cs_C,
int  b_alg 
)

References bl1_c1(), bl1_csetm(), bl1_s0(), bl1_sm1(), bl1_sscalv(), BLIS1_NO_CONJUGATE, FLA_Apply_G_rf_bls_var3(), FLA_Bsvd_compute_tol_thresh_ops(), FLA_Bsvd_find_submatrix_ops(), FLA_Bsvd_iteracc_v_ops_var1(), FLA_Mach_params_ops(), and i.

Referenced by FLA_Bsvd_ext_opt_var1().

246 {
247  scomplex one = bl1_c1();
248  float rzero = bl1_s0();
249 
250  int maxitr = 6;
251 
252  float eps;
253  float tolmul;
254  float tol;
255  float thresh;
256 
257  scomplex* G;
258  scomplex* H;
259  float* d1;
260  float* e1;
261  int r_val;
262  int done;
263  int m_GH_sweep_max;
264  int ij_begin;
265  int ijTL, ijBR;
266  int m_A11;
267  int n_iter_perf;
268  int n_UV_apply;
269  int total_deflations;
270  int n_deflations;
271  int n_iter_prev;
272  int n_iter_perf_sweep_max;
273 
274  // Compute some convergence constants.
275  eps = FLA_Mach_params_ops( FLA_MACH_EPS );
276  tolmul = max( 10.0F, min( 100.0F, powf( eps, -0.125F ) ) );
278  tolmul,
279  maxitr,
280  buff_d, inc_d,
281  buff_e, inc_e,
282  &tol,
283  &thresh );
284 
285  // Initialize our completion flag.
286  done = FALSE;
287 
288  // Initialize a counter that holds the maximum number of rows of G
289  // and H that we would need to initialize for the next sweep.
290  m_GH_sweep_max = m_d - 1;
291 
292  // Initialize a counter for the total number of iterations performed.
293  n_iter_prev = 0;
294 
295  // Iterate until the matrix has completely deflated.
296  for ( total_deflations = 0; done != TRUE; )
297  {
298 
299  // Initialize G and H to contain only identity rotations.
300  bl1_csetm( m_GH_sweep_max,
301  n_GH,
302  &one,
303  buff_G, rs_G, cs_G );
304  bl1_csetm( m_GH_sweep_max,
305  n_GH,
306  &one,
307  buff_H, rs_H, cs_H );
308 
309  // Keep track of the maximum number of iterations performed in the
310  // current sweep. This is used when applying the sweep's Givens
311  // rotations.
312  n_iter_perf_sweep_max = 0;
313 
314  // Perform a sweep: Move through the matrix and perform a bidiagonal
315  // SVD on each non-zero submatrix that is encountered. During the
316  // first time through, ijTL will be 0 and ijBR will be m_d - 1.
317  for ( ij_begin = 0; ij_begin < m_d; )
318  {
319 
320  // Search for the first submatrix along the diagonal that is
321  // bounded by zeroes (or endpoints of the matrix). If no
322  // submatrix is found (ie: if the entire superdiagonal is zero
323  // then FLA_FAILURE is returned. This function also inspects
324  // superdiagonal elements for proximity to zero. If a given
325  // element is close enough to zero, then it is deemed
326  // converged and manually set to zero.
327  r_val = FLA_Bsvd_find_submatrix_ops( m_d,
328  ij_begin,
329  buff_d, inc_d,
330  buff_e, inc_e,
331  &ijTL,
332  &ijBR );
333 
334  // Verify that a submatrix was found. If one was not found,
335  // then we are done with the current sweep. Furthermore, if
336  // a submatrix was not found AND we began our search at the
337  // beginning of the matrix (ie: ij_begin == 0), then the
338  // matrix has completely deflated and so we are done with
339  // Francis step iteration.
340  if ( r_val == FLA_FAILURE )
341  {
342  if ( ij_begin == 0 )
343  {
344  done = TRUE;
345  }
346 
347  // Break out of the current sweep so we can apply the last
348  // remaining Givens rotations.
349  break;
350  }
351 
352  // If we got this far, then:
353  // (a) ijTL refers to the index of the first non-zero
354  // superdiagonal along the diagonal, and
355  // (b) ijBR refers to either:
356  // - the first zero element that occurs after ijTL, or
357  // - the the last diagonal element.
358  // Note that ijTL and ijBR also correspond to the first and
359  // last diagonal elements of the submatrix of interest. Thus,
360  // we may compute the dimension of this submatrix as:
361  m_A11 = ijBR - ijTL + 1;
362 
363  // Adjust ij_begin, which gets us ready for the next submatrix
364  // search in the current sweep.
365  ij_begin = ijBR + 1;
366 
367  // Index to the submatrices upon which we will operate.
368  d1 = buff_d + ijTL * inc_d;
369  e1 = buff_e + ijTL * inc_e;
370  G = buff_G + ijTL * rs_G;
371  H = buff_H + ijTL * rs_H;
372 
373  // Search for a batch of singular values, recursing on deflated
374  // subproblems whenever a split occurs. Iteration continues as
375  // long as
376  // (a) there is still matrix left to operate on, and
377  // (b) the number of iterations performed in this batch is
378  // less than n_G.
379  // If/when either of the two above conditions fails to hold,
380  // the function returns.
381  n_deflations = FLA_Bsvd_iteracc_v_ops_var1( m_A11,
382  n_GH,
383  ijTL,
384  tol,
385  thresh,
386  d1, inc_d,
387  e1, inc_e,
388  G, rs_G, cs_G,
389  H, rs_H, cs_H,
390  &n_iter_perf );
391 
392  // Record the number of deflations that were observed.
393  total_deflations += n_deflations;
394 
395  // Update the maximum number of iterations performed in the
396  // current sweep.
397  n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
398 
399  // Store the most recent value of ijBR in m_G_sweep_max.
400  // When the sweep is done, this value will contain the minimum
401  // number of rows of G and H we can apply and safely include all
402  // non-identity rotations that were computed during the
403  // singular value searches.
404  m_GH_sweep_max = ijBR;
405 
406  // Make sure we haven't exceeded our maximum iteration count.
407  if ( n_iter_prev >= n_iter_max * m_d )
408  {
409  //FLA_Abort();
410  return n_iter_prev;
411  }
412  }
413 
414  // The sweep is complete. Now we must apply the Givens rotations
415  // that were accumulated during the sweep.
416 
417  // Recall that the number of columns of U and V to which we apply
418  // rotations is one more than the number of rotations.
419  n_UV_apply = m_GH_sweep_max + 1;
420 
421  // Apply the Givens rotations. Note that we only apply k sets of
422  // rotations, where k = n_iter_perf_sweep_max. Also note that we only
423  // apply to n_UV_apply columns of U and V since this is the most we
424  // need to touch given the most recent value stored to m_GH_sweep_max.
425 
426  if ( buff_U != NULL ) // ( rs_U == 0 && cs_U == 0 ) // if U is requested
427  FLA_Apply_G_rf_bls_var3( n_iter_perf_sweep_max,
428  m_U,
429  n_UV_apply,
430  buff_G, rs_G, cs_G,
431  buff_U, rs_U, cs_U,
432  b_alg );
433 
434  if ( buff_V != NULL ) // ( rs_V == 0 && cs_V == 0 ) // if V is requested
435  FLA_Apply_G_rf_bls_var3( n_iter_perf_sweep_max,
436  m_V,
437  n_UV_apply,
438  buff_H, rs_H, cs_H,
439  buff_V, rs_V, cs_V,
440  b_alg );
441 
442  // lf with non-flipped cs/rs m/n should be used;
443  // but the same operation can be achieved by using rf
444  // ( U^H C )^H = C^H U where U is G_0 G_1 G_2 .... G_{k-1}
445  if ( buff_C != NULL ) // ( rs_C == 0 && cs_C == 0 ) // if C is requested, flip
446  {
447  FLA_Apply_G_rf_bls_var3( n_iter_perf_sweep_max,
448  n_C,
449  m_C,
450  buff_G, rs_G, cs_G,
451  buff_C, cs_C, rs_C,
452  b_alg );
453  }
454 
455  // Increment the total number of iterations previously performed.
456  n_iter_prev += n_iter_perf_sweep_max;
457 
458  }
459 
460  // Make all the singular values positive.
461  {
462  int i;
463  float minus_one = bl1_sm1();
464 
465  for ( i = 0; i < m_d; ++i )
466  {
467  if ( buff_d[ (i )*inc_d ] < rzero )
468  {
469  buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
470 
471  // Scale the right singular vectors.
472  if ( buff_V != NULL )
474  m_V,
475  &minus_one,
476  buff_V + (i )*cs_V, rs_V );
477  }
478  }
479  }
480 
481  return n_iter_prev;
482 }
FLA_Error FLA_Bsvd_iteracc_v_ops_var1(int m_A, int n_GH, int ijTL, float tol, float thresh, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, int *n_iter_perf)
Definition: FLA_Bsvd_iteracc_v_opt_var1.c:13
Definition: blis_type_defs.h:81
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:61
FLA_Error FLA_Bsvd_find_submatrix_ops(int mn_A, int ij_begin, float *buff_d, int inc_d, float *buff_e, int inc_e, int *ijTL, int *ijBR)
Definition: FLA_Bsvd_find_submatrix.c:14
scomplex bl1_c1(void)
Definition: bl1_constants.c:61
Definition: blis_type_defs.h:132
FLA_Error FLA_Apply_G_rf_bls_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
Definition: FLA_Apply_G_rf_blk_var3.c:99
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_scalv.c:13
FLA_Error FLA_Bsvd_compute_tol_thresh_ops(int n_A, float tolmul, float maxitr, float *buff_d, int inc_d, float *buff_e, int inc_e, float *tol, float *thresh)
Definition: FLA_Bsvd_compute_tol_thresh.c:78
float bl1_sm1(void)
Definition: bl1_constants.c:175
int i
Definition: bl1_axmyv2.c:145
float FLA_Mach_params_ops(FLA_Machval machval)
Definition: FLA_Mach_params.c:47
float bl1_s0(void)
Definition: bl1_constants.c:111

◆ FLA_Bsvd_ext_opt_var1()

FLA_Error FLA_Bsvd_ext_opt_var1 ( dim_t  n_iter_max,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  G,
FLA_Obj  H,
FLA_Svd_type  jobu,
FLA_Obj  U,
FLA_Svd_type  jobv,
FLA_Obj  V,
FLA_Bool  apply_Uh2C,
FLA_Obj  C,
dim_t  b_alg 
)

References FLA_Bsvd_ext_opc_var1(), FLA_Bsvd_ext_opd_var1(), FLA_Bsvd_ext_ops_var1(), FLA_Bsvd_ext_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_dim(), FLA_Obj_vector_inc(), FLA_Obj_width(), FLA_Sort_bsvd_ext_b_opc(), FLA_Sort_bsvd_ext_b_opd(), FLA_Sort_bsvd_ext_b_ops(), and FLA_Sort_bsvd_ext_b_opz().

Referenced by FLA_Bsvd(), FLA_Bsvd_ext(), and FLA_Svd_ext_u_unb_var1().

19 {
20  FLA_Error r_val = FLA_SUCCESS;
21  FLA_Datatype datatype;
22  int m_U, m_V, m_C, n_C, n_GH;
23  int m_d, inc_d;
24  int inc_e;
25  int rs_G, cs_G;
26  int rs_H, cs_H;
27  int rs_U, cs_U;
28  int rs_V, cs_V;
29  int rs_C, cs_C;
30 
31  if ( jobu != FLA_SVD_VECTORS_NONE )
32  datatype = FLA_Obj_datatype( U );
33  else if ( jobv != FLA_SVD_VECTORS_NONE )
34  datatype = FLA_Obj_datatype( V );
35  else if ( apply_Uh2C == TRUE )
36  datatype = FLA_Obj_datatype( C );
37  else
38  datatype = FLA_Obj_datatype( d );
39 
40  m_d = FLA_Obj_vector_dim( d );
41  inc_d = FLA_Obj_vector_inc( d );
42  inc_e = FLA_Obj_vector_inc( e );
43 
44  n_GH = FLA_Obj_width( G );
45 
46  rs_G = FLA_Obj_row_stride( G );
47  cs_G = FLA_Obj_col_stride( G );
48 
49  rs_H = FLA_Obj_row_stride( H );
50  cs_H = FLA_Obj_col_stride( H );
51 
52  if ( jobu == FLA_SVD_VECTORS_NONE )
53  {
54  m_U = 0;
55  rs_U = 0;
56  cs_U = 0;
57  }
58  else
59  {
60  m_U = FLA_Obj_length( U );
61  rs_U = FLA_Obj_row_stride( U );
62  cs_U = FLA_Obj_col_stride( U );
63  }
64  if ( jobv == FLA_SVD_VECTORS_NONE )
65  {
66  m_V = 0;
67  rs_V = 0;
68  cs_V = 0;
69  }
70  else
71  {
72  m_V = FLA_Obj_length( V );
73  rs_V = FLA_Obj_row_stride( V );
74  cs_V = FLA_Obj_col_stride( V );
75  }
76  if ( apply_Uh2C == FALSE )
77  {
78  m_C = 0;
79  n_C = 0;
80  rs_C = 0;
81  cs_C = 0;
82  }
83  else
84  {
85  m_C = FLA_Obj_length( C );
86  n_C = FLA_Obj_width( C );
87  rs_C = FLA_Obj_row_stride( C );
88  cs_C = FLA_Obj_col_stride( C );
89  }
90 
91  switch ( datatype )
92  {
93  case FLA_FLOAT:
94  {
95  float* buff_d = FLA_FLOAT_PTR( d );
96  float* buff_e = FLA_FLOAT_PTR( e );
97  scomplex* buff_G = FLA_COMPLEX_PTR( G );
98  scomplex* buff_H = FLA_COMPLEX_PTR( H );
99  float* buff_U = ( jobu == FLA_SVD_VECTORS_NONE ? NULL : FLA_FLOAT_PTR( U ) );
100  float* buff_V = ( jobv == FLA_SVD_VECTORS_NONE ? NULL : FLA_FLOAT_PTR( V ) );
101  float* buff_C = ( apply_Uh2C == FALSE ? NULL : FLA_FLOAT_PTR( C ) );
102 
103  r_val = FLA_Bsvd_ext_ops_var1( m_d,
104  m_U,
105  m_V,
106  m_C,
107  n_C,
108  n_GH,
109  n_iter_max,
110  buff_d, inc_d,
111  buff_e, inc_e,
112  buff_G, rs_G, cs_G,
113  buff_H, rs_H, cs_H,
114  buff_U, rs_U, cs_U,
115  buff_V, rs_V, cs_V,
116  buff_C, rs_C, cs_C,
117  b_alg );
118 
119  FLA_Sort_bsvd_ext_b_ops( m_d, buff_d, inc_d,
120  m_U, buff_U, rs_U, cs_U,
121  m_V, buff_V, rs_V, cs_V,
122  n_C, buff_C, rs_C, cs_C );
123  break;
124  }
125 
126  case FLA_DOUBLE:
127  {
128  double* buff_d = FLA_DOUBLE_PTR( d );
129  double* buff_e = FLA_DOUBLE_PTR( e );
130  dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
131  dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H );
132  double* buff_U = ( jobu == FLA_SVD_VECTORS_NONE ? NULL : FLA_DOUBLE_PTR( U ) );
133  double* buff_V = ( jobv == FLA_SVD_VECTORS_NONE ? NULL : FLA_DOUBLE_PTR( V ) );
134  double* buff_C = ( apply_Uh2C == FALSE ? NULL : FLA_DOUBLE_PTR( C ) );
135 
136  r_val = FLA_Bsvd_ext_opd_var1( m_d,
137  m_U,
138  m_V,
139  m_C,
140  n_C,
141  n_GH,
142  n_iter_max,
143  buff_d, inc_d,
144  buff_e, inc_e,
145  buff_G, rs_G, cs_G,
146  buff_H, rs_H, cs_H,
147  buff_U, rs_U, cs_U,
148  buff_V, rs_V, cs_V,
149  buff_C, rs_C, cs_C,
150  b_alg );
151 
152  FLA_Sort_bsvd_ext_b_opd( m_d, buff_d, inc_d,
153  m_U, buff_U, rs_U, cs_U,
154  m_V, buff_V, rs_V, cs_V,
155  n_C, buff_C, rs_C, cs_C );
156  break;
157  }
158 
159  case FLA_COMPLEX:
160  {
161  float* buff_d = FLA_FLOAT_PTR( d );
162  float* buff_e = FLA_FLOAT_PTR( e );
163  scomplex* buff_G = FLA_COMPLEX_PTR( G );
164  scomplex* buff_H = FLA_COMPLEX_PTR( H );
165  scomplex* buff_U = ( jobu == FLA_SVD_VECTORS_NONE ? NULL : FLA_COMPLEX_PTR( U ) );
166  scomplex* buff_V = ( jobv == FLA_SVD_VECTORS_NONE ? NULL : FLA_COMPLEX_PTR( V ) );
167  scomplex* buff_C = ( apply_Uh2C == FALSE ? NULL : FLA_COMPLEX_PTR( C ) );
168 
169  r_val = FLA_Bsvd_ext_opc_var1( m_d,
170  m_U,
171  m_V,
172  m_C,
173  n_C,
174  n_GH,
175  n_iter_max,
176  buff_d, inc_d,
177  buff_e, inc_e,
178  buff_G, rs_G, cs_G,
179  buff_H, rs_H, cs_H,
180  buff_U, rs_U, cs_U,
181  buff_V, rs_V, cs_V,
182  buff_C, rs_C, cs_C,
183  b_alg );
184 
185  FLA_Sort_bsvd_ext_b_opc( m_d, buff_d, inc_d,
186  m_U, buff_U, rs_U, cs_U,
187  m_V, buff_V, rs_V, cs_V,
188  n_C, buff_C, rs_C, cs_C );
189  break;
190  }
191 
192  case FLA_DOUBLE_COMPLEX:
193  {
194  double* buff_d = FLA_DOUBLE_PTR( d );
195  double* buff_e = FLA_DOUBLE_PTR( e );
196  dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
197  dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H );
198  dcomplex* buff_U = ( jobu == FLA_SVD_VECTORS_NONE ? NULL : FLA_DOUBLE_COMPLEX_PTR( U ) );
199  dcomplex* buff_V = ( jobv == FLA_SVD_VECTORS_NONE ? NULL : FLA_DOUBLE_COMPLEX_PTR( V ) );
200  dcomplex* buff_C = ( apply_Uh2C == FALSE ? NULL : FLA_DOUBLE_COMPLEX_PTR( C ) );
201 
202  r_val = FLA_Bsvd_ext_opz_var1( m_d,
203  m_U,
204  m_V,
205  m_C,
206  n_C,
207  n_GH,
208  n_iter_max,
209  buff_d, inc_d,
210  buff_e, inc_e,
211  buff_G, rs_G, cs_G,
212  buff_H, rs_H, cs_H,
213  buff_U, rs_U, cs_U,
214  buff_V, rs_V, cs_V,
215  buff_C, rs_C, cs_C,
216  b_alg );
217 
218  FLA_Sort_bsvd_ext_b_opz( m_d, buff_d, inc_d,
219  m_U, buff_U, rs_U, cs_U,
220  m_V, buff_V, rs_V, cs_V,
221  n_C, buff_C, rs_C, cs_C );
222  break;
223  }
224  }
225 
226  return r_val;
227 }
FLA_Error FLA_Bsvd_ext_opc_var1(int m_d, int m_U, int m_V, int m_C, int n_C, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, scomplex *buff_C, int rs_C, int cs_C, int b_alg)
Definition: FLA_Bsvd_ext_opt_var1.c:738
FLA_Error FLA_Sort_bsvd_ext_b_opc(int m_s, float *s, int inc_s, int m_U, scomplex *U, int rs_U, int cs_U, int m_V, scomplex *V, int rs_V, int cs_V, int n_C, scomplex *C, int rs_C, int cs_C)
Definition: FLA_Sort_bsvd_ext.c:235
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Error FLA_Bsvd_ext_opd_var1(int m_d, int m_U, int m_V, int m_C, int n_C, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, double *buff_C, int rs_C, int cs_C, int b_alg)
Definition: FLA_Bsvd_ext_opt_var1.c:486
int FLA_Error
Definition: FLA_type_defs.h:47
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
FLA_Error FLA_Bsvd_ext_ops_var1(int m_d, int m_U, int m_V, int m_C, int n_C, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, float *buff_C, int rs_C, int cs_C, int b_alg)
Definition: FLA_Bsvd_ext_opt_var1.c:231
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Bsvd_ext_opz_var1(int m_d, int m_U, int m_V, int m_C, int n_C, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, dcomplex *buff_C, int rs_C, int cs_C, int b_alg)
Definition: FLA_Bsvd_ext_opt_var1.c:989
Definition: blis_type_defs.h:132
dim_t FLA_Obj_vector_dim(FLA_Obj obj)
Definition: FLA_Query.c:137
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Error FLA_Sort_bsvd_ext_b_opz(int m_s, double *s, int inc_s, int m_U, dcomplex *U, int rs_U, int cs_U, int m_V, dcomplex *V, int rs_V, int cs_V, int n_C, dcomplex *C, int rs_C, int cs_C)
Definition: FLA_Sort_bsvd_ext.c:257
FLA_Error FLA_Sort_bsvd_ext_b_opd(int m_s, double *s, int inc_s, int m_U, double *U, int rs_U, int cs_U, int m_V, double *V, int rs_V, int cs_V, int n_C, double *C, int rs_C, int cs_C)
Definition: FLA_Sort_bsvd_ext.c:213
FLA_Error FLA_Sort_bsvd_ext_b_ops(int m_s, float *s, int inc_s, int m_U, float *U, int rs_U, int cs_U, int m_V, float *V, int rs_V, int cs_V, int n_C, float *C, int rs_C, int cs_C)
Definition: FLA_Sort_bsvd_ext.c:191
Definition: blis_type_defs.h:137

◆ FLA_Bsvd_ext_opz_var1()

FLA_Error FLA_Bsvd_ext_opz_var1 ( int  m_d,
int  m_U,
int  m_V,
int  m_C,
int  n_C,
int  n_GH,
int  n_iter_max,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_H,
int  rs_H,
int  cs_H,
dcomplex buff_U,
int  rs_U,
int  cs_U,
dcomplex buff_V,
int  rs_V,
int  cs_V,
dcomplex buff_C,
int  rs_C,
int  cs_C,
int  b_alg 
)

References bl1_d0(), bl1_dm1(), bl1_z1(), bl1_zdscalv(), bl1_zsetm(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_blz_var3(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), FLA_Mach_params_opd(), and i.

Referenced by FLA_Bsvd_ext_opt_var1().

1004 {
1005  dcomplex one = bl1_z1();
1006  double rzero = bl1_d0();
1007 
1008  int maxitr = 6;
1009 
1010  double eps;
1011  double tolmul;
1012  double tol;
1013  double thresh;
1014 
1015  dcomplex* G;
1016  dcomplex* H;
1017  double* d1;
1018  double* e1;
1019  int r_val;
1020  int done;
1021  int m_GH_sweep_max;
1022  int ij_begin;
1023  int ijTL, ijBR;
1024  int m_A11;
1025  int n_iter_perf;
1026  int n_UV_apply;
1027  int total_deflations;
1028  int n_deflations;
1029  int n_iter_prev;
1030  int n_iter_perf_sweep_max;
1031 
1032  // Compute some convergence constants.
1033  eps = FLA_Mach_params_opd( FLA_MACH_EPS );
1034  tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
1036  tolmul,
1037  maxitr,
1038  buff_d, inc_d,
1039  buff_e, inc_e,
1040  &tol,
1041  &thresh );
1042 
1043  // Initialize our completion flag.
1044  done = FALSE;
1045 
1046  // Initialize a counter that holds the maximum number of rows of G
1047  // and H that we would need to initialize for the next sweep.
1048  m_GH_sweep_max = m_d - 1;
1049 
1050  // Initialize a counter for the total number of iterations performed.
1051  n_iter_prev = 0;
1052 
1053  // Iterate until the matrix has completely deflated.
1054  for ( total_deflations = 0; done != TRUE; )
1055  {
1056 
1057  // Initialize G and H to contain only identity rotations.
1058  bl1_zsetm( m_GH_sweep_max,
1059  n_GH,
1060  &one,
1061  buff_G, rs_G, cs_G );
1062  bl1_zsetm( m_GH_sweep_max,
1063  n_GH,
1064  &one,
1065  buff_H, rs_H, cs_H );
1066 
1067  // Keep track of the maximum number of iterations performed in the
1068  // current sweep. This is used when applying the sweep's Givens
1069  // rotations.
1070  n_iter_perf_sweep_max = 0;
1071 
1072  // Perform a sweep: Move through the matrix and perform a bidiagonal
1073  // SVD on each non-zero submatrix that is encountered. During the
1074  // first time through, ijTL will be 0 and ijBR will be m_d - 1.
1075  for ( ij_begin = 0; ij_begin < m_d; )
1076  {
1077  // Search for the first submatrix along the diagonal that is
1078  // bounded by zeroes (or endpoints of the matrix). If no
1079  // submatrix is found (ie: if the entire superdiagonal is zero
1080  // then FLA_FAILURE is returned. This function also inspects
1081  // superdiagonal elements for proximity to zero. If a given
1082  // element is close enough to zero, then it is deemed
1083  // converged and manually set to zero.
1084  r_val = FLA_Bsvd_find_submatrix_opd( m_d,
1085  ij_begin,
1086  buff_d, inc_d,
1087  buff_e, inc_e,
1088  &ijTL,
1089  &ijBR );
1090 
1091  // Verify that a submatrix was found. If one was not found,
1092  // then we are done with the current sweep. Furthermore, if
1093  // a submatrix was not found AND we began our search at the
1094  // beginning of the matrix (ie: ij_begin == 0), then the
1095  // matrix has completely deflated and so we are done with
1096  // Francis step iteration.
1097  if ( r_val == FLA_FAILURE )
1098  {
1099  if ( ij_begin == 0 )
1100  {
1101  done = TRUE;
1102  }
1103 
1104  // Break out of the current sweep so we can apply the last
1105  // remaining Givens rotations.
1106  break;
1107  }
1108 
1109  // If we got this far, then:
1110  // (a) ijTL refers to the index of the first non-zero
1111  // superdiagonal along the diagonal, and
1112  // (b) ijBR refers to either:
1113  // - the first zero element that occurs after ijTL, or
1114  // - the the last diagonal element.
1115  // Note that ijTL and ijBR also correspond to the first and
1116  // last diagonal elements of the submatrix of interest. Thus,
1117  // we may compute the dimension of this submatrix as:
1118  m_A11 = ijBR - ijTL + 1;
1119 
1120  // Adjust ij_begin, which gets us ready for the next submatrix
1121  // search in the current sweep.
1122  ij_begin = ijBR + 1;
1123 
1124  // Index to the submatrices upon which we will operate.
1125  d1 = buff_d + ijTL * inc_d;
1126  e1 = buff_e + ijTL * inc_e;
1127  G = buff_G + ijTL * rs_G;
1128  H = buff_H + ijTL * rs_H;
1129 
1130  // Search for a batch of singular values, recursing on deflated
1131  // subproblems whenever a split occurs. Iteration continues as
1132  // long as
1133  // (a) there is still matrix left to operate on, and
1134  // (b) the number of iterations performed in this batch is
1135  // less than n_G.
1136  // If/when either of the two above conditions fails to hold,
1137  // the function returns.
1138  n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11,
1139  n_GH,
1140  ijTL,
1141  tol,
1142  thresh,
1143  d1, inc_d,
1144  e1, inc_e,
1145  G, rs_G, cs_G,
1146  H, rs_H, cs_H,
1147  &n_iter_perf );
1148 
1149  // Record the number of deflations that were observed.
1150  total_deflations += n_deflations;
1151 
1152  // Update the maximum number of iterations performed in the
1153  // current sweep.
1154  n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
1155 
1156  // Store the most recent value of ijBR in m_G_sweep_max.
1157  // When the sweep is done, this value will contain the minimum
1158  // number of rows of G and H we can apply and safely include all
1159  // non-identity rotations that were computed during the
1160  // singular value searches.
1161  m_GH_sweep_max = ijBR;
1162 
1163  // Make sure we haven't exceeded our maximum iteration count.
1164  if ( n_iter_prev >= n_iter_max * m_d )
1165  {
1166  FLA_Abort();
1167  //return FLA_FAILURE;
1168  }
1169  }
1170 
1171  // The sweep is complete. Now we must apply the Givens rotations
1172  // that were accumulated during the sweep.
1173 
1174  // Recall that the number of columns of U and V to which we apply
1175  // rotations is one more than the number of rotations.
1176  n_UV_apply = m_GH_sweep_max + 1;
1177 
1178  // Apply the Givens rotations. Note that we only apply k sets of
1179  // rotations, where k = n_iter_perf_sweep_max. Also note that we only
1180  // apply to n_UV_apply columns of U and V since this is the most we
1181  // need to touch given the most recent value stored to m_GH_sweep_max.
1182 
1183  if ( buff_U != NULL ) // ( rs_U == 0 && cs_U == 0 ) // if U is requested
1184  FLA_Apply_G_rf_blz_var3( n_iter_perf_sweep_max,
1185  m_U,
1186  n_UV_apply,
1187  buff_G, rs_G, cs_G,
1188  buff_U, rs_U, cs_U,
1189  b_alg );
1190 
1191  if ( buff_V != NULL ) // ( rs_V == 0 && cs_V == 0 ) // if V is requested
1192  FLA_Apply_G_rf_blz_var3( n_iter_perf_sweep_max,
1193  m_V,
1194  n_UV_apply,
1195  buff_H, rs_H, cs_H,
1196  buff_V, rs_V, cs_V,
1197  b_alg );
1198 
1199  // lf with non-flipped cs/rs m/n should be used;
1200  // but the same operation can be achieved by using rf
1201  // ( U^H C )^H = C^H U where U is G_0 G_1 G_2 .... G_{k-1}
1202  if ( buff_C != NULL ) // ( rs_C == 0 && cs_C == 0 ) // if C is requested, flip
1203  {
1204  FLA_Apply_G_rf_blz_var3( n_iter_perf_sweep_max,
1205  n_C,
1206  m_C,
1207  buff_G, rs_G, cs_G,
1208  buff_C, cs_C, rs_C,
1209  b_alg );
1210  }
1211 
1212  // Increment the total number of iterations previously performed.
1213  n_iter_prev += n_iter_perf_sweep_max;
1214  }
1215 
1216  // Make all the singular values positive.
1217  {
1218  int i;
1219  double minus_one = bl1_dm1();
1220 
1221  for ( i = 0; i < m_d; ++i )
1222  {
1223  if ( buff_d[ (i )*inc_d ] < rzero )
1224  {
1225  buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
1226 
1227  // Scale the right singular vectors.
1228  if ( buff_V != NULL )
1230  m_V,
1231  &minus_one,
1232  buff_V + (i )*cs_V, rs_V );
1233  }
1234  }
1235  }
1236 
1237  return n_iter_prev;
1238 }
void bl1_zdscalv(conj1_t conj, int n, double *alpha, dcomplex *x, int incx)
Definition: bl1_scalv.c:61
Definition: blis_type_defs.h:81
FLA_Error FLA_Bsvd_iteracc_v_opd_var1(int m_A, int n_GH, int ijTL, double tol, double thresh, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, int *n_iter_perf)
Definition: FLA_Bsvd_iteracc_v_opt_var1.c:176
double FLA_Mach_params_opd(FLA_Machval machval)
Definition: FLA_Mach_params.c:74
double bl1_d0(void)
Definition: bl1_constants.c:118
FLA_Error FLA_Apply_G_rf_blz_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition: FLA_Apply_G_rf_blk_var3.c:186
FLA_Error FLA_Bsvd_compute_tol_thresh_opd(int n_A, double tolmul, double maxitr, double *buff_d, int inc_d, double *buff_e, int inc_e, double *tol, double *thresh)
Definition: FLA_Bsvd_compute_tol_thresh.c:137
void FLA_Abort(void)
Definition: FLA_Error.c:248
FLA_Error FLA_Bsvd_find_submatrix_opd(int mn_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR)
Definition: FLA_Bsvd_find_submatrix.c:73
double bl1_dm1(void)
Definition: bl1_constants.c:182
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:78
int i
Definition: bl1_axmyv2.c:145
Definition: blis_type_defs.h:137
dcomplex bl1_z1(void)
Definition: bl1_constants.c:69