libflame  revision_anchor
Functions
FLA_Bidiag_UT_u.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_Bidiag_UT_u_unb_var1 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_blk_var1 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_step_unb_var1 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_unb_var2 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_blk_var2 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_blf_var2 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_step_unb_var2 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_unb_var3 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_blk_var3 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_blf_var3 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_step_unb_var3 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_unb_var4 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_blk_var4 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_blf_var4 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_step_unb_var4 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_unb_var5 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_blk_var5 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_step_unb_var5 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_opt_var1 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_opt_var1 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ops_var1 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opd_var1 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opc_var1 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opz_var1 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_opt_var2 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_opt_var2 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ops_var2 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opd_var2 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opc_var2 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opz_var2 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_opt_var3 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_opt_var3 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ops_var3 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opd_var3 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opc_var3 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opz_var3 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_opt_var4 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_opt_var4 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ops_var4 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opd_var4 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opc_var4 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opz_var4 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_opt_var5 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_opt_var5 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ops_var5 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opd_var5 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opc_var5 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opz_var5 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_ofu_var2 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofu_var2 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofs_var2 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofd_var2 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofc_var2 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofz_var2 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_ofu_var3 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofu_var3 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofs_var3 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofd_var3 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofc_var3 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofz_var3 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_ofu_var4 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofu_var4 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofs_var4 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofd_var4 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofc_var4 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofz_var4 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Fused_Gerc2_opt_var1 (FLA_Obj alpha, FLA_Obj u, FLA_Obj y, FLA_Obj z, FLA_Obj v, FLA_Obj A)
 
FLA_Error FLA_Fused_Gerc2_ops_var1 (int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Fused_Gerc2_opd_var1 (int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Fused_Gerc2_opc_var1 (int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Fused_Gerc2_opz_var1 (int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opt_var1 (FLA_Obj A, FLA_Obj u, FLA_Obj tau, FLA_Obj a, FLA_Obj beta, FLA_Obj y, FLA_Obj w)
 
FLA_Error FLA_Fused_Ahx_Axpy_Ax_ops_var1 (int m_A, int n_A, float *buff_tau, float *buff_beta, float *buff_A, int rs_A, int cs_A, float *buff_u, int inc_u, float *buff_a, int inc_a, float *buff_y, int inc_y, float *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opd_var1 (int m_A, int n_A, double *buff_tau, double *buff_beta, double *buff_A, int rs_A, int cs_A, double *buff_u, int inc_u, double *buff_a, int inc_a, double *buff_y, int inc_y, double *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opc_var1 (int m_A, int n_A, scomplex *buff_tau, scomplex *buff_beta, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_u, int inc_u, scomplex *buff_a, int inc_a, scomplex *buff_y, int inc_y, scomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opz_var1 (int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_beta, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_u, int inc_u, dcomplex *buff_a, int inc_a, dcomplex *buff_y, int inc_y, dcomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1 (FLA_Obj alpha, FLA_Obj tau, FLA_Obj u, FLA_Obj y, FLA_Obj z, FLA_Obj v, FLA_Obj A, FLA_Obj up, FLA_Obj a, FLA_Obj w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1 (int m_A, int n_A, float *buff_tau, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A, float *buff_up, int inc_up, float *buff_a, int inc_a, float *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1 (int m_A, int n_A, double *buff_tau, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A, double *buff_up, int inc_up, double *buff_a, int inc_a, double *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1 (int m_A, int n_A, scomplex *buff_tau, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_up, int inc_up, scomplex *buff_a, int inc_a, scomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1 (int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_up, int inc_up, dcomplex *buff_a, int inc_a, dcomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_UYx_ZVx_opt_var1 (FLA_Obj delta, FLA_Obj a, FLA_Obj U, FLA_Obj Y, FLA_Obj Z, FLA_Obj V, FLA_Obj A, FLA_Obj temp, FLA_Obj t, FLA_Obj w, FLA_Obj al)
 
FLA_Error FLA_Fused_UYx_ZVx_ops_var1 (int m_U, int n_U, int m_V, int n_V, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_V, int rs_V, int cs_V, float *buff_A, int rs_A, int cs_A, float *buff_temp, int inc_temp, float *buff_t, int inc_t, float *buff_a, int inc_a, float *buff_w, int inc_w, float *buff_al, int inc_al)
 
FLA_Error FLA_Fused_UYx_ZVx_opd_var1 (int m_U, int n_U, int m_V, int n_V, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_V, int rs_V, int cs_V, double *buff_A, int rs_A, int cs_A, double *buff_temp, int inc_temp, double *buff_t, int inc_t, double *buff_a, int inc_a, double *buff_w, int inc_w, double *buff_al, int inc_al)
 
FLA_Error FLA_Fused_UYx_ZVx_opc_var1 (int m_U, int n_U, int m_V, int n_V, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_V, int rs_V, int cs_V, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_temp, int inc_temp, scomplex *buff_t, int inc_t, scomplex *buff_a, int inc_a, scomplex *buff_w, int inc_w, scomplex *buff_al, int inc_al)
 
FLA_Error FLA_Fused_UYx_ZVx_opz_var1 (int m_U, int n_U, int m_V, int n_V, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_V, int rs_V, int cs_V, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_temp, int inc_temp, dcomplex *buff_t, int inc_t, dcomplex *buff_a, int inc_a, dcomplex *buff_w, int inc_w, dcomplex *buff_al, int inc_al)
 

Function Documentation

◆ FLA_Bidiag_UT_u_blf_var2()

FLA_Error FLA_Bidiag_UT_u_blf_var2 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Bidiag_UT_u_step_ofu_var2(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), and FLA_Repart_2x2_to_3x3().

Referenced by FLA_Bidiag_UT_u().

14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18  FLA_Obj TUL, TUR, TU0, TU1, TU2;
19  FLA_Obj TVL, TVR, TV0, TV1, TV2;
20 
21  FLA_Obj TU1_tl;
22  FLA_Obj TV1_tl;
23  FLA_Obj none, none2, none3;
24  dim_t b_alg, b;
25 
26  b_alg = FLA_Obj_length( TU );
27 
28  FLA_Part_2x2( A, &ATL, &ATR,
29  &ABL, &ABR, 0, 0, FLA_TL );
30  FLA_Part_1x2( TU, &TUL, &TUR, 0, FLA_LEFT );
31  FLA_Part_1x2( TV, &TVL, &TVR, 0, FLA_LEFT );
32 
33  while ( FLA_Obj_min_dim( ABR ) > 0 )
34  {
35  b = min( FLA_Obj_min_dim( ABR ), b_alg );
36 
37  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
38  /* ************* */ /* ******************** */
39  &A10, /**/ &A11, &A12,
40  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
41  b, b, FLA_BR );
42  FLA_Repart_1x2_to_1x3( TUL, /**/ TUR, &TU0, /**/ &TU1, &TU2,
43  b, FLA_RIGHT );
44  FLA_Repart_1x2_to_1x3( TVL, /**/ TVR, &TV0, /**/ &TV1, &TV2,
45  b, FLA_RIGHT );
46 
47  /*------------------------------------------------------------*/
48 
49  FLA_Part_2x2( TU1, &TU1_tl, &none,
50  &none2, &none3, b, b, FLA_TL );
51 
52  FLA_Part_2x2( TV1, &TV1_tl, &none,
53  &none2, &none3, b, b, FLA_TL );
54 
55  // [ ABR, T1 ] = FLA_Bidiag_UT_u_step_unb_var2( ABR, TU1, TV1, b );
56  //FLA_Bidiag_UT_u_step_unb_var2( ABR, TU1_tl, TV1_tl );
57  FLA_Bidiag_UT_u_step_ofu_var2( ABR, TU1_tl, TV1_tl );
58  //FLA_Bidiag_UT_u_step_opt_var2( ABR, TU1_tl, TV1_tl );
59 
60  /*------------------------------------------------------------*/
61 
62  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
63  A10, A11, /**/ A12,
64  /* ************** */ /* ****************** */
65  &ABL, /**/ &ABR, A20, A21, /**/ A22,
66  FLA_TL );
67  FLA_Cont_with_1x3_to_1x2( &TUL, /**/ &TUR, TU0, TU1, /**/ TU2,
68  FLA_LEFT );
69  FLA_Cont_with_1x3_to_1x2( &TVL, /**/ &TVR, TV0, TV1, /**/ TV2,
70  FLA_LEFT );
71  }
72 
73  return FLA_SUCCESS;
74 }
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:267
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
Definition: FLA_type_defs.h:158
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:475
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
FLA_Error FLA_Bidiag_UT_u_step_ofu_var2(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_fus_var2.c:18
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153

◆ FLA_Bidiag_UT_u_blf_var3()

FLA_Error FLA_Bidiag_UT_u_blf_var3 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Bidiag_UT_u_step_ofu_var3(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), and FLA_Repart_2x2_to_3x3().

Referenced by FLA_Bidiag_UT_u().

14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18  FLA_Obj TUL, TUR, TU0, TU1, TU2;
19  FLA_Obj TVL, TVR, TV0, TV1, TV2;
20 
21  FLA_Obj TU1_tl;
22  FLA_Obj TV1_tl;
23  FLA_Obj none, none2, none3;
24  dim_t b_alg, b;
25 
26  b_alg = FLA_Obj_length( TU );
27 
28  FLA_Part_2x2( A, &ATL, &ATR,
29  &ABL, &ABR, 0, 0, FLA_TL );
30  FLA_Part_1x2( TU, &TUL, &TUR, 0, FLA_LEFT );
31  FLA_Part_1x2( TV, &TVL, &TVR, 0, FLA_LEFT );
32 
33  while ( FLA_Obj_min_dim( ABR ) > 0 )
34  {
35  b = min( FLA_Obj_min_dim( ABR ), b_alg );
36 
37  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
38  /* ************* */ /* ******************** */
39  &A10, /**/ &A11, &A12,
40  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
41  b, b, FLA_BR );
42  FLA_Repart_1x2_to_1x3( TUL, /**/ TUR, &TU0, /**/ &TU1, &TU2,
43  b, FLA_RIGHT );
44  FLA_Repart_1x2_to_1x3( TVL, /**/ TVR, &TV0, /**/ &TV1, &TV2,
45  b, FLA_RIGHT );
46 
47  /*------------------------------------------------------------*/
48 
49  FLA_Part_2x2( TU1, &TU1_tl, &none,
50  &none2, &none3, b, b, FLA_TL );
51 
52  FLA_Part_2x2( TV1, &TV1_tl, &none,
53  &none2, &none3, b, b, FLA_TL );
54 
55  // [ ABR, T1 ] = FLA_Bidiag_UT_u_step_unb_var3( ABR, TU1, TV1, b );
56  //FLA_Bidiag_UT_u_step_unb_var3( ABR, TU1_tl, TV1_tl );
57  FLA_Bidiag_UT_u_step_ofu_var3( ABR, TU1_tl, TV1_tl );
58  //FLA_Bidiag_UT_u_step_opt_var3( ABR, TU1_tl, TV1_tl );
59 
60  /*------------------------------------------------------------*/
61 
62  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
63  A10, A11, /**/ A12,
64  /* ************** */ /* ****************** */
65  &ABL, /**/ &ABR, A20, A21, /**/ A22,
66  FLA_TL );
67  FLA_Cont_with_1x3_to_1x2( &TUL, /**/ &TUR, TU0, TU1, /**/ TU2,
68  FLA_LEFT );
69  FLA_Cont_with_1x3_to_1x2( &TVL, /**/ &TVR, TV0, TV1, /**/ TV2,
70  FLA_LEFT );
71  }
72 
73  return FLA_SUCCESS;
74 }
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:267
FLA_Error FLA_Bidiag_UT_u_step_ofu_var3(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_fus_var3.c:18
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
Definition: FLA_type_defs.h:158
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:475
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153

◆ FLA_Bidiag_UT_u_blf_var4()

FLA_Error FLA_Bidiag_UT_u_blf_var4 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Bidiag_UT_u_step_ofu_var4(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Gemm_external(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u().

14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18  FLA_Obj UT, U0,
19  UB, U1,
20  U2;
21  FLA_Obj VT, V0,
22  VB, V1,
23  V2;
24  FLA_Obj YT, Y0,
25  YB, Y1,
26  Y2;
27  FLA_Obj ZT, Z0,
28  ZB, Z1,
29  Z2;
30  FLA_Obj TUL, TUR, TU0, TU1, TU2;
31  FLA_Obj TVL, TVR, TV0, TV1, TV2;
32 
33  FLA_Obj U, V, Y, Z;
34  FLA_Obj ABR_l, ABR_t;
35  FLA_Obj UB_l, U2_l;
36  FLA_Obj VB_l, V2_l;
37  FLA_Obj YB_l, Y2_l;
38  FLA_Obj ZB_l, Z2_l;
39  FLA_Obj TU1_tl;
40  FLA_Obj TV1_tl;
41  FLA_Obj none, none2, none3;
42  FLA_Obj VB_tl,
43  VB_bl;
44  FLA_Datatype datatype_A;
45  dim_t m_A, n_A;
46  dim_t b_alg, b;
47 
48  b_alg = FLA_Obj_length( TU );
49 
50  datatype_A = FLA_Obj_datatype( A );
51  m_A = FLA_Obj_length( A );
52  n_A = FLA_Obj_width( A );
53 
54  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
55  FLA_Obj_create( datatype_A, n_A, b_alg, 0, 0, &V );
56  FLA_Obj_create( datatype_A, n_A, b_alg, 0, 0, &Y );
57  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
58 
59  FLA_Part_2x2( A, &ATL, &ATR,
60  &ABL, &ABR, 0, 0, FLA_TL );
61  FLA_Part_2x1( U, &UT,
62  &UB, 0, FLA_TOP );
63  FLA_Part_2x1( V, &VT,
64  &VB, 0, FLA_TOP );
65  FLA_Part_2x1( Y, &YT,
66  &YB, 0, FLA_TOP );
67  FLA_Part_2x1( Z, &ZT,
68  &ZB, 0, FLA_TOP );
69  FLA_Part_1x2( TU, &TUL, &TUR, 0, FLA_LEFT );
70  FLA_Part_1x2( TV, &TVL, &TVR, 0, FLA_LEFT );
71 
72  while ( FLA_Obj_min_dim( ABR ) > 0 )
73  {
74  b = min( FLA_Obj_min_dim( ABR ), b_alg );
75 
76  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
77  /* ************* */ /* ******************** */
78  &A10, /**/ &A11, &A12,
79  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
80  b, b, FLA_BR );
81  FLA_Repart_2x1_to_3x1( UT, &U0,
82  /* ** */ /* ** */
83  &U1,
84  UB, &U2, b, FLA_BOTTOM );
85  FLA_Repart_2x1_to_3x1( VT, &V0,
86  /* ** */ /* ** */
87  &V1,
88  VB, &V2, b, FLA_BOTTOM );
89  FLA_Repart_2x1_to_3x1( YT, &Y0,
90  /* ** */ /* ** */
91  &Y1,
92  YB, &Y2, b, FLA_BOTTOM );
93  FLA_Repart_2x1_to_3x1( ZT, &Z0,
94  /* ** */ /* ** */
95  &Z1,
96  ZB, &Z2, b, FLA_BOTTOM );
97  FLA_Repart_1x2_to_1x3( TUL, /**/ TUR, &TU0, /**/ &TU1, &TU2,
98  b, FLA_RIGHT );
99  FLA_Repart_1x2_to_1x3( TVL, /**/ TVR, &TV0, /**/ &TV1, &TV2,
100  b, FLA_RIGHT );
101 
102  /*------------------------------------------------------------*/
103 
104  FLA_Part_2x2( TU1, &TU1_tl, &none,
105  &none2, &none3, b, b, FLA_TL );
106 
107  FLA_Part_2x2( TV1, &TV1_tl, &none,
108  &none2, &none3, b, b, FLA_TL );
109 
110  FLA_Part_1x2( ABR, &ABR_l, &none, b, FLA_LEFT );
111  FLA_Part_2x1( ABR, &ABR_t,
112  &none, b, FLA_TOP );
113 
114  FLA_Part_1x2( UB, &UB_l, &none, b, FLA_LEFT );
115  FLA_Part_1x2( VB, &VB_l, &none, b, FLA_LEFT );
116  FLA_Part_1x2( YB, &YB_l, &none, b, FLA_LEFT );
117  FLA_Part_1x2( ZB, &ZB_l, &none, b, FLA_LEFT );
118 
119  FLA_Part_2x1( UB_l, &none,
120  &U2_l, b, FLA_TOP );
121  FLA_Part_2x1( VB_l, &none,
122  &V2_l, b, FLA_TOP );
123  FLA_Part_2x1( YB_l, &none,
124  &Y2_l, b, FLA_TOP );
125  FLA_Part_2x1( ZB_l, &none,
126  &Z2_l, b, FLA_TOP );
127 
128  // [ ABR, YB, ZB, TU1, TV1 ] = FLA_Bidiag_UT_u_step_unb_var4( ABR, TU1, TV1, b );
129  //FLA_Bidiag_UT_u_step_unb_var4( ABR, YB, ZB, TU1_tl, TV1_tl );
130  FLA_Bidiag_UT_u_step_ofu_var4( ABR, YB, ZB, TU1_tl, TV1_tl );
131  //FLA_Bidiag_UT_u_step_opt_var4( ABR, YB, ZB, TU1_tl, TV1_tl );
132 
133  if ( FLA_Obj_length( A22 ) > 0 )
134  {
135  // Build UB from ABR, with explicit unit diagonal and zeros.
136  FLA_Copy( ABR_l, UB_l );
137  FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, UB_l );
138 
139  // Build VB from ABR, with explicit unit subdiagonal and zeros.
140  FLA_Copyt( FLA_TRANSPOSE, ABR_t, VB_l );
141  FLA_Part_2x1( VB_l, &VB_tl,
142  &VB_bl, 1, FLA_TOP );
143  FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, VB_bl );
144  FLA_Set( FLA_ZERO, VB_tl );
145 
146  // A22 = A22 - U2 * Y2' - Z2 * V2';
147  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
148  FLA_MINUS_ONE, U2_l, Y2_l, FLA_ONE, A22 );
149  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
150  FLA_MINUS_ONE, Z2_l, V2_l, FLA_ONE, A22 );
151  }
152 
153  /*------------------------------------------------------------*/
154 
155  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
156  A10, A11, /**/ A12,
157  /* ************** */ /* ****************** */
158  &ABL, /**/ &ABR, A20, A21, /**/ A22,
159  FLA_TL );
160  FLA_Cont_with_3x1_to_2x1( &UT, U0,
161  U1,
162  /* ** */ /* ** */
163  &UB, U2, FLA_TOP );
164  FLA_Cont_with_3x1_to_2x1( &VT, V0,
165  V1,
166  /* ** */ /* ** */
167  &VB, V2, FLA_TOP );
168  FLA_Cont_with_3x1_to_2x1( &YT, Y0,
169  Y1,
170  /* ** */ /* ** */
171  &YB, Y2, FLA_TOP );
172  FLA_Cont_with_3x1_to_2x1( &ZT, Z0,
173  Z1,
174  /* ** */ /* ** */
175  &ZB, Z2, FLA_TOP );
176  FLA_Cont_with_1x3_to_1x2( &TUL, /**/ &TUR, TU0, TU1, /**/ TU2,
177  FLA_LEFT );
178  FLA_Cont_with_1x3_to_1x2( &TVL, /**/ &TVR, TV0, TV1, /**/ TV2,
179  FLA_LEFT );
180  }
181 
182  FLA_Obj_free( &U );
183  FLA_Obj_free( &V );
184  FLA_Obj_free( &Y );
185  FLA_Obj_free( &Z );
186 
187  return FLA_SUCCESS;
188 }
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition: FLA_Obj.c:55
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:226
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:267
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Error FLA_Copy(FLA_Obj A, FLA_Obj B)
Definition: FLA_Copy.c:15
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Copyt(FLA_Trans trans, FLA_Obj A, FLA_Obj B)
Definition: FLA_Copyt.c:15
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:428
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Set(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Set.c:13
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:475
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
FLA_Error FLA_Triangularize(FLA_Uplo uplo, FLA_Diag diag, FLA_Obj A)
Definition: FLA_Triangularize.c:13
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
int FLA_Datatype
Definition: FLA_type_defs.h:49
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
FLA_Error FLA_Bidiag_UT_u_step_ofu_var4(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_fus_var4.c:35
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Error FLA_Gemm_external(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C)
Definition: FLA_Gemm_external.c:13
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153

◆ FLA_Bidiag_UT_u_blk_var1()

FLA_Error FLA_Bidiag_UT_u_blk_var1 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Bidiag_UT_u_step_opt_var1(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), and FLA_Repart_2x2_to_3x3().

Referenced by FLA_Bidiag_UT_u().

14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18  FLA_Obj TUL, TUR, TU0, TU1, TU2;
19  FLA_Obj TVL, TVR, TV0, TV1, TV2;
20 
21  FLA_Obj TU1_tl;
22  FLA_Obj TV1_tl;
23  FLA_Obj none, none2, none3;
24  dim_t b_alg, b;
25 
26  b_alg = FLA_Obj_length( TU );
27 
28  FLA_Part_2x2( A, &ATL, &ATR,
29  &ABL, &ABR, 0, 0, FLA_TL );
30  FLA_Part_1x2( TU, &TUL, &TUR, 0, FLA_LEFT );
31  FLA_Part_1x2( TV, &TVL, &TVR, 0, FLA_LEFT );
32 
33  while ( FLA_Obj_min_dim( ABR ) > 0 )
34  {
35  b = min( FLA_Obj_min_dim( ABR ), b_alg );
36 
37  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
38  /* ************* */ /* ******************** */
39  &A10, /**/ &A11, &A12,
40  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
41  b, b, FLA_BR );
42  FLA_Repart_1x2_to_1x3( TUL, /**/ TUR, &TU0, /**/ &TU1, &TU2,
43  b, FLA_RIGHT );
44  FLA_Repart_1x2_to_1x3( TVL, /**/ TVR, &TV0, /**/ &TV1, &TV2,
45  b, FLA_RIGHT );
46 
47  /*------------------------------------------------------------*/
48 
49  FLA_Part_2x2( TU1, &TU1_tl, &none,
50  &none2, &none3, b, b, FLA_TL );
51 
52  FLA_Part_2x2( TV1, &TV1_tl, &none,
53  &none2, &none3, b, b, FLA_TL );
54 
55  // [ ABR, T1 ] = FLA_Bidiag_UT_u_step_unb_var1( ABR, TU1, TV1, b );
56  //FLA_Bidiag_UT_u_step_unb_var1( ABR, TU1_tl, TV1_tl );
57  FLA_Bidiag_UT_u_step_opt_var1( ABR, TU1_tl, TV1_tl );
58 
59  /*------------------------------------------------------------*/
60 
61  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
62  A10, A11, /**/ A12,
63  /* ************** */ /* ****************** */
64  &ABL, /**/ &ABR, A20, A21, /**/ A22,
65  FLA_TL );
66  FLA_Cont_with_1x3_to_1x2( &TUL, /**/ &TUR, TU0, TU1, /**/ TU2,
67  FLA_LEFT );
68  FLA_Cont_with_1x3_to_1x2( &TVL, /**/ &TVR, TV0, TV1, /**/ TV2,
69  FLA_LEFT );
70  }
71 
72  return FLA_SUCCESS;
73 }
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:267
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
Definition: FLA_type_defs.h:158
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:475
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
FLA_Error FLA_Bidiag_UT_u_step_opt_var1(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_opt_var1.c:18
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153

◆ FLA_Bidiag_UT_u_blk_var2()

FLA_Error FLA_Bidiag_UT_u_blk_var2 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Bidiag_UT_u_step_opt_var2(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), and FLA_Repart_2x2_to_3x3().

Referenced by FLA_Bidiag_UT_u().

14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18  FLA_Obj TUL, TUR, TU0, TU1, TU2;
19  FLA_Obj TVL, TVR, TV0, TV1, TV2;
20 
21  FLA_Obj TU1_tl;
22  FLA_Obj TV1_tl;
23  FLA_Obj none, none2, none3;
24  dim_t b_alg, b;
25 
26  b_alg = FLA_Obj_length( TU );
27 
28  FLA_Part_2x2( A, &ATL, &ATR,
29  &ABL, &ABR, 0, 0, FLA_TL );
30  FLA_Part_1x2( TU, &TUL, &TUR, 0, FLA_LEFT );
31  FLA_Part_1x2( TV, &TVL, &TVR, 0, FLA_LEFT );
32 
33  while ( FLA_Obj_min_dim( ABR ) > 0 )
34  {
35  b = min( FLA_Obj_min_dim( ABR ), b_alg );
36 
37  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
38  /* ************* */ /* ******************** */
39  &A10, /**/ &A11, &A12,
40  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
41  b, b, FLA_BR );
42  FLA_Repart_1x2_to_1x3( TUL, /**/ TUR, &TU0, /**/ &TU1, &TU2,
43  b, FLA_RIGHT );
44  FLA_Repart_1x2_to_1x3( TVL, /**/ TVR, &TV0, /**/ &TV1, &TV2,
45  b, FLA_RIGHT );
46 
47  /*------------------------------------------------------------*/
48 
49  FLA_Part_2x2( TU1, &TU1_tl, &none,
50  &none2, &none3, b, b, FLA_TL );
51 
52  FLA_Part_2x2( TV1, &TV1_tl, &none,
53  &none2, &none3, b, b, FLA_TL );
54 
55  // [ ABR, T1 ] = FLA_Bidiag_UT_u_step_unb_var2( ABR, TU1, TV1, b );
56  //FLA_Bidiag_UT_u_step_unb_var2( ABR, TU1_tl, TV1_tl );
57  //FLA_Bidiag_UT_u_step_ofu_var2( ABR, TU1_tl, TV1_tl );
58  FLA_Bidiag_UT_u_step_opt_var2( ABR, TU1_tl, TV1_tl );
59 
60  /*------------------------------------------------------------*/
61 
62  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
63  A10, A11, /**/ A12,
64  /* ************** */ /* ****************** */
65  &ABL, /**/ &ABR, A20, A21, /**/ A22,
66  FLA_TL );
67  FLA_Cont_with_1x3_to_1x2( &TUL, /**/ &TUR, TU0, TU1, /**/ TU2,
68  FLA_LEFT );
69  FLA_Cont_with_1x3_to_1x2( &TVL, /**/ &TVR, TV0, TV1, /**/ TV2,
70  FLA_LEFT );
71  }
72 
73  return FLA_SUCCESS;
74 }
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:267
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Error FLA_Bidiag_UT_u_step_opt_var2(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_opt_var2.c:18
Definition: FLA_type_defs.h:158
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:475
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153

◆ FLA_Bidiag_UT_u_blk_var3()

FLA_Error FLA_Bidiag_UT_u_blk_var3 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Bidiag_UT_u_step_opt_var3(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), and FLA_Repart_2x2_to_3x3().

Referenced by FLA_Bidiag_UT_u().

14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18  FLA_Obj TUL, TUR, TU0, TU1, TU2;
19  FLA_Obj TVL, TVR, TV0, TV1, TV2;
20 
21  FLA_Obj TU1_tl;
22  FLA_Obj TV1_tl;
23  FLA_Obj none, none2, none3;
24  dim_t b_alg, b;
25 
26  b_alg = FLA_Obj_length( TU );
27 
28  FLA_Part_2x2( A, &ATL, &ATR,
29  &ABL, &ABR, 0, 0, FLA_TL );
30  FLA_Part_1x2( TU, &TUL, &TUR, 0, FLA_LEFT );
31  FLA_Part_1x2( TV, &TVL, &TVR, 0, FLA_LEFT );
32 
33  while ( FLA_Obj_min_dim( ABR ) > 0 )
34  {
35  b = min( FLA_Obj_min_dim( ABR ), b_alg );
36 
37  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
38  /* ************* */ /* ******************** */
39  &A10, /**/ &A11, &A12,
40  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
41  b, b, FLA_BR );
42  FLA_Repart_1x2_to_1x3( TUL, /**/ TUR, &TU0, /**/ &TU1, &TU2,
43  b, FLA_RIGHT );
44  FLA_Repart_1x2_to_1x3( TVL, /**/ TVR, &TV0, /**/ &TV1, &TV2,
45  b, FLA_RIGHT );
46 
47  /*------------------------------------------------------------*/
48 
49  FLA_Part_2x2( TU1, &TU1_tl, &none,
50  &none2, &none3, b, b, FLA_TL );
51 
52  FLA_Part_2x2( TV1, &TV1_tl, &none,
53  &none2, &none3, b, b, FLA_TL );
54 
55  // [ ABR, T1 ] = FLA_Bidiag_UT_u_step_unb_var3( ABR, TU1, TV1, b );
56  //FLA_Bidiag_UT_u_step_unb_var3( ABR, TU1_tl, TV1_tl );
57  //FLA_Bidiag_UT_u_step_ofu_var3( ABR, TU1_tl, TV1_tl );
58  FLA_Bidiag_UT_u_step_opt_var3( ABR, TU1_tl, TV1_tl );
59 
60  /*------------------------------------------------------------*/
61 
62  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
63  A10, A11, /**/ A12,
64  /* ************** */ /* ****************** */
65  &ABL, /**/ &ABR, A20, A21, /**/ A22,
66  FLA_TL );
67  FLA_Cont_with_1x3_to_1x2( &TUL, /**/ &TUR, TU0, TU1, /**/ TU2,
68  FLA_LEFT );
69  FLA_Cont_with_1x3_to_1x2( &TVL, /**/ &TVR, TV0, TV1, /**/ TV2,
70  FLA_LEFT );
71  }
72 
73  return FLA_SUCCESS;
74 }
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:267
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Bidiag_UT_u_step_opt_var3(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_opt_var3.c:18
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
Definition: FLA_type_defs.h:158
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:475
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153

◆ FLA_Bidiag_UT_u_blk_var4()

FLA_Error FLA_Bidiag_UT_u_blk_var4 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Bidiag_UT_u_step_opt_var4(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Gemm_external(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u().

14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18  FLA_Obj UT, U0,
19  UB, U1,
20  U2;
21  FLA_Obj VT, V0,
22  VB, V1,
23  V2;
24  FLA_Obj YT, Y0,
25  YB, Y1,
26  Y2;
27  FLA_Obj ZT, Z0,
28  ZB, Z1,
29  Z2;
30  FLA_Obj TUL, TUR, TU0, TU1, TU2;
31  FLA_Obj TVL, TVR, TV0, TV1, TV2;
32 
33  FLA_Obj U, V, Y, Z;
34  FLA_Obj ABR_l, ABR_t;
35  FLA_Obj UB_l, U2_l;
36  FLA_Obj VB_l, V2_l;
37  FLA_Obj YB_l, Y2_l;
38  FLA_Obj ZB_l, Z2_l;
39  FLA_Obj TU1_tl;
40  FLA_Obj TV1_tl;
41  FLA_Obj none, none2, none3;
42  FLA_Obj VB_tl,
43  VB_bl;
44  FLA_Datatype datatype_A;
45  dim_t m_A, n_A;
46  dim_t b_alg, b;
47 
48  b_alg = FLA_Obj_length( TU );
49 
50  datatype_A = FLA_Obj_datatype( A );
51  m_A = FLA_Obj_length( A );
52  n_A = FLA_Obj_width( A );
53 
54  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
55  FLA_Obj_create( datatype_A, n_A, b_alg, 0, 0, &V );
56  FLA_Obj_create( datatype_A, n_A, b_alg, 0, 0, &Y );
57  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
58 
59  FLA_Part_2x2( A, &ATL, &ATR,
60  &ABL, &ABR, 0, 0, FLA_TL );
61  FLA_Part_2x1( U, &UT,
62  &UB, 0, FLA_TOP );
63  FLA_Part_2x1( V, &VT,
64  &VB, 0, FLA_TOP );
65  FLA_Part_2x1( Y, &YT,
66  &YB, 0, FLA_TOP );
67  FLA_Part_2x1( Z, &ZT,
68  &ZB, 0, FLA_TOP );
69  FLA_Part_1x2( TU, &TUL, &TUR, 0, FLA_LEFT );
70  FLA_Part_1x2( TV, &TVL, &TVR, 0, FLA_LEFT );
71 
72  while ( FLA_Obj_min_dim( ABR ) > 0 )
73  {
74  b = min( FLA_Obj_min_dim( ABR ), b_alg );
75 
76  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
77  /* ************* */ /* ******************** */
78  &A10, /**/ &A11, &A12,
79  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
80  b, b, FLA_BR );
81  FLA_Repart_2x1_to_3x1( UT, &U0,
82  /* ** */ /* ** */
83  &U1,
84  UB, &U2, b, FLA_BOTTOM );
85  FLA_Repart_2x1_to_3x1( VT, &V0,
86  /* ** */ /* ** */
87  &V1,
88  VB, &V2, b, FLA_BOTTOM );
89  FLA_Repart_2x1_to_3x1( YT, &Y0,
90  /* ** */ /* ** */
91  &Y1,
92  YB, &Y2, b, FLA_BOTTOM );
93  FLA_Repart_2x1_to_3x1( ZT, &Z0,
94  /* ** */ /* ** */
95  &Z1,
96  ZB, &Z2, b, FLA_BOTTOM );
97  FLA_Repart_1x2_to_1x3( TUL, /**/ TUR, &TU0, /**/ &TU1, &TU2,
98  b, FLA_RIGHT );
99  FLA_Repart_1x2_to_1x3( TVL, /**/ TVR, &TV0, /**/ &TV1, &TV2,
100  b, FLA_RIGHT );
101 
102  /*------------------------------------------------------------*/
103 
104  FLA_Part_2x2( TU1, &TU1_tl, &none,
105  &none2, &none3, b, b, FLA_TL );
106 
107  FLA_Part_2x2( TV1, &TV1_tl, &none,
108  &none2, &none3, b, b, FLA_TL );
109 
110  FLA_Part_1x2( ABR, &ABR_l, &none, b, FLA_LEFT );
111  FLA_Part_2x1( ABR, &ABR_t,
112  &none, b, FLA_TOP );
113 
114  FLA_Part_1x2( UB, &UB_l, &none, b, FLA_LEFT );
115  FLA_Part_1x2( VB, &VB_l, &none, b, FLA_LEFT );
116  FLA_Part_1x2( YB, &YB_l, &none, b, FLA_LEFT );
117  FLA_Part_1x2( ZB, &ZB_l, &none, b, FLA_LEFT );
118 
119  FLA_Part_2x1( UB_l, &none,
120  &U2_l, b, FLA_TOP );
121  FLA_Part_2x1( VB_l, &none,
122  &V2_l, b, FLA_TOP );
123  FLA_Part_2x1( YB_l, &none,
124  &Y2_l, b, FLA_TOP );
125  FLA_Part_2x1( ZB_l, &none,
126  &Z2_l, b, FLA_TOP );
127 
128  // [ ABR, YB, ZB, TU1, TV1 ] = FLA_Bidiag_UT_u_step_unb_var4( ABR, TU1, TV1, b );
129  //FLA_Bidiag_UT_u_step_unb_var4( ABR, YB, ZB, TU1_tl, TV1_tl );
130  //FLA_Bidiag_UT_u_step_ofu_var4( ABR, YB, ZB, TU1_tl, TV1_tl );
131  FLA_Bidiag_UT_u_step_opt_var4( ABR, YB, ZB, TU1_tl, TV1_tl );
132 
133  if ( FLA_Obj_length( A22 ) > 0 )
134  {
135  // Build UB from ABR, with explicit unit diagonal and zeros.
136  FLA_Copy( ABR_l, UB_l );
137  FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, UB_l );
138 
139  // Build VB from ABR, with explicit unit subdiagonal and zeros.
140  FLA_Copyt( FLA_TRANSPOSE, ABR_t, VB_l );
141  FLA_Part_2x1( VB_l, &VB_tl,
142  &VB_bl, 1, FLA_TOP );
143  FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, VB_bl );
144  FLA_Set( FLA_ZERO, VB_tl );
145 
146  // A22 = A22 - U2 * Y2' - Z2 * V2';
147  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
148  FLA_MINUS_ONE, U2_l, Y2_l, FLA_ONE, A22 );
149  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
150  FLA_MINUS_ONE, Z2_l, V2_l, FLA_ONE, A22 );
151  }
152 
153  /*------------------------------------------------------------*/
154 
155  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
156  A10, A11, /**/ A12,
157  /* ************** */ /* ****************** */
158  &ABL, /**/ &ABR, A20, A21, /**/ A22,
159  FLA_TL );
160  FLA_Cont_with_3x1_to_2x1( &UT, U0,
161  U1,
162  /* ** */ /* ** */
163  &UB, U2, FLA_TOP );
164  FLA_Cont_with_3x1_to_2x1( &VT, V0,
165  V1,
166  /* ** */ /* ** */
167  &VB, V2, FLA_TOP );
168  FLA_Cont_with_3x1_to_2x1( &YT, Y0,
169  Y1,
170  /* ** */ /* ** */
171  &YB, Y2, FLA_TOP );
172  FLA_Cont_with_3x1_to_2x1( &ZT, Z0,
173  Z1,
174  /* ** */ /* ** */
175  &ZB, Z2, FLA_TOP );
176  FLA_Cont_with_1x3_to_1x2( &TUL, /**/ &TUR, TU0, TU1, /**/ TU2,
177  FLA_LEFT );
178  FLA_Cont_with_1x3_to_1x2( &TVL, /**/ &TVR, TV0, TV1, /**/ TV2,
179  FLA_LEFT );
180  }
181 
182  FLA_Obj_free( &U );
183  FLA_Obj_free( &V );
184  FLA_Obj_free( &Y );
185  FLA_Obj_free( &Z );
186 
187  return FLA_SUCCESS;
188 }
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition: FLA_Obj.c:55
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:226
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:267
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Error FLA_Copy(FLA_Obj A, FLA_Obj B)
Definition: FLA_Copy.c:15
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Copyt(FLA_Trans trans, FLA_Obj A, FLA_Obj B)
Definition: FLA_Copyt.c:15
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:428
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Set(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Set.c:13
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:475
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
FLA_Error FLA_Triangularize(FLA_Uplo uplo, FLA_Diag diag, FLA_Obj A)
Definition: FLA_Triangularize.c:13
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
int FLA_Datatype
Definition: FLA_type_defs.h:49
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
FLA_Error FLA_Bidiag_UT_u_step_opt_var4(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_opt_var4.c:35
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Error FLA_Gemm_external(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C)
Definition: FLA_Gemm_external.c:13
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153

◆ FLA_Bidiag_UT_u_blk_var5()

FLA_Error FLA_Bidiag_UT_u_blk_var5 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Bidiag_UT_u_step_opt_var5(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Gemm_external(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u().

14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18  FLA_Obj UT, U0,
19  UB, U1,
20  U2;
21  FLA_Obj VT, V0,
22  VB, V1,
23  V2;
24  FLA_Obj YT, Y0,
25  YB, Y1,
26  Y2;
27  FLA_Obj ZT, Z0,
28  ZB, Z1,
29  Z2;
30  FLA_Obj TUL, TUR, TU0, TU1, TU2;
31  FLA_Obj TVL, TVR, TV0, TV1, TV2;
32 
33  FLA_Obj U, V, Y, Z;
34  FLA_Obj ABR_l, ABR_t;
35  FLA_Obj UB_l, U2_l;
36  FLA_Obj VB_l, V2_l;
37  FLA_Obj YB_l, Y2_l;
38  FLA_Obj ZB_l, Z2_l;
39  FLA_Obj TU1_tl;
40  FLA_Obj TV1_tl;
41  FLA_Obj none, none2, none3;
42  FLA_Obj VB_tl,
43  VB_bl;
44  FLA_Datatype datatype_A;
45  dim_t m_A, n_A;
46  dim_t b_alg, b;
47 
48  b_alg = FLA_Obj_length( TU );
49 
50  datatype_A = FLA_Obj_datatype( A );
51  m_A = FLA_Obj_length( A );
52  n_A = FLA_Obj_width( A );
53 
54  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
55  FLA_Obj_create( datatype_A, n_A, b_alg, 0, 0, &V );
56  FLA_Obj_create( datatype_A, n_A, b_alg, 0, 0, &Y );
57  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
58 
59  FLA_Part_2x2( A, &ATL, &ATR,
60  &ABL, &ABR, 0, 0, FLA_TL );
61  FLA_Part_2x1( U, &UT,
62  &UB, 0, FLA_TOP );
63  FLA_Part_2x1( V, &VT,
64  &VB, 0, FLA_TOP );
65  FLA_Part_2x1( Y, &YT,
66  &YB, 0, FLA_TOP );
67  FLA_Part_2x1( Z, &ZT,
68  &ZB, 0, FLA_TOP );
69  FLA_Part_1x2( TU, &TUL, &TUR, 0, FLA_LEFT );
70  FLA_Part_1x2( TV, &TVL, &TVR, 0, FLA_LEFT );
71 
72  while ( FLA_Obj_min_dim( ABR ) > 0 )
73  {
74  b = min( FLA_Obj_min_dim( ABR ), b_alg );
75 
76  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
77  /* ************* */ /* ******************** */
78  &A10, /**/ &A11, &A12,
79  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
80  b, b, FLA_BR );
81  FLA_Repart_2x1_to_3x1( UT, &U0,
82  /* ** */ /* ** */
83  &U1,
84  UB, &U2, b, FLA_BOTTOM );
85  FLA_Repart_2x1_to_3x1( VT, &V0,
86  /* ** */ /* ** */
87  &V1,
88  VB, &V2, b, FLA_BOTTOM );
89  FLA_Repart_2x1_to_3x1( YT, &Y0,
90  /* ** */ /* ** */
91  &Y1,
92  YB, &Y2, b, FLA_BOTTOM );
93  FLA_Repart_2x1_to_3x1( ZT, &Z0,
94  /* ** */ /* ** */
95  &Z1,
96  ZB, &Z2, b, FLA_BOTTOM );
97  FLA_Repart_1x2_to_1x3( TUL, /**/ TUR, &TU0, /**/ &TU1, &TU2,
98  b, FLA_RIGHT );
99  FLA_Repart_1x2_to_1x3( TVL, /**/ TVR, &TV0, /**/ &TV1, &TV2,
100  b, FLA_RIGHT );
101 
102  /*------------------------------------------------------------*/
103 
104  FLA_Part_2x2( TU1, &TU1_tl, &none,
105  &none2, &none3, b, b, FLA_TL );
106 
107  FLA_Part_2x2( TV1, &TV1_tl, &none,
108  &none2, &none3, b, b, FLA_TL );
109 
110  FLA_Part_1x2( ABR, &ABR_l, &none, b, FLA_LEFT );
111  FLA_Part_2x1( ABR, &ABR_t,
112  &none, b, FLA_TOP );
113 
114  FLA_Part_1x2( UB, &UB_l, &none, b, FLA_LEFT );
115  FLA_Part_1x2( VB, &VB_l, &none, b, FLA_LEFT );
116  FLA_Part_1x2( YB, &YB_l, &none, b, FLA_LEFT );
117  FLA_Part_1x2( ZB, &ZB_l, &none, b, FLA_LEFT );
118 
119  FLA_Part_2x1( UB_l, &none,
120  &U2_l, b, FLA_TOP );
121  FLA_Part_2x1( VB_l, &none,
122  &V2_l, b, FLA_TOP );
123  FLA_Part_2x1( YB_l, &none,
124  &Y2_l, b, FLA_TOP );
125  FLA_Part_2x1( ZB_l, &none,
126  &Z2_l, b, FLA_TOP );
127 
128  // [ ABR, YB, ZB, TU1, TV1 ] = FLA_Bidiag_UT_u_step_unb_var5( ABR, TU1, TV1, b );
129  //FLA_Bidiag_UT_u_step_unb_var5( ABR, YB, ZB, TU1_tl, TV1_tl );
130  FLA_Bidiag_UT_u_step_opt_var5( ABR, YB, ZB, TU1_tl, TV1_tl );
131 
132  if ( FLA_Obj_length( A22 ) > 0 )
133  {
134  // Build UB from ABR, with explicit unit diagonal and zeros.
135  FLA_Copy( ABR_l, UB_l );
136  FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, UB_l );
137 
138  // Build VB from ABR, with explicit unit subdiagonal and zeros.
139  FLA_Copyt( FLA_TRANSPOSE, ABR_t, VB_l );
140  FLA_Part_2x1( VB_l, &VB_tl,
141  &VB_bl, 1, FLA_TOP );
142  FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, VB_bl );
143  FLA_Set( FLA_ZERO, VB_tl );
144 
145  // A22 = A22 - U2 * Y2' - Z2 * V2';
146  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
147  FLA_MINUS_ONE, U2_l, Y2_l, FLA_ONE, A22 );
148  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
149  FLA_MINUS_ONE, Z2_l, V2_l, FLA_ONE, A22 );
150  }
151 
152  /*------------------------------------------------------------*/
153 
154  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
155  A10, A11, /**/ A12,
156  /* ************** */ /* ****************** */
157  &ABL, /**/ &ABR, A20, A21, /**/ A22,
158  FLA_TL );
159  FLA_Cont_with_3x1_to_2x1( &UT, U0,
160  U1,
161  /* ** */ /* ** */
162  &UB, U2, FLA_TOP );
163  FLA_Cont_with_3x1_to_2x1( &VT, V0,
164  V1,
165  /* ** */ /* ** */
166  &VB, V2, FLA_TOP );
167  FLA_Cont_with_3x1_to_2x1( &YT, Y0,
168  Y1,
169  /* ** */ /* ** */
170  &YB, Y2, FLA_TOP );
171  FLA_Cont_with_3x1_to_2x1( &ZT, Z0,
172  Z1,
173  /* ** */ /* ** */
174  &ZB, Z2, FLA_TOP );
175  FLA_Cont_with_1x3_to_1x2( &TUL, /**/ &TUR, TU0, TU1, /**/ TU2,
176  FLA_LEFT );
177  FLA_Cont_with_1x3_to_1x2( &TVL, /**/ &TVR, TV0, TV1, /**/ TV2,
178  FLA_LEFT );
179  }
180 
181  FLA_Obj_free( &U );
182  FLA_Obj_free( &V );
183  FLA_Obj_free( &Y );
184  FLA_Obj_free( &Z );
185 
186  return FLA_SUCCESS;
187 }
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition: FLA_Obj.c:55
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:226
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:267
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Error FLA_Copy(FLA_Obj A, FLA_Obj B)
Definition: FLA_Copy.c:15
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Bidiag_UT_u_step_opt_var5(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_opt_var5.c:35
FLA_Error FLA_Copyt(FLA_Trans trans, FLA_Obj A, FLA_Obj B)
Definition: FLA_Copyt.c:15
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:428
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Set(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Set.c:13
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:475
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
FLA_Error FLA_Triangularize(FLA_Uplo uplo, FLA_Diag diag, FLA_Obj A)
Definition: FLA_Triangularize.c:13
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
int FLA_Datatype
Definition: FLA_type_defs.h:49
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Error FLA_Gemm_external(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C)
Definition: FLA_Gemm_external.c:13
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153

◆ FLA_Bidiag_UT_u_ofu_var2()

FLA_Error FLA_Bidiag_UT_u_ofu_var2 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Bidiag_UT_u_step_ofu_var2().

14 {
15  return FLA_Bidiag_UT_u_step_ofu_var2( A, TU, TV );
16 }
FLA_Error FLA_Bidiag_UT_u_step_ofu_var2(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_fus_var2.c:18

◆ FLA_Bidiag_UT_u_ofu_var3()

FLA_Error FLA_Bidiag_UT_u_ofu_var3 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Bidiag_UT_u_step_ofu_var3().

14 {
15  return FLA_Bidiag_UT_u_step_ofu_var3( A, TU, TV );
16 }
FLA_Error FLA_Bidiag_UT_u_step_ofu_var3(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_fus_var3.c:18

◆ FLA_Bidiag_UT_u_ofu_var4()

FLA_Error FLA_Bidiag_UT_u_ofu_var4 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Bidiag_UT_u_step_ofu_var4(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), and FLA_Obj_width().

14 {
15  FLA_Error r_val;
16  FLA_Obj Y, Z;
17  FLA_Datatype datatype_A;
18  dim_t m_A, n_A;
19 
20  datatype_A = FLA_Obj_datatype( A );
21  m_A = FLA_Obj_length( A );
22  n_A = FLA_Obj_width( A );
23 
24  FLA_Obj_create( datatype_A, n_A, n_A, 0, 0, &Y );
25  FLA_Obj_create( datatype_A, m_A, n_A, 0, 0, &Z );
26 
27  r_val = FLA_Bidiag_UT_u_step_ofu_var4( A, Y, Z, TU, TV );
28 
29  FLA_Obj_free( &Y );
30  FLA_Obj_free( &Z );
31 
32  return r_val;
33 }
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition: FLA_Obj.c:55
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
int FLA_Error
Definition: FLA_type_defs.h:47
FLA_Error FLA_Bidiag_UT_u_step_ofu_var4(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_fus_var4.c:35
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116

◆ FLA_Bidiag_UT_u_opt_var1()

FLA_Error FLA_Bidiag_UT_u_opt_var1 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Bidiag_UT_u_step_opt_var1().

Referenced by FLA_Bidiag_UT_u().

14 {
15  return FLA_Bidiag_UT_u_step_opt_var1( A, TU, TV );
16 }
FLA_Error FLA_Bidiag_UT_u_step_opt_var1(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_opt_var1.c:18

◆ FLA_Bidiag_UT_u_opt_var2()

FLA_Error FLA_Bidiag_UT_u_opt_var2 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Bidiag_UT_u_step_opt_var2().

Referenced by FLA_Bidiag_UT_u().

14 {
15  return FLA_Bidiag_UT_u_step_opt_var2( A, TU, TV );
16 }
FLA_Error FLA_Bidiag_UT_u_step_opt_var2(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_opt_var2.c:18

◆ FLA_Bidiag_UT_u_opt_var3()

FLA_Error FLA_Bidiag_UT_u_opt_var3 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Bidiag_UT_u_step_opt_var3().

Referenced by FLA_Bidiag_UT_u().

14 {
15  return FLA_Bidiag_UT_u_step_opt_var3( A, TU, TV );
16 }
FLA_Error FLA_Bidiag_UT_u_step_opt_var3(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_opt_var3.c:18

◆ FLA_Bidiag_UT_u_opt_var4()

FLA_Error FLA_Bidiag_UT_u_opt_var4 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Bidiag_UT_u_step_opt_var4(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u().

14 {
15  FLA_Error r_val;
16  FLA_Obj Y, Z;
17  FLA_Datatype datatype_A;
18  dim_t m_A, n_A;
19 
20  datatype_A = FLA_Obj_datatype( A );
21  m_A = FLA_Obj_length( A );
22  n_A = FLA_Obj_width( A );
23 
24  FLA_Obj_create( datatype_A, n_A, n_A, 0, 0, &Y );
25  FLA_Obj_create( datatype_A, m_A, n_A, 0, 0, &Z );
26 
27  r_val = FLA_Bidiag_UT_u_step_opt_var4( A, Y, Z, TU, TV );
28 
29  FLA_Obj_free( &Y );
30  FLA_Obj_free( &Z );
31 
32  return r_val;
33 }
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition: FLA_Obj.c:55
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
int FLA_Error
Definition: FLA_type_defs.h:47
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
FLA_Error FLA_Bidiag_UT_u_step_opt_var4(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_opt_var4.c:35
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116

◆ FLA_Bidiag_UT_u_opt_var5()

FLA_Error FLA_Bidiag_UT_u_opt_var5 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Bidiag_UT_u_step_opt_var5(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u().

14 {
15  FLA_Error r_val;
16  FLA_Obj Y, Z;
17  FLA_Datatype datatype_A;
18  dim_t m_A, n_A;
19 
20  datatype_A = FLA_Obj_datatype( A );
21  m_A = FLA_Obj_length( A );
22  n_A = FLA_Obj_width( A );
23 
24  FLA_Obj_create( datatype_A, n_A, n_A, 0, 0, &Y );
25  FLA_Obj_create( datatype_A, m_A, n_A, 0, 0, &Z );
26 
27  r_val = FLA_Bidiag_UT_u_step_opt_var5( A, Y, Z, TU, TV );
28 
29  FLA_Obj_free( &Y );
30  FLA_Obj_free( &Z );
31 
32  return r_val;
33 }
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition: FLA_Obj.c:55
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
FLA_Error FLA_Bidiag_UT_u_step_opt_var5(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_opt_var5.c:35
int FLA_Error
Definition: FLA_type_defs.h:47
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116

◆ FLA_Bidiag_UT_u_step_ofc_var2()

FLA_Error FLA_Bidiag_UT_u_step_ofc_var2 ( int  m_A,
int  n_A,
int  m_TS,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T,
scomplex * buff_S,
int  rs_S,
int  cs_S 
)

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Gerc2_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_Househ2_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var2().

520 {
521  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
522  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
523  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
524 
525  scomplex beta;
526  int i;
527 
528  // b_alg = FLA_Obj_length( T );
529  int b_alg = m_TS;
530 
531  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
532  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
533  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
534  scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
535  scomplex* buff_y = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
536  scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
537  int inc_v = 1;
538  int inc_y = 1;
539  int inc_z = 1;
540 
541  for ( i = 0; i < b_alg; ++i )
542  {
543  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
544  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
545  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
546  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
547  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
548  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
549  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
550 
551  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
552  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
553 
554  scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
555  scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
556 
557  scomplex* v21 = buff_v + (i+1)*inc_v;
558 
559  scomplex* y21 = buff_y + (i+1)*inc_y;
560 
561  scomplex* z21 = buff_z + (i+1)*inc_z;
562 
563  scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
564  scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
565 
566  scomplex* v21_t = v21 + (0 )*inc_v;
567  scomplex* v21_b = v21 + (1 )*inc_v;
568 
569  int m_ahead = m_A - i - 1;
570  int n_ahead = n_A - i - 1;
571  int m_behind = i;
572  int n_behind = i;
573 
574  /*------------------------------------------------------------*/
575 
576  // FLA_Househ2_UT( FLA_LEFT,
577  // alpha11,
578  // a21, tau11 );
579  FLA_Househ2_UT_l_opc( m_ahead,
580  alpha11,
581  a21, rs_A,
582  tau11 );
583 
584  if ( n_ahead > 0 )
585  {
586  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
587  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
588  bl1_ccopyv( BLIS1_CONJUGATE,
589  n_ahead,
590  a12t, cs_A,
591  y21, inc_y );
592  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
593  BLIS1_NO_CONJUGATE,
594  m_ahead,
595  n_ahead,
596  buff_1,
597  A22, rs_A, cs_A,
598  a21, rs_A,
599  buff_1,
600  y21, inc_y );
601 
602  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
603  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
604  n_ahead,
605  tau11,
606  y21, inc_y );
607 
608  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
609  bl1_caxpyv( BLIS1_CONJUGATE,
610  n_ahead,
611  buff_m1,
612  y21, inc_y,
613  a12t, cs_A );
614 
615  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
616  FLA_Househ2_UT_r_opc( n_ahead - 1,
617  a12t_l,
618  a12t_r, cs_A,
619  sigma11 );
620 
621  // FLA_Set( FLA_ONE, v21_t );
622  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
623  *v21_t = *buff_1;
624  bl1_ccopyv( BLIS1_NO_CONJUGATE,
625  n_ahead - 1,
626  a12t_r, cs_A,
627  v21_b, inc_y );
628 
629  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
630  // FLA_Scal( FLA_MINUS_ONE, beta );
631  bl1_cdot( BLIS1_CONJUGATE,
632  n_ahead,
633  y21, inc_y,
634  v21, inc_v,
635  &beta );
636  bl1_cneg1( &beta );
637 
638  // FLA_Copy( a21, z21 );
639  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
640  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
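641  bl1_ccopyv( BLIS1_NO_CONJUGATE,
642  m_ahead,
643  a21, rs_A,
644  z21, inc_z );
645  bl1_cgemv( BLIS1_NO_TRANSPOSE,
646  BLIS1_NO_CONJUGATE,
647  m_ahead,
648  n_ahead,
649  buff_1,
650  A22, rs_A, cs_A,
651  v21, inc_v,
652  &beta,
653  z21, inc_z );
654  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
655  m_ahead,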
656  sigma11,
657  z21, inc_z );
658 
659  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
660  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
661  FLA_Fused_Gerc2_opc_var1( m_ahead,
662  n_ahead,
663  buff_m1,
664  a21, rs_A,
665  y21, inc_y,
666  z21, inc_z,
667  v21, inc_v,
668  A22, rs_A, cs_A );
669 
670  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
671  bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
672  BLIS1_NO_CONJUGATE,
673  m_behind,
674  n_ahead,
675  buff_1,
676  A02, rs_A, cs_A,
677  v21, inc_v,
678  buff_0,
679  s01, rs_S );
680  }
681 
682  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
683  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
684  bl1_ccopyv( BLIS1_CONJUGATE,
685  n_behind,
686  a10t, cs_A,
687  t01, rs_T );
688  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
689  BLIS1_NO_CONJUGATE,
690  m_ahead,
691  n_behind,
692  buff_1,
693  A20, rs_A, cs_A,
694  a21, rs_A,
695  buff_1,
696  t01, rs_T );
697 
698  /*------------------------------------------------------------*/
699 
700  }
701 
702  // FLA_Obj_free( &v );
703  // FLA_Obj_free( &y );
704  // FLA_Obj_free( &z );
705  FLA_free( buff_v );
706  FLA_free( buff_y );
707  FLA_free( buff_z );
708 
709  return FLA_SUCCESS;
710 }
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
FLA_Error FLA_Fused_Gerc2_opc_var1(int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Fused_Gerc2_opt_var1.c:241
FLA_Error FLA_Househ2_UT_r_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:677
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
Definition: blis_type_defs.h:132
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_invscalv.c:52
int i
Definition: bl1_axmyv2.c:145
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_ofc_var3()

FLA_Error FLA_Bidiag_UT_u_step_ofc_var3 ( int  m_A,
int  n_A,
int  m_TS,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T,
scomplex * buff_S,
int  rs_S,
int  cs_S 
)

References bl1_caxpyv(), bl1_cconjv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_Househ2s_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var3().

927 {
928  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
929  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
930  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
931 
932  scomplex alpha12;
933  scomplex minus_conj_alpha12;
934  scomplex psi11_minus_alpha12;
935  scomplex minus_inv_tau11;
936  scomplex minus_upsilon11;
937  scomplex minus_conj_nu11;
938  scomplex minus_conj_psi11;
939  scomplex minus_zeta11;
940  scomplex beta;
941  int i;
942 
943  // b_alg = FLA_Obj_length( T );
944  int b_alg = m_TS;
945 
946  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
947  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
948  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
949  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
950  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
951  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
952  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
953  scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
954  scomplex* buff_ap = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
955  scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
956  scomplex* buff_up = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
957  scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
958  scomplex* buff_y = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
959  scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
960  int inc_w = 1;
961  int inc_ap = 1;
962  int inc_u = 1;
963  int inc_up = 1;
964  int inc_v = 1;
965  int inc_y = 1;
966  int inc_z = 1;
967 
968  for ( i = 0; i < b_alg; ++i )
969  {
970  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
971  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
972  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
973  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
974  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
975  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
976  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
977 
978  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
979  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
980 
981  scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
982  scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
983 
984  scomplex* w21 = buff_w + (i+1)*inc_w;
985 
986  scomplex* a12p = buff_ap + (i+1)*inc_ap;
987 
988  scomplex* upsilon11 = buff_u + (i )*inc_u;
989  scomplex* u21 = buff_u + (i+1)*inc_u;
990 
991  scomplex* u21p = buff_up + (i+1)*inc_up;
992 
993  scomplex* nu11 = buff_v + (i )*inc_v;
994  scomplex* v21 = buff_v + (i+1)*inc_v;
995 
996  scomplex* psi11 = buff_y + (i )*inc_y;
997  scomplex* y21 = buff_y + (i+1)*inc_y;
998 
999  scomplex* zeta11 = buff_z + (i )*inc_z;
1000  scomplex* z21 = buff_z + (i+1)*inc_z;
1001 
1002  scomplex* a12p_t = a12p + (0 )*inc_ap;
1003  scomplex* a12p_b = a12p + (1 )*inc_ap;
1004 
1005  scomplex* v21_t = v21 + (0 )*inc_v;
1006  scomplex* v21_b = v21 + (1 )*inc_v;
1007 
1008  scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1009  scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1010 
1011  scomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
1012 
1013  int m_ahead = m_A - i - 1;
1014  int n_ahead = n_A - i - 1;
1015  int m_behind = i;
1016  int n_behind = i;
1017 
1018  /*------------------------------------------------------------*/
1019 
1020  if ( m_behind > 0 )
1021  {
1022  // FLA_Copy( upsilon11, minus_upsilon11 );
1023  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
1024  bl1_cmult3( buff_m1, upsilon11, &minus_upsilon11 );
1025 
1026  // FLA_Copy( zeta11, minus_zeta11 );
1027  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
1028  bl1_cmult3( buff_m1, zeta11, &minus_zeta11 );
1029 
1030  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
1031  // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
1032  bl1_ccopyconj( psi11, &minus_conj_psi11 );
1033  bl1_cscals( buff_m1, &minus_conj_psi11 );
1034 
1035  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
1036  // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
1037  bl1_ccopyconj( nu11, &minus_conj_nu11 );
1038  bl1_cscals( buff_m1, &minus_conj_nu11 );
1039 
1040  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
1041  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, zeta11, alpha11 );
1042  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1043  1,
1044  &minus_conj_psi11,
1045  upsilon11, 1,
1046  alpha11, 1 );
1047  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1048  1,
1049  &minus_conj_nu11,
1050  zeta11, 1,
1051  alpha11, 1 );
1052 
1053  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
1054  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
1055  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1056  m_ahead,
1057  &minus_conj_psi11,
1058  u21, inc_u,
1059  a21, rs_A );
1060  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1061  m_ahead,
1062  &minus_conj_nu11,
1063  z21, inc_z,
1064  a21, rs_A );
1065 
1066  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
1067  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11, v21, a12t );
1068  bl1_caxpyv( BLIS1_CONJUGATE,
1069  n_ahead,
1070  &minus_upsilon11,
1071  y21, inc_y,
1072  a12t, cs_A );
1073  bl1_caxpyv( BLIS1_CONJUGATE,
1074  n_ahead,
1075  &minus_zeta11,
1076  v21, inc_v,
1077  a12t, cs_A );
1078  }
1079 
1080  // FLA_Househ2_UT( FLA_LEFT,
1081  // alpha11,
1082  // a21, tau11 );
1083  // FLA_Copy( a21, u21p );
1084  FLA_Househ2_UT_l_opc( m_ahead,
1085  alpha11,
1086  a21, rs_A,
1087  tau11 );
1088  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1089  m_ahead,
1090  a21, rs_A,
1091  u21p, inc_up );
1092 
1093  if ( n_ahead > 0 )
1094  {
1095  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1096  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1097  bl1_cdiv3( buff_m1, tau11, &minus_inv_tau11 );
1098 
1099  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1100  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1101  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1102  n_ahead,
1103  a12t, cs_A,
1104  a12p, inc_ap );
1105  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1106  n_ahead,
1107  &minus_inv_tau11,
1108  a12t, cs_A,
1109  a12p, inc_ap );
1110  }
1111 
1112  if ( m_behind > 0 && n_ahead > 0 )
1113  {
1114  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1115  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
1116  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
1117  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1118  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1119  FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1( m_ahead,
1120  n_ahead,
1121  tau11,
1122  buff_m1,
1123  u21, inc_u,
1124  y21, inc_y,
1125  z21, inc_z,
1126  v21, inc_v,
1127  A22, rs_A, cs_A,
1128  u21p, inc_up,
1129  a12p, inc_ap,
1130  w21, inc_w );
1131 
1132 
1133  }
1134  else if ( n_ahead > 0 )
1135  {
1136  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
1137  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1138  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1139  FLA_Fused_Ahx_Axpy_Ax_opc_var1( m_ahead,
1140  n_ahead,
1141  tau11,
1142  buff_0,
1143  A22, rs_A, cs_A,
1144  u21p, inc_up,
1145  a12p, inc_ap,
1146  y21, inc_y,
1147  w21, inc_w );
1148  }
1149 
1150  if ( n_ahead > 0 )
1151  {
1152  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1153  bl1_caxpyv( BLIS1_CONJUGATE,
1154  n_ahead,
1155  buff_1,
1156  a12t, cs_A,
1157  y21, inc_y );
1158 
1159  // FLA_Househ2s_UT( FLA_RIGHT,
1160  // a12p_t,
1161  // a12p_b,
1162  // alpha12, psi11_minus_alpha12, sigma11 );
1163  FLA_Househ2s_UT_r_opc( n_ahead - 1,
1164  a12p_t,
1165  a12p_b, inc_ap,
1166  &alpha12,
1167  &psi11_minus_alpha12,
1168  sigma11 );
1169 
1170  // FLA_Copy( a12p, v21 );
1171  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1172  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1173  // FLA_Conjugate( v21_b );
1174  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1175  n_ahead,
1176  a12p, inc_ap,
1177  v21, inc_v );
1178  bl1_cmult4( buff_m1, &alpha12, v21_t, v21_t );
1179  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
1180  n_ahead,
1181  &psi11_minus_alpha12,
1182  v21, inc_v );
1183  bl1_cconjv( n_ahead - 1,
1184  v21_b, inc_v );
1185 
1186  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
1187  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1188  *a12t_l = alpha12;
1189  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1190  n_ahead - 1,
1191  v21_b, inc_v,
1192  a12t_r, cs_A );
1193  }
1194 
1195  // FLA_Copy( u21p, u21 );
1196  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1197  m_ahead,
1198  u21p, inc_up,
1199  u21, inc_u );
1200 
1201  if ( n_ahead > 0 )
1202  {
1203  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1204  // FLA_Scal( FLA_MINUS_ONE, beta );
1205  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1206  bl1_cdot( BLIS1_CONJUGATE,
1207  n_ahead,
1208  y21, inc_y,
1209  v21, inc_v,
1210  &beta );
1211  bl1_cscals( &minus_inv_tau11, &beta );
1212 
1213  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1214  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1215  bl1_ccopyconj( &alpha12, &minus_conj_alpha12 );
1216  bl1_cneg1( &minus_conj_alpha12 );
1217 
1218  // FLA_Copy( w21, z21 );
1219  // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
1220  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1221  // FLA_Axpy( beta, u21, z21 );
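1222  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1223  m_ahead,
1224  w21, inc_w,
1225  z21, inc_z );
1226  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1227  m_ahead,
1228  &minus_conj_alpha12,
1229  A22_l, rs_A,
1230  z21, inc_z );
1231  bl1_cinvscalv( BLIS1_CONJUGATE,
1232  m_ahead,
1233  &psi11_minus_alpha12,
1234  z21, inc_z );
1235  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1236  m_ahead,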
1237  &beta,
1238  u21, inc_u,
1239  z21, inc_z );
1240 
1241  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1242  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1243  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
1244  n_ahead,
1245  tau11,
1246  y21, inc_y );
1247  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
1248  m_ahead,
1249  sigma11,
1250  z21, inc_z );
1251 
1252  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
1253  bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
1254  BLIS1_NO_CONJUGATE,
1255  m_behind,
1256  n_ahead,
1257  buff_1,
1258  A02, rs_A, cs_A,
1259  v21, inc_v,
1260  buff_0,
1261  s01, rs_S );
1262  }
1263 
1264  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1265  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1266  bl1_ccopyv( BLIS1_CONJUGATE,
1267  n_behind,
1268  a10t, cs_A,
1269  t01, rs_T );
1270  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1271  BLIS1_NO_CONJUGATE,
1272  m_ahead,
1273  n_behind,
1274  buff_1,
1275  A20, rs_A, cs_A,
1276  u21, inc_u,
1277  buff_1,
1278  t01, rs_T );
1279 
1280  if ( m_behind + 1 == b_alg && n_ahead > 0 )
1281  {
1282  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1283  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
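1284  bl1_cger( BLIS1_NO_CONJUGATE,
1285  BLIS1_CONJUGATE,
1286  m_ahead,
1287  n_ahead,
1288  buff_m1,
1289  u21, inc_u,
1290  y21, inc_y,
1291  A22, rs_A, cs_A );
1292  bl1_cger( BLIS1_NO_CONJUGATE,
1293  BLIS1_CONJUGATE,
1294  m_ahead,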
1295  n_ahead,
1296  buff_m1,
1297  z21, inc_z,
1298  v21, inc_v,
1299  A22, rs_A, cs_A );
1300  }
1301 
1302  /*------------------------------------------------------------*/
1303 
1304  }
1305 
1306  // FLA_Obj_free( &w );
1307  // FLA_Obj_free( &ap );
1308  // FLA_Obj_free( &u );
1309  // FLA_Obj_free( &up );
1310  // FLA_Obj_free( &v );
1311  // FLA_Obj_free( &y );
1312  // FLA_Obj_free( &z );
1313  FLA_free( buff_w );
1314  FLA_free( buff_ap );
1315  FLA_free( buff_u );
1316  FLA_free( buff_up );
1317  FLA_free( buff_v );
1318  FLA_free( buff_y );
1319  FLA_free( buff_z );
1320 
1321  return FLA_SUCCESS;
1322 }
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1(int m_A, int n_A, scomplex *buff_tau, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_up, int inc_up, scomplex *buff_a, int inc_a, scomplex *buff_w, int inc_w)
Definition: FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:424
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void bl1_cconjv(int m, scomplex *x, int incx)
Definition: bl1_conjv.c:23
FLA_Error FLA_Househ2s_UT_r_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *alpha, scomplex *chi_1_minus_alpha, scomplex *tau)
Definition: FLA_Househ2s_UT.c:589
Definition: blis_type_defs.h:56
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opc_var1(int m_A, int n_A, scomplex *buff_tau, scomplex *buff_beta, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_u, int inc_u, scomplex *buff_a, int inc_a, scomplex *buff_y, int inc_y, scomplex *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:322
Definition: blis_type_defs.h:132
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_invscalv.c:52
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:111
int i
Definition: bl1_axmyv2.c:145
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_ofc_var4()

FLA_Error FLA_Bidiag_UT_u_step_ofc_var4 ( int  m_A,
int  n_A,
int  m_TS,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_Y,
int  rs_Y,
int  cs_Y,
scomplex * buff_Z,
int  rs_Z,
int  cs_Z,
scomplex * buff_T,
int  rs_T,
int  cs_T,
scomplex * buff_S,
int  rs_S,
int  cs_S 
)

References bl1_caxpyv(), bl1_cconjv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cinvscalv(), bl1_csetm(), bl1_csetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_UYx_ZVx_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_Househ2s_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var4().

1103 {
1104  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
1105  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
1106  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
1107 
1108  scomplex alpha12;
1109  scomplex minus_conj_alpha12;
1110  scomplex psi11_minus_alpha12;
1111  scomplex minus_inv_tau11;
1112  scomplex beta;
1113  scomplex last_elem;
1114  int i;
1115 
1116  // b_alg = FLA_Obj_length( T );
1117  int b_alg = m_TS;
1118 
1119  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1120  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
1121  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
1122  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1123  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
1124  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1125  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
1126  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
1127  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
1128  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
1129  scomplex* buff_tmp = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1130  scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1131  scomplex* buff_al = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1132  scomplex* buff_ap = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1133  scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1134  scomplex* buff_up = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1135  scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1136  scomplex* buff_d = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1137  scomplex* buff_e = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1138  int inc_tmp = 1;
1139  int inc_w = 1;
1140  int inc_al = 1;
1141  int inc_ap = 1;
1142  int inc_u = 1;
1143  int inc_up = 1;
1144  int inc_v = 1;
1145  int inc_d = 1;
1146  int inc_e = 1;
1147 
1148  // FLA_Set( FLA_ZERO, Y );
1149  // FLA_Set( FLA_ZERO, Z );
1150  bl1_csetm( n_A,
1151  b_alg,
1152  buff_0,
1153  buff_Y, rs_Y, cs_Y );
1154  bl1_csetm( m_A,
1155  b_alg,
1156  buff_0,
1157  buff_Z, rs_Z, cs_Z );
1158 
1159  for ( i = 0; i < b_alg; ++i )
1160  {
1161  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1162  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1163  scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
1164  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1165  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1166  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1167  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1168  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1169 
1170  scomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1171  scomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1172  scomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1173 
1174  scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1175  scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1176  scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1177 
1178  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1179  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1180 
1181  scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1182  scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1183 
1184  scomplex* tmp21 = buff_tmp + (i+1)*inc_tmp;
1185 
1186  scomplex* w21 = buff_w + (i+1)*inc_w;
1187 
1188  scomplex* a22l = buff_al + (i+1)*inc_al;
1189 
1190  scomplex* a12p = buff_ap + (i+1)*inc_ap;
1191 
1192  scomplex* u21 = buff_u + (i+1)*inc_u;
1193 
1194  scomplex* u21p = buff_up + (i+1)*inc_up;
1195 
1196  scomplex* v21 = buff_v + (i+1)*inc_v;
1197 
1198  scomplex* d0 = buff_d + (0 )*inc_d;
1199 
1200  scomplex* e0 = buff_e + (0 )*inc_e;
1201 
1202  scomplex* a12p_t = a12p + (0 )*inc_ap;
1203  scomplex* a12p_b = a12p + (1 )*inc_ap;
1204 
1205  scomplex* v21_t = v21 + (0 )*inc_v;
1206  scomplex* v21_b = v21 + (1 )*inc_v;
1207 
1208  scomplex* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
1209 
1210  scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1211  scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1212 
1213  scomplex* ABL = a10t;
1214  scomplex* ZBL = z10t;
1215 
1216  scomplex* a2 = alpha11;
1217 
1218  int m_ahead = m_A - i - 1;
1219  int n_ahead = n_A - i - 1;
1220  int m_behind = i;
1221  int n_behind = i;
1222 
1223  /*------------------------------------------------------------*/
1224 
1225  if ( m_behind > 0 )
1226  {
1227  // FLA_Copy( a01_b, last_elem );
1228  // FLA_Set( FLA_ONE, a01_b );
1229  last_elem = *a01_b;
1230  *a01_b = *buff_1;
1231  }
1232 
1233  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1234  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
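1235  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1236  BLIS1_CONJUGATE,
1237  m_ahead + 1,
1238  n_behind,
1239  buff_m1,
1240  ABL, rs_A, cs_A,
1241  y10t, cs_Y,
1242  buff_1,
1243  a2, rs_A );
1244  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1245  BLIS1_CONJUGATE,
1246  m_ahead + 1,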
1247  n_behind,
1248  buff_m1,
1249  ZBL, rs_Z, cs_Z,
1250  a01, rs_A,
1251  buff_1,
1252  a2, rs_A );
1253 
1254  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1255  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
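1256  bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
1257  BLIS1_NO_CONJUGATE,
1258  n_ahead,
1259  n_behind,
1260  buff_m1,
1261  Y20, rs_Y, cs_Y,
1262  a10t, cs_A,
1263  buff_1,
1264  a12t, cs_A );
1265  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1266  BLIS1_NO_CONJUGATE,
1267  m_behind,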
1268  n_ahead,
1269  buff_m1,
1270  A02, rs_A, cs_A,
1271  z10t, cs_Z,
1272  buff_1,
1273  a12t, cs_A );
1274 
1275  if ( m_behind > 0 )
1276  {
1277  // FLA_Copy( last_elem, a01_b );
1278  *a01_b = last_elem;
1279  }
1280 
1281  // FLA_Househ2_UT( FLA_LEFT,
1282  // alpha11,
1283  // a21, tau11 );
1284  // FLA_Copy( a21, u21p );
1285  FLA_Househ2_UT_l_opc( m_ahead,
1286  alpha11,
1287  a21, rs_A,
1288  tau11 );
1289  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1290  m_ahead,
1291  a21, rs_A,
1292  u21p, inc_up );
1293 
1294  if ( n_ahead > 0 )
1295  {
1296  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1297  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1298  bl1_cdiv3( buff_m1, tau11, &minus_inv_tau11 );
1299 
1300  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1301  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1302  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1303  n_ahead,
1304  a12t, cs_A,
1305  a12p, inc_ap );
1306  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1307  n_ahead,
1308  &minus_inv_tau11,
1309  a12t, cs_A,
1310  a12p, inc_ap );
1311 
1312  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
1313  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
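1314  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1315  BLIS1_NO_CONJUGATE,
1316  m_ahead,
1317  n_behind,
1318  buff_1,
1319  A20, rs_A, cs_A,
1320  u21p, inc_up,
1321  buff_0,
1322  d0, inc_d );
1323  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1324  BLIS1_NO_CONJUGATE,
1325  m_ahead,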
1326  n_behind,
1327  buff_1,
1328  Z20, rs_Z, cs_Z,
1329  u21p, inc_up,
1330  buff_0,
1331  e0, inc_e );
1332 
1333  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1334  // FLA_Axpy( FLA_ONE, d0, t01 );
1335  bl1_ccopyv( BLIS1_CONJUGATE,
1336  n_behind,
1337  a10t, cs_A,
1338  t01, rs_T );
1339  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1340  n_behind,
1341  buff_1,
1342  d0, inc_d,
1343  t01, rs_T );
1344 
1345  // FLA_Set( FLA_ZERO, y21 );
1346  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1347  // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
1348  bl1_csetv( n_ahead,
1349  buff_0,
1350  y21, rs_Y );
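1351  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1352  BLIS1_NO_CONJUGATE,
1353  n_ahead,
1354  n_behind,
1355  buff_m1,
1356  Y20, rs_Y, cs_Y,
1357  d0, inc_d,
1358  buff_1,
1359  y21, rs_Y );
1360  bl1_cgemv( BLIS1_TRANSPOSE,
1361  BLIS1_NO_CONJUGATE,
1362  m_behind,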
1363  n_ahead,
1364  buff_m1,
1365  A02, rs_A, cs_A,
1366  e0, inc_e,
1367  buff_1,
1368  y21, rs_Y );
1369 
1370  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
1371  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1372  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1373  FLA_Fused_Ahx_Axpy_Ax_opc_var1( m_ahead,
1374  n_ahead,
1375  tau11,
1376  buff_1,
1377  A22, rs_A, cs_A,
1378  u21p, inc_up,
1379  a12p, inc_ap,
1380  y21, rs_Y,
1381  w21, inc_w );
1382 
1383  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
1384  // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
1385  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
1386  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
1387  // FLA_Copy( A22_l, a22l );
1388  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
1389  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
1390  // FLA_Copy( g0, s01 );
1391  FLA_Fused_UYx_ZVx_opc_var1( m_ahead,
1392  n_behind,
1393  m_behind,
1394  n_ahead,
1395  buff_m1,
1396  A20, rs_A, cs_A,
1397  Y20, rs_Y, cs_Y,
1398  Z20, rs_Z, cs_Z,
1399  A02, rs_A, cs_A,
1400  A22, rs_A, cs_A,
1401  tmp21, inc_tmp,
1402  s01, rs_S,
1403  a12p, inc_ap,
1404  w21, inc_w,
1405  a22l, inc_al );
1406 
1407  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1408  bl1_caxpyv( BLIS1_CONJUGATE,
1409  n_ahead,
1410  buff_1,
1411  a12t, cs_A,
1412  y21, rs_Y );
1413 
1414  // FLA_Househ2s_UT( FLA_RIGHT,
1415  // a12p_t,
1416  // a12p_b,
1417  // alpha12, psi11_minus_alpha12, sigma11 );
1418  FLA_Househ2s_UT_r_opc( n_ahead - 1,
1419  a12p_t,
1420  a12p_b, inc_ap,
1421  &alpha12,
1422  &psi11_minus_alpha12,
1423  sigma11 );
1424 
1425  // FLA_Copy( a12p, v21 );
1426  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1427  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1428  // FLA_Conjugate( v21_b );
1429  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1430  n_ahead,
1431  a12p, inc_ap,
1432  v21, inc_v );
1433  bl1_cmult4( buff_m1, &alpha12, v21_t, v21_t );
1434  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
1435  n_ahead,
1436  &psi11_minus_alpha12,
1437  v21, inc_v );
1438  bl1_cconjv( n_ahead - 1,
1439  v21_b, inc_v );
1440 
1441  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1442  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1443  bl1_ccopyconj( &alpha12, &minus_conj_alpha12 );
1444  bl1_cneg1( &minus_conj_alpha12 );
1445 
1446  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
1447  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
1448  bl1_caxpyv( BLIS1_CONJUGATE,
1449  n_behind,
1450  &minus_conj_alpha12,
1451  A02, rs_A,
1452  s01, rs_S );
1453  bl1_cinvscalv( BLIS1_CONJUGATE,
1454  n_behind,
1455  &psi11_minus_alpha12,
1456  s01, rs_S );
1457 
1458  // FLA_Copy( alpha12, a12t_l );
1459  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1460  *a12t_l = alpha12;
1461  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1462  n_ahead - 1,
1463  v21_b, inc_v,
1464  a12t_r, cs_A );
1465  }
1466 
1467  // FLA_Copy( u21p, u21 );
1468  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1469  m_ahead,
1470  u21p, inc_up,
1471  u21, inc_u );
1472 
1473  if ( n_ahead > 0 )
1474  {
1475  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1476  // FLA_Scal( FLA_MINUS_ONE, beta );
1477  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1478  bl1_cdot( BLIS1_CONJUGATE,
1479  n_ahead,
1480  y21, rs_Y,
1481  v21, inc_v,
1482  &beta );
1483  bl1_cscals( &minus_inv_tau11, &beta );
1484 
1485  // FLA_Copy( w21, z21 );
1486  // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
1487  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1488  // FLA_Axpy( beta, u21, z21 );
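1489  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1490  m_ahead,
1491  w21, inc_w,
1492  z21, rs_Z );
1493  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1494  m_ahead,
1495  &minus_conj_alpha12,
1496  a22l, inc_al,
1497  z21, rs_Z );
1498  bl1_cinvscalv( BLIS1_CONJUGATE,
1499  m_ahead,
1500  &psi11_minus_alpha12,
1501  z21, rs_Z );
1502  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1503  m_ahead,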
1504  &beta,
1505  u21, inc_u,
1506  z21, rs_Z );
1507 
1508  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1509  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1510  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
1511  n_ahead,
1512  tau11,
1513  y21, rs_Y );
1514  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
1515  m_ahead,
1516  sigma11,
1517  z21, rs_Z );
1518  }
1519  else // if ( n_ahead == 0 )
1520  {
1521  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1522  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1523  bl1_ccopyv( BLIS1_CONJUGATE,
1524  n_behind,
1525  a10t, cs_A,
1526  t01, rs_T );
1527  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1528  BLIS1_NO_CONJUGATE,
1529  m_ahead,
1530  n_behind,
1531  buff_1,
1532  A20, rs_A, cs_A,
1533  u21, inc_u,
1534  buff_1,
1535  t01, rs_T );
1536  }
1537 
1538  /*------------------------------------------------------------*/
1539 
1540  }
1541 
1542  // FLA_Obj_free( &w );
1543  // FLA_Obj_free( &al );
1544  // FLA_Obj_free( &ap );
1545  // FLA_Obj_free( &u );
1546  // FLA_Obj_free( &up );
1547  // FLA_Obj_free( &v );
1548  // FLA_Obj_free( &d );
1549  // FLA_Obj_free( &e );
1550  FLA_free( buff_tmp );
1551  FLA_free( buff_w );
1552  FLA_free( buff_al );
1553  FLA_free( buff_ap );
1554  FLA_free( buff_u );
1555  FLA_free( buff_up );
1556  FLA_free( buff_v );
1557  FLA_free( buff_d );
1558  FLA_free( buff_e );
1559 
1560  return FLA_SUCCESS;
1561 }
void bl1_csetv(int m, scomplex *sigma, scomplex *x, int incx)
Definition: bl1_setv.c:52
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:61
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
FLA_Error FLA_Fused_UYx_ZVx_opc_var1(int m_U, int n_U, int m_V, int n_V, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_V, int rs_V, int cs_V, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_temp, int inc_temp, scomplex *buff_t, int inc_t, scomplex *buff_a, int inc_a, scomplex *buff_w, int inc_w, scomplex *buff_al, int inc_al)
Definition: FLA_Fused_UYx_ZVx_opt_var1.c:424
Definition: blis_type_defs.h:55
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void bl1_cconjv(int m, scomplex *x, int incx)
Definition: bl1_conjv.c:23
FLA_Error FLA_Househ2s_UT_r_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *alpha, scomplex *chi_1_minus_alpha, scomplex *tau)
Definition: FLA_Househ2s_UT.c:589
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opc_var1(int m_A, int n_A, scomplex *buff_tau, scomplex *buff_beta, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_u, int inc_u, scomplex *buff_a, int inc_a, scomplex *buff_y, int inc_y, scomplex *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:322
Definition: blis_type_defs.h:132
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_invscalv.c:52
int i
Definition: bl1_axmyv2.c:145
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_ofd_var2()

// FLA_Bidiag_UT_u_step_ofd_var2: double-precision routine operating directly
// on raw buffers. It runs b_alg = m_TS iterations over the m_A x n_A matrix
// stored in buff_A (row stride rs_A, column stride cs_A). On iteration i it
//   - calls FLA_Househ2_UT_l_opd on alpha11 / a21, passing tau11 (the i-th
//     diagonal entry of buff_T) as the tau argument;
//   - when n_ahead = n_A - i - 1 is positive, calls FLA_Househ2_UT_r_opd on
//     the row a12t, passing sigma11 (the i-th diagonal entry of buff_S) as
//     the tau argument, and updates A22 through FLA_Fused_Gerc2_opd_var1;
//   - works on t01 and s01, the parts of column i of buff_T / buff_S that
//     start at row 0, as described by the retained FLA_* pseudo-code comments.
// Three work vectors (v, y of n_A elements, z of m_A elements) are allocated
// with FLA_malloc and released with FLA_free before returning. The only
// return statement yields FLA_SUCCESS.
//
// NOTE(review): the embedded source line numbers in this listing have gaps
// (for example 388, 392-393, 403, 409, 424, 431, 441, 445-446, 454, 471-472,
// 484, 488-489). The first line(s) of the corresponding calls are absent, so
// the callee name and its leading arguments cannot be read from this listing;
// only the FLA_* pseudo-code comment above each call indicates the intent.
FLA_Error FLA_Bidiag_UT_u_step_ofd_var2 ( int  m_A,
int  n_A,
int  m_TS,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T,
double *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dinvscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Gerc2_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_Househ2_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var2().

320 {
// Pointers to the library's double-precision constants 1, 0 and -1.
321  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
322  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
323  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
324 
325  double beta;
326  int i;
327 
328  // b_alg = FLA_Obj_length( T );
// The iteration count is taken directly from m_TS.
329  int b_alg = m_TS;
330 
331  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
332  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
333  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
// Work vectors with unit stride. The returned pointers are used without a
// NULL check in this routine; whether FLA_malloc itself handles allocation
// failure is not visible in this listing.
334  double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
335  double* buff_y = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
336  double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
337  int inc_v = 1;
338  int inc_y = 1;
339  int inc_z = 1;
340 
341  for ( i = 0; i < b_alg; ++i )
342  {
// Sub-matrix views of A around the diagonal element (i,i): names ending in
// 0 start at column 0, names ending in 2 start at column i+1.
343  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
344  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
345  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
346  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
347  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
348  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
349  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
350 
// Column i of T and S: the part starting at row 0 and the diagonal entry.
351  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
352  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
353 
354  double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
355  double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
356 
// Work-vector views starting at element i+1.
357  double* v21 = buff_v + (i+1)*inc_v;
358 
359  double* y21 = buff_y + (i+1)*inc_y;
360 
361  double* z21 = buff_z + (i+1)*inc_z;
362 
// First element of a12t / v21 and the remainder starting one element later.
363  double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
364  double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
365 
366  double* v21_t = v21 + (0 )*inc_v;
367  double* v21_b = v21 + (1 )*inc_v;
368 
// Counts of rows/columns after and before index i.
369  int m_ahead = m_A - i - 1;
370  int n_ahead = n_A - i - 1;
371  int m_behind = i;
372  int n_behind = i;
373 
374  /*------------------------------------------------------------*/
375 
376  // FLA_Househ2_UT( FLA_LEFT,
377  // alpha11,
378  // a21, tau11 );
379  FLA_Househ2_UT_l_opd( m_ahead,
380  alpha11,
381  a21, rs_A,
382  tau11 );
383 
// Everything involving the row a12t and the trailing block A22 is skipped
// when no columns remain to the right of column i.
384  if ( n_ahead > 0 )
385  {
386  // FLA_Copyt( FLA_TRANSPOSE, a12t, y21 );
387  // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
// NOTE(review): source lines 388 and 392-393 (the call heads) are missing
// from this listing; only the trailing arguments are shown.
389  n_ahead,
390  a12t, cs_A,
391  y21, inc_y );
394  m_ahead,
395  n_ahead,
396  buff_1,
397  A22, rs_A, cs_A,
398  a21, rs_A,
399  buff_1,
400  y21, inc_y );
401 
402  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
404  n_ahead,
405  tau11,
406  y21, inc_y );
407 
408  // FLA_Axpyt( FLA_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
410  n_ahead,
411  buff_m1,
412  y21, inc_y,
413  a12t, cs_A );
414 
415  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
416  FLA_Househ2_UT_r_opd( n_ahead - 1,
417  a12t_l,
418  a12t_r, cs_A,
419  sigma11 );
420 
421  // FLA_Set( FLA_ONE, v21_t );
422  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
423  *v21_t = *buff_1;
// NOTE(review): the destination v21_b is paired with inc_y below rather than
// inc_v. Both increments are initialized to 1 above, so the effect is the
// same, but inc_v would match the vector being written.
425  n_ahead - 1,
426  a12t_r, cs_A,
427  v21_b, inc_y );
428 
429  // FLA_Dotc( FLA_CONJUGATE, v21, y21, beta );
430  // FLA_Scal( FLA_MINUS_ONE, beta );
432  n_ahead,
433  v21, inc_v,
434  y21, inc_y,
435  &beta );
436  bl1_dneg1( &beta );
437 
438  // FLA_Copy( a21, z21 );
439  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
440  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
442  m_ahead,
443  a21, rs_A,
444  z21, inc_z );
447  m_ahead,
448  n_ahead,
449  buff_1,
450  A22, rs_A, cs_A,
451  v21, inc_v,
452  &beta,
453  z21, inc_z );
455  m_ahead,
456  sigma11,
457  z21, inc_z );
458 
459  // FLA_Ger( FLA_MINUS_ONE, a21, y21, A22 );
460  // FLA_Ger( FLA_MINUS_ONE, z21, v21, A22 );
// A single fused call stands in for the two rank-1 updates listed above.
461  FLA_Fused_Gerc2_opd_var1( m_ahead,
462  n_ahead,
463  buff_m1,
464  a21, rs_A,
465  y21, inc_y,
466  z21, inc_z,
467  v21, inc_v,
468  A22, rs_A, cs_A );
469 
470  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
473  m_behind,
474  n_ahead,
475  buff_1,
476  A02, rs_A, cs_A,
477  v21, inc_v,
478  buff_0,
479  s01, rs_S );
480  }
481 
// The t01 update sits outside the n_ahead guard, so it runs on every
// iteration.
482  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
483  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
485  n_behind,
486  a10t, cs_A,
487  t01, rs_T );
490  m_ahead,
491  n_behind,
492  buff_1,
493  A20, rs_A, cs_A,
494  a21, rs_A,
495  buff_1,
496  t01, rs_T );
497 
498  /*------------------------------------------------------------*/
499 
500  }
501 
502  // FLA_Obj_free( &v );
503  // FLA_Obj_free( &y );
504  // FLA_Obj_free( &z );
// Release the three work vectors allocated before the loop.
505  FLA_free( buff_v );
506  FLA_free( buff_y );
507  FLA_free( buff_z );
508 
509  return FLA_SUCCESS;
510 }
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
FLA_Error FLA_Househ2_UT_r_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:664
Definition: blis_type_defs.h:55
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_invscalv.c:26
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:54
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42
int i
Definition: bl1_axmyv2.c:145
FLA_Error FLA_Fused_Gerc2_opd_var1(int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Fused_Gerc2_opt_var1.c:193
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_ofd_var3()

// FLA_Bidiag_UT_u_step_ofd_var3: double-precision routine operating directly
// on raw buffers. It runs b_alg = m_TS iterations over the m_A x n_A matrix
// in buff_A (strides rs_A, cs_A) and works on column i of buff_T and buff_S
// on iteration i.
//
// Structure of one iteration, as far as this listing shows:
//   - for i > 0, alpha11, a21 and a12t are first updated using entries of
//     the u, y, z, v work vectors (see the FLA_Axpyt pseudo-code comments);
//   - FLA_Househ2_UT_l_opd is called on alpha11 / a21 with tau11;
//   - when n_ahead > 0, a fused kernel (FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1
//     or FLA_Fused_Ahx_Axpy_Ax_opd_var1 per the References list) is applied
//     to A22, then FLA_Househ2s_UT_r_opd is called on the work copy a12p and
//     its outputs are used to build v21 and overwrite a12t;
//   - on the last iteration (m_behind + 1 == b_alg) with n_ahead > 0, two
//     further updates of A22 are issued (FLA_Gerc pseudo-code comments).
// Seven unit-stride work vectors are allocated with FLA_malloc and freed
// before returning. The only return statement yields FLA_SUCCESS.
//
// NOTE(review): the embedded source line numbers have gaps (e.g. 637, 642,
// 650, 655, 663, 668, 683, 696, 700, 714, 734, 748, 769, 774, 784, 791, 801,
// 817, 821, 826, 830, 838, 842, 848-849, 861, 865-866, 879-880, 887-888).
// The call heads on those lines are absent from this listing, so the callee
// names and leading arguments cannot be confirmed here.
FLA_Error FLA_Bidiag_UT_u_step_ofd_var3 ( int  m_A,
int  n_A,
int  m_TS,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T,
double *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_daxpyv(), bl1_dconjv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_Househ2s_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var3().

522 {
// Pointers to the library's double-precision constants 1, 0 and -1.
523  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
524  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
525  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
526 
// Scalar temporaries. Several are assigned only inside a guarded branch
// (m_behind > 0 or n_ahead > 0) and read only under the same condition.
527  double alpha12;
528  double minus_conj_alpha12;
529  double psi11_minus_alpha12;
530  double minus_inv_tau11;
531  double minus_upsilon11;
532  double minus_conj_nu11;
533  double minus_conj_psi11;
534  double minus_zeta11;
535  double beta;
536  int i;
537 
538  // b_alg = FLA_Obj_length( T );
539  int b_alg = m_TS;
540 
541  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
542  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
543  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
544  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
545  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
546  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
547  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
// Work vectors of m_A or n_A elements. The returned pointers are used
// without a NULL check in this routine; whether FLA_malloc itself handles
// allocation failure is not visible in this listing.
548  double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
549  double* buff_ap = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
550  double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
551  double* buff_up = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
552  double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
553  double* buff_y = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
554  double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
555  int inc_w = 1;
556  int inc_ap = 1;
557  int inc_u = 1;
558  int inc_up = 1;
559  int inc_v = 1;
560  int inc_y = 1;
561  int inc_z = 1;
562 
563  for ( i = 0; i < b_alg; ++i )
564  {
// Sub-matrix views of A around the diagonal element (i,i).
565  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
566  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
567  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
568  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
569  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
570  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
571  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
572 
// Column i of T and S: the part starting at row 0 and the diagonal entry.
573  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
574  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
575 
576  double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
577  double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
578 
// Work-vector views: element i (upsilon11, nu11, psi11, zeta11) and the
// part starting at element i+1 (u21, v21, y21, z21, ...).
579  double* w21 = buff_w + (i+1)*inc_w;
580 
581  double* a12p = buff_ap + (i+1)*inc_ap;
582 
583  double* upsilon11 = buff_u + (i )*inc_u;
584  double* u21 = buff_u + (i+1)*inc_u;
585 
586  double* u21p = buff_up + (i+1)*inc_up;
587 
588  double* nu11 = buff_v + (i )*inc_v;
589  double* v21 = buff_v + (i+1)*inc_v;
590 
591  double* psi11 = buff_y + (i )*inc_y;
592  double* y21 = buff_y + (i+1)*inc_y;
593 
594  double* zeta11 = buff_z + (i )*inc_z;
595  double* z21 = buff_z + (i+1)*inc_z;
596 
// First element and remainder of a12p, v21 and a12t.
597  double* a12p_t = a12p + (0 )*inc_ap;
598  double* a12p_b = a12p + (1 )*inc_ap;
599 
600  double* v21_t = v21 + (0 )*inc_v;
601  double* v21_b = v21 + (1 )*inc_v;
602 
603  double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
604  double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
605 
// A22_l has the same address as A22; it is used below with stride rs_A to
// walk down the first column of A22.
606  double* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
607 
608  int m_ahead = m_A - i - 1;
609  int n_ahead = n_A - i - 1;
610  int m_behind = i;
611  int n_behind = i;
612 
613  /*------------------------------------------------------------*/
614 
// From the second iteration on, apply updates to alpha11, a21 and a12t built
// from the current contents of the u, y, z, v work vectors.
615  if ( m_behind > 0 )
616  {
617  // FLA_Copy( upsilon11, minus_upsilon11 );
618  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
619  bl1_dmult3( buff_m1, upsilon11, &minus_upsilon11 );
620 
621  // FLA_Copy( zeta11, minus_zeta11 );
622  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
623  bl1_dmult3( buff_m1, zeta11, &minus_zeta11 );
624 
625  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
626  // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
627  bl1_dcopyconj( psi11, &minus_conj_psi11 );
628  bl1_dscals( buff_m1, &minus_conj_psi11 );
629 
630  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
631  // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
632  bl1_dcopyconj( nu11, &minus_conj_nu11 );
633  bl1_dscals( buff_m1, &minus_conj_nu11 );
634 
635  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
636  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, zeta11, alpha11 );
// NOTE(review): source lines 637 and 642 (the call heads) are missing from
// this listing. The visible arguments describe length-1 updates of alpha11.
638  1,
639  &minus_conj_psi11,
640  upsilon11, 1,
641  alpha11, 1 );
643  1,
644  &minus_conj_nu11,
645  zeta11, 1,
646  alpha11, 1 );
647 
648  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
649  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
651  m_ahead,
652  &minus_conj_psi11,
653  u21, inc_u,
654  a21, rs_A );
656  m_ahead,
657  &minus_conj_nu11,
658  z21, inc_z,
659  a21, rs_A );
660 
661  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
662  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11, v21, a12t );
664  n_ahead,
665  &minus_upsilon11,
666  y21, inc_y,
667  a12t, cs_A );
669  n_ahead,
670  &minus_zeta11,
671  v21, inc_v,
672  a12t, cs_A );
673  }
674 
675  // FLA_Househ2_UT( FLA_LEFT,
676  // alpha11,
677  // a21, tau11 );
678  // FLA_Copy( a21, u21p );
679  FLA_Househ2_UT_l_opd( m_ahead,
680  alpha11,
681  a21, rs_A,
682  tau11 );
684  m_ahead,
685  a21, rs_A,
686  u21p, inc_up );
687 
688  if ( n_ahead > 0 )
689  {
690  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
691  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
// minus_inv_tau11 is assigned only here; every later read of it is also
// inside an n_ahead > 0 branch.
692  bl1_ddiv3( buff_m1, tau11, &minus_inv_tau11 );
693 
694  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
695  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
697  n_ahead,
698  a12t, cs_A,
699  a12p, inc_ap );
701  n_ahead,
702  &minus_inv_tau11,
703  a12t, cs_A,
704  a12p, inc_ap );
705  }
706 
// Two alternative fused updates: the first one also receives u21, y21, z21
// and v21 and is used from the second iteration on; the second one is used
// on the first iteration (m_behind == 0), where it is not passed them.
707  if ( m_behind > 0 && n_ahead > 0 )
708  {
709  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
710  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
711  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
712  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
713  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
715  n_ahead,
716  tau11,
717  buff_m1,
718  u21, inc_u,
719  y21, inc_y,
720  z21, inc_z,
721  v21, inc_v,
722  A22, rs_A, cs_A,
723  u21p, inc_up,
724  a12p, inc_ap,
725  w21, inc_w );
726 
727 
728  }
729  else if ( n_ahead > 0 )
730  {
731  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
732  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
733  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
735  n_ahead,
736  tau11,
737  buff_0,
738  A22, rs_A, cs_A,
739  u21p, inc_up,
740  a12p, inc_ap,
741  y21, inc_y,
742  w21, inc_w );
743  }
744 
745  if ( n_ahead > 0 )
746  {
747  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
749  n_ahead,
750  buff_1,
751  a12t, cs_A,
752  y21, inc_y );
753 
754  // FLA_Househ2s_UT( FLA_RIGHT,
755  // a12p_t,
756  // a12p_b,
757  // alpha12, psi11_minus_alpha12, sigma11 );
// alpha12 and psi11_minus_alpha12 are passed by address here and read in the
// statements that follow.
758  FLA_Househ2s_UT_r_opd( n_ahead - 1,
759  a12p_t,
760  a12p_b, inc_ap,
761  &alpha12,
762  &psi11_minus_alpha12,
763  sigma11 );
764 
765  // FLA_Copy( a12p, v21 );
766  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
767  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
768  // FLA_Conjugate( v21_b );
770  n_ahead,
771  a12p, inc_ap,
772  v21, inc_v );
773  bl1_dmult4( buff_m1, &alpha12, v21_t, v21_t );
775  n_ahead,
776  &psi11_minus_alpha12,
777  v21, inc_v );
778  bl1_dconjv( n_ahead - 1,
779  v21_b, inc_v );
780 
781  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
782  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
// Write the results back into row a12t of A: alpha12 into its first element,
// v21_b into the rest.
783  *a12t_l = alpha12;
785  n_ahead - 1,
786  v21_b, inc_v,
787  a12t_r, cs_A );
788  }
789 
790  // FLA_Copy( u21p, u21 );
792  m_ahead,
793  u21p, inc_up,
794  u21, inc_u );
795 
796  if ( n_ahead > 0 )
797  {
798  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
799  // FLA_Scal( FLA_MINUS_ONE, beta );
800  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
802  n_ahead,
803  y21, inc_y,
804  v21, inc_v,
805  &beta );
// One scaling by minus_inv_tau11 (set at line 692 as -1 / tau11) covers both
// the negation and the division listed in the pseudo-code.
806  bl1_dscals( &minus_inv_tau11, &beta );
807 
808  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
809  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
810  bl1_dcopyconj( &alpha12, &minus_conj_alpha12 );
811  bl1_dneg1( &minus_conj_alpha12 );
812 
813  // FLA_Copy( w21, z21 );
814  // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
815  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
816  // FLA_Axpy( beta, u21, z21 );
818  m_ahead,
819  w21, inc_w,
820  z21, inc_z );
822  m_ahead,
823  &minus_conj_alpha12,
824  A22_l, rs_A,
825  z21, inc_z );
827  m_ahead,
828  &psi11_minus_alpha12,
829  z21, inc_z );
831  m_ahead,
832  &beta,
833  u21, inc_u,
834  z21, inc_z );
835 
836  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
837  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
839  n_ahead,
840  tau11,
841  y21, inc_y );
843  m_ahead,
844  sigma11,
845  z21, inc_z );
846 
847  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
850  m_behind,
851  n_ahead,
852  buff_1,
853  A02, rs_A, cs_A,
854  v21, inc_v,
855  buff_0,
856  s01, rs_S );
857  }
858 
// The t01 update sits outside the n_ahead guard, so it runs on every
// iteration.
859  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
860  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
862  n_behind,
863  a10t, cs_A,
864  t01, rs_T );
867  m_ahead,
868  n_behind,
869  buff_1,
870  A20, rs_A, cs_A,
871  u21, inc_u,
872  buff_1,
873  t01, rs_T );
874 
// Only on the final iteration: issue the two A22 updates directly. On other
// iterations the fused call at the top of the next iteration is the one that
// receives u21, y21, z21 and v21 together with A22.
875  if ( m_behind + 1 == b_alg && n_ahead > 0 )
876  {
877  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
878  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
881  m_ahead,
882  n_ahead,
883  buff_m1,
884  u21, inc_u,
885  y21, inc_y,
886  A22, rs_A, cs_A );
889  m_ahead,
890  n_ahead,
891  buff_m1,
892  z21, inc_z,
893  v21, inc_v,
894  A22, rs_A, cs_A );
895  }
896 
897  /*------------------------------------------------------------*/
898 
899  }
900 
901  // FLA_Obj_free( &w );
902  // FLA_Obj_free( &ap );
903  // FLA_Obj_free( &u );
904  // FLA_Obj_free( &up );
905  // FLA_Obj_free( &v );
906  // FLA_Obj_free( &y );
907  // FLA_Obj_free( &z );
// Release the seven work vectors allocated before the loop.
908  FLA_free( buff_w );
909  FLA_free( buff_ap );
910  FLA_free( buff_u );
911  FLA_free( buff_up );
912  FLA_free( buff_v );
913  FLA_free( buff_y );
914  FLA_free( buff_z );
915 
916  return FLA_SUCCESS;
917 }
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
FLA_Error FLA_Househ2s_UT_r_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *alpha, double *chi_1_minus_alpha, double *tau)
Definition: FLA_Househ2s_UT.c:572
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition: bl1_ger.c:62
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_invscalv.c:26
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:56
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(int m_A, int n_A, double *buff_tau, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A, double *buff_up, int inc_up, double *buff_a, int inc_a, double *buff_w, int inc_w)
Definition: FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:267
void bl1_dconjv(int m, double *x, int incx)
Definition: bl1_conjv.c:18
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opd_var1(int m_A, int n_A, double *buff_tau, double *buff_beta, double *buff_A, int rs_A, int cs_A, double *buff_u, int inc_u, double *buff_a, int inc_a, double *buff_y, int inc_y, double *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:207
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42
int i
Definition: bl1_axmyv2.c:145
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_ofd_var4()

// FLA_Bidiag_UT_u_step_ofd_var4: double-precision routine operating directly
// on raw buffers. Besides A, T and S it takes two further matrices, buff_Y
// and buff_Z, which it sets to zero on entry (n_A x b_alg and m_A x b_alg
// respectively) and then fills column by column (y21, z21). It runs
// b_alg = m_TS iterations.
//
// Structure of one iteration, as far as this listing shows:
//   - column i / row i of A (a2, a12t) are updated with matrix-vector
//     products involving the leading columns of A, Y and Z; for i > 0 the
//     entry a01_b is temporarily replaced by 1 around these updates and
//     restored afterwards;
//   - FLA_Househ2_UT_l_opd is called on alpha11 / a21 with tau11;
//   - when n_ahead > 0, y21 and w21 are built (including calls that the
//     References list names as FLA_Fused_Ahx_Axpy_Ax_opd_var1 and
//     FLA_Fused_UYx_ZVx_opd_var1), FLA_Househ2s_UT_r_opd is called on a12p,
//     and v21, s01, a12t and z21 are formed from its outputs;
//   - when n_ahead == 0, only t01 is formed from a10t, A20 and u21.
// Nine unit-stride work vectors are allocated with FLA_malloc and freed
// before returning. The only return statement yields FLA_SUCCESS.
//
// NOTE(review): the embedded source line numbers have gaps (e.g. 765-766,
// 774-775, 786-787, 795-796, 819, 832, 836, 844-845, 853-854, 865, 869,
// 881-882, 890-891, 903, 921, 938, 959, 964, 978, 983, 991, 998, 1008, 1019,
// 1023, 1028, 1032, 1040, 1044, 1053, 1057-1058). The call heads on those
// lines are absent from this listing, so the callee names and leading
// arguments cannot be confirmed here.
FLA_Error FLA_Bidiag_UT_u_step_ofd_var4 ( int  m_A,
int  n_A,
int  m_TS,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_Y,
int  rs_Y,
int  cs_Y,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_T,
int  rs_T,
int  cs_T,
double *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_daxpyv(), bl1_dconjv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dinvscalv(), bl1_dsetm(), bl1_dsetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_UYx_ZVx_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_Househ2s_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var4().

633 {
// Pointers to the library's double-precision constants 1, 0 and -1.
634  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
635  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
636  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
637 
// Scalar temporaries. last_elem is written and read only under
// m_behind > 0; alpha12, psi11_minus_alpha12, minus_conj_alpha12 and
// minus_inv_tau11 only under n_ahead > 0.
638  double alpha12;
639  double minus_conj_alpha12;
640  double psi11_minus_alpha12;
641  double minus_inv_tau11;
642  double beta;
643  double last_elem;
644  int i;
645 
646  // b_alg = FLA_Obj_length( T );
647  int b_alg = m_TS;
648 
649  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
650  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
651  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
652  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
653  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
654  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
655  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
656  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
657  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
658  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
// NOTE(review): the pseudo-code above lists objects f and g, but no buff_f or
// buff_g is allocated below; instead a buffer tmp (n_A elements) appears that
// the pseudo-code does not mention. The returned pointers are used without a
// NULL check in this routine; whether FLA_malloc itself handles allocation
// failure is not visible in this listing.
659  double* buff_tmp = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
660  double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
661  double* buff_al = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
662  double* buff_ap = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
663  double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
664  double* buff_up = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
665  double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
666  double* buff_d = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
667  double* buff_e = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
668  int inc_tmp = 1;
669  int inc_w = 1;
670  int inc_al = 1;
671  int inc_ap = 1;
672  int inc_u = 1;
673  int inc_up = 1;
674  int inc_v = 1;
675  int inc_d = 1;
676  int inc_e = 1;
677 
678  // FLA_Set( FLA_ZERO, Y );
679  // FLA_Set( FLA_ZERO, Z );
// Y is cleared as an n_A x b_alg matrix and Z as an m_A x b_alg matrix.
680  bl1_dsetm( n_A,
681  b_alg,
682  buff_0,
683  buff_Y, rs_Y, cs_Y );
684  bl1_dsetm( m_A,
685  b_alg,
686  buff_0,
687  buff_Z, rs_Z, cs_Z );
688 
689  for ( i = 0; i < b_alg; ++i )
690  {
// Sub-matrix views of A around the diagonal element (i,i).
691  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
692  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
693  double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
694  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
695  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
696  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
697  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
698  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
699 
// Views of Y and Z: row i, the rows below it, and column i below row i.
700  double* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
701  double* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
702  double* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
703 
704  double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
705  double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
706  double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
707 
// Column i of T and S: the part starting at row 0 and the diagonal entry.
708  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
709  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
710 
711  double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
712  double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
713 
// Work-vector views; d0 and e0 always start at element 0.
714  double* tmp21 = buff_tmp + (i+1)*inc_tmp;
715 
716  double* w21 = buff_w + (i+1)*inc_w;
717 
718  double* a22l = buff_al + (i+1)*inc_al;
719 
720  double* a12p = buff_ap + (i+1)*inc_ap;
721 
722  double* u21 = buff_u + (i+1)*inc_u;
723 
724  double* u21p = buff_up + (i+1)*inc_up;
725 
726  double* v21 = buff_v + (i+1)*inc_v;
727 
728  double* d0 = buff_d + (0 )*inc_d;
729 
730  double* e0 = buff_e + (0 )*inc_e;
731 
732  double* a12p_t = a12p + (0 )*inc_ap;
733  double* a12p_b = a12p + (1 )*inc_ap;
734 
735  double* v21_t = v21 + (0 )*inc_v;
736  double* v21_b = v21 + (1 )*inc_v;
737 
// Element of a01 at row i-1. For i == 0 the row offset is -1, so this address
// precedes a01; it is dereferenced only inside the m_behind > 0 branches.
738  double* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
739 
740  double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
741  double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
742 
// Aliases: ABL and ZBL start at row i, column 0 of A and Z (used below with
// m_ahead + 1 rows); a2 starts at alpha11 and is walked with stride rs_A.
743  double* ABL = a10t;
744  double* ZBL = z10t;
745 
746  double* a2 = alpha11;
747 
748  int m_ahead = m_A - i - 1;
749  int n_ahead = n_A - i - 1;
750  int m_behind = i;
751  int n_behind = i;
752 
753  /*------------------------------------------------------------*/
754 
// Save a01_b and overwrite it with 1 for the updates that follow; it is
// restored at line 808.
755  if ( m_behind > 0 )
756  {
757  // FLA_Copy( a01_b, last_elem );
758  // FLA_Set( FLA_ONE, a01_b );
759  last_elem = *a01_b;
760  *a01_b = *buff_1;
761  }
762 
763  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
764  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
// NOTE(review): source lines 765-766 and 774-775 (the call heads) are missing
// from this listing; only the trailing arguments are shown.
767  m_ahead + 1,
768  n_behind,
769  buff_m1,
770  ABL, rs_A, cs_A,
771  y10t, cs_Y,
772  buff_1,
773  a2, rs_A );
776  m_ahead + 1,
777  n_behind,
778  buff_m1,
779  ZBL, rs_Z, cs_Z,
780  a01, rs_A,
781  buff_1,
782  a2, rs_A );
783 
784  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
785  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
788  n_ahead,
789  n_behind,
790  buff_m1,
791  Y20, rs_Y, cs_Y,
792  a10t, cs_A,
793  buff_1,
794  a12t, cs_A );
797  m_behind,
798  n_ahead,
799  buff_m1,
800  A02, rs_A, cs_A,
801  z10t, cs_Z,
802  buff_1,
803  a12t, cs_A );
804 
// Put back the value saved at line 759.
805  if ( m_behind > 0 )
806  {
807  // FLA_Copy( last_elem, a01_b );
808  *a01_b = last_elem;
809  }
810 
811  // FLA_Househ2_UT( FLA_LEFT,
812  // alpha11,
813  // a21, tau11 );
814  // FLA_Copy( a21, u21p );
815  FLA_Househ2_UT_l_opd( m_ahead,
816  alpha11,
817  a21, rs_A,
818  tau11 );
820  m_ahead,
821  a21, rs_A,
822  u21p, inc_up );
823 
824  if ( n_ahead > 0 )
825  {
826  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
827  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
828  bl1_ddiv3( buff_m1, tau11, &minus_inv_tau11 );
829 
830  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
831  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
833  n_ahead,
834  a12t, cs_A,
835  a12p, inc_ap );
837  n_ahead,
838  &minus_inv_tau11,
839  a12t, cs_A,
840  a12p, inc_ap );
841 
842  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
843  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
846  m_ahead,
847  n_behind,
848  buff_1,
849  A20, rs_A, cs_A,
850  u21p, inc_up,
851  buff_0,
852  d0, inc_d );
855  m_ahead,
856  n_behind,
857  buff_1,
858  Z20, rs_Z, cs_Z,
859  u21p, inc_up,
860  buff_0,
861  e0, inc_e );
862 
// In this branch t01 is formed from a10t plus d0 (see the pseudo-code below);
// the n_ahead == 0 branch at the end of the iteration forms t01 directly from
// a10t, A20 and u21 instead.
863  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
864  // FLA_Axpy( FLA_ONE, d0, t01 );
866  n_behind,
867  a10t, cs_A,
868  t01, rs_T );
870  n_behind,
871  buff_1,
872  d0, inc_d,
873  t01, rs_T );
874 
875  // FLA_Set( FLA_ZERO, y21 );
876  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
877  // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
878  bl1_dsetv( n_ahead,
879  buff_0,
880  y21, rs_Y );
883  n_ahead,
884  n_behind,
885  buff_m1,
886  Y20, rs_Y, cs_Y,
887  d0, inc_d,
888  buff_1,
889  y21, rs_Y );
892  m_behind,
893  n_ahead,
894  buff_m1,
895  A02, rs_A, cs_A,
896  e0, inc_e,
897  buff_1,
898  y21, rs_Y );
899 
900  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
901  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
902  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
904  n_ahead,
905  tau11,
906  buff_1,
907  A22, rs_A, cs_A,
908  u21p, inc_up,
909  a12p, inc_ap,
910  y21, rs_Y,
911  w21, inc_w );
912 
913  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
914  // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
915  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
916  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
917  // FLA_Copy( A22_l, a22l );
918  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
919  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
920  // FLA_Copy( g0, s01 );
// NOTE(review): source line 921 (the call head) is missing. The argument
// list that follows passes tmp21 and s01 in the positions where the
// pseudo-code above refers to f0 and g0 — presumably those play the same
// roles; confirm against the callee's definition.
922  n_behind,
923  m_behind,
924  n_ahead,
925  buff_m1,
926  A20, rs_A, cs_A,
927  Y20, rs_Y, cs_Y,
928  Z20, rs_Z, cs_Z,
929  A02, rs_A, cs_A,
930  A22, rs_A, cs_A,
931  tmp21, inc_tmp,
932  s01, rs_S,
933  a12p, inc_ap,
934  w21, inc_w,
935  a22l, inc_al );
936 
937  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
939  n_ahead,
940  buff_1,
941  a12t, cs_A,
942  y21, rs_Y );
943 
944  // FLA_Househ2s_UT( FLA_RIGHT,
945  // a12p_t,
946  // a12p_b,
947  // alpha12, psi11_minus_alpha12, sigma11 );
// alpha12 and psi11_minus_alpha12 are passed by address here and read in the
// statements that follow.
948  FLA_Househ2s_UT_r_opd( n_ahead - 1,
949  a12p_t,
950  a12p_b, inc_ap,
951  &alpha12,
952  &psi11_minus_alpha12,
953  sigma11 );
954 
955  // FLA_Copy( a12p, v21 );
956  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
957  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
958  // FLA_Conjugate( v21_b );
960  n_ahead,
961  a12p, inc_ap,
962  v21, inc_v );
963  bl1_dmult4( buff_m1, &alpha12, v21_t, v21_t );
965  n_ahead,
966  &psi11_minus_alpha12,
967  v21, inc_v );
968  bl1_dconjv( n_ahead - 1,
969  v21_b, inc_v );
970 
971  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
972  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
973  bl1_dcopyconj( &alpha12, &minus_conj_alpha12 );
974  bl1_dneg1( &minus_conj_alpha12 );
975 
976  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
977  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
// A02 with stride rs_A walks down the first column of A02 (A02_l in the
// pseudo-code).
979  n_behind,
980  &minus_conj_alpha12,
981  A02, rs_A,
982  s01, rs_S );
984  n_behind,
985  &psi11_minus_alpha12,
986  s01, rs_S );
987 
988  // FLA_Copy( alpha12, a12t_l );
989  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
// Write the results back into row a12t of A: alpha12 into its first element,
// v21_b into the rest.
990  *a12t_l = alpha12;
992  n_ahead - 1,
993  v21_b, inc_v,
994  a12t_r, cs_A );
995  }
996 
997  // FLA_Copy( u21p, u21 );
999  m_ahead,
1000  u21p, inc_up,
1001  u21, inc_u );
1002 
1003  if ( n_ahead > 0 )
1004  {
1005  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1006  // FLA_Scal( FLA_MINUS_ONE, beta );
1007  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1009  n_ahead,
1010  y21, rs_Y,
1011  v21, inc_v,
1012  &beta );
// One scaling by minus_inv_tau11 (set at line 828 as -1 / tau11) covers both
// the negation and the division listed in the pseudo-code.
1013  bl1_dscals( &minus_inv_tau11, &beta );
1014 
1015  // FLA_Copy( w21, z21 );
1016  // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
1017  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1018  // FLA_Axpy( beta, u21, z21 );
1020  m_ahead,
1021  w21, inc_w,
1022  z21, rs_Z );
1024  m_ahead,
1025  &minus_conj_alpha12,
1026  a22l, inc_al,
1027  z21, rs_Z );
1029  m_ahead,
1030  &psi11_minus_alpha12,
1031  z21, rs_Z );
1033  m_ahead,
1034  &beta,
1035  u21, inc_u,
1036  z21, rs_Z );
1037 
1038  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1039  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1041  n_ahead,
1042  tau11,
1043  y21, rs_Y );
1045  m_ahead,
1046  sigma11,
1047  z21, rs_Z );
1048  }
// With no columns to the right, none of the y21 / z21 / s01 work above was
// done; only t01 still has to be produced for this iteration.
1049  else // if ( n_ahead == 0 )
1050  {
1051  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1052  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1054  n_behind,
1055  a10t, cs_A,
1056  t01, rs_T );
1059  m_ahead,
1060  n_behind,
1061  buff_1,
1062  A20, rs_A, cs_A,
1063  u21, inc_u,
1064  buff_1,
1065  t01, rs_T );
1066  }
1067 
1068  /*------------------------------------------------------------*/
1069 
1070  }
1071 
1072  // FLA_Obj_free( &w );
1073  // FLA_Obj_free( &al );
1074  // FLA_Obj_free( &ap );
1075  // FLA_Obj_free( &u );
1076  // FLA_Obj_free( &up );
1077  // FLA_Obj_free( &v );
1078  // FLA_Obj_free( &d );
1079  // FLA_Obj_free( &e );
// Release all nine work vectors, including buff_tmp, which has no matching
// FLA_Obj_free line in the pseudo-code above.
1080  FLA_free( buff_tmp );
1081  FLA_free( buff_w );
1082  FLA_free( buff_al );
1083  FLA_free( buff_ap );
1084  FLA_free( buff_u );
1085  FLA_free( buff_up );
1086  FLA_free( buff_v );
1087  FLA_free( buff_d );
1088  FLA_free( buff_e );
1089 
1090  return FLA_SUCCESS;
1091 }
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_dsetm(int m, int n, double *sigma, double *a, int a_rs, int a_cs)
Definition: bl1_setm.c:45
FLA_Error FLA_Househ2s_UT_r_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *alpha, double *chi_1_minus_alpha, double *tau)
Definition: FLA_Househ2s_UT.c:572
Definition: blis_type_defs.h:55
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_dsetv(int m, double *sigma, double *x, int incx)
Definition: bl1_setv.c:39
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_invscalv.c:26
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void bl1_dconjv(int m, double *x, int incx)
Definition: bl1_conjv.c:18
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opd_var1(int m_A, int n_A, double *buff_tau, double *buff_beta, double *buff_A, int rs_A, int cs_A, double *buff_u, int inc_u, double *buff_a, int inc_a, double *buff_y, int inc_y, double *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:207
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42
FLA_Error FLA_Fused_UYx_ZVx_opd_var1(int m_U, int n_U, int m_V, int n_V, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_V, int rs_V, int cs_V, double *buff_A, int rs_A, int cs_A, double *buff_temp, int inc_temp, double *buff_t, int inc_t, double *buff_a, int inc_a, double *buff_w, int inc_w, double *buff_al, int inc_al)
Definition: FLA_Fused_UYx_ZVx_opt_var1.c:331
int i
Definition: bl1_axmyv2.c:145
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_ofs_var2()

FLA_Error FLA_Bidiag_UT_u_step_ofs_var2 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sinvscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Gerc2_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_Househ2_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var2().

// Body of FLA_Bidiag_UT_u_step_ofs_var2 (float buffers, explicit strides).
//
// Runs b_alg = m_TS iterations over the diagonal of A. Element (r,c) of a
// matrix is addressed as buff + c*cs + r*rs throughout. In iteration i:
//   1. FLA_Househ2_UT_l_ops is applied to alpha11/a21 and writes tau11, the
//      i-th diagonal entry of T.
//   2. If columns remain to the right (n_ahead > 0): y21 is formed and
//      subtracted from a12t, FLA_Househ2_UT_r_ops is applied to
//      a12t_l/a12t_r and writes sigma11 (i-th diagonal entry of S), v21,
//      beta and z21 are formed, A22 is updated through
//      FLA_Fused_Gerc2_ops_var1, and s01 (column i of S above the diagonal)
//      is computed.
//   3. t01 (column i of T above the diagonal) is computed.
// The FLA_* comments preceding each step give the object-level operation the
// buffer-level calls implement.
//
// Three work vectors (v, y of n_A elements; z of m_A elements) are obtained
// from FLA_malloc and released with FLA_free before the single
// "return FLA_SUCCESS".
//
// NOTE(review): the listing numbers embedded in this block skip values
// (e.g. 187 -> 189, 191 -> 194, 202 -> 204). Each gap is followed by a bare
// argument list, so the opening line(s) of those calls appear to be missing
// from this rendering; the FLA_* comment directly above each gap names the
// intended operation. Confirm against the original .c file before relying on
// this listing.
120 {
121  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
122  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
123  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
124 
125  float beta;
126  int i;
127 
128  // b_alg = FLA_Obj_length( T );
129  int b_alg = m_TS;
130 
// Work vectors, all walked with unit stride (inc_* = 1 below).
131  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
132  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
133  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
134  float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
135  float* buff_y = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
136  float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
137  int inc_v = 1;
138  int inc_y = 1;
139  int inc_z = 1;
140 
141  for ( i = 0; i < b_alg; ++i )
142  {
// Views into A, T and S positioned relative to diagonal element (i,i).
143  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
144  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
145  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
146  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
147  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
148  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
149  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
150 
151  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
152  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
153 
154  float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
155  float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
156 
157  float* v21 = buff_v + (i+1)*inc_v;
158 
159  float* y21 = buff_y + (i+1)*inc_y;
160 
161  float* z21 = buff_z + (i+1)*inc_z;
162 
// First element / remainder splits of a12t and v21.
163  float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
164  float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
165 
166  float* v21_t = v21 + (0 )*inc_v;
167  float* v21_b = v21 + (1 )*inc_v;
168 
// Number of rows/columns after index i, and before it.
169  int m_ahead = m_A - i - 1;
170  int n_ahead = n_A - i - 1;
171  int m_behind = i;
172  int n_behind = i;
173 
174  /*------------------------------------------------------------*/
175 
176  // FLA_Househ2_UT( FLA_LEFT,
177  // alpha11,
178  // a21, tau11 );
179  FLA_Househ2_UT_l_ops( m_ahead,
180  alpha11,
181  a21, rs_A,
182  tau11 );
183 
// Everything in this branch needs at least one column to the right of i.
184  if ( n_ahead > 0 )
185  {
186  // FLA_Copyt( FLA_TRANSPOSE, a12t, y21 );
187  // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
189  n_ahead,
190  a12t, cs_A,
191  y21, inc_y );
194  m_ahead,
195  n_ahead,
196  buff_1,
197  A22, rs_A, cs_A,
198  a21, rs_A,
199  buff_1,
200  y21, inc_y );
201 
202  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
204  n_ahead,
205  tau11,
206  y21, inc_y );
207 
208  // FLA_Axpyt( FLA_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
210  n_ahead,
211  buff_m1,
212  y21, inc_y,
213  a12t, cs_A );
214 
215  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
216  FLA_Househ2_UT_r_ops( n_ahead - 1,
217  a12t_l,
218  a12t_r, cs_A,
219  sigma11 );
220 
221  // FLA_Set( FLA_ONE, v21_t );
222  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
223  *v21_t = *buff_1;
// NOTE(review): v21_b points into buff_v but is given stride inc_y below
// rather than inc_v. Both are initialized to 1 above, so the result is the
// same today; inc_v looks like the intended name.
225  n_ahead - 1,
226  a12t_r, cs_A,
227  v21_b, inc_y );
228 
229  // FLA_Dotc( FLA_CONJUGATE, v21, y21, beta );
230  // FLA_Scal( FLA_MINUS_ONE, beta );
232  n_ahead,
233  v21, inc_v,
234  y21, inc_y,
235  &beta );
236  bl1_sneg1( &beta );
237 
238  // FLA_Copy( a21, z21 );
239  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
240  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
242  m_ahead,
243  a21, rs_A,
244  z21, inc_z );
247  m_ahead,
248  n_ahead,
249  buff_1,
250  A22, rs_A, cs_A,
251  v21, inc_v,
252  &beta,
253  z21, inc_z );
255  m_ahead,
256  sigma11,
257  z21, inc_z );
258 
// Both rank-1 updates of A22 listed below are issued as one fused call.
259  // FLA_Ger( FLA_MINUS_ONE, a21, y21, A22 );
260  // FLA_Ger( FLA_MINUS_ONE, z21, v21, A22 );
261  FLA_Fused_Gerc2_ops_var1( m_ahead,
262  n_ahead,
263  buff_m1,
264  a21, rs_A,
265  y21, inc_y,
266  z21, inc_z,
267  v21, inc_v,
268  A22, rs_A, cs_A );
269 
270  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
273  m_behind,
274  n_ahead,
275  buff_1,
276  A02, rs_A, cs_A,
277  v21, inc_v,
278  buff_0,
279  s01, rs_S );
280  }
281 
// t01 is filled on every iteration, including when n_ahead == 0.
282  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
283  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
285  n_behind,
286  a10t, cs_A,
287  t01, rs_T );
290  m_ahead,
291  n_behind,
292  buff_1,
293  A20, rs_A, cs_A,
294  a21, rs_A,
295  buff_1,
296  t01, rs_T );
297 
298  /*------------------------------------------------------------*/
299 
300  }
301 
302  // FLA_Obj_free( &v );
303  // FLA_Obj_free( &y );
304  // FLA_Obj_free( &z );
305  FLA_free( buff_v );
306  FLA_free( buff_y );
307  FLA_free( buff_z );
308 
309  return FLA_SUCCESS;
310 }
FLA_Error FLA_Househ2_UT_r_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:651
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:55
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_invscalv.c:13
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
Definition: blis_type_defs.h:54
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
int i
Definition: bl1_axmyv2.c:145
FLA_Error FLA_Fused_Gerc2_ops_var1(int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Fused_Gerc2_opt_var1.c:130
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_ofs_var3()

FLA_Error FLA_Bidiag_UT_u_step_ofs_var3 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sinvscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_Househ2s_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var3().

// Body of FLA_Bidiag_UT_u_step_ofs_var3 (float buffers, explicit strides).
//
// Runs b_alg = m_TS iterations over the diagonal of A. Element (r,c) of a
// matrix is addressed as buff + c*cs + r*rs. Unlike a variant that updates
// A22 inside every iteration, this body carries the vectors u, y, z, v in
// work buffers from one iteration to the next:
//   - at the top of iteration i > 0, element i of those buffers (upsilon11,
//     psi11, zeta11, nu11) and the sub-vectors starting at i+1 are used to
//     update alpha11, a21 and a12t;
//   - the A22 update is passed to a fused kernel together with the
//     matrix-vector products when m_behind > 0 && n_ahead > 0;
//   - on the last iteration (m_behind + 1 == b_alg) the two rank-1 updates
//     of A22 are applied explicitly at the bottom of the loop.
// tau11/t01 are written into column i of T and sigma11/s01 into column i of
// S. The FLA_* comments preceding each step give the object-level operation
// the buffer-level calls implement.
//
// Seven work vectors (w, u, up, z of m_A elements; ap, v, y of n_A elements)
// are obtained from FLA_malloc and released with FLA_free before the single
// "return FLA_SUCCESS".
//
// NOTE(review): the listing numbers embedded in this block skip values
// (e.g. 234 -> 236, 280 -> 282, 311 -> 313). Each gap is followed by a bare
// argument list, so the opening line(s) of those calls appear to be missing
// from this rendering; the FLA_* comment directly above each gap names the
// intended operation. Confirm against the original .c file before relying on
// this listing.
120 {
121  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
122  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
123  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
124 
// Scalar temporaries; each is assigned inside the loop before it is read.
125  float alpha12;
126  float minus_conj_alpha12;
127  float psi11_minus_alpha12;
128  float minus_inv_tau11;
129  float minus_upsilon11;
130  float minus_conj_nu11;
131  float minus_conj_psi11;
132  float minus_zeta11;
133  float beta;
134  int i;
135 
136  // b_alg = FLA_Obj_length( T );
137  int b_alg = m_TS;
138 
// Work vectors, all walked with unit stride (inc_* = 1 below).
139  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
140  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
141  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
142  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
143  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
144  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
145  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
146  float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
147  float* buff_ap = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
148  float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
149  float* buff_up = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
150  float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
151  float* buff_y = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
152  float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
153  int inc_w = 1;
154  int inc_ap = 1;
155  int inc_u = 1;
156  int inc_up = 1;
157  int inc_v = 1;
158  int inc_y = 1;
159  int inc_z = 1;
160 
161  for ( i = 0; i < b_alg; ++i )
162  {
// Views into A, T and S positioned relative to diagonal element (i,i).
163  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
164  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
165  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
166  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
167  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
168  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
169  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
170 
171  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
172  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
173 
174  float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
175  float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
176 
// Work-vector views. Element i (upsilon11, nu11, psi11, zeta11) is the slot
// that was the first entry of u21/v21/y21/z21 during iteration i-1.
177  float* w21 = buff_w + (i+1)*inc_w;
178 
179  float* a12p = buff_ap + (i+1)*inc_ap;
180 
181  float* upsilon11 = buff_u + (i )*inc_u;
182  float* u21 = buff_u + (i+1)*inc_u;
183 
184  float* u21p = buff_up + (i+1)*inc_up;
185 
186  float* nu11 = buff_v + (i )*inc_v;
187  float* v21 = buff_v + (i+1)*inc_v;
188 
189  float* psi11 = buff_y + (i )*inc_y;
190  float* y21 = buff_y + (i+1)*inc_y;
191 
192  float* zeta11 = buff_z + (i )*inc_z;
193  float* z21 = buff_z + (i+1)*inc_z;
194 
195  float* a12p_t = a12p + (0 )*inc_ap;
196  float* a12p_b = a12p + (1 )*inc_ap;
197 
198  float* v21_t = v21 + (0 )*inc_v;
199  float* v21_b = v21 + (1 )*inc_v;
200 
201  float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
202  float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
203 
// A22_l is the same address as A22; it is used below as the first column of
// A22 (walked with stride rs_A).
204  float* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
205 
// Number of rows/columns after index i, and before it.
206  int m_ahead = m_A - i - 1;
207  int n_ahead = n_A - i - 1;
208  int m_behind = i;
209  int n_behind = i;
210 
211  /*------------------------------------------------------------*/
212 
// From the second iteration on, bring alpha11, a21 and a12t up to date using
// the u/y/z/v values left in the work vectors by the previous iteration.
213  if ( m_behind > 0 )
214  {
215  // FLA_Copy( upsilon11, minus_upsilon11 );
216  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
217  bl1_smult3( buff_m1, upsilon11, &minus_upsilon11 );
218 
219  // FLA_Copy( zeta11, minus_zeta11 );
220  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
221  bl1_smult3( buff_m1, zeta11, &minus_zeta11 );
222 
223  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
224  // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
225  bl1_scopyconj( psi11, &minus_conj_psi11 );
226  bl1_sscals( buff_m1, &minus_conj_psi11 );
227 
228  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
229  // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
230  bl1_scopyconj( nu11, &minus_conj_nu11 );
231  bl1_sscals( buff_m1, &minus_conj_nu11 );
232 
233  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_upsilon11, psi11, alpha11 );
234  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_zeta11, nu11, alpha11 );
236  1,
237  &minus_upsilon11,
238  psi11, 1,
239  alpha11, 1 );
241  1,
242  &minus_zeta11,
243  nu11, 1,
244  alpha11, 1 );
245 
246  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
247  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
249  m_ahead,
250  &minus_conj_psi11,
251  u21, inc_u,
252  a21, rs_A );
254  m_ahead,
255  &minus_conj_nu11,
256  z21, inc_z,
257  a21, rs_A );
258 
259  // FLA_Axpyt( FLA_TRANSPOSE, minus_upsilon11, y21, a12t );
260  // FLA_Axpyt( FLA_TRANSPOSE, minus_zeta11, v21, a12t );
262  n_ahead,
263  &minus_upsilon11,
264  y21, inc_y,
265  a12t, cs_A );
267  n_ahead,
268  &minus_zeta11,
269  v21, inc_v,
270  a12t, cs_A );
271  }
272 
// The new left vector is kept in u21p for now; u21 still holds the previous
// iteration's values until the copy further down.
273  // FLA_Househ2_UT( FLA_LEFT,
274  // alpha11,
275  // a21, tau11 );
276  // FLA_Copy( a21, u21p );
277  FLA_Househ2_UT_l_ops( m_ahead,
278  alpha11,
279  a21, rs_A,
280  tau11 );
282  m_ahead,
283  a21, rs_A,
284  u21p, inc_up );
285 
286  if ( n_ahead > 0 )
287  {
288  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
289  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
290  bl1_sdiv3( buff_m1, tau11, &minus_inv_tau11 );
291 
292  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
293  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
295  n_ahead,
296  a12t, cs_A,
297  a12p, inc_ap );
299  n_ahead,
300  &minus_inv_tau11,
301  a12t, cs_A,
302  a12p, inc_ap );
303  }
304 
// With pending updates the argument list below also carries u21, y21, z21 and
// v21 -- presumably a call to FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1 (named in
// the page's reference list; call head not visible here).
305  if ( m_behind > 0 && n_ahead > 0 )
306  {
307  // FLA_Ger( FLA_MINUS_ONE, u21, y21, A22 );
308  // FLA_Ger( FLA_MINUS_ONE, z21, v21, A22 );
309  // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
310  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
311  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
313  n_ahead,
314  tau11,
315  buff_m1,
316  u21, inc_u,
317  y21, inc_y,
318  z21, inc_z,
319  v21, inc_v,
320  A22, rs_A, cs_A,
321  u21p, inc_up,
322  a12p, inc_ap,
323  w21, inc_w );
324 
325 
326  }
// First iteration (m_behind == 0): same three products without an A22 update.
327  else if ( n_ahead > 0 )
328  {
329  // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
330  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
331  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
333  n_ahead,
334  tau11,
335  buff_0,
336  A22, rs_A, cs_A,
337  u21p, inc_up,
338  a12p, inc_ap,
339  y21, inc_y,
340  w21, inc_w );
341  }
342 
343  if ( n_ahead > 0 )
344  {
345  // FLA_Axpyt( FLA_TRANSPOSE, FLA_ONE, a12t, y21 );
347  n_ahead,
348  buff_1,
349  a12t, cs_A,
350  y21, inc_y );
351 
// The right transformation is computed on the copy a12p, not on a12t itself;
// a12t is overwritten from alpha12 and v21_b at the end of this branch.
352  // FLA_Househ2s_UT( FLA_RIGHT,
353  // a12p_t,
354  // a12p_b,
355  // alpha12, psi11_minus_alpha12, sigma11 );
356  FLA_Househ2s_UT_r_ops( n_ahead - 1,
357  a12p_t,
358  a12p_b, inc_ap,
359  &alpha12,
360  &psi11_minus_alpha12,
361  sigma11 );
362 
363  // FLA_Copy( a12p, v21 );
364  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
365  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
367  n_ahead,
368  a12p, inc_ap,
369  v21, inc_v );
370  bl1_smult4( buff_m1, &alpha12, v21_t, v21_t );
372  n_ahead,
373  &psi11_minus_alpha12,
374  v21, inc_v );
375 
376  // FLA_Copy( alpha12, a12t_l );
377  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
378  *a12t_l = alpha12;
380  n_ahead - 1,
381  v21_b, inc_v,
382  a12t_r, cs_A );
383  }
384 
// u21 is refreshed only now, after the fused kernel above has consumed the
// previous iteration's u21.
385  // FLA_Copy( u21p, u21 );
387  m_ahead,
388  u21p, inc_up,
389  u21, inc_u );
390 
391  if ( n_ahead > 0 )
392  {
393  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
394  // FLA_Scal( FLA_MINUS_ONE, beta );
395  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
397  n_ahead,
398  y21, inc_y,
399  v21, inc_v,
400  &beta );
401  bl1_sscals( &minus_inv_tau11, &beta );
402 
403  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
404  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
405  bl1_scopyconj( &alpha12, &minus_conj_alpha12 );
406  bl1_sneg1( &minus_conj_alpha12 );
407 
408  // FLA_Copy( w21, z21 );
409  // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
410  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
411  // FLA_Axpy( beta, u21, z21 );
413  m_ahead,
414  w21, inc_w,
415  z21, inc_z );
417  m_ahead,
418  &minus_conj_alpha12,
419  A22_l, rs_A,
420  z21, inc_z );
422  m_ahead,
423  &psi11_minus_alpha12,
424  z21, inc_z );
426  m_ahead,
427  &beta,
428  u21, inc_u,
429  z21, inc_z );
430 
431  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
432  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
434  n_ahead,
435  tau11,
436  y21, inc_y );
438  m_ahead,
439  sigma11,
440  z21, inc_z );
441 
442  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
445  m_behind,
446  n_ahead,
447  buff_1,
448  A02, rs_A, cs_A,
449  v21, inc_v,
450  buff_0,
451  s01, rs_S );
452  }
453 
// t01 is filled on every iteration, including when n_ahead == 0.
454  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
455  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
457  n_behind,
458  a10t, cs_A,
459  t01, rs_T );
462  m_ahead,
463  n_behind,
464  buff_1,
465  A20, rs_A, cs_A,
466  u21, inc_u,
467  buff_1,
468  t01, rs_T );
469 
// Last iteration only (i == b_alg - 1): apply the two rank-1 updates of A22
// here, since no further iteration will fold them in.
470  if ( m_behind + 1 == b_alg && n_ahead > 0 )
471  {
472  // FLA_Ger( FLA_MINUS_ONE, u21, y21, A22 );
473  // FLA_Ger( FLA_MINUS_ONE, z21, v21, A22 );
476  m_ahead,
477  n_ahead,
478  buff_m1,
479  u21, inc_u,
480  y21, inc_y,
481  A22, rs_A, cs_A );
484  m_ahead,
485  n_ahead,
486  buff_m1,
487  z21, inc_z,
488  v21, inc_v,
489  A22, rs_A, cs_A );
490  }
491 
492  /*------------------------------------------------------------*/
493 
494  }
495 
496  // FLA_Obj_free( &w );
497  // FLA_Obj_free( &ap );
498  // FLA_Obj_free( &u );
499  // FLA_Obj_free( &up );
500  // FLA_Obj_free( &v );
501  // FLA_Obj_free( &y );
502  // FLA_Obj_free( &z );
503  FLA_free( buff_w );
504  FLA_free( buff_ap );
505  FLA_free( buff_u );
506  FLA_free( buff_up );
507  FLA_free( buff_v );
508  FLA_free( buff_y );
509  FLA_free( buff_z );
510 
511  return FLA_SUCCESS;
512 }
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_invscalv.c:13
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
Definition: blis_type_defs.h:54
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1(int m_A, int n_A, float *buff_tau, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A, float *buff_up, int inc_up, float *buff_a, int inc_a, float *buff_w, int inc_w)
Definition: FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:170
FLA_Error FLA_Househ2s_UT_r_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *alpha, float *chi_1_minus_alpha, float *tau)
Definition: FLA_Househ2s_UT.c:555
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition: bl1_ger.c:13
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
int i
Definition: bl1_axmyv2.c:145
FLA_Error FLA_Fused_Ahx_Axpy_Ax_ops_var1(int m_A, int n_A, float *buff_tau, float *buff_beta, float *buff_A, int rs_A, int cs_A, float *buff_u, int inc_u, float *buff_a, int inc_a, float *buff_y, int inc_y, float *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:143
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_ofs_var4()

FLA_Error FLA_Bidiag_UT_u_step_ofs_var4 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_Y,
int  rs_Y,
int  cs_Y,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_saxpyv(), bl1_sconjv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sinvscalv(), bl1_ssetm(), bl1_ssetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_UYx_ZVx_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_Househ2s_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var4().

// Body of FLA_Bidiag_UT_u_step_ofs_var4 (float buffers, explicit strides).
//
// Runs b_alg = m_TS iterations over the diagonal of A. Element (r,c) of a
// matrix is addressed as buff + c*cs + r*rs. In addition to A, T and S this
// variant takes two caller-supplied matrices Y and Z:
//   - the leading n_A x b_alg part of Y and m_A x b_alg part of Z are set to
//     zero before the loop;
//   - in iteration i, y21 and z21 are the parts of column i of Y and Z below
//     row i, and are written inside the n_ahead > 0 branches;
//   - at the top of each iteration the current column of A (a2, starting at
//     alpha11) and the current row a12t are updated from the previously
//     filled parts of A, Y and Z.
// tau11/t01 are written into column i of T and sigma11/s01 into column i of
// S. The FLA_* comments preceding each step give the object-level operation
// the buffer-level calls implement.
//
// Nine work vectors are obtained from FLA_malloc and released with FLA_free
// before the single "return FLA_SUCCESS".
//
// NOTE(review): the listing numbers embedded in this block skip values
// (e.g. 294 -> 297, 348 -> 350, 432 -> 434, 450 -> 452). Each gap is followed
// by a bare argument list, so the opening line(s) of those calls appear to be
// missing from this rendering; the FLA_* comment directly above each gap
// names the intended operation. Confirm against the original .c file before
// relying on this listing.
163 {
164  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
165  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
166  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
167 
168  float alpha12;
169  float minus_conj_alpha12;
170  float psi11_minus_alpha12;
171  float minus_inv_tau11;
172  float beta;
173  float last_elem;
174  int i;
175 
176  // b_alg = FLA_Obj_length( T );
177  int b_alg = m_TS;
178 
// NOTE(review): the FLA_Obj_create comments below mention f and g, but no
// buff_f/buff_g is allocated in this body; buff_tmp is allocated without a
// matching comment.
179  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
180  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
181  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
182  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
183  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
184  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
185  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
186  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
187  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
188  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
189  float* buff_tmp = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
190  float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
191  float* buff_al = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
192  float* buff_ap = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
193  float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
194  float* buff_up = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
195  float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
196  float* buff_d = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
197  float* buff_e = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
198  int inc_tmp = 1;
199  int inc_w = 1;
200  int inc_al = 1;
201  int inc_ap = 1;
202  int inc_u = 1;
203  int inc_up = 1;
204  int inc_v = 1;
205  int inc_d = 1;
206  int inc_e = 1;
207 
208  // FLA_Set( FLA_ZERO, Y );
209  // FLA_Set( FLA_ZERO, Z );
210  bl1_ssetm( n_A,
211  b_alg,
212  buff_0,
213  buff_Y, rs_Y, cs_Y );
214  bl1_ssetm( m_A,
215  b_alg,
216  buff_0,
217  buff_Z, rs_Z, cs_Z );
218 
219  for ( i = 0; i < b_alg; ++i )
220  {
// Views into A, Y, Z, T and S positioned relative to diagonal element (i,i).
221  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
222  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
223  float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
224  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
225  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
226  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
227  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
228  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
229 
230  float* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
231  float* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
232  float* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
233 
234  float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
235  float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
236  float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
237 
238  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
239  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
240 
241  float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
242  float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
243 
244  float* tmp21 = buff_tmp + (i+1)*inc_tmp;
245 
246  float* w21 = buff_w + (i+1)*inc_w;
247 
248  float* a22l = buff_al + (i+1)*inc_al;
249 
250  float* a12p = buff_ap + (i+1)*inc_ap;
251 
252  float* u21 = buff_u + (i+1)*inc_u;
253 
254  float* u21p = buff_up + (i+1)*inc_up;
255 
256  float* v21 = buff_v + (i+1)*inc_v;
257 
// d0 and e0 always start at element 0 of their buffers.
258  float* d0 = buff_d + (0 )*inc_d;
259 
260  float* e0 = buff_e + (0 )*inc_e;
261 
262  float* a12p_t = a12p + (0 )*inc_ap;
263  float* a12p_b = a12p + (1 )*inc_ap;
264 
265  float* v21_t = v21 + (0 )*inc_v;
266  float* v21_b = v21 + (1 )*inc_v;
267 
// Last element of a01 (row i-1 of column i).
// NOTE(review): when i == 0 this forms an address rs_A elements before a01;
// it is only dereferenced under "m_behind > 0" below, but the pointer
// arithmetic itself happens unconditionally.
268  float* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
269 
270  float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
271  float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
272 
// Aliases: ABL/ZBL start at row i of A/Z (column 0); a2 starts at alpha11.
273  float* ABL = a10t;
274  float* ZBL = z10t;
275 
276  float* a2 = alpha11;
277 
// Number of rows/columns after index i, and before it.
278  int m_ahead = m_A - i - 1;
279  int n_ahead = n_A - i - 1;
280  int m_behind = i;
281  int n_behind = i;
282 
283  /*------------------------------------------------------------*/
284 
// Temporarily overwrite the last element of a01 with one; the saved value is
// written back after the a2 / a12t updates below.
285  if ( m_behind > 0 )
286  {
287  // FLA_Copy( a01_b, last_elem );
288  // FLA_Set( FLA_ONE, a01_b );
289  last_elem = *a01_b;
290  *a01_b = *buff_1;
291  }
292 
293  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
294  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
297  m_ahead + 1,
298  n_behind,
299  buff_m1,
300  ABL, rs_A, cs_A,
301  y10t, cs_Y,
302  buff_1,
303  a2, rs_A );
306  m_ahead + 1,
307  n_behind,
308  buff_m1,
309  ZBL, rs_Z, cs_Z,
310  a01, rs_A,
311  buff_1,
312  a2, rs_A );
313 
314  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
315  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
318  n_ahead,
319  n_behind,
320  buff_m1,
321  Y20, rs_Y, cs_Y,
322  a10t, cs_A,
323  buff_1,
324  a12t, cs_A );
327  m_behind,
328  n_ahead,
329  buff_m1,
330  A02, rs_A, cs_A,
331  z10t, cs_Z,
332  buff_1,
333  a12t, cs_A );
334 
335  if ( m_behind > 0 )
336  {
337  // FLA_Copy( last_elem, a01_b );
338  *a01_b = last_elem;
339  }
340 
341  // FLA_Househ2_UT( FLA_LEFT,
342  // alpha11,
343  // a21, tau11 );
344  // FLA_Copy( a21, u21p );
345  FLA_Househ2_UT_l_ops( m_ahead,
346  alpha11,
347  a21, rs_A,
348  tau11 );
350  m_ahead,
351  a21, rs_A,
352  u21p, inc_up );
353 
// Everything in this branch needs at least one column to the right of i.
354  if ( n_ahead > 0 )
355  {
356  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
357  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
358  bl1_sdiv3( buff_m1, tau11, &minus_inv_tau11 );
359 
360  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
361  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
363  n_ahead,
364  a12t, cs_A,
365  a12p, inc_ap );
367  n_ahead,
368  &minus_inv_tau11,
369  a12t, cs_A,
370  a12p, inc_ap );
371 
372  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
373  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
376  m_ahead,
377  n_behind,
378  buff_1,
379  A20, rs_A, cs_A,
380  u21p, inc_up,
381  buff_0,
382  d0, inc_d );
385  m_ahead,
386  n_behind,
387  buff_1,
388  Z20, rs_Z, cs_Z,
389  u21p, inc_up,
390  buff_0,
391  e0, inc_e );
392 
// In this branch t01 is formed from a10t plus d0; the "else" branch at the
// bottom of the loop forms it when no columns remain.
393  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
394  // FLA_Axpy( FLA_ONE, d0, t01 );
396  n_behind,
397  a10t, cs_A,
398  t01, rs_T );
400  n_behind,
401  buff_1,
402  d0, inc_d,
403  t01, rs_T );
404 
405  // FLA_Set( FLA_ZERO, y21 );
406  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
407  // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
408  bl1_ssetv( n_ahead,
409  buff_0,
410  y21, rs_Y );
413  n_ahead,
414  n_behind,
415  buff_m1,
416  Y20, rs_Y, cs_Y,
417  d0, inc_d,
418  buff_1,
419  y21, rs_Y );
422  m_behind,
423  n_ahead,
424  buff_m1,
425  A02, rs_A, cs_A,
426  e0, inc_e,
427  buff_1,
428  y21, rs_Y );
429 
430  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
431  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
432  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
434  n_ahead,
435  tau11,
436  buff_1,
437  A22, rs_A, cs_A,
438  u21p, inc_up,
439  a12p, inc_ap,
440  y21, rs_Y,
441  w21, inc_w );
442 
// The operations listed in the next comment group are issued as a single
// call whose argument list follows (call head not visible here; the page's
// reference list names FLA_Fused_UYx_ZVx_ops_var1). s01, w21 and a22l are
// passed as arguments of that call.
443  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
444  // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
445  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
446  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
447  // FLA_Copy( A22_l, a22l );
448  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
449  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
450  // FLA_Copy( g0, s01 );
452  n_behind,
453  m_behind,
454  n_ahead,
455  buff_m1,
456  A20, rs_A, cs_A,
457  Y20, rs_Y, cs_Y,
458  Z20, rs_Z, cs_Z,
459  A02, rs_A, cs_A,
460  A22, rs_A, cs_A,
461  tmp21, inc_tmp,
462  s01, rs_S,
463  a12p, inc_ap,
464  w21, inc_w,
465  a22l, inc_al );
466 
467  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
469  n_ahead,
470  buff_1,
471  a12t, cs_A,
472  y21, rs_Y );
473 
// The right transformation is computed on the copy a12p, not on a12t itself;
// a12t is overwritten from alpha12 and v21_b at the end of this branch.
474  // FLA_Househ2s_UT( FLA_RIGHT,
475  // a12p_t,
476  // a12p_b,
477  // alpha12, psi11_minus_alpha12, sigma11 );
478  FLA_Househ2s_UT_r_ops( n_ahead - 1,
479  a12p_t,
480  a12p_b, inc_ap,
481  &alpha12,
482  &psi11_minus_alpha12,
483  sigma11 );
484 
485  // FLA_Copy( a12p, v21 );
486  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
487  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
488  // FLA_Conjugate( v21_b );
490  n_ahead,
491  a12p, inc_ap,
492  v21, inc_v );
493  bl1_smult4( buff_m1, &alpha12, v21_t, v21_t );
495  n_ahead,
496  &psi11_minus_alpha12,
497  v21, inc_v );
498  bl1_sconjv( n_ahead - 1,
499  v21_b, inc_v );
500 
// minus_conj_alpha12 is set here and read again in the second
// "n_ahead > 0" branch further down.
501  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
502  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
503  bl1_scopyconj( &alpha12, &minus_conj_alpha12 );
504  bl1_sneg1( &minus_conj_alpha12 );
505 
506  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
507  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
509  n_behind,
510  &minus_conj_alpha12,
511  A02, rs_A,
512  s01, rs_S );
514  n_behind,
515  &psi11_minus_alpha12,
516  s01, rs_S );
517 
518  // FLA_Copy( alpha12, a12t_l );
519  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
520  *a12t_l = alpha12;
522  n_ahead - 1,
523  v21_b, inc_v,
524  a12t_r, cs_A );
525  }
526 
527  // FLA_Copy( u21p, u21 );
529  m_ahead,
530  u21p, inc_up,
531  u21, inc_u );
532 
533  if ( n_ahead > 0 )
534  {
535  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
536  // FLA_Scal( FLA_MINUS_ONE, beta );
537  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
539  n_ahead,
540  y21, rs_Y,
541  v21, inc_v,
542  &beta );
543  bl1_sscals( &minus_inv_tau11, &beta );
544 
545  // FLA_Copy( w21, z21 );
546  // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
547  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
548  // FLA_Axpy( beta, u21, z21 );
550  m_ahead,
551  w21, inc_w,
552  z21, rs_Z );
554  m_ahead,
555  &minus_conj_alpha12,
556  a22l, inc_al,
557  z21, rs_Z );
559  m_ahead,
560  &psi11_minus_alpha12,
561  z21, rs_Z );
563  m_ahead,
564  &beta,
565  u21, inc_u,
566  z21, rs_Z );
567 
568  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
569  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
571  n_ahead,
572  tau11,
573  y21, rs_Y );
575  m_ahead,
576  sigma11,
577  z21, rs_Z );
578  }
// No columns remain: only t01 still has to be formed for this iteration.
579  else // if ( n_ahead == 0 )
580  {
581  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
582  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
584  n_behind,
585  a10t, cs_A,
586  t01, rs_T );
589  m_ahead,
590  n_behind,
591  buff_1,
592  A20, rs_A, cs_A,
593  u21, inc_u,
594  buff_1,
595  t01, rs_T );
596  }
597 
598  /*------------------------------------------------------------*/
599 
600  }
601 
602  // FLA_Obj_free( &w );
603  // FLA_Obj_free( &al );
604  // FLA_Obj_free( &ap );
605  // FLA_Obj_free( &u );
606  // FLA_Obj_free( &up );
607  // FLA_Obj_free( &v );
608  // FLA_Obj_free( &d );
609  // FLA_Obj_free( &e );
610  FLA_free( buff_tmp );
611  FLA_free( buff_w );
612  FLA_free( buff_al );
613  FLA_free( buff_ap );
614  FLA_free( buff_u );
615  FLA_free( buff_up );
616  FLA_free( buff_v );
617  FLA_free( buff_d );
618  FLA_free( buff_e );
619 
620  return FLA_SUCCESS;
621 }
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:55
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_invscalv.c:13
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void bl1_ssetv(int m, float *sigma, float *x, int incx)
Definition: bl1_setv.c:26
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_sconjv(int m, float *x, int incx)
Definition: bl1_conjv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Fused_UYx_ZVx_ops_var1(int m_U, int n_U, int m_V, int n_V, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_V, int rs_V, int cs_V, float *buff_A, int rs_A, int cs_A, float *buff_temp, int inc_temp, float *buff_t, int inc_t, float *buff_a, int inc_a, float *buff_w, int inc_w, float *buff_al, int inc_al)
Definition: FLA_Fused_UYx_ZVx_opt_var1.c:201
FLA_Error FLA_Househ2s_UT_r_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *alpha, float *chi_1_minus_alpha, float *tau)
Definition: FLA_Househ2s_UT.c:555
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
int i
Definition: bl1_axmyv2.c:145
FLA_Error FLA_Fused_Ahx_Axpy_Ax_ops_var1(int m_A, int n_A, float *buff_tau, float *buff_beta, float *buff_A, int rs_A, int cs_A, float *buff_u, int inc_u, float *buff_a, int inc_a, float *buff_y, int inc_y, float *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:143
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
void bl1_ssetm(int m, int n, float *sigma, float *a, int a_rs, int a_cs)
Definition: bl1_setm.c:29

◆ FLA_Bidiag_UT_u_step_ofu_var2()

FLA_Error FLA_Bidiag_UT_u_step_ofu_var2 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)

References FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Bidiag_UT_u_step_ofd_var2(), FLA_Bidiag_UT_u_step_ofs_var2(), FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u_blf_var2(), and FLA_Bidiag_UT_u_ofu_var2().

19 {
20  FLA_Datatype datatype;
21  int m_A, n_A, m_TS;
22  int rs_A, cs_A;
23  int rs_T, cs_T;
24  int rs_S, cs_S;
25 
26  datatype = FLA_Obj_datatype( A );
27 
28  m_A = FLA_Obj_length( A );
29  n_A = FLA_Obj_width( A );
30  m_TS = FLA_Obj_length( T );
31 
32  rs_A = FLA_Obj_row_stride( A );
33  cs_A = FLA_Obj_col_stride( A );
34 
35  rs_T = FLA_Obj_row_stride( T );
36  cs_T = FLA_Obj_col_stride( T );
37 
38  rs_S = FLA_Obj_row_stride( S );
39  cs_S = FLA_Obj_col_stride( S );
40 
41 
42  switch ( datatype )
43  {
44  case FLA_FLOAT:
45  {
46  float* buff_A = FLA_FLOAT_PTR( A );
47  float* buff_T = FLA_FLOAT_PTR( T );
48  float* buff_S = FLA_FLOAT_PTR( S );
49 
50  FLA_Bidiag_UT_u_step_ofs_var2( m_A,
51  n_A,
52  m_TS,
53  buff_A, rs_A, cs_A,
54  buff_T, rs_T, cs_T,
55  buff_S, rs_S, cs_S );
56 
57  break;
58  }
59 
60  case FLA_DOUBLE:
61  {
62  double* buff_A = FLA_DOUBLE_PTR( A );
63  double* buff_T = FLA_DOUBLE_PTR( T );
64  double* buff_S = FLA_DOUBLE_PTR( S );
65 
66  FLA_Bidiag_UT_u_step_ofd_var2( m_A,
67  n_A,
68  m_TS,
69  buff_A, rs_A, cs_A,
70  buff_T, rs_T, cs_T,
71  buff_S, rs_S, cs_S );
72 
73  break;
74  }
75 
76  case FLA_COMPLEX:
77  {
78  scomplex* buff_A = FLA_COMPLEX_PTR( A );
79  scomplex* buff_T = FLA_COMPLEX_PTR( T );
80  scomplex* buff_S = FLA_COMPLEX_PTR( S );
81 
82  FLA_Bidiag_UT_u_step_ofc_var2( m_A,
83  n_A,
84  m_TS,
85  buff_A, rs_A, cs_A,
86  buff_T, rs_T, cs_T,
87  buff_S, rs_S, cs_S );
88 
89  break;
90  }
91 
92  case FLA_DOUBLE_COMPLEX:
93  {
94  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
95  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
96  dcomplex* buff_S = FLA_DOUBLE_COMPLEX_PTR( S );
97 
98  FLA_Bidiag_UT_u_step_ofz_var2( m_A,
99  n_A,
100  m_TS,
101  buff_A, rs_A, cs_A,
102  buff_T, rs_T, cs_T,
103  buff_S, rs_S, cs_S );
104 
105  break;
106  }
107  }
108 
109  return FLA_SUCCESS;
110 }
FLA_Error FLA_Bidiag_UT_u_step_ofs_var2(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_fus_var2.c:114
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Bidiag_UT_u_step_ofc_var2(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_fus_var2.c:514
Definition: blis_type_defs.h:132
FLA_Error FLA_Bidiag_UT_u_step_ofd_var2(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_fus_var2.c:314
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Error FLA_Bidiag_UT_u_step_ofz_var2(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_fus_var2.c:714
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
Definition: blis_type_defs.h:137

◆ FLA_Bidiag_UT_u_step_ofu_var3()

FLA_Error FLA_Bidiag_UT_u_step_ofu_var3 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)

References FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofd_var3(), FLA_Bidiag_UT_u_step_ofs_var3(), FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u_blf_var3(), and FLA_Bidiag_UT_u_ofu_var3().

19 {
20  FLA_Datatype datatype;
21  int m_A, n_A, m_TS;
22  int rs_A, cs_A;
23  int rs_T, cs_T;
24  int rs_S, cs_S;
25 
26  datatype = FLA_Obj_datatype( A );
27 
28  m_A = FLA_Obj_length( A );
29  n_A = FLA_Obj_width( A );
30  m_TS = FLA_Obj_length( T );
31 
32  rs_A = FLA_Obj_row_stride( A );
33  cs_A = FLA_Obj_col_stride( A );
34 
35  rs_T = FLA_Obj_row_stride( T );
36  cs_T = FLA_Obj_col_stride( T );
37 
38  rs_S = FLA_Obj_row_stride( S );
39  cs_S = FLA_Obj_col_stride( S );
40 
41 
42  switch ( datatype )
43  {
44  case FLA_FLOAT:
45  {
46  float* buff_A = FLA_FLOAT_PTR( A );
47  float* buff_T = FLA_FLOAT_PTR( T );
48  float* buff_S = FLA_FLOAT_PTR( S );
49 
50  FLA_Bidiag_UT_u_step_ofs_var3( m_A,
51  n_A,
52  m_TS,
53  buff_A, rs_A, cs_A,
54  buff_T, rs_T, cs_T,
55  buff_S, rs_S, cs_S );
56 
57  break;
58  }
59 
60  case FLA_DOUBLE:
61  {
62  double* buff_A = FLA_DOUBLE_PTR( A );
63  double* buff_T = FLA_DOUBLE_PTR( T );
64  double* buff_S = FLA_DOUBLE_PTR( S );
65 
66  FLA_Bidiag_UT_u_step_ofd_var3( m_A,
67  n_A,
68  m_TS,
69  buff_A, rs_A, cs_A,
70  buff_T, rs_T, cs_T,
71  buff_S, rs_S, cs_S );
72 
73  break;
74  }
75 
76  case FLA_COMPLEX:
77  {
78  scomplex* buff_A = FLA_COMPLEX_PTR( A );
79  scomplex* buff_T = FLA_COMPLEX_PTR( T );
80  scomplex* buff_S = FLA_COMPLEX_PTR( S );
81 
82  FLA_Bidiag_UT_u_step_ofc_var3( m_A,
83  n_A,
84  m_TS,
85  buff_A, rs_A, cs_A,
86  buff_T, rs_T, cs_T,
87  buff_S, rs_S, cs_S );
88 
89  break;
90  }
91 
92  case FLA_DOUBLE_COMPLEX:
93  {
94  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
95  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
96  dcomplex* buff_S = FLA_DOUBLE_COMPLEX_PTR( S );
97 
98  FLA_Bidiag_UT_u_step_ofz_var3( m_A,
99  n_A,
100  m_TS,
101  buff_A, rs_A, cs_A,
102  buff_T, rs_T, cs_T,
103  buff_S, rs_S, cs_S );
104 
105  break;
106  }
107  }
108 
109  return FLA_SUCCESS;
110 }
FLA_Error FLA_Bidiag_UT_u_step_ofc_var3(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_fus_var3.c:921
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Error FLA_Bidiag_UT_u_step_ofs_var3(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_fus_var3.c:114
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
Definition: blis_type_defs.h:132
FLA_Error FLA_Bidiag_UT_u_step_ofz_var3(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_fus_var3.c:1326
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Error FLA_Bidiag_UT_u_step_ofd_var3(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_fus_var3.c:516
Definition: blis_type_defs.h:137

◆ FLA_Bidiag_UT_u_step_ofu_var4()

FLA_Error FLA_Bidiag_UT_u_step_ofu_var4 ( FLA_Obj  A,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  T,
FLA_Obj  S 
)

References FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_ofd_var4(), FLA_Bidiag_UT_u_step_ofs_var4(), FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u_blf_var4(), and FLA_Bidiag_UT_u_ofu_var4().

36 {
37  FLA_Datatype datatype;
38  int m_A, n_A, m_TS;
39  int rs_A, cs_A;
40  int rs_Y, cs_Y;
41  int rs_Z, cs_Z;
42  int rs_T, cs_T;
43  int rs_S, cs_S;
44 
45  datatype = FLA_Obj_datatype( A );
46 
47  m_A = FLA_Obj_length( A );
48  n_A = FLA_Obj_width( A );
49  m_TS = FLA_Obj_length( T );
50 
51  rs_A = FLA_Obj_row_stride( A );
52  cs_A = FLA_Obj_col_stride( A );
53 
54  rs_Y = FLA_Obj_row_stride( Y );
55  cs_Y = FLA_Obj_col_stride( Y );
56 
57  rs_Z = FLA_Obj_row_stride( Z );
58  cs_Z = FLA_Obj_col_stride( Z );
59 
60  rs_T = FLA_Obj_row_stride( T );
61  cs_T = FLA_Obj_col_stride( T );
62 
63  rs_S = FLA_Obj_row_stride( S );
64  cs_S = FLA_Obj_col_stride( S );
65 
66 
67  switch ( datatype )
68  {
69  case FLA_FLOAT:
70  {
71  float* buff_A = FLA_FLOAT_PTR( A );
72  float* buff_Y = FLA_FLOAT_PTR( Y );
73  float* buff_Z = FLA_FLOAT_PTR( Z );
74  float* buff_T = FLA_FLOAT_PTR( T );
75  float* buff_S = FLA_FLOAT_PTR( S );
76 
77  FLA_Bidiag_UT_u_step_ofs_var4( m_A,
78  n_A,
79  m_TS,
80  buff_A, rs_A, cs_A,
81  buff_Y, rs_Y, cs_Y,
82  buff_Z, rs_Z, cs_Z,
83  buff_T, rs_T, cs_T,
84  buff_S, rs_S, cs_S );
85 
86  break;
87  }
88 
89  case FLA_DOUBLE:
90  {
91  double* buff_A = FLA_DOUBLE_PTR( A );
92  double* buff_Y = FLA_DOUBLE_PTR( Y );
93  double* buff_Z = FLA_DOUBLE_PTR( Z );
94  double* buff_T = FLA_DOUBLE_PTR( T );
95  double* buff_S = FLA_DOUBLE_PTR( S );
96 
97  FLA_Bidiag_UT_u_step_ofd_var4( m_A,
98  n_A,
99  m_TS,
100  buff_A, rs_A, cs_A,
101  buff_Y, rs_Y, cs_Y,
102  buff_Z, rs_Z, cs_Z,
103  buff_T, rs_T, cs_T,
104  buff_S, rs_S, cs_S );
105 
106  break;
107  }
108 
109  case FLA_COMPLEX:
110  {
111  scomplex* buff_A = FLA_COMPLEX_PTR( A );
112  scomplex* buff_Y = FLA_COMPLEX_PTR( Y );
113  scomplex* buff_Z = FLA_COMPLEX_PTR( Z );
114  scomplex* buff_T = FLA_COMPLEX_PTR( T );
115  scomplex* buff_S = FLA_COMPLEX_PTR( S );
116 
117  FLA_Bidiag_UT_u_step_ofc_var4( m_A,
118  n_A,
119  m_TS,
120  buff_A, rs_A, cs_A,
121  buff_Y, rs_Y, cs_Y,
122  buff_Z, rs_Z, cs_Z,
123  buff_T, rs_T, cs_T,
124  buff_S, rs_S, cs_S );
125 
126  break;
127  }
128 
129  case FLA_DOUBLE_COMPLEX:
130  {
131  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
132  dcomplex* buff_Y = FLA_DOUBLE_COMPLEX_PTR( Y );
133  dcomplex* buff_Z = FLA_DOUBLE_COMPLEX_PTR( Z );
134  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
135  dcomplex* buff_S = FLA_DOUBLE_COMPLEX_PTR( S );
136 
137  FLA_Bidiag_UT_u_step_ofz_var4( m_A,
138  n_A,
139  m_TS,
140  buff_A, rs_A, cs_A,
141  buff_Y, rs_Y, cs_Y,
142  buff_Z, rs_Z, cs_Z,
143  buff_T, rs_T, cs_T,
144  buff_S, rs_S, cs_S );
145 
146  break;
147  }
148  }
149 
150  return FLA_SUCCESS;
151 }
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Error FLA_Bidiag_UT_u_step_ofd_var4(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_fus_var4.c:625
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Bidiag_UT_u_step_ofz_var4(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_fus_var4.c:1565
Definition: blis_type_defs.h:132
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Error FLA_Bidiag_UT_u_step_ofs_var4(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_fus_var4.c:155
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Error FLA_Bidiag_UT_u_step_ofc_var4(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_fus_var4.c:1095
Definition: blis_type_defs.h:137

◆ FLA_Bidiag_UT_u_step_ofz_var2()

FLA_Error FLA_Bidiag_UT_u_step_ofz_var2 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T,
dcomplex *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Gerc2_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_Househ2_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var2().

720 {
721  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
722  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
723  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
724 
725  dcomplex beta;
726  int i;
727 
728  // b_alg = FLA_Obj_length( T );
729  int b_alg = m_TS;
730 
731  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
732  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
733  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
734  dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
735  dcomplex* buff_y = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
736  dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
737  int inc_v = 1;
738  int inc_y = 1;
739  int inc_z = 1;
740 
741  for ( i = 0; i < b_alg; ++i )
742  {
743  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
744  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
745  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
746  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
747  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
748  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
749  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
750 
751  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
752  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
753 
754  dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
755  dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
756 
757  dcomplex* v21 = buff_v + (i+1)*inc_v;
758 
759  dcomplex* y21 = buff_y + (i+1)*inc_y;
760 
761  dcomplex* z21 = buff_z + (i+1)*inc_z;
762 
763  dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
764  dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
765 
766  dcomplex* v21_t = v21 + (0 )*inc_v;
767  dcomplex* v21_b = v21 + (1 )*inc_v;
768 
769  int m_ahead = m_A - i - 1;
770  int n_ahead = n_A - i - 1;
771  int m_behind = i;
772  int n_behind = i;
773 
774  /*------------------------------------------------------------*/
775 
776  // FLA_Househ2_UT( FLA_LEFT,
777  // alpha11,
778  // a21, tau11 );
779  FLA_Househ2_UT_l_opz( m_ahead,
780  alpha11,
781  a21, rs_A,
782  tau11 );
783 
784  if ( n_ahead > 0 )
785  {
786  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
787  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
788  bl1_zcopyv( BLIS1_CONJUGATE,
789  n_ahead,
790  a12t, cs_A,
791  y21, inc_y );
792  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
793  BLIS1_NO_CONJUGATE,
794  m_ahead,
795  n_ahead,
796  buff_1,
797  A22, rs_A, cs_A,
798  a21, rs_A,
799  buff_1,
800  y21, inc_y );
801 
802  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
803  bl1_zinvscalv( BLIS1_NO_CONJUGATE,
804  n_ahead,
805  tau11,
806  y21, inc_y );
807 
808  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
809  bl1_zaxpyv( BLIS1_CONJUGATE,
810  n_ahead,
811  buff_m1,
812  y21, inc_y,
813  a12t, cs_A );
814 
815  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
816  FLA_Househ2_UT_r_opz( n_ahead - 1,
817  a12t_l,
818  a12t_r, cs_A,
819  sigma11 );
820 
821  // FLA_Set( FLA_ONE, v21_t );
822  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
823  *v21_t = *buff_1;
824  bl1_zcopyv( BLIS1_NO_CONJUGATE,
825  n_ahead - 1,
826  a12t_r, cs_A,
827  v21_b, inc_y );
828 
829  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
830  // FLA_Scal( FLA_MINUS_ONE, beta );
831  bl1_zdot( BLIS1_CONJUGATE,
832  n_ahead,
833  y21, inc_y,
834  v21, inc_v,
835  &beta );
836  bl1_zneg1( &beta );
837 
838  // FLA_Copy( a21, z21 );
839  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
840  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
841  bl1_zcopyv( BLIS1_NO_CONJUGATE,
842  m_ahead,
843  a21, rs_A,
844  z21, inc_z );
845  bl1_zgemv( BLIS1_NO_TRANSPOSE,
846  BLIS1_NO_CONJUGATE,
847  m_ahead,
848  n_ahead,
849  buff_1,
850  A22, rs_A, cs_A,
851  v21, inc_v,
852  &beta,
853  z21, inc_z );
854  bl1_zinvscalv( BLIS1_NO_CONJUGATE,
855  m_ahead,
856  sigma11,
857  z21, inc_z );
858 
859  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
860  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
861  FLA_Fused_Gerc2_opz_var1( m_ahead,
862  n_ahead,
863  buff_m1,
864  a21, rs_A,
865  y21, inc_y,
866  z21, inc_z,
867  v21, inc_v,
868  A22, rs_A, cs_A );
869 
870  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
871  bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
872  BLIS1_NO_CONJUGATE,
873  m_behind,
874  n_ahead,
875  buff_1,
876  A02, rs_A, cs_A,
877  v21, inc_v,
878  buff_0,
879  s01, rs_S );
880  }
881 
882  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
883  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
884  bl1_zcopyv( BLIS1_CONJUGATE,
885  n_behind,
886  a10t, cs_A,
887  t01, rs_T );
888  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
889  BLIS1_NO_CONJUGATE,
890  m_ahead,
891  n_behind,
892  buff_1,
893  A20, rs_A, cs_A,
894  a21, rs_A,
895  buff_1,
896  t01, rs_T );
897 
898  /*------------------------------------------------------------*/
899 
900  }
901 
902  // FLA_Obj_free( &v );
903  // FLA_Obj_free( &y );
904  // FLA_Obj_free( &z );
905  FLA_free( buff_v );
906  FLA_free( buff_y );
907  FLA_free( buff_z );
908 
909  return FLA_SUCCESS;
910 }
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_invscalv.c:78
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
Definition: blis_type_defs.h:82
FLA_Error FLA_Fused_Gerc2_opz_var1(int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Fused_Gerc2_opt_var1.c:306
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
int i
Definition: bl1_axmyv2.c:145
FLA_Error FLA_Househ2_UT_r_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:693
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
Definition: blis_type_defs.h:137
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60

◆ FLA_Bidiag_UT_u_step_ofz_var3()

FLA_Error FLA_Bidiag_UT_u_step_ofz_var3 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T,
dcomplex *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_zaxpyv(), bl1_zconjv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zinvscalv(), bl1_zscals(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_Househ2s_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var3().

1332 {
1333  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
1334  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
1335  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
1336 
1337  dcomplex alpha12;
1338  dcomplex minus_conj_alpha12;
1339  dcomplex psi11_minus_alpha12;
1340  dcomplex minus_inv_tau11;
1341  dcomplex minus_upsilon11;
1342  dcomplex minus_conj_nu11;
1343  dcomplex minus_conj_psi11;
1344  dcomplex minus_zeta11;
1345  dcomplex beta;
1346  int i;
1347 
1348  // b_alg = FLA_Obj_length( T );
1349  int b_alg = m_TS;
1350 
1351  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1352  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
1353  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1354  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
1355  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1356  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
1357  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
1358  dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1359  dcomplex* buff_ap = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1360  dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1361  dcomplex* buff_up = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1362  dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1363  dcomplex* buff_y = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1364  dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1365  int inc_w = 1;
1366  int inc_ap = 1;
1367  int inc_u = 1;
1368  int inc_up = 1;
1369  int inc_v = 1;
1370  int inc_y = 1;
1371  int inc_z = 1;
1372 
1373  for ( i = 0; i < b_alg; ++i )
1374  {
1375  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1376  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1377  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1378  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1379  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1380  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1381  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1382 
1383  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1384  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1385 
1386  dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1387  dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1388 
1389  dcomplex* w21 = buff_w + (i+1)*inc_w;
1390 
1391  dcomplex* a12p = buff_ap + (i+1)*inc_ap;
1392 
1393  dcomplex* upsilon11 = buff_u + (i )*inc_u;
1394  dcomplex* u21 = buff_u + (i+1)*inc_u;
1395 
1396  dcomplex* u21p = buff_up + (i+1)*inc_up;
1397 
1398  dcomplex* nu11 = buff_v + (i )*inc_v;
1399  dcomplex* v21 = buff_v + (i+1)*inc_v;
1400 
1401  dcomplex* psi11 = buff_y + (i )*inc_y;
1402  dcomplex* y21 = buff_y + (i+1)*inc_y;
1403 
1404  dcomplex* zeta11 = buff_z + (i )*inc_z;
1405  dcomplex* z21 = buff_z + (i+1)*inc_z;
1406 
1407  dcomplex* a12p_t = a12p + (0 )*inc_ap;
1408  dcomplex* a12p_b = a12p + (1 )*inc_ap;
1409 
1410  dcomplex* v21_t = v21 + (0 )*inc_v;
1411  dcomplex* v21_b = v21 + (1 )*inc_v;
1412 
1413  dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1414  dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1415 
1416  dcomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
1417 
1418  int m_ahead = m_A - i - 1;
1419  int n_ahead = n_A - i - 1;
1420  int m_behind = i;
1421  int n_behind = i;
1422 
1423  /*------------------------------------------------------------*/
1424 
1425  if ( m_behind > 0 )
1426  {
1427  // FLA_Copy( upsilon11, minus_upsilon11 );
1428  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
1429  bl1_zmult3( buff_m1, upsilon11, &minus_upsilon11 );
1430 
1431  // FLA_Copy( zeta11, minus_zeta11 );
1432  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
1433  bl1_zmult3( buff_m1, zeta11, &minus_zeta11 );
1434 
1435  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
1436  // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
1437  bl1_zcopyconj( psi11, &minus_conj_psi11 );
1438  bl1_zscals( buff_m1, &minus_conj_psi11 );
1439 
1440  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
1441  // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
1442  bl1_zcopyconj( nu11, &minus_conj_nu11 );
1443  bl1_zscals( buff_m1, &minus_conj_nu11 );
1444 
1445  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
1446  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, zeta11, alpha11 );
1447  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1448  1,
1449  &minus_conj_psi11,
1450  upsilon11, 1,
1451  alpha11, 1 );
1452  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1453  1,
1454  &minus_conj_nu11,
1455  zeta11, 1,
1456  alpha11, 1 );
1457 
1458  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
1459  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
1460  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1461  m_ahead,
1462  &minus_conj_psi11,
1463  u21, inc_u,
1464  a21, rs_A );
1465  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1466  m_ahead,
1467  &minus_conj_nu11,
1468  z21, inc_z,
1469  a21, rs_A );
1470 
1471  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
1472  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11, v21, a12t );
1473  bl1_zaxpyv( BLIS1_CONJUGATE,
1474  n_ahead,
1475  &minus_upsilon11,
1476  y21, inc_y,
1477  a12t, cs_A );
1478  bl1_zaxpyv( BLIS1_CONJUGATE,
1479  n_ahead,
1480  &minus_zeta11,
1481  v21, inc_v,
1482  a12t, cs_A );
1483  }
1484 
1485  // FLA_Househ2_UT( FLA_LEFT,
1486  // alpha11,
1487  // a21, tau11 );
1488  // FLA_Copy( a21, u21p );
1489  FLA_Househ2_UT_l_opz( m_ahead,
1490  alpha11,
1491  a21, rs_A,
1492  tau11 );
1493  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1494  m_ahead,
1495  a21, rs_A,
1496  u21p, inc_up );
1497 
1498  if ( n_ahead > 0 )
1499  {
1500  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1501  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1502  bl1_zdiv3( buff_m1, tau11, &minus_inv_tau11 );
1503 
1504  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1505  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1506  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1507  n_ahead,
1508  a12t, cs_A,
1509  a12p, inc_ap );
1510  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1511  n_ahead,
1512  &minus_inv_tau11,
1513  a12t, cs_A,
1514  a12p, inc_ap );
1515  }
1516 
1517  if ( m_behind > 0 && n_ahead > 0 )
1518  {
1519  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1520  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
1521  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
1522  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1523  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1524  FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1( m_ahead,
1525  n_ahead,
1526  tau11,
1527  buff_m1,
1528  u21, inc_u,
1529  y21, inc_y,
1530  z21, inc_z,
1531  v21, inc_v,
1532  A22, rs_A, cs_A,
1533  u21p, inc_up,
1534  a12p, inc_ap,
1535  w21, inc_w );
1536 
1537 
1538  }
1539  else if ( n_ahead > 0 )
1540  {
1541  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
1542  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1543  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1544  FLA_Fused_Ahx_Axpy_Ax_opz_var1( m_ahead,
1545  n_ahead,
1546  tau11,
1547  buff_0,
1548  A22, rs_A, cs_A,
1549  u21p, inc_up,
1550  a12p, inc_ap,
1551  y21, inc_y,
1552  w21, inc_w );
1553  }
1554 
1555  if ( n_ahead > 0 )
1556  {
1557  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1558  bl1_zaxpyv( BLIS1_CONJUGATE,
1559  n_ahead,
1560  buff_1,
1561  a12t, cs_A,
1562  y21, inc_y );
1563 
1564  // FLA_Househ2s_UT( FLA_RIGHT,
1565  // a12p_t,
1566  // a12p_b,
1567  // alpha12, psi11_minus_alpha12, sigma11 );
1568  FLA_Househ2s_UT_r_opz( n_ahead - 1,
1569  a12p_t,
1570  a12p_b, inc_ap,
1571  &alpha12,
1572  &psi11_minus_alpha12,
1573  sigma11 );
1574 
1575  // FLA_Copy( a12p, v21 );
1576  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1577  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1578  // FLA_Conjugate( v21_b );
1579  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1580  n_ahead,
1581  a12p, inc_ap,
1582  v21, inc_v );
1583  bl1_zmult4( buff_m1, &alpha12, v21_t, v21_t );
1584  bl1_zinvscalv( BLIS1_NO_CONJUGATE,
1585  n_ahead,
1586  &psi11_minus_alpha12,
1587  v21, inc_v );
1588  bl1_zconjv( n_ahead - 1,
1589  v21_b, inc_v );
1590 
1591  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
1592  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1593  *a12t_l = alpha12;
1594  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1595  n_ahead - 1,
1596  v21_b, inc_v,
1597  a12t_r, cs_A );
1598  }
1599 
1600  // FLA_Copy( u21p, u21 );
1601  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1602  m_ahead,
1603  u21p, inc_up,
1604  u21, inc_u );
1605 
1606  if ( n_ahead > 0 )
1607  {
1608  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1609  // FLA_Scal( FLA_MINUS_ONE, beta );
1610  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1611  bl1_zdot( BLIS1_CONJUGATE,
1612  n_ahead,
1613  y21, inc_y,
1614  v21, inc_v,
1615  &beta );
1616  bl1_zscals( &minus_inv_tau11, &beta );
1617 
1618  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1619  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1620  bl1_zcopyconj( &alpha12, &minus_conj_alpha12 );
1621  bl1_zneg1( &minus_conj_alpha12 );
1622 
1623  // FLA_Copy( w21, z21 );
1624  // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
1625  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1626  // FLA_Axpy( beta, u21, z21 );
1627  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1628  m_ahead,
1629  w21, inc_w,
1630  z21, inc_z );
1631  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1632  m_ahead,
1633  &minus_conj_alpha12,
1634  A22_l, rs_A,
1635  z21, inc_z );
1636  bl1_zinvscalv( BLIS1_CONJUGATE,
1637  m_ahead,
1638  &psi11_minus_alpha12,
1639  z21, inc_z );
1640  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1641  m_ahead,
1642  &beta,
1643  u21, inc_u,
1644  z21, inc_z );
1645 
1646  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1647  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1648  bl1_zinvscalv( BLIS1_NO_CONJUGATE,
1649  n_ahead,
1650  tau11,
1651  y21, inc_y );
1652  bl1_zinvscalv( BLIS1_NO_CONJUGATE,
1653  m_ahead,
1654  sigma11,
1655  z21, inc_z );
1656 
1657  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
1658  bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
1659  BLIS1_NO_CONJUGATE,
1660  m_behind,
1661  n_ahead,
1662  buff_1,
1663  A02, rs_A, cs_A,
1664  v21, inc_v,
1665  buff_0,
1666  s01, rs_S );
1667  }
1668 
1669  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1670  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1671  bl1_zcopyv( BLIS1_CONJUGATE,
1672  n_behind,
1673  a10t, cs_A,
1674  t01, rs_T );
1675  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
1676  BLIS1_NO_CONJUGATE,
1677  m_ahead,
1678  n_behind,
1679  buff_1,
1680  A20, rs_A, cs_A,
1681  u21, inc_u,
1682  buff_1,
1683  t01, rs_T );
1684 
1685  if ( m_behind + 1 == b_alg && n_ahead > 0 )
1686  {
1687  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1688  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
1689  bl1_zger( BLIS1_NO_CONJUGATE,
1690  BLIS1_CONJUGATE,
1691  m_ahead,
1692  n_ahead,
1693  buff_m1,
1694  u21, inc_u,
1695  y21, inc_y,
1696  A22, rs_A, cs_A );
1697  bl1_zger( BLIS1_NO_CONJUGATE,
1698  BLIS1_CONJUGATE,
1699  m_ahead,
1700  n_ahead,
1701  buff_m1,
1702  z21, inc_z,
1703  v21, inc_v,
1704  A22, rs_A, cs_A );
1705  }
1706 
1707  /*------------------------------------------------------------*/
1708 
1709  }
1710 
1711  // FLA_Obj_free( &w );
1712  // FLA_Obj_free( &ap );
1713  // FLA_Obj_free( &u );
1714  // FLA_Obj_free( &up );
1715  // FLA_Obj_free( &v );
1716  // FLA_Obj_free( &y );
1717  // FLA_Obj_free( &z );
1718  FLA_free( buff_w );
1719  FLA_free( buff_ap );
1720  FLA_free( buff_u );
1721  FLA_free( buff_up );
1722  FLA_free( buff_v );
1723  FLA_free( buff_y );
1724  FLA_free( buff_z );
1725 
1726  return FLA_SUCCESS;
1727 }
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:194
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_invscalv.c:78
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:56
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_beta, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_u, int inc_u, dcomplex *buff_a, int inc_a, dcomplex *buff_y, int inc_y, dcomplex *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:390
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
FLA_Error FLA_Househ2s_UT_r_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *alpha, dcomplex *chi_1_minus_alpha, dcomplex *tau)
Definition: FLA_Househ2s_UT.c:610
int i
Definition: bl1_axmyv2.c:145
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_up, int inc_up, dcomplex *buff_a, int inc_a, dcomplex *buff_w, int inc_w)
Definition: FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:523
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
Definition: blis_type_defs.h:137
bl1_zscals(beta, rho_yz)
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60
void bl1_zconjv(int m, dcomplex *x, int incx)
Definition: bl1_conjv.c:34

◆ FLA_Bidiag_UT_u_step_ofz_var4()

FLA_Error FLA_Bidiag_UT_u_step_ofz_var4 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *  buff_T,
int  rs_T,
int  cs_T,
dcomplex *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_zaxpyv(), bl1_zconjv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zinvscalv(), bl1_zscals(), bl1_zsetm(), bl1_zsetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_UYx_ZVx_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_Househ2s_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var4().

// Body listing of FLA_Bidiag_UT_u_step_ofz_var4 (double-complex operands).
//
// Y and Z are first set to zero (bl1_zsetm).  The loop then runs b_alg = m_TS
// iterations; iteration i derives views of A, Y, Z, T and S around index i,
// computes a left Householder transform with FLA_Househ2_UT_l_opz (scalar
// stored in tau11) and, while columns remain to the right (n_ahead > 0), a
// right transform with FLA_Househ2s_UT_r_opz (scalar stored in sigma11).
//
// Nine work vectors are obtained from FLA_malloc and released with FLA_free
// before returning; the FLA_malloc results are not checked in this function.
// The function always returns FLA_SUCCESS.
//
// NOTE(review): the embedded listing numbers skip wherever the opening line(s)
// of a statement (callee name and leading flag arguments) were dropped from
// this listing.  The orphaned argument lists below presumably belong to the
// operation named in the "// FLA_..." comment directly above them -- confirm
// against the original .c file before relying on this listing.
1573 {
1574  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
1575  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
1576  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
1577 
1578  dcomplex alpha12;
1579  dcomplex minus_conj_alpha12;
1580  dcomplex psi11_minus_alpha12;
1581  dcomplex minus_inv_tau11;
1582  dcomplex beta;
1583  dcomplex last_elem;
1584  int i;
1585 
1586  // b_alg = FLA_Obj_length( T );
1587  int b_alg = m_TS;
1588 
1589  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1590  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
1591  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
1592  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1593  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
1594  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1595  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
1596  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
1597  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
1598  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
// Work vectors: each holds either m_A or n_A elements and is accessed with
// unit stride (all inc_* values below are 1).
1599  dcomplex* buff_tmp = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1600  dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1601  dcomplex* buff_al = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1602  dcomplex* buff_ap = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1603  dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1604  dcomplex* buff_up = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1605  dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1606  dcomplex* buff_d = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1607  dcomplex* buff_e = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1608  int inc_tmp = 1;
1609  int inc_w = 1;
1610  int inc_al = 1;
1611  int inc_ap = 1;
1612  int inc_u = 1;
1613  int inc_up = 1;
1614  int inc_v = 1;
1615  int inc_d = 1;
1616  int inc_e = 1;
1617 
1618  // FLA_Set( FLA_ZERO, Y );
1619  // FLA_Set( FLA_ZERO, Z );
1620  bl1_zsetm( n_A,
1621  b_alg,
1622  buff_0,
1623  buff_Y, rs_Y, cs_Y );
1624  bl1_zsetm( m_A,
1625  b_alg,
1626  buff_0,
1627  buff_Z, rs_Z, cs_Z );
1628 
1629  for ( i = 0; i < b_alg; ++i )
1630  {
// Views into A, Y, Z, T and S for iteration i, formed as
// base + column_index*cs + row_index*rs.
1631  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1632  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1633  dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
1634  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1635  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1636  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1637  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1638  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1639 
1640  dcomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1641  dcomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1642  dcomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1643 
1644  dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1645  dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1646  dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1647 
1648  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1649  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1650 
1651  dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1652  dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1653 
1654  dcomplex* tmp21 = buff_tmp + (i+1)*inc_tmp;
1655 
1656  dcomplex* w21 = buff_w + (i+1)*inc_w;
1657 
1658  dcomplex* a22l = buff_al + (i+1)*inc_al;
1659 
1660  dcomplex* a12p = buff_ap + (i+1)*inc_ap;
1661 
1662  dcomplex* u21 = buff_u + (i+1)*inc_u;
1663 
1664  dcomplex* u21p = buff_up + (i+1)*inc_up;
1665 
1666  dcomplex* v21 = buff_v + (i+1)*inc_v;
1667 
1668  dcomplex* d0 = buff_d + (0 )*inc_d;
1669 
1670  dcomplex* e0 = buff_e + (0 )*inc_e;
1671 
1672  dcomplex* a12p_t = a12p + (0 )*inc_ap;
1673  dcomplex* a12p_b = a12p + (1 )*inc_ap;
1674 
1675  dcomplex* v21_t = v21 + (0 )*inc_v;
1676  dcomplex* v21_b = v21 + (1 )*inc_v;
1677 
// NOTE(review): when i == 0 this evaluates to a01 - rs_A.  The pointer is
// only dereferenced inside the "m_behind > 0" guards below.
1678  dcomplex* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
1679 
1680  dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1681  dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1682 
1683  dcomplex* ABL = a10t;
1684  dcomplex* ZBL = z10t;
1685 
1686  dcomplex* a2 = alpha11;
1687 
1688  int m_ahead = m_A - i - 1;
1689  int n_ahead = n_A - i - 1;
1690  int m_behind = i;
1691  int n_behind = i;
1692 
1693  /*------------------------------------------------------------*/
1694 
// Save the element at a01_b and overwrite it with one; the saved value is
// written back after the updates of a2 and a12t below.
1695  if ( m_behind > 0 )
1696  {
1697  // FLA_Copy( a01_b, last_elem );
1698  // FLA_Set( FLA_ONE, a01_b );
1699  last_elem = *a01_b;
1700  *a01_b = *buff_1;
1701  }
1702 
1703  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1704  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
1707  m_ahead + 1,
1708  n_behind,
1709  buff_m1,
1710  ABL, rs_A, cs_A,
1711  y10t, cs_Y,
1712  buff_1,
1713  a2, rs_A );
1716  m_ahead + 1,
1717  n_behind,
1718  buff_m1,
1719  ZBL, rs_Z, cs_Z,
1720  a01, rs_A,
1721  buff_1,
1722  a2, rs_A );
1723 
1724  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1725  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
1728  n_ahead,
1729  n_behind,
1730  buff_m1,
1731  Y20, rs_Y, cs_Y,
1732  a10t, cs_A,
1733  buff_1,
1734  a12t, cs_A );
1737  m_behind,
1738  n_ahead,
1739  buff_m1,
1740  A02, rs_A, cs_A,
1741  z10t, cs_Z,
1742  buff_1,
1743  a12t, cs_A );
1744 
1745  if ( m_behind > 0 )
1746  {
1747  // FLA_Copy( last_elem, a01_b );
1748  *a01_b = last_elem;
1749  }
1750 
1751  // FLA_Househ2_UT( FLA_LEFT,
1752  // alpha11,
1753  // a21, tau11 );
1754  // FLA_Copy( a21, u21p );
1755  FLA_Househ2_UT_l_opz( m_ahead,
1756  alpha11,
1757  a21, rs_A,
1758  tau11 );
1760  m_ahead,
1761  a21, rs_A,
1762  u21p, inc_up );
1763 
1764  if ( n_ahead > 0 )
1765  {
1766  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1767  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1768  bl1_zdiv3( buff_m1, tau11, &minus_inv_tau11 );
1769 
1770  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1771  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1773  n_ahead,
1774  a12t, cs_A,
1775  a12p, inc_ap );
1777  n_ahead,
1778  &minus_inv_tau11,
1779  a12t, cs_A,
1780  a12p, inc_ap );
1781 
1782  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
1783  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
1786  m_ahead,
1787  n_behind,
1788  buff_1,
1789  A20, rs_A, cs_A,
1790  u21p, inc_up,
1791  buff_0,
1792  d0, inc_d );
1795  m_ahead,
1796  n_behind,
1797  buff_1,
1798  Z20, rs_Z, cs_Z,
1799  u21p, inc_up,
1800  buff_0,
1801  e0, inc_e );
1802 
1803  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1804  // FLA_Axpy( FLA_ONE, d0, t01 );
1806  n_behind,
1807  a10t, cs_A,
1808  t01, rs_T );
1810  n_behind,
1811  buff_1,
1812  d0, inc_d,
1813  t01, rs_T );
1814 
1815  // FLA_Set( FLA_ZERO, y21 );
1816  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1817  // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
1818  bl1_zsetv( n_ahead,
1819  buff_0,
1820  y21, rs_Y );
1823  n_ahead,
1824  n_behind,
1825  buff_m1,
1826  Y20, rs_Y, cs_Y,
1827  d0, inc_d,
1828  buff_1,
1829  y21, rs_Y );
1832  m_behind,
1833  n_ahead,
1834  buff_m1,
1835  A02, rs_A, cs_A,
1836  e0, inc_e,
1837  buff_1,
1838  y21, rs_Y );
1839 
1840  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
1841  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1842  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1844  n_ahead,
1845  tau11,
1846  buff_1,
1847  A22, rs_A, cs_A,
1848  u21p, inc_up,
1849  a12p, inc_ap,
1850  y21, rs_Y,
1851  w21, inc_w );
1852 
1853  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
1854  // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
1855  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
1856  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
1857  // FLA_Copy( A22_l, a22l );
1858  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
1859  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
1860  // FLA_Copy( g0, s01 );
1861  FLA_Fused_UYx_ZVx_opz_var1( m_ahead,
1862  n_behind,
1863  m_behind,
1864  n_ahead,
1865  buff_m1,
1866  A20, rs_A, cs_A,
1867  Y20, rs_Y, cs_Y,
1868  Z20, rs_Z, cs_Z,
1869  A02, rs_A, cs_A,
1870  A22, rs_A, cs_A,
1871  tmp21, inc_tmp,
1872  s01, rs_S,
1873  a12p, inc_ap,
1874  w21, inc_w,
1875  a22l, inc_al );
1876 
1877  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1879  n_ahead,
1880  buff_1,
1881  a12t, cs_A,
1882  y21, rs_Y );
1883 
1884  // FLA_Househ2s_UT( FLA_RIGHT,
1885  // a12p_t,
1886  // a12p_b,
1887  // alpha12, psi11_minus_alpha12, sigma11 );
1888  FLA_Househ2s_UT_r_opz( n_ahead - 1,
1889  a12p_t,
1890  a12p_b, inc_ap,
1891  &alpha12,
1892  &psi11_minus_alpha12,
1893  sigma11 );
1894 
1895  // FLA_Copy( a12p, v21 );
1896  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1897  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1898  // FLA_Conjugate( v21_b );
1900  n_ahead,
1901  a12p, inc_ap,
1902  v21, inc_v );
1903  bl1_zmult4( buff_m1, &alpha12, v21_t, v21_t );
1905  n_ahead,
1906  &psi11_minus_alpha12,
1907  v21, inc_v );
1908  bl1_zconjv( n_ahead - 1,
1909  v21_b, inc_v );
1910 
1911  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1912  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1913  bl1_zcopyconj( &alpha12, &minus_conj_alpha12 );
1914  bl1_zneg1( &minus_conj_alpha12 );
1915 
1916  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
1917  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
1919  n_behind,
1920  &minus_conj_alpha12,
1921  A02, rs_A,
1922  s01, rs_S );
1924  n_behind,
1925  &psi11_minus_alpha12,
1926  s01, rs_S );
1927 
1928  // FLA_Copy( alpha12, a12t_l );
1929  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1930  *a12t_l = alpha12;
1932  n_ahead - 1,
1933  v21_b, inc_v,
1934  a12t_r, cs_A );
1935  }
1936 
1937  // FLA_Copy( u21p, u21 );
1939  m_ahead,
1940  u21p, inc_up,
1941  u21, inc_u );
1942 
1943  if ( n_ahead > 0 )
1944  {
1945  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1946  // FLA_Scal( FLA_MINUS_ONE, beta );
1947  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1949  n_ahead,
1950  y21, rs_Y,
1951  v21, inc_v,
1952  &beta );
1953  bl1_zscals( &minus_inv_tau11, &beta );
1954 
1955  // FLA_Copy( w21, z21 );
1956  // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
1957  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1958  // FLA_Axpy( beta, u21, z21 );
1960  m_ahead,
1961  w21, inc_w,
1962  z21, rs_Z );
1964  m_ahead,
1965  &minus_conj_alpha12,
1966  a22l, inc_al,
1967  z21, rs_Z );
1969  m_ahead,
1970  &psi11_minus_alpha12,
1971  z21, rs_Z );
1973  m_ahead,
1974  &beta,
1975  u21, inc_u,
1976  z21, rs_Z );
1977 
1978  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1979  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1981  n_ahead,
1982  tau11,
1983  y21, rs_Y );
1985  m_ahead,
1986  sigma11,
1987  z21, rs_Z );
1988  }
1989  else // if ( n_ahead == 0 )
1990  {
1991  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1992  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1994  n_behind,
1995  a10t, cs_A,
1996  t01, rs_T );
1999  m_ahead,
2000  n_behind,
2001  buff_1,
2002  A20, rs_A, cs_A,
2003  u21, inc_u,
2004  buff_1,
2005  t01, rs_T );
2006  }
2007 
2008  /*------------------------------------------------------------*/
2009 
2010  }
2011 
2012  // FLA_Obj_free( &w );
2013  // FLA_Obj_free( &al );
2014  // FLA_Obj_free( &ap );
2015  // FLA_Obj_free( &u );
2016  // FLA_Obj_free( &up );
2017  // FLA_Obj_free( &v );
2018  // FLA_Obj_free( &d );
2019  // FLA_Obj_free( &e );
// Release every work vector allocated at the top of the function.
2020  FLA_free( buff_tmp );
2021  FLA_free( buff_w );
2022  FLA_free( buff_al );
2023  FLA_free( buff_ap );
2024  FLA_free( buff_u );
2025  FLA_free( buff_up );
2026  FLA_free( buff_v );
2027  FLA_free( buff_d );
2028  FLA_free( buff_e );
2029 
2030  return FLA_SUCCESS;
2031 }
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_invscalv.c:78
void bl1_zsetv(int m, dcomplex *sigma, dcomplex *x, int incx)
Definition: bl1_setv.c:66
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
Definition: blis_type_defs.h:55
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_beta, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_u, int inc_u, dcomplex *buff_a, int inc_a, dcomplex *buff_y, int inc_y, dcomplex *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:390
FLA_Error FLA_Fused_UYx_ZVx_opz_var1(int m_U, int n_U, int m_V, int n_V, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_V, int rs_V, int cs_V, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_temp, int inc_temp, dcomplex *buff_t, int inc_t, dcomplex *buff_a, int inc_a, dcomplex *buff_w, int inc_w, dcomplex *buff_al, int inc_al)
Definition: FLA_Fused_UYx_ZVx_opt_var1.c:542
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
FLA_Error FLA_Househ2s_UT_r_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *alpha, dcomplex *chi_1_minus_alpha, dcomplex *tau)
Definition: FLA_Househ2s_UT.c:610
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:78
int i
Definition: bl1_axmyv2.c:145
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
Definition: blis_type_defs.h:137
bl1_zscals(beta, rho_yz)
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60
void bl1_zconjv(int m, dcomplex *x, int incx)
Definition: bl1_conjv.c:34

◆ FLA_Bidiag_UT_u_step_opc_var1()

FLA_Error FLA_Bidiag_UT_u_step_opc_var1 ( int  m_A,
int  n_A,
int  m_TS,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_T,
int  rs_T,
int  cs_T,
scomplex *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_ccopyv(), bl1_cgemv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opc_var1(), FLA_Apply_H2_UT_r_opc_var1(), FLA_free(), FLA_Househ2_UT_l_opc(), FLA_Househ2_UT_r_opc(), FLA_malloc(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var1().

// Body listing of FLA_Bidiag_UT_u_step_opc_var1 (single-complex operands).
//
// Runs b_alg = m_TS iterations.  Iteration i derives views of A, T and S
// around index i, computes a left Householder transform with
// FLA_Househ2_UT_l_opc (scalar stored in tau11) and, while columns remain to
// the right (n_ahead > 0), a right transform with FLA_Househ2_UT_r_opc (scalar
// stored in sigma11).
//
// One work vector of n_A elements (buff_v) is obtained from FLA_malloc and
// released with FLA_free; the FLA_malloc result is not checked here.
// The function always returns FLA_SUCCESS.
//
// NOTE(review): the embedded listing numbers skip wherever the opening line(s)
// of a statement (callee name and leading flag arguments) were dropped from
// this listing.  The orphaned argument lists below presumably belong to the
// operation named in the "// FLA_..." comment directly above them -- confirm
// against the original .c file before relying on this listing.
390 {
391  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
392  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
393 
394  int i;
395 
396  // b_alg = FLA_Obj_length( T );
397  int b_alg = m_TS;
398 
399  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
400  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
401  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
402  scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
403  int inc_v = 1;
404 
405  for ( i = 0; i < b_alg; ++i )
406  {
// Views into A, T and S for iteration i, formed as
// base + column_index*cs + row_index*rs.
407  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
408  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
409  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
410  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
411  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
412  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
413  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
414 
415  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
416  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
417 
418  scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
419  scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
420 
421  scomplex* v21 = buff_v + (i+1)*inc_v;
422 
// First element / remainder splits of a12t, A22 and v21.
423  scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
424  scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
425 
426  scomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
427  scomplex* A22_r = A22 + (1 )*cs_A + (0 )*rs_A;
428 
429  scomplex* v21_t = v21 + (0 )*inc_v;
430  scomplex* v21_b = v21 + (1 )*inc_v;
431 
432  int m_ahead = m_A - i - 1;
433  int n_ahead = n_A - i - 1;
434  int m_behind = i;
435  int n_behind = i;
436 
437  /*------------------------------------------------------------*/
438 
439  // FLA_Househ2_UT( FLA_LEFT,
440  // alpha11,
441  // a21, tau11 );
442  FLA_Househ2_UT_l_opc( m_ahead,
443  alpha11,
444  a21, rs_A,
445  tau11 );
446 
447  if ( n_ahead > 0 )
448  {
449  // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t, A22 );
451  n_ahead,
452  tau11,
453  a21, rs_A,
454  a12t, cs_A,
455  A22, rs_A, cs_A );
456 
457  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
458  FLA_Househ2_UT_r_opc( n_ahead - 1,
459  a12t_l,
460  a12t_r, cs_A,
461  sigma11 );
462 
463  // FLA_Set( FLA_ONE, v21_t );
464  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
465  *v21_t = *buff_1;
467  n_ahead - 1,
468  a12t_r, cs_A,
469  v21_b, inc_v );
470 
471  // FLA_Apply_H2_UT( FLA_RIGHT, sigma11, v21_b, A22_l, A22_r );
473  n_ahead - 1,
474  sigma11,
475  v21_b, inc_v,
476  A22_l, rs_A,
477  A22_r, rs_A, cs_A );
478 
479  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
482  m_behind,
483  n_ahead,
484  buff_1,
485  A02, rs_A, cs_A,
486  v21, inc_v,
487  buff_0,
488  s01, rs_S );
489  }
490 
491  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
492  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
494  n_behind,
495  a10t, cs_A,
496  t01, rs_T );
499  m_ahead,
500  n_behind,
501  buff_1,
502  A20, rs_A, cs_A,
503  a21, rs_A,
504  buff_1,
505  t01, rs_T );
506 
507  /*------------------------------------------------------------*/
508 
509  }
510 
511  // FLA_Obj_free( &v );
512  FLA_free( buff_v );
513 
514  return FLA_SUCCESS;
515 }
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
FLA_Error FLA_Househ2_UT_r_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:677
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:132
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Apply_H2_UT_l_opc_var1(int m_u2_A2, int n_a1t, scomplex *tau, scomplex *u2, int inc_u2, scomplex *a1t, int inc_a1t, scomplex *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:269
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
int i
Definition: bl1_axmyv2.c:145
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
FLA_Error FLA_Apply_H2_UT_r_opc_var1(int n_u2h_A2, int m_a1, scomplex *tau, scomplex *u2h, int inc_u2h, scomplex *a1, int inc_a1, scomplex *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_r_opt_var1.c:254
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_opc_var2()

FLA_Error FLA_Bidiag_UT_u_step_opc_var2 ( int  m_A,
int  n_A,
int  m_TS,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_T,
int  rs_T,
int  cs_T,
scomplex *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_Househ2_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var2().

// Body listing of FLA_Bidiag_UT_u_step_opc_var2 (single-complex operands).
//
// Runs b_alg = m_TS iterations.  Iteration i derives views of A, T and S
// around index i, computes a left Householder transform with
// FLA_Househ2_UT_l_opc (scalar stored in tau11) and, while columns remain to
// the right (n_ahead > 0), a right transform with FLA_Househ2_UT_r_opc (scalar
// stored in sigma11).
//
// Three work vectors (buff_v and buff_y of n_A elements, buff_z of m_A
// elements) are obtained from FLA_malloc and released with FLA_free; the
// FLA_malloc results are not checked here.
// The function always returns FLA_SUCCESS.
//
// NOTE(review): the embedded listing numbers skip wherever the opening line(s)
// of a statement (callee name and leading flag arguments) were dropped from
// this listing.  The orphaned argument lists below presumably belong to the
// operation named in the "// FLA_..." comment directly above them -- confirm
// against the original .c file before relying on this listing.
536 {
537  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
538  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
539  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
540 
541  scomplex beta;
542  int i;
543 
544  // b_alg = FLA_Obj_length( T );
545  int b_alg = m_TS;
546 
547  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
548  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
549  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
550  scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
551  scomplex* buff_y = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
552  scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
553  int inc_v = 1;
554  int inc_y = 1;
555  int inc_z = 1;
556 
557  for ( i = 0; i < b_alg; ++i )
558  {
// Views into A, T and S for iteration i, formed as
// base + column_index*cs + row_index*rs.
559  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
560  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
561  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
562  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
563  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
564  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
565  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
566 
567  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
568  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
569 
570  scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
571  scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
572 
573  scomplex* v21 = buff_v + (i+1)*inc_v;
574 
575  scomplex* y21 = buff_y + (i+1)*inc_y;
576 
577  scomplex* z21 = buff_z + (i+1)*inc_z;
578 
// First element / remainder splits of a12t and v21.
579  scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
580  scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
581 
582  scomplex* v21_t = v21 + (0 )*inc_v;
583  scomplex* v21_b = v21 + (1 )*inc_v;
584 
585  int m_ahead = m_A - i - 1;
586  int n_ahead = n_A - i - 1;
587  int m_behind = i;
588  int n_behind = i;
589 
590  /*------------------------------------------------------------*/
591 
592  // FLA_Househ2_UT( FLA_LEFT,
593  // alpha11,
594  // a21, tau11 );
595  FLA_Househ2_UT_l_opc( m_ahead,
596  alpha11,
597  a21, rs_A,
598  tau11 );
599 
600  if ( n_ahead > 0 )
601  {
602  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
603  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
605  n_ahead,
606  a12t, cs_A,
607  y21, inc_y );
610  m_ahead,
611  n_ahead,
612  buff_1,
613  A22, rs_A, cs_A,
614  a21, rs_A,
615  buff_1,
616  y21, inc_y );
617 
618  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
620  n_ahead,
621  tau11,
622  y21, inc_y );
623 
624  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
626  n_ahead,
627  buff_m1,
628  y21, inc_y,
629  a12t, cs_A );
630 
631  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
632  FLA_Househ2_UT_r_opc( n_ahead - 1,
633  a12t_l,
634  a12t_r, cs_A,
635  sigma11 );
636 
637  // FLA_Set( FLA_ONE, v21_t );
638  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
639  *v21_t = *buff_1;
// v21_b is a slice of buff_v, so it is strided by inc_v.
641  n_ahead - 1,
642  a12t_r, cs_A,
643  v21_b, inc_v );
644 
645  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
646  // FLA_Scal( FLA_MINUS_ONE, beta );
648  n_ahead,
649  y21, inc_y,
650  v21, inc_v,
651  &beta );
652  bl1_cneg1( &beta );
653 
654  // FLA_Copy( a21, z21 );
655  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
656  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
658  m_ahead,
659  a21, rs_A,
660  z21, inc_z );
663  m_ahead,
664  n_ahead,
665  buff_1,
666  A22, rs_A, cs_A,
667  v21, inc_v,
668  &beta,
669  z21, inc_z );
671  m_ahead,
672  sigma11,
673  z21, inc_z );
674 
675  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
676  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
679  m_ahead,
680  n_ahead,
681  buff_m1,
682  a21, rs_A,
683  y21, inc_y,
684  A22, rs_A, cs_A );
687  m_ahead,
688  n_ahead,
689  buff_m1,
690  z21, inc_z,
691  v21, inc_v,
692  A22, rs_A, cs_A );
693 
694  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
697  m_behind,
698  n_ahead,
699  buff_1,
700  A02, rs_A, cs_A,
701  v21, inc_v,
702  buff_0,
703  s01, rs_S );
704  }
705 
706  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
707  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
709  n_behind,
710  a10t, cs_A,
711  t01, rs_T );
714  m_ahead,
715  n_behind,
716  buff_1,
717  A20, rs_A, cs_A,
718  a21, rs_A,
719  buff_1,
720  t01, rs_T );
721 
722  /*------------------------------------------------------------*/
723 
724  }
725 
726  // FLA_Obj_free( &v );
727  // FLA_Obj_free( &y );
728  // FLA_Obj_free( &z );
729  FLA_free( buff_v );
730  FLA_free( buff_y );
731  FLA_free( buff_z );
732 
733  return FLA_SUCCESS;
734 }
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
FLA_Error FLA_Househ2_UT_r_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:677
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
Definition: blis_type_defs.h:132
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_invscalv.c:52
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:111
int i
Definition: bl1_axmyv2.c:145
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_opc_var3()

FLA_Error FLA_Bidiag_UT_u_step_opc_var3 ( int  m_A,
int  n_A,
int  m_TS,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T,
scomplex * buff_S,
int  rs_S,
int  cs_S 
)

References bl1_caxpyv(), bl1_cconjv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_Househ2s_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var3().

956 {
957  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
958  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
959  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
960 
961  scomplex alpha12;
962  scomplex minus_conj_alpha12;
963  scomplex psi11_minus_alpha12;
964  scomplex minus_inv_tau11;
965  scomplex minus_upsilon11;
966  scomplex minus_conj_nu11;
967  scomplex minus_conj_psi11;
968  scomplex minus_zeta11;
969  scomplex beta;
970  int i;
971 
972  // b_alg = FLA_Obj_length( T );
973  int b_alg = m_TS;
974 
975  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
976  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
977  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
978  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
979  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
980  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
981  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
982  scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
983  scomplex* buff_ap = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
984  scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
985  scomplex* buff_up = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
986  scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
987  scomplex* buff_y = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
988  scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
989  int inc_w = 1;
990  int inc_ap = 1;
991  int inc_u = 1;
992  int inc_up = 1;
993  int inc_v = 1;
994  int inc_y = 1;
995  int inc_z = 1;
996 
997  for ( i = 0; i < b_alg; ++i )
998  {
999  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1000  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1001  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1002  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1003  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1004  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1005  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1006 
1007  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1008  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1009 
1010  scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1011  scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1012 
1013  scomplex* w21 = buff_w + (i+1)*inc_w;
1014 
1015  scomplex* a12p = buff_ap + (i+1)*inc_ap;
1016 
1017  scomplex* upsilon11 = buff_u + (i )*inc_u;
1018  scomplex* u21 = buff_u + (i+1)*inc_u;
1019 
1020  scomplex* u21p = buff_up + (i+1)*inc_up;
1021 
1022  scomplex* nu11 = buff_v + (i )*inc_v;
1023  scomplex* v21 = buff_v + (i+1)*inc_v;
1024 
1025  scomplex* psi11 = buff_y + (i )*inc_y;
1026  scomplex* y21 = buff_y + (i+1)*inc_y;
1027 
1028  scomplex* zeta11 = buff_z + (i )*inc_z;
1029  scomplex* z21 = buff_z + (i+1)*inc_z;
1030 
1031  scomplex* a12p_t = a12p + (0 )*inc_ap;
1032  scomplex* a12p_b = a12p + (1 )*inc_ap;
1033 
1034  scomplex* v21_t = v21 + (0 )*inc_v;
1035  scomplex* v21_b = v21 + (1 )*inc_v;
1036 
1037  scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1038  scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1039 
1040  scomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
1041 
1042  int m_ahead = m_A - i - 1;
1043  int n_ahead = n_A - i - 1;
1044  int m_behind = i;
1045  int n_behind = i;
1046 
1047  /*------------------------------------------------------------*/
1048 
1049  if ( m_behind > 0 )
1050  {
1051  // FLA_Copy( upsilon11, minus_upsilon11 );
1052  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
1053  bl1_cmult3( buff_m1, upsilon11, &minus_upsilon11 );
1054 
1055  // FLA_Copy( zeta11, minus_zeta11 );
1056  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
1057  bl1_cmult3( buff_m1, zeta11, &minus_zeta11 );
1058 
1059  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
1060  // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
1061  bl1_ccopyconj( psi11, &minus_conj_psi11 );
1062  bl1_cscals( buff_m1, &minus_conj_psi11 );
1063 
1064  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
1065  // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
1066  bl1_ccopyconj( nu11, &minus_conj_nu11 );
1067  bl1_cscals( buff_m1, &minus_conj_nu11 );
1068 
1069  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
1070  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, zeta11, alpha11 );
1071  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1072  1,
1073  &minus_conj_psi11,
1074  upsilon11, 1,
1075  alpha11, 1 );
1076  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1077  1,
1078  &minus_conj_nu11,
1079  zeta11, 1,
1080  alpha11, 1 );
1081 
1082  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
1083  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
1084  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1085  m_ahead,
1086  &minus_conj_psi11,
1087  u21, inc_u,
1088  a21, rs_A );
1089  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1090  m_ahead,
1091  &minus_conj_nu11,
1092  z21, inc_z,
1093  a21, rs_A );
1094 
1095  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
1096  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11, v21, a12t );
1097  bl1_caxpyv( BLIS1_CONJUGATE,
1098  n_ahead,
1099  &minus_upsilon11,
1100  y21, inc_y,
1101  a12t, cs_A );
1102  bl1_caxpyv( BLIS1_CONJUGATE,
1103  n_ahead,
1104  &minus_zeta11,
1105  v21, inc_v,
1106  a12t, cs_A );
1107  }
1108 
1109  // FLA_Househ2_UT( FLA_LEFT,
1110  // alpha11,
1111  // a21, tau11 );
1112  // FLA_Copy( a21, u21p );
1113  FLA_Househ2_UT_l_opc( m_ahead,
1114  alpha11,
1115  a21, rs_A,
1116  tau11 );
1117  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1118  m_ahead,
1119  a21, rs_A,
1120  u21p, inc_up );
1121 
1122  if ( n_ahead > 0 )
1123  {
1124  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1125  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1126  bl1_cdiv3( buff_m1, tau11, &minus_inv_tau11 );
1127 
1128  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1129  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1130  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1131  n_ahead,
1132  a12t, cs_A,
1133  a12p, inc_ap );
1134  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1135  n_ahead,
1136  &minus_inv_tau11,
1137  a12t, cs_A,
1138  a12p, inc_ap );
1139  }
1140 
1141  if ( m_behind > 0 )
1142  {
1143  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1144  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
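1145  bl1_cger( BLIS1_NO_CONJUGATE,
1146  BLIS1_CONJUGATE,
1147  m_ahead,
1148  n_ahead,
1149  buff_m1,
1150  u21, inc_u,
1151  y21, inc_y,
1152  A22, rs_A, cs_A );
1153  bl1_cger( BLIS1_NO_CONJUGATE,
1154  BLIS1_CONJUGATE,
1155  m_ahead,
1156  n_ahead,
1157  buff_m1,
1158  z21, inc_z,
1159  v21, inc_v,
1160  A22, rs_A, cs_A );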
1161  }
1162 
1163  if ( n_ahead > 0 )
1164  {
1165  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
1166  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1167  BLIS1_NO_CONJUGATE,
1168  m_ahead,
1169  n_ahead,
1170  buff_1,
1171  A22, rs_A, cs_A,
1172  u21p, inc_up,
1173  buff_0,
1174  y21, inc_y );
1175 
1176  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1177  bl1_caxpyv( BLIS1_CONJUGATE,
1178  n_ahead,
1179  &minus_inv_tau11,
1180  y21, inc_y,
1181  a12p, inc_ap );
1182 
1183  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1184  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1185  BLIS1_CONJUGATE,
1186  m_ahead,
1187  n_ahead,
1188  buff_1,
1189  A22, rs_A, cs_A,
1190  a12p, inc_ap,
1191  buff_0,
1192  w21, inc_w );
1193 
1194  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1195  bl1_caxpyv( BLIS1_CONJUGATE,
1196  n_ahead,
1197  buff_1,
1198  a12t, cs_A,
1199  y21, inc_y );
1200 
1201  // FLA_Househ2s_UT( FLA_RIGHT,
1202  // a12p_t,
1203  // a12p_b,
1204  // alpha12, psi11_minus_alpha12, sigma11 );
1205  FLA_Househ2s_UT_r_opc( n_ahead - 1,
1206  a12p_t,
1207  a12p_b, inc_ap,
1208  &alpha12,
1209  &psi11_minus_alpha12,
1210  sigma11 );
1211 
1212  // FLA_Copy( a12p, v21 );
1213  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1214  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1215  // FLA_Conjugate( v21_b );
1216  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1217  n_ahead,
1218  a12p, inc_ap,
1219  v21, inc_v );
1220  bl1_cmult4( buff_m1, &alpha12, v21_t, v21_t );
1221  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
1222  n_ahead,
1223  &psi11_minus_alpha12,
1224  v21, inc_v );
1225  bl1_cconjv( n_ahead - 1,
1226  v21_b, inc_v );
1227 
1228  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
1229  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1230  *a12t_l = alpha12;
1231  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1232  n_ahead - 1,
1233  v21_b, inc_v,
1234  a12t_r, cs_A );
1235  }
1236 
1237  // FLA_Copy( u21p, u21 );
1238  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1239  m_ahead,
1240  u21p, inc_up,
1241  u21, inc_u );
1242 
1243  if ( n_ahead > 0 )
1244  {
1245  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1246  // FLA_Scal( FLA_MINUS_ONE, beta );
1247  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1248  bl1_cdot( BLIS1_CONJUGATE,
1249  n_ahead,
1250  y21, inc_y,
1251  v21, inc_v,
1252  &beta );
1253  bl1_cscals( &minus_inv_tau11, &beta );
1254 
1255  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1256  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1257  bl1_ccopyconj( &alpha12, &minus_conj_alpha12 );
1258  bl1_cneg1( &minus_conj_alpha12 );
1259 
1260  // FLA_Copy( w21, z21 );
1261  // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
1262  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1263  // FLA_Axpy( beta, u21, z21 );
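1264  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1265  m_ahead,
1266  w21, inc_w,
1267  z21, inc_z );
1268  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1269  m_ahead,
1270  &minus_conj_alpha12,
1271  A22_l, rs_A,
1272  z21, inc_z );
1273  bl1_cinvscalv( BLIS1_CONJUGATE,
1274  m_ahead,
1275  &psi11_minus_alpha12,
1276  z21, inc_z );
1277  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1278  m_ahead,
1279  &beta,
1280  u21, inc_u,
1281  z21, inc_z );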
1282 
1283  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1284  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1285  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
1286  n_ahead,
1287  tau11,
1288  y21, inc_y );
1289  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
1290  m_ahead,
1291  sigma11,
1292  z21, inc_z );
1293 
1294  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
1295  bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
1296  BLIS1_NO_CONJUGATE,
1297  m_behind,
1298  n_ahead,
1299  buff_1,
1300  A02, rs_A, cs_A,
1301  v21, inc_v,
1302  buff_0,
1303  s01, rs_S );
1304  }
1305 
1306  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1307  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1308  bl1_ccopyv( BLIS1_CONJUGATE,
1309  n_behind,
1310  a10t, cs_A,
1311  t01, rs_T );
1312  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1313  BLIS1_NO_CONJUGATE,
1314  m_ahead,
1315  n_behind,
1316  buff_1,
1317  A20, rs_A, cs_A,
1318  u21, inc_u,
1319  buff_1,
1320  t01, rs_T );
1321 
1322  if ( m_behind + 1 == b_alg && n_ahead > 0 )
1323  {
1324  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1325  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
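1326  bl1_cger( BLIS1_NO_CONJUGATE,
1327  BLIS1_CONJUGATE,
1328  m_ahead,
1329  n_ahead,
1330  buff_m1,
1331  u21, inc_u,
1332  y21, inc_y,
1333  A22, rs_A, cs_A );
1334  bl1_cger( BLIS1_NO_CONJUGATE,
1335  BLIS1_CONJUGATE,
1336  m_ahead,
1337  n_ahead,
1338  buff_m1,
1339  z21, inc_z,
1340  v21, inc_v,
1341  A22, rs_A, cs_A );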
1342  }
1343 
1344  /*------------------------------------------------------------*/
1345 
1346  }
1347 
1348  // FLA_Obj_free( &w );
1349  // FLA_Obj_free( &ap );
1350  // FLA_Obj_free( &u );
1351  // FLA_Obj_free( &up );
1352  // FLA_Obj_free( &v );
1353  // FLA_Obj_free( &y );
1354  // FLA_Obj_free( &z );
1355  FLA_free( buff_w );
1356  FLA_free( buff_ap );
1357  FLA_free( buff_u );
1358  FLA_free( buff_up );
1359  FLA_free( buff_v );
1360  FLA_free( buff_y );
1361  FLA_free( buff_z );
1362 
1363  return FLA_SUCCESS;
1364 }
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void bl1_cconjv(int m, scomplex *x, int incx)
Definition: bl1_conjv.c:23
FLA_Error FLA_Househ2s_UT_r_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *alpha, scomplex *chi_1_minus_alpha, scomplex *tau)
Definition: FLA_Househ2s_UT.c:589
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
Definition: blis_type_defs.h:132
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_invscalv.c:52
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:111
int i
Definition: bl1_axmyv2.c:145
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_opc_var4()

FLA_Error FLA_Bidiag_UT_u_step_opc_var4 ( int  m_A,
int  n_A,
int  m_TS,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_Y,
int  rs_Y,
int  cs_Y,
scomplex * buff_Z,
int  rs_Z,
int  cs_Z,
scomplex * buff_T,
int  rs_T,
int  cs_T,
scomplex * buff_S,
int  rs_S,
int  cs_S 
)

References bl1_caxpyv(), bl1_cconjv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cinvscalv(), bl1_csetm(), bl1_csetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_Househ2s_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var4().

1259 {
1260  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
1261  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
1262  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
1263 
1264  scomplex alpha12;
1265  scomplex minus_conj_alpha12;
1266  scomplex psi11_minus_alpha12;
1267  scomplex minus_inv_tau11;
1268  scomplex beta;
1269  scomplex last_elem;
1270  int i;
1271 
1272  // b_alg = FLA_Obj_length( T );
1273  int b_alg = m_TS;
1274 
1275  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1276  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
1277  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
1278  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1279  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
1280  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1281  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
1282  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
1283  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
1284  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
1285  scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1286  scomplex* buff_al = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1287  scomplex* buff_ap = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1288  scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1289  scomplex* buff_up = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1290  scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1291  scomplex* buff_d = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1292  scomplex* buff_e = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1293  scomplex* buff_f = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1294  scomplex* buff_g = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1295  int inc_w = 1;
1296  int inc_al = 1;
1297  int inc_ap = 1;
1298  int inc_u = 1;
1299  int inc_up = 1;
1300  int inc_v = 1;
1301  int inc_d = 1;
1302  int inc_e = 1;
1303  int inc_f = 1;
1304  int inc_g = 1;
1305 
1306  // FLA_Set( FLA_ZERO, Y );
1307  // FLA_Set( FLA_ZERO, Z );
1308  bl1_csetm( n_A,
1309  b_alg,
1310  buff_0,
1311  buff_Y, rs_Y, cs_Y );
1312  bl1_csetm( m_A,
1313  b_alg,
1314  buff_0,
1315  buff_Z, rs_Z, cs_Z );
1316 
1317  for ( i = 0; i < b_alg; ++i )
1318  {
1319  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1320  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1321  scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
1322  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1323  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1324  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1325  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1326  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1327 
1328  scomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1329  scomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1330  scomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1331 
1332  scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1333  scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1334  scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1335 
1336  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1337  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1338 
1339  scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1340  scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1341 
1342  scomplex* w21 = buff_w + (i+1)*inc_w;
1343 
1344  scomplex* a22l = buff_al + (i+1)*inc_al;
1345 
1346  scomplex* a12p = buff_ap + (i+1)*inc_ap;
1347 
1348  scomplex* u21 = buff_u + (i+1)*inc_u;
1349 
1350  scomplex* u21p = buff_up + (i+1)*inc_up;
1351 
1352  scomplex* v21 = buff_v + (i+1)*inc_v;
1353 
1354  scomplex* d0 = buff_d + (0 )*inc_d;
1355 
1356  scomplex* e0 = buff_e + (0 )*inc_e;
1357 
1358  scomplex* f0 = buff_f + (0 )*inc_f;
1359 
1360  scomplex* g0 = buff_g + (0 )*inc_g;
1361 
1362  scomplex* a12p_t = a12p + (0 )*inc_ap;
1363  scomplex* a12p_b = a12p + (1 )*inc_ap;
1364 
1365  scomplex* v21_t = v21 + (0 )*inc_v;
1366  scomplex* v21_b = v21 + (1 )*inc_v;
1367 
1368  scomplex* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
1369 
1370  scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1371  scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1372 
1373  scomplex* A02_l = A02 + (0 )*cs_A + (0 )*rs_A;
1374 
1375  scomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
1376 
1377  scomplex* Y20_t = Y20 + (0 )*cs_Y + (0 )*rs_Y;
1378 
1379  scomplex* ABL = a10t;
1380  scomplex* ZBL = z10t;
1381 
1382  scomplex* a2 = alpha11;
1383 
1384  int m_ahead = m_A - i - 1;
1385  int n_ahead = n_A - i - 1;
1386  int m_behind = i;
1387  int n_behind = i;
1388 
1389  /*------------------------------------------------------------*/
1390 
1391  if ( m_behind > 0 )
1392  {
1393  // FLA_Copy( a01_b, last_elem );
1394  // FLA_Set( FLA_ONE, a01_b );
1395  last_elem = *a01_b;
1396  *a01_b = *buff_1;
1397  }
1398 
1399  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1400  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
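1401  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1402  BLIS1_CONJUGATE,
1403  m_ahead + 1,
1404  n_behind,
1405  buff_m1,
1406  ABL, rs_A, cs_A,
1407  y10t, cs_Y,
1408  buff_1,
1409  a2, rs_A );
1410  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1411  BLIS1_CONJUGATE,
1412  m_ahead + 1,
1413  n_behind,
1414  buff_m1,
1415  ZBL, rs_Z, cs_Z,
1416  a01, rs_A,
1417  buff_1,
1418  a2, rs_A );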
1419 
1420  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1421  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
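1422  bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
1423  BLIS1_NO_CONJUGATE,
1424  n_ahead,
1425  n_behind,
1426  buff_m1,
1427  Y20, rs_Y, cs_Y,
1428  a10t, cs_A,
1429  buff_1,
1430  a12t, cs_A );
1431  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1432  BLIS1_NO_CONJUGATE,
1433  m_behind,
1434  n_ahead,
1435  buff_m1,
1436  A02, rs_A, cs_A,
1437  z10t, cs_Z,
1438  buff_1,
1439  a12t, cs_A );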
1440 
1441  if ( m_behind > 0 )
1442  {
1443  // FLA_Copy( last_elem, a01_b );
1444  *a01_b = last_elem;
1445  }
1446 
1447  // FLA_Househ2_UT( FLA_LEFT,
1448  // alpha11,
1449  // a21, tau11 );
1450  // FLA_Copy( a21, u21p );
1451  FLA_Househ2_UT_l_opc( m_ahead,
1452  alpha11,
1453  a21, rs_A,
1454  tau11 );
1455  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1456  m_ahead,
1457  a21, rs_A,
1458  u21p, inc_up );
1459 
1460  if ( n_ahead > 0 )
1461  {
1462  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1463  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1464  bl1_cdiv3( buff_m1, tau11, &minus_inv_tau11 );
1465 
1466  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1467  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1468  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1469  n_ahead,
1470  a12t, cs_A,
1471  a12p, inc_ap );
1472  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1473  n_ahead,
1474  &minus_inv_tau11,
1475  a12t, cs_A,
1476  a12p, inc_ap );
1477 
1478  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
1479  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
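1480  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1481  BLIS1_NO_CONJUGATE,
1482  m_ahead,
1483  n_behind,
1484  buff_1,
1485  A20, rs_A, cs_A,
1486  u21p, inc_up,
1487  buff_0,
1488  d0, inc_d );
1489  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1490  BLIS1_NO_CONJUGATE,
1491  m_ahead,
1492  n_behind,
1493  buff_1,
1494  Z20, rs_Z, cs_Z,
1495  u21p, inc_up,
1496  buff_0,
1497  e0, inc_e );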
1498 
1499  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1500  // FLA_Axpy( FLA_ONE, d0, t01 );
1501  bl1_ccopyv( BLIS1_CONJUGATE,
1502  n_behind,
1503  a10t, cs_A,
1504  t01, rs_T );
1505  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1506  n_behind,
1507  buff_1,
1508  d0, inc_d,
1509  t01, rs_T );
1510 
1511  // FLA_Set( FLA_ZERO, y21 );
1512  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1513  // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
1514  bl1_csetv( n_ahead,
1515  buff_0,
1516  y21, rs_Y );
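1517  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1518  BLIS1_NO_CONJUGATE,
1519  n_ahead,
1520  n_behind,
1521  buff_m1,
1522  Y20, rs_Y, cs_Y,
1523  d0, inc_d,
1524  buff_1,
1525  y21, rs_Y );
1526  bl1_cgemv( BLIS1_TRANSPOSE,
1527  BLIS1_NO_CONJUGATE,
1528  m_behind,
1529  n_ahead,
1530  buff_m1,
1531  A02, rs_A, cs_A,
1532  e0, inc_e,
1533  buff_1,
1534  y21, rs_Y );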
1535 
1536  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
1537  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1538  BLIS1_NO_CONJUGATE,
1539  m_ahead,
1540  n_ahead,
1541  buff_1,
1542  A22, rs_A, cs_A,
1543  u21p, inc_up,
1544  buff_1,
1545  y21, rs_Y );
1546 
1547  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1548  bl1_caxpyv( BLIS1_CONJUGATE,
1549  n_ahead,
1550  &minus_inv_tau11,
1551  y21, rs_Y,
1552  a12p, inc_ap );
1553 
1554  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1555  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1556  BLIS1_CONJUGATE,
1557  m_ahead,
1558  n_ahead,
1559  buff_1,
1560  A22, rs_A, cs_A,
1561  a12p, inc_ap,
1562  buff_0,
1563  w21, inc_w );
1564 
1565  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
1566  // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
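1567  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1568  BLIS1_CONJUGATE,
1569  n_ahead,
1570  n_behind,
1571  buff_1,
1572  Y20, rs_Y, cs_Y,
1573  a12p, inc_ap,
1574  buff_0,
1575  f0, inc_f );
1576  bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
1577  BLIS1_CONJUGATE,
1578  m_behind,
1579  n_ahead,
1580  buff_1,
1581  A02, rs_A, cs_A,
1582  a12p, inc_ap,
1583  buff_0,
1584  g0, inc_g );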
1585 
1586  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
1587  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
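1588  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1589  BLIS1_NO_CONJUGATE,
1590  m_ahead,
1591  n_behind,
1592  buff_m1,
1593  A20, rs_A, cs_A,
1594  f0, inc_f,
1595  buff_1,
1596  w21, inc_w );
1597  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1598  BLIS1_NO_CONJUGATE,
1599  m_ahead,
1600  n_behind,
1601  buff_m1,
1602  Z20, rs_Z, cs_Z,
1603  g0, inc_g,
1604  buff_1,
1605  w21, inc_w );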
1606 
1607  // FLA_Copy( A22_l, a22l );
1608  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
1609  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
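1610  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1611  m_ahead,
1612  A22_l, rs_A,
1613  a22l, inc_al );
1614  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1615  BLIS1_CONJUGATE,
1616  m_ahead,
1617  n_behind,
1618  buff_m1,
1619  A20, rs_A, cs_A,
1620  Y20_t, cs_Y,
1621  buff_1,
1622  a22l, inc_al );
1623  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1624  BLIS1_CONJUGATE,
1625  m_ahead,
1626  n_behind,
1627  buff_m1,
1628  Z20, rs_Z, cs_Z,
1629  A02_l, rs_A,
1630  buff_1,
1631  a22l, inc_al );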
1632 
1633  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1634  bl1_caxpyv( BLIS1_CONJUGATE,
1635  n_ahead,
1636  buff_1,
1637  a12t, cs_A,
1638  y21, rs_Y );
1639 
1640  // FLA_Househ2s_UT( FLA_RIGHT,
1641  // a12p_t,
1642  // a12p_b,
1643  // alpha12, psi11_minus_alpha12, sigma11 );
1644  FLA_Househ2s_UT_r_opc( n_ahead - 1,
1645  a12p_t,
1646  a12p_b, inc_ap,
1647  &alpha12,
1648  &psi11_minus_alpha12,
1649  sigma11 );
1650 
1651  // FLA_Copy( a12p, v21 );
1652  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1653  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1654  // FLA_Conjugate( v21_b );
1655  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1656  n_ahead,
1657  a12p, inc_ap,
1658  v21, inc_v );
1659  bl1_cmult4( buff_m1, &alpha12, v21_t, v21_t );
1660  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
1661  n_ahead,
1662  &psi11_minus_alpha12,
1663  v21, inc_v );
1664  bl1_cconjv( n_ahead - 1,
1665  v21_b, inc_v );
1666 
1667  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1668  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1669  bl1_ccopyconj( &alpha12, &minus_conj_alpha12 );
1670  bl1_cneg1( &minus_conj_alpha12 );
1671 
1672  // FLA_Copy( g0, s01 );
1673  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
1674  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
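1675  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1676  n_behind,
1677  g0, inc_g,
1678  s01, rs_S );
1679  bl1_caxpyv( BLIS1_CONJUGATE,
1680  n_behind,
1681  &minus_conj_alpha12,
1682  A02_l, rs_A,
1683  s01, rs_S );
1684  bl1_cinvscalv( BLIS1_CONJUGATE,
1685  n_behind,
1686  &psi11_minus_alpha12,
1687  s01, rs_S );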
1688 
1689  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
1690  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1691  *a12t_l = alpha12;
1692  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1693  n_ahead - 1,
1694  v21_b, inc_v,
1695  a12t_r, cs_A );
1696  }
1697 
1698  // FLA_Copy( u21p, u21 );
1699  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1700  m_ahead,
1701  u21p, inc_up,
1702  u21, inc_u );
1703 
1704  if ( n_ahead > 0 )
1705  {
1706  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1707  // FLA_Scal( FLA_MINUS_ONE, beta );
1708  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1709  bl1_cdot( BLIS1_CONJUGATE,
1710  n_ahead,
1711  y21, rs_Y,
1712  v21, inc_v,
1713  &beta );
1714  bl1_cscals( &minus_inv_tau11, &beta );
1715 
1716  // FLA_Copy( w21, z21 );
1717  // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
1718  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1719  // FLA_Axpy( beta, u21, z21 );
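1720  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1721  m_ahead,
1722  w21, inc_w,
1723  z21, rs_Z );
1724  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1725  m_ahead,
1726  &minus_conj_alpha12,
1727  a22l, inc_al,
1728  z21, rs_Z );
1729  bl1_cinvscalv( BLIS1_CONJUGATE,
1730  m_ahead,
1731  &psi11_minus_alpha12,
1732  z21, rs_Z );
1733  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1734  m_ahead,
1735  &beta,
1736  u21, inc_u,
1737  z21, rs_Z );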
1738 
1739  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1740  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1741  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
1742  n_ahead,
1743  tau11,
1744  y21, rs_Y );
1745  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
1746  m_ahead,
1747  sigma11,
1748  z21, rs_Z );
1749  }
1750  else // if ( n_ahead == 0 )
1751  {
1752  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1753  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1754  bl1_ccopyv( BLIS1_CONJUGATE,
1755  n_behind,
1756  a10t, cs_A,
1757  t01, rs_T );
1758  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1759  BLIS1_NO_CONJUGATE,
1760  m_ahead,
1761  n_behind,
1762  buff_1,
1763  A20, rs_A, cs_A,
1764  u21, inc_u,
1765  buff_1,
1766  t01, rs_T );
1767  }
1768 
1769  /*------------------------------------------------------------*/
1770 
1771  }
1772 
1773  // FLA_Obj_free( &w );
1774  // FLA_Obj_free( &al );
1775  // FLA_Obj_free( &ap );
1776  // FLA_Obj_free( &u );
1777  // FLA_Obj_free( &up );
1778  // FLA_Obj_free( &v );
1779  // FLA_Obj_free( &d );
1780  // FLA_Obj_free( &e );
1781  // FLA_Obj_free( &f );
1782  // FLA_Obj_free( &g );
1783  FLA_free( buff_w );
1784  FLA_free( buff_al );
1785  FLA_free( buff_ap );
1786  FLA_free( buff_u );
1787  FLA_free( buff_up );
1788  FLA_free( buff_v );
1789  FLA_free( buff_d );
1790  FLA_free( buff_e );
1791  FLA_free( buff_f );
1792  FLA_free( buff_g );
1793 
1794  return FLA_SUCCESS;
1795 }
void bl1_csetv(int m, scomplex *sigma, scomplex *x, int incx)
Definition: bl1_setv.c:52
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:61
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
Definition: blis_type_defs.h:55
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void bl1_cconjv(int m, scomplex *x, int incx)
Definition: bl1_conjv.c:23
FLA_Error FLA_Househ2s_UT_r_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *alpha, scomplex *chi_1_minus_alpha, scomplex *tau)
Definition: FLA_Househ2s_UT.c:589
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
Definition: blis_type_defs.h:132
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_invscalv.c:52
int i
Definition: bl1_axmyv2.c:145
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_opc_var5()

FLA_Error FLA_Bidiag_UT_u_step_opc_var5 ( int  m_A,
int  n_A,
int  m_TS,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
scomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *  buff_T,
int  rs_T,
int  cs_T,
scomplex *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cinvscalv(), bl1_csetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_Househ2_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var5().

// Source listing of FLA_Bidiag_UT_u_step_opc_var5 (scomplex buffers).
//
// Runs m_TS iterations (b_alg = m_TS). In iteration i, following the FLA_*
// pseudo-code comments kept in the listing:
//   1. the current column (alpha11 and below) and the row a12t are brought up
//      to date from the Y and Z panels accumulated so far;
//   2. a left Householder transform is computed from (alpha11, a21), its
//      scalar is stored in tau11 (diagonal of T), and a21 is copied to u21;
//   3. if columns remain (n_ahead > 0): y21 is formed, a12t is updated, a
//      right Householder transform is computed with its scalar in sigma11
//      (diagonal of S), z21 is formed, and t01 / s01 (columns of T / S above
//      the diagonal) are filled;
//   4. otherwise only t01 is filled.
//
// Y (n_A x b_alg) and Z (m_A x b_alg) are overwritten with zeros on entry.
// Six temporary vectors are obtained from FLA_malloc and released with
// FLA_free before FLA_SUCCESS is returned; no other return value appears here.
//
// NOTE(review): this extract has lost some listing lines (e.g. 1062-1063,
// 1071-1072, 1083-1084, 1116, ...). Those are the lines that open a call, so
// many calls below appear as bare argument lists. Use the pseudo-code comment
// above each argument list, and the original source file, to identify them.
// NOTE(review): the FLA_malloc results are used without a NULL check in this
// listing - confirm that FLA_malloc itself handles allocation failure.
953 {
954  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
955  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
956  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
957 
958  scomplex beta;
959  scomplex last_elem;
960  int i;
961 
962  // b_alg = FLA_Obj_length( T );
963  int b_alg = m_TS;
964 
  // Work vectors: u, f, g hold m_A elements; v, d, e hold n_A elements.
  // All are addressed with unit stride (inc_* = 1).
965  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
966  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
967  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
968  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
969  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
970  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
971  scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
972  scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
973  scomplex* buff_d = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
974  scomplex* buff_e = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
975  scomplex* buff_f = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
976  scomplex* buff_g = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
977  int inc_u = 1;
978  int inc_v = 1;
979  int inc_d = 1;
980  int inc_e = 1;
981  int inc_f = 1;
982  int inc_g = 1;
983 
984  // FLA_Set( FLA_ZERO, Y );
985  // FLA_Set( FLA_ZERO, Z );
986  bl1_csetm( n_A,
987  b_alg,
988  buff_0,
989  buff_Y, rs_Y, cs_Y );
990  bl1_csetm( m_A,
991  b_alg,
992  buff_0,
993  buff_Z, rs_Z, cs_Z );
994 
995  for ( i = 0; i < b_alg; ++i )
996  {
  // Views into A relative to the diagonal element (i,i):
  //   a10t    -> (i,   0)      A20  -> (i+1, 0)
  //   a01     -> (0,   i)      a21  -> (i+1, i)
  //   alpha11 -> (i,   i)
  //   A02     -> (0,   i+1)    a12t -> (i,   i+1)    A22 -> (i+1, i+1)
997  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
998  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
999  scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
1000  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1001  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1002  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1003  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1004  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1005 
  // Matching views into Y, Z, T and S (row i / column i of each).
1006  scomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1007  scomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1008  scomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1009 
1010  scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1011  scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1012  scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1013 
1014  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1015  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1016 
1017  scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1018  scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1019 
1020  scomplex* u21 = buff_u + (i+1)*inc_u;
1021 
1022  scomplex* v21 = buff_v + (i+1)*inc_v;
1023 
1024  scomplex* d0 = buff_d + (0 )*inc_d;
1025 
1026  scomplex* e0 = buff_e + (0 )*inc_e;
1027 
1028  scomplex* f0 = buff_f + (0 )*inc_f;
1029 
1030  scomplex* g0 = buff_g + (0 )*inc_g;
1031 
1032  scomplex* v21_t = v21 + (0 )*inc_v;
1033  scomplex* v21_b = v21 + (1 )*inc_v;
1034 
  // Element (i-1, i): the last entry of a01. For i == 0 this address lies
  // before a01; it is dereferenced only inside the m_behind > 0 guards below.
1035  scomplex* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
1036 
1037  scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1038  scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1039 
  // ABL / ZBL: rows i.. of the first i columns of A / Z.
  // a2: column i of A from the diagonal element downwards.
1040  scomplex* ABL = a10t;
1041  scomplex* ZBL = z10t;
1042 
1043  scomplex* a2 = alpha11;
1044 
1045  int m_ahead = m_A - i - 1;
1046  int n_ahead = n_A - i - 1;
1047  int m_behind = i;
1048  int n_behind = i;
1049 
1050  /*------------------------------------------------------------*/
1051 
  // Save the last element of a01 and temporarily replace it by one for the
  // updates below; the saved value is written back at listing line 1105.
1052  if ( m_behind > 0 )
1053  {
1054  // FLA_Copy( a01_b, last_elem );
1055  // FLA_Set( FLA_ONE, a01_b );
1056  last_elem = *a01_b;
1057  *a01_b = *buff_1;
1058  }
1059 
1060  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1061  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
  // NOTE(review): the opening lines of the next two calls (listing lines
  // 1062-1063 and 1071-1072) are missing from this extract.
1064  m_ahead + 1,
1065  n_behind,
1066  buff_m1,
1067  ABL, rs_A, cs_A,
1068  y10t, cs_Y,
1069  buff_1,
1070  a2, rs_A );
1073  m_ahead + 1,
1074  n_behind,
1075  buff_m1,
1076  ZBL, rs_Z, cs_Z,
1077  a01, rs_A,
1078  buff_1,
1079  a2, rs_A );
1080 
1081  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1082  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
1085  n_ahead,
1086  n_behind,
1087  buff_m1,
1088  Y20, rs_Y, cs_Y,
1089  a10t, cs_A,
1090  buff_1,
1091  a12t, cs_A );
1094  m_behind,
1095  n_ahead,
1096  buff_m1,
1097  A02, rs_A, cs_A,
1098  z10t, cs_Z,
1099  buff_1,
1100  a12t, cs_A );
1101 
1102  if ( m_behind > 0 )
1103  {
1104  // FLA_Copy( last_elem, a01_b );
1105  *a01_b = last_elem;
1106  }
1107 
1108  // FLA_Househ2_UT( FLA_LEFT,
1109  // alpha11,
1110  // a21, tau11 );
1111  // FLA_Copy( a21, u21 );
1112  FLA_Househ2_UT_l_opc( m_ahead,
1113  alpha11,
1114  a21, rs_A,
1115  tau11 );
1117  m_ahead,
1118  a21, rs_A,
1119  u21, inc_u );
1120 
1121  if ( n_ahead > 0 )
1122  {
1123  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
1124  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21, FLA_ONE, y21 );
1126  n_ahead,
1127  a12t, cs_A,
1128  y21, rs_Y );
1131  m_ahead,
1132  n_ahead,
1133  buff_1,
1134  A22, rs_A, cs_A,
1135  u21, inc_u,
1136  buff_1,
1137  y21, rs_Y );
1138 
1139  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ZERO, d0 );
1140  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21, FLA_ZERO, e0 );
1143  m_ahead,
1144  n_behind,
1145  buff_1,
1146  A20, rs_A, cs_A,
1147  u21, inc_u,
1148  buff_0,
1149  d0, inc_d );
1152  m_ahead,
1153  n_behind,
1154  buff_1,
1155  Z20, rs_Z, cs_Z,
1156  u21, inc_u,
1157  buff_0,
1158  e0, inc_e );
1159 
1160  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1161  // FLA_Axpy( FLA_ONE, d0, t01 );
1163  n_behind,
1164  a10t, cs_A,
1165  t01, rs_T );
1167  n_behind,
1168  buff_1,
1169  d0, inc_d,
1170  t01, rs_T );
1171 
1172  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1173  // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
1176  n_ahead,
1177  n_behind,
1178  buff_m1,
1179  Y20, rs_Y, cs_Y,
1180  d0, inc_d,
1181  buff_1,
1182  y21, rs_Y );
1185  m_behind,
1186  n_ahead,
1187  buff_m1,
1188  A02, rs_A, cs_A,
1189  e0, inc_e,
1190  buff_1,
1191  y21, rs_Y );
1192 
1193  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1195  n_ahead,
1196  tau11,
1197  y21, rs_Y );
1198 
1199  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
1201  n_ahead,
1202  buff_m1,
1203  y21, rs_Y,
1204  a12t, cs_A );
1205 
1206  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
1207  FLA_Househ2_UT_r_opc( n_ahead - 1,
1208  a12t_l,
1209  a12t_r, cs_A,
1210  sigma11 );
1211 
  // v21 = ( 1 ; a12t_r ): leading element set to one, remainder copied
  // from the row to the right of a12t_l.
1212  // FLA_Set( FLA_ONE, v21_t );
1213  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
1214  *v21_t = *buff_1;
1216  n_ahead - 1,
1217  a12t_r, cs_A,
1218  v21_b, inc_v );
1219 
1220  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1221  // FLA_Scal( FLA_MINUS_ONE, beta );
1223  n_ahead,
1224  y21, rs_Y,
1225  v21, inc_v,
1226  &beta );
1227  bl1_cscals( buff_m1, &beta );
1228 
1229  // FLA_Copy( u21, z21 );
1230  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, v21, beta, z21 );
1232  m_ahead,
1233  u21, inc_u,
1234  z21, rs_Z );
1237  m_ahead,
1238  n_ahead,
1239  buff_1,
1240  A22, rs_A, cs_A,
1241  v21, inc_v,
1242  &beta,
1243  z21, rs_Z );
1244 
1245  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, v21, FLA_ZERO, f0 );
1246  // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, g0 );
1249  n_ahead,
1250  m_behind,
1251  buff_1,
1252  Y20, rs_Y, cs_Y,
1253  v21, inc_v,
1254  buff_0,
1255  f0, inc_f );
1258  m_behind,
1259  n_ahead,
1260  buff_1,
1261  A02, rs_A, cs_A,
1262  v21, inc_v,
1263  buff_0,
1264  g0, inc_g );
1265 
1266  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, z21 );
1267  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, z21 );
1270  m_ahead,
1271  n_behind,
1272  buff_m1,
1273  A20, rs_A, cs_A,
1274  f0, inc_f,
1275  buff_1,
1276  z21, rs_Z );
1279  m_ahead,
1280  n_behind,
1281  buff_m1,
1282  Z20, rs_Z, cs_Z,
1283  g0, inc_g,
1284  buff_1,
1285  z21, rs_Z );
1286 
1287  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1289  m_ahead,
1290  sigma11,
1291  z21, rs_Z );
1292 
1293  // FLA_Copy( g0, s01 );
1295  n_behind,
1296  g0, inc_g,
1297  s01, rs_S );
1298  }
1299  else // if ( n_ahead == 0 )
1300  {
  // No columns remain to the right: only t01 is produced.
1301  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1302  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1304  n_behind,
1305  a10t, cs_A,
1306  t01, rs_T );
1309  m_ahead,
1310  n_behind,
1311  buff_1,
1312  A20, rs_A, cs_A,
1313  u21, inc_u,
1314  buff_1,
1315  t01, rs_T );
1316  }
1317 
1318  /*------------------------------------------------------------*/
1319 
1320  }
1321 
1322  // FLA_Obj_free( &u );
1323  // FLA_Obj_free( &v );
1324  // FLA_Obj_free( &d );
1325  // FLA_Obj_free( &e );
1326  // FLA_Obj_free( &f );
1327  // FLA_Obj_free( &g );
1328  FLA_free( buff_u );
1329  FLA_free( buff_v );
1330  FLA_free( buff_d );
1331  FLA_free( buff_e );
1332  FLA_free( buff_f );
1333  FLA_free( buff_g );
1334 
1335  return FLA_SUCCESS;
1336 }
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
BLIS1_CONJ_TRANSPOSE
Definition: blis_type_defs.h:57
BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:61
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
FLA_Error FLA_Househ2_UT_r_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:677
BLIS1_TRANSPOSE
Definition: blis_type_defs.h:55
BLIS1_CONJUGATE
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
BLIS1_CONJ_NO_TRANSPOSE
Definition: blis_type_defs.h:56
BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
scomplex
Definition: blis_type_defs.h:132
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_invscalv.c:52
int i
Definition: bl1_axmyv2.c:145
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_opd_var1()

FLA_Error FLA_Bidiag_UT_u_step_opd_var1 ( int  m_A,
int  n_A,
int  m_TS,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T,
double *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_dcopyv(), bl1_dgemv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opd_var1(), FLA_Apply_H2_UT_r_opd_var1(), FLA_free(), FLA_Househ2_UT_l_opd(), FLA_Househ2_UT_r_opd(), FLA_malloc(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var1().

// Source listing of FLA_Bidiag_UT_u_step_opd_var1 (double buffers).
//
// Runs m_TS iterations. In iteration i, following the FLA_* pseudo-code
// comments kept in the listing:
//   1. a left Householder transform is computed from (alpha11, a21); its
//      scalar is stored in tau11 (diagonal of T);
//   2. if columns remain (n_ahead > 0) the transform is applied from the left
//      to a12t / A22, a right Householder transform is computed from
//      (a12t_l, a12t_r) with its scalar in sigma11 (diagonal of S), it is
//      applied from the right to A22, and s01 is computed from A02 and v21;
//   3. t01 is filled from a10t, A20 and a21 in every iteration.
//
// One temporary vector of n_A doubles (v) is obtained from FLA_malloc and
// released with FLA_free before FLA_SUCCESS is returned.
//
// NOTE(review): this extract has lost some listing lines (315, 331, 337,
// 345-346, 358, 362-363). Those are the lines that open a call, so several
// calls below appear as bare argument lists; consult the original source.
// NOTE(review): the FLA_malloc result is used without a NULL check in this
// listing - confirm that FLA_malloc itself handles allocation failure.
255 {
256  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
257  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
258 
259  int i;
260 
261  // b_alg = FLA_Obj_length( T );
262  int b_alg = m_TS;
263 
  // Only v is allocated in this variant; the y and z objects named in the
  // pseudo-code below have no buffer here.
264  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
265  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
266  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
267  double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
268  int inc_v = 1;
269 
270  for ( i = 0; i < b_alg; ++i )
271  {
  // Views into A relative to the diagonal element (i,i):
  //   a10t    -> (i,   0)      A20  -> (i+1, 0)
  //   alpha11 -> (i,   i)      a21  -> (i+1, i)
  //   A02     -> (0,   i+1)    a12t -> (i,   i+1)    A22 -> (i+1, i+1)
272  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
273  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
274  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
275  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
276  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
277  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
278  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
279 
280  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
281  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
282 
283  double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
284  double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
285 
  // v21 starts at element i+1 of v; it is written only when n_ahead > 0,
  // i.e. when i+1 < n_A.
286  double* v21 = buff_v + (i+1)*inc_v;
287 
  // *_l is the first column of the view, *_r starts one column to the right.
288  double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
289  double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
290 
291  double* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
292  double* A22_r = A22 + (1 )*cs_A + (0 )*rs_A;
293 
294  double* v21_t = v21 + (0 )*inc_v;
295  double* v21_b = v21 + (1 )*inc_v;
296 
297  int m_ahead = m_A - i - 1;
298  int n_ahead = n_A - i - 1;
299  int m_behind = i;
300  int n_behind = i;
301 
302  /*------------------------------------------------------------*/
303 
304  // FLA_Househ2_UT( FLA_LEFT,
305  // alpha11,
306  // a21, tau11 );
307  FLA_Househ2_UT_l_opd( m_ahead,
308  alpha11,
309  a21, rs_A,
310  tau11 );
311 
312  if ( n_ahead > 0 )
313  {
314  // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t, A22 );
  // NOTE(review): the opening line of this call (listing line 315) is
  // missing from this extract.
316  n_ahead,
317  tau11,
318  a21, rs_A,
319  a12t, cs_A,
320  A22, rs_A, cs_A );
321 
322  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
323  FLA_Househ2_UT_r_opd( n_ahead - 1,
324  a12t_l,
325  a12t_r, cs_A,
326  sigma11 );
327 
  // v21 = ( 1 ; a12t_r ): leading element set to one, remainder copied
  // from the row to the right of a12t_l.
328  // FLA_Set( FLA_ONE, v21_t );
329  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
330  *v21_t = *buff_1;
332  n_ahead - 1,
333  a12t_r, cs_A,
334  v21_b, inc_v );
335 
336  // FLA_Apply_H2_UT( FLA_RIGHT, sigma11, v21_b, A22_l, A22_r );
338  n_ahead - 1,
339  sigma11,
340  v21_b, inc_v,
341  A22_l, rs_A,
342  A22_r, rs_A, cs_A );
343 
344  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
347  m_behind,
348  n_ahead,
349  buff_1,
350  A02, rs_A, cs_A,
351  v21, inc_v,
352  buff_0,
353  s01, rs_S );
354  }
355 
  // Unlike the block above, the t01 update is not guarded by n_ahead.
356  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
357  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
359  n_behind,
360  a10t, cs_A,
361  t01, rs_T );
364  m_ahead,
365  n_behind,
366  buff_1,
367  A20, rs_A, cs_A,
368  a21, rs_A,
369  buff_1,
370  t01, rs_T );
371 
372  /*------------------------------------------------------------*/
373 
374  }
375 
376  // FLA_Obj_free( &v );
377  FLA_free( buff_v );
378 
379  return FLA_SUCCESS;
380 }
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
BLIS1_CONJ_TRANSPOSE
Definition: blis_type_defs.h:57
BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
FLA_Error FLA_Househ2_UT_r_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:664
FLA_Error FLA_Apply_H2_UT_l_opd_var1(int m_u2_A2, int n_a1t, double *tau, double *u2, int inc_u2, double *a1t, int inc_a1t, double *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:195
BLIS1_CONJUGATE
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
BLIS1_CONJ_NO_TRANSPOSE
Definition: blis_type_defs.h:56
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42
int i
Definition: bl1_axmyv2.c:145
FLA_Error FLA_Apply_H2_UT_r_opd_var1(int n_u2h_A2, int m_a1, double *tau, double *u2h, int inc_u2h, double *a1, int inc_a1, double *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_r_opt_var1.c:181
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_opd_var2()

FLA_Error FLA_Bidiag_UT_u_step_opd_var2 ( int  m_A,
int  n_A,
int  m_TS,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T,
double *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_Househ2_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var2().

// Source listing of FLA_Bidiag_UT_u_step_opd_var2 (double buffers).
//
// Runs m_TS iterations. In iteration i, following the FLA_* pseudo-code
// comments kept in the listing:
//   1. a left Householder transform is computed from (alpha11, a21); its
//      scalar is stored in tau11 (diagonal of T);
//   2. if columns remain (n_ahead > 0): y21 is formed from a12t, A22 and a21
//      and divided by tau11; a12t is updated with y21; a right Householder
//      transform is computed from (a12t_l, a12t_r) with its scalar in sigma11
//      (diagonal of S); z21 is formed from a21, A22, v21 and beta and divided
//      by sigma11; A22 receives two rank-1 updates (a21*y21', z21*v21'); and
//      s01 is computed from A02 and v21;
//   3. t01 is filled from a10t, A20 and a21 in every iteration.
//
// Three temporary vectors (v and y of n_A doubles, z of m_A doubles) are
// obtained from FLA_malloc and released with FLA_free before FLA_SUCCESS is
// returned.
//
// NOTE(review): this extract has lost some listing lines (396, 400-401, 411,
// 417, 432, 439, ...). Those are the lines that open a call, so several calls
// below appear as bare argument lists; consult the original source.
// NOTE(review): the FLA_malloc results are used without a NULL check in this
// listing - confirm that FLA_malloc itself handles allocation failure.
328 {
329  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
330  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
331  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
332 
333  double beta;
334  int i;
335 
336  // b_alg = FLA_Obj_length( T );
337  int b_alg = m_TS;
338 
339  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
340  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
341  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
342  double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
343  double* buff_y = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
344  double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
345  int inc_v = 1;
346  int inc_y = 1;
347  int inc_z = 1;
348 
349  for ( i = 0; i < b_alg; ++i )
350  {
  // Views into A relative to the diagonal element (i,i):
  //   a10t    -> (i,   0)      A20  -> (i+1, 0)
  //   alpha11 -> (i,   i)      a21  -> (i+1, i)
  //   A02     -> (0,   i+1)    a12t -> (i,   i+1)    A22 -> (i+1, i+1)
351  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
352  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
353  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
354  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
355  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
356  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
357  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
358 
359  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
360  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
361 
362  double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
363  double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
364 
  // The work vectors are addressed from element i+1 so that they line up
  // with the trailing part of A.
365  double* v21 = buff_v + (i+1)*inc_v;
366 
367  double* y21 = buff_y + (i+1)*inc_y;
368 
369  double* z21 = buff_z + (i+1)*inc_z;
370 
371  double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
372  double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
373 
374  double* v21_t = v21 + (0 )*inc_v;
375  double* v21_b = v21 + (1 )*inc_v;
376 
377  int m_ahead = m_A - i - 1;
378  int n_ahead = n_A - i - 1;
379  int m_behind = i;
380  int n_behind = i;
381 
382  /*------------------------------------------------------------*/
383 
384  // FLA_Househ2_UT( FLA_LEFT,
385  // alpha11,
386  // a21, tau11 );
387  FLA_Househ2_UT_l_opd( m_ahead,
388  alpha11,
389  a21, rs_A,
390  tau11 );
391 
392  if ( n_ahead > 0 )
393  {
394  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
395  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
  // NOTE(review): the opening lines of the next two calls (listing lines
  // 396 and 400-401) are missing from this extract.
397  n_ahead,
398  a12t, cs_A,
399  y21, inc_y );
402  m_ahead,
403  n_ahead,
404  buff_1,
405  A22, rs_A, cs_A,
406  a21, rs_A,
407  buff_1,
408  y21, inc_y );
409 
410  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
412  n_ahead,
413  tau11,
414  y21, inc_y );
415 
416  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
418  n_ahead,
419  buff_m1,
420  y21, inc_y,
421  a12t, cs_A );
422 
423  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
424  FLA_Househ2_UT_r_opd( n_ahead - 1,
425  a12t_l,
426  a12t_r, cs_A,
427  sigma11 );
428 
429  // FLA_Set( FLA_ONE, v21_t );
430  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
431  *v21_t = *buff_1;
  // NOTE(review): the destination v21_b belongs to buff_v, whose stride
  // variable is inc_v, yet inc_y is passed below. Both are initialised to 1
  // (listing lines 345-346), so the value is the same; inc_v looks intended.
433  n_ahead - 1,
434  a12t_r, cs_A,
435  v21_b, inc_y );
436 
437  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
438  // FLA_Scal( FLA_MINUS_ONE, beta );
440  n_ahead,
441  y21, inc_y,
442  v21, inc_v,
443  &beta );
444  bl1_dneg1( &beta );
445 
446  // FLA_Copy( a21, z21 );
447  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
448  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
450  m_ahead,
451  a21, rs_A,
452  z21, inc_z );
455  m_ahead,
456  n_ahead,
457  buff_1,
458  A22, rs_A, cs_A,
459  v21, inc_v,
460  &beta,
461  z21, inc_z );
463  m_ahead,
464  sigma11,
465  z21, inc_z );
466 
  // Two rank-1 updates of the trailing matrix A22.
467  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
468  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
471  m_ahead,
472  n_ahead,
473  buff_m1,
474  a21, rs_A,
475  y21, inc_y,
476  A22, rs_A, cs_A );
479  m_ahead,
480  n_ahead,
481  buff_m1,
482  z21, inc_z,
483  v21, inc_v,
484  A22, rs_A, cs_A );
485 
486  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
489  m_behind,
490  n_ahead,
491  buff_1,
492  A02, rs_A, cs_A,
493  v21, inc_v,
494  buff_0,
495  s01, rs_S );
496  }
497 
  // Unlike the block above, the t01 update is not guarded by n_ahead.
498  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
499  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
501  n_behind,
502  a10t, cs_A,
503  t01, rs_T );
506  m_ahead,
507  n_behind,
508  buff_1,
509  A20, rs_A, cs_A,
510  a21, rs_A,
511  buff_1,
512  t01, rs_T );
513 
514  /*------------------------------------------------------------*/
515 
516  }
517 
518  // FLA_Obj_free( &v );
519  // FLA_Obj_free( &y );
520  // FLA_Obj_free( &z );
521  FLA_free( buff_v );
522  FLA_free( buff_y );
523  FLA_free( buff_z );
524 
525  return FLA_SUCCESS;
526 }
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
BLIS1_CONJ_TRANSPOSE
Definition: blis_type_defs.h:57
BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
FLA_Error FLA_Househ2_UT_r_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:664
BLIS1_CONJUGATE
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition: bl1_ger.c:62
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_invscalv.c:26
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
BLIS1_CONJ_NO_TRANSPOSE
Definition: blis_type_defs.h:56
BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42
int i
Definition: bl1_axmyv2.c:145
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_opd_var3()

FLA_Error FLA_Bidiag_UT_u_step_opd_var3 ( int  m_A,
int  n_A,
int  m_TS,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T,
double *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_daxpyv(), bl1_dconjv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_Househ2s_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var3().

// Source listing of FLA_Bidiag_UT_u_step_opd_var3 (double buffers).
//
// Runs m_TS iterations. Compared with the other variants shown on this page,
// the rank-1 updates of the trailing matrix produced in iteration i are not
// applied in that iteration: the vectors u, y, z, v are kept in work buffers
// and applied at the start of iteration i+1 (the m_behind > 0 blocks), or at
// the end of the final iteration (the m_behind + 1 == b_alg block).
//
// Per iteration, following the FLA_* pseudo-code comments in the listing:
//   1. (m_behind > 0) alpha11, a21 and a12t are updated with the previous
//      iteration's u, y, z, v;
//   2. a left Householder transform is computed from (alpha11, a21) with its
//      scalar in tau11, and a21 is copied to u21p;
//   3. (n_ahead > 0) a12p is prepared from a12t and minus_inv_tau11;
//   4. (m_behind > 0) A22 receives the two deferred rank-1 updates;
//   5. (n_ahead > 0) y21, a12p and w21 are formed, a right Householder
//      transform is computed on a12p via FLA_Househ2s_UT_r_opd (returning
//      alpha12, psi11_minus_alpha12 and sigma11), and v21 / a12t are set;
//   6. u21p is copied to u21;
//   7. (n_ahead > 0) beta and z21 are formed, y21 and z21 are scaled by
//      tau11 and sigma11, and s01 is computed;
//   8. t01 is filled from a10t, A20 and u21.
//
// Seven temporary vectors are obtained from FLA_malloc (w, u, up, z of m_A
// doubles; ap, v, y of n_A doubles) and released with FLA_free before
// FLA_SUCCESS is returned.
//
// NOTE(review): this extract has lost some listing lines (653, 658, 666,
// 671, 679, 684, 699, ...). Those are the lines that open a call, so several
// calls below appear as bare argument lists; consult the original source.
// NOTE(review): the FLA_malloc results are used without a NULL check in this
// listing - confirm that FLA_malloc itself handles allocation failure.
538 {
539  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
540  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
541  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
542 
543  double alpha12;
544  double minus_conj_alpha12;
545  double psi11_minus_alpha12;
546  double minus_inv_tau11;
547  double minus_upsilon11;
548  double minus_conj_nu11;
549  double minus_conj_psi11;
550  double minus_zeta11;
551  double beta;
552  int i;
553 
554  // b_alg = FLA_Obj_length( T );
555  int b_alg = m_TS;
556 
557  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
558  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
559  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
560  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
561  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
562  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
563  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
564  double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
565  double* buff_ap = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
566  double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
567  double* buff_up = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
568  double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
569  double* buff_y = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
570  double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
571  int inc_w = 1;
572  int inc_ap = 1;
573  int inc_u = 1;
574  int inc_up = 1;
575  int inc_v = 1;
576  int inc_y = 1;
577  int inc_z = 1;
578 
579  for ( i = 0; i < b_alg; ++i )
580  {
  // Views into A relative to the diagonal element (i,i):
  //   a10t    -> (i,   0)      A20  -> (i+1, 0)
  //   alpha11 -> (i,   i)      a21  -> (i+1, i)
  //   A02     -> (0,   i+1)    a12t -> (i,   i+1)    A22 -> (i+1, i+1)
581  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
582  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
583  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
584  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
585  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
586  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
587  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
588 
589  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
590  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
591 
592  double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
593  double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
594 
  // Work-vector views. Element i (upsilon11, nu11, psi11, zeta11) is the
  // leading entry of what the previous iteration addressed as u21, v21, y21,
  // z21; element i+1 onwards is this iteration's trailing part.
595  double* w21 = buff_w + (i+1)*inc_w;
596 
597  double* a12p = buff_ap + (i+1)*inc_ap;
598 
599  double* upsilon11 = buff_u + (i )*inc_u;
600  double* u21 = buff_u + (i+1)*inc_u;
601 
602  double* u21p = buff_up + (i+1)*inc_up;
603 
604  double* nu11 = buff_v + (i )*inc_v;
605  double* v21 = buff_v + (i+1)*inc_v;
606 
607  double* psi11 = buff_y + (i )*inc_y;
608  double* y21 = buff_y + (i+1)*inc_y;
609 
610  double* zeta11 = buff_z + (i )*inc_z;
611  double* z21 = buff_z + (i+1)*inc_z;
612 
613  double* a12p_t = a12p + (0 )*inc_ap;
614  double* a12p_b = a12p + (1 )*inc_ap;
615 
616  double* v21_t = v21 + (0 )*inc_v;
617  double* v21_b = v21 + (1 )*inc_v;
618 
619  double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
620  double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
621 
622  double* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
623 
624  int m_ahead = m_A - i - 1;
625  int n_ahead = n_A - i - 1;
626  int m_behind = i;
627  int n_behind = i;
628 
629  /*------------------------------------------------------------*/
630 
  // Skipped in the first iteration: there are no vectors from a previous
  // iteration to apply yet.
631  if ( m_behind > 0 )
632  {
633  // FLA_Copy( upsilon11, minus_upsilon11 );
634  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
635  bl1_dmult3( buff_m1, upsilon11, &minus_upsilon11 );
636 
637  // FLA_Copy( zeta11, minus_zeta11 );
638  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
639  bl1_dmult3( buff_m1, zeta11, &minus_zeta11 );
640 
641  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
642  // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
643  bl1_dcopyconj( psi11, &minus_conj_psi11 );
644  bl1_dscals( buff_m1, &minus_conj_psi11 );
645 
646  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
647  // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
648  bl1_dcopyconj( nu11, &minus_conj_nu11 );
649  bl1_dscals( buff_m1, &minus_conj_nu11 );
650 
651  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
652  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, zeta11, alpha11 );
  // NOTE(review): the opening lines of the next two calls (listing lines
  // 653 and 658) are missing from this extract.
654  1,
655  &minus_conj_psi11,
656  upsilon11, 1,
657  alpha11, 1 );
659  1,
660  &minus_conj_nu11,
661  zeta11, 1,
662  alpha11, 1 );
663 
664  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
665  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
667  m_ahead,
668  &minus_conj_psi11,
669  u21, inc_u,
670  a21, rs_A );
672  m_ahead,
673  &minus_conj_nu11,
674  z21, inc_z,
675  a21, rs_A );
676 
677  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
678  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11, v21, a12t );
680  n_ahead,
681  &minus_upsilon11,
682  y21, inc_y,
683  a12t, cs_A );
685  n_ahead,
686  &minus_zeta11,
687  v21, inc_v,
688  a12t, cs_A );
689  }
690 
  // The new left reflector is copied to u21p rather than u21, because u21
  // still holds the previous iteration's vector, which the deferred A22
  // update below reads.
691  // FLA_Househ2_UT( FLA_LEFT,
692  // alpha11,
693  // a21, tau11 );
694  // FLA_Copy( a21, u21p );
695  FLA_Househ2_UT_l_opd( m_ahead,
696  alpha11,
697  a21, rs_A,
698  tau11 );
700  m_ahead,
701  a21, rs_A,
702  u21p, inc_up );
703 
704  if ( n_ahead > 0 )
705  {
706  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
707  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
708  bl1_ddiv3( buff_m1, tau11, &minus_inv_tau11 );
709 
710  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
711  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
713  n_ahead,
714  a12t, cs_A,
715  a12p, inc_ap );
717  n_ahead,
718  &minus_inv_tau11,
719  a12t, cs_A,
720  a12p, inc_ap );
721  }
722 
  // Deferred rank-1 updates of A22 with the previous iteration's vectors.
723  if ( m_behind > 0 )
724  {
725  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
726  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
729  m_ahead,
730  n_ahead,
731  buff_m1,
732  u21, inc_u,
733  y21, inc_y,
734  A22, rs_A, cs_A );
737  m_ahead,
738  n_ahead,
739  buff_m1,
740  z21, inc_z,
741  v21, inc_v,
742  A22, rs_A, cs_A );
743  }
744 
745  if ( n_ahead > 0 )
746  {
747  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
750  m_ahead,
751  n_ahead,
752  buff_1,
753  A22, rs_A, cs_A,
754  u21p, inc_up,
755  buff_0,
756  y21, inc_y );
757 
758  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
760  n_ahead,
761  &minus_inv_tau11,
762  y21, inc_y,
763  a12p, inc_ap );
764 
765  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
768  m_ahead,
769  n_ahead,
770  buff_1,
771  A22, rs_A, cs_A,
772  a12p, inc_ap,
773  buff_0,
774  w21, inc_w );
775 
776  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
778  n_ahead,
779  buff_1,
780  a12t, cs_A,
781  y21, inc_y );
782 
783  // FLA_Househ2s_UT( FLA_RIGHT,
784  // a12p_t,
785  // a12p_b,
786  // alpha12, psi11_minus_alpha12, sigma11 );
787  FLA_Househ2s_UT_r_opd( n_ahead - 1,
788  a12p_t,
789  a12p_b, inc_ap,
790  &alpha12,
791  &psi11_minus_alpha12,
792  sigma11 );
793 
794  // FLA_Copy( a12p, v21 );
795  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
796  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
797  // FLA_Conjugate( v21_b );
799  n_ahead,
800  a12p, inc_ap,
801  v21, inc_v );
802  bl1_dmult4( buff_m1, &alpha12, v21_t, v21_t );
804  n_ahead,
805  &psi11_minus_alpha12,
806  v21, inc_v );
807  bl1_dconjv( n_ahead - 1,
808  v21_b, inc_v );
809 
  // Write the transformed row back into A: alpha12 into the first element
  // of a12t, v21_b into the rest.
810  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
811  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
812  *a12t_l = alpha12;
814  n_ahead - 1,
815  v21_b, inc_v,
816  a12t_r, cs_A );
817  }
818 
  // From here on u21 holds this iteration's left reflector.
819  // FLA_Copy( u21p, u21 );
821  m_ahead,
822  u21p, inc_up,
823  u21, inc_u );
824 
825  if ( n_ahead > 0 )
826  {
827  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
828  // FLA_Scal( FLA_MINUS_ONE, beta );
829  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
831  n_ahead,
832  y21, inc_y,
833  v21, inc_v,
834  &beta );
835  bl1_dscals( &minus_inv_tau11, &beta );
836 
837  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
838  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
839  bl1_dcopyconj( &alpha12, &minus_conj_alpha12 );
840  bl1_dneg1( &minus_conj_alpha12 );
841 
842  // FLA_Copy( w21, z21 );
843  // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
844  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
845  // FLA_Axpy( beta, u21, z21 );
847  m_ahead,
848  w21, inc_w,
849  z21, inc_z );
851  m_ahead,
852  &minus_conj_alpha12,
853  A22_l, rs_A,
854  z21, inc_z );
856  m_ahead,
857  &psi11_minus_alpha12,
858  z21, inc_z );
860  m_ahead,
861  &beta,
862  u21, inc_u,
863  z21, inc_z );
864 
865  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
866  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
868  n_ahead,
869  tau11,
870  y21, inc_y );
872  m_ahead,
873  sigma11,
874  z21, inc_z );
875 
876  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
879  m_behind,
880  n_ahead,
881  buff_1,
882  A02, rs_A, cs_A,
883  v21, inc_v,
884  buff_0,
885  s01, rs_S );
886  }
887 
888  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
889  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
891  n_behind,
892  a10t, cs_A,
893  t01, rs_T );
896  m_ahead,
897  n_behind,
898  buff_1,
899  A20, rs_A, cs_A,
900  u21, inc_u,
901  buff_1,
902  t01, rs_T );
903 
  // m_behind + 1 == b_alg holds only in the final iteration (i == b_alg-1).
  // No later iteration will apply this iteration's rank-1 updates, so they
  // are applied to A22 here.
904  if ( m_behind + 1 == b_alg && n_ahead > 0 )
905  {
906  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
907  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
910  m_ahead,
911  n_ahead,
912  buff_m1,
913  u21, inc_u,
914  y21, inc_y,
915  A22, rs_A, cs_A );
918  m_ahead,
919  n_ahead,
920  buff_m1,
921  z21, inc_z,
922  v21, inc_v,
923  A22, rs_A, cs_A );
924  }
925 
926  /*------------------------------------------------------------*/
927 
928  }
929 
930  // FLA_Obj_free( &w );
931  // FLA_Obj_free( &ap );
932  // FLA_Obj_free( &u );
933  // FLA_Obj_free( &up );
934  // FLA_Obj_free( &v );
935  // FLA_Obj_free( &y );
936  // FLA_Obj_free( &z );
937  FLA_free( buff_w );
938  FLA_free( buff_ap );
939  FLA_free( buff_u );
940  FLA_free( buff_up );
941  FLA_free( buff_v );
942  FLA_free( buff_y );
943  FLA_free( buff_z );
944 
945  return FLA_SUCCESS;
946 }
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
FLA_Error FLA_Househ2s_UT_r_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *alpha, double *chi_1_minus_alpha, double *tau)
Definition: FLA_Househ2s_UT.c:572
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition: bl1_ger.c:62
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_invscalv.c:26
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void bl1_dconjv(int m, double *x, int incx)
Definition: bl1_conjv.c:18
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42
int i
Definition: bl1_axmyv2.c:145
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_opd_var4()

/*
 * FLA_Bidiag_UT_u_step_opd_var4 -- double-precision, raw-buffer step routine.
 *
 * Runs b_alg = m_TS iterations over the m_A x n_A matrix stored in buff_A.
 * Going by the calls and the retained FLA_* comments, iteration i:
 *   - subtracts the contributions of the first i columns of Y and Z from
 *     column i of A (a2) and from the row to the right of the diagonal (a12t);
 *   - calls FLA_Househ2_UT_l_opd on (alpha11, a21), which writes tau11;
 *   - when n_ahead > 0, builds the work copy a12p, calls
 *     FLA_Househ2s_UT_r_opd on it (writes alpha12, psi11_minus_alpha12 and
 *     sigma11), then fills v21, s01, y21 (column i of Y) and z21 (column i
 *     of Z) and writes alpha12 / v21_b back into a12t;
 *   - fills t01 (column i of T above tau11).
 *
 * Parameters:
 *   m_A, n_A            row / column counts used for A and for the work vectors.
 *   m_TS                number of iterations (b_alg).
 *   buff_A, rs_A, cs_A  base pointer and row / column strides of A; element
 *                       (r, c) is addressed as buff_A + c*cs_A + r*rs_A.
 *                       Updated in place.
 *   buff_Y, rs_Y, cs_Y  Y; its leading n_A x b_alg part is zeroed on entry.
 *   buff_Z, rs_Z, cs_Z  Z; its leading m_A x b_alg part is zeroed on entry.
 *   buff_T, rs_T, cs_T  receives tau11 on the diagonal and t01 above it.
 *   buff_S, rs_S, cs_S  receives sigma11 on the diagonal and s01 above it.
 *
 * Returns FLA_SUCCESS.
 *
 * NOTE(review): this is a rendered listing. Wherever the embedded listing
 * numbers skip (for example 852 -> 855) the opening line(s) of a call are
 * absent from this extract and only the argument tail remains. The FLA_*
 * comment directly above each gap and the References list below indicate
 * which bl1_* routine was presumably called -- confirm against the original
 * source file before relying on it.
 */
FLA_Error FLA_Bidiag_UT_u_step_opd_var4 ( int  m_A,
int  n_A,
int  m_TS,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_Y,
int  rs_Y,
int  cs_Y,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_T,
int  rs_T,
int  cs_T,
double *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_daxpyv(), bl1_dconjv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dinvscalv(), bl1_dsetm(), bl1_dsetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_Househ2s_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var4().

711 {
  // Pointers to the constants 1, 0 and -1, passed by address as the
  // alpha / beta scalars of the calls below.
712  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
713  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
714  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
715 
716  double alpha12;
717  double minus_conj_alpha12;
718  double psi11_minus_alpha12;
719  double minus_inv_tau11;
720  double beta;
721  double last_elem;
722  int i;
723 
724  // b_alg = FLA_Obj_length( T );
725  int b_alg = m_TS;
726 
727  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
728  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
729  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
730  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
731  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
732  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
733  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
734  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
735  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
736  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
  // Ten unit-stride work vectors of m_A or n_A elements; sizeof( *buff_A )
  // is the element size of A (double). All ten are released with FLA_free()
  // at the end of the function.
  // NOTE(review): the returned pointers are used without a NULL check here;
  // presumably FLA_malloc handles allocation failure itself -- confirm.
737  double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
738  double* buff_al = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
739  double* buff_ap = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
740  double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
741  double* buff_up = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
742  double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
743  double* buff_d = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
744  double* buff_e = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
745  double* buff_f = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
746  double* buff_g = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
747  int inc_w = 1;
748  int inc_al = 1;
749  int inc_ap = 1;
750  int inc_u = 1;
751  int inc_up = 1;
752  int inc_v = 1;
753  int inc_d = 1;
754  int inc_e = 1;
755  int inc_f = 1;
756  int inc_g = 1;
757 
758  // FLA_Set( FLA_ZERO, Y );
759  // FLA_Set( FLA_ZERO, Z );
760  bl1_dsetm( n_A,
761  b_alg,
762  buff_0,
763  buff_Y, rs_Y, cs_Y );
764  bl1_dsetm( m_A,
765  b_alg,
766  buff_0,
767  buff_Z, rs_Z, cs_Z );
768 
769  for ( i = 0; i < b_alg; ++i )
770  {
  // Views into A around the diagonal element (i, i): the "0" parts lie
  // before row/column i, the "1" parts are row/column i itself and the "2"
  // parts start at row/column i+1.
771  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
772  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
773  double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
774  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
775  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
776  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
777  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
778  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
779 
780  double* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
781  double* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
782  double* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
783 
784  double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
785  double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
786  double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
787 
788  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
789  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
790 
791  double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
792  double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
793 
  // The "21"/"12" work vectors start at offset i+1 of their buffers; the
  // d/e/f/g vectors are always used from offset 0.
794  double* w21 = buff_w + (i+1)*inc_w;
795 
796  double* a22l = buff_al + (i+1)*inc_al;
797 
798  double* a12p = buff_ap + (i+1)*inc_ap;
799 
800  double* u21 = buff_u + (i+1)*inc_u;
801 
802  double* u21p = buff_up + (i+1)*inc_up;
803 
804  double* v21 = buff_v + (i+1)*inc_v;
805 
806  double* d0 = buff_d + (0 )*inc_d;
807 
808  double* e0 = buff_e + (0 )*inc_e;
809 
810  double* f0 = buff_f + (0 )*inc_f;
811 
812  double* g0 = buff_g + (0 )*inc_g;
813 
  // _t is the first element, _b everything after it.
814  double* a12p_t = a12p + (0 )*inc_ap;
815  double* a12p_b = a12p + (1 )*inc_ap;
816 
817  double* v21_t = v21 + (0 )*inc_v;
818  double* v21_b = v21 + (1 )*inc_v;
819 
  // Last element of a01, i.e. A(i-1, i). For i == 0 this address lies before
  // a01 and is only formed, never dereferenced (both uses below are guarded
  // by m_behind > 0).
820  double* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
821 
822  double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
823  double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
824 
825  double* A02_l = A02 + (0 )*cs_A + (0 )*rs_A;
826 
827  double* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
828 
829  double* Y20_t = Y20 + (0 )*cs_Y + (0 )*rs_Y;
830 
  // ABL / ZBL start at row i of A / Z (column 0); a2 starts at the diagonal
  // element, so the products below span m_ahead + 1 rows.
831  double* ABL = a10t;
832  double* ZBL = z10t;
833 
834  double* a2 = alpha11;
835 
836  int m_ahead = m_A - i - 1;
837  int n_ahead = n_A - i - 1;
838  int m_behind = i;
839  int n_behind = i;
840 
841  /*------------------------------------------------------------*/
842 
843  if ( m_behind > 0 )
844  {
845  // FLA_Copy( a01_b, last_elem );
846  // FLA_Set( FLA_ONE, a01_b );
  // Temporarily overwrite A(i-1, i) with 1 while a01 is used as an operand
  // below; the saved value is put back after the two a12t updates.
  // NOTE(review): presumably that slot holds a computed matrix entry while
  // the product needs the implicit unit element of a stored vector -- confirm.
847  last_elem = *a01_b;
848  *a01_b = *buff_1;
849  }
850 
851  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
852  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
  // NOTE(review): listing lines 853-854 (the call opener) are missing from
  // this extract; the remaining arguments line up with the tail of
  // bl1_dgemv( transa, conjx, m, n, alpha, a, a_rs, a_cs, x, incx, beta, y, incy ).
  // The same applies to listing lines 862-863 before the second call.
855  m_ahead + 1,
856  n_behind,
857  buff_m1,
858  ABL, rs_A, cs_A,
859  y10t, cs_Y,
860  buff_1,
861  a2, rs_A );
864  m_ahead + 1,
865  n_behind,
866  buff_m1,
867  ZBL, rs_Z, cs_Z,
868  a01, rs_A,
869  buff_1,
870  a2, rs_A );
871 
872  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
873  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
876  n_ahead,
877  n_behind,
878  buff_m1,
879  Y20, rs_Y, cs_Y,
880  a10t, cs_A,
881  buff_1,
882  a12t, cs_A );
885  m_behind,
886  n_ahead,
887  buff_m1,
888  A02, rs_A, cs_A,
889  z10t, cs_Z,
890  buff_1,
891  a12t, cs_A );
892 
893  if ( m_behind > 0 )
894  {
895  // FLA_Copy( last_elem, a01_b );
896  *a01_b = last_elem;
897  }
898 
899  // FLA_Househ2_UT( FLA_LEFT,
900  // alpha11,
901  // a21, tau11 );
902  // FLA_Copy( a21, u21p );
903  FLA_Househ2_UT_l_opd( m_ahead,
904  alpha11,
905  a21, rs_A,
906  tau11 );
  // NOTE(review): listing line 907 (the call opener) is missing; the arguments
  // match the tail of bl1_dcopyv( conj, m, x, incx, y, incy ), i.e. the
  // FLA_Copy( a21, u21p ) of the comment above.
908  m_ahead,
909  a21, rs_A,
910  u21p, inc_up );
911 
912  if ( n_ahead > 0 )
913  {
914  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
915  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
  // Per the two comments above this leaves -1 / tau11 in minus_inv_tau11
  // (bl1_ddiv3 itself is not shown on this page).
916  bl1_ddiv3( buff_m1, tau11, &minus_inv_tau11 );
917 
918  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
919  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
921  n_ahead,
922  a12t, cs_A,
923  a12p, inc_ap );
  // NOTE(review): listing line 924 (the call opener) is missing; the arguments
  // match the tail of bl1_daxpyv( conj, n, alpha, x, incx, y, incy ).
925  n_ahead,
926  &minus_inv_tau11,
927  a12t, cs_A,
928  a12p, inc_ap );
929 
930  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
931  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
934  m_ahead,
935  n_behind,
936  buff_1,
937  A20, rs_A, cs_A,
938  u21p, inc_up,
939  buff_0,
940  d0, inc_d );
943  m_ahead,
944  n_behind,
945  buff_1,
946  Z20, rs_Z, cs_Z,
947  u21p, inc_up,
948  buff_0,
949  e0, inc_e );
950 
951  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
952  // FLA_Axpy( FLA_ONE, d0, t01 );
  // t01 is formed here from d0; the n_ahead == 0 branch near the end of the
  // loop body forms it with a matrix-vector product instead.
954  n_behind,
955  a10t, cs_A,
956  t01, rs_T );
958  n_behind,
959  buff_1,
960  d0, inc_d,
961  t01, rs_T );
962 
963  // FLA_Set( FLA_ZERO, y21 );
964  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
965  // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
966  bl1_dsetv( n_ahead,
967  buff_0,
968  y21, rs_Y );
971  n_ahead,
972  n_behind,
973  buff_m1,
974  Y20, rs_Y, cs_Y,
975  d0, inc_d,
976  buff_1,
977  y21, rs_Y );
980  m_behind,
981  n_ahead,
982  buff_m1,
983  A02, rs_A, cs_A,
984  e0, inc_e,
985  buff_1,
986  y21, rs_Y );
987 
988  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
991  m_ahead,
992  n_ahead,
993  buff_1,
994  A22, rs_A, cs_A,
995  u21p, inc_up,
996  buff_1,
997  y21, rs_Y );
998 
999  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1001  n_ahead,
1002  &minus_inv_tau11,
1003  y21, rs_Y,
1004  a12p, inc_ap );
1005 
1006  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1009  m_ahead,
1010  n_ahead,
1011  buff_1,
1012  A22, rs_A, cs_A,
1013  a12p, inc_ap,
1014  buff_0,
1015  w21, inc_w );
1016 
1017  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
1018  // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
1021  n_ahead,
1022  n_behind,
1023  buff_1,
1024  Y20, rs_Y, cs_Y,
1025  a12p, inc_ap,
1026  buff_0,
1027  f0, inc_f );
1030  m_behind,
1031  n_ahead,
1032  buff_1,
1033  A02, rs_A, cs_A,
1034  a12p, inc_ap,
1035  buff_0,
1036  g0, inc_g );
1037 
1038  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
1039  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
1042  m_ahead,
1043  n_behind,
1044  buff_m1,
1045  A20, rs_A, cs_A,
1046  f0, inc_f,
1047  buff_1,
1048  w21, inc_w );
1051  m_ahead,
1052  n_behind,
1053  buff_m1,
1054  Z20, rs_Z, cs_Z,
1055  g0, inc_g,
1056  buff_1,
1057  w21, inc_w );
1058 
1059  // FLA_Copy( A22_l, a22l );
1060  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
1061  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
  // a22l receives a copy of the first column of A22 and is then corrected by
  // the two products; A22 itself is not written in this step.
1063  m_ahead,
1064  A22_l, rs_A,
1065  a22l, inc_al );
1068  m_ahead,
1069  n_behind,
1070  buff_m1,
1071  A20, rs_A, cs_A,
1072  Y20_t, cs_Y,
1073  buff_1,
1074  a22l, inc_al );
1077  m_ahead,
1078  n_behind,
1079  buff_m1,
1080  Z20, rs_Z, cs_Z,
1081  A02_l, rs_A,
1082  buff_1,
1083  a22l, inc_al );
1084 
1085  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1087  n_ahead,
1088  buff_1,
1089  a12t, cs_A,
1090  y21, rs_Y );
1091 
1092  // FLA_Househ2s_UT( FLA_RIGHT,
1093  // a12p_t,
1094  // a12p_b,
1095  // alpha12, psi11_minus_alpha12, sigma11 );
  // Operates on the work copy a12p: a12p_t is its first element and a12p_b
  // the remaining n_ahead - 1 elements.
1096  FLA_Househ2s_UT_r_opd( n_ahead - 1,
1097  a12p_t,
1098  a12p_b, inc_ap,
1099  &alpha12,
1100  &psi11_minus_alpha12,
1101  sigma11 );
1102 
1103  // FLA_Copy( a12p, v21 );
1104  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1105  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1106  // FLA_Conjugate( v21_b );
1108  n_ahead,
1109  a12p, inc_ap,
1110  v21, inc_v );
1111  bl1_dmult4( buff_m1, &alpha12, v21_t, v21_t );
  // NOTE(review): listing line 1112 (the call opener) is missing; the arguments
  // match the tail of bl1_dinvscalv( conj, n, alpha, x, incx ).
1113  n_ahead,
1114  &psi11_minus_alpha12,
1115  v21, inc_v );
1116  bl1_dconjv( n_ahead - 1,
1117  v21_b, inc_v );
1118 
1119  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1120  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1121  bl1_dcopyconj( &alpha12, &minus_conj_alpha12 );
1122  bl1_dneg1( &minus_conj_alpha12 );
1123 
1124  // FLA_Copy( g0, s01 );
1125  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
1126  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
1128  n_behind,
1129  g0, inc_g,
1130  s01, rs_S );
1132  n_behind,
1133  &minus_conj_alpha12,
1134  A02_l, rs_A,
1135  s01, rs_S );
1137  n_behind,
1138  &psi11_minus_alpha12,
1139  s01, rs_S );
1140 
1141  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
1142  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
  // Write the results back into row i of A: alpha12 into the first element
  // of a12t, v21_b into the rest.
1143  *a12t_l = alpha12;
1145  n_ahead - 1,
1146  v21_b, inc_v,
1147  a12t_r, cs_A );
1148  }
1149 
1150  // FLA_Copy( u21p, u21 );
1152  m_ahead,
1153  u21p, inc_up,
1154  u21, inc_u );
1155 
1156  if ( n_ahead > 0 )
1157  {
1158  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1159  // FLA_Scal( FLA_MINUS_ONE, beta );
1160  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
  // NOTE(review): listing line 1161 (the call opener) is missing; the arguments
  // match the tail of bl1_ddot( conj, n, x, incx, y, incy, rho ).
1162  n_ahead,
1163  y21, rs_Y,
1164  v21, inc_v,
1165  &beta );
  // One scaling by minus_inv_tau11 stands in for both the FLA_Scal( -1 ) and
  // the FLA_Inv_scalc( tau11 ) listed in the comments above.
1166  bl1_dscals( &minus_inv_tau11, &beta );
1167 
1168  // FLA_Copy( w21, z21 );
1169  // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
1170  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1171  // FLA_Axpy( beta, u21, z21 );
1173  m_ahead,
1174  w21, inc_w,
1175  z21, rs_Z );
1177  m_ahead,
1178  &minus_conj_alpha12,
1179  a22l, inc_al,
1180  z21, rs_Z );
1182  m_ahead,
1183  &psi11_minus_alpha12,
1184  z21, rs_Z );
1186  m_ahead,
1187  &beta,
1188  u21, inc_u,
1189  z21, rs_Z );
1190 
1191  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1192  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1194  n_ahead,
1195  tau11,
1196  y21, rs_Y );
1198  m_ahead,
1199  sigma11,
1200  z21, rs_Z );
1201  }
1202  else // if ( n_ahead == 0 )
1203  {
  // No columns remain to the right, so the first n_ahead > 0 block (which
  // also fills t01) was skipped; t01 is formed here instead.
1204  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1205  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1207  n_behind,
1208  a10t, cs_A,
1209  t01, rs_T );
1212  m_ahead,
1213  n_behind,
1214  buff_1,
1215  A20, rs_A, cs_A,
1216  u21, inc_u,
1217  buff_1,
1218  t01, rs_T );
1219  }
1220 
1221  /*------------------------------------------------------------*/
1222 
1223  }
1224 
1225  // FLA_Obj_free( &w );
1226  // FLA_Obj_free( &al );
1227  // FLA_Obj_free( &ap );
1228  // FLA_Obj_free( &u );
1229  // FLA_Obj_free( &up );
1230  // FLA_Obj_free( &v );
1231  // FLA_Obj_free( &d );
1232  // FLA_Obj_free( &e );
1233  // FLA_Obj_free( &f );
1234  // FLA_Obj_free( &g );
  // Release the ten work vectors allocated at the top of the function.
1235  FLA_free( buff_w );
1236  FLA_free( buff_al );
1237  FLA_free( buff_ap );
1238  FLA_free( buff_u );
1239  FLA_free( buff_up );
1240  FLA_free( buff_v );
1241  FLA_free( buff_d );
1242  FLA_free( buff_e );
1243  FLA_free( buff_f );
1244  FLA_free( buff_g );
1245 
1246  return FLA_SUCCESS;
1247 }
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_dsetm(int m, int n, double *sigma, double *a, int a_rs, int a_cs)
Definition: bl1_setm.c:45
FLA_Error FLA_Househ2s_UT_r_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *alpha, double *chi_1_minus_alpha, double *tau)
Definition: FLA_Househ2s_UT.c:572
Definition: blis_type_defs.h:55
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_dsetv(int m, double *sigma, double *x, int incx)
Definition: bl1_setv.c:39
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_invscalv.c:26
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void bl1_dconjv(int m, double *x, int incx)
Definition: bl1_conjv.c:18
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42
int i
Definition: bl1_axmyv2.c:145
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_opd_var5()

/*
 * FLA_Bidiag_UT_u_step_opd_var5 -- double-precision, raw-buffer step routine.
 *
 * Runs b_alg = m_TS iterations over the m_A x n_A matrix stored in buff_A.
 * Going by the calls and the retained FLA_* comments, iteration i:
 *   - subtracts the contributions of the first i columns of Y and Z from
 *     column i of A (a2) and from the row to the right of the diagonal (a12t);
 *   - calls FLA_Househ2_UT_l_opd on (alpha11, a21), which writes tau11, and
 *     copies a21 into the work vector u21;
 *   - when n_ahead > 0, fills y21 (column i of Y), subtracts it from a12t,
 *     calls FLA_Househ2_UT_r_opd directly on a12t (writes sigma11), builds
 *     v21 with a leading 1, and fills z21 (column i of Z) and s01;
 *   - fills t01 (column i of T above tau11).
 *
 * Parameters:
 *   m_A, n_A            row / column counts used for A and for the work vectors.
 *   m_TS                number of iterations (b_alg).
 *   buff_A, rs_A, cs_A  base pointer and row / column strides of A; element
 *                       (r, c) is addressed as buff_A + c*cs_A + r*rs_A.
 *                       Updated in place.
 *   buff_Y, rs_Y, cs_Y  Y; its leading n_A x b_alg part is zeroed on entry.
 *   buff_Z, rs_Z, cs_Z  Z; its leading m_A x b_alg part is zeroed on entry.
 *   buff_T, rs_T, cs_T  receives tau11 on the diagonal and t01 above it.
 *   buff_S, rs_S, cs_S  receives sigma11 on the diagonal and s01 above it.
 *
 * Returns FLA_SUCCESS.
 *
 * NOTE(review): this is a rendered listing. Wherever the embedded listing
 * numbers skip (for example 666 -> 669) the opening line(s) of a call are
 * absent from this extract and only the argument tail remains. The FLA_*
 * comment directly above each gap and the References list below indicate
 * which bl1_* routine was presumably called -- confirm against the original
 * source file before relying on it.
 */
FLA_Error FLA_Bidiag_UT_u_step_opd_var5 ( int  m_A,
int  n_A,
int  m_TS,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_Y,
int  rs_Y,
int  cs_Y,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_T,
int  rs_T,
int  cs_T,
double *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dinvscalv(), bl1_dsetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_Househ2_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var5().

558 {
  // Pointers to the constants 1, 0 and -1, passed by address as the
  // alpha / beta scalars of the calls below.
559  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
560  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
561  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
562 
563  double beta;
564  double last_elem;
565  int i;
566 
567  // b_alg = FLA_Obj_length( T );
568  int b_alg = m_TS;
569 
570  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
571  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
572  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
573  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
574  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
575  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
  // Six unit-stride work vectors of m_A or n_A elements; sizeof( *buff_A )
  // is the element size of A (double). All six are released with FLA_free()
  // at the end of the function.
  // NOTE(review): the returned pointers are used without a NULL check here;
  // presumably FLA_malloc handles allocation failure itself -- confirm.
576  double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
577  double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
578  double* buff_d = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
579  double* buff_e = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
580  double* buff_f = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
581  double* buff_g = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
582  int inc_u = 1;
583  int inc_v = 1;
584  int inc_d = 1;
585  int inc_e = 1;
586  int inc_f = 1;
587  int inc_g = 1;
588 
589  // FLA_Set( FLA_ZERO, Y );
590  // FLA_Set( FLA_ZERO, Z );
591  bl1_dsetm( n_A,
592  b_alg,
593  buff_0,
594  buff_Y, rs_Y, cs_Y );
595  bl1_dsetm( m_A,
596  b_alg,
597  buff_0,
598  buff_Z, rs_Z, cs_Z );
599 
600  for ( i = 0; i < b_alg; ++i )
601  {
  // Views into A around the diagonal element (i, i): the "0" parts lie
  // before row/column i, the "1" parts are row/column i itself and the "2"
  // parts start at row/column i+1.
602  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
603  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
604  double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
605  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
606  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
607  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
608  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
609  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
610 
611  double* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
612  double* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
613  double* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
614 
615  double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
616  double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
617  double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
618 
619  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
620  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
621 
622  double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
623  double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
624 
  // u21 / v21 start at offset i+1 of their buffers; the d/e/f/g vectors are
  // always used from offset 0.
625  double* u21 = buff_u + (i+1)*inc_u;
626 
627  double* v21 = buff_v + (i+1)*inc_v;
628 
629  double* d0 = buff_d + (0 )*inc_d;
630 
631  double* e0 = buff_e + (0 )*inc_e;
632 
633  double* f0 = buff_f + (0 )*inc_f;
634 
635  double* g0 = buff_g + (0 )*inc_g;
636 
  // _t is the first element, _b everything after it.
637  double* v21_t = v21 + (0 )*inc_v;
638  double* v21_b = v21 + (1 )*inc_v;
639 
  // Last element of a01, i.e. A(i-1, i). For i == 0 this address lies before
  // a01 and is only formed, never dereferenced (both uses below are guarded
  // by m_behind > 0).
640  double* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
641 
642  double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
643  double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
644 
  // ABL / ZBL start at row i of A / Z (column 0); a2 starts at the diagonal
  // element, so the products below span m_ahead + 1 rows.
645  double* ABL = a10t;
646  double* ZBL = z10t;
647 
648  double* a2 = alpha11;
649 
650  int m_ahead = m_A - i - 1;
651  int n_ahead = n_A - i - 1;
652  int m_behind = i;
653  int n_behind = i;
654 
655  /*------------------------------------------------------------*/
656 
657  if ( m_behind > 0 )
658  {
659  // FLA_Copy( a01_b, last_elem );
660  // FLA_Set( FLA_ONE, a01_b );
  // Temporarily overwrite A(i-1, i) with 1 while a01 is used as an operand
  // below; the saved value is put back after the two a12t updates.
  // NOTE(review): presumably that slot holds a computed matrix entry while
  // the product needs the implicit unit element of a stored vector -- confirm.
661  last_elem = *a01_b;
662  *a01_b = *buff_1;
663  }
664 
  // NOTE(review): the two comments below name FLA_Gemv but carry seven
  // arguments including FLA_CONJUGATE, which looks like the FLA_Gemvc form
  // -- confirm against the original source.
665  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
666  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
  // NOTE(review): listing lines 667-668 (the call opener) are missing from
  // this extract; the remaining arguments line up with the tail of
  // bl1_dgemv( transa, conjx, m, n, alpha, a, a_rs, a_cs, x, incx, beta, y, incy ).
  // The same applies to listing lines 676-677 before the second call.
669  m_ahead + 1,
670  n_behind,
671  buff_m1,
672  ABL, rs_A, cs_A,
673  y10t, cs_Y,
674  buff_1,
675  a2, rs_A );
678  m_ahead + 1,
679  n_behind,
680  buff_m1,
681  ZBL, rs_Z, cs_Z,
682  a01, rs_A,
683  buff_1,
684  a2, rs_A );
685 
686  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
687  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
690  n_ahead,
691  n_behind,
692  buff_m1,
693  Y20, rs_Y, cs_Y,
694  a10t, cs_A,
695  buff_1,
696  a12t, cs_A );
699  m_behind,
700  n_ahead,
701  buff_m1,
702  A02, rs_A, cs_A,
703  z10t, cs_Z,
704  buff_1,
705  a12t, cs_A );
706 
707  if ( m_behind > 0 )
708  {
709  // FLA_Copy( last_elem, a01_b );
710  *a01_b = last_elem;
711  }
712 
713  // FLA_Househ2_UT( FLA_LEFT,
714  // alpha11,
715  // a21, tau11 );
716  // FLA_Copy( a21, u21 );
717  FLA_Househ2_UT_l_opd( m_ahead,
718  alpha11,
719  a21, rs_A,
720  tau11 );
  // NOTE(review): listing line 721 (the call opener) is missing; the arguments
  // match the tail of bl1_dcopyv( conj, m, x, incx, y, incy ), i.e. the
  // FLA_Copy( a21, u21 ) of the comment above.
722  m_ahead,
723  a21, rs_A,
724  u21, inc_u );
725 
726  if ( n_ahead > 0 )
727  {
728  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
729  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21, FLA_ONE, y21 );
731  n_ahead,
732  a12t, cs_A,
733  y21, rs_Y );
736  m_ahead,
737  n_ahead,
738  buff_1,
739  A22, rs_A, cs_A,
740  u21, inc_u,
741  buff_1,
742  y21, rs_Y );
743 
744  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ZERO, d0 );
745  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21, FLA_ZERO, e0 );
748  m_ahead,
749  n_behind,
750  buff_1,
751  A20, rs_A, cs_A,
752  u21, inc_u,
753  buff_0,
754  d0, inc_d );
757  m_ahead,
758  n_behind,
759  buff_1,
760  Z20, rs_Z, cs_Z,
761  u21, inc_u,
762  buff_0,
763  e0, inc_e );
764 
765  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
766  // FLA_Axpy( FLA_ONE, d0, t01 );
  // t01 is formed here from d0; the n_ahead == 0 branch at the end of the
  // loop body forms it with a matrix-vector product instead.
768  n_behind,
769  a10t, cs_A,
770  t01, rs_T );
  // NOTE(review): listing line 771 (the call opener) is missing; the arguments
  // match the tail of bl1_daxpyv( conj, n, alpha, x, incx, y, incy ).
772  n_behind,
773  buff_1,
774  d0, inc_d,
775  t01, rs_T );
776 
777  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
778  // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
781  n_ahead,
782  n_behind,
783  buff_m1,
784  Y20, rs_Y, cs_Y,
785  d0, inc_d,
786  buff_1,
787  y21, rs_Y );
790  m_behind,
791  n_ahead,
792  buff_m1,
793  A02, rs_A, cs_A,
794  e0, inc_e,
795  buff_1,
796  y21, rs_Y );
797 
798  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
  // NOTE(review): listing line 799 (the call opener) is missing; the arguments
  // match the tail of bl1_dinvscalv( conj, n, alpha, x, incx ).
800  n_ahead,
801  tau11,
802  y21, rs_Y );
803 
804  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
806  n_ahead,
807  buff_m1,
808  y21, rs_Y,
809  a12t, cs_A );
810 
811  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
  // Applied directly to row i of A: a12t_l is its first element right of the
  // diagonal and a12t_r the remaining n_ahead - 1 elements.
812  FLA_Househ2_UT_r_opd( n_ahead - 1,
813  a12t_l,
814  a12t_r, cs_A,
815  sigma11 );
816 
817  // FLA_Set( FLA_ONE, v21_t );
818  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
  // v21 = ( 1, a12t_r ): a leading 1 followed by a copy of a12t_r.
819  *v21_t = *buff_1;
821  n_ahead - 1,
822  a12t_r, cs_A,
823  v21_b, inc_v );
824 
825  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
826  // FLA_Scal( FLA_MINUS_ONE, beta );
  // NOTE(review): listing line 827 (the call opener) is missing; the arguments
  // match the tail of bl1_ddot( conj, n, x, incx, y, incy, rho ).
828  n_ahead,
829  y21, rs_Y,
830  v21, inc_v,
831  &beta );
832  bl1_dscals( buff_m1, &beta );
833 
834  // FLA_Copy( u21, z21 );
835  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, v21, beta, z21 );
  // z21 is first set to u21 so that the product below, which uses beta as its
  // scaling of the existing z21, adds beta * u21 to the A22 * v21 term.
837  m_ahead,
838  u21, inc_u,
839  z21, rs_Z );
842  m_ahead,
843  n_ahead,
844  buff_1,
845  A22, rs_A, cs_A,
846  v21, inc_v,
847  &beta,
848  z21, rs_Z );
849 
  // NOTE(review): the two comments below name FLA_Gemvc but carry only six
  // arguments (no conjugation flag for the vector), unlike the seven-argument
  // FLA_Gemv comments at listing lines 665-666 -- confirm against the
  // original source.
850  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, v21, FLA_ZERO, f0 );
851  // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, g0 );
  // The first call passes m_behind where the neighbouring calls on Y20 pass
  // n_behind; both are i, so the value is the same.
854  n_ahead,
855  m_behind,
856  buff_1,
857  Y20, rs_Y, cs_Y,
858  v21, inc_v,
859  buff_0,
860  f0, inc_f );
863  m_behind,
864  n_ahead,
865  buff_1,
866  A02, rs_A, cs_A,
867  v21, inc_v,
868  buff_0,
869  g0, inc_g );
870 
871  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, z21 );
872  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, z21 );
875  m_ahead,
876  n_behind,
877  buff_m1,
878  A20, rs_A, cs_A,
879  f0, inc_f,
880  buff_1,
881  z21, rs_Z );
884  m_ahead,
885  n_behind,
886  buff_m1,
887  Z20, rs_Z, cs_Z,
888  g0, inc_g,
889  buff_1,
890  z21, rs_Z );
891 
892  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
894  m_ahead,
895  sigma11,
896  z21, rs_Z );
897 
898  // FLA_Copy( g0, s01 );
900  n_behind,
901  g0, inc_g,
902  s01, rs_S );
903  }
904  else // if ( n_ahead == 0 )
905  {
  // No columns remain to the right, so the block above (which also fills
  // t01) was skipped; t01 is formed here instead.
906  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
907  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
909  n_behind,
910  a10t, cs_A,
911  t01, rs_T );
914  m_ahead,
915  n_behind,
916  buff_1,
917  A20, rs_A, cs_A,
918  u21, inc_u,
919  buff_1,
920  t01, rs_T );
921  }
922 
923  /*------------------------------------------------------------*/
924 
925  }
926 
927  // FLA_Obj_free( &u );
928  // FLA_Obj_free( &v );
929  // FLA_Obj_free( &d );
930  // FLA_Obj_free( &e );
931  // FLA_Obj_free( &f );
932  // FLA_Obj_free( &g );
  // Release the six work vectors allocated at the top of the function.
933  FLA_free( buff_u );
934  FLA_free( buff_v );
935  FLA_free( buff_d );
936  FLA_free( buff_e );
937  FLA_free( buff_f );
938  FLA_free( buff_g );
939 
940  return FLA_SUCCESS;
941 }
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_dsetm(int m, int n, double *sigma, double *a, int a_rs, int a_cs)
Definition: bl1_setm.c:45
FLA_Error FLA_Househ2_UT_r_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:664
Definition: blis_type_defs.h:55
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_invscalv.c:26
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42
int i
Definition: bl1_axmyv2.c:145
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_ops_var1()

/*
 * FLA_Bidiag_UT_u_step_ops_var1 -- single-precision, raw-buffer step routine.
 *
 * Runs b_alg = m_TS iterations over the m_A x n_A matrix stored in buff_A.
 * Going by the calls and the retained FLA_* comments, iteration i:
 *   - calls FLA_Househ2_UT_l_ops on (alpha11, a21), which writes tau11;
 *   - when n_ahead > 0, applies that transform from the left to a12t / A22,
 *     calls FLA_Househ2_UT_r_ops on a12t (writes sigma11), builds v21 with a
 *     leading 1, applies the second transform from the right to A22, and
 *     fills s01 from A02 and v21;
 *   - fills t01 (column i of T above tau11) from a10t, A20 and a21.
 *
 * Parameters:
 *   m_A, n_A            row / column counts of A.
 *   m_TS                number of iterations (b_alg).
 *   buff_A, rs_A, cs_A  base pointer and row / column strides of A; element
 *                       (r, c) is addressed as buff_A + c*cs_A + r*rs_A.
 *                       Updated in place.
 *   buff_T, rs_T, cs_T  receives tau11 on the diagonal and t01 above it.
 *   buff_S, rs_S, cs_S  receives sigma11 on the diagonal and s01 above it.
 *
 * Returns FLA_SUCCESS.
 *
 * NOTE(review): this is a rendered listing. Wherever the embedded listing
 * numbers skip (for example 179 -> 181) the opening line(s) of a call are
 * absent from this extract and only the argument tail remains. The FLA_*
 * comment directly above each gap and the References list below indicate
 * which routine was presumably called -- confirm against the original source
 * file before relying on it.
 */
FLA_Error FLA_Bidiag_UT_u_step_ops_var1 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_scopyv(), bl1_sgemv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_ops_var1(), FLA_Apply_H2_UT_r_ops_var1(), FLA_free(), FLA_Househ2_UT_l_ops(), FLA_Househ2_UT_r_ops(), FLA_malloc(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var1().

120 {
  // Pointers to the constants 1 and 0, passed by address as the alpha / beta
  // scalars of the calls below.
121  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
122  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
123 
124  int i;
125 
126  // b_alg = FLA_Obj_length( T );
127  int b_alg = m_TS;
128 
129  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
130  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
131  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
  // Only v gets a buffer in this routine; the y and z lines above are
  // comment-only. sizeof( *buff_A ) is the element size of A (float).
  // NOTE(review): the returned pointer is used without a NULL check here;
  // presumably FLA_malloc handles allocation failure itself -- confirm.
132  float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
133  int inc_v = 1;
134 
135  for ( i = 0; i < b_alg; ++i )
136  {
  // Views into A around the diagonal element (i, i): the "0" parts lie
  // before row/column i, the "1" parts are row/column i itself and the "2"
  // parts start at row/column i+1.
137  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
138  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
139  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
140  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
141  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
142  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
143  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
144 
145  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
146  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
147 
148  float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
149  float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
150 
151  float* v21 = buff_v + (i+1)*inc_v;
152 
  // _l is the first column / element, _r everything to its right.
153  float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
154  float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
155 
156  float* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
157  float* A22_r = A22 + (1 )*cs_A + (0 )*rs_A;
158 
  // _t is the first element, _b everything after it.
159  float* v21_t = v21 + (0 )*inc_v;
160  float* v21_b = v21 + (1 )*inc_v;
161 
162  int m_ahead = m_A - i - 1;
163  int n_ahead = n_A - i - 1;
164  int m_behind = i;
165  int n_behind = i;
166 
167  /*------------------------------------------------------------*/
168 
169  // FLA_Househ2_UT( FLA_LEFT,
170  // alpha11,
171  // a21, tau11 );
172  FLA_Househ2_UT_l_ops( m_ahead,
173  alpha11,
174  a21, rs_A,
175  tau11 );
176 
177  if ( n_ahead > 0 )
178  {
179  // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t, A22 );
  // NOTE(review): listing line 180 (the call opener and, it appears, its
  // first argument) is missing from this extract; per the comment above and
  // the References list this is presumably FLA_Apply_H2_UT_l_ops_var1 -- confirm.
181  n_ahead,
182  tau11,
183  a21, rs_A,
184  a12t, cs_A,
185  A22, rs_A, cs_A );
186 
187  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
  // a12t_l is the first element right of the diagonal and a12t_r the
  // remaining n_ahead - 1 elements of row i.
188  FLA_Househ2_UT_r_ops( n_ahead - 1,
189  a12t_l,
190  a12t_r, cs_A,
191  sigma11 );
192 
193  // FLA_Set( FLA_ONE, v21_t );
194  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
  // v21 = ( 1, a12t_r ): a leading 1 followed by a copy of a12t_r.
  // NOTE(review): listing line 196 (the call opener) is missing; the arguments
  // match the tail of bl1_scopyv( conj, m, x, incx, y, incy ).
195  *v21_t = *buff_1;
197  n_ahead - 1,
198  a12t_r, cs_A,
199  v21_b, inc_v );
200 
201  // FLA_Apply_H2_UT( FLA_RIGHT, sigma11, v21_b, A22_l, A22_r );
  // NOTE(review): listing line 202 (the call opener and, it appears, its
  // first argument) is missing from this extract; per the comment above and
  // the References list this is presumably FLA_Apply_H2_UT_r_ops_var1 -- confirm.
203  n_ahead - 1,
204  sigma11,
205  v21_b, inc_v,
206  A22_l, rs_A,
207  A22_r, rs_A, cs_A );
208 
209  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
  // NOTE(review): listing lines 210-211 (the call opener) are missing; the
  // arguments match the tail of
  // bl1_sgemv( transa, conjx, m, n, alpha, a, a_rs, a_cs, x, incx, beta, y, incy ).
212  m_behind,
213  n_ahead,
214  buff_1,
215  A02, rs_A, cs_A,
216  v21, inc_v,
217  buff_0,
218  s01, rs_S );
219  }
220 
221  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
222  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
  // Done on every iteration, whether or not n_ahead > 0.
224  n_behind,
225  a10t, cs_A,
226  t01, rs_T );
229  m_ahead,
230  n_behind,
231  buff_1,
232  A20, rs_A, cs_A,
233  a21, rs_A,
234  buff_1,
235  t01, rs_T );
236 
237  /*------------------------------------------------------------*/
238 
239  }
240 
241  // FLA_Obj_free( &v );
  // Release the work vector allocated at the top of the function.
242  FLA_free( buff_v );
243 
244  return FLA_SUCCESS;
245 }
FLA_Error FLA_Househ2_UT_r_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:651
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
FLA_Error FLA_Apply_H2_UT_r_ops_var1(int n_u2h_A2, int m_a1, float *tau, float *u2h, int inc_u2h, float *a1, int inc_a1, float *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_r_opt_var1.c:108
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
Definition: blis_type_defs.h:56
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35
FLA_Error FLA_Apply_H2_UT_l_ops_var1(int m_u2_A2, int n_a1t, float *tau, float *u2, int inc_u2, float *a1t, int inc_a1t, float *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:121
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
int i
Definition: bl1_axmyv2.c:145
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_ops_var2()

FLA_Error FLA_Bidiag_UT_u_step_ops_var2 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_Househ2_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var2().

120 {
121  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
122  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
123  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
124 
125  float beta;
126  int i;
127 
128  // b_alg = FLA_Obj_length( T );
129  int b_alg = m_TS;
130 
131  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
132  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
133  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
134  float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
135  float* buff_y = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
136  float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
137  int inc_v = 1;
138  int inc_y = 1;
139  int inc_z = 1;
140 
141  for ( i = 0; i < b_alg; ++i )
142  {
143  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
144  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
145  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
146  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
147  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
148  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
149  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
150 
151  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
152  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
153 
154  float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
155  float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
156 
157  float* v21 = buff_v + (i+1)*inc_v;
158 
159  float* y21 = buff_y + (i+1)*inc_y;
160 
161  float* z21 = buff_z + (i+1)*inc_z;
162 
163  float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
164  float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
165 
166  float* v21_t = v21 + (0 )*inc_v;
167  float* v21_b = v21 + (1 )*inc_v;
168 
169  int m_ahead = m_A - i - 1;
170  int n_ahead = n_A - i - 1;
171  int m_behind = i;
172  int n_behind = i;
173 
174  /*------------------------------------------------------------*/
175 
176  // FLA_Househ2_UT( FLA_LEFT,
177  // alpha11,
178  // a21, tau11 );
179  FLA_Househ2_UT_l_ops( m_ahead,
180  alpha11,
181  a21, rs_A,
182  tau11 );
183 
184  if ( n_ahead > 0 )
185  {
186  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
187  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
189  n_ahead,
190  a12t, cs_A,
191  y21, inc_y );
194  m_ahead,
195  n_ahead,
196  buff_1,
197  A22, rs_A, cs_A,
198  a21, rs_A,
199  buff_1,
200  y21, inc_y );
201 
202  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
204  n_ahead,
205  tau11,
206  y21, inc_y );
207 
208  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
210  n_ahead,
211  buff_m1,
212  y21, inc_y,
213  a12t, cs_A );
214 
215  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
216  FLA_Househ2_UT_r_ops( n_ahead - 1,
217  a12t_l,
218  a12t_r, cs_A,
219  sigma11 );
220 
221  // FLA_Set( FLA_ONE, v21_t );
222  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
223  *v21_t = *buff_1;
225  n_ahead - 1,
226  a12t_r, cs_A,
227  v21_b, inc_y );
228 
229  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
230  // FLA_Scal( FLA_MINUS_ONE, beta );
232  n_ahead,
233  y21, inc_y,
234  v21, inc_v,
235  &beta );
236  bl1_sneg1( &beta );
237 
238  // FLA_Copy( a21, z21 );
239  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
240  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
242  m_ahead,
243  a21, rs_A,
244  z21, inc_z );
247  m_ahead,
248  n_ahead,
249  buff_1,
250  A22, rs_A, cs_A,
251  v21, inc_v,
252  &beta,
253  z21, inc_z );
255  m_ahead,
256  sigma11,
257  z21, inc_z );
258 
259  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
260  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
263  m_ahead,
264  n_ahead,
265  buff_m1,
266  a21, rs_A,
267  y21, inc_y,
268  A22, rs_A, cs_A );
271  m_ahead,
272  n_ahead,
273  buff_m1,
274  z21, inc_z,
275  v21, inc_v,
276  A22, rs_A, cs_A );
277 
278  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
281  m_behind,
282  n_ahead,
283  buff_1,
284  A02, rs_A, cs_A,
285  v21, inc_v,
286  buff_0,
287  s01, rs_S );
288  }
289 
290  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
291  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
293  n_behind,
294  a10t, cs_A,
295  t01, rs_T );
298  m_ahead,
299  n_behind,
300  buff_1,
301  A20, rs_A, cs_A,
302  a21, rs_A,
303  buff_1,
304  t01, rs_T );
305 
306  /*------------------------------------------------------------*/
307 
308  }
309 
310  // FLA_Obj_free( &v );
311  // FLA_Obj_free( &y );
312  // FLA_Obj_free( &z );
313  FLA_free( buff_v );
314  FLA_free( buff_y );
315  FLA_free( buff_z );
316 
317  return FLA_SUCCESS;
318 }
FLA_Error FLA_Househ2_UT_r_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:651
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_invscalv.c:13
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition: bl1_ger.c:13
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
int i
Definition: bl1_axmyv2.c:145
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_ops_var3()

FLA_Error FLA_Bidiag_UT_u_step_ops_var3 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_saxpyv(), bl1_sconjv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_Househ2s_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var3().

120 {
121  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
122  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
123  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
124 
125  float alpha12;
126  float minus_conj_alpha12;
127  float psi11_minus_alpha12;
128  float minus_inv_tau11;
129  float minus_upsilon11;
130  float minus_conj_nu11;
131  float minus_conj_psi11;
132  float minus_zeta11;
133  float beta;
134  int i;
135 
136  // b_alg = FLA_Obj_length( T );
137  int b_alg = m_TS;
138 
139  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
140  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
141  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
142  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
143  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
144  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
145  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
146  float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
147  float* buff_ap = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
148  float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
149  float* buff_up = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
150  float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
151  float* buff_y = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
152  float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
153  int inc_w = 1;
154  int inc_ap = 1;
155  int inc_u = 1;
156  int inc_up = 1;
157  int inc_v = 1;
158  int inc_y = 1;
159  int inc_z = 1;
160 
161  for ( i = 0; i < b_alg; ++i )
162  {
163  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
164  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
165  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
166  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
167  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
168  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
169  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
170 
171  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
172  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
173 
174  float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
175  float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
176 
177  float* w21 = buff_w + (i+1)*inc_w;
178 
179  float* a12p = buff_ap + (i+1)*inc_ap;
180 
181  float* upsilon11 = buff_u + (i )*inc_u;
182  float* u21 = buff_u + (i+1)*inc_u;
183 
184  float* u21p = buff_up + (i+1)*inc_up;
185 
186  float* nu11 = buff_v + (i )*inc_v;
187  float* v21 = buff_v + (i+1)*inc_v;
188 
189  float* psi11 = buff_y + (i )*inc_y;
190  float* y21 = buff_y + (i+1)*inc_y;
191 
192  float* zeta11 = buff_z + (i )*inc_z;
193  float* z21 = buff_z + (i+1)*inc_z;
194 
195  float* a12p_t = a12p + (0 )*inc_ap;
196  float* a12p_b = a12p + (1 )*inc_ap;
197 
198  float* v21_t = v21 + (0 )*inc_v;
199  float* v21_b = v21 + (1 )*inc_v;
200 
201  float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
202  float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
203 
204  float* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
205 
206  int m_ahead = m_A - i - 1;
207  int n_ahead = n_A - i - 1;
208  int m_behind = i;
209  int n_behind = i;
210 
211  /*------------------------------------------------------------*/
212 
213  if ( m_behind > 0 )
214  {
215  // FLA_Copy( upsilon11, minus_upsilon11 );
216  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
217  bl1_smult3( buff_m1, upsilon11, &minus_upsilon11 );
218 
219  // FLA_Copy( zeta11, minus_zeta11 );
220  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
221  bl1_smult3( buff_m1, zeta11, &minus_zeta11 );
222 
223  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
224  // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
225  bl1_scopyconj( psi11, &minus_conj_psi11 );
226  bl1_sscals( buff_m1, &minus_conj_psi11 );
227 
228  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
229  // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
230  bl1_scopyconj( nu11, &minus_conj_nu11 );
231  bl1_sscals( buff_m1, &minus_conj_nu11 );
232 
233  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
234  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, zeta11, alpha11 );
236  1,
237  &minus_conj_psi11,
238  upsilon11, 1,
239  alpha11, 1 );
241  1,
242  &minus_conj_nu11,
243  zeta11, 1,
244  alpha11, 1 );
245 
246  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
247  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
249  m_ahead,
250  &minus_conj_psi11,
251  u21, inc_u,
252  a21, rs_A );
254  m_ahead,
255  &minus_conj_nu11,
256  z21, inc_z,
257  a21, rs_A );
258 
259  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
260  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11, v21, a12t );
262  n_ahead,
263  &minus_upsilon11,
264  y21, inc_y,
265  a12t, cs_A );
267  n_ahead,
268  &minus_zeta11,
269  v21, inc_v,
270  a12t, cs_A );
271  }
272 
273  // FLA_Househ2_UT( FLA_LEFT,
274  // alpha11,
275  // a21, tau11 );
276  // FLA_Copy( a21, u21p );
277  FLA_Househ2_UT_l_ops( m_ahead,
278  alpha11,
279  a21, rs_A,
280  tau11 );
282  m_ahead,
283  a21, rs_A,
284  u21p, inc_up );
285 
286  if ( n_ahead > 0 )
287  {
288  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
289  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
290  bl1_sdiv3( buff_m1, tau11, &minus_inv_tau11 );
291 
292  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
293  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
295  n_ahead,
296  a12t, cs_A,
297  a12p, inc_ap );
299  n_ahead,
300  &minus_inv_tau11,
301  a12t, cs_A,
302  a12p, inc_ap );
303  }
304 
305  if ( m_behind > 0 )
306  {
307  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
308  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
311  m_ahead,
312  n_ahead,
313  buff_m1,
314  u21, inc_u,
315  y21, inc_y,
316  A22, rs_A, cs_A );
319  m_ahead,
320  n_ahead,
321  buff_m1,
322  z21, inc_z,
323  v21, inc_v,
324  A22, rs_A, cs_A );
325  }
326 
327  if ( n_ahead > 0 )
328  {
329  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
332  m_ahead,
333  n_ahead,
334  buff_1,
335  A22, rs_A, cs_A,
336  u21p, inc_up,
337  buff_0,
338  y21, inc_y );
339 
340  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
342  n_ahead,
343  &minus_inv_tau11,
344  y21, inc_y,
345  a12p, inc_ap );
346 
347  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
350  m_ahead,
351  n_ahead,
352  buff_1,
353  A22, rs_A, cs_A,
354  a12p, inc_ap,
355  buff_0,
356  w21, inc_w );
357 
358  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
360  n_ahead,
361  buff_1,
362  a12t, cs_A,
363  y21, inc_y );
364 
365  // FLA_Househ2s_UT( FLA_RIGHT,
366  // a12p_t,
367  // a12p_b,
368  // alpha12, psi11_minus_alpha12, sigma11 );
369  FLA_Househ2s_UT_r_ops( n_ahead - 1,
370  a12p_t,
371  a12p_b, inc_ap,
372  &alpha12,
373  &psi11_minus_alpha12,
374  sigma11 );
375 
376  // FLA_Copy( a12p, v21 );
377  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
378  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
379  // FLA_Conjugate( v21_b );
381  n_ahead,
382  a12p, inc_ap,
383  v21, inc_v );
384  bl1_smult4( buff_m1, &alpha12, v21_t, v21_t );
386  n_ahead,
387  &psi11_minus_alpha12,
388  v21, inc_v );
389  bl1_sconjv( n_ahead - 1,
390  v21_b, inc_v );
391 
392  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
393  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
394  *a12t_l = alpha12;
396  n_ahead - 1,
397  v21_b, inc_v,
398  a12t_r, cs_A );
399  }
400 
401  // FLA_Copy( u21p, u21 );
403  m_ahead,
404  u21p, inc_up,
405  u21, inc_u );
406 
407  if ( n_ahead > 0 )
408  {
409  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
410  // FLA_Scal( FLA_MINUS_ONE, beta );
411  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
413  n_ahead,
414  y21, inc_y,
415  v21, inc_v,
416  &beta );
417  bl1_sscals( &minus_inv_tau11, &beta );
418 
419  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
420  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
421  bl1_scopyconj( &alpha12, &minus_conj_alpha12 );
422  bl1_sneg1( &minus_conj_alpha12 );
423 
424  // FLA_Copy( w21, z21 );
425  // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
426  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
427  // FLA_Axpy( beta, u21, z21 );
429  m_ahead,
430  w21, inc_w,
431  z21, inc_z );
433  m_ahead,
434  &minus_conj_alpha12,
435  A22_l, rs_A,
436  z21, inc_z );
438  m_ahead,
439  &psi11_minus_alpha12,
440  z21, inc_z );
442  m_ahead,
443  &beta,
444  u21, inc_u,
445  z21, inc_z );
446 
447  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
448  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
450  n_ahead,
451  tau11,
452  y21, inc_y );
454  m_ahead,
455  sigma11,
456  z21, inc_z );
457 
458  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
461  m_behind,
462  n_ahead,
463  buff_1,
464  A02, rs_A, cs_A,
465  v21, inc_v,
466  buff_0,
467  s01, rs_S );
468  }
469 
470  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
471  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
473  n_behind,
474  a10t, cs_A,
475  t01, rs_T );
478  m_ahead,
479  n_behind,
480  buff_1,
481  A20, rs_A, cs_A,
482  u21, inc_u,
483  buff_1,
484  t01, rs_T );
485 
486  if ( m_behind + 1 == b_alg && n_ahead > 0 )
487  {
488  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
489  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
492  m_ahead,
493  n_ahead,
494  buff_m1,
495  u21, inc_u,
496  y21, inc_y,
497  A22, rs_A, cs_A );
500  m_ahead,
501  n_ahead,
502  buff_m1,
503  z21, inc_z,
504  v21, inc_v,
505  A22, rs_A, cs_A );
506  }
507 
508  /*------------------------------------------------------------*/
509 
510  }
511 
512  // FLA_Obj_free( &w );
513  // FLA_Obj_free( &ap );
514  // FLA_Obj_free( &u );
515  // FLA_Obj_free( &up );
516  // FLA_Obj_free( &v );
517  // FLA_Obj_free( &y );
518  // FLA_Obj_free( &z );
519  FLA_free( buff_w );
520  FLA_free( buff_ap );
521  FLA_free( buff_u );
522  FLA_free( buff_up );
523  FLA_free( buff_v );
524  FLA_free( buff_y );
525  FLA_free( buff_z );
526 
527  return FLA_SUCCESS;
528 }
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_invscalv.c:13
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_sconjv(int m, float *x, int incx)
Definition: bl1_conjv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2s_UT_r_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *alpha, float *chi_1_minus_alpha, float *tau)
Definition: FLA_Househ2s_UT.c:555
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition: bl1_ger.c:13
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
int i
Definition: bl1_axmyv2.c:145
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_ops_var4()

FLA_Error FLA_Bidiag_UT_u_step_ops_var4 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_Y,
int  rs_Y,
int  cs_Y,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_saxpyv(), bl1_sconjv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sinvscalv(), bl1_ssetm(), bl1_ssetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_Househ2s_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var4().

163 {
164  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
165  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
166  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
167 
168  float alpha12;
169  float minus_conj_alpha12;
170  float psi11_minus_alpha12;
171  float minus_inv_tau11;
172  float beta;
173  float last_elem;
174  int i;
175 
176  // b_alg = FLA_Obj_length( T );
177  int b_alg = m_TS;
178 
179  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
180  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
181  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
182  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
183  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
184  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
185  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
186  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
187  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
188  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
189  float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
190  float* buff_al = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
191  float* buff_ap = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
192  float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
193  float* buff_up = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
194  float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
195  float* buff_d = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
196  float* buff_e = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
197  float* buff_f = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
198  float* buff_g = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
199  int inc_w = 1;
200  int inc_al = 1;
201  int inc_ap = 1;
202  int inc_u = 1;
203  int inc_up = 1;
204  int inc_v = 1;
205  int inc_d = 1;
206  int inc_e = 1;
207  int inc_f = 1;
208  int inc_g = 1;
209 
210  // FLA_Set( FLA_ZERO, Y );
211  // FLA_Set( FLA_ZERO, Z );
212  bl1_ssetm( n_A,
213  b_alg,
214  buff_0,
215  buff_Y, rs_Y, cs_Y );
216  bl1_ssetm( m_A,
217  b_alg,
218  buff_0,
219  buff_Z, rs_Z, cs_Z );
220 
221  for ( i = 0; i < b_alg; ++i )
222  {
223  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
224  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
225  float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
226  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
227  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
228  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
229  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
230  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
231 
232  float* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
233  float* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
234  float* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
235 
236  float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
237  float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
238  float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
239 
240  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
241  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
242 
243  float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
244  float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
245 
246  float* w21 = buff_w + (i+1)*inc_w;
247 
248  float* a22l = buff_al + (i+1)*inc_al;
249 
250  float* a12p = buff_ap + (i+1)*inc_ap;
251 
252  float* u21 = buff_u + (i+1)*inc_u;
253 
254  float* u21p = buff_up + (i+1)*inc_up;
255 
256  float* v21 = buff_v + (i+1)*inc_v;
257 
258  float* d0 = buff_d + (0 )*inc_d;
259 
260  float* e0 = buff_e + (0 )*inc_e;
261 
262  float* f0 = buff_f + (0 )*inc_f;
263 
264  float* g0 = buff_g + (0 )*inc_g;
265 
266  float* a12p_t = a12p + (0 )*inc_ap;
267  float* a12p_b = a12p + (1 )*inc_ap;
268 
269  float* v21_t = v21 + (0 )*inc_v;
270  float* v21_b = v21 + (1 )*inc_v;
271 
272  float* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
273 
274  float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
275  float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
276 
277  float* A02_l = A02 + (0 )*cs_A + (0 )*rs_A;
278 
279  float* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
280 
281  float* Y20_t = Y20 + (0 )*cs_Y + (0 )*rs_Y;
282 
283  float* ABL = a10t;
284  float* ZBL = z10t;
285 
286  float* a2 = alpha11;
287 
288  int m_ahead = m_A - i - 1;
289  int n_ahead = n_A - i - 1;
290  int m_behind = i;
291  int n_behind = i;
292 
293  /*------------------------------------------------------------*/
294 
295  if ( m_behind > 0 )
296  {
297  // FLA_Copy( a01_b, last_elem );
298  // FLA_Set( FLA_ONE, a01_b );
299  last_elem = *a01_b;
300  *a01_b = *buff_1;
301  }
302 
303  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
304  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
307  m_ahead + 1,
308  n_behind,
309  buff_m1,
310  ABL, rs_A, cs_A,
311  y10t, cs_Y,
312  buff_1,
313  a2, rs_A );
316  m_ahead + 1,
317  n_behind,
318  buff_m1,
319  ZBL, rs_Z, cs_Z,
320  a01, rs_A,
321  buff_1,
322  a2, rs_A );
323 
324  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
325  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
328  n_ahead,
329  n_behind,
330  buff_m1,
331  Y20, rs_Y, cs_Y,
332  a10t, cs_A,
333  buff_1,
334  a12t, cs_A );
337  m_behind,
338  n_ahead,
339  buff_m1,
340  A02, rs_A, cs_A,
341  z10t, cs_Z,
342  buff_1,
343  a12t, cs_A );
344 
345  if ( m_behind > 0 )
346  {
347  // FLA_Copy( last_elem, a01_b );
348  *a01_b = last_elem;
349  }
350 
351  // FLA_Househ2_UT( FLA_LEFT,
352  // alpha11,
353  // a21, tau11 );
354  // FLA_Copy( a21, u21p );
355  FLA_Househ2_UT_l_ops( m_ahead,
356  alpha11,
357  a21, rs_A,
358  tau11 );
360  m_ahead,
361  a21, rs_A,
362  u21p, inc_up );
363 
364  if ( n_ahead > 0 )
365  {
366  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
367  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
368  bl1_sdiv3( buff_m1, tau11, &minus_inv_tau11 );
369 
370  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
371  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
373  n_ahead,
374  a12t, cs_A,
375  a12p, inc_ap );
377  n_ahead,
378  &minus_inv_tau11,
379  a12t, cs_A,
380  a12p, inc_ap );
381 
382  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
383  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
386  m_ahead,
387  n_behind,
388  buff_1,
389  A20, rs_A, cs_A,
390  u21p, inc_up,
391  buff_0,
392  d0, inc_d );
395  m_ahead,
396  n_behind,
397  buff_1,
398  Z20, rs_Z, cs_Z,
399  u21p, inc_up,
400  buff_0,
401  e0, inc_e );
402 
403  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
404  // FLA_Axpy( FLA_ONE, d0, t01 );
406  n_behind,
407  a10t, cs_A,
408  t01, rs_T );
410  n_behind,
411  buff_1,
412  d0, inc_d,
413  t01, rs_T );
414 
415  // FLA_Set( FLA_ZERO, y21 );
416  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
417  // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
418  bl1_ssetv( n_ahead,
419  buff_0,
420  y21, rs_Y );
423  n_ahead,
424  n_behind,
425  buff_m1,
426  Y20, rs_Y, cs_Y,
427  d0, inc_d,
428  buff_1,
429  y21, rs_Y );
432  m_behind,
433  n_ahead,
434  buff_m1,
435  A02, rs_A, cs_A,
436  e0, inc_e,
437  buff_1,
438  y21, rs_Y );
439 
440  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
443  m_ahead,
444  n_ahead,
445  buff_1,
446  A22, rs_A, cs_A,
447  u21p, inc_up,
448  buff_1,
449  y21, rs_Y );
450 
451  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
453  n_ahead,
454  &minus_inv_tau11,
455  y21, rs_Y,
456  a12p, inc_ap );
457 
458  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
461  m_ahead,
462  n_ahead,
463  buff_1,
464  A22, rs_A, cs_A,
465  a12p, inc_ap,
466  buff_0,
467  w21, inc_w );
468 
469  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
470  // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
473  n_ahead,
474  n_behind,
475  buff_1,
476  Y20, rs_Y, cs_Y,
477  a12p, inc_ap,
478  buff_0,
479  f0, inc_f );
482  m_behind,
483  n_ahead,
484  buff_1,
485  A02, rs_A, cs_A,
486  a12p, inc_ap,
487  buff_0,
488  g0, inc_g );
489 
490  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
491  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
494  m_ahead,
495  n_behind,
496  buff_m1,
497  A20, rs_A, cs_A,
498  f0, inc_f,
499  buff_1,
500  w21, inc_w );
503  m_ahead,
504  n_behind,
505  buff_m1,
506  Z20, rs_Z, cs_Z,
507  g0, inc_g,
508  buff_1,
509  w21, inc_w );
510 
511  // FLA_Copy( A22_l, a22l );
512  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
513  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
515  m_ahead,
516  A22_l, rs_A,
517  a22l, inc_al );
520  m_ahead,
521  n_behind,
522  buff_m1,
523  A20, rs_A, cs_A,
524  Y20_t, cs_Y,
525  buff_1,
526  a22l, inc_al );
529  m_ahead,
530  n_behind,
531  buff_m1,
532  Z20, rs_Z, cs_Z,
533  A02_l, rs_A,
534  buff_1,
535  a22l, inc_al );
536 
537  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
539  n_ahead,
540  buff_1,
541  a12t, cs_A,
542  y21, rs_Y );
543 
544  // FLA_Househ2s_UT( FLA_RIGHT,
545  // a12p_t,
546  // a12p_b,
547  // alpha12, psi11_minus_alpha12, sigma11 );
548  FLA_Househ2s_UT_r_ops( n_ahead - 1,
549  a12p_t,
550  a12p_b, inc_ap,
551  &alpha12,
552  &psi11_minus_alpha12,
553  sigma11 );
554 
555  // FLA_Copy( a12p, v21 );
556  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
557  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
558  // FLA_Conjugate( v21_b );
560  n_ahead,
561  a12p, inc_ap,
562  v21, inc_v );
563  bl1_smult4( buff_m1, &alpha12, v21_t, v21_t );
565  n_ahead,
566  &psi11_minus_alpha12,
567  v21, inc_v );
568  bl1_sconjv( n_ahead - 1,
569  v21_b, inc_v );
570 
571  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
572  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
573  bl1_scopyconj( &alpha12, &minus_conj_alpha12 );
574  bl1_sneg1( &minus_conj_alpha12 );
575 
576  // FLA_Copy( g0, s01 );
577  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
578  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
580  n_behind,
581  g0, inc_g,
582  s01, rs_S );
584  n_behind,
585  &minus_conj_alpha12,
586  A02_l, rs_A,
587  s01, rs_S );
589  n_behind,
590  &psi11_minus_alpha12,
591  s01, rs_S );
592 
593  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
594  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
595  *a12t_l = alpha12;
597  n_ahead - 1,
598  v21_b, inc_v,
599  a12t_r, cs_A );
600  }
601 
602  // FLA_Copy( u21p, u21 );
604  m_ahead,
605  u21p, inc_up,
606  u21, inc_u );
607 
608  if ( n_ahead > 0 )
609  {
610  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
611  // FLA_Scal( FLA_MINUS_ONE, beta );
612  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
614  n_ahead,
615  y21, rs_Y,
616  v21, inc_v,
617  &beta );
618  bl1_sscals( &minus_inv_tau11, &beta );
619 
620  // FLA_Copy( w21, z21 );
621  // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
622  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
623  // FLA_Axpy( beta, u21, z21 );
625  m_ahead,
626  w21, inc_w,
627  z21, rs_Z );
629  m_ahead,
630  &minus_conj_alpha12,
631  a22l, inc_al,
632  z21, rs_Z );
634  m_ahead,
635  &psi11_minus_alpha12,
636  z21, rs_Z );
638  m_ahead,
639  &beta,
640  u21, inc_u,
641  z21, rs_Z );
642 
643  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
644  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
646  n_ahead,
647  tau11,
648  y21, rs_Y );
650  m_ahead,
651  sigma11,
652  z21, rs_Z );
653  }
654  else // if ( n_ahead == 0 )
655  {
656  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
657  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
659  n_behind,
660  a10t, cs_A,
661  t01, rs_T );
664  m_ahead,
665  n_behind,
666  buff_1,
667  A20, rs_A, cs_A,
668  u21, inc_u,
669  buff_1,
670  t01, rs_T );
671  }
672 
673  /*------------------------------------------------------------*/
674 
675  }
676 
677  // FLA_Obj_free( &w );
678  // FLA_Obj_free( &al );
679  // FLA_Obj_free( &ap );
680  // FLA_Obj_free( &u );
681  // FLA_Obj_free( &up );
682  // FLA_Obj_free( &v );
683  // FLA_Obj_free( &d );
684  // FLA_Obj_free( &e );
685  // FLA_Obj_free( &f );
686  // FLA_Obj_free( &g );
687  FLA_free( buff_w );
688  FLA_free( buff_al );
689  FLA_free( buff_ap );
690  FLA_free( buff_u );
691  FLA_free( buff_up );
692  FLA_free( buff_v );
693  FLA_free( buff_d );
694  FLA_free( buff_e );
695  FLA_free( buff_f );
696  FLA_free( buff_g );
697 
698  return FLA_SUCCESS;
699 }
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:55
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_invscalv.c:13
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void bl1_ssetv(int m, float *sigma, float *x, int incx)
Definition: bl1_setv.c:26
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_sconjv(int m, float *x, int incx)
Definition: bl1_conjv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2s_UT_r_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *alpha, float *chi_1_minus_alpha, float *tau)
Definition: FLA_Househ2s_UT.c:555
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
int i
Definition: bl1_axmyv2.c:145
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
void bl1_ssetm(int m, int n, float *sigma, float *a, int a_rs, int a_cs)
Definition: bl1_setm.c:29

◆ FLA_Bidiag_UT_u_step_ops_var5()

FLA_Error FLA_Bidiag_UT_u_step_ops_var5 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_Y,
int  rs_Y,
int  cs_Y,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sinvscalv(), bl1_ssetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_Househ2_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var5().

// Listing of FLA_Bidiag_UT_u_step_ops_var5: the float (single-precision real)
// instance of the variant-5 "step" routine. It performs b_alg = m_TS
// iterations over the leading columns/rows of A, filling the caller's Y, Z,
// T and S buffers along the way, and always returns FLA_SUCCESS.
//
// NOTE ON THIS LISTING: the listing numbers skip wherever a line is absent
// (e.g. 271 -> 274, 325 -> 327). The absent lines are the ones that open each
// computational call, so the argument lists below appear without a callee
// name. The FLAME-API comment directly above each group (FLA_Gemv, FLA_Copy,
// FLA_Axpy, ...) states which operation the group performs.
//
// Per iteration i, following the FLAME-API comments:
//   1. update column a2 (alpha11 and below) and row a12t of A from the
//      previously computed parts of Y and Z;
//   2. compute a left Householder transform from alpha11/a21 (tau11 <- scalar)
//      and save a21 in the workspace vector u21;
//   3. if columns remain to the right (n_ahead > 0): form y21, update a12t,
//      compute a right Householder transform from a12t (sigma11 <- scalar),
//      form z21, and fill t01 and s01;
//      otherwise: only t01 is formed.
//
// Workspace: six temporary vectors (u, f, g of length m_A; v, d, e of length
// n_A) are obtained with FLA_malloc on entry and released with FLA_free on
// exit.
163 {
164  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
165  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
166  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
167 
168  float beta;
169  float last_elem;
170  int i;
171 
172  // b_alg = FLA_Obj_length( T );
173  int b_alg = m_TS;
174 
175  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
176  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
177  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
178  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
179  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
180  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
     // NOTE(review): none of the six FLA_malloc results is checked here before
     // use; whether FLA_malloc itself aborts on failure is not visible from
     // this listing -- confirm.
181  float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
182  float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
183  float* buff_d = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
184  float* buff_e = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
185  float* buff_f = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
186  float* buff_g = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
     // All workspace vectors are contiguous (unit stride).
187  int inc_u = 1;
188  int inc_v = 1;
189  int inc_d = 1;
190  int inc_e = 1;
191  int inc_f = 1;
192  int inc_g = 1;
193 
194  // FLA_Set( FLA_ZERO, Y );
195  // FLA_Set( FLA_ZERO, Z );
     // Y is cleared as an n_A x b_alg matrix and Z as an m_A x b_alg matrix.
196  bl1_ssetm( n_A,
197  b_alg,
198  buff_0,
199  buff_Y, rs_Y, cs_Y );
200  bl1_ssetm( m_A,
201  b_alg,
202  buff_0,
203  buff_Z, rs_Z, cs_Z );
204 
205  for ( i = 0; i < b_alg; ++i )
206  {
     // Views into A around the diagonal element (i,i); element (r,c) of A is
     // addressed as buff_A + c*cs_A + r*rs_A.
207  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
208  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
209  float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
210  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
211  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
212  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
213  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
214  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
215 
     // Matching views into Y and Z (row i, rows below i, and column i below
     // row i).
216  float* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
217  float* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
218  float* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
219 
220  float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
221  float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
222  float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
223 
     // Column i of T and S: the part above the diagonal and the diagonal entry.
224  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
225  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
226 
227  float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
228  float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
229 
230  float* u21 = buff_u + (i+1)*inc_u;
231 
232  float* v21 = buff_v + (i+1)*inc_v;
233 
234  float* d0 = buff_d + (0 )*inc_d;
235 
236  float* e0 = buff_e + (0 )*inc_e;
237 
238  float* f0 = buff_f + (0 )*inc_f;
239 
240  float* g0 = buff_g + (0 )*inc_g;
241 
     // v21 split into its first element and the remainder.
242  float* v21_t = v21 + (0 )*inc_v;
243  float* v21_b = v21 + (1 )*inc_v;
244 
     // Last element of a01 (row i-1 of column i). For i == 0 this address lies
     // one row before a01; it is only dereferenced under "m_behind > 0" below.
245  float* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
246 
     // a12t split into its first element and the remainder.
247  float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
248  float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
249 
     // ABL/ZBL alias the row-i views of A and Z; they are used below as
     // (m_ahead + 1) x n_behind matrices, i.e. row i and every row beneath it.
250  float* ABL = a10t;
251  float* ZBL = z10t;
252 
     // a2 is column i of A from the diagonal element downward.
253  float* a2 = alpha11;
254 
255  int m_ahead = m_A - i - 1;
256  int n_ahead = n_A - i - 1;
257  int m_behind = i;
258  int n_behind = i;
259 
260  /*------------------------------------------------------------*/
261 
     // Temporarily overwrite the last element of a01 with one for the updates
     // below; the saved value is put back at listing line 315.
262  if ( m_behind > 0 )
263  {
264  // FLA_Copy( a01_b, last_elem );
265  // FLA_Set( FLA_ONE, a01_b );
266  last_elem = *a01_b;
267  *a01_b = *buff_1;
268  }
269 
270  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
271  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
274  m_ahead + 1,
275  n_behind,
276  buff_m1,
277  ABL, rs_A, cs_A,
278  y10t, cs_Y,
279  buff_1,
280  a2, rs_A );
283  m_ahead + 1,
284  n_behind,
285  buff_m1,
286  ZBL, rs_Z, cs_Z,
287  a01, rs_A,
288  buff_1,
289  a2, rs_A );
290 
291  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
292  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
295  n_ahead,
296  n_behind,
297  buff_m1,
298  Y20, rs_Y, cs_Y,
299  a10t, cs_A,
300  buff_1,
301  a12t, cs_A );
304  m_behind,
305  n_ahead,
306  buff_m1,
307  A02, rs_A, cs_A,
308  z10t, cs_Z,
309  buff_1,
310  a12t, cs_A );
311 
     // Undo the temporary overwrite made at listing line 267.
312  if ( m_behind > 0 )
313  {
314  // FLA_Copy( last_elem, a01_b );
315  *a01_b = last_elem;
316  }
317 
318  // FLA_Househ2_UT( FLA_LEFT,
319  // alpha11,
320  // a21, tau11 );
321  // FLA_Copy( a21, u21 );
322  FLA_Househ2_UT_l_ops( m_ahead,
323  alpha11,
324  a21, rs_A,
325  tau11 );
327  m_ahead,
328  a21, rs_A,
329  u21, inc_u );
330 
331  if ( n_ahead > 0 )
332  {
333  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
334  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21, FLA_ONE, y21 );
336  n_ahead,
337  a12t, cs_A,
338  y21, rs_Y );
341  m_ahead,
342  n_ahead,
343  buff_1,
344  A22, rs_A, cs_A,
345  u21, inc_u,
346  buff_1,
347  y21, rs_Y );
348 
349  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ZERO, d0 );
350  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21, FLA_ZERO, e0 );
353  m_ahead,
354  n_behind,
355  buff_1,
356  A20, rs_A, cs_A,
357  u21, inc_u,
358  buff_0,
359  d0, inc_d );
362  m_ahead,
363  n_behind,
364  buff_1,
365  Z20, rs_Z, cs_Z,
366  u21, inc_u,
367  buff_0,
368  e0, inc_e );
369 
370  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
371  // FLA_Axpy( FLA_ONE, d0, t01 );
373  n_behind,
374  a10t, cs_A,
375  t01, rs_T );
377  n_behind,
378  buff_1,
379  d0, inc_d,
380  t01, rs_T );
381 
382  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
383  // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
386  n_ahead,
387  n_behind,
388  buff_m1,
389  Y20, rs_Y, cs_Y,
390  d0, inc_d,
391  buff_1,
392  y21, rs_Y );
395  m_behind,
396  n_ahead,
397  buff_m1,
398  A02, rs_A, cs_A,
399  e0, inc_e,
400  buff_1,
401  y21, rs_Y );
402 
403  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
405  n_ahead,
406  tau11,
407  y21, rs_Y );
408 
409  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
411  n_ahead,
412  buff_m1,
413  y21, rs_Y,
414  a12t, cs_A );
415 
416  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
     // a12t_l is passed as the leading element, so the remaining vector a12t_r
     // has n_ahead - 1 entries (zero when n_ahead == 1).
417  FLA_Househ2_UT_r_ops( n_ahead - 1,
418  a12t_l,
419  a12t_r, cs_A,
420  sigma11 );
421 
422  // FLA_Set( FLA_ONE, v21_t );
423  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
424  *v21_t = *buff_1;
426  n_ahead - 1,
427  a12t_r, cs_A,
428  v21_b, inc_v );
429 
430  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
431  // FLA_Scal( FLA_MINUS_ONE, beta );
433  n_ahead,
434  y21, rs_Y,
435  v21, inc_v,
436  &beta );
437  bl1_sscals( buff_m1, &beta );
438 
439  // FLA_Copy( u21, z21 );
440  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, v21, beta, z21 );
442  m_ahead,
443  u21, inc_u,
444  z21, rs_Z );
447  m_ahead,
448  n_ahead,
449  buff_1,
450  A22, rs_A, cs_A,
451  v21, inc_v,
452  &beta,
453  z21, rs_Z );
454 
455  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, v21, FLA_ZERO, f0 );
456  // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, g0 );
459  n_ahead,
460  m_behind,
461  buff_1,
462  Y20, rs_Y, cs_Y,
463  v21, inc_v,
464  buff_0,
465  f0, inc_f );
468  m_behind,
469  n_ahead,
470  buff_1,
471  A02, rs_A, cs_A,
472  v21, inc_v,
473  buff_0,
474  g0, inc_g );
475 
476  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, z21 );
477  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, z21 );
480  m_ahead,
481  n_behind,
482  buff_m1,
483  A20, rs_A, cs_A,
484  f0, inc_f,
485  buff_1,
486  z21, rs_Z );
489  m_ahead,
490  n_behind,
491  buff_m1,
492  Z20, rs_Z, cs_Z,
493  g0, inc_g,
494  buff_1,
495  z21, rs_Z );
496 
497  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
499  m_ahead,
500  sigma11,
501  z21, rs_Z );
502 
503  // FLA_Copy( g0, s01 );
505  n_behind,
506  g0, inc_g,
507  s01, rs_S );
508  }
509  else // if ( n_ahead == 0 )
510  {
     // No columns remain to the right, so only t01 is formed in this branch;
     // sigma11, s01, y21 and z21 are not written.
511  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
512  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
514  n_behind,
515  a10t, cs_A,
516  t01, rs_T );
519  m_ahead,
520  n_behind,
521  buff_1,
522  A20, rs_A, cs_A,
523  u21, inc_u,
524  buff_1,
525  t01, rs_T );
526  }
527 
528  /*------------------------------------------------------------*/
529 
530  }
531 
532  // FLA_Obj_free( &u );
533  // FLA_Obj_free( &v );
534  // FLA_Obj_free( &d );
535  // FLA_Obj_free( &e );
536  // FLA_Obj_free( &f );
537  // FLA_Obj_free( &g );
     // Release the six workspace vectors allocated at listing lines 181-186.
538  FLA_free( buff_u );
539  FLA_free( buff_v );
540  FLA_free( buff_d );
541  FLA_free( buff_e );
542  FLA_free( buff_f );
543  FLA_free( buff_g );
544 
545  return FLA_SUCCESS;
546 }
FLA_Error FLA_Househ2_UT_r_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:651
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:55
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_invscalv.c:13
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
int i
Definition: bl1_axmyv2.c:145
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
void bl1_ssetm(int m, int n, float *sigma, float *a, int a_rs, int a_cs)
Definition: bl1_setm.c:29

◆ FLA_Bidiag_UT_u_step_opt_var1()

FLA_Error FLA_Bidiag_UT_u_step_opt_var1 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)

References FLA_Bidiag_UT_u_step_opc_var1(), FLA_Bidiag_UT_u_step_opd_var1(), FLA_Bidiag_UT_u_step_ops_var1(), FLA_Bidiag_UT_u_step_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u_blk_var1(), and FLA_Bidiag_UT_u_opt_var1().

// Listing of FLA_Bidiag_UT_u_step_opt_var1: datatype dispatcher for the
// variant-1 step routine. It reads the datatype and dimensions of A, the
// length of T (used as m_TS), and the row/column strides of A, T and S; then,
// in the case matching A's datatype, it extracts typed buffer pointers from
// A, T and S and forwards them together with the dimensions and strides.
// Always returns FLA_SUCCESS.
//
// NOTE ON THIS LISTING: in every case the line that opens the call (callee
// name and first argument) is absent -- the listing numbers skip 50, 66, 82
// and 98 -- so only the trailing arguments are shown. Judging by the
// "References" list of this entry and the buffer types, the callees are
// FLA_Bidiag_UT_u_step_ops_var1 / _opd_ / _opc_ / _opz_ for float / double /
// scomplex / dcomplex respectively.
19 {
20  FLA_Datatype datatype;
21  int m_A, n_A, m_TS;
22  int rs_A, cs_A;
23  int rs_T, cs_T;
24  int rs_S, cs_S;
25 
    // Only A's datatype is queried; T and S are read through the same typed
    // pointer macro as A in each case below.
26  datatype = FLA_Obj_datatype( A );
27 
28  m_A = FLA_Obj_length( A );
29  n_A = FLA_Obj_width( A );
    // m_TS comes from T alone; S's dimensions are not queried here.
30  m_TS = FLA_Obj_length( T );
31 
32  rs_A = FLA_Obj_row_stride( A );
33  cs_A = FLA_Obj_col_stride( A );
34 
35  rs_T = FLA_Obj_row_stride( T );
36  cs_T = FLA_Obj_col_stride( T );
37 
38  rs_S = FLA_Obj_row_stride( S );
39  cs_S = FLA_Obj_col_stride( S );
40 
41 
    // There is no default case: a datatype other than the four handled below
    // performs no work and still reaches "return FLA_SUCCESS".
42  switch ( datatype )
43  {
44  case FLA_FLOAT:
45  {
46  float* buff_A = FLA_FLOAT_PTR( A );
47  float* buff_T = FLA_FLOAT_PTR( T );
48  float* buff_S = FLA_FLOAT_PTR( S );
49 
51  n_A,
52  m_TS,
53  buff_A, rs_A, cs_A,
54  buff_T, rs_T, cs_T,
55  buff_S, rs_S, cs_S );
56 
57  break;
58  }
59 
60  case FLA_DOUBLE:
61  {
62  double* buff_A = FLA_DOUBLE_PTR( A );
63  double* buff_T = FLA_DOUBLE_PTR( T );
64  double* buff_S = FLA_DOUBLE_PTR( S );
65 
67  n_A,
68  m_TS,
69  buff_A, rs_A, cs_A,
70  buff_T, rs_T, cs_T,
71  buff_S, rs_S, cs_S );
72 
73  break;
74  }
75 
76  case FLA_COMPLEX:
77  {
78  scomplex* buff_A = FLA_COMPLEX_PTR( A );
79  scomplex* buff_T = FLA_COMPLEX_PTR( T );
80  scomplex* buff_S = FLA_COMPLEX_PTR( S );
81 
83  n_A,
84  m_TS,
85  buff_A, rs_A, cs_A,
86  buff_T, rs_T, cs_T,
87  buff_S, rs_S, cs_S );
88 
89  break;
90  }
91 
92  case FLA_DOUBLE_COMPLEX:
93  {
94  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
95  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
96  dcomplex* buff_S = FLA_DOUBLE_COMPLEX_PTR( S );
97 
99  n_A,
100  m_TS,
101  buff_A, rs_A, cs_A,
102  buff_T, rs_T, cs_T,
103  buff_S, rs_S, cs_S );
104 
105  break;
106  }
107  }
108 
109  return FLA_SUCCESS;
110 }
FLA_Error FLA_Bidiag_UT_u_step_opz_var1(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var1.c:519
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
Definition: blis_type_defs.h:132
FLA_Error FLA_Bidiag_UT_u_step_opc_var1(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var1.c:384
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Error FLA_Bidiag_UT_u_step_ops_var1(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var1.c:114
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Error FLA_Bidiag_UT_u_step_opd_var1(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var1.c:249
Definition: blis_type_defs.h:137

◆ FLA_Bidiag_UT_u_step_opt_var2()

FLA_Error FLA_Bidiag_UT_u_step_opt_var2 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)

References FLA_Bidiag_UT_u_step_opc_var2(), FLA_Bidiag_UT_u_step_opd_var2(), FLA_Bidiag_UT_u_step_ops_var2(), FLA_Bidiag_UT_u_step_opz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u_blk_var2(), and FLA_Bidiag_UT_u_opt_var2().

// Listing of FLA_Bidiag_UT_u_step_opt_var2: datatype dispatcher for the
// variant-2 step routine. It reads the datatype and dimensions of A, the
// length of T (used as m_TS), and the row/column strides of A, T and S; then,
// in the case matching A's datatype, it extracts typed buffer pointers from
// A, T and S and forwards them together with the dimensions and strides.
// Always returns FLA_SUCCESS.
//
// NOTE ON THIS LISTING: in every case the line that opens the call (callee
// name and first argument) is absent -- the listing numbers skip 50, 66, 82
// and 98 -- so only the trailing arguments are shown. Judging by the
// "References" list of this entry and the buffer types, the callees are
// FLA_Bidiag_UT_u_step_ops_var2 / _opd_ / _opc_ / _opz_ for float / double /
// scomplex / dcomplex respectively.
19 {
20  FLA_Datatype datatype;
21  int m_A, n_A, m_TS;
22  int rs_A, cs_A;
23  int rs_T, cs_T;
24  int rs_S, cs_S;
25 
    // Only A's datatype is queried; T and S are read through the same typed
    // pointer macro as A in each case below.
26  datatype = FLA_Obj_datatype( A );
27 
28  m_A = FLA_Obj_length( A );
29  n_A = FLA_Obj_width( A );
    // m_TS comes from T alone; S's dimensions are not queried here.
30  m_TS = FLA_Obj_length( T );
31 
32  rs_A = FLA_Obj_row_stride( A );
33  cs_A = FLA_Obj_col_stride( A );
34 
35  rs_T = FLA_Obj_row_stride( T );
36  cs_T = FLA_Obj_col_stride( T );
37 
38  rs_S = FLA_Obj_row_stride( S );
39  cs_S = FLA_Obj_col_stride( S );
40 
41 
    // There is no default case: a datatype other than the four handled below
    // performs no work and still reaches "return FLA_SUCCESS".
42  switch ( datatype )
43  {
44  case FLA_FLOAT:
45  {
46  float* buff_A = FLA_FLOAT_PTR( A );
47  float* buff_T = FLA_FLOAT_PTR( T );
48  float* buff_S = FLA_FLOAT_PTR( S );
49 
51  n_A,
52  m_TS,
53  buff_A, rs_A, cs_A,
54  buff_T, rs_T, cs_T,
55  buff_S, rs_S, cs_S );
56 
57  break;
58  }
59 
60  case FLA_DOUBLE:
61  {
62  double* buff_A = FLA_DOUBLE_PTR( A );
63  double* buff_T = FLA_DOUBLE_PTR( T );
64  double* buff_S = FLA_DOUBLE_PTR( S );
65 
67  n_A,
68  m_TS,
69  buff_A, rs_A, cs_A,
70  buff_T, rs_T, cs_T,
71  buff_S, rs_S, cs_S );
72 
73  break;
74  }
75 
76  case FLA_COMPLEX:
77  {
78  scomplex* buff_A = FLA_COMPLEX_PTR( A );
79  scomplex* buff_T = FLA_COMPLEX_PTR( T );
80  scomplex* buff_S = FLA_COMPLEX_PTR( S );
81 
83  n_A,
84  m_TS,
85  buff_A, rs_A, cs_A,
86  buff_T, rs_T, cs_T,
87  buff_S, rs_S, cs_S );
88 
89  break;
90  }
91 
92  case FLA_DOUBLE_COMPLEX:
93  {
94  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
95  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
96  dcomplex* buff_S = FLA_DOUBLE_COMPLEX_PTR( S );
97 
99  n_A,
100  m_TS,
101  buff_A, rs_A, cs_A,
102  buff_T, rs_T, cs_T,
103  buff_S, rs_S, cs_S );
104 
105  break;
106  }
107  }
108 
109  return FLA_SUCCESS;
110 }
FLA_Error FLA_Bidiag_UT_u_step_opz_var2(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var2.c:738
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Bidiag_UT_u_step_opd_var2(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var2.c:322
Definition: blis_type_defs.h:132
int FLA_Datatype
Definition: FLA_type_defs.h:49
FLA_Error FLA_Bidiag_UT_u_step_opc_var2(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var2.c:530
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Error FLA_Bidiag_UT_u_step_ops_var2(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var2.c:114
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
Definition: blis_type_defs.h:137

◆ FLA_Bidiag_UT_u_step_opt_var3()

FLA_Error FLA_Bidiag_UT_u_step_opt_var3 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)

References FLA_Bidiag_UT_u_step_opc_var3(), FLA_Bidiag_UT_u_step_opd_var3(), FLA_Bidiag_UT_u_step_ops_var3(), FLA_Bidiag_UT_u_step_opz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u_blk_var3(), and FLA_Bidiag_UT_u_opt_var3().

// Listing of FLA_Bidiag_UT_u_step_opt_var3: datatype dispatcher for the
// variant-3 step routine. It reads the datatype and dimensions of A, the
// length of T (used as m_TS), and the row/column strides of A, T and S; then,
// in the case matching A's datatype, it extracts typed buffer pointers from
// A, T and S and forwards them together with the dimensions and strides.
// Always returns FLA_SUCCESS.
//
// NOTE ON THIS LISTING: in every case the line that opens the call (callee
// name and first argument) is absent -- the listing numbers skip 50, 66, 82
// and 98 -- so only the trailing arguments are shown. Judging by the
// "References" list of this entry and the buffer types, the callees are
// FLA_Bidiag_UT_u_step_ops_var3 / _opd_ / _opc_ / _opz_ for float / double /
// scomplex / dcomplex respectively.
19 {
20  FLA_Datatype datatype;
21  int m_A, n_A, m_TS;
22  int rs_A, cs_A;
23  int rs_T, cs_T;
24  int rs_S, cs_S;
25 
    // Only A's datatype is queried; T and S are read through the same typed
    // pointer macro as A in each case below.
26  datatype = FLA_Obj_datatype( A );
27 
28  m_A = FLA_Obj_length( A );
29  n_A = FLA_Obj_width( A );
    // m_TS comes from T alone; S's dimensions are not queried here.
30  m_TS = FLA_Obj_length( T );
31 
32  rs_A = FLA_Obj_row_stride( A );
33  cs_A = FLA_Obj_col_stride( A );
34 
35  rs_T = FLA_Obj_row_stride( T );
36  cs_T = FLA_Obj_col_stride( T );
37 
38  rs_S = FLA_Obj_row_stride( S );
39  cs_S = FLA_Obj_col_stride( S );
40 
41 
    // There is no default case: a datatype other than the four handled below
    // performs no work and still reaches "return FLA_SUCCESS".
42  switch ( datatype )
43  {
44  case FLA_FLOAT:
45  {
46  float* buff_A = FLA_FLOAT_PTR( A );
47  float* buff_T = FLA_FLOAT_PTR( T );
48  float* buff_S = FLA_FLOAT_PTR( S );
49 
51  n_A,
52  m_TS,
53  buff_A, rs_A, cs_A,
54  buff_T, rs_T, cs_T,
55  buff_S, rs_S, cs_S );
56 
57  break;
58  }
59 
60  case FLA_DOUBLE:
61  {
62  double* buff_A = FLA_DOUBLE_PTR( A );
63  double* buff_T = FLA_DOUBLE_PTR( T );
64  double* buff_S = FLA_DOUBLE_PTR( S );
65 
67  n_A,
68  m_TS,
69  buff_A, rs_A, cs_A,
70  buff_T, rs_T, cs_T,
71  buff_S, rs_S, cs_S );
72 
73  break;
74  }
75 
76  case FLA_COMPLEX:
77  {
78  scomplex* buff_A = FLA_COMPLEX_PTR( A );
79  scomplex* buff_T = FLA_COMPLEX_PTR( T );
80  scomplex* buff_S = FLA_COMPLEX_PTR( S );
81 
83  n_A,
84  m_TS,
85  buff_A, rs_A, cs_A,
86  buff_T, rs_T, cs_T,
87  buff_S, rs_S, cs_S );
88 
89  break;
90  }
91 
92  case FLA_DOUBLE_COMPLEX:
93  {
94  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
95  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
96  dcomplex* buff_S = FLA_DOUBLE_COMPLEX_PTR( S );
97 
99  n_A,
100  m_TS,
101  buff_A, rs_A, cs_A,
102  buff_T, rs_T, cs_T,
103  buff_S, rs_S, cs_S );
104 
105  break;
106  }
107  }
108 
109  return FLA_SUCCESS;
110 }
FLA_Error FLA_Bidiag_UT_u_step_opd_var3(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var3.c:532
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Bidiag_UT_u_step_opc_var3(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var3.c:950
Definition: blis_type_defs.h:132
int FLA_Datatype
Definition: FLA_type_defs.h:49
FLA_Error FLA_Bidiag_UT_u_step_ops_var3(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var3.c:114
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Error FLA_Bidiag_UT_u_step_opz_var3(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var3.c:1368
Definition: blis_type_defs.h:137

◆ FLA_Bidiag_UT_u_step_opt_var4()

FLA_Error FLA_Bidiag_UT_u_step_opt_var4 ( FLA_Obj  A,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  T,
FLA_Obj  S 
)

References FLA_Bidiag_UT_u_step_opc_var4(), FLA_Bidiag_UT_u_step_opd_var4(), FLA_Bidiag_UT_u_step_ops_var4(), FLA_Bidiag_UT_u_step_opz_var4(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u_blk_var4(), and FLA_Bidiag_UT_u_opt_var4().

// Listing of FLA_Bidiag_UT_u_step_opt_var4: datatype dispatcher for the
// variant-4 step routine. Compared with the var1-var3 dispatchers it also
// takes the objects Y and Z. It reads the datatype and dimensions of A, the
// length of T (used as m_TS), and the row/column strides of A, Y, Z, T and S;
// then, in the case matching A's datatype, it extracts typed buffer pointers
// from all five objects and forwards them together with the dimensions and
// strides. Always returns FLA_SUCCESS.
//
// NOTE ON THIS LISTING: in every case the line that opens the call (callee
// name and first argument) is absent -- the listing numbers skip 77, 97, 117
// and 137 -- so only the trailing arguments are shown. Judging by the
// "References" list of this entry and the buffer types, the callees are
// FLA_Bidiag_UT_u_step_ops_var4 / _opd_ / _opc_ / _opz_ for float / double /
// scomplex / dcomplex respectively.
36 {
37  FLA_Datatype datatype;
38  int m_A, n_A, m_TS;
39  int rs_A, cs_A;
40  int rs_Y, cs_Y;
41  int rs_Z, cs_Z;
42  int rs_T, cs_T;
43  int rs_S, cs_S;
44 
    // Only A's datatype is queried; Y, Z, T and S are read through the same
    // typed pointer macro as A in each case below.
45  datatype = FLA_Obj_datatype( A );
46 
47  m_A = FLA_Obj_length( A );
48  n_A = FLA_Obj_width( A );
    // m_TS comes from T alone; the dimensions of Y, Z and S are not queried.
49  m_TS = FLA_Obj_length( T );
50 
51  rs_A = FLA_Obj_row_stride( A );
52  cs_A = FLA_Obj_col_stride( A );
53 
54  rs_Y = FLA_Obj_row_stride( Y );
55  cs_Y = FLA_Obj_col_stride( Y );
56 
57  rs_Z = FLA_Obj_row_stride( Z );
58  cs_Z = FLA_Obj_col_stride( Z );
59 
60  rs_T = FLA_Obj_row_stride( T );
61  cs_T = FLA_Obj_col_stride( T );
62 
63  rs_S = FLA_Obj_row_stride( S );
64  cs_S = FLA_Obj_col_stride( S );
65 
66 
    // There is no default case: a datatype other than the four handled below
    // performs no work and still reaches "return FLA_SUCCESS".
67  switch ( datatype )
68  {
69  case FLA_FLOAT:
70  {
71  float* buff_A = FLA_FLOAT_PTR( A );
72  float* buff_Y = FLA_FLOAT_PTR( Y );
73  float* buff_Z = FLA_FLOAT_PTR( Z );
74  float* buff_T = FLA_FLOAT_PTR( T );
75  float* buff_S = FLA_FLOAT_PTR( S );
76 
78  n_A,
79  m_TS,
80  buff_A, rs_A, cs_A,
81  buff_Y, rs_Y, cs_Y,
82  buff_Z, rs_Z, cs_Z,
83  buff_T, rs_T, cs_T,
84  buff_S, rs_S, cs_S );
85 
86  break;
87  }
88 
89  case FLA_DOUBLE:
90  {
91  double* buff_A = FLA_DOUBLE_PTR( A );
92  double* buff_Y = FLA_DOUBLE_PTR( Y );
93  double* buff_Z = FLA_DOUBLE_PTR( Z );
94  double* buff_T = FLA_DOUBLE_PTR( T );
95  double* buff_S = FLA_DOUBLE_PTR( S );
96 
98  n_A,
99  m_TS,
100  buff_A, rs_A, cs_A,
101  buff_Y, rs_Y, cs_Y,
102  buff_Z, rs_Z, cs_Z,
103  buff_T, rs_T, cs_T,
104  buff_S, rs_S, cs_S );
105 
106  break;
107  }
108 
109  case FLA_COMPLEX:
110  {
111  scomplex* buff_A = FLA_COMPLEX_PTR( A );
112  scomplex* buff_Y = FLA_COMPLEX_PTR( Y );
113  scomplex* buff_Z = FLA_COMPLEX_PTR( Z );
114  scomplex* buff_T = FLA_COMPLEX_PTR( T );
115  scomplex* buff_S = FLA_COMPLEX_PTR( S );
116 
118  n_A,
119  m_TS,
120  buff_A, rs_A, cs_A,
121  buff_Y, rs_Y, cs_Y,
122  buff_Z, rs_Z, cs_Z,
123  buff_T, rs_T, cs_T,
124  buff_S, rs_S, cs_S );
125 
126  break;
127  }
128 
129  case FLA_DOUBLE_COMPLEX:
130  {
131  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
132  dcomplex* buff_Y = FLA_DOUBLE_COMPLEX_PTR( Y );
133  dcomplex* buff_Z = FLA_DOUBLE_COMPLEX_PTR( Z );
134  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
135  dcomplex* buff_S = FLA_DOUBLE_COMPLEX_PTR( S );
136 
138  n_A,
139  m_TS,
140  buff_A, rs_A, cs_A,
141  buff_Y, rs_Y, cs_Y,
142  buff_Z, rs_Z, cs_Z,
143  buff_T, rs_T, cs_T,
144  buff_S, rs_S, cs_S );
145 
146  break;
147  }
148  }
149 
150  return FLA_SUCCESS;
151 }
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Error FLA_Bidiag_UT_u_step_opd_var4(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var4.c:703
FLA_Error FLA_Bidiag_UT_u_step_opc_var4(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var4.c:1251
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
FLA_Error FLA_Bidiag_UT_u_step_opz_var4(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var4.c:1799
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
Definition: blis_type_defs.h:132
FLA_Error FLA_Bidiag_UT_u_step_ops_var4(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var4.c:155
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
Definition: blis_type_defs.h:137

◆ FLA_Bidiag_UT_u_step_opt_var5()

FLA_Error FLA_Bidiag_UT_u_step_opt_var5 ( FLA_Obj  A,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  T,
FLA_Obj  S 
)

References FLA_Bidiag_UT_u_step_opc_var5(), FLA_Bidiag_UT_u_step_opd_var5(), FLA_Bidiag_UT_u_step_ops_var5(), FLA_Bidiag_UT_u_step_opz_var5(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u_blk_var5(), and FLA_Bidiag_UT_u_opt_var5().

// Listing of FLA_Bidiag_UT_u_step_opt_var5: datatype dispatcher for the
// variant-5 step routine. Compared with the var1-var3 dispatchers it also
// takes the objects Y and Z. It reads the datatype and dimensions of A, the
// length of T (used as m_TS), and the row/column strides of A, Y, Z, T and S;
// then, in the case matching A's datatype, it extracts typed buffer pointers
// from all five objects and forwards them together with the dimensions and
// strides. Always returns FLA_SUCCESS.
//
// NOTE ON THIS LISTING: in every case the line that opens the call (callee
// name and first argument) is absent -- the listing numbers skip 77, 97, 117
// and 137 -- so only the trailing arguments are shown. Judging by the
// "References" list of this entry and the buffer types, the callees are
// FLA_Bidiag_UT_u_step_ops_var5 / _opd_ / _opc_ / _opz_ for float / double /
// scomplex / dcomplex respectively.
36 {
37  FLA_Datatype datatype;
38  int m_A, n_A, m_TS;
39  int rs_A, cs_A;
40  int rs_Y, cs_Y;
41  int rs_Z, cs_Z;
42  int rs_T, cs_T;
43  int rs_S, cs_S;
44 
    // Only A's datatype is queried; Y, Z, T and S are read through the same
    // typed pointer macro as A in each case below.
45  datatype = FLA_Obj_datatype( A );
46 
47  m_A = FLA_Obj_length( A );
48  n_A = FLA_Obj_width( A );
    // m_TS comes from T alone; the dimensions of Y, Z and S are not queried.
49  m_TS = FLA_Obj_length( T );
50 
51  rs_A = FLA_Obj_row_stride( A );
52  cs_A = FLA_Obj_col_stride( A );
53 
54  rs_Y = FLA_Obj_row_stride( Y );
55  cs_Y = FLA_Obj_col_stride( Y );
56 
57  rs_Z = FLA_Obj_row_stride( Z );
58  cs_Z = FLA_Obj_col_stride( Z );
59 
60  rs_T = FLA_Obj_row_stride( T );
61  cs_T = FLA_Obj_col_stride( T );
62 
63  rs_S = FLA_Obj_row_stride( S );
64  cs_S = FLA_Obj_col_stride( S );
65 
66 
    // There is no default case: a datatype other than the four handled below
    // performs no work and still reaches "return FLA_SUCCESS".
67  switch ( datatype )
68  {
69  case FLA_FLOAT:
70  {
71  float* buff_A = FLA_FLOAT_PTR( A );
72  float* buff_Y = FLA_FLOAT_PTR( Y );
73  float* buff_Z = FLA_FLOAT_PTR( Z );
74  float* buff_T = FLA_FLOAT_PTR( T );
75  float* buff_S = FLA_FLOAT_PTR( S );
76 
78  n_A,
79  m_TS,
80  buff_A, rs_A, cs_A,
81  buff_Y, rs_Y, cs_Y,
82  buff_Z, rs_Z, cs_Z,
83  buff_T, rs_T, cs_T,
84  buff_S, rs_S, cs_S );
85 
86  break;
87  }
88 
89  case FLA_DOUBLE:
90  {
91  double* buff_A = FLA_DOUBLE_PTR( A );
92  double* buff_Y = FLA_DOUBLE_PTR( Y );
93  double* buff_Z = FLA_DOUBLE_PTR( Z );
94  double* buff_T = FLA_DOUBLE_PTR( T );
95  double* buff_S = FLA_DOUBLE_PTR( S );
96 
98  n_A,
99  m_TS,
100  buff_A, rs_A, cs_A,
101  buff_Y, rs_Y, cs_Y,
102  buff_Z, rs_Z, cs_Z,
103  buff_T, rs_T, cs_T,
104  buff_S, rs_S, cs_S );
105 
106  break;
107  }
108 
109  case FLA_COMPLEX:
110  {
111  scomplex* buff_A = FLA_COMPLEX_PTR( A );
112  scomplex* buff_Y = FLA_COMPLEX_PTR( Y );
113  scomplex* buff_Z = FLA_COMPLEX_PTR( Z );
114  scomplex* buff_T = FLA_COMPLEX_PTR( T );
115  scomplex* buff_S = FLA_COMPLEX_PTR( S );
116 
118  n_A,
119  m_TS,
120  buff_A, rs_A, cs_A,
121  buff_Y, rs_Y, cs_Y,
122  buff_Z, rs_Z, cs_Z,
123  buff_T, rs_T, cs_T,
124  buff_S, rs_S, cs_S );
125 
126  break;
127  }
128 
129  case FLA_DOUBLE_COMPLEX:
130  {
131  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
132  dcomplex* buff_Y = FLA_DOUBLE_COMPLEX_PTR( Y );
133  dcomplex* buff_Z = FLA_DOUBLE_COMPLEX_PTR( Z );
134  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
135  dcomplex* buff_S = FLA_DOUBLE_COMPLEX_PTR( S );
136 
138  n_A,
139  m_TS,
140  buff_A, rs_A, cs_A,
141  buff_Y, rs_Y, cs_Y,
142  buff_Z, rs_Z, cs_Z,
143  buff_T, rs_T, cs_T,
144  buff_S, rs_S, cs_S );
145 
146  break;
147  }
148  }
149 
150  return FLA_SUCCESS;
151 }
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Error FLA_Bidiag_UT_u_step_ops_var5(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var5.c:155
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Bidiag_UT_u_step_opz_var5(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var5.c:1340
FLA_Error FLA_Bidiag_UT_u_step_opd_var5(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var5.c:550
Definition: blis_type_defs.h:132
int FLA_Datatype
Definition: FLA_type_defs.h:49
FLA_Error FLA_Bidiag_UT_u_step_opc_var5(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var5.c:945
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
Definition: blis_type_defs.h:137

◆ FLA_Bidiag_UT_u_step_opz_var1()

FLA_Error FLA_Bidiag_UT_u_step_opz_var1 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_T,
int  rs_T,
int  cs_T,
dcomplex * buff_S,
int  rs_S,
int  cs_S 
)

References bl1_zcopyv(), bl1_zgemv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opz_var1(), FLA_Apply_H2_UT_r_opz_var1(), FLA_free(), FLA_Househ2_UT_l_opz(), FLA_Househ2_UT_r_opz(), FLA_malloc(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var1().

525 {
526  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
527  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
528 
529  int i;
530 
531  // b_alg = FLA_Obj_length( T );
532  int b_alg = m_TS;
533 
534  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
535  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
536  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
537  dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
538  int inc_v = 1;
539 
540  for ( i = 0; i < b_alg; ++i )
541  {
542  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
543  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
544  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
545  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
546  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
547  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
548  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
549 
550  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
551  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
552 
553  dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
554  dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
555 
556  dcomplex* v21 = buff_v + (i+1)*inc_v;
557 
558  dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
559  dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
560 
561  dcomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
562  dcomplex* A22_r = A22 + (1 )*cs_A + (0 )*rs_A;
563 
564  dcomplex* v21_t = v21 + (0 )*inc_v;
565  dcomplex* v21_b = v21 + (1 )*inc_v;
566 
567  int m_ahead = m_A - i - 1;
568  int n_ahead = n_A - i - 1;
569  int m_behind = i;
570  int n_behind = i;
571 
572  /*------------------------------------------------------------*/
573 
574  // FLA_Househ2_UT( FLA_LEFT,
575  // alpha11,
576  // a21, tau11 );
577  FLA_Househ2_UT_l_opz( m_ahead,
578  alpha11,
579  a21, rs_A,
580  tau11 );
581 
582  if ( n_ahead > 0 )
583  {
584  // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t, A22 );
585  FLA_Apply_H2_UT_l_opz_var1( m_ahead,
586  n_ahead,
587  tau11,
588  a21, rs_A,
589  a12t, cs_A,
590  A22, rs_A, cs_A );
591 
592  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
593  FLA_Househ2_UT_r_opz( n_ahead - 1,
594  a12t_l,
595  a12t_r, cs_A,
596  sigma11 );
597 
598  // FLA_Set( FLA_ONE, v21_t );
599  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
600  *v21_t = *buff_1;
601  bl1_zcopyv( BLIS1_NO_CONJUGATE,
602  n_ahead - 1,
603  a12t_r, cs_A,
604  v21_b, inc_v );
605 
606  // FLA_Apply_H2_UT( FLA_RIGHT, sigma11, v21_b, A22_l, A22_r );
607  FLA_Apply_H2_UT_r_opz_var1( m_ahead,
608  n_ahead - 1,
609  sigma11,
610  v21_b, inc_v,
611  A22_l, rs_A,
612  A22_r, rs_A, cs_A );
613 
614  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
615  bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
616  BLIS1_NO_CONJUGATE,
617  m_behind,
618  n_ahead,
619  buff_1,
620  A02, rs_A, cs_A,
621  v21, inc_v,
622  buff_0,
623  s01, rs_S );
624  }
625 
626  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
627  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
628  bl1_zcopyv( BLIS1_CONJUGATE,
629  n_behind,
630  a10t, cs_A,
631  t01, rs_T );
632  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
633  BLIS1_NO_CONJUGATE,
634  m_ahead,
635  n_behind,
636  buff_1,
637  A20, rs_A, cs_A,
638  a21, rs_A,
639  buff_1,
640  t01, rs_T );
641 
642  /*------------------------------------------------------------*/
643 
644  }
645 
646  // FLA_Obj_free( &v );
647  FLA_free( buff_v );
648 
649  return FLA_SUCCESS;
650 }
FLA_Error FLA_Apply_H2_UT_l_opz_var1(int m_u2_A2, int n_a1t, dcomplex *tau, dcomplex *u2, int inc_u2, dcomplex *a1t, int inc_a1t, dcomplex *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:343
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:56
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
int i
Definition: bl1_axmyv2.c:145
FLA_Error FLA_Househ2_UT_r_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:693
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
Definition: blis_type_defs.h:137
FLA_Error FLA_Apply_H2_UT_r_opz_var1(int n_u2h_A2, int m_a1, dcomplex *tau, dcomplex *u2h, int inc_u2h, dcomplex *a1, int inc_a1, dcomplex *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_r_opt_var1.c:327

◆ FLA_Bidiag_UT_u_step_opz_var2()

FLA_Error FLA_Bidiag_UT_u_step_opz_var2 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_T,
int  rs_T,
int  cs_T,
dcomplex * buff_S,
int  rs_S,
int  cs_S 
)

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_Househ2_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var2().

744 {
745  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
746  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
747  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
748 
749  dcomplex beta;
750  int i;
751 
752  // b_alg = FLA_Obj_length( T );
753  int b_alg = m_TS;
754 
755  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
756  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
757  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
758  dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
759  dcomplex* buff_y = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
760  dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
761  int inc_v = 1;
762  int inc_y = 1;
763  int inc_z = 1;
764 
765  for ( i = 0; i < b_alg; ++i )
766  {
767  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
768  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
769  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
770  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
771  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
772  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
773  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
774 
775  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
776  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
777 
778  dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
779  dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
780 
781  dcomplex* v21 = buff_v + (i+1)*inc_v;
782 
783  dcomplex* y21 = buff_y + (i+1)*inc_y;
784 
785  dcomplex* z21 = buff_z + (i+1)*inc_z;
786 
787  dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
788  dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
789 
790  dcomplex* v21_t = v21 + (0 )*inc_v;
791  dcomplex* v21_b = v21 + (1 )*inc_v;
792 
793  int m_ahead = m_A - i - 1;
794  int n_ahead = n_A - i - 1;
795  int m_behind = i;
796  int n_behind = i;
797 
798  /*------------------------------------------------------------*/
799 
800  // FLA_Househ2_UT( FLA_LEFT,
801  // alpha11,
802  // a21, tau11 );
803  FLA_Househ2_UT_l_opz( m_ahead,
804  alpha11,
805  a21, rs_A,
806  tau11 );
807 
808  if ( n_ahead > 0 )
809  {
810  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
811  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
812  bl1_zcopyv( BLIS1_CONJUGATE,
813  n_ahead,
814  a12t, cs_A,
815  y21, inc_y );
816  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
817  BLIS1_NO_CONJUGATE,
818  m_ahead,
819  n_ahead,
820  buff_1,
821  A22, rs_A, cs_A,
822  a21, rs_A,
823  buff_1,
824  y21, inc_y );
825 
826  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
827  bl1_zinvscalv( BLIS1_NO_CONJUGATE,
828  n_ahead,
829  tau11,
830  y21, inc_y );
831 
832  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
833  bl1_zaxpyv( BLIS1_CONJUGATE,
834  n_ahead,
835  buff_m1,
836  y21, inc_y,
837  a12t, cs_A );
838 
839  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
840  FLA_Househ2_UT_r_opz( n_ahead - 1,
841  a12t_l,
842  a12t_r, cs_A,
843  sigma11 );
844 
845  // FLA_Set( FLA_ONE, v21_t );
846  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
847  *v21_t = *buff_1;
848  bl1_zcopyv( BLIS1_NO_CONJUGATE,
849  n_ahead - 1,
850  a12t_r, cs_A,
851  v21_b, inc_y );
852 
853  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
854  // FLA_Scal( FLA_MINUS_ONE, beta );
855  bl1_zdot( BLIS1_CONJUGATE,
856  n_ahead,
857  y21, inc_y,
858  v21, inc_v,
859  &beta );
860  bl1_zneg1( &beta );
861 
862  // FLA_Copy( a21, z21 );
863  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
864  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
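865  bl1_zcopyv( BLIS1_NO_CONJUGATE,
866  m_ahead,
867  a21, rs_A,
868  z21, inc_z );
869  bl1_zgemv( BLIS1_NO_TRANSPOSE,
870  BLIS1_NO_CONJUGATE,
871  m_ahead,
872  n_ahead,
873  buff_1,
874  A22, rs_A, cs_A,
875  v21, inc_v,
876  &beta,
877  z21, inc_z );
878  bl1_zinvscalv( BLIS1_NO_CONJUGATE,
879  m_ahead,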
880  sigma11,
881  z21, inc_z );
882 
883  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
884  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
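885  bl1_zger( BLIS1_NO_CONJUGATE,
886  BLIS1_CONJUGATE,
887  m_ahead,
888  n_ahead,
889  buff_m1,
890  a21, rs_A,
891  y21, inc_y,
892  A22, rs_A, cs_A );
893  bl1_zger( BLIS1_NO_CONJUGATE,
894  BLIS1_CONJUGATE,
895  m_ahead,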
896  n_ahead,
897  buff_m1,
898  z21, inc_z,
899  v21, inc_v,
900  A22, rs_A, cs_A );
901 
902  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
903  bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
904  BLIS1_NO_CONJUGATE,
905  m_behind,
906  n_ahead,
907  buff_1,
908  A02, rs_A, cs_A,
909  v21, inc_v,
910  buff_0,
911  s01, rs_S );
912  }
913 
914  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
915  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
916  bl1_zcopyv( BLIS1_CONJUGATE,
917  n_behind,
918  a10t, cs_A,
919  t01, rs_T );
920  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
921  BLIS1_NO_CONJUGATE,
922  m_ahead,
923  n_behind,
924  buff_1,
925  A20, rs_A, cs_A,
926  a21, rs_A,
927  buff_1,
928  t01, rs_T );
929 
930  /*------------------------------------------------------------*/
931 
932  }
933 
934  // FLA_Obj_free( &v );
935  // FLA_Obj_free( &y );
936  // FLA_Obj_free( &z );
937  FLA_free( buff_v );
938  FLA_free( buff_y );
939  FLA_free( buff_z );
940 
941  return FLA_SUCCESS;
942 }
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:194
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_invscalv.c:78
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
int i
Definition: bl1_axmyv2.c:145
FLA_Error FLA_Househ2_UT_r_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:693
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
Definition: blis_type_defs.h:137
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60

◆ FLA_Bidiag_UT_u_step_opz_var3()

FLA_Error FLA_Bidiag_UT_u_step_opz_var3 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_T,
int  rs_T,
int  cs_T,
dcomplex * buff_S,
int  rs_S,
int  cs_S 
)

References bl1_zaxpyv(), bl1_zconjv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zinvscalv(), bl1_zscals(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_Househ2s_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var3().

1374 {
1375  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
1376  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
1377  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
1378 
1379  dcomplex alpha12;
1380  dcomplex minus_conj_alpha12;
1381  dcomplex psi11_minus_alpha12;
1382  dcomplex minus_inv_tau11;
1383  dcomplex minus_upsilon11;
1384  dcomplex minus_conj_nu11;
1385  dcomplex minus_conj_psi11;
1386  dcomplex minus_zeta11;
1387  dcomplex beta;
1388  int i;
1389 
1390  // b_alg = FLA_Obj_length( T );
1391  int b_alg = m_TS;
1392 
1393  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1394  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
1395  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1396  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
1397  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1398  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
1399  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
1400  dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1401  dcomplex* buff_ap = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1402  dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1403  dcomplex* buff_up = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1404  dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1405  dcomplex* buff_y = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1406  dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1407  int inc_w = 1;
1408  int inc_ap = 1;
1409  int inc_u = 1;
1410  int inc_up = 1;
1411  int inc_v = 1;
1412  int inc_y = 1;
1413  int inc_z = 1;
1414 
1415  for ( i = 0; i < b_alg; ++i )
1416  {
1417  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1418  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1419  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1420  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1421  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1422  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1423  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1424 
1425  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1426  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1427 
1428  dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1429  dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1430 
1431  dcomplex* w21 = buff_w + (i+1)*inc_w;
1432 
1433  dcomplex* a12p = buff_ap + (i+1)*inc_ap;
1434 
1435  dcomplex* upsilon11 = buff_u + (i )*inc_u;
1436  dcomplex* u21 = buff_u + (i+1)*inc_u;
1437 
1438  dcomplex* u21p = buff_up + (i+1)*inc_up;
1439 
1440  dcomplex* nu11 = buff_v + (i )*inc_v;
1441  dcomplex* v21 = buff_v + (i+1)*inc_v;
1442 
1443  dcomplex* psi11 = buff_y + (i )*inc_y;
1444  dcomplex* y21 = buff_y + (i+1)*inc_y;
1445 
1446  dcomplex* zeta11 = buff_z + (i )*inc_z;
1447  dcomplex* z21 = buff_z + (i+1)*inc_z;
1448 
1449  dcomplex* a12p_t = a12p + (0 )*inc_ap;
1450  dcomplex* a12p_b = a12p + (1 )*inc_ap;
1451 
1452  dcomplex* v21_t = v21 + (0 )*inc_v;
1453  dcomplex* v21_b = v21 + (1 )*inc_v;
1454 
1455  dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1456  dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1457 
1458  dcomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
1459 
1460  int m_ahead = m_A - i - 1;
1461  int n_ahead = n_A - i - 1;
1462  int m_behind = i;
1463  int n_behind = i;
1464 
1465  /*------------------------------------------------------------*/
1466 
1467  if ( m_behind > 0 )
1468  {
1469  // FLA_Copy( upsilon11, minus_upsilon11 );
1470  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
1471  bl1_zmult3( buff_m1, upsilon11, &minus_upsilon11 );
1472 
1473  // FLA_Copy( zeta11, minus_zeta11 );
1474  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
1475  bl1_zmult3( buff_m1, zeta11, &minus_zeta11 );
1476 
1477  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
1478  // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
1479  bl1_zcopyconj( psi11, &minus_conj_psi11 );
1480  bl1_zscals( buff_m1, &minus_conj_psi11 );
1481 
1482  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
1483  // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
1484  bl1_zcopyconj( nu11, &minus_conj_nu11 );
1485  bl1_zscals( buff_m1, &minus_conj_nu11 );
1486 
1487  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
1488  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, zeta11, alpha11 );
1489  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1490  1,
1491  &minus_conj_psi11,
1492  upsilon11, 1,
1493  alpha11, 1 );
1494  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1495  1,
1496  &minus_conj_nu11,
1497  zeta11, 1,
1498  alpha11, 1 );
1499 
1500  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
1501  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
1502  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1503  m_ahead,
1504  &minus_conj_psi11,
1505  u21, inc_u,
1506  a21, rs_A );
1507  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1508  m_ahead,
1509  &minus_conj_nu11,
1510  z21, inc_z,
1511  a21, rs_A );
1512 
1513  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
1514  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11, v21, a12t );
1515  bl1_zaxpyv( BLIS1_CONJUGATE,
1516  n_ahead,
1517  &minus_upsilon11,
1518  y21, inc_y,
1519  a12t, cs_A );
1520  bl1_zaxpyv( BLIS1_CONJUGATE,
1521  n_ahead,
1522  &minus_zeta11,
1523  v21, inc_v,
1524  a12t, cs_A );
1525  }
1526 
1527  // FLA_Househ2_UT( FLA_LEFT,
1528  // alpha11,
1529  // a21, tau11 );
1530  // FLA_Copy( a21, u21p );
1531  FLA_Househ2_UT_l_opz( m_ahead,
1532  alpha11,
1533  a21, rs_A,
1534  tau11 );
1535  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1536  m_ahead,
1537  a21, rs_A,
1538  u21p, inc_up );
1539 
1540  if ( n_ahead > 0 )
1541  {
1542  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1543  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1544  bl1_zdiv3( buff_m1, tau11, &minus_inv_tau11 );
1545 
1546  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1547  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1548  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1549  n_ahead,
1550  a12t, cs_A,
1551  a12p, inc_ap );
1552  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1553  n_ahead,
1554  &minus_inv_tau11,
1555  a12t, cs_A,
1556  a12p, inc_ap );
1557  }
1558 
1559  if ( m_behind > 0 )
1560  {
1561  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1562  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
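1563  bl1_zger( BLIS1_NO_CONJUGATE,
1564  BLIS1_CONJUGATE,
1565  m_ahead,
1566  n_ahead,
1567  buff_m1,
1568  u21, inc_u,
1569  y21, inc_y,
1570  A22, rs_A, cs_A );
1571  bl1_zger( BLIS1_NO_CONJUGATE,
1572  BLIS1_CONJUGATE,
1573  m_ahead,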
1574  n_ahead,
1575  buff_m1,
1576  z21, inc_z,
1577  v21, inc_v,
1578  A22, rs_A, cs_A );
1579  }
1580 
1581  if ( n_ahead > 0 )
1582  {
1583  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
1584  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
1585  BLIS1_NO_CONJUGATE,
1586  m_ahead,
1587  n_ahead,
1588  buff_1,
1589  A22, rs_A, cs_A,
1590  u21p, inc_up,
1591  buff_0,
1592  y21, inc_y );
1593 
1594  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1595  bl1_zaxpyv( BLIS1_CONJUGATE,
1596  n_ahead,
1597  &minus_inv_tau11,
1598  y21, inc_y,
1599  a12p, inc_ap );
1600 
1601  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1602  bl1_zgemv( BLIS1_NO_TRANSPOSE,
1603  BLIS1_CONJUGATE,
1604  m_ahead,
1605  n_ahead,
1606  buff_1,
1607  A22, rs_A, cs_A,
1608  a12p, inc_ap,
1609  buff_0,
1610  w21, inc_w );
1611 
1612  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1613  bl1_zaxpyv( BLIS1_CONJUGATE,
1614  n_ahead,
1615  buff_1,
1616  a12t, cs_A,
1617  y21, inc_y );
1618 
1619  // FLA_Househ2s_UT( FLA_RIGHT,
1620  // a12p_t,
1621  // a12p_b,
1622  // alpha12, psi11_minus_alpha12, sigma11 );
1623  FLA_Househ2s_UT_r_opz( n_ahead - 1,
1624  a12p_t,
1625  a12p_b, inc_ap,
1626  &alpha12,
1627  &psi11_minus_alpha12,
1628  sigma11 );
1629 
1630  // FLA_Copy( a12p, v21 );
1631  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1632  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1633  // FLA_Conjugate( v21_b );
1634  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1635  n_ahead,
1636  a12p, inc_ap,
1637  v21, inc_v );
1638  bl1_zmult4( buff_m1, &alpha12, v21_t, v21_t );
1639  bl1_zinvscalv( BLIS1_NO_CONJUGATE,
1640  n_ahead,
1641  &psi11_minus_alpha12,
1642  v21, inc_v );
1643  bl1_zconjv( n_ahead - 1,
1644  v21_b, inc_v );
1645 
1646  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
1647  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1648  *a12t_l = alpha12;
1649  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1650  n_ahead - 1,
1651  v21_b, inc_v,
1652  a12t_r, cs_A );
1653  }
1654 
1655  // FLA_Copy( u21p, u21 );
1656  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1657  m_ahead,
1658  u21p, inc_up,
1659  u21, inc_u );
1660 
1661  if ( n_ahead > 0 )
1662  {
1663  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1664  // FLA_Scal( FLA_MINUS_ONE, beta );
1665  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1666  bl1_zdot( BLIS1_CONJUGATE,
1667  n_ahead,
1668  y21, inc_y,
1669  v21, inc_v,
1670  &beta );
1671  bl1_zscals( &minus_inv_tau11, &beta );
1672 
1673  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1674  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1675  bl1_zcopyconj( &alpha12, &minus_conj_alpha12 );
1676  bl1_zneg1( &minus_conj_alpha12 );
1677 
1678  // FLA_Copy( w21, z21 );
1679  // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
1680  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1681  // FLA_Axpy( beta, u21, z21 );
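1682  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1683  m_ahead,
1684  w21, inc_w,
1685  z21, inc_z );
1686  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1687  m_ahead,
1688  &minus_conj_alpha12,
1689  A22_l, rs_A,
1690  z21, inc_z );
1691  bl1_zinvscalv( BLIS1_CONJUGATE,
1692  m_ahead,
1693  &psi11_minus_alpha12,
1694  z21, inc_z );
1695  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1696  m_ahead,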
1697  &beta,
1698  u21, inc_u,
1699  z21, inc_z );
1700 
1701  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1702  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1703  bl1_zinvscalv( BLIS1_NO_CONJUGATE,
1704  n_ahead,
1705  tau11,
1706  y21, inc_y );
1707  bl1_zinvscalv( BLIS1_NO_CONJUGATE,
1708  m_ahead,
1709  sigma11,
1710  z21, inc_z );
1711 
1712  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
1713  bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
1714  BLIS1_NO_CONJUGATE,
1715  m_behind,
1716  n_ahead,
1717  buff_1,
1718  A02, rs_A, cs_A,
1719  v21, inc_v,
1720  buff_0,
1721  s01, rs_S );
1722  }
1723 
1724  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1725  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1726  bl1_zcopyv( BLIS1_CONJUGATE,
1727  n_behind,
1728  a10t, cs_A,
1729  t01, rs_T );
1730  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
1731  BLIS1_NO_CONJUGATE,
1732  m_ahead,
1733  n_behind,
1734  buff_1,
1735  A20, rs_A, cs_A,
1736  u21, inc_u,
1737  buff_1,
1738  t01, rs_T );
1739 
1740  if ( m_behind + 1 == b_alg && n_ahead > 0 )
1741  {
1742  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1743  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
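1744  bl1_zger( BLIS1_NO_CONJUGATE,
1745  BLIS1_CONJUGATE,
1746  m_ahead,
1747  n_ahead,
1748  buff_m1,
1749  u21, inc_u,
1750  y21, inc_y,
1751  A22, rs_A, cs_A );
1752  bl1_zger( BLIS1_NO_CONJUGATE,
1753  BLIS1_CONJUGATE,
1754  m_ahead,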
1755  n_ahead,
1756  buff_m1,
1757  z21, inc_z,
1758  v21, inc_v,
1759  A22, rs_A, cs_A );
1760  }
1761 
1762  /*------------------------------------------------------------*/
1763 
1764  }
1765 
1766  // FLA_Obj_free( &w );
1767  // FLA_Obj_free( &ap );
1768  // FLA_Obj_free( &u );
1769  // FLA_Obj_free( &up );
1770  // FLA_Obj_free( &v );
1771  // FLA_Obj_free( &y );
1772  // FLA_Obj_free( &z );
1773  FLA_free( buff_w );
1774  FLA_free( buff_ap );
1775  FLA_free( buff_u );
1776  FLA_free( buff_up );
1777  FLA_free( buff_v );
1778  FLA_free( buff_y );
1779  FLA_free( buff_z );
1780 
1781  return FLA_SUCCESS;
1782 }
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:194
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_invscalv.c:78
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
FLA_Error FLA_Househ2s_UT_r_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *alpha, dcomplex *chi_1_minus_alpha, dcomplex *tau)
Definition: FLA_Househ2s_UT.c:610
int i
Definition: bl1_axmyv2.c:145
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
Definition: blis_type_defs.h:137
bl1_zscals(beta, rho_yz)
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60
void bl1_zconjv(int m, dcomplex *x, int incx)
Definition: bl1_conjv.c:34

◆ FLA_Bidiag_UT_u_step_opz_var4()

FLA_Error FLA_Bidiag_UT_u_step_opz_var4 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex * buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex * buff_T,
int  rs_T,
int  cs_T,
dcomplex * buff_S,
int  rs_S,
int  cs_S 
)

References bl1_zaxpyv(), bl1_zconjv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zinvscalv(), bl1_zscals(), bl1_zsetm(), bl1_zsetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_Househ2s_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var4().

// Body of FLA_Bidiag_UT_u_step_opz_var4: dcomplex variant working directly on raw
// buffers (base pointer + row stride rs_X + column stride cs_X for A, Y, Z, T, S).
//  - Y (n_A x m_TS) and Z (m_A x m_TS) are zero-filled before the loop.
//  - The loop runs b_alg = m_TS iterations; iteration i addresses row/column i of A
//    and column i of Y, Z, T and S through the pointer views set up at the loop top.
//  - Ten unit-stride work vectors of length m_A or n_A are allocated up front and
//    released before returning.
//  - Always returns FLA_SUCCESS.
// NOTE(review): the FLA_malloc results are used without a check in this body;
//   confirm how FLA_malloc reports failure.
// NOTE(review): this listing skips some inner line numbers (e.g. 1949-1950), so the
//   opening line(s) of many calls are not shown. The commented FLA_* call(s) above
//   each bare argument list name the operation; restore from the original file
//   before compiling.
1807 {
1808  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
1809  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
1810  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
1811 
1812  dcomplex alpha12;
1813  dcomplex minus_conj_alpha12;
1814  dcomplex psi11_minus_alpha12;
1815  dcomplex minus_inv_tau11;
1816  dcomplex beta;
1817  dcomplex last_elem;
1818  int i;
1819 
1820  // b_alg = FLA_Obj_length( T );
1821  int b_alg = m_TS;
1822 
1823  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1824  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
1825  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
1826  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1827  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
1828  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1829  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
1830  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
1831  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
1832  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
1833  dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1834  dcomplex* buff_al = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1835  dcomplex* buff_ap = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1836  dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1837  dcomplex* buff_up = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1838  dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1839  dcomplex* buff_d = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1840  dcomplex* buff_e = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1841  dcomplex* buff_f = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1842  dcomplex* buff_g = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1843  int inc_w = 1;
1844  int inc_al = 1;
1845  int inc_ap = 1;
1846  int inc_u = 1;
1847  int inc_up = 1;
1848  int inc_v = 1;
1849  int inc_d = 1;
1850  int inc_e = 1;
1851  int inc_f = 1;
1852  int inc_g = 1;
1853 
1854  // FLA_Set( FLA_ZERO, Y );
1855  // FLA_Set( FLA_ZERO, Z );
1856  bl1_zsetm( n_A,
1857  b_alg,
1858  buff_0,
1859  buff_Y, rs_Y, cs_Y );
1860  bl1_zsetm( m_A,
1861  b_alg,
1862  buff_0,
1863  buff_Z, rs_Z, cs_Z );
1864 
1865  for ( i = 0; i < b_alg; ++i )
1866  {
 // Pointer views for iteration i. Naming follows the 3x3 partitioning used in the
 // FLA_* comments: X00/x01/X02 above row i, x10t/chi11/x12t in row i, X20/x21/X22 below.
1867  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1868  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1869  dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
1870  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1871  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1872  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1873  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1874  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1875 
1876  dcomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1877  dcomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1878  dcomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1879 
1880  dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1881  dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1882  dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1883 
1884  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1885  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1886 
1887  dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1888  dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1889 
1890  dcomplex* w21 = buff_w + (i+1)*inc_w;
1891 
1892  dcomplex* a22l = buff_al + (i+1)*inc_al;
1893 
1894  dcomplex* a12p = buff_ap + (i+1)*inc_ap;
1895 
1896  dcomplex* u21 = buff_u + (i+1)*inc_u;
1897 
1898  dcomplex* u21p = buff_up + (i+1)*inc_up;
1899 
1900  dcomplex* v21 = buff_v + (i+1)*inc_v;
1901 
1902  dcomplex* d0 = buff_d + (0 )*inc_d;
1903 
1904  dcomplex* e0 = buff_e + (0 )*inc_e;
1905 
1906  dcomplex* f0 = buff_f + (0 )*inc_f;
1907 
1908  dcomplex* g0 = buff_g + (0 )*inc_g;
1909 
1910  dcomplex* a12p_t = a12p + (0 )*inc_ap;
1911  dcomplex* a12p_b = a12p + (1 )*inc_ap;
1912 
1913  dcomplex* v21_t = v21 + (0 )*inc_v;
1914  dcomplex* v21_b = v21 + (1 )*inc_v;
1915 
 // Element i-1 of column i of A; only dereferenced when m_behind > 0.
1916  dcomplex* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
1917 
1918  dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1919  dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1920 
1921  dcomplex* A02_l = A02 + (0 )*cs_A + (0 )*rs_A;
1922 
1923  dcomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
1924 
1925  dcomplex* Y20_t = Y20 + (0 )*cs_Y + (0 )*rs_Y;
1926 
1927  dcomplex* ABL = a10t;
1928  dcomplex* ZBL = z10t;
1929 
1930  dcomplex* a2 = alpha11;
1931 
1932  int m_ahead = m_A - i - 1;
1933  int n_ahead = n_A - i - 1;
1934  int m_behind = i;
1935  int n_behind = i;
1936 
1937  /*------------------------------------------------------------*/
1938 
 // Temporarily overwrite *a01_b with one for the four updates below; the saved
 // value is written back once they are done.
1939  if ( m_behind > 0 )
1940  {
1941  // FLA_Copy( a01_b, last_elem );
1942  // FLA_Set( FLA_ONE, a01_b );
1943  last_elem = *a01_b;
1944  *a01_b = *buff_1;
1945  }
1946 
1947  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1948  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
1951  m_ahead + 1,
1952  n_behind,
1953  buff_m1,
1954  ABL, rs_A, cs_A,
1955  y10t, cs_Y,
1956  buff_1,
1957  a2, rs_A );
1960  m_ahead + 1,
1961  n_behind,
1962  buff_m1,
1963  ZBL, rs_Z, cs_Z,
1964  a01, rs_A,
1965  buff_1,
1966  a2, rs_A );
1967 
1968  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1969  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
1972  n_ahead,
1973  n_behind,
1974  buff_m1,
1975  Y20, rs_Y, cs_Y,
1976  a10t, cs_A,
1977  buff_1,
1978  a12t, cs_A );
1981  m_behind,
1982  n_ahead,
1983  buff_m1,
1984  A02, rs_A, cs_A,
1985  z10t, cs_Z,
1986  buff_1,
1987  a12t, cs_A );
1988 
1989  if ( m_behind > 0 )
1990  {
1991  // FLA_Copy( last_elem, a01_b );
1992  *a01_b = last_elem;
1993  }
1994 
1995  // FLA_Househ2_UT( FLA_LEFT,
1996  // alpha11,
1997  // a21, tau11 );
1998  // FLA_Copy( a21, u21p );
1999  FLA_Househ2_UT_l_opz( m_ahead,
2000  alpha11,
2001  a21, rs_A,
2002  tau11 );
2004  m_ahead,
2005  a21, rs_A,
2006  u21p, inc_up );
2007 
2008  if ( n_ahead > 0 )
2009  {
2010  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
2011  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
2012  bl1_zdiv3( buff_m1, tau11, &minus_inv_tau11 );
2013 
2014  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
2015  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
2017  n_ahead,
2018  a12t, cs_A,
2019  a12p, inc_ap );
2021  n_ahead,
2022  &minus_inv_tau11,
2023  a12t, cs_A,
2024  a12p, inc_ap );
2025 
2026  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
2027  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
2030  m_ahead,
2031  n_behind,
2032  buff_1,
2033  A20, rs_A, cs_A,
2034  u21p, inc_up,
2035  buff_0,
2036  d0, inc_d );
2039  m_ahead,
2040  n_behind,
2041  buff_1,
2042  Z20, rs_Z, cs_Z,
2043  u21p, inc_up,
2044  buff_0,
2045  e0, inc_e );
2046 
2047  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
2048  // FLA_Axpy( FLA_ONE, d0, t01 );
2050  n_behind,
2051  a10t, cs_A,
2052  t01, rs_T );
2054  n_behind,
2055  buff_1,
2056  d0, inc_d,
2057  t01, rs_T );
2058 
2059  // FLA_Set( FLA_ZERO, y21 );
2060  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
2061  // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
2062  bl1_zsetv( n_ahead,
2063  buff_0,
2064  y21, rs_Y );
2067  n_ahead,
2068  n_behind,
2069  buff_m1,
2070  Y20, rs_Y, cs_Y,
2071  d0, inc_d,
2072  buff_1,
2073  y21, rs_Y );
2076  m_behind,
2077  n_ahead,
2078  buff_m1,
2079  A02, rs_A, cs_A,
2080  e0, inc_e,
2081  buff_1,
2082  y21, rs_Y );
2083 
2084  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
2087  m_ahead,
2088  n_ahead,
2089  buff_1,
2090  A22, rs_A, cs_A,
2091  u21p, inc_up,
2092  buff_1,
2093  y21, rs_Y );
2094 
2095  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
2097  n_ahead,
2098  &minus_inv_tau11,
2099  y21, rs_Y,
2100  a12p, inc_ap );
2101 
2102  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
2105  m_ahead,
2106  n_ahead,
2107  buff_1,
2108  A22, rs_A, cs_A,
2109  a12p, inc_ap,
2110  buff_0,
2111  w21, inc_w );
2112 
2113  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
2114  // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
2117  n_ahead,
2118  n_behind,
2119  buff_1,
2120  Y20, rs_Y, cs_Y,
2121  a12p, inc_ap,
2122  buff_0,
2123  f0, inc_f );
2126  m_behind,
2127  n_ahead,
2128  buff_1,
2129  A02, rs_A, cs_A,
2130  a12p, inc_ap,
2131  buff_0,
2132  g0, inc_g );
2133 
2134  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
2135  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
2138  m_ahead,
2139  n_behind,
2140  buff_m1,
2141  A20, rs_A, cs_A,
2142  f0, inc_f,
2143  buff_1,
2144  w21, inc_w );
2147  m_ahead,
2148  n_behind,
2149  buff_m1,
2150  Z20, rs_Z, cs_Z,
2151  g0, inc_g,
2152  buff_1,
2153  w21, inc_w );
2154 
2155  // FLA_Copy( A22_l, a22l );
2156  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
2157  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
2159  m_ahead,
2160  A22_l, rs_A,
2161  a22l, inc_al );
2164  m_ahead,
2165  n_behind,
2166  buff_m1,
2167  A20, rs_A, cs_A,
2168  Y20_t, cs_Y,
2169  buff_1,
2170  a22l, inc_al );
2173  m_ahead,
2174  n_behind,
2175  buff_m1,
2176  Z20, rs_Z, cs_Z,
2177  A02_l, rs_A,
2178  buff_1,
2179  a22l, inc_al );
2180 
2181  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
2183  n_ahead,
2184  buff_1,
2185  a12t, cs_A,
2186  y21, rs_Y );
2187 
2188  // FLA_Househ2s_UT( FLA_RIGHT,
2189  // a12p_t,
2190  // a12p_b,
2191  // alpha12, psi11_minus_alpha12, sigma11 );
2192  FLA_Househ2s_UT_r_opz( n_ahead - 1,
2193  a12p_t,
2194  a12p_b, inc_ap,
2195  &alpha12,
2196  &psi11_minus_alpha12,
2197  sigma11 );
2198 
2199  // FLA_Copy( a12p, v21 );
2200  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
2201  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
2202  // FLA_Conjugate( v21_b );
2204  n_ahead,
2205  a12p, inc_ap,
2206  v21, inc_v );
2207  bl1_zmult4( buff_m1, &alpha12, v21_t, v21_t );
2209  n_ahead,
2210  &psi11_minus_alpha12,
2211  v21, inc_v );
2212  bl1_zconjv( n_ahead - 1,
2213  v21_b, inc_v );
2214 
2215  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
2216  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
2217  bl1_zcopyconj( &alpha12, &minus_conj_alpha12 );
2218  bl1_zneg1( &minus_conj_alpha12 );
2219 
2220  // FLA_Copy( g0, s01 );
2221  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
2222  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
2224  n_behind,
2225  g0, inc_g,
2226  s01, rs_S );
2228  n_behind,
2229  &minus_conj_alpha12,
2230  A02_l, rs_A,
2231  s01, rs_S );
2233  n_behind,
2234  &psi11_minus_alpha12,
2235  s01, rs_S );
2236 
2237  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
2238  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
2239  *a12t_l = alpha12;
2241  n_ahead - 1,
2242  v21_b, inc_v,
2243  a12t_r, cs_A );
2244  }
2245 
2246  // FLA_Copy( u21p, u21 );
2248  m_ahead,
2249  u21p, inc_up,
2250  u21, inc_u );
2251 
2252  if ( n_ahead > 0 )
2253  {
2254  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
2255  // FLA_Scal( FLA_MINUS_ONE, beta );
2256  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
2258  n_ahead,
2259  y21, rs_Y,
2260  v21, inc_v,
2261  &beta );
 // minus_inv_tau11 was set to -1/tau11 above, so one scaling covers both the
 // negation and the division by tau11 named in the FLA_* comments.
2262  bl1_zscals( &minus_inv_tau11, &beta );
2263 
2264  // FLA_Copy( w21, z21 );
2265  // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
2266  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
2267  // FLA_Axpy( beta, u21, z21 );
2269  m_ahead,
2270  w21, inc_w,
2271  z21, rs_Z );
2273  m_ahead,
2274  &minus_conj_alpha12,
2275  a22l, inc_al,
2276  z21, rs_Z );
2278  m_ahead,
2279  &psi11_minus_alpha12,
2280  z21, rs_Z );
2282  m_ahead,
2283  &beta,
2284  u21, inc_u,
2285  z21, rs_Z );
2286 
2287  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
2288  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
2290  n_ahead,
2291  tau11,
2292  y21, rs_Y );
2294  m_ahead,
2295  sigma11,
2296  z21, rs_Z );
2297  }
2298  else // if ( n_ahead == 0 )
2299  {
2300  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
2301  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
2303  n_behind,
2304  a10t, cs_A,
2305  t01, rs_T );
2308  m_ahead,
2309  n_behind,
2310  buff_1,
2311  A20, rs_A, cs_A,
2312  u21, inc_u,
2313  buff_1,
2314  t01, rs_T );
2315  }
2316 
2317  /*------------------------------------------------------------*/
2318 
2319  }
2320 
2321  // FLA_Obj_free( &w );
2322  // FLA_Obj_free( &al );
2323  // FLA_Obj_free( &ap );
2324  // FLA_Obj_free( &u );
2325  // FLA_Obj_free( &up );
2326  // FLA_Obj_free( &v );
2327  // FLA_Obj_free( &d );
2328  // FLA_Obj_free( &e );
2329  // FLA_Obj_free( &f );
2330  // FLA_Obj_free( &g );
2331  FLA_free( buff_w );
2332  FLA_free( buff_al );
2333  FLA_free( buff_ap );
2334  FLA_free( buff_u );
2335  FLA_free( buff_up );
2336  FLA_free( buff_v );
2337  FLA_free( buff_d );
2338  FLA_free( buff_e );
2339  FLA_free( buff_f );
2340  FLA_free( buff_g );
2341 
2342  return FLA_SUCCESS;
2343 }
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_invscalv.c:78
void bl1_zsetv(int m, dcomplex *sigma, dcomplex *x, int incx)
Definition: bl1_setv.c:66
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
Definition: blis_type_defs.h:55
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
FLA_Error FLA_Househ2s_UT_r_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *alpha, dcomplex *chi_1_minus_alpha, dcomplex *tau)
Definition: FLA_Househ2s_UT.c:610
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:78
int i
Definition: bl1_axmyv2.c:145
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
Definition: blis_type_defs.h:137
bl1_zscals(beta, rho_yz)
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60
void bl1_zconjv(int m, dcomplex *x, int incx)
Definition: bl1_conjv.c:34

◆ FLA_Bidiag_UT_u_step_opz_var5()

FLA_Error FLA_Bidiag_UT_u_step_opz_var5 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex * buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex * buff_T,
int  rs_T,
int  cs_T,
dcomplex * buff_S,
int  rs_S,
int  cs_S 
)

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zinvscalv(), bl1_zscals(), bl1_zsetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_Househ2_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var5().

// Body of FLA_Bidiag_UT_u_step_opz_var5: dcomplex variant working directly on raw
// buffers (base pointer + row stride rs_X + column stride cs_X for A, Y, Z, T, S).
//  - Y (n_A x m_TS) and Z (m_A x m_TS) are zero-filled before the loop.
//  - The loop runs b_alg = m_TS iterations; iteration i addresses row/column i of A
//    and column i of Y, Z, T and S through the pointer views set up at the loop top.
//  - Six unit-stride work vectors of length m_A or n_A are allocated up front and
//    released before returning.
//  - Always returns FLA_SUCCESS.
// NOTE(review): the FLA_malloc results are used without a check in this body;
//   confirm how FLA_malloc reports failure.
// NOTE(review): this listing skips some inner line numbers (e.g. 1457-1458), so the
//   opening line(s) of many calls are not shown. The commented FLA_* call(s) above
//   each bare argument list name the operation; restore from the original file
//   before compiling.
1348 {
1349  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
1350  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
1351  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
1352 
1353  dcomplex beta;
1354  dcomplex last_elem;
1355  int i;
1356 
1357  // b_alg = FLA_Obj_length( T );
1358  int b_alg = m_TS;
1359 
1360  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1361  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1362  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
1363  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
1364  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
1365  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
1366  dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1367  dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1368  dcomplex* buff_d = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1369  dcomplex* buff_e = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1370  dcomplex* buff_f = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1371  dcomplex* buff_g = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1372  int inc_u = 1;
1373  int inc_v = 1;
1374  int inc_d = 1;
1375  int inc_e = 1;
1376  int inc_f = 1;
1377  int inc_g = 1;
1378 
1379  // FLA_Set( FLA_ZERO, Y );
1380  // FLA_Set( FLA_ZERO, Z );
1381  bl1_zsetm( n_A,
1382  b_alg,
1383  buff_0,
1384  buff_Y, rs_Y, cs_Y );
1385  bl1_zsetm( m_A,
1386  b_alg,
1387  buff_0,
1388  buff_Z, rs_Z, cs_Z );
1389 
1390  for ( i = 0; i < b_alg; ++i )
1391  {
 // Pointer views for iteration i. Naming follows the 3x3 partitioning used in the
 // FLA_* comments: X00/x01/X02 above row i, x10t/chi11/x12t in row i, X20/x21/X22 below.
1392  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1393  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1394  dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
1395  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1396  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1397  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1398  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1399  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1400 
1401  dcomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1402  dcomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1403  dcomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1404 
1405  dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1406  dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1407  dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1408 
1409  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1410  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1411 
1412  dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1413  dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1414 
1415  dcomplex* u21 = buff_u + (i+1)*inc_u;
1416 
1417  dcomplex* v21 = buff_v + (i+1)*inc_v;
1418 
1419  dcomplex* d0 = buff_d + (0 )*inc_d;
1420 
1421  dcomplex* e0 = buff_e + (0 )*inc_e;
1422 
1423  dcomplex* f0 = buff_f + (0 )*inc_f;
1424 
1425  dcomplex* g0 = buff_g + (0 )*inc_g;
1426 
1427  dcomplex* v21_t = v21 + (0 )*inc_v;
1428  dcomplex* v21_b = v21 + (1 )*inc_v;
1429 
 // Element i-1 of column i of A; only dereferenced when m_behind > 0.
1430  dcomplex* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
1431 
1432  dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1433  dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1434 
1435  dcomplex* ABL = a10t;
1436  dcomplex* ZBL = z10t;
1437 
1438  dcomplex* a2 = alpha11;
1439 
1440  int m_ahead = m_A - i - 1;
1441  int n_ahead = n_A - i - 1;
1442  int m_behind = i;
1443  int n_behind = i;
1444 
1445  /*------------------------------------------------------------*/
1446 
 // Temporarily overwrite *a01_b with one for the four updates below; the saved
 // value is written back once they are done.
1447  if ( m_behind > 0 )
1448  {
1449  // FLA_Copy( a01_b, last_elem );
1450  // FLA_Set( FLA_ONE, a01_b );
1451  last_elem = *a01_b;
1452  *a01_b = *buff_1;
1453  }
1454 
1455  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1456  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
1459  m_ahead + 1,
1460  n_behind,
1461  buff_m1,
1462  ABL, rs_A, cs_A,
1463  y10t, cs_Y,
1464  buff_1,
1465  a2, rs_A );
1468  m_ahead + 1,
1469  n_behind,
1470  buff_m1,
1471  ZBL, rs_Z, cs_Z,
1472  a01, rs_A,
1473  buff_1,
1474  a2, rs_A );
1475 
1476  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1477  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
1480  n_ahead,
1481  n_behind,
1482  buff_m1,
1483  Y20, rs_Y, cs_Y,
1484  a10t, cs_A,
1485  buff_1,
1486  a12t, cs_A );
1489  m_behind,
1490  n_ahead,
1491  buff_m1,
1492  A02, rs_A, cs_A,
1493  z10t, cs_Z,
1494  buff_1,
1495  a12t, cs_A );
1496 
1497  if ( m_behind > 0 )
1498  {
1499  // FLA_Copy( last_elem, a01_b );
1500  *a01_b = last_elem;
1501  }
1502 
1503  // FLA_Househ2_UT( FLA_LEFT,
1504  // alpha11,
1505  // a21, tau11 );
1506  // FLA_Copy( a21, u21 );
1507  FLA_Househ2_UT_l_opz( m_ahead,
1508  alpha11,
1509  a21, rs_A,
1510  tau11 );
1512  m_ahead,
1513  a21, rs_A,
1514  u21, inc_u );
1515 
1516  if ( n_ahead > 0 )
1517  {
1518  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
1519  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21, FLA_ONE, y21 );
1521  n_ahead,
1522  a12t, cs_A,
1523  y21, rs_Y );
1526  m_ahead,
1527  n_ahead,
1528  buff_1,
1529  A22, rs_A, cs_A,
1530  u21, inc_u,
1531  buff_1,
1532  y21, rs_Y );
1533 
1534  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ZERO, d0 );
1535  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21, FLA_ZERO, e0 );
1538  m_ahead,
1539  n_behind,
1540  buff_1,
1541  A20, rs_A, cs_A,
1542  u21, inc_u,
1543  buff_0,
1544  d0, inc_d );
1547  m_ahead,
1548  n_behind,
1549  buff_1,
1550  Z20, rs_Z, cs_Z,
1551  u21, inc_u,
1552  buff_0,
1553  e0, inc_e );
1554 
1555  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1556  // FLA_Axpy( FLA_ONE, d0, t01 );
1558  n_behind,
1559  a10t, cs_A,
1560  t01, rs_T );
1562  n_behind,
1563  buff_1,
1564  d0, inc_d,
1565  t01, rs_T );
1566 
1567  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1568  // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
1571  n_ahead,
1572  n_behind,
1573  buff_m1,
1574  Y20, rs_Y, cs_Y,
1575  d0, inc_d,
1576  buff_1,
1577  y21, rs_Y );
1580  m_behind,
1581  n_ahead,
1582  buff_m1,
1583  A02, rs_A, cs_A,
1584  e0, inc_e,
1585  buff_1,
1586  y21, rs_Y );
1587 
1588  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1590  n_ahead,
1591  tau11,
1592  y21, rs_Y );
1593 
1594  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
1596  n_ahead,
1597  buff_m1,
1598  y21, rs_Y,
1599  a12t, cs_A );
1600 
1601  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
1602  FLA_Househ2_UT_r_opz( n_ahead - 1,
1603  a12t_l,
1604  a12t_r, cs_A,
1605  sigma11 );
1606 
1607  // FLA_Set( FLA_ONE, v21_t );
1608  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
1609  *v21_t = *buff_1;
1611  n_ahead - 1,
1612  a12t_r, cs_A,
1613  v21_b, inc_v );
1614 
1615  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1616  // FLA_Scal( FLA_MINUS_ONE, beta );
1618  n_ahead,
1619  y21, rs_Y,
1620  v21, inc_v,
1621  &beta );
1622  bl1_zscals( buff_m1, &beta );
1623 
1624  // FLA_Copy( u21, z21 );
1625  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, v21, beta, z21 );
1627  m_ahead,
1628  u21, inc_u,
1629  z21, rs_Z );
1632  m_ahead,
1633  n_ahead,
1634  buff_1,
1635  A22, rs_A, cs_A,
1636  v21, inc_v,
1637  &beta,
1638  z21, rs_Z );
1639 
1640  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, v21, FLA_ZERO, f0 );
1641  // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, g0 );
1644  n_ahead,
1645  m_behind,
1646  buff_1,
1647  Y20, rs_Y, cs_Y,
1648  v21, inc_v,
1649  buff_0,
1650  f0, inc_f );
1653  m_behind,
1654  n_ahead,
1655  buff_1,
1656  A02, rs_A, cs_A,
1657  v21, inc_v,
1658  buff_0,
1659  g0, inc_g );
1660 
1661  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, z21 );
1662  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, z21 );
1665  m_ahead,
1666  n_behind,
1667  buff_m1,
1668  A20, rs_A, cs_A,
1669  f0, inc_f,
1670  buff_1,
1671  z21, rs_Z );
1674  m_ahead,
1675  n_behind,
1676  buff_m1,
1677  Z20, rs_Z, cs_Z,
1678  g0, inc_g,
1679  buff_1,
1680  z21, rs_Z );
1681 
1682  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1684  m_ahead,
1685  sigma11,
1686  z21, rs_Z );
1687 
1688  // FLA_Copy( g0, s01 );
1690  n_behind,
1691  g0, inc_g,
1692  s01, rs_S );
1693  }
1694  else // if ( n_ahead == 0 )
1695  {
1696  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1697  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1699  n_behind,
1700  a10t, cs_A,
1701  t01, rs_T );
1704  m_ahead,
1705  n_behind,
1706  buff_1,
1707  A20, rs_A, cs_A,
1708  u21, inc_u,
1709  buff_1,
1710  t01, rs_T );
1711  }
1712 
1713  /*------------------------------------------------------------*/
1714 
1715  }
1716 
1717  // FLA_Obj_free( &u );
1718  // FLA_Obj_free( &v );
1719  // FLA_Obj_free( &d );
1720  // FLA_Obj_free( &e );
1721  // FLA_Obj_free( &f );
1722  // FLA_Obj_free( &g );
1723  FLA_free( buff_u );
1724  FLA_free( buff_v );
1725  FLA_free( buff_d );
1726  FLA_free( buff_e );
1727  FLA_free( buff_f );
1728  FLA_free( buff_g );
1729 
1730  return FLA_SUCCESS;
1731 }
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_invscalv.c:78
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:57
Definition: blis_type_defs.h:81
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
Definition: blis_type_defs.h:55
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
Definition: blis_type_defs.h:56
Definition: blis_type_defs.h:54
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:78
int i
Definition: bl1_axmyv2.c:145
FLA_Error FLA_Househ2_UT_r_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:693
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
Definition: blis_type_defs.h:137
bl1_zscals(beta, rho_yz)
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60

◆ FLA_Bidiag_UT_u_step_unb_var1()

FLA_Error FLA_Bidiag_UT_u_step_unb_var1 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Apply_H2_UT(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt(), FLA_Gemv(), FLA_Househ2_UT(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_unb_var1().

// Body of FLA_Bidiag_UT_u_step_unb_var1 (object-based, unblocked).
// Walks A, T and S from the top-left corner one row/column at a time until the
// top-left block of A has b_alg = FLA_Obj_length( T ) rows. Each iteration:
//   1. computes a left Householder transform from alpha11/a21 (scalar stored in tau11);
//   2. if columns remain to the right (width of A22 > 0): applies it to a12t/A22,
//      computes a right Householder transform from a12t (scalar stored in sigma11),
//      applies that to A22, and forms s01;
//   3. forms t01 from a10t, A20 and a21.
// A temporary n_A x 1 object v is created for the right Householder vector and
// freed before returning. Always returns FLA_SUCCESS.
// NOTE(review): the rendered prototype names the last two parameters TU and TV,
//   while this body refers to T and S — presumably the definition is written with
//   T/S and only the declaration uses TU/TV; confirm against the original file.
19 {
20  FLA_Obj ATL, ATR, A00, a01, A02,
21  ABL, ABR, a10t, alpha11, a12t,
22  A20, a21, A22;
23  FLA_Obj TTL, TTR, T00, t01, T02,
24  TBL, TBR, t10t, tau11, t12t,
25  T20, t21, T22;
26  FLA_Obj STL, STR, S00, s01, S02,
27  SBL, SBR, s10t, sigma11, s12t,
28  S20, s21, S22;
29  FLA_Obj vT, v01,
30  vB, nu11,
31  v21;
32  FLA_Obj v;
33 
34  FLA_Obj a12t_l, a12t_r;
35  FLA_Obj A22_l, A22_r;
36  FLA_Obj v21_t,
37  v21_b;
38 
39  FLA_Datatype datatype_A;
40  dim_t n_A;
41  dim_t b_alg;
42 
43 
 // Number of steps to perform is taken from the length of T.
44  b_alg = FLA_Obj_length( T );
45 
46  datatype_A = FLA_Obj_datatype( A );
47  n_A = FLA_Obj_width( A );
48 
 // Workspace column vector with one entry per column of A.
49  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
50 
51  FLA_Part_2x2( A, &ATL, &ATR,
52  &ABL, &ABR, 0, 0, FLA_TL );
53  FLA_Part_2x2( T, &TTL, &TTR,
54  &TBL, &TBR, 0, 0, FLA_TL );
55  FLA_Part_2x2( S, &STL, &STR,
56  &SBL, &SBR, 0, 0, FLA_TL );
57  FLA_Part_2x1( v, &vT,
58  &vB, 0, FLA_TOP );
59 
60  while ( FLA_Obj_length( ATL ) < b_alg )
61  {
62  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
63  /* ************* */ /* ************************** */
64  &a10t, /**/ &alpha11, &a12t,
65  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
66  1, 1, FLA_BR );
67  FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
68  /* ************* */ /* ************************** */
69  &t10t, /**/ &tau11, &t12t,
70  TBL, /**/ TBR, &T20, /**/ &t21, &T22,
71  1, 1, FLA_BR );
72  FLA_Repart_2x2_to_3x3( STL, /**/ STR, &S00, /**/ &s01, &S02,
73  /* ************* */ /* ************************** */
74  &s10t, /**/ &sigma11, &s12t,
75  SBL, /**/ SBR, &S20, /**/ &s21, &S22,
76  1, 1, FLA_BR );
77  FLA_Repart_2x1_to_3x1( vT, &v01,
78  /* ** */ /* ***** */
79  &nu11,
80  vB, &v21, 1, FLA_BOTTOM );
81 
82  /*------------------------------------------------------------*/
83 
84  // [ alpha11_new, u21, tau11 ] = House2( alpha11, a21 );
85  FLA_Househ2_UT( FLA_LEFT,
86  alpha11,
87  a21, tau11 );
88 
 // The right-hand transform is only formed while columns remain right of column i.
89  if ( FLA_Obj_width( A22 ) > 0 )
90  {
91  FLA_Part_1x2( a12t, &a12t_l, &a12t_r, 1, FLA_LEFT );
92  FLA_Part_1x2( A22, &A22_l, &A22_r, 1, FLA_LEFT );
93  FLA_Part_2x1( v21, &v21_t,
94  &v21_b, 1, FLA_TOP );
95 
96  // Apply H from the left to a12t and A22.
97  FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t, A22 );
98 
99  // [ alpha12t, u12t_r, sigma11 ] = House2( a12t_l, a12t_r );
100  FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
101 
102  // v21_t = 1;
103  // v21_b = a12t_r;
104  FLA_Set( FLA_ONE, v21_t );
105  FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
106 
107  // Apply H from the right to A22.
108  FLA_Apply_H2_UT( FLA_RIGHT, sigma11, v21_b, A22_l, A22_r );
109 
110  // s01 = conj(V02) * v21;   (V02 is held in A02 in the call below)
111  FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
112  }
113 
114  // t01 = a10t' + U20' * u21;   (U20 and u21 are held in A20 and a21 in the calls below)
115  FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
116  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
117 
118  /*------------------------------------------------------------*/
119 
120  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
121  a10t, alpha11, /**/ a12t,
122  /* ************** */ /* ************************ */
123  &ABL, /**/ &ABR, A20, a21, /**/ A22,
124  FLA_TL );
125  FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
126  t10t, tau11, /**/ t12t,
127  /* ************** */ /* ************************ */
128  &TBL, /**/ &TBR, T20, t21, /**/ T22,
129  FLA_TL );
130  FLA_Cont_with_3x3_to_2x2( &STL, /**/ &STR, S00, s01, /**/ S02,
131  s10t, sigma11, /**/ s12t,
132  /* ************** */ /* ************************ */
133  &SBL, /**/ &SBR, S20, s21, /**/ S22,
134  FLA_TL );
135  FLA_Cont_with_3x1_to_2x1( &vT, v01,
136  nu11,
137  /* ** */ /* ***** */
138  &vB, v21, FLA_TOP );
139  }
140 
141  FLA_Obj_free( &v );
142 
143  return FLA_SUCCESS;
144 }
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition: FLA_Obj.c:55
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:226
FLA_Error FLA_Househ2_UT(FLA_Side side, FLA_Obj chi_1, FLA_Obj x2, FLA_Obj tau)
Definition: FLA_Househ2_UT.c:16
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Copyt(FLA_Trans trans, FLA_Obj A, FLA_Obj B)
Definition: FLA_Copyt.c:15
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:428
FLA_Error FLA_Apply_H2_UT(FLA_Side side, FLA_Obj tau, FLA_Obj u2, FLA_Obj a1, FLA_Obj A2)
Definition: FLA_Apply_H2_UT.c:13
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Error FLA_Gemv(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Gemv.c:15
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Set(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Set.c:13
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
int FLA_Datatype
Definition: FLA_type_defs.h:49
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Bidiag_UT_u_step_unb_var2()

FLA_Error FLA_Bidiag_UT_u_step_unb_var2 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Axpyt(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Dotc(), FLA_Gemv(), FLA_Gemvc(), FLA_Gerc(), FLA_Househ2_UT(), FLA_Inv_scalc(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), FLA_Set(), and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_unb_var2().

// Listing of FLA_Bidiag_UT_u_step_unb_var2.
//
// Runs the loop below until ATL has b_alg = FLA_Obj_length( T ) rows, moving
// one row and one column of A, T and S across the partitioning per pass.
// Each pass computes a left Householder transform from ( alpha11, a21 ) with
// scalar tau11 and, when A22 has at least one column, a right Householder
// transform from a12t with scalar sigma11; A22 is then updated with two
// rank-1 corrections ( u21 * y21' and z21 * v21' ). The columns t01 of T and
// s01 of S are filled in along the way.
//
// Temporaries beta, v, y and z are created on entry and freed before return.
// The function always returns FLA_SUCCESS; return codes of the FLA_* calls
// are not inspected here.
//
// NOTE(review): the body refers to the second and third arguments as T and S,
// while the rendered prototype above names them TU and TV -- presumably the
// definition uses T/S; confirm against the function's definition.
19 {
20  FLA_Obj ATL, ATR, A00, a01, A02,
21  ABL, ABR, a10t, alpha11, a12t,
22  A20, a21, A22;
23  FLA_Obj TTL, TTR, T00, t01, T02,
24  TBL, TBR, t10t, tau11, t12t,
25  T20, t21, T22;
26  FLA_Obj STL, STR, S00, s01, S02,
27  SBL, SBR, s10t, sigma11, s12t,
28  S20, s21, S22;
29  FLA_Obj yT, y01,
30  yB, psi11,
31  y21;
32  FLA_Obj zT, z01,
33  zB, zeta11,
34  z21;
35  FLA_Obj vT, v01,
36  vB, nu11,
37  v21;
38  FLA_Obj v, y, z;
39  // beta: scalar workspace (created as 1x1 below) used when forming z21.
40  FLA_Obj beta;
41  // Sub-views taken inside the loop: leading element / remainder of a12t and v21.
42  FLA_Obj a12t_l, a12t_r;
43  FLA_Obj v21_t,
44  v21_b;
45  // Datatype and dimensions of A, and the iteration bound b_alg.
46  FLA_Datatype datatype_A;
47  dim_t m_A, n_A;
48  dim_t b_alg;
49 
50  // The loop below stops once ATL has b_alg rows.
51  b_alg = FLA_Obj_length( T );
52 
53  datatype_A = FLA_Obj_datatype( A );
54  m_A = FLA_Obj_length( A );
55  n_A = FLA_Obj_width( A );
56  // Temporaries use A's datatype: beta is 1x1, v and y have n_A rows, z has m_A rows.
57  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &beta );
58  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
59  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
60  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
61  // Every partitioning starts with a size-0 top-left / top part.
62  FLA_Part_2x2( A, &ATL, &ATR,
63  &ABL, &ABR, 0, 0, FLA_TL );
64  FLA_Part_2x2( T, &TTL, &TTR,
65  &TBL, &TBR, 0, 0, FLA_TL );
66  FLA_Part_2x2( S, &STL, &STR,
67  &SBL, &SBR, 0, 0, FLA_TL );
68  FLA_Part_2x1( v, &vT,
69  &vB, 0, FLA_TOP );
70  FLA_Part_2x1( y, &yT,
71  &yB, 0, FLA_TOP );
72  FLA_Part_2x1( z, &zT,
73  &zB, 0, FLA_TOP );
74  // Each pass exposes one row and one column (block sizes 1, 1) of A, T and S.
75  while ( FLA_Obj_length( ATL ) < b_alg )
76  {
77  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
78  /* ************* */ /* ************************** */
79  &a10t, /**/ &alpha11, &a12t,
80  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
81  1, 1, FLA_BR );
82  FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
83  /* ************* */ /* ************************** */
84  &t10t, /**/ &tau11, &t12t,
85  TBL, /**/ TBR, &T20, /**/ &t21, &T22,
86  1, 1, FLA_BR );
87  FLA_Repart_2x2_to_3x3( STL, /**/ STR, &S00, /**/ &s01, &S02,
88  /* ************* */ /* ************************** */
89  &s10t, /**/ &sigma11, &s12t,
90  SBL, /**/ SBR, &S20, /**/ &s21, &S22,
91  1, 1, FLA_BR );
92  FLA_Repart_2x1_to_3x1( vT, &v01,
93  /* ** */ /* ***** */
94  &nu11,
95  vB, &v21, 1, FLA_BOTTOM );
96  FLA_Repart_2x1_to_3x1( yT, &y01,
97  /* ** */ /* ***** */
98  &psi11,
99  yB, &y21, 1, FLA_BOTTOM );
100  FLA_Repart_2x1_to_3x1( zT, &z01,
101  /* ** */ /* ***** */
102  &zeta11,
103  zB, &z21, 1, FLA_BOTTOM );
104 
105  /*------------------------------------------------------------*/
106 
107  // [ alpha11_new, u21, tau11 ] = House2( alpha11, a21 );
108  FLA_Househ2_UT( FLA_LEFT,
109  alpha11,
110  a21, tau11 );
111  // Everything involving the right transform is skipped when A22 has no columns.
112  if ( FLA_Obj_width( A22 ) > 0 )
113  {
114  // y21' = a12t + u21' * A22;
115  // y21 = conj(a12t) + A22' * u21;
116  FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
117  FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
118 
119  // y21 = y21 / tau11;
120  FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
121 
122  // a12t = a12t - conj(y21)^T;
123  FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
124  // Split off the leading element of a12t and of v21.
125  FLA_Part_1x2( a12t, &a12t_l, &a12t_r, 1, FLA_LEFT );
126  FLA_Part_2x1( v21, &v21_t,
127  &v21_b, 1, FLA_TOP );
128 
129  // [ a12t_l, a12t_r, sigma11 ] = House2( a12t_l, a12t_r );  (a12t_r is copied into v21_b below)
130  FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
131 
132  // v21_t = 1;
133  // v21_b = a12t_r^T;
134  FLA_Set( FLA_ONE, v21_t );
135  FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
136 
137  // beta = - y21' * v21;
138  FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
139  FLA_Scal( FLA_MINUS_ONE, beta );
140 
141  // z21 = ( A22 - u21 * y21' ) * v21 / sigma11;
142  // = ( A22 * v21 - u21 * y21' * v21 ) / sigma11;
143  // = ( A22 * v21 + beta * u21 ) / sigma11;
144  FLA_Copy( a21, z21 );
145  FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
146  FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
147 
148  // A22 = A22 - u21 * y21' - z21 * v21';  (u21 is stored in a21)
149  FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
150  FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
151 
152  // s01 = conj(V02) * v21;  (V02 is read from A02)
153  FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
154  }
155 
156  // t01 = a10t' + U20' * u21;  (U20 and u21 are read from A20 and a21)
157  FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
158  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
159 
160  /*------------------------------------------------------------*/
161 
162  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
163  a10t, alpha11, /**/ a12t,
164  /* ************** */ /* ************************ */
165  &ABL, /**/ &ABR, A20, a21, /**/ A22,
166  FLA_TL );
167  FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
168  t10t, tau11, /**/ t12t,
169  /* ************** */ /* ************************ */
170  &TBL, /**/ &TBR, T20, t21, /**/ T22,
171  FLA_TL );
172  FLA_Cont_with_3x3_to_2x2( &STL, /**/ &STR, S00, s01, /**/ S02,
173  s10t, sigma11, /**/ s12t,
174  /* ************** */ /* ************************ */
175  &SBL, /**/ &SBR, S20, s21, /**/ S22,
176  FLA_TL );
177  FLA_Cont_with_3x1_to_2x1( &vT, v01,
178  nu11,
179  /* ** */ /* ***** */
180  &vB, v21, FLA_TOP );
181  FLA_Cont_with_3x1_to_2x1( &yT, y01,
182  psi11,
183  /* ** */ /* ***** */
184  &yB, y21, FLA_TOP );
185  FLA_Cont_with_3x1_to_2x1( &zT, z01,
186  zeta11,
187  /* ** */ /* ***** */
188  &zB, z21, FLA_TOP );
189  }
190  // Free the four temporaries created before the loop.
191  FLA_Obj_free( &beta );
192  FLA_Obj_free( &v );
193  FLA_Obj_free( &y );
194  FLA_Obj_free( &z );
195 
196  return FLA_SUCCESS;
197 }
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition: FLA_Obj.c:55
FLA_Error FLA_Gemvc(FLA_Trans transa, FLA_Conj conjx, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Gemvc.c:13
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:226
FLA_Error FLA_Househ2_UT(FLA_Side side, FLA_Obj chi_1, FLA_Obj x2, FLA_Obj tau)
Definition: FLA_Househ2_UT.c:16
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
FLA_Error FLA_Axpyt(FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLA_Axpyt.c:15
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Error FLA_Copy(FLA_Obj A, FLA_Obj B)
Definition: FLA_Copy.c:15
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Inv_scalc(FLA_Conj conjalpha, FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Inv_scalc.c:13
FLA_Error FLA_Copyt(FLA_Trans trans, FLA_Obj A, FLA_Obj B)
Definition: FLA_Copyt.c:15
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:428
FLA_Error FLA_Gerc(FLA_Conj conjx, FLA_Conj conjy, FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj A)
Definition: FLA_Gerc.c:13
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Error FLA_Gemv(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Gemv.c:15
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Set(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Set.c:13
FLA_Error FLA_Scal(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Scal.c:15
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
int FLA_Datatype
Definition: FLA_type_defs.h:49
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Error FLA_Dotc(FLA_Conj conj, FLA_Obj x, FLA_Obj y, FLA_Obj rho)
Definition: FLA_Dotc.c:13

◆ FLA_Bidiag_UT_u_step_unb_var3()

FLA_Error FLA_Bidiag_UT_u_step_unb_var3 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Axpy(), FLA_Axpyt(), FLA_Conjugate(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Dotc(), FLA_Gemv(), FLA_Gemvc(), FLA_Gerc(), FLA_Househ2_UT(), FLA_Househ2s_UT(), FLA_Inv_scalc(), FLA_MINUS_ONE, FLA_Mult_add(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_unb_var3().

// Listing of FLA_Bidiag_UT_u_step_unb_var3.
//
// Runs the loop below until ATL has b_alg = FLA_Obj_length( T ) rows, moving
// one row and one column of A, T and S across the partitioning per pass.
// Unlike a pass that updates A22 immediately, this listing applies the
// u21 * y21' and z21 * v21' corrections at the start of the following pass
// (the blocks guarded by FLA_Obj_length( ATL ) > 0), and once more at the end
// of the final pass so that A22 is left fully updated.
//
// Nine scalar temporaries and seven vector workspaces (w, ap, u, up, v, y, z)
// are created on entry and freed before return. The function always returns
// FLA_SUCCESS; return codes of the FLA_* calls are not inspected here.
//
// NOTE(review): the body refers to the second and third arguments as T and S,
// while the rendered prototype above names them TU and TV -- presumably the
// definition uses T/S; confirm against the function's definition.
19 {
20  FLA_Obj ATL, ATR, A00, a01, A02,
21  ABL, ABR, a10t, alpha11, a12t,
22  A20, a21, A22;
23  FLA_Obj TTL, TTR, T00, t01, T02,
24  TBL, TBR, t10t, tau11, t12t,
25  T20, t21, T22;
26  FLA_Obj STL, STR, S00, s01, S02,
27  SBL, SBR, s10t, sigma11, s12t,
28  S20, s21, S22;
29  FLA_Obj wT, w01,
30  wB, omega11,
31  w21;
32  FLA_Obj apT, a01p,
33  apB, alpha11p,
34  a12p;
35  FLA_Obj uT, u01,
36  uB, upsilon11,
37  u21;
38  FLA_Obj uTp, u01p,
39  uBp, upsilon11p,
40  u21p;
41  FLA_Obj vT, v01,
42  vB, nu11,
43  v21;
44  FLA_Obj yT, y01,
45  yB, psi11,
46  y21;
47  FLA_Obj zT, z01,
48  zB, zeta11,
49  z21;
50  FLA_Obj w, ap, u, up, v, y, z;
51  // Scalar temporaries; each is created as a 1x1 object below.
52  FLA_Obj minus_inv_tau11;
53  FLA_Obj beta;
54  FLA_Obj alpha12;
55  FLA_Obj minus_conj_alpha12;
56  FLA_Obj psi11_minus_alpha12;
57  FLA_Obj minus_upsilon11;
58  FLA_Obj minus_conj_nu11;
59  FLA_Obj minus_conj_psi11;
60  FLA_Obj minus_zeta11;
61  // Sub-views taken inside the loop by splitting off a leading element or column.
62  FLA_Obj a12t_l, a12t_r;
63  FLA_Obj a12p_t,
64  a12p_b;
65  FLA_Obj A22_l, A22_r;
66  FLA_Obj v21_t,
67  v21_b;
68  // Datatype and dimensions of A, and the iteration bound b_alg.
69  FLA_Datatype datatype_A;
70  dim_t m_A, n_A;
71  dim_t b_alg;
72 
73  // The loop below stops once ATL has b_alg rows.
74  b_alg = FLA_Obj_length( T );
75 
76  datatype_A = FLA_Obj_datatype( A );
77  m_A = FLA_Obj_length( A );
78  n_A = FLA_Obj_width( A );
79  // Temporaries use A's datatype: 1x1 scalars, then vectors with m_A rows (w, u, up, z) or n_A rows (ap, v, y).
80  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_inv_tau11 );
81  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &beta );
82  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &alpha12 );
83  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_conj_alpha12 );
84  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &psi11_minus_alpha12 );
85  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_upsilon11 );
86  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_conj_nu11 );
87  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_conj_psi11 );
88  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_zeta11 );
89  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
90  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
91  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
92  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
93  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
94  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
95  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
96  // Every partitioning starts with a size-0 top-left / top part.
97  FLA_Part_2x2( A, &ATL, &ATR,
98  &ABL, &ABR, 0, 0, FLA_TL );
99  FLA_Part_2x2( T, &TTL, &TTR,
100  &TBL, &TBR, 0, 0, FLA_TL );
101  FLA_Part_2x2( S, &STL, &STR,
102  &SBL, &SBR, 0, 0, FLA_TL );
103  FLA_Part_2x1( w, &wT,
104  &wB, 0, FLA_TOP );
105  FLA_Part_2x1( ap, &apT,
106  &apB, 0, FLA_TOP );
107  FLA_Part_2x1( u, &uT,
108  &uB, 0, FLA_TOP );
109  FLA_Part_2x1( up, &uTp,
110  &uBp, 0, FLA_TOP );
111  FLA_Part_2x1( v, &vT,
112  &vB, 0, FLA_TOP );
113  FLA_Part_2x1( y, &yT,
114  &yB, 0, FLA_TOP );
115  FLA_Part_2x1( z, &zT,
116  &zB, 0, FLA_TOP );
117  // Each pass exposes one row and one column (block sizes 1, 1) of A, T and S, and one element of each workspace vector.
118  while ( FLA_Obj_length( ATL ) < b_alg )
119  {
120  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
121  /* ************* */ /* ************************** */
122  &a10t, /**/ &alpha11, &a12t,
123  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
124  1, 1, FLA_BR );
125  FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
126  /* ************* */ /* ************************** */
127  &t10t, /**/ &tau11, &t12t,
128  TBL, /**/ TBR, &T20, /**/ &t21, &T22,
129  1, 1, FLA_BR );
130  FLA_Repart_2x2_to_3x3( STL, /**/ STR, &S00, /**/ &s01, &S02,
131  /* ************* */ /* ************************** */
132  &s10t, /**/ &sigma11, &s12t,
133  SBL, /**/ SBR, &S20, /**/ &s21, &S22,
134  1, 1, FLA_BR );
135  FLA_Repart_2x1_to_3x1( wT, &w01,
136  /* ** */ /* ***** */
137  &omega11,
138  wB, &w21, 1, FLA_BOTTOM );
139  FLA_Repart_2x1_to_3x1( apT, &a01p,
140  /* ** */ /* ***** */
141  &alpha11p,
142  apB, &a12p, 1, FLA_BOTTOM );
143  FLA_Repart_2x1_to_3x1( uT, &u01,
144  /* ** */ /* ***** */
145  &upsilon11,
146  uB, &u21, 1, FLA_BOTTOM );
147  FLA_Repart_2x1_to_3x1( uTp, &u01p,
148  /* ** */ /* ***** */
149  &upsilon11p,
150  uBp, &u21p, 1, FLA_BOTTOM );
151  FLA_Repart_2x1_to_3x1( vT, &v01,
152  /* ** */ /* ***** */
153  &nu11,
154  vB, &v21, 1, FLA_BOTTOM );
155  FLA_Repart_2x1_to_3x1( yT, &y01,
156  /* ** */ /* ***** */
157  &psi11,
158  yB, &y21, 1, FLA_BOTTOM );
159  FLA_Repart_2x1_to_3x1( zT, &z01,
160  /* ** */ /* ***** */
161  &zeta11,
162  zB, &z21, 1, FLA_BOTTOM );
163 
164  /*------------------------------------------------------------*/
165  // Skipped on the first pass (ATL is empty): apply the u/y and z/v terms to alpha11, a21 and a12t.
166  if ( FLA_Obj_length( ATL ) > 0 )
167  {
168  FLA_Copy( upsilon11, minus_upsilon11 );
169  FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
170 
171  FLA_Copy( zeta11, minus_zeta11 );
172  FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
173 
174  FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
175  FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
176 
177  FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
178  FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
179 
180  // alpha11 = alpha11 - upsilon11 * conj(psi11) - zeta11 * conj(nu11);
181  FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
182  FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, zeta11, alpha11 );
183 
184  // a21 = a21 - u21 * conj(psi11) - z21 * conj(nu11);
185  FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
186  FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
187 
188  // a12t = a12t - upsilon11 * y21' - zeta11 * v21';
189  FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
190  FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11, v21, a12t );
191  }
192 
193  // [ alpha11, u21p, tau11 ] = House2( alpha11, a21 );  (the result in a21 is copied to u21p)
194  FLA_Househ2_UT( FLA_LEFT,
195  alpha11,
196  a21, tau11 );
197  FLA_Copy( a21, u21p );
198 
199  if ( FLA_Obj_width( A22 ) > 0 )
200  {
201  // minus_inv_tau11 = - 1 / tau11;
202  FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
203  FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
204 
205  // a12p = ( tau11 - 1 ) * a12t^T / tau11;
206  // = a12t^T - ( 1 / tau11 ) * a12t^T;
207  FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
208  FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
209  }
210  // Skipped on the first pass: rank-1 updates of A22 using the u21, y21, z21, v21 views exposed above.
211  if ( FLA_Obj_length( ATL ) > 0 )
212  {
213  // A22 = A22 - u21 * y21' - z21 * v21';
214  FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
215  FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
216  }
217 
218  if ( FLA_Obj_width( A22 ) > 0 )
219  {
220  // y21 = A22' * u21p;
221  FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
222 
223  // a12p = a12p - conj(y21) / tau11;
224  FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
225 
226  // w21 = A22 * conj(a12p);
227  FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
228 
229  // y21 = y21 + conj(a12t)^T;
230  FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
231  // Split off the leading element of a12t, v21 and a12p.
232  FLA_Part_1x2( a12t, &a12t_l, &a12t_r, 1, FLA_LEFT );
233  FLA_Part_2x1( v21, &v21_t,
234  &v21_b, 1, FLA_TOP );
235  FLA_Part_2x1( a12p, &a12p_t,
236  &a12p_b, 1, FLA_TOP );
237 
238  // [ alpha12, psi11_minus_alpha12, sigma11 ] = House2s( a12p_t, a12p_b );
239  FLA_Househ2s_UT( FLA_RIGHT,
240  a12p_t,
241  a12p_b,
242  alpha12, psi11_minus_alpha12, sigma11 );
243 
244  // v21 = conj( ( a12p - alpha12 * e0 ) / ( psi11 - alpha12 ) );  (only v21_b is conjugated explicitly)
245  FLA_Copy( a12p, v21 );
246  FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
247  FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
248  FLA_Conjugate( v21_b );
249 
250  // a12t_l = alpha12;
251  // a12t_r = v21_b^T;
252  FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
253  FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
254  }
255 
256  // u21 = u21p;
257  FLA_Copy( u21p, u21 );
258 
259  if ( FLA_Obj_width( A22 ) > 0 )
260  {
261  // beta = - y21' * v21 / tau11;
262  FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
263  FLA_Scal( FLA_MINUS_ONE, beta );
264  FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
265  // A22_l is the first column of A22 (used as A22 * e0 below).
266  FLA_Part_1x2( A22, &A22_l, &A22_r, 1, FLA_LEFT );
267 
268  // minus_conj_alpha12 = - conj(alpha12);
269  FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
270  FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
271 
272  // z21 = ( w21 - conj(alpha12) * A22 * e0 ) / conj(psi11 - alpha12) + beta * u21;
273  FLA_Copy( w21, z21 );
274  FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
275  FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
276  FLA_Axpy( beta, u21, z21 );
277 
278  // y21 = y21 / tau11;
279  // z21 = z21 / sigma11;
280  FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
281  FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
282 
283  // s01 = conj(V02) * v21;  (V02 is read from A02)
284  FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
285  }
286 
287  // t01 = a10t' + U20' * u21;  (U20 is read from A20)
288  FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
289  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
290 
291  // Update A22 if this is the last iteration; this is needed when we're
292  // being called from the blocked routine so A22 is left in a valid state.
293  if ( FLA_Obj_length( ATL ) + 1 == b_alg &&
294  FLA_Obj_width( A22 ) > 0 )
295  {
296  // A22 = A22 - u21 * y21' - z21 * v21';
297  FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
298  FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
299  }
300 
301  /*------------------------------------------------------------*/
302 
303  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
304  a10t, alpha11, /**/ a12t,
305  /* ************** */ /* ************************ */
306  &ABL, /**/ &ABR, A20, a21, /**/ A22,
307  FLA_TL );
308  FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
309  t10t, tau11, /**/ t12t,
310  /* ************** */ /* ************************ */
311  &TBL, /**/ &TBR, T20, t21, /**/ T22,
312  FLA_TL );
313  FLA_Cont_with_3x3_to_2x2( &STL, /**/ &STR, S00, s01, /**/ S02,
314  s10t, sigma11, /**/ s12t,
315  /* ************** */ /* ************************ */
316  &SBL, /**/ &SBR, S20, s21, /**/ S22,
317  FLA_TL );
318  FLA_Cont_with_3x1_to_2x1( &wT, w01,
319  omega11,
320  /* ** */ /* ***** */
321  &wB, w21, FLA_TOP );
322  FLA_Cont_with_3x1_to_2x1( &apT, a01p,
323  alpha11p,
324  /* ** */ /* ***** */
325  &apB, a12p, FLA_TOP );
326  FLA_Cont_with_3x1_to_2x1( &uT, u01,
327  upsilon11,
328  /* ** */ /* ***** */
329  &uB, u21, FLA_TOP );
330  FLA_Cont_with_3x1_to_2x1( &uTp, u01p,
331  upsilon11p,
332  /* ** */ /* ***** */
333  &uBp, u21p, FLA_TOP );
334  FLA_Cont_with_3x1_to_2x1( &vT, v01,
335  nu11,
336  /* ** */ /* ***** */
337  &vB, v21, FLA_TOP );
338  FLA_Cont_with_3x1_to_2x1( &yT, y01,
339  psi11,
340  /* ** */ /* ***** */
341  &yB, y21, FLA_TOP );
342  FLA_Cont_with_3x1_to_2x1( &zT, z01,
343  zeta11,
344  /* ** */ /* ***** */
345  &zB, z21, FLA_TOP );
346  }
347  // Free every scalar and vector temporary created before the loop.
348  FLA_Obj_free( &minus_inv_tau11 );
349  FLA_Obj_free( &beta );
350  FLA_Obj_free( &alpha12 );
351  FLA_Obj_free( &minus_conj_alpha12 );
352  FLA_Obj_free( &psi11_minus_alpha12 );
353  FLA_Obj_free( &minus_upsilon11 );
354  FLA_Obj_free( &minus_conj_nu11 );
355  FLA_Obj_free( &minus_conj_psi11 );
356  FLA_Obj_free( &minus_zeta11 );
357  FLA_Obj_free( &w );
358  FLA_Obj_free( &ap );
359  FLA_Obj_free( &u );
360  FLA_Obj_free( &up );
361  FLA_Obj_free( &v );
362  FLA_Obj_free( &y );
363  FLA_Obj_free( &z );
364 
365  return FLA_SUCCESS;
366 }
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition: FLA_Obj.c:55
FLA_Error FLA_Gemvc(FLA_Trans transa, FLA_Conj conjx, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Gemvc.c:13
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:226
FLA_Error FLA_Househ2_UT(FLA_Side side, FLA_Obj chi_1, FLA_Obj x2, FLA_Obj tau)
Definition: FLA_Househ2_UT.c:16
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
FLA_Error FLA_Axpyt(FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLA_Axpyt.c:15
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Error FLA_Copy(FLA_Obj A, FLA_Obj B)
Definition: FLA_Copy.c:15
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Inv_scalc(FLA_Conj conjalpha, FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Inv_scalc.c:13
FLA_Error FLA_Copyt(FLA_Trans trans, FLA_Obj A, FLA_Obj B)
Definition: FLA_Copyt.c:15
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:428
FLA_Error FLA_Gerc(FLA_Conj conjx, FLA_Conj conjy, FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj A)
Definition: FLA_Gerc.c:13
FLA_Error FLA_Conjugate(FLA_Obj A)
Definition: FLA_Conjugate.c:13
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Error FLA_Gemv(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Gemv.c:15
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Scal(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Scal.c:15
FLA_Error FLA_Axpy(FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLA_Axpy.c:15
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
FLA_Error FLA_Mult_add(FLA_Obj alpha, FLA_Obj beta, FLA_Obj gamma)
Definition: FLA_Mult_add.c:13
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
int FLA_Datatype
Definition: FLA_type_defs.h:49
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
FLA_Error FLA_Househ2s_UT(FLA_Side side, FLA_Obj chi_1, FLA_Obj x2, FLA_Obj alpha, FLA_Obj chi_1_minus_alpha, FLA_Obj tau)
Definition: FLA_Househ2s_UT.c:16
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Error FLA_Dotc(FLA_Conj conj, FLA_Obj x, FLA_Obj y, FLA_Obj rho)
Definition: FLA_Dotc.c:13

◆ FLA_Bidiag_UT_u_step_unb_var4()

FLA_Error FLA_Bidiag_UT_u_step_unb_var4 ( FLA_Obj  A,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Axpy(), FLA_Axpyt(), FLA_Conjugate(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Dotc(), FLA_Gemv(), FLA_Gemvc(), FLA_Househ2_UT(), FLA_Househ2s_UT(), FLA_Inv_scalc(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Mult_add(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), FLA_Set(), and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_unb_var4().

36 {
    // Unblocked step routine (variant 4) of the FLA_Bidiag_UT_u family.
    //
    // Walks down the diagonal of A one row/column at a time for
    // b_alg = FLA_Obj_length( T ) iterations. Each iteration:
    //   1. applies the updates accumulated so far in Y and Z to the current
    //      column ( alpha11; a21 ) and the current row a12t of A;
    //   2. computes a left Householder transform from ( alpha11, a21 ),
    //      storing its scalar in tau11 (the current diagonal entry of T);
    //   3. if columns remain to the right ( width( A22 ) > 0 ), computes a
    //      right Householder transform via FLA_Househ2s_UT, storing its scalar
    //      in sigma11 (the current diagonal entry of S), and fills in the new
    //      columns y21 of Y, z21 of Z, t01 of T and s01 of S.
    //
    // Y and Z are zeroed on entry; A, Y, Z, T and S are updated in place.
    // The function always returns FLA_SUCCESS; the return values of the
    // FLA_* calls it makes are not checked.
    //
    // NOTE(review): the prototype shown for this listing names the last two
    // parameters TU and TV, while this body refers to T and S -- presumably
    // the parameter names used by the definition; confirm against the .c file.
37  FLA_Obj ATL, ATR, A00, a01, A02,
38  ABL, ABR, a10t, alpha11, a12t,
39  A20, a21, A22;
40  FLA_Obj YTL, YTR, Y00, y01, Y02,
41  YBL, YBR, y10t, psi11, y12t,
42  Y20, y21, Y22;
43  FLA_Obj ZTL, ZTR, Z00, z01, Z02,
44  ZBL, ZBR, z10t, zeta11, z12t,
45  Z20, z21, Z22;
46  FLA_Obj TTL, TTR, T00, t01, T02,
47  TBL, TBR, t10t, tau11, t12t,
48  T20, t21, T22;
49  FLA_Obj STL, STR, S00, s01, S02,
50  SBL, SBR, s10t, sigma11, s12t,
51  S20, s21, S22;
52  FLA_Obj wT, w01,
53  wB, omega11,
54  w21;
55  FLA_Obj alT, a01l,
56  alB, alpha11l,
57  a22l;
58  FLA_Obj apT, a01p,
59  apB, alpha11p,
60  a12p;
61  FLA_Obj uT, u01,
62  uB, upsilon11,
63  u21;
64  FLA_Obj uTp, u01p,
65  uBp, upsilon11p,
66  u21p;
67  FLA_Obj vT, v01,
68  vB, nu11,
69  v21;
70  FLA_Obj dT, d0,
71  dB, delta1,
72  d2;
73  FLA_Obj eT, e0,
74  eB, epsilon1,
75  e2;
76  FLA_Obj fT, f0,
77  fB, phi1,
78  f2;
79  FLA_Obj gT, g0,
80  gB, ghi1,
81  g2;
82  FLA_Obj w, al, ap, u, up, v;
83  FLA_Obj d, e, f, g;
84 
    // 1x1 scalar temporaries, created below and freed before returning.
    // NOTE(review): minus_alpha12, minus_upsilon11, minus_conj_nu11,
    // minus_conj_psi11 and minus_zeta11 are created and freed in this listing
    // but never read or written in between.
85  FLA_Obj minus_inv_tau11;
86  FLA_Obj last_elem;
87  FLA_Obj beta;
88  FLA_Obj alpha12;
89  FLA_Obj minus_alpha12;
90  FLA_Obj minus_conj_alpha12;
91  FLA_Obj psi11_minus_alpha12;
92  FLA_Obj minus_upsilon11;
93  FLA_Obj minus_conj_nu11;
94  FLA_Obj minus_conj_psi11;
95  FLA_Obj minus_zeta11;
96 
    // Sub-views produced inside the loop by splitting off one row or column.
97  FLA_Obj a01_t,
98  a01_b;
99  FLA_Obj A02_l, A02_r;
100  FLA_Obj a12t_l, a12t_r;
101  FLA_Obj a12p_t,
102  a12p_b;
103  FLA_Obj A22_l, A22_r;
104  FLA_Obj v21_t,
105  v21_b;
106  FLA_Obj Y20_t,
107  Y20_b;
108  FLA_Obj a2;
109 
110  FLA_Datatype datatype_A;
111  dim_t m_A, n_A;
112  dim_t b_alg;
113 
114 
    // The number of loop iterations is taken from the row count of T.
115  b_alg = FLA_Obj_length( T );
116 
117  datatype_A = FLA_Obj_datatype( A );
118  m_A = FLA_Obj_length( A );
119  n_A = FLA_Obj_width( A );
120 
    // Allocate the scalar temporaries (1 x 1) and the vector workspaces, all
    // with A's datatype: w, al, u, up, f, g are m_A x 1; ap, v, d, e are
    // n_A x 1.
121  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_inv_tau11 );
122  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &last_elem );
123  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &beta );
124  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &alpha12 );
125  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_alpha12 );
126  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_conj_alpha12 );
127  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &psi11_minus_alpha12 );
128  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_upsilon11 );
129  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_conj_nu11 );
130  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_conj_psi11 );
131  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_zeta11 );
132  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
133  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
134  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
135  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
136  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
137  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
138  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
139  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
140  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
141  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
142 
    // Y and Z start out as zero; they are filled in one column per iteration.
143  FLA_Set( FLA_ZERO, Y );
144  FLA_Set( FLA_ZERO, Z );
145 
    // Initial partitioning: every top-left / top part starts out empty
    // (0 x 0 for the matrices, length 0 for the workspace vectors).
146  FLA_Part_2x2( A, &ATL, &ATR,
147  &ABL, &ABR, 0, 0, FLA_TL );
148  FLA_Part_2x2( Y, &YTL, &YTR,
149  &YBL, &YBR, 0, 0, FLA_TL );
150  FLA_Part_2x2( Z, &ZTL, &ZTR,
151  &ZBL, &ZBR, 0, 0, FLA_TL );
152  FLA_Part_2x2( T, &TTL, &TTR,
153  &TBL, &TBR, 0, 0, FLA_TL );
154  FLA_Part_2x2( S, &STL, &STR,
155  &SBL, &SBR, 0, 0, FLA_TL );
156  FLA_Part_2x1( w, &wT,
157  &wB, 0, FLA_TOP );
158  FLA_Part_2x1( al, &alT,
159  &alB, 0, FLA_TOP );
160  FLA_Part_2x1( ap, &apT,
161  &apB, 0, FLA_TOP );
162  FLA_Part_2x1( u, &uT,
163  &uB, 0, FLA_TOP );
164  FLA_Part_2x1( up, &uTp,
165  &uBp, 0, FLA_TOP );
166  FLA_Part_2x1( v, &vT,
167  &vB, 0, FLA_TOP );
168  FLA_Part_2x1( d, &dT,
169  &dB, 0, FLA_TOP );
170  FLA_Part_2x1( e, &eT,
171  &eB, 0, FLA_TOP );
172  FLA_Part_2x1( f, &fT,
173  &fB, 0, FLA_TOP );
174  FLA_Part_2x1( g, &gT,
175  &gB, 0, FLA_TOP );
176 
    // One iteration per row/column, until ATL has b_alg rows.
177  while ( FLA_Obj_length( ATL ) < b_alg )
178  {
    // Expose the current 1 x 1 diagonal element and its neighbouring
    // row/column pieces in each matrix, and the current element of each
    // workspace vector.
179  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
180  /* ************* */ /* ************************** */
181  &a10t, /**/ &alpha11, &a12t,
182  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
183  1, 1, FLA_BR );
184  FLA_Repart_2x2_to_3x3( YTL, /**/ YTR, &Y00, /**/ &y01, &Y02,
185  /* ************* */ /* ************************ */
186  &y10t, /**/ &psi11, &y12t,
187  YBL, /**/ YBR, &Y20, /**/ &y21, &Y22,
188  1, 1, FLA_BR );
189  FLA_Repart_2x2_to_3x3( ZTL, /**/ ZTR, &Z00, /**/ &z01, &Z02,
190  /* ************* */ /* ************************* */
191  &z10t, /**/ &zeta11, &z12t,
192  ZBL, /**/ ZBR, &Z20, /**/ &z21, &Z22,
193  1, 1, FLA_BR );
194  FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
195  /* ************* */ /* ************************** */
196  &t10t, /**/ &tau11, &t12t,
197  TBL, /**/ TBR, &T20, /**/ &t21, &T22,
198  1, 1, FLA_BR );
199  FLA_Repart_2x2_to_3x3( STL, /**/ STR, &S00, /**/ &s01, &S02,
200  /* ************* */ /* ************************** */
201  &s10t, /**/ &sigma11, &s12t,
202  SBL, /**/ SBR, &S20, /**/ &s21, &S22,
203  1, 1, FLA_BR );
204  FLA_Repart_2x1_to_3x1( wT, &w01,
205  /* ** */ /* ***** */
206  &omega11,
207  wB, &w21, 1, FLA_BOTTOM );
208  FLA_Repart_2x1_to_3x1( alT, &a01l,
209  /* ** */ /* ***** */
210  &alpha11l,
211  alB, &a22l, 1, FLA_BOTTOM );
212  FLA_Repart_2x1_to_3x1( apT, &a01p,
213  /* ** */ /* ***** */
214  &alpha11p,
215  apB, &a12p, 1, FLA_BOTTOM );
216  FLA_Repart_2x1_to_3x1( uT, &u01,
217  /* ** */ /* ***** */
218  &upsilon11,
219  uB, &u21, 1, FLA_BOTTOM );
220  FLA_Repart_2x1_to_3x1( uTp, &u01p,
221  /* ** */ /* ***** */
222  &upsilon11p,
223  uBp, &u21p, 1, FLA_BOTTOM );
224  FLA_Repart_2x1_to_3x1( vT, &v01,
225  /* ** */ /* ***** */
226  &nu11,
227  vB, &v21, 1, FLA_BOTTOM );
228  FLA_Repart_2x1_to_3x1( dT, &d0,
229  /* ** */ /* ****** */
230  &delta1,
231  dB, &d2, 1, FLA_BOTTOM );
232  FLA_Repart_2x1_to_3x1( eT, &e0,
233  /* ** */ /* ******** */
234  &epsilon1,
235  eB, &e2, 1, FLA_BOTTOM );
236  FLA_Repart_2x1_to_3x1( fT, &f0,
237  /* ** */ /* **** */
238  &phi1,
239  fB, &f2, 1, FLA_BOTTOM );
240  FLA_Repart_2x1_to_3x1( gT, &g0,
241  /* ** */ /* **** */
242  &ghi1,
243  gB, &g2, 1, FLA_BOTTOM );
244 
245  /*------------------------------------------------------------*/
246 
247  // Save last element of a01 and set it to one so we can use a01 as
248  // v10t^T in subsequent computations. We will restore a01_b later on.
249  // Also note: V20^T is stored in A02.
    // Skipped on the first iteration, when ATL (and hence a01) is empty.
250  if ( FLA_Obj_length( ATL ) > 0 )
251  {
252  FLA_Part_2x1( a01, &a01_t,
253  &a01_b, 1, FLA_BOTTOM );
254  FLA_Copy( a01_b, last_elem );
255  FLA_Set( FLA_ONE, a01_b );
256  }
257 
    // a2 is a single view stacking alpha11 on top of a21, so both can be
    // updated with one matrix-vector product against ABL / ZBL.
258  FLA_Merge_2x1( alpha11,
259  a21, &a2 );
260 
261  // alpha11 = alpha11 - u10t * y10t' - z10t * v10t';
262  // a21 = a21 - U20 * y10t' - Z20 * v10t';
263  FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
264  FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
265 
266  // a12t = a12t - u10t * Y20' - z10t * V20';
267  FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
268  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
269 
270  // Restore last element of a01.
271  if ( FLA_Obj_length( ATL ) > 0 )
272  {
273  FLA_Copy( last_elem, a01_b );
274  }
275 
276  // [ alpha11, u21p, tau11 ] = House2( alpha11, a21 );
    // The Householder routine works on alpha11/a21 in place; a21 is then
    // copied into the u21p workspace.
277  FLA_Househ2_UT( FLA_LEFT,
278  alpha11,
279  a21, tau11 );
280  FLA_Copy( a21, u21p );
281 
    // The right-hand (row) transform is only computed while columns remain
    // to the right of the current one.
282  if ( FLA_Obj_width( A22 ) > 0 )
283  {
284  // minus_inv_tau11 = - 1 / tau11;
285  FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
286  FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
287 
288  // a12p = ( tau11 - 1 ) * a12t^T / tau11;
289  // = a12t^T - ( 1 / tau11 ) * a12t^T;
290  FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
291  FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
292 
293  // y21 = - Y20 * ( U20' * u21p ) - V20 * ( Z20' * u21p );
    // The two inner products are kept in d0 and e0; d0 is reused for t01 below.
294  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
295  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
296 
297  FLA_Set( FLA_ZERO, y21 );
298  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
299  FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
300 
301  // t01 = a10t' + U20' * u21;
302  FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
303  FLA_Axpy( FLA_ONE, d0, t01 );
304 
305  // y21 = y21 + A22' * u21p;
306  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
307 
308  // a12p = a12p - conj(y21) / tau11;
309  FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
310 
311  // w21 = A22 * conj(a12p);
312  FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
313 
314  // w21 = w21 - U20 * ( Y20' * conj(a12p) ) - Z20 * ( V20' * conj(a12p) );
    // The two inner products are kept in f0 and g0; g0 is reused for s01 below.
315  FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
316  FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
317 
318  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
319  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
320 
    // Split off the first column of A22 and A02 and the first row of Y20.
321  FLA_Part_1x2( A22, &A22_l, &A22_r, 1, FLA_LEFT );
322  FLA_Part_2x1( Y20, &Y20_t,
323  &Y20_b, 1, FLA_TOP );
324  FLA_Part_1x2( A02, &A02_l, &A02_r, 1, FLA_LEFT );
325 
326  // a22l = A22 * e0 - U20 * ( Y20' * e0 ) - Z20 * ( V20' * e0 );
    // In the formula above e0 stands for the first unit vector (the code
    // uses the first column A22_l, first row Y20_t and first column A02_l),
    // not the workspace vector e0.
327  FLA_Copy( A22_l, a22l );
328  FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
329  FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
330 
331  // y21 = y21 + conj(a12t)^T;
332  FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
333 
    // Separate the first element from the rest of a12t, v21 and a12p.
334  FLA_Part_1x2( a12t, &a12t_l, &a12t_r, 1, FLA_LEFT );
335  FLA_Part_2x1( v21, &v21_t,
336  &v21_b, 1, FLA_TOP );
337  FLA_Part_2x1( a12p, &a12p_t,
338  &a12p_b, 1, FLA_TOP );
339 
340  // [ alpha12, psi11_minus_alpha12, sigma11 ] = House2s( a12p_t, a12p_b );
341  FLA_Househ2s_UT( FLA_RIGHT,
342  a12p_t,
343  a12p_b,
344  alpha12, psi11_minus_alpha12, sigma11 );
345 
346  // v21 = conj( ( a12p - alpha12 * e0 ) / ( psi11 - alpha12 ) );
    // Only the trailing part v21_b is conjugated explicitly.
347  FLA_Copy( a12p, v21 );
348  FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
349  FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
350  FLA_Conjugate( v21_b );
351 
352  // minus_conj_alpha12 = - conj(alpha12);
353  FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
354  FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
355 
356  // s01 = V20' * v21;
357  // = conj(V02) * v21;
358  // = conj(V02) * conj( ( a12p - alpha12 * e0 ) / ( psi11 - alpha12 ) );
359  // = conj(V02) * ( conj(a12p) - conj(alpha12) * e0 ) / conj( psi11 - alpha12 );
360  // = ( conj(V02) * conj(a12p) - conj(V02) * conj(alpha12) * e0 ) / conj( psi11 - alpha12 );
361  // = ( g0 - conj(V02) * conj(alpha12) * e0 ) / conj( psi11 - alpha12 );
362  FLA_Copy( g0, s01 );
363  FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
364  FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
365 
366  // a12t_l = alpha12;
367  // a12t_r = v21_b^T;
368  FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
369  FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
370  }
371 
372  // u21 = u21p;
373  FLA_Copy( u21p, u21 );
374 
375  if ( FLA_Obj_width( A22 ) > 0 )
376  {
377  // beta = - y21' * v21 / tau11;
378  FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
379  FLA_Scal( FLA_MINUS_ONE, beta );
380  FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
381 
382  // z21 = ( w21 - conj(alpha12) * a22l ) / conj(psi11 - alpha12) + beta * u21;
383  FLA_Copy( w21, z21 );
384  FLA_Axpy( minus_conj_alpha12, a22l, z21 );
385  FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
386  FLA_Axpy( beta, u21, z21 );
387 
388  // y21 = y21 / tau11;
389  // z21 = z21 / sigma11;
390  FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
391  FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
392  }
393  else // if ( FLA_Obj_width( A22 ) == 0 )
394  {
    // No columns remain to the right: only t01 is formed here, since the
    // branch above that normally fills it was skipped.
395  // t01 = a10t' + U20' * u21;
396  FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
397  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
398  }
399 
400  /*------------------------------------------------------------*/
401 
    // Advance every partition by one row/column (one element for the
    // workspace vectors) so the current pieces join the top-left / top part.
402  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
403  a10t, alpha11, /**/ a12t,
404  /* ************** */ /* ************************ */
405  &ABL, /**/ &ABR, A20, a21, /**/ A22,
406  FLA_TL );
407  FLA_Cont_with_3x3_to_2x2( &YTL, /**/ &YTR, Y00, y01, /**/ Y02,
408  y10t, psi11, /**/ y12t,
409  /* ************** */ /* ********************** */
410  &YBL, /**/ &YBR, Y20, y21, /**/ Y22,
411  FLA_TL );
412  FLA_Cont_with_3x3_to_2x2( &ZTL, /**/ &ZTR, Z00, z01, /**/ Z02,
413  z10t, zeta11, /**/ z12t,
414  /* ************** */ /* *********************** */
415  &ZBL, /**/ &ZBR, Z20, z21, /**/ Z22,
416  FLA_TL );
417  FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
418  t10t, tau11, /**/ t12t,
419  /* ************** */ /* ************************ */
420  &TBL, /**/ &TBR, T20, t21, /**/ T22,
421  FLA_TL );
422  FLA_Cont_with_3x3_to_2x2( &STL, /**/ &STR, S00, s01, /**/ S02,
423  s10t, sigma11, /**/ s12t,
424  /* ************** */ /* ************************ */
425  &SBL, /**/ &SBR, S20, s21, /**/ S22,
426  FLA_TL );
427  FLA_Cont_with_3x1_to_2x1( &wT, w01,
428  omega11,
429  /* ** */ /* ***** */
430  &wB, w21, FLA_TOP );
431  FLA_Cont_with_3x1_to_2x1( &alT, a01l,
432  alpha11l,
433  /* ** */ /* ***** */
434  &alB, a22l, FLA_TOP );
435  FLA_Cont_with_3x1_to_2x1( &apT, a01p,
436  alpha11p,
437  /* ** */ /* ***** */
438  &apB, a12p, FLA_TOP );
439  FLA_Cont_with_3x1_to_2x1( &uT, u01,
440  upsilon11,
441  /* ** */ /* ***** */
442  &uB, u21, FLA_TOP );
443  FLA_Cont_with_3x1_to_2x1( &uTp, u01p,
444  upsilon11p,
445  /* ** */ /* ***** */
446  &uBp, u21p, FLA_TOP );
447  FLA_Cont_with_3x1_to_2x1( &vT, v01,
448  nu11,
449  /* ** */ /* ***** */
450  &vB, v21, FLA_TOP );
451  FLA_Cont_with_3x1_to_2x1( &dT, d0,
452  delta1,
453  /* ** */ /* ****** */
454  &dB, d2, FLA_TOP );
455  FLA_Cont_with_3x1_to_2x1( &eT, e0,
456  epsilon1,
457  /* ** */ /* ******** */
458  &eB, e2, FLA_TOP );
459  FLA_Cont_with_3x1_to_2x1( &fT, f0,
460  phi1,
461  /* ** */ /* **** */
462  &fB, f2, FLA_TOP );
463  FLA_Cont_with_3x1_to_2x1( &gT, g0,
464  ghi1,
465  /* ** */ /* **** */
466  &gB, g2, FLA_TOP );
467  }
468 
    // Release every temporary object created at the top of the function.
469  FLA_Obj_free( &minus_inv_tau11 );
470  FLA_Obj_free( &last_elem );
471  FLA_Obj_free( &beta );
472  FLA_Obj_free( &alpha12 );
473  FLA_Obj_free( &minus_alpha12 );
474  FLA_Obj_free( &minus_conj_alpha12 );
475  FLA_Obj_free( &psi11_minus_alpha12 );
476  FLA_Obj_free( &minus_upsilon11 );
477  FLA_Obj_free( &minus_conj_nu11 );
478  FLA_Obj_free( &minus_conj_psi11 );
479  FLA_Obj_free( &minus_zeta11 );
480  FLA_Obj_free( &w );
481  FLA_Obj_free( &al );
482  FLA_Obj_free( &ap );
483  FLA_Obj_free( &u );
484  FLA_Obj_free( &up );
485  FLA_Obj_free( &v );
486  FLA_Obj_free( &d );
487  FLA_Obj_free( &e );
488  FLA_Obj_free( &f );
489  FLA_Obj_free( &g );
490 
491  return FLA_SUCCESS;
492 }
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition: FLA_Obj.c:55
FLA_Error FLA_Gemvc(FLA_Trans transa, FLA_Conj conjx, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Gemvc.c:13
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:226
FLA_Error FLA_Househ2_UT(FLA_Side side, FLA_Obj chi_1, FLA_Obj x2, FLA_Obj tau)
Definition: FLA_Househ2_UT.c:16
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
FLA_Error FLA_Axpyt(FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLA_Axpyt.c:15
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Error FLA_Copy(FLA_Obj A, FLA_Obj B)
Definition: FLA_Copy.c:15
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Inv_scalc(FLA_Conj conjalpha, FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Inv_scalc.c:13
FLA_Error FLA_Copyt(FLA_Trans trans, FLA_Obj A, FLA_Obj B)
Definition: FLA_Copyt.c:15
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:428
FLA_Error FLA_Conjugate(FLA_Obj A)
Definition: FLA_Conjugate.c:13
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Error FLA_Gemv(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Gemv.c:15
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Set(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Set.c:13
FLA_Error FLA_Scal(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Scal.c:15
FLA_Error FLA_Axpy(FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLA_Axpy.c:15
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
FLA_Error FLA_Mult_add(FLA_Obj alpha, FLA_Obj beta, FLA_Obj gamma)
Definition: FLA_Mult_add.c:13
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
int FLA_Datatype
Definition: FLA_type_defs.h:49
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
FLA_Error FLA_Househ2s_UT(FLA_Side side, FLA_Obj chi_1, FLA_Obj x2, FLA_Obj alpha, FLA_Obj chi_1_minus_alpha, FLA_Obj tau)
Definition: FLA_Househ2s_UT.c:16
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Error FLA_Merge_2x1(FLA_Obj AT, FLA_Obj AB, FLA_Obj *A)
Definition: FLA_View.c:541
FLA_Error FLA_Dotc(FLA_Conj conj, FLA_Obj x, FLA_Obj y, FLA_Obj rho)
Definition: FLA_Dotc.c:13

◆ FLA_Bidiag_UT_u_step_unb_var5()

FLA_Error FLA_Bidiag_UT_u_step_unb_var5 ( FLA_Obj  A,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Axpy(), FLA_Axpyt(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Dotc(), FLA_Gemv(), FLA_Gemvc(), FLA_Househ2_UT(), FLA_Inv_scalc(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), FLA_Set(), and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u_unb_var5().

36 {
    // Unblocked step routine (variant 5) of the FLA_Bidiag_UT_u family.
    //
    // Walks down the diagonal of A one row/column at a time for
    // b_alg = FLA_Obj_length( T ) iterations. Each iteration:
    //   1. applies the updates accumulated so far in Y and Z to the current
    //      column ( alpha11; a21 ) and the current row a12t of A;
    //   2. computes a left Householder transform from ( alpha11, a21 ),
    //      storing its scalar in tau11 (the current diagonal entry of T);
    //   3. if columns remain to the right ( width( A22 ) > 0 ), forms y21,
    //      updates a12t with it, computes a right Householder transform from
    //      a12t with scalar sigma11 (the current diagonal entry of S), and
    //      fills in z21, t01 and s01.
    //
    // Y and Z are zeroed on entry; A, Y, Z, T and S are updated in place.
    // The function always returns FLA_SUCCESS; the return values of the
    // FLA_* calls it makes are not checked.
    //
    // NOTE(review): the prototype shown for this listing names the last two
    // parameters TU and TV, while this body refers to T and S -- presumably
    // the parameter names used by the definition; confirm against the .c file.
37  FLA_Obj ATL, ATR, A00, a01, A02,
38  ABL, ABR, a10t, alpha11, a12t,
39  A20, a21, A22;
40  FLA_Obj YTL, YTR, Y00, y01, Y02,
41  YBL, YBR, y10t, psi11, y12t,
42  Y20, y21, Y22;
43  FLA_Obj ZTL, ZTR, Z00, z01, Z02,
44  ZBL, ZBR, z10t, zeta11, z12t,
45  Z20, z21, Z22;
46  FLA_Obj TTL, TTR, T00, t01, T02,
47  TBL, TBR, t10t, tau11, t12t,
48  T20, t21, T22;
49  FLA_Obj STL, STR, S00, s01, S02,
50  SBL, SBR, s10t, sigma11, s12t,
51  S20, s21, S22;
52  FLA_Obj uT, u01,
53  uB, upsilon11,
54  u21;
55  FLA_Obj vT, v01,
56  vB, nu11,
57  v21;
58  FLA_Obj dT, d0,
59  dB, delta1,
60  d2;
61  FLA_Obj eT, e0,
62  eB, epsilon1,
63  e2;
64  FLA_Obj fT, f0,
65  fB, phi1,
66  f2;
67  FLA_Obj gT, g0,
68  gB, ghi1,
69  g2;
70  FLA_Obj u, v;
71  FLA_Obj d, e, f, g;
72 
    // 1x1 scalar temporaries, created below and freed before returning.
    // NOTE(review): minus_upsilon11 and minus_zeta11 are created and freed in
    // this listing but never read or written in between.
73  FLA_Obj last_elem;
74  FLA_Obj beta;
75  FLA_Obj minus_upsilon11;
76  FLA_Obj minus_zeta11;
77 
    // Sub-views produced inside the loop by splitting off one row or column.
78  FLA_Obj a01_t,
79  a01_b;
80  FLA_Obj a12t_l, a12t_r;
81  FLA_Obj v21_t,
82  v21_b;
83  FLA_Obj a2;
84 
85  FLA_Datatype datatype_A;
86  dim_t m_A, n_A;
87  dim_t b_alg;
88 
89 
    // The number of loop iterations is taken from the row count of T.
90  b_alg = FLA_Obj_length( T );
91 
92  datatype_A = FLA_Obj_datatype( A );
93  m_A = FLA_Obj_length( A );
94  n_A = FLA_Obj_width( A );
95 
    // Allocate the scalar temporaries (1 x 1) and the vector workspaces, all
    // with A's datatype: u, f, g are m_A x 1; v, d, e are n_A x 1.
96  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &last_elem );
97  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &beta );
98  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_upsilon11 );
99  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_zeta11 );
100  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
101  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
102  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
103  FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
104  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
105  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
106 
    // Y and Z start out as zero; they are filled in one column per iteration.
107  FLA_Set( FLA_ZERO, Y );
108  FLA_Set( FLA_ZERO, Z );
109 
    // Initial partitioning: every top-left / top part starts out empty
    // (0 x 0 for the matrices, length 0 for the workspace vectors).
110  FLA_Part_2x2( A, &ATL, &ATR,
111  &ABL, &ABR, 0, 0, FLA_TL );
112  FLA_Part_2x2( Y, &YTL, &YTR,
113  &YBL, &YBR, 0, 0, FLA_TL );
114  FLA_Part_2x2( Z, &ZTL, &ZTR,
115  &ZBL, &ZBR, 0, 0, FLA_TL );
116  FLA_Part_2x2( T, &TTL, &TTR,
117  &TBL, &TBR, 0, 0, FLA_TL );
118  FLA_Part_2x2( S, &STL, &STR,
119  &SBL, &SBR, 0, 0, FLA_TL );
120  FLA_Part_2x1( u, &uT,
121  &uB, 0, FLA_TOP );
122  FLA_Part_2x1( v, &vT,
123  &vB, 0, FLA_TOP );
124  FLA_Part_2x1( d, &dT,
125  &dB, 0, FLA_TOP );
126  FLA_Part_2x1( e, &eT,
127  &eB, 0, FLA_TOP );
128  FLA_Part_2x1( f, &fT,
129  &fB, 0, FLA_TOP );
130  FLA_Part_2x1( g, &gT,
131  &gB, 0, FLA_TOP );
132 
    // One iteration per row/column, until ATL has b_alg rows.
133  while ( FLA_Obj_length( ATL ) < b_alg )
134  {
    // Expose the current 1 x 1 diagonal element and its neighbouring
    // row/column pieces in each matrix, and the current element of each
    // workspace vector.
135  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
136  /* ************* */ /* ************************** */
137  &a10t, /**/ &alpha11, &a12t,
138  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
139  1, 1, FLA_BR );
140  FLA_Repart_2x2_to_3x3( YTL, /**/ YTR, &Y00, /**/ &y01, &Y02,
141  /* ************* */ /* ************************ */
142  &y10t, /**/ &psi11, &y12t,
143  YBL, /**/ YBR, &Y20, /**/ &y21, &Y22,
144  1, 1, FLA_BR );
145  FLA_Repart_2x2_to_3x3( ZTL, /**/ ZTR, &Z00, /**/ &z01, &Z02,
146  /* ************* */ /* ************************* */
147  &z10t, /**/ &zeta11, &z12t,
148  ZBL, /**/ ZBR, &Z20, /**/ &z21, &Z22,
149  1, 1, FLA_BR );
150  FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
151  /* ************* */ /* ************************** */
152  &t10t, /**/ &tau11, &t12t,
153  TBL, /**/ TBR, &T20, /**/ &t21, &T22,
154  1, 1, FLA_BR );
155  FLA_Repart_2x2_to_3x3( STL, /**/ STR, &S00, /**/ &s01, &S02,
156  /* ************* */ /* ************************** */
157  &s10t, /**/ &sigma11, &s12t,
158  SBL, /**/ SBR, &S20, /**/ &s21, &S22,
159  1, 1, FLA_BR );
160  FLA_Repart_2x1_to_3x1( uT, &u01,
161  /* ** */ /* ***** */
162  &upsilon11,
163  uB, &u21, 1, FLA_BOTTOM );
164  FLA_Repart_2x1_to_3x1( vT, &v01,
165  /* ** */ /* ***** */
166  &nu11,
167  vB, &v21, 1, FLA_BOTTOM );
168  FLA_Repart_2x1_to_3x1( dT, &d0,
169  /* ** */ /* ****** */
170  &delta1,
171  dB, &d2, 1, FLA_BOTTOM );
172  FLA_Repart_2x1_to_3x1( eT, &e0,
173  /* ** */ /* ******** */
174  &epsilon1,
175  eB, &e2, 1, FLA_BOTTOM );
176  FLA_Repart_2x1_to_3x1( fT, &f0,
177  /* ** */ /* **** */
178  &phi1,
179  fB, &f2, 1, FLA_BOTTOM );
180  FLA_Repart_2x1_to_3x1( gT, &g0,
181  /* ** */ /* **** */
182  &ghi1,
183  gB, &g2, 1, FLA_BOTTOM );
184 
185  /*------------------------------------------------------------*/
186 
187  // Save last element of a01 and set it to one so we can use a01 as
188  // v10t^T in subsequent computations. We will restore a01_b later on.
189  // Also note: V20^T is stored in A02.
    // Skipped on the first iteration, when ATL (and hence a01) is empty.
190  if ( FLA_Obj_length( ATL ) > 0 )
191  {
192  FLA_Part_2x1( a01, &a01_t,
193  &a01_b, 1, FLA_BOTTOM );
194  FLA_Copy( a01_b, last_elem );
195  FLA_Set( FLA_ONE, a01_b );
196  }
197 
    // a2 is a single view stacking alpha11 on top of a21, so both can be
    // updated with one matrix-vector product against ABL / ZBL.
198  FLA_Merge_2x1( alpha11,
199  a21, &a2 );
200 
201  // alpha11 = alpha11 - u10t * y10t' - z10t * v10t';
202  // a21 = a21 - U20 * y10t' - Z20 * v10t';
203  FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
204  FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
205 
206  // a12t = a12t - u10t * Y20' - z10t * V20';
207  FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
208  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
209 
210  // Restore last element of a01.
211  if ( FLA_Obj_length( ATL ) > 0 )
212  {
213  FLA_Copy( last_elem, a01_b );
214  }
215 
216  // [ alpha11, u21, tau11 ] = House2( alpha11, a21 );
    // The Householder routine works on alpha11/a21 in place; a21 is then
    // copied into the u21 workspace.
217  FLA_Househ2_UT( FLA_LEFT,
218  alpha11,
219  a21, tau11 );
220  FLA_Copy( a21, u21 );
221 
    // The right-hand (row) transform is only computed while columns remain
    // to the right of the current one.
222  if ( FLA_Obj_width( A22 ) > 0 )
223  {
224  // y21' = a12t + u21' * A22;
225  // y21 = conj(a12t) + A22' * u21;
226  FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
227  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21, FLA_ONE, y21 );
228 
229  // y21 = y21 - Y20 * ( U20' * u21 ) - V20 * ( Z20' * u21 );
    // The two inner products are kept in d0 and e0; d0 is reused for t01 below.
230  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ZERO, d0 );
231  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21, FLA_ZERO, e0 );
232 
233  // t01 = a10t' + U20' * u21;
234  FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
235  FLA_Axpy( FLA_ONE, d0, t01 );
236 
237  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
238  FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
239 
240  // y21 = y21 / tau11;
241  FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
242 
243  // a12t = a12t - conj(y21)^T;
244  FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
245 
    // Separate the first element from the rest of a12t and v21.
246  FLA_Part_1x2( a12t, &a12t_l, &a12t_r, 1, FLA_LEFT );
247  FLA_Part_2x1( v21, &v21_t,
248  &v21_b, 1, FLA_TOP );
249 
250  // [ a12t_l, v21_b, sigma11 ] = House2( a12t_l, a12t_r );
251  FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
252 
253  // v21_t = 1;
254  // v21_b = a12t_r^T;
255  FLA_Set( FLA_ONE, v21_t );
256  FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
257 
258  // beta = - y21' * v21;
259  FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
260  FLA_Scal( FLA_MINUS_ONE, beta );
261 
262  // z21 = A22 * v21 + beta * u21;
    // z21 is seeded with u21 so the Gemv call can scale it by beta while
    // adding A22 * v21.
263  FLA_Copy( u21, z21 );
264  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, v21, beta, z21 );
265 
266  // z21 = z21 - U20 * ( Y20' * v21 ) - Z20 * ( V20' * v21 );
    // The two inner products are kept in f0 and g0; g0 is reused for s01 below.
267  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, v21, FLA_ZERO, f0 );
268  FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, g0 );
269 
270  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, z21 );
271  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, z21 );
272 
273  // z21 = z21 / sigma11;
274  FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
275 
276  // s01 = conj(V02) * v21;
277  FLA_Copy( g0, s01 );
278  }
279  else // if ( FLA_Obj_width( A22 ) == 0 )
280  {
    // No columns remain to the right: only t01 is formed here, since the
    // branch above that normally fills it was skipped.
281  // t01 = a10t' + U20' * u21;
282  FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
283  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
284  }
285 
286  /*------------------------------------------------------------*/
287 
    // Advance every partition by one row/column (one element for the
    // workspace vectors) so the current pieces join the top-left / top part.
288  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
289  a10t, alpha11, /**/ a12t,
290  /* ************** */ /* ************************ */
291  &ABL, /**/ &ABR, A20, a21, /**/ A22,
292  FLA_TL );
293  FLA_Cont_with_3x3_to_2x2( &YTL, /**/ &YTR, Y00, y01, /**/ Y02,
294  y10t, psi11, /**/ y12t,
295  /* ************** */ /* ********************** */
296  &YBL, /**/ &YBR, Y20, y21, /**/ Y22,
297  FLA_TL );
298  FLA_Cont_with_3x3_to_2x2( &ZTL, /**/ &ZTR, Z00, z01, /**/ Z02,
299  z10t, zeta11, /**/ z12t,
300  /* ************** */ /* *********************** */
301  &ZBL, /**/ &ZBR, Z20, z21, /**/ Z22,
302  FLA_TL );
303  FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
304  t10t, tau11, /**/ t12t,
305  /* ************** */ /* ************************ */
306  &TBL, /**/ &TBR, T20, t21, /**/ T22,
307  FLA_TL );
308  FLA_Cont_with_3x3_to_2x2( &STL, /**/ &STR, S00, s01, /**/ S02,
309  s10t, sigma11, /**/ s12t,
310  /* ************** */ /* ************************ */
311  &SBL, /**/ &SBR, S20, s21, /**/ S22,
312  FLA_TL );
313  FLA_Cont_with_3x1_to_2x1( &uT, u01,
314  upsilon11,
315  /* ** */ /* ***** */
316  &uB, u21, FLA_TOP );
317  FLA_Cont_with_3x1_to_2x1( &vT, v01,
318  nu11,
319  /* ** */ /* ***** */
320  &vB, v21, FLA_TOP );
321  FLA_Cont_with_3x1_to_2x1( &dT, d0,
322  delta1,
323  /* ** */ /* ****** */
324  &dB, d2, FLA_TOP );
325  FLA_Cont_with_3x1_to_2x1( &eT, e0,
326  epsilon1,
327  /* ** */ /* ******** */
328  &eB, e2, FLA_TOP );
329  FLA_Cont_with_3x1_to_2x1( &fT, f0,
330  phi1,
331  /* ** */ /* **** */
332  &fB, f2, FLA_TOP );
333  FLA_Cont_with_3x1_to_2x1( &gT, g0,
334  ghi1,
335  /* ** */ /* **** */
336  &gB, g2, FLA_TOP );
337  }
338 
    // Release every temporary object created at the top of the function.
339  FLA_Obj_free( &last_elem );
340  FLA_Obj_free( &beta );
341  FLA_Obj_free( &minus_upsilon11 );
342  FLA_Obj_free( &minus_zeta11 );
343  FLA_Obj_free( &u );
344  FLA_Obj_free( &v );
345  FLA_Obj_free( &d );
346  FLA_Obj_free( &e );
347  FLA_Obj_free( &f );
348  FLA_Obj_free( &g );
349 
350  return FLA_SUCCESS;
351 }
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition: FLA_Obj.c:55
FLA_Error FLA_Gemvc(FLA_Trans transa, FLA_Conj conjx, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Gemvc.c:13
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:226
FLA_Error FLA_Househ2_UT(FLA_Side side, FLA_Obj chi_1, FLA_Obj x2, FLA_Obj tau)
Definition: FLA_Househ2_UT.c:16
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
FLA_Error FLA_Axpyt(FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLA_Axpyt.c:15
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Error FLA_Copy(FLA_Obj A, FLA_Obj B)
Definition: FLA_Copy.c:15
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Inv_scalc(FLA_Conj conjalpha, FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Inv_scalc.c:13
FLA_Error FLA_Copyt(FLA_Trans trans, FLA_Obj A, FLA_Obj B)
Definition: FLA_Copyt.c:15
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:428
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Error FLA_Gemv(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Gemv.c:15
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Set(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Set.c:13
FLA_Error FLA_Scal(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Scal.c:15
FLA_Error FLA_Axpy(FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLA_Axpy.c:15
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
int FLA_Datatype
Definition: FLA_type_defs.h:49
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Error FLA_Merge_2x1(FLA_Obj AT, FLA_Obj AB, FLA_Obj *A)
Definition: FLA_View.c:541
FLA_Error FLA_Dotc(FLA_Conj conj, FLA_Obj x, FLA_Obj y, FLA_Obj rho)
Definition: FLA_Dotc.c:13

◆ FLA_Bidiag_UT_u_unb_var1()

FLA_Error FLA_Bidiag_UT_u_unb_var1 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Bidiag_UT_u_step_unb_var1().

Referenced by FLA_Bidiag_UT_u().

14 {
15  return FLA_Bidiag_UT_u_step_unb_var1( A, TU, TV );
16 }
FLA_Error FLA_Bidiag_UT_u_step_unb_var1(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_unb_var1.c:18

◆ FLA_Bidiag_UT_u_unb_var2()

FLA_Error FLA_Bidiag_UT_u_unb_var2 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Bidiag_UT_u_step_unb_var2().

Referenced by FLA_Bidiag_UT_u().

14 {
15  return FLA_Bidiag_UT_u_step_unb_var2( A, TU, TV );
16 }
FLA_Error FLA_Bidiag_UT_u_step_unb_var2(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_unb_var2.c:18

◆ FLA_Bidiag_UT_u_unb_var3()

FLA_Error FLA_Bidiag_UT_u_unb_var3 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Bidiag_UT_u_step_unb_var3().

Referenced by FLA_Bidiag_UT_u().

14 {
15  return FLA_Bidiag_UT_u_step_unb_var3( A, TU, TV );
16 }
FLA_Error FLA_Bidiag_UT_u_step_unb_var3(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_unb_var3.c:18

◆ FLA_Bidiag_UT_u_unb_var4()

FLA_Error FLA_Bidiag_UT_u_unb_var4 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Bidiag_UT_u_step_unb_var4(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u().

14 {
15  FLA_Error r_val;
16  FLA_Obj Y, Z;
17  FLA_Datatype datatype_A;
18  dim_t m_A, n_A;
19 
20  datatype_A = FLA_Obj_datatype( A );
21  m_A = FLA_Obj_length( A );
22  n_A = FLA_Obj_width( A );
23 
24  FLA_Obj_create( datatype_A, n_A, n_A, 0, 0, &Y );
25  FLA_Obj_create( datatype_A, m_A, n_A, 0, 0, &Z );
26 
27  r_val = FLA_Bidiag_UT_u_step_unb_var4( A, Y, Z, TU, TV );
28 
29  FLA_Obj_free( &Y );
30  FLA_Obj_free( &Z );
31 
32  return r_val;
33 }
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition: FLA_Obj.c:55
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
int FLA_Error
Definition: FLA_type_defs.h:47
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
int FLA_Datatype
Definition: FLA_type_defs.h:49
FLA_Error FLA_Bidiag_UT_u_step_unb_var4(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_unb_var4.c:35
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116

◆ FLA_Bidiag_UT_u_unb_var5()

FLA_Error FLA_Bidiag_UT_u_unb_var5 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Bidiag_UT_u_step_unb_var5(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u().

14 {
15  FLA_Error r_val;
16  FLA_Obj Y, Z;
17  FLA_Datatype datatype_A;
18  dim_t m_A, n_A;
19 
20  datatype_A = FLA_Obj_datatype( A );
21  m_A = FLA_Obj_length( A );
22  n_A = FLA_Obj_width( A );
23 
24  FLA_Obj_create( datatype_A, n_A, n_A, 0, 0, &Y );
25  FLA_Obj_create( datatype_A, m_A, n_A, 0, 0, &Z );
26 
27  r_val = FLA_Bidiag_UT_u_step_unb_var5( A, Y, Z, TU, TV );
28 
29  FLA_Obj_free( &Y );
30  FLA_Obj_free( &Z );
31 
32  return r_val;
33 }
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition: FLA_Obj.c:55
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
int FLA_Error
Definition: FLA_type_defs.h:47
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Bidiag_UT_u_step_unb_var5(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_unb_var5.c:35
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116

◆ FLA_Fused_Ahx_Axpy_Ax_opc_var1()

FLA_Error FLA_Fused_Ahx_Axpy_Ax_opc_var1 ( int  m_A,
int  n_A,
scomplex *  buff_tau,
scomplex *  buff_beta,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_u,
int  inc_u,
scomplex *  buff_a,
int  inc_a,
scomplex *  buff_y,
int  inc_y,
scomplex *  buff_w,
int  inc_w 
)

References alpha1, bl1_caxpyv(), bl1_cdots(), bl1_csetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, i, and psi1.

Referenced by FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofc_var4(), and FLA_Fused_Ahx_Axpy_Ax_opt_var1().

331 {
332  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
333  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
334  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
335  scomplex minus_inv_tau;
336  scomplex conj_psi1;
337  scomplex conj_alpha1;
338  int i;
339 
340  bl1_csetv( m_A,
341  buff_0,
342  buff_w, inc_w );
343 
344  bl1_cdiv3( buff_m1, buff_tau, &minus_inv_tau );
345 
346  for ( i = 0; i < n_A; ++i )
347  {
348  scomplex* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
349  scomplex* psi1 = buff_y + (i )*inc_y;
350  scomplex* alpha1 = buff_a + (i )*inc_a;
351  scomplex* u = buff_u;
352  scomplex* w = buff_w;
353 
354  /*------------------------------------------------------------*/
355 
356  bl1_cdots( BLIS1_CONJUGATE,
357  m_A,
358  buff_1,
359  a1, rs_A,
360  u, inc_u,
361  buff_beta,
362  psi1 );
363 
364  bl1_ccopyconj( psi1, &conj_psi1 );
365  bl1_cmult4( &minus_inv_tau, &conj_psi1, alpha1, alpha1 );
366 
367  bl1_ccopyconj( alpha1, &conj_alpha1 );
368 
369  bl1_caxpyv( BLIS1_NO_CONJUGATE,
370  m_A,
371  &conj_alpha1,
372  a1, rs_A,
373  w, inc_w );
374 /*
375  F77_caxpy( &m_A,
376  &conj_alpha1,
377  a1, &rs_A,
378  w, &inc_w );
379 */
380 
381  /*------------------------------------------------------------*/
382 
383  }
384 
385  return FLA_SUCCESS;
386 }
double *restrict psi1
Definition: bl1_axmyv2.c:139
void bl1_csetv(int m, scomplex *sigma, scomplex *x, int incx)
Definition: bl1_setv.c:52
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:81
double *restrict alpha1
Definition: bl1_axpyv2bdotaxpy.c:198
void bl1_cdots(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho)
Definition: bl1_dots.c:39
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
Definition: blis_type_defs.h:132
int i
Definition: bl1_axmyv2.c:145
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Fused_Ahx_Axpy_Ax_opd_var1()

FLA_Error FLA_Fused_Ahx_Axpy_Ax_opd_var1 ( int  m_A,
int  n_A,
double *  buff_tau,
double *  buff_beta,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_u,
int  inc_u,
double *  buff_a,
int  inc_a,
double *  buff_y,
int  inc_y,
double *  buff_w,
int  inc_w 
)

References alpha1, alpha2, bl1_d0(), bl1_daxpyv(), bl1_daxpyv2b(), bl1_ddot(), bl1_ddotsv2(), bl1_dm1(), bl1_dsetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, n_left, n_run, psi1, and rho1.

Referenced by FLA_Bidiag_UT_u_step_ofd_var3(), FLA_Bidiag_UT_u_step_ofd_var4(), and FLA_Fused_Ahx_Axpy_Ax_opt_var1().

216 {
217  double zero = bl1_d0();
218  double minus_one = bl1_dm1();
219  double* restrict u = buff_u;
220  double* restrict w = buff_w;
221  double* restrict beta = buff_beta;
222  double* restrict a1;
223  double* restrict a2;
224  double* restrict psi1;
225  double* restrict psi2;
226  double* restrict alpha1;
227  double* restrict alpha2;
228 
229  double minus_inv_tau;
230  int i;
231 
232  int n_run = n_A / 2;
233  int n_left = n_A % 2;
234  int stepcs_A = 2*cs_A;
235  int stepinc_y = 2*inc_y;
236  int stepinc_a = 2*inc_a;
237 
238 
239  bl1_dsetv( m_A,
240  &zero,
241  buff_w, inc_w );
242 
243  bl1_ddiv3( &minus_one, buff_tau, &minus_inv_tau );
244 
245  a1 = buff_A;
246  a2 = buff_A + cs_A;
247  psi1 = buff_y;
248  psi2 = buff_y + inc_y;
249  alpha1 = buff_a;
250  alpha2 = buff_a + inc_a;
251 
252  for ( i = 0; i < n_run; ++i )
253  {
254 /*
255  Effective computation:
256  y = beta * y + A' * u;
257  a = a - conj(y) / tau;
258  w = A * conj(a);
259 */
260  /*------------------------------------------------------------*/
261 
262  bl1_ddotsv2( BLIS1_CONJUGATE,
263  m_A,
264  a1, rs_A,
265  a2, rs_A,
266  u, inc_u,
267  beta,
268  psi1,
269  psi2 );
270 
271  bl1_dmult4( &minus_inv_tau, psi1, alpha1, alpha1 );
272  bl1_dmult4( &minus_inv_tau, psi2, alpha2, alpha2 );
273 
274  bl1_daxpyv2b( m_A,
275  alpha1,
276  alpha2,
277  a1, rs_A,
278  a2, rs_A,
279  w, inc_w );
280 
281  /*------------------------------------------------------------*/
282 
283  a1 += stepcs_A;
284  a2 += stepcs_A;
285  psi1 += stepinc_y;
286  psi2 += stepinc_y;
287  alpha1 += stepinc_a;
288  alpha2 += stepinc_a;
289  }
290 
291  if ( n_left == 1 )
292  //for ( i = 0; i < n_left; ++i )
293  {
294  double rho1;
295 
296  bl1_ddot( BLIS1_CONJUGATE,
297  m_A,
298  a1, rs_A,
299  u, inc_u,
300  &rho1 );
301  bl1_dscals( buff_beta, psi1 );
302  bl1_dadd3( psi1, &rho1, psi1 );
303 
304  bl1_dmult4( &minus_inv_tau, psi1, alpha1, alpha1 );
305 
306  bl1_daxpyv( BLIS1_NO_CONJUGATE,
307  m_A,
308  alpha1,
309  a1, rs_A,
310  w, inc_w );
311 
312  //a1 += cs_A;
313  //psi1 += inc_y;
314  //alpha1 += inc_a;
315  }
316 
317  return FLA_SUCCESS;
318 }
double rho1
Definition: bl1_dotsv2.c:149
double *restrict psi1
Definition: bl1_axmyv2.c:139
Definition: blis_type_defs.h:81
double *restrict alpha1
Definition: bl1_axpyv2bdotaxpy.c:198
void bl1_ddotsv2(conj1_t conjxy, int n, double *x, int inc_x, double *y, int inc_y, double *z, int inc_z, double *beta, double *rho_xz, double *rho_yz)
Definition: bl1_dotsv2.c:35
double bl1_d0(void)
Definition: bl1_constants.c:118
void bl1_daxpyv2b(int n, double *alpha1, double *alpha2, double *x1, int inc_x1, double *x2, int inc_x2, double *y, int inc_y)
Definition: bl1_axpyv2b.c:31
double *restrict alpha2
Definition: bl1_dotv2axpyv2b.c:186
Definition: blis_type_defs.h:82
void bl1_dsetv(int m, double *sigma, double *x, int incx)
Definition: bl1_setv.c:39
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
int n_left
Definition: bl1_axmyv2.c:149
double bl1_dm1(void)
Definition: bl1_constants.c:182
int i
Definition: bl1_axmyv2.c:145
int n_run
Definition: bl1_axmyv2.c:148
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21

◆ FLA_Fused_Ahx_Axpy_Ax_ops_var1()

FLA_Error FLA_Fused_Ahx_Axpy_Ax_ops_var1 ( int  m_A,
int  n_A,
float *  buff_tau,
float *  buff_beta,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_u,
int  inc_u,
float *  buff_a,
int  inc_a,
float *  buff_y,
int  inc_y,
float *  buff_w,
int  inc_w 
)

References alpha1, bl1_saxpyv(), bl1_sdots(), bl1_ssetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, i, and psi1.

Referenced by FLA_Bidiag_UT_u_step_ofs_var3(), FLA_Bidiag_UT_u_step_ofs_var4(), and FLA_Fused_Ahx_Axpy_Ax_opt_var1().

152 {
153  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
154  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
155  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
156  float minus_inv_tau;
157  int i;
158 
159  bl1_ssetv( m_A,
160  buff_0,
161  buff_w, inc_w );
162 
163  minus_inv_tau = *buff_m1 / *buff_tau;
164 
165  for ( i = 0; i < n_A; ++i )
166  {
167  float* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
168  float* psi1 = buff_y + (i )*inc_y;
169  float* alpha1 = buff_a + (i )*inc_a;
170  float* u = buff_u;
171  float* w = buff_w;
172 
173  /*------------------------------------------------------------*/
174 
175  bl1_sdots( BLIS1_CONJUGATE,
176  m_A,
177  buff_1,
178  a1, rs_A,
179  u, inc_u,
180  buff_beta,
181  psi1 );
182 
183  // bl1_dmult4( &minus_inv_tau, conj_psi1, alpha1, alpha1 );
184  *alpha1 = *alpha1 + minus_inv_tau * *psi1;
185 
186  bl1_saxpyv( BLIS1_NO_CONJUGATE,
187  m_A,
188  alpha1,
189  a1, rs_A,
190  w, inc_w );
191 /*
192  F77_saxpy( &m_A,
193  alpha1,
194  a1, &rs_A,
195  w, &inc_w );
196 */
197 
198  /*------------------------------------------------------------*/
199 
200  }
201 
202  return FLA_SUCCESS;
203 }
double *restrict psi1
Definition: bl1_axmyv2.c:139
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:81
double *restrict alpha1
Definition: bl1_axpyv2bdotaxpy.c:198
Definition: blis_type_defs.h:82
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void bl1_ssetv(int m, float *sigma, float *x, int incx)
Definition: bl1_setv.c:26
void bl1_sdots(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho)
Definition: bl1_dots.c:13
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
int i
Definition: bl1_axmyv2.c:145
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Fused_Ahx_Axpy_Ax_opt_var1()

FLA_Error FLA_Fused_Ahx_Axpy_Ax_opt_var1 ( FLA_Obj  A,
FLA_Obj  u,
FLA_Obj  tau,
FLA_Obj  a,
FLA_Obj  beta,
FLA_Obj  y,
FLA_Obj  w 
)

References FLA_Fused_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

14 {
15 /*
16  Effective computation:
17  y = beta * y + A' * u;
18  a = a - conj(y) / tau;
19  w = A * conj(a);
20 */
21  FLA_Datatype datatype;
22  int m_A, n_A;
23  int rs_A, cs_A;
24  int inc_u, inc_a, inc_y, inc_w;
25 
26  datatype = FLA_Obj_datatype( A );
27 
28  m_A = FLA_Obj_length( A );
29  n_A = FLA_Obj_width( A );
30 
31  rs_A = FLA_Obj_row_stride( A );
32  cs_A = FLA_Obj_col_stride( A );
33 
34  inc_u = FLA_Obj_vector_inc( u );
35 
36  inc_a = FLA_Obj_vector_inc( a );
37 
38  inc_y = FLA_Obj_vector_inc( y );
39 
40  inc_w = FLA_Obj_vector_inc( w );
41 
42 
43  switch ( datatype )
44  {
45  case FLA_FLOAT:
46  {
47  float* buff_A = FLA_FLOAT_PTR( A );
48  float* buff_u = FLA_FLOAT_PTR( u );
49  float* buff_a = FLA_FLOAT_PTR( a );
50  float* buff_y = FLA_FLOAT_PTR( y );
51  float* buff_w = FLA_FLOAT_PTR( w );
52  float* buff_tau = FLA_FLOAT_PTR( tau );
53  float* buff_beta = FLA_FLOAT_PTR( beta );
54 
55  FLA_Fused_Ahx_Axpy_Ax_ops_var1( m_A,
56  n_A,
57  buff_tau,
58  buff_beta,
59  buff_A, rs_A, cs_A,
60  buff_u, inc_u,
61  buff_a, inc_a,
62  buff_y, inc_y,
63  buff_w, inc_w );
64 
65  break;
66  }
67 
68  case FLA_DOUBLE:
69  {
70  double* buff_A = FLA_DOUBLE_PTR( A );
71  double* buff_u = FLA_DOUBLE_PTR( u );
72  double* buff_a = FLA_DOUBLE_PTR( a );
73  double* buff_y = FLA_DOUBLE_PTR( y );
74  double* buff_w = FLA_DOUBLE_PTR( w );
75  double* buff_tau = FLA_DOUBLE_PTR( tau );
76  double* buff_beta = FLA_DOUBLE_PTR( beta );
77 
78  FLA_Fused_Ahx_Axpy_Ax_opd_var1( m_A,
79  n_A,
80  buff_tau,
81  buff_beta,
82  buff_A, rs_A, cs_A,
83  buff_u, inc_u,
84  buff_a, inc_a,
85  buff_y, inc_y,
86  buff_w, inc_w );
87 
88  break;
89  }
90 
91  case FLA_COMPLEX:
92  {
93  scomplex* buff_A = FLA_COMPLEX_PTR( A );
94  scomplex* buff_u = FLA_COMPLEX_PTR( u );
95  scomplex* buff_a = FLA_COMPLEX_PTR( a );
96  scomplex* buff_y = FLA_COMPLEX_PTR( y );
97  scomplex* buff_w = FLA_COMPLEX_PTR( w );
98  scomplex* buff_tau = FLA_COMPLEX_PTR( tau );
99  scomplex* buff_beta = FLA_COMPLEX_PTR( beta );
100 
101  FLA_Fused_Ahx_Axpy_Ax_opc_var1( m_A,
102  n_A,
103  buff_tau,
104  buff_beta,
105  buff_A, rs_A, cs_A,
106  buff_u, inc_u,
107  buff_a, inc_a,
108  buff_y, inc_y,
109  buff_w, inc_w );
110 
111  break;
112  }
113 
114  case FLA_DOUBLE_COMPLEX:
115  {
116  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
117  dcomplex* buff_u = FLA_DOUBLE_COMPLEX_PTR( u );
118  dcomplex* buff_a = FLA_DOUBLE_COMPLEX_PTR( a );
119  dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( y );
120  dcomplex* buff_w = FLA_DOUBLE_COMPLEX_PTR( w );
121  dcomplex* buff_tau = FLA_DOUBLE_COMPLEX_PTR( tau );
122  dcomplex* buff_beta = FLA_DOUBLE_COMPLEX_PTR( beta );
123 
124  FLA_Fused_Ahx_Axpy_Ax_opz_var1( m_A,
125  n_A,
126  buff_tau,
127  buff_beta,
128  buff_A, rs_A, cs_A,
129  buff_u, inc_u,
130  buff_a, inc_a,
131  buff_y, inc_y,
132  buff_w, inc_w );
133 
134  break;
135  }
136  }
137 
138  return FLA_SUCCESS;
139 }
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opc_var1(int m_A, int n_A, scomplex *buff_tau, scomplex *buff_beta, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_u, int inc_u, scomplex *buff_a, int inc_a, scomplex *buff_y, int inc_y, scomplex *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:322
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opd_var1(int m_A, int n_A, double *buff_tau, double *buff_beta, double *buff_A, int rs_A, int cs_A, double *buff_u, int inc_u, double *buff_a, int inc_a, double *buff_y, int inc_y, double *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:207
Definition: blis_type_defs.h:132
FLA_Error FLA_Fused_Ahx_Axpy_Ax_ops_var1(int m_A, int n_A, float *buff_tau, float *buff_beta, float *buff_A, int rs_A, int cs_A, float *buff_u, int inc_u, float *buff_a, int inc_a, float *buff_y, int inc_y, float *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:143
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_beta, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_u, int inc_u, dcomplex *buff_a, int inc_a, dcomplex *buff_y, int inc_y, dcomplex *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:390
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
Definition: blis_type_defs.h:137

◆ FLA_Fused_Ahx_Axpy_Ax_opz_var1()

FLA_Error FLA_Fused_Ahx_Axpy_Ax_opz_var1 ( int  m_A,
int  n_A,
dcomplex *  buff_tau,
dcomplex *  buff_beta,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_u,
int  inc_u,
dcomplex *  buff_a,
int  inc_a,
dcomplex *  buff_y,
int  inc_y,
dcomplex *  buff_w,
int  inc_w 
)

References alpha1, alpha2, bl1_z0(), bl1_zaxpyv(), bl1_zaxpyv2b(), bl1_zdot(), bl1_zdotsv2(), bl1_zm1(), bl1_zscals(), bl1_zsetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, n_left, n_run, psi1, rho1, and twoinc_y.

Referenced by FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Bidiag_UT_u_step_ofz_var4(), and FLA_Fused_Ahx_Axpy_Ax_opt_var1().

399 {
400  dcomplex zero = bl1_z0();
401  dcomplex minus_one = bl1_zm1();
402  dcomplex* restrict u = buff_u;
403  dcomplex* restrict w = buff_w;
404  dcomplex* restrict beta = buff_beta;
405  dcomplex* restrict a1;
406  dcomplex* restrict a2;
407  dcomplex* restrict psi1;
408  dcomplex* restrict psi2;
409  dcomplex* restrict alpha1;
410  dcomplex* restrict alpha2;
411 
412  dcomplex minus_inv_tau;
413  dcomplex conj_psi1;
414  dcomplex conj_psi2;
415  dcomplex conj_alpha1;
416  dcomplex conj_alpha2;
417  int i;
418  int n_run = n_A / 2;
419  int n_left = n_A % 2;
420  int twocs_A = 2*cs_A;
421  int twoinc_y = 2*inc_y;
422  int twoinc_a = 2*inc_a;
423 
424 
425  bl1_zsetv( m_A,
426  &zero,
427  buff_w, inc_w );
428 
429  bl1_zdiv3( &minus_one, buff_tau, &minus_inv_tau );
430 
431  a1 = buff_A;
432  a2 = buff_A + cs_A;
433  psi1 = buff_y;
434  psi2 = buff_y + inc_y;
435  alpha1 = buff_a;
436  alpha2 = buff_a + inc_a;
437 
438  for ( i = 0; i < n_run; ++i )
439  {
440 /*
441  Effective computation:
442  y = beta * y + A' * u;
443  a = a - conj(y) / tau;
444  w = A * conj(a);
445 */
446  /*------------------------------------------------------------*/
447 
448  bl1_zdotsv2( BLIS1_CONJUGATE,
449  m_A,
450  a1, rs_A,
451  a2, rs_A,
452  u, inc_u,
453  beta,
454  psi1,
455  psi2 );
456 
457  bl1_zcopyconj( psi1, &conj_psi1 );
458  bl1_zcopyconj( psi2, &conj_psi2 );
459  bl1_zmult4( &minus_inv_tau, &conj_psi1, alpha1, alpha1 );
460  bl1_zmult4( &minus_inv_tau, &conj_psi2, alpha2, alpha2 );
461  bl1_zcopyconj( alpha1, &conj_alpha1 );
462  bl1_zcopyconj( alpha2, &conj_alpha2 );
463 
464  bl1_zaxpyv2b( m_A,
465  &conj_alpha1,
466  &conj_alpha2,
467  a1, rs_A,
468  a2, rs_A,
469  w, inc_w );
470 
471  /*------------------------------------------------------------*/
472 
473  a1 += twocs_A;
474  a2 += twocs_A;
475  psi1 += twoinc_y;
476  psi2 += twoinc_y;
477  alpha1 += twoinc_a;
478  alpha2 += twoinc_a;
479  }
480 
481  if ( n_left == 1 )
482  {
483  dcomplex rho1;
484 
485  bl1_zdot( BLIS1_CONJUGATE,
486  m_A,
487  a1, rs_A,
488  u, inc_u,
489  &rho1 );
490  bl1_zscals( buff_beta, psi1 );
491  bl1_zadd3( psi1, &rho1, psi1 );
492 
493  bl1_zcopyconj( psi1, &conj_psi1 );
494  bl1_zmult4( &minus_inv_tau, &conj_psi1, alpha1, alpha1 );
495  bl1_zcopyconj( alpha1, &conj_alpha1 );
496 
497  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
498  m_A,
499  &conj_alpha1,
500  a1, rs_A,
501  w, inc_w );
502  }
503 
504  return FLA_SUCCESS;
505 }
double rho1
Definition: bl1_dotsv2.c:149
double *restrict psi1
Definition: bl1_axmyv2.c:139
void bl1_zsetv(int m, dcomplex *sigma, dcomplex *x, int incx)
Definition: bl1_setv.c:66
dcomplex bl1_z0(void)
Definition: bl1_constants.c:133
Definition: blis_type_defs.h:81
double *restrict alpha1
Definition: bl1_axpyv2bdotaxpy.c:198
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
double *restrict alpha2
Definition: bl1_dotv2axpyv2b.c:186
Definition: blis_type_defs.h:82
int n_left
Definition: bl1_axmyv2.c:149
void bl1_zdotsv2(conj1_t conjxy, int n, dcomplex *x, int inc_x, dcomplex *y, int inc_y, dcomplex *z, int inc_z, dcomplex *beta, dcomplex *rho_xz, dcomplex *rho_yz)
Definition: bl1_dotsv2.c:248
int i
Definition: bl1_axmyv2.c:145
dcomplex bl1_zm1(void)
Definition: bl1_constants.c:197
void bl1_zaxpyv2b(int n, dcomplex *alpha1, dcomplex *alpha2, dcomplex *x1, int inc_x1, dcomplex *x2, int inc_x2, dcomplex *y, int inc_y)
Definition: bl1_axpyv2b.c:210
int n_run
Definition: bl1_axmyv2.c:148
int twoinc_y
Definition: bl1_axpyv2b.c:154
Definition: blis_type_defs.h:137
bl1_zscals(beta, rho_yz)
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60

◆ FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1 ( int  m_A,
int  n_A,
scomplex *  buff_tau,
scomplex *  buff_alpha,
scomplex *  buff_u,
int  inc_u,
scomplex *  buff_y,
int  inc_y,
scomplex *  buff_z,
int  inc_z,
scomplex *  buff_v,
int  inc_v,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_up,
int  inc_up,
scomplex *  buff_a,
int  inc_a,
scomplex *  buff_w,
int  inc_w 
)

References alpha1, bl1_caxpyv(), bl1_cdot(), bl1_csetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_MINUS_ONE, FLA_ZERO, i, psi1, temp1, and temp2.

Referenced by FLA_Bidiag_UT_u_step_ofc_var3(), and FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1().

436 {
437  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
438  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
439  scomplex minus_inv_tau;
440  scomplex conj_psi1;
441  scomplex conj_nu1;
442  scomplex conj_alpha1;
443  int i;
444 
445  bl1_csetv( m_A,
446  buff_0,
447  buff_w, inc_w );
448 
449  bl1_cdiv3( buff_m1, buff_tau, &minus_inv_tau );
450 
451  for ( i = 0; i < n_A; ++i )
452  {
453  scomplex* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
454  scomplex* u = buff_u;
455  scomplex* psi1 = buff_y + (i )*inc_y;
456  scomplex* nu1 = buff_v + (i )*inc_v;
457  scomplex* z = buff_z;
458  scomplex* up = buff_up;
459  scomplex* alpha1 = buff_a + (i )*inc_a;
460  scomplex* w = buff_w;
461  scomplex* alpha = buff_alpha;
462  scomplex temp1;
463  scomplex temp2;
464 
465  /*------------------------------------------------------------*/
466 
467  bl1_ccopyconj( psi1, &conj_psi1 );
468  bl1_cmult3( alpha, &conj_psi1, &temp1 );
469 
470  bl1_ccopyconj( nu1, &conj_nu1 );
471  bl1_cmult3( alpha, &conj_nu1, &temp2 );
472 
473  bl1_caxpyv( BLIS1_NO_CONJUGATE,
474  m_A,
475  &temp1,
476  u, inc_u,
477  a1, rs_A );
478  //F77_caxpy( &m_A,
479  // &temp1,
480  // u, &inc_u,
481  // a1, &rs_A );
482 
483  bl1_caxpyv( BLIS1_NO_CONJUGATE,
484  m_A,
485  &temp2,
486  z, inc_z,
487  a1, rs_A );
488  //F77_caxpy( &m_A,
489  // &temp2,
490  // z, &inc_z,
491  // a1, &rs_A );
492 
493  bl1_cdot( BLIS1_CONJUGATE,
494  m_A,
495  a1, rs_A,
496  up, inc_up,
497  psi1 );
498 
499  bl1_ccopyconj( psi1, &conj_psi1 );
500  bl1_cmult4( &minus_inv_tau, &conj_psi1, alpha1, alpha1 );
501 
502  bl1_ccopyconj( alpha1, &conj_alpha1 );
503 
504  bl1_caxpyv( BLIS1_NO_CONJUGATE,
505  m_A,
506  &conj_alpha1,
507  a1, rs_A,
508  w, inc_w );
509  //F77_caxpy( &m_A,
510  // &conj_alpha1,
511  // a1, &rs_A,
512  // w, &inc_w );
513 
514  /*------------------------------------------------------------*/
515 
516  }
517 
518  return FLA_SUCCESS;
519 }
double *restrict psi1
Definition: bl1_axmyv2.c:139
void bl1_csetv(int m, scomplex *sigma, scomplex *x, int incx)
Definition: bl1_setv.c:52
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
double temp2
Definition: bl1_axpyv2b.c:147
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:81
double *restrict alpha1
Definition: bl1_axpyv2bdotaxpy.c:198
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
Definition: blis_type_defs.h:82
Definition: blis_type_defs.h:132
int i
Definition: bl1_axmyv2.c:145
double temp1
Definition: bl1_axpyv2b.c:146
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1 ( int  m_A,
int  n_A,
double *  buff_tau,
double *  buff_alpha,
double *  buff_u,
int  inc_u,
double *  buff_y,
int  inc_y,
double *  buff_z,
int  inc_z,
double *  buff_v,
int  inc_v,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_up,
int  inc_up,
double *  buff_a,
int  inc_a,
double *  buff_w,
int  inc_w 
)

References alpha1, alpha2, bl1_d0(), bl1_daxpyv(), bl1_daxpyv2b(), bl1_ddot(), bl1_ddotsv2(), bl1_dm1(), bl1_dsetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, n_left, n_run, psi1, rho1, and twoinc_y.

Referenced by FLA_Bidiag_UT_u_step_ofd_var3(), and FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1().

279 {
280  double zero = bl1_d0();
281  double minus_one = bl1_dm1();
282  double* restrict u = buff_u;
283  double* restrict up = buff_up;
284  double* restrict w = buff_w;
285  double* restrict z = buff_z;
286  double* restrict alpha = buff_alpha;
287  double* restrict a1;
288  double* restrict a2;
289  double* restrict psi1;
290  double* restrict psi2;
291  double* restrict alpha1;
292  double* restrict alpha2;
293  double* restrict nu1;
294  double* restrict nu2;
295 
296  double minus_inv_tau;
297  double alpha_conj_psi1;
298  double alpha_conj_psi2;
299  double alpha_conj_nu1;
300  double alpha_conj_nu2;
301  int i;
302  int n_run = n_A / 2;
303  int n_left = n_A % 2;
304  int twocs_A = 2*cs_A;
305  int twoinc_y = 2*inc_y;
306  int twoinc_a = 2*inc_a;
307  int twoinc_v = 2*inc_v;
308 
309 
310  bl1_dsetv( m_A,
311  &zero,
312  buff_w, inc_w );
313 
314  bl1_ddiv3( &minus_one, buff_tau, &minus_inv_tau );
315 
316  a1 = buff_A;
317  a2 = buff_A + cs_A;
318  psi1 = buff_y;
319  psi2 = buff_y + inc_y;
320  alpha1 = buff_a;
321  alpha2 = buff_a + inc_a;
322  nu1 = buff_v;
323  nu2 = buff_v + inc_v;
324 
325  for ( i = 0; i < n_run; ++i )
326  {
327 
328  /*------------------------------------------------------------*/
329 
330  bl1_dmult3( alpha, psi1, &alpha_conj_psi1 );
331  bl1_dmult3( alpha, psi2, &alpha_conj_psi2 );
332 
333  bl1_dmult3( alpha, nu1, &alpha_conj_nu1 );
334  bl1_dmult3( alpha, nu2, &alpha_conj_nu2 );
335 
336 /*
337  Effective computation:
338  A = A + alpha * ( u * y' + z * v' );
339  y = A' * up;
340  a = a - conj(y) / tau;
341  w = A * conj(a);
342 */
343  bl1_daxpyv2b( m_A,
344  &alpha_conj_psi1,
345  &alpha_conj_nu1,
346  u, inc_u,
347  z, inc_z,
348  a1, rs_A );
349  bl1_daxpyv2b( m_A,
350  &alpha_conj_psi2,
351  &alpha_conj_nu2,
352  u, inc_u,
353  z, inc_z,
354  a2, rs_A );
355 
356 
357  bl1_ddotsv2( BLIS1_CONJUGATE,
358  m_A,
359  a1, rs_A,
360  a2, rs_A,
361  up, inc_up,
362  &zero,
363  psi1,
364  psi2 );
365 
366  bl1_dmult4( &minus_inv_tau, psi1, alpha1, alpha1 );
367  bl1_dmult4( &minus_inv_tau, psi2, alpha2, alpha2 );
368 
369  bl1_daxpyv2b( m_A,
370  alpha1,
371  alpha2,
372  a1, rs_A,
373  a2, rs_A,
374  w, inc_w );
375 
376  /*------------------------------------------------------------*/
377 
378  a1 += twocs_A;
379  a2 += twocs_A;
380  psi1 += twoinc_y;
381  psi2 += twoinc_y;
382  alpha1 += twoinc_a;
383  alpha2 += twoinc_a;
384  nu1 += twoinc_v;
385  nu2 += twoinc_v;
386  }
387 
388  if ( n_left == 1 )
389  {
390  double rho1;
391 
392  bl1_dmult3( alpha, psi1, &alpha_conj_psi1 );
393  bl1_dmult3( alpha, nu1, &alpha_conj_nu1 );
394 
395  bl1_daxpyv2b( m_A,
396  &alpha_conj_psi1,
397  &alpha_conj_nu1,
398  u, inc_u,
399  z, inc_z,
400  a1, rs_A );
401 
402  bl1_ddot( BLIS1_CONJUGATE,
403  m_A,
404  a1, rs_A,
405  up, inc_up,
406  &rho1 );
407  bl1_dscals( &zero, psi1 );
408  bl1_dadd3( psi1, &rho1, psi1 );
409 
410  bl1_dmult4( &minus_inv_tau, psi1, alpha1, alpha1 );
411 
412  bl1_daxpyv( BLIS1_NO_CONJUGATE,
413  m_A,
414  alpha1,
415  a1, rs_A,
416  w, inc_w );
417  }
418 
419  return FLA_SUCCESS;
420 }
double rho1
Definition: bl1_dotsv2.c:149
double *restrict psi1
Definition: bl1_axmyv2.c:139
Definition: blis_type_defs.h:81
double *restrict alpha1
Definition: bl1_axpyv2bdotaxpy.c:198
void bl1_ddotsv2(conj1_t conjxy, int n, double *x, int inc_x, double *y, int inc_y, double *z, int inc_z, double *beta, double *rho_xz, double *rho_yz)
Definition: bl1_dotsv2.c:35
double bl1_d0(void)
Definition: bl1_constants.c:118
void bl1_daxpyv2b(int n, double *alpha1, double *alpha2, double *x1, int inc_x1, double *x2, int inc_x2, double *y, int inc_y)
Definition: bl1_axpyv2b.c:31
double *restrict alpha2
Definition: bl1_dotv2axpyv2b.c:186
Definition: blis_type_defs.h:82
void bl1_dsetv(int m, double *sigma, double *x, int incx)
Definition: bl1_setv.c:39
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
int n_left
Definition: bl1_axmyv2.c:149
double bl1_dm1(void)
Definition: bl1_constants.c:182
int i
Definition: bl1_axmyv2.c:145
int n_run
Definition: bl1_axmyv2.c:148
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
int twoinc_y
Definition: bl1_axpyv2b.c:154

◆ FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1 ( int  m_A,
int  n_A,
float *  buff_tau,
float *  buff_alpha,
float *  buff_u,
int  inc_u,
float *  buff_y,
int  inc_y,
float *  buff_z,
int  inc_z,
float *  buff_v,
int  inc_v,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_up,
int  inc_up,
float *  buff_a,
int  inc_a,
float *  buff_w,
int  inc_w 
)

References alpha1, bl1_saxpyv(), bl1_sdot(), bl1_ssetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_MINUS_ONE, FLA_ZERO, i, psi1, temp1, and temp2.

Referenced by FLA_Bidiag_UT_u_step_ofs_var3(), and FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1().

182 {
183  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
184  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
185  float minus_inv_tau;
186  int i;
187 
188  bl1_ssetv( m_A,
189  buff_0,
190  buff_w, inc_w );
191 
192  minus_inv_tau = *buff_m1 / *buff_tau;
193 
194  for ( i = 0; i < n_A; ++i )
195  {
196  float* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
197  float* u = buff_u;
198  float* psi1 = buff_y + (i )*inc_y;
199  float* nu1 = buff_v + (i )*inc_v;
200  float* z = buff_z;
201  float* up = buff_up;
202  float* alpha1 = buff_a + (i )*inc_a;
203  float* w = buff_w;
204  float* alpha = buff_alpha;
205  float temp1;
206  float temp2;
207 
208  /*------------------------------------------------------------*/
209 
210  // bl1_smult3( alpha, psi1, &temp1 );
211  temp1 = *alpha * *psi1;
212 
213  // bl1_smult3( alpha, nu1, &temp2 );
214  temp2 = *alpha * *nu1;
215 
216  bl1_saxpyv( BLIS1_NO_CONJUGATE,
217  m_A,
218  &temp1,
219  u, inc_u,
220  a1, rs_A );
221  //F77_saxpy( &m_A,
222  // &temp1,
223  // u, &inc_u,
224  // a1, &rs_A );
225 
226  bl1_saxpyv( BLIS1_NO_CONJUGATE,
227  m_A,
228  &temp2,
229  z, inc_z,
230  a1, rs_A );
231  //F77_saxpy( &m_A,
232  // &temp2,
233  // z, &inc_z,
234  // a1, &rs_A );
235 
236  bl1_sdot( BLIS1_CONJUGATE,
237  m_A,
238  a1, rs_A,
239  up, inc_up,
240  psi1 );
241  //*psi1 = F77_sdot( &m_A,
242  // a1, &rs_A,
243  // up, &inc_up );
244 
245  // bl1_smult4( &minus_inv_tau, psi1, alpha1, alpha1 );
246  *alpha1 = *alpha1 + minus_inv_tau * *psi1;
247 
248  bl1_saxpyv( BLIS1_NO_CONJUGATE,
249  m_A,
250  alpha1,
251  a1, rs_A,
252  w, inc_w );
253  //F77_saxpy( &m_A,
254  // alpha1,
255  // a1, &rs_A,
256  // w, &inc_w );
257 
258  /*------------------------------------------------------------*/
259 
260  }
261 
262  return FLA_SUCCESS;
263 }
double *restrict psi1
Definition: bl1_axmyv2.c:139
double temp2
Definition: bl1_axpyv2b.c:147
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
Definition: blis_type_defs.h:81
double *restrict alpha1
Definition: bl1_axpyv2bdotaxpy.c:198
Definition: blis_type_defs.h:82
void bl1_ssetv(int m, float *sigma, float *x, int incx)
Definition: bl1_setv.c:26
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
int i
Definition: bl1_axmyv2.c:145
double temp1
Definition: bl1_axpyv2b.c:146
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20

◆ FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1 ( FLA_Obj  alpha,
FLA_Obj  tau,
FLA_Obj  u,
FLA_Obj  y,
FLA_Obj  z,
FLA_Obj  v,
FLA_Obj  A,
FLA_Obj  up,
FLA_Obj  a,
FLA_Obj  w 
)

References FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

14 {
15 /*
16  Effective computation:
17  A = A + alpha * ( u * y' + z * v' );
18  y = A' * up;
19  a = a - conj(y) / tau;
20  w = A * conj(a);
21 */
22  FLA_Datatype datatype;
23  int m_A, n_A;
24  int rs_A, cs_A;
25  int inc_u, inc_y, inc_z, inc_v;
26  int inc_up, inc_a, inc_w;
27 
28  datatype = FLA_Obj_datatype( A );
29 
30  m_A = FLA_Obj_length( A );
31  n_A = FLA_Obj_width( A );
32 
33  rs_A = FLA_Obj_row_stride( A );
34  cs_A = FLA_Obj_col_stride( A );
35 
36  inc_u = FLA_Obj_vector_inc( u );
37  inc_y = FLA_Obj_vector_inc( y );
38  inc_z = FLA_Obj_vector_inc( z );
39  inc_v = FLA_Obj_vector_inc( v );
40 
41  inc_up = FLA_Obj_vector_inc( up );
42  inc_a = FLA_Obj_vector_inc( a );
43  inc_w = FLA_Obj_vector_inc( w );
44 
45 
46  switch ( datatype )
47  {
48  case FLA_FLOAT:
49  {
50  float* buff_A = FLA_FLOAT_PTR( A );
51  float* buff_u = FLA_FLOAT_PTR( u );
52  float* buff_y = FLA_FLOAT_PTR( y );
53  float* buff_z = FLA_FLOAT_PTR( z );
54  float* buff_v = FLA_FLOAT_PTR( v );
55  float* buff_up = FLA_FLOAT_PTR( up );
56  float* buff_a = FLA_FLOAT_PTR( a );
57  float* buff_w = FLA_FLOAT_PTR( w );
58  float* buff_tau = FLA_FLOAT_PTR( tau );
59  float* buff_alpha = FLA_FLOAT_PTR( alpha );
60 
61  FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1( m_A,
62  n_A,
63  buff_tau,
64  buff_alpha,
65  buff_u, inc_u,
66  buff_y, inc_y,
67  buff_z, inc_z,
68  buff_v, inc_v,
69  buff_A, rs_A, cs_A,
70  buff_up, inc_up,
71  buff_a, inc_a,
72  buff_w, inc_w );
73 
74  break;
75  }
76 
77  case FLA_DOUBLE:
78  {
79  double* buff_A = FLA_DOUBLE_PTR( A );
80  double* buff_u = FLA_DOUBLE_PTR( u );
81  double* buff_y = FLA_DOUBLE_PTR( y );
82  double* buff_z = FLA_DOUBLE_PTR( z );
83  double* buff_v = FLA_DOUBLE_PTR( v );
84  double* buff_up = FLA_DOUBLE_PTR( up );
85  double* buff_a = FLA_DOUBLE_PTR( a );
86  double* buff_w = FLA_DOUBLE_PTR( w );
87  double* buff_tau = FLA_DOUBLE_PTR( tau );
88  double* buff_alpha = FLA_DOUBLE_PTR( alpha );
89 
90  FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1( m_A,
91  n_A,
92  buff_tau,
93  buff_alpha,
94  buff_u, inc_u,
95  buff_y, inc_y,
96  buff_z, inc_z,
97  buff_v, inc_v,
98  buff_A, rs_A, cs_A,
99  buff_up, inc_up,
100  buff_a, inc_a,
101  buff_w, inc_w );
102 
103  break;
104  }
105 
106  case FLA_COMPLEX:
107  {
108  scomplex* buff_A = FLA_COMPLEX_PTR( A );
109  scomplex* buff_u = FLA_COMPLEX_PTR( u );
110  scomplex* buff_y = FLA_COMPLEX_PTR( y );
111  scomplex* buff_z = FLA_COMPLEX_PTR( z );
112  scomplex* buff_v = FLA_COMPLEX_PTR( v );
113  scomplex* buff_up = FLA_COMPLEX_PTR( up );
114  scomplex* buff_a = FLA_COMPLEX_PTR( a );
115  scomplex* buff_w = FLA_COMPLEX_PTR( w );
116  scomplex* buff_tau = FLA_COMPLEX_PTR( tau );
117  scomplex* buff_alpha = FLA_COMPLEX_PTR( alpha );
118 
119  FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1( m_A,
120  n_A,
121  buff_tau,
122  buff_alpha,
123  buff_u, inc_u,
124  buff_y, inc_y,
125  buff_z, inc_z,
126  buff_v, inc_v,
127  buff_A, rs_A, cs_A,
128  buff_up, inc_up,
129  buff_a, inc_a,
130  buff_w, inc_w );
131 
132  break;
133  }
134 
135  case FLA_DOUBLE_COMPLEX:
136  {
137  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
138  dcomplex* buff_u = FLA_DOUBLE_COMPLEX_PTR( u );
139  dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( y );
140  dcomplex* buff_z = FLA_DOUBLE_COMPLEX_PTR( z );
141  dcomplex* buff_v = FLA_DOUBLE_COMPLEX_PTR( v );
142  dcomplex* buff_up = FLA_DOUBLE_COMPLEX_PTR( up );
143  dcomplex* buff_a = FLA_DOUBLE_COMPLEX_PTR( a );
144  dcomplex* buff_w = FLA_DOUBLE_COMPLEX_PTR( w );
145  dcomplex* buff_tau = FLA_DOUBLE_COMPLEX_PTR( tau );
146  dcomplex* buff_alpha = FLA_DOUBLE_COMPLEX_PTR( alpha );
147 
148  FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1( m_A,
149  n_A,
150  buff_tau,
151  buff_alpha,
152  buff_u, inc_u,
153  buff_y, inc_y,
154  buff_z, inc_z,
155  buff_v, inc_v,
156  buff_A, rs_A, cs_A,
157  buff_up, inc_up,
158  buff_a, inc_a,
159  buff_w, inc_w );
160 
161  break;
162  }
163  }
164 
165  return FLA_SUCCESS;
166 }
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1(int m_A, int n_A, float *buff_tau, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A, float *buff_up, int inc_up, float *buff_a, int inc_a, float *buff_w, int inc_w)
Definition: FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:170
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
Definition: blis_type_defs.h:132
int FLA_Datatype
Definition: FLA_type_defs.h:49
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_up, int inc_up, dcomplex *buff_a, int inc_a, dcomplex *buff_w, int inc_w)
Definition: FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:523
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1(int m_A, int n_A, scomplex *buff_tau, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_up, int inc_up, scomplex *buff_a, int inc_a, scomplex *buff_w, int inc_w)
Definition: FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:424
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(int m_A, int n_A, double *buff_tau, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A, double *buff_up, int inc_up, double *buff_a, int inc_a, double *buff_w, int inc_w)
Definition: FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:267
Definition: blis_type_defs.h:137

◆ FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1 ( int  m_A,
int  n_A,
dcomplex *  buff_tau,
dcomplex *  buff_alpha,
dcomplex *  buff_u,
int  inc_u,
dcomplex *  buff_y,
int  inc_y,
dcomplex *  buff_z,
int  inc_z,
dcomplex *  buff_v,
int  inc_v,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_up,
int  inc_up,
dcomplex *  buff_a,
int  inc_a,
dcomplex *  buff_w,
int  inc_w 
)

References alpha1, alpha2, bl1_z0(), bl1_zaxpyv(), bl1_zaxpyv2b(), bl1_zdot(), bl1_zdotsv2(), bl1_zm1(), bl1_zscals(), bl1_zsetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, n_left, n_run, psi1, rho1, and twoinc_y.

Referenced by FLA_Bidiag_UT_u_step_ofz_var3(), and FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1().

535 {
536  dcomplex zero = bl1_z0();
537  dcomplex minus_one = bl1_zm1();
538  dcomplex* restrict u = buff_u;
539  dcomplex* restrict up = buff_up;
540  dcomplex* restrict w = buff_w;
541  dcomplex* restrict z = buff_z;
542  dcomplex* restrict alpha = buff_alpha;
543  dcomplex* restrict a1;
544  dcomplex* restrict a2;
545  dcomplex* restrict psi1;
546  dcomplex* restrict psi2;
547  dcomplex* restrict alpha1;
548  dcomplex* restrict alpha2;
549  dcomplex* restrict nu1;
550  dcomplex* restrict nu2;
551 
552  dcomplex minus_inv_tau;
553  dcomplex conj_psi1;
554  dcomplex conj_psi2;
555  dcomplex conj_nu1;
556  dcomplex conj_nu2;
557  dcomplex conj_alpha1;
558  dcomplex conj_alpha2;
559  dcomplex alpha_conj_psi1;
560  dcomplex alpha_conj_psi2;
561  dcomplex alpha_conj_nu1;
562  dcomplex alpha_conj_nu2;
563  int i;
564  int n_run = n_A / 2;
565  int n_left = n_A % 2;
566  int twocs_A = 2*cs_A;
567  int twoinc_y = 2*inc_y;
568  int twoinc_a = 2*inc_a;
569  int twoinc_v = 2*inc_v;
570 
571 
572  bl1_zsetv( m_A,
573  &zero,
574  buff_w, inc_w );
575 
576  bl1_zdiv3( &minus_one, buff_tau, &minus_inv_tau );
577 
578  a1 = buff_A;
579  a2 = buff_A + cs_A;
580  psi1 = buff_y;
581  psi2 = buff_y + inc_y;
582  alpha1 = buff_a;
583  alpha2 = buff_a + inc_a;
584  nu1 = buff_v;
585  nu2 = buff_v + inc_v;
586 
587  for ( i = 0; i < n_run; ++i )
588  {
589 
590  /*------------------------------------------------------------*/
591 
592  bl1_zcopyconj( psi1, &conj_psi1 );
593  bl1_zcopyconj( psi2, &conj_psi2 );
594  bl1_zmult3( alpha, &conj_psi1, &alpha_conj_psi1 );
595  bl1_zmult3( alpha, &conj_psi2, &alpha_conj_psi2 );
596 
597  bl1_zcopyconj( nu1, &conj_nu1 );
598  bl1_zcopyconj( nu2, &conj_nu2 );
599  bl1_zmult3( alpha, &conj_nu1, &alpha_conj_nu1 );
600  bl1_zmult3( alpha, &conj_nu2, &alpha_conj_nu2 );
601 
602  bl1_zaxpyv2b( m_A,
603  &alpha_conj_psi1,
604  &alpha_conj_nu1,
605  u, inc_u,
606  z, inc_z,
607  a1, rs_A );
608  bl1_zaxpyv2b( m_A,
609  &alpha_conj_psi2,
610  &alpha_conj_nu2,
611  u, inc_u,
612  z, inc_z,
613  a2, rs_A );
614 
615 
616  bl1_zdotsv2( BLIS1_CONJUGATE,
617  m_A,
618  a1, rs_A,
619  a2, rs_A,
620  up, inc_up,
621  &zero,
622  psi1,
623  psi2 );
624 
625  bl1_zcopyconj( psi1, &conj_psi1 );
626  bl1_zcopyconj( psi2, &conj_psi2 );
627  bl1_zmult4( &minus_inv_tau, &conj_psi1, alpha1, alpha1 );
628  bl1_zmult4( &minus_inv_tau, &conj_psi2, alpha2, alpha2 );
629  bl1_zcopyconj( alpha1, &conj_alpha1 );
630  bl1_zcopyconj( alpha2, &conj_alpha2 );
631 
632  bl1_zaxpyv2b( m_A,
633  &conj_alpha1,
634  &conj_alpha2,
635  a1, rs_A,
636  a2, rs_A,
637  w, inc_w );
638 
639  /*------------------------------------------------------------*/
640 
641  a1 += twocs_A;
642  a2 += twocs_A;
643  psi1 += twoinc_y;
644  psi2 += twoinc_y;
645  alpha1 += twoinc_a;
646  alpha2 += twoinc_a;
647  nu1 += twoinc_v;
648  nu2 += twoinc_v;
649  }
650 
651  if ( n_left == 1 )
652  {
653  dcomplex rho1;
654 
655  bl1_zcopyconj( psi1, &conj_psi1 );
656  bl1_zmult3( alpha, &conj_psi1, &alpha_conj_psi1 );
657  bl1_zcopyconj( nu1, &conj_nu1 );
658  bl1_zmult3( alpha, &conj_nu1, &alpha_conj_nu1 );
659 
660  bl1_zaxpyv2b( m_A,
661  &alpha_conj_psi1,
662  &alpha_conj_nu1,
663  u, inc_u,
664  z, inc_z,
665  a1, rs_A );
666 
667  bl1_zdot( BLIS1_CONJUGATE,
668  m_A,
669  a1, rs_A,
670  up, inc_up,
671  &rho1 );
672  bl1_zscals( &zero, psi1 );
673  bl1_zadd3( psi1, &rho1, psi1 );
674 
675  bl1_zcopyconj( psi1, &conj_psi1 );
676  bl1_zmult4( &minus_inv_tau, &conj_psi1, alpha1, alpha1 );
677  bl1_zcopyconj( alpha1, &conj_alpha1 );
678 
679  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
680  m_A,
681  &conj_alpha1,
682  a1, rs_A,
683  w, inc_w );
684  }
685 
686  return FLA_SUCCESS;
687 }
double rho1
Definition: bl1_dotsv2.c:149
double *restrict psi1
Definition: bl1_axmyv2.c:139
void bl1_zsetv(int m, dcomplex *sigma, dcomplex *x, int incx)
Definition: bl1_setv.c:66
dcomplex bl1_z0(void)
Definition: bl1_constants.c:133
Definition: blis_type_defs.h:81
double *restrict alpha1
Definition: bl1_axpyv2bdotaxpy.c:198
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
double *restrict alpha2
Definition: bl1_dotv2axpyv2b.c:186
Definition: blis_type_defs.h:82
int n_left
Definition: bl1_axmyv2.c:149
void bl1_zdotsv2(conj1_t conjxy, int n, dcomplex *x, int inc_x, dcomplex *y, int inc_y, dcomplex *z, int inc_z, dcomplex *beta, dcomplex *rho_xz, dcomplex *rho_yz)
Definition: bl1_dotsv2.c:248
int i
Definition: bl1_axmyv2.c:145
dcomplex bl1_zm1(void)
Definition: bl1_constants.c:197
void bl1_zaxpyv2b(int n, dcomplex *alpha1, dcomplex *alpha2, dcomplex *x1, int inc_x1, dcomplex *x2, int inc_x2, dcomplex *y, int inc_y)
Definition: bl1_axpyv2b.c:210
int n_run
Definition: bl1_axmyv2.c:148
int twoinc_y
Definition: bl1_axpyv2b.c:154
Definition: blis_type_defs.h:137
bl1_zscals(beta, rho_yz)
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60

◆ FLA_Fused_Gerc2_opc_var1()

FLA_Error FLA_Fused_Gerc2_opc_var1 ( int  m_A,
int  n_A,
scomplex *  buff_alpha,
scomplex *  buff_u,
int  inc_u,
scomplex *  buff_y,
int  inc_y,
scomplex *  buff_z,
int  inc_z,
scomplex *  buff_v,
int  inc_v,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)

References bl1_caxpyv(), BLIS1_NO_CONJUGATE, i, psi1, temp1, and temp2.

Referenced by FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Fused_Gerc2_opt_var1(), FLA_Hess_UT_step_ofc_var2(), and FLA_Hess_UT_step_ofc_var3().

249 {
250  int i;
251 
252  for ( i = 0; i < n_A; ++i )
253  {
254  scomplex* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
255  scomplex* u = buff_u;
256  scomplex* psi1 = buff_y + (i )*inc_y;
257  scomplex* z = buff_z;
258  scomplex* nu1 = buff_v + (i )*inc_v;
259  scomplex* alpha = buff_alpha;
260  scomplex psi1_conj;
261  scomplex nu1_conj;
262  scomplex temp1;
263  scomplex temp2;
264 
265  /*------------------------------------------------------------*/
266 
267  bl1_ccopyconj( psi1, &psi1_conj );
268  bl1_cmult3( alpha, &psi1_conj, &temp1 );
269 
270  bl1_ccopyconj( nu1, &nu1_conj );
271  bl1_cmult3( alpha, &nu1_conj, &temp2 );
272 
273  bl1_caxpyv( BLIS1_NO_CONJUGATE,
274  m_A,
275  &temp1,
276  u, inc_u,
277  a1, rs_A );
278 /*
279  F77_caxpy( &m_A,
280  &temp1,
281  u, &inc_u,
282  a1, &rs_A );
283 */
284 
285  bl1_caxpyv( BLIS1_NO_CONJUGATE,
286  m_A,
287  &temp2,
288  z, inc_z,
289  a1, rs_A );
290 /*
291  F77_caxpy( &m_A,
292  &temp2,
293  z, &inc_z,
294  a1, &rs_A );
295 */
296 
297  /*------------------------------------------------------------*/
298 
299  }
300 
301  return FLA_SUCCESS;
302 }
double *restrict psi1
Definition: bl1_axmyv2.c:139
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
double temp2
Definition: bl1_axpyv2b.c:147
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:132
int i
Definition: bl1_axmyv2.c:145
double temp1
Definition: bl1_axpyv2b.c:146

◆ FLA_Fused_Gerc2_opd_var1()

FLA_Error FLA_Fused_Gerc2_opd_var1 ( int  m_A,
int  n_A,
double *  buff_alpha,
double *  buff_u,
int  inc_u,
double *  buff_y,
int  inc_y,
double *  buff_z,
int  inc_z,
double *  buff_v,
int  inc_v,
double *  buff_A,
int  rs_A,
int  cs_A 
)

References bl1_daxpyv2b(), i, and psi1.

Referenced by FLA_Bidiag_UT_u_step_ofd_var2(), FLA_Fused_Gerc2_opt_var1(), FLA_Hess_UT_step_ofd_var2(), and FLA_Hess_UT_step_ofd_var3().

201 {
202  int i;
203 
204  for ( i = 0; i < n_A; ++i )
205  {
206 /*
207  Effective computation:
208  A = A + alpha * ( u * y' + z * v' );
209 */
210  double* restrict a1 = buff_A + (i )*cs_A + (0 )*rs_A;
211  double* restrict u = buff_u;
212  double* restrict psi1 = buff_y + (i )*inc_y;
213  double* restrict z = buff_z;
214  double* restrict nu1 = buff_v + (i )*inc_v;
215  double* restrict alpha = buff_alpha;
216  double alpha_conj_psi1;
217  double alpha_conj_nu1;
218 
219  /*------------------------------------------------------------*/
220 
221  bl1_dmult3( alpha, psi1, &alpha_conj_psi1 );
222 
223  bl1_dmult3( alpha, nu1, &alpha_conj_nu1 );
224 
225  bl1_daxpyv2b( m_A,
226  &alpha_conj_psi1,
227  &alpha_conj_nu1,
228  u, inc_u,
229  z, inc_z,
230  a1, rs_A );
231 
232  /*------------------------------------------------------------*/
233 
234  }
235 
236  return FLA_SUCCESS;
237 }
double *restrict psi1
Definition: bl1_axmyv2.c:139
void bl1_daxpyv2b(int n, double *alpha1, double *alpha2, double *x1, int inc_x1, double *x2, int inc_x2, double *y, int inc_y)
Definition: bl1_axpyv2b.c:31
int i
Definition: bl1_axmyv2.c:145

◆ FLA_Fused_Gerc2_ops_var1()

FLA_Error FLA_Fused_Gerc2_ops_var1 ( int  m_A,
int  n_A,
float *  buff_alpha,
float *  buff_u,
int  inc_u,
float *  buff_y,
int  inc_y,
float *  buff_z,
int  inc_z,
float *  buff_v,
int  inc_v,
float *  buff_A,
int  rs_A,
int  cs_A 
)

References bl1_saxpyv(), BLIS1_NO_CONJUGATE, i, psi1, temp1, and temp2.

Referenced by FLA_Bidiag_UT_u_step_ofs_var2(), FLA_Fused_Gerc2_opt_var1(), FLA_Hess_UT_step_ofs_var2(), and FLA_Hess_UT_step_ofs_var3().

138 {
139  int i;
140 
141  for ( i = 0; i < n_A; ++i )
142  {
143  float* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
144  float* u = buff_u;
145  float* psi1 = buff_y + (i )*inc_y;
146  float* z = buff_z;
147  float* nu1 = buff_v + (i )*inc_v;
148  float* alpha = buff_alpha;
149  float temp1;
150  float temp2;
151 
152  /*------------------------------------------------------------*/
153 
154  // bl1_smult3( alpha, psi1, &temp1 );
155  temp1 = *alpha * *psi1;
156 
157  // bl1_smult3( alpha, nu1, &temp2 );
158  temp2 = *alpha * *nu1;
159 
160  bl1_saxpyv( BLIS1_NO_CONJUGATE,
161  m_A,
162  &temp1,
163  u, inc_u,
164  a1, rs_A );
165 /*
166  F77_saxpy( &m_A,
167  &temp1,
168  u, &inc_u,
169  a1, &rs_A );
170 */
171 
172  bl1_saxpyv( BLIS1_NO_CONJUGATE,
173  m_A,
174  &temp2,
175  z, inc_z,
176  a1, rs_A );
177 /*
178  F77_saxpy( &m_A,
179  &temp2,
180  z, &inc_z,
181  a1, &rs_A );
182 */
183 
184  /*------------------------------------------------------------*/
185 
186  }
187 
188  return FLA_SUCCESS;
189 }
double *restrict psi1
Definition: bl1_axmyv2.c:139
double temp2
Definition: bl1_axpyv2b.c:147
Definition: blis_type_defs.h:81
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
int i
Definition: bl1_axmyv2.c:145
double temp1
Definition: bl1_axpyv2b.c:146

◆ FLA_Fused_Gerc2_opt_var1()

FLA_Error FLA_Fused_Gerc2_opt_var1 ( FLA_Obj  alpha,
FLA_Obj  u,
FLA_Obj  y,
FLA_Obj  z,
FLA_Obj  v,
FLA_Obj  A 
)

References FLA_Fused_Gerc2_opc_var1(), FLA_Fused_Gerc2_opd_var1(), FLA_Fused_Gerc2_ops_var1(), FLA_Fused_Gerc2_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

14 {
15 /*
16  Effective computation:
17  A = A + alpha * ( u * y' + z * v' );
18 */
19  FLA_Datatype datatype;
20  int m_A, n_A;
21  int rs_A, cs_A;
22  int inc_u, inc_y, inc_z, inc_v;
23 
24  datatype = FLA_Obj_datatype( A );
25 
26  m_A = FLA_Obj_length( A );
27  n_A = FLA_Obj_width( A );
28 
29  rs_A = FLA_Obj_row_stride( A );
30  cs_A = FLA_Obj_col_stride( A );
31 
32  inc_u = FLA_Obj_vector_inc( u );
33  inc_y = FLA_Obj_vector_inc( y );
34  inc_z = FLA_Obj_vector_inc( z );
35  inc_v = FLA_Obj_vector_inc( v );
36 
37 
38  switch ( datatype )
39  {
40  case FLA_FLOAT:
41  {
42  float* buff_A = FLA_FLOAT_PTR( A );
43  float* buff_u = FLA_FLOAT_PTR( u );
44  float* buff_y = FLA_FLOAT_PTR( y );
45  float* buff_z = FLA_FLOAT_PTR( z );
46  float* buff_v = FLA_FLOAT_PTR( v );
47  float* buff_alpha = FLA_FLOAT_PTR( alpha );
48 
49  FLA_Fused_Gerc2_ops_var1( m_A,
50  n_A,
51  buff_alpha,
52  buff_u, inc_u,
53  buff_y, inc_y,
54  buff_z, inc_z,
55  buff_v, inc_v,
56  buff_A, rs_A, cs_A );
57 
58  break;
59  }
60 
61  case FLA_DOUBLE:
62  {
63  double* buff_A = FLA_DOUBLE_PTR( A );
64  double* buff_u = FLA_DOUBLE_PTR( u );
65  double* buff_y = FLA_DOUBLE_PTR( y );
66  double* buff_z = FLA_DOUBLE_PTR( z );
67  double* buff_v = FLA_DOUBLE_PTR( v );
68  double* buff_alpha = FLA_DOUBLE_PTR( alpha );
69 
70  FLA_Fused_Gerc2_opd_var1( m_A,
71  n_A,
72  buff_alpha,
73  buff_u, inc_u,
74  buff_y, inc_y,
75  buff_z, inc_z,
76  buff_v, inc_v,
77  buff_A, rs_A, cs_A );
78 
79  break;
80  }
81 
82  case FLA_COMPLEX:
83  {
84  scomplex* buff_A = FLA_COMPLEX_PTR( A );
85  scomplex* buff_u = FLA_COMPLEX_PTR( u );
86  scomplex* buff_y = FLA_COMPLEX_PTR( y );
87  scomplex* buff_z = FLA_COMPLEX_PTR( z );
88  scomplex* buff_v = FLA_COMPLEX_PTR( v );
89  scomplex* buff_alpha = FLA_COMPLEX_PTR( alpha );
90 
91  FLA_Fused_Gerc2_opc_var1( m_A,
92  n_A,
93  buff_alpha,
94  buff_u, inc_u,
95  buff_y, inc_y,
96  buff_z, inc_z,
97  buff_v, inc_v,
98  buff_A, rs_A, cs_A );
99 
100  break;
101  }
102 
103  case FLA_DOUBLE_COMPLEX:
104  {
105  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
106  dcomplex* buff_u = FLA_DOUBLE_COMPLEX_PTR( u );
107  dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( y );
108  dcomplex* buff_z = FLA_DOUBLE_COMPLEX_PTR( z );
109  dcomplex* buff_v = FLA_DOUBLE_COMPLEX_PTR( v );
110  dcomplex* buff_alpha = FLA_DOUBLE_COMPLEX_PTR( alpha );
111 
112  FLA_Fused_Gerc2_opz_var1( m_A,
113  n_A,
114  buff_alpha,
115  buff_u, inc_u,
116  buff_y, inc_y,
117  buff_z, inc_z,
118  buff_v, inc_v,
119  buff_A, rs_A, cs_A );
120 
121  break;
122  }
123  }
124 
125  return FLA_SUCCESS;
126 }
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Error FLA_Fused_Gerc2_opd_var1(int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Fused_Gerc2_opt_var1.c:193
FLA_Error FLA_Fused_Gerc2_opc_var1(int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Fused_Gerc2_opt_var1.c:241
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
Definition: blis_type_defs.h:132
FLA_Error FLA_Fused_Gerc2_opz_var1(int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Fused_Gerc2_opt_var1.c:306
int FLA_Datatype
Definition: FLA_type_defs.h:49
FLA_Error FLA_Fused_Gerc2_ops_var1(int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Fused_Gerc2_opt_var1.c:130
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
Definition: blis_type_defs.h:137

◆ FLA_Fused_Gerc2_opz_var1()

FLA_Error FLA_Fused_Gerc2_opz_var1 ( int  m_A,
int  n_A,
dcomplex *  buff_alpha,
dcomplex *  buff_u,
int  inc_u,
dcomplex *  buff_y,
int  inc_y,
dcomplex *  buff_z,
int  inc_z,
dcomplex *  buff_v,
int  inc_v,
dcomplex *  buff_A,
int  rs_A,
int  cs_A 
)

References bl1_zaxpyv2b(), i, and psi1.

Referenced by FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Fused_Gerc2_opt_var1(), FLA_Hess_UT_step_ofz_var2(), and FLA_Hess_UT_step_ofz_var3().

314 {
315  int i;
316 
317  for ( i = 0; i < n_A; ++i )
318  {
319  dcomplex* restrict a1 = buff_A + (i )*cs_A + (0 )*rs_A;
320  dcomplex* restrict u = buff_u;
321  dcomplex* restrict psi1 = buff_y + (i )*inc_y;
322  dcomplex* restrict z = buff_z;
323  dcomplex* restrict nu1 = buff_v + (i )*inc_v;
324  dcomplex* restrict alpha = buff_alpha;
325  dcomplex conj_psi1;
326  dcomplex conj_nu1;
327  dcomplex alpha_conj_psi1;
328  dcomplex alpha_conj_nu1;
329 
330  /*------------------------------------------------------------*/
331 
332  bl1_zcopyconj( psi1, &conj_psi1 );
333  bl1_zmult3( alpha, &conj_psi1, &alpha_conj_psi1 );
334 
335  bl1_zcopyconj( nu1, &conj_nu1 );
336  bl1_zmult3( alpha, &conj_nu1, &alpha_conj_nu1 );
337 
338  bl1_zaxpyv2b( m_A,
339  &alpha_conj_psi1,
340  &alpha_conj_nu1,
341  u, inc_u,
342  z, inc_z,
343  a1, rs_A );
344 
345  /*------------------------------------------------------------*/
346 
347  }
348 
349  return FLA_SUCCESS;
350 }
double *restrict psi1
Definition: bl1_axmyv2.c:139
int i
Definition: bl1_axmyv2.c:145
void bl1_zaxpyv2b(int n, dcomplex *alpha1, dcomplex *alpha2, dcomplex *x1, int inc_x1, dcomplex *x2, int inc_x2, dcomplex *y, int inc_y)
Definition: bl1_axpyv2b.c:210
Definition: blis_type_defs.h:137

◆ FLA_Fused_UYx_ZVx_opc_var1()

FLA_Error FLA_Fused_UYx_ZVx_opc_var1 ( int  m_U,
int  n_U,
int  m_V,
int  n_V,
scomplex *  buff_delta,
scomplex *  buff_U,
int  rs_U,
int  cs_U,
scomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
scomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *  buff_V,
int  rs_V,
int  cs_V,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_temp,
int  inc_temp,
scomplex *  buff_t,
int  inc_t,
scomplex *  buff_a,
int  inc_a,
scomplex *  buff_w,
int  inc_w,
scomplex *  buff_al,
int  inc_al 
)

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), BLIS1_NO_CONJUGATE, i, y1, and z1.

Referenced by FLA_Bidiag_UT_u_step_ofc_var4(), and FLA_Fused_UYx_ZVx_opt_var1().

439 {
440  int i;
441  int m_A = m_U;
442  int m_Z = m_U;
443 
444  bl1_ccopyv( BLIS1_NO_CONJUGATE,
445  m_A,
446  buff_A, rs_A,
447  buff_al, inc_al );
448 
449  for ( i = 0; i < n_U; ++i )
450  {
451  scomplex* u1 = buff_U + (i )*cs_U + (0 )*rs_U;
452  scomplex* y1 = buff_Y + (i )*cs_Y + (0 )*rs_Y;
453  scomplex* z1 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
454  scomplex* v1 = buff_V + (0 )*cs_V + (i )*rs_V;
455  scomplex* tau1 = buff_t + (i )*inc_t;
456  scomplex* delta = buff_delta;
457  scomplex* a = buff_a;
458  scomplex* w = buff_w;
459  scomplex* al = buff_al;
460  scomplex* psi20_l = buff_Y + (i )*cs_Y + (0 )*rs_Y;
461  scomplex* nu20_l = buff_V + (0 )*cs_V + (i )*rs_V;
462  scomplex alpha;
463  scomplex beta;
464  scomplex gamma;
465  scomplex kappa;
466 
467  /*------------------------------------------------------------*/
468 
469  bl1_cdot( BLIS1_NO_CONJUGATE,
470  n_V,
471  y1, rs_Y,
472  a, inc_a,
473  &alpha );
474 
475  bl1_cdot( BLIS1_NO_CONJUGATE,
476  n_V,
477  v1, cs_V,
478  a, inc_a,
479  &beta );
480 
481  bl1_cconjs( &alpha );
482  bl1_cconjs( &beta );
483  bl1_ccopyconj( psi20_l, &gamma );
484  bl1_ccopyconj( nu20_l, &kappa );
485 
486  *tau1 = beta;
487 
488  bl1_cscals( delta, &alpha );
489  bl1_cscals( delta, &beta );
490  bl1_cscals( delta, &gamma );
491  bl1_cscals( delta, &kappa );
492 
493  bl1_caxpyv( BLIS1_NO_CONJUGATE,
494  m_U,
495  &alpha,
496  u1, rs_U,
497  w, inc_w );
498  //F77_caxpy( &m_U,
499  // &alpha,
500  // u1, &rs_U,
501  // w, &inc_w );
502 
503  bl1_caxpyv( BLIS1_NO_CONJUGATE,
504  m_Z,
505  &beta,
506  z1, rs_Z,
507  w, inc_w );
508  //F77_caxpy( &m_Z,
509  // &beta,
510  // z1, &rs_Z,
511  // w, &inc_w );
512 
513  bl1_caxpyv( BLIS1_NO_CONJUGATE,
514  m_U,
515  &gamma,
516  u1, rs_U,
517  al, inc_al );
518  //F77_caxpy( &m_U,
519  // &gamma,
520  // u1, &rs_U,
521  // al, &inc_al );
522 
523  bl1_caxpyv( BLIS1_NO_CONJUGATE,
524  m_Z,
525  &kappa,
526  z1, rs_Z,
527  al, inc_al);
528  //F77_caxpy( &m_Z,
529  // &kappa,
530  // z1, &rs_Z,
531  // al, &inc_al );
532 
533  /*------------------------------------------------------------*/
534 
535  }
536 
537  return FLA_SUCCESS;
538 }
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
Definition: blis_type_defs.h:81
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
double *restrict z1
Definition: bl1_dotsv2.c:148
Definition: blis_type_defs.h:132
int i
Definition: bl1_axmyv2.c:145
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
double *restrict y1
Definition: bl1_dotsv2.c:145

◆ FLA_Fused_UYx_ZVx_opd_var1()

FLA_Error FLA_Fused_UYx_ZVx_opd_var1 ( int  m_U,
int  n_U,
int  m_V,
int  n_V,
double *  buff_delta,
double *  buff_U,
int  rs_U,
int  cs_U,
double *  buff_Y,
int  rs_Y,
int  cs_Y,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_V,
int  rs_V,
int  cs_V,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_temp,
int  inc_temp,
double *  buff_t,
int  inc_t,
double *  buff_a,
int  inc_a,
double *  buff_w,
int  inc_w,
double *  buff_al,
int  inc_al 
)

References bl1_d0(), bl1_daxmyv2(), bl1_dcopyv(), bl1_ddotsv2(), BLIS1_NO_CONJUGATE, i, y1, and z1.

Referenced by FLA_Bidiag_UT_u_step_ofd_var4(), and FLA_Fused_UYx_ZVx_opt_var1().

346 {
347  double zero = bl1_d0();
348  int i;
349  int m_A = m_U;
350  int m_Z = m_U;
351 
352  bl1_dcopyv( BLIS1_NO_CONJUGATE,
353  m_A,
354  buff_A, rs_A,
355  buff_al, inc_al );
356 
357  if ( m_U == 0 || n_U == 0 ) return 0;
358  if ( m_V == 0 || n_V == 0 ) return 0;
359 
360  for ( i = 0; i < n_U; ++i )
361  {
362  double* restrict u1 = buff_U + (i )*cs_U + (0 )*rs_U;
363  double* restrict y1 = buff_Y + (i )*cs_Y + (0 )*rs_Y;
364  double* restrict z1 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
365  double* restrict v1 = buff_V + (0 )*cs_V + (i )*rs_V;
366  double* restrict tau1 = buff_t + (i )*inc_t;
367  double* restrict t1 = buff_temp;
368  double* restrict a = buff_a;
369  double* restrict w = buff_w;
370  double* restrict al = buff_al;
371  double* restrict psi20_l = buff_Y + (i )*cs_Y + (0 )*rs_Y;
372  double* restrict nu20_l = buff_V + (0 )*cs_V + (i )*rs_V;
373  double alpha;
374  double beta;
375  double gamma;
376  double kappa;
377 
378  /*------------------------------------------------------------*/
379 
380  bl1_dcopyv( BLIS1_NO_CONJUGATE,
381  n_V,
382  v1, cs_V,
383  t1, inc_t );
384 
385  bl1_ddotsv2( BLIS1_NO_CONJUGATE,
386  n_V,
387  y1, rs_Y,
388  t1, inc_t,
389  a, inc_a,
390  &zero,
391  &alpha,
392  &beta );
393 
394  *tau1 = beta;
395 
396  bl1_dcopyconj( psi20_l, &gamma );
397  bl1_dcopyconj( nu20_l, &kappa );
398 
399  bl1_daxmyv2( BLIS1_NO_CONJUGATE,
400  m_U,
401  &alpha,
402  &gamma,
403  u1, rs_U,
404  w, inc_w,
405  al, inc_al );
406 
407  bl1_daxmyv2( BLIS1_NO_CONJUGATE,
408  m_Z,
409  &beta,
410  &kappa,
411  z1, rs_U,
412  w, inc_w,
413  al, inc_al );
414 
415  /*------------------------------------------------------------*/
416 
417  }
418 
419  return FLA_SUCCESS;
420 }
Definition: blis_type_defs.h:81
void bl1_ddotsv2(conj1_t conjxy, int n, double *x, int inc_x, double *y, int inc_y, double *z, int inc_z, double *beta, double *rho_xz, double *rho_yz)
Definition: bl1_dotsv2.c:35
double bl1_d0(void)
Definition: bl1_constants.c:118
double *restrict z1
Definition: bl1_dotsv2.c:148
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42
void bl1_daxmyv2(conj1_t conjx, int n, double *alpha, double *beta, double *x, int inc_x, double *y, int inc_y, double *z, int inc_z)
Definition: bl1_axmyv2.c:34
int i
Definition: bl1_axmyv2.c:145
double *restrict y1
Definition: bl1_dotsv2.c:145

◆ FLA_Fused_UYx_ZVx_ops_var1()

FLA_Error FLA_Fused_UYx_ZVx_ops_var1 ( int  m_U,
int  n_U,
int  m_V,
int  n_V,
float *  buff_delta,
float *  buff_U,
int  rs_U,
int  cs_U,
float *  buff_Y,
int  rs_Y,
int  cs_Y,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_V,
int  rs_V,
int  cs_V,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_temp,
int  inc_temp,
float *  buff_t,
int  inc_t,
float *  buff_a,
int  inc_a,
float *  buff_w,
int  inc_w,
float *  buff_al,
int  inc_al 
)

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), BLIS1_NO_CONJUGATE, i, y1, and z1.

Referenced by FLA_Bidiag_UT_u_step_ofs_var4(), and FLA_Fused_UYx_ZVx_opt_var1().

216 {
217  int i;
218  int m_A = m_U;
219  int m_Z = m_U;
220 
221  bl1_scopyv( BLIS1_NO_CONJUGATE,
222  m_A,
223  buff_A, rs_A,
224  buff_al, inc_al );
225 
226  for ( i = 0; i < n_U; ++i )
227  {
228  float* u1 = buff_U + (i )*cs_U + (0 )*rs_U;
229  float* y1 = buff_Y + (i )*cs_Y + (0 )*rs_Y;
230  float* z1 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
231  float* v1 = buff_V + (0 )*cs_V + (i )*rs_V;
232  float* tau1 = buff_t + (i )*inc_t;
233  float* delta = buff_delta;
234  float* a = buff_a;
235  float* w = buff_w;
236  float* al = buff_al;
237  float* psi20_l = buff_Y + (i )*cs_Y + (0 )*rs_Y;
238  float* nu20_l = buff_V + (0 )*cs_V + (i )*rs_V;
239  float alpha;
240  float beta;
241  float gamma;
242  float kappa;
243 
244  /*------------------------------------------------------------*/
245 
246  bl1_sdot( BLIS1_NO_CONJUGATE,
247  n_V,
248  y1, rs_Y,
249  a, inc_a,
250  &alpha );
251  //alpha = F77_sdot( &n_V,
252  // y1, &rs_Y,
253  // a, &inc_a );
254 
255  bl1_sdot( BLIS1_NO_CONJUGATE,
256  n_V,
257  v1, cs_V,
258  a, inc_a,
259  &beta );
260  //beta = F77_sdot( &n_V,
261  // v1, &cs_V,
262  // a, &inc_a );
263 
264  *tau1 = beta;
265 
266  // bl1_sconjs( &alpha );
267  // bl1_sconjs( &beta );
268  // bl1_scopyconj( psi20_l, &gamma );
269  // bl1_scopyconj( nu20_l, &kappa );
270  gamma = *psi20_l;
271  kappa = *nu20_l;
272 
273  // bl1_dscals( delta, &alpha );
274  // bl1_dscals( delta, &beta );
275  // bl1_dscals( delta, &gamma );
276  // bl1_dscals( delta, &kappa );
277  alpha *= *delta;
278  beta *= *delta;
279  gamma *= *delta;
280  kappa *= *delta;
281 
282  bl1_saxpyv( BLIS1_NO_CONJUGATE,
283  m_U,
284  &alpha,
285  u1, rs_U,
286  w, inc_w );
287  //F77_saxpy( &m_U,
288  // &alpha,
289  // u1, &rs_U,
290  // w, &inc_w );
291 
292  bl1_saxpyv( BLIS1_NO_CONJUGATE,
293  m_Z,
294  &beta,
295  z1, rs_Z,
296  w, inc_w );
297  //F77_saxpy( &m_Z,
298  // &beta,
299  // z1, &rs_Z,
300  // w, &inc_w );
301 
302  bl1_saxpyv( BLIS1_NO_CONJUGATE,
303  m_U,
304  &gamma,
305  u1, rs_U,
306  al, inc_al );
307  //F77_saxpy( &m_U,
308  // &gamma,
309  // u1, &rs_U,
310  // al, &inc_al );
311 
312  bl1_saxpyv( BLIS1_NO_CONJUGATE,
313  m_Z,
314  &kappa,
315  z1, rs_Z,
316  al, inc_al );
317  //F77_saxpy( &m_Z,
318  // &kappa,
319  // z1, &rs_Z,
320  // al, &inc_al );
321 
322  /*------------------------------------------------------------*/
323 
324  }
325 
326  return FLA_SUCCESS;
327 }
Definition: blis_type_defs.h:81
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
double *restrict z1
Definition: bl1_dotsv2.c:148
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35
int i
Definition: bl1_axmyv2.c:145
double *restrict y1
Definition: bl1_dotsv2.c:145

◆ FLA_Fused_UYx_ZVx_opt_var1()

FLA_Error FLA_Fused_UYx_ZVx_opt_var1 ( FLA_Obj  delta,
FLA_Obj  a,
FLA_Obj  U,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  V,
FLA_Obj  A,
FLA_Obj  temp,
FLA_Obj  t,
FLA_Obj  w,
FLA_Obj  al 
)

References FLA_Fused_UYx_ZVx_opc_var1(), FLA_Fused_UYx_ZVx_opd_var1(), FLA_Fused_UYx_ZVx_ops_var1(), FLA_Fused_UYx_ZVx_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

14 {
15 /*
16  Effective computation:
17  w = w + delta * ( U ( Y' conj(a) ) + Z ( V' conj(a) ) );
18  al = A * e0 + delta * ( U ( Y' e0 ) + Z ( V' e0 ) );
19  t = V' conj(a);
20 */
21  FLA_Datatype datatype;
22  int m_U, n_U;
23  int m_V, n_V;
24  int rs_A, cs_A;
25  int rs_U, cs_U;
26  int rs_Y, cs_Y;
27  int rs_Z, cs_Z;
28  int rs_V, cs_V;
29  int inc_a, inc_temp, inc_t, inc_w, inc_al;
30 
31  datatype = FLA_Obj_datatype( A );
32 
33  m_U = FLA_Obj_length( U );
34  n_U = FLA_Obj_width( U );
35 
36  m_V = FLA_Obj_length( V );
37  n_V = FLA_Obj_width( V );
38 
39  rs_U = FLA_Obj_row_stride( U );
40  cs_U = FLA_Obj_col_stride( U );
41 
42  rs_Y = FLA_Obj_row_stride( Y );
43  cs_Y = FLA_Obj_col_stride( Y );
44 
45  rs_Z = FLA_Obj_row_stride( Z );
46  cs_Z = FLA_Obj_col_stride( Z );
47 
48  rs_V = FLA_Obj_row_stride( V );
49  cs_V = FLA_Obj_col_stride( V );
50 
51  rs_A = FLA_Obj_row_stride( A );
52  cs_A = FLA_Obj_col_stride( A );
53 
54  inc_temp = FLA_Obj_vector_inc( temp );
55  inc_t = FLA_Obj_vector_inc( t );
56  inc_a = FLA_Obj_vector_inc( a );
57  inc_w = FLA_Obj_vector_inc( w );
58  inc_al = FLA_Obj_vector_inc( al );
59 
60 
61  switch ( datatype )
62  {
63  case FLA_FLOAT:
64  {
65  float* buff_A = FLA_FLOAT_PTR( A );
66  float* buff_U = FLA_FLOAT_PTR( U );
67  float* buff_Y = FLA_FLOAT_PTR( Y );
68  float* buff_Z = FLA_FLOAT_PTR( Z );
69  float* buff_V = FLA_FLOAT_PTR( V );
70  float* buff_temp = FLA_FLOAT_PTR( temp );
71  float* buff_t = FLA_FLOAT_PTR( t );
72  float* buff_a = FLA_FLOAT_PTR( a );
73  float* buff_w = FLA_FLOAT_PTR( w );
74  float* buff_al = FLA_FLOAT_PTR( al );
75  float* buff_delta = FLA_FLOAT_PTR( delta );
76 
78  n_U,
79  m_V,
80  n_V,
81  buff_delta,
82  buff_U, rs_U, cs_U,
83  buff_Y, rs_Y, cs_Y,
84  buff_Z, rs_Z, cs_Z,
85  buff_V, rs_V, cs_V,
86  buff_A, rs_A, cs_A,
87  buff_temp, inc_temp,
88  buff_t, inc_t,
89  buff_a, inc_a,
90  buff_w, inc_w,
91  buff_al, inc_al );
92 
93  break;
94  }
95 
96  case FLA_DOUBLE:
97  {
98  double* buff_A = FLA_DOUBLE_PTR( A );
99  double* buff_U = FLA_DOUBLE_PTR( U );
100  double* buff_Y = FLA_DOUBLE_PTR( Y );
101  double* buff_Z = FLA_DOUBLE_PTR( Z );
102  double* buff_V = FLA_DOUBLE_PTR( V );
103  double* buff_temp = FLA_DOUBLE_PTR( temp );
104  double* buff_t = FLA_DOUBLE_PTR( t );
105  double* buff_a = FLA_DOUBLE_PTR( a );
106  double* buff_w = FLA_DOUBLE_PTR( w );
107  double* buff_al = FLA_DOUBLE_PTR( al );
108  double* buff_delta = FLA_DOUBLE_PTR( delta );
109 
111  n_U,
112  m_V,
113  n_V,
114  buff_delta,
115  buff_U, rs_U, cs_U,
116  buff_Y, rs_Y, cs_Y,
117  buff_Z, rs_Z, cs_Z,
118  buff_V, rs_V, cs_V,
119  buff_A, rs_A, cs_A,
120  buff_temp, inc_temp,
121  buff_t, inc_t,
122  buff_a, inc_a,
123  buff_w, inc_w,
124  buff_al, inc_al );
125 
126  break;
127  }
128 
129  case FLA_COMPLEX:
130  {
131  scomplex* buff_A = FLA_COMPLEX_PTR( A );
132  scomplex* buff_U = FLA_COMPLEX_PTR( U );
133  scomplex* buff_Y = FLA_COMPLEX_PTR( Y );
134  scomplex* buff_Z = FLA_COMPLEX_PTR( Z );
135  scomplex* buff_V = FLA_COMPLEX_PTR( V );
136  scomplex* buff_temp = FLA_COMPLEX_PTR( temp );
137  scomplex* buff_t = FLA_COMPLEX_PTR( t );
138  scomplex* buff_a = FLA_COMPLEX_PTR( a );
139  scomplex* buff_w = FLA_COMPLEX_PTR( w );
140  scomplex* buff_al = FLA_COMPLEX_PTR( al );
141  scomplex* buff_delta = FLA_COMPLEX_PTR( delta );
142 
144  n_U,
145  m_V,
146  n_V,
147  buff_delta,
148  buff_U, rs_U, cs_U,
149  buff_Y, rs_Y, cs_Y,
150  buff_Z, rs_Z, cs_Z,
151  buff_V, rs_V, cs_V,
152  buff_A, rs_A, cs_A,
153  buff_temp, inc_temp,
154  buff_t, inc_t,
155  buff_a, inc_a,
156  buff_w, inc_w,
157  buff_al, inc_al );
158 
159  break;
160  }
161 
162  case FLA_DOUBLE_COMPLEX:
163  {
164  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
165  dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );
166  dcomplex* buff_Y = FLA_DOUBLE_COMPLEX_PTR( Y );
167  dcomplex* buff_Z = FLA_DOUBLE_COMPLEX_PTR( Z );
168  dcomplex* buff_V = FLA_DOUBLE_COMPLEX_PTR( V );
169  dcomplex* buff_temp = FLA_DOUBLE_COMPLEX_PTR( temp );
170  dcomplex* buff_t = FLA_DOUBLE_COMPLEX_PTR( t );
171  dcomplex* buff_a = FLA_DOUBLE_COMPLEX_PTR( a );
172  dcomplex* buff_w = FLA_DOUBLE_COMPLEX_PTR( w );
173  dcomplex* buff_al = FLA_DOUBLE_COMPLEX_PTR( al );
174  dcomplex* buff_delta = FLA_DOUBLE_COMPLEX_PTR( delta );
175 
177  n_U,
178  m_V,
179  n_V,
180  buff_delta,
181  buff_U, rs_U, cs_U,
182  buff_Y, rs_Y, cs_Y,
183  buff_Z, rs_Z, cs_Z,
184  buff_V, rs_V, cs_V,
185  buff_A, rs_A, cs_A,
186  buff_temp, inc_temp,
187  buff_t, inc_t,
188  buff_a, inc_a,
189  buff_w, inc_w,
190  buff_al, inc_al );
191 
192  break;
193  }
194  }
195 
196  return FLA_SUCCESS;
197 }
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Error FLA_Fused_UYx_ZVx_opd_var1(int m_U, int n_U, int m_V, int n_V, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_V, int rs_V, int cs_V, double *buff_A, int rs_A, int cs_A, double *buff_temp, int inc_temp, double *buff_t, int inc_t, double *buff_a, int inc_a, double *buff_w, int inc_w, double *buff_al, int inc_al)
Definition: FLA_Fused_UYx_ZVx_opt_var1.c:331
FLA_Error FLA_Fused_UYx_ZVx_ops_var1(int m_U, int n_U, int m_V, int n_V, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_V, int rs_V, int cs_V, float *buff_A, int rs_A, int cs_A, float *buff_temp, int inc_temp, float *buff_t, int inc_t, float *buff_a, int inc_a, float *buff_w, int inc_w, float *buff_al, int inc_al)
Definition: FLA_Fused_UYx_ZVx_opt_var1.c:201
FLA_Error FLA_Fused_UYx_ZVx_opc_var1(int m_U, int n_U, int m_V, int n_V, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_V, int rs_V, int cs_V, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_temp, int inc_temp, scomplex *buff_t, int inc_t, scomplex *buff_a, int inc_a, scomplex *buff_w, int inc_w, scomplex *buff_al, int inc_al)
Definition: FLA_Fused_UYx_ZVx_opt_var1.c:424
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
Definition: blis_type_defs.h:132
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Error FLA_Fused_UYx_ZVx_opz_var1(int m_U, int n_U, int m_V, int n_V, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_V, int rs_V, int cs_V, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_temp, int inc_temp, dcomplex *buff_t, int inc_t, dcomplex *buff_a, int inc_a, dcomplex *buff_w, int inc_w, dcomplex *buff_al, int inc_al)
Definition: FLA_Fused_UYx_ZVx_opt_var1.c:542
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
Definition: blis_type_defs.h:137

◆ FLA_Fused_UYx_ZVx_opz_var1()

FLA_Error FLA_Fused_UYx_ZVx_opz_var1 ( int  m_U,
int  n_U,
int  m_V,
int  n_V,
dcomplex * buff_delta,
dcomplex * buff_U,
int  rs_U,
int  cs_U,
dcomplex * buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex * buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex * buff_V,
int  rs_V,
int  cs_V,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_temp,
int  inc_temp,
dcomplex * buff_t,
int  inc_t,
dcomplex * buff_a,
int  inc_a,
dcomplex * buff_w,
int  inc_w,
dcomplex * buff_al,
int  inc_al 
)

References bl1_z0(), bl1_zaxmyv2(), bl1_zcopyv(), bl1_zdotsv2(), BLIS1_NO_CONJUGATE, i, y1, and z1.

Referenced by FLA_Bidiag_UT_u_step_ofz_var4(), and FLA_Fused_UYx_ZVx_opt_var1().

557 {
558  dcomplex zero = bl1_z0();
559  int i;
560  int m_A = m_U;
561  int m_Z = m_U;
562 
564  m_A,
565  buff_A, rs_A,
566  buff_al, inc_al );
567 
568  if ( m_U == 0 || n_U == 0 ) return 0;
569  if ( m_V == 0 || n_V == 0 ) return 0;
570 
571  for ( i = 0; i < n_U; ++i )
572  {
573  dcomplex* restrict u1 = buff_U + (i )*cs_U + (0 )*rs_U;
574  dcomplex* restrict y1 = buff_Y + (i )*cs_Y + (0 )*rs_Y;
575  dcomplex* restrict z1 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
576  dcomplex* restrict v1 = buff_V + (0 )*cs_V + (i )*rs_V;
577  dcomplex* restrict tau1 = buff_t + (i )*inc_t;
578  dcomplex* restrict a = buff_a;
579  dcomplex* restrict w = buff_w;
580  dcomplex* restrict al = buff_al;
581  dcomplex* restrict psi20_l = buff_Y + (i )*cs_Y + (0 )*rs_Y;
582  dcomplex* restrict nu20_l = buff_V + (0 )*cs_V + (i )*rs_V;
583  dcomplex alpha;
584  dcomplex beta;
585  dcomplex gamma;
586  dcomplex kappa;
587 
588  /*------------------------------------------------------------*/
589 
591  n_V,
592  y1, rs_Y,
593  v1, cs_V,
594  a, inc_a,
595  &zero,
596  &alpha,
597  &beta );
598 
599  bl1_zconjs( &alpha );
600  bl1_zconjs( &beta );
601 
602  *tau1 = beta;
603 
604  bl1_zcopyconj( psi20_l, &gamma );
605  bl1_zcopyconj( nu20_l, &kappa );
606 
608  m_U,
609  &alpha,
610  &gamma,
611  u1, rs_U,
612  w, inc_w,
613  al, inc_al );
614 
616  m_Z,
617  &beta,
618  &kappa,
619  z1, rs_U,
620  w, inc_w,
621  al, inc_al );
622 
623  /*------------------------------------------------------------*/
624 
625  }
626 
627  return FLA_SUCCESS;
628 }
void bl1_zaxmyv2(conj1_t conjx, int n, dcomplex *alpha, dcomplex *beta, dcomplex *x, int inc_x, dcomplex *y, int inc_y, dcomplex *z, int inc_z)
Definition: bl1_axmyv2.c:250
dcomplex bl1_z0(void)
Definition: bl1_constants.c:133
Definition: blis_type_defs.h:81
double *restrict z1
Definition: bl1_dotsv2.c:148
void bl1_zdotsv2(conj1_t conjxy, int n, dcomplex *x, int inc_x, dcomplex *y, int inc_y, dcomplex *z, int inc_z, dcomplex *beta, dcomplex *rho_xz, dcomplex *rho_yz)
Definition: bl1_dotsv2.c:248
int i
Definition: bl1_axmyv2.c:145
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
double *restrict y1
Definition: bl1_dotsv2.c:145
Definition: blis_type_defs.h:137