libflame  revision_anchor
Functions
FLA_Apply_G_rf_asm_var6.c File Reference

(r)

Functions

FLA_Error FLA_Apply_G_rf_asm_var6 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 

Function Documentation

◆ FLA_Apply_G_rf_asc_var6()

FLA_Error FLA_Apply_G_rf_asc_var6 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_asc_var1(), i, scomplex::imag, n_left, and scomplex::real.

Referenced by FLA_Apply_G_rf_asm_var6(), and FLA_Apply_G_rf_blc_var6().

680 {
681  float one = bl1_s1();
682  float zero = bl1_s0();
683  float gamma12;
684  float sigma12;
685  float gamma23;
686  float sigma23;
687  scomplex* a1;
688  scomplex* a2;
689  scomplex* a3;
690  scomplex* g12;
691  scomplex* g23;
692  int i, j, g, k;
693  int nG, nG_app;
694  int n_iter;
695  int n_left;
696  int k_minus_1;
697  int n_fuse;
698  int is_ident12, is_ident23;
699 
700  k_minus_1 = k_G - 1;
701  nG = n_A - 1;
702  n_fuse = 2;
703 
704  // Use the simple variant for nG < (k - 1) or k == 1.
705  if ( nG < k_minus_1 || k_G == 1 )
706  {
707  FLA_Apply_G_rf_asc_var1( k_G,
708  m_A,
709  n_A,
710  buff_G, rs_G, cs_G,
711  buff_A, rs_A, cs_A );
712  return FLA_SUCCESS;
713  }
714 
715 
716  // Start-up phase.
717 
718  for ( j = 0; j < k_minus_1; ++j )
719  {
720  nG_app = j + 1;
721  n_iter = nG_app / n_fuse;
722  n_left = nG_app % n_fuse;
723 
724  for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
725  {
726  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
727  g23 = buff_G + (g )*rs_G + (k )*cs_G;
728  a1 = buff_A + (g - 1)*cs_A;
729  a2 = buff_A + (g )*cs_A;
730  a3 = buff_A + (g + 1)*cs_A;
731 
732  gamma12 = g12->real;
733  sigma12 = g12->imag;
734  gamma23 = g23->real;
735  sigma23 = g23->imag;
736 
737  is_ident12 = ( gamma12 == one && sigma12 == zero );
738  is_ident23 = ( gamma23 == one && sigma23 == zero );
739 
740  if ( !is_ident12 && is_ident23 )
741  {
742  // Apply only to columns 1 and 2.
743 
744  MAC_Apply_G_mx2_asc( m_A,
745  &gamma12,
746  &sigma12,
747  a1, 1,
748  a2, 1 );
749  }
750  else if ( is_ident12 && !is_ident23 )
751  {
752  // Apply only to columns 2 and 3.
753 
754  MAC_Apply_G_mx2_asc( m_A,
755  &gamma23,
756  &sigma23,
757  a2, 1,
758  a3, 1 );
759  }
760  else if ( !is_ident12 && !is_ident23 )
761  {
762  // Apply to all three columns.
763 
764  MAC_Apply_G_mx3b_asc( m_A,
765  &gamma12,
766  &sigma12,
767  &gamma23,
768  &sigma23,
769  a1, 1,
770  a2, 1,
771  a3, 1 );
772  }
773  }
774 
775  if ( n_left == 1 )
776  {
777  g23 = buff_G + (g )*rs_G + (k )*cs_G;
778  a2 = buff_A + (g )*cs_A;
779  a3 = buff_A + (g + 1)*cs_A;
780 
781  gamma23 = g23->real;
782  sigma23 = g23->imag;
783 
784  is_ident23 = ( gamma23 == one && sigma23 == zero );
785 
786  if ( !is_ident23 )
787  MAC_Apply_G_mx2_asc( m_A,
788  &gamma23,
789  &sigma23,
790  a2, 1,
791  a3, 1 );
792  }
793  }
794 
795  // Pipeline stage
796 
797  for ( j = k_minus_1; j < nG; ++j )
798  {
799  nG_app = k_G;
800  n_iter = nG_app / n_fuse;
801  n_left = nG_app % n_fuse;
802 
803  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
804  {
805  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
806  g23 = buff_G + (g )*rs_G + (k )*cs_G;
807  a1 = buff_A + (g - 1)*cs_A;
808  a2 = buff_A + (g )*cs_A;
809  a3 = buff_A + (g + 1)*cs_A;
810 
811  gamma12 = g12->real;
812  sigma12 = g12->imag;
813  gamma23 = g23->real;
814  sigma23 = g23->imag;
815 
816  is_ident12 = ( gamma12 == one && sigma12 == zero );
817  is_ident23 = ( gamma23 == one && sigma23 == zero );
818 
819  if ( !is_ident12 && is_ident23 )
820  {
821  // Apply only to columns 1 and 2.
822 
823  MAC_Apply_G_mx2_asc( m_A,
824  &gamma12,
825  &sigma12,
826  a1, 1,
827  a2, 1 );
828  }
829  else if ( is_ident12 && !is_ident23 )
830  {
831  // Apply only to columns 2 and 3.
832 
833  MAC_Apply_G_mx2_asc( m_A,
834  &gamma23,
835  &sigma23,
836  a2, 1,
837  a3, 1 );
838  }
839  else if ( !is_ident12 && !is_ident23 )
840  {
841  // Apply to all three columns.
842 
843  MAC_Apply_G_mx3b_asc( m_A,
844  &gamma12,
845  &sigma12,
846  &gamma23,
847  &sigma23,
848  a1, 1,
849  a2, 1,
850  a3, 1 );
851  }
852  }
853 
854  if ( n_left == 1 )
855  {
856  g23 = buff_G + (g )*rs_G + (k )*cs_G;
857  a2 = buff_A + (g )*cs_A;
858  a3 = buff_A + (g + 1)*cs_A;
859 
860  gamma23 = g23->real;
861  sigma23 = g23->imag;
862 
863  is_ident23 = ( gamma23 == one && sigma23 == zero );
864 
865  if ( !is_ident23 )
866  MAC_Apply_G_mx2_asc( m_A,
867  &gamma23,
868  &sigma23,
869  a2, 1,
870  a3, 1 );
871  }
872  }
873 
874  // Shutdown stage
875 
876  for ( j = 1; j < k_G; ++j )
877  {
878  nG_app = k_G - j;
879  n_iter = nG_app / n_fuse;
880  n_left = nG_app % n_fuse;
881 
882  for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
883  {
884  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
885  g23 = buff_G + (g )*rs_G + (k )*cs_G;
886  a1 = buff_A + (g - 1)*cs_A;
887  a2 = buff_A + (g )*cs_A;
888  a3 = buff_A + (g + 1)*cs_A;
889 
890  gamma12 = g12->real;
891  sigma12 = g12->imag;
892  gamma23 = g23->real;
893  sigma23 = g23->imag;
894 
895  is_ident12 = ( gamma12 == one && sigma12 == zero );
896  is_ident23 = ( gamma23 == one && sigma23 == zero );
897 
898  if ( !is_ident12 && is_ident23 )
899  {
900  // Apply only to columns 1 and 2.
901 
902  MAC_Apply_G_mx2_asc( m_A,
903  &gamma12,
904  &sigma12,
905  a1, 1,
906  a2, 1 );
907  }
908  else if ( is_ident12 && !is_ident23 )
909  {
910  // Apply only to columns 2 and 3.
911 
912  MAC_Apply_G_mx2_asc( m_A,
913  &gamma23,
914  &sigma23,
915  a2, 1,
916  a3, 1 );
917  }
918  else if ( !is_ident12 && !is_ident23 )
919  {
920  // Apply to all three columns.
921 
922  MAC_Apply_G_mx3b_asc( m_A,
923  &gamma12,
924  &sigma12,
925  &gamma23,
926  &sigma23,
927  a1, 1,
928  a2, 1,
929  a3, 1 );
930  }
931  }
932 
933  if ( n_left == 1 )
934  {
935  g23 = buff_G + (g )*rs_G + (k )*cs_G;
936  a2 = buff_A + (g )*cs_A;
937  a3 = buff_A + (g + 1)*cs_A;
938 
939  gamma23 = g23->real;
940  sigma23 = g23->imag;
941 
942  is_ident23 = ( gamma23 == one && sigma23 == zero );
943 
944  if ( !is_ident23 )
945  MAC_Apply_G_mx2_asc( m_A,
946  &gamma23,
947  &sigma23,
948  a2, 1,
949  a3, 1 );
950  }
951  }
952 
953  return FLA_SUCCESS;
954 }
float real
Definition: blis_type_defs.h:134
float bl1_s1(void)
Definition: bl1_constants.c:47
int n_left
Definition: bl1_axmyv2.c:149
scomplex
Definition: blis_type_defs.h:132
FLA_Error FLA_Apply_G_rf_asc_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var1.c:215
int i
Definition: bl1_axmyv2.c:145
float bl1_s0(void)
Definition: bl1_constants.c:111
float imag
Definition: blis_type_defs.h:134

◆ FLA_Apply_G_rf_asd_var6()

FLA_Error FLA_Apply_G_rf_asd_var6 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asd_var1(), i, dcomplex::imag, n_left, and dcomplex::real.

Referenced by FLA_Apply_G_rf_asm_var6(), and FLA_Apply_G_rf_bld_var6().

399 {
400  double one = bl1_d1();
401  double zero = bl1_d0();
402  double gamma12;
403  double sigma12;
404  double gamma23;
405  double sigma23;
406  double* a1;
407  double* a2;
408  double* a3;
409  dcomplex* g12;
410  dcomplex* g23;
411  int i, j, g, k;
412  int nG, nG_app;
413  int n_iter;
414  int n_left;
415  int k_minus_1;
416  int n_fuse;
417  int is_ident12, is_ident23;
418 
419  k_minus_1 = k_G - 1;
420  nG = n_A - 1;
421  n_fuse = 2;
422 
423  // Use the simple variant for nG < (k - 1) or k == 1.
424  if ( nG < k_minus_1 || k_G == 1 )
425  {
426  FLA_Apply_G_rf_asd_var1( k_G,
427  m_A,
428  n_A,
429  buff_G, rs_G, cs_G,
430  buff_A, rs_A, cs_A );
431  return FLA_SUCCESS;
432  }
433 
434 
435  // Start-up phase.
436 
437  for ( j = 0; j < k_minus_1; ++j )
438  {
439  nG_app = j + 1;
440  n_iter = nG_app / n_fuse;
441  n_left = nG_app % n_fuse;
442 
443  for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
444  {
445  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
446  g23 = buff_G + (g )*rs_G + (k )*cs_G;
447  a1 = buff_A + (g - 1)*cs_A;
448  a2 = buff_A + (g )*cs_A;
449  a3 = buff_A + (g + 1)*cs_A;
450 
451  gamma12 = g12->real;
452  sigma12 = g12->imag;
453  gamma23 = g23->real;
454  sigma23 = g23->imag;
455 
456  is_ident12 = ( gamma12 == one && sigma12 == zero );
457  is_ident23 = ( gamma23 == one && sigma23 == zero );
458 
459  if ( !is_ident12 && is_ident23 )
460  {
461  // Apply only to columns 1 and 2.
462 
463  MAC_Apply_G_mx2_asd( m_A,
464  &gamma12,
465  &sigma12,
466  a1, 1,
467  a2, 1 );
468  }
469  else if ( is_ident12 && !is_ident23 )
470  {
471  // Apply only to columns 2 and 3.
472 
473  MAC_Apply_G_mx2_asd( m_A,
474  &gamma23,
475  &sigma23,
476  a2, 1,
477  a3, 1 );
478  }
479  else if ( !is_ident12 && !is_ident23 )
480  {
481  // Apply to all three columns.
482 
483  MAC_Apply_G_mx3b_asd( m_A,
484  &gamma12,
485  &sigma12,
486  &gamma23,
487  &sigma23,
488  a1, 1,
489  a2, 1,
490  a3, 1 );
491  }
492  }
493 
494  if ( n_left == 1 )
495  {
496  g23 = buff_G + (g )*rs_G + (k )*cs_G;
497  a2 = buff_A + (g )*cs_A;
498  a3 = buff_A + (g + 1)*cs_A;
499 
500  gamma23 = g23->real;
501  sigma23 = g23->imag;
502 
503  is_ident23 = ( gamma23 == one && sigma23 == zero );
504 
505  if ( !is_ident23 )
506  MAC_Apply_G_mx2_asd( m_A,
507  &gamma23,
508  &sigma23,
509  a2, 1,
510  a3, 1 );
511  }
512  }
513 
514  // Pipeline stage
515 
516  for ( j = k_minus_1; j < nG; ++j )
517  {
518  nG_app = k_G;
519  n_iter = nG_app / n_fuse;
520  n_left = nG_app % n_fuse;
521 
522  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
523  {
524  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
525  g23 = buff_G + (g )*rs_G + (k )*cs_G;
526  a1 = buff_A + (g - 1)*cs_A;
527  a2 = buff_A + (g )*cs_A;
528  a3 = buff_A + (g + 1)*cs_A;
529 
530  gamma12 = g12->real;
531  sigma12 = g12->imag;
532  gamma23 = g23->real;
533  sigma23 = g23->imag;
534 
535  is_ident12 = ( gamma12 == one && sigma12 == zero );
536  is_ident23 = ( gamma23 == one && sigma23 == zero );
537 
538  if ( !is_ident12 && is_ident23 )
539  {
540  // Apply only to columns 1 and 2.
541 
542  MAC_Apply_G_mx2_asd( m_A,
543  &gamma12,
544  &sigma12,
545  a1, 1,
546  a2, 1 );
547  }
548  else if ( is_ident12 && !is_ident23 )
549  {
550  // Apply only to columns 2 and 3.
551 
552  MAC_Apply_G_mx2_asd( m_A,
553  &gamma23,
554  &sigma23,
555  a2, 1,
556  a3, 1 );
557  }
558  else if ( !is_ident12 && !is_ident23 )
559  {
560  // Apply to all three columns.
561 
562  MAC_Apply_G_mx3b_asd( m_A,
563  &gamma12,
564  &sigma12,
565  &gamma23,
566  &sigma23,
567  a1, 1,
568  a2, 1,
569  a3, 1 );
570  }
571  }
572 
573  if ( n_left == 1 )
574  {
575  g23 = buff_G + (g )*rs_G + (k )*cs_G;
576  a2 = buff_A + (g )*cs_A;
577  a3 = buff_A + (g + 1)*cs_A;
578 
579  gamma23 = g23->real;
580  sigma23 = g23->imag;
581 
582  is_ident23 = ( gamma23 == one && sigma23 == zero );
583 
584  if ( !is_ident23 )
585  MAC_Apply_G_mx2_asd( m_A,
586  &gamma23,
587  &sigma23,
588  a2, 1,
589  a3, 1 );
590  }
591  }
592 
593  // Shutdown stage
594 
595  for ( j = 1; j < k_G; ++j )
596  {
597  nG_app = k_G - j;
598  n_iter = nG_app / n_fuse;
599  n_left = nG_app % n_fuse;
600 
601  for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
602  {
603  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
604  g23 = buff_G + (g )*rs_G + (k )*cs_G;
605  a1 = buff_A + (g - 1)*cs_A;
606  a2 = buff_A + (g )*cs_A;
607  a3 = buff_A + (g + 1)*cs_A;
608 
609  gamma12 = g12->real;
610  sigma12 = g12->imag;
611  gamma23 = g23->real;
612  sigma23 = g23->imag;
613 
614  is_ident12 = ( gamma12 == one && sigma12 == zero );
615  is_ident23 = ( gamma23 == one && sigma23 == zero );
616 
617  if ( !is_ident12 && is_ident23 )
618  {
619  // Apply only to columns 1 and 2.
620 
621  MAC_Apply_G_mx2_asd( m_A,
622  &gamma12,
623  &sigma12,
624  a1, 1,
625  a2, 1 );
626  }
627  else if ( is_ident12 && !is_ident23 )
628  {
629  // Apply only to columns 2 and 3.
630 
631  MAC_Apply_G_mx2_asd( m_A,
632  &gamma23,
633  &sigma23,
634  a2, 1,
635  a3, 1 );
636  }
637  else if ( !is_ident12 && !is_ident23 )
638  {
639  // Apply to all three columns.
640 
641  MAC_Apply_G_mx3b_asd( m_A,
642  &gamma12,
643  &sigma12,
644  &gamma23,
645  &sigma23,
646  a1, 1,
647  a2, 1,
648  a3, 1 );
649  }
650  }
651 
652  if ( n_left == 1 )
653  {
654  g23 = buff_G + (g )*rs_G + (k )*cs_G;
655  a2 = buff_A + (g )*cs_A;
656  a3 = buff_A + (g + 1)*cs_A;
657 
658  gamma23 = g23->real;
659  sigma23 = g23->imag;
660 
661  is_ident23 = ( gamma23 == one && sigma23 == zero );
662 
663  if ( !is_ident23 )
664  MAC_Apply_G_mx2_asd( m_A,
665  &gamma23,
666  &sigma23,
667  a2, 1,
668  a3, 1 );
669  }
670  }
671 
672  return FLA_SUCCESS;
673 }
FLA_Error FLA_Apply_G_rf_asd_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var1.c:164
double imag
Definition: blis_type_defs.h:139
double bl1_d0(void)
Definition: bl1_constants.c:118
double real
Definition: blis_type_defs.h:139
int n_left
Definition: bl1_axmyv2.c:149
int i
Definition: bl1_axmyv2.c:145
double bl1_d1(void)
Definition: bl1_constants.c:54
dcomplex
Definition: blis_type_defs.h:137

◆ FLA_Apply_G_rf_asm_var6()

FLA_Error FLA_Apply_G_rf_asm_var6 ( FLA_Obj  G,
FLA_Obj  A 
)

References FLA_Apply_G_rf_asc_var6(), FLA_Apply_G_rf_asd_var6(), FLA_Apply_G_rf_ass_var6(), FLA_Apply_G_rf_asz_var6(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

32 {
33  FLA_Datatype datatype;
34  int k_G, m_A, n_A;
35  int rs_G, cs_G;
36  int rs_A, cs_A;
37 
38  datatype = FLA_Obj_datatype( A );
39 
40  k_G = FLA_Obj_width( G );
41  m_A = FLA_Obj_length( A );
42  n_A = FLA_Obj_width( A );
43 
44  rs_G = FLA_Obj_row_stride( G );
45  cs_G = FLA_Obj_col_stride( G );
46 
47  rs_A = FLA_Obj_row_stride( A );
48  cs_A = FLA_Obj_col_stride( A );
49 
50  switch ( datatype )
51  {
52  case FLA_FLOAT:
53  {
54  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
55  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56 
57  FLA_Apply_G_rf_ass_var6( k_G,
58  m_A,
59  n_A,
60  buff_G, rs_G, cs_G,
61  buff_A, rs_A, cs_A );
62 
63  break;
64  }
65 
66  case FLA_DOUBLE:
67  {
68  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
69  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70 
71  FLA_Apply_G_rf_asd_var6( k_G,
72  m_A,
73  n_A,
74  buff_G, rs_G, cs_G,
75  buff_A, rs_A, cs_A );
76 
77  break;
78  }
79 
80  case FLA_COMPLEX:
81  {
82  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
83  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
84 
85  FLA_Apply_G_rf_asc_var6( k_G,
86  m_A,
87  n_A,
88  buff_G, rs_G, cs_G,
89  buff_A, rs_A, cs_A );
90 
91  break;
92  }
93 
94  case FLA_DOUBLE_COMPLEX:
95  {
96  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
97  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
98 
99  FLA_Apply_G_rf_asz_var6( k_G,
100  m_A,
101  n_A,
102  buff_G, rs_G, cs_G,
103  buff_A, rs_A, cs_A );
104 
105  break;
106  }
107  }
108 
109  return FLA_SUCCESS;
110 }
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Error FLA_Apply_G_rf_ass_var6(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var6.c:113
FLA_Error FLA_Apply_G_rf_asc_var6(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var6.c:675
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
scomplex
Definition: blis_type_defs.h:132
FLA_Error FLA_Apply_G_rf_asz_var6(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var6.c:956
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dcomplex
Definition: blis_type_defs.h:137
FLA_Error FLA_Apply_G_rf_asd_var6(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var6.c:394

◆ FLA_Apply_G_rf_ass_var6()

FLA_Error FLA_Apply_G_rf_ass_var6 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_ass_var1(), i, scomplex::imag, n_left, and scomplex::real.

Referenced by FLA_Apply_G_rf_asm_var6(), and FLA_Apply_G_rf_bls_var6().

118 {
119  float one = bl1_s1();
120  float zero = bl1_s0();
121  float gamma12;
122  float sigma12;
123  float gamma23;
124  float sigma23;
125  float* a1;
126  float* a2;
127  float* a3;
128  scomplex* g12;
129  scomplex* g23;
130  int i, j, g, k;
131  int nG, nG_app;
132  int n_iter;
133  int n_left;
134  int k_minus_1;
135  int n_fuse;
136  int is_ident12, is_ident23;
137 
138  k_minus_1 = k_G - 1;
139  nG = n_A - 1;
140  n_fuse = 2;
141 
142  // Use the simple variant for nG < (k - 1) or k == 1.
143  if ( nG < k_minus_1 || k_G == 1 )
144  {
145  FLA_Apply_G_rf_ass_var1( k_G,
146  m_A,
147  n_A,
148  buff_G, rs_G, cs_G,
149  buff_A, rs_A, cs_A );
150  return FLA_SUCCESS;
151  }
152 
153 
154  // Start-up phase.
155 
156  for ( j = 0; j < k_minus_1; ++j )
157  {
158  nG_app = j + 1;
159  n_iter = nG_app / n_fuse;
160  n_left = nG_app % n_fuse;
161 
162  for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
163  {
164  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
165  g23 = buff_G + (g )*rs_G + (k )*cs_G;
166  a1 = buff_A + (g - 1)*cs_A;
167  a2 = buff_A + (g )*cs_A;
168  a3 = buff_A + (g + 1)*cs_A;
169 
170  gamma12 = g12->real;
171  sigma12 = g12->imag;
172  gamma23 = g23->real;
173  sigma23 = g23->imag;
174 
175  is_ident12 = ( gamma12 == one && sigma12 == zero );
176  is_ident23 = ( gamma23 == one && sigma23 == zero );
177 
178  if ( !is_ident12 && is_ident23 )
179  {
180  // Apply only to columns 1 and 2.
181 
182  MAC_Apply_G_mx2_ass( m_A,
183  &gamma12,
184  &sigma12,
185  a1, 1,
186  a2, 1 );
187  }
188  else if ( is_ident12 && !is_ident23 )
189  {
190  // Apply only to columns 2 and 3.
191 
192  MAC_Apply_G_mx2_ass( m_A,
193  &gamma23,
194  &sigma23,
195  a2, 1,
196  a3, 1 );
197  }
198  else if ( !is_ident12 && !is_ident23 )
199  {
200  // Apply to all three columns.
201 
202  MAC_Apply_G_mx3b_ass( m_A,
203  &gamma12,
204  &sigma12,
205  &gamma23,
206  &sigma23,
207  a1, 1,
208  a2, 1,
209  a3, 1 );
210  }
211  }
212 
213  if ( n_left == 1 )
214  {
215  g23 = buff_G + (g )*rs_G + (k )*cs_G;
216  a2 = buff_A + (g )*cs_A;
217  a3 = buff_A + (g + 1)*cs_A;
218 
219  gamma23 = g23->real;
220  sigma23 = g23->imag;
221 
222  is_ident23 = ( gamma23 == one && sigma23 == zero );
223 
224  if ( !is_ident23 )
225  MAC_Apply_G_mx2_ass( m_A,
226  &gamma23,
227  &sigma23,
228  a2, 1,
229  a3, 1 );
230  }
231  }
232 
233  // Pipeline stage
234 
235  for ( j = k_minus_1; j < nG; ++j )
236  {
237  nG_app = k_G;
238  n_iter = nG_app / n_fuse;
239  n_left = nG_app % n_fuse;
240 
241  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
242  {
243  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
244  g23 = buff_G + (g )*rs_G + (k )*cs_G;
245  a1 = buff_A + (g - 1)*cs_A;
246  a2 = buff_A + (g )*cs_A;
247  a3 = buff_A + (g + 1)*cs_A;
248 
249  gamma12 = g12->real;
250  sigma12 = g12->imag;
251  gamma23 = g23->real;
252  sigma23 = g23->imag;
253 
254  is_ident12 = ( gamma12 == one && sigma12 == zero );
255  is_ident23 = ( gamma23 == one && sigma23 == zero );
256 
257  if ( !is_ident12 && is_ident23 )
258  {
259  // Apply only to columns 1 and 2.
260 
261  MAC_Apply_G_mx2_ass( m_A,
262  &gamma12,
263  &sigma12,
264  a1, 1,
265  a2, 1 );
266  }
267  else if ( is_ident12 && !is_ident23 )
268  {
269  // Apply only to columns 2 and 3.
270 
271  MAC_Apply_G_mx2_ass( m_A,
272  &gamma23,
273  &sigma23,
274  a2, 1,
275  a3, 1 );
276  }
277  else if ( !is_ident12 && !is_ident23 )
278  {
279  // Apply to all three columns.
280 
281  MAC_Apply_G_mx3b_ass( m_A,
282  &gamma12,
283  &sigma12,
284  &gamma23,
285  &sigma23,
286  a1, 1,
287  a2, 1,
288  a3, 1 );
289  }
290  }
291 
292  if ( n_left == 1 )
293  {
294  g23 = buff_G + (g )*rs_G + (k )*cs_G;
295  a2 = buff_A + (g )*cs_A;
296  a3 = buff_A + (g + 1)*cs_A;
297 
298  gamma23 = g23->real;
299  sigma23 = g23->imag;
300 
301  is_ident23 = ( gamma23 == one && sigma23 == zero );
302 
303  if ( !is_ident23 )
304  MAC_Apply_G_mx2_ass( m_A,
305  &gamma23,
306  &sigma23,
307  a2, 1,
308  a3, 1 );
309  }
310  }
311 
312  // Shutdown stage
313 
314  for ( j = 1; j < k_G; ++j )
315  {
316  nG_app = k_G - j;
317  n_iter = nG_app / n_fuse;
318  n_left = nG_app % n_fuse;
319 
320  for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
321  {
322  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
323  g23 = buff_G + (g )*rs_G + (k )*cs_G;
324  a1 = buff_A + (g - 1)*cs_A;
325  a2 = buff_A + (g )*cs_A;
326  a3 = buff_A + (g + 1)*cs_A;
327 
328  gamma12 = g12->real;
329  sigma12 = g12->imag;
330  gamma23 = g23->real;
331  sigma23 = g23->imag;
332 
333  is_ident12 = ( gamma12 == one && sigma12 == zero );
334  is_ident23 = ( gamma23 == one && sigma23 == zero );
335 
336  if ( !is_ident12 && is_ident23 )
337  {
338  // Apply only to columns 1 and 2.
339 
340  MAC_Apply_G_mx2_ass( m_A,
341  &gamma12,
342  &sigma12,
343  a1, 1,
344  a2, 1 );
345  }
346  else if ( is_ident12 && !is_ident23 )
347  {
348  // Apply only to columns 2 and 3.
349 
350  MAC_Apply_G_mx2_ass( m_A,
351  &gamma23,
352  &sigma23,
353  a2, 1,
354  a3, 1 );
355  }
356  else if ( !is_ident12 && !is_ident23 )
357  {
358  // Apply to all three columns.
359 
360  MAC_Apply_G_mx3b_ass( m_A,
361  &gamma12,
362  &sigma12,
363  &gamma23,
364  &sigma23,
365  a1, 1,
366  a2, 1,
367  a3, 1 );
368  }
369  }
370 
371  if ( n_left == 1 )
372  {
373  g23 = buff_G + (g )*rs_G + (k )*cs_G;
374  a2 = buff_A + (g )*cs_A;
375  a3 = buff_A + (g + 1)*cs_A;
376 
377  gamma23 = g23->real;
378  sigma23 = g23->imag;
379 
380  is_ident23 = ( gamma23 == one && sigma23 == zero );
381 
382  if ( !is_ident23 )
383  MAC_Apply_G_mx2_ass( m_A,
384  &gamma23,
385  &sigma23,
386  a2, 1,
387  a3, 1 );
388  }
389  }
390 
391  return FLA_SUCCESS;
392 }
float real
Definition: blis_type_defs.h:134
float bl1_s1(void)
Definition: bl1_constants.c:47
int n_left
Definition: bl1_axmyv2.c:149
FLA_Error FLA_Apply_G_rf_ass_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var1.c:113
scomplex
Definition: blis_type_defs.h:132
int i
Definition: bl1_axmyv2.c:145
float bl1_s0(void)
Definition: bl1_constants.c:111
float imag
Definition: blis_type_defs.h:134

◆ FLA_Apply_G_rf_asz_var6()

FLA_Error FLA_Apply_G_rf_asz_var6 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A 
)

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asz_var1(), i, dcomplex::imag, n_left, and dcomplex::real.

Referenced by FLA_Apply_G_rf_asm_var6(), and FLA_Apply_G_rf_blz_var6().

961 {
962  double one = bl1_d1();
963  double zero = bl1_d0();
964  double gamma12;
965  double sigma12;
966  double gamma23;
967  double sigma23;
968  dcomplex* a1;
969  dcomplex* a2;
970  dcomplex* a3;
971  dcomplex* g12;
972  dcomplex* g23;
973  int i, j, g, k;
974  int nG, nG_app;
975  int n_iter;
976  int n_left;
977  int k_minus_1;
978  int n_fuse;
979  int is_ident12, is_ident23;
980 
981  k_minus_1 = k_G - 1;
982  nG = n_A - 1;
983  n_fuse = 2;
984 
985  // Use the simple variant for nG < (k - 1) or k == 1.
986  if ( nG < k_minus_1 || k_G == 1 )
987  {
988  FLA_Apply_G_rf_asz_var1( k_G,
989  m_A,
990  n_A,
991  buff_G, rs_G, cs_G,
992  buff_A, rs_A, cs_A );
993  return FLA_SUCCESS;
994  }
995 
996 
997  // Start-up phase.
998 
999  for ( j = 0; j < k_minus_1; ++j )
1000  {
1001  nG_app = j + 1;
1002  n_iter = nG_app / n_fuse;
1003  n_left = nG_app % n_fuse;
1004 
1005  //for ( k = 0, g = nG_app - 1; k < nG_app; k += n_fuse, g -= n_fuse )
1006  for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
1007  {
1008  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1009  g23 = buff_G + (g )*rs_G + (k )*cs_G;
1010  a1 = buff_A + (g - 1)*cs_A;
1011  a2 = buff_A + (g )*cs_A;
1012  a3 = buff_A + (g + 1)*cs_A;
1013 
1014  gamma12 = g12->real;
1015  sigma12 = g12->imag;
1016  gamma23 = g23->real;
1017  sigma23 = g23->imag;
1018 
1019  is_ident12 = ( gamma12 == one && sigma12 == zero );
1020  is_ident23 = ( gamma23 == one && sigma23 == zero );
1021 
1022  if ( !is_ident12 && is_ident23 )
1023  {
1024  // Apply only to columns 1 and 2.
1025 
1026  MAC_Apply_G_mx2_asz( m_A,
1027  &gamma12,
1028  &sigma12,
1029  a1, 1,
1030  a2, 1 );
1031  }
1032  else if ( is_ident12 && !is_ident23 )
1033  {
1034  // Apply only to columns 2 and 3.
1035 
1036  MAC_Apply_G_mx2_asz( m_A,
1037  &gamma23,
1038  &sigma23,
1039  a2, 1,
1040  a3, 1 );
1041  }
1042  else if ( !is_ident12 && !is_ident23 )
1043  {
1044  // Apply to all three columns.
1045 
1046  MAC_Apply_G_mx3b_asz( m_A,
1047  &gamma12,
1048  &sigma12,
1049  &gamma23,
1050  &sigma23,
1051  a1, 1,
1052  a2, 1,
1053  a3, 1 );
1054  }
1055  }
1056  //for ( k = 0; k < n_left; k += 1, g -= 1 )
1057  if ( n_left == 1 )
1058  {
1059  g23 = buff_G + (g )*rs_G + (k )*cs_G;
1060  a2 = buff_A + (g )*cs_A;
1061  a3 = buff_A + (g + 1)*cs_A;
1062 
1063  gamma23 = g23->real;
1064  sigma23 = g23->imag;
1065 
1066  is_ident23 = ( gamma23 == one && sigma23 == zero );
1067 
1068  if ( !is_ident23 )
1069  MAC_Apply_G_mx2_asz( m_A,
1070  &gamma23,
1071  &sigma23,
1072  a2, 1,
1073  a3, 1 );
1074  }
1075  }
1076 
1077  // Pipeline stage
1078 
1079  for ( j = k_minus_1; j < nG; ++j )
1080  {
1081  nG_app = k_G;
1082  n_iter = nG_app / n_fuse;
1083  n_left = nG_app % n_fuse;
1084 
1085  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
1086  {
1087  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1088  g23 = buff_G + (g )*rs_G + (k )*cs_G;
1089  a1 = buff_A + (g - 1)*cs_A;
1090  a2 = buff_A + (g )*cs_A;
1091  a3 = buff_A + (g + 1)*cs_A;
1092 
1093  gamma12 = g12->real;
1094  sigma12 = g12->imag;
1095  gamma23 = g23->real;
1096  sigma23 = g23->imag;
1097 
1098  is_ident12 = ( gamma12 == one && sigma12 == zero );
1099  is_ident23 = ( gamma23 == one && sigma23 == zero );
1100 
1101  if ( !is_ident12 && is_ident23 )
1102  {
1103  // Apply only to columns 1 and 2.
1104 
1105  MAC_Apply_G_mx2_asz( m_A,
1106  &gamma12,
1107  &sigma12,
1108  a1, 1,
1109  a2, 1 );
1110  }
1111  else if ( is_ident12 && !is_ident23 )
1112  {
1113  // Apply only to columns 2 and 3.
1114 
1115  MAC_Apply_G_mx2_asz( m_A,
1116  &gamma23,
1117  &sigma23,
1118  a2, 1,
1119  a3, 1 );
1120  }
1121  else if ( !is_ident12 && !is_ident23 )
1122  {
1123  // Apply to all three columns.
1124 
1125  MAC_Apply_G_mx3b_asz( m_A,
1126  &gamma12,
1127  &sigma12,
1128  &gamma23,
1129  &sigma23,
1130  a1, 1,
1131  a2, 1,
1132  a3, 1 );
1133  }
1134  }
1135  //for ( k = 0; k < n_left; k += 1, g -= 1 )
1136  if ( n_left == 1 )
1137  {
1138  g23 = buff_G + (g )*rs_G + (k )*cs_G;
1139  a2 = buff_A + (g )*cs_A;
1140  a3 = buff_A + (g + 1)*cs_A;
1141 
1142  gamma23 = g23->real;
1143  sigma23 = g23->imag;
1144 
1145  is_ident23 = ( gamma23 == one && sigma23 == zero );
1146 
1147  if ( !is_ident23 )
1148  MAC_Apply_G_mx2_asz( m_A,
1149  &gamma23,
1150  &sigma23,
1151  a2, 1,
1152  a3, 1 );
1153  }
1154  }
1155 
1156  // Shutdown stage
1157 
1158  for ( j = 1; j < k_G; ++j )
1159  {
1160  nG_app = k_G - j;
1161  n_iter = nG_app / n_fuse;
1162  n_left = nG_app % n_fuse;
1163 
1164  for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
1165  {
1166  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1167  g23 = buff_G + (g )*rs_G + (k )*cs_G;
1168  a1 = buff_A + (g - 1)*cs_A;
1169  a2 = buff_A + (g )*cs_A;
1170  a3 = buff_A + (g + 1)*cs_A;
1171 
1172  gamma12 = g12->real;
1173  sigma12 = g12->imag;
1174  gamma23 = g23->real;
1175  sigma23 = g23->imag;
1176 
1177  is_ident12 = ( gamma12 == one && sigma12 == zero );
1178  is_ident23 = ( gamma23 == one && sigma23 == zero );
1179 
1180  if ( !is_ident12 && is_ident23 )
1181  {
1182  // Apply only to columns 1 and 2.
1183 
1184  MAC_Apply_G_mx2_asz( m_A,
1185  &gamma12,
1186  &sigma12,
1187  a1, 1,
1188  a2, 1 );
1189  }
1190  else if ( is_ident12 && !is_ident23 )
1191  {
1192  // Apply only to columns 2 and 3.
1193 
1194  MAC_Apply_G_mx2_asz( m_A,
1195  &gamma23,
1196  &sigma23,
1197  a2, 1,
1198  a3, 1 );
1199  }
1200  else if ( !is_ident12 && !is_ident23 )
1201  {
1202  // Apply to all three columns.
1203 
1204  MAC_Apply_G_mx3b_asz( m_A,
1205  &gamma12,
1206  &sigma12,
1207  &gamma23,
1208  &sigma23,
1209  a1, 1,
1210  a2, 1,
1211  a3, 1 );
1212  }
1213  }
1214  //for ( k = 0; k < nG_app_left; k += 1, g -= 1 )
1215  if ( n_left == 1 )
1216  {
1217  g23 = buff_G + (g )*rs_G + (k )*cs_G;
1218  a2 = buff_A + (g )*cs_A;
1219  a3 = buff_A + (g + 1)*cs_A;
1220 
1221  gamma23 = g23->real;
1222  sigma23 = g23->imag;
1223 
1224  is_ident23 = ( gamma23 == one && sigma23 == zero );
1225 
1226  if ( !is_ident23 )
1227  MAC_Apply_G_mx2_asz( m_A,
1228  &gamma23,
1229  &sigma23,
1230  a2, 1,
1231  a3, 1 );
1232  }
1233  }
1234 
1235  return FLA_SUCCESS;
1236 }
FLA_Error FLA_Apply_G_rf_asz_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var1.c:267
double imag
Definition: blis_type_defs.h:139
double bl1_d0(void)
Definition: bl1_constants.c:118
double real
Definition: blis_type_defs.h:139
int n_left
Definition: bl1_axmyv2.c:149
int i
Definition: bl1_axmyv2.c:145
double bl1_d1(void)
Definition: bl1_constants.c:54
dcomplex
Definition: blis_type_defs.h:137