libflame  revision_anchor
Functions
FLA_Apply_G_rf_opt_var9.c File Reference

(r)

Functions

FLA_Error FLA_Apply_G_rf_opt_var9 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ops_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opd_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opc_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opz_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 

Function Documentation

◆ FLA_Apply_G_rf_opc_var9()

FLA_Error FLA_Apply_G_rf_opc_var9 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A 
)

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_opc_var1(), i, scomplex::imag, n_left, and scomplex::real.

Referenced by FLA_Apply_G_rf_opt_var9().

652 {
653  float one = bl1_s1();
654  float zero = bl1_s0();
655  float gamma12;
656  float sigma12;
657  float gamma23;
658  float sigma23;
659  scomplex* a1;
660  scomplex* a2;
661  scomplex* a3;
662  scomplex* g12;
663  scomplex* g23;
664  int i, j, g, k;
665  int nG, nG_app;
666  int n_iter;
667  int n_left;
668  int k_minus_1;
669  int n_fuse;
670  int is_ident12, is_ident23;
671 
672  k_minus_1 = k_G - 1;
673  nG = n_A - 1;
674  n_fuse = 2;
675 
676  // Use the simple variant for nG < (k - 1) or k == 1.
677  if ( nG < 2*k_minus_1 || k_G == 1 )
678  {
680  m_A,
681  n_A,
682  buff_G, rs_G, cs_G,
683  buff_A, rs_A, cs_A );
684  return FLA_SUCCESS;
685  }
686 
687 
688  // Start-up phase.
689 
690  for ( j = -1; j < k_minus_1; j += n_fuse )
691  {
692  nG_app = j + 1;
693  n_iter = nG_app;
694  n_left = 1;
695 
696  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
697  {
698  g12 = buff_G + (g )*rs_G + (k )*cs_G;
699  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
700  a1 = buff_A + (g )*cs_A;
701  a2 = buff_A + (g + 1)*cs_A;
702  a3 = buff_A + (g + 2)*cs_A;
703 
704  gamma12 = g12->real;
705  sigma12 = g12->imag;
706  gamma23 = g23->real;
707  sigma23 = g23->imag;
708 
709  is_ident12 = ( gamma12 == one && sigma12 == zero );
710  is_ident23 = ( gamma23 == one && sigma23 == zero );
711 
712  if ( !is_ident12 && is_ident23 )
713  {
714  // Apply only to columns 1 and 2.
715 
716  MAC_Apply_G_mx2_opc( m_A,
717  &gamma12,
718  &sigma12,
719  a1, rs_A,
720  a2, rs_A );
721  }
722  else if ( is_ident12 && !is_ident23 )
723  {
724  // Apply only to columns 2 and 3.
725 
726  MAC_Apply_G_mx2_opc( m_A,
727  &gamma23,
728  &sigma23,
729  a2, rs_A,
730  a3, rs_A );
731  }
732  else if ( !is_ident12 && !is_ident23 )
733  {
734  // Apply to all three columns.
735 
736  MAC_Apply_G_mx3_opc( m_A,
737  &gamma12,
738  &sigma12,
739  &gamma23,
740  &sigma23,
741  a1, rs_A,
742  a2, rs_A,
743  a3, rs_A );
744  }
745  }
746 
747  if ( n_left == 1 )
748  {
749  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
750  a2 = buff_A + (g + 1)*cs_A;
751  a3 = buff_A + (g + 2)*cs_A;
752 
753  gamma23 = g23->real;
754  sigma23 = g23->imag;
755 
756  is_ident23 = ( gamma23 == one && sigma23 == zero );
757 
758  if ( !is_ident23 )
759  MAC_Apply_G_mx2_opc( m_A,
760  &gamma23,
761  &sigma23,
762  a2, rs_A,
763  a3, rs_A );
764  }
765  }
766 
767  // Pipeline stage
768 
769  for ( ; j < nG - 1; j += n_fuse )
770  {
771  nG_app = k_G;
772  n_iter = nG_app;
773  n_left = 0;
774 
775  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
776  {
777  g12 = buff_G + (g )*rs_G + (k )*cs_G;
778  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
779  a1 = buff_A + (g )*cs_A;
780  a2 = buff_A + (g + 1)*cs_A;
781  a3 = buff_A + (g + 2)*cs_A;
782 
783  gamma12 = g12->real;
784  sigma12 = g12->imag;
785  gamma23 = g23->real;
786  sigma23 = g23->imag;
787 
788  is_ident12 = ( gamma12 == one && sigma12 == zero );
789  is_ident23 = ( gamma23 == one && sigma23 == zero );
790 
791  if ( !is_ident12 && is_ident23 )
792  {
793  // Apply only to columns 1 and 2.
794 
795  MAC_Apply_G_mx2_opc( m_A,
796  &gamma12,
797  &sigma12,
798  a1, rs_A,
799  a2, rs_A );
800  }
801  else if ( is_ident12 && !is_ident23 )
802  {
803  // Apply only to columns 2 and 3.
804 
805  MAC_Apply_G_mx2_opc( m_A,
806  &gamma23,
807  &sigma23,
808  a2, rs_A,
809  a3, rs_A );
810  }
811  else if ( !is_ident12 && !is_ident23 )
812  {
813  // Apply to all three columns.
814 
815  MAC_Apply_G_mx3_opc( m_A,
816  &gamma12,
817  &sigma12,
818  &gamma23,
819  &sigma23,
820  a1, rs_A,
821  a2, rs_A,
822  a3, rs_A );
823  }
824  }
825  }
826 
827  // Shutdown stage
828 
829  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
830  {
831  g = nG - 1;
832  k = j;
833 
834  n_left = 1;
835  if ( n_left == 1 )
836  {
837  g12 = buff_G + (g )*rs_G + (k )*cs_G;
838  a1 = buff_A + (g )*cs_A;
839  a2 = buff_A + (g + 1)*cs_A;
840 
841  gamma12 = g12->real;
842  sigma12 = g12->imag;
843 
844  is_ident12 = ( gamma12 == one && sigma12 == zero );
845 
846  if ( !is_ident12 )
847  MAC_Apply_G_mx2_opc( m_A,
848  &gamma12,
849  &sigma12,
850  a1, rs_A,
851  a2, rs_A );
852  ++k;
853  --g;
854  }
855 
856  nG_app = k_minus_1 - j;
857  n_iter = nG_app;
858 
859  for ( i = 0; i < n_iter; ++i, ++k, --g )
860  {
861  g12 = buff_G + (g )*rs_G + (k )*cs_G;
862  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
863  a1 = buff_A + (g )*cs_A;
864  a2 = buff_A + (g + 1)*cs_A;
865  a3 = buff_A + (g + 2)*cs_A;
866 
867  gamma12 = g12->real;
868  sigma12 = g12->imag;
869  gamma23 = g23->real;
870  sigma23 = g23->imag;
871 
872  is_ident12 = ( gamma12 == one && sigma12 == zero );
873  is_ident23 = ( gamma23 == one && sigma23 == zero );
874 
875  if ( !is_ident12 && is_ident23 )
876  {
877  // Apply only to columns 1 and 2.
878 
879  MAC_Apply_G_mx2_opc( m_A,
880  &gamma12,
881  &sigma12,
882  a1, rs_A,
883  a2, rs_A );
884  }
885  else if ( is_ident12 && !is_ident23 )
886  {
887  // Apply only to columns 2 and 3.
888 
889  MAC_Apply_G_mx2_opc( m_A,
890  &gamma23,
891  &sigma23,
892  a2, rs_A,
893  a3, rs_A );
894  }
895  else if ( !is_ident12 && !is_ident23 )
896  {
897  // Apply to all three columns.
898 
899  MAC_Apply_G_mx3_opc( m_A,
900  &gamma12,
901  &sigma12,
902  &gamma23,
903  &sigma23,
904  a1, rs_A,
905  a2, rs_A,
906  a3, rs_A );
907  }
908  }
909  }
910 
911  return FLA_SUCCESS;
912 }
float real
Definition: blis_type_defs.h:134
float bl1_s1(void)
Definition: bl1_constants.c:47
int n_left
Definition: bl1_axmyv2.c:149
Definition: blis_type_defs.h:132
FLA_Error FLA_Apply_G_rf_opc_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var1.c:215
int i
Definition: bl1_axmyv2.c:145
float bl1_s0(void)
Definition: bl1_constants.c:111
float imag
Definition: blis_type_defs.h:134

◆ FLA_Apply_G_rf_opd_var9()

FLA_Error FLA_Apply_G_rf_opd_var9 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opd_var1(), i, dcomplex::imag, n_left, and dcomplex::real.

Referenced by FLA_Apply_G_rf_opt_var9().

385 {
386  double one = bl1_d1();
387  double zero = bl1_d0();
388  double gamma12;
389  double sigma12;
390  double gamma23;
391  double sigma23;
392  double* a1;
393  double* a2;
394  double* a3;
395  dcomplex* g12;
396  dcomplex* g23;
397  int i, j, g, k;
398  int nG, nG_app;
399  int n_iter;
400  int n_left;
401  int k_minus_1;
402  int n_fuse;
403  int is_ident12, is_ident23;
404 
405  k_minus_1 = k_G - 1;
406  nG = n_A - 1;
407  n_fuse = 2;
408 
409  // Use the simple variant for nG < (k - 1) or k == 1.
410  if ( nG < 2*k_minus_1 || k_G == 1 )
411  {
413  m_A,
414  n_A,
415  buff_G, rs_G, cs_G,
416  buff_A, rs_A, cs_A );
417  return FLA_SUCCESS;
418  }
419 
420 
421  // Start-up phase.
422 
423  for ( j = -1; j < k_minus_1; j += n_fuse )
424  {
425  nG_app = j + 1;
426  n_iter = nG_app;
427  n_left = 1;
428 
429  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
430  {
431  g12 = buff_G + (g )*rs_G + (k )*cs_G;
432  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
433  a1 = buff_A + (g )*cs_A;
434  a2 = buff_A + (g + 1)*cs_A;
435  a3 = buff_A + (g + 2)*cs_A;
436 
437  gamma12 = g12->real;
438  sigma12 = g12->imag;
439  gamma23 = g23->real;
440  sigma23 = g23->imag;
441 
442  is_ident12 = ( gamma12 == one && sigma12 == zero );
443  is_ident23 = ( gamma23 == one && sigma23 == zero );
444 
445  if ( !is_ident12 && is_ident23 )
446  {
447  // Apply only to columns 1 and 2.
448 
449  MAC_Apply_G_mx2_opd( m_A,
450  &gamma12,
451  &sigma12,
452  a1, rs_A,
453  a2, rs_A );
454  }
455  else if ( is_ident12 && !is_ident23 )
456  {
457  // Apply only to columns 2 and 3.
458 
459  MAC_Apply_G_mx2_opd( m_A,
460  &gamma23,
461  &sigma23,
462  a2, rs_A,
463  a3, rs_A );
464  }
465  else if ( !is_ident12 && !is_ident23 )
466  {
467  // Apply to all three columns.
468 
469  MAC_Apply_G_mx3_opd( m_A,
470  &gamma12,
471  &sigma12,
472  &gamma23,
473  &sigma23,
474  a1, rs_A,
475  a2, rs_A,
476  a3, rs_A );
477  }
478  }
479 
480  if ( n_left == 1 )
481  {
482  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
483  a2 = buff_A + (g + 1)*cs_A;
484  a3 = buff_A + (g + 2)*cs_A;
485 
486  gamma23 = g23->real;
487  sigma23 = g23->imag;
488 
489  is_ident23 = ( gamma23 == one && sigma23 == zero );
490 
491  if ( !is_ident23 )
492  MAC_Apply_G_mx2_opd( m_A,
493  &gamma23,
494  &sigma23,
495  a2, rs_A,
496  a3, rs_A );
497  }
498  }
499 
500  // Pipeline stage
501 
502  for ( ; j < nG - 1; j += n_fuse )
503  {
504  nG_app = k_G;
505  n_iter = nG_app;
506  n_left = 0;
507 
508  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
509  {
510  g12 = buff_G + (g )*rs_G + (k )*cs_G;
511  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
512  a1 = buff_A + (g )*cs_A;
513  a2 = buff_A + (g + 1)*cs_A;
514  a3 = buff_A + (g + 2)*cs_A;
515 
516  gamma12 = g12->real;
517  sigma12 = g12->imag;
518  gamma23 = g23->real;
519  sigma23 = g23->imag;
520 
521  is_ident12 = ( gamma12 == one && sigma12 == zero );
522  is_ident23 = ( gamma23 == one && sigma23 == zero );
523 
524  if ( !is_ident12 && is_ident23 )
525  {
526  // Apply only to columns 1 and 2.
527 
528  MAC_Apply_G_mx2_opd( m_A,
529  &gamma12,
530  &sigma12,
531  a1, rs_A,
532  a2, rs_A );
533  }
534  else if ( is_ident12 && !is_ident23 )
535  {
536  // Apply only to columns 2 and 3.
537 
538  MAC_Apply_G_mx2_opd( m_A,
539  &gamma23,
540  &sigma23,
541  a2, rs_A,
542  a3, rs_A );
543  }
544  else if ( !is_ident12 && !is_ident23 )
545  {
546  // Apply to all three columns.
547 
548  MAC_Apply_G_mx3_opd( m_A,
549  &gamma12,
550  &sigma12,
551  &gamma23,
552  &sigma23,
553  a1, rs_A,
554  a2, rs_A,
555  a3, rs_A );
556  }
557  }
558  }
559 
560  // Shutdown stage
561 
562  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
563  {
564  g = nG - 1;
565  k = j;
566 
567  n_left = 1;
568  if ( n_left == 1 )
569  {
570  g12 = buff_G + (g )*rs_G + (k )*cs_G;
571  a1 = buff_A + (g )*cs_A;
572  a2 = buff_A + (g + 1)*cs_A;
573 
574  gamma12 = g12->real;
575  sigma12 = g12->imag;
576 
577  is_ident12 = ( gamma12 == one && sigma12 == zero );
578 
579  if ( !is_ident12 )
580  MAC_Apply_G_mx2_opd( m_A,
581  &gamma12,
582  &sigma12,
583  a1, rs_A,
584  a2, rs_A );
585  ++k;
586  --g;
587  }
588 
589  nG_app = k_minus_1 - j;
590  n_iter = nG_app;
591 
592  for ( i = 0; i < n_iter; ++i, ++k, --g )
593  {
594  g12 = buff_G + (g )*rs_G + (k )*cs_G;
595  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
596  a1 = buff_A + (g )*cs_A;
597  a2 = buff_A + (g + 1)*cs_A;
598  a3 = buff_A + (g + 2)*cs_A;
599 
600  gamma12 = g12->real;
601  sigma12 = g12->imag;
602  gamma23 = g23->real;
603  sigma23 = g23->imag;
604 
605  is_ident12 = ( gamma12 == one && sigma12 == zero );
606  is_ident23 = ( gamma23 == one && sigma23 == zero );
607 
608  if ( !is_ident12 && is_ident23 )
609  {
610  // Apply only to columns 1 and 2.
611 
612  MAC_Apply_G_mx2_opd( m_A,
613  &gamma12,
614  &sigma12,
615  a1, rs_A,
616  a2, rs_A );
617  }
618  else if ( is_ident12 && !is_ident23 )
619  {
620  // Apply only to columns 2 and 3.
621 
622  MAC_Apply_G_mx2_opd( m_A,
623  &gamma23,
624  &sigma23,
625  a2, rs_A,
626  a3, rs_A );
627  }
628  else if ( !is_ident12 && !is_ident23 )
629  {
630  // Apply to all three columns.
631 
632  MAC_Apply_G_mx3_opd( m_A,
633  &gamma12,
634  &sigma12,
635  &gamma23,
636  &sigma23,
637  a1, rs_A,
638  a2, rs_A,
639  a3, rs_A );
640  }
641  }
642  }
643 
644  return FLA_SUCCESS;
645 }
double imag
Definition: blis_type_defs.h:139
double bl1_d0(void)
Definition: bl1_constants.c:118
double real
Definition: blis_type_defs.h:139
int n_left
Definition: bl1_axmyv2.c:149
int i
Definition: bl1_axmyv2.c:145
double bl1_d1(void)
Definition: bl1_constants.c:54
Definition: blis_type_defs.h:137
FLA_Error FLA_Apply_G_rf_opd_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var1.c:164

◆ FLA_Apply_G_rf_ops_var9()

FLA_Error FLA_Apply_G_rf_ops_var9 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_ops_var1(), i, scomplex::imag, n_left, and scomplex::real.

Referenced by FLA_Apply_G_rf_opt_var9().

118 {
119  float one = bl1_s1();
120  float zero = bl1_s0();
121  float gamma12;
122  float sigma12;
123  float gamma23;
124  float sigma23;
125  float* a1;
126  float* a2;
127  float* a3;
128  scomplex* g12;
129  scomplex* g23;
130  int i, j, g, k;
131  int nG, nG_app;
132  int n_iter;
133  int n_left;
134  int k_minus_1;
135  int n_fuse;
136  int is_ident12, is_ident23;
137 
138  k_minus_1 = k_G - 1;
139  nG = n_A - 1;
140  n_fuse = 2;
141 
142  // Use the simple variant for nG < (k - 1) or k == 1.
143  if ( nG < 2*k_minus_1 || k_G == 1 )
144  {
146  m_A,
147  n_A,
148  buff_G, rs_G, cs_G,
149  buff_A, rs_A, cs_A );
150  return FLA_SUCCESS;
151  }
152 
153 
154  // Start-up phase.
155 
156  for ( j = -1; j < k_minus_1; j += n_fuse )
157  {
158  nG_app = j + 1;
159  n_iter = nG_app;
160  n_left = 1;
161 
162  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
163  {
164  g12 = buff_G + (g )*rs_G + (k )*cs_G;
165  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
166  a1 = buff_A + (g )*cs_A;
167  a2 = buff_A + (g + 1)*cs_A;
168  a3 = buff_A + (g + 2)*cs_A;
169 
170  gamma12 = g12->real;
171  sigma12 = g12->imag;
172  gamma23 = g23->real;
173  sigma23 = g23->imag;
174 
175  is_ident12 = ( gamma12 == one && sigma12 == zero );
176  is_ident23 = ( gamma23 == one && sigma23 == zero );
177 
178  if ( !is_ident12 && is_ident23 )
179  {
180  // Apply only to columns 1 and 2.
181 
182  MAC_Apply_G_mx2_ops( m_A,
183  &gamma12,
184  &sigma12,
185  a1, rs_A,
186  a2, rs_A );
187  }
188  else if ( is_ident12 && !is_ident23 )
189  {
190  // Apply only to columns 2 and 3.
191 
192  MAC_Apply_G_mx2_ops( m_A,
193  &gamma23,
194  &sigma23,
195  a2, rs_A,
196  a3, rs_A );
197  }
198  else if ( !is_ident12 && !is_ident23 )
199  {
200  // Apply to all three columns.
201 
202  MAC_Apply_G_mx3_ops( m_A,
203  &gamma12,
204  &sigma12,
205  &gamma23,
206  &sigma23,
207  a1, rs_A,
208  a2, rs_A,
209  a3, rs_A );
210  }
211  }
212 
213  if ( n_left == 1 )
214  {
215  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
216  a2 = buff_A + (g + 1)*cs_A;
217  a3 = buff_A + (g + 2)*cs_A;
218 
219  gamma23 = g23->real;
220  sigma23 = g23->imag;
221 
222  is_ident23 = ( gamma23 == one && sigma23 == zero );
223 
224  if ( !is_ident23 )
225  MAC_Apply_G_mx2_ops( m_A,
226  &gamma23,
227  &sigma23,
228  a2, rs_A,
229  a3, rs_A );
230  }
231  }
232 
233  // Pipeline stage
234 
235  for ( ; j < nG - 1; j += n_fuse )
236  {
237  nG_app = k_G;
238  n_iter = nG_app;
239  n_left = 0;
240 
241  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
242  {
243  g12 = buff_G + (g )*rs_G + (k )*cs_G;
244  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
245  a1 = buff_A + (g )*cs_A;
246  a2 = buff_A + (g + 1)*cs_A;
247  a3 = buff_A + (g + 2)*cs_A;
248 
249  gamma12 = g12->real;
250  sigma12 = g12->imag;
251  gamma23 = g23->real;
252  sigma23 = g23->imag;
253 
254  is_ident12 = ( gamma12 == one && sigma12 == zero );
255  is_ident23 = ( gamma23 == one && sigma23 == zero );
256 
257  if ( !is_ident12 && is_ident23 )
258  {
259  // Apply only to columns 1 and 2.
260 
261  MAC_Apply_G_mx2_ops( m_A,
262  &gamma12,
263  &sigma12,
264  a1, rs_A,
265  a2, rs_A );
266  }
267  else if ( is_ident12 && !is_ident23 )
268  {
269  // Apply only to columns 2 and 3.
270 
271  MAC_Apply_G_mx2_ops( m_A,
272  &gamma23,
273  &sigma23,
274  a2, rs_A,
275  a3, rs_A );
276  }
277  else if ( !is_ident12 && !is_ident23 )
278  {
279  // Apply to all three columns.
280 
281  MAC_Apply_G_mx3_ops( m_A,
282  &gamma12,
283  &sigma12,
284  &gamma23,
285  &sigma23,
286  a1, rs_A,
287  a2, rs_A,
288  a3, rs_A );
289  }
290  }
291  }
292 
293  // Shutdown stage
294 
295  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
296  {
297  g = nG - 1;
298  k = j;
299 
300  n_left = 1;
301  if ( n_left == 1 )
302  {
303  g12 = buff_G + (g )*rs_G + (k )*cs_G;
304  a1 = buff_A + (g )*cs_A;
305  a2 = buff_A + (g + 1)*cs_A;
306 
307  gamma12 = g12->real;
308  sigma12 = g12->imag;
309 
310  is_ident12 = ( gamma12 == one && sigma12 == zero );
311 
312  if ( !is_ident12 )
313  MAC_Apply_G_mx2_ops( m_A,
314  &gamma12,
315  &sigma12,
316  a1, rs_A,
317  a2, rs_A );
318  ++k;
319  --g;
320  }
321 
322  nG_app = k_minus_1 - j;
323  n_iter = nG_app;
324 
325  for ( i = 0; i < n_iter; ++i, ++k, --g )
326  {
327  g12 = buff_G + (g )*rs_G + (k )*cs_G;
328  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
329  a1 = buff_A + (g )*cs_A;
330  a2 = buff_A + (g + 1)*cs_A;
331  a3 = buff_A + (g + 2)*cs_A;
332 
333  gamma12 = g12->real;
334  sigma12 = g12->imag;
335  gamma23 = g23->real;
336  sigma23 = g23->imag;
337 
338  is_ident12 = ( gamma12 == one && sigma12 == zero );
339  is_ident23 = ( gamma23 == one && sigma23 == zero );
340 
341  if ( !is_ident12 && is_ident23 )
342  {
343  // Apply only to columns 1 and 2.
344 
345  MAC_Apply_G_mx2_ops( m_A,
346  &gamma12,
347  &sigma12,
348  a1, rs_A,
349  a2, rs_A );
350  }
351  else if ( is_ident12 && !is_ident23 )
352  {
353  // Apply only to columns 2 and 3.
354 
355  MAC_Apply_G_mx2_ops( m_A,
356  &gamma23,
357  &sigma23,
358  a2, rs_A,
359  a3, rs_A );
360  }
361  else if ( !is_ident12 && !is_ident23 )
362  {
363  // Apply to all three columns.
364 
365  MAC_Apply_G_mx3_ops( m_A,
366  &gamma12,
367  &sigma12,
368  &gamma23,
369  &sigma23,
370  a1, rs_A,
371  a2, rs_A,
372  a3, rs_A );
373  }
374  }
375  }
376 
377  return FLA_SUCCESS;
378 }
float real
Definition: blis_type_defs.h:134
float bl1_s1(void)
Definition: bl1_constants.c:47
int n_left
Definition: bl1_axmyv2.c:149
FLA_Error FLA_Apply_G_rf_ops_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var1.c:113
Definition: blis_type_defs.h:132
int i
Definition: bl1_axmyv2.c:145
float bl1_s0(void)
Definition: bl1_constants.c:111
float imag
Definition: blis_type_defs.h:134

◆ FLA_Apply_G_rf_opt_var9()

FLA_Error FLA_Apply_G_rf_opt_var9 ( FLA_Obj  G,
FLA_Obj  A 
)

References FLA_Apply_G_rf_opc_var9(), FLA_Apply_G_rf_opd_var9(), FLA_Apply_G_rf_ops_var9(), FLA_Apply_G_rf_opz_var9(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

32 {
33  FLA_Datatype datatype;
34  int k_G, m_A, n_A;
35  int rs_G, cs_G;
36  int rs_A, cs_A;
37 
38  datatype = FLA_Obj_datatype( A );
39 
40  k_G = FLA_Obj_width( G );
41  m_A = FLA_Obj_length( A );
42  n_A = FLA_Obj_width( A );
43 
44  rs_G = FLA_Obj_row_stride( G );
45  cs_G = FLA_Obj_col_stride( G );
46 
47  rs_A = FLA_Obj_row_stride( A );
48  cs_A = FLA_Obj_col_stride( A );
49 
50  switch ( datatype )
51  {
52  case FLA_FLOAT:
53  {
54  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
55  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56 
58  m_A,
59  n_A,
60  buff_G, rs_G, cs_G,
61  buff_A, rs_A, cs_A );
62 
63  break;
64  }
65 
66  case FLA_DOUBLE:
67  {
68  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
69  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70 
72  m_A,
73  n_A,
74  buff_G, rs_G, cs_G,
75  buff_A, rs_A, cs_A );
76 
77  break;
78  }
79 
80  case FLA_COMPLEX:
81  {
82  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
83  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
84 
86  m_A,
87  n_A,
88  buff_G, rs_G, cs_G,
89  buff_A, rs_A, cs_A );
90 
91  break;
92  }
93 
94  case FLA_DOUBLE_COMPLEX:
95  {
96  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
97  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
98 
100  m_A,
101  n_A,
102  buff_G, rs_G, cs_G,
103  buff_A, rs_A, cs_A );
104 
105  break;
106  }
107  }
108 
109  return FLA_SUCCESS;
110 }
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
FLA_Error FLA_Apply_G_rf_opz_var9(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var9.c:914
FLA_Error FLA_Apply_G_rf_ops_var9(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var9.c:113
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Apply_G_rf_opc_var9(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var9.c:647
Definition: blis_type_defs.h:132
int FLA_Datatype
Definition: FLA_type_defs.h:49
FLA_Error FLA_Apply_G_rf_opd_var9(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var9.c:380
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
Definition: blis_type_defs.h:137

◆ FLA_Apply_G_rf_opz_var9()

FLA_Error FLA_Apply_G_rf_opz_var9 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opz_var1(), i, dcomplex::imag, n_left, and dcomplex::real.

Referenced by FLA_Apply_G_rf_opt_var9().

919 {
920  double one = bl1_d1();
921  double zero = bl1_d0();
922  double gamma12;
923  double sigma12;
924  double gamma23;
925  double sigma23;
926  dcomplex* a1;
927  dcomplex* a2;
928  dcomplex* a3;
929  dcomplex* g12;
930  dcomplex* g23;
931  int i, j, g, k;
932  int nG, nG_app;
933  int n_iter;
934  int n_left;
935  int k_minus_1;
936  int n_fuse;
937  int is_ident12, is_ident23;
938 
939  k_minus_1 = k_G - 1;
940  nG = n_A - 1;
941  n_fuse = 2;
942 
943  // Use the simple variant for nG < (k - 1) or k == 1.
944  if ( nG < 2*k_minus_1 || k_G == 1 )
945  {
947  m_A,
948  n_A,
949  buff_G, rs_G, cs_G,
950  buff_A, rs_A, cs_A );
951  return FLA_SUCCESS;
952  }
953 
954 
955  // Start-up phase.
956 
957  for ( j = -1; j < k_minus_1; j += n_fuse )
958  {
959  nG_app = j + 1;
960  n_iter = nG_app;
961  n_left = 1;
962 
963  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
964  {
965  g12 = buff_G + (g )*rs_G + (k )*cs_G;
966  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
967  a1 = buff_A + (g )*cs_A;
968  a2 = buff_A + (g + 1)*cs_A;
969  a3 = buff_A + (g + 2)*cs_A;
970 
971  gamma12 = g12->real;
972  sigma12 = g12->imag;
973  gamma23 = g23->real;
974  sigma23 = g23->imag;
975 
976  is_ident12 = ( gamma12 == one && sigma12 == zero );
977  is_ident23 = ( gamma23 == one && sigma23 == zero );
978 
979  if ( !is_ident12 && is_ident23 )
980  {
981  // Apply only to columns 1 and 2.
982 
983  MAC_Apply_G_mx2_opz( m_A,
984  &gamma12,
985  &sigma12,
986  a1, rs_A,
987  a2, rs_A );
988  }
989  else if ( is_ident12 && !is_ident23 )
990  {
991  // Apply only to columns 2 and 3.
992 
993  MAC_Apply_G_mx2_opz( m_A,
994  &gamma23,
995  &sigma23,
996  a2, rs_A,
997  a3, rs_A );
998  }
999  else if ( !is_ident12 && !is_ident23 )
1000  {
1001  // Apply to all three columns.
1002 
1003  MAC_Apply_G_mx3_opz( m_A,
1004  &gamma12,
1005  &sigma12,
1006  &gamma23,
1007  &sigma23,
1008  a1, rs_A,
1009  a2, rs_A,
1010  a3, rs_A );
1011  }
1012  }
1013 
1014  if ( n_left == 1 )
1015  {
1016  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1017  a2 = buff_A + (g + 1)*cs_A;
1018  a3 = buff_A + (g + 2)*cs_A;
1019 
1020  gamma23 = g23->real;
1021  sigma23 = g23->imag;
1022 
1023  is_ident23 = ( gamma23 == one && sigma23 == zero );
1024 
1025  if ( !is_ident23 )
1026  MAC_Apply_G_mx2_opz( m_A,
1027  &gamma23,
1028  &sigma23,
1029  a2, rs_A,
1030  a3, rs_A );
1031  }
1032  }
1033 
1034  // Pipeline stage
1035 
1036  for ( ; j < nG - 1; j += n_fuse )
1037  {
1038  nG_app = k_G;
1039  n_iter = nG_app;
1040  n_left = 0;
1041 
1042  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
1043  {
1044  g12 = buff_G + (g )*rs_G + (k )*cs_G;
1045  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1046  a1 = buff_A + (g )*cs_A;
1047  a2 = buff_A + (g + 1)*cs_A;
1048  a3 = buff_A + (g + 2)*cs_A;
1049 
1050  gamma12 = g12->real;
1051  sigma12 = g12->imag;
1052  gamma23 = g23->real;
1053  sigma23 = g23->imag;
1054 
1055  is_ident12 = ( gamma12 == one && sigma12 == zero );
1056  is_ident23 = ( gamma23 == one && sigma23 == zero );
1057 
1058  if ( !is_ident12 && is_ident23 )
1059  {
1060  // Apply only to columns 1 and 2.
1061 
1062  MAC_Apply_G_mx2_opz( m_A,
1063  &gamma12,
1064  &sigma12,
1065  a1, rs_A,
1066  a2, rs_A );
1067  }
1068  else if ( is_ident12 && !is_ident23 )
1069  {
1070  // Apply only to columns 2 and 3.
1071 
1072  MAC_Apply_G_mx2_opz( m_A,
1073  &gamma23,
1074  &sigma23,
1075  a2, rs_A,
1076  a3, rs_A );
1077  }
1078  else if ( !is_ident12 && !is_ident23 )
1079  {
1080  // Apply to all three columns.
1081 
1082  MAC_Apply_G_mx3_opz( m_A,
1083  &gamma12,
1084  &sigma12,
1085  &gamma23,
1086  &sigma23,
1087  a1, rs_A,
1088  a2, rs_A,
1089  a3, rs_A );
1090  }
1091  }
1092  }
1093 
1094  // Shutdown stage
1095 
1096  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
1097  {
1098  g = nG - 1;
1099  k = j;
1100 
1101  n_left = 1;
1102  if ( n_left == 1 )
1103  {
1104  g12 = buff_G + (g )*rs_G + (k )*cs_G;
1105  a1 = buff_A + (g )*cs_A;
1106  a2 = buff_A + (g + 1)*cs_A;
1107 
1108  gamma12 = g12->real;
1109  sigma12 = g12->imag;
1110 
1111  is_ident12 = ( gamma12 == one && sigma12 == zero );
1112 
1113  if ( !is_ident12 )
1114  MAC_Apply_G_mx2_opz( m_A,
1115  &gamma12,
1116  &sigma12,
1117  a1, rs_A,
1118  a2, rs_A );
1119  ++k;
1120  --g;
1121  }
1122 
1123  nG_app = k_minus_1 - j;
1124  n_iter = nG_app;
1125 
1126  for ( i = 0; i < n_iter; ++i, ++k, --g )
1127  {
1128  g12 = buff_G + (g )*rs_G + (k )*cs_G;
1129  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1130  a1 = buff_A + (g )*cs_A;
1131  a2 = buff_A + (g + 1)*cs_A;
1132  a3 = buff_A + (g + 2)*cs_A;
1133 
1134  gamma12 = g12->real;
1135  sigma12 = g12->imag;
1136  gamma23 = g23->real;
1137  sigma23 = g23->imag;
1138 
1139  is_ident12 = ( gamma12 == one && sigma12 == zero );
1140  is_ident23 = ( gamma23 == one && sigma23 == zero );
1141 
1142  if ( !is_ident12 && is_ident23 )
1143  {
1144  // Apply only to columns 1 and 2.
1145 
1146  MAC_Apply_G_mx2_opz( m_A,
1147  &gamma12,
1148  &sigma12,
1149  a1, rs_A,
1150  a2, rs_A );
1151  }
1152  else if ( is_ident12 && !is_ident23 )
1153  {
1154  // Apply only to columns 2 and 3.
1155 
1156  MAC_Apply_G_mx2_opz( m_A,
1157  &gamma23,
1158  &sigma23,
1159  a2, rs_A,
1160  a3, rs_A );
1161  }
1162  else if ( !is_ident12 && !is_ident23 )
1163  {
1164  // Apply to all three columns.
1165 
1166  MAC_Apply_G_mx3_opz( m_A,
1167  &gamma12,
1168  &sigma12,
1169  &gamma23,
1170  &sigma23,
1171  a1, rs_A,
1172  a2, rs_A,
1173  a3, rs_A );
1174  }
1175  }
1176  }
1177 
1178  return FLA_SUCCESS;
1179 }
double imag
Definition: blis_type_defs.h:139
double bl1_d0(void)
Definition: bl1_constants.c:118
FLA_Error FLA_Apply_G_rf_opz_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var1.c:267
double real
Definition: blis_type_defs.h:139
int n_left
Definition: bl1_axmyv2.c:149
int i
Definition: bl1_axmyv2.c:145
double bl1_d1(void)
Definition: bl1_constants.c:54
Definition: blis_type_defs.h:137