libflame  revision_anchor
FLA_Apply_G_mx3b_opt.h
Go to the documentation of this file.
1 /*
2 
3  Copyright (C) 2014, The University of Texas at Austin
4 
5  This file is part of libflame and is available under the 3-Clause
6  BSD license, which can be found in the LICENSE file at the top-level
7  directory, or at http://opensource.org/licenses/BSD-3-Clause
8 
9 */
10 
/*
   MAC_Apply_G_mx3b_ops

   Applies two 2x2 rotation-style updates, back to back, to each of the
   m_A rows formed by three strided columns of single-precision real
   values (a1, a2, a3). For every row:

     1. ( a2, a3 ) <- ( gamma23*a2 + sigma23*a3,  gamma23*a3 - sigma23*a2 )
     2. ( a1, a2 ) <- ( gamma12*a1 + sigma12*a2,  gamma12*a2 - sigma12*a1 )

   Step 2 consumes the a2 value produced by step 1.

   Arguments:
     m_A               number of rows to process.
     gamma12, sigma12  pointers to the coefficients of the (a1,a2) update.
     gamma23, sigma23  pointers to the coefficients of the (a2,a3) update.
     a1, a2, a3        pointers to the first element of each column.
     inc_a1..inc_a3    element stride used to advance each column pointer.

   Notes:
     - Presumably the coefficient pairs describe Givens rotations (the "G"
       in the name), but nothing here requires gamma^2 + sigma^2 == 1.
     - The columns are accessed through restrict-qualified pointers, so
       the elements they visit must not overlap.
     - Every macro parameter is parenthesized so that expression arguments
       (e.g. "g + 1" or "cond ? 2 : 1") expand with the intended grouping.
     - Locals carry an mx3b_ prefix to reduce the chance of capturing an
       identifier that appears in an argument expression.
     - m_A and the inc_* arguments are evaluated on every iteration; do
       not pass expressions with side effects.
     - Expands to a brace-enclosed block, exactly as before, so existing
       call sites are unaffected.
*/
#define MAC_Apply_G_mx3b_ops( m_A, \
                              gamma12, \
                              sigma12, \
                              gamma23, \
                              sigma23, \
                              a1, inc_a1, \
                              a2, inc_a2, \
                              a3, inc_a3 ) \
{ \
    const float     mx3b_c12 = *( gamma12 ); \
    const float     mx3b_s12 = *( sigma12 ); \
    const float     mx3b_c23 = *( gamma23 ); \
    const float     mx3b_s23 = *( sigma23 ); \
    float* restrict mx3b_p1  = ( a1 ); \
    float* restrict mx3b_p2  = ( a2 ); \
    float* restrict mx3b_p3  = ( a3 ); \
    int             mx3b_k; \
\
    for ( mx3b_k = 0; mx3b_k < ( m_A ); ++mx3b_k ) \
    { \
        const float mx3b_x1 = *mx3b_p1; \
        const float mx3b_x2 = *mx3b_p2; \
        const float mx3b_x3 = *mx3b_p3; \
\
        /* Step 1: update the (a2,a3) pair. The new a2 stays in a local */ \
        /* because step 2 needs it immediately.                         */ \
        const float mx3b_y2 = mx3b_x2 * mx3b_c23 + mx3b_x3 * mx3b_s23; \
\
        *mx3b_p3 = mx3b_x3 * mx3b_c23 - mx3b_x2 * mx3b_s23; \
\
        /* Step 2: update the (a1,a2) pair using the step-1 value of a2. */ \
        *mx3b_p1 = mx3b_x1 * mx3b_c12 + mx3b_y2 * mx3b_s12; \
        *mx3b_p2 = mx3b_y2 * mx3b_c12 - mx3b_x1 * mx3b_s12; \
\
        mx3b_p1 += ( inc_a1 ); \
        mx3b_p2 += ( inc_a2 ); \
        mx3b_p3 += ( inc_a3 ); \
    } \
}
51 
/*
   MAC_Apply_G_mx3b_opc

   Applies two 2x2 rotation-style updates with real (float) coefficients,
   back to back, to each of the m_A rows formed by three strided columns
   of scomplex values (a1, a2, a3). The real and imaginary parts are
   updated independently with the same coefficients. For every row:

     1. ( a2, a3 ) <- ( gamma23*a2 + sigma23*a3,  -sigma23*a2 + gamma23*a3 )
     2. ( a1, a2 ) <- ( gamma12*a1 + sigma12*a2,  -sigma12*a1 + gamma12*a2 )

   Step 2 consumes the a2 value produced by step 1.

   Arguments:
     m_A               number of rows to process.
     gamma12, sigma12  pointers to the coefficients of the (a1,a2) update.
     gamma23, sigma23  pointers to the coefficients of the (a2,a3) update.
     a1, a2, a3        pointers to the first scomplex of each column.
     inc_a1..inc_a3    element stride used to advance each column pointer.

   Notes:
     - Presumably the coefficient pairs describe Givens rotations (the "G"
       in the name), but nothing here requires gamma^2 + sigma^2 == 1.
     - The columns are accessed through restrict-qualified pointers, so
       the elements they visit must not overlap.
     - Every macro parameter is parenthesized so that expression arguments
       (e.g. "g + 1" or "cond ? 2 : 1") expand with the intended grouping.
     - Locals carry an mx3b_ prefix to reduce the chance of capturing an
       identifier that appears in an argument expression.
     - m_A and the inc_* arguments are evaluated on every iteration; do
       not pass expressions with side effects.
     - Expands to a brace-enclosed block, exactly as before, so existing
       call sites are unaffected.
*/
#define MAC_Apply_G_mx3b_opc( m_A, \
                              gamma12, \
                              sigma12, \
                              gamma23, \
                              sigma23, \
                              a1, inc_a1, \
                              a2, inc_a2, \
                              a3, inc_a3 ) \
{ \
    const float        mx3b_c12 = *( gamma12 ); \
    const float        mx3b_s12 = *( sigma12 ); \
    const float        mx3b_c23 = *( gamma23 ); \
    const float        mx3b_s23 = *( sigma23 ); \
    scomplex* restrict mx3b_p1  = ( a1 ); \
    scomplex* restrict mx3b_p2  = ( a2 ); \
    scomplex* restrict mx3b_p3  = ( a3 ); \
    int                mx3b_k; \
\
    for ( mx3b_k = 0; mx3b_k < ( m_A ); ++mx3b_k ) \
    { \
        const scomplex mx3b_x1 = *mx3b_p1; \
        const scomplex mx3b_x2 = *mx3b_p2; \
        const scomplex mx3b_x3 = *mx3b_p3; \
        scomplex       mx3b_y2; \
\
        /* Step 1: update the (a2,a3) pair. The new a2 stays in a local */ \
        /* because step 2 needs it immediately.                         */ \
        mx3b_y2.real = mx3b_c23 * mx3b_x2.real + mx3b_s23 * mx3b_x3.real; \
        mx3b_y2.imag = mx3b_c23 * mx3b_x2.imag + mx3b_s23 * mx3b_x3.imag; \
\
        mx3b_p3->real = -mx3b_s23 * mx3b_x2.real + mx3b_c23 * mx3b_x3.real; \
        mx3b_p3->imag = -mx3b_s23 * mx3b_x2.imag + mx3b_c23 * mx3b_x3.imag; \
\
        /* Step 2: update the (a1,a2) pair using the step-1 value of a2. */ \
        mx3b_p1->real = mx3b_c12 * mx3b_x1.real + mx3b_s12 * mx3b_y2.real; \
        mx3b_p1->imag = mx3b_c12 * mx3b_x1.imag + mx3b_s12 * mx3b_y2.imag; \
\
        mx3b_p2->real = -mx3b_s12 * mx3b_x1.real + mx3b_c12 * mx3b_y2.real; \
        mx3b_p2->imag = -mx3b_s12 * mx3b_x1.imag + mx3b_c12 * mx3b_y2.imag; \
\
        mx3b_p1 += ( inc_a1 ); \
        mx3b_p2 += ( inc_a2 ); \
        mx3b_p3 += ( inc_a3 ); \
    } \
}
98 
/*
   MAC_Apply_G_mx3b_opd

   Applies two 2x2 rotation-style updates, back to back, to each of the
   m_A rows formed by three strided columns of double-precision real
   values (a1, a2, a3). For every row:

     1. ( a2, a3 ) <- ( gamma23*a2 + sigma23*a3,  gamma23*a3 - sigma23*a2 )
     2. ( a1, a2 ) <- ( gamma12*a1 + sigma12*a2,  gamma12*a2 - sigma12*a1 )

   Step 2 consumes the a2 value produced by step 1.

   Arguments:
     m_A               number of rows to process.
     gamma12, sigma12  pointers to the coefficients of the (a1,a2) update.
     gamma23, sigma23  pointers to the coefficients of the (a2,a3) update.
     a1, a2, a3        pointers to the first element of each column.
     inc_a1..inc_a3    element stride used to advance each column pointer.

   Notes:
     - Presumably the coefficient pairs describe Givens rotations (the "G"
       in the name), but nothing here requires gamma^2 + sigma^2 == 1.
     - The columns are accessed through restrict-qualified pointers, so
       the elements they visit must not overlap.
     - Every macro parameter is parenthesized so that expression arguments
       (e.g. "g + 1" or "cond ? 2 : 1") expand with the intended grouping.
     - Locals carry an mx3b_ prefix to reduce the chance of capturing an
       identifier that appears in an argument expression.
     - m_A and the inc_* arguments are evaluated on every iteration; do
       not pass expressions with side effects.
     - Expands to a brace-enclosed block, exactly as before, so existing
       call sites are unaffected.
*/
#define MAC_Apply_G_mx3b_opd( m_A, \
                              gamma12, \
                              sigma12, \
                              gamma23, \
                              sigma23, \
                              a1, inc_a1, \
                              a2, inc_a2, \
                              a3, inc_a3 ) \
{ \
    const double     mx3b_c12 = *( gamma12 ); \
    const double     mx3b_s12 = *( sigma12 ); \
    const double     mx3b_c23 = *( gamma23 ); \
    const double     mx3b_s23 = *( sigma23 ); \
    double* restrict mx3b_p1  = ( a1 ); \
    double* restrict mx3b_p2  = ( a2 ); \
    double* restrict mx3b_p3  = ( a3 ); \
    int              mx3b_k; \
\
    for ( mx3b_k = 0; mx3b_k < ( m_A ); ++mx3b_k ) \
    { \
        const double mx3b_x1 = *mx3b_p1; \
        const double mx3b_x2 = *mx3b_p2; \
        const double mx3b_x3 = *mx3b_p3; \
\
        /* Step 1: update the (a2,a3) pair. The new a2 stays in a local */ \
        /* because step 2 needs it immediately.                         */ \
        const double mx3b_y2 = mx3b_x2 * mx3b_c23 + mx3b_x3 * mx3b_s23; \
\
        *mx3b_p3 = mx3b_x3 * mx3b_c23 - mx3b_x2 * mx3b_s23; \
\
        /* Step 2: update the (a1,a2) pair using the step-1 value of a2. */ \
        *mx3b_p1 = mx3b_x1 * mx3b_c12 + mx3b_y2 * mx3b_s12; \
        *mx3b_p2 = mx3b_y2 * mx3b_c12 - mx3b_x1 * mx3b_s12; \
\
        mx3b_p1 += ( inc_a1 ); \
        mx3b_p2 += ( inc_a2 ); \
        mx3b_p3 += ( inc_a3 ); \
    } \
}
139 
/*
   MAC_Apply_G_mx3b_opz

   Applies two 2x2 rotation-style updates with real (double) coefficients,
   back to back, to each of the m_A rows formed by three strided columns
   of dcomplex values (a1, a2, a3). The real and imaginary parts are
   updated independently with the same coefficients. For every row:

     1. ( a2, a3 ) <- ( gamma23*a2 + sigma23*a3,  -sigma23*a2 + gamma23*a3 )
     2. ( a1, a2 ) <- ( gamma12*a1 + sigma12*a2,  -sigma12*a1 + gamma12*a2 )

   Step 2 consumes the a2 value produced by step 1.

   Arguments:
     m_A               number of rows to process.
     gamma12, sigma12  pointers to the coefficients of the (a1,a2) update.
     gamma23, sigma23  pointers to the coefficients of the (a2,a3) update.
     a1, a2, a3        pointers to the first dcomplex of each column.
     inc_a1..inc_a3    element stride used to advance each column pointer.

   Notes:
     - Presumably the coefficient pairs describe Givens rotations (the "G"
       in the name), but nothing here requires gamma^2 + sigma^2 == 1.
     - The columns are accessed through restrict-qualified pointers, so
       the elements they visit must not overlap.
     - Every macro parameter is parenthesized so that expression arguments
       (e.g. "g + 1" or "cond ? 2 : 1") expand with the intended grouping.
     - Locals carry an mx3b_ prefix to reduce the chance of capturing an
       identifier that appears in an argument expression.
     - m_A and the inc_* arguments are evaluated on every iteration; do
       not pass expressions with side effects.
     - Expands to a brace-enclosed block, exactly as before, so existing
       call sites are unaffected.
*/
#define MAC_Apply_G_mx3b_opz( m_A, \
                              gamma12, \
                              sigma12, \
                              gamma23, \
                              sigma23, \
                              a1, inc_a1, \
                              a2, inc_a2, \
                              a3, inc_a3 ) \
{ \
    const double       mx3b_c12 = *( gamma12 ); \
    const double       mx3b_s12 = *( sigma12 ); \
    const double       mx3b_c23 = *( gamma23 ); \
    const double       mx3b_s23 = *( sigma23 ); \
    dcomplex* restrict mx3b_p1  = ( a1 ); \
    dcomplex* restrict mx3b_p2  = ( a2 ); \
    dcomplex* restrict mx3b_p3  = ( a3 ); \
    int                mx3b_k; \
\
    for ( mx3b_k = 0; mx3b_k < ( m_A ); ++mx3b_k ) \
    { \
        const dcomplex mx3b_x1 = *mx3b_p1; \
        const dcomplex mx3b_x2 = *mx3b_p2; \
        const dcomplex mx3b_x3 = *mx3b_p3; \
        dcomplex       mx3b_y2; \
\
        /* Step 1: update the (a2,a3) pair. The new a2 stays in a local */ \
        /* because step 2 needs it immediately.                         */ \
        mx3b_y2.real = mx3b_c23 * mx3b_x2.real + mx3b_s23 * mx3b_x3.real; \
        mx3b_y2.imag = mx3b_c23 * mx3b_x2.imag + mx3b_s23 * mx3b_x3.imag; \
\
        mx3b_p3->real = -mx3b_s23 * mx3b_x2.real + mx3b_c23 * mx3b_x3.real; \
        mx3b_p3->imag = -mx3b_s23 * mx3b_x2.imag + mx3b_c23 * mx3b_x3.imag; \
\
        /* Step 2: update the (a1,a2) pair using the step-1 value of a2. */ \
        mx3b_p1->real = mx3b_c12 * mx3b_x1.real + mx3b_s12 * mx3b_y2.real; \
        mx3b_p1->imag = mx3b_c12 * mx3b_x1.imag + mx3b_s12 * mx3b_y2.imag; \
\
        mx3b_p2->real = -mx3b_s12 * mx3b_x1.real + mx3b_c12 * mx3b_y2.real; \
        mx3b_p2->imag = -mx3b_s12 * mx3b_x1.imag + mx3b_c12 * mx3b_y2.imag; \
\
        mx3b_p1 += ( inc_a1 ); \
        mx3b_p2 += ( inc_a2 ); \
        mx3b_p3 += ( inc_a3 ); \
    } \
}
186