libflame  revision_anchor
blis_macro_defs.h
Go to the documentation of this file.
1 /*
2 
3  Copyright (C) 2014, The University of Texas at Austin
4 
5  This file is part of libflame and is available under the 3-Clause
6  BSD license, which can be found in the LICENSE file at the top-level
7  directory, or at http://opensource.org/licenses/BSD-3-Clause
8 
9 */
10 
11 #ifndef BLIS1_MACRO_DEFS_H
12 #define BLIS1_MACRO_DEFS_H
13 
14 // --- Constants ---------------------------------------------------------------
15 
// Numeric tags naming a flavor of vector intrinsics.
// NOTE(review): the consumer of these values is not visible in this file;
// presumably they are compared against a build-time selection macro -- confirm.
#define BLIS1_NO_INTRINSICS 0
#define BLIS1_SSE_INTRINSICS 3
18 
19 // --- boolean ---
20 
// Drop any FALSE inherited from an earlier header, then pin it to 0.
#undef FALSE
#define FALSE 0

// Drop any TRUE inherited from an earlier header, then pin it to 1.
#undef TRUE
#define TRUE 1
26 
27 /*
28 // --- trans ---
29 
30 #define BLIS1_NO_TRANSPOSE 'n'
31 #define BLIS1_TRANSPOSE 't'
32 #define BLIS1_CONJ_NO_TRANSPOSE 'c'
33 #define BLIS1_CONJ_TRANSPOSE 'h'
34 
35 // --- conj ---
36 
37 #define BLIS1_NO_CONJUGATE 'n'
38 #define BLIS1_CONJUGATE 'c'
39 
40 // --- uplo ---
41 
42 #define BLIS1_LOWER_TRIANGULAR 'l'
43 #define BLIS1_UPPER_TRIANGULAR 'u'
44 
45 // --- side ---
46 
47 #define BLIS1_LEFT 'l'
48 #define BLIS1_RIGHT 'r'
49 
50 // --- diag ---
51 
52 #define BLIS1_NONUNIT_DIAG 'n'
53 #define BLIS1_UNIT_DIAG 'u'
54 #define BLIS1_ZERO_DIAG 'z'
55 */
56 
57 // --- Functional macros -------------------------------------------------------
58 
59 // --- Type-agnostic ---
60 
61 // min, max, abs
62 
// Smaller of a and b. The selected argument is evaluated twice, so do not
// pass expressions with side effects.
#define bl1_min( a, b ) ( (a) < (b) ? (a) : (b) )

// Larger of a and b. Same double-evaluation caveat as bl1_min.
#define bl1_max( a, b ) ( (a) > (b) ? (a) : (b) )

// Absolute value of a, in a's own (promoted) type. The non-positive branch is
// written as "0 - (a)" rather than "-(a)" so that a floating-point zero of
// either sign comes back as +0 (under the default rounding mode) instead of a
// negated zero. The argument is evaluated twice.
#define bl1_abs( a ) ( (a) > 0 ? (a) : 0 - (a) )
66 
67 // fmin, fmax, fabs
68 
// Floating-point spellings of min and max; they simply forward to the
// type-agnostic macros.
#define bl1_fmin( a, b ) bl1_min( a, b )
#define bl1_fmax( a, b ) bl1_max( a, b )

// Absolute value of a floating-point a, in a's own type. Computing the
// non-positive branch as "0 - (a)" keeps the result of a zero input at +0
// (under the default rounding mode) instead of returning a negated zero.
// The argument is evaluated twice.
#define bl1_fabs( a ) ( (a) > 0 ? (a) : 0 - (a) )
72 
73 // fminabs, fmaxabs
// Smaller of |a| and |b|, built from bl1_fmin and bl1_fabs.
#define bl1_fminabs( a, b ) bl1_fmin( bl1_fabs( a ), bl1_fabs( b ) )

// Larger of |a| and |b|, built from bl1_fmax and bl1_fabs.
#define bl1_fmaxabs( a, b ) bl1_fmax( bl1_fabs( a ), bl1_fabs( b ) )
83 
84 // --- Type-dependent ---
85 
86 // --- neg1 ---
87 
// Each neg1 macro expands to one brace-enclosed block, so it behaves as a
// single statement under an unbraced if/for/while.

// void bl1_sneg1( float* x );
// Negates, in place, the float that x points to.
#define bl1_sneg1( x ) \
{ \
    *(x) *= -1.0F; \
}

// void bl1_dneg1( double* x );
// Negates, in place, the double that x points to.
#define bl1_dneg1( x ) \
{ \
    *(x) *= -1.0; \
}

// void bl1_cneg1( scomplex* x );
// Negates both the real and the imaginary member of *x in place.
#define bl1_cneg1( x ) \
{ \
    (x)->real *= -1.0F; \
    (x)->imag *= -1.0F; \
}

// void bl1_zneg1( dcomplex* x );
// Negates both the real and the imaginary member of *x in place.
#define bl1_zneg1( x ) \
{ \
    (x)->real *= -1.0; \
    (x)->imag *= -1.0; \
}
105 
106 // --- neg2 ---
107 
// Each neg2 macro expands to one brace-enclosed block, so it behaves as a
// single statement under an unbraced if/for/while.

// void bl1_sneg2( float* x, float* y );
// Stores the negation of *x in *y.
#define bl1_sneg2( x, y ) \
{ \
    *(y) = -1.0F * *(x); \
}

// void bl1_dneg2( double* x, double* y );
// Stores the negation of *x in *y.
#define bl1_dneg2( x, y ) \
{ \
    *(y) = -1.0 * *(x); \
}

// void bl1_cneg2( scomplex* x, scomplex* y );
// Stores the negated real and imaginary members of *x in *y.
#define bl1_cneg2( x, y ) \
{ \
    (y)->real = -1.0F * (x)->real; \
    (y)->imag = -1.0F * (x)->imag; \
}

// void bl1_zneg2( dcomplex* x, dcomplex* y );
// Stores the negated real and imaginary members of *x in *y.
#define bl1_zneg2( x, y ) \
{ \
    (y)->real = -1.0 * (x)->real; \
    (y)->imag = -1.0 * (x)->imag; \
}
125 
126 // --- sqrte ---
127 
// void bl1_ssqrte( float* alpha, int* error );
// Replaces *alpha with its square root and writes FLA_SUCCESS to *error.
// If *alpha is zero, negative, or NaN, *alpha is left alone and *error
// receives FLA_FAILURE.
#define bl1_ssqrte( alpha, error ) \
{ \
    if ( !( *(alpha) <= 0.0F ) && !isnan( *(alpha) ) ) \
    { \
        *(alpha) = ( float ) sqrt( *(alpha) ); \
        *(error) = FLA_SUCCESS; \
    } \
    else \
    { \
        *(error) = FLA_FAILURE; \
    } \
}

// void bl1_dsqrte( double* alpha, int* error );
// Double-precision counterpart of bl1_ssqrte, with the same success and
// failure rules.
#define bl1_dsqrte( alpha, error ) \
{ \
    if ( !( *(alpha) <= 0.0 ) && !isnan( *(alpha) ) ) \
    { \
        *(alpha) = ( double ) sqrt( *(alpha) ); \
        *(error) = FLA_SUCCESS; \
    } \
    else \
    { \
        *(error) = FLA_FAILURE; \
    } \
}

// void bl1_csqrte( scomplex* alpha, int* error );
// Only the real member is inspected. On success the real member becomes its
// square root, the imaginary member is set to zero, and *error receives
// FLA_SUCCESS. A zero, negative, or NaN real member leaves *alpha untouched
// and writes FLA_FAILURE.
#define bl1_csqrte( alpha, error ) \
{ \
    if ( !( (alpha)->real <= 0.0F ) && !isnan( (alpha)->real) ) \
    { \
        (alpha)->real = ( float ) sqrt( (alpha)->real ); \
        (alpha)->imag = 0.0F; \
        *(error) = FLA_SUCCESS; \
    } \
    else \
    { \
        *(error) = FLA_FAILURE; \
    } \
}

// void bl1_zsqrte( dcomplex* alpha, int* error );
// Double-precision counterpart of bl1_csqrte, with the same success and
// failure rules.
#define bl1_zsqrte( alpha, error ) \
{ \
    if ( !( (alpha)->real <= 0.0 ) && !isnan( (alpha)->real) ) \
    { \
        (alpha)->real = ( double ) sqrt( (alpha)->real ); \
        (alpha)->imag = 0.0; \
        *(error) = FLA_SUCCESS; \
    } \
    else \
    { \
        *(error) = FLA_FAILURE; \
    } \
}
153 
154 // --- absval2 ---
155 
// void bl1_sabsval2( float* alpha, float* absval );
// Writes |*alpha| to *absval. The value is widened to double for fabs() and
// narrowed back to float.
#define bl1_sabsval2( alpha, absval ) \
{ \
    *(absval) = ( float ) fabs( ( double ) *(alpha) ); \
}

// void bl1_dabsval2( double* alpha, double* absval );
// Writes |*alpha| to *absval.
#define bl1_dabsval2( alpha, absval ) \
{ \
    *(absval) = fabs( *(alpha) ); \
}
163 
// Complex magnitude helpers. Each one scales by the larger component
// magnitude (bl1_fmaxabs) before squaring, i.e. it computes
//   sqrt( s ) * sqrt( (re/s)*re + (im/s)*im ).
// A zero scale would turn that expression into 0/0 = NaN, so it is
// short-circuited to a magnitude of zero. The scratch variables carry a
// bl1_..._ prefix/suffix so they cannot shadow a caller variable named
// "s" or "mag".

// void bl1_cabsval2( scomplex* x, scomplex* a );
// Stores |*x| in a->real and zero in a->imag.
#define bl1_cabsval2( x, a ) \
{ \
    float bl1_scale_ = bl1_fmaxabs( (x)->real, (x)->imag ); \
    float bl1_mag_   = 0.0F; \
    if ( bl1_scale_ != 0.0F ) \
    { \
        bl1_mag_ = sqrtf( bl1_scale_ ) * \
                   sqrtf( ( (x)->real / bl1_scale_ ) * (x)->real + \
                          ( (x)->imag / bl1_scale_ ) * (x)->imag ); \
    } \
    (a)->real = bl1_mag_; \
    (a)->imag = 0.0F; \
}

// void bl1_csabsval2( scomplex* x, float* a );
// Stores |*x| in the float that a points to.
#define bl1_csabsval2( x, a ) \
{ \
    float bl1_scale_ = bl1_fmaxabs( (x)->real, (x)->imag ); \
    float bl1_mag_   = 0.0F; \
    if ( bl1_scale_ != 0.0F ) \
    { \
        bl1_mag_ = sqrtf( bl1_scale_ ) * \
                   sqrtf( ( (x)->real / bl1_scale_ ) * (x)->real + \
                          ( (x)->imag / bl1_scale_ ) * (x)->imag ); \
    } \
    *(a) = bl1_mag_; \
}

// void bl1_zabsval2( dcomplex* x, dcomplex* a );
// Stores |*x| in a->real and zero in a->imag.
#define bl1_zabsval2( x, a ) \
{ \
    double bl1_scale_ = bl1_fmaxabs( (x)->real, (x)->imag ); \
    double bl1_mag_   = 0.0; \
    if ( bl1_scale_ != 0.0 ) \
    { \
        bl1_mag_ = sqrt( bl1_scale_ ) * \
                   sqrt( ( (x)->real / bl1_scale_ ) * (x)->real + \
                         ( (x)->imag / bl1_scale_ ) * (x)->imag ); \
    } \
    (a)->real = bl1_mag_; \
    (a)->imag = 0.0; \
}

// void bl1_zdabsval2( dcomplex* x, double* a );
// Stores |*x| in the double that a points to.
#define bl1_zdabsval2( x, a ) \
{ \
    double bl1_scale_ = bl1_fmaxabs( (x)->real, (x)->imag ); \
    double bl1_mag_   = 0.0; \
    if ( bl1_scale_ != 0.0 ) \
    { \
        bl1_mag_ = sqrt( bl1_scale_ ) * \
                   sqrt( ( (x)->real / bl1_scale_ ) * (x)->real + \
                         ( (x)->imag / bl1_scale_ ) * (x)->imag ); \
    } \
    *(a) = bl1_mag_; \
}

205 
206 
207 // --- absqr ---
208 
// Each absqr macro expands to one brace-enclosed block, so it behaves as a
// single statement under an unbraced if/for/while.

// void bl1_sabsqr( float* alpha );
// Replaces *alpha with its square.
#define bl1_sabsqr( alpha ) \
{ \
    *(alpha) = *(alpha) * *(alpha); \
}

// void bl1_dabsqr( double* alpha );
// Replaces *alpha with its square.
#define bl1_dabsqr( alpha ) \
{ \
    *(alpha) = *(alpha) * *(alpha); \
}

// void bl1_cabsqr( scomplex* alpha );
// Replaces the real member with real^2 + imag^2 and zeroes the imaginary
// member.
#define bl1_cabsqr( alpha ) \
{ \
    (alpha)->real = (alpha)->real * (alpha)->real + (alpha)->imag * (alpha)->imag; \
    (alpha)->imag = 0.0F; \
}

// void bl1_zabsqr( dcomplex* alpha );
// Replaces the real member with real^2 + imag^2 and zeroes the imaginary
// member.
#define bl1_zabsqr( alpha ) \
{ \
    (alpha)->real = (alpha)->real * (alpha)->real + (alpha)->imag * (alpha)->imag; \
    (alpha)->imag = 0.0; \
}
226 
227 // --- invscals ---
228 
// In-place division y := y / a. Scratch variables inside the blocks carry a
// bl1_..._ prefix/suffix so that an argument named "s", "temp" or "yrt" is
// not shadowed by the macro's own locals.

// void bl1_sinvscals( float* a, float* y );
#define bl1_sinvscals( a, y ) \
{ \
    *(y) = *(y) / *(a); \
}

// void bl1_dinvscals( double* a, double* y );
#define bl1_dinvscals( a, y ) \
{ \
    *(y) = *(y) / *(a); \
}

// void bl1_csinvscals( float* a, scomplex* y );
// Divides both members of *y by the real scalar *a.
#define bl1_csinvscals( a, y ) \
{ \
    (y)->real = (y)->real / *(a); \
    (y)->imag = (y)->imag / *(a); \
}

// void bl1_cinvscals( scomplex* a, scomplex* y );
// Complex division of *y by *a. The divisor is pre-scaled by its larger
// component magnitude (bl1_fmaxabs) before the denominator is formed. The
// original real member of *y is saved first because the imaginary result
// still needs it after y->real has been overwritten.
#define bl1_cinvscals( a, y ) \
{ \
    float bl1_scale_ = bl1_fmaxabs( (a)->real, (a)->imag ); \
    float bl1_ar_s_  = (a)->real / bl1_scale_; \
    float bl1_ai_s_  = (a)->imag / bl1_scale_; \
    float bl1_yr_    = (y)->real; \
    float bl1_den_   = ( bl1_ar_s_ * (a)->real + bl1_ai_s_ * (a)->imag ); \
    (y)->real = ( bl1_yr_ * bl1_ar_s_ + (y)->imag * bl1_ai_s_ ) / bl1_den_; \
    (y)->imag = ( (y)->imag * bl1_ar_s_ - bl1_yr_ * bl1_ai_s_ ) / bl1_den_; \
}

// void bl1_zdinvscals( double* a, dcomplex* y );
// Divides both members of *y by the real scalar *a.
#define bl1_zdinvscals( a, y ) \
{ \
    (y)->real = (y)->real / *(a); \
    (y)->imag = (y)->imag / *(a); \
}

// void bl1_zinvscals( dcomplex* a, dcomplex* y );
// Double-precision counterpart of bl1_cinvscals.
#define bl1_zinvscals( a, y ) \
{ \
    double bl1_scale_ = bl1_fmaxabs( (a)->real, (a)->imag ); \
    double bl1_ar_s_  = (a)->real / bl1_scale_; \
    double bl1_ai_s_  = (a)->imag / bl1_scale_; \
    double bl1_yr_    = (y)->real; \
    double bl1_den_   = ( bl1_ar_s_ * (a)->real + bl1_ai_s_ * (a)->imag ); \
    (y)->real = ( bl1_yr_ * bl1_ar_s_ + (y)->imag * bl1_ai_s_ ) / bl1_den_; \
    (y)->imag = ( (y)->imag * bl1_ar_s_ - bl1_yr_ * bl1_ai_s_ ) / bl1_den_; \
}
274 
275 // --- div3 ---
276 
// Three-operand division a := x / y.

// void bl1_sdiv3( float* x, float* y, float* a );
#define bl1_sdiv3( x, y, a ) \
{ \
    *(a) = *(x) / *(y); \
}

// void bl1_ddiv3( double* x, double* y, double* a );
#define bl1_ddiv3( x, y, a ) \
{ \
    *(a) = *(x) / *(y); \
}

// void bl1_cdiv3( scomplex* x, scomplex* y, scomplex* a );
// a = x / y;
// Both operands are read into locals before *a is written, so the result may
// alias either input (including the divisor). The arithmetic is the scaled
// complex division used by bl1_cinvscals: the divisor is normalised by its
// larger component magnitude (bl1_fmaxabs) before the denominator is formed.
#define bl1_cdiv3( x, y, a ) \
{ \
    float bl1_xr_    = (x)->real; \
    float bl1_xi_    = (x)->imag; \
    float bl1_yr_    = (y)->real; \
    float bl1_yi_    = (y)->imag; \
    float bl1_scale_ = bl1_fmaxabs( bl1_yr_, bl1_yi_ ); \
    float bl1_yr_s_  = bl1_yr_ / bl1_scale_; \
    float bl1_yi_s_  = bl1_yi_ / bl1_scale_; \
    float bl1_den_   = ( bl1_yr_s_ * bl1_yr_ + bl1_yi_s_ * bl1_yi_ ); \
    (a)->real = ( bl1_xr_ * bl1_yr_s_ + bl1_xi_ * bl1_yi_s_ ) / bl1_den_; \
    (a)->imag = ( bl1_xi_ * bl1_yr_s_ - bl1_xr_ * bl1_yi_s_ ) / bl1_den_; \
}

// void bl1_zdiv3( dcomplex* x, dcomplex* y, dcomplex* a );
// Double-precision counterpart of bl1_cdiv3, equally safe under aliasing.
#define bl1_zdiv3( x, y, a ) \
{ \
    double bl1_xr_    = (x)->real; \
    double bl1_xi_    = (x)->imag; \
    double bl1_yr_    = (y)->real; \
    double bl1_yi_    = (y)->imag; \
    double bl1_scale_ = bl1_fmaxabs( bl1_yr_, bl1_yi_ ); \
    double bl1_yr_s_  = bl1_yr_ / bl1_scale_; \
    double bl1_yi_s_  = bl1_yi_ / bl1_scale_; \
    double bl1_den_   = ( bl1_yr_s_ * bl1_yr_ + bl1_yi_s_ * bl1_yi_ ); \
    (a)->real = ( bl1_xr_ * bl1_yr_s_ + bl1_xi_ * bl1_yi_s_ ) / bl1_den_; \
    (a)->imag = ( bl1_xi_ * bl1_yr_s_ - bl1_xr_ * bl1_yi_s_ ) / bl1_den_; \
}
299 
300 // --- add3 ---
301 
// Three-operand addition a := x + y.

// void bl1_sadd3( float* x, float* y, float* a );
#define bl1_sadd3( x, y, a ) \
{ \
    *(a) = *(x) + *(y); \
}

// void bl1_dadd3( double* x, double* y, double* a );
#define bl1_dadd3( x, y, a ) \
{ \
    *(a) = *(x) + *(y); \
}

// void bl1_cadd3( scomplex* x, scomplex* y, scomplex* a );
// Adds the real members and the imaginary members independently.
#define bl1_cadd3( x, y, a ) \
{ \
    (a)->real = (x)->real + (y)->real; (a)->imag = (x)->imag + (y)->imag; \
}

// void bl1_zadd3( dcomplex* x, dcomplex* y, dcomplex* a );
// Adds the real members and the imaginary members independently.
#define bl1_zadd3( x, y, a ) \
{ \
    (a)->real = (x)->real + (y)->real; (a)->imag = (x)->imag + (y)->imag; \
}
323 
324 // --- copys ---
325 
// Scalar copy y := x, optionally conjugated. Every variant expands to one
// brace-enclosed block so it behaves as a single statement under an unbraced
// if/for/while and cannot capture a following "else".

// void bl1_scopys( conj1_t conj, float* x, float* y );
// conj is accepted for signature symmetry but is not evaluated.
#define bl1_scopys( conj, x, y ) \
{ \
    *(y) = *(x); \
}

// void bl1_dcopys( conj1_t conj, double* x, double* y );
// conj is accepted for signature symmetry but is not evaluated.
#define bl1_dcopys( conj, x, y ) \
{ \
    *(y) = *(x); \
}

// void bl1_ccopys( conj1_t conj, scomplex* x, scomplex* y );
// Copies *x to *y, then negates y->imag when bl1_is_conj( conj ) holds.
#define bl1_ccopys( conj, x, y ) \
{ \
    *(y) = *(x); \
    if ( bl1_is_conj( conj ) ) { (y)->imag *= -1.0F; } \
}

// void bl1_zcopys( conj1_t conj, dcomplex* x, dcomplex* y );
// Copies *x to *y, then negates y->imag when bl1_is_conj( conj ) holds.
#define bl1_zcopys( conj, x, y ) \
{ \
    *(y) = *(x); \
    if ( bl1_is_conj( conj ) ) { (y)->imag *= -1.0; } \
}
343 
344 // --- scals ---
345 
// In-place scaling y := a * y.

// void bl1_sscals( float* a, float* y );
#define bl1_sscals( a, y ) \
{ \
    *(y) = *(a) * *(y); \
}

// void bl1_dscals( double* a, double* y );
#define bl1_dscals( a, y ) \
{ \
    *(y) = *(a) * *(y); \
}

// void bl1_csscals( float* a, scomplex* y );
// Multiplies both members of *y by the real scalar *a.
#define bl1_csscals( a, y ) \
{ \
    (y)->real = *(a) * (y)->real; (y)->imag = *(a) * (y)->imag; \
}

// void bl1_cscals( scomplex* a, scomplex* y );
// Complex product a * y. Both result members are computed into locals before
// *y is overwritten, since each depends on both original members of *y.
#define bl1_cscals( a, y ) \
{ \
    float bl1_prod_re_ = (a)->real * (y)->real - (a)->imag * (y)->imag; \
    float bl1_prod_im_ = (a)->imag * (y)->real + (a)->real * (y)->imag; \
    (y)->real = bl1_prod_re_; \
    (y)->imag = bl1_prod_im_; \
}

// void bl1_zdscals( double* a, dcomplex* y );
// Multiplies both members of *y by the real scalar *a.
#define bl1_zdscals( a, y ) \
{ \
    (y)->real = *(a) * (y)->real; (y)->imag = *(a) * (y)->imag; \
}

// void bl1_zscals( dcomplex* a, dcomplex* y );
// Double-precision counterpart of bl1_cscals.
#define bl1_zscals( a, y ) \
{ \
    double bl1_prod_re_ = (a)->real * (y)->real - (a)->imag * (y)->imag; \
    double bl1_prod_im_ = (a)->imag * (y)->real + (a)->real * (y)->imag; \
    (y)->real = bl1_prod_re_; \
    (y)->imag = bl1_prod_im_; \
}
385 
386 // --- mult3 ---
387 
// Three-operand multiplication a := x * y.

// void bl1_smult3( float* x, float* y, float* a );
#define bl1_smult3( x, y, a ) \
{ \
    *(a) = *(x) * *(y); \
}

// void bl1_dmult3( double* x, double* y, double* a );
#define bl1_dmult3( x, y, a ) \
{ \
    *(a) = *(x) * *(y); \
}

// void bl1_cmult3( scomplex* x, scomplex* y, scomplex* a );
// Complex product. Both result members are formed in locals before *a is
// written, so *a may be the same object as *x or *y.
#define bl1_cmult3( x, y, a ) \
{ \
    float bl1_prod_re_ = (x)->real * (y)->real - (x)->imag * (y)->imag; \
    float bl1_prod_im_ = (x)->imag * (y)->real + (x)->real * (y)->imag; \
    (a)->real = bl1_prod_re_; \
    (a)->imag = bl1_prod_im_; \
}

// void bl1_zmult3( dcomplex* x, dcomplex* y, dcomplex* a );
// Double-precision counterpart of bl1_cmult3.
#define bl1_zmult3( x, y, a ) \
{ \
    double bl1_prod_re_ = (x)->real * (y)->real - (x)->imag * (y)->imag; \
    double bl1_prod_im_ = (x)->imag * (y)->real + (x)->real * (y)->imag; \
    (a)->real = bl1_prod_re_; \
    (a)->imag = bl1_prod_im_; \
}
413 
414 // --- mult4 ---
415 
// Fused scale-and-add y2 := y1 + alpha * x.

// void bl1_smult4( float* alpha, float* x, float* y1, float* y2 );
#define bl1_smult4( alpha, x, y1, y2 ) \
{ \
    *(y2) = *(y1) + *(alpha) * *(x); \
}

// void bl1_dmult4( double* alpha, double* x, double* y1, double* y2 );
#define bl1_dmult4( alpha, x, y1, y2 ) \
{ \
    *(y2) = *(y1) + *(alpha) * *(x); \
}

// void bl1_cmult4( scomplex* alpha, scomplex* x, scomplex* y1, scomplex* y2 );
// Both result members are computed into locals before *y2 is touched. The
// imaginary member depends on x->real and alpha->real, so writing y2->real
// first would corrupt the result whenever y2 is the same object as x or
// alpha.
#define bl1_cmult4( alpha, x, y1, y2 ) \
{ \
    float bl1_sum_re_ = (y1)->real + (alpha)->real * (x)->real - (alpha)->imag * (x)->imag; \
    float bl1_sum_im_ = (y1)->imag + (alpha)->imag * (x)->real + (alpha)->real * (x)->imag; \
    (y2)->real = bl1_sum_re_; \
    (y2)->imag = bl1_sum_im_; \
}

// void bl1_zmult4( dcomplex* alpha, dcomplex* x, dcomplex* y1, dcomplex* y2 );
// Double-precision counterpart of bl1_cmult4, with the same aliasing safety.
#define bl1_zmult4( alpha, x, y1, y2 ) \
{ \
    double bl1_sum_re_ = (y1)->real + (alpha)->real * (x)->real - (alpha)->imag * (x)->imag; \
    double bl1_sum_im_ = (y1)->imag + (alpha)->imag * (x)->real + (alpha)->real * (x)->imag; \
    (y2)->real = bl1_sum_re_; \
    (y2)->imag = bl1_sum_im_; \
}
437 
438 // --- conjs ---
439 
// In-place conjugation.

// void bl1_sconjs( float* a );
// Real values are their own conjugate: expands to an empty block and never
// evaluates a.
#define bl1_sconjs( a ) \
{ \
}

// void bl1_dconjs( double* a );
// Real values are their own conjugate: expands to an empty block and never
// evaluates a.
#define bl1_dconjs( a ) \
{ \
}

// void bl1_cconjs( scomplex* a );
// Flips the sign of the imaginary member of *a.
#define bl1_cconjs( a ) \
{ \
    (a)->imag *= -1.0F; \
}

// void bl1_zconjs( dcomplex* a );
// Flips the sign of the imaginary member of *a.
#define bl1_zconjs( a ) \
{ \
    (a)->imag *= -1.0; \
}
455 
456 // --- copyconj ---
457 
// Conjugating copy y := conj( x ). Every variant expands to one
// brace-enclosed block so it behaves as a single statement under an unbraced
// if/for/while.

// void bl1_scopyconj( float* x, float* y );
// For real values this is a plain copy.
#define bl1_scopyconj( x, y ) \
{ \
    *(y) = *(x); \
}

// void bl1_dcopyconj( double* x, double* y );
// For real values this is a plain copy.
#define bl1_dcopyconj( x, y ) \
{ \
    *(y) = *(x); \
}

// void bl1_ccopyconj( scomplex* x, scomplex* y );
// Copies the real member and stores the negated imaginary member.
#define bl1_ccopyconj( x, y ) \
{ \
    (y)->real = (x)->real; \
    (y)->imag = -1.0F * (x)->imag; \
}

// void bl1_zcopyconj( dcomplex* x, dcomplex* y );
// Copies the real member and stores the negated imaginary member.
#define bl1_zcopyconj( x, y ) \
{ \
    (y)->real = (x)->real; \
    (y)->imag = -1.0 * (x)->imag; \
}
475 
476 // --- eq1 ---
477 
// Equality-with-one predicates. Each expands to an int-valued expression
// (nonzero when the pointed-to value is exactly one), not a statement.

// int bl1_seq1( float* alpha );
// The argument is parenthesised before dereferencing so that pointer
// expressions such as "p + 1" are handled correctly.
#define bl1_seq1( alpha ) \
    ( *(alpha) == 1.0F )

// int bl1_deq1( double* alpha );
// The argument is parenthesised before dereferencing so that pointer
// expressions such as "p + 1" are handled correctly.
#define bl1_deq1( alpha ) \
    ( *(alpha) == 1.0 )

// int bl1_ceq1( scomplex* alpha );
// True when the real member is one and the imaginary member is zero.
#define bl1_ceq1( alpha ) \
    ( (alpha)->real == 1.0F && (alpha)->imag == 0.0F )

// int bl1_zeq1( dcomplex* alpha );
// True when the real member is one and the imaginary member is zero.
#define bl1_zeq1( alpha ) \
    ( (alpha)->real == 1.0 && (alpha)->imag == 0.0 )
493 
494 // --- Swapping/toggle macros --------------------------------------------------
495 
496 // --- swap_pointers ---
497 
// Pointer swap helpers: exchange the pointer values held in the lvalues a
// and b. The scratch variable is named bl1_swap_tmp_ rather than "temp" so
// that an argument that is itself called temp is not shadowed inside the
// block (which would silently leave the caller's variables unswapped).

// Swaps two float* lvalues.
#define bl1_sswap_pointers( a, b ) \
{ \
    float* bl1_swap_tmp_ = (a); \
    (a) = (b); \
    (b) = bl1_swap_tmp_; \
}

// Swaps two double* lvalues.
#define bl1_dswap_pointers( a, b ) \
{ \
    double* bl1_swap_tmp_ = (a); \
    (a) = (b); \
    (b) = bl1_swap_tmp_; \
}

// Swaps two pointer lvalues through a void* scratch variable.
#define bl1_cswap_pointers( a, b ) \
{ \
    void* bl1_swap_tmp_ = (a); \
    (a) = (b); \
    (b) = bl1_swap_tmp_; \
}

// Swaps two pointer lvalues through a void* scratch variable.
#define bl1_zswap_pointers( a, b ) \
{ \
    void* bl1_swap_tmp_ = (a); \
    (a) = (b); \
    (b) = bl1_swap_tmp_; \
}
525 
526 // --- swap_ints ---
527 
// Value swap helpers: exchange the contents of the lvalues a and b. The
// scratch variable is named bl1_swap_tmp_ rather than "temp" so that an
// argument that is itself called temp is not shadowed inside the block.

// Swaps two int lvalues.
#define bl1_swap_ints( a, b ) \
{ \
    int bl1_swap_tmp_ = (a); \
    (a) = (b); \
    (b) = bl1_swap_tmp_; \
}

// --- swap_trans ---

// Swaps two trans1_t lvalues.
#define bl1_swap_trans( a, b ) \
{ \
    trans1_t bl1_swap_tmp_ = (a); \
    (a) = (b); \
    (b) = bl1_swap_tmp_; \
}

// --- swap_conj ---

// Swaps two conj1_t lvalues.
#define bl1_swap_conj( a, b ) \
{ \
    conj1_t bl1_swap_tmp_ = (a); \
    (a) = (b); \
    (b) = bl1_swap_tmp_; \
}
552 
553 // --- toggle_side ---
554 
// Flips side between the two side constants: a value that is not "left"
// becomes BLIS1_LEFT, a "left" value becomes BLIS1_RIGHT.
#define bl1_toggle_side( side ) \
{ \
    if ( !bl1_is_left( side ) ) { side = BLIS1_LEFT; } \
    else                        { side = BLIS1_RIGHT; } \
}

// --- toggle_uplo ---

// Flips uplo: a value that is not "lower" becomes BLIS1_LOWER_TRIANGULAR,
// a "lower" value becomes BLIS1_UPPER_TRIANGULAR.
#define bl1_toggle_uplo( uplo ) \
{ \
    if ( !bl1_is_lower( uplo ) ) { uplo = BLIS1_LOWER_TRIANGULAR; } \
    else                         { uplo = BLIS1_UPPER_TRIANGULAR; } \
}

// --- toggle_trans ---
// Toggles only the transposition part of trans:
//   no-transpose      -> transpose
//   transpose         -> no-transpose
//   conj-no-transpose -> conj-transpose
//   anything else     -> conj-no-transpose
#define bl1_toggle_trans( trans ) \
{ \
    if ( bl1_is_notrans( trans ) ) \
    { \
        trans = BLIS1_TRANSPOSE; \
    } \
    else if ( bl1_is_trans( trans ) ) \
    { \
        trans = BLIS1_NO_TRANSPOSE; \
    } \
    else if ( bl1_is_conjnotrans( trans ) ) \
    { \
        trans = BLIS1_CONJ_TRANSPOSE; \
    } \
    else \
    { \
        trans = BLIS1_CONJ_NO_TRANSPOSE; \
    } \
}

// --- toggle_conjtrans ---
// A no-transpose value becomes BLIS1_CONJ_TRANSPOSE; every other value
// becomes BLIS1_NO_TRANSPOSE.
#define bl1_toggle_conjtrans( trans ) \
{ \
    if ( !bl1_is_notrans( trans ) ) { trans = BLIS1_NO_TRANSPOSE; } \
    else                            { trans = BLIS1_CONJ_TRANSPOSE; } \
}

// --- toggle_conj ---

// Flips conj: a value that is not "conj" becomes BLIS1_CONJUGATE, a "conj"
// value becomes BLIS1_NO_CONJUGATE.
#define bl1_toggle_conj( conj ) \
{ \
    if ( !bl1_is_conj( conj ) ) { conj = BLIS1_CONJUGATE; } \
    else                        { conj = BLIS1_NO_CONJUGATE; } \
}
592 
593 #endif // #ifndef BLIS1_MACRO_DEFS_H