// bl1_macro_defs.h
//
// Scalar helper macros: type-agnostic min/max/abs, plus per-type families
// selected by a one-letter prefix:
//
//   s - pointer to float
//   d - pointer to double
//   c - pointer to a single-precision complex struct (`real`/`imag` members)
//   z - pointer to a double-precision complex struct (`real`/`imag` members)
//
// Conventions used throughout this header:
//
//   * Statement-like macros carry their own terminator (a trailing `;` or a
//     `{ ... }` block), so a call site may be written with or without a
//     trailing semicolon. Because of that they cannot be used as the `if`
//     branch of an `if ... else` without braces at the call site.
//   * Every multi-statement macro is a `{ ... }` block, so it behaves as one
//     statement under an unbraced `if`/`for`/`while`.
//   * Block-local temporaries are named `bl1_..._` so they cannot capture an
//     argument the caller happens to call `s`, `temp`, `tempr`, ...
//   * Arguments may be evaluated more than once; do not pass expressions
//     with side effects.
//
// NOTE(review): this header was recovered from a rendered listing that had
// dropped some lines. Bodies tagged [reconstructed] were rebuilt from the
// visible sibling variants of the same macro family -- confirm them against
// the pristine header before relying on them. A region between the two
// constants below and the first functional macro was also absent from the
// listing and is therefore not reproduced here.

#ifndef BLIS1_MACRO_DEFS_H
#define BLIS1_MACRO_DEFS_H

// The macros below expand to sqrt(), sqrtf(), fabs() and isnan().
#include <math.h>

// --- Constants ---------------------------------------------------------------

#define BLIS1_NO_INTRINSICS  0
#define BLIS1_SSE_INTRINSICS 3

// --- Type-agnostic functional macros -----------------------------------------

// min, max, abs. Plain expressions; each argument is evaluated twice.
#define bl1_min( a, b ) ( (a) < (b) ? (a) : (b) )
#define bl1_max( a, b ) ( (a) > (b) ? (a) : (b) )
#define bl1_abs( a )    ( (a) <= 0 ? -(a) : (a) )

// fmin, fmax, fabs: floating-point spellings of the above.
#define bl1_fmin( a, b ) bl1_min( a, b )
#define bl1_fmax( a, b ) bl1_max( a, b )
#define bl1_fabs( a )    ( (a) <= 0.0 ? -(a) : (a) )

// fminabs, fmaxabs: smaller / larger of |a| and |b|.
// (second operand line [reconstructed])
#define bl1_fminabs( a, b ) \
    bl1_fmin( bl1_fabs( a ), \
              bl1_fabs( b ) )

#define bl1_fmaxabs( a, b ) \
    bl1_fmax( bl1_fabs( a ), \
              bl1_fabs( b ) )

// --- neg1: x := -x -----------------------------------------------------------
// All four bodies [reconstructed].

#define bl1_sneg1( x ) \
    *(x) = -1.0F * *(x);

#define bl1_dneg1( x ) \
    *(x) = -1.0 * *(x);

#define bl1_cneg1( x ) \
{ \
    (x)->real = -1.0F * (x)->real; \
    (x)->imag = -1.0F * (x)->imag; \
}

#define bl1_zneg1( x ) \
{ \
    (x)->real = -1.0 * (x)->real; \
    (x)->imag = -1.0 * (x)->imag; \
}

// --- neg2: y := -x -----------------------------------------------------------
// Real bodies [reconstructed].

#define bl1_sneg2( x, y ) \
    *(y) = -1.0F * *(x);

#define bl1_dneg2( x, y ) \
    *(y) = -1.0 * *(x);

#define bl1_cneg2( x, y ) \
{ \
    (y)->real = -1.0F * (x)->real; \
    (y)->imag = -1.0F * (x)->imag; \
}

#define bl1_zneg2( x, y ) \
{ \
    (y)->real = -1.0 * (x)->real; \
    (y)->imag = -1.0 * (x)->imag; \
}

// --- sqrte: alpha := sqrt( alpha ), reporting failure through *error ---------
// *error is set to FLA_FAILURE (and alpha left untouched) when the value, or
// the real part for the complex variants, is <= 0 or NaN. Otherwise alpha is
// replaced by its square root (imaginary part zeroed) and *error is set to
// FLA_SUCCESS. Note that zero is rejected, not just negatives.

#define bl1_ssqrte( alpha, error ) \
{ \
    if ( *(alpha) <= 0.0F || isnan( *(alpha) ) ) \
    { \
        *(error) = FLA_FAILURE; \
    } \
    else \
    { \
        *(alpha) = ( float ) sqrt( *(alpha) ); \
        *(error) = FLA_SUCCESS; \
    } \
}

#define bl1_dsqrte( alpha, error ) \
{ \
    if ( *(alpha) <= 0.0 || isnan( *(alpha) ) ) \
    { \
        *(error) = FLA_FAILURE; \
    } \
    else \
    { \
        *(alpha) = ( double ) sqrt( *(alpha) ); \
        *(error) = FLA_SUCCESS; \
    } \
}

#define bl1_csqrte( alpha, error ) \
{ \
    if ( (alpha)->real <= 0.0F || isnan( (alpha)->real ) ) \
    { \
        *(error) = FLA_FAILURE; \
    } \
    else \
    { \
        (alpha)->real = ( float ) sqrt( (alpha)->real ); \
        (alpha)->imag = 0.0F; \
        *(error) = FLA_SUCCESS; \
    } \
}

#define bl1_zsqrte( alpha, error ) \
{ \
    if ( (alpha)->real <= 0.0 || isnan( (alpha)->real ) ) \
    { \
        *(error) = FLA_FAILURE; \
    } \
    else \
    { \
        (alpha)->real = ( double ) sqrt( (alpha)->real ); \
        (alpha)->imag = 0.0; \
        *(error) = FLA_SUCCESS; \
    } \
}

// --- absval2: absval := | alpha | --------------------------------------------
// Complex variants scale by s = max( |re|, |im| ) before squaring so the
// intermediate sum does not overflow. An input of exactly 0 + 0i would make
// s zero and turn the scaled form into 0/0 (NaN), so it is answered directly
// with 0. Inputs containing NaN still go through the formula.
// The final stores of the four complex variants are [reconstructed]:
// c/z write (mag, 0) into a complex result, cs/zd write mag into a real one.

#define bl1_sabsval2( alpha, absval ) \
    *(absval) = ( float ) fabs( ( double ) *(alpha) );

#define bl1_dabsval2( alpha, absval ) \
    *(absval) = fabs( *(alpha) );

#define bl1_cabsval2( x, a ) \
{ \
    float bl1_mag_ = 0.0F; \
    if ( !( (x)->real == 0.0F && (x)->imag == 0.0F ) ) \
    { \
        float bl1_s_ = bl1_fmaxabs( (x)->real, (x)->imag ); \
        bl1_mag_ = sqrtf( bl1_s_ ) * \
                   sqrtf( ( (x)->real / bl1_s_ ) * (x)->real + \
                          ( (x)->imag / bl1_s_ ) * (x)->imag ); \
    } \
    (a)->real = bl1_mag_; \
    (a)->imag = 0.0F; \
}

#define bl1_csabsval2( x, a ) \
{ \
    float bl1_mag_ = 0.0F; \
    if ( !( (x)->real == 0.0F && (x)->imag == 0.0F ) ) \
    { \
        float bl1_s_ = bl1_fmaxabs( (x)->real, (x)->imag ); \
        bl1_mag_ = sqrtf( bl1_s_ ) * \
                   sqrtf( ( (x)->real / bl1_s_ ) * (x)->real + \
                          ( (x)->imag / bl1_s_ ) * (x)->imag ); \
    } \
    *(a) = bl1_mag_; \
}

#define bl1_zabsval2( x, a ) \
{ \
    double bl1_mag_ = 0.0; \
    if ( !( (x)->real == 0.0 && (x)->imag == 0.0 ) ) \
    { \
        double bl1_s_ = bl1_fmaxabs( (x)->real, (x)->imag ); \
        bl1_mag_ = sqrt( bl1_s_ ) * \
                   sqrt( ( (x)->real / bl1_s_ ) * (x)->real + \
                         ( (x)->imag / bl1_s_ ) * (x)->imag ); \
    } \
    (a)->real = bl1_mag_; \
    (a)->imag = 0.0; \
}

#define bl1_zdabsval2( x, a ) \
{ \
    double bl1_mag_ = 0.0; \
    if ( !( (x)->real == 0.0 && (x)->imag == 0.0 ) ) \
    { \
        double bl1_s_ = bl1_fmaxabs( (x)->real, (x)->imag ); \
        bl1_mag_ = sqrt( bl1_s_ ) * \
                   sqrt( ( (x)->real / bl1_s_ ) * (x)->real + \
                         ( (x)->imag / bl1_s_ ) * (x)->imag ); \
    } \
    *(a) = bl1_mag_; \
}

// --- absqr: alpha := | alpha |^2 ---------------------------------------------
// Complex variants store re^2 + im^2 in the real part and zero the imaginary
// part (the zeroing line of the z variant is [reconstructed]).

#define bl1_sabsqr( alpha ) \
    *(alpha) = *(alpha) * *(alpha);

#define bl1_dabsqr( alpha ) \
    *(alpha) = *(alpha) * *(alpha);

#define bl1_cabsqr( alpha ) \
{ \
    (alpha)->real = (alpha)->real * (alpha)->real + (alpha)->imag * (alpha)->imag; \
    (alpha)->imag = 0.0F; \
}

#define bl1_zabsqr( alpha ) \
{ \
    (alpha)->real = (alpha)->real * (alpha)->real + (alpha)->imag * (alpha)->imag; \
    (alpha)->imag = 0.0; \
}

// --- invscals: y := y / a ----------------------------------------------------
// cs/zd divide a complex y by a real a; c/z divide complex by complex, with
// the divisor pre-scaled by max( |re|, |im| ). Real bodies [reconstructed].

#define bl1_sinvscals( a, y ) \
    *(y) = *(y) / *(a);

#define bl1_dinvscals( a, y ) \
    *(y) = *(y) / *(a);

#define bl1_csinvscals( a, y ) \
{ \
    (y)->real = (y)->real / *(a); \
    (y)->imag = (y)->imag / *(a); \
}

#define bl1_cinvscals( a, y ) \
{ \
    float bl1_s_    = bl1_fmaxabs( (a)->real, (a)->imag ); \
    float bl1_ar_s_ = (a)->real / bl1_s_; \
    float bl1_ai_s_ = (a)->imag / bl1_s_; \
    float bl1_yr_   = (y)->real; \
    float bl1_den_  = ( bl1_ar_s_ * (a)->real + bl1_ai_s_ * (a)->imag ); \
    (y)->real = ( bl1_yr_ * bl1_ar_s_ + (y)->imag * bl1_ai_s_ ) / bl1_den_; \
    (y)->imag = ( (y)->imag * bl1_ar_s_ - bl1_yr_ * bl1_ai_s_ ) / bl1_den_; \
}

#define bl1_zdinvscals( a, y ) \
{ \
    (y)->real = (y)->real / *(a); \
    (y)->imag = (y)->imag / *(a); \
}

#define bl1_zinvscals( a, y ) \
{ \
    double bl1_s_    = bl1_fmaxabs( (a)->real, (a)->imag ); \
    double bl1_ar_s_ = (a)->real / bl1_s_; \
    double bl1_ai_s_ = (a)->imag / bl1_s_; \
    double bl1_yr_   = (y)->real; \
    double bl1_den_  = ( bl1_ar_s_ * (a)->real + bl1_ai_s_ * (a)->imag ); \
    (y)->real = ( bl1_yr_ * bl1_ar_s_ + (y)->imag * bl1_ai_s_ ) / bl1_den_; \
    (y)->imag = ( (y)->imag * bl1_ar_s_ - bl1_yr_ * bl1_ai_s_ ) / bl1_den_; \
}

// --- div3: a := x / y --------------------------------------------------------
// Real bodies and the initial copy in the complex variants [reconstructed].
// The complex variants copy x into a and then divide a by y in place.

#define bl1_sdiv3( x, y, a ) \
    *(a) = *(x) / *(y);

#define bl1_ddiv3( x, y, a ) \
    *(a) = *(x) / *(y);

#define bl1_cdiv3( x, y, a ) \
{ \
    *(a) = *(x); \
    bl1_cinvscals( y, a ); \
}

#define bl1_zdiv3( x, y, a ) \
{ \
    *(a) = *(x); \
    bl1_zinvscals( y, a ); \
}

// --- add3: a := x + y --------------------------------------------------------
// Real bodies [reconstructed].

#define bl1_sadd3( x, y, a ) \
    *(a) = *(x) + *(y);

#define bl1_dadd3( x, y, a ) \
    *(a) = *(x) + *(y);

#define bl1_cadd3( x, y, a ) \
{ \
    (a)->real = (x)->real + (y)->real; \
    (a)->imag = (x)->imag + (y)->imag; \
}

#define bl1_zadd3( x, y, a ) \
{ \
    (a)->real = (x)->real + (y)->real; \
    (a)->imag = (x)->imag + (y)->imag; \
}

// --- copys: y := conj?( x ) --------------------------------------------------
// Copies x to y; the complex variants then negate the imaginary part when
// bl1_is_conj( conj ) holds. The copy statements are [reconstructed]; for the
// real variants `conj` is accepted but unused.

#define bl1_scopys( conj, x, y ) \
    *(y) = *(x);

#define bl1_dcopys( conj, x, y ) \
    *(y) = *(x);

#define bl1_ccopys( conj, x, y ) \
{ \
    *(y) = *(x); \
    if ( bl1_is_conj( conj ) ) (y)->imag *= -1.0F; \
}

#define bl1_zcopys( conj, x, y ) \
{ \
    *(y) = *(x); \
    if ( bl1_is_conj( conj ) ) (y)->imag *= -1.0; \
}

// --- scals: y := a * y -------------------------------------------------------
// cs/zd scale a complex y by a real a; c/z form the full complex product in
// temporaries first so both parts are computed from the old y.
// Real bodies and the final stores of c/z [reconstructed].

#define bl1_sscals( a, y ) \
    *(y) = *(a) * *(y);

#define bl1_dscals( a, y ) \
    *(y) = *(a) * *(y);

#define bl1_csscals( a, y ) \
{ \
    (y)->real = *(a) * (y)->real; \
    (y)->imag = *(a) * (y)->imag; \
}

#define bl1_cscals( a, y ) \
{ \
    float bl1_re_ = (a)->real * (y)->real - (a)->imag * (y)->imag; \
    float bl1_im_ = (a)->imag * (y)->real + (a)->real * (y)->imag; \
    (y)->real = bl1_re_; \
    (y)->imag = bl1_im_; \
}

#define bl1_zdscals( a, y ) \
{ \
    (y)->real = *(a) * (y)->real; \
    (y)->imag = *(a) * (y)->imag; \
}

#define bl1_zscals( a, y ) \
{ \
    double bl1_re_ = (a)->real * (y)->real - (a)->imag * (y)->imag; \
    double bl1_im_ = (a)->imag * (y)->real + (a)->real * (y)->imag; \
    (y)->real = bl1_re_; \
    (y)->imag = bl1_im_; \
}

// --- mult3: a := x * y -------------------------------------------------------
// The product goes through temporaries so `a` may alias x or y.
// Real bodies and the final stores of c/z [reconstructed].

#define bl1_smult3( x, y, a ) \
    *(a) = *(x) * *(y);

#define bl1_dmult3( x, y, a ) \
    *(a) = *(x) * *(y);

#define bl1_cmult3( x, y, a ) \
{ \
    float bl1_re_ = (x)->real * (y)->real - (x)->imag * (y)->imag; \
    float bl1_im_ = (x)->imag * (y)->real + (x)->real * (y)->imag; \
    (a)->real = bl1_re_; \
    (a)->imag = bl1_im_; \
}

#define bl1_zmult3( x, y, a ) \
{ \
    double bl1_re_ = (x)->real * (y)->real - (x)->imag * (y)->imag; \
    double bl1_im_ = (x)->imag * (y)->real + (x)->real * (y)->imag; \
    (a)->real = bl1_re_; \
    (a)->imag = bl1_im_; \
}

// --- mult4: y2 := y1 + alpha * x ---------------------------------------------
// The complex variants write y2->real before reading the operands for
// y2->imag, so y2 must not alias alpha or x.

#define bl1_smult4( alpha, x, y1, y2 ) \
    *(y2) = *(y1) + *(alpha) * *(x);

#define bl1_dmult4( alpha, x, y1, y2 ) \
    *(y2) = *(y1) + *(alpha) * *(x);

#define bl1_cmult4( alpha, x, y1, y2 ) \
{ \
    (y2)->real = (y1)->real + (alpha)->real * (x)->real - (alpha)->imag * (x)->imag; \
    (y2)->imag = (y1)->imag + (alpha)->imag * (x)->real + (alpha)->real * (x)->imag; \
}

#define bl1_zmult4( alpha, x, y1, y2 ) \
{ \
    (y2)->real = (y1)->real + (alpha)->real * (x)->real - (alpha)->imag * (x)->imag; \
    (y2)->imag = (y1)->imag + (alpha)->imag * (x)->real + (alpha)->real * (x)->imag; \
}

// --- conjs: a := conj( a ) ---------------------------------------------------
// All four bodies [reconstructed]: a no-op for real types, a sign flip of the
// imaginary part for complex types (same idiom as the conj branch of copys).

#define bl1_sconjs( a ) \
    ;

#define bl1_dconjs( a ) \
    ;

#define bl1_cconjs( a ) \
    (a)->imag *= -1.0F;

#define bl1_zconjs( a ) \
    (a)->imag *= -1.0;

// --- copyconj: y := conj( x ) ------------------------------------------------
// Real bodies [reconstructed] (plain copy).

#define bl1_scopyconj( x, y ) \
    *(y) = *(x);

#define bl1_dcopyconj( x, y ) \
    *(y) = *(x);

#define bl1_ccopyconj( x, y ) \
{ \
    (y)->real = (x)->real; \
    (y)->imag = -1.0F * (x)->imag; \
}

#define bl1_zcopyconj( x, y ) \
{ \
    (y)->real = (x)->real; \
    (y)->imag = -1.0 * (x)->imag; \
}

// --- eq1: expression, non-zero when *alpha equals exactly one ----------------
// Real bodies [reconstructed].

#define bl1_seq1( alpha ) \
    ( *(alpha) == 1.0F )

#define bl1_deq1( alpha ) \
    ( *(alpha) == 1.0 )

#define bl1_ceq1( alpha ) \
    ( (alpha)->real == 1.0F && (alpha)->imag == 0.0F )

#define bl1_zeq1( alpha ) \
    ( (alpha)->real == 1.0 && (alpha)->imag == 0.0 )

// --- Swapping macros ---------------------------------------------------------
// Exchange two lvalues of the named type. Only the `double*`, `trans1_t` and
// `conj1_t` declarations were visible in the listing; everything else here is
// [reconstructed]. NOTE(review): the pointee type names `scomplex` and
// `dcomplex` in particular are assumed -- confirm against the type header.

#define bl1_sswap_pointers( a, b ) \
{ \
    float* bl1_tmp_ = (a); \
    (a) = (b); \
    (b) = bl1_tmp_; \
}

#define bl1_dswap_pointers( a, b ) \
{ \
    double* bl1_tmp_ = (a); \
    (a) = (b); \
    (b) = bl1_tmp_; \
}

#define bl1_cswap_pointers( a, b ) \
{ \
    scomplex* bl1_tmp_ = (a); \
    (a) = (b); \
    (b) = bl1_tmp_; \
}

#define bl1_zswap_pointers( a, b ) \
{ \
    dcomplex* bl1_tmp_ = (a); \
    (a) = (b); \
    (b) = bl1_tmp_; \
}

#define bl1_swap_ints( a, b ) \
{ \
    int bl1_tmp_ = (a); \
    (a) = (b); \
    (b) = bl1_tmp_; \
}

#define bl1_swap_trans( a, b ) \
{ \
    trans1_t bl1_tmp_ = (a); \
    (a) = (b); \
    (b) = bl1_tmp_; \
}

#define bl1_swap_conj( a, b ) \
{ \
    conj1_t bl1_tmp_ = (a); \
    (a) = (b); \
    (b) = bl1_tmp_; \
}

// --- Toggle macros -----------------------------------------------------------
// Each argument must be a plain modifiable lvalue; it is assigned in place.

// left <-> right
#define bl1_toggle_side( side ) \
{ \
    if ( bl1_is_left( side ) ) side = BLIS1_RIGHT; \
    else                       side = BLIS1_LEFT; \
}

// lower <-> upper
#define bl1_toggle_uplo( uplo ) \
{ \
    if ( bl1_is_lower( uplo ) ) uplo = BLIS1_UPPER_TRIANGULAR; \
    else                        uplo = BLIS1_LOWER_TRIANGULAR; \
}

// Flips the transpose bit and keeps the conjugation bit:
//   no-trans <-> trans,  conj-no-trans <-> conj-trans.
#define bl1_toggle_trans( trans ) \
{ \
    if      ( bl1_is_notrans( trans ) )     trans = BLIS1_TRANSPOSE; \
    else if ( bl1_is_trans( trans ) )       trans = BLIS1_NO_TRANSPOSE; \
    else if ( bl1_is_conjnotrans( trans ) ) trans = BLIS1_CONJ_TRANSPOSE; \
    else                                    trans = BLIS1_CONJ_NO_TRANSPOSE; \
}

// no-trans -> conj-trans; any other value -> no-trans.
#define bl1_toggle_conjtrans( trans ) \
{ \
    if ( bl1_is_notrans( trans ) ) trans = BLIS1_CONJ_TRANSPOSE; \
    else                           trans = BLIS1_NO_TRANSPOSE; \
}

// conj <-> no-conj
#define bl1_toggle_conj( conj ) \
{ \
    if ( bl1_is_conj( conj ) ) conj = BLIS1_NO_CONJUGATE; \
    else                       conj = BLIS1_CONJUGATE; \
}

#endif // #ifndef BLIS1_MACRO_DEFS_H