libflame  revision_anchor
Functions
FLA_Apply_pivots_macro_external.c File Reference

(r)

Functions

FLA_Error FLA_Apply_pivots_macro_external (FLA_Side side, FLA_Trans trans, FLA_Obj p, FLA_Obj A)
 

Function Documentation

◆ FLA_Apply_pivots_macro_external()

FLA_Error FLA_Apply_pivots_macro_external ( FLA_Side  side,
FLA_Trans  trans,
FLA_Obj  p,
FLA_Obj  A 
)

References bl1_cswapv(), bl1_dswapv(), bl1_sswapv(), bl1_zswapv(), FLA_Obj_buffer_at_view(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), and FLA_Obj_width().

Referenced by FLA_Apply_pivots_macro_task().

14 {
15  int i, j;
16  int ipiv;
17  int* buf_p = ( int* ) FLA_Obj_buffer_at_view( p );
18  FLA_Obj* blocks = FLASH_OBJ_PTR_AT( A );
19  int m_blocks = FLA_Obj_length( A );
20  int m_A = FLA_Obj_length( *blocks );
21  int n_A = FLA_Obj_width( *blocks );
22  FLA_Datatype datatype = FLA_Obj_datatype( A );
23 
24 #ifdef FLA_ENABLE_WINDOWS_BUILD
25  int* m = ( int* ) _alloca( m_blocks * sizeof( int ) );
26  int* cs = ( int* ) _alloca( m_blocks * sizeof( int ) );
27 #else
28  int* m = ( int* ) malloc( m_blocks * sizeof( int ) );
29  int* cs = ( int* ) malloc( m_blocks * sizeof( int ) );
30  //int m[m_blocks];
31  //int cs[m_blocks];
32 #endif
33 
34  if ( side != FLA_LEFT || trans != FLA_NO_TRANSPOSE )
35  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
36 
37  switch ( datatype )
38  {
39  case FLA_FLOAT:
40  {
41 #ifdef FLA_ENABLE_WINDOWS_BUILD
42  float** buffer = ( float** ) _alloca( m_blocks * sizeof( float* ) );
43 #else
44  float** buffer = ( float** ) malloc( m_blocks * sizeof( float* ) );
45  //float* buffer[m_blocks];
46 #endif
47  for ( i = 0; i < m_blocks; i++ )
48  {
49  buffer[i] = ( float* ) FLA_Obj_buffer_at_view( blocks[i] );
50 
51  m[i] = FLA_Obj_length( blocks[i] );
52  cs[i] = FLA_Obj_col_stride( blocks[i] );
53  }
54 
55  for ( j = 0; j < m_A; j++ )
56  {
57  ipiv = buf_p[j] + j;
58 
59  if ( ipiv != j )
60  {
61  i = 0;
62 
63  while ( ipiv >= m[i] )
64  {
65  ipiv = ipiv - m[i];
66  i++;
67  }
68 
69  bl1_sswapv( n_A,
70  buffer[0] + j, cs[0],
71  buffer[i] + ipiv, cs[i] );
72  }
73  }
74 #ifdef FLA_ENABLE_WINDOWS_BUILD
75 #else
76  free( buffer );
77 #endif
78  break;
79  }
80  case FLA_DOUBLE:
81  {
82 #ifdef FLA_ENABLE_WINDOWS_BUILD
83  double** buffer = ( double** ) _alloca( m_blocks * sizeof( double* ) );
84 #else
85  double** buffer = ( double** ) malloc( m_blocks * sizeof( double* ) );
86  //double* buffer[m_blocks];
87 #endif
88  for ( i = 0; i < m_blocks; i++ )
89  {
90  buffer[i] = ( double* ) FLA_Obj_buffer_at_view( blocks[i] );
91 
92  m[i] = FLA_Obj_length( blocks[i] );
93  cs[i] = FLA_Obj_col_stride( blocks[i] );
94  }
95 
96  for ( j = 0; j < m_A; j++ )
97  {
98  ipiv = buf_p[j] + j;
99 
100  if ( ipiv != j )
101  {
102  i = 0;
103 
104  while ( ipiv >= m[i] )
105  {
106  ipiv = ipiv - m[i];
107  i++;
108  }
109 
110  bl1_dswapv( n_A,
111  buffer[0] + j, cs[0],
112  buffer[i] + ipiv, cs[i] );
113  }
114  }
115 #ifdef FLA_ENABLE_WINDOWS_BUILD
116 #else
117  free( buffer );
118 #endif
119  break;
120  }
121  case FLA_COMPLEX:
122  {
123 #ifdef FLA_ENABLE_WINDOWS_BUILD
124  scomplex** buffer = ( scomplex** ) _alloca( m_blocks * sizeof( scomplex* ) );
125 #else
126  scomplex** buffer = ( scomplex** ) malloc( m_blocks * sizeof( scomplex* ) );
127  //scomplex* buffer[m_blocks];
128 #endif
129  for ( i = 0; i < m_blocks; i++ )
130  {
131  buffer[i] = ( scomplex* ) FLA_Obj_buffer_at_view( blocks[i] );
132 
133  m[i] = FLA_Obj_length( blocks[i] );
134  cs[i] = FLA_Obj_col_stride( blocks[i] );
135  }
136 
137  for ( j = 0; j < m_A; j++ )
138  {
139  ipiv = buf_p[j] + j;
140 
141  if ( ipiv != j )
142  {
143  i = 0;
144 
145  while ( ipiv >= m[i] )
146  {
147  ipiv = ipiv - m[i];
148  i++;
149  }
150 
151  bl1_cswapv( n_A,
152  buffer[0] + j, cs[0],
153  buffer[i] + ipiv, cs[i] );
154  }
155  }
156 #ifdef FLA_ENABLE_WINDOWS_BUILD
157 #else
158  free( buffer );
159 #endif
160  break;
161  }
162  case FLA_DOUBLE_COMPLEX:
163  {
164 #ifdef FLA_ENABLE_WINDOWS_BUILD
165  dcomplex** buffer = ( dcomplex** ) _alloca( m_blocks * sizeof( dcomplex* ) );
166 #else
167  dcomplex** buffer = ( dcomplex** ) malloc( m_blocks * sizeof( dcomplex* ) );
168  //dcomplex* buffer[m_blocks];
169 #endif
170  for ( i = 0; i < m_blocks; i++ )
171  {
172  buffer[i] = ( dcomplex* ) FLA_Obj_buffer_at_view( blocks[i] );
173 
174  m[i] = FLA_Obj_length( blocks[i] );
175  cs[i] = FLA_Obj_col_stride( blocks[i] );
176  }
177 
178  for ( j = 0; j < m_A; j++ )
179  {
180  ipiv = buf_p[j] + j;
181 
182  if ( ipiv != j )
183  {
184  i = 0;
185 
186  while ( ipiv >= m[i] )
187  {
188  ipiv = ipiv - m[i];
189  i++;
190  }
191 
192  bl1_zswapv( n_A,
193  buffer[0] + j, cs[0],
194  buffer[i] + ipiv, cs[i] );
195  }
196  }
197 #ifdef FLA_ENABLE_WINDOWS_BUILD
198 #else
199  free( buffer );
200 #endif
201  break;
202  }
203  }
204 
205 #ifdef FLA_ENABLE_WINDOWS_BUILD
206 #else
207  free( m );
208  free( cs );
209 #endif
210 
211  return FLA_SUCCESS;
212 }
void bl1_sswapv(int n, float *x, int incx, float *y, int incy)
Definition: bl1_swapv.c:13
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
void * FLA_Obj_buffer_at_view(FLA_Obj obj)
Definition: FLA_Query.c:215
void bl1_dswapv(int n, double *x, int incx, double *y, int incy)
Definition: bl1_swapv.c:23
Definition: FLA_type_defs.h:158
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
void bl1_cswapv(int n, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_swapv.c:33
Definition: blis_type_defs.h:132
int FLA_Datatype
Definition: FLA_type_defs.h:49
void bl1_zswapv(int n, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_swapv.c:43
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
int i
Definition: bl1_axmyv2.c:145
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
Definition: blis_type_defs.h:137