1 | /* |
2 | * SparseGrid_conv_opt.hpp |
3 | * |
4 | * Created on: Jul 19, 2020 |
5 | * Author: i-bird |
6 | */ |
7 | |
8 | #ifndef SPARSEGRID_CONV_OPT_HPP_ |
9 | #define SPARSEGRID_CONV_OPT_HPP_ |
10 | |
/*! \brief Overlay of l one-byte entries with a single integer of the same width
 *
 * The byte view (uc) and the integer view (i) share the same storage, so that
 * l consecutive one-byte values can be read, shifted and summed with a single
 * integer operation and then handed out again as a byte array.
 *
 * The primary template is empty on purpose: only the widths specialized
 * below (8, 4, 2 and 1) provide members.
 *
 * \tparam l number of one-byte entries
 */
template<unsigned int l>
union data_il
{
};

//! Eight one-byte entries viewed as a long int
template<>
union data_il<8>
{
	using type = long int;

	unsigned char uc[8];
	type i;
};

//! Four one-byte entries viewed as an int
template<>
union data_il<4>
{
	using type = int;

	unsigned char uc[4];
	type i;
};

//! Two one-byte entries viewed as a short int
template<>
union data_il<2>
{
	using type = short int;

	unsigned char uc[2];
	type i;
};

//! One one-byte entry viewed as a char
template<>
union data_il<1>
{
	using type = char;

	// NOTE(review): the byte view has 4 entries although the integer view is a
	// single char; this looks copied from data_il<4> -- confirm before shrinking,
	// since it changes sizeof(data_il<1>).
	unsigned char uc[4];
	type i;
};
51 | |
52 | |
/*! \brief Index bundle handed to the load_crs / store_crs helpers
 *
 * \tparam dim number of directions for which a minus/plus offset is kept
 * \tparam sz number of entries of the row mask
 */
template<unsigned int dim, unsigned int sz>
struct ids_crs
{
	//! per direction, linear index used to read the neighbour on the minus side
	long int sumdm[dim];
	//! per direction, linear index used to read the neighbour on the plus side
	long int sumdp[dim];

	//! linear index of the current position (centre load and store target)
	long int s2;
	//! one flag per row entry; store_crs builds its store mask from here
	bool mask_row[sz];
	//! offset into mask_row of the first lane of the current vector
	int k;
};
63 | |
64 | |
65 | |
66 | template<unsigned int dim> |
67 | struct conv_impl |
68 | { |
69 | template<unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size , unsigned int N, typename SparseGridType, typename lambda_f, typename ... ArgsT > |
70 | void conv(int (& stencil)[N][3], grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args) |
71 | { |
72 | #ifndef __NVCC__ |
73 | std::cout << __FILE__ << ":" << __LINE__ << " error conv operation not implemented for this dimension " << std::endl; |
74 | #else |
75 | std::cout << __FILE__ << ":" << __LINE__ << " error conv is unsupported when compiled on NVCC " << std::endl; |
76 | #endif |
77 | } |
78 | |
79 | template<bool findNN, unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size, typename SparseGridType, typename lambda_f, typename ... ArgsT > |
80 | static void conv_cross(grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args) |
81 | { |
82 | #ifndef __NVCC__ |
83 | std::cout << __FILE__ << ":" << __LINE__ << " error conv_cross operation not implemented for this dimension " << std::endl; |
84 | #else |
85 | std::cout << __FILE__ << ":" << __LINE__ << " error conv_cross is unsupported when compiled on NVCC " << std::endl; |
86 | #endif |
87 | } |
88 | |
89 | template<bool findNN, typename NNType, unsigned int prop_src1, unsigned int prop_src2, |
90 | unsigned int prop_dst1, unsigned int prop_dst2, |
91 | unsigned int stencil_size , unsigned int N, |
92 | typename SparseGridType, typename lambda_f, typename ... ArgsT > |
93 | static void conv2(int (& stencil)[N][3], grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args) |
94 | { |
95 | #ifndef __NVCC__ |
96 | std::cout << __FILE__ << ":" << __LINE__ << " error conv2 operation not implemented for this dimension " << std::endl; |
97 | #else |
98 | std::cout << __FILE__ << ":" << __LINE__ << " error conv2 is unsupported when compiled on NVCC " << std::endl; |
99 | #endif |
100 | } |
101 | |
102 | template<bool findNN, unsigned int prop_src1, unsigned int prop_src2, unsigned int prop_dst1, unsigned int prop_dst2, unsigned int stencil_size, typename SparseGridType, typename lambda_f, typename ... ArgsT > |
103 | static void conv_cross2(grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args) |
104 | { |
105 | #ifndef __NVCC__ |
106 | std::cout << __FILE__ << ":" << __LINE__ << " error conv_cross2 operation not implemented for this dimension " << std::endl; |
107 | #else |
108 | std::cout << __FILE__ << ":" << __LINE__ << " error conv_cross2 is unsupported when compiled on NVCC " << std::endl; |
109 | #endif |
110 | } |
111 | }; |
112 | |
113 | #if !defined(__NVCC__) || defined(CUDA_ON_CPU) |
114 | |
115 | |
116 | template<unsigned int dir,int p, unsigned int prop_src1,typename chunk_type, typename vect_type, typename ids_type> |
117 | void load_crs(vect_type & cs1, chunk_type & chunk, ids_type & ids) |
118 | { |
119 | if (dir == 0 && p < 0) |
120 | { |
121 | Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[ids.s2]); |
122 | |
123 | cs1 = cmd1; |
124 | cs1 = cs1.shifted(-1); |
125 | cs1[0] = chunk.template get<prop_src1>()[ids.sumdm[dir]]; |
126 | } |
127 | else if (dir == 0 && p > 0) |
128 | { |
129 | Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[ids.s2]); |
130 | |
131 | cs1 = cmd1; |
132 | cs1 = cs1.shifted(1); |
133 | cs1[Vc::Vector<typename vect_type::EntryType>::Size - 1] = chunk.template get<prop_src1>()[ids.sumdp[0]]; |
134 | } |
135 | else if (p < 0) |
136 | { |
137 | cs1.load(&chunk.template get<prop_src1>()[ids.sumdm[dir]],Vc::Aligned); |
138 | } |
139 | else if (p > 0) |
140 | { |
141 | cs1.load(&chunk.template get<prop_src1>()[ids.sumdp[dir]],Vc::Aligned); |
142 | } |
143 | else |
144 | { |
145 | Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[ids.s2]); |
146 | |
147 | cs1 = cmd1; |
148 | } |
149 | } |
150 | |
151 | template<unsigned int prop_dst1,typename chunk_type, typename vect_type, typename ids_type> |
152 | void store_crs(chunk_type & chunk, vect_type & res, ids_type & ids) |
153 | { |
154 | Vc::Mask<typename vect_type::EntryType> m(&ids.mask_row[ids.k]); |
155 | |
156 | res.store(&chunk.template get<prop_dst1>()[ids.s2],m,Vc::Aligned); |
157 | } |
158 | |
159 | template<unsigned int prop_dst1,unsigned int comp, typename chunk_type, typename vect_type, typename ids_type> |
160 | void store_crs_v(chunk_type & chunk, vect_type & res, ids_type & ids) |
161 | { |
162 | Vc::Mask<typename vect_type::EntryType> m(&ids.mask_row[ids.k]); |
163 | |
164 | res.store(&chunk.template get<prop_dst1>()[comp][ids.s2],m,Vc::Aligned); |
165 | } |
166 | |
167 | template<unsigned int dir,int p, unsigned int comp, unsigned int prop_src1,typename chunk_type, typename vect_type, typename ids_type> |
168 | void load_crs_v(vect_type & cs1, chunk_type & chunk, ids_type & ids) |
169 | { |
170 | if (dir == 0 && p < 0) |
171 | { |
172 | Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[comp][ids.s2]); |
173 | |
174 | cs1 = cmd1; |
175 | cs1 = cs1.shifted(-1); |
176 | cs1[0] = chunk.template get<prop_src1>()[comp][ids.sumdm[dir]]; |
177 | } |
178 | else if (dir == 0 && p > 0) |
179 | { |
180 | Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[comp][ids.s2]); |
181 | |
182 | cs1 = cmd1; |
183 | cs1 = cs1.shifted(1); |
184 | cs1[Vc::Vector<typename vect_type::EntryType>::Size - 1] = chunk.template get<prop_src1>()[comp][ids.sumdp[dir]]; |
185 | } |
186 | else if (p < 0) |
187 | { |
188 | cs1.load(&chunk.template get<prop_src1>()[comp][ids.sumdm[dir]],Vc::Aligned); |
189 | } |
190 | else if (p > 0) |
191 | { |
192 | cs1.load(&chunk.template get<prop_src1>()[comp][ids.sumdp[dir]],Vc::Aligned); |
193 | } |
194 | else |
195 | { |
196 | Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[comp][ids.s2]); |
197 | |
198 | cs1 = cmd1; |
199 | } |
200 | } |
201 | |
/*! \brief The six arms of a 3D cross stencil, one SIMD vector per arm
 *
 * The m/p suffix is the minus/plus side along the axis named by the first
 * letter. The lanes are always double precision.
 * NOTE(review): the kernels filling this struct are templated on the property
 * type; confirm they are only meant to be used with double properties.
 */
struct cross_stencil_v
{
	//! neighbours on the minus side along x
	Vc::double_v xm;
	//! neighbours on the plus side along x
	Vc::double_v xp;
	//! neighbours on the minus side along y
	Vc::double_v ym;
	//! neighbours on the plus side along y
	Vc::double_v yp;
	//! neighbours on the minus side along z
	Vc::double_v zm;
	//! neighbours on the plus side along z
	Vc::double_v zp;
};
211 | |
212 | template<> |
213 | struct conv_impl<3> |
214 | { |
215 | template<bool findNN, typename NNtype, unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size , unsigned int N, typename SparseGridType, typename lambda_f, typename ... ArgsT > |
216 | static void conv(int (& stencil)[N][3], grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args) |
217 | { |
218 | auto it = grid.template getBlockIterator<stencil_size>(start,stop); |
219 | |
220 | typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src>>::type prop_type; |
221 | |
222 | unsigned char mask[decltype(it)::sizeBlockBord]; |
223 | unsigned char mask_sum[decltype(it)::sizeBlockBord]; |
224 | unsigned char mask_unused[decltype(it)::sizeBlock]; |
225 | __attribute__ ((aligned (32))) prop_type block_bord_src[decltype(it)::sizeBlockBord]; |
226 | __attribute__ ((aligned (32))) prop_type block_bord_dst[decltype(it)::sizeBlock]; |
227 | |
228 | typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0; |
229 | typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1; |
230 | typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2; |
231 | |
232 | while (it.isNext()) |
233 | { |
234 | it.template loadBlockBorder<prop_src,NNtype,findNN>(block_bord_src,mask); |
235 | |
236 | if (it.start_b(2) != stencil_size || it.start_b(1) != stencil_size || it.start_b(0) != stencil_size || |
237 | it.stop_b(2) != sz2::value+stencil_size || it.stop_b(1) != sz1::value+stencil_size || it.stop_b(0) != sz0::value+stencil_size) |
238 | { |
239 | auto & = grid.private_get_header_mask(); |
240 | auto & = grid.private_get_header_inf(); |
241 | |
242 | loadBlock_impl<prop_dst,0,3,typename decltype(it)::vector_blocks_exts_type, typename decltype(it)::vector_ext_type>::template loadBlock<decltype(it)::sizeBlock>(block_bord_dst,grid,it.getChunkId(),mask_unused); |
243 | } |
244 | |
245 | // Sum the mask |
246 | for (int k = it.start_b(2) ; k < it.stop_b(2) ; k++) |
247 | { |
248 | for (int j = it.start_b(1) ; j < it.stop_b(1) ; j++) |
249 | { |
250 | int cc = it.LinB(it.start_b(0),j,k); |
251 | int c[N]; |
252 | |
253 | for (int s = 0 ; s < N ; s++) |
254 | { |
255 | c[s] = it.LinB(it.start_b(0)+stencil[s][0],j+stencil[s][1],k+stencil[s][2]); |
256 | } |
257 | |
258 | for (int i = it.start_b(0) ; i < it.stop_b(0) ; i += sizeof(size_t)) |
259 | { |
260 | size_t cmd = *(size_t *)&mask[cc]; |
261 | |
262 | if (cmd != 0) |
263 | { |
264 | size_t xm[N]; |
265 | |
266 | for (int s = 0 ; s < N ; s++) |
267 | { |
268 | xm[s] = *(size_t *)&mask[c[s]]; |
269 | } |
270 | |
271 | size_t sum = 0; |
272 | for (int s = 0 ; s < N ; s++) |
273 | { |
274 | sum += xm[s]; |
275 | } |
276 | |
277 | *(size_t *)&mask_sum[cc] = sum; |
278 | } |
279 | |
280 | cc += sizeof(size_t); |
281 | for (int s = 0 ; s < N ; s++) |
282 | { |
283 | c[s] += sizeof(size_t); |
284 | } |
285 | } |
286 | } |
287 | } |
288 | |
289 | for (int k = it.start_b(2) ; k < it.stop_b(2) ; k++) |
290 | { |
291 | for (int j = it.start_b(1) ; j < it.stop_b(1) ; j++) |
292 | { |
293 | int cc = it.LinB(it.start_b(0),j,k); |
294 | int c[N]; |
295 | |
296 | int cd = it.LinB_off(it.start_b(0),j,k); |
297 | |
298 | for (int s = 0 ; s < N ; s++) |
299 | { |
300 | c[s] = it.LinB(it.start_b(0)+stencil[s][0],j+stencil[s][1],k+stencil[s][2]); |
301 | } |
302 | |
303 | for (int i = it.start_b(0) ; i < it.stop_b(0) ; i += Vc::Vector<prop_type>::Size) |
304 | { |
305 | Vc::Mask<prop_type> cmp; |
306 | |
307 | for (int s = 0 ; s < Vc::Vector<prop_type>::Size ; s++) |
308 | { |
309 | cmp[s] = (mask[cc+s] == true && i+s < it.stop_b(0)); |
310 | } |
311 | |
312 | // we do only if exist the point |
313 | if (Vc::none_of(cmp) == false) |
314 | { |
315 | Vc::Mask<prop_type> surround; |
316 | |
317 | Vc::Vector<prop_type> xs[N+1]; |
318 | |
319 | xs[0] = Vc::Vector<prop_type>(&block_bord_src[cc],Vc::Unaligned); |
320 | |
321 | for (int s = 1 ; s < N+1 ; s++) |
322 | { |
323 | xs[s] = Vc::Vector<prop_type>(&block_bord_src[c[s-1]],Vc::Unaligned); |
324 | } |
325 | |
326 | auto res = func(xs, &mask_sum[cc], args ...); |
327 | |
328 | res.store(&block_bord_dst[cd],cmp,Vc::Aligned); |
329 | } |
330 | |
331 | cc += Vc::Vector<prop_type>::Size; |
332 | for (int s = 0 ; s < N ; s++) |
333 | { |
334 | c[s] += Vc::Vector<prop_type>::Size; |
335 | } |
336 | cd += Vc::Vector<prop_type>::Size; |
337 | } |
338 | } |
339 | } |
340 | |
341 | it.template storeBlock<prop_dst>(block_bord_dst); |
342 | |
343 | ++it; |
344 | } |
345 | } |
346 | |
347 | template<bool findNN, unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size, typename SparseGridType, typename lambda_f, typename ... ArgsT > |
348 | static void conv_cross(grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args) |
349 | { |
350 | auto it = grid.template getBlockIterator<1>(start,stop); |
351 | |
352 | auto & datas = grid.private_get_data(); |
353 | auto & = grid.private_get_header_mask(); |
354 | |
355 | typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0; |
356 | typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1; |
357 | typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2; |
358 | |
359 | typedef typename SparseGridType::chunking_type chunking; |
360 | |
361 | typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src>>::type prop_type; |
362 | |
363 | while (it.isNext()) |
364 | { |
365 | // Load |
366 | long int offset_jump[6]; |
367 | |
368 | size_t cid = it.getChunkId(); |
369 | |
370 | auto chunk = datas.get(cid); |
371 | auto & mask = headers.get(cid); |
372 | |
373 | bool exist; |
374 | grid_key_dx<3> p = grid.getChunkPos(cid) + grid_key_dx<3>({-1,0,0}); |
375 | long int r = grid.getChunk(p,exist); |
376 | offset_jump[0] = (r-cid)*decltype(it)::sizeBlock; |
377 | |
378 | p = grid.getChunkPos(cid) + grid_key_dx<3>({1,0,0}); |
379 | r = grid.getChunk(p,exist); |
380 | offset_jump[1] = (r-cid)*decltype(it)::sizeBlock; |
381 | |
382 | p = grid.getChunkPos(cid) + grid_key_dx<3>({0,-1,0}); |
383 | r = grid.getChunk(p,exist); |
384 | offset_jump[2] = (r-cid)*decltype(it)::sizeBlock; |
385 | |
386 | p = grid.getChunkPos(cid) + grid_key_dx<3>({0,1,0}); |
387 | r = grid.getChunk(p,exist); |
388 | offset_jump[3] = (r-cid)*decltype(it)::sizeBlock; |
389 | |
390 | p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,-1}); |
391 | r = grid.getChunk(p,exist); |
392 | offset_jump[4] = (r-cid)*decltype(it)::sizeBlock; |
393 | |
394 | p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,1}); |
395 | r = grid.getChunk(p,exist); |
396 | offset_jump[5] = (r-cid)*decltype(it)::sizeBlock; |
397 | |
398 | // Load offset jumps |
399 | |
400 | // construct a row mask |
401 | |
402 | long int s2 = 0; |
403 | |
404 | typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type sz; |
405 | typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type sy; |
406 | typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type sx; |
407 | |
408 | |
409 | bool mask_row[sx::value]; |
410 | |
411 | for (int k = 0 ; k < sx::value ; k++) |
412 | { |
413 | mask_row[k] = (k >= it.start(0) && k < it.stop(0))?true:false; |
414 | } |
415 | |
416 | for (int v = it.start(2) ; v < it.stop(2) ; v++) |
417 | { |
418 | for (int j = it.start(1) ; j < it.stop(1) ; j++) |
419 | { |
420 | s2 = it.Lin(0,j,v); |
421 | for (int k = 0 ; k < sx::value ; k += Vc::Vector<prop_type>::Size) |
422 | { |
423 | // we do only id exist the point |
424 | if (*(int *)&mask.mask[s2] == 0) {s2 += Vc::Vector<prop_type>::Size; continue;} |
425 | |
426 | data_il<Vc::Vector<prop_type>::Size> mxm; |
427 | data_il<Vc::Vector<prop_type>::Size> mxp; |
428 | data_il<Vc::Vector<prop_type>::Size> mym; |
429 | data_il<Vc::Vector<prop_type>::Size> myp; |
430 | data_il<Vc::Vector<prop_type>::Size> mzm; |
431 | data_il<Vc::Vector<prop_type>::Size> mzp; |
432 | |
433 | cross_stencil_v cs; |
434 | |
435 | Vc::Vector<prop_type> cmd(&chunk.template get<prop_src>()[s2]); |
436 | |
437 | // Load x-1 |
438 | long int sumxm = s2-1; |
439 | sumxm += (k==0)?offset_jump[0] + sx::value:0; |
440 | |
441 | // Load x+1 |
442 | long int sumxp = s2+Vc::Vector<prop_type>::Size; |
443 | sumxp += (k+Vc::Vector<prop_type>::Size == sx::value)?offset_jump[1] - sx::value:0; |
444 | |
445 | long int sumym = (j == 0)?offset_jump[2] + (sy::value-1)*sx::value:-sx::value; |
446 | sumym += s2; |
447 | long int sumyp = (j == sy::value-1)?offset_jump[3] - (sy::value - 1)*sx::value:sx::value; |
448 | sumyp += s2; |
449 | long int sumzm = (v == 0)?offset_jump[4] + (sz::value-1)*sx::value*sy::value:-sx::value*sy::value; |
450 | sumzm += s2; |
451 | long int sumzp = (v == sz::value-1)?offset_jump[5] - (sz::value - 1)*sx::value*sy::value:sx::value*sy::value; |
452 | sumzp += s2; |
453 | |
454 | if (Vc::Vector<prop_type>::Size == 2) |
455 | { |
456 | mxm.i = *(short int *)&mask.mask[s2]; |
457 | mxm.i = mxm.i << 8; |
458 | mxm.i |= (short int)mask.mask[sumxm]; |
459 | |
460 | mxp.i = *(short int *)&mask.mask[s2]; |
461 | mxp.i = mxp.i >> 8; |
462 | mxp.i |= ((short int)mask.mask[sumxp]) << (Vc::Vector<prop_type>::Size - 1)*8; |
463 | |
464 | mym.i = *(short int *)&mask.mask[sumym]; |
465 | myp.i = *(short int *)&mask.mask[sumyp]; |
466 | |
467 | mzm.i = *(short int *)&mask.mask[sumzm]; |
468 | mzp.i = *(short int *)&mask.mask[sumzp]; |
469 | } |
470 | else if (Vc::Vector<prop_type>::Size == 4) |
471 | { |
472 | mxm.i = *(int *)&mask.mask[s2]; |
473 | mxm.i = mxm.i << 8; |
474 | mxm.i |= (int)mask.mask[sumxm]; |
475 | |
476 | mxp.i = *(int *)&mask.mask[s2]; |
477 | mxp.i = mxp.i >> 8; |
478 | mxp.i |= ((int)mask.mask[sumxp]) << (Vc::Vector<prop_type>::Size - 1)*8; |
479 | |
480 | mym.i = *(int *)&mask.mask[sumym]; |
481 | myp.i = *(int *)&mask.mask[sumyp]; |
482 | |
483 | mzm.i = *(int *)&mask.mask[sumzm]; |
484 | mzp.i = *(int *)&mask.mask[sumzp]; |
485 | } |
486 | else |
487 | { |
488 | std::cout << __FILE__ << ":" << __LINE__ << " UNSUPPORTED" << std::endl; |
489 | } |
490 | |
491 | cs.xm = cmd; |
492 | cs.xm = cs.xm.shifted(-1); |
493 | cs.xm[0] = chunk.template get<prop_src>()[sumxm]; |
494 | |
495 | |
496 | cs.xp = cmd; |
497 | cs.xp = cs.xp.shifted(1); |
498 | cs.xp[Vc::Vector<prop_type>::Size - 1] = chunk.template get<prop_src>()[sumxp]; |
499 | |
500 | // Load y and z direction |
501 | |
502 | cs.ym.load(&chunk.template get<prop_src>()[sumym],Vc::Aligned); |
503 | cs.yp.load(&chunk.template get<prop_src>()[sumyp],Vc::Aligned); |
504 | cs.zm.load(&chunk.template get<prop_src>()[sumzm],Vc::Aligned); |
505 | cs.zp.load(&chunk.template get<prop_src>()[sumzp],Vc::Aligned); |
506 | |
507 | // Calculate |
508 | |
509 | data_il<Vc::Vector<prop_type>::Size> tot_m; |
510 | tot_m.i = mxm.i + mxp.i + mym.i + myp.i + mzm.i + mzp.i; |
511 | |
512 | Vc::Vector<prop_type> res = func(cmd,cs,tot_m.uc,args ... ); |
513 | |
514 | Vc::Mask<prop_type> m(&mask_row[k]); |
515 | |
516 | res.store(&chunk.template get<prop_dst>()[s2],m,Vc::Aligned); |
517 | |
518 | s2 += Vc::Vector<prop_type>::Size; |
519 | } |
520 | } |
521 | } |
522 | |
523 | ++it; |
524 | } |
525 | } |
526 | |
527 | |
528 | template<bool findNN, typename NNType, unsigned int prop_src1, unsigned int prop_src2, |
529 | unsigned int prop_dst1, unsigned int prop_dst2, |
530 | unsigned int stencil_size , unsigned int N, |
531 | typename SparseGridType, typename lambda_f, typename ... ArgsT > |
532 | static void conv2(int (& stencil)[N][3], grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args) |
533 | { |
534 | auto it = grid.template getBlockIterator<stencil_size>(start,stop); |
535 | |
536 | typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src1>>::type prop_type; |
537 | |
538 | unsigned char mask[decltype(it)::sizeBlockBord]; |
539 | unsigned char mask_sum[decltype(it)::sizeBlockBord]; |
540 | __attribute__ ((aligned (64))) prop_type block_bord_src1[decltype(it)::sizeBlockBord]; |
541 | __attribute__ ((aligned (64))) prop_type block_bord_dst1[decltype(it)::sizeBlock+16]; |
542 | __attribute__ ((aligned (64))) prop_type block_bord_src2[decltype(it)::sizeBlockBord]; |
543 | __attribute__ ((aligned (64))) prop_type block_bord_dst2[decltype(it)::sizeBlock+16]; |
544 | |
545 | typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0; |
546 | typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1; |
547 | typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2; |
548 | |
549 | while (it.isNext()) |
550 | { |
551 | it.template loadBlockBorder<prop_src1,NNType,findNN>(block_bord_src1,mask); |
552 | it.template loadBlockBorder<prop_src2,NNType,findNN>(block_bord_src2,mask); |
553 | |
554 | // Sum the mask |
555 | for (int k = it.start_b(2) ; k < it.stop_b(2) ; k++) |
556 | { |
557 | for (int j = it.start_b(1) ; j < it.stop_b(1) ; j++) |
558 | { |
559 | int cc = it.LinB(it.start_b(0),j,k); |
560 | int c[N]; |
561 | |
562 | for (int s = 0 ; s < N ; s++) |
563 | { |
564 | c[s] = it.LinB(it.start_b(0)+stencil[s][0],j+stencil[s][1],k+stencil[s][2]); |
565 | } |
566 | |
567 | for (int i = it.start_b(0) ; i < it.stop_b(0) ; i += sizeof(size_t)) |
568 | { |
569 | size_t cmd = *(size_t *)&mask[cc]; |
570 | |
571 | if (cmd == 0) {continue;} |
572 | |
573 | |
574 | size_t xm[N]; |
575 | |
576 | for (int s = 0 ; s < N ; s++) |
577 | { |
578 | xm[s] = *(size_t *)&mask[c[s]]; |
579 | } |
580 | |
581 | size_t sum = 0; |
582 | for (int s = 0 ; s < N ; s++) |
583 | { |
584 | sum += xm[s]; |
585 | } |
586 | |
587 | *(size_t *)&mask_sum[cc] = sum; |
588 | |
589 | cc += sizeof(size_t); |
590 | for (int s = 0 ; s < N ; s++) |
591 | { |
592 | c[s] += sizeof(size_t); |
593 | } |
594 | } |
595 | } |
596 | } |
597 | |
598 | for (int k = it.start_b(2) ; k < it.stop_b(2) ; k++) |
599 | { |
600 | for (int j = it.start_b(1) ; j < it.stop_b(1) ; j++) |
601 | { |
602 | int cc = it.LinB(it.start_b(0),j,k); |
603 | int c[N]; |
604 | |
605 | int cd = it.LinB_off(it.start_b(0),j,k); |
606 | |
607 | for (int s = 0 ; s < N ; s++) |
608 | { |
609 | c[s] = it.LinB(it.start_b(0)+stencil[s][0],j+stencil[s][1],k+stencil[s][2]); |
610 | } |
611 | |
612 | for (int i = it.start_b(0) ; i < it.stop_b(0) ; i += Vc::Vector<prop_type>::Size) |
613 | { |
614 | Vc::Mask<prop_type> cmp; |
615 | |
616 | for (int s = 0 ; s < Vc::Vector<prop_type>::Size ; s++) |
617 | { |
618 | cmp[s] = (mask[cc+s] == true); |
619 | } |
620 | |
621 | // we do only id exist the point |
622 | if (Vc::none_of(cmp) == true) {continue;} |
623 | |
624 | Vc::Mask<prop_type> surround; |
625 | |
626 | Vc::Vector<prop_type> xs1[N+1]; |
627 | Vc::Vector<prop_type> xs2[N+1]; |
628 | |
629 | xs1[0] = Vc::Vector<prop_type>(&block_bord_src1[cc],Vc::Unaligned); |
630 | xs2[0] = Vc::Vector<prop_type>(&block_bord_src2[cc],Vc::Unaligned); |
631 | |
632 | for (int s = 1 ; s < N+1 ; s++) |
633 | { |
634 | xs1[s] = Vc::Vector<prop_type>(&block_bord_src1[c[s-1]],Vc::Unaligned); |
635 | xs2[s] = Vc::Vector<prop_type>(&block_bord_src2[c[s-1]],Vc::Unaligned); |
636 | } |
637 | |
638 | Vc::Vector<prop_type> vo1; |
639 | Vc::Vector<prop_type> vo2; |
640 | |
641 | func(vo1, vo2, xs1, xs2, &mask_sum[cc], args ...); |
642 | |
643 | vo1.store(&block_bord_dst1[cd],cmp,Vc::Unaligned); |
644 | vo2.store(&block_bord_dst2[cd],cmp,Vc::Unaligned); |
645 | |
646 | cc += Vc::Vector<prop_type>::Size; |
647 | for (int s = 0 ; s < N ; s++) |
648 | { |
649 | c[s] += Vc::Vector<prop_type>::Size; |
650 | } |
651 | cd += Vc::Vector<prop_type>::Size; |
652 | } |
653 | } |
654 | } |
655 | |
656 | it.template storeBlock<prop_dst1>(block_bord_dst1); |
657 | it.template storeBlock<prop_dst2>(block_bord_dst2); |
658 | |
659 | ++it; |
660 | } |
661 | } |
662 | |
663 | template<bool findNN, unsigned int prop_src1, unsigned int prop_src2, unsigned int prop_dst1, unsigned int prop_dst2, unsigned int stencil_size, typename SparseGridType, typename lambda_f, typename ... ArgsT > |
664 | static void conv_cross2(grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args) |
665 | { |
666 | auto it = grid.template getBlockIterator<stencil_size>(start,stop); |
667 | |
668 | auto & datas = grid.private_get_data(); |
669 | auto & = grid.private_get_header_mask(); |
670 | |
671 | typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0; |
672 | typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1; |
673 | typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2; |
674 | |
675 | typedef typename SparseGridType::chunking_type chunking; |
676 | |
677 | typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src1>>::type prop_type; |
678 | |
679 | while (it.isNext()) |
680 | { |
681 | // Load |
682 | long int offset_jump[6]; |
683 | |
684 | size_t cid = it.getChunkId(); |
685 | |
686 | auto chunk = datas.get(cid); |
687 | auto & mask = headers.get(cid); |
688 | |
689 | bool exist; |
690 | grid_key_dx<3> p = grid.getChunkPos(cid) + grid_key_dx<3>({-1,0,0}); |
691 | long int r = grid.getChunk(p,exist); |
692 | offset_jump[0] = (r-cid)*decltype(it)::sizeBlock; |
693 | |
694 | p = grid.getChunkPos(cid) + grid_key_dx<3>({1,0,0}); |
695 | r = grid.getChunk(p,exist); |
696 | offset_jump[1] = (r-cid)*decltype(it)::sizeBlock; |
697 | |
698 | p = grid.getChunkPos(cid) + grid_key_dx<3>({0,-1,0}); |
699 | r = grid.getChunk(p,exist); |
700 | offset_jump[2] = (r-cid)*decltype(it)::sizeBlock; |
701 | |
702 | p = grid.getChunkPos(cid) + grid_key_dx<3>({0,1,0}); |
703 | r = grid.getChunk(p,exist); |
704 | offset_jump[3] = (r-cid)*decltype(it)::sizeBlock; |
705 | |
706 | p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,-1}); |
707 | r = grid.getChunk(p,exist); |
708 | offset_jump[4] = (r-cid)*decltype(it)::sizeBlock; |
709 | |
710 | p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,1}); |
711 | r = grid.getChunk(p,exist); |
712 | offset_jump[5] = (r-cid)*decltype(it)::sizeBlock; |
713 | |
714 | // Load offset jumps |
715 | |
716 | // construct a row mask |
717 | |
718 | long int s2 = 0; |
719 | |
720 | typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type sz; |
721 | typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type sy; |
722 | typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type sx; |
723 | |
724 | |
725 | bool mask_row[sx::value]; |
726 | |
727 | for (int k = 0 ; k < sx::value ; k++) |
728 | { |
729 | mask_row[k] = (k >= it.start(0) && k < it.stop(0))?true:false; |
730 | } |
731 | |
732 | for (int v = it.start(2) ; v < it.stop(2) ; v++) |
733 | { |
734 | for (int j = it.start(1) ; j < it.stop(1) ; j++) |
735 | { |
736 | s2 = it.Lin(0,j,v); |
737 | for (int k = 0 ; k < sx::value ; k += Vc::Vector<prop_type>::Size) |
738 | { |
739 | // we do only id exist the point |
740 | if (*(int *)&mask.mask[s2] == 0) {s2 += Vc::Vector<prop_type>::Size; continue;} |
741 | |
742 | data_il<4> mxm; |
743 | data_il<4> mxp; |
744 | data_il<4> mym; |
745 | data_il<4> myp; |
746 | data_il<4> mzm; |
747 | data_il<4> mzp; |
748 | |
749 | cross_stencil_v cs1; |
750 | cross_stencil_v cs2; |
751 | |
752 | Vc::Vector<prop_type> cmd1(&chunk.template get<prop_src1>()[s2]); |
753 | Vc::Vector<prop_type> cmd2(&chunk.template get<prop_src2>()[s2]); |
754 | |
755 | // Load x-1 |
756 | long int sumxm = s2-1; |
757 | sumxm += (k==0)?offset_jump[0] + sx::value:0; |
758 | |
759 | // Load x+1 |
760 | long int sumxp = s2+Vc::Vector<prop_type>::Size; |
761 | sumxp += (k+Vc::Vector<prop_type>::Size == sx::value)?offset_jump[1] - sx::value:0; |
762 | |
763 | long int sumym = (j == 0)?offset_jump[2] + (sy::value-1)*sx::value:-sx::value; |
764 | sumym += s2; |
765 | long int sumyp = (j == sy::value-1)?offset_jump[3] - (sy::value - 1)*sx::value:sx::value; |
766 | sumyp += s2; |
767 | long int sumzm = (v == 0)?offset_jump[4] + (sz::value-1)*sx::value*sy::value:-sx::value*sy::value; |
768 | sumzm += s2; |
769 | long int sumzp = (v == sz::value-1)?offset_jump[5] - (sz::value - 1)*sx::value*sy::value:sx::value*sy::value; |
770 | sumzp += s2; |
771 | |
772 | if (Vc::Vector<prop_type>::Size == 2) |
773 | { |
774 | mxm.i = *(short int *)&mask.mask[s2]; |
775 | mxm.i = mxm.i << 8; |
776 | mxm.i |= (short int)mask.mask[sumxm]; |
777 | |
778 | mxp.i = *(short int *)&mask.mask[s2]; |
779 | mxp.i = mxp.i >> 8; |
780 | mxp.i |= ((short int)mask.mask[sumxp]) << (Vc::Vector<prop_type>::Size - 1)*8; |
781 | |
782 | mym.i = *(short int *)&mask.mask[sumym]; |
783 | myp.i = *(short int *)&mask.mask[sumyp]; |
784 | |
785 | mzm.i = *(short int *)&mask.mask[sumzm]; |
786 | mzp.i = *(short int *)&mask.mask[sumzp]; |
787 | } |
788 | else if (Vc::Vector<prop_type>::Size == 4) |
789 | { |
790 | mxm.i = *(int *)&mask.mask[s2]; |
791 | mxm.i = mxm.i << 8; |
792 | mxm.i |= (int)mask.mask[sumxm]; |
793 | |
794 | mxp.i = *(int *)&mask.mask[s2]; |
795 | mxp.i = mxp.i >> 8; |
796 | mxp.i |= ((int)mask.mask[sumxp]) << (Vc::Vector<prop_type>::Size - 1)*8; |
797 | |
798 | mym.i = *(int *)&mask.mask[sumym]; |
799 | myp.i = *(int *)&mask.mask[sumyp]; |
800 | |
801 | mzm.i = *(int *)&mask.mask[sumzm]; |
802 | mzp.i = *(int *)&mask.mask[sumzp]; |
803 | } |
804 | else |
805 | { |
806 | std::cout << __FILE__ << ":" << __LINE__ << " UNSUPPORTED" << std::endl; |
807 | } |
808 | |
809 | cs1.xm = cmd1; |
810 | cs1.xm = cs1.xm.shifted(-1); |
811 | cs1.xm[0] = chunk.template get<prop_src1>()[sumxm]; |
812 | |
813 | cs2.xm = cmd2; |
814 | cs2.xm = cs2.xm.shifted(-1); |
815 | cs2.xm[0] = chunk.template get<prop_src2>()[sumxm]; |
816 | |
817 | cs1.xp = cmd1; |
818 | cs1.xp = cs1.xp.shifted(1); |
819 | cs1.xp[Vc::Vector<prop_type>::Size - 1] = chunk.template get<prop_src1>()[sumxp]; |
820 | |
821 | cs2.xp = cmd2; |
822 | cs2.xp = cs2.xp.shifted(1); |
823 | cs2.xp[Vc::Vector<prop_type>::Size - 1] = chunk.template get<prop_src2>()[sumxp]; |
824 | |
825 | // Load y and z direction |
826 | |
827 | cs1.ym.load(&chunk.template get<prop_src1>()[sumym],Vc::Aligned); |
828 | cs1.yp.load(&chunk.template get<prop_src1>()[sumyp],Vc::Aligned); |
829 | cs1.zm.load(&chunk.template get<prop_src1>()[sumzm],Vc::Aligned); |
830 | cs1.zp.load(&chunk.template get<prop_src1>()[sumzp],Vc::Aligned); |
831 | |
832 | cs2.ym.load(&chunk.template get<prop_src2>()[sumym],Vc::Aligned); |
833 | cs2.yp.load(&chunk.template get<prop_src2>()[sumyp],Vc::Aligned); |
834 | cs2.zm.load(&chunk.template get<prop_src2>()[sumzm],Vc::Aligned); |
835 | cs2.zp.load(&chunk.template get<prop_src2>()[sumzp],Vc::Aligned); |
836 | |
837 | // Calculate |
838 | |
839 | data_il<4> tot_m; |
840 | tot_m.i = mxm.i + mxp.i + mym.i + myp.i + mzm.i + mzp.i; |
841 | |
842 | Vc::Vector<prop_type> res1; |
843 | Vc::Vector<prop_type> res2; |
844 | |
845 | func(res1,res2,cmd1,cmd2,cs1,cs2,tot_m.uc,args ... ); |
846 | |
847 | Vc::Mask<prop_type> m(&mask_row[k]); |
848 | |
849 | res1.store(&chunk.template get<prop_dst1>()[s2],m,Vc::Aligned); |
850 | res2.store(&chunk.template get<prop_dst2>()[s2],m,Vc::Aligned); |
851 | |
852 | s2 += Vc::Vector<prop_type>::Size; |
853 | } |
854 | } |
855 | } |
856 | |
857 | ++it; |
858 | } |
859 | } |
860 | |
861 | template<bool findNN, unsigned int stencil_size, typename prop_type, typename SparseGridType, typename lambda_f, typename ... ArgsT > |
862 | static void conv_cross_ids(grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args) |
863 | { |
864 | auto it = grid.template getBlockIterator<stencil_size>(start,stop); |
865 | |
866 | auto & datas = grid.private_get_data(); |
867 | auto & = grid.private_get_header_mask(); |
868 | |
869 | typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0; |
870 | typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1; |
871 | typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2; |
872 | |
873 | typedef typename SparseGridType::chunking_type chunking; |
874 | |
875 | while (it.isNext()) |
876 | { |
877 | // Load |
878 | long int offset_jump[6]; |
879 | |
880 | size_t cid = it.getChunkId(); |
881 | |
882 | auto chunk = datas.get(cid); |
883 | auto & mask = headers.get(cid); |
884 | |
885 | bool exist; |
886 | grid_key_dx<3> p = grid.getChunkPos(cid) + grid_key_dx<3>({-1,0,0}); |
887 | long int r = grid.getChunk(p,exist); |
888 | offset_jump[0] = (r-cid)*decltype(it)::sizeBlock; |
889 | |
890 | p = grid.getChunkPos(cid) + grid_key_dx<3>({1,0,0}); |
891 | r = grid.getChunk(p,exist); |
892 | offset_jump[1] = (r-cid)*decltype(it)::sizeBlock; |
893 | |
894 | p = grid.getChunkPos(cid) + grid_key_dx<3>({0,-1,0}); |
895 | r = grid.getChunk(p,exist); |
896 | offset_jump[2] = (r-cid)*decltype(it)::sizeBlock; |
897 | |
898 | p = grid.getChunkPos(cid) + grid_key_dx<3>({0,1,0}); |
899 | r = grid.getChunk(p,exist); |
900 | offset_jump[3] = (r-cid)*decltype(it)::sizeBlock; |
901 | |
902 | p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,-1}); |
903 | r = grid.getChunk(p,exist); |
904 | offset_jump[4] = (r-cid)*decltype(it)::sizeBlock; |
905 | |
906 | p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,1}); |
907 | r = grid.getChunk(p,exist); |
908 | offset_jump[5] = (r-cid)*decltype(it)::sizeBlock; |
909 | |
910 | // Load offset jumps |
911 | |
912 | // construct a row mask |
913 | |
914 | long int s2 = 0; |
915 | |
916 | typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type sz; |
917 | typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type sy; |
918 | typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type sx; |
919 | |
920 | ids_crs<3,sx::value> ids; |
921 | |
922 | for (int k = 0 ; k < sx::value ; k++) |
923 | { |
924 | ids.mask_row[k] = (k >= it.start(0) && k < it.stop(0))?true:false; |
925 | } |
926 | |
927 | for (int v = it.start(2) ; v < it.stop(2) ; v++) |
928 | { |
929 | for (int j = it.start(1) ; j < it.stop(1) ; j++) |
930 | { |
931 | s2 = it.Lin(0,j,v); |
932 | for (int k = 0 ; k < sx::value ; k += Vc::Vector<prop_type>::Size) |
933 | { |
934 | // we do only id exist the point |
935 | if (*(int *)&mask.mask[s2] == 0) {s2 += Vc::Vector<prop_type>::Size; continue;} |
936 | |
937 | data_il<4> mxm; |
938 | data_il<4> mxp; |
939 | data_il<4> mym; |
940 | data_il<4> myp; |
941 | data_il<4> mzm; |
942 | data_il<4> mzp; |
943 | |
944 | ids.k = k; |
945 | |
946 | // Load x-1 |
947 | ids.sumdm[0] = s2-1; |
948 | ids.sumdm[0] += (k==0)?offset_jump[0] + sx::value:0; |
949 | |
950 | // Load x+1 |
951 | ids.sumdp[0] = s2+Vc::Vector<prop_type>::Size; |
952 | ids.sumdp[0] += (k+Vc::Vector<prop_type>::Size == sx::value)?offset_jump[1] - sx::value:0; |
953 | |
954 | ids.sumdm[1] = (j == 0)?offset_jump[2] + (sy::value-1)*sx::value:-sx::value; |
955 | ids.sumdm[1] += s2; |
956 | ids.sumdp[1] = (j == sy::value-1)?offset_jump[3] - (sy::value - 1)*sx::value:sx::value; |
957 | ids.sumdp[1] += s2; |
958 | ids.sumdm[2] = (v == 0)?offset_jump[4] + (sz::value-1)*sx::value*sy::value:-sx::value*sy::value; |
959 | ids.sumdm[2] += s2; |
960 | ids.sumdp[2] = (v == sz::value-1)?offset_jump[5] - (sz::value - 1)*sx::value*sy::value:sx::value*sy::value; |
961 | ids.sumdp[2] += s2; |
962 | |
963 | ids.s2 = s2; |
964 | |
965 | if (Vc::Vector<prop_type>::Size == 2) |
966 | { |
967 | mxm.i = *(short int *)&mask.mask[s2]; |
968 | mxm.i = mxm.i << 8; |
969 | mxm.i |= (short int)mask.mask[ids.sumdm[0]]; |
970 | |
971 | mxp.i = *(short int *)&mask.mask[s2]; |
972 | mxp.i = mxp.i >> 8; |
973 | mxp.i |= ((short int)mask.mask[ids.sumdp[0]]) << (Vc::Vector<prop_type>::Size - 1)*8; |
974 | |
975 | mym.i = *(short int *)&mask.mask[ids.sumdm[1]]; |
976 | myp.i = *(short int *)&mask.mask[ids.sumdp[1]]; |
977 | |
978 | mzm.i = *(short int *)&mask.mask[ids.sumdm[2]]; |
979 | mzp.i = *(short int *)&mask.mask[ids.sumdp[2]]; |
980 | } |
981 | else if (Vc::Vector<prop_type>::Size == 4) |
982 | { |
983 | mxm.i = *(int *)&mask.mask[s2]; |
984 | mxm.i = mxm.i << 8; |
985 | mxm.i |= (int)mask.mask[ids.sumdm[0]]; |
986 | |
987 | mxp.i = *(int *)&mask.mask[s2]; |
988 | mxp.i = mxp.i >> 8; |
989 | mxp.i |= ((int)mask.mask[ids.sumdp[0]]) << (Vc::Vector<prop_type>::Size - 1)*8; |
990 | |
991 | mym.i = *(int *)&mask.mask[ids.sumdm[1]]; |
992 | myp.i = *(int *)&mask.mask[ids.sumdp[1]]; |
993 | |
994 | mzm.i = *(int *)&mask.mask[ids.sumdm[2]]; |
995 | mzp.i = *(int *)&mask.mask[ids.sumdp[2]]; |
996 | } |
997 | else |
998 | { |
999 | std::cout << __FILE__ << ":" << __LINE__ << " UNSUPPORTED" << std::endl; |
1000 | } |
1001 | |
1002 | // Calculate |
1003 | |
1004 | data_il<4> tot_m; |
1005 | tot_m.i = mxm.i + mxp.i + mym.i + myp.i + mzm.i + mzp.i; |
1006 | |
1007 | func(chunk,ids,tot_m.uc,args ... ); |
1008 | |
1009 | s2 += Vc::Vector<prop_type>::Size; |
1010 | } |
1011 | } |
1012 | } |
1013 | |
1014 | ++it; |
1015 | } |
1016 | } |
1017 | |
1018 | }; |
1019 | |
1020 | #endif |
1021 | |
1022 | |
1023 | #endif /* SPARSEGRID_CONV_OPT_HPP_ */ |
1024 | |