SparseGrid_conv_opt.hpp source code [openfpm/openfpm_data/src/SparseGrid/SparseGrid_conv_opt.hpp]

1	/*
2	* SparseGrid_conv_opt.hpp
3	*
4	* Created on: Jul 19, 2020
5	* Author: i-bird
6	*/
7
8	#ifndef SPARSEGRID_CONV_OPT_HPP_
9	#define SPARSEGRID_CONV_OPT_HPP_
10
11	template<unsigned int l>
12	union data_il
13	{
14	};
15
16	template<>
17	union data_il<`8`>
18	{
19	typedef long int type;
20
21	unsigned char uc[`8`];
22	long int i;
23	};
24
25	template<>
26	union data_il<`4`>
27	{
28	typedef int type;
29
30	unsigned char uc[`4`];
31	int i;
32	};
33
34	template<>
35	union data_il<`2`>
36	{
37	typedef short int type;
38
39	unsigned char uc[`2`];
40	short int i;
41	};
42
43	template<>
44	union data_il<`1`>
45	{
46	typedef char type;
47
48	unsigned char uc[`4`];
49	char i;
50	};
51
52
53	template<unsigned int dim, unsigned int sz>
54	struct ids_crs
55	{
56	long int sumdm[dim];
57	long int sumdp[dim];
58
59	long int s2;
60	bool mask_row[sz];
61	int k;
62	};
63
64
65
66	template<unsigned int dim>
67	struct conv_impl
68	{
69	template<unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size , unsigned int N, typename SparseGridType, typename lambda_f, typename ... ArgsT >
70	void conv(int (& stencil)[N][`3`], grid_key_dx<`3`> & start, grid_key_dx<`3`> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
71	{
72	#ifndef __NVCC__
73	std::cout << __FILE__ << ":" << __LINE__ << " error conv operation not implemented for this dimension " << std::endl;
74	#else
75	std::cout << __FILE__ << ":" << __LINE__ << " error conv is unsupported when compiled on NVCC " << std::endl;
76	#endif
77	}
78
79	template<bool findNN, unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size, typename SparseGridType, typename lambda_f, typename ... ArgsT >
80	static void conv_cross(grid_key_dx<`3`> & start, grid_key_dx<`3`> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
81	{
82	#ifndef __NVCC__
83	std::cout << __FILE__ << ":" << __LINE__ << " error conv_cross operation not implemented for this dimension " << std::endl;
84	#else
85	std::cout << __FILE__ << ":" << __LINE__ << " error conv_cross is unsupported when compiled on NVCC " << std::endl;
86	#endif
87	}
88
89	template<bool findNN, typename NNType, unsigned int prop_src1, unsigned int prop_src2,
90	unsigned int prop_dst1, unsigned int prop_dst2,
91	unsigned int stencil_size , unsigned int N,
92	typename SparseGridType, typename lambda_f, typename ... ArgsT >
93	static void conv2(int (& stencil)[N][`3`], grid_key_dx<`3`> & start, grid_key_dx<`3`> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
94	{
95	#ifndef __NVCC__
96	std::cout << __FILE__ << ":" << __LINE__ << " error conv2 operation not implemented for this dimension " << std::endl;
97	#else
98	std::cout << __FILE__ << ":" << __LINE__ << " error conv2 is unsupported when compiled on NVCC " << std::endl;
99	#endif
100	}
101
102	template<bool findNN, unsigned int prop_src1, unsigned int prop_src2, unsigned int prop_dst1, unsigned int prop_dst2, unsigned int stencil_size, typename SparseGridType, typename lambda_f, typename ... ArgsT >
103	static void conv_cross2(grid_key_dx<`3`> & start, grid_key_dx<`3`> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
104	{
105	#ifndef __NVCC__
106	std::cout << __FILE__ << ":" << __LINE__ << " error conv_cross2 operation not implemented for this dimension " << std::endl;
107	#else
108	std::cout << __FILE__ << ":" << __LINE__ << " error conv_cross2 is unsupported when compiled on NVCC " << std::endl;
109	#endif
110	}
111	};
112
113	#if !defined(__NVCC__) \|\| defined(CUDA_ON_CPU)
114
115
116	template<unsigned int dir,int p, unsigned int prop_src1,typename chunk_type, typename vect_type, typename ids_type>
117	void load_crs(vect_type & cs1, chunk_type & chunk, ids_type & ids)
118	{
119	if (dir == `0` && p < `0`)
120	{
121	Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[ids.s2]);
122
123	cs1 = cmd1;
124	cs1 = cs1.shifted(-`1`);
125	cs1[`0`] = chunk.template get<prop_src1>()[ids.sumdm[dir]];
126	}
127	else if (dir == `0` && p > `0`)
128	{
129	Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[ids.s2]);
130
131	cs1 = cmd1;
132	cs1 = cs1.shifted(`1`);
133	cs1[Vc::Vector<typename vect_type::EntryType>::Size - `1`] = chunk.template get<prop_src1>()[ids.sumdp[`0`]];
134	}
135	else if (p < `0`)
136	{
137	cs1.load(&chunk.template get<prop_src1>()[ids.sumdm[dir]],Vc::Aligned);
138	}
139	else if (p > `0`)
140	{
141	cs1.load(&chunk.template get<prop_src1>()[ids.sumdp[dir]],Vc::Aligned);
142	}
143	else
144	{
145	Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[ids.s2]);
146
147	cs1 = cmd1;
148	}
149	}
150
151	template<unsigned int prop_dst1,typename chunk_type, typename vect_type, typename ids_type>
152	void store_crs(chunk_type & chunk, vect_type & res, ids_type & ids)
153	{
154	Vc::Mask<typename vect_type::EntryType> m(&ids.mask_row[ids.k]);
155
156	res.store(&chunk.template get<prop_dst1>()[ids.s2],m,Vc::Aligned);
157	}
158
159	template<unsigned int prop_dst1,unsigned int comp, typename chunk_type, typename vect_type, typename ids_type>
160	void store_crs_v(chunk_type & chunk, vect_type & res, ids_type & ids)
161	{
162	Vc::Mask<typename vect_type::EntryType> m(&ids.mask_row[ids.k]);
163
164	res.store(&chunk.template get<prop_dst1>()[comp][ids.s2],m,Vc::Aligned);
165	}
166
167	template<unsigned int dir,int p, unsigned int comp, unsigned int prop_src1,typename chunk_type, typename vect_type, typename ids_type>
168	void load_crs_v(vect_type & cs1, chunk_type & chunk, ids_type & ids)
169	{
170	if (dir == `0` && p < `0`)
171	{
172	Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[comp][ids.s2]);
173
174	cs1 = cmd1;
175	cs1 = cs1.shifted(-`1`);
176	cs1[`0`] = chunk.template get<prop_src1>()[comp][ids.sumdm[dir]];
177	}
178	else if (dir == `0` && p > `0`)
179	{
180	Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[comp][ids.s2]);
181
182	cs1 = cmd1;
183	cs1 = cs1.shifted(`1`);
184	cs1[Vc::Vector<typename vect_type::EntryType>::Size - `1`] = chunk.template get<prop_src1>()[comp][ids.sumdp[dir]];
185	}
186	else if (p < `0`)
187	{
188	cs1.load(&chunk.template get<prop_src1>()[comp][ids.sumdm[dir]],Vc::Aligned);
189	}
190	else if (p > `0`)
191	{
192	cs1.load(&chunk.template get<prop_src1>()[comp][ids.sumdp[dir]],Vc::Aligned);
193	}
194	else
195	{
196	Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[comp][ids.s2]);
197
198	cs1 = cmd1;
199	}
200	}
201
202	struct cross_stencil_v
203	{
204	Vc::double_v xm;
205	Vc::double_v xp;
206	Vc::double_v ym;
207	Vc::double_v yp;
208	Vc::double_v zm;
209	Vc::double_v zp;
210	};
211
212	template<>
213	struct conv_impl<`3`>
214	{
215	template<bool findNN, typename NNtype, unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size , unsigned int N, typename SparseGridType, typename lambda_f, typename ... ArgsT >
216	static void conv(int (& stencil)[N][`3`], grid_key_dx<`3`> & start, grid_key_dx<`3`> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
217	{
218	auto it = grid.template getBlockIterator<stencil_size>(start,stop);
219
220	typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src>>::type prop_type;
221
222	unsigned char mask[decltype(it)::sizeBlockBord];
223	unsigned char mask_sum[decltype(it)::sizeBlockBord];
224	unsigned char mask_unused[decltype(it)::sizeBlock];
225	__attribute__ ((aligned (`32`))) prop_type block_bord_src[decltype(it)::sizeBlockBord];
226	__attribute__ ((aligned (`32`))) prop_type block_bord_dst[decltype(it)::sizeBlock];
227
228	typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<`0`>>::type sz0;
229	typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<`1`>>::type sz1;
230	typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<`2`>>::type sz2;
231
232	while (it.isNext())
233	{
234	it.template loadBlockBorder<prop_src,NNtype,findNN>(block_bord_src,mask);
235
236	if (it.start_b(`2`) != stencil_size \|\| it.start_b(`1`) != stencil_size \|\| it.start_b(`0`) != stencil_size \|\|
237	it.stop_b(`2`) != sz2::value+stencil_size \|\| it.stop_b(`1`) != sz1::value+stencil_size \|\| it.stop_b(`0`) != sz0::value+stencil_size)
238	{
239	auto & header_mask = grid.private_get_header_mask();
240	auto & header_inf = grid.private_get_header_inf();
241
242	loadBlock_impl<prop_dst,`0`,`3`,typename decltype(it)::vector_blocks_exts_type, typename decltype(it)::vector_ext_type>::template loadBlock<decltype(it)::sizeBlock>(block_bord_dst,grid,it.getChunkId(),mask_unused);
243	}
244
245	// Sum the mask
246	for (int k = it.start_b(`2`) ; k < it.stop_b(`2`) ; k++)
247	{
248	for (int j = it.start_b(`1`) ; j < it.stop_b(`1`) ; j++)
249	{
250	int cc = it.LinB(it.start_b(`0`),j,k);
251	int c[N];
252
253	for (int s = `0` ; s < N ; s++)
254	{
255	c[s] = it.LinB(it.start_b(`0`)+stencil[s][`0`],j+stencil[s][`1`],k+stencil[s][`2`]);
256	}
257
258	for (int i = it.start_b(`0`) ; i < it.stop_b(`0`) ; i += sizeof(size_t))
259	{
260	size_t cmd = (size_t )&mask[cc];
261
262	if (cmd != `0`)
263	{
264	size_t xm[N];
265
266	for (int s = `0` ; s < N ; s++)
267	{
268	xm[s] = (size_t )&mask[c[s]];
269	}
270
271	size_t sum = `0`;
272	for (int s = `0` ; s < N ; s++)
273	{
274	sum += xm[s];
275	}
276
277	(size_t )&mask_sum[cc] = sum;
278	}
279
280	cc += sizeof(size_t);
281	for (int s = `0` ; s < N ; s++)
282	{
283	c[s] += sizeof(size_t);
284	}
285	}
286	}
287	}
288
289	for (int k = it.start_b(`2`) ; k < it.stop_b(`2`) ; k++)
290	{
291	for (int j = it.start_b(`1`) ; j < it.stop_b(`1`) ; j++)
292	{
293	int cc = it.LinB(it.start_b(`0`),j,k);
294	int c[N];
295
296	int cd = it.LinB_off(it.start_b(`0`),j,k);
297
298	for (int s = `0` ; s < N ; s++)
299	{
300	c[s] = it.LinB(it.start_b(`0`)+stencil[s][`0`],j+stencil[s][`1`],k+stencil[s][`2`]);
301	}
302
303	for (int i = it.start_b(`0`) ; i < it.stop_b(`0`) ; i += Vc::Vector<prop_type>::Size)
304	{
305	Vc::Mask<prop_type> cmp;
306
307	for (int s = `0` ; s < Vc::Vector<prop_type>::Size ; s++)
308	{
309	cmp[s] = (mask[cc+s] == true && i+s < it.stop_b(`0`));
310	}
311
312	// we do only if exist the point
313	if (Vc::none_of(cmp) == false)
314	{
315	Vc::Mask<prop_type> surround;
316
317	Vc::Vector<prop_type> xs[N+`1`];
318
319	xs[`0`] = Vc::Vector<prop_type>(&block_bord_src[cc],Vc::Unaligned);
320
321	for (int s = `1` ; s < N+`1` ; s++)
322	{
323	xs[s] = Vc::Vector<prop_type>(&block_bord_src[c[s-`1`]],Vc::Unaligned);
324	}
325
326	auto res = func(xs, &mask_sum[cc], args ...);
327
328	res.store(&block_bord_dst[cd],cmp,Vc::Aligned);
329	}
330
331	cc += Vc::Vector<prop_type>::Size;
332	for (int s = `0` ; s < N ; s++)
333	{
334	c[s] += Vc::Vector<prop_type>::Size;
335	}
336	cd += Vc::Vector<prop_type>::Size;
337	}
338	}
339	}
340
341	it.template storeBlock<prop_dst>(block_bord_dst);
342
343	++it;
344	}
345	}
346
347	template<bool findNN, unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size, typename SparseGridType, typename lambda_f, typename ... ArgsT >
348	static void conv_cross(grid_key_dx<`3`> & start, grid_key_dx<`3`> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
349	{
350	auto it = grid.template getBlockIterator<`1`>(start,stop);
351
352	auto & datas = grid.private_get_data();
353	auto & headers = grid.private_get_header_mask();
354
355	typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<`0`>>::type sz0;
356	typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<`1`>>::type sz1;
357	typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<`2`>>::type sz2;
358
359	typedef typename SparseGridType::chunking_type chunking;
360
361	typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src>>::type prop_type;
362
363	while (it.isNext())
364	{
365	// Load
366	long int offset_jump[`6`];
367
368	size_t cid = it.getChunkId();
369
370	auto chunk = datas.get(cid);
371	auto & mask = headers.get(cid);
372
373	bool exist;
374	grid_key_dx<`3`> p = grid.getChunkPos(cid) + grid_key_dx<`3`>({-`1`,`0`,`0`});
375	long int r = grid.getChunk(p,exist);
376	offset_jump[`0`] = (r-cid)*decltype(it)::sizeBlock;
377
378	p = grid.getChunkPos(cid) + grid_key_dx<`3`>({`1`,`0`,`0`});
379	r = grid.getChunk(p,exist);
380	offset_jump[`1`] = (r-cid)*decltype(it)::sizeBlock;
381
382	p = grid.getChunkPos(cid) + grid_key_dx<`3`>({`0`,-`1`,`0`});
383	r = grid.getChunk(p,exist);
384	offset_jump[`2`] = (r-cid)*decltype(it)::sizeBlock;
385
386	p = grid.getChunkPos(cid) + grid_key_dx<`3`>({`0`,`1`,`0`});
387	r = grid.getChunk(p,exist);
388	offset_jump[`3`] = (r-cid)*decltype(it)::sizeBlock;
389
390	p = grid.getChunkPos(cid) + grid_key_dx<`3`>({`0`,`0`,-`1`});
391	r = grid.getChunk(p,exist);
392	offset_jump[`4`] = (r-cid)*decltype(it)::sizeBlock;
393
394	p = grid.getChunkPos(cid) + grid_key_dx<`3`>({`0`,`0`,`1`});
395	r = grid.getChunk(p,exist);
396	offset_jump[`5`] = (r-cid)*decltype(it)::sizeBlock;
397
398	// Load offset jumps
399
400	// construct a row mask
401
402	long int s2 = `0`;
403
404	typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<`2`>>::type sz;
405	typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<`1`>>::type sy;
406	typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<`0`>>::type sx;
407
408
409	bool mask_row[sx::value];
410
411	for (int k = `0` ; k < sx::value ; k++)
412	{
413	mask_row[k] = (k >= it.start(`0`) && k < it.stop(`0`))?true:false;
414	}
415
416	for (int v = it.start(`2`) ; v < it.stop(`2`) ; v++)
417	{
418	for (int j = it.start(`1`) ; j < it.stop(`1`) ; j++)
419	{
420	s2 = it.Lin(`0`,j,v);
421	for (int k = `0` ; k < sx::value ; k += Vc::Vector<prop_type>::Size)
422	{
423	// we do only id exist the point
424	if ((int* )&mask.mask[s2] == `0`) {s2 += Vc::Vector<prop_type>::Size; continue*;}
425
426	data_il<Vc::Vector<prop_type>::Size> mxm;
427	data_il<Vc::Vector<prop_type>::Size> mxp;
428	data_il<Vc::Vector<prop_type>::Size> mym;
429	data_il<Vc::Vector<prop_type>::Size> myp;
430	data_il<Vc::Vector<prop_type>::Size> mzm;
431	data_il<Vc::Vector<prop_type>::Size> mzp;
432
433	cross_stencil_v cs;
434
435	Vc::Vector<prop_type> cmd(&chunk.template get<prop_src>()[s2]);
436
437	// Load x-1
438	long int sumxm = s2-`1`;
439	sumxm += (k==`0`)?offset_jump[`0`] + sx::value:`0`;
440
441	// Load x+1
442	long int sumxp = s2+Vc::Vector<prop_type>::Size;
443	sumxp += (k+Vc::Vector<prop_type>::Size == sx::value)?offset_jump[`1`] - sx::value:`0`;
444
445	long int sumym = (j == `0`)?offset_jump[`2`] + (sy::value-`1`)*sx::value:-sx::value;
446	sumym += s2;
447	long int sumyp = (j == sy::value-`1`)?offset_jump[`3`] - (sy::value - `1`)*sx::value:sx::value;
448	sumyp += s2;
449	long int sumzm = (v == `0`)?offset_jump[`4`] + (sz::value-`1`)sx::valuesy::value:-sx::value*sy::value;
450	sumzm += s2;
451	long int sumzp = (v == sz::value-`1`)?offset_jump[`5`] - (sz::value - `1`)sx::valuesy::value:sx::value*sy::value;
452	sumzp += s2;
453
454	if (Vc::Vector<prop_type>::Size == `2`)
455	{
456	mxm.i = (short* int *)&mask.mask[s2];
457	mxm.i = mxm.i << `8`;
458	mxm.i \|= (short int)mask.mask[sumxm];
459
460	mxp.i = (short* int *)&mask.mask[s2];
461	mxp.i = mxp.i >> `8`;
462	mxp.i \|= ((short int)mask.mask[sumxp]) << (Vc::Vector<prop_type>::Size - `1`)*`8`;
463
464	mym.i = (short* int *)&mask.mask[sumym];
465	myp.i = (short* int *)&mask.mask[sumyp];
466
467	mzm.i = (short* int *)&mask.mask[sumzm];
468	mzp.i = (short* int *)&mask.mask[sumzp];
469	}
470	else if (Vc::Vector<prop_type>::Size == `4`)
471	{
472	mxm.i = (int* *)&mask.mask[s2];
473	mxm.i = mxm.i << `8`;
474	mxm.i \|= (int)mask.mask[sumxm];
475
476	mxp.i = (int* *)&mask.mask[s2];
477	mxp.i = mxp.i >> `8`;
478	mxp.i \|= ((int)mask.mask[sumxp]) << (Vc::Vector<prop_type>::Size - `1`)*`8`;
479
480	mym.i = (int* *)&mask.mask[sumym];
481	myp.i = (int* *)&mask.mask[sumyp];
482
483	mzm.i = (int* *)&mask.mask[sumzm];
484	mzp.i = (int* *)&mask.mask[sumzp];
485	}
486	else
487	{
488	std::cout << __FILE__ << ":" << __LINE__ << " UNSUPPORTED" << std::endl;
489	}
490
491	cs.xm = cmd;
492	cs.xm = cs.xm.shifted(-`1`);
493	cs.xm[`0`] = chunk.template get<prop_src>()[sumxm];
494
495
496	cs.xp = cmd;
497	cs.xp = cs.xp.shifted(`1`);
498	cs.xp[Vc::Vector<prop_type>::Size - `1`] = chunk.template get<prop_src>()[sumxp];
499
500	// Load y and z direction
501
502	cs.ym.load(&chunk.template get<prop_src>()[sumym],Vc::Aligned);
503	cs.yp.load(&chunk.template get<prop_src>()[sumyp],Vc::Aligned);
504	cs.zm.load(&chunk.template get<prop_src>()[sumzm],Vc::Aligned);
505	cs.zp.load(&chunk.template get<prop_src>()[sumzp],Vc::Aligned);
506
507	// Calculate
508
509	data_il<Vc::Vector<prop_type>::Size> tot_m;
510	tot_m.i = mxm.i + mxp.i + mym.i + myp.i + mzm.i + mzp.i;
511
512	Vc::Vector<prop_type> res = func(cmd,cs,tot_m.uc,args ... );
513
514	Vc::Mask<prop_type> m(&mask_row[k]);
515
516	res.store(&chunk.template get<prop_dst>()[s2],m,Vc::Aligned);
517
518	s2 += Vc::Vector<prop_type>::Size;
519	}
520	}
521	}
522
523	++it;
524	}
525	}
526
527
528	template<bool findNN, typename NNType, unsigned int prop_src1, unsigned int prop_src2,
529	unsigned int prop_dst1, unsigned int prop_dst2,
530	unsigned int stencil_size , unsigned int N,
531	typename SparseGridType, typename lambda_f, typename ... ArgsT >
532	static void conv2(int (& stencil)[N][`3`], grid_key_dx<`3`> & start, grid_key_dx<`3`> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
533	{
534	auto it = grid.template getBlockIterator<stencil_size>(start,stop);
535
536	typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src1>>::type prop_type;
537
538	unsigned char mask[decltype(it)::sizeBlockBord];
539	unsigned char mask_sum[decltype(it)::sizeBlockBord];
540	__attribute__ ((aligned (`64`))) prop_type block_bord_src1[decltype(it)::sizeBlockBord];
541	__attribute__ ((aligned (`64`))) prop_type block_bord_dst1[decltype(it)::sizeBlock+`16`];
542	__attribute__ ((aligned (`64`))) prop_type block_bord_src2[decltype(it)::sizeBlockBord];
543	__attribute__ ((aligned (`64`))) prop_type block_bord_dst2[decltype(it)::sizeBlock+`16`];
544
545	typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<`0`>>::type sz0;
546	typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<`1`>>::type sz1;
547	typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<`2`>>::type sz2;
548
549	while (it.isNext())
550	{
551	it.template loadBlockBorder<prop_src1,NNType,findNN>(block_bord_src1,mask);
552	it.template loadBlockBorder<prop_src2,NNType,findNN>(block_bord_src2,mask);
553
554	// Sum the mask
555	for (int k = it.start_b(`2`) ; k < it.stop_b(`2`) ; k++)
556	{
557	for (int j = it.start_b(`1`) ; j < it.stop_b(`1`) ; j++)
558	{
559	int cc = it.LinB(it.start_b(`0`),j,k);
560	int c[N];
561
562	for (int s = `0` ; s < N ; s++)
563	{
564	c[s] = it.LinB(it.start_b(`0`)+stencil[s][`0`],j+stencil[s][`1`],k+stencil[s][`2`]);
565	}
566
567	for (int i = it.start_b(`0`) ; i < it.stop_b(`0`) ; i += sizeof(size_t))
568	{
569	size_t cmd = (size_t )&mask[cc];
570
571	if (cmd == `0`) {continue;}
572
573
574	size_t xm[N];
575
576	for (int s = `0` ; s < N ; s++)
577	{
578	xm[s] = (size_t )&mask[c[s]];
579	}
580
581	size_t sum = `0`;
582	for (int s = `0` ; s < N ; s++)
583	{
584	sum += xm[s];
585	}
586
587	(size_t )&mask_sum[cc] = sum;
588
589	cc += sizeof(size_t);
590	for (int s = `0` ; s < N ; s++)
591	{
592	c[s] += sizeof(size_t);
593	}
594	}
595	}
596	}
597
598	for (int k = it.start_b(`2`) ; k < it.stop_b(`2`) ; k++)
599	{
600	for (int j = it.start_b(`1`) ; j < it.stop_b(`1`) ; j++)
601	{
602	int cc = it.LinB(it.start_b(`0`),j,k);
603	int c[N];
604
605	int cd = it.LinB_off(it.start_b(`0`),j,k);
606
607	for (int s = `0` ; s < N ; s++)
608	{
609	c[s] = it.LinB(it.start_b(`0`)+stencil[s][`0`],j+stencil[s][`1`],k+stencil[s][`2`]);
610	}
611
612	for (int i = it.start_b(`0`) ; i < it.stop_b(`0`) ; i += Vc::Vector<prop_type>::Size)
613	{
614	Vc::Mask<prop_type> cmp;
615
616	for (int s = `0` ; s < Vc::Vector<prop_type>::Size ; s++)
617	{
618	cmp[s] = (mask[cc+s] == true);
619	}
620
621	// we do only id exist the point
622	if (Vc::none_of(cmp) == true) {continue;}
623
624	Vc::Mask<prop_type> surround;
625
626	Vc::Vector<prop_type> xs1[N+`1`];
627	Vc::Vector<prop_type> xs2[N+`1`];
628
629	xs1[`0`] = Vc::Vector<prop_type>(&block_bord_src1[cc],Vc::Unaligned);
630	xs2[`0`] = Vc::Vector<prop_type>(&block_bord_src2[cc],Vc::Unaligned);
631
632	for (int s = `1` ; s < N+`1` ; s++)
633	{
634	xs1[s] = Vc::Vector<prop_type>(&block_bord_src1[c[s-`1`]],Vc::Unaligned);
635	xs2[s] = Vc::Vector<prop_type>(&block_bord_src2[c[s-`1`]],Vc::Unaligned);
636	}
637
638	Vc::Vector<prop_type> vo1;
639	Vc::Vector<prop_type> vo2;
640
641	func(vo1, vo2, xs1, xs2, &mask_sum[cc], args ...);
642
643	vo1.store(&block_bord_dst1[cd],cmp,Vc::Unaligned);
644	vo2.store(&block_bord_dst2[cd],cmp,Vc::Unaligned);
645
646	cc += Vc::Vector<prop_type>::Size;
647	for (int s = `0` ; s < N ; s++)
648	{
649	c[s] += Vc::Vector<prop_type>::Size;
650	}
651	cd += Vc::Vector<prop_type>::Size;
652	}
653	}
654	}
655
656	it.template storeBlock<prop_dst1>(block_bord_dst1);
657	it.template storeBlock<prop_dst2>(block_bord_dst2);
658
659	++it;
660	}
661	}
662
663	template<bool findNN, unsigned int prop_src1, unsigned int prop_src2, unsigned int prop_dst1, unsigned int prop_dst2, unsigned int stencil_size, typename SparseGridType, typename lambda_f, typename ... ArgsT >
664	static void conv_cross2(grid_key_dx<`3`> & start, grid_key_dx<`3`> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
665	{
666	auto it = grid.template getBlockIterator<stencil_size>(start,stop);
667
668	auto & datas = grid.private_get_data();
669	auto & headers = grid.private_get_header_mask();
670
671	typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<`0`>>::type sz0;
672	typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<`1`>>::type sz1;
673	typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<`2`>>::type sz2;
674
675	typedef typename SparseGridType::chunking_type chunking;
676
677	typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src1>>::type prop_type;
678
679	while (it.isNext())
680	{
681	// Load
682	long int offset_jump[`6`];
683
684	size_t cid = it.getChunkId();
685
686	auto chunk = datas.get(cid);
687	auto & mask = headers.get(cid);
688
689	bool exist;
690	grid_key_dx<`3`> p = grid.getChunkPos(cid) + grid_key_dx<`3`>({-`1`,`0`,`0`});
691	long int r = grid.getChunk(p,exist);
692	offset_jump[`0`] = (r-cid)*decltype(it)::sizeBlock;
693
694	p = grid.getChunkPos(cid) + grid_key_dx<`3`>({`1`,`0`,`0`});
695	r = grid.getChunk(p,exist);
696	offset_jump[`1`] = (r-cid)*decltype(it)::sizeBlock;
697
698	p = grid.getChunkPos(cid) + grid_key_dx<`3`>({`0`,-`1`,`0`});
699	r = grid.getChunk(p,exist);
700	offset_jump[`2`] = (r-cid)*decltype(it)::sizeBlock;
701
702	p = grid.getChunkPos(cid) + grid_key_dx<`3`>({`0`,`1`,`0`});
703	r = grid.getChunk(p,exist);
704	offset_jump[`3`] = (r-cid)*decltype(it)::sizeBlock;
705
706	p = grid.getChunkPos(cid) + grid_key_dx<`3`>({`0`,`0`,-`1`});
707	r = grid.getChunk(p,exist);
708	offset_jump[`4`] = (r-cid)*decltype(it)::sizeBlock;
709
710	p = grid.getChunkPos(cid) + grid_key_dx<`3`>({`0`,`0`,`1`});
711	r = grid.getChunk(p,exist);
712	offset_jump[`5`] = (r-cid)*decltype(it)::sizeBlock;
713
714	// Load offset jumps
715
716	// construct a row mask
717
718	long int s2 = `0`;
719
720	typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<`2`>>::type sz;
721	typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<`1`>>::type sy;
722	typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<`0`>>::type sx;
723
724
725	bool mask_row[sx::value];
726
727	for (int k = `0` ; k < sx::value ; k++)
728	{
729	mask_row[k] = (k >= it.start(`0`) && k < it.stop(`0`))?true:false;
730	}
731
732	for (int v = it.start(`2`) ; v < it.stop(`2`) ; v++)
733	{
734	for (int j = it.start(`1`) ; j < it.stop(`1`) ; j++)
735	{
736	s2 = it.Lin(`0`,j,v);
737	for (int k = `0` ; k < sx::value ; k += Vc::Vector<prop_type>::Size)
738	{
739	// we do only id exist the point
740	if ((int* )&mask.mask[s2] == `0`) {s2 += Vc::Vector<prop_type>::Size; continue*;}
741
742	data_il<`4`> mxm;
743	data_il<`4`> mxp;
744	data_il<`4`> mym;
745	data_il<`4`> myp;
746	data_il<`4`> mzm;
747	data_il<`4`> mzp;
748
749	cross_stencil_v cs1;
750	cross_stencil_v cs2;
751
752	Vc::Vector<prop_type> cmd1(&chunk.template get<prop_src1>()[s2]);
753	Vc::Vector<prop_type> cmd2(&chunk.template get<prop_src2>()[s2]);
754
755	// Load x-1
756	long int sumxm = s2-`1`;
757	sumxm += (k==`0`)?offset_jump[`0`] + sx::value:`0`;
758
759	// Load x+1
760	long int sumxp = s2+Vc::Vector<prop_type>::Size;
761	sumxp += (k+Vc::Vector<prop_type>::Size == sx::value)?offset_jump[`1`] - sx::value:`0`;
762
763	long int sumym = (j == `0`)?offset_jump[`2`] + (sy::value-`1`)*sx::value:-sx::value;
764	sumym += s2;
765	long int sumyp = (j == sy::value-`1`)?offset_jump[`3`] - (sy::value - `1`)*sx::value:sx::value;
766	sumyp += s2;
767	long int sumzm = (v == `0`)?offset_jump[`4`] + (sz::value-`1`)sx::valuesy::value:-sx::value*sy::value;
768	sumzm += s2;
769	long int sumzp = (v == sz::value-`1`)?offset_jump[`5`] - (sz::value - `1`)sx::valuesy::value:sx::value*sy::value;
770	sumzp += s2;
771
772	if (Vc::Vector<prop_type>::Size == `2`)
773	{
774	mxm.i = (short* int *)&mask.mask[s2];
775	mxm.i = mxm.i << `8`;
776	mxm.i \|= (short int)mask.mask[sumxm];
777
778	mxp.i = (short* int *)&mask.mask[s2];
779	mxp.i = mxp.i >> `8`;
780	mxp.i \|= ((short int)mask.mask[sumxp]) << (Vc::Vector<prop_type>::Size - `1`)*`8`;
781
782	mym.i = (short* int *)&mask.mask[sumym];
783	myp.i = (short* int *)&mask.mask[sumyp];
784
785	mzm.i = (short* int *)&mask.mask[sumzm];
786	mzp.i = (short* int *)&mask.mask[sumzp];
787	}
788	else if (Vc::Vector<prop_type>::Size == `4`)
789	{
790	mxm.i = (int* *)&mask.mask[s2];
791	mxm.i = mxm.i << `8`;
792	mxm.i \|= (int)mask.mask[sumxm];
793
794	mxp.i = (int* *)&mask.mask[s2];
795	mxp.i = mxp.i >> `8`;
796	mxp.i \|= ((int)mask.mask[sumxp]) << (Vc::Vector<prop_type>::Size - `1`)*`8`;
797
798	mym.i = (int* *)&mask.mask[sumym];
799	myp.i = (int* *)&mask.mask[sumyp];
800
801	mzm.i = (int* *)&mask.mask[sumzm];
802	mzp.i = (int* *)&mask.mask[sumzp];
803	}
804	else
805	{
806	std::cout << __FILE__ << ":" << __LINE__ << " UNSUPPORTED" << std::endl;
807	}
808
809	cs1.xm = cmd1;
810	cs1.xm = cs1.xm.shifted(-`1`);
811	cs1.xm[`0`] = chunk.template get<prop_src1>()[sumxm];
812
813	cs2.xm = cmd2;
814	cs2.xm = cs2.xm.shifted(-`1`);
815	cs2.xm[`0`] = chunk.template get<prop_src2>()[sumxm];
816
817	cs1.xp = cmd1;
818	cs1.xp = cs1.xp.shifted(`1`);
819	cs1.xp[Vc::Vector<prop_type>::Size - `1`] = chunk.template get<prop_src1>()[sumxp];
820
821	cs2.xp = cmd2;
822	cs2.xp = cs2.xp.shifted(`1`);
823	cs2.xp[Vc::Vector<prop_type>::Size - `1`] = chunk.template get<prop_src2>()[sumxp];
824
825	// Load y and z direction
826
827	cs1.ym.load(&chunk.template get<prop_src1>()[sumym],Vc::Aligned);
828	cs1.yp.load(&chunk.template get<prop_src1>()[sumyp],Vc::Aligned);
829	cs1.zm.load(&chunk.template get<prop_src1>()[sumzm],Vc::Aligned);
830	cs1.zp.load(&chunk.template get<prop_src1>()[sumzp],Vc::Aligned);
831
832	cs2.ym.load(&chunk.template get<prop_src2>()[sumym],Vc::Aligned);
833	cs2.yp.load(&chunk.template get<prop_src2>()[sumyp],Vc::Aligned);
834	cs2.zm.load(&chunk.template get<prop_src2>()[sumzm],Vc::Aligned);
835	cs2.zp.load(&chunk.template get<prop_src2>()[sumzp],Vc::Aligned);
836
837	// Calculate
838
839	data_il<`4`> tot_m;
840	tot_m.i = mxm.i + mxp.i + mym.i + myp.i + mzm.i + mzp.i;
841
842	Vc::Vector<prop_type> res1;
843	Vc::Vector<prop_type> res2;
844
845	func(res1,res2,cmd1,cmd2,cs1,cs2,tot_m.uc,args ... );
846
847	Vc::Mask<prop_type> m(&mask_row[k]);
848
849	res1.store(&chunk.template get<prop_dst1>()[s2],m,Vc::Aligned);
850	res2.store(&chunk.template get<prop_dst2>()[s2],m,Vc::Aligned);
851
852	s2 += Vc::Vector<prop_type>::Size;
853	}
854	}
855	}
856
857	++it;
858	}
859	}
860
861	template<bool findNN, unsigned int stencil_size, typename prop_type, typename SparseGridType, typename lambda_f, typename ... ArgsT >
862	static void conv_cross_ids(grid_key_dx<`3`> & start, grid_key_dx<`3`> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
863	{
864	auto it = grid.template getBlockIterator<stencil_size>(start,stop);
865
866	auto & datas = grid.private_get_data();
867	auto & headers = grid.private_get_header_mask();
868
869	typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<`0`>>::type sz0;
870	typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<`1`>>::type sz1;
871	typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<`2`>>::type sz2;
872
873	typedef typename SparseGridType::chunking_type chunking;
874
875	while (it.isNext())
876	{
877	// Load
878	long int offset_jump[`6`];
879
880	size_t cid = it.getChunkId();
881
882	auto chunk = datas.get(cid);
883	auto & mask = headers.get(cid);
884
885	bool exist;
886	grid_key_dx<`3`> p = grid.getChunkPos(cid) + grid_key_dx<`3`>({-`1`,`0`,`0`});
887	long int r = grid.getChunk(p,exist);
888	offset_jump[`0`] = (r-cid)*decltype(it)::sizeBlock;
889
890	p = grid.getChunkPos(cid) + grid_key_dx<`3`>({`1`,`0`,`0`});
891	r = grid.getChunk(p,exist);
892	offset_jump[`1`] = (r-cid)*decltype(it)::sizeBlock;
893
894	p = grid.getChunkPos(cid) + grid_key_dx<`3`>({`0`,-`1`,`0`});
895	r = grid.getChunk(p,exist);
896	offset_jump[`2`] = (r-cid)*decltype(it)::sizeBlock;
897
898	p = grid.getChunkPos(cid) + grid_key_dx<`3`>({`0`,`1`,`0`});
899	r = grid.getChunk(p,exist);
900	offset_jump[`3`] = (r-cid)*decltype(it)::sizeBlock;
901
902	p = grid.getChunkPos(cid) + grid_key_dx<`3`>({`0`,`0`,-`1`});
903	r = grid.getChunk(p,exist);
904	offset_jump[`4`] = (r-cid)*decltype(it)::sizeBlock;
905
906	p = grid.getChunkPos(cid) + grid_key_dx<`3`>({`0`,`0`,`1`});
907	r = grid.getChunk(p,exist);
908	offset_jump[`5`] = (r-cid)*decltype(it)::sizeBlock;
909
910	// Load offset jumps
911
912	// construct a row mask
913
914	long int s2 = `0`;
915
916	typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<`2`>>::type sz;
917	typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<`1`>>::type sy;
918	typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<`0`>>::type sx;
919
920	ids_crs<`3`,sx::value> ids;
921
922	for (int k = `0` ; k < sx::value ; k++)
923	{
924	ids.mask_row[k] = (k >= it.start(`0`) && k < it.stop(`0`))?true:false;
925	}
926
927	for (int v = it.start(`2`) ; v < it.stop(`2`) ; v++)
928	{
929	for (int j = it.start(`1`) ; j < it.stop(`1`) ; j++)
930	{
931	s2 = it.Lin(`0`,j,v);
932	for (int k = `0` ; k < sx::value ; k += Vc::Vector<prop_type>::Size)
933	{
934	// we do only id exist the point
935	if ((int* )&mask.mask[s2] == `0`) {s2 += Vc::Vector<prop_type>::Size; continue*;}
936
937	data_il<`4`> mxm;
938	data_il<`4`> mxp;
939	data_il<`4`> mym;
940	data_il<`4`> myp;
941	data_il<`4`> mzm;
942	data_il<`4`> mzp;
943
944	ids.k = k;
945
946	// Load x-1
947	ids.sumdm[`0`] = s2-`1`;
948	ids.sumdm[`0`] += (k==`0`)?offset_jump[`0`] + sx::value:`0`;
949
950	// Load x+1
951	ids.sumdp[`0`] = s2+Vc::Vector<prop_type>::Size;
952	ids.sumdp[`0`] += (k+Vc::Vector<prop_type>::Size == sx::value)?offset_jump[`1`] - sx::value:`0`;
953
954	ids.sumdm[`1`] = (j == `0`)?offset_jump[`2`] + (sy::value-`1`)*sx::value:-sx::value;
955	ids.sumdm[`1`] += s2;
956	ids.sumdp[`1`] = (j == sy::value-`1`)?offset_jump[`3`] - (sy::value - `1`)*sx::value:sx::value;
957	ids.sumdp[`1`] += s2;
958	ids.sumdm[`2`] = (v == `0`)?offset_jump[`4`] + (sz::value-`1`)sx::valuesy::value:-sx::value*sy::value;
959	ids.sumdm[`2`] += s2;
960	ids.sumdp[`2`] = (v == sz::value-`1`)?offset_jump[`5`] - (sz::value - `1`)sx::valuesy::value:sx::value*sy::value;
961	ids.sumdp[`2`] += s2;
962
963	ids.s2 = s2;
964
965	if (Vc::Vector<prop_type>::Size == `2`)
966	{
967	mxm.i = (short* int *)&mask.mask[s2];
968	mxm.i = mxm.i << `8`;
969	mxm.i \|= (short int)mask.mask[ids.sumdm[`0`]];
970
971	mxp.i = (short* int *)&mask.mask[s2];
972	mxp.i = mxp.i >> `8`;
973	mxp.i \|= ((short int)mask.mask[ids.sumdp[`0`]]) << (Vc::Vector<prop_type>::Size - `1`)*`8`;
974
975	mym.i = (short* int *)&mask.mask[ids.sumdm[`1`]];
976	myp.i = (short* int *)&mask.mask[ids.sumdp[`1`]];
977
978	mzm.i = (short* int *)&mask.mask[ids.sumdm[`2`]];
979	mzp.i = (short* int *)&mask.mask[ids.sumdp[`2`]];
980	}
981	else if (Vc::Vector<prop_type>::Size == `4`)
982	{
983	mxm.i = (int* *)&mask.mask[s2];
984	mxm.i = mxm.i << `8`;
985	mxm.i \|= (int)mask.mask[ids.sumdm[`0`]];
986
987	mxp.i = (int* *)&mask.mask[s2];
988	mxp.i = mxp.i >> `8`;
989	mxp.i \|= ((int)mask.mask[ids.sumdp[`0`]]) << (Vc::Vector<prop_type>::Size - `1`)*`8`;
990
991	mym.i = (int* *)&mask.mask[ids.sumdm[`1`]];
992	myp.i = (int* *)&mask.mask[ids.sumdp[`1`]];
993
994	mzm.i = (int* *)&mask.mask[ids.sumdm[`2`]];
995	mzp.i = (int* *)&mask.mask[ids.sumdp[`2`]];
996	}
997	else
998	{
999	std::cout << __FILE__ << ":" << __LINE__ << " UNSUPPORTED" << std::endl;
1000	}
1001
1002	// Calculate
1003
1004	data_il<`4`> tot_m;
1005	tot_m.i = mxm.i + mxp.i + mym.i + myp.i + mzm.i + mzp.i;
1006
1007	func(chunk,ids,tot_m.uc,args ... );
1008
1009	s2 += Vc::Vector<prop_type>::Size;
1010	}
1011	}
1012	}
1013
1014	++it;
1015	}
1016	}
1017
1018	};
1019
1020	#endif
1021
1022
1023	#endif /* SPARSEGRID_CONV_OPT_HPP_ */
1024

Browse the source code of openfpm/openfpm_data/src/SparseGrid/SparseGrid_conv_opt.hpp