| 1 | /* | 
| 2 |  * CellList_gpu.hpp | 
| 3 |  * | 
| 4 |  *  Created on: Jun 11, 2018 | 
| 5 |  *      Author: i-bird | 
| 6 |  */ | 
| 7 |  | 
| 8 | #ifndef OPENFPM_DATA_SRC_NN_CELLLIST_CELLLIST_GPU_HPP_ | 
| 9 | #define OPENFPM_DATA_SRC_NN_CELLLIST_CELLLIST_GPU_HPP_ | 
| 10 |  | 
| 11 | #include "config.h" | 
| 12 |  | 
| 13 | #ifdef CUDA_GPU | 
| 14 |  | 
| 15 | #include "Vector/map_vector_sparse.hpp" | 
| 16 | #include "NN/CellList/CellDecomposer.hpp" | 
| 17 | #include "Vector/map_vector.hpp" | 
| 18 | #include "Cuda_cell_list_util_func.hpp" | 
| 19 | #include "NN/CellList/cuda/CellList_gpu_ker.cuh" | 
| 20 | #include "util/cuda_util.hpp" | 
| 21 | #include "NN/CellList/CellList_util.hpp" | 
| 22 | #include "NN/CellList/CellList.hpp" | 
| 23 | #include "util/cuda/scan_ofp.cuh" | 
| 24 |  | 
| 25 | constexpr int count = 0; | 
| 26 | constexpr int start = 1; | 
| 27 |  | 
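/*! \brief Helper that constructs the kernel-side cell-list object
 *
 * The primary template handles the dense case; the specialization below
 * (is_sparse = true) handles the sparse case.
 *
 */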
| 28 | template<unsigned int dim, typename T, | 
| 29 |          typename cnt_type, typename ids_type, | 
| 30 |          typename Memory,typename transform, | 
| 31 |          typename vector_cnt_type, typename vector_cnt_type2, | 
| 32 |          typename cl_sparse_type, | 
| 33 |          bool is_sparse> | 
| 34 | struct CellList_gpu_ker_selector | 
| 35 | { | 
| 36 | 	static inline CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,is_sparse> get(vector_cnt_type & starts, | 
| 37 | 																			 vector_cnt_type & cell_nn, | 
| 38 | 																			 vector_cnt_type2 & cell_nn_list, | 
| 39 | 																			 cl_sparse_type & cl_sparse, | 
| 40 | 																			 vector_cnt_type & sorted_to_not_sorted, | 
| 41 | 																			 vector_cnt_type & sorted_domain_particles_ids, | 
| 42 | 																			 openfpm::vector<aggregate<int>,Memory,memory_traits_inte> & nnc_rad, | 
| 43 | 																			 openfpm::array<T,dim,cnt_type> & spacing_c, | 
| 44 | 																			 openfpm::array<ids_type,dim,cnt_type> & div_c, | 
| 45 | 																			 openfpm::array<ids_type,dim,cnt_type> & off, | 
| 46 | 																			 const transform & t, | 
| 47 | 																			 unsigned int g_m) | 
| 48 | 	{ | 
| 49 | 		return CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,is_sparse>(starts.toKernel(), | 
| 50 | 																			sorted_to_not_sorted.toKernel(), | 
| 51 | 																			sorted_domain_particles_ids.toKernel(), | 
| 52 | 																			nnc_rad.toKernel(), | 
| 53 | 																			spacing_c, | 
| 54 | 																			div_c, | 
| 55 | 																			off, | 
| 56 | 																			t, | 
| 57 | 																			g_m); | 
| 58 | 	} | 
| 59 | }; | 
| 60 |  | 
| 61 | template<unsigned int dim, typename T, | 
| 62 |          typename cnt_type, typename ids_type, | 
| 63 |          typename Memory,typename transform, | 
| 64 |          typename vector_cnt_type, typename vector_cnt_type2, | 
| 65 |          typename cl_sparse_type> | 
| 66 | struct CellList_gpu_ker_selector<dim,T,cnt_type,ids_type,Memory,transform,vector_cnt_type,vector_cnt_type2,cl_sparse_type,true> | 
| 67 | { | 
| 68 | 	static CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,true> get(vector_cnt_type & starts, | 
| 69 | 			 vector_cnt_type & cell_nn, | 
| 70 | 			 vector_cnt_type2 & cell_nn_list, | 
| 71 | 			 cl_sparse_type & cl_sparse, | 
| 72 | 			 vector_cnt_type & srt, | 
| 73 | 			 vector_cnt_type & dprt, | 
| 74 | 			 openfpm::vector<aggregate<int>,Memory,memory_traits_inte> & nnc_rad, | 
| 75 | 			 openfpm::array<T,dim,cnt_type> & spacing_c, | 
| 76 | 	         openfpm::array<ids_type,dim,cnt_type> & div_c, | 
| 77 | 	         openfpm::array<ids_type,dim,cnt_type> & off, | 
| 78 | 	         const transform & t, | 
| 79 | 	         unsigned int g_m) | 
| 80 | 	{ | 
| 81 | 		return CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,true>(cell_nn.toKernel(), | 
| 82 | 																		cell_nn_list.toKernel(), | 
| 83 | 																		cl_sparse.toKernel(), | 
| 84 | 																		srt.toKernel(), | 
| 85 | 																		dprt.toKernel(), | 
| 86 | 																		spacing_c, | 
| 87 | 																		div_c, | 
| 88 | 																		off, | 
| 89 | 																		t,g_m); | 
| 90 | 	} | 
| 91 | }; | 
| 92 |  | 
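/*! \brief Cell-list implementation for the GPU
 *
 * Particles are binned into cells on the device and reordered to improve
 * coalesced access; the resulting structure is passed to CUDA kernels
 * through toKernel().
 *
 * Minimal usage sketch (illustrative only; it assumes CudaMemory as the
 * device memory type, a 3D float domain, device-resident position vectors
 * pl/pl_out and property vectors pl_prp/pl_prp_out, an mgpu::ofp_context_t
 * context and a ghost marker g_m):
 *
 * \code
 * Box<3,float> box({0.0f,0.0f,0.0f},{1.0f,1.0f,1.0f});
 * size_t div[3] = {16,16,16};
 *
 * CellList_gpu<3,float,CudaMemory> cl(box,div,1);
 *
 * // bin and reorder the particles (positions and properties stay on the device)
 * cl.construct(pl,pl_out,pl_prp,pl_prp_out,context,g_m);
 *
 * auto cl_ker = cl.toKernel(); // object usable inside a CUDA kernel
 * \endcode
 *
 * \tparam dim dimensionality of the space
 * \tparam T type of the space (float, double, ...)
 * \tparam Memory memory type (device allocator)
 * \tparam transform transformation applied to the points
 * \tparam cnt_type type used for counters
 * \tparam ids_type type used for cell ids
 * \tparam is_sparse true for the sparse cell-list variant
 *
 */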
| 93 | template<unsigned int dim, | 
| 94 | 		 typename T, | 
| 95 | 		 typename Memory, | 
| 96 | 		 typename transform = no_transform_only<dim,T>, | 
| 97 | 		 typename cnt_type = unsigned int, | 
| 98 | 		 typename ids_type = int, | 
| 99 | 		 bool is_sparse = false> | 
| 100 | class CellList_gpu : public CellDecomposer_sm<dim,T,transform> | 
| 101 | { | 
| 102 | 	typedef openfpm::vector<aggregate<cnt_type>,Memory,memory_traits_inte> vector_cnt_type; | 
| 103 |  | 
| 104 | 	//! \brief Number of particles in each cell | 
| 105 | 	vector_cnt_type cl_n; | 
| 106 |  | 
	//! \brief for each cell, the ids of the particles inside it
| 108 | 	vector_cnt_type cells; | 
| 109 |  | 
	//! \brief Prefix sum (scan with +) of cl_n (in the sparse case it contains the cell index of each particle)
| 111 | 	vector_cnt_type starts; | 
| 112 |  | 
| 113 | 	//! \brief sparse vector in case of sparse Cell-list | 
| 114 | 	openfpm::vector_sparse_gpu<aggregate<cnt_type>> cl_sparse; | 
| 115 |  | 
	//! \brief number of neighborhood cells each cell has + offset
| 117 | 	openfpm::vector_gpu<aggregate<cnt_type>> cells_nn; | 
| 118 |  | 
| 119 | 	//! \brief For each cell the list of the neighborhood cells | 
| 120 | 	openfpm::vector_gpu<aggregate<cnt_type,cnt_type>> cells_nn_list; | 
| 121 |  | 
    //! \brief particle ids information: the first "dim" components are the cell-id in grid coordinates, the last is the local-id inside the cell
| 123 |     openfpm::vector<aggregate<cnt_type[2]>,Memory,memory_traits_inte> part_ids; | 
| 124 |  | 
	//! \brief Size of the neighborhood of cells to test
| 126 | 	int cells_nn_test_size; | 
| 127 |  | 
| 128 | 	//! \brief Neighborhood of a cell to test | 
| 129 | 	openfpm::vector_gpu<aggregate<int>> cells_nn_test; | 
| 130 |  | 
	//! \brief for each sorted index, the corresponding index in the unsorted vector
| 132 | 	vector_cnt_type sorted_to_not_sorted; | 
| 133 |  | 
	//! For each sorted particle, indicate whether it is a domain or a ghost particle
| 135 | 	vector_cnt_type sorted_domain_particles_dg; | 
| 136 |  | 
	//! \brief the indices of all the domain particles in the sorted vector
| 138 | 	vector_cnt_type sorted_domain_particles_ids; | 
| 139 |  | 
	//! \brief for each unsorted index, the corresponding index in the sorted vector
| 141 | 	vector_cnt_type non_sorted_to_sorted; | 
| 142 |  | 
| 143 | 	//! Spacing | 
| 144 | 	openfpm::array<T,dim,cnt_type> spacing_c; | 
| 145 |  | 
| 146 | 	//! \brief number of sub-divisions in each direction | 
| 147 | 	openfpm::array<ids_type,dim,cnt_type> div_c; | 
| 148 |  | 
| 149 | 	//! \brief cell padding | 
| 150 | 	openfpm::array<ids_type,dim,cnt_type> off; | 
| 151 |  | 
| 152 | 	//! Radius neighborhood | 
| 153 | 	openfpm::vector<aggregate<int>,Memory,memory_traits_inte> nnc_rad; | 
| 154 |  | 
	//! Additional information used to understand whether the cell-list
	//! has been constructed from an old decomposition
| 157 | 	size_t n_dec; | 
| 158 |  | 
	//! Initialize the internal structures
| 160 | 	void InitializeStructures(const size_t (& div)[dim], size_t tot_n_cell, size_t pad) | 
| 161 | 	{ | 
| 162 | 		for (size_t i = 0 ; i < dim ; i++) | 
| 163 | 		{ | 
| 164 | 			div_c[i] = div[i]; | 
| 165 | 			spacing_c[i] = this->getCellBox().getP2().get(i); | 
| 166 | 			off[i] = pad; | 
| 167 | 		} | 
| 168 |  | 
| 169 | 		cl_n.resize(tot_n_cell); | 
| 170 |  | 
| 171 | 		cells_nn_test_size = 1; | 
| 172 | 		construct_cell_nn_test(cells_nn_test_size); | 
| 173 | 	} | 
| 174 |  | 
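	/*! \brief Build the stencil of neighborhood-cell offsets
	 *
	 * Fill cells_nn_test with the relative linear offsets of the
	 * (2*box_nn+1)^dim cells surrounding a cell (the cell itself included)
	 *
	 */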
| 175 | 	void construct_cell_nn_test(unsigned int box_nn = 1) | 
| 176 | 	{ | 
| 177 | 		auto & gs = this->getGrid(); | 
| 178 |  | 
| 179 | 		grid_key_dx<dim> start; | 
| 180 | 		grid_key_dx<dim> stop; | 
| 181 | 		grid_key_dx<dim> middle; | 
| 182 |  | 
| 183 | 		for (size_t i = 0 ; i < dim ; i++) | 
| 184 | 		{ | 
| 185 | 			start.set_d(i,0); | 
| 186 | 			stop.set_d(i,2*box_nn); | 
| 187 | 			middle.set_d(i,box_nn); | 
| 188 | 		} | 
| 189 |  | 
| 190 | 		cells_nn_test.resize(openfpm::math::pow(2*box_nn+1,dim)); | 
| 191 |  | 
| 192 | 		int mid = gs.LinId(middle); | 
| 193 |  | 
| 194 | 		grid_key_dx_iterator_sub<dim> it(gs,start,stop); | 
| 195 |  | 
| 196 | 		size_t i = 0; | 
| 197 | 		while (it.isNext()) | 
| 198 | 		{ | 
| 199 | 			auto p = it.get(); | 
| 200 |  | 
| 201 | 			cells_nn_test.template get<0>(i) = (int)gs.LinId(p) - mid; | 
| 202 |  | 
| 203 | 			++i; | 
| 204 | 			++it; | 
| 205 | 		} | 
| 206 |  | 
| 207 | 		cells_nn_test.template hostToDevice<0>(); | 
| 208 |  | 
| 209 | #if defined(__NVCC__) && defined(USE_LOW_REGISTER_ITERATOR) | 
| 210 |  | 
| 211 | 		// copy to the constant memory | 
| 212 | 		cudaMemcpyToSymbol(cells_striding,cells_nn_test.template getPointer<0>(),cells_nn_test.size()*sizeof(int)); | 
| 213 |  | 
| 214 | #endif | 
| 215 | 	} | 
| 216 |  | 
	/*! \brief Construct a sparse cell-list
	 *
	 *
	 */
| 221 | 	template<typename vector, typename vector_prp, unsigned int ... prp> | 
| 222 | 	void construct_sparse(vector & pl, | 
| 223 |   	   	   	 	 	 	  vector & pl_out, | 
| 224 |   	   	   	 	 	 	  vector_prp & pl_prp, | 
| 225 |   	   	   	 	 	 	  vector_prp & pl_prp_out, | 
| 226 |   	   	   	 	 	 	  mgpu::ofp_context_t & mgpuContext, | 
| 227 |   	   	   	 	 	 	  size_t g_m, | 
| 228 |  			   	   	   	  size_t start, | 
| 229 |  			   	   	   	  size_t stop, | 
| 230 |   	   	   	 	 	 	  cl_construct_opt opt = cl_construct_opt::Full) | 
| 231 | 	{ | 
| 232 | #ifdef __NVCC__ | 
| 233 |  | 
| 234 | 		part_ids.resize(stop - start); | 
| 235 | 		starts.resize(stop - start); | 
| 236 |  | 
		// Then we construct the cell ids
| 238 |  | 
| 239 | 		auto ite_gpu = pl.getGPUIteratorTo(stop-start); | 
| 240 |  | 
| 241 | 		if (ite_gpu.wthr.x == 0) | 
| 242 | 		{ | 
| 243 | 			return; | 
| 244 | 		} | 
| 245 |  | 
| 246 | 		CUDA_LAUNCH((subindex<true,dim,T,cnt_type,ids_type>),ite_gpu,div_c, | 
| 247 | 																		spacing_c, | 
| 248 | 																		off, | 
| 249 | 																		this->getTransform(), | 
| 250 | 																		pl.capacity(), | 
| 251 | 																		pl.size(), | 
| 252 | 																		part_ids.capacity(), | 
| 253 | 																		start, | 
| 254 | 																		static_cast<T *>(pl.template getDeviceBuffer<0>()), | 
| 255 | 																		static_cast<cnt_type *>(starts.template getDeviceBuffer<0>()), | 
| 256 | 																		static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>())); | 
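
		// Note: with the sparse option the subindex kernel writes the linear
		// cell index of each particle into starts (used below to fill cl_sparse)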
| 257 |  | 
| 258 | 		// now we construct the cells | 
| 259 |  | 
| 260 | 		cells.resize(stop-start); | 
| 261 |  | 
| 262 | 		// Here we fill the sparse vector | 
| 263 | 		cl_sparse.clear(); | 
| 264 | 		cl_sparse.template setBackground<0>((cnt_type)-1); | 
| 265 | 		cl_sparse.setGPUInsertBuffer(ite_gpu.wthr.x,ite_gpu.thr.x); | 
| 266 | 		CUDA_LAUNCH((fill_cells_sparse),ite_gpu,cl_sparse.toKernel(),starts.toKernel()); | 
| 267 | 		cl_sparse.template flush_vd<sstart_<0>>(cells,mgpuContext,FLUSH_ON_DEVICE); | 
| 268 |  | 
| 269 | 		cells_nn.resize(cl_sparse.size()+1); | 
| 270 | 		cells_nn.template fill<0>(0); | 
| 271 |  | 
| 272 | 		// Here we construct the neighborhood cells for each cell | 
| 273 | 		auto itgg = cl_sparse.getGPUIterator(); | 
| 274 | 		CUDA_LAUNCH((count_nn_cells),itgg,cl_sparse.toKernel(),cells_nn.toKernel(),cells_nn_test.toKernel()); | 
| 275 |  | 
		// scan the neighbor-cell counts to get, for each cell, the offset of its neighbor list
| 277 | 		openfpm::scan((cnt_type *)cells_nn.template getDeviceBuffer<0>(), cells_nn.size(), (cnt_type *)cells_nn.template getDeviceBuffer<0>() , mgpuContext); | 
| 278 |  | 
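		// after the scan the last element of cells_nn holds the total number of
		// (cell, neighbor-cell) pairs, i.e. the size of cells_nn_list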
| 279 | 		cells_nn.template deviceToHost<0>(cells_nn.size() - 1, cells_nn.size() - 1); | 
| 280 | 		size_t n_nn_cells = cells_nn.template get<0>(cells_nn.size() - 1); | 
| 281 |  | 
| 282 | 		cells_nn_list.resize(n_nn_cells); | 
| 283 |  | 
| 284 | 		CUDA_LAUNCH((fill_nn_cells),itgg,cl_sparse.toKernel(),cells_nn.toKernel(),cells_nn_test.toKernel(),cells_nn_list.toKernel(),cells.size()); | 
| 285 |  | 
| 286 | 		sorted_to_not_sorted.resize(stop-start); | 
| 287 | 		non_sorted_to_sorted.resize(pl.size()); | 
| 288 |  | 
| 289 | 		auto ite = pl.getGPUIteratorTo(stop-start,64); | 
| 290 |  | 
| 291 | 		// Here we reorder the particles to improve coalescing access | 
| 292 | 		CUDA_LAUNCH((reorder_parts<decltype(pl_prp.toKernel()), | 
| 293 | 				      decltype(pl.toKernel()), | 
| 294 | 				      decltype(sorted_to_not_sorted.toKernel()), | 
| 295 | 				      cnt_type,shift_ph<0,cnt_type>>),ite,sorted_to_not_sorted.size(), | 
| 296 | 				                                                           pl_prp.toKernel(), | 
| 297 | 				                                                           pl_prp_out.toKernel(), | 
| 298 | 				                                                           pl.toKernel(), | 
| 299 | 				                                                           pl_out.toKernel(), | 
| 300 | 				                                                           sorted_to_not_sorted.toKernel(), | 
| 301 | 				                                                           non_sorted_to_sorted.toKernel(), | 
| 302 | 				                                                           static_cast<cnt_type *>(cells.template getDeviceBuffer<0>())); | 
| 303 |  | 
| 304 | 		if (opt == cl_construct_opt::Full) | 
| 305 | 		{ | 
| 306 | 			construct_domain_ids(mgpuContext,start,stop,g_m); | 
| 307 | 		} | 
| 308 |  | 
| 309 | 	#else | 
| 310 |  | 
			std::cout << "Error: "  <<  __FILE__ << ":"  << __LINE__ << " CellList_gpu.construct() must be compiled with the NVCC compiler, but it appears to have been compiled with the standard system compiler"  << std::endl;
| 312 |  | 
| 313 | 	#endif | 
| 314 | 	} | 
| 315 |  | 
	/*! \brief Construct the ids of the domain particles in the sorted array
	 *
	 * \param mgpuContext mgpu context
	 * \param start start of the particle range
	 * \param stop end of the particle range
	 * \param g_m ghost marker
	 *
	 */
| 321 | 	void construct_domain_ids(mgpu::ofp_context_t & mgpuContext, size_t start, size_t stop, size_t g_m) | 
| 322 | 	{ | 
| 323 | #ifdef __NVCC__ | 
| 324 | 		sorted_domain_particles_dg.resize(stop-start+1); | 
| 325 |  | 
| 326 | 		auto ite = sorted_domain_particles_dg.getGPUIterator(); | 
| 327 |  | 
| 328 | 		CUDA_LAUNCH((mark_domain_particles),ite,sorted_to_not_sorted.toKernel(),sorted_domain_particles_dg.toKernel(),g_m); | 
| 329 |  | 
		// scan the flags to get the positions in the output vector
| 331 | 		openfpm::scan((unsigned int *)sorted_domain_particles_dg.template getDeviceBuffer<0>(),sorted_domain_particles_dg.size(),(unsigned int *)sorted_domain_particles_dg.template getDeviceBuffer<0>(),mgpuContext); | 
| 332 |  | 
| 333 | 		sorted_domain_particles_dg.template deviceToHost<0>(sorted_domain_particles_dg.size()-1,sorted_domain_particles_dg.size()-1); | 
| 334 | 		auto sz = sorted_domain_particles_dg.template get<0>(sorted_domain_particles_dg.size()-1); | 
| 335 |  | 
| 336 | 		sorted_domain_particles_ids.resize(sz); | 
| 337 |  | 
| 338 | 		CUDA_LAUNCH((collect_domain_ghost_ids),ite,sorted_domain_particles_dg.toKernel(),sorted_domain_particles_ids.toKernel()); | 
| 339 | #endif | 
| 340 | 	} | 
| 341 |  | 
	/*! \brief Construct a dense cell-list
	 *
	 *
	 */
| 346 | 	template<typename vector, typename vector_prp, unsigned int ... prp> | 
| 347 | 	void construct_dense(vector & pl, | 
| 348 | 			   	   	   	 vector & pl_out, | 
| 349 | 			   	   	   	 vector_prp & pl_prp, | 
| 350 | 			   	   	   	 vector_prp & pl_prp_out, | 
| 351 | 			   	   	   	 mgpu::ofp_context_t & mgpuContext, | 
| 352 | 			   	   	   	 size_t g_m, | 
| 353 | 			   	   	   	 size_t start, | 
| 354 | 			   	   	   	 size_t stop, | 
| 355 | 			   	   	   	 cl_construct_opt opt = cl_construct_opt::Full) | 
| 356 | 	{ | 
| 357 | #ifdef __NVCC__ | 
| 358 |  | 
| 359 | 		CUDA_SAFE() | 
| 360 |  | 
		// Then we construct the cell ids
| 362 |  | 
| 363 | 		auto ite_gpu = pl.getGPUIteratorTo(stop-start-1); | 
| 364 |  | 
| 365 | 		cl_n.resize(this->gr_cell.size()+1); | 
| 366 | 		cl_n.template fill<0>(0); | 
| 367 | //		CUDA_SAFE(cudaMemset(cl_n.template getDeviceBuffer<0>(),0,cl_n.size()*sizeof(cnt_type))); | 
| 368 |  | 
| 369 | 		part_ids.resize(stop - start); | 
| 370 |  | 
| 371 | 		if (ite_gpu.wthr.x == 0 || pl.size() == 0 || stop == 0) | 
| 372 | 		{ | 
| 373 | 			// no particles | 
| 374 | 			starts.resize(cl_n.size()); | 
| 375 | 			starts.template fill<0>(0); | 
| 376 | 			return; | 
| 377 | 		} | 
| 378 |  | 
| 379 | 		CUDA_LAUNCH((subindex<false,dim,T,cnt_type,ids_type>),ite_gpu,div_c, | 
| 380 | 																		spacing_c, | 
| 381 | 																		off, | 
| 382 | 																		this->getTransform(), | 
| 383 | 																		pl.capacity(), | 
| 384 | 																		stop, | 
| 385 | 																		part_ids.capacity(), | 
| 386 | 																		start, | 
| 387 | 																		static_cast<T *>(pl.template getDeviceBuffer<0>()), | 
| 388 | 																		static_cast<cnt_type *>(cl_n.template getDeviceBuffer<0>()), | 
| 389 | 																		static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>())); | 
| 390 |  | 
		// scan: starts[i] becomes the index of the first particle of cell i
| 392 | 		starts.resize(cl_n.size()); | 
| 393 | 		openfpm::scan((cnt_type *)cl_n.template getDeviceBuffer<0>(), cl_n.size(), (cnt_type *)starts.template getDeviceBuffer<0>() , mgpuContext); | 
| 394 |  | 
| 395 | 		// now we construct the cells | 
| 396 |  | 
| 397 | 		cells.resize(stop-start); | 
| 398 | 		auto itgg = part_ids.getGPUIterator(); | 
| 399 |  | 
| 400 |  | 
| 401 | #ifdef MAKE_CELLLIST_DETERMINISTIC | 
| 402 |  | 
		CUDA_LAUNCH((fill_cells<dim,cnt_type,ids_type,shift_ph<0,cnt_type>>),itgg,0,
					part_ids.size(),
					static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()));

		// sort

		mgpu::mergesort(static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>()),static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()),pl.size(),mgpu::less_t<cnt_type>(),mgpuContext);

#else

		CUDA_LAUNCH((fill_cells<dim,cnt_type,ids_type,shift_ph<0,cnt_type>>),itgg,0,
					div_c,
					off,
					part_ids.size(),
					part_ids.capacity(),
					start,
					static_cast<cnt_type *>(starts.template getDeviceBuffer<0>()),
					static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>()),
					static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()));
| 422 |  | 
| 423 | #endif | 
| 424 |  | 
| 425 |  | 
| 426 | 		sorted_to_not_sorted.resize(stop-start); | 
| 427 | 		non_sorted_to_sorted.resize(pl.size()); | 
| 428 |  | 
| 429 | 		auto ite = pl.getGPUIteratorTo(stop-start,64); | 
| 430 |  | 
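		// if no property is listed in prp... reorder all the properties,
		// otherwise reorder only the listed ones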
| 431 | 		if (sizeof...(prp) == 0) | 
| 432 | 		{ | 
| 433 | 			// Here we reorder the particles to improve coalescing access | 
| 434 | 			CUDA_LAUNCH((reorder_parts<decltype(pl_prp.toKernel()), | 
| 435 | 						  decltype(pl.toKernel()), | 
| 436 | 						  decltype(sorted_to_not_sorted.toKernel()), | 
| 437 | 						  cnt_type,shift_ph<0,cnt_type>>),ite,sorted_to_not_sorted.size(), | 
| 438 | 																			   pl_prp.toKernel(), | 
| 439 | 																			   pl_prp_out.toKernel(), | 
| 440 | 																			   pl.toKernel(), | 
| 441 | 																			   pl_out.toKernel(), | 
| 442 | 																			   sorted_to_not_sorted.toKernel(), | 
| 443 | 																			   non_sorted_to_sorted.toKernel(), | 
| 444 | 																			   static_cast<cnt_type *>(cells.template getDeviceBuffer<0>())); | 
| 445 | 		} | 
| 446 | 		else | 
| 447 | 		{ | 
| 448 | 			// Here we reorder the particles to improve coalescing access | 
| 449 | 			CUDA_LAUNCH((reorder_parts_wprp<decltype(pl_prp.toKernel()), | 
| 450 | 						  decltype(pl.toKernel()), | 
| 451 | 						  decltype(sorted_to_not_sorted.toKernel()), | 
| 452 | 						  cnt_type,shift_ph<0,cnt_type>,prp...>),ite,sorted_to_not_sorted.size(), | 
| 453 | 																			   pl_prp.toKernel(), | 
| 454 | 																			   pl_prp_out.toKernel(), | 
| 455 | 																			   pl.toKernel(), | 
| 456 | 																			   pl_out.toKernel(), | 
| 457 | 																			   sorted_to_not_sorted.toKernel(), | 
| 458 | 																			   non_sorted_to_sorted.toKernel(), | 
| 459 | 																			   static_cast<cnt_type *>(cells.template getDeviceBuffer<0>())); | 
| 460 | 		} | 
| 461 |  | 
| 462 | 		if (opt == cl_construct_opt::Full) | 
| 463 | 		{ | 
| 464 | 			construct_domain_ids(mgpuContext,start,stop,g_m); | 
| 465 | 		} | 
| 466 |  | 
| 467 | 	#else | 
| 468 |  | 
			std::cout << "Error: "  <<  __FILE__ << ":"  << __LINE__ << " CellList_gpu.construct() must be compiled with the NVCC compiler, but it appears to have been compiled with the standard system compiler"  << std::endl;
| 470 |  | 
| 471 | 	#endif | 
| 472 | 	} | 
| 473 |  | 
| 474 | public: | 
| 475 |  | 
| 476 | 	//! Indicate that this cell list is a gpu type cell-list | 
| 477 | 	typedef int yes_is_gpu_celllist; | 
| 478 |  | 
| 479 | 	//! the type of the space | 
| 480 | 	typedef T stype; | 
| 481 |  | 
| 482 | 	//! dimensions of space | 
| 483 | 	static const unsigned int dims = dim; | 
| 484 |  | 
| 485 | 	//! count type | 
| 486 | 	typedef cnt_type cnt_type_; | 
| 487 |  | 
| 488 | 	//! id type | 
| 489 | 	typedef ids_type ids_type_; | 
| 490 |  | 
| 491 | 	//! transform type | 
| 492 | 	typedef transform transform_; | 
| 493 |  | 
| 494 | 	//! is sparse | 
| 495 | 	typedef boost::mpl::bool_<is_sparse> is_sparse_; | 
| 496 |  | 
| 497 | 	/*! \brief Copy constructor | 
| 498 | 	 * | 
| 499 | 	 * \param clg Cell list to copy | 
| 500 | 	 * | 
| 501 | 	 */ | 
| 502 | 	CellList_gpu(const CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type> & clg) | 
| 503 | 	{ | 
| 504 | 		this->operator=(clg); | 
| 505 | 	} | 
| 506 |  | 
	/*! \brief Constructor from a temporary
	 *
	 * \param clg Cell list to move from
	 *
	 */
| 512 | 	CellList_gpu(CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type> && clg) | 
| 513 | 	{ | 
| 514 | 		this->operator=(clg); | 
| 515 | 	} | 
| 516 |  | 
| 517 | 	/*! \brief default constructor | 
| 518 | 	 * | 
| 519 | 	 * | 
| 520 | 	 */ | 
| 521 | 	CellList_gpu() | 
| 522 | 	{} | 
| 523 |  | 
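	/*! \brief Constructor
	 *
	 * \param box Domain where this cell list is living
	 * \param div grid size on each dimension
	 * \param pad padding cells
	 *
	 */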
| 524 | 	CellList_gpu(const Box<dim,T> & box, const size_t (&div)[dim], const size_t pad = 1) | 
| 525 | 	{ | 
| 526 | 		Initialize(box,div,pad); | 
| 527 | 	} | 
| 528 |  | 
| 529 |  | 
	/*! Initialize the cell list
	 *
	 * \param box Domain where this cell list is living
	 * \param div grid size on each dimension
	 * \param pad padding cells
	 *
	 */
| 538 | 	void Initialize(const Box<dim,T> & box, const size_t (&div)[dim], const size_t pad = 1) | 
| 539 | 	{ | 
| 540 | 		SpaceBox<dim,T> sbox(box); | 
| 541 |  | 
| 542 | 		// Initialize point transformation | 
| 543 |  | 
| 544 | 		Initialize(sbox,div,pad); | 
| 545 | 	} | 
| 546 |  | 
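	/*! \brief Set the extension (in cells) of the neighborhood to test
	 *
	 * \param n_NN neighborhood extension: the stencil becomes (2*n_NN+1)^dim cells
	 *
	 */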
| 547 | 	void setBoxNN(unsigned int n_NN) | 
| 548 | 	{ | 
| 549 | 		cells_nn_test_size = n_NN; | 
| 550 | 		construct_cell_nn_test(n_NN); | 
| 551 | 	} | 
| 552 |  | 
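	//! \brief Rebuild the neighborhood stencil using the current extension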
| 553 | 	void re_setBoxNN() | 
| 554 | 	{ | 
| 555 | 		construct_cell_nn_test(cells_nn_test_size); | 
| 556 | 	} | 
| 557 |  | 
	/*! Initialize the cell list
	 *
	 * \param box Domain where this cell list is living
	 * \param div grid size on each dimension
	 * \param pad padding cells
	 *
	 */
| 566 | 	void Initialize(const SpaceBox<dim,T> & box, const size_t (&div)[dim], const size_t pad = 1) | 
| 567 | 	{ | 
| 568 | 		Matrix<dim,T> mat; | 
| 569 | 		CellDecomposer_sm<dim,T,transform>::setDimensions(box,div, mat, pad); | 
| 570 |  | 
		// create the array that stores the number of particles in each cell and set it to 0
| 572 | 		InitializeStructures(this->gr_cell.getSize(),this->gr_cell.size(),pad); | 
| 573 | 	} | 
| 574 |  | 
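	/*! \brief Get the sorted-to-unsorted index map
	 *
	 * \return for each sorted particle index, the corresponding unsorted index
	 *
	 */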
| 575 | 	vector_cnt_type & getSortToNonSort() | 
| 576 | 	{ | 
| 577 | 		return sorted_to_not_sorted; | 
| 578 | 	} | 
| 579 |  | 
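	/*! \brief Get the unsorted-to-sorted index map
	 *
	 * \return for each unsorted particle index, the corresponding sorted index
	 *
	 */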
| 580 | 	vector_cnt_type & getNonSortToSort() | 
| 581 | 	{ | 
| 582 | 		return non_sorted_to_sorted; | 
| 583 | 	} | 
| 584 |  | 
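	/*! \brief Get the indices of the domain particles in the sorted vector
	 *
	 * \return the sorted indices of the domain particles
	 *
	 */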
| 585 | 	vector_cnt_type & getDomainSortIds() | 
| 586 | 	{ | 
| 587 | 		return sorted_domain_particles_ids; | 
| 588 | 	} | 
| 589 |  | 
| 590 |  | 
	/*! \brief Set the radius for getNNIteratorRadius
	 *
	 * \param radius neighborhood radius
	 *
	 */
| 596 | 	void setRadius(T radius) | 
| 597 | 	{ | 
| 598 | 		openfpm::vector<long int> nnc_rad_; | 
| 599 |  | 
| 600 | 		NNcalc_rad(radius,nnc_rad_,this->getCellBox(),this->getGrid()); | 
| 601 |  | 
| 602 | 		nnc_rad.resize(nnc_rad_.size(),0); | 
| 603 |  | 
| 604 | 		// copy to nnc_rad | 
| 605 |  | 
| 606 | 		for (unsigned int i = 0 ; i < nnc_rad_.size() ; i++) | 
| 607 | 		{nnc_rad.template get<0>(i) = nnc_rad_.template get<0>(i);} | 
| 608 |  | 
| 609 | 		nnc_rad.template hostToDevice<0>(); | 
| 610 | 	} | 
| 611 |  | 
	/*! \brief construct the cell-list from a list of particles
	 *
	 * \warning pl is assumed to already be in device memory
	 *
	 * \param pl particle positions
	 * \param pl_out reordered particle positions (output)
	 * \param pl_prp particle properties
	 * \param pl_prp_out reordered particle properties (output)
	 * \param mgpuContext mgpu context
	 * \param g_m ghost marker
	 * \param start first particle in the range to process
	 * \param stop last particle in the range to process (excluded)
	 * \param opt construction option
	 *
	 */
| 619 | 	template<typename vector, typename vector_prp, unsigned int ... prp> | 
| 620 | 	void construct(vector & pl, | 
| 621 | 				   vector & pl_out, | 
| 622 | 				   vector_prp & pl_prp, | 
| 623 | 				   vector_prp & pl_prp_out, | 
| 624 | 				   mgpu::ofp_context_t & mgpuContext, | 
| 625 | 				   size_t g_m = 0, | 
| 626 | 				   size_t start = 0, | 
| 627 | 				   size_t stop = (size_t)-1, | 
| 628 | 				   cl_construct_opt opt = cl_construct_opt::Full) | 
| 629 | 	{ | 
		// if stop is left to the default, set it to the number of particles
| 631 | 		if (stop == (size_t)-1) | 
| 632 | 		{stop = pl.size();} | 
| 633 |  | 
| 634 | 		if (is_sparse == false) {construct_dense<vector,vector_prp,prp...>(pl,pl_out,pl_prp,pl_prp_out,mgpuContext,g_m,start,stop,opt);} | 
| 635 | 		else {construct_sparse<vector,vector_prp,prp...>(pl,pl_out,pl_prp,pl_prp_out,mgpuContext,g_m,start,stop,opt);} | 
| 636 | 	} | 
| 637 |  | 
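	/*! \brief Get the kernel-side version of this cell-list
	 *
	 * \return a CellList_gpu_ker object that can be passed to CUDA kernels
	 *
	 */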
| 638 | 	CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,is_sparse> toKernel() | 
| 639 | 	{ | 
| 640 | 		if (nnc_rad.size() == 0) | 
| 641 | 		{ | 
			// set the radius equal to the cell spacing in direction X
			// (nnc_rad must be initialized to something to avoid warnings)
| 644 | 			setRadius(this->getCellBox().getHigh(0)); | 
| 645 | 		} | 
| 646 |  | 
| 647 | 		return CellList_gpu_ker_selector<dim,T,cnt_type,ids_type,Memory,transform, | 
| 648 | 								  vector_cnt_type,openfpm::vector_gpu<aggregate<cnt_type,cnt_type>>, | 
| 649 | 								  decltype(cl_sparse),is_sparse> | 
| 650 | 		::get(starts, | 
| 651 | 				cells_nn, | 
| 652 | 				cells_nn_list, | 
| 653 | 				cl_sparse, | 
| 654 | 		    	sorted_to_not_sorted, | 
| 655 | 		    	sorted_domain_particles_ids, | 
| 656 | 		    	nnc_rad, | 
| 657 | 		        spacing_c, | 
| 658 | 		        div_c, | 
| 659 | 		        off, | 
| 660 | 		        this->getTransform(), | 
| 661 | 		        g_m); | 
| 662 | 	} | 
| 663 |  | 
| 664 | 	/*! \brief Clear the structure | 
| 665 | 	 * | 
| 666 | 	 * | 
| 667 | 	 */ | 
| 668 | 	void clear() | 
| 669 | 	{ | 
| 670 | 		cl_n.clear(); | 
| 671 | 		cells.clear(); | 
| 672 | 		starts.clear(); | 
| 673 | 		part_ids.clear(); | 
| 674 | 		sorted_to_not_sorted.clear(); | 
| 675 | 	} | 
| 676 |  | 
| 677 | 	///////////////////////////////////// | 
| 678 |  | 
| 679 | 	//! Ghost marker | 
| 680 | 	size_t g_m = 0; | 
| 681 |  | 
| 682 | 	/*! \brief return the ghost marker | 
| 683 | 	 * | 
| 684 | 	 * \return ghost marker | 
| 685 | 	 * | 
| 686 | 	 */ | 
| 687 | 	inline size_t get_gm() | 
| 688 | 	{ | 
| 689 | 		return g_m; | 
| 690 | 	} | 
| 691 |  | 
| 692 | 	/*! \brief Set the ghost marker | 
| 693 | 	 * | 
| 694 | 	 * \param g_m marker | 
| 695 | 	 * | 
| 696 | 	 */ | 
| 697 | 	inline void set_gm(size_t g_m) | 
| 698 | 	{ | 
| 699 | 		this->g_m = g_m; | 
| 700 | 	} | 
| 701 |  | 
| 702 | 	///////////////////////////////////// | 
| 703 |  | 
| 704 | 	/*! \brief Set the n_dec number | 
| 705 | 	 * | 
| 706 | 	 * \param n_dec | 
| 707 | 	 * | 
| 708 | 	 */ | 
| 709 | 	void set_ndec(size_t n_dec) | 
| 710 | 	{ | 
| 711 | 		this->n_dec = n_dec; | 
| 712 | 	} | 
| 713 |  | 
	/*! \brief Get the n_dec number
	 *
	 * \return n_dec
	 *
	 */
| 719 | 	size_t get_ndec() const | 
| 720 | 	{ | 
| 721 | 		return n_dec; | 
| 722 | 	} | 
| 723 |  | 
| 724 | 	///////////////////////////////////// | 
| 725 |  | 
	/*! \brief Transfer the cell-list information computed on the GPU back to the host (for debugging)
	 *
	 */
| 729 | 	void debug_deviceToHost() | 
| 730 | 	{ | 
| 731 | 		cl_n.template deviceToHost<0>(); | 
| 732 | 		cells.template deviceToHost<0>(); | 
| 733 | 		starts.template deviceToHost<0>(); | 
| 734 | 	} | 
| 735 |  | 
	/*! \brief Return the number of cells contained in this cell-list
	 *
	 * \return the number of cells
	 *
	 */
| 741 | 	size_t getNCells() | 
| 742 | 	{ | 
| 743 | 		return cl_n.size(); | 
| 744 | 	} | 
| 745 |  | 
	/*! \brief Return the number of elements in the cell
	 *
	 * \param i cell id
	 *
	 * \return the number of elements in the cell
	 *
	 */
| 751 | 	size_t getNelements(size_t i) | 
| 752 | 	{ | 
| 753 | 		return cl_n.template get<0>(i); | 
| 754 | 	} | 
| 755 |  | 
	/*! \brief Get an element in the cell
	 *
	 * \param cell cell id
	 * \param ele element id
	 *
	 * \return The element value
	 *
	 */
| 766 | 	inline auto get(size_t cell, size_t ele) -> decltype(cells.template get<0>(starts.template get<0>(cell)+ele)) | 
| 767 | 	{ | 
| 768 | 		return cells.template get<0>(starts.template get<0>(cell)+ele); | 
| 769 | 	} | 
| 770 |  | 
	/*! \brief Get an element in the cell
	 *
	 * \param cell cell id
	 * \param ele element id
	 *
	 * \return The element value
	 *
	 */
| 781 | 	inline auto get(size_t cell, size_t ele) const -> decltype(cells.template get<0>(starts.template get<0>(cell)+ele)) | 
| 782 | 	{ | 
| 783 | 		return cells.template get<0>(starts.template get<0>(cell)+ele); | 
| 784 | 	} | 
| 785 |  | 
	/*! \brief swap the information of the two cell-lists
	 *
	 * \param clg cell-list to swap with
	 *
	 */
| 791 | 	void swap(CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type,is_sparse> & clg) | 
| 792 | 	{ | 
| 793 | 		((CellDecomposer_sm<dim,T,transform> *)this)->swap(clg); | 
| 794 | 		cl_n.swap(clg.cl_n); | 
| 795 | 		cells.swap(clg.cells); | 
| 796 | 		starts.swap(clg.starts); | 
| 797 | 		part_ids.swap(clg.part_ids); | 
| 798 | 		cl_sparse.swap(clg.cl_sparse); | 
| 799 | 		cells_nn.swap(clg.cells_nn); | 
| 800 | 		cells_nn_list.swap(clg.cells_nn_list); | 
| 801 | 		cells_nn_test.swap(clg.cells_nn_test); | 
| 802 | 		sorted_to_not_sorted.swap(clg.sorted_to_not_sorted); | 
| 803 | 		sorted_domain_particles_dg.swap(clg.sorted_domain_particles_dg); | 
| 804 | 		sorted_domain_particles_ids.swap(clg.sorted_domain_particles_ids); | 
| 805 | 		non_sorted_to_sorted.swap(clg.non_sorted_to_sorted); | 
| 806 |  | 
| 807 | 		spacing_c.swap(clg.spacing_c); | 
| 808 | 		div_c.swap(clg.div_c); | 
| 809 | 		off.swap(clg.off); | 
| 810 |  | 
| 811 | 		size_t g_m_tmp = g_m; | 
| 812 | 		g_m = clg.g_m; | 
| 813 | 		clg.g_m = g_m_tmp; | 
| 814 |  | 
| 815 | 		size_t n_dec_tmp = n_dec; | 
| 816 | 		n_dec = clg.n_dec; | 
| 817 | 		clg.n_dec = n_dec_tmp; | 
| 818 |  | 
| 819 | 		int cells_nn_test_size_tmp = cells_nn_test_size; | 
| 820 | 		cells_nn_test_size = clg.cells_nn_test_size; | 
| 821 | 		clg.cells_nn_test_size = cells_nn_test_size_tmp; | 
| 822 | 	} | 
| 823 |  | 
| 824 | 	CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type,is_sparse> & | 
| 825 | 	operator=(const CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type,is_sparse> & clg) | 
| 826 | 	{ | 
| 827 | 		*static_cast<CellDecomposer_sm<dim,T,transform> *>(this) = *static_cast<const CellDecomposer_sm<dim,T,transform> *>(&clg); | 
| 828 | 		cl_n = clg.cl_n; | 
| 829 | 		cells = clg.cells; | 
| 830 | 		starts = clg.starts; | 
| 831 | 		part_ids = clg.part_ids; | 
| 832 | 		cl_sparse = clg.cl_sparse; | 
| 833 | 		cells_nn = clg.cells_nn; | 
| 834 | 		cells_nn_list = clg.cells_nn_list; | 
| 835 | 		cells_nn_test = clg.cells_nn_test; | 
| 836 | 		sorted_to_not_sorted = clg.sorted_to_not_sorted; | 
| 837 | 		sorted_domain_particles_dg = clg.sorted_domain_particles_dg; | 
| 838 | 		sorted_domain_particles_ids = clg.sorted_domain_particles_ids; | 
| 839 | 		non_sorted_to_sorted = clg.non_sorted_to_sorted; | 
| 840 |  | 
| 841 | 		spacing_c = clg.spacing_c; | 
| 842 | 		div_c = clg.div_c; | 
| 843 | 		off = clg.off; | 
| 844 | 		g_m = clg.g_m; | 
| 845 | 		n_dec = clg.n_dec; | 
| 846 |  | 
| 847 | 		cells_nn_test_size = clg.cells_nn_test_size; | 
| 848 |  | 
| 849 | 		return *this; | 
| 850 | 	} | 
| 851 |  | 
| 852 | 	CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type> & | 
| 853 | 	operator=(CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type> && clg) | 
| 854 | 	{ | 
| 855 | 		static_cast<CellDecomposer_sm<dim,T,transform> *>(this)->swap(*static_cast<CellDecomposer_sm<dim,T,transform> *>(&clg)); | 
| 856 | 		cl_n.swap(clg.cl_n); | 
| 857 | 		cells.swap(clg.cells); | 
| 858 | 		starts.swap(clg.starts); | 
| 859 | 		part_ids.swap(clg.part_ids); | 
| 860 | 		cl_sparse.swap(clg.cl_sparse); | 
| 861 | 		cells_nn.swap(clg.cells_nn); | 
| 862 | 		cells_nn_list.swap(clg.cells_nn_list); | 
| 863 | 		cells_nn_test.swap(clg.cells_nn_test); | 
| 864 | 		sorted_to_not_sorted.swap(clg.sorted_to_not_sorted); | 
| 865 | 		sorted_domain_particles_dg.swap(clg.sorted_domain_particles_dg); | 
| 866 | 		sorted_domain_particles_ids.swap(clg.sorted_domain_particles_ids); | 
| 867 | 		non_sorted_to_sorted.swap(clg.non_sorted_to_sorted); | 
| 868 |  | 
| 869 | 		spacing_c = clg.spacing_c; | 
| 870 | 		div_c = clg.div_c; | 
| 871 | 		off = clg.off; | 
| 872 | 		g_m = clg.g_m; | 
| 873 | 		n_dec = clg.n_dec; | 
| 874 |  | 
| 875 | 		cells_nn_test_size = clg.cells_nn_test_size; | 
| 876 |  | 
| 877 | 		return *this; | 
| 878 | 	} | 
| 879 | }; | 
| 880 |  | 
// This is a transformation node for vector_distributed for the algorithm toKernel_transform
| 882 | template<template <typename> class layout_base, typename T> | 
| 883 | struct toKernel_transform<layout_base,T,4> | 
| 884 | { | 
| 885 | 	typedef CellList_gpu_ker<T::dims, | 
| 886 | 							 typename T::stype, | 
| 887 | 							 typename T::cnt_type_, | 
| 888 | 							 typename T::ids_type_, | 
| 889 | 							 typename T::transform_, | 
| 890 | 							 T::is_sparse_::value> type; | 
| 891 | }; | 
| 892 |  | 
#endif // CUDA_GPU
| 894 |  | 
| 895 | #endif /* OPENFPM_DATA_SRC_NN_CELLLIST_CELLLIST_GPU_HPP_ */ | 
| 896 |  |