1 | /* |
2 | * CellList_gpu.hpp |
3 | * |
4 | * Created on: Jun 11, 2018 |
5 | * Author: i-bird |
6 | */ |
7 | |
8 | #ifndef OPENFPM_DATA_SRC_NN_CELLLIST_CELLLIST_GPU_HPP_ |
9 | #define OPENFPM_DATA_SRC_NN_CELLLIST_CELLLIST_GPU_HPP_ |
10 | |
11 | #include "config.h" |
12 | |
13 | #ifdef CUDA_GPU |
14 | |
15 | #include "Vector/map_vector_sparse.hpp" |
16 | #include "NN/CellList/CellDecomposer.hpp" |
17 | #include "Vector/map_vector.hpp" |
18 | #include "Cuda_cell_list_util_func.hpp" |
19 | #include "NN/CellList/cuda/CellList_gpu_ker.cuh" |
20 | #include "util/cuda_util.hpp" |
21 | #include "NN/CellList/CellList_util.hpp" |
22 | #include "NN/CellList/CellList.hpp" |
23 | #include "util/cuda/scan_ofp.cuh" |
24 | |
//! Property index aliases used when flushing/scanning the cell counters:
//! property 0 holds the per-cell particle count, property 1 the scan (start) offsets.
constexpr int count = 0;
constexpr int start = 1;
27 | |
/*! \brief Compile-time selector that builds the kernel-side view of the cell-list
 *
 * Primary template: dense cell-list (is_sparse == false). The sparse-only
 * arguments (cell_nn, cell_nn_list, cl_sparse) are part of the common call
 * signature but are intentionally unused here; only the scan array (starts),
 * the sorted/not-sorted maps and the geometry arrays are forwarded to the
 * kernel object.
 */
template<unsigned int dim, typename T,
typename cnt_type, typename ids_type,
typename Memory,typename transform,
typename vector_cnt_type, typename vector_cnt_type2,
typename cl_sparse_type,
bool is_sparse>
struct CellList_gpu_ker_selector
{
	/*! \brief Construct the device-side (kernel) cell-list object for the dense case
	 *
	 * \param starts cell scan (start offset of each cell)
	 * \param cell_nn unused in the dense case (sparse-only)
	 * \param cell_nn_list unused in the dense case (sparse-only)
	 * \param cl_sparse unused in the dense case (sparse-only)
	 * \param sorted_to_not_sorted map from sorted particle index to unsorted index
	 * \param sorted_domain_particles_ids indices of the domain particles in the sorted vector
	 * \param nnc_rad neighborhood cell offsets for radius-based iteration
	 * \param spacing_c cell spacing in each dimension
	 * \param div_c number of cells in each dimension
	 * \param off cell padding in each dimension
	 * \param t space transformation
	 * \param g_m ghost marker
	 *
	 * \return the kernel-usable cell-list object
	 */
	static inline CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,is_sparse> get(vector_cnt_type & starts,
			vector_cnt_type & cell_nn,
			vector_cnt_type2 & cell_nn_list,
			cl_sparse_type & cl_sparse,
			vector_cnt_type & sorted_to_not_sorted,
			vector_cnt_type & sorted_domain_particles_ids,
			openfpm::vector<aggregate<int>,Memory,memory_traits_inte> & nnc_rad,
			openfpm::array<T,dim,cnt_type> & spacing_c,
			openfpm::array<ids_type,dim,cnt_type> & div_c,
			openfpm::array<ids_type,dim,cnt_type> & off,
			const transform & t,
			unsigned int g_m)
	{
		return CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,is_sparse>(starts.toKernel(),
				sorted_to_not_sorted.toKernel(),
				sorted_domain_particles_ids.toKernel(),
				nnc_rad.toKernel(),
				spacing_c,
				div_c,
				off,
				t,
				g_m);
	}
};
60 | |
/*! \brief Specialization of the selector for the sparse cell-list (is_sparse == true)
 *
 * Here the sparse structures (per-cell neighborhood counts, neighborhood list
 * and the sparse cell vector) are forwarded to the kernel object, while the
 * dense-only scan array (starts) and the radius offsets (nnc_rad) are
 * intentionally unused.
 */
template<unsigned int dim, typename T,
typename cnt_type, typename ids_type,
typename Memory,typename transform,
typename vector_cnt_type, typename vector_cnt_type2,
typename cl_sparse_type>
struct CellList_gpu_ker_selector<dim,T,cnt_type,ids_type,Memory,transform,vector_cnt_type,vector_cnt_type2,cl_sparse_type,true>
{
	/*! \brief Construct the device-side (kernel) cell-list object for the sparse case
	 *
	 * \param starts unused in the sparse case (dense-only)
	 * \param cell_nn per-cell neighborhood count + offset
	 * \param cell_nn_list list of neighborhood cells for each cell
	 * \param cl_sparse sparse vector of the occupied cells
	 * \param srt map from sorted particle index to unsorted index
	 * \param dprt indices of the domain particles in the sorted vector
	 * \param nnc_rad unused in the sparse case
	 * \param spacing_c cell spacing in each dimension
	 * \param div_c number of cells in each dimension
	 * \param off cell padding in each dimension
	 * \param t space transformation
	 * \param g_m ghost marker
	 *
	 * \return the kernel-usable cell-list object
	 */
	static CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,true> get(vector_cnt_type & starts,
			vector_cnt_type & cell_nn,
			vector_cnt_type2 & cell_nn_list,
			cl_sparse_type & cl_sparse,
			vector_cnt_type & srt,
			vector_cnt_type & dprt,
			openfpm::vector<aggregate<int>,Memory,memory_traits_inte> & nnc_rad,
			openfpm::array<T,dim,cnt_type> & spacing_c,
			openfpm::array<ids_type,dim,cnt_type> & div_c,
			openfpm::array<ids_type,dim,cnt_type> & off,
			const transform & t,
			unsigned int g_m)
	{
		return CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,true>(cell_nn.toKernel(),
				cell_nn_list.toKernel(),
				cl_sparse.toKernel(),
				srt.toKernel(),
				dprt.toKernel(),
				spacing_c,
				div_c,
				off,
				t,g_m);
	}
};
92 | |
/*! \brief GPU cell-list implementation (dense or sparse)
 *
 * Builds a cell-list on the device from a list of particle positions and
 * reorders particles/properties for coalesced access. The dense variant
 * stores a counter + scan over all grid cells; the sparse variant only
 * stores the occupied cells plus their neighborhood lists.
 *
 * \tparam dim dimensionality of the space
 * \tparam T type of the space coordinates (e.g. float/double)
 * \tparam Memory memory allocator type (device memory)
 * \tparam transform space transformation applied before cell assignment
 * \tparam cnt_type type used for counters/indices
 * \tparam ids_type type used for cell ids
 * \tparam is_sparse true to build the sparse variant
 */
template<unsigned int dim,
		 typename T,
		 typename Memory,
		 typename transform = no_transform_only<dim,T>,
		 typename cnt_type = unsigned int,
		 typename ids_type = int,
		 bool is_sparse = false>
class CellList_gpu : public CellDecomposer_sm<dim,T,transform>
{
	//! Shorthand for a device vector of counters
	typedef openfpm::vector<aggregate<cnt_type>,Memory,memory_traits_inte> vector_cnt_type;

	//! \brief Number of particles in each cell
	vector_cnt_type cl_n;

	//! \brief for each cell the particles id in it
	vector_cnt_type cells;

	//! \brief Cell scan with + operation of cl_n (in case of sparse it contain the cell index of the particles)
	vector_cnt_type starts;

	//! \brief sparse vector in case of sparse Cell-list
	openfpm::vector_sparse_gpu<aggregate<cnt_type>> cl_sparse;

	//! \brief number of neighborhood each cell has + offset (sparse case)
	openfpm::vector_gpu<aggregate<cnt_type>> cells_nn;

	//! \brief For each cell the list of the neighborhood cells (sparse case)
	openfpm::vector_gpu<aggregate<cnt_type,cnt_type>> cells_nn_list;

	//! \brief particle ids information: cell id and local id inside the cell
	//! NOTE(review): the aggregate holds cnt_type[2]; the exact meaning of the two
	//! components is defined by the subindex/fill_cells kernels — confirm there
	openfpm::vector<aggregate<cnt_type[2]>,Memory,memory_traits_inte> part_ids;

	//! \brief Size (in cells per direction, one side) of the neighborhood to test
	int cells_nn_test_size;

	//! \brief Neighborhood of a cell to test (linearized cell offsets relative to the center cell)
	openfpm::vector_gpu<aggregate<int>> cells_nn_test;

	//! \brief for each sorted index it show the index in the unordered
	vector_cnt_type sorted_to_not_sorted;

	//! Sorted domain particles domain or ghost (marker/scan buffer)
	vector_cnt_type sorted_domain_particles_dg;

	//! \brief the index of all the domain particles in the sorted vector
	vector_cnt_type sorted_domain_particles_ids;

	//! \brief for each non sorted index it show the index in the ordered vector
	vector_cnt_type non_sorted_to_sorted;

	//! Spacing of the cells in each direction
	openfpm::array<T,dim,cnt_type> spacing_c;

	//! \brief number of sub-divisions in each direction
	openfpm::array<ids_type,dim,cnt_type> div_c;

	//! \brief cell padding
	openfpm::array<ids_type,dim,cnt_type> off;

	//! Radius neighborhood (cell offsets used by getNNIteratorRadius)
	openfpm::vector<aggregate<int>,Memory,memory_traits_inte> nnc_rad;

	//! Additional information in general (used to understand if the cell-list)
	//! has been constructed from an old decomposition
	size_t n_dec;

	/*! \brief Initialize the internal structures of the cell-list
	 *
	 * Copies the grid geometry (divisions, spacing, padding) into the device
	 * arrays, sizes the per-cell counter and builds the default (1-cell wide)
	 * neighborhood offsets.
	 *
	 * \param div number of cells in each direction
	 * \param tot_n_cell total number of cells
	 * \param pad cell padding
	 */
	void InitializeStructures(const size_t (& div)[dim], size_t tot_n_cell, size_t pad)
	{
		for (size_t i = 0 ; i < dim ; i++)
		{
			div_c[i] = div[i];
			spacing_c[i] = this->getCellBox().getP2().get(i);
			off[i] = pad;
		}

		cl_n.resize(tot_n_cell);

		cells_nn_test_size = 1;
		construct_cell_nn_test(cells_nn_test_size);
	}

	/*! \brief Build the table of neighborhood cell offsets
	 *
	 * Fills cells_nn_test with the linearized offsets of all cells in a
	 * (2*box_nn+1)^dim box centered on a cell, then uploads it to the device
	 * (and, when enabled, to constant memory).
	 *
	 * \param box_nn neighborhood extent in cells per direction (1 = 3^dim box)
	 */
	void construct_cell_nn_test(unsigned int box_nn = 1)
	{
		auto & gs = this->getGrid();

		grid_key_dx<dim> start;
		grid_key_dx<dim> stop;
		grid_key_dx<dim> middle;

		for (size_t i = 0 ; i < dim ; i++)
		{
			start.set_d(i,0);
			stop.set_d(i,2*box_nn);
			middle.set_d(i,box_nn);
		}

		cells_nn_test.resize(openfpm::math::pow(2*box_nn+1,dim));

		// linearized index of the center cell, subtracted to get relative offsets
		int mid = gs.LinId(middle);

		grid_key_dx_iterator_sub<dim> it(gs,start,stop);

		size_t i = 0;
		while (it.isNext())
		{
			auto p = it.get();

			cells_nn_test.template get<0>(i) = (int)gs.LinId(p) - mid;

			++i;
			++it;
		}

		cells_nn_test.template hostToDevice<0>();

#if defined(__NVCC__) && defined(USE_LOW_REGISTER_ITERATOR)

		// copy to the constant memory
		cudaMemcpyToSymbol(cells_striding,cells_nn_test.template getPointer<0>(),cells_nn_test.size()*sizeof(int));

#endif
	}

	/*! \brief This function construct a sparse cell-list
	 *
	 * Computes the cell index of each particle (subindex kernel), fills the
	 * sparse cell vector, builds the per-cell neighborhood lists and reorders
	 * particles/properties for coalesced access.
	 *
	 * \param pl particle positions (device)
	 * \param pl_out reordered particle positions (output)
	 * \param pl_prp particle properties (device)
	 * \param pl_prp_out reordered particle properties (output)
	 * \param mgpuContext moderngpu context used by scan/flush
	 * \param g_m ghost marker
	 * \param start first particle to consider
	 * \param stop one past the last particle to consider
	 * \param opt Full also constructs the domain particle ids
	 */
	template<typename vector, typename vector_prp, unsigned int ... prp>
	void construct_sparse(vector & pl,
						  vector & pl_out,
						  vector_prp & pl_prp,
						  vector_prp & pl_prp_out,
						  mgpu::ofp_context_t & mgpuContext,
						  size_t g_m,
						  size_t start,
						  size_t stop,
						  cl_construct_opt opt = cl_construct_opt::Full)
	{
#ifdef __NVCC__

		part_ids.resize(stop - start);
		starts.resize(stop - start);

		// Than we construct the ids

		// NOTE(review): the dense path uses getGPUIteratorTo(stop-start-1) and passes
		// "stop" as the size to subindex, this path uses stop-start and pl.size() —
		// verify against getGPUIteratorTo/subindex semantics that both are consistent
		auto ite_gpu = pl.getGPUIteratorTo(stop-start);

		if (ite_gpu.wthr.x == 0)
		{
			return;
		}

		CUDA_LAUNCH((subindex<true,dim,T,cnt_type,ids_type>),ite_gpu,div_c,
				spacing_c,
				off,
				this->getTransform(),
				pl.capacity(),
				pl.size(),
				part_ids.capacity(),
				start,
				static_cast<T *>(pl.template getDeviceBuffer<0>()),
				static_cast<cnt_type *>(starts.template getDeviceBuffer<0>()),
				static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>()));

		// now we construct the cells

		cells.resize(stop-start);

		// Here we fill the sparse vector
		cl_sparse.clear();
		cl_sparse.template setBackground<0>((cnt_type)-1);
		cl_sparse.setGPUInsertBuffer(ite_gpu.wthr.x,ite_gpu.thr.x);
		CUDA_LAUNCH((fill_cells_sparse),ite_gpu,cl_sparse.toKernel(),starts.toKernel());
		cl_sparse.template flush_vd<sstart_<0>>(cells,mgpuContext,FLUSH_ON_DEVICE);

		cells_nn.resize(cl_sparse.size()+1);
		cells_nn.template fill<0>(0);

		// Here we construct the neighborhood cells for each cell
		auto itgg = cl_sparse.getGPUIterator();
		CUDA_LAUNCH((count_nn_cells),itgg,cl_sparse.toKernel(),cells_nn.toKernel(),cells_nn_test.toKernel());

		// now we scan (in-place exclusive scan of the per-cell neighborhood counts)
		openfpm::scan((cnt_type *)cells_nn.template getDeviceBuffer<0>(), cells_nn.size(), (cnt_type *)cells_nn.template getDeviceBuffer<0>() , mgpuContext);

		// last element of the scan = total number of neighborhood entries
		cells_nn.template deviceToHost<0>(cells_nn.size() - 1, cells_nn.size() - 1);
		size_t n_nn_cells = cells_nn.template get<0>(cells_nn.size() - 1);

		cells_nn_list.resize(n_nn_cells);

		CUDA_LAUNCH((fill_nn_cells),itgg,cl_sparse.toKernel(),cells_nn.toKernel(),cells_nn_test.toKernel(),cells_nn_list.toKernel(),cells.size());

		sorted_to_not_sorted.resize(stop-start);
		non_sorted_to_sorted.resize(pl.size());

		auto ite = pl.getGPUIteratorTo(stop-start,64);

		// Here we reorder the particles to improve coalescing access
		CUDA_LAUNCH((reorder_parts<decltype(pl_prp.toKernel()),
				      decltype(pl.toKernel()),
				      decltype(sorted_to_not_sorted.toKernel()),
				      cnt_type,shift_ph<0,cnt_type>>),ite,sorted_to_not_sorted.size(),
				                                                           pl_prp.toKernel(),
				                                                           pl_prp_out.toKernel(),
				                                                           pl.toKernel(),
				                                                           pl_out.toKernel(),
				                                                           sorted_to_not_sorted.toKernel(),
				                                                           non_sorted_to_sorted.toKernel(),
				                                                           static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()));

		if (opt == cl_construct_opt::Full)
		{
			construct_domain_ids(mgpuContext,start,stop,g_m);
		}

#else

		std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " you are calling CellList_gpu.construct() this function is suppose must be compiled with NVCC compiler, but it look like has been compiled by the standard system compiler" << std::endl;

#endif
	}

	/*! \brief Construct the ids of the particles domain in the sorted array
	 *
	 * Marks the domain particles in the sorted vector, scans the marks and
	 * collects the resulting indices into sorted_domain_particles_ids.
	 *
	 * \param mgpuContext mgpu context
	 * \param start first particle to consider
	 * \param stop one past the last particle to consider
	 * \param g_m ghost marker
	 */
	void construct_domain_ids(mgpu::ofp_context_t & mgpuContext, size_t start, size_t stop, size_t g_m)
	{
#ifdef __NVCC__
		sorted_domain_particles_dg.resize(stop-start+1);

		auto ite = sorted_domain_particles_dg.getGPUIterator();

		CUDA_LAUNCH((mark_domain_particles),ite,sorted_to_not_sorted.toKernel(),sorted_domain_particles_dg.toKernel(),g_m);

		// lets scan
		openfpm::scan((unsigned int *)sorted_domain_particles_dg.template getDeviceBuffer<0>(),sorted_domain_particles_dg.size(),(unsigned int *)sorted_domain_particles_dg.template getDeviceBuffer<0>(),mgpuContext);

		// last element of the scan = number of domain particles
		sorted_domain_particles_dg.template deviceToHost<0>(sorted_domain_particles_dg.size()-1,sorted_domain_particles_dg.size()-1);
		auto sz = sorted_domain_particles_dg.template get<0>(sorted_domain_particles_dg.size()-1);

		sorted_domain_particles_ids.resize(sz);

		CUDA_LAUNCH((collect_domain_ghost_ids),ite,sorted_domain_particles_dg.toKernel(),sorted_domain_particles_ids.toKernel());
#endif
	}

	/*! \brief This function construct a dense cell-list
	 *
	 * Counts particles per cell (subindex kernel), scans the counters into
	 * the starts array, fills the cells array and reorders particles and
	 * (optionally a subset of) properties for coalesced access.
	 *
	 * \param pl particle positions (device)
	 * \param pl_out reordered particle positions (output)
	 * \param pl_prp particle properties (device)
	 * \param pl_prp_out reordered particle properties (output)
	 * \param mgpuContext moderngpu context used by scan/sort
	 * \param g_m ghost marker
	 * \param start first particle to consider
	 * \param stop one past the last particle to consider
	 * \param opt Full also constructs the domain particle ids
	 */
	template<typename vector, typename vector_prp, unsigned int ... prp>
	void construct_dense(vector & pl,
						 vector & pl_out,
						 vector_prp & pl_prp,
						 vector_prp & pl_prp_out,
						 mgpu::ofp_context_t & mgpuContext,
						 size_t g_m,
						 size_t start,
						 size_t stop,
						 cl_construct_opt opt = cl_construct_opt::Full)
	{
#ifdef __NVCC__

		CUDA_SAFE()

		// Than we construct the ids

		// NOTE(review): sparse path uses getGPUIteratorTo(stop-start) (no -1);
		// confirm against getGPUIteratorTo whether the argument is a count or a last index
		auto ite_gpu = pl.getGPUIteratorTo(stop-start-1);

		cl_n.resize(this->gr_cell.size()+1);
		cl_n.template fill<0>(0);
//		CUDA_SAFE(cudaMemset(cl_n.template getDeviceBuffer<0>(),0,cl_n.size()*sizeof(cnt_type)));

		part_ids.resize(stop - start);

		if (ite_gpu.wthr.x == 0 || pl.size() == 0 || stop == 0)
		{
			// no particles
			starts.resize(cl_n.size());
			starts.template fill<0>(0);
			return;
		}

		CUDA_LAUNCH((subindex<false,dim,T,cnt_type,ids_type>),ite_gpu,div_c,
																	spacing_c,
																	off,
																	this->getTransform(),
																	pl.capacity(),
																	stop,
																	part_ids.capacity(),
																	start,
																	static_cast<T *>(pl.template getDeviceBuffer<0>()),
																	static_cast<cnt_type *>(cl_n.template getDeviceBuffer<0>()),
																	static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>()));

		// now we scan
		starts.resize(cl_n.size());
		openfpm::scan((cnt_type *)cl_n.template getDeviceBuffer<0>(), cl_n.size(), (cnt_type *)starts.template getDeviceBuffer<0>() , mgpuContext);

		// now we construct the cells

		cells.resize(stop-start);
		auto itgg = part_ids.getGPUIterator();


#ifdef MAKE_CELLLIST_DETERMINISTIC

		CUDA_LAUNCH((fill_cells<dim,cnt_type,ids_type,shift_ph<0,cnt_type>>),itgg,0,
																	   part_ids.size(),
																	   static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()) );

		// sort to make the particle order inside each cell deterministic

		mgpu::mergesort(static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>()),static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()),pl.size(),mgpu::less_t<cnt_type>(),mgpuContext);

#else

		CUDA_LAUNCH((fill_cells<dim,cnt_type,ids_type,shift_ph<0,cnt_type>>),itgg,0,
																	   div_c,
																	   off,
																	   part_ids.size(),
																	   part_ids.capacity(),
																	   start,
																	   static_cast<cnt_type *>(starts.template getDeviceBuffer<0>()),
																	   static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>()),
																	   static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()) );

#endif


		sorted_to_not_sorted.resize(stop-start);
		non_sorted_to_sorted.resize(pl.size());

		auto ite = pl.getGPUIteratorTo(stop-start,64);

		if (sizeof...(prp) == 0)
		{
			// Here we reorder the particles to improve coalescing access
			CUDA_LAUNCH((reorder_parts<decltype(pl_prp.toKernel()),
					      decltype(pl.toKernel()),
					      decltype(sorted_to_not_sorted.toKernel()),
					      cnt_type,shift_ph<0,cnt_type>>),ite,sorted_to_not_sorted.size(),
					                                                           pl_prp.toKernel(),
					                                                           pl_prp_out.toKernel(),
					                                                           pl.toKernel(),
					                                                           pl_out.toKernel(),
					                                                           sorted_to_not_sorted.toKernel(),
					                                                           non_sorted_to_sorted.toKernel(),
					                                                           static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()));
		}
		else
		{
			// Here we reorder the particles to improve coalescing access
			// (only the properties listed in prp... are moved)
			CUDA_LAUNCH((reorder_parts_wprp<decltype(pl_prp.toKernel()),
					      decltype(pl.toKernel()),
					      decltype(sorted_to_not_sorted.toKernel()),
					      cnt_type,shift_ph<0,cnt_type>,prp...>),ite,sorted_to_not_sorted.size(),
					                                                           pl_prp.toKernel(),
					                                                           pl_prp_out.toKernel(),
					                                                           pl.toKernel(),
					                                                           pl_out.toKernel(),
					                                                           sorted_to_not_sorted.toKernel(),
					                                                           non_sorted_to_sorted.toKernel(),
					                                                           static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()));
		}

		if (opt == cl_construct_opt::Full)
		{
			construct_domain_ids(mgpuContext,start,stop,g_m);
		}

#else

		std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " you are calling CellList_gpu.construct() this function is suppose must be compiled with NVCC compiler, but it look like has been compiled by the standard system compiler" << std::endl;

#endif
	}

public:

	//! Indicate that this cell list is a gpu type cell-list
	typedef int yes_is_gpu_celllist;

	//! the type of the space
	typedef T stype;

	//! dimensions of space
	static const unsigned int dims = dim;

	//! count type
	typedef cnt_type cnt_type_;

	//! id type
	typedef ids_type ids_type_;

	//! transform type
	typedef transform transform_;

	//! is sparse
	typedef boost::mpl::bool_<is_sparse> is_sparse_;

	/*! \brief Copy constructor
	 *
	 * NOTE(review): the argument type omits the is_sparse template parameter
	 * (defaults to false) — confirm this is intended for sparse instantiations
	 *
	 * \param clg Cell list to copy
	 *
	 */
	CellList_gpu(const CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type> & clg)
	{
		this->operator=(clg);
	}

	/*! \brief Copy constructor from temporal
	 *
	 * NOTE(review): delegates to the copy assignment (clg is an lvalue inside
	 * the body), so this copies rather than moves
	 *
	 */
	CellList_gpu(CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type> && clg)
	{
		this->operator=(clg);
	}

	/*! \brief default constructor
	 *
	 *
	 */
	CellList_gpu()
	{}

	/*! \brief Construct and initialize the cell-list
	 *
	 * \param box Domain where this cell list is living
	 * \param div grid size on each dimension
	 * \param pad padding cell
	 */
	CellList_gpu(const Box<dim,T> & box, const size_t (&div)[dim], const size_t pad = 1)
	{
		Initialize(box,div,pad);
	}


	/*! Initialize the cell list
	 *
	 * \param box Domain where this cell list is living
	 * \param div grid size on each dimension
	 * \param pad padding cell
	 *
	 */
	void Initialize(const Box<dim,T> & box, const size_t (&div)[dim], const size_t pad = 1)
	{
		SpaceBox<dim,T> sbox(box);

		// Initialize point transformation

		Initialize(sbox,div,pad);
	}

	/*! \brief Set the neighborhood box extent and rebuild the neighborhood offsets
	 *
	 * \param n_NN neighborhood extent in cells per direction
	 */
	void setBoxNN(unsigned int n_NN)
	{
		cells_nn_test_size = n_NN;
		construct_cell_nn_test(n_NN);
	}

	/*! \brief Rebuild the neighborhood offsets with the current extent
	 *
	 * Useful after the grid geometry changed.
	 */
	void re_setBoxNN()
	{
		construct_cell_nn_test(cells_nn_test_size);
	}

	/*! Initialize the cell list constructor
	 *
	 * \param box Domain where this cell list is living
	 * \param div grid size on each dimension
	 * \param pad padding cell
	 *
	 */
	void Initialize(const SpaceBox<dim,T> & box, const size_t (&div)[dim], const size_t pad = 1)
	{
		Matrix<dim,T> mat;
		CellDecomposer_sm<dim,T,transform>::setDimensions(box,div, mat, pad);

		// create the array that store the number of particle on each cell and set it to 0
		InitializeStructures(this->gr_cell.getSize(),this->gr_cell.size(),pad);
	}

	/*! \brief Get the map from sorted particle index to unsorted index
	 *
	 * \return the sorted-to-unsorted map
	 */
	vector_cnt_type & getSortToNonSort()
	{
		return sorted_to_not_sorted;
	}

	/*! \brief Get the map from unsorted particle index to sorted index
	 *
	 * \return the unsorted-to-sorted map
	 */
	vector_cnt_type & getNonSortToSort()
	{
		return non_sorted_to_sorted;
	}

	/*! \brief Get the indices of the domain particles in the sorted vector
	 *
	 * \return the domain particle ids
	 */
	vector_cnt_type & getDomainSortIds()
	{
		return sorted_domain_particles_ids;
	}


	/*! \brief Set the radius for the getNNIteratorRadius
	 *
	 * \param radius
	 *
	 */
	void setRadius(T radius)
	{
		openfpm::vector<long int> nnc_rad_;

		NNcalc_rad(radius,nnc_rad_,this->getCellBox(),this->getGrid());

		nnc_rad.resize(nnc_rad_.size(),0);

		// copy to nnc_rad (host side), then upload to the device

		for (unsigned int i = 0 ; i < nnc_rad_.size() ; i++)
		{nnc_rad.template get<0>(i) = nnc_rad_.template get<0>(i);}

		nnc_rad.template hostToDevice<0>();
	}

	/*! \brief construct from a list of particles
	 *
	 * \warning pl is assumed to be already be in device memory
	 *
	 * \param pl Particles list
	 * \param pl_out reordered particles list (output)
	 * \param pl_prp particle properties
	 * \param pl_prp_out reordered particle properties (output)
	 * \param mgpuContext moderngpu context
	 * \param g_m ghost marker
	 * \param start first particle to consider
	 * \param stop one past the last particle (default: all particles)
	 * \param opt Full also constructs the domain particle ids
	 *
	 */
	template<typename vector, typename vector_prp, unsigned int ... prp>
	void construct(vector & pl,
				   vector & pl_out,
				   vector_prp & pl_prp,
				   vector_prp & pl_prp_out,
				   mgpu::ofp_context_t & mgpuContext,
				   size_t g_m = 0,
				   size_t start = 0,
				   size_t stop = (size_t)-1,
				   cl_construct_opt opt = cl_construct_opt::Full)
	{
		// if stop is the default, set it to the number of particles
		if (stop == (size_t)-1)
		{stop = pl.size();}

		if (is_sparse == false)	{construct_dense<vector,vector_prp,prp...>(pl,pl_out,pl_prp,pl_prp_out,mgpuContext,g_m,start,stop,opt);}
		else {construct_sparse<vector,vector_prp,prp...>(pl,pl_out,pl_prp,pl_prp_out,mgpuContext,g_m,start,stop,opt);}
	}

	/*! \brief Get the device-side (kernel usable) version of this cell-list
	 *
	 * \return the kernel cell-list object
	 */
	CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,is_sparse> toKernel()
	{
		if (nnc_rad.size() == 0)
		{
			// set the radius equal the cell spacing on direction X
			// (must be initialized to something to avoid warnings)
			setRadius(this->getCellBox().getHigh(0));
		}

		return CellList_gpu_ker_selector<dim,T,cnt_type,ids_type,Memory,transform,
										 vector_cnt_type,openfpm::vector_gpu<aggregate<cnt_type,cnt_type>>,
										 decltype(cl_sparse),is_sparse>
		::get(starts,
			  cells_nn,
			  cells_nn_list,
			  cl_sparse,
			  sorted_to_not_sorted,
			  sorted_domain_particles_ids,
			  nnc_rad,
			  spacing_c,
			  div_c,
			  off,
			  this->getTransform(),
			  g_m);
	}

	/*! \brief Clear the structure
	 *
	 *
	 */
	void clear()
	{
		cl_n.clear();
		cells.clear();
		starts.clear();
		part_ids.clear();
		sorted_to_not_sorted.clear();
	}

	/////////////////////////////////////

	//! Ghost marker
	size_t g_m = 0;

	/*! \brief return the ghost marker
	 *
	 * \return ghost marker
	 *
	 */
	inline size_t get_gm()
	{
		return g_m;
	}

	/*! \brief Set the ghost marker
	 *
	 * \param g_m marker
	 *
	 */
	inline void set_gm(size_t g_m)
	{
		this->g_m = g_m;
	}

	/////////////////////////////////////

	/*! \brief Set the n_dec number
	 *
	 * \param n_dec
	 *
	 */
	void set_ndec(size_t n_dec)
	{
		this->n_dec = n_dec;
	}

	/*! \brief Get the n_dec number
	 *
	 * \return n_dec
	 *
	 */
	size_t get_ndec() const
	{
		return n_dec;
	}

	/////////////////////////////////////

	/*! \brief Transfer the information computed on gpu to the host (debug helper)
	 *
	 */
	void debug_deviceToHost()
	{
		cl_n.template deviceToHost<0>();
		cells.template deviceToHost<0>();
		starts.template deviceToHost<0>();
	}

	/*! \brief Return the numbers of cells contained in this cell-list
	 *
	 * \return the number of cells
	 *
	 */
	size_t getNCells()
	{
		return cl_n.size();
	}

	/*! \brief Return the numbers of elements in the cell
	 *
	 * \param i cell id
	 *
	 * \return the number of elements in the cell
	 *
	 */
	size_t getNelements(size_t i)
	{
		return cl_n.template get<0>(i);
	}

	/*! \brief Get an element in the cell
	 *
	 * \param cell cell id
	 * \param ele element id
	 *
	 * \return The element value
	 *
	 */
	inline auto get(size_t cell, size_t ele) -> decltype(cells.template get<0>(starts.template get<0>(cell)+ele))
	{
		return cells.template get<0>(starts.template get<0>(cell)+ele);
	}

	/*! \brief Get an element in the cell (const version)
	 *
	 * \param cell cell id
	 * \param ele element id
	 *
	 * \return The element value
	 *
	 */
	inline auto get(size_t cell, size_t ele) const -> decltype(cells.template get<0>(starts.template get<0>(cell)+ele))
	{
		return cells.template get<0>(starts.template get<0>(cell)+ele);
	}

	/*! \brief swap the information of the two cell-lists
	 *
	 * \param clg cell-list to swap with
	 *
	 */
	void swap(CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type,is_sparse> & clg)
	{
		((CellDecomposer_sm<dim,T,transform> *)this)->swap(clg);
		cl_n.swap(clg.cl_n);
		cells.swap(clg.cells);
		starts.swap(clg.starts);
		part_ids.swap(clg.part_ids);
		cl_sparse.swap(clg.cl_sparse);
		cells_nn.swap(clg.cells_nn);
		cells_nn_list.swap(clg.cells_nn_list);
		cells_nn_test.swap(clg.cells_nn_test);
		sorted_to_not_sorted.swap(clg.sorted_to_not_sorted);
		sorted_domain_particles_dg.swap(clg.sorted_domain_particles_dg);
		sorted_domain_particles_ids.swap(clg.sorted_domain_particles_ids);
		non_sorted_to_sorted.swap(clg.non_sorted_to_sorted);

		spacing_c.swap(clg.spacing_c);
		div_c.swap(clg.div_c);
		off.swap(clg.off);

		size_t g_m_tmp = g_m;
		g_m = clg.g_m;
		clg.g_m = g_m_tmp;

		size_t n_dec_tmp = n_dec;
		n_dec = clg.n_dec;
		clg.n_dec = n_dec_tmp;

		int cells_nn_test_size_tmp = cells_nn_test_size;
		cells_nn_test_size = clg.cells_nn_test_size;
		clg.cells_nn_test_size = cells_nn_test_size_tmp;
	}

	/*! \brief Copy assignment
	 *
	 * \param clg cell-list to copy
	 *
	 * \return *this
	 *
	 */
	CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type,is_sparse> &
	operator=(const CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type,is_sparse> & clg)
	{
		*static_cast<CellDecomposer_sm<dim,T,transform> *>(this) = *static_cast<const CellDecomposer_sm<dim,T,transform> *>(&clg);
		cl_n = clg.cl_n;
		cells = clg.cells;
		starts = clg.starts;
		part_ids = clg.part_ids;
		cl_sparse = clg.cl_sparse;
		cells_nn = clg.cells_nn;
		cells_nn_list = clg.cells_nn_list;
		cells_nn_test = clg.cells_nn_test;
		sorted_to_not_sorted = clg.sorted_to_not_sorted;
		sorted_domain_particles_dg = clg.sorted_domain_particles_dg;
		sorted_domain_particles_ids = clg.sorted_domain_particles_ids;
		non_sorted_to_sorted = clg.non_sorted_to_sorted;

		spacing_c = clg.spacing_c;
		div_c = clg.div_c;
		off = clg.off;
		g_m = clg.g_m;
		n_dec = clg.n_dec;

		cells_nn_test_size = clg.cells_nn_test_size;

		return *this;
	}

	/*! \brief Move assignment (implemented via swap of the containers)
	 *
	 * NOTE(review): the parameter/return types omit the is_sparse template
	 * parameter (defaults to false) — confirm this is intended
	 *
	 * \param clg cell-list to move from
	 *
	 * \return *this
	 *
	 */
	CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type> &
	operator=(CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type> && clg)
	{
		static_cast<CellDecomposer_sm<dim,T,transform> *>(this)->swap(*static_cast<CellDecomposer_sm<dim,T,transform> *>(&clg));
		cl_n.swap(clg.cl_n);
		cells.swap(clg.cells);
		starts.swap(clg.starts);
		part_ids.swap(clg.part_ids);
		cl_sparse.swap(clg.cl_sparse);
		cells_nn.swap(clg.cells_nn);
		cells_nn_list.swap(clg.cells_nn_list);
		cells_nn_test.swap(clg.cells_nn_test);
		sorted_to_not_sorted.swap(clg.sorted_to_not_sorted);
		sorted_domain_particles_dg.swap(clg.sorted_domain_particles_dg);
		sorted_domain_particles_ids.swap(clg.sorted_domain_particles_ids);
		non_sorted_to_sorted.swap(clg.non_sorted_to_sorted);

		spacing_c = clg.spacing_c;
		div_c = clg.div_c;
		off = clg.off;
		g_m = clg.g_m;
		n_dec = clg.n_dec;

		cells_nn_test_size = clg.cells_nn_test_size;

		return *this;
	}
};
880 | |
// This is a transformation node for vector_distributed for the algorithm toKernel_transform
/*! \brief Map a (host-side) CellList_gpu type T to its kernel-side type
 *
 * Specialization (tag value 4) that exposes the matching CellList_gpu_ker
 * type, reading all template parameters back from the nested typedefs of T.
 */
template<template <typename> class layout_base, typename T>
struct toKernel_transform<layout_base,T,4>
{
	//! the corresponding device (kernel) cell-list type
	typedef CellList_gpu_ker<T::dims,
			                 typename T::stype,
			                 typename T::cnt_type_,
			                 typename T::ids_type_,
			                 typename T::transform_,
			                 T::is_sparse_::value> type;
};
892 | |
893 | #endif |
894 | |
895 | #endif /* OPENFPM_DATA_SRC_NN_CELLLIST_CELLLIST_GPU_HPP_ */ |
896 | |