/*
 * CellList_gpu.hpp
 *
 * Created on: Jun 11, 2018
 * Author: i-bird
 */

#ifndef OPENFPM_DATA_SRC_NN_CELLLIST_CELLLIST_GPU_HPP_
#define OPENFPM_DATA_SRC_NN_CELLLIST_CELLLIST_GPU_HPP_

#include "config.h"

#ifdef CUDA_GPU

#include "Vector/map_vector_sparse.hpp"
#include "NN/CellList/CellDecomposer.hpp"
#include "Vector/map_vector.hpp"
#include "Cuda_cell_list_util_func.hpp"
#include "NN/CellList/cuda/CellList_gpu_ker.cuh"
#include "util/cuda_util.hpp"
#include "NN/CellList/CellList_util.hpp"
#include "NN/CellList/CellList.hpp"
#include "util/cuda/scan_ofp.cuh"

constexpr int count = 0;
constexpr int start = 1;

template<unsigned int dim, typename T,
	typename cnt_type, typename ids_type,
	typename Memory, typename transform,
	typename vector_cnt_type, typename vector_cnt_type2,
	typename cl_sparse_type,
	bool is_sparse>
struct CellList_gpu_ker_selector
{
	static inline CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,is_sparse> get(vector_cnt_type & starts,
		vector_cnt_type & cell_nn,
		vector_cnt_type2 & cell_nn_list,
		cl_sparse_type & cl_sparse,
		vector_cnt_type & sorted_to_not_sorted,
		vector_cnt_type & sorted_domain_particles_ids,
		openfpm::vector<aggregate<int>,Memory,memory_traits_inte> & nnc_rad,
		openfpm::array<T,dim,cnt_type> & spacing_c,
		openfpm::array<ids_type,dim,cnt_type> & div_c,
		openfpm::array<ids_type,dim,cnt_type> & off,
		const transform & t,
		unsigned int g_m)
	{
		return CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,is_sparse>(starts.toKernel(),
			sorted_to_not_sorted.toKernel(),
			sorted_domain_particles_ids.toKernel(),
			nnc_rad.toKernel(),
			spacing_c,
			div_c,
			off,
			t,
			g_m);
	}
};

template<unsigned int dim, typename T,
	typename cnt_type, typename ids_type,
	typename Memory, typename transform,
	typename vector_cnt_type, typename vector_cnt_type2,
	typename cl_sparse_type>
struct CellList_gpu_ker_selector<dim,T,cnt_type,ids_type,Memory,transform,vector_cnt_type,vector_cnt_type2,cl_sparse_type,true>
{
	static CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,true> get(vector_cnt_type & starts,
		vector_cnt_type & cell_nn,
		vector_cnt_type2 & cell_nn_list,
		cl_sparse_type & cl_sparse,
		vector_cnt_type & srt,
		vector_cnt_type & dprt,
		openfpm::vector<aggregate<int>,Memory,memory_traits_inte> & nnc_rad,
		openfpm::array<T,dim,cnt_type> & spacing_c,
		openfpm::array<ids_type,dim,cnt_type> & div_c,
		openfpm::array<ids_type,dim,cnt_type> & off,
		const transform & t,
		unsigned int g_m)
	{
		return CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,true>(cell_nn.toKernel(),
			cell_nn_list.toKernel(),
			cl_sparse.toKernel(),
			srt.toKernel(),
			dprt.toKernel(),
			spacing_c,
			div_c,
			off,
			t,g_m);
	}
};

template<unsigned int dim,
	typename T,
	typename Memory,
	typename transform = no_transform_only<dim,T>,
	typename cnt_type = unsigned int,
	typename ids_type = int,
	bool is_sparse = false>
class CellList_gpu : public CellDecomposer_sm<dim,T,transform>
{
	typedef openfpm::vector<aggregate<cnt_type>,Memory,memory_traits_inte> vector_cnt_type;

	//! \brief Number of particles in each cell
	vector_cnt_type cl_n;

	//! \brief for each cell the ids of the particles in it
	vector_cnt_type cells;

	//! \brief exclusive scan (+) of cl_n (in the sparse case it contains the cell index of each particle)
	vector_cnt_type starts;

	//! \brief sparse vector, used in case of a sparse cell-list
	openfpm::vector_sparse_gpu<aggregate<cnt_type>> cl_sparse;

	//! \brief number of neighborhood cells each cell has (after the scan, the offsets)
	openfpm::vector_gpu<aggregate<cnt_type>> cells_nn;

	//! \brief for each cell the list of its neighborhood cells
	openfpm::vector_gpu<aggregate<cnt_type,cnt_type>> cells_nn_list;

	//! \brief particle id information: for each particle the linearized cell-id and the local-id inside the cell
	openfpm::vector<aggregate<cnt_type[2]>,Memory,memory_traits_inte> part_ids;

	//! \brief size (in cells, per direction) of the neighborhood to test
	int cells_nn_test_size;

	//! \brief neighborhood of a cell to test
	openfpm::vector_gpu<aggregate<int>> cells_nn_test;

	//! \brief for each sorted index the corresponding index in the unsorted vector
	vector_cnt_type sorted_to_not_sorted;

	//! \brief marks, for each sorted particle, whether it is a domain or a ghost particle
	vector_cnt_type sorted_domain_particles_dg;

	//! \brief the indices of all the domain particles in the sorted vector
	vector_cnt_type sorted_domain_particles_ids;

	//! \brief for each unsorted index the corresponding index in the sorted vector
	vector_cnt_type non_sorted_to_sorted;

	//! \brief cell spacing
	openfpm::array<T,dim,cnt_type> spacing_c;

	//! \brief number of sub-divisions in each direction
	openfpm::array<ids_type,dim,cnt_type> div_c;

	//! \brief cell padding
	openfpm::array<ids_type,dim,cnt_type> off;

	//! \brief neighborhood cell offsets within the radius
	openfpm::vector<aggregate<int>,Memory,memory_traits_inte> nnc_rad;

	//! \brief additional information, used to understand whether the cell-list
	//! has been constructed from an old decomposition
	size_t n_dec;

	//! \brief Initialize the internal structures
	void InitializeStructures(const size_t (& div)[dim], size_t tot_n_cell, size_t pad)
	{
		for (size_t i = 0 ; i < dim ; i++)
		{
			div_c[i] = div[i];
			spacing_c[i] = this->getCellBox().getP2().get(i);
			off[i] = pad;
		}

		cl_n.resize(tot_n_cell);

		cells_nn_test_size = 1;
		construct_cell_nn_test(cells_nn_test_size);
	}

	void construct_cell_nn_test(unsigned int box_nn = 1)
	{
		auto & gs = this->getGrid();

		grid_key_dx<dim> start;
		grid_key_dx<dim> stop;
		grid_key_dx<dim> middle;

		for (size_t i = 0 ; i < dim ; i++)
		{
			start.set_d(i,0);
			stop.set_d(i,2*box_nn);
			middle.set_d(i,box_nn);
		}

		cells_nn_test.resize(openfpm::math::pow(2*box_nn+1,dim));

		int mid = gs.LinId(middle);

		grid_key_dx_iterator_sub<dim> it(gs,start,stop);

		size_t i = 0;
		while (it.isNext())
		{
			auto p = it.get();

			cells_nn_test.template get<0>(i) = (int)gs.LinId(p) - mid;

			++i;
			++it;
		}

		cells_nn_test.template hostToDevice<0>();

#if defined(__NVCC__) && defined(USE_LOW_REGISTER_ITERATOR)

		// copy to the constant memory
		cudaMemcpyToSymbol(cells_striding,cells_nn_test.template getPointer<0>(),cells_nn_test.size()*sizeof(int));

#endif
	}
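
	/* For intuition, a sketch (not part of the class, hypothetical sizes) of what
	 * construct_cell_nn_test produces: for dim = 2, box_nn = 1 and an internal grid
	 * of 10x10 cells, LinId linearizes a key as k[0] + 10*k[1], so the 3x3 stencil
	 * around the middle cell yields the offsets
	 *
	 *   int expected[9] = {-11,-10,-9, -1,0,1, 9,10,11};
	 *
	 * Adding one of these offsets to a cell's linear id gives the linear id of the
	 * corresponding neighborhood cell; this is what the GPU neighbor iterators use.
	 */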

	/*! \brief Construct a sparse cell-list
	 *
	 *
	 */
	template<typename vector, typename vector_prp, unsigned int ... prp>
	void construct_sparse(vector & pl,
		vector & pl_out,
		vector_prp & pl_prp,
		vector_prp & pl_prp_out,
		mgpu::ofp_context_t & mgpuContext,
		size_t g_m,
		size_t start,
		size_t stop,
		cl_construct_opt opt = cl_construct_opt::Full)
	{
#ifdef __NVCC__

		part_ids.resize(stop - start);
		starts.resize(stop - start);

		// Then we construct the ids

		auto ite_gpu = pl.getGPUIteratorTo(stop-start);

		if (ite_gpu.wthr.x == 0)
		{
			return;
		}

		CUDA_LAUNCH((subindex<true,dim,T,cnt_type,ids_type>),ite_gpu,div_c,
			spacing_c,
			off,
			this->getTransform(),
			pl.capacity(),
			pl.size(),
			part_ids.capacity(),
			start,
			static_cast<T *>(pl.template getDeviceBuffer<0>()),
			static_cast<cnt_type *>(starts.template getDeviceBuffer<0>()),
			static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>()));

		// now we construct the cells

		cells.resize(stop-start);

		// Here we fill the sparse vector
		cl_sparse.clear();
		cl_sparse.template setBackground<0>((cnt_type)-1);
		cl_sparse.setGPUInsertBuffer(ite_gpu.wthr.x,ite_gpu.thr.x);
		CUDA_LAUNCH((fill_cells_sparse),ite_gpu,cl_sparse.toKernel(),starts.toKernel());
		cl_sparse.template flush_vd<sstart_<0>>(cells,mgpuContext,FLUSH_ON_DEVICE);

		cells_nn.resize(cl_sparse.size()+1);
		cells_nn.template fill<0>(0);

		// Here we construct the neighborhood cells for each cell
		auto itgg = cl_sparse.getGPUIterator();
		CUDA_LAUNCH((count_nn_cells),itgg,cl_sparse.toKernel(),cells_nn.toKernel(),cells_nn_test.toKernel());

		// now we scan
		openfpm::scan((cnt_type *)cells_nn.template getDeviceBuffer<0>(), cells_nn.size(), (cnt_type *)cells_nn.template getDeviceBuffer<0>() , mgpuContext);

		cells_nn.template deviceToHost<0>(cells_nn.size() - 1, cells_nn.size() - 1);
		size_t n_nn_cells = cells_nn.template get<0>(cells_nn.size() - 1);

		cells_nn_list.resize(n_nn_cells);

		CUDA_LAUNCH((fill_nn_cells),itgg,cl_sparse.toKernel(),cells_nn.toKernel(),cells_nn_test.toKernel(),cells_nn_list.toKernel(),cells.size());

		sorted_to_not_sorted.resize(stop-start);
		non_sorted_to_sorted.resize(pl.size());

		auto ite = pl.getGPUIteratorTo(stop-start,64);

		// Here we reorder the particles to improve coalesced access
		CUDA_LAUNCH((reorder_parts<decltype(pl_prp.toKernel()),
			decltype(pl.toKernel()),
			decltype(sorted_to_not_sorted.toKernel()),
			cnt_type,shift_ph<0,cnt_type>>),ite,sorted_to_not_sorted.size(),
			pl_prp.toKernel(),
			pl_prp_out.toKernel(),
			pl.toKernel(),
			pl_out.toKernel(),
			sorted_to_not_sorted.toKernel(),
			non_sorted_to_sorted.toKernel(),
			static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()));

		if (opt == cl_construct_opt::Full)
		{
			construct_domain_ids(mgpuContext,start,stop,g_m);
		}

#else

		std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " you are calling CellList_gpu.construct(), this function must be compiled with the NVCC compiler, but it looks like it has been compiled by the standard system compiler" << std::endl;

#endif
	}
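
	/* A small worked example (hypothetical numbers) of the count/scan/fill pattern
	 * above: suppose only three cells are non-empty and the stencil finds 2, 3 and 1
	 * non-empty neighbors for them respectively. After count_nn_cells,
	 * cells_nn = {2,3,1,0}; after the in-place exclusive scan, cells_nn = {0,2,5,6},
	 * so the last element (6) is the total and cell i's neighbor list occupies
	 * cells_nn_list[cells_nn[i] .. cells_nn[i+1]), which fill_nn_cells populates.
	 */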

	/*! \brief Construct the ids of the domain particles in the sorted array
	 *
	 * \param mgpuContext mgpu context
	 *
	 */
	void construct_domain_ids(mgpu::ofp_context_t & mgpuContext, size_t start, size_t stop, size_t g_m)
	{
#ifdef __NVCC__
		sorted_domain_particles_dg.resize(stop-start+1);

		auto ite = sorted_domain_particles_dg.getGPUIterator();

		CUDA_LAUNCH((mark_domain_particles),ite,sorted_to_not_sorted.toKernel(),sorted_domain_particles_dg.toKernel(),g_m);

		// let's scan
		openfpm::scan((unsigned int *)sorted_domain_particles_dg.template getDeviceBuffer<0>(),sorted_domain_particles_dg.size(),(unsigned int *)sorted_domain_particles_dg.template getDeviceBuffer<0>(),mgpuContext);

		sorted_domain_particles_dg.template deviceToHost<0>(sorted_domain_particles_dg.size()-1,sorted_domain_particles_dg.size()-1);
		auto sz = sorted_domain_particles_dg.template get<0>(sorted_domain_particles_dg.size()-1);

		sorted_domain_particles_ids.resize(sz);

		CUDA_LAUNCH((collect_domain_ghost_ids),ite,sorted_domain_particles_dg.toKernel(),sorted_domain_particles_ids.toKernel());
#endif
	}
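
	/* The mark/scan/collect sequence above is a standard GPU stream compaction.
	 * A sketch with hypothetical numbers: for 6 sorted particles whose unsorted ids
	 * are {0,5,2,7,1,6} and ghost marker g_m = 4, marking domain particles
	 * (id < g_m) gives the flags {1,0,1,0,1,0,(0)}. The exclusive scan turns this
	 * into {0,1,1,2,2,3,3}; wherever flag[i] != flag[i+1], position i is a domain
	 * particle and flag[i] is its output slot, so collect_domain_ghost_ids fills
	 * sorted_domain_particles_ids = {0,2,4}.
	 */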

	/*! \brief Construct a dense cell-list
	 *
	 *
	 */
	template<typename vector, typename vector_prp, unsigned int ... prp>
	void construct_dense(vector & pl,
		vector & pl_out,
		vector_prp & pl_prp,
		vector_prp & pl_prp_out,
		mgpu::ofp_context_t & mgpuContext,
		size_t g_m,
		size_t start,
		size_t stop,
		cl_construct_opt opt = cl_construct_opt::Full)
	{
#ifdef __NVCC__

		CUDA_SAFE()

		// Then we construct the ids

		auto ite_gpu = pl.getGPUIteratorTo(stop-start-1);

		cl_n.resize(this->gr_cell.size()+1);
		cl_n.template fill<0>(0);
//		CUDA_SAFE(cudaMemset(cl_n.template getDeviceBuffer<0>(),0,cl_n.size()*sizeof(cnt_type)));

		part_ids.resize(stop - start);

		if (ite_gpu.wthr.x == 0 || pl.size() == 0 || stop == 0)
		{
			// no particles
			starts.resize(cl_n.size());
			starts.template fill<0>(0);
			return;
		}

		CUDA_LAUNCH((subindex<false,dim,T,cnt_type,ids_type>),ite_gpu,div_c,
			spacing_c,
			off,
			this->getTransform(),
			pl.capacity(),
			stop,
			part_ids.capacity(),
			start,
			static_cast<T *>(pl.template getDeviceBuffer<0>()),
			static_cast<cnt_type *>(cl_n.template getDeviceBuffer<0>()),
			static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>()));

		// now we scan
		starts.resize(cl_n.size());
		openfpm::scan((cnt_type *)cl_n.template getDeviceBuffer<0>(), cl_n.size(), (cnt_type *)starts.template getDeviceBuffer<0>() , mgpuContext);

		// now we construct the cells

		cells.resize(stop-start);
		auto itgg = part_ids.getGPUIterator();


#ifdef MAKE_CELLLIST_DETERMINISTIC

		CUDA_LAUNCH((fill_cells<dim,cnt_type,ids_type,shift_ph<0,cnt_type>>),itgg,0,
			part_ids.size(),
			static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()) );

		// sort

		mgpu::mergesort(static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>()),static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()),pl.size(),mgpu::less_t<cnt_type>(),mgpuContext);

#else

		CUDA_LAUNCH((fill_cells<dim,cnt_type,ids_type,shift_ph<0,cnt_type>>),itgg,0,
			div_c,
			off,
			part_ids.size(),
			part_ids.capacity(),
			start,
			static_cast<cnt_type *>(starts.template getDeviceBuffer<0>()),
			static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>()),
			static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()) );

#endif


		sorted_to_not_sorted.resize(stop-start);
		non_sorted_to_sorted.resize(pl.size());

		auto ite = pl.getGPUIteratorTo(stop-start,64);

		if (sizeof...(prp) == 0)
		{
			// Here we reorder the particles to improve coalesced access
			CUDA_LAUNCH((reorder_parts<decltype(pl_prp.toKernel()),
				decltype(pl.toKernel()),
				decltype(sorted_to_not_sorted.toKernel()),
				cnt_type,shift_ph<0,cnt_type>>),ite,sorted_to_not_sorted.size(),
				pl_prp.toKernel(),
				pl_prp_out.toKernel(),
				pl.toKernel(),
				pl_out.toKernel(),
				sorted_to_not_sorted.toKernel(),
				non_sorted_to_sorted.toKernel(),
				static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()));
		}
		else
		{
			// Here we reorder the particles to improve coalesced access
			CUDA_LAUNCH((reorder_parts_wprp<decltype(pl_prp.toKernel()),
				decltype(pl.toKernel()),
				decltype(sorted_to_not_sorted.toKernel()),
				cnt_type,shift_ph<0,cnt_type>,prp...>),ite,sorted_to_not_sorted.size(),
				pl_prp.toKernel(),
				pl_prp_out.toKernel(),
				pl.toKernel(),
				pl_out.toKernel(),
				sorted_to_not_sorted.toKernel(),
				non_sorted_to_sorted.toKernel(),
				static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()));
		}

		if (opt == cl_construct_opt::Full)
		{
			construct_domain_ids(mgpuContext,start,stop,g_m);
		}

#else

		std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " you are calling CellList_gpu.construct(), this function must be compiled with the NVCC compiler, but it looks like it has been compiled by the standard system compiler" << std::endl;

#endif
	}
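
	/* The dense construction is essentially a GPU counting sort. A sketch with
	 * hypothetical numbers: for 5 particles falling in cells {2,0,2,1,2} of a
	 * 4-cell grid, subindex produces the per-cell counts cl_n = {1,1,3,0,(0)};
	 * the exclusive scan gives starts = {0,1,2,5,5}; fill_cells then places each
	 * particle at starts[cell] + local_id, yielding cells = {1,3,0,2,4} (particle
	 * indices grouped by cell). Cell c's particles are cells[starts[c] .. starts[c+1]).
	 */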

public:

	//! Indicate that this cell-list is a GPU type cell-list
	typedef int yes_is_gpu_celllist;

	//! the type of the space
	typedef T stype;

	//! dimensionality of the space
	static const unsigned int dims = dim;

	//! count type
	typedef cnt_type cnt_type_;

	//! id type
	typedef ids_type ids_type_;

	//! transform type
	typedef transform transform_;

	//! is sparse
	typedef boost::mpl::bool_<is_sparse> is_sparse_;

	/*! \brief Copy constructor
	 *
	 * \param clg Cell list to copy
	 *
	 */
	CellList_gpu(const CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type,is_sparse> & clg)
	{
		this->operator=(clg);
	}

	/*! \brief Constructor from a temporary
	 *
	 * \param clg Cell list to move from
	 *
	 */
	CellList_gpu(CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type,is_sparse> && clg)
	{
		this->operator=(clg);
	}

	/*! \brief Default constructor
	 *
	 *
	 */
	CellList_gpu()
	{}

	CellList_gpu(const Box<dim,T> & box, const size_t (&div)[dim], const size_t pad = 1)
	{
		Initialize(box,div,pad);
	}


	/*! Initialize the cell list
	 *
	 * \param box Domain where this cell list is living
	 * \param div grid size in each dimension
	 * \param pad padding cells
	 *
	 */
	void Initialize(const Box<dim,T> & box, const size_t (&div)[dim], const size_t pad = 1)
	{
		SpaceBox<dim,T> sbox(box);

		// Initialize point transformation

		Initialize(sbox,div,pad);
	}

	void setBoxNN(unsigned int n_NN)
	{
		cells_nn_test_size = n_NN;
		construct_cell_nn_test(n_NN);
	}

	void re_setBoxNN()
	{
		construct_cell_nn_test(cells_nn_test_size);
	}

	/*! Initialize the cell list constructor
	 *
	 * \param box Domain where this cell list is living
	 * \param div grid size in each dimension
	 * \param pad padding cells
	 *
	 */
	void Initialize(const SpaceBox<dim,T> & box, const size_t (&div)[dim], const size_t pad = 1)
	{
		Matrix<dim,T> mat;
		CellDecomposer_sm<dim,T,transform>::setDimensions(box,div, mat, pad);

		// create the array that stores the number of particles in each cell and set it to 0
		InitializeStructures(this->gr_cell.getSize(),this->gr_cell.size(),pad);
	}
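
	/* A minimal host-side usage sketch (hypothetical sizes, assuming a 3D float
	 * cell-list backed by CudaMemory):
	 *
	 *   Box<3,float> domain({0.0f,0.0f,0.0f},{1.0f,1.0f,1.0f});
	 *   size_t div[3] = {16,16,16};                       // 16 cells per direction
	 *   CellList_gpu<3,float,CudaMemory> cl(domain,div);  // pad defaults to 1
	 *
	 * The padding adds one layer of ghost cells per side, so in this example the
	 * internal grid is 18x18x18 cells.
	 */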

	vector_cnt_type & getSortToNonSort()
	{
		return sorted_to_not_sorted;
	}

	vector_cnt_type & getNonSortToSort()
	{
		return non_sorted_to_sorted;
	}

	vector_cnt_type & getDomainSortIds()
	{
		return sorted_domain_particles_ids;
	}


	/*! \brief Set the radius for getNNIteratorRadius
	 *
	 * \param radius the neighborhood radius
	 *
	 */
	void setRadius(T radius)
	{
		openfpm::vector<long int> nnc_rad_;

		NNcalc_rad(radius,nnc_rad_,this->getCellBox(),this->getGrid());

		nnc_rad.resize(nnc_rad_.size(),0);

		// copy to nnc_rad

		for (unsigned int i = 0 ; i < nnc_rad_.size() ; i++)
		{nnc_rad.template get<0>(i) = nnc_rad_.template get<0>(i);}

		nnc_rad.template hostToDevice<0>();
	}
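
	/* Usage sketch (hypothetical values): with a cell spacing of 0.1, calling
	 *
	 *   cl.setRadius(0.25f);
	 *
	 * precomputes in nnc_rad the linearized offsets of the cells a sphere of radius
	 * 0.25 around a cell can touch, so radius-based neighbor iteration on the GPU
	 * only visits that reduced stencil instead of a full cube of cells.
	 */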

	/*! \brief Construct the cell-list from a list of particles
	 *
	 * \warning pl is assumed to be already in device memory
	 *
	 * \param pl Particles list
	 * \param pl_out reordered particles list (output)
	 * \param pl_prp particle properties
	 * \param pl_prp_out reordered particle properties (output)
	 * \param mgpuContext mgpu context
	 * \param g_m ghost marker
	 * \param start first particle to process
	 * \param stop one past the last particle to process (default: all particles)
	 * \param opt construction option
	 *
	 */
	template<typename vector, typename vector_prp, unsigned int ... prp>
	void construct(vector & pl,
		vector & pl_out,
		vector_prp & pl_prp,
		vector_prp & pl_prp_out,
		mgpu::ofp_context_t & mgpuContext,
		size_t g_m = 0,
		size_t start = 0,
		size_t stop = (size_t)-1,
		cl_construct_opt opt = cl_construct_opt::Full)
	{
		// if stop is the default, set it to the number of particles
		if (stop == (size_t)-1)
		{stop = pl.size();}

		if (is_sparse == false) {construct_dense<vector,vector_prp,prp...>(pl,pl_out,pl_prp,pl_prp_out,mgpuContext,g_m,start,stop,opt);}
		else {construct_sparse<vector,vector_prp,prp...>(pl,pl_out,pl_prp,pl_prp_out,mgpuContext,g_m,start,stop,opt);}
	}
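
	/* A host-side construction sketch (hypothetical names; assumes positions and
	 * properties already reside on the GPU):
	 *
	 *   openfpm::vector_gpu<Point<3,float>> pl, pl_out;               // positions + sorted output
	 *   openfpm::vector_gpu<aggregate<float,float[3]>> prp, prp_out;  // properties + sorted output
	 *   mgpu::ofp_context_t ctx;
	 *
	 *   pl_out.resize(pl.size());
	 *   prp_out.resize(prp.size());
	 *   cl.construct(pl,pl_out,prp,prp_out,ctx,g_m);                  // g_m = ghost marker
	 *
	 * construct() writes the cell-sorted copies into pl_out/prp_out and fills the
	 * sorted<->unsorted index maps returned by getSortToNonSort()/getNonSortToSort().
	 */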

	CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,is_sparse> toKernel()
	{
		if (nnc_rad.size() == 0)
		{
			// set the radius equal to the cell spacing in direction X
			// (it must be initialized to something to avoid warnings)
			setRadius(this->getCellBox().getHigh(0));
		}

		return CellList_gpu_ker_selector<dim,T,cnt_type,ids_type,Memory,transform,
			vector_cnt_type,openfpm::vector_gpu<aggregate<cnt_type,cnt_type>>,
			decltype(cl_sparse),is_sparse>
			::get(starts,
				cells_nn,
				cells_nn_list,
				cl_sparse,
				sorted_to_not_sorted,
				sorted_domain_particles_ids,
				nnc_rad,
				spacing_c,
				div_c,
				off,
				this->getTransform(),
				g_m);
	}
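
	/* A device-side usage sketch (hypothetical kernel; assumes CellList_gpu_ker
	 * exposes getCell() and getNNIterator() as in the OpenFPM GPU examples):
	 *
	 *   template<typename cl_ker_type, typename pos_type>
	 *   __global__ void count_neighbors(cl_ker_type cl, pos_type pos, int * cnt, int n)
	 *   {
	 *       int p = blockIdx.x * blockDim.x + threadIdx.x;
	 *       if (p >= n) return;
	 *
	 *       Point<3,float> xp = pos.template get<0>(p);
	 *       auto it = cl.getNNIterator(cl.getCell(xp));   // iterate the cell stencil
	 *
	 *       int c = 0;
	 *       while (it.isNext())
	 *       {
	 *           auto q = it.get();                        // sorted particle index
	 *           c++;
	 *           ++it;
	 *       }
	 *       cnt[p] = c;
	 *   }
	 *
	 * launched as count_neighbors<<<grid,block>>>(cl.toKernel(),pl_out.toKernel(),...);
	 * note that the iterator returns indices into the sorted particle arrays.
	 */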

	/*! \brief Clear the structure
	 *
	 *
	 */
	void clear()
	{
		cl_n.clear();
		cells.clear();
		starts.clear();
		part_ids.clear();
		sorted_to_not_sorted.clear();
	}

	/////////////////////////////////////

	//! Ghost marker
	size_t g_m = 0;

	/*! \brief Return the ghost marker
	 *
	 * \return ghost marker
	 *
	 */
	inline size_t get_gm()
	{
		return g_m;
	}

	/*! \brief Set the ghost marker
	 *
	 * \param g_m marker
	 *
	 */
	inline void set_gm(size_t g_m)
	{
		this->g_m = g_m;
	}

	/////////////////////////////////////

	/*! \brief Set the n_dec number
	 *
	 * \param n_dec
	 *
	 */
	void set_ndec(size_t n_dec)
	{
		this->n_dec = n_dec;
	}

	/*! \brief Get the n_dec number
	 *
	 * \return n_dec
	 *
	 */
	size_t get_ndec() const
	{
		return n_dec;
	}

	/////////////////////////////////////

	/*! \brief Transfer to the host the information computed on the GPU during cell-list construction (for debugging)
	 *
	 */
	void debug_deviceToHost()
	{
		cl_n.template deviceToHost<0>();
		cells.template deviceToHost<0>();
		starts.template deviceToHost<0>();
	}

	/*! \brief Return the number of cells contained in this cell-list
	 *
	 * \return the number of cells
	 *
	 */
	size_t getNCells()
	{
		return cl_n.size();
	}

	/*! \brief Return the number of elements in the cell
	 *
	 * \param i cell id
	 *
	 * \return the number of elements in the cell
	 *
	 */
	size_t getNelements(size_t i)
	{
		return cl_n.template get<0>(i);
	}

	/*! \brief Get an element in the cell
	 *
	 * \param cell cell id
	 * \param ele element id
	 *
	 * \return The element value
	 *
	 */
	inline auto get(size_t cell, size_t ele) -> decltype(cells.template get<0>(starts.template get<0>(cell)+ele))
	{
		return cells.template get<0>(starts.template get<0>(cell)+ele);
	}

	/*! \brief Get an element in the cell
	 *
	 * \param cell cell id
	 * \param ele element id
	 *
	 * \return The element value
	 *
	 */
	inline auto get(size_t cell, size_t ele) const -> decltype(cells.template get<0>(starts.template get<0>(cell)+ele))
	{
		return cells.template get<0>(starts.template get<0>(cell)+ele);
	}
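
	/* Host-side inspection sketch: after construct(), the cell contents can be
	 * checked from the host using only the accessors above (debug only, since the
	 * authoritative data lives on the GPU):
	 *
	 *   cl.debug_deviceToHost();
	 *   for (size_t c = 0 ; c < cl.getNCells() ; c++)
	 *   {
	 *       for (size_t j = 0 ; j < cl.getNelements(c) ; j++)
	 *       {
	 *           auto p_sorted = cl.get(c,j);   // index into the sorted particle arrays
	 *       }
	 *   }
	 */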

	/*! \brief Swap the content of the two cell-lists
	 *
	 * \param clg cell-list to swap with
	 *
	 */
	void swap(CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type,is_sparse> & clg)
	{
		((CellDecomposer_sm<dim,T,transform> *)this)->swap(clg);
		cl_n.swap(clg.cl_n);
		cells.swap(clg.cells);
		starts.swap(clg.starts);
		part_ids.swap(clg.part_ids);
		cl_sparse.swap(clg.cl_sparse);
		cells_nn.swap(clg.cells_nn);
		cells_nn_list.swap(clg.cells_nn_list);
		cells_nn_test.swap(clg.cells_nn_test);
		sorted_to_not_sorted.swap(clg.sorted_to_not_sorted);
		sorted_domain_particles_dg.swap(clg.sorted_domain_particles_dg);
		sorted_domain_particles_ids.swap(clg.sorted_domain_particles_ids);
		non_sorted_to_sorted.swap(clg.non_sorted_to_sorted);

		spacing_c.swap(clg.spacing_c);
		div_c.swap(clg.div_c);
		off.swap(clg.off);

		size_t g_m_tmp = g_m;
		g_m = clg.g_m;
		clg.g_m = g_m_tmp;

		size_t n_dec_tmp = n_dec;
		n_dec = clg.n_dec;
		clg.n_dec = n_dec_tmp;

		int cells_nn_test_size_tmp = cells_nn_test_size;
		cells_nn_test_size = clg.cells_nn_test_size;
		clg.cells_nn_test_size = cells_nn_test_size_tmp;
	}

	CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type,is_sparse> &
	operator=(const CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type,is_sparse> & clg)
	{
		*static_cast<CellDecomposer_sm<dim,T,transform> *>(this) = *static_cast<const CellDecomposer_sm<dim,T,transform> *>(&clg);
		cl_n = clg.cl_n;
		cells = clg.cells;
		starts = clg.starts;
		part_ids = clg.part_ids;
		cl_sparse = clg.cl_sparse;
		cells_nn = clg.cells_nn;
		cells_nn_list = clg.cells_nn_list;
		cells_nn_test = clg.cells_nn_test;
		sorted_to_not_sorted = clg.sorted_to_not_sorted;
		sorted_domain_particles_dg = clg.sorted_domain_particles_dg;
		sorted_domain_particles_ids = clg.sorted_domain_particles_ids;
		non_sorted_to_sorted = clg.non_sorted_to_sorted;

		spacing_c = clg.spacing_c;
		div_c = clg.div_c;
		off = clg.off;
		g_m = clg.g_m;
		n_dec = clg.n_dec;

		cells_nn_test_size = clg.cells_nn_test_size;

		return *this;
	}

	CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type,is_sparse> &
	operator=(CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type,is_sparse> && clg)
	{
		static_cast<CellDecomposer_sm<dim,T,transform> *>(this)->swap(*static_cast<CellDecomposer_sm<dim,T,transform> *>(&clg));
		cl_n.swap(clg.cl_n);
		cells.swap(clg.cells);
		starts.swap(clg.starts);
		part_ids.swap(clg.part_ids);
		cl_sparse.swap(clg.cl_sparse);
		cells_nn.swap(clg.cells_nn);
		cells_nn_list.swap(clg.cells_nn_list);
		cells_nn_test.swap(clg.cells_nn_test);
		sorted_to_not_sorted.swap(clg.sorted_to_not_sorted);
		sorted_domain_particles_dg.swap(clg.sorted_domain_particles_dg);
		sorted_domain_particles_ids.swap(clg.sorted_domain_particles_ids);
		non_sorted_to_sorted.swap(clg.non_sorted_to_sorted);

		spacing_c = clg.spacing_c;
		div_c = clg.div_c;
		off = clg.off;
		g_m = clg.g_m;
		n_dec = clg.n_dec;

		cells_nn_test_size = clg.cells_nn_test_size;

		return *this;
	}
};

// This is a transformation node for vector_distributed for the algorithm toKernel_transform
template<template <typename> class layout_base, typename T>
struct toKernel_transform<layout_base,T,4>
{
	typedef CellList_gpu_ker<T::dims,
		typename T::stype,
		typename T::cnt_type_,
		typename T::ids_type_,
		typename T::transform_,
		T::is_sparse_::value> type;
};

#endif

#endif /* OPENFPM_DATA_SRC_NN_CELLLIST_CELLLIST_GPU_HPP_ */