1/*
2 * Vector.hpp
3 *
4 * Created on: Mar 5, 2015
5 * Author: Pietro Incardona
6 */
7
8#ifndef VECTOR_HPP_
9#define VECTOR_HPP_
10
11#include "config.h"
12#include "util/cuda_launch.hpp"
13#include "HDF5_wr/HDF5_wr.hpp"
14#include "VCluster/VCluster.hpp"
15#include "Space/Shape/Point.hpp"
16#include "Vector/Iterators/vector_dist_iterator.hpp"
17#include "Space/Shape/Box.hpp"
18#include "Vector/vector_dist_key.hpp"
19#include "memory/PtrMemory.hpp"
20#include "NN/CellList/CellList.hpp"
21#include "NN/CellList/CellListFast_gen.hpp"
22#include "util/common.hpp"
23#include "util/object_util.hpp"
24#include "memory/ExtPreAlloc.hpp"
25#include "CSVWriter/CSVWriter.hpp"
26#include "VTKWriter/VTKWriter.hpp"
27#include "Decomposition/common.hpp"
28#include "Grid/Iterators/grid_dist_id_iterator_dec.hpp"
29#include "Grid/grid_key_dx_iterator_hilbert.hpp"
30#include "Vector/vector_dist_ofb.hpp"
31#include "Decomposition/CartDecomposition.hpp"
32#include "data_type/aggregate.hpp"
33#include "NN/VerletList/VerletList.hpp"
34#include "vector_dist_comm.hpp"
35#include "DLB/LB_Model.hpp"
36#include "Vector/vector_map_iterator.hpp"
37#include "NN/CellList/ParticleIt_Cells.hpp"
38#include "NN/CellList/ProcKeys.hpp"
39#include "Vector/vector_dist_kernel.hpp"
40#include "NN/CellList/cuda/CellList_gpu.hpp"
41#include "lib/pdata.hpp"
42#include "cuda/vector_dist_operators_list_ker.hpp"
43
44#define DEC_GRAN(gr) ((size_t)gr << 32)
45
46#ifdef CUDA_GPU
47template<unsigned int dim,typename St> using CELLLIST_GPU_SPARSE = CellList_gpu<dim,St,CudaMemory,shift_only<dim, St>,unsigned int,int,true>;
48#endif
49
50#define VECTOR_DIST_ERROR_OBJECT std::runtime_error("Runtime vector distributed error");
51
52#ifdef SE_CLASS3
53#include "se_class3_vector.hpp"
54#endif
55
56#ifdef SE_CLASS3
57 #define SE_CLASS3_VDIST_CONSTRUCTOR ,se3(getDecomposition(),*this)
58#else
59 #define SE_CLASS3_VDIST_CONSTRUCTOR
60#endif
61
62
63#define NO_ID false
64#define ID true
65
66// Perform a ghost get or a ghost put
67constexpr int GET = 1;
68constexpr int PUT = 2;
69
70// Write the particles with ghost
71constexpr int NO_GHOST = 0;
72constexpr int WITH_GHOST = 2;
73
74constexpr int GCL_NON_SYMMETRIC = 0;
75constexpr int GCL_SYMMETRIC = 1;
76constexpr int GCL_HILBERT = 2;
77
78template<bool is_gpu_celllist, unsigned int ... prp>
79struct gcl_standard_no_symmetric_impl
80{
81 template<unsigned int dim, typename St, typename CellL, typename Vector, unsigned int impl>
82 static inline CellL get(Vector & vd, const St & r_cut, const Ghost<dim,St> & g)
83 {
84 return vd.template getCellList<CellL>(r_cut);
85 }
86};
87
88template<unsigned int ... prp>
89struct gcl_standard_no_symmetric_impl<true,prp...>
90{
91 template<unsigned int dim, typename St, typename CellL, typename Vector, unsigned int impl>
92 static inline CellL get(Vector & vd, const St & r_cut, const Ghost<dim,St> & g)
93 {
94 return vd.template getCellListGPU<CellL,prp...>(r_cut);
95 }
96};
97
98//! General function t get a cell-list
99template<unsigned int dim, typename St, typename CellL, typename Vector, unsigned int impl, unsigned int ... prp>
100struct gcl
101{
102 /*! \brief Get the Cell list based on the type
103 *
104 * \param vd Distributed vector
105 * \param r_cut Cut-off radius
106 * \param g Ghost
107 *
108 * \return the constructed cell-list
109 *
110 */
111 static inline CellL get(Vector & vd, const St & r_cut, const Ghost<dim,St> & g)
112 {
113 return gcl_standard_no_symmetric_impl<is_gpu_celllist<CellL>::value,prp ...>::template get<dim,St,CellL,Vector,impl>(vd,r_cut,g);
114 }
115};
116
117//! General function t get a cell-list
118template<unsigned int dim, typename St, typename CellL, typename Vector>
119struct gcl<dim,St,CellL,Vector,GCL_HILBERT>
120{
121 /*! \brief Get the Cell list based on the type
122 *
123 * \param vd Distributed vector
124 * \param r_cut Cut-off radius
125 * \param g Ghost
126 *
127 * \return the constructed cell-list
128 *
129 */
130 static inline CellL get(Vector & vd, const St & r_cut, const Ghost<dim,St> & g)
131 {
132 return vd.getCellList_hilb(r_cut,g);
133 }
134};
135
136//! General function t get a cell-list
137template<unsigned int dim, typename St, typename CellL, typename Vector>
138struct gcl<dim,St,CellL,Vector,GCL_SYMMETRIC>
139{
140 /*! \brief Get the Cell list based on the type
141 *
142 * \param vd Distributed vector
143 * \param r_cut Cut-off radius
144 * \param g Ghost
145 *
146 * \return the constructed cell-list
147 *
148 */
149 static inline CellL get(Vector & vd, const St & r_cut, const Ghost<dim,St> & g)
150 {
151 return vd.getCellListSym(r_cut);
152 }
153};
154
155/////////////////// GCL anisotropic ///////////////////////////////////////////
156
157//! General function t get a cell-list
158template<unsigned int dim, typename St, typename CellL, typename Vector, unsigned int impl>
159struct gcl_An
160{
161 /*! \brief Get the Cell list based on the type
162 *
163 * \param vd Distributed vector
164 * \param r_cut Cut-off radius
165 * \param g Ghost
166 *
167 * \return the constructed cell-list
168 *
169 */
170 static inline CellL get(Vector & vd, const size_t (& div)[dim], const size_t (& pad)[dim], const Ghost<dim,St> & g)
171 {
172 return vd.template getCellListSym<CellL>(div,pad);
173 }
174};
175
176#define CELL_MEMFAST(dim,St) CellList_gen<dim, St, Process_keys_lin, Mem_fast<>, shift<dim, St> >
177#define CELL_MEMBAL(dim,St) CellList_gen<dim, St, Process_keys_lin, Mem_bal<>, shift<dim, St> >
178#define CELL_MEMMW(dim,St) CellList_gen<dim, St, Process_keys_lin, Mem_mw<>, shift<dim, St> >
179
180#define CELL_MEMFAST_HILB(dim,St) CellList_gen<dim, St, Process_keys_hilb, Mem_fast<>, shift<dim, St> >
181#define CELL_MEMBAL_HILB(dim,St) CellList_gen<dim, St, Process_keys_hilb, Mem_bal<>, shift<dim, St> >
182#define CELL_MEMMW_HILB(dim,St) CellList_gen<dim, St, Process_keys_hilb, Mem_mw<>, shift<dim, St> >
183
184#define VERLET_MEMFAST(dim,St) VerletList<dim,St,Mem_fast<>,shift<dim,St> >
185#define VERLET_MEMBAL(dim,St) VerletList<dim,St,Mem_bal<>,shift<dim,St> >
186#define VERLET_MEMMW(dim,St) VerletList<dim,St,Mem_mw<>,shift<dim,St> >
187
188#define VERLET_MEMFAST_INT(dim,St) VerletList<dim,St,Mem_fast<HeapMemory,unsigned int>,shift<dim,St> >
189#define VERLET_MEMBAL_INT(dim,St) VerletList<dim,St,Mem_bal<unsigned int>,shift<dim,St> >
190#define VERLET_MEMMW_INT(dim,St) VerletList<dim,St,Mem_mw<unsigned int>,shift<dim,St> >
191
192enum reorder_opt
193{
194 NO_REORDER = 0,
195 HILBERT = 1,
196 LINEAR = 2
197};
198
199template<typename vector, unsigned int impl>
200struct cell_list_selector
201{
202 typedef decltype(std::declval<vector>().getCellListGPU(0.0).toKernel()) ctype;
203
204 static ctype get(vector & v,
205 typename vector::stype & r_cut)
206 {
207 return v.getCellListGPU(r_cut).toKernel();
208 }
209};
210
211template<typename vector>
212struct cell_list_selector<vector,comp_host>
213{
214 typedef decltype(std::declval<vector>().getCellList(0.0)) ctype;
215
216 static ctype get(vector & v,
217 typename vector::stype & r_cut)
218 {
219 return v.getCellList(r_cut);
220 }
221};
222
223/*! \brief Distributed vector
224 *
225 * This class represent a distributed vector, the distribution of the structure
226 * is based on the positional information of the elements the vector store
227 *
228 * ## Create a vector of random elements on each processor 2D
229 * \snippet Vector/tests/vector_dist_unit_test.cpp Create a vector of random elements on each processor 2D
230 *
231 * ## Create a vector of random elements on each processor 3D
232 * \snippet Vector/tests/vector_dist_unit_test.cpp Create a vector of random elements on each processor 3D
233 *
234 * ## Create a vector of elements distributed on a grid like way
235 * \snippet Vector/tests/vector_dist_unit_test.cpp Create a vector of elements distributed on a grid like way
236 *
237 * ## Redistribute the particles and sync the ghost properties
238 * \snippet Vector/tests/vector_dist_unit_test.cpp Redistribute the particles and sync the ghost properties
239 *
240 * ## Create a gpu distributed vector [St = float or double]
241 * \snippet Vector/cuda/vector_dist_gpu_unit_tests.cu Create a gpu vector
242 *
243 * ## Fill a GPU vector_dist on CPU and move the information to GPU and redistribute [St = float or double]
244 * \snippet Vector/cuda/vector_dist_gpu_unit_tests.cu Fill gpu vector and move to GPU
245 *
246 * ## Fill the ghost on GPU
247 * \snippet Vector/cuda/vector_dist_gpu_unit_tests.cu Fill the ghost on GPU
248 *
249 * \tparam dim Dimensionality of the space where the elements lives
250 * \tparam St type of space float, double ...
251 * \tparam prop properties the vector element store in OpenFPM data structure format
252 * \tparam Decomposition Decomposition strategy to use CartDecomposition ...
253 * \tparam Memory Memory pool where store the information HeapMemory ...
254 * \tparam Memory layout
255 *
256 */
257template<unsigned int dim,
258 typename St,
259 typename prop,
260 typename Decomposition = CartDecomposition<dim,St>,
261 typename Memory = HeapMemory,
262 template<typename> class layout_base = memory_traits_lin>
263class vector_dist : public vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base>,
264#ifdef CUDA_GPU
265 private vector_dist_ker_list<vector_dist_ker<dim,St,prop,layout_base>>
266#else
267 private vector_dist_ker_list<int>
268#endif
269{
270
271public:
272
273 //! Self type
274 typedef vector_dist<dim,St,prop,Decomposition,Memory,layout_base> self;
275
276private:
277
278 //! Ghost marker, all the particle with id > g_m are ghost all with g_m < are real particle
279 size_t g_m = 0;
280
281 //! Particle position vector, (It has 2 elements) the first has real particles assigned to a processor
282 //! the second element contain unassigned particles
283 openfpm::vector<Point<dim, St>,Memory,layout_base> v_pos;
284
285 //! Particle properties vector, (It has 2 elements) the first has real particles assigned to a processor
286 //! the second element contain unassigned particles
287 openfpm::vector<prop,Memory,layout_base> v_prp;
288
289 //! reordered v_pos buffer
290 openfpm::vector<prop,Memory,layout_base> v_prp_out;
291
292 //! reordered v_prp buffer
293 openfpm::vector<Point<dim, St>,Memory,layout_base> v_pos_out;
294
295 //! option used to create this vector
296 size_t opt = 0;
297
298 //! Name of the properties
299 openfpm::vector<std::string> prp_names;
300
301#ifdef SE_CLASS3
302
303 se_class3_vector<prop::max_prop,dim,St,Decomposition,self> se3;
304
305#endif
306
307 /*! \brief Initialize the structures
308 *
309 * \param np number of particles
310 *
311 */
312 void init_structures(size_t np)
313 {
314 Vcluster<Memory> & v_cl = create_vcluster<Memory>();
315
316 // convert to a local number of elements
317 size_t p_np = np / v_cl.getProcessingUnits();
318
319 // Get non divisible part
320 size_t r = np % v_cl.getProcessingUnits();
321
322 // Distribute the remain particles
323 if (v_cl.getProcessUnitID() < r)
324 p_np++;
325
326 // resize the position vector
327 v_pos.resize(p_np);
328
329 // resize the properties vector
330 v_prp.resize(p_np);
331
332 g_m = p_np;
333 }
334
335 /*! \brief Check if the parameters describe a valid vector. In case it does not report an error
336 *
337 * \param box Box to check
338 *
339 */
340 void check_parameters(Box<dim,St> & box)
341 {
342 // if the box is not valid return an error
343 if (box.isValid() == false)
344 {
345 std::cerr << __FILE__ << ":" << __LINE__ << " Error the domain is not valid " << box.toString() << std::endl;
346 ACTION_ON_ERROR(VECTOR_DIST_ERROR_OBJECT);
347 }
348
349 }
350
351 /*! \brief It check that the r_cut is not bugger than the ghost
352 *
353 * \param r_cut cut-off radius
354 *
355 */
356 void check_ghost_compatible_rcut(St r_cut)
357 {
358 for (size_t i = 0 ; i < dim ; i++)
359 {
360 if (fabs(getDecomposition().getGhost().getLow(i)) < r_cut)
361 {
362 std::cerr << __FILE__ << ":" << __LINE__ << " Error the cut off radius " << r_cut << " is bigger that the ghost layer on the dimension " << i << " lower=" << getDecomposition().getGhost().getLow(i) << std::endl;
363 ACTION_ON_ERROR(VECTOR_DIST_ERROR_OBJECT);
364 }
365 }
366 }
367
368 /*! \brief Reorder based on hilbert space filling curve
369 *
370 * \param v_pos_dest reordered vector of position
371 * \param v_prp_dest reordered vector of properties
372 * \param m order of the space filling curve
373 * \param cell_list cell-list
374 *
375 */
376 template<typename CellL, typename sfc_it>
377 void reorder_sfc(openfpm::vector<Point<dim,St>> & v_pos_dest,
378 openfpm::vector<prop> & v_prp_dest,
379 sfc_it & h_it,
380 CellL & cell_list)
381 {
382 v_pos_dest.resize(v_pos.size());
383 v_prp_dest.resize(v_prp.size());
384
385 //Index for v_pos_dest
386 size_t count = 0;
387
388 grid_key_dx<dim> ksum;
389
390 for (size_t i = 0; i < dim ; i++)
391 {ksum.set_d(i,cell_list.getPadding(i));}
392
393 while (h_it.isNext())
394 {
395 auto key = h_it.get();
396 key += ksum;
397
398 size_t lin = cell_list.getGrid().LinId(key);
399
400 // for each particle in the Cell "lin"
401 for (size_t i = 0; i < cell_list.getNelements(lin); i++)
402 {
403 //reorder
404 auto v = cell_list.get(lin,i);
405 v_pos_dest.get(count) = v_pos.get(v);
406 v_prp_dest.get(count) = v_prp.get(v);
407
408 count++;
409 }
410 ++h_it;
411 }
412 }
413
414public:
415
416 //! property object
417 typedef prop value_type;
418
419 typedef Decomposition Decomposition_type;
420
421 typedef decltype(v_pos) internal_position_vector_type;
422
423 typedef CellList<dim, St, Mem_fast<>, shift<dim, St>, internal_position_vector_type > CellList_type;
424
425 //! space type
426 typedef St stype;
427
428 //! dimensions of space
429 static const unsigned int dims = dim;
430
431 //! yes I am vector dist
432 typedef int yes_i_am_vector_dist;
433
434 /*! \brief Operator= for distributed vector
435 *
436 * \param v vector to copy
437 *
438 * \return itself
439 *
440 */
441 vector_dist<dim,St,prop,Decomposition,Memory,layout_base> &
442 operator=(const vector_dist<dim,St,prop,Decomposition,Memory,layout_base> & v)
443 {
444 static_cast<vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base> *>(this)->operator=(static_cast<vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base>>(v));
445
446 g_m = v.g_m;
447 v_pos = v.v_pos;
448 v_prp = v.v_prp;
449
450#ifdef SE_CLASS3
451 se3 = v.se3;
452#endif
453
454 opt = v.opt;
455
456 return *this;
457 }
458
459 /*! \brief Operator= for distributed vector
460 *
461 * \param v vector to copy
462 *
463 * \return itself
464 *
465 */
466 vector_dist<dim,St,prop,Decomposition,Memory,layout_base> &
467 operator=(vector_dist<dim,St,prop,Decomposition,Memory,layout_base> && v)
468 {
469 static_cast<vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base> *>(this)->operator=(static_cast<vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base> >(v));
470
471 g_m = v.g_m;
472 v_pos.swap(v.v_pos);
473 v_prp.swap(v.v_prp);
474
475#ifdef SE_CLASS3
476 se3 = v.se3;
477#endif
478
479 opt = v.opt;
480
481 return *this;
482 }
483
484 // default constructor (structure contain garbage)
485 vector_dist()
486 {}
487
488
489 /*! \brief Copy Constructor
490 *
491 * \param v vector to copy
492 *
493 */
494 vector_dist(const vector_dist<dim,St,prop,Decomposition,Memory,layout_base> & v)
495 :vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base>(v.getDecomposition()) SE_CLASS3_VDIST_CONSTRUCTOR
496 {
497#ifdef SE_CLASS2
498 check_new(this,8,VECTOR_DIST_EVENT,4);
499#endif
500
501 this->operator=(v);
502 }
503
504 /*! \brief Copy constructor
505 *
506 * \param v vector to copy
507 *
508 */
509 vector_dist(vector_dist<dim,St,prop,Decomposition,Memory,layout_base> && v) noexcept
510 {
511#ifdef SE_CLASS2
512 check_new(this,8,VECTOR_DIST_EVENT,4);
513#endif
514
515 this->operator=(v);
516
517#ifdef SE_CLASS3
518 se3.Initialize();
519#endif
520 }
521
522 /*! \brief Constructor with predefined decomposition
523 *
524 * \param dec is the decomposition
525 * \param np number of particles
526 *
527 */
528 vector_dist(const Decomposition & dec, size_t np) :
529 vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base>(dec) SE_CLASS3_VDIST_CONSTRUCTOR
530 {
531#ifdef SE_CLASS2
532 check_new(this,8,VECTOR_DIST_EVENT,4);
533#endif
534
535 init_structures(np);
536
537#ifdef SE_CLASS3
538 se3.Initialize();
539#endif
540 }
541
542 /*! \brief Constructor of a distributed vector
543 *
544 * \param np number of elements
545 * \param box domain where the vector of elements live
546 * \param bc boundary conditions
547 * \param g Ghost margins
548 * \param opt [Optional] additional options. BIND_DEC_TO_GHOST Bind the decomposition to be multiple of the
549 * ghost size. This is required if we want to use symmetric to eliminate
550 * ghost communications.
551 * \param gdist [Optional] override the default distribution grid
552 *
553 */
554 vector_dist(size_t np, Box<dim, St> box, const size_t (&bc)[dim], const Ghost<dim, St> & g, const grid_sm<dim,void> & gdist)
555 :opt(0) SE_CLASS3_VDIST_CONSTRUCTOR
556 {
557 if (opt >> 32 != 0)
558 {this->setDecompositionGranularity(opt >> 32);}
559
560 check_parameters(box);
561
562 init_structures(np);
563
564 this->init_decomposition_gr_cell(box,bc,g,opt,gdist);
565
566
567#ifdef SE_CLASS3
568 se3.Initialize();
569#endif
570 }
571
572 /*! \brief Constructor of a distributed vector
573 *
574 * \param np number of elements
575 * \param box domain where the vector of elements live
576 * \param bc boundary conditions
577 * \param g Ghost margins
578 * \param opt [Optional] additional options. BIND_DEC_TO_GHOST Bind the decomposition to be multiple of the
579 * ghost size. This is required if we want to use symmetric to eliminate
580 * ghost communications.
581 * \param gdist [Optional] override the default distribution grid
582 *
583 */
584 vector_dist(size_t np, Box<dim, St> box, const size_t (&bc)[dim], const Ghost<dim, St> & g, size_t opt = 0, const grid_sm<dim,void> & gdist = grid_sm<dim,void>())
585 :opt(opt) SE_CLASS3_VDIST_CONSTRUCTOR
586 {
587#ifdef SE_CLASS2
588 check_new(this,8,VECTOR_DIST_EVENT,4);
589#endif
590
591 if (opt >> 32 != 0)
592 {this->setDecompositionGranularity(opt >> 32);}
593
594 check_parameters(box);
595
596 init_structures(np);
597
598 this->init_decomposition(box,bc,g,opt,gdist);
599
600
601#ifdef SE_CLASS3
602 se3.Initialize();
603#endif
604 }
605
606 ~vector_dist()
607 {
608#ifdef SE_CLASS2
609 check_delete(this);
610#endif
611 }
612
613 /*! \brief remove all the elements
614 *
615 *
616 */
617 void clear()
618 {
619 resize(0);
620 }
621
622 /*! \brief return the local size of the vector
623 *
624 * \return local size
625 *
626 */
627 size_t size_local() const
628 {
629 return g_m;
630 }
631
632 /*! \brief return the local size of the vector
633 *
634 * \return local size
635 *
636 */
637 size_t size_local_with_ghost() const
638 {
639 return v_pos.size();
640 }
641
642#ifndef ONLY_READWRITE_GETTER
643
644 /*! \brief Get the position of an element
645 *
646 * see the vector_dist iterator usage to get an element key
647 *
648 * \param vec_key element
649 *
650 * \return the position of the element in space
651 *
652 */
653 inline auto getPos(vect_dist_key_dx vec_key) -> decltype(v_pos.template get<0>(vec_key.getKey()))
654 {
655#ifdef SE_CLASS3
656 check_for_pos_nan_inf<prop::max_prop_real,prop::max_prop>(*this,vec_key.getKey());
657#endif
658
659 return v_pos.template get<0>(vec_key.getKey());
660 }
661
662 /*! \brief Get the position of an element
663 *
664 * see the vector_dist iterator usage to get an element key
665 *
666 * \param vec_key element
667 *
668 * \return the position of the element in space
669 *
670 */
671 inline auto getPos(vect_dist_key_dx vec_key) const -> decltype(v_pos.template get<0>(vec_key.getKey()))
672 {
673#ifdef SE_CLASS3
674 check_for_pos_nan_inf<prop::max_prop_real,prop::max_prop>(*this,vec_key.getKey());
675#endif
676 return v_pos.template get<0>(vec_key.getKey());
677 }
678
679 /*! \brief Get the position of an element
680 *
681 * see the vector_dist iterator usage to get an element key
682 *
683 * \param vec_key element
684 *
685 * \return the position of the element in space
686 *
687 */
688 inline auto getPos(size_t vec_key) -> decltype(v_pos.template get<0>(vec_key))
689 {
690#ifdef SE_CLASS3
691 check_for_pos_nan_inf<prop::max_prop_real,prop::max_prop>(*this,vec_key);
692#endif
693 return v_pos.template get<0>(vec_key);
694 }
695
696 /*! \brief Get the position of an element
697 *
698 * see the vector_dist iterator usage to get an element key
699 *
700 * \param vec_key element
701 *
702 * \return the position of the element in space
703 *
704 */
705 inline auto getPos(size_t vec_key) const -> decltype(v_pos.template get<0>(vec_key))
706 {
707#ifdef SE_CLASS3
708 check_for_pos_nan_inf<prop::max_prop_real,prop::max_prop>(*this,vec_key);
709#endif
710 return v_pos.template get<0>(vec_key);
711 }
712
713 /*! \brief Get the property of an element
714 *
715 * see the vector_dist iterator usage to get an element key
716 *
717 * \tparam id property id
718 * \param vec_key vector element
719 *
720 * \return return the selected property of the vector element
721 *
722 */
723 template<unsigned int id> inline auto getProp(vect_dist_key_dx vec_key) -> decltype(v_prp.template get<id>(vec_key.getKey()))
724 {
725#ifdef SE_CLASS3
726 check_for_prop_nan_inf<id,prop::max_prop+SE3_STATUS>(*this,vec_key.getKey());
727#endif
728 return v_prp.template get<id>(vec_key.getKey());
729 }
730
731 /*! \brief Get the property of an element
732 *
733 * see the vector_dist iterator usage to get an element key
734 *
735 * \tparam id property id
736 * \param vec_key vector element
737 *
738 * \return return the selected property of the vector element
739 *
740 */
741 template<unsigned int id> inline auto getProp(vect_dist_key_dx vec_key) const -> decltype(v_prp.template get<id>(vec_key.getKey()))
742 {
743#ifdef SE_CLASS3
744 check_for_prop_nan_inf<id,prop::max_prop+SE3_STATUS>(*this,vec_key.getKey());
745#endif
746 return v_prp.template get<id>(vec_key.getKey());
747 }
748
749 /*! \brief Get the property of an element
750 *
751 * see the vector_dist iterator usage to get an element key
752 *
753 * \tparam id property id
754 * \param vec_key vector element
755 *
756 * \return return the selected property of the vector element
757 *
758 */
759 template<unsigned int id> inline auto getProp(size_t vec_key) -> decltype(v_prp.template get<id>(vec_key))
760 {
761#ifdef SE_CLASS3
762 check_for_prop_nan_inf<id,prop::max_prop+SE3_STATUS>(*this,vec_key);
763#endif
764 return v_prp.template get<id>(vec_key);
765 }
766
767 /*! \brief Get the property of an element
768 *
769 * see the vector_dist iterator usage to get an element key
770 *
771 * \tparam id property id
772 * \param vec_key vector element
773 *
774 * \return return the selected property of the vector element
775 *
776 */
777 template<unsigned int id> inline auto getProp(size_t vec_key) const -> decltype(v_prp.template get<id>(vec_key))
778 {
779#ifdef SE_CLASS3
780 check_for_prop_nan_inf<id,prop::max_prop+SE3_STATUS>(*this,vec_key);
781#endif
782 return v_prp.template get<id>(vec_key);
783 }
784
785#endif
786
787///////////////////// Read and write with no check
788
789 /*! \brief Get the position of an element
790 *
791 * see the vector_dist iterator usage to get an element key
792 *
793 * \param vec_key element
794 *
795 * \return the position of the element in space
796 *
797 */
798 inline auto getPosNC(vect_dist_key_dx vec_key) -> decltype(v_pos.template get<0>(vec_key.getKey()))
799 {
800 return v_pos.template get<0>(vec_key.getKey());
801 }
802
803 /*! \brief Get the position of an element
804 *
805 * see the vector_dist iterator usage to get an element key
806 *
807 * \param vec_key element
808 *
809 * \return the position of the element in space
810 *
811 */
812 inline auto getPosNC(vect_dist_key_dx vec_key) const -> decltype(v_pos.template get<0>(vec_key.getKey()))
813 {
814 return v_pos.template get<0>(vec_key.getKey());
815 }
816
817 /*! \brief Get the position of an element
818 *
819 * see the vector_dist iterator usage to get an element key
820 *
821 * \param vec_key element
822 *
823 * \return the position of the element in space
824 *
825 */
826 inline auto getPosNC(size_t vec_key) -> decltype(v_pos.template get<0>(vec_key))
827 {
828 return v_pos.template get<0>(vec_key);
829 }
830
831 /*! \brief Get the position of an element
832 *
833 * see the vector_dist iterator usage to get an element key
834 *
835 * \param vec_key element
836 *
837 * \return the position of the element in space
838 *
839 */
840 inline auto getPosNC(size_t vec_key) const -> decltype(v_pos.template get<0>(vec_key))
841 {
842 return v_pos.template get<0>(vec_key);
843 }
844
845 /*! \brief Get the property of an element
846 *
847 * see the vector_dist iterator usage to get an element key
848 *
849 * \tparam id property id
850 * \param vec_key vector element
851 *
852 * \return return the selected property of the vector element
853 *
854 */
855 template<unsigned int id> inline auto getPropNC(vect_dist_key_dx vec_key) -> decltype(v_prp.template get<id>(vec_key.getKey()))
856 {
857 return v_prp.template get<id>(vec_key.getKey());
858 }
859
860 /*! \brief Get the property of an element
861 *
862 * see the vector_dist iterator usage to get an element key
863 *
864 * \tparam id property id
865 * \param vec_key vector element
866 *
867 * \return return the selected property of the vector element
868 *
869 */
870 template<unsigned int id> inline auto getPropNC(vect_dist_key_dx vec_key) const -> decltype(v_prp.template get<id>(vec_key.getKey()))
871 {
872 return v_prp.template get<id>(vec_key.getKey());
873 }
874
875 /*! \brief Get the property of an element
876 *
877 * see the vector_dist iterator usage to get an element key
878 *
879 * \tparam id property id
880 * \param vec_key vector element
881 *
882 * \return return the selected property of the vector element
883 *
884 */
885 template<unsigned int id> inline auto getPropNC(size_t vec_key) -> decltype(v_prp.template get<id>(vec_key))
886 {
887 return v_prp.template get<id>(vec_key);
888 }
889
890 /*! \brief Get the property of an element
891 *
892 * see the vector_dist iterator usage to get an element key
893 *
894 * \tparam id property id
895 * \param vec_key vector element
896 *
897 * \return return the selected property of the vector element
898 *
899 */
900 template<unsigned int id> inline auto getPropNC(size_t vec_key) const -> decltype(v_prp.template get<id>(vec_key))
901 {
902 return v_prp.template get<id>(vec_key);
903 }
904
905///////////////////// Read and Write function
906
907 /*! \brief Get the position of an element
908 *
909 * see the vector_dist iterator usage to get an element key
910 *
911 * \param vec_key element
912 *
913 * \return the position of the element in space
914 *
915 */
916 inline auto getPosWrite(vect_dist_key_dx vec_key) -> decltype(v_pos.template get<0>(vec_key.getKey()))
917 {
918#ifdef SE_CLASS3
919 se3.template write<prop::max_prop_real>(*this,vec_key.getKey());
920#endif
921
922 return v_pos.template get<0>(vec_key.getKey());
923 }
924
925 /*! \brief Get the position of an element
926 *
927 * see the vector_dist iterator usage to get an element key
928 *
929 * \param vec_key element
930 *
931 * \return the position of the element in space
932 *
933 */
934 inline auto getPosRead(vect_dist_key_dx vec_key) const -> decltype(v_pos.template get<0>(vec_key.getKey()))
935 {
936#ifdef SE_CLASS3
937 se3.template read<prop::max_prop_real>(*this,vec_key.getKey());
938#endif
939
940 return v_pos.template get<0>(vec_key.getKey());
941 }
942
943 /*! \brief Get the property of an element
944 *
945 * see the vector_dist iterator usage to get an element key
946 *
947 * \tparam id property id
948 * \param vec_key vector element
949 *
950 * \return return the selected property of the vector element
951 *
952 */
953 template<unsigned int id> inline auto getPropWrite(vect_dist_key_dx vec_key) -> decltype(v_prp.template get<id>(vec_key.getKey()))
954 {
955#ifdef SE_CLASS3
956 se3.template write<id>(*this,vec_key.getKey());
957#endif
958
959 return v_prp.template get<id>(vec_key.getKey());
960 }
961
962 /*! \brief Get the property of an element
963 *
964 * see the vector_dist iterator usage to get an element key
965 *
966 * \tparam id property id
967 * \param vec_key vector element
968 *
969 * \return return the selected property of the vector element
970 *
971 */
972 template<unsigned int id> inline auto getPropRead(vect_dist_key_dx vec_key) const -> decltype(v_prp.template get<id>(vec_key.getKey()))
973 {
974#ifdef SE_CLASS3
975 se3.template read<id>(*this,vec_key.getKey());
976#endif
977
978 return v_prp.template get<id>(vec_key.getKey());
979 }
980
981//////////////////////////////////////////////
982
983 /*! \brief Add local particle
984 *
985 * It add a local particle, with "local" we mean in this processor
986 * the particle can be also created out of the processor domain, in this
987 * case a call to map is required. Added particles are always created at the
988 * end and can be accessed with getLastPos and getLastProp
989 *
990 */
991 void add()
992 {
993 v_prp.insert(g_m);
994 v_pos.insert(g_m);
995
996 g_m++;
997
998#ifdef SE_CLASS3
999 for (size_t i = 0 ; i < prop::max_prop_real+1 ; i++)
1000 v_prp.template get<prop::max_prop_real>(g_m-1)[i] = UNINITIALIZED;
1001#endif
1002 }
1003
1004#ifndef ONLY_READWRITE_GETTER
1005
1006 /*! \brief Get the position of the last element
1007 *
1008 * \return the position of the element in space
1009 *
1010 */
1011 inline auto getLastPos() -> decltype(v_pos.template get<0>(0))
1012 {
1013 return v_pos.template get<0>(g_m - 1);
1014 }
1015
1016 /*! \brief Get the property of the last element
1017 *
1018 * \tparam id property id
1019 *
1020 * \return return the selected property of the vector element
1021 *
1022 */
1023 template<unsigned int id> inline auto getLastProp() -> decltype(v_prp.template get<id>(0))
1024 {
1025 return v_prp.template get<id>(g_m - 1);
1026 }
1027
1028#endif
1029
1030////////////////////////////// READ AND WRITE VERSION //////////
1031
1032 /*! \brief Get the position of the last element
1033 *
1034 * \return the position of the element in space
1035 *
1036 */
1037 inline auto getLastPosRead() -> decltype(v_pos.template get<0>(0))
1038 {
1039#ifdef SE_CLASS3
1040 se3.template read<prop::max_prop_real>(*this,g_m-1);
1041#endif
1042
1043 return v_pos.template get<0>(g_m - 1);
1044 }
1045
1046 /*! \brief Get the property of the last element
1047 *
1048 * \tparam id property id
1049 *
1050 * \return return the selected property of the vector element
1051 *
1052 */
1053 template<unsigned int id> inline auto getLastPropRead() -> decltype(v_prp.template get<id>(0))
1054 {
1055#ifdef SE_CLASS3
1056 se3.read<id>(*this,g_m-1);
1057#endif
1058
1059 return v_prp.template get<id>(g_m - 1);
1060 }
1061
1062
1063 /*! \brief Get the position of the last element
1064 *
1065 * \return the position of the element in space
1066 *
1067 */
1068 inline auto getLastPosWrite() -> decltype(v_pos.template get<0>(0))
1069 {
1070#ifdef SE_CLASS3
1071 se3.template write<prop::max_prop_real>(*this,g_m-1);
1072#endif
1073
1074 return v_pos.template get<0>(g_m - 1);
1075 }
1076
1077 /*! \brief Get the property of the last element
1078 *
1079 * \tparam id property id
1080 *
1081 * \return return the selected property of the vector element
1082 *
1083 */
1084 template<unsigned int id> inline auto getLastPropWrite() -> decltype(v_prp.template get<id>(0))
1085 {
1086#ifdef SE_CLASS3
1087 se3.template write<id>(*this,g_m-1);
1088#endif
1089
1090 return v_prp.template get<id>(g_m - 1);
1091 }
1092
1093////////////////////////////////////////////////////////////////
1094
1095 /*! \brief Construct a cell list symmetric based on a cut of radius
1096 *
1097 * \tparam CellL CellList type to construct
1098 *
1099 * \param r_cut interation radius, or size of each cell
1100 *
1101 * \return the Cell list
1102 *
1103 */
1104 template<typename CellL = CellList<dim, St, Mem_fast<>, shift<dim, St>,internal_position_vector_type > >
1105 CellL getCellListSym(St r_cut)
1106 {
1107#ifdef SE_CLASS1
1108 if (!(opt & BIND_DEC_TO_GHOST))
1109 {
1110 if (getDecomposition().getGhost().getLow(dim-1) == 0.0)
1111 {
1112 std::cerr << __FILE__ << ":" << __LINE__ << " Error the vector has been constructed without BIND_DEC_TO_GHOST, If you construct a vector without BIND_DEC_TO_GHOST the ghost must be full without reductions " << std::endl;
1113 ACTION_ON_ERROR(VECTOR_DIST_ERROR_OBJECT);
1114 }
1115 }
1116#endif
1117
1118 // Cell list
1119 CellL cell_list;
1120
1121 size_t pad = 0;
1122 CellDecomposer_sm<dim,St,shift<dim,St>> cd_sm;
1123 cl_param_calculateSym(getDecomposition().getDomain(),cd_sm,getDecomposition().getGhost(),r_cut,pad);
1124
1125 // Processor bounding box
1126 Box<dim, St> pbox = getDecomposition().getProcessorBounds();
1127
1128 // Ghost padding extension
1129 Ghost<dim,size_t> g_ext(0);
1130 cell_list.Initialize(cd_sm,pbox,pad);
1131 cell_list.set_ndec(getDecomposition().get_ndec());
1132
1133 updateCellListSym(cell_list);
1134
1135 return cell_list;
1136 }
1137
1138 /*! \brief Construct a cell list symmetric based on a cut of radius
1139 *
1140 * \tparam CellL CellList type to construct
1141 *
1142 * \param r_cut interation radius, or size of each cell
1143 *
1144 * \return the Cell list
1145 *
1146 */
1147 template<typename CellL = CellList<dim, St, Mem_fast<>, shift<dim, St> > >
1148 CellL getCellListSym(const size_t (& div)[dim],
1149 const size_t (& pad)[dim])
1150 {
1151#ifdef SE_CLASS1
1152 if (!(opt & BIND_DEC_TO_GHOST))
1153 {
1154 if (getDecomposition().getGhost().getLow(dim-1) == 0.0)
1155 {
1156 std::cerr << __FILE__ << ":" << __LINE__ << " Error the vector has been constructed without BIND_DEC_TO_GHOST, If you construct a vector without BIND_DEC_TO_GHOST the ghost must be full without reductions " << std::endl;
1157 ACTION_ON_ERROR(VECTOR_DIST_ERROR_OBJECT);
1158 }
1159 }
1160#endif
1161
1162 size_t pad_max = pad[0];
1163 for (size_t i = 1 ; i < dim ; i++)
1164 {if (pad[i] > pad_max) {pad_max = pad[i];}}
1165
1166 // Cell list
1167 CellL cell_list;
1168
1169 CellDecomposer_sm<dim,St,shift<dim,St>> cd_sm;
1170 cd_sm.setDimensions(getDecomposition().getDomain(),div,pad_max);
1171
1172 // Processor bounding box
1173 Box<dim, St> pbox = getDecomposition().getProcessorBounds();
1174
1175 // Ghost padding extension
1176 Ghost<dim,size_t> g_ext(0);
1177 cell_list.Initialize(cd_sm,pbox,pad_max);
1178 cell_list.set_ndec(getDecomposition().get_ndec());
1179
1180 updateCellListSym(cell_list);
1181
1182 return cell_list;
1183 }
1184
1185 /*! \brief Construct a cell list starting from the stored particles
1186 *
1187 * \tparam CellL CellList type to construct
1188 *
1189 * \param r_cut interation radius, or size of each cell
1190 * \param no_se3 avoid SE_CLASS3 checking
1191 *
1192 * \return the Cell list
1193 *
1194 */
1195 template<unsigned int impl>
1196 typename cell_list_selector<self,impl>::ctype getCellListDev(St r_cut)
1197 {
1198 return cell_list_selector<self,impl>::get(*this,r_cut);
1199 }
1200
1201 /*! \brief Construct a cell list starting from the stored particles
1202 *
1203 * \tparam CellL CellList type to construct
1204 *
1205 * \param r_cut interation radius, or size of each cell
1206 * \param no_se3 avoid SE_CLASS3 checking
1207 *
1208 * \return the Cell list
1209 *
1210 */
1211 template<typename CellL = CellList_gen<dim, St, Process_keys_lin, Mem_fast<>, shift<dim, St>, decltype(v_pos) > >
1212 CellL getCellList(St r_cut, bool no_se3 = false)
1213 {
1214#ifdef SE_CLASS3
1215 if (no_se3 == false)
1216 {se3.getNN();}
1217#endif
1218#ifdef SE_CLASS1
1219 check_ghost_compatible_rcut(r_cut);
1220#endif
1221
1222 // Get ghost and anlarge by 1%
1223 Ghost<dim,St> g = getDecomposition().getGhost();
1224 g.magnify(1.013);
1225
1226 return getCellList<CellL>(r_cut, g,no_se3);
1227 }
1228
1229#ifdef CUDA_GPU
1230
1231 /*! \brief Construct a cell list starting from the stored particles
1232 *
1233 * \param r_cut interation radius, or size of each cell
1234 *
1235 * \return the Cell list
1236 *
1237 */
1238 template<typename CellType = CellList_gpu<dim,St,CudaMemory,shift_only<dim, St>>,unsigned int ... prp>
1239 CellType getCellListGPU(St r_cut, bool no_se3 = false)
1240 {
1241#ifdef SE_CLASS3
1242 if (no_se3 == false)
1243 {se3.getNN();}
1244#endif
1245#ifdef SE_CLASS1
1246 check_ghost_compatible_rcut(r_cut);
1247#endif
1248
1249 // Get ghost and anlarge by 1%
1250 Ghost<dim,St> g = getDecomposition().getGhost();
1251 g.magnify(1.013);
1252
1253 return getCellListGPU<CellType>(r_cut, g,no_se3);
1254 }
1255
1256
1257 /*! \brief Construct a cell list starting from the stored particles
1258 *
1259 * It differ from the get getCellList for an additional parameter, in case the
1260 * domain + ghost is not big enough to contain additional padding particles, a Cell list
1261 * with bigger space can be created
1262 * (padding particles in general are particles added by the user out of the domains)
1263 *
1264 * \tparam CellL CellList type to construct
1265 *
1266 * \param r_cut interation radius, or size of each cell
1267 * \param enlarge In case of padding particles the cell list must be enlarged, like a ghost this parameter say how much must be enlarged
1268 *
1269 * \return the CellList
1270 *
1271 */
1272 template<typename CellType = CellList_gpu<dim,St,CudaMemory,shift_only<dim, St>>, unsigned int ... prp>
1273 CellType getCellListGPU(St r_cut, const Ghost<dim, St> & enlarge, bool no_se3 = false)
1274 {
1275#ifdef SE_CLASS3
1276 if (no_se3 == false)
1277 {se3.getNN();}
1278#endif
1279
1280 Vcluster<Memory> & v_cl = create_vcluster<Memory>();
1281
1282 // Division array
1283 size_t div[dim];
1284
1285 // get the processor bounding box
1286 Box<dim, St> pbox = getDecomposition().getProcessorBounds();
1287
1288 // Processor bounding box
1289 cl_param_calculate(pbox, div, r_cut, enlarge);
1290
1291 CellType cell_list(pbox,div);
1292
1293 v_prp_out.resize(v_pos.size());
1294 v_pos_out.resize(v_pos.size());
1295
1296 cell_list.template construct<decltype(v_pos),decltype(v_prp),prp ...>(v_pos,v_pos_out,v_prp,v_prp_out,v_cl.getmgpuContext(),g_m);
1297
1298 cell_list.set_ndec(getDecomposition().get_ndec());
1299 cell_list.set_gm(g_m);
1300
1301#ifdef CUDA_GPU
1302 this->update_sort(this->toKernel_sorted());
1303#endif
1304
1305 return cell_list;
1306 }
1307
1308
1309#endif
1310
1311///////////////////////// Device Interface, this interface always exist it wrap the GPU if you have one or the CPU if you do not have ////////////////
1312
1313#ifdef CUDA_GPU
1314
1315 /*! \brief Construct a cell list from the stored particles
1316 *
1317 * \param r_cut interation radius, or size of each cell
1318 *
1319 * \return the Cell list
1320 *
1321 */
1322 auto getCellListDevice(St r_cut, bool no_se3 = false) -> decltype(this->getCellListGPU(r_cut,no_se3))
1323 {
1324 return this->getCellListGPU(r_cut,no_se3);
1325 }
1326
1327
1328#else
1329
1330 /*! \brief Construct a cell list from the stored particles
1331 *
1332 * \param r_cut interation radius, or size of each cell
1333 *
1334 * \return the Cell list
1335 *
1336 */
1337 auto getCellListDevice(St r_cut, bool no_se3 = false) -> decltype(this->getCellList(r_cut,no_se3))
1338 {
1339 return this->getCellList(r_cut,no_se3);
1340 }
1341
1342#endif
1343
1344////////////////////// End Device Interface ////////////////////////////////////////////////////////////////////////////////////////////////////////////
1345
1346 /*! \brief Construct an hilbert cell list starting from the stored particles
1347 *
1348 * \tparam CellL CellList type to construct
1349 *
1350 * \param r_cut interation radius, or size of each cell
1351 *
1352 * \return the Cell list
1353 *
1354 */
1355 template<typename CellL = CellList_gen<dim, St, Process_keys_hilb, Mem_fast<>, shift<dim, St> > >
1356 CellL getCellList_hilb(St r_cut)
1357 {
1358#ifdef SE_CLASS3
1359 se3.getNN();
1360#endif
1361#ifdef SE_CLASS1
1362 check_ghost_compatible_rcut(r_cut);
1363#endif
1364
1365 // Get ghost and anlarge by 1%
1366 Ghost<dim,St> g = getDecomposition().getGhost();
1367 g.magnify(1.013);
1368
1369 return getCellList_hilb(r_cut, g);
1370 }
1371
1372 /*! \brief Update a cell list using the stored particles
1373 *
1374 * \tparam CellL CellList type to construct
1375 *
1376 * \param cell_list Cell list to update
1377 * \param no_se3 avoid se class 3 checking
1378 *
1379 */
1380 template<unsigned int ... prp,typename CellL>
1381 void updateCellList(CellL & cell_list, bool no_se3 = false, cl_construct_opt opt = cl_construct_opt::Full)
1382 {
1383#ifdef SE_CLASS3
1384 if (no_se3 == false)
1385 {se3.getNN();}
1386#endif
1387
1388 Vcluster<Memory> & v_cl = create_vcluster<Memory>();
1389
1390 // This function assume equal spacing in all directions
1391 // but in the worst case we take the maximum
1392 St r_cut = cell_list.getCellBox().getRcut();
1393
1394 // Here we have to check that the Cell-list has been constructed
1395 // from the same decomposition
1396 bool to_reconstruct = cell_list.get_ndec() != getDecomposition().get_ndec();
1397
1398 if (to_reconstruct == false)
1399 {
1400 populate_cell_list<dim,St,prop,Memory,layout_base,CellL,prp ...>(v_pos,v_pos_out,v_prp,v_prp_out,cell_list,v_cl.getmgpuContext(false),g_m,CL_NON_SYMMETRIC,opt);
1401
1402 cell_list.set_gm(g_m);
1403 }
1404 else
1405 {
1406 CellL cli_tmp = gcl<dim,St,CellL,self,GCL_NON_SYMMETRIC>::get(*this,r_cut,getDecomposition().getGhost());
1407
1408 cell_list.swap(cli_tmp);
1409 cell_list.re_setBoxNN();
1410 }
1411 }
1412
1413 /*! \brief Update a cell list using the stored particles
1414 *
1415 * \tparam CellL CellList type to construct
1416 *
1417 * \param cell_list Cell list to update
1418 *
1419 */
1420 template<typename CellL = CellList<dim, St, Mem_fast<>, shift<dim, St> > >
1421 void updateCellListSym(CellL & cell_list)
1422 {
1423#ifdef SE_CLASS3
1424 se3.getNN();
1425#endif
1426
1427 Vcluster<Memory> & v_cl = create_vcluster<Memory>();
1428
1429 // Here we have to check that the Cell-list has been constructed
1430 // from the same decomposition
1431 bool to_reconstruct = cell_list.get_ndec() != getDecomposition().get_ndec();
1432
1433 if (to_reconstruct == false)
1434 {
1435 populate_cell_list(v_pos,v_pos_out,v_prp,v_prp_out,cell_list,v_cl.getmgpuContext(),g_m,CL_SYMMETRIC,cl_construct_opt::Full);
1436
1437 cell_list.set_gm(g_m);
1438 }
1439 else
1440 {
1441 CellL cli_tmp = gcl_An<dim,St,CellL,self,GCL_SYMMETRIC>::get(*this,
1442 cell_list.getDivWP(),
1443 cell_list.getPadding(),
1444 getDecomposition().getGhost());
1445
1446 cell_list.swap(cli_tmp);
1447 }
1448 }
1449
1450 /*! \brief Construct a cell list starting from the stored particles
1451 *
1452 * It differ from the get getCellList for an additional parameter, in case the
1453 * domain + ghost is not big enough to contain additional padding particles, a Cell list
1454 * with bigger space can be created
1455 * (padding particles in general are particles added by the user out of the domains)
1456 *
1457 * \tparam CellL CellList type to construct
1458 *
1459 * \param r_cut interation radius, or size of each cell
1460 * \param enlarge In case of padding particles the cell list must be enlarged, like a ghost this parameter say how much must be enlarged
1461 * \param no_se3 avoid se_class3 cheking default false
1462 *
1463 * \return the CellList
1464 *
1465 */
1466 template<typename CellL = CellList_gen<dim, St, Process_keys_lin, Mem_fast<>, shift<dim, St> > >
1467 CellL getCellList(St r_cut, const Ghost<dim, St> & enlarge, bool no_se3 = false)
1468 {
1469#ifdef SE_CLASS3
1470 if (no_se3 == false)
1471 {se3.getNN();}
1472#endif
1473
1474 CellL cell_list;
1475
1476 // Division array
1477 size_t div[dim];
1478
1479 // get the processor bounding box
1480 Box<dim, St> pbox = getDecomposition().getProcessorBounds();
1481
1482 // Processor bounding box
1483 cl_param_calculate(pbox, div, r_cut, enlarge);
1484
1485 cell_list.Initialize(pbox, div);
1486 cell_list.set_gm(g_m);
1487 cell_list.set_ndec(getDecomposition().get_ndec());
1488
1489 updateCellList(cell_list,no_se3);
1490
1491 return cell_list;
1492 }
1493
1494 /*! \brief Construct an hilbert cell list starting from the stored particles
1495 *
1496 * It differ from the get getCellList for an additional parameter, in case the
1497 * domain + ghost is not big enough to contain additional padding particles, a Cell list
1498 * with bigger space can be created
1499 * (padding particles in general are particles added by the user out of the domains)
1500 *
1501 * \tparam CellL CellList type to construct
1502 *
1503 * \param r_cut interation radius, or size of each cell
1504 * \param enlarge In case of padding particles the cell list must be enlarged, like a ghost this parameter say how much must be enlarged
1505 *
1506 * \return The Cell-list
1507 *
1508 */
1509 template<typename CellL = CellList_gen<dim, St, Process_keys_hilb, Mem_fast<>, shift<dim, St> > > CellL getCellList_hilb(St r_cut, const Ghost<dim, St> & enlarge)
1510 {
1511#ifdef SE_CLASS3
1512 se3.getNN();
1513#endif
1514
1515 CellL cell_list;
1516
1517 // Division array
1518 size_t div[dim];
1519
1520 // get the processor bounding box
1521 Box<dim, St> pbox = getDecomposition().getProcessorBounds();
1522
1523 // Processor bounding box
1524 cl_param_calculate(pbox,div, r_cut, enlarge);
1525
1526 cell_list.Initialize(pbox, div);
1527 cell_list.set_gm(g_m);
1528 cell_list.set_ndec(getDecomposition().get_ndec());
1529
1530 updateCellList(cell_list);
1531
1532 return cell_list;
1533 }
1534
1535 /*! \brief for each particle get the symmetric verlet list
1536 *
1537 * \param r_cut cut-off radius
1538 *
1539 * \return the verlet list
1540 *
1541 */
1542 template <typename VerletL = VerletList<dim,St,Mem_fast<>,shift<dim,St> >>
1543 VerletL getVerletSym(St r_cut)
1544 {
1545#ifdef SE_CLASS3
1546 se3.getNN();
1547#endif
1548
1549 VerletL ver;
1550
1551 // Processor bounding box
1552 Box<dim, St> pbox = getDecomposition().getProcessorBounds();
1553
1554 ver.InitializeSym(getDecomposition().getDomain(),pbox,getDecomposition().getGhost(),r_cut,v_pos,g_m);
1555
1556 ver.set_ndec(getDecomposition().get_ndec());
1557
1558 return ver;
1559 }
1560
1561 /*! \brief for each particle get the symmetric verlet list
1562 *
1563 * \param r_cut cut-off radius
1564 *
1565 * \return the verlet list
1566 *
1567 */
1568 template <typename VerletL = VerletList<dim,St,Mem_fast<>,shift<dim,St> >>
1569 VerletL getVerletCrs(St r_cut)
1570 {
1571#ifdef SE_CLASS1
1572 if (!(opt & BIND_DEC_TO_GHOST))
1573 {
1574 std::cerr << __FILE__ << ":" << __LINE__ << " Error the vector has been constructed without BIND_DEC_TO_GHOST, getVerletCrs require the vector to be constructed with BIND_DEC_TO_GHOST option " << std::endl;
1575 ACTION_ON_ERROR(VECTOR_DIST_ERROR_OBJECT);
1576 }
1577#endif
1578
1579#ifdef SE_CLASS3
1580 se3.getNN();
1581#endif
1582
1583 VerletL ver;
1584
1585 // Processor bounding box
1586 Box<dim, St> pbox = getDecomposition().getProcessorBounds();
1587
1588 // Initialize the verlet list
1589 ver.InitializeCrs(getDecomposition().getDomain(),pbox,getDecomposition().getGhost(),r_cut,v_pos,g_m);
1590
1591 // Get the internal cell list
1592 auto & NN = ver.getInternalCellList();
1593
1594 // Shift
1595 grid_key_dx<dim> shift;
1596
1597 // Add padding
1598 for (size_t i = 0 ; i < dim ; i++)
1599 shift.set_d(i,NN.getPadding(i));
1600
1601 grid_sm<dim,void> gs = NN.getInternalGrid();
1602
1603 getDecomposition().setNNParameters(shift,gs);
1604
1605 ver.createVerletCrs(r_cut,g_m,v_pos,
1606 getDecomposition().getCRSDomainCells(),
1607 getDecomposition().getCRSAnomDomainCells());
1608
1609 ver.set_ndec(getDecomposition().get_ndec());
1610
1611 return ver;
1612 }
1613
1614 /*! \brief for each particle get the verlet list
1615 *
1616 * \param r_cut cut-off radius
1617 *
1618 * \return a VerletList object
1619 *
1620 */
1621 template <typename VerletL = VerletList<dim,St,Mem_fast<>,shift<dim,St>,decltype(v_pos) >>
1622 VerletL getVerlet(St r_cut)
1623 {
1624#ifdef SE_CLASS3
1625 se3.getNN();
1626#endif
1627
1628 VerletL ver;
1629
1630 // get the processor bounding box
1631 Box<dim, St> bt = getDecomposition().getProcessorBounds();
1632
1633 // Get the ghost
1634 Ghost<dim,St> g = getDecomposition().getGhost();
1635 g.magnify(1.013);
1636
1637 // enlarge the box where the Verlet is defined
1638 bt.enlarge(g);
1639
1640 ver.Initialize(bt,getDecomposition().getProcessorBounds(),r_cut,v_pos,g_m,VL_NON_SYMMETRIC);
1641
1642 ver.set_ndec(getDecomposition().get_ndec());
1643
1644 return ver;
1645 }
1646
1647 /*! \brief for each particle get the verlet list
1648 *
1649 * \param r_cut cut-off radius
1650 * \param ver Verlet to update
1651 * \param r_cut cutoff radius
1652 * \param opt option like VL_SYMMETRIC and VL_NON_SYMMETRIC or VL_CRS_SYMMETRIC
1653 *
1654 */
1655 template<typename Mem_type> void updateVerlet(VerletList<dim,St,Mem_type,shift<dim,St> > & ver, St r_cut, size_t opt = VL_NON_SYMMETRIC)
1656 {
1657#ifdef SE_CLASS3
1658 se3.getNN();
1659#endif
1660
1661 if (opt == VL_SYMMETRIC)
1662 {
1663 auto & NN = ver.getInternalCellList();
1664
1665 // Here we have to check that the Box defined by the Cell-list is the same as the domain box of this
1666 // processor. if it is not like that we have to completely reconstruct from stratch
1667 bool to_reconstruct = NN.get_ndec() != getDecomposition().get_ndec();
1668
1669 if (to_reconstruct == false)
1670 ver.update(getDecomposition().getDomain(),r_cut,v_pos,g_m, opt);
1671 else
1672 {
1673 VerletList<dim,St,Mem_type,shift<dim,St> > ver_tmp;
1674
1675 ver_tmp = getVerlet<VerletList<dim,St,Mem_type,shift<dim,St> >>(r_cut);
1676 ver.swap(ver_tmp);
1677 }
1678 }
1679 else if (opt == VL_CRS_SYMMETRIC)
1680 {
1681#ifdef SE_CLASS1
1682 if ((opt & BIND_DEC_TO_GHOST))
1683 {
1684 std::cerr << __FILE__ << ":" << __LINE__ << " Error the vector has been constructed without BIND_DEC_TO_GHOST, updateVerlet with the option VL_CRS_SYMMETRIC require the vector to be constructed with BIND_DEC_TO_GHOST option " << std::endl;
1685 ACTION_ON_ERROR(VECTOR_DIST_ERROR_OBJECT);
1686 }
1687#endif
1688
1689 auto & NN = ver.getInternalCellList();
1690
1691 // Here we have to check that the Box defined by the Cell-list is the same as the domain box of this
1692 // processor. if it is not like that we have to completely reconstruct from stratch
1693 bool to_reconstruct = NN.get_ndec() != getDecomposition().get_ndec();
1694
1695 if (to_reconstruct == false)
1696 {
1697 // Shift
1698 grid_key_dx<dim> shift;
1699
1700 // Add padding
1701 for (size_t i = 0 ; i < dim ; i++)
1702 shift.set_d(i,NN.getPadding(i));
1703
1704 grid_sm<dim,void> gs = NN.getInternalGrid();
1705
1706 getDecomposition().setNNParameters(shift,gs);
1707
1708 ver.updateCrs(getDecomposition().getDomain(),r_cut,v_pos,g_m,
1709 getDecomposition().getCRSDomainCells(),
1710 getDecomposition().getCRSAnomDomainCells());
1711 }
1712 else
1713 {
1714 VerletList<dim,St,Mem_type,shift<dim,St> > ver_tmp;
1715
1716 ver_tmp = getVerletCrs<VerletList<dim,St,Mem_type,shift<dim,St> >>(r_cut);
1717 ver.swap(ver_tmp);
1718 }
1719 }
1720 else
1721 {
1722 auto & NN = ver.getInternalCellList();
1723
1724 // Here we have to check that the Box defined by the Cell-list is the same as the domain box of this
1725 // processor. if it is not like that we have to completely reconstruct from stratch
1726 bool to_reconstruct = NN.get_ndec() != getDecomposition().get_ndec();
1727
1728 if (to_reconstruct == false)
1729 ver.update(getDecomposition().getDomain(),r_cut,v_pos,g_m, opt);
1730 else
1731 {
1732 VerletList<dim,St,Mem_type,shift<dim,St> > ver_tmp;
1733
1734 ver_tmp = getVerlet<VerletList<dim,St,Mem_type,shift<dim,St> >>(r_cut);
1735 ver.swap(ver_tmp);
1736 }
1737 }
1738 }
1739
1740
1741 /*! \brief Construct a cell list starting from the stored particles and reorder a vector according to the Hilberts curve
1742 *
1743 * \tparam CellL CellList type to construct
1744 *
1745 * \param m an order of a hilbert curve
1746 *
1747 */
1748 template<typename CellL=CellList_gen<dim,St,Process_keys_lin,Mem_bal<>,shift<dim,St> > >
1749 void reorder (int32_t m, reorder_opt opt = reorder_opt::HILBERT)
1750 {
1751 reorder<CellL>(m,getDecomposition().getGhost(),opt);
1752 }
1753
1754
1755 /*! \brief Construct a cell list starting from the stored particles and reorder a vector according to the Hilberts curve
1756 *
1757 * \warning it kill the ghost and invalidate cell-lists
1758 *
1759 *It differs from the reorder(m) for an additional parameter, in case the
1760 * domain + ghost is not big enough to contain additional padding particles, a Cell list
1761 * with bigger space can be created
1762 * (padding particles in general are particles added by the user out of the domains)
1763 *
1764 * \param m order of a curve
1765 * \param enlarge In case of padding particles the cell list must be enlarged, like a ghost this parameter say how much must be enlarged
1766 *
1767 */
1768 template<typename CellL=CellList_gen<dim,St,Process_keys_lin,Mem_bal<>,shift<dim,St> > >
1769 void reorder(int32_t m, const Ghost<dim,St> & enlarge, reorder_opt opt = reorder_opt::HILBERT)
1770 {
1771 // reset the ghost part
1772 v_pos.resize(g_m);
1773 v_prp.resize(g_m);
1774
1775
1776 CellL cell_list;
1777
1778 // calculate the parameters of the cell list
1779
1780 // get the processor bounding box
1781 Box<dim,St> pbox = getDecomposition().getProcessorBounds();
1782 // extend by the ghost
1783 pbox.enlarge(enlarge);
1784
1785 size_t div[dim];
1786
1787 // Calculate the division array and the cell box
1788 for (size_t i = 0 ; i < dim ; i++)
1789 {
1790 div[i] = 1 << m;
1791 }
1792
1793 cell_list.Initialize(pbox,div);
1794 cell_list.set_gm(g_m);
1795
1796 // for each particle add the particle to the cell list
1797
1798 auto it = getIterator();
1799
1800 while (it.isNext())
1801 {
1802 auto key = it.get();
1803
1804 Point<dim,St> xp = this->getPos(key);
1805
1806 cell_list.add(xp,key.getKey());
1807
1808 ++it;
1809 }
1810
1811 // Use cell_list to reorder v_pos
1812
1813 //destination vector
1814 openfpm::vector<Point<dim,St>> v_pos_dest;
1815 openfpm::vector<prop> v_prp_dest;
1816
1817 if (opt == reorder_opt::HILBERT)
1818 {
1819 grid_key_dx_iterator_hilbert<dim> h_it(m);
1820
1821 reorder_sfc<CellL,grid_key_dx_iterator_hilbert<dim>>(v_pos_dest,v_prp_dest,h_it,cell_list);
1822 }
1823 else if (opt == reorder_opt::LINEAR)
1824 {
1825 grid_sm<dim,void> gs(div);
1826 grid_key_dx_iterator<dim> h_it(gs);
1827
1828 reorder_sfc<CellL,grid_key_dx_iterator<dim>>(v_pos_dest,v_prp_dest,h_it,cell_list);
1829 }
1830 else
1831 {
1832 // We do nothing, we second swap nullify the first
1833 v_pos.swap(v_pos_dest);
1834 v_prp.swap(v_prp_dest);
1835 }
1836
1837 v_pos.swap(v_pos_dest);
1838 v_prp.swap(v_prp_dest);
1839 }
1840
1841 /*! \brief Construct a cell list starting from the stored particles and reorder a vector according to the Hilberts curve
1842 *
1843 * \warning it kill the ghost and invalidate cell-lists
1844 *
1845 *It differs from the reorder(m) for an additional parameter, in case the
1846 * domain + ghost is not big enough to contain additional padding particles, a Cell list
1847 * with bigger space can be created
1848 * (padding particles in general are particles added by the user out of the domains)
1849 *
1850 * \param m order of a curve
1851 * \param enlarge In case of padding particles the cell list must be enlarged, like a ghost this parameter say how much must be enlarged
1852 *
1853 */
1854 template<typename CellL=CellList_gen<dim,St,Process_keys_lin,Mem_bal<>,shift<dim,St> > >
1855 void reorder_rcut(St r_cut)
1856 {
1857 // reset the ghost part
1858 v_pos.resize(g_m);
1859 v_prp.resize(g_m);
1860
1861 auto cell_list = getCellList<CellL>(r_cut);
1862
1863 // Use cell_list to reorder v_pos
1864
1865 //destination vector
1866 openfpm::vector<Point<dim,St>> v_pos_dest;
1867 openfpm::vector<prop> v_prp_dest;
1868
1869 size_t div[dim];
1870 for (size_t i = 0 ; i < dim ; i++)
1871 {div[i] = cell_list.getGrid().size(i) - 2*cell_list.getPadding()[i];}
1872
1873 grid_sm<dim,void> gs(div);
1874 grid_key_dx_iterator<dim> h_it(gs);
1875
1876 reorder_sfc<CellL,grid_key_dx_iterator<dim>>(v_pos_dest,v_prp_dest,h_it,cell_list);
1877
1878 v_pos.swap(v_pos_dest);
1879 v_prp.swap(v_prp_dest);
1880 }
1881
1882 /*! \brief It return the number of particles contained by the previous processors
1883 *
1884 * \warning It only work with the initial decomposition
1885 *
1886 * Given 1000 particles and 3 processors, you will get
1887 *
1888 * * Processor 0: 0
1889 * * Processor 1: 334
1890 * * Processor 2: 667
1891 *
1892 * \param np initial number of particles
1893 *
1894 * \return number of particles contained by the previous processors
1895 *
1896 */
1897 size_t init_size_accum(size_t np)
1898 {
1899 Vcluster<Memory> & v_cl = create_vcluster<Memory>();
1900
1901 size_t accum = 0;
1902
1903 // convert to a local number of elements
1904 size_t p_np = np / v_cl.getProcessingUnits();
1905
1906 // Get non divisible part
1907 size_t r = np % v_cl.getProcessingUnits();
1908
1909 accum = p_np * v_cl.getProcessUnitID();
1910
1911 // Distribute the remain particles
1912 if (v_cl.getProcessUnitID() <= r)
1913 accum += v_cl.getProcessUnitID();
1914 else
1915 accum += r;
1916
1917 return accum;
1918 }
1919
1920 /*! \brief Get an iterator that traverse domain and ghost particles
1921 *
1922 * \return an iterator
1923 *
1924 */
1925 vector_dist_iterator getIterator()
1926 {
1927#ifdef SE_CLASS3
1928 se3.getIterator();
1929#endif
1930 return vector_dist_iterator(0, v_pos.size());
1931 }
1932
1933 /*! \brief Get an iterator that traverse domain and ghost particles
1934 *
1935 * \param start particle
1936 * \param stop particle
1937 *
1938 * \return an iterator
1939 *
1940 */
1941 vector_dist_iterator getIterator(size_t start, size_t stop)
1942 {
1943#ifdef SE_CLASS3
1944 se3.getIterator();
1945#endif
1946 return vector_dist_iterator(start, stop);
1947 }
1948
1949 /*! /brief Get a grid Iterator
1950 *
1951 * Usefull function to place particles on a grid or grid-like (grid + noise)
1952 *
1953 * \param sz size of the grid
1954 *
1955 * \return a Grid iterator
1956 *
1957 */
1958 inline grid_dist_id_iterator_dec<Decomposition> getGridIterator(const size_t (&sz)[dim])
1959 {
1960 grid_key_dx<dim> start;
1961 grid_key_dx<dim> stop;
1962 for (size_t i = 0; i < dim; i++)
1963 {
1964 start.set_d(i, 0);
1965 stop.set_d(i, sz[i] - 1);
1966 }
1967
1968 grid_dist_id_iterator_dec<Decomposition> it_dec(getDecomposition(), sz, start, stop);
1969 return it_dec;
1970 }
1971
1972 /*! \brief Get the iterator across the position of the ghost particles
1973 *
1974 * \return an iterator
1975 *
1976 */
1977 vector_dist_iterator getGhostIterator() const
1978 {
1979#ifdef SE_CLASS3
1980 se3.getIterator();
1981#endif
1982
1983 return vector_dist_iterator(g_m, v_pos.size());
1984 }
1985
1986 /*! \brief Get the iterator across the position of the ghost particles
1987 *
1988 * \return an iterator
1989 *
1990 */
1991 vector_dist_iterator getGhostIterator_no_se3() const
1992 {
1993 return vector_dist_iterator(g_m, v_pos.size());
1994 }
1995
1996 /*! \brief Get an iterator that traverse the particles in the domain
1997 * using a cell list
1998 *
1999 * \param NN Cell-list
2000 *
2001 * \return an iterator over the particles
2002 *
2003 */
2004 template<typename CellList> ParticleIt_Cells<dim,CellList>
2005 getDomainIteratorCells(CellList & NN)
2006 {
2007#ifdef SE_CLASS3
2008 se3.getIterator();
2009#endif
2010
2011 // Shift
2012 grid_key_dx<dim> shift;
2013
2014 // Add padding
2015 for (size_t i = 0 ; i < dim ; i++)
2016 shift.set_d(i,NN.getPadding(i));
2017
2018 grid_sm<dim,void> gs = NN.getInternalGrid();
2019
2020 getDecomposition().setNNParameters(shift,gs);
2021
2022 return ParticleIt_Cells<dim,CellList>(NN,getDecomposition().getDomainCells(),g_m);
2023 }
2024
2025 /*! \brief Get an iterator that traverse the particles in the domain
2026 *
2027 * \return an iterator
2028 *
2029 */
2030 vector_dist_iterator getDomainIterator() const
2031 {
2032#ifdef SE_CLASS3
2033 se3.getIterator();
2034#endif
2035
2036 return vector_dist_iterator(0, g_m);
2037 }
2038
2039#ifdef CUDA_GPU
2040
2041 /*! \brief Get an iterator that traverse the particles in the domain
2042 *
2043 * \return an iterator
2044 *
2045 */
2046 ite_gpu<1> getDomainIteratorGPU(size_t n_thr = 1024) const
2047 {
2048#ifdef SE_CLASS3
2049 se3.getIterator();
2050#endif
2051
2052 return v_pos.getGPUIteratorTo(g_m-1,n_thr);
2053 }
2054
2055 /*! \brief Get an iterator that traverse the particles in the domain
2056 *
2057 * \return an iterator
2058 *
2059 */
2060 ite_gpu<1> getDomainAndGhostIteratorGPU(size_t n_thr = 1024) const
2061 {
2062#ifdef SE_CLASS3
2063 se3.getIterator();
2064#endif
2065
2066 return v_pos.getGPUIteratorTo(v_pos.size()-1,n_thr);
2067 }
2068
2069 /*! \brief Merge the properties calculated on the sorted vector on the original vector
2070 *
2071 * \parameter Cell-list from which has been constructed the sorted vector
2072 *
2073 */
2074 template<unsigned int ... prp,typename id_1, typename id_2, bool is_sparse>
2075 void merge_sort(CellList_gpu<dim,St,CudaMemory,shift_only<dim, St>,id_1,id_2,is_sparse> & cl, size_t n_thr = 1024)
2076 {
2077#if defined(__NVCC__)
2078
2079 auto ite = v_pos.getGPUIteratorTo(g_m-1,n_thr);
2080 bool has_work = has_work_gpu(ite);
2081
2082 if (has_work == true)
2083 {
2084 CUDA_LAUNCH((merge_sort_part<false,decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(cl.getNonSortToSort().toKernel()),prp...>),
2085 ite,
2086 v_pos.toKernel(),v_prp.toKernel(),v_pos_out.toKernel(),v_prp_out.toKernel(),cl.getNonSortToSort().toKernel());
2087 }
2088
2089#endif
2090 }
2091
2092 /*! \brief print a vector type property
2093 *
2094 * \param print_sorted (Print the sorted version)
2095 *
2096 * \tparam property
2097 *
2098 */
2099 template<unsigned int prp>
2100 void debugPrintVector(bool print_sorted = false)
2101 {
2102 if (print_sorted == false)
2103 {this->v_prp.template deviceToHost<prp>();}
2104 else
2105 {this->v_prp_out.template deviceToHost<prp>();}
2106
2107 auto it = this->getDomainIterator();
2108
2109 while(it.isNext())
2110 {
2111 auto p = it.get();
2112
2113 for (size_t i = 0 ; i < std::extent<typename boost::mpl::at<typename prop::type,boost::mpl::int_<prp>>::type>::value ; i++)
2114 {
2115 if (print_sorted == false)
2116 {std::cout << v_prp.template get<prp>(p.getKey())[i] << " ";}
2117 else
2118 {std::cout << v_prp_out.template get<prp>(p.getKey())[i] << " ";}
2119 }
2120
2121 std::cout << std::endl;
2122
2123 ++it;
2124 }
2125 }
2126
2127 /*! \brief print a scalar type property
2128 *
2129 * \param print_sorted (Print the sorted version)
2130 *
2131 * \tparam property
2132 *
2133 */
2134 template<unsigned int prp>
2135 void debugPrintScalar(bool print_sorted = false)
2136 {
2137 if (print_sorted == false)
2138 {this->v_prp.template deviceToHost<prp>();}
2139 else
2140 {this->v_prp_out.template deviceToHost<prp>();}
2141
2142 auto it = this->getDomainIterator();
2143
2144 while(it.isNext())
2145 {
2146 auto p = it.get();
2147
2148 if (print_sorted == false)
2149 {std::cout << v_prp_out.template get<prp>(p.getKey()) << " " << std::endl;}
2150 else
2151 {std::cout << v_prp_out.template get<prp>(p.getKey()) << " " << std::endl;}
2152
2153 ++it;
2154 }
2155 }
2156
2157 /*! \brief Merge the properties calculated on the sorted vector on the original vector
2158 *
2159 * \parameter Cell-list from which has been constructed the sorted vector
2160 *
2161 */
2162 template<unsigned int ... prp> void merge_sort_with_pos(CellList_gpu<dim,St,CudaMemory,shift_only<dim, St>> & cl, size_t n_thr = 1024)
2163 {
2164#if defined(__NVCC__)
2165
2166 auto ite = v_pos.getGPUIteratorTo(g_m-1,n_thr);
2167
2168 CUDA_LAUNCH((merge_sort_part<true,decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(cl.getNonSortedToSorted().toKernel()),prp...>),
2169 ite,
2170 v_pos.toKernel(),v_prp.toKernel(),v_pos_out.toKernel(),v_prp_out.toKernel(),cl.getNonSortedToSorted().toKernel());
2171
2172#endif
2173 }
2174
2175#endif
2176
2177#ifdef CUDA_GPU
2178
2179 /*! \brief Get an iterator that traverse the particles in the domain
2180 *
2181 * \return an iterator
2182 *
2183 */
2184 auto getDomainIteratorDevice(size_t n_thr = 1024) const -> decltype(this->getDomainIteratorGPU(n_thr))
2185 {
2186 return this->getDomainIteratorGPU(n_thr);
2187 }
2188
2189
2190#else
2191
2192 /*! \brief Get an iterator that traverse the particles in the domain
2193 *
2194 * \return an iterator
2195 *
2196 */
2197 auto getDomainIteratorDevice(size_t n_thr = 1024) const -> decltype(this->getDomainIterator())
2198 {
2199 return this->getDomainIterator();
2200 }
2201
2202
2203#endif
2204
2205 /*! \brief Get an iterator that traverse the particles in the domain
2206 *
2207 * \return an iterator
2208 *
2209 */
2210 vector_dist_iterator getDomainIterator_no_se3() const
2211 {
2212 return vector_dist_iterator(0, g_m);
2213 }
2214
2215 /*! \brief Get an iterator that traverse the particles in the domain
2216 *
2217 * \return an iterator
2218 *
2219 */
2220 vector_dist_iterator getDomainAndGhostIterator() const
2221 {
2222#ifdef SE_CLASS3
2223 se3.getIterator();
2224#endif
2225
2226 return vector_dist_iterator(0, v_pos.size());
2227 }
2228
2229 /*! \brief Get an iterator that traverse the particles in the domain
2230 *
2231 * \return an iterator
2232 *
2233 */
2234 vector_dist_iterator getDomainAndGhostIterator_no_se3() const
2235 {
2236 return vector_dist_iterator(0, v_pos.size());
2237 }
2238
2239 /*! \brief Get the decomposition
2240 *
2241 * \return
2242 *
2243 */
2244 inline Decomposition & getDecomposition()
2245 {
2246 return vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base>::getDecomposition();
2247 }
2248
2249 /*! \brief Get the decomposition
2250 *
2251 * \return
2252 *
2253 */
2254 inline const Decomposition & getDecomposition() const
2255 {
2256 return vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base>::getDecomposition();
2257 }
2258
2259 /*! \brief It move all the particles that does not belong to the local processor to the respective processor
2260 *
2261 * \tparam out of bound policy it specify what to do when the particles are detected out of bound
2262 *
2263 * In general this function is called after moving the particles to move the
2264 * elements out the local processor. Or just after initialization if each processor
2265 * contain non local particles
2266 *
2267 * \tparam prp properties to communicate
2268 *
2269 * \param opt options
2270 *
2271 */
2272 template<unsigned int ... prp> void map_list(size_t opt = NONE)
2273 {
2274#ifdef SE_CLASS3
2275 se3.map_pre();
2276#endif
2277
2278 this->template map_list_<prp...>(v_pos,v_prp,g_m,opt);
2279
2280#ifdef CUDA_GPU
2281 this->update(this->toKernel());
2282#endif
2283
2284#ifdef SE_CLASS3
2285 se3.map_post();
2286#endif
2287 }
2288
2289
2290 /*! \brief It move all the particles that does not belong to the local processor to the respective processor
2291 *
2292 * \tparam out of bound policy it specify what to do when the particles are detected out of bound
2293 *
2294 * In general this function is called after moving the particles to move the
2295 * elements out the local processor. Or just after initialization if each processor
2296 * contain non local particles
2297 *
2298 * \param opt options
2299 *
2300 */
2301 template<typename obp = KillParticle> void map(size_t opt = NONE)
2302 {
2303#ifdef SE_CLASS3
2304 se3.map_pre();
2305#endif
2306
2307 this->template map_<obp>(v_pos,v_prp,g_m,opt);
2308
2309#ifdef CUDA_GPU
2310 this->update(this->toKernel());
2311#endif
2312
2313#ifdef SE_CLASS3
2314 se3.map_post();
2315#endif
2316 }
2317
2318 /*! \brief It synchronize the properties and position of the ghost particles
2319 *
2320 * \tparam prp list of properties to get synchronize
2321 *
2322 * \param opt options WITH_POSITION, it send also the positional information of the particles
2323 *
2324 */
2325 template<int ... prp> inline void ghost_get(size_t opt = WITH_POSITION)
2326 {
2327#ifdef SE_CLASS1
2328 Vcluster<Memory> & v_cl = create_vcluster<Memory>();
2329
2330 if (getDecomposition().getProcessorBounds().isValid() == false && size_local() != 0)
2331 {
2332 std::cerr << __FILE__ << ":" << __LINE__ << " Error the processor " << v_cl.getProcessUnitID() << " has particles, but is supposed to be unloaded" << std::endl;
2333 ACTION_ON_ERROR(VECTOR_DIST_ERROR_OBJECT);
2334 }
2335#endif
2336
2337#ifdef SE_CLASS3
2338 se3.template ghost_get_pre<prp...>(opt);
2339#endif
2340
2341 this->template ghost_get_<GHOST_SYNC,prp...>(v_pos,v_prp,g_m,opt);
2342
2343#ifdef CUDA_GPU
2344 this->update(this->toKernel());
2345#endif
2346
2347#ifdef SE_CLASS3
2348
2349 this->template ghost_get_<prop::max_prop_real>(v_pos,v_prp,g_m,opt | KEEP_PROPERTIES);
2350
2351 se3.template ghost_get_post<prp...>(opt);
2352#endif
2353 }
2354
2355
2356 /*! \brief It synchronize the properties and position of the ghost particles
2357 *
2358 * \tparam prp list of properties to get synchronize
2359 *
2360 * \param opt options WITH_POSITION, it send also the positional information of the particles
2361 *
2362 */
2363 template<int ... prp> inline void Ighost_get(size_t opt = WITH_POSITION)
2364 {
2365#ifdef SE_CLASS1
2366 Vcluster<Memory> & v_cl = create_vcluster<Memory>();
2367
2368 if (getDecomposition().getProcessorBounds().isValid() == false && size_local() != 0)
2369 {
2370 std::cerr << __FILE__ << ":" << __LINE__ << " Error the processor " << v_cl.getProcessUnitID() << " has particles, but is supposed to be unloaded" << std::endl;
2371 ACTION_ON_ERROR(VECTOR_DIST_ERROR_OBJECT);
2372 }
2373#endif
2374
2375#ifdef SE_CLASS3
2376 se3.template ghost_get_pre<prp...>(opt);
2377#endif
2378
2379 this->template ghost_get_<GHOST_ASYNC,prp...>(v_pos,v_prp,g_m,opt);
2380 }
2381
2382 /*! \brief It synchronize the properties and position of the ghost particles
2383 *
2384 * \tparam prp list of properties to get synchronize
2385 *
2386 * \param opt options WITH_POSITION, it send also the positional information of the particles
2387 *
2388 */
2389 template<int ... prp> inline void ghost_wait(size_t opt = WITH_POSITION)
2390 {
2391#ifdef SE_CLASS1
2392 Vcluster<Memory> & v_cl = create_vcluster<Memory>();
2393
2394 if (getDecomposition().getProcessorBounds().isValid() == false && size_local() != 0)
2395 {
2396 std::cerr << __FILE__ << ":" << __LINE__ << " Error the processor " << v_cl.getProcessUnitID() << " has particles, but is supposed to be unloaded" << std::endl;
2397 ACTION_ON_ERROR(VECTOR_DIST_ERROR_OBJECT);
2398 }
2399#endif
2400
2401 this->template ghost_wait_<prp...>(v_pos,v_prp,g_m,opt);
2402
2403#ifdef CUDA_GPU
2404 this->update(this->toKernel());
2405#endif
2406
2407#ifdef SE_CLASS3
2408
2409 this->template ghost_get_<prop::max_prop_real>(v_pos,v_prp,g_m,opt | KEEP_PROPERTIES);
2410
2411 se3.template ghost_get_post<prp...>(opt);
2412#endif
2413 }
2414
2415 /*! \brief It synchronize the properties and position of the ghost particles
2416 *
2417 * \tparam op which kind of operation to apply
2418 * \tparam prp list of properties to get synchronize
2419 *
2420 * \param opt_ options. It is an optional parameter.
2421 * If set to NO_CHANGE_ELEMENTS the communication has lower latencies. This option has some usage limitations, so please refere to the examples
2422 * for further explanations
2423 *
2424 *
2425 */
2426 template<template<typename,typename> class op, int ... prp> inline void ghost_put(size_t opt_ = NONE)
2427 {
2428#ifdef SE_CLASS3
2429 se3.template ghost_put<prp...>();
2430#endif
2431 this->template ghost_put_<op,prp...>(v_pos,v_prp,g_m,opt_);
2432 }
2433
2434 /*! \brief Remove a set of elements from the distributed vector
2435 *
2436 * \warning keys must be sorted
2437 *
2438 * \param keys vector of elements to eliminate
2439 * \param start from where to eliminate
2440 *
2441 */
2442 void remove(openfpm::vector<size_t> & keys, size_t start = 0)
2443 {
2444 v_pos.remove(keys, start);
2445 v_prp.remove(keys, start);
2446
2447 g_m -= keys.size();
2448 }
2449
2450 /*! \brief Remove one element from the distributed vector
2451 *
2452 * \param key remove one element from the vector
2453 *
2454 */
2455 void remove(size_t key)
2456 {
2457 v_pos.remove(key);
2458 v_prp.remove(key);
2459
2460 g_m--;
2461 }
2462
2463 /*! \brief Add the computation cost on the decomposition coming
2464 * from the particles
2465 *
2466 * \param md Model to use
2467 * \param vd external vector to add for the computational cost
2468 *
2469 */
2470 template <typename Model=ModelLin>inline void addComputationCosts(const self & vd, Model md=Model())
2471 {
2472 CellDecomposer_sm<dim, St, shift<dim,St>> cdsm;
2473
2474 Decomposition & dec = getDecomposition();
2475
2476 cdsm.setDimensions(dec.getDomain(), dec.getDistGrid().getSize(), 0);
2477
2478 auto it = vd.getDomainIterator();
2479
2480 while (it.isNext())
2481 {
2482 Point<dim,St> p = vd.getPos(it.get());
2483 size_t v = cdsm.getCell(p);
2484
2485 md.addComputation(dec,vd,v,it.get().getKey());
2486
2487 ++it;
2488 }
2489 }
2490
2491 /*! \brief Add the computation cost on the decomposition coming
2492 * from the particles
2493 *
2494 * \param md Model to use
2495 * \param ts It is an optional parameter approximately should be the number of ghost get between two
2496 * rebalancing at first decomposition this number can be ignored (default = 1) because not used
2497 *
2498 */
2499 template <typename Model=ModelLin> void finalizeComputationCosts(Model md=Model(), size_t ts = 1)
2500 {
2501 Decomposition & dec = getDecomposition();
2502 auto & dist = getDecomposition().getDistribution();
2503
2504 dec.computeCommunicationAndMigrationCosts(ts);
2505
2506 // Go throught all the sub-sub-domains and apply the model
2507
2508 for (size_t i = 0 ; i < dist.getNOwnerSubSubDomains(); i++)
2509 {md.applyModel(dec,dist.getOwnerSubSubDomain(i));}
2510
2511 dist.setDistTol(md.distributionTol());
2512 }
2513
2514 /*! \brief Initialize the computational cost
2515 *
2516 */
2517 void initializeComputationCosts()
2518 {
2519 Decomposition & dec = getDecomposition();
2520 auto & dist = getDecomposition().getDistribution();
2521
2522 for (size_t i = 0; i < dist.getNOwnerSubSubDomains() ; i++)
2523 {dec.setSubSubDomainComputationCost(dist.getOwnerSubSubDomain(i) , 1);}
2524 }
2525
2526 /*! \brief Add the computation cost on the decomposition coming
2527 * from the particles
2528 *
2529 * \param md Model to use
2530 * \param ts It is an optional parameter approximately should be the number of ghost get between two
2531 * rebalancing at first decomposition this number can be ignored (default = 1) because not used
2532 *
2533 */
2534 template <typename Model=ModelLin>inline void addComputationCosts(Model md=Model(), size_t ts = 1)
2535 {
2536 initializeComputationCosts();
2537
2538 addComputationCosts(*this,md);
2539
2540 finalizeComputationCosts(md,ts);
2541 }
2542
2543 /*! \brief Save the distributed vector on HDF5 file
2544 *
2545 * \param filename file where to save
2546 *
2547 */
2548 inline void save(const std::string & filename) const
2549 {
2550 HDF5_writer<VECTOR_DIST> h5s;
2551
2552 h5s.save(filename,v_pos,v_prp);
2553 }
2554
2555 /*! \brief Load the distributed vector from an HDF5 file
2556 *
2557 * \param filename file from where to load
2558 *
2559 */
2560 inline void load(const std::string & filename)
2561 {
2562 HDF5_reader<VECTOR_DIST> h5l;
2563
2564 h5l.load(filename,v_pos,v_prp,g_m);
2565 }
2566
2567 /*! \brief Reserve space for the internal vectors
2568 *
2569 * \param
2570 *
2571 */
2572 void setCapacity(unsigned int ns)
2573 {
2574 v_pos.reserve(ns);
2575 v_prp.reserve(ns);
2576 }
2577
2578 /*! \brief Output particle position and properties
2579 *
2580 * \param out output filename
2581 * \param opt VTK_WRITER, CSV_WRITER, it is also possible to choose the format for VTK
2582 * FORMAT_BINARY. (the default is ASCII format)
2583 *
2584 * \return true if the file has been written without error
2585 *
2586 */
2587 inline bool write(std::string out ,int opt = VTK_WRITER)
2588 {
2589 return write(out,"",opt);
2590 }
2591
2592 /*! \brief Output particle position and properties
2593 *
2594 * \param out output filename
2595 * \param meta_info meta information example ("time = 1.234" add the information time to the VTK file)
2596 * \param opt VTK_WRITER, CSV_WRITER, it is also possible to choose the format for VTK
2597 * FORMAT_BINARY. (the default is ASCII format)
2598 *
2599 * \return true if the file has been written without error
2600 *
2601 */
2602 inline bool write(std::string out, std::string meta_info ,int opt = VTK_WRITER)
2603 {
2604 Vcluster<Memory> & v_cl = create_vcluster<Memory>();
2605
2606 if ((opt & 0x0FFF0000) == CSV_WRITER)
2607 {
2608 // CSVWriter test
2609 CSVWriter<openfpm::vector<Point<dim, St>,Memory,layout_base>,
2610 openfpm::vector<prop,Memory,layout_base> > csv_writer;
2611
2612 std::string output = std::to_string(out + "_" + std::to_string(v_cl.getProcessUnitID()) + std::to_string(".csv"));
2613
2614 // Write the CSV
2615 return csv_writer.write(output,v_pos,v_prp);
2616 }
2617 else
2618 {
2619 file_type ft = file_type::ASCII;
2620
2621 if (opt & FORMAT_BINARY)
2622 ft = file_type::BINARY;
2623
2624 // VTKWriter for a set of points
2625 VTKWriter<boost::mpl::pair<openfpm::vector<Point<dim, St>,Memory,layout_base>,
2626 openfpm::vector<prop,Memory,layout_base>>,
2627 VECTOR_POINTS> vtk_writer;
2628 vtk_writer.add(v_pos,v_prp,g_m);
2629
2630 std::string output = std::to_string(out + "_" + std::to_string(v_cl.getProcessUnitID()) + std::to_string(".vtk"));
2631
2632 // Write the VTK file
2633 return vtk_writer.write(output,prp_names,"particles",meta_info,ft);
2634 }
2635 }
2636
2637 /*! \brief Delete the particles on the ghost
2638 *
2639 *
2640 */
2641 void deleteGhost()
2642 {
2643 v_pos.resize(g_m);
2644 v_prp.resize(g_m);
2645 }
2646
2647 /*! \brief Resize the vector (locally)
2648 *
2649 * \warning It automatically delete the ghosts
2650 *
2651 * \param rs
2652 *
2653 */
2654 void resize(size_t rs)
2655 {
2656 deleteGhost();
2657
2658 v_pos.resize(rs);
2659 v_prp.resize(rs);
2660
2661 g_m = rs;
2662
2663#ifdef CUDA_GPU
2664 this->update(this->toKernel());
2665#endif
2666 }
2667
2668 /*! \brief Output particle position and properties
2669 *
2670 * \param out output
2671 * \param iteration (we can append the number at the end of the file_name)
2672 * \param meta_info meta information example ("time = 1.234" add the information time to the VTK file)
2673 * \param opt VTK_WRITER, CSV_WRITER, it is also possible to choose the format for VTK
2674 * FORMAT_BINARY. (the default is ASCII format)
2675 *
2676 * \return if the file has been written correctly
2677 *
2678 */
2679 inline bool write_frame(std::string out, size_t iteration, int opt = VTK_WRITER)
2680 {
2681 return write_frame(out,iteration,"",opt);
2682 }
2683
2684 /*! \brief Output particle position and properties
2685 *
2686 * \param out output
2687 * \param iteration (we can append the number at the end of the file_name)
2688 * \param meta_info meta information example ("time = 1.234" add the information time to the VTK file)
2689 * \param opt VTK_WRITER, CSV_WRITER, it is also possible to choose the format for VTK
2690 * FORMAT_BINARY. (the default is ASCII format)
2691 *
2692 * \return if the file has been written correctly
2693 *
2694 */
2695 inline bool write_frame(std::string out, size_t iteration, std::string meta_info, int opt = VTK_WRITER)
2696 {
2697 Vcluster<Memory> & v_cl = create_vcluster<Memory>();
2698
2699 if ((opt & 0x0FFF0000) == CSV_WRITER)
2700 {
2701 // CSVWriter test
2702 CSVWriter<openfpm::vector<Point<dim, St>,Memory,layout_base>,
2703 openfpm::vector<prop,Memory,layout_base> > csv_writer;
2704
2705 std::string output = std::to_string(out + "_" + std::to_string(v_cl.getProcessUnitID()) + "_" + std::to_string(iteration) + std::to_string(".csv"));
2706
2707 // Write the CSV
2708 return csv_writer.write(output, v_pos, v_prp);
2709 }
2710 else
2711 {
2712 file_type ft = file_type::ASCII;
2713
2714 if (opt & FORMAT_BINARY)
2715 ft = file_type::BINARY;
2716
2717 // VTKWriter for a set of points
2718 VTKWriter<boost::mpl::pair<openfpm::vector<Point<dim, St>,Memory,layout_base>,
2719 openfpm::vector<prop,Memory,layout_base>>, VECTOR_POINTS> vtk_writer;
2720 vtk_writer.add(v_pos,v_prp,g_m);
2721
2722 std::string output = std::to_string(out + "_" + std::to_string(v_cl.getProcessUnitID()) + "_" + std::to_string(iteration) + std::to_string(".vtk"));
2723
2724 // Write the VTK file
2725 return vtk_writer.write(output,prp_names,"particles",meta_info,ft);
2726 }
2727 }
2728
2729 /*! \brief Get the Celllist parameters
2730 *
2731 * \param r_cut spacing of the cell-list
2732 * \param div division required for the cell-list
2733 * \param box where the Cell list must be defined (In general Processor domain + Ghost)
2734 * \param enlarge Optionally a request to make the space a littler bit larger than Processor domain + Ghost
2735 * keeping the cell list consistent with the requests
2736 *
2737 */
2738 void getCellListParams(St r_cut, size_t (&div)[dim],Box<dim, St> & box, Ghost<dim,St> enlarge = Ghost<dim,St>(0.0))
2739 {
2740 // get the processor bounding box
2741 Box<dim, St> pbox = getDecomposition().getProcessorBounds();
2742
2743 // enlarge the processor bounding box by the ghost
2744 Ghost<dim,St> g = getDecomposition().getGhost();
2745 pbox.enlarge(g);
2746
2747 cl_param_calculate(pbox, div,r_cut,enlarge);
2748
2749 // output the fixed domain
2750 box = pbox;
2751 }
2752
2753 /*! \brief It return the id of structure in the allocation list
2754 *
2755 * \see print_alloc and SE_CLASS2
2756 *
2757 * \return the id
2758 *
2759 */
2760 long int who()
2761 {
2762#ifdef SE_CLASS2
2763 return check_whoami(this,8);
2764#else
2765 return -1;
2766#endif
2767 }
2768
2769 /*! \brief Get the Virtual Cluster machine
2770 *
2771 * \return the Virtual cluster machine
2772 *
2773 */
2774
2775 Vcluster<Memory> & getVC()
2776 {
2777#ifdef SE_CLASS2
2778 check_valid(this,8);
2779#endif
2780 return create_vcluster<Memory>();;
2781 }
2782
2783 /*! \brief return the position vector of all the particles
2784 *
2785 * \return the particle position vector
2786 *
2787 */
2788 const openfpm::vector<Point<dim, St>,Memory,layout_base> & getPosVector() const
2789 {
2790 return v_pos;
2791 }
2792
2793 /*! \brief return the position vector of all the particles
2794 *
2795 * \return the particle position vector
2796 *
2797 */
2798 openfpm::vector<Point<dim, St>,Memory,layout_base> & getPosVector()
2799 {
2800 return v_pos;
2801 }
2802
2803 /*! \brief return the property vector of all the particles
2804 *
2805 * \return the particle property vector
2806 *
2807 */
2808 const openfpm::vector<prop,Memory,layout_base> & getPropVector() const
2809 {
2810 return v_prp;
2811 }
2812
2813 /*! \brief return the property vector of all the particles
2814 *
2815 * \return the particle property vector
2816 *
2817 */
2818 openfpm::vector<prop,Memory,layout_base> & getPropVector()
2819 {
2820 return v_prp;
2821 }
2822
2823 /*! \brief return the position vector of all the particles
2824 *
2825 * \return the particle position vector
2826 *
2827 */
2828 const openfpm::vector<Point<dim, St>,Memory,layout_base> & getPosVectorSort() const
2829 {
2830 return v_pos_out;
2831 }
2832
2833 /*! \brief return the position vector of all the particles
2834 *
2835 * \return the particle position vector
2836 *
2837 */
2838 openfpm::vector<Point<dim, St>,Memory,layout_base> & getPosVectorSort()
2839 {
2840 return v_pos_out;
2841 }
2842
2843 /*! \brief return the property vector of all the particles
2844 *
2845 * \return the particle property vector
2846 *
2847 */
2848 const openfpm::vector<prop,Memory,layout_base> & getPropVectorSort() const
2849 {
2850 return v_prp_out;
2851 }
2852
2853 /*! \brief return the property vector of all the particles
2854 *
2855 * \return the particle property vector
2856 *
2857 */
2858 openfpm::vector<prop,Memory,layout_base> & getPropVectorSort()
2859 {
2860 return v_prp_out;
2861 }
2862
2863 /*! \brief It return the sum of the particles in the previous processors
2864 *
2865 * \return the particles number
2866 *
2867 */
2868 size_t accum()
2869 {
2870 Vcluster<Memory> & v_cl = create_vcluster<Memory>();
2871
2872 openfpm::vector<size_t> accu;
2873
2874 size_t sz = size_local();
2875
2876 v_cl.allGather(sz,accu);
2877 v_cl.execute();
2878
2879 sz = 0;
2880
2881 for (size_t i = 0 ; i < v_cl.getProcessUnitID() ; i++)
2882 {sz += accu.get(i);}
2883
2884 return sz;
2885 }
2886
2887 /*! \brief Get a special particle iterator able to iterate across particles using
2888 * symmetric crossing scheme
2889 *
2890 * \param NN Cell-List neighborhood
2891 *
2892 * \return Particle iterator
2893 *
2894 */
2895 template<typename cli> ParticleItCRS_Cells<dim,cli,decltype(v_pos)> getParticleIteratorCRS_Cell(cli & NN)
2896 {
2897#ifdef SE_CLASS3
2898 se3.getIterator();
2899#endif
2900
2901#ifdef SE_CLASS1
2902 if (!(opt & BIND_DEC_TO_GHOST))
2903 {
2904 std::cerr << __FILE__ << ":" << __LINE__ << " Error the vector has been constructed without BIND_DEC_TO_GHOST, getParticleIteratorCRS_Cell require the vector to be constructed with BIND_DEC_TO_GHOST option " << std::endl;
2905 ACTION_ON_ERROR(VECTOR_DIST_ERROR_OBJECT);
2906 }
2907#endif
2908
2909 // Shift
2910 grid_key_dx<dim> shift;
2911
2912 // Add padding
2913 for (size_t i = 0 ; i < dim ; i++)
2914 shift.set_d(i,NN.getPadding(i));
2915
2916 grid_sm<dim,void> gs = NN.getInternalGrid();
2917
2918 getDecomposition().setNNParameters(shift,gs);
2919
2920 // First we check that
2921 return ParticleItCRS_Cells<dim,cli,decltype(v_pos)>(NN,getDecomposition().getCRSDomainCells(),
2922 getDecomposition().getCRSAnomDomainCells(),
2923 NN.getNNc_sym());
2924 }
2925
2926 /*! \brief Set the properties names
2927 *
2928 * It is useful to specify name for the properties in vtk writers
2929 *
2930 * \param names set of properties names
2931 *
2932 */
2933 void setPropNames(const openfpm::vector<std::string> & names)
2934 {
2935 prp_names = names;
2936 }
2937
2938 /*! \brief Get a special particle iterator able to iterate across particles using
2939 * symmetric crossing scheme
2940 *
2941 * \param NN Verlet list neighborhood
2942 *
2943 * \return Particle iterator
2944 *
2945 */
2946 template<typename vrl> openfpm::vector_key_iterator_seq<typename vrl::Mem_type_type::local_index_type> getParticleIteratorCRS(vrl & NN)
2947 {
2948#ifdef SE_CLASS1
2949 if (!(opt & BIND_DEC_TO_GHOST))
2950 {
2951 std::cerr << __FILE__ << ":" << __LINE__ << " Error the vector has been constructed without BIND_DEC_TO_GHOST, getParticleIteratorCRS_Cell require the vector to be constructed with BIND_DEC_TO_GHOST option " << std::endl;
2952 ACTION_ON_ERROR(VECTOR_DIST_ERROR_OBJECT);
2953 }
2954#endif
2955
2956 // First we check that
2957 return openfpm::vector_key_iterator_seq<typename vrl::Mem_type_type::local_index_type>(NN.getParticleSeq());
2958 }
2959
2960 /*! \brief Return from which cell we have to start in case of CRS interation
2961 * scheme
2962 *
2963 * \param NN cell-list
2964 *
2965 * \return The starting cell point
2966 *
2967 */
2968 template<typename Celllist> grid_key_dx<dim> getCRSStart(Celllist & NN)
2969 {
2970 return NN.getStartDomainCell();
2971 }
2972
2973 /*! \brief Return from which cell we have to stop in case of CRS interation
2974 * scheme
2975 *
2976 * \param NN cell-list
2977 *
2978 * \return The stop cell point
2979 *
2980 */
2981 template<typename Celllist> grid_key_dx<dim> getCRSStop(Celllist & NN)
2982 {
2983 grid_key_dx<dim> key = NN.getStopDomainCell();
2984
2985 for (size_t i = 0 ; i < dim ; i++)
2986 key.set_d(i,key.get(i) + 1);
2987 return key;
2988 }
2989
2990
2991#ifdef CUDA_GPU
2992
2993 /*! \brief Convert the grid into a data-structure compatible for computing into GPU
2994 *
2995 * The object created can be considered like a reference of the original
2996 *
2997 * \return an usable vector in the kernel
2998 *
2999 */
3000 template<unsigned int ... prp> vector_dist_ker<dim,St,prop,layout_base> toKernel()
3001 {
3002 vector_dist_ker<dim,St,prop,layout_base> v(g_m,v_pos.toKernel(), v_prp.toKernel());
3003
3004 return v;
3005 }
3006
3007 /*! \brief Return the internal vector_dist_ker_list structure
3008 *
3009 *
3010 *
3011 * \return
3012 */
3013 vector_dist_ker_list<vector_dist_ker<dim,St,prop,layout_base>> & private_get_vector_dist_ker_list()
3014 {
3015 return *this;
3016 }
3017
3018 /*! \brief Convert the grid into a data-structure compatible for computing into GPU
3019 *
3020 * In comparison with toGPU return a version sorted better for coalesced memory
3021 *
3022 * \return an usable vector in the kernel
3023 *
3024 */
3025 template<unsigned int ... prp> vector_dist_ker<dim,St,prop,layout_base> toKernel_sorted()
3026 {
3027 vector_dist_ker<dim,St,prop,layout_base> v(g_m,v_pos_out.toKernel(), v_prp_out.toKernel());
3028
3029 return v;
3030 }
3031
3032 /*! \brief Move the memory from the device to host memory
3033 *
3034 * \tparam property to move use POS_PROP for position property
3035 *
3036 */
3037 template<unsigned int ... prp> void deviceToHostProp()
3038 {
3039 v_prp.template deviceToHost<prp ...>();
3040 }
3041
3042 /*! \brief Move the memory from the device to host memory
3043 *
3044 * \tparam property to move use POS_PROP for position property
3045 *
3046 * \param start point
3047 * \param stop point (included)
3048 *
3049 */
3050 template<unsigned int ... prp> void deviceToHostProp(size_t start, size_t stop)
3051 {
3052 v_prp.template deviceToHost<prp ...>(start,stop);
3053 }
3054
3055 /*! \brief Move the memory from the device to host memory
3056 *
3057 * \tparam property to move use POS_PROP for position property
3058 *
3059 */
3060 void deviceToHostPos()
3061 {
3062 v_pos.template deviceToHost<0>();
3063 }
3064
3065 /*! \brief Move the memory from the device to host memory
3066 *
3067 * \tparam property to move use POS_PROP for position property
3068 *
3069 */
3070 template<unsigned int ... prp> void hostToDeviceProp()
3071 {
3072 v_prp.template hostToDevice<prp ...>();
3073 }
3074
3075 /*! \brief Move the memory from the device to host memory
3076 *
3077 * \tparam property to move use POS_PROP for position property
3078 *
3079 */
3080 void hostToDevicePos()
3081 {
3082 v_pos.template hostToDevice<0>();
3083 }
3084
3085 void set_g_m(size_t g_m)
3086 {
3087 this->g_m = g_m;
3088 }
3089
3090 /*! \brief this function sort the vector
3091 *
3092 * \warning this function kill the ghost (and invalidate the Cell-list)
3093 *
3094 * \param NN Cell-list to use to reorder
3095 *
3096 */
3097 template<typename CellList_type>
3098 void make_sort(CellList_type & NN)
3099 {
3100 deleteGhost();
3101
3102 updateCellList(NN,false,cl_construct_opt::Only_reorder);
3103
3104 // construct a cell-list forcing to create a sorted version without ghost
3105
3106 // swap the sorted with the non-sorted
3107 v_pos.swap(v_pos_out);
3108 v_prp.swap(v_prp_out);
3109 }
3110
3111 /*! \brief this function sort the vector
3112 *
3113 * \note this function does not kill the ghost and does not invalidate the Cell-list)
3114 *
3115 * \param NN Cell-list to use to reorder
3116 *
3117 */
3118 template<typename CellList_type>
3119 void make_sort_from(CellList_type & cl)
3120 {
3121#if defined(__NVCC__)
3122
3123 auto ite = v_pos.getGPUIteratorTo(g_m-1);
3124
3125 CUDA_LAUNCH((merge_sort_all<decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(cl.getNonSortToSort().toKernel())>),
3126 ite,
3127 v_pos_out.toKernel(),v_prp_out.toKernel(),v_pos.toKernel(),v_prp.toKernel(),cl.getNonSortToSort().toKernel());
3128
3129 v_pos.swap(v_pos_out);
3130 v_prp.swap(v_prp_out);
3131
3132#endif
3133 }
3134
3135 /*! \brief This function compare if the host and device buffer position match up to some tolerance
3136 *
3137 * \tparam prp property to check
3138 *
3139 * \param tol tollerance absolute
3140 *
3141 */
3142 bool compareHostAndDevicePos(St tol, St near = -1.0, bool silent = false)
3143 {
3144 return compare_host_device<Point<dim,St>,0>::compare(v_pos,tol,near,silent);
3145 }
3146
3147
3148 /*! \brief This function compare if the host and device buffer position match up to some tolerance
3149 *
3150 * \tparam prp property to check
3151 *
3152 * \param tol tollerance absolute
3153 *
3154 */
3155 template<unsigned int prp>
3156 bool compareHostAndDeviceProp(St tol, St near = -1.0, bool silent = false)
3157 {
3158 return compare_host_device<typename boost::mpl::at<typename prop::type,
3159 boost::mpl::int_<prp> >::type,prp>::compare(v_prp,tol,near,silent);
3160 }
3161
3162#else
3163
3164 /*! \brief Move the memory from the device to host memory
3165 *
3166 * \tparam property to move use POS_PROP for position property
3167 *
3168 */
3169 template<unsigned int ... prp> void deviceToHostProp()
3170 {}
3171
3172 /*! \brief Move the memory from the device to host memory
3173 *
3174 * \tparam property to move use POS_PROP for position property
3175 *
3176 */
3177 void deviceToHostPos()
3178 {}
3179
3180 /*! \brief Move the memory from the device to host memory
3181 *
3182 * \tparam property to move use POS_PROP for position property
3183 *
3184 */
3185 template<unsigned int ... prp> void hostToDeviceProp()
3186 {}
3187
3188 /*! \brief Move the memory from the device to host memory
3189 *
3190 * \tparam property to move use POS_PROP for position property
3191 *
3192 */
3193 void hostToDevicePos()
3194 {}
3195
3196#endif
3197
3198
3199#ifdef SE_CLASS3
3200
3201 se_class3_vector<prop::max_prop,dim,St,Decomposition,self> & get_se_class3()
3202 {
3203 return se3;
3204 }
3205
3206#endif
3207};
3208
3209
3210template<unsigned int dim, typename St, typename prop, typename Decomposition = CartDecomposition<dim,St,CudaMemory,memory_traits_inte>> using vector_dist_gpu = vector_dist<dim,St,prop,Decomposition,CudaMemory,memory_traits_inte>;
3211template<unsigned int dim, typename St, typename prop, typename Decomposition = CartDecomposition<dim,St,HeapMemory,memory_traits_inte>> using vector_dist_soa = vector_dist<dim,St,prop,Decomposition,HeapMemory,memory_traits_inte>;
3212template<unsigned int dim, typename St, typename prop, typename Decomposition = CartDecomposition<dim,St,CudaMemory,memory_traits_inte>> using vector_dist_dev = vector_dist<dim,St,prop,Decomposition,CudaMemory,memory_traits_inte>;
3213
3214#endif /* VECTOR_HPP_ */
3215