1/*
2 * CartDecomposition.hpp
3 *
4 * Created on: Oct 07, 2015
5 * Author: Pietro Incardona, Antonio Leo
6 */
7
8#ifndef CARTDECOMPOSITION_HPP
9#define CARTDECOMPOSITION_HPP
10
11#include "config.h"
12#include <cmath>
13#include "VCluster/VCluster.hpp"
14#include "Graph/CartesianGraphFactory.hpp"
15#include "Decomposition.hpp"
16#include "Vector/map_vector.hpp"
17#include <vector>
18#include <initializer_list>
19#include "SubdomainGraphNodes.hpp"
20#include "dec_optimizer.hpp"
21#include "Space/Shape/Box.hpp"
22#include "Space/Shape/Point.hpp"
23#include "NN/CellList/CellDecomposer.hpp"
24#include <unordered_map>
25#include "NN/CellList/CellList.hpp"
26#include "Space/Ghost.hpp"
27#include "common.hpp"
28#include "ie_loc_ghost.hpp"
29#include "ie_ghost.hpp"
30#include "nn_processor.hpp"
31#include "GraphMLWriter/GraphMLWriter.hpp"
32#include "Distribution/ParMetisDistribution.hpp"
33#include "Distribution/DistParMetisDistribution.hpp"
34#include "Distribution/MetisDistribution.hpp"
35#include "DLB/DLB.hpp"
36#include "util/se_util.hpp"
37#include "util/mathutil.hpp"
38#include "CartDecomposition_ext.hpp"
39#include "data_type/aggregate.hpp"
40#include "Domain_NN_calculator_cart.hpp"
41#include "cuda/CartDecomposition_gpu.cuh"
42#include "Domain_icells_cart.hpp"
43
44#define CARTDEC_ERROR 2000lu
45
46enum dec_options
47{
48 DEC_NONE = 0,
49 DEC_SKIP_ICELL = 1
50};
51
52/*! \brief It spread the sub-sub-domain on a regular cartesian grid of size dim
53 *
54 * \warning this function only guarantee that the division on each direction is
55 * 2^n with some n and does not guarantee that the number of
56 * sub-sub-domain is preserved
57 *
58 * \param div number of division on each direction as output
59 * \param n_sub number of sub-domain
60 * \param dim_r dimension reduction
61 *
62 */
63template<unsigned int dim> static void nsub_to_div2(size_t (& div)[dim], size_t n_sub, size_t dim_r)
64{
65 for (size_t i = 0; i < dim; i++)
66 {
67 if (i < dim_r)
68 {div[i] = openfpm::math::round_big_2(pow(n_sub, 1.0 / dim_r));}
69 else
70 {div[i] = 1;}
71 }
72}
73
74/*! \brief It spread the sub-sub-domain on a regular cartesian grid of size dim
75 *
76 * \warning this function only guarantee that the division on each direction is
77 * 2^n with some n and does not guarantee that the number of
78 * sub-sub-domain is preserved
79 *
80 * \param div number of division on each direction as output
81 * \param n_sub number of sub-domain
82 * \param dim_r dimension reduction
83 *
84 */
85template<unsigned int dim> static void nsub_to_div(size_t (& div)[dim], size_t n_sub, size_t dim_r)
86{
87 for (size_t i = 0; i < dim; i++)
88 {
89 if (i < dim_r)
90 {div[i] = std::floor(pow(n_sub, 1.0 / dim_r));}
91 else
92 {div[i] = 1;}
93 }
94}
95
96#define COMPUTE_SKIN_SUB 1
97
98/**
99 * \brief This class decompose a space into sub-sub-domains and distribute them across processors
100 *
101 * \tparam dim is the dimensionality of the physical domain we are going to decompose.
102 * \tparam T type of the space we decompose, Real, Integer, Complex ...
103 * \tparam Memory Memory factory used to allocate memory
104 * \tparam Distribution type of distribution, can be ParMetisDistribution or MetisDistribution
105 *
106 * Given an N-dimensional space, this class decompose the space into a Cartesian grid of small
107 * sub-sub-domain. To each sub-sub-domain is assigned an id that identify at which processor is
108 * assigned (in general the union of all the sub-sub-domain assigned to a processor is
109 * simply connected space), a second step merge several sub-sub-domain with same id into bigger region
110 * sub-domain. Each sub-domain has an extended space called ghost part
111 *
112 * Assuming that VCluster.getProcessUnitID(), equivalent to the MPI processor rank, return the processor local
113 * processor id, we define
114 *
115 * * local processor: processor rank
116 * * local sub-domain: sub-domain given to the local processor
117 * * external ghost box: (or ghost box) are the boxes that compose the ghost space of the processor, or the
118 * boxes produced expanding every local sub-domain by the ghost extension and intersecting with the sub-domain
119 * of the other processors
120 * * Near processors are the processors adjacent to the local processor, where with adjacent we mean all the processor
121 * that has a non-zero intersection with the ghost part of the local processor, or all the processors that
122 * produce non-zero external boxes with the local processor, or all the processor that should communicate
123 * in case of ghost data synchronization
124 * * internal ghost box: is the part of ghost of the near processor that intersect the space of the
125 * processor, or the boxes produced expanding the sub-domain of the near processors with the local sub-domain
126 * * Near processor sub-domain: is a sub-domain that live in the a near (or contiguous) processor
127 * * Near processor list: the list of all the near processor of the local processor (each processor has a list
128 * of the near processor)
129 * * Local ghosts internal or external are all the ghosts that does not involve inter-processor communications
130 *
131 * \see calculateGhostBoxes() for a visualization of internal and external ghost boxes
132 *
133 * ### Create a Cartesian decomposition object on a Box space, distribute, calculate internal and external ghost boxes
134 *
135 * \snippet CartDecomposition_unit_test.hpp Create CartDecomposition
136 *
137 */
138
139template<unsigned int dim, typename T, typename Memory, template <typename> class layout_base, typename Distribution>
140class CartDecomposition: public ie_loc_ghost<dim, T,layout_base, Memory>,
141 public nn_prcs<dim, T,layout_base,Memory>,
142 public ie_ghost<dim,T,Memory,layout_base>,
143 public domain_nn_calculator_cart<dim>,
144 public domain_icell_calculator<dim,T,layout_base,Memory>
145{
146public:
147
148 //! Type of the domain we are going to decompose
149 typedef T domain_type;
150
151 //! It simplify to access the SpaceBox element
152 typedef SpaceBox<dim, T> Box;
153
154 //! This class is base of itself
155 typedef CartDecomposition<dim,T,Memory,layout_base,Distribution> base_type;
156
157 //! This class admit a class defined on an extended domain
158 typedef CartDecomposition_ext<dim,T,Memory,layout_base,Distribution> extended_type;
159
160protected:
161
162 //! bool that indicate whenever the buffer has been already transfer to device
163 bool host_dev_transfer = false;
164
165 //! Indicate the communication weight has been set
166 bool commCostSet = false;
167
168 //! This is the key type to access data_s, for example in the case of vector
169 //! acc_key is size_t
170 typedef typename openfpm::vector<SpaceBox<dim, T>,
171 Memory,
172 memory_traits_lin,
173 openfpm::vector_grow_policy_default,
174 openfpm::vect_isel<SpaceBox<dim, T>>::value>::access_key acc_key;
175
176 //! the set of all local sub-domain as vector
177 openfpm::vector<SpaceBox<dim, T>,Memory,layout_base> sub_domains;
178
179 //! the remote set of all sub-domains as vector of 'sub_domains' vectors
180 mutable openfpm::vector<Box_map<dim, T>,Memory,layout_base> sub_domains_global;
181
182 //! for each sub-domain, contain the list of the neighborhood processors
183 openfpm::vector<openfpm::vector<long unsigned int> > box_nn_processor;
184
185 //! Structure that contain for each sub-sub-domain box the processor id
186 //! exist for efficient global communication
187 CellList<dim,T,Mem_fast<Memory,int>,shift<dim,T>> fine_s;
188
189 //! Structure that store the cartesian grid information
190 grid_sm<dim, void> gr;
191
192 //! Structure that store the cartesian grid information
193 grid_sm<dim, void> gr_dist;
194
195 //! Structure that decompose the space into cells without creating them
196 //! useful to convert positions to CellId or sub-domain id in this case
197 CellDecomposer_sm<dim, T, shift<dim,T>> cd;
198
199 //! rectangular domain to decompose
200 ::Box<dim,T> domain;
201
202 //! Box Spacing
203 T spacing[dim];
204
205 //! Magnification factor between distribution and
206 //! decomposition
207 size_t magn[dim];
208
209 //! Runtime virtual cluster machine
210 Vcluster<> & v_cl;
211
212 //! Create distribution
213 Distribution dist;
214
215 //! Processor bounding box
216 ::Box<dim,T> bbox;
217
218 //! reference counter of the object in case is shared between object
219 long int ref_cnt;
220
221 //! ghost info
222 Ghost<dim,T> ghost;
223
224 //! Boundary condition info
225 size_t bc[dim];
226
227 //! Processor domain bounding box
228 ::Box<dim,size_t> proc_box;
229
230 //! set of Boxes produced by the decomposition optimizer
231 openfpm::vector<::Box<dim, size_t>> loc_box;
232
233 /*! \brief It convert the box from the domain decomposition into sub-domain
234 *
235 * The decomposition box from the domain-decomposition contain the box in integer
236 * coordinates. This box is converted into a continuos box. It also adjust loc_box
237 * if the distribution grid and the decomposition grid are different.
238 *
239 * \param loc_box local box
240 *
241 * \return the corresponding sub-domain
242 *
243 */
244 template<typename Memory_bx> SpaceBox<dim,T> convertDecBoxIntoSubDomain(encapc<1,::Box<dim,size_t>,Memory_bx> loc_box)
245 {
246 // A point with all coordinate to one
247 size_t one[dim];
248 for (size_t i = 0 ; i < dim ; i++) {one[i] = 1;}
249
250 SpaceBox<dim, size_t> sub_dc = loc_box;
251 SpaceBox<dim, size_t> sub_dce = sub_dc;
252 sub_dce.expand(one);
253 sub_dce.mul(magn);
254
255 // shrink by one
256 for (size_t i = 0 ; i < dim ; i++)
257 {
258 loc_box.template get<Box::p1>()[i] = sub_dce.getLow(i);
259 loc_box.template get<Box::p2>()[i] = sub_dce.getHigh(i) - 1;
260 }
261
262 SpaceBox<dim, T> sub_d(sub_dce);
263 sub_d.mul(spacing);
264 sub_d += domain.getP1();
265
266 // we add the
267
268 // Fixing sub-domains to cover all the domain
269
270 // Fixing sub_d
271 // if (loc_box) is at the boundary we have to ensure that the box span the full
272 // domain (avoiding rounding off error)
273 for (size_t i = 0; i < dim; i++)
274 {
275 if (sub_dc.getHigh(i) == gr.size(i) - 1)
276 {sub_d.setHigh(i, domain.getHigh(i));}
277
278 if (sub_dc.getLow(i) == 0)
279 {sub_d.setLow(i,domain.getLow(i));}
280 }
281
282 return sub_d;
283 }
284
285 void collect_all_sub_domains(openfpm::vector<Box_map<dim,T>,Memory,layout_base> & sub_domains_global)
286 {
287#ifdef SE_CLASS2
288 check_valid(this,8);
289#endif
290
291 sub_domains_global.clear();
292 openfpm::vector<Box_map<dim,T>,Memory,layout_base> bm;
293
294 for (size_t i = 0 ; i < sub_domains.size() ; i++)
295 {
296 bm.add();
297
298 bm.template get<0>(bm.size()-1) = ::SpaceBox<dim,T>(sub_domains.get(i));
299 bm.template get<1>(bm.size()-1) = v_cl.rank();
300 }
301
302 v_cl.SGather<decltype(bm),decltype(sub_domains_global),layout_base>(bm,sub_domains_global,0);
303
304 size_t size = sub_domains_global.size();
305
306 v_cl.max(size);
307 v_cl.execute();
308
309 sub_domains_global.resize(size);
310
311 v_cl.Bcast(sub_domains_global,0);
312 v_cl.execute();
313 }
314
315public:
316
317 void initialize_fine_s(const ::Box<dim,T> & domain)
318 {
319 fine_s.clear();
320 size_t div_g[dim];
321
322 // We reduce the size of the cells by a factor 8 in 3d 4 in 2d
323 for (size_t i = 0 ; i < dim ; i++)
324 {div_g[i] = (gr.size(i) == 1)?1:gr.size(i)/2;}
325
326 fine_s.Initialize(domain,div_g);
327 }
328
329 void construct_fine_s()
330 {
331 collect_all_sub_domains(sub_domains_global);
332
333 // now draw all sub-domains in fine-s
334
335 for (size_t i = 0 ; i < sub_domains_global.size() ; i++)
336 {
337
338 // get the cells this box span
339 const grid_key_dx<dim> p1 = fine_s.getCellGrid_me(sub_domains_global.template get<0>(i).getP1());
340 const grid_key_dx<dim> p2 = fine_s.getCellGrid_pe(sub_domains_global.template get<0>(i).getP2());
341
342 // Get the grid and the sub-iterator
343 auto & gi = fine_s.getGrid();
344 grid_key_dx_iterator_sub<dim> g_sub(gi,p1,p2);
345
346 // add the box-id to the cell list
347 while (g_sub.isNext())
348 {
349 auto key = g_sub.get();
350 fine_s.addCell(gi.LinId(key),i);
351
352 ++g_sub;
353 }
354 }
355
356 host_dev_transfer = false;
357 }
358
359 /*! \brief Constructor, it decompose and distribute the sub-domains across the processors
360 *
361 * \param v_cl Virtual cluster, used internally for communications
362 * \param bc boundary conditions
363 * \param opt option (one option is to construct)
364 *
365 */
366 void createSubdomains(Vcluster<> & v_cl, const size_t (& bc)[dim], size_t opt = 0)
367 {
368 int p_id = v_cl.getProcessUnitID();
369
370 // Calculate the total number of box and and the spacing
371 // on each direction
372 // Get the box containing the domain
373 SpaceBox<dim, T> bs = domain.getBox();
374
375 for (unsigned int i = 0; i < dim; i++)
376 {
377 // Calculate the spacing
378 spacing[i] = (bs.getHigh(i) - bs.getLow(i)) / gr.size(i);
379 }
380
381 // fill the structure that store the processor id for each sub-domain
382 initialize_fine_s(domain);
383
384 // Optimize the decomposition creating bigger spaces
385 // And reducing Ghost over-stress
386 dec_optimizer<dim, Graph_CSR<nm_v<dim>, nm_e>> d_o(dist.getGraph(), gr_dist.getSize());
387
388 // Ghost
389 Ghost<dim,long int> ghe;
390
391 // Set the ghost
392 for (size_t i = 0 ; i < dim ; i++)
393 {
394 ghe.setLow(i,static_cast<long int>(ghost.getLow(i)/spacing[i]) - 1);
395 ghe.setHigh(i,static_cast<long int>(ghost.getHigh(i)/spacing[i]) + 1);
396 }
397
398 // optimize the decomposition or merge sub-sub-domain
399 d_o.template optimize<nm_v_sub_id, nm_v_proc_id>(dist.getGraph(), p_id, loc_box, box_nn_processor,ghe,bc);
400
401 // Initialize
402 if (loc_box.size() > 0)
403 {
404 bbox = convertDecBoxIntoSubDomain(loc_box.get(0));
405 proc_box = loc_box.get(0);
406 sub_domains.add(bbox);
407 }
408 else
409 {
410 // invalidate all the boxes
411 for (size_t i = 0 ; i < dim ; i++)
412 {
413 proc_box.setLow(i,0.0);
414 proc_box.setHigh(i,0);
415
416 bbox.setLow(i,0.0);
417 bbox.setHigh(i,0);
418 }
419 }
420
421 // convert into sub-domain
422 for (size_t s = 1; s < loc_box.size(); s++)
423 {
424 SpaceBox<dim,T> sub_d = convertDecBoxIntoSubDomain(loc_box.get(s));
425
426 // add the sub-domain
427 sub_domains.add(sub_d);
428
429 // Calculate the bound box
430 bbox.enclose(sub_d);
431 proc_box.enclose(loc_box.get(s));
432 }
433
434 nn_prcs<dim,T,layout_base,Memory>::create(box_nn_processor, sub_domains);
435 nn_prcs<dim,T,layout_base,Memory>::applyBC(domain,ghost,bc);
436
437 // fill fine_s structure
438 // fine_s structure contain the processor id for each sub-sub-domain
439 // with sub-sub-domain we mean the sub-domain decomposition before
440 // running dec_optimizer (before merging sub-domains)
441
442 ///////////////////////////////// TODO //////////////////////////////////////////
443
444 construct_fine_s();
445
446 Initialize_geo_cell_lists();
447 }
448
449 /*! \brief Initialize geo_cell lists
450 *
451 *
452 *
453 */
454 void Initialize_geo_cell_lists()
455 {
456 // Get the processor bounding Box
457 ::Box<dim,T> bound = getProcessorBounds();
458
459 // Check if the box is valid
460 if (bound.isValidN() == true)
461 {
462 // calculate the sub-divisions
463 size_t div[dim];
464 for (size_t i = 0; i < dim; i++)
465 {div[i] = (size_t) ((bound.getHigh(i) - bound.getLow(i)) / cd.getCellBox().getP2()[i]);}
466
467 // Initialize the geo_cell structure
468 ie_ghost<dim,T,Memory,layout_base>::Initialize_geo_cell(bound,div);
469
470 // Initialize shift vectors
471 ie_ghost<dim,T,Memory,layout_base>::generateShiftVectors(domain,bc);
472 }
473 }
474
475 /*! \brief Calculate communication and migration costs
476 *
477 * \param ts how many timesteps have passed since last calculation, used to approximate the cost
478 */
479 void computeCommunicationAndMigrationCosts(size_t ts)
480 {
481 float migration = 0;
482
483 SpaceBox<dim, T> cellBox = cd.getCellBox();
484 float b_s = static_cast<float>(cellBox.getHigh(0));
485 float gh_s = static_cast<float>(ghost.getHigh(0));
486
487 // compute the gh_area for 2 dim case
488 float gh_v = (gh_s * b_s);
489
490 // multiply for sub-sub-domain side for each domain
491 for (size_t i = 2; i < dim; i++)
492 {
493 /* coverity[dead_error_line] */
494 gh_v *= b_s;
495 }
496
497 size_t norm = (size_t) (1.0 / gh_v);
498
499 migration = pow(b_s, dim);
500
501 size_t prev = 0;
502
503 for (size_t i = 0; i < dist.getNSubSubDomains(); i++)
504 {
505 dist.setMigrationCost(i, norm * migration /* * dist.getSubSubDomainComputationCost(i)*/ );
506
507 for (size_t s = 0; s < dist.getNSubSubDomainNeighbors(i); s++)
508 {
509 // We have to remove dist.getSubSubDomainComputationCost(i) otherwise the graph is
510 // not directed
511 dist.setCommunicationCost(i, s, 1 /** dist.getSubSubDomainComputationCost(i)*/ * ts);
512 }
513 prev += dist.getNSubSubDomainNeighbors(i);
514 }
515
516 commCostSet = true;
517 }
518
519 /*! \brief Create the sub-domain that decompose your domain
520 *
521 */
522 void CreateSubspaces()
523 {
524 // Create a grid where each point is a space
525 grid_sm<dim, void> g(div);
526
527 // create a grid_key_dx iterator
528 grid_key_dx_iterator<dim> gk_it(g);
529
530 // Divide the space into subspaces
531 while (gk_it.isNext())
532 {
533 //! iterate through all subspaces
534 grid_key_dx<dim> key = gk_it.get();
535
536 //! Create a new subspace
537 SpaceBox<dim, T> tmp;
538
539 //! fill with the Margin of the box
540 for (int i = 0; i < dim; i++)
541 {
542 tmp.setHigh(i, (key.get(i) + 1) * spacing[i]);
543 tmp.setLow(i, key.get(i) * spacing[i]);
544 }
545
546 //! add the space box
547 sub_domains.add(tmp);
548
549 // Next sub-domain
550 ++gk_it;
551 }
552 }
553
554
555 /*! \brief It calculate the internal ghost boxes
556 *
557 * Example: Processor 10 calculate
558 * B8_0 B9_0 B9_1 and B5_0
559 *
560 *
561 *
562 \verbatim
563
564 +----------------------------------------------------+
565 | |
566 | Processor 8 |
567 | Sub+domain 0 +-----------------------------------+
568 | | |
569 | | |
570 ++--------------+---+---------------------------+----+ Processor 9 |
571 | | | B8_0 | | Subdomain 0 |
572 | +------------------------------------+ |
573 | | | | | |
574 | | | |B9_0| |
575 | | B | Local processor | | |
576 | Processor 5 | 5 | Subdomain 0 | | |
577 | Subdomain 0 | _ | +----------------------------------------+
578 | | 0 | | | |
579 | | | | | |
580 | | | | | Processor 9 |
581 | | | |B9_1| Subdomain 1 |
582 | | | | | |
583 | | | | | |
584 | | | | | |
585 +--------------+---+---------------------------+----+ |
586 | |
587 +-----------------------------------+
588
589
590 \endverbatim
591
592 and also
593 G8_0 G9_0 G9_1 G5_0 (External ghost boxes)
594
595\verbatim
596
597 +----------------------------------------------------+
598 | Processor 8 |
599 | Subdomain 0 +-----------------------------------+
600 | | |
601 | +---------------------------------------------+ |
602 | | G8_0 | | |
603 +-----+---------------+------------------------------------+ | Processor 9 |
604 | | | | | Subdomain 0 |
605 | | | |G9_0| |
606 | | | | | |
607 | | | | | |
608 | | | Local processor | | |
609 | Processor 5 | | Sub+domain 0 | | |
610 | Subdomain 0 | | +-----------------------------------+
611 | | | | | |
612 | | G | | | |
613 | | 5 | | | Processor 9 |
614 | | | | | | Subdomain 1 |
615 | | 0 | |G9_1| |
616 | | | | | |
617 | | | | | |
618 +---------------------+------------------------------------+ | |
619 | | | |
620 +----------------------------------------+----+------------------------------+
621
622 \endverbatim
623
624 *
625 * ghost margins for each dimensions (p1 negative part) (p2 positive part)
626 *
627 *
628 \verbatim
629
630 ^ p2[1]
631 |
632 |
633 +----+----+
634 | |
635 | |
636 p1[0]<-----+ +----> p2[0]
637 | |
638 | |
639 +----+----+
640 |
641 v p1[1]
642
643 \endverbatim
644
645 *
646 *
647 */
648 void calculateGhostBoxes()
649 {
650 // Intersect all the local sub-domains with the sub-domains of the contiguous processors
651
652 // create the internal structures that store ghost information
653 ie_ghost<dim, T,Memory,layout_base>::create_box_nn_processor_ext(v_cl, ghost, sub_domains, box_nn_processor, *this);
654 ie_ghost<dim, T,Memory,layout_base>::create_box_nn_processor_int(v_cl, ghost, sub_domains, box_nn_processor, *this);
655
656 ie_loc_ghost<dim,T,layout_base,Memory>::create(sub_domains,domain,ghost,bc);
657
658 // Ghost box information must be re-offloaded
659 host_dev_transfer = false;
660 ie_ghost<dim, T,Memory,layout_base>::reset_host_dev_transfer();
661 }
662
663
664public:
665
666 //! Space dimensions
667 static constexpr int dims = dim;
668
669 //! Space type
670 typedef T stype;
671
672 //! Increment the reference counter
673 void incRef()
674 {ref_cnt++;}
675
676 //! Decrement the reference counter
677 void decRef()
678 {ref_cnt--;}
679
680 //! Return the reference counter
681 long int ref()
682 {
683 return ref_cnt;
684 }
685
686 /*! \brief Cartesian decomposition constructor
687 *
688 * \param v_cl Virtual cluster, used internally to handle or pipeline communication
689 *
690 */
691 CartDecomposition(Vcluster<> & v_cl)
692 :nn_prcs<dim, T, layout_base,Memory>(v_cl), v_cl(v_cl), dist(v_cl),ref_cnt(0)
693 {
694 // Reset the box to zero
695 bbox.zero();
696 }
697
698 /*! \brief Cartesian decomposition copy constructor
699 *
700 * \param cart object to copy
701 *
702 */
703 CartDecomposition(const CartDecomposition<dim,T,Memory,layout_base,Distribution> & cart)
704 :nn_prcs<dim,T,layout_base,Memory>(cart.v_cl),v_cl(cart.v_cl),dist(v_cl),ref_cnt(0)
705 {
706 this->operator=(cart);
707 }
708
709 /*! \brief Cartesian decomposition copy constructor
710 *
711 * \param cart object to copy
712 *
713 */
714 CartDecomposition(CartDecomposition<dim,T,Memory,layout_base,Distribution> && cart)
715 :nn_prcs<dim,T,layout_base,Memory>(cart.v_cl),v_cl(cart.v_cl),dist(v_cl),ref_cnt(0)
716 {
717 this->operator=(cart);
718 }
719
720 //! Cartesian decomposition destructor
721 ~CartDecomposition()
722 {
723 }
724
725 /*! \brief class to select the returned id by ghost_processorID
726 *
727 */
728 class box_id
729 {
730 public:
731 /*! \brief Return the box id
732 *
733 * \param p structure containing the id informations
734 * \param b_id box_id
735 *
736 * \return box id
737 *
738 */
739 inline static size_t id(p_box<dim, T> & p, size_t b_id)
740 {
741 return b_id;
742 }
743 };
744
745 /*! \brief class to select the returned id by ghost_processorID
746 *
747 */
748 class processor_id
749 {
750 public:
751 /*! \brief Return the processor id
752 *
753 * \param p structure containing the id informations
754 * \param b_id box_id
755 *
756 * \return processor id
757 *
758 */
759 template<typename encap_type> inline static size_t id(const encap_type & p, size_t b_id)
760 {
761 return p.template get<proc_>();
762 }
763 };
764
765 /*! \brief class to select the returned id by ghost_processorID
766 *
767 */
768 class lc_processor_id
769 {
770 public:
771 /*! \brief Return the near processor id
772 *
773 * \param p structure containing the id informations
774 * \param b_id box_id
775 *
776 * \return local processor id
777 *
778 */
779 template<typename encap_type> inline static size_t id(const encap_type & p, size_t b_id)
780 {
781 return p.template get<lc_proc_>();
782 }
783 };
784
785 /*! \brief class to select the returned id by ghost_processorID
786 *
787 */
788 class shift_id
789 {
790 public:
791 /*! \brief Return the shift id
792 *
793 * \param p structure containing the id informations
794 * \param b_id box_id
795 *
796 * \return shift_id id
797 *
798 */
799 template<typename encap_type> inline static size_t id(const encap_type & p, size_t b_id)
800 {
801 return p.template get<shift_id_>();
802 }
803 };
804
805 /*! \brief Apply boundary condition to the point
806 *
807 * If the particle go out to the right, bring back the particle on the left
808 * in case of periodic, nothing in case of non periodic
809 *
810 * \param pt Point to apply the boundary condition. (it's coordinated are changed according the
811 * the explanation before)
812 *
813 */
814 void applyPointBC(float (& pt)[dim]) const
815 {
816 for (size_t i = 0 ; i < dim ; i++)
817 {
818 if (bc[i] == PERIODIC)
819 {pt[i] = openfpm::math::periodic_l(pt[i],domain.getHigh(i),domain.getLow(i));}
820 }
821 }
822
823 /*! \brief Apply boundary condition to the point
824 *
825 * If the particle go out to the right, bring back the particle on the left
826 * in case of periodic, nothing in case of non periodic
827 *
828 * \param pt Point to apply the boundary conditions.(it's coordinated are changed according the
829 * the explanation before)
830 *
831 */
832 void applyPointBC(Point<dim,T> & pt) const
833 {
834 for (size_t i = 0 ; i < dim ; i++)
835 {
836 if (bc[i] == PERIODIC)
837 {pt.get(i) = openfpm::math::periodic_l(pt.get(i),domain.getHigh(i),domain.getLow(i));}
838 }
839 }
840
841 /*! \brief Apply boundary condition to the point
842 *
843 * If the particle go out to the right, bring back the particle on the left
844 * in case of periodic, nothing in case of non periodic
845 *
846 * \param pt encapsulated point object (it's coordinated are changed according the
847 * the explanation before)
848 *
849 */
850 template<typename Mem> void applyPointBC(encapc<1,Point<dim,T>,Mem> && pt) const
851 {
852 for (size_t i = 0 ; i < dim ; i++)
853 {
854 if (bc[i] == PERIODIC)
855 {pt.template get<0>()[i] = openfpm::math::periodic_l(pt.template get<0>()[i],domain.getHigh(i),domain.getLow(i));}
856 }
857 }
858
859 /*! \brief It create another object that contain the same decomposition information but with different ghost boxes
860 *
861 * \param g ghost
862 *
863 * \return a duplicated decomposition with different ghost boxes
864 *
865 */
866 CartDecomposition<dim,T,Memory,layout_base,Distribution> duplicate(const Ghost<dim,T> & g) const
867 {
868 CartDecomposition<dim,T,Memory,layout_base,Distribution> cart(v_cl);
869
870 cart.box_nn_processor = box_nn_processor;
871 cart.sub_domains = sub_domains;
872 cart.fine_s = fine_s;
873
874 cart.gr = gr;
875 cart.cd = cd;
876 cart.domain = domain;
877 for (size_t i = 0 ; i < dim ; i++)
878 {
879 cart.spacing[i] = spacing[i];
880 cart.magn[i] = magn[i];
881 };
882
883 cart.bbox = bbox;
884 cart.ghost = g;
885
886 cart.dist = dist;
887
888 for (size_t i = 0 ; i < dim ; i++)
889 cart.bc[i] = bc[i];
890
891 (static_cast<nn_prcs<dim,T,layout_base,Memory> &>(cart)).create(box_nn_processor, sub_domains);
892 (static_cast<nn_prcs<dim,T,layout_base,Memory> &>(cart)).applyBC(domain,ghost,bc);
893
894 cart.Initialize_geo_cell_lists();
895 cart.calculateGhostBoxes();
896
897 cart.collect_all_sub_domains(cart.sub_domains_global);
898
899 return cart;
900 }
901
902 /*! \brief It create another object that contain the same information and act in the same way
903 *
904 * \return a duplicated CartDecomposition object
905 *
906 */
907 CartDecomposition<dim,T,Memory,layout_base,Distribution> duplicate() const
908 {
909 CartDecomposition<dim,T,Memory,layout_base,Distribution> cart(v_cl);
910
911 (static_cast<ie_loc_ghost<dim,T, layout_base,Memory>*>(&cart))->operator=(static_cast<ie_loc_ghost<dim,T,layout_base,Memory>>(*this));
912 (static_cast<nn_prcs<dim,T,layout_base,Memory>*>(&cart))->operator=(static_cast<nn_prcs<dim,T,layout_base,Memory>>(*this));
913 (static_cast<ie_ghost<dim,T,Memory,layout_base>*>(&cart))->operator=(static_cast<ie_ghost<dim,T,Memory,layout_base>>(*this));
914
915 cart.sub_domains = sub_domains;
916 cart.box_nn_processor = box_nn_processor;
917 cart.fine_s = fine_s;
918 cart.gr = gr;
919 cart.gr_dist = gr_dist;
920 cart.dist = dist;
921 cart.commCostSet = commCostSet;
922 cart.cd = cd;
923 cart.domain = domain;
924 cart.sub_domains_global = sub_domains_global;
925 for (size_t i = 0 ; i < dim ; i++)
926 {
927 cart.spacing[i] = spacing[i];
928 cart.magn[i] = magn[i];
929 };
930
931 cart.ghost = ghost;
932
933 cart.bbox = bbox;
934
935 for (size_t i = 0 ; i < dim ; i++)
936 {cart.bc[i] = this->bc[i];}
937
938 return cart;
939 }
940
941 /*! \brief It create another object that contain the same information and act in the same way
942 *
943 * \return a duplicated CartDecomposition object
944 *
945 */
946 template<typename Memory2, template <typename> class layout_base2>
947 CartDecomposition<dim,T,Memory2,layout_base2,Distribution> duplicate_convert() const
948 {
949 CartDecomposition<dim,T> cart(v_cl);
950
951 (static_cast<ie_loc_ghost<dim,T,layout_base2,Memory2>*>(&cart))->operator=(static_cast<ie_loc_ghost<dim,T,layout_base,Memory>>(*this));
952 (static_cast<nn_prcs<dim,T,layout_base2,Memory2>*>(&cart))->operator=(static_cast<nn_prcs<dim,T,layout_base,Memory>>(*this));
953 ie_ghost<dim,T,Memory,layout_base> * ptr = static_cast<ie_ghost<dim,T,Memory,layout_base> *>((CartDecomposition<dim,T,Memory,layout_base,Distribution> *)this);
954 (static_cast<ie_ghost<dim,T,Memory2,layout_base2>*>(&cart))->operator=(ptr->template duplicate<Memory2,layout_base2>());
955
956 cart.private_get_sub_domains() = sub_domains;
957 cart.private_get_box_nn_processor() = box_nn_processor;
958 cart.private_get_fine_s() = fine_s;
959 cart.private_get_gr() = gr;
960 cart.private_get_gr_dist() = gr_dist;
961 cart.private_get_dist() = dist;
962 cart.private_get_commCostSet() = commCostSet;
963 cart.private_get_cd() = cd;
964 cart.private_get_domain() = domain;
965 cart.private_get_sub_domains_global() = sub_domains_global;
966 for (size_t i = 0 ; i < dim ; i++)
967 {cart.private_get_spacing(i) = spacing[i];};
968
969 cart.private_get_ghost() = ghost;
970
971 cart.private_get_bbox() = bbox;
972
973 for (size_t i = 0 ; i < dim ; i++)
974 {cart.private_get_bc(i) = this->bc[i];}
975
976 return cart;
977 }
978
979 /*! \brief Copy the element
980 *
981 * \param cart element to copy
982 *
983 * \return itself
984 *
985 */
986 CartDecomposition<dim,T,Memory, layout_base, Distribution> & operator=(const CartDecomposition & cart)
987 {
988 static_cast<ie_loc_ghost<dim,T,layout_base,Memory>*>(this)->operator=(static_cast<ie_loc_ghost<dim,T,layout_base,Memory>>(cart));
989 static_cast<nn_prcs<dim,T,layout_base,Memory>*>(this)->operator=(static_cast<nn_prcs<dim,T,layout_base,Memory>>(cart));
990 static_cast<ie_ghost<dim,T,Memory,layout_base>*>(this)->operator=(static_cast<ie_ghost<dim,T,Memory,layout_base>>(cart));
991
992 sub_domains = cart.sub_domains;
993 box_nn_processor = cart.box_nn_processor;
994 fine_s = cart.fine_s;
995 gr = cart.gr;
996 gr_dist = cart.gr_dist;
997 dist = cart.dist;
998 commCostSet = cart.commCostSet;
999 cd = cart.cd;
1000 domain = cart.domain;
1001 sub_domains_global = cart.sub_domains_global;
1002
1003 for (size_t i = 0 ; i < dim ; i++)
1004 {
1005 spacing[i] = cart.spacing[i];
1006 magn[i] = cart.magn[i];
1007 };
1008
1009 ghost = cart.ghost;
1010
1011 bbox = cart.bbox;
1012
1013 for (size_t i = 0 ; i < dim ; i++)
1014 bc[i] = cart.bc[i];
1015
1016 return *this;
1017 }
1018
1019 /*! \brief Copy the element, move semantic
1020 *
1021 * \param cart element to copy
1022 *
1023 * \return itself
1024 *
1025 */
1026 CartDecomposition<dim,T,Memory,layout_base, Distribution> & operator=(CartDecomposition && cart)
1027 {
1028 static_cast<ie_loc_ghost<dim,T,layout_base,Memory>*>(this)->operator=(static_cast<ie_loc_ghost<dim,T,layout_base,Memory>>(cart));
1029 static_cast<nn_prcs<dim,T,layout_base,Memory>*>(this)->operator=(static_cast<nn_prcs<dim,T,layout_base,Memory>>(cart));
1030 static_cast<ie_ghost<dim,T,Memory,layout_base>*>(this)->operator=(static_cast<ie_ghost<dim,T,Memory,layout_base>>(cart));
1031
1032 sub_domains.swap(cart.sub_domains);
1033 box_nn_processor.swap(cart.box_nn_processor);
1034 fine_s.swap(cart.fine_s);
1035 gr = cart.gr;
1036 gr_dist = cart.gr_dist;
1037 dist = cart.dist;
1038 commCostSet = cart.commCostSet;
1039 cd = cart.cd;
1040 gr_dist = cart.gr_dist;
1041 dist = cart.dist;
1042
1043 domain = cart.domain;
1044 sub_domains_global.swap(cart.sub_domains_global);
1045
1046 for (size_t i = 0 ; i < dim ; i++)
1047 {
1048 spacing[i] = cart.spacing[i];
1049 magn[i] = cart.magn[i];
1050 };
1051
1052 ghost = cart.ghost;
1053
1054 bbox = cart.bbox;
1055
1056 for (size_t i = 0 ; i < dim ; i++)
1057 bc[i] = cart.bc[i];
1058
1059 return *this;
1060 }
1061
1062 /*! \brief The default grid size
1063 *
1064 * The default grid is always an isotropic grid that adapt with the number of processors,
1065 * it define in how many cell it will be divided the space for a particular required minimum
1066 * number of sub-domain
1067 *
1068 * \param n_sub number of subdomains per processors
1069 *
1070 * \return grid dimension (it is one number because on the other dimensions is the same)
1071 *
1072 */
1073 static size_t getDefaultGrid(size_t n_sub)
1074 {
1075 // Calculate the number of sub-sub-domain on
1076 // each dimension
1077 return openfpm::math::round_big_2(pow(n_sub, 1.0 / dim));
1078 }
1079
1080 /*! \brief Given a point return in which processor the particle should go
1081 *
1082 * \param p point
1083 *
1084 * \return processorID
1085 *
1086 */
1087 template<typename Mem> size_t inline processorID(const encapc<1, Point<dim,T>, Mem> & p) const
1088 {
1089 return processorID_impl(p,fine_s,sub_domains_global,getDomain(),bc);
1090 }
1091
1092 /*! \brief Given a point return in which processor the particle should go
1093 *
1094 * \param p point
1095 *
1096 * \return processorID
1097 *
1098 */
1099 size_t inline processorID(const Point<dim,T> &p) const
1100 {
1101 return processorID_impl(p,fine_s,sub_domains_global,getDomain(),bc);
1102 }
1103
1104 /*! \brief Given a point return in which processor the particle should go
1105 *
1106 * \param p point
1107 *
1108 * \return processorID
1109 *
1110 */
1111 size_t inline processorID(const T (&p)[dim]) const
1112 {
1113 return processorID_impl(p,fine_s,sub_domains_global,getDomain(),bc);
1114 }
1115
1116 /*! \brief Given a point return in which processor the point/particle should go
1117 *
1118 * Boundary conditions are considered
1119 *
1120 * \param p point
1121 *
1122 * \return processorID
1123 *
1124 */
1125 template<typename Mem> size_t inline processorIDBC(encapc<1, Point<dim,T>, Mem> p)
1126 {
1127 Point<dim,T> pt = p;
1128 applyPointBC(pt);
1129
1130
1131 return processorID_impl(pt,fine_s,sub_domains_global,getDomain(),bc);
1132 }
1133
1134 /*! \brief Given a point return in which processor the particle should go
1135 *
1136 * Boundary conditions are considered
1137 *
1138 * \param p point
1139 *
1140 * \return processorID
1141 *
1142 */
1143 size_t inline processorIDBC(const Point<dim,T> &p) const
1144 {
1145 Point<dim,T> pt = p;
1146 applyPointBC(pt);
1147
1148 // Get the number of elements in the cell
1149
1150 return processorID_impl(pt,fine_s,sub_domains_global,getDomain(),bc);
1151 }
1152
1153 /*! \brief Given a point return in which processor the particle should go
1154 *
1155 * Boundary consition are considered
1156 *
1157 * \param p point position
1158 *
1159 * \return processorID
1160 *
1161 */
1162 size_t inline processorIDBC(const T (&p)[dim]) const
1163 {
1164 Point<dim,T> pt = p;
1165 applyPointBC(pt);
1166
1167 return processorID_impl(pt,fine_s,sub_domains_global,getDomain(),bc);
1168 }
1169
1170 /*! \brief Get the periodicity on i dimension
1171 *
1172 * \param i dimension
1173 *
1174 * \return the periodicity in direction i
1175 *
1176 */
1177 inline size_t periodicity(size_t i) const
1178 {
1179 return bc[i];
1180 }
1181
1182 /*! \brief Get the periodicity
1183 *
1184 *
1185 * \return the periodicity
1186 *
1187 */
1188 inline const size_t (& periodicity() const) [dim]
1189 {
1190 return bc;
1191 }
1192
1193 /*! \brief Calculate magnification
1194 *
1195 * \param gm distribution grid
1196 *
1197 */
1198 void calculate_magn(const grid_sm<dim,void> & gm)
1199 {
1200 if (gm.size() == 0)
1201 {
1202 for (size_t i = 0 ; i < dim ; i++)
1203 magn[i] = 1;
1204 }
1205 else
1206 {
1207 for (size_t i = 0 ; i < dim ; i++)
1208 {
1209 if (gr.size(i) % gm.size(i) != 0)
1210 std::cerr << __FILE__ << ":" << __LINE__ << ".Error the decomposition grid specified as gr.size(" << i << ")=" << gr.size(i) << " is not multiple of the distribution grid gm.size(" << i << ")=" << gm.size(i) << std::endl;
1211
1212 magn[i] = gr.size(i) / gm.size(i);
1213 }
1214 }
1215 }
1216
1217 /*! \brief Set the best parameters for the decomposition
1218 *
1219 * It based on number of processors and dimensionality find a "good" parameter setting
1220 *
1221 * \param domain_ domain to decompose
1222 * \param bc boundary conditions
1223 * \param ghost Ghost size
1224 * \param sec_dist Distribution grid. The distribution grid help in reducing the underlying
1225 * distribution problem simplifying decomposition problem. This is done in order to
1226 * reduce the load/balancing dynamic load balancing problem
1227 *
1228 * \param dec_gran number of sub-sub-domain for each processor
1229 *
1230 */
1231 void setGoodParameters(::Box<dim,T> & domain_,
1232 const size_t (& bc)[dim],
1233 const Ghost<dim,T> & ghost,
1234 size_t dec_gran,
1235 const grid_sm<dim,void> & sec_dist = grid_sm<dim,void>())
1236 {
1237 size_t div[dim];
1238
1239 // Create a valid decomposition of the space
1240 // Get the number of processor and calculate the number of sub-domain
1241 // for decomposition
1242 size_t n_proc = v_cl.getProcessingUnits();
1243 size_t n_sub = n_proc * dec_gran;
1244
1245 // Calculate the maximum number (before merging) of sub-domain on
1246 // each dimension
1247
1248 nsub_to_div2(div,n_sub,dim);
1249
1250/* for (size_t i = 0; i < dim; i++)
1251 {
1252 div[i] = openfpm::math::round_big_2(pow(n_sub, 1.0 / dim));
1253 }*/
1254
1255 if (dim > 3)
1256 {
1257 long int dim_r = dim-1;
1258 do
1259 {
1260 // Check for adjustment
1261 size_t tot_size = 1;
1262 for (size_t i = 0 ; i < dim ; i++)
1263 {tot_size *= div[i];}
1264
1265 // the granularity is too coarse increase the divisions
1266 if (tot_size / n_proc > (unsigned int long)(0.75*dec_gran) )
1267 {break;}
1268
1269 nsub_to_div(div,n_sub,dim_r);
1270
1271 dim_r--;
1272 } while(dim_r > 0);
1273 }
1274
1275 setParameters(div,domain_,bc,ghost,sec_dist);
1276 }
1277
1278 /*! \brief return the parameters of the decomposition
1279 *
1280 * \param div_ number of divisions in each dimension
1281 *
1282 */
1283 void getParameters(size_t (& div_)[dim])
1284 {
1285 for (size_t i = 0 ; i < dim ; i++)
1286 {div_[i] = this->gr.size(i);}
1287 }
1288
1289 /*! \brief Set the parameter of the decomposition
1290 *
1291 * \param div_ storing into how many sub-sub-domains to decompose on each dimension
1292 * \param domain_ domain to decompose
1293 * \param bc boundary conditions
1294 * \param ghost Ghost size
1295 * \param sec_dist Distribution grid. The distribution grid help in reducing the underlying
1296 * distribution problem simplifying decomposition problem. This is done in order to
1297 * reduce the load/balancing dynamic load balancing problem
1298 *
1299 */
1300 void setParameters(const size_t (& div_)[dim],
1301 ::Box<dim,T> & domain_,
1302 const size_t (& bc)[dim],
1303 const Ghost<dim,T> & ghost,
1304 const grid_sm<dim,void> & sec_dist = grid_sm<dim,void>())
1305 {
1306 // set the boundary conditions
1307 for (size_t i = 0 ; i < dim ; i++)
1308 this->bc[i] = bc[i];
1309
1310 // set the ghost
1311 this->ghost = ghost;
1312
1313 // Set the decomposition parameters
1314 gr.setDimensions(div_);
1315 domain = domain_;
1316 cd.setDimensions(domain, div_, 0);
1317
1318 // We we have a secondary grid costruct a reduced graph
1319 if (sec_dist.size(0) != 0)
1320 {
1321 calculate_magn(sec_dist);
1322 gr_dist.setDimensions(sec_dist.getSize());
1323 }
1324 else
1325 {
1326 calculate_magn(sec_dist);
1327 gr_dist = gr;
1328 }
1329
1330 // init distribution
1331 dist.createCartGraph(gr_dist, domain);
1332
1333 }
1334
1335 /*! \brief Delete the decomposition and reset the data-structure
1336 *
1337 *
1338 */
1339 void reset()
1340 {
1341 sub_domains.clear();
1342 box_nn_processor.clear();
1343 fine_s.clear();
1344 loc_box.clear();
1345 nn_prcs<dim, T,layout_base,Memory>::reset();
1346 ie_ghost<dim,T,Memory,layout_base>::reset();
1347 ie_loc_ghost<dim, T,layout_base,Memory>::reset();
1348 }
1349
1350 /*! \brief Start decomposition
1351 *
1352 */
1353 void decompose(dec_options opt = dec_options::DEC_NONE)
1354 {
1355 reset();
1356
1357 if (commCostSet == false)
1358 {computeCommunicationAndMigrationCosts(1);}
1359
1360 dist.decompose();
1361
1362 createSubdomains(v_cl,bc);
1363
1364 calculateGhostBoxes();
1365
1366 domain_nn_calculator_cart<dim>::reset();
1367 domain_nn_calculator_cart<dim>::setParameters(proc_box);
1368
1369 if (opt != dec_options::DEC_SKIP_ICELL)
1370 {
1371
1372 domain_icell_calculator<dim,T,layout_base,Memory>
1373 ::CalculateInternalCells(v_cl,
1374 ie_ghost<dim, T,Memory,layout_base>::private_get_vb_int_box(),
1375 sub_domains,
1376 this->getProcessorBounds(),
1377 this->getGhost().getRcut(),
1378 this->getGhost());
1379 }
1380 }
1381
1382 /*! \brief Refine the decomposition, available only for ParMetis distribution, for Metis it is a null call
1383 *
1384 * \param ts number of time step from the previous load balancing
1385 *
1386 */
1387 void refine(size_t ts)
1388 {
1389 reset();
1390
1391 if (commCostSet == false)
1392 {computeCommunicationAndMigrationCosts(ts);}
1393
1394 dist.refine();
1395
1396 createSubdomains(v_cl,bc);
1397
1398 calculateGhostBoxes();
1399
1400 domain_nn_calculator_cart<dim>::reset();
1401 domain_nn_calculator_cart<dim>::setParameters(proc_box);
1402 }
1403
1404 /*! \brief Refine the decomposition, available only for ParMetis distribution, for Metis it is a null call
1405 *
1406 * \param ts number of time step from the previous load balancing
1407 *
1408 */
1409 void redecompose(size_t ts)
1410 {
1411 reset();
1412
1413 if (commCostSet == false)
1414 {computeCommunicationAndMigrationCosts(ts);}
1415
1416 dist.redecompose();
1417
1418 createSubdomains(v_cl,bc);
1419
1420 calculateGhostBoxes();
1421
1422 domain_nn_calculator_cart<dim>::reset();
1423 domain_nn_calculator_cart<dim>::setParameters(proc_box);
1424 }
1425
1426 /*! \brief Refine the decomposition, available only for ParMetis distribution, for Metis it is a null call
1427 *
1428 * \param dlb Dynamic load balancing object
1429 *
1430 * \return true if the re-balance has been executed, false otherwise
1431 */
1432 bool refine(DLB & dlb)
1433 {
1434 // if the DLB heuristic to use is the "Unbalance Threshold" get unbalance percentage
1435 if (dlb.getHeurisitc() == DLB::Heuristic::UNBALANCE_THRLD)
1436 {
1437 float unbalance = dist.getUnbalance();
1438 dlb.setUnbalance(unbalance);
1439 if (v_cl.getProcessUnitID() == 0)
1440 {
1441 std::cout << std::setprecision(3) << unbalance << "\n";
1442 }
1443 }
1444
1445 if (dlb.rebalanceNeeded())
1446 {
1447 refine(dlb.getNTimeStepSinceDLB());
1448
1449 return true;
1450 }
1451 return false;
1452 }
1453
1454// size_t n_step = 0;
1455
1456 /*! \brief Get the current un-balance value
1457 *
1458 * \return the un-balance percentage value
1459 */
1460 float getUnbalance()
1461 {
1462 return dist.getUnbalance();
1463 }
1464
1465 /*! \brief Compute the processor load counting the total weights of its vertices
1466 *
1467 * \return the current processor load
1468 */
1469 size_t getProcessorLoad()
1470 {
1471 return dist.getProcessorLoad();
1472 }
1473
1474 /*! \brief function that return the position of the cell in the space
1475 *
1476 * \param id vertex id
1477 * \param pos vector that will contain x, y, z
1478 *
1479 */
1480 inline void getSubSubDomainPosition(size_t id, T (&pos)[dim])
1481 {
1482 dist.getSubSubDomainPosition(id, pos);
1483 }
1484
1485 //TODO fix in Parmetis distribution to get only the right amount of vertices
1486 /*! \brief Get the number of sub-sub-domains in this sub-graph
1487 *
1488 * \return number of sub-sub-domains in this sub-graph
1489 */
1490 size_t getNSubSubDomains()
1491 {
1492 return dist.getNSubSubDomains();
1493 }
1494
1495 /*! \brief Function that set the computational cost for a of a sub-sub domain
1496 *
1497 * \param id vertex id
1498 * \param weight compotational cost
1499 *
1500 */
1501 inline void setSubSubDomainComputationCost(size_t id, size_t weight)
1502 {
1503 dist.setComputationCost(id, weight);
1504 }
1505
1506 /*! \brief function that return the computation cost of the sub-sub-domain id
1507 *
1508 * \param id sub-sub-domain id
1509 *
1510 * \return the computational cost
1511 *
1512 */
1513 inline size_t getSubSubDomainComputationCost(size_t id)
1514 {
1515 return dist.getSubSubDomainComputationCost(id);
1516 }
1517
1518 /*! \brief Operator to access the size of the sub-graph
1519 *
1520 * \return the size of the subgraph
1521 */
1522 size_t subSize()
1523 {
1524 return dist.subSize();
1525 }
1526
1527 /*! \brief Get the number of local sub-domains
1528 *
1529 * \return the number of sub-domains
1530 *
1531 */
1532 size_t getNSubDomain()
1533 {
1534 return sub_domains.size();
1535 }
1536
1537 /*! \brief Get the local sub-domain
1538 *
1539 * \param lc (each local processor can have more than one sub-domain)
1540 *
1541 * \return the sub-domain
1542 *
1543 */
1544 SpaceBox<dim, T> getSubDomain(size_t lc)
1545 {
1546 // Create a space box
1547 SpaceBox<dim, T> sp;
1548
1549 // fill the space box
1550
1551 for (size_t k = 0; k < dim; k++)
1552 {
1553 // create the SpaceBox Low and High
1554 sp.setLow(k, sub_domains.template get<Box::p1>(lc)[k]);
1555 sp.setHigh(k, sub_domains.template get<Box::p2>(lc)[k]);
1556 }
1557
1558 return sp;
1559 }
1560
1561 /*! \brief Get the local sub-domain enlarged with ghost extension
1562 *
1563 * \param lc (each processor can have more than one sub-domain)
1564 *
1565 * \return the sub-domain extended
1566 *
1567 */
1568 SpaceBox<dim, T> getSubDomainWithGhost(size_t lc)
1569 {
1570 // Create a space box
1571 SpaceBox<dim, T> sp = sub_domains.get(lc);
1572
1573 // enlarge with ghost
1574 sp.enlarge(ghost);
1575
1576 return sp;
1577 }
1578
1579 /*! \brief Return the box of the physical domain
1580 *
1581 * \return The physical domain box
1582 *
1583 */
1584 const ::Box<dim,T> & getDomain() const
1585 {
1586 return domain;
1587 }
1588
1589 const openfpm::vector<SpaceBox<dim, T>,Memory,layout_base> &
1590 getSubDomains() const
1591 {
1592 return sub_domains;
1593 }
1594
1595 /*! \brief Check if the particle is local
1596 *
1597 * \warning if the particle id outside the domain the result is unreliable
1598 *
1599 * \param p object position
1600 *
1601 * \return true if it is local
1602 *
1603 */
1604 template<typename Mem> bool isLocal(const encapc<1, Point<dim, T>, Mem> p) const
1605 {
1606 return processorID<Mem>(p) == v_cl.getProcessUnitID();
1607 }
1608
1609 /*! \brief Check if the particle is local
1610 *
1611 * \warning if the particle id outside the domain the result is unreliable
1612 *
1613 * \param pos object position
1614 *
1615 * \return true if it is local
1616 *
1617 */
1618 bool isLocal(const T (&pos)[dim]) const
1619 {
1620 return processorID(pos) == v_cl.getProcessUnitID();
1621 }
1622
1623 /*! \brief Check if the particle is local
1624 *
1625 * \warning if the particle id outside the domain the result is unreliable
1626 *
1627 * \param pos object position
1628 *
1629 * \return true if it is local
1630 *
1631 */
1632 bool isLocal(const Point<dim,T> & pos) const
1633 {
1634 return processorID(pos) == v_cl.getProcessUnitID();
1635 }
1636
1637 /*! \brief Check if the particle is local considering boundary conditions
1638 *
1639 * \warning if the particle id outside the domain and non periodic boundary the result
1640 * is unreliable
1641 *
1642 *
1643 * \param p object position
1644 * \param bc boundary conditions
1645 *
1646 * \return true if it is local
1647 *
1648 */
1649 template<typename Mem> bool isLocalBC(const encapc<1, Point<dim,T>, Mem> p, const size_t (& bc)[dim]) const
1650 {
1651 Point<dim,T> pt = p;
1652
1653 for (size_t i = 0 ; i < dim ; i++)
1654 {
1655 if (bc[i] == PERIODIC)
1656 pt.get(i) = openfpm::math::periodic_l(p.template get<0>()[i],domain.getHigh(i),domain.getLow(i));
1657 }
1658
1659 return processorID<Mem>(pt) == v_cl.getProcessUnitID();
1660 }
1661
1662 /*! \brief Check if the particle is local considering boundary conditions
1663 *
1664 * \warning if the particle id outside the domain and non periodic boundary the result
1665 * is unreliable
1666 *
1667 *
1668 * \param p object position
1669 * \param bc boundary conditions
1670 *
1671 * \return true if it is local
1672 *
1673 */
1674 bool isLocalBC(const Point<dim,T> & p, const size_t (& bc)[dim]) const
1675 {
1676 Point<dim,T> pt = p;
1677
1678 for (size_t i = 0 ; i < dim ; i++)
1679 {
1680 if (bc[i] == PERIODIC)
1681 pt.get(i) = openfpm::math::periodic_l(p[i],domain.getHigh(i),domain.getLow(i));
1682 }
1683
1684 return processorID(pt) == v_cl.getProcessUnitID();
1685 }
1686
1687 /*! \brief Get the domain Cells
1688 *
1689 * It return all the cells-id that are inside the processor-domain
1690 *
1691 * \return the cells id inside the domain
1692 *
1693 */
1694 openfpm::vector<size_t> & getDomainCells()
1695 {
1696 return domain_nn_calculator_cart<dim>::getDomainCells();
1697 }
1698
1699 /*! \brief Get the CRS domain Cells with normal neighborhood
1700 *
1701 * In case of symmetric interaction the neighborhood cells of
1702 * a cell is different
1703 *
1704 * \verbatim
1705
1706 Symmetric Normal
1707
1708 * * * * * *
1709 X * * X *
1710 * * *
1711 \endverbatim
1712 *
1713 *
1714 * In case of CRS scheme some cells has the symmetric neighborhood
1715 * some others has more complex neighborhood. This function return
1716 * all the cells with normal neighborhood
1717 *
1718 * \return the cell-id of the cells inside the processor-domain with normal neighborhood
1719 *
1720 */
1721 openfpm::vector<size_t> & getCRSDomainCells()
1722 {
1723 return domain_nn_calculator_cart<dim>::getCRSDomainCells();
1724 }
1725
1726 /*! \brief set NN parameters to calculate cell-list neighborhood
1727 *
1728 * \param shift to apply in cell linearization
1729 * \param gs cell grid
1730 *
1731 */
1732 void setNNParameters(grid_key_dx<dim> & shift, grid_sm<dim,void> & gs)
1733 {
1734 domain_nn_calculator_cart<dim>::setNNParameters(loc_box, shift, gs);
1735 }
1736
1737 /*! \brief Get the CRS anomalous cells
1738 *
1739 * This function return the anomalous cells
1740 *
1741 *
1742 * \return the anomalous cells with neighborhood
1743 *
1744 */
1745 openfpm::vector<subsub_lin<dim>> & getCRSAnomDomainCells()
1746 {
1747 return domain_nn_calculator_cart<dim>::getCRSAnomDomainCells();
1748 }
1749
1750 /*! \brief Check if the particle is local considering boundary conditions
1751 *
1752 * \warning if the particle id outside the domain and non periodic boundary the result
1753 * is unreliable
1754 *
1755 * \param p object position
1756 * \param bc boundary conditions
1757 *
1758 * \return true if it is local
1759 *
1760 */
1761 bool isLocalBC(const T (&p)[dim], const size_t (& bc)[dim]) const
1762 {
1763 Point<dim,T> pt = p;
1764
1765 for (size_t i = 0 ; i < dim ; i++)
1766 {
1767 if (bc[i] == PERIODIC)
1768 pt.get(i) = openfpm::math::periodic_l(p[i],domain.getHigh(i),domain.getLow(i));
1769 }
1770
1771 return processorID(pt) == v_cl.getProcessUnitID();
1772 }
1773
1774
1775 /*! \brief Return the bounding box containing union of all the sub-domains for the local processor
1776 *
1777 * \return The bounding box
1778 *
1779 */
1780 ::Box<dim, T> & getProcessorBounds()
1781 {
1782 return bbox;
1783 }
1784
1785
1786 /*! \brief Return the ghost
1787 *
1788 *
1789 * \return the ghost extension
1790 *
1791 */
1792 const Ghost<dim,T> & getGhost() const
1793 {
1794 return ghost;
1795 }
1796
1797 /*! \brief Decomposition grid
1798 *
1799 * \return the grid
1800 */
1801 const grid_sm<dim,void> getGrid()
1802 {
1803 return gr;
1804 }
1805
1806 /*! \brief Distribution grid
1807 *
1808 * \return the grid
1809 */
1810 const grid_sm<dim,void> getDistGrid()
1811 {
1812 return gr_dist;
1813 }
1814
1815 ////////////// Functions to get decomposition information ///////////////
1816
1817 /*! \brief Write the decomposition as VTK file
1818 *
1819 * The function generate several files
1820 *
1821 * * subdomains_X.vtk domain for the local processor (X) as union of sub-domain
1822 * * subdomains_adjacent_X.vtk sub-domains adjacent to the local processor (X)
1823 * * internal_ghost_X.vtk Internal ghost boxes for the local processor (X)
1824 * * external_ghost_X.vtk External ghost boxes for the local processor (X)
1825 * * local_internal_ghost_X.vtk internal local ghost boxes for the local processor (X)
1826 * * local_external_ghost_X.vtk external local ghost boxes for the local processor (X)
1827 *
1828 * where X is the local processor rank
1829 *
1830 * \param output directory where to write the files
1831 *
1832 * \return true if the write succeed
1833 *
1834 */
1835 bool write(std::string output) const
1836 {
1837 //! subdomains_X.vtk domain for the local processor (X) as union of sub-domain
1838 VTKWriter<openfpm::vector<SpaceBox<dim, T>,Memory,layout_base>, VECTOR_BOX> vtk_box1;
1839 vtk_box1.add(sub_domains);
1840 vtk_box1.write(output + std::string("subdomains_") + std::to_string(v_cl.getProcessUnitID()) + std::string(".vtk"));
1841
1842 nn_prcs<dim, T,layout_base,Memory>::write(output);
1843 ie_ghost<dim,T,Memory,layout_base>::write(output, v_cl.getProcessUnitID());
1844 ie_loc_ghost<dim, T,layout_base,Memory>::write(output, v_cl.getProcessUnitID());
1845
1846 return true;
1847 }
1848
1849 /*! \brief Get the Virtual Cluster machine
1850 *
1851 * \return the Virtual cluster machine
1852 *
1853 */
1854 Vcluster<> & getVC() const
1855 {
1856#ifdef SE_CLASS2
1857 check_valid(this,8);
1858#endif
1859 return v_cl;
1860 }
1861
1862 /*! \brief Deallocate structures that identify a point to which internal ghost is located
1863 *
1864 */
1865 void free_geo_cell()
1866 {
1867 ie_ghost<dim,T,Memory,layout_base>::free_geo_cell();
1868 }
1869
1870 /*! \brief Deallocate structures that identify a point to which internal ghost is located
1871 *
1872 */
1873 void free_fines()
1874 {
1875 fine_s.clear();
1876 fine_s.destroy();
1877 }
1878
1879 /*! \brief function to check the consistency of the information of the decomposition
1880 *
1881 * \return false if is inconsistent
1882 *
1883 */
1884 bool check_consistency()
1885 {
1886 if (ie_loc_ghost<dim, T,layout_base,Memory>::check_consistency(getNSubDomain()) == false)
1887 return false;
1888
1889 return true;
1890 }
1891
1892 /*! \brief Print subdomains, external and internal ghost boxes
1893 *
1894 */
1895 void debugPrint()
1896 {
1897 std::cout << "Subdomains\n";
1898 for (size_t p = 0; p < sub_domains.size(); p++)
1899 {
1900 std::cout << ::SpaceBox<dim, T>(sub_domains.get(p)).toString() << "\n";
1901 }
1902
1903 std::cout << "Subdomains global\n";
1904 for (size_t p = 0; p < sub_domains_global.size(); p++)
1905 {
1906 std::cout << ::SpaceBox<dim, T>(sub_domains_global.template get<0>(p)).toString() << " proc:" << sub_domains_global.template get<1>(p) << "\n";
1907 }
1908
1909 std::cout << "External ghost box\n";
1910
1911 for (size_t p = 0; p<nn_prcs < dim, T, layout_base, Memory>::getNNProcessors(); p++)
1912 {
1913 for (size_t i = 0; i<ie_ghost <dim,T,Memory,layout_base>::getProcessorNEGhost(p); i++)
1914 {
1915 std::cout << ie_ghost<dim,T,Memory,layout_base>::getProcessorEGhostBox(p, i).toString() << " prc=" << nn_prcs<dim, T, layout_base,Memory>::IDtoProc(p) << " id=" << ie_ghost<dim,T,Memory,layout_base>::getProcessorEGhostId(p, i) << "\n";
1916 }
1917 }
1918
1919 std::cout << "Internal ghost box\n";
1920
1921 for (size_t p = 0; p<nn_prcs < dim, T, layout_base, Memory>::getNNProcessors(); p++)
1922 {
1923 for (size_t i = 0; i<ie_ghost<dim,T,Memory,layout_base>::getProcessorNIGhost(p); i++)
1924 {
1925 std::cout << ie_ghost<dim,T,Memory,layout_base>::getProcessorIGhostBox(p, i).toString() << " prc=" << nn_prcs<dim, T, layout_base, Memory>::IDtoProc(p) << " id=" << ie_ghost<dim,T,Memory,layout_base>::getProcessorIGhostId(p, i) << "\n";
1926 }
1927 }
1928 }
1929
1930 /*! \brief Check if the CartDecomposition contain the same information
1931 *
1932 * \param cart Element to check with
1933 *
1934 * \return true if they are equal
1935 *
1936 */
1937 bool is_equal(CartDecomposition<dim,T,Memory> & cart)
1938 {
1939 if (static_cast<ie_loc_ghost<dim,T,layout_base,Memory>*>(this)->is_equal(static_cast<ie_loc_ghost<dim,T,layout_base,Memory>&>(cart)) == false)
1940 return false;
1941
1942 if (static_cast<nn_prcs<dim,T,layout_base,Memory>*>(this)->is_equal(static_cast<nn_prcs<dim,T, layout_base,Memory>&>(cart)) == false)
1943 return false;
1944
1945 if (static_cast<ie_ghost<dim,T,Memory,layout_base>*>(this)->is_equal(static_cast<ie_ghost<dim,T,Memory,layout_base>&>(cart)) == false)
1946 return false;
1947
1948 if (sub_domains != cart.sub_domains)
1949 return false;
1950
1951 if (box_nn_processor != cart.box_nn_processor)
1952 return false;
1953
1954 if (fine_s != cart.fine_s)
1955 return false;
1956
1957 if (gr != cart.gr)
1958 return false;
1959
1960 if (cd != cart.cd)
1961 return false;
1962
1963 if (domain != cart.domain)
1964 return false;
1965
1966 if (meta_compare<T[dim]>::meta_compare_f(cart.spacing,spacing) == false)
1967 return false;
1968
1969 if (ghost != cart.ghost)
1970 return false;
1971
1972 return true;
1973 }
1974
1975 /*! \brief Check if the CartDecomposition contain the same information with the exception of the ghost part
1976 * It is anyway required that the ghost come from the same sub-domains decomposition
1977 *
1978 * \param cart Element to check with
1979 *
1980 * \return true if the two CartDecomposition are equal
1981 *
1982 */
1983 bool is_equal_ng(CartDecomposition<dim,T,Memory> & cart)
1984 {
1985 if (static_cast<ie_loc_ghost<dim,T,layout_base,Memory>*>(this)->is_equal_ng(static_cast<ie_loc_ghost<dim,T,layout_base,Memory>&>(cart)) == false)
1986 return false;
1987
1988 if (static_cast<nn_prcs<dim,T,layout_base,Memory>*>(this)->is_equal(static_cast<nn_prcs<dim,T,layout_base,Memory>&>(cart)) == false)
1989 return false;
1990
1991 if (static_cast<ie_ghost<dim,T,Memory,layout_base>*>(this)->is_equal_ng(static_cast<ie_ghost<dim,T,Memory,layout_base>&>(cart)) == false)
1992 return false;
1993
1994 if (sub_domains != cart.sub_domains)
1995 return false;
1996
1997 if (box_nn_processor != cart.box_nn_processor)
1998 return false;
1999
2000 if (fine_s != cart.fine_s)
2001 return false;
2002
2003 if (gr != cart.gr)
2004 return false;
2005
2006 if (cd != cart.cd)
2007 return false;
2008
2009 if (domain != cart.domain)
2010 return false;
2011
2012 if (meta_compare<T[dim]>::meta_compare_f(cart.spacing,spacing) == false)
2013 return false;
2014
2015 return true;
2016 }
2017
2018 /*! \brief Return the distribution object
2019 *
2020 * \return the distribution object
2021 *
2022 */
2023 Distribution & getDistribution()
2024 {
2025 return dist;
2026 }
2027
2028
2029 /*! \brief Add computation cost i to the subsubdomain with global id gid
2030 *
2031 * \param gid global id of the subsubdomain to update
2032 * \param i Cost increment
2033 */
2034 inline void addComputationCost(size_t gid, size_t i)
2035 {
2036 size_t c = dist.getSubSubDomainComputationCost(gid);
2037
2038 dist.setComputationCost(gid, c + i);
2039 }
2040
2041 /*! \brief Get the decomposition counter
2042 *
2043 * \return the decomposition counter
2044 *
2045 */
2046 size_t get_ndec()
2047 {
2048 return dist.get_ndec();
2049 }
2050
2051 /*! \brief Get the cell decomposer of the decomposition
2052 *
2053 * \return the cell decomposer
2054 *
2055 */
2056 const CellDecomposer_sm<dim, T, shift<dim,T>> & getCellDecomposer()
2057 {
2058 return cd;
2059 }
2060
2061
2062 /*! \brief convert to a structure usable in a device kernel
2063 *
2064 * \return a data-structure that can be used directy on GPU
2065 *
2066 */
2067 CartDecomposition_gpu<dim,T,Memory,layout_base> toKernel()
2068 {
2069 if (host_dev_transfer == false)
2070 {
2071 fine_s.hostToDevice();
2072 sub_domains_global.template hostToDevice<0,1>();
2073 host_dev_transfer = true;
2074 }
2075
2076 int bc_[dim];
2077
2078 for (int i = 0 ; i < dim ; i++) {bc_[i] = this->periodicity(i);}
2079
2080 CartDecomposition_gpu<dim,T,Memory,layout_base> cdg(fine_s.toKernel(),
2081 ie_ghost<dim,T,Memory,layout_base>::toKernel(),
2082 sub_domains_global.toKernel(),
2083 getDomain(),
2084 bc_);
2085
2086 return cdg;
2087 }
2088
2089 //! friend classes
2090 friend extended_type;
2091
2092 /*! \brief Return the internal data structure sub_domains
2093 *
2094 * \return sub_domains
2095 *
2096 */
2097 openfpm::vector<SpaceBox<dim, T>> & private_get_sub_domains()
2098 {
2099 return sub_domains;
2100 }
2101
2102 /*! \brief Return the internal data structure box_nn_processor
2103 *
2104 * \return box_nn_processor
2105 *
2106 */
2107 openfpm::vector<openfpm::vector<long unsigned int> > & private_get_box_nn_processor()
2108 {
2109 return box_nn_processor;
2110 }
2111
2112 /*! \brief Return the internal data structure fine_s
2113 *
2114 * \return fine_s
2115 *
2116 */
2117 CellList<dim,T,Mem_fast<Memory,int>,shift<dim,T>> & private_get_fine_s()
2118 {
2119 return fine_s;
2120 }
2121
2122 /*! \brief Return the internal data structure gr
2123 *
2124 * \return gr
2125 *
2126 */
2127 grid_sm<dim, void> & private_get_gr()
2128 {
2129 return gr;
2130 }
2131
2132 /*! \brief Return the internal data structure gr_dist
2133 *
2134 * \return gr_dist
2135 *
2136 */
2137 grid_sm<dim, void> & private_get_gr_dist()
2138 {
2139 return gr_dist;
2140 }
2141
2142 /*! \brief Return the internal data structure dist
2143 *
2144 * \return dist
2145 *
2146 */
2147 Distribution & private_get_dist()
2148 {
2149 return dist;
2150 }
2151
2152 /*! \brief Return the internal data structure commCostSet
2153 *
2154 * \return commCostSet
2155 *
2156 */
2157 bool & private_get_commCostSet()
2158 {
2159 return commCostSet;
2160 }
2161
2162 /*! \brief Return the internal data structure cd
2163 *
2164 * \return cd
2165 *
2166 */
2167 CellDecomposer_sm<dim, T, shift<dim,T>> & private_get_cd()
2168 {
2169 return cd;
2170 }
2171
2172 /*! \brief Return the internal data structure domain
2173 *
2174 * \return domain
2175 *
2176 */
2177 ::Box<dim,T> & private_get_domain()
2178 {
2179 return domain;
2180 }
2181
2182 /*! \brief Return the internal data structure sub_domains_global
2183 *
2184 * \return sub_domains_global
2185 *
2186 */
2187 openfpm::vector<Box_map<dim, T>,Memory,layout_base> & private_get_sub_domains_global()
2188 {
2189 return sub_domains_global;
2190 }
2191
2192 /*! \brief Return the internal data structure spacing
2193 *
2194 * \return spacing
2195 *
2196 */
2197 T & private_get_spacing(int i)
2198 {
2199 return spacing[i];
2200 }
2201
2202 /*! \brief Return the internal data structure ghost
2203 *
2204 * \return ghost
2205 *
2206 */
2207 Ghost<dim,T> & private_get_ghost()
2208 {
2209 return ghost;
2210 }
2211
2212 /*! \brief Return the internal data structure bbox
2213 *
2214 * \return bbox
2215 *
2216 */
2217 ::Box<dim,T> & private_get_bbox()
2218 {
2219 return bbox;
2220 }
2221
2222 /*! \brief Return the internal data structure bc
2223 *
2224 * \return bc
2225 *
2226 */
2227 size_t & private_get_bc(int i)
2228 {
2229 return bc[i];
2230 }
2231};
2232
2233
2234#endif
2235