/*
 * ParMetisDistribution.hpp
 *
 *  Created on: Nov 19, 2015
 *      Author: Antonio Leo
 */


#ifndef SRC_DECOMPOSITION_PARMETISDISTRIBUTION_HPP_
#define SRC_DECOMPOSITION_PARMETISDISTRIBUTION_HPP_


#include "SubdomainGraphNodes.hpp"
#include "parmetis_util.hpp"
#include "Graph/ids.hpp"
#include "Graph/CartesianGraphFactory.hpp"

#define PARMETIS_DISTRIBUTION_ERROR 100002
/*! \brief Class that distributes sub-sub-domains across processors using the ParMetis library
 *
 * Given a graph, and setting the computational cost, the communication cost (on the edges) and
 * the migration cost (or total communication cost), it produces an optimally balanced distribution
 *
 * In addition to Metis it provides the functionality to refine a previously computed
 * decomposition
 *
 * ### Initialize a Cartesian graph and decompose
 * \snippet Distribution_unit_tests.hpp Initialize a ParMetis Cartesian graph and decompose
 *
 * ### Refine the decomposition
 * \snippet Distribution_unit_tests.hpp refine with parmetis the decomposition
 *
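 * A minimal usage sketch (illustrative only: it assumes an initialized
 * Vcluster `v_cl`, and the grid size and domain extents are arbitrary
 * placeholders):
 *
 * \code
 * size_t sz[3] = {8, 8, 8};
 * grid_sm<3, void> info(sz);                           // 8x8x8 sub-sub-domain grid
 * Box<3, float> box({0.0, 0.0, 0.0}, {1.0, 1.0, 1.0}); // unit cube domain
 *
 * ParMetisDistribution<3, float> dist(v_cl);
 * dist.createCartGraph(info, box); // build the global sub-sub-domain graph
 * dist.decompose();                // initial ParMetis partition
 * \endcode
 *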
 */
template<unsigned int dim, typename T>
class ParMetisDistribution
{
	//! Is distributed
	bool is_distributed = false;

	//! Vcluster
	Vcluster<> & v_cl;

	//! Structure that stores the cartesian grid information
	grid_sm<dim, void> gr;

	//! rectangular domain to decompose
	Box<dim, T> domain;

	//! Global sub-sub-domain graph
	Graph_CSR<nm_v<dim>, nm_e> gp;

	//! Convert the graph to parmetis format
	Parmetis<Graph_CSR<nm_v<dim>, nm_e>> parmetis_graph;

	//! Ids of the sub-sub-domains owned by this processor (where we set the costs)
	openfpm::vector<size_t> sub_sub_owner;
	//! Init vtxdist needed for Parmetis
	//
	// vtxdist is a common array across processors; it indicates how
	// the vertices are distributed across the processors
	//
	// Example: we have 3 processors
	//
	// processor 0 has 3 vertices
	// processor 1 has 5 vertices
	// processor 2 has 4 vertices
	//
	// vtxdist contains: 0,3,8,12
	//
	// i.e. vtxdist stores the offsets that delimit each processor's range
	// of (re-mapped) global vertex ids
	//
	openfpm::vector<rid> vtxdist;
75
76 //! partitions
77 openfpm::vector<openfpm::vector<idx_t>> partitions;
78
79 //! Init data structure to keep trace of new vertices distribution in processors (needed to update main graph)
80 openfpm::vector<openfpm::vector<gid>> v_per_proc;
81
82 //! Hashmap to access to the global position given the re-mapped one (needed for access the map)
83 std::unordered_map<rid, gid> m2g;
84
85 //! Flag to check if weights are used on vertices
86 bool verticesGotWeights = false;
87
	/*! \brief Update the main graph and the subgraph with the partition data received from the other processors
	 *
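	 * Illustrative example (assumed values, not from a real run): with Np == 2,
	 * if processor 0 contributed the partition vector {0,1,0} (its three local
	 * vertices assigned to processors 0, 1 and 0) and processor 1 contributed
	 * {1,1}, the counting pass below yields n_vtxdist == {0,2,3} and the
	 * prefix-sum pass turns it into {0,2,5}, i.e. processor 0 owns the
	 * re-mapped ids [0,2) and processor 1 owns [2,5)
	 *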
	 */
	void updateGraphs()
	{
		sub_sub_owner.clear();

		size_t Np = v_cl.getProcessingUnits();

		// Init n_vtxdist to gather information about the new decomposition
		openfpm::vector<rid> n_vtxdist(Np + 1);
		for (size_t i = 0; i <= Np; i++)
			n_vtxdist.get(i).id = 0;

		// Update the main graph with the data received from processor i
		for (size_t i = 0; i < Np; i++)
		{
			size_t ndata = partitions.get(i).size();
			size_t k = 0;

			// Update the main graph with the received information
			for (rid l = vtxdist.get(i); k < ndata && l < vtxdist.get(i + 1); k++, ++l)
			{
				// Create new n_vtxdist (just count the vertices of each processor)
				++n_vtxdist.get(partitions.get(i).get(k) + 1);

				// Convert the vertex id from re-mapped to global
				auto v_id = m2g.find(l)->second.id;

				// Update the processor id in the vertex (using the old map)
				gp.template vertex_p<nm_v_proc_id>(v_id) = partitions.get(i).get(k);

				if (partitions.get(i).get(k) == (long int)v_cl.getProcessUnitID())
				{sub_sub_owner.add(v_id);}

				// Add the vertex to the temporary distribution structure (needed to update the main graph)
				v_per_proc.get(partitions.get(i).get(k)).add(getVertexGlobalId(l));
			}
		}

		// Create new n_vtxdist (accumulate the counters)
		for (size_t i = 2; i <= Np; i++)
			n_vtxdist.get(i) += n_vtxdist.get(i - 1);

		// Copy the new decomposition in the main vtxdist
		for (size_t i = 0; i <= Np; i++)
			vtxdist.get(i) = n_vtxdist.get(i);

		openfpm::vector<size_t> cnt;
		cnt.resize(Np);

		for (size_t i = 0 ; i < gp.getNVertex(); ++i)
		{
			size_t pid = gp.template vertex_p<nm_v_proc_id>(i);

			rid j = rid(vtxdist.get(pid).id + cnt.get(pid));
			gid gi = gid(i);

			gp.template vertex_p<nm_v_id>(i) = j.id;
			cnt.get(pid)++;

			setMapId(j,gi);
		}
	}

	/*! \brief operator to access the vertex by mapped position
	 *
	 * operator to access the vertex
	 *
	 * \param id re-mapped id of the vertex to access
	 *
	 */
	inline auto vertexByMapId(rid id) -> decltype( gp.vertex(m2g.find(id)->second.id) )
	{
		return gp.vertex(m2g.find(id)->second.id);
	}

	/*! \brief operator to remap a vertex to a new position
	 *
	 * \param n re-mapped position
	 * \param g global position
	 *
	 */
	inline void setMapId(rid n, gid g)
	{
		m2g[n] = g;
	}

	/*! \brief Get the global id of the vertex given the re-mapped one
	 *
	 * \param n re-mapped id
	 * \return global id
	 *
	 */
	gid getVertexGlobalId(rid n)
	{
		return m2g.find(n)->second;
	}

	/*! \brief Initialize the local-to-global map
	 *
	 * Initialize the re-mapped-to-global id map (identity mapping at start)
	 *
	 */
	void initLocalToGlobalMap()
	{
		gid g;
		rid i;
		i.id = 0;

		m2g.clear();
		for ( ; (size_t)i.id < gp.getNVertex(); ++i)
		{
			g.id = i.id;

			m2g.insert( { i, g });
		}
	}

	/*! \brief Callback of the sendrecv to set the size of the array received
	 *
	 * \param msg_i Index of the message
	 * \param total_msg Total number of messages
	 * \param total_p Total number of processors to communicate with
	 * \param i Processor id
	 * \param ri Request id
	 * \param tag Tag of the message
	 * \param ptr Void pointer parameter for additional data to pass to the call-back
	 *
	 * \return the pointer where the incoming message will be stored
	 */
	static void * message_receive(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, size_t tag, void * ptr)
	{
		openfpm::vector < openfpm::vector < idx_t >> *v = static_cast<openfpm::vector<openfpm::vector<idx_t>> *>(ptr);

		v->get(i).resize(msg_i / sizeof(idx_t));

		return &(v->get(i).get(0));
	}

	/*! \brief It updates the full decomposition
	 *
	 *
	 */
	void postDecomposition()
	{
		//! Get the processor id
		size_t p_id = v_cl.getProcessUnitID();

		//! Get the number of processing units
		size_t Np = v_cl.getProcessingUnits();

		// Number of local vertices
		size_t nl_vertex = vtxdist.get(p_id+1).id - vtxdist.get(p_id).id;

		//! Get the resulting partition for this processor
		idx_t * partition = parmetis_graph.getPartition();

		//! Prepare the vector of arrays to contain all the partitions
		partitions.get(p_id).resize(nl_vertex);
		if (nl_vertex != 0)
		{std::copy(partition, partition + nl_vertex, &partitions.get(p_id).get(0));}

		// Reset the data structure that keeps track of the new vertex distribution across processors (needed to update the main graph)
		for (size_t i = 0; i < Np; ++i)
		{
			v_per_proc.get(i).clear();
		}

		// Communicate the local distribution to the other processors
		// so that each one can reconstruct the global graph individually
		openfpm::vector<size_t> prc;
		openfpm::vector<size_t> sz;
		openfpm::vector<void *> ptr;

		for (size_t i = 0; i < Np; i++)
		{
			if (i != v_cl.getProcessUnitID())
			{
				partitions.get(i).clear();
				prc.add(i);
				sz.add(nl_vertex * sizeof(idx_t));
				ptr.add(partitions.get(p_id).getPointer());
			}
		}

		if (prc.size() == 0)
			v_cl.sendrecvMultipleMessagesNBX(0, NULL, NULL, NULL, message_receive, &partitions,NONE);
		else
			v_cl.sendrecvMultipleMessagesNBX(prc.size(), &sz.get(0), &prc.get(0), &ptr.get(0), message_receive, &partitions,NONE);

		// Update the graphs with the received data
		updateGraphs();
	}


public:

	/*! Constructor for the ParMetisDistribution class
	 *
	 * \param v_cl Vcluster to use as communication object in this class
	 */
	ParMetisDistribution(Vcluster<> & v_cl)
	:is_distributed(false),v_cl(v_cl), parmetis_graph(v_cl, v_cl.getProcessingUnits()), vtxdist(v_cl.getProcessingUnits() + 1), partitions(v_cl.getProcessingUnits()), v_per_proc(v_cl.getProcessingUnits())
	{
	}

	/*! Copy constructor
	 *
	 * \param pm Distribution to copy
	 *
	 */
	ParMetisDistribution(const ParMetisDistribution<dim,T> & pm)
	:v_cl(pm.v_cl),parmetis_graph(v_cl, v_cl.getProcessingUnits())
	{
		this->operator=(pm);
	}

	/*! Move constructor
	 *
	 * \param pm Distribution to move
	 *
	 */
	ParMetisDistribution(ParMetisDistribution<dim,T> && pm)
	:v_cl(pm.v_cl)
	{
		this->operator=(pm);
	}

	/*! \brief Create the Cartesian graph
	 *
	 * \param grid grid information
	 * \param dom domain
	 */
	void createCartGraph(grid_sm<dim, void> & grid, Box<dim, T> & dom)
	{
		size_t bc[dim];

		for (size_t i = 0 ; i < dim ; i++)
			bc[i] = NON_PERIODIC;

		// Set grid and domain
		gr = grid;
		domain = dom;

		// Create a cartesian grid graph
		CartesianGraphFactory<dim, Graph_CSR<nm_v<dim>, nm_e>> g_factory_part;
		gp = g_factory_part.template construct<NO_EDGE, nm_v_id, T, dim - 1, 0>(gr.getSize(), domain, bc);
		initLocalToGlobalMap();

		//! Get the number of processing units
		size_t Np = v_cl.getProcessingUnits();

		//! Division of the vertices into Np partitions:
		//! the first (gr.size() % Np) partitions get (div_v + 1) vertices,
		//! the remaining ones get div_v vertices
		size_t mod_v = gr.size() % Np;
		size_t div_v = gr.size() / Np;

		for (size_t i = 0; i <= Np; i++)
		{
			if (i < mod_v)
				vtxdist.get(i).id = (div_v + 1) * i;
			else
				vtxdist.get(i).id = (div_v) * i + mod_v;
		}
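
		// Illustrative example (assumed sizes): with gr.size() == 10 and
		// Np == 3, div_v == 3 and mod_v == 1, so the loop above produces
		// vtxdist == {0, 4, 7, 10}: processor 0 gets the vertices [0,4),
		// processor 1 gets [4,7) and processor 2 gets [7,10)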

		// Init the z axis to 0.0 when dim < 3 (to be fixed in graphFactory)
		if (dim < 3)
		{
			for (size_t i = 0; i < gp.getNVertex(); i++)
			{
				gp.vertex(i).template get<nm_v_x>()[2] = 0.0;
			}
		}
		for (size_t i = 0; i < gp.getNVertex(); i++)
		{
			gp.vertex(i).template get<nm_v_global_id>() = i;
		}

	}

	/*! \brief Get the current graph (main)
	 *
	 * \return the main graph
	 *
	 */
	Graph_CSR<nm_v<dim>, nm_e> & getGraph()
	{
		return gp;
	}

	/*! \brief Create the decomposition
	 *
	 */
	void decompose()
	{
		if (is_distributed == false)
			parmetis_graph.initSubGraph(gp, vtxdist, m2g, verticesGotWeights);
		else
			parmetis_graph.reset(gp, vtxdist, m2g, verticesGotWeights);

		//! Decompose
		parmetis_graph.decompose(vtxdist);

		// update after decomposition
		postDecomposition();

		is_distributed = true;
	}

	/*! \brief Refine the current decomposition
	 *
	 * It refines the current decomposition using the ParMetis RefineKway routine,
	 * and afterwards it also remaps the graph
	 *
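	 * A typical refinement cycle (illustrative sketch; the cost value 2 is an
	 * arbitrary placeholder, and `dist` is an instance of this class):
	 *
	 * \code
	 * dist.decompose();                  // initial partition
	 * for (size_t i = 0; i < dist.getNOwnerSubSubDomains(); i++)
	 *     dist.setComputationCost(dist.getOwnerSubSubDomain(i), 2);
	 * dist.refine();                     // rebalance with the updated costs
	 * \endcode
	 *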
	 */
	void refine()
	{
		// Reset the parmetis graph and reconstruct it
		parmetis_graph.reset(gp, vtxdist, m2g, verticesGotWeights);

		// Refine
		parmetis_graph.refine(vtxdist);

		postDecomposition();
	}

	/*! \brief Redecompose the current decomposition
	 *
	 * It redecomposes the graph using ParMetis, taking into account
	 * also the migration cost
	 *
	 */
	void redecompose()
	{
		// Reset the parmetis graph and reconstruct it
		parmetis_graph.reset(gp, vtxdist, m2g, verticesGotWeights);

		// Redecompose
		parmetis_graph.redecompose(vtxdist);

		postDecomposition();
	}

	/*! \brief Compute the unbalance of this processor compared to the optimal balance
	 *
	 * \return the unbalance as a percentage: 1.0 means 1% away from the optimum
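	 *
	 * Worked example (assumed loads, for illustration): with three processors
	 * whose loads are 90, 100 and 110, min == 90, max == 110 and sum == 300,
	 * so (max - min) / (sum / Np) == 20 / 100 == 0.2 and the function
	 * returns 20, i.e. a 20% spread between the most and the least loaded
	 * processor
	 *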
	 */
	float getUnbalance()
	{
		long t_cost = 0;

		long min, max, sum;
		float unbalance;

		t_cost = getProcessorLoad();

		min = t_cost;
		max = t_cost;
		sum = t_cost;

		v_cl.min(min);
		v_cl.max(max);
		v_cl.sum(sum);
		v_cl.execute();

		unbalance = ((float) (max - min)) / (float) (sum / v_cl.getProcessingUnits());

		return unbalance * 100;
	}

	/*! \brief function that returns the position of the vertex in space
	 *
	 * \param id vertex id
	 * \param pos vector that will contain x, y, z
	 *
	 */
	void getSubSubDomainPosition(size_t id, T (&pos)[dim])
	{
#ifdef SE_CLASS1
		if (id >= gp.getNVertex())
			std::cerr << __FILE__ << ":" << __LINE__ << " Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
#endif

		// Copy the geometric information inside the pos vector
		pos[0] = gp.vertex(id).template get<nm_v_x>()[0];
		pos[1] = gp.vertex(id).template get<nm_v_x>()[1];
		if (dim == 3)
			pos[2] = gp.vertex(id).template get<nm_v_x>()[2];
	}

	/*! \brief Function that sets the weight of the vertex
	 *
	 * \param id vertex id
	 * \param weight weight to give to the vertex
	 *
	 */
	inline void setComputationCost(size_t id, size_t weight)
	{
		if (!verticesGotWeights)
		{verticesGotWeights = true;}

#ifdef SE_CLASS1
		if (id >= gp.getNVertex())
		{std::cerr << __FILE__ << ":" << __LINE__ << " Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";}
#endif

		// Update the vertex in the main graph
		gp.vertex(id).template get<nm_v_computation>() = weight;
	}

	/*! \brief Checks if weights are used on the vertices
	 *
	 * \return true if weights are used in the decomposition
	 */
	bool weightsAreUsed()
	{
		return verticesGotWeights;
	}

	/*! \brief function that gets the weight of the vertex
	 *
	 * \param id vertex id
	 *
	 * \return the computational cost of the vertex
	 *
	 */
	size_t getSubSubDomainComputationCost(size_t id)
	{
#ifdef SE_CLASS1
		if (id >= gp.getNVertex())
			std::cerr << __FILE__ << ":" << __LINE__ << " Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
#endif

		return gp.vertex(id).template get<nm_v_computation>();
	}

	/*! \brief Compute the processor load counting the total weights of its vertices
	 *
	 * \return the computational load of the processor graph
	 */
	size_t getProcessorLoad()
	{
		size_t load = 0;

		// Processor id
		size_t p_id = v_cl.getProcessUnitID();


		for (rid i = vtxdist.get(p_id); i < vtxdist.get(p_id+1) ; ++i)
			load += gp.vertex(m2g.find(i)->second.id).template get<nm_v_computation>();

		//std::cout << v_cl.getProcessUnitID() << " weight " << load << " size " << sub_g.getNVertex() << "\n";
		return load;
	}

	/*! \brief Set the migration cost of the vertex id
	 *
	 * \param id id of the vertex to update
	 * \param migration cost of the migration
	 */
	void setMigrationCost(size_t id, size_t migration)
	{
#ifdef SE_CLASS1
		if (id >= gp.getNVertex())
			std::cerr << __FILE__ << ":" << __LINE__ << " Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
#endif

		gp.vertex(id).template get<nm_v_migration>() = migration;
	}

	/*! \brief Set the communication cost of the edge id
	 *
	 * \param v_id Id of the source vertex of the edge
	 * \param e index of the edge (i-th child of the vertex)
	 * \param communication Communication value
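	 *
	 * A sketch of how communication costs might be assigned over the whole
	 * graph (the cost value 1 is an arbitrary placeholder, and `dist` is an
	 * instance of this class):
	 *
	 * \code
	 * for (size_t v = 0; v < dist.getNSubSubDomains(); v++)
	 *     for (size_t e = 0; e < dist.getNSubSubDomainNeighbors(v); e++)
	 *         dist.setCommunicationCost(v, e, 1);
	 * \endcode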
	 */
	void setCommunicationCost(size_t v_id, size_t e, size_t communication)
	{
#ifdef SE_CLASS1

		size_t e_id = v_id + e;

		if (e_id >= gp.getNEdge())
			std::cerr << "Such edge doesn't exist (id = " << e_id << ", " << "total size = " << gp.getNEdge() << ")\n";
#endif

		gp.getChildEdge(v_id, e).template get<nm_e::communication>() = communication;
	}

	/*! \brief Returns the total number of sub-sub-domains in the distribution graph
	 *
	 * \return the total number of sub-sub-domains
	 *
	 */
	size_t getNSubSubDomains() const
	{
		return gp.getNVertex();
	}

	/*! \brief Return the total number of sub-sub-domains this processor owns
	 *
	 * \return the total number of sub-sub-domains owned by this processor
	 *
	 */
	size_t getNOwnerSubSubDomains() const
	{
		return sub_sub_owner.size();
	}

	/*! \brief Return the global id of an owned sub-sub-domain
	 *
	 * \param id index in the list of owned sub-sub-domains
	 *
	 * \return the global id
	 *
	 */
	size_t getOwnerSubSubDomain(size_t id) const
	{
		return sub_sub_owner.get(id);
	}

	/*! \brief Returns the total number of neighbors of the sub-sub-domain id
	 *
	 * \param id id of the sub-sub-domain
	 *
	 * \return the number of neighboring sub-sub-domains
	 *
	 */
	size_t getNSubSubDomainNeighbors(size_t id)
	{
#ifdef SE_CLASS1
		if (id >= gp.getNVertex())
			std::cerr << __FILE__ << ":" << __LINE__ << " Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
#endif

		return gp.getNChilds(id);
	}

	/*! \brief If dynamic load balancing is not performed, it is safe to eliminate the full internal graph held by this data structure
	 *
	 *
	 *
	 */
	void destroy_internal_graph()
	{
		gp.destroy();
		partitions.clear();
		partitions.shrink_to_fit();
		v_per_proc.clear();
		v_per_proc.shrink_to_fit();
		m2g.clear();
		m2g.rehash(0);
	}

	/*! \brief Print the current distribution and save it to a VTK file
	 *
	 * \param file filename
	 *
	 */
	void write(const std::string & file)
	{
		VTKWriter<Graph_CSR<nm_v<dim>, nm_e>, VTK_GRAPH> gv2(gp);
		gv2.write(std::to_string(v_cl.getProcessUnitID()) + "_" + file + ".vtk");
	}

	/*! \brief Copy assignment
	 *
	 * \param dist distribution to copy
	 *
	 * \return itself
	 *
	 */
	const ParMetisDistribution<dim,T> & operator=(const ParMetisDistribution<dim,T> & dist)
	{
		is_distributed = dist.is_distributed;
		gr = dist.gr;
		domain = dist.domain;
		gp = dist.gp;
		vtxdist = dist.vtxdist;
		partitions = dist.partitions;
		v_per_proc = dist.v_per_proc;
		verticesGotWeights = dist.verticesGotWeights;
		sub_sub_owner = dist.sub_sub_owner;
		m2g = dist.m2g;
		parmetis_graph = dist.parmetis_graph;

		return *this;
	}

	/*! \brief Move assignment
	 *
	 * \param dist distribution to move
	 *
	 * \return itself
	 *
	 */
	const ParMetisDistribution<dim,T> & operator=(ParMetisDistribution<dim,T> && dist)
	{
		is_distributed = dist.is_distributed;
		gr = dist.gr;
		domain = dist.domain;
		gp.swap(dist.gp);
		vtxdist.swap(dist.vtxdist);
		partitions.swap(dist.partitions);
		v_per_proc.swap(dist.v_per_proc);
		verticesGotWeights = dist.verticesGotWeights;
		sub_sub_owner.swap(dist.sub_sub_owner);
		m2g.swap(dist.m2g);
		parmetis_graph = dist.parmetis_graph;

		return *this;
	}

	/*! \brief Return the position of the sub-sub-domain
	 *
	 * \param j index in the list of owned sub-sub-domains
	 * \param p point where the position is stored
	 *
	 */
	void getSubSubDomainPos(size_t j, Point<dim,T> & p)
	{
		for (size_t i = 0 ; i < dim ; i++)
		{p.get(i) = gp.template vertex_p<0>(sub_sub_owner.get(j))[i];}
	}

	/*! \brief Get the decomposition counter
	 *
	 * \return the decomposition counter
	 *
	 */
	size_t get_ndec()
	{
		return parmetis_graph.get_ndec();
	}

	/*! \brief Set the tolerance for each partition
	 *
	 * \param tol tolerance
	 *
	 */
	void setDistTol(double tol)
	{
		parmetis_graph.setDistTol(tol);
	}
};

#endif /* SRC_DECOMPOSITION_PARMETISDISTRIBUTION_HPP_ */