/*
 * ParMetisDistribution.hpp
 *
 *  Created on: Nov 19, 2015
 *      Author: Antonio Leo
 */


#ifndef SRC_DECOMPOSITION_PARMETISDISTRIBUTION_HPP_
#define SRC_DECOMPOSITION_PARMETISDISTRIBUTION_HPP_


#include "SubdomainGraphNodes.hpp"
#include "parmetis_util.hpp"
#include "Graph/ids.hpp"
#include "Graph/CartesianGraphFactory.hpp"

#define PARMETIS_DISTRIBUTION_ERROR 100002
/*! \brief Class that distributes sub-sub-domains across processors using the ParMetis library
 *
 * Given a graph, and setting the computational cost, the communication cost (on the edges) and
 * the migration cost (or total communication cost), it produces an optimally balanced distribution
 *
 * In addition to Metis it provides the functionality to refine a previously computed
 * decomposition
 *
 * ### Initialize a Cartesian graph and decompose
 * \snippet Distribution_unit_tests.hpp Initialize a ParMetis Cartesian graph and decompose
 *
 * ### Refine the decomposition
 * \snippet Distribution_unit_tests.hpp refine with parmetis the decomposition
 *
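 * A minimal usage sketch (illustrative only: it assumes an initialized
 * Vcluster `v_cl`, and the grid size and domain extents are arbitrary
 * placeholders):
 *
 * \code
 * size_t sz[3] = {8, 8, 8};
 * grid_sm<3, void> info(sz);                           // 8x8x8 sub-sub-domain grid
 * Box<3, float> box({0.0, 0.0, 0.0}, {1.0, 1.0, 1.0}); // unit cube domain
 *
 * ParMetisDistribution<3, float> dist(v_cl);
 * dist.createCartGraph(info, box); // build the global sub-sub-domain graph
 * dist.decompose();                // initial ParMetis partition
 * \endcode
 *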
 */
template<unsigned int dim, typename T>
class ParMetisDistribution
{
	//! Is distributed
	bool is_distributed = false;

	//! Vcluster
	Vcluster<> & v_cl;

	//! Structure that stores the cartesian grid information
	grid_sm<dim, void> gr;

	//! rectangular domain to decompose
	Box<dim, T> domain;

	//! Global sub-sub-domain graph
	Graph_CSR<nm_v<dim>, nm_e> gp;

	//! Convert the graph to parmetis format
	Parmetis<Graph_CSR<nm_v<dim>, nm_e>> parmetis_graph;

	//! Ids of the sub-sub-domains owned by this processor (where we set the costs)
	openfpm::vector<size_t> sub_sub_owner;
	//! Init vtxdist needed for Parmetis
	//
	// vtxdist is a common array across processors; it indicates how
	// the vertices are distributed across the processors
	//
	// Example: we have 3 processors
	//
	// processor 0 has 3 vertices
	// processor 1 has 5 vertices
	// processor 2 has 4 vertices
	//
	// vtxdist contains: 0,3,8,12
	//
	// i.e. vtxdist stores the offsets that delimit each processor's range
	// of (re-mapped) global vertex ids
	//
	openfpm::vector<rid> vtxdist;
75
76 //! partitions
77 openfpm::vector<openfpm::vector<idx_t>> partitions;
78
79 //! Init data structure to keep trace of new vertices distribution in processors (needed to update main graph)
80 openfpm::vector<openfpm::vector<gid>> v_per_proc;
81
82 //! Hashmap to access to the global position given the re-mapped one (needed for access the map)
83 std::unordered_map<rid, gid> m2g;
84
85 //! Flag to check if weights are used on vertices
86 bool verticesGotWeights = false;
87
	/*! \brief Update the main graph and the subgraph with the partition data received from the other processors
	 *
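	 * Illustrative example (assumed values, not from a real run): with Np == 2,
	 * if processor 0 contributed the partition vector {0,1,0} (its three local
	 * vertices assigned to processors 0, 1 and 0) and processor 1 contributed
	 * {1,1}, the counting pass below yields n_vtxdist == {0,2,3} and the
	 * prefix-sum pass turns it into {0,2,5}, i.e. processor 0 owns the
	 * re-mapped ids [0,2) and processor 1 owns [2,5)
	 *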
	 */
	void updateGraphs()
	{
		sub_sub_owner.clear();

		size_t Np = v_cl.getProcessingUnits();

		// Init n_vtxdist to gather information about the new decomposition
		openfpm::vector<rid> n_vtxdist(Np + 1);
		for (size_t i = 0; i <= Np; i++)
			n_vtxdist.get(i).id = 0;

		// Update the main graph with the data received from processor i
		for (size_t i = 0; i < Np; i++)
		{
			size_t ndata = partitions.get(i).size();
			size_t k = 0;

			// Update the main graph with the received information
			for (rid l = vtxdist.get(i); k < ndata && l < vtxdist.get(i + 1); k++, ++l)
			{
				// Create new n_vtxdist (just count the vertices of each processor)
				++n_vtxdist.get(partitions.get(i).get(k) + 1);

				// Convert the vertex id from re-mapped to global
				auto v_id = m2g.find(l)->second.id;

				// Update the processor id in the vertex (using the old map)
				gp.template vertex_p<nm_v_proc_id>(v_id) = partitions.get(i).get(k);

				if (partitions.get(i).get(k) == (long int)v_cl.getProcessUnitID())
				{sub_sub_owner.add(v_id);}

				// Add the vertex to the temporary distribution structure (needed to update the main graph)
				v_per_proc.get(partitions.get(i).get(k)).add(getVertexGlobalId(l));
			}
		}

		// Create new n_vtxdist (accumulate the counters)
		for (size_t i = 2; i <= Np; i++)
			n_vtxdist.get(i) += n_vtxdist.get(i - 1);

		// Copy the new decomposition in the main vtxdist
		for (size_t i = 0; i <= Np; i++)
			vtxdist.get(i) = n_vtxdist.get(i);

		openfpm::vector<size_t> cnt;
		cnt.resize(Np);

		for (size_t i = 0 ; i < gp.getNVertex(); ++i)
		{
			size_t pid = gp.template vertex_p<nm_v_proc_id>(i);

			rid j = rid(vtxdist.get(pid).id + cnt.get(pid));
			gid gi = gid(i);

			gp.template vertex_p<nm_v_id>(i) = j.id;
			cnt.get(pid)++;

			setMapId(j,gi);
		}
	}

	/*! \brief operator to access the vertex by mapped position
	 *
	 * operator to access the vertex
	 *
	 * \param id re-mapped id of the vertex to access
	 *
	 */
	inline auto vertexByMapId(rid id) -> decltype( gp.vertex(m2g.find(id)->second.id) )
	{
		return gp.vertex(m2g.find(id)->second.id);
	}

	/*! \brief operator to remap a vertex to a new position
	 *
	 * \param n re-mapped position
	 * \param g global position
	 *
	 */
	inline void setMapId(rid n, gid g)
	{
		m2g[n] = g;
	}

	/*! \brief Get the global id of the vertex given the re-mapped one
	 *
	 * \param n re-mapped id
	 * \return global id
	 *
	 */
	gid getVertexGlobalId(rid n)
	{
		return m2g.find(n)->second;
	}

	/*! \brief Initialize the local-to-global map
	 *
	 * Initialize the re-mapped-to-global id map (identity mapping at start)
	 *
	 */
	void initLocalToGlobalMap()
	{
		gid g;
		rid i;
		i.id = 0;

		m2g.clear();
		for ( ; (size_t)i.id < gp.getNVertex(); ++i)
		{
			g.id = i.id;

			m2g.insert( { i, g });
		}
	}

	/*! \brief Callback of the sendrecv to set the size of the array received
	 *
	 * \param msg_i Index of the message
	 * \param total_msg Total number of messages
	 * \param total_p Total number of processors to communicate with
	 * \param i Processor id
	 * \param ri Request id
	 * \param tag Tag of the message
	 * \param ptr Void pointer parameter for additional data to pass to the call-back
	 *
	 * \return the pointer where the incoming message will be stored
	 */
	static void * message_receive(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, size_t tag, void * ptr)
	{
		openfpm::vector < openfpm::vector < idx_t >> *v = static_cast<openfpm::vector<openfpm::vector<idx_t>> *>(ptr);

		v->get(i).resize(msg_i / sizeof(idx_t));

		return &(v->get(i).get(0));
	}

	/*! \brief It updates the full decomposition
	 *
	 *
	 */
	void postDecomposition()
	{
		//! Get the processor id
		size_t p_id = v_cl.getProcessUnitID();

		//! Get the number of processing units
		size_t Np = v_cl.getProcessingUnits();

		// Number of local vertices
		size_t nl_vertex = vtxdist.get(p_id+1).id - vtxdist.get(p_id).id;

		//! Get the resulting partition for this processor
		idx_t * partition = parmetis_graph.getPartition();

		//! Prepare the vector of arrays to contain all the partitions
		partitions.get(p_id).resize(nl_vertex);
		if (nl_vertex != 0)
		{std::copy(partition, partition + nl_vertex, &partitions.get(p_id).get(0));}

		// Reset the data structure that keeps track of the new vertex distribution across processors (needed to update the main graph)
		for (size_t i = 0; i < Np; ++i)
		{
			v_per_proc.get(i).clear();
		}

		// Communicate the local distribution to the other processors
		// so that each one can reconstruct the global graph individually
		openfpm::vector<size_t> prc;
		openfpm::vector<size_t> sz;
		openfpm::vector<void *> ptr;

		for (size_t i = 0; i < Np; i++)
		{
			if (i != v_cl.getProcessUnitID())
			{
				partitions.get(i).clear();
				prc.add(i);
				sz.add(nl_vertex * sizeof(idx_t));
				ptr.add(partitions.get(p_id).getPointer());
			}
		}

		if (prc.size() == 0)
			v_cl.sendrecvMultipleMessagesNBX(0, NULL, NULL, NULL, message_receive, &partitions,NONE);
		else
			v_cl.sendrecvMultipleMessagesNBX(prc.size(), &sz.get(0), &prc.get(0), &ptr.get(0), message_receive, &partitions,NONE);

		// Update the graphs with the received data
		updateGraphs();
	}


public:

	/*! Constructor for the ParMetisDistribution class
	 *
	 * \param v_cl Vcluster to use as communication object in this class
	 */
	ParMetisDistribution(Vcluster<> & v_cl)
	:is_distributed(false),v_cl(v_cl), parmetis_graph(v_cl, v_cl.getProcessingUnits()), vtxdist(v_cl.getProcessingUnits() + 1), partitions(v_cl.getProcessingUnits()), v_per_proc(v_cl.getProcessingUnits())
	{
	}

	/*! Copy constructor
	 *
	 * \param pm Distribution to copy
	 *
	 */
	ParMetisDistribution(const ParMetisDistribution<dim,T> & pm)
	:v_cl(pm.v_cl),parmetis_graph(v_cl, v_cl.getProcessingUnits())
	{
		this->operator=(pm);
	}

	/*! Move constructor
	 *
	 * \param pm Distribution to move
	 *
	 */
	ParMetisDistribution(ParMetisDistribution<dim,T> && pm)
	:v_cl(pm.v_cl)
	{
		this->operator=(pm);
	}

	/*! \brief Create the Cartesian graph
	 *
	 * \param grid grid information
	 * \param dom domain
	 */
	void createCartGraph(grid_sm<dim, void> & grid, Box<dim, T> & dom)
	{
		size_t bc[dim];

		for (size_t i = 0 ; i < dim ; i++)
			bc[i] = NON_PERIODIC;

		// Set grid and domain
		gr = grid;
		domain = dom;

		// Create a cartesian grid graph
		CartesianGraphFactory<dim, Graph_CSR<nm_v<dim>, nm_e>> g_factory_part;
		gp = g_factory_part.template construct<NO_EDGE, nm_v_id, T, dim - 1, 0>(gr.getSize(), domain, bc);
		initLocalToGlobalMap();

		//! Get the number of processing units
		size_t Np = v_cl.getProcessingUnits();

		//! Division of the vertices into Np partitions:
		//! the first (gr.size() % Np) partitions get (div_v + 1) vertices,
		//! the remaining ones get div_v vertices
		size_t mod_v = gr.size() % Np;
		size_t div_v = gr.size() / Np;

		for (size_t i = 0; i <= Np; i++)
		{
			if (i < mod_v)
				vtxdist.get(i).id = (div_v + 1) * i;
			else
				vtxdist.get(i).id = (div_v) * i + mod_v;
		}
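
		// Illustrative example (assumed sizes): with gr.size() == 10 and
		// Np == 3, div_v == 3 and mod_v == 1, so the loop above produces
		// vtxdist == {0, 4, 7, 10}: processor 0 gets the vertices [0,4),
		// processor 1 gets [4,7) and processor 2 gets [7,10)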

		// Init the z axis to 0.0 when dim < 3 (to be fixed in graphFactory)
		if (dim < 3)
		{
			for (size_t i = 0; i < gp.getNVertex(); i++)
			{
				gp.vertex(i).template get<nm_v_x>()[2] = 0.0;
			}
		}
		for (size_t i = 0; i < gp.getNVertex(); i++)
		{
			gp.vertex(i).template get<nm_v_global_id>() = i;
		}

	}

	/*! \brief Get the current graph (main)
	 *
	 * \return the main graph
	 *
	 */
	Graph_CSR<nm_v<dim>, nm_e> & getGraph()
	{
		return gp;
	}

	/*! \brief Create the decomposition
	 *
	 */
	void decompose()
	{
		if (is_distributed == false)
			parmetis_graph.initSubGraph(gp, vtxdist, m2g, verticesGotWeights);
		else
			parmetis_graph.reset(gp, vtxdist, m2g, verticesGotWeights);

		//! Decompose
		parmetis_graph.decompose(vtxdist);

		// update after decomposition
		postDecomposition();

		is_distributed = true;
	}

	/*! \brief Refine the current decomposition
	 *
	 * It refines the current decomposition using the ParMetis RefineKway routine,
	 * and afterwards it also remaps the graph
	 *
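	 * A typical refinement cycle (illustrative sketch; the cost value 2 is an
	 * arbitrary placeholder, and `dist` is an instance of this class):
	 *
	 * \code
	 * dist.decompose();                  // initial partition
	 * for (size_t i = 0; i < dist.getNOwnerSubSubDomains(); i++)
	 *     dist.setComputationCost(dist.getOwnerSubSubDomain(i), 2);
	 * dist.refine();                     // rebalance with the updated costs
	 * \endcode
	 *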
	 */
	void refine()
	{
		// Reset the parmetis graph and reconstruct it
		parmetis_graph.reset(gp, vtxdist, m2g, verticesGotWeights);

		// Refine
		parmetis_graph.refine(vtxdist);

		postDecomposition();
	}

	/*! \brief Redecompose the current decomposition
	 *
	 * It redecomposes the graph using ParMetis, taking into account
	 * also the migration cost
	 *
	 */
	void redecompose()
	{
		// Reset the parmetis graph and reconstruct it
		parmetis_graph.reset(gp, vtxdist, m2g, verticesGotWeights);

		// Redecompose
		parmetis_graph.redecompose(vtxdist);

		postDecomposition();
	}

	/*! \brief Compute the unbalance of this processor compared to the optimal balance
	 *
	 * \return the unbalance as a percentage: 1.0 means 1% away from the optimum
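	 *
	 * Worked example (assumed loads, for illustration): with three processors
	 * whose loads are 90, 100 and 110, min == 90, max == 110 and sum == 300,
	 * so (max - min) / (sum / Np) == 20 / 100 == 0.2 and the function
	 * returns 20, i.e. a 20% spread between the most and the least loaded
	 * processor
	 *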
	 */
	float getUnbalance()
	{
		long t_cost = 0;

		long min, max, sum;
		float unbalance;

		t_cost = getProcessorLoad();

		min = t_cost;
		max = t_cost;
		sum = t_cost;

		v_cl.min(min);
		v_cl.max(max);
		v_cl.sum(sum);
		v_cl.execute();

		unbalance = ((float) (max - min)) / (float) (sum / v_cl.getProcessingUnits());

		return unbalance * 100;
	}

	/*! \brief function that returns the position of the vertex in space
	 *
	 * \param id vertex id
	 * \param pos vector that will contain x, y, z
	 *
	 */
	void getSubSubDomainPosition(size_t id, T (&pos)[dim])
	{
#ifdef SE_CLASS1
		if (id >= gp.getNVertex())
			std::cerr << __FILE__ << ":" << __LINE__ << " Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
#endif

		// Copy the geometric information inside the pos vector
		pos[0] = gp.vertex(id).template get<nm_v_x>()[0];
		pos[1] = gp.vertex(id).template get<nm_v_x>()[1];
		if (dim == 3)
			pos[2] = gp.vertex(id).template get<nm_v_x>()[2];
	}

	/*! \brief Function that sets the weight of the vertex
	 *
	 * \param id vertex id
	 * \param weight weight to give to the vertex
	 *
	 */
	inline void setComputationCost(size_t id, size_t weight)
	{
		if (!verticesGotWeights)
		{verticesGotWeights = true;}

#ifdef SE_CLASS1
		if (id >= gp.getNVertex())
		{std::cerr << __FILE__ << ":" << __LINE__ << " Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";}
#endif

		// Update the vertex in the main graph
		gp.vertex(id).template get<nm_v_computation>() = weight;
	}

	/*! \brief Checks if weights are used on the vertices
	 *
	 * \return true if weights are used in the decomposition
	 */
	bool weightsAreUsed()
	{
		return verticesGotWeights;
	}

	/*! \brief function that gets the weight of the vertex
	 *
	 * \param id vertex id
	 *
	 * \return the computational cost of the vertex
	 *
	 */
	size_t getSubSubDomainComputationCost(size_t id)
	{
#ifdef SE_CLASS1
		if (id >= gp.getNVertex())
			std::cerr << __FILE__ << ":" << __LINE__ << " Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
#endif

		return gp.vertex(id).template get<nm_v_computation>();
	}

	/*! \brief Compute the processor load counting the total weights of its vertices
	 *
	 * \return the computational load of the processor graph
	 */
	size_t getProcessorLoad()
	{
		size_t load = 0;

		// Processor id
		size_t p_id = v_cl.getProcessUnitID();


		for (rid i = vtxdist.get(p_id); i < vtxdist.get(p_id+1) ; ++i)
			load += gp.vertex(m2g.find(i)->second.id).template get<nm_v_computation>();

		//std::cout << v_cl.getProcessUnitID() << " weight " << load << " size " << sub_g.getNVertex() << "\n";
		return load;
	}

	/*! \brief Set the migration cost of the vertex id
	 *
	 * \param id id of the vertex to update
	 * \param migration cost of the migration
	 */
	void setMigrationCost(size_t id, size_t migration)
	{
#ifdef SE_CLASS1
		if (id >= gp.getNVertex())
			std::cerr << __FILE__ << ":" << __LINE__ << " Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
#endif

		gp.vertex(id).template get<nm_v_migration>() = migration;
	}

	/*! \brief Set the communication cost of the edge id
	 *
	 * \param v_id Id of the source vertex of the edge
	 * \param e index of the edge (i-th child of the vertex)
	 * \param communication Communication value
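	 *
	 * A sketch of how communication costs might be assigned over the whole
	 * graph (the cost value 1 is an arbitrary placeholder, and `dist` is an
	 * instance of this class):
	 *
	 * \code
	 * for (size_t v = 0; v < dist.getNSubSubDomains(); v++)
	 *     for (size_t e = 0; e < dist.getNSubSubDomainNeighbors(v); e++)
	 *         dist.setCommunicationCost(v, e, 1);
	 * \endcode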
	 */
	void setCommunicationCost(size_t v_id, size_t e, size_t communication)
	{
#ifdef SE_CLASS1

		size_t e_id = v_id + e;

		if (e_id >= gp.getNEdge())
			std::cerr << "Such edge doesn't exist (id = " << e_id << ", " << "total size = " << gp.getNEdge() << ")\n";
#endif

		gp.getChildEdge(v_id, e).template get<nm_e::communication>() = communication;
	}

	/*! \brief Returns the total number of sub-sub-domains in the distribution graph
	 *
	 * \return the total number of sub-sub-domains
	 *
	 */
	size_t getNSubSubDomains() const
	{
		return gp.getNVertex();
	}

	/*! \brief Return the total number of sub-sub-domains this processor owns
	 *
	 * \return the total number of sub-sub-domains owned by this processor
	 *
	 */
	size_t getNOwnerSubSubDomains() const
	{
		return sub_sub_owner.size();
	}

	/*! \brief Return the global id of an owned sub-sub-domain
	 *
	 * \param id index in the list of owned sub-sub-domains
	 *
	 * \return the global id
	 *
	 */
	size_t getOwnerSubSubDomain(size_t id) const
	{
		return sub_sub_owner.get(id);
	}

	/*! \brief Returns the total number of neighbors of the sub-sub-domain id
	 *
	 * \param id id of the sub-sub-domain
	 *
	 * \return the number of neighboring sub-sub-domains
	 *
	 */
	size_t getNSubSubDomainNeighbors(size_t id)
	{
#ifdef SE_CLASS1
		if (id >= gp.getNVertex())
			std::cerr << __FILE__ << ":" << __LINE__ << " Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
#endif

		return gp.getNChilds(id);
	}

	/*! \brief If dynamic load balancing is not performed, it is safe to eliminate the full internal graph held by this data structure
	 *
	 *
	 *
	 */
	void destroy_internal_graph()
	{
		gp.destroy();
		partitions.clear();
		partitions.shrink_to_fit();
		v_per_proc.clear();
		v_per_proc.shrink_to_fit();
		m2g.clear();
		m2g.rehash(0);
	}

	/*! \brief Print the current distribution and save it to a VTK file
	 *
	 * \param file filename
	 *
	 */
	void write(const std::string & file)
	{
		VTKWriter<Graph_CSR<nm_v<dim>, nm_e>, VTK_GRAPH> gv2(gp);
		gv2.write(std::to_string(v_cl.getProcessUnitID()) + "_" + file + ".vtk");
	}

	/*! \brief Copy assignment
	 *
	 * \param dist distribution to copy
	 *
	 * \return itself
	 *
	 */
	const ParMetisDistribution<dim,T> & operator=(const ParMetisDistribution<dim,T> & dist)
	{
		is_distributed = dist.is_distributed;
		gr = dist.gr;
		domain = dist.domain;
		gp = dist.gp;
		vtxdist = dist.vtxdist;
		partitions = dist.partitions;
		v_per_proc = dist.v_per_proc;
		verticesGotWeights = dist.verticesGotWeights;
		sub_sub_owner = dist.sub_sub_owner;
		m2g = dist.m2g;
		parmetis_graph = dist.parmetis_graph;

		return *this;
	}

	/*! \brief Move assignment
	 *
	 * \param dist distribution to move
	 *
	 * \return itself
	 *
	 */
	const ParMetisDistribution<dim,T> & operator=(ParMetisDistribution<dim,T> && dist)
	{
		is_distributed = dist.is_distributed;
		gr = dist.gr;
		domain = dist.domain;
		gp.swap(dist.gp);
		vtxdist.swap(dist.vtxdist);
		partitions.swap(dist.partitions);
		v_per_proc.swap(dist.v_per_proc);
		verticesGotWeights = dist.verticesGotWeights;
		sub_sub_owner.swap(dist.sub_sub_owner);
		m2g.swap(dist.m2g);
		parmetis_graph = dist.parmetis_graph;

		return *this;
	}

	/*! \brief Return the position of the sub-sub-domain
	 *
	 * \param j index in the list of owned sub-sub-domains
	 * \param p point where the position is stored
	 *
	 */
	void getSubSubDomainPos(size_t j, Point<dim,T> & p)
	{
		for (size_t i = 0 ; i < dim ; i++)
		{p.get(i) = gp.template vertex_p<0>(sub_sub_owner.get(j))[i];}
	}

	/*! \brief Get the decomposition counter
	 *
	 * \return the decomposition counter
	 *
	 */
	size_t get_ndec()
	{
		return parmetis_graph.get_ndec();
	}

	/*! \brief Set the tolerance for each partition
	 *
	 * \param tol tolerance
	 *
	 */
	void setDistTol(double tol)
	{
		parmetis_graph.setDistTol(tol);
	}
};

#endif /* SRC_DECOMPOSITION_PARMETISDISTRIBUTION_HPP_ */