/*
 * grid_dist_id_comm.hpp
 *
 *  Created on: Nov 13, 2016
 *      Author: yaroslav
 */

#ifndef SRC_GRID_GRID_DIST_ID_COMM_HPP_
#define SRC_GRID_GRID_DIST_ID_COMM_HPP_

#include "Vector/vector_dist_ofb.hpp"
#include "Grid/copy_grid_fast.hpp"
#include "grid_dist_util.hpp"
#include "util/common_pdata.hpp"
#include "lib/pdata.hpp"


/*! \brief Unpack selector
 *
 *
 */
template<bool result,typename T, typename device_grid, typename Memory>
struct grid_unpack_selector_with_prp
{
	/*! \brief Error fallback: it is not known how to unpack this object
	 *
	 * \param recv_buf buffer with data
	 * \param sub2 where to unpack (extension)
	 * \param gd grid where to unpack
	 * \param ps unpack status
	 *
	 */
	template<template<typename,typename> class op, typename sub_it_type, int ... prp> static void call_unpack(ExtPreAlloc<Memory> & recv_buf, sub_it_type & sub2, device_grid & gd, Unpack_stat & ps)
	{
		std::cerr << __FILE__ << ":" << __LINE__ << " Error: complex properties on grids are not supported yet" << std::endl;
	}
};

/*! \brief Unpack selector
 *
 *
 */
template<typename T, typename device_grid, typename Memory>
struct grid_unpack_selector_with_prp<true,T,device_grid,Memory>
{

	/*! \brief Unpack
	 *
	 * \param recv_buf buffer with data
	 * \param sub2 where to unpack (extension)
	 * \param gd grid where to unpack
	 * \param ps unpack status
	 *
	 */
	template<template<typename,typename> class op, typename sub_it_type, unsigned int ... prp>
	static void call_unpack(ExtPreAlloc<Memory> & recv_buf,
							sub_it_type & sub2,
							device_grid & gd,
							Unpack_stat & ps)
	{
		gd.template unpack_with_op<op,Memory,prp ...>(recv_buf,sub2,ps);
	}
};

/*! \brief Unpack selector
 *
 * Stub version
 *
 */
template<typename device_grid, typename Memory, typename T>
struct grid_call_serialize_variadic {};

/*! \brief Unpack selector
 *
 * Selector when there is no max_prop
 *
 */
template<typename device_grid, typename Memory , int ... prp>
struct grid_call_serialize_variadic<device_grid, Memory, index_tuple<prp...>>
{

	/*! \brief Unpack
	 *
	 * \param recv_buf buffer with data
	 * \param sub2 where to unpack (extension)
	 * \param dg grid where to unpack
	 * \param ps unpack status
	 *
	 */
	template<template<typename,typename> class op, typename sub_it_type, typename T>
	inline static void call_unpack(ExtPreAlloc<Memory> & recv_buf,
								   sub_it_type & sub2,
								   device_grid & dg,
								   Unpack_stat & ps)
	{
		const bool result = has_pack_gen<typename T::type>::value == false;

		grid_unpack_selector_with_prp<result,T,device_grid,Memory>::template call_unpack<op,sub_it_type,prp...>(recv_buf,sub2,dg,ps);
	}
};

/*! \brief Unpack selector
 *
 * Selector when there is max_prop
 *
 */
template<template<typename,typename> class op, typename T, typename device_grid, typename Memory>
struct grid_unpack_with_prp
{

	/*! \brief Unpack
	 *
	 * \param recv_buf buffer with data
	 * \param sub2 where to unpack (extension)
	 * \param dg grid where to unpack
	 * \param ps unpack status
	 *
	 */
	template<typename sub_it_type, unsigned int ... prp> static void unpacking(ExtPreAlloc<Memory> & recv_buf, sub_it_type & sub2, device_grid & dg, Unpack_stat & ps)
	{
		typedef index_tuple<prp...> ind_prop_to_pack;
		grid_call_serialize_variadic<device_grid,Memory,ind_prop_to_pack>::template call_unpack<op,sub_it_type,T>(recv_buf, sub2, dg, ps);
	}
};
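
// Illustrative sketch (not part of the build): how the dispatch chain above is
// typically driven. Assuming prp_object is the object created from the selected
// properties and sub2 a sub-grid iterator (see merge_received_data_put below):
//
//    auto sub2 = loc_grid.get(sub_id).getIterator(box.getKP1(),box.getKP2());
//    grid_unpack_with_prp<op,prp_object,device_grid,Memory>
//        ::template unpacking<decltype(sub2),prp...>(recv_buf,sub2,loc_grid.get(sub_id),ps);
//
// unpacking<...> collects prp... into index_tuple<prp...>, routes it through
// grid_call_serialize_variadic and finally selects at compile time (via
// has_pack_gen) between the working unpacker above and the error stub for
// complex properties.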

/*! \brief This class is a helper for the communication of grid_dist_id
 *
 * \tparam dim Dimensionality of the grid
 * \tparam St Type of space where the grid is living
 * \tparam T object the grid is storing
 * \tparam Decomposition Class that decomposes the grid, for example CartDecomposition
 * \tparam Memory the allocator
 * \tparam device_grid type of the base structure that stores the data
 *
 * \see grid_dist_id
 *
 */

template<unsigned int dim, typename St, typename T, typename Decomposition = CartDecomposition<dim,St>,typename Memory=HeapMemory , typename device_grid=grid_cpu<dim,T> >
class grid_dist_id_comm
{
	//! VCluster
	Vcluster<Memory> & v_cl;

	//! Maps the processor id with the communication request into the map procedure
	openfpm::vector<size_t> p_map_req;

	//! Stores the list of processors that communicate with us (local processor)
	openfpm::vector<size_t> prc_recv_map;

	//! Stores the size of the elements added for each processor that communicates with us (local processor)
	openfpm::vector<size_t> recv_sz_map;

	//! List of processors to send to
	openfpm::vector<size_t> send_prc_queue;

	//! Pointers to the memory to send
	openfpm::vector<void *> send_pointer;

	//! sizes to send
	openfpm::vector<size_t> send_size;

	//! receiving buffers in case of dynamic (NBX) receives
	openfpm::vector_fr<BMemory<Memory>> recv_buffers;

	struct rp_id
	{
		int p_id;
		int i;

		bool operator<(const rp_id & tmp) const
		{
			return p_id < tmp.p_id;
		}
	};
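	// Note: operator< orders by source processor id, so sorting recv_proc after an
	// NBX exchange (see queue_recv_data_get) restores a deterministic processor
	// order over buffers that arrived in arbitrary order; the field i remembers
	// the arrival position inside recv_buffers.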

	//! receiving processors
	openfpm::vector<rp_id> recv_proc;

	//! For each near processor, outgoing intersection grid
	//! \warning m_oGrid is assumed to be an ordered list
	//! first id is grid
	//! second id is the processor id
	openfpm::vector<openfpm::vector<aggregate<device_grid,SpaceBox<dim,long int>>>> m_oGrid;

	//! Memory for the ghost sending buffer
	Memory g_send_prp_mem;

	//! Memory for the ghost receiving buffer
	Memory g_recv_prp_mem;

	//! send pointers
	openfpm::vector<void *> pointers;
	openfpm::vector<void *> pointers2;

	//! header unpacker info
	openfpm::vector_gpu<aggregate<void *,void *,int>> pointers_h;
	int n_headers_slot = 1;
	openfpm::vector_gpu<aggregate<size_t,size_t,unsigned int>> headers;

	//! Receiving option
	size_t opt;

	/*! \brief Sync the local ghost part
	 *
	 * \tparam prp... properties to sync
	 *
	 * \param loc_ig_box local internal ghost boxes
	 * \param loc_eg_box local external ghost boxes
	 * \param gdb_ext information about the local grids
	 * \param loc_grid local grids
	 * \param g_id_to_external_ghost_box from global index to external ghost box
	 * \param ginfo grid information
	 * \param use_bx_def true if the grids are defined through a set of boxes
	 * \param opt options
	 *
	 */
	template<int... prp> void ghost_get_local(const openfpm::vector<i_lbox_grid<dim>> & loc_ig_box,
						const openfpm::vector<e_lbox_grid<dim>> & loc_eg_box,
						const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
						openfpm::vector<device_grid> & loc_grid,
						std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
						const grid_sm<dim,void> & ginfo,
						bool use_bx_def,
						size_t opt)
	{
		rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
		if (opt & SKIP_LABELLING)
		{opt_ = rem_copy_opt::KEEP_GEOMETRY;}

		if (opt_ != rem_copy_opt::KEEP_GEOMETRY)
		{
			for (size_t i = 0 ; i < loc_grid.size() ; i++)
			{loc_grid.get(i).copyRemoveReset();}
		}

		grid_key_dx<dim> cnt[1];
		cnt[0].zero();

		//! For all the sub-domains
		for (size_t i = 0 ; i < loc_ig_box.size() ; i++)
		{
			//! For all the internal ghost boxes of each sub-domain
			for (size_t j = 0 ; j < loc_ig_box.get(i).bid.size() ; j++)
			{
				size_t sub_id_src_gdb_ext = loc_ig_box.get(i).bid.get(j).sub_gdb_ext;

				// sub-domain connected with the external box
				size_t sub_id_dst = loc_ig_box.get(i).bid.get(j).sub;

				// local internal ghost box connected
				for (size_t v = 0 ; v < loc_ig_box.get(i).bid.get(j).k.size() ; v++)
				{
					size_t k = loc_ig_box.get(i).bid.get(j).k.get(v);

					Box<dim,long int> bx_dst = loc_eg_box.get(sub_id_dst).bid.get(k).ebox;

					// convert into local
					size_t sub_id_dst_gdb_ext = loc_eg_box.get(sub_id_dst).bid.get(k).sub_gdb_ext;
					bx_dst -= gdb_ext.get(sub_id_dst_gdb_ext).origin;

					// create 2 sub-grid iterators

					if (bx_dst.isValid() == false)
					{continue;}

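					// The source box is the mirror of the external ghost box:
					// flip_box shifts ebox across the periodic boundary (using its
					// combination cmb) so that it overlaps the paired internal region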
					Box<dim,long int> bx_src = flip_box(loc_eg_box.get(sub_id_dst).bid.get(k).ebox,loc_eg_box.get(sub_id_dst).bid.get(k).cmb,ginfo);
					bx_src -= gdb_ext.get(sub_id_src_gdb_ext).origin;

					#ifdef SE_CLASS1

					if (use_bx_def == false)
					{
						if (loc_eg_box.get(sub_id_dst).bid.get(k).sub != i)
						{std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination are not correctly linked" << "\n";}
					}

					if (bx_src.getVolumeKey() != bx_dst.getVolumeKey())
					{std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination do not match in size" << "\n";}

					#endif

					auto & gd = loc_grid.get(sub_id_dst_gdb_ext);

					gd.remove(bx_dst);
					gd.copy_to(loc_grid.get(sub_id_src_gdb_ext),bx_src,bx_dst);
				}
			}
		}

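		// The remove/copy finalization is split into three phases that run in order
		// across all local grids (each phase completes on every grid before the next
		// one starts), so that per-phase work can be batched, e.g. on the device via
		// the mgpu context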
		for (size_t i = 0 ; i < loc_grid.size() ; i++)
		{
			loc_grid.get(i).template removeCopyToFinalize<prp ...>(v_cl.getmgpuContext(), rem_copy_opt::PHASE1 | opt_);
		}

		for (size_t i = 0 ; i < loc_grid.size() ; i++)
		{
			loc_grid.get(i).template removeCopyToFinalize<prp ...>(v_cl.getmgpuContext(), rem_copy_opt::PHASE2 | opt_);
		}

		for (size_t i = 0 ; i < loc_grid.size() ; i++)
		{
			loc_grid.get(i).template removeCopyToFinalize<prp ...>(v_cl.getmgpuContext(), rem_copy_opt::PHASE3 | opt_);
		}
	}

	/*! \brief Sync the local ghost part
	 *
	 * \tparam op merge operation
	 * \tparam prp... properties to sync
	 *
	 * \param loc_ig_box local internal ghost boxes
	 * \param loc_eg_box local external ghost boxes
	 * \param gdb_ext information about the local grids
	 * \param loc_grid local grids
	 * \param g_id_to_external_ghost_box global-id to external ghost box
	 *
	 */
	template<template<typename,typename> class op, int... prp> void ghost_put_local(const openfpm::vector<i_lbox_grid<dim>> & loc_ig_box,
						const openfpm::vector<e_lbox_grid<dim>> & loc_eg_box,
						const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
						openfpm::vector<device_grid> & loc_grid,
						openfpm::vector<std::unordered_map<size_t,size_t>> & g_id_to_external_ghost_box)
	{
		//! For all the sub-domains
		for (size_t i = 0 ; i < loc_eg_box.size() ; i++)
		{
			//! For all the external ghost boxes of each sub-domain
			for (size_t j = 0 ; j < loc_eg_box.get(i).bid.size() ; j++)
			{
				if (loc_eg_box.get(i).bid.get(j).initialized == false)
					continue;

				Box<dim,long int> bx_src = loc_eg_box.get(i).bid.get(j).ebox;
				// convert into local
				bx_src -= gdb_ext.get(i).origin;

				// sub-domain connected with the external box
				size_t sub_id_dst = loc_eg_box.get(i).bid.get(j).sub;

				// local external ghost box connected
				size_t k = loc_eg_box.get(i).bid.get(j).k;

				Box<dim,long int> bx_dst = loc_ig_box.get(sub_id_dst).bid.get(k).box;

				// convert into local
				bx_dst -= gdb_ext.get(sub_id_dst).origin;

				// create 2 sub-grid iterators

				if (bx_dst.isValid() == false)
				{continue;}

#ifdef SE_CLASS1

				if (loc_ig_box.get(sub_id_dst).bid.get(k).sub != i)
					std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination are not correctly linked" << "\n";

				if (bx_src.getVolume() != bx_dst.getVolume())
				{std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination do not match in size" << "\n";}

#endif

				auto & gd2 = loc_grid.get(sub_id_dst);
				gd2.template copy_to_op<op,prp...>(loc_grid.get(i),bx_src,bx_dst);

			}
		}
	}

	/* Send or queue the information
	 *
	 * This function sends, or queues for sending, the information to the other
	 * processor. When the device grid uses a compressed format (as in
	 * multi-resolution) the communication is queued, because the receiving side
	 * does not know the size of the message. If the format is not compressed the
	 * receiving side knows the size, so a direct send is done
	 *
	 */
	void send_or_queue(size_t prc, char * pointer, char * pointer2)
	{
		if (device_grid::isCompressed() == false)
		{v_cl.send(prc,0,pointer,(char *)pointer2 - (char *)pointer);}
		else
		{
			send_prc_queue.add(prc);
			send_pointer.add(pointer);
			send_size.add(pointer2-pointer);
		}
	}

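	/*! \brief NBX callback: allocate the receive buffer for an incoming message
	 *
	 * Passed to sendrecvMultipleMessagesNBX and called for each incoming message.
	 * It records the sending processor and the arrival position, resizes a fresh
	 * buffer of msg_i bytes and returns the pointer where the message has to be
	 * written (the device pointer when running on device)
	 *
	 */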
	static void * receive_dynamic(size_t msg_i ,size_t total_msg, size_t total_p, size_t i, size_t ri, size_t tag, void * ptr)
	{
		grid_dist_id_comm * gd = static_cast<grid_dist_id_comm *>(ptr);

		gd->recv_buffers.add();

		gd->recv_buffers.last().resize(msg_i);
		gd->recv_proc.add();
		gd->recv_proc.last().p_id = i;
		gd->recv_proc.last().i = gd->recv_proc.size()-1;

		if (gd->opt & RUN_ON_DEVICE)
		{return gd->recv_buffers.last().getDevicePointer();}

		return gd->recv_buffers.last().getPointer();
	}

	/* Queue the receives for a ghost_get
	 *
	 * When the device grid is not compressed the receive sizes are known, so the
	 * receive buffer is allocated up front and the receives are queued directly.
	 * When it is compressed the sizes are unknown, so an NBX exchange is used and
	 * the buffers are allocated on arrival by receive_dynamic
	 *
	 */
	template <typename prp_object>
	void queue_recv_data_get(const openfpm::vector<ep_box_grid<dim>> & eg_box,
							 std::vector<size_t> & prp_recv,
							 ExtPreAlloc<Memory> & prRecv_prp)
	{
#ifdef __NVCC__
		cudaDeviceSynchronize();
#endif

		if (device_grid::isCompressed() == false)
		{
			//! Receive the information from each processor
			for ( size_t i = 0 ; i < eg_box.size() ; i++ )
			{
				prp_recv.push_back(eg_box.get(i).recv_pnt * sizeof(prp_object) + sizeof(size_t)*eg_box.get(i).n_r_box);
			}

			size_t tot_recv = ExtPreAlloc<Memory>::calculateMem(prp_recv);

			//! Resize the receiving buffer
			g_recv_prp_mem.resize(tot_recv);

			// queue the receives
			for ( size_t i = 0 ; i < eg_box.size() ; i++ )
			{
				prRecv_prp.allocate(prp_recv[i]);
				v_cl.recv(eg_box.get(i).prc,0,prRecv_prp.getPointer(),prp_recv[i]);
			}
		}
		else
		{
			// It is not possible to calculate the total information, so we have to receive

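			// Even with nothing to send we must call sendrecvMultipleMessagesNBX,
			// because the NBX exchange is collective: every processor has to
			// participate for the receives (handled by receive_dynamic) to complete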
			if (send_prc_queue.size() == 0)
			{
				v_cl.sendrecvMultipleMessagesNBX(send_prc_queue.size(),NULL,
												 NULL,NULL,
												 receive_dynamic,this);
			}
			else
			{
				v_cl.sendrecvMultipleMessagesNBX(send_prc_queue.size(),&send_size.get(0),
												 &send_prc_queue.get(0),&send_pointer.get(0),
												 receive_dynamic,this);
			}

			// Reorder what we received

			recv_proc.sort();

			openfpm::vector_fr<BMemory<Memory>> tmp;
			tmp.resize(recv_proc.size());

			for (int i = 0 ; i < recv_proc.size() ; i++)
			{
				tmp.get(i).swap(recv_buffers.get(recv_proc.get(i).i));
			}

			recv_buffers.swap(tmp);
		}
	}

	/* Queue the receives for a ghost_put
	 *
	 * When the device grid is not compressed the receive sizes can be computed
	 * from the internal ghost boxes, so the receive buffer is allocated up front
	 * and the receives are queued directly. When it is compressed an NBX exchange
	 * is used and the buffers are allocated on arrival by receive_dynamic
	 *
	 */
	template <typename prp_object>
	void queue_recv_data_put(const openfpm::vector<ip_box_grid<dim>> & ig_box,
							 std::vector<size_t> & prp_recv,
							 ExtPreAlloc<Memory> & prRecv_prp)
	{
		if (device_grid::isCompressed() == false)
		{
			// Receive the information from each processor
			for ( size_t i = 0 ; i < ig_box.size() ; i++ )
			{
				prp_recv.push_back(0);

				// for each internal ghost box
				for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)
				{
					// Internal ghost box
					Box<dim,size_t> g_ig_box = ig_box.get(i).bid.get(j).box;
					prp_recv[prp_recv.size()-1] += g_ig_box.getVolumeKey() * sizeof(prp_object) + sizeof(size_t);
				}
			}

			size_t tot_recv = ExtPreAlloc<Memory>::calculateMem(prp_recv);

			//! Resize the receiving buffer
			g_recv_prp_mem.resize(tot_recv);

			prRecv_prp.incRef();

			// queue the receives
			for ( size_t i = 0 ; i < ig_box.size() ; i++ )
			{
				prRecv_prp.allocate(prp_recv[i]);
				v_cl.recv(ig_box.get(i).prc,0,prRecv_prp.getPointer(),prp_recv[i]);
			}

			prRecv_prp.decRef();
		}
		else
		{
			// It is not possible to calculate the total information, so we have to receive

			if (send_prc_queue.size() == 0)
			{
				v_cl.sendrecvMultipleMessagesNBX(send_prc_queue.size(),NULL,
												 NULL,NULL,
												 receive_dynamic,this);
			}
			else
			{
				v_cl.sendrecvMultipleMessagesNBX(send_prc_queue.size(),&send_size.get(0),
												 &send_prc_queue.get(0),&send_pointer.get(0),
												 receive_dynamic,this);
			}
		}
	}

	template<typename mem,unsigned ... prp>
	void unpack_data_to_ext_ghost(ExtPreAlloc<mem> & emem,
								  openfpm::vector<device_grid> & loc_grid,
								  size_t i,
								  const openfpm::vector<ep_box_grid<dim>> & eg_box,
								  const std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
								  const openfpm::vector<e_box_multi<dim>> & eb_gid_list,
								  Unpack_stat & ps,
								  size_t opt)
	{
		// Unpack the ghost box global-id

		size_t g_id;
		// we move the gid from device to host
		if (opt & RUN_ON_DEVICE)
		{emem.deviceToHost(ps.getOffset(),ps.getOffset()+sizeof(size_t));}
		Unpacker<size_t,mem>::unpack(emem,g_id,ps);

		size_t l_id = 0;
		// convert the global id into a local id
		auto key = g_id_to_external_ghost_box.find(g_id);

		if (key != g_id_to_external_ghost_box.end()) // FOUND
		{l_id = key->second;}
		else
		{
			// NOT FOUND

			// It must always be found. If not, the processor has no idea of what is
			// stored and consequently does not know how to unpack it; print a
			// critical error and return

			std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n";

			return;
		}


		// we unpack into the full_match entry of eb_gid_list, that is always big
		// enough to unpack the information

		size_t le_id = eb_gid_list.get(l_id).full_match;
		size_t ei = eb_gid_list.get(l_id).e_id;

		// Get the external ghost box associated with the packed information
		Box<dim,long int> box = eg_box.get(ei).bid.get(le_id).l_e_box;
		size_t sub_id = eg_box.get(ei).bid.get(le_id).sub;

		// sub-grid where to unpack
		auto sub2 = loc_grid.get(sub_id).getIterator(box.getKP1(),box.getKP2(),false);

		rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
		if (opt & SKIP_LABELLING)
		{opt_ = rem_copy_opt::KEEP_GEOMETRY;}

		// Unpack
		loc_grid.get(sub_id).remove(box);
		Unpacker<device_grid,mem>::template unpack<decltype(sub2),decltype(v_cl.getmgpuContext()),prp...>(emem,sub2,loc_grid.get(sub_id),ps,v_cl.getmgpuContext(),opt_);

		// Copy the information on the other grids
		for (long int j = 0 ; j < (long int)eb_gid_list.get(l_id).eb_list.size() ; j++)
		{
			size_t nle_id = eb_gid_list.get(l_id).eb_list.get(j);
			if (nle_id != le_id)
			{
				size_t n_sub_id = eg_box.get(ei).bid.get(nle_id).sub;

				Box<dim,long int> box = eg_box.get(ei).bid.get(nle_id).l_e_box;
				Box<dim,long int> rbox = eg_box.get(ei).bid.get(nle_id).lr_e_box;

				loc_grid.get(n_sub_id).remove(box);
				loc_grid.get(n_sub_id).copy_to(loc_grid.get(sub_id),rbox,box);
			}
		}
	}

	template<typename mem, typename header_type,unsigned ... prp>
	void unpack_data_to_ext_ghost_with_header(ExtPreAlloc<mem> & emem,
								  openfpm::vector<device_grid> & loc_grid,
								  header_type & headers,
								  size_t i,
								  const openfpm::vector<ep_box_grid<dim>> & eg_box,
								  const std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
								  const openfpm::vector<e_box_multi<dim>> & eb_gid_list,
								  Unpack_stat & ps,
								  size_t opt)
	{
		// Unpack the ghost box global-id

		size_t g_id;
		// the gid comes from the pre-decoded headers
		g_id = headers.template get<0>(i);
		ps.addOffset(sizeof(size_t));

		size_t l_id = 0;
		// convert the global id into a local id
		auto key = g_id_to_external_ghost_box.find(g_id);

		if (key != g_id_to_external_ghost_box.end()) // FOUND
		{l_id = key->second;}
		else
		{
			// NOT FOUND

			// It must always be found. If not, the processor has no idea of what is
			// stored and consequently does not know how to unpack it; print a
			// critical error and return

			std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n";

			return;
		}


		// we unpack into the full_match entry of eb_gid_list, that is always big
		// enough to unpack the information

		size_t le_id = eb_gid_list.get(l_id).full_match;
		size_t ei = eb_gid_list.get(l_id).e_id;

		// Get the external ghost box associated with the packed information
		Box<dim,long int> box = eg_box.get(ei).bid.get(le_id).l_e_box;
		size_t sub_id = eg_box.get(ei).bid.get(le_id).sub;

		// sub-grid where to unpack
		auto sub2 = loc_grid.get(sub_id).getIterator(box.getKP1(),box.getKP2(),false);

		rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
		if (opt & SKIP_LABELLING)
		{opt_ = rem_copy_opt::KEEP_GEOMETRY;}

		// Unpack
		loc_grid.get(sub_id).remove(box);
		Unpacker<device_grid,mem>::template unpack_with_header<decltype(sub2),decltype(headers),decltype(v_cl.getmgpuContext()),prp...>
																(emem,
																 sub2,
																 loc_grid.get(sub_id),
																 headers,
																 i,
																 ps,
																 v_cl.getmgpuContext(),
																 opt_);

		// Copy the information on the other grids
		for (long int j = 0 ; j < (long int)eb_gid_list.get(l_id).eb_list.size() ; j++)
		{
			size_t nle_id = eb_gid_list.get(l_id).eb_list.get(j);
			if (nle_id != le_id)
			{
				size_t n_sub_id = eg_box.get(ei).bid.get(nle_id).sub;

				Box<dim,long int> box = eg_box.get(ei).bid.get(nle_id).l_e_box;
				Box<dim,long int> rbox = eg_box.get(ei).bid.get(nle_id).lr_e_box;

				loc_grid.get(n_sub_id).remove(box);
				loc_grid.get(n_sub_id).copy_to(loc_grid.get(sub_id),rbox,box);
			}
		}
	}

	template<unsigned int ... prp>
	void fill_headers(size_t opt)
	{
		if ((opt & KEEP_PROPERTIES) == 0 && device_grid::is_unpack_header_supported())
		{
			headers.resize(n_headers_slot * recv_buffers.size());

			Memory result;
			result.allocate(sizeof(int));

			pointers_h.resize(recv_buffers.size());

			for ( size_t i = 0 ; i < recv_buffers.size() ; i++ )
			{
				pointers_h.template get<0>(i) = recv_buffers.get(i).getDevicePointer();
				pointers_h.template get<1>(i) = (unsigned char *)recv_buffers.get(i).getDevicePointer() + recv_buffers.get(i).size();
			}

			pointers_h.template hostToDevice<0,1>();

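			// Grow-and-retry: try to decode the headers with n_headers_slot slots
			// per receive buffer; if a buffer needs more slots the unpack kernel
			// flags it in result, and we double the slot count and retry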
			while(1)
			{
				for ( size_t i = 0 ; i < recv_buffers.size() ; i++ )
				{pointers_h.template get<2>(i) = 0;}
				pointers_h.template hostToDevice<2>();
				*(int *)result.getPointer() = 0;
				result.hostToDevice();

				device_grid::template unpack_headers<decltype(pointers_h),decltype(headers),decltype(result),prp ...>(pointers_h,headers,result,n_headers_slot);
				result.deviceToHost();

				if (*(int *)result.getPointer() == 0) {break;}

				n_headers_slot *= 2;
				headers.resize(n_headers_slot * recv_buffers.size());

			}

			headers.template deviceToHost<0,1,2>();
		}
	}

	template<unsigned ... prp>
	void merge_received_data_get(openfpm::vector<device_grid> & loc_grid,
								 const openfpm::vector<ep_box_grid<dim>> & eg_box,
								 const std::vector<size_t> & prp_recv,
								 ExtPreAlloc<Memory> & prRecv_prp,
								 const std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
								 const openfpm::vector<e_box_multi<dim>> & eb_gid_list,
								 size_t opt)
	{
		if (device_grid::isCompressed() == false)
		{
			// wait for the communication to complete
			v_cl.execute();

			Unpack_stat ps;

			// Unpack the object
			for ( size_t i = 0 ; i < eg_box.size() ; i++ )
			{
				size_t mark_here = ps.getOffset();

				// for each external ghost box
				while (ps.getOffset() - mark_here < prp_recv[i])
				{
					// Unpack the ghost box global-id

					unpack_data_to_ext_ghost<Memory,prp ...>(prRecv_prp,loc_grid,i,
															 eg_box,g_id_to_external_ghost_box,eb_gid_list,
															 ps,opt);
				}
			}
		}
		else
		{
			fill_headers<prp ...>(opt);

			if (headers.size() != 0)
			{
				// Unpack the object
				for ( size_t i = 0 ; i < recv_buffers.size() ; i++ )
				{
					Unpack_stat ps;
					size_t mark_here = ps.getOffset();

					ExtPreAlloc<BMemory<Memory>> mem(recv_buffers.get(i).size(),recv_buffers.get(i));

					int j = 0;

					// for each external ghost box
					while (ps.getOffset() - mark_here < recv_buffers.get(i).size())
					{
						// Unpack the ghost box global-id

						unpack_data_to_ext_ghost_with_header<BMemory<Memory>,decltype(headers),prp ...>(mem,loc_grid,headers,i*n_headers_slot+j,
															 eg_box,g_id_to_external_ghost_box,eb_gid_list,
															 ps,opt);

						j++;
					}
				}
			}
			else
			{
				// Unpack the object
				for ( size_t i = 0 ; i < recv_buffers.size() ; i++ )
				{
					Unpack_stat ps;
					size_t mark_here = ps.getOffset();

					ExtPreAlloc<BMemory<Memory>> mem(recv_buffers.get(i).size(),recv_buffers.get(i));

					// for each external ghost box
					while (ps.getOffset() - mark_here < recv_buffers.get(i).size())
					{
						// Unpack the ghost box global-id

						unpack_data_to_ext_ghost<BMemory<Memory>,prp ...>(mem,loc_grid,i,
															 eg_box,g_id_to_external_ghost_box,eb_gid_list,
															 ps,opt);
					}
				}
			}
		}
	}


	template<template<typename,typename> class op, unsigned ... prp>
	void merge_received_data_put(Decomposition & dec, openfpm::vector<device_grid> & loc_grid,
								 const openfpm::vector<ip_box_grid<dim>> & ig_box,
								 const std::vector<size_t> & prp_recv,
								 ExtPreAlloc<Memory> & prRecv_prp,
								 const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
								 const openfpm::vector<std::unordered_map<size_t,size_t>> & g_id_to_internal_ghost_box)
	{
		typedef object<typename object_creator<typename T::type,prp...>::type> prp_object;

		if (device_grid::isCompressed() == false)
		{
			v_cl.execute();

			Unpack_stat ps;

			// Unpack the object
			for ( size_t i = 0 ; i < ig_box.size() ; i++ )
			{
				// for each internal ghost box
				for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)
				{
					// Unpack the ghost box global-id

					size_t g_id;
					Unpacker<size_t,HeapMemory>::unpack(prRecv_prp,g_id,ps);

					size_t l_id = 0;
					// convert the global id into a local id
					auto key = g_id_to_internal_ghost_box.get(i).find(g_id);
					if (key != g_id_to_internal_ghost_box.get(i).end()) // FOUND
					{l_id = key->second;}
					else
					{
						// NOT FOUND

						// It must always be found. If not, the processor has no idea of
						// what is stored and consequently does not know how to unpack
						// it; print a critical error and return

						std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n";

						return;
					}

					// Get the internal ghost box associated with the packed information
					Box<dim,size_t> box = ig_box.get(i).bid.get(l_id).box;
					size_t sub_id = ig_box.get(i).bid.get(l_id).sub;
					box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();

					// sub-grid where to unpack
					auto sub2 = loc_grid.get(sub_id).getIterator(box.getKP1(),box.getKP2());
					grid_unpack_with_prp<op,prp_object,device_grid,Memory>::template unpacking<decltype(sub2),prp...>(prRecv_prp,sub2,loc_grid.get(sub_id),ps);
				}
			}
		}
		else
		{
			// Unpack the object
			for ( size_t i = 0 ; i < recv_buffers.size() ; i++ )
			{
				Unpack_stat ps;
				size_t mark_here = ps.getOffset();

				ExtPreAlloc<BMemory<HeapMemory>> mem(recv_buffers.get(i).size(),recv_buffers.get(i));

				// for each internal ghost box
				while (ps.getOffset() - mark_here < recv_buffers.get(i).size())
				{
					// Unpack the ghost box global-id

					size_t g_id;
					Unpacker<size_t,BMemory<HeapMemory>>::unpack(mem,g_id,ps);

					size_t pid = dec.ProctoID(recv_proc.get(i).p_id);

					size_t l_id = 0;
					// convert the global id into a local id
					auto key = g_id_to_internal_ghost_box.get(pid).find(g_id);
					if (key != g_id_to_internal_ghost_box.get(pid).end()) // FOUND
					{l_id = key->second;}
					else
					{
						// NOT FOUND

						// It must always be found. If not, the processor has no idea of
						// what is stored and consequently does not know how to unpack
						// it; print a critical error and return

						std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n";

						return;
					}

					// Get the internal ghost box associated with the packed information
					Box<dim,size_t> box = ig_box.get(pid).bid.get(l_id).box;
					size_t sub_id = ig_box.get(pid).bid.get(l_id).sub;
					box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();

					// sub-grid where to unpack
					auto sub2 = loc_grid.get(sub_id).getIterator(box.getKP1(),box.getKP2());
					grid_unpack_with_prp<op,prp_object,device_grid,BMemory<HeapMemory>>::template unpacking<decltype(sub2),prp...>(mem,sub2,loc_grid.get(sub_id),ps);
				}
			}
		}
	}

public:

	/*! \brief Reconstruct the local grids
	 *
	 * \param m_oGrid_recv Vector of labeled grids to combine into the local grids
	 * \param loc_grid local grids
	 * \param gdb_ext information of the local grids
	 * \param cd_sm Cell-decomposer
	 *
	 */
	inline void grids_reconstruct(openfpm::vector<openfpm::vector<aggregate<device_grid,SpaceBox<dim,long int>>>> & m_oGrid_recv,
								  openfpm::vector<device_grid> & loc_grid,
								  openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
								  CellDecomposer_sm<dim,St,shift<dim,St>> & cd_sm)
	{
		// Clear the information of the grid
		for (size_t i = 0 ; i < loc_grid.size() ; i++)
		{loc_grid.get(i).clear();}

		for (size_t a = 0; a < m_oGrid_recv.size(); a++)
		{
			for (size_t k = 0; k < m_oGrid_recv.get(a).size(); k++)
			{
				device_grid & g = m_oGrid_recv.get(a).template get<0>(k);

				SpaceBox<dim,long int> b = m_oGrid_recv.get(a).template get<1>(k);

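				// The midpoint of the received box (in global grid coordinates)
				// decides which local sub-domain the piece belongs to: the box was
				// labeled with the processor owning that point, so exactly one local
				// sub-domain contains it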
				Point<dim,St> p;
				for (size_t n = 0; n < dim; n++)
				{p.get(n) = g.getGrid().getBox().getHigh(n);}

				Point<dim,St> point;
				for (size_t n = 0; n < dim; n++)
				{point.get(n) = (b.getHigh(n) + b.getLow(n))/2;}

				for (size_t j = 0; j < gdb_ext.size(); j++)
				{
					// Local sub-domain
					SpaceBox<dim,long int> sub = gdb_ext.get(j).Dbox;
					sub += gdb_ext.get(j).origin;

					if (sub.isInside(point) == true)
					{
						grid_key_dx<dim> start = b.getKP1() - grid_key_dx<dim>(gdb_ext.get(j).origin.asArray());
						grid_key_dx<dim> stop = b.getKP2() - grid_key_dx<dim>(gdb_ext.get(j).origin.asArray());

						Box<dim,size_t> box_src;
						Box<dim,size_t> box_dst;

						for(size_t i = 0 ; i < dim ; i++)
						{
							box_dst.setLow(i,start.get(i));
							box_dst.setHigh(i,stop.get(i));
							box_src.setLow(i,0);
							box_src.setHigh(i,stop.get(i)-start.get(i));
						}

						loc_grid.get(j).copy_to(g,box_src,box_dst);
					}
				}
			}
		}
	}

	/*! \brief Label the intersection grids for the mappings
	 *
	 * \param dec Decomposition
	 * \param cd_sm Cell-decomposer
	 * \param loc_grid_old old local grids
	 * \param gdb_ext information of the local grids
	 * \param gdb_ext_old information of the old local grids
	 * \param gdb_ext_global information of the grids globally
	 * \param lbl_b label for each grid
	 * \param prc_sz for each processor the number of grids to send to it
	 *
	 */
	inline void labelIntersectionGridsProcessor(Decomposition & dec,
												CellDecomposer_sm<dim,St,shift<dim,St>> & cd_sm,
												openfpm::vector<device_grid> & loc_grid_old,
												openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
												openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_old,
												openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_global,
												openfpm::vector<openfpm::vector<aggregate<device_grid,SpaceBox<dim,long int>>>> & lbl_b,
												openfpm::vector<size_t> & prc_sz)
	{
		lbl_b.clear();

		// resize the label buffer
		lbl_b.resize(v_cl.getProcessingUnits());

		// Label all the intersection grids with the processor id where they should go

		for (size_t i = 0; i < gdb_ext_old.size(); i++)
		{
			// Local old sub-domain in global coordinates
			SpaceBox<dim,long int> sub_dom = gdb_ext_old.get(i).Dbox;
			sub_dom += gdb_ext_old.get(i).origin;

			for (size_t j = 0; j < gdb_ext_global.size(); j++)
			{
				size_t p_id = 0;

				// Intersection box
				SpaceBox<dim,long int> inte_box;

				// Global new sub-domain in global coordinates
				SpaceBox<dim,long int> sub_dom_new = gdb_ext_global.get(j).Dbox;
				sub_dom_new += gdb_ext_global.get(j).origin;

				bool intersect = false;

				if (sub_dom.isValid() == true && sub_dom_new.isValid() == true)
					intersect = sub_dom.Intersect(sub_dom_new, inte_box);

				if (intersect == true)
				{
					auto inte_box_cont = cd_sm.convertCellUnitsIntoDomainSpace(inte_box);

					// Get the processor ID that stores the intersection box
					Point<dim,St> p;
					for (size_t n = 0; n < dim; n++)
						p.get(n) = (inte_box_cont.getHigh(n) + inte_box_cont.getLow(n))/2;

					p_id = dec.processorID(p);
					prc_sz.get(p_id)++;

					// Transform the coordinates to local
					auto inte_box_local = inte_box;

					inte_box_local -= gdb_ext_old.get(i).origin;

					// Grid corresponding to the gdb_ext_old.get(i) box
					device_grid & gr = loc_grid_old.get(i);

					// Size of the grid to send
					size_t sz[dim];
					for (size_t l = 0; l < dim; l++)
					{
						sz[l] = inte_box_local.getHigh(l) - inte_box_local.getLow(l) + 1;
						//std::cout << "GR_send size on " << l << " dimension: " << sz[l] << std::endl;
					}

					// Grid to send
					device_grid gr_send(sz);
					gr_send.setMemory();

					// Sub iterator across the intersection box inside the local grid
					grid_key_dx<dim> start = inte_box_local.getKP1();
					grid_key_dx<dim> stop = inte_box_local.getKP2();

					Box<dim,long int> box_src;
					Box<dim,long int> box_dst;

					for(size_t i = 0 ; i < dim ; i++)
					{
						box_src.setLow(i,start.get(i));
						box_src.setHigh(i,stop.get(i));
						box_dst.setLow(i,0);
						box_dst.setHigh(i,stop.get(i)-start.get(i));
					}

					gr_send.copy_to(gr,box_src,box_dst);

					aggregate<device_grid,SpaceBox<dim,long int>> aggr;

					aggr.template get<0>() = gr_send;
					aggr.template get<1>() = inte_box;

					// Add to the labeling vector
					lbl_b.get(p_id).add(aggr);
				}
			}
		}
	}

	/*! \brief Moves all the grids that do not belong to the local processor to the respective processor
	 *
	 * In general this function is called when the decomposition changes
	 *
	 * \param dec Decomposition
	 * \param cd_sm cell-decomposer
	 * \param loc_grid set of local grids
	 * \param loc_grid_old set of old local grids
	 * \param gdb_ext information of the local grids
	 * \param gdb_ext_old information of the old local grids
	 * \param gdb_ext_global it contains the decomposition at the global level
	 *
	 */
	void map_(Decomposition & dec,
			  CellDecomposer_sm<dim,St,shift<dim,St>> & cd_sm,
			  openfpm::vector<device_grid> & loc_grid,
			  openfpm::vector<device_grid> & loc_grid_old,
			  openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
			  openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_old,
			  openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_global)
	{
		// Processor communication size
		openfpm::vector<size_t> prc_sz(v_cl.getProcessingUnits());

		// Contains the processor id of each box (basically where they have to go)
		labelIntersectionGridsProcessor(dec,cd_sm,loc_grid_old,gdb_ext,gdb_ext_old,gdb_ext_global,m_oGrid,prc_sz);

		// Calculate the sending buffer size for each processor, put this information in
		// a contiguous buffer
		p_map_req.resize(v_cl.getProcessingUnits());

		// Vector of the number of grids to send to each involved processor
		openfpm::vector<size_t> prc_sz_r;
		// Vector of the ranks of the involved processors
		openfpm::vector<size_t> prc_r;

		for (size_t i = 0; i < v_cl.getProcessingUnits(); i++)
		{
			if (m_oGrid.get(i).size() != 0)
			{
				p_map_req.get(i) = prc_r.size();
				prc_r.add(i);
				prc_sz_r.add(m_oGrid.get(i).size());
			}
		}

		decltype(m_oGrid) m_oGrid_new;
		for (size_t i = 0; i < v_cl.getProcessingUnits(); i++)
		{
			if (m_oGrid.get(i).size() != 0)
			{m_oGrid_new.add(m_oGrid.get(i));}
		}

		// Vector for receiving the intersection grids
		openfpm::vector<openfpm::vector<aggregate<device_grid,SpaceBox<dim,long int>>>> m_oGrid_recv;

		// Send and receive the intersection grids
		v_cl.SSendRecv(m_oGrid_new,m_oGrid_recv,prc_r,prc_recv_map,recv_sz_map);

		// Reconstruct the new local grids
		grids_reconstruct(m_oGrid_recv,loc_grid,gdb_ext,cd_sm);
	}

	/*! \brief It fills the ghost part of the grids
	 *
	 * \param ig_box internal ghost boxes
	 * \param eg_box external ghost boxes
	 * \param loc_ig_box local internal ghost boxes
	 * \param loc_eg_box local external ghost boxes
	 * \param gdb_ext local grids information
	 * \param eb_gid_list for each global-id the list of the external ghost boxes associated with it
	 * \param use_bx_def true if the grids are defined through a set of boxes
	 * \param loc_grid set of local grids
	 * \param ginfo grid information
	 * \param g_id_to_external_ghost_box index to external ghost box
	 * \param opt options
	 *
	 */
	template<int... prp> void ghost_get_(const openfpm::vector<ip_box_grid<dim>> & ig_box,
										 const openfpm::vector<ep_box_grid<dim>> & eg_box,
										 const openfpm::vector<i_lbox_grid<dim>> & loc_ig_box,
										 const openfpm::vector<e_lbox_grid<dim>> & loc_eg_box,
										 const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
										 const openfpm::vector<e_box_multi<dim>> & eb_gid_list,
										 bool use_bx_def,
										 openfpm::vector<device_grid> & loc_grid,
										 const grid_sm<dim,void> & ginfo,
										 std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
										 size_t opt)
	{
#ifdef PROFILE_SCOREP
		SCOREP_USER_REGION("ghost_get",SCOREP_USER_REGION_TYPE_FUNCTION)
#endif

		// Sending property object
		typedef object<typename object_creator<typename T::type,prp...>::type> prp_object;

		recv_buffers.clear();
		recv_proc.clear();
		send_prc_queue.clear();
		send_pointer.clear();
		send_size.clear();

		this->opt = opt;

		size_t req = 0;

		// Pack information
		Pack_stat sts;

		// We check if skip labelling is possible: if any local grid cannot skip,
		// clear the SKIP_LABELLING bit, otherwise leave opt unchanged
		for (int i = 0 ; i < loc_grid.size() ; i++)
		{opt &= (loc_grid.get(i).isSkipLabellingPossible())?(int)-1:~SKIP_LABELLING;}

		#ifdef ENABLE_GRID_DIST_ID_PERF_STATS
		timer packing_time;
		packing_time.start();
		#endif

		if (!(opt & SKIP_LABELLING))
		{
			// first we initialize the pack buffer on all internal grids

			for (size_t i = 0 ; i < loc_grid.size() ; i++)
			{loc_grid.get(i).packReset();}

			// Calculate the size to pack all the data to send
			for ( size_t i = 0 ; i < ig_box.size() ; i++ )
			{
				// for each ghost box
				for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)
				{
					// And the linked sub-domain
					size_t sub_id = ig_box.get(i).bid.get(j).sub;
					// Internal ghost box
					Box<dim,long int> g_ig_box = ig_box.get(i).bid.get(j).box;

					if (g_ig_box.isValid() == false)
					{continue;}

					g_ig_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();

					// Pack a size_t for the internal ghost id
					Packer<size_t,Memory>::packRequest(req);
					// Create a sub-grid iterator spanning the internal ghost layer
					auto sub_it = loc_grid.get(sub_id).getIterator(g_ig_box.getKP1(),g_ig_box.getKP2(),false);

					// get the size to pack
					Packer<device_grid,Memory>::template packRequest<decltype(sub_it),prp...>(loc_grid.get(sub_id),sub_it,req);
				}
			}

			// Finalize the calculation
			for (size_t i = 0 ; i < loc_grid.size() ; i++)
			{loc_grid.get(i).template packCalculate<prp ...>(req,v_cl.getmgpuContext());}

			// resize the property buffer memory
			g_send_prp_mem.resize(req);

			// Create an object of preallocated memory for the properties
			ExtPreAlloc<Memory> & prAlloc_prp = *(new ExtPreAlloc<Memory>(req,g_send_prp_mem));
			// Necessary: we do not want this memory to be destroyed until it goes out
			// of scope. The Packer shares this memory with the data structures, and
			// when they see the reference counter reach zero they destroy this memory
			prAlloc_prp.incRef();

			pointers.clear();
			pointers2.clear();
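			// For each neighbor processor, pointers.get(i) / pointers2.get(i) bracket
			// the region of prAlloc_prp packed for that processor; the pair is later
			// handed to send_or_queue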

			// Pack the information for each processor and send it
			for ( size_t i = 0 ; i < ig_box.size() ; i++ )
			{

				sts.mark();

				void * pointer;

				if (opt & RUN_ON_DEVICE)
				{pointer = prAlloc_prp.getDevicePointerEnd();}
				else
				{pointer = prAlloc_prp.getPointerEnd();}

				// for each ghost box
				for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)
				{
					// we pack only if it is valid
					if (ig_box.get(i).bid.get(j).box.isValid() == false)
						continue;

					// And the linked sub-domain
					size_t sub_id = ig_box.get(i).bid.get(j).sub;
					// Internal ghost box
					Box<dim,size_t> g_ig_box = ig_box.get(i).bid.get(j).box;
					g_ig_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
					// Ghost box global id
					size_t g_id = ig_box.get(i).bid.get(j).g_id;

					// Pack a size_t for the internal ghost id
					Packer<size_t,Memory>::pack(prAlloc_prp,g_id,sts);
					prAlloc_prp.hostToDevice(prAlloc_prp.getOffset(),prAlloc_prp.getOffsetEnd());
					// Create a sub-grid iterator spanning the internal ghost layer
					auto sub_it = loc_grid.get(sub_id).getIterator(g_ig_box.getKP1(),g_ig_box.getKP2(),false);
					// and pack the internal ghost grid
					Packer<device_grid,Memory>::template pack<decltype(sub_it),prp...>(prAlloc_prp,loc_grid.get(sub_id),sub_it,sts);
				}
				// record the packed region for the later send

				void * pointer2;

				if (opt & RUN_ON_DEVICE)
				{pointer2 = prAlloc_prp.getDevicePointerEnd();}
				else
				{pointer2 = prAlloc_prp.getPointerEnd();}

				pointers.add(pointer);
				pointers2.add(pointer2);
			}

			for (size_t i = 0 ; i < loc_grid.size() ; i++)
			{
				rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
				if (opt & SKIP_LABELLING)
				{opt_ = rem_copy_opt::KEEP_GEOMETRY;}

				loc_grid.get(i).template packFinalize<prp ...>(prAlloc_prp,sts,opt_,true);
			}

			prAlloc_prp.decRef();
			delete &prAlloc_prp;
		}
		else
		{
			req = g_send_prp_mem.size();

			// Create an object of preallocated memory for the properties
			ExtPreAlloc<Memory> & prAlloc_prp = *(new ExtPreAlloc<Memory>(req,g_send_prp_mem));
			prAlloc_prp.incRef();

			for (size_t i = 0 ; i < loc_grid.size() ; i++)
			{
				rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
				if (opt & SKIP_LABELLING)
				{opt_ = rem_copy_opt::KEEP_GEOMETRY;}

				loc_grid.get(i).template packFinalize<prp ...>(prAlloc_prp,sts,opt_,true);
			}

			prAlloc_prp.decRef();
			delete &prAlloc_prp;
		}

		#ifdef ENABLE_GRID_DIST_ID_PERF_STATS
		packing_time.stop();
		tot_pack += packing_time.getwct();
		timer sendrecv_time;
		sendrecv_time.start();
		#endif

		for ( size_t i = 0 ; i < ig_box.size() ; i++ )
		{
			// This function sends (or queues for sending) the information
			send_or_queue(ig_box.get(i).prc,(char *)pointers.get(i),(char *)pointers2.get(i));
		}

		// Calculate the total information to receive from each processor
		std::vector<size_t> prp_recv;

		// Create an object of preallocated memory for the properties
		ExtPreAlloc<Memory> & prRecv_prp = *(new ExtPreAlloc<Memory>(g_recv_prp_mem.size(),g_recv_prp_mem));
		prRecv_prp.incRef();

		// Before waiting for the communication to complete we sync the local ghost
		// in order to overlap it with the communication

		queue_recv_data_get<prp_object>(eg_box,prp_recv,prRecv_prp);

		#ifdef ENABLE_GRID_DIST_ID_PERF_STATS
		sendrecv_time.stop();
		tot_sendrecv += sendrecv_time.getwct();
		timer merge_loc_time;
		merge_loc_time.start();
		#endif

		ghost_get_local<prp...>(loc_ig_box,loc_eg_box,gdb_ext,loc_grid,g_id_to_external_ghost_box,ginfo,use_bx_def,opt);

		#ifdef ENABLE_GRID_DIST_ID_PERF_STATS
		merge_loc_time.stop();
		tot_loc_merge += merge_loc_time.getwct();
		timer merge_time;
		merge_time.start();
		#endif

		for (size_t i = 0 ; i < loc_grid.size() ; i++)
		{loc_grid.get(i).removeAddUnpackReset();}

		merge_received_data_get<prp ...>(loc_grid,eg_box,prp_recv,prRecv_prp,g_id_to_external_ghost_box,eb_gid_list,opt);

		rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
		if (opt & SKIP_LABELLING)
		{opt_ = rem_copy_opt::KEEP_GEOMETRY;}

		for (size_t i = 0 ; i < loc_grid.size() ; i++)
		{loc_grid.get(i).template removeAddUnpackFinalize<prp ...>(v_cl.getmgpuContext(),opt_);}

		#ifdef ENABLE_GRID_DIST_ID_PERF_STATS
		merge_time.stop();
		tot_merge += merge_time.getwct();
		#endif

		prRecv_prp.decRef();
		delete &prRecv_prp;
	}

	/*! \brief It merges the information in the ghost with the
	 *         real information
	 *
	 * \tparam op merge operation
	 *
	 * \param dec Decomposition
	 * \param ig_box internal ghost boxes
	 * \param eg_box external ghost boxes
	 * \param loc_ig_box local internal ghost boxes
	 * \param loc_eg_box local external ghost boxes
	 * \param gdb_ext local grids information
	 * \param loc_grid set of local grids
	 * \param g_id_to_internal_ghost_box index to internal ghost box
	 *
	 */
	template<template<typename,typename> class op,int... prp>
	void ghost_put_(Decomposition & dec,
					const openfpm::vector<ip_box_grid<dim>> & ig_box,
					const openfpm::vector<ep_box_grid<dim>> & eg_box,
					const openfpm::vector<i_lbox_grid<dim>> & loc_ig_box,
					const openfpm::vector<e_lbox_grid<dim>> & loc_eg_box,
					const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
					openfpm::vector<device_grid> & loc_grid,
					openfpm::vector<std::unordered_map<size_t,size_t>> & g_id_to_internal_ghost_box)
	{
		// Sending property object
		typedef object<typename object_creator<typename T::type,prp...>::type> prp_object;

		recv_buffers.clear();
		recv_proc.clear();
		send_prc_queue.clear();
		send_pointer.clear();
		send_size.clear();

		size_t req = 0;

		// Create a packing request vector
		for ( size_t i = 0 ; i < eg_box.size() ; i++ )
		{
			// for each ghost box
			for (size_t j = 0 ; j < eg_box.get(i).bid.size() ; j++)
			{
				// And the linked sub-domain
				size_t sub_id = eg_box.get(i).bid.get(j).sub;
				// External ghost box
				Box<dim,long int> g_eg_box = eg_box.get(i).bid.get(j).g_e_box;

				if (g_eg_box.isValid() == false)
					continue;

				g_eg_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();

				// Pack a size_t for the ghost box global id
				Packer<size_t,HeapMemory>::packRequest(req);

				// Create a sub-grid iterator spanning the external ghost layer
				auto sub_it = loc_grid.get(sub_id).getIterator(g_eg_box.getKP1(),g_eg_box.getKP2());

				// and pack the external ghost grid
				Packer<device_grid,HeapMemory>::template packRequest<decltype(sub_it),prp...>(loc_grid.get(sub_id),sub_it,req);
			}
		}

		// resize the property buffer memory
		g_send_prp_mem.resize(req);

		// Create an object of preallocated memory for the properties
		ExtPreAlloc<Memory> & prAlloc_prp = *(new ExtPreAlloc<Memory>(req,g_send_prp_mem));

		prAlloc_prp.incRef();

		// Pack information
		Pack_stat sts;

		// Pack the information for each processor and send it
		for ( size_t i = 0 ; i < eg_box.size() ; i++ )
		{

			sts.mark();
			void * pointer = prAlloc_prp.getPointerEnd();

			// for each ghost box
			for (size_t j = 0 ; j < eg_box.get(i).bid.size() ; j++)
			{
				// we pack only if it is valid
				if (eg_box.get(i).bid.get(j).g_e_box.isValid() == false)
					continue;

				// And the linked sub-domain
				size_t sub_id = eg_box.get(i).bid.get(j).sub;
				// External ghost box
				Box<dim,size_t> g_eg_box = eg_box.get(i).bid.get(j).g_e_box;
				g_eg_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
				// Ghost box global id
				size_t g_id = eg_box.get(i).bid.get(j).g_id;

				// Pack a size_t for the ghost box global id
				Packer<size_t,HeapMemory>::pack(prAlloc_prp,g_id,sts);
				// Create a sub-grid iterator spanning the external ghost layer
				auto sub_it = loc_grid.get(sub_id).getIterator(g_eg_box.getKP1(),g_eg_box.getKP2());
				// and pack the external ghost grid
				Packer<device_grid,HeapMemory>::template pack<decltype(sub_it),prp...>(prAlloc_prp,loc_grid.get(sub_id),sub_it,sts);
			}
			// send the request

			void * pointer2 = prAlloc_prp.getPointerEnd();

			// This function sends (or queues for sending) the information
			send_or_queue(ig_box.get(i).prc,(char *)pointer,(char *)pointer2);
		}

		// Calculate the total information to receive from each processor
		std::vector<size_t> prp_recv;

		// Create an object of preallocated memory for the properties
		// (g_recv_prp_mem is resized inside queue_recv_data_put when needed)
		ExtPreAlloc<Memory> & prRecv_prp = *(new ExtPreAlloc<Memory>(g_recv_prp_mem.size(),g_recv_prp_mem));
		prRecv_prp.incRef();

		queue_recv_data_put<prp_object>(ig_box,prp_recv,prRecv_prp);

		// Before waiting for the communication to complete we sync the local ghost
		// in order to overlap it with the communication

		ghost_put_local<op,prp...>(loc_ig_box,loc_eg_box,gdb_ext,loc_grid,g_id_to_internal_ghost_box);

		merge_received_data_put<op,prp ...>(dec,loc_grid,ig_box,prp_recv,prRecv_prp,gdb_ext,g_id_to_internal_ghost_box);

		prRecv_prp.decRef();
		prAlloc_prp.decRef();
		delete &prAlloc_prp;
		delete &prRecv_prp;
	}

	/*! \brief Constructor
	 *
	 *
	 */
	grid_dist_id_comm()
	:v_cl(create_vcluster<Memory>())
	{

	}

	/*! \brief Copy constructor
	 *
	 * It does not really copy. This structure is supposed to store only
	 * temporary data
	 *
	 */
	grid_dist_id_comm(const grid_dist_id_comm<dim,St,T,Decomposition,Memory,device_grid> & gc)
	:v_cl(gc.v_cl)
	{

	}
};
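
// Usage sketch (illustrative, not part of the build): this class is not meant to
// be used directly. grid_dist_id derives from it and forwards to ghost_get_,
// ghost_put_ and map_; at the user level the calls look like
//
//    grid_dist_id<3,float,aggregate<float,float[3]>> g_dist(sz,domain,ghost);
//    g_dist.template ghost_get<0,1>();    // fill the ghost layers of properties 0,1
//    g_dist.template ghost_put<add_,0>(); // merge ghost values back, summing them
//
// where the property lists <0,1> and <add_,0> become the prp... / op packs seen
// in this file.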


#endif /* SRC_GRID_GRID_DIST_ID_COMM_HPP_ */