1/*
2 * VCluster_meta_function.hpp
3 *
4 * Created on: Dec 8, 2016
5 * Author: i-bird
6 */
7
8#ifndef OPENFPM_VCLUSTER_SRC_VCLUSTER_VCLUSTER_META_FUNCTION_HPP_
9#define OPENFPM_VCLUSTER_SRC_VCLUSTER_VCLUSTER_META_FUNCTION_HPP_
10
11#include "memory/BHeapMemory.hpp"
12#include "Packer_Unpacker/has_max_prop.hpp"
13
/*! \brief Return true if MPI is compiled with CUDA support
 *
 * \return true if MPI is compiled with CUDA support (CUDA-aware MPI), false otherwise
 */
static inline bool is_mpi_rdma_cuda_active()
{
#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
	return true;
#else
	return false;
#endif
}
26
27template<bool result, typename T, typename S, template<typename> class layout_base, typename Memory>
28struct unpack_selector_with_prp
29{
30 template<typename op,
31 int ... prp>
32 static void call_unpack(S & recv,
33 openfpm::vector_fr<BMemory<Memory>> & recv_buf,
34 openfpm::vector<size_t> * sz,
35 openfpm::vector<size_t> * sz_byte,
36 op & op_param,
37 size_t opt)
38 {
39 if (sz_byte != NULL)
40 sz_byte->resize(recv_buf.size());
41
42 for (size_t i = 0 ; i < recv_buf.size() ; i++)
43 {
44 T unp;
45
46 ExtPreAlloc<HeapMemory> & mem = *(new ExtPreAlloc<HeapMemory>(recv_buf.get(i).size(),recv_buf.get(i)));
47 mem.incRef();
48
49 Unpack_stat ps;
50
51 Unpacker<T,HeapMemory>::template unpack<>(mem, unp, ps);
52
53 size_t recv_size_old = recv.size();
54 // Merge the information
55
56 op_param.template execute<true,T,decltype(recv),decltype(unp),layout_base,prp...>(recv,unp,i,opt);
57
58 size_t recv_size_new = recv.size();
59
60 if (sz_byte != NULL)
61 sz_byte->get(i) = recv_buf.get(i).size();
62 if (sz != NULL)
63 sz->get(i) = recv_size_new - recv_size_old;
64
65 mem.decRef();
66 delete &mem;
67 }
68 }
69};
70
71template<typename op, typename Vt, typename S, template<typename> class layout_base, typename v_mpl>
72struct unpack_each_prop_buffer
73{
74 S & recv;
75
76 openfpm::vector_fr<BMemory<HeapMemory>> & recv_buf;
77
78 size_t i;
79
80 op & op_param;
81
82 openfpm::vector<size_t> * sz;
83
84 openfpm::vector<size_t> * sz_byte;
85
86 /*! \brief constructor
87 *
88 * \param v set of pointer buffers to set
89 *
90 */
91 inline unpack_each_prop_buffer(S & recv,
92 openfpm::vector_fr<BMemory<HeapMemory>> & recv_buf,
93 op & op_param,
94 size_t i,
95 openfpm::vector<size_t> * sz,
96 openfpm::vector<size_t> * sz_byte)
97 :recv(recv),recv_buf(recv_buf),op_param(op_param),i(i),sz(sz),sz_byte(sz_byte)
98 {};
99
100 //! It call the copy function for each property
101 template<typename T>
102 inline void operator()(T& t) const
103 {
104 // here we get the the type of the property at position T::value
105 typedef typename boost::mpl::at<typename T::value_type::type,boost::mpl::int_<T::value> >::type prp_type;
106
107 // here we get the the type of the property at position T::value
108 typedef typename boost::mpl::at<v_mpl,boost::mpl::int_<T::value>>::type prp_num;
109
110 // calculate the number of received elements
111 size_t n_ele = recv_buf.get(i).size() / sizeof(prp_type);
112
113 // add the received particles to the vector
114 PtrMemory * ptr1 = new PtrMemory(recv_buf.get(i).getPointer(),recv_buf.get(i).size());
115
116 // create vector representation to a piece of memory already allocated
117 openfpm::vector<typename Vt::value_type,PtrMemory,layout_base,openfpm::grow_policy_identity> v2;
118
119 v2.template setMemory<prp_num::value>(*ptr1);
120
121 // resize with the number of elements
122 v2.resize(n_ele);
123
124 // Merge the information
125
126 size_t recv_size_old = recv.size();
127
128 op_param.template execute<false,T,decltype(recv),decltype(v2),layout_base,prp_num::value>(recv,v2,i);
129
130 size_t recv_size_new = recv.size();
131
132 if (sz_byte != NULL)
133 sz_byte->get(i) = recv_buf.get(i).size();
134 if (sz != NULL)
135 sz->get(i) = recv_size_new - recv_size_old;
136 }
137};
138
/*! \brief this class is a functor for "for_each" algorithm
 *
 * This class is a functor for "for_each" algorithm. For each
 * element of the boost::vector the operator() is called.
 * Is mainly used to process the receive buffers in case of memory_traits_inte layout receive
 *
 * \tparam sT vector type whose properties are being received
 * \tparam layout_base memory layout
 * \tparam Memory memory type of the receive buffers
 *
 */

template<typename sT, template<typename> class layout_base,typename Memory>
struct process_receive_mem_traits_inte
{
	//! index of the next receive buffer to consume (advances by one per property)
	size_t i;

	//! Receive buffer
	openfpm::vector_fr<BMemory<Memory>> & recv_buf;

	//! Fake vector that map over received memory
	openfpm::vector<typename sT::value_type,PtrMemory,layout_base,openfpm::grow_policy_identity> & v2;

	//! number of elements carried by the last processed buffer
	size_t n_ele = 0;

	// options (MPI_GPU_DIRECT selects the device pointer when CUDA-aware MPI is available)
	size_t opt;

	/*! \brief constructor
	 *
	 * \param v2 fake vector mapping over the received memory
	 * \param recv_buf set of receive buffers
	 * \param i index of the first buffer to consume
	 * \param opt options
	 *
	 */
	inline process_receive_mem_traits_inte(openfpm::vector<typename sT::value_type,PtrMemory,layout_base,openfpm::grow_policy_identity> & v2,
	                                       openfpm::vector_fr<BMemory<Memory>> & recv_buf,
	                                       size_t i,
	                                       size_t opt)
	:i(i),recv_buf(recv_buf),v2(v2),opt(opt)
	{};

	//! It call the copy function for each property
	template<typename T>
	inline void operator()(T& t)
	{
		// type of the property at position T
		typedef typename boost::mpl::at<typename sT::value_type::type,T>::type type_prp;

		// calculate the number of received elements
		this->n_ele = recv_buf.get(i).size() / sizeof(type_prp);

		PtrMemory * ptr1;

		if (opt & MPI_GPU_DIRECT)
		{
#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
			// CUDA-aware MPI: wrap the device buffer directly (no copy)
			ptr1 = new PtrMemory(recv_buf.get(i).getDevicePointer(),recv_buf.get(i).size());
#else
			// MPI is not CUDA-aware: fall back to the host pointer
			ptr1 = new PtrMemory(recv_buf.get(i).getPointer(),recv_buf.get(i).size());
#endif
		}
		else
		{
			// wrap the host buffer of the received particles
			ptr1 = new PtrMemory(recv_buf.get(i).getPointer(),recv_buf.get(i).size());
		}

		// NOTE(review): ptr1 has no matching delete here; presumably released via
		// the memory ref-counting of v2 — confirm against openfpm memory semantics
		v2.template setMemory<T::value>(*ptr1);

		// each property was sent in its own buffer: move to the next one
		++i;
	}
};
211
template<bool inte_or_lin,typename T, typename S, template<typename> class layout_base,typename Memory>
struct unpack_selector_with_prp_lin
{
	/*! \brief Unpack one logical message whose properties arrive in separate buffers
	 *         (memory_traits_inte case) and merge it into recv
	 *
	 * \param recv vector the received elements are merged into
	 * \param recv_buf set of receive buffers
	 * \param sz if not NULL, per-buffer count of merged elements
	 * \param sz_byte if not NULL, per-buffer size in bytes
	 * \param op_param merge operation functor
	 * \param i index of the first receive buffer of this message
	 * \param opt options (MPI_GPU_DIRECT selects device pointers)
	 *
	 * \return the amount the caller must advance its buffer cursor
	 */
	template<typename op, unsigned int ... prp> static int call_unpack_impl(S & recv,
	                                                                        openfpm::vector_fr<BMemory<Memory>> & recv_buf,
	                                                                        openfpm::vector<size_t> * sz,
	                                                                        openfpm::vector<size_t> * sz_byte,
	                                                                        op & op_param,
	                                                                        size_t i,
	                                                                        size_t opt)
	{
		// create vector representation to a piece of memory already allocated
		openfpm::vector<typename T::value_type,PtrMemory,layout_base,openfpm::grow_policy_identity> v2;

		// bind one receive buffer to each property of v2, starting at buffer i
		process_receive_mem_traits_inte<T,layout_base,Memory> prmti(v2,recv_buf,i,opt);

		boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::value_type::max_prop>>(prmti);

		// element count deduced from the last bound buffer
		v2.resize(prmti.n_ele);

		// Merge the information

		size_t recv_size_old = recv.size();

		op_param.template execute<false,T,decltype(recv),decltype(v2),layout_base,prp...>(recv,v2,i,opt);

		size_t recv_size_new = recv.size();

		if (sz_byte != NULL)
			sz_byte->get(i) = recv_buf.get(i).size();
		if (sz != NULL)
			sz->get(i) = recv_size_new - recv_size_old;

		// NOTE(review): the for_each above consumes max_prop buffers but the cursor is
		// advanced by sizeof...(prp); presumably the two always coincide in the calling
		// context — confirm
		return sizeof...(prp);
	}
};
248
template<typename T, typename S, template<typename> class layout_base, typename Memory>
struct unpack_selector_with_prp_lin<true,T,S,layout_base,Memory>
{
	/*! \brief Unpack one receive buffer holding whole elements (linear layout case)
	 *         and merge it into recv
	 *
	 * \param recv vector the received elements are merged into
	 * \param recv_buf set of receive buffers
	 * \param sz if not NULL, per-buffer count of merged elements
	 * \param sz_byte if not NULL, per-buffer size in bytes
	 * \param op_param merge operation functor
	 * \param i index of the receive buffer to unpack
	 * \param opt options, forwarded to the merge operation
	 *
	 * \return 1, the number of buffers consumed
	 */
	template<typename op, unsigned int ... prp> static int call_unpack_impl(S & recv,
	                                                                        openfpm::vector_fr<BMemory<Memory>> & recv_buf,
	                                                                        openfpm::vector<size_t> * sz,
	                                                                        openfpm::vector<size_t> * sz_byte,
	                                                                        op & op_param,
	                                                                        size_t i,
	                                                                        size_t opt)
	{
		// calculate the number of received elements
		size_t n_ele = recv_buf.get(i).size() / sizeof(typename T::value_type);

		// non-owning wrapper over the already-received memory
		PtrMemory * ptr1 = new PtrMemory(recv_buf.get(i).getPointer(),recv_buf.get(i).size());
		ptr1->incRef();

		// scope block: v2 must drop its reference to ptr1 before ptr1 is deleted below
		{
			// create vector representation to a piece of memory already allocated
			openfpm::vector<typename T::value_type,PtrMemory,layout_base,openfpm::grow_policy_identity> v2;

			v2.setMemory(*ptr1);

			// resize with the number of elements
			v2.resize(n_ele);

			// Merge the information

			size_t recv_size_old = recv.size();

			op_param.template execute<false,T,decltype(recv),decltype(v2),layout_base,prp...>(recv,v2,i,opt);

			size_t recv_size_new = recv.size();

			if (sz_byte != NULL)
				sz_byte->get(i) = recv_buf.get(i).size();
			if (sz != NULL)
				sz->get(i) = recv_size_new - recv_size_old;
		}

		ptr1->decRef();
		delete ptr1;
		return 1;
	}
};
295
296typedef aggregate<int,int> dummy_type;
297
298//
299template<typename T, typename S, template<typename> class layout_base, typename Memory>
300struct unpack_selector_with_prp<true,T,S,layout_base,Memory>
301{
302 template<typename op, unsigned int ... prp> static void call_unpack(S & recv,
303 openfpm::vector_fr<BMemory<Memory>> & recv_buf,
304 openfpm::vector<size_t> * sz,
305 openfpm::vector<size_t> * sz_byte,
306 op & op_param,
307 size_t opt)
308 {
309 if (sz_byte != NULL)
310 sz_byte->resize(recv_buf.size());
311
312 for (size_t i = 0 ; i < recv_buf.size() ; )
313 {
314 i += unpack_selector_with_prp_lin<is_layout_mlin<layout_base<dummy_type>>::value,T,S,layout_base,Memory>::template call_unpack_impl<op,prp...>(recv,recv_buf,sz,sz_byte,op_param,i,opt);
315 }
316 }
317};
318
319
//! Primary template: only the index_tuple specialization below is ever instantiated
template<typename T>
struct call_serialize_variadic {};

template<int ... prp>
struct call_serialize_variadic<index_tuple<prp...>>
{
	//! Accumulate into tot_size the bytes required to pack the properties prp... of send
	template<typename T> inline static void call_pr(T & send, size_t & tot_size)
	{
		Packer<T,HeapMemory>::template packRequest<prp...>(send,tot_size);
	}

	//! Serialize the properties prp... of send into mem
	template<typename T> inline static void call_pack(ExtPreAlloc<HeapMemory> & mem, T & send, Pack_stat & sts)
	{
		Packer<T,HeapMemory>::template pack<prp...>(mem,send,sts);
	}

	/*! \brief Deserialize the receive buffers and merge them into recv
	 *
	 * Dispatches on `result`: the raw-copy path is taken when the element type has
	 * no custom pack function and T is a vector, otherwise the generic
	 * Unpacker-based path is used.
	 */
	template<typename op, typename T, typename S, template<typename> class layout_base, typename Memory>
	inline static void call_unpack(S & recv,
	                               openfpm::vector_fr<BMemory<Memory>> & recv_buf,
	                               openfpm::vector<size_t> * sz,
	                               openfpm::vector<size_t> * sz_byte,
	                               op & op_param,
	                               size_t opt)
	{
		const bool result = has_pack_gen<typename T::value_type>::value == false && is_vector<T>::value == true;

		unpack_selector_with_prp<result, T, S,layout_base,Memory>::template call_unpack<op,prp...>(recv, recv_buf, sz, sz_byte, op_param,opt);
	}
};
349
/*! \brief this class is a functor for "for_each" algorithm
 *
 * This class is a functor for "for_each" algorithm. For each
 * element of the boost::vector the operator() is called.
 * It collects, for every property of v, the pointer of the buffer to send
 *
 * \tparam sT vector type whose property buffers are collected
 *
 */
template<typename sT>
struct set_buf_pointer_for_each_prop
{
	//! vector whose property buffers are sent
	sT & v;

	//! output: one send-buffer pointer per property
	openfpm::vector<const void *> & send_buf;

	//! options, MPI_GPU_DIRECT selects the device buffer when CUDA-aware MPI is available
	size_t opt;

	/*! \brief constructor
	 *
	 * \param v vector whose buffers are collected
	 * \param send_buf output list of buffer pointers
	 * \param opt options
	 *
	 */
	inline set_buf_pointer_for_each_prop(sT & v, openfpm::vector<const void *> & send_buf, size_t opt)
	:v(v),send_buf(send_buf),opt(opt)
	{};

	//! It call the copy function for each property
	template<typename T>
	inline void operator()(T& t) const
	{
		// If we have GPU direct activated use directly the cuda buffer
		if (opt & MPI_GPU_DIRECT)
		{
#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
			send_buf.add(v.template getDeviceBuffer<T::value>());
#else
			// MPI is not CUDA-aware: copy the property to the host and send from there
			v.template deviceToHost<T::value>();
			send_buf.add(v.template getPointer<T::value>());
#endif
		}
		else
		{
			send_buf.add(v.template getPointer<T::value>());
		}
	}
};
399
/*! \brief this class is a functor for "for_each" algorithm
 *
 * This class is a functor for "for_each" algorithm. For each
 * element of the boost::vector the operator() is called.
 * It collects, for every property of v, the size in bytes of the buffer to send
 *
 * \tparam sT vector type whose property buffer sizes are collected
 *
 */

template<typename sT>
struct set_buf_size_for_each_prop
{
	//! vector whose property buffer sizes are computed
	sT & v;

	//! output: one size (in bytes) per property
	openfpm::vector<size_t> & sz;

	/*! \brief constructor
	 *
	 * \param v source vector
	 * \param sz output list of buffer sizes
	 *
	 */
	inline set_buf_size_for_each_prop(sT & v, openfpm::vector<size_t> & sz)
	:v(v),sz(sz)
	{};

	//! It call the copy function for each property
	template<typename T>
	inline void operator()(T& t) const
	{
		// type of the property at position T
		typedef typename boost::mpl::at<typename sT::value_type::type,T>::type type_prp;

		// one property value per element of v
		sz.add(sizeof(type_prp)*v.size());
	}
};
437
438template<typename T, bool impl = is_multiple_buffer_each_prp<T>::value >
439struct pack_unpack_cond_with_prp_inte_lin
440{
441 static void set_buffers(T & send, openfpm::vector<const void *> & send_buf, size_t opt)
442 {
443 send_buf.add(send.getPointer());
444 }
445
446 static void set_size_buffers(T & send, openfpm::vector<size_t> & sz)
447 {
448 sz.add(send.size()*sizeof(typename T::value_type));
449 }
450
451 static void construct_prc(openfpm::vector<size_t> & prc_send, openfpm::vector<size_t> & prc_send_)
452 {
453 for (size_t i = 0 ; i < prc_send.size() ; i++)
454 {
455 prc_send_.add(prc_send.get(i));
456 }
457 }
458};
459
460// memory_traits_inte
461template<typename T>
462struct pack_unpack_cond_with_prp_inte_lin<T,true>
463{
464 static void set_buffers(T & send, openfpm::vector<const void *> & send_buf, size_t opt)
465 {
466 set_buf_pointer_for_each_prop<T> sbp(send,send_buf,opt);
467
468 boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::value_type::max_prop>>(sbp);
469 }
470
471 static void set_size_buffers(T & send, openfpm::vector<size_t> & sz)
472 {
473 set_buf_size_for_each_prop<T> sbp(send,sz);
474
475 boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::value_type::max_prop>>(sbp);
476 }
477
478 static void construct_prc(openfpm::vector<size_t> & prc_send, openfpm::vector<size_t> & prc_send_)
479 {
480 for (size_t i = 0 ; i < prc_send.size() ; i++)
481 {
482 for (size_t j = 0 ; j < T::value_type::max_prop ; j++)
483 {prc_send_.add(prc_send.get(i));}
484 }
485 }
486};
487
//! There is max_prop inside
template<bool cond,
		 typename op,
		 typename T,
		 typename S,
		 template <typename> class layout_base,
		 unsigned int ... prp>
struct pack_unpack_cond_with_prp
{
	/*! \brief Compute the send-buffer sizes for send
	 *
	 * \param send data to send
	 * \param tot_size accumulated serialized size (used on the serialization path)
	 * \param sz output: one size entry per send buffer
	 */
	static void packingRequest(T & send, size_t & tot_size, openfpm::vector<size_t> & sz)
	{
		// indexes of all the properties of T
		typedef typename ::generate_indexes<int, has_max_prop<T, has_value_type_ofp<T>::value>::number, MetaFuncOrd>::result ind_prop_to_pack;
		// vectors of element types without a custom pack function can be sent raw
		if (has_pack_gen<typename T::value_type>::value == false && is_vector<T>::value == true)
		{
			pack_unpack_cond_with_prp_inte_lin<T>::set_size_buffers(send,sz);
		}
		else
		{
			// serialization path: ask the Packer how many bytes are required
			call_serialize_variadic<ind_prop_to_pack>::call_pr(send,tot_size);

			sz.add(tot_size);
		}
	}

	/*! \brief Fill the send buffers for send
	 *
	 * \param mem pre-allocated memory used on the serialization path
	 * \param send data to send
	 * \param sts packing statistics
	 * \param send_buf output: pointers of the buffers to send
	 * \param opt options, forwarded to the raw-buffer path
	 */
	static void packing(ExtPreAlloc<HeapMemory> & mem, T & send, Pack_stat & sts, openfpm::vector<const void *> & send_buf, size_t opt = 0)
	{
		typedef typename ::generate_indexes<int, has_max_prop<T, has_value_type_ofp<T>::value>::number, MetaFuncOrd>::result ind_prop_to_pack;
		// raw path: the vector's own buffers are sent directly, no serialization
		if (has_pack_gen<typename T::value_type>::value == false && is_vector<T>::value == true)
		{
			pack_unpack_cond_with_prp_inte_lin<T>::set_buffers(send,send_buf,opt);
		}
		else
		{
			// serialization path: pack into mem and send the packed region
			send_buf.add(mem.getPointerEnd());
			call_serialize_variadic<ind_prop_to_pack>::call_pack(mem,send,sts);
		}
	}

	/*! \brief Unpack the receive buffers and merge them into recv
	 *
	 * \param recv destination
	 * \param recv_buf receive buffers
	 * \param sz optional output: elements per buffer
	 * \param sz_byte optional output: bytes per buffer
	 * \param op_param merge operation functor
	 * \param opt options
	 */
	template<typename Memory>
	static void unpacking(S & recv,
						  openfpm::vector_fr<BMemory<Memory>> & recv_buf,
						  openfpm::vector<size_t> * sz,
						  openfpm::vector<size_t> * sz_byte,
						  op & op_param,
						  size_t opt)
	{
		typedef index_tuple<prp...> ind_prop_to_pack;
		call_serialize_variadic<ind_prop_to_pack>::template call_unpack<op,T,S,layout_base>(recv, recv_buf, sz, sz_byte, op_param,opt);
	}
};
538
539
540/////////////////////////////
541
//! Helper class to add data without serialization
template<bool sr>
struct op_ssend_recv_add_sr
{
	/*! \brief Append the received elements v2 to recv (raw, non-serialized path)
	 *
	 * \param recv destination vector
	 * \param v2 vector mapping over the received buffers
	 * \param i processor/buffer index (unused here)
	 * \param opt options, MPI_GPU_DIRECT selects the device path
	 */
	template<typename T,
			 typename D,
			 typename S,
			 template <typename> class layout_base,
			 int ... prp> static void execute(D & recv,S & v2, size_t i, size_t opt)
	{
		if (opt & MPI_GPU_DIRECT)
		{
#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT

			// CUDA-aware MPI: the received data is already on the device
			recv.template add_prp_device<typename T::value_type,
					                     PtrMemory,
					                     openfpm::grow_policy_identity,
					                     openfpm::vect_isel<typename T::value_type>::value,
					                     layout_base,
					                     prp...>(v2);
#else
			size_t old_size = recv.size();

			// the data arrived on the host: merge it there first ...
			recv.template add_prp<typename T::value_type,
					              PtrMemory,
					              openfpm::grow_policy_identity,
					              openfpm::vect_isel<typename T::value_type>::value,
					              layout_base,
					              prp...>(v2);

			// ... then upload the freshly appended range to the device
			// NOTE(review): if v2 is empty, old_size+v2.size()-1 underflows —
			// confirm callers never reach this with an empty buffer
			recv.template hostToDevice<prp...>(old_size,old_size+v2.size()-1);

#endif

		}
		else
		{
			// plain host path: append the received elements
			recv.template add_prp<typename T::value_type,
					              PtrMemory,
					              openfpm::grow_policy_identity,
					              openfpm::vect_isel<typename T::value_type>::value,
					              layout_base,
					              prp...>(v2);
		}
	}
};
592
//! Helper class to add data with serialization
template<>
struct op_ssend_recv_add_sr<true>
{
	//! Append the deserialized elements v2 to recv (i and opt are unused here)
	template<typename T,
			 typename D,
			 typename S,
			 template <typename> class layout_base,
			 int ... prp>
	static void execute(D & recv,S & v2, size_t i,size_t opt)
	{
		// Merge the information
		recv.template add_prp<typename T::value_type,
				              HeapMemory,
				              typename T::grow_policy,
				              openfpm::vect_isel<typename T::value_type>::value,
				              layout_base,
				              prp...>(v2);
	}
};
614
615//! Helper class to add data
616template<typename op>
617struct op_ssend_recv_add
618{
619 //! Add data
620 template<bool sr,
621 typename T,
622 typename D,
623 typename S,
624 template <typename> class layout_base,
625 int ... prp>
626 static void execute(D & recv,S & v2, size_t i, size_t opt)
627 {
628 // Merge the information
629 op_ssend_recv_add_sr<sr>::template execute<T,D,S,layout_base,prp...>(recv,v2,i,opt);
630 }
631};
632
//! Helper class to merge data without serialization
template<bool sr,template<typename,typename> class op, typename vector_type_opart>
struct op_ssend_recv_merge_impl
{
	//! Merge the received elements v2 into recv with operation op, at the
	//! positions listed in opart.get(i) (raw, non-serialized path)
	template<typename T,
			 typename D,
			 typename S,
			 template <typename> class layout_base,
			 int ... prp>
	inline static void execute(D & recv,S & v2,size_t i,vector_type_opart & opart)
	{
		// Merge the information
		recv.template merge_prp_v<op,
		                          typename T::value_type,
		                          PtrMemory,
		                          openfpm::grow_policy_identity,
		                          layout_base,
		                          typename vector_type_opart::value_type,
		                          prp...>(v2,opart.get(i));
	}
};
655
//! Helper class to merge data with serialization
template<template<typename,typename> class op, typename vector_type_opart>
struct op_ssend_recv_merge_impl<true,op,vector_type_opart>
{
	//! merge the deserialized data v2 into recv with operation op, at the
	//! positions listed in opart.get(i)
	template<typename T,
			 typename D,
			 typename S,
			 template <typename> class layout_base,
			 int ... prp>
	inline static void execute(D & recv,S & v2,size_t i,vector_type_opart & opart)
	{
		// Merge the information
		recv.template merge_prp_v<op,
		                          typename T::value_type,
		                          HeapMemory,
		                          openfpm::grow_policy_double,
		                          layout_base,
		                          typename vector_type_opart::value_type,
		                          prp...>(v2,opart.get(i));
	}
};
678
//! Helper class to merge data
template<template<typename,typename> class op, typename vector_type_opart>
struct op_ssend_recv_merge
{
	//! For each processor contain the list of the particles with which I must merge the information
	vector_type_opart & opart;

	//! constructor
	op_ssend_recv_merge(vector_type_opart & opart)
	:opart(opart)
	{}

	//! execute the merge: dispatch to the serialized (sr == true) or raw implementation
	template<bool sr,
			 typename T,
			 typename D,
			 typename S,
			 template <typename> class layout_base,
			 int ... prp>
	void execute(D & recv,S & v2,size_t i,size_t opt)
	{
		// opt is not needed by the merge implementations and is dropped here
		op_ssend_recv_merge_impl<sr,op,vector_type_opart>::template execute<T,D,S,layout_base,prp...>(recv,v2,i,opart);
	}
};
703
//! Helper class to merge data without serialization (GPU version)
template<bool sr,template<typename,typename> class op, typename vector_type_opart, typename vector_type_prc_offset>
struct op_ssend_recv_merge_gpu_impl
{
	/*! \brief Merge the received elements v2 into recv on the device
	 *
	 * The slice [start,stop) of opart belonging to this message is recovered
	 * from the per-processor offsets stored in prc_off.
	 */
	template<typename T,
			 typename D,
			 typename S,
			 template <typename> class layout_base,
			 int ... prp>
	inline static void execute(D & recv,S & v2,size_t i,vector_type_opart & opart, vector_type_prc_offset & prc_off)
	{
		// the offsets live on the device: bring them to the host first
		prc_off.template deviceToHost<0>();

		unsigned int start = 0;
		// i / sizeof...(prp) — presumably i advances by sizeof...(prp) per processor,
		// making this the processor index; confirm against callers
		unsigned int stop = prc_off.template get<0>(i / sizeof...(prp));

		if (i != 0)
		{start = prc_off.template get<0>(i / sizeof...(prp)-1);}

		// Merge the information
		recv.template merge_prp_v_device<op,
		                                 typename T::value_type,
		                                 PtrMemory,
		                                 openfpm::grow_policy_identity,
		                                 layout_base,
		                                 vector_type_opart,
		                                 prp...>(v2,opart,start,stop);
	}
};
734
//! Helper class to merge data with serialization (GPU version)
template<template<typename,typename> class op, typename vector_type_opart, typename vector_type_prc_offset>
struct op_ssend_recv_merge_gpu_impl<true,op,vector_type_opart,vector_type_prc_offset>
{
	//! merge the data: the serialized GPU merge is not supported — this stub only
	//! reports an error and continues (no throw/abort)
	template<typename T,
			 typename D,
			 typename S,
			 template <typename> class layout_base,
			 int ... prp>
	inline static void execute(D & recv,S & v2,size_t i,vector_type_opart & opart, vector_type_prc_offset & prc_off)
	{
		std::cout << __FILE__ << ":" << __LINE__ << " Error: not implemented" << std::endl;
	}
};
750
//! Helper class to merge data (GPU version)
template<template<typename,typename> class op, typename vector_type_opart, typename vector_type_prc_offset>
struct op_ssend_recv_merge_gpu
{
	//! For each processor contain the list of the particles with which I must merge the information
	vector_type_opart & opart;

	//! per-processor offsets delimiting the slices of opart
	vector_type_prc_offset & prc_offset;

	//! constructor
	op_ssend_recv_merge_gpu(vector_type_opart & opart, vector_type_prc_offset & prc_offset)
	:opart(opart),prc_offset(prc_offset)
	{}

	//! execute the merge: dispatch to the serialized (sr == true) or raw GPU implementation
	template<bool sr,
			 typename T,
			 typename D,
			 typename S,
			 template <typename> class layout_base,
			 int ... prp>
	void execute(D & recv,S & v2,size_t i,size_t opt)
	{
		op_ssend_recv_merge_gpu_impl<sr,op,vector_type_opart,vector_type_prc_offset>::template execute<T,D,S,layout_base,prp...>(recv,v2,i,opart,prc_offset);
	}
};
777
//! Helper class to merge data without serialization
template<bool sr>
struct op_ssend_gg_recv_merge_impl
{
	//! Merge (replace) the received elements v2 into recv at position start,
	//! then advance start past the merged elements
	template<typename T,
			 typename D,
			 typename S,
			 template <typename> class layout_base,
			 int ... prp>
	inline static void execute(D & recv,S & v2,size_t i,size_t & start)
	{
		// Merge the information
		recv.template merge_prp_v<replace_,
		                          typename T::value_type,
		                          PtrMemory,
		                          openfpm::grow_policy_identity,
		                          layout_base,
		                          prp...>(v2,start);

		// the next received block is placed right after this one
		start += v2.size();
	}
};
801
//! Helper class to merge data without serialization, using host memory
template<bool sr>
struct op_ssend_gg_recv_merge_impl_run_device
{
	//! Merge (replace) v2 into recv at position start on the host, upload the
	//! merged range to the device, then advance start
	template<typename T,
			 typename D,
			 typename S,
			 template <typename> class layout_base,
			 int ... prp>
	inline static void execute(D & recv,S & v2,size_t i,size_t & start)
	{
		// Merge the information
		recv.template merge_prp_v<replace_,
		                          typename T::value_type,
		                          typename S::Memory_type,
		                          openfpm::grow_policy_identity,
		                          layout_base,
		                          prp...>(v2,start);

		// upload the just-merged range
		// NOTE(review): if v2 is empty, start+v2.size()-1 underflows — confirm
		// callers never pass an empty buffer here
		recv.template hostToDevice<prp ...>(start,start+v2.size()-1);

		start += v2.size();
	}
};
827
//! Helper class to merge data without serialization direct transfer to CUDA buffer
template<bool sr>
struct op_ssend_gg_recv_merge_impl_run_device_direct
{
	//! Merge (replace) v2 into recv at position start directly on the device
	//! (CUDA-aware MPI path), then advance start
	template<typename T,
			 typename D,
			 typename S,
			 template <typename> class layout_base,
			 int ... prp>
	inline static void execute(D & recv,S & v2,size_t i,size_t & start)
	{
		// Merge the information
		recv.template merge_prp_device<replace_,
		                               typename T::value_type,
		                               typename S::Memory_type,
		                               openfpm::grow_policy_identity,
		                               prp...>(v2,start);

		// the next received block is placed right after this one
		start += v2.size();
	}
};
850
//! Helper class to merge data with serialization
template<>
struct op_ssend_gg_recv_merge_impl<true>
{
	//! merge (replace) the deserialized data v2 into recv at position start,
	//! then advance start past the merged elements
	template<typename T,
			 typename D,
			 typename S,
			 template <typename> class layout_base,
			 int ... prp> inline static void execute(D & recv,S & v2,size_t i,size_t & start)
	{
		// Merge the information
		recv.template merge_prp_v<replace_,
		                          typename T::value_type,
		                          HeapMemory,
		                          typename S::grow_policy,
		                          layout_base,
		                          prp...>(v2,start);

		// the next received block is placed right after this one
		start += v2.size();
	}
};
874
875//! Helper class to merge data
876struct op_ssend_gg_recv_merge
877{
878 //! starting marker
879 size_t start;
880
881 //! constructor
882 op_ssend_gg_recv_merge(size_t start)
883 :start(start)
884 {}
885
886 //! execute the merge
887 template<bool sr, typename T, typename D, typename S, template<typename> class layout_base, int ... prp> void execute(D & recv,S & v2,size_t i,size_t opt)
888 {
889 op_ssend_gg_recv_merge_impl<sr>::template execute<T,D,S,layout_base,prp...>(recv,v2,i,start);
890 }
891};
892
893//! Helper class to merge data
894struct op_ssend_gg_recv_merge_run_device
895{
896 //! starting marker
897 size_t start;
898
899 //! constructor
900 op_ssend_gg_recv_merge_run_device(size_t start)
901 :start(start)
902 {}
903
904 //! execute the merge
905 template<bool sr, typename T, typename D, typename S, template<typename> class layout_base, int ... prp> void execute(D & recv,S & v2,size_t i,size_t opt)
906 {
907 bool active = is_mpi_rdma_cuda_active();
908 if (active == true)
909 {op_ssend_gg_recv_merge_impl_run_device_direct<sr>::template execute<T,D,S,layout_base,prp...>(recv,v2,i,start);}
910 else
911 {op_ssend_gg_recv_merge_impl_run_device<sr>::template execute<T,D,S,layout_base,prp...>(recv,v2,i,start);}
912 }
913};
914
915//////////////////////////////////////////////////
916
917
918
919#endif /* OPENFPM_VCLUSTER_SRC_VCLUSTER_VCLUSTER_META_FUNCTION_HPP_ */
920