1 | /* |
2 | * VCluster_meta_function.hpp |
3 | * |
4 | * Created on: Dec 8, 2016 |
5 | * Author: i-bird |
6 | */ |
7 | |
8 | #ifndef OPENFPM_VCLUSTER_SRC_VCLUSTER_VCLUSTER_META_FUNCTION_HPP_ |
9 | #define OPENFPM_VCLUSTER_SRC_VCLUSTER_VCLUSTER_META_FUNCTION_HPP_ |
10 | |
11 | #include "memory/BHeapMemory.hpp" |
12 | #include "Packer_Unpacker/has_max_prop.hpp" |
13 | |
/*! \brief Return true if MPI is compiled with CUDA support
 *
 * \return true if MPI is compiled with CUDA support
 */
18 | static inline bool is_mpi_rdma_cuda_active() |
19 | { |
20 | #if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT |
21 | return true; |
22 | #else |
23 | return false; |
24 | #endif |
25 | } |
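
/* Usage sketch (illustrative only, not part of the library API): callers can
 * branch on this flag to decide whether a device pointer may be handed
 * directly to MPI or whether the data must first be staged through host memory.
 *
 * \code
 * if (is_mpi_rdma_cuda_active() == true)
 * {
 *     // CUDA-aware MPI: device buffers can be passed directly
 * }
 * else
 * {
 *     // fall back: copy the device data to the host before sending
 * }
 * \endcode
 */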
26 | |
27 | template<bool result, typename T, typename S, template<typename> class layout_base, typename Memory> |
28 | struct unpack_selector_with_prp |
29 | { |
30 | template<typename op, |
31 | int ... prp> |
32 | static void call_unpack(S & recv, |
33 | openfpm::vector_fr<BMemory<Memory>> & recv_buf, |
34 | openfpm::vector<size_t> * sz, |
35 | openfpm::vector<size_t> * sz_byte, |
36 | op & op_param, |
37 | size_t opt) |
38 | { |
39 | if (sz_byte != NULL) |
40 | sz_byte->resize(recv_buf.size()); |
41 | |
42 | for (size_t i = 0 ; i < recv_buf.size() ; i++) |
43 | { |
44 | T unp; |
45 | |
46 | ExtPreAlloc<HeapMemory> & mem = *(new ExtPreAlloc<HeapMemory>(recv_buf.get(i).size(),recv_buf.get(i))); |
47 | mem.incRef(); |
48 | |
49 | Unpack_stat ps; |
50 | |
51 | Unpacker<T,HeapMemory>::template unpack<>(mem, unp, ps); |
52 | |
53 | size_t recv_size_old = recv.size(); |
54 | // Merge the information |
55 | |
56 | op_param.template execute<true,T,decltype(recv),decltype(unp),layout_base,prp...>(recv,unp,i,opt); |
57 | |
58 | size_t recv_size_new = recv.size(); |
59 | |
60 | if (sz_byte != NULL) |
61 | sz_byte->get(i) = recv_buf.get(i).size(); |
62 | if (sz != NULL) |
63 | sz->get(i) = recv_size_new - recv_size_old; |
64 | |
65 | mem.decRef(); |
66 | delete &mem; |
67 | } |
68 | } |
69 | }; |
70 | |
71 | template<typename op, typename Vt, typename S, template<typename> class layout_base, typename v_mpl> |
72 | struct unpack_each_prop_buffer |
73 | { |
	//! destination vector where the received data is merged
	S & recv;

	//! set of receive buffers, one per property
	openfpm::vector_fr<BMemory<HeapMemory>> & recv_buf;

	//! index of the receive buffer being processed
	size_t i;

	//! operation to execute on the received data
	op & op_param;

	//! if not NULL, filled with the number of received elements for each buffer
	openfpm::vector<size_t> * sz;

	//! if not NULL, filled with the size in bytes of each receive buffer
	openfpm::vector<size_t> * sz_byte;
85 | |
	/*! \brief constructor
	 *
	 * \param recv destination vector where the received data is merged
	 * \param recv_buf set of receive buffers, one per property
	 * \param op_param operation to execute on the received data
	 * \param i index of the receive buffer to process
	 * \param sz if not NULL, filled with the number of received elements
	 * \param sz_byte if not NULL, filled with the size in bytes of each buffer
	 *
	 */
91 | inline unpack_each_prop_buffer(S & recv, |
92 | openfpm::vector_fr<BMemory<HeapMemory>> & recv_buf, |
93 | op & op_param, |
94 | size_t i, |
95 | openfpm::vector<size_t> * sz, |
96 | openfpm::vector<size_t> * sz_byte) |
	:recv(recv),recv_buf(recv_buf),i(i),op_param(op_param),sz(sz),sz_byte(sz_byte)
98 | {}; |
99 | |
	//! It calls the unpack/merge operation for each property
101 | template<typename T> |
102 | inline void operator()(T& t) const |
103 | { |
		// here we get the type of the property at position T::value
105 | typedef typename boost::mpl::at<typename T::value_type::type,boost::mpl::int_<T::value> >::type prp_type; |
106 | |
		// here we get the property index at position T::value in the v_mpl list
108 | typedef typename boost::mpl::at<v_mpl,boost::mpl::int_<T::value>>::type prp_num; |
109 | |
110 | // calculate the number of received elements |
111 | size_t n_ele = recv_buf.get(i).size() / sizeof(prp_type); |
112 | |
113 | // add the received particles to the vector |
114 | PtrMemory * ptr1 = new PtrMemory(recv_buf.get(i).getPointer(),recv_buf.get(i).size()); |
115 | |
		// create a vector representation of a piece of memory already allocated
117 | openfpm::vector<typename Vt::value_type,PtrMemory,layout_base,openfpm::grow_policy_identity> v2; |
118 | |
119 | v2.template setMemory<prp_num::value>(*ptr1); |
120 | |
121 | // resize with the number of elements |
122 | v2.resize(n_ele); |
123 | |
124 | // Merge the information |
125 | |
126 | size_t recv_size_old = recv.size(); |
127 | |
128 | op_param.template execute<false,T,decltype(recv),decltype(v2),layout_base,prp_num::value>(recv,v2,i); |
129 | |
130 | size_t recv_size_new = recv.size(); |
131 | |
132 | if (sz_byte != NULL) |
133 | sz_byte->get(i) = recv_buf.get(i).size(); |
134 | if (sz != NULL) |
135 | sz->get(i) = recv_size_new - recv_size_old; |
136 | } |
137 | }; |
138 | |
/*! \brief this class is a functor for "for_each" algorithm
 *
 * This class is a functor for the "for_each" algorithm. For each
 * element of the boost::vector the operator() is called.
 * It is mainly used to process the receive buffers when the
 * memory_traits_inte layout is used (one buffer per property)
 *
 * \tparam sT vector type to process
 * \tparam layout_base memory layout
 * \tparam Memory memory type of the receive buffers
 *
 */
149 | |
150 | template<typename sT, template<typename> class layout_base,typename Memory> |
151 | struct process_receive_mem_traits_inte |
152 | { |
	//! index of the receive buffer currently processed
154 | size_t i; |
155 | |
156 | //! Receive buffer |
157 | openfpm::vector_fr<BMemory<Memory>> & recv_buf; |
158 | |
	//! Fake vector that maps over the received memory
160 | openfpm::vector<typename sT::value_type,PtrMemory,layout_base,openfpm::grow_policy_identity> & v2; |
161 | |
	//! number of elements in the receive buffer (set while processing the properties)
	size_t n_ele = 0;

	//! options
	size_t opt;
166 | |
	/*! \brief constructor
	 *
	 * \param v2 fake vector that maps over the received memory
	 * \param recv_buf set of receive buffers, one per property
	 * \param i index of the first receive buffer to process
	 * \param opt options (in particular MPI_GPU_DIRECT)
	 *
	 */
172 | inline process_receive_mem_traits_inte(openfpm::vector<typename sT::value_type,PtrMemory,layout_base,openfpm::grow_policy_identity> & v2, |
173 | openfpm::vector_fr<BMemory<Memory>> & recv_buf, |
174 | size_t i, |
175 | size_t opt) |
176 | :i(i),recv_buf(recv_buf),v2(v2),opt(opt) |
177 | {}; |
178 | |
	//! For each property, map the corresponding receive buffer into v2
180 | template<typename T> |
181 | inline void operator()(T& t) |
182 | { |
183 | typedef typename boost::mpl::at<typename sT::value_type::type,T>::type type_prp; |
184 | |
185 | // calculate the number of received elements |
186 | this->n_ele = recv_buf.get(i).size() / sizeof(type_prp); |
187 | |
188 | PtrMemory * ptr1; |
189 | |
190 | if (opt & MPI_GPU_DIRECT) |
191 | { |
192 | #if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT |
193 | // add the received particles to the vector |
194 | ptr1 = new PtrMemory(recv_buf.get(i).getDevicePointer(),recv_buf.get(i).size()); |
195 | #else |
196 | // add the received particles to the vector |
197 | ptr1 = new PtrMemory(recv_buf.get(i).getPointer(),recv_buf.get(i).size()); |
198 | #endif |
199 | } |
200 | else |
201 | { |
202 | // add the received particles to the vector |
203 | ptr1 = new PtrMemory(recv_buf.get(i).getPointer(),recv_buf.get(i).size()); |
204 | } |
205 | |
206 | v2.template setMemory<T::value>(*ptr1); |
207 | |
208 | ++i; |
209 | } |
210 | }; |
211 | |
212 | template<bool inte_or_lin,typename T, typename S, template<typename> class layout_base,typename Memory> |
213 | struct unpack_selector_with_prp_lin |
214 | { |
215 | template<typename op, unsigned int ... prp> static int call_unpack_impl(S & recv, |
216 | openfpm::vector_fr<BMemory<Memory>> & recv_buf, |
217 | openfpm::vector<size_t> * sz, |
218 | openfpm::vector<size_t> * sz_byte, |
219 | op & op_param, |
220 | size_t i, |
221 | size_t opt) |
222 | { |
		// create a vector representation of a piece of memory already allocated
224 | openfpm::vector<typename T::value_type,PtrMemory,layout_base,openfpm::grow_policy_identity> v2; |
225 | |
226 | process_receive_mem_traits_inte<T,layout_base,Memory> prmti(v2,recv_buf,i,opt); |
227 | |
228 | boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::value_type::max_prop>>(prmti); |
229 | |
230 | v2.resize(prmti.n_ele); |
231 | |
232 | // Merge the information |
233 | |
234 | size_t recv_size_old = recv.size(); |
235 | |
236 | op_param.template execute<false,T,decltype(recv),decltype(v2),layout_base,prp...>(recv,v2,i,opt); |
237 | |
238 | size_t recv_size_new = recv.size(); |
239 | |
240 | if (sz_byte != NULL) |
241 | sz_byte->get(i) = recv_buf.get(i).size(); |
242 | if (sz != NULL) |
243 | sz->get(i) = recv_size_new - recv_size_old; |
244 | |
245 | return sizeof...(prp); |
246 | } |
247 | }; |
248 | |
249 | template<typename T, typename S, template<typename> class layout_base, typename Memory> |
250 | struct unpack_selector_with_prp_lin<true,T,S,layout_base,Memory> |
251 | { |
252 | template<typename op, unsigned int ... prp> static int call_unpack_impl(S & recv, |
253 | openfpm::vector_fr<BMemory<Memory>> & recv_buf, |
254 | openfpm::vector<size_t> * sz, |
255 | openfpm::vector<size_t> * sz_byte, |
256 | op & op_param, |
257 | size_t i, |
258 | size_t opt) |
259 | { |
260 | // calculate the number of received elements |
261 | size_t n_ele = recv_buf.get(i).size() / sizeof(typename T::value_type); |
262 | |
263 | // add the received particles to the vector |
264 | PtrMemory * ptr1 = new PtrMemory(recv_buf.get(i).getPointer(),recv_buf.get(i).size()); |
265 | ptr1->incRef(); |
266 | |
267 | { |
			// create a vector representation of a piece of memory already allocated
269 | openfpm::vector<typename T::value_type,PtrMemory,layout_base,openfpm::grow_policy_identity> v2; |
270 | |
271 | v2.setMemory(*ptr1); |
272 | |
273 | // resize with the number of elements |
274 | v2.resize(n_ele); |
275 | |
276 | // Merge the information |
277 | |
278 | size_t recv_size_old = recv.size(); |
279 | |
280 | op_param.template execute<false,T,decltype(recv),decltype(v2),layout_base,prp...>(recv,v2,i,opt); |
281 | |
282 | size_t recv_size_new = recv.size(); |
283 | |
284 | if (sz_byte != NULL) |
285 | sz_byte->get(i) = recv_buf.get(i).size(); |
286 | if (sz != NULL) |
287 | sz->get(i) = recv_size_new - recv_size_old; |
288 | } |
289 | |
290 | ptr1->decRef(); |
291 | delete ptr1; |
292 | return 1; |
293 | } |
294 | }; |
295 | |
//! dummy aggregate used only to query the layout (memory_traits_lin vs memory_traits_inte)
typedef aggregate<int,int> dummy_type;

//! Specialization for types that do not require serialization: the receive buffers are mapped directly
299 | template<typename T, typename S, template<typename> class layout_base, typename Memory> |
300 | struct unpack_selector_with_prp<true,T,S,layout_base,Memory> |
301 | { |
302 | template<typename op, unsigned int ... prp> static void call_unpack(S & recv, |
303 | openfpm::vector_fr<BMemory<Memory>> & recv_buf, |
304 | openfpm::vector<size_t> * sz, |
305 | openfpm::vector<size_t> * sz_byte, |
306 | op & op_param, |
307 | size_t opt) |
308 | { |
309 | if (sz_byte != NULL) |
310 | sz_byte->resize(recv_buf.size()); |
311 | |
		// call_unpack_impl returns the number of receive buffers it consumed:
		// one for a linear layout, one per property for the interleaved layout
		for (size_t i = 0 ; i < recv_buf.size() ; )
		{
			i += unpack_selector_with_prp_lin<is_layout_mlin<layout_base<dummy_type>>::value,T,S,layout_base,Memory>::template call_unpack_impl<op,prp...>(recv,recv_buf,sz,sz_byte,op_param,i,opt);
		}
316 | } |
317 | }; |
318 | |
319 | |
320 | template<typename T> |
321 | struct call_serialize_variadic {}; |
322 | |
323 | template<int ... prp> |
324 | struct call_serialize_variadic<index_tuple<prp...>> |
325 | { |
326 | template<typename T> inline static void call_pr(T & send, size_t & tot_size) |
327 | { |
328 | Packer<T,HeapMemory>::template packRequest<prp...>(send,tot_size); |
329 | } |
330 | |
331 | template<typename T> inline static void call_pack(ExtPreAlloc<HeapMemory> & mem, T & send, Pack_stat & sts) |
332 | { |
333 | Packer<T,HeapMemory>::template pack<prp...>(mem,send,sts); |
334 | } |
335 | |
336 | template<typename op, typename T, typename S, template<typename> class layout_base, typename Memory> |
337 | inline static void call_unpack(S & recv, |
338 | openfpm::vector_fr<BMemory<Memory>> & recv_buf, |
339 | openfpm::vector<size_t> * sz, |
340 | openfpm::vector<size_t> * sz_byte, |
341 | op & op_param, |
342 | size_t opt) |
343 | { |
344 | const bool result = has_pack_gen<typename T::value_type>::value == false && is_vector<T>::value == true; |
345 | |
346 | unpack_selector_with_prp<result, T, S,layout_base,Memory>::template call_unpack<op,prp...>(recv, recv_buf, sz, sz_byte, op_param,opt); |
347 | } |
348 | }; |
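
/* Minimal sketch (illustrative only): the specialization above turns a
 * compile-time list of property indexes (index_tuple<prp...>) into Packer /
 * Unpacker calls restricted to those properties.
 *
 * \code
 * // hypothetical example: serialize only properties 0 and 2 of 'send'
 * size_t req = 0;
 * call_serialize_variadic<index_tuple<0,2>>::call_pr(send,req);        // size request
 * // ... allocate an ExtPreAlloc<HeapMemory> 'mem' of at least 'req' bytes ...
 * call_serialize_variadic<index_tuple<0,2>>::call_pack(mem,send,sts);  // pack
 * \endcode
 */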
349 | |
/*! \brief this class is a functor for "for_each" algorithm
 *
 * This class is a functor for the "for_each" algorithm. For each
 * element of the boost::vector the operator() is called.
 * It is mainly used to collect the send-buffer pointer of each property
 *
 * \tparam sT vector type from which the buffer pointers are taken
 *
 */
360 | template<typename sT> |
361 | struct set_buf_pointer_for_each_prop |
362 | { |
	//! vector from which the send-buffer pointers are taken
	sT & v;

	//! list of send-buffer pointers to fill, one per property
	openfpm::vector<const void *> & send_buf;

	//! options (in particular MPI_GPU_DIRECT)
	size_t opt;
369 | |
	/*! \brief constructor
	 *
	 * \param v vector from which the buffer pointers are taken
	 * \param send_buf list of send-buffer pointers to fill
	 * \param opt options
	 *
	 */
375 | inline set_buf_pointer_for_each_prop(sT & v, openfpm::vector<const void *> & send_buf, size_t opt) |
376 | :v(v),send_buf(send_buf),opt(opt) |
377 | {}; |
378 | |
	//! Add the buffer pointer of the property T::value to the send list
380 | template<typename T> |
381 | inline void operator()(T& t) const |
382 | { |
383 | // If we have GPU direct activated use directly the cuda buffer |
384 | if (opt & MPI_GPU_DIRECT) |
385 | { |
386 | #if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT |
387 | send_buf.add(v.template getDeviceBuffer<T::value>()); |
388 | #else |
389 | v.template deviceToHost<T::value>(); |
390 | send_buf.add(v.template getPointer<T::value>()); |
391 | #endif |
392 | } |
393 | else |
394 | { |
395 | send_buf.add(v.template getPointer<T::value>()); |
396 | } |
397 | } |
398 | }; |
399 | |
/*! \brief this class is a functor for "for_each" algorithm
 *
 * This class is a functor for the "for_each" algorithm. For each
 * element of the boost::vector the operator() is called.
 * It is mainly used to calculate the size in bytes of each property buffer
 *
 * \tparam sT vector type from which the buffer sizes are calculated
 *
 */
410 | |
411 | template<typename sT> |
412 | struct set_buf_size_for_each_prop |
413 | { |
	//! vector from which the buffer sizes are calculated
	sT & v;

	//! list of buffer sizes, one per property
	openfpm::vector<size_t> & sz;
418 | |
	/*! \brief constructor
	 *
	 * \param v vector from which the buffer sizes are calculated
	 * \param sz list of buffer sizes to fill
	 *
	 */
424 | inline set_buf_size_for_each_prop(sT & v, openfpm::vector<size_t> & sz) |
425 | :v(v),sz(sz) |
426 | {}; |
427 | |
	//! Add the size in bytes of the property T::value buffer
429 | template<typename T> |
430 | inline void operator()(T& t) const |
431 | { |
432 | typedef typename boost::mpl::at<typename sT::value_type::type,T>::type type_prp; |
433 | |
434 | sz.add(sizeof(type_prp)*v.size()); |
435 | } |
436 | }; |
437 | |
438 | template<typename T, bool impl = is_multiple_buffer_each_prp<T>::value > |
439 | struct pack_unpack_cond_with_prp_inte_lin |
440 | { |
441 | static void set_buffers(T & send, openfpm::vector<const void *> & send_buf, size_t opt) |
442 | { |
443 | send_buf.add(send.getPointer()); |
444 | } |
445 | |
446 | static void set_size_buffers(T & send, openfpm::vector<size_t> & sz) |
447 | { |
448 | sz.add(send.size()*sizeof(typename T::value_type)); |
449 | } |
450 | |
451 | static void construct_prc(openfpm::vector<size_t> & prc_send, openfpm::vector<size_t> & prc_send_) |
452 | { |
453 | for (size_t i = 0 ; i < prc_send.size() ; i++) |
454 | { |
455 | prc_send_.add(prc_send.get(i)); |
456 | } |
457 | } |
458 | }; |
459 | |
//! Specialization for the memory_traits_inte layout (one buffer per property)
461 | template<typename T> |
462 | struct pack_unpack_cond_with_prp_inte_lin<T,true> |
463 | { |
464 | static void set_buffers(T & send, openfpm::vector<const void *> & send_buf, size_t opt) |
465 | { |
466 | set_buf_pointer_for_each_prop<T> sbp(send,send_buf,opt); |
467 | |
468 | boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::value_type::max_prop>>(sbp); |
469 | } |
470 | |
471 | static void set_size_buffers(T & send, openfpm::vector<size_t> & sz) |
472 | { |
473 | set_buf_size_for_each_prop<T> sbp(send,sz); |
474 | |
475 | boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::value_type::max_prop>>(sbp); |
476 | } |
477 | |
478 | static void construct_prc(openfpm::vector<size_t> & prc_send, openfpm::vector<size_t> & prc_send_) |
479 | { |
480 | for (size_t i = 0 ; i < prc_send.size() ; i++) |
481 | { |
482 | for (size_t j = 0 ; j < T::value_type::max_prop ; j++) |
483 | {prc_send_.add(prc_send.get(i));} |
484 | } |
485 | } |
486 | }; |
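
/* Example of the difference between the two construct_prc() implementations
 * above (illustrative values): with a linear layout one buffer is sent per
 * destination processor, while with the interleaved (memory_traits_inte)
 * layout one buffer per property is sent, so each processor id is replicated
 * max_prop times.
 *
 * \code
 * // assume prc_send = {3,7} and T::value_type has max_prop = 2
 * // linear layout      -> prc_send_ = {3,7}
 * // interleaved layout -> prc_send_ = {3,3,7,7}
 * \endcode
 */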
487 | |
//! Pack/unpack helper for types that expose max_prop
489 | template<bool cond, |
490 | typename op, |
491 | typename T, |
492 | typename S, |
493 | template <typename> class layout_base, |
494 | unsigned int ... prp> |
495 | struct pack_unpack_cond_with_prp |
496 | { |
497 | static void packingRequest(T & send, size_t & tot_size, openfpm::vector<size_t> & sz) |
498 | { |
499 | typedef typename ::generate_indexes<int, has_max_prop<T, has_value_type_ofp<T>::value>::number, MetaFuncOrd>::result ind_prop_to_pack; |
500 | if (has_pack_gen<typename T::value_type>::value == false && is_vector<T>::value == true) |
501 | { |
502 | pack_unpack_cond_with_prp_inte_lin<T>::set_size_buffers(send,sz); |
503 | } |
504 | else |
505 | { |
506 | call_serialize_variadic<ind_prop_to_pack>::call_pr(send,tot_size); |
507 | |
508 | sz.add(tot_size); |
509 | } |
510 | } |
511 | |
512 | static void packing(ExtPreAlloc<HeapMemory> & mem, T & send, Pack_stat & sts, openfpm::vector<const void *> & send_buf, size_t opt = 0) |
513 | { |
514 | typedef typename ::generate_indexes<int, has_max_prop<T, has_value_type_ofp<T>::value>::number, MetaFuncOrd>::result ind_prop_to_pack; |
515 | if (has_pack_gen<typename T::value_type>::value == false && is_vector<T>::value == true) |
516 | { |
517 | pack_unpack_cond_with_prp_inte_lin<T>::set_buffers(send,send_buf,opt); |
518 | } |
519 | else |
520 | { |
521 | send_buf.add(mem.getPointerEnd()); |
522 | call_serialize_variadic<ind_prop_to_pack>::call_pack(mem,send,sts); |
523 | } |
524 | } |
525 | |
526 | template<typename Memory> |
527 | static void unpacking(S & recv, |
528 | openfpm::vector_fr<BMemory<Memory>> & recv_buf, |
529 | openfpm::vector<size_t> * sz, |
530 | openfpm::vector<size_t> * sz_byte, |
531 | op & op_param, |
532 | size_t opt) |
533 | { |
534 | typedef index_tuple<prp...> ind_prop_to_pack; |
535 | call_serialize_variadic<ind_prop_to_pack>::template call_unpack<op,T,S,layout_base>(recv, recv_buf, sz, sz_byte, op_param,opt); |
536 | } |
537 | }; |
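
/* Illustrative call sequence (the names and template arguments below are
 * hypothetical, the real calls are issued by the VCluster send/receive
 * machinery): the three static members above are meant to be used in order,
 * packingRequest() to size the send, packing() to fill the buffers, and
 * unpacking() on the receiving side.
 *
 * \code
 * size_t tot_size = 0;
 * openfpm::vector<size_t> sz;
 *
 * // 1) compute how much memory the send requires
 * pack_unpack_cond_with_prp<false,op,T,S,memory_traits_lin,0,1>::packingRequest(send,tot_size,sz);
 *
 * // 2) pack into a pre-allocated buffer and collect the pointers to send
 * pack_unpack_cond_with_prp<false,op,T,S,memory_traits_lin,0,1>::packing(mem,send,sts,send_buf);
 *
 * // 3) on the receiving side, unpack/merge into 'recv'
 * pack_unpack_cond_with_prp<false,op,T,S,memory_traits_lin,0,1>::unpacking(recv,recv_buf,&sz,NULL,op_param,0);
 * \endcode
 */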
538 | |
539 | |
540 | ///////////////////////////// |
541 | |
542 | //! Helper class to add data without serialization |
543 | template<bool sr> |
544 | struct op_ssend_recv_add_sr |
545 | { |
546 | //! Add data |
547 | template<typename T, |
548 | typename D, |
549 | typename S, |
550 | template <typename> class layout_base, |
551 | int ... prp> static void execute(D & recv,S & v2, size_t i, size_t opt) |
552 | { |
553 | if (opt & MPI_GPU_DIRECT) |
554 | { |
555 | #if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT |
556 | |
557 | // Merge the information |
558 | recv.template add_prp_device<typename T::value_type, |
559 | PtrMemory, |
560 | openfpm::grow_policy_identity, |
561 | openfpm::vect_isel<typename T::value_type>::value, |
562 | layout_base, |
563 | prp...>(v2); |
564 | #else |
565 | size_t old_size = recv.size(); |
566 | |
567 | // Merge the information |
568 | recv.template add_prp<typename T::value_type, |
569 | PtrMemory, |
570 | openfpm::grow_policy_identity, |
571 | openfpm::vect_isel<typename T::value_type>::value, |
572 | layout_base, |
573 | prp...>(v2); |
574 | |
575 | recv.template hostToDevice<prp...>(old_size,old_size+v2.size()-1); |
576 | |
577 | #endif |
578 | |
579 | } |
580 | else |
581 | { |
582 | // Merge the information |
583 | recv.template add_prp<typename T::value_type, |
584 | PtrMemory, |
585 | openfpm::grow_policy_identity, |
586 | openfpm::vect_isel<typename T::value_type>::value, |
587 | layout_base, |
588 | prp...>(v2); |
589 | } |
590 | } |
591 | }; |
592 | |
593 | //! Helper class to add data with serialization |
594 | template<> |
595 | struct op_ssend_recv_add_sr<true> |
596 | { |
597 | //! Add data |
598 | template<typename T, |
599 | typename D, |
600 | typename S, |
601 | template <typename> class layout_base, |
602 | int ... prp> |
603 | static void execute(D & recv,S & v2, size_t i,size_t opt) |
604 | { |
605 | // Merge the information |
606 | recv.template add_prp<typename T::value_type, |
607 | HeapMemory, |
608 | typename T::grow_policy, |
609 | openfpm::vect_isel<typename T::value_type>::value, |
610 | layout_base, |
611 | prp...>(v2); |
612 | } |
613 | }; |
614 | |
615 | //! Helper class to add data |
616 | template<typename op> |
617 | struct op_ssend_recv_add |
618 | { |
619 | //! Add data |
620 | template<bool sr, |
621 | typename T, |
622 | typename D, |
623 | typename S, |
624 | template <typename> class layout_base, |
625 | int ... prp> |
626 | static void execute(D & recv,S & v2, size_t i, size_t opt) |
627 | { |
628 | // Merge the information |
629 | op_ssend_recv_add_sr<sr>::template execute<T,D,S,layout_base,prp...>(recv,v2,i,opt); |
630 | } |
631 | }; |
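
/* All the op_ssend_recv_* / op_ssend_gg_recv_* helpers in this file expose the
 * same member template execute<sr,T,D,S,layout_base,prp...>(recv,v2,i,opt),
 * which the unpack machinery above invokes on op_param once per received
 * buffer. A minimal custom operation could look like this (illustrative
 * sketch, not part of the library):
 *
 * \code
 * struct op_count_only
 * {
 *     size_t total = 0;
 *
 *     template<bool sr, typename T, typename D, typename S,
 *              template<typename> class layout_base, int ... prp>
 *     void execute(D & recv, S & v2, size_t i, size_t opt)
 *     {
 *         // hypothetical: count the received elements instead of merging them
 *         total += v2.size();
 *     }
 * };
 * \endcode
 */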
632 | |
633 | //! Helper class to merge data without serialization |
634 | template<bool sr,template<typename,typename> class op, typename vector_type_opart> |
635 | struct op_ssend_recv_merge_impl |
636 | { |
	//! Merge the data
638 | template<typename T, |
639 | typename D, |
640 | typename S, |
641 | template <typename> class layout_base, |
642 | int ... prp> |
643 | inline static void execute(D & recv,S & v2,size_t i,vector_type_opart & opart) |
644 | { |
645 | // Merge the information |
646 | recv.template merge_prp_v<op, |
647 | typename T::value_type, |
648 | PtrMemory, |
649 | openfpm::grow_policy_identity, |
650 | layout_base, |
651 | typename vector_type_opart::value_type, |
652 | prp...>(v2,opart.get(i)); |
653 | } |
654 | }; |
655 | |
656 | //! Helper class to merge data with serialization |
657 | template<template<typename,typename> class op, typename vector_type_opart> |
658 | struct op_ssend_recv_merge_impl<true,op,vector_type_opart> |
659 | { |
660 | //! merge the data |
661 | template<typename T, |
662 | typename D, |
663 | typename S, |
664 | template <typename> class layout_base, |
665 | int ... prp> |
666 | inline static void execute(D & recv,S & v2,size_t i,vector_type_opart & opart) |
667 | { |
668 | // Merge the information |
669 | recv.template merge_prp_v<op, |
670 | typename T::value_type, |
671 | HeapMemory, |
672 | openfpm::grow_policy_double, |
673 | layout_base, |
674 | typename vector_type_opart::value_type, |
675 | prp...>(v2,opart.get(i)); |
676 | } |
677 | }; |
678 | |
679 | //! Helper class to merge data |
680 | template<template<typename,typename> class op, typename vector_type_opart> |
681 | struct op_ssend_recv_merge |
682 | { |
	//! For each processor, the list of particles into which the received information must be merged
684 | vector_type_opart & opart; |
685 | |
686 | //! constructor |
687 | op_ssend_recv_merge(vector_type_opart & opart) |
688 | :opart(opart) |
689 | {} |
690 | |
691 | //! execute the merge |
692 | template<bool sr, |
693 | typename T, |
694 | typename D, |
695 | typename S, |
696 | template <typename> class layout_base, |
697 | int ... prp> |
698 | void execute(D & recv,S & v2,size_t i,size_t opt) |
699 | { |
700 | op_ssend_recv_merge_impl<sr,op,vector_type_opart>::template execute<T,D,S,layout_base,prp...>(recv,v2,i,opart); |
701 | } |
702 | }; |
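
/* Usage sketch (illustrative only; the element type of 'opart' below is an
 * assumption): op_ssend_recv_merge carries, for each processor, the list of
 * local particles into which the received data has to be merged, and is then
 * passed as op_param to the unpack machinery above.
 *
 * \code
 * openfpm::vector<openfpm::vector<size_t>> opart;        // one list per processor, filled elsewhere
 * op_ssend_recv_merge<replace_,decltype(opart)> opm(opart);
 * // opm.execute<...>(recv,v2,i,opt) is then called once per received buffer
 * \endcode
 */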
703 | |
704 | //! Helper class to merge data without serialization |
705 | template<bool sr,template<typename,typename> class op, typename vector_type_opart, typename vector_type_prc_offset> |
706 | struct op_ssend_recv_merge_gpu_impl |
707 | { |
	//! Merge the data
709 | template<typename T, |
710 | typename D, |
711 | typename S, |
712 | template <typename> class layout_base, |
713 | int ... prp> |
714 | inline static void execute(D & recv,S & v2,size_t i,vector_type_opart & opart, vector_type_prc_offset & prc_off) |
715 | { |
716 | prc_off.template deviceToHost<0>(); |
717 | |
718 | unsigned int start = 0; |
719 | unsigned int stop = prc_off.template get<0>(i / sizeof...(prp)); |
720 | |
721 | if (i != 0) |
722 | {start = prc_off.template get<0>(i / sizeof...(prp)-1);} |
723 | |
724 | // Merge the information |
725 | recv.template merge_prp_v_device<op, |
726 | typename T::value_type, |
727 | PtrMemory, |
728 | openfpm::grow_policy_identity, |
729 | layout_base, |
730 | vector_type_opart, |
731 | prp...>(v2,opart,start,stop); |
732 | } |
733 | }; |
734 | |
735 | //! Helper class to merge data with serialization |
736 | template<template<typename,typename> class op, typename vector_type_opart, typename vector_type_prc_offset> |
737 | struct op_ssend_recv_merge_gpu_impl<true,op,vector_type_opart,vector_type_prc_offset> |
738 | { |
739 | //! merge the data |
740 | template<typename T, |
741 | typename D, |
742 | typename S, |
743 | template <typename> class layout_base, |
744 | int ... prp> |
745 | inline static void execute(D & recv,S & v2,size_t i,vector_type_opart & opart, vector_type_prc_offset & prc_off) |
746 | { |
747 | std::cout << __FILE__ << ":" << __LINE__ << " Error: not implemented" << std::endl; |
748 | } |
749 | }; |
750 | |
751 | //! Helper class to merge data |
752 | template<template<typename,typename> class op, typename vector_type_opart, typename vector_type_prc_offset> |
753 | struct op_ssend_recv_merge_gpu |
754 | { |
	//! For each processor, the list of particles into which the received information must be merged
	vector_type_opart & opart;

	//! For each processor, the cumulative offset (number of elements) in the receive buffer
	vector_type_prc_offset & prc_offset;
759 | |
760 | //! constructor |
761 | op_ssend_recv_merge_gpu(vector_type_opart & opart, vector_type_prc_offset & prc_offset) |
762 | :opart(opart),prc_offset(prc_offset) |
763 | {} |
764 | |
765 | //! execute the merge |
766 | template<bool sr, |
767 | typename T, |
768 | typename D, |
769 | typename S, |
770 | template <typename> class layout_base, |
771 | int ... prp> |
772 | void execute(D & recv,S & v2,size_t i,size_t opt) |
773 | { |
774 | op_ssend_recv_merge_gpu_impl<sr,op,vector_type_opart,vector_type_prc_offset>::template execute<T,D,S,layout_base,prp...>(recv,v2,i,opart,prc_offset); |
775 | } |
776 | }; |
777 | |
778 | //! Helper class to merge data without serialization |
779 | template<bool sr> |
780 | struct op_ssend_gg_recv_merge_impl |
781 | { |
	//! Merge the data
783 | template<typename T, |
784 | typename D, |
785 | typename S, |
786 | template <typename> class layout_base, |
787 | int ... prp> |
788 | inline static void execute(D & recv,S & v2,size_t i,size_t & start) |
789 | { |
790 | // Merge the information |
791 | recv.template merge_prp_v<replace_, |
792 | typename T::value_type, |
793 | PtrMemory, |
794 | openfpm::grow_policy_identity, |
795 | layout_base, |
796 | prp...>(v2,start); |
797 | |
798 | start += v2.size(); |
799 | } |
800 | }; |
801 | |
//! Helper class to merge data without serialization, merging on the host and then copying to the device
803 | template<bool sr> |
804 | struct op_ssend_gg_recv_merge_impl_run_device |
805 | { |
	//! Merge the data
807 | template<typename T, |
808 | typename D, |
809 | typename S, |
810 | template <typename> class layout_base, |
811 | int ... prp> |
812 | inline static void execute(D & recv,S & v2,size_t i,size_t & start) |
813 | { |
814 | // Merge the information |
815 | recv.template merge_prp_v<replace_, |
816 | typename T::value_type, |
817 | typename S::Memory_type, |
818 | openfpm::grow_policy_identity, |
819 | layout_base, |
820 | prp...>(v2,start); |
821 | |
822 | recv.template hostToDevice<prp ...>(start,start+v2.size()-1); |
823 | |
824 | start += v2.size(); |
825 | } |
826 | }; |
827 | |
//! Helper class to merge data without serialization, with direct transfer to the CUDA buffer
829 | template<bool sr> |
830 | struct op_ssend_gg_recv_merge_impl_run_device_direct |
831 | { |
	//! Merge the data
833 | template<typename T, |
834 | typename D, |
835 | typename S, |
836 | template <typename> class layout_base, |
837 | int ... prp> |
838 | inline static void execute(D & recv,S & v2,size_t i,size_t & start) |
839 | { |
840 | // Merge the information |
841 | recv.template merge_prp_device<replace_, |
842 | typename T::value_type, |
843 | typename S::Memory_type, |
844 | openfpm::grow_policy_identity, |
845 | prp...>(v2,start); |
846 | |
847 | start += v2.size(); |
848 | } |
849 | }; |
850 | |
851 | //! Helper class to merge data with serialization |
852 | template<> |
853 | struct op_ssend_gg_recv_merge_impl<true> |
854 | { |
855 | //! merge the data |
856 | template<typename T, |
857 | typename D, |
858 | typename S, |
859 | template <typename> class layout_base, |
860 | int ... prp> inline static void execute(D & recv,S & v2,size_t i,size_t & start) |
861 | { |
862 | // Merge the information |
863 | recv.template merge_prp_v<replace_, |
864 | typename T::value_type, |
865 | HeapMemory, |
866 | typename S::grow_policy, |
867 | layout_base, |
868 | prp...>(v2,start); |
869 | |
		// advance the start marker past the merged data
		start += v2.size();
872 | } |
873 | }; |
874 | |
875 | //! Helper class to merge data |
876 | struct op_ssend_gg_recv_merge |
877 | { |
878 | //! starting marker |
879 | size_t start; |
880 | |
881 | //! constructor |
882 | op_ssend_gg_recv_merge(size_t start) |
883 | :start(start) |
884 | {} |
885 | |
886 | //! execute the merge |
887 | template<bool sr, typename T, typename D, typename S, template<typename> class layout_base, int ... prp> void execute(D & recv,S & v2,size_t i,size_t opt) |
888 | { |
889 | op_ssend_gg_recv_merge_impl<sr>::template execute<T,D,S,layout_base,prp...>(recv,v2,i,start); |
890 | } |
891 | }; |
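
/* Usage sketch (illustrative only): 'start' marks where in the destination
 * vector the ghost region begins; every received buffer is merged with
 * replace_ semantics at that position and the marker is advanced by the
 * number of merged elements.
 *
 * \code
 * // hypothetical: ghost particles are stored after the local ones
 * op_ssend_gg_recv_merge opm(v_prp.size());
 * // passed as op_param, each execute<...> call overwrites one ghost block
 * // and moves 'start' forward by v2.size()
 * \endcode
 */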
892 | |
893 | //! Helper class to merge data |
894 | struct op_ssend_gg_recv_merge_run_device |
895 | { |
896 | //! starting marker |
897 | size_t start; |
898 | |
899 | //! constructor |
900 | op_ssend_gg_recv_merge_run_device(size_t start) |
901 | :start(start) |
902 | {} |
903 | |
904 | //! execute the merge |
905 | template<bool sr, typename T, typename D, typename S, template<typename> class layout_base, int ... prp> void execute(D & recv,S & v2,size_t i,size_t opt) |
906 | { |
907 | bool active = is_mpi_rdma_cuda_active(); |
908 | if (active == true) |
909 | {op_ssend_gg_recv_merge_impl_run_device_direct<sr>::template execute<T,D,S,layout_base,prp...>(recv,v2,i,start);} |
910 | else |
911 | {op_ssend_gg_recv_merge_impl_run_device<sr>::template execute<T,D,S,layout_base,prp...>(recv,v2,i,start);} |
912 | } |
913 | }; |
914 | |
915 | ////////////////////////////////////////////////// |
916 | |
917 | |
918 | |
919 | #endif /* OPENFPM_VCLUSTER_SRC_VCLUSTER_VCLUSTER_META_FUNCTION_HPP_ */ |
920 | |