1 | #ifndef MAP_HPP_ |
2 | #define MAP_HPP_ |
3 | |
4 | |
5 | #include "config.h" |
6 | #include "util/cuda_launch.hpp" |
7 | #include "util/object_util.hpp" |
8 | #include "Grid/util.hpp" |
9 | #include "Vector/vect_isel.hpp" |
10 | #include "Vector/util.hpp" |
11 | #include "Vector/map_vector_grow_p.hpp" |
12 | #include "memory/ExtPreAlloc.hpp" |
13 | #include "util/util_debug.hpp" |
14 | #include "util/Pack_stat.hpp" |
15 | #include <boost/fusion/include/mpl.hpp> |
16 | #include <boost/fusion/sequence/intrinsic/at_c.hpp> |
17 | #include <boost/fusion/include/at_c.hpp> |
18 | #include <boost/fusion/include/for_each.hpp> |
19 | #include <boost/fusion/container/vector.hpp> |
20 | #include <boost/fusion/include/vector.hpp> |
21 | #include <boost/fusion/container/vector/vector_fwd.hpp> |
22 | #include <boost/fusion/include/vector_fwd.hpp> |
23 | #include <boost/type_traits.hpp> |
24 | #include <boost/fusion/include/for_each.hpp> |
25 | #include <boost/mpl/range_c.hpp> |
26 | #include <boost/mpl/for_each.hpp> |
27 | #include "memory_ly/memory_conf.hpp" |
28 | #include "util/copy_compare/meta_copy.hpp" |
29 | #include "util/for_each_ref.hpp" |
30 | #include "util.hpp" |
31 | #include <utility> |
32 | #ifdef CUDA_GPU |
33 | #include "memory/CudaMemory.cuh" |
34 | #endif |
35 | #include "grid_sm.hpp" |
36 | #include "grid_zm.hpp" |
37 | #include "memory_ly/Encap.hpp" |
38 | #include "memory_ly/memory_array.hpp" |
39 | #include "memory_ly/memory_c.hpp" |
40 | #include <vector> |
41 | #include "se_grid.hpp" |
42 | #include "memory/HeapMemory.hpp" |
43 | #include "memory/PtrMemory.hpp" |
44 | #include "grid_common.hpp" |
45 | #include "util/se_util.hpp" |
46 | #include "iterators/grid_key_dx_iterator.hpp" |
47 | #include "iterators/grid_key_dx_iterator_sub.hpp" |
48 | #include "iterators/grid_key_dx_iterator_sp.hpp" |
49 | #include "iterators/grid_key_dx_iterator_sub_bc.hpp" |
50 | #include "Packer_Unpacker/Packer_util.hpp" |
51 | #include "Packer_Unpacker/has_pack_agg.hpp" |
52 | #include "cuda/cuda_grid_gpu_funcs.cuh" |
53 | #include "grid_base_implementation.hpp" |
54 | #include "util/for_each_ref.hpp" |
55 | #include "Geometry/grid_smb.hpp" |
56 | #include "Geometry/grid_zmb.hpp" |
57 | |
#ifndef CUDA_GPU
//! Without GPU support CudaMemory is never declared, so alias it to HeapMemory;
//! this keeps code that names CudaMemory (e.g. the grid_gpu alias at the bottom
//! of this file) compilable in CPU-only builds
typedef HeapMemory CudaMemory;
#endif
61 | |
62 | |
/*! \brief Stub (primary) template of the N-dimensional grid
 *
 * Never used directly: only the partial specializations for the
 * memory_traits_lin and memory_traits_inte layouts (defined below)
 * provide an actual implementation
 *
 */
template<unsigned int dim, typename T, typename S=HeapMemory, typename layout = typename memory_traits_lin<T>::type, typename linearizer = grid_sm<dim,void> >
class grid_base
{
};
70 | |
71 | /*! |
72 | * |
73 | * \brief This is an N-dimensional grid or an N-dimensional array with memory_traits_lin layout |
74 | * |
75 | * it is basically an N-dimensional Cartesian grid |
76 | * |
77 | * \tparam dim Dimensionality of the grid |
78 | * \tparam T type of object the grid store |
79 | * \tparam S type of memory HeapMemory CudaMemory |
80 | * \tparam layout memory layout |
81 | * |
82 | * ### Defining the grid size on each dimension |
83 | * |
84 | * \code{.cpp} |
85 | * size_t sz[3] = {16,16,16}; |
86 | * \endcode |
87 | * |
88 | * ### Definition and allocation of a 3D grid on CPU memory |
89 | * \snippet grid_unit_tests.hpp Definition and allocation of a 3D grid on CPU memory |
90 | * ### Access a grid c3 of size sz on each direction |
91 | * \snippet grid_unit_tests.hpp Access a grid c3 of size sz on each direction |
92 | * ### Access an N-dimensional grid with an iterator |
93 | * \snippet grid_unit_tests.hpp Access to an N-dimensional grid with an iterator |
94 | * ### Iterate only on a sub-set of the grid |
95 | * \snippet grid_unit_tests.hpp Sub-grid iterator test usage |
96 | * ### Get the full-object in an N-dimensional grid |
97 | * \snippet grid_unit_tests.hpp Get the object in an N-dimensional grid with an iterator |
98 | * ### Create a grid g1 and copy into another g2 |
99 | * \snippet grid_unit_tests.hpp Create a grid g1 and copy into another g2 |
100 | * |
101 | */ |
102 | template<unsigned int dim, typename T, typename S, typename linearizer> |
103 | class grid_base<dim,T,S,typename memory_traits_lin<T>::type, linearizer> : public grid_base_impl<dim,T,S, memory_traits_lin,linearizer> |
104 | { |
105 | typedef typename apply_transform<memory_traits_lin,T>::type T_; |
106 | |
107 | T background; |
108 | |
109 | public: |
110 | |
111 | //! type of layout of the structure |
112 | typedef typename memory_traits_lin<T>::type layout; |
113 | |
114 | //! Object container for T, it is the return type of get_o it return a object type trough |
115 | // you can access all the properties of T |
116 | typedef typename grid_base_impl<dim,T,S, memory_traits_lin>::container container; |
117 | |
118 | //! grid_base has no grow policy |
119 | typedef void grow_policy; |
120 | |
121 | //! type that identify one point in the grid |
122 | typedef grid_key_dx<dim> base_key; |
123 | |
124 | //! sub-grid iterator type |
125 | typedef grid_key_dx_iterator_sub<dim> sub_grid_iterator_type; |
126 | |
127 | //! linearizer type Z-morton Hilbert curve , normal striding |
128 | typedef typename grid_base_impl<dim,T,S, memory_traits_lin>::linearizer_type linearizer_type; |
129 | |
130 | //! Default constructor |
131 | inline grid_base() THROW |
132 | :grid_base_impl<dim,T,S,memory_traits_lin, linearizer>() |
133 | {} |
134 | |
135 | /*! \brief create a grid from another grid |
136 | * |
137 | * \tparam S memory type for allocation |
138 | * |
139 | * \param g the grid to copy |
140 | * \param mem memory object (only used for template deduction) |
141 | * |
142 | */ |
143 | inline grid_base(const grid_base<dim,T,S,typename memory_traits_lin<T>::type> & g) THROW |
144 | :grid_base_impl<dim,T,S,memory_traits_lin, linearizer>(g) |
145 | { |
146 | } |
147 | |
148 | /*! \brief create a grid of size sz on each direction |
149 | * |
150 | * \param sz size if the grid on each directions |
151 | * |
152 | */ |
153 | inline grid_base(const size_t & sz) THROW |
154 | :grid_base_impl<dim,T,S,memory_traits_lin,linearizer>(sz) |
155 | { |
156 | } |
157 | |
158 | /*! \brief Constructor allocate memory |
159 | * |
160 | * \param sz size of the grid in each dimension |
161 | * |
162 | */ |
163 | inline grid_base(const size_t (& sz)[dim]) THROW |
164 | :grid_base_impl<dim,T,S,memory_traits_lin,linearizer>(sz) |
165 | { |
166 | } |
167 | |
168 | /*! \brief Stub does not do anything |
169 | * |
170 | */ |
171 | template<typename pointers_type, |
172 | typename headers_type, |
173 | typename result_type, |
174 | unsigned int ... prp > |
175 | static void (pointers_type & pointers, headers_type & , result_type & result, int n_slot) |
176 | {} |
177 | |
178 | template<unsigned int ... prp, typename S2, typename header_type, typename ite_type, typename context_type> |
179 | void (ExtPreAlloc<S2> & mem, |
180 | ite_type & sub_it, |
181 | header_type & , |
182 | int ih, |
183 | Unpack_stat & ps, |
184 | context_type &context, |
185 | rem_copy_opt opt = rem_copy_opt::NONE_OPT) |
186 | {} |
187 | |
188 | /*! \brief It copy a grid |
189 | * |
190 | * \param g grid to copy |
191 | * |
192 | */ |
193 | grid_base<dim,T,S> & operator=(const grid_base<dim,T,S> & g) |
194 | { |
195 | (static_cast<grid_base_impl<dim,T,S, memory_traits_lin> *>(this))->swap(g.duplicate()); |
196 | |
197 | meta_copy<T>::meta_copy_(g.background,background); |
198 | |
199 | return *this; |
200 | } |
201 | |
202 | /*! \brief It copy a grid |
203 | * |
204 | * \param g grid to copy |
205 | * |
206 | */ |
207 | grid_base<dim,T,S,typename memory_traits_lin<T>::type> & operator=(grid_base<dim,T,S,typename memory_traits_lin<T>::type> && g) |
208 | { |
209 | (static_cast<grid_base_impl<dim,T,S, memory_traits_lin> *>(this))->swap(g); |
210 | |
211 | meta_copy<T>::meta_copy_(g.background,background); |
212 | |
213 | return *this; |
214 | } |
215 | |
216 | /*! \brief This structure has pointers |
217 | * |
218 | * \return false |
219 | * |
220 | */ |
221 | static bool noPointers() |
222 | { |
223 | return false; |
224 | } |
225 | |
226 | /*! \brief Copy the memory from host to device |
227 | * |
228 | * \tparam (all properties are copied to prp is useless in this case) |
229 | * |
230 | */ |
231 | template<unsigned int ... prp> void hostToDevice() |
232 | { |
233 | this->data_.mem->hostToDevice(); |
234 | } |
235 | |
236 | /*! \brief Copy the memory from host to device |
237 | * |
238 | * \tparam (all properties are copied to prp is useless in this case) |
239 | * |
240 | * \param start start point |
241 | * \param stop stop point |
242 | * |
243 | */ |
244 | template<unsigned int ... prp> void hostToDevice(size_t start, size_t stop) |
245 | { |
246 | this->data_.mem->hostToDevice(start*sizeof(T),(stop+1)*sizeof(T)); |
247 | } |
248 | |
249 | /*! \brief It return the properties arrays. |
250 | * |
251 | * In case of Cuda memory it return the device pointers to pass to the kernels |
252 | * |
253 | * This variant does not copy the host memory to the device memory |
254 | * |
255 | */ |
256 | template<unsigned int id> void * getDeviceBuffer() |
257 | { |
258 | return this->data_.mem->getDevicePointer(); |
259 | } |
260 | |
261 | /*! \brief Synchronize the memory buffer in the device with the memory in the host |
262 | * |
263 | * \tparam ingored |
264 | * |
265 | * All properties are transfered |
266 | * |
267 | */ |
268 | template<unsigned int ... prp> void deviceToHost() |
269 | { |
270 | this->data_.mem->deviceToHost(); |
271 | } |
272 | |
273 | /*! \brief Synchronize the memory buffer in the device with the memory in the host |
274 | * |
275 | * \param start starting element to transfer |
276 | * \param stop stop element to transfer |
277 | * |
278 | * \tparam properties to transfer (ignored all properties are trasfert) |
279 | * |
280 | */ |
281 | template<unsigned int ... prp> void deviceToHost(size_t start, size_t stop) |
282 | { |
283 | this->data_.mem->deviceToHost(start*sizeof(T),(stop+1)*sizeof(T)); |
284 | } |
285 | |
286 | #ifdef CUDA_GPU |
287 | |
288 | /*! \brief Convert the grid into a data-structure compatible for computing into GPU |
289 | * |
290 | * The object created can be considered like a reference of the original |
291 | * |
292 | */ |
293 | grid_gpu_ker<dim,T_,memory_traits_lin> toKernel() |
294 | { |
295 | return grid_toKernelImpl<is_layout_inte<memory_traits_lin<T_>>::value,dim,T_>::toKernel(*this); |
296 | } |
297 | |
298 | /*! \brief Convert the grid into a data-structure compatible for computing into GPU |
299 | * |
300 | * The object created can be considered like a reference of the original |
301 | * |
302 | */ |
303 | const grid_gpu_ker<dim,T_,memory_traits_lin> toKernel() const |
304 | { |
305 | return grid_toKernelImpl<is_layout_inte<memory_traits_lin<T_>>::value,dim,T_>::toKernel(*this); |
306 | } |
307 | |
308 | #endif |
309 | |
310 | /*! \brief This is a meta-function return which type of sub iterator a grid produce |
311 | * |
312 | * \return the type of the sub-grid iterator |
313 | * |
314 | */ |
315 | template <typename stencil = no_stencil> |
316 | static grid_key_dx_iterator_sub<dim, stencil> type_of_subiterator() |
317 | { |
318 | return grid_key_dx_iterator_sub<dim, stencil>(); |
319 | } |
320 | |
321 | /*! \brief Return if in this representation data are stored is a compressed way |
322 | * |
323 | * \return false this is a normal grid no compression |
324 | * |
325 | */ |
326 | static constexpr bool isCompressed() |
327 | { |
328 | return false; |
329 | } |
330 | |
331 | /*! \brief This is a meta-function return which type of iterator a grid produce |
332 | * |
333 | * \return the type of the sub-grid iterator |
334 | * |
335 | */ |
336 | static grid_key_dx_iterator<dim> type_of_iterator() |
337 | { |
338 | return grid_key_dx_iterator<dim>(); |
339 | } |
340 | |
341 | /*! \brief In this case it just copy the key_in in key_out |
342 | * |
343 | * \param key_out output key |
344 | * \param key_in input key |
345 | * |
346 | */ |
347 | void convert_key(grid_key_dx<dim> & key_out, const grid_key_dx<dim> & key_in) const |
348 | { |
349 | for (size_t i = 0 ; i < dim ; i++) |
350 | {key_out.set_d(i,key_in.get(i));} |
351 | } |
352 | |
353 | /*! \brief Get the background value |
354 | * |
355 | * For dense grid this function is useless |
356 | * |
357 | * \return background value |
358 | * |
359 | */ |
360 | T & getBackgroundValue() |
361 | { |
362 | return background; |
363 | } |
364 | |
365 | /*! \brief Get the background value |
366 | * |
367 | * For dense grid this function is useless |
368 | * |
369 | * \return background value |
370 | * |
371 | */ |
372 | T & getBackgroundValueAggr() |
373 | { |
374 | return background; |
375 | } |
376 | |
377 | /*! \brief Set the background value |
378 | * |
379 | * \tparam p property to set |
380 | * |
381 | */ |
382 | template<unsigned int p> |
383 | void setBackgroundValue(const typename boost::mpl::at<typename T::type,boost::mpl::int_<p>>::type & val) |
384 | { |
385 | meta_copy<typename boost::mpl::at<typename T::type,boost::mpl::int_<p>>::type>::meta_copy_(val,background.template get<p>()); |
386 | } |
387 | |
388 | |
389 | /*! \brief assign operator |
390 | * |
391 | * \return itself |
392 | * |
393 | */ |
394 | grid_base<dim,T,S,typename memory_traits_lin<T>::type> & operator=(const grid_base_impl<dim,T,S, memory_traits_lin> & base) |
395 | { |
396 | grid_base_impl<dim,T,S, memory_traits_inte>::operator=(base); |
397 | |
398 | return *this; |
399 | } |
400 | |
401 | /*! \brief assign operator |
402 | * |
403 | * \return itself |
404 | * |
405 | */ |
406 | grid_base<dim,T,S,typename memory_traits_lin<T>::type> & operator=(grid_base_impl<dim,T,S, memory_traits_lin> && base) |
407 | { |
408 | grid_base_impl<dim,T,S, memory_traits_lin>::operator=((grid_base_impl<dim,T,S, memory_traits_lin> &&)base); |
409 | |
410 | return *this; |
411 | } |
412 | }; |
413 | |
414 | |
415 | |
/*! \brief this class is a functor for the "for_each" algorithm
 *
 * For each element of the boost::vector the operator() is called.
 * It is used to switch, property by property, the memory pointer
 * stored in the destination layout object to its device pointer
 *
 * \tparam T_type aggregate type being processed
 * \tparam prp properties to switch
 *
 */
426 | template<typename T_type, unsigned int ... prp> |
427 | struct switch_copy_host_to_device |
428 | { |
429 | //! encapsulated destination object |
430 | typename memory_traits_inte<T_type>::type & dst; |
431 | |
432 | //! Convert the packed properties into an MPL vector |
433 | typedef typename to_boost_vmpl<prp...>::type v_prp; |
434 | |
435 | /*! \brief constructor |
436 | * |
437 | * \param src source encapsulated object |
438 | * \param dst source encapsulated object |
439 | * |
440 | */ |
441 | inline switch_copy_host_to_device(typename memory_traits_inte<T_type>::type & dst) |
442 | :dst(dst) |
443 | { |
444 | }; |
445 | |
446 | |
447 | //! It call the copy function for each property |
448 | template<typename T> |
449 | inline void operator()(T& t) const |
450 | { |
451 | boost::fusion::at_c<boost::mpl::at<v_prp,boost::mpl::int_<T::value>>::type::value>(dst).switchToDevicePtr(); |
452 | } |
453 | }; |
454 | |
/*! \brief this class is a functor for the "for_each" algorithm
 *
 * For each element of the boost::vector the operator() is called.
 * It is used to transfer, property by property, a range of elements
 * of the destination layout object from host memory to device memory,
 * recursing into nested vector-like structures
 *
 * \tparam T_type aggregate type being processed
 * \tparam layout_base memory layout
 * \tparam Memory memory type of the per-property buffers
 * \tparam prp properties to transfer
 *
 */
template<typename T_type, template<typename> class layout_base , typename Memory, unsigned int ... prp>
struct host_to_device_impl
{
	//! encapsulated destination object
	typename memory_traits_inte<T_type>::type & dst;

	//! Convert the packed properties into an MPL vector
	typedef typename to_boost_vmpl<prp...>::type v_prp;

	//! starting element
	size_t start;

	//! stop element (inclusive)
	size_t stop;

	/*! \brief constructor
	 *
	 * \param dst destination layout object
	 * \param start first element to transfer
	 * \param stop last element to transfer (inclusive)
	 *
	 */
	inline host_to_device_impl(typename memory_traits_inte<T_type>::type & dst,size_t start, size_t stop)
	:dst(dst),start(start),stop(stop)
	{};


	//! It call the copy function for each property
	template<typename T>
	inline void operator()(T& t) const
	{
		// actual property id selected by the for_each index T::value
		typedef typename boost::mpl::at<v_prp,boost::mpl::int_<T::value>>::type ele_type;

		// type of the memory representation (mem_r) of the selected property
		typedef decltype(boost::fusion::at_c<ele_type::value>(dst).mem_r) mem_r_type;

		// C++ type stored by the selected property (fixes the element size in bytes)
		typedef typename boost::mpl::at<typename T_type::type,ele_type>::type type_prp;

		// device-side (kernel) representation of the stored type
		typedef typename toKernel_transform<layout_base,typename mem_r_type::value_type>::type kernel_type;

		// dispatch selector: 1 when the stored type is a vector-like GPU structure,
		// plus 2*is_array and the array rank for C-array properties; picks the
		// specialization of call_recursive_host_device_if_vector below
		typedef boost::mpl::int_<(is_vector<typename mem_r_type::value_type>::value ||
								 is_vector_dist<typename mem_r_type::value_type>::value ||
								 is_gpu_celllist<typename mem_r_type::value_type>::value) + 2*std::is_array<type_prp>::value + std::rank<type_prp>::value> crh_cond;

		// transfer the byte range [start*sizeof(type_prp), (stop+1)*sizeof(type_prp))
		// of this property's buffer to the device
		call_recursive_host_device_if_vector<typename mem_r_type::value_type,
											 kernel_type,
											 type_prp,
											 layout_base,
											 crh_cond::value>
		::template transform<Memory,mem_r_type>(static_cast<Memory *>(boost::fusion::at_c<boost::mpl::at<v_prp,boost::mpl::int_<T::value>>::type::value>(dst).mem),
											    boost::fusion::at_c<boost::mpl::at<v_prp,boost::mpl::int_<T::value>>::type::value>(dst).mem_r,
											    start*sizeof(type_prp),
											    (stop+1)*sizeof(type_prp));

		// here we have to recursively call hostToDevice for each nested vector
		call_recursive_host_device_if_vector<typename mem_r_type::value_type,
											 kernel_type,
											 type_prp,
											 layout_base,
											 0>
		::call(boost::fusion::at_c<boost::mpl::at<v_prp,boost::mpl::int_<T::value>>::type::value>(dst).mem_r,start,stop);
	}
};
526 | |
/*! \brief this class is a functor for the "for_each" algorithm
 *
 * For each element of the boost::vector the operator() is called.
 * It is used to destruct, property by property, the memory buffers
 * of the layout object, recursing into nested vector-like structures
 *
 * \tparam T_type aggregate type being processed
 * \tparam layout_base memory layout
 * \tparam Memory memory type of the per-property buffers
 *
 */
template<typename T_type, template<typename> class layout_base , typename Memory>
struct deconstruct_impl
{
	//! object to destruct
	typename memory_traits_inte<T_type>::type & dst;

	/*! \brief constructor
	 *
	 * \param dst layout object whose per-property memory must be destructed
	 *
	 */
	inline deconstruct_impl(typename memory_traits_inte<T_type>::type & dst)
	:dst(dst)
	{};


	//! It call the destruct function for each property
	template<typename T>
	inline void operator()(T& t) const
	{
		// type of the memory representation (mem_r) of property T::value
		typedef decltype(boost::fusion::at_c<T::value>(dst).mem_r) mem_r_type;

		// C++ type stored by the property
		typedef typename boost::mpl::at<typename T_type::type,T>::type type_prp;

		// device-side (kernel) representation of the stored type
		typedef typename toKernel_transform<layout_base,typename mem_r_type::value_type>::type kernel_type;

		// dispatch selector: 1 when the stored type is a vector-like GPU structure,
		// plus 2*is_array and the array rank for C-array properties; picks the
		// specialization of call_recursive_destructor_if_vector below
		typedef boost::mpl::int_<(is_vector<typename mem_r_type::value_type>::value ||
								 is_vector_dist<typename mem_r_type::value_type>::value ||
								 is_gpu_celllist<typename mem_r_type::value_type>::value) + 2*std::is_array<type_prp>::value + std::rank<type_prp>::value> crh_cond;

		// destruct this property's memory object and representation
		call_recursive_destructor_if_vector<typename mem_r_type::value_type,
											 kernel_type,
											 type_prp,
											 layout_base,
											 crh_cond::value>
		::template destruct<Memory,mem_r_type>(static_cast<Memory *>(boost::fusion::at_c<T::value>(dst).mem),
												boost::fusion::at_c<T::value>(dst).mem_r);
	}
};
577 | |
/*! \brief this class is a functor for the "for_each" algorithm
 *
 * For each element of the boost::vector the operator() is called.
 * It is used to transfer, property by property, the whole memory of
 * the destination layout object from device memory to host memory
 *
 * \tparam T_type aggregate type being processed
 * \tparam prp properties to transfer
 *
 */
588 | template<typename T_type, unsigned int ... prp> |
589 | struct device_to_host_impl |
590 | { |
591 | //! encapsulated destination object |
592 | typename memory_traits_inte<T_type>::type & dst; |
593 | |
594 | //! Convert the packed properties into an MPL vector |
595 | typedef typename to_boost_vmpl<prp...>::type v_prp; |
596 | |
597 | /*! \brief constructor |
598 | * |
599 | * \param src source encapsulated object |
600 | * \param dst source encapsulated object |
601 | * |
602 | */ |
603 | inline device_to_host_impl(typename memory_traits_inte<T_type>::type & dst) |
604 | :dst(dst) |
605 | { |
606 | }; |
607 | |
608 | |
609 | //! It call the copy function for each property |
610 | template<typename T> |
611 | inline void operator()(T& t) const |
612 | { |
613 | boost::fusion::at_c<boost::mpl::at<v_prp,boost::mpl::int_<T::value>>::type::value>(dst).mem->deviceToHost(); |
614 | } |
615 | }; |
616 | |
/*! \brief this class is a functor for the "for_each" algorithm
 *
 * For each element of the boost::vector the operator() is called.
 * It is used to transfer, property by property, a range of elements
 * of the destination layout object from device memory to host memory
 *
 * \tparam T_type aggregate type being processed
 * \tparam prp properties to transfer
 *
 */
627 | template<typename T_type, unsigned int ... prp> |
628 | struct device_to_host_start_stop_impl |
629 | { |
630 | //! encapsulated destination object |
631 | typename memory_traits_inte<T_type>::type & dst; |
632 | |
633 | //! Convert the packed properties into an MPL vector |
634 | typedef typename to_boost_vmpl<prp...>::type v_prp; |
635 | |
636 | //! start |
637 | size_t start; |
638 | |
639 | //! stop |
640 | size_t stop; |
641 | |
642 | /*! \brief constructor |
643 | * |
644 | * \param src source encapsulated object |
645 | * \param dst source encapsulated object |
646 | * |
647 | */ |
648 | inline device_to_host_start_stop_impl(typename memory_traits_inte<T_type>::type & dst,size_t start,size_t stop) |
649 | :dst(dst),start(start),stop(stop) |
650 | { |
651 | }; |
652 | |
653 | |
654 | //! It call the copy function for each property |
655 | template<typename T> |
656 | inline void operator()(T& t) const |
657 | { |
658 | typedef typename boost::mpl::at<v_prp,boost::mpl::int_<T::value>>::type prp_id; |
659 | |
660 | typedef typename boost::mpl::at<typename T_type::type,prp_id>::type p_type; |
661 | |
662 | boost::fusion::at_c<prp_id::value>(dst).mem->deviceToHost(start*sizeof(p_type),(stop+1)*sizeof(p_type)); |
663 | } |
664 | }; |
665 | |
666 | |
667 | |
//! triple of unsigned sizes, shaped like CUDA's dim3 so it can describe a
//! launch configuration also in builds without CUDA
struct dim3_
{
	//! size in x dimension
	unsigned int x;

	//! size in y dimension
	unsigned int y;

	//! size in z dimension
	unsigned int z;
};
679 | |
/*! \brief Kernel launch configuration (threads per block + number of blocks)
 *
 * \tparam dim dimensionality of the grid this configuration refers to
 *        (not used by the members themselves)
 *
 */
template<unsigned int dim>
struct device_grid
{
	//! number of threads in each block
	dim3_ threads;

	//! number of blocks in the kernel launch grid
	dim3_ grids;
};
689 | |
690 | |
691 | /*! \brief This is an N-dimensional grid or an N-dimensional array with memory_traits_inte layout |
692 | * |
693 | * it is basically an N-dimensional Cartesian grid |
694 | * |
695 | * \tparam dim Dimensionality of the grid |
696 | * \tparam T type of object the grid store |
697 | * \tparam Mem memory layout |
698 | * |
699 | * ### Definition and allocation of a 3D grid on GPU memory |
700 | * \snippet grid_unit_tests.hpp Definition and allocation of a 3D grid on GPU memory |
701 | * ### Access a grid c3 of size sz on each direction |
702 | * \snippet grid_unit_tests.hpp Access a grid c3 of size sz on each direction |
703 | * ### Access to an N-dimensional grid with an iterator |
704 | * \snippet grid_unit_tests.hpp Access to an N-dimensional grid with an iterator |
705 | * |
706 | */ |
707 | template<unsigned int dim, typename T, typename S> |
708 | class grid_base<dim,T,S,typename memory_traits_inte<T>::type> : public grid_base_impl<dim,T,S, memory_traits_inte> |
709 | { |
710 | typedef typename apply_transform<memory_traits_inte,T>::type T_; |
711 | |
712 | T background; |
713 | |
714 | public: |
715 | |
716 | //! grid layout |
717 | typedef typename memory_traits_inte<T>::type layout; |
718 | |
719 | //! Object container for T, it is the return type of get_o it return a object type trough |
720 | // you can access all the properties of T |
721 | typedef typename grid_base_impl<dim,T,S, memory_traits_inte>::container container; |
722 | |
723 | //! linearizer type Z-morton Hilbert curve , normal striding |
724 | typedef typename grid_base_impl<dim,T,S, memory_traits_inte>::linearizer_type linearizer_type; |
725 | |
726 | //! Default constructor |
727 | inline grid_base() THROW |
728 | :grid_base_impl<dim,T,S,memory_traits_inte>() |
729 | { |
730 | } |
731 | |
732 | /*! \brief create a grid from another grid |
733 | * |
734 | * \param g the grid to copy |
735 | * |
736 | */ |
737 | inline grid_base(const grid_base & g) THROW |
738 | :grid_base_impl<dim,T,S,memory_traits_inte>(g) |
739 | { |
740 | } |
741 | |
742 | /*! \brief create a grid from another grid |
743 | * |
744 | * \param g the grid to copy |
745 | * |
746 | */ |
747 | inline grid_base(grid_base && g) THROW |
748 | :grid_base_impl<dim,T,S,memory_traits_inte>(g) |
749 | { |
750 | } |
751 | |
752 | /*! \brief create a grid of size sz on each direction |
753 | * |
754 | * \param sz grid size in each direction |
755 | * |
756 | */ |
757 | inline grid_base(const size_t & sz) THROW |
758 | :grid_base_impl<dim,T,S,memory_traits_inte>(sz) |
759 | { |
760 | } |
761 | |
762 | //! Constructor allocate memory and give them a representation |
763 | inline grid_base(const size_t (& sz)[dim]) THROW |
764 | :grid_base_impl<dim,T,S,memory_traits_inte>(sz) |
765 | { |
766 | } |
767 | |
768 | /*! \brief Stub does not do anything |
769 | * |
770 | */ |
771 | static void () |
772 | {} |
773 | |
774 | /*! \brief Fill the memory with a byte |
775 | * |
776 | */ |
777 | template<unsigned int id> void fill(unsigned char c) |
778 | { |
779 | boost::fusion::at_c<id>(this->data_).mem->fill(c); |
780 | } |
781 | |
782 | /*! \brief Copy the memory from host to device |
783 | * |
784 | */ |
785 | template<unsigned int ... prp> void hostToDevice() |
786 | { |
787 | host_to_device_impl<T,memory_traits_inte,S,prp ...> htd(this->data_,0,this->getGrid().size()-1); |
788 | |
789 | boost::mpl::for_each_ref< boost::mpl::range_c<int,0,sizeof...(prp)> >(htd); |
790 | } |
791 | |
792 | /*! \brief It return the properties arrays. |
793 | * |
794 | * In case of Cuda memory it return the device pointers to pass to the kernels |
795 | * |
796 | * This variant does not copy the host memory to the device memory |
797 | * |
798 | */ |
799 | template<unsigned int id> void * getDeviceBuffer() |
800 | { |
801 | return boost::fusion::at_c<id>(this->data_).mem->getDevicePointer(); |
802 | } |
803 | |
804 | /*! \brief Synchronize the memory buffer in the device with the memory in the host |
805 | * |
806 | * |
807 | */ |
808 | template<unsigned int ... prp> void deviceToHost() |
809 | { |
810 | device_to_host_impl<T, prp ...> dth(this->data_); |
811 | |
812 | boost::mpl::for_each_ref< boost::mpl::range_c<int,0,sizeof...(prp)> >(dth); |
813 | } |
814 | |
815 | /*! \brief Synchronize the memory buffer in the device with the memory in the host |
816 | * |
817 | * \param start starting element to transfer |
818 | * \param stop stop element to transfer |
819 | * |
820 | * \tparam properties to transfer |
821 | * |
822 | */ |
823 | template<unsigned int ... prp> void deviceToHost(size_t start, size_t stop) |
824 | { |
825 | device_to_host_start_stop_impl<T, prp ...> dth(this->data_,start,stop); |
826 | |
827 | boost::mpl::for_each_ref< boost::mpl::range_c<int,0,sizeof...(prp)> >(dth); |
828 | } |
829 | |
830 | /*! \brief Synchronize the memory buffer in the device with the memory in the host |
831 | * |
832 | * \param start starting element to transfer |
833 | * \param stop stop element to transfer |
834 | * |
835 | * \tparam properties to transfer |
836 | * |
837 | */ |
838 | template<unsigned int ... prp> void hostToDevice(size_t start, size_t stop) |
839 | { |
840 | host_to_device_impl<T,memory_traits_inte,S, prp ...> dth(this->data_,start,stop); |
841 | |
842 | boost::mpl::for_each_ref< boost::mpl::range_c<int,0,sizeof...(prp)> >(dth); |
843 | } |
844 | |
845 | #ifdef CUDA_GPU |
846 | |
847 | /*! \brief Convert the grid into a data-structure compatible for computing into GPU |
848 | * |
849 | * The object created can be considered like a reference of the original |
850 | * |
851 | */ |
852 | grid_gpu_ker<dim,T_,memory_traits_inte> toKernel() |
853 | { |
854 | return grid_toKernelImpl<is_layout_inte<memory_traits_inte<T_>>::value,dim,T_>::toKernel(*this); |
855 | } |
856 | |
857 | /*! \brief Convert the grid into a data-structure compatible for computing into GPU |
858 | * |
859 | * The object created can be considered like a reference of the original |
860 | * |
861 | */ |
862 | const grid_gpu_ker<dim,T_,memory_traits_inte> toKernel() const |
863 | { |
864 | return grid_toKernelImpl<is_layout_inte<memory_traits_inte<T>>::value,dim,T_>::toKernel(*this); |
865 | } |
866 | |
867 | #endif |
868 | /*! \brief This is a meta-function return which type of sub iterator a grid produce |
869 | * |
870 | * \return the type of the sub-grid iterator |
871 | * |
872 | */ |
873 | template <typename stencil = no_stencil> |
874 | static grid_key_dx_iterator_sub<dim, stencil> type_of_subiterator() |
875 | { |
876 | return grid_key_dx_iterator_sub<dim, stencil>(); |
877 | } |
878 | |
879 | /*! \brief Return if in this representation data are stored is a compressed way |
880 | * |
881 | * \return false this is a normal grid no compression |
882 | * |
883 | */ |
884 | static constexpr bool isCompressed() |
885 | { |
886 | return false; |
887 | } |
888 | |
889 | /*! \brief This is a meta-function return which type of iterator a grid produce |
890 | * |
891 | * \return the type of the sub-grid iterator |
892 | * |
893 | */ |
894 | static grid_key_dx_iterator<dim> type_of_iterator() |
895 | { |
896 | return grid_key_dx_iterator<dim>(); |
897 | } |
898 | |
899 | /*! \brief In this case it just copy the key_in in key_out |
900 | * |
901 | * \param key_out output key |
902 | * \param key_in input key |
903 | * |
904 | */ |
905 | void convert_key(grid_key_dx<dim> & key_out, const grid_key_dx<dim> & key_in) const |
906 | { |
907 | for (size_t i = 0 ; i < dim ; i++) |
908 | {key_out.set_d(i,key_in.get(i));} |
909 | } |
910 | |
911 | /*! \brief Get the background value |
912 | * |
913 | * For dense grid this function is useless |
914 | * |
915 | * \return background value |
916 | * |
917 | */ |
918 | T & getBackgroundValue() |
919 | { |
920 | return background; |
921 | } |
922 | |
923 | /*! \brief Get the background value |
924 | * |
925 | * For dense grid this function is useless |
926 | * |
927 | * \return background value |
928 | * |
929 | */ |
930 | T & getBackgroundValueAggr() |
931 | { |
932 | return background; |
933 | } |
934 | |
935 | /*! \brief assign operator |
936 | * |
937 | * \return itself |
938 | * |
939 | */ |
940 | grid_base<dim,T,S,typename memory_traits_inte<T>::type> & operator=(const grid_base_impl<dim,T,S, memory_traits_inte> & base) |
941 | { |
942 | grid_base_impl<dim,T,S, memory_traits_inte>::operator=(base); |
943 | |
944 | return *this; |
945 | } |
946 | |
947 | /*! \brief assign operator |
948 | * |
949 | * \return itself |
950 | * |
951 | */ |
952 | grid_base<dim,T,S,typename memory_traits_inte<T>::type> & operator=(grid_base_impl<dim,T,S, memory_traits_inte> && base) |
953 | { |
954 | grid_base_impl<dim,T,S, memory_traits_inte>::operator=(base); |
955 | |
956 | return *this; |
957 | } |
958 | |
959 | ~grid_base() |
960 | { |
961 | deconstruct_impl<T,memory_traits_inte,S> dth(this->data_); |
962 | |
963 | boost::mpl::for_each_ref< boost::mpl::range_c<int,0,T::max_prop> >(dth); |
964 | } |
965 | }; |
966 | |
//! short formula for a grid on gpu
//! NOTE(review): the linearizer parameter is accepted but not forwarded (the
//! memory_traits_inte specialization takes no linearizer) — confirm intended
template <unsigned int dim, typename T, typename linearizer = grid_sm<dim,void> > using grid_gpu = grid_base<dim,T,CudaMemory,typename memory_traits_inte<T>::type>;

//! short formula for a grid on cpu
template <unsigned int dim, typename T, typename linearizer = grid_sm<dim,void> > using grid_cpu = grid_base<dim,T,HeapMemory,typename memory_traits_lin<T>::type,linearizer>;
972 | |
973 | |
974 | #endif |
975 | |
976 | |
977 | |