/*
 * map_vector_cuda.hpp
 *
 *  Created on: Jun 28, 2018
 *      Author: i-bird
 */

#ifndef MAP_VECTOR_CUDA_HPP_
#define MAP_VECTOR_CUDA_HPP_

#ifdef __NVCC__

template<typename vector_src_type, typename vector_dst_type, unsigned int ... args>
__global__ void merge_add_prp_device_impl(vector_src_type v_src, vector_dst_type v_dst, unsigned int old_sz)
{
	int i = threadIdx.x + blockIdx.x * blockDim.x;

	if (i >= v_src.size())
	{return;}

	// copy the selected properties of the i-th source object into the destination, after the old elements
	object_s_di<decltype(v_src.get(i)),decltype(v_dst.get(old_sz+i)),OBJ_ENCAP,args...>(v_src.get(i),v_dst.get(old_sz+i));
}
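
/* Usage sketch (illustrative, not part of this header): a minimal host-side launch of the
 * kernel above, assuming two hypothetical device vectors v_src and v_dst whose kernel-side
 * views come from toKernel(), that v_dst has already been resized to old_sz + v_src.size(),
 * and that properties 0 and 1 have to be merged.
 *
 *	size_t old_sz = v_dst.size() - v_src.size();
 *	auto ite = v_src.getGPUIterator();
 *	merge_add_prp_device_impl<decltype(v_src.toKernel()),decltype(v_dst.toKernel()),0,1>
 *	<<<ite.wthr,ite.thr>>>(v_src.toKernel(),v_dst.toKernel(),old_sz);
 */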

template<typename vector_dst_type, typename vector_src_type>
__global__ void copy_two_vectors(vector_dst_type v_dst, vector_src_type v_src)
{
	int i = threadIdx.x + blockIdx.x * blockDim.x;

	if (i >= v_src.size())
	{return;}

	// copy the i-th element from the source vector into the destination vector
	v_dst.get(i) = v_src.get(i);
}


template<template<typename,typename> class op,
         typename vector_src_type,
         typename vector_dst_type,
         typename vector_opart_type,
         unsigned int ... args>
__global__ void merge_add_prp_device_impl_src_dst_opar_offset(vector_src_type v_src, vector_dst_type v_dst, vector_opart_type opart, unsigned int start)
{
	int i = threadIdx.x + blockIdx.x * blockDim.x;

	if (i >= v_src.size())
	{return;}

	// write the i-th source object into the destination element indexed by opart
	object_s_di_op<op,decltype(v_src.get(0)),decltype(v_dst.get(0)),OBJ_ENCAP,args...>(v_src.get(i),v_dst.get(opart.template get<1>(start + i)));
}

template<template<typename,typename> class op,
         typename vector_src_type,
         typename vector_dst_type,
         typename vector_opart_type,
         unsigned int ... args>
__global__ void merge_add_prp_device_impl_src_offset_dst_opar(vector_src_type v_src, vector_dst_type v_dst, vector_opart_type opart, unsigned int start)
{
	int i = threadIdx.x + blockIdx.x * blockDim.x;

	if (i >= opart.size())
	{return;}

	// write the source object at offset start+i into the destination element indexed by opart
	object_si_di_op<op,decltype(v_src.get(0)),decltype(v_dst.get(0)),OBJ_ENCAP,args...>(v_src.get(start + i),v_dst.get(opart.template get<0>(i)));
}

#endif // __NVCC__


template<int prp>
__device__ void fill_vector_error_array_overflow(const void * sptr,int key)
{
#ifdef CUDA_GPU

	int * ptr = (int *)&global_cuda_error_array[0];

	// error record: flag, low/high 32 bits of the structure pointer, property index, dimensionality
	ptr[0] = 1;
	ptr[1] = ((size_t)sptr) & 0xFFFFFFFF;
	ptr[2] = (((size_t)sptr) & 0xFFFFFFFF00000000) >> 32;
	ptr[3] = prp;
	ptr[4] = 1;

	// offending key (one component for a 1-D vector)
	for (int i = 0 ; i < 1 ; i++)
	{ptr[i+5] = key;}

#ifdef __NVCC__

	// record the launch coordinates of the faulting thread
	ptr[5+1] = blockIdx.x;
	ptr[6+1] = blockIdx.y;
	ptr[7+1] = blockIdx.z;

	ptr[8+1] = blockDim.x;
	ptr[9+1] = blockDim.y;
	ptr[10+1] = blockDim.z;

	ptr[11+1] = threadIdx.x;
	ptr[12+1] = threadIdx.y;
	ptr[13+1] = threadIdx.z;

#endif

#endif
}
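
/* Layout sketch of the record written above (how a host-side reader might decode it after
 * copying global_cuda_error_array back; the host array name and the copy itself are
 * assumptions, the offsets mirror the writes above):
 *
 *	int * err = host_error_copy;                  // assumed host copy of the error array
 *	void * sptr = (void *)(((size_t)(unsigned int)err[2] << 32) | (unsigned int)err[1]);
 *	int prp = err[3];                             // property index, -1 for the whole object
 *	int dim = err[4];                             // number of key components (1 here)
 *	int key = err[5];                             // offending element index
 *	// err[6..8] = blockIdx, err[9..11] = blockDim, err[12..14] = threadIdx
 */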


namespace openfpm
{

/*! \brief vector interface available when on gpu
 *
 * Kernel-side view of the vector: it wraps device pointers only
 *
 * \tparam T type of the object the vector stores
 * \tparam layout_base memory layout of the data
 *
 */

template<typename T, template <typename> class layout_base>
struct vector_gpu_ker
{
	typedef vector_gpu_ker<T,layout_base> self_type;

	typedef typename apply_transform<layout_base,T>::type T_;

	//! Actual number of elements in the vector. Warning: it is not the space allocated by the
	//! underlying grid; the grid size increases by a fixed amount every time a vector bigger
	//! than the allocated space is needed
	unsigned int v_size;

	//! 1-D static grid
	grid_gpu_ker<1,T_,layout_base> base;

	/*! \brief Check that the key is inside the grid
	 *
	 * \param v1 key to check
	 *
	 * \return true if it is inside the bounds
	 *
	 */
	__device__ __host__ inline bool check_bound(size_t v1) const
	{
		return v1 < size();
	}

public:

	//! it defines that this is a vector
	typedef int yes_i_am_vector;

	//! Type of the encapsulation memory parameter
	typedef typename layout_base<T_>::type layout_type;

	//! Object container for T; it is the return type of get_o. It returns an object through
	//! which you can access all the properties of T
	typedef typename grid_base<1,T_,CudaMemory,typename layout_base<T_>::type>::container container;

	//! Type of the value the vector is storing
	typedef T_ value_type;

	//! Indicate this structure has a function to check the device pointer
	typedef int yes_has_check_device_pointer;

	/*! \brief Return the size of the vector
	 *
	 * \return the size
	 *
	 */
	__device__ __host__ unsigned int size() const
	{
		return v_size;
	}

	/*! \brief return the maximum capacity of the vector before reallocation
	 *
	 * \return the capacity of the vector
	 *
	 */
	__device__ __host__ unsigned int capacity() const
	{
		return base.size();
	}

	/*! \brief Get an element of the vector
	 *
	 * Get an element of the vector
	 *
	 * \tparam p Property to get
	 * \param id Element to get
	 *
	 * \return the element value requested
	 *
	 */
	template <unsigned int p>
	__device__ __host__ inline auto get(unsigned int id) const -> decltype(base.template get<p>(grid_key_dx<1>(0)))
	{
#ifdef SE_CLASS1
		if (check_bound(id) == false)
		{fill_vector_error_array_overflow<p>(this->template getPointer<p>(),id);}
#endif
		grid_key_dx<1> key(id);

		return base.template get<p>(key);
	}
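
	/* Usage sketch: reading and writing a property through this view inside a kernel.
	 * The kernel name and the property index 0 are illustrative only; the view is what
	 * toKernel() of the host-side vector would pass in.
	 *
	 *	template<typename vector_type>
	 *	__global__ void scale_prop0(vector_type v, float f)
	 *	{
	 *		int p = threadIdx.x + blockIdx.x * blockDim.x;
	 *		if (p >= v.size())	{return;}
	 *
	 *		v.template get<0>(p) = v.template get<0>(p) * f;
	 *	}
	 */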

	/*! \brief Get an element of the vector
	 *
	 * Get an element of the vector
	 *
	 * \param id Element to get
	 *
	 * \return the element (encapsulated)
	 *
	 */
	__device__ __host__ inline auto get(unsigned int id) -> decltype(base.get_o(grid_key_dx<1>(id)))
	{
#ifdef SE_CLASS1
		if (check_bound(id) == false)
		{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

		grid_key_dx<1> key(id);

		return base.get_o(key);
	}

	/*! \brief Get an element of the vector
	 *
	 * Get an element of the vector
	 *
	 * \param id Element to get
	 *
	 * \return the element (encapsulated)
	 *
	 */
	inline __device__ __host__ auto get(unsigned int id) const -> const decltype(base.get_o(grid_key_dx<1>(id)))
	{
#ifdef SE_CLASS1
		if (check_bound(id) == false)
		{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

		grid_key_dx<1> key(id);

		return base.get_o(key);
	}

	/*! \brief Get an element of the vector
	 *
	 * \deprecated
	 *
	 * exactly as get; it exists to keep compatibility with grid
	 *
	 * \param id Element to get
	 *
	 * \return the element (encapsulated)
	 *
	 */
	inline __device__ __host__ auto get_o(unsigned int id) const -> decltype(base.get_o(id))
	{
#ifdef SE_CLASS1
		if (check_bound(id) == false)
		{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

		grid_key_dx<1> key(id);

		return base.get_o(key);
	}

	/*! \brief Get an element of the vector
	 *
	 * \deprecated
	 *
	 * exactly as get; it exists to keep compatibility with grid
	 *
	 * \param id Element to get
	 *
	 * \return the element (encapsulated)
	 *
	 */
	inline __device__ __host__ auto get_o(unsigned int id) -> decltype(base.get_o(id))
	{
#ifdef SE_CLASS1
		if (check_bound(id) == false)
		{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

		grid_key_dx<1> key(id);

		return base.get_o(key);
	}

	/*! \brief Get the last element of the vector
	 *
	 * \return the last element (encapsulated)
	 *
	 */
	inline auto last() const -> decltype(base.get_o(0))
	{
		grid_key_dx<1> key(size()-1);

		return base.get_o(key);
	}

	/*! \brief Get an element of the vector
	 *
	 * Get an element of the vector
	 *
	 * \tparam p Property to get
	 * \param id Element to get
	 *
	 * \return the element value requested
	 *
	 */
	template <unsigned int p>
	__device__ __host__ inline auto get(unsigned int id) -> decltype(base.template get<p>(grid_key_dx<1>(0)))
	{
#ifdef SE_CLASS1
		if (check_bound(id) == false)
		{fill_vector_error_array_overflow<p>(this->template getPointer<p>(),id);}
#endif

		grid_key_dx<1> key(id);

		return base.template get<p>(key);
	}

	/*! \brief Get the last element of the vector
	 *
	 * \return the element (encapsulated)
	 *
	 */
	inline auto last() -> decltype(base.get_o(0))
	{
		grid_key_dx<1> key(size()-1);

		return base.get_o(key);
	}

	//! Default constructor, the vector is empty
	vector_gpu_ker()
	:v_size(0)
	{}

	//! Constructor from a size and a kernel-side grid holding the data
	vector_gpu_ker(int v_size, const grid_gpu_ker<1,T_,layout_base> & cpy)
	:v_size(v_size),base(cpy)
	{}


	/*! \brief Set the element id to the object obj
	 *
	 * \param id element index
	 * \param obj object (encapsulated)
	 *
	 */
	__device__ void set(int id, const container & obj)
	{
#ifdef SE_CLASS1
		if (check_bound(id) == false)
		{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

		// copy the object into the element
		base.set(id,obj);
	}

	/*! \brief Get the pointer for the property p
	 *
	 * \tparam p property
	 *
	 * \return the pointer to the buffer of property p
	 *
	 */
	template<unsigned int p> __device__ __host__ void * getPointer()
	{
		return base.template getPointer<p>();
	}

	/*! \brief Get the pointer for the property p
	 *
	 * \tparam p property
	 *
	 * \return the pointer to the buffer of property p
	 *
	 */
	template<unsigned int p> __device__ __host__ const void * getPointer() const
	{
		return base.template getPointer<p>();
	}

	/*! \brief Set an element of the vector from an object that is a subset of the vector properties
	 *
	 * The number of properties of the source object must be smaller than (or equal to) the number
	 * of properties of the destination, and all the properties of the source must be mapped:
	 * if the source has 3 properties, 3 ids are required in args
	 *
	 * \tparam encap_S object that encapsulates the source object
	 * \tparam args ids of the destination properties the source properties are mapped to
	 *
	 * \param i element to set
	 * \param obj encapsulated source object
	 *
	 */
	template <typename encap_S, unsigned int ...args> void set_o(unsigned int i, const encap_S & obj)
	{
#ifdef SE_CLASS1
		if (check_bound(i) == false)
		{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),i);}
#endif

		// write the object at position i
		object_s_di<encap_S,decltype(get(i)),OBJ_ENCAP,args...>(obj,get(i));
	}
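
	/* Mapping sketch for set_o (names and indices are illustrative): if the encapsulated
	 * source object has 2 properties and they have to land in properties 0 and 3 of T,
	 * the call would look like
	 *
	 *	dst.template set_o<decltype(src.get(j)),0,3>(i,src.get(j));
	 */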

	/*! \brief Set an element of this vector from an element of another vector
	 *
	 * \param id element id
	 * \param v source vector
	 * \param src source element
	 *
	 */
	__device__ void set(unsigned int id, const vector_gpu_ker<T_,layout_base> & v, unsigned int src)
	{
#ifdef SE_CLASS1
		if (check_bound(id) == false)
		{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

		base.set(id,v.base,src);
	}

	/*! \brief Set an element of this vector from an element of another vector (only the properties prp)
	 *
	 * \tparam prp properties to copy
	 *
	 * \param id element id
	 * \param v source vector
	 * \param src source element
	 *
	 */
	template<unsigned int ... prp>
	__device__ void set(unsigned int id, const vector_gpu_ker<T_,layout_base> & v, unsigned int src)
	{
#ifdef SE_CLASS1
		if (check_bound(id) == false)
		{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

		base.template set<prp...>(id,v.base,src);
	}

	/*! \brief Get an iterator for the GPU
	 *
	 * \param n_thr maximum number of threads per block
	 *
	 * \return an iterator spanning all the elements of the vector
	 *
	 */
	__host__ ite_gpu<1> getGPUIterator(size_t n_thr = 1024) const
	{
		grid_key_dx<1> start(0);
		grid_key_dx<1> stop(size()-1);

		return base.getGPUIterator(start,stop,n_thr);
	}
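
	/* Usage sketch (host side, names are illustrative): the returned iterator carries the
	 * launch configuration; in OpenFPM-style code its wthr/thr members are used as the
	 * grid and block dimensions of the kernel launch.
	 *
	 *	auto ite = vd.toKernel().getGPUIterator();
	 *	my_kernel<<<ite.wthr,ite.thr>>>(vd.toKernel());
	 */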

	/*! \brief Get an iterator for the GPU over the range [0,stop]
	 *
	 * \param stop last element to iterate (inclusive)
	 * \param n_thr maximum number of threads per block
	 *
	 * \return the iterator
	 *
	 */
	ite_gpu<1> getGPUIteratorTo(size_t stop, size_t n_thr = 1024) const
	{
		grid_key_dx<1> start(0);
		grid_key_dx<1> stop_(stop);

		return base.getGPUIterator(start,stop_,n_thr);
	}

	/*! \brief operator=, this operator absorbs the pointers; consider that this object wraps device pointers
	 *
	 * \param v object to copy
	 *
	 * \return itself
	 *
	 */
	vector_gpu_ker<T,layout_base> & operator=(const vector_gpu_ker<T,layout_base> & v)
	{
		v_size = v.v_size;
		base = v.base;

		return *this;
	}

	/*! \brief Return the base
	 *
	 * \return the base
	 *
	 */
	__device__ grid_gpu_ker<1,T_,layout_base> & getBase()
	{
		return base;
	}

	//! Return a pointer to the size field (for internal use)
	void * internal_get_size_pointer()	{return &v_size;}

	void print_size()
	{
#ifndef DISABLE_ALL_RTTI
		std::cout << "the size of: " << demangle(typeid(self_type).name()) << " is " << sizeof(self_type) << std::endl;
		std::cout << "    " << demangle(typeid(decltype(v_size)).name()) << ":" << sizeof(decltype(v_size)) << std::endl;
		std::cout << "    " << demangle(typeid(decltype(base)).name()) << ":" << sizeof(decltype(base)) << std::endl;
#endif
	}

#ifdef SE_CLASS1

	/*! \brief Check if the device pointer is owned by this structure
	 *
	 * \param ptr pointer to check
	 *
	 * \return a structure pointer check with information about the match
	 *
	 */
	pointer_check check_device_pointer(void * ptr)
	{
		pointer_check pc;
		pc.match = false;

		check_device_ptr<self_type> ptr_chk(ptr,*this);

		boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::max_prop>>(ptr_chk);

		if (ptr_chk.result == true)
		{
			pc.match = true;
			pc.match_str += std::string("Property: ") + std::to_string(ptr_chk.prp) + "\n";
		}

		return pc;
	}

#endif
};

}

#endif /* MAP_VECTOR_CUDA_HPP_ */