1 | /* |
2 | * map_vector_sparse_cuda_ker.cuh |
3 | * |
4 | * Created on: Jan 23, 2019 |
5 | * Author: i-bird |
6 | */ |
7 | |
8 | #ifndef MAP_VECTOR_SPARSE_CUDA_KER_CUH_ |
9 | #define MAP_VECTOR_SPARSE_CUDA_KER_CUH_ |
10 | |
11 | #include "util/for_each_ref.hpp" |
12 | |
13 | //todo: Check where it's a good place to put the following method... |
14 | template<typename dim3Ta, typename dim3Tb> |
15 | inline __device__ __host__ int dim3CoordToInt(const dim3Ta & coord, const dim3Tb & dimensions) |
16 | { |
17 | int res = coord.z; |
18 | res *= dimensions.y; |
19 | res += coord.y; |
20 | res *= dimensions.x; |
21 | res += coord.x; |
22 | return res; |
23 | } |
24 | // Specialization allowing transparency |
25 | inline __device__ __host__ int dim3CoordToInt(int coord, int dimension) |
26 | { |
27 | return coord; |
28 | } |
29 | |
30 | namespace openfpm |
31 | { |
32 | template<typename index_type> |
33 | struct sparse_index |
34 | { |
35 | index_type id; |
36 | }; |
37 | |
38 | #if defined(__NVCC__) && !defined(CUDA_ON_CPU) |
39 | static __shared__ int vct_atomic_add; |
40 | static __shared__ int vct_atomic_rem; |
41 | #endif |
42 | |
43 | template<typename T, |
44 | typename Ti, |
45 | template<typename> class layout_base> |
46 | class vector_sparse_gpu_ker |
47 | { |
48 | vector_gpu_ker<aggregate<Ti>,layout_base> vct_index; |
49 | |
50 | vector_gpu_ker<T,layout_base> vct_data; |
51 | |
52 | vector_gpu_ker<aggregate<Ti>,layout_base> vct_add_index; |
53 | |
54 | vector_gpu_ker<aggregate<Ti>,layout_base> vct_rem_index; |
55 | |
56 | vector_gpu_ker<aggregate<Ti>,layout_base> vct_nadd_index; |
57 | |
58 | vector_gpu_ker<aggregate<Ti>,layout_base> vct_nrem_index; |
59 | |
60 | vector_gpu_ker<T,layout_base> vct_add_data; |
61 | |
62 | // the const is forced by the getter that only return const encap that should not allow the modification of bck |
63 | // this should possible avoid to define an object const_encap |
64 | //mutable vector_gpu_ker<T,layout_base> vct_data_bck; |
65 | |
66 | int nslot_add; |
67 | int nslot_rem; |
68 | |
69 | /*! \brief get the element i |
70 | * |
71 | * search the element x |
72 | * |
73 | * \param i element i |
74 | */ |
75 | inline __device__ void _branchfree_search(Ti x, Ti & id) const |
76 | { |
77 | if (vct_index.size() == 0) {id = 0; return;} |
78 | const Ti *base = &vct_index.template get<0>(0); |
79 | const Ti *end = (const Ti *)vct_index.template getPointer<0>() + vct_index.size(); |
80 | Ti n = vct_data.size()-1; |
81 | while (n > 1) |
82 | { |
83 | Ti half = n / 2; |
84 | base = (base[half] < x) ? base+half : base; |
85 | n -= half; |
86 | } |
87 | |
88 | int off = (*base < x); |
89 | id = base - &vct_index.template get<0>(0) + off; |
90 | Ti v = (base + off != end)?*(base + off):(Ti)-1; |
91 | id = (x == v)?id:vct_data.size()-1; |
92 | } |
93 | |
94 | public: |
95 | |
96 | typedef Ti index_type; |
97 | |
98 | //! Indicate this structure has a function to check the device pointer |
99 | typedef int yes_has_check_device_pointer; |
100 | |
101 | vector_sparse_gpu_ker(vector_gpu_ker<aggregate<Ti>,layout_base> vct_index, |
102 | vector_gpu_ker<T,layout_base> vct_data, |
103 | vector_gpu_ker<aggregate<Ti>,layout_base> vct_add_index, |
104 | vector_gpu_ker<aggregate<Ti>,layout_base> vct_rem_index, |
105 | vector_gpu_ker<T,layout_base> vct_add_data, |
106 | vector_gpu_ker<aggregate<Ti>,layout_base> vct_nadd_index, |
107 | vector_gpu_ker<aggregate<Ti>,layout_base> vct_nrem_index, |
108 | int nslot_add, |
109 | int nslot_rem) |
110 | :vct_index(vct_index),vct_data(vct_data), |
111 | vct_add_index(vct_add_index),vct_rem_index(vct_rem_index),vct_add_data(vct_add_data), |
112 | vct_nadd_index(vct_nadd_index),vct_nrem_index(vct_nrem_index), |
113 | nslot_add(nslot_add),nslot_rem(nslot_rem) |
114 | {} |
115 | |
116 | /*! \brief Get the number of elements |
117 | * |
118 | * \return the number of elements |
119 | * |
120 | */ |
121 | __device__ inline int size() |
122 | { |
123 | return vct_index.size(); |
124 | } |
125 | |
126 | /*! \brief This function must be called |
127 | * |
128 | */ |
129 | __device__ inline void init() |
130 | { |
131 | #ifdef __NVCC__ |
132 | if (threadIdx.x == 0) |
133 | { |
134 | vct_atomic_add = 0; |
135 | vct_atomic_rem = 0; |
136 | } |
137 | |
138 | __syncthreads(); |
139 | #endif |
140 | } |
141 | |
142 | /*! \brief This function must be called |
143 | * |
144 | */ |
145 | __device__ inline void init_ins_inc() |
146 | { |
147 | #ifdef __NVCC__ |
148 | if (threadIdx.x == 0) |
149 | { |
150 | int blockId = dim3CoordToInt(blockIdx, gridDim); |
151 | vct_atomic_add = vct_nadd_index.template get<0>(blockId); |
152 | } |
153 | |
154 | __syncthreads(); |
155 | #endif |
156 | } |
157 | |
158 | /*! \brief This function must be called |
159 | * |
160 | */ |
161 | __device__ inline void init_rem_inc() |
162 | { |
163 | #ifdef __NVCC__ |
164 | if (threadIdx.x == 0) |
165 | { |
166 | int blockId = dim3CoordToInt(blockIdx, gridDim); |
167 | vct_atomic_rem = vct_nrem_index.template get<0>(blockId); |
168 | } |
169 | |
170 | __syncthreads(); |
171 | #endif |
172 | } |
173 | |
174 | /*! \brief Get the sparse index |
175 | * |
176 | * Get the sparse index of the element id |
177 | * |
178 | * \note use get_index and get to retrieve the value index associated to the sparse index |
179 | * |
180 | * \param id Element to get |
181 | * |
182 | * \return the element value requested |
183 | * |
184 | */ |
185 | __device__ inline openfpm::sparse_index<Ti> get_sparse(Ti id) const |
186 | { |
187 | Ti di; |
188 | this->_branchfree_search(id,di); |
189 | openfpm::sparse_index<Ti> sid; |
190 | sid.id = di; |
191 | |
192 | return sid; |
193 | } |
194 | |
195 | /*! \brief Get the background value |
196 | */ |
197 | template <unsigned int p> |
198 | __device__ inline auto getBackground() const -> decltype(vct_data.template get<p>(0)) & |
199 | { |
200 | return vct_data.template get<p>(vct_data.size()-1); |
201 | } |
202 | |
203 | /*! \brief Get an element of the vector |
204 | * |
205 | * Get an element of the vector |
206 | * |
207 | * \tparam p Property to get |
208 | * \param id Element to get |
209 | * |
210 | * \return the element value requested |
211 | * |
212 | */ |
213 | template <unsigned int p> |
214 | __device__ inline auto get(Ti id) const -> decltype(vct_data.template get<p>(id)) |
215 | { |
216 | Ti di; |
217 | this->_branchfree_search(id,di); |
218 | return vct_data.template get<p>(di); |
219 | } |
220 | |
221 | __device__ inline auto get(Ti id) const -> decltype(vct_data.get(0)) |
222 | { |
223 | Ti di; |
224 | Ti v = this->_branchfree_search(id,di); |
225 | return vct_data.get(static_cast<size_t>(di)); |
226 | } |
227 | |
228 | /*! \brief Get an element of the vector |
229 | * |
230 | * Get an element of the vector |
231 | * |
232 | * \tparam p Property to get |
233 | * \param id Element to get |
234 | * |
235 | * \return the element value requested |
236 | * |
237 | */ |
238 | template <unsigned int p> |
239 | __device__ inline auto get(openfpm::sparse_index<Ti> id) const -> decltype(vct_data.template get<p>(id.id)) |
240 | { |
241 | return vct_data.template get<p>(id.id); |
242 | } |
243 | |
244 | /*! \brief Get an element of the vector |
245 | * |
246 | * Get an element of the vector |
247 | * |
248 | * \tparam p Property to get |
249 | * \param id Element to get |
250 | * |
251 | * \return the element value requested |
252 | * |
253 | */ |
254 | template <unsigned int p> |
255 | __device__ inline auto get(openfpm::sparse_index<Ti> id) -> decltype(vct_data.template get<p>(id.id)) |
256 | { |
257 | return vct_data.template get<p>(id.id); |
258 | } |
259 | |
260 | /*! \brief Get the index associated to the element id |
261 | * |
262 | * |
263 | * \return the element value requested |
264 | * |
265 | */ |
266 | __device__ inline Ti get_index(openfpm::sparse_index<Ti> id) const |
267 | { |
268 | return vct_index.template get<0>(id.id); |
269 | } |
270 | |
271 | /*! \brief Get an element of the vector |
272 | * |
273 | * Get an element of the vector |
274 | * |
275 | * \tparam p Property to get |
276 | * \param id Element to get |
277 | * |
278 | * \return the element value requested |
279 | * |
280 | */ |
281 | template <unsigned int p> |
282 | __device__ inline auto get(Ti id, Ti & di) const -> decltype(vct_data.template get<p>(id)) |
283 | { |
284 | this->_branchfree_search(id,di); |
285 | return vct_data.template get<p>(di); |
286 | } |
287 | |
288 | /*! \brief Get an element of the vector |
289 | * |
290 | * Get an element of the vector |
291 | * |
292 | * \tparam p Property to get |
293 | * \param id Element to get |
294 | * |
295 | * \return the element value requested |
296 | * |
297 | */ |
298 | template <unsigned int p> |
299 | __device__ inline auto get_ele(Ti di) const -> decltype(vct_data.template get<p>(di)) |
300 | { |
301 | return vct_data.template get<p>(di); |
302 | } |
303 | |
304 | /*! \brief It insert an element in the sparse vector |
305 | * |
306 | * |
307 | */ |
308 | template <unsigned int p> |
309 | __device__ auto insert(Ti ele) -> decltype(vct_data.template get<p>(0)) |
310 | { |
311 | #ifdef __NVCC__ |
312 | |
313 | int blockId = dim3CoordToInt(blockIdx, gridDim); |
314 | int slot_base = blockId; |
315 | |
316 | int pos = atomicAdd(&vct_atomic_add,1); |
317 | vct_add_index.template get<0>(slot_base*nslot_add+pos) = ele; |
318 | return vct_add_data.template get<p>(slot_base*nslot_add+pos); |
319 | #else |
320 | std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl; |
321 | #endif |
322 | } |
323 | |
324 | /*! \brief It insert an element in the sparse vector |
325 | * |
326 | * \param ele element to insert |
327 | * |
328 | * \return an object to fill the values |
329 | * |
330 | */ |
331 | __device__ void remove(Ti ele) |
332 | { |
333 | #ifdef __NVCC__ |
334 | |
335 | int blockId = dim3CoordToInt(blockIdx, gridDim); |
336 | int slot_base = blockId; |
337 | |
338 | int pos = atomicAdd(&vct_atomic_rem,1); |
339 | vct_rem_index.template get<0>(slot_base*nslot_rem+pos) = ele; |
340 | |
341 | #else |
342 | std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl; |
343 | #endif |
344 | } |
345 | |
346 | /*! \brief It insert an element in the sparse vector |
347 | * |
348 | * \param ele element to insert |
349 | * |
350 | * \return an object to fill the values |
351 | * |
352 | */ |
353 | __device__ auto insert(Ti ele) -> decltype(vct_add_data.get(0)) |
354 | { |
355 | #ifdef __NVCC__ |
356 | |
357 | int blockId = dim3CoordToInt(blockIdx, gridDim); |
358 | int slot_base = blockId; |
359 | |
360 | int pos = atomicAdd(&vct_atomic_add,1); |
361 | vct_add_index.template get<0>(slot_base*nslot_add+pos) = ele; |
362 | |
363 | return vct_add_data.get(slot_base*nslot_add+pos); |
364 | #else |
365 | std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl; |
366 | #endif |
367 | } |
368 | |
369 | /*! \brief It insert an element in the sparse vector |
370 | * |
371 | * |
372 | */ |
373 | __device__ void remove_b(Ti ele,Ti slot_base) |
374 | { |
375 | #ifdef __NVCC__ |
376 | |
377 | int pos = atomicAdd(&vct_atomic_rem,1); |
378 | vct_rem_index.template get<0>(slot_base*nslot_rem+pos) = ele; |
379 | |
380 | #else |
381 | std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl; |
382 | #endif |
383 | } |
384 | |
385 | /*! \brief It insert an element in the sparse vector |
386 | * |
387 | * |
388 | */ |
389 | template <unsigned int p> |
390 | __device__ auto insert_b(Ti ele,Ti slot_base) -> decltype(vct_data.template get<p>(0)) |
391 | { |
392 | #ifdef __NVCC__ |
393 | |
394 | int pos = atomicAdd(&vct_atomic_add,1); |
395 | vct_add_index.template get<0>(slot_base*nslot_add+pos) = ele; |
396 | return vct_add_data.template get<p>(slot_base*nslot_add+pos); |
397 | #else |
398 | std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl; |
399 | #endif |
400 | } |
401 | |
402 | /*! \brief It insert an element in the sparse vector |
403 | * |
404 | * |
405 | */ |
406 | __device__ auto insert_b(Ti ele,Ti slot_base) -> decltype(vct_add_data.get(0)) |
407 | { |
408 | #ifdef __NVCC__ |
409 | |
410 | int pos = atomicAdd(&vct_atomic_add,1); |
411 | vct_add_index.template get<0>(slot_base*nslot_add+pos) = ele; |
412 | return vct_add_data.get(slot_base*nslot_add+pos); |
413 | #else |
414 | std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl; |
415 | #endif |
416 | } |
417 | |
418 | /*! \brief It insert an element in the sparse vector |
419 | * |
420 | * |
421 | */ |
422 | __device__ void flush_block_insert() |
423 | { |
424 | #ifdef __NVCC__ |
425 | |
426 | __syncthreads(); |
427 | |
428 | if (threadIdx.x == 0) |
429 | { |
430 | int blockId = dim3CoordToInt(blockIdx, gridDim); |
431 | vct_nadd_index.template get<0>(blockId) = vct_atomic_add; |
432 | } |
433 | |
434 | #else |
435 | std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl; |
436 | #endif |
437 | } |
438 | |
439 | /*! \brief It insert an element in the sparse vector |
440 | * |
441 | * |
442 | */ |
443 | __device__ void flush_block_remove() |
444 | { |
445 | #ifdef __NVCC__ |
446 | |
447 | __syncthreads(); |
448 | |
449 | if (threadIdx.x == 0) |
450 | { |
451 | int blockId = dim3CoordToInt(blockIdx, gridDim); |
452 | vct_nrem_index.template get<0>(blockId) = vct_atomic_rem; |
453 | } |
454 | |
455 | #else |
456 | std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl; |
457 | #endif |
458 | } |
459 | |
460 | /*! \brief It insert an element in the sparse vector |
461 | * |
462 | * |
463 | */ |
464 | __device__ void flush_block_insert(Ti b, bool flusher) |
465 | { |
466 | #ifdef __NVCC__ |
467 | |
468 | __syncthreads(); |
469 | |
470 | if (flusher == true) |
471 | {vct_nadd_index.template get<0>(b) = vct_atomic_add;} |
472 | |
473 | |
474 | #else |
475 | std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl; |
476 | #endif |
477 | } |
478 | |
479 | __device__ auto private_get_data() -> decltype(vct_add_data.getBase().get_data_()) |
480 | { |
481 | return vct_add_data.getBase().get_data_(); |
482 | } |
483 | |
484 | /*! \brief It insert an element in the sparse vector |
485 | * |
486 | * |
487 | */ |
488 | __device__ void flush_block_remove(unsigned int b, bool flusher) |
489 | { |
490 | #ifdef __NVCC__ |
491 | |
492 | __syncthreads(); |
493 | |
494 | if (flusher == true) |
495 | {vct_nrem_index.template get<0>(b) = vct_atomic_rem;} |
496 | |
497 | #else |
498 | std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl; |
499 | #endif |
500 | } |
501 | |
502 | /*! \brief Get the data buffer |
503 | * |
504 | * \return the reference to the data buffer |
505 | */ |
506 | __device__ auto getAddDataBuffer() -> decltype(vct_add_data)& |
507 | { |
508 | return vct_add_data; |
509 | } |
510 | |
511 | /*! \brief Get the data buffer |
512 | * |
513 | * \return the reference to the data buffer |
514 | */ |
515 | __device__ auto getDataBuffer() -> decltype(vct_data)& |
516 | { |
517 | return vct_data; |
518 | } |
519 | |
520 | /*! \brief Get the indices buffer |
521 | * |
522 | * \return the reference to the indices buffer |
523 | */ |
524 | __device__ auto getAddIndexBuffer() const -> const decltype(vct_add_index)& |
525 | { |
526 | return vct_add_index; |
527 | } |
528 | |
529 | /*! \brief Get the indices buffer |
530 | * |
531 | * \return the reference to the indices buffer |
532 | */ |
533 | __device__ auto getIndexBuffer() const -> const decltype(vct_index)& |
534 | { |
535 | return vct_index; |
536 | } |
537 | |
538 | /*! \brief Get the data buffer |
539 | * |
540 | * \return the reference to the data buffer |
541 | */ |
542 | __device__ auto getDataBuffer() const -> const decltype(vct_data)& |
543 | { |
544 | return vct_data; |
545 | } |
546 | |
547 | #ifdef SE_CLASS1 |
548 | |
549 | /*! \brief Check if the device pointer is owned by this structure |
550 | * |
551 | * \return a structure pointer check with information about the match |
552 | * |
553 | */ |
554 | pointer_check check_device_pointer(void * ptr) |
555 | { |
556 | pointer_check pc; |
557 | |
558 | pc = vct_index.check_device_pointer(ptr); |
559 | |
560 | if (pc.match == true) |
561 | { |
562 | pc.match_str = std::string("Index vector overflow: " ) + "\n" + pc.match_str; |
563 | return pc; |
564 | } |
565 | |
566 | pc = vct_data.check_device_pointer(ptr); |
567 | |
568 | if (pc.match == true) |
569 | { |
570 | pc.match_str = std::string("Data vector overflow: " ) + "\n" + pc.match_str; |
571 | return pc; |
572 | } |
573 | |
574 | pc = vct_add_index.check_device_pointer(ptr); |
575 | |
576 | if (pc.match == true) |
577 | { |
578 | pc.match_str = std::string("Add index vector overflow: " ) + "\n" + pc.match_str; |
579 | return pc; |
580 | } |
581 | |
582 | pc = vct_rem_index.check_device_pointer(ptr); |
583 | |
584 | if (pc.match == true) |
585 | { |
586 | pc.match_str = std::string("Remove index vector overflow: " ) + "\n" + pc.match_str; |
587 | return pc; |
588 | } |
589 | |
590 | pc = vct_nadd_index.check_device_pointer(ptr); |
591 | |
592 | if (pc.match == true) |
593 | { |
594 | pc.match_str = std::string("Add index counter vector overflow: " ) + "\n" + pc.match_str; |
595 | return pc; |
596 | } |
597 | |
598 | pc = vct_nrem_index.check_device_pointer(ptr); |
599 | |
600 | if (pc.match == true) |
601 | { |
602 | pc.match_str = std::string("Remove index counter vector overflow: " ) + "\n" + pc.match_str; |
603 | return pc; |
604 | } |
605 | |
606 | pc = vct_add_data.check_device_pointer(ptr); |
607 | |
608 | if (pc.match == true) |
609 | { |
610 | pc.match_str = std::string("Add data vector overflow: " ) + "\n" + pc.match_str; |
611 | return pc; |
612 | } |
613 | |
614 | return pc; |
615 | } |
616 | |
617 | #endif |
618 | |
619 | }; |
620 | } |
621 | |
622 | #endif /* MAP_VECTOR_SPARSE_CUDA_KER_CUH_ */ |
623 | |