1/*
2 * map_vector_sparse_cuda_ker.cuh
3 *
4 * Created on: Jan 23, 2019
5 * Author: i-bird
6 */
7
8#ifndef MAP_VECTOR_SPARSE_CUDA_KER_CUH_
9#define MAP_VECTOR_SPARSE_CUDA_KER_CUH_
10
11#include "util/for_each_ref.hpp"
12
13//todo: Check where it's a good place to put the following method...
14template<typename dim3Ta, typename dim3Tb>
15inline __device__ __host__ int dim3CoordToInt(const dim3Ta & coord, const dim3Tb & dimensions)
16{
17 int res = coord.z;
18 res *= dimensions.y;
19 res += coord.y;
20 res *= dimensions.x;
21 res += coord.x;
22 return res;
23}
24// Specialization allowing transparency
25inline __device__ __host__ int dim3CoordToInt(int coord, int dimension)
26{
27 return coord;
28}
29
30namespace openfpm
31{
32 template<typename index_type>
33 struct sparse_index
34 {
35 index_type id;
36 };
37
38#if defined(__NVCC__) && !defined(CUDA_ON_CPU)
39 static __shared__ int vct_atomic_add;
40 static __shared__ int vct_atomic_rem;
41#endif
42
43 template<typename T,
44 typename Ti,
45 template<typename> class layout_base>
46 class vector_sparse_gpu_ker
47 {
48 vector_gpu_ker<aggregate<Ti>,layout_base> vct_index;
49
50 vector_gpu_ker<T,layout_base> vct_data;
51
52 vector_gpu_ker<aggregate<Ti>,layout_base> vct_add_index;
53
54 vector_gpu_ker<aggregate<Ti>,layout_base> vct_rem_index;
55
56 vector_gpu_ker<aggregate<Ti>,layout_base> vct_nadd_index;
57
58 vector_gpu_ker<aggregate<Ti>,layout_base> vct_nrem_index;
59
60 vector_gpu_ker<T,layout_base> vct_add_data;
61
62 // the const is forced by the getter that only return const encap that should not allow the modification of bck
63 // this should possible avoid to define an object const_encap
64 //mutable vector_gpu_ker<T,layout_base> vct_data_bck;
65
66 int nslot_add;
67 int nslot_rem;
68
69 /*! \brief get the element i
70 *
71 * search the element x
72 *
73 * \param i element i
74 */
75 inline __device__ void _branchfree_search(Ti x, Ti & id) const
76 {
77 if (vct_index.size() == 0) {id = 0; return;}
78 const Ti *base = &vct_index.template get<0>(0);
79 const Ti *end = (const Ti *)vct_index.template getPointer<0>() + vct_index.size();
80 Ti n = vct_data.size()-1;
81 while (n > 1)
82 {
83 Ti half = n / 2;
84 base = (base[half] < x) ? base+half : base;
85 n -= half;
86 }
87
88 int off = (*base < x);
89 id = base - &vct_index.template get<0>(0) + off;
90 Ti v = (base + off != end)?*(base + off):(Ti)-1;
91 id = (x == v)?id:vct_data.size()-1;
92 }
93
94 public:
95
96 typedef Ti index_type;
97
98 //! Indicate this structure has a function to check the device pointer
99 typedef int yes_has_check_device_pointer;
100
101 vector_sparse_gpu_ker(vector_gpu_ker<aggregate<Ti>,layout_base> vct_index,
102 vector_gpu_ker<T,layout_base> vct_data,
103 vector_gpu_ker<aggregate<Ti>,layout_base> vct_add_index,
104 vector_gpu_ker<aggregate<Ti>,layout_base> vct_rem_index,
105 vector_gpu_ker<T,layout_base> vct_add_data,
106 vector_gpu_ker<aggregate<Ti>,layout_base> vct_nadd_index,
107 vector_gpu_ker<aggregate<Ti>,layout_base> vct_nrem_index,
108 int nslot_add,
109 int nslot_rem)
110 :vct_index(vct_index),vct_data(vct_data),
111 vct_add_index(vct_add_index),vct_rem_index(vct_rem_index),vct_add_data(vct_add_data),
112 vct_nadd_index(vct_nadd_index),vct_nrem_index(vct_nrem_index),
113 nslot_add(nslot_add),nslot_rem(nslot_rem)
114 {}
115
116 /*! \brief Get the number of elements
117 *
118 * \return the number of elements
119 *
120 */
121 __device__ inline int size()
122 {
123 return vct_index.size();
124 }
125
126 /*! \brief This function must be called
127 *
128 */
129 __device__ inline void init()
130 {
131#ifdef __NVCC__
132 if (threadIdx.x == 0)
133 {
134 vct_atomic_add = 0;
135 vct_atomic_rem = 0;
136 }
137
138 __syncthreads();
139#endif
140 }
141
142 /*! \brief This function must be called
143 *
144 */
145 __device__ inline void init_ins_inc()
146 {
147#ifdef __NVCC__
148 if (threadIdx.x == 0)
149 {
150 int blockId = dim3CoordToInt(blockIdx, gridDim);
151 vct_atomic_add = vct_nadd_index.template get<0>(blockId);
152 }
153
154 __syncthreads();
155#endif
156 }
157
158 /*! \brief This function must be called
159 *
160 */
161 __device__ inline void init_rem_inc()
162 {
163#ifdef __NVCC__
164 if (threadIdx.x == 0)
165 {
166 int blockId = dim3CoordToInt(blockIdx, gridDim);
167 vct_atomic_rem = vct_nrem_index.template get<0>(blockId);
168 }
169
170 __syncthreads();
171#endif
172 }
173
174 /*! \brief Get the sparse index
175 *
176 * Get the sparse index of the element id
177 *
178 * \note use get_index and get to retrieve the value index associated to the sparse index
179 *
180 * \param id Element to get
181 *
182 * \return the element value requested
183 *
184 */
185 __device__ inline openfpm::sparse_index<Ti> get_sparse(Ti id) const
186 {
187 Ti di;
188 this->_branchfree_search(id,di);
189 openfpm::sparse_index<Ti> sid;
190 sid.id = di;
191
192 return sid;
193 }
194
195 /*! \brief Get the background value
196 */
197 template <unsigned int p>
198 __device__ inline auto getBackground() const -> decltype(vct_data.template get<p>(0)) &
199 {
200 return vct_data.template get<p>(vct_data.size()-1);
201 }
202
203 /*! \brief Get an element of the vector
204 *
205 * Get an element of the vector
206 *
207 * \tparam p Property to get
208 * \param id Element to get
209 *
210 * \return the element value requested
211 *
212 */
213 template <unsigned int p>
214 __device__ inline auto get(Ti id) const -> decltype(vct_data.template get<p>(id))
215 {
216 Ti di;
217 this->_branchfree_search(id,di);
218 return vct_data.template get<p>(di);
219 }
220
221 __device__ inline auto get(Ti id) const -> decltype(vct_data.get(0))
222 {
223 Ti di;
224 Ti v = this->_branchfree_search(id,di);
225 return vct_data.get(static_cast<size_t>(di));
226 }
227
228 /*! \brief Get an element of the vector
229 *
230 * Get an element of the vector
231 *
232 * \tparam p Property to get
233 * \param id Element to get
234 *
235 * \return the element value requested
236 *
237 */
238 template <unsigned int p>
239 __device__ inline auto get(openfpm::sparse_index<Ti> id) const -> decltype(vct_data.template get<p>(id.id))
240 {
241 return vct_data.template get<p>(id.id);
242 }
243
244 /*! \brief Get an element of the vector
245 *
246 * Get an element of the vector
247 *
248 * \tparam p Property to get
249 * \param id Element to get
250 *
251 * \return the element value requested
252 *
253 */
254 template <unsigned int p>
255 __device__ inline auto get(openfpm::sparse_index<Ti> id) -> decltype(vct_data.template get<p>(id.id))
256 {
257 return vct_data.template get<p>(id.id);
258 }
259
260 /*! \brief Get the index associated to the element id
261 *
262 *
263 * \return the element value requested
264 *
265 */
266 __device__ inline Ti get_index(openfpm::sparse_index<Ti> id) const
267 {
268 return vct_index.template get<0>(id.id);
269 }
270
271 /*! \brief Get an element of the vector
272 *
273 * Get an element of the vector
274 *
275 * \tparam p Property to get
276 * \param id Element to get
277 *
278 * \return the element value requested
279 *
280 */
281 template <unsigned int p>
282 __device__ inline auto get(Ti id, Ti & di) const -> decltype(vct_data.template get<p>(id))
283 {
284 this->_branchfree_search(id,di);
285 return vct_data.template get<p>(di);
286 }
287
288 /*! \brief Get an element of the vector
289 *
290 * Get an element of the vector
291 *
292 * \tparam p Property to get
293 * \param id Element to get
294 *
295 * \return the element value requested
296 *
297 */
298 template <unsigned int p>
299 __device__ inline auto get_ele(Ti di) const -> decltype(vct_data.template get<p>(di))
300 {
301 return vct_data.template get<p>(di);
302 }
303
304 /*! \brief It insert an element in the sparse vector
305 *
306 *
307 */
308 template <unsigned int p>
309 __device__ auto insert(Ti ele) -> decltype(vct_data.template get<p>(0))
310 {
311#ifdef __NVCC__
312
313 int blockId = dim3CoordToInt(blockIdx, gridDim);
314 int slot_base = blockId;
315
316 int pos = atomicAdd(&vct_atomic_add,1);
317 vct_add_index.template get<0>(slot_base*nslot_add+pos) = ele;
318 return vct_add_data.template get<p>(slot_base*nslot_add+pos);
319#else
320 std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl;
321#endif
322 }
323
324 /*! \brief It insert an element in the sparse vector
325 *
326 * \param ele element to insert
327 *
328 * \return an object to fill the values
329 *
330 */
331 __device__ void remove(Ti ele)
332 {
333#ifdef __NVCC__
334
335 int blockId = dim3CoordToInt(blockIdx, gridDim);
336 int slot_base = blockId;
337
338 int pos = atomicAdd(&vct_atomic_rem,1);
339 vct_rem_index.template get<0>(slot_base*nslot_rem+pos) = ele;
340
341#else
342 std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl;
343#endif
344 }
345
346 /*! \brief It insert an element in the sparse vector
347 *
348 * \param ele element to insert
349 *
350 * \return an object to fill the values
351 *
352 */
353 __device__ auto insert(Ti ele) -> decltype(vct_add_data.get(0))
354 {
355#ifdef __NVCC__
356
357 int blockId = dim3CoordToInt(blockIdx, gridDim);
358 int slot_base = blockId;
359
360 int pos = atomicAdd(&vct_atomic_add,1);
361 vct_add_index.template get<0>(slot_base*nslot_add+pos) = ele;
362
363 return vct_add_data.get(slot_base*nslot_add+pos);
364#else
365 std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl;
366#endif
367 }
368
369 /*! \brief It insert an element in the sparse vector
370 *
371 *
372 */
373 __device__ void remove_b(Ti ele,Ti slot_base)
374 {
375#ifdef __NVCC__
376
377 int pos = atomicAdd(&vct_atomic_rem,1);
378 vct_rem_index.template get<0>(slot_base*nslot_rem+pos) = ele;
379
380#else
381 std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl;
382#endif
383 }
384
385 /*! \brief It insert an element in the sparse vector
386 *
387 *
388 */
389 template <unsigned int p>
390 __device__ auto insert_b(Ti ele,Ti slot_base) -> decltype(vct_data.template get<p>(0))
391 {
392#ifdef __NVCC__
393
394 int pos = atomicAdd(&vct_atomic_add,1);
395 vct_add_index.template get<0>(slot_base*nslot_add+pos) = ele;
396 return vct_add_data.template get<p>(slot_base*nslot_add+pos);
397#else
398 std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl;
399#endif
400 }
401
402 /*! \brief It insert an element in the sparse vector
403 *
404 *
405 */
406 __device__ auto insert_b(Ti ele,Ti slot_base) -> decltype(vct_add_data.get(0))
407 {
408#ifdef __NVCC__
409
410 int pos = atomicAdd(&vct_atomic_add,1);
411 vct_add_index.template get<0>(slot_base*nslot_add+pos) = ele;
412 return vct_add_data.get(slot_base*nslot_add+pos);
413#else
414 std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl;
415#endif
416 }
417
418 /*! \brief It insert an element in the sparse vector
419 *
420 *
421 */
422 __device__ void flush_block_insert()
423 {
424#ifdef __NVCC__
425
426 __syncthreads();
427
428 if (threadIdx.x == 0)
429 {
430 int blockId = dim3CoordToInt(blockIdx, gridDim);
431 vct_nadd_index.template get<0>(blockId) = vct_atomic_add;
432 }
433
434#else
435 std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl;
436#endif
437 }
438
439 /*! \brief It insert an element in the sparse vector
440 *
441 *
442 */
443 __device__ void flush_block_remove()
444 {
445#ifdef __NVCC__
446
447 __syncthreads();
448
449 if (threadIdx.x == 0)
450 {
451 int blockId = dim3CoordToInt(blockIdx, gridDim);
452 vct_nrem_index.template get<0>(blockId) = vct_atomic_rem;
453 }
454
455#else
456 std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl;
457#endif
458 }
459
460 /*! \brief It insert an element in the sparse vector
461 *
462 *
463 */
464 __device__ void flush_block_insert(Ti b, bool flusher)
465 {
466#ifdef __NVCC__
467
468 __syncthreads();
469
470 if (flusher == true)
471 {vct_nadd_index.template get<0>(b) = vct_atomic_add;}
472
473
474#else
475 std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl;
476#endif
477 }
478
479 __device__ auto private_get_data() -> decltype(vct_add_data.getBase().get_data_())
480 {
481 return vct_add_data.getBase().get_data_();
482 }
483
484 /*! \brief It insert an element in the sparse vector
485 *
486 *
487 */
488 __device__ void flush_block_remove(unsigned int b, bool flusher)
489 {
490#ifdef __NVCC__
491
492 __syncthreads();
493
494 if (flusher == true)
495 {vct_nrem_index.template get<0>(b) = vct_atomic_rem;}
496
497#else
498 std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl;
499#endif
500 }
501
502 /*! \brief Get the data buffer
503 *
504 * \return the reference to the data buffer
505 */
506 __device__ auto getAddDataBuffer() -> decltype(vct_add_data)&
507 {
508 return vct_add_data;
509 }
510
511 /*! \brief Get the data buffer
512 *
513 * \return the reference to the data buffer
514 */
515 __device__ auto getDataBuffer() -> decltype(vct_data)&
516 {
517 return vct_data;
518 }
519
520 /*! \brief Get the indices buffer
521 *
522 * \return the reference to the indices buffer
523 */
524 __device__ auto getAddIndexBuffer() const -> const decltype(vct_add_index)&
525 {
526 return vct_add_index;
527 }
528
529 /*! \brief Get the indices buffer
530 *
531 * \return the reference to the indices buffer
532 */
533 __device__ auto getIndexBuffer() const -> const decltype(vct_index)&
534 {
535 return vct_index;
536 }
537
538 /*! \brief Get the data buffer
539 *
540 * \return the reference to the data buffer
541 */
542 __device__ auto getDataBuffer() const -> const decltype(vct_data)&
543 {
544 return vct_data;
545 }
546
547#ifdef SE_CLASS1
548
549 /*! \brief Check if the device pointer is owned by this structure
550 *
551 * \return a structure pointer check with information about the match
552 *
553 */
554 pointer_check check_device_pointer(void * ptr)
555 {
556 pointer_check pc;
557
558 pc = vct_index.check_device_pointer(ptr);
559
560 if (pc.match == true)
561 {
562 pc.match_str = std::string("Index vector overflow: ") + "\n" + pc.match_str;
563 return pc;
564 }
565
566 pc = vct_data.check_device_pointer(ptr);
567
568 if (pc.match == true)
569 {
570 pc.match_str = std::string("Data vector overflow: ") + "\n" + pc.match_str;
571 return pc;
572 }
573
574 pc = vct_add_index.check_device_pointer(ptr);
575
576 if (pc.match == true)
577 {
578 pc.match_str = std::string("Add index vector overflow: ") + "\n" + pc.match_str;
579 return pc;
580 }
581
582 pc = vct_rem_index.check_device_pointer(ptr);
583
584 if (pc.match == true)
585 {
586 pc.match_str = std::string("Remove index vector overflow: ") + "\n" + pc.match_str;
587 return pc;
588 }
589
590 pc = vct_nadd_index.check_device_pointer(ptr);
591
592 if (pc.match == true)
593 {
594 pc.match_str = std::string("Add index counter vector overflow: ") + "\n" + pc.match_str;
595 return pc;
596 }
597
598 pc = vct_nrem_index.check_device_pointer(ptr);
599
600 if (pc.match == true)
601 {
602 pc.match_str = std::string("Remove index counter vector overflow: ") + "\n" + pc.match_str;
603 return pc;
604 }
605
606 pc = vct_add_data.check_device_pointer(ptr);
607
608 if (pc.match == true)
609 {
610 pc.match_str = std::string("Add data vector overflow: ") + "\n" + pc.match_str;
611 return pc;
612 }
613
614 return pc;
615 }
616
617#endif
618
619 };
620}
621
622#endif /* MAP_VECTOR_SPARSE_CUDA_KER_CUH_ */
623