map_vector_sparse_cuda_ker.cuh source code [openfpm/openfpm_data/src/Vector/cuda/map_vector_sparse_cuda_ker.cuh]

1	/*
2	* map_vector_sparse_cuda_ker.cuh
3	*
4	* Created on: Jan 23, 2019
5	* Author: i-bird
6	*/
7
8	#ifndef MAP_VECTOR_SPARSE_CUDA_KER_CUH_
9	#define MAP_VECTOR_SPARSE_CUDA_KER_CUH_
10
11	#include "util/for_each_ref.hpp"
12
13	//todo: Check where it's a good place to put the following method...
14	template<typename dim3Ta, typename dim3Tb>
15	inline __device__ __host__ int dim3CoordToInt(const dim3Ta & coord, const dim3Tb & dimensions)
16	{
17	int res = coord.z;
18	res *= dimensions.y;
19	res += coord.y;
20	res *= dimensions.x;
21	res += coord.x;
22	return res;
23	}
24	// Specialization allowing transparency
25	inline __device__ __host__ int dim3CoordToInt(int coord, int dimension)
26	{
27	return coord;
28	}
29
30	namespace openfpm
31	{
32	template<typename index_type>
33	struct sparse_index
34	{
35	index_type id;
36	};
37
38	#if defined(__NVCC__) && !defined(CUDA_ON_CPU)
39	static __shared__ int vct_atomic_add;
40	static __shared__ int vct_atomic_rem;
41	#endif
42
43	template<typename T,
44	typename Ti,
45	template<typename> class layout_base>
46	class vector_sparse_gpu_ker
47	{
48	vector_gpu_ker<aggregate<Ti>,layout_base> vct_index;
49
50	vector_gpu_ker<T,layout_base> vct_data;
51
52	vector_gpu_ker<aggregate<Ti>,layout_base> vct_add_index;
53
54	vector_gpu_ker<aggregate<Ti>,layout_base> vct_rem_index;
55
56	vector_gpu_ker<aggregate<Ti>,layout_base> vct_nadd_index;
57
58	vector_gpu_ker<aggregate<Ti>,layout_base> vct_nrem_index;
59
60	vector_gpu_ker<T,layout_base> vct_add_data;
61
62	// the const is forced by the getter that only return const encap that should not allow the modification of bck
63	// this should possible avoid to define an object const_encap
64	//mutable vector_gpu_ker<T,layout_base> vct_data_bck;
65
66	int nslot_add;
67	int nslot_rem;
68
69	/! \brief get the element i*
70	*
71	* search the element x
72	*
73	* \param i element i
74	*/
75	inline __device__ void _branchfree_search(Ti x, Ti & id) const
76	{
77	if (vct_index.size() == `0`) {id = `0`; return;}
78	const Ti *base = &vct_index.template get<`0`>(`0`);
79	const Ti end = (const* Ti *)vct_index.template getPointer<`0`>() + vct_index.size();
80	Ti n = vct_data.size()-`1`;
81	while (n > `1`)
82	{
83	Ti half = n / `2`;
84	base = (base[half] < x) ? base+half : base;
85	n -= half;
86	}
87
88	int off = (*base < x);
89	id = base - &vct_index.template get<`0`>(`0`) + off;
90	Ti v = (base + off != end)?*(base + off):(Ti)-`1`;
91	id = (x == v)?id:vct_data.size()-`1`;
92	}
93
94	public:
95
96	typedef Ti index_type;
97
98	//! Indicate this structure has a function to check the device pointer
99	typedef int yes_has_check_device_pointer;
100
101	vector_sparse_gpu_ker(vector_gpu_ker<aggregate<Ti>,layout_base> vct_index,
102	vector_gpu_ker<T,layout_base> vct_data,
103	vector_gpu_ker<aggregate<Ti>,layout_base> vct_add_index,
104	vector_gpu_ker<aggregate<Ti>,layout_base> vct_rem_index,
105	vector_gpu_ker<T,layout_base> vct_add_data,
106	vector_gpu_ker<aggregate<Ti>,layout_base> vct_nadd_index,
107	vector_gpu_ker<aggregate<Ti>,layout_base> vct_nrem_index,
108	int nslot_add,
109	int nslot_rem)
110	:vct_index(vct_index),vct_data(vct_data),
111	vct_add_index(vct_add_index),vct_rem_index(vct_rem_index),vct_add_data(vct_add_data),
112	vct_nadd_index(vct_nadd_index),vct_nrem_index(vct_nrem_index),
113	nslot_add(nslot_add),nslot_rem(nslot_rem)
114	{}
115
116	/! \brief Get the number of elements*
117	*
118	* \return the number of elements
119	*
120	*/
121	__device__ inline int size()
122	{
123	return vct_index.size();
124	}
125
126	/! \brief This function must be called*
127	*
128	*/
129	__device__ inline void init()
130	{
131	#ifdef __NVCC__
132	if (threadIdx.x == `0`)
133	{
134	vct_atomic_add = `0`;
135	vct_atomic_rem = `0`;
136	}
137
138	__syncthreads();
139	#endif
140	}
141
142	/! \brief This function must be called*
143	*
144	*/
145	__device__ inline void init_ins_inc()
146	{
147	#ifdef __NVCC__
148	if (threadIdx.x == `0`)
149	{
150	int blockId = dim3CoordToInt(blockIdx, gridDim);
151	vct_atomic_add = vct_nadd_index.template get<`0`>(blockId);
152	}
153
154	__syncthreads();
155	#endif
156	}
157
158	/! \brief This function must be called*
159	*
160	*/
161	__device__ inline void init_rem_inc()
162	{
163	#ifdef __NVCC__
164	if (threadIdx.x == `0`)
165	{
166	int blockId = dim3CoordToInt(blockIdx, gridDim);
167	vct_atomic_rem = vct_nrem_index.template get<`0`>(blockId);
168	}
169
170	__syncthreads();
171	#endif
172	}
173
174	/! \brief Get the sparse index*
175	*
176	* Get the sparse index of the element id
177	*
178	* \note use get_index and get to retrieve the value index associated to the sparse index
179	*
180	* \param id Element to get
181	*
182	* \return the element value requested
183	*
184	*/
185	__device__ inline openfpm::sparse_index<Ti> get_sparse(Ti id) const
186	{
187	Ti di;
188	this->_branchfree_search(id,di);
189	openfpm::sparse_index<Ti> sid;
190	sid.id = di;
191
192	return sid;
193	}
194
195	/! \brief Get the background value*
196	*/
197	template <unsigned int p>
198	__device__ inline auto getBackground() const -> decltype(vct_data.template get<p>(`0`)) &
199	{
200	return vct_data.template get<p>(vct_data.size()-`1`);
201	}
202
203	/! \brief Get an element of the vector*
204	*
205	* Get an element of the vector
206	*
207	* \tparam p Property to get
208	* \param id Element to get
209	*
210	* \return the element value requested
211	*
212	*/
213	template <unsigned int p>
214	__device__ inline auto get(Ti id) const -> decltype(vct_data.template get<p>(id))
215	{
216	Ti di;
217	this->_branchfree_search(id,di);
218	return vct_data.template get<p>(di);
219	}
220
221	__device__ inline auto get(Ti id) const -> decltype(vct_data.get(`0`))
222	{
223	Ti di;
224	Ti v = this->_branchfree_search(id,di);
225	return vct_data.get(static_cast<size_t>(di));
226	}
227
228	/! \brief Get an element of the vector*
229	*
230	* Get an element of the vector
231	*
232	* \tparam p Property to get
233	* \param id Element to get
234	*
235	* \return the element value requested
236	*
237	*/
238	template <unsigned int p>
239	__device__ inline auto get(openfpm::sparse_index<Ti> id) const -> decltype(vct_data.template get<p>(id.id))
240	{
241	return vct_data.template get<p>(id.id);
242	}
243
244	/! \brief Get an element of the vector*
245	*
246	* Get an element of the vector
247	*
248	* \tparam p Property to get
249	* \param id Element to get
250	*
251	* \return the element value requested
252	*
253	*/
254	template <unsigned int p>
255	__device__ inline auto get(openfpm::sparse_index<Ti> id) -> decltype(vct_data.template get<p>(id.id))
256	{
257	return vct_data.template get<p>(id.id);
258	}
259
260	/! \brief Get the index associated to the element id*
261	*
262	*
263	* \return the element value requested
264	*
265	*/
266	__device__ inline Ti get_index(openfpm::sparse_index<Ti> id) const
267	{
268	return vct_index.template get<`0`>(id.id);
269	}
270
271	/! \brief Get an element of the vector*
272	*
273	* Get an element of the vector
274	*
275	* \tparam p Property to get
276	* \param id Element to get
277	*
278	* \return the element value requested
279	*
280	*/
281	template <unsigned int p>
282	__device__ inline auto get(Ti id, Ti & di) const -> decltype(vct_data.template get<p>(id))
283	{
284	this->_branchfree_search(id,di);
285	return vct_data.template get<p>(di);
286	}
287
288	/! \brief Get an element of the vector*
289	*
290	* Get an element of the vector
291	*
292	* \tparam p Property to get
293	* \param id Element to get
294	*
295	* \return the element value requested
296	*
297	*/
298	template <unsigned int p>
299	__device__ inline auto get_ele(Ti di) const -> decltype(vct_data.template get<p>(di))
300	{
301	return vct_data.template get<p>(di);
302	}
303
304	/! \brief It insert an element in the sparse vector*
305	*
306	*
307	*/
308	template <unsigned int p>
309	__device__ auto insert(Ti ele) -> decltype(vct_data.template get<p>(`0`))
310	{
311	#ifdef __NVCC__
312
313	int blockId = dim3CoordToInt(blockIdx, gridDim);
314	int slot_base = blockId;
315
316	int pos = atomicAdd(&vct_atomic_add,`1`);
317	vct_add_index.template get<`0`>(slot_base*nslot_add+pos) = ele;
318	return vct_add_data.template get<p>(slot_base*nslot_add+pos);
319	#else
320	std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl;
321	#endif
322	}
323
324	/! \brief It insert an element in the sparse vector*
325	*
326	* \param ele element to insert
327	*
328	* \return an object to fill the values
329	*
330	*/
331	__device__ void remove(Ti ele)
332	{
333	#ifdef __NVCC__
334
335	int blockId = dim3CoordToInt(blockIdx, gridDim);
336	int slot_base = blockId;
337
338	int pos = atomicAdd(&vct_atomic_rem,`1`);
339	vct_rem_index.template get<`0`>(slot_base*nslot_rem+pos) = ele;
340
341	#else
342	std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl;
343	#endif
344	}
345
346	/! \brief It insert an element in the sparse vector*
347	*
348	* \param ele element to insert
349	*
350	* \return an object to fill the values
351	*
352	*/
353	__device__ auto insert(Ti ele) -> decltype(vct_add_data.get(`0`))
354	{
355	#ifdef __NVCC__
356
357	int blockId = dim3CoordToInt(blockIdx, gridDim);
358	int slot_base = blockId;
359
360	int pos = atomicAdd(&vct_atomic_add,`1`);
361	vct_add_index.template get<`0`>(slot_base*nslot_add+pos) = ele;
362
363	return vct_add_data.get(slot_base*nslot_add+pos);
364	#else
365	std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl;
366	#endif
367	}
368
369	/! \brief It insert an element in the sparse vector*
370	*
371	*
372	*/
373	__device__ void remove_b(Ti ele,Ti slot_base)
374	{
375	#ifdef __NVCC__
376
377	int pos = atomicAdd(&vct_atomic_rem,`1`);
378	vct_rem_index.template get<`0`>(slot_base*nslot_rem+pos) = ele;
379
380	#else
381	std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl;
382	#endif
383	}
384
385	/! \brief It insert an element in the sparse vector*
386	*
387	*
388	*/
389	template <unsigned int p>
390	__device__ auto insert_b(Ti ele,Ti slot_base) -> decltype(vct_data.template get<p>(`0`))
391	{
392	#ifdef __NVCC__
393
394	int pos = atomicAdd(&vct_atomic_add,`1`);
395	vct_add_index.template get<`0`>(slot_base*nslot_add+pos) = ele;
396	return vct_add_data.template get<p>(slot_base*nslot_add+pos);
397	#else
398	std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl;
399	#endif
400	}
401
402	/! \brief It insert an element in the sparse vector*
403	*
404	*
405	*/
406	__device__ auto insert_b(Ti ele,Ti slot_base) -> decltype(vct_add_data.get(`0`))
407	{
408	#ifdef __NVCC__
409
410	int pos = atomicAdd(&vct_atomic_add,`1`);
411	vct_add_index.template get<`0`>(slot_base*nslot_add+pos) = ele;
412	return vct_add_data.get(slot_base*nslot_add+pos);
413	#else
414	std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl;
415	#endif
416	}
417
418	/! \brief It insert an element in the sparse vector*
419	*
420	*
421	*/
422	__device__ void flush_block_insert()
423	{
424	#ifdef __NVCC__
425
426	__syncthreads();
427
428	if (threadIdx.x == `0`)
429	{
430	int blockId = dim3CoordToInt(blockIdx, gridDim);
431	vct_nadd_index.template get<`0`>(blockId) = vct_atomic_add;
432	}
433
434	#else
435	std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl;
436	#endif
437	}
438
439	/! \brief It insert an element in the sparse vector*
440	*
441	*
442	*/
443	__device__ void flush_block_remove()
444	{
445	#ifdef __NVCC__
446
447	__syncthreads();
448
449	if (threadIdx.x == `0`)
450	{
451	int blockId = dim3CoordToInt(blockIdx, gridDim);
452	vct_nrem_index.template get<`0`>(blockId) = vct_atomic_rem;
453	}
454
455	#else
456	std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl;
457	#endif
458	}
459
460	/! \brief It insert an element in the sparse vector*
461	*
462	*
463	*/
464	__device__ void flush_block_insert(Ti b, bool flusher)
465	{
466	#ifdef __NVCC__
467
468	__syncthreads();
469
470	if (flusher == true)
471	{vct_nadd_index.template get<`0`>(b) = vct_atomic_add;}
472
473
474	#else
475	std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl;
476	#endif
477	}
478
479	__device__ auto private_get_data() -> decltype(vct_add_data.getBase().get_data_())
480	{
481	return vct_add_data.getBase().get_data_();
482	}
483
484	/! \brief It insert an element in the sparse vector*
485	*
486	*
487	*/
488	__device__ void flush_block_remove(unsigned int b, bool flusher)
489	{
490	#ifdef __NVCC__
491
492	__syncthreads();
493
494	if (flusher == true)
495	{vct_nrem_index.template get<`0`>(b) = vct_atomic_rem;}
496
497	#else
498	std::cout << __FILE__ << ":" << __LINE__ << " Error, this function in order to work is supposed to be compiled with nvcc" << std::endl;
499	#endif
500	}
501
502	/! \brief Get the data buffer*
503	*
504	* \return the reference to the data buffer
505	*/
506	__device__ auto getAddDataBuffer() -> decltype(vct_add_data)&
507	{
508	return vct_add_data;
509	}
510
511	/! \brief Get the data buffer*
512	*
513	* \return the reference to the data buffer
514	*/
515	__device__ auto getDataBuffer() -> decltype(vct_data)&
516	{
517	return vct_data;
518	}
519
520	/! \brief Get the indices buffer*
521	*
522	* \return the reference to the indices buffer
523	*/
524	__device__ auto getAddIndexBuffer() const -> const decltype(vct_add_index)&
525	{
526	return vct_add_index;
527	}
528
529	/! \brief Get the indices buffer*
530	*
531	* \return the reference to the indices buffer
532	*/
533	__device__ auto getIndexBuffer() const -> const decltype(vct_index)&
534	{
535	return vct_index;
536	}
537
538	/! \brief Get the data buffer*
539	*
540	* \return the reference to the data buffer
541	*/
542	__device__ auto getDataBuffer() const -> const decltype(vct_data)&
543	{
544	return vct_data;
545	}
546
547	#ifdef SE_CLASS1
548
549	/! \brief Check if the device pointer is owned by this structure*
550	*
551	* \return a structure pointer check with information about the match
552	*
553	*/
554	pointer_check check_device_pointer(void * ptr)
555	{
556	pointer_check pc;
557
558	pc = vct_index.check_device_pointer(ptr);
559
560	if (pc.match == true)
561	{
562	pc.match_str = std::string("Index vector overflow: ") + "\n" + pc.match_str;
563	return pc;
564	}
565
566	pc = vct_data.check_device_pointer(ptr);
567
568	if (pc.match == true)
569	{
570	pc.match_str = std::string("Data vector overflow: ") + "\n" + pc.match_str;
571	return pc;
572	}
573
574	pc = vct_add_index.check_device_pointer(ptr);
575
576	if (pc.match == true)
577	{
578	pc.match_str = std::string("Add index vector overflow: ") + "\n" + pc.match_str;
579	return pc;
580	}
581
582	pc = vct_rem_index.check_device_pointer(ptr);
583
584	if (pc.match == true)
585	{
586	pc.match_str = std::string("Remove index vector overflow: ") + "\n" + pc.match_str;
587	return pc;
588	}
589
590	pc = vct_nadd_index.check_device_pointer(ptr);
591
592	if (pc.match == true)
593	{
594	pc.match_str = std::string("Add index counter vector overflow: ") + "\n" + pc.match_str;
595	return pc;
596	}
597
598	pc = vct_nrem_index.check_device_pointer(ptr);
599
600	if (pc.match == true)
601	{
602	pc.match_str = std::string("Remove index counter vector overflow: ") + "\n" + pc.match_str;
603	return pc;
604	}
605
606	pc = vct_add_data.check_device_pointer(ptr);
607
608	if (pc.match == true)
609	{
610	pc.match_str = std::string("Add data vector overflow: ") + "\n" + pc.match_str;
611	return pc;
612	}
613
614	return pc;
615	}
616
617	#endif
618
619	};
620	}
621
622	#endif /* MAP_VECTOR_SPARSE_CUDA_KER_CUH_ */
623

Browse the source code of openfpm/openfpm_data/src/Vector/cuda/map_vector_sparse_cuda_ker.cuh