1 | /* |
2 | * ofp_context.hxx |
3 | * |
4 | * Created on: Nov 15, 2018 |
5 | * Author: i-bird |
6 | */ |
7 | |
8 | #ifndef OFP_CONTEXT_HXX_ |
9 | #define OFP_CONTEXT_HXX_ |
10 | |
11 | #include <iostream> |
12 | |
13 | #ifdef CUDA_ON_CPU |
14 | |
15 | namespace mgpu |
16 | { |
17 | enum gpu_context_opt |
18 | { |
19 | no_print_props,//!< no_print_props |
20 | print_props, //!< print_props |
21 | dummy //!< dummy |
22 | }; |
23 | |
24 | struct context_t {}; |
25 | |
26 | class ofp_context_t : public context_t |
27 | { |
28 | protected: |
29 | |
30 | std::string _props; |
31 | |
32 | openfpm::vector<aggregate<unsigned char>> tmem; |
33 | |
		// No-op initialization: in the CUDA_ON_CPU build there is no device
		// to query or configure.
36 | template<int dummy_arg = 0> |
37 | void init(int dev_num, gpu_context_opt opt) |
38 | {} |
39 | |
40 | public: |
41 | |
		/*! \brief gpu context constructor
		 *
		 * \param opt options for this gpu context
		 * \param dev_num device number to select (unused in the CUDA_ON_CPU build)
		 * \param stream_ stream associated to this context (unused in the CUDA_ON_CPU build)
		 *
		 */
		ofp_context_t(gpu_context_opt opt = gpu_context_opt::no_print_props, int dev_num = 0, int stream_ = 0)
48 | {} |
49 | |
50 | ~ofp_context_t() |
51 | {} |
52 | |
53 | virtual const std::string& props() const |
54 | { |
55 | return _props; |
56 | } |
57 | |
58 | virtual int ptx_version() const |
59 | { |
60 | return 0; |
61 | } |
62 | |
63 | virtual int stream() |
64 | { |
65 | std::cout << __FILE__ << ":" << __LINE__ << " Not implemented" << std::endl; |
66 | return 0; |
67 | } |
68 | |
69 | // Alloc GPU memory. |
70 | virtual void* alloc(size_t size, int space) |
71 | { |
72 | std::cout << __FILE__ << ":" << __LINE__ << " Not implemented" << std::endl; |
73 | return NULL; |
74 | } |
75 | |
76 | virtual void free(void* p, int space) |
77 | { |
78 | std::cout << __FILE__ << ":" << __LINE__ << " Not implemented" << std::endl; |
79 | } |
80 | |
81 | virtual void synchronize() |
82 | { |
83 | std::cout << __FILE__ << ":" << __LINE__ << " Not implemented" << std::endl; |
84 | } |
85 | |
86 | virtual int event() |
87 | { |
88 | std::cout << __FILE__ << ":" << __LINE__ << " Not implemented" << std::endl; |
89 | return 0; |
90 | } |
91 | |
92 | virtual void timer_begin() |
93 | { |
94 | std::cout << __FILE__ << ":" << __LINE__ << " Not implemented" << std::endl; |
95 | } |
96 | |
97 | virtual double timer_end() |
98 | { |
99 | std::cout << __FILE__ << ":" << __LINE__ << " Not implemented" << std::endl; |
100 | return 0.0; |
101 | } |
102 | |
103 | virtual int getDevice() |
104 | { |
105 | std::cout << __FILE__ << ":" << __LINE__ << " Not implemented" << std::endl; |
106 | return 0; |
107 | } |
108 | }; |
109 | |
110 | } |
111 | |
112 | #else |
113 | |
114 | #ifdef CUDA_GPU |
115 | |
116 | #ifdef __NVCC__ |
117 | |
118 | #include "util/cuda/moderngpu/context.hxx" |
119 | |
120 | namespace mgpu |
121 | { |
122 | enum gpu_context_opt |
123 | { |
124 | no_print_props,//!< no_print_props |
125 | print_props, //!< print_props |
126 | dummy //!< dummy |
127 | }; |
128 | |
129 | |
130 | //////////////////////////////////////////////////////////////////////////////// |
131 | // standard_context_t is a trivial implementation of context_t. Users can |
132 | // derive this type to provide a custom allocator. |
133 | |
134 | class ofp_context_t : public context_t |
135 | { |
136 | protected: |
137 | cudaDeviceProp _props; |
138 | int _ptx_version; |
139 | cudaStream_t _stream; |
140 | |
141 | cudaEvent_t _timer[2]; |
142 | cudaEvent_t _event; |
143 | |
144 | openfpm::vector_gpu<aggregate<unsigned char>> tmem; |
145 | openfpm::vector_gpu<aggregate<unsigned char>> tmem2; |
146 | openfpm::vector_gpu<aggregate<unsigned char>> tmem3; |
147 | |
148 | // Making this a template argument means we won't generate an instance |
149 | // of dummy_k for each translation unit. |
150 | template<int dummy_arg = 0> |
151 | void init(int dev_num, gpu_context_opt opt) |
152 | { |
153 | cudaFuncAttributes attr; |
154 | cudaError_t result = cudaFuncGetAttributes(&attr, dummy_k<0>); |
155 | if(cudaSuccess != result) throw cuda_exception_t(result); |
156 | _ptx_version = attr.ptxVersion; |
157 | |
158 | int num_dev; |
159 | cudaGetDeviceCount(&num_dev); |
160 | |
161 | if (num_dev == 0) {return;} |
162 | |
163 | if (opt != gpu_context_opt::dummy) |
164 | { |
165 | cudaSetDevice(dev_num % num_dev); |
166 | } |
167 | |
168 | int ord; |
169 | cudaGetDevice(&ord); |
170 | cudaGetDeviceProperties(&_props, ord); |
171 | |
172 | cudaEventCreate(&_timer[0]); |
173 | cudaEventCreate(&_timer[1]); |
174 | cudaEventCreate(&_event); |
175 | } |
176 | |
177 | public: |
178 | |
179 | |
		/*! \brief gpu context constructor
		 *
		 * \param opt options for this gpu context
		 * \param dev_num device number to select (taken modulo the number of available devices)
		 * \param stream_ cuda stream associated to this context (0 = default stream)
		 *
		 */
		ofp_context_t(gpu_context_opt opt = gpu_context_opt::no_print_props, int dev_num = 0, cudaStream_t stream_ = 0)
186 | :context_t(), _stream(stream_) |
187 | { |
188 | init(dev_num,opt); |
189 | if(opt == gpu_context_opt::print_props) |
190 | { |
191 | printf("%s\n" , device_prop_string(_props).c_str()); |
192 | } |
193 | } |
194 | |
195 | ~ofp_context_t() |
196 | { |
197 | cudaEventDestroy(_timer[0]); |
198 | cudaEventDestroy(_timer[1]); |
199 | cudaEventDestroy(_event); |
200 | } |
201 | |
202 | virtual const cudaDeviceProp& props() const { return _props; } |
203 | virtual int ptx_version() const { return _ptx_version; } |
204 | virtual cudaStream_t stream() { return _stream; } |
205 | |
206 | // Alloc GPU memory. |
207 | virtual void* alloc(size_t size, memory_space_t space) |
208 | { |
209 | void* p = nullptr; |
210 | if(size) |
211 | { |
				cudaError_t result = (memory_space_device == space) ? cudaMalloc(&p, size) : cudaMallocHost(&p, size);
213 | if(cudaSuccess != result) throw cuda_exception_t(result); |
214 | } |
215 | return p; |
216 | } |
217 | |
218 | virtual void free(void* p, memory_space_t space) |
219 | { |
220 | if(p) |
221 | { |
222 | cudaError_t result = (memory_space_device == space) ? cudaFree(p) : cudaFreeHost(p); |
223 | if(cudaSuccess != result) throw cuda_exception_t(result); |
224 | } |
225 | } |
226 | |
227 | virtual void synchronize() |
228 | { |
229 | cudaError_t result = _stream ? |
230 | cudaStreamSynchronize(_stream) : |
231 | cudaDeviceSynchronize(); |
232 | if(cudaSuccess != result) throw cuda_exception_t(result); |
233 | } |
234 | |
235 | virtual cudaEvent_t event() |
236 | { |
237 | return _event; |
238 | } |
239 | |
		//! Start the timer: record an event on the context's stream
		virtual void timer_begin()
		{
			cudaEventRecord(_timer[0], _stream);
		}

		//! Stop the timer and return the time elapsed since timer_begin(), in seconds
		virtual double timer_end()
		{
			cudaEventRecord(_timer[1], _stream);
			cudaEventSynchronize(_timer[1]);
			float ms;
			cudaEventElapsedTime(&ms, _timer[0], _timer[1]);
			return ms / 1.0e3;
		}
253 | |
254 | virtual int getDevice() |
255 | { |
256 | int dev = 0; |
257 | |
258 | cudaGetDevice(&dev); |
259 | |
260 | return dev; |
261 | } |
262 | |
263 | virtual int getNDevice() |
264 | { |
265 | int num_dev; |
266 | cudaGetDeviceCount(&num_dev); |
267 | |
268 | return num_dev; |
269 | } |
270 | |
		//! Return the first temporal (scratch) buffer, intended as temporary storage for CUB calls
		openfpm::vector_gpu<aggregate<unsigned char>> & getTemporalCUB()
		{
			return tmem;
		}

		//! Return the second temporal (scratch) buffer
		openfpm::vector_gpu<aggregate<unsigned char>> & getTemporalCUB2()
		{
			return tmem2;
		}

		//! Return the third temporal (scratch) buffer
		openfpm::vector_gpu<aggregate<unsigned char>> & getTemporalCUB3()
		{
			return tmem3;
		}
285 | }; |
286 | |
287 | } |
288 | |
289 | #else |
290 | |
291 | #include "util/cuda/moderngpu/context_reduced.hxx" |
292 | |
293 | namespace mgpu |
294 | { |
295 | enum gpu_context_opt |
296 | { |
297 | no_print_props,//!< no_print_props |
298 | print_props, //!< print_props |
299 | dummy //!< dummy |
300 | }; |
301 | |
302 | |
303 | //////////////////////////////////////////////////////////////////////////////// |
304 | // standard_context_t is a trivial implementation of context_t. Users can |
305 | // derive this type to provide a custom allocator. |
306 | |
307 | class ofp_context_t : public context_t |
308 | { |
309 | protected: |
310 | cudaDeviceProp _props; |
311 | int _ptx_version; |
312 | cudaStream_t _stream; |
313 | |
314 | cudaEvent_t _timer[2]; |
315 | cudaEvent_t _event; |
316 | |
317 | openfpm::vector<aggregate<unsigned char>> tmem; |
318 | |
		// Without NVCC we cannot query the PTX version of the device code,
		// so the PTX version is simply set to 0 and only the device is selected.
		template<int dummy_arg = 0>
		void init(int dev_num, gpu_context_opt opt)
		{
			_ptx_version = 0;
327 | |
328 | int num_dev; |
329 | cudaGetDeviceCount(&num_dev); |
330 | |
331 | if (num_dev == 0) {return;} |
332 | |
333 | if (opt != gpu_context_opt::dummy) |
334 | { |
335 | cudaSetDevice(dev_num % num_dev); |
336 | } |
337 | |
338 | int ord; |
339 | cudaGetDevice(&ord); |
340 | cudaGetDeviceProperties(&_props, ord); |
341 | |
342 | cudaEventCreate(&_timer[0]); |
343 | cudaEventCreate(&_timer[1]); |
344 | cudaEventCreate(&_event); |
345 | } |
346 | |
347 | public: |
348 | |
		/*! \brief gpu context constructor
		 *
		 * \param opt options for this gpu context
		 * \param dev_num device number to select (taken modulo the number of available devices)
		 * \param stream_ cuda stream associated to this context (0 = default stream)
		 *
		 */
		ofp_context_t(gpu_context_opt opt = gpu_context_opt::no_print_props, int dev_num = 0, cudaStream_t stream_ = 0)
355 | :context_t(), _stream(stream_) |
356 | { |
357 | init(dev_num,opt); |
358 | if(opt == gpu_context_opt::print_props) |
359 | { |
360 | printf("%s\n" , device_prop_string(_props).c_str()); |
361 | } |
362 | } |
363 | |
364 | ~ofp_context_t() |
365 | { |
366 | cudaEventDestroy(_timer[0]); |
367 | cudaEventDestroy(_timer[1]); |
368 | cudaEventDestroy(_event); |
369 | } |
370 | |
371 | virtual const cudaDeviceProp& props() const |
372 | { |
373 | return _props; |
374 | } |
375 | |
376 | virtual int ptx_version() const |
377 | { |
			std::cout << __FILE__ << ":" << __LINE__ << " error: to use ptx_version() the class ofp_context_t must be compiled with NVCC" << std::endl;
379 | return 0; |
380 | } |
381 | |
382 | virtual cudaStream_t stream() { return _stream; } |
383 | |
384 | // Alloc GPU memory. |
385 | virtual void* alloc(size_t size, memory_space_t space) |
386 | { |
387 | void* p = nullptr; |
388 | if(size) |
389 | { |
				cudaError_t result = (memory_space_device == space) ? cudaMalloc(&p, size) : cudaMallocHost(&p, size);
391 | if(cudaSuccess != result) throw cuda_exception_t(result); |
392 | } |
393 | return p; |
394 | } |
395 | |
396 | virtual void free(void* p, memory_space_t space) |
397 | { |
398 | if(p) |
399 | { |
400 | cudaError_t result = (memory_space_device == space) ? cudaFree(p) : cudaFreeHost(p); |
401 | if(cudaSuccess != result) throw cuda_exception_t(result); |
402 | } |
403 | } |
404 | |
405 | virtual void synchronize() |
406 | { |
407 | cudaError_t result = _stream ? |
408 | cudaStreamSynchronize(_stream) : |
409 | cudaDeviceSynchronize(); |
410 | if(cudaSuccess != result) throw cuda_exception_t(result); |
411 | } |
412 | |
413 | virtual cudaEvent_t event() |
414 | { |
415 | return _event; |
416 | } |
417 | |
418 | virtual void timer_begin() |
419 | { |
420 | cudaEventRecord(_timer[0], _stream); |
421 | } |
422 | |
423 | virtual double timer_end() |
424 | { |
425 | cudaEventRecord(_timer[1], _stream); |
426 | cudaEventSynchronize(_timer[1]); |
427 | float ms; |
428 | cudaEventElapsedTime(&ms, _timer[0], _timer[1]); |
429 | return ms / 1.0e3; |
430 | } |
431 | |
432 | virtual int getDevice() |
433 | { |
434 | int dev = 0; |
435 | |
436 | cudaGetDevice(&dev); |
437 | |
438 | return dev; |
439 | } |
440 | }; |
441 | |
442 | } |
443 | |
#endif /* __NVCC__ */
445 | |
446 | #else |
447 | |
448 | namespace mgpu |
449 | { |
450 | |
451 | enum gpu_context_opt |
452 | { |
453 | no_print_props,//!< no_print_props |
454 | print_props, //!< print_props |
455 | dummy //!< dummy |
456 | }; |
457 | |
	// Stub context class used when neither CUDA_ON_CPU nor CUDA_GPU is defined
459 | |
460 | struct ofp_context_t |
461 | { |
		ofp_context_t(gpu_context_opt opt = gpu_context_opt::no_print_props, int dev_num = 0)
463 | {} |
464 | }; |
465 | } |
466 | |
#endif /* CUDA_GPU */
468 | |
#endif /* CUDA_ON_CPU */
470 | |
471 | |
472 | #endif /* OFP_CONTEXT_HXX_ */ |
473 | |