1 | /* |
2 | * CudaMemory.cu |
3 | * |
4 | * Created on: Aug 17, 2014 |
5 | * Author: Pietro Incardona |
6 | */ |
7 | |
8 | /** |
9 | * \brief This class create instructions to allocate, and destroy GPU memory |
10 | * |
11 | * This class allocate, destroy, resize GPU buffer, |
12 | * eventually if direct, comunication is not supported, it can instruction |
13 | * to create an Host Pinned memory. |
14 | * |
15 | * Usage: |
16 | * |
17 | * CudaMemory m = new CudaMemory(); |
18 | * |
19 | * m.allocate(1000*sizeof(int)); |
20 | * int * ptr = m.getPointer(); |
21 | * ptr[999] = 1000; |
22 | * .... |
23 | * |
24 | * |
25 | */ |
26 | |
27 | #ifndef CUDA_MEMORY_CUH_ |
28 | #define CUDA_MEMORY_CUH_ |
29 | |
30 | #define EXCEPT_MC noexcept |
31 | |
32 | #include "config.h" |
33 | #include "memory.hpp" |
34 | #include <iostream> |
35 | |
36 | #if defined(__NVCC__) && !defined(CUDA_ON_CPU) |
37 | #include <cuda_runtime.h> |
38 | #else |
39 | #include "util/cuda_util.hpp" |
40 | #endif |
41 | |
42 | extern size_t TotCudaMemoryAllocated; |
43 | |
44 | /*! \brief given an alignment and an alignment it return the smallest number numiple of the alignment |
45 | * such that the value returned is bigger ot equal that the number given |
46 | * |
47 | * alignment 8 number 2 it return 8 |
48 | * alignment 8 number 9 it return 16 |
49 | * |
50 | * \param al alignment |
51 | * \param number |
52 | * |
53 | */ |
54 | __device__ inline size_t align_number_device(size_t al, size_t number) |
55 | { |
56 | return number + ((number % al) != 0)*(al - number % al); |
57 | } |
58 | |
59 | //! Is an array to report general error can happen in CUDA |
60 | static __device__ unsigned char global_cuda_error_array[256]; |
61 | |
62 | class CudaMemory : public memory |
63 | { |
64 | //! Is the host memory synchronized with the GPU memory |
65 | bool is_hm_sync; |
66 | |
67 | //! Size of the memory |
68 | size_t sz; |
69 | |
70 | //! device memory |
71 | void * dm; |
72 | |
73 | //! host memory |
74 | mutable void * hm; |
75 | |
76 | //! Reference counter |
77 | size_t ref_cnt; |
78 | |
79 | //! Allocate an host buffer |
80 | void allocate_host(size_t sz) const; |
81 | |
82 | //! copy from Pointer to GPU |
83 | bool copyFromPointer(const void * ptr); |
84 | |
85 | public: |
86 | |
87 | //! copy from GPU to GPU buffer directly |
88 | bool copyDeviceToDevice(const CudaMemory & m); |
89 | |
90 | //! flush the memory |
91 | virtual bool flush(); |
92 | //! allocate memory |
93 | virtual bool allocate(size_t sz); |
94 | //! destroy memory |
95 | virtual void destroy(); |
96 | //! copy from a General device |
97 | virtual bool copy(const memory & m); |
98 | //! the the size of the allocated memory |
99 | virtual size_t size() const; |
100 | //! resize the momory allocated |
101 | virtual bool resize(size_t sz); |
102 | //! get a readable pointer with the data |
103 | virtual void * getPointer(); |
104 | |
105 | //! get a readable pointer with the data |
106 | virtual const void * getPointer() const; |
107 | |
108 | //! get a readable pointer with the data |
109 | virtual void * getDevicePointer(); |
110 | |
111 | //! Move memory from host to device |
112 | virtual void hostToDevice(); |
113 | |
114 | //! Move memory from device to host |
115 | virtual void deviceToHost(); |
116 | |
117 | //! Move memory from device to host, just the selected chunk |
118 | virtual void deviceToHost(size_t start, size_t stop); |
119 | |
120 | //! Move memory from host to device, just the selected chunk |
121 | virtual void hostToDevice(size_t start, size_t top); |
122 | |
123 | //! host to device using external memory (this host memory is copied into mem device memory) |
124 | void hostToDevice(CudaMemory & mem); |
125 | |
126 | //! device to host using external memory (this device memory is copied into mem host memory) |
127 | void deviceToHost(CudaMemory & mem); |
128 | |
129 | //! fill the buffer with a byte |
130 | virtual void fill(unsigned char c); |
131 | |
132 | //! This function notify that the device memory is not sync with |
133 | //! the host memory, is called when a task is performed that write |
134 | //! on the buffer |
135 | void isNotSync() {is_hm_sync = false;} |
136 | |
137 | public: |
138 | |
139 | //! Increment the reference counter |
140 | virtual void incRef() |
141 | {ref_cnt++;} |
142 | |
143 | //! Decrement the reference counter |
144 | virtual void decRef() |
145 | {ref_cnt--;} |
146 | |
147 | //! Return the reference counter |
148 | virtual long int ref() |
149 | { |
150 | return ref_cnt; |
151 | } |
152 | |
153 | /*! \brief Allocated Memory is never initialized |
154 | * |
155 | * \return false |
156 | * |
157 | */ |
158 | bool isInitialized() |
159 | { |
160 | return false; |
161 | } |
162 | |
163 | // Copy the memory (device and host) |
164 | CudaMemory & operator=(const CudaMemory & mem) |
165 | { |
166 | copy(mem); |
167 | return *this; |
168 | } |
169 | |
170 | // Copy the Cuda memory |
171 | CudaMemory(const CudaMemory & mem) |
172 | :CudaMemory() |
173 | { |
174 | allocate(mem.size()); |
175 | copy(mem); |
176 | } |
177 | |
178 | CudaMemory(CudaMemory && mem) EXCEPT_MC |
179 | { |
180 | is_hm_sync = mem.is_hm_sync; |
181 | sz = mem.sz; |
182 | dm = mem.dm; |
183 | hm = mem.hm; |
184 | ref_cnt = mem.ref_cnt; |
185 | |
186 | // reset mem |
187 | mem.is_hm_sync = false; |
188 | mem.sz = 0; |
189 | mem.dm = NULL; |
190 | mem.hm = NULL; |
191 | mem.ref_cnt = 0; |
192 | } |
193 | |
194 | //! Constructor |
195 | CudaMemory():is_hm_sync(true),sz(0),dm(0),hm(0),ref_cnt(0) {}; |
196 | |
197 | //! Constructor |
198 | CudaMemory(size_t sz):is_hm_sync(true),sz(0),dm(0),hm(0),ref_cnt(0) |
199 | { |
200 | allocate(sz); |
201 | }; |
202 | |
203 | //! Destructor |
204 | ~CudaMemory() |
205 | { |
206 | if(ref_cnt == 0) |
207 | destroy(); |
208 | else |
209 | std::cerr << "Error: " << __FILE__ << " " << __LINE__ << " destroying a live object" << "\n" ; |
210 | }; |
211 | |
212 | /*! \brief copy memory from device to device |
213 | * |
214 | * \param external device pointer |
215 | * \param start source starting point (where it start to copy) |
216 | * \param stop end point |
217 | * \param offset where to copy in the device pointer |
218 | * |
219 | */ |
220 | void deviceToDevice(void * ptr, size_t start, size_t stop, size_t offset); |
221 | |
222 | void swap(CudaMemory & mem); |
223 | |
224 | /*! \brief Return true if the device and the host pointer are the same |
225 | * |
226 | * \return true if they are the same |
227 | * |
228 | */ |
229 | static bool isDeviceHostSame() |
230 | { |
231 | return false; |
232 | } |
233 | |
234 | /*! \brief return the device memory |
235 | * |
236 | * \see equivalent to getDevicePointer() |
237 | * |
238 | */ |
239 | void * toKernel() |
240 | { |
241 | return getDevicePointer(); |
242 | } |
243 | }; |
244 | |
245 | |
246 | #endif |
247 | |
248 | |