1/*
2 * CudaMemory.cu
3 *
4 * Created on: Aug 17, 2014
5 * Author: Pietro Incardona
6 */
7
8/**
9 * \brief This class create instructions to allocate, and destroy GPU memory
10 *
11 * This class allocate, destroy, resize GPU buffer,
12 * eventually if direct, comunication is not supported, it can instruction
13 * to create an Host Pinned memory.
14 *
15 * Usage:
16 *
17 * CudaMemory m = new CudaMemory();
18 *
19 * m.allocate(1000*sizeof(int));
20 * int * ptr = m.getPointer();
21 * ptr[999] = 1000;
22 * ....
23 *
24 *
25 */
26
27#ifndef CUDA_MEMORY_CUH_
28#define CUDA_MEMORY_CUH_
29
30#define EXCEPT_MC noexcept
31
32#include "config.h"
33#include "memory.hpp"
34#include <iostream>
35
36#if defined(__NVCC__) && !defined(CUDA_ON_CPU)
37#include <cuda_runtime.h>
38#else
39#include "util/cuda_util.hpp"
40#endif
41
42extern size_t TotCudaMemoryAllocated;
43
44/*! \brief given an alignment and an alignment it return the smallest number numiple of the alignment
45 * such that the value returned is bigger ot equal that the number given
46 *
47 * alignment 8 number 2 it return 8
48 * alignment 8 number 9 it return 16
49 *
50 * \param al alignment
51 * \param number
52 *
53 */
54__device__ inline size_t align_number_device(size_t al, size_t number)
55{
56 return number + ((number % al) != 0)*(al - number % al);
57}
58
59//! Is an array to report general error can happen in CUDA
60static __device__ unsigned char global_cuda_error_array[256];
61
62class CudaMemory : public memory
63{
64 //! Is the host memory synchronized with the GPU memory
65 bool is_hm_sync;
66
67 //! Size of the memory
68 size_t sz;
69
70 //! device memory
71 void * dm;
72
73 //! host memory
74 mutable void * hm;
75
76 //! Reference counter
77 size_t ref_cnt;
78
79 //! Allocate an host buffer
80 void allocate_host(size_t sz) const;
81
82 //! copy from Pointer to GPU
83 bool copyFromPointer(const void * ptr);
84
85public:
86
87 //! copy from GPU to GPU buffer directly
88 bool copyDeviceToDevice(const CudaMemory & m);
89
90 //! flush the memory
91 virtual bool flush();
92 //! allocate memory
93 virtual bool allocate(size_t sz);
94 //! destroy memory
95 virtual void destroy();
96 //! copy from a General device
97 virtual bool copy(const memory & m);
98 //! the the size of the allocated memory
99 virtual size_t size() const;
100 //! resize the momory allocated
101 virtual bool resize(size_t sz);
102 //! get a readable pointer with the data
103 virtual void * getPointer();
104
105 //! get a readable pointer with the data
106 virtual const void * getPointer() const;
107
108 //! get a readable pointer with the data
109 virtual void * getDevicePointer();
110
111 //! Move memory from host to device
112 virtual void hostToDevice();
113
114 //! Move memory from device to host
115 virtual void deviceToHost();
116
117 //! Move memory from device to host, just the selected chunk
118 virtual void deviceToHost(size_t start, size_t stop);
119
120 //! Move memory from host to device, just the selected chunk
121 virtual void hostToDevice(size_t start, size_t top);
122
123 //! host to device using external memory (this host memory is copied into mem device memory)
124 void hostToDevice(CudaMemory & mem);
125
126 //! device to host using external memory (this device memory is copied into mem host memory)
127 void deviceToHost(CudaMemory & mem);
128
129 //! fill the buffer with a byte
130 virtual void fill(unsigned char c);
131
132 //! This function notify that the device memory is not sync with
133 //! the host memory, is called when a task is performed that write
134 //! on the buffer
135 void isNotSync() {is_hm_sync = false;}
136
137 public:
138
139 //! Increment the reference counter
140 virtual void incRef()
141 {ref_cnt++;}
142
143 //! Decrement the reference counter
144 virtual void decRef()
145 {ref_cnt--;}
146
147 //! Return the reference counter
148 virtual long int ref()
149 {
150 return ref_cnt;
151 }
152
153 /*! \brief Allocated Memory is never initialized
154 *
155 * \return false
156 *
157 */
158 bool isInitialized()
159 {
160 return false;
161 }
162
163 // Copy the memory (device and host)
164 CudaMemory & operator=(const CudaMemory & mem)
165 {
166 copy(mem);
167 return *this;
168 }
169
170 // Copy the Cuda memory
171 CudaMemory(const CudaMemory & mem)
172 :CudaMemory()
173 {
174 allocate(mem.size());
175 copy(mem);
176 }
177
178 CudaMemory(CudaMemory && mem) EXCEPT_MC
179 {
180 is_hm_sync = mem.is_hm_sync;
181 sz = mem.sz;
182 dm = mem.dm;
183 hm = mem.hm;
184 ref_cnt = mem.ref_cnt;
185
186 // reset mem
187 mem.is_hm_sync = false;
188 mem.sz = 0;
189 mem.dm = NULL;
190 mem.hm = NULL;
191 mem.ref_cnt = 0;
192 }
193
194 //! Constructor
195 CudaMemory():is_hm_sync(true),sz(0),dm(0),hm(0),ref_cnt(0) {};
196
197 //! Constructor
198 CudaMemory(size_t sz):is_hm_sync(true),sz(0),dm(0),hm(0),ref_cnt(0)
199 {
200 allocate(sz);
201 };
202
203 //! Destructor
204 ~CudaMemory()
205 {
206 if(ref_cnt == 0)
207 destroy();
208 else
209 std::cerr << "Error: " << __FILE__ << " " << __LINE__ << " destroying a live object" << "\n";
210 };
211
212 /*! \brief copy memory from device to device
213 *
214 * \param external device pointer
215 * \param start source starting point (where it start to copy)
216 * \param stop end point
217 * \param offset where to copy in the device pointer
218 *
219 */
220 void deviceToDevice(void * ptr, size_t start, size_t stop, size_t offset);
221
222 void swap(CudaMemory & mem);
223
224 /*! \brief Return true if the device and the host pointer are the same
225 *
226 * \return true if they are the same
227 *
228 */
229 static bool isDeviceHostSame()
230 {
231 return false;
232 }
233
234 /*! \brief return the device memory
235 *
236 * \see equivalent to getDevicePointer()
237 *
238 */
239 void * toKernel()
240 {
241 return getDevicePointer();
242 }
243};
244
245
246#endif
247
248