| 1 | /* |
| 2 | * CudaMemory.cu |
| 3 | * |
| 4 | * Created on: Aug 17, 2014 |
| 5 | * Author: Pietro Incardona |
| 6 | */ |
| 7 | |
| 8 | /** |
| 9 | * \brief This class create instructions to allocate, and destroy GPU memory |
| 10 | * |
| 11 | * This class allocate, destroy, resize GPU buffer, |
| 12 | * eventually if direct, comunication is not supported, it can instruction |
| 13 | * to create an Host Pinned memory. |
| 14 | * |
| 15 | * Usage: |
| 16 | * |
| 17 | * CudaMemory m = new CudaMemory(); |
| 18 | * |
| 19 | * m.allocate(1000*sizeof(int)); |
| 20 | * int * ptr = m.getPointer(); |
| 21 | * ptr[999] = 1000; |
| 22 | * .... |
| 23 | * |
| 24 | * |
| 25 | */ |
| 26 | |
| 27 | #ifndef CUDA_MEMORY_CUH_ |
| 28 | #define CUDA_MEMORY_CUH_ |
| 29 | |
| 30 | #define EXCEPT_MC noexcept |
| 31 | |
| 32 | #include "config.h" |
| 33 | #include "memory.hpp" |
| 34 | #include <iostream> |
| 35 | |
| 36 | #if defined(__NVCC__) && !defined(CUDA_ON_CPU) |
| 37 | #include <cuda_runtime.h> |
| 38 | #else |
| 39 | #include "util/cuda_util.hpp" |
| 40 | #endif |
| 41 | |
| 42 | extern size_t TotCudaMemoryAllocated; |
| 43 | |
| 44 | /*! \brief given an alignment and an alignment it return the smallest number numiple of the alignment |
| 45 | * such that the value returned is bigger ot equal that the number given |
| 46 | * |
| 47 | * alignment 8 number 2 it return 8 |
| 48 | * alignment 8 number 9 it return 16 |
| 49 | * |
| 50 | * \param al alignment |
| 51 | * \param number |
| 52 | * |
| 53 | */ |
| 54 | __device__ inline size_t align_number_device(size_t al, size_t number) |
| 55 | { |
| 56 | return number + ((number % al) != 0)*(al - number % al); |
| 57 | } |
| 58 | |
| 59 | //! Is an array to report general error can happen in CUDA |
| 60 | static __device__ unsigned char global_cuda_error_array[256]; |
| 61 | |
| 62 | class CudaMemory : public memory |
| 63 | { |
| 64 | //! Is the host memory synchronized with the GPU memory |
| 65 | bool is_hm_sync; |
| 66 | |
| 67 | //! Size of the memory |
| 68 | size_t sz; |
| 69 | |
| 70 | //! device memory |
| 71 | void * dm; |
| 72 | |
| 73 | //! host memory |
| 74 | mutable void * hm; |
| 75 | |
| 76 | //! Reference counter |
| 77 | size_t ref_cnt; |
| 78 | |
| 79 | //! Allocate an host buffer |
| 80 | void allocate_host(size_t sz) const; |
| 81 | |
| 82 | //! copy from Pointer to GPU |
| 83 | bool copyFromPointer(const void * ptr); |
| 84 | |
| 85 | public: |
| 86 | |
| 87 | //! copy from GPU to GPU buffer directly |
| 88 | bool copyDeviceToDevice(const CudaMemory & m); |
| 89 | |
| 90 | //! flush the memory |
| 91 | virtual bool flush(); |
| 92 | //! allocate memory |
| 93 | virtual bool allocate(size_t sz); |
| 94 | //! destroy memory |
| 95 | virtual void destroy(); |
| 96 | //! copy from a General device |
| 97 | virtual bool copy(const memory & m); |
| 98 | //! the the size of the allocated memory |
| 99 | virtual size_t size() const; |
| 100 | //! resize the momory allocated |
| 101 | virtual bool resize(size_t sz); |
| 102 | //! get a readable pointer with the data |
| 103 | virtual void * getPointer(); |
| 104 | |
| 105 | //! get a readable pointer with the data |
| 106 | virtual const void * getPointer() const; |
| 107 | |
| 108 | //! get a readable pointer with the data |
| 109 | virtual void * getDevicePointer(); |
| 110 | |
| 111 | //! Move memory from host to device |
| 112 | virtual void hostToDevice(); |
| 113 | |
| 114 | //! Move memory from device to host |
| 115 | virtual void deviceToHost(); |
| 116 | |
| 117 | //! Move memory from device to host, just the selected chunk |
| 118 | virtual void deviceToHost(size_t start, size_t stop); |
| 119 | |
| 120 | //! Move memory from host to device, just the selected chunk |
| 121 | virtual void hostToDevice(size_t start, size_t top); |
| 122 | |
| 123 | //! host to device using external memory (this host memory is copied into mem device memory) |
| 124 | void hostToDevice(CudaMemory & mem); |
| 125 | |
| 126 | //! device to host using external memory (this device memory is copied into mem host memory) |
| 127 | void deviceToHost(CudaMemory & mem); |
| 128 | |
| 129 | //! fill the buffer with a byte |
| 130 | virtual void fill(unsigned char c); |
| 131 | |
| 132 | //! This function notify that the device memory is not sync with |
| 133 | //! the host memory, is called when a task is performed that write |
| 134 | //! on the buffer |
| 135 | void isNotSync() {is_hm_sync = false;} |
| 136 | |
| 137 | public: |
| 138 | |
| 139 | //! Increment the reference counter |
| 140 | virtual void incRef() |
| 141 | {ref_cnt++;} |
| 142 | |
| 143 | //! Decrement the reference counter |
| 144 | virtual void decRef() |
| 145 | {ref_cnt--;} |
| 146 | |
| 147 | //! Return the reference counter |
| 148 | virtual long int ref() |
| 149 | { |
| 150 | return ref_cnt; |
| 151 | } |
| 152 | |
| 153 | /*! \brief Allocated Memory is never initialized |
| 154 | * |
| 155 | * \return false |
| 156 | * |
| 157 | */ |
| 158 | bool isInitialized() |
| 159 | { |
| 160 | return false; |
| 161 | } |
| 162 | |
| 163 | // Copy the memory (device and host) |
| 164 | CudaMemory & operator=(const CudaMemory & mem) |
| 165 | { |
| 166 | copy(mem); |
| 167 | return *this; |
| 168 | } |
| 169 | |
| 170 | // Copy the Cuda memory |
| 171 | CudaMemory(const CudaMemory & mem) |
| 172 | :CudaMemory() |
| 173 | { |
| 174 | allocate(mem.size()); |
| 175 | copy(mem); |
| 176 | } |
| 177 | |
| 178 | CudaMemory(CudaMemory && mem) EXCEPT_MC |
| 179 | { |
| 180 | is_hm_sync = mem.is_hm_sync; |
| 181 | sz = mem.sz; |
| 182 | dm = mem.dm; |
| 183 | hm = mem.hm; |
| 184 | ref_cnt = mem.ref_cnt; |
| 185 | |
| 186 | // reset mem |
| 187 | mem.is_hm_sync = false; |
| 188 | mem.sz = 0; |
| 189 | mem.dm = NULL; |
| 190 | mem.hm = NULL; |
| 191 | mem.ref_cnt = 0; |
| 192 | } |
| 193 | |
| 194 | //! Constructor |
| 195 | CudaMemory():is_hm_sync(true),sz(0),dm(0),hm(0),ref_cnt(0) {}; |
| 196 | |
| 197 | //! Constructor |
| 198 | CudaMemory(size_t sz):is_hm_sync(true),sz(0),dm(0),hm(0),ref_cnt(0) |
| 199 | { |
| 200 | allocate(sz); |
| 201 | }; |
| 202 | |
| 203 | //! Destructor |
| 204 | ~CudaMemory() |
| 205 | { |
| 206 | if(ref_cnt == 0) |
| 207 | destroy(); |
| 208 | else |
| 209 | std::cerr << "Error: " << __FILE__ << " " << __LINE__ << " destroying a live object" << "\n" ; |
| 210 | }; |
| 211 | |
| 212 | /*! \brief copy memory from device to device |
| 213 | * |
| 214 | * \param external device pointer |
| 215 | * \param start source starting point (where it start to copy) |
| 216 | * \param stop end point |
| 217 | * \param offset where to copy in the device pointer |
| 218 | * |
| 219 | */ |
| 220 | void deviceToDevice(void * ptr, size_t start, size_t stop, size_t offset); |
| 221 | |
| 222 | void swap(CudaMemory & mem); |
| 223 | |
| 224 | /*! \brief Return true if the device and the host pointer are the same |
| 225 | * |
| 226 | * \return true if they are the same |
| 227 | * |
| 228 | */ |
| 229 | static bool isDeviceHostSame() |
| 230 | { |
| 231 | return false; |
| 232 | } |
| 233 | |
| 234 | /*! \brief return the device memory |
| 235 | * |
| 236 | * \see equivalent to getDevicePointer() |
| 237 | * |
| 238 | */ |
| 239 | void * toKernel() |
| 240 | { |
| 241 | return getDevicePointer(); |
| 242 | } |
| 243 | }; |
| 244 | |
| 245 | |
| 246 | #endif |
| 247 | |
| 248 | |