| 1 | /* |
| 2 | * cuda_grid_gpu_funcs.cuh |
| 3 | * |
| 4 | * Created on: Aug 20, 2018 |
| 5 | * Author: i-bird |
| 6 | */ |
| 7 | |
| 8 | #ifndef CUDA_GRID_GPU_FUNCS_CUH_ |
| 9 | #define CUDA_GRID_GPU_FUNCS_CUH_ |
| 10 | |
| 11 | #include "map_grid_cuda_ker.cuh" |
| 12 | |
| 13 | #if defined(CUDA_GPU) && defined(__NVCC__) |
| 14 | |
/*! \brief Copy one grid element per CUDA block from \p src to \p dst
 *
 * Launch layout: one block per linear grid element — blockIdx.x is the
 * element index; no per-thread index is used, so every thread of a block
 * resolves the same key. NOTE(review): this presumably relies on
 * grid_type::get_o() performing a block-cooperative element copy —
 * confirm against the grid_type implementation.
 *
 * \param src source grid (device-side handle, passed by value)
 * \param dst destination grid (device-side handle, passed by value)
 */
template<unsigned int dim, typename grid_type>
__global__ void copy_ndim_grid_block_device(grid_type src, grid_type dst)
{
	unsigned int i = blockIdx.x;

	// Guard against a launch grid larger than either data grid
	if (i >= src.getGrid().size() || i >= dst.getGrid().size())
	{return;}

	// Convert the linear element index back into an n-dimensional key
	auto key_src = src.getGrid().InvLinId(i);

	dst.get_o(key_src) = src.get_o(key_src);
}
| 27 | |
/*! \brief Generic n-dimensional device-side grid copy (1D linearized)
 *
 * One thread per linear element: the flat global thread id is mapped
 * back to an n-dimensional key via InvLinId and the element is copied
 * from \p src to \p dst. Threads past the end of either grid do nothing.
 *
 * \param src source grid
 * \param dst destination grid
 */
template<unsigned int dim, typename grid_type>
struct copy_ndim_grid_impl
{
	static __device__ void copy(grid_type & src, grid_type & dst)
	{
		unsigned int lin_id = blockIdx.x * blockDim.x + threadIdx.x;

		// only in-range threads perform the copy (tail of the last block is idle)
		if (lin_id < src.getGrid().size() && lin_id < dst.getGrid().size())
		{
			auto key = src.getGrid().InvLinId(lin_id);

			dst.get_o(key) = src.get_o(key);
		}
	}
};
| 43 | |
/*! \brief 2D specialization of the device-side grid copy
 *
 * One thread per (x,y) point of a 2D launch; threads outside either
 * grid's extent return without copying.
 *
 * \param src source grid
 * \param dst destination grid
 */
template<typename grid_type>
struct copy_ndim_grid_impl<2,grid_type>
{
	static __device__ void copy(grid_type & src, grid_type & dst)
	{
		unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
		unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

		grid_key_dx<2> key;
		key.set_d(0,x);
		key.set_d(1,y);

		// bounds check against both source and destination extents
		if (x >= src.getGrid().size(0) || x >= dst.getGrid().size(0)) {return;}
		if (y >= src.getGrid().size(1) || y >= dst.getGrid().size(1)) {return;}

		dst.get_o(key) = src.get_o(key);
	}
};
| 62 | |
/*! \brief 3D specialization of the device-side grid copy
 *
 * One thread per (x,y,z) point of a 3D launch; threads outside either
 * grid's extent return without copying.
 *
 * \param src source grid
 * \param dst destination grid
 */
template<typename grid_type>
struct copy_ndim_grid_impl<3,grid_type>
{
	static __device__ void copy(grid_type & src, grid_type & dst)
	{
		grid_key_dx<3> key_src;
		key_src.set_d(0,threadIdx.x + blockIdx.x * blockDim.x);
		key_src.set_d(1,threadIdx.y + blockIdx.y * blockDim.y);
		// Bug fix: dimension 2 must use the z thread/block components;
		// it was a copy-paste of the y expression, so z-slices past the
		// first were never addressed.
		key_src.set_d(2,threadIdx.z + blockIdx.z * blockDim.z);

		if (key_src.get(0) >= src.getGrid().size(0)) {return;}
		if (key_src.get(1) >= src.getGrid().size(1)) {return;}
		if (key_src.get(2) >= src.getGrid().size(2)) {return;}

		if (key_src.get(0) >= dst.getGrid().size(0)) {return;}
		if (key_src.get(1) >= dst.getGrid().size(1)) {return;}
		if (key_src.get(2) >= dst.getGrid().size(2)) {return;}

		dst.get_o(key_src) = src.get_o(key_src);
	}
};
| 84 | |
/*! \brief Kernel entry point: copy grid \p src into grid \p dst
 *
 * Dispatches to the dim-specific copy_ndim_grid_impl (1D-linearized
 * generic case, or the 2D/3D specializations that expect a matching
 * 2D/3D launch configuration).
 *
 * \param src source grid (device-side handle, passed by value)
 * \param dst destination grid (device-side handle, passed by value)
 */
template<unsigned int dim, typename grid_type>
__global__ void copy_ndim_grid_device(grid_type src, grid_type dst)
{
	copy_ndim_grid_impl<dim,grid_type>::copy(src,dst);
}
| 90 | |
| 91 | |
| 92 | #endif |
| 93 | |
| 94 | |
/*! \brief Build a kernel-side grid handle sharing the host grid's memory
 *
 * Primary template: linear memory layout (memory_traits_lin).
 * The returned grid_gpu_ker aliases the same memory object as \p gc,
 * with its reference count incremented and its pointer switched to the
 * device address, so it can be passed by value to a kernel.
 *
 * NOTE(review): the caller/wrapper is presumably responsible for the
 * matching decRef when the kernel handle is destroyed — confirm.
 *
 * \param gc host-side grid to expose to the device
 * \return kernel-usable grid handle aliasing gc's device memory
 */
template<bool inte_or_lin,unsigned int dim, typename T>
struct grid_toKernelImpl
{
	template<typename grid_type> static grid_gpu_ker<dim,T,memory_traits_lin> toKernel(grid_type & gc)
	{
		grid_gpu_ker<dim,T,memory_traits_lin> g(gc.getGrid());

		g.get_data_().mem = gc.get_internal_data_().mem;
		// Increment the reference of mem (shared ownership with gc)
		g.get_data_().mem->incRef();
		g.get_data_().mem_r.bind_ref(gc.get_internal_data_().mem_r);
		// switch the bound pointer to the device address before kernel use
		g.get_data_().switchToDevicePtr();

		return g;
	}
};
| 111 | |
/*! \brief Build a kernel-side grid handle, interleaved-layout case
 *
 * Specialization for memory_traits_inte: each of T's properties has its
 * own memory object, so a per-property functor
 * (copy_switch_memory_c_no_cpy) is applied over all T::max_prop
 * properties via boost::mpl::for_each_ref. NOTE(review): the functor
 * presumably performs the same share/incRef/device-pointer switch as the
 * linear case does per property — confirm against its definition.
 *
 * \param gc host-side grid to expose to the device
 * \return kernel-usable grid handle aliasing gc's device memory
 */
template<unsigned int dim, typename T>
struct grid_toKernelImpl<true,dim,T>
{
	template<typename grid_type> static grid_gpu_ker<dim,T,memory_traits_inte> toKernel(grid_type & gc)
	{
		grid_gpu_ker<dim,T,memory_traits_inte> g(gc.getGrid());
		copy_switch_memory_c_no_cpy<typename std::remove_reference<decltype(gc.get_internal_data_())>::type,
				typename std::remove_reference<decltype(g.get_data_())>::type> cp_mc(gc.get_internal_data_(),g.get_data_());

		// apply the functor to every property index [0, T::max_prop)
		boost::mpl::for_each_ref< boost::mpl::range_c<int,0,T::max_prop> >(cp_mc);

		return g;
	}
};
| 126 | |
| 127 | #endif /* CUDA_GRID_GPU_FUNCS_CUH_ */ |
| 128 | |