1 | /* |
2 | * cuda_grid_gpu_funcs.cuh |
3 | * |
4 | * Created on: Aug 20, 2018 |
5 | * Author: i-bird |
6 | */ |
7 | |
8 | #ifndef CUDA_GRID_GPU_FUNCS_CUH_ |
9 | #define CUDA_GRID_GPU_FUNCS_CUH_ |
10 | |
11 | #include "map_grid_cuda_ker.cuh" |
12 | |
13 | #if defined(CUDA_GPU) && defined(__NVCC__) |
14 | |
/*! \brief Copy an n-dimensional grid on the device, one linearized grid element per CUDA block
 *
 * \param src source grid (device accessible copy)
 * \param dst destination grid (device accessible copy)
 *
 */
template<unsigned int dim, typename grid_type>
__global__ void copy_ndim_grid_block_device(grid_type src, grid_type dst)
{
	// each block handles the grid element whose linearized index is blockIdx.x
	unsigned int i = blockIdx.x;

	if (i >= src.getGrid().size() || i >= dst.getGrid().size())
	{return;}

	auto key_src = src.getGrid().InvLinId(i);

	dst.get_o(key_src) = src.get_o(key_src);
}
27 | |
/*! \brief Element-wise device copy of an n-dimensional grid, generic case using a linearized thread index */
template<unsigned int dim, typename grid_type>
29 | struct copy_ndim_grid_impl |
30 | { |
31 | static __device__ void copy(grid_type & src, grid_type & dst) |
32 | { |
33 | unsigned int i = threadIdx.x + blockIdx.x * blockDim.x; |
34 | |
35 | if (i >= src.getGrid().size() || i >= dst.getGrid().size()) |
36 | {return;} |
37 | |
38 | auto key_src = src.getGrid().InvLinId(i); |
39 | |
40 | dst.get_o(key_src) = src.get_o(key_src); |
41 | } |
42 | }; |
43 | |
/*! \brief Specialization for 2-dimensional grids: one thread per (x,y) element */
template<typename grid_type>
45 | struct copy_ndim_grid_impl<2,grid_type> |
46 | { |
47 | static __device__ void copy(grid_type & src, grid_type & dst) |
48 | { |
49 | grid_key_dx<2> key_src; |
50 | key_src.set_d(0,threadIdx.x + blockIdx.x * blockDim.x); |
51 | key_src.set_d(1,threadIdx.y + blockIdx.y * blockDim.y); |
52 | |
53 | if (key_src.get(0) >= src.getGrid().size(0)) {return;} |
54 | if (key_src.get(1) >= src.getGrid().size(1)) {return;} |
55 | |
56 | if (key_src.get(0) >= dst.getGrid().size(0)) {return;} |
57 | if (key_src.get(1) >= dst.getGrid().size(1)) {return;} |
58 | |
59 | dst.get_o(key_src) = src.get_o(key_src); |
60 | } |
61 | }; |
62 | |
/*! \brief Specialization for 3-dimensional grids: one thread per (x,y,z) element */
template<typename grid_type>
64 | struct copy_ndim_grid_impl<3,grid_type> |
65 | { |
66 | static __device__ void copy(grid_type & src, grid_type & dst) |
67 | { |
68 | grid_key_dx<3> key_src; |
69 | key_src.set_d(0,threadIdx.x + blockIdx.x * blockDim.x); |
70 | key_src.set_d(1,threadIdx.y + blockIdx.y * blockDim.y); |
		key_src.set_d(2,threadIdx.z + blockIdx.z * blockDim.z);
72 | |
73 | if (key_src.get(0) >= src.getGrid().size(0)) {return;} |
74 | if (key_src.get(1) >= src.getGrid().size(1)) {return;} |
75 | if (key_src.get(2) >= src.getGrid().size(2)) {return;} |
76 | |
77 | if (key_src.get(0) >= dst.getGrid().size(0)) {return;} |
78 | if (key_src.get(1) >= dst.getGrid().size(1)) {return;} |
79 | if (key_src.get(2) >= dst.getGrid().size(2)) {return;} |
80 | |
81 | dst.get_o(key_src) = src.get_o(key_src); |
82 | } |
83 | }; |
84 | |
/*! \brief Kernel that copies src into dst, dispatching to the implementation matching the grid dimensionality */
template<unsigned int dim, typename grid_type>
86 | __global__ void copy_ndim_grid_device(grid_type src, grid_type dst) |
87 | { |
88 | copy_ndim_grid_impl<dim,grid_type>::copy(src,dst); |
89 | } |
90 | |
91 | |
#endif /* defined(CUDA_GPU) && defined(__NVCC__) */
93 | |
94 | |
/*! \brief Produce a device (kernel) accessible version of a grid, memory_traits_lin layout */
template<bool inte_or_lin,unsigned int dim, typename T>
96 | struct grid_toKernelImpl |
97 | { |
98 | template<typename grid_type> static grid_gpu_ker<dim,T,memory_traits_lin> toKernel(grid_type & gc) |
99 | { |
100 | grid_gpu_ker<dim,T,memory_traits_lin> g(gc.getGrid()); |
101 | |
102 | g.get_data_().mem = gc.get_internal_data_().mem; |
103 | // Increment the reference of mem |
104 | g.get_data_().mem->incRef(); |
105 | g.get_data_().mem_r.bind_ref(gc.get_internal_data_().mem_r); |
106 | g.get_data_().switchToDevicePtr(); |
107 | |
108 | return g; |
109 | } |
110 | }; |
111 | |
/*! \brief Produce a device (kernel) accessible version of a grid, memory_traits_inte layout */
template<unsigned int dim, typename T>
113 | struct grid_toKernelImpl<true,dim,T> |
114 | { |
115 | template<typename grid_type> static grid_gpu_ker<dim,T,memory_traits_inte> toKernel(grid_type & gc) |
116 | { |
117 | grid_gpu_ker<dim,T,memory_traits_inte> g(gc.getGrid()); |
118 | copy_switch_memory_c_no_cpy<typename std::remove_reference<decltype(gc.get_internal_data_())>::type, |
119 | typename std::remove_reference<decltype(g.get_data_())>::type> cp_mc(gc.get_internal_data_(),g.get_data_()); |
120 | |
121 | boost::mpl::for_each_ref< boost::mpl::range_c<int,0,T::max_prop> >(cp_mc); |
122 | |
123 | return g; |
124 | } |
125 | }; |
126 | |
127 | #endif /* CUDA_GRID_GPU_FUNCS_CUH_ */ |
128 | |