/*
 * cuda_grid_gpu_funcs.cuh
 *
 *  Created on: Aug 20, 2018
 *      Author: i-bird
 */

#ifndef CUDA_GRID_GPU_FUNCS_CUH_
#define CUDA_GRID_GPU_FUNCS_CUH_

#include "map_grid_cuda_ker.cuh"

#if defined(CUDA_GPU) && defined(__NVCC__)

/*! \brief Copy one n-dimensional grid into another, one element per block
 *
 * Block i copies the element stored at linear index i; the index is
 * converted back into an n-dimensional key with InvLinId.
 *
 * \param src source grid (device view)
 * \param dst destination grid (device view)
 */
template<unsigned int dim, typename grid_type>
__global__ void copy_ndim_grid_block_device(grid_type src, grid_type dst)
{
	unsigned int i = blockIdx.x;

	if (i >= src.getGrid().size() || i >= dst.getGrid().size())
	{return;}

	auto key_src = src.getGrid().InvLinId(i);

	dst.get_o(key_src) = src.get_o(key_src);
}

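/* A minimal launch sketch for this kernel (a sketch, not part of this header:
 * "src" and "dst" stand for host grids whose toKernel() returns the device
 * view, an assumption based on grid_toKernelImpl below):
 *
 *   copy_ndim_grid_block_device<3,decltype(src.toKernel())>
 *       <<<src.getGrid().size(),1>>>(src.toKernel(),dst.toKernel());
 */
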
/*! \brief Element-wise grid copy, one thread per linear index
 *
 * Generic implementation for any dimension: each thread turns its global
 * 1D index into an n-dimensional key and copies the corresponding element.
 */
template<unsigned int dim, typename grid_type>
struct copy_ndim_grid_impl
{
	static __device__ void copy(grid_type & src, grid_type & dst)
	{
		unsigned int i = threadIdx.x + blockIdx.x * blockDim.x;

		if (i >= src.getGrid().size() || i >= dst.getGrid().size())
		{return;}

		auto key_src = src.getGrid().InvLinId(i);

		dst.get_o(key_src) = src.get_o(key_src);
	}
};

/*! \brief Specialization for 2D grids: the (x,y) thread coordinates map directly to the grid key */
template<typename grid_type>
struct copy_ndim_grid_impl<2,grid_type>
{
	static __device__ void copy(grid_type & src, grid_type & dst)
	{
		grid_key_dx<2> key_src;
		key_src.set_d(0,threadIdx.x + blockIdx.x * blockDim.x);
		key_src.set_d(1,threadIdx.y + blockIdx.y * blockDim.y);

		if (key_src.get(0) >= src.getGrid().size(0))	{return;}
		if (key_src.get(1) >= src.getGrid().size(1))	{return;}

		if (key_src.get(0) >= dst.getGrid().size(0))	{return;}
		if (key_src.get(1) >= dst.getGrid().size(1))	{return;}

		dst.get_o(key_src) = src.get_o(key_src);
	}
};

/*! \brief Specialization for 3D grids: the (x,y,z) thread coordinates map directly to the grid key */
template<typename grid_type>
struct copy_ndim_grid_impl<3,grid_type>
{
	static __device__ void copy(grid_type & src, grid_type & dst)
	{
		grid_key_dx<3> key_src;
		key_src.set_d(0,threadIdx.x + blockIdx.x * blockDim.x);
		key_src.set_d(1,threadIdx.y + blockIdx.y * blockDim.y);
		key_src.set_d(2,threadIdx.z + blockIdx.z * blockDim.z);

		if (key_src.get(0) >= src.getGrid().size(0))	{return;}
		if (key_src.get(1) >= src.getGrid().size(1))	{return;}
		if (key_src.get(2) >= src.getGrid().size(2))	{return;}

		if (key_src.get(0) >= dst.getGrid().size(0))	{return;}
		if (key_src.get(1) >= dst.getGrid().size(1))	{return;}
		if (key_src.get(2) >= dst.getGrid().size(2))	{return;}

		dst.get_o(key_src) = src.get_o(key_src);
	}
};

/*! \brief Kernel entry point: dispatch to the dimension-specific copy implementation */
template<unsigned int dim, typename grid_type>
__global__ void copy_ndim_grid_device(grid_type src, grid_type dst)
{
	copy_ndim_grid_impl<dim,grid_type>::copy(src,dst);
}

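/* A minimal launch sketch for the 3D case (a sketch under assumptions: the
 * grid names are illustrative and sx/sy/sz stand for the grid sizes; only
 * the CUDA launch syntax itself is guaranteed here):
 *
 *   dim3 thr(8,8,8);
 *   dim3 wthr((sx + 7) / 8,(sy + 7) / 8,(sz + 7) / 8);
 *   copy_ndim_grid_device<3,decltype(src.toKernel())>
 *       <<<wthr,thr>>>(src.toKernel(),dst.toKernel());
 */
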

#endif // defined(CUDA_GPU) && defined(__NVCC__)


/*! \brief Convert a host grid into its device (kernel-usable) representation
 *
 * Primary template, used for the linear memory layout (memory_traits_lin).
 * The returned object shares the memory of the host grid rather than
 * copying it, so the reference count of the underlying buffer is bumped.
 */
template<bool inte_or_lin,unsigned int dim, typename T>
struct grid_toKernelImpl
{
	template<typename grid_type> static grid_gpu_ker<dim,T,memory_traits_lin> toKernel(grid_type & gc)
	{
		grid_gpu_ker<dim,T,memory_traits_lin> g(gc.getGrid());

		g.get_data_().mem = gc.get_internal_data_().mem;
		// Increment the reference count of mem: the kernel object now co-owns the buffer
		g.get_data_().mem->incRef();
		g.get_data_().mem_r.bind_ref(gc.get_internal_data_().mem_r);
		g.get_data_().switchToDevicePtr();

		return g;
	}
};

/*! \brief Specialization for the interleaved layout (memory_traits_inte):
 * every property has its own buffer, so the memory of each one is bound separately
 */
template<unsigned int dim, typename T>
struct grid_toKernelImpl<true,dim,T>
{
	template<typename grid_type> static grid_gpu_ker<dim,T,memory_traits_inte> toKernel(grid_type & gc)
	{
		grid_gpu_ker<dim,T,memory_traits_inte> g(gc.getGrid());
		copy_switch_memory_c_no_cpy<typename std::remove_reference<decltype(gc.get_internal_data_())>::type,
				                    typename std::remove_reference<decltype(g.get_data_())>::type> cp_mc(gc.get_internal_data_(),g.get_data_());

		// bind the memory of each of the T::max_prop properties, one at a time
		boost::mpl::for_each_ref< boost::mpl::range_c<int,0,T::max_prop> >(cp_mc);

		return g;
	}
};
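
/* Typical call-site sketch (an assumption about the caller, which is not
 * defined in this header): the host grid's toKernel() selects the layout
 * at compile time, e.g.
 *
 *   return grid_toKernelImpl<is_layout_inte<layout_base<T>>::value,dim,T>
 *              ::toKernel(*this);
 *
 * where is_layout_inte is assumed to be the layout trait used elsewhere
 * in the library.
 */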

#endif /* CUDA_GRID_GPU_FUNCS_CUH_ */