| 1 | /* | 
| 2 |  * scan_ofp.hpp | 
| 3 |  * | 
| 4 |  *  Created on: May 15, 2019 | 
| 5 |  *      Author: i-bird | 
| 6 |  */ | 
| 7 |  | 
| 8 | #ifndef SCAN_OFP_HPP_ | 
| 9 | #define SCAN_OFP_HPP_ | 
| 10 |  | 
| 11 | #ifdef __NVCC__ | 
| 12 |  | 
| 13 | #include "util/cuda_launch.hpp" | 
| 14 |  | 
| 15 | #if CUDART_VERSION >= 11000 | 
| 16 | 	#ifndef CUDA_ON_CPU  | 
| 17 | 	// Here we have for sure CUDA >= 11 | 
| 18 | 	#include "cub/cub.cuh" | 
| 19 | 	#ifndef SCAN_WITH_CUB | 
| 20 | 		#define SCAN_WITH_CUB | 
| 21 | 	#endif | 
| 22 | 	#endif | 
| 23 | #else | 
| 24 | 	// Here we have old CUDA | 
| 25 | 	#include "cub_old/cub.cuh" | 
| 26 | 	#include "util/cuda/moderngpu/kernel_scan.hxx" | 
| 27 | #endif | 
| 28 | #include "util/cuda/ofp_context.hxx" | 
| 29 |  | 
| 30 | namespace openfpm | 
| 31 | { | 
| 32 | 	template<typename input_it, typename output_it> | 
| 33 | 			 void scan(input_it input, int count, output_it output, mgpu::ofp_context_t& context) | 
| 34 | 	{ | 
| 35 | #ifdef CUDA_ON_CPU | 
| 36 |  | 
| 37 | 	if (count == 0)	{return;} | 
| 38 |  | 
| 39 | 	auto prec = input[0]; | 
| 40 | 	output[0] = 0; | 
| 41 | 	for (int i = 1 ; i < count ; i++) | 
| 42 | 	{ | 
| 43 | 		auto next = prec + output[i-1]; | 
| 44 | 		prec = input[i]; | 
| 45 | 		output[i] = next; | 
| 46 | 	} | 
| 47 |  | 
| 48 | #else | 
| 49 | 	#ifdef SCAN_WITH_CUB | 
| 50 |  | 
| 51 | 			void *d_temp_storage = NULL; | 
| 52 | 			size_t temp_storage_bytes = 0; | 
| 53 | 			cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes,input, | 
| 54 | 																		output, | 
| 55 | 																		count); | 
| 56 |  | 
| 57 | 			auto & temporal = context.getTemporalCUB(); | 
| 58 | 			temporal.resize(temp_storage_bytes); | 
| 59 |  | 
| 60 | 			// Run | 
| 61 | 			cub::DeviceScan::ExclusiveSum(temporal.template getDeviceBuffer<0>(), temp_storage_bytes,input, | 
| 62 | 					output, | 
| 63 | 					count); | 
| 64 |  | 
| 65 | 	#else | 
| 66 | 			mgpu::scan(input,count,output,context); | 
| 67 | 	#endif | 
| 68 | #endif | 
| 69 | 	} | 
| 70 | } | 
| 71 |  | 
| 72 | #endif /* __NVCC__ */ | 
| 73 |  | 
| 74 | #endif /* SCAN_OFP_HPP_ */ | 
| 75 |  |