1 | /* |
2 | * scan_ofp.hpp |
3 | * |
4 | * Created on: May 15, 2019 |
5 | * Author: i-bird |
6 | */ |
7 | |
8 | #ifndef SCAN_OFP_HPP_ |
9 | #define SCAN_OFP_HPP_ |
10 | |
11 | #ifdef __NVCC__ |
12 | |
13 | #include "util/cuda_launch.hpp" |
14 | |
15 | #if CUDART_VERSION >= 11000 |
16 | #ifndef CUDA_ON_CPU |
17 | // Here we have for sure CUDA >= 11 |
18 | #include "cub/cub.cuh" |
19 | #ifndef SCAN_WITH_CUB |
20 | #define SCAN_WITH_CUB |
21 | #endif |
22 | #endif |
23 | #else |
24 | // Here we have old CUDA |
25 | #include "cub_old/cub.cuh" |
26 | #include "util/cuda/moderngpu/kernel_scan.hxx" |
27 | #endif |
28 | #include "util/cuda/ofp_context.hxx" |
29 | |
30 | namespace openfpm |
31 | { |
32 | template<typename input_it, typename output_it> |
33 | void scan(input_it input, int count, output_it output, mgpu::ofp_context_t& context) |
34 | { |
35 | #ifdef CUDA_ON_CPU |
36 | |
37 | if (count == 0) {return;} |
38 | |
39 | auto prec = input[0]; |
40 | output[0] = 0; |
41 | for (int i = 1 ; i < count ; i++) |
42 | { |
43 | auto next = prec + output[i-1]; |
44 | prec = input[i]; |
45 | output[i] = next; |
46 | } |
47 | |
48 | #else |
49 | #ifdef SCAN_WITH_CUB |
50 | |
51 | void *d_temp_storage = NULL; |
52 | size_t temp_storage_bytes = 0; |
53 | cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes,input, |
54 | output, |
55 | count); |
56 | |
57 | auto & temporal = context.getTemporalCUB(); |
58 | temporal.resize(temp_storage_bytes); |
59 | |
60 | // Run |
61 | cub::DeviceScan::ExclusiveSum(temporal.template getDeviceBuffer<0>(), temp_storage_bytes,input, |
62 | output, |
63 | count); |
64 | |
65 | #else |
66 | mgpu::scan(input,count,output,context); |
67 | #endif |
68 | #endif |
69 | } |
70 | } |
71 | |
72 | #endif /* __NVCC__ */ |
73 | |
74 | #endif /* SCAN_OFP_HPP_ */ |
75 | |