1/*
2 * scan_ofp.hpp
3 *
4 * Created on: May 15, 2019
5 * Author: i-bird
6 */
7
8#ifndef SCAN_OFP_HPP_
9#define SCAN_OFP_HPP_
10
11#ifdef __NVCC__
12
13#include "util/cuda_launch.hpp"
14
15#if CUDART_VERSION >= 11000
16 #ifndef CUDA_ON_CPU
17 // Here we have for sure CUDA >= 11
18 #include "cub/cub.cuh"
19 #ifndef SCAN_WITH_CUB
20 #define SCAN_WITH_CUB
21 #endif
22 #endif
23#else
24 // Here we have old CUDA
25 #include "cub_old/cub.cuh"
26 #include "util/cuda/moderngpu/kernel_scan.hxx"
27#endif
28#include "util/cuda/ofp_context.hxx"
29
30namespace openfpm
31{
32 template<typename input_it, typename output_it>
33 void scan(input_it input, int count, output_it output, mgpu::ofp_context_t& context)
34 {
35#ifdef CUDA_ON_CPU
36
37 if (count == 0) {return;}
38
39 auto prec = input[0];
40 output[0] = 0;
41 for (int i = 1 ; i < count ; i++)
42 {
43 auto next = prec + output[i-1];
44 prec = input[i];
45 output[i] = next;
46 }
47
48#else
49 #ifdef SCAN_WITH_CUB
50
51 void *d_temp_storage = NULL;
52 size_t temp_storage_bytes = 0;
53 cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes,input,
54 output,
55 count);
56
57 auto & temporal = context.getTemporalCUB();
58 temporal.resize(temp_storage_bytes);
59
60 // Run
61 cub::DeviceScan::ExclusiveSum(temporal.template getDeviceBuffer<0>(), temp_storage_bytes,input,
62 output,
63 count);
64
65 #else
66 mgpu::scan(input,count,output,context);
67 #endif
68#endif
69 }
70}
71
72#endif /* __NVCC__ */
73
74#endif /* SCAN_OFP_HPP_ */
75