1 | /* |
2 | * Domain_icells_cart.hpp |
3 | * |
4 | * Created on: Apr 27, 2019 |
5 | * Author: i-bird |
6 | */ |
7 | |
8 | #ifndef DOMAIN_ICELLS_CART_HPP_ |
9 | #define DOMAIN_ICELLS_CART_HPP_ |
10 | |
11 | #include "Vector/map_vector.hpp" |
12 | #include "Space/Ghost.hpp" |
13 | #include "NN/CellList/CellList.hpp" |
14 | #include "NN/CellList/cuda/CellDecomposer_gpu_ker.cuh" |
15 | #include "Vector/map_vector_sparse.hpp" |
16 | #include <iomanip> |
17 | |
18 | #ifdef __NVCC__ |
19 | |
20 | template<unsigned int dim, typename vector_sparse_type, typename CellDecomposer_type> |
21 | __global__ void insert_icell(vector_sparse_type vs, CellDecomposer_type cld, grid_key_dx<dim,int> start,grid_key_dx<dim,int> stop) |
22 | { |
23 | vs.init(); |
24 | |
25 | auto gk = grid_p<dim>::get_grid_point(cld.get_div_c()); |
26 | |
27 | unsigned int b = blockIdx.x + blockIdx.y * gridDim.x + blockIdx.z * gridDim.x * gridDim.y; |
28 | |
29 | bool out = false; |
30 | for (unsigned int i = 0 ; i < dim ; i++) |
31 | { |
32 | gk.set_d(i,gk.get(i) + start.get(i)); |
33 | if (gk.get(i) > stop.get(i)) |
34 | {out = true;} |
35 | } |
36 | |
37 | if (out == false) |
38 | { |
39 | auto id = cld.LinId(gk); |
40 | |
41 | vs.insert_b(id,b); |
42 | } |
43 | |
44 | vs.flush_block_insert(b, threadIdx.x == 0 & threadIdx.y == 0 & threadIdx.z == 0 ); |
45 | } |
46 | |
47 | template<unsigned int dim, typename vector_sparse_type, typename CellDecomposer_type> |
48 | __global__ void insert_remove_icell(vector_sparse_type vs, vector_sparse_type vsi, CellDecomposer_type cld, grid_key_dx<dim,int> start,grid_key_dx<dim,int> stop) |
49 | { |
50 | vs.init(); |
51 | vsi.init(); |
52 | |
53 | auto gk = grid_p<dim>::get_grid_point(cld.get_div_c()); |
54 | |
55 | unsigned int b = blockIdx.x + blockIdx.y * gridDim.x + blockIdx.z * gridDim.x * gridDim.y; |
56 | |
57 | bool out = false; |
58 | for (unsigned int i = 0 ; i < dim ; i++) |
59 | { |
60 | gk.set_d(i,gk.get(i) + start.get(i)); |
61 | if (gk.get(i) > stop.get(i)) |
62 | {out = true;} |
63 | } |
64 | |
65 | if (out == false) |
66 | { |
67 | auto id = cld.LinId(gk); |
68 | |
69 | vs.insert_b(id,b); |
70 | vsi.remove_b(id,b); |
71 | } |
72 | |
73 | vs.flush_block_insert(b, threadIdx.x == 0 & threadIdx.y == 0 & threadIdx.z == 0 ); |
74 | vsi.flush_block_remove(b, threadIdx.x == 0 & threadIdx.y == 0 & threadIdx.z == 0); |
75 | } |
76 | |
77 | template<unsigned int dim, typename T, template<typename> class layout_base , typename Memory, typename cnt_type, typename ids_type, bool is_gpu> |
78 | struct CalculateInternalCells_impl |
79 | { |
80 | template<typename VCluster_type> |
81 | static void CalculateInternalCells(VCluster_type & v_cl, |
82 | openfpm::vector<Box<dim,T>,Memory,layout_base> & ig_box, |
83 | openfpm::vector<SpaceBox<dim,T>,Memory,layout_base> & domain, |
84 | Box<dim,T> & pbox, |
85 | T r_cut, |
86 | const Ghost<dim,T> & enlarge, |
87 | CellDecomposer_sm<dim,T,shift<dim,T>> & cd, |
88 | openfpm::vector<aggregate<ids_type>,Memory,layout_base> & icells, |
89 | openfpm::vector<aggregate<ids_type>,Memory,layout_base> & dcells) |
90 | { |
91 | |
92 | } |
93 | }; |
94 | |
95 | template<unsigned int dim, typename T, template<typename> class layout_base , typename Memory, typename cnt_type, typename ids_type> |
96 | struct CalculateInternalCells_impl<dim,T,layout_base,Memory,cnt_type,ids_type,true> |
97 | { |
98 | template<typename VCluster_type> |
99 | static void CalculateInternalCells(VCluster_type & v_cl, |
100 | openfpm::vector<Box<dim,T>,Memory,layout_base> & ig_box, |
101 | openfpm::vector<SpaceBox<dim,T>,Memory,layout_base> & domain, |
102 | Box<dim,T> & pbox, |
103 | T r_cut, |
104 | const Ghost<dim,T> & enlarge, |
105 | CellDecomposer_sm<dim,T,shift<dim,T>> & cd, |
106 | openfpm::vector<aggregate<ids_type>,Memory,layout_base> & icells, |
107 | openfpm::vector<aggregate<ids_type>,Memory,layout_base> & dcells) |
108 | { |
109 | #if 0 |
110 | |
111 | // Division array |
112 | size_t div[dim]; |
113 | |
114 | // Calculate the parameters of the cell-list |
115 | |
116 | cl_param_calculate(pbox, div, r_cut, enlarge); |
117 | |
118 | openfpm::array<T,dim,cnt_type> spacing_c; |
119 | openfpm::array<ids_type,dim,cnt_type> div_c; |
120 | openfpm::array<ids_type,dim,cnt_type> off; |
121 | |
122 | for (size_t i = 0 ; i < dim ; i++) |
123 | { |
124 | spacing_c[i] = (pbox.getHigh(i) - pbox.getLow(i)) / div[i]; |
125 | off[i] = 1; |
126 | // div_c must include offset |
127 | div_c[i] = div[i] + 2*off[i]; |
128 | |
129 | } |
130 | |
131 | shift_only<dim,T> t(Matrix<dim,T>::identity(),pbox.getP1()); |
132 | |
133 | CellDecomposer_gpu_ker<dim,T,cnt_type,ids_type,shift_only<dim,T>> cld(spacing_c,div_c,off,t); |
134 | grid_sm<dim,void> g = cld.getGrid(); |
135 | cd.setDimensions(pbox,div,off[0]); |
136 | |
137 | openfpm::vector_sparse_gpu<aggregate<unsigned int>> vs; |
138 | openfpm::vector_sparse_gpu<aggregate<unsigned int>> vsi; |
139 | |
140 | vs.template setBackground<0>(0); |
141 | |
142 | // insert Domain cells |
143 | |
144 | for (size_t i = 0 ; i < domain.size() ; i++) |
145 | { |
146 | Box<dim,T> bx = SpaceBox<dim,T>(domain.get(i)); |
147 | |
148 | auto pp2 = bx.getP2(); |
149 | |
150 | for (size_t j = 0 ; j < dim ; j++) |
151 | {pp2.get(j) = std::nextafter(pp2.get(j),pp2.get(j) - static_cast<T>(1.0));} |
152 | |
153 | auto p1 = cld.getCell(bx.getP1()); |
154 | auto p2 = cld.getCell(pp2); |
155 | |
156 | |
157 | auto ite = g.getGPUIterator(p1,p2,256); |
158 | |
159 | if (ite.wthr.x == 0) |
160 | {continue;} |
161 | |
162 | vsi.setGPUInsertBuffer(ite.nblocks(),256); |
163 | |
164 | CUDA_LAUNCH((insert_icell<dim>),ite,vsi.toKernel(),cld,ite.start,p2); |
165 | |
166 | vsi.template flush<>(v_cl.getmgpuContext(),flush_type::FLUSH_ON_DEVICE); |
167 | } |
168 | |
169 | // calculate the number of kernel launch |
170 | |
171 | for (size_t i = 0 ; i < ig_box.size() ; i++) |
172 | { |
173 | Box<dim,T> bx = ig_box.get(i); |
174 | |
175 | auto pp2 = bx.getP2(); |
176 | |
177 | for (size_t j = 0 ; j < dim ; j++) |
178 | {pp2.get(j) = std::nextafter(pp2.get(j),pp2.get(j) - static_cast<T>(1.0));} |
179 | |
180 | auto p1 = cld.getCell(bx.getP1()); |
181 | auto p2 = cld.getCell(pp2); |
182 | |
183 | auto ite = g.getGPUIterator(p1,p2,256); |
184 | |
185 | if (ite.wthr.x == 0) |
186 | {continue;} |
187 | |
188 | vs.setGPUInsertBuffer(ite.nblocks(),256); |
189 | vsi.setGPURemoveBuffer(ite.nblocks(),256); |
190 | |
191 | CUDA_LAUNCH(insert_remove_icell<dim>,ite,vs.toKernel(),vsi.toKernel(),cld,ite.start,p2); |
192 | |
193 | vs.template flush<>(v_cl.getmgpuContext(),flush_type::FLUSH_ON_DEVICE); |
194 | vsi.flush_remove(v_cl.getmgpuContext(),flush_type::FLUSH_ON_DEVICE); |
195 | } |
196 | |
197 | |
198 | vs.swapIndexVector(icells); |
199 | vsi.swapIndexVector(dcells); |
200 | |
201 | #endif |
202 | } |
203 | }; |
204 | |
205 | #endif |
206 | |
207 | template<unsigned int dim, typename T, template<typename> class layout_base , typename Memory> |
208 | class domain_icell_calculator |
209 | { |
210 | typedef unsigned int cnt_type; |
211 | |
212 | typedef int ids_type; |
213 | |
214 | openfpm::vector<aggregate<ids_type>,Memory,layout_base> icells; |
215 | openfpm::vector<aggregate<ids_type>,Memory,layout_base> dcells; |
216 | |
217 | CellDecomposer_sm<dim,T,shift<dim,T>> cd; |
218 | |
219 | public: |
220 | |
221 | /*! \brief Calculate the subdomain that are in the skin part of the domain |
222 | * |
223 | \verbatim |
224 | |
225 | +---+---+---+---+---+---+ |
226 | | 1 | 2 | 3 | 4 | 5 | 6 | |
227 | +---+---+---+---+---+---+ |
228 | |28 | | 7 | |
229 | +---+ +---+ |
230 | |27 | | 8 | |
231 | +---+ +---+ |
232 | |26 | | 9 | |
233 | +---+ DOM1 +---+ |
234 | |25 | |10 | |
235 | +---+ +---+ |
236 | |24 | |11 | |
237 | +---+ +---+---+ |
238 | |23 | |13 |12 | |
239 | +---+-----------+---+---+ |
240 | |22 | |14 | |
241 | +---+ +---+ |
242 | |21 | DOM2 |15 | |
243 | +---+---+---+---+---+ |
244 | |20 |19 |18 | 17|16 | |
245 | +---+---+---+---+---+ <----- Domain end here |
246 | | |
247 | ^ | |
248 | |_____________| |
249 | |
250 | |
251 | \endverbatim |
252 | * |
253 | * It does it on GPU or CPU |
254 | * |
255 | */ |
256 | template<typename VCluster_type> |
257 | void CalculateInternalCells(VCluster_type & v_cl, |
258 | openfpm::vector<Box<dim,T>,Memory,layout_base> & ig_box, |
259 | openfpm::vector<SpaceBox<dim,T>,Memory,layout_base> & domain, |
260 | Box<dim,T> & pbox, |
261 | T r_cut, |
262 | const Ghost<dim,T> & enlarge) |
263 | { |
264 | #ifdef __NVCC__ |
265 | CalculateInternalCells_impl<dim,T,layout_base,Memory,cnt_type,ids_type,std::is_same<Memory,CudaMemory>::value>::CalculateInternalCells(v_cl,ig_box,domain,pbox,r_cut,enlarge,cd,icells,dcells); |
266 | #endif |
267 | } |
268 | |
269 | /*! \brief Return the list of the internal cells |
270 | * |
271 | * \return the list of the internal cells |
272 | * |
273 | */ |
274 | openfpm::vector<aggregate<ids_type>,Memory,layout_base> & getIcells() |
275 | { |
276 | return icells; |
277 | } |
278 | |
279 | /*! \brief Return the list of the internal cells |
280 | * |
281 | * \return the list of the internal cells |
282 | * |
283 | */ |
284 | openfpm::vector<aggregate<ids_type>,Memory,layout_base> & getDcells() |
285 | { |
286 | return dcells; |
287 | } |
288 | |
289 | /*! \brief Given a cell index return the cell box |
290 | * |
291 | * \param ci cell index |
292 | * |
293 | * \return The box reppresenting the cell |
294 | */ |
295 | Box<dim,T> getBoxCell(unsigned int ci) |
296 | { |
297 | Box<dim,T> b; |
298 | |
299 | for (size_t i = 0 ; i < dim ; i++) |
300 | { |
301 | auto key = cd.getGrid().InvLinId(ci); |
302 | Point<dim,T> p1 = cd.getOrig().get(i) - cd.getPadding(i)*cd.getCellBox().getHigh(i) ; |
303 | |
304 | b.setLow(i,p1.get(i) + key.get(i)*cd.getCellBox().getHigh(i)); |
305 | b.setHigh(i,p1.get(i) + ((key.get(i) + 1)*cd.getCellBox().getHigh(i))); |
306 | } |
307 | |
308 | return b; |
309 | } |
310 | |
311 | /*! \brief Get the grid base information about this cell decomposition |
312 | * |
313 | * |
314 | * \return the grid |
315 | * |
316 | */ |
317 | const grid_sm<dim,void> & getGrid() |
318 | { |
319 | return cd.getGrid(); |
320 | } |
321 | }; |
322 | |
323 | |
324 | #endif /* DOMAIN_ICELLS_CART_HPP_ */ |
325 | |