1 | /*
|
2 | * VCluster_semantic_unit_test.hpp
|
3 | *
|
4 | * Created on: Feb 8, 2016
|
5 | * Author: i-bird
|
6 | */
|
7 |
|
8 | #define BOOST_TEST_DYN_LINK
|
9 | #include <boost/test/unit_test.hpp>
|
10 | #include "Grid/grid_util_test.hpp"
|
11 | #include "data_type/aggregate.hpp"
|
12 | #include "VCluster/cuda/VCluster_semantic_unit_tests_funcs.hpp"
|
13 |
|
14 | constexpr int NBX = 1;
|
15 | constexpr int NBX_ASYNC = 2;
|
16 |
|
/*! \brief Small user-defined structure used as element type in the gather tests
 *
 * It mixes three scalar types so that a vector of it can be sent across
 * processors and checked member by member on the receiving side.
 */
struct Aexample
{
    size_t a; //!< integer member
    float b;  //!< single precision member
    double c; //!< double precision member
};
|
29 |
|
30 |
|
31 | BOOST_AUTO_TEST_SUITE( VCluster_semantic_test )
|
32 |
|
33 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_gather)
|
34 | {
|
35 | for (size_t i = 0 ; i < 100 ; i++)
|
36 | {
|
37 | Vcluster<> & vcl = create_vcluster();
|
38 |
|
39 | if (vcl.getProcessUnitID() == 0 && i == 0)
|
40 | std::cout << "Semantic gather test start" << std::endl;
|
41 |
|
42 | if (vcl.getProcessingUnits() >= 32)
|
43 | return;
|
44 |
|
45 | //! [Gather the data on master]
|
46 |
|
47 | openfpm::vector<size_t> v1;
|
48 | v1.resize(vcl.getProcessUnitID());
|
49 |
|
50 | for(size_t i = 0 ; i < vcl.getProcessUnitID() ; i++)
|
51 | {v1.get(i) = 5;}
|
52 |
|
53 | openfpm::vector<size_t> v2;
|
54 |
|
55 | vcl.SGather(v1,v2,(i%vcl.getProcessingUnits()));
|
56 |
|
57 | //! [Gather the data on master]
|
58 |
|
59 | if (vcl.getProcessUnitID() == (i%vcl.getProcessingUnits()))
|
60 | {
|
61 | size_t n = vcl.getProcessingUnits();
|
62 | BOOST_REQUIRE_EQUAL(v2.size(),n*(n-1)/2);
|
63 |
|
64 | bool is_five = true;
|
65 | for (size_t i = 0 ; i < v2.size() ; i++)
|
66 | is_five &= (v2.get(i) == 5);
|
67 |
|
68 | BOOST_REQUIRE_EQUAL(is_five,true);
|
69 | }
|
70 | }
|
71 | }
|
72 |
|
73 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_gather_2)
|
74 | {
|
75 | for (size_t i = 0 ; i < 100 ; i++)
|
76 | {
|
77 | Vcluster<> & vcl = create_vcluster();
|
78 |
|
79 | if (vcl.getProcessingUnits() >= 32)
|
80 | return;
|
81 |
|
82 | //! [Gather the data on master complex]
|
83 |
|
84 | openfpm::vector<size_t> v1;
|
85 | v1.resize(vcl.getProcessUnitID());
|
86 |
|
87 | for(size_t i = 0 ; i < vcl.getProcessUnitID() ; i++)
|
88 | {v1.get(i) = 5;}
|
89 |
|
90 | openfpm::vector<openfpm::vector<size_t>> v2;
|
91 |
|
92 | vcl.SGather(v1,v2,0);
|
93 |
|
94 | //! [Gather the data on master complex]
|
95 |
|
96 | if (vcl.getProcessUnitID() == 0)
|
97 | {
|
98 | size_t n = vcl.getProcessingUnits();
|
99 | BOOST_REQUIRE_EQUAL(v2.size(),n);
|
100 |
|
101 | bool is_five = true;
|
102 | for (size_t i = 0 ; i < v2.size() ; i++)
|
103 | {
|
104 | for (size_t j = 0 ; j < v2.get(i).size() ; j++)
|
105 | is_five &= (v2.get(i).get(j) == 5);
|
106 | }
|
107 | BOOST_REQUIRE_EQUAL(is_five,true);
|
108 |
|
109 | }
|
110 |
|
111 | openfpm::vector<openfpm::vector<size_t>> v3;
|
112 |
|
113 | vcl.SGather(v1,v3,1);
|
114 |
|
115 | if (vcl.getProcessUnitID() == 1)
|
116 | {
|
117 | size_t n = vcl.getProcessingUnits();
|
118 | BOOST_REQUIRE_EQUAL(v3.size(),n-1);
|
119 |
|
120 | bool is_five = true;
|
121 | for (size_t i = 0 ; i < v3.size() ; i++)
|
122 | {
|
123 | for (size_t j = 0 ; j < v3.get(i).size() ; j++)
|
124 | is_five &= (v3.get(i).get(j) == 5);
|
125 | }
|
126 | BOOST_REQUIRE_EQUAL(is_five,true);
|
127 |
|
128 | }
|
129 | }
|
130 | }
|
131 |
|
132 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_gather_3)
|
133 | {
|
134 | for (size_t i = 0 ; i < 100 ; i++)
|
135 | {
|
136 | Vcluster<> & vcl = create_vcluster();
|
137 |
|
138 | if (vcl.getProcessingUnits() >= 32)
|
139 | {return;}
|
140 |
|
141 | openfpm::vector<openfpm::vector<aggregate<float, openfpm::vector<size_t>, Point_test<float>>> > v1;
|
142 |
|
143 | openfpm::vector<aggregate<float, openfpm::vector<size_t>, Point_test<float>>> v1_int;
|
144 | aggregate<float, openfpm::vector<size_t>, Point_test<float>> aggr;
|
145 | openfpm::vector<size_t> v1_int2;
|
146 |
|
147 | v1_int2.add((size_t)7);
|
148 | v1_int2.add((size_t)7);
|
149 |
|
150 | aggr.template get<0>() = 7;
|
151 | aggr.template get<1>() = v1_int2;
|
152 | Point_test<float> p;
|
153 | p.fill();
|
154 | aggr.template get<2>() = p;
|
155 |
|
156 | v1_int.add(aggr);
|
157 | v1_int.add(aggr);
|
158 | v1_int.add(aggr);
|
159 |
|
160 | v1.add(v1_int);
|
161 | v1.add(v1_int);
|
162 | v1.add(v1_int);
|
163 | v1.add(v1_int);
|
164 |
|
165 | openfpm::vector<openfpm::vector<aggregate<float, openfpm::vector<size_t>, Point_test<float>>> > v2;
|
166 |
|
167 | vcl.SGather(v1,v2,0);
|
168 |
|
169 | if (vcl.getProcessUnitID() == 0)
|
170 | {
|
171 | size_t n = vcl.getProcessingUnits();
|
172 |
|
173 | BOOST_REQUIRE_EQUAL(v2.size(),v1.size()*n);
|
174 |
|
175 | bool is_seven = true;
|
176 | for (size_t i = 0 ; i < v2.size() ; i++)
|
177 | {
|
178 | for (size_t j = 0 ; j < v2.get(i).size() ; j++)
|
179 | {
|
180 | is_seven &= (v2.get(i).template get<0>(j) == 7);
|
181 |
|
182 | for (size_t k = 0; k < v2.get(i).template get<1>(j).size(); k++)
|
183 | is_seven &= (v2.get(i).template get<1>(j).get(k) == 7);
|
184 |
|
185 | Point_test<float> p = v2.get(i).template get<2>(j);
|
186 |
|
187 | BOOST_REQUIRE(p.template get<0>() == 1);
|
188 | BOOST_REQUIRE(p.template get<1>() == 2);
|
189 | BOOST_REQUIRE(p.template get<2>() == 3);
|
190 | BOOST_REQUIRE(p.template get<3>() == 4);
|
191 |
|
192 | for (size_t l = 0 ; l < 3 ; l++)
|
193 | p.template get<4>()[l] = 5;
|
194 |
|
195 | for (size_t m = 0 ; m < 3 ; m++)
|
196 | {
|
197 | for (size_t n = 0 ; n < 3 ; n++)
|
198 | {
|
199 | p.template get<5>()[m][n] = 6;
|
200 | }
|
201 | }
|
202 | }
|
203 | }
|
204 | BOOST_REQUIRE_EQUAL(is_seven,true);
|
205 | }
|
206 | }
|
207 | }
|
208 |
|
209 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_gather_4)
|
210 | {
|
211 | for (size_t i = 0 ; i < 100 ; i++)
|
212 | {
|
213 | Vcluster<> & vcl = create_vcluster();
|
214 |
|
215 | if (vcl.getProcessingUnits() >= 32)
|
216 | {return;}
|
217 |
|
218 | size_t sz[] = {16,16};
|
219 |
|
220 | grid_cpu<2,Point_test<float>> g1(sz);
|
221 | g1.setMemory();
|
222 | fill_grid<2>(g1);
|
223 |
|
224 | openfpm::vector<grid_cpu<2,Point_test<float>>> v2;
|
225 |
|
226 | vcl.SGather(g1,v2,0);
|
227 |
|
228 | typedef Point_test<float> p;
|
229 |
|
230 | if (vcl.getProcessUnitID() == 0)
|
231 | {
|
232 | size_t n = vcl.getProcessingUnits();
|
233 | BOOST_REQUIRE_EQUAL(v2.size(),n);
|
234 |
|
235 | bool match = true;
|
236 | for (size_t i = 0 ; i < v2.size() ; i++)
|
237 | {
|
238 | auto it = v2.get(i).getIterator();
|
239 |
|
240 | while (it.isNext())
|
241 | {
|
242 | grid_key_dx<2> key = it.get();
|
243 |
|
244 | match &= (v2.get(i).template get<p::x>(key) == g1.template get<p::x>(key));
|
245 | match &= (v2.get(i).template get<p::y>(key) == g1.template get<p::y>(key));
|
246 | match &= (v2.get(i).template get<p::z>(key) == g1.template get<p::z>(key));
|
247 | match &= (v2.get(i).template get<p::s>(key) == g1.template get<p::s>(key));
|
248 |
|
249 | match &= (v2.get(i).template get<p::v>(key)[0] == g1.template get<p::v>(key)[0]);
|
250 | match &= (v2.get(i).template get<p::v>(key)[1] == g1.template get<p::v>(key)[1]);
|
251 | match &= (v2.get(i).template get<p::v>(key)[2] == g1.template get<p::v>(key)[2]);
|
252 |
|
253 | match &= (v2.get(i).template get<p::t>(key)[0][0] == g1.template get<p::t>(key)[0][0]);
|
254 | match &= (v2.get(i).template get<p::t>(key)[0][1] == g1.template get<p::t>(key)[0][1]);
|
255 | match &= (v2.get(i).template get<p::t>(key)[0][2] == g1.template get<p::t>(key)[0][2]);
|
256 | match &= (v2.get(i).template get<p::t>(key)[1][0] == g1.template get<p::t>(key)[1][0]);
|
257 | match &= (v2.get(i).template get<p::t>(key)[1][1] == g1.template get<p::t>(key)[1][1]);
|
258 | match &= (v2.get(i).template get<p::t>(key)[1][2] == g1.template get<p::t>(key)[1][2]);
|
259 | match &= (v2.get(i).template get<p::t>(key)[2][0] == g1.template get<p::t>(key)[2][0]);
|
260 | match &= (v2.get(i).template get<p::t>(key)[2][1] == g1.template get<p::t>(key)[2][1]);
|
261 | match &= (v2.get(i).template get<p::t>(key)[2][2] == g1.template get<p::t>(key)[2][2]);
|
262 |
|
263 | ++it;
|
264 | }
|
265 |
|
266 | }
|
267 | BOOST_REQUIRE_EQUAL(match,true);
|
268 | }
|
269 | }
|
270 | }
|
271 |
|
272 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_gather_5)
|
273 | {
|
274 | for (size_t i = 0 ; i < 100 ; i++)
|
275 | {
|
276 | Vcluster<> & vcl = create_vcluster();
|
277 |
|
278 | if (vcl.size() == 1)
|
279 | {return;}
|
280 |
|
281 | if (vcl.getProcessingUnits() >= 32)
|
282 | {return;}
|
283 |
|
284 | size_t sz[] = {16,16};
|
285 | grid_cpu<2,Point_test<float>> g1(sz);
|
286 | g1.setMemory();
|
287 | fill_grid<2>(g1);
|
288 | openfpm::vector<grid_cpu<2,Point_test<float>>> v1;
|
289 |
|
290 | v1.add(g1);
|
291 | v1.add(g1);
|
292 | v1.add(g1);
|
293 |
|
294 | openfpm::vector<grid_cpu<2,Point_test<float>>> v2;
|
295 |
|
296 | vcl.SGather(v1,v2,1);
|
297 |
|
298 | typedef Point_test<float> p;
|
299 |
|
300 | if (vcl.getProcessUnitID() == 1)
|
301 | {
|
302 | size_t n = vcl.getProcessingUnits();
|
303 | BOOST_REQUIRE_EQUAL(v2.size(),v1.size()*n);
|
304 |
|
305 | bool match = true;
|
306 | for (size_t i = 0 ; i < v2.size() ; i++)
|
307 | {
|
308 | auto it = v2.get(i).getIterator();
|
309 |
|
310 | while (it.isNext())
|
311 | {
|
312 | grid_key_dx<2> key = it.get();
|
313 |
|
314 | match &= (v2.get(i).template get<p::x>(key) == g1.template get<p::x>(key));
|
315 | match &= (v2.get(i).template get<p::y>(key) == g1.template get<p::y>(key));
|
316 | match &= (v2.get(i).template get<p::z>(key) == g1.template get<p::z>(key));
|
317 | match &= (v2.get(i).template get<p::s>(key) == g1.template get<p::s>(key));
|
318 |
|
319 | match &= (v2.get(i).template get<p::v>(key)[0] == g1.template get<p::v>(key)[0]);
|
320 | match &= (v2.get(i).template get<p::v>(key)[1] == g1.template get<p::v>(key)[1]);
|
321 | match &= (v2.get(i).template get<p::v>(key)[2] == g1.template get<p::v>(key)[2]);
|
322 |
|
323 | match &= (v2.get(i).template get<p::t>(key)[0][0] == g1.template get<p::t>(key)[0][0]);
|
324 | match &= (v2.get(i).template get<p::t>(key)[0][1] == g1.template get<p::t>(key)[0][1]);
|
325 | match &= (v2.get(i).template get<p::t>(key)[0][2] == g1.template get<p::t>(key)[0][2]);
|
326 | match &= (v2.get(i).template get<p::t>(key)[1][0] == g1.template get<p::t>(key)[1][0]);
|
327 | match &= (v2.get(i).template get<p::t>(key)[1][1] == g1.template get<p::t>(key)[1][1]);
|
328 | match &= (v2.get(i).template get<p::t>(key)[1][2] == g1.template get<p::t>(key)[1][2]);
|
329 | match &= (v2.get(i).template get<p::t>(key)[2][0] == g1.template get<p::t>(key)[2][0]);
|
330 | match &= (v2.get(i).template get<p::t>(key)[2][1] == g1.template get<p::t>(key)[2][1]);
|
331 | match &= (v2.get(i).template get<p::t>(key)[2][2] == g1.template get<p::t>(key)[2][2]);
|
332 |
|
333 | ++it;
|
334 | }
|
335 |
|
336 | }
|
337 | BOOST_REQUIRE_EQUAL(match,true);
|
338 | }
|
339 | }
|
340 | }
|
341 |
|
342 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_gather_6)
|
343 | {
|
344 | for (size_t i = 0 ; i < 100 ; i++)
|
345 | {
|
346 | Vcluster<> & vcl = create_vcluster();
|
347 |
|
348 | if (vcl.getProcessingUnits() >= 32)
|
349 | {return;}
|
350 |
|
351 | openfpm::vector<openfpm::vector<openfpm::vector<size_t>>> v1;
|
352 | openfpm::vector<openfpm::vector<size_t>> v1_int;
|
353 | openfpm::vector<size_t> v1_int2;
|
354 |
|
355 | v1_int2.add((size_t)7);
|
356 | v1_int2.add((size_t)7);
|
357 |
|
358 | v1_int.add(v1_int2);
|
359 | v1_int.add(v1_int2);
|
360 | v1_int.add(v1_int2);
|
361 |
|
362 | v1.add(v1_int);
|
363 | v1.add(v1_int);
|
364 | v1.add(v1_int);
|
365 | v1.add(v1_int);
|
366 |
|
367 | openfpm::vector<openfpm::vector<openfpm::vector<size_t>>> v2;
|
368 |
|
369 | vcl.SGather(v1,v2,0);
|
370 |
|
371 | if (vcl.getProcessUnitID() == 0)
|
372 | {
|
373 | size_t n = vcl.getProcessingUnits();
|
374 |
|
375 | BOOST_REQUIRE_EQUAL(v2.size(),v1.size()*n);
|
376 |
|
377 | bool is_seven = true;
|
378 | for (size_t i = 0 ; i < v2.size() ; i++)
|
379 | {
|
380 | for (size_t j = 0 ; j < v2.get(i).size() ; j++)
|
381 | {
|
382 | for (size_t k = 0 ; k < v2.get(i).get(j).size() ; k++)
|
383 | is_seven &= (v2.get(i).get(j).get(k) == 7);
|
384 | }
|
385 | }
|
386 | BOOST_REQUIRE_EQUAL(is_seven,true);
|
387 | }
|
388 | }
|
389 | }
|
390 |
|
391 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_gather_7)
|
392 | {
|
393 | for (size_t i = 0 ; i < 100 ; i++)
|
394 | {
|
395 | Vcluster<> & vcl = create_vcluster();
|
396 |
|
397 | if (vcl.getProcessingUnits() >= 32)
|
398 | {return;}
|
399 |
|
400 | openfpm::vector<Point_test<float>> v1;
|
401 |
|
402 | Point_test<float> p1;
|
403 | p1.fill();
|
404 |
|
405 | v1.resize(vcl.getProcessUnitID());
|
406 |
|
407 | for(size_t i = 0 ; i < vcl.getProcessUnitID() ; i++)
|
408 | {v1.get(i) = p1;}
|
409 |
|
410 | openfpm::vector<openfpm::vector<Point_test<float>>> v2;
|
411 |
|
412 | vcl.SGather(v1,v2,0);
|
413 |
|
414 | typedef Point_test<float> p;
|
415 |
|
416 | if (vcl.getProcessUnitID() == 0)
|
417 | {
|
418 | size_t n = vcl.getProcessingUnits();
|
419 | BOOST_REQUIRE_EQUAL(v2.size(),n);
|
420 |
|
421 | bool match = true;
|
422 |
|
423 | for (size_t i = 0 ; i < v2.size() ; i++)
|
424 | {
|
425 | for (size_t j = 0 ; j < v2.get(i).size() ; j++)
|
426 | {
|
427 | Point_test<float> p2 = v2.get(i).get(j);
|
428 | //BOOST_REQUIRE(p2 == p1);
|
429 |
|
430 | match &= (p2.template get<p::x>() == p1.template get<p::x>());
|
431 | match &= (p2.template get<p::y>() == p1.template get<p::y>());
|
432 | match &= (p2.template get<p::z>() == p1.template get<p::z>());
|
433 | match &= (p2.template get<p::s>() == p1.template get<p::s>());
|
434 |
|
435 | match &= (p2.template get<p::v>()[0] == p1.template get<p::v>()[0]);
|
436 | match &= (p2.template get<p::v>()[1] == p1.template get<p::v>()[1]);
|
437 | match &= (p2.template get<p::v>()[2] == p1.template get<p::v>()[2]);
|
438 |
|
439 | match &= (p2.template get<p::t>()[0][0] == p1.template get<p::t>()[0][0]);
|
440 | match &= (p2.template get<p::t>()[0][1] == p1.template get<p::t>()[0][1]);
|
441 | match &= (p2.template get<p::t>()[0][2] == p1.template get<p::t>()[0][2]);
|
442 | match &= (p2.template get<p::t>()[1][0] == p1.template get<p::t>()[1][0]);
|
443 | match &= (p2.template get<p::t>()[1][1] == p1.template get<p::t>()[1][1]);
|
444 | match &= (p2.template get<p::t>()[1][2] == p1.template get<p::t>()[1][2]);
|
445 | match &= (p2.template get<p::t>()[2][0] == p1.template get<p::t>()[2][0]);
|
446 | match &= (p2.template get<p::t>()[2][1] == p1.template get<p::t>()[2][1]);
|
447 | match &= (p2.template get<p::t>()[2][2] == p1.template get<p::t>()[2][2]);
|
448 | }
|
449 | }
|
450 | BOOST_REQUIRE_EQUAL(match,true);
|
451 | }
|
452 | }
|
453 | }
|
454 |
|
455 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_gather_8)
|
456 | {
|
457 | for (size_t i = 0 ; i < 100 ; i++)
|
458 | {
|
459 | Vcluster<> & vcl = create_vcluster();
|
460 |
|
461 | if (vcl.getProcessingUnits() >= 32)
|
462 | {return;}
|
463 |
|
464 | openfpm::vector<Box<3,size_t>> v1;
|
465 |
|
466 | Box<3,size_t> bx;
|
467 | bx.setLow(0, 1);
|
468 | bx.setLow(1, 2);
|
469 | bx.setLow(2, 3);
|
470 | bx.setHigh(0, 4);
|
471 | bx.setHigh(1, 5);
|
472 | bx.setHigh(2, 6);
|
473 |
|
474 |
|
475 | v1.resize(vcl.getProcessUnitID());
|
476 |
|
477 | for(size_t i = 0 ; i < vcl.getProcessUnitID() ; i++)
|
478 | v1.get(i) = bx;
|
479 |
|
480 | openfpm::vector<openfpm::vector<Box<3,size_t>>> v2;
|
481 |
|
482 | vcl.SGather(v1,v2,0);
|
483 |
|
484 | if (vcl.getProcessUnitID() == 0)
|
485 | {
|
486 | size_t n = vcl.getProcessingUnits();
|
487 | BOOST_REQUIRE_EQUAL(v2.size(),n);
|
488 |
|
489 | for (size_t i = 0 ; i < v2.size() ; i++)
|
490 | {
|
491 | for (size_t j = 0 ; j < v2.get(i).size() ; j++)
|
492 | {
|
493 | Box<3,size_t> b2 = v2.get(i).get(j);
|
494 | BOOST_REQUIRE(bx == b2);
|
495 | }
|
496 | }
|
497 | }
|
498 | }
|
499 | }
|
500 |
|
501 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_struct_gather)
|
502 | {
|
503 | for (size_t i = 0 ; i < 100 ; i++)
|
504 | {
|
505 | Vcluster<> & vcl = create_vcluster();
|
506 |
|
507 | if (vcl.getProcessingUnits() >= 32)
|
508 | return;
|
509 |
|
510 | openfpm::vector<Aexample> v1;
|
511 | v1.resize(vcl.getProcessUnitID());
|
512 |
|
513 | for(size_t i = 0 ; i < vcl.getProcessUnitID() ; i++)
|
514 | {
|
515 | v1.get(i).a = 5;
|
516 | v1.get(i).b = 10.0;
|
517 | v1.get(i).c = 11.0;
|
518 | }
|
519 |
|
520 | openfpm::vector<Aexample> v2;
|
521 |
|
522 | vcl.SGather(v1,v2,(i%vcl.getProcessingUnits()));
|
523 |
|
524 | if (vcl.getProcessUnitID() == (i%vcl.getProcessingUnits()))
|
525 | {
|
526 | size_t n = vcl.getProcessingUnits();
|
527 | BOOST_REQUIRE_EQUAL(v2.size(),n*(n-1)/2);
|
528 |
|
529 | bool is_correct = true;
|
530 | for (size_t i = 0 ; i < v2.size() ; i++)
|
531 | {
|
532 | is_correct &= (v2.get(i).a == 5);
|
533 | is_correct &= (v2.get(i).b == 10.0);
|
534 | is_correct &= (v2.get(i).c == 11.0);
|
535 | }
|
536 |
|
537 | BOOST_REQUIRE_EQUAL(is_correct,true);
|
538 | }
|
539 | if (vcl.getProcessUnitID() == 0 && i == 99)
|
540 | std::cout << "Semantic gather test stop" << std::endl;
|
541 | }
|
542 | }
|
543 |
|
544 |
|
545 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_layout_inte_gather)
|
546 | {
|
547 | test_different_layouts<HeapMemory,memory_traits_inte>();
|
548 | test_different_layouts<HeapMemory,memory_traits_lin>();
|
549 | }
|
550 |
|
551 | #define SSCATTER_MAX 7
|
552 |
|
553 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_scatter)
|
554 | {
|
555 | for (size_t i = 0 ; i < 100 ; i++)
|
556 | {
|
557 | Vcluster<> & vcl = create_vcluster();
|
558 |
|
559 | if (vcl.getProcessingUnits() >= 32)
|
560 | {return;}
|
561 |
|
562 | size_t nc = vcl.getProcessingUnits() / SSCATTER_MAX;
|
563 | size_t nr = vcl.getProcessingUnits() - nc * SSCATTER_MAX;
|
564 | nr = ((nr-1) * nr) / 2;
|
565 |
|
566 | size_t n_elements = nc * SSCATTER_MAX * (SSCATTER_MAX - 1) / 2 + nr;
|
567 |
|
568 | openfpm::vector<size_t> v1;
|
569 | v1.resize(n_elements);
|
570 |
|
571 | for(size_t i = 0 ; i < n_elements ; i++)
|
572 | {v1.get(i) = 5;}
|
573 |
|
574 | //! [Scatter the data from master]
|
575 |
|
576 | openfpm::vector<size_t> v2;
|
577 |
|
578 | openfpm::vector<size_t> prc;
|
579 | openfpm::vector<size_t> sz;
|
580 |
|
581 | // Scatter pattern
|
582 | for (size_t i = 0 ; i < vcl.getProcessingUnits() ; i++)
|
583 | {
|
584 | sz.add(i % SSCATTER_MAX);
|
585 | prc.add(i);
|
586 | }
|
587 |
|
588 | vcl.SScatter(v1,v2,prc,sz,(i%vcl.getProcessingUnits()));
|
589 |
|
590 | //! [Scatter the data from master]
|
591 |
|
592 | BOOST_REQUIRE_EQUAL(v2.size(),vcl.getProcessUnitID() % SSCATTER_MAX);
|
593 |
|
594 | bool is_five = true;
|
595 | for (size_t i = 0 ; i < v2.size() ; i++)
|
596 | is_five &= (v2.get(i) == 5);
|
597 |
|
598 | BOOST_REQUIRE_EQUAL(is_five,true);
|
599 | }
|
600 | }
|
601 |
|
602 |
|
603 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_struct_scatter)
|
604 | {
|
605 | for (size_t i = 0 ; i < 100 ; i++)
|
606 | {
|
607 | Vcluster<> & vcl = create_vcluster();
|
608 |
|
609 | if (vcl.getProcessingUnits() >= 32)
|
610 | {return;}
|
611 |
|
612 | size_t nc = vcl.getProcessingUnits() / SSCATTER_MAX;
|
613 | size_t nr = vcl.getProcessingUnits() - nc * SSCATTER_MAX;
|
614 | nr = ((nr-1) * nr) / 2;
|
615 |
|
616 | size_t n_elements = nc * SSCATTER_MAX * (SSCATTER_MAX - 1) / 2 + nr;
|
617 |
|
618 | openfpm::vector<size_t> v1;
|
619 | v1.resize(n_elements);
|
620 |
|
621 | for(size_t i = 0 ; i < n_elements ; i++)
|
622 | v1.get(i) = 5;
|
623 |
|
624 | openfpm::vector<size_t> v2;
|
625 |
|
626 | openfpm::vector<size_t> prc;
|
627 | openfpm::vector<size_t> sz;
|
628 |
|
629 | // Scatter pattern
|
630 | for (size_t i = 0 ; i < vcl.getProcessingUnits() ; i++)
|
631 | {
|
632 | sz.add(i % SSCATTER_MAX);
|
633 | prc.add(i);
|
634 | }
|
635 |
|
636 | vcl.SScatter(v1,v2,prc,sz,(i%vcl.getProcessingUnits()));
|
637 |
|
638 | if (vcl.getProcessUnitID() == (i%vcl.getProcessingUnits()))
|
639 | {
|
640 | BOOST_REQUIRE_EQUAL(v2.size(),vcl.getProcessUnitID() % SSCATTER_MAX);
|
641 |
|
642 | bool is_five = true;
|
643 | for (size_t i = 0 ; i < v2.size() ; i++)
|
644 | is_five &= (v2.get(i) == 5);
|
645 |
|
646 | BOOST_REQUIRE_EQUAL(is_five,true);
|
647 | }
|
648 | }
|
649 | }
|
650 |
|
651 | template<unsigned int impl, typename VCluster_type, typename vector1, typename vector2, typename vector3>
|
652 | void scomm_unknown(VCluster_type & vcl, vector1 & v1, vector2 & v2, vector3 & prc_send, vector3 & prc_recv, vector3 & sz_recv)
|
653 | {
|
654 | if (impl == NBX)
|
655 | {
|
656 | // Send and receive from the other processor v2 container the received data
|
657 | // Because in this case v2 is an openfpm::vector<size_t>, all the received
|
658 | // vector are concatenated one over the other. For example if the processor receive 3 openfpm::vector<size_t>
|
659 | // each having 3,4,5 elements. v2 will be a vector of 12 elements
|
660 | vcl.SSendRecv(v1,v2,prc_send,prc_recv,sz_recv);
|
661 | }
|
662 | else
|
663 | {
|
664 | vcl.SSendRecvAsync(v1,v2,prc_send,prc_recv,sz_recv);
|
665 |
|
666 | vcl.progressCommunication();
|
667 | usleep(1000);
|
668 | vcl.progressCommunication();
|
669 | usleep(10000);
|
670 | vcl.progressCommunication();
|
671 | usleep(1000);
|
672 |
|
673 | vcl.SSendRecvWait(v1,v2,prc_send,prc_recv,sz_recv);
|
674 | }
|
675 | }
|
676 |
|
677 |
|
678 | template<unsigned int impl>
|
679 | void Vcluster_semantic_sendrecv_all_unknown_impl()
|
680 | {
|
681 | openfpm::vector<size_t> prc_recv2;
|
682 | openfpm::vector<size_t> prc_recv3;
|
683 |
|
684 | openfpm::vector<size_t> sz_recv2;
|
685 | openfpm::vector<size_t> sz_recv3;
|
686 |
|
687 | for (size_t i = 0 ; i < 100 ; i++)
|
688 | {
|
689 | Vcluster<> & vcl = create_vcluster();
|
690 |
|
691 | if (vcl.getProcessUnitID() == 0 && i == 0)
|
692 | std::cout << "Semantic sendrecv test start" << std::endl;
|
693 |
|
694 |
|
695 | if (vcl.getProcessingUnits() >= 32)
|
696 | {return;}
|
697 |
|
698 | prc_recv2.clear();
|
699 | prc_recv3.clear();
|
700 | openfpm::vector<size_t> prc_send;
|
701 | sz_recv2.clear();
|
702 | sz_recv3.clear();
|
703 |
|
704 | //! [dsde with complex objects1]
|
705 |
|
706 | // A vector of vector we want to send each internal vector to one specified processor
|
707 | openfpm::vector<openfpm::vector<size_t>> v1;
|
708 |
|
709 | // We use this empty vector to receive data
|
710 | openfpm::vector<size_t> v2;
|
711 |
|
712 | // We use this empty vector to receive data
|
713 | openfpm::vector<openfpm::vector<size_t>> v3;
|
714 |
|
715 | // in this case each processor will send a message of different size to all the other processor
|
716 | // but can also be a subset of processors
|
717 | v1.resize(vcl.getProcessingUnits());
|
718 |
|
719 | // We fill the send buffer with some sense-less data
|
720 | for(size_t i = 0 ; i < v1.size() ; i++)
|
721 | {
|
722 | // each vector is filled with a different message size
|
723 | for (size_t j = 0 ; j < i % SSCATTER_MAX ; j++)
|
724 | v1.get(i).add(j);
|
725 |
|
726 | // generate the sending list (in this case the sendinf list is all the other processor)
|
727 | // but in general can be some of them and totally random
|
728 | prc_send.add((i + vcl.getProcessUnitID()) % vcl.getProcessingUnits());
|
729 | }
|
730 |
|
731 | // Send and receive from the other processor v2 container the received data
|
732 | // Because in this case v2 is an openfpm::vector<size_t>, all the received
|
733 | // vector are concatenated one over the other. For example if the processor receive 3 openfpm::vector<size_t>
|
734 | // each having 3,4,5 elements. v2 will be a vector of 12 elements
|
735 | //vcl.SSendRecv(v1,v2,prc_send,prc_recv2,sz_recv2);
|
736 | scomm_unknown<impl>(vcl,v1,v2,prc_send,prc_recv2,sz_recv2);
|
737 |
|
738 | // Send and receive from the other processors v2 contain the received data
|
739 | // Because in this case v2 is an openfpm::vector<openfpm::vector<size_t>>, all the vector from
|
740 | // each processor will be collected. For example if the processor receive 3 openfpm::vector<size_t>
|
741 | // each having 3,4,5 elements. v2 will be a vector of vector of 3 elements (openfpm::vector) and
|
742 | // each element will be respectivly 3,4,5 elements
|
743 |
|
744 | scomm_unknown<impl>(vcl,v1,v3,prc_send,prc_recv3,sz_recv3);
|
745 |
|
746 | //! [dsde with complex objects1]
|
747 |
|
748 | size_t nc = vcl.getProcessingUnits() / SSCATTER_MAX;
|
749 | size_t nr = vcl.getProcessingUnits() - nc * SSCATTER_MAX;
|
750 | nr = ((nr-1) * nr) / 2;
|
751 |
|
752 | size_t n_ele = nc * SSCATTER_MAX * (SSCATTER_MAX - 1) / 2 + nr;
|
753 |
|
754 | BOOST_REQUIRE_EQUAL(v2.size(),n_ele);
|
755 | size_t nc_check = (vcl.getProcessingUnits()-1) / SSCATTER_MAX;
|
756 | BOOST_REQUIRE_EQUAL(v3.size(),vcl.getProcessingUnits()-1-nc_check);
|
757 |
|
758 | bool match = true;
|
759 | size_t s = 0;
|
760 |
|
761 | for (size_t i = 0 ; i < sz_recv2.size() ; i++)
|
762 | {
|
763 | for (size_t j = 0 ; j < sz_recv2.get(i); j++)
|
764 | {
|
765 | match &= v2.get(s+j) == j;
|
766 | }
|
767 | s += sz_recv2.get(i);
|
768 | }
|
769 |
|
770 | BOOST_REQUIRE_EQUAL(match,true);
|
771 |
|
772 | for (size_t i = 0 ; i < v3.size() ; i++)
|
773 | {
|
774 | for (size_t j = 0 ; j < v3.get(i).size() ; j++)
|
775 | {
|
776 | match &= v3.get(i).get(j) == j;
|
777 | }
|
778 | }
|
779 |
|
780 | BOOST_REQUIRE_EQUAL(match,true);
|
781 | }
|
782 | }
|
783 |
|
784 | void Vcluster_semantic_sendrecv_all_unknown_multiple_impl()
|
785 | {
|
786 | openfpm::vector<size_t> prc_recv2[NQUEUE];
|
787 | openfpm::vector<size_t> prc_recv3[NQUEUE];
|
788 |
|
789 | openfpm::vector<size_t> sz_recv2[NQUEUE];
|
790 | openfpm::vector<size_t> sz_recv3[NQUEUE];
|
791 |
|
792 | openfpm::vector<size_t> prc_send[NQUEUE];
|
793 | openfpm::vector<openfpm::vector<size_t>> v1[NQUEUE];
|
794 | openfpm::vector<size_t> v2[NQUEUE];
|
795 | openfpm::vector<openfpm::vector<size_t>> v3[NQUEUE];
|
796 |
|
797 | for (size_t i = 0 ; i < 100 ; i++)
|
798 | {
|
799 | Vcluster<> & vcl = create_vcluster();
|
800 |
|
801 | for (size_t k = 0 ; k < NQUEUE / 2 ; k++)
|
802 | {
|
803 | if (vcl.getProcessUnitID() == 0 && i == 0)
|
804 | {std::cout << "Semantic sendrecv test start" << std::endl;}
|
805 |
|
806 |
|
807 | if (vcl.getProcessingUnits() >= 32)
|
808 | {return;}
|
809 |
|
810 | prc_recv2[k].clear();
|
811 | prc_recv3[k].clear();
|
812 | prc_send[k].clear();
|
813 | sz_recv2[k].clear();
|
814 | sz_recv3[k].clear();
|
815 |
|
816 | //! [dsde with complex objects1]
|
817 |
|
818 | // A vector of vector we want to send each internal vector to one specified processor
|
819 | v1[k].clear();
|
820 |
|
821 | // We use this empty vector to receive data
|
822 | v2[k].clear();
|
823 |
|
824 | // We use this empty vector to receive data
|
825 | v3[k].clear();
|
826 |
|
827 | // in this case each processor will send a message of different size to all the other processor
|
828 | // but can also be a subset of processors
|
829 | v1[k].resize(vcl.getProcessingUnits());
|
830 |
|
831 | // We fill the send buffer with some sense-less data
|
832 | for(size_t i = 0 ; i < v1[k].size() ; i++)
|
833 | {
|
834 | // each vector is filled with a different message size
|
835 | for (size_t j = 0 ; j < i % SSCATTER_MAX ; j++)
|
836 | {v1[k].get(i).add(j);}
|
837 |
|
838 | // generate the sending list (in this case the sendinf list is all the other processor)
|
839 | // but in general can be some of them and totally random
|
840 | prc_send[k].add((i + vcl.getProcessUnitID()) % vcl.getProcessingUnits());
|
841 | }
|
842 |
|
843 | // Send and receive from the other processor v2 container the received data
|
844 | // Because in this case v2 is an openfpm::vector<size_t>, all the received
|
845 | // vector are concatenated one over the other. For example if the processor receive 3 openfpm::vector<size_t>
|
846 | // each having 3,4,5 elements. v2 will be a vector of 12 elements
|
847 | //vcl.SSendRecv(v1,v2,prc_send,prc_recv2,sz_recv2);
|
848 |
|
849 | vcl.SSendRecvAsync(v1[k],v2[k],prc_send[k],prc_recv2[k],sz_recv2[k]);
|
850 |
|
851 | // Send and receive from the other processors v2 contain the received data
|
852 | // Because in this case v2 is an openfpm::vector<openfpm::vector<size_t>>, all the vector from
|
853 | // each processor will be collected. For example if the processor receive 3 openfpm::vector<size_t>
|
854 | // each having 3,4,5 elements. v2 will be a vector of vector of 3 elements (openfpm::vector) and
|
855 | // each element will be respectivly 3,4,5 elements
|
856 |
|
857 | vcl.SSendRecvAsync(v1[k],v3[k],prc_send[k],prc_recv3[k],sz_recv3[k]);
|
858 | }
|
859 |
|
860 | vcl.progressCommunication();
|
861 | usleep(1000);
|
862 | vcl.progressCommunication();
|
863 | usleep(10000);
|
864 | vcl.progressCommunication();
|
865 | usleep(1000);
|
866 |
|
867 | for (size_t k = 0 ; k < NQUEUE / 2 ; k++)
|
868 | {
|
869 | vcl.SSendRecvWait(v1[k],v2[k],prc_send[k],prc_recv2[k],sz_recv2[k]);
|
870 | vcl.SSendRecvWait(v1[k],v3[k],prc_send[k],prc_recv3[k],sz_recv3[k]);
|
871 | }
|
872 |
|
873 | //! [dsde with complex objects1]
|
874 |
|
875 | for (size_t k = 0 ; k < NQUEUE / 2 ; k++)
|
876 | {
|
877 | size_t nc = vcl.getProcessingUnits() / SSCATTER_MAX;
|
878 | size_t nr = vcl.getProcessingUnits() - nc * SSCATTER_MAX;
|
879 | nr = ((nr-1) * nr) / 2;
|
880 |
|
881 | size_t n_ele = nc * SSCATTER_MAX * (SSCATTER_MAX - 1) / 2 + nr;
|
882 |
|
883 | BOOST_REQUIRE_EQUAL(v2[k].size(),n_ele);
|
884 | size_t nc_check = (vcl.getProcessingUnits()-1) / SSCATTER_MAX;
|
885 | BOOST_REQUIRE_EQUAL(v3[k].size(),vcl.getProcessingUnits()-1-nc_check);
|
886 |
|
887 | bool match = true;
|
888 | size_t s = 0;
|
889 |
|
890 | for (size_t i = 0 ; i < sz_recv2[k].size() ; i++)
|
891 | {
|
892 | for (size_t j = 0 ; j < sz_recv2[k].get(i); j++)
|
893 | {
|
894 | match &= v2[k].get(s+j) == j;
|
895 | }
|
896 | s += sz_recv2[k].get(i);
|
897 | }
|
898 |
|
899 | BOOST_REQUIRE_EQUAL(match,true);
|
900 |
|
901 | for (size_t i = 0 ; i < v3[k].size() ; i++)
|
902 | {
|
903 | for (size_t j = 0 ; j < v3[k].get(i).size() ; j++)
|
904 | {
|
905 | match &= v3[k].get(i).get(j) == j;
|
906 | }
|
907 | }
|
908 |
|
909 | BOOST_REQUIRE_EQUAL(match,true);
|
910 | }
|
911 | }
|
912 | }
|
913 |
|
914 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_all_unknown)
|
915 | {
|
916 | Vcluster_semantic_sendrecv_all_unknown_impl<NBX>();
|
917 | }
|
918 |
|
919 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_all_unknown_async)
|
920 | {
|
921 | Vcluster_semantic_sendrecv_all_unknown_impl<NBX_ASYNC>();
|
922 | }
|
923 |
|
924 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_all_multiple_unknown)
|
925 | {
|
926 | Vcluster_semantic_sendrecv_all_unknown_multiple_impl();
|
927 | }
|
928 |
|
929 | template<unsigned int impl, typename VCluster_type, typename vector1, typename vector2, typename vector3>
|
930 | void scomm_known(VCluster_type & vcl, vector1 & v1, vector2 & v2, vector3 & prc_send, vector3 & prc_recv, vector3 & sz_recv)
|
931 | {
|
932 | if (impl == NBX)
|
933 | {
|
934 | // Send and receive from the other processor v2 container the received data
|
935 | // Because in this case v2 is an openfpm::vector<size_t>, all the received
|
936 | // vector are concatenated one over the other. For example if the processor receive 3 openfpm::vector<size_t>
|
937 | // each having 3,4,5 elements. v2 will be a vector of 12 elements
|
938 | vcl.SSendRecv(v1,v2,prc_send,prc_recv,sz_recv,RECEIVE_KNOWN | KNOWN_ELEMENT_OR_BYTE);
|
939 | }
|
940 | else
|
941 | {
|
942 | vcl.SSendRecvAsync(v1,v2,prc_send,prc_recv,sz_recv,RECEIVE_KNOWN | KNOWN_ELEMENT_OR_BYTE);
|
943 |
|
944 | vcl.progressCommunication();
|
945 | usleep(1000);
|
946 | vcl.progressCommunication();
|
947 | usleep(1000);
|
948 | vcl.progressCommunication();
|
949 | usleep(1000);
|
950 |
|
951 | vcl.SSendRecvWait(v1,v2,prc_send,prc_recv,sz_recv,RECEIVE_KNOWN | KNOWN_ELEMENT_OR_BYTE);
|
952 | }
|
953 | }
|
954 |
|
955 |
|
956 | template<unsigned int impl>
|
957 | void Vcluster_semantic_sendrecv_receive_size_known_impl()
|
958 | {
|
959 | openfpm::vector<size_t> prc_recv2;
|
960 | openfpm::vector<size_t> prc_recv3;
|
961 |
|
962 | openfpm::vector<size_t> sz_recv2;
|
963 | openfpm::vector<size_t> sz_recv3;
|
964 |
|
965 | for (size_t i = 0 ; i < 100 ; i++)
|
966 | {
|
967 | Vcluster<> & vcl = create_vcluster();
|
968 |
|
969 | if (vcl.getProcessUnitID() == 0 && i == 0)
|
970 | {std::cout << "Semantic sendrecv test start" << std::endl;}
|
971 |
|
972 |
|
973 | if (vcl.getProcessingUnits() >= 32)
|
974 | {return;}
|
975 |
|
976 | openfpm::vector<size_t> prc_send;
|
977 |
|
978 | openfpm::vector<openfpm::vector<size_t>> v1;
|
979 | openfpm::vector<size_t> v2;
|
980 | openfpm::vector<openfpm::vector<size_t>> v3;
|
981 |
|
982 | v1.resize(vcl.getProcessingUnits());
|
983 |
|
984 | size_t nc = vcl.getProcessingUnits() / SSCATTER_MAX;
|
985 | size_t nr = vcl.getProcessingUnits() - nc * SSCATTER_MAX;
|
986 | nr = ((nr-1) * nr) / 2;
|
987 |
|
988 | size_t n_ele = nc * SSCATTER_MAX * (SSCATTER_MAX - 1) / 2 + nr;
|
989 |
|
990 | for(size_t i = 0 ; i < v1.size() ; i++)
|
991 | {
|
992 | for (size_t j = 0 ; j < i % SSCATTER_MAX ; j++)
|
993 | v1.get(i).add(j);
|
994 |
|
995 | prc_send.add((i + vcl.getProcessUnitID()) % vcl.getProcessingUnits());
|
996 | }
|
997 |
|
998 | // We receive to fill prc_recv2 and sz_recv2
|
999 | scomm_unknown<impl>(vcl,v1,v2,prc_send,prc_recv2,sz_recv2);
|
1000 |
|
1001 | // carefull because SSendRecv does not fill prc_recv2 with processor that has a sending size of 0
|
1002 | for(size_t i = 0 ; i < v1.size() ; i++)
|
1003 | {
|
1004 | if( i % SSCATTER_MAX == 0)
|
1005 | {
|
1006 | prc_recv2.add((i + vcl.getProcessUnitID()) % vcl.getProcessingUnits());
|
1007 | sz_recv2.add(0);
|
1008 | }
|
1009 | }
|
1010 |
|
1011 | // We reset v2 and we receive again saying that the processors are known and we know the elements
|
1012 | v2.clear();
|
1013 | //vcl.SSendRecv(v1,v2,prc_send,prc_recv2,sz_recv2,RECEIVE_KNOWN | KNOWN_ELEMENT_OR_BYTE);
|
1014 | scomm_known<impl>(vcl,v1,v2,prc_send,prc_recv2,sz_recv2);
|
1015 | scomm_unknown<impl>(vcl,v1,v3,prc_send,prc_recv3,sz_recv3);
|
1016 |
|
1017 | BOOST_REQUIRE_EQUAL(v2.size(),n_ele);
|
1018 | size_t nc_check = (vcl.getProcessingUnits()-1) / SSCATTER_MAX;
|
1019 | BOOST_REQUIRE_EQUAL(v3.size(),vcl.getProcessingUnits()-1-nc_check);
|
1020 |
|
1021 | bool match = true;
|
1022 | size_t s = 0;
|
1023 |
|
1024 | for (size_t i = 0 ; i < sz_recv2.size() ; i++)
|
1025 | {
|
1026 | for (size_t j = 0 ; j < sz_recv2.get(i); j++)
|
1027 | {
|
1028 | match &= v2.get(s+j) == j;
|
1029 | }
|
1030 | s += sz_recv2.get(i);
|
1031 | }
|
1032 |
|
1033 | BOOST_REQUIRE_EQUAL(match,true);
|
1034 |
|
1035 | for (size_t i = 0 ; i < v3.size() ; i++)
|
1036 | {
|
1037 | for (size_t j = 0 ; j < v3.get(i).size() ; j++)
|
1038 | {
|
1039 | match &= v3.get(i).get(j) == j;
|
1040 | }
|
1041 | }
|
1042 |
|
1043 | BOOST_REQUIRE_EQUAL(match,true);
|
1044 | }
|
1045 | }
|
1046 |
|
1047 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_receive_size_known)
|
1048 | {
|
1049 | Vcluster_semantic_sendrecv_receive_size_known_impl<NBX>();
|
1050 | }
|
1051 |
|
1052 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_receive_size_known_async)
|
1053 | {
|
1054 | Vcluster_semantic_sendrecv_receive_size_known_impl<NBX_ASYNC>();
|
1055 | }
|
1056 |
|
1057 | template<unsigned int impl, typename VCluster_type, typename vector1, typename vector2, typename vector3>
|
1058 | void scomm_known2(VCluster_type & vcl, vector1 & v1, vector2 & v2, vector3 & prc_send, vector3 & prc_recv, vector3 & sz_recv)
|
1059 | {
|
1060 | if (impl == NBX)
|
1061 | {
|
1062 | // Send and receive from the other processor v2 container the received data
|
1063 | // Because in this case v2 is an openfpm::vector<size_t>, all the received
|
1064 | // vector are concatenated one over the other. For example if the processor receive 3 openfpm::vector<size_t>
|
1065 | // each having 3,4,5 elements. v2 will be a vector of 12 elements
|
1066 | vcl.SSendRecv(v1,v2,prc_send,prc_recv,sz_recv,RECEIVE_KNOWN);
|
1067 | }
|
1068 | else
|
1069 | {
|
1070 | vcl.SSendRecvAsync(v1,v2,prc_send,prc_recv,sz_recv,RECEIVE_KNOWN);
|
1071 |
|
1072 | vcl.progressCommunication();
|
1073 | usleep(1000);
|
1074 | vcl.progressCommunication();
|
1075 | usleep(1000);
|
1076 | vcl.progressCommunication();
|
1077 | usleep(1000);
|
1078 |
|
1079 | vcl.SSendRecvWait(v1,v2,prc_send,prc_recv,sz_recv,RECEIVE_KNOWN);
|
1080 | }
|
1081 | }
|
1082 |
|
1083 | template<unsigned int impl>
|
1084 | void Vcluster_semantic_sendrecv_receive_known_impl()
|
1085 | {
|
1086 | openfpm::vector<size_t> prc_recv2;
|
1087 | openfpm::vector<size_t> prc_recv3;
|
1088 |
|
1089 | openfpm::vector<size_t> sz_recv2;
|
1090 | openfpm::vector<size_t> sz_recv3;
|
1091 |
|
1092 | for (size_t i = 0 ; i < 100 ; i++)
|
1093 | {
|
1094 | Vcluster<> & vcl = create_vcluster();
|
1095 |
|
1096 | if (vcl.getProcessUnitID() == 0 && i == 0)
|
1097 | {std::cout << "Semantic sendrecv test start" << std::endl;}
|
1098 |
|
1099 |
|
1100 | if (vcl.getProcessingUnits() >= 32)
|
1101 | {return;}
|
1102 |
|
1103 | openfpm::vector<size_t> prc_send;
|
1104 |
|
1105 | openfpm::vector<openfpm::vector<size_t>> v1;
|
1106 | openfpm::vector<size_t> v2;
|
1107 | openfpm::vector<openfpm::vector<size_t>> v3;
|
1108 |
|
1109 | v1.resize(vcl.getProcessingUnits());
|
1110 |
|
1111 | size_t nc = vcl.getProcessingUnits() / SSCATTER_MAX;
|
1112 | size_t nr = vcl.getProcessingUnits() - nc * SSCATTER_MAX;
|
1113 | nr = ((nr-1) * nr) / 2;
|
1114 |
|
1115 | size_t n_ele = nc * SSCATTER_MAX * (SSCATTER_MAX - 1) / 2 + nr;
|
1116 |
|
1117 | for(size_t i = 0 ; i < v1.size() ; i++)
|
1118 | {
|
1119 | for (size_t j = 0 ; j < i % SSCATTER_MAX ; j++)
|
1120 | v1.get(i).add(j);
|
1121 |
|
1122 | prc_send.add((i + vcl.getProcessUnitID()) % vcl.getProcessingUnits());
|
1123 | }
|
1124 |
|
1125 | // Receive to fill prc_recv2
|
1126 | scomm_unknown<impl>(vcl,v1,v2,prc_send,prc_recv2,sz_recv2);
|
1127 |
|
1128 | // carefull because SSendRecv does not fill prc_recv2 with processor that has a sending size of 0
|
1129 |
|
1130 | for(size_t i = 0 ; i < v1.size() ; i++)
|
1131 | {
|
1132 | if( i % SSCATTER_MAX == 0)
|
1133 | {prc_recv2.add((i + vcl.getProcessUnitID()) % vcl.getProcessingUnits());}
|
1134 | }
|
1135 |
|
1136 | // Reset v2 and sz_recv2
|
1137 |
|
1138 | v2.clear();
|
1139 | sz_recv2.clear();
|
1140 |
|
1141 | scomm_known2<impl>(vcl,v1,v2,prc_send,prc_recv2,sz_recv2);
|
1142 | scomm_unknown<impl>(vcl,v1,v3,prc_send,prc_recv3,sz_recv3);
|
1143 |
|
1144 | BOOST_REQUIRE_EQUAL(v2.size(),n_ele);
|
1145 | size_t nc_check = (vcl.getProcessingUnits()-1) / SSCATTER_MAX;
|
1146 | BOOST_REQUIRE_EQUAL(v3.size(),vcl.getProcessingUnits()-1-nc_check);
|
1147 |
|
1148 | bool match = true;
|
1149 | size_t s = 0;
|
1150 |
|
1151 | for (size_t i = 0 ; i < sz_recv2.size() ; i++)
|
1152 | {
|
1153 | for (size_t j = 0 ; j < sz_recv2.get(i); j++)
|
1154 | {
|
1155 | match &= v2.get(s+j) == j;
|
1156 | }
|
1157 | s += sz_recv2.get(i);
|
1158 | }
|
1159 |
|
1160 | BOOST_REQUIRE_EQUAL(match,true);
|
1161 |
|
1162 | for (size_t i = 0 ; i < v3.size() ; i++)
|
1163 | {
|
1164 | for (size_t j = 0 ; j < v3.get(i).size() ; j++)
|
1165 | {
|
1166 | match &= v3.get(i).get(j) == j;
|
1167 | }
|
1168 | }
|
1169 |
|
1170 | BOOST_REQUIRE_EQUAL(match,true);
|
1171 | }
|
1172 | }
|
1173 |
|
1174 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_receive_known)
|
1175 | {
|
1176 | Vcluster_semantic_sendrecv_receive_known_impl<NBX>();
|
1177 | }
|
1178 |
|
1179 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_receive_known_async)
|
1180 | {
|
1181 | Vcluster_semantic_sendrecv_receive_known_impl<NBX_ASYNC>();
|
1182 | }
|
1183 |
|
1184 | template<unsigned int impl>
|
1185 | void Vcluster_semantic_struct_sendrecv_impl()
|
1186 | {
|
1187 | for (size_t i = 0 ; i < 100 ; i++)
|
1188 | {
|
1189 | Vcluster<> & vcl = create_vcluster();
|
1190 |
|
1191 | if (vcl.getProcessingUnits() >= 32)
|
1192 | {return;}
|
1193 |
|
1194 | openfpm::vector<size_t> prc_recv2;
|
1195 | openfpm::vector<size_t> prc_recv3;
|
1196 | openfpm::vector<size_t> prc_send;
|
1197 | openfpm::vector<size_t> sz_recv2;
|
1198 | openfpm::vector<size_t> sz_recv3;
|
1199 | openfpm::vector<openfpm::vector<Box<3,size_t>>> v1;
|
1200 | openfpm::vector<Box<3,size_t>> v2;
|
1201 | openfpm::vector<openfpm::vector<Box<3,size_t>>> v3;
|
1202 |
|
1203 | v1.resize(vcl.getProcessingUnits());
|
1204 |
|
1205 | size_t nc = vcl.getProcessingUnits() / SSCATTER_MAX;
|
1206 | size_t nr = vcl.getProcessingUnits() - nc * SSCATTER_MAX;
|
1207 | nr = ((nr-1) * nr) / 2;
|
1208 |
|
1209 | size_t n_ele = nc * SSCATTER_MAX * (SSCATTER_MAX - 1) / 2 + nr;
|
1210 |
|
1211 | for(size_t i = 0 ; i < v1.size() ; i++)
|
1212 | {
|
1213 | for (size_t j = 0 ; j < i % SSCATTER_MAX ; j++)
|
1214 | {
|
1215 | Box<3,size_t> b({j,j,j},{j,j,j});
|
1216 | v1.get(i).add(b);
|
1217 | }
|
1218 |
|
1219 | prc_send.add((i + vcl.getProcessUnitID()) % vcl.getProcessingUnits());
|
1220 | }
|
1221 |
|
1222 | scomm_unknown<impl>(vcl,v1,v2,prc_send,prc_recv2,sz_recv2);
|
1223 | scomm_unknown<impl>(vcl,v1,v3,prc_send,prc_recv3,sz_recv3);
|
1224 |
|
1225 | BOOST_REQUIRE_EQUAL(v2.size(),n_ele);
|
1226 | size_t nc_check = (vcl.getProcessingUnits()-1) / SSCATTER_MAX;
|
1227 | BOOST_REQUIRE_EQUAL(v3.size(),vcl.getProcessingUnits()-1-nc_check);
|
1228 |
|
1229 | bool match = true;
|
1230 | size_t s = 0;
|
1231 |
|
1232 | for (size_t i = 0 ; i < sz_recv2.size() ; i++)
|
1233 | {
|
1234 | for (size_t j = 0 ; j < sz_recv2.get(i); j++)
|
1235 | {
|
1236 | Box<3,size_t> b({j,j,j},{j,j,j});
|
1237 | Box<3,size_t> bt = v2.get(s+j);
|
1238 | match &= bt == b;
|
1239 | }
|
1240 | s += sz_recv2.get(i);
|
1241 | }
|
1242 |
|
1243 | BOOST_REQUIRE_EQUAL(match,true);
|
1244 |
|
1245 | for (size_t i = 0 ; i < v3.size() ; i++)
|
1246 | {
|
1247 | for (size_t j = 0 ; j < v3.get(i).size() ; j++)
|
1248 | {
|
1249 | Box<3,size_t> b({j,j,j},{j,j,j});
|
1250 | Box<3,size_t> bt = v3.get(i).get(j);
|
1251 | match &= bt == b;
|
1252 | }
|
1253 | }
|
1254 |
|
1255 | BOOST_REQUIRE_EQUAL(match,true);
|
1256 | }
|
1257 |
|
1258 | // Send and receive 0 and check
|
1259 |
|
1260 | {
|
1261 | Vcluster<> & vcl = create_vcluster();
|
1262 |
|
1263 | openfpm::vector<size_t> prc_recv2;
|
1264 | openfpm::vector<size_t> prc_send;
|
1265 | openfpm::vector<size_t> sz_recv2;
|
1266 | openfpm::vector<openfpm::vector<Box<3,size_t>>> v1;
|
1267 | openfpm::vector<Box<3,size_t>> v2;
|
1268 |
|
1269 | v1.resize(vcl.getProcessingUnits());
|
1270 |
|
1271 |
|
1272 | for(size_t i = 0 ; i < v1.size() ; i++)
|
1273 | {
|
1274 | prc_send.add((i + vcl.getProcessUnitID()) % vcl.getProcessingUnits());
|
1275 | }
|
1276 |
|
1277 | vcl.SSendRecv(v1,v2,prc_send,prc_recv2,sz_recv2);
|
1278 |
|
1279 | BOOST_REQUIRE_EQUAL(v2.size(),0ul);
|
1280 | BOOST_REQUIRE_EQUAL(prc_recv2.size(),0ul);
|
1281 | BOOST_REQUIRE_EQUAL(sz_recv2.size(),0ul);
|
1282 | }
|
1283 | }
|
1284 |
|
1285 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_struct_sendrecv)
|
1286 | {
|
1287 | Vcluster_semantic_struct_sendrecv_impl<NBX>();
|
1288 | }
|
1289 |
|
1290 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_struct_sendrecv_async)
|
1291 | {
|
1292 | Vcluster_semantic_struct_sendrecv_impl<NBX_ASYNC>();
|
1293 | }
|
1294 |
|
1295 | template<unsigned int impl>
|
1296 | void Vcluster_semantic_sendrecv_2_impl()
|
1297 | {
|
1298 | for (size_t i = 0 ; i < 100 ; i++)
|
1299 | {
|
1300 | Vcluster<> & vcl = create_vcluster();
|
1301 |
|
1302 | if (vcl.getProcessingUnits() >= 32)
|
1303 | return;
|
1304 |
|
1305 | openfpm::vector<size_t> prc_recv2;
|
1306 | openfpm::vector<size_t> prc_recv3;
|
1307 | openfpm::vector<size_t> prc_send;
|
1308 | openfpm::vector<size_t> sz_recv2;
|
1309 | openfpm::vector<size_t> sz_recv3;
|
1310 |
|
1311 | openfpm::vector<openfpm::vector<aggregate<openfpm::vector<size_t>>> > v1;
|
1312 | openfpm::vector<aggregate<openfpm::vector<size_t>>> v2;
|
1313 | openfpm::vector<openfpm::vector<aggregate<openfpm::vector<size_t>>> > v3;
|
1314 |
|
1315 | openfpm::vector<aggregate<openfpm::vector<size_t>>> v1_int;
|
1316 | aggregate<openfpm::vector<size_t>> aggr;
|
1317 | openfpm::vector<size_t> v1_int2;
|
1318 |
|
1319 | v1_int2.add(7);
|
1320 | v1_int2.add(7);
|
1321 | v1_int2.add(7);
|
1322 |
|
1323 | aggr.template get<0>() = v1_int2;
|
1324 |
|
1325 | v1_int.add(aggr);
|
1326 | v1_int.add(aggr);
|
1327 | v1_int.add(aggr);
|
1328 |
|
1329 | v1.resize(vcl.getProcessingUnits());
|
1330 |
|
1331 | for(size_t i = 0 ; i < v1.size() ; i++)
|
1332 | {
|
1333 | for (size_t j = 0 ; j < i % SSCATTER_MAX ; j++)
|
1334 | {
|
1335 | v1.get(i).add(aggr);
|
1336 | }
|
1337 |
|
1338 | prc_send.add((i + vcl.getProcessUnitID()) % vcl.getProcessingUnits());
|
1339 | }
|
1340 |
|
1341 | size_t nc = vcl.getProcessingUnits() / SSCATTER_MAX;
|
1342 | size_t nr = vcl.getProcessingUnits() - nc * SSCATTER_MAX;
|
1343 | nr = ((nr-1) * nr) / 2;
|
1344 |
|
1345 | size_t n_ele = nc * SSCATTER_MAX * (SSCATTER_MAX - 1) / 2 + nr;
|
1346 |
|
1347 | scomm_unknown<impl>(vcl,v1,v2,prc_send,prc_recv2,sz_recv2);
|
1348 | scomm_unknown<impl>(vcl,v1,v3,prc_send,prc_recv3,sz_recv3);
|
1349 |
|
1350 | BOOST_REQUIRE_EQUAL(v2.size(),n_ele);
|
1351 |
|
1352 | BOOST_REQUIRE_EQUAL(v3.size(),vcl.getProcessingUnits());
|
1353 |
|
1354 | bool match = true;
|
1355 | bool is_seven = true;
|
1356 | size_t s = 0;
|
1357 |
|
1358 | for (size_t i = 0 ; i < sz_recv2.size() ; i++)
|
1359 | {
|
1360 | for (size_t j = 0 ; j < sz_recv2.get(i); j++)
|
1361 | {
|
1362 | for (size_t k = 0; k < v2.get(s+j).template get<0>().size(); k++)
|
1363 | is_seven &= (v2.get(s+j).template get<0>().get(k) == 7);
|
1364 | }
|
1365 | s += sz_recv2.get(i);
|
1366 | }
|
1367 |
|
1368 | BOOST_REQUIRE_EQUAL(is_seven,true);
|
1369 | BOOST_REQUIRE_EQUAL(match,true);
|
1370 |
|
1371 | for (size_t i = 0 ; i < v3.size() ; i++)
|
1372 | {
|
1373 | for (size_t j = 0 ; j < v3.get(i).size(); j++)
|
1374 | {
|
1375 | for (size_t k = 0; k < v3.get(i).template get<0>(j).size(); k++)
|
1376 | is_seven &= (v3.get(i).template get<0>(j).get(k) == 7);
|
1377 | }
|
1378 | }
|
1379 |
|
1380 | BOOST_REQUIRE_EQUAL(is_seven,true);
|
1381 | BOOST_REQUIRE_EQUAL(match,true);
|
1382 | }
|
1383 | }
|
1384 |
|
1385 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_2)
|
1386 | {
|
1387 | Vcluster_semantic_sendrecv_2_impl<NBX>();
|
1388 | }
|
1389 |
|
1390 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_2_async)
|
1391 | {
|
1392 | Vcluster_semantic_sendrecv_2_impl<NBX_ASYNC>();
|
1393 | }
|
1394 |
|
1395 | template<unsigned int impl>
|
1396 | void Vcluster_semantic_sendrecv_3_impl()
|
1397 | {
|
1398 | for (size_t i = 0 ; i < 100 ; i++)
|
1399 | {
|
1400 | Vcluster<> & vcl = create_vcluster();
|
1401 |
|
1402 | if (vcl.getProcessingUnits() >= 32)
|
1403 | return;
|
1404 |
|
1405 | openfpm::vector<size_t> prc_recv2;
|
1406 | openfpm::vector<size_t> prc_recv3;
|
1407 | openfpm::vector<size_t> prc_send;
|
1408 | openfpm::vector<size_t> sz_recv2;
|
1409 | openfpm::vector<size_t> sz_recv3;
|
1410 |
|
1411 | openfpm::vector<openfpm::vector<aggregate<float, openfpm::vector<size_t>, Point_test<float>>> > v1;
|
1412 | openfpm::vector<aggregate<float, openfpm::vector<size_t>, Point_test<float>>> v2;
|
1413 | openfpm::vector<openfpm::vector<aggregate<float, openfpm::vector<size_t>, Point_test<float>>> > v3;
|
1414 |
|
1415 | openfpm::vector<aggregate<float, openfpm::vector<size_t>, Point_test<float>>> v1_int;
|
1416 | aggregate<float, openfpm::vector<size_t>, Point_test<float>> aggr;
|
1417 | openfpm::vector<size_t> v1_int2;
|
1418 |
|
1419 | v1_int2.add((size_t)7);
|
1420 | v1_int2.add((size_t)7);
|
1421 |
|
1422 | aggr.template get<0>() = 7;
|
1423 | aggr.template get<1>() = v1_int2;
|
1424 |
|
1425 | typedef Point_test<float> p;
|
1426 | p p1;
|
1427 | p1.fill();
|
1428 | aggr.template get<2>() = p1;
|
1429 |
|
1430 | v1_int.add(aggr);
|
1431 | v1_int.add(aggr);
|
1432 | v1_int.add(aggr);
|
1433 |
|
1434 | v1.resize(vcl.getProcessingUnits());
|
1435 |
|
1436 | for(size_t i = 0 ; i < v1.size() ; i++)
|
1437 | {
|
1438 | for (size_t j = 0 ; j < i % SSCATTER_MAX ; j++)
|
1439 | {
|
1440 | v1.get(i).add(aggr);
|
1441 | }
|
1442 |
|
1443 | prc_send.add((i + vcl.getProcessUnitID()) % vcl.getProcessingUnits());
|
1444 | }
|
1445 |
|
1446 | size_t nc = vcl.getProcessingUnits() / SSCATTER_MAX;
|
1447 | size_t nr = vcl.getProcessingUnits() - nc * SSCATTER_MAX;
|
1448 | nr = ((nr-1) * nr) / 2;
|
1449 |
|
1450 | size_t n_ele = nc * SSCATTER_MAX * (SSCATTER_MAX - 1) / 2 + nr;
|
1451 |
|
1452 | scomm_unknown<impl>(vcl,v1,v2,prc_send,prc_recv2,sz_recv2);
|
1453 | scomm_unknown<impl>(vcl,v1,v3,prc_send,prc_recv3,sz_recv3);
|
1454 |
|
1455 | BOOST_REQUIRE_EQUAL(v2.size(),n_ele);
|
1456 |
|
1457 | BOOST_REQUIRE_EQUAL(v3.size(),vcl.getProcessingUnits());
|
1458 |
|
1459 | bool match = true;
|
1460 | bool is_seven = true;
|
1461 | size_t s = 0;
|
1462 |
|
1463 | for (size_t i = 0 ; i < sz_recv2.size() ; i++)
|
1464 | {
|
1465 | for (size_t j = 0 ; j < sz_recv2.get(i); j++)
|
1466 | {
|
1467 | is_seven &= (v2.get(s+j).template get<0>() == 7);
|
1468 |
|
1469 | for (size_t k = 0; k < v2.get(s+j).template get<1>().size(); k++)
|
1470 | is_seven &= (v2.get(s+j).template get<1>().get(k) == 7);
|
1471 |
|
1472 | Point_test<float> p2 = v2.get(s+j).template get<2>();
|
1473 |
|
1474 | match &= (p2.template get<p::x>() == p1.template get<p::x>());
|
1475 | match &= (p2.template get<p::y>() == p1.template get<p::y>());
|
1476 | match &= (p2.template get<p::z>() == p1.template get<p::z>());
|
1477 | match &= (p2.template get<p::s>() == p1.template get<p::s>());
|
1478 |
|
1479 | match &= (p2.template get<p::v>()[0] == p1.template get<p::v>()[0]);
|
1480 | match &= (p2.template get<p::v>()[1] == p1.template get<p::v>()[1]);
|
1481 | match &= (p2.template get<p::v>()[2] == p1.template get<p::v>()[2]);
|
1482 |
|
1483 | match &= (p2.template get<p::t>()[0][0] == p1.template get<p::t>()[0][0]);
|
1484 | match &= (p2.template get<p::t>()[0][1] == p1.template get<p::t>()[0][1]);
|
1485 | match &= (p2.template get<p::t>()[0][2] == p1.template get<p::t>()[0][2]);
|
1486 | match &= (p2.template get<p::t>()[1][0] == p1.template get<p::t>()[1][0]);
|
1487 | match &= (p2.template get<p::t>()[1][1] == p1.template get<p::t>()[1][1]);
|
1488 | match &= (p2.template get<p::t>()[1][2] == p1.template get<p::t>()[1][2]);
|
1489 | match &= (p2.template get<p::t>()[2][0] == p1.template get<p::t>()[2][0]);
|
1490 | match &= (p2.template get<p::t>()[2][1] == p1.template get<p::t>()[2][1]);
|
1491 | match &= (p2.template get<p::t>()[2][2] == p1.template get<p::t>()[2][2]);
|
1492 | }
|
1493 | s += sz_recv2.get(i);
|
1494 | }
|
1495 |
|
1496 | BOOST_REQUIRE_EQUAL(is_seven,true);
|
1497 | BOOST_REQUIRE_EQUAL(match,true);
|
1498 |
|
1499 | for (size_t i = 0 ; i < v3.size() ; i++)
|
1500 | {
|
1501 | for (size_t j = 0 ; j < v3.get(i).size(); j++)
|
1502 | {
|
1503 | is_seven &= (v3.get(i).get(j).template get<0>() == 7);
|
1504 |
|
1505 | for (size_t k = 0; k < v3.get(i).get(j).template get<1>().size(); k++)
|
1506 | is_seven &= (v3.get(i).get(j).template get<1>().get(k) == 7);
|
1507 |
|
1508 | Point_test<float> p2 = v3.get(i).get(j).template get<2>();
|
1509 |
|
1510 | match &= (p2.template get<p::x>() == p1.template get<p::x>());
|
1511 | match &= (p2.template get<p::y>() == p1.template get<p::y>());
|
1512 | match &= (p2.template get<p::z>() == p1.template get<p::z>());
|
1513 | match &= (p2.template get<p::s>() == p1.template get<p::s>());
|
1514 |
|
1515 | match &= (p2.template get<p::v>()[0] == p1.template get<p::v>()[0]);
|
1516 | match &= (p2.template get<p::v>()[1] == p1.template get<p::v>()[1]);
|
1517 | match &= (p2.template get<p::v>()[2] == p1.template get<p::v>()[2]);
|
1518 |
|
1519 | match &= (p2.template get<p::t>()[0][0] == p1.template get<p::t>()[0][0]);
|
1520 | match &= (p2.template get<p::t>()[0][1] == p1.template get<p::t>()[0][1]);
|
1521 | match &= (p2.template get<p::t>()[0][2] == p1.template get<p::t>()[0][2]);
|
1522 | match &= (p2.template get<p::t>()[1][0] == p1.template get<p::t>()[1][0]);
|
1523 | match &= (p2.template get<p::t>()[1][1] == p1.template get<p::t>()[1][1]);
|
1524 | match &= (p2.template get<p::t>()[1][2] == p1.template get<p::t>()[1][2]);
|
1525 | match &= (p2.template get<p::t>()[2][0] == p1.template get<p::t>()[2][0]);
|
1526 | match &= (p2.template get<p::t>()[2][1] == p1.template get<p::t>()[2][1]);
|
1527 | match &= (p2.template get<p::t>()[2][2] == p1.template get<p::t>()[2][2]);
|
1528 | }
|
1529 | }
|
1530 |
|
1531 | BOOST_REQUIRE_EQUAL(is_seven,true);
|
1532 | BOOST_REQUIRE_EQUAL(match,true);
|
1533 | }
|
1534 | }
|
1535 |
|
1536 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_3)
|
1537 | {
|
1538 | Vcluster_semantic_sendrecv_3_impl<NBX>();
|
1539 | }
|
1540 |
|
1541 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_3_async)
|
1542 | {
|
1543 | Vcluster_semantic_sendrecv_3_impl<NBX_ASYNC>();
|
1544 | }
|
1545 |
|
1546 | template<unsigned int impl>
|
1547 | void Vcluster_semantic_sendrecv_4_impl()
|
1548 | {
|
1549 | for (size_t i = 0 ; i < 100 ; i++)
|
1550 | {
|
1551 | Vcluster<> & vcl = create_vcluster();
|
1552 |
|
1553 | if (vcl.getProcessingUnits() >= 32)
|
1554 | return;
|
1555 |
|
1556 | openfpm::vector<size_t> prc_recv2;
|
1557 | openfpm::vector<size_t> prc_recv3;
|
1558 | openfpm::vector<size_t> prc_send;
|
1559 | openfpm::vector<size_t> sz_recv2;
|
1560 | openfpm::vector<size_t> sz_recv3;
|
1561 | openfpm::vector<openfpm::vector<aggregate<float,Point_test<float>>> > v1;
|
1562 | openfpm::vector<aggregate<float,Point_test<float>> > v2;
|
1563 | openfpm::vector<openfpm::vector<aggregate<float,Point_test<float>>> > v3;
|
1564 |
|
1565 | v1.resize(vcl.getProcessingUnits());
|
1566 |
|
1567 | size_t nc = vcl.getProcessingUnits() / SSCATTER_MAX;
|
1568 | size_t nr = vcl.getProcessingUnits() - nc * SSCATTER_MAX;
|
1569 | nr = ((nr-1) * nr) / 2;
|
1570 |
|
1571 | size_t n_ele = nc * SSCATTER_MAX * (SSCATTER_MAX - 1) / 2 + nr;
|
1572 |
|
1573 | //Prepare an aggregate
|
1574 | aggregate<float, Point_test<float> > aggr;
|
1575 |
|
1576 | typedef Point_test<float> p;
|
1577 |
|
1578 | p p1;
|
1579 | p1.fill();
|
1580 |
|
1581 | aggr.template get<0>() = 7;
|
1582 | aggr.template get<1>() = p1;
|
1583 |
|
1584 | //Fill v1 with aggregates
|
1585 | for(size_t i = 0 ; i < v1.size() ; i++)
|
1586 | {
|
1587 | for (size_t j = 0 ; j < i % SSCATTER_MAX ; j++)
|
1588 | {
|
1589 | v1.get(i).add(aggr);
|
1590 | }
|
1591 |
|
1592 | prc_send.add((i + vcl.getProcessUnitID()) % vcl.getProcessingUnits());
|
1593 | }
|
1594 |
|
1595 | scomm_unknown<impl>(vcl,v1,v2,prc_send,prc_recv2,sz_recv2);
|
1596 | scomm_unknown<impl>(vcl,v1,v3,prc_send,prc_recv3,sz_recv3);
|
1597 |
|
1598 | BOOST_REQUIRE_EQUAL(v2.size(),n_ele);
|
1599 | size_t nc_check = (vcl.getProcessingUnits()-1) / SSCATTER_MAX;
|
1600 | BOOST_REQUIRE_EQUAL(v3.size(),vcl.getProcessingUnits()-1-nc_check);
|
1601 | bool match = true;
|
1602 | bool is_seven = true;
|
1603 | size_t s = 0;
|
1604 |
|
1605 | for (size_t i = 0 ; i < sz_recv2.size() ; i++)
|
1606 | {
|
1607 | for (size_t j = 0 ; j < sz_recv2.get(i); j++)
|
1608 | {
|
1609 | is_seven &= (v2.get(s+j).template get<0>() == 7);
|
1610 |
|
1611 | Point_test<float> p2 = v2.get(s+j).template get<1>();
|
1612 |
|
1613 | match &= (p2.template get<p::x>() == p1.template get<p::x>());
|
1614 | match &= (p2.template get<p::y>() == p1.template get<p::y>());
|
1615 | match &= (p2.template get<p::z>() == p1.template get<p::z>());
|
1616 | match &= (p2.template get<p::s>() == p1.template get<p::s>());
|
1617 |
|
1618 | match &= (p2.template get<p::v>()[0] == p1.template get<p::v>()[0]);
|
1619 | match &= (p2.template get<p::v>()[1] == p1.template get<p::v>()[1]);
|
1620 | match &= (p2.template get<p::v>()[2] == p1.template get<p::v>()[2]);
|
1621 |
|
1622 | match &= (p2.template get<p::t>()[0][0] == p1.template get<p::t>()[0][0]);
|
1623 | match &= (p2.template get<p::t>()[0][1] == p1.template get<p::t>()[0][1]);
|
1624 | match &= (p2.template get<p::t>()[0][2] == p1.template get<p::t>()[0][2]);
|
1625 | match &= (p2.template get<p::t>()[1][0] == p1.template get<p::t>()[1][0]);
|
1626 | match &= (p2.template get<p::t>()[1][1] == p1.template get<p::t>()[1][1]);
|
1627 | match &= (p2.template get<p::t>()[1][2] == p1.template get<p::t>()[1][2]);
|
1628 | match &= (p2.template get<p::t>()[2][0] == p1.template get<p::t>()[2][0]);
|
1629 | match &= (p2.template get<p::t>()[2][1] == p1.template get<p::t>()[2][1]);
|
1630 | match &= (p2.template get<p::t>()[2][2] == p1.template get<p::t>()[2][2]);
|
1631 | }
|
1632 | s += sz_recv2.get(i);
|
1633 | }
|
1634 |
|
1635 | BOOST_REQUIRE_EQUAL(is_seven,true);
|
1636 | BOOST_REQUIRE_EQUAL(match,true);
|
1637 |
|
1638 | for (size_t i = 0 ; i < v3.size() ; i++)
|
1639 | {
|
1640 | for (size_t j = 0 ; j < v3.get(i).size() ; j++)
|
1641 | {
|
1642 | is_seven &= (v3.get(i).get(j).template get<0>() == 7);
|
1643 |
|
1644 | Point_test<float> p2 = v3.get(i).get(j).template get<1>();
|
1645 |
|
1646 | match &= (p2.template get<p::x>() == p1.template get<p::x>());
|
1647 | match &= (p2.template get<p::y>() == p1.template get<p::y>());
|
1648 | match &= (p2.template get<p::z>() == p1.template get<p::z>());
|
1649 | match &= (p2.template get<p::s>() == p1.template get<p::s>());
|
1650 |
|
1651 | match &= (p2.template get<p::v>()[0] == p1.template get<p::v>()[0]);
|
1652 | match &= (p2.template get<p::v>()[1] == p1.template get<p::v>()[1]);
|
1653 | match &= (p2.template get<p::v>()[2] == p1.template get<p::v>()[2]);
|
1654 |
|
1655 | match &= (p2.template get<p::t>()[0][0] == p1.template get<p::t>()[0][0]);
|
1656 | match &= (p2.template get<p::t>()[0][1] == p1.template get<p::t>()[0][1]);
|
1657 | match &= (p2.template get<p::t>()[0][2] == p1.template get<p::t>()[0][2]);
|
1658 | match &= (p2.template get<p::t>()[1][0] == p1.template get<p::t>()[1][0]);
|
1659 | match &= (p2.template get<p::t>()[1][1] == p1.template get<p::t>()[1][1]);
|
1660 | match &= (p2.template get<p::t>()[1][2] == p1.template get<p::t>()[1][2]);
|
1661 | match &= (p2.template get<p::t>()[2][0] == p1.template get<p::t>()[2][0]);
|
1662 | match &= (p2.template get<p::t>()[2][1] == p1.template get<p::t>()[2][1]);
|
1663 | match &= (p2.template get<p::t>()[2][2] == p1.template get<p::t>()[2][2]);
|
1664 | }
|
1665 | }
|
1666 |
|
1667 | BOOST_REQUIRE_EQUAL(is_seven,true);
|
1668 | BOOST_REQUIRE_EQUAL(match,true);
|
1669 | }
|
1670 | }
|
1671 |
|
1672 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_4)
|
1673 | {
|
1674 | Vcluster_semantic_sendrecv_4_impl<NBX>();
|
1675 | }
|
1676 |
|
1677 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_4_async)
|
1678 | {
|
1679 | Vcluster_semantic_sendrecv_4_impl<NBX_ASYNC>();
|
1680 | }
|
1681 |
|
1682 | template<unsigned int impl>
|
1683 | void Vcluster_semantic_sendrecv_5_impl()
|
1684 | {
|
1685 | for (size_t i = 0 ; i < 100 ; i++)
|
1686 | {
|
1687 | Vcluster<> & vcl = create_vcluster();
|
1688 |
|
1689 | if (vcl.getProcessingUnits() >= 32)
|
1690 | return;
|
1691 |
|
1692 | openfpm::vector<size_t> prc_recv2;
|
1693 | openfpm::vector<size_t> prc_recv3;
|
1694 | openfpm::vector<size_t> prc_send;
|
1695 | openfpm::vector<size_t> sz_recv2;
|
1696 | openfpm::vector<size_t> sz_recv3;
|
1697 |
|
1698 | size_t sz[] = {16,16};
|
1699 |
|
1700 | grid_cpu<2,Point_test<float>> g1(sz);
|
1701 | g1.setMemory();
|
1702 | fill_grid<2>(g1);
|
1703 |
|
1704 | aggregate<grid_cpu<2,Point_test<float>>> aggr;
|
1705 | aggr.template get<0>() = g1;
|
1706 |
|
1707 |
|
1708 | openfpm::vector<openfpm::vector<aggregate<grid_cpu<2,Point_test<float>>>> > v1;
|
1709 | openfpm::vector<aggregate<grid_cpu<2,Point_test<float>>> > v2;
|
1710 | openfpm::vector<openfpm::vector<aggregate<grid_cpu<2,Point_test<float>>>> > v3;
|
1711 |
|
1712 | v1.resize(vcl.getProcessingUnits());
|
1713 |
|
1714 | for(size_t i = 0 ; i < v1.size() ; i++)
|
1715 | {
|
1716 | for (size_t j = 0 ; j < i % SSCATTER_MAX ; j++)
|
1717 | {
|
1718 | v1.get(i).add(aggr);
|
1719 | }
|
1720 |
|
1721 | prc_send.add((i + vcl.getProcessUnitID()) % vcl.getProcessingUnits());
|
1722 | }
|
1723 |
|
1724 | size_t nc = vcl.getProcessingUnits() / SSCATTER_MAX;
|
1725 | size_t nr = vcl.getProcessingUnits() - nc * SSCATTER_MAX;
|
1726 | nr = ((nr-1) * nr) / 2;
|
1727 |
|
1728 | size_t n_ele = nc * SSCATTER_MAX * (SSCATTER_MAX - 1) / 2 + nr;
|
1729 |
|
1730 | scomm_unknown<impl>(vcl,v1,v2,prc_send,prc_recv2,sz_recv2);
|
1731 |
|
1732 | scomm_unknown<impl>(vcl,v1,v3,prc_send,prc_recv3,sz_recv3);
|
1733 |
|
1734 | BOOST_REQUIRE_EQUAL(v2.size(),n_ele);
|
1735 |
|
1736 | BOOST_REQUIRE_EQUAL(v3.size(),vcl.getProcessingUnits());
|
1737 |
|
1738 | bool match = true;
|
1739 | size_t s = 0;
|
1740 | typedef Point_test<float> p;
|
1741 |
|
1742 | for (size_t i = 0 ; i < sz_recv2.size() ; i++)
|
1743 | {
|
1744 | for (size_t j = 0 ; j < sz_recv2.get(i); j++)
|
1745 | {
|
1746 | grid_cpu<2,Point_test<float>> g2 = v2.get(s+j).template get<0>();
|
1747 |
|
1748 | auto it = g2.getIterator();
|
1749 |
|
1750 | while (it.isNext())
|
1751 | {
|
1752 | grid_key_dx<2> key = it.get();
|
1753 |
|
1754 | match &= (g2.template get<p::x>(key) == g1.template get<p::x>(key));
|
1755 | match &= (g2.template get<p::y>(key) == g1.template get<p::y>(key));
|
1756 | match &= (g2.template get<p::z>(key) == g1.template get<p::z>(key));
|
1757 | match &= (g2.template get<p::s>(key) == g1.template get<p::s>(key));
|
1758 |
|
1759 | match &= (g2.template get<p::v>(key)[0] == g1.template get<p::v>(key)[0]);
|
1760 | match &= (g2.template get<p::v>(key)[1] == g1.template get<p::v>(key)[1]);
|
1761 | match &= (g2.template get<p::v>(key)[2] == g1.template get<p::v>(key)[2]);
|
1762 |
|
1763 | match &= (g2.template get<p::t>(key)[0][0] == g1.template get<p::t>(key)[0][0]);
|
1764 | match &= (g2.template get<p::t>(key)[0][1] == g1.template get<p::t>(key)[0][1]);
|
1765 | match &= (g2.template get<p::t>(key)[0][2] == g1.template get<p::t>(key)[0][2]);
|
1766 | match &= (g2.template get<p::t>(key)[1][0] == g1.template get<p::t>(key)[1][0]);
|
1767 | match &= (g2.template get<p::t>(key)[1][1] == g1.template get<p::t>(key)[1][1]);
|
1768 | match &= (g2.template get<p::t>(key)[1][2] == g1.template get<p::t>(key)[1][2]);
|
1769 | match &= (g2.template get<p::t>(key)[2][0] == g1.template get<p::t>(key)[2][0]);
|
1770 | match &= (g2.template get<p::t>(key)[2][1] == g1.template get<p::t>(key)[2][1]);
|
1771 | match &= (g2.template get<p::t>(key)[2][2] == g1.template get<p::t>(key)[2][2]);
|
1772 |
|
1773 | ++it;
|
1774 | }
|
1775 | }
|
1776 | s += sz_recv2.get(i);
|
1777 | }
|
1778 | BOOST_REQUIRE_EQUAL(match,true);
|
1779 |
|
1780 | for (size_t i = 0 ; i < v3.size() ; i++)
|
1781 | {
|
1782 | for (size_t j = 0 ; j < v3.get(i).size(); j++)
|
1783 | {
|
1784 | grid_cpu<2,Point_test<float>> g2 = v3.get(i).get(j).template get<0>();
|
1785 |
|
1786 | auto it = g2.getIterator();
|
1787 |
|
1788 | while (it.isNext())
|
1789 | {
|
1790 | grid_key_dx<2> key = it.get();
|
1791 |
|
1792 | match &= (g2.template get<p::x>(key) == g1.template get<p::x>(key));
|
1793 | match &= (g2.template get<p::y>(key) == g1.template get<p::y>(key));
|
1794 | match &= (g2.template get<p::z>(key) == g1.template get<p::z>(key));
|
1795 | match &= (g2.template get<p::s>(key) == g1.template get<p::s>(key));
|
1796 |
|
1797 | match &= (g2.template get<p::v>(key)[0] == g1.template get<p::v>(key)[0]);
|
1798 | match &= (g2.template get<p::v>(key)[1] == g1.template get<p::v>(key)[1]);
|
1799 | match &= (g2.template get<p::v>(key)[2] == g1.template get<p::v>(key)[2]);
|
1800 |
|
1801 | match &= (g2.template get<p::t>(key)[0][0] == g1.template get<p::t>(key)[0][0]);
|
1802 | match &= (g2.template get<p::t>(key)[0][1] == g1.template get<p::t>(key)[0][1]);
|
1803 | match &= (g2.template get<p::t>(key)[0][2] == g1.template get<p::t>(key)[0][2]);
|
1804 | match &= (g2.template get<p::t>(key)[1][0] == g1.template get<p::t>(key)[1][0]);
|
1805 | match &= (g2.template get<p::t>(key)[1][1] == g1.template get<p::t>(key)[1][1]);
|
1806 | match &= (g2.template get<p::t>(key)[1][2] == g1.template get<p::t>(key)[1][2]);
|
1807 | match &= (g2.template get<p::t>(key)[2][0] == g1.template get<p::t>(key)[2][0]);
|
1808 | match &= (g2.template get<p::t>(key)[2][1] == g1.template get<p::t>(key)[2][1]);
|
1809 | match &= (g2.template get<p::t>(key)[2][2] == g1.template get<p::t>(key)[2][2]);
|
1810 |
|
1811 | ++it;
|
1812 | }
|
1813 | }
|
1814 | }
|
1815 | BOOST_REQUIRE_EQUAL(match,true);
|
1816 | }
|
1817 | }
|
1818 |
|
1819 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_5)
|
1820 | {
|
1821 | Vcluster_semantic_sendrecv_5_impl<NBX>();
|
1822 | }
|
1823 |
|
1824 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_5_async)
|
1825 | {
|
1826 | Vcluster_semantic_sendrecv_5_impl<NBX_ASYNC>();
|
1827 | }
|
1828 |
|
1829 | template<unsigned int impl>
|
1830 | void Vcluster_semantic_sendrecv_6_impl()
|
1831 | {
|
1832 | for (size_t i = 0 ; i < 100 ; i++)
|
1833 | {
|
1834 | Vcluster<> & vcl = create_vcluster();
|
1835 |
|
1836 | if (vcl.getProcessingUnits() >= 32)
|
1837 | return;
|
1838 |
|
1839 | openfpm::vector<size_t> prc_recv2;
|
1840 | openfpm::vector<size_t> prc_recv3;
|
1841 | openfpm::vector<size_t> prc_send;
|
1842 | openfpm::vector<size_t> sz_recv2;
|
1843 | openfpm::vector<size_t> sz_recv3;
|
1844 |
|
1845 | size_t sz[] = {8,10};
|
1846 |
|
1847 | grid_cpu<2,Point_test<float>> g1(sz);
|
1848 | g1.setMemory();
|
1849 | fill_grid<2>(g1);
|
1850 |
|
1851 | openfpm::vector<grid_cpu<2,Point_test<float>>> v1;
|
1852 | openfpm::vector<grid_cpu<2,Point_test<float>>> v3;
|
1853 |
|
1854 | v1.resize(vcl.getProcessingUnits());
|
1855 |
|
1856 | for(size_t i = 0 ; i < v1.size() ; i++)
|
1857 | {
|
1858 | v1.get(i) = g1;
|
1859 |
|
1860 | prc_send.add((i + vcl.getProcessUnitID()) % vcl.getProcessingUnits());
|
1861 | }
|
1862 |
|
1863 | scomm_unknown<impl>(vcl,v1,v3,prc_send,prc_recv3,sz_recv3);
|
1864 |
|
1865 | BOOST_REQUIRE_EQUAL(v3.size(),vcl.getProcessingUnits());
|
1866 |
|
1867 | bool match = true;
|
1868 | typedef Point_test<float> p;
|
1869 |
|
1870 | for (size_t i = 0 ; i < v3.size() ; i++)
|
1871 | {
|
1872 | for (size_t j = 0 ; j < v3.get(i).size(); j++)
|
1873 | {
|
1874 | grid_cpu<2,Point_test<float>> g2 = v3.get(i);
|
1875 |
|
1876 | auto it = g2.getIterator();
|
1877 |
|
1878 | while (it.isNext())
|
1879 | {
|
1880 | grid_key_dx<2> key = it.get();
|
1881 |
|
1882 | match &= (g2.template get<p::x>(key) == g1.template get<p::x>(key));
|
1883 | match &= (g2.template get<p::y>(key) == g1.template get<p::y>(key));
|
1884 | match &= (g2.template get<p::z>(key) == g1.template get<p::z>(key));
|
1885 | match &= (g2.template get<p::s>(key) == g1.template get<p::s>(key));
|
1886 |
|
1887 | match &= (g2.template get<p::v>(key)[0] == g1.template get<p::v>(key)[0]);
|
1888 | match &= (g2.template get<p::v>(key)[1] == g1.template get<p::v>(key)[1]);
|
1889 | match &= (g2.template get<p::v>(key)[2] == g1.template get<p::v>(key)[2]);
|
1890 |
|
1891 | match &= (g2.template get<p::t>(key)[0][0] == g1.template get<p::t>(key)[0][0]);
|
1892 | match &= (g2.template get<p::t>(key)[0][1] == g1.template get<p::t>(key)[0][1]);
|
1893 | match &= (g2.template get<p::t>(key)[0][2] == g1.template get<p::t>(key)[0][2]);
|
1894 | match &= (g2.template get<p::t>(key)[1][0] == g1.template get<p::t>(key)[1][0]);
|
1895 | match &= (g2.template get<p::t>(key)[1][1] == g1.template get<p::t>(key)[1][1]);
|
1896 | match &= (g2.template get<p::t>(key)[1][2] == g1.template get<p::t>(key)[1][2]);
|
1897 | match &= (g2.template get<p::t>(key)[2][0] == g1.template get<p::t>(key)[2][0]);
|
1898 | match &= (g2.template get<p::t>(key)[2][1] == g1.template get<p::t>(key)[2][1]);
|
1899 | match &= (g2.template get<p::t>(key)[2][2] == g1.template get<p::t>(key)[2][2]);
|
1900 |
|
1901 | ++it;
|
1902 | }
|
1903 | }
|
1904 | }
|
1905 | BOOST_REQUIRE_EQUAL(match,true);
|
1906 |
|
1907 | if (vcl.getProcessUnitID() == 0 && i == 99)
|
1908 | std::cout << "Semantic sendrecv test start" << std::endl;
|
1909 | }
|
1910 | }
|
1911 |
|
1912 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_6)
|
1913 | {
|
1914 | Vcluster_semantic_sendrecv_6_impl<NBX>();
|
1915 | }
|
1916 |
|
1917 | BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_6_async)
|
1918 | {
|
1919 | Vcluster_semantic_sendrecv_6_impl<NBX_ASYNC>();
|
1920 | }
|
1921 |
|
1922 | BOOST_AUTO_TEST_SUITE_END()
|
1923 | |
1924 | |