/*
 * VCluster_semantic_unit_tests_funcs.hpp
 *
 *  Created on: Aug 18, 2018
 *      Author: i-bird
 */

#ifndef VCLUSTER_SEMANTIC_UNIT_TESTS_FUNCS_HPP_
#define VCLUSTER_SEMANTIC_UNIT_TESTS_FUNCS_HPP_

#include "VCluster/VCluster.hpp"

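// Semantic gather (SGather) test, instantiated for different memory backends and layouts.
// Each processor contributes a vector whose size equals its rank, filled with fixed marker
// values, so the root must end up with n*(n-1)/2 elements. The root rotates with the
// iteration index (i % nproc) so that every rank acts as root over the 100 iterations.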
template<typename Memory, template<typename> class layout_base>
void test_different_layouts()
{
	for (size_t i = 0 ; i < 100 ; i++)
	{
		Vcluster<> & vcl = create_vcluster();

		// skip the test when running on 32 or more processors
		if (vcl.getProcessingUnits() >= 32)
			return;

		// each processor contributes a vector with as many elements as its rank,
		// filled with fixed marker values
		openfpm::vector<aggregate<int,float,size_t>,Memory,layout_base> v1;
		v1.resize(vcl.getProcessUnitID());

		for (size_t j = 0 ; j < vcl.getProcessUnitID() ; j++)
		{
			v1.template get<0>(j) = 5;
			v1.template get<1>(j) = 10.0+1000.0;
			v1.template get<2>(j) = 11.0+100000.0;
		}

		openfpm::vector<aggregate<int,float,size_t>,Memory,layout_base> v2;

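		// SGather collects the v1 contributions from all processors into v2 on the root
		// (here i % nproc); with rank r contributing r elements, the root expects
		// 0 + 1 + ... + (n-1) = n*(n-1)/2 elements in total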
		vcl.SGather<decltype(v1),decltype(v2),layout_base>(v1,v2,(i%vcl.getProcessingUnits()));

		if (vcl.getProcessUnitID() == (i%vcl.getProcessingUnits()))
		{
			size_t n = vcl.getProcessingUnits();
			BOOST_REQUIRE_EQUAL(v2.size(),n*(n-1)/2);

			// check that every gathered element carries the marker values
			bool is_correct = true;
			for (size_t k = 0 ; k < v2.size() ; k++)
			{
				is_correct &= (v2.template get<0>(k) == 5);
				is_correct &= (v2.template get<1>(k) == 10.0+1000.0);
				is_correct &= (v2.template get<2>(k) == 11.0+100000.0);
			}

			BOOST_REQUIRE_EQUAL(is_correct,true);
		}
		if (vcl.getProcessUnitID() == 0 && i == 99)
			std::cout << "Semantic gather test stop" << std::endl;
	}
}

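// Semantic send/receive test with a layout switch (SSendRecv and SSendRecvP): every
// processor prepares one 100-element chunk for each destination rank and receives the
// chunks addressed to it into collect (whole aggregate) and collect2 (properties 0 and 1).
// When MPI_GPU_DIRECT is set, the exchange is expected to go through device buffers.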
template<typename Memory>
void test_ssend_recv_layout_switch(size_t opt)
{
	auto & v_cl = create_vcluster<Memory>();

	// skip the test when running on more than 10 processors
	if (v_cl.size() > 10) {return;}

	openfpm::vector<openfpm::vector_gpu_single<aggregate<float,float[3]>>> vd;
	openfpm::vector_gpu<aggregate<float,float[3]>> collect;
	openfpm::vector_gpu<aggregate<float,float[3]>> collect2;
	openfpm::vector<size_t> prc_send;
	openfpm::vector<size_t> prc_recv;
	openfpm::vector<size_t> sz_recv;

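	// prepare one 100-element chunk per destination rank; each value encodes the
	// destination (i), the sender rank and the element index, so the receiver can
	// verify exactly which chunk it got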
	vd.resize(v_cl.size());

	for (size_t i = 0 ; i < vd.size() ; i++)
	{
		vd.get(i).resize(100);

		for (size_t j = 0 ; j < vd.get(i).size() ; j++)
		{
			vd.get(i).template get<0>(j) = 10000*i + v_cl.rank()*100 + j;

			vd.get(i).template get<1>(j)[0] = 400000 + 10000*i + v_cl.rank()*100 + j;
			vd.get(i).template get<1>(j)[1] = 400000 + 10000*i + v_cl.rank()*100 + j;
			vd.get(i).template get<1>(j)[2] = 400000 + 10000*i + v_cl.rank()*100 + j;
		}

		prc_send.add(i);

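		// with GPU-direct the send buffers must live on the device: upload the chunk and
		// zero the host copy, so a correct result can only come from the device data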
		if (opt & MPI_GPU_DIRECT)
		{
			vd.get(i).template hostToDevice<0,1>();

			// Reset host

			for (size_t j = 0 ; j < vd.get(i).size() ; j++)
			{
				vd.get(i).template get<0>(j) = 0.0;

				vd.get(i).template get<1>(j)[0] = 0.0;
				vd.get(i).template get<1>(j)[1] = 0.0;
				vd.get(i).template get<1>(j)[2] = 0.0;
			}
		}
	}

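	// SSendRecv: chunk vd.get(i) is sent to processor prc_send.get(i) (= i) and the
	// chunks received from the other processors are concatenated into collect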
	v_cl.template SSendRecv<openfpm::vector_gpu_single<aggregate<float,float[3]>>,decltype(collect),memory_traits_inte>
							(vd,collect,prc_send, prc_recv,sz_recv,opt);

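	// SSendRecvP does the same exchange property by property (here properties 0 and 1),
	// collecting the result into collect2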
	v_cl.template SSendRecvP<openfpm::vector_gpu_single<aggregate<float,float[3]>>,decltype(collect),memory_traits_inte,0,1>
							(vd,collect2,prc_send, prc_recv,sz_recv,opt);

	// every processor sends 100 elements to every processor (including itself),
	// so both collections must contain 100 * v_cl.size() elements

	BOOST_REQUIRE_EQUAL(collect.size(),100*v_cl.size());
	BOOST_REQUIRE_EQUAL(collect2.size(),100*v_cl.size());

	// if the data was received directly on the device (MPI_GPU_DIRECT), zero the host
	// copies: the values checked below can then only come from the deviceToHost transfer

	if (opt & MPI_GPU_DIRECT)
	{
		for (size_t j = 0 ; j < collect.size() ; j++)
		{
			collect.template get<0>(j) = 0.0;

			collect.template get<1>(j)[0] = 0.0;
			collect.template get<1>(j)[1] = 0.0;
			collect.template get<1>(j)[2] = 0.0;

			collect2.template get<0>(j) = 0.0;

			collect2.template get<1>(j)[0] = 0.0;
			collect2.template get<1>(j)[1] = 0.0;
			collect2.template get<1>(j)[2] = 0.0;
		}
	}

	// bring the received data back from device to host before checking it

	if (opt & MPI_GPU_DIRECT)
	{
		collect.template deviceToHost<0,1>();
		collect2.template deviceToHost<0,1>();
	}

	// check what we received: the chunk coming from sender i must contain the values that
	// sender encoded for this rank, i.e. 10000*rank + 100*i + j (plus 400000 for property 1)

	bool match = true;
	for (size_t i = 0 ; i < v_cl.size() ; i++)
	{
		for (size_t j = 0 ; j < 100 ; j++)
		{
			match &= collect.template get<0>(i*100 + j) == v_cl.rank()*10000 + i*100 + j;

			match &= collect.template get<1>(i*100 + j)[0] == 400000 + v_cl.rank()*10000 + i*100 + j;
			match &= collect.template get<1>(i*100 + j)[1] == 400000 + v_cl.rank()*10000 + i*100 + j;
			match &= collect.template get<1>(i*100 + j)[2] == 400000 + v_cl.rank()*10000 + i*100 + j;

			match &= collect2.template get<0>(i*100 + j) == v_cl.rank()*10000 + i*100 + j;

			match &= collect2.template get<1>(i*100 + j)[0] == 400000 + v_cl.rank()*10000 + i*100 + j;
			match &= collect2.template get<1>(i*100 + j)[1] == 400000 + v_cl.rank()*10000 + i*100 + j;
			match &= collect2.template get<1>(i*100 + j)[2] == 400000 + v_cl.rank()*10000 + i*100 + j;
		}

		if (match == false){break;}
	}

	BOOST_REQUIRE_EQUAL(match,true);
}

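/*
 * A minimal usage sketch (an assumption, not part of the original file): these helpers
 * are meant to be instantiated from Boost.Test cases in the corresponding unit-test
 * translation unit, picking a memory backend and a layout, e.g.
 *
 *   BOOST_AUTO_TEST_CASE( vcluster_semantic_gather_layouts )
 *   {
 *       test_different_layouts<HeapMemory,memory_traits_lin>();
 *       test_different_layouts<HeapMemory,memory_traits_inte>();
 *   }
 *
 *   BOOST_AUTO_TEST_CASE( vcluster_ssend_recv_layout_switch )
 *   {
 *       test_ssend_recv_layout_switch<CudaMemory>(MPI_GPU_DIRECT);
 *   }
 *
 * HeapMemory, CudaMemory, memory_traits_lin and memory_traits_inte are the usual OpenFPM
 * memory/layout types; the test-case names above are purely illustrative.
 */
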
#endif /* VCLUSTER_SEMANTIC_UNIT_TESTS_FUNCS_HPP_ */