/*
 * VCluster_semantic_unit_tests_funcs.hpp
 *
 *  Created on: Aug 18, 2018
 *      Author: i-bird
 */

#ifndef VCLUSTER_SEMANTIC_UNIT_TESTS_FUNCS_HPP_
#define VCLUSTER_SEMANTIC_UNIT_TESTS_FUNCS_HPP_

#include "VCluster/VCluster.hpp"

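// Semantic gather (SGather) test, instantiated for different memory backends and layouts.
// Each processor contributes a vector whose size equals its rank, filled with fixed marker
// values, so the root must end up with n*(n-1)/2 elements. The root rotates with the
// iteration index (i % nproc) so that every rank acts as root over the 100 iterations.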
template<typename Memory, template<typename> class layout_base>
void test_different_layouts()
{
	for (size_t i = 0 ; i < 100 ; i++)
	{
		Vcluster<> & vcl = create_vcluster();

		// skip the test when running on 32 or more processors
		if (vcl.getProcessingUnits() >= 32)
			return;

		// each processor contributes a vector with as many elements as its rank,
		// filled with fixed marker values
		openfpm::vector<aggregate<int,float,size_t>,Memory,layout_base> v1;
		v1.resize(vcl.getProcessUnitID());

		for (size_t j = 0 ; j < vcl.getProcessUnitID() ; j++)
		{
			v1.template get<0>(j) = 5;
			v1.template get<1>(j) = 10.0+1000.0;
			v1.template get<2>(j) = 11.0+100000.0;
		}

		openfpm::vector<aggregate<int,float,size_t>,Memory,layout_base> v2;

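		// SGather collects the v1 contributions from all processors into v2 on the root
		// (here i % nproc); with rank r contributing r elements, the root expects
		// 0 + 1 + ... + (n-1) = n*(n-1)/2 elements in total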
		vcl.SGather<decltype(v1),decltype(v2),layout_base>(v1,v2,(i%vcl.getProcessingUnits()));

		if (vcl.getProcessUnitID() == (i%vcl.getProcessingUnits()))
		{
			size_t n = vcl.getProcessingUnits();
			BOOST_REQUIRE_EQUAL(v2.size(),n*(n-1)/2);

			// check that every gathered element carries the marker values
			bool is_correct = true;
			for (size_t k = 0 ; k < v2.size() ; k++)
			{
				is_correct &= (v2.template get<0>(k) == 5);
				is_correct &= (v2.template get<1>(k) == 10.0+1000.0);
				is_correct &= (v2.template get<2>(k) == 11.0+100000.0);
			}

			BOOST_REQUIRE_EQUAL(is_correct,true);
		}
		if (vcl.getProcessUnitID() == 0 && i == 99)
			std::cout << "Semantic gather test stop" << std::endl;
	}
}

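// Semantic send/receive test with a layout switch (SSendRecv and SSendRecvP): every
// processor prepares one 100-element chunk for each destination rank and receives the
// chunks addressed to it into collect (whole aggregate) and collect2 (properties 0 and 1).
// When MPI_GPU_DIRECT is set, the exchange is expected to go through device buffers.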
template<typename Memory>
void test_ssend_recv_layout_switch(size_t opt)
{
	auto & v_cl = create_vcluster<Memory>();

	// skip the test when running on more than 10 processors
	if (v_cl.size() > 10) {return;}

	openfpm::vector<openfpm::vector_gpu_single<aggregate<float,float[3]>>> vd;
	openfpm::vector_gpu<aggregate<float,float[3]>> collect;
	openfpm::vector_gpu<aggregate<float,float[3]>> collect2;
	openfpm::vector<size_t> prc_send;
	openfpm::vector<size_t> prc_recv;
	openfpm::vector<size_t> sz_recv;

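	// prepare one 100-element chunk per destination rank; each value encodes the
	// destination (i), the sender rank and the element index, so the receiver can
	// verify exactly which chunk it got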
	vd.resize(v_cl.size());

	for (size_t i = 0 ; i < vd.size() ; i++)
	{
		vd.get(i).resize(100);

		for (size_t j = 0 ; j < vd.get(i).size() ; j++)
		{
			vd.get(i).template get<0>(j) = 10000*i + v_cl.rank()*100 + j;

			vd.get(i).template get<1>(j)[0] = 400000 + 10000*i + v_cl.rank()*100 + j;
			vd.get(i).template get<1>(j)[1] = 400000 + 10000*i + v_cl.rank()*100 + j;
			vd.get(i).template get<1>(j)[2] = 400000 + 10000*i + v_cl.rank()*100 + j;
		}

		prc_send.add(i);

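		// with GPU-direct the send buffers must live on the device: upload the chunk and
		// zero the host copy, so a correct result can only come from the device data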
		if (opt & MPI_GPU_DIRECT)
		{
			vd.get(i).template hostToDevice<0,1>();

			// Reset host

			for (size_t j = 0 ; j < vd.get(i).size() ; j++)
			{
				vd.get(i).template get<0>(j) = 0.0;

				vd.get(i).template get<1>(j)[0] = 0.0;
				vd.get(i).template get<1>(j)[1] = 0.0;
				vd.get(i).template get<1>(j)[2] = 0.0;
			}
		}
	}

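	// SSendRecv: chunk vd.get(i) is sent to processor prc_send.get(i) (= i) and the
	// chunks received from the other processors are concatenated into collect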
	v_cl.template SSendRecv<openfpm::vector_gpu_single<aggregate<float,float[3]>>,decltype(collect),memory_traits_inte>
							(vd,collect,prc_send, prc_recv,sz_recv,opt);

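	// SSendRecvP does the same exchange property by property (here properties 0 and 1),
	// collecting the result into collect2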
	v_cl.template SSendRecvP<openfpm::vector_gpu_single<aggregate<float,float[3]>>,decltype(collect),memory_traits_inte,0,1>
							(vd,collect2,prc_send, prc_recv,sz_recv,opt);

	// every processor sends 100 elements to every processor (including itself),
	// so both collections must contain 100 * v_cl.size() elements

	BOOST_REQUIRE_EQUAL(collect.size(),100*v_cl.size());
	BOOST_REQUIRE_EQUAL(collect2.size(),100*v_cl.size());

	// if the data was received directly on the device (MPI_GPU_DIRECT), zero the host
	// copies: the values checked below can then only come from the deviceToHost transfer

	if (opt & MPI_GPU_DIRECT)
	{
		for (size_t j = 0 ; j < collect.size() ; j++)
		{
			collect.template get<0>(j) = 0.0;

			collect.template get<1>(j)[0] = 0.0;
			collect.template get<1>(j)[1] = 0.0;
			collect.template get<1>(j)[2] = 0.0;

			collect2.template get<0>(j) = 0.0;

			collect2.template get<1>(j)[0] = 0.0;
			collect2.template get<1>(j)[1] = 0.0;
			collect2.template get<1>(j)[2] = 0.0;
		}
	}

	// bring the received data back from device to host before checking it

	if (opt & MPI_GPU_DIRECT)
	{
		collect.template deviceToHost<0,1>();
		collect2.template deviceToHost<0,1>();
	}

	// check what we received: the chunk coming from sender i must contain the values that
	// sender encoded for this rank, i.e. 10000*rank + 100*i + j (plus 400000 for property 1)

	bool match = true;
	for (size_t i = 0 ; i < v_cl.size() ; i++)
	{
		for (size_t j = 0 ; j < 100 ; j++)
		{
			match &= collect.template get<0>(i*100 + j) == v_cl.rank()*10000 + i*100 + j;

			match &= collect.template get<1>(i*100 + j)[0] == 400000 + v_cl.rank()*10000 + i*100 + j;
			match &= collect.template get<1>(i*100 + j)[1] == 400000 + v_cl.rank()*10000 + i*100 + j;
			match &= collect.template get<1>(i*100 + j)[2] == 400000 + v_cl.rank()*10000 + i*100 + j;

			match &= collect2.template get<0>(i*100 + j) == v_cl.rank()*10000 + i*100 + j;

			match &= collect2.template get<1>(i*100 + j)[0] == 400000 + v_cl.rank()*10000 + i*100 + j;
			match &= collect2.template get<1>(i*100 + j)[1] == 400000 + v_cl.rank()*10000 + i*100 + j;
			match &= collect2.template get<1>(i*100 + j)[2] == 400000 + v_cl.rank()*10000 + i*100 + j;
		}

		if (match == false){break;}
	}

	BOOST_REQUIRE_EQUAL(match,true);
}

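/*
 * A minimal usage sketch (an assumption, not part of the original file): these helpers
 * are meant to be instantiated from Boost.Test cases in the corresponding unit-test
 * translation unit, picking a memory backend and a layout, e.g.
 *
 *   BOOST_AUTO_TEST_CASE( vcluster_semantic_gather_layouts )
 *   {
 *       test_different_layouts<HeapMemory,memory_traits_lin>();
 *       test_different_layouts<HeapMemory,memory_traits_inte>();
 *   }
 *
 *   BOOST_AUTO_TEST_CASE( vcluster_ssend_recv_layout_switch )
 *   {
 *       test_ssend_recv_layout_switch<CudaMemory>(MPI_GPU_DIRECT);
 *   }
 *
 * HeapMemory, CudaMemory, memory_traits_lin and memory_traits_inte are the usual OpenFPM
 * memory/layout types; the test-case names above are purely illustrative.
 */
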
#endif /* VCLUSTER_SEMANTIC_UNIT_TESTS_FUNCS_HPP_ */