| 1 | /* | 
| 2 |  * VCluster_semantic_unit_tests_funcs.hpp | 
| 3 |  * | 
| 4 |  *  Created on: Aug 18, 2018 | 
| 5 |  *      Author: i-bird | 
| 6 |  */ | 
| 7 |  | 
| 8 | #ifndef VCLUSTER_SEMANTIC_UNIT_TESTS_FUNCS_HPP_ | 
| 9 | #define VCLUSTER_SEMANTIC_UNIT_TESTS_FUNCS_HPP_ | 
| 10 |  | 
| 11 | #include "VCluster/VCluster.hpp" | 
| 12 |  | 
| 13 | template<typename Memory, template<typename> class layout_base> | 
| 14 | void test_different_layouts() | 
| 15 | { | 
| 16 | 	for (size_t i = 0 ; i < 100 ; i++) | 
| 17 | 	{ | 
| 18 | 		Vcluster<> & vcl = create_vcluster(); | 
| 19 |  | 
| 20 | 		if (vcl.getProcessingUnits() >= 32) | 
| 21 | 			return; | 
| 22 |  | 
| 23 | 		openfpm::vector<aggregate<int,float,size_t>,Memory,layout_base> v1; | 
| 24 | 		v1.resize(vcl.getProcessUnitID()); | 
| 25 |  | 
| 26 | 		for(size_t j = 0 ; j < vcl.getProcessUnitID() ; j++) | 
| 27 | 		{ | 
| 28 | 			v1.template get<0>(j) = 5; | 
| 29 | 			v1.template get<1>(j) = 10.0+1000.0; | 
| 30 | 			v1.template get<2>(j) = 11.0+100000; | 
| 31 | 		} | 
| 32 |  | 
| 33 | 		openfpm::vector<aggregate<int,float,size_t>,Memory,layout_base> v2; | 
| 34 |  | 
| 35 | 		vcl.SGather<decltype(v1),decltype(v2),layout_base>(v1,v2,(i%vcl.getProcessingUnits())); | 
| 36 |  | 
| 37 | 		if (vcl.getProcessUnitID() == (i%vcl.getProcessingUnits())) | 
| 38 | 		{ | 
| 39 | 			size_t n = vcl.getProcessingUnits(); | 
| 40 | 			BOOST_REQUIRE_EQUAL(v2.size(),n*(n-1)/2); | 
| 41 |  | 
| 42 | 			bool is_correct = true; | 
| 43 | 			for (size_t i = 0 ; i < v2.size() ; i++) | 
| 44 | 			{ | 
| 45 | 				is_correct &= (v2.template get<0>(i) == 5); | 
| 46 | 				is_correct &= (v2.template get<1>(i) == 10.0+1000.0); | 
| 47 | 				is_correct &= (v2.template get<2>(i) == 11.0+100000.0); | 
| 48 | 			} | 
| 49 |  | 
| 50 | 			BOOST_REQUIRE_EQUAL(is_correct,true); | 
| 51 | 		} | 
| 52 | 		if (vcl.getProcessUnitID() == 0 && i == 99) | 
| 53 | 			std::cout << "Semantic gather test stop"  << std::endl; | 
| 54 | 	} | 
| 55 | } | 
| 56 |  | 
| 57 | template<typename Memory> | 
| 58 | void test_ssend_recv_layout_switch(size_t opt) | 
| 59 | { | 
| 60 | 	auto & v_cl = create_vcluster<Memory>(); | 
| 61 |  | 
| 62 | 	if (v_cl.size() > 10)	{return;} | 
| 63 |  | 
| 64 | 	openfpm::vector<openfpm::vector_gpu_single<aggregate<float,float[3]>>> vd; | 
| 65 | 	openfpm::vector_gpu<aggregate<float,float[3]>> collect; | 
| 66 | 	openfpm::vector_gpu<aggregate<float,float[3]>> collect2; | 
| 67 | 	openfpm::vector<size_t> prc_send; | 
| 68 |     openfpm::vector<size_t> prc_recv; | 
| 69 |     openfpm::vector<size_t> sz_recv; | 
| 70 |  | 
| 71 | 	vd.resize(v_cl.size()); | 
| 72 |  | 
| 73 | 	for (size_t i = 0 ; i < vd.size() ; i++) | 
| 74 | 	{ | 
| 75 | 		vd.get(i).resize(100); | 
| 76 |  | 
| 77 | 		for (size_t j = 0 ; j < vd.get(i).size() ; j++) | 
| 78 | 		{ | 
| 79 | 			vd.get(i).template get<0>(j) = 10000*i + v_cl.rank()*100 + j; | 
| 80 |  | 
| 81 | 			vd.get(i).template get<1>(j)[0] = 400000 + 10000*i + v_cl.rank()*100 + j; | 
| 82 | 			vd.get(i).template get<1>(j)[1] = 400000 + 10000*i + v_cl.rank()*100 + j; | 
| 83 | 			vd.get(i).template get<1>(j)[2] = 400000 + 10000*i + v_cl.rank()*100 + j; | 
| 84 | 		} | 
| 85 |  | 
| 86 | 		prc_send.add(i); | 
| 87 |  | 
| 88 | 		if (opt & MPI_GPU_DIRECT) | 
| 89 | 		{ | 
| 90 | 			vd.get(i).template hostToDevice<0,1>(); | 
| 91 |  | 
| 92 | 			// Reset host | 
| 93 |  | 
| 94 | 			for (size_t j = 0 ; j < vd.get(i).size() ; j++) | 
| 95 | 			{ | 
| 96 | 				vd.get(i).template get<0>(j) = 0.0; | 
| 97 |  | 
| 98 | 				vd.get(i).template get<1>(j)[0] = 0.0; | 
| 99 | 				vd.get(i).template get<1>(j)[1] = 0.0; | 
| 100 | 				vd.get(i).template get<1>(j)[2] = 0.0; | 
| 101 | 			} | 
| 102 | 		} | 
| 103 | 	} | 
| 104 |  | 
| 105 | 	v_cl.template SSendRecv<openfpm::vector_gpu_single<aggregate<float,float[3]>>,decltype(collect),memory_traits_inte> | 
| 106 | 	(vd,collect,prc_send, prc_recv,sz_recv,opt); | 
| 107 |  | 
| 108 | 	v_cl.template SSendRecvP<openfpm::vector_gpu_single<aggregate<float,float[3]>>,decltype(collect),memory_traits_inte,0,1> | 
| 109 | 	(vd,collect2,prc_send, prc_recv,sz_recv,opt); | 
| 110 |  | 
| 111 | 	// collect must have 100 * v_cl.size() | 
| 112 |  | 
| 113 | 	BOOST_REQUIRE_EQUAL(collect.size(),100*v_cl.size()); | 
| 114 | 	BOOST_REQUIRE_EQUAL(collect2.size(),100*v_cl.size()); | 
| 115 |  | 
| 116 | 	// we reset the host collected data if data must be on device | 
| 117 |  | 
| 118 | 	if (opt & MPI_GPU_DIRECT) | 
| 119 | 	{ | 
| 120 | 		for (size_t j = 0 ; j < collect.size() ; j++) | 
| 121 | 		{ | 
| 122 | 			collect.template get<0>(j) = 0.0; | 
| 123 |  | 
| 124 | 			collect.template get<1>(j)[0] = 0.0; | 
| 125 | 			collect.template get<1>(j)[1] = 0.0; | 
| 126 | 			collect.template get<1>(j)[2] = 0.0; | 
| 127 |  | 
| 128 | 			collect2.template get<0>(j) = 0.0; | 
| 129 |  | 
| 130 | 			collect2.template get<1>(j)[0] = 0.0; | 
| 131 | 			collect2.template get<1>(j)[1] = 0.0; | 
| 132 | 			collect2.template get<1>(j)[2] = 0.0; | 
| 133 | 		} | 
| 134 | 	} | 
| 135 |  | 
| 136 | 	// from device to host | 
| 137 |  | 
| 138 | 	if (opt & MPI_GPU_DIRECT) | 
| 139 | 	{ | 
| 140 | 		collect.template deviceToHost<0,1>(); | 
| 141 | 		collect2.template deviceToHost<0,1>(); | 
| 142 | 	} | 
| 143 |  | 
| 144 | 	// now we check what we received | 
| 145 |  | 
| 146 | 	// check what we received | 
| 147 |  | 
| 148 | 	bool match = true; | 
| 149 | 	for (size_t i = 0 ; i < v_cl.size() ; i++) | 
| 150 | 	{ | 
| 151 | 		for (size_t j = 0 ; j < 100 ; j++) | 
| 152 | 		{ | 
| 153 | 			match &= collect.template get<0>(i*100 +j) == v_cl.rank()*10000 + i*100 + j; | 
| 154 |  | 
| 155 | 			match &= collect.template get<1>(i*100 +j)[0] == 400000 + v_cl.rank()*10000 + i*100 + j; | 
| 156 | 			match &= collect.template get<1>(i*100 +j)[1] == 400000 + v_cl.rank()*10000 + i*100 + j; | 
| 157 | 			match &= collect.template get<1>(i*100 +j)[2] == 400000 + v_cl.rank()*10000 + i*100 + j; | 
| 158 |  | 
| 159 | 			match &= collect2.template get<0>(i*100 +j) == v_cl.rank()*10000 + i*100 + j; | 
| 160 |  | 
| 161 | 			match &= collect2.template get<1>(i*100 +j)[0] == 400000 + v_cl.rank()*10000 + i*100 + j; | 
| 162 | 			match &= collect2.template get<1>(i*100 +j)[1] == 400000 + v_cl.rank()*10000 + i*100 + j; | 
| 163 | 			match &= collect2.template get<1>(i*100 +j)[2] == 400000 + v_cl.rank()*10000 + i*100 + j; | 
| 164 | 		} | 
| 165 |  | 
| 166 | 		if (match == false){break;} | 
| 167 | 	} | 
| 168 |  | 
| 169 | 	BOOST_REQUIRE_EQUAL(match,true); | 
| 170 | } | 
| 171 |  | 
| 172 | #endif /* VCLUSTER_SEMANTIC_UNIT_TESTS_FUNCS_HPP_ */ | 
| 173 |  |