1 | /* |
2 | * DLB.hpp |
3 | * |
4 | * Created on: Nov 20, 2015 |
5 | * Author: Antonio Leo |
6 | */ |
7 | |
8 | #ifndef SRC_DECOMPOSITION_DLB_HPP_ |
9 | #define SRC_DECOMPOSITION_DLB_HPP_ |
10 | |
11 | //! Time structure for statistical purposes |
12 | struct Times |
13 | { |
14 | //! starting time of the simulation (0) |
15 | size_t simulationStartTime = 0; |
16 | |
17 | //! End iteration of the simulation |
18 | size_t simulationEndTime; |
19 | |
20 | //! integration time |
21 | double timeStep = 0.1; |
22 | |
23 | //! Interval between teo rebalance |
24 | |
25 | //! Start time |
26 | size_t iterationStartTime; |
27 | |
28 | //! End time |
29 | size_t iterationEndTime; |
30 | }; |
31 | |
32 | /*! Class that implements the two heuristics to determine when a re-balance of the distribution is needed. |
33 | * |
34 | * Used heuristics are: SAR and Un-balance Threshold (Default)\n |
35 | * |
36 | * To chose the heuristic use the method setHeuristic(Heuristic) |
37 | * |
38 | * In the SAR heuristic the following formula is applied:\n |
39 | * \f$W_{n} = \frac{\sum_{j=1}^{n} (T_{max}(j) - T_{avg}(j)) + C} {n}\f$ |
40 | * |
41 | * \f$T_{max}(j)\f$ – wall-clock time of bottleneck process in time step j\n |
42 | * \f$T_{avg}(j)\f$ – average wall-clock time for time step j over all processes\n |
43 | * \f$C\f$ – cost of re-decomposing the problem\n |
44 | * \f$n\f$ – number of time steps since last re-decomposition\n |
45 | * \n |
46 | * For small n, load balance is good and W decreases since C is amortized over an increasing number of time steps. |
47 | * As the accumulated idle time starts to dominate, W starts to rise. At this point, C has been fully amortized. |
48 | * Re-decompose when \f$W_{n} > W_{n-1}\f$\n |
49 | * |
50 | * In the Un-balance Threshold heuristic the re-balance is triggered when the un-balance level exceeds a certain level. |
51 | * Levels can be chosen in the ThresholdLevel type. |
52 | */ |
53 | class DLB |
54 | { |
55 | public: |
56 | |
57 | //! Type of DLB heuristics |
58 | enum Heuristic |
59 | { |
60 | SAR_HEURISTIC, UNBALANCE_THRLD |
61 | }; |
62 | |
63 | //! Level of un-balance needed to trigger the re-balance |
64 | enum ThresholdLevel |
65 | { |
66 | THRLD_LOW = 5, THRLD_MEDIUM = 7, THRLD_HIGH = 10 |
67 | }; |
68 | |
69 | private: |
70 | |
71 | //! Runtime virtual cluster machine |
72 | Vcluster<> & v_cl; |
73 | |
74 | //! Structure that will contain all the timings |
75 | Times timeInfo; |
76 | |
77 | //! Wn for SAR heuristic |
78 | float w_n = -1; |
79 | |
80 | //! Computation cost for SAR heuristic |
81 | float c_c = 5; |
82 | |
83 | //! Number of time-steps since the previous DLB |
84 | size_t n_ts = 1; |
85 | |
86 | //! Idle time accumulated so far, needed for SAR heuristic |
87 | float i_time = 0; |
88 | |
89 | //! Vector to collect all timings |
90 | openfpm::vector<long> times; |
91 | |
92 | //! Type of the heuristic to use |
93 | Heuristic heuristic = UNBALANCE_THRLD; |
94 | |
95 | //! Un-balance value |
96 | float unbalance = -1; |
97 | |
98 | //! Threshold value |
99 | ThresholdLevel thl = THRLD_MEDIUM; |
100 | |
101 | /*! \brief Function that gather times informations and decides if a rebalance is needed it uses the SAR heuristic |
102 | * |
103 | * \return true if re-balance is needed |
104 | * |
105 | */ |
106 | inline bool SAR() |
107 | { |
108 | long t = timeInfo.iterationEndTime - timeInfo.iterationStartTime; |
109 | float t_max = t, t_avg = t; |
110 | |
111 | // Exchange time informations through processors |
112 | v_cl.max(t_max); |
113 | v_cl.sum(t_avg); |
114 | v_cl.execute(); |
115 | |
116 | t_avg /= v_cl.getProcessingUnits(); |
117 | |
118 | // add idle time to vector |
119 | i_time += t_max - t_avg; |
120 | |
121 | // Compute Wn |
122 | float nw_n = (i_time + c_c) / n_ts; |
123 | |
124 | if (w_n == -1) |
125 | w_n = nw_n; |
126 | |
127 | if (nw_n > w_n) |
128 | { |
129 | i_time = 0; |
130 | n_ts = 1; |
131 | w_n = nw_n; |
132 | return true; |
133 | } |
134 | else |
135 | { |
136 | ++n_ts; |
137 | w_n = nw_n; |
138 | return false; |
139 | } |
140 | } |
141 | |
142 | /*! \brief Check if the un-balance has exceeded the threshold |
143 | * |
144 | * \return true if re-balance is needed, false otherwise |
145 | */ |
146 | bool unbalanceThreshold() |
147 | { |
148 | if (unbalance == -1) |
149 | { |
150 | std::cerr << "Error: Un-balance value must be set before checking DLB." ; |
151 | return false; |
152 | } |
153 | |
154 | if (unbalance > thl) |
155 | { |
156 | return true; |
157 | } |
158 | |
159 | return false; |
160 | } |
161 | |
162 | public: |
163 | |
164 | /*! \brief Constructor for DLB class |
165 | * |
166 | * \param v_cl virtual cluster object |
167 | */ |
168 | DLB(Vcluster<> & v_cl) : |
169 | v_cl(v_cl) |
170 | { |
171 | } |
172 | |
173 | /*! \brief Set the heuristic to use (default: un-balance threshold) |
174 | * |
175 | * \param h |
176 | */ |
177 | void setHeurisitc(Heuristic h) |
178 | { |
179 | heuristic = h; |
180 | } |
181 | |
182 | /*! \brief Get the heuristic |
183 | * |
184 | * Indicate which heuristic model is used to calculate when a rebalance |
185 | * is needed |
186 | * |
187 | * \return the Heuristic used by DLB |
188 | * |
189 | */ |
190 | Heuristic getHeurisitc() |
191 | { |
192 | return heuristic; |
193 | } |
194 | |
195 | /*! \brief check if a re-balance is needed using the selected heuristic |
196 | * |
197 | * \return true if the rebalance is needed |
198 | * |
199 | */ |
200 | bool rebalanceNeeded() |
201 | { |
202 | if (heuristic == SAR_HEURISTIC) |
203 | { |
204 | return SAR(); |
205 | } |
206 | else |
207 | { |
208 | return unbalanceThreshold(); |
209 | } |
210 | } |
211 | |
212 | /*! \brief Set start time for the simulation |
213 | * |
214 | * \param t time when the whole simulation starts |
215 | */ |
216 | void setSimulationStartTime(size_t t) |
217 | { |
218 | timeInfo.simulationStartTime = t; |
219 | } |
220 | |
221 | /*! \brief Get start time for the simulation |
222 | * |
223 | * \return the start point of the simulation |
224 | * |
225 | */ |
226 | size_t getSimulationStartTime() |
227 | { |
228 | return timeInfo.simulationStartTime; |
229 | } |
230 | |
231 | /*! \brief Set end time for the simulation |
232 | * |
233 | * \param t time when the whole simulation ends |
234 | */ |
235 | void setSimulationEndTime(size_t t) |
236 | { |
237 | timeInfo.simulationEndTime = t; |
238 | } |
239 | |
240 | /*! \brief Get end time for the simulation |
241 | * |
242 | * \return the end time of the simulation |
243 | * |
244 | */ |
245 | size_t getSimulationEndTime() |
246 | { |
247 | return timeInfo.simulationEndTime; |
248 | } |
249 | |
250 | /*! \brief Set start time for the single iteration |
251 | * |
252 | */ |
253 | void startIteration() |
254 | { |
255 | timeInfo.iterationStartTime = clock(); |
256 | } |
257 | |
258 | /*! \brief Set start time for the single iteration |
259 | * |
260 | * \param t time when the one iteration starts |
261 | */ |
262 | void startIteration(size_t t) |
263 | { |
264 | timeInfo.iterationStartTime = t; |
265 | } |
266 | |
267 | /*! \brief Set end time for the single iteration |
268 | * |
269 | * \param time when one iteration is completed |
270 | * |
271 | */ |
272 | void endIteration() |
273 | { |
274 | timeInfo.iterationEndTime = clock(); |
275 | } |
276 | |
277 | /*! \brief Set the end time when the previous rebalance has been performed |
278 | * |
279 | * \param t time when one iteration ends |
280 | */ |
281 | void endIteration(size_t t) |
282 | { |
283 | timeInfo.iterationEndTime = t; |
284 | } |
285 | |
286 | /*! \brief Set delta time step for one iteration (Computation time) |
287 | * |
288 | * \param t timestep |
289 | */ |
290 | void setTimeStep(double t) |
291 | { |
292 | timeInfo.timeStep = t; |
293 | } |
294 | |
295 | /*! \brief Set time step for the single iteration |
296 | * |
297 | * \param computation value of the computation cost (default: 5) |
298 | */ |
299 | void setComputationCost(size_t computation) |
300 | { |
301 | c_c = computation; |
302 | } |
303 | |
304 | /*! \brief Get how many time-steps have passed since the last re-balancing |
305 | * |
306 | * \return number of timesteos |
307 | * |
308 | */ |
309 | size_t getNTimeStepSinceDLB() |
310 | { |
311 | return n_ts; |
312 | } |
313 | |
314 | /*! \brief Set un-balance value |
315 | * |
316 | * \param u unbalance |
317 | */ |
318 | void setUnbalance(float u) |
319 | { |
320 | unbalance = u; |
321 | } |
322 | |
323 | /*! \brief threshold of umbalance to start a rebalance |
324 | * |
325 | * \param t threshold level |
326 | */ |
327 | void setThresholdLevel(ThresholdLevel t) |
328 | { |
329 | thl = t; |
330 | } |
331 | |
332 | }; |
333 | |
334 | #endif /* SRC_DECOMPOSITION_DLB_HPP_ */ |
335 | |