Directory: | src/ |
---|---|
File: | src/xt_redist_p2p.c |
Date: | 2024-11-08 09:02:52 |
Exec | Total | Coverage | |
---|---|---|---|
Lines: | 205 | 216 | 94.9% |
Branches: | 78 | 108 | 72.2% |
Line | Branch | Exec | Source |
---|---|---|---|
1 | /** | ||
2 | * @file xt_redist_p2p.c | ||
3 | * | ||
4 | * @copyright Copyright (C) 2016 Jörg Behrens <behrens@dkrz.de> | ||
5 | * Moritz Hanke <hanke@dkrz.de> | ||
6 | * Thomas Jahns <jahns@dkrz.de> | ||
7 | * | ||
8 | * @author Jörg Behrens <behrens@dkrz.de> | ||
9 | * Moritz Hanke <hanke@dkrz.de> | ||
10 | * Thomas Jahns <jahns@dkrz.de> | ||
11 | */ | ||
12 | /* | ||
13 | * Keywords: | ||
14 | * Maintainer: Jörg Behrens <behrens@dkrz.de> | ||
15 | * Moritz Hanke <hanke@dkrz.de> | ||
16 | * Thomas Jahns <jahns@dkrz.de> | ||
17 | * URL: https://dkrz-sw.gitlab-pages.dkrz.de/yaxt/ | ||
18 | * | ||
19 | * Redistribution and use in source and binary forms, with or without | ||
20 | * modification, are permitted provided that the following conditions are | ||
21 | * met: | ||
22 | * | ||
23 | * Redistributions of source code must retain the above copyright notice, | ||
24 | * this list of conditions and the following disclaimer. | ||
25 | * | ||
26 | * Redistributions in binary form must reproduce the above copyright | ||
27 | * notice, this list of conditions and the following disclaimer in the | ||
28 | * documentation and/or other materials provided with the distribution. | ||
29 | * | ||
30 | * Neither the name of the DKRZ GmbH nor the names of its contributors | ||
31 | * may be used to endorse or promote products derived from this software | ||
32 | * without specific prior written permission. | ||
33 | * | ||
34 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS | ||
35 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | ||
36 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A | ||
37 | * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER | ||
38 | * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
39 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
40 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
41 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
42 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
43 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
44 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
45 | */ | ||
46 | #ifdef HAVE_CONFIG_H | ||
47 | #include <config.h> | ||
48 | #endif | ||
49 | |||
50 | #include <stdlib.h> | ||
51 | #include <stdio.h> | ||
52 | #include <assert.h> | ||
53 | |||
54 | #include <mpi.h> | ||
55 | |||
56 | #include "xt/xt_mpi.h" | ||
57 | #include "xt_mpi_internal.h" | ||
58 | #include "xt/xt_redist_p2p.h" | ||
59 | #include "xt_redist_internal.h" | ||
60 | #include "xt/xt_redist_single_array_base.h" | ||
61 | #include "xt/xt_xmap.h" | ||
62 | #include "xt/xt_idxlist.h" | ||
63 | #include "core/ppm_xfuncs.h" | ||
64 | #include "core/core.h" | ||
65 | #include "xt_config_internal.h" | ||
66 | |||
67 | #include "xt_arithmetic_util.h" | ||
68 | |||
69 | #include "xt_mpi_ddt_cache.h" | ||
70 | |||
71 | /* the following two functions fulfill the same purpose as | ||
72 | * xt_disp2ext and xt_disp2ext_count but work with an indirection */ | ||
73 | static size_t | ||
74 | 160 | xt_mdisp2ext_count(size_t disp_len, const int *disp, const int *pos) | |
75 | { | ||
76 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 160 times.
|
160 | if (!disp_len) return 0; |
77 | 160 | size_t i = 0; | |
78 | 160 | int cur_stride = 1, cur_size = 1; | |
79 | 160 | int last_disp = disp[pos[0]]; | |
80 |
2/2✓ Branch 0 taken 7128 times.
✓ Branch 1 taken 160 times.
|
7288 | for (size_t p = 1; p < disp_len; ++p) { |
81 | 7128 | int new_disp = disp[pos[p]]; | |
82 | 7128 | int new_stride = new_disp - last_disp; | |
83 |
2/2✓ Branch 0 taken 760 times.
✓ Branch 1 taken 6368 times.
|
7128 | if (cur_size == 1) { |
84 | 760 | cur_stride = new_stride; | |
85 | 760 | cur_size = 2; | |
86 |
2/2✓ Branch 0 taken 5063 times.
✓ Branch 1 taken 1305 times.
|
6368 | } else if (new_stride == cur_stride) { |
87 | // cur_size >= 2: | ||
88 | 5063 | cur_size++; | |
89 |
5/6✓ Branch 0 taken 926 times.
✓ Branch 1 taken 379 times.
✓ Branch 2 taken 926 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 230 times.
✓ Branch 5 taken 696 times.
|
1305 | } else if (cur_size > 2 || (cur_size == 2 && cur_stride == 1) ) { |
90 | // we accept small contiguous vectors (nstrides==2, stride==1) | ||
91 | 609 | i++; | |
92 | 609 | cur_stride = 1; | |
93 | 609 | cur_size = 1; | |
94 | } else { // cur_size == 2, next offset doesn't match current stride | ||
95 | // break up trivial vec: | ||
96 | 696 | i++; | |
97 | 696 | cur_size = 2; | |
98 | 696 | cur_stride = new_stride; | |
99 | } | ||
100 | 7128 | last_disp = new_disp; | |
101 | } | ||
102 | // tail cases: | ||
103 |
6/6✓ Branch 0 taken 76 times.
✓ Branch 1 taken 84 times.
✓ Branch 2 taken 67 times.
✓ Branch 3 taken 9 times.
✓ Branch 4 taken 40 times.
✓ Branch 5 taken 27 times.
|
160 | if (cur_size > 2 || (cur_size == 2 && cur_stride == 1)) { |
104 | 124 | i++; | |
105 |
2/2✓ Branch 0 taken 27 times.
✓ Branch 1 taken 9 times.
|
36 | } else if (cur_size == 2) { |
106 | 27 | i+=2; | |
107 | } else { // cur_size == 1 | ||
108 | 9 | i++; | |
109 | } | ||
110 | |||
111 | 160 | return i; | |
112 | } | ||
113 | |||
114 | static size_t | ||
115 | 160 | xt_mdisp2ext(size_t disp_len, const int *disp, const int *pos, | |
116 | struct Xt_offset_ext *restrict v) | ||
117 | { | ||
118 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 160 times.
|
160 | if (disp_len<1) return 0; |
119 | |||
120 | 160 | int cur_start = disp[pos[0]], cur_stride = 1, cur_size = 1; | |
121 | 160 | int last_disp = cur_start; | |
122 | 160 | size_t i = 0; | |
123 |
2/2✓ Branch 0 taken 7128 times.
✓ Branch 1 taken 160 times.
|
7288 | for (size_t p = 1; p < disp_len; ++p) { |
124 | 7128 | int new_disp = disp[pos[p]]; | |
125 | 7128 | int new_stride = new_disp - last_disp; | |
126 |
2/2✓ Branch 0 taken 760 times.
✓ Branch 1 taken 6368 times.
|
7128 | if (cur_size == 1) { |
127 | 760 | cur_stride = new_stride; | |
128 | 760 | cur_size = 2; | |
129 |
2/2✓ Branch 0 taken 5063 times.
✓ Branch 1 taken 1305 times.
|
6368 | } else if (new_stride == cur_stride) { |
130 | // cur_size >= 2: | ||
131 | 5063 | cur_size++; | |
132 |
5/6✓ Branch 0 taken 926 times.
✓ Branch 1 taken 379 times.
✓ Branch 2 taken 926 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 230 times.
✓ Branch 5 taken 696 times.
|
1305 | } else if (cur_size > 2 || (cur_size == 2 && cur_stride == 1) ) { |
133 | // we accept small contiguous vectors (nstrides==2, stride==1) | ||
134 | 609 | v[i] = (struct Xt_offset_ext){ .start = cur_start, .stride = cur_stride, | |
135 | .size = cur_size }; | ||
136 | 609 | i++; | |
137 | 609 | cur_start = new_disp; | |
138 | 609 | cur_stride = 1; | |
139 | 609 | cur_size = 1; | |
140 | } else { // cur_size == 2, next offset doesn't match current stride | ||
141 | // break up trivial vec: | ||
142 | 696 | v[i].start = cur_start; | |
143 | 696 | v[i].size = 1; | |
144 | 696 | v[i].stride = 1; | |
145 | 696 | i++; | |
146 | 696 | cur_start += cur_stride; | |
147 | 696 | cur_size = 2; | |
148 | 696 | cur_stride = new_stride; | |
149 | } | ||
150 | 7128 | last_disp = new_disp; | |
151 | } | ||
152 | // tail cases: | ||
153 |
6/6✓ Branch 0 taken 76 times.
✓ Branch 1 taken 84 times.
✓ Branch 2 taken 67 times.
✓ Branch 3 taken 9 times.
✓ Branch 4 taken 40 times.
✓ Branch 5 taken 27 times.
|
160 | if (cur_size > 2 || (cur_size == 2 && cur_stride == 1)) { |
154 | 124 | v[i] = (struct Xt_offset_ext){ .start = cur_start, .stride = cur_stride, | |
155 | .size = cur_size }; | ||
156 | 124 | i++; | |
157 |
2/2✓ Branch 0 taken 27 times.
✓ Branch 1 taken 9 times.
|
36 | } else if (cur_size == 2) { |
158 | 27 | v[i].start = cur_start; | |
159 | 27 | v[i].size = 1; | |
160 | 27 | v[i].stride = 1; | |
161 | 27 | i++; | |
162 | 27 | v[i].start = cur_start + cur_stride; | |
163 | 27 | v[i].size = 1; | |
164 | 27 | v[i].stride = 1; | |
165 | 27 | i++; | |
166 | } else { // cur_size == 1 | ||
167 | 9 | v[i].start = cur_start; | |
168 | 9 | v[i].size = 1; | |
169 | 9 | v[i].stride = 1; | |
170 | 9 | i++; | |
171 | } | ||
172 | |||
173 | 160 | return i; | |
174 | } | ||
175 | |||
176 | |||
177 | static MPI_Datatype | ||
178 | 4468 | generate_datatype(const int *transfer_pos, int num_transfer_pos, | |
179 | const int *offsets, | ||
180 | size_t *vsize, struct Xt_offset_ext **v, | ||
181 | struct Xt_mpi_strp_prs_params *params) | ||
182 | { | ||
183 | 4468 | struct Xt_offset_ext *v_ = *v; | |
184 | size_t vlen; | ||
185 |
2/2✓ Branch 0 taken 160 times.
✓ Branch 1 taken 4308 times.
|
4468 | if (offsets != NULL) { |
186 | 160 | vlen = xt_mdisp2ext_count((size_t)num_transfer_pos, offsets, transfer_pos); | |
187 |
2/2✓ Branch 0 taken 102 times.
✓ Branch 1 taken 58 times.
|
160 | if (vlen > *vsize) { |
188 | 102 | *v = v_ = xrealloc(v_, sizeof(*v_) * vlen); | |
189 | 102 | *vsize = vlen; | |
190 | } | ||
191 | 160 | xt_mdisp2ext((size_t)num_transfer_pos, offsets, transfer_pos, v_); | |
192 | } else { | ||
193 | 4308 | vlen = xt_disp2ext_count((size_t)num_transfer_pos, transfer_pos); | |
194 |
2/2✓ Branch 0 taken 1163 times.
✓ Branch 1 taken 3145 times.
|
4308 | if (vlen > *vsize) { |
195 | 1163 | *v = v_ = xrealloc(v_, sizeof(*v_) * vlen); | |
196 | 1163 | *vsize = vlen; | |
197 | } | ||
198 | 4308 | xt_disp2ext((size_t)num_transfer_pos, transfer_pos, v_); | |
199 | } | ||
200 | |||
201 | 4468 | MPI_Datatype type = xt_mpi_parse_stripe(vlen, v_, params); | |
202 | 4468 | return type; | |
203 | } | ||
204 | |||
205 | static void | ||
206 | 1178 | generate_msg_infos(int num_msgs, Xt_xmap_iter iter, const int *offsets, | |
207 | struct Xt_redist_msg *msgs, | ||
208 | struct Xt_mpi_strp_prs_params *params) { | ||
209 | |||
210 |
2/2✓ Branch 0 taken 1168 times.
✓ Branch 1 taken 10 times.
|
1178 | if (num_msgs > 0) { |
211 | 1168 | size_t vsize = 0; | |
212 | 1168 | struct Xt_offset_ext *v = NULL; | |
213 | 1168 | struct Xt_redist_msg *restrict curr_msg = msgs; | |
214 | do { | ||
215 | |||
216 | 4468 | const int *curr_transfer_pos = xt_xmap_iterator_get_transfer_pos(iter); | |
217 | 4468 | int curr_num_transfer_pos = xt_xmap_iterator_get_num_transfer_pos(iter); | |
218 | |||
219 | curr_msg->datatype | ||
220 | 4468 | = generate_datatype(curr_transfer_pos, curr_num_transfer_pos, | |
221 | offsets, &vsize, &v, params); | ||
222 | 4468 | curr_msg->rank = xt_xmap_iterator_get_rank(iter); | |
223 | |||
224 | 4468 | curr_msg++; | |
225 |
2/2✓ Branch 1 taken 3300 times.
✓ Branch 2 taken 1168 times.
|
4468 | } while (xt_xmap_iterator_next(iter)); |
226 | 1168 | free(v); | |
227 | } | ||
228 | 1178 | } | |
229 | |||
230 | Xt_redist | ||
231 | 32 | xt_redist_p2p_off_new(Xt_xmap xmap, const int *src_offsets, | |
232 | const int *dst_offsets, MPI_Datatype datatype) | ||
233 | { | ||
234 | 32 | return xt_redist_p2p_off_custom_new(xmap, src_offsets, dst_offsets, datatype, | |
235 | (Xt_config)&xt_default_config); | ||
236 | } | ||
237 | |||
238 | Xt_redist | ||
239 | 589 | xt_redist_p2p_off_custom_new(Xt_xmap xmap, const int *src_offsets, | |
240 | const int *dst_offsets, MPI_Datatype datatype, | ||
241 | Xt_config config) { | ||
242 | // ensure that yaxt is initialized | ||
243 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 589 times.
|
589 | assert(xt_initialized()); |
244 | |||
245 | 589 | int nsend = xt_xmap_get_num_destinations(xmap), | |
246 | 589 | nrecv = xt_xmap_get_num_sources(xmap); | |
247 | 589 | size_t nmsg = (size_t)nsend + (size_t)nrecv; | |
248 | 589 | struct Xt_redist_msg *msgs = xmalloc(nmsg * sizeof (*msgs)), | |
249 | 589 | *send_msgs = msgs, *recv_msgs = msgs + nsend; | |
250 | int tag_offset; | ||
251 | 589 | MPI_Comm comm | |
252 | 589 | = xt_mpi_comm_smart_dup(xt_xmap_get_communicator(xmap), &tag_offset); | |
253 | |||
254 | struct Xt_mpi_strp_prs_params params; | ||
255 | 589 | xt_init_mpi_strp_prs_params(&params, datatype, comm); | |
256 | 589 | Xt_xmap_iter dst_iter = xt_xmap_get_in_iterator(xmap); | |
257 | 589 | generate_msg_infos(nrecv, dst_iter, dst_offsets, recv_msgs, &params); | |
258 |
2/2✓ Branch 0 taken 587 times.
✓ Branch 1 taken 2 times.
|
589 | if (dst_iter) xt_xmap_iterator_delete(dst_iter); |
259 | |||
260 | 589 | Xt_xmap_iter src_iter = xt_xmap_get_out_iterator(xmap); | |
261 | 589 | generate_msg_infos(nsend, src_iter, src_offsets, send_msgs, &params); | |
262 |
2/2✓ Branch 0 taken 581 times.
✓ Branch 1 taken 8 times.
|
589 | if (src_iter) xt_xmap_iterator_delete(src_iter); |
263 | 589 | Xt_mpi_ddt_cache_check_retention(&params.ddt_list, nmsg, msgs); | |
264 | 589 | xt_destroy_mpi_strp_prs_params(&params); | |
265 | |||
266 | 589 | struct Xt_config_ config_ = *config; | |
267 | 589 | config_.flags |= exch_no_dt_dup; | |
268 | |||
269 | 589 | Xt_redist result = xt_redist_single_array_base_custom_new( | |
270 | nsend, nrecv, send_msgs, recv_msgs, comm, &config_); | ||
271 | |||
272 | 589 | free(msgs); | |
273 | 589 | xt_mpi_comm_smart_dedup(&comm, tag_offset); | |
274 | 589 | return result; | |
275 | } | ||
276 | |||
277 | /* ====================================================================== */ | ||
278 | |||
279 | static inline int | ||
280 | 6 | pos2disp(int pos, int num_ext, const int psum_ext_size[]) | |
281 | { | ||
282 | 6 | int j = 0; | |
283 | /* FIXME: use bsearch if linear search is too slow, i.e. num_ext >> 1000 */ | ||
284 | /* what extent covers the pos'th position? */ | ||
285 |
2/4✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 6 times.
|
6 | while (j < num_ext && pos >= psum_ext_size[j + 1]) |
286 | ✗ | ++j; | |
287 | 6 | return j; | |
288 | } | ||
289 | |||
290 | static inline int | ||
291 | 72 | pos2disp2(int pos, int num_ext, | |
292 | const int psum_ext_size[], int start_ext) | ||
293 | { | ||
294 | 72 | int j = start_ext; | |
295 |
2/4✓ Branch 0 taken 72 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 72 times.
✗ Branch 3 not taken.
|
72 | if (pos < psum_ext_size[j + 1] && pos >= psum_ext_size[j]) |
296 | ; | ||
297 | ✗ | else if (pos < psum_ext_size[j + 1]) | |
298 | { | ||
299 | ✗ | j = 0; | |
300 | ✗ | while (j < start_ext && pos >= psum_ext_size[j + 1]) | |
301 | ✗ | ++j; | |
302 | } | ||
303 | else | ||
304 | ✗ | while (j < num_ext && pos >= psum_ext_size[j + 1]) | |
305 | ✗ | ++j; | |
306 | 72 | return j; | |
307 | } | ||
308 | |||
309 | #define XT_EXT_TYPE struct Xt_offset_ext | ||
310 | #define XT_EXT_TAG ext | ||
311 | #define XT_MPI_PARSE_STRIPE xt_mpi_parse_stripe | ||
312 | #define XT_EXT_STRIDE_MASK isign_mask_current_pos_ext_size | ||
313 | #define XT_EXT_STRIDE_MASK_PREP | ||
314 | #include "xt_redist_p2p_ext.h" | ||
315 | #undef XT_EXT_TYPE | ||
316 | #undef XT_EXT_TAG | ||
317 | #undef XT_MPI_PARSE_STRIPE | ||
318 | #undef XT_EXT_STRIDE_MASK | ||
319 | #undef XT_EXT_STRIDE_MASK_PREP | ||
320 | |||
321 | #define XT_EXT_TYPE struct Xt_aoffset_ext | ||
322 | #define XT_EXT_TAG aext | ||
323 | #define XT_MPI_PARSE_STRIPE xt_mpi_parse_astripe | ||
324 | #define XT_EXT_STRIDE_MASK asign_mask_current_pos_ext_size | ||
325 | #define XT_EXT_STRIDE_MASK_PREP MPI_Aint asign_mask_current_pos_ext_size \ | ||
326 | = asign_mask(current_pos_ext.size) | ||
327 | #include "xt_redist_p2p_ext.h" | ||
328 | #undef XT_EXT_TYPE | ||
329 | #undef XT_EXT_TAG | ||
330 | #undef XT_MPI_PARSE_STRIPE | ||
331 | #undef XT_EXT_STRIDE_MASK | ||
332 | #undef XT_EXT_STRIDE_MASK_PREP | ||
333 | |||
334 | |||
335 | /* ====================================================================== */ | ||
336 | |||
337 | static inline void | ||
338 | 16 | aux_gen_simple_block_offsets(int block_offsets[], const int block_sizes[], | |
339 | size_t num_blocks) { | ||
340 | |||
341 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | if (num_blocks > 0) { |
342 | 16 | int accum = 0; | |
343 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 16 times.
|
48 | for (size_t i = 0; i < num_blocks; ++i) { |
344 | 32 | block_offsets[i] = accum; | |
345 | 32 | accum += block_sizes[i]; | |
346 | } | ||
347 | } | ||
348 | 16 | } | |
349 | |||
350 | static MPI_Datatype | ||
351 | 82 | generate_block_datatype(const int *transfer_pos, int num_transfer_pos, | |
352 | const int *block_offsets, const int *block_sizes, | ||
353 | struct Xt_mpi_strp_prs_params *params) { | ||
354 | |||
355 |
2/4✓ Branch 0 taken 82 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 82 times.
✗ Branch 3 not taken.
|
82 | assert(block_sizes && block_offsets); |
356 | |||
357 | 82 | int *bdispl_vec = xmalloc(2 * (size_t)num_transfer_pos * sizeof(*bdispl_vec)), | |
358 | 82 | *blen_vec = bdispl_vec + num_transfer_pos; | |
359 | |||
360 |
2/2✓ Branch 0 taken 3464 times.
✓ Branch 1 taken 82 times.
|
3546 | for (int i = 0; i < num_transfer_pos; ++i) { |
361 | 3464 | int j = transfer_pos[i]; | |
362 | 3464 | bdispl_vec[i] = block_offsets[j]; | |
363 | 3464 | blen_vec[i] = block_sizes[j]; | |
364 | } | ||
365 | |||
366 | MPI_Datatype type | ||
367 | 82 | = xt_mpi_ddt_block_gen(num_transfer_pos, bdispl_vec, blen_vec, params); | |
368 | |||
369 | 82 | free(bdispl_vec); | |
370 | |||
371 | 82 | return type; | |
372 | } | ||
373 | |||
374 | static void | ||
375 | 52 | generate_block_msg_infos(int num_msgs, Xt_xmap_iter iter, | |
376 | const int *block_offsets, | ||
377 | const int *block_sizes, int **aux_offsets, | ||
378 | size_t num_blocks, | ||
379 | struct Xt_mpi_strp_prs_params *params, | ||
380 | struct Xt_redist_msg *msgs) { | ||
381 | |||
382 |
1/2✓ Branch 0 taken 52 times.
✗ Branch 1 not taken.
|
52 | if (num_msgs > 0) { |
383 | |||
384 | const int *block_offsets_; | ||
385 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 16 times.
|
52 | if (block_offsets) |
386 | 36 | block_offsets_ = block_offsets; | |
387 | else { | ||
388 | 16 | block_offsets_ = *aux_offsets | |
389 | 16 | = xrealloc(*aux_offsets, num_blocks * sizeof(*block_offsets_)); | |
390 | 16 | aux_gen_simple_block_offsets(*aux_offsets, block_sizes, num_blocks); | |
391 | } | ||
392 | |||
393 | 52 | size_t ofs = 0; | |
394 | do { | ||
395 | 82 | const int *curr_transfer_pos = xt_xmap_iterator_get_transfer_pos(iter); | |
396 | 82 | int curr_num_transfer_pos = xt_xmap_iterator_get_num_transfer_pos(iter); | |
397 | 82 | msgs[ofs].datatype | |
398 | 82 | = generate_block_datatype(curr_transfer_pos, curr_num_transfer_pos, | |
399 | block_offsets_, block_sizes, params); | ||
400 | 82 | msgs[ofs].rank = xt_xmap_iterator_get_rank(iter); | |
401 | |||
402 | 82 | ofs++; | |
403 |
2/2✓ Branch 1 taken 30 times.
✓ Branch 2 taken 52 times.
|
82 | } while (xt_xmap_iterator_next(iter)); |
404 | |||
405 | } | ||
406 | 52 | } | |
407 | |||
408 | Xt_redist | ||
409 | 10 | xt_redist_p2p_blocks_off_new(Xt_xmap xmap, | |
410 | const int *src_block_offsets, | ||
411 | const int *src_block_sizes, | ||
412 | int src_block_num, | ||
413 | const int *dst_block_offsets, | ||
414 | const int *dst_block_sizes, | ||
415 | int dst_block_num, | ||
416 | MPI_Datatype datatype) | ||
417 | { | ||
418 | 10 | return xt_redist_p2p_blocks_off_custom_new( | |
419 | xmap, src_block_offsets, src_block_sizes, src_block_num, dst_block_offsets, | ||
420 | dst_block_sizes, dst_block_num, datatype, (Xt_config)&xt_default_config); | ||
421 | } | ||
422 | |||
423 | |||
424 | Xt_redist | ||
425 | 26 | xt_redist_p2p_blocks_off_custom_new(Xt_xmap xmap, | |
426 | const int *src_block_offsets, | ||
427 | const int *src_block_sizes, | ||
428 | int src_block_num, | ||
429 | const int *dst_block_offsets, | ||
430 | const int *dst_block_sizes, | ||
431 | int dst_block_num, | ||
432 | MPI_Datatype datatype, | ||
433 | Xt_config config) | ||
434 | { | ||
435 | // ensure that yaxt is initialized | ||
436 |
3/6✓ Branch 1 taken 26 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 26 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 26 times.
✗ Branch 6 not taken.
|
26 | assert(xt_initialized() && src_block_sizes && dst_block_sizes); |
437 | |||
438 | int tag_offset; | ||
439 | 26 | MPI_Comm comm | |
440 | 26 | = xt_mpi_comm_smart_dup(xt_xmap_get_communicator(xmap), &tag_offset); | |
441 | |||
442 | |||
443 | 26 | int nsend = xt_xmap_get_num_destinations(xmap), | |
444 | 26 | nrecv = xt_xmap_get_num_sources(xmap); | |
445 | |||
446 | 26 | size_t nmsg = ((size_t)nsend + (size_t)nrecv); | |
447 | 26 | struct Xt_redist_msg *msgs = xmalloc(nmsg * sizeof (*msgs)); | |
448 | |||
449 | 26 | int *aux_offsets = NULL; | |
450 | |||
451 | 26 | Xt_xmap_iter dst_iter = xt_xmap_get_in_iterator(xmap), | |
452 | 26 | src_iter = xt_xmap_get_out_iterator(xmap); | |
453 | |||
454 | // dst part: | ||
455 | #ifndef NDEBUG | ||
456 | 26 | int max_dst_pos = xt_xmap_get_max_dst_pos(xmap); | |
457 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 26 times.
|
26 | if (dst_block_num < max_dst_pos) |
458 | ✗ | die("xt_redist_p2p_blocks_off_new: dst_block_num too small"); | |
459 | #endif | ||
460 | struct Xt_mpi_strp_prs_params params; | ||
461 | 26 | xt_init_mpi_strp_prs_params(&params, datatype, comm); | |
462 | 26 | generate_block_msg_infos(nrecv, dst_iter, dst_block_offsets, dst_block_sizes, | |
463 | &aux_offsets, (size_t)dst_block_num, | ||
464 | &params, msgs); | ||
465 |
1/2✓ Branch 0 taken 26 times.
✗ Branch 1 not taken.
|
26 | if (dst_iter) xt_xmap_iterator_delete(dst_iter); |
466 | |||
467 | // src part: | ||
468 | #ifndef NDEBUG | ||
469 | 26 | int max_src_pos = xt_xmap_get_max_src_pos(xmap); | |
470 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 26 times.
|
26 | if (src_block_num < max_src_pos) |
471 | ✗ | die("xt_redist_p2p_blocks_off_new: src_block_num too small"); | |
472 | #endif | ||
473 | 26 | generate_block_msg_infos(nsend, src_iter, src_block_offsets, src_block_sizes, | |
474 | &aux_offsets, (size_t)src_block_num, | ||
475 | 26 | &params, msgs+nrecv); | |
476 | 26 | xt_destroy_mpi_strp_prs_params(&params); | |
477 | 26 | free(aux_offsets); | |
478 | |||
479 |
1/2✓ Branch 0 taken 26 times.
✗ Branch 1 not taken.
|
26 | if (src_iter) xt_xmap_iterator_delete(src_iter); |
480 | |||
481 | 26 | struct Xt_config_ config_ = *config; | |
482 | 26 | config_.flags |= exch_no_dt_dup; | |
483 | |||
484 | Xt_redist result | ||
485 | 26 | = xt_redist_single_array_base_custom_new( | |
486 | 26 | nsend, nrecv, msgs+nrecv, msgs, comm, &config_); | |
487 | |||
488 | 26 | free(msgs); | |
489 | 26 | xt_mpi_comm_smart_dedup(&comm, tag_offset); | |
490 | 26 | return result; | |
491 | } | ||
492 | |||
493 | ✗ | Xt_redist xt_redist_p2p_blocks_new(Xt_xmap xmap, | |
494 | const int *src_block_sizes, | ||
495 | int src_block_num, | ||
496 | const int *dst_block_sizes, | ||
497 | int dst_block_num, | ||
498 | MPI_Datatype datatype) | ||
499 | { | ||
500 | ✗ | return xt_redist_p2p_blocks_custom_new( | |
501 | xmap, src_block_sizes, src_block_num, dst_block_sizes, dst_block_num, | ||
502 | datatype, (Xt_config)&xt_default_config); | ||
503 | } | ||
504 | |||
505 | Xt_redist | ||
506 | 8 | xt_redist_p2p_blocks_custom_new(Xt_xmap xmap, | |
507 | const int *src_block_sizes, int src_block_num, | ||
508 | const int *dst_block_sizes, int dst_block_num, | ||
509 | MPI_Datatype datatype, | ||
510 | Xt_config config) | ||
511 | { | ||
512 | 8 | return xt_redist_p2p_blocks_off_custom_new( | |
513 | xmap, NULL, src_block_sizes, src_block_num, | ||
514 | NULL, dst_block_sizes, dst_block_num, datatype, config); | ||
515 | } | ||
516 | |||
517 | |||
518 | 526 | Xt_redist xt_redist_p2p_new(Xt_xmap xmap, MPI_Datatype datatype) | |
519 | { | ||
520 | 526 | return xt_redist_p2p_custom_new(xmap, datatype, | |
521 | (Xt_config)&xt_default_config); | ||
522 | } | ||
523 | |||
524 | Xt_redist | ||
525 | 554 | xt_redist_p2p_custom_new(Xt_xmap xmap, MPI_Datatype datatype, Xt_config config) | |
526 | { | ||
527 | 554 | return xt_redist_p2p_off_custom_new(xmap, NULL, NULL, datatype, config); | |
528 | } | ||
529 | |||
530 | /* | ||
531 | * Local Variables: | ||
532 | * c-basic-offset: 2 | ||
533 | * coding: utf-8 | ||
534 | * indent-tabs-mode: nil | ||
535 | * show-trailing-whitespace: t | ||
536 | * require-trailing-newline: t | ||
537 | * End: | ||
538 | */ | ||
539 |