Directory: | src/ |
---|---|
File: | src/xt_xmap_dist_dir_common.c |
Date: | 2024-11-08 09:02:52 |
Exec | Total | Coverage | |
---|---|---|---|
Lines: | 125 | 126 | 99.2% |
Branches: | 41 | 52 | 78.8% |
Line | Branch | Exec | Source |
---|---|---|---|
1 | /** | ||
2 | * @file xt_xmap_dist_dir_common.c | ||
3 | * | ||
4 | * @brief Implementation of utility functions for creation of | ||
5 | * distributed directories. | ||
6 | * | ||
7 | * @copyright Copyright (C) 2016 Jörg Behrens <behrens@dkrz.de> | ||
8 | * Moritz Hanke <hanke@dkrz.de> | ||
9 | * Thomas Jahns <jahns@dkrz.de> | ||
10 | * | ||
11 | * @author Jörg Behrens <behrens@dkrz.de> | ||
12 | * Moritz Hanke <hanke@dkrz.de> | ||
13 | * Thomas Jahns <jahns@dkrz.de> | ||
14 | */ | ||
15 | /* | ||
16 | * Keywords: | ||
17 | * Maintainer: Jörg Behrens <behrens@dkrz.de> | ||
18 | * Moritz Hanke <hanke@dkrz.de> | ||
19 | * Thomas Jahns <jahns@dkrz.de> | ||
20 | * URL: https://dkrz-sw.gitlab-pages.dkrz.de/yaxt/ | ||
21 | * | ||
22 | * Redistribution and use in source and binary forms, with or without | ||
23 | * modification, are permitted provided that the following conditions are | ||
24 | * met: | ||
25 | * | ||
26 | * Redistributions of source code must retain the above copyright notice, | ||
27 | * this list of conditions and the following disclaimer. | ||
28 | * | ||
29 | * Redistributions in binary form must reproduce the above copyright | ||
30 | * notice, this list of conditions and the following disclaimer in the | ||
31 | * documentation and/or other materials provided with the distribution. | ||
32 | * | ||
33 | * Neither the name of the DKRZ GmbH nor the names of its contributors | ||
34 | * may be used to endorse or promote products derived from this software | ||
35 | * without specific prior written permission. | ||
36 | * | ||
37 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS | ||
38 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | ||
39 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A | ||
40 | * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER | ||
41 | * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
42 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
43 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
44 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
45 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
46 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
47 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
48 | */ | ||
49 | #ifdef HAVE_CONFIG_H | ||
50 | #include <config.h> | ||
51 | #endif | ||
52 | |||
53 | #include <string.h> | ||
54 | #include <assert.h> | ||
55 | |||
56 | #include "core/ppm_xfuncs.h" | ||
57 | #include "xt/xt_idxstripes.h" | ||
58 | #include "xt/xt_mpi.h" | ||
59 | #include "xt/xt_xmap_dist_dir.h" | ||
60 | #include "xt/xt_xmap_dist_dir_intercomm.h" | ||
61 | #include "xt_arithmetic_util.h" | ||
62 | #include "xt_xmap_dist_dir_common.h" | ||
63 | #include "ensure_array_size.h" | ||
64 | #include "xt_idxlist_internal.h" | ||
65 | #include "xt_idxstripes_internal.h" | ||
66 | #include "xt_config_internal.h" | ||
67 | |||
68 | static void | ||
69 | 2128 | xt_xmdd_free_dist_dir(struct dist_dir *dist_dir) { | |
70 | 2128 | size_t num_entries | |
71 | 2128 | = dist_dir->num_entries > 0 | |
72 | 2128 | ? (size_t)dist_dir->num_entries : (size_t)0; | |
73 | 2128 | struct Xt_com_list *entries = dist_dir->entries; | |
74 |
2/2✓ Branch 0 taken 4218 times.
✓ Branch 1 taken 2128 times.
|
6346 | for (size_t i = 0; i < num_entries; ++i) |
75 | 4218 | xt_idxlist_delete(entries[i].list); | |
76 | 2128 | free(dist_dir); | |
77 | 2128 | } | |
78 | |||
79 | 1064 | void xt_xmdd_free_dist_dirs(struct dist_dir_pair dist_dirs) { | |
80 | 1064 | xt_xmdd_free_dist_dir(dist_dirs.src); | |
81 | 1064 | xt_xmdd_free_dist_dir(dist_dirs.dst); | |
82 | 1064 | } | |
83 | |||
84 | |||
85 | struct Xt_xmdd_txstat | ||
86 | 912 | xt_xmap_dist_dir_send_intersections( | |
87 | void *restrict send_buffer, | ||
88 | size_t send_size_asize, size_t send_size_entry, | ||
89 | int tag, MPI_Comm comm, int rank_lim, | ||
90 | MPI_Request *restrict requests, | ||
91 | 912 | const int send_size[rank_lim][send_size_asize]) | |
92 | 912 | { | |
93 | 912 | size_t offset = 0; | |
94 | 912 | size_t reqOfs = 0; | |
95 | |||
96 | // pack the intersections into the send buffer | ||
97 |
2/2✓ Branch 0 taken 4560 times.
✓ Branch 1 taken 912 times.
|
5472 | for (size_t rank = 0; rank < (size_t)rank_lim; ++rank) |
98 |
2/2✓ Branch 0 taken 1820 times.
✓ Branch 1 taken 2740 times.
|
4560 | if (send_size[rank][send_size_entry] > 0) { |
99 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1820 times.
|
1820 | xt_mpi_call(MPI_Isend((char *)send_buffer + offset, |
100 | send_size[rank][send_size_entry], | ||
101 | MPI_PACKED, (int)rank, tag, | ||
102 | comm, requests + reqOfs), | ||
103 | comm); | ||
104 | 1820 | ++reqOfs; | |
105 | 1820 | offset += (size_t)send_size[rank][send_size_entry]; | |
106 | } | ||
107 | 912 | return (struct Xt_xmdd_txstat){ .bytes = offset, .num_msg = reqOfs }; | |
108 | } | ||
109 | |||
110 | size_t | ||
111 | 532 | xt_xmap_dist_dir_match_src_dst(const struct dist_dir *src_dist_dir, | |
112 | const struct dist_dir *dst_dist_dir, | ||
113 | struct isect **src_dst_intersections, | ||
114 | Xt_config config) | ||
115 | { | ||
116 | 532 | struct isect *src_dst_intersections_ = *src_dst_intersections | |
117 | 532 | = xmalloc((size_t)src_dist_dir->num_entries | |
118 | * (size_t)dst_dist_dir->num_entries | ||
119 | * sizeof(**src_dst_intersections)); | ||
120 | 532 | size_t isect_fill = 0; | |
121 | 532 | const struct Xt_com_list *restrict entries_src = src_dist_dir->entries, | |
122 | 532 | *restrict entries_dst = dst_dist_dir->entries; | |
123 | 532 | size_t num_entries_src = (size_t)src_dist_dir->num_entries, | |
124 | 532 | num_entries_dst = (size_t)dst_dist_dir->num_entries; | |
125 |
2/2✓ Branch 0 taken 528 times.
✓ Branch 1 taken 532 times.
|
1060 | for (size_t i = 0; i < num_entries_src; ++i) |
126 |
2/2✓ Branch 0 taken 1274 times.
✓ Branch 1 taken 528 times.
|
1802 | for (size_t j = 0; j < num_entries_dst; ++j) |
127 | { | ||
128 | Xt_idxlist intersection | ||
129 | 1274 | = xt_idxlist_get_intersection_custom( | |
130 | 1274 | entries_src[i].list, entries_dst[j].list, config); | |
131 |
1/2✓ Branch 0 taken 1274 times.
✗ Branch 1 not taken.
|
1274 | if (xt_idxlist_get_num_indices(intersection) > 0) { |
132 | 1274 | src_dst_intersections_[isect_fill] | |
133 | 1274 | = (struct isect){ | |
134 | 1274 | .rank = { [xt_xmdd_direction_src]=entries_src[i].rank, | |
135 | 1274 | [xt_xmdd_direction_dst]=entries_dst[j].rank}, | |
136 | .idxlist = intersection }; | ||
137 | 1274 | ++isect_fill; | |
138 | } else | ||
139 | ✗ | xt_idxlist_delete(intersection); | |
140 | } | ||
141 | *src_dst_intersections | ||
142 | 532 | = xrealloc(src_dst_intersections_, | |
143 | isect_fill * sizeof (*src_dst_intersections_)); | ||
144 | 532 | return isect_fill; | |
145 | } | ||
146 | |||
147 | #if __GNUC__ == 11 && __GNUC_MINOR__ <= 2 | ||
148 | /* gcc 11.1 has a bug in the -fsanitize=undefined functionality | ||
149 | * which creates a bogus warning without the below suppression, bug | ||
150 | * report at https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101585 */ | ||
151 | #pragma GCC diagnostic push | ||
152 | #pragma GCC diagnostic ignored "-Wvla-parameter" | ||
153 | #endif | ||
154 | |||
155 | size_t | ||
156 | 1064 | xt_xmap_dist_dir_pack_intersections( | |
157 | enum xt_xmdd_direction target, | ||
158 | size_t num_intersections, | ||
159 | const struct isect *restrict src_dst_intersections, | ||
160 | bool isect_idxlist_delete, | ||
161 | size_t send_size_asize, size_t send_size_idx, | ||
162 | 1064 | int (*send_size)[send_size_asize], | |
163 | unsigned char *buffer, size_t buf_size, size_t *ofs, MPI_Comm comm) | ||
164 | { | ||
165 | 1064 | int prev_send_rank = -1; | |
166 | 1064 | size_t num_send_indices_requests = 0; | |
167 | 1064 | size_t origin = 1 ^ target, ofs_ = *ofs; | |
168 | 1064 | int position = 0; | |
169 |
2/2✓ Branch 0 taken 2548 times.
✓ Branch 1 taken 1064 times.
|
3612 | for (size_t i = 0; i < num_intersections; ++i) |
170 | { | ||
171 | /* see if this generates a new request? */ | ||
172 | 2548 | int send_rank = src_dst_intersections[i].rank[target]; | |
173 | 2548 | num_send_indices_requests += send_rank != prev_send_rank; | |
174 | |||
175 | // pack rank | ||
176 | 2548 | XT_MPI_SEND_BUF_CONST int *prank | |
177 | 2548 | = CAST_MPI_SEND_BUF(src_dst_intersections[i].rank + origin); | |
178 |
4/4✓ Branch 0 taken 1794 times.
✓ Branch 1 taken 754 times.
✓ Branch 2 taken 746 times.
✓ Branch 3 taken 1048 times.
|
2548 | if (send_rank != prev_send_rank && prev_send_rank != -1) { |
179 | 746 | send_size[prev_send_rank][send_size_idx] = position; | |
180 | 746 | ofs_ += (size_t)position; | |
181 | 746 | position = 0; | |
182 | } | ||
183 | 2548 | prev_send_rank = send_rank; | |
184 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 2548 times.
|
2548 | xt_mpi_call(MPI_Pack(prank, 1, MPI_INT, buffer+ofs_, (int)(buf_size-ofs_), |
185 | &position, comm), comm); | ||
186 | // pack intersection | ||
187 | 2548 | xt_idxlist_pack(src_dst_intersections[i].idxlist, buffer+ofs_, | |
188 | 2548 | (int)(buf_size-ofs_), &position, comm); | |
189 | |||
190 |
2/2✓ Branch 0 taken 1274 times.
✓ Branch 1 taken 1274 times.
|
2548 | if (isect_idxlist_delete) |
191 | 1274 | xt_idxlist_delete(src_dst_intersections[i].idxlist); | |
192 | } | ||
193 |
2/2✓ Branch 0 taken 1048 times.
✓ Branch 1 taken 16 times.
|
1064 | if (prev_send_rank != -1) |
194 | 1048 | send_size[prev_send_rank][send_size_idx] = position; | |
195 | |||
196 | 1064 | *ofs = ofs_ + (size_t)position; | |
197 | 1064 | return num_send_indices_requests; | |
198 | } | ||
199 | |||
200 | #if __GNUC__ == 11 && __GNUC_MINOR__ <= 2 | ||
201 | #pragma GCC diagnostic pop | ||
202 | #endif | ||
203 | |||
204 | |||
205 | static int | ||
206 | 349 | stripe_cmp(const void *a, const void *b) | |
207 | { | ||
208 | typedef const struct Xt_stripe *csx; | ||
209 | 349 | return (((csx)a)->start > ((csx)b)->start) | |
210 | 349 | - (((csx)b)->start > ((csx)a)->start); | |
211 | } | ||
212 | |||
213 | /* | ||
214 | * @param dist_dir_results contains the intersections of this ranks | ||
215 | * dst or src idxlist with other ranks in bucket-sized chunks, the | ||
216 | * chunks belonging to the same communication partner are merged | ||
217 | * in-place here, i.e. on return (*dist_dir_results)->num_entries is | ||
218 | * less than or equal to the previous count and *dist_dir_results | ||
219 | * might point somewhere else | ||
220 | */ | ||
221 | void | ||
222 | 1064 | xt_xmap_dist_dir_same_rank_merge(struct dist_dir **dist_dir_results) { | |
223 | |||
224 | 1064 | struct Xt_com_list *restrict entries = (*dist_dir_results)->entries; | |
225 | 1064 | size_t num_isect_agg = 0; | |
226 | |||
227 | 1064 | size_t i = 0, num_shards = (size_t)(*dist_dir_results)->num_entries; | |
228 |
2/2✓ Branch 0 taken 2424 times.
✓ Branch 1 taken 1064 times.
|
3488 | while (i < num_shards) { |
229 | 2424 | int rank = entries[i].rank; | |
230 | 2424 | size_t j = i; | |
231 | 2424 | size_t num_stripes = 0; | |
232 | /* find all entries matching the currently considered rank and | ||
233 | * count their stripes */ | ||
234 | do { | ||
235 | num_stripes | ||
236 | 2548 | += (size_t)(xt_idxlist_get_num_index_stripes(entries[j].list)); | |
237 | 2548 | ++j; | |
238 |
4/4✓ Branch 0 taken 1608 times.
✓ Branch 1 taken 940 times.
✓ Branch 2 taken 124 times.
✓ Branch 3 taken 1484 times.
|
2548 | } while (j < num_shards && entries[j].rank == rank); |
239 | 2424 | struct Xt_stripes_alloc stripes_alloc = xt_idxstripes_alloc(num_stripes); | |
240 | 2424 | struct Xt_stripe *restrict stripes = stripes_alloc.stripes; | |
241 | 2424 | size_t stripe_ofs = 0; | |
242 |
2/2✓ Branch 0 taken 2548 times.
✓ Branch 1 taken 2424 times.
|
4972 | for (; i < j; ++i) { |
243 | 2548 | size_t num_stripes_of_intersection | |
244 | 2548 | = (size_t)(xt_idxlist_get_num_index_stripes(entries[i].list)); | |
245 | 2548 | xt_idxlist_get_index_stripes_keep_buf(entries[i].list, | |
246 | 2548 | stripes+stripe_ofs, | |
247 | num_stripes-stripe_ofs); | ||
248 | 2548 | xt_idxlist_delete(entries[i].list); | |
249 | 2548 | stripe_ofs += num_stripes_of_intersection; | |
250 | } | ||
251 | 2424 | qsort(stripes, num_stripes, sizeof (*stripes), stripe_cmp); | |
252 | 2424 | entries[num_isect_agg].list = xt_idxstripes_congeal(stripes_alloc); | |
253 | 2424 | entries[num_isect_agg].rank = rank; | |
254 | 2424 | ++num_isect_agg; | |
255 | } | ||
256 | 1064 | (*dist_dir_results)->num_entries = (int)num_isect_agg; | |
257 | 1064 | *dist_dir_results = xrealloc(*dist_dir_results, sizeof (struct dist_dir) | |
258 | + (size_t)num_isect_agg | ||
259 | * sizeof(struct Xt_com_list)); | ||
260 | 1064 | } | |
261 | |||
262 | |||
263 | int | ||
264 | 958 | xt_xmdd_cmp_isect_src_rank(const void *a_, const void *b_) | |
265 | { | ||
266 | 958 | const struct isect *a = a_, *b = b_; | |
267 | /* this is safe vs. overflow because ranks are in [0..MAX_INT) */ | ||
268 | 958 | return a->rank[xt_xmdd_direction_src] - b->rank[xt_xmdd_direction_src]; | |
269 | } | ||
270 | |||
271 | int | ||
272 | 1150 | xt_xmdd_cmp_isect_dst_rank(const void *a_, const void *b_) | |
273 | { | ||
274 | 1150 | const struct isect *a = a_, *b = b_; | |
275 | /* this is safe vs. overflow because ranks are in [0..MAX_INT) */ | ||
276 | 1150 | return a->rank[xt_xmdd_direction_dst] - b->rank[xt_xmdd_direction_dst]; | |
277 | } | ||
278 | |||
279 | int | ||
280 | 2416 | xt_com_list_rank_cmp(const void *a_, const void *b_) | |
281 | { | ||
282 | 2416 | const struct Xt_com_list *a = a_, *b = b_; | |
283 | /* this is overflow-safe because rank's are non-negative ints */ | ||
284 | 2416 | return a->rank - b->rank; | |
285 | } | ||
286 | |||
287 | Xt_xmap | ||
288 | 380 | xt_xmap_dist_dir_new(Xt_idxlist src_idxlist, Xt_idxlist dst_idxlist, | |
289 | MPI_Comm comm) | ||
290 | { | ||
291 | 380 | return xt_xmap_dist_dir_custom_new(src_idxlist, dst_idxlist, comm, | |
292 | &xt_default_config); | ||
293 | } | ||
294 | |||
295 | Xt_xmap | ||
296 | 380 | xt_xmap_dist_dir_custom_new(Xt_idxlist src_idxlist, Xt_idxlist dst_idxlist, | |
297 | MPI_Comm comm, Xt_config config) | ||
298 | { | ||
299 | // ensure that yaxt is initialized | ||
300 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 380 times.
|
380 | assert(xt_initialized()); |
301 | |||
302 | int is_inter; | ||
303 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 380 times.
|
380 | xt_mpi_call(MPI_Comm_test_inter(comm, &is_inter), comm); |
304 | Xt_xmap xmap; | ||
305 |
2/2✓ Branch 0 taken 228 times.
✓ Branch 1 taken 152 times.
|
380 | if (!is_inter) |
306 | 228 | xmap = xt_xmap_dist_dir_intracomm_custom_new(src_idxlist, dst_idxlist, comm, | |
307 | config); | ||
308 | else { | ||
309 | MPI_Comm merge_comm, local_intra_comm; | ||
310 | MPI_Group local_group; | ||
311 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 152 times.
|
152 | xt_mpi_call(MPI_Comm_group(comm, &local_group), comm); |
312 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 152 times.
|
152 | xt_mpi_call(MPI_Intercomm_merge(comm, 0, &merge_comm), comm); |
313 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 152 times.
|
152 | xt_mpi_call(MPI_Comm_create(merge_comm, local_group, &local_intra_comm), |
314 | comm); | ||
315 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 152 times.
|
152 | xt_mpi_call(MPI_Group_free(&local_group), comm); |
316 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 152 times.
|
152 | xt_mpi_call(MPI_Comm_free(&merge_comm), comm); |
317 | 152 | xt_mpi_comm_mark_exclusive(local_intra_comm); | |
318 | |||
319 | xmap | ||
320 | 152 | = xt_xmap_dist_dir_intercomm_custom_new(src_idxlist, dst_idxlist, | |
321 | comm, local_intra_comm, config); | ||
322 | |||
323 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 152 times.
|
152 | xt_mpi_call(MPI_Comm_free(&local_intra_comm), local_intra_comm); |
324 | } | ||
325 | 380 | return xmap; | |
326 | } | ||
327 | |||
328 | |||
329 | /* | ||
330 | * Local Variables: | ||
331 | * c-basic-offset: 2 | ||
332 | * coding: utf-8 | ||
333 | * indent-tabs-mode: nil | ||
334 | * show-trailing-whitespace: t | ||
335 | * require-trailing-newline: t | ||
336 | * End: | ||
337 | */ | ||
338 |