GCC Code Coverage Report


Directory: src/
File: src/xt_xmap_dist_dir_common.c
Date: 2024-11-08 09:02:52
Exec Total Coverage
Lines: 125 126 99.2%
Branches: 41 52 78.8%

Line Branch Exec Source
1 /**
2 * @file xt_xmap_dist_dir_common.c
3 *
4 * @brief Implementation of utility functions for creation of
5 * distributed directories.
6 *
7 * @copyright Copyright (C) 2016 Jörg Behrens <behrens@dkrz.de>
8 * Moritz Hanke <hanke@dkrz.de>
9 * Thomas Jahns <jahns@dkrz.de>
10 *
11 * @author Jörg Behrens <behrens@dkrz.de>
12 * Moritz Hanke <hanke@dkrz.de>
13 * Thomas Jahns <jahns@dkrz.de>
14 */
15 /*
16 * Keywords:
17 * Maintainer: Jörg Behrens <behrens@dkrz.de>
18 * Moritz Hanke <hanke@dkrz.de>
19 * Thomas Jahns <jahns@dkrz.de>
20 * URL: https://dkrz-sw.gitlab-pages.dkrz.de/yaxt/
21 *
22 * Redistribution and use in source and binary forms, with or without
23 * modification, are permitted provided that the following conditions are
24 * met:
25 *
26 * Redistributions of source code must retain the above copyright notice,
27 * this list of conditions and the following disclaimer.
28 *
29 * Redistributions in binary form must reproduce the above copyright
30 * notice, this list of conditions and the following disclaimer in the
31 * documentation and/or other materials provided with the distribution.
32 *
33 * Neither the name of the DKRZ GmbH nor the names of its contributors
34 * may be used to endorse or promote products derived from this software
35 * without specific prior written permission.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
38 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
39 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
40 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
41 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
42 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
43 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
44 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
45 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
46 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
47 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48 */
49 #ifdef HAVE_CONFIG_H
50 #include <config.h>
51 #endif
52
53 #include <string.h>
54 #include <assert.h>
55
56 #include "core/ppm_xfuncs.h"
57 #include "xt/xt_idxstripes.h"
58 #include "xt/xt_mpi.h"
59 #include "xt/xt_xmap_dist_dir.h"
60 #include "xt/xt_xmap_dist_dir_intercomm.h"
61 #include "xt_arithmetic_util.h"
62 #include "xt_xmap_dist_dir_common.h"
63 #include "ensure_array_size.h"
64 #include "xt_idxlist_internal.h"
65 #include "xt_idxstripes_internal.h"
66 #include "xt_config_internal.h"
67
68 static void
69 2128 xt_xmdd_free_dist_dir(struct dist_dir *dist_dir) {
70 2128 size_t num_entries
71 2128 = dist_dir->num_entries > 0
72 2128 ? (size_t)dist_dir->num_entries : (size_t)0;
73 2128 struct Xt_com_list *entries = dist_dir->entries;
74
2/2
✓ Branch 0 taken 4218 times.
✓ Branch 1 taken 2128 times.
6346 for (size_t i = 0; i < num_entries; ++i)
75 4218 xt_idxlist_delete(entries[i].list);
76 2128 free(dist_dir);
77 2128 }
78
79 1064 void xt_xmdd_free_dist_dirs(struct dist_dir_pair dist_dirs) {
80 1064 xt_xmdd_free_dist_dir(dist_dirs.src);
81 1064 xt_xmdd_free_dist_dir(dist_dirs.dst);
82 1064 }
83
84
85 struct Xt_xmdd_txstat
86 912 xt_xmap_dist_dir_send_intersections(
87 void *restrict send_buffer,
88 size_t send_size_asize, size_t send_size_entry,
89 int tag, MPI_Comm comm, int rank_lim,
90 MPI_Request *restrict requests,
91 912 const int send_size[rank_lim][send_size_asize])
92 912 {
93 912 size_t offset = 0;
94 912 size_t reqOfs = 0;
95
96 // pack the intersections into the send buffer
97
2/2
✓ Branch 0 taken 4560 times.
✓ Branch 1 taken 912 times.
5472 for (size_t rank = 0; rank < (size_t)rank_lim; ++rank)
98
2/2
✓ Branch 0 taken 1820 times.
✓ Branch 1 taken 2740 times.
4560 if (send_size[rank][send_size_entry] > 0) {
99
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 1820 times.
1820 xt_mpi_call(MPI_Isend((char *)send_buffer + offset,
100 send_size[rank][send_size_entry],
101 MPI_PACKED, (int)rank, tag,
102 comm, requests + reqOfs),
103 comm);
104 1820 ++reqOfs;
105 1820 offset += (size_t)send_size[rank][send_size_entry];
106 }
107 912 return (struct Xt_xmdd_txstat){ .bytes = offset, .num_msg = reqOfs };
108 }
109
110 size_t
111 532 xt_xmap_dist_dir_match_src_dst(const struct dist_dir *src_dist_dir,
112 const struct dist_dir *dst_dist_dir,
113 struct isect **src_dst_intersections,
114 Xt_config config)
115 {
116 532 struct isect *src_dst_intersections_ = *src_dst_intersections
117 532 = xmalloc((size_t)src_dist_dir->num_entries
118 * (size_t)dst_dist_dir->num_entries
119 * sizeof(**src_dst_intersections));
120 532 size_t isect_fill = 0;
121 532 const struct Xt_com_list *restrict entries_src = src_dist_dir->entries,
122 532 *restrict entries_dst = dst_dist_dir->entries;
123 532 size_t num_entries_src = (size_t)src_dist_dir->num_entries,
124 532 num_entries_dst = (size_t)dst_dist_dir->num_entries;
125
2/2
✓ Branch 0 taken 528 times.
✓ Branch 1 taken 532 times.
1060 for (size_t i = 0; i < num_entries_src; ++i)
126
2/2
✓ Branch 0 taken 1274 times.
✓ Branch 1 taken 528 times.
1802 for (size_t j = 0; j < num_entries_dst; ++j)
127 {
128 Xt_idxlist intersection
129 1274 = xt_idxlist_get_intersection_custom(
130 1274 entries_src[i].list, entries_dst[j].list, config);
131
1/2
✓ Branch 0 taken 1274 times.
✗ Branch 1 not taken.
1274 if (xt_idxlist_get_num_indices(intersection) > 0) {
132 1274 src_dst_intersections_[isect_fill]
133 1274 = (struct isect){
134 1274 .rank = { [xt_xmdd_direction_src]=entries_src[i].rank,
135 1274 [xt_xmdd_direction_dst]=entries_dst[j].rank},
136 .idxlist = intersection };
137 1274 ++isect_fill;
138 } else
139 xt_idxlist_delete(intersection);
140 }
141 *src_dst_intersections
142 532 = xrealloc(src_dst_intersections_,
143 isect_fill * sizeof (*src_dst_intersections_));
144 532 return isect_fill;
145 }
146
147 #if __GNUC__ == 11 && __GNUC_MINOR__ <= 2
148 /* gcc 11.1 has a bug in the -fsanitize=undefined functionality
149 * which creates a bogus warning without the below suppression, bug
150 * report at https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101585 */
151 #pragma GCC diagnostic push
152 #pragma GCC diagnostic ignored "-Wvla-parameter"
153 #endif
154
155 size_t
156 1064 xt_xmap_dist_dir_pack_intersections(
157 enum xt_xmdd_direction target,
158 size_t num_intersections,
159 const struct isect *restrict src_dst_intersections,
160 bool isect_idxlist_delete,
161 size_t send_size_asize, size_t send_size_idx,
162 1064 int (*send_size)[send_size_asize],
163 unsigned char *buffer, size_t buf_size, size_t *ofs, MPI_Comm comm)
164 {
165 1064 int prev_send_rank = -1;
166 1064 size_t num_send_indices_requests = 0;
167 1064 size_t origin = 1 ^ target, ofs_ = *ofs;
168 1064 int position = 0;
169
2/2
✓ Branch 0 taken 2548 times.
✓ Branch 1 taken 1064 times.
3612 for (size_t i = 0; i < num_intersections; ++i)
170 {
171 /* check whether this intersection starts a new request */
172 2548 int send_rank = src_dst_intersections[i].rank[target];
173 2548 num_send_indices_requests += send_rank != prev_send_rank;
174
175 // pack rank
176 2548 XT_MPI_SEND_BUF_CONST int *prank
177 2548 = CAST_MPI_SEND_BUF(src_dst_intersections[i].rank + origin);
178
4/4
✓ Branch 0 taken 1794 times.
✓ Branch 1 taken 754 times.
✓ Branch 2 taken 746 times.
✓ Branch 3 taken 1048 times.
2548 if (send_rank != prev_send_rank && prev_send_rank != -1) {
179 746 send_size[prev_send_rank][send_size_idx] = position;
180 746 ofs_ += (size_t)position;
181 746 position = 0;
182 }
183 2548 prev_send_rank = send_rank;
184
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 2548 times.
2548 xt_mpi_call(MPI_Pack(prank, 1, MPI_INT, buffer+ofs_, (int)(buf_size-ofs_),
185 &position, comm), comm);
186 // pack intersection
187 2548 xt_idxlist_pack(src_dst_intersections[i].idxlist, buffer+ofs_,
188 2548 (int)(buf_size-ofs_), &position, comm);
189
190
2/2
✓ Branch 0 taken 1274 times.
✓ Branch 1 taken 1274 times.
2548 if (isect_idxlist_delete)
191 1274 xt_idxlist_delete(src_dst_intersections[i].idxlist);
192 }
193
2/2
✓ Branch 0 taken 1048 times.
✓ Branch 1 taken 16 times.
1064 if (prev_send_rank != -1)
194 1048 send_size[prev_send_rank][send_size_idx] = position;
195
196 1064 *ofs = ofs_ + (size_t)position;
197 1064 return num_send_indices_requests;
198 }
199
200 #if __GNUC__ == 11 && __GNUC_MINOR__ <= 2
201 #pragma GCC diagnostic pop
202 #endif
203
204
205 static int
206 349 stripe_cmp(const void *a, const void *b)
207 {
208 typedef const struct Xt_stripe *csx;
209 349 return (((csx)a)->start > ((csx)b)->start)
210 349 - (((csx)b)->start > ((csx)a)->start);
211 }
212
213 /*
214 * @param dist_dir_results contains the intersections of this rank's
215 * dst or src idxlist with other ranks in bucket-sized chunks. The
216 * chunks belonging to the same communication partner are merged
217 * in-place here, i.e. on return (*dist_dir_results)->num_entries is
218 * less than or equal to the previous count and *dist_dir_results
219 * might point somewhere else
220 */
221 void
222 1064 xt_xmap_dist_dir_same_rank_merge(struct dist_dir **dist_dir_results) {
223
224 1064 struct Xt_com_list *restrict entries = (*dist_dir_results)->entries;
225 1064 size_t num_isect_agg = 0;
226
227 1064 size_t i = 0, num_shards = (size_t)(*dist_dir_results)->num_entries;
228
2/2
✓ Branch 0 taken 2424 times.
✓ Branch 1 taken 1064 times.
3488 while (i < num_shards) {
229 2424 int rank = entries[i].rank;
230 2424 size_t j = i;
231 2424 size_t num_stripes = 0;
232 /* find all entries matching the currently considered rank and
233 * count their stripes */
234 do {
235 num_stripes
236 2548 += (size_t)(xt_idxlist_get_num_index_stripes(entries[j].list));
237 2548 ++j;
238
4/4
✓ Branch 0 taken 1608 times.
✓ Branch 1 taken 940 times.
✓ Branch 2 taken 124 times.
✓ Branch 3 taken 1484 times.
2548 } while (j < num_shards && entries[j].rank == rank);
239 2424 struct Xt_stripes_alloc stripes_alloc = xt_idxstripes_alloc(num_stripes);
240 2424 struct Xt_stripe *restrict stripes = stripes_alloc.stripes;
241 2424 size_t stripe_ofs = 0;
242
2/2
✓ Branch 0 taken 2548 times.
✓ Branch 1 taken 2424 times.
4972 for (; i < j; ++i) {
243 2548 size_t num_stripes_of_intersection
244 2548 = (size_t)(xt_idxlist_get_num_index_stripes(entries[i].list));
245 2548 xt_idxlist_get_index_stripes_keep_buf(entries[i].list,
246 2548 stripes+stripe_ofs,
247 num_stripes-stripe_ofs);
248 2548 xt_idxlist_delete(entries[i].list);
249 2548 stripe_ofs += num_stripes_of_intersection;
250 }
251 2424 qsort(stripes, num_stripes, sizeof (*stripes), stripe_cmp);
252 2424 entries[num_isect_agg].list = xt_idxstripes_congeal(stripes_alloc);
253 2424 entries[num_isect_agg].rank = rank;
254 2424 ++num_isect_agg;
255 }
256 1064 (*dist_dir_results)->num_entries = (int)num_isect_agg;
257 1064 *dist_dir_results = xrealloc(*dist_dir_results, sizeof (struct dist_dir)
258 + (size_t)num_isect_agg
259 * sizeof(struct Xt_com_list));
260 1064 }
261
262
263 int
264 958 xt_xmdd_cmp_isect_src_rank(const void *a_, const void *b_)
265 {
266 958 const struct isect *a = a_, *b = b_;
267 /* this is safe vs. overflow because ranks are in [0..INT_MAX) */
268 958 return a->rank[xt_xmdd_direction_src] - b->rank[xt_xmdd_direction_src];
269 }
270
271 int
272 1150 xt_xmdd_cmp_isect_dst_rank(const void *a_, const void *b_)
273 {
274 1150 const struct isect *a = a_, *b = b_;
275 /* this is safe vs. overflow because ranks are in [0..INT_MAX) */
276 1150 return a->rank[xt_xmdd_direction_dst] - b->rank[xt_xmdd_direction_dst];
277 }
278
279 int
280 2416 xt_com_list_rank_cmp(const void *a_, const void *b_)
281 {
282 2416 const struct Xt_com_list *a = a_, *b = b_;
283 /* this is overflow-safe because ranks are non-negative ints */
284 2416 return a->rank - b->rank;
285 }
286
287 Xt_xmap
288 380 xt_xmap_dist_dir_new(Xt_idxlist src_idxlist, Xt_idxlist dst_idxlist,
289 MPI_Comm comm)
290 {
291 380 return xt_xmap_dist_dir_custom_new(src_idxlist, dst_idxlist, comm,
292 &xt_default_config);
293 }
294
295 Xt_xmap
296 380 xt_xmap_dist_dir_custom_new(Xt_idxlist src_idxlist, Xt_idxlist dst_idxlist,
297 MPI_Comm comm, Xt_config config)
298 {
299 // ensure that yaxt is initialized
300
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 380 times.
380 assert(xt_initialized());
301
302 int is_inter;
303
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 380 times.
380 xt_mpi_call(MPI_Comm_test_inter(comm, &is_inter), comm);
304 Xt_xmap xmap;
305
2/2
✓ Branch 0 taken 228 times.
✓ Branch 1 taken 152 times.
380 if (!is_inter)
306 228 xmap = xt_xmap_dist_dir_intracomm_custom_new(src_idxlist, dst_idxlist, comm,
307 config);
308 else {
309 MPI_Comm merge_comm, local_intra_comm;
310 MPI_Group local_group;
311
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 152 times.
152 xt_mpi_call(MPI_Comm_group(comm, &local_group), comm);
312
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 152 times.
152 xt_mpi_call(MPI_Intercomm_merge(comm, 0, &merge_comm), comm);
313
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 152 times.
152 xt_mpi_call(MPI_Comm_create(merge_comm, local_group, &local_intra_comm),
314 comm);
315
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 152 times.
152 xt_mpi_call(MPI_Group_free(&local_group), comm);
316
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 152 times.
152 xt_mpi_call(MPI_Comm_free(&merge_comm), comm);
317 152 xt_mpi_comm_mark_exclusive(local_intra_comm);
318
319 xmap
320 152 = xt_xmap_dist_dir_intercomm_custom_new(src_idxlist, dst_idxlist,
321 comm, local_intra_comm, config);
322
323
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 152 times.
152 xt_mpi_call(MPI_Comm_free(&local_intra_comm), local_intra_comm);
324 }
325 380 return xmap;
326 }
327
328
329 /*
330 * Local Variables:
331 * c-basic-offset: 2
332 * coding: utf-8
333 * indent-tabs-mode: nil
334 * show-trailing-whitespace: t
335 * require-trailing-newline: t
336 * End:
337 */
338