GCC Code Coverage Report


Directory: src/
File: src/xt_xmap_all2all.c
Date: 2024-11-08 09:02:52
Exec Total Coverage
Lines: 91 93 97.8%
Branches: 43 56 76.8%

Line Branch Exec Source
1 /**
2 * @file xt_xmap_all2all.c
3 *
4 * @copyright Copyright (C) 2016 Jörg Behrens <behrens@dkrz.de>
5 * Moritz Hanke <hanke@dkrz.de>
6 * Thomas Jahns <jahns@dkrz.de>
7 *
8 * @author Jörg Behrens <behrens@dkrz.de>
9 * Moritz Hanke <hanke@dkrz.de>
10 * Thomas Jahns <jahns@dkrz.de>
11 */
12 /*
13 * Keywords:
14 * Maintainer: Jörg Behrens <behrens@dkrz.de>
15 * Moritz Hanke <hanke@dkrz.de>
16 * Thomas Jahns <jahns@dkrz.de>
17 * URL: https://dkrz-sw.gitlab-pages.dkrz.de/yaxt/
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions are
21 * met:
22 *
23 * Redistributions of source code must retain the above copyright notice,
24 * this list of conditions and the following disclaimer.
25 *
26 * Redistributions in binary form must reproduce the above copyright
27 * notice, this list of conditions and the following disclaimer in the
28 * documentation and/or other materials provided with the distribution.
29 *
30 * Neither the name of the DKRZ GmbH nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
35 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
36 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
37 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
38 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
39 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
40 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
41 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
42 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
43 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
44 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 */
46 #ifdef HAVE_CONFIG_H
47 #include <config.h>
48 #endif
49
50 #include <stdlib.h>
51 #include <stdio.h>
52 #include <string.h>
53 #include <assert.h>
54 #include <limits.h>
55
56 #include <mpi.h>
57
58 #include "xt/xt_idxlist.h"
59 #include "xt/xt_idxvec.h"
60 #include "xt/xt_xmap.h"
61 #include "xt/xt_mpi.h"
62 #include "xt_mpi_internal.h"
63 #include "core/core.h"
64 #include "core/ppm_xfuncs.h"
65 #include "xt/xt_xmap_all2all.h"
66 #include "xt/xt_xmap_intersection.h"
67 #include "xt_config_internal.h"
68 #include "xt_idxlist_internal.h"
69 #include "instr.h"
70
71 938 static void exchange_idxlists(struct Xt_com_list **src_intersections,
72 size_t *num_src_intersections,
73 struct Xt_com_list **dst_intersections,
74 size_t *num_dst_intersections,
75 int * stripify,
76 Xt_idxlist src_idxlist_local,
77 Xt_idxlist dst_idxlist_local,
78 MPI_Comm comm, Xt_config config)
79 {
80
81 /*
82 Note: The meaning of source (src) and destination (dst) points can already be understood by
83 looking at the serial case, where it is just a transformation of sequences of integers
84 (called indices). The starting state (source sequence) is transformed into an end state
85 (dst sequence). The transformation does not have to be bijective. For each position dpos of
86 the dst sequence we have at least one position spos in the src sequence with:
87 dst(dpos) = src(spos)
88 */
89
90 int comm_size, rank, is_inter;
91
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 938 times.
938 xt_mpi_call(MPI_Comm_rank(comm, &rank), comm);
92
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 938 times.
938 xt_mpi_call(MPI_Comm_test_inter(comm, &is_inter), comm);
93 938 int (*get_comm_size)(MPI_Comm, int *)
94
2/2
✓ Branch 0 taken 152 times.
✓ Branch 1 taken 786 times.
938 = is_inter ? MPI_Comm_remote_size : MPI_Comm_size;
95
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 938 times.
938 xt_mpi_call(get_comm_size(comm, &comm_size), comm);
96
97 // allocate memory for intersections
98 938 struct Xt_com_list *dsti = xmalloc((size_t)comm_size * sizeof (*dsti));
99 938 struct Xt_com_list *srci = xmalloc((size_t)comm_size * sizeof (*srci));
100
101 // compute size of local index lists
102 938 size_t src_pack_size = xt_idxlist_get_pack_size(src_idxlist_local, comm);
103 938 size_t dst_pack_size = xt_idxlist_get_pack_size(dst_idxlist_local, comm);
104 938 size_t size_sum = src_pack_size + dst_pack_size;
105
106
2/4
✓ Branch 0 taken 938 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 938 times.
✗ Branch 3 not taken.
938 if (size_sum >= INT_MAX || size_sum < src_pack_size
107
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 938 times.
938 || size_sum < dst_pack_size)
108 die("local src+dst index lists are too large");
109
110 938 int send_buffer_size = (int)size_sum;
111
112 // exchange buffer sizes
113 int *restrict pack_sizes
114 938 = xmalloc((size_t)comm_size * sizeof(*pack_sizes) * 2);
115
116
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 938 times.
938 xt_mpi_call(MPI_Allgather(&send_buffer_size, 1, MPI_INT,
117 pack_sizes, 1, MPI_INT, comm), comm);
118
119 938 int *restrict displ = pack_sizes + comm_size;
120 938 unsigned recv_buffer_size = 0, size_overflow = 0;
121
2/2
✓ Branch 0 taken 4278 times.
✓ Branch 1 taken 938 times.
5216 for (size_t i = 0; i < (size_t)comm_size; ++i) {
122 4278 displ[i] = (int)recv_buffer_size;
123 4278 recv_buffer_size += (unsigned)pack_sizes[i];
124 4278 size_overflow |= recv_buffer_size & (1U << (sizeof(int) * CHAR_BIT - 1));
125 }
126
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 938 times.
938 if (size_overflow)
127 die("accumulated buffer sizes too big,"
128 " use distributed directory (xt_xmap_dist_dir_new)!");
129 938 void *recv_buffer = xmalloc((size_t)recv_buffer_size + size_sum),
130 938 *send_buffer = (unsigned char *)recv_buffer + (size_t)recv_buffer_size;
131 // pack local index lists
132 {
133 938 int position = 0;
134 938 xt_idxlist_pack(src_idxlist_local, send_buffer, send_buffer_size,
135 &position, comm);
136 938 xt_idxlist_pack(dst_idxlist_local, send_buffer, send_buffer_size,
137 &position, comm);
138 }
139 // exchange buffers
140
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 938 times.
938 xt_mpi_call(MPI_Allgatherv(send_buffer, send_buffer_size, MPI_PACKED,
141 recv_buffer, pack_sizes, displ, MPI_PACKED,
142 comm), comm);
143
144 938 size_t dst_isect_count = 0, src_isect_count = 0;
145 938 int large_list_seen = 0;
146 // compute intersections
147
2/2
✓ Branch 0 taken 4278 times.
✓ Branch 1 taken 938 times.
5216 for (int i = 0; i < comm_size; ++i) {
148
149 4278 int position = 0;
150 // unpack buffers unless local
151 Xt_idxlist src, dst;
152
4/4
✓ Branch 0 taken 3866 times.
✓ Branch 1 taken 412 times.
✓ Branch 2 taken 3080 times.
✓ Branch 3 taken 786 times.
4278 if (is_inter || i != rank) {
153 3492 src = xt_idxlist_unpack((unsigned char *)recv_buffer + displ[i],
154 3492 pack_sizes[i], &position, comm);
155 3492 dst = xt_idxlist_unpack((unsigned char *)recv_buffer + displ[i],
156 3492 pack_sizes[i], &position, comm);
157 } else {
158 786 src = src_idxlist_local;
159 786 dst = dst_idxlist_local;
160 }
161 4278 large_list_seen = large_list_seen
162
2/2
✓ Branch 0 taken 3242 times.
✓ Branch 1 taken 154 times.
3396 || (xt_idxlist_get_num_indices(src) > config->idxv_cnv_size)
163
4/4
✓ Branch 0 taken 3396 times.
✓ Branch 1 taken 882 times.
✓ Branch 2 taken 64 times.
✓ Branch 3 taken 3178 times.
7674 || (xt_idxlist_get_num_indices(dst) > config->idxv_cnv_size);
164 Xt_idxlist intersect
165 4278 = xt_idxlist_get_intersection_custom(src, dst_idxlist_local, config);
166
2/2
✓ Branch 0 taken 3128 times.
✓ Branch 1 taken 1150 times.
4278 if (xt_idxlist_get_num_indices(intersect) > 0) {
167
168 3128 dsti[dst_isect_count].list = intersect;
169 3128 dsti[dst_isect_count].rank = i;
170 3128 ++dst_isect_count;
171 }
172 else
173 1150 xt_idxlist_delete(intersect);
174
175 intersect
176 4278 = xt_idxlist_get_intersection_custom(src_idxlist_local, dst, config);
177
2/2
✓ Branch 0 taken 3128 times.
✓ Branch 1 taken 1150 times.
4278 if (xt_idxlist_get_num_indices(intersect) > 0) {
178
179 3128 srci[src_isect_count].list = intersect;
180 3128 srci[src_isect_count].rank = i;
181 3128 ++src_isect_count;
182 }
183 else
184 1150 xt_idxlist_delete(intersect);
185
4/4
✓ Branch 0 taken 3866 times.
✓ Branch 1 taken 412 times.
✓ Branch 2 taken 3080 times.
✓ Branch 3 taken 786 times.
4278 if (is_inter || i != rank) {
186 3492 xt_idxlist_delete(src);
187 3492 xt_idxlist_delete(dst);
188 }
189 }
190
191 938 int stripify_ = XT_CONFIG_GET_XMAP_STRIPING(config);
192
1/2
✓ Branch 0 taken 938 times.
✗ Branch 1 not taken.
938 if (stripify_ == 2)
193 938 stripify_ = large_list_seen;
194 938 *stripify = stripify_;
195
196 938 free(recv_buffer);
197 938 free(pack_sizes);
198
199 /* minimize memory use of tables */
200 938 *num_src_intersections = src_isect_count;
201 938 srci = xrealloc(srci, src_isect_count * sizeof (**src_intersections));
202 938 *src_intersections = srci;
203
204 938 *num_dst_intersections = dst_isect_count;
205 938 dsti = xrealloc(dsti, dst_isect_count * sizeof (**dst_intersections));
206 938 *dst_intersections = dsti;
207 938 }
208
209 Xt_xmap
210 938 xt_xmap_all2all_new(Xt_idxlist src_idxlist, Xt_idxlist dst_idxlist,
211 MPI_Comm comm)
212 {
213 938 return xt_xmap_all2all_custom_new(src_idxlist, dst_idxlist,
214 comm, &xt_default_config);
215 }
216
217 Xt_xmap
218 938 xt_xmap_all2all_custom_new(Xt_idxlist src_idxlist, Xt_idxlist dst_idxlist,
219 MPI_Comm comm, Xt_config config) {
220 INSTR_DEF(t_xt_xmap_all2all_new,"xt_xmap_all2all_new")
221 INSTR_START(t_xt_xmap_all2all_new);
222
223 // ensure that yaxt is initialized
224
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 938 times.
938 assert(xt_initialized());
225
226 int tag_offset;
227 938 MPI_Comm newcomm = xt_mpi_comm_smart_dup(comm, &tag_offset);
228
229 938 struct Xt_com_list * src_intersections = NULL, * dst_intersections = NULL;
230 size_t num_src_intersections, num_dst_intersections;
231
232 int stripify;
233 // exchange index lists between all processes in comm
234 938 exchange_idxlists(&src_intersections, &num_src_intersections,
235 &dst_intersections, &num_dst_intersections,
236 &stripify, src_idxlist, dst_idxlist, newcomm,
237 config);
238
239 938 Xt_xmap (*xmap_new)(int num_src_intersections,
240 const struct Xt_com_list *src_com,
241 int num_dst_intersections,
242 const struct Xt_com_list *dst_com,
243 Xt_idxlist src_idxlist, Xt_idxlist dst_idxlist,
244 MPI_Comm comm)
245
2/2
✓ Branch 0 taken 218 times.
✓ Branch 1 taken 720 times.
938 = stripify ? xt_xmap_intersection_ext_new : xt_xmap_intersection_new;
246
247 938 Xt_xmap xmap = xmap_new((int)num_src_intersections, src_intersections,
248 (int)num_dst_intersections, dst_intersections,
249 src_idxlist, dst_idxlist, newcomm);
250
251 938 xt_mpi_comm_smart_dedup(&newcomm, tag_offset);
252
2/2
✓ Branch 0 taken 3128 times.
✓ Branch 1 taken 938 times.
4066 for (size_t i = 0; i < num_src_intersections; ++i)
253
1/2
✓ Branch 0 taken 3128 times.
✗ Branch 1 not taken.
3128 if (src_intersections[i].list != NULL)
254 3128 xt_idxlist_delete(src_intersections[i].list);
255
2/2
✓ Branch 0 taken 3128 times.
✓ Branch 1 taken 938 times.
4066 for (size_t i = 0; i < num_dst_intersections; ++i)
256
1/2
✓ Branch 0 taken 3128 times.
✗ Branch 1 not taken.
3128 if (dst_intersections[i].list != NULL)
257 3128 xt_idxlist_delete(dst_intersections[i].list);
258 938 free(src_intersections);
259 938 free(dst_intersections);
260 INSTR_STOP(t_xt_xmap_all2all_new);
261 938 return xmap;
262 }
263
264 /*
265 * Local Variables:
266 * c-basic-offset: 2
267 * coding: utf-8
268 * indent-tabs-mode: nil
269 * show-trailing-whitespace: t
270 * require-trailing-newline: t
271 * End:
272 */
273