Directory: | src/ |
---|---|
File: | src/xt_xmap_all2all.c |
Date: | 2024-11-08 09:02:52 |
Exec | Total | Coverage | |
---|---|---|---|
Lines: | 91 | 93 | 97.8% |
Branches: | 43 | 56 | 76.8% |
Line | Branch | Exec | Source |
---|---|---|---|
1 | /** | ||
2 | * @file xt_xmap_all2all.c | ||
3 | * | ||
4 | * @copyright Copyright (C) 2016 Jörg Behrens <behrens@dkrz.de> | ||
5 | * Moritz Hanke <hanke@dkrz.de> | ||
6 | * Thomas Jahns <jahns@dkrz.de> | ||
7 | * | ||
8 | * @author Jörg Behrens <behrens@dkrz.de> | ||
9 | * Moritz Hanke <hanke@dkrz.de> | ||
10 | * Thomas Jahns <jahns@dkrz.de> | ||
11 | */ | ||
12 | /* | ||
13 | * Keywords: | ||
14 | * Maintainer: Jörg Behrens <behrens@dkrz.de> | ||
15 | * Moritz Hanke <hanke@dkrz.de> | ||
16 | * Thomas Jahns <jahns@dkrz.de> | ||
17 | * URL: https://dkrz-sw.gitlab-pages.dkrz.de/yaxt/ | ||
18 | * | ||
19 | * Redistribution and use in source and binary forms, with or without | ||
20 | * modification, are permitted provided that the following conditions are | ||
21 | * met: | ||
22 | * | ||
23 | * Redistributions of source code must retain the above copyright notice, | ||
24 | * this list of conditions and the following disclaimer. | ||
25 | * | ||
26 | * Redistributions in binary form must reproduce the above copyright | ||
27 | * notice, this list of conditions and the following disclaimer in the | ||
28 | * documentation and/or other materials provided with the distribution. | ||
29 | * | ||
30 | * Neither the name of the DKRZ GmbH nor the names of its contributors | ||
31 | * may be used to endorse or promote products derived from this software | ||
32 | * without specific prior written permission. | ||
33 | * | ||
34 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS | ||
35 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | ||
36 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A | ||
37 | * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER | ||
38 | * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
39 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
40 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
41 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
42 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
43 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
44 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
45 | */ | ||
46 | #ifdef HAVE_CONFIG_H | ||
47 | #include <config.h> | ||
48 | #endif | ||
49 | |||
50 | #include <stdlib.h> | ||
51 | #include <stdio.h> | ||
52 | #include <string.h> | ||
53 | #include <assert.h> | ||
54 | #include <limits.h> | ||
55 | |||
56 | #include <mpi.h> | ||
57 | |||
58 | #include "xt/xt_idxlist.h" | ||
59 | #include "xt/xt_idxvec.h" | ||
60 | #include "xt/xt_xmap.h" | ||
61 | #include "xt/xt_mpi.h" | ||
62 | #include "xt_mpi_internal.h" | ||
63 | #include "core/core.h" | ||
64 | #include "core/ppm_xfuncs.h" | ||
65 | #include "xt/xt_xmap_all2all.h" | ||
66 | #include "xt/xt_xmap_intersection.h" | ||
67 | #include "xt_config_internal.h" | ||
68 | #include "xt_idxlist_internal.h" | ||
69 | #include "instr.h" | ||
70 | |||
71 | 938 | static void exchange_idxlists(struct Xt_com_list **src_intersections, | |
72 | size_t *num_src_intersections, | ||
73 | struct Xt_com_list **dst_intersections, | ||
74 | size_t *num_dst_intersections, | ||
75 | int * stripify, | ||
76 | Xt_idxlist src_idxlist_local, | ||
77 | Xt_idxlist dst_idxlist_local, | ||
78 | MPI_Comm comm, Xt_config config) | ||
79 | { | ||
80 | |||
81 | /* | ||
82 | Note: The meaning of source (src) and destination (dst) points can already be understood by | ||
83 | looking at the serial case, where it is just a transformation of sequences of integers | ||
84 | (called indices). The starting state (source sequence) is transformed into an end state | ||
85 | (dst sequence). The transformation does not have to be bijective. For each position dpos of | ||
86 | the dst sequence we have at least one position spos in the src sequence with: | ||
87 | dst(dpos) = src(spos) | ||
88 | */ | ||
89 | |||
90 | int comm_size, rank, is_inter; | ||
91 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 938 times.
|
938 | xt_mpi_call(MPI_Comm_rank(comm, &rank), comm); |
92 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 938 times.
|
938 | xt_mpi_call(MPI_Comm_test_inter(comm, &is_inter), comm); |
93 | 938 | int (*get_comm_size)(MPI_Comm, int *) | |
94 |
2/2✓ Branch 0 taken 152 times.
✓ Branch 1 taken 786 times.
|
938 | = is_inter ? MPI_Comm_remote_size : MPI_Comm_size; |
95 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 938 times.
|
938 | xt_mpi_call(get_comm_size(comm, &comm_size), comm); |
96 | |||
97 | // allocate memory for intersections | ||
98 | 938 | struct Xt_com_list *dsti = xmalloc((size_t)comm_size * sizeof (*dsti)); | |
99 | 938 | struct Xt_com_list *srci = xmalloc((size_t)comm_size * sizeof (*srci)); | |
100 | |||
101 | // compute size of local index lists | ||
102 | 938 | size_t src_pack_size = xt_idxlist_get_pack_size(src_idxlist_local, comm); | |
103 | 938 | size_t dst_pack_size = xt_idxlist_get_pack_size(dst_idxlist_local, comm); | |
104 | 938 | size_t size_sum = src_pack_size + dst_pack_size; | |
105 | |||
106 |
2/4✓ Branch 0 taken 938 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 938 times.
✗ Branch 3 not taken.
|
938 | if (size_sum >= INT_MAX || size_sum < src_pack_size |
107 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 938 times.
|
938 | || size_sum < dst_pack_size) |
108 | ✗ | die("local src+dst index lists are too large"); | |
109 | |||
110 | 938 | int send_buffer_size = (int)size_sum; | |
111 | |||
112 | // exchange buffer sizes | ||
113 | int *restrict pack_sizes | ||
114 | 938 | = xmalloc((size_t)comm_size * sizeof(*pack_sizes) * 2); | |
115 | |||
116 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 938 times.
|
938 | xt_mpi_call(MPI_Allgather(&send_buffer_size, 1, MPI_INT, |
117 | pack_sizes, 1, MPI_INT, comm), comm); | ||
118 | |||
119 | 938 | int *restrict displ = pack_sizes + comm_size; | |
120 | 938 | unsigned recv_buffer_size = 0, size_overflow = 0; | |
121 |
2/2✓ Branch 0 taken 4278 times.
✓ Branch 1 taken 938 times.
|
5216 | for (size_t i = 0; i < (size_t)comm_size; ++i) { |
122 | 4278 | displ[i] = (int)recv_buffer_size; | |
123 | 4278 | recv_buffer_size += (unsigned)pack_sizes[i]; | |
124 | 4278 | size_overflow |= recv_buffer_size & (1U << (sizeof(int) * CHAR_BIT - 1)); | |
125 | } | ||
126 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 938 times.
|
938 | if (size_overflow) |
127 | ✗ | die("accumulated buffer sizes too big," | |
128 | " use distributed directory (xt_xmap_dist_dir_new)!"); | ||
129 | 938 | void *recv_buffer = xmalloc((size_t)recv_buffer_size + size_sum), | |
130 | 938 | *send_buffer = (unsigned char *)recv_buffer + (size_t)recv_buffer_size; | |
131 | // pack local index lists | ||
132 | { | ||
133 | 938 | int position = 0; | |
134 | 938 | xt_idxlist_pack(src_idxlist_local, send_buffer, send_buffer_size, | |
135 | &position, comm); | ||
136 | 938 | xt_idxlist_pack(dst_idxlist_local, send_buffer, send_buffer_size, | |
137 | &position, comm); | ||
138 | } | ||
139 | // exchange buffers | ||
140 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 938 times.
|
938 | xt_mpi_call(MPI_Allgatherv(send_buffer, send_buffer_size, MPI_PACKED, |
141 | recv_buffer, pack_sizes, displ, MPI_PACKED, | ||
142 | comm), comm); | ||
143 | |||
144 | 938 | size_t dst_isect_count = 0, src_isect_count = 0; | |
145 | 938 | int large_list_seen = 0; | |
146 | // compute intersections | ||
147 |
2/2✓ Branch 0 taken 4278 times.
✓ Branch 1 taken 938 times.
|
5216 | for (int i = 0; i < comm_size; ++i) { |
148 | |||
149 | 4278 | int position = 0; | |
150 | // unpack buffers unless local | ||
151 | Xt_idxlist src, dst; | ||
152 |
4/4✓ Branch 0 taken 3866 times.
✓ Branch 1 taken 412 times.
✓ Branch 2 taken 3080 times.
✓ Branch 3 taken 786 times.
|
4278 | if (is_inter || i != rank) { |
153 | 3492 | src = xt_idxlist_unpack((unsigned char *)recv_buffer + displ[i], | |
154 | 3492 | pack_sizes[i], &position, comm); | |
155 | 3492 | dst = xt_idxlist_unpack((unsigned char *)recv_buffer + displ[i], | |
156 | 3492 | pack_sizes[i], &position, comm); | |
157 | } else { | ||
158 | 786 | src = src_idxlist_local; | |
159 | 786 | dst = dst_idxlist_local; | |
160 | } | ||
161 | 4278 | large_list_seen = large_list_seen | |
162 |
2/2✓ Branch 0 taken 3242 times.
✓ Branch 1 taken 154 times.
|
3396 | || (xt_idxlist_get_num_indices(src) > config->idxv_cnv_size) |
163 |
4/4✓ Branch 0 taken 3396 times.
✓ Branch 1 taken 882 times.
✓ Branch 2 taken 64 times.
✓ Branch 3 taken 3178 times.
|
7674 | || (xt_idxlist_get_num_indices(dst) > config->idxv_cnv_size); |
164 | Xt_idxlist intersect | ||
165 | 4278 | = xt_idxlist_get_intersection_custom(src, dst_idxlist_local, config); | |
166 |
2/2✓ Branch 0 taken 3128 times.
✓ Branch 1 taken 1150 times.
|
4278 | if (xt_idxlist_get_num_indices(intersect) > 0) { |
167 | |||
168 | 3128 | dsti[dst_isect_count].list = intersect; | |
169 | 3128 | dsti[dst_isect_count].rank = i; | |
170 | 3128 | ++dst_isect_count; | |
171 | } | ||
172 | else | ||
173 | 1150 | xt_idxlist_delete(intersect); | |
174 | |||
175 | intersect | ||
176 | 4278 | = xt_idxlist_get_intersection_custom(src_idxlist_local, dst, config); | |
177 |
2/2✓ Branch 0 taken 3128 times.
✓ Branch 1 taken 1150 times.
|
4278 | if (xt_idxlist_get_num_indices(intersect) > 0) { |
178 | |||
179 | 3128 | srci[src_isect_count].list = intersect; | |
180 | 3128 | srci[src_isect_count].rank = i; | |
181 | 3128 | ++src_isect_count; | |
182 | } | ||
183 | else | ||
184 | 1150 | xt_idxlist_delete(intersect); | |
185 |
4/4✓ Branch 0 taken 3866 times.
✓ Branch 1 taken 412 times.
✓ Branch 2 taken 3080 times.
✓ Branch 3 taken 786 times.
|
4278 | if (is_inter || i != rank) { |
186 | 3492 | xt_idxlist_delete(src); | |
187 | 3492 | xt_idxlist_delete(dst); | |
188 | } | ||
189 | } | ||
190 | |||
191 | 938 | int stripify_ = XT_CONFIG_GET_XMAP_STRIPING(config); | |
192 |
1/2✓ Branch 0 taken 938 times.
✗ Branch 1 not taken.
|
938 | if (stripify_ == 2) |
193 | 938 | stripify_ = large_list_seen; | |
194 | 938 | *stripify = stripify_; | |
195 | |||
196 | 938 | free(recv_buffer); | |
197 | 938 | free(pack_sizes); | |
198 | |||
199 | /* minimize memory use of tables */ | ||
200 | 938 | *num_src_intersections = src_isect_count; | |
201 | 938 | srci = xrealloc(srci, src_isect_count * sizeof (**src_intersections)); | |
202 | 938 | *src_intersections = srci; | |
203 | |||
204 | 938 | *num_dst_intersections = dst_isect_count; | |
205 | 938 | dsti = xrealloc(dsti, dst_isect_count * sizeof (**dst_intersections)); | |
206 | 938 | *dst_intersections = dsti; | |
207 | 938 | } | |
208 | |||
209 | Xt_xmap | ||
210 | 938 | xt_xmap_all2all_new(Xt_idxlist src_idxlist, Xt_idxlist dst_idxlist, | |
211 | MPI_Comm comm) | ||
212 | { | ||
213 | 938 | return xt_xmap_all2all_custom_new(src_idxlist, dst_idxlist, | |
214 | comm, &xt_default_config); | ||
215 | } | ||
216 | |||
217 | Xt_xmap | ||
218 | 938 | xt_xmap_all2all_custom_new(Xt_idxlist src_idxlist, Xt_idxlist dst_idxlist, | |
219 | MPI_Comm comm, Xt_config config) { | ||
220 | INSTR_DEF(t_xt_xmap_all2all_new,"xt_xmap_all2all_new") | ||
221 | INSTR_START(t_xt_xmap_all2all_new); | ||
222 | |||
223 | // ensure that yaxt is initialized | ||
224 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 938 times.
|
938 | assert(xt_initialized()); |
225 | |||
226 | int tag_offset; | ||
227 | 938 | MPI_Comm newcomm = xt_mpi_comm_smart_dup(comm, &tag_offset); | |
228 | |||
229 | 938 | struct Xt_com_list * src_intersections = NULL, * dst_intersections = NULL; | |
230 | size_t num_src_intersections, num_dst_intersections; | ||
231 | |||
232 | int stripify; | ||
233 | // exchange index lists between all processes in comm | ||
234 | 938 | exchange_idxlists(&src_intersections, &num_src_intersections, | |
235 | &dst_intersections, &num_dst_intersections, | ||
236 | &stripify, src_idxlist, dst_idxlist, newcomm, | ||
237 | config); | ||
238 | |||
239 | 938 | Xt_xmap (*xmap_new)(int num_src_intersections, | |
240 | const struct Xt_com_list *src_com, | ||
241 | int num_dst_intersections, | ||
242 | const struct Xt_com_list *dst_com, | ||
243 | Xt_idxlist src_idxlist, Xt_idxlist dst_idxlist, | ||
244 | MPI_Comm comm) | ||
245 |
2/2✓ Branch 0 taken 218 times.
✓ Branch 1 taken 720 times.
|
938 | = stripify ? xt_xmap_intersection_ext_new : xt_xmap_intersection_new; |
246 | |||
247 | 938 | Xt_xmap xmap = xmap_new((int)num_src_intersections, src_intersections, | |
248 | (int)num_dst_intersections, dst_intersections, | ||
249 | src_idxlist, dst_idxlist, newcomm); | ||
250 | |||
251 | 938 | xt_mpi_comm_smart_dedup(&newcomm, tag_offset); | |
252 |
2/2✓ Branch 0 taken 3128 times.
✓ Branch 1 taken 938 times.
|
4066 | for (size_t i = 0; i < num_src_intersections; ++i) |
253 |
1/2✓ Branch 0 taken 3128 times.
✗ Branch 1 not taken.
|
3128 | if (src_intersections[i].list != NULL) |
254 | 3128 | xt_idxlist_delete(src_intersections[i].list); | |
255 |
2/2✓ Branch 0 taken 3128 times.
✓ Branch 1 taken 938 times.
|
4066 | for (size_t i = 0; i < num_dst_intersections; ++i) |
256 |
1/2✓ Branch 0 taken 3128 times.
✗ Branch 1 not taken.
|
3128 | if (dst_intersections[i].list != NULL) |
257 | 3128 | xt_idxlist_delete(dst_intersections[i].list); | |
258 | 938 | free(src_intersections); | |
259 | 938 | free(dst_intersections); | |
260 | INSTR_STOP(t_xt_xmap_all2all_new); | ||
261 | 938 | return xmap; | |
262 | } | ||
263 | |||
264 | /* | ||
265 | * Local Variables: | ||
266 | * c-basic-offset: 2 | ||
267 | * coding: utf-8 | ||
268 | * indent-tabs-mode: nil | ||
269 | * show-trailing-whitespace: t | ||
270 | * require-trailing-newline: t | ||
271 | * End: | ||
272 | */ | ||
273 |