Directory: | src/ |
---|---|
File: | src/xt_mpi_stripe_parse_func.h |
Date: | 2024-11-08 09:02:52 |
Exec | Total | Coverage | |
---|---|---|---|
Lines: | 154 | 171 | 90.1% |
Branches: | 109 | 122 | 89.3% |
Line | Branch | Exec | Source |
---|---|---|---|
1 | /** | ||
2 | * @file xt_mpi_stripe_parse_func.h | ||
3 | * | ||
4 | * @copyright Copyright (C) 2022 Jörg Behrens <behrens@dkrz.de> | ||
5 | * Moritz Hanke <hanke@dkrz.de> | ||
6 | * Thomas Jahns <jahns@dkrz.de> | ||
7 | * | ||
8 | * @author Jörg Behrens <behrens@dkrz.de> | ||
9 | * Moritz Hanke <hanke@dkrz.de> | ||
10 | * Thomas Jahns <jahns@dkrz.de> | ||
11 | */ | ||
12 | /* | ||
13 | * Keywords: | ||
14 | * Maintainer: Jörg Behrens <behrens@dkrz.de> | ||
15 | * Moritz Hanke <hanke@dkrz.de> | ||
16 | * Thomas Jahns <jahns@dkrz.de> | ||
17 | * URL: https://dkrz-sw.gitlab-pages.dkrz.de/yaxt/ | ||
18 | * | ||
19 | * Redistribution and use in source and binary forms, with or without | ||
20 | * modification, are permitted provided that the following conditions are | ||
21 | * met: | ||
22 | * | ||
23 | * Redistributions of source code must retain the above copyright notice, | ||
24 | * this list of conditions and the following disclaimer. | ||
25 | * | ||
26 | * Redistributions in binary form must reproduce the above copyright | ||
27 | * notice, this list of conditions and the following disclaimer in the | ||
28 | * documentation and/or other materials provided with the distribution. | ||
29 | * | ||
30 | * Neither the name of the DKRZ GmbH nor the names of its contributors | ||
31 | * may be used to endorse or promote products derived from this software | ||
32 | * without specific prior written permission. | ||
33 | * | ||
34 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS | ||
35 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | ||
36 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A | ||
37 | * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER | ||
38 | * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
39 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
40 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
41 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
42 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
43 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
44 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
45 | */ | ||
46 | #ifdef HAVE_CONFIG_H | ||
47 | #include "config.h" | ||
48 | #endif | ||
49 | |||
50 | |||
51 | #define XT_TOKEN_PASTE2_(a,b) a##b | ||
52 | #define XT_TOKEN_PASTE2(a,b) XT_TOKEN_PASTE2_(a,b) | ||
53 | #define XT_TOKEN_PASTE3_(a,b,c) a##b##c | ||
54 | #define XT_TOKEN_PASTE3(a,b,c) XT_TOKEN_PASTE3_(a,b,c) | ||
55 | |||
56 | #define XT_MPI_OFFSET_EXT \ | ||
57 | struct XT_TOKEN_PASTE3(Xt_,XT_MPI_STRP_PRS_PREFIX,offset_ext) | ||
58 | #define XT_MPI_STRP_PRS_MATCH_BLOCK_VEC \ | ||
59 | XT_TOKEN_PASTE2(XT_MPI_STRP_PRS_PREFIX,match_block_vec) | ||
60 | #define XT_MPI_STRP_PRS_MATCH_INDEXED \ | ||
61 | XT_TOKEN_PASTE2(XT_MPI_STRP_PRS_PREFIX,match_indexed) | ||
62 | #define XT_MPI_STRP_PRS_MATCH_SIMPLE_VEC \ | ||
63 | XT_TOKEN_PASTE2(XT_MPI_STRP_PRS_PREFIX,match_simple_vec) | ||
64 | #define XT_MPI_STRP_PRS_MATCH_CONTIGUOUS \ | ||
65 | XT_TOKEN_PASTE2(XT_MPI_STRP_PRS_PREFIX,match_contiguous) | ||
66 | #define XT_MPI_STRP_PRS_GEN_FALLBACK_TYPE \ | ||
67 | XT_TOKEN_PASTE2(XT_MPI_STRP_PRS_PREFIX,gen_fallback_type) | ||
68 | #define XT_MPI_STRP_PRS_ENTRY \ | ||
69 | XT_TOKEN_PASTE3(parse_,XT_MPI_STRP_PRS_PREFIX,stripe) | ||
70 | #define XT_MPI_STRP_PRS_ENTRY_LIBINTERNAL \ | ||
71 | XT_TOKEN_PASTE3(xt_mpi_parse_,XT_MPI_STRP_PRS_PREFIX,stripe) | ||
72 | #define XT_MPI_STRP_PRS_DRIVER \ | ||
73 | XT_TOKEN_PASTE3(xt_mpi_generate_datatype_,XT_MPI_STRP_PRS_PREFIX,stripe) | ||
74 | |||
75 | /** | ||
76 | * @return true if matched, false if not matched | ||
77 | */ | ||
78 | static bool | ||
79 | 28016 | XT_MPI_STRP_PRS_MATCH_BLOCK_VEC | |
80 | (size_t *pstart_, | ||
81 | const XT_MPI_OFFSET_EXT *v, | ||
82 | size_t vlen, | ||
83 | MPI_Aint *disp, MPI_Datatype *dt, | ||
84 | struct Xt_mpi_strp_prs_params *params) { | ||
85 | // using at least 3 vectors | ||
86 | 28016 | size_t p = *pstart_, pstart = p; | |
87 |
4/4✓ Branch 0 taken 9594 times.
✓ Branch 1 taken 4414 times.
✓ Branch 2 taken 8852 times.
✓ Branch 3 taken 742 times.
|
28016 | if (p+2 >= vlen || v[p].stride != XT_MPI_STRP_PRS_UNITSTRIDE |
88 |
2/2✓ Branch 0 taken 196 times.
✓ Branch 1 taken 8656 times.
|
28016 | || v[p+1].stride != XT_MPI_STRP_PRS_UNITSTRIDE ) return false; |
89 | 17312 | int bl = v[p].size; | |
90 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8656 times.
|
17312 | assert(bl > 0); |
91 |
2/2✓ Branch 0 taken 1167 times.
✓ Branch 1 taken 7489 times.
|
17312 | if (v[p+1].size != bl) return false; |
92 | |||
93 | 14978 | XT_MPI_STRP_PRS_AOFS_TYPE vstride = v[p+1].start - v[p].start; | |
94 | |||
95 | 14978 | p += 2; | |
96 |
2/2✓ Branch 0 taken 15838 times.
✓ Branch 1 taken 108 times.
|
46870 | while( p < vlen && v[p].stride == XT_MPI_STRP_PRS_UNITSTRIDE |
97 |
6/6✓ Branch 0 taken 15946 times.
✓ Branch 1 taken 405 times.
✓ Branch 2 taken 15486 times.
✓ Branch 3 taken 352 times.
✓ Branch 4 taken 8862 times.
✓ Branch 5 taken 6624 times.
|
64378 | && v[p].size == bl && v[p].start - v[p-1].start == vstride ) { |
98 | 17724 | p++; | |
99 | } | ||
100 | 14978 | size_t n = p - pstart; | |
101 |
2/2✓ Branch 0 taken 6347 times.
✓ Branch 1 taken 1142 times.
|
14978 | if (n<3) return false; |
102 | 2284 | *pstart_ = p; | |
103 | |||
104 |
2/2✓ Branch 0 taken 1079 times.
✓ Branch 1 taken 63 times.
|
2284 | XT_MPI_STRP_PRS_AOFS_TYPE disp_ = n == vlen ? 0 : v[pstart].start; |
105 | 2284 | *disp = XT_MPI_STRP_PRS_DISP_ADJUST(disp_); | |
106 | |||
107 | 2284 | *dt = XT_MPI_STRP_PRS_BLOCK_VEC_CREATE( | |
108 | &params->ddt_list, (int)n, bl, vstride, params->old_type, params->comm); | ||
109 | |||
110 | 2284 | XT_MPI_STRP_PRS_AOFS_TYPE start = v[pstart].start - disp_; | |
111 | |||
112 |
2/2✓ Branch 0 taken 60 times.
✓ Branch 1 taken 1082 times.
|
2284 | if (start) { |
113 | 120 | MPI_Datatype dt1 = *dt; | |
114 | // (start != 0) => add offset: | ||
115 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 60 times.
|
120 | xt_mpi_call(MPI_Type_create_hindexed( |
116 | 1, &(int){1}, | ||
117 | &(MPI_Aint){XT_MPI_STRP_PRS_DISP_ADJUST(start)}, dt1, dt), | ||
118 | params->comm); | ||
119 | 120 | Xt_mpi_ddt_cache_entry_release(&params->ddt_list, &dt1, params->comm); | |
120 | } | ||
121 | 2284 | return n != 0; | |
122 | } | ||
123 | |||
124 | static bool | ||
125 | 25732 | XT_MPI_STRP_PRS_MATCH_INDEXED( | |
126 | size_t *pstart_, | ||
127 | const XT_MPI_OFFSET_EXT *v, | ||
128 | size_t vlen, | ||
129 | MPI_Aint *disp, MPI_Datatype *dt, | ||
130 | struct Xt_mpi_strp_prs_params *params) { | ||
131 | // we only accept non-trivial matches | ||
132 | 25732 | size_t p = *pstart_, pstart = p; | |
133 |
5/6✓ Branch 0 taken 12866 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 11971 times.
✓ Branch 3 taken 895 times.
✓ Branch 4 taken 7250 times.
✓ Branch 5 taken 4721 times.
|
25732 | if (p >= vlen || v[p].stride != XT_MPI_STRP_PRS_UNITSTRIDE || v[p].size < 2) |
134 | 16290 | return false; | |
135 | |||
136 | do | ||
137 | 52550 | ++p; | |
138 |
4/4✓ Branch 0 taken 21787 times.
✓ Branch 1 taken 4488 times.
✓ Branch 2 taken 21554 times.
✓ Branch 3 taken 233 times.
|
52550 | while (p < vlen && v[p].stride == XT_MPI_STRP_PRS_UNITSTRIDE); |
139 | |||
140 | 9442 | size_t n = p - pstart; | |
141 | |||
142 |
2/2✓ Branch 0 taken 2971 times.
✓ Branch 1 taken 1750 times.
|
9442 | if (n < 2) return false; |
143 | 3500 | *pstart_ = p; | |
144 | |||
145 |
2/2✓ Branch 0 taken 273 times.
✓ Branch 1 taken 1477 times.
|
3500 | XT_MPI_STRP_PRS_AOFS_TYPE start = n == vlen ? 0 : v[pstart].start; |
146 | 3500 | *disp = XT_MPI_STRP_PRS_DISP_ADJUST(start); | |
147 | XT_MPI_STRP_PRS_AOFS_TYPE *restrict d | ||
148 | 3500 | = xmalloc(n * sizeof (int) + n * sizeof (*d)); | |
149 | 3500 | int *restrict bl = (int * restrict)(d + n); | |
150 | 3500 | bool hom_bl = true; | |
151 | 3500 | d[0] = v[pstart].start - start; | |
152 | 3500 | int bl0 = bl[0] = v[pstart].size; | |
153 |
2/2✓ Branch 0 taken 21554 times.
✓ Branch 1 taken 1750 times.
|
46608 | for (size_t i = 1; i < n; i++) { |
154 | 43108 | size_t iv = pstart + i; | |
155 | 43108 | d[i] = v[iv].start - start; | |
156 | 43108 | bl[i] = v[iv].size; | |
157 | 43108 | hom_bl &= (bl[i] == bl0); | |
158 | } | ||
159 | |||
160 |
2/2✓ Branch 0 taken 1006 times.
✓ Branch 1 taken 744 times.
|
3500 | if (hom_bl) { |
161 | 2012 | *dt = XT_MPI_STRP_PRS_INDEXED_BLOCK_CREATE( | |
162 | &params->ddt_list, (int)n, bl0, d, params->old_type, params->comm); | ||
163 | } else { | ||
164 | 1488 | *dt = XT_MPI_STRP_PRS_INDEXED_CREATE( | |
165 | &params->ddt_list, (int)n, bl, d, params->old_type, params->comm); | ||
166 | } | ||
167 | |||
168 | 3500 | free(d); | |
169 | 3500 | return n != 0; | |
170 | } | ||
171 | |||
172 | static bool | ||
173 | 22232 | XT_MPI_STRP_PRS_MATCH_SIMPLE_VEC( | |
174 | size_t *pstart_, | ||
175 | const XT_MPI_OFFSET_EXT *v, | ||
176 | size_t vlen, | ||
177 | MPI_Aint *disp, MPI_Datatype *dt, | ||
178 | struct Xt_mpi_strp_prs_params *params) { | ||
179 | // we only accept non-trivial matches (nsteps>2) with stride /= 1 | ||
180 | // using only one vector from v | ||
181 | 22232 | size_t p = *pstart_; | |
182 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 11116 times.
|
22232 | if (p >= vlen) return false; |
183 | 22232 | int nstrides = v[p].size; | |
184 | 22232 | XT_MPI_STRP_PRS_AOFS_TYPE stride = v[p].stride; | |
185 |
4/4✓ Branch 0 taken 3833 times.
✓ Branch 1 taken 7283 times.
✓ Branch 2 taken 2971 times.
✓ Branch 3 taken 862 times.
|
22232 | if (nstrides < 2 || stride == XT_MPI_STRP_PRS_UNITSTRIDE ) return false; |
186 | |||
187 | 1724 | *pstart_ = p + 1; | |
188 | |||
189 |
2/2✓ Branch 0 taken 821 times.
✓ Branch 1 taken 41 times.
|
1724 | XT_MPI_STRP_PRS_AOFS_TYPE disp_ = vlen > 1 ? v[p].start : 0; |
190 | 1724 | *disp = XT_MPI_STRP_PRS_DISP_ADJUST(disp_); | |
191 | |||
192 | 1724 | *dt = XT_MPI_STRP_PRS_BLOCK_VEC_CREATE( | |
193 | &params->ddt_list, nstrides, 1, stride, params->old_type, params->comm); | ||
194 | |||
195 | 1724 | XT_MPI_STRP_PRS_AOFS_TYPE start = v[p].start - disp_; | |
196 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 850 times.
|
1724 | if (start) { |
197 | 24 | MPI_Datatype dt1 = *dt; | |
198 | |||
199 | // (start != 0) => add offset: | ||
200 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 12 times.
|
24 | xt_mpi_call(MPI_Type_create_hindexed( |
201 | 1, &(int){1}, | ||
202 | &(MPI_Aint){XT_MPI_STRP_PRS_DISP_ADJUST(start)}, dt1, dt), | ||
203 | params->comm); | ||
204 | 24 | Xt_mpi_ddt_cache_entry_release(&params->ddt_list, &dt1, params->comm); | |
205 | } | ||
206 | 1724 | return nstrides != 0; | |
207 | } | ||
208 | |||
209 | static bool | ||
210 | 20508 | XT_MPI_STRP_PRS_MATCH_CONTIGUOUS( | |
211 | size_t *pstart_, | ||
212 | const XT_MPI_OFFSET_EXT *v, | ||
213 | size_t vlen, | ||
214 | MPI_Aint *restrict disp, MPI_Datatype *dt, | ||
215 | struct Xt_mpi_strp_prs_params *params) { | ||
216 | 20508 | size_t p = *pstart_; | |
217 |
5/6✓ Branch 0 taken 10254 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10221 times.
✓ Branch 3 taken 33 times.
✓ Branch 4 taken 7250 times.
✓ Branch 5 taken 2971 times.
|
20508 | if (p >= vlen || v[p].stride != XT_MPI_STRP_PRS_UNITSTRIDE || v[p].size < 2) |
218 | 14566 | return false; | |
219 | |||
220 |
2/2✓ Branch 0 taken 162 times.
✓ Branch 1 taken 2809 times.
|
5942 | XT_MPI_STRP_PRS_AOFS_TYPE disp_ = vlen > 1 ? v[p].start : 0; |
221 | 5942 | *disp = XT_MPI_STRP_PRS_DISP_ADJUST(disp_); | |
222 | 5942 | XT_MPI_STRP_PRS_AOFS_TYPE d = v[p].start - disp_; | |
223 | |||
224 |
2/2✓ Branch 0 taken 1727 times.
✓ Branch 1 taken 1244 times.
|
5942 | if (!d) |
225 | 3454 | *dt = Xt_mpi_ddt_cache_acquire_contiguous( | |
226 | 3454 | &params->ddt_list, v[p].size, params->old_type, params->comm); | |
227 | else | ||
228 | 2488 | *dt = XT_MPI_STRP_PRS_INDEXED_BLOCK_CREATE( | |
229 | 2488 | &params->ddt_list, 1, v[p].size, &d, params->old_type, params->comm); | |
230 | |||
231 | 5942 | *pstart_ = p+1; | |
232 | 5942 | return true; | |
233 | } | ||
234 | |||
235 | static void | ||
236 | 1832 | XT_MPI_STRP_PRS_GEN_FALLBACK_TYPE( | |
237 | size_t set_start, size_t set_end, | ||
238 | const XT_MPI_OFFSET_EXT *v, | ||
239 | size_t vlen, | ||
240 | MPI_Aint *disp, | ||
241 | MPI_Datatype *dt, struct Xt_mpi_strp_prs_params *params) { | ||
242 | 1832 | size_t ia = set_start; | |
243 | 1832 | size_t ib = set_end; | |
244 |
2/4✓ Branch 0 taken 916 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 916 times.
|
1832 | if (ib <= ia || ib > vlen) return; |
245 | |||
246 | 1832 | int n = 0; | |
247 |
2/2✓ Branch 0 taken 7283 times.
✓ Branch 1 taken 916 times.
|
16398 | for (size_t i=ia; i < ib; i++) |
248 | 14566 | n += v[i].size; | |
249 | |||
250 | /* todo: given the guarantees for v that fceb584 introduced, | ||
251 | * this check should never fire */ | ||
252 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 916 times.
|
1832 | assert(n>0); |
253 | |||
254 | // generate absolute datatype if ia == 0 && ib == vlen, | ||
255 | // else generate relative datatype that gets embedded by the caller | ||
256 |
4/4✓ Branch 0 taken 536 times.
✓ Branch 1 taken 380 times.
✓ Branch 2 taken 211 times.
✓ Branch 3 taken 325 times.
|
1832 | XT_MPI_STRP_PRS_AOFS_TYPE start = (ia == 0 && ib == vlen) ? 0 : v[ia].start; |
257 | |||
258 | 1832 | *disp = XT_MPI_STRP_PRS_DISP_ADJUST(start); | |
259 | |||
260 | 1832 | XT_MPI_STRP_PRS_AOFS_TYPE *restrict d = xmalloc(sizeof (*d) * (size_t)n); | |
261 | 1832 | size_t p=0; | |
262 | #ifndef NDEBUG | ||
263 | /* did any element of v have non-positive size? */ | ||
264 | 1832 | bool found_np = false; | |
265 | #endif | ||
266 | |||
267 |
2/2✓ Branch 0 taken 7283 times.
✓ Branch 1 taken 916 times.
|
16398 | for (size_t i=ia; i < ib; i++) { |
268 | #ifndef NDEBUG | ||
269 | 14566 | found_np |= v[i].size <= 0; | |
270 | #endif | ||
271 | 14566 | size_t v_i_size = (size_t)(v[i].size > 0 ? v[i].size : 0); | |
272 |
2/2✓ Branch 0 taken 7283 times.
✓ Branch 1 taken 7283 times.
|
29132 | for (size_t k=0; k < v_i_size; k++) { |
273 | 14566 | d[p] = v[i].start + (XT_MPI_STRP_PRS_AOFS_TYPE)k * v[i].stride - start; | |
274 | 14566 | p++; | |
275 | } | ||
276 | } | ||
277 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 916 times.
|
1832 | assert(!found_np); |
278 | |||
279 |
4/4✓ Branch 0 taken 648 times.
✓ Branch 1 taken 268 times.
✓ Branch 2 taken 506 times.
✓ Branch 3 taken 142 times.
|
1832 | if (n==1 && d[0] == 0) { |
280 | 1012 | *dt = params->old_type; | |
281 | } else { | ||
282 | 820 | *dt = XT_MPI_STRP_PRS_INDEXED_BLOCK_CREATE( | |
283 | &params->ddt_list, n, 1, d, params->old_type, params->comm); | ||
284 | } | ||
285 | 1832 | free(d); | |
286 | } | ||
287 | |||
288 | static MPI_Datatype | ||
289 | XT_MPI_STRP_PRS_ENTRY( | ||
290 | const XT_MPI_OFFSET_EXT *v, | ||
291 | size_t vlen, | ||
292 | struct Xt_mpi_strp_prs_params *params); | ||
293 | |||
294 | MPI_Datatype | ||
295 | 8948 | XT_MPI_STRP_PRS_ENTRY_LIBINTERNAL( | |
296 | size_t vlen, | ||
297 | const XT_MPI_OFFSET_EXT v[vlen], | ||
298 | struct Xt_mpi_strp_prs_params *params) | ||
299 | 8948 | { | |
300 | 8948 | return XT_MPI_STRP_PRS_ENTRY(v, vlen, params); | |
301 | } | ||
302 | |||
303 | static MPI_Datatype | ||
304 | 10690 | XT_MPI_STRP_PRS_ENTRY( | |
305 | const XT_MPI_OFFSET_EXT *v, | ||
306 | size_t vlen, | ||
307 | struct Xt_mpi_strp_prs_params *params) | ||
308 | { | ||
309 | /* [set_start,set_end) describes the prefix of non-matching | ||
310 | * elements in v that then need to be handled with gen_fallback_type */ | ||
311 | 10690 | size_t set_start = 0, set_end = 0; | |
312 | MPI_Aint *restrict wdisp | ||
313 | 10690 | = xmalloc(sizeof(MPI_Datatype) * vlen + sizeof (MPI_Aint) * vlen); | |
314 | 10690 | MPI_Datatype *restrict wdt = (MPI_Datatype *)(wdisp + vlen); | |
315 | /* [p,vlen) is the part of v that still needs matching performed */ | ||
316 | /* m is the index of the next datatype and displacements to write | ||
317 | * to wdt and wdisp respectively */ | ||
318 | 10690 | size_t p = 0, m = 0; | |
319 |
2/2✓ Branch 0 taken 14008 times.
✓ Branch 1 taken 5345 times.
|
38706 | while (p<vlen) { |
320 | /* depending on whether there is a non-empty prefix, the datatype | ||
321 | * and displacement corresponding to a match need to be written | ||
322 | * to wdt[m+1] and wdisp[m+1] or wdt[m] and wdisp[m] respectively */ | ||
323 | 28016 | size_t mm = m + (set_start < set_end); | |
324 |
2/2✓ Branch 0 taken 12866 times.
✓ Branch 1 taken 1142 times.
|
28016 | if ((XT_MPI_STRP_PRS_MATCH_BLOCK_VEC( |
325 | &p, v, vlen, | ||
326 | 28016 | wdisp+mm, wdt+mm, params)) | |
327 |
2/2✓ Branch 0 taken 11116 times.
✓ Branch 1 taken 1750 times.
|
25732 | || (XT_MPI_STRP_PRS_MATCH_INDEXED( |
328 | &p, v, vlen, | ||
329 | 25732 | wdisp+mm, wdt+mm, params)) | |
330 |
2/2✓ Branch 0 taken 10254 times.
✓ Branch 1 taken 862 times.
|
22232 | || (XT_MPI_STRP_PRS_MATCH_SIMPLE_VEC( |
331 | &p, v, vlen, | ||
332 | 22232 | wdisp+mm, wdt+mm, params)) | |
333 |
2/2✓ Branch 0 taken 2971 times.
✓ Branch 1 taken 7283 times.
|
20508 | || (XT_MPI_STRP_PRS_MATCH_CONTIGUOUS( |
334 | &p, v, vlen, | ||
335 | 20508 | wdisp+mm, wdt+mm, params)) ) { | |
336 | /* in case a match is found, generate fallback datatype for | ||
337 | * non-matching, preceding extents */ | ||
338 |
2/2✓ Branch 0 taken 571 times.
✓ Branch 1 taken 6154 times.
|
13450 | if (set_start < set_end) { |
339 | 1142 | XT_MPI_STRP_PRS_GEN_FALLBACK_TYPE( | |
340 | set_start, set_end, v, vlen, | ||
341 | 1142 | wdisp+m, wdt+m, params); | |
342 | 1142 | m++; | |
343 | } | ||
344 | 13450 | m++; | |
345 | 13450 | set_start = p; | |
346 | } else { | ||
347 | /* assign ext investigated last to prefix */ | ||
348 | 14566 | set_end = ++p; | |
349 | } | ||
350 | } | ||
351 |
2/2✓ Branch 0 taken 345 times.
✓ Branch 1 taken 5000 times.
|
10690 | if (set_start < set_end) { |
352 | 690 | XT_MPI_STRP_PRS_GEN_FALLBACK_TYPE( | |
353 | set_start, set_end, v, vlen, | ||
354 | 690 | wdisp+m, wdt+m, params); | |
355 | 690 | m++; | |
356 | } | ||
357 | 10690 | size_t wlen = m; | |
358 | 10690 | MPI_Datatype result_dt, old_type = params->old_type; | |
359 |
2/2✓ Branch 0 taken 4715 times.
✓ Branch 1 taken 630 times.
|
10690 | if (wlen == 1 ) { |
360 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4715 times.
|
9430 | assert(wdisp[0] == 0); |
361 |
2/2✓ Branch 0 taken 142 times.
✓ Branch 1 taken 4573 times.
|
9430 | if (wdt[0] == old_type) |
362 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 142 times.
|
284 | xt_mpi_call(MPI_Type_dup(old_type, wdt), params->comm); |
363 | 9430 | result_dt = wdt[0]; | |
364 | } else { | ||
365 | 1260 | int *restrict wblocklength | |
366 | 1260 | = wlen * sizeof (int) <= (vlen - wlen) * sizeof (*wdt) | |
367 |
2/2✓ Branch 0 taken 543 times.
✓ Branch 1 taken 87 times.
|
1260 | ? (void *)(wdt + wlen) : xmalloc(wlen * sizeof (*wblocklength)); |
368 |
2/2✓ Branch 0 taken 2926 times.
✓ Branch 1 taken 630 times.
|
7112 | for(size_t i=0; i<wlen; i++) |
369 | 5852 | wblocklength[i] = 1; | |
370 | 1260 | result_dt = Xt_mpi_ddt_cache_acquire_struct( | |
371 | &params->ddt_list, (int)wlen, wblocklength, wdisp, wdt, params->comm); | ||
372 |
2/2✓ Branch 0 taken 87 times.
✓ Branch 1 taken 543 times.
|
1260 | if (wlen * sizeof (int) > (vlen - wlen) * sizeof (*wdt)) |
373 | 174 | free(wblocklength); | |
374 |
2/2✓ Branch 0 taken 2926 times.
✓ Branch 1 taken 630 times.
|
7112 | for (size_t i = 0; i < wlen; i++) |
375 |
2/2✓ Branch 0 taken 2562 times.
✓ Branch 1 taken 364 times.
|
5852 | if (wdt[i] != old_type) |
376 | 5124 | Xt_mpi_ddt_cache_entry_release(&params->ddt_list, wdt+i, params->comm); | |
377 | } | ||
378 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 5345 times.
|
10690 | xt_mpi_call(MPI_Type_commit(&result_dt), params->comm); |
379 | 10690 | free(wdisp); | |
380 | 10690 | return result_dt; | |
381 | } | ||
382 | |||
383 | MPI_Datatype | ||
384 | ✗ | XT_MPI_STRP_PRS_DRIVER(const XT_MPI_OFFSET_EXT *v, | |
385 | int count, MPI_Datatype old_type, | ||
386 | MPI_Comm comm) | ||
387 | { | ||
388 | ✗ | size_t count_ = (size_t)0; | |
389 | ✗ | for (int i=0; i<count; ++i) | |
390 | ✗ | count_ += (size_t)(v[i].size > 0); | |
391 | ✗ | if (count_ < 1) return MPI_DATATYPE_NULL; | |
392 | const XT_MPI_OFFSET_EXT *v_comp; | ||
393 | XT_MPI_OFFSET_EXT *v_comp_; | ||
394 | ✗ | if ((size_t)count != count_) { | |
395 | ✗ | v_comp = v_comp_ = xmalloc(count_ * sizeof (*v_comp)); | |
396 | ✗ | for (size_t i=0, j=0; i<(size_t)count; ++i) { | |
397 | ✗ | v_comp_[j] = v[i]; | |
398 | ✗ | j+= v[i].size > 0; | |
399 | } | ||
400 | } else { | ||
401 | ✗ | v_comp_ = NULL; | |
402 | ✗ | v_comp = v; | |
403 | } | ||
404 | struct Xt_mpi_strp_prs_params params; | ||
405 | ✗ | xt_init_mpi_strp_prs_params(&params, old_type, comm); | |
406 | ✗ | MPI_Datatype dt = XT_MPI_STRP_PRS_ENTRY(v_comp, count_, &params); | |
407 | ✗ | if ((size_t)count != count_) | |
408 | ✗ | free(v_comp_); | |
409 | ✗ | return dt; | |
410 | } | ||
411 | |||
412 | |||
413 | #undef XT_MPI_STRP_PRS_ENTRY | ||
414 | #undef XT_MPI_STRP_PRS_GEN_FALLBACK_TYPE | ||
415 | #undef XT_MPI_STRP_PRS_MATCH_CONTIGUOUS | ||
416 | #undef XT_MPI_STRP_PRS_MATCH_SIMPLE_VEC | ||
417 | #undef XT_MPI_STRP_PRS_MATCH_INDEXED | ||
418 | #undef XT_MPI_STRP_PRS_MATCH_BLOCK_VEC | ||
419 | #undef XT_MPI_OFFSET_EXT | ||
420 | |||
421 | #undef XT_TOKEN_PASTE2 | ||
422 | #undef XT_TOKEN_PASTE2_ | ||
423 | #undef XT_TOKEN_PASTE3 | ||
424 | #undef XT_TOKEN_PASTE3_ | ||
425 | |||
426 | /* | ||
427 | * Local Variables: | ||
428 | * c-basic-offset: 2 | ||
429 | * coding: utf-8 | ||
430 | * indent-tabs-mode: nil | ||
431 | * show-trailing-whitespace: t | ||
432 | * require-trailing-newline: t | ||
433 | * End: | ||
434 | */ | ||
435 |