Yet Another eXchange Tool 0.11.2
Loading...
Searching...
No Matches
xt_xmap_intersection_ext.c
Go to the documentation of this file.
1
12/*
13 * Keywords:
14 * Maintainer: Jörg Behrens <behrens@dkrz.de>
15 * Moritz Hanke <hanke@dkrz.de>
16 * Thomas Jahns <jahns@dkrz.de>
17 * URL: https://dkrz-sw.gitlab-pages.dkrz.de/yaxt/
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions are
21 * met:
22 *
23 * Redistributions of source code must retain the above copyright notice,
24 * this list of conditions and the following disclaimer.
25 *
26 * Redistributions in binary form must reproduce the above copyright
27 * notice, this list of conditions and the following disclaimer in the
28 * documentation and/or other materials provided with the distribution.
29 *
30 * Neither the name of the DKRZ GmbH nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
35 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
36 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
37 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
38 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
39 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
40 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
41 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
42 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
43 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
44 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 */
46#ifdef HAVE_CONFIG_H
47#include <config.h>
48#endif
49
50#include <assert.h>
51#include <limits.h>
52#include <stdbool.h>
53#include <stdio.h>
54#include <stdlib.h>
55#include <string.h>
56
57#include <mpi.h>
58
59#include "xt/xt_idxlist.h"
60#include "xt_idxlist_internal.h"
61#include "xt_idxlist_unpack.h"
62#include "xt/xt_idxvec.h"
63#include "xt_idxvec_internal.h"
64#include "xt/xt_xmap.h"
65#include "xt_xmap_internal.h"
66#include "xt/xt_mpi.h"
67#include "xt_mpi_internal.h"
68#include "core/core.h"
69#include "core/ppm_xfuncs.h"
72#include "xt_arithmetic_util.h"
73#include "ensure_array_size.h"
74#include "xt_cover.h"
75#include "xt_config_internal.h"
76
77
78
79static const char filename[] = "xt_xmap_intersection_ext.c";
80
81/* unfortunately GCC 11 cannot handle the literal constants used for
82 * MPI_STATUSES_IGNORE by MPICH */
83#if __GNUC__ >= 11 && __GNUC__ <= 13 && defined MPICH
84#pragma GCC diagnostic push
85#pragma GCC diagnostic ignored "-Wstringop-overread"
86#pragma GCC diagnostic ignored "-Wstringop-overflow"
87#endif
88
92static void
94static void
102static Xt_xmap
104 Xt_config config);
105static Xt_xmap
107 const int * src_positions,
108 const int * dst_positions);
109static Xt_xmap
110xmap_intersection_ext_spread(Xt_xmap xmap, int num_repetitions,
111 const int src_displacements[num_repetitions],
112 const int dst_displacements[num_repetitions]);
113
114
130
132 // list of relative position extents in index list to send or receive
133 struct Xt_pos_ext *transfer_pos_ext;
134 /* generated on-demand */
137 int rank;
138};
139
141
142 const struct Xt_xmap_vtable * vtable;
143
145
146 // we need the max position in order to enable quick range-checks
147 // for xmap-users like redist
148 int max_src_pos; // max possible pos over all src transfer_pos (always >= 0)
149 int max_dst_pos; // same for dst
150 int tag_offset; /* offset to add to message tags for uniqueness */
152 struct exchange_ext msg[];
153};
154
156
157static inline Xt_xmap_intersection_ext xmie(void *xmap)
158{
159 return (Xt_xmap_intersection_ext)xmap;
160}
161
163{
164 Xt_xmap_intersection_ext xmap_intersection_ext = xmie(xmap);
165 return xmap_intersection_ext->comm;
166}
167
169{
170 Xt_xmap_intersection_ext xmap_intersection_ext = xmie(xmap);
171 // the number of destination equals the number of source messages
172 return xmap_intersection_ext->n_out;
173}
174
176{
177 Xt_xmap_intersection_ext xmap_intersection_ext = xmie(xmap);
178 // the number of sources equals the number of destination messages
179 return xmap_intersection_ext->n_in;
180}
181
182static void
184{
185 Xt_xmap_intersection_ext xmap_intersection_ext = xmie(xmap);
186 size_t n_out = (size_t)xmap_intersection_ext->n_out;
187 const struct exchange_ext *restrict out_msg
188 = xmap_intersection_ext->msg + xmap_intersection_ext->n_in;
189 for (size_t i = 0; i < n_out; ++i)
190 ranks[i] = out_msg[i].rank;
191}
192
193static void
195{
196 Xt_xmap_intersection_ext xmap_intersection_ext = xmie(xmap);
197 size_t n_in = (size_t)xmap_intersection_ext->n_in;
198 const struct exchange_ext *restrict in_msg = xmap_intersection_ext->msg;
199 for (size_t i = 0; i < n_in; ++i)
200 ranks[i] = in_msg[i].rank;
201}
202
204 return xmie(xmap)->max_src_pos;
205}
206
208 return xmie(xmap)->max_dst_pos;
209}
210
211typedef int (*Xt_pos_ext_copy)(size_t num_orig_pos_ext,
212 size_t *num_pos_ext,
213 struct Xt_pos_ext **pos_ext,
214 const struct Xt_pos_ext *orig_pos_ext,
215 size_t num_orig_pos, const int *orig_pos,
216 void *state);
217
218static int
219pos_ext_copy_verbatim(size_t num_orig_pos_ext,
220 size_t *num_pos_ext,
221 struct Xt_pos_ext **pos_ext,
222 const struct Xt_pos_ext *orig_pos_ext,
223 size_t num_orig_pos, const int *orig_pos,
224 void *state)
225{
226 (void)state; (void)num_orig_pos; (void)orig_pos;
227 size_t size_pos_ext = num_orig_pos_ext * sizeof (**pos_ext);
228 struct Xt_pos_ext *pos_ext_ = *pos_ext = xmalloc(size_pos_ext);
229 memcpy(pos_ext_, orig_pos_ext, size_pos_ext);
230 *num_pos_ext = num_orig_pos_ext;
231 return -1;
232}
233
234static void
236 const struct exchange_ext *restrict msg,
237 int *nmsg_copy,
238 struct exchange_ext *restrict msg_copy,
239 int *max_pos_, int num_repetitions,
240 Xt_pos_ext_copy pos_ext_copy, void *pec_state)
241{
242 *nmsg_copy = (int)nmsg;
243 int max_pos = 0;
244 for (size_t i = 0; i < nmsg; ++i) {
245 msg_copy[i].num_transfer_pos = num_repetitions * msg[i].num_transfer_pos;
246 msg_copy[i].rank = msg[i].rank;
247 msg_copy[i].transfer_pos = NULL;
248 size_t num_transfer_pos_ext;
249 int new_max_pos
250 = pos_ext_copy((size_t)msg[i].num_transfer_pos_ext, &num_transfer_pos_ext,
251 &msg_copy[i].transfer_pos_ext, msg[i].transfer_pos_ext,
252 (size_t)msg[i].num_transfer_pos, msg[i].transfer_pos,
253 pec_state);
254 if (new_max_pos > max_pos)
255 max_pos = new_max_pos;
256 msg_copy[i].num_transfer_pos_ext = (int)num_transfer_pos_ext;
257 }
258 if (pos_ext_copy != pos_ext_copy_verbatim)
259 *max_pos_ = max_pos;
260}
261
262static Xt_xmap
263xmap_intersection_ext_copy_(Xt_xmap xmap, int num_repetitions,
264 Xt_pos_ext_copy pe_cpy_in, void *peci_state,
265 Xt_pos_ext_copy pe_cpy_out, void *peco_state)
266{
267 Xt_xmap_intersection_ext xmap_intersection_ext = xmie(xmap),
268 xmap_intersection_ext_new;
269 size_t n_in = (size_t)xmap_intersection_ext->n_in,
270 n_out = (size_t)xmap_intersection_ext->n_out,
271 num_isect = n_in + n_out;
272 xmap_intersection_ext_new
273 = xmalloc(sizeof (*xmap_intersection_ext_new)
274 + num_isect * sizeof (struct exchange_ext));
275 xmap_intersection_ext_new->vtable = xmap_intersection_ext->vtable;
276 xmap_intersection_ext_new->n_in = (int)n_in;
277 xmap_intersection_ext_new->n_out = (int)n_out;
278 xmap_intersection_ext_new->max_src_pos = xmap_intersection_ext->max_src_pos;
279 xmap_intersection_ext_new->max_dst_pos = xmap_intersection_ext->max_dst_pos;
280 xmap_intersection_ext_msg_copy(n_in, xmap_intersection_ext->msg,
281 &xmap_intersection_ext_new->n_in,
282 xmap_intersection_ext_new->msg,
283 &xmap_intersection_ext_new->max_dst_pos,
284 num_repetitions, pe_cpy_in, peci_state);
285 xmap_intersection_ext_msg_copy(n_out, xmap_intersection_ext->msg+n_in,
286 &xmap_intersection_ext_new->n_out,
287 xmap_intersection_ext_new->msg+n_in,
288 &xmap_intersection_ext_new->max_src_pos,
289 num_repetitions, pe_cpy_out, peco_state);
290 xmap_intersection_ext_new->comm
291 = xt_mpi_comm_smart_dup(xmap_intersection_ext->comm,
292 &xmap_intersection_ext_new->tag_offset);
293 return (Xt_xmap)xmap_intersection_ext_new;
294}
295
296static Xt_xmap
303
304
305static void
306xt_free_exchange_ext(size_t num_msg, struct exchange_ext *restrict msg)
307{
308 for (size_t i = 0; i < num_msg; ++i) {
309 free(msg[i].transfer_pos);
310 free(msg[i].transfer_pos_ext);
311 }
312}
313
315
316 Xt_xmap_intersection_ext xmap_intersection_ext = xmie(xmap);
317 size_t num_isect = (size_t)xmap_intersection_ext->n_in
318 + (size_t)xmap_intersection_ext->n_out;
319 xt_free_exchange_ext(num_isect, xmap_intersection_ext->msg);
320 xt_mpi_comm_smart_dedup(&xmap_intersection_ext->comm,
321 xmap_intersection_ext->tag_offset);
322 free(xmap_intersection_ext);
323}
324
325static void
327 int num_src_intersections,
328 const struct Xt_com_list src_com[num_src_intersections],
329 int num_dst_intersections,
330 const struct Xt_com_list dst_com[num_dst_intersections],
331 Xt_idxlist src_idxlist_local,
332 Xt_idxlist dst_idxlist_local,
333 MPI_Comm comm,
334 Xt_config config);
335
337xt_xmap_intersection_ext_new(int num_src_intersections,
338 const struct Xt_com_list
339 src_com[num_src_intersections],
340 int num_dst_intersections,
341 const struct Xt_com_list
342 dst_com[num_dst_intersections],
343 Xt_idxlist src_idxlist, Xt_idxlist dst_idxlist,
344 MPI_Comm comm)
345{
347 num_src_intersections,
348 src_com, num_dst_intersections,
349 dst_com, src_idxlist, dst_idxlist, comm, &xt_default_config);
350}
351
352
355 int num_src_intersections,
356 const struct Xt_com_list src_com[num_src_intersections],
357 int num_dst_intersections,
358 const struct Xt_com_list dst_com[num_dst_intersections],
359 Xt_idxlist src_idxlist,
360 Xt_idxlist dst_idxlist,
361 MPI_Comm comm,
362 Xt_config config) {
363
364 // ensure that yaxt is initialized
365 assert(xt_initialized());
366
367 size_t num_isect
368 = (size_t)num_dst_intersections + (size_t)num_src_intersections;
370 = xmalloc(sizeof (*xmap) + num_isect * sizeof (struct exchange_ext));
371
373
374 xmap->comm = comm = xt_mpi_comm_smart_dup(comm, &xmap->tag_offset);
375
376 Xt_idxlist src_idxlist_orig = NULL, dst_idxlist_orig = NULL;
377 if (src_idxlist->vtable->idxlist_pack_code == VECTOR) {
378 src_idxlist_orig = src_idxlist;
379 src_idxlist = xt_idxvec_get_idxstripes(src_idxlist_orig);
380 }
381 if (dst_idxlist->vtable->idxlist_pack_code == VECTOR) {
382 dst_idxlist_orig = dst_idxlist;
383 dst_idxlist = xt_idxvec_get_idxstripes(dst_idxlist_orig);
384 }
385
386 // generate exchange lists
388 num_src_intersections, src_com,
389 num_dst_intersections, dst_com,
390 src_idxlist, dst_idxlist, comm,
391 config);
392
393 size_t new_num_isect = (size_t)xmap->n_in + (size_t)xmap->n_out;
394 if (new_num_isect != num_isect)
395 xmap = xrealloc(xmap, sizeof (*xmap) + (new_num_isect
396 * sizeof(struct exchange_ext)));
397
398 xmap->max_dst_pos = xt_idxlist_get_num_indices(dst_idxlist) - 1;
399
400 if (src_idxlist_orig)
401 xt_idxlist_delete(src_idxlist);
402 if (dst_idxlist_orig)
403 xt_idxlist_delete(dst_idxlist);
404 return (Xt_xmap)xmap;
405}
406
408 struct Xt_pos_ext_vec cover;
410};
411
412static struct ted_result
414 int num_intersections,
415 const struct Xt_com_list intersections[num_intersections],
416 Xt_idxlist mypart_idxlist,
417 struct exchange_ext *resSets,
418 int (*restrict dst_removals_per_intersection)[2],
419 Xt_config config);
420
421
422static struct Xt_pos_ext *
424 int num_src_intersections,
425 const struct Xt_com_list src_com[num_src_intersections],
426 int num_dst_intersections,
427 const struct Xt_com_list dst_com[num_dst_intersections],
428 struct exchange_ext dst_ext[num_dst_intersections],
429 int (*restrict src_removals_per_intersection)[2],
430 const int (*restrict dst_removals_per_intersection)[2],
431 int tag_offset,
432 MPI_Comm comm);
433
434static void
435remap_dst_intersections(int num_dst_intersections,
436 const struct Xt_com_list dst_com[num_dst_intersections],
437 Xt_idxlist mypart_idxlist,
438 int resCount,
439 struct exchange_ext resSets[resCount],
440 const int (*removals_per_intersection)[2],
441 Xt_config config);
442
446};
447
448static struct tes_result
450 int num_intersections,
451 const struct Xt_com_list intersections[num_intersections],
452 Xt_idxlist mypart_idxlist,
453 struct exchange_ext *resSets,
454 const int (*restrict removals_per_intersection)[2],
455 const struct Xt_pos_ext *pos_updates,
456 Xt_config config);
457
458static void
460 int num_src_intersections,
461 const struct Xt_com_list src_com[num_src_intersections],
462 int num_dst_intersections,
463 const struct Xt_com_list dst_com[num_dst_intersections],
464 Xt_idxlist src_idxlist,
465 Xt_idxlist dst_idxlist,
466 MPI_Comm comm,
467 Xt_config config) {
468
469 /* {dst|src}_removals_per_intersection[i][0] denotes the number of
470 * indices to be removed from the intersection with {src|dst}_com[i].rank.
471 * {dst|src}_removals_per_intersection[rank][1] denotes the number of
472 * pos_ext needed to represent this change (0 if either none or all
473 * indices got removed).
474 */
475 int (*src_removals_per_intersection)[2] =
476 xmalloc(((size_t)num_dst_intersections + (size_t)num_src_intersections)
477 * sizeof(*src_removals_per_intersection)),
478 (*dst_removals_per_intersection)[2]
479 = src_removals_per_intersection + num_src_intersections;
480
481 {
482 struct ted_result tedr
484 num_dst_intersections, dst_com, dst_idxlist,
485 xmap->msg, dst_removals_per_intersection, config);
486 struct Xt_pos_ext_vec cover = tedr.cover;
487 if (!xt_idxlist_pos_ext_is_full_cover(dst_idxlist, cover)) {
488 if (xt_idxlist_get_num_indices(dst_idxlist) == 0)
489 Xt_abort(comm, "ERROR: ups...this should not have happend...", filename,
490 __LINE__);
491 int first_missing_pos
492 = ((cover.num_pos_ext > 0) && (cover.pos_ext[0].start == 0))
493 ? cover.pos_ext[0].start + cover.pos_ext[0].size : 0;
494 print_miss_msg(dst_idxlist, first_missing_pos, comm, filename, __LINE__);
495 }
496 xt_cover_finish(&cover);
497 xmap->n_in = tedr.resCount;
498 }
499
500 // exchange pos_ext of lists where additional indices need to be removed
501 struct Xt_pos_ext *pos_updates
503 num_src_intersections, src_com, num_dst_intersections, dst_com, xmap->msg,
504 src_removals_per_intersection,
505 (const int (*)[2])dst_removals_per_intersection, xmap->tag_offset, comm);
506
507 remap_dst_intersections(num_dst_intersections, dst_com, dst_idxlist,
508 xmap->n_in, xmap->msg,
509 (const int (*)[2])dst_removals_per_intersection,
510 config);
511
512 src_removals_per_intersection =
513 xrealloc(src_removals_per_intersection, (size_t)num_src_intersections
514 * sizeof(*src_removals_per_intersection));
515
516 struct tes_result tesr
518 num_src_intersections, src_com, src_idxlist, xmap->msg+xmap->n_in,
519 (const int (*)[2])src_removals_per_intersection, pos_updates, config);
520 xmap->n_out = tesr.resCount;
521 xmap->max_src_pos = tesr.max_pos;
522 free(src_removals_per_intersection);
523 free(pos_updates);
524}
525
529
530static struct Xt_pos_ext_overlap
532{
533 /* == 0 if a.size >= 0 ; == ~0 if a.size < 0 */
534 int aSizeMaskNeg = isign_mask(a.size),
535 /* compute start and end indices of ranges */
536 a_s = a.start + (aSizeMaskNeg & (a.size + 1)),
537 a_e = a.start + (~aSizeMaskNeg & (a.size - 1)),
538 bSizeMaskNeg = isign_mask(b.size),
539 b_s = b.start + (bSizeMaskNeg & (b.size + 1)),
540 b_e = b.start + (~bSizeMaskNeg & (b.size - 1));
541 /* does overlap exist? */
542 if ((b_s > a_e) | (a_s > b_e))
543 return (struct Xt_pos_ext_overlap){ a.size, 0, 0};
544 else {
545 /* determine length of overlap parts */
546 int lowSkipA = b_s - a_s;
547 int lowSkipB = -lowSkipA;
548 lowSkipA = (int)((unsigned)(lowSkipA + abs(lowSkipA))/2U);
549 lowSkipB = (int)((unsigned)(lowSkipB + abs(lowSkipB))/2U);
550 int overlapLen = imin(b_e - b_s - lowSkipB + 1,
551 abs(a.size) - lowSkipA);
552 int highSkipA = abs(a.size) - lowSkipA - overlapLen;
553 /* then adjust lengths to direction of overlap (from
554 * perspective of a */
555 int aSkipLen = (~aSizeMaskNeg & lowSkipA)
556 | (aSizeMaskNeg & -highSkipA),
557 aTailLen = (aSizeMaskNeg & -lowSkipA)
558 | (~aSizeMaskNeg & highSkipA);
559 return (struct Xt_pos_ext_overlap){ aSkipLen, overlapLen, aTailLen };
560 }
561}
562
563
564
565static void
567 struct Xt_pos_ext_vec *pos_exts);
568
569static struct Xt_pos_ext *
571 int num_stripes,
572 const struct Xt_stripe stripes[num_stripes],
573 int *num_ext,
574 int single_match_only,
575 Xt_config config)
576{
577 struct Xt_pos_ext *pos_ext;
578#ifndef NDEBUG
579 int retval =
580#endif
582 idxlist, num_stripes, stripes,
583 num_ext, &pos_ext, single_match_only, config);
584 assert(retval == 0);
585 return pos_ext;
586}
587
588static struct ted_result
590 int num_intersections,
591 const struct Xt_com_list intersections[num_intersections],
592 Xt_idxlist mypart_idxlist,
593 struct exchange_ext *restrict resSets,
594 int (*restrict dst_removals_per_intersection)[2],
595 Xt_config config)
596{
597 int new_num_intersections = 0;
598
599 // we have to enforce single_match_only not only within a single
600 // intersection, but also between all intersections
601 /* ranges already covered from previous intersections, i.e. which
602 * must not be transmitted twice */
603 enum { initial_vec_size = 8 };
604 struct Xt_pos_ext_vec cover;
605 xt_cover_start(&cover, initial_vec_size);
606
607 for (int i = 0; i < num_intersections; ++i) {
608
609 int num_stripes, num_indices_to_remove = 0;
610 struct Xt_stripe *intersection_idxstripes;
611 xt_idxlist_get_index_stripes(intersections[i].list,
612 &intersection_idxstripes, &num_stripes);
613 int num_isect_pos_exts;
614 struct Xt_pos_ext *restrict isect_pos_exts
616 mypart_idxlist, num_stripes, intersection_idxstripes,
617 &num_isect_pos_exts, 1, config);
618 int isect_pos_exts_size_psum = 0;
619 int intersection_size = xt_idxlist_get_num_indices(intersections[i].list);
620 /* start with all indices from intersection as used,
621 later split ranges, if overlaps are found */
622 struct Xt_pos_ext_vec transferable
623 = (struct Xt_pos_ext_vec){ .num_pos_ext = 1,
624 .size_pos_ext = initial_vec_size,
625 .pos_ext = xrealloc(intersection_idxstripes,
626 sizeof (struct Xt_pos_ext) * initial_vec_size) };
627 intersection_idxstripes = NULL;
628 transferable.pos_ext[0]
629 = (struct Xt_pos_ext){ .start = 0, .size = intersection_size };
630 /* find overlap(s) with previously found ranges for all
631 * stripes mapped to position extents */
632 for (size_t j = 0; j < (size_t)num_isect_pos_exts; ++j) {
633 struct Xt_pos_ext isect_pos_ext = isect_pos_exts[j];
634 /* ensure isect_pos_ext is oriented with ascending positions */
635 int isign_mask_isect_pos_ext_size = isign_mask(isect_pos_ext.size);
636 isect_pos_ext.start
637 += isign_mask_isect_pos_ext_size & (isect_pos_ext.size + 1);
638 int isect_pos_ext_orig_size = isect_pos_ext.size;
639 isect_pos_ext.size = abs(isect_pos_ext.size);
640 isect_pos_exts_size_psum += isect_pos_ext.size;
641 /* keep progress as inverse of change to psum to compensate for
642 * eventual correction later */
643 int progress = -isect_pos_ext.size;
644 size_t search_start_pos = 0, insert_pos;
645 do {
646 struct Xt_pos_range query = (struct Xt_pos_range){
647 .start = isect_pos_ext.start,
648 .end = isect_pos_ext.start + isect_pos_ext.size - 1 };
649 insert_pos
650 = xt_cover_insert_or_overlap(&cover, query, search_start_pos);
651 if (insert_pos == SIZE_MAX)
652 goto next_isect_pos_ext;
653 struct Xt_pos_ext_overlap overlap_desc
654 = Xt_get_pos_ext_overlap(isect_pos_ext, cover.pos_ext[insert_pos]);
655 /* insert overlap into updates
656 * by ...*/
657 /* ...first inserting the skipped part into
658 * cover.pos_ext, since that is sorted
659 * and obviously precedes cover.pos_ext[insert_pos],
660 * cover.pos_ext[insert_pos] can be seemlessly extended...
661 */
662 cover.pos_ext[insert_pos].start -= overlap_desc.skip;
663 cover.pos_ext[insert_pos].size += overlap_desc.skip;
664 /* ...and optionally merged with its predecessor, if the
665 * intervening range becomes zero, ... */
666 if (insert_pos > 0
667 && (cover.pos_ext[insert_pos].start
668 == (cover.pos_ext[insert_pos - 1].start
669 + cover.pos_ext[insert_pos - 1].size)))
670 {
671 cover.pos_ext[insert_pos - 1].size
672 += cover.pos_ext[insert_pos].size;
673 memmove(cover.pos_ext + insert_pos, cover.pos_ext + insert_pos + 1,
674 (--cover.num_pos_ext - insert_pos)
675 * sizeof (*cover.pos_ext));
676 --insert_pos;
677 }
678 progress = (~isign_mask_isect_pos_ext_size
679 & (progress + overlap_desc.skip))
680 | (isign_mask_isect_pos_ext_size
681 & (isect_pos_ext_orig_size + overlap_desc.tail));
682 /* ... then splitting transferable accordingly, ... */
683 num_indices_to_remove += overlap_desc.overlap;
685 .start = isect_pos_exts_size_psum + progress,
686 .size = overlap_desc.overlap }, &transferable);
687 progress += overlap_desc.overlap;
688 /* ... lastly the search can continue with the tail ... */
689 isect_pos_ext.start += overlap_desc.skip + overlap_desc.overlap;
690 /* ... if there is any */
691 isect_pos_ext.size = overlap_desc.tail;
692 search_start_pos = ++insert_pos;
693 } while ((isect_pos_ext.size != 0)
694 & (search_start_pos != cover.num_pos_ext));
695 if (isect_pos_ext.size)
696 /* already at end of list -> append ... */
697 xt_cover_range_append(&cover, isect_pos_ext);
698 /* ... and start the next intersection range */
699 next_isect_pos_ext:
700 ;
701 }
702
703 if (intersection_size > num_indices_to_remove) {
704 resSets[new_num_intersections].transfer_pos_ext
705 = xrealloc(transferable.pos_ext, sizeof (struct Xt_pos_ext)
706 * transferable.num_pos_ext);
707 /* start with empty cache of positions to transfer */
708 resSets[new_num_intersections].transfer_pos = NULL;
709 resSets[new_num_intersections].num_transfer_pos
710 = intersection_size - num_indices_to_remove;
711 resSets[new_num_intersections].num_transfer_pos_ext
712 = (int)transferable.num_pos_ext;
713 resSets[new_num_intersections].rank = intersections[i].rank;
714 ++new_num_intersections;
715 } else
716 free(transferable.pos_ext);
717 dst_removals_per_intersection[i][0] = num_indices_to_remove;
718 dst_removals_per_intersection[i][1]
719 = ((num_indices_to_remove == intersection_size)
720 | (num_indices_to_remove == 0))?0:(int)transferable.num_pos_ext;
721 free(isect_pos_exts);
722 }
723 /* since cover is a struct, at least pgcc 11-13 cannot compile this with a
724 * compound literal or initialize r directly */
725#if defined __PGI && __PGIC__ <= 13
726 struct ted_result r;
727 r.cover = cover;
728 r.resCount = new_num_intersections;
729 return r;
730#else
731 return (struct ted_result){
732 .cover = cover,
733 .resCount = new_num_intersections };
734#endif
735}
736
737static void
739 struct Xt_pos_ext_vec *pos_exts)
740{
741 struct Xt_pos_ext *restrict pos_exts_ = pos_exts->pos_ext;
742 size_t num_pos_exts_ = pos_exts->num_pos_ext;
743 size_t i = num_pos_exts_;
744 while (pos_exts_[--i].start > pos_ext.start)
745 ;
746 int db_skip = pos_ext.start - pos_exts_[i].start;
747 if ((!db_skip) & (pos_ext.size == pos_exts_[i].size))
748 {
749 /* delete fully overlapped transfer part */
750 memmove(pos_exts_ + i, pos_exts_ + i + 1,
751 sizeof (*pos_exts_) * (num_pos_exts_ - i - 1));
752 pos_exts->num_pos_ext = --num_pos_exts_;
753 }
754 else if (db_skip + pos_ext.size == pos_exts_[i].size)
755 {
756 /* pos_ext overlaps end of pos_exts_[i] */
757 pos_exts_[i].size -= pos_ext.size;
758 }
759 else if (db_skip == 0)
760 {
761 /* pos_ext overlaps start of pos_exts_[i] */
762 pos_exts_[i].start = pos_ext.start + pos_ext.size;
763 pos_exts_[i].size -= pos_ext.size;
764 }
765 else
766 {
767 struct Xt_pos_ext orig = pos_exts_[i];
768 ENSURE_ARRAY_SIZE(pos_exts->pos_ext, pos_exts->size_pos_ext,
769 num_pos_exts_ + 1);
770 pos_exts_ = pos_exts->pos_ext;
771 memmove(pos_exts_ + i + 1, pos_exts_ + i,
772 (num_pos_exts_ - i) * sizeof (*pos_exts_));
773 pos_exts_[i] = (struct Xt_pos_ext){.start = orig.start,
774 .size = db_skip };
775 pos_exts_[i + 1] = (struct Xt_pos_ext){
776 .start = pos_ext.start + pos_ext.size,
777 .size = orig.size - db_skip - pos_ext.size };
778 pos_exts->num_pos_ext = ++num_pos_exts_;
779 }
780}
781
782static struct Xt_pos_ext *
784 int num_src_intersections,
785 const struct Xt_com_list src_com[num_src_intersections],
786 int num_dst_intersections,
787 const struct Xt_com_list dst_com[num_dst_intersections],
788 struct exchange_ext dst_ext[num_dst_intersections],
789 int (*restrict src_removals_per_intersection)[2],
790 const int (*restrict dst_removals_per_intersection)[2],
791 int tag_offset,
792 MPI_Comm comm)
793{
794 MPI_Request * requests
795 = xmalloc((size_t)(num_src_intersections + 2 * num_dst_intersections) *
796 sizeof(*requests));
797 MPI_Request *restrict send_header_requests = requests,
798 *restrict recv_requests = requests + num_dst_intersections,
799 *restrict send_data_requests = recv_requests + num_src_intersections;
800
801 // set up receives for indices that need to be removed from the send messages
802 for (int i = 0; i < num_src_intersections; ++i)
803 xt_mpi_call(MPI_Irecv(
804 src_removals_per_intersection[i], 2, MPI_INT, src_com[i].rank,
806 comm, recv_requests + i), comm);
807
808 /* send rebuilt pos_ext vectors that needed to be modified on the
809 * target side due to duplicated receives
810 */
811 unsigned num_active_dst = 0, num_dst_changes = 0;
812 for (int i = 0; i < num_dst_intersections; ++i) {
813 xt_mpi_call(MPI_Isend(
814 CAST_MPI_SEND_BUF(dst_removals_per_intersection[i]),
815 2, MPI_INT, dst_com[i].rank,
817 comm, send_header_requests + i), comm);
818
819 if (dst_removals_per_intersection[i][1] > 0) {
820
821 assert(dst_removals_per_intersection[i][1]
822 == dst_ext[num_active_dst].num_transfer_pos_ext
823 && dst_com[i].rank == dst_ext[num_active_dst].rank);
824 xt_mpi_call(MPI_Isend(
825 dst_ext[num_active_dst].transfer_pos_ext,
826 dst_removals_per_intersection[i][1],
827 MPI_2INT, dst_com[i].rank,
829 comm, send_data_requests + num_dst_changes),
830 comm);
831 ++num_dst_changes;
832 }
833 num_active_dst += (unsigned)((dst_removals_per_intersection[i][0] == 0)
834 | (dst_removals_per_intersection[i][1] != 0));
835 }
836
837 // wait for the receiving of headers to complete
838 xt_mpi_call(MPI_Waitall(num_src_intersections + num_dst_intersections,
839 requests, MPI_STATUSES_IGNORE), comm);
840
841 size_t total_num_pos_ext_to_recv = 0;
842
843 for (size_t i = 0; i < (size_t)num_src_intersections; ++i)
844 total_num_pos_ext_to_recv += (size_t)src_removals_per_intersection[i][1];
845
846 struct Xt_pos_ext *src_updated_pos_ext;
847 unsigned num_src_changes = 0;
848 if (total_num_pos_ext_to_recv > 0) {
849
850 src_updated_pos_ext
851 = xmalloc(total_num_pos_ext_to_recv * sizeof(*src_updated_pos_ext));
852
853 /* set up receive for pos_ext that need to be modified because
854 * indices needed to be removed from the intersection */
855 size_t offset = 0;
856 for (int i = 0; i < num_src_intersections; ++i)
857 if (src_removals_per_intersection[i][1] > 0) {
858 ++num_src_changes;
859 xt_mpi_call(MPI_Irecv(
860 src_updated_pos_ext + offset,
861 src_removals_per_intersection[i][1], MPI_2INT,
862 src_com[i].rank,
864 comm, send_data_requests - num_src_changes), comm);
865
866 offset += (size_t)src_removals_per_intersection[i][1];
867 }
868 } else
869 src_updated_pos_ext = NULL;
870
871 // wait until all communication is completed
872 xt_mpi_call(MPI_Waitall((int)num_src_changes + (int)num_dst_changes,
873 send_data_requests - num_src_changes,
874 MPI_STATUSES_IGNORE), comm);
875
876 free(requests);
877 return src_updated_pos_ext;
878}
879
880static void
881remap_intersection(Xt_idxlist mypart_idxlist,
882 Xt_idxlist intersection,
883 size_t num_pos_updates,
884 const struct Xt_pos_ext pos_updates[num_pos_updates],
885 struct exchange_ext *resSet,
886 int single_match_only,
887 Xt_config config);
888
889static void
891 int num_dst_intersections,
892 const struct Xt_com_list intersections[num_dst_intersections],
893 Xt_idxlist mypart_idxlist,
894 int resCount,
895 struct exchange_ext resSets[resCount],
896 const int (*removals_per_intersection)[2],
897 Xt_config config)
898{
899 size_t resIdx = 0;
900 for (size_t i = 0; i < (size_t)num_dst_intersections; ++i)
901 {
902 int intersection_size
903 = xt_idxlist_get_num_indices(intersections[i].list);
904
905 int num_indices_to_remove = removals_per_intersection[i][0];
906
907 if (num_indices_to_remove != intersection_size) {} else
908 /* intersection is made redundant */
909 continue;
910
911 struct Xt_pos_ext *pos_updates = resSets[resIdx].transfer_pos_ext;
912 remap_intersection(mypart_idxlist, intersections[i].list,
913 (size_t)removals_per_intersection[i][1],
914 pos_updates, resSets + resIdx, 1, config);
915 free(pos_updates);
916 ++resIdx;
917 }
918 assert(resIdx == (size_t)resCount);
919}
920
921static inline int
922pos_ext_find_max_pos(int num_pos_ext,
923 const struct Xt_pos_ext *restrict pos_ext)
924{
925 int max_pos = -1;
926 for (size_t i = 0; i < (size_t)num_pos_ext; ++i) {
927 int start = pos_ext[i].start,
928 size = pos_ext[i].size,
929 max = size > 0 ? start + size - 1 : start;
930 if (max > max_pos) max_pos = max;
931 }
932 return max_pos;
933}
934
935/* compute updated positions for send direction */
936static struct tes_result
938 int num_intersections,
939 const struct Xt_com_list intersections[num_intersections],
940 Xt_idxlist mypart_idxlist,
941 struct exchange_ext *resSets,
942 const int (*restrict removals_per_intersection)[2],
943 const struct Xt_pos_ext *pos_updates,
944 Xt_config config)
945{
946 /* count total number of intersections that results */
947 int new_num_intersections = 0;
948 /* indexes into pos_updates */
949 size_t intersection_pos_ext = 0;
950
951 int max_pos = -1;
952 for (int i = 0; i < num_intersections; ++i) {
953
954 int intersection_size
955 = xt_idxlist_get_num_indices(intersections[i].list);
956
957 int num_indices_to_remove = removals_per_intersection[i][0];
958
959 /* intersection might be redundant */
960 if (num_indices_to_remove != intersection_size) {
961
962 remap_intersection(mypart_idxlist, intersections[i].list,
963 (size_t)removals_per_intersection[i][1],
964 pos_updates + intersection_pos_ext,
965 resSets + new_num_intersections, 0, config);
966
967 int max = pos_ext_find_max_pos(
968 resSets[new_num_intersections].num_transfer_pos_ext,
969 resSets[new_num_intersections].transfer_pos_ext);
970 if (max > max_pos) max_pos = max;
971 /* evaluate cache lazily */
972 resSets[new_num_intersections].transfer_pos = NULL;
973 resSets[new_num_intersections].num_transfer_pos
974 = intersection_size - num_indices_to_remove;
975 resSets[new_num_intersections].rank = intersections[i].rank;
976 new_num_intersections++;
977 intersection_pos_ext += (size_t)removals_per_intersection[i][1];
978 }
979 }
980
981 return (struct tes_result) {
982 .resCount = new_num_intersections,
983 .max_pos = max_pos,
984 };
985}
986
987static struct Xt_stripe *
988refine_stripes(int *num_stripes_,
989 struct Xt_stripe *restrict intersection_idxstripes,
990 size_t num_pos_updates,
991 const struct Xt_pos_ext *restrict pos_updates)
992{
993 /* trim intersection_idxstripes to those actually used */
994 size_t num_refined_intersection_idxstripes = 0,
995 size_refined_intersection_idxstripes = num_pos_updates;
996 struct Xt_stripe *restrict refined_intersection_idxstripes
997 = xmalloc(size_refined_intersection_idxstripes
998 * sizeof (*refined_intersection_idxstripes));
999 size_t i_stripe = 0;
1000 int nstrides_psum = 0;
1001 for (size_t i_pos_ext = 0; i_pos_ext < num_pos_updates; ++i_pos_ext)
1002 {
1003 int pos = pos_updates[i_pos_ext].start;
1004 int size = pos_updates[i_pos_ext].size;
1005 while (nstrides_psum + intersection_idxstripes[i_stripe].nstrides <= pos)
1006 {
1007 nstrides_psum += intersection_idxstripes[i_stripe].nstrides;
1008 ++i_stripe;
1009 }
1010 do {
1011 int instripe_pos = pos - nstrides_psum;
1012 ENSURE_ARRAY_SIZE(refined_intersection_idxstripes,
1013 size_refined_intersection_idxstripes,
1014 num_refined_intersection_idxstripes + 1);
1015 struct Xt_stripe cur_stripe = intersection_idxstripes[i_stripe];
1016 int cur_stripe_nstrides = cur_stripe.nstrides;
1017 int overlap = imin(cur_stripe_nstrides - instripe_pos, size);
1018 cur_stripe.start
1019 = (Xt_int)(cur_stripe.start
1020 + (Xt_int)instripe_pos * cur_stripe.stride);
1021 cur_stripe.nstrides = overlap;
1022 refined_intersection_idxstripes[num_refined_intersection_idxstripes]
1023 = cur_stripe;
1024 ++num_refined_intersection_idxstripes;
1025 i_stripe += (instripe_pos + overlap == cur_stripe_nstrides);
1026 nstrides_psum += (instripe_pos + overlap == cur_stripe_nstrides)
1027 ? cur_stripe_nstrides : 0;
1028 pos += overlap;
1029 size -= overlap;
1030 } while (size);
1031 }
1032 free(intersection_idxstripes);
1033 *num_stripes_ = (int)num_refined_intersection_idxstripes;
1034 return refined_intersection_idxstripes;
1035}
1036
1037
1038/* match index stripes of intersection to corresponding positions in
1039 * part list, optionally updating the stripes
1040 * @param num_pos_updates number of position extents describing the
1041 * subset of positions from intersections to use
1042 * @param pos_updates list of position extents to use from \a intersection
1043 */
1044static void
1046 Xt_idxlist intersection,
1047 size_t num_pos_updates,
1048 const struct Xt_pos_ext pos_updates[num_pos_updates],
1049 struct exchange_ext *resSet,
1050 int single_match_only,
1051 Xt_config config)
1052{
1053 struct Xt_stripe *intersection_idxstripes;
1054 int num_stripes;
1055 xt_idxlist_get_index_stripes(intersection,
1056 &intersection_idxstripes,
1057 &num_stripes);
1058 if (num_pos_updates)
1059 intersection_idxstripes
1060 = refine_stripes(&num_stripes, intersection_idxstripes,
1061 num_pos_updates, pos_updates);
1062
1063 /* match back intersection_idxstripes to positions in mypart */
1064 resSet->transfer_pos_ext = NULL;
1065#ifndef NDEBUG
1066 int retval =
1067#endif
1069 mypart_idxlist, num_stripes, intersection_idxstripes,
1070 &resSet->num_transfer_pos_ext,
1071 &resSet->transfer_pos_ext, single_match_only, config);
1072 assert(retval == 0);
1073 free(intersection_idxstripes);
1074}
1075
1077 int n_out, const struct exchange_ext *restrict out_msg,
1078 int n_in, const struct exchange_ext *restrict in_msg,
1079 struct exchange_ext *restrict remote_out_msg,
1080 int tag_offset, MPI_Comm comm) {
1081
1082 MPI_Request * requests
1083 = xmalloc((size_t)(n_in + 2 * n_out) * sizeof(*requests));
1084 MPI_Request *send_header_requests = requests,
1085 *recv_requests = requests + n_out,
1086 *send_data_requests = recv_requests + n_in;
1087
1088 // set up receives for number of transfer_pos_ext per remote
1089 // for (int i = 0; i < n_in; ++i)
1090 for (int i = 0; i < n_in; ++i)
1091 xt_mpi_call(MPI_Irecv(
1092 &(remote_out_msg[i].num_transfer_pos_ext), 1, MPI_INT,
1093 in_msg[i].rank,
1095 comm, recv_requests + i), comm);
1096
1097 // send number of transfer_pos_ext
1098 for (int i = 0; i < n_out; ++i) {
1099 xt_mpi_call(MPI_Isend(
1100 CAST_MPI_SEND_BUF(&(out_msg[i].num_transfer_pos_ext)),
1101 1, MPI_INT, out_msg[i].rank,
1103 comm, send_header_requests + i), comm);
1104
1105 xt_mpi_call(MPI_Isend(
1106 CAST_MPI_SEND_BUF(out_msg[i].transfer_pos_ext),
1107 out_msg[i].num_transfer_pos_ext,
1108 MPI_2INT, out_msg[i].rank,
1110 comm, send_data_requests + i),
1111 comm);
1112 }
1113
1114 // wait for the receiving of headers to complete
1116 MPI_Waitall(n_out + n_in, send_header_requests, MPI_STATUSES_IGNORE), comm);
1117
1118 size_t total_num_pos_ext_to_recv = 0;
1119
1120 for (size_t i = 0; i < (size_t)n_in; ++i)
1121 total_num_pos_ext_to_recv +=
1122 (size_t)(remote_out_msg[i].num_transfer_pos_ext);
1123
1124 struct Xt_pos_ext *transfer_pos_ext_buffer;
1125 if (total_num_pos_ext_to_recv > 0) {
1126
1127 transfer_pos_ext_buffer
1128 = xmalloc(total_num_pos_ext_to_recv * sizeof(*transfer_pos_ext_buffer));
1129
1130 // set up receive for transfer_pos_ext
1131 struct Xt_pos_ext *curr_transfer_pos_ext = transfer_pos_ext_buffer;
1132 for (int i = 0; i < n_in; ++i) {
1133 xt_mpi_call(MPI_Irecv(
1134 curr_transfer_pos_ext,
1135 remote_out_msg[i].num_transfer_pos_ext, MPI_2INT,
1136 in_msg[i].rank,
1138 comm, recv_requests + i), comm);
1139
1140 remote_out_msg[i].transfer_pos_ext = curr_transfer_pos_ext;
1141 curr_transfer_pos_ext += remote_out_msg[i].num_transfer_pos_ext;
1142 }
1143 } else
1144 transfer_pos_ext_buffer = NULL;
1145
1146 // wait until all communication is completed
1148 MPI_Waitall(n_in + n_out, recv_requests, MPI_STATUSES_IGNORE), comm);
1149
1150 free(requests);
1151 return transfer_pos_ext_buffer;
1152}
1153
1154static void
1155sort_transfer_pos_ext(int n, struct exchange_ext *msg, Xt_config config) {
1156
1157 int buffer_size = 0;
1158 for (int i = 0; i < n; ++i)
1159 if (msg[i].transfer_pos == NULL && msg[i].num_transfer_pos > buffer_size)
1160 buffer_size = msg[i].num_transfer_pos;
1161
1162 int *transfer_pos_buffer
1163 = buffer_size > 0
1164 ? xmalloc((size_t)buffer_size * sizeof(*transfer_pos_buffer))
1165 : NULL;
1166
1167 for (int i = 0; i < n; ++i) {
1168
1169 // get the positions array
1170 int *restrict transfer_pos;
1171 size_t num_transfer_pos = (size_t)(msg[i].num_transfer_pos);
1172 if (msg[i].transfer_pos != NULL) {
1173 transfer_pos = msg[i].transfer_pos;
1174 } else {
1175 transfer_pos = transfer_pos_buffer;
1177 (size_t)(msg[i].num_transfer_pos_ext), msg[i].transfer_pos_ext,
1178 num_transfer_pos, transfer_pos);
1179 }
1180
1181 // sort the positions array
1182 config->sort_funcs->sort_int(transfer_pos, num_transfer_pos);
1183
1184 // convert the positions array to position extents array
1185 size_t num_transfer_pos_ext = count_pos_ext(num_transfer_pos, transfer_pos);
1186 struct Xt_pos_ext * transfer_pos_ext;
1187 if (num_transfer_pos_ext != (size_t)(msg[i].num_transfer_pos_ext)) {
1188 msg[i].num_transfer_pos_ext = (int)num_transfer_pos_ext;
1189 transfer_pos_ext =
1190 (msg[i].transfer_pos_ext =
1191 xrealloc(msg[i].transfer_pos_ext,
1192 num_transfer_pos_ext * sizeof(*transfer_pos_ext)));
1193 } else {
1194 transfer_pos_ext = msg[i].transfer_pos_ext;
1195 }
1197 num_transfer_pos, transfer_pos, num_transfer_pos_ext, transfer_pos_ext);
1198 }
1199
1200 if (buffer_size > 0) free(transfer_pos_buffer);
1201}
1202
1203static void
1205 struct exchange_ext *msg,
1206 struct exchange_ext *permutation_msg,
1207 Xt_config config) {
1208
1209 size_t buffer_size = 0;
1210 for (int i = 0; i < n; ++i) {
1211 assert(msg[i].transfer_pos == NULL
1212 && permutation_msg[i].transfer_pos == NULL);
1213 size_t curr_buffer_size
1214 = (size_t)(msg[i].num_transfer_pos)
1215 + (size_t)(permutation_msg[i].num_transfer_pos);
1216 if (curr_buffer_size > buffer_size) buffer_size = curr_buffer_size;
1217 }
1218
1219 int *transfer_pos_buffer
1220 = buffer_size > 0
1221 ? xmalloc(buffer_size * sizeof(*transfer_pos_buffer))
1222 : NULL;
1223
1224 for (int i = 0; i < n; ++i) {
1225
1226 // get the positions array
1227 size_t num_transfer_pos = (size_t)(msg[i].num_transfer_pos);
1228
1229 int *restrict transfer_pos = transfer_pos_buffer;
1231 (size_t)(msg[i].num_transfer_pos_ext), msg[i].transfer_pos_ext,
1232 num_transfer_pos, transfer_pos);
1233
1234 // get the permutation array
1235 int *permutation = transfer_pos_buffer + num_transfer_pos;
1237 (size_t)(permutation_msg[i].num_transfer_pos_ext),
1238 permutation_msg[i].transfer_pos_ext, num_transfer_pos, permutation);
1239
1240 // sort the positions array
1242 transfer_pos, num_transfer_pos, permutation);
1243
1244 // convert the permutation array to position extents array
1245 size_t num_transfer_pos_ext = count_pos_ext(num_transfer_pos, permutation);
1246 struct Xt_pos_ext * transfer_pos_ext;
1247 if (num_transfer_pos_ext !=
1248 (size_t)(permutation_msg[i].num_transfer_pos_ext)) {
1249 permutation_msg[i].num_transfer_pos_ext = (int)num_transfer_pos_ext;
1250 permutation_msg[i].transfer_pos_ext
1251 = transfer_pos_ext
1252 = xrealloc(permutation_msg[i].transfer_pos_ext,
1253 num_transfer_pos_ext * sizeof(*transfer_pos_ext));
1254 } else {
1255 transfer_pos_ext = permutation_msg[i].transfer_pos_ext;
1256 }
1258 num_transfer_pos, permutation, num_transfer_pos_ext, transfer_pos_ext);
1259 }
1260
1261 if (buffer_size > 0) free(transfer_pos_buffer);
1262}
1263
1265 int n_out, int n_in, struct exchange_ext * out_msg,
1266 struct exchange_ext * in_msg, int tag_offset, MPI_Comm comm,
1267 Xt_config config) {
1268
1269 int comm_size;
1270 xt_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1271
1272 struct exchange_ext *restrict remote_out_msg =
1273 xmalloc((size_t)n_in * sizeof(*remote_out_msg));
1274
1275 for (int i = 0; i < n_in; ++i) {
1276 remote_out_msg[i].rank = in_msg[i].rank;
1277 remote_out_msg[i].num_transfer_pos = in_msg[i].num_transfer_pos;
1278 remote_out_msg[i].transfer_pos = NULL;
1279 }
1280
1281 // exchange transfer_pos_ext of out messages to respective receivers
1282 struct Xt_pos_ext *transfer_pos_ext_buffer =
1284 n_out, out_msg, n_in, in_msg, remote_out_msg, tag_offset, comm);
1285
1286 // sort the transfer positions in all out messages
1287 sort_transfer_pos_ext(n_out, out_msg, config);
1288
1289 // sort the transfer positions in all in messages based on the remote out
1290 // messages
1291 sort_transfer_pos_ext_permutation(n_in, remote_out_msg, in_msg, config);
1292
1293 free(transfer_pos_ext_buffer);
1294 free(remote_out_msg);
1295}
1296
1297static Xt_xmap
1299 Xt_config config)
1300{
1301 Xt_xmap_intersection_ext xmap_intersection_ext_new =
1303
1304 int n_out = xmap_intersection_ext_new->n_out;
1305 int n_in = xmap_intersection_ext_new->n_in;
1306 struct exchange_ext *in_msg = xmap_intersection_ext_new->msg,
1307 *out_msg = in_msg + n_in;
1308 MPI_Comm comm = xmap_intersection_ext_new->comm;
1309 int tag_offset = xmap_intersection_ext_new->tag_offset;
1310
1311 switch ((int)type) {
1312 case (XT_REORDER_NONE):
1313 break;
1314 case (XT_REORDER_SEND_UP):
1315 reorder_transfer_pos_ext(n_out, n_in, out_msg, in_msg, tag_offset,
1316 comm, config);
1317 break;
1318 case (XT_REORDER_RECV_UP):
1319 reorder_transfer_pos_ext(n_in, n_out, in_msg, out_msg, tag_offset,
1320 comm, config);
1321 break;
1322 default:
1323 Xt_abort(comm, "ERROR(xmap_intersection_ext_reorder):invalid reorder "
1324 "type", filename, __LINE__);
1325 }
1326
1327 return (Xt_xmap)xmap_intersection_ext_new;
1328}
1329
1331{
1333 const int *new_pos;
1334};
1335
1336static int
1337update_positions(size_t num_orig_pos_ext,
1338 size_t *num_pos_ext,
1339 struct Xt_pos_ext **pos_ext,
1340 const struct Xt_pos_ext *orig_pos_ext,
1341 size_t num_orig_pos, const int *orig_pos,
1342 void *state_)
1343{
1344 (void)num_orig_pos_ext;
1345 struct up_state *state = state_;
1346 int *pos_buffer = state->pos_buffer;
1347 const int *restrict new_pos = state->new_pos;
1348 const int *pos;
1349 if (orig_pos)
1350 pos = orig_pos;
1351 else {
1352 generate_pos(num_orig_pos_ext, orig_pos_ext, num_orig_pos, pos_buffer);
1353 pos = pos_buffer;
1354 }
1355 // update positions
1356 int max_pos = 0;
1357 for (size_t j = 0; j < num_orig_pos; ++j) {
1358 int np = new_pos[pos[j]];
1359 pos_buffer[j] = np;
1360 if (np > max_pos)
1361 max_pos = np;
1362 }
1363
1364 // convert the array of substituted positions into position extents array
1365 size_t num_pos_ext_ = *num_pos_ext = count_pos_ext(num_orig_pos, pos_buffer);
1366 struct Xt_pos_ext *pos_ext_
1367 = *pos_ext = xmalloc(num_pos_ext_ * sizeof (*pos_ext_));
1368 generate_pos_ext(num_orig_pos, pos_buffer, num_pos_ext_, pos_ext_);
1369 return max_pos;
1370}
1371
1372static Xt_xmap
1374 const int *src_positions,
1375 const int *dst_positions) {
1376
1377 Xt_xmap_intersection_ext xmie_orig = xmie(xmap);
1378 size_t max_num_pos = 0;
1379 size_t n = (size_t)xmie_orig->n_in + (size_t)xmie_orig->n_out;
1380 const struct exchange_ext *restrict msg = xmie_orig->msg;
1381 for (size_t i = 0; i < n; ++i)
1382 if ((size_t)msg[i].num_transfer_pos > max_num_pos)
1383 max_num_pos = (size_t)msg[i].num_transfer_pos;
1384 int *pos_buffer
1385 = max_num_pos > 0
1386 ? xmalloc((size_t)max_num_pos * sizeof(*pos_buffer))
1387 : NULL;
1388 struct up_state ups_in = { pos_buffer, dst_positions },
1389 ups_out = { pos_buffer, src_positions };
1390 Xt_xmap xmap_new
1392 update_positions, &ups_in,
1393 update_positions, &ups_out);
1394 free(pos_buffer);
1395 return xmap_new;
1396}
1397
1398struct spread_state
1399{
1400 int num_repetitions;
1401 const int *displacements;
1402};
1403
1404static int
1405pos_ext_copy_spread(size_t num_orig_pos_ext,
1406 size_t *num_pos_ext,
1407 struct Xt_pos_ext **pos_ext,
1408 const struct Xt_pos_ext *orig_pos_ext,
1409 size_t num_orig_pos, const int *orig_pos,
1410 void *state)
1411{
1412 (void)num_orig_pos; (void)orig_pos;
1413 struct spread_state *sp = state;
1415 const int *restrict displacements = sp->displacements;
1416 size_t new_num_pos_ext = num_orig_pos_ext * (size_t)num_repetitions;
1417 size_t size_pos_ext = new_num_pos_ext * sizeof (**pos_ext);
1418 struct Xt_pos_ext *pos_ext_ = *pos_ext = xmalloc(size_pos_ext);
1419 int max_pos = -1;
1420 for (int i = 0; i < num_repetitions; ++i) {
1421 struct Xt_pos_ext *restrict curr_pos_ext =
1422 pos_ext_ + (size_t)i * num_orig_pos_ext;
1423 const int curr_displacement = displacements[i];
1424 for (size_t j = 0; j < num_orig_pos_ext; ++j) {
1425 int start = orig_pos_ext[j].start + curr_displacement,
1426 size = orig_pos_ext[j].size,
1427 max = size > 0 ? start + size - 1 : start;
1428 if (max > max_pos)
1429 max_pos = max;
1430 curr_pos_ext[j] = (struct Xt_pos_ext){ .start = start, .size = size };
1431 }
1432 }
1433 *num_pos_ext = new_num_pos_ext;
1434 return max_pos;
1435}
1436
1437
1438static Xt_xmap
1439xmap_intersection_ext_spread(Xt_xmap xmap, int num_repetitions,
1440 const int src_displacements[num_repetitions],
1441 const int dst_displacements[num_repetitions]) {
1442
1443
1444 return xmap_intersection_ext_copy_(xmap, num_repetitions,
1446 &(struct spread_state){
1447 .num_repetitions = num_repetitions,
1448 .displacements = src_displacements },
1450 &(struct spread_state){
1451 .num_repetitions = num_repetitions,
1452 .displacements = dst_displacements });
1453}
1454
1455
1456/* iterator operations */
1457
1460static int const *
1462static int
1464static const struct Xt_pos_ext *
1466static int
1469
1470static const struct Xt_xmap_iter_vtable
1480
1482
1484
1485 const struct Xt_xmap_iter_vtable *vtable;
1486
1487 struct exchange_ext *msg;
1489};
1490
1492
1493 Xt_xmap_intersection_ext xmap_intersection_ext = xmie(xmap);
1494
1495 if (xmap_intersection_ext->n_in == 0)
1496 return NULL;
1497
1498 Xt_xmap_iter_intersection_ext iter = xmalloc(sizeof (*iter));
1499
1501 iter->msg = xmap_intersection_ext->msg;
1502 iter->msgs_left = xmap_intersection_ext->n_in - 1;
1503
1504 return (Xt_xmap_iter)iter;
1505}
1506
1508
1509 Xt_xmap_intersection_ext xmap_intersection_ext = xmie(xmap);
1510
1511 if (xmap_intersection_ext->n_out == 0)
1512 return NULL;
1513
1514 Xt_xmap_iter_intersection_ext iter = xmalloc(sizeof (*iter));
1515
1517 iter->msg = xmap_intersection_ext->msg + xmap_intersection_ext->n_in;
1518 iter->msgs_left = xmap_intersection_ext->n_out - 1;
1519
1520 return (Xt_xmap_iter)iter;
1521}
1522
1524xmiei(void *iter)
1525{
1526 return (Xt_xmap_iter_intersection_ext)iter;
1527}
1528
1530
1531 Xt_xmap_iter_intersection_ext iter_intersection = xmiei(iter);
1532
1533 if (iter_intersection == NULL || iter_intersection->msgs_left == 0)
1534 return 0;
1535
1536 iter_intersection->msg++;
1537 iter_intersection->msgs_left--;
1538
1539 return 1;
1540}
1541
1543
1544 assert(iter != NULL);
1545 return xmiei(iter)->msg->rank;
1546}
1547
1548static int const *
1550
1551 assert(iter != NULL);
1552 struct exchange_ext *restrict msg = xmiei(iter)->msg;
1553 if ((!msg->num_transfer_pos) | (msg->transfer_pos != NULL)) { } else {
1554 size_t num_transfer_pos = (size_t)msg->num_transfer_pos;
1555 int * transfer_pos =
1556 (msg->transfer_pos = xmalloc(num_transfer_pos * sizeof(*transfer_pos)));
1558 (size_t)msg->num_transfer_pos_ext, msg->transfer_pos_ext,
1560 }
1561 return msg->transfer_pos;
1562}
1563
1564static int
1566 assert(iter != NULL);
1567 return xmiei(iter)->msg->num_transfer_pos;
1568}
1569
1570static const struct Xt_pos_ext *
1572 assert(iter != NULL);
1573 return xmiei(iter)->msg->transfer_pos_ext;
1574}
1575
1576static int
1578 assert(iter != NULL);
1579 return xmiei(iter)->msg->num_transfer_pos_ext;
1580}
1581
1583
1584 free(iter);
1585}
1586
1587/*
1588 * Local Variables:
1589 * c-basic-offset: 2
1590 * coding: utf-8
1591 * indent-tabs-mode: nil
1592 * show-trailing-whitespace: t
1593 * require-trailing-newline: t
1594 * End:
1595 */
int MPI_Comm
Definition core.h:64
#define ENSURE_ARRAY_SIZE(arrayp, curr_array_size, req_size)
add versions of standard API functions not returning on error
#define xrealloc(ptr, size)
Definition ppm_xfuncs.h:71
#define xmalloc(size)
Definition ppm_xfuncs.h:70
const struct Xt_sort_algo_funcptr * sort_funcs
const struct xt_idxlist_vtable * vtable
struct Xt_pos_ext * pos_ext
Definition xt_cover.h:61
size_t num_pos_ext
Definition xt_cover.h:60
size_t size_pos_ext
Definition xt_cover.h:60
int size
Definition xt_core.h:97
int start
Definition xt_core.h:97
void(* sort_int)(int *a, size_t n)
void(* sort_int_permutation)(int a[], size_t n, int permutation[])
Xt_int stride
Definition xt_stripe.h:56
int nstrides
Definition xt_stripe.h:57
Xt_int start
Definition xt_stripe.h:55
const struct Xt_xmap_vtable * vtable
const struct Xt_xmap_iter_vtable * vtable
struct Xt_pos_ext * transfer_pos_ext
const int *restrict displacements
struct Xt_pos_ext_vec cover
static int isign_mask(int x)
static int imin(int a, int b)
static const char filename[]
Definition xt_config.c:76
struct Xt_config_ xt_default_config
Definition xt_config.c:204
struct Xt_config_ * Xt_config
Definition xt_config.h:58
implementation of configuration object
int xt_initialized(void)
struct Xt_xmap_ * Xt_xmap
Definition xt_core.h:85
XT_INT Xt_int
Definition xt_core.h:72
struct Xt_idxlist_ * Xt_idxlist
Definition xt_core.h:84
size_t xt_cover_insert_or_overlap(struct Xt_pos_ext_vec *restrict cover, struct Xt_pos_range range, size_t search_start_pos)
Definition xt_cover.c:161
void xt_cover_range_append(struct Xt_pos_ext_vec *restrict cover, struct Xt_pos_ext range)
Definition xt_cover.c:141
void xt_cover_finish(struct Xt_pos_ext_vec *restrict cover)
Definition xt_cover.c:69
bool xt_idxlist_pos_ext_is_full_cover(Xt_idxlist idxlist, struct Xt_pos_ext_vec cover)
Definition xt_cover.c:75
void xt_cover_start(struct Xt_pos_ext_vec *restrict cover, size_t initial_size)
Definition xt_cover.c:60
int xt_idxlist_get_pos_exts_of_index_stripes_custom(Xt_idxlist idxlist, int num_stripes, const struct Xt_stripe stripes[num_stripes], int *num_ext, struct Xt_pos_ext **pos_ext, int single_match_only, Xt_config config)
Definition xt_idxlist.c:275
index list declaration
void xt_idxlist_get_index_stripes(Xt_idxlist idxlist, struct Xt_stripe **stripes, int *num_stripes)
Definition xt_idxlist.c:135
void xt_idxlist_delete(Xt_idxlist idxlist)
Definition xt_idxlist.c:75
Provide non-public declarations common to all index lists.
#define xt_idxlist_get_num_indices(idxlist)
@ VECTOR
PPM_DSO_INTERNAL Xt_idxlist xt_idxvec_get_idxstripes(Xt_idxlist idxlist)
Definition xt_idxvec.c:664
MPI_Comm xt_mpi_comm_smart_dup(MPI_Comm comm, int *tag_offset)
Definition xt_mpi.c:333
void xt_mpi_comm_smart_dedup(MPI_Comm *comm, int tag_offset)
Definition xt_mpi.c:386
utility routines for MPI
#define xt_mpi_call(call, comm)
Definition xt_mpi.h:68
@ xt_mpi_tag_xmap_intersection_data_exchange
@ xt_mpi_tag_xmap_intersection_header_exchange
exchange map declarations
struct Xt_xmap_iter_ * Xt_xmap_iter
Definition xt_xmap.h:73
xt_reorder_type
Definition xt_xmap.h:240
@ XT_REORDER_RECV_UP
optimise data access on receiver side
Definition xt_xmap.h:243
@ XT_REORDER_NONE
no reordering
Definition xt_xmap.h:241
@ XT_REORDER_SEND_UP
optimise data access on sender side
Definition xt_xmap.h:242
contains declaration for the exchange map data structure
static const struct Xt_xmap_vtable xmap_intersection_vtable
Utility functions shared by xt_xmap_intersection and xt_xmap_intersection_ext.
static void print_miss_msg(Xt_idxlist dst_idxlist, int missing_pos, MPI_Comm comm, const char *source, int line) __attribute__((noreturn))
static size_t count_pos_ext(size_t num_pos, const int *restrict pos)
static void generate_pos(size_t num_pos_ext, const struct Xt_pos_ext *restrict pos_ext, size_t num_pos, int *restrict pos)
static void generate_pos_ext(size_t num_pos, const int *restrict pos, size_t num_pos_ext, struct Xt_pos_ext *restrict pos_ext)
static Xt_xmap xmap_intersection_ext_copy(Xt_xmap xmap)
static const struct Xt_xmap_iter_vtable xmap_iterator_intersection_ext_vtable
static int xmap_intersection_ext_get_num_destinations(Xt_xmap xmap)
struct Xt_xmap_iter_intersection_ext_ * Xt_xmap_iter_intersection_ext
static Xt_xmap xmap_intersection_ext_copy_(Xt_xmap xmap, int num_repetitions, Xt_pos_ext_copy pe_cpy_in, void *peci_state, Xt_pos_ext_copy pe_cpy_out, void *peco_state)
static int xmap_intersection_ext_get_max_dst_pos(Xt_xmap xmap)
static Xt_xmap_iter xmap_intersection_ext_get_out_iterator(Xt_xmap xmap)
Xt_xmap xt_xmap_intersection_ext_custom_new(int num_src_intersections, const struct Xt_com_list src_com[num_src_intersections], int num_dst_intersections, const struct Xt_com_list dst_com[num_dst_intersections], Xt_idxlist src_idxlist, Xt_idxlist dst_idxlist, MPI_Comm comm, Xt_config config)
static struct Xt_pos_ext * exchange_pos_ext_modifications(int num_src_intersections, const struct Xt_com_list src_com[num_src_intersections], int num_dst_intersections, const struct Xt_com_list dst_com[num_dst_intersections], struct exchange_ext dst_ext[num_dst_intersections], int(*restrict src_removals_per_intersection)[2], const int(*restrict dst_removals_per_intersection)[2], int tag_offset, MPI_Comm comm)
static void xmap_intersection_ext_get_destination_ranks(Xt_xmap xmap, int *ranks)
Xt_xmap xt_xmap_intersection_ext_new(int num_src_intersections, const struct Xt_com_list src_com[num_src_intersections], int num_dst_intersections, const struct Xt_com_list dst_com[num_dst_intersections], Xt_idxlist src_idxlist, Xt_idxlist dst_idxlist, MPI_Comm comm)
static const struct Xt_pos_ext * xmap_intersection_ext_iterator_get_transfer_pos_ext(Xt_xmap_iter iter)
static struct Xt_pos_ext * get_pos_exts_of_index_stripes(Xt_idxlist idxlist, int num_stripes, const struct Xt_stripe stripes[num_stripes], int *num_ext, int single_match_only, Xt_config config)
static void xmap_intersection_ext_get_source_ranks(Xt_xmap xmap, int *ranks)
static Xt_xmap xmap_intersection_ext_reorder(Xt_xmap xmap, enum xt_reorder_type type, Xt_config config)
static int xmap_intersection_ext_get_num_sources(Xt_xmap xmap)
static struct ted_result generate_dir_transfer_pos_ext_dst(int num_intersections, const struct Xt_com_list intersections[num_intersections], Xt_idxlist mypart_idxlist, struct exchange_ext *resSets, int(*restrict dst_removals_per_intersection)[2], Xt_config config)
static Xt_xmap xmap_intersection_ext_spread(Xt_xmap xmap, int num_repetitions, const int src_displacements[num_repetitions], const int dst_displacements[num_repetitions])
static int const * xmap_intersection_ext_iterator_get_transfer_pos(Xt_xmap_iter iter)
static int update_positions(size_t num_orig_pos_ext, size_t *num_pos_ext, struct Xt_pos_ext **pos_ext, const struct Xt_pos_ext *orig_pos_ext, size_t num_orig_pos, const int *orig_pos, void *state_)
static void reorder_transfer_pos_ext(int n_out, int n_in, struct exchange_ext *out_msg, struct exchange_ext *in_msg, int tag_offset, MPI_Comm comm, Xt_config config)
static void xmap_intersection_ext_msg_copy(size_t nmsg, const struct exchange_ext *restrict msg, int *nmsg_copy, struct exchange_ext *restrict msg_copy, int *max_pos_, int num_repetitions, Xt_pos_ext_copy pos_ext_copy, void *pec_state)
static struct Xt_stripe * refine_stripes(int *num_stripes_, struct Xt_stripe *restrict intersection_idxstripes, size_t num_pos_updates, const struct Xt_pos_ext *restrict pos_updates)
static int xmap_intersection_ext_iterator_get_num_transfer_pos(Xt_xmap_iter iter)
static int xmap_intersection_ext_get_max_src_pos(Xt_xmap xmap)
static void generate_transfer_ext(struct Xt_xmap_intersection_ext_ *xmap, int num_src_intersections, const struct Xt_com_list src_com[num_src_intersections], int num_dst_intersections, const struct Xt_com_list dst_com[num_dst_intersections], Xt_idxlist src_idxlist_local, Xt_idxlist dst_idxlist_local, MPI_Comm comm, Xt_config config)
static struct Xt_pos_ext * exchange_transfer_pos_ext(int n_out, const struct exchange_ext *restrict out_msg, int n_in, const struct exchange_ext *restrict in_msg, struct exchange_ext *restrict remote_out_msg, int tag_offset, MPI_Comm comm)
static void sort_transfer_pos_ext(int n, struct exchange_ext *msg, Xt_config config)
static Xt_xmap xmap_intersection_ext_update_positions(Xt_xmap xmap, const int *src_positions, const int *dst_positions)
static void xmap_intersection_ext_delete(Xt_xmap xmap)
static Xt_xmap_iter xmap_intersection_ext_get_in_iterator(Xt_xmap xmap)
static Xt_xmap_intersection_ext xmie(void *xmap)
static void remap_intersection(Xt_idxlist mypart_idxlist, Xt_idxlist intersection, size_t num_pos_updates, const struct Xt_pos_ext pos_updates[num_pos_updates], struct exchange_ext *resSet, int single_match_only, Xt_config config)
static MPI_Comm xmap_intersection_ext_get_communicator(Xt_xmap xmap)
static int xmap_intersection_ext_iterator_get_num_transfer_pos_ext(Xt_xmap_iter iter)
static void cut_pos_ext_from_pos_exts(struct Xt_pos_ext pos_ext, struct Xt_pos_ext_vec *pos_exts)
static int xmap_intersection_ext_iterator_get_rank(Xt_xmap_iter iter)
static struct Xt_pos_ext_overlap Xt_get_pos_ext_overlap(struct Xt_pos_ext a, struct Xt_pos_ext b)
static int pos_ext_copy_verbatim(size_t num_orig_pos_ext, size_t *num_pos_ext, struct Xt_pos_ext **pos_ext, const struct Xt_pos_ext *orig_pos_ext, size_t num_orig_pos, const int *orig_pos, void *state)
static void xt_free_exchange_ext(size_t num_msg, struct exchange_ext *restrict msg)
static struct tes_result generate_dir_transfer_pos_ext_src(int num_intersections, const struct Xt_com_list intersections[num_intersections], Xt_idxlist mypart_idxlist, struct exchange_ext *resSets, const int(*restrict removals_per_intersection)[2], const struct Xt_pos_ext *pos_updates, Xt_config config)
static Xt_xmap_iter_intersection_ext xmiei(void *iter)
static void remap_dst_intersections(int num_dst_intersections, const struct Xt_com_list dst_com[num_dst_intersections], Xt_idxlist mypart_idxlist, int resCount, struct exchange_ext resSets[resCount], const int(*removals_per_intersection)[2], Xt_config config)
static void xmap_intersection_ext_iterator_delete(Xt_xmap_iter iter)
struct Xt_xmap_intersection_ext_ * Xt_xmap_intersection_ext
static int pos_ext_find_max_pos(int num_pos_ext, const struct Xt_pos_ext *restrict pos_ext)
static int xmap_intersection_ext_iterator_next(Xt_xmap_iter iter)
int(* Xt_pos_ext_copy)(size_t num_orig_pos_ext, size_t *num_pos_ext, struct Xt_pos_ext **pos_ext, const struct Xt_pos_ext *orig_pos_ext, size_t num_orig_pos, const int *orig_pos, void *state)
static void sort_transfer_pos_ext_permutation(int n, struct exchange_ext *msg, struct exchange_ext *permutation_msg, Xt_config config)
static int pos_ext_copy_spread(size_t num_orig_pos_ext, size_t *num_pos_ext, struct Xt_pos_ext **pos_ext, const struct Xt_pos_ext *orig_pos_ext, size_t num_orig_pos, const int *orig_pos, void *state)