79static const char filename[] =
"xt_xmap_intersection_ext.c";
83#if __GNUC__ >= 11 && __GNUC__ <= 13 && defined MPICH
84#pragma GCC diagnostic push
85#pragma GCC diagnostic ignored "-Wstringop-overread"
86#pragma GCC diagnostic ignored "-Wstringop-overflow"
107 const int * src_positions,
108 const int * dst_positions);
111 const int src_displacements[num_repetitions],
112 const int dst_displacements[num_repetitions]);
152 struct exchange_ext
msg[];
165 return xmap_intersection_ext->
comm;
172 return xmap_intersection_ext->
n_out;
179 return xmap_intersection_ext->
n_in;
186 size_t n_out = (size_t)xmap_intersection_ext->
n_out;
188 = xmap_intersection_ext->
msg + xmap_intersection_ext->
n_in;
189 for (
size_t i = 0; i < n_out; ++i)
190 ranks[i] = out_msg[i].
rank;
197 size_t n_in = (size_t)xmap_intersection_ext->
n_in;
198 const struct exchange_ext *restrict in_msg = xmap_intersection_ext->
msg;
199 for (
size_t i = 0; i < n_in; ++i)
200 ranks[i] = in_msg[i].
rank;
215 size_t num_orig_pos,
const int *orig_pos,
223 size_t num_orig_pos,
const int *orig_pos,
226 (void)state; (void)num_orig_pos; (void)orig_pos;
227 size_t size_pos_ext = num_orig_pos_ext *
sizeof (**pos_ext);
229 memcpy(pos_ext_, orig_pos_ext, size_pos_ext);
230 *num_pos_ext = num_orig_pos_ext;
239 int *max_pos_,
int num_repetitions,
242 *nmsg_copy = (int)nmsg;
244 for (
size_t i = 0; i < nmsg; ++i) {
245 msg_copy[i].num_transfer_pos = num_repetitions * msg[i].num_transfer_pos;
246 msg_copy[i].rank = msg[i].rank;
247 msg_copy[i].transfer_pos = NULL;
248 size_t num_transfer_pos_ext;
250 = pos_ext_copy((
size_t)msg[i].num_transfer_pos_ext, &num_transfer_pos_ext,
251 &msg_copy[i].transfer_pos_ext, msg[i].transfer_pos_ext,
252 (
size_t)msg[i].num_transfer_pos, msg[i].transfer_pos,
254 if (new_max_pos > max_pos)
255 max_pos = new_max_pos;
256 msg_copy[i].num_transfer_pos_ext = (int)num_transfer_pos_ext;
268 xmap_intersection_ext_new;
269 size_t n_in = (size_t)xmap_intersection_ext->
n_in,
270 n_out = (
size_t)xmap_intersection_ext->
n_out,
271 num_isect = n_in + n_out;
272 xmap_intersection_ext_new
273 =
xmalloc(
sizeof (*xmap_intersection_ext_new)
275 xmap_intersection_ext_new->vtable = xmap_intersection_ext->
vtable;
276 xmap_intersection_ext_new->n_in = (int)n_in;
277 xmap_intersection_ext_new->n_out = (int)n_out;
278 xmap_intersection_ext_new->max_src_pos = xmap_intersection_ext->
max_src_pos;
279 xmap_intersection_ext_new->max_dst_pos = xmap_intersection_ext->
max_dst_pos;
281 &xmap_intersection_ext_new->n_in,
282 xmap_intersection_ext_new->msg,
283 &xmap_intersection_ext_new->max_dst_pos,
284 num_repetitions, pe_cpy_in, peci_state);
286 &xmap_intersection_ext_new->n_out,
287 xmap_intersection_ext_new->msg+n_in,
288 &xmap_intersection_ext_new->max_src_pos,
289 num_repetitions, pe_cpy_out, peco_state);
290 xmap_intersection_ext_new->comm
292 &xmap_intersection_ext_new->tag_offset);
293 return (
Xt_xmap)xmap_intersection_ext_new;
308 for (
size_t i = 0; i < num_msg; ++i) {
309 free(msg[i].transfer_pos);
310 free(msg[i].transfer_pos_ext);
317 size_t num_isect = (size_t)xmap_intersection_ext->
n_in
318 + (
size_t)xmap_intersection_ext->
n_out;
322 free(xmap_intersection_ext);
327 int num_src_intersections,
328 const struct Xt_com_list src_com[num_src_intersections],
329 int num_dst_intersections,
330 const struct Xt_com_list dst_com[num_dst_intersections],
339 src_com[num_src_intersections],
340 int num_dst_intersections,
342 dst_com[num_dst_intersections],
347 num_src_intersections,
348 src_com, num_dst_intersections,
355 int num_src_intersections,
356 const struct Xt_com_list src_com[num_src_intersections],
357 int num_dst_intersections,
358 const struct Xt_com_list dst_com[num_dst_intersections],
368 = (size_t)num_dst_intersections + (
size_t)num_src_intersections;
376 Xt_idxlist src_idxlist_orig = NULL, dst_idxlist_orig = NULL;
378 src_idxlist_orig = src_idxlist;
382 dst_idxlist_orig = dst_idxlist;
388 num_src_intersections, src_com,
389 num_dst_intersections, dst_com,
390 src_idxlist, dst_idxlist, comm,
393 size_t new_num_isect = (size_t)xmap->
n_in + (
size_t)xmap->
n_out;
394 if (new_num_isect != num_isect)
395 xmap =
xrealloc(xmap,
sizeof (*xmap) + (new_num_isect
400 if (src_idxlist_orig)
402 if (dst_idxlist_orig)
414 int num_intersections,
415 const struct
Xt_com_list intersections[num_intersections],
418 int (*restrict dst_removals_per_intersection)[2],
424 int num_src_intersections,
425 const struct Xt_com_list src_com[num_src_intersections],
426 int num_dst_intersections,
427 const struct Xt_com_list dst_com[num_dst_intersections],
429 int (*restrict src_removals_per_intersection)[2],
430 const int (*restrict dst_removals_per_intersection)[2],
436 const struct Xt_com_list dst_com[num_dst_intersections],
440 const int (*removals_per_intersection)[2],
450 int num_intersections,
451 const struct
Xt_com_list intersections[num_intersections],
454 const int (*restrict removals_per_intersection)[2],
460 int num_src_intersections,
461 const struct Xt_com_list src_com[num_src_intersections],
462 int num_dst_intersections,
463 const struct Xt_com_list dst_com[num_dst_intersections],
475 int (*src_removals_per_intersection)[2] =
476 xmalloc(((
size_t)num_dst_intersections + (
size_t)num_src_intersections)
477 *
sizeof(*src_removals_per_intersection)),
478 (*dst_removals_per_intersection)[2]
479 = src_removals_per_intersection + num_src_intersections;
484 num_dst_intersections, dst_com, dst_idxlist,
485 xmap->
msg, dst_removals_per_intersection, config);
489 Xt_abort(comm,
"ERROR: ups...this should not have happend...",
filename,
491 int first_missing_pos
503 num_src_intersections, src_com, num_dst_intersections, dst_com, xmap->
msg,
504 src_removals_per_intersection,
505 (
const int (*)[2])dst_removals_per_intersection, xmap->
tag_offset, comm);
509 (
const int (*)[2])dst_removals_per_intersection,
512 src_removals_per_intersection =
513 xrealloc(src_removals_per_intersection, (
size_t)num_src_intersections
514 *
sizeof(*src_removals_per_intersection));
518 num_src_intersections, src_com, src_idxlist, xmap->
msg+xmap->
n_in,
519 (
const int (*)[2])src_removals_per_intersection, pos_updates, config);
522 free(src_removals_per_intersection);
536 a_s = a.start + (aSizeMaskNeg & (a.size + 1)),
537 a_e = a.start + (~aSizeMaskNeg & (a.size - 1)),
539 b_s = b.start + (bSizeMaskNeg & (b.size + 1)),
540 b_e = b.start + (~bSizeMaskNeg & (b.size - 1));
542 if ((b_s > a_e) | (a_s > b_e))
543 return (
struct Xt_pos_ext_overlap){ a.size, 0, 0};
546 int lowSkipA = b_s - a_s;
547 int lowSkipB = -lowSkipA;
548 lowSkipA = (int)((
unsigned)(lowSkipA + abs(lowSkipA))/2U);
549 lowSkipB = (int)((
unsigned)(lowSkipB + abs(lowSkipB))/2U);
550 int overlapLen =
imin(b_e - b_s - lowSkipB + 1,
551 abs(a.size) - lowSkipA);
552 int highSkipA = abs(a.size) - lowSkipA - overlapLen;
555 int aSkipLen = (~aSizeMaskNeg & lowSkipA)
556 | (aSizeMaskNeg & -highSkipA),
557 aTailLen = (aSizeMaskNeg & -lowSkipA)
558 | (~aSizeMaskNeg & highSkipA);
559 return (
struct Xt_pos_ext_overlap){ aSkipLen, overlapLen, aTailLen };
572 const struct Xt_stripe stripes[num_stripes],
574 int single_match_only,
582 idxlist, num_stripes, stripes,
583 num_ext, &pos_ext, single_match_only, config);
590 int num_intersections,
591 const struct
Xt_com_list intersections[num_intersections],
594 int (*restrict dst_removals_per_intersection)[2],
597 int new_num_intersections = 0;
603 enum { initial_vec_size = 8 };
607 for (
int i = 0; i < num_intersections; ++i) {
609 int num_stripes, num_indices_to_remove = 0;
610 struct Xt_stripe *intersection_idxstripes;
612 &intersection_idxstripes, &num_stripes);
613 int num_isect_pos_exts;
616 mypart_idxlist, num_stripes, intersection_idxstripes,
617 &num_isect_pos_exts, 1, config);
618 int isect_pos_exts_size_psum = 0;
624 .size_pos_ext = initial_vec_size,
625 .pos_ext =
xrealloc(intersection_idxstripes,
626 sizeof (
struct Xt_pos_ext) * initial_vec_size) };
627 intersection_idxstripes = NULL;
632 for (
size_t j = 0; j < (size_t)num_isect_pos_exts; ++j) {
633 struct Xt_pos_ext isect_pos_ext = isect_pos_exts[j];
635 int isign_mask_isect_pos_ext_size =
isign_mask(isect_pos_ext.
size);
637 += isign_mask_isect_pos_ext_size & (isect_pos_ext.
size + 1);
638 int isect_pos_ext_orig_size = isect_pos_ext.
size;
639 isect_pos_ext.
size = abs(isect_pos_ext.
size);
640 isect_pos_exts_size_psum += isect_pos_ext.
size;
643 int progress = -isect_pos_ext.
size;
644 size_t search_start_pos = 0, insert_pos;
648 .end = isect_pos_ext.
start + isect_pos_ext.
size - 1 };
651 if (insert_pos == SIZE_MAX)
652 goto next_isect_pos_ext;
673 memmove(cover.
pos_ext + insert_pos, cover.
pos_ext + insert_pos + 1,
678 progress = (~isign_mask_isect_pos_ext_size
679 & (progress + overlap_desc.
skip))
680 | (isign_mask_isect_pos_ext_size
681 & (isect_pos_ext_orig_size + overlap_desc.
tail));
683 num_indices_to_remove += overlap_desc.
overlap;
685 .start = isect_pos_exts_size_psum + progress,
686 .size = overlap_desc.
overlap }, &transferable);
687 progress += overlap_desc.
overlap;
691 isect_pos_ext.
size = overlap_desc.
tail;
692 search_start_pos = ++insert_pos;
693 }
while ((isect_pos_ext.
size != 0)
695 if (isect_pos_ext.
size)
703 if (intersection_size > num_indices_to_remove) {
704 resSets[new_num_intersections].transfer_pos_ext
708 resSets[new_num_intersections].transfer_pos = NULL;
709 resSets[new_num_intersections].num_transfer_pos
710 = intersection_size - num_indices_to_remove;
711 resSets[new_num_intersections].num_transfer_pos_ext
713 resSets[new_num_intersections].rank = intersections[i].rank;
714 ++new_num_intersections;
717 dst_removals_per_intersection[i][0] = num_indices_to_remove;
718 dst_removals_per_intersection[i][1]
719 = ((num_indices_to_remove == intersection_size)
720 | (num_indices_to_remove == 0))?0:(int)transferable.
num_pos_ext;
721 free(isect_pos_exts);
725#if defined __PGI && __PGIC__ <= 13
733 .resCount = new_num_intersections };
743 size_t i = num_pos_exts_;
746 int db_skip = pos_ext.
start - pos_exts_[i].start;
747 if ((!db_skip) & (pos_ext.
size == pos_exts_[i].size))
750 memmove(pos_exts_ + i, pos_exts_ + i + 1,
751 sizeof (*pos_exts_) * (num_pos_exts_ - i - 1));
754 else if (db_skip + pos_ext.
size == pos_exts_[i].size)
757 pos_exts_[i].size -= pos_ext.
size;
759 else if (db_skip == 0)
762 pos_exts_[i].start = pos_ext.
start + pos_ext.
size;
763 pos_exts_[i].size -= pos_ext.
size;
771 memmove(pos_exts_ + i + 1, pos_exts_ + i,
772 (num_pos_exts_ - i) *
sizeof (*pos_exts_));
777 .size = orig.
size - db_skip - pos_ext.
size };
784 int num_src_intersections,
785 const struct Xt_com_list src_com[num_src_intersections],
786 int num_dst_intersections,
787 const struct Xt_com_list dst_com[num_dst_intersections],
789 int (*restrict src_removals_per_intersection)[2],
790 const int (*restrict dst_removals_per_intersection)[2],
794 MPI_Request * requests
795 =
xmalloc((
size_t)(num_src_intersections + 2 * num_dst_intersections) *
797 MPI_Request *restrict send_header_requests = requests,
798 *restrict recv_requests = requests + num_dst_intersections,
799 *restrict send_data_requests = recv_requests + num_src_intersections;
802 for (
int i = 0; i < num_src_intersections; ++i)
804 src_removals_per_intersection[i], 2, MPI_INT, src_com[i].rank,
806 comm, recv_requests + i), comm);
811 unsigned num_active_dst = 0, num_dst_changes = 0;
812 for (
int i = 0; i < num_dst_intersections; ++i) {
814 CAST_MPI_SEND_BUF(dst_removals_per_intersection[i]),
815 2, MPI_INT, dst_com[i].rank,
817 comm, send_header_requests + i), comm);
819 if (dst_removals_per_intersection[i][1] > 0) {
821 assert(dst_removals_per_intersection[i][1]
822 == dst_ext[num_active_dst].num_transfer_pos_ext
823 && dst_com[i].rank == dst_ext[num_active_dst].rank);
825 dst_ext[num_active_dst].transfer_pos_ext,
826 dst_removals_per_intersection[i][1],
827 MPI_2INT, dst_com[i].rank,
829 comm, send_data_requests + num_dst_changes),
833 num_active_dst += (unsigned)((dst_removals_per_intersection[i][0] == 0)
834 | (dst_removals_per_intersection[i][1] != 0));
838 xt_mpi_call(MPI_Waitall(num_src_intersections + num_dst_intersections,
839 requests, MPI_STATUSES_IGNORE), comm);
841 size_t total_num_pos_ext_to_recv = 0;
843 for (
size_t i = 0; i < (size_t)num_src_intersections; ++i)
844 total_num_pos_ext_to_recv += (
size_t)src_removals_per_intersection[i][1];
847 unsigned num_src_changes = 0;
848 if (total_num_pos_ext_to_recv > 0) {
851 =
xmalloc(total_num_pos_ext_to_recv *
sizeof(*src_updated_pos_ext));
856 for (
int i = 0; i < num_src_intersections; ++i)
857 if (src_removals_per_intersection[i][1] > 0) {
860 src_updated_pos_ext + offset,
861 src_removals_per_intersection[i][1], MPI_2INT,
864 comm, send_data_requests - num_src_changes), comm);
866 offset += (size_t)src_removals_per_intersection[i][1];
869 src_updated_pos_ext = NULL;
872 xt_mpi_call(MPI_Waitall((
int)num_src_changes + (
int)num_dst_changes,
873 send_data_requests - num_src_changes,
874 MPI_STATUSES_IGNORE), comm);
877 return src_updated_pos_ext;
883 size_t num_pos_updates,
884 const struct Xt_pos_ext pos_updates[num_pos_updates],
886 int single_match_only,
891 int num_dst_intersections,
892 const struct Xt_com_list intersections[num_dst_intersections],
896 const int (*removals_per_intersection)[2],
900 for (
size_t i = 0; i < (size_t)num_dst_intersections; ++i)
902 int intersection_size
905 int num_indices_to_remove = removals_per_intersection[i][0];
907 if (num_indices_to_remove != intersection_size) {}
else
911 struct Xt_pos_ext *pos_updates = resSets[resIdx].transfer_pos_ext;
913 (
size_t)removals_per_intersection[i][1],
914 pos_updates, resSets + resIdx, 1, config);
918 assert(resIdx == (
size_t)resCount);
926 for (
size_t i = 0; i < (size_t)num_pos_ext; ++i) {
927 int start = pos_ext[i].start,
928 size = pos_ext[i].size,
930 if (max > max_pos) max_pos = max;
938 int num_intersections,
939 const struct
Xt_com_list intersections[num_intersections],
942 const int (*restrict removals_per_intersection)[2],
947 int new_num_intersections = 0;
949 size_t intersection_pos_ext = 0;
952 for (
int i = 0; i < num_intersections; ++i) {
954 int intersection_size
957 int num_indices_to_remove = removals_per_intersection[i][0];
960 if (num_indices_to_remove != intersection_size) {
963 (
size_t)removals_per_intersection[i][1],
964 pos_updates + intersection_pos_ext,
965 resSets + new_num_intersections, 0, config);
968 resSets[new_num_intersections].num_transfer_pos_ext,
969 resSets[new_num_intersections].transfer_pos_ext);
970 if (max > max_pos) max_pos = max;
972 resSets[new_num_intersections].transfer_pos = NULL;
973 resSets[new_num_intersections].num_transfer_pos
974 = intersection_size - num_indices_to_remove;
975 resSets[new_num_intersections].rank = intersections[i].rank;
976 new_num_intersections++;
977 intersection_pos_ext += (size_t)removals_per_intersection[i][1];
982 .resCount = new_num_intersections,
989 struct Xt_stripe *restrict intersection_idxstripes,
990 size_t num_pos_updates,
991 const struct Xt_pos_ext *restrict pos_updates)
994 size_t num_refined_intersection_idxstripes = 0,
995 size_refined_intersection_idxstripes = num_pos_updates;
996 struct Xt_stripe *restrict refined_intersection_idxstripes
997 =
xmalloc(size_refined_intersection_idxstripes
998 *
sizeof (*refined_intersection_idxstripes));
1000 int nstrides_psum = 0;
1001 for (
size_t i_pos_ext = 0; i_pos_ext < num_pos_updates; ++i_pos_ext)
1003 int pos = pos_updates[i_pos_ext].start;
1004 int size = pos_updates[i_pos_ext].size;
1005 while (nstrides_psum + intersection_idxstripes[i_stripe].
nstrides <= pos)
1007 nstrides_psum += intersection_idxstripes[i_stripe].nstrides;
1011 int instripe_pos = pos - nstrides_psum;
1013 size_refined_intersection_idxstripes,
1014 num_refined_intersection_idxstripes + 1);
1015 struct Xt_stripe cur_stripe = intersection_idxstripes[i_stripe];
1016 int cur_stripe_nstrides = cur_stripe.
nstrides;
1017 int overlap =
imin(cur_stripe_nstrides - instripe_pos, size);
1022 refined_intersection_idxstripes[num_refined_intersection_idxstripes]
1024 ++num_refined_intersection_idxstripes;
1025 i_stripe += (instripe_pos + overlap == cur_stripe_nstrides);
1026 nstrides_psum += (instripe_pos + overlap == cur_stripe_nstrides)
1027 ? cur_stripe_nstrides : 0;
1032 free(intersection_idxstripes);
1033 *num_stripes_ = (int)num_refined_intersection_idxstripes;
1034 return refined_intersection_idxstripes;
1047 size_t num_pos_updates,
1048 const struct Xt_pos_ext pos_updates[num_pos_updates],
1050 int single_match_only,
1053 struct Xt_stripe *intersection_idxstripes;
1056 &intersection_idxstripes,
1058 if (num_pos_updates)
1059 intersection_idxstripes
1061 num_pos_updates, pos_updates);
1069 mypart_idxlist, num_stripes, intersection_idxstripes,
1072 assert(retval == 0);
1073 free(intersection_idxstripes);
1077 int n_out,
const struct exchange_ext *restrict out_msg,
1082 MPI_Request * requests
1083 =
xmalloc((
size_t)(n_in + 2 * n_out) *
sizeof(*requests));
1084 MPI_Request *send_header_requests = requests,
1085 *recv_requests = requests + n_out,
1086 *send_data_requests = recv_requests + n_in;
1090 for (
int i = 0; i < n_in; ++i)
1092 &(remote_out_msg[i].num_transfer_pos_ext), 1, MPI_INT,
1095 comm, recv_requests + i), comm);
1098 for (
int i = 0; i < n_out; ++i) {
1100 CAST_MPI_SEND_BUF(&(out_msg[i].num_transfer_pos_ext)),
1101 1, MPI_INT, out_msg[i].rank,
1103 comm, send_header_requests + i), comm);
1106 CAST_MPI_SEND_BUF(out_msg[i].transfer_pos_ext),
1107 out_msg[i].num_transfer_pos_ext,
1108 MPI_2INT, out_msg[i].rank,
1110 comm, send_data_requests + i),
1116 MPI_Waitall(n_out + n_in, send_header_requests, MPI_STATUSES_IGNORE), comm);
1118 size_t total_num_pos_ext_to_recv = 0;
1120 for (
size_t i = 0; i < (size_t)n_in; ++i)
1121 total_num_pos_ext_to_recv +=
1122 (
size_t)(remote_out_msg[i].num_transfer_pos_ext);
1125 if (total_num_pos_ext_to_recv > 0) {
1127 transfer_pos_ext_buffer
1128 =
xmalloc(total_num_pos_ext_to_recv *
sizeof(*transfer_pos_ext_buffer));
1131 struct Xt_pos_ext *curr_transfer_pos_ext = transfer_pos_ext_buffer;
1132 for (
int i = 0; i < n_in; ++i) {
1134 curr_transfer_pos_ext,
1135 remote_out_msg[i].num_transfer_pos_ext, MPI_2INT,
1138 comm, recv_requests + i), comm);
1140 remote_out_msg[i].transfer_pos_ext = curr_transfer_pos_ext;
1141 curr_transfer_pos_ext += remote_out_msg[i].num_transfer_pos_ext;
1144 transfer_pos_ext_buffer = NULL;
1148 MPI_Waitall(n_in + n_out, recv_requests, MPI_STATUSES_IGNORE), comm);
1151 return transfer_pos_ext_buffer;
1157 int buffer_size = 0;
1158 for (
int i = 0; i < n; ++i)
1159 if (msg[i].transfer_pos == NULL && msg[i].num_transfer_pos > buffer_size)
1162 int *transfer_pos_buffer
1164 ?
xmalloc((
size_t)buffer_size *
sizeof(*transfer_pos_buffer))
1167 for (
int i = 0; i < n; ++i) {
1170 int *restrict transfer_pos;
1171 size_t num_transfer_pos = (size_t)(msg[i].num_transfer_pos);
1172 if (msg[i].transfer_pos != NULL) {
1175 transfer_pos = transfer_pos_buffer;
1177 (
size_t)(msg[i].num_transfer_pos_ext), msg[i].transfer_pos_ext,
1178 num_transfer_pos, transfer_pos);
1185 size_t num_transfer_pos_ext =
count_pos_ext(num_transfer_pos, transfer_pos);
1187 if (num_transfer_pos_ext != (
size_t)(msg[i].num_transfer_pos_ext)) {
1192 num_transfer_pos_ext *
sizeof(*transfer_pos_ext)));
1197 num_transfer_pos, transfer_pos, num_transfer_pos_ext, transfer_pos_ext);
1200 if (buffer_size > 0) free(transfer_pos_buffer);
1209 size_t buffer_size = 0;
1210 for (
int i = 0; i < n; ++i) {
1211 assert(msg[i].transfer_pos == NULL
1212 && permutation_msg[i].transfer_pos == NULL);
1213 size_t curr_buffer_size
1214 = (size_t)(msg[i].num_transfer_pos)
1215 + (size_t)(permutation_msg[i].num_transfer_pos);
1216 if (curr_buffer_size > buffer_size) buffer_size = curr_buffer_size;
1219 int *transfer_pos_buffer
1221 ?
xmalloc(buffer_size *
sizeof(*transfer_pos_buffer))
1224 for (
int i = 0; i < n; ++i) {
1227 size_t num_transfer_pos = (size_t)(msg[i].num_transfer_pos);
1229 int *restrict transfer_pos = transfer_pos_buffer;
1231 (
size_t)(msg[i].num_transfer_pos_ext), msg[i].transfer_pos_ext,
1232 num_transfer_pos, transfer_pos);
1235 int *permutation = transfer_pos_buffer + num_transfer_pos;
1237 (
size_t)(permutation_msg[i].num_transfer_pos_ext),
1238 permutation_msg[i].transfer_pos_ext, num_transfer_pos, permutation);
1242 transfer_pos, num_transfer_pos, permutation);
1245 size_t num_transfer_pos_ext =
count_pos_ext(num_transfer_pos, permutation);
1247 if (num_transfer_pos_ext !=
1248 (
size_t)(permutation_msg[i].num_transfer_pos_ext)) {
1252 =
xrealloc(permutation_msg[i].transfer_pos_ext,
1253 num_transfer_pos_ext *
sizeof(*transfer_pos_ext));
1258 num_transfer_pos, permutation, num_transfer_pos_ext, transfer_pos_ext);
1261 if (buffer_size > 0) free(transfer_pos_buffer);
1270 xt_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1273 xmalloc((
size_t)n_in *
sizeof(*remote_out_msg));
1275 for (
int i = 0; i < n_in; ++i) {
1276 remote_out_msg[i].rank = in_msg[i].
rank;
1278 remote_out_msg[i].transfer_pos = NULL;
1284 n_out, out_msg, n_in, in_msg, remote_out_msg, tag_offset, comm);
1293 free(transfer_pos_ext_buffer);
1294 free(remote_out_msg);
1304 int n_out = xmap_intersection_ext_new->
n_out;
1305 int n_in = xmap_intersection_ext_new->
n_in;
1307 *out_msg = in_msg + n_in;
1309 int tag_offset = xmap_intersection_ext_new->
tag_offset;
1311 switch ((
int)type) {
1323 Xt_abort(comm,
"ERROR(xmap_intersection_ext_reorder):invalid reorder "
1327 return (
Xt_xmap)xmap_intersection_ext_new;
1338 size_t *num_pos_ext,
1341 size_t num_orig_pos,
const int *orig_pos,
1344 (void)num_orig_pos_ext;
1357 for (
size_t j = 0; j < num_orig_pos; ++j) {
1367 = *pos_ext =
xmalloc(num_pos_ext_ *
sizeof (*pos_ext_));
1374 const int *src_positions,
1375 const int *dst_positions) {
1378 size_t max_num_pos = 0;
1379 size_t n = (size_t)xmie_orig->
n_in + (
size_t)xmie_orig->
n_out;
1381 for (
size_t i = 0; i < n; ++i)
1386 ?
xmalloc((
size_t)max_num_pos *
sizeof(*pos_buffer))
1406 size_t *num_pos_ext,
1409 size_t num_orig_pos,
const int *orig_pos,
1412 (void)num_orig_pos; (void)orig_pos;
1417 size_t size_pos_ext = new_num_pos_ext *
sizeof (**pos_ext);
1420 for (
int i = 0; i < num_repetitions; ++i) {
1422 pos_ext_ + (size_t)i * num_orig_pos_ext;
1423 const int curr_displacement = displacements[i];
1424 for (
size_t j = 0; j < num_orig_pos_ext; ++j) {
1425 int start = orig_pos_ext[j].
start + curr_displacement,
1433 *num_pos_ext = new_num_pos_ext;
1440 const int src_displacements[num_repetitions],
1441 const int dst_displacements[num_repetitions]) {
1447 .num_repetitions = num_repetitions,
1448 .displacements = src_displacements },
1452 .displacements = dst_displacements });
1477 .get_num_transfer_pos_ext
1495 if (xmap_intersection_ext->
n_in == 0)
1501 iter->
msg = xmap_intersection_ext->
msg;
1511 if (xmap_intersection_ext->
n_out == 0)
1517 iter->
msg = xmap_intersection_ext->
msg + xmap_intersection_ext->
n_in;
1533 if (iter_intersection == NULL || iter_intersection->
msgs_left == 0)
1536 iter_intersection->
msg++;
1544 assert(iter != NULL);
1551 assert(iter != NULL);
1553 if ((!msg->num_transfer_pos) | (msg->transfer_pos != NULL)) { }
else {
1558 (
size_t)msg->num_transfer_pos_ext, msg->transfer_pos_ext,
1566 assert(iter != NULL);
1572 assert(iter != NULL);
1578 assert(iter != NULL);
#define ENSURE_ARRAY_SIZE(arrayp, curr_array_size, req_size)
add versions of standard API functions not returning on error
#define xrealloc(ptr, size)
const struct Xt_sort_algo_funcptr * sort_funcs
const struct xt_idxlist_vtable * vtable
struct Xt_pos_ext * pos_ext
void(* sort_int)(int *a, size_t n)
void(* sort_int_permutation)(int a[], size_t n, int permutation[])
struct exchange_ext msg[]
const struct Xt_xmap_vtable * vtable
struct exchange_ext * msg
const struct Xt_xmap_iter_vtable * vtable
struct Xt_pos_ext * transfer_pos_ext
const int *restrict displacements
struct Xt_pos_ext_vec cover
static int isign_mask(int x)
static int imin(int a, int b)
static const char filename[]
struct Xt_config_ xt_default_config
struct Xt_config_ * Xt_config
implementation of configuration object
struct Xt_xmap_ * Xt_xmap
struct Xt_idxlist_ * Xt_idxlist
size_t xt_cover_insert_or_overlap(struct Xt_pos_ext_vec *restrict cover, struct Xt_pos_range range, size_t search_start_pos)
void xt_cover_range_append(struct Xt_pos_ext_vec *restrict cover, struct Xt_pos_ext range)
void xt_cover_finish(struct Xt_pos_ext_vec *restrict cover)
bool xt_idxlist_pos_ext_is_full_cover(Xt_idxlist idxlist, struct Xt_pos_ext_vec cover)
void xt_cover_start(struct Xt_pos_ext_vec *restrict cover, size_t initial_size)
int xt_idxlist_get_pos_exts_of_index_stripes_custom(Xt_idxlist idxlist, int num_stripes, const struct Xt_stripe stripes[num_stripes], int *num_ext, struct Xt_pos_ext **pos_ext, int single_match_only, Xt_config config)
void xt_idxlist_get_index_stripes(Xt_idxlist idxlist, struct Xt_stripe **stripes, int *num_stripes)
void xt_idxlist_delete(Xt_idxlist idxlist)
Provide non-public declarations common to all index lists.
#define xt_idxlist_get_num_indices(idxlist)
PPM_DSO_INTERNAL Xt_idxlist xt_idxvec_get_idxstripes(Xt_idxlist idxlist)
MPI_Comm xt_mpi_comm_smart_dup(MPI_Comm comm, int *tag_offset)
void xt_mpi_comm_smart_dedup(MPI_Comm *comm, int tag_offset)
#define xt_mpi_call(call, comm)
@ xt_mpi_tag_xmap_intersection_data_exchange
@ xt_mpi_tag_xmap_intersection_header_exchange
exchange map declarations
struct Xt_xmap_iter_ * Xt_xmap_iter
@ XT_REORDER_RECV_UP
optimise data access on receiver side
@ XT_REORDER_NONE
no reordering
@ XT_REORDER_SEND_UP
optimise data access on sender side
contains declaration for the exchange map data structure
static const struct Xt_xmap_vtable xmap_intersection_vtable
Utility functions shared by xt_xmap_intersection and xt_xmap_intersection_ext.
static void print_miss_msg(Xt_idxlist dst_idxlist, int missing_pos, MPI_Comm comm, const char *source, int line) __attribute__((noreturn))
static size_t count_pos_ext(size_t num_pos, const int *restrict pos)
static void generate_pos(size_t num_pos_ext, const struct Xt_pos_ext *restrict pos_ext, size_t num_pos, int *restrict pos)
static void generate_pos_ext(size_t num_pos, const int *restrict pos, size_t num_pos_ext, struct Xt_pos_ext *restrict pos_ext)
static Xt_xmap xmap_intersection_ext_copy(Xt_xmap xmap)
static const struct Xt_xmap_iter_vtable xmap_iterator_intersection_ext_vtable
static int xmap_intersection_ext_get_num_destinations(Xt_xmap xmap)
struct Xt_xmap_iter_intersection_ext_ * Xt_xmap_iter_intersection_ext
static Xt_xmap xmap_intersection_ext_copy_(Xt_xmap xmap, int num_repetitions, Xt_pos_ext_copy pe_cpy_in, void *peci_state, Xt_pos_ext_copy pe_cpy_out, void *peco_state)
static int xmap_intersection_ext_get_max_dst_pos(Xt_xmap xmap)
static Xt_xmap_iter xmap_intersection_ext_get_out_iterator(Xt_xmap xmap)
Xt_xmap xt_xmap_intersection_ext_custom_new(int num_src_intersections, const struct Xt_com_list src_com[num_src_intersections], int num_dst_intersections, const struct Xt_com_list dst_com[num_dst_intersections], Xt_idxlist src_idxlist, Xt_idxlist dst_idxlist, MPI_Comm comm, Xt_config config)
static struct Xt_pos_ext * exchange_pos_ext_modifications(int num_src_intersections, const struct Xt_com_list src_com[num_src_intersections], int num_dst_intersections, const struct Xt_com_list dst_com[num_dst_intersections], struct exchange_ext dst_ext[num_dst_intersections], int(*restrict src_removals_per_intersection)[2], const int(*restrict dst_removals_per_intersection)[2], int tag_offset, MPI_Comm comm)
static void xmap_intersection_ext_get_destination_ranks(Xt_xmap xmap, int *ranks)
Xt_xmap xt_xmap_intersection_ext_new(int num_src_intersections, const struct Xt_com_list src_com[num_src_intersections], int num_dst_intersections, const struct Xt_com_list dst_com[num_dst_intersections], Xt_idxlist src_idxlist, Xt_idxlist dst_idxlist, MPI_Comm comm)
static const struct Xt_pos_ext * xmap_intersection_ext_iterator_get_transfer_pos_ext(Xt_xmap_iter iter)
static struct Xt_pos_ext * get_pos_exts_of_index_stripes(Xt_idxlist idxlist, int num_stripes, const struct Xt_stripe stripes[num_stripes], int *num_ext, int single_match_only, Xt_config config)
static void xmap_intersection_ext_get_source_ranks(Xt_xmap xmap, int *ranks)
static Xt_xmap xmap_intersection_ext_reorder(Xt_xmap xmap, enum xt_reorder_type type, Xt_config config)
static int xmap_intersection_ext_get_num_sources(Xt_xmap xmap)
static struct ted_result generate_dir_transfer_pos_ext_dst(int num_intersections, const struct Xt_com_list intersections[num_intersections], Xt_idxlist mypart_idxlist, struct exchange_ext *resSets, int(*restrict dst_removals_per_intersection)[2], Xt_config config)
static Xt_xmap xmap_intersection_ext_spread(Xt_xmap xmap, int num_repetitions, const int src_displacements[num_repetitions], const int dst_displacements[num_repetitions])
static int const * xmap_intersection_ext_iterator_get_transfer_pos(Xt_xmap_iter iter)
static int update_positions(size_t num_orig_pos_ext, size_t *num_pos_ext, struct Xt_pos_ext **pos_ext, const struct Xt_pos_ext *orig_pos_ext, size_t num_orig_pos, const int *orig_pos, void *state_)
static void reorder_transfer_pos_ext(int n_out, int n_in, struct exchange_ext *out_msg, struct exchange_ext *in_msg, int tag_offset, MPI_Comm comm, Xt_config config)
static void xmap_intersection_ext_msg_copy(size_t nmsg, const struct exchange_ext *restrict msg, int *nmsg_copy, struct exchange_ext *restrict msg_copy, int *max_pos_, int num_repetitions, Xt_pos_ext_copy pos_ext_copy, void *pec_state)
static struct Xt_stripe * refine_stripes(int *num_stripes_, struct Xt_stripe *restrict intersection_idxstripes, size_t num_pos_updates, const struct Xt_pos_ext *restrict pos_updates)
static int xmap_intersection_ext_iterator_get_num_transfer_pos(Xt_xmap_iter iter)
static int xmap_intersection_ext_get_max_src_pos(Xt_xmap xmap)
static void generate_transfer_ext(struct Xt_xmap_intersection_ext_ *xmap, int num_src_intersections, const struct Xt_com_list src_com[num_src_intersections], int num_dst_intersections, const struct Xt_com_list dst_com[num_dst_intersections], Xt_idxlist src_idxlist_local, Xt_idxlist dst_idxlist_local, MPI_Comm comm, Xt_config config)
static struct Xt_pos_ext * exchange_transfer_pos_ext(int n_out, const struct exchange_ext *restrict out_msg, int n_in, const struct exchange_ext *restrict in_msg, struct exchange_ext *restrict remote_out_msg, int tag_offset, MPI_Comm comm)
static void sort_transfer_pos_ext(int n, struct exchange_ext *msg, Xt_config config)
static Xt_xmap xmap_intersection_ext_update_positions(Xt_xmap xmap, const int *src_positions, const int *dst_positions)
static void xmap_intersection_ext_delete(Xt_xmap xmap)
static Xt_xmap_iter xmap_intersection_ext_get_in_iterator(Xt_xmap xmap)
static Xt_xmap_intersection_ext xmie(void *xmap)
static void remap_intersection(Xt_idxlist mypart_idxlist, Xt_idxlist intersection, size_t num_pos_updates, const struct Xt_pos_ext pos_updates[num_pos_updates], struct exchange_ext *resSet, int single_match_only, Xt_config config)
static MPI_Comm xmap_intersection_ext_get_communicator(Xt_xmap xmap)
static int xmap_intersection_ext_iterator_get_num_transfer_pos_ext(Xt_xmap_iter iter)
static void cut_pos_ext_from_pos_exts(struct Xt_pos_ext pos_ext, struct Xt_pos_ext_vec *pos_exts)
static int xmap_intersection_ext_iterator_get_rank(Xt_xmap_iter iter)
static struct Xt_pos_ext_overlap Xt_get_pos_ext_overlap(struct Xt_pos_ext a, struct Xt_pos_ext b)
static int pos_ext_copy_verbatim(size_t num_orig_pos_ext, size_t *num_pos_ext, struct Xt_pos_ext **pos_ext, const struct Xt_pos_ext *orig_pos_ext, size_t num_orig_pos, const int *orig_pos, void *state)
static void xt_free_exchange_ext(size_t num_msg, struct exchange_ext *restrict msg)
static struct tes_result generate_dir_transfer_pos_ext_src(int num_intersections, const struct Xt_com_list intersections[num_intersections], Xt_idxlist mypart_idxlist, struct exchange_ext *resSets, const int(*restrict removals_per_intersection)[2], const struct Xt_pos_ext *pos_updates, Xt_config config)
static Xt_xmap_iter_intersection_ext xmiei(void *iter)
static void remap_dst_intersections(int num_dst_intersections, const struct Xt_com_list dst_com[num_dst_intersections], Xt_idxlist mypart_idxlist, int resCount, struct exchange_ext resSets[resCount], const int(*removals_per_intersection)[2], Xt_config config)
static void xmap_intersection_ext_iterator_delete(Xt_xmap_iter iter)
struct Xt_xmap_intersection_ext_ * Xt_xmap_intersection_ext
static int pos_ext_find_max_pos(int num_pos_ext, const struct Xt_pos_ext *restrict pos_ext)
static int xmap_intersection_ext_iterator_next(Xt_xmap_iter iter)
int(* Xt_pos_ext_copy)(size_t num_orig_pos_ext, size_t *num_pos_ext, struct Xt_pos_ext **pos_ext, const struct Xt_pos_ext *orig_pos_ext, size_t num_orig_pos, const int *orig_pos, void *state)
static void sort_transfer_pos_ext_permutation(int n, struct exchange_ext *msg, struct exchange_ext *permutation_msg, Xt_config config)
static int pos_ext_copy_spread(size_t num_orig_pos_ext, size_t *num_pos_ext, struct Xt_pos_ext **pos_ext, const struct Xt_pos_ext *orig_pos_ext, size_t num_orig_pos, const int *orig_pos, void *state)