77#define MIN(a,b) (((a)<(b))?(a):(b))
78#define MAX(a,b) (((a)>(b))?(a):(b))
108 size_t num_stripes_alloc);
121 int num_stripes,
const struct Xt_stripe stripes[num_stripes],
123 int single_match_only,
132 int * position,
int offset);
156 .get_positions_of_indices = NULL,
159 .get_positions_of_indices_off = NULL,
163 .get_bounding_box = NULL,
171int MPI_Get_address(XT_MPI_GET_ADDRESS_LOCATION_CONST
void *location, MPI_Aint *address);
179 MPI_Aint base_address, nstrides_address;
181 MPI_Get_address(&stripe, &base_address);
182 MPI_Get_address(&stripe.
nstrides, &nstrides_address);
184 enum { num_stripe_dt_elems = 2 };
185 static const int block_lengths[num_stripe_dt_elems] = {2,1};
186 MPI_Aint displacements[num_stripe_dt_elems]
187 = { 0, nstrides_address - base_address };
188 static const MPI_Datatype types[num_stripe_dt_elems]
190 MPI_Datatype stripe_dt_unaligned;
192#ifdef XT_MPI_TYPE_CREATE_STRUCT_CONST_ARRAY_ARGS
194 block_lengths, displacements, types,
195 &stripe_dt_unaligned), Xt_default_comm);
198 (
int *)block_lengths, displacements,
199 (MPI_Datatype *)types,
200 &stripe_dt_unaligned), Xt_default_comm);
203 (MPI_Aint)
sizeof(stripe),
264#define SORT_TYPE struct Xt_stripes_sort
265#define SORT_TYPE_SUFFIX stripes_sort
266#define XT_SORTFUNC_DECL static
267#define SORT_TYPE_CMP_LT(a,b,...) ((a).range.min < (b).range.min)
268#define SORT_TYPE_CMP_LE(a,b,...) ((a).range.min <= (b).range.min)
269#define SORT_TYPE_CMP_EQ(a,b,...) ((a).range.min == (b).range.min)
272#undef SORT_TYPE_SUFFIX
273#undef SORT_TYPE_CMP_LT
274#undef SORT_TYPE_CMP_LE
275#undef SORT_TYPE_CMP_EQ
276#undef XT_SORTFUNC_DECL
295 int ntrans_up=0, ntrans_dn=0;
296 Xt_int min, max, prev_end;
297 long long num_indices;
299 int nstrides = stripes[0].nstrides;
300 Xt_int stride = stripes[0].stride;
301 prev_end = (
Xt_int)(stripes[0].start
302 + stride * (nstrides-1));
303 ntrans_up += ((nstrides - 1) & ~((stride > 0)-1));
304 ntrans_dn += ((nstrides - 1) & ~((stride < 0)-1));
305 num_indices = (
long long)nstrides;
307 stripes_sort[0].range = stripe_range;
308 stripes_sort[0].position = 0;
312 size_t num_stripes = (size_t)idxstripes->
num_stripes;
313 int sign_err = stripes[0].nstrides;
314 unsigned have_zero_stride = stripes[0].stride == 0,
315 stripes_intersect = have_zero_stride & (stripes[0].nstrides > 1);
316 for (
size_t i = 1; i < num_stripes; ++i) {
318 int nstrides = stripes[i].nstrides;
319 Xt_int start = stripes[i].start, stride = stripes[i].stride;
320 stripes_sort[i].range = stripe_range;
321 stripes_sort[i].position = (int)i;
324 num_indices += (
long long)nstrides;
325 sign_err |= nstrides;
326 have_zero_stride |= stride == 0;
327 stripes_intersect |= (stride == 0 && nstrides > 1);
328 ntrans_up += (start > prev_end) + ((nstrides - 1) & ~((stride > 0)-1));
329 ntrans_dn += (start < prev_end) + ((nstrides - 1) & ~((stride < 0)-1));
330 prev_end = (
Xt_int)(start + stride * (nstrides-1));
334 static const char template[] =
"ERROR: %s called with invalid stripes";
335 size_t buf_size =
sizeof (
template) - 2 + strlen(caller);
337 snprintf(msg, buf_size,
template, caller);
340 if (num_indices > 0) {
341 assert(num_indices <= INT_MAX);
342 xt_mergesort_stripes_sort(stripes_sort, num_stripes);
343 stripes_sort[stripes_sort[0].position].inv_position = 0;
344 unsigned stripes_do_overlap = 0;
345 for (
size_t i = 1; i < num_stripes; ++i) {
347 |= stripes_sort[i - 1].range.max >= stripes_sort[i].range.min;
348 stripes_sort[stripes_sort[i].position].inv_position = (int)i;
350 stripes_intersect = stripes_intersect
351 || (stripes_do_overlap
381 size_t header_size = ((
sizeof (
struct Xt_idxstripes_)
382 + (sizeof (struct Xt_stripes_sort)
383 * (size_t)num_stripes)
384 +
sizeof (
struct Xt_stripe) - 1)
385 / sizeof (struct Xt_stripe))
386 *
sizeof (
struct Xt_stripe),
387 body_size =
sizeof (
struct Xt_stripe) * (size_t)num_stripes;
389 struct Xt_stripe *stripes_assign
390 = (
struct Xt_stripe *)(
void *)((
unsigned char *)
idxstripes + header_size);
391 assert(num_stripes <= INT_MAX);
394 return (
struct Xt_stripes_alloc){ .idxstripes =
idxstripes,
395 .stripes = stripes_assign };
408 if (num_stripes > 0) {
446 if (num_stripes > 0) {
467 if (data == NULL)
return;
480 int size_header, size_stripes = 0;
482 xt_mpi_call(MPI_Pack_size(2, MPI_INT, comm, &size_header), comm);
485 &size_stripes), comm);
487 return (
size_t)size_header + (size_t)size_stripes;
501 buffer_size, position, comm), comm);
505 buffer, buffer_size, position, comm), comm);
516 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
517 &num_stripes, 1, MPI_INT, comm), comm);
524 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
532#define XT_IDXSTRIPES_POS_EXT_MAP_COUNT
534#undef XT_IDXSTRIPES_POS_EXT_MAP_COUNT
542 INSTR_DEF(instr,
"compute_intersection_fallback")
553 &num_ext, &pos_exts,
false,
558 size_t num_result_stripes
559 = get_mapped_stripes_count((
size_t)num_ext, pos_exts, idxstripes_src);
564 map_ext2stripes((
size_t)num_ext, pos_exts, idxstripes_src,
571 intersection = intersection_unsorted;
598 b = (
Xt_int)(prev_a - q * b);
601 s = (
Xt_int)(prev_u - q * s);
604 t = (
Xt_int)(prev_v - q * t);
618 INSTR_DEF(instr,
"get_stripe_intersection")
621 struct Xt_bounded_stripe {
622 Xt_int min, max, stride, representative;
625 struct Xt_bounded_stripe bsa, bsb, bsi;
627 Xt_int stride_zero_mask_a = (stripe_a.stride != 0) - 1,
628 stride_zero_mask_b = (stripe_b.stride != 0) - 1;
631 bsa.min = (
Xt_int)(stripe_a.start
632 + (mask & (stripe_a.stride * (stripe_a.nstrides - 1))));
633 bsa.max = (
Xt_int)(stripe_a.start
634 + (~mask & (stripe_a.stride * (stripe_a.nstrides - 1))));
636 bsa.representative = stripe_a.start;
639 bsb.min = (
Xt_int)(stripe_b.start
640 + (mask & (stripe_b.stride * (stripe_b.nstrides - 1))));
641 bsb.max = (
Xt_int)(stripe_b.start
642 + (~mask & (stripe_b.stride * (stripe_b.nstrides - 1))));
644 bsb.representative = stripe_b.start;
646 bsa.stride = (
Xt_int)((stripe_a.stride & ~stride_zero_mask_a)
647 | (stride_zero_mask_a & 1));
648 bsb.stride = (
Xt_int)((stripe_b.stride & ~stride_zero_mask_b)
649 | (stride_zero_mask_b & 1));
653 Xt_int abs_stride_b = XT_INT_ABS(bsb.stride);
655 Xt_long start_diff = (Xt_long)stripe_a.start - stripe_b.start;
657 = (
Xt_int)(bsb.representative
658 + (start_diff/abs_stride_b
659 + (start_diff%abs_stride_b > abs_stride_b/2))
661 start_diff = (Xt_long)bsb.representative - bsa.representative;
663 Xt_long start_diff =
xiisub(stripe_a.start, stripe_b.start);
664 Xt_idiv start_diff_abs_stride_b_div
667 = (
Xt_int)(bsb.representative
668 + (start_diff_abs_stride_b_div.
quot
669 + start_diff_abs_stride_b_div.
rem > abs_stride_b/2)
671 start_diff =
xiisub(bsb.representative, bsa.representative);
673 Xt_int abs_stride_a = XT_INT_ABS(bsa.stride);
675 bsi.min =
MAX(bsa.min, bsb.min);
676 bsi.max =
MIN(bsa.max, bsb.max);
684 if (eg.
u && bsb.representative != bsa.representative)
696 struct Xt_stripe intersection;
699 bsi.stride = (
Xt_int)temp_stride;
703 = (
Xt_int)(bsa.representative
704 + ((bsb.representative - bsa.representative) * eg.
u
705 * abs_stride_a / eg.
gcd));
707 Xt_int abs_bsi_stride = XT_INT_ABS(temp_stride),
708 r_diff = (
Xt_int)(bsi.min - some_rep),
709 steps = (
Xt_int)(r_diff / abs_bsi_stride);
710 steps = (
Xt_int)(steps + (steps * abs_bsi_stride < r_diff));
711 min_rep = (
Xt_int)(some_rep + steps * abs_bsi_stride);
712 bsi.representative = (
Xt_int)min_rep;
714 nstrides = (int)((bsi.max - min_rep)/temp_stride +
llsign(temp_stride));
716 = ((((bsb.representative - bsa.representative) % eg.
gcd) == 0)
717 & (bsi.stride == temp_stride || abs(
nstrides) == 1));
719 strides_mask = ~(((even_divide) & (bsi.min <= bsi.max)
720 & (min_rep <= bsi.max) & (min_rep >= bsi.min)) - 1);
723 intersection.
start = (
Xt_int)((bsa.stride >= 0) ? min_rep : max_rep);
726#define ABS(x) (((x) < 0) ? -(x) : (x))
727 Xt_long temp_stride =
xiimul(abs_stride_a, bsb.stride)/eg.
gcd;
728 bsi.stride = (
Xt_int)temp_stride;
733 bool min_rep_in_range;
736 if (bsb.representative != bsa.representative) {
739 Xt_long temp_1 = start_diff * eg.
u;
740 Xt_tword temp_2 = xlimulu(temp_1, (
Xt_uint)abs_stride_a);
752 | (XT_INT_ABS((
Xt_int)(temp_2.
hi << 1) | ((Xt_long)temp_2.midlo < 0)) < eg.
gcd);
755 = (Xt_long)bsa.representative + ((Xt_long)temp_2.midlo / eg.
gcd);
757 min_rep_in_range =
true;
758 some_rep = bsa.representative;
761 Xt_long abs_bsi_stride = ABS(temp_stride),
762 r_diff = bsi.min - some_rep,
763 steps = r_diff / abs_bsi_stride;
765 steps = steps + (steps * abs_bsi_stride < r_diff);
768 min_rep += steps * abs_bsi_stride;
770 bsi.representative = (
Xt_int)min_rep;
772 int min_rep_mask = ~((int)min_rep_in_range - 1);
773 nstrides = (int)((((bsi.max - min_rep)/temp_stride)+
xlsign(temp_stride))
775 | (~min_rep_mask & (bsb.representative == bsa.representative));
777 = ((start_diff%eg.
gcd == 0) & (bsi_stride_in_range || abs(
nstrides) == 1));
779 strides_mask = ~(((even_divide) & (bsi.min <= bsi.max)
780 & (min_rep <= bsi.max) & (min_rep >= bsi.min)) - 1);
783 intersection.
start = (
Xt_int)((bsa.stride >= 0) ? min_rep : max_rep);
785 Xt_long temp_stride =
xlldiv(
xiimul(abs_stride_a, bsb.stride),
787 bsi.stride = (
Xt_int)temp_stride.
lo;
794 bool min_rep_in_range;
797 if (bsb.representative != bsa.representative) {
800 =
xiimul(bsb.representative - bsa.representative, eg.
u);
801 Xt_tword temp_2 =
xlimul(temp_1, abs_stride_a);
806 Xt_long t2 = (Xt_long){.hi = temp_2.
mid, .lo = temp_2.
lo };
810 temp_3.
rem = (Xt_long){.hi = 0, .lo = 0 };
812 temp_3.
quot = (Xt_long){ 0, 0 };
815 temp_3 =
xlldiv((Xt_long){.hi = temp_2.
mid, .lo = temp_2.
lo },
817 some_rep =
xliadd(temp_3.
quot, bsa.representative);
819 min_rep_in_range =
true;
820 some_rep =
xi2l(bsa.representative);
823 Xt_long abs_bsi_stride =
xlabs(temp_stride),
824 r_diff =
xilsub(bsi.min, some_rep);
825 Xt_ldiv steps =
xlldiv(r_diff, abs_bsi_stride);
828 && steps.
rem.
lo != 0));
830 min_rep =
xlladd(some_rep,
836 bsi.representative = (
Xt_int)min_rep.
lo;
838 if (min_rep_in_range)
841 nstrides = bsb.representative == bsa.representative;
843 = ((((bsb.representative - bsa.representative) % eg.
gcd) == 0)
844 & (bsi_stride_in_range || abs(
nstrides) == 1));
846 strides_mask = ~(((even_divide) & (bsi.min <= bsi.max)
851 intersection.
start = ((bsa.stride >= 0) ? (
Xt_int)min_rep.
lo : max_rep);
858 & ~((int)stride_zero_mask_a & (int)stride_zero_mask_b))
859 | (stripe_b.nstrides & (int)stride_zero_mask_a & (int)stride_zero_mask_b);
869 INSTR_DEF(instr,
"idxstripes_compute_intersection")
872 struct Xt_stripe *restrict inter_stripes = NULL;
873 size_t num_inter_stripes = 0;
874 size_t inter_stripes_array_size = 0;
878 *restrict dst_stripes_sort = idxstripes_dst->
stripes_sort;
880 *restrict stripes_dst = idxstripes_dst->
stripes;
882 size_t i_src = 0, i_dst = 0;
883 size_t num_stripes_src = (size_t)idxstripes_src->
num_stripes,
884 num_stripes_dst = (
size_t)idxstripes_dst->
num_stripes;
886 while ((i_src < num_stripes_src) &
887 (i_dst < num_stripes_dst)) {
889 while (i_src < num_stripes_src &&
890 src_stripes_sort[i_src].range.max
891 < dst_stripes_sort[i_dst].range.min) ++i_src;
893 if ( i_src >= num_stripes_src )
break;
895 while (i_dst < num_stripes_dst &&
896 dst_stripes_sort[i_dst].range.max
897 < src_stripes_sort[i_src].range.min) ++i_dst;
899 if ( i_dst >= num_stripes_dst )
break;
901 if ((src_stripes_sort[i_src].range.min
902 <= dst_stripes_sort[i_dst].range.max)
903 & (src_stripes_sort[i_src].range.max
904 >= dst_stripes_sort[i_dst].range.min)) {
906 num_inter_stripes+1);
909 inter_stripes[num_inter_stripes] = intersection_stripe =
911 stripes_dst[dst_stripes_sort[i_dst].position]);
912 num_inter_stripes += intersection_stripe.
nstrides > 0;
915 if (dst_stripes_sort[i_dst].range.max
916 < src_stripes_sort[i_src].range.max)
922 if (num_inter_stripes) {
924 struct Xt_stripe prev_stripe = inter_stripes[0];
925 if (prev_stripe.
stride < 0) {
931 inter_stripes[0] = prev_stripe;
933 for (
size_t i = 1; i < num_inter_stripes; ++i) {
934 struct Xt_stripe stripe = inter_stripes[i];
942 == (prev_stripe.
start
946 inter_stripes[j].nstrides = prev_stripe.
nstrides;
950 inter_stripes[++j] = stripe;
951 prev_stripe = stripe;
954 num_inter_stripes = j + 1;
973 if ((idxstripes_src->
flags | idxstripes_dst->flags)
1007 size_t nsrc_stripes = (size_t)idxstripes_src->
num_stripes,
1008 num_found_indices = 0;
1010 size_t_bits =
sizeof (size_t) * CHAR_BIT,
1013 =
xcalloc(((ndst + size_t_bits - 1)/ size_t_bits),
1015 for (
size_t i = 0; i < ndst; ++i) {
1016 Xt_int dst_idx = indices[i];
1017 for (
size_t j = 0; j < nsrc_stripes; ++j)
1019 num_found_indices++;
1020 found[i/size_t_bits] |= (size_t)1 << (i%size_t_bits);
1025 if (num_found_indices) {
1028 for (
size_t i = 0, inter_ofs = 0; i < ndst; ++i) {
1029 size_t fofs = i / size_t_bits;
1030 if (found[fofs]&((
size_t)1 << (i%size_t_bits))) {
1031 found_indices[inter_ofs] = indices[i];
1039 return intersection;
1056 Xt_int adj = (
Xt_int)(stride >= 0 ? pfx_remove : nstrides - 1 - pfx_remove);
1057 return (
Xt_int)(min + stride * adj);
1062 const int pfx_remove_a,
const int pfx_remove_b)
1065 if (a.
nstrides - pfx_remove_a == 0)
1067 if (b.
nstrides - pfx_remove_b == 0)
1072 amin = XT_INT_ABS(a.
stride);
1073 bmin = XT_INT_ABS(b.
stride);
1078#define SORT_TYPE int
1079#define SORT_TYPE_SUFFIX stripe_by_min
1080#define SORT_TYPE_CMP_LT(u,v,i,j) \
1081 (stripe_min_cmp_lt(stripes[(u)], stripes[(v)], \
1082 pfx_removed[(u)], pfx_removed[(v)]))
1083#define XT_SORT_EXTRA_ARGS_DECL , int *restrict pfx_removed, \
1084 const struct Xt_stripe stripes[]
1085#define XT_SORT_EXTRA_ARGS_PASS , pfx_removed, stripes
1086#define XT_SORTFUNC_DECL static
1093 int *src_permutation,
1096 size_t min_stripe_pos = (size_t)src_permutation[0];
1097 struct Xt_stripe minstripe = src_stripes[min_stripe_pos];
1099 if (minstripe.
stride >= 0) {
1101 + minstripe.
stride * pfx_removed[min_stripe_pos]);
1104 + (minstripe.
nstrides - 1 - pfx_removed[min_stripe_pos])
1107 ++pfx_removed[min_stripe_pos];
1108 xt_heapify_stripe_by_min(src_permutation, num_src_stripes,
1109 0, pfx_removed, src_stripes);
1119 int *restrict src_permutation
1120 =
xmalloc(num_src_stripes * 2 *
sizeof (*src_permutation)),
1121 *restrict pfx_removed = src_permutation + num_src_stripes;
1122 size_t indices_left = 0;
1123 for (
size_t i = 0; i < num_src_stripes; ++i) {
1124 src_permutation[i] = (int)i;
1126 indices_left += (size_t)src_stripes[i].
nstrides;
1129 xt_heapsort_stripe_by_min(src_permutation, num_src_stripes,
1130 pfx_removed, src_stripes);
1133 =
xmalloc(
sizeof (*dst_stripes) * num_src_stripes);
1134 size_t num_dst_stripes = 0, size_dst_stripes = num_src_stripes;
1137 src_permutation, pfx_removed);
1138 dst_stripes[0] = (
struct Xt_stripe){ .
start = minidx, .stride = 1,
1140 num_dst_stripes = 1;
1143 for (; indices_left != 0; --indices_left) {
1146 src_permutation, pfx_removed);
1148 size_t i = num_dst_stripes - 1;
1149 if (dst_stripes[i].nstrides == 1) {
1150 dst_stripes[i].
stride = (
Xt_int)(minidx - dst_stripes[i].start);
1154 dst_stripes[i].start
1155 + dst_stripes[i].stride * dst_stripes[i].nstrides)) {
1159 dst_stripes[num_dst_stripes] = (
struct Xt_stripe){ .
start = minidx,
1168 free(src_permutation);
1181 for (
size_t i = 0; i < num_stripes; ++i) {
1182 Xt_int src_start = src_stripes[num_stripes-1-i].
start,
1183 src_stride = src_stripes[num_stripes-1-i].
stride;
1185 inverted_stripes[i] = (
struct Xt_stripe){
1187 .stride = (
Xt_int)-src_stride,
1199 sort_flags -= sort_flags < 3;
1201 if (sort_flags == 1)
1203 else if (sort_flags == -1)
1208 return sorted_idxlist;
1213 INSTR_DEF(instr,
"idxstripes_get_indices")
1218 size_t num_stripes = (size_t)idxstripes->
num_stripes, ofs = 0;
1219 for (
size_t i = 0; i < num_stripes; ofs += (size_t)stripes[i].nstrides, ++i )
1220 for (
size_t j = 0; j < (size_t)stripes[i].nstrides; ++j)
1221 indices[ofs+j] = (
Xt_int)(stripes[i].start
1222 + (
Xt_int)j * stripes[i].stride);
1236 =
xmalloc((
size_t)num_indices *
sizeof (*tmp_index_array));
1244 size_t num_stripes_alloc)
1247 (void)num_stripes_alloc;
1249 INSTR_DEF(instr,
"idxstripes_get_index_stripes")
1252 assert((
size_t)idxstripes->
num_stripes <= num_stripes_alloc);
1253 memcpy(stripes, idxstripes->
stripes,
1254 (
size_t)idxstripes->
num_stripes * sizeof (stripes[0]));
1285 INSTR_DEF(instr,
"idxstripes_get_index_at_position")
1290 if (position >= 0 && position < idxlist->num_indices) {
1293 size_t num_stripes = (size_t)idxstripes->
num_stripes;
1295 while (i < num_stripes && position >= stripes[i].
nstrides) {
1296 position-= stripes[i].nstrides;
1312 int num_pos,
Xt_int *index,
1315 INSTR_DEF(instr,
"idxstripes_get_indices_at_positions")
1326 int stripe_start_pos = 0;
1327 int undef_count = 0;
1329 for (
int ipos = 0; ipos < num_pos; ipos++) {
1331 int seek_pos = positions[ipos];
1333 if (seek_pos >= 0 && seek_pos < max_pos) {
1334 while (seek_pos < stripe_start_pos) {
1338 die(
"idxstripes_get_indices_at_positions: internal error:"
1339 " crossed 0-boundary");
1341 stripe_start_pos -= (int)stripes[istripe].
nstrides;
1344 while (seek_pos > stripe_start_pos + stripes[istripe].
nstrides - 1) {
1345 stripe_start_pos += (int)stripes[istripe].
nstrides;
1349 die(
"idxstripes_get_indices_at_positions: internal error:"
1350 " crossed upper boundary");
1354 sub_pos = seek_pos - stripe_start_pos;
1358 index[ipos] = undef_idx;
1377 int * position,
int offset) {
1379 INSTR_DEF(instr,
"idxstripes_get_position_of_index_off")
1383 size_t offset_ = (offset < 0) ? (
size_t)0 : (size_t)offset;
1390 size_t num_stripes = (size_t)idxstripes->
num_stripes, i;
1391 size_t position_offset = 0;
1394 position_offset + (size_t)stripes[i].nstrides <= offset_;
1396 position_offset += (
size_t)stripes[i].nstrides;
1398 for (; i < num_stripes;
1399 position_offset += (size_t)stripes[i].nstrides, ++i) {
1401 size_t inv_pos = (size_t)stripes_sort[i].inv_position;
1402 struct Xt_minmax range = stripes_sort[inv_pos].range;
1404 if (index > range.
max || index < range.
min)
1408 Xt_int stride = stripes[i].stride;
1409 if (index != stripes[i].start) {
1410 Xt_int index_offset = (
Xt_int)(index - stripes[i].start);
1411 if (index_offset % stride)
1413 rel_start = (int)(index_offset/stripes[i].stride);
1414 }
else if (stride != 0)
1417 rel_start = (int)(
MAX(offset_, position_offset) - position_offset);
1418 *position = (int)((
size_t)rel_start + position_offset);
1434 size_t num_pos_exts_ = result->num_pos_ext,
1435 size_pos_exts_ = result->size_pos_ext;
1436 struct Xt_pos_ext *restrict pos_exts_ = result->pos_ext;
1439 if (num_pos_exts_ + 1 == size_pos_exts_)
1441 size_pos_exts_ += 16;
1443 result->pos_ext = pos_exts_ = (
struct Xt_pos_ext *)
1444 xrealloc(pos_exts_ - 1, (size_pos_exts_ + 1) *
sizeof (*pos_exts_)) + 1;
1445 result->size_pos_ext = size_pos_exts_;
1447 pos_exts_[num_pos_exts_] = pos_ext;
1448 result->num_pos_ext = num_pos_exts_ + 1;
1452 pos_exts_[num_pos_exts_ - 1].size
1453 =
isign(pos_ext.
start - pos_exts_[num_pos_exts_ - 1].start)
1454 * (abs(pos_exts_[num_pos_exts_ - 1].
size) + abs(pos_ext.
size));
1470 const struct Xt_stripe *restrict stripes;
1471 db->stripes = stripes = idxstripes->
stripes;
1472 size_t num_db_stripes = (size_t)idxstripes->
num_stripes;
1476 size_t_bits =
sizeof (size_t) * CHAR_BIT,
1478 size_t overlap_handling = stripes_do_overlap
1479 ? ((num_db_stripes + size_t_bits - 1)/size_t_bits+1) *
sizeof (
size_t) : 0,
1480 align_adjust = !stripes_do_overlap ? (num_db_stripes + 1) *
sizeof (
int):
1481 ((num_db_stripes + 1)*
sizeof (int)+
sizeof (size_t)-1)
1482 /
sizeof (
size_t) *
sizeof (size_t);
1484 int *restrict db_stripes_nstrides_psum
1485 =
xmalloc(align_adjust + overlap_handling);
1486 if (stripes_do_overlap)
1487 db->stripe_candidate_bitmask
1488 = (
void *)((
unsigned char *)db_stripes_nstrides_psum+align_adjust);
1489 int accum = db_stripes_nstrides_psum[0] = 0;
1490 for (
size_t j = 0; j < num_db_stripes; ++j) {
1491 accum += stripes[j].nstrides;
1492 db_stripes_nstrides_psum[j + 1] = accum;
1495 db->num_stripes = num_db_stripes;
1496 db->stripes_nstrides_psum = db_stripes_nstrides_psum;
1497 db->stripes_do_overlap = stripes_do_overlap;
1498 db->stripes_do_intersect = stripes_do_intersect;
1503 free(db->stripes_nstrides_psum);
1521 if ((a && n > 0)) ;
else return n;
1528 if (a[m].range.min > min_key)
1536 return a[
right].range.min <= min_key ?
right : n;
1544 bool single_match_only,
1550 size_t num_db_stripes = db->num_stripes;
1551 const struct Xt_stripes_sort *restrict db_stripes_sort = db->stripes_sort;
1554 if (db->stripes_do_overlap) {
1558 != num_db_stripes) {
1559 int stripes_start_pos = db_stripes_sort[start_pos].position;
1562 && (!single_match_only
1563 || (!db->stripes_do_intersect
1567 .start = db->stripes_nstrides_psum[stripes_start_pos],
1568 .end = db->stripes_nstrides_psum[stripes_start_pos]
1569 + query.
nstrides -1 }, 0) == SIZE_MAX
1571 candidates->
num = 1;
1572 if (candidates->
size == 0)
1573 candidates->
p.
e1 = stripes_start_pos;
1575 candidates->
p.
vec[0] = stripes_start_pos;
1582 if (start_pos != num_db_stripes) {
1583 assert(db_stripes_sort[start_pos].
range.
min <= query_minmax.
max);
1584 size_t end_pos = start_pos;
1585 while (end_pos < num_db_stripes
1586 && db_stripes_sort[end_pos].
range.
min <= query_minmax.
max)
1590 size_t num_candidates;
1591 if (!db->stripes_do_overlap)
1593 while (start_pos > 0
1594 && (db_stripes_sort[start_pos - 1].
range.
max >= query_minmax.
min))
1596 num_candidates = end_pos - start_pos;
1597 if (candidates->
size == 0 && num_candidates == 1) {
1598 candidates->
p.
e1 = db_stripes_sort[start_pos].position;
1599 candidates->
num = 1;
1602 if (candidates->
size < num_candidates) {
1603 int *prevp = (candidates->
size == 0) ? NULL : candidates->
p.
vec;
1606 *
sizeof (candidates->
p.
vec[0]));
1607 candidates->
size = num_candidates;
1609 candidates->
num = num_candidates;
1610 int *restrict candidates_vec = candidates->
p.
vec;
1611 for (
size_t i = 0; i < num_candidates; ++i)
1612 candidates_vec[i] = db_stripes_sort[start_pos + i].
position;
1617 size_t min_candidate = start_pos;
1618 const struct Xt_stripe *stripes = db->stripes;
1620#ifdef XT_USE_FAST_DIVISIBLE_TEST
1621 struct xt_fast_div_coeff stride_div_test_coeff;
1622 stride_div_test_coeff = get_fast_div_coeff((
Xt_uint)query_stride);
1624 size_t *bm_store = db->stripe_candidate_bitmask;
1625 size_t i = end_pos - 1;
1627 size_t_bits =
sizeof (size_t) * CHAR_BIT,
1629 size_t pred_accum = 0;
1630 if (i >= size_t_bits) {
1631 if ((i+1) % size_t_bits) {
1632 for (; (i+1) % size_t_bits; --i)
1635 = db_stripes_sort[i].range.max >= query_minmax.
min;
1636 size_t pos = (size_t)db_stripes_sort[i].position;
1638 &= (XT_INT_ABS(stripes[pos].stride) != query_stride
1640 (
Xt_int)(query_start - stripes[pos].start)));
1641 num_candidates += predicate;
1642 size_t predicate_mask = predicate - 1;
1643 min_candidate = (min_candidate & predicate_mask)
1644 | (i & ~predicate_mask);
1645 pred_accum = (pred_accum << 1) | predicate;
1647 bm_store[(i+1)/size_t_bits] = pred_accum;
1650 assert(i % size_t_bits == size_t_bits - 1);
1651 for (; i >= size_t_bits;) {
1652 for (
size_t j = 0; j < size_t_bits; ++j, --i) {
1654 = db_stripes_sort[i].range.max >= query_minmax.
min;
1655 size_t pos = (size_t)db_stripes_sort[i].position;
1657 &= (XT_INT_ABS(stripes[pos].stride) != query_stride
1659 (
Xt_int)(query_start - stripes[pos].start)));
1660 num_candidates += predicate;
1661 size_t predicate_mask = predicate - 1;
1662 min_candidate = (min_candidate & predicate_mask)
1663 | (i & ~predicate_mask);
1664 pred_accum = (pred_accum << 1) | predicate;
1666 bm_store[(i+1)/size_t_bits] = pred_accum;
1670 for (; i != SIZE_MAX; --i)
1673 = db_stripes_sort[i].range.max >= query_minmax.
min;
1674 size_t pos = (size_t)db_stripes_sort[i].position;
1676 &= (XT_INT_ABS(stripes[pos].stride) != query_stride
1678 (
Xt_int)(query_start - stripes[pos].start)));
1679 num_candidates += predicate;
1680 size_t predicate_mask = predicate - 1;
1681 min_candidate = (min_candidate & predicate_mask)
1682 | (i & ~predicate_mask);
1683 pred_accum = (pred_accum << 1) | predicate;
1685 bm_store[0] = pred_accum;
1686 if (candidates->
size == 0 && num_candidates == 1) {
1687 candidates->
p.
e1 = db_stripes_sort[min_candidate].position;
1688 candidates->
num = num_candidates;
1691 if (candidates->
size < num_candidates+1) {
1692 int *prevp = (candidates->
size == 0) ? NULL : candidates->
p.
vec;
1694 (num_candidates + 1)
1695 *
sizeof (candidates->
p.
vec[0]));
1696 candidates->
size = num_candidates + 1;
1698 candidates->
num = num_candidates;
1699 int *restrict candidates_vec = candidates->
p.
vec;
1701 next_bm_mult = (min_candidate + size_t_bits - 1)/size_t_bits*size_t_bits,
1702 predicates = (bm_store[min_candidate/size_t_bits]
1703 >> (min_candidate % size_t_bits));
1704 if (num_candidates == 1) {
1705 end_pos = min_candidate+1;
1706 assert(predicates&1);
1708 for (i = min_candidate; i < end_pos; next_bm_mult+=size_t_bits) {
1709 size_t ulim =
MIN(end_pos, next_bm_mult);
1710 for (; i < ulim; ++i) {
1711 size_t predicate = predicates&1;
1714 candidates_vec[j] = db_stripes_sort[i].position;
1717 predicates = bm_store[i/size_t_bits];
1719 assert(j == num_candidates);
1723 candidates->
num = 0;
1730 size_t expansion = 0;
1734 struct Xt_stripe *restrict expanded_stripes
1735 =
xmalloc((num_stripes + expansion) *
sizeof (expanded_stripes[0]));
1737 for (
size_t i = 0; i < num_stripes; ++i) {
1739 if (stripe.
stride == 0) {
1740 for (
size_t k = 0; k < (size_t)stripe.
nstrides; ++k)
1741 expanded_stripes[j + k] = (
struct Xt_stripe){ .start = stripe.
start,
1746 expanded_stripes[j] = stripe;
1752 (
int)(num_stripes + expansion));
1762 bool single_match_only,
1763 size_t num_candidates,
1764 int *restrict candidates);
1770 const struct Xt_stripe stripes[num_stripes],
1773 int single_match_only,
1776 size_t unmatched = 0;
1784 idxstripes->stripes);
1796 struct int_vec candidates = { .size = 0, .p.vec = NULL };
1797 for (
size_t i = 0; i < (size_t)num_stripes; ++i) {
1805 single_match_only, &cover, config);
1806 if (candidates.
num) {
1809 query, &stripes_db, &result, &cover, single_match_only != 0,
1810 candidates.
num, candidates.
size != 0
1811 ? candidates.
p.
vec : &candidates.
p.
e1);
1812 }
while ((stripes[i].
stride == 0) & (--j > 0));
1815 if (candidates.
size)
1816 free(candidates.
p.
vec);
1825 free(idxstripes->stripes);
1830 return (
int)unmatched;
1839 size_t num_candidates,
1840 int *restrict candidates);
1854 bool single_match_only,
1855 size_t num_candidates,
1856 int *restrict candidates);
1864 bool single_match_only,
1865 size_t num_candidates,
1866 int *restrict candidates)
1868 size_t unmatched = 0;
1869 if (single_match_only)
1871 query, pos_ext2add, stripes_lookup, result, cover,
1872 num_candidates, candidates);
1886 bool single_match_only,
1887 size_t num_candidates,
1888 int *restrict candidates)
1890 size_t unmatched = 0;
1892 const struct Xt_stripe *restrict db_stripes = db->stripes;
1893 const int *restrict db_stripes_nstrides_psum = db->stripes_nstrides_psum;
1894 const struct Xt_stripes_sort *restrict db_stripes_sort = db->stripes_sort;
1895 for (
size_t j = 0; j < num_candidates; ++j) {
1896 size_t unsort_pos = (size_t)candidates[j];
1897 size_t sort_pos = (size_t)db_stripes_sort[unsort_pos].
inv_position;
1898 if ((query_minmax.
min <= db_stripes_sort[sort_pos].range.max)
1899 & (query_minmax.
max >= db_stripes_sort[sort_pos].range. min))
1903 struct Xt_stripe db_stripe = db_stripes[unsort_pos];
1913 int skipLen = (int)((overlap_start - query.
start) /
stride);
1917 .start = query.
start,
1922 query_head, db, result, cover, single_match_only,
1923 num_candidates - j - 1, candidates + j + 1);
1929 = (int)((overlap_start - db_stripe.
start) /
stride);
1930 int overlap_nstrides
1936 .stride = overlap_nstrides > 1 ? query.
stride : (
Xt_int)1,
1939 = db_stripes_nstrides_psum[unsort_pos] + db_stripe_skip,
1940 .size = overlap_nstrides
1941 }, db, result, cover, single_match_only, num_candidates - j - 1,
1942 candidates + j + 1);
1944 if (!(query.
nstrides -= overlap_nstrides))
1945 goto search_finished;
1947 query.
start = (
Xt_int)(overlap_start + stride * overlap_nstrides);
1953 else if ((stride != 0)
1954 & (stride == db_stripe.
stride)
1955 & ((query.
start - db_stripe.
start) % stride != 0)) {
1961 query, db, result, cover, single_match_only,
1962 num_candidates - j, candidates + j);
1965 goto search_finished;
1985 size_t num_candidates,
1986 int *restrict candidates)
1991 if (pos_ext2add.
size == -1)
1992 pos_ext2add.
size = 1;
1996 int pos_ext2add_s = pos_ext2add.
start
1997 + (querySizeMaskNeg & (pos_ext2add.
size + 1)),
1998 pos_ext2add_e = pos_ext2add.
start
1999 + (~querySizeMaskNeg & (pos_ext2add.
size - 1));
2001 = { .start = pos_ext2add_s, .end = pos_ext2add_e };
2003 size_t overlap_pos =
2005 if (overlap_pos == SIZE_MAX) {
2009 struct Xt_pos_ext *restrict pos_exts_ = cover->pos_ext;
2011 db_s = pos_exts_[overlap_pos].start
2012 + (dbSizeMaskNeg & (pos_exts_[overlap_pos].size + 1)),
2013 db_e = pos_exts_[overlap_pos].
start
2014 + (~dbSizeMaskNeg & (pos_exts_[overlap_pos].
size - 1));
2016 int lowQuerySkip = db_s - pos_ext2add_s;
2017 int lowDbSkip = -lowQuerySkip;
2018 lowQuerySkip = (int)((
unsigned)(lowQuerySkip + abs(lowQuerySkip))/2);
2019 lowDbSkip = (int)((
unsigned)(lowDbSkip + abs(lowDbSkip))/2);
2020 int overlapLen =
MIN(db_e - db_s - lowDbSkip + 1,
2021 abs(pos_ext2add.
size) - lowQuerySkip);
2022 int highQuerySkip = abs(pos_ext2add.
size) - lowQuerySkip - overlapLen;
2025 int querySkipLen = (~querySizeMaskNeg & lowQuerySkip)
2026 | (querySizeMaskNeg & -highQuerySkip),
2027 queryTailLen = (querySizeMaskNeg & -lowQuerySkip)
2028 | (~querySizeMaskNeg & highQuerySkip);
2031 int absQuerySkipLen = abs(querySkipLen);
2033 .start = query.
start,
2038 .start = pos_ext2add.
start,
2039 .size = querySkipLen
2043 query_skip, pos_ext2add_skip, db,
2044 result, cover, num_candidates, candidates);
2045 pos_exts_ = result->pos_ext;
2047 + stride * (
Xt_int)absQuerySkipLen);
2050 pos_ext2add.
start += querySkipLen;
2051 pos_ext2add.
size -= querySkipLen;
2056 .start = query.
start,
2062 query_head, db, result, cover,
true,
2063 num_candidates, candidates);
2064 pos_exts_ = result->pos_ext;
2070 int directedOverlapLen = (~querySizeMaskNeg & overlapLen)
2071 | (querySizeMaskNeg & -overlapLen);
2072 pos_ext2add.
start += directedOverlapLen;
2073 pos_ext2add.
size -= directedOverlapLen;
2097 return (
int)sort_flags-(sort_flags < 3);
2104#if defined _CRAYC && _RELEASE_MAJOR == 8 && _RELEASE_MINOR >= 5 && _RELEASE_MINOR < 7
2113 bool single_match_only,
2114 size_t num_candidates,
2115 int *restrict candidates)
2117 size_t unmatched = 0;
2118 const struct Xt_stripe *restrict db_stripes = db->stripes;
2119 size_t db_stripe_pos = (size_t)candidates[0];
2121 db_stripes[db_stripe_pos]);
2123 return (
struct unmatched_tail){ .unmatched = 0, .query_tail = query};
2125 int skipped = (int)((overlap.
start - query.start)/query.stride);
2129 (
struct Xt_stripe){ .start = query.start,
2130 .stride = skipped > 1 ? query.stride : (
Xt_int)1,
2131 .
nstrides = skipped}, db, result, cover,
2132 single_match_only, num_candidates - 1, candidates + 1);
2133 query.start = (
Xt_int)(query.start + skipped * query.stride);
2134 query.nstrides -= skipped;
2135 query.stride = query.nstrides > 1 ? query.stride : 1;
2144 = (int)((overlap.
start - db_stripes[db_stripe_pos].start)
2145 / db_stripes[db_stripe_pos].stride);
2146 int db_pos = db->stripes_nstrides_psum[db_stripe_pos] + db_stripe_skip;
2147 if (((overlap.
stride == query.stride)
2148 & (overlap.
stride == db_stripes[db_stripe_pos].stride))
2154 db, result, cover, single_match_only, num_candidates - 1, candidates + 1);
2155 query.nstrides -= overlap.
nstrides;
2156 query.start = (
Xt_int)(query.start + overlap.
nstrides * query.stride);
2157 query.stride = query.nstrides > 1 ? query.stride : (
Xt_int)1;
2159 else if ((overlap.
stride == query.stride)
2160 & (overlap.
stride == -db_stripes[db_stripe_pos].stride))
2165 .start = db_pos, .size = -overlap.
nstrides },
2166 db, result, cover, single_match_only,
2167 num_candidates - 1, candidates + 1);
2168 query.nstrides -= overlap.
nstrides;
2169 query.start = (
Xt_int)(query.start + overlap.
nstrides * query.stride);
2170 query.stride = query.nstrides > 1 ? query.stride : (
Xt_int)1;
2172 else if (overlap.
stride == query.stride)
2176 int db_step = (int)(overlap.
stride/db_stripes[db_stripe_pos].stride);
2179 for (
int i = 0; i < overlap.
nstrides; ++i, db_pos += db_step)
2182 (
struct Xt_pos_ext){ .start = db_pos, .size = 1 },
2183 db, result, cover, single_match_only,
2184 num_candidates - 1, candidates + 1);
2185 query.nstrides -= overlap.
nstrides;
2186 query.start = (
Xt_int)(query.start + overlap.
nstrides * query.stride);
2187 query.stride = query.nstrides > 1 ? query.stride : (
Xt_int)1;
2195 int stride_step = (int)(overlap.
stride / query.stride);
2196 int db_step = (int)(overlap.
stride/db_stripes[db_stripe_pos].stride);
2198 struct Xt_stripe consecutive_overlap = { .start = query.start,
2201 intervening = { .start = (
Xt_int)(query.start + query.stride),
2202 .stride = query.stride,
2203 .nstrides =
MIN(query.nstrides - 1, stride_step - 1) };
2207 .start = db_pos, .size = 1 },
2208 db, result, cover, single_match_only,
2209 num_candidates - 1, candidates + 1);
2211 if (--query.nstrides > 0) {
2214 intervening, db, result, cover, single_match_only,
2215 num_candidates - 1, candidates + 1);
2216 query.nstrides -= intervening.nstrides;
2218 query.start = (
Xt_int)(query.start + query.stride * stride_step);
2219 query.stride = query.nstrides > 1 ? query.stride : (
Xt_int)1;
2223 return (
struct unmatched_tail){ .unmatched = unmatched, .query_tail = query};
#define ENSURE_ARRAY_SIZE(arrayp, curr_array_size, req_size)
add versions of standard API functions not returning on error
#define xrealloc(ptr, size)
#define xcalloc(nmemb, size)
const struct Xt_sort_algo_funcptr * sort_funcs
const struct xt_idxlist_vtable * vtable
struct Xt_stripe * stripes
Xt_int * index_array_cache
struct Xt_stripes_sort stripes_sort[]
struct Xt_idxlist_ parent
struct Xt_pos_ext * pos_ext
void(* sort_int)(int *a, size_t n)
struct Xt_stripe * stripes
struct Xt_idxstripes_ * idxstripes
bool stripes_do_intersect
const struct Xt_stripe * stripes
int * stripes_nstrides_psum
const struct Xt_stripes_sort * stripes_sort
size_t * stripe_candidate_bitmask
union int_vec::@034137013171073103271351047006306162101341012025 p
struct Xt_stripe query_tail
int MPI_Type_create_struct(int count, XT_MPI2_CONST int array_of_block_lengths[], XT_MPI2_CONST MPI_Aint array_of_displacements[], XT_MPI2_CONST MPI_Datatype array_of_types[], MPI_Datatype *newtype)
int MPI_Type_create_resized(MPI_Datatype oldtype, MPI_Aint lb, MPI_Aint extent, MPI_Datatype *newtype)
int MPI_Type_free(MPI_Datatype *datatype)
int MPI_Type_commit(MPI_Datatype *datatype)
static Xt_tword xlimul(Xt_long a, Xt_int b)
static Xt_long xlladd(Xt_long a, Xt_long b)
static Xt_long xiisub(Xt_int a, Xt_int b)
static Xt_long xi2l(Xt_int a)
static Xt_ldiv xlldiv(Xt_long a, Xt_long b)
static Xt_long xliadd(Xt_long a, Xt_int b)
static Xt_idiv xlidivu(Xt_ulong a, Xt_uint b)
static int xlicmp_lt(Xt_long a, Xt_int b)
static Xt_long xiimul(Xt_int a, Xt_int b)
static bool xl_is_in_xt_int_range(Xt_long a)
static Xt_long xlabs(Xt_long a)
static Xt_long xilsub(Xt_int a, Xt_long b)
static Xt_long xlinc(Xt_long a, bool b)
static int xlsign(Xt_long x)
static int xlicmp_le(Xt_long a, Xt_int b)
static int xlicmp_ge(Xt_long a, Xt_int b)
static int isign_mask(int x)
static Xt_int Xt_isign_mask(Xt_int x)
static int xinlz(Xt_uint v)
static int imin(int a, int b)
static Xt_int Xt_isign(Xt_int x)
static long long llsign(long long x)
#define is_divisible(divisor, coeff, dividend)
struct Xt_config_ * Xt_config
implementation of configuration object
#define XT_CONFIG_GET_FORCE_NOSORT(config)
base definitions header file
struct Xt_idxlist_ * Xt_idxlist
size_t xt_cover_insert_or_overlap(struct Xt_pos_ext_vec *restrict cover, struct Xt_pos_range range, size_t search_start_pos)
void xt_cover_finish(struct Xt_pos_ext_vec *restrict cover)
void xt_cover_start(struct Xt_pos_ext_vec *restrict cover, size_t initial_size)
size_t xt_cover_search(struct Xt_pos_ext_vec *restrict cover, struct Xt_pos_range query, size_t search_start_pos)
macros to create heapsort implementations
static size_t left(size_t i)
static size_t right(size_t i)
Xt_idxlist xt_idxempty_new(void)
void xt_idxlist_get_index_stripes_keep_buf(Xt_idxlist idxlist, struct Xt_stripe *stripes, size_t num_stripes_alloc)
const Xt_int * xt_idxlist_get_indices_const(Xt_idxlist idxlist)
int xt_idxlist_get_num_index_stripes(Xt_idxlist idxlist)
void xt_idxlist_delete(Xt_idxlist idxlist)
Provide non-public declarations common to all index lists.
static void Xt_idxlist_init(Xt_idxlist idxlist, const struct xt_idxlist_vtable *vtable, int num_indices)
static int idxstripes_get_indices_at_positions(Xt_idxlist idxlist, const int *positions, int num, Xt_int *index, Xt_int undef_idx)
static struct unmatched_tail idxstripes_complex_get_pos_exts_of_index_stripe(struct Xt_stripe query, const struct Xt_stripes_lookup *restrict stripes_lookup, struct Xt_pos_ext_vec *restrict result, struct Xt_pos_ext_vec *restrict cover, bool single_match_only, size_t num_candidates, int *restrict candidates)
static int idxstripes_get_sorting(Xt_idxlist idxlist)
const struct Xt_stripe * xt_idxstripes_get_index_stripes_const(Xt_idxlist idxlist)
static void idxstripes_delete(Xt_idxlist data)
static int idxstripes_get_num_index_stripes(Xt_idxlist idxlist)
void xt_idxstripes_initialize(void)
struct Xt_stripes_alloc xt_idxstripes_alloc(size_t num_stripes)
static struct Xt_stripe get_stripe_intersection(struct Xt_stripe stripe_a, struct Xt_stripe stripe_b)
static int idxstripes_get_position_of_index_off(Xt_idxlist idxlist, Xt_int index, int *position, int offset)
static void create_stripes_lookup(struct Xt_stripes_lookup *restrict db, Xt_idxstripes idxstripes)
Xt_idxlist xt_idxstripes_sort_new(size_t num_src_stripes, const struct Xt_stripe src_stripes[], Xt_config config)
static size_t bsearch_stripes_sort(size_t n, const struct Xt_stripes_sort a[n], Xt_int min_key)
Xt_idxlist xt_idxstripes_unpack(void *buffer, int buffer_size, int *position, MPI_Comm comm)
static struct extended_gcd extended_gcd(Xt_int a, Xt_int b)
void xt_idxstripes_finalize(void)
Xt_idxlist xt_idxstripes_prealloc_new(const struct Xt_stripe *stripes, int num_stripes)
Xt_idxlist xt_idxstripes_from_idxlist_new(Xt_idxlist idxlist_src)
static void idxstripes_get_indices(Xt_idxlist idxlist, Xt_int *indices)
static Xt_idxlist idxstripes_order_invert(Xt_idxlist idxlist)
static int idxstripes_get_position_of_index(Xt_idxlist idxlist, Xt_int index, int *position)
static void idxstripes_get_index_stripes(Xt_idxlist idxlist, struct Xt_stripe *restrict stripes, size_t num_stripes_alloc)
PPM_DSO_INTERNAL Xt_idxlist xt_idxstripes_get_idxvec_intersection(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst, Xt_config XT_UNUSED(config))
static size_t pos_ext_insert(struct Xt_stripe query, struct Xt_pos_ext pos_ext2add, const struct Xt_stripes_lookup *stripes_lookup, struct Xt_pos_ext_vec *restrict result, struct Xt_pos_ext_vec *restrict cover, bool single_match_only, size_t num_candidates, int *restrict candidates)
static void idxstripes_pack(Xt_idxlist data, void *buffer, int buffer_size, int *position, MPI_Comm comm)
static Xt_idxlist idxstripes_sorted_copy(Xt_idxlist idxlist, Xt_config config)
Xt_idxlist xt_idxstripes_congeal(struct Xt_stripes_alloc stripes_alloc)
static struct Xt_idxstripes_ * expand_zero_stripes(size_t num_stripes, const struct Xt_stripe *restrict stripes)
static int idxstripes_get_pos_exts_of_index_stripes(Xt_idxlist idxlist, int num_stripes, const struct Xt_stripe stripes[num_stripes], int *num_ext, struct Xt_pos_ext **pos_ext, int single_match_only, Xt_config config)
static int idxstripes_get_index_at_position(Xt_idxlist idxlist, int position, Xt_int *index)
static const Xt_int * idxstripes_get_indices_const(Xt_idxlist idxlist)
struct Xt_idxstripes_ * Xt_idxstripes
static void destroy_stripes_lookup(struct Xt_stripes_lookup *restrict db)
size_t xt_idxstripes_get_num_index_stripes(Xt_idxlist idxlist)
static void append_ext(struct Xt_pos_ext pos_ext, struct Xt_pos_ext_vec *restrict result)
static const struct xt_idxlist_vtable idxstripes_vtable
static Xt_int stripe_min(struct Xt_stripe stripe, int pfx_remove)
static Xt_idxlist idxstripes_compute_intersection(Xt_idxstripes idxstripes_src, Xt_idxstripes idxstripes_dst)
@ stripes_some_have_zero_stride_bit
static bool stripe_contains_index(struct Xt_stripe stripe, Xt_int idx)
static size_t idxstripes_get_pos_exts_of_index_stripe(struct Xt_stripe query, const struct Xt_stripes_lookup *restrict db, struct Xt_pos_ext_vec *restrict result, struct Xt_pos_ext_vec *restrict cover, bool single_match_only, size_t num_candidates, int *restrict candidates)
static Xt_idxlist compute_intersection_fallback(Xt_idxstripes idxstripes_src, Xt_idxstripes idxstripes_dst, Xt_config config)
Xt_idxlist xt_idxstripes_get_intersection(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst, Xt_config config)
Xt_idxlist xt_idxstripes_new(struct Xt_stripe const *stripes, int num_stripes)
static Xt_idxlist idxstripes_copy(Xt_idxlist idxlist)
static size_t idxstripes_get_pack_size(Xt_idxlist data, MPI_Comm comm)
static struct Xt_stripes_alloc idxstripes_alloc(size_t num_stripes)
static int stripe_min_cmp_lt(const struct Xt_stripe a, const struct Xt_stripe b, const int pfx_remove_a, const int pfx_remove_b)
static Xt_int idxstripes_get_max_index(Xt_idxlist idxlist)
static Xt_int idxstripes_get_min_index(Xt_idxlist idxlist)
@ stripes_some_have_zero_stride_mask
@ stripes_do_overlap_mask
static Xt_idxlist idxstripes_aggregate(Xt_idxstripes idxstripes, const char *caller)
static void find_candidates(struct Xt_stripe query, const struct Xt_stripes_lookup *restrict db, struct int_vec *candidates, bool single_match_only, struct Xt_pos_ext_vec *restrict cover, Xt_config config)
static size_t conditional_pos_ext_insert(struct Xt_stripe query, struct Xt_pos_ext pos_ext2add, const struct Xt_stripes_lookup *restrict db, struct Xt_pos_ext_vec *restrict result, struct Xt_pos_ext_vec *restrict cover, size_t num_candidates, int *restrict candidates)
static MPI_Datatype stripe_dt
static Xt_int retrieve_min(size_t num_src_stripes, const struct Xt_stripe *src_stripes, int *src_permutation, int *pfx_removed)
static bool xt_can_merge_pos_ext(struct Xt_pos_ext a, struct Xt_pos_ext b)
struct Xt_vec_alloc xt_idxvec_alloc(int num_indices)
Xt_idxlist xt_idxvec_congeal(struct Xt_vec_alloc vec_alloc)
macros to create mergesort implementations, 4 way top-down method
#define xt_mpi_call(call, comm)
size_t xt_stripes_merge_copy(size_t num_stripes, struct Xt_stripe *stripes_dst, const struct Xt_stripe *stripes_src, bool lookback)
bool xt_stripes_detect_duplicate(size_t num_stripes, const struct Xt_stripe stripes[num_stripes], struct Xt_minmax index_range)
static struct Xt_minmax xt_stripe2minmax(struct Xt_stripe stripe)
#define XT_SORT_FLAGS(ntrans_up, ntrans_dn)
static int xt_stripes_eq(struct Xt_stripe a, struct Xt_stripe b)