77#define MIN(a,b) (((a)<(b))?(a):(b))
78#define MAX(a,b) (((a)>(b))?(a):(b))
108 size_t num_stripes_alloc);
121 int num_stripes,
const struct Xt_stripe stripes[num_stripes],
123 int single_match_only,
132 int * position,
int offset);
156 .get_positions_of_indices = NULL,
159 .get_positions_of_indices_off = NULL,
163 .get_bounding_box = NULL,
171int MPI_Get_address(XT_MPI_SEND_BUF_CONST
void *location, MPI_Aint *address);
179 MPI_Aint base_address, nstrides_address;
181 MPI_Get_address(&stripe, &base_address);
182 MPI_Get_address(&stripe.
nstrides, &nstrides_address);
184 enum { num_stripe_dt_elems = 2 };
185 static const int block_lengths[num_stripe_dt_elems] = {2,1};
186 MPI_Aint displacements[num_stripe_dt_elems]
187 = { 0, nstrides_address - base_address };
188 static const MPI_Datatype types[num_stripe_dt_elems]
190 MPI_Datatype stripe_dt_unaligned;
193 (
int *)block_lengths, displacements,
194 (MPI_Datatype *)types,
195 &stripe_dt_unaligned), Xt_default_comm);
197 (MPI_Aint)
sizeof(stripe),
258#define SORT_TYPE struct Xt_stripes_sort
259#define SORT_TYPE_SUFFIX stripes_sort
260#define XT_SORTFUNC_DECL static
261#define SORT_TYPE_CMP_LT(a,b,...) ((a).range.min < (b).range.min)
262#define SORT_TYPE_CMP_LE(a,b,...) ((a).range.min <= (b).range.min)
263#define SORT_TYPE_CMP_EQ(a,b,...) ((a).range.min == (b).range.min)
266#undef SORT_TYPE_SUFFIX
267#undef SORT_TYPE_CMP_LT
268#undef SORT_TYPE_CMP_LE
269#undef SORT_TYPE_CMP_EQ
270#undef XT_SORTFUNC_DECL
289 int ntrans_up=0, ntrans_dn=0;
290 Xt_int min, max, prev_end;
291 long long num_indices;
293 int nstrides = stripes[0].nstrides;
294 Xt_int stride = stripes[0].stride;
295 prev_end = (
Xt_int)(stripes[0].start
296 + stride * (nstrides-1));
297 ntrans_up += ((nstrides - 1) & ~((stride > 0)-1));
298 ntrans_dn += ((nstrides - 1) & ~((stride < 0)-1));
299 num_indices = (
long long)nstrides;
301 stripes_sort[0].range = stripe_range;
302 stripes_sort[0].position = 0;
306 size_t num_stripes = (size_t)idxstripes->
num_stripes;
307 int sign_err = stripes[0].nstrides;
308 unsigned have_zero_stride = stripes[0].stride == 0,
309 stripes_intersect = have_zero_stride & (stripes[0].nstrides > 1);
310 for (
size_t i = 1; i < num_stripes; ++i) {
312 int nstrides = stripes[i].nstrides;
313 Xt_int start = stripes[i].start, stride = stripes[i].stride;
314 stripes_sort[i].range = stripe_range;
315 stripes_sort[i].position = (int)i;
318 num_indices += (
long long)nstrides;
319 sign_err |= nstrides;
320 have_zero_stride |= stride == 0;
321 stripes_intersect |= (stride == 0 && nstrides > 1);
322 ntrans_up += (start > prev_end) + ((nstrides - 1) & ~((stride > 0)-1));
323 ntrans_dn += (start < prev_end) + ((nstrides - 1) & ~((stride < 0)-1));
324 prev_end = (
Xt_int)(start + stride * (nstrides-1));
328 static const char template[] =
"ERROR: %s called with invalid stripes";
329 size_t buf_size =
sizeof (
template) - 2 + strlen(caller);
331 snprintf(msg, buf_size,
template, caller);
334 if (num_indices > 0) {
335 assert(num_indices <= INT_MAX);
336 xt_mergesort_stripes_sort(stripes_sort, num_stripes);
337 stripes_sort[stripes_sort[0].position].inv_position = 0;
338 unsigned stripes_do_overlap = 0;
339 for (
size_t i = 1; i < num_stripes; ++i) {
341 |= stripes_sort[i - 1].range.max >= stripes_sort[i].range.min;
342 stripes_sort[stripes_sort[i].position].inv_position = (int)i;
344 stripes_intersect = stripes_intersect
345 || (stripes_do_overlap
377 * (size_t)num_stripes)
381 body_size =
sizeof (
struct Xt_stripe) * (size_t)num_stripes;
385 assert(num_stripes <= INT_MAX);
402 if (num_stripes > 0) {
440 if (num_stripes > 0) {
461 if (data == NULL)
return;
474 int size_header, size_stripes = 0;
476 xt_mpi_call(MPI_Pack_size(2, MPI_INT, comm, &size_header), comm);
479 &size_stripes), comm);
481 return (
size_t)size_header + (size_t)size_stripes;
495 buffer_size, position, comm), comm);
499 buffer, buffer_size, position, comm), comm);
510 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
511 &num_stripes, 1, MPI_INT, comm), comm);
518 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
526#define XT_IDXSTRIPES_POS_EXT_MAP_COUNT
528#undef XT_IDXSTRIPES_POS_EXT_MAP_COUNT
536 INSTR_DEF(instr,
"compute_intersection_fallback")
547 &num_ext, &pos_exts,
false,
552 size_t num_result_stripes
553 = get_mapped_stripes_count((
size_t)num_ext, pos_exts, idxstripes_src);
558 map_ext2stripes((
size_t)num_ext, pos_exts, idxstripes_src,
565 intersection = intersection_unsorted;
612 INSTR_DEF(instr,
"get_stripe_intersection")
615 struct Xt_bounded_stripe {
616 Xt_int min, max, stride, representative;
619 struct Xt_bounded_stripe bsa, bsb, bsi;
621 Xt_int stride_zero_mask_a = (stripe_a.stride != 0) - 1,
622 stride_zero_mask_b = (stripe_b.stride != 0) - 1;
625 bsa.min = (
Xt_int)(stripe_a.start
626 + (mask & (stripe_a.stride * (stripe_a.nstrides - 1))));
627 bsa.max = (
Xt_int)(stripe_a.start
628 + (~mask & (stripe_a.stride * (stripe_a.nstrides - 1))));
630 bsa.representative = stripe_a.start;
633 bsb.min = (
Xt_int)(stripe_b.start
634 + (mask & (stripe_b.stride * (stripe_b.nstrides - 1))));
635 bsb.max = (
Xt_int)(stripe_b.start
636 + (~mask & (stripe_b.stride * (stripe_b.nstrides - 1))));
638 bsb.representative = stripe_b.start;
640 bsa.stride = (
Xt_int)((stripe_a.stride & ~stride_zero_mask_a)
641 | (stride_zero_mask_a & 1));
642 bsb.stride = (
Xt_int)((stripe_b.stride & ~stride_zero_mask_b)
643 | (stride_zero_mask_b & 1));
647 Xt_int abs_stride_b = XT_INT_ABS(bsb.stride);
651 = (
Xt_int)(bsb.representative
652 + (start_diff/abs_stride_b
653 + (start_diff%abs_stride_b > abs_stride_b/2))
655 start_diff = (
Xt_long)bsb.representative - bsa.representative;
658 Xt_idiv start_diff_abs_stride_b_div
661 = (
Xt_int)(bsb.representative
662 + (start_diff_abs_stride_b_div.
quot
663 + start_diff_abs_stride_b_div.
rem > abs_stride_b/2)
665 start_diff =
xiisub(bsb.representative, bsa.representative);
667 Xt_int abs_stride_a = XT_INT_ABS(bsa.stride);
669 bsi.min =
MAX(bsa.min, bsb.min);
670 bsi.max =
MIN(bsa.max, bsb.max);
678 if (eg.
u && bsb.representative != bsa.representative)
693 bsi.stride = (
Xt_int)temp_stride;
697 = (
Xt_int)(bsa.representative
698 + ((bsb.representative - bsa.representative) * eg.
u
699 * abs_stride_a / eg.
gcd));
701 Xt_int abs_bsi_stride = XT_INT_ABS(temp_stride),
702 r_diff = (
Xt_int)(bsi.min - some_rep),
703 steps = (
Xt_int)(r_diff / abs_bsi_stride);
704 steps = (
Xt_int)(steps + (steps * abs_bsi_stride < r_diff));
705 min_rep = (
Xt_int)(some_rep + steps * abs_bsi_stride);
706 bsi.representative = (
Xt_int)min_rep;
708 nstrides = (int)((bsi.max - min_rep)/temp_stride +
llsign(temp_stride));
710 = ((((bsb.representative - bsa.representative) % eg.
gcd) == 0)
711 & (bsi.stride == temp_stride || abs(
nstrides) == 1));
713 strides_mask = ~(((even_divide) & (bsi.min <= bsi.max)
714 & (min_rep <= bsi.max) & (min_rep >= bsi.min)) - 1);
717 intersection.
start = (
Xt_int)((bsa.stride >= 0) ? min_rep : max_rep);
720#define ABS(x) (((x) < 0) ? -(x) : (x))
722 bsi.stride = (
Xt_int)temp_stride;
727 bool min_rep_in_range;
730 if (bsb.representative != bsa.representative) {
751 min_rep_in_range =
true;
752 some_rep = bsa.representative;
755 Xt_long abs_bsi_stride = ABS(temp_stride),
756 r_diff = bsi.min - some_rep,
757 steps = r_diff / abs_bsi_stride;
759 steps = steps + (steps * abs_bsi_stride < r_diff);
762 min_rep += steps * abs_bsi_stride;
764 bsi.representative = (
Xt_int)min_rep;
766 int min_rep_mask = ~((int)min_rep_in_range - 1);
767 nstrides = (int)((((bsi.max - min_rep)/temp_stride)+
xlsign(temp_stride))
769 | (~min_rep_mask & (bsb.representative == bsa.representative));
771 = ((start_diff%eg.
gcd == 0) & (bsi_stride_in_range || abs(
nstrides) == 1));
773 strides_mask = ~(((even_divide) & (bsi.min <= bsi.max)
774 & (min_rep <= bsi.max) & (min_rep >= bsi.min)) - 1);
777 intersection.
start = (
Xt_int)((bsa.stride >= 0) ? min_rep : max_rep);
781 bsi.stride = (
Xt_int)temp_stride.
lo;
788 bool min_rep_in_range;
791 if (bsb.representative != bsa.representative) {
794 =
xiimul(bsb.representative - bsa.representative, eg.
u);
811 some_rep =
xliadd(temp_3.
quot, bsa.representative);
813 min_rep_in_range =
true;
814 some_rep =
xi2l(bsa.representative);
818 r_diff =
xilsub(bsi.min, some_rep);
822 && steps.
rem.
lo != 0));
824 min_rep =
xlladd(some_rep,
830 bsi.representative = (
Xt_int)min_rep.
lo;
832 if (min_rep_in_range)
835 nstrides = bsb.representative == bsa.representative;
837 = ((((bsb.representative - bsa.representative) % eg.
gcd) == 0)
838 & (bsi_stride_in_range || abs(
nstrides) == 1));
840 strides_mask = ~(((even_divide) & (bsi.min <= bsi.max)
845 intersection.
start = ((bsa.stride >= 0) ? (
Xt_int)min_rep.
lo : max_rep);
852 & ~((int)stride_zero_mask_a & (
int)stride_zero_mask_b))
853 | (stripe_b.nstrides & (int)stride_zero_mask_a & (
int)stride_zero_mask_b);
863 INSTR_DEF(instr,
"idxstripes_compute_intersection")
866 struct Xt_stripe *restrict inter_stripes = NULL;
867 size_t num_inter_stripes = 0;
868 size_t inter_stripes_array_size = 0;
872 *restrict dst_stripes_sort = idxstripes_dst->
stripes_sort;
874 *restrict stripes_dst = idxstripes_dst->
stripes;
876 size_t i_src = 0, i_dst = 0;
877 size_t num_stripes_src = (size_t)idxstripes_src->
num_stripes,
878 num_stripes_dst = (
size_t)idxstripes_dst->
num_stripes;
880 while ((i_src < num_stripes_src) &
881 (i_dst < num_stripes_dst)) {
883 while (i_src < num_stripes_src &&
884 src_stripes_sort[i_src].range.max
885 < dst_stripes_sort[i_dst].range.min) ++i_src;
887 if ( i_src >= num_stripes_src )
break;
889 while (i_dst < num_stripes_dst &&
890 dst_stripes_sort[i_dst].range.max
891 < src_stripes_sort[i_src].range.min) ++i_dst;
893 if ( i_dst >= num_stripes_dst )
break;
895 if ((src_stripes_sort[i_src].range.min
896 <= dst_stripes_sort[i_dst].range.max)
897 & (src_stripes_sort[i_src].range.max
898 >= dst_stripes_sort[i_dst].range.min)) {
900 num_inter_stripes+1);
903 inter_stripes[num_inter_stripes] = intersection_stripe =
905 stripes_dst[dst_stripes_sort[i_dst].position]);
906 num_inter_stripes += intersection_stripe.
nstrides > 0;
909 if (dst_stripes_sort[i_dst].range.max
910 < src_stripes_sort[i_src].range.max)
916 if (num_inter_stripes) {
918 struct Xt_stripe prev_stripe = inter_stripes[0];
919 if (prev_stripe.
stride < 0) {
925 inter_stripes[0] = prev_stripe;
927 for (
size_t i = 1; i < num_inter_stripes; ++i) {
928 struct Xt_stripe stripe = inter_stripes[i];
936 == (prev_stripe.
start
940 inter_stripes[j].nstrides = prev_stripe.
nstrides;
944 inter_stripes[++j] = stripe;
945 prev_stripe = stripe;
948 num_inter_stripes = j + 1;
967 if ((idxstripes_src->
flags | idxstripes_dst->flags)
1001 size_t nsrc_stripes = (size_t)idxstripes_src->
num_stripes,
1002 num_found_indices = 0;
1004 size_t_bits =
sizeof (size_t) * CHAR_BIT,
1007 =
xcalloc(((ndst + size_t_bits - 1)/ size_t_bits),
1009 for (
size_t i = 0; i < ndst; ++i) {
1010 Xt_int dst_idx = indices[i];
1011 for (
size_t j = 0; j < nsrc_stripes; ++j)
1013 num_found_indices++;
1014 found[i/size_t_bits] |= (size_t)1 << (i%size_t_bits);
1019 if (num_found_indices) {
1022 for (
size_t i = 0, inter_ofs = 0; i < ndst; ++i) {
1023 size_t fofs = i / size_t_bits;
1024 if (found[fofs]&((
size_t)1 << (i%size_t_bits))) {
1025 found_indices[inter_ofs] = indices[i];
1033 return intersection;
1050 Xt_int adj = (
Xt_int)(stride >= 0 ? pfx_remove : nstrides - 1 - pfx_remove);
1051 return (
Xt_int)(min + stride * adj);
1056 const int pfx_remove_a,
const int pfx_remove_b)
1059 if (a.
nstrides - pfx_remove_a == 0)
1061 if (b.
nstrides - pfx_remove_b == 0)
1066 amin = XT_INT_ABS(a.
stride);
1067 bmin = XT_INT_ABS(b.
stride);
1072#define SORT_TYPE int
1073#define SORT_TYPE_SUFFIX stripe_by_min
1074#define SORT_TYPE_CMP_LT(u,v,i,j) \
1075 (stripe_min_cmp_lt(stripes[(u)], stripes[(v)], \
1076 pfx_removed[(u)], pfx_removed[(v)]))
1077#define XT_SORT_EXTRA_ARGS_DECL , int *restrict pfx_removed, \
1078 const struct Xt_stripe stripes[]
1079#define XT_SORT_EXTRA_ARGS_PASS , pfx_removed, stripes
1080#define XT_SORTFUNC_DECL static
1087 int *src_permutation,
1090 size_t min_stripe_pos = (size_t)src_permutation[0];
1091 struct Xt_stripe minstripe = src_stripes[min_stripe_pos];
1093 if (minstripe.
stride >= 0) {
1095 + minstripe.
stride * pfx_removed[min_stripe_pos]);
1098 + (minstripe.
nstrides - 1 - pfx_removed[min_stripe_pos])
1101 ++pfx_removed[min_stripe_pos];
1102 xt_heapify_stripe_by_min(src_permutation, num_src_stripes,
1103 0, pfx_removed, src_stripes);
1113 int *restrict src_permutation
1114 =
xmalloc(num_src_stripes * 2 *
sizeof (*src_permutation)),
1115 *restrict pfx_removed = src_permutation + num_src_stripes;
1116 size_t indices_left = 0;
1117 for (
size_t i = 0; i < num_src_stripes; ++i) {
1118 src_permutation[i] = (int)i;
1120 indices_left += (size_t)src_stripes[i].
nstrides;
1123 xt_heapsort_stripe_by_min(src_permutation, num_src_stripes,
1124 pfx_removed, src_stripes);
1127 =
xmalloc(
sizeof (*dst_stripes) * num_src_stripes);
1128 size_t num_dst_stripes = 0, size_dst_stripes = num_src_stripes;
1131 src_permutation, pfx_removed);
1132 dst_stripes[0] = (
struct Xt_stripe){ .
start = minidx, .stride = 1,
1134 num_dst_stripes = 1;
1137 for (; indices_left != 0; --indices_left) {
1140 src_permutation, pfx_removed);
1142 size_t i = num_dst_stripes - 1;
1143 if (dst_stripes[i].nstrides == 1) {
1144 dst_stripes[i].
stride = (
Xt_int)(minidx - dst_stripes[i].start);
1148 dst_stripes[i].
start
1153 dst_stripes[num_dst_stripes] = (
struct Xt_stripe){ .
start = minidx,
1162 free(src_permutation);
1175 for (
size_t i = 0; i < num_stripes; ++i) {
1176 Xt_int src_start = src_stripes[num_stripes-1-i].
start,
1177 src_stride = src_stripes[num_stripes-1-i].
stride;
1179 inverted_stripes[i] = (
struct Xt_stripe){
1193 sort_flags -= sort_flags < 3;
1195 if (sort_flags == 1)
1197 else if (sort_flags == -1)
1202 return sorted_idxlist;
1207 INSTR_DEF(instr,
"idxstripes_get_indices")
1212 size_t num_stripes = (size_t)idxstripes->
num_stripes, ofs = 0;
1213 for (
size_t i = 0; i < num_stripes; ofs += (size_t)stripes[i].
nstrides, ++i )
1214 for (
size_t j = 0; j < (size_t)stripes[i].
nstrides; ++j)
1215 indices[ofs+j] = (
Xt_int)(stripes[i].start
1230 =
xmalloc((
size_t)num_indices *
sizeof (*tmp_index_array));
1238 size_t num_stripes_alloc)
1241 (void)num_stripes_alloc;
1243 INSTR_DEF(instr,
"idxstripes_get_index_stripes")
1246 assert((
size_t)idxstripes->
num_stripes <= num_stripes_alloc);
1247 memcpy(stripes, idxstripes->
stripes,
1248 (
size_t)idxstripes->
num_stripes * sizeof (stripes[0]));
1279 INSTR_DEF(instr,
"idxstripes_get_index_at_position")
1284 if (position >= 0 && position < idxlist->num_indices) {
1287 size_t num_stripes = (size_t)idxstripes->
num_stripes;
1289 while (i < num_stripes && position >= stripes[i].
nstrides) {
1290 position-= stripes[i].nstrides;
1306 int num_pos,
Xt_int *index,
1309 INSTR_DEF(instr,
"idxstripes_get_indices_at_positions")
1320 int stripe_start_pos = 0;
1321 int undef_count = 0;
1323 for (
int ipos = 0; ipos < num_pos; ipos++) {
1325 int seek_pos = positions[ipos];
1327 if (seek_pos >= 0 && seek_pos < max_pos) {
1328 while (seek_pos < stripe_start_pos) {
1332 die(
"idxstripes_get_indices_at_positions: internal error:"
1333 " crossed 0-boundary");
1335 stripe_start_pos -= (int)stripes[istripe].
nstrides;
1338 while (seek_pos > stripe_start_pos + stripes[istripe].
nstrides - 1) {
1339 stripe_start_pos += (int)stripes[istripe].
nstrides;
1343 die(
"idxstripes_get_indices_at_positions: internal error:"
1344 " crossed upper boundary");
1348 sub_pos = seek_pos - stripe_start_pos;
1352 index[ipos] = undef_idx;
1371 int * position,
int offset) {
1373 INSTR_DEF(instr,
"idxstripes_get_position_of_index_off")
1377 size_t offset_ = (offset < 0) ? (
size_t)0 : (size_t)offset;
1384 size_t num_stripes = (size_t)idxstripes->
num_stripes, i;
1385 size_t position_offset = 0;
1388 position_offset + (size_t)stripes[i].
nstrides <= offset_;
1390 position_offset += (
size_t)stripes[i].nstrides;
1392 for (; i < num_stripes;
1393 position_offset += (size_t)stripes[i].
nstrides, ++i) {
1395 size_t inv_pos = (size_t)stripes_sort[i].inv_position;
1396 struct Xt_minmax range = stripes_sort[inv_pos].range;
1398 if (index > range.
max || index < range.
min)
1402 Xt_int stride = stripes[i].stride;
1403 if (index != stripes[i].start) {
1404 Xt_int index_offset = (
Xt_int)(index - stripes[i].start);
1405 if (index_offset % stride)
1407 rel_start = (int)(index_offset/stripes[i].stride);
1408 }
else if (stride != 0)
1411 rel_start = (int)(
MAX(offset_, position_offset) - position_offset);
1412 *position = (int)((
size_t)rel_start + position_offset);
1428 size_t num_pos_exts_ = result->num_pos_ext,
1429 size_pos_exts_ = result->size_pos_ext;
1430 struct Xt_pos_ext *restrict pos_exts_ = result->pos_ext;
1433 if (num_pos_exts_ + 1 == size_pos_exts_)
1435 size_pos_exts_ += 16;
1437 result->pos_ext = pos_exts_ = (
struct Xt_pos_ext *)
1438 xrealloc(pos_exts_ - 1, (size_pos_exts_ + 1) *
sizeof (*pos_exts_)) + 1;
1439 result->size_pos_ext = size_pos_exts_;
1441 pos_exts_[num_pos_exts_] = pos_ext;
1442 result->num_pos_ext = num_pos_exts_ + 1;
1446 pos_exts_[num_pos_exts_ - 1].
size
1447 =
isign(pos_ext.
start - pos_exts_[num_pos_exts_ - 1].start)
1448 * (abs(pos_exts_[num_pos_exts_ - 1].
size) + abs(pos_ext.
size));
1464 const struct Xt_stripe *restrict stripes;
1465 db->stripes = stripes = idxstripes->
stripes;
1466 size_t num_db_stripes = (size_t)idxstripes->
num_stripes;
1470 size_t_bits =
sizeof (size_t) * CHAR_BIT,
1472 size_t overlap_handling = stripes_do_overlap
1473 ? ((num_db_stripes + size_t_bits - 1)/size_t_bits+1) *
sizeof (size_t) : 0,
1474 align_adjust = !stripes_do_overlap ? (num_db_stripes + 1) *
sizeof (
int):
1475 ((num_db_stripes + 1)*
sizeof (
int)+
sizeof (size_t)-1)
1476 /
sizeof (size_t) *
sizeof (
size_t);
1478 int *restrict db_stripes_nstrides_psum
1479 =
xmalloc(align_adjust + overlap_handling);
1480 if (stripes_do_overlap)
1481 db->stripe_candidate_bitmask
1482 = (
void *)((
unsigned char *)db_stripes_nstrides_psum+align_adjust);
1483 int accum = db_stripes_nstrides_psum[0] = 0;
1484 for (
size_t j = 0; j < num_db_stripes; ++j) {
1485 accum += stripes[j].nstrides;
1486 db_stripes_nstrides_psum[j + 1] = accum;
1489 db->num_stripes = num_db_stripes;
1490 db->stripes_nstrides_psum = db_stripes_nstrides_psum;
1491 db->stripes_do_overlap = stripes_do_overlap;
1492 db->stripes_do_intersect = stripes_do_intersect;
1497 free(db->stripes_nstrides_psum);
1515 if ((a && n > 0)) ;
else return n;
1522 if (a[m].range.min > min_key)
1530 return a[
right].range.min <= min_key ?
right : n;
1538 bool single_match_only,
1544 size_t num_db_stripes = db->num_stripes;
1545 const struct Xt_stripes_sort *restrict db_stripes_sort = db->stripes_sort;
1548 if (db->stripes_do_overlap) {
1552 != num_db_stripes) {
1553 int stripes_start_pos = db_stripes_sort[start_pos].
position;
1556 && (!single_match_only
1557 || (!db->stripes_do_intersect
1561 .start = db->stripes_nstrides_psum[stripes_start_pos],
1562 .end = db->stripes_nstrides_psum[stripes_start_pos]
1563 + query.
nstrides -1 }, 0) == SIZE_MAX
1565 candidates->
num = 1;
1566 if (candidates->
size == 0)
1567 candidates->
p.
e1 = stripes_start_pos;
1569 candidates->
p.
vec[0] = stripes_start_pos;
1576 if (start_pos != num_db_stripes) {
1577 assert(db_stripes_sort[start_pos].
range.
min <= query_minmax.
max);
1578 size_t end_pos = start_pos;
1579 while (end_pos < num_db_stripes
1580 && db_stripes_sort[end_pos].
range.
min <= query_minmax.
max)
1584 size_t num_candidates;
1585 if (!db->stripes_do_overlap)
1587 while (start_pos > 0
1588 && (db_stripes_sort[start_pos - 1].
range.
max >= query_minmax.
min))
1590 num_candidates = end_pos - start_pos;
1591 if (candidates->
size == 0 && num_candidates == 1) {
1592 candidates->
p.
e1 = db_stripes_sort[start_pos].position;
1593 candidates->
num = 1;
1596 if (candidates->
size < num_candidates) {
1597 int *prevp = (candidates->
size == 0) ? NULL : candidates->
p.
vec;
1600 *
sizeof (candidates->
p.
vec[0]));
1601 candidates->
size = num_candidates;
1603 candidates->
num = num_candidates;
1604 int *restrict candidates_vec = candidates->
p.
vec;
1605 for (
size_t i = 0; i < num_candidates; ++i)
1606 candidates_vec[i] = db_stripes_sort[start_pos + i].
position;
1611 size_t min_candidate = start_pos;
1612 const struct Xt_stripe *stripes = db->stripes;
1614#ifdef XT_USE_FAST_DIVISIBLE_TEST
1615 struct xt_fast_div_coeff stride_div_test_coeff;
1616 stride_div_test_coeff = get_fast_div_coeff((
Xt_uint)query_stride);
1618 size_t *bm_store = db->stripe_candidate_bitmask;
1619 size_t i = end_pos - 1;
1621 size_t_bits =
sizeof (size_t) * CHAR_BIT,
1623 size_t pred_accum = 0;
1624 if (i >= size_t_bits) {
1625 if ((i+1) % size_t_bits) {
1626 for (; (i+1) % size_t_bits; --i)
1629 = db_stripes_sort[i].range.max >= query_minmax.
min;
1630 size_t pos = (size_t)db_stripes_sort[i].position;
1632 &= (XT_INT_ABS(stripes[pos].stride) != query_stride
1634 (
Xt_int)(query_start - stripes[pos].start)));
1635 num_candidates += predicate;
1636 size_t predicate_mask = predicate - 1;
1637 min_candidate = (min_candidate & predicate_mask)
1638 | (i & ~predicate_mask);
1639 pred_accum = (pred_accum << 1) | predicate;
1641 bm_store[(i+1)/size_t_bits] = pred_accum;
1644 assert(i % size_t_bits == size_t_bits - 1);
1645 for (; i >= size_t_bits;) {
1646 for (
size_t j = 0; j < size_t_bits; ++j, --i) {
1648 = db_stripes_sort[i].range.max >= query_minmax.
min;
1649 size_t pos = (size_t)db_stripes_sort[i].position;
1651 &= (XT_INT_ABS(stripes[pos].stride) != query_stride
1653 (
Xt_int)(query_start - stripes[pos].start)));
1654 num_candidates += predicate;
1655 size_t predicate_mask = predicate - 1;
1656 min_candidate = (min_candidate & predicate_mask)
1657 | (i & ~predicate_mask);
1658 pred_accum = (pred_accum << 1) | predicate;
1660 bm_store[(i+1)/size_t_bits] = pred_accum;
1664 for (; i != SIZE_MAX; --i)
1667 = db_stripes_sort[i].range.max >= query_minmax.
min;
1668 size_t pos = (size_t)db_stripes_sort[i].position;
1670 &= (XT_INT_ABS(stripes[pos].stride) != query_stride
1672 (
Xt_int)(query_start - stripes[pos].start)));
1673 num_candidates += predicate;
1674 size_t predicate_mask = predicate - 1;
1675 min_candidate = (min_candidate & predicate_mask)
1676 | (i & ~predicate_mask);
1677 pred_accum = (pred_accum << 1) | predicate;
1679 bm_store[0] = pred_accum;
1680 if (candidates->
size == 0 && num_candidates == 1) {
1681 candidates->
p.
e1 = db_stripes_sort[min_candidate].position;
1682 candidates->
num = num_candidates;
1685 if (candidates->
size < num_candidates+1) {
1686 int *prevp = (candidates->
size == 0) ? NULL : candidates->
p.
vec;
1688 (num_candidates + 1)
1689 *
sizeof (candidates->
p.
vec[0]));
1690 candidates->
size = num_candidates + 1;
1692 candidates->
num = num_candidates;
1693 int *restrict candidates_vec = candidates->
p.
vec;
1695 next_bm_mult = (min_candidate + size_t_bits - 1)/size_t_bits*size_t_bits,
1696 predicates = (bm_store[min_candidate/size_t_bits]
1697 >> (min_candidate % size_t_bits));
1698 if (num_candidates == 1) {
1699 end_pos = min_candidate+1;
1700 assert(predicates&1);
1702 for (i = min_candidate; i < end_pos; next_bm_mult+=size_t_bits) {
1703 size_t ulim =
MIN(end_pos, next_bm_mult);
1704 for (; i < ulim; ++i) {
1705 size_t predicate = predicates&1;
1708 candidates_vec[j] = db_stripes_sort[i].position;
1711 predicates = bm_store[i/size_t_bits];
1713 assert(j == num_candidates);
1717 candidates->
num = 0;
1724 size_t expansion = 0;
1728 struct Xt_stripe *restrict expanded_stripes
1729 =
xmalloc((num_stripes + expansion) *
sizeof (expanded_stripes[0]));
1731 for (
size_t i = 0; i < num_stripes; ++i) {
1733 if (stripe.
stride == 0) {
1734 for (
size_t k = 0; k < (size_t)stripe.
nstrides; ++k)
1740 expanded_stripes[j] = stripe;
1746 (
int)(num_stripes + expansion));
1756 bool single_match_only,
1757 size_t num_candidates,
1758 int *restrict candidates);
1764 const struct Xt_stripe stripes[num_stripes],
1767 int single_match_only,
1770 size_t unmatched = 0;
1778 idxstripes->stripes);
1790 struct int_vec candidates = { .
size = 0, .p.vec = NULL };
1791 for (
size_t i = 0; i < (size_t)num_stripes; ++i) {
1799 single_match_only, &cover, config);
1800 if (candidates.
num) {
1803 query, &stripes_db, &result, &cover, single_match_only != 0,
1804 candidates.
num, candidates.
size != 0
1805 ? candidates.
p.
vec : &candidates.
p.
e1);
1806 }
while ((stripes[i].
stride == 0) & (--j > 0));
1809 if (candidates.
size)
1810 free(candidates.
p.
vec);
1819 free(idxstripes->stripes);
1824 return (
int)unmatched;
1833 size_t num_candidates,
1834 int *restrict candidates);
1848 bool single_match_only,
1849 size_t num_candidates,
1850 int *restrict candidates);
1858 bool single_match_only,
1859 size_t num_candidates,
1860 int *restrict candidates)
1862 size_t unmatched = 0;
1863 if (single_match_only)
1865 query, pos_ext2add, stripes_lookup, result, cover,
1866 num_candidates, candidates);
1880 bool single_match_only,
1881 size_t num_candidates,
1882 int *restrict candidates)
1884 size_t unmatched = 0;
1886 const struct Xt_stripe *restrict db_stripes = db->stripes;
1887 const int *restrict db_stripes_nstrides_psum = db->stripes_nstrides_psum;
1888 const struct Xt_stripes_sort *restrict db_stripes_sort = db->stripes_sort;
1889 for (
size_t j = 0; j < num_candidates; ++j) {
1890 size_t unsort_pos = (size_t)candidates[j];
1891 size_t sort_pos = (size_t)db_stripes_sort[unsort_pos].
inv_position;
1892 if ((query_minmax.
min <= db_stripes_sort[sort_pos].range.max)
1893 & (query_minmax.
max >= db_stripes_sort[sort_pos].range. min))
1897 struct Xt_stripe db_stripe = db_stripes[unsort_pos];
1907 int skipLen = (int)((overlap_start - query.
start) /
stride);
1916 query_head, db, result, cover, single_match_only,
1917 num_candidates - j - 1, candidates + j + 1);
1923 = (int)((overlap_start - db_stripe.
start) /
stride);
1924 int overlap_nstrides
1930 .stride = overlap_nstrides > 1 ? query.
stride : (
Xt_int)1,
1933 = db_stripes_nstrides_psum[unsort_pos] + db_stripe_skip,
1934 .size = overlap_nstrides
1935 }, db, result, cover, single_match_only, num_candidates - j - 1,
1936 candidates + j + 1);
1938 if (!(query.
nstrides -= overlap_nstrides))
1939 goto search_finished;
1941 query.
start = (
Xt_int)(overlap_start + stride * overlap_nstrides);
1947 else if ((stride != 0)
1948 & (stride == db_stripe.
stride)
1949 & ((query.
start - db_stripe.
start) % stride != 0)) {
1955 query, db, result, cover, single_match_only,
1956 num_candidates - j, candidates + j);
1959 goto search_finished;
1979 size_t num_candidates,
1980 int *restrict candidates)
1985 if (pos_ext2add.
size == -1)
1986 pos_ext2add.
size = 1;
1990 int pos_ext2add_s = pos_ext2add.
start
1991 + (querySizeMaskNeg & (pos_ext2add.
size + 1)),
1992 pos_ext2add_e = pos_ext2add.
start
1993 + (~querySizeMaskNeg & (pos_ext2add.
size - 1));
1995 = { .
start = pos_ext2add_s, .end = pos_ext2add_e };
1997 size_t overlap_pos =
1999 if (overlap_pos == SIZE_MAX) {
2003 struct Xt_pos_ext *restrict pos_exts_ = cover->pos_ext;
2005 db_s = pos_exts_[overlap_pos].
start
2006 + (dbSizeMaskNeg & (pos_exts_[overlap_pos].size + 1)),
2007 db_e = pos_exts_[overlap_pos].start
2008 + (~dbSizeMaskNeg & (pos_exts_[overlap_pos].size - 1));
2010 int lowQuerySkip = db_s - pos_ext2add_s;
2011 int lowDbSkip = -lowQuerySkip;
2012 lowQuerySkip = (int)((
unsigned)(lowQuerySkip + abs(lowQuerySkip))/2);
2013 lowDbSkip = (int)((
unsigned)(lowDbSkip + abs(lowDbSkip))/2);
2014 int overlapLen =
MIN(db_e - db_s - lowDbSkip + 1,
2015 abs(pos_ext2add.
size) - lowQuerySkip);
2016 int highQuerySkip = abs(pos_ext2add.
size) - lowQuerySkip - overlapLen;
2019 int querySkipLen = (~querySizeMaskNeg & lowQuerySkip)
2020 | (querySizeMaskNeg & -highQuerySkip),
2021 queryTailLen = (querySizeMaskNeg & -lowQuerySkip)
2022 | (~querySizeMaskNeg & highQuerySkip);
2025 int absQuerySkipLen = abs(querySkipLen);
2033 .size = querySkipLen
2037 query_skip, pos_ext2add_skip, db,
2038 result, cover, num_candidates, candidates);
2039 pos_exts_ = result->pos_ext;
2041 + stride * (
Xt_int)absQuerySkipLen);
2044 pos_ext2add.
start += querySkipLen;
2045 pos_ext2add.
size -= querySkipLen;
2056 query_head, db, result, cover,
true,
2057 num_candidates, candidates);
2058 pos_exts_ = result->pos_ext;
2064 int directedOverlapLen = (~querySizeMaskNeg & overlapLen)
2065 | (querySizeMaskNeg & -overlapLen);
2066 pos_ext2add.
start += directedOverlapLen;
2067 pos_ext2add.
size -= directedOverlapLen;
2091 return (
int)sort_flags-(sort_flags < 3);
2098#if defined _CRAYC && _RELEASE_MAJOR == 8 && _RELEASE_MINOR >= 5 && _RELEASE_MINOR < 7
2107 bool single_match_only,
2108 size_t num_candidates,
2109 int *restrict candidates)
2111 size_t unmatched = 0;
2112 const struct Xt_stripe *restrict db_stripes = db->stripes;
2113 size_t db_stripe_pos = (size_t)candidates[0];
2115 db_stripes[db_stripe_pos]);
2117 return (
struct unmatched_tail){ .unmatched = 0, .query_tail = query};
2119 int skipped = (int)((overlap.
start - query.start)/query.stride);
2123 (
struct Xt_stripe){ .start = query.start,
2124 .stride = skipped > 1 ? query.stride : (
Xt_int)1,
2125 .
nstrides = skipped}, db, result, cover,
2126 single_match_only, num_candidates - 1, candidates + 1);
2127 query.start = (
Xt_int)(query.start + skipped * query.stride);
2128 query.nstrides -= skipped;
2129 query.stride = query.nstrides > 1 ? query.stride : 1;
2138 = (int)((overlap.
start - db_stripes[db_stripe_pos].start)
2139 / db_stripes[db_stripe_pos].stride);
2140 int db_pos = db->stripes_nstrides_psum[db_stripe_pos] + db_stripe_skip;
2141 if (((overlap.
stride == query.stride)
2142 & (overlap.
stride == db_stripes[db_stripe_pos].stride))
2148 db, result, cover, single_match_only, num_candidates - 1, candidates + 1);
2149 query.nstrides -= overlap.
nstrides;
2150 query.start = (
Xt_int)(query.start + overlap.
nstrides * query.stride);
2151 query.stride = query.nstrides > 1 ? query.stride : (
Xt_int)1;
2153 else if ((overlap.
stride == query.stride)
2154 & (overlap.
stride == -db_stripes[db_stripe_pos].stride))
2160 db, result, cover, single_match_only,
2161 num_candidates - 1, candidates + 1);
2162 query.nstrides -= overlap.
nstrides;
2163 query.start = (
Xt_int)(query.start + overlap.
nstrides * query.stride);
2164 query.stride = query.nstrides > 1 ? query.stride : (
Xt_int)1;
2166 else if (overlap.
stride == query.stride)
2170 int db_step = (int)(overlap.
stride/db_stripes[db_stripe_pos].stride);
2173 for (
int i = 0; i < overlap.
nstrides; ++i, db_pos += db_step)
2176 (
struct Xt_pos_ext){ .start = db_pos, .size = 1 },
2177 db, result, cover, single_match_only,
2178 num_candidates - 1, candidates + 1);
2179 query.nstrides -= overlap.
nstrides;
2180 query.start = (
Xt_int)(query.start + overlap.
nstrides * query.stride);
2181 query.stride = query.nstrides > 1 ? query.stride : (
Xt_int)1;
2189 int stride_step = (int)(overlap.
stride / query.stride);
2190 int db_step = (int)(overlap.
stride/db_stripes[db_stripe_pos].stride);
2195 intervening = { .start = (
Xt_int)(query.start + query.stride),
2196 .stride = query.stride,
2197 .nstrides =
MIN(query.nstrides - 1, stride_step - 1) };
2201 .
start = db_pos, .size = 1 },
2202 db, result, cover, single_match_only,
2203 num_candidates - 1, candidates + 1);
2205 if (--query.nstrides > 0) {
2208 intervening, db, result, cover, single_match_only,
2209 num_candidates - 1, candidates + 1);
2210 query.nstrides -= intervening.nstrides;
2212 query.start = (
Xt_int)(query.start + query.stride * stride_step);
2213 query.stride = query.nstrides > 1 ? query.stride : (
Xt_int)1;
2217 return (
struct unmatched_tail){ .unmatched = unmatched, .query_tail = query};
#define ENSURE_ARRAY_SIZE(arrayp, curr_array_size, req_size)
add versions of standard API functions not returning on error
#define xrealloc(ptr, size)
#define xcalloc(nmemb, size)
const struct Xt_sort_algo_funcptr * sort_funcs
const struct xt_idxlist_vtable * vtable
struct Xt_stripe * stripes
Xt_int * index_array_cache
struct Xt_stripes_sort stripes_sort[]
struct Xt_idxlist_ parent
struct Xt_pos_ext * pos_ext
void(* sort_int)(int *a, size_t n)
struct Xt_stripe * stripes
struct Xt_idxstripes_ * idxstripes
bool stripes_do_intersect
const struct Xt_stripe * stripes
int * stripes_nstrides_psum
const struct Xt_stripes_sort * stripes_sort
size_t * stripe_candidate_bitmask
struct Xt_stripe query_tail
void(* delete)(Xt_idxlist)
int MPI_Type_create_struct(int count, XT_MPI2_CONST int array_of_block_lengths[], XT_MPI2_CONST MPI_Aint array_of_displacements[], XT_MPI2_CONST MPI_Datatype array_of_types[], MPI_Datatype *newtype)
int MPI_Type_create_resized(MPI_Datatype oldtype, MPI_Aint lb, MPI_Aint extent, MPI_Datatype *newtype)
int MPI_Type_free(MPI_Datatype *datatype)
int MPI_Type_commit(MPI_Datatype *datatype)
static Xt_tword xlimul(Xt_long a, Xt_int b)
static Xt_long xlladd(Xt_long a, Xt_long b)
static Xt_long xiisub(Xt_int a, Xt_int b)
static Xt_long xi2l(Xt_int a)
static Xt_ldiv xlldiv(Xt_long a, Xt_long b)
static Xt_long xliadd(Xt_long a, Xt_int b)
static Xt_idiv xlidivu(Xt_ulong a, Xt_uint b)
static int xlicmp_lt(Xt_long a, Xt_int b)
static Xt_long xiimul(Xt_int a, Xt_int b)
static bool xl_is_in_xt_int_range(Xt_long a)
static Xt_long xlabs(Xt_long a)
static Xt_long xilsub(Xt_int a, Xt_long b)
static Xt_long xlinc(Xt_long a, bool b)
static int xlsign(Xt_long x)
static int xlicmp_le(Xt_long a, Xt_int b)
static int xlicmp_ge(Xt_long a, Xt_int b)
static int isign_mask(int x)
static Xt_int Xt_isign_mask(Xt_int x)
static int xinlz(Xt_uint v)
static int imin(int a, int b)
static Xt_int Xt_isign(Xt_int x)
static long long llsign(long long x)
#define is_divisible(divisor, coeff, dividend)
implementation of configuration object
#define XT_CONFIG_GET_FORCE_NOSORT(config)
base definitions header file
size_t xt_cover_insert_or_overlap(struct Xt_pos_ext_vec *restrict cover, struct Xt_pos_range range, size_t search_start_pos)
void xt_cover_finish(struct Xt_pos_ext_vec *restrict cover)
void xt_cover_start(struct Xt_pos_ext_vec *restrict cover, size_t initial_size)
size_t xt_cover_search(struct Xt_pos_ext_vec *restrict cover, struct Xt_pos_range query, size_t search_start_pos)
macros to create heapsort implementations
static size_t left(size_t i)
static size_t right(size_t i)
Xt_idxlist xt_idxempty_new(void)
void xt_idxlist_get_index_stripes_keep_buf(Xt_idxlist idxlist, struct Xt_stripe *stripes, size_t num_stripes_alloc)
const Xt_int * xt_idxlist_get_indices_const(Xt_idxlist idxlist)
int xt_idxlist_get_num_index_stripes(Xt_idxlist idxlist)
void xt_idxlist_delete(Xt_idxlist idxlist)
Provide non-public declarations common to all index lists.
static void Xt_idxlist_init(Xt_idxlist idxlist, const struct xt_idxlist_vtable *vtable, int num_indices)
static int idxstripes_get_indices_at_positions(Xt_idxlist idxlist, const int *positions, int num, Xt_int *index, Xt_int undef_idx)
static struct unmatched_tail idxstripes_complex_get_pos_exts_of_index_stripe(struct Xt_stripe query, const struct Xt_stripes_lookup *restrict stripes_lookup, struct Xt_pos_ext_vec *restrict result, struct Xt_pos_ext_vec *restrict cover, bool single_match_only, size_t num_candidates, int *restrict candidates)
static int idxstripes_get_sorting(Xt_idxlist idxlist)
const struct Xt_stripe * xt_idxstripes_get_index_stripes_const(Xt_idxlist idxlist)
static void idxstripes_delete(Xt_idxlist data)
static int idxstripes_get_num_index_stripes(Xt_idxlist idxlist)
void xt_idxstripes_initialize(void)
struct Xt_stripes_alloc xt_idxstripes_alloc(size_t num_stripes)
static struct Xt_stripe get_stripe_intersection(struct Xt_stripe stripe_a, struct Xt_stripe stripe_b)
static int idxstripes_get_position_of_index_off(Xt_idxlist idxlist, Xt_int index, int *position, int offset)
static void create_stripes_lookup(struct Xt_stripes_lookup *restrict db, Xt_idxstripes idxstripes)
Xt_idxlist xt_idxstripes_sort_new(size_t num_src_stripes, const struct Xt_stripe src_stripes[], Xt_config config)
static size_t bsearch_stripes_sort(size_t n, const struct Xt_stripes_sort a[n], Xt_int min_key)
Xt_idxlist xt_idxstripes_unpack(void *buffer, int buffer_size, int *position, MPI_Comm comm)
void xt_idxstripes_finalize(void)
Xt_idxlist xt_idxstripes_prealloc_new(const struct Xt_stripe *stripes, int num_stripes)
static void idxstripes_get_indices(Xt_idxlist idxlist, Xt_int *indices)
static Xt_idxlist idxstripes_order_invert(Xt_idxlist idxlist)
static int idxstripes_get_position_of_index(Xt_idxlist idxlist, Xt_int index, int *position)
static void idxstripes_get_index_stripes(Xt_idxlist idxlist, struct Xt_stripe *restrict stripes, size_t num_stripes_alloc)
PPM_DSO_INTERNAL Xt_idxlist xt_idxstripes_get_idxvec_intersection(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst, Xt_config XT_UNUSED(config))
static size_t pos_ext_insert(struct Xt_stripe query, struct Xt_pos_ext pos_ext2add, const struct Xt_stripes_lookup *stripes_lookup, struct Xt_pos_ext_vec *restrict result, struct Xt_pos_ext_vec *restrict cover, bool single_match_only, size_t num_candidates, int *restrict candidates)
static void idxstripes_pack(Xt_idxlist data, void *buffer, int buffer_size, int *position, MPI_Comm comm)
static Xt_idxlist idxstripes_sorted_copy(Xt_idxlist idxlist, Xt_config config)
Xt_idxlist xt_idxstripes_congeal(struct Xt_stripes_alloc stripes_alloc)
static struct Xt_idxstripes_ * expand_zero_stripes(size_t num_stripes, const struct Xt_stripe *restrict stripes)
static int idxstripes_get_pos_exts_of_index_stripes(Xt_idxlist idxlist, int num_stripes, const struct Xt_stripe stripes[num_stripes], int *num_ext, struct Xt_pos_ext **pos_ext, int single_match_only, Xt_config config)
static int idxstripes_get_index_at_position(Xt_idxlist idxlist, int position, Xt_int *index)
static const Xt_int * idxstripes_get_indices_const(Xt_idxlist idxlist)
struct Xt_idxstripes_ * Xt_idxstripes
static void destroy_stripes_lookup(struct Xt_stripes_lookup *restrict db)
size_t xt_idxstripes_get_num_index_stripes(Xt_idxlist idxlist)
static void append_ext(struct Xt_pos_ext pos_ext, struct Xt_pos_ext_vec *restrict result)
static const struct xt_idxlist_vtable idxstripes_vtable
static Xt_int stripe_min(struct Xt_stripe stripe, int pfx_remove)
static Xt_idxlist idxstripes_compute_intersection(Xt_idxstripes idxstripes_src, Xt_idxstripes idxstripes_dst)
@ stripes_some_have_zero_stride_bit
static bool stripe_contains_index(struct Xt_stripe stripe, Xt_int idx)
static size_t idxstripes_get_pos_exts_of_index_stripe(struct Xt_stripe query, const struct Xt_stripes_lookup *restrict db, struct Xt_pos_ext_vec *restrict result, struct Xt_pos_ext_vec *restrict cover, bool single_match_only, size_t num_candidates, int *restrict candidates)
static Xt_idxlist compute_intersection_fallback(Xt_idxstripes idxstripes_src, Xt_idxstripes idxstripes_dst, Xt_config config)
Xt_idxlist xt_idxstripes_get_intersection(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst, Xt_config config)
static Xt_idxlist idxstripes_copy(Xt_idxlist idxlist)
static size_t idxstripes_get_pack_size(Xt_idxlist data, MPI_Comm comm)
static struct Xt_stripes_alloc idxstripes_alloc(size_t num_stripes)
static int stripe_min_cmp_lt(const struct Xt_stripe a, const struct Xt_stripe b, const int pfx_remove_a, const int pfx_remove_b)
static Xt_int idxstripes_get_max_index(Xt_idxlist idxlist)
static Xt_int idxstripes_get_min_index(Xt_idxlist idxlist)
@ stripes_some_have_zero_stride_mask
@ stripes_do_overlap_mask
static Xt_idxlist idxstripes_aggregate(Xt_idxstripes idxstripes, const char *caller)
static void find_candidates(struct Xt_stripe query, const struct Xt_stripes_lookup *restrict db, struct int_vec *candidates, bool single_match_only, struct Xt_pos_ext_vec *restrict cover, Xt_config config)
static size_t conditional_pos_ext_insert(struct Xt_stripe query, struct Xt_pos_ext pos_ext2add, const struct Xt_stripes_lookup *restrict db, struct Xt_pos_ext_vec *restrict result, struct Xt_pos_ext_vec *restrict cover, size_t num_candidates, int *restrict candidates)
static MPI_Datatype stripe_dt
static Xt_int retrieve_min(size_t num_src_stripes, const struct Xt_stripe *src_stripes, int *src_permutation, int *pfx_removed)
Xt_idxlist xt_idxstripes_from_idxlist_new(Xt_idxlist idxlist_src)
Xt_idxlist xt_idxstripes_new(struct Xt_stripe const *stripes, int num_stripes)
static bool xt_can_merge_pos_ext(struct Xt_pos_ext a, struct Xt_pos_ext b)
struct Xt_vec_alloc xt_idxvec_alloc(int num_indices)
Xt_idxlist xt_idxvec_congeal(struct Xt_vec_alloc vec_alloc)
macros to create mergesort implementations, 4 way top-down method
#define xt_mpi_call(call, comm)
size_t xt_stripes_merge_copy(size_t num_stripes, struct Xt_stripe *stripes_dst, const struct Xt_stripe *stripes_src, bool lookback)
bool xt_stripes_detect_duplicate(size_t num_stripes, const struct Xt_stripe stripes[num_stripes], struct Xt_minmax index_range)
static struct Xt_minmax xt_stripe2minmax(struct Xt_stripe stripe)
#define XT_SORT_FLAGS(ntrans_up, ntrans_dn)
static int xt_stripes_eq(struct Xt_stripe a, struct Xt_stripe b)