75#define MAX(a,b) (((a)>=(b))?(a):(b))
76#define MIN(a,b) (((a)<(b))?(a):(b))
105 size_t num_stripes_alloc);
119 int * position,
int offset);
123 size_t num_indices,
int *positions,
124 int single_match_only);
150 .get_positions_of_indices_off = NULL,
154 .get_bounding_box = NULL,
155 .idxlist_pack_code =
VECTOR,
186 size_t vector_size = (size_t)num_indices *
sizeof (
Xt_int),
187 header_size = ((
sizeof (
struct Xt_idxvec_) + sizeof (
Xt_int) - 1)
189 struct Xt_idxvec_ *idxvec_obj =
xmalloc(header_size + vector_size);
191 = (
Xt_int *)(
void *)((
unsigned char *)idxvec_obj + header_size);
192 idxvec_obj->
vector = vector_assign;
193 return (
struct Xt_vec_alloc) { idxvec_obj, vector_assign };
220 else if (num_indices == 0)
223 die(
"number of indices passed to xt_idxvec_new must not be negative!");
229 int ntrans_up=0, ntrans_dn=0;
231 for (
size_t i = 1; i < (size_t)num_indices; ++i) {
234 ntrans_up +=
idxvec[i-1] < v;
235 ntrans_dn +=
idxvec[i-1] > v;
248 return (
void *)vec_alloc.
idxvec;
260 int ntrans_up=0, ntrans_dn=0;
261 Xt_int maxv = vector[0], minv = vector[0];
262 for (
size_t i = 1; i < num_indices; ++i) {
264 ntrans_up += vector[i-1] < v;
265 ntrans_dn += vector[i-1] > v;
271#if defined __ICC && defined __OPTIMIZE__ \
272 && ( __INTEL_COMPILER_BUILD_DATE == 20110811 \
273 || __INTEL_COMPILER == 1300 )
274 if (num_indices == -1)
275 fprintf(stderr,
"%s: ntrans_up=%d, ntrans_dn=%d, flags=%u\n",
280 return (
struct flags_min_max){ .flags =
flags, .min = minv, .max = maxv };
298 ? vec_alloc.
vector : NULL;
312 else if (num_indices == 0)
315 die(
"number of indices passed to xt_idxvec_new must not be negative!");
318 idxvec_obj->vector = idxvec;
320 .idxvec = idxvec_obj, .vector = (
void *)(intptr_t)idxvec });
325 int * sorted_vec_pos,
int pos_offset) {
331 start = stripe.
start;
338 for (
int i = 0; i < stripe.
nstrides; ++i) {
340 sorted_vec_pos[i] = pos_offset + i * sign;
352 assert(num_stripes_ > 0);
353 size_t num_stripes = (size_t)num_stripes_,
355 Xt_int *restrict sorted_vector_assign
365 Xt_int (*restrict stripe_minmax)[num_stripes]
366 =
xmalloc(2 *
sizeof(*stripe_minmax));
367 int *restrict sorted_stripe_min_pos
368 =
xmalloc(num_stripes * 3 *
sizeof(*sorted_stripe_min_pos));
371 for(
size_t i = 0; i < num_stripes; ++i) {
372 Xt_int ofs = (
Xt_int)(stripes[i].stride * (stripes[i].nstrides - 1)),
382 sorted_stripe_min_pos);
384 int *restrict sorted_pos_prefix_sum = sorted_stripe_min_pos + num_stripes,
385 *restrict orig_pos_prefix_sum
386 =
xmalloc(num_stripes *
sizeof(*orig_pos_prefix_sum));
389 for (
size_t i = 0; i < num_stripes; ++i) {
390 orig_pos_prefix_sum[i] = accum;
395 for (
size_t i = 0; i < num_stripes; ++i) {
396 int sorted_pos = sorted_stripe_min_pos[i];
397 sorted_pos_prefix_sum[i] = orig_pos_prefix_sum[sorted_pos];
399 * (stripes[sorted_pos].nstrides - 1)),
401 Xt_int stripe_max = (
Xt_int)(stripes[sorted_pos].start + (ofs & ~mask));
402 stripe_minmax[1][i] = stripe_max;
407 free(orig_pos_prefix_sum);
413 int *restrict overlap_count
414 = sorted_stripe_min_pos + 2 * num_stripes;
415 for (
size_t i = 0; i < num_stripes - 1; ++i) {
416 bool do_overlap = stripe_minmax[1][i] >= stripe_minmax[0][i + 1];
422 Xt_int range_max_idx =
MAX(stripe_minmax[1][i], stripe_minmax[1][i+1]);
423 while (j + 1 < num_stripes
424 && stripe_minmax[0][j + 1] <= range_max_idx) {
425 range_max_idx =
MAX(range_max_idx, stripe_minmax[1][j+1]);
428 overlap_count[i] = (int)(j - i);
431 while (j + 1 < num_stripes
432 && stripe_minmax[0][j + 1] > stripe_minmax[1][j])
434 overlap_count[i] = -(int)(j - i - 1);
438 overlap_count[num_stripes - 1] = 0;
443 void (*sort_xt_int_permutation)(
Xt_int a[],
size_t n,
int permutation[])
445 while (i < num_stripes) {
447 bool do_overlap = overlap_count[i] > 0;
448 size_t num_selection = (size_t)(abs(overlap_count[i])) + 1;
451 for (
size_t j = 0; j < num_selection; ++j)
452 sel_size +=
decode_stripe(stripes[sorted_stripe_min_pos[i+j]],
453 sorted_vector_assign + offset + sel_size,
455 sorted_pos_prefix_sum[i+j]);
458 sort_xt_int_permutation(sorted_vector_assign + offset,
465 free(sorted_stripe_min_pos);
474 size_t num_stripes_ = (size_t)(num_stripes > 0 ? num_stripes : 0);
477 assert((
sizeof (
long long) >
sizeof (
int)) & (
num_indices <= INT_MAX)
484 size_t k = (size_t)-1;
485 for (
int i = 0; i < num_stripes; ++i)
486 for (
int j = 0; j < stripes[i].
nstrides; ++j)
513 int size_xt_idx, size_header;
517 &size_xt_idx), comm);
519 return (
size_t)size_xt_idx + (size_t)size_header;
529 buffer_size, position, comm), comm);
533 buffer_size, position, comm), comm);
540 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
541 &num_indices, 1, MPI_INT, comm), comm);
542 assert(num_indices > 0);
545 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
546 vec_alloc.
vector, num_indices,
553 ? vec_alloc.
vector : NULL;
590 size_t svec_size = num_indices *
sizeof(
Xt_int);
595 memcpy(sorted_vector, vector, svec_size);
615 size_t num_indices_inter = 0,
626 const Xt_int *restrict sorted_src_vector
628 *restrict sorted_dst_vector
632 for (
size_t i = 0, j = 0; i < num_indices_dst; ++i) {
634 while (j < num_indices_src
635 && sorted_src_vector[j] < sorted_dst_vector[i]) ++j;
636 if (j < num_indices_src) {
637 vector_assign[num_indices_inter] = sorted_dst_vector[i];
638 num_indices_inter += sorted_src_vector[j] == sorted_dst_vector[i];
643 if (num_indices_inter) {
644 size_t vector_size = num_indices_inter *
sizeof (idxvec_dst->vector[0]),
647 inter_vector =
xrealloc(inter_vector, header_size + vector_size);
649 = (
Xt_int *)(
void *)((
unsigned char *)inter_vector + header_size);
654 inter_vector->
min = inter_vector->
vector[0];
655 inter_vector->
max = inter_vector->
vector[num_indices_inter-1];
712 num_indices_inter = 0, last_match_pos = 0;
713 for (
size_t i = 0; i < ndst_stripes; ++i) {
714 size_t nstrides = (size_t)stripes_dst[i].nstrides;
716 stride = stripes_dst[i].
stride,
717 end = (
Xt_int)(start + (
int)(nstrides-1) * stride);
724 if (start > src_max || end < src_min)
726 size_t adj_first = start >= src_min
727 ? (size_t)0 : (size_t)((src_min - start)/stride);
729 size_t adj_nstrides = (size_t)((end - src_max)/stride);
730 nstrides -= adj_nstrides;
732 for (
size_t k = adj_first; k < nstrides; ++k) {
734 size_t lb = 0, ub = nsrc,
735 guess = last_match_pos;
737 Xt_int src_val = src_sorted[guess];
738 if (src_val < dst_idx)
740 else if (src_val > dst_idx)
743 found_indices[num_indices_inter++] = dst_idx;
744 last_match_pos = guess;
747 guess = lb + (ub-lb)/2;
751 if (num_indices_inter) {
752 if (num_indices_inter != ndst) {
753 size_t vector_size = num_indices_inter *
sizeof (found_indices[0]),
758 = (
Xt_int *)(
void *)((
unsigned char *)vec_alloc.
idxvec + header_size);
789 size_t num_stripes_alloc
797 num_indices, sorted_vector, num_stripes_alloc, stripes_alloc.
stripes);
798 assert(num_stripes == num_stripes_alloc);
809 memcpy(indices, ((
Xt_idxvec)idxlist)->vector,
827 assert(num_stripes <= INT_MAX);
828 return (
int)num_stripes;
833 size_t num_stripes_alloc) {
846 *index = ((
Xt_idxvec)idxlist)->vector[position];
853 const int *restrict positions,
854 int num_pos_,
Xt_int *index,
862 size_t num_pos = num_pos_ >= 0 ? (size_t)num_pos_ : (size_t)0;
863 for (
size_t ip = 0; ip < num_pos; ip++) {
864 int p = positions[ip];
865 if (p >= 0 && (
size_t)p < num_indices) {
868 index[ip] = undef_idx;
881 int * position,
int offset) {
888 if ((offset < 0) || ((
size_t)offset >= num_indices))
894 const Xt_int *sorted_vector
897 if ((index < sorted_vector[0]) ||
898 (index > sorted_vector[num_indices-1]))
903 size_t ub = num_indices - 1;
905 while (sorted_vector[lb] < index) {
907 size_t middle = (ub + lb + 1)/2;
909 if (sorted_vector[middle] <= index)
911 else if (ub == middle)
918 while (lb > 0 && sorted_vector[lb-1] == index) --lb;
922 if (sorted_vec_positions) {
923 while (lb < num_indices - 1
924 && sorted_vec_positions[lb] < offset
925 && sorted_vector[lb] == index) ++lb;
927 while (lb < num_indices - 1
929 && sorted_vector[lb] == index) ++lb;
932 if (lb >= num_indices || sorted_vector[lb] != index)
936 *position = sorted_vec_positions ? sorted_vec_positions[lb] : (int)lb;
949 for (
size_t i = 1; i < n; i++)
950 if (idx[i] < idx[i-1])
return false;
957 const Xt_int *selection_idx,
958 size_t num_selection,
int *positions,
959 int single_match_only) {
964 Xt_int const *sorted_selection;
965 int *sorted_selection_pos = NULL;
968 if (selection_is_ordered) {
969 sorted_selection = selection_idx;
971 size_t idx_memsize = num_selection *
sizeof(*sorted_selection),
972 pos_memsize = num_selection *
sizeof(*sorted_selection_pos),
973#if defined _CRAYC && _RELEASE_MAJOR < 9
974 pos_ofs_roundup = _MAXVL_8,
976 pos_ofs_roundup =
sizeof(int),
979 pos_ofs = ((idx_memsize + pos_ofs_roundup - 1)
980 & ((
size_t)-(ssize_t)(pos_ofs_roundup))),
982 alloc_size = pos_ofs + pos_memsize;
985 memcpy(tmp_idx, selection_idx, idx_memsize);
988 = (
void *)((
unsigned char *)tmp_idx + pos_ofs);
991 tmp_idx, num_selection, sorted_selection_pos);
992 sorted_selection = tmp_idx;
1002 const Xt_int *sorted_body
1006 size_t num_unmatched = 0;
1009 size_t post_match_step = single_match_only != 0;
1012 for (
size_t search_start = 0, ub_guess_ofs = 1;
1013 i < num_selection && search_start<=search_end;
1015 size_t selection_pos = selection_is_ordered ? i : (size_t)sorted_selection_pos[i];
1016 Xt_int isel = sorted_selection[i];
1018 size_t ub =
MIN(search_start + ub_guess_ofs, search_end);
1019 size_t lb = search_start;
1021 if (sorted_body[ub] < isel) {
1022 lb =
MIN(ub + 1, search_end);
1027 while ((ub-lb)/16) {
1028 size_t middle = (ub + lb + 1) / 2;
1030 if (sorted_body[middle] <= isel)
1036 while (sorted_body[lb] < isel && lb < ub)
1040 if (isel == sorted_body[lb]) {
1044 positions[selection_pos] = -1;
1049 while (match_pos > search_start && sorted_body[match_pos-1] == isel)
1054 positions[selection_pos]
1055 = sorted_body_pos ? sorted_body_pos[match_pos] : (int)match_pos;
1056 ub_guess_ofs = match_pos - search_start;
1057 search_start = match_pos + post_match_step;
1059 if (i < num_selection) {
1060 num_unmatched += num_selection - i;
1061 if (selection_is_ordered)
1064 }
while (++i < num_selection);
1067 positions[sorted_selection_pos[i]] = -1;
1068 }
while (++i < num_selection);
1070 if (tmp_idx) free(tmp_idx);
1072 return num_unmatched;
1079 return idxvec_obj->
min;
1087 return idxvec_obj->
max;
add versions of standard API functions not returning on error
#define xrealloc(ptr, size)
const struct Xt_sort_algo_funcptr * sort_funcs
const struct xt_idxlist_vtable * vtable
const Xt_int * sorted_vector
struct Xt_idxlist_ parent
int * sorted_vec_positions
void(* sort_xt_int_permutation)(Xt_int a[], size_t n, int permutation[])
void(* sort_xt_int)(Xt_int *a, size_t n)
struct Xt_stripe * stripes
struct Xt_idxvec_ * idxvec
static Xt_int Xt_isign_mask(Xt_int x)
static const char filename[]
struct Xt_config_ xt_default_config
struct Xt_config_ * Xt_config
implementation of configuration object
#define XT_CONFIG_GET_FORCE_NOSORT(config)
#define XT_CONFIG_UNSET_FORCE_NOSORT(config)
base definitions header file
struct Xt_idxlist_ * Xt_idxlist
Xt_idxlist xt_idxempty_new(void)
void xt_idxlist_delete(Xt_idxlist idxlist)
Provide non-public declarations common to all index lists.
#define xt_idxlist_get_num_indices(idxlist)
static void Xt_idxlist_init(Xt_idxlist idxlist, const struct xt_idxlist_vtable *vtable, int num_indices)
const struct Xt_stripe * xt_idxstripes_get_index_stripes_const(Xt_idxlist idxlist)
struct Xt_stripes_alloc xt_idxstripes_alloc(size_t num_stripes)
Xt_idxlist xt_idxstripes_congeal(struct Xt_stripes_alloc stripes_alloc)
size_t xt_idxstripes_get_num_index_stripes(Xt_idxlist idxlist)
static Xt_int stripe_min(struct Xt_stripe stripe, int pfx_remove)
Xt_idxlist xt_idxstripes_get_intersection(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst, Xt_config config)
static Xt_int idxvec_get_min_index(Xt_idxlist idxlist)
Xt_idxlist xt_idxvec_prealloc_new(const Xt_int *idxvec, int num_indices)
static const char filename[]
static int idxvec_get_position_of_index(Xt_idxlist idxlist, Xt_int index, int *position)
static void idxvec_delete(Xt_idxlist data)
static void idxvec_get_indices(Xt_idxlist idxlist, Xt_int *indices)
Xt_idxlist xt_idxvec_new(const Xt_int *idxvec, int num_indices)
static Xt_idxlist idxvec_copy(Xt_idxlist idxlist)
static int idxvec_get_index_at_position(Xt_idxlist idxlist, int position, Xt_int *index)
static void idxvec_pack(Xt_idxlist data, void *buffer, int buffer_size, int *position, MPI_Comm comm)
static size_t idxvec_get_pack_size(Xt_idxlist data, MPI_Comm comm)
static bool idx_vec_is_sorted(Xt_int const *idx, size_t n)
static int idxvec_get_indices_at_positions(Xt_idxlist idxlist, const int *positions, int num, Xt_int *index, Xt_int undef_idx)
static int idxvec_get_sorting(Xt_idxlist idxlist)
static struct Xt_vec_alloc idxvec_alloc(int num_indices)
static Xt_idxlist idxvec_sorted_copy(Xt_idxlist idxlist, Xt_config config)
static int idxvec_get_position_of_index_off(Xt_idxlist idxlist, Xt_int index, int *position, int offset)
static struct flags_min_max get_sort_flags(size_t num_indices, const Xt_int vector[num_indices])
static const struct xt_idxlist_vtable idxvec_vtable
static void idxvec_get_index_stripes(Xt_idxlist idxlist, struct Xt_stripe *stripes, size_t num_stripes_alloc)
static struct Xt_vec_alloc idxvec_alloc_no_init(int num_indices)
static Xt_int const * idxvec_get_indices_const(Xt_idxlist idxlist)
PPM_DSO_INTERNAL const Xt_int * xt_idxvec_get_sorted_vector(Xt_idxlist idxvec, Xt_config config)
static const Xt_int * get_sorted_vector(Xt_idxvec idxvec, Xt_config config)
Xt_idxlist xt_idxvec_get_intersection(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst, Xt_config config)
struct Xt_idxvec_ * Xt_idxvec
static void generate_sorted_vector_from_stripes(const struct Xt_stripe stripes[], int num_stripes_, Xt_idxvec idxvec, Xt_config config)
PPM_DSO_INTERNAL Xt_idxlist xt_idxvec_get_idxstripes(Xt_idxlist idxlist)
Xt_idxlist xt_idxvec_from_stripes_new(const struct Xt_stripe stripes[], int num_stripes)
static Xt_int idxvec_get_max_index(Xt_idxlist idxlist)
PPM_DSO_INTERNAL Xt_idxlist xt_idxvec_get_idxstripes_intersection(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst, Xt_config config)
static size_t decode_stripe(struct Xt_stripe stripe, Xt_int *sorted_vector, int *sorted_vec_pos, int pos_offset)
static int idxvec_get_num_index_stripes(Xt_idxlist idxlist)
struct Xt_vec_alloc xt_idxvec_alloc(int num_indices)
Xt_idxlist xt_idxvec_congeal(struct Xt_vec_alloc vec_alloc)
static size_t idxvec_get_positions_of_indices(Xt_idxlist idxlist, const Xt_int *indices, size_t num_indices, int *positions, int single_match_only)
Xt_idxlist xt_idxvec_unpack(void *buffer, int buffer_size, int *position, MPI_Comm comm)
#define xt_mpi_call(call, comm)
void xt_assign_id_map_int(size_t n, int *restrict a, int ofs)
size_t xt_indices_count_stripes(size_t num_indices, const Xt_int indices[num_indices])
struct Xt_stripe_summary xt_summarize_stripes(size_t num_stripes, const struct Xt_stripe stripes[num_stripes])
size_t xt_convert_indices_to_stripes_buf(size_t num_indices, const Xt_int *restrict indices, size_t num_stripes_alloc, struct Xt_stripe *stripes)
#define XT_SORT_FLAGS(ntrans_up, ntrans_dn)