74#define MAX(a,b) (((a)>=(b))?(a):(b))
75#define MIN(a,b) (((a)<(b))?(a):(b))
104 size_t num_stripes_alloc);
118 int * position,
int offset);
122 size_t num_indices,
int *positions,
123 int single_match_only);
149 .get_positions_of_indices_off = NULL,
153 .get_bounding_box = NULL,
154 .idxlist_pack_code =
VECTOR,
185 size_t vector_size = (size_t)num_indices *
sizeof (
Xt_int),
190 = (
Xt_int *)(
void *)((
unsigned char *)idxvec_obj + header_size);
191 idxvec_obj->
vector = vector_assign;
192 return (
struct Xt_vec_alloc) { idxvec_obj, vector_assign };
219 else if (num_indices == 0)
222 die(
"number of indices passed to xt_idxvec_new must not be negative!");
228 int ntrans_up=0, ntrans_dn=0;
230 for (
size_t i = 1; i < (size_t)num_indices; ++i) {
233 ntrans_up +=
idxvec[i-1] < v;
234 ntrans_dn +=
idxvec[i-1] > v;
247 return (
void *)vec_alloc.
idxvec;
259 int ntrans_up=0, ntrans_dn=0;
260 Xt_int maxv = vector[0], minv = vector[0];
261 for (
size_t i = 1; i < num_indices; ++i) {
263 ntrans_up += vector[i-1] < v;
264 ntrans_dn += vector[i-1] > v;
270#if defined __ICC && defined __OPTIMIZE__ \
271 && ( __INTEL_COMPILER_BUILD_DATE == 20110811 \
272 || __INTEL_COMPILER == 1300 )
273 if (num_indices == -1)
274 fprintf(stderr,
"%s: ntrans_up=%d, ntrans_dn=%d, flags=%u\n",
297 ? vec_alloc.
vector : NULL;
311 else if (num_indices == 0)
314 die(
"number of indices passed to xt_idxvec_new must not be negative!");
317 idxvec_obj->vector = idxvec;
319 .idxvec = idxvec_obj, .vector = (
void *)idxvec });
324 int * sorted_vec_pos,
int pos_offset) {
330 start = stripe.
start;
337 for (
int i = 0; i < stripe.
nstrides; ++i) {
339 sorted_vec_pos[i] = pos_offset + i * sign;
351 assert(num_stripes_ > 0);
352 size_t num_stripes = (size_t)num_stripes_,
354 Xt_int *restrict sorted_vector_assign
364 Xt_int (*restrict stripe_minmax)[num_stripes]
365 =
xmalloc(2 *
sizeof(*stripe_minmax));
366 int *restrict sorted_stripe_min_pos
367 =
xmalloc(num_stripes * 3 *
sizeof(*sorted_stripe_min_pos));
370 for(
size_t i = 0; i < num_stripes; ++i) {
371 Xt_int ofs = (
Xt_int)(stripes[i].stride * (stripes[i].nstrides - 1)),
381 sorted_stripe_min_pos);
383 int *restrict sorted_pos_prefix_sum = sorted_stripe_min_pos + num_stripes,
384 *restrict orig_pos_prefix_sum
385 =
xmalloc(num_stripes *
sizeof(*orig_pos_prefix_sum));
388 for (
size_t i = 0; i < num_stripes; ++i) {
389 orig_pos_prefix_sum[i] = accum;
394 for (
size_t i = 0; i < num_stripes; ++i) {
395 int sorted_pos = sorted_stripe_min_pos[i];
396 sorted_pos_prefix_sum[i] = orig_pos_prefix_sum[sorted_pos];
398 * (stripes[sorted_pos].nstrides - 1)),
400 Xt_int stripe_max = (
Xt_int)(stripes[sorted_pos].start + (ofs & ~mask));
401 stripe_minmax[1][i] = stripe_max;
406 free(orig_pos_prefix_sum);
412 int *restrict overlap_count
413 = sorted_stripe_min_pos + 2 * num_stripes;
414 for (
size_t i = 0; i < num_stripes - 1; ++i) {
415 bool do_overlap = stripe_minmax[1][i] >= stripe_minmax[0][i + 1];
421 Xt_int range_max_idx =
MAX(stripe_minmax[1][i], stripe_minmax[1][i+1]);
422 while (j + 1 < num_stripes
423 && stripe_minmax[0][j + 1] <= range_max_idx) {
424 range_max_idx =
MAX(range_max_idx, stripe_minmax[1][j+1]);
427 overlap_count[i] = (int)(j - i);
430 while (j + 1 < num_stripes
431 && stripe_minmax[0][j + 1] > stripe_minmax[1][j])
433 overlap_count[i] = -(int)(j - i - 1);
437 overlap_count[num_stripes - 1] = 0;
442 void (*sort_xt_int_permutation)(
Xt_int a[],
size_t n,
int permutation[])
444 while (i < num_stripes) {
446 bool do_overlap = overlap_count[i] > 0;
447 size_t num_selection = (size_t)(abs(overlap_count[i])) + 1;
450 for (
size_t j = 0; j < num_selection; ++j)
451 sel_size +=
decode_stripe(stripes[sorted_stripe_min_pos[i+j]],
452 sorted_vector_assign + offset + sel_size,
454 sorted_pos_prefix_sum[i+j]);
457 sort_xt_int_permutation(sorted_vector_assign + offset,
464 free(sorted_stripe_min_pos);
473 size_t num_stripes_ = (size_t)(num_stripes > 0 ? num_stripes : 0);
476 assert((
sizeof (
long long) >
sizeof (
int)) & (
num_indices <= INT_MAX)
483 size_t k = (size_t)-1;
484 for (
int i = 0; i < num_stripes; ++i)
485 for (
int j = 0; j < stripes[i].
nstrides; ++j)
512 int size_xt_idx, size_header;
516 &size_xt_idx), comm);
518 return (
size_t)size_xt_idx + (size_t)size_header;
528 buffer_size, position, comm), comm);
532 buffer_size, position, comm), comm);
539 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
540 &num_indices, 1, MPI_INT, comm), comm);
541 assert(num_indices > 0);
544 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
545 vec_alloc.
vector, num_indices,
552 ? vec_alloc.
vector : NULL;
589 size_t svec_size = num_indices *
sizeof(
Xt_int);
594 memcpy(sorted_vector, vector, svec_size);
614 size_t num_indices_inter = 0,
625 const Xt_int *restrict sorted_src_vector
627 *restrict sorted_dst_vector
631 for (
size_t i = 0, j = 0; i < num_indices_dst; ++i) {
633 while (j < num_indices_src
634 && sorted_src_vector[j] < sorted_dst_vector[i]) ++j;
635 if (j < num_indices_src) {
636 vector_assign[num_indices_inter] = sorted_dst_vector[i];
637 num_indices_inter += sorted_src_vector[j] == sorted_dst_vector[i];
642 if (num_indices_inter) {
643 size_t vector_size = num_indices_inter *
sizeof (idxvec_dst->vector[0]),
646 inter_vector =
xrealloc(inter_vector, header_size + vector_size);
648 = (
Xt_int *)(
void *)((
unsigned char *)inter_vector + header_size);
653 inter_vector->
min = inter_vector->
vector[0];
654 inter_vector->
max = inter_vector->
vector[num_indices_inter-1];
711 num_indices_inter = 0, last_match_pos = 0;
712 for (
size_t i = 0; i < ndst_stripes; ++i) {
713 size_t nstrides = (size_t)stripes_dst[i].nstrides;
715 stride = stripes_dst[i].
stride,
716 end = (
Xt_int)(start + (
int)(nstrides-1) * stride);
723 if (start > src_max || end < src_min)
725 size_t adj_first = start >= src_min
726 ? (size_t)0 : (size_t)((src_min - start)/stride);
728 size_t adj_nstrides = (size_t)((end - src_max)/stride);
729 nstrides -= adj_nstrides;
731 for (
size_t k = adj_first; k < nstrides; ++k) {
733 size_t lb = 0, ub = nsrc,
734 guess = last_match_pos;
736 Xt_int src_val = src_sorted[guess];
737 if (src_val < dst_idx)
739 else if (src_val > dst_idx)
742 found_indices[num_indices_inter++] = dst_idx;
743 last_match_pos = guess;
746 guess = lb + (ub-lb)/2;
750 if (num_indices_inter) {
751 if (num_indices_inter != ndst) {
752 size_t vector_size = num_indices_inter *
sizeof (found_indices[0]),
757 = (
Xt_int *)(
void *)((
unsigned char *)vec_alloc.
idxvec + header_size);
788 size_t num_stripes_alloc
796 num_indices, sorted_vector, num_stripes_alloc, stripes_alloc.
stripes);
797 assert(num_stripes == num_stripes_alloc);
808 memcpy(indices, ((
Xt_idxvec)idxlist)->vector,
826 assert(num_stripes <= INT_MAX);
827 return (
int)num_stripes;
832 size_t num_stripes_alloc) {
845 *index = ((
Xt_idxvec)idxlist)->vector[position];
852 const int *restrict positions,
853 int num_pos_,
Xt_int *index,
861 size_t num_pos = num_pos_ >= 0 ? (size_t)num_pos_ : (size_t)0;
862 for (
size_t ip = 0; ip < num_pos; ip++) {
863 int p = positions[ip];
864 if (p >= 0 && (
size_t)p < num_indices) {
867 index[ip] = undef_idx;
880 int * position,
int offset) {
887 if ((offset < 0) || ((
size_t)offset >= num_indices))
893 const Xt_int *sorted_vector
896 if ((index < sorted_vector[0]) ||
897 (index > sorted_vector[num_indices-1]))
902 size_t ub = num_indices - 1;
904 while (sorted_vector[lb] < index) {
906 size_t middle = (ub + lb + 1)/2;
908 if (sorted_vector[middle] <= index)
910 else if (ub == middle)
917 while (lb > 0 && sorted_vector[lb-1] == index) --lb;
921 if (sorted_vec_positions) {
922 while (lb < num_indices - 1
923 && sorted_vec_positions[lb] < offset
924 && sorted_vector[lb] == index) ++lb;
926 while (lb < num_indices - 1
928 && sorted_vector[lb] == index) ++lb;
931 if (lb >= num_indices || sorted_vector[lb] != index)
935 *position = sorted_vec_positions ? sorted_vec_positions[lb] : (int)lb;
948 for (
size_t i = 1; i < n; i++)
949 if (idx[i] < idx[i-1])
return false;
956 const Xt_int *selection_idx,
957 size_t num_selection,
int *positions,
958 int single_match_only) {
963 Xt_int const *sorted_selection;
964 int *sorted_selection_pos = NULL;
967 if (selection_is_ordered) {
968 sorted_selection = selection_idx;
970 size_t idx_memsize = num_selection *
sizeof(*sorted_selection),
971 pos_memsize = num_selection *
sizeof(*sorted_selection_pos),
972#if defined _CRAYC && _RELEASE_MAJOR < 9
973 pos_ofs_roundup = _MAXVL_8,
975 pos_ofs_roundup =
sizeof(int),
978 pos_ofs = ((idx_memsize + pos_ofs_roundup - 1)
979 & ((size_t)-(ssize_t)(pos_ofs_roundup))),
981 alloc_size = pos_ofs + pos_memsize;
984 memcpy(tmp_idx, selection_idx, idx_memsize);
987 = (
void *)((
unsigned char *)tmp_idx + pos_ofs);
990 tmp_idx, num_selection, sorted_selection_pos);
991 sorted_selection = tmp_idx;
1001 const Xt_int *sorted_body
1005 size_t num_unmatched = 0;
1008 size_t post_match_step = single_match_only != 0;
1011 for (
size_t search_start = 0, ub_guess_ofs = 1;
1012 i < num_selection && search_start<=search_end;
1014 size_t selection_pos = selection_is_ordered ? i : (size_t)sorted_selection_pos[i];
1015 Xt_int isel = sorted_selection[i];
1017 size_t ub =
MIN(search_start + ub_guess_ofs, search_end);
1018 size_t lb = search_start;
1020 if (sorted_body[ub] < isel) {
1021 lb =
MIN(ub + 1, search_end);
1026 while ((ub-lb)/16) {
1027 size_t middle = (ub + lb + 1) / 2;
1029 if (sorted_body[middle] <= isel)
1035 while (sorted_body[lb] < isel && lb < ub)
1039 if (isel == sorted_body[lb]) {
1043 positions[selection_pos] = -1;
1048 while (match_pos > search_start && sorted_body[match_pos-1] == isel)
1053 positions[selection_pos]
1054 = sorted_body_pos ? sorted_body_pos[match_pos] : (int)match_pos;
1055 ub_guess_ofs = match_pos - search_start;
1056 search_start = match_pos + post_match_step;
1058 if (i < num_selection) {
1059 num_unmatched += num_selection - i;
1060 if (selection_is_ordered)
1063 }
while (++i < num_selection);
1066 positions[sorted_selection_pos[i]] = -1;
1067 }
while (++i < num_selection);
1069 if (tmp_idx) free(tmp_idx);
1071 return num_unmatched;
1078 return idxvec_obj->
min;
1086 return idxvec_obj->
max;
add versions of standard API functions not returning on error
#define xrealloc(ptr, size)
const struct Xt_sort_algo_funcptr * sort_funcs
const struct xt_idxlist_vtable * vtable
const Xt_int * sorted_vector
struct Xt_idxlist_ parent
int * sorted_vec_positions
void(* sort_xt_int_permutation)(Xt_int a[], size_t n, int permutation[])
void(* sort_xt_int)(Xt_int *a, size_t n)
struct Xt_stripe * stripes
struct Xt_idxvec_ * idxvec
void(* delete)(Xt_idxlist)
static Xt_int Xt_isign_mask(Xt_int x)
struct Xt_config_ xt_default_config
implementation of configuration object
#define XT_CONFIG_GET_FORCE_NOSORT(config)
#define XT_CONFIG_UNSET_FORCE_NOSORT(config)
base definitions header file
struct Xt_idxlist_ * Xt_idxlist
Xt_idxlist xt_idxempty_new(void)
void xt_idxlist_delete(Xt_idxlist idxlist)
Provide non-public declarations common to all index lists.
#define xt_idxlist_get_num_indices(idxlist)
static void Xt_idxlist_init(Xt_idxlist idxlist, const struct xt_idxlist_vtable *vtable, int num_indices)
const struct Xt_stripe * xt_idxstripes_get_index_stripes_const(Xt_idxlist idxlist)
struct Xt_stripes_alloc xt_idxstripes_alloc(size_t num_stripes)
Xt_idxlist xt_idxstripes_congeal(struct Xt_stripes_alloc stripes_alloc)
size_t xt_idxstripes_get_num_index_stripes(Xt_idxlist idxlist)
static Xt_int stripe_min(struct Xt_stripe stripe, int pfx_remove)
Xt_idxlist xt_idxstripes_get_intersection(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst, Xt_config config)
static Xt_int idxvec_get_min_index(Xt_idxlist idxlist)
Xt_idxlist xt_idxvec_prealloc_new(const Xt_int *idxvec, int num_indices)
static const char filename[]
static int idxvec_get_position_of_index(Xt_idxlist idxlist, Xt_int index, int *position)
static void idxvec_delete(Xt_idxlist data)
static void idxvec_get_indices(Xt_idxlist idxlist, Xt_int *indices)
static Xt_idxlist idxvec_copy(Xt_idxlist idxlist)
static int idxvec_get_index_at_position(Xt_idxlist idxlist, int position, Xt_int *index)
static void idxvec_pack(Xt_idxlist data, void *buffer, int buffer_size, int *position, MPI_Comm comm)
static size_t idxvec_get_pack_size(Xt_idxlist data, MPI_Comm comm)
static bool idx_vec_is_sorted(Xt_int const *idx, size_t n)
static int idxvec_get_indices_at_positions(Xt_idxlist idxlist, const int *positions, int num, Xt_int *index, Xt_int undef_idx)
static int idxvec_get_sorting(Xt_idxlist idxlist)
static struct Xt_vec_alloc idxvec_alloc(int num_indices)
static Xt_idxlist idxvec_sorted_copy(Xt_idxlist idxlist, Xt_config config)
static int idxvec_get_position_of_index_off(Xt_idxlist idxlist, Xt_int index, int *position, int offset)
static struct flags_min_max get_sort_flags(size_t num_indices, const Xt_int vector[num_indices])
static const struct xt_idxlist_vtable idxvec_vtable
static void idxvec_get_index_stripes(Xt_idxlist idxlist, struct Xt_stripe *stripes, size_t num_stripes_alloc)
static struct Xt_vec_alloc idxvec_alloc_no_init(int num_indices)
static Xt_int const * idxvec_get_indices_const(Xt_idxlist idxlist)
PPM_DSO_INTERNAL const Xt_int * xt_idxvec_get_sorted_vector(Xt_idxlist idxvec, Xt_config config)
static const Xt_int * get_sorted_vector(Xt_idxvec idxvec, Xt_config config)
Xt_idxlist xt_idxvec_get_intersection(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst, Xt_config config)
struct Xt_idxvec_ * Xt_idxvec
static void generate_sorted_vector_from_stripes(const struct Xt_stripe stripes[], int num_stripes_, Xt_idxvec idxvec, Xt_config config)
PPM_DSO_INTERNAL Xt_idxlist xt_idxvec_get_idxstripes(Xt_idxlist idxlist)
static Xt_int idxvec_get_max_index(Xt_idxlist idxlist)
PPM_DSO_INTERNAL Xt_idxlist xt_idxvec_get_idxstripes_intersection(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst, Xt_config config)
static size_t decode_stripe(struct Xt_stripe stripe, Xt_int *sorted_vector, int *sorted_vec_pos, int pos_offset)
static int idxvec_get_num_index_stripes(Xt_idxlist idxlist)
struct Xt_vec_alloc xt_idxvec_alloc(int num_indices)
Xt_idxlist xt_idxvec_congeal(struct Xt_vec_alloc vec_alloc)
static size_t idxvec_get_positions_of_indices(Xt_idxlist idxlist, const Xt_int *indices, size_t num_indices, int *positions, int single_match_only)
Xt_idxlist xt_idxvec_unpack(void *buffer, int buffer_size, int *position, MPI_Comm comm)
Xt_idxlist xt_idxvec_from_stripes_new(const struct Xt_stripe *stripes, int num_stripes)
Xt_idxlist xt_idxvec_new(const Xt_int *idxlist, int num_indices)
#define xt_mpi_call(call, comm)
void xt_assign_id_map_int(size_t n, int *restrict a, int ofs)
size_t xt_indices_count_stripes(size_t num_indices, const Xt_int indices[num_indices])
struct Xt_stripe_summary xt_summarize_stripes(size_t num_stripes, const struct Xt_stripe stripes[num_stripes])
size_t xt_convert_indices_to_stripes_buf(size_t num_indices, const Xt_int *restrict indices, size_t num_stripes_alloc, struct Xt_stripe *stripes)
#define XT_SORT_FLAGS(ntrans_up, ntrans_dn)