Yet Another eXchange Tool 0.11.2
Loading...
Searching...
No Matches
xt_idxstripes.c
Go to the documentation of this file.
1
12/*
13 * Keywords:
14 * Maintainer: Jörg Behrens <behrens@dkrz.de>
15 * Moritz Hanke <hanke@dkrz.de>
16 * Thomas Jahns <jahns@dkrz.de>
17 * URL: https://dkrz-sw.gitlab-pages.dkrz.de/yaxt/
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions are
21 * met:
22 *
23 * Redistributions of source code must retain the above copyright notice,
24 * this list of conditions and the following disclaimer.
25 *
26 * Redistributions in binary form must reproduce the above copyright
27 * notice, this list of conditions and the following disclaimer in the
28 * documentation and/or other materials provided with the distribution.
29 *
30 * Neither the name of the DKRZ GmbH nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
35 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
36 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
37 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
38 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
39 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
40 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
41 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
42 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
43 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
44 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 */
46#ifdef HAVE_CONFIG_H
47#include <config.h>
48#endif
49
50#include <assert.h>
51#include <limits.h>
52#include <stdbool.h>
53#include <stdlib.h>
54#include <stdio.h>
55#include <string.h>
56
57#include "xt/xt_core.h"
58#include "xt_arithmetic_util.h"
59#include "xt_arithmetic_long.h"
60#include "xt/xt_idxlist.h"
61#include "xt_idxlist_internal.h"
62#include "xt/xt_idxempty.h"
63#include "xt/xt_idxvec.h"
64#include "xt_idxvec_internal.h"
65#include "xt/xt_idxstripes.h"
67#include "xt_stripe_util.h"
68#include "xt/xt_mpi.h"
69#include "xt_idxlist_unpack.h"
70#include "xt_cover.h"
71#include "core/core.h"
72#include "core/ppm_xfuncs.h"
73#include "ensure_array_size.h"
74#include "instr.h"
75#include "xt_config_internal.h"
76
77#define MIN(a,b) (((a)<(b))?(a):(b))
78#define MAX(a,b) (((a)>(b))?(a):(b))
79
80static void
82
83static size_t
85
86static void
87idxstripes_pack(Xt_idxlist data, void *buffer, int buffer_size,
88 int *position, MPI_Comm comm);
89
90static Xt_idxlist
92
93static Xt_idxlist
95
96static void
98
99static const Xt_int *
101
102static int
104
105static void
107 struct Xt_stripe *restrict stripes,
108 size_t num_stripes_alloc);
109
110static int
111idxstripes_get_index_at_position(Xt_idxlist idxlist, int position,
112 Xt_int * index);
113
114static int
115idxstripes_get_indices_at_positions(Xt_idxlist idxlist, const int *positions,
116 int num, Xt_int *index,
117 Xt_int undef_idx);
118static int
120 Xt_idxlist idxlist,
121 int num_stripes, const struct Xt_stripe stripes[num_stripes],
122 int *num_ext, struct Xt_pos_ext **pos_ext,
123 int single_match_only,
124 Xt_config config);
125
126static int
128 int * position);
129
130static int
132 int * position, int offset);
133
134static Xt_int
136
137static Xt_int
139
140static int
142
144 .delete = idxstripes_delete,
145 .get_pack_size = idxstripes_get_pack_size,
146 .pack = idxstripes_pack,
147 .copy = idxstripes_copy,
148 .sorted_copy = idxstripes_sorted_copy,
149 .get_indices = idxstripes_get_indices,
150 .get_indices_const = idxstripes_get_indices_const,
151 .get_num_index_stripes = idxstripes_get_num_index_stripes,
152 .get_index_stripes = idxstripes_get_index_stripes,
153 .get_index_at_position = idxstripes_get_index_at_position,
154 .get_indices_at_positions = idxstripes_get_indices_at_positions,
155 .get_position_of_index = idxstripes_get_position_of_index,
156 .get_positions_of_indices = NULL,
157 .get_pos_exts_of_index_stripes = idxstripes_get_pos_exts_of_index_stripes,
158 .get_position_of_index_off = idxstripes_get_position_of_index_off,
159 .get_positions_of_indices_off = NULL,
160 .get_min_index = idxstripes_get_min_index,
161 .get_max_index = idxstripes_get_max_index,
162 .get_sorting = idxstripes_get_sorting,
163 .get_bounding_box = NULL,
164 .idxlist_pack_code = STRIPES,
165};
166
167static MPI_Datatype stripe_dt;
168
169#if __GNUC__ >= 11
170__attribute__ ((access (none, 1)))
171int MPI_Get_address(XT_MPI_GET_ADDRESS_LOCATION_CONST void *location, MPI_Aint *address);
172#endif
173
174void
176{
177 struct Xt_stripe stripe;
178
179 MPI_Aint base_address, nstrides_address;
180
181 MPI_Get_address(&stripe, &base_address);
182 MPI_Get_address(&stripe.nstrides, &nstrides_address);
183
184 enum { num_stripe_dt_elems = 2 };
185 static const int block_lengths[num_stripe_dt_elems] = {2,1};
186 MPI_Aint displacements[num_stripe_dt_elems]
187 = { 0, nstrides_address - base_address };
188 static const MPI_Datatype types[num_stripe_dt_elems]
189 = { Xt_int_dt, MPI_INT };
190 MPI_Datatype stripe_dt_unaligned;
191
192#ifdef XT_MPI_TYPE_CREATE_STRUCT_CONST_ARRAY_ARGS
193 xt_mpi_call(MPI_Type_create_struct(num_stripe_dt_elems,
194 block_lengths, displacements, types,
195 &stripe_dt_unaligned), Xt_default_comm);
196#else
197 xt_mpi_call(MPI_Type_create_struct(num_stripe_dt_elems,
198 (int *)block_lengths, displacements,
199 (MPI_Datatype *)types,
200 &stripe_dt_unaligned), Xt_default_comm);
201#endif
202 xt_mpi_call(MPI_Type_create_resized(stripe_dt_unaligned, 0,
203 (MPI_Aint)sizeof(stripe),
204 &stripe_dt), Xt_default_comm);
205 xt_mpi_call(MPI_Type_free(&stripe_dt_unaligned), Xt_default_comm);
206 xt_mpi_call(MPI_Type_commit(&stripe_dt), Xt_default_comm);
207}
208
209void
211{
212 xt_mpi_call(MPI_Type_free(&stripe_dt), Xt_default_comm);
213}
214
216
218 struct Xt_minmax range; /* minimal and maximal position of stripe */
219 int position; /* position of stripe this range
220 * corresponds to (permutation
221 * obtained from sorting) */
222 int inv_position; /* position in stripes_sort when
223 * indexed with unsorted index */
224};
225
237 /* todo: exploit newly introduced sorting information to speed up
238 * index position lookups */
243};
244
250
252
253 struct Xt_idxlist_ parent;
254
255 struct Xt_stripe *stripes;
258 unsigned flags;
259
261 struct Xt_stripes_sort stripes_sort[];
262};
263
264#define SORT_TYPE struct Xt_stripes_sort
265#define SORT_TYPE_SUFFIX stripes_sort
266#define XT_SORTFUNC_DECL static
267#define SORT_TYPE_CMP_LT(a,b,...) ((a).range.min < (b).range.min)
268#define SORT_TYPE_CMP_LE(a,b,...) ((a).range.min <= (b).range.min)
269#define SORT_TYPE_CMP_EQ(a,b,...) ((a).range.min == (b).range.min)
270#include "xt_mergesort_base.h"
271#undef SORT_TYPE
272#undef SORT_TYPE_SUFFIX
273#undef SORT_TYPE_CMP_LT
274#undef SORT_TYPE_CMP_LE
275#undef SORT_TYPE_CMP_EQ
276#undef XT_SORTFUNC_DECL
277
278static Xt_idxlist
280 const char *caller);
281
284{
285 return idxstripes_aggregate(stripes_alloc.idxstripes, __func__);
286}
287
288
289static Xt_idxlist
291 const char *caller)
292{
293 const struct Xt_stripe *restrict stripes = idxstripes->stripes;
294 struct Xt_stripes_sort *restrict stripes_sort = idxstripes->stripes_sort;
295 int ntrans_up=0, ntrans_dn=0;
296 Xt_int min, max, prev_end;
297 long long num_indices;
298 {
299 int nstrides = stripes[0].nstrides;
300 Xt_int stride = stripes[0].stride;
301 prev_end = (Xt_int)(stripes[0].start
302 + stride * (nstrides-1));
303 ntrans_up += ((nstrides - 1) & ~((stride > 0)-1));
304 ntrans_dn += ((nstrides - 1) & ~((stride < 0)-1));
305 num_indices = (long long)nstrides;
306 struct Xt_minmax stripe_range = xt_stripe2minmax(stripes[0]);
307 stripes_sort[0].range = stripe_range;
308 stripes_sort[0].position = 0;
309 min = stripe_range.min;
310 max = stripe_range.max;
311 }
312 size_t num_stripes = (size_t)idxstripes->num_stripes;
313 int sign_err = stripes[0].nstrides;
314 unsigned have_zero_stride = stripes[0].stride == 0,
315 stripes_intersect = have_zero_stride & (stripes[0].nstrides > 1);
316 for (size_t i = 1; i < num_stripes; ++i) {
317 struct Xt_minmax stripe_range = xt_stripe2minmax(stripes[i]);
318 int nstrides = stripes[i].nstrides;
319 Xt_int start = stripes[i].start, stride = stripes[i].stride;
320 stripes_sort[i].range = stripe_range;
321 stripes_sort[i].position = (int)i;
322 min = MIN(stripe_range.min, min);
323 max = MAX(stripe_range.max, max);
324 num_indices += (long long)nstrides;
325 sign_err |= nstrides;
326 have_zero_stride |= stride == 0;
327 stripes_intersect |= (stride == 0 && nstrides > 1);
328 ntrans_up += (start > prev_end) + ((nstrides - 1) & ~((stride > 0)-1));
329 ntrans_dn += (start < prev_end) + ((nstrides - 1) & ~((stride < 0)-1));
330 prev_end = (Xt_int)(start + stride * (nstrides-1));
331 }
332 /* test sign bit */
333 if (sign_err < 0) {
334 static const char template[] = "ERROR: %s called with invalid stripes";
335 size_t buf_size = sizeof (template) - 2 + strlen(caller);
336 char *msg = xmalloc(buf_size);
337 snprintf(msg, buf_size, template, caller);
338 die(msg);
339 }
340 if (num_indices > 0) {
341 assert(num_indices <= INT_MAX);
342 xt_mergesort_stripes_sort(stripes_sort, num_stripes);
343 stripes_sort[stripes_sort[0].position].inv_position = 0;
344 unsigned stripes_do_overlap = 0;
345 for (size_t i = 1; i < num_stripes; ++i) {
346 stripes_do_overlap
347 |= stripes_sort[i - 1].range.max >= stripes_sort[i].range.min;
348 stripes_sort[stripes_sort[i].position].inv_position = (int)i;
349 }
350 stripes_intersect = stripes_intersect
351 || (stripes_do_overlap
353 num_stripes, stripes, (struct Xt_minmax){ .min=min, .max=max }));
354 idxstripes->flags = XT_SORT_FLAGS(ntrans_up, ntrans_dn) << stripes_sort_bit
355 | stripes_do_overlap << stripes_do_overlap_bit
356 | have_zero_stride << stripes_some_have_zero_stride_bit
357 | stripes_intersect << stripes_intersect_bit;
358 idxstripes->min_index = min;
359 idxstripes->max_index = max;
360 idxstripes->index_array_cache = NULL;
361 Xt_idxlist_init(&idxstripes->parent, &idxstripes_vtable, (int)num_indices);
362 return (Xt_idxlist)idxstripes;
363 } else {
364 free(idxstripes);
365 return xt_idxempty_new();
366 }
367}
368
369static struct Xt_stripes_alloc
370idxstripes_alloc(size_t num_stripes);
371
372struct Xt_stripes_alloc
373xt_idxstripes_alloc(size_t num_stripes)
374{
375 return idxstripes_alloc(num_stripes);
376}
377
378static struct Xt_stripes_alloc
379idxstripes_alloc(size_t num_stripes)
380{
381 size_t header_size = ((sizeof (struct Xt_idxstripes_)
382 + (sizeof (struct Xt_stripes_sort)
383 * (size_t)num_stripes)
384 + sizeof (struct Xt_stripe) - 1)
385 / sizeof (struct Xt_stripe))
386 * sizeof (struct Xt_stripe),
387 body_size = sizeof (struct Xt_stripe) * (size_t)num_stripes;
388 Xt_idxstripes idxstripes = xmalloc(header_size + body_size);
389 struct Xt_stripe *stripes_assign
390 = (struct Xt_stripe *)(void *)((unsigned char *)idxstripes + header_size);
391 assert(num_stripes <= INT_MAX);
392 idxstripes->num_stripes = (int)num_stripes;
393 idxstripes->stripes = stripes_assign;
394 return (struct Xt_stripes_alloc){ .idxstripes = idxstripes,
395 .stripes = stripes_assign };
396}
397
398
400xt_idxstripes_new(struct Xt_stripe const * stripes, int num_stripes) {
401 INSTR_DEF(instr,"xt_idxstripes_new")
402 INSTR_START(instr);
403 // ensure that yaxt is initialized
404 assert(xt_initialized());
405
406 Xt_idxlist result;
407
408 if (num_stripes > 0) {
409 struct Xt_stripes_alloc stripes_alloc
410 = idxstripes_alloc((size_t)num_stripes);
411 stripes_alloc.idxstripes->num_stripes
412 = (int)xt_stripes_merge_copy((size_t)num_stripes,
413 stripes_alloc.stripes,
414 stripes, false);
415 result = idxstripes_aggregate(stripes_alloc.idxstripes, __func__);
416 } else
417 result = xt_idxempty_new();
418 INSTR_STOP(instr);
419 return result;
420}
421
424 // ensure that yaxt is initialized
425 assert(xt_initialized());
426 Xt_idxlist result;
427 if (idxlist_src->num_indices > 0) {
428 size_t num_stripes = (size_t)xt_idxlist_get_num_index_stripes(idxlist_src);
429 struct Xt_stripes_alloc stripes_alloc
430 = idxstripes_alloc(num_stripes);
431 xt_idxlist_get_index_stripes_keep_buf(idxlist_src, stripes_alloc.stripes,
432 num_stripes);
433 result = idxstripes_aggregate(stripes_alloc.idxstripes, __func__);
434 } else
435 result = xt_idxempty_new();
436 return result;
437}
438
439
440
442xt_idxstripes_prealloc_new(const struct Xt_stripe *stripes, int num_stripes)
443{
444 Xt_idxlist result;
445
446 if (num_stripes > 0) {
447 size_t header_size = ((sizeof (struct Xt_idxstripes_)
448 + ((size_t)num_stripes
449 * sizeof (struct Xt_stripes_sort))
450 + sizeof (struct Xt_stripe) - 1)
451 / sizeof (struct Xt_stripe))
452 * sizeof (struct Xt_stripe);
453 Xt_idxstripes idxstripes = xmalloc(header_size);
454 idxstripes->num_stripes = num_stripes;
455 /* this assignment is safe because no methods modify the pointee */
456 idxstripes->stripes = (struct Xt_stripe *)(intptr_t)stripes;
457 result = idxstripes_aggregate(idxstripes, __func__);
458 } else
459 result = xt_idxempty_new();
460 return result;
461}
462
463
464static void
466
467 if (data == NULL) return;
468
469 Xt_idxstripes stripes = (Xt_idxstripes)data;
470
471 free(stripes->index_array_cache);
472 free(stripes);
473}
474
475static size_t
477
478 Xt_idxstripes stripes = (Xt_idxstripes)data;
479
480 int size_header, size_stripes = 0;
481
482 xt_mpi_call(MPI_Pack_size(2, MPI_INT, comm, &size_header), comm);
483 if (stripes->num_stripes)
484 xt_mpi_call(MPI_Pack_size(stripes->num_stripes, stripe_dt, comm,
485 &size_stripes), comm);
486
487 return (size_t)size_header + (size_t)size_stripes;
488}
489
490static void
491idxstripes_pack(Xt_idxlist data, void *buffer, int buffer_size,
492 int *position, MPI_Comm comm) {
493 INSTR_DEF(instr,"idxstripes_pack")
494 INSTR_START(instr);
495
496 assert(data);
497 Xt_idxstripes stripes = (Xt_idxstripes)data;
498 int header[2] = { STRIPES, stripes->num_stripes };
499
500 xt_mpi_call(MPI_Pack(header, 2, MPI_INT, buffer,
501 buffer_size, position, comm), comm);
502 int num_stripes = stripes->num_stripes;
503 if (num_stripes)
504 xt_mpi_call(MPI_Pack(stripes->stripes, num_stripes, stripe_dt,
505 buffer, buffer_size, position, comm), comm);
506 INSTR_STOP(instr);
507}
508
509Xt_idxlist xt_idxstripes_unpack(void *buffer, int buffer_size, int *position,
510 MPI_Comm comm) {
511
512 INSTR_DEF(instr,"xt_idxstripes_unpack")
513 INSTR_START(instr);
514
515 int num_stripes;
516 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
517 &num_stripes, 1, MPI_INT, comm), comm);
518
519 Xt_idxlist result;
520 assert(num_stripes);
521 struct Xt_stripes_alloc stripes_alloc =
522 idxstripes_alloc((size_t)num_stripes);
523
524 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
525 stripes_alloc.stripes,
526 num_stripes, stripe_dt, comm),comm);
527 result = idxstripes_aggregate(stripes_alloc.idxstripes, __func__);
528 INSTR_STOP(instr);
529 return result;
530}
531
532#define XT_IDXSTRIPES_POS_EXT_MAP_COUNT
534#undef XT_IDXSTRIPES_POS_EXT_MAP_COUNT
536
537static Xt_idxlist
539 Xt_idxstripes idxstripes_dst,
540 Xt_config config)
541{
542 INSTR_DEF(instr,"compute_intersection_fallback")
543 INSTR_START(instr);
544 Xt_idxlist intersection;
545 int num_ext;
546 struct Xt_pos_ext *pos_exts;
547
548 int num_unmatched =
550 &idxstripes_src->parent,
551 idxstripes_dst->num_stripes,
552 idxstripes_dst->stripes,
553 &num_ext, &pos_exts, false,
554 config);
555 (void)num_unmatched;
556 if (num_ext > 0) {
557 /* 1. count index stripes correponding to pos_exts */
558 size_t num_result_stripes
559 = get_mapped_stripes_count((size_t)num_ext, pos_exts, idxstripes_src);
560 /* 2. allocate result idxstripes */
561 struct Xt_stripes_alloc intersection_alloc
562 = idxstripes_alloc(num_result_stripes);
563 /* 3. compute result idxstripes */
564 map_ext2stripes((size_t)num_ext, pos_exts, idxstripes_src,
565 intersection_alloc.stripes);
566 /* 4. congeal */
567 Xt_idxlist intersection_unsorted
568 = idxstripes_aggregate(intersection_alloc.idxstripes, __func__);
570 || idxstripes_get_sorting(intersection_unsorted) == 1) {
571 intersection = intersection_unsorted;
572 } else {
573 intersection = idxstripes_sorted_copy(intersection_unsorted, config);
574 xt_idxlist_delete(intersection_unsorted);
575 }
576 free(pos_exts);
577 } else {
578 intersection = xt_idxempty_new();
579 }
580 INSTR_STOP(instr);
581 return intersection;
582}
583
584
587};
588
589/* computes egcd of two positive integers a and b such that
590 egcd.gcd == egcd.u * a + egcd.v * b */
591static inline struct extended_gcd extended_gcd(Xt_int a, Xt_int b) {
592 Xt_int t = 1, u = 1, v = 0, s = 0;
593 while (b>0)
594 {
595 Xt_int q = (Xt_int)(a / b);
596 Xt_int prev_a = a;
597 a = b;
598 b = (Xt_int)(prev_a - q * b);
599 Xt_int prev_u = u;
600 u = s;
601 s = (Xt_int)(prev_u - q * s);
602 Xt_int prev_v = v;
603 v = t;
604 t = (Xt_int)(prev_v - q * t);
605 }
606 return (struct extended_gcd){ .gcd = a, .u = u, .v = v };
607}
608
609/* This implementation uses the method outlined in
610 * James M. Stichnoth,
611 * Efficient Compilation of Array Statements for Private Memory Multicomputers
612 * February, 1993 CMU-CS-93-109
613 */
614static struct Xt_stripe
616 struct Xt_stripe stripe_b) {
617
618 INSTR_DEF(instr,"get_stripe_intersection")
619 INSTR_START(instr);
620
621 struct Xt_bounded_stripe {
622 Xt_int min, max, stride, representative;
623 };
624
625 struct Xt_bounded_stripe bsa, bsb, bsi;
626
627 Xt_int stride_zero_mask_a = (stripe_a.stride != 0) - 1,
628 stride_zero_mask_b = (stripe_b.stride != 0) - 1;
629 {
630 Xt_int mask = Xt_isign_mask(stripe_a.stride);
631 bsa.min = (Xt_int)(stripe_a.start
632 + (mask & (stripe_a.stride * (stripe_a.nstrides - 1))));
633 bsa.max = (Xt_int)(stripe_a.start
634 + (~mask & (stripe_a.stride * (stripe_a.nstrides - 1))));
635 }
636 bsa.representative = stripe_a.start;
637 {
638 Xt_int mask = Xt_isign_mask(stripe_b.stride);
639 bsb.min = (Xt_int)(stripe_b.start
640 + (mask & (stripe_b.stride * (stripe_b.nstrides - 1))));
641 bsb.max = (Xt_int)(stripe_b.start
642 + (~mask & (stripe_b.stride * (stripe_b.nstrides - 1))));
643 }
644 bsb.representative = stripe_b.start;
645
646 bsa.stride = (Xt_int)((stripe_a.stride & ~stride_zero_mask_a)
647 | (stride_zero_mask_a & 1));
648 bsb.stride = (Xt_int)((stripe_b.stride & ~stride_zero_mask_b)
649 | (stride_zero_mask_b & 1));
650
651 /* FIXME: adjust the one further away from 0 */
652 /* adjust second representative to minimize difference to first representative */
653 Xt_int abs_stride_b = XT_INT_ABS(bsb.stride);
654#ifdef XT_LONG
655 Xt_long start_diff = (Xt_long)stripe_a.start - stripe_b.start;
656 bsb.representative
657 = (Xt_int)(bsb.representative
658 + (start_diff/abs_stride_b
659 + (start_diff%abs_stride_b > abs_stride_b/2))
660 * abs_stride_b);
661 start_diff = (Xt_long)bsb.representative - bsa.representative;
662#else
663 Xt_long start_diff = xiisub(stripe_a.start, stripe_b.start);
664 Xt_idiv start_diff_abs_stride_b_div
665 = xlidivu(xlabs(start_diff), (Xt_uint)abs_stride_b);
666 bsb.representative
667 = (Xt_int)(bsb.representative
668 + (start_diff_abs_stride_b_div.quot
669 + start_diff_abs_stride_b_div.rem > abs_stride_b/2)
670 * abs_stride_b);
671 start_diff = xiisub(bsb.representative, bsa.representative);
672#endif
673 Xt_int abs_stride_a = XT_INT_ABS(bsa.stride);
674 struct extended_gcd eg = extended_gcd(abs_stride_a, abs_stride_b);
675 bsi.min = MAX(bsa.min, bsb.min);
676 bsi.max = MIN(bsa.max, bsb.max);
677 /* we need to temporarily represent the product
678 * (bsb.representative - bsa.representative) * eg.u * abs_stride_a
679 * so we figure out first, if that product would overflow Xt_int
680 */
681 int stride_a_bits = xt_int_bits - xinlz((Xt_uint)abs_stride_a),
682 stride_bits = xt_int_bits - xinlz((Xt_uint)abs_stride_b) + stride_a_bits,
683 product_bits;
684 if (eg.u && bsb.representative != bsa.representative)
685 product_bits =
686 xt_int_bits - xinlz((Xt_uint)XT_INT_ABS(eg.u))
687#ifdef XT_LONG
688 + xt_int_bits - xinlz((Xt_uint)xlabs(start_diff))
689#else
690 + xt_int_bits - xinlz(xlabs(start_diff).lo)
691#endif
692 + stride_a_bits;
693 else
694 product_bits = 0;
695 int strides_mask, nstrides;
696 struct Xt_stripe intersection;
697 if (product_bits < xt_int_bits && stride_bits < xt_int_bits) {
698 Xt_int temp_stride = (Xt_int)((abs_stride_a * bsb.stride)/eg.gcd);
699 bsi.stride = (Xt_int)temp_stride;
700 Xt_int min_rep;
701 {
702 Xt_int some_rep
703 = (Xt_int)(bsa.representative
704 + ((bsb.representative - bsa.representative) * eg.u
705 * abs_stride_a / eg.gcd));
706 /* compute minimal bsi representative >= bsi.min */
707 Xt_int abs_bsi_stride = XT_INT_ABS(temp_stride),
708 r_diff = (Xt_int)(bsi.min - some_rep),
709 steps = (Xt_int)(r_diff / abs_bsi_stride);
710 steps = (Xt_int)(steps + (steps * abs_bsi_stride < r_diff));
711 min_rep = (Xt_int)(some_rep + steps * abs_bsi_stride);
712 bsi.representative = (Xt_int)min_rep;
713 }
714 nstrides = (int)((bsi.max - min_rep)/temp_stride + llsign(temp_stride));
715 int even_divide
716 = ((((bsb.representative - bsa.representative) % eg.gcd) == 0)
717 & (bsi.stride == temp_stride || abs(nstrides) == 1));
718 /* requires two's complement integers */
719 strides_mask = ~(((even_divide) & (bsi.min <= bsi.max)
720 & (min_rep <= bsi.max) & (min_rep >= bsi.min)) - 1);
721 Xt_int max_rep
722 = (Xt_int)(min_rep + (nstrides - llsign(temp_stride)) * bsi.stride);
723 intersection.start = (Xt_int)((bsa.stride >= 0) ? min_rep : max_rep);
724 } else {
725#ifdef XT_LONG
726#define ABS(x) (((x) < 0) ? -(x) : (x))
727 Xt_long temp_stride = xiimul(abs_stride_a, bsb.stride)/eg.gcd;
728 bsi.stride = (Xt_int)temp_stride;
729 Xt_long min_rep;
730 /* did the computation of bsi.stride remain in Xt_int range? */
731 bool bsi_stride_in_range = xl_is_in_xt_int_range(temp_stride);
732 /* did the computation of min_rep remain in Xt_int range? */
733 bool min_rep_in_range;
734 {
735 Xt_long some_rep;
736 if (bsb.representative != bsa.representative) {
737 /* computation might generate huge intermediary values, needing 3
738 * times as many bits as Xt_int has, so we use long multiplication */
739 Xt_long temp_1 = start_diff * eg.u;
740 Xt_tword temp_2 = xlimulu(temp_1, (Xt_uint)abs_stride_a);
741 /* some_rep will be representable as Xt_long
742 * if dividing by eg.gcd results in a value in the range of Xt_long...*/
743 min_rep_in_range
744 /* ... that means either all bits in temp_2.hi are
745 * identical to the high bit of temp_2.midlo ... */
746 = (temp_2.hi
747 == (Xt_uint)(Xt_isign_mask((Xt_int)(temp_2.midlo >> xt_int_bits))))
748 /* or the aggregate value of temp_2 divided by
749 * XT_LONG_MAX+1 is less than eg.gcd, instead of actually
750 * dividing, we can simply shift the uppermost bit of
751 * temp_2.midlo into temp_2.hi left-shifted by one */
752 | (XT_INT_ABS((Xt_int)(temp_2.hi << 1) | ((Xt_long)temp_2.midlo < 0)) < eg.gcd);
753 /* compute any representative */
754 some_rep
755 = (Xt_long)bsa.representative + ((Xt_long)temp_2.midlo / eg.gcd);
756 } else {
757 min_rep_in_range = true;
758 some_rep = bsa.representative;
759 }
760 /* compute minimal bsi representative >= bsi.min */
761 Xt_long abs_bsi_stride = ABS(temp_stride),
762 r_diff = bsi.min - some_rep,
763 steps = r_diff / abs_bsi_stride;
764 bool steps_in_range = xl_is_in_xt_int_range(steps);
765 steps = steps + (steps * abs_bsi_stride < r_diff);
766 min_rep = some_rep;
767 if (steps_in_range)
768 min_rep += steps * abs_bsi_stride;
769 min_rep_in_range &= xl_is_in_xt_int_range(min_rep);
770 bsi.representative = (Xt_int)min_rep;
771 }
772 int min_rep_mask = ~((int)min_rep_in_range - 1);
773 nstrides = (int)((((bsi.max - min_rep)/temp_stride)+xlsign(temp_stride))
774 & min_rep_mask)
775 | (~min_rep_mask & (bsb.representative == bsa.representative));
776 int even_divide
777 = ((start_diff%eg.gcd == 0) & (bsi_stride_in_range || abs(nstrides) == 1));
778 /* requires two's complement integers */
779 strides_mask = ~(((even_divide) & (bsi.min <= bsi.max)
780 & (min_rep <= bsi.max) & (min_rep >= bsi.min)) - 1);
781 Xt_int max_rep
782 = (Xt_int)(min_rep + (nstrides - xlsign(temp_stride)) * bsi.stride);
783 intersection.start = (Xt_int)((bsa.stride >= 0) ? min_rep : max_rep);
784#else
785 Xt_long temp_stride = xlldiv(xiimul(abs_stride_a, bsb.stride),
786 xi2l(eg.gcd)).quot;
787 bsi.stride = (Xt_int)temp_stride.lo;
788 /* minimal value that is reachable by both stripe starts plus some
789 * multiple of the respective stride */
790 Xt_long min_rep;
791 /* did the computation of bsi.stride remain in Xt_int range? */
792 bool bsi_stride_in_range = xl_is_in_xt_int_range(temp_stride);
793 /* did the computation of min_rep remain in Xt_int range? */
794 bool min_rep_in_range;
795 {
796 Xt_long some_rep;
797 if (bsb.representative != bsa.representative) {
798 /* computation might generate huge intermediary values */
799 Xt_long temp_1
800 = xiimul(bsb.representative - bsa.representative, eg.u);
801 Xt_tword temp_2 = xlimul(temp_1, abs_stride_a);
802 min_rep_in_range
803 = (temp_2.hi
804 == (Xt_uint)(Xt_isign_mask((Xt_int)(temp_2.mid))))
805 | (XT_INT_ABS((Xt_int)(temp_2.hi << 1) | ((Xt_int)temp_2.mid < 0)) < eg.gcd);
806 Xt_long t2 = (Xt_long){.hi = temp_2.mid, .lo = temp_2.lo };
807 Xt_ldiv temp_3;
808 if (eg.gcd == 1) {
809 temp_3.quot = t2;
810 temp_3.rem = (Xt_long){.hi = 0, .lo = 0 };
811 } else if (xlicmp_lt(t2, eg.gcd)) {
812 temp_3.quot = (Xt_long){ 0, 0 };
813 temp_3.rem = t2;
814 } else
815 temp_3 = xlldiv((Xt_long){.hi = temp_2.mid, .lo = temp_2.lo },
816 xi2l(eg.gcd));
817 some_rep = xliadd(temp_3.quot, bsa.representative);
818 } else {
819 min_rep_in_range = true;
820 some_rep = xi2l(bsa.representative);
821 }
822 /* compute minimal bsi representative >= bsi.min */
823 Xt_long abs_bsi_stride = xlabs(temp_stride),
824 r_diff = xilsub(bsi.min, some_rep);
825 Xt_ldiv steps = xlldiv(r_diff, abs_bsi_stride);
826 bool steps_in_range = xl_is_in_xt_int_range(steps.quot);
827 steps.quot = xlinc(steps.quot, (bool)((Xt_int)steps.rem.hi >= 0
828 && steps.rem.lo != 0));
829 if (steps_in_range)
830 min_rep = xlladd(some_rep,
831 xiimul((Xt_int)steps.quot.lo,
832 (Xt_int)abs_bsi_stride.lo));
833 else
834 min_rep = some_rep;
835 min_rep_in_range &= xl_is_in_xt_int_range(min_rep);
836 bsi.representative = (Xt_int)min_rep.lo;
837 }
838 if (min_rep_in_range)
839 nstrides = (int)xlldiv(xilsub(bsi.max, min_rep), temp_stride).quot.lo + xlsign(temp_stride);
840 else
841 nstrides = bsb.representative == bsa.representative;
842 int even_divide
843 = ((((bsb.representative - bsa.representative) % eg.gcd) == 0)
844 & (bsi_stride_in_range || abs(nstrides) == 1));
845 /* requires two's complement integers */
846 strides_mask = ~(((even_divide) & (bsi.min <= bsi.max)
847 & xlicmp_le(min_rep, bsi.max) & xlicmp_ge(min_rep, bsi.min)) - 1);
848 Xt_int max_rep
849 = (Xt_int)(min_rep.lo
850 + (Xt_uint)((nstrides - xlsign(temp_stride)) * bsi.stride));
851 intersection.start = ((bsa.stride >= 0) ? (Xt_int)min_rep.lo : max_rep);
852#endif
853 }
854 intersection.stride
855 = (Xt_int)(Xt_isign(bsa.stride) * XT_INT_ABS(bsi.stride));
856 intersection.nstrides
857 = (abs(nstrides) & strides_mask
858 & ~((int)stride_zero_mask_a & (int)stride_zero_mask_b))
859 | (stripe_b.nstrides & (int)stride_zero_mask_a & (int)stride_zero_mask_b);
860 INSTR_STOP(instr);
861 return intersection;
862}
863
864// this routine only works for idxstripes where !stripes_do_overlap
865static Xt_idxlist
867 Xt_idxstripes idxstripes_dst) {
868
869 INSTR_DEF(instr,"idxstripes_compute_intersection")
870 INSTR_START(instr);
871
872 struct Xt_stripe *restrict inter_stripes = NULL;
873 size_t num_inter_stripes = 0;
874 size_t inter_stripes_array_size = 0;
875
876 const struct Xt_stripes_sort *restrict src_stripes_sort
877 = idxstripes_src->stripes_sort,
878 *restrict dst_stripes_sort = idxstripes_dst->stripes_sort;
879 const struct Xt_stripe *restrict stripes_src = idxstripes_src->stripes,
880 *restrict stripes_dst = idxstripes_dst->stripes;
881
882 size_t i_src = 0, i_dst = 0;
883 size_t num_stripes_src = (size_t)idxstripes_src->num_stripes,
884 num_stripes_dst = (size_t)idxstripes_dst->num_stripes;
885
886 while ((i_src < num_stripes_src) &
887 (i_dst < num_stripes_dst)) {
888
889 while (i_src < num_stripes_src &&
890 src_stripes_sort[i_src].range.max
891 < dst_stripes_sort[i_dst].range.min) ++i_src;
892
893 if ( i_src >= num_stripes_src ) break;
894
895 while (i_dst < num_stripes_dst &&
896 dst_stripes_sort[i_dst].range.max
897 < src_stripes_sort[i_src].range.min) ++i_dst;
898
899 if ( i_dst >= num_stripes_dst ) break;
900
901 if ((src_stripes_sort[i_src].range.min
902 <= dst_stripes_sort[i_dst].range.max)
903 & (src_stripes_sort[i_src].range.max
904 >= dst_stripes_sort[i_dst].range.min)) {
905 ENSURE_ARRAY_SIZE(inter_stripes, inter_stripes_array_size,
906 num_inter_stripes+1);
907
908 struct Xt_stripe intersection_stripe;
909 inter_stripes[num_inter_stripes] = intersection_stripe =
910 get_stripe_intersection(stripes_src[src_stripes_sort[i_src].position],
911 stripes_dst[dst_stripes_sort[i_dst].position]);
912 num_inter_stripes += intersection_stripe.nstrides > 0;
913 }
914
915 if (dst_stripes_sort[i_dst].range.max
916 < src_stripes_sort[i_src].range.max)
917 i_dst++;
918 else
919 i_src++;
920 }
921
922 if (num_inter_stripes) {
923 /* invert negative and merge consecutive stripes */
924 struct Xt_stripe prev_stripe = inter_stripes[0];
925 if (prev_stripe.stride < 0) {
926 prev_stripe.start
927 = (Xt_int)(prev_stripe.start
928 + (prev_stripe.stride * (Xt_int)(prev_stripe.nstrides - 1)));
929 prev_stripe.stride = (Xt_int)-prev_stripe.stride;
930 }
931 inter_stripes[0] = prev_stripe;
932 size_t j = 0;
933 for (size_t i = 1; i < num_inter_stripes; ++i) {
934 struct Xt_stripe stripe = inter_stripes[i];
935 if (stripe.stride < 0) {
936 stripe.start = (Xt_int)(stripe.start
937 + stripe.stride * (Xt_int)(stripe.nstrides - 1));
938 stripe.stride = (Xt_int)-stripe.stride;
939 }
940 if ((stripe.stride == prev_stripe.stride)
941 & (stripe.start
942 == (prev_stripe.start
943 + prev_stripe.stride * (Xt_int)prev_stripe.nstrides)))
944 {
945 prev_stripe.nstrides += stripe.nstrides;
946 inter_stripes[j].nstrides = prev_stripe.nstrides;
947 }
948 else
949 {
950 inter_stripes[++j] = stripe;
951 prev_stripe = stripe;
952 }
953 }
954 num_inter_stripes = j + 1;
955 }
956
957 Xt_idxlist inter = xt_idxstripes_new(inter_stripes, (int)num_inter_stripes);
958
959 free(inter_stripes);
960
961 INSTR_STOP(instr);
962 return inter;
963}
964
967 Xt_config config)
968{
969 // both lists are index stripes:
970 Xt_idxstripes idxstripes_src = (Xt_idxstripes)idxlist_src,
971 idxstripes_dst = (Xt_idxstripes)idxlist_dst;
972
973 if ((idxstripes_src->flags | idxstripes_dst->flags)
975 return compute_intersection_fallback(idxstripes_src, idxstripes_dst,
976 config);
977 } else
978 return idxstripes_compute_intersection(idxstripes_src, idxstripes_dst);
979}
980
981static inline bool
983{
984 Xt_int start = stripe.start, stride = stripe.stride;
985 if (idx >= start && stride > 0) {
986 return idx < start + stride * stripe.nstrides
987 && (idx - start) % stride == 0;
988 } else if (idx <= start && stride < 0) {
989 return idx > start + stride * stripe.nstrides
990 && (start - idx) % stride == 0;
991 } else {
992 return idx == start;
993 }
994}
995
998 Xt_idxlist idxlist_dst,
999 Xt_config XT_UNUSED(config))
1000{
1001 Xt_idxstripes idxstripes_src = (Xt_idxstripes)idxlist_src;
1002 const struct Xt_stripe *stripes_src = idxstripes_src->stripes;
1003 assert(idxlist_dst->vtable->idxlist_pack_code == VECTOR
1004 && idxlist_src->vtable == &idxstripes_vtable);
1005 const Xt_int *indices = xt_idxlist_get_indices_const(idxlist_dst);
1006 size_t ndst = (size_t)idxlist_dst->num_indices;
1007 size_t nsrc_stripes = (size_t)idxstripes_src->num_stripes,
1008 num_found_indices = 0;
1009 enum {
1010 size_t_bits = sizeof (size_t) * CHAR_BIT,
1011 };
1012 size_t *found
1013 = xcalloc(((ndst + size_t_bits - 1)/ size_t_bits),
1014 sizeof (size_t));
1015 for (size_t i = 0; i < ndst; ++i) {
1016 Xt_int dst_idx = indices[i];
1017 for (size_t j = 0; j < nsrc_stripes; ++j)
1018 if (stripe_contains_index(stripes_src[j], dst_idx)) {
1019 num_found_indices++;
1020 found[i/size_t_bits] |= (size_t)1 << (i%size_t_bits);
1021 break;
1022 }
1023 }
1024 Xt_idxlist intersection;
1025 if (num_found_indices) {
1026 struct Xt_vec_alloc vec_alloc = xt_idxvec_alloc((int)num_found_indices);
1027 Xt_int *restrict found_indices = vec_alloc.vector;
1028 for (size_t i = 0, inter_ofs = 0; i < ndst; ++i) {
1029 size_t fofs = i / size_t_bits;
1030 if (found[fofs]&((size_t)1 << (i%size_t_bits))) {
1031 found_indices[inter_ofs] = indices[i];
1032 ++inter_ofs;
1033 }
1034 }
1035 intersection = xt_idxvec_congeal(vec_alloc);
1036 } else
1037 intersection = xt_idxempty_new();
1038 free(found);
1039 return intersection;
1040}
1041
1042static Xt_idxlist
1044
1045 Xt_idxstripes stripes = (Xt_idxstripes)idxlist;
1046
1047 return xt_idxstripes_new(stripes->stripes, stripes->num_stripes);
1048}
1049
1050static inline Xt_int
1051stripe_min(struct Xt_stripe stripe, int pfx_remove)
1052{
1053 Xt_int min = stripe.start;
1054 Xt_int stride = stripe.stride;
1055 int nstrides = stripe.nstrides;
1056 Xt_int adj = (Xt_int)(stride >= 0 ? pfx_remove : nstrides - 1 - pfx_remove);
1057 return (Xt_int)(min + stride * adj);
1058}
1059
1060static int
1061stripe_min_cmp_lt(const struct Xt_stripe a, const struct Xt_stripe b,
1062 const int pfx_remove_a, const int pfx_remove_b)
1063{
1064 /* emptied stripes compare minimal to move them to end of array */
1065 if (a.nstrides - pfx_remove_a == 0)
1066 return true;
1067 if (b.nstrides - pfx_remove_b == 0)
1068 return false;
1069 Xt_int amin = stripe_min(a, pfx_remove_a),
1070 bmin = stripe_min(b, pfx_remove_b);
1071 if (amin == bmin) {
1072 amin = XT_INT_ABS(a.stride);
1073 bmin = XT_INT_ABS(b.stride);
1074 }
1075 return amin > bmin;
1076}
1077
1078#define SORT_TYPE int
1079#define SORT_TYPE_SUFFIX stripe_by_min
1080#define SORT_TYPE_CMP_LT(u,v,i,j) \
1081 (stripe_min_cmp_lt(stripes[(u)], stripes[(v)], \
1082 pfx_removed[(u)], pfx_removed[(v)]))
1083#define XT_SORT_EXTRA_ARGS_DECL , int *restrict pfx_removed, \
1084 const struct Xt_stripe stripes[]
1085#define XT_SORT_EXTRA_ARGS_PASS , pfx_removed, stripes
1086#define XT_SORTFUNC_DECL static
1087#include "xt_heapsort_base.h"
1088
1089
1090static Xt_int
1091retrieve_min(size_t num_src_stripes,
1092 const struct Xt_stripe *src_stripes,
1093 int *src_permutation,
1094 int *pfx_removed)
1095{
1096 size_t min_stripe_pos = (size_t)src_permutation[0];
1097 struct Xt_stripe minstripe = src_stripes[min_stripe_pos];
1098 Xt_int minidx;
1099 if (minstripe.stride >= 0) {
1100 minidx = (Xt_int)(minstripe.start
1101 + minstripe.stride * pfx_removed[min_stripe_pos]);
1102 } else { /* minstripe.stride < 0 */
1103 minidx = (Xt_int)(minstripe.start
1104 + (minstripe.nstrides - 1 - pfx_removed[min_stripe_pos])
1105 * minstripe.stride);
1106 }
1107 ++pfx_removed[min_stripe_pos];
1108 xt_heapify_stripe_by_min(src_permutation, num_src_stripes,
1109 0, pfx_removed, src_stripes);
1110 return minidx;
1111}
1112
1114xt_idxstripes_sort_new(size_t num_src_stripes,
1115 const struct Xt_stripe src_stripes[],
1116 Xt_config config)
1117{
1118 (void)config;
1119 int *restrict src_permutation
1120 = xmalloc(num_src_stripes * 2 * sizeof (*src_permutation)),
1121 *restrict pfx_removed = src_permutation + num_src_stripes;
1122 size_t indices_left = 0;
1123 for (size_t i = 0; i < num_src_stripes; ++i) {
1124 src_permutation[i] = (int)i;
1125 pfx_removed[i] = 0;
1126 indices_left += (size_t)src_stripes[i].nstrides;
1127 }
1128 /* this only needs a heapsort actually for the following selection */
1129 xt_heapsort_stripe_by_min(src_permutation, num_src_stripes,
1130 pfx_removed, src_stripes);
1131 /* select stripe with minimal element */
1132 struct Xt_stripe *dst_stripes
1133 = xmalloc(sizeof (*dst_stripes) * num_src_stripes);
1134 size_t num_dst_stripes = 0, size_dst_stripes = num_src_stripes;
1135 {
1136 Xt_int minidx = retrieve_min(num_src_stripes, src_stripes,
1137 src_permutation, pfx_removed);
1138 dst_stripes[0] = (struct Xt_stripe){ .start = minidx, .stride = 1,
1139 .nstrides = 1 };
1140 num_dst_stripes = 1;
1141 --indices_left;
1142 }
1143 for (; indices_left != 0; --indices_left) {
1144 /* get lowest index */
1145 Xt_int minidx = retrieve_min(num_src_stripes, src_stripes,
1146 src_permutation, pfx_removed);
1147 /* insert lowest index into destination */
1148 size_t i = num_dst_stripes - 1;
1149 if (dst_stripes[i].nstrides == 1) {
1150 dst_stripes[i].stride = (Xt_int)(minidx - dst_stripes[i].start);
1151 dst_stripes[i].nstrides = 2;
1152 } else if (minidx
1153 == (Xt_int)(
1154 dst_stripes[i].start
1155 + dst_stripes[i].stride * dst_stripes[i].nstrides)) {
1156 ++dst_stripes[i].nstrides;
1157 } else {
1158 ENSURE_ARRAY_SIZE(dst_stripes, size_dst_stripes, num_dst_stripes+1);
1159 dst_stripes[num_dst_stripes] = (struct Xt_stripe){ .start = minidx,
1160 .stride = 1,
1161 .nstrides = 1 };
1162 ++num_dst_stripes;
1163 }
1164 }
1165 /* \todo: optimize to create destination in-place */
1166 Xt_idxlist copy = xt_idxstripes_new(dst_stripes, (int)num_dst_stripes);
1167 free(dst_stripes);
1168 free(src_permutation);
1169 return copy;
1170}
1171
1172static Xt_idxlist
1174{
1175 Xt_idxstripes src = (Xt_idxstripes)idxlist;
1176 size_t num_stripes = (size_t)src->num_stripes;
1177 struct Xt_stripes_alloc stripes_alloc
1178 = idxstripes_alloc(num_stripes);
1179 struct Xt_stripe *restrict inverted_stripes = stripes_alloc.stripes;
1180 const struct Xt_stripe *src_stripes = src->stripes;
1181 for (size_t i = 0; i < num_stripes; ++i) {
1182 Xt_int src_start = src_stripes[num_stripes-1-i].start,
1183 src_stride = src_stripes[num_stripes-1-i].stride;
1184 int nstrides = src_stripes[num_stripes-1-i].nstrides;
1185 inverted_stripes[i] = (struct Xt_stripe){
1186 .start = (Xt_int)(src_start + src_stride * (nstrides-1)),
1187 .stride = (Xt_int)-src_stride,
1188 .nstrides = nstrides,
1189 };
1190 }
1191 return idxstripes_aggregate(stripes_alloc.idxstripes, __func__);
1192}
1193
1194static Xt_idxlist
1196{
1197 Xt_idxstripes src = (Xt_idxstripes)idxlist;
1198 int sort_flags = (int)((src->flags >> stripes_sort_bit) & sort_mask);
1199 sort_flags -= sort_flags < 3;
1200 Xt_idxlist sorted_idxlist;
1201 if (sort_flags == 1)
1202 sorted_idxlist = idxstripes_copy(idxlist);
1203 else if (sort_flags == -1)
1204 sorted_idxlist = idxstripes_order_invert(idxlist);
1205 else
1206 sorted_idxlist = xt_idxstripes_sort_new((size_t)src->num_stripes,
1207 src->stripes, config);
1208 return sorted_idxlist;
1209}
1210
1211static void
1213 INSTR_DEF(instr,"idxstripes_get_indices")
1214 INSTR_START(instr);
1215
1216 Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
1217 const struct Xt_stripe *restrict stripes = idxstripes->stripes;
1218 size_t num_stripes = (size_t)idxstripes->num_stripes, ofs = 0;
1219 for (size_t i = 0; i < num_stripes; ofs += (size_t)stripes[i].nstrides, ++i )
1220 for (size_t j = 0; j < (size_t)stripes[i].nstrides; ++j)
1221 indices[ofs+j] = (Xt_int)(stripes[i].start
1222 + (Xt_int)j * stripes[i].stride);
1223
1224 INSTR_STOP(instr);
1225}
1226
1227static Xt_int const*
1229
1230 Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
1231
1232 if (idxstripes->index_array_cache) return idxstripes->index_array_cache;
1233
1234 int num_indices = idxlist->num_indices;
1235 Xt_int *tmp_index_array
1236 = xmalloc((size_t)num_indices * sizeof (*tmp_index_array));
1237 idxstripes_get_indices(idxlist, tmp_index_array);
1238 return idxstripes->index_array_cache = tmp_index_array;
1239}
1240
1241static void
1243 struct Xt_stripe *restrict stripes,
1244 size_t num_stripes_alloc)
1245{
1246#ifdef NDEBUG
1247 (void)num_stripes_alloc;
1248#endif
1249 INSTR_DEF(instr,"idxstripes_get_index_stripes")
1250 INSTR_START(instr);
1251 Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
1252 assert((size_t)idxstripes->num_stripes <= num_stripes_alloc);
1253 memcpy(stripes, idxstripes->stripes,
1254 (size_t)idxstripes->num_stripes * sizeof (stripes[0]));
1255 INSTR_STOP(instr);
1256}
1257
1258const struct Xt_stripe *
1260{
1261 assert(idxlist->vtable == &idxstripes_vtable);
1262 Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
1263 return idxstripes->stripes;
1264}
1265
1266static int
1268{
1269 Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
1270 return idxstripes->num_stripes;
1271}
1272
1273size_t
1275{
1276 assert(idxlist->vtable == &idxstripes_vtable);
1277 Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
1278 return (size_t)idxstripes->num_stripes;
1279}
1280
1281static int
1283 Xt_int * index) {
1284
1285 INSTR_DEF(instr,"idxstripes_get_index_at_position")
1286 INSTR_START(instr);
1287
1288 int retval;
1289
1290 if (position >= 0 && position < idxlist->num_indices) {
1291 Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
1292 const struct Xt_stripe *restrict stripes = idxstripes->stripes;
1293 size_t num_stripes = (size_t)idxstripes->num_stripes;
1294 size_t i = 0;
1295 while (i < num_stripes && position >= stripes[i].nstrides) {
1296 position-= stripes[i].nstrides;
1297 ++i;
1298 }
1299 *index = (Xt_int)(stripes[i].start + position * stripes[i].stride);
1300 retval = 0;
1301 } else {
1302 retval = 1;
1303 }
1304
1305 INSTR_STOP(instr);
1306 return retval;
1307}
1308
1309
1310static int
1312 int num_pos, Xt_int *index,
1313 Xt_int undef_idx) {
1314
1315 INSTR_DEF(instr,"idxstripes_get_indices_at_positions")
1316 INSTR_START(instr);
1317
1318 Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
1319 const struct Xt_stripe *stripes = idxstripes->stripes;
1320
1321 int max_pos = idxlist->num_indices;
1322 int sub_pos = 0;
1323 /* initial guess which stripe to inspect for position */
1324 int istripe = 0;
1325 /* position range corresponding to stripe indexed by istripe */
1326 int stripe_start_pos = 0;
1327 int undef_count = 0;
1328
1329 for (int ipos = 0; ipos < num_pos; ipos++) {
1330
1331 int seek_pos = positions[ipos];
1332
1333 if (seek_pos >= 0 && seek_pos < max_pos) {
1334 while (seek_pos < stripe_start_pos) {
1335 istripe--;
1336#ifndef NDEBUG
1337 if (istripe < 0)
1338 die("idxstripes_get_indices_at_positions: internal error:"
1339 " crossed 0-boundary");
1340#endif
1341 stripe_start_pos -= (int)stripes[istripe].nstrides;
1342 }
1343
1344 while (seek_pos > stripe_start_pos + stripes[istripe].nstrides - 1) {
1345 stripe_start_pos += (int)stripes[istripe].nstrides;
1346 istripe++;
1347#ifndef NDEBUG
1348 if (istripe >= idxstripes->num_stripes)
1349 die("idxstripes_get_indices_at_positions: internal error:"
1350 " crossed upper boundary");
1351#endif
1352 }
1353
1354 sub_pos = seek_pos - stripe_start_pos;
1355 index[ipos]
1356 = (Xt_int)(stripes[istripe].start + sub_pos * stripes[istripe].stride);
1357 } else {
1358 index[ipos] = undef_idx;
1359 undef_count++;
1360 }
1361 }
1362
1363 INSTR_STOP(instr);
1364
1365 return undef_count;
1366}
1367
1368static int
1370 int * position) {
1371
1372 return idxstripes_get_position_of_index_off(idxlist, index, position, 0);
1373}
1374
1375static int
1377 int * position, int offset) {
1378
1379 INSTR_DEF(instr,"idxstripes_get_position_of_index_off")
1380 INSTR_START(instr);
1381
1382 int retval = 1;
1383 size_t offset_ = (offset < 0) ? (size_t)0 : (size_t)offset;
1384
1385 Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
1386 const struct Xt_stripes_sort *restrict stripes_sort
1387 = idxstripes->stripes_sort;
1388 const struct Xt_stripe *restrict stripes = idxstripes->stripes;
1389 if (offset_ < (size_t)idxlist->num_indices) {
1390 size_t num_stripes = (size_t)idxstripes->num_stripes, i;
1391 size_t position_offset = 0;
1392
1393 for (i = 0;
1394 position_offset + (size_t)stripes[i].nstrides <= offset_;
1395 ++i)
1396 position_offset += (size_t)stripes[i].nstrides;
1397
1398 for (; i < num_stripes;
1399 position_offset += (size_t)stripes[i].nstrides, ++i) {
1400
1401 size_t inv_pos = (size_t)stripes_sort[i].inv_position;
1402 struct Xt_minmax range = stripes_sort[inv_pos].range;
1403
1404 if (index > range.max || index < range.min)
1405 continue;
1406
1407 int rel_start;
1408 Xt_int stride = stripes[i].stride;
1409 if (index != stripes[i].start) {
1410 Xt_int index_offset = (Xt_int)(index - stripes[i].start);
1411 if (index_offset % stride)
1412 continue;
1413 rel_start = (int)(index_offset/stripes[i].stride);
1414 } else if (stride != 0)
1415 rel_start = 0;
1416 else
1417 rel_start = (int)(MAX(offset_, position_offset) - position_offset);
1418 *position = (int)((size_t)rel_start + position_offset);
1419 retval = 0;
1420 goto fun_exit;
1421 }
1422 }
1423
1424 *position = -1;
1425
1426 fun_exit: ;
1427 INSTR_STOP(instr);
1428 return retval;
1429}
1430
1431static inline void
1432append_ext(struct Xt_pos_ext pos_ext, struct Xt_pos_ext_vec *restrict result)
1433{
1434 size_t num_pos_exts_ = result->num_pos_ext,
1435 size_pos_exts_ = result->size_pos_ext;
1436 struct Xt_pos_ext *restrict pos_exts_ = result->pos_ext;
1437 if (!xt_can_merge_pos_ext(pos_exts_[num_pos_exts_ - 1], pos_ext))
1438 {
1439 if (num_pos_exts_ + 1 == size_pos_exts_)
1440 {
1441 size_pos_exts_ += 16;
1442 /* offsetting by 1 necessary to keep the terminator in place */
1443 result->pos_ext = pos_exts_ = (struct Xt_pos_ext *)
1444 xrealloc(pos_exts_ - 1, (size_pos_exts_ + 1) * sizeof (*pos_exts_)) + 1;
1445 result->size_pos_ext = size_pos_exts_;
1446 }
1447 pos_exts_[num_pos_exts_] = pos_ext;
1448 result->num_pos_ext = num_pos_exts_ + 1;
1449 }
1450 else {
1451 /* merge new ext with previous */
1452 pos_exts_[num_pos_exts_ - 1].size
1453 = isign(pos_ext.start - pos_exts_[num_pos_exts_ - 1].start)
1454 * (abs(pos_exts_[num_pos_exts_ - 1].size) + abs(pos_ext.size));
1455 }
1456}
1457
1460 const struct Xt_stripe *stripes;
1461 const struct Xt_stripes_sort *stripes_sort;
1465};
1466
1467static inline void
1469 Xt_idxstripes idxstripes) {
1470 const struct Xt_stripe *restrict stripes;
1471 db->stripes = stripes = idxstripes->stripes;
1472 size_t num_db_stripes = (size_t)idxstripes->num_stripes;
1473 bool stripes_do_overlap = idxstripes->flags & stripes_do_overlap_mask,
1474 stripes_do_intersect = idxstripes->flags & stripes_intersect_mask;
1475 enum {
1476 size_t_bits = sizeof (size_t) * CHAR_BIT,
1477 };
1478 size_t overlap_handling = stripes_do_overlap
1479 ? ((num_db_stripes + size_t_bits - 1)/size_t_bits+1) * sizeof (size_t) : 0,
1480 align_adjust = !stripes_do_overlap ? (num_db_stripes + 1) * sizeof (int):
1481 ((num_db_stripes + 1)*sizeof (int)+sizeof (size_t)-1)
1482 / sizeof (size_t) * sizeof (size_t);
1483 /* using num_db_stripes + 1 ensures re-aligning is always possible */
1484 int *restrict db_stripes_nstrides_psum
1485 = xmalloc(align_adjust + overlap_handling);
1486 if (stripes_do_overlap)
1487 db->stripe_candidate_bitmask
1488 = (void *)((unsigned char *)db_stripes_nstrides_psum+align_adjust);
1489 int accum = db_stripes_nstrides_psum[0] = 0;
1490 for (size_t j = 0; j < num_db_stripes; ++j) {
1491 accum += stripes[j].nstrides;
1492 db_stripes_nstrides_psum[j + 1] = accum;
1493 }
1494 db->stripes_sort = idxstripes->stripes_sort;
1495 db->num_stripes = num_db_stripes;
1496 db->stripes_nstrides_psum = db_stripes_nstrides_psum;
1497 db->stripes_do_overlap = stripes_do_overlap;
1498 db->stripes_do_intersect = stripes_do_intersect;
1499}
1500
1501static inline void
1503 free(db->stripes_nstrides_psum);
1504}
1505
1507{
1508 size_t size, num;
1509 union {
1510 int *vec;
1511 int e1;
1512 } p;
1513};
1514
1515static inline size_t
1517 const struct Xt_stripes_sort a[n],
1518 Xt_int min_key)
1519{
1520 size_t left = 0, right = n - 1; /* avoid overflow in `(left + right)/2' */
1521 if ((a && n > 0)) ; else return n; /* invalid input or empty array */
1522 while (left < right)
1523 {
1524 /* invariant: a[left].range.min <= min_key <= a[right].range.min
1525 * or not in a */
1526 /*NOTE: *intentionally* truncate for odd sum */
1527 size_t m = (left + right + 1) / 2;
1528 if (a[m].range.min > min_key)
1529 right = m - 1; /* a[left].range.min <= min_key < a[m].range.min
1530 * or min_key not in a */
1531 else
1532 left = m;/* a[m].range.min <= min_key <= a[right].range.min
1533 * or min_key not in a */
1534 }
1535 /* assert(left == right) */
1536 return a[right].range.min <= min_key ? right : n;
1537}
1538
1539
1540static void
1542 const struct Xt_stripes_lookup *restrict db,
1543 struct int_vec *candidates,
1544 bool single_match_only,
1545 struct Xt_pos_ext_vec *restrict cover,
1546 Xt_config config)
1547{
1548 (void)cover;
1549 struct Xt_minmax query_minmax = xt_stripe2minmax(query);
1550 size_t num_db_stripes = db->num_stripes;
1551 const struct Xt_stripes_sort *restrict db_stripes_sort = db->stripes_sort;
1552 size_t start_pos;
1553 /* find trivial match first if sensible */
1554 if (db->stripes_do_overlap) {
1555 if ((start_pos
1556 = bsearch_stripes_sort(num_db_stripes, db_stripes_sort,
1557 query_minmax.min))
1558 != num_db_stripes) {
1559 int stripes_start_pos = db_stripes_sort[start_pos].position;
1560 if (xt_stripes_eq(db->stripes[stripes_start_pos],
1561 query)
1562 && (!single_match_only
1563 || (!db->stripes_do_intersect
1564 && xt_cover_search(
1565 cover,
1566 (struct Xt_pos_range){
1567 .start = db->stripes_nstrides_psum[stripes_start_pos],
1568 .end = db->stripes_nstrides_psum[stripes_start_pos]
1569 + query.nstrides -1 }, 0) == SIZE_MAX
1570 ))) {
1571 candidates->num = 1;
1572 if (candidates->size == 0)
1573 candidates->p.e1 = stripes_start_pos;
1574 else
1575 candidates->p.vec[0] = stripes_start_pos;
1576 return;
1577 }
1578 }
1579 }
1580 start_pos
1581 = bsearch_stripes_sort(num_db_stripes, db_stripes_sort, query_minmax.max);
1582 if (start_pos != num_db_stripes) {
1583 assert(db_stripes_sort[start_pos].range.min <= query_minmax.max);
1584 size_t end_pos = start_pos;
1585 while (end_pos < num_db_stripes
1586 && db_stripes_sort[end_pos].range.min <= query_minmax.max)
1587 ++end_pos;
1588 /* find all overlaps (which is more complicated if
1589 * non-overlapping isn't guaranteed) */
1590 size_t num_candidates;
1591 if (!db->stripes_do_overlap)
1592 {
1593 while (start_pos > 0
1594 && (db_stripes_sort[start_pos - 1].range.max >= query_minmax.min))
1595 --start_pos;
1596 num_candidates = end_pos - start_pos;
1597 if (candidates->size == 0 && num_candidates == 1) {
1598 candidates->p.e1 = db_stripes_sort[start_pos].position;
1599 candidates->num = 1;
1600 return;
1601 }
1602 if (candidates->size < num_candidates) {
1603 int *prevp = (candidates->size == 0) ? NULL : candidates->p.vec;
1604 candidates->p.vec = xrealloc(prevp,
1605 num_candidates
1606 * sizeof (candidates->p.vec[0]));
1607 candidates->size = num_candidates;
1608 }
1609 candidates->num = num_candidates;
1610 int *restrict candidates_vec = candidates->p.vec;
1611 for (size_t i = 0; i < num_candidates; ++i)
1612 candidates_vec[i] = db_stripes_sort[start_pos + i].position;
1613 }
1614 else
1615 {
1616 num_candidates = 0;
1617 size_t min_candidate = start_pos;
1618 const struct Xt_stripe *stripes = db->stripes;
1619 Xt_int query_start = query.start, query_stride = XT_INT_ABS(query.stride);
1620#ifdef XT_USE_FAST_DIVISIBLE_TEST
1621 struct xt_fast_div_coeff stride_div_test_coeff;
1622 stride_div_test_coeff = get_fast_div_coeff((Xt_uint)query_stride);
1623#endif
1624 size_t *bm_store = db->stripe_candidate_bitmask;
1625 size_t i = end_pos - 1;
1626 enum {
1627 size_t_bits = sizeof (size_t) * CHAR_BIT,
1628 };
1629 size_t pred_accum = 0;
1630 if (i >= size_t_bits) {
1631 if ((i+1) % size_t_bits) {
1632 for (; (i+1) % size_t_bits; --i)
1633 {
1634 size_t predicate
1635 = db_stripes_sort[i].range.max >= query_minmax.min;
1636 size_t pos = (size_t)db_stripes_sort[i].position;
1637 predicate
1638 &= (XT_INT_ABS(stripes[pos].stride) != query_stride
1639 || is_divisible(query_stride, stride_div_test_coeff,
1640 (Xt_int)(query_start - stripes[pos].start)));
1641 num_candidates += predicate;
1642 size_t predicate_mask = predicate - 1;
1643 min_candidate = (min_candidate & predicate_mask)
1644 | (i & ~predicate_mask);
1645 pred_accum = (pred_accum << 1) | predicate;
1646 }
1647 bm_store[(i+1)/size_t_bits] = pred_accum;
1648 pred_accum = 0;
1649 }
1650 assert(i % size_t_bits == size_t_bits - 1);
1651 for (; i >= size_t_bits;) {
1652 for (size_t j = 0; j < size_t_bits; ++j, --i) {
1653 size_t predicate
1654 = db_stripes_sort[i].range.max >= query_minmax.min;
1655 size_t pos = (size_t)db_stripes_sort[i].position;
1656 predicate
1657 &= (XT_INT_ABS(stripes[pos].stride) != query_stride
1658 || is_divisible(query_stride, stride_div_test_coeff,
1659 (Xt_int)(query_start - stripes[pos].start)));
1660 num_candidates += predicate;
1661 size_t predicate_mask = predicate - 1;
1662 min_candidate = (min_candidate & predicate_mask)
1663 | (i & ~predicate_mask);
1664 pred_accum = (pred_accum << 1) | predicate;
1665 }
1666 bm_store[(i+1)/size_t_bits] = pred_accum;
1667 pred_accum = 0;
1668 }
1669 }
1670 for (; i != SIZE_MAX; --i)
1671 {
1672 size_t predicate
1673 = db_stripes_sort[i].range.max >= query_minmax.min;
1674 size_t pos = (size_t)db_stripes_sort[i].position;
1675 predicate
1676 &= (XT_INT_ABS(stripes[pos].stride) != query_stride
1677 || is_divisible(query_stride, stride_div_test_coeff,
1678 (Xt_int)(query_start - stripes[pos].start)));
1679 num_candidates += predicate;
1680 size_t predicate_mask = predicate - 1;
1681 min_candidate = (min_candidate & predicate_mask)
1682 | (i & ~predicate_mask);
1683 pred_accum = (pred_accum << 1) | predicate;
1684 }
1685 bm_store[0] = pred_accum;
1686 if (candidates->size == 0 && num_candidates == 1) {
1687 candidates->p.e1 = db_stripes_sort[min_candidate].position;
1688 candidates->num = num_candidates;
1689 return;
1690 }
1691 if (candidates->size < num_candidates+1) {
1692 int *prevp = (candidates->size == 0) ? NULL : candidates->p.vec;
1693 candidates->p.vec = xrealloc(prevp,
1694 (num_candidates + 1)
1695 * sizeof (candidates->p.vec[0]));
1696 candidates->size = num_candidates + 1;
1697 }
1698 candidates->num = num_candidates;
1699 int *restrict candidates_vec = candidates->p.vec;
1700 size_t j = 0,
1701 next_bm_mult = (min_candidate + size_t_bits - 1)/size_t_bits*size_t_bits,
1702 predicates = (bm_store[min_candidate/size_t_bits]
1703 >> (min_candidate % size_t_bits));
1704 if (num_candidates == 1) {
1705 end_pos = min_candidate+1;
1706 assert(predicates&1);
1707 }
1708 for (i = min_candidate; i < end_pos; next_bm_mult+=size_t_bits) {
1709 size_t ulim = MIN(end_pos, next_bm_mult);
1710 for (; i < ulim; ++i) {
1711 size_t predicate = predicates&1;
1712 predicates >>= 1;
1713 if (predicate)
1714 candidates_vec[j] = db_stripes_sort[i].position;
1715 j += predicate;
1716 }
1717 predicates = bm_store[i/size_t_bits];
1718 }
1719 assert(j == num_candidates);
1720 }
1721 config->sort_funcs->sort_int(candidates->p.vec, num_candidates);
1722 } else
1723 candidates->num = 0;
1724}
1725
1726static struct Xt_idxstripes_ *
1728 const struct Xt_stripe *restrict stripes)
1729{
1730 size_t expansion = 0;
1731 for (size_t i = 0; i < num_stripes; ++i) {
1732 expansion += stripes[i].stride == 0 ? (size_t)(stripes[i].nstrides - 1) : 0;
1733 }
1734 struct Xt_stripe *restrict expanded_stripes
1735 = xmalloc((num_stripes + expansion) * sizeof (expanded_stripes[0]));
1736 size_t j = 0;
1737 for (size_t i = 0; i < num_stripes; ++i) {
1738 struct Xt_stripe stripe = stripes[i];
1739 if (stripe.stride == 0) {
1740 for (size_t k = 0; k < (size_t)stripe.nstrides; ++k)
1741 expanded_stripes[j + k] = (struct Xt_stripe){ .start = stripe.start,
1742 .stride = 1,
1743 .nstrides = 1 };
1744 j += (size_t)stripe.nstrides;
1745 } else {
1746 expanded_stripes[j] = stripe;
1747 ++j;
1748 }
1749 }
1750 return (struct Xt_idxstripes_ *)
1751 xt_idxstripes_prealloc_new(expanded_stripes,
1752 (int)(num_stripes + expansion));
1753}
1754
1755
1756static size_t
1758 struct Xt_stripe query,
1759 const struct Xt_stripes_lookup *restrict db,
1760 struct Xt_pos_ext_vec *restrict result,
1761 struct Xt_pos_ext_vec *restrict cover,
1762 bool single_match_only,
1763 size_t num_candidates,
1764 int *restrict candidates);
1765
1766int
1768 Xt_idxlist idxlist,
1769 int num_stripes,
1770 const struct Xt_stripe stripes[num_stripes],
1771 int *num_ext,
1772 struct Xt_pos_ext **pos_exts,
1773 int single_match_only,
1774 Xt_config config)
1775{
1776 size_t unmatched = 0;
1777 struct Xt_pos_ext_vec result;
1778 result.num_pos_ext = 0;
1779 struct Xt_idxstripes_ *restrict idxstripes = (struct Xt_idxstripes_ *)idxlist;
1780 if (num_stripes > 0)
1781 {
1782 if (idxstripes->flags & stripes_some_have_zero_stride_mask)
1783 idxstripes = expand_zero_stripes((size_t)idxstripes->num_stripes,
1784 idxstripes->stripes);
1785 result.size_pos_ext = (size_t)MAX(MIN(idxstripes->num_stripes,
1786 num_stripes), 8);
1787 result.pos_ext = xmalloc((result.size_pos_ext + 1)
1788 * sizeof (*result.pos_ext));
1789 /* put non-concatenable *terminator* at offset -1 */
1790 result.pos_ext[0] = (struct Xt_pos_ext){.start = INT_MIN, .size = 0 };
1791 ++result.pos_ext;
1792 struct Xt_pos_ext_vec cover;
1793 xt_cover_start(&cover, result.size_pos_ext);
1794 struct Xt_stripes_lookup stripes_db;
1795 create_stripes_lookup(&stripes_db, idxstripes);
1796 struct int_vec candidates = { .size = 0, .p.vec = NULL };
1797 for (size_t i = 0; i < (size_t)num_stripes; ++i) {
1798 struct Xt_stripe query = stripes[i];
1799 int j = query.nstrides;
1800 query.nstrides = ((query.stride != 0) | (query.nstrides == 0))
1801 ? query.nstrides : 1;
1802 query.stride = (Xt_int)((query.stride != 0 && query.nstrides != 1)
1803 ? query.stride : (Xt_int)1);
1804 find_candidates(query, &stripes_db, &candidates,
1805 single_match_only, &cover, config);
1806 if (candidates.num) {
1807 do {
1809 query, &stripes_db, &result, &cover, single_match_only != 0,
1810 candidates.num, candidates.size != 0
1811 ? candidates.p.vec : &candidates.p.e1);
1812 } while ((stripes[i].stride == 0) & (--j > 0));
1813 }
1814 }
1815 if (candidates.size)
1816 free(candidates.p.vec);
1817 --(result.pos_ext);
1818 memmove(result.pos_ext, result.pos_ext + 1,
1819 sizeof (*result.pos_ext) * result.num_pos_ext);
1820 *pos_exts = xrealloc(result.pos_ext,
1821 sizeof (*result.pos_ext) * result.num_pos_ext);
1822 destroy_stripes_lookup(&stripes_db);
1823 xt_cover_finish(&cover);
1824 if ((struct Xt_idxstripes_ *)idxlist != idxstripes) {
1825 free(idxstripes->stripes);
1826 xt_idxlist_delete((Xt_idxlist)idxstripes);
1827 }
1828 }
1829 *num_ext = (int)result.num_pos_ext;
1830 return (int)unmatched;
1831}
1832
1833static size_t
1835 struct Xt_pos_ext pos_ext2add,
1836 const struct Xt_stripes_lookup *restrict db,
1837 struct Xt_pos_ext_vec *restrict result,
1838 struct Xt_pos_ext_vec *restrict cover,
1839 size_t num_candidates,
1840 int *restrict candidates);
1841
1843{
1845 struct Xt_stripe query_tail;
1846};
1847
1848static struct unmatched_tail
1850 struct Xt_stripe query,
1851 const struct Xt_stripes_lookup *restrict stripes_lookup,
1852 struct Xt_pos_ext_vec *restrict result,
1853 struct Xt_pos_ext_vec *restrict cover,
1854 bool single_match_only,
1855 size_t num_candidates,
1856 int *restrict candidates);
1857
1858static size_t
1860 struct Xt_pos_ext pos_ext2add,
1861 const struct Xt_stripes_lookup *stripes_lookup,
1862 struct Xt_pos_ext_vec *restrict result,
1863 struct Xt_pos_ext_vec *restrict cover,
1864 bool single_match_only,
1865 size_t num_candidates,
1866 int *restrict candidates)
1867{
1868 size_t unmatched = 0;
1869 if (single_match_only)
1870 unmatched += conditional_pos_ext_insert(
1871 query, pos_ext2add, stripes_lookup, result, cover,
1872 num_candidates, candidates);
1873 else
1874 append_ext(pos_ext2add, result);
1875 return unmatched;
1876}
1877
1878
1879
1880size_t
1882 struct Xt_stripe query,
1883 const struct Xt_stripes_lookup *restrict db,
1884 struct Xt_pos_ext_vec *restrict result,
1885 struct Xt_pos_ext_vec *restrict cover,
1886 bool single_match_only,
1887 size_t num_candidates,
1888 int *restrict candidates)
1889{
1890 size_t unmatched = 0;
1891 struct Xt_minmax query_minmax = xt_stripe2minmax(query);
1892 const struct Xt_stripe *restrict db_stripes = db->stripes;
1893 const int *restrict db_stripes_nstrides_psum = db->stripes_nstrides_psum;
1894 const struct Xt_stripes_sort *restrict db_stripes_sort = db->stripes_sort;
1895 for (size_t j = 0; j < num_candidates; ++j) {
1896 size_t unsort_pos = (size_t)candidates[j];
1897 size_t sort_pos = (size_t)db_stripes_sort[unsort_pos].inv_position;
1898 if ((query_minmax.min <= db_stripes_sort[sort_pos].range.max)
1899 & (query_minmax.max >= db_stripes_sort[sort_pos].range. min))
1900 ;
1901 else
1902 continue;
1903 struct Xt_stripe db_stripe = db_stripes[unsort_pos];
1904 Xt_int stride = query.stride;
1905 /* determine if db_stripe and query can be easily aligned */
1906 if ((stride > 0)
1907 & (stride == db_stripe.stride)
1908 & ((query.start - db_stripe.start) % stride == 0)) {
1909 /* divide query into skipped, matching and left-over parts,
1910 * where skipped and left-over are non-matching */
1911 Xt_int overlap_start = MAX(query.start, db_stripe.start);
1912 /* handle skipped query part */
1913 int skipLen = (int)((overlap_start - query.start) / stride);
1914 if (skipLen)
1915 {
1916 struct Xt_stripe query_head = {
1917 .start = query.start,
1918 .stride = skipLen > 1 ? stride : (Xt_int)1,
1919 .nstrides = skipLen };
1920 unmatched
1922 query_head, db, result, cover, single_match_only,
1923 num_candidates - j - 1, candidates + j + 1);
1924 query.start = (Xt_int)(query.start + stride * (Xt_int)skipLen);
1925 query.nstrides -= skipLen;
1926 query.stride = query.nstrides > 1 ? query.stride : (Xt_int)1;
1927 }
1928 int db_stripe_skip
1929 = (int)((overlap_start - db_stripe.start) / stride);
1930 int overlap_nstrides
1931 = imin(query.nstrides, db_stripe.nstrides - db_stripe_skip);
1932
1933 unmatched +=
1935 (struct Xt_stripe){ .start = query.start,
1936 .stride = overlap_nstrides > 1 ? query.stride : (Xt_int)1,
1937 .nstrides = overlap_nstrides},
1938 (struct Xt_pos_ext){ .start
1939 = db_stripes_nstrides_psum[unsort_pos] + db_stripe_skip,
1940 .size = overlap_nstrides
1941 }, db, result, cover, single_match_only, num_candidates - j - 1,
1942 candidates + j + 1);
1943
1944 if (!(query.nstrides -= overlap_nstrides))
1945 goto search_finished;
1946 else {
1947 query.start = (Xt_int)(overlap_start + stride * overlap_nstrides);
1948 query.stride = query.nstrides > 1 ? query.stride : (Xt_int)1;
1949 query_minmax = xt_stripe2minmax(query);
1950 continue;
1951 }
1952 }
1953 else if ((stride != 0)
1954 & (stride == db_stripe.stride)
1955 & ((query.start - db_stripe.start) % stride != 0)) {
1956 /* skip stripe that cannnot match at any position */
1957 } else {
1958 /* Handle complex overlap */
1959 struct unmatched_tail search_result =
1961 query, db, result, cover, single_match_only,
1962 num_candidates - j, candidates + j);
1963 unmatched += search_result.unmatched;
1964 if (!search_result.query_tail.nstrides)
1965 goto search_finished;
1966 else {
1967 query = search_result.query_tail;
1968 query_minmax = xt_stripe2minmax(query);
1969 }
1970 }
1971 }
1972 unmatched += (size_t)query.nstrides;
1973 /* query wasn't found, add all indices to unmatched */
1974search_finished:
1975 return unmatched;
1976}
1977
1978/* todo: add indices into pos_exts which are sorted by end/first of ranges */
1979static size_t
1981 struct Xt_pos_ext pos_ext2add,
1982 const struct Xt_stripes_lookup *restrict db,
1983 struct Xt_pos_ext_vec *restrict result,
1984 struct Xt_pos_ext_vec *restrict cover,
1985 size_t num_candidates,
1986 int *restrict candidates)
1987{
1988 /* single_match_only is true => never re-match positions */
1989 size_t unmatched = 0;
1990 Xt_int stride = query.stride;
1991 if (pos_ext2add.size == -1)
1992 pos_ext2add.size = 1;
1993 int querySizeMaskNeg = isign_mask(pos_ext2add.size);
1994tail_search:
1995 ;
1996 int pos_ext2add_s = pos_ext2add.start
1997 + (querySizeMaskNeg & (pos_ext2add.size + 1)),
1998 pos_ext2add_e = pos_ext2add.start
1999 + (~querySizeMaskNeg & (pos_ext2add.size - 1));
2000 struct Xt_pos_range query_range
2001 = { .start = pos_ext2add_s, .end = pos_ext2add_e };
2002 /* does overlap exist? */
2003 size_t overlap_pos =
2004 xt_cover_insert_or_overlap(cover, query_range, 0);
2005 if (overlap_pos == SIZE_MAX) {
2006 /* remaining extent does not overlap any existing one */
2007 append_ext(pos_ext2add, result);
2008 } else {
2009 struct Xt_pos_ext *restrict pos_exts_ = cover->pos_ext;
2010 int dbSizeMaskNeg = isign_mask(pos_exts_[overlap_pos].size),
2011 db_s = pos_exts_[overlap_pos].start
2012 + (dbSizeMaskNeg & (pos_exts_[overlap_pos].size + 1)),
2013 db_e = pos_exts_[overlap_pos].start
2014 + (~dbSizeMaskNeg & (pos_exts_[overlap_pos].size - 1));
2015 /* determine length of overlap parts */
2016 int lowQuerySkip = db_s - pos_ext2add_s;
2017 int lowDbSkip = -lowQuerySkip;
2018 lowQuerySkip = (int)((unsigned)(lowQuerySkip + abs(lowQuerySkip))/2);
2019 lowDbSkip = (int)((unsigned)(lowDbSkip + abs(lowDbSkip))/2);
2020 int overlapLen = MIN(db_e - db_s - lowDbSkip + 1,
2021 abs(pos_ext2add.size) - lowQuerySkip);
2022 int highQuerySkip = abs(pos_ext2add.size) - lowQuerySkip - overlapLen;
2023 /* then adjust lengths to direction of overlap (from
2024 * perspective of pos_ext2add */
2025 int querySkipLen = (~querySizeMaskNeg & lowQuerySkip)
2026 | (querySizeMaskNeg & -highQuerySkip),
2027 queryTailLen = (querySizeMaskNeg & -lowQuerySkip)
2028 | (~querySizeMaskNeg & highQuerySkip);
2029 if (querySkipLen)
2030 {
2031 int absQuerySkipLen = abs(querySkipLen);
2032 struct Xt_stripe query_skip = {
2033 .start = query.start,
2034 .stride = absQuerySkipLen > 1 ? stride : (Xt_int)1,
2035 .nstrides = absQuerySkipLen,
2036 };
2037 struct Xt_pos_ext pos_ext2add_skip = {
2038 .start = pos_ext2add.start,
2039 .size = querySkipLen
2040 };
2041 unmatched
2043 query_skip, pos_ext2add_skip, db,
2044 result, cover, num_candidates, candidates);
2045 pos_exts_ = result->pos_ext;
2046 query.start = (Xt_int)(query.start
2047 + stride * (Xt_int)absQuerySkipLen);
2048 query.nstrides -= absQuerySkipLen;
2049 query.stride = query.nstrides > 1 ? query.stride : (Xt_int)1;
2050 pos_ext2add.start += querySkipLen;
2051 pos_ext2add.size -= querySkipLen;
2052 }
2053 /* head part of (remaining) query matches part of already inserted index
2054 * range */
2055 struct Xt_stripe query_head = {
2056 .start = query.start,
2057 .stride = abs(overlapLen) > 1 ? stride : (Xt_int)1,
2058 .nstrides = abs(overlapLen) };
2059 /* restart search for overlapping part within following ranges */
2060 unmatched
2062 query_head, db, result, cover, true,
2063 num_candidates, candidates);
2064 pos_exts_ = result->pos_ext;
2065 if (queryTailLen) {
2066 /* shorten query accordingly */
2067 query.nstrides -= abs(overlapLen);
2068 query.start = (Xt_int)(query.start + stride * (Xt_int)abs(overlapLen));
2069 query.stride = query.nstrides > 1 ? query.stride : (Xt_int)1;
2070 int directedOverlapLen = (~querySizeMaskNeg & overlapLen)
2071 | (querySizeMaskNeg & -overlapLen);
2072 pos_ext2add.start += directedOverlapLen;
2073 pos_ext2add.size -= directedOverlapLen;
2074 goto tail_search;
2075 }
2076 /* whole range handled, return */
2077 }
2078 return unmatched;
2079}
2080
2081static Xt_int
2083 Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
2084 return idxstripes->min_index;
2085}
2086
2087static Xt_int
2089 Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
2090 return idxstripes->max_index;
2091}
2092
2093static int
2095{
2096 unsigned sort_flags = (((Xt_idxstripes)idxlist)->flags >> stripes_sort_bit) & sort_mask;
2097 return (int)sort_flags-(sort_flags < 3);
2098}
2099
2100
2101/* unfortunately, Cray cc 8.[56].x miscompile the following function,
2102 * hence optimizations must be disabled. Feel free to dig deeper into
2103 * the problem, but it's fixed in 8.7. */
2104#if defined _CRAYC && _RELEASE_MAJOR == 8 && _RELEASE_MINOR >= 5 && _RELEASE_MINOR < 7
2105#pragma _CRI noopt
2106#endif
2107static struct unmatched_tail
2109 struct Xt_stripe query,
2110 const struct Xt_stripes_lookup *restrict db,
2111 struct Xt_pos_ext_vec *restrict result,
2112 struct Xt_pos_ext_vec *restrict cover,
2113 bool single_match_only,
2114 size_t num_candidates,
2115 int *restrict candidates)
2116{
2117 size_t unmatched = 0;
2118 const struct Xt_stripe *restrict db_stripes = db->stripes;
2119 size_t db_stripe_pos = (size_t)candidates[0];
2120 struct Xt_stripe overlap = get_stripe_intersection(query,
2121 db_stripes[db_stripe_pos]);
2122 if (overlap.nstrides == 0)
2123 return (struct unmatched_tail){ .unmatched = 0, .query_tail = query};
2124
2125 int skipped = (int)((overlap.start - query.start)/query.stride);
2126 if (skipped)
2127 {
2129 (struct Xt_stripe){ .start = query.start,
2130 .stride = skipped > 1 ? query.stride : (Xt_int)1,
2131 .nstrides = skipped}, db, result, cover,
2132 single_match_only, num_candidates - 1, candidates + 1);
2133 query.start = (Xt_int)(query.start + skipped * query.stride);
2134 query.nstrides -= skipped;
2135 query.stride = query.nstrides > 1 ? query.stride : 1;
2136 }
2137 /* Since overlap.nstrides > 0, overlap.start always matches, but
2138 * depending on the stride the remaining overlapping indices might or might
2139 * not be consecutive in the query, in the latter case intervening
2140 * parts need to be searched for too.
2141 * Stripes of length 1 are naturally consecutive.
2142 */
2143 int db_stripe_skip
2144 = (int)((overlap.start - db_stripes[db_stripe_pos].start)
2145 / db_stripes[db_stripe_pos].stride);
2146 int db_pos = db->stripes_nstrides_psum[db_stripe_pos] + db_stripe_skip;
2147 if (((overlap.stride == query.stride)
2148 & (overlap.stride == db_stripes[db_stripe_pos].stride))
2149 | (overlap.nstrides == 1))
2150 {
2151 unmatched += pos_ext_insert(overlap, (struct Xt_pos_ext){
2152 .start = db_pos,
2153 .size = overlap.nstrides },
2154 db, result, cover, single_match_only, num_candidates - 1, candidates + 1);
2155 query.nstrides -= overlap.nstrides;
2156 query.start = (Xt_int)(query.start + overlap.nstrides * query.stride);
2157 query.stride = query.nstrides > 1 ? query.stride : (Xt_int)1;
2158 }
2159 else if ((overlap.stride == query.stride)
2160 & (overlap.stride == -db_stripes[db_stripe_pos].stride))
2161 {
2162 /* all parts of the overlap can be used directly,
2163 but are inversely sorted in db_stripe */
2164 unmatched += pos_ext_insert(overlap, (struct Xt_pos_ext){
2165 .start = db_pos, .size = -overlap.nstrides },
2166 db, result, cover, single_match_only,
2167 num_candidates - 1, candidates + 1);
2168 query.nstrides -= overlap.nstrides;
2169 query.start = (Xt_int)(query.start + overlap.nstrides * query.stride);
2170 query.stride = query.nstrides > 1 ? query.stride : (Xt_int)1;
2171 }
2172 else if (overlap.stride == query.stride)
2173 {
2174 /* all parts of the overlap can be used but are non-consecutive
2175 * in db_stripe */
2176 int db_step = (int)(overlap.stride/db_stripes[db_stripe_pos].stride);
2177 /* todo: try to keep (prefix of) stripe together if the
2178 * corresponding positions cannot be inserted anyway */
2179 for (int i = 0; i < overlap.nstrides; ++i, db_pos += db_step)
2180 unmatched += pos_ext_insert(
2181 (struct Xt_stripe){ (Xt_int)(overlap.start + i*overlap.stride), 1, 1 },
2182 (struct Xt_pos_ext){ .start = db_pos, .size = 1 },
2183 db, result, cover, single_match_only,
2184 num_candidates - 1, candidates + 1);
2185 query.nstrides -= overlap.nstrides;
2186 query.start = (Xt_int)(query.start + overlap.nstrides * query.stride);
2187 query.stride = query.nstrides > 1 ? query.stride : (Xt_int)1;
2188 }
2189 else /* overlap.stride != query.stride => overlap.stride > query.stride */
2190 {
2191 /*
2192 * query.start = (Xt_int)(query.start
2193 * + overlap.stride * overlap.nstrides);
2194 */
2195 int stride_step = (int)(overlap.stride / query.stride);
2196 int db_step = (int)(overlap.stride/db_stripes[db_stripe_pos].stride);
2197 do {
2198 struct Xt_stripe consecutive_overlap = { .start = query.start,
2199 .stride = 1,
2200 .nstrides = 1 },
2201 intervening = { .start = (Xt_int)(query.start + query.stride),
2202 .stride = query.stride,
2203 .nstrides = MIN(query.nstrides - 1, stride_step - 1) };
2204 /* split off start index, then handle intervening */
2205 unmatched +=
2206 pos_ext_insert(consecutive_overlap, (struct Xt_pos_ext){
2207 .start = db_pos, .size = 1 },
2208 db, result, cover, single_match_only,
2209 num_candidates - 1, candidates + 1);
2210 db_pos += db_step;
2211 if (--query.nstrides > 0) {
2212 unmatched +=
2214 intervening, db, result, cover, single_match_only,
2215 num_candidates - 1, candidates + 1);
2216 query.nstrides -= intervening.nstrides;
2217 }
2218 query.start = (Xt_int)(query.start + query.stride * stride_step);
2219 query.stride = query.nstrides > 1 ? query.stride : (Xt_int)1;
2220 overlap.start = (Xt_int)(overlap.start + overlap.stride);
2221 } while (--overlap.nstrides);
2222 }
2223 return (struct unmatched_tail){ .unmatched = unmatched, .query_tail = query};
2224}
2225
2226/*
2227 * Local Variables:
2228 * c-basic-offset: 2
2229 * coding: utf-8
2230 * indent-tabs-mode: nil
2231 * show-trailing-whitespace: t
2232 * require-trailing-newline: t
2233 * End:
2234 */
int MPI_Comm
Definition core.h:64
#define XT_UNUSED(x)
Definition core.h:84
#define __attribute__(x)
Definition core.h:82
#define die(msg)
Definition core.h:131
#define ENSURE_ARRAY_SIZE(arrayp, curr_array_size, req_size)
#define INSTR_STOP(T)
Definition instr.h:69
#define INSTR_DEF(T, S)
Definition instr.h:66
#define INSTR_START(T)
Definition instr.h:68
#define PPM_DSO_INTERNAL
add versions of standard API functions not returning on error
#define xrealloc(ptr, size)
Definition ppm_xfuncs.h:71
#define xcalloc(nmemb, size)
Definition ppm_xfuncs.h:68
#define xmalloc(size)
Definition ppm_xfuncs.h:70
const struct Xt_sort_algo_funcptr * sort_funcs
const struct xt_idxlist_vtable * vtable
struct Xt_stripe * stripes
Xt_int * index_array_cache
struct Xt_stripes_sort stripes_sort[]
struct Xt_idxlist_ parent
struct Xt_pos_ext * pos_ext
Definition xt_cover.h:61
size_t num_pos_ext
Definition xt_cover.h:60
size_t size_pos_ext
Definition xt_cover.h:60
int size
Definition xt_core.h:97
int start
Definition xt_core.h:97
void(* sort_int)(int *a, size_t n)
Xt_int stride
Definition xt_stripe.h:56
int nstrides
Definition xt_stripe.h:57
Xt_int start
Definition xt_stripe.h:55
struct Xt_stripe * stripes
struct Xt_idxstripes_ * idxstripes
const struct Xt_stripe * stripes
const struct Xt_stripes_sort * stripes_sort
size_t * stripe_candidate_bitmask
struct Xt_minmax range
size_t size
union int_vec::@034137013171073103271351047006306162101341012025 p
struct Xt_stripe query_tail
int MPI_Type_create_struct(int count, XT_MPI2_CONST int array_of_block_lengths[], XT_MPI2_CONST MPI_Aint array_of_displacements[], XT_MPI2_CONST MPI_Datatype array_of_types[], MPI_Datatype *newtype)
int MPI_Type_create_resized(MPI_Datatype oldtype, MPI_Aint lb, MPI_Aint extent, MPI_Datatype *newtype)
int MPI_Type_free(MPI_Datatype *datatype)
int MPI_Type_commit(MPI_Datatype *datatype)
static Xt_tword xlimul(Xt_long a, Xt_int b)
static Xt_long xlladd(Xt_long a, Xt_long b)
static Xt_long xiisub(Xt_int a, Xt_int b)
static Xt_long xi2l(Xt_int a)
static Xt_ldiv xlldiv(Xt_long a, Xt_long b)
static Xt_long xliadd(Xt_long a, Xt_int b)
static Xt_idiv xlidivu(Xt_ulong a, Xt_uint b)
static int xlicmp_lt(Xt_long a, Xt_int b)
static Xt_long xiimul(Xt_int a, Xt_int b)
static bool xl_is_in_xt_int_range(Xt_long a)
static Xt_long xlabs(Xt_long a)
static Xt_long xilsub(Xt_int a, Xt_long b)
static Xt_long xlinc(Xt_long a, bool b)
static int xlsign(Xt_long x)
static int xlicmp_le(Xt_long a, Xt_int b)
static int xlicmp_ge(Xt_long a, Xt_int b)
@ xt_int_bits
static int isign(int x)
static int isign_mask(int x)
static Xt_int Xt_isign_mask(Xt_int x)
static int xinlz(Xt_uint v)
static int imin(int a, int b)
static Xt_int Xt_isign(Xt_int x)
static long long llsign(long long x)
#define is_divisible(divisor, coeff, dividend)
struct Xt_config_ * Xt_config
Definition xt_config.h:58
implementation of configuration object
#define XT_CONFIG_GET_FORCE_NOSORT(config)
base definitions header file
int xt_initialized(void)
#define Xt_int_dt
Definition xt_core.h:73
XT_INT Xt_int
Definition xt_core.h:72
struct Xt_idxlist_ * Xt_idxlist
Definition xt_core.h:84
unsigned XT_INT Xt_uint
Definition xt_core.h:74
size_t xt_cover_insert_or_overlap(struct Xt_pos_ext_vec *restrict cover, struct Xt_pos_range range, size_t search_start_pos)
Definition xt_cover.c:161
void xt_cover_finish(struct Xt_pos_ext_vec *restrict cover)
Definition xt_cover.c:69
void xt_cover_start(struct Xt_pos_ext_vec *restrict cover, size_t initial_size)
Definition xt_cover.c:60
size_t xt_cover_search(struct Xt_pos_ext_vec *restrict cover, struct Xt_pos_range query, size_t search_start_pos)
Definition xt_cover.c:100
#define MAX(a, b)
Definition xt_cuda.c:68
macros to create heapsort implementations
static size_t left(size_t i)
static size_t right(size_t i)
Xt_idxlist xt_idxempty_new(void)
void xt_idxlist_get_index_stripes_keep_buf(Xt_idxlist idxlist, struct Xt_stripe *stripes, size_t num_stripes_alloc)
Definition xt_idxlist.c:147
index list declaration
const Xt_int * xt_idxlist_get_indices_const(Xt_idxlist idxlist)
Definition xt_idxlist.c:119
int xt_idxlist_get_num_index_stripes(Xt_idxlist idxlist)
Definition xt_idxlist.c:129
void xt_idxlist_delete(Xt_idxlist idxlist)
Definition xt_idxlist.c:75
Provide non-public declarations common to all index lists.
static void Xt_idxlist_init(Xt_idxlist idxlist, const struct xt_idxlist_vtable *vtable, int num_indices)
@ VECTOR
@ STRIPES
static int idxstripes_get_indices_at_positions(Xt_idxlist idxlist, const int *positions, int num, Xt_int *index, Xt_int undef_idx)
static struct unmatched_tail idxstripes_complex_get_pos_exts_of_index_stripe(struct Xt_stripe query, const struct Xt_stripes_lookup *restrict stripes_lookup, struct Xt_pos_ext_vec *restrict result, struct Xt_pos_ext_vec *restrict cover, bool single_match_only, size_t num_candidates, int *restrict candidates)
static int idxstripes_get_sorting(Xt_idxlist idxlist)
const struct Xt_stripe * xt_idxstripes_get_index_stripes_const(Xt_idxlist idxlist)
static void idxstripes_delete(Xt_idxlist data)
static int idxstripes_get_num_index_stripes(Xt_idxlist idxlist)
void xt_idxstripes_initialize(void)
struct Xt_stripes_alloc xt_idxstripes_alloc(size_t num_stripes)
static struct Xt_stripe get_stripe_intersection(struct Xt_stripe stripe_a, struct Xt_stripe stripe_b)
static int idxstripes_get_position_of_index_off(Xt_idxlist idxlist, Xt_int index, int *position, int offset)
#define MIN(a, b)
static void create_stripes_lookup(struct Xt_stripes_lookup *restrict db, Xt_idxstripes idxstripes)
Xt_idxlist xt_idxstripes_sort_new(size_t num_src_stripes, const struct Xt_stripe src_stripes[], Xt_config config)
static size_t bsearch_stripes_sort(size_t n, const struct Xt_stripes_sort a[n], Xt_int min_key)
Xt_idxlist xt_idxstripes_unpack(void *buffer, int buffer_size, int *position, MPI_Comm comm)
static struct extended_gcd extended_gcd(Xt_int a, Xt_int b)
void xt_idxstripes_finalize(void)
Xt_idxlist xt_idxstripes_prealloc_new(const struct Xt_stripe *stripes, int num_stripes)
Xt_idxlist xt_idxstripes_from_idxlist_new(Xt_idxlist idxlist_src)
static void idxstripes_get_indices(Xt_idxlist idxlist, Xt_int *indices)
static Xt_idxlist idxstripes_order_invert(Xt_idxlist idxlist)
static int idxstripes_get_position_of_index(Xt_idxlist idxlist, Xt_int index, int *position)
static void idxstripes_get_index_stripes(Xt_idxlist idxlist, struct Xt_stripe *restrict stripes, size_t num_stripes_alloc)
PPM_DSO_INTERNAL Xt_idxlist xt_idxstripes_get_idxvec_intersection(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst, Xt_config XT_UNUSED(config))
static size_t pos_ext_insert(struct Xt_stripe query, struct Xt_pos_ext pos_ext2add, const struct Xt_stripes_lookup *stripes_lookup, struct Xt_pos_ext_vec *restrict result, struct Xt_pos_ext_vec *restrict cover, bool single_match_only, size_t num_candidates, int *restrict candidates)
static void idxstripes_pack(Xt_idxlist data, void *buffer, int buffer_size, int *position, MPI_Comm comm)
static Xt_idxlist idxstripes_sorted_copy(Xt_idxlist idxlist, Xt_config config)
Xt_idxlist xt_idxstripes_congeal(struct Xt_stripes_alloc stripes_alloc)
static struct Xt_idxstripes_ * expand_zero_stripes(size_t num_stripes, const struct Xt_stripe *restrict stripes)
static int idxstripes_get_pos_exts_of_index_stripes(Xt_idxlist idxlist, int num_stripes, const struct Xt_stripe stripes[num_stripes], int *num_ext, struct Xt_pos_ext **pos_ext, int single_match_only, Xt_config config)
static int idxstripes_get_index_at_position(Xt_idxlist idxlist, int position, Xt_int *index)
static const Xt_int * idxstripes_get_indices_const(Xt_idxlist idxlist)
struct Xt_idxstripes_ * Xt_idxstripes
static void destroy_stripes_lookup(struct Xt_stripes_lookup *restrict db)
size_t xt_idxstripes_get_num_index_stripes(Xt_idxlist idxlist)
static void append_ext(struct Xt_pos_ext pos_ext, struct Xt_pos_ext_vec *restrict result)
static const struct xt_idxlist_vtable idxstripes_vtable
static Xt_int stripe_min(struct Xt_stripe stripe, int pfx_remove)
static Xt_idxlist idxstripes_compute_intersection(Xt_idxstripes idxstripes_src, Xt_idxstripes idxstripes_dst)
idxstripes_flag_bits
@ stripes_sort_bit
@ stripes_intersect_bit
@ stripes_do_overlap_bit
@ stripes_some_have_zero_stride_bit
static bool stripe_contains_index(struct Xt_stripe stripe, Xt_int idx)
static size_t idxstripes_get_pos_exts_of_index_stripe(struct Xt_stripe query, const struct Xt_stripes_lookup *restrict db, struct Xt_pos_ext_vec *restrict result, struct Xt_pos_ext_vec *restrict cover, bool single_match_only, size_t num_candidates, int *restrict candidates)
static Xt_idxlist compute_intersection_fallback(Xt_idxstripes idxstripes_src, Xt_idxstripes idxstripes_dst, Xt_config config)
Xt_idxlist xt_idxstripes_get_intersection(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst, Xt_config config)
Xt_idxlist xt_idxstripes_new(struct Xt_stripe const *stripes, int num_stripes)
static Xt_idxlist idxstripes_copy(Xt_idxlist idxlist)
static size_t idxstripes_get_pack_size(Xt_idxlist data, MPI_Comm comm)
static struct Xt_stripes_alloc idxstripes_alloc(size_t num_stripes)
static int stripe_min_cmp_lt(const struct Xt_stripe a, const struct Xt_stripe b, const int pfx_remove_a, const int pfx_remove_b)
static Xt_int idxstripes_get_max_index(Xt_idxlist idxlist)
static Xt_int idxstripes_get_min_index(Xt_idxlist idxlist)
idxstripes_flag_mask
@ stripes_some_have_zero_stride_mask
@ stripes_intersect_mask
@ stripes_do_overlap_mask
static Xt_idxlist idxstripes_aggregate(Xt_idxstripes idxstripes, const char *caller)
static void find_candidates(struct Xt_stripe query, const struct Xt_stripes_lookup *restrict db, struct int_vec *candidates, bool single_match_only, struct Xt_pos_ext_vec *restrict cover, Xt_config config)
static size_t conditional_pos_ext_insert(struct Xt_stripe query, struct Xt_pos_ext pos_ext2add, const struct Xt_stripes_lookup *restrict db, struct Xt_pos_ext_vec *restrict result, struct Xt_pos_ext_vec *restrict cover, size_t num_candidates, int *restrict candidates)
static MPI_Datatype stripe_dt
#define MAX(a, b)
static Xt_int retrieve_min(size_t num_src_stripes, const struct Xt_stripe *src_stripes, int *src_permutation, int *pfx_removed)
static bool xt_can_merge_pos_ext(struct Xt_pos_ext a, struct Xt_pos_ext b)
struct Xt_vec_alloc xt_idxvec_alloc(int num_indices)
Definition xt_idxvec.c:200
Xt_idxlist xt_idxvec_congeal(struct Xt_vec_alloc vec_alloc)
Definition xt_idxvec.c:284
macros to create mergesort implementations, 4 way top-down method
utility routines for MPI
#define xt_mpi_call(call, comm)
Definition xt_mpi.h:68
size_t xt_stripes_merge_copy(size_t num_stripes, struct Xt_stripe *stripes_dst, const struct Xt_stripe *stripes_src, bool lookback)
Definition xt_stripe.c:163
bool xt_stripes_detect_duplicate(size_t num_stripes, const struct Xt_stripe stripes[num_stripes], struct Xt_minmax index_range)
Definition xt_stripe.c:237
static struct Xt_minmax xt_stripe2minmax(struct Xt_stripe stripe)
@ sort_mask
#define XT_SORT_FLAGS(ntrans_up, ntrans_dn)
static int xt_stripes_eq(struct Xt_stripe a, struct Xt_stripe b)