Yet Another eXchange Tool 0.11.1
Loading...
Searching...
No Matches
xt_idxstripes.c
Go to the documentation of this file.
1
12/*
13 * Keywords:
14 * Maintainer: Jörg Behrens <behrens@dkrz.de>
15 * Moritz Hanke <hanke@dkrz.de>
16 * Thomas Jahns <jahns@dkrz.de>
17 * URL: https://dkrz-sw.gitlab-pages.dkrz.de/yaxt/
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions are
21 * met:
22 *
23 * Redistributions of source code must retain the above copyright notice,
24 * this list of conditions and the following disclaimer.
25 *
26 * Redistributions in binary form must reproduce the above copyright
27 * notice, this list of conditions and the following disclaimer in the
28 * documentation and/or other materials provided with the distribution.
29 *
30 * Neither the name of the DKRZ GmbH nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
35 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
36 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
37 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
38 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
39 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
40 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
41 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
42 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
43 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
44 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 */
46#ifdef HAVE_CONFIG_H
47#include <config.h>
48#endif
49
50#include <assert.h>
51#include <limits.h>
52#include <stdbool.h>
53#include <stdlib.h>
54#include <stdio.h>
55#include <string.h>
56
57#include "xt/xt_core.h"
58#include "xt_arithmetic_util.h"
59#include "xt_arithmetic_long.h"
60#include "xt/xt_idxlist.h"
61#include "xt_idxlist_internal.h"
62#include "xt/xt_idxempty.h"
63#include "xt/xt_idxvec.h"
64#include "xt_idxvec_internal.h"
65#include "xt/xt_idxstripes.h"
67#include "xt_stripe_util.h"
68#include "xt/xt_mpi.h"
69#include "xt_idxlist_unpack.h"
70#include "xt_cover.h"
71#include "core/core.h"
72#include "core/ppm_xfuncs.h"
73#include "ensure_array_size.h"
74#include "instr.h"
75#include "xt_config_internal.h"
76
77#define MIN(a,b) (((a)<(b))?(a):(b))
78#define MAX(a,b) (((a)>(b))?(a):(b))
79
80static void
82
83static size_t
85
86static void
87idxstripes_pack(Xt_idxlist data, void *buffer, int buffer_size,
88 int *position, MPI_Comm comm);
89
90static Xt_idxlist
92
93static Xt_idxlist
95
96static void
98
99static const Xt_int *
101
102static int
104
105static void
107 struct Xt_stripe *restrict stripes,
108 size_t num_stripes_alloc);
109
110static int
111idxstripes_get_index_at_position(Xt_idxlist idxlist, int position,
112 Xt_int * index);
113
114static int
115idxstripes_get_indices_at_positions(Xt_idxlist idxlist, const int *positions,
116 int num, Xt_int *index,
117 Xt_int undef_idx);
118static int
120 Xt_idxlist idxlist,
121 int num_stripes, const struct Xt_stripe stripes[num_stripes],
122 int *num_ext, struct Xt_pos_ext **pos_ext,
123 int single_match_only,
124 Xt_config config);
125
126static int
128 int * position);
129
130static int
132 int * position, int offset);
133
134static Xt_int
136
137static Xt_int
139
140static int
142
145 .get_pack_size = idxstripes_get_pack_size,
146 .pack = idxstripes_pack,
147 .copy = idxstripes_copy,
148 .sorted_copy = idxstripes_sorted_copy,
149 .get_indices = idxstripes_get_indices,
150 .get_indices_const = idxstripes_get_indices_const,
151 .get_num_index_stripes = idxstripes_get_num_index_stripes,
152 .get_index_stripes = idxstripes_get_index_stripes,
153 .get_index_at_position = idxstripes_get_index_at_position,
154 .get_indices_at_positions = idxstripes_get_indices_at_positions,
155 .get_position_of_index = idxstripes_get_position_of_index,
156 .get_positions_of_indices = NULL,
157 .get_pos_exts_of_index_stripes = idxstripes_get_pos_exts_of_index_stripes,
158 .get_position_of_index_off = idxstripes_get_position_of_index_off,
159 .get_positions_of_indices_off = NULL,
160 .get_min_index = idxstripes_get_min_index,
161 .get_max_index = idxstripes_get_max_index,
162 .get_sorting = idxstripes_get_sorting,
163 .get_bounding_box = NULL,
164 .idxlist_pack_code = STRIPES,
165};
166
167static MPI_Datatype stripe_dt;
168
169#if __GNUC__ >= 11
170__attribute__ ((access (none, 1)))
171int MPI_Get_address(XT_MPI_SEND_BUF_CONST void *location, MPI_Aint *address);
172#endif
173
174void
176{
177 struct Xt_stripe stripe;
178
179 MPI_Aint base_address, nstrides_address;
180
181 MPI_Get_address(&stripe, &base_address);
182 MPI_Get_address(&stripe.nstrides, &nstrides_address);
183
184 enum { num_stripe_dt_elems = 2 };
185 static const int block_lengths[num_stripe_dt_elems] = {2,1};
186 MPI_Aint displacements[num_stripe_dt_elems]
187 = { 0, nstrides_address - base_address };
188 static const MPI_Datatype types[num_stripe_dt_elems]
189 = { Xt_int_dt, MPI_INT };
190 MPI_Datatype stripe_dt_unaligned;
191
192 xt_mpi_call(MPI_Type_create_struct(num_stripe_dt_elems,
193 (int *)block_lengths, displacements,
194 (MPI_Datatype *)types,
195 &stripe_dt_unaligned), Xt_default_comm);
196 xt_mpi_call(MPI_Type_create_resized(stripe_dt_unaligned, 0,
197 (MPI_Aint)sizeof(stripe),
198 &stripe_dt), Xt_default_comm);
199 xt_mpi_call(MPI_Type_free(&stripe_dt_unaligned), Xt_default_comm);
200 xt_mpi_call(MPI_Type_commit(&stripe_dt), Xt_default_comm);
201}
202
203void
205{
206 xt_mpi_call(MPI_Type_free(&stripe_dt), Xt_default_comm);
207}
208
210
212 struct Xt_minmax range; /* minimal and maximal position of stripe */
213 int position; /* position of stripe this range
214 * corresponds to (permutation
215 * obtained from sorting) */
216 int inv_position; /* position in stripes_sort when
217 * indexed with unsorted index */
218};
219
231 /* todo: exploit newly introduced sorting information to speed up
232 * index position lookups */
237};
238
244
257
258#define SORT_TYPE struct Xt_stripes_sort
259#define SORT_TYPE_SUFFIX stripes_sort
260#define XT_SORTFUNC_DECL static
261#define SORT_TYPE_CMP_LT(a,b,...) ((a).range.min < (b).range.min)
262#define SORT_TYPE_CMP_LE(a,b,...) ((a).range.min <= (b).range.min)
263#define SORT_TYPE_CMP_EQ(a,b,...) ((a).range.min == (b).range.min)
264#include "xt_mergesort_base.h"
265#undef SORT_TYPE
266#undef SORT_TYPE_SUFFIX
267#undef SORT_TYPE_CMP_LT
268#undef SORT_TYPE_CMP_LE
269#undef SORT_TYPE_CMP_EQ
270#undef XT_SORTFUNC_DECL
271
272static Xt_idxlist
274 const char *caller);
275
278{
279 return idxstripes_aggregate(stripes_alloc.idxstripes, __func__);
280}
281
282
283static Xt_idxlist
285 const char *caller)
286{
287 const struct Xt_stripe *restrict stripes = idxstripes->stripes;
288 struct Xt_stripes_sort *restrict stripes_sort = idxstripes->stripes_sort;
289 int ntrans_up=0, ntrans_dn=0;
290 Xt_int min, max, prev_end;
291 long long num_indices;
292 {
293 int nstrides = stripes[0].nstrides;
294 Xt_int stride = stripes[0].stride;
295 prev_end = (Xt_int)(stripes[0].start
296 + stride * (nstrides-1));
297 ntrans_up += ((nstrides - 1) & ~((stride > 0)-1));
298 ntrans_dn += ((nstrides - 1) & ~((stride < 0)-1));
299 num_indices = (long long)nstrides;
300 struct Xt_minmax stripe_range = xt_stripe2minmax(stripes[0]);
301 stripes_sort[0].range = stripe_range;
302 stripes_sort[0].position = 0;
303 min = stripe_range.min;
304 max = stripe_range.max;
305 }
306 size_t num_stripes = (size_t)idxstripes->num_stripes;
307 int sign_err = stripes[0].nstrides;
308 unsigned have_zero_stride = stripes[0].stride == 0,
309 stripes_intersect = have_zero_stride & (stripes[0].nstrides > 1);
310 for (size_t i = 1; i < num_stripes; ++i) {
311 struct Xt_minmax stripe_range = xt_stripe2minmax(stripes[i]);
312 int nstrides = stripes[i].nstrides;
313 Xt_int start = stripes[i].start, stride = stripes[i].stride;
314 stripes_sort[i].range = stripe_range;
315 stripes_sort[i].position = (int)i;
316 min = MIN(stripe_range.min, min);
317 max = MAX(stripe_range.max, max);
318 num_indices += (long long)nstrides;
319 sign_err |= nstrides;
320 have_zero_stride |= stride == 0;
321 stripes_intersect |= (stride == 0 && nstrides > 1);
322 ntrans_up += (start > prev_end) + ((nstrides - 1) & ~((stride > 0)-1));
323 ntrans_dn += (start < prev_end) + ((nstrides - 1) & ~((stride < 0)-1));
324 prev_end = (Xt_int)(start + stride * (nstrides-1));
325 }
326 /* test sign bit */
327 if (sign_err < 0) {
328 static const char template[] = "ERROR: %s called with invalid stripes";
329 size_t buf_size = sizeof (template) - 2 + strlen(caller);
330 char *msg = xmalloc(buf_size);
331 snprintf(msg, buf_size, template, caller);
332 die(msg);
333 }
334 if (num_indices > 0) {
335 assert(num_indices <= INT_MAX);
336 xt_mergesort_stripes_sort(stripes_sort, num_stripes);
337 stripes_sort[stripes_sort[0].position].inv_position = 0;
338 unsigned stripes_do_overlap = 0;
339 for (size_t i = 1; i < num_stripes; ++i) {
340 stripes_do_overlap
341 |= stripes_sort[i - 1].range.max >= stripes_sort[i].range.min;
342 stripes_sort[stripes_sort[i].position].inv_position = (int)i;
343 }
344 stripes_intersect = stripes_intersect
345 || (stripes_do_overlap
347 num_stripes, stripes, (struct Xt_minmax){ .min=min, .max=max }));
348 idxstripes->flags = XT_SORT_FLAGS(ntrans_up, ntrans_dn) << stripes_sort_bit
349 | stripes_do_overlap << stripes_do_overlap_bit
350 | have_zero_stride << stripes_some_have_zero_stride_bit
351 | stripes_intersect << stripes_intersect_bit;
352 idxstripes->min_index = min;
353 idxstripes->max_index = max;
354 idxstripes->index_array_cache = NULL;
355 Xt_idxlist_init(&idxstripes->parent, &idxstripes_vtable, (int)num_indices);
356 return (Xt_idxlist)idxstripes;
357 } else {
358 free(idxstripes);
359 return xt_idxempty_new();
360 }
361}
362
363static struct Xt_stripes_alloc
364idxstripes_alloc(size_t num_stripes);
365
366struct Xt_stripes_alloc
367xt_idxstripes_alloc(size_t num_stripes)
368{
369 return idxstripes_alloc(num_stripes);
370}
371
372static struct Xt_stripes_alloc
373idxstripes_alloc(size_t num_stripes)
374{
375 size_t header_size = ((sizeof (struct Xt_idxstripes_)
376 + (sizeof (struct Xt_stripes_sort)
377 * (size_t)num_stripes)
378 + sizeof (struct Xt_stripe) - 1)
379 / sizeof (struct Xt_stripe))
380 * sizeof (struct Xt_stripe),
381 body_size = sizeof (struct Xt_stripe) * (size_t)num_stripes;
382 Xt_idxstripes idxstripes = xmalloc(header_size + body_size);
383 struct Xt_stripe *stripes_assign
384 = (struct Xt_stripe *)(void *)((unsigned char *)idxstripes + header_size);
385 assert(num_stripes <= INT_MAX);
386 idxstripes->num_stripes = (int)num_stripes;
387 idxstripes->stripes = stripes_assign;
388 return (struct Xt_stripes_alloc){ .idxstripes = idxstripes,
389 .stripes = stripes_assign };
390}
391
392
394xt_idxstripes_new(struct Xt_stripe const * stripes, int num_stripes) {
395 INSTR_DEF(instr,"xt_idxstripes_new")
396 INSTR_START(instr);
397 // ensure that yaxt is initialized
398 assert(xt_initialized());
399
400 Xt_idxlist result;
401
402 if (num_stripes > 0) {
403 struct Xt_stripes_alloc stripes_alloc
404 = idxstripes_alloc((size_t)num_stripes);
405 stripes_alloc.idxstripes->num_stripes
406 = (int)xt_stripes_merge_copy((size_t)num_stripes,
407 stripes_alloc.stripes,
408 stripes, false);
409 result = idxstripes_aggregate(stripes_alloc.idxstripes, __func__);
410 } else
411 result = xt_idxempty_new();
412 INSTR_STOP(instr);
413 return result;
414}
415
418 // ensure that yaxt is initialized
419 assert(xt_initialized());
420 Xt_idxlist result;
421 if (idxlist_src->num_indices > 0) {
422 size_t num_stripes = (size_t)xt_idxlist_get_num_index_stripes(idxlist_src);
423 struct Xt_stripes_alloc stripes_alloc
424 = idxstripes_alloc(num_stripes);
425 xt_idxlist_get_index_stripes_keep_buf(idxlist_src, stripes_alloc.stripes,
426 num_stripes);
427 result = idxstripes_aggregate(stripes_alloc.idxstripes, __func__);
428 } else
429 result = xt_idxempty_new();
430 return result;
431}
432
433
434
436xt_idxstripes_prealloc_new(const struct Xt_stripe *stripes, int num_stripes)
437{
438 Xt_idxlist result;
439
440 if (num_stripes > 0) {
441 size_t header_size = ((sizeof (struct Xt_idxstripes_)
442 + ((size_t)num_stripes
443 * sizeof (struct Xt_stripes_sort))
444 + sizeof (struct Xt_stripe) - 1)
445 / sizeof (struct Xt_stripe))
446 * sizeof (struct Xt_stripe);
447 Xt_idxstripes idxstripes = xmalloc(header_size);
448 idxstripes->num_stripes = num_stripes;
449 /* this assignment is safe because no methods modify the pointee */
450 idxstripes->stripes = (struct Xt_stripe *)stripes;
451 result = idxstripes_aggregate(idxstripes, __func__);
452 } else
453 result = xt_idxempty_new();
454 return result;
455}
456
457
458static void
460
461 if (data == NULL) return;
462
463 Xt_idxstripes stripes = (Xt_idxstripes)data;
464
465 free(stripes->index_array_cache);
466 free(stripes);
467}
468
469static size_t
471
472 Xt_idxstripes stripes = (Xt_idxstripes)data;
473
474 int size_header, size_stripes = 0;
475
476 xt_mpi_call(MPI_Pack_size(2, MPI_INT, comm, &size_header), comm);
477 if (stripes->num_stripes)
478 xt_mpi_call(MPI_Pack_size(stripes->num_stripes, stripe_dt, comm,
479 &size_stripes), comm);
480
481 return (size_t)size_header + (size_t)size_stripes;
482}
483
484static void
485idxstripes_pack(Xt_idxlist data, void *buffer, int buffer_size,
486 int *position, MPI_Comm comm) {
487 INSTR_DEF(instr,"idxstripes_pack")
488 INSTR_START(instr);
489
490 assert(data);
491 Xt_idxstripes stripes = (Xt_idxstripes)data;
492 int header[2] = { STRIPES, stripes->num_stripes };
493
494 xt_mpi_call(MPI_Pack(header, 2, MPI_INT, buffer,
495 buffer_size, position, comm), comm);
496 int num_stripes = stripes->num_stripes;
497 if (num_stripes)
498 xt_mpi_call(MPI_Pack(stripes->stripes, num_stripes, stripe_dt,
499 buffer, buffer_size, position, comm), comm);
500 INSTR_STOP(instr);
501}
502
503Xt_idxlist xt_idxstripes_unpack(void *buffer, int buffer_size, int *position,
504 MPI_Comm comm) {
505
506 INSTR_DEF(instr,"xt_idxstripes_unpack")
507 INSTR_START(instr);
508
509 int num_stripes;
510 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
511 &num_stripes, 1, MPI_INT, comm), comm);
512
513 Xt_idxlist result;
514 assert(num_stripes);
515 struct Xt_stripes_alloc stripes_alloc =
516 idxstripes_alloc((size_t)num_stripes);
517
518 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
519 stripes_alloc.stripes,
520 num_stripes, stripe_dt, comm),comm);
521 result = idxstripes_aggregate(stripes_alloc.idxstripes, __func__);
522 INSTR_STOP(instr);
523 return result;
524}
525
526#define XT_IDXSTRIPES_POS_EXT_MAP_COUNT
528#undef XT_IDXSTRIPES_POS_EXT_MAP_COUNT
530
531static Xt_idxlist
533 Xt_idxstripes idxstripes_dst,
534 Xt_config config)
535{
536 INSTR_DEF(instr,"compute_intersection_fallback")
537 INSTR_START(instr);
538 Xt_idxlist intersection;
539 int num_ext;
540 struct Xt_pos_ext *pos_exts;
541
542 int num_unmatched =
544 &idxstripes_src->parent,
545 idxstripes_dst->num_stripes,
546 idxstripes_dst->stripes,
547 &num_ext, &pos_exts, false,
548 config);
549 (void)num_unmatched;
550 if (num_ext > 0) {
551 /* 1. count index stripes correponding to pos_exts */
552 size_t num_result_stripes
553 = get_mapped_stripes_count((size_t)num_ext, pos_exts, idxstripes_src);
554 /* 2. allocate result idxstripes */
555 struct Xt_stripes_alloc intersection_alloc
556 = idxstripes_alloc(num_result_stripes);
557 /* 3. compute result idxstripes */
558 map_ext2stripes((size_t)num_ext, pos_exts, idxstripes_src,
559 intersection_alloc.stripes);
560 /* 4. congeal */
561 Xt_idxlist intersection_unsorted
562 = idxstripes_aggregate(intersection_alloc.idxstripes, __func__);
564 || idxstripes_get_sorting(intersection_unsorted) == 1) {
565 intersection = intersection_unsorted;
566 } else {
567 intersection = idxstripes_sorted_copy(intersection_unsorted, config);
568 xt_idxlist_delete(intersection_unsorted);
569 }
570 free(pos_exts);
571 } else {
572 intersection = xt_idxempty_new();
573 }
574 INSTR_STOP(instr);
575 return intersection;
576}
577
578
581};
582
583/* computes egcd of two positive integers a and b such that
584 egcd.gcd == egcd.u * a + egcd.v * b */
585static inline struct extended_gcd extended_gcd(Xt_int a, Xt_int b) {
586 Xt_int t = 1, u = 1, v = 0, s = 0;
587 while (b>0)
588 {
589 Xt_int q = (Xt_int)(a / b);
590 Xt_int prev_a = a;
591 a = b;
592 b = (Xt_int)(prev_a - q * b);
593 Xt_int prev_u = u;
594 u = s;
595 s = (Xt_int)(prev_u - q * s);
596 Xt_int prev_v = v;
597 v = t;
598 t = (Xt_int)(prev_v - q * t);
599 }
600 return (struct extended_gcd){ .gcd = a, .u = u, .v = v };
601}
602
603/* This implementation uses the method outlined in
604 * James M. Stichnoth,
605 * Efficient Compilation of Array Statements for Private Memory Multicomputers
606 * February, 1993 CMU-CS-93-109
607 */
608static struct Xt_stripe
610 struct Xt_stripe stripe_b) {
611
612 INSTR_DEF(instr,"get_stripe_intersection")
613 INSTR_START(instr);
614
615 struct Xt_bounded_stripe {
616 Xt_int min, max, stride, representative;
617 };
618
619 struct Xt_bounded_stripe bsa, bsb, bsi;
620
621 Xt_int stride_zero_mask_a = (stripe_a.stride != 0) - 1,
622 stride_zero_mask_b = (stripe_b.stride != 0) - 1;
623 {
624 Xt_int mask = Xt_isign_mask(stripe_a.stride);
625 bsa.min = (Xt_int)(stripe_a.start
626 + (mask & (stripe_a.stride * (stripe_a.nstrides - 1))));
627 bsa.max = (Xt_int)(stripe_a.start
628 + (~mask & (stripe_a.stride * (stripe_a.nstrides - 1))));
629 }
630 bsa.representative = stripe_a.start;
631 {
632 Xt_int mask = Xt_isign_mask(stripe_b.stride);
633 bsb.min = (Xt_int)(stripe_b.start
634 + (mask & (stripe_b.stride * (stripe_b.nstrides - 1))));
635 bsb.max = (Xt_int)(stripe_b.start
636 + (~mask & (stripe_b.stride * (stripe_b.nstrides - 1))));
637 }
638 bsb.representative = stripe_b.start;
639
640 bsa.stride = (Xt_int)((stripe_a.stride & ~stride_zero_mask_a)
641 | (stride_zero_mask_a & 1));
642 bsb.stride = (Xt_int)((stripe_b.stride & ~stride_zero_mask_b)
643 | (stride_zero_mask_b & 1));
644
645 /* FIXME: adjust the one further away from 0 */
646 /* adjust second representative to minimize difference to first representative */
647 Xt_int abs_stride_b = XT_INT_ABS(bsb.stride);
648#ifdef XT_LONG
649 Xt_long start_diff = (Xt_long)stripe_a.start - stripe_b.start;
650 bsb.representative
651 = (Xt_int)(bsb.representative
652 + (start_diff/abs_stride_b
653 + (start_diff%abs_stride_b > abs_stride_b/2))
654 * abs_stride_b);
655 start_diff = (Xt_long)bsb.representative - bsa.representative;
656#else
657 Xt_long start_diff = xiisub(stripe_a.start, stripe_b.start);
658 Xt_idiv start_diff_abs_stride_b_div
659 = xlidivu(xlabs(start_diff), (Xt_uint)abs_stride_b);
660 bsb.representative
661 = (Xt_int)(bsb.representative
662 + (start_diff_abs_stride_b_div.quot
663 + start_diff_abs_stride_b_div.rem > abs_stride_b/2)
664 * abs_stride_b);
665 start_diff = xiisub(bsb.representative, bsa.representative);
666#endif
667 Xt_int abs_stride_a = XT_INT_ABS(bsa.stride);
668 struct extended_gcd eg = extended_gcd(abs_stride_a, abs_stride_b);
669 bsi.min = MAX(bsa.min, bsb.min);
670 bsi.max = MIN(bsa.max, bsb.max);
671 /* we need to temporarily represent the product
672 * (bsb.representative - bsa.representative) * eg.u * abs_stride_a
673 * so we figure out first, if that product would overflow Xt_int
674 */
675 int stride_a_bits = xt_int_bits - xinlz((Xt_uint)abs_stride_a),
676 stride_bits = xt_int_bits - xinlz((Xt_uint)abs_stride_b) + stride_a_bits,
677 product_bits;
678 if (eg.u && bsb.representative != bsa.representative)
679 product_bits =
680 xt_int_bits - xinlz((Xt_uint)XT_INT_ABS(eg.u))
681#ifdef XT_LONG
682 + xt_int_bits - xinlz((Xt_uint)xlabs(start_diff))
683#else
684 + xt_int_bits - xinlz(xlabs(start_diff).lo)
685#endif
686 + stride_a_bits;
687 else
688 product_bits = 0;
689 int strides_mask, nstrides;
690 struct Xt_stripe intersection;
691 if (product_bits < xt_int_bits && stride_bits < xt_int_bits) {
692 Xt_int temp_stride = (Xt_int)((abs_stride_a * bsb.stride)/eg.gcd);
693 bsi.stride = (Xt_int)temp_stride;
694 Xt_int min_rep;
695 {
696 Xt_int some_rep
697 = (Xt_int)(bsa.representative
698 + ((bsb.representative - bsa.representative) * eg.u
699 * abs_stride_a / eg.gcd));
700 /* compute minimal bsi representative >= bsi.min */
701 Xt_int abs_bsi_stride = XT_INT_ABS(temp_stride),
702 r_diff = (Xt_int)(bsi.min - some_rep),
703 steps = (Xt_int)(r_diff / abs_bsi_stride);
704 steps = (Xt_int)(steps + (steps * abs_bsi_stride < r_diff));
705 min_rep = (Xt_int)(some_rep + steps * abs_bsi_stride);
706 bsi.representative = (Xt_int)min_rep;
707 }
708 nstrides = (int)((bsi.max - min_rep)/temp_stride + llsign(temp_stride));
709 int even_divide
710 = ((((bsb.representative - bsa.representative) % eg.gcd) == 0)
711 & (bsi.stride == temp_stride || abs(nstrides) == 1));
712 /* requires two's complement integers */
713 strides_mask = ~(((even_divide) & (bsi.min <= bsi.max)
714 & (min_rep <= bsi.max) & (min_rep >= bsi.min)) - 1);
715 Xt_int max_rep
716 = (Xt_int)(min_rep + (nstrides - llsign(temp_stride)) * bsi.stride);
717 intersection.start = (Xt_int)((bsa.stride >= 0) ? min_rep : max_rep);
718 } else {
719#ifdef XT_LONG
720#define ABS(x) (((x) < 0) ? -(x) : (x))
721 Xt_long temp_stride = xiimul(abs_stride_a, bsb.stride)/eg.gcd;
722 bsi.stride = (Xt_int)temp_stride;
723 Xt_long min_rep;
724 /* did the computation of bsi.stride remain in Xt_int range? */
725 bool bsi_stride_in_range = xl_is_in_xt_int_range(temp_stride);
726 /* did the computation of min_rep remain in Xt_int range? */
727 bool min_rep_in_range;
728 {
729 Xt_long some_rep;
730 if (bsb.representative != bsa.representative) {
731 /* computation might generate huge intermediary values, needing 3
732 * times as many bits as Xt_int has, so we use long multiplication */
733 Xt_long temp_1 = start_diff * eg.u;
734 Xt_tword temp_2 = xlimulu(temp_1, (Xt_uint)abs_stride_a);
735 /* some_rep will be representable as Xt_long
736 * if dividing by eg.gcd results in a value in the range of Xt_long...*/
737 min_rep_in_range
738 /* ... that means either all bits in temp_2.hi are
739 * identical to the high bit of temp_2.midlo ... */
740 = (temp_2.hi
741 == (Xt_uint)(Xt_isign_mask((Xt_int)(temp_2.midlo >> xt_int_bits))))
742 /* or the aggregate value of temp_2 divided by
743 * XT_LONG_MAX+1 is less than eg.gcd, instead of actually
744 * dividing, we can simply shift the uppermost bit of
745 * temp_2.midlo into temp_2.hi left-shifted by one */
746 | (XT_INT_ABS((Xt_int)(temp_2.hi << 1) | ((Xt_long)temp_2.midlo < 0)) < eg.gcd);
747 /* compute any representative */
748 some_rep
749 = (Xt_long)bsa.representative + ((Xt_long)temp_2.midlo / eg.gcd);
750 } else {
751 min_rep_in_range = true;
752 some_rep = bsa.representative;
753 }
754 /* compute minimal bsi representative >= bsi.min */
755 Xt_long abs_bsi_stride = ABS(temp_stride),
756 r_diff = bsi.min - some_rep,
757 steps = r_diff / abs_bsi_stride;
758 bool steps_in_range = xl_is_in_xt_int_range(steps);
759 steps = steps + (steps * abs_bsi_stride < r_diff);
760 min_rep = some_rep;
761 if (steps_in_range)
762 min_rep += steps * abs_bsi_stride;
763 min_rep_in_range &= xl_is_in_xt_int_range(min_rep);
764 bsi.representative = (Xt_int)min_rep;
765 }
766 int min_rep_mask = ~((int)min_rep_in_range - 1);
767 nstrides = (int)((((bsi.max - min_rep)/temp_stride)+xlsign(temp_stride))
768 & min_rep_mask)
769 | (~min_rep_mask & (bsb.representative == bsa.representative));
770 int even_divide
771 = ((start_diff%eg.gcd == 0) & (bsi_stride_in_range || abs(nstrides) == 1));
772 /* requires two's complement integers */
773 strides_mask = ~(((even_divide) & (bsi.min <= bsi.max)
774 & (min_rep <= bsi.max) & (min_rep >= bsi.min)) - 1);
775 Xt_int max_rep
776 = (Xt_int)(min_rep + (nstrides - xlsign(temp_stride)) * bsi.stride);
777 intersection.start = (Xt_int)((bsa.stride >= 0) ? min_rep : max_rep);
778#else
779 Xt_long temp_stride = xlldiv(xiimul(abs_stride_a, bsb.stride),
780 xi2l(eg.gcd)).quot;
781 bsi.stride = (Xt_int)temp_stride.lo;
782 /* minimal value that is reachable by both stripe starts plus some
783 * multiple of the respective stride */
784 Xt_long min_rep;
785 /* did the computation of bsi.stride remain in Xt_int range? */
786 bool bsi_stride_in_range = xl_is_in_xt_int_range(temp_stride);
787 /* did the computation of min_rep remain in Xt_int range? */
788 bool min_rep_in_range;
789 {
790 Xt_long some_rep;
791 if (bsb.representative != bsa.representative) {
792 /* computation might generate huge intermediary values */
793 Xt_long temp_1
794 = xiimul(bsb.representative - bsa.representative, eg.u);
795 Xt_tword temp_2 = xlimul(temp_1, abs_stride_a);
796 min_rep_in_range
797 = (temp_2.hi
798 == (Xt_uint)(Xt_isign_mask((Xt_int)(temp_2.mid))))
799 | (XT_INT_ABS((Xt_int)(temp_2.hi << 1) | ((Xt_int)temp_2.mid < 0)) < eg.gcd);
800 Xt_long t2 = (Xt_long){.hi = temp_2.mid, .lo = temp_2.lo };
801 Xt_ldiv temp_3;
802 if (eg.gcd == 1) {
803 temp_3.quot = t2;
804 temp_3.rem = (Xt_long){.hi = 0, .lo = 0 };
805 } else if (xlicmp_lt(t2, eg.gcd)) {
806 temp_3.quot = (Xt_long){ 0, 0 };
807 temp_3.rem = t2;
808 } else
809 temp_3 = xlldiv((Xt_long){.hi = temp_2.mid, .lo = temp_2.lo },
810 xi2l(eg.gcd));
811 some_rep = xliadd(temp_3.quot, bsa.representative);
812 } else {
813 min_rep_in_range = true;
814 some_rep = xi2l(bsa.representative);
815 }
816 /* compute minimal bsi representative >= bsi.min */
817 Xt_long abs_bsi_stride = xlabs(temp_stride),
818 r_diff = xilsub(bsi.min, some_rep);
819 Xt_ldiv steps = xlldiv(r_diff, abs_bsi_stride);
820 bool steps_in_range = xl_is_in_xt_int_range(steps.quot);
821 steps.quot = xlinc(steps.quot, (bool)((Xt_int)steps.rem.hi >= 0
822 && steps.rem.lo != 0));
823 if (steps_in_range)
824 min_rep = xlladd(some_rep,
825 xiimul((Xt_int)steps.quot.lo,
826 (Xt_int)abs_bsi_stride.lo));
827 else
828 min_rep = some_rep;
829 min_rep_in_range &= xl_is_in_xt_int_range(min_rep);
830 bsi.representative = (Xt_int)min_rep.lo;
831 }
832 if (min_rep_in_range)
833 nstrides = (int)xlldiv(xilsub(bsi.max, min_rep), temp_stride).quot.lo + xlsign(temp_stride);
834 else
835 nstrides = bsb.representative == bsa.representative;
836 int even_divide
837 = ((((bsb.representative - bsa.representative) % eg.gcd) == 0)
838 & (bsi_stride_in_range || abs(nstrides) == 1));
839 /* requires two's complement integers */
840 strides_mask = ~(((even_divide) & (bsi.min <= bsi.max)
841 & xlicmp_le(min_rep, bsi.max) & xlicmp_ge(min_rep, bsi.min)) - 1);
842 Xt_int max_rep
843 = (Xt_int)(min_rep.lo
844 + (Xt_uint)((nstrides - xlsign(temp_stride)) * bsi.stride));
845 intersection.start = ((bsa.stride >= 0) ? (Xt_int)min_rep.lo : max_rep);
846#endif
847 }
848 intersection.stride
849 = (Xt_int)(Xt_isign(bsa.stride) * XT_INT_ABS(bsi.stride));
850 intersection.nstrides
851 = (abs(nstrides) & strides_mask
852 & ~((int)stride_zero_mask_a & (int)stride_zero_mask_b))
853 | (stripe_b.nstrides & (int)stride_zero_mask_a & (int)stride_zero_mask_b);
854 INSTR_STOP(instr);
855 return intersection;
856}
857
858// this routine only works for idxstripes where !stripes_do_overlap
859static Xt_idxlist
861 Xt_idxstripes idxstripes_dst) {
862
863 INSTR_DEF(instr,"idxstripes_compute_intersection")
864 INSTR_START(instr);
865
866 struct Xt_stripe *restrict inter_stripes = NULL;
867 size_t num_inter_stripes = 0;
868 size_t inter_stripes_array_size = 0;
869
870 const struct Xt_stripes_sort *restrict src_stripes_sort
871 = idxstripes_src->stripes_sort,
872 *restrict dst_stripes_sort = idxstripes_dst->stripes_sort;
873 const struct Xt_stripe *restrict stripes_src = idxstripes_src->stripes,
874 *restrict stripes_dst = idxstripes_dst->stripes;
875
876 size_t i_src = 0, i_dst = 0;
877 size_t num_stripes_src = (size_t)idxstripes_src->num_stripes,
878 num_stripes_dst = (size_t)idxstripes_dst->num_stripes;
879
880 while ((i_src < num_stripes_src) &
881 (i_dst < num_stripes_dst)) {
882
883 while (i_src < num_stripes_src &&
884 src_stripes_sort[i_src].range.max
885 < dst_stripes_sort[i_dst].range.min) ++i_src;
886
887 if ( i_src >= num_stripes_src ) break;
888
889 while (i_dst < num_stripes_dst &&
890 dst_stripes_sort[i_dst].range.max
891 < src_stripes_sort[i_src].range.min) ++i_dst;
892
893 if ( i_dst >= num_stripes_dst ) break;
894
895 if ((src_stripes_sort[i_src].range.min
896 <= dst_stripes_sort[i_dst].range.max)
897 & (src_stripes_sort[i_src].range.max
898 >= dst_stripes_sort[i_dst].range.min)) {
899 ENSURE_ARRAY_SIZE(inter_stripes, inter_stripes_array_size,
900 num_inter_stripes+1);
901
902 struct Xt_stripe intersection_stripe;
903 inter_stripes[num_inter_stripes] = intersection_stripe =
904 get_stripe_intersection(stripes_src[src_stripes_sort[i_src].position],
905 stripes_dst[dst_stripes_sort[i_dst].position]);
906 num_inter_stripes += intersection_stripe.nstrides > 0;
907 }
908
909 if (dst_stripes_sort[i_dst].range.max
910 < src_stripes_sort[i_src].range.max)
911 i_dst++;
912 else
913 i_src++;
914 }
915
916 if (num_inter_stripes) {
917 /* invert negative and merge consecutive stripes */
918 struct Xt_stripe prev_stripe = inter_stripes[0];
919 if (prev_stripe.stride < 0) {
920 prev_stripe.start
921 = (Xt_int)(prev_stripe.start
922 + (prev_stripe.stride * (Xt_int)(prev_stripe.nstrides - 1)));
923 prev_stripe.stride = (Xt_int)-prev_stripe.stride;
924 }
925 inter_stripes[0] = prev_stripe;
926 size_t j = 0;
927 for (size_t i = 1; i < num_inter_stripes; ++i) {
928 struct Xt_stripe stripe = inter_stripes[i];
929 if (stripe.stride < 0) {
930 stripe.start = (Xt_int)(stripe.start
931 + stripe.stride * (Xt_int)(stripe.nstrides - 1));
932 stripe.stride = (Xt_int)-stripe.stride;
933 }
934 if ((stripe.stride == prev_stripe.stride)
935 & (stripe.start
936 == (prev_stripe.start
937 + prev_stripe.stride * (Xt_int)prev_stripe.nstrides)))
938 {
939 prev_stripe.nstrides += stripe.nstrides;
940 inter_stripes[j].nstrides = prev_stripe.nstrides;
941 }
942 else
943 {
944 inter_stripes[++j] = stripe;
945 prev_stripe = stripe;
946 }
947 }
948 num_inter_stripes = j + 1;
949 }
950
951 Xt_idxlist inter = xt_idxstripes_new(inter_stripes, (int)num_inter_stripes);
952
953 free(inter_stripes);
954
955 INSTR_STOP(instr);
956 return inter;
957}
958
961 Xt_config config)
962{
963 // both lists are index stripes:
964 Xt_idxstripes idxstripes_src = (Xt_idxstripes)idxlist_src,
965 idxstripes_dst = (Xt_idxstripes)idxlist_dst;
966
967 if ((idxstripes_src->flags | idxstripes_dst->flags)
969 return compute_intersection_fallback(idxstripes_src, idxstripes_dst,
970 config);
971 } else
972 return idxstripes_compute_intersection(idxstripes_src, idxstripes_dst);
973}
974
975static inline bool
977{
978 Xt_int start = stripe.start, stride = stripe.stride;
979 if (idx >= start && stride > 0) {
980 return idx < start + stride * stripe.nstrides
981 && (idx - start) % stride == 0;
982 } else if (idx <= start && stride < 0) {
983 return idx > start + stride * stripe.nstrides
984 && (start - idx) % stride == 0;
985 } else {
986 return idx == start;
987 }
988}
989
992 Xt_idxlist idxlist_dst,
993 Xt_config XT_UNUSED(config))
994{
995 Xt_idxstripes idxstripes_src = (Xt_idxstripes)idxlist_src;
996 const struct Xt_stripe *stripes_src = idxstripes_src->stripes;
997 assert(idxlist_dst->vtable->idxlist_pack_code == VECTOR
998 && idxlist_src->vtable == &idxstripes_vtable);
999 const Xt_int *indices = xt_idxlist_get_indices_const(idxlist_dst);
1000 size_t ndst = (size_t)idxlist_dst->num_indices;
1001 size_t nsrc_stripes = (size_t)idxstripes_src->num_stripes,
1002 num_found_indices = 0;
1003 enum {
1004 size_t_bits = sizeof (size_t) * CHAR_BIT,
1005 };
1006 size_t *found
1007 = xcalloc(((ndst + size_t_bits - 1)/ size_t_bits),
1008 sizeof (size_t));
1009 for (size_t i = 0; i < ndst; ++i) {
1010 Xt_int dst_idx = indices[i];
1011 for (size_t j = 0; j < nsrc_stripes; ++j)
1012 if (stripe_contains_index(stripes_src[j], dst_idx)) {
1013 num_found_indices++;
1014 found[i/size_t_bits] |= (size_t)1 << (i%size_t_bits);
1015 break;
1016 }
1017 }
1018 Xt_idxlist intersection;
1019 if (num_found_indices) {
1020 struct Xt_vec_alloc vec_alloc = xt_idxvec_alloc((int)num_found_indices);
1021 Xt_int *restrict found_indices = vec_alloc.vector;
1022 for (size_t i = 0, inter_ofs = 0; i < ndst; ++i) {
1023 size_t fofs = i / size_t_bits;
1024 if (found[fofs]&((size_t)1 << (i%size_t_bits))) {
1025 found_indices[inter_ofs] = indices[i];
1026 ++inter_ofs;
1027 }
1028 }
1029 intersection = xt_idxvec_congeal(vec_alloc);
1030 } else
1031 intersection = xt_idxempty_new();
1032 free(found);
1033 return intersection;
1034}
1035
1036static Xt_idxlist
1038
1039 Xt_idxstripes stripes = (Xt_idxstripes)idxlist;
1040
1041 return xt_idxstripes_new(stripes->stripes, stripes->num_stripes);
1042}
1043
1044static inline Xt_int
1045stripe_min(struct Xt_stripe stripe, int pfx_remove)
1046{
1047 Xt_int min = stripe.start;
1048 Xt_int stride = stripe.stride;
1049 int nstrides = stripe.nstrides;
1050 Xt_int adj = (Xt_int)(stride >= 0 ? pfx_remove : nstrides - 1 - pfx_remove);
1051 return (Xt_int)(min + stride * adj);
1052}
1053
1054static int
1055stripe_min_cmp_lt(const struct Xt_stripe a, const struct Xt_stripe b,
1056 const int pfx_remove_a, const int pfx_remove_b)
1057{
1058 /* emptied stripes compare minimal to move them to end of array */
1059 if (a.nstrides - pfx_remove_a == 0)
1060 return true;
1061 if (b.nstrides - pfx_remove_b == 0)
1062 return false;
1063 Xt_int amin = stripe_min(a, pfx_remove_a),
1064 bmin = stripe_min(b, pfx_remove_b);
1065 if (amin == bmin) {
1066 amin = XT_INT_ABS(a.stride);
1067 bmin = XT_INT_ABS(b.stride);
1068 }
1069 return amin > bmin;
1070}
1071
1072#define SORT_TYPE int
1073#define SORT_TYPE_SUFFIX stripe_by_min
1074#define SORT_TYPE_CMP_LT(u,v,i,j) \
1075 (stripe_min_cmp_lt(stripes[(u)], stripes[(v)], \
1076 pfx_removed[(u)], pfx_removed[(v)]))
1077#define XT_SORT_EXTRA_ARGS_DECL , int *restrict pfx_removed, \
1078 const struct Xt_stripe stripes[]
1079#define XT_SORT_EXTRA_ARGS_PASS , pfx_removed, stripes
1080#define XT_SORTFUNC_DECL static
1081#include "xt_heapsort_base.h"
1082
1083
1084static Xt_int
1085retrieve_min(size_t num_src_stripes,
1086 const struct Xt_stripe *src_stripes,
1087 int *src_permutation,
1088 int *pfx_removed)
1089{
1090 size_t min_stripe_pos = (size_t)src_permutation[0];
1091 struct Xt_stripe minstripe = src_stripes[min_stripe_pos];
1092 Xt_int minidx;
1093 if (minstripe.stride >= 0) {
1094 minidx = (Xt_int)(minstripe.start
1095 + minstripe.stride * pfx_removed[min_stripe_pos]);
1096 } else { /* minstripe.stride < 0 */
1097 minidx = (Xt_int)(minstripe.start
1098 + (minstripe.nstrides - 1 - pfx_removed[min_stripe_pos])
1099 * minstripe.stride);
1100 }
1101 ++pfx_removed[min_stripe_pos];
1102 xt_heapify_stripe_by_min(src_permutation, num_src_stripes,
1103 0, pfx_removed, src_stripes);
1104 return minidx;
1105}
1106
1108xt_idxstripes_sort_new(size_t num_src_stripes,
1109 const struct Xt_stripe src_stripes[],
1110 Xt_config config)
1111{
1112 (void)config;
1113 int *restrict src_permutation
1114 = xmalloc(num_src_stripes * 2 * sizeof (*src_permutation)),
1115 *restrict pfx_removed = src_permutation + num_src_stripes;
1116 size_t indices_left = 0;
1117 for (size_t i = 0; i < num_src_stripes; ++i) {
1118 src_permutation[i] = (int)i;
1119 pfx_removed[i] = 0;
1120 indices_left += (size_t)src_stripes[i].nstrides;
1121 }
1122 /* this only needs a heapsort actually for the following selection */
1123 xt_heapsort_stripe_by_min(src_permutation, num_src_stripes,
1124 pfx_removed, src_stripes);
1125 /* select stripe with minimal element */
1126 struct Xt_stripe *dst_stripes
1127 = xmalloc(sizeof (*dst_stripes) * num_src_stripes);
1128 size_t num_dst_stripes = 0, size_dst_stripes = num_src_stripes;
1129 {
1130 Xt_int minidx = retrieve_min(num_src_stripes, src_stripes,
1131 src_permutation, pfx_removed);
1132 dst_stripes[0] = (struct Xt_stripe){ .start = minidx, .stride = 1,
1133 .nstrides = 1 };
1134 num_dst_stripes = 1;
1135 --indices_left;
1136 }
1137 for (; indices_left != 0; --indices_left) {
1138 /* get lowest index */
1139 Xt_int minidx = retrieve_min(num_src_stripes, src_stripes,
1140 src_permutation, pfx_removed);
1141 /* insert lowest index into destination */
1142 size_t i = num_dst_stripes - 1;
1143 if (dst_stripes[i].nstrides == 1) {
1144 dst_stripes[i].stride = (Xt_int)(minidx - dst_stripes[i].start);
1145 dst_stripes[i].nstrides = 2;
1146 } else if (minidx
1147 == (Xt_int)(
1148 dst_stripes[i].start
1149 + dst_stripes[i].stride * dst_stripes[i].nstrides)) {
1150 ++dst_stripes[i].nstrides;
1151 } else {
1152 ENSURE_ARRAY_SIZE(dst_stripes, size_dst_stripes, num_dst_stripes+1);
1153 dst_stripes[num_dst_stripes] = (struct Xt_stripe){ .start = minidx,
1154 .stride = 1,
1155 .nstrides = 1 };
1156 ++num_dst_stripes;
1157 }
1158 }
1159 /* \todo: optimize to create destination in-place */
1160 Xt_idxlist copy = xt_idxstripes_new(dst_stripes, (int)num_dst_stripes);
1161 free(dst_stripes);
1162 free(src_permutation);
1163 return copy;
1164}
1165
1166static Xt_idxlist
1168{
1169 Xt_idxstripes src = (Xt_idxstripes)idxlist;
1170 size_t num_stripes = (size_t)src->num_stripes;
1171 struct Xt_stripes_alloc stripes_alloc
1172 = idxstripes_alloc(num_stripes);
1173 struct Xt_stripe *restrict inverted_stripes = stripes_alloc.stripes;
1174 const struct Xt_stripe *src_stripes = src->stripes;
1175 for (size_t i = 0; i < num_stripes; ++i) {
1176 Xt_int src_start = src_stripes[num_stripes-1-i].start,
1177 src_stride = src_stripes[num_stripes-1-i].stride;
1178 int nstrides = src_stripes[num_stripes-1-i].nstrides;
1179 inverted_stripes[i] = (struct Xt_stripe){
1180 .start = (Xt_int)(src_start + src_stride * (nstrides-1)),
1181 .stride = (Xt_int)-src_stride,
1182 .nstrides = nstrides,
1183 };
1184 }
1185 return idxstripes_aggregate(stripes_alloc.idxstripes, __func__);
1186}
1187
1188static Xt_idxlist
1190{
1191 Xt_idxstripes src = (Xt_idxstripes)idxlist;
1192 int sort_flags = (int)((src->flags >> stripes_sort_bit) & sort_mask);
1193 sort_flags -= sort_flags < 3;
1194 Xt_idxlist sorted_idxlist;
1195 if (sort_flags == 1)
1196 sorted_idxlist = idxstripes_copy(idxlist);
1197 else if (sort_flags == -1)
1198 sorted_idxlist = idxstripes_order_invert(idxlist);
1199 else
1200 sorted_idxlist = xt_idxstripes_sort_new((size_t)src->num_stripes,
1201 src->stripes, config);
1202 return sorted_idxlist;
1203}
1204
1205static void
1207 INSTR_DEF(instr,"idxstripes_get_indices")
1208 INSTR_START(instr);
1209
1210 Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
1211 const struct Xt_stripe *restrict stripes = idxstripes->stripes;
1212 size_t num_stripes = (size_t)idxstripes->num_stripes, ofs = 0;
1213 for (size_t i = 0; i < num_stripes; ofs += (size_t)stripes[i].nstrides, ++i )
1214 for (size_t j = 0; j < (size_t)stripes[i].nstrides; ++j)
1215 indices[ofs+j] = (Xt_int)(stripes[i].start
1216 + (Xt_int)j * stripes[i].stride);
1217
1218 INSTR_STOP(instr);
1219}
1220
1221static Xt_int const*
1223
1224 Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
1225
1226 if (idxstripes->index_array_cache) return idxstripes->index_array_cache;
1227
1228 int num_indices = idxlist->num_indices;
1229 Xt_int *tmp_index_array
1230 = xmalloc((size_t)num_indices * sizeof (*tmp_index_array));
1231 idxstripes_get_indices(idxlist, tmp_index_array);
1232 return idxstripes->index_array_cache = tmp_index_array;
1233}
1234
1235static void
1237 struct Xt_stripe *restrict stripes,
1238 size_t num_stripes_alloc)
1239{
1240#ifdef NDEBUG
1241 (void)num_stripes_alloc;
1242#endif
1243 INSTR_DEF(instr,"idxstripes_get_index_stripes")
1244 INSTR_START(instr);
1245 Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
1246 assert((size_t)idxstripes->num_stripes <= num_stripes_alloc);
1247 memcpy(stripes, idxstripes->stripes,
1248 (size_t)idxstripes->num_stripes * sizeof (stripes[0]));
1249 INSTR_STOP(instr);
1250}
1251
1252const struct Xt_stripe *
1254{
1255 assert(idxlist->vtable == &idxstripes_vtable);
1256 Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
1257 return idxstripes->stripes;
1258}
1259
1260static int
1262{
1263 Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
1264 return idxstripes->num_stripes;
1265}
1266
1267size_t
1269{
1270 assert(idxlist->vtable == &idxstripes_vtable);
1271 Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
1272 return (size_t)idxstripes->num_stripes;
1273}
1274
1275static int
1277 Xt_int * index) {
1278
1279 INSTR_DEF(instr,"idxstripes_get_index_at_position")
1280 INSTR_START(instr);
1281
1282 int retval;
1283
1284 if (position >= 0 && position < idxlist->num_indices) {
1285 Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
1286 const struct Xt_stripe *restrict stripes = idxstripes->stripes;
1287 size_t num_stripes = (size_t)idxstripes->num_stripes;
1288 size_t i = 0;
1289 while (i < num_stripes && position >= stripes[i].nstrides) {
1290 position-= stripes[i].nstrides;
1291 ++i;
1292 }
1293 *index = (Xt_int)(stripes[i].start + position * stripes[i].stride);
1294 retval = 0;
1295 } else {
1296 retval = 1;
1297 }
1298
1299 INSTR_STOP(instr);
1300 return retval;
1301}
1302
1303
1304static int
1306 int num_pos, Xt_int *index,
1307 Xt_int undef_idx) {
1308
1309 INSTR_DEF(instr,"idxstripes_get_indices_at_positions")
1310 INSTR_START(instr);
1311
1312 Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
1313 const struct Xt_stripe *stripes = idxstripes->stripes;
1314
1315 int max_pos = idxlist->num_indices;
1316 int sub_pos = 0;
1317 /* initial guess which stripe to inspect for position */
1318 int istripe = 0;
1319 /* position range corresponding to stripe indexed by istripe */
1320 int stripe_start_pos = 0;
1321 int undef_count = 0;
1322
1323 for (int ipos = 0; ipos < num_pos; ipos++) {
1324
1325 int seek_pos = positions[ipos];
1326
1327 if (seek_pos >= 0 && seek_pos < max_pos) {
1328 while (seek_pos < stripe_start_pos) {
1329 istripe--;
1330#ifndef NDEBUG
1331 if (istripe < 0)
1332 die("idxstripes_get_indices_at_positions: internal error:"
1333 " crossed 0-boundary");
1334#endif
1335 stripe_start_pos -= (int)stripes[istripe].nstrides;
1336 }
1337
1338 while (seek_pos > stripe_start_pos + stripes[istripe].nstrides - 1) {
1339 stripe_start_pos += (int)stripes[istripe].nstrides;
1340 istripe++;
1341#ifndef NDEBUG
1342 if (istripe >= idxstripes->num_stripes)
1343 die("idxstripes_get_indices_at_positions: internal error:"
1344 " crossed upper boundary");
1345#endif
1346 }
1347
1348 sub_pos = seek_pos - stripe_start_pos;
1349 index[ipos]
1350 = (Xt_int)(stripes[istripe].start + sub_pos * stripes[istripe].stride);
1351 } else {
1352 index[ipos] = undef_idx;
1353 undef_count++;
1354 }
1355 }
1356
1357 INSTR_STOP(instr);
1358
1359 return undef_count;
1360}
1361
1362static int
1364 int * position) {
1365
1366 return idxstripes_get_position_of_index_off(idxlist, index, position, 0);
1367}
1368
1369static int
1371 int * position, int offset) {
1372
1373 INSTR_DEF(instr,"idxstripes_get_position_of_index_off")
1374 INSTR_START(instr);
1375
1376 int retval = 1;
1377 size_t offset_ = (offset < 0) ? (size_t)0 : (size_t)offset;
1378
1379 Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
1380 const struct Xt_stripes_sort *restrict stripes_sort
1381 = idxstripes->stripes_sort;
1382 const struct Xt_stripe *restrict stripes = idxstripes->stripes;
1383 if (offset_ < (size_t)idxlist->num_indices) {
1384 size_t num_stripes = (size_t)idxstripes->num_stripes, i;
1385 size_t position_offset = 0;
1386
1387 for (i = 0;
1388 position_offset + (size_t)stripes[i].nstrides <= offset_;
1389 ++i)
1390 position_offset += (size_t)stripes[i].nstrides;
1391
1392 for (; i < num_stripes;
1393 position_offset += (size_t)stripes[i].nstrides, ++i) {
1394
1395 size_t inv_pos = (size_t)stripes_sort[i].inv_position;
1396 struct Xt_minmax range = stripes_sort[inv_pos].range;
1397
1398 if (index > range.max || index < range.min)
1399 continue;
1400
1401 int rel_start;
1402 Xt_int stride = stripes[i].stride;
1403 if (index != stripes[i].start) {
1404 Xt_int index_offset = (Xt_int)(index - stripes[i].start);
1405 if (index_offset % stride)
1406 continue;
1407 rel_start = (int)(index_offset/stripes[i].stride);
1408 } else if (stride != 0)
1409 rel_start = 0;
1410 else
1411 rel_start = (int)(MAX(offset_, position_offset) - position_offset);
1412 *position = (int)((size_t)rel_start + position_offset);
1413 retval = 0;
1414 goto fun_exit;
1415 }
1416 }
1417
1418 *position = -1;
1419
1420 fun_exit: ;
1421 INSTR_STOP(instr);
1422 return retval;
1423}
1424
1425static inline void
1426append_ext(struct Xt_pos_ext pos_ext, struct Xt_pos_ext_vec *restrict result)
1427{
1428 size_t num_pos_exts_ = result->num_pos_ext,
1429 size_pos_exts_ = result->size_pos_ext;
1430 struct Xt_pos_ext *restrict pos_exts_ = result->pos_ext;
1431 if (!xt_can_merge_pos_ext(pos_exts_[num_pos_exts_ - 1], pos_ext))
1432 {
1433 if (num_pos_exts_ + 1 == size_pos_exts_)
1434 {
1435 size_pos_exts_ += 16;
1436 /* offsetting by 1 necessary to keep the terminator in place */
1437 result->pos_ext = pos_exts_ = (struct Xt_pos_ext *)
1438 xrealloc(pos_exts_ - 1, (size_pos_exts_ + 1) * sizeof (*pos_exts_)) + 1;
1439 result->size_pos_ext = size_pos_exts_;
1440 }
1441 pos_exts_[num_pos_exts_] = pos_ext;
1442 result->num_pos_ext = num_pos_exts_ + 1;
1443 }
1444 else {
1445 /* merge new ext with previous */
1446 pos_exts_[num_pos_exts_ - 1].size
1447 = isign(pos_ext.start - pos_exts_[num_pos_exts_ - 1].start)
1448 * (abs(pos_exts_[num_pos_exts_ - 1].size) + abs(pos_ext.size));
1449 }
1450}
1451
1460
1461static inline void
1463 Xt_idxstripes idxstripes) {
1464 const struct Xt_stripe *restrict stripes;
1465 db->stripes = stripes = idxstripes->stripes;
1466 size_t num_db_stripes = (size_t)idxstripes->num_stripes;
1467 bool stripes_do_overlap = idxstripes->flags & stripes_do_overlap_mask,
1468 stripes_do_intersect = idxstripes->flags & stripes_intersect_mask;
1469 enum {
1470 size_t_bits = sizeof (size_t) * CHAR_BIT,
1471 };
1472 size_t overlap_handling = stripes_do_overlap
1473 ? ((num_db_stripes + size_t_bits - 1)/size_t_bits+1) * sizeof (size_t) : 0,
1474 align_adjust = !stripes_do_overlap ? (num_db_stripes + 1) * sizeof (int):
1475 ((num_db_stripes + 1)*sizeof (int)+sizeof (size_t)-1)
1476 / sizeof (size_t) * sizeof (size_t);
1477 /* using num_db_stripes + 1 ensures re-aligning is always possible */
1478 int *restrict db_stripes_nstrides_psum
1479 = xmalloc(align_adjust + overlap_handling);
1480 if (stripes_do_overlap)
1481 db->stripe_candidate_bitmask
1482 = (void *)((unsigned char *)db_stripes_nstrides_psum+align_adjust);
1483 int accum = db_stripes_nstrides_psum[0] = 0;
1484 for (size_t j = 0; j < num_db_stripes; ++j) {
1485 accum += stripes[j].nstrides;
1486 db_stripes_nstrides_psum[j + 1] = accum;
1487 }
1488 db->stripes_sort = idxstripes->stripes_sort;
1489 db->num_stripes = num_db_stripes;
1490 db->stripes_nstrides_psum = db_stripes_nstrides_psum;
1491 db->stripes_do_overlap = stripes_do_overlap;
1492 db->stripes_do_intersect = stripes_do_intersect;
1493}
1494
1495static inline void
1497 free(db->stripes_nstrides_psum);
1498}
1499
1501{
1502 size_t size, num;
1503 union {
1504 int *vec;
1505 int e1;
1506 } p;
1507};
1508
1509static inline size_t
1511 const struct Xt_stripes_sort a[n],
1512 Xt_int min_key)
1513{
1514 size_t left = 0, right = n - 1; /* avoid overflow in `(left + right)/2' */
1515 if ((a && n > 0)) ; else return n; /* invalid input or empty array */
1516 while (left < right)
1517 {
1518 /* invariant: a[left].range.min <= min_key <= a[right].range.min
1519 * or not in a */
1520 /*NOTE: *intentionally* truncate for odd sum */
1521 size_t m = (left + right + 1) / 2;
1522 if (a[m].range.min > min_key)
1523 right = m - 1; /* a[left].range.min <= min_key < a[m].range.min
1524 * or min_key not in a */
1525 else
1526 left = m;/* a[m].range.min <= min_key <= a[right].range.min
1527 * or min_key not in a */
1528 }
1529 /* assert(left == right) */
1530 return a[right].range.min <= min_key ? right : n;
1531}
1532
1533
1534static void
1536 const struct Xt_stripes_lookup *restrict db,
1537 struct int_vec *candidates,
1538 bool single_match_only,
1539 struct Xt_pos_ext_vec *restrict cover,
1540 Xt_config config)
1541{
1542 (void)cover;
1543 struct Xt_minmax query_minmax = xt_stripe2minmax(query);
1544 size_t num_db_stripes = db->num_stripes;
1545 const struct Xt_stripes_sort *restrict db_stripes_sort = db->stripes_sort;
1546 size_t start_pos;
1547 /* find trivial match first if sensible */
1548 if (db->stripes_do_overlap) {
1549 if ((start_pos
1550 = bsearch_stripes_sort(num_db_stripes, db_stripes_sort,
1551 query_minmax.min))
1552 != num_db_stripes) {
1553 int stripes_start_pos = db_stripes_sort[start_pos].position;
1554 if (xt_stripes_eq(db->stripes[stripes_start_pos],
1555 query)
1556 && (!single_match_only
1557 || (!db->stripes_do_intersect
1558 && xt_cover_search(
1559 cover,
1560 (struct Xt_pos_range){
1561 .start = db->stripes_nstrides_psum[stripes_start_pos],
1562 .end = db->stripes_nstrides_psum[stripes_start_pos]
1563 + query.nstrides -1 }, 0) == SIZE_MAX
1564 ))) {
1565 candidates->num = 1;
1566 if (candidates->size == 0)
1567 candidates->p.e1 = stripes_start_pos;
1568 else
1569 candidates->p.vec[0] = stripes_start_pos;
1570 return;
1571 }
1572 }
1573 }
1574 start_pos
1575 = bsearch_stripes_sort(num_db_stripes, db_stripes_sort, query_minmax.max);
1576 if (start_pos != num_db_stripes) {
1577 assert(db_stripes_sort[start_pos].range.min <= query_minmax.max);
1578 size_t end_pos = start_pos;
1579 while (end_pos < num_db_stripes
1580 && db_stripes_sort[end_pos].range.min <= query_minmax.max)
1581 ++end_pos;
1582 /* find all overlaps (which is more complicated if
1583 * non-overlapping isn't guaranteed) */
1584 size_t num_candidates;
1585 if (!db->stripes_do_overlap)
1586 {
1587 while (start_pos > 0
1588 && (db_stripes_sort[start_pos - 1].range.max >= query_minmax.min))
1589 --start_pos;
1590 num_candidates = end_pos - start_pos;
1591 if (candidates->size == 0 && num_candidates == 1) {
1592 candidates->p.e1 = db_stripes_sort[start_pos].position;
1593 candidates->num = 1;
1594 return;
1595 }
1596 if (candidates->size < num_candidates) {
1597 int *prevp = (candidates->size == 0) ? NULL : candidates->p.vec;
1598 candidates->p.vec = xrealloc(prevp,
1599 num_candidates
1600 * sizeof (candidates->p.vec[0]));
1601 candidates->size = num_candidates;
1602 }
1603 candidates->num = num_candidates;
1604 int *restrict candidates_vec = candidates->p.vec;
1605 for (size_t i = 0; i < num_candidates; ++i)
1606 candidates_vec[i] = db_stripes_sort[start_pos + i].position;
1607 }
1608 else
1609 {
1610 num_candidates = 0;
1611 size_t min_candidate = start_pos;
1612 const struct Xt_stripe *stripes = db->stripes;
1613 Xt_int query_start = query.start, query_stride = XT_INT_ABS(query.stride);
1614#ifdef XT_USE_FAST_DIVISIBLE_TEST
1615 struct xt_fast_div_coeff stride_div_test_coeff;
1616 stride_div_test_coeff = get_fast_div_coeff((Xt_uint)query_stride);
1617#endif
1618 size_t *bm_store = db->stripe_candidate_bitmask;
1619 size_t i = end_pos - 1;
1620 enum {
1621 size_t_bits = sizeof (size_t) * CHAR_BIT,
1622 };
1623 size_t pred_accum = 0;
1624 if (i >= size_t_bits) {
1625 if ((i+1) % size_t_bits) {
1626 for (; (i+1) % size_t_bits; --i)
1627 {
1628 size_t predicate
1629 = db_stripes_sort[i].range.max >= query_minmax.min;
1630 size_t pos = (size_t)db_stripes_sort[i].position;
1631 predicate
1632 &= (XT_INT_ABS(stripes[pos].stride) != query_stride
1633 || is_divisible(query_stride, stride_div_test_coeff,
1634 (Xt_int)(query_start - stripes[pos].start)));
1635 num_candidates += predicate;
1636 size_t predicate_mask = predicate - 1;
1637 min_candidate = (min_candidate & predicate_mask)
1638 | (i & ~predicate_mask);
1639 pred_accum = (pred_accum << 1) | predicate;
1640 }
1641 bm_store[(i+1)/size_t_bits] = pred_accum;
1642 pred_accum = 0;
1643 }
1644 assert(i % size_t_bits == size_t_bits - 1);
1645 for (; i >= size_t_bits;) {
1646 for (size_t j = 0; j < size_t_bits; ++j, --i) {
1647 size_t predicate
1648 = db_stripes_sort[i].range.max >= query_minmax.min;
1649 size_t pos = (size_t)db_stripes_sort[i].position;
1650 predicate
1651 &= (XT_INT_ABS(stripes[pos].stride) != query_stride
1652 || is_divisible(query_stride, stride_div_test_coeff,
1653 (Xt_int)(query_start - stripes[pos].start)));
1654 num_candidates += predicate;
1655 size_t predicate_mask = predicate - 1;
1656 min_candidate = (min_candidate & predicate_mask)
1657 | (i & ~predicate_mask);
1658 pred_accum = (pred_accum << 1) | predicate;
1659 }
1660 bm_store[(i+1)/size_t_bits] = pred_accum;
1661 pred_accum = 0;
1662 }
1663 }
1664 for (; i != SIZE_MAX; --i)
1665 {
1666 size_t predicate
1667 = db_stripes_sort[i].range.max >= query_minmax.min;
1668 size_t pos = (size_t)db_stripes_sort[i].position;
1669 predicate
1670 &= (XT_INT_ABS(stripes[pos].stride) != query_stride
1671 || is_divisible(query_stride, stride_div_test_coeff,
1672 (Xt_int)(query_start - stripes[pos].start)));
1673 num_candidates += predicate;
1674 size_t predicate_mask = predicate - 1;
1675 min_candidate = (min_candidate & predicate_mask)
1676 | (i & ~predicate_mask);
1677 pred_accum = (pred_accum << 1) | predicate;
1678 }
1679 bm_store[0] = pred_accum;
1680 if (candidates->size == 0 && num_candidates == 1) {
1681 candidates->p.e1 = db_stripes_sort[min_candidate].position;
1682 candidates->num = num_candidates;
1683 return;
1684 }
1685 if (candidates->size < num_candidates+1) {
1686 int *prevp = (candidates->size == 0) ? NULL : candidates->p.vec;
1687 candidates->p.vec = xrealloc(prevp,
1688 (num_candidates + 1)
1689 * sizeof (candidates->p.vec[0]));
1690 candidates->size = num_candidates + 1;
1691 }
1692 candidates->num = num_candidates;
1693 int *restrict candidates_vec = candidates->p.vec;
1694 size_t j = 0,
1695 next_bm_mult = (min_candidate + size_t_bits - 1)/size_t_bits*size_t_bits,
1696 predicates = (bm_store[min_candidate/size_t_bits]
1697 >> (min_candidate % size_t_bits));
1698 if (num_candidates == 1) {
1699 end_pos = min_candidate+1;
1700 assert(predicates&1);
1701 }
1702 for (i = min_candidate; i < end_pos; next_bm_mult+=size_t_bits) {
1703 size_t ulim = MIN(end_pos, next_bm_mult);
1704 for (; i < ulim; ++i) {
1705 size_t predicate = predicates&1;
1706 predicates >>= 1;
1707 if (predicate)
1708 candidates_vec[j] = db_stripes_sort[i].position;
1709 j += predicate;
1710 }
1711 predicates = bm_store[i/size_t_bits];
1712 }
1713 assert(j == num_candidates);
1714 }
1715 config->sort_funcs->sort_int(candidates->p.vec, num_candidates);
1716 } else
1717 candidates->num = 0;
1718}
1719
1720static struct Xt_idxstripes_ *
1722 const struct Xt_stripe *restrict stripes)
1723{
1724 size_t expansion = 0;
1725 for (size_t i = 0; i < num_stripes; ++i) {
1726 expansion += stripes[i].stride == 0 ? (size_t)(stripes[i].nstrides - 1) : 0;
1727 }
1728 struct Xt_stripe *restrict expanded_stripes
1729 = xmalloc((num_stripes + expansion) * sizeof (expanded_stripes[0]));
1730 size_t j = 0;
1731 for (size_t i = 0; i < num_stripes; ++i) {
1732 struct Xt_stripe stripe = stripes[i];
1733 if (stripe.stride == 0) {
1734 for (size_t k = 0; k < (size_t)stripe.nstrides; ++k)
1735 expanded_stripes[j + k] = (struct Xt_stripe){ .start = stripe.start,
1736 .stride = 1,
1737 .nstrides = 1 };
1738 j += (size_t)stripe.nstrides;
1739 } else {
1740 expanded_stripes[j] = stripe;
1741 ++j;
1742 }
1743 }
1744 return (struct Xt_idxstripes_ *)
1745 xt_idxstripes_prealloc_new(expanded_stripes,
1746 (int)(num_stripes + expansion));
1747}
1748
1749
1750static size_t
1752 struct Xt_stripe query,
1753 const struct Xt_stripes_lookup *restrict db,
1754 struct Xt_pos_ext_vec *restrict result,
1755 struct Xt_pos_ext_vec *restrict cover,
1756 bool single_match_only,
1757 size_t num_candidates,
1758 int *restrict candidates);
1759
1760int
1762 Xt_idxlist idxlist,
1763 int num_stripes,
1764 const struct Xt_stripe stripes[num_stripes],
1765 int *num_ext,
1766 struct Xt_pos_ext **pos_exts,
1767 int single_match_only,
1768 Xt_config config)
1769{
1770 size_t unmatched = 0;
1771 struct Xt_pos_ext_vec result;
1772 result.num_pos_ext = 0;
1773 struct Xt_idxstripes_ *restrict idxstripes = (struct Xt_idxstripes_ *)idxlist;
1774 if (num_stripes > 0)
1775 {
1776 if (idxstripes->flags & stripes_some_have_zero_stride_mask)
1777 idxstripes = expand_zero_stripes((size_t)idxstripes->num_stripes,
1778 idxstripes->stripes);
1779 result.size_pos_ext = (size_t)MAX(MIN(idxstripes->num_stripes,
1780 num_stripes), 8);
1781 result.pos_ext = xmalloc((result.size_pos_ext + 1)
1782 * sizeof (*result.pos_ext));
1783 /* put non-concatenable *terminator* at offset -1 */
1784 result.pos_ext[0] = (struct Xt_pos_ext){.start = INT_MIN, .size = 0 };
1785 ++result.pos_ext;
1786 struct Xt_pos_ext_vec cover;
1787 xt_cover_start(&cover, result.size_pos_ext);
1788 struct Xt_stripes_lookup stripes_db;
1789 create_stripes_lookup(&stripes_db, idxstripes);
1790 struct int_vec candidates = { .size = 0, .p.vec = NULL };
1791 for (size_t i = 0; i < (size_t)num_stripes; ++i) {
1792 struct Xt_stripe query = stripes[i];
1793 int j = query.nstrides;
1794 query.nstrides = ((query.stride != 0) | (query.nstrides == 0))
1795 ? query.nstrides : 1;
1796 query.stride = (Xt_int)((query.stride != 0 && query.nstrides != 1)
1797 ? query.stride : (Xt_int)1);
1798 find_candidates(query, &stripes_db, &candidates,
1799 single_match_only, &cover, config);
1800 if (candidates.num) {
1801 do {
1803 query, &stripes_db, &result, &cover, single_match_only != 0,
1804 candidates.num, candidates.size != 0
1805 ? candidates.p.vec : &candidates.p.e1);
1806 } while ((stripes[i].stride == 0) & (--j > 0));
1807 }
1808 }
1809 if (candidates.size)
1810 free(candidates.p.vec);
1811 --(result.pos_ext);
1812 memmove(result.pos_ext, result.pos_ext + 1,
1813 sizeof (*result.pos_ext) * result.num_pos_ext);
1814 *pos_exts = xrealloc(result.pos_ext,
1815 sizeof (*result.pos_ext) * result.num_pos_ext);
1816 destroy_stripes_lookup(&stripes_db);
1817 xt_cover_finish(&cover);
1818 if ((struct Xt_idxstripes_ *)idxlist != idxstripes) {
1819 free(idxstripes->stripes);
1820 xt_idxlist_delete((Xt_idxlist)idxstripes);
1821 }
1822 }
1823 *num_ext = (int)result.num_pos_ext;
1824 return (int)unmatched;
1825}
1826
1827static size_t
1829 struct Xt_pos_ext pos_ext2add,
1830 const struct Xt_stripes_lookup *restrict db,
1831 struct Xt_pos_ext_vec *restrict result,
1832 struct Xt_pos_ext_vec *restrict cover,
1833 size_t num_candidates,
1834 int *restrict candidates);
1835
1837{
1840};
1841
1842static struct unmatched_tail
1844 struct Xt_stripe query,
1845 const struct Xt_stripes_lookup *restrict stripes_lookup,
1846 struct Xt_pos_ext_vec *restrict result,
1847 struct Xt_pos_ext_vec *restrict cover,
1848 bool single_match_only,
1849 size_t num_candidates,
1850 int *restrict candidates);
1851
1852static size_t
1854 struct Xt_pos_ext pos_ext2add,
1855 const struct Xt_stripes_lookup *stripes_lookup,
1856 struct Xt_pos_ext_vec *restrict result,
1857 struct Xt_pos_ext_vec *restrict cover,
1858 bool single_match_only,
1859 size_t num_candidates,
1860 int *restrict candidates)
1861{
1862 size_t unmatched = 0;
1863 if (single_match_only)
1864 unmatched += conditional_pos_ext_insert(
1865 query, pos_ext2add, stripes_lookup, result, cover,
1866 num_candidates, candidates);
1867 else
1868 append_ext(pos_ext2add, result);
1869 return unmatched;
1870}
1871
1872
1873
1874size_t
1876 struct Xt_stripe query,
1877 const struct Xt_stripes_lookup *restrict db,
1878 struct Xt_pos_ext_vec *restrict result,
1879 struct Xt_pos_ext_vec *restrict cover,
1880 bool single_match_only,
1881 size_t num_candidates,
1882 int *restrict candidates)
1883{
1884 size_t unmatched = 0;
1885 struct Xt_minmax query_minmax = xt_stripe2minmax(query);
1886 const struct Xt_stripe *restrict db_stripes = db->stripes;
1887 const int *restrict db_stripes_nstrides_psum = db->stripes_nstrides_psum;
1888 const struct Xt_stripes_sort *restrict db_stripes_sort = db->stripes_sort;
1889 for (size_t j = 0; j < num_candidates; ++j) {
1890 size_t unsort_pos = (size_t)candidates[j];
1891 size_t sort_pos = (size_t)db_stripes_sort[unsort_pos].inv_position;
1892 if ((query_minmax.min <= db_stripes_sort[sort_pos].range.max)
1893 & (query_minmax.max >= db_stripes_sort[sort_pos].range. min))
1894 ;
1895 else
1896 continue;
1897 struct Xt_stripe db_stripe = db_stripes[unsort_pos];
1898 Xt_int stride = query.stride;
1899 /* determine if db_stripe and query can be easily aligned */
1900 if ((stride > 0)
1901 & (stride == db_stripe.stride)
1902 & ((query.start - db_stripe.start) % stride == 0)) {
1903 /* divide query into skipped, matching and left-over parts,
1904 * where skipped and left-over are non-matching */
1905 Xt_int overlap_start = MAX(query.start, db_stripe.start);
1906 /* handle skipped query part */
1907 int skipLen = (int)((overlap_start - query.start) / stride);
1908 if (skipLen)
1909 {
1910 struct Xt_stripe query_head = {
1911 .start = query.start,
1912 .stride = skipLen > 1 ? stride : (Xt_int)1,
1913 .nstrides = skipLen };
1914 unmatched
1916 query_head, db, result, cover, single_match_only,
1917 num_candidates - j - 1, candidates + j + 1);
1918 query.start = (Xt_int)(query.start + stride * (Xt_int)skipLen);
1919 query.nstrides -= skipLen;
1920 query.stride = query.nstrides > 1 ? query.stride : (Xt_int)1;
1921 }
1922 int db_stripe_skip
1923 = (int)((overlap_start - db_stripe.start) / stride);
1924 int overlap_nstrides
1925 = imin(query.nstrides, db_stripe.nstrides - db_stripe_skip);
1926
1927 unmatched +=
1929 (struct Xt_stripe){ .start = query.start,
1930 .stride = overlap_nstrides > 1 ? query.stride : (Xt_int)1,
1931 .nstrides = overlap_nstrides},
1932 (struct Xt_pos_ext){ .start
1933 = db_stripes_nstrides_psum[unsort_pos] + db_stripe_skip,
1934 .size = overlap_nstrides
1935 }, db, result, cover, single_match_only, num_candidates - j - 1,
1936 candidates + j + 1);
1937
1938 if (!(query.nstrides -= overlap_nstrides))
1939 goto search_finished;
1940 else {
1941 query.start = (Xt_int)(overlap_start + stride * overlap_nstrides);
1942 query.stride = query.nstrides > 1 ? query.stride : (Xt_int)1;
1943 query_minmax = xt_stripe2minmax(query);
1944 continue;
1945 }
1946 }
1947 else if ((stride != 0)
1948 & (stride == db_stripe.stride)
1949 & ((query.start - db_stripe.start) % stride != 0)) {
1950 /* skip stripe that cannnot match at any position */
1951 } else {
1952 /* Handle complex overlap */
1953 struct unmatched_tail search_result =
1955 query, db, result, cover, single_match_only,
1956 num_candidates - j, candidates + j);
1957 unmatched += search_result.unmatched;
1958 if (!search_result.query_tail.nstrides)
1959 goto search_finished;
1960 else {
1961 query = search_result.query_tail;
1962 query_minmax = xt_stripe2minmax(query);
1963 }
1964 }
1965 }
1966 unmatched += (size_t)query.nstrides;
1967 /* query wasn't found, add all indices to unmatched */
1968search_finished:
1969 return unmatched;
1970}
1971
1972/* todo: add indices into pos_exts which are sorted by end/first of ranges */
1973static size_t
1975 struct Xt_pos_ext pos_ext2add,
1976 const struct Xt_stripes_lookup *restrict db,
1977 struct Xt_pos_ext_vec *restrict result,
1978 struct Xt_pos_ext_vec *restrict cover,
1979 size_t num_candidates,
1980 int *restrict candidates)
1981{
1982 /* single_match_only is true => never re-match positions */
1983 size_t unmatched = 0;
1984 Xt_int stride = query.stride;
1985 if (pos_ext2add.size == -1)
1986 pos_ext2add.size = 1;
1987 int querySizeMaskNeg = isign_mask(pos_ext2add.size);
1988tail_search:
1989 ;
1990 int pos_ext2add_s = pos_ext2add.start
1991 + (querySizeMaskNeg & (pos_ext2add.size + 1)),
1992 pos_ext2add_e = pos_ext2add.start
1993 + (~querySizeMaskNeg & (pos_ext2add.size - 1));
1994 struct Xt_pos_range query_range
1995 = { .start = pos_ext2add_s, .end = pos_ext2add_e };
1996 /* does overlap exist? */
1997 size_t overlap_pos =
1998 xt_cover_insert_or_overlap(cover, query_range, 0);
1999 if (overlap_pos == SIZE_MAX) {
2000 /* remaining extent does not overlap any existing one */
2001 append_ext(pos_ext2add, result);
2002 } else {
2003 struct Xt_pos_ext *restrict pos_exts_ = cover->pos_ext;
2004 int dbSizeMaskNeg = isign_mask(pos_exts_[overlap_pos].size),
2005 db_s = pos_exts_[overlap_pos].start
2006 + (dbSizeMaskNeg & (pos_exts_[overlap_pos].size + 1)),
2007 db_e = pos_exts_[overlap_pos].start
2008 + (~dbSizeMaskNeg & (pos_exts_[overlap_pos].size - 1));
2009 /* determine length of overlap parts */
2010 int lowQuerySkip = db_s - pos_ext2add_s;
2011 int lowDbSkip = -lowQuerySkip;
2012 lowQuerySkip = (int)((unsigned)(lowQuerySkip + abs(lowQuerySkip))/2);
2013 lowDbSkip = (int)((unsigned)(lowDbSkip + abs(lowDbSkip))/2);
2014 int overlapLen = MIN(db_e - db_s - lowDbSkip + 1,
2015 abs(pos_ext2add.size) - lowQuerySkip);
2016 int highQuerySkip = abs(pos_ext2add.size) - lowQuerySkip - overlapLen;
2017 /* then adjust lengths to direction of overlap (from
2018 * perspective of pos_ext2add */
2019 int querySkipLen = (~querySizeMaskNeg & lowQuerySkip)
2020 | (querySizeMaskNeg & -highQuerySkip),
2021 queryTailLen = (querySizeMaskNeg & -lowQuerySkip)
2022 | (~querySizeMaskNeg & highQuerySkip);
2023 if (querySkipLen)
2024 {
2025 int absQuerySkipLen = abs(querySkipLen);
2026 struct Xt_stripe query_skip = {
2027 .start = query.start,
2028 .stride = absQuerySkipLen > 1 ? stride : (Xt_int)1,
2029 .nstrides = absQuerySkipLen,
2030 };
2031 struct Xt_pos_ext pos_ext2add_skip = {
2032 .start = pos_ext2add.start,
2033 .size = querySkipLen
2034 };
2035 unmatched
2037 query_skip, pos_ext2add_skip, db,
2038 result, cover, num_candidates, candidates);
2039 pos_exts_ = result->pos_ext;
2040 query.start = (Xt_int)(query.start
2041 + stride * (Xt_int)absQuerySkipLen);
2042 query.nstrides -= absQuerySkipLen;
2043 query.stride = query.nstrides > 1 ? query.stride : (Xt_int)1;
2044 pos_ext2add.start += querySkipLen;
2045 pos_ext2add.size -= querySkipLen;
2046 }
2047 /* head part of (remaining) query matches part of already inserted index
2048 * range */
2049 struct Xt_stripe query_head = {
2050 .start = query.start,
2051 .stride = abs(overlapLen) > 1 ? stride : (Xt_int)1,
2052 .nstrides = abs(overlapLen) };
2053 /* restart search for overlapping part within following ranges */
2054 unmatched
2056 query_head, db, result, cover, true,
2057 num_candidates, candidates);
2058 pos_exts_ = result->pos_ext;
2059 if (queryTailLen) {
2060 /* shorten query accordingly */
2061 query.nstrides -= abs(overlapLen);
2062 query.start = (Xt_int)(query.start + stride * (Xt_int)abs(overlapLen));
2063 query.stride = query.nstrides > 1 ? query.stride : (Xt_int)1;
2064 int directedOverlapLen = (~querySizeMaskNeg & overlapLen)
2065 | (querySizeMaskNeg & -overlapLen);
2066 pos_ext2add.start += directedOverlapLen;
2067 pos_ext2add.size -= directedOverlapLen;
2068 goto tail_search;
2069 }
2070 /* whole range handled, return */
2071 }
2072 return unmatched;
2073}
2074
2075static Xt_int
2077 Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
2078 return idxstripes->min_index;
2079}
2080
2081static Xt_int
2083 Xt_idxstripes idxstripes = (Xt_idxstripes)idxlist;
2084 return idxstripes->max_index;
2085}
2086
2087static int
2089{
2090 unsigned sort_flags = (((Xt_idxstripes)idxlist)->flags >> stripes_sort_bit) & sort_mask;
2091 return (int)sort_flags-(sort_flags < 3);
2092}
2093
2094
2095/* unfortunately, Cray cc 8.[56].x miscompile the following function,
2096 * hence optimizations must be disabled. Feel free to dig deeper into
2097 * the problem, but it's fixed in 8.7. */
2098#if defined _CRAYC && _RELEASE_MAJOR == 8 && _RELEASE_MINOR >= 5 && _RELEASE_MINOR < 7
2099#pragma _CRI noopt
2100#endif
2101static struct unmatched_tail
2103 struct Xt_stripe query,
2104 const struct Xt_stripes_lookup *restrict db,
2105 struct Xt_pos_ext_vec *restrict result,
2106 struct Xt_pos_ext_vec *restrict cover,
2107 bool single_match_only,
2108 size_t num_candidates,
2109 int *restrict candidates)
2110{
2111 size_t unmatched = 0;
2112 const struct Xt_stripe *restrict db_stripes = db->stripes;
2113 size_t db_stripe_pos = (size_t)candidates[0];
2114 struct Xt_stripe overlap = get_stripe_intersection(query,
2115 db_stripes[db_stripe_pos]);
2116 if (overlap.nstrides == 0)
2117 return (struct unmatched_tail){ .unmatched = 0, .query_tail = query};
2118
2119 int skipped = (int)((overlap.start - query.start)/query.stride);
2120 if (skipped)
2121 {
2123 (struct Xt_stripe){ .start = query.start,
2124 .stride = skipped > 1 ? query.stride : (Xt_int)1,
2125 .nstrides = skipped}, db, result, cover,
2126 single_match_only, num_candidates - 1, candidates + 1);
2127 query.start = (Xt_int)(query.start + skipped * query.stride);
2128 query.nstrides -= skipped;
2129 query.stride = query.nstrides > 1 ? query.stride : 1;
2130 }
2131 /* Since overlap.nstrides > 0, overlap.start always matches, but
2132 * depending on the stride the remaining overlapping indices might or might
2133 * not be consecutive in the query, in the latter case intervening
2134 * parts need to be searched for too.
2135 * Stripes of length 1 are naturally consecutive.
2136 */
2137 int db_stripe_skip
2138 = (int)((overlap.start - db_stripes[db_stripe_pos].start)
2139 / db_stripes[db_stripe_pos].stride);
2140 int db_pos = db->stripes_nstrides_psum[db_stripe_pos] + db_stripe_skip;
2141 if (((overlap.stride == query.stride)
2142 & (overlap.stride == db_stripes[db_stripe_pos].stride))
2143 | (overlap.nstrides == 1))
2144 {
2145 unmatched += pos_ext_insert(overlap, (struct Xt_pos_ext){
2146 .start = db_pos,
2147 .size = overlap.nstrides },
2148 db, result, cover, single_match_only, num_candidates - 1, candidates + 1);
2149 query.nstrides -= overlap.nstrides;
2150 query.start = (Xt_int)(query.start + overlap.nstrides * query.stride);
2151 query.stride = query.nstrides > 1 ? query.stride : (Xt_int)1;
2152 }
2153 else if ((overlap.stride == query.stride)
2154 & (overlap.stride == -db_stripes[db_stripe_pos].stride))
2155 {
2156 /* all parts of the overlap can be used directly,
2157 but are inversely sorted in db_stripe */
2158 unmatched += pos_ext_insert(overlap, (struct Xt_pos_ext){
2159 .start = db_pos, .size = -overlap.nstrides },
2160 db, result, cover, single_match_only,
2161 num_candidates - 1, candidates + 1);
2162 query.nstrides -= overlap.nstrides;
2163 query.start = (Xt_int)(query.start + overlap.nstrides * query.stride);
2164 query.stride = query.nstrides > 1 ? query.stride : (Xt_int)1;
2165 }
2166 else if (overlap.stride == query.stride)
2167 {
2168 /* all parts of the overlap can be used but are non-consecutive
2169 * in db_stripe */
2170 int db_step = (int)(overlap.stride/db_stripes[db_stripe_pos].stride);
2171 /* todo: try to keep (prefix of) stripe together if the
2172 * corresponding positions cannot be inserted anyway */
2173 for (int i = 0; i < overlap.nstrides; ++i, db_pos += db_step)
2174 unmatched += pos_ext_insert(
2175 (struct Xt_stripe){ (Xt_int)(overlap.start + i*overlap.stride), 1, 1 },
2176 (struct Xt_pos_ext){ .start = db_pos, .size = 1 },
2177 db, result, cover, single_match_only,
2178 num_candidates - 1, candidates + 1);
2179 query.nstrides -= overlap.nstrides;
2180 query.start = (Xt_int)(query.start + overlap.nstrides * query.stride);
2181 query.stride = query.nstrides > 1 ? query.stride : (Xt_int)1;
2182 }
2183 else /* overlap.stride != query.stride => overlap.stride > query.stride */
2184 {
2185 /*
2186 * query.start = (Xt_int)(query.start
2187 * + overlap.stride * overlap.nstrides);
2188 */
2189 int stride_step = (int)(overlap.stride / query.stride);
2190 int db_step = (int)(overlap.stride/db_stripes[db_stripe_pos].stride);
2191 do {
2192 struct Xt_stripe consecutive_overlap = { .start = query.start,
2193 .stride = 1,
2194 .nstrides = 1 },
2195 intervening = { .start = (Xt_int)(query.start + query.stride),
2196 .stride = query.stride,
2197 .nstrides = MIN(query.nstrides - 1, stride_step - 1) };
2198 /* split off start index, then handle intervening */
2199 unmatched +=
2200 pos_ext_insert(consecutive_overlap, (struct Xt_pos_ext){
2201 .start = db_pos, .size = 1 },
2202 db, result, cover, single_match_only,
2203 num_candidates - 1, candidates + 1);
2204 db_pos += db_step;
2205 if (--query.nstrides > 0) {
2206 unmatched +=
2208 intervening, db, result, cover, single_match_only,
2209 num_candidates - 1, candidates + 1);
2210 query.nstrides -= intervening.nstrides;
2211 }
2212 query.start = (Xt_int)(query.start + query.stride * stride_step);
2213 query.stride = query.nstrides > 1 ? query.stride : (Xt_int)1;
2214 overlap.start = (Xt_int)(overlap.start + overlap.stride);
2215 } while (--overlap.nstrides);
2216 }
2217 return (struct unmatched_tail){ .unmatched = unmatched, .query_tail = query};
2218}
2219
2220/*
2221 * Local Variables:
2222 * c-basic-offset: 2
2223 * coding: utf-8
2224 * indent-tabs-mode: nil
2225 * show-trailing-whitespace: t
2226 * require-trailing-newline: t
2227 * End:
2228 */
int MPI_Comm
Definition core.h:64
#define XT_UNUSED(x)
Definition core.h:84
#define __attribute__(x)
Definition core.h:82
#define die(msg)
Definition core.h:131
#define ENSURE_ARRAY_SIZE(arrayp, curr_array_size, req_size)
#define INSTR_STOP(T)
Definition instr.h:69
#define INSTR_DEF(T, S)
Definition instr.h:66
#define INSTR_START(T)
Definition instr.h:68
#define PPM_DSO_INTERNAL
add versions of standard API functions not returning on error
#define xrealloc(ptr, size)
Definition ppm_xfuncs.h:71
#define xcalloc(nmemb, size)
Definition ppm_xfuncs.h:68
#define xmalloc(size)
Definition ppm_xfuncs.h:70
const struct Xt_sort_algo_funcptr * sort_funcs
const struct xt_idxlist_vtable * vtable
struct Xt_stripe * stripes
Xt_int * index_array_cache
struct Xt_stripes_sort stripes_sort[]
struct Xt_idxlist_ parent
struct Xt_pos_ext * pos_ext
Definition xt_cover.h:61
size_t num_pos_ext
Definition xt_cover.h:60
size_t size_pos_ext
Definition xt_cover.h:60
int size
Definition xt_core.h:97
int start
Definition xt_core.h:97
void(* sort_int)(int *a, size_t n)
Xt_int stride
Definition xt_stripe.h:56
int nstrides
Definition xt_stripe.h:57
Xt_int start
Definition xt_stripe.h:55
struct Xt_stripe * stripes
struct Xt_idxstripes_ * idxstripes
const struct Xt_stripe * stripes
const struct Xt_stripes_sort * stripes_sort
size_t * stripe_candidate_bitmask
struct Xt_minmax range
union int_vec::@13 p
size_t size
struct Xt_stripe query_tail
void(* delete)(Xt_idxlist)
int MPI_Type_create_struct(int count, XT_MPI2_CONST int array_of_block_lengths[], XT_MPI2_CONST MPI_Aint array_of_displacements[], XT_MPI2_CONST MPI_Datatype array_of_types[], MPI_Datatype *newtype)
int MPI_Type_create_resized(MPI_Datatype oldtype, MPI_Aint lb, MPI_Aint extent, MPI_Datatype *newtype)
int MPI_Type_free(MPI_Datatype *datatype)
int MPI_Type_commit(MPI_Datatype *datatype)
static Xt_tword xlimul(Xt_long a, Xt_int b)
static Xt_long xlladd(Xt_long a, Xt_long b)
static Xt_long xiisub(Xt_int a, Xt_int b)
static Xt_long xi2l(Xt_int a)
static Xt_ldiv xlldiv(Xt_long a, Xt_long b)
static Xt_long xliadd(Xt_long a, Xt_int b)
static Xt_idiv xlidivu(Xt_ulong a, Xt_uint b)
static int xlicmp_lt(Xt_long a, Xt_int b)
static Xt_long xiimul(Xt_int a, Xt_int b)
static bool xl_is_in_xt_int_range(Xt_long a)
static Xt_long xlabs(Xt_long a)
static Xt_long xilsub(Xt_int a, Xt_long b)
static Xt_long xlinc(Xt_long a, bool b)
static int xlsign(Xt_long x)
static int xlicmp_le(Xt_long a, Xt_int b)
static int xlicmp_ge(Xt_long a, Xt_int b)
static int isign(int x)
static int isign_mask(int x)
static Xt_int Xt_isign_mask(Xt_int x)
static int xinlz(Xt_uint v)
@ xt_int_bits
static int imin(int a, int b)
static Xt_int Xt_isign(Xt_int x)
static long long llsign(long long x)
#define is_divisible(divisor, coeff, dividend)
implementation of configuration object
#define XT_CONFIG_GET_FORCE_NOSORT(config)
base definitions header file
int xt_initialized(void)
#define Xt_int_dt
Definition xt_core.h:73
XT_INT Xt_int
Definition xt_core.h:72
unsigned XT_INT Xt_uint
Definition xt_core.h:74
size_t xt_cover_insert_or_overlap(struct Xt_pos_ext_vec *restrict cover, struct Xt_pos_range range, size_t search_start_pos)
Definition xt_cover.c:161
void xt_cover_finish(struct Xt_pos_ext_vec *restrict cover)
Definition xt_cover.c:69
void xt_cover_start(struct Xt_pos_ext_vec *restrict cover, size_t initial_size)
Definition xt_cover.c:60
size_t xt_cover_search(struct Xt_pos_ext_vec *restrict cover, struct Xt_pos_range query, size_t search_start_pos)
Definition xt_cover.c:100
#define MAX(a, b)
Definition xt_cuda.c:68
macros to create heapsort implementations
static size_t left(size_t i)
static size_t right(size_t i)
Xt_idxlist xt_idxempty_new(void)
void xt_idxlist_get_index_stripes_keep_buf(Xt_idxlist idxlist, struct Xt_stripe *stripes, size_t num_stripes_alloc)
Definition xt_idxlist.c:147
index list declaration
const Xt_int * xt_idxlist_get_indices_const(Xt_idxlist idxlist)
Definition xt_idxlist.c:119
int xt_idxlist_get_num_index_stripes(Xt_idxlist idxlist)
Definition xt_idxlist.c:129
void xt_idxlist_delete(Xt_idxlist idxlist)
Definition xt_idxlist.c:75
Provide non-public declarations common to all index lists.
static void Xt_idxlist_init(Xt_idxlist idxlist, const struct xt_idxlist_vtable *vtable, int num_indices)
@ VECTOR
@ STRIPES
static int idxstripes_get_indices_at_positions(Xt_idxlist idxlist, const int *positions, int num, Xt_int *index, Xt_int undef_idx)
static struct unmatched_tail idxstripes_complex_get_pos_exts_of_index_stripe(struct Xt_stripe query, const struct Xt_stripes_lookup *restrict stripes_lookup, struct Xt_pos_ext_vec *restrict result, struct Xt_pos_ext_vec *restrict cover, bool single_match_only, size_t num_candidates, int *restrict candidates)
static int idxstripes_get_sorting(Xt_idxlist idxlist)
const struct Xt_stripe * xt_idxstripes_get_index_stripes_const(Xt_idxlist idxlist)
static void idxstripes_delete(Xt_idxlist data)
static int idxstripes_get_num_index_stripes(Xt_idxlist idxlist)
void xt_idxstripes_initialize(void)
struct Xt_stripes_alloc xt_idxstripes_alloc(size_t num_stripes)
static struct Xt_stripe get_stripe_intersection(struct Xt_stripe stripe_a, struct Xt_stripe stripe_b)
static int idxstripes_get_position_of_index_off(Xt_idxlist idxlist, Xt_int index, int *position, int offset)
#define MIN(a, b)
static void create_stripes_lookup(struct Xt_stripes_lookup *restrict db, Xt_idxstripes idxstripes)
Xt_idxlist xt_idxstripes_sort_new(size_t num_src_stripes, const struct Xt_stripe src_stripes[], Xt_config config)
static size_t bsearch_stripes_sort(size_t n, const struct Xt_stripes_sort a[n], Xt_int min_key)
Xt_idxlist xt_idxstripes_unpack(void *buffer, int buffer_size, int *position, MPI_Comm comm)
void xt_idxstripes_finalize(void)
Xt_idxlist xt_idxstripes_prealloc_new(const struct Xt_stripe *stripes, int num_stripes)
static void idxstripes_get_indices(Xt_idxlist idxlist, Xt_int *indices)
static Xt_idxlist idxstripes_order_invert(Xt_idxlist idxlist)
static int idxstripes_get_position_of_index(Xt_idxlist idxlist, Xt_int index, int *position)
static void idxstripes_get_index_stripes(Xt_idxlist idxlist, struct Xt_stripe *restrict stripes, size_t num_stripes_alloc)
PPM_DSO_INTERNAL Xt_idxlist xt_idxstripes_get_idxvec_intersection(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst, Xt_config XT_UNUSED(config))
static size_t pos_ext_insert(struct Xt_stripe query, struct Xt_pos_ext pos_ext2add, const struct Xt_stripes_lookup *stripes_lookup, struct Xt_pos_ext_vec *restrict result, struct Xt_pos_ext_vec *restrict cover, bool single_match_only, size_t num_candidates, int *restrict candidates)
static void idxstripes_pack(Xt_idxlist data, void *buffer, int buffer_size, int *position, MPI_Comm comm)
static Xt_idxlist idxstripes_sorted_copy(Xt_idxlist idxlist, Xt_config config)
Xt_idxlist xt_idxstripes_congeal(struct Xt_stripes_alloc stripes_alloc)
static struct Xt_idxstripes_ * expand_zero_stripes(size_t num_stripes, const struct Xt_stripe *restrict stripes)
static int idxstripes_get_pos_exts_of_index_stripes(Xt_idxlist idxlist, int num_stripes, const struct Xt_stripe stripes[num_stripes], int *num_ext, struct Xt_pos_ext **pos_ext, int single_match_only, Xt_config config)
static int idxstripes_get_index_at_position(Xt_idxlist idxlist, int position, Xt_int *index)
static const Xt_int * idxstripes_get_indices_const(Xt_idxlist idxlist)
struct Xt_idxstripes_ * Xt_idxstripes
static void destroy_stripes_lookup(struct Xt_stripes_lookup *restrict db)
size_t xt_idxstripes_get_num_index_stripes(Xt_idxlist idxlist)
static void append_ext(struct Xt_pos_ext pos_ext, struct Xt_pos_ext_vec *restrict result)
static const struct xt_idxlist_vtable idxstripes_vtable
static Xt_int stripe_min(struct Xt_stripe stripe, int pfx_remove)
static Xt_idxlist idxstripes_compute_intersection(Xt_idxstripes idxstripes_src, Xt_idxstripes idxstripes_dst)
idxstripes_flag_bits
@ stripes_sort_bit
@ stripes_intersect_bit
@ stripes_do_overlap_bit
@ stripes_some_have_zero_stride_bit
static bool stripe_contains_index(struct Xt_stripe stripe, Xt_int idx)
static size_t idxstripes_get_pos_exts_of_index_stripe(struct Xt_stripe query, const struct Xt_stripes_lookup *restrict db, struct Xt_pos_ext_vec *restrict result, struct Xt_pos_ext_vec *restrict cover, bool single_match_only, size_t num_candidates, int *restrict candidates)
static Xt_idxlist compute_intersection_fallback(Xt_idxstripes idxstripes_src, Xt_idxstripes idxstripes_dst, Xt_config config)
Xt_idxlist xt_idxstripes_get_intersection(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst, Xt_config config)
static Xt_idxlist idxstripes_copy(Xt_idxlist idxlist)
static size_t idxstripes_get_pack_size(Xt_idxlist data, MPI_Comm comm)
static struct Xt_stripes_alloc idxstripes_alloc(size_t num_stripes)
static int stripe_min_cmp_lt(const struct Xt_stripe a, const struct Xt_stripe b, const int pfx_remove_a, const int pfx_remove_b)
static Xt_int idxstripes_get_max_index(Xt_idxlist idxlist)
static Xt_int idxstripes_get_min_index(Xt_idxlist idxlist)
idxstripes_flag_mask
@ stripes_some_have_zero_stride_mask
@ stripes_intersect_mask
@ stripes_do_overlap_mask
static Xt_idxlist idxstripes_aggregate(Xt_idxstripes idxstripes, const char *caller)
static void find_candidates(struct Xt_stripe query, const struct Xt_stripes_lookup *restrict db, struct int_vec *candidates, bool single_match_only, struct Xt_pos_ext_vec *restrict cover, Xt_config config)
static size_t conditional_pos_ext_insert(struct Xt_stripe query, struct Xt_pos_ext pos_ext2add, const struct Xt_stripes_lookup *restrict db, struct Xt_pos_ext_vec *restrict result, struct Xt_pos_ext_vec *restrict cover, size_t num_candidates, int *restrict candidates)
static MPI_Datatype stripe_dt
#define MAX(a, b)
static Xt_int retrieve_min(size_t num_src_stripes, const struct Xt_stripe *src_stripes, int *src_permutation, int *pfx_removed)
Xt_idxlist xt_idxstripes_from_idxlist_new(Xt_idxlist idxlist_src)
Xt_idxlist xt_idxstripes_new(struct Xt_stripe const *stripes, int num_stripes)
static bool xt_can_merge_pos_ext(struct Xt_pos_ext a, struct Xt_pos_ext b)
struct Xt_vec_alloc xt_idxvec_alloc(int num_indices)
Definition xt_idxvec.c:199
Xt_idxlist xt_idxvec_congeal(struct Xt_vec_alloc vec_alloc)
Definition xt_idxvec.c:283
macros to create mergesort implementations, 4 way top-down method
utility routines for MPI
#define xt_mpi_call(call, comm)
Definition xt_mpi.h:68
size_t xt_stripes_merge_copy(size_t num_stripes, struct Xt_stripe *stripes_dst, const struct Xt_stripe *stripes_src, bool lookback)
Definition xt_stripe.c:163
bool xt_stripes_detect_duplicate(size_t num_stripes, const struct Xt_stripe stripes[num_stripes], struct Xt_minmax index_range)
Definition xt_stripe.c:237
static struct Xt_minmax xt_stripe2minmax(struct Xt_stripe stripe)
#define XT_SORT_FLAGS(ntrans_up, ntrans_dn)
static int xt_stripes_eq(struct Xt_stripe a, struct Xt_stripe b)
@ sort_mask