Yet Another eXchange Tool 0.11.1
Loading...
Searching...
No Matches
xt_idxsection.c
Go to the documentation of this file.
1
12/*
13 * Keywords:
14 * Maintainer: Jörg Behrens <behrens@dkrz.de>
15 * Moritz Hanke <hanke@dkrz.de>
16 * Thomas Jahns <jahns@dkrz.de>
17 * URL: https://dkrz-sw.gitlab-pages.dkrz.de/yaxt/
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions are
21 * met:
22 *
23 * Redistributions of source code must retain the above copyright notice,
24 * this list of conditions and the following disclaimer.
25 *
26 * Redistributions in binary form must reproduce the above copyright
27 * notice, this list of conditions and the following disclaimer in the
28 * documentation and/or other materials provided with the distribution.
29 *
30 * Neither the name of the DKRZ GmbH nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
35 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
36 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
37 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
38 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
39 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
40 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
41 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
42 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
43 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
44 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 */
46#ifdef HAVE_CONFIG_H
47#include <config.h>
48#endif
49
50#include <assert.h>
51#include <limits.h>
52#include <stdlib.h>
53#include <stdio.h>
54#include <string.h>
55
56#include "xt_arithmetic_util.h"
57#include "xt_arithmetic_long.h"
58#include "xt/xt_idxlist.h"
59#include "xt_idxlist_internal.h"
60#include "xt/xt_idxempty.h"
61#include "xt/xt_idxvec.h"
62#include "xt/xt_idxsection.h"
64#include "xt/xt_mpi.h"
65#include "xt/xt_sort.h"
66#include "xt_idxlist_unpack.h"
67#include "core/ppm_xfuncs.h"
68#include "core/core.h"
69#include "instr.h"
70#include "xt_config_internal.h"
71#include "xt_stripe_util.h"
72#include "xt_idxvec_internal.h"
74
75static void
77
78static size_t
80
81static void
82idxsection_pack(Xt_idxlist data, void *buffer, int buffer_size,
83 int *position, MPI_Comm comm);
84
85static Xt_idxlist
87
88static Xt_idxlist
90
91static void
93
94static const Xt_int *
96
97static int
99
100static void
102 struct Xt_stripe *restrict stripes,
103 size_t num_stripes_alloc);
104
105static int
106idxsection_get_index_at_position(Xt_idxlist idxlist, int position,
107 Xt_int * index);
108
109static int
111 int * position);
112
113static int
115 int * position, int offset);
116static size_t
118 Xt_int const *selection_idx,
119 size_t num_selection, int *positions,
120 int single_match_only);
121
122static Xt_int
124
125static Xt_int
127
128static int
130
133 .get_pack_size = idxsection_get_pack_size,
134 .pack = idxsection_pack,
135 .copy = idxsection_copy,
136 .sorted_copy = idxsection_sorted_copy,
137 .get_indices = idxsection_get_indices,
138 .get_indices_const = idxsection_get_indices_const,
139 .get_num_index_stripes = idxsection_get_num_index_stripes,
140 .get_index_stripes = idxsection_get_index_stripes,
141 .get_index_at_position = idxsection_get_index_at_position,
142 .get_indices_at_positions = NULL,
143 .get_position_of_index = idxsection_get_position_of_index,
144 .get_positions_of_indices = idxsection_get_positions_of_indices,
145 .get_position_of_index_off = idxsection_get_position_of_index_off,
146 .get_positions_of_indices_off = NULL,
147 .get_min_index = idxsection_get_min_index,
148 .get_max_index = idxsection_get_max_index,
149 .get_sorting = idxsection_get_sorting,
150 .get_bounding_box = NULL,
151 .idxlist_pack_code = SECTION,
152};
153
154/* descriptor for per-dimension extent and stride */
156{
158#ifdef XT_LONG
159 struct Xt_muldiv agsmd; /* multiplicative inverse stored for
160 * fast division by absolute global stride */
161#endif
163};
164
165static MPI_Datatype dim_desc_dt;
166
167
169
185
186static int
188
189#if __GNUC__ >= 11
190__attribute__ ((access (none, 1)))
191int MPI_Get_address(XT_MPI_SEND_BUF_CONST void *location, MPI_Aint *address);
192#endif
193
194void
196{
197 struct dim_desc dim_desc;
198
199 MPI_Aint base_address, local_size_address;
200
201 MPI_Get_address(&dim_desc, &base_address);
202 MPI_Get_address(&dim_desc.local_size, &local_size_address);
203
204 enum { num_dt_components = 2 };
205 static const int block_lengths[num_dt_components] = { 2, 1 };
206 MPI_Aint displacements[num_dt_components]
207 = {0, local_size_address - base_address };
208 static const MPI_Datatype types[num_dt_components]
209 = { Xt_int_dt, MPI_INT };
210 MPI_Datatype dim_desc_dt_unaligned;
211 xt_mpi_call(MPI_Type_create_struct(num_dt_components,
212 (int *)block_lengths, displacements,
213 (MPI_Datatype *)types,
214 &dim_desc_dt_unaligned), Xt_default_comm);
215 xt_mpi_call(MPI_Type_create_resized(dim_desc_dt_unaligned, 0,
216 (MPI_Aint)sizeof(dim_desc),
217 &dim_desc_dt), Xt_default_comm);
218 xt_mpi_call(MPI_Type_free(&dim_desc_dt_unaligned), Xt_default_comm);
219 xt_mpi_call(MPI_Type_commit(&dim_desc_dt), Xt_default_comm);
220}
221
222void
224{
225 xt_mpi_call(MPI_Type_free(&dim_desc_dt), Xt_default_comm);
226}
227
228static struct Xt_minmax
229get_section_minmax(size_t num_dimensions,
230 Xt_int local_start_index,
231 const struct dim_desc dims[num_dimensions])
232{
233 // due the possibility of negative local and global sizes, the minimum and
234 // maximum can be in any corner of the n-dimensional section
235 Xt_int max = local_start_index, min = local_start_index;
236 for (size_t i = 0; i < num_dimensions; ++i) {
237 // if either local and global size are negative
238 if (dims[i].global_stride < 0)
239 min = (Xt_int)(min + (Xt_int)(dims[i].global_stride *
240 (Xt_int)(abs(dims[i].local_size) - 1)));
241 else // if local and global size are both positive or negative
242 max = (Xt_int)(max + (Xt_int)(dims[i].global_stride *
243 (Xt_int)(abs(dims[i].local_size) - 1)));
244 }
245 return (struct Xt_minmax){ .min = min, .max = max };
246}
247
248
251 unsigned flags;
252};
253
257static struct section_aggregate
258setup_dims(Xt_int start, size_t num_dimensions,
259 struct dim_desc dims[num_dimensions])
260{
261 int asc_dim = 0, dsc_dim = 0;
262 for (size_t i = 0; i < num_dimensions; ++i) {
263 /* are indices enumerated in reverse order along this dimension? */
264 int goes_rev_order = (dims[i].global_size < 0) ^ (dims[i].local_size < 0);
265 asc_dim += !goes_rev_order;
266 dsc_dim += goes_rev_order;
267 }
268 unsigned flags = XT_SORT_FLAGS(asc_dim, dsc_dim);
269 dims[num_dimensions - 1].global_stride =
270 (Xt_int)(Xt_isign(dims[num_dimensions - 1].global_size) *
271 isign(dims[num_dimensions - 1].local_size));
272#ifdef XT_LONG
273 dims[num_dimensions - 1].agsmd
274 = xt_get_mulinv(XT_INT_ABS(dims[num_dimensions - 1].global_stride));
275#endif
276 dims[num_dimensions - 1].local_stride = 1;
277
278 // compute local and global stride
279 // (local stride is always positive, global stride can be negative)
280 for (size_t i = num_dimensions - 2; i != (size_t)-1; --i) {
281 dims[i].global_stride
282 = (Xt_int)(XT_INT_ABS(dims[i+1].global_stride * dims[i+1].global_size)
283 * Xt_isign(dims[i].global_size)
284 * isign(dims[i].local_size));
285 dims[i].local_stride
286 = abs(dims[i+1].local_stride * dims[i+1].local_size);
287#ifdef XT_LONG
288 dims[i].agsmd
289 = xt_get_mulinv(XT_INT_ABS(dims[i].global_stride));
290#endif
291 }
292
293 // compute the local start index
294 // depends on global size and sign of local and global size
295 Xt_int local_start_index = start;
296 for (size_t i = num_dimensions - 1; i != (size_t)-1; --i) {
297 Xt_int adj_gstride
298 = (Xt_int)(dims[i].global_stride * isign(dims[i].local_size)),
299 start_ofs
300 = (Xt_int)(dims[i].global_size > 0 ? 0 : dims[i].global_size + 1);
301 local_start_index
302 = (Xt_int)(local_start_index
303 + (adj_gstride * (start_ofs + dims[i].local_start)));
304 if (dims[i].local_size < 0)
305 local_start_index
306 = (Xt_int)(local_start_index
307 + (dims[i].global_stride * (dims[i].local_size + 1)));
308 }
309 struct Xt_minmax mm = get_section_minmax(num_dimensions, local_start_index, dims);
310 return (struct section_aggregate){ .start = local_start_index,
311 .max = mm.max, .min = mm.min,
312 .flags = flags };
313}
314
315static int
316get_num_indices_from_local_sizes(size_t num_dimensions, const int local_size[num_dimensions])
317{
318 long long size = 1;
319
320 for (size_t i = 0; i < num_dimensions; ++i)
321 size *= abs(local_size[i]);
322 assert(size <= INT_MAX);
323
324 return (int)size;
325}
326
327Xt_idxlist xt_idxsection_new(Xt_int start, int num_dimensions,
328 const Xt_int global_size[num_dimensions],
329 const int local_size[num_dimensions],
330 const Xt_int local_start[num_dimensions]) {
331
332 INSTR_DEF(instr,"xt_idxsection_new")
333 INSTR_START(instr);
334 // ensure that yaxt is initialized
335 assert(xt_initialized());
336
337 size_t num_dimensions_ = (size_t)num_dimensions;
338 int num_indices
339 = get_num_indices_from_local_sizes(num_dimensions_, local_size);
340 Xt_idxlist result;
341 if (num_indices > 0) {
342 Xt_idxsection idxsection
343 = xmalloc(sizeof (*idxsection)
344 + num_dimensions_ * sizeof (idxsection->dims[0]));
345
346 idxsection->global_start_index = start;
347 idxsection->ndim = num_dimensions;
348
349 idxsection->index_array_cache = NULL;
350
351 for (size_t i = 0; i < num_dimensions_; ++i) {
352 idxsection->dims[i].global_size = global_size[i];
353 idxsection->dims[i].local_size = local_size[i];
354 idxsection->dims[i].local_start = local_start[i];
355 }
356 Xt_idxlist_init(&idxsection->parent, &idxsection_vtable, num_indices);
357 struct section_aggregate sa =
358 setup_dims(start, num_dimensions_, idxsection->dims);
359 idxsection->local_start_index = sa.start;
360 idxsection->min_index_cache = sa.min;
361 idxsection->max_index_cache = sa.max;
362 idxsection->flags = sa.flags;
363 result = &idxsection->parent;
364 } else {
365 result = xt_idxempty_new();
366 }
367 INSTR_STOP(instr);
368
369 return result;
370}
371
372static void
374
375 if (data == NULL) return;
376
377 Xt_idxsection section = (Xt_idxsection)data;
378
379 free(section->index_array_cache);
380 free(section);
381}
382
383static size_t
385
386 Xt_idxsection section = (Xt_idxsection)data;
387
388 int size_header, size_dim_descs, size_xt_int;
389
390 xt_mpi_call(MPI_Pack_size(2, MPI_INT, comm, &size_header), comm);
391 xt_mpi_call(MPI_Pack_size(1, Xt_int_dt, comm, &size_xt_int), comm);
392 xt_mpi_call(MPI_Pack_size(section->ndim, dim_desc_dt,
393 comm, &size_dim_descs), comm);
394
395 return (size_t)size_header + (size_t)size_dim_descs
396 + (size_t)size_xt_int;
397}
398
399static void
400idxsection_pack(Xt_idxlist data, void *buffer, int buffer_size,
401 int *position, MPI_Comm comm) {
402
403 INSTR_DEF(instr,"idxsection_pack")
404 INSTR_START(instr);
405
406 assert(data);
407 Xt_idxsection section = (Xt_idxsection)data;
408 int header[2] = { SECTION, section->ndim };
409 xt_mpi_call(MPI_Pack(header, 2, MPI_INT, buffer,
410 buffer_size, position, comm), comm);
411 xt_mpi_call(MPI_Pack(&section->global_start_index, 1, Xt_int_dt, buffer,
412 buffer_size, position, comm), comm);
413 xt_mpi_call(MPI_Pack(section->dims, section->ndim, dim_desc_dt,
414 buffer, buffer_size, position, comm), comm);
415 INSTR_STOP(instr);
416}
417
418Xt_idxlist xt_idxsection_unpack(void *buffer, int buffer_size, int *position,
419 MPI_Comm comm) {
420
421 INSTR_DEF(instr,"xt_idxsection_unpack")
422 INSTR_START(instr);
423
424 int ndim;
425 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position, &ndim, 1, MPI_INT,
426 comm), comm);
427 Xt_idxsection section
428 = xmalloc(sizeof (*section) + (size_t)ndim * sizeof(section->dims[0]));
429 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
430 &section->global_start_index, 1, Xt_int_dt, comm),
431 comm);
432 assert(ndim > 0);
433 section->index_array_cache = NULL;
434 section->ndim = ndim;
435
436 xt_mpi_call(MPI_Unpack(buffer, buffer_size, position,
437 section->dims, ndim, dim_desc_dt, comm),comm);
438
439 struct section_aggregate sa = setup_dims(
440 section->global_start_index, (size_t)ndim, section->dims);
441 section->local_start_index = sa.start;
442 section->min_index_cache = sa.min;
443 section->max_index_cache = sa.max;
444 section->flags = sa.flags;
445 int num_indices = idxsection_get_num_indices(section);
446 Xt_idxlist_init(&section->parent, &idxsection_vtable, num_indices);
447 INSTR_STOP(instr);
448 return (Xt_idxlist)section;
449}
450
453 Xt_idxlist dst_idxlist,
454 Xt_config config)
455{
456 // intersection between an idxsection and a general idxlist:
457 //
458 // performance picture:
459 // - src_idxsection is treated as too big for elemental transforms/access
460 // - dst_idxlist is considered to be small enough (subdomain like) for elemental usage
461
462 INSTR_DEF(instr,"idxsection_get_intersection_with_other_idxlist")
463 INSTR_START(instr);
464
465 size_t num_dst_idx = (size_t)(xt_idxlist_get_num_indices(dst_idxlist));
466
467 Xt_int const* dst_idx = xt_idxlist_get_indices_const(dst_idxlist);
468
469 Xt_int const * sorted_dst_idx;
470 Xt_int * temp_dst_idx = NULL;
471 /* todo: use xt_idxlist_get_sorting instead */
472 for (size_t i = 1; i < num_dst_idx; ++i)
473 if (dst_idx[i] < dst_idx[i-1])
474 goto unsorted;
475 sorted_dst_idx = dst_idx;
476 goto get_pos;
477unsorted:
478 temp_dst_idx = xmalloc(num_dst_idx * sizeof(*temp_dst_idx));
479 memcpy(temp_dst_idx, dst_idx, num_dst_idx * sizeof(*temp_dst_idx));
480
481 config->sort_funcs->sort_xt_int(temp_dst_idx, num_dst_idx);
482 sorted_dst_idx = temp_dst_idx;
483get_pos:;
484 int *pos = xmalloc(num_dst_idx * sizeof(*pos));
485 size_t num_unmatched = idxsection_get_positions_of_indices(
486 src_idxsection, sorted_dst_idx, num_dst_idx, pos, false);
487 size_t num_inter_idx = num_dst_idx - num_unmatched;
488 Xt_idxlist result;
489 if (num_inter_idx) {
490 struct Xt_vec_alloc vec_alloc = xt_idxvec_alloc((int)num_inter_idx);
491 Xt_int *restrict inter_vec = vec_alloc.vector;
492
493 for(size_t i = 0, j = 0; i < num_dst_idx && j < num_inter_idx; i++)
494 if (pos[i] >= 0)
495 inter_vec[j++] = sorted_dst_idx[i];
496 result = xt_idxvec_congeal(vec_alloc);
497 } else {
498 result = xt_idxempty_new();
499 }
500
501 free(temp_dst_idx);
502 free(pos);
503
504 INSTR_STOP(instr);
505 return result;
506}
507
508static void
510
511#undef XT_IDXSECTION_STRIPES_ISECT_SINGLE_MATCH_ONLY
512#define NUM_DIMENSIONS 1
514#undef NUM_DIMENSIONS
515#define NUM_DIMENSIONS 2
517#undef NUM_DIMENSIONS
518#define NUM_DIMENSIONS 3
520#undef NUM_DIMENSIONS
521#define NUM_DIMENSIONS 4
523#undef NUM_DIMENSIONS
525#undef NUM_DIMENSIONS
526
527
530 Xt_idxlist dst_idxlist,
531 Xt_config config)
532{
533 INSTR_DEF(instr,"idxsection_get_idxstripes_intersection")
534 INSTR_START(instr);
535 (void)config;
536 assert(dst_idxlist->vtable->idxlist_pack_code == STRIPES);
537 Xt_idxsection idxsection = (Xt_idxsection)src_idxlist;
538 Xt_idxlist result;
539 switch (idxsection->ndim) {
540 case 1:
541 result
542 = xt_idxsection_get_idxstripes_intersection_1(
543 (Xt_idxsection)src_idxlist, dst_idxlist, config);
544 break;
545 case 2:
546 result
547 = xt_idxsection_get_idxstripes_intersection_2(
548 (Xt_idxsection)src_idxlist, dst_idxlist, config);
549 break;
550 case 3:
551 result
552 = xt_idxsection_get_idxstripes_intersection_3(
553 (Xt_idxsection)src_idxlist, dst_idxlist, config);
554 break;
555 case 4:
556 result
557 = xt_idxsection_get_idxstripes_intersection_4(
558 (Xt_idxsection)src_idxlist, dst_idxlist, config);
559 break;
560 default:
561 result
562 = xt_idxsection_get_idxstripes_intersection_(
563 (Xt_idxsection)src_idxlist, dst_idxlist, config);
564 }
565 INSTR_STOP(instr);
566 return result;
567}
568
569#define XT_IDXSECTION_STRIPES_ISECT_SINGLE_MATCH_ONLY
570#define NUM_DIMENSIONS 1
572#undef NUM_DIMENSIONS
573#define NUM_DIMENSIONS 2
575#undef NUM_DIMENSIONS
576#define NUM_DIMENSIONS 3
578#undef NUM_DIMENSIONS
579#define NUM_DIMENSIONS 4
581#undef NUM_DIMENSIONS
583#undef XT_IDXSECTION_STRIPES_ISECT_SINGLE_MATCH_ONLY
584
587 Xt_idxlist dst_idxlist,
588 Xt_config config)
589{
590 INSTR_DEF(instr,"idxsection_get_idxstripes_r_intersection")
591 INSTR_START(instr);
592 (void)config;
593 assert(src_idxlist->vtable->idxlist_pack_code == STRIPES);
594 Xt_idxsection idxsection = (Xt_idxsection)dst_idxlist;
595 Xt_idxlist result;
596 switch (idxsection->ndim) {
597 case 1:
598 result
599 = xt_idxsection_get_idxstripes_intersection_sm_1(
600 (Xt_idxsection)dst_idxlist, src_idxlist, config);
601 break;
602 case 2:
603 result
604 = xt_idxsection_get_idxstripes_intersection_sm_2(
605 (Xt_idxsection)dst_idxlist, src_idxlist, config);
606 break;
607 case 3:
608 result
609 = xt_idxsection_get_idxstripes_intersection_sm_3(
610 (Xt_idxsection)dst_idxlist, src_idxlist, config);
611 break;
612 case 4:
613 result
614 = xt_idxsection_get_idxstripes_intersection_sm_4(
615 (Xt_idxsection)dst_idxlist, src_idxlist, config);
616 break;
617 default:
618 result
619 = xt_idxsection_get_idxstripes_intersection_sm_(
620 (Xt_idxsection)dst_idxlist, src_idxlist, config);
621 }
622 INSTR_STOP(instr);
623 return result;
624}
625
628 Xt_idxlist idxlist_dst,
629 Xt_config config) {
630 INSTR_DEF(instr,"idxsection_get_intersection.part")
631
632 // both lists are index section:
633
634 const struct Xt_idxsection_ *idxsection_src = (Xt_idxsection)idxlist_src,
635 *idxsection_dst = (Xt_idxsection)idxlist_dst;
636
637 if (idxsection_src->ndim != idxsection_dst->ndim ||
638 idxsection_src->global_start_index != idxsection_dst->global_start_index)
639 return xt_default_isect(idxlist_src, idxlist_dst, config);
640
641 size_t num_dimensions = (size_t)idxsection_src->ndim;
642 // the size of first global dimension is irrelevant,
643 // the others have to be identically
644 for (size_t i = 1; i < num_dimensions; ++i)
645 if (XT_INT_ABS(idxsection_src->dims[i].global_size)
646 != XT_INT_ABS(idxsection_dst->dims[i].global_size))
648 idxlist_src, idxlist_dst, config);
649
650 INSTR_START(instr);
651
652 Xt_idxsection idxsection_intersection
653 = xmalloc(sizeof (*idxsection_intersection)
654 + num_dimensions * sizeof (idxsection_intersection->dims[0]));
655 idxsection_intersection->global_start_index
656 = idxsection_src->global_start_index;
657 idxsection_intersection->ndim = (int)num_dimensions;
658
659 // dimension information for the intersection
660 struct dim_desc *restrict intersection_dims = idxsection_intersection->dims;
661 const struct dim_desc *src_dims = idxsection_src->dims,
662 *dst_dims = idxsection_dst->dims;
663
664 // indices in an intersection have to be sorted in ascending order. therefore,
665 // local and global sizes of the intersection have to be positive
666
667 for (size_t i = 0; i < num_dimensions; ++i) {
668
669 Xt_int src_start, src_end, dst_start, dst_end, local_end;
670
671 // the start value is the minmum position in the current dimension (with positive
672 // size)
673 // in case the global size of src or dst is negative the start value has to be
674 // adjusted accordingly
675
676 if (src_dims[i].global_size >= 0)
677 src_start = src_dims[i].local_start;
678 else
679 src_start = (Xt_int)(-src_dims[i].global_size
680 - abs(src_dims[i].local_size)
681 - src_dims[i].local_start);
682
683 if (dst_dims[i].global_size >= 0)
684 dst_start = dst_dims[i].local_start;
685 else
686 dst_start = (Xt_int)(-dst_dims[i].global_size
687 - abs(dst_dims[i].local_size)
688 - dst_dims[i].local_start);
689
690 src_end = (Xt_int)(src_start
691 + (Xt_int)abs(src_dims[i].local_size));
692 dst_end = (Xt_int)(dst_start
693 + (Xt_int)abs(dst_dims[i].local_size));
694
695 intersection_dims[i].local_start = (src_start > dst_start)?src_start:dst_start;
696 local_end = (src_end > dst_end)?dst_end:src_end;
697
698 if (local_end <= intersection_dims[i].local_start)
699 goto isect_empty;
700
701 intersection_dims[i].local_size
702 = (int)(local_end - intersection_dims[i].local_start);
703 intersection_dims[i].global_size
704 = XT_INT_ABS(src_dims[i].global_size);
705 }
706
707 {
708 idxsection_intersection->index_array_cache = NULL;
709 int num_indices = idxsection_get_num_indices(idxsection_intersection);
710 Xt_idxlist_init(&idxsection_intersection->parent,
711 &idxsection_vtable, num_indices);
712 struct section_aggregate sa =
713 setup_dims(idxsection_intersection->global_start_index,
714 num_dimensions, idxsection_intersection->dims);
715 idxsection_intersection->local_start_index = sa.start;
716 idxsection_intersection->min_index_cache = sa.min;
717 idxsection_intersection->max_index_cache = sa.max;
718 idxsection_intersection->flags = sa.flags;
719 goto ret_idxlist;
720 }
721isect_empty:;
722 free(idxsection_intersection);
723 idxsection_intersection = (Xt_idxsection)xt_idxempty_new();
724ret_idxlist:;
725 INSTR_STOP(instr);
726 return (Xt_idxlist)idxsection_intersection;
727}
728
729static Xt_idxlist
731
732 Xt_idxsection src = (Xt_idxsection)idxlist;
733
734 size_t num_dimensions = (size_t)src->ndim;
735
736 Xt_idxsection idxsection = xmalloc(
737 sizeof (*idxsection) + num_dimensions * sizeof (idxsection->dims[0]));
738 *idxsection = *src;
739 idxsection->index_array_cache = NULL;
740
741 memcpy(idxsection->dims, src->dims, num_dimensions * sizeof (src->dims[0]));
742
743 return (Xt_idxlist)idxsection;
744}
745
746static void
748{
749 size_t num_dimensions = (size_t)orig->ndim;
750
751 *copy = *orig;
752 copy->index_array_cache = NULL;
753
754 const struct dim_desc *restrict dd_orig = orig->dims;
755 struct dim_desc *restrict dd_dst = copy->dims;
756 for (size_t i = 0; i < num_dimensions; ++i) {
757 Xt_int gsize = dd_orig[i].global_size,
758 abs_gsize = XT_INT_ABS(gsize);
759 int lsize = dd_orig[i].local_size,
760 abs_lsize = abs(lsize);
761 dd_dst[i].global_size = abs_gsize;
762 dd_dst[i].local_size = abs_lsize;
763 dd_dst[i].local_start = gsize >= 0
764 ? dd_orig[i].local_start
765 : (Xt_int)(abs_gsize - abs_lsize - dd_orig[i].local_start);
766 }
767
768 struct section_aggregate sa = setup_dims(
769 orig->global_start_index, num_dimensions, copy->dims);
770 copy->local_start_index = sa.start;
771 copy->flags = sa.flags;
772 assert(copy->min_index_cache == sa.min
773 && copy->max_index_cache == sa.max);
774}
775
776
777static Xt_idxlist
779
780 (void)config;
781 size_t num_dimensions = (size_t)((Xt_idxsection)idxlist)->ndim;
782 Xt_idxsection sorted_idxsection = xmalloc(
783 sizeof (*sorted_idxsection)
784 + num_dimensions * sizeof (sorted_idxsection->dims[0]));
785 idxsection_init_sorted_copy((Xt_idxsection)idxlist, sorted_idxsection);
786 return (Xt_idxlist)sorted_idxsection;
787}
788
789static int
791
792 long long size = 1;
793 size_t num_dimensions = (size_t)section->ndim;
794 for (size_t i = 0; i < num_dimensions; ++i)
795 size *= abs(section->dims[i].local_size);
796 assert(size <= INT_MAX);
797
798 return (int)size;
799}
800
801
802static int
804 Xt_int start_index, Xt_int *indices,
805 size_t num_dimensions, struct dim_desc dims[num_dimensions])
806{
807
808 int abs_local_size = abs(dims[0].local_size);
809
810 if (num_dimensions == 1)
811 {
812 if (dims[0].global_stride > 0)
813 for (int i = 0; i < abs_local_size; ++i)
814 indices[i] = (Xt_int)(start_index + i);
815 else
816 for (int i = 0; i < abs_local_size; ++i)
817 indices[i] = (Xt_int)(start_index - i);
818 return abs_local_size;
819 }
820 else
821 {
822 int indices_written = 0, overflow = 0;
823 assert(num_dimensions > 1);
824 for (int dim_ofs = 0; dim_ofs < abs_local_size; ++dim_ofs)
825 {
826 int indices_written_temp
828 (Xt_int)(start_index
829 + dim_ofs * dims[0].global_stride),
830 indices + indices_written,
831 num_dimensions - 1, dims + 1);
832 overflow |= (indices_written_temp > INT_MAX - indices_written);
833 indices_written += indices_written_temp;
834 }
835 assert(!overflow);
836 return indices_written;
837 }
838}
839
840static void
842{
843 size_t num_indices = (size_t)xt_idxlist_get_num_indices(&section->parent);
844 Xt_int *indices = section->index_array_cache
845 = xmalloc(num_indices * sizeof(*(section->index_array_cache)));
847 (size_t)section->ndim, section->dims);
848}
849
850
851static void
853 INSTR_DEF(instr,"idxsection_get_indices")
854 INSTR_START(instr);
855 Xt_idxsection section = (Xt_idxsection)idxlist;
856
857 int num_indices
859
860 // if the indices are already computed
861 if (section->index_array_cache != NULL);
862 else
864 memcpy(indices, section->index_array_cache,
865 (size_t)num_indices * sizeof(*indices));
866 INSTR_STOP(instr);
867}
868
869static Xt_int const*
871
872 Xt_idxsection idxsection = (Xt_idxsection)idxlist;
873 if (idxsection->index_array_cache == NULL)
875 return idxsection->index_array_cache;
876}
877
878static size_t
880{
881 size_t num_dimensions = (size_t)section->ndim;
882 struct dim_desc *restrict dims = section->dims;
883 size_t i = num_dimensions-1;
884 while (i != SIZE_MAX && dims[i].local_size == 1)
885 --i;
886 size_t nstripes = 1;
887 for (i -= (i != SIZE_MAX); i != SIZE_MAX; --i)
888 nstripes *= (size_t)abs(dims[i].local_size);
889 return nstripes;
890}
891
892static int
894{
895 size_t nstripes
897 return (int)nstripes;
898}
899
900static void
902 struct Xt_stripe *restrict stripes,
903 size_t num_stripes_alloc)
904{
905#ifdef NDEBUG
906 (void)num_stripes_alloc;
907#endif
908 INSTR_DEF(instr,"idxsection_get_index_stripes.part")
909
910 Xt_idxsection section = (Xt_idxsection)idxlist;
911
912 size_t num_dimensions = (size_t)section->ndim;
913 const struct dim_desc *restrict dims = section->dims;
914 /* find last dimension that has local_size > 1 */
915 size_t ln1dim = num_dimensions-1;
916 while (ln1dim != SIZE_MAX && dims[ln1dim].local_size == 1)
917 --ln1dim;
918 ln1dim += ln1dim == SIZE_MAX;
919
920 size_t nstripes = idxsection_get_num_index_stripes_(section);
921
922 assert(nstripes != 0);
923
924 INSTR_START(instr);
925
926 enum { curr_local_position_auto_size=16 };
927 Xt_int curr_local_position_auto[curr_local_position_auto_size];
928 Xt_int *restrict curr_local_position;
929 if (ln1dim <= curr_local_position_auto_size) {
930 curr_local_position = curr_local_position_auto;
931 for (size_t i = 0; i < ln1dim; ++i)
932 curr_local_position[i] = 0;
933 } else
934 curr_local_position
935 = xcalloc(ln1dim, sizeof(*curr_local_position));
936
937 for (size_t i = 0; i < nstripes; ++i) {
938
939 stripes[i].start = section->local_start_index;
940 stripes[i].nstrides = abs(dims[ln1dim].local_size);
941 stripes[i].stride = dims[ln1dim].global_stride;
942
943 for (size_t j = 0; j < ln1dim; ++j)
944 stripes[i].start = (Xt_int)(stripes[i].start
945 + curr_local_position[j]
946 * dims[j].global_stride);
947
948 for (size_t j = ln1dim-1; j != (size_t)-1; --j)
949 if (curr_local_position[j] < abs(dims[j].local_size) - 1) {
950 curr_local_position[j]++;
951 break;
952 } else
953 curr_local_position[j] = 0;
954 }
955 assert(num_stripes_alloc >= nstripes);
956 if (curr_local_position != curr_local_position_auto)
957 free(curr_local_position);
958
959 INSTR_STOP(instr);
960}
961
962static int
964 Xt_int * index) {
965
966 Xt_idxsection section = (Xt_idxsection)idxlist;
967
968 if (position < 0) return 1;
969
970 size_t num_dimensions = (size_t)section->ndim;
971 Xt_int temp_index = section->local_start_index;
972
973 const struct dim_desc *dims = section->dims;
974
975 for (size_t dim = 0; dim < num_dimensions; ++dim) {
976
977 div_t qr = div(position, dims[dim].local_stride);
978 int curr_local_position = qr.quot;
979
980 if (curr_local_position >= abs(dims[dim].local_size))
981 return 1;
982
983 temp_index = (Xt_int)(temp_index
984 + curr_local_position
985 * dims[dim].global_stride);
986 position = qr.rem;
987 }
988
989 *index = temp_index;
990
991 return 0;
992}
993
994static int
996 int * position) {
997
998 INSTR_DEF(instr,"idxsection_get_position_of_index.part")
999
1000 Xt_idxsection section = (Xt_idxsection)idxlist;
1001 *position = -1;
1002
1003 if (index < section->min_index_cache || index > section->max_index_cache)
1004 return 1;
1005
1006 int retval = 1;
1007
1008 INSTR_START(instr);
1009
1010 // normalise index (global start of indices at 0)
1011 index = (Xt_int)(index - section->global_start_index);
1012
1013 int temp_position = 0;
1014 size_t num_dimensions = (size_t)section->ndim;
1015 const struct dim_desc *dims = section->dims;
1016 for (size_t i = 0; i < num_dimensions; ++i) {
1017
1018 XT_INT_DIV_T qr = Xt_div(index, dims[i].agsmd,
1019 XT_INT_ABS(dims[i].global_stride));
1020 Xt_int curr_global_position = (Xt_int)qr.quot;
1021
1022 // in case the global size is negative, we have to adjust the global position,
1023 // because the ordering of indices in this dimension is inverted
1024 if (dims[i].global_size < 0)
1025 curr_global_position
1026 = (Xt_int)(-dims[i].global_size - curr_global_position - 1);
1027
1028 index = (Xt_int)qr.rem;
1029
1030 if (curr_global_position < dims[i].local_start)
1031 goto fun_exit;
1032
1033 int curr_local_position
1034 = (int)(curr_global_position - dims[i].local_start);
1035
1036 int abs_local_size = abs(dims[i].local_size);
1037 // same adjustment for local position as for the global one before
1038 if (dims[i].local_size < 0)
1039 curr_local_position = abs_local_size - curr_local_position - 1;
1040
1041 if (curr_local_position >= abs_local_size)
1042 goto fun_exit;
1043
1044 temp_position += (int)curr_local_position * dims[i].local_stride;
1045 }
1046
1047 *position = temp_position;
1048
1049 retval = 0;
1050
1051 fun_exit: ;
1052 INSTR_STOP(instr);
1053 return retval;
1054}
1055
1056static size_t
1058 const Xt_int selection_idx[],
1059 size_t num_selection,
1060 int positions[],
1061 int single_match_only) {
1062
1063 INSTR_DEF(instr,"idxsection_get_positions_of_indices_v1.part")
1064 if (num_selection == 1)
1065 return (size_t)(idxsection_get_position_of_index(body_idxlist,
1066 *selection_idx,
1067 positions));
1068
1069 size_t num_unmatched = 0;
1070
1071 if (!single_match_only) {
1072 // this is the easy case, we don't care about multiple uses of the same position
1073 for (size_t i = 0; i < num_selection; ++i)
1074 num_unmatched
1075 += (size_t)(idxsection_get_position_of_index(body_idxlist,
1076 selection_idx[i],
1077 &positions[i]));
1078 return num_unmatched;
1079 }
1080
1081 INSTR_START(instr);
1082
1083 for (size_t i = 1; i < num_selection; ++i)
1084 if (selection_idx[i] < selection_idx[i-1])
1085 goto unsorted;
1086
1087 // indices are sorted
1088 {
1089 // we need an index that is different from the current one
1090 Xt_int prev_index = (Xt_int)(selection_idx[0] - 1);
1091
1092 for (size_t i = 0; i < num_selection; i++) {
1093
1094 Xt_int curr_index = selection_idx[i];
1095
1096 if (prev_index != curr_index) {
1097
1098 num_unmatched
1099 += (size_t)(idxsection_get_position_of_index(body_idxlist, curr_index,
1100 positions + i));
1101 prev_index = curr_index;
1102
1103 } else {
1104 // for an idxsection there is a unique map from indices to positions,
1105 // we got the same index again, so there is no match left:
1106 positions[i] = -1;
1107 num_unmatched++;
1108 }
1109 }
1110 }
1111 goto end;
1112 // indices are not sorted
1113unsorted:
1114 {
1115 // the remaining (single_match_only) case follows:
1116 idxpos_type *v = xmalloc(num_selection * sizeof(*v) );
1117 for (size_t i = 0; i < num_selection; i++) {
1118 v[i].idx = selection_idx[i];
1119 v[i].pos = (int)i;
1120 }
1121 xt_default_config.sort_funcs->sort_idxpos(v, num_selection);
1122 Xt_int last_jx = (Xt_int)(v[0].idx - 1); // any index that does not equal v[0].idx will do
1123 for (size_t i = 0; i < num_selection; i++) {
1124 int j = v[i].pos;
1125 Xt_int jx = v[i].idx;
1126 if (jx != last_jx) {
1127 num_unmatched
1128 += (size_t)(idxsection_get_position_of_index(body_idxlist, jx,
1129 &positions[j]));
1130 last_jx = jx;
1131 } else {
1132 // for an idxsection there is a unique map from indices to positions,
1133 // we got the same index again, so there is no match left:
1134 positions[j] = -1;
1135 num_unmatched++;
1136 }
1137 }
1138 free(v);
1139 }
1140end:
1141 INSTR_STOP(instr);
1142 return num_unmatched;
1143}
1144
1145static size_t
1147 const Xt_int selection_idx[],
1148 size_t num_selection, int positions[],
1149 int single_match_only) {
1150
1151 INSTR_DEF(instr,"idxsection_get_positions_of_indices_v2.part")
1152
1153 if (num_selection == 1)
1154 return (size_t)(idxsection_get_position_of_index(body_idxlist,
1155 *selection_idx, positions));
1156
1157 INSTR_START(instr);
1158
1159 Xt_int * temp_selection_idx = NULL;
1160 const Xt_int *restrict sorted_selection_idx;
1161 int * selection_pos = NULL;
1162
1163 for (size_t i = 1; i < num_selection; ++i)
1164 if (selection_idx[i] < selection_idx[i-1])
1165 goto unsorted;
1166
1167 sorted_selection_idx = selection_idx;
1168 goto sorted;
1169unsorted:
1170 // the indices are not sorted
1171 temp_selection_idx
1172 = xmalloc(num_selection * sizeof(*temp_selection_idx));
1173 memcpy(temp_selection_idx, selection_idx,
1174 num_selection * sizeof(*temp_selection_idx));
1175 selection_pos = xmalloc(num_selection * sizeof(*selection_pos));
1176
1177 xt_assign_id_map_int(num_selection, selection_pos, 0);
1179 temp_selection_idx, num_selection, selection_pos);
1180 sorted_selection_idx = temp_selection_idx;
1181
1182sorted: ;
1183 const Xt_int *body_indices = idxsection_get_indices_const(body_idxlist);
1184 size_t num_body_indices = (size_t)xt_idxlist_get_num_indices(body_idxlist);
1185
1186 // Xt_int last_idx = sorted_selection_idx[0] - 1;
1187 //
1188 // for (i = 0, j = 0; i < num_selection && j < num_body_indices; ++i) {
1189 //
1190 // while(j < num_body_indices && body_indices[j] < sorted_selection_idx[i]) ++j;
1191 //
1192 // if (j >= num_body_indices) break;
1193 //
1194 // if (!single_match_only)
1195 // positions[(selection_pos == NULL)?i:selection_pos[i]] =
1196 // (body_indices[j] == sorted_selection_idx[i])?j:-1;
1197 // else
1198 // positions[selection_pos[i]] =
1199 // ((last_idx == sorted_selection_idx[i]) ||
1200 // (body_indices[j] != sorted_selection_idx[i]))?-1:j;
1201 // }
1202
1203 // the following loops are an unrolled version of the one above
1204 size_t i = 0;
1205 if (!single_match_only) {
1206
1207 if (selection_pos == NULL) {
1208 for (size_t j = 0; i < num_selection && j < num_body_indices; ++i) {
1209
1210 while(j < num_body_indices && body_indices[j] < sorted_selection_idx[i]) ++j;
1211
1212 if (j >= num_body_indices) break;
1213
1214 positions[i] = (body_indices[j] == sorted_selection_idx[i])?(int)j:-1;
1215 }
1216 } else {
1217 for (size_t j = 0; i < num_selection && j < num_body_indices; ++i) {
1218
1219 while(j < num_body_indices && body_indices[j] < sorted_selection_idx[i]) ++j;
1220
1221 if (j >= num_body_indices) break;
1222
1223 positions[selection_pos[i]] = (body_indices[j] == sorted_selection_idx[i])?(int)j:-1;
1224 }
1225 }
1226 } else {
1227
1228 Xt_int last_idx = (Xt_int)(sorted_selection_idx[0] - 1);
1229
1230 if (selection_pos == NULL) {
1231 for (size_t j = 0; i < num_selection && j < num_body_indices; ++i) {
1232
1233 while(j < num_body_indices && body_indices[j] < sorted_selection_idx[i]) ++j;
1234
1235 if (j >= num_body_indices) break;
1236
1237 positions[i] = ((last_idx == sorted_selection_idx[i]) ||
1238 (body_indices[j] != sorted_selection_idx[i]))?-1:(int)j;
1239
1240 last_idx = sorted_selection_idx[i];
1241 }
1242 } else {
1243 for (size_t j = 0; i < num_selection && j < num_body_indices; ++i) {
1244
1245 while(j < num_body_indices && body_indices[j] < sorted_selection_idx[i]) ++j;
1246
1247 if (j >= num_body_indices) break;
1248
1249 positions[selection_pos[i]] = ((last_idx == sorted_selection_idx[i]) ||
1250 (body_indices[j] != sorted_selection_idx[i]))?-1:(int)j;
1251
1252 last_idx = sorted_selection_idx[i];
1253 }
1254 }
1255 }
1256
1257 // process indices that were not handled by the loop above
1258 if (selection_pos == NULL)
1259 for (; i < num_selection; ++i)
1260 positions[i] = -1;
1261 else
1262 for (; i < num_selection; ++i)
1263 positions[selection_pos[i]] = -1;
1264
1265 free(temp_selection_idx);
1266 free(selection_pos);
1267
1268 size_t num_unmatched = 0;
1269
1270 // count the number of unmachted indices
1271 for (size_t j = 0; j < num_selection; ++j)
1272 num_unmatched += positions[j] == -1;
1273
1274 INSTR_STOP(instr);
1275 return num_unmatched;
1276}
1277
1278static size_t
1280 int position_offset,
1281 const Xt_int indices[],
1282 size_t num_indices,
1283 int positions[],
1284 int ndim,
1285 struct dim_desc dims[ndim])
1286{
1287 size_t num_processed = 0;
1288
1289 Xt_int abs_global_size = XT_INT_ABS(dims[0].global_size);
1290 int abs_local_size = abs(dims[0].local_size);
1291 Xt_int abs_global_stride = XT_INT_ABS(dims[0].global_stride);
1292 Xt_int local_start = dims[0].local_start;
1293 // we want to work on ascending indices in the lowest dimension -> have to
1294 // adjust in case of negative global size
1295 if (dims[0].global_size < 0)
1296 local_start = (Xt_int)(abs_global_size - local_start - abs_local_size);
1297
1298 Xt_int min_index
1299 = (Xt_int)(index_offset + local_start * abs_global_stride);
1300
1301 // set all indices that are smaller than the minimum to "not found"
1302 while ((num_processed < num_indices)
1303 && (indices[num_processed] < min_index))
1304 positions[num_processed++] = -1;
1305
1306 if (ndim == 1)
1307 {
1308
1309 // if either the local or the global dimension is negative
1310 if (dims[0].global_stride < 0) {
1311
1312 // for as long as we are in the range local section of the current
1313 // global dimension
1314 Xt_int curr_position;
1315 while ((num_processed < num_indices) &&
1316 ((curr_position = (Xt_int)(indices[num_processed] - min_index)) <
1317 abs_local_size)) {
1318
1319 positions[num_processed++] = position_offset
1320 + (int)(abs_local_size - curr_position - 1);
1321 }
1322 } else { // if the local and global dimension are both negative or positive
1323
1324 // for as long as we are in the range local section of the current
1325 // global dimension
1326 Xt_int curr_position;
1327 while ((num_processed < num_indices) &&
1328 ((curr_position = (Xt_int)(indices[num_processed] - min_index)) <
1329 abs_local_size)) {
1330
1331 positions[num_processed++] = position_offset + (int)curr_position;
1332 }
1333 }
1334
1335 // for all remaining indices that are in the current global dimension but not
1336 // within the local section
1337 while ((num_processed < num_indices) &&
1338 (indices[num_processed] < index_offset + abs_global_size))
1339 positions[num_processed++] = -1;
1340
1341 } else {
1342
1343 assert(ndim > 1);
1344
1345 // while there are indices that have not yet been processed
1346 while (num_processed < num_indices) {
1347
1348 // compute global position of the smallest index that has not yet been processed
1349 XT_INT_DIV_T qr
1350 = Xt_div((Xt_int)(indices[num_processed] - index_offset),
1351 dims[0].agsmd, abs_global_stride);
1352 Xt_int curr_global_position = (Xt_int)qr.quot;
1353
1354 // if the position is outside of the range of the current dimension
1355 Xt_int abs_local_position
1356 = (Xt_int)(curr_global_position - local_start);
1357 if (abs_local_position >= abs_local_size)
1358 break;
1359
1360 int curr_local_position
1361 = dims[0].global_stride < 0
1362 // if either the local or the global dimension is negative
1363 ? (int)(abs_local_size - abs_local_position - 1)
1364 // if the local and global dimension are both negative or positive
1365 : (int)abs_local_position;
1366 Xt_int curr_index_offset
1367 = (Xt_int)(index_offset + curr_global_position * abs_global_stride);
1368 /* curr_local_position * dims[0].local_stride <= INT_MAX
1369 * because abs_local_position < abs_local_size and total mesh
1370 * size is less than INT_MAX */
1371 int position_offset_ = position_offset + curr_local_position * dims[0].local_stride;
1372
1374 curr_index_offset, position_offset_, indices + num_processed,
1375 num_indices - num_processed, positions + num_processed, ndim-1,
1376 dims + 1);
1377 }
1378 }
1379
1380 return num_processed;
1381}
1382
1383static size_t
1385 const Xt_int *restrict selection_idx,
1386 size_t num_selection,
1387 int *restrict positions,
1388 int single_match_only) {
1389
1390 INSTR_DEF(instr,"idxsection_get_positions_of_indices_v3.part")
1391 INSTR_DEF(instr2,"idxsection_get_positions_of_indices_recursive")
1392
1393 Xt_idxsection section = (Xt_idxsection)body_idxlist;
1394
1395 if (num_selection == 1)
1396 return (size_t)(idxsection_get_position_of_index(body_idxlist,
1397 *selection_idx,
1398 positions));
1399
1400 INSTR_START(instr);
1401
1402 const Xt_int * restrict sorted_selection_idx;
1403 Xt_int *temp_selection_idx = NULL;
1404 int *sorted_positions;
1405 int *selection_pos = NULL;
1406
1407 for (size_t i = 1; i < num_selection; ++i)
1408 if (selection_idx[i] < selection_idx[i-1])
1409 goto unsorted_selection;
1410
1411 sorted_selection_idx = selection_idx;
1412 sorted_positions = positions;
1413 goto sorted_selection;
1414 // if the selection is not sorted
1415unsorted_selection:
1416 temp_selection_idx
1417 = xmalloc(num_selection * sizeof(*temp_selection_idx));
1418 {
1419 size_t num_sp_alloc = num_selection;
1420#if defined _CRAYC && _RELEASE_MAJOR < 9
1421 num_sp_alloc = (num_sp_alloc + _MAXVL_32 - 1) & ~(_MAXVL_32 - 1);
1422#endif
1423 size_t total_alloc = num_sp_alloc + num_selection;
1424 sorted_positions
1425 = xmalloc(total_alloc * sizeof(*sorted_positions));
1426 selection_pos = sorted_positions + num_sp_alloc;
1427 }
1428 memcpy(temp_selection_idx, selection_idx,
1429 num_selection * sizeof(*temp_selection_idx));
1430
1431 xt_assign_id_map_int(num_selection, selection_pos, 0);
1433 temp_selection_idx, num_selection, selection_pos);
1434 sorted_selection_idx = temp_selection_idx;
1435sorted_selection:
1436
1437 INSTR_START(instr2);
1438
1439 size_t num_processed
1441 section->global_start_index,
1442 0, sorted_selection_idx, num_selection,
1443 sorted_positions, section->ndim,
1444 section->dims);
1445
1446 INSTR_STOP(instr2);
1447
1448 // set remaining index positions to -1
1449 for (size_t i = num_processed; i < num_selection; ++i)
1450 sorted_positions[i] = -1;
1451
1452 // apply single match only rule
1453 if (single_match_only)
1454 for (size_t i = 1; i < num_processed; ++i)
1455 if (sorted_selection_idx[i] == sorted_selection_idx[i-1])
1456 sorted_positions[i] = -1;
1457
1458 // convert positions if unsorted
1459 if (sorted_selection_idx != selection_idx) {
1460
1461 for (size_t i = 0; i < num_selection; ++i)
1462 positions[selection_pos[i]] = sorted_positions[i];
1463
1464 free(sorted_positions);
1465 free(temp_selection_idx);
1466 }
1467
1468 // count the number of unmached indices
1469 size_t num_unmatched = num_selection - num_processed;
1470
1471 for (size_t i = 0; i < num_processed; ++i)
1472 num_unmatched += positions[i] == -1;
1473
1474 INSTR_STOP(instr);
1475
1476 return num_unmatched;
1477}
1478
1479static size_t
1481 const Xt_int *selection_idx,
1482 size_t num_selection, int * positions,
1483 int single_match_only) {
1484
1485 INSTR_DEF(instr,"idxsection_get_positions_of_indices")
1486 Xt_idxsection section = (Xt_idxsection)body_idxlist;
1487 size_t retval = 0, num_section_indices;
1488
1489
1490 INSTR_START(instr);
1491
1492 // if any dimension of the body index list is negative we have to use the
1493 // v3 version, because the other version cannot handle negative sizes
1494 if ((section->flags & sort_mask) != sort_asc)
1495 retval = idxsection_get_positions_of_indices_v3(body_idxlist, selection_idx,
1496 num_selection, positions,
1497 single_match_only);
1498 /*
1499 * if the indices are already cached or (if the caching would not
1500 * consume too much memory and the number of selection indices are
1501 * sufficient to justify the use of cached indices)
1502 */
1503 else if ((section->index_array_cache != NULL) ||
1504 (((num_section_indices
1505 = (size_t)xt_idxlist_get_num_indices(body_idxlist))
1506 * sizeof(Xt_int)
1507 <= (size_t)128 * 1024U * 1024U)
1508 && (num_section_indices <= 1000 * num_selection)))
1509 retval = idxsection_get_positions_of_indices_v2(body_idxlist, selection_idx,
1510 num_selection, positions,
1511 single_match_only);
1512 else
1513 retval = idxsection_get_positions_of_indices_v1(body_idxlist, selection_idx,
1514 num_selection, positions,
1515 single_match_only);
1516
1517 INSTR_STOP(instr);
1518 return retval;
1519}
1520
1521static int
1523 int * position, int offset) {
1524
1525 int temp_position;
1526 // we make use of the uniqueness of the index-to-position relation:
1527 if (idxsection_get_position_of_index(idxlist, index, &temp_position)
1528 || temp_position < offset)
1529 temp_position = -1;
1530 *position = temp_position;
1531 return temp_position == -1;
1532}
1533
1534static Xt_int
1536
1537 Xt_idxsection section = (Xt_idxsection)idxlist;
1538 return section->min_index_cache;
1539}
1540
1541static Xt_int
1543
1544 Xt_idxsection section = (Xt_idxsection)idxlist;
1545 return section->max_index_cache;
1546}
1547
1548static int
1550{
1551 unsigned sort_flags = (((Xt_idxsection)idxlist)->flags) & sort_mask;
1552 return (int)sort_flags-(sort_flags < 3);
1553}
1554
1555/*
1556 * Local Variables:
1557 * c-basic-offset: 2
1558 * coding: utf-8
1559 * indent-tabs-mode: nil
1560 * show-trailing-whitespace: t
1561 * require-trailing-newline: t
1562 * End:
1563 */
int MPI_Comm
Definition core.h:64
#define __attribute__(x)
Definition core.h:82
#define INSTR_STOP(T)
Definition instr.h:69
#define INSTR_DEF(T, S)
Definition instr.h:66
#define INSTR_START(T)
Definition instr.h:68
add versions of standard API functions not returning on error
#define xcalloc(nmemb, size)
Definition ppm_xfuncs.h:68
#define xmalloc(size)
Definition ppm_xfuncs.h:70
const struct Xt_sort_algo_funcptr * sort_funcs
const struct xt_idxlist_vtable * vtable
Xt_int * index_array_cache
Xt_int global_start_index
struct Xt_idxlist_ parent
Xt_int local_start_index
struct dim_desc dims[]
void(* sort_xt_int_permutation)(Xt_int a[], size_t n, int permutation[])
void(* sort_idxpos)(idxpos_type *v, size_t n)
void(* sort_xt_int)(Xt_int *a, size_t n)
Xt_int global_size
Xt_int local_start
Xt_int global_stride
void(* delete)(Xt_idxlist)
int MPI_Type_create_struct(int count, XT_MPI2_CONST int array_of_block_lengths[], XT_MPI2_CONST MPI_Aint array_of_displacements[], XT_MPI2_CONST MPI_Datatype array_of_types[], MPI_Datatype *newtype)
int MPI_Type_create_resized(MPI_Datatype oldtype, MPI_Aint lb, MPI_Aint extent, MPI_Datatype *newtype)
int MPI_Type_free(MPI_Datatype *datatype)
int MPI_Type_commit(MPI_Datatype *datatype)
#define Xt_div(num, muldiv, den)
static int isign(int x)
static Xt_int Xt_isign(Xt_int x)
struct Xt_config_ xt_default_config
Definition xt_config.c:203
implementation of configuration object
int xt_initialized(void)
#define Xt_int_dt
Definition xt_core.h:73
XT_INT Xt_int
Definition xt_core.h:72
Xt_idxlist xt_idxempty_new(void)
index list declaration
const Xt_int * xt_idxlist_get_indices_const(Xt_idxlist idxlist)
Definition xt_idxlist.c:119
Provide non-public declarations common to all index lists.
PPM_DSO_INTERNAL Xt_idxlist xt_default_isect(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst, Xt_config config)
#define xt_idxlist_get_num_indices(idxlist)
static void Xt_idxlist_init(Xt_idxlist idxlist, const struct xt_idxlist_vtable *vtable, int num_indices)
@ STRIPES
@ SECTION
Xt_idxlist xt_idxsection_get_idxstripes_r_intersection(Xt_idxlist src_idxlist, Xt_idxlist dst_idxlist, Xt_config config)
static Xt_idxlist idxsection_copy(Xt_idxlist idxlist)
static size_t idxsection_get_positions_of_indices(Xt_idxlist body_idxlist, Xt_int const *selection_idx, size_t num_selection, int *positions, int single_match_only)
static const struct xt_idxlist_vtable idxsection_vtable
Xt_idxlist xt_idxsection_get_intersection_with_other_idxlist(Xt_idxlist src_idxsection, Xt_idxlist dst_idxlist, Xt_config config)
static struct section_aggregate setup_dims(Xt_int start, size_t num_dimensions, struct dim_desc dims[num_dimensions])
static size_t idxsection_get_positions_of_indices_v1(Xt_idxlist body_idxlist, const Xt_int selection_idx[], size_t num_selection, int positions[], int single_match_only)
static size_t idxsection_get_positions_of_indices_v2(Xt_idxlist body_idxlist, const Xt_int selection_idx[], size_t num_selection, int positions[], int single_match_only)
void xt_idxsection_initialize(void)
static void idxsection_create_index_array_cache(Xt_idxsection section)
static int idxsection_get_indices_any(Xt_int start_index, Xt_int *indices, size_t num_dimensions, struct dim_desc dims[num_dimensions])
static MPI_Datatype dim_desc_dt
void xt_idxsection_finalize(void)
static int idxsection_get_position_of_index_off(Xt_idxlist idxlist, Xt_int index, int *position, int offset)
static struct Xt_minmax get_section_minmax(size_t num_dimensions, Xt_int local_start_index, const struct dim_desc dims[num_dimensions])
static size_t idxsection_get_num_index_stripes_(Xt_idxsection section)
static int get_num_indices_from_local_sizes(size_t num_dimensions, const int local_size[num_dimensions])
static Xt_int idxsection_get_max_index(Xt_idxlist idxlist)
static void idxsection_delete(Xt_idxlist data)
Xt_idxlist xt_idxsection_unpack(void *buffer, int buffer_size, int *position, MPI_Comm comm)
static void idxsection_init_sorted_copy(Xt_idxsection orig, Xt_idxsection copy)
static int idxsection_get_num_indices(Xt_idxsection section)
static int idxsection_get_num_index_stripes(Xt_idxlist idxlist)
static void idxsection_pack(Xt_idxlist data, void *buffer, int buffer_size, int *position, MPI_Comm comm)
static size_t idxsection_get_pack_size(Xt_idxlist data, MPI_Comm comm)
static size_t idxsection_get_positions_of_indices_recursive(Xt_int index_offset, int position_offset, const Xt_int indices[], size_t num_indices, int positions[], int ndim, struct dim_desc dims[ndim])
static int idxsection_get_sorting(Xt_idxlist idxlist)
static int idxsection_get_index_at_position(Xt_idxlist idxlist, int position, Xt_int *index)
static size_t idxsection_get_positions_of_indices_v3(Xt_idxlist body_idxlist, const Xt_int *restrict selection_idx, size_t num_selection, int *restrict positions, int single_match_only)
static int idxsection_get_position_of_index(Xt_idxlist idxlist, Xt_int index, int *position)
static Xt_idxlist idxsection_sorted_copy(Xt_idxlist idxlist, Xt_config config)
struct Xt_idxsection_ * Xt_idxsection
Xt_idxlist xt_idxsection_get_idxstripes_intersection(Xt_idxlist src_idxlist, Xt_idxlist dst_idxlist, Xt_config config)
static void idxsection_get_index_stripes(Xt_idxlist idxlist, struct Xt_stripe *restrict stripes, size_t num_stripes_alloc)
static Xt_int idxsection_get_min_index(Xt_idxlist idxlist)
Xt_idxlist xt_idxsection_get_intersection(Xt_idxlist idxlist_src, Xt_idxlist idxlist_dst, Xt_config config)
static void idxsection_get_indices(Xt_idxlist idxlist, Xt_int *indices)
static const Xt_int * idxsection_get_indices_const(Xt_idxlist idxlist)
Xt_idxlist xt_idxsection_new(Xt_int start, int num_dimensions, const Xt_int global_size[num_dimensions], const int local_size[num_dimensions], const Xt_int local_start[num_dimensions])
struct Xt_vec_alloc xt_idxvec_alloc(int num_indices)
Definition xt_idxvec.c:199
Xt_idxlist xt_idxvec_congeal(struct Xt_vec_alloc vec_alloc)
Definition xt_idxvec.c:283
utility routines for MPI
#define xt_mpi_call(call, comm)
Definition xt_mpi.h:68
void xt_assign_id_map_int(size_t n, int *restrict a, int ofs)
Definition xt_sort.c:77
#define XT_SORT_FLAGS(ntrans_up, ntrans_dn)
@ sort_mask
@ sort_asc