27static void utest_init_data(
29 double **** src_fields_frac_mask_collection,
double *** tgt_field_collection);
30static void utest_free_data(
32 double *** src_fields_frac_mask_collection,
double ** tgt_field_collection);
34 Xt_redist * src_redists,
35 size_t * num_src_points_per_field,
36 size_t * tgt_pos,
size_t tgt_count,
37 size_t * num_src_per_tgt,
double * weights,
38 size_t * src_field_idx,
size_t * src_idx,
39 size_t num_src_fields,
40 double frac_mask_fallback_value,
41 double *** src_fields_collection,
42 double *** src_fields_frac_mask_collection,
43 double ** tgt_field_collection);
52#define TGT_UNSET_VALUE (-1.0)
57 xt_initialize(MPI_COMM_WORLD);
59 int comm_rank, comm_size;
60 MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank);
61 MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
64 PUT_ERR(
"ERROR: test requires 2 processes");
76 } collection_selection_configs[] =
78 {.N = 3, .indices = (
size_t[]){0, 3, 2}},
80 {.N = 4, .indices = (
size_t[]){0, 1, 2, 3}},
83 enum { NUM_COLL_SEL_CONFIGS =
84 sizeof(collection_selection_configs) /
85 sizeof(collection_selection_configs[0]) };
88 for (
size_t sel_idx = 0; sel_idx < NUM_COLL_SEL_CONFIGS; ++sel_idx) {
92 collection_selection_configs[sel_idx].
N,
93 collection_selection_configs[sel_idx].indices);
144 size_t * num_src_per_tgt;
147 size_t * src_field_idx;
159 size_t num_src_fields;
164 {{.src_global_ids_per_field = {NULL},
165 .num_src_points_per_field = {0},
168 .num_src_per_tgt = NULL,
170 .src_field_idx = NULL,
174 {.src_global_ids_per_field = {NULL},
175 .num_src_points_per_field = {0},
178 .num_src_per_tgt = NULL,
180 .src_field_idx = NULL,
184 .num_src_fields = 1},
195 {{.src_global_ids_per_field = {NULL, NULL, NULL},
196 .num_src_points_per_field = {0,0,0},
199 .num_src_per_tgt = NULL,
201 .src_field_idx = NULL,
205 {.src_global_ids_per_field = {NULL, (Xt_int[]){0,1}, NULL},
206 .num_src_points_per_field = {0,2,0},
207 .tgt_pos = (
size_t[]){1,3},
209 .num_src_per_tgt = (
size_t[]){2,2},
210 .weights = (
double[]){0.5,0.5, 0.5,0.5},
211 .src_field_idx = (
size_t[]){SIZE_MAX, 1, SIZE_MAX, 1},
212 .src_idx = (
size_t[]){0,0,1,1},
215 .num_src_fields = 3},
220 {{.src_global_ids_per_field = {(Xt_int[]){4,5,6,7}},
221 .num_src_points_per_field = {4},
222 .tgt_pos = (
size_t[]){0,1,2,3},
224 .num_src_per_tgt = (
size_t[]){1,1,1,1},
225 .weights = (
double[]){1.0, 1.0, 1.0, 1.0},
226 .src_field_idx = (
size_t[]){SIZE_MAX, SIZE_MAX, SIZE_MAX, SIZE_MAX},
227 .src_idx = (
size_t[]){0,1,2,3},
230 {.src_global_ids_per_field = {(Xt_int[]){0,1,2,3}},
231 .num_src_points_per_field = {4},
232 .tgt_pos = (
size_t[]){0,1,2,3},
234 .num_src_per_tgt = (
size_t[]){1,1,1,1},
235 .weights = (
double[]){1.0, 1.0, 1.0, 1.0},
236 .src_field_idx = (
size_t[]){SIZE_MAX, SIZE_MAX, SIZE_MAX, SIZE_MAX},
237 .src_idx = (
size_t[]){0,1,2,3},
240 .num_src_fields = 1},
259 {{.src_global_ids_per_field = {NULL,
261 (Xt_int[]){4,5,6,7}},
262 .num_src_points_per_field = {0,1,4},
263 .tgt_pos = (
size_t[]){0,2},
265 .num_src_per_tgt = (
size_t[]){3,8},
266 .weights = (
double[]){0.2,0.4,0.4,
267 1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0},
268 .src_field_idx = (
size_t[]){0,SIZE_MAX,SIZE_MAX,
270 SIZE_MAX,SIZE_MAX,SIZE_MAX,SIZE_MAX},
271 .src_idx = (
size_t[]){1,0,4,
275 {.src_global_ids_per_field = {(Xt_int[]){0,2},
278 .num_src_points_per_field = {2,1,3},
279 .tgt_pos = (
size_t[]){1,3},
281 .num_src_per_tgt = (
size_t[]){2,7},
282 .weights = (
double[]){0.5,0.5,
283 0.1,0.1,0.1,0.1,0.1,0.1,0.1},
284 .src_field_idx = (
size_t[]){SIZE_MAX,SIZE_MAX,
285 SIZE_MAX,SIZE_MAX,SIZE_MAX,SIZE_MAX,
287 .src_idx = (
size_t[]){3,5,
291 .num_src_fields = 3},
306 {{.src_global_ids_per_field = {NULL, NULL},
307 .num_src_points_per_field = {0,0},
308 .tgt_pos = (
size_t[]){0,1,2,3},
310 .num_src_per_tgt = (
size_t[]){2,2,2,2},
311 .weights = (
double[]){0.5,0.5, 0.5,0.5, 0.5,0.5, 0.5,0.5},
312 .src_field_idx = (
size_t[]){0,0, 0,1, 1,0, 1,1},
313 .src_idx = (
size_t[]){0,0, 1,1, 2,2, 3,3},
316 {.src_global_ids_per_field = {NULL, NULL},
317 .num_src_points_per_field = {0,0},
318 .tgt_pos = (
size_t[]){0,2},
320 .num_src_per_tgt = (
size_t[]){2,2},
321 .weights = (
double[]){0.5,0.5, 0.5,0.5},
322 .src_field_idx = (
size_t[]){0,1, 0,1},
323 .src_idx = (
size_t[]){0,0, 2,2},
326 .num_src_fields = 2}};
328 NUM_INTERP_CONFIGS =
sizeof(interp_configs) /
sizeof(interp_configs[0])};
337 src_global_ids[src_idx] =
339 Xt_idxlist src_idxlist =
343 for (
size_t interp_config_idx = 0; interp_config_idx < NUM_INTERP_CONFIGS;
344 ++interp_config_idx) {
348 size_t num_src_fields =
349 interp_configs[interp_config_idx].num_src_fields;
350 Xt_redist src_redists[num_src_fields];
351 for (
size_t src_field_idx = 0; src_field_idx < num_src_fields;
356 Xt_idxlist required_source_idxlist =
357 (interp_configs[interp_config_idx].
358 interp_data[comm_rank].
359 num_src_points_per_field[src_field_idx] > 0)?
361 interp_configs[interp_config_idx].
362 interp_data[comm_rank].
363 src_global_ids_per_field[src_field_idx],
364 interp_configs[interp_config_idx].
365 interp_data[comm_rank].
366 num_src_points_per_field[src_field_idx]):
370 src_idxlist, required_source_idxlist, MPI_COMM_WORLD);
371 src_redists[src_field_idx] = xt_redist_p2p_new(xmap, MPI_DOUBLE);
372 xt_xmap_delete(xmap);
373 xt_idxlist_delete(required_source_idxlist);
377 for (
int with_frac_mask = 0; with_frac_mask <= 1; ++with_frac_mask) {
381 double frac_mask_fallback_value =
384 for (
int use_weights = 0; use_weights <= 1; ++use_weights) {
389 interp_configs[interp_config_idx].interp_data[comm_rank].weights:
394 size_t total_num_receive_src_points = 0;
396 i < interp_configs[interp_config_idx].
397 interp_data[comm_rank].tgt_count; ++i) {
398 total_num_receive_src_points +=
399 interp_configs[interp_config_idx].
400 interp_data[comm_rank].num_src_per_tgt[i];
402 for (
size_t i = 0; i < total_num_receive_src_points; ++i) {
404 (interp_configs[interp_config_idx].
405 interp_data[comm_rank].src_field_idx[i] == SIZE_MAX) ||
406 (interp_configs[interp_config_idx].
407 interp_data[comm_rank].src_field_idx[i] < num_src_fields),
408 "ERROR in src_field_idx");
410 interp_configs[interp_config_idx].
411 interp_data[comm_rank].src_idx[i] <
412 total_num_receive_src_points,
421 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_pos,
422 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_count,
423 interp_configs[interp_config_idx].interp_data[comm_rank].num_src_per_tgt,
425 interp_configs[interp_config_idx].interp_data[comm_rank].src_field_idx,
426 interp_configs[interp_config_idx].interp_data[comm_rank].src_idx,
427 num_src_fields, with_frac_mask);
431 for (
int use_copy = 0; use_copy <= 1; ++use_copy) {
437 interp = interp_copy;
443 interp_configs[interp_config_idx].
444 interp_data[comm_rank].is_source)
448 interp_configs[interp_config_idx].
449 interp_data[comm_rank].is_target)
456 double *** src_fields_collection;
457 double *** src_fields_frac_mask_collection;
458 double ** tgt_field_collection;
460 sel, &src_fields_collection, &src_fields_frac_mask_collection,
461 &tgt_field_collection);
464 interp, src_fields_collection, src_fields_frac_mask_collection,
465 tgt_field_collection, frac_mask_fallback_value, 1.0, 0.0);
470 PUT_ERR(
"ERROR in execute_put_test");
473 PUT_ERR(
"ERROR in execute_get_test");
476 utest_check_sum_mvp_at_tgt(
478 interp_configs[interp_config_idx].interp_data[comm_rank].num_src_points_per_field,
479 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_pos,
480 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_count,
481 interp_configs[interp_config_idx].interp_data[comm_rank].num_src_per_tgt,
483 interp_configs[interp_config_idx].interp_data[comm_rank].src_field_idx,
484 interp_configs[interp_config_idx].interp_data[comm_rank].src_idx,
485 num_src_fields, frac_mask_fallback_value,
486 src_fields_collection, src_fields_frac_mask_collection,
487 tgt_field_collection);
491 sel, src_fields_collection, src_fields_frac_mask_collection,
492 tgt_field_collection);
498 double *** src_fields_collection;
499 double *** src_fields_frac_mask_collection;
500 double ** tgt_field_collection;
502 sel, &src_fields_collection, &src_fields_frac_mask_collection,
503 &tgt_field_collection);
506 interp, src_fields_collection,
507 src_fields_frac_mask_collection,
508 1, frac_mask_fallback_value, 1.0, 0.0);
510 interp, tgt_field_collection,
511 frac_mask_fallback_value, 1.0, 0.0);
516 PUT_ERR(
"ERROR in execute_put_test");
519 PUT_ERR(
"ERROR in execute_get_test");
522 utest_check_sum_mvp_at_tgt(
524 interp_configs[interp_config_idx].interp_data[comm_rank].num_src_points_per_field,
525 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_pos,
526 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_count,
527 interp_configs[interp_config_idx].interp_data[comm_rank].num_src_per_tgt,
529 interp_configs[interp_config_idx].interp_data[comm_rank].src_field_idx,
530 interp_configs[interp_config_idx].interp_data[comm_rank].src_idx,
531 num_src_fields, frac_mask_fallback_value,
532 src_fields_collection, src_fields_frac_mask_collection,
533 tgt_field_collection);
537 sel, src_fields_collection, src_fields_frac_mask_collection,
538 tgt_field_collection);
544 double *** src_fields_collection;
545 double *** src_fields_frac_mask_collection;
546 double ** tgt_field_collection;
548 sel, &src_fields_collection, &src_fields_frac_mask_collection,
549 &tgt_field_collection);
552 interp, src_fields_collection,
553 src_fields_frac_mask_collection,
554 1, frac_mask_fallback_value, 1.0, 0.0);
556 interp, tgt_field_collection,
557 frac_mask_fallback_value, 1.0, 0.0);
563 PUT_ERR(
"ERROR in execute_put_test");
566 PUT_ERR(
"ERROR in execute_get_test");
569 utest_check_sum_mvp_at_tgt(
571 interp_configs[interp_config_idx].interp_data[comm_rank].num_src_points_per_field,
572 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_pos,
573 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_count,
574 interp_configs[interp_config_idx].interp_data[comm_rank].num_src_per_tgt,
576 interp_configs[interp_config_idx].interp_data[comm_rank].src_field_idx,
577 interp_configs[interp_config_idx].interp_data[comm_rank].src_idx,
578 num_src_fields, frac_mask_fallback_value,
579 src_fields_collection, src_fields_frac_mask_collection,
580 tgt_field_collection);
584 sel, src_fields_collection, src_fields_frac_mask_collection,
585 tgt_field_collection);
591 double *** src_fields_collection;
592 double *** src_fields_frac_mask_collection;
593 double ** tgt_field_collection;
595 sel, &src_fields_collection, &src_fields_frac_mask_collection,
596 &tgt_field_collection);
599 interp, src_fields_collection,
600 src_fields_frac_mask_collection,
601 1, frac_mask_fallback_value, 1.0, 0.0);
603 interp, tgt_field_collection,
604 frac_mask_fallback_value, 1.0, 0.0);
612 PUT_ERR(
"ERROR in execute_put_test");
615 PUT_ERR(
"ERROR in execute_get_test");
618 utest_check_sum_mvp_at_tgt(
620 interp_configs[interp_config_idx].interp_data[comm_rank].num_src_points_per_field,
621 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_pos,
622 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_count,
623 interp_configs[interp_config_idx].interp_data[comm_rank].num_src_per_tgt,
625 interp_configs[interp_config_idx].interp_data[comm_rank].src_field_idx,
626 interp_configs[interp_config_idx].interp_data[comm_rank].src_idx,
627 num_src_fields, frac_mask_fallback_value,
628 src_fields_collection, src_fields_frac_mask_collection,
629 tgt_field_collection);
633 sel, src_fields_collection, src_fields_frac_mask_collection,
634 tgt_field_collection);
640 double *** src_fields_collection;
641 double *** src_fields_frac_mask_collection;
642 double ** tgt_field_collection;
644 sel, &src_fields_collection, &src_fields_frac_mask_collection,
645 &tgt_field_collection);
648 interp, tgt_field_collection,
649 frac_mask_fallback_value, 1.0, 0.0);
651 interp, src_fields_collection,
652 src_fields_frac_mask_collection,
653 1, frac_mask_fallback_value, 1.0, 0.0);
659 PUT_ERR(
"ERROR in execute_put_test");
662 PUT_ERR(
"ERROR in execute_get_test");
665 utest_check_sum_mvp_at_tgt(
667 interp_configs[interp_config_idx].interp_data[comm_rank].num_src_points_per_field,
668 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_pos,
669 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_count,
670 interp_configs[interp_config_idx].interp_data[comm_rank].num_src_per_tgt,
672 interp_configs[interp_config_idx].interp_data[comm_rank].src_field_idx,
673 interp_configs[interp_config_idx].interp_data[comm_rank].src_idx,
674 num_src_fields, frac_mask_fallback_value,
675 src_fields_collection, src_fields_frac_mask_collection,
676 tgt_field_collection);
680 sel, src_fields_collection, src_fields_frac_mask_collection,
681 tgt_field_collection);
690 for (
size_t f = 0; f < num_src_fields; ++f)
691 xt_redist_delete(src_redists[f]);
695 xt_idxlist_delete(src_idxlist);
705static void utest_init_data(
707 double **** src_fields_frac_mask_collection,
708 double *** tgt_field_collection) {
713 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
715 *src_fields_collection =
717 *src_fields_frac_mask_collection =
722 (*src_fields_collection)[c] =
724 (*src_fields_frac_mask_collection)[c] =
729 (*src_fields_collection)[c][f] =
731 (*src_fields_frac_mask_collection)[c][f] =
735 (*src_fields_collection)[c][f][
i] =
736 (double)(c*1000 + f*100 + rank*10 + i);
737 (*src_fields_frac_mask_collection)[c][f][
i] = 1.0;
742 *tgt_field_collection =
747 (*tgt_field_collection)[c] =
756static void utest_free_data(
758 double *** src_fields_frac_mask_collection,
double ** tgt_field_collection) {
766 free(src_fields_collection[c][f]);
767 free(src_fields_frac_mask_collection[c][f]);
770 free(src_fields_collection[c]);
771 free(src_fields_frac_mask_collection[c]);
774 free(src_fields_collection);
775 free(src_fields_frac_mask_collection);
779 free(tgt_field_collection[c]);
782 free(tgt_field_collection);
786 Xt_redist * src_redists,
787 size_t * num_src_points_per_field,
788 size_t * tgt_pos,
size_t tgt_count,
789 size_t * num_src_per_tgt,
double * weights,
790 size_t * src_field_idx,
size_t * src_idx,
791 size_t num_src_fields,
792 double frac_mask_fallback_value,
793 double *** src_fields_collection,
794 double *** src_fields_frac_mask_collection,
795 double ** tgt_field_collection) {
803 double ** ref_tgt_field_collection =
806 ref_tgt_field_collection[c] =
815 size_t * prefix_num_src_per_tgt =
816 xmalloc((tgt_count+1) *
sizeof(*prefix_num_src_per_tgt));
817 prefix_num_src_per_tgt[0] = 0;
818 for (
size_t i = 0, accu = 0;
i < tgt_count; ++
i) {
819 accu += num_src_per_tgt[
i];
820 prefix_num_src_per_tgt[
i+1] = accu;
825 double *** cont_src_fields_collection =
827 double *** cont_src_fields_frac_mask_collection =
831 size_t c_idx = (collection_indices == NULL) ? c : collection_indices[c];
832 cont_src_fields_collection[c] =
833 src_fields_collection[c_idx];
834 cont_src_fields_frac_mask_collection[c] =
835 src_fields_frac_mask_collection[c_idx];
845 enum {SRC_FIELD = 0, SRC_FRAC_MASK = 1};
849 for (
size_t i = 0;
i < 2; ++
i) {
852 remote_src_buffer[
i][c] =
855 sizeof(remote_src_buffer[i][c][0]));
859 remote_src_buffer[
i][c][k] = NAN;
864 for (
size_t f = 0, offset = 0; f < num_src_fields; ++f) {
865 remote_src_fields[c][f] =
866 &remote_src_buffer[SRC_FIELD][c][offset];
867 remote_src_frac_masks[c][f] =
868 &remote_src_buffer[SRC_FRAC_MASK][c][offset];
869 offset += num_src_points_per_field[f];
877 size_t c_idx = (collection_indices == NULL) ? c : collection_indices[c];
880 for (
size_t f = 0; f < num_src_fields; ++f) {
884 xt_redist_s_exchange1(
886 (
const void *)(src_fields_collection[c_idx][f]),
887 (
void *)(remote_src_fields[c][f]));
888 xt_redist_s_exchange1(
890 (
const void *)(src_fields_frac_mask_collection[c_idx][f]),
891 (
void *)(remote_src_frac_masks[c][f]));
897 (
double const * restrict **)cont_src_fields_collection,
898 (
double const * restrict **)cont_src_fields_frac_mask_collection,
899 (
double const **)remote_src_fields,
900 (
double const **)remote_src_frac_masks,
901 ref_tgt_field_collection,
902 tgt_pos, tgt_count, prefix_num_src_per_tgt, weights,
904 frac_mask_fallback_value, 1.0, 0.0);
912 if (fabs(tgt_field_collection[c][i] -
913 ref_tgt_field_collection[c][i]) > 1e-9) {
914 PUT_ERR(
"wrong data in sum_mvp_at_tgt interpolation");
920 free(prefix_num_src_per_tgt);
921 free(cont_src_fields_collection);
922 free(cont_src_fields_frac_mask_collection);
923 for (
size_t i = 0;
i < 2; ++
i) {
925 free(remote_src_buffer[i][c]);
929 free(ref_tgt_field_collection[c]);
931 free(ref_tgt_field_collection);
#define YAC_ASSERT(exp, msg)
size_t yac_collection_selection_get_collection_size(struct yac_collection_selection const *collection_selection)
Get the size of the collection selection.
size_t const * yac_collection_selection_get_indices(struct yac_collection_selection const *collection_selection)
Get explicit selection indices if non-contiguous.
void yac_collection_selection_delete(struct yac_collection_selection *collection_selection)
Delete a collection selection object.
struct yac_collection_selection * yac_collection_selection_new(size_t collection_size, size_t const *selection_indices)
Create a new collection selection.
struct yac_interp_operator * yac_interp_operator_sum_mvp_at_tgt_new(struct yac_collection_selection const *collection_selection, Xt_redist *src_redists, size_t *tgt_pos, size_t tgt_count, size_t *num_src_per_tgt, double *weights, size_t *src_field_idx, size_t *src_idx, size_t num_src_fields, int with_frac_mask)
Create a sum (weighted or unweighted) interpolation operator computed on the target processes.
void yac_interp_operator_execute_wait(struct yac_interp_operator *interp)
Wait for all pending put/get operations to finish.
int yac_interp_operator_is_target(struct yac_interp_operator *interp)
Checks if the current process holds target data for the interpolation operator.
struct yac_interp_operator * yac_interp_operator_copy(struct yac_interp_operator *interp)
Create a deep copy of the interpolation operator.
enum YAC_INTERP_TEST_STATUS yac_interp_operator_execute_put_test(struct yac_interp_operator *interp)
Test whether the put phase has completed.
int yac_interp_operator_is_source(struct yac_interp_operator *interp)
Checks if the current process holds source data for the interpolation operator.
enum YAC_INTERP_TEST_STATUS yac_interp_operator_execute_get_test(struct yac_interp_operator *interp)
Test whether the get phase has completed.
void yac_interp_operator_execute_get(struct yac_interp_operator *interp, double **tgt_field, double frac_mask_fallback_value, double scale_factor, double scale_summand)
void yac_interp_operator_execute_put(struct yac_interp_operator *interp, double ***src_fields, double ***src_frac_masks, int is_target, double frac_mask_fallback_value, double scale_factor, double scale_summand)
void yac_interp_operator_execute_get_async(struct yac_interp_operator *interp, double **tgt_field, double frac_mask_fallback_value, double scale_factor, double scale_summand)
void yac_interp_operator_execute(struct yac_interp_operator *interp, double ***src_fields, double ***src_frac_masks, double **tgt_field, double frac_mask_fallback_value, double scale_factor, double scale_summand)
void yac_interp_operator_delete(struct yac_interp_operator *interp)
Delete the interpolation operator and free resources.
Weighted/unweighted sum operator at target in YAC.
double const YAC_FRAC_MASK_NO_VALUE
Public interface for interpolation execution in YAC.
static void compute_tgt_field_wgt(double const *restrict **src_fields, double const *restrict **src_frac_masks, double const *restrict *remote_src_fields, double const *restrict *remote_src_frac_masks, double *restrict *tgt_field, size_t const *restrict tgt_pos, size_t tgt_count, size_t const *restrict prefix_num_src_per_tgt, double const *restrict weights, size_t const *restrict src_field_idx, size_t const *restrict src_idx, size_t num_src_fields, size_t collection_size, double frac_mask_fallback_value, double scale_factor, double scale_summand)
Compute target field values optionally using weighted sums of source data and optionally applying fractional masks.
Abstract interpolation operator type.