YAC 3.13.0
Yet Another Coupler
Loading...
Searching...
No Matches
test_interp_operator_sum_mvp_at_tgt.c
Go to the documentation of this file.
1// Copyright (c) 2025 The YAC Authors
2//
3// SPDX-License-Identifier: BSD-3-Clause
4
5#include <stdlib.h>
6#include <stdio.h>
7#include <string.h>
8#include <unistd.h>
9#include <math.h>
10
11#include <mpi.h>
12#include <yaxt.h>
13
14#include "tests.h"
16#include "yac_mpi.h"
17#include "test_common.h"
21
27static void utest_init_data(
28 struct yac_collection_selection * sel, double **** src_fields_collection,
29 double **** src_fields_frac_mask_collection, double *** tgt_field_collection);
30static void utest_free_data(
31 struct yac_collection_selection * sel, double *** src_fields_collection,
32 double *** src_fields_frac_mask_collection, double ** tgt_field_collection);
33static void utest_check_sum_mvp_at_tgt(struct yac_collection_selection * sel,
34 Xt_redist * src_redists,
35 size_t * num_src_points_per_field,
36 size_t * tgt_pos, size_t tgt_count,
37 size_t * num_src_per_tgt, double * weights,
38 size_t * src_field_idx, size_t * src_idx,
39 size_t num_src_fields,
40 double frac_mask_fallback_value,
41 double *** src_fields_collection,
42 double *** src_fields_frac_mask_collection,
43 double ** tgt_field_collection);
44
// Dimensions of the test setup.
// NOTE(review): the enumerators were missing from the listing this block was
// restored from; names and values are reconstructed from how the constants
// are used in this file -- confirm against the original source.
enum {
  NUM_PROCS = 2,           // main reports "test requires 2 processes"
  MAX_COLLECTION_SIZE = 4, // collection selections address entries 0..3
  MAX_NUM_SRC_FIELDS = 3,  // interpolation configs use up to 3 source fields
  NUM_SRC_POINTS = 4,      // global ids [0,1,2,3] on rank 0, [4,5,6,7] on rank 1
  NUM_TGT_POINTS = 4,      // tgt_pos entries are within [0;3] -- TODO confirm
};
// value of all target points that are not touched by the interpolation
#define TGT_UNSET_VALUE (-1.0)
53
54int main(void) {
55
56 MPI_Init(NULL, NULL);
57 xt_initialize(MPI_COMM_WORLD);
58
59 int comm_rank, comm_size;
60 MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank);
61 MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
62
63 if (comm_size != NUM_PROCS) {
64 PUT_ERR("ERROR: test requires 2 processes");
65 xt_finalize();
66 MPI_Finalize();
67 return TEST_EXIT_CODE;
68 }
69
70 // collection selection configs (only applied to source field data,
71 // target field data is expected to have a contiguous collection of size
72 // collection_size)
73 struct {
74 size_t N;
75 size_t * indices;
76 } collection_selection_configs[] =
77 {// selects three entries from the collection in random order
78 {.N = 3, .indices = (size_t[]){0, 3, 2}},
79 // selects all entries from the collection in ascending order
80 {.N = 4, .indices = (size_t[]){0, 1, 2, 3}},
81 // selects all entries from the collection in default (ascending) order
82 {.N = MAX_COLLECTION_SIZE, .indices = NULL}};
83 enum { NUM_COLL_SEL_CONFIGS =
84 sizeof(collection_selection_configs) /
85 sizeof(collection_selection_configs[0]) };
86
87 // loop over collection selections
88 for (size_t sel_idx = 0; sel_idx < NUM_COLL_SEL_CONFIGS; ++sel_idx) {
89
90 struct yac_collection_selection * sel =
92 collection_selection_configs[sel_idx].N,
93 collection_selection_configs[sel_idx].indices);
94
95 // interpolation configurations
96 // (basically contains entries of the sparsely populated interpolation
97 // weight matrix)
98 // We assume that the global ids of the source source points of each field
99 // are:
100 // * for rank 0: [0, 1, 2, 3]
101 // * for rank 1: [4, 5, 6, 7]
102 struct {
103 struct {
104
105 //----------------------------------------------------------------------
106 // Information about the target points that are to be interpolated
107 //----------------------------------------------------------------------
108
109 size_t * tgt_pos; // indices of all interpolated target
110 // points
111 size_t tgt_count; // number of interpolated target points
112
113 //----------------------------------------------------------------------
114 // Information about the source points from remote processes required
115 // the interpolation of the local target points
116 //----------------------------------------------------------------------
117
118 Xt_int * src_global_ids_per_field[MAX_NUM_SRC_FIELDS];
119 // global ids of all source points of
120 // each source field from remote
121 // processes required for the
122 // interpolation of the local target
123 // points
124 size_t num_src_points_per_field[MAX_NUM_SRC_FIELDS];
125 // number of source points per source
126 // field required from remote processes
127
128 //----------------------------------------------------------------------
129 // Information on how the target points are to be interpolated
130 //
131 // Information on how to interpolate the i'th target
132 // (with i being in the range [0;tgt_count-1]) is stored at:
133 // * num_src_per_tgt[i]
134 // * weights[SUM(num_src_per_tgt[0..i])..SUM(num_src_per_tgt[0..i+1]))
135 // * src_field_idx[SUM(num_src_per_tgt[0..i])..SUM(num_src_per_tgt[0..i+1]))
136 // * src_idx[SUM(num_src_per_tgt[0..i])..SUM(num_src_per_tgt[0..i+1]))
137 //
138 // A value of SIZE_MAX in the src_field_idx array indicates a source
139 // field point from a remote process. In that case, the respective
140 // src_idx in the contiguous receive buffer containing received
141 // source points for all fields of a collection entry.
142 //----------------------------------------------------------------------
143
144 size_t * num_src_per_tgt; // number of source points per target
145 double * weights; // weights for the interpolation of
146 // the target points
147 size_t * src_field_idx; // source field indices of source points
148 // required for the interpolation of the
149 // target points.
150 size_t * src_idx; // source indices / receive buffer index
151
152 //----------------------------------------------------------------------
153 // reference data
154 //----------------------------------------------------------------------
155 int is_source;
156 int is_target;
157
158 } interp_data[NUM_PROCS];
159 size_t num_src_fields; // number of source fields
160 } interp_configs[] =
161 // no interpolation
162 // (both processes are neither source nor target)
163 {{.interp_data =
164 {{.src_global_ids_per_field = {NULL},
165 .num_src_points_per_field = {0},
166 .tgt_pos = NULL,
167 .tgt_count = 0,
168 .num_src_per_tgt = NULL,
169 .weights = NULL,
170 .src_field_idx = NULL,
171 .src_idx = NULL,
172 .is_source = 0,
173 .is_target = 0},
174 {.src_global_ids_per_field = {NULL},
175 .num_src_points_per_field = {0},
176 .tgt_pos = NULL,
177 .tgt_count = 0,
178 .num_src_per_tgt = NULL,
179 .weights = NULL,
180 .src_field_idx = NULL,
181 .src_idx = NULL,
182 .is_source = 0,
183 .is_target = 0}},
184 .num_src_fields = 1},
185 // target data contains the source points of the respective other rank
186 // and from the local one
187 // (rank 0 and rank 1 are source and rank 1 is only target)
188 // dimensions (for documentation only):
189 // tgt[rank][local_idx]
190 // src[rank][src_field_idx][local_idx]
191 //
192 // tgt[1][1] = 0.5 * src[0][1][0] + 0.5 * src[1][1][0]
193 // tgt[1][3] = 0.5 * src[0][1][1] + 0.5 * src[1][1][1]
194 {.interp_data =
195 {{.src_global_ids_per_field = {NULL, NULL, NULL},
196 .num_src_points_per_field = {0,0,0},
197 .tgt_pos = NULL,
198 .tgt_count = 0,
199 .num_src_per_tgt = NULL,
200 .weights = NULL,
201 .src_field_idx = NULL,
202 .src_idx = NULL,
203 .is_source = 1,
204 .is_target = 0},
205 {.src_global_ids_per_field = {NULL, (Xt_int[]){0,1}, NULL},
206 .num_src_points_per_field = {0,2,0},
207 .tgt_pos = (size_t[]){1,3},
208 .tgt_count = 2,
209 .num_src_per_tgt = (size_t[]){2,2},
210 .weights = (double[]){0.5,0.5, 0.5,0.5},
211 .src_field_idx = (size_t[]){SIZE_MAX, 1, SIZE_MAX, 1},
212 .src_idx = (size_t[]){0,0,1,1},
213 .is_source = 1,
214 .is_target = 1}},
215 .num_src_fields = 3},
216 // target data contains the source points of the respective other rank
217 // (both processes are source and target;
218 // source points are only from remote process)
219 {.interp_data =
220 {{.src_global_ids_per_field = {(Xt_int[]){4,5,6,7}},
221 .num_src_points_per_field = {4},
222 .tgt_pos = (size_t[]){0,1,2,3},
223 .tgt_count = 4,
224 .num_src_per_tgt = (size_t[]){1,1,1,1},
225 .weights = (double[]){1.0, 1.0, 1.0, 1.0},
226 .src_field_idx = (size_t[]){SIZE_MAX, SIZE_MAX, SIZE_MAX, SIZE_MAX},
227 .src_idx = (size_t[]){0,1,2,3},
228 .is_source = 1,
229 .is_target = 1},
230 {.src_global_ids_per_field = {(Xt_int[]){0,1,2,3}},
231 .num_src_points_per_field = {4},
232 .tgt_pos = (size_t[]){0,1,2,3},
233 .tgt_count = 4,
234 .num_src_per_tgt = (size_t[]){1,1,1,1},
235 .weights = (double[]){1.0, 1.0, 1.0, 1.0},
236 .src_field_idx = (size_t[]){SIZE_MAX, SIZE_MAX, SIZE_MAX, SIZE_MAX},
237 .src_idx = (size_t[]){0,1,2,3},
238 .is_source = 1,
239 .is_target = 1}},
240 .num_src_fields = 1},
241 // more complex combination of the source data from multiple fields
242 // dimensions (for documentation only):
243 // tgt[rank][local_idx]
244 // src[rank][src_field_idx][local_idx]
245 //
246 // tgt[0][0] = 0.2 * src[0][0][1] + 0.4 src[1][1][0] + 0.4 * src[1][2][3]
247 // tgt[0][2] = sum(src[0][1][:]) + sum(src[1][2][:])
248 // tgt[1][1] = 0.5 * src[0][2][1] + 0.5 * src[0][2][3]
249 // tgt[1][3] = 0.1 * src[0][0][0] +
250 // 0.1 * src[0][0][2] +
251 // 0.1 * src[0][1][2] +
252 // 0.1 * src[0][2][2] +
253 // 0.1 * src[1][0][2] +
254 // 0.1 * src[1][1][2] +
255 // 0.1 * src[1][2][2]
256 // (both processes are source and target;
257 // source points are both from local and remote process)
258 {.interp_data =
259 {{.src_global_ids_per_field = {NULL,
260 (Xt_int[]){4},
261 (Xt_int[]){4,5,6,7}},
262 .num_src_points_per_field = {0,1,4},
263 .tgt_pos = (size_t[]){0,2},
264 .tgt_count = 2,
265 .num_src_per_tgt = (size_t[]){3,8},
266 .weights = (double[]){0.2,0.4,0.4,
267 1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0},
268 .src_field_idx = (size_t[]){0,SIZE_MAX,SIZE_MAX,
269 1,1,1,1,
270 SIZE_MAX,SIZE_MAX,SIZE_MAX,SIZE_MAX},
271 .src_idx = (size_t[]){1,0,4,
272 0,1,2,3,1,2,3,4},
273 .is_source = 1,
274 .is_target = 1},
275 {.src_global_ids_per_field = {(Xt_int[]){0,2},
276 (Xt_int[]){2},
277 (Xt_int[]){1,2,3}},
278 .num_src_points_per_field = {2,1,3},
279 .tgt_pos = (size_t[]){1,3},
280 .tgt_count = 2,
281 .num_src_per_tgt = (size_t[]){2,7},
282 .weights = (double[]){0.5,0.5,
283 0.1,0.1,0.1,0.1,0.1,0.1,0.1},
284 .src_field_idx = (size_t[]){SIZE_MAX,SIZE_MAX,
285 SIZE_MAX,SIZE_MAX,SIZE_MAX,SIZE_MAX,
286 0,1,2},
287 .src_idx = (size_t[]){3,5,
288 0,1,2,4,2,2,2},
289 .is_source = 1,
290 .is_target = 1}},
291 .num_src_fields = 3},
292 // more complex combination of the source data from multiple fields
293 // dimensions (for documentation only):
294 // tgt[rank][local_idx]
295 // src[rank][src_field_idx][local_idx]
296 //
297 // tgt[0][0] = 0.5 * src[0][0][0] + 0.5 * src[0][0][0]
298 // tgt[0][1] = 0.5 * src[0][0][1] + 0.5 * src[0][1][1]
299 // tgt[0][2] = 0.5 * src[0][1][2] + 0.5 * src[0][0][2]
300 // tgt[0][3] = 0.5 * src[0][1][3] + 0.5 * src[0][1][3]
301 // tgt[1][0] = 0.5 * src[1][0][0] + 0.5 * src[1][1][0]
302 // tgt[1][2] = 0.5 * src[1][0][2] + 0.5 * src[1][1][2]
303 // (both processes are source and target;
304 // source points are only from local)
305 {.interp_data =
306 {{.src_global_ids_per_field = {NULL, NULL},
307 .num_src_points_per_field = {0,0},
308 .tgt_pos = (size_t[]){0,1,2,3},
309 .tgt_count = 4,
310 .num_src_per_tgt = (size_t[]){2,2,2,2},
311 .weights = (double[]){0.5,0.5, 0.5,0.5, 0.5,0.5, 0.5,0.5},
312 .src_field_idx = (size_t[]){0,0, 0,1, 1,0, 1,1},
313 .src_idx = (size_t[]){0,0, 1,1, 2,2, 3,3},
314 .is_source = 1,
315 .is_target = 1},
316 {.src_global_ids_per_field = {NULL, NULL},
317 .num_src_points_per_field = {0,0},
318 .tgt_pos = (size_t[]){0,2},
319 .tgt_count = 2,
320 .num_src_per_tgt = (size_t[]){2,2},
321 .weights = (double[]){0.5,0.5, 0.5,0.5},
322 .src_field_idx = (size_t[]){0,1, 0,1},
323 .src_idx = (size_t[]){0,0, 2,2},
324 .is_source = 1,
325 .is_target = 1}},
326 .num_src_fields = 2}};
327 enum {
328 NUM_INTERP_CONFIGS = sizeof(interp_configs) / sizeof(interp_configs[0])};
329
330 // generate yaxt index list containing global ids of all locally available
331 // source points (each source field contains the same global ids)
332 // each process holds the following global ids:
333 // i + comm_rank * NUM_SRC_POINTS
334 // with i in [0..NUM_SRC_POINTS]
335 Xt_int src_global_ids[NUM_SRC_POINTS];
336 for (size_t src_idx = 0; src_idx < NUM_SRC_POINTS; ++src_idx)
337 src_global_ids[src_idx] =
338 (Xt_int)(src_idx + comm_rank * NUM_SRC_POINTS);
339 Xt_idxlist src_idxlist =
340 xt_idxvec_new(src_global_ids, NUM_SRC_POINTS);
341
342 // loop over all interpolation configurations
343 for (size_t interp_config_idx = 0; interp_config_idx < NUM_INTERP_CONFIGS;
344 ++interp_config_idx) {
345
346 // generate input data for yac_interpolation_sum_mvp_at_tgt_new from list
347 // of links
348 size_t num_src_fields =
349 interp_configs[interp_config_idx].num_src_fields;
350 Xt_redist src_redists[num_src_fields];
351 for (size_t src_field_idx = 0; src_field_idx < num_src_fields;
352 ++src_field_idx) {
353
354 // create redist for receiving source data of the current source field
355 // required for the interpolation of the local target points
356 Xt_idxlist required_source_idxlist =
357 (interp_configs[interp_config_idx].
358 interp_data[comm_rank].
359 num_src_points_per_field[src_field_idx] > 0)?
360 xt_idxvec_new(
361 interp_configs[interp_config_idx].
362 interp_data[comm_rank].
363 src_global_ids_per_field[src_field_idx],
364 interp_configs[interp_config_idx].
365 interp_data[comm_rank].
366 num_src_points_per_field[src_field_idx]):
367 xt_idxempty_new();
368 Xt_xmap xmap =
369 xt_xmap_all2all_new(
370 src_idxlist, required_source_idxlist, MPI_COMM_WORLD);
371 src_redists[src_field_idx] = xt_redist_p2p_new(xmap, MPI_DOUBLE);
372 xt_xmap_delete(xmap);
373 xt_idxlist_delete(required_source_idxlist);
374
375 } // src_field_idx
376
377 for (int with_frac_mask = 0; with_frac_mask <= 1; ++with_frac_mask) {
378
379 // if without fractional masking, set fallback value to
380 // YAC_FRAC_MASK_NO_VALUE, which deactivates it
381 double frac_mask_fallback_value =
382 with_frac_mask?-10.0:YAC_FRAC_MASK_NO_VALUE;
383
384 for (int use_weights = 0; use_weights <= 1; ++use_weights) {
385
386 // if without weights, set it to NULL, which deactivates it
387 double * weights =
388 use_weights?
389 interp_configs[interp_config_idx].interp_data[comm_rank].weights:
390 NULL;
391
392 // some consistency checking of the input data
393 {
394 size_t total_num_receive_src_points = 0;
395 for (size_t i = 0;
396 i < interp_configs[interp_config_idx].
397 interp_data[comm_rank].tgt_count; ++i) {
398 total_num_receive_src_points +=
399 interp_configs[interp_config_idx].
400 interp_data[comm_rank].num_src_per_tgt[i];
401 }
402 for (size_t i = 0; i < total_num_receive_src_points; ++i) {
404 (interp_configs[interp_config_idx].
405 interp_data[comm_rank].src_field_idx[i] == SIZE_MAX) ||
406 (interp_configs[interp_config_idx].
407 interp_data[comm_rank].src_field_idx[i] < num_src_fields),
408 "ERROR in src_field_idx");
410 interp_configs[interp_config_idx].
411 interp_data[comm_rank].src_idx[i] <
412 total_num_receive_src_points,
413 "ERROR in src_idx");
414 }
415 }
416
417 // generate interpolation for the current configuration
418 struct yac_interp_operator * interp =
420 sel, src_redists,
421 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_pos,
422 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_count,
423 interp_configs[interp_config_idx].interp_data[comm_rank].num_src_per_tgt,
424 weights,
425 interp_configs[interp_config_idx].interp_data[comm_rank].src_field_idx,
426 interp_configs[interp_config_idx].interp_data[comm_rank].src_idx,
427 num_src_fields, with_frac_mask);
428
429 // test interpolation built above and a copy of it, to make sure that
430 // the copy-function works properly
431 for (int use_copy = 0; use_copy <= 1; ++use_copy) {
432
433 if (use_copy) {
434 struct yac_interp_operator * interp_copy =
437 interp = interp_copy;
438 }
439
440 { // check is_source and is_target
441
442 if (yac_interp_operator_is_source(interp) !=
443 interp_configs[interp_config_idx].
444 interp_data[comm_rank].is_source)
445 PUT_ERR("ERROR in is_source");
446
447 if (yac_interp_operator_is_target(interp) !=
448 interp_configs[interp_config_idx].
449 interp_data[comm_rank].is_target)
450 PUT_ERR("ERROR in is_target");
451 }
452
453 { // synchronous test (put and get in a single exchange call)
454
455 // initialise source and target fields
456 double *** src_fields_collection;
457 double *** src_fields_frac_mask_collection;
458 double ** tgt_field_collection;
459 utest_init_data(
460 sel, &src_fields_collection, &src_fields_frac_mask_collection,
461 &tgt_field_collection);
462
464 interp, src_fields_collection, src_fields_frac_mask_collection,
465 tgt_field_collection, frac_mask_fallback_value, 1.0, 0.0);
466
467 // there should be no open put or get operation
470 PUT_ERR("ERROR in execute_put_test");
473 PUT_ERR("ERROR in execute_get_test");
474
475 // check results
476 utest_check_sum_mvp_at_tgt(
477 sel, src_redists,
478 interp_configs[interp_config_idx].interp_data[comm_rank].num_src_points_per_field,
479 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_pos,
480 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_count,
481 interp_configs[interp_config_idx].interp_data[comm_rank].num_src_per_tgt,
482 weights,
483 interp_configs[interp_config_idx].interp_data[comm_rank].src_field_idx,
484 interp_configs[interp_config_idx].interp_data[comm_rank].src_idx,
485 num_src_fields, frac_mask_fallback_value,
486 src_fields_collection, src_fields_frac_mask_collection,
487 tgt_field_collection);
488
489 // free source and target fields
490 utest_free_data(
491 sel, src_fields_collection, src_fields_frac_mask_collection,
492 tgt_field_collection);
493 }
494
495 { // independent put + get
496
497 // initialise source and target fields
498 double *** src_fields_collection;
499 double *** src_fields_frac_mask_collection;
500 double ** tgt_field_collection;
501 utest_init_data(
502 sel, &src_fields_collection, &src_fields_frac_mask_collection,
503 &tgt_field_collection);
504
506 interp, src_fields_collection,
507 src_fields_frac_mask_collection,
508 1, frac_mask_fallback_value, 1.0, 0.0);
510 interp, tgt_field_collection,
511 frac_mask_fallback_value, 1.0, 0.0);
512
513 // there should be no open put or get operation
516 PUT_ERR("ERROR in execute_put_test");
519 PUT_ERR("ERROR in execute_get_test");
520
521 // check results
522 utest_check_sum_mvp_at_tgt(
523 sel, src_redists,
524 interp_configs[interp_config_idx].interp_data[comm_rank].num_src_points_per_field,
525 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_pos,
526 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_count,
527 interp_configs[interp_config_idx].interp_data[comm_rank].num_src_per_tgt,
528 weights,
529 interp_configs[interp_config_idx].interp_data[comm_rank].src_field_idx,
530 interp_configs[interp_config_idx].interp_data[comm_rank].src_idx,
531 num_src_fields, frac_mask_fallback_value,
532 src_fields_collection, src_fields_frac_mask_collection,
533 tgt_field_collection);
534
535 // free source and target fields
536 utest_free_data(
537 sel, src_fields_collection, src_fields_frac_mask_collection,
538 tgt_field_collection);
539 }
540
541 { // independent put + async get + wait
542
543 // initialise source and target fields
544 double *** src_fields_collection;
545 double *** src_fields_frac_mask_collection;
546 double ** tgt_field_collection;
547 utest_init_data(
548 sel, &src_fields_collection, &src_fields_frac_mask_collection,
549 &tgt_field_collection);
550
552 interp, src_fields_collection,
553 src_fields_frac_mask_collection,
554 1, frac_mask_fallback_value, 1.0, 0.0);
556 interp, tgt_field_collection,
557 frac_mask_fallback_value, 1.0, 0.0);
559
560 // there should be no open put or get operation
563 PUT_ERR("ERROR in execute_put_test");
566 PUT_ERR("ERROR in execute_get_test");
567
568 // check results
569 utest_check_sum_mvp_at_tgt(
570 sel, src_redists,
571 interp_configs[interp_config_idx].interp_data[comm_rank].num_src_points_per_field,
572 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_pos,
573 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_count,
574 interp_configs[interp_config_idx].interp_data[comm_rank].num_src_per_tgt,
575 weights,
576 interp_configs[interp_config_idx].interp_data[comm_rank].src_field_idx,
577 interp_configs[interp_config_idx].interp_data[comm_rank].src_idx,
578 num_src_fields, frac_mask_fallback_value,
579 src_fields_collection, src_fields_frac_mask_collection,
580 tgt_field_collection);
581
582 // free source and target fields
583 utest_free_data(
584 sel, src_fields_collection, src_fields_frac_mask_collection,
585 tgt_field_collection);
586 }
587
588 { // independent put + async get + test_get-loop
589
590 // initialise source and target fields
591 double *** src_fields_collection;
592 double *** src_fields_frac_mask_collection;
593 double ** tgt_field_collection;
594 utest_init_data(
595 sel, &src_fields_collection, &src_fields_frac_mask_collection,
596 &tgt_field_collection);
597
599 interp, src_fields_collection,
600 src_fields_frac_mask_collection,
601 1, frac_mask_fallback_value, 1.0, 0.0);
603 interp, tgt_field_collection,
604 frac_mask_fallback_value, 1.0, 0.0);
605 while(
608
609 // there should be no open put or get operation
612 PUT_ERR("ERROR in execute_put_test");
615 PUT_ERR("ERROR in execute_get_test");
616
617 // check results
618 utest_check_sum_mvp_at_tgt(
619 sel, src_redists,
620 interp_configs[interp_config_idx].interp_data[comm_rank].num_src_points_per_field,
621 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_pos,
622 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_count,
623 interp_configs[interp_config_idx].interp_data[comm_rank].num_src_per_tgt,
624 weights,
625 interp_configs[interp_config_idx].interp_data[comm_rank].src_field_idx,
626 interp_configs[interp_config_idx].interp_data[comm_rank].src_idx,
627 num_src_fields, frac_mask_fallback_value,
628 src_fields_collection, src_fields_frac_mask_collection,
629 tgt_field_collection);
630
631 // free source and target fields
632 utest_free_data(
633 sel, src_fields_collection, src_fields_frac_mask_collection,
634 tgt_field_collection);
635 }
636
637 { // independent async get + put + wait
638
639 // initialise source and target fields
640 double *** src_fields_collection;
641 double *** src_fields_frac_mask_collection;
642 double ** tgt_field_collection;
643 utest_init_data(
644 sel, &src_fields_collection, &src_fields_frac_mask_collection,
645 &tgt_field_collection);
646
648 interp, tgt_field_collection,
649 frac_mask_fallback_value, 1.0, 0.0);
651 interp, src_fields_collection,
652 src_fields_frac_mask_collection,
653 1, frac_mask_fallback_value, 1.0, 0.0);
655
656 // there should be no open put or get operation
659 PUT_ERR("ERROR in execute_put_test");
662 PUT_ERR("ERROR in execute_get_test");
663
664 // check results
665 utest_check_sum_mvp_at_tgt(
666 sel, src_redists,
667 interp_configs[interp_config_idx].interp_data[comm_rank].num_src_points_per_field,
668 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_pos,
669 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_count,
670 interp_configs[interp_config_idx].interp_data[comm_rank].num_src_per_tgt,
671 weights,
672 interp_configs[interp_config_idx].interp_data[comm_rank].src_field_idx,
673 interp_configs[interp_config_idx].interp_data[comm_rank].src_idx,
674 num_src_fields, frac_mask_fallback_value,
675 src_fields_collection, src_fields_frac_mask_collection,
676 tgt_field_collection);
677
678 // free source and target fields
679 utest_free_data(
680 sel, src_fields_collection, src_fields_frac_mask_collection,
681 tgt_field_collection);
682 }
683
684 } // use_copy
685
687 } // use_weights
688 } // with_frac_mask
689
690 for (size_t f = 0; f < num_src_fields; ++f)
691 xt_redist_delete(src_redists[f]);
692
693 } // interp_config_idx
694
695 xt_idxlist_delete(src_idxlist);
697
698 } // sel_idx
699
700 xt_finalize();
701 MPI_Finalize();
702 return TEST_EXIT_CODE;
703}
704
705static void utest_init_data(
706 struct yac_collection_selection * sel, double **** src_fields_collection,
707 double **** src_fields_frac_mask_collection,
708 double *** tgt_field_collection) {
709
711
712 int rank;
713 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
714
715 *src_fields_collection =
716 xmalloc(MAX_COLLECTION_SIZE * sizeof(**src_fields_collection));
717 *src_fields_frac_mask_collection =
718 xmalloc(MAX_COLLECTION_SIZE * sizeof(**src_fields_frac_mask_collection));
719
720 for (size_t c = 0; c < MAX_COLLECTION_SIZE; ++c) {
721
722 (*src_fields_collection)[c] =
723 xmalloc(MAX_NUM_SRC_FIELDS * sizeof(***src_fields_collection));
724 (*src_fields_frac_mask_collection)[c] =
725 xmalloc(MAX_NUM_SRC_FIELDS * sizeof(***src_fields_frac_mask_collection));
726
727 for (size_t f = 0; f < MAX_NUM_SRC_FIELDS; ++f) {
728
729 (*src_fields_collection)[c][f] =
730 xmalloc(NUM_SRC_POINTS * sizeof(***src_fields_collection));
731 (*src_fields_frac_mask_collection)[c][f] =
732 xmalloc(NUM_SRC_POINTS * sizeof(***src_fields_frac_mask_collection));
733
734 for (size_t i = 0; i < NUM_SRC_POINTS; ++i) {
735 (*src_fields_collection)[c][f][i] =
736 (double)(c*1000 + f*100 + rank*10 + i);
737 (*src_fields_frac_mask_collection)[c][f][i] = 1.0;
738 }
739 }
740 }
741
742 *tgt_field_collection =
743 xmalloc(collection_size * sizeof(**tgt_field_collection));
744
745 for (size_t c = 0; c < collection_size; ++c) {
746
747 (*tgt_field_collection)[c] =
748 xmalloc(NUM_TGT_POINTS * sizeof(**tgt_field_collection));
749
750 for (size_t i = 0; i < NUM_TGT_POINTS; ++i) {
751 (*tgt_field_collection)[c][i] = TGT_UNSET_VALUE;
752 }
753 }
754}
755
756static void utest_free_data(
757 struct yac_collection_selection * sel, double *** src_fields_collection,
758 double *** src_fields_frac_mask_collection, double ** tgt_field_collection) {
759
761
762 for (size_t c = 0; c < MAX_COLLECTION_SIZE; ++c) {
763
764 for (size_t f = 0; f < MAX_NUM_SRC_FIELDS; ++f) {
765
766 free(src_fields_collection[c][f]);
767 free(src_fields_frac_mask_collection[c][f]);
768 }
769
770 free(src_fields_collection[c]);
771 free(src_fields_frac_mask_collection[c]);
772 }
773
774 free(src_fields_collection);
775 free(src_fields_frac_mask_collection);
776
777 for (size_t c = 0; c < collection_size; ++c) {
778
779 free(tgt_field_collection[c]);
780 }
781
782 free(tgt_field_collection);
783}
784
785static void utest_check_sum_mvp_at_tgt(struct yac_collection_selection * sel,
786 Xt_redist * src_redists,
787 size_t * num_src_points_per_field,
788 size_t * tgt_pos, size_t tgt_count,
789 size_t * num_src_per_tgt, double * weights,
790 size_t * src_field_idx, size_t * src_idx,
791 size_t num_src_fields,
792 double frac_mask_fallback_value,
793 double *** src_fields_collection,
794 double *** src_fields_frac_mask_collection,
795 double ** tgt_field_collection) {
796
798 size_t const * collection_indices = yac_collection_selection_get_indices(sel);
799
800 // initialise reference target field with dummy values
801 // and generate array containg pointers to the field associated to
802 // each collection
803 double ** ref_tgt_field_collection =
804 xmalloc(collection_size * sizeof(*ref_tgt_field_collection));
805 for (size_t c = 0; c < collection_size; ++c) {
806 ref_tgt_field_collection[c] =
807 xmalloc(NUM_TGT_POINTS * sizeof(**ref_tgt_field_collection));
808 for (size_t i = 0; i < NUM_TGT_POINTS; ++i) {
809 ref_tgt_field_collection[c][i] = TGT_UNSET_VALUE;
810 }
811 }
812
813 // compute prefix sum of num_src_per_tgt, which is required
814 // by compute_tgt_field_wgt
815 size_t * prefix_num_src_per_tgt =
816 xmalloc((tgt_count+1) * sizeof(*prefix_num_src_per_tgt));
817 prefix_num_src_per_tgt[0] = 0;
818 for (size_t i = 0, accu = 0; i < tgt_count; ++i) {
819 accu += num_src_per_tgt[i];
820 prefix_num_src_per_tgt[i+1] = accu;
821 }
822
823 // Generate contiguous view of selected source collection selection
824 // (This simplifies further steps.)
825 double *** cont_src_fields_collection =
826 xmalloc(collection_size * sizeof(*cont_src_fields_collection));
827 double *** cont_src_fields_frac_mask_collection =
828 xmalloc(collection_size * sizeof(*src_fields_frac_mask_collection));
829 for (size_t c = 0; c < collection_size; ++c) {
830 // get collection index (if non-contiguous) otherwise use c
831 size_t c_idx = (collection_indices == NULL) ? c : collection_indices[c];
832 cont_src_fields_collection[c] =
833 src_fields_collection[c_idx];
834 cont_src_fields_frac_mask_collection[c] =
835 src_fields_frac_mask_collection[c_idx];
836 }
837
838 // setup pointers for receiving source data from other processes
839 // (since these arrays are used during an intermediate step, contiguous
840 // collection indices can be assumed)
841 // (compute_tgt_field_wgt assumes that received source points from all fields
842 // is in one contiguous buffer, which is why pointers stored in
843 // remote_src_fields and remote_src_frac_masks are computed based on
844 // num_src_points_per_field)
845 enum {SRC_FIELD = 0, SRC_FRAC_MASK = 1};
846 double * remote_src_buffer[2][collection_size];
847 double * remote_src_fields[collection_size][num_src_fields];
848 double * remote_src_frac_masks[collection_size][num_src_fields];
849 for (size_t i = 0; i < 2; ++i) {
850 for (size_t c = 0; c < collection_size; ++c) {
851 // allocate buffer to the maximum possible buffer size
852 remote_src_buffer[i][c] =
853 xmalloc(
854 num_src_fields * (NUM_PROCS - 1) * NUM_SRC_POINTS *
855 sizeof(remote_src_buffer[i][c][0]));
856 for (size_t k = 0;
857 k < num_src_fields * (NUM_PROCS - 1) * NUM_SRC_POINTS; ++k) {
858 // initialise buffer with NAN to make error detection easier
859 remote_src_buffer[i][c][k] = NAN;
860 }
861 }
862 }
863 for (size_t c = 0; c < collection_size; ++c) {
864 for (size_t f = 0, offset = 0; f < num_src_fields; ++f) {
865 remote_src_fields[c][f] =
866 &remote_src_buffer[SRC_FIELD][c][offset];
867 remote_src_frac_masks[c][f] =
868 &remote_src_buffer[SRC_FRAC_MASK][c][offset];
869 offset += num_src_points_per_field[f];
870 }
871 }
872
873 // for each collection entry
874 for (size_t c = 0; c < collection_size; ++c) {
875
876 // get collection index (if non-contiguous) otherwise use c
877 size_t c_idx = (collection_indices == NULL) ? c : collection_indices[c];
878
879 // for each source field
880 for (size_t f = 0; f < num_src_fields; ++f) {
881
882 // receive source point and fractional mask values required for the
883 // interpolation of the local target points
884 xt_redist_s_exchange1(
885 src_redists[f],
886 (const void *)(src_fields_collection[c_idx][f]),
887 (void *)(remote_src_fields[c][f]));
888 xt_redist_s_exchange1(
889 src_redists[f],
890 (const void *)(src_fields_frac_mask_collection[c_idx][f]),
891 (void *)(remote_src_frac_masks[c][f]));
892 }
893 }
894
895 // compute target field reference
897 (double const * restrict **)cont_src_fields_collection,
898 (double const * restrict **)cont_src_fields_frac_mask_collection,
899 (double const **)remote_src_fields,
900 (double const **)remote_src_frac_masks,
901 ref_tgt_field_collection,
902 tgt_pos, tgt_count, prefix_num_src_per_tgt, weights,
903 src_field_idx, src_idx, num_src_fields, collection_size,
904 frac_mask_fallback_value, 1.0, 0.0);
905
906 // check target field results generate by interpolation operation against
907 // refernce (if a target point is not being interpolated by the
908 // interpolation operation, the target field and the reference should both
909 // contain the value TGT_UNSET_VALUE)
910 for (size_t c = 0; c < collection_size; ++c) {
911 for (size_t i = 0; i < NUM_TGT_POINTS; ++i) {
912 if (fabs(tgt_field_collection[c][i] -
913 ref_tgt_field_collection[c][i]) > 1e-9) {
914 PUT_ERR("wrong data in sum_mvp_at_tgt interpolation");
915 }
916 }
917 }
918
919 // clean up
920 free(prefix_num_src_per_tgt);
921 free(cont_src_fields_collection);
922 free(cont_src_fields_frac_mask_collection);
923 for (size_t i = 0; i < 2; ++i) {
924 for (size_t c = 0; c < collection_size; ++c) {
925 free(remote_src_buffer[i][c]);
926 }
927 }
928 for (size_t c = 0; c < collection_size; ++c) {
929 free(ref_tgt_field_collection[c]);
930 }
931 free(ref_tgt_field_collection);
932}
#define YAC_ASSERT(exp, msg)
size_t yac_collection_selection_get_collection_size(struct yac_collection_selection const *collection_selection)
Get the size of the collection selection.
size_t const * yac_collection_selection_get_indices(struct yac_collection_selection const *collection_selection)
Get explicit selection indices if non-contiguous.
void yac_collection_selection_delete(struct yac_collection_selection *collection_selection)
Delete a collection selection object.
struct yac_collection_selection * yac_collection_selection_new(size_t collection_size, size_t const *selection_indices)
Create a new collection selection.
struct yac_interp_operator * yac_interp_operator_sum_mvp_at_tgt_new(struct yac_collection_selection const *collection_selection, Xt_redist *src_redists, size_t *tgt_pos, size_t tgt_count, size_t *num_src_per_tgt, double *weights, size_t *src_field_idx, size_t *src_idx, size_t num_src_fields, int with_frac_mask)
Create a sum (weighted or unweighted) interpolation operator computed on the target processes.
void yac_interp_operator_execute_wait(struct yac_interp_operator *interp)
Wait for all pending put/get operations to finish.
int yac_interp_operator_is_target(struct yac_interp_operator *interp)
Checks if the current process holds target data for the interpolation operator.
struct yac_interp_operator * yac_interp_operator_copy(struct yac_interp_operator *interp)
Create a deep copy of the interpolation operator.
enum YAC_INTERP_TEST_STATUS yac_interp_operator_execute_put_test(struct yac_interp_operator *interp)
Test whether the put phase has completed.
int yac_interp_operator_is_source(struct yac_interp_operator *interp)
Checks if the current process holds source data for the interpolation operator.
enum YAC_INTERP_TEST_STATUS yac_interp_operator_execute_get_test(struct yac_interp_operator *interp)
Test whether the get phase has completed.
void yac_interp_operator_execute_get(struct yac_interp_operator *interp, double **tgt_field, double frac_mask_fallback_value, double scale_factor, double scale_summand)
void yac_interp_operator_execute_put(struct yac_interp_operator *interp, double ***src_fields, double ***src_frac_masks, int is_target, double frac_mask_fallback_value, double scale_factor, double scale_summand)
void yac_interp_operator_execute_get_async(struct yac_interp_operator *interp, double **tgt_field, double frac_mask_fallback_value, double scale_factor, double scale_summand)
void yac_interp_operator_execute(struct yac_interp_operator *interp, double ***src_fields, double ***src_frac_masks, double **tgt_field, double frac_mask_fallback_value, double scale_factor, double scale_summand)
void yac_interp_operator_delete(struct yac_interp_operator *interp)
Delete the interpolation operator and free resources.
@ YAC_INTERP_INCOMPLETE
@ YAC_INTERP_COMPLETE
Weighted/unweighted sum operator at target in YAC.
double const YAC_FRAC_MASK_NO_VALUE
Public interface for interpolation execution in YAC.
static void compute_tgt_field_wgt(double const *restrict **src_fields, double const *restrict **src_frac_masks, double const *restrict *remote_src_fields, double const *restrict *remote_src_frac_masks, double *restrict *tgt_field, size_t const *restrict tgt_pos, size_t tgt_count, size_t const *restrict prefix_num_src_per_tgt, double const *restrict weights, size_t const *restrict src_field_idx, size_t const *restrict src_idx, size_t num_src_fields, size_t collection_size, double frac_mask_fallback_value, double scale_factor, double scale_summand)
Compute target field values optionally using weighted sums of source data and optionally applying fra...
#define xmalloc(size)
Definition ppm_xfuncs.h:66
Abstract interpolation operator type.
int collection_size
#define N
#define TEST_EXIT_CODE
Definition tests.h:14
#define PUT_ERR(string)
Definition tests.h:10