YAC 3.13.0
Yet Another Coupler
Loading...
Searching...
No Matches
test_interp_operator_sum_mvp_at_src.c
Go to the documentation of this file.
1// Copyright (c) 2025 The YAC Authors
2//
3// SPDX-License-Identifier: BSD-3-Clause
4
5#include <stdlib.h>
6#include <stdio.h>
7#include <string.h>
8#include <unistd.h>
9#include <math.h>
10
11#include <mpi.h>
12#include <yaxt.h>
13
14#include "tests.h"
16#include "yac_mpi.h"
17#include "test_common.h"
21
27static void utest_init_data(
28 struct yac_collection_selection * sel, double **** src_fields_collection,
29 double **** src_fields_frac_mask_collection, double *** tgt_field_collection);
30static void utest_free_data(
31 struct yac_collection_selection * sel, double *** src_fields_collection,
32 double *** src_fields_frac_mask_collection, double ** tgt_field_collection);
33static void utest_check_sum_mvp_at_src(struct yac_collection_selection * sel,
34 Xt_redist * src_halo_redists,
35 Xt_redist tgt_result_redist,
36 size_t tgt_count,
37 size_t * num_src_points_per_field,
38 size_t * num_src_per_tgt, double * weights,
39 size_t * src_field_idx, size_t * src_idx,
40 size_t num_src_fields,
41 double frac_mask_fallback_value,
42 double *** src_fields_collection,
43 double *** src_fields_frac_mask_collection,
44 double ** tgt_field_collection);
45
46enum {
52};
53#define TGT_UNSET_VALUE (-1.0)
54
55int main(void) {
56
57 MPI_Init(NULL, NULL);
58 xt_initialize(MPI_COMM_WORLD);
59
60 int comm_rank, comm_size;
61 MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank);
62 MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
63
64 if (comm_size != NUM_PROCS) {
65 PUT_ERR("ERROR: test requires 2 processes");
66 xt_finalize();
67 MPI_Finalize();
68 return TEST_EXIT_CODE;
69 }
70
71 // collection selection configs (only applied to source field data,
72 // target field data is expected to have a contiguous collection of size
73 // collection_size)
74 struct {
75 size_t N;
76 size_t * indices;
77 } collection_selection_configs[] =
78 {// selects three entries from the collection in random order
79 {.N = 3, .indices = (size_t[]){0, 3, 2}},
80 // selects all entries from the collection in ascending order
81 {.N = 4, .indices = (size_t[]){0, 1, 2, 3}},
82 // selects all entries from the collection in default (ascending) order
83 {.N = 4, .indices = NULL}};
84 enum { NUM_COLL_SEL_CONFIGS =
85 sizeof(collection_selection_configs) /
86 sizeof(collection_selection_configs[0]) };
87
88 // loop over collection selections
89 for (size_t sel_idx = 0; sel_idx < NUM_COLL_SEL_CONFIGS; ++sel_idx) {
90
91 struct yac_collection_selection * sel =
93 collection_selection_configs[sel_idx].N,
94 collection_selection_configs[sel_idx].indices);
95
96 // interpolation configurations
97 // (basically contains entries of the sparsely populated interpolation
98 // weight matrix)
99 // We assume that the global ids of the source source points of each field
100 // are:
101 // * for rank 0: [0, 1, 2, 3]
102 // * for rank 1: [4, 5, 6, 7]
103 struct {
104 struct {
105
106 //----------------------------------------------------------------------
107 // Information about the local target points that are to be interpolated
108 //----------------------------------------------------------------------
109
110 struct {
111
112 Xt_int * ids;
113 int * offsets;
114 size_t count;
116 } tgt_receive;
117
118 //----------------------------------------------------------------------
119 // Information about the target points that are to be interpolated
120 // (these target points can either be owned by the local process or
121 // remote process
122 //----------------------------------------------------------------------
123
124 struct {
125
126 //--------------------------------------------------------------------
127 // Information about the target points that are to be interpolated by
128 // the local process
129 //--------------------------------------------------------------------
130
131 Xt_int * ids;
133 size_t count;
135
136 //--------------------------------------------------------------------
137 // Information about the source points from remote processes required
138 // the interpolation of the target points
139 //--------------------------------------------------------------------
140
141 Xt_int * src_global_ids_per_field[MAX_NUM_SRC_FIELDS];
142 // global ids of all source points of
143 // each source field from remote
144 // processes required for the
145 // interpolation of the target points
146 size_t num_src_points_per_field[MAX_NUM_SRC_FIELDS];
147 // number of source points per source
148 // field required from remote processes
149
150 //--------------------------------------------------------------------
151 // Information on how the target points are to be interpolated
152 //
153 // Information on how to interpolate the i'th target
154 // (with i being in the range [0;tgt_count-1]) is stored at:
155 // * num_src_per_tgt[i]
156 // * weights[SUM(num_src_per_tgt[0..i])..SUM(num_src_per_tgt[0..i+1]))
157 // * src_field_idx[SUM(num_src_per_tgt[0..i])..SUM(num_src_per_tgt[0..i+1]))
158 // * src_idx[SUM(num_src_per_tgt[0..i])..SUM(num_src_per_tgt[0..i+1]))
159 //
160 // A value of SIZE_MAX in the src_field_idx array indicates a source
161 // field point from a remote process. In that case, the respective
162 // src_idx in the contiguous receive buffer containing received
163 // source points for all fields of a collection entry.
164 //--------------------------------------------------------------------
165
166 size_t * num_src_per_tgt; // number of source points per target
167 double * weights; // weights for the interpolation of
168 // the target points
169 size_t * src_field_idx; // source field indices of source
170 // points required for the
171 // interpolation of the target points.
172 size_t * src_idx; // source indices / receive buffer
173 // index
174
175 } tgt_send;
176
177 //----------------------------------------------------------------------
178 // reference data
179 //----------------------------------------------------------------------
180 int is_source;
181 int is_target;
182
183 } interp_data[NUM_PROCS];
184 size_t num_src_fields; // number of source fields
185 } interp_configs[] =
186 // no interpolation
187 // (both processes are neither source nor target)
188 {{.interp_data =
189 {{.tgt_receive =
190 {.ids = NULL,
191 .offsets = NULL,
192 .count = 0},
193 .tgt_send = {
194 .ids = NULL,
195 .count = 0,
196 .src_global_ids_per_field = {NULL},
197 .num_src_points_per_field = {0},
198 .num_src_per_tgt = NULL,
199 .weights = NULL,
200 .src_field_idx = NULL,
201 .src_idx = NULL},
202 .is_source = 0,
203 .is_target = 0},
204 {.tgt_receive =
205 {.ids = NULL,
206 .offsets = NULL,
207 .count = 0},
208 .tgt_send = {
209 .ids = NULL,
210 .count = 0,
211 .src_global_ids_per_field = {NULL},
212 .num_src_points_per_field = {0},
213 .num_src_per_tgt = NULL,
214 .weights = NULL,
215 .src_field_idx = NULL,
216 .src_idx = NULL},
217 .is_source = 0,
218 .is_target = 0}},
219 .num_src_fields = 1},
220 // target data on rank 1 contains the source points of rank 0 of
221 // source field 1
222 // dimensions (for documentation only):
223 // tgt[rank][local_idx]
224 // src[rank][src_field_idx][local_idx]
225 //
226 // compute rank 0:
227 // tgt[1][0] = src[0][1][0]
228 // tgt[1][1] = src[0][1][1]
229 // tgt[1][2] = src[0][1][2]
230 // tgt[1][3] = src[0][1][3]
231 {.interp_data =
232 {{.tgt_receive =
233 {.ids = NULL,
234 .offsets = NULL,
235 .count = 0},
236 .tgt_send = {
237 .ids = (Xt_int[]){4,5,6,7},
238 .count = 4,
239 .src_global_ids_per_field = {NULL,NULL,NULL},
240 .num_src_points_per_field = {0,0,0},
241 .num_src_per_tgt = (size_t[]){1,1,1,1},
242 .weights = (double[]){1.0,1.0,1.0,1.0},
243 .src_field_idx = (size_t[]){1,1,1,1},
244 .src_idx = (size_t[]){0,1,2,3}},
245 .is_source = 1,
246 .is_target = 0},
247 {.tgt_receive =
248 {.ids = (Xt_int[]){4,5,6,7},
249 .offsets = (int[]){0,1,2,3},
250 .count = 4},
251 .tgt_send = {
252 .ids = NULL,
253 .count = 0,
254 .src_global_ids_per_field = {NULL,NULL,NULL},
255 .num_src_points_per_field = {0,0,0},
256 .num_src_per_tgt = NULL,
257 .weights = NULL,
258 .src_field_idx = NULL,
259 .src_idx = NULL},
260 .is_source = 0,
261 .is_target = 1}},
262 .num_src_fields = 3},
263 // some target data on rank 1 contains a weighted sum of source points
264 // of rank 0 of all source fields
265 // dimensions (for documentation only):
266 // tgt[rank][local_idx]
267 // src[rank][src_field_idx][local_idx]
268 //
269 // compute rank 0:
270 // tgt[1][1] = 0.4 * src[0][0][1] + 0.3 * src[0][1][1] + 0.3 * src[0][2][1]
271 // tgt[1][3] = 0.4 * src[0][0][3] + 0.3 * src[0][1][3] + 0.3 * src[0][2][3]
272 {.interp_data =
273 {{.tgt_receive =
274 {.ids = NULL,
275 .offsets = NULL,
276 .count = 0},
277 .tgt_send = {
278 .ids = (Xt_int[]){5,7},
279 .count = 2,
280 .src_global_ids_per_field = {NULL,NULL,NULL},
281 .num_src_points_per_field = {0,0,0},
282 .num_src_per_tgt = (size_t[]){3,3},
283 .weights = (double[]){0.4,0.3,0.3, 0.4,0.3,0.3},
284 .src_field_idx = (size_t[]){0,1,2, 0,1,2},
285 .src_idx = (size_t[]){1,1,1, 3,3,3}},
286 .is_source = 1,
287 .is_target = 0},
288 {.tgt_receive =
289 {.ids = (Xt_int[]){5,7},
290 .offsets = (int[]){1,3},
291 .count = 2},
292 .tgt_send = {
293 .ids = NULL,
294 .count = 0,
295 .src_global_ids_per_field = {NULL,NULL,NULL},
296 .num_src_points_per_field = {0,0,0},
297 .num_src_per_tgt = NULL,
298 .weights = NULL,
299 .src_field_idx = NULL,
300 .src_idx = NULL},
301 .is_source = 0,
302 .is_target = 1}},
303 .num_src_fields = 3},
304 // some target data on rank 0 contains a weighted sum of source points
305 // of rank 0 of all source fields
306 // dimensions (for documentation only):
307 // tgt[rank][local_idx]
308 // src[rank][src_field_idx][local_idx]
309 //
310 // compute rank 0:
311 // tgt[0][1] = 0.4 * src[0][0][1] + 0.3 * src[0][1][1] + 0.3 * src[0][2][1]
312 // tgt[0][3] = 0.4 * src[0][0][3] + 0.3 * src[0][1][3] + 0.3 * src[0][2][3]
313 {.interp_data =
314 {{.tgt_receive =
315 {.ids = (Xt_int[]){1,3},
316 .offsets = (int[]){1,3},
317 .count = 2},
318 .tgt_send = {
319 .ids = (Xt_int[]){1,3},
320 .count = 2,
321 .src_global_ids_per_field = {NULL,NULL,NULL},
322 .num_src_points_per_field = {0,0,0},
323 .num_src_per_tgt = (size_t[]){3,3},
324 .weights = (double[]){0.4,0.3,0.3, 0.4,0.3,0.3},
325 .src_field_idx = (size_t[]){0,1,2, 0,1,2},
326 .src_idx = (size_t[]){1,1,1, 3,3,3}},
327 .is_source = 1,
328 .is_target = 1},
329 {.tgt_receive =
330 {.ids = NULL,
331 .offsets = NULL,
332 .count = 0},
333 .tgt_send = {
334 .ids = NULL,
335 .count = 0,
336 .src_global_ids_per_field = {NULL,NULL,NULL},
337 .num_src_points_per_field = {0,0,0},
338 .num_src_per_tgt = NULL,
339 .weights = NULL,
340 .src_field_idx = NULL,
341 .src_idx = NULL},
342 .is_source = 0,
343 .is_target = 0}},
344 .num_src_fields = 3},
345 // some target data on rank 1 contains a weighted sum of various source
346 // points from both ranks
347 // dimensions (for documentation only):
348 // tgt[rank][local_idx]
349 // src[rank][src_field_idx][local_idx]
350 //
351 // compute rank 0:
352 // tgt[1][0] = 0.1 * src[0][1][0] + 0.1 * src[0][1][1] +
353 // 0.1 * src[1][1][0] + 0.1 * src[1][1][1]
354 // tgt[1][2] = 0.2 * src[0][0][2] + 0.2 * src[0][2][2] +
355 // 0.2 * src[1][2][2]
356 {.interp_data =
357 {{.tgt_receive =
358 {.ids = NULL,
359 .offsets = NULL,
360 .count = 0},
361 .tgt_send = {
362 .ids = (Xt_int[]){4,6},
363 .count = 2,
364 .src_global_ids_per_field = {NULL,
365 (Xt_int[]){4,5},
366 (Xt_int[]){6}},
367 .num_src_points_per_field = {0,2,1},
368 .num_src_per_tgt = (size_t[]){4,3},
369 .weights = (double[]){0.1,0.1,0.1,0.1, 0.2,0.2,0.2},
370 .src_field_idx = (size_t[]){1,1,SIZE_MAX,SIZE_MAX,
371 0,2,SIZE_MAX},
372 .src_idx = (size_t[]){0,1,0,1, 2,2,2}},
373 .is_source = 1,
374 .is_target = 0},
375 {.tgt_receive =
376 {.ids = (Xt_int[]){4,6},
377 .offsets = (int[]){0,2},
378 .count = 2},
379 .tgt_send = {
380 .ids = NULL,
381 .count = 0,
382 .src_global_ids_per_field = {NULL,NULL,NULL},
383 .num_src_points_per_field = {0,0,0},
384 .num_src_per_tgt = NULL,
385 .weights = NULL,
386 .src_field_idx = NULL,
387 .src_idx = NULL},
388 .is_source = 1,
389 .is_target = 1}},
390 .num_src_fields = 3},
391 // complex interpolation involving all processes
392 // dimensions (for documentation only):
393 // tgt[rank][local_idx]
394 // src[rank][src_field_idx][local_idx]
395 //
396 // compute rank 0:
397 // tgt[0][1] = 0.1 * src[0][0][0] + 0.2 * src[0][0][1] +
398 // 0.3 * src[1][0][2] + 0.4 * src[1][0][3]
399 // tgt[0][2] = 0.1 * src[0][1][0] + 0.2 * src[0][1][1]
400 // tgt[0][0] = 0.2 * src[1][2][2]
401 // tgt[1][0] = 0.1 * src[0][1][0] + 0.2 * src[0][1][1] +
402 // 0.3 * src[1][1][2] + 0.4 * src[1][1][3]
403 // tgt[1][1] = 0.1 * src[0][1][0] + 0.2 * src[0][1][1]
404 // compute rank 1:
405 // tgt[0][3] = 0.25 * src[1][2][0] + 0.25 * src[1][2][1] +
406 // 0.25 * src[1][2][2] + 0.25 * src[1][2][3]
407 // tgt[1][2] = 0.1 * src[0][0][0] + 0.2 * src[0][0][1] +
408 // 0.3 * src[1][0][2] + 0.4 * src[1][0][3]
409 // tgt[1][3] = 0.1 * src[0][1][0] + 0.2 * src[0][1][1]
410 //
411 {.interp_data =
412 {{.tgt_receive =
413 {.ids = (Xt_int[]){0,1,2,3},
414 .offsets = (int[]){0,1,2,3},
415 .count = 4},
416 .tgt_send = {
417 .ids = (Xt_int[]){1,2,0,4,5},
418 .count = 5,
419 .src_global_ids_per_field = {(Xt_int[]){6,7},
420 (Xt_int[]){6,7},
421 (Xt_int[]){6}},
422 .num_src_points_per_field = {2,2,1},
423 .num_src_per_tgt = (size_t[]){4,2,1,4,2},
424 .weights = (double[]){0.1,0.2,0.3,0.4,
425 0.1,0.2,
426 0.2,
427 0.1,0.2,0.3,0.4,
428 0.1,0.2},
429 .src_field_idx = (size_t[]){0,0,SIZE_MAX,SIZE_MAX,
430 1,1,
431 SIZE_MAX,
432 1,1,SIZE_MAX,SIZE_MAX,
433 1,1},
434 .src_idx = (size_t[]){0,1,0,1,
435 0,1,
436 4,
437 0,1,2,3,
438 0,1}},
439 .is_source = 1,
440 .is_target = 1},
441 {.tgt_receive =
442 {.ids = (Xt_int[]){4,5,6,7},
443 .offsets = (int[]){0,1,2,3},
444 .count = 4},
445 .tgt_send = {
446 .ids = (Xt_int[]){3,6,7},
447 .count = 3,
448 .src_global_ids_per_field = {(Xt_int[]){0,1},
449 (Xt_int[]){0,1},
450 NULL},
451 .num_src_points_per_field = {2,2,0},
452 .num_src_per_tgt = (size_t[]){4,4,2},
453 .weights = (double[]){0.25,0.25,0.25,0.25,
454 0.1,0.2,0.3,0.4,
455 0.1,0.2},
456 .src_field_idx = (size_t[]){2,2,2,2,
457 SIZE_MAX,SIZE_MAX,0,0,
458 SIZE_MAX,SIZE_MAX},
459 .src_idx = (size_t[]){0,1,2,3,
460 0,1,2,3,
461 2,3}},
462 .is_source = 1,
463 .is_target = 1}},
464 .num_src_fields = 3}};
465 enum {
466 NUM_INTERP_CONFIGS = sizeof(interp_configs) / sizeof(interp_configs[0])};
467
468 // generate yaxt index list containing global ids of all locally available
469 // source points (each source field contains the same global ids)
470 // each process holds the following global ids:
471 // i + comm_rank * NUM_SRC_POINTS
472 // with i in [0..NUM_SRC_POINTS]
473 Xt_int src_global_ids[NUM_SRC_POINTS];
474 for (size_t src_idx = 0; src_idx < NUM_SRC_POINTS; ++src_idx)
475 src_global_ids[src_idx] =
476 (Xt_int)(src_idx + comm_rank * NUM_SRC_POINTS);
477 Xt_idxlist src_idxlist =
478 xt_idxvec_new(src_global_ids, NUM_SRC_POINTS);
479
480 // loop over all interpolation configurations
481 for (size_t interp_config_idx = 0; interp_config_idx < NUM_INTERP_CONFIGS;
482 ++interp_config_idx) {
483
484 // generate input data for yac_interp_operator_sum_mvp_at_src_new from list
485 // of links
486 size_t num_src_fields =
487 interp_configs[interp_config_idx].num_src_fields;
488 Xt_redist src_halo_redists[num_src_fields];
489 for (size_t src_field_idx = 0; src_field_idx < num_src_fields;
490 ++src_field_idx) {
491
492 // create redist for receiving source data of the current source field
493 // required for the interpolation of the target points, which are to be
494 // interpolated by the local process
495 Xt_idxlist required_source_idxlist =
496 (interp_configs[interp_config_idx].
497 interp_data[comm_rank].
498 tgt_send.num_src_points_per_field[src_field_idx] > 0)?
499 xt_idxvec_new(
500 interp_configs[interp_config_idx].
501 interp_data[comm_rank].
502 tgt_send.src_global_ids_per_field[src_field_idx],
503 interp_configs[interp_config_idx].
504 interp_data[comm_rank].
505 tgt_send.num_src_points_per_field[src_field_idx]):
506 xt_idxempty_new();
507 Xt_xmap xmap =
508 xt_xmap_all2all_new(
509 src_idxlist, required_source_idxlist, MPI_COMM_WORLD);
510 src_halo_redists[src_field_idx] = xt_redist_p2p_new(xmap, MPI_DOUBLE);
511 xt_xmap_delete(xmap);
512 xt_idxlist_delete(required_source_idxlist);
513
514 } // src_field_idx
515
516 // create redist for receiving local target points that were
517 // interpolated on the source processes
518 Xt_redist tgt_result_redist;
519 {
520 Xt_idxlist interpolated_target_idxlist =
521 (interp_configs[interp_config_idx].
522 interp_data[comm_rank].tgt_send.count > 0)?
523 xt_idxvec_new(
524 interp_configs[interp_config_idx].
525 interp_data[comm_rank].tgt_send.ids,
526 interp_configs[interp_config_idx].
527 interp_data[comm_rank].tgt_send.count):
528 xt_idxempty_new();
529 int * interpolated_target_offsets =
530 xmalloc(
531 interp_configs[interp_config_idx].
532 interp_data[comm_rank].tgt_send.count *
533 sizeof(*interpolated_target_offsets));
534 for (size_t t = 0;
535 t < interp_configs[interp_config_idx].
536 interp_data[comm_rank].tgt_send.count; ++t) {
537 interpolated_target_offsets[t] = (int)t;
538 }
539 Xt_idxlist required_target_idxlist =
540 (interp_configs[interp_config_idx].
541 interp_data[comm_rank].tgt_receive.count > 0)?
542 xt_idxvec_new(
543 interp_configs[interp_config_idx].
544 interp_data[comm_rank].tgt_receive.ids,
545 interp_configs[interp_config_idx].
546 interp_data[comm_rank].tgt_receive.count):
547 xt_idxempty_new();
548
549 Xt_xmap xmap =
550 xt_xmap_all2all_new(
551 interpolated_target_idxlist, required_target_idxlist,
552 MPI_COMM_WORLD);
553 tgt_result_redist =
554 xt_redist_p2p_off_new(
555 xmap, interpolated_target_offsets,
556 interp_configs[interp_config_idx].
557 interp_data[comm_rank].tgt_receive.offsets, MPI_DOUBLE);
558
559 xt_xmap_delete(xmap);
560 xt_idxlist_delete(required_target_idxlist);
561 xt_idxlist_delete(interpolated_target_idxlist);
562 free(interpolated_target_offsets);
563 }
564
565 for (int with_frac_mask = 0; with_frac_mask <= 1; ++with_frac_mask) {
566
567 // if without fractional masking, set fallback value to
568 // YAC_FRAC_MASK_NO_VALUE, which deactivates it
569 double frac_mask_fallback_value =
570 with_frac_mask?-10.0:YAC_FRAC_MASK_NO_VALUE;
571
572 for (int use_weights = 0; use_weights <= 1; ++use_weights) {
573
574 // if without weights, set it to NULL, which deactivates it
575 double * weights =
576 use_weights?
577 interp_configs[interp_config_idx].
578 interp_data[comm_rank].tgt_send.weights:NULL;
579
580 // some consistency checking of the input data
581 {
582 size_t total_num_receive_src_points = 0;
583 for (size_t i = 0;
584 i < interp_configs[interp_config_idx].
585 interp_data[comm_rank].tgt_send.count; ++i) {
586 total_num_receive_src_points +=
587 interp_configs[interp_config_idx].
588 interp_data[comm_rank].tgt_send.num_src_per_tgt[i];
589 }
590 for (size_t i = 0; i < total_num_receive_src_points; ++i) {
592 (interp_configs[interp_config_idx].
593 interp_data[comm_rank].
594 tgt_send.src_field_idx[i] == SIZE_MAX) ||
595 (interp_configs[interp_config_idx].
596 interp_data[comm_rank].
597 tgt_send.src_field_idx[i] < num_src_fields),
598 "ERROR in src_field_idx");
600 interp_configs[interp_config_idx].
601 interp_data[comm_rank].tgt_send.src_idx[i] <
602 total_num_receive_src_points,
603 "ERROR in src_idx");
604 }
605 }
606
607 // generate interpolation for the current configuration
608 struct yac_interp_operator * interp =
610 sel, src_halo_redists,
611 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.count,
612 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.num_src_per_tgt,
613 weights,
614 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.src_field_idx,
615 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.src_idx,
616 num_src_fields, tgt_result_redist, with_frac_mask);
617
618 // test interpolation built above and a copy of it, to make sure that
619 // the copy-function works properly
620 for (int use_copy = 0; use_copy <= 1; ++use_copy) {
621
622 if (use_copy) {
623 struct yac_interp_operator * interp_copy =
626 interp = interp_copy;
627 }
628
629 { // check is_source and is_target
630
631 if (yac_interp_operator_is_source(interp) !=
632 interp_configs[interp_config_idx].
633 interp_data[comm_rank].is_source)
634 PUT_ERR("ERROR in is_source");
635
636 if (yac_interp_operator_is_target(interp) !=
637 interp_configs[interp_config_idx].
638 interp_data[comm_rank].is_target)
639 PUT_ERR("ERROR in is_target");
640 }
641
642 { // synchronous test (put and get in a single exchange call)
643
644 // initialise source and target fields
645 double *** src_fields_collection;
646 double *** src_fields_frac_mask_collection;
647 double ** tgt_field_collection;
648 utest_init_data(
649 sel, &src_fields_collection, &src_fields_frac_mask_collection,
650 &tgt_field_collection);
651
653 interp, src_fields_collection, src_fields_frac_mask_collection,
654 tgt_field_collection, frac_mask_fallback_value, 1.0, 0.0);
655
656 // there should be no open put or get operation
659 PUT_ERR("ERROR in execute_put_test");
662 PUT_ERR("ERROR in execute_get_test");
663
664 // check results
665 utest_check_sum_mvp_at_src(
666 sel, src_halo_redists, tgt_result_redist,
667 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.count,
668 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.num_src_points_per_field,
669 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.num_src_per_tgt,
670 weights,
671 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.src_field_idx,
672 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.src_idx,
673 num_src_fields, frac_mask_fallback_value,
674 src_fields_collection, src_fields_frac_mask_collection,
675 tgt_field_collection);
676
677 // free source and target fields
678 utest_free_data(
679 sel, src_fields_collection, src_fields_frac_mask_collection,
680 tgt_field_collection);
681 }
682
683 { // independent put + get
684
685 // initialise source and target fields
686 double *** src_fields_collection;
687 double *** src_fields_frac_mask_collection;
688 double ** tgt_field_collection;
689 utest_init_data(
690 sel, &src_fields_collection, &src_fields_frac_mask_collection,
691 &tgt_field_collection);
692
694 interp, src_fields_collection,
695 src_fields_frac_mask_collection,
696 1, frac_mask_fallback_value, 1.0, 0.0);
698 interp, tgt_field_collection,
699 frac_mask_fallback_value, 1.0, 0.0);
700
701 // there should be no open put or get operation
704 PUT_ERR("ERROR in execute_put_test");
707 PUT_ERR("ERROR in execute_get_test");
708
709 // check results
710 utest_check_sum_mvp_at_src(
711 sel, src_halo_redists, tgt_result_redist,
712 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.count,
713 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.num_src_points_per_field,
714 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.num_src_per_tgt,
715 weights,
716 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.src_field_idx,
717 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.src_idx,
718 num_src_fields, frac_mask_fallback_value,
719 src_fields_collection, src_fields_frac_mask_collection,
720 tgt_field_collection);
721
722 // free source and target fields
723 utest_free_data(
724 sel, src_fields_collection, src_fields_frac_mask_collection,
725 tgt_field_collection);
726 }
727
728 { // independent put + async get + wait
729
730 // initialise source and target fields
731 double *** src_fields_collection;
732 double *** src_fields_frac_mask_collection;
733 double ** tgt_field_collection;
734 utest_init_data(
735 sel, &src_fields_collection, &src_fields_frac_mask_collection,
736 &tgt_field_collection);
737
739 interp, src_fields_collection,
740 src_fields_frac_mask_collection,
741 1, frac_mask_fallback_value, 1.0, 0.0);
743 interp, tgt_field_collection,
744 frac_mask_fallback_value, 1.0, 0.0);
746
747 // there should be no open put or get operation
750 PUT_ERR("ERROR in execute_put_test");
753 PUT_ERR("ERROR in execute_get_test");
754
755 // check results
756 utest_check_sum_mvp_at_src(
757 sel, src_halo_redists, tgt_result_redist,
758 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.count,
759 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.num_src_points_per_field,
760 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.num_src_per_tgt,
761 weights,
762 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.src_field_idx,
763 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.src_idx,
764 num_src_fields, frac_mask_fallback_value,
765 src_fields_collection, src_fields_frac_mask_collection,
766 tgt_field_collection);
767
768 // free source and target fields
769 utest_free_data(
770 sel, src_fields_collection, src_fields_frac_mask_collection,
771 tgt_field_collection);
772 }
773
774 { // independent put + async get + test_get-loop
775
776 // initialise source and target fields
777 double *** src_fields_collection;
778 double *** src_fields_frac_mask_collection;
779 double ** tgt_field_collection;
780 utest_init_data(
781 sel, &src_fields_collection, &src_fields_frac_mask_collection,
782 &tgt_field_collection);
783
785 interp, src_fields_collection,
786 src_fields_frac_mask_collection,
787 1, frac_mask_fallback_value, 1.0, 0.0);
789 interp, tgt_field_collection,
790 frac_mask_fallback_value, 1.0, 0.0);
791 while(
794
795 // there should be no open put or get operation
798 PUT_ERR("ERROR in execute_put_test");
801 PUT_ERR("ERROR in execute_get_test");
802
803 // check results
804 utest_check_sum_mvp_at_src(
805 sel, src_halo_redists, tgt_result_redist,
806 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.count,
807 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.num_src_points_per_field,
808 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.num_src_per_tgt,
809 weights,
810 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.src_field_idx,
811 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.src_idx,
812 num_src_fields, frac_mask_fallback_value,
813 src_fields_collection, src_fields_frac_mask_collection,
814 tgt_field_collection);
815
816 // free source and target fields
817 utest_free_data(
818 sel, src_fields_collection, src_fields_frac_mask_collection,
819 tgt_field_collection);
820 }
821
822 { // independent async get + put + wait
823
824 // initialise source and target fields
825 double *** src_fields_collection;
826 double *** src_fields_frac_mask_collection;
827 double ** tgt_field_collection;
828 utest_init_data(
829 sel, &src_fields_collection, &src_fields_frac_mask_collection,
830 &tgt_field_collection);
831
833 interp, tgt_field_collection,
834 frac_mask_fallback_value, 1.0, 0.0);
836 interp, src_fields_collection,
837 src_fields_frac_mask_collection,
838 1, frac_mask_fallback_value, 1.0, 0.0);
840
841 // there should be no open put or get operation
844 PUT_ERR("ERROR in execute_put_test");
847 PUT_ERR("ERROR in execute_get_test");
848
849 // check results
850 utest_check_sum_mvp_at_src(
851 sel, src_halo_redists, tgt_result_redist,
852 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.count,
853 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.num_src_points_per_field,
854 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.num_src_per_tgt,
855 weights,
856 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.src_field_idx,
857 interp_configs[interp_config_idx].interp_data[comm_rank].tgt_send.src_idx,
858 num_src_fields, frac_mask_fallback_value,
859 src_fields_collection, src_fields_frac_mask_collection,
860 tgt_field_collection);
861
862 // free source and target fields
863 utest_free_data(
864 sel, src_fields_collection, src_fields_frac_mask_collection,
865 tgt_field_collection);
866 }
867
868 } // use_copy
869
871 } // use_weights
872
873 } // with_frac_mask
874
875 xt_redist_delete(tgt_result_redist);
876 for (size_t f = 0; f < num_src_fields; ++f)
877 xt_redist_delete(src_halo_redists[f]);
878
879 } // interp_config_idx
880
881 xt_idxlist_delete(src_idxlist);
883
884 } // sel_idx
885
886 xt_finalize();
887 MPI_Finalize();
888 return TEST_EXIT_CODE;
889}
890
891static void utest_init_data(
892 struct yac_collection_selection * sel, double **** src_fields_collection,
893 double **** src_fields_frac_mask_collection,
894 double *** tgt_field_collection) {
895
897
898 int rank;
899 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
900
901 *src_fields_collection =
902 xmalloc(MAX_COLLECTION_SIZE * sizeof(**src_fields_collection));
903 *src_fields_frac_mask_collection =
904 xmalloc(MAX_COLLECTION_SIZE * sizeof(**src_fields_frac_mask_collection));
905
906 for (size_t c = 0; c < MAX_COLLECTION_SIZE; ++c) {
907
908 (*src_fields_collection)[c] =
909 xmalloc(MAX_NUM_SRC_FIELDS * sizeof(***src_fields_collection));
910 (*src_fields_frac_mask_collection)[c] =
911 xmalloc(MAX_NUM_SRC_FIELDS * sizeof(***src_fields_frac_mask_collection));
912
913 for (size_t f = 0; f < MAX_NUM_SRC_FIELDS; ++f) {
914
915 (*src_fields_collection)[c][f] =
916 xmalloc(NUM_SRC_POINTS * sizeof(***src_fields_collection));
917 (*src_fields_frac_mask_collection)[c][f] =
918 xmalloc(NUM_SRC_POINTS * sizeof(***src_fields_frac_mask_collection));
919
920 for (size_t i = 0; i < NUM_SRC_POINTS; ++i) {
921 (*src_fields_collection)[c][f][i] =
922 (double)(c*1000 + f*100 + rank*10 + i);
923 (*src_fields_frac_mask_collection)[c][f][i] = 1.0;
924 }
925 }
926 }
927
928 *tgt_field_collection =
929 xmalloc(collection_size * sizeof(**tgt_field_collection));
930
931 for (size_t c = 0; c < collection_size; ++c) {
932
933 (*tgt_field_collection)[c] =
934 xmalloc(NUM_TGT_POINTS * sizeof(**tgt_field_collection));
935
936 for (size_t i = 0; i < NUM_TGT_POINTS; ++i) {
937 (*tgt_field_collection)[c][i] = TGT_UNSET_VALUE;
938 }
939 }
940}
941
942static void utest_free_data(
943 struct yac_collection_selection * sel, double *** src_fields_collection,
944 double *** src_fields_frac_mask_collection, double ** tgt_field_collection) {
945
947
948 for (size_t c = 0; c < MAX_COLLECTION_SIZE; ++c) {
949
950 for (size_t f = 0; f < MAX_NUM_SRC_FIELDS; ++f) {
951
952 free(src_fields_collection[c][f]);
953 free(src_fields_frac_mask_collection[c][f]);
954 }
955
956 free(src_fields_collection[c]);
957 free(src_fields_frac_mask_collection[c]);
958 }
959
960 free(src_fields_collection);
961 free(src_fields_frac_mask_collection);
962
963 for (size_t c = 0; c < collection_size; ++c) {
964
965 free(tgt_field_collection[c]);
966 }
967
968 free(tgt_field_collection);
969}
970
971static void utest_check_sum_mvp_at_src(struct yac_collection_selection * sel,
972 Xt_redist * src_halo_redists,
973 Xt_redist tgt_result_redist,
974 size_t tgt_count,
975 size_t * num_src_points_per_field,
976 size_t * num_src_per_tgt, double * weights,
977 size_t * src_field_idx, size_t * src_idx,
978 size_t num_src_fields,
979 double frac_mask_fallback_value,
980 double *** src_fields_collection,
981 double *** src_fields_frac_mask_collection,
982 double ** tgt_field_collection) {
983
985 size_t const * collection_indices = yac_collection_selection_get_indices(sel);
986
987 // initialise reference target field with dummy values
988 // and generate array containg pointers to the field associated to
989 // each collection
990 double ** ref_tgt_field_collection =
991 xmalloc(collection_size * sizeof(*ref_tgt_field_collection));
992 for (size_t c = 0; c < collection_size; ++c) {
993 ref_tgt_field_collection[c] =
994 xmalloc(NUM_TGT_POINTS * sizeof(**ref_tgt_field_collection));
995 for (size_t i = 0; i < NUM_TGT_POINTS; ++i) {
996 ref_tgt_field_collection[c][i] = TGT_UNSET_VALUE;
997 }
998 }
999
1000 // compute prefix sum of num_src_per_tgt, which is required
1001 // by compute_tgt_field_wgt
1002 size_t * prefix_num_src_per_tgt =
1003 xmalloc((tgt_count+1) * sizeof(*prefix_num_src_per_tgt));
1004 prefix_num_src_per_tgt[0] = 0;
1005 for (size_t i = 0, accu = 0; i < tgt_count; ++i) {
1006 accu += num_src_per_tgt[i];
1007 prefix_num_src_per_tgt[i+1] = accu;
1008 }
1009
1010 // Generate temporary target field collection buffer, which will hold
1011 // interpolation results on the source process. These results are then
1012 // redistributed to the target processes using tgt_result_redist.
1013 double ** temp_tgt_field_collection =
1014 xmalloc(collection_size * sizeof(*temp_tgt_field_collection));
1015 for (size_t c = 0; c < collection_size; ++c) {
1016 temp_tgt_field_collection[c] =
1017 xmalloc(tgt_count * sizeof(**temp_tgt_field_collection));
1018 for (size_t i = 0; i < tgt_count; ++i) {
1019 temp_tgt_field_collection[c][i] = TGT_UNSET_VALUE;
1020 }
1021 }
1022
1023 // On the source processes the interpolated target points are written into
1024 // a contiguous.
1025 size_t * tgt_pos = xmalloc(tgt_count * sizeof(*tgt_pos));
1026 for (size_t t = 0; t < tgt_count; ++t) {tgt_pos[t] = t;}
1027
1028 // Generate contiguous view of selected source collection selection
1029 // (This simplifies further steps.)
1030 double *** cont_src_fields_collection =
1031 xmalloc(collection_size * sizeof(*cont_src_fields_collection));
1032 double *** cont_src_fields_frac_mask_collection =
1033 xmalloc(collection_size * sizeof(*src_fields_frac_mask_collection));
1034 for (size_t c = 0; c < collection_size; ++c) {
1035 // get collection index (if non-contiguous) otherwise use c
1036 size_t c_idx = (collection_indices == NULL) ? c : collection_indices[c];
1037 cont_src_fields_collection[c] =
1038 src_fields_collection[c_idx];
1039 cont_src_fields_frac_mask_collection[c] =
1040 src_fields_frac_mask_collection[c_idx];
1041 }
1042
1043 // setup pointers for receiving source data from other processes
1044 // (since these arrays are used during an intermediate step, contiguous
1045 // collection indices can be assumed)
1046 // (compute_tgt_field_wgt assumes that received source points from all fields
1047 // is in one contiguous buffer, which is why pointers stored in
1048 // remote_src_fields and remote_src_frac_masks are computed based on
1049 // num_src_points_per_field)
1050 enum {SRC_FIELD = 0, SRC_FRAC_MASK = 1};
1051 double * remote_src_buffer[2][collection_size];
1052 double * remote_src_fields[collection_size][num_src_fields];
1053 double * remote_src_frac_masks[collection_size][num_src_fields];
1054 for (size_t i = 0; i < 2; ++i) {
1055 for (size_t c = 0; c < collection_size; ++c) {
1056 // allocate buffer to the maximum possible buffer size
1057 remote_src_buffer[i][c] =
1058 xmalloc(
1059 num_src_fields * (NUM_PROCS - 1) * NUM_SRC_POINTS *
1060 sizeof(remote_src_buffer[i][c][0]));
1061 for (size_t k = 0;
1062 k < num_src_fields * (NUM_PROCS - 1) * NUM_SRC_POINTS; ++k) {
1063 // initialise buffer with NAN to make error detection easier
1064 remote_src_buffer[i][c][k] = NAN;
1065 }
1066 }
1067 }
1068 for (size_t c = 0; c < collection_size; ++c) {
1069 for (size_t f = 0, offset = 0; f < num_src_fields; ++f) {
1070 remote_src_fields[c][f] =
1071 &remote_src_buffer[SRC_FIELD][c][offset];
1072 remote_src_frac_masks[c][f] =
1073 &remote_src_buffer[SRC_FRAC_MASK][c][offset];
1074 offset += num_src_points_per_field[f];
1075 }
1076 }
1077
1078 // for each collection entry
1079 for (size_t c = 0; c < collection_size; ++c) {
1080
1081 // get collection index (if non-contiguous) otherwise use c
1082 size_t c_idx = (collection_indices == NULL) ? c : collection_indices[c];
1083
1084 // for each source field
1085 for (size_t f = 0; f < num_src_fields; ++f) {
1086
1087 // receive source point and fractional mask values required for the
1088 // interpolation of the local target points
1089 xt_redist_s_exchange1(
1090 src_halo_redists[f],
1091 (const void *)(src_fields_collection[c_idx][f]),
1092 (void *)(remote_src_fields[c][f]));
1093 xt_redist_s_exchange1(
1094 src_halo_redists[f],
1095 (const void *)(src_fields_frac_mask_collection[c_idx][f]),
1096 (void *)(remote_src_frac_masks[c][f]));
1097 }
1098 }
1099
1100 // compute target field reference
1102 (double const * restrict **)cont_src_fields_collection,
1103 (double const * restrict **)cont_src_fields_frac_mask_collection,
1104 (double const **)remote_src_fields,
1105 (double const **)remote_src_frac_masks,
1106 temp_tgt_field_collection,
1107 tgt_pos, tgt_count, prefix_num_src_per_tgt, weights,
1108 src_field_idx, src_idx, num_src_fields, collection_size,
1109 frac_mask_fallback_value, 1.0, 0.0);
1110
1111 // for each collection entry
1112 for (size_t c = 0; c < collection_size; ++c) {
1113
1114 // redistribute the interpolated target points from the source processes to
1115 // the target ones
1116 xt_redist_s_exchange1(
1117 tgt_result_redist,
1118 (const void *)(temp_tgt_field_collection[c]),
1119 (void *)(ref_tgt_field_collection[c]));
1120 }
1121
1122 // check target field results generate by interpolation operation against
1123 // refernce (if a target point is not being interpolated by the
1124 // interpolation operation, the target field and the reference should both
1125 // contain the value TGT_UNSET_VALUE)
1126 for (size_t c = 0; c < collection_size; ++c) {
1127 for (size_t i = 0; i < NUM_TGT_POINTS; ++i) {
1128 if (fabs(tgt_field_collection[c][i] -
1129 ref_tgt_field_collection[c][i]) > 1e-9) {
1130 PUT_ERR("wrong data in sum_mvp_at_src interpolation");
1131 }
1132 }
1133 }
1134
1135 // clean up
1136 free(prefix_num_src_per_tgt);
1137 free(cont_src_fields_collection);
1138 free(cont_src_fields_frac_mask_collection);
1139 for (size_t i = 0; i < 2; ++i) {
1140 for (size_t c = 0; c < collection_size; ++c) {
1141 free(remote_src_buffer[i][c]);
1142 }
1143 }
1144 for (size_t c = 0; c < collection_size; ++c) {
1145 free(ref_tgt_field_collection[c]);
1146 }
1147 free(ref_tgt_field_collection);
1148 for (size_t c = 0; c < collection_size; ++c) {
1149 free(temp_tgt_field_collection[c]);
1150 }
1151 free(temp_tgt_field_collection);
1152 free(tgt_pos);
1153}
#define YAC_ASSERT(exp, msg)
size_t yac_collection_selection_get_collection_size(struct yac_collection_selection const *collection_selection)
Get the size of the collection selection.
size_t const * yac_collection_selection_get_indices(struct yac_collection_selection const *collection_selection)
Get explicit selection indices if non-contiguous.
void yac_collection_selection_delete(struct yac_collection_selection *collection_selection)
Delete a collection selection object.
struct yac_collection_selection * yac_collection_selection_new(size_t collection_size, size_t const *selection_indices)
Create a new collection selection.
struct yac_interp_operator * yac_interp_operator_sum_mvp_at_src_new(struct yac_collection_selection const *collection_selection, Xt_redist *halo_redists, size_t tgt_count, size_t *num_src_per_tgt, double *weights, size_t *src_field_idx, size_t *src_idx, size_t num_src_fields, Xt_redist result_redist_, int with_frac_mask)
Create a sum (weighted or unweighted) interpolation operator computed on the source processes.
void yac_interp_operator_execute_wait(struct yac_interp_operator *interp)
Wait for all pending put/get operations to finish.
int yac_interp_operator_is_target(struct yac_interp_operator *interp)
Checks if the current process holds target data for the interpolation operator.
struct yac_interp_operator * yac_interp_operator_copy(struct yac_interp_operator *interp)
Create a deep copy of the interpolation operator.
enum YAC_INTERP_TEST_STATUS yac_interp_operator_execute_put_test(struct yac_interp_operator *interp)
Test whether the put phase has completed.
int yac_interp_operator_is_source(struct yac_interp_operator *interp)
Checks if the current process holds source data for the interpolation operator.
enum YAC_INTERP_TEST_STATUS yac_interp_operator_execute_get_test(struct yac_interp_operator *interp)
Test whether the get phase has completed.
void yac_interp_operator_execute_get(struct yac_interp_operator *interp, double **tgt_field, double frac_mask_fallback_value, double scale_factor, double scale_summand)
void yac_interp_operator_execute_put(struct yac_interp_operator *interp, double ***src_fields, double ***src_frac_masks, int is_target, double frac_mask_fallback_value, double scale_factor, double scale_summand)
void yac_interp_operator_execute_get_async(struct yac_interp_operator *interp, double **tgt_field, double frac_mask_fallback_value, double scale_factor, double scale_summand)
void yac_interp_operator_execute(struct yac_interp_operator *interp, double ***src_fields, double ***src_frac_masks, double **tgt_field, double frac_mask_fallback_value, double scale_factor, double scale_summand)
void yac_interp_operator_delete(struct yac_interp_operator *interp)
Delete the interpolation operator and free resources.
@ YAC_INTERP_INCOMPLETE
@ YAC_INTERP_COMPLETE
Weighted/unweighted sum operator at source in YAC.
double const YAC_FRAC_MASK_NO_VALUE
Public interface for interpolation execution in YAC.
static void compute_tgt_field_wgt(double const *restrict **src_fields, double const *restrict **src_frac_masks, double const *restrict *remote_src_fields, double const *restrict *remote_src_frac_masks, double *restrict *tgt_field, size_t const *restrict tgt_pos, size_t tgt_count, size_t const *restrict prefix_num_src_per_tgt, double const *restrict weights, size_t const *restrict src_field_idx, size_t const *restrict src_idx, size_t num_src_fields, size_t collection_size, double frac_mask_fallback_value, double scale_factor, double scale_summand)
Compute target field values optionally using weighted sums of source data and optionally applying fra...
#define xmalloc(size)
Definition ppm_xfuncs.h:66
Abstract interpolation operator type.
int collection_size
#define N
#define TEST_EXIT_CODE
Definition tests.h:14
#define PUT_ERR(string)
Definition tests.h:10