YAC 3.7.0
Yet Another Coupler
Loading...
Searching...
No Matches
interp_weights.c
Go to the documentation of this file.
1// Copyright (c) 2024 The YAC Authors
2//
3// SPDX-License-Identifier: BSD-3-Clause
4
5#ifdef HAVE_CONFIG_H
6// Get the definition of the 'restrict' keyword.
7#include "config.h"
8#endif
9
10#include <string.h>
11
12#define WEIGHT_TOL (1e-9)
13
14#include "ppm/ppm_xfuncs.h"
15#include "yac_mpi_internal.h"
17#include "ensure_array_size.h"
18#include "io_utils.h"
19#include "utils_core.h"
20#include "interp_method_file.h"
22
23#define YAC_YAXT_EXCHANGER_STR "YAC_YAXT_EXCHANGER"
24
35
37
39 union {
40 struct {
41 double value;
43 struct {
44 struct remote_point src; // src id
46 struct {
47 struct remote_points * srcs; // src ids
48 } sum;
49 struct {
50 struct remote_points * srcs; // src ids
51 double * weights;
53 struct {
54 struct remote_point src; // src id
55 size_t field_idx;
57 struct {
58 struct remote_points * srcs; // src ids
59 size_t * field_indices;
61 struct {
62 struct remote_points * srcs; // src ids
63 double * weights;
64 size_t * field_indices;
67 struct remote_point tgt; //tgt id
68};
69
76
82
87
92
94 double value;
95 size_t orig_pos;
96};
97
102
108
113
125
131
147
149 MPI_Comm comm, enum yac_location tgt_location,
151
152 struct yac_interp_weights * weights = xmalloc(1 * sizeof(*weights));
153
154 weights->comm = comm;
155 weights->tgt_location = tgt_location;
156 weights->src_locations = xmalloc(num_src_fields * sizeof(*src_locations));
157 memcpy(
159 num_src_fields * sizeof(*src_locations));
161 weights->stencils = NULL;
162 weights->stencils_array_size = 0;
163 weights->stencils_size = 0;
164
165 return weights;
166}
167
168static inline struct remote_point copy_remote_point(
169 struct remote_point point) {
170
171 int count = point.data.count;
172 if (count > 1) {
173 struct remote_point_info * point_infos =
174 xmalloc((size_t)count * sizeof(*point_infos));
175 memcpy(point_infos, point.data.data.multi,
176 (size_t)count * sizeof(*point_infos));
177 point.data.data.multi = point_infos;
178 }
179 return point;
180}
181
183 struct remote_point * points_to, struct remote_point * points_from,
184 size_t count, struct remote_point_info ** point_info_buffer_) {
185
186 struct remote_point_info * point_info_buffer = *point_info_buffer_;
187
188 for (size_t i = 0; i < count; ++i) {
189 int curr_count = points_from[i].data.count;
190 points_to[i] = points_from[i];
191 if (curr_count > 1) {
192 points_to[i].data.data.multi = point_info_buffer;
193 memcpy(
194 point_info_buffer, points_from[i].data.data.multi,
195 (size_t)curr_count * sizeof(*point_info_buffer));
196 point_info_buffer += curr_count;
197 }
198 }
199 *point_info_buffer_ = point_info_buffer;
200}
201
/**
 * Creates a newly allocated struct remote_points that holds a copy of
 * count remote points.
 *
 * Two allocations are made: one for the struct itself plus trailing space
 * (points_copy->buffer) sized for the info entries of all points with
 * data.count > 1, and one for the point array (points_copy->data).
 *
 * @param[in] points array of remote points to be copied
 * @param[in] count  number of entries in points
 * @return pointer to the newly allocated copy
 */
202static inline struct remote_points * copy_remote_points(
203 struct remote_point * points, size_t count) {
204
 // only points with more than one info entry contribute to the buffer size
205 size_t point_info_buffer_size = 0;
206 for (size_t i = 0; i < count; ++i)
207 if (points[i].data.count > 1)
208 point_info_buffer_size += (size_t)(points[i].data.count);
209
210 struct remote_points * points_copy =
211 xmalloc(point_info_buffer_size * sizeof(struct remote_point_info) +
212 sizeof(*points_copy));
213 points_copy->data = xmalloc(count * sizeof(*(points_copy->data)));
214 points_copy->count = count;
 // cursor into the trailing info buffer; its address is handed to the
 // copy call below
215 struct remote_point_info * point_info_buffer = &(points_copy->buffer[0]);
216
 // NOTE(review): the line naming the callee is not present here; the
 // arguments match the (points_to, points_from, count, point_info_buffer_)
 // parameter list of the routine preceding this one -- confirm
218 points_copy->data, points, count, &point_info_buffer);
219
220 return points_copy;
221}
222
224 struct remote_point ** points, size_t * counts, size_t num_fields) {
225
226 size_t point_info_buffer_size = 0;
227 size_t total_count = 0;
228 for (size_t i = 0; i < num_fields; ++i) {
229 total_count += counts[i];
230 for (size_t j = 0; j < counts[i]; ++j) {
231 if (points[i][j].data.count > 1)
232 point_info_buffer_size += (size_t)(points[i][j].data.count);
233 }
234 }
235
236 struct remote_points * points_copy =
237 xmalloc(point_info_buffer_size * sizeof(struct remote_point_info) +
238 sizeof(*points_copy));
239 points_copy->data = xmalloc(total_count * sizeof(*(points_copy->data)));
240 points_copy->count = total_count;
241 struct remote_point_info * point_info_buffer = &(points_copy->buffer[0]);
242
243 for (size_t i = 0, k = 0; i < num_fields; ++i) {
244 for (size_t j = 0; j < counts[i]; ++j, ++k) {
245 int curr_count = points[i][j].data.count;
246 points_copy->data[k] = points[i][j];
247 if (curr_count > 1) {
248 points_copy->data[k].data.data.multi = point_info_buffer;
249 memcpy(
250 point_info_buffer, points[i][j].data.data.multi,
251 (size_t)curr_count * sizeof(*point_info_buffer));
252 point_info_buffer += curr_count;
253 }
254 }
255 }
256 return points_copy;
257}
258
260 struct yac_interp_weights * weights, struct remote_points * tgts,
261 double fixed_value) {
262
263 struct interp_weight_stencil * stencils = weights->stencils;
264 size_t stencils_array_size = weights->stencils_array_size;
265 size_t stencils_size = weights->stencils_size;
266
267 ENSURE_ARRAY_SIZE(stencils, stencils_array_size, stencils_size + tgts->count);
268
269 for (size_t i = 0; i < tgts->count; ++i, ++stencils_size) {
270
271 stencils[stencils_size].type = FIXED;
272 stencils[stencils_size].tgt = copy_remote_point(tgts->data[i]);
273 stencils[stencils_size].data.fixed.value = fixed_value;
274 }
275
276 weights->stencils = stencils;
277 weights->stencils_array_size = stencils_array_size;
278 weights->stencils_size = stencils_size;
279}
280
282 struct yac_interp_weights * weights, struct remote_points * tgts,
283 size_t * num_src_per_tgt, struct remote_point * srcs, double * w) {
284
285 if (tgts->count == 0) return;
286
287 // determine whether there are zero-weights
288 int pack_flag = 0;
289 for (size_t i = 0, k = 0; (i < tgts->count) && !pack_flag; ++i)
290 for (size_t j = 0; (j < num_src_per_tgt[i]) && !pack_flag; ++j, ++k)
291 pack_flag = (fabs(w[k]) <= WEIGHT_TOL);
292
293 if (pack_flag) {
294
295 for (size_t i = 0, k = 0, l = 0;
296 i < tgts->count; i++) {
297
298 size_t curr_count = num_src_per_tgt[i];
299
300 for (size_t j = 0; j < curr_count; j++, k++) {
301
302 if (fabs(w[k]) < WEIGHT_TOL) {
303 num_src_per_tgt[i]--;
304 } else {
305 if (l != k) {
306 srcs[l] = srcs[k];
307 w[l] = w[k];
308 }
309 ++l;
310 }
311 }
312
313 // if all weights were zero
314 if ((curr_count != 0) && (num_src_per_tgt[i] == 0)) {
315
316 if (l != k) {
317 srcs[l] = srcs[k - curr_count];
318 w[l] = 0.0;
319 }
320 num_src_per_tgt[i] = 1;
321 ++l;
322 }
323 }
324 }
325
326 // check whether all weights are 1.0 and whether the number of source
327 // points per target is one for all targets
328 int flag_weight_one = 1;
329 int flag_count_one = 1;
330 for (size_t i = 0, j = 0;
331 (i < tgts->count) && (flag_weight_one || flag_count_one); ++i) {
332
333 size_t curr_count = num_src_per_tgt[i];
334 flag_count_one &= curr_count == 1;
335
336 for (size_t k = 0; (k < curr_count) && flag_weight_one; ++k, ++j)
337 flag_weight_one &= fabs(w[j] - 1.0) < WEIGHT_TOL;
338 }
339
340 // if all weights are 1.0 -> use more optimised weight type
341 if (flag_weight_one) {
342
343 // if the number of source points for all target points is one
344 if (flag_count_one)
346 else
347 yac_interp_weights_add_sum(weights, tgts, num_src_per_tgt, srcs);
348
349 } else {
350
351 struct interp_weight_stencil * stencils = weights->stencils;
352 size_t stencils_array_size = weights->stencils_array_size;
353 size_t stencils_size = weights->stencils_size;
354
355 ENSURE_ARRAY_SIZE(stencils, stencils_array_size, stencils_size + tgts->count);
356
357 for (size_t i = 0; i < tgts->count; ++i, ++stencils_size) {
358
359 size_t curr_num_src = num_src_per_tgt[i];
360
361 // remove target for which no weights were provided
362 if (curr_num_src == 0) {
363 --stencils_size;
364 continue;
365 }
366
367 double * curr_weights =
368 xmalloc(curr_num_src * sizeof(*curr_weights));
369
370 stencils[stencils_size].type = WEIGHT_SUM;
371 stencils[stencils_size].tgt = copy_remote_point(tgts->data[i]);
372 stencils[stencils_size].data.weight_sum.srcs =
373 copy_remote_points(srcs, curr_num_src);
374 stencils[stencils_size].data.weight_sum.weights = curr_weights;
375 memcpy(curr_weights, w, curr_num_src * sizeof(*curr_weights));
376
377 srcs += curr_num_src;
378 w += curr_num_src;
379 }
380
381 weights->stencils = stencils;
382 weights->stencils_array_size = stencils_array_size;
383 weights->stencils_size = stencils_size;
384 }
385}
386
388 struct yac_interp_weights * weights, struct remote_points * tgts,
389 size_t * num_src_per_tgt, struct remote_point * srcs) {
390
391 if (tgts->count == 0) return;
392
393 // check whether the number of source points per target is one
394 // for all targets
395 int flag_count_one = 1;
396 for (size_t i = 0; i < tgts->count; ++i) {
397 if (num_src_per_tgt[i] != 1) {
398 flag_count_one = 0;
399 break;
400 }
401 }
402
403 if (flag_count_one) {
404
406
407 } else {
408
409 struct interp_weight_stencil * stencils = weights->stencils;
410 size_t stencils_array_size = weights->stencils_array_size;
411 size_t stencils_size = weights->stencils_size;
412
413 ENSURE_ARRAY_SIZE(stencils, stencils_array_size, stencils_size + tgts->count);
414
415 for (size_t i = 0; i < tgts->count; ++i, ++stencils_size) {
416
417 size_t curr_num_src = num_src_per_tgt[i];
418
419 stencils[stencils_size].type = SUM;
420 stencils[stencils_size].tgt = copy_remote_point(tgts->data[i]);
421 stencils[stencils_size].data.weight_sum.srcs =
422 copy_remote_points(srcs, curr_num_src);
423 stencils[stencils_size].data.weight_sum.weights = NULL;
424
425 srcs += curr_num_src;
426 }
427
428 weights->stencils = stencils;
429 weights->stencils_array_size = stencils_array_size;
430 weights->stencils_size = stencils_size;
431 }
432}
433
435 struct yac_interp_weights * weights, struct remote_points * tgts,
436 struct remote_point * srcs) {
437
438 if (tgts->count == 0) return;
439
440 struct interp_weight_stencil * stencils = weights->stencils;
441 size_t stencils_array_size = weights->stencils_array_size;
442 size_t stencils_size = weights->stencils_size;
443
444 ENSURE_ARRAY_SIZE(stencils, stencils_array_size, stencils_size + tgts->count);
445
446 for (size_t i = 0; i < tgts->count; ++i, ++stencils_size) {
447
448 stencils[stencils_size].type = DIRECT;
449 stencils[stencils_size].tgt = copy_remote_point(tgts->data[i]);
450 stencils[stencils_size].data.direct.src = copy_remote_point(srcs[i]);
451 }
452
453 weights->stencils = stencils;
454 weights->stencils_array_size = stencils_array_size;
455 weights->stencils_size = stencils_size;
456}
457
459 struct yac_interp_weights * weights, struct remote_points * tgts,
460 size_t * src_field_indices, struct remote_point ** srcs_per_field,
461 size_t num_src_fields) {
462
463 if (tgts->count == 0) return;
464
465 if (num_src_fields == 1) {
466 yac_interp_weights_add_direct(weights, tgts, srcs_per_field[0]);
467 return;
468 }
469
470 struct interp_weight_stencil * stencils = weights->stencils;
471 size_t stencils_array_size = weights->stencils_array_size;
472 size_t stencils_size = weights->stencils_size;
473
475 stencils, stencils_array_size, stencils_size + tgts->count);
476 stencils += stencils_size;
477
478 size_t srcs_offsets[num_src_fields];
479 memset(srcs_offsets, 0, num_src_fields * sizeof(srcs_offsets[0]));
480
481 for (size_t i = 0; i < tgts->count; ++i) {
482
483 size_t src_field_idx = src_field_indices[i];
484 stencils[i].type = DIRECT_MF;
485 stencils[i].tgt = copy_remote_point(tgts->data[i]);
486 stencils[i].data.direct_mf.src =
488 srcs_per_field[src_field_idx][srcs_offsets[src_field_idx]++]);
489 stencils[i].data.direct_mf.field_idx = src_field_idx;
490 }
491
492 weights->stencils = stencils;
493 weights->stencils_array_size = stencils_array_size;
494 weights->stencils_size += tgts->count;
495}
496
498 struct yac_interp_weights * weights, struct remote_points * tgts,
499 size_t * num_src_per_field_per_tgt, struct remote_point ** srcs_per_field,
500 size_t num_src_fields) {
501
502 if (tgts->count == 0) return;
503
504 if (num_src_fields == 1) {
506 weights, tgts, num_src_per_field_per_tgt, srcs_per_field[0]);
507 return;
508 }
509
510 // check whether the number of source points per target is one
511 // for all targets
512 int flag_count_one = 1;
513 for (size_t i = 0, k = 0; i < tgts->count; ++i) {
514 size_t count = 0;
515 for (size_t j = 0; j < num_src_fields; ++j, ++k)
516 count += num_src_per_field_per_tgt[k];
517 if (count != 1) {
518 flag_count_one = 0;
519 break;
520 }
521 }
522
523 if (flag_count_one) {
524
525 size_t * src_field_indices =
526 xmalloc(tgts->count * sizeof(*src_field_indices));
527
528 for (size_t i = 0, k = 0; i < tgts->count; ++i)
529 for (size_t j = 0; j < num_src_fields; ++j, ++k)
530 if (num_src_per_field_per_tgt[k])
531 src_field_indices[i] = j;
532
534 weights, tgts, src_field_indices, srcs_per_field, num_src_fields);
535
536 free(src_field_indices);
537
538 } else {
539 struct remote_point * curr_srcs_per_field[num_src_fields];
540 memcpy(curr_srcs_per_field, srcs_per_field,
541 num_src_fields * sizeof(*srcs_per_field));
542
543 struct interp_weight_stencil * stencils = weights->stencils;
544 size_t stencils_array_size = weights->stencils_array_size;
545 size_t stencils_size = weights->stencils_size;
546
548 stencils, stencils_array_size, stencils_size + tgts->count);
549
550 for (size_t i = 0; i < tgts->count; ++i, ++stencils_size) {
551
552 size_t * curr_num_src_per_src_field =
553 num_src_per_field_per_tgt + i * num_src_fields;
554 size_t curr_num_src = 0;
555 for (size_t j = 0; j < num_src_fields; ++j)
556 curr_num_src += curr_num_src_per_src_field[j];
557
558 stencils[stencils_size].type = SUM_MF;
559 stencils[stencils_size].tgt = copy_remote_point(tgts->data[i]);
560 stencils[stencils_size].data.sum_mf.field_indices =
561 xmalloc(
562 curr_num_src *
563 sizeof(*(stencils[stencils_size].data.sum_mf.field_indices)));
564 for (size_t j = 0, l = 0; j < num_src_fields; ++j) {
565 size_t curr_num_src = curr_num_src_per_src_field[j];
566 for (size_t k = 0; k < curr_num_src; ++k, ++l) {
567 stencils[stencils_size].data.sum_mf.field_indices[l] = j;
568 }
569 }
570 stencils[stencils_size].data.sum_mf.srcs =
572 curr_srcs_per_field, curr_num_src_per_src_field, num_src_fields);
573
574 for (size_t j = 0; j < num_src_fields; ++j)
575 curr_srcs_per_field[j] += curr_num_src_per_src_field[j];
576 }
577
578 weights->stencils = stencils;
579 weights->stencils_array_size = stencils_array_size;
580 weights->stencils_size = stencils_size;
581 }
582}
583
585 struct yac_interp_weights * weights, struct remote_points * tgts,
586 size_t * num_src_per_field_per_tgt, struct remote_point ** srcs_per_field,
587 double * w, size_t num_src_fields) {
588
589 if (tgts->count == 0) return;
590
591 if (num_src_fields == 1) {
593 weights, tgts, num_src_per_field_per_tgt, srcs_per_field[0], w);
594 return;
595 }
596
597 // check whether all weights are 1.0 and whether the number of source
598 // points per target is one for all targets
599 int flag_weight_one = 1;
600 for (size_t i = 0, j = 0;
601 (i < tgts->count) && flag_weight_one; ++i) {
602
603 for (size_t src_field_idx = 0; src_field_idx < num_src_fields;
604 ++src_field_idx) {
605
606 size_t curr_count =
607 num_src_per_field_per_tgt[i * num_src_fields + src_field_idx];
608
609 for (size_t k = 0; (k < curr_count) && flag_weight_one; ++k, ++j)
610 flag_weight_one &= fabs(w[j] - 1.0) < 1e-12;
611 }
612 }
613
614 // if all weights are 1.0 -> use more optimised weight type
615 if (flag_weight_one) {
616
618 weights, tgts, num_src_per_field_per_tgt, srcs_per_field, num_src_fields);
619
620 } else {
621
622 struct remote_point * curr_srcs_per_field[num_src_fields];
623 memcpy(curr_srcs_per_field, srcs_per_field,
624 num_src_fields * sizeof(*srcs_per_field));
625
626 struct interp_weight_stencil * stencils = weights->stencils;
627 size_t stencils_array_size = weights->stencils_array_size;
628 size_t stencils_size = weights->stencils_size;
629
631 stencils, stencils_array_size, stencils_size + tgts->count);
632
633 for (size_t i = 0; i < tgts->count; ++i, ++stencils_size) {
634
635 size_t * curr_num_src_per_src_field =
636 num_src_per_field_per_tgt + i * num_src_fields;
637 size_t curr_num_weights = 0;
638 for (size_t j = 0; j < num_src_fields; ++j)
639 curr_num_weights += curr_num_src_per_src_field[j];
640 double * curr_weights =
641 xmalloc(curr_num_weights * sizeof(*curr_weights));
642 size_t * field_indices =
643 xmalloc(curr_num_weights * sizeof(*field_indices));
644
645 stencils[stencils_size].type = WEIGHT_SUM_MF;
646 stencils[stencils_size].tgt = copy_remote_point(tgts->data[i]);
647 stencils[stencils_size].data.weight_sum_mf.field_indices = field_indices;
648 for (size_t j = 0, l = 0; j < num_src_fields; ++j) {
649 size_t curr_num_src = curr_num_src_per_src_field[j];
650 for (size_t k = 0; k < curr_num_src; ++k, ++l) field_indices[l] = j;
651 }
652 stencils[stencils_size].data.weight_sum_mf.srcs =
653 copy_remote_points_mf(curr_srcs_per_field, curr_num_src_per_src_field, num_src_fields);
654 stencils[stencils_size].data.weight_sum_mf.weights = curr_weights;
655 memcpy(curr_weights, w, curr_num_weights * sizeof(*curr_weights));
656
657 for (size_t j = 0; j < num_src_fields; ++j)
658 curr_srcs_per_field[j] += curr_num_src_per_src_field[j];
659 w += curr_num_weights;
660 }
661
662 weights->stencils = stencils;
663 weights->stencils_array_size = stencils_array_size;
664 weights->stencils_size = stencils_size;
665 }
666}
667
668static int compare_stencils_fixed(const void * a, const void * b) {
669
670 int ret = (((struct interp_weight_stencil_fixed *)a)->value >
671 ((struct interp_weight_stencil_fixed *)b)->value) -
672 (((struct interp_weight_stencil_fixed *)a)->value <
673 ((struct interp_weight_stencil_fixed *)b)->value);
674
675 if (ret) return ret;
676
677 return (((struct interp_weight_stencil_fixed *)a)->orig_pos >
678 ((struct interp_weight_stencil_fixed *)b)->orig_pos) -
679 (((struct interp_weight_stencil_fixed *)a)->orig_pos <
680 ((struct interp_weight_stencil_fixed *)b)->orig_pos);
681}
682
/**
 * Generates an MPI datatype describing struct interp_weight_stencil_fixed.
 *
 * The datatype consists of two blocks of length one (MPI_DOUBLE for value,
 * YAC_MPI_SIZE_T for orig_pos) located at the offsets of the respective
 * struct members. The result is passed through yac_create_resized together
 * with sizeof(struct interp_weight_stencil_fixed).
 *
 * @param[in] comm communicator handed to yac_create_resized and to the
 *                 call wrapping MPI_Type_create_struct
 * @return datatype returned by yac_create_resized
 */
683static MPI_Datatype get_fixed_stencil_mpi_datatype(MPI_Comm comm) {
684
 // dummy is only used to compute member offsets and the struct size
685 struct interp_weight_stencil_fixed dummy;
686 MPI_Datatype fixed_stencil_dt;
687 int array_of_blocklengths[] = {1, 1};
 // displacements of value and orig_pos relative to the start of the struct
688 const MPI_Aint array_of_displacements[] =
689 {(MPI_Aint)(intptr_t)(const void *)&(dummy.value) -
690 (MPI_Aint)(intptr_t)(const void *)&dummy,
691 (MPI_Aint)(intptr_t)(const void *)&(dummy.orig_pos) -
692 (MPI_Aint)(intptr_t)(const void *)&dummy};
693 const MPI_Datatype array_of_types[] = {MPI_DOUBLE, YAC_MPI_SIZE_T};
 // NOTE(review): the line opening the wrapping call is not present here;
 // presumably yac_mpi_call( as used elsewhere in this file -- confirm
695 MPI_Type_create_struct(2, array_of_blocklengths, array_of_displacements,
696 array_of_types, &fixed_stencil_dt), comm);
697 return yac_create_resized(fixed_stencil_dt, sizeof(dummy), comm);
698}
699
701 void * interp, double fixed_value, size_t count, size_t * tgt_pos) {
702
704 (struct yac_interpolation*)interp, fixed_value, count, tgt_pos);
705}
706
708 void * interp, double fixed_value, size_t count, size_t * tgt_pos) {
709
710 struct yac_interp_weights_data * interp_weights_data =
711 &((struct yac_interpolation_raw *)interp)->interp_weights_data;
712
714 xrealloc(
719 fixed_value;
720
722 xrealloc(
728
729 size_t total_num_fixed_tgt = 0;
730 for (size_t i = 0; i < interp_weights_data->num_fixed_values; ++i)
731 total_num_fixed_tgt += interp_weights_data->num_tgt_per_fixed_value[i];
733 xrealloc(
735 (total_num_fixed_tgt + count) *
737 memcpy(
738 interp_weights_data->tgt_idx_fixed + total_num_fixed_tgt,
739 tgt_pos, count * sizeof(*tgt_pos));
740
742}
743
745 MPI_Comm comm, size_t count,
746 struct interp_weight_stencil * fixed_stencils,
747 void * interp, void (*interp_add_fixed)(void*, double, size_t, size_t*)) {
748
749 //---------------------------------------------------------------------------
750 // redistribute fixed stencils to owners of fixed target points
751 // (a target point can be owned by multiple processes)
752 //---------------------------------------------------------------------------
753
754 int comm_size;
755 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
756
757 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
759 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
760
761 // count the number of fixed stencils that need to be sent to each process
762 for (size_t i = 0; i < count; ++i) {
763 int curr_count = fixed_stencils[i].tgt.data.count;
764 struct remote_point_info * curr_point_infos =
765 (curr_count == 1)?
766 (&(fixed_stencils[i].tgt.data.data.single)):
767 (fixed_stencils[i].tgt.data.data.multi);
768 for (int j = 0; j < curr_count; ++j)
769 sendcounts[curr_point_infos[j].rank]++;
770 }
771
773 1, sendcounts, recvcounts, sdispls, rdispls, comm);
774
775 size_t send_buffer_size =
776 sdispls[comm_size] + sendcounts[comm_size - 1];
777 size_t recv_buffer_size =
778 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
779
780 struct interp_weight_stencil_fixed * buffer =
781 xmalloc((send_buffer_size + recv_buffer_size) * sizeof(*buffer));
782 struct interp_weight_stencil_fixed * send_buffer = buffer + recv_buffer_size;
783 struct interp_weight_stencil_fixed * recv_buffer = buffer;
784
785 // pack fixed stencils
786 for (size_t i = 0; i < count; ++i) {
787 int curr_count = fixed_stencils[i].tgt.data.count;
788 struct remote_point_info * curr_point_infos =
789 (curr_count == 1)?
790 (&(fixed_stencils[i].tgt.data.data.single)):
791 (fixed_stencils[i].tgt.data.data.multi);
792 double value = fixed_stencils[i].data.fixed.value;
793 for (int j = 0; j < curr_count; ++j) {
794 size_t pos = sdispls[curr_point_infos[j].rank + 1]++;
795 send_buffer[pos].value = value;
796 send_buffer[pos].orig_pos = curr_point_infos[j].orig_pos;
797 }
798 }
799
800 // create MPI Datatype for exchanging fixed stencils
801 MPI_Datatype stencil_fixed_dt = get_fixed_stencil_mpi_datatype(comm);
802
803 // redistribute fixed stencils to owners of respective target points
805 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls,
806 sizeof(*send_buffer), stencil_fixed_dt, comm,
807 "yac_interp_weights_redist_fixed", __LINE__);
808
809 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
810 yac_mpi_call(MPI_Type_free(&stencil_fixed_dt), comm);
811
812 if (recv_buffer_size == 0) {
813 free(buffer);
814 return;
815 }
816
817 //---------------------------------------------------------------------------
818 // store received stencils in interp data structure
819 //---------------------------------------------------------------------------
820
821 // sort stencils first by fixed value and second by orig_pos
822 qsort(recv_buffer, recv_buffer_size, sizeof(*recv_buffer),
824
825 size_t * tgt_pos = xmalloc(recv_buffer_size * sizeof(*tgt_pos));
826 for (size_t i = 0; i < recv_buffer_size; ++i)
827 tgt_pos[i] = (size_t)(recv_buffer[i].orig_pos);
828
829 size_t offset = 0, i = 0;
830 while (offset < recv_buffer_size) {
831 double fixed_value = recv_buffer[i].value;
832 while ((i < recv_buffer_size) && (fixed_value == recv_buffer[i].value)) ++i;
833 size_t curr_count = i - offset;
835 interp, fixed_value, curr_count, tgt_pos + offset);
836 offset = i;
837 }
838
839 free(buffer);
840 free(tgt_pos);
841}
842
843// a source point may have multiple owners, this routine returns one of them
844static inline struct remote_point_info select_src(
845 struct remote_point_infos src) {
846
847 if (src.count == 1) return src.data.single;
848
849 int min_rank = INT_MAX;
850 size_t min_rank_idx = SIZE_MAX;
851 for (int i = 0; i < src.count; ++i) {
852 if (src.data.multi[i].rank < min_rank) {
853 min_rank = src.data.multi[i].rank;
854 min_rank_idx = i;
855 }
856 }
857
858 return src.data.multi[min_rank_idx];
859}
860
862 struct Xt_redist_msg * msgs, size_t count, MPI_Comm comm) {
863 for (size_t i = 0; i < count; ++i) {
864 MPI_Datatype * dt = &(msgs[i].datatype);
865 if (*dt != MPI_DATATYPE_NULL) yac_mpi_call(MPI_Type_free(dt), comm);
866 }
867 free(msgs);
868}
869
/**
 * Generates a yaxt redist from per-rank send and receive counts.
 *
 * For every rank with a non-zero count one message is built whose datatype
 * is generated by xt_mpi_generate_datatype from a list of positions and
 * MPI_DOUBLE. Processes that have at least one message are put into a
 * common communicator (MPI_Comm_split with color 1), the ranks in the
 * messages are translated to that communicator and the redist is created
 * on it. All other processes take part in the split with color 0.
 *
 * @param[in] src_orig_poses positions used for the send messages; read
 *                           consecutively, sendcounts[i] entries for each
 *                           rank i in ascending rank order
 * @param[in] sendcounts     number of positions to be sent to each rank of
 *                           comm
 * @param[in] tgt_stencils   stencils whose orig_pos is used for the receive
 *                           messages; read consecutively, recvcounts[i]
 *                           entries for each rank i in ascending rank order
 * @param[in] recvcounts     number of positions to be received from each
 *                           rank of comm
 * @param[in] comm           communicator
 * @param[in] redist_config  configuration passed to
 *                           xt_redist_single_array_base_custom_new
 * @return redist, or NULL if this process has neither send nor receive
 *         messages
 */
883static Xt_redist generate_direct_redist(
884 size_t * src_orig_poses, size_t * sendcounts,
885 struct interp_weight_stencil_direct * tgt_stencils,
886 size_t * recvcounts, MPI_Comm comm, Xt_config redist_config) {
887
888 int comm_size;
889 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
890
 // count the ranks we exchange data with and determine the biggest
 // per-rank message (used to size the reusable position buffer)
891 size_t nsend = 0, nrecv = 0;
892 size_t max_buffer_size = 0;
893 for (int i = 0; i < comm_size; ++i) {
894 nsend += sendcounts[i] > 0;
895 nrecv += recvcounts[i] > 0;
896 if (max_buffer_size < sendcounts[i]) max_buffer_size = sendcounts[i];
897 if (max_buffer_size < recvcounts[i]) max_buffer_size = recvcounts[i];
898 }
899
900 size_t total_num_msg = nsend + nrecv;
901
 // single allocation: send messages first, receive messages behind them
902 struct Xt_redist_msg * msgs_buffer =
903 xmalloc(total_num_msg * sizeof(*msgs_buffer));
904 struct Xt_redist_msg * send_msgs = msgs_buffer;
905 struct Xt_redist_msg * recv_msgs = msgs_buffer + nsend;
906
907 int * pos_buffer = xmalloc((size_t)max_buffer_size * sizeof(*pos_buffer));
908
909 // generate yaxt send and receive messages
 // (nsend/nrecv are reset and reused as running message indices)
910 nsend = 0;
911 nrecv = 0;
912 for (int i = 0; i < comm_size; ++i) {
913 if (recvcounts[i] > 0) {
 // positions are narrowed from the stored type to int
914 for (size_t j = 0; j < recvcounts[i]; ++j)
915 pos_buffer[j] = (int)tgt_stencils[j].orig_pos;
916 tgt_stencils += recvcounts[i];
917 recv_msgs[nrecv].rank = i;
918 recv_msgs[nrecv].datatype =
919 xt_mpi_generate_datatype(pos_buffer, recvcounts[i], MPI_DOUBLE, comm);
920 nrecv++;
921 }
922 if (sendcounts[i] > 0) {
923 for (size_t j = 0; j < sendcounts[i]; ++j)
924 pos_buffer[j] = (int)src_orig_poses[j];
925 src_orig_poses += sendcounts[i];
926 send_msgs[nsend].rank = i;
927 send_msgs[nsend].datatype =
928 xt_mpi_generate_datatype(pos_buffer, sendcounts[i], MPI_DOUBLE, comm);
929 nsend++;
930 }
931 }
932
933 free(pos_buffer);
934
935 Xt_redist redist;
936 MPI_Comm split_comm;
937
938 // only processes that have to send/receive data are included in the redist,
939 // the others receive a dummy redist
940 if (total_num_msg > 0) {
941
942 // generate MPI communicator containing all ranks taking part in the
943 // exchange
944 yac_mpi_call(MPI_Comm_split(comm, 1, 0, &split_comm), comm);
945
 // first half: ranks in comm, second half: ranks in split_comm
946 int * rank_buffer =
947 xmalloc(2 * total_num_msg * sizeof(*rank_buffer));
948 int * orig_ranks = rank_buffer;
949 int * split_ranks = rank_buffer + total_num_msg;
950
951 for (size_t i = 0; i < total_num_msg; ++i)
952 orig_ranks[i] = msgs_buffer[i].rank;
953
954 MPI_Group orig_group, split_group;
955 yac_mpi_call(MPI_Comm_group(comm, &orig_group), comm);
956 yac_mpi_call(MPI_Comm_group(split_comm, &split_group), comm);
957
958 // translate the ranks in the send/receive messages to ones in the
959 // split comm
 // NOTE(review): the line opening the wrapping call is not present here;
 // presumably yac_mpi_call( as used elsewhere in this function -- confirm
961 MPI_Group_translate_ranks(orig_group, total_num_msg, orig_ranks,
962 split_group, split_ranks), split_comm);
963
964 for (size_t i = 0; i < total_num_msg; ++i)
965 msgs_buffer[i].rank = split_ranks[i];
966
967 free(rank_buffer);
968
969 yac_mpi_call(MPI_Group_free(&split_group), comm);
970 yac_mpi_call(MPI_Group_free(&orig_group), comm);
971
972 // generate redist
973 redist =
974 xt_redist_single_array_base_custom_new(
975 nsend, nrecv, send_msgs, recv_msgs, split_comm, redist_config);
976
977 } else {
 // MPI_Comm_split is collective over comm, hence processes without
 // messages have to call it as well (with a different color)
978 yac_mpi_call(MPI_Comm_split(comm, 0, 0, &split_comm), comm);
979 redist = NULL;
980 }
981
 // the split communicator and the messages are released on all processes
982 yac_mpi_call(MPI_Comm_free(&split_comm), comm);
983 xt_redist_msg_free(msgs_buffer, total_num_msg, comm);
984
985 return redist;
986}
987
1004 size_t * src_orig_poses, size_t * sendcounts,
1005 struct interp_weight_stencil_direct_mf * tgt_stencils,
1006 size_t * recvcounts, size_t num_src_fields, MPI_Comm comm,
1007 Xt_config redist_config) {
1008
1009 int comm_size;
1010 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1011
1012 size_t nsends[num_src_fields], nrecvs[num_src_fields];
1013 size_t max_buffer_size = 0;
1014 memset(nsends, 0, num_src_fields * sizeof(nsends[0]));
1015 memset(nrecvs, 0, num_src_fields * sizeof(nrecvs[0]));
1016 for (int i = 0; i < comm_size; ++i) {
1017 for (size_t j = 0; j < num_src_fields; ++j) {
1018 size_t idx = (size_t)i * num_src_fields + j;
1019 if (sendcounts[idx] > 0) nsends[j]++;
1020 if (recvcounts[idx] > 0) nrecvs[j]++;
1021 if (max_buffer_size < sendcounts[idx]) max_buffer_size = sendcounts[idx];
1022 if (max_buffer_size < recvcounts[idx]) max_buffer_size = recvcounts[idx];
1023 }
1024 }
1025
1026 size_t nsend = 0, nrecv = 0;
1027 size_t send_offsets[num_src_fields];
1028 size_t recv_offsets[num_src_fields];
1029 for (size_t i = 0; i < num_src_fields; ++i) {
1030 send_offsets[i] = nsend;
1031 recv_offsets[i] = nrecv;
1032 nsend += nsends[i];
1033 nrecv += nrecvs[i];
1034 }
1035
1036 size_t total_num_msg = nsend + nrecv;
1037
1038 struct Xt_redist_msg * msgs_buffer =
1039 xmalloc(total_num_msg * sizeof(*msgs_buffer));
1040 struct Xt_redist_msg * send_msgs = msgs_buffer;
1041 struct Xt_redist_msg * recv_msgs = msgs_buffer + nsend;
1042
1043 int * pos_buffer = xmalloc(max_buffer_size * sizeof(*pos_buffer));
1044
1045 // generate yaxt send and receive messages
1046 for (int i = 0; i < comm_size; ++i) {
1047 for (size_t src_field_idx = 0; src_field_idx < num_src_fields;
1048 ++src_field_idx) {
1049 size_t idx = (size_t)i * num_src_fields + src_field_idx;
1050 if (recvcounts[idx] > 0) {
1051 for (size_t j = 0; j < recvcounts[idx]; ++j)
1052 pos_buffer[j] = (int)tgt_stencils[j].orig_pos;
1053 tgt_stencils += recvcounts[idx];
1054 recv_msgs[recv_offsets[src_field_idx]].rank = i;
1055 recv_msgs[recv_offsets[src_field_idx]].datatype =
1056 xt_mpi_generate_datatype(
1057 pos_buffer, recvcounts[idx], MPI_DOUBLE, comm);
1058 recv_offsets[src_field_idx]++;
1059 }
1060 if (sendcounts[idx] > 0) {
1061 for (size_t j = 0; j < sendcounts[idx]; ++j)
1062 pos_buffer[j] = (int)src_orig_poses[j];
1063 src_orig_poses += sendcounts[idx];
1064 send_msgs[send_offsets[src_field_idx]].rank = i;
1065 send_msgs[send_offsets[src_field_idx]].datatype =
1066 xt_mpi_generate_datatype(
1067 pos_buffer, sendcounts[idx], MPI_DOUBLE, comm);
1068 send_offsets[src_field_idx]++;
1069 }
1070 }
1071 }
1072
1073 free(pos_buffer);
1074
1075 Xt_redist * redists;
1076 MPI_Comm split_comm;
1077
1078 // only processes that have to sent/receive data are included in the redist,
1079 // the others receive a dummy redist
1080 if (total_num_msg > 0) {
1081
1082 // generate MPI communicator containing all ranks taking part in the
1083 // exchange
1084 yac_mpi_call(MPI_Comm_split(comm, 1, 0, &split_comm), comm);
1085
1086 int * rank_buffer =
1087 xmalloc(2 * total_num_msg * sizeof(*rank_buffer));
1088 int * orig_ranks = rank_buffer;
1089 int * split_ranks = rank_buffer + total_num_msg;
1090
1091 for (size_t i = 0; i < total_num_msg; ++i)
1092 orig_ranks[i] = msgs_buffer[i].rank;
1093
1094 MPI_Group orig_group, split_group;
1095 yac_mpi_call(MPI_Comm_group(comm, &orig_group), comm);
1096 yac_mpi_call(MPI_Comm_group(split_comm, &split_group), comm);
1097
1098 // translate the ranks in the sent/receive messages to ones in the
1099 // split comm
1101 MPI_Group_translate_ranks(orig_group, total_num_msg, orig_ranks,
1102 split_group, split_ranks), split_comm);
1103
1104 for (size_t i = 0; i < total_num_msg; ++i)
1105 msgs_buffer[i].rank = split_ranks[i];
1106
1107 free(rank_buffer);
1108
1109 yac_mpi_call(MPI_Group_free(&split_group), comm);
1110 yac_mpi_call(MPI_Group_free(&orig_group), comm);
1111
1112 // generate redists
1113 redists = xmalloc(num_src_fields * sizeof(*redists));
1114 for (size_t src_field_idx = 0; src_field_idx < num_src_fields;
1115 ++src_field_idx) {
1116 redists[src_field_idx] =
1117 xt_redist_single_array_base_custom_new(
1118 nsends[src_field_idx], nrecvs[src_field_idx],
1119 send_msgs, recv_msgs, split_comm, redist_config);
1120 send_msgs += nsends[src_field_idx];
1121 recv_msgs += nrecvs[src_field_idx];
1122 }
1123
1124 } else {
1125 yac_mpi_call(MPI_Comm_split(comm, 0, 0, &split_comm), comm);
1126 redists = NULL;
1127 }
1128
1129 yac_mpi_call(MPI_Comm_free(&split_comm), comm);
1130 xt_redist_msg_free(msgs_buffer, total_num_msg, comm);
1131
1132 return redists;
1133}
1134
/**
 * Generates an MPI datatype describing struct interp_weight_stencil_direct.
 *
 * The datatype consists of two blocks of length one located at the offsets
 * of the members src and orig_pos. The first entry of array_of_types is
 * freed after the struct datatype has been created. The result is passed
 * through yac_create_resized together with
 * sizeof(struct interp_weight_stencil_direct).
 *
 * @param[in] comm communicator handed to yac_mpi_call and
 *                 yac_create_resized
 * @return datatype returned by yac_create_resized
 */
1135static MPI_Datatype get_direct_stencil_mpi_datatype(MPI_Comm comm) {
1136
 // dummy is only used to compute member offsets and the struct size
1137 struct interp_weight_stencil_direct dummy;
1138 MPI_Datatype direct_stencil_dt;
1139 int array_of_blocklengths[] = {1, 1};
 // displacements of src and orig_pos relative to the start of the struct
1140 const MPI_Aint array_of_displacements[] =
1141 {(MPI_Aint)(intptr_t)(const void *)&(dummy.src) -
1142 (MPI_Aint)(intptr_t)(const void *)&dummy,
1143 (MPI_Aint)(intptr_t)(const void *)&(dummy.orig_pos) -
1144 (MPI_Aint)(intptr_t)(const void *)&dummy};
 // NOTE(review): the initializer of array_of_types and the line opening
 // the call around MPI_Type_create_struct are not present here; since
 // array_of_types[0] is freed below it presumably is a datatype created
 // for this purpose -- confirm
1145 MPI_Datatype array_of_types[] =
1148 MPI_Type_create_struct(2, array_of_blocklengths, array_of_displacements,
1149 array_of_types, &direct_stencil_dt), comm);
1150 yac_mpi_call(MPI_Type_free(&(array_of_types[0])), comm);
1151 return yac_create_resized(direct_stencil_dt, sizeof(dummy), comm);
1152}
1153
1154static int compare_stencils_direct(const void * a, const void * b) {
1155
1156 int ret = ((struct interp_weight_stencil_direct *)a)->src.rank -
1157 ((struct interp_weight_stencil_direct *)b)->src.rank;
1158
1159 if (ret) return ret;
1160
1161 return (((struct interp_weight_stencil_direct *)a)->orig_pos >
1162 ((struct interp_weight_stencil_direct *)b)->orig_pos) -
1163 (((struct interp_weight_stencil_direct *)a)->orig_pos <
1164 ((struct interp_weight_stencil_direct *)b)->orig_pos);
1165}
1166
// Callback that turns the exchanged direct-stencil information into a redist
// and registers it with the interpolation passed as the opaque `interp`.
// NOTE(review): the line carrying the function name (listing line 1167) and
// the name of the redist-generating call (listing line 1174) are missing from
// this excerpt.
//
// interp          opaque pointer, cast to struct yac_interpolation * below
// src_orig_poses, sendcounts, tgt_stencils, recvcounts, comm, redist_config
//                 forwarded unchanged to the redist-generating call
1168 void * interp, size_t * src_orig_poses, size_t * sendcounts,
1169 struct interp_weight_stencil_direct * tgt_stencils,
1170 size_t * recvcounts, MPI_Comm comm, Xt_config redist_config) {
1171
1172 // generate redist
1173 Xt_redist redist =
1175 src_orig_poses, sendcounts, tgt_stencils, recvcounts,
1176 comm, redist_config);
1177
1178 yac_interpolation_add_direct((struct yac_interpolation*)interp, redist);
1179
// the redist is deleted locally after it has been passed on; a NULL redist is
// passed on as well but not deleted
1180 if (redist != NULL) xt_redist_delete(redist);
1181 }
1182
// Redistributes direct stencils in two communication rounds and passes the
// result to the callback interp_add_direct:
//   1. every stencil is sent to each process listed in its target point
//      (one entry per remote_point_info of tgt);
//   2. the receivers sort what they got and send the requested source
//      orig_pos values to the ranks recorded in the stencils' src.
// NOTE(review): the function-name line (listing line 1183) and the opening
// lines of several calls (1200, 1214, 1249, 1259, 1270, 1285, 1295) are
// missing from this excerpt; the name used in the error strings below is
// "yac_interp_weights_redist_direct".
//
// comm              communicator used for all exchanges
// count             number of entries in direct_stencils
// direct_stencils   stencils to redistribute (data.direct is read)
// interp            opaque pointer handed to interp_add_direct
// interp_add_direct callback receiving the exchanged data
// redist_config     forwarded to interp_add_direct
1184 MPI_Comm comm, size_t count,
1185 struct interp_weight_stencil * direct_stencils,
1186 void * interp,
1187 void (*interp_add_direct)(
1188 void *, size_t *, size_t *, struct interp_weight_stencil_direct *,
1189 size_t *, MPI_Comm, Xt_config), Xt_config redist_config) {
1190
1191 //---------------------------------------------------------------------------
1192 // redistribute direct stencils to owners of direct target points
1193 // (a target point can be owned by multiple processes)
1194 //---------------------------------------------------------------------------
1195
1196 int comm_size;
1197 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1198
1199 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
1201 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1202
1203 // count the number of direct stencils that need to be sent to each process
1204 for (size_t i = 0; i < count; ++i) {
1205 int curr_count = direct_stencils[i].tgt.data.count;
// a target with exactly one owner stores its info inline (single), otherwise
// the infos are read from the multi array
1206 struct remote_point_info * curr_point_info =
1207 (curr_count == 1)?
1208 (&(direct_stencils[i].tgt.data.data.single)):
1209 (direct_stencils[i].tgt.data.data.multi);
1210 for (int j = 0; j < curr_count; ++j)
1211 sendcounts[curr_point_info[j].rank]++;
1212 }
1213
1215 1, sendcounts, recvcounts, sdispls, rdispls, comm);
1216
// NOTE(review): sdispls is indexed with comm_size here and with rank + 1 in
// the packing loop, i.e. it is used as if it had comm_size + 1 entries shifted
// by one — confirm against the (missing) helper that fills it
1217 size_t send_buffer_size =
1218 sdispls[comm_size] + sendcounts[comm_size - 1];
1219 size_t recv_buffer_size =
1220 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
1221 size_t tgt_count = recv_buffer_size;
1222
// one allocation holds both buffers: receive part first, send part behind it
1223 struct interp_weight_stencil_direct * stencil_buffer =
1224 xmalloc((send_buffer_size + recv_buffer_size) * sizeof(*stencil_buffer));
1225 struct interp_weight_stencil_direct * send_stencil_buffer =
1226 stencil_buffer + recv_buffer_size;
1227 struct interp_weight_stencil_direct * recv_stencil_buffer = stencil_buffer;
1228
1229 // pack direct stencils
1230 for (size_t i = 0; i < count; ++i) {
1231 int curr_count = direct_stencils[i].tgt.data.count;
1232 struct remote_point_info * curr_point_infos =
1233 (curr_count == 1)?
1234 (&(direct_stencils[i].tgt.data.data.single)):
1235 (direct_stencils[i].tgt.data.data.multi);
// a single source info is chosen for the stencil via select_src
1236 struct remote_point_info src =
1237 select_src(direct_stencils[i].data.direct.src.data);
1238 for (int j = 0; j < curr_count; ++j) {
// the slot for the destination rank is taken from sdispls[rank + 1], which is
// advanced for the next entry going to the same rank
1239 size_t pos = sdispls[curr_point_infos[j].rank + 1]++;
1240 send_stencil_buffer[pos].src = src;
1241 send_stencil_buffer[pos].orig_pos = curr_point_infos[j].orig_pos;
1242 }
1243 }
1244
1245 // create MPI Datatype for exchanging direct stencils
1246 MPI_Datatype stencil_direct_dt = get_direct_stencil_mpi_datatype(comm);
1247
1248 // redistribute stencils based on target owners
1250 send_stencil_buffer, sendcounts, sdispls,
1251 recv_stencil_buffer, recvcounts, rdispls,
1252 sizeof(*stencil_buffer), stencil_direct_dt, comm,
1253 "yac_interp_weights_redist_direct", __LINE__);
1254
1255 yac_mpi_call(MPI_Type_free(&stencil_direct_dt), comm);
1256
1257 // sort stencils based on src rank first and by target orig pos second
1258 qsort(recv_stencil_buffer, tgt_count, sizeof(*recv_stencil_buffer),
1260
1261 //---------------------------------------------------------------------------
1262 // inform source processes about their requested points
1263 //---------------------------------------------------------------------------
1264
// sendcounts is reused for the second round and therefore reset
1265 memset(sendcounts, 0, (size_t)comm_size * sizeof(*sendcounts));
1266
1267 for (size_t i = 0; i < tgt_count; ++i)
1268 sendcounts[recv_stencil_buffer[i].src.rank]++;
1269
1271 1, sendcounts, recvcounts, sdispls, rdispls, comm);
1272
1273 send_buffer_size = sdispls[comm_size] + sendcounts[comm_size - 1];
1274 recv_buffer_size = rdispls[comm_size - 1] + recvcounts[comm_size - 1];
1275
// same layout as stencil_buffer: receive part first, send part behind it
1276 size_t * orig_pos_buffer =
1277 xmalloc((send_buffer_size + recv_buffer_size) * sizeof(*orig_pos_buffer));
1278 size_t * send_orig_pos_buffer = orig_pos_buffer + recv_buffer_size;
1279 size_t * recv_orig_pos_buffer = orig_pos_buffer;
1280
// pack the source orig_pos of every received stencil for its source rank
1281 for (size_t i = 0; i < tgt_count; ++i)
1282 send_orig_pos_buffer[sdispls[recv_stencil_buffer[i].src.rank + 1]++] =
1283 recv_stencil_buffer[i].src.orig_pos;
1284
1286 send_orig_pos_buffer, sendcounts, sdispls,
1287 recv_orig_pos_buffer, recvcounts, rdispls,
1288 sizeof(*send_orig_pos_buffer), YAC_MPI_SIZE_T, comm,
1289 "yac_interp_weights_redist_direct", __LINE__);
1290
1291 //---------------------------------------------------------------------------
1292 // store received stencils in interp data structure
1293 //---------------------------------------------------------------------------
1294
// note the argument order: the positions received in the second round are
// passed together with recvcounts, the sorted stencils together with
// sendcounts
1296 interp, recv_orig_pos_buffer, recvcounts, recv_stencil_buffer, sendcounts,
1297 comm, redist_config);
1298
1299 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
1300 free(orig_pos_buffer);
1301 free(stencil_buffer);
1302 }
1303
// Builds the MPI datatype used to exchange multi-field direct stencils.
// The type is a three-member struct (src, src_field_idx, orig_pos); the member
// displacements are measured on a local instance named dummy.
// NOTE(review): listing lines 1306 (declaration of dummy) and 1317-1318
// (initialiser of array_of_types and the opening of the wrapping call) are
// missing from this excerpt.
1304 static MPI_Datatype get_direct_mf_stencil_mpi_datatype(MPI_Comm comm) {
1305
1307 MPI_Datatype direct_stencil_mf_dt;
1308 int array_of_blocklengths[] = {1, 1, 1};
// byte offsets of src, src_field_idx and orig_pos relative to the start of
// dummy
1309 const MPI_Aint array_of_displacements[] =
1310 {(MPI_Aint)(intptr_t)(const void *)&(dummy.src) -
1311 (MPI_Aint)(intptr_t)(const void *)&dummy,
1312 (MPI_Aint)(intptr_t)(const void *)&(dummy.src_field_idx) -
1313 (MPI_Aint)(intptr_t)(const void *)&dummy,
1314 (MPI_Aint)(intptr_t)(const void *)&(dummy.orig_pos) -
1315 (MPI_Aint)(intptr_t)(const void *)&dummy};
1316 MPI_Datatype array_of_types[] =
1319 MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacements,
1320 array_of_types, &direct_stencil_mf_dt), comm);
// only the first member type is freed here
1321 yac_mpi_call(MPI_Type_free(&(array_of_types[0])), comm);
// the struct type is handed to yac_create_resized together with sizeof(dummy);
// its result is what the caller receives
1322 return yac_create_resized(direct_stencil_mf_dt, sizeof(dummy), comm);
1323 }
1324
// Comparator with the (const void *, const void *) signature for
// struct interp_weight_stencil_direct_mf entries.
// Sort keys, in order: src.rank, src_field_idx, orig_pos.
// NOTE(review): listing lines 1334-1335 (second half of the src_field_idx
// comparison) and 1340 (right-hand operand of the first orig_pos comparison)
// are missing from this excerpt.
1325 static int compare_stencils_direct_mf(const void * a, const void * b) {
1326
// primary key: rank of the source point
1327 int ret = ((struct interp_weight_stencil_direct_mf *)a)->src.rank -
1328 ((struct interp_weight_stencil_direct_mf *)b)->src.rank;
1329
1330 if (ret) return ret;
1331
// secondary key: source field index
1332 ret = (((struct interp_weight_stencil_direct_mf *)a)->src_field_idx >
1333 ((struct interp_weight_stencil_direct_mf *)b)->src_field_idx) -
1336
1337 if (ret) return ret;
1338
// last key: orig_pos
1339 return (((struct interp_weight_stencil_direct_mf *)a)->orig_pos >
1341 (((struct interp_weight_stencil_direct_mf *)a)->orig_pos <
1342 ((struct interp_weight_stencil_direct_mf *)b)->orig_pos);
1343 }
1344
// Multi-field counterpart of the direct add callback: generates an array of
// redists from the exchanged stencil information and registers it with the
// interpolation passed as the opaque `interp`.
// NOTE(review): the function-name line (listing line 1345), the name of the
// redist-generating call (1352) and the name of the registering call (1356)
// are missing from this excerpt.
//
// interp          opaque pointer, cast to struct yac_interpolation * below
// num_src_fields  number of entries expected in the redists array
// remaining arguments are forwarded unchanged to the redist-generating call
1346 void * interp, size_t num_src_fields, size_t * src_orig_poses,
1347 size_t * sendcounts, struct interp_weight_stencil_direct_mf * tgt_stencils,
1348 size_t * recvcounts, MPI_Comm comm, Xt_config redist_config) {
1349
1350 // generate redist
1351 Xt_redist * redists =
1353 src_orig_poses, sendcounts, tgt_stencils, recvcounts,
1354 (size_t)num_src_fields, comm, redist_config);
1355
1357 (struct yac_interpolation*)interp, redists, (size_t)num_src_fields);
1358
// the redists and the array holding them are released locally after they have
// been passed on; a NULL array is passed on as well but nothing is released
1359 if (redists != NULL) {
1360 for (size_t i = 0; i < (size_t)num_src_fields; ++i)
1361 xt_redist_delete(redists[i]);
1362 free(redists);
1363 }
1364 }
1365
// Puts a struct yac_src_field_exchange_data into its empty state: both the
// send and the recv message lists get a NULL msg pointer and num_msg 0.
// NOTE(review): the function-name line (listing line 1366) is missing from
// this excerpt.
1367 struct yac_src_field_exchange_data * src_field_exchange_data) {
1368
1369 src_field_exchange_data->send.msg = NULL;
1370 src_field_exchange_data->send.num_msg = 0;
1371 src_field_exchange_data->recv.msg = NULL;
1372 src_field_exchange_data->recv.num_msg = 0;
1373 }
1374
1375 // routine ensures that interp_raw can handle up to num_src_fields source fields
// Nothing happens if interp_raw already covers num_src_fields fields.
// Otherwise the per-field arrays are grown with xrealloc, the newly added
// entries are initialised, and the stored field count is raised.
// NOTE(review): listing lines 1376 (function name), 1385 and 1387 (target and
// first argument of the second xrealloc) and 1393 (first statement of the
// initialisation loop) are missing from this excerpt.
1377 struct yac_interpolation_raw * interp_raw, size_t num_src_fields) {
1378
1379 if (interp_raw->interp_weights_data.num_src_fields < num_src_fields) {
1380
1381 interp_raw->src_field_exchange_data =
1382 xrealloc(
1383 interp_raw->src_field_exchange_data,
1384 num_src_fields * sizeof(*interp_raw->src_field_exchange_data));
1386 xrealloc(
1388 num_src_fields *
1389 sizeof(*interp_raw->interp_weights_data.src_field_buffer_size));
1390
// only the entries beyond the previous field count are initialised
1391 for (size_t i = interp_raw->interp_weights_data.num_src_fields;
1392 i < num_src_fields; ++i) {
1394 interp_raw->interp_weights_data.src_field_buffer_size[i] = 0;
1395 }
1396 interp_raw->interp_weights_data.num_src_fields = num_src_fields;
1397 }
1398 }
1399
// Appends tgt_count weighted target points to interp_weights_data.
// NOTE(review): the function-name line and the documentation block preceding
// it (listing lines 1400-1419) are missing from this excerpt.
//
// interp_weights_data   structure that is extended in place
// num_src_fields        number of entries read from src_field_buffer_size
// tgt_count             number of target points to add
// tgt_idx               tgt_count target indices (copied)
// num_src_per_tgt       sources per target; NULL means one source per target
// weights               one weight per source; NULL means every weight is 1.0
// src_field_idx         source field index per source; NULL means field 0
// src_idx               source index per source; stored with the current
//                       src_field_buffer_size of the respective field added
// src_field_buffer_size per-field amounts added to the stored buffer sizes
1420 struct yac_interp_weights_data * interp_weights_data,
1421 size_t num_src_fields, size_t tgt_count, size_t * tgt_idx,
1422 size_t * num_src_per_tgt, double * weights, size_t * src_field_idx,
1423 size_t * src_idx, size_t * src_field_buffer_size) {
1424
1425 // compute total number of weights
1426 size_t num_weights = 0;
1427 if (num_src_per_tgt) {
1428 for (size_t i = 0; i < tgt_count; ++i) num_weights += num_src_per_tgt[i];
1429 } else {
1430 num_weights = tgt_count;
1431 }
1432
1433 // compute number of weights already stored in interp_weights_data
1434 size_t weights_offset = 0;
1435 for (size_t i = 0; i < interp_weights_data->num_wgt_tgt; ++i)
1436 weights_offset += interp_weights_data->num_src_per_tgt[i];
1437
1438 // add local target indices to interp_weights_data
1439 interp_weights_data->wgt_tgt_idx =
1440 xrealloc(
1441 interp_weights_data->wgt_tgt_idx,
1442 (interp_weights_data->num_wgt_tgt + tgt_count) *
1443 sizeof(*(interp_weights_data->wgt_tgt_idx)));
1444 memcpy(
1445 interp_weights_data->wgt_tgt_idx + interp_weights_data->num_wgt_tgt,
1446 tgt_idx, tgt_count * sizeof(*tgt_idx));
1447
1448 // add number of source points per target point to interp_weights_data
1449 interp_weights_data->num_src_per_tgt =
1450 xrealloc(
1451 interp_weights_data->num_src_per_tgt,
1452 (interp_weights_data->num_wgt_tgt + tgt_count) *
1453 sizeof(*(interp_weights_data->num_src_per_tgt)));
1454 if (num_src_per_tgt) {
1455 memcpy(
1456 interp_weights_data->num_src_per_tgt + interp_weights_data->num_wgt_tgt,
1457 num_src_per_tgt, tgt_count * sizeof(*num_src_per_tgt));
1458 } else {
1459 for (size_t i = 0, j = interp_weights_data->num_wgt_tgt; i < tgt_count;
1460 ++i, ++j)
1461 interp_weights_data->num_src_per_tgt[j] = 1;
1462 }
1463
1464 // add weights to interp_weights_data
1465 interp_weights_data->weights =
1466 xrealloc(
1467 interp_weights_data->weights, (weights_offset + num_weights) *
1468 sizeof(*(interp_weights_data->weights)));
1469 if (weights) {
1470 memcpy(
1471 interp_weights_data->weights + weights_offset,
1472 weights, num_weights * sizeof(*weights));
1473 } else {
1474 for (size_t i = 0, j = weights_offset; i < num_weights; ++i, ++j)
1475 interp_weights_data->weights[j] = 1.0;
1476 }
1477
1478 // add source field indices to interp_weights_data
1479 interp_weights_data->src_field_idx =
1480 xrealloc(
1481 interp_weights_data->src_field_idx, (weights_offset + num_weights) *
1482 sizeof(*(interp_weights_data->src_field_idx)));
1483 if (src_field_idx) {
1484 memcpy(
1485 interp_weights_data->src_field_idx + weights_offset,
1486 src_field_idx, num_weights * sizeof(*src_field_idx));
1487 } else {
1488 for (size_t i = 0, j = weights_offset; i < num_weights;
1489 ++i, ++j)
1490 interp_weights_data->src_field_idx[j] = 0;
1491 // in case no source field indices were provided, we reference the ones in
1492 // interp_weights_data
1493 src_field_idx = interp_weights_data->src_field_idx + weights_offset;
1494 }
1495
1496 // add source indices to interp_weights_data
1497 // (since interp_weights_data may already contain source indices, we have to
1498 // offset the ones added here by the number of source indices already in
1499 // interp_weights_data)
1500 interp_weights_data->src_idx =
1501 xrealloc(
1502 interp_weights_data->src_idx, (weights_offset + num_weights) *
1503 sizeof(*(interp_weights_data->src_idx)));
// the offset is looked up per source via its field index; the stored buffer
// sizes are still the ones from before this call at this point
1504 for (size_t i = 0, j = weights_offset; i < num_weights; ++i, ++j)
1505 interp_weights_data->src_idx[j] =
1506 src_idx[i] + interp_weights_data->src_field_buffer_size[src_field_idx[i]];
1507
1508 // update src_field_buffer_size in interp_weights_data
1509 for (size_t i = 0; i < num_src_fields; ++i)
1510 interp_weights_data->src_field_buffer_size[i] += src_field_buffer_size[i];
1511
1512 // update number of weighted target points
1513 interp_weights_data->num_wgt_tgt += tgt_count;
1514 }
1515
// Returns a pointer to the message entry of `rank` inside msgs; if no entry
// for that rank exists, an empty one (pos NULL, count 0) is appended first.
// The returned pointer points into msgs->msg, which is grown with xrealloc
// when an entry is appended.
// NOTE(review): the function-name line (listing line 1517) is missing from
// this excerpt.
1516 static struct yac_src_field_exchange_data_msg *
1518 struct yac_src_field_exchange_data_msgs * msgs, int rank) {
1519
1520 // search for a matching message
// linear scan; msg_idx equals msgs->num_msg afterwards if nothing matched
1521 size_t msg_idx = 0;
1522 for (; msg_idx < msgs->num_msg; ++msg_idx)
1523 if (msgs->msg[msg_idx].rank == rank) break;
1524
1525 // allocate entry, if none for the current rank exists
1526 if (msg_idx == msgs->num_msg) {
1527 msgs->num_msg++;
1528 msgs->msg = xrealloc(msgs->msg, msgs->num_msg * sizeof(*msgs->msg));
1529 msgs->msg[msg_idx] =
1530 (struct yac_src_field_exchange_data_msg)
1531 {.rank = rank, .pos = NULL, .count = 0};
1532 }
1533
1534 return msgs->msg + msg_idx;
1535 }
1536
// Appends `count` positions to the message associated with `rank` in msgs;
// every appended value is pos[i] + offset.
// NOTE(review): the function-name line and its preceding documentation
// (listing lines 1537-1546) as well as the right-hand side of the msg
// initialisation (listing line 1553) are missing from this excerpt.
1547 struct yac_src_field_exchange_data_msgs * msgs,
1548 int rank, size_t count, size_t * pos, size_t offset) {
1549
1550 // search for a message matching the rank (create empty message if it does
1551 // not yet exist)
1552 struct yac_src_field_exchange_data_msg * msg =
1554
1555 // add positions
1556 msg->pos =
1557 xrealloc(msg->pos, ((size_t)msg->count + count) * sizeof(*msg->pos));
// msg->count is advanced together with i, so it doubles as the write index
1558 for (size_t i = 0; i < count; ++i, ++msg->count)
1559 msg->pos[msg->count] = pos[i] + offset;
1560 }
1561
// Distributes flat send/receive position lists over the per-source-field
// exchange data.
// The size arrays are walked rank-major with the source field as the inner
// index (entry = rank * num_src_fields + src_field_idx); send_pos and recv_pos
// are consumed in the same order.
// NOTE(review): the function-name line and its preceding documentation
// (listing lines 1562-1580) and the names of the two calls that store the
// positions (listing lines 1603 and 1613) are missing from this excerpt.
//
// src_field_exchange_data  array with one entry per source field
// num_src_fields           number of source fields
// comm                     communicator; only its size is queried here
// send_msg_sizes/send_pos  sizes and positions of the data to be sent
// recv_msg_sizes/recv_pos  sizes and positions of the data to be received
// recv_offsets             per-source-field offset passed along with the
//                          receive positions
1581 struct yac_src_field_exchange_data * src_field_exchange_data,
1582 size_t num_src_fields, MPI_Comm comm,
1583 size_t * send_msg_sizes, size_t * send_pos,
1584 size_t * recv_msg_sizes, size_t * recv_pos,
1585 size_t * recv_offsets) {
1586
1587 int comm_size;
1588 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1589
// msg_idx runs over all (rank, source field) pairs
1590 size_t msg_idx = 0;
1591 for (int rank = 0; rank < comm_size; ++rank) {
1592
1593 for (size_t src_field_idx = 0; src_field_idx < num_src_fields;
1594 ++src_field_idx, ++msg_idx) {
1595
1596 struct yac_src_field_exchange_data * curr_src_field_exchange_data =
1597 src_field_exchange_data + src_field_idx;
1598
1599 // if data has to be sent to the current rank
1600 size_t send_msg_size = send_msg_sizes[msg_idx];
1601 if (send_msg_sizes[msg_idx] > 0) {
1602
// send positions are passed with an offset of 0
1604 &curr_src_field_exchange_data->send, rank, send_msg_size,
1605 send_pos, 0);
1606 send_pos += send_msg_size;
1607 }
1608
1609 // if data has to be received from the current rank
1610 size_t recv_msg_size = recv_msg_sizes[msg_idx];
1611 if (recv_msg_size > 0) {
1612
1614 &curr_src_field_exchange_data->recv, rank, recv_msg_size,
1615 recv_pos, recv_offsets[src_field_idx]);
1616 recv_pos += recv_msg_size;
1617 }
1618 }
1619 }
1620 }
1621
// Variant of the direct add callback that records the exchange pattern and
// the source-to-target mapping in a struct yac_interpolation_raw instead of
// building a redist (redist_config is unused). A single source field is
// assumed (num_src_fields is fixed to 1).
// NOTE(review): the function-name line (listing line 1622), the name and last
// argument of the call storing the exchange data (1657, 1660) and the name of
// the call storing the weights (1676) are missing from this excerpt.
1623 void * interp, size_t * src_orig_poses, size_t * sendcounts,
1624 struct interp_weight_stencil_direct * tgt_stencils,
1625 size_t * recvcounts, MPI_Comm comm, Xt_config redist_config) {
1626
1627 UNUSED(redist_config);
1628
1629 struct yac_interpolation_raw * interp_raw =
1630 (struct yac_interpolation_raw *)interp;
1631
1632 int comm_size;
1633 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1634
1635 size_t num_src_fields = 1;
1636 yac_src_field_exchange_data_realloc(interp_raw, num_src_fields);
1637
1638 size_t total_sendcounts = 0, total_recvcounts = 0;
1639 for (int i = 0; i < comm_size; ++i) {
1640 total_sendcounts += sendcounts[i];
1641 total_recvcounts += recvcounts[i];
1642 }
1643
// one scratch buffer serves two purposes in sequence:
// first send_pos (total_sendcounts) followed by recv_pos (total_recvcounts),
// later tgt_idx followed by src_idx (total_recvcounts each)
1644 size_t * size_t_buffer =
1645 xmalloc(
1646 (MAX(total_sendcounts, total_recvcounts) + total_recvcounts) *
1647 sizeof(*size_t_buffer));
1648 size_t * send_pos = size_t_buffer;
1649 size_t * recv_pos = size_t_buffer + total_sendcounts;
1650
1651 for (size_t i = 0; i < total_sendcounts; ++i)
1652 send_pos[i] = (size_t)src_orig_poses[i];
// received points are numbered consecutively in the order they arrive
1653 for (size_t i = 0; i < total_recvcounts; ++i)
1654 recv_pos[i] = i;
1655
1656 // store source field data that needs to be exchanged
1658 interp_raw->src_field_exchange_data, num_src_fields, comm,
1659 sendcounts, send_pos, recvcounts, recv_pos,
1661
// from here on the scratch buffer is overwritten; send_pos and recv_pos must
// not be read any more
1662 size_t * tgt_idx = size_t_buffer;
1663 size_t * src_idx = size_t_buffer + total_recvcounts;
1664
1665 for (size_t i = 0; i < total_recvcounts; ++i) {
1666 tgt_idx[i] = (size_t)(tgt_stencils[i].orig_pos);
1667 src_idx[i] = i;
1668 }
1669
1670 // store mapping information between source points in the source field buffer
1671 // to the target points
// the three NULL pointers are passed on as num_src_per_tgt, weights and
// src_field_idx
1672 size_t * num_src_per_tgt = NULL;
1673 double * weights = NULL;
1674 size_t * src_field_idx = NULL;
1675 size_t src_field_buffer_size = total_recvcounts;
1677 &(interp_raw->interp_weights_data), num_src_fields, total_recvcounts,
1678 tgt_idx, num_src_per_tgt, weights, src_field_idx, src_idx,
1679 &src_field_buffer_size);
1680
1681 free(size_t_buffer);
1682 }
1683
// Multi-field variant of the raw direct add callback: records the exchange
// pattern and the source-to-target mapping in a struct yac_interpolation_raw
// (redist_config is unused).
// sendcounts and recvcounts are read rank-major with the source field as the
// inner index (entry = rank * num_src_fields + src_field_idx).
// NOTE(review): the function-name line (listing line 1684), the name and last
// argument of the call storing the exchange data (1734, 1737) and the name of
// the call storing the weights (1763) are missing from this excerpt.
1685 void * interp, size_t num_src_fields, size_t * src_orig_poses,
1686 size_t * sendcounts, struct interp_weight_stencil_direct_mf * tgt_stencils,
1687 size_t * recvcounts, MPI_Comm comm, Xt_config redist_config) {
1688
1689 UNUSED(redist_config);
1690
1691 struct yac_interpolation_raw * interp_raw =
1692 (struct yac_interpolation_raw *)interp;
1693
1694 int comm_size;
1695 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1696
1697 yac_src_field_exchange_data_realloc(interp_raw, num_src_fields);
1698
1699 size_t total_sendcounts = 0, total_recvcounts = 0;
1700 for (size_t rank = 0, idx = 0; rank < (size_t)comm_size; ++rank) {
1701 for (size_t src_field_idx = 0; src_field_idx < num_src_fields;
1702 ++src_field_idx, ++idx) {
1703 total_sendcounts += sendcounts[idx];
1704 total_recvcounts += recvcounts[idx];
1705 }
1706 }
1707
// one scratch buffer serves two purposes in sequence:
// first send_pos (total_sendcounts) followed by recv_pos (total_recvcounts),
// later tgt_idx, src_idx and src_field_idx (total_recvcounts each)
1708 size_t * size_t_buffer =
1709 xmalloc(
1710 (MAX(total_sendcounts, total_recvcounts) + 2 * total_recvcounts) *
1711 sizeof(*size_t_buffer));
1712 size_t * send_pos = size_t_buffer;
1713 size_t * recv_pos = size_t_buffer + total_sendcounts;
// NOTE(review): variable-length array; a num_src_fields of 0 would give it a
// size of zero — confirm that callers never pass 0
1714 size_t src_field_buffer_size[num_src_fields]; // number of source points to
1715 // be received per source field
1716 memset(
1717 src_field_buffer_size, 0,
1718 num_src_fields * sizeof(src_field_buffer_size[0]));
1719
1720 for (size_t i = 0; i < total_sendcounts; ++i)
1721 send_pos[i] = (size_t)src_orig_poses[i];
// received points are numbered consecutively per source field
1722 for (size_t rank = 0, idx = 0, recv_pos_idx = 0; rank < (size_t)comm_size;
1723 ++rank) {
1724 for (size_t src_field_idx = 0; src_field_idx < num_src_fields;
1725 ++src_field_idx, ++idx) {
1726 if (recvcounts[idx] > 0) {
1727 for (size_t i = 0; i < recvcounts[idx]; ++i, ++recv_pos_idx)
1728 recv_pos[recv_pos_idx] = src_field_buffer_size[src_field_idx]++;
1729 }
1730 }
1731 }
1732
1733 // store source field data that needs to be exchanged
1735 interp_raw->src_field_exchange_data, num_src_fields, comm,
1736 sendcounts, send_pos, recvcounts, recv_pos,
1738
// from here on the scratch buffer is overwritten; send_pos and recv_pos must
// not be read any more
1739 size_t * tgt_idx = size_t_buffer;
1740 size_t * src_idx = size_t_buffer + total_recvcounts;
1741 size_t * src_field_idx = size_t_buffer + 2 * total_recvcounts;
// the per-field counters are reset and rebuilt by the loop below
1742 memset(
1743 src_field_buffer_size, 0,
1744 num_src_fields * sizeof(src_field_buffer_size[0]));
1745
// k runs over tgt_stencils in the same (rank, source field) order as
// recvcounts
1746 for (size_t rank = 0, idx = 0, k = 0; rank < (size_t)comm_size; ++rank) {
1747 for (size_t j = 0; j < num_src_fields; ++j, ++idx) {
1748
1749 if (recvcounts[idx] > 0) {
1750 for (size_t i = 0; i < recvcounts[idx]; ++i, ++k) {
1751 tgt_idx[k] = (size_t)(tgt_stencils[k].orig_pos);
1752 src_idx[k] = src_field_buffer_size[j]++;
1753 src_field_idx[k] = j;
1754 }
1755 }
1756 }
1757 }
1758
1759 // store mapping information between source points in the source fields buffer
1760 // to the target points
// the two NULL pointers are passed on as num_src_per_tgt and weights
1761 size_t * num_src_per_tgt = NULL;
1762 double * weights = NULL;
1764 &(interp_raw->interp_weights_data), num_src_fields, total_recvcounts,
1765 tgt_idx, num_src_per_tgt, weights, src_field_idx, src_idx,
1766 src_field_buffer_size);
1767
1768 free(size_t_buffer);
1769 }
1770
// Multi-field counterpart of the direct-stencil redistribution. Works in two
// communication rounds and passes the result to interp_add_direct_mf:
//   1. every stencil is sent to each process listed in its target point;
//   2. the receivers sort what they got and send the requested source
//      orig_pos values to the ranks recorded in the stencils' src.
// Counts are kept per (rank, source field) pair in sendcounts/recvcounts
// (index = rank * num_src_fields + field) and additionally summed per rank in
// total_sendcounts/total_recvcounts for the actual exchanges.
// NOTE(review): the function-name line (listing line 1771) and the opening
// lines of several calls (1791, 1799, 1822, 1875, 1886, 1899, 1933, 1944) are
// missing from this excerpt; the name used in the error strings below is
// "yac_interp_weights_redist_direct_mf".
//
// comm                 communicator used for all exchanges
// count                number of entries in direct_mf_stencils
// direct_mf_stencils   stencils to redistribute (data.direct_mf is read)
// interp               opaque pointer handed to interp_add_direct_mf
// interp_add_direct_mf callback receiving the exchanged data
// redist_config        forwarded to interp_add_direct_mf
1772 MPI_Comm comm, size_t count,
1773 struct interp_weight_stencil * direct_mf_stencils,
1774 void * interp,
1775 void (*interp_add_direct_mf)(
1776 void *, size_t, size_t *, size_t *,
1777 struct interp_weight_stencil_direct_mf *, size_t *, MPI_Comm, Xt_config),
1778 Xt_config redist_config) {
1779
1780 //---------------------------------------------------------------------------
1781 // redistribute multi field direct stencils to owners of direct target points
1782 // (a target point can be owned by multiple processes)
1783 //---------------------------------------------------------------------------
1784
1785 // determine the number of source fields
// local value: largest field index found plus one; the MPI_MAX reduction
// below makes it identical on all processes of comm
1786 size_t num_src_fields = 0;
1787 for (size_t i = 0; i < count; ++i) {
1788 size_t src_field_idx = direct_mf_stencils[i].data.direct_mf.field_idx;
1789 if (src_field_idx >= num_src_fields) num_src_fields = src_field_idx + 1;
1790 }
1792 MPI_Allreduce(
1793 MPI_IN_PLACE, &num_src_fields, 1, YAC_MPI_SIZE_T, MPI_MAX, comm), comm);
1794
1795 int comm_size;
1796 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1797
1798 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
1800 (size_t)num_src_fields, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
// per-rank totals and displacements share one allocation of 4 * comm_size
// entries
1801 size_t * size_t_buffer =
1802 xmalloc(4 * (size_t)comm_size * sizeof(*size_t_buffer));
1803 size_t * total_sendcounts = size_t_buffer + 0 * comm_size;
1804 size_t * total_recvcounts = size_t_buffer + 1 * comm_size;
1805 size_t * total_sdispls = size_t_buffer + 2 * comm_size;
1806 size_t * total_rdispls = size_t_buffer + 3 * comm_size;
1807
1808 // count the number of multi field direct stencils that need to be sent to
1809 // each process
1810 for (size_t i = 0; i < count; ++i) {
1811 int curr_count = direct_mf_stencils[i].tgt.data.count;
// a target with exactly one owner stores its info inline (single), otherwise
// the infos are read from the multi array
1812 struct remote_point_info * curr_point_info =
1813 (curr_count == 1)?
1814 (&(direct_mf_stencils[i].tgt.data.data.single)):
1815 (direct_mf_stencils[i].tgt.data.data.multi);
1816 size_t src_field_idx = direct_mf_stencils[i].data.direct_mf.field_idx;
1817 for (int j = 0; j < curr_count; ++j)
1818 sendcounts[
1819 (size_t)(curr_point_info[j].rank) * num_src_fields + src_field_idx]++;
1820 }
1821
1823 (size_t)num_src_fields, sendcounts, recvcounts, sdispls, rdispls, comm);
1824
// collapse the per-(rank, field) counts into per-rank counts and running
// displacements
1825 size_t saccu = 0, raccu = 0;
1826 for (int i = 0; i < comm_size; ++i) {
1827 total_sdispls[i] = saccu;
1828 total_rdispls[i] = raccu;
1829 total_sendcounts[i] = 0;
1830 total_recvcounts[i] = 0;
1831 for (size_t j = 0; j < num_src_fields; ++j) {
1832 total_sendcounts[i] += sendcounts[num_src_fields * i + j];
1833 total_recvcounts[i] += recvcounts[num_src_fields * i + j];
1834 }
1835 saccu += total_sendcounts[i];
1836 raccu += total_recvcounts[i];
1837 }
1838
1839 size_t send_buffer_size = total_sdispls[comm_size - 1] +
1840 total_sendcounts[comm_size - 1];
1841 size_t recv_buffer_size = total_rdispls[comm_size - 1] +
1842 total_recvcounts[comm_size - 1];
1843 size_t tgt_count = recv_buffer_size;
1844
// one allocation holds both buffers: receive part first, send part behind it
1845 struct interp_weight_stencil_direct_mf * stencil_buffer =
1846 xmalloc((send_buffer_size + recv_buffer_size) * sizeof(*stencil_buffer));
1847 struct interp_weight_stencil_direct_mf * send_stencil_buffer =
1848 stencil_buffer + recv_buffer_size;
1849 struct interp_weight_stencil_direct_mf * recv_stencil_buffer = stencil_buffer;
1850
1851 // pack direct_mf stencils
1852 for (size_t i = 0; i < count; ++i) {
1853 int curr_count = direct_mf_stencils[i].tgt.data.count;
1854 struct remote_point_info * curr_point_infos =
1855 (curr_count == 1)?
1856 (&(direct_mf_stencils[i].tgt.data.data.single)):
1857 (direct_mf_stencils[i].tgt.data.data.multi);
// a single source info is chosen for the stencil via select_src
1858 struct remote_point_info src =
1859 select_src(direct_mf_stencils[i].data.direct_mf.src.data);
1860 size_t src_field_idx = direct_mf_stencils[i].data.direct_mf.field_idx;
1861 for (int j = 0; j < curr_count; ++j) {
// the slot is taken from the sdispls entry one past the (rank, field) index,
// which is advanced for the next entry of the same pair
// NOTE(review): relies on how the (missing) setup call fills sdispls — confirm
1862 size_t pos =
1863 sdispls[(size_t)(curr_point_infos[j].rank) * num_src_fields +
1864 src_field_idx + 1]++;
1865 send_stencil_buffer[pos].src = src;
1866 send_stencil_buffer[pos].src_field_idx = src_field_idx;
1867 send_stencil_buffer[pos].orig_pos = curr_point_infos[j].orig_pos;
1868 }
1869 }
1870
1871 // create MPI Datatype for exchanging direct_mf stencils
1872 MPI_Datatype stencil_direct_mf_dt = get_direct_mf_stencil_mpi_datatype(comm);
1873
1874 // redistribute stencils based on target owners
1876 send_stencil_buffer, total_sendcounts, total_sdispls,
1877 recv_stencil_buffer, total_recvcounts, total_rdispls,
1878 sizeof(*stencil_buffer), stencil_direct_mf_dt, comm,
1879 "yac_interp_weights_redist_direct_mf", __LINE__);
1880
1881 yac_mpi_call(MPI_Type_free(&stencil_direct_mf_dt), comm);
1882
1883 // sort stencils based on src rank first, src_field_idx, and
1884 // by target orig pos second
1885 qsort(recv_stencil_buffer, tgt_count, sizeof(*recv_stencil_buffer),
1887
1888 //---------------------------------------------------------------------------
1889 // inform source processes about their requested points
1890 //---------------------------------------------------------------------------
1891
// sendcounts is reused for the second round and therefore reset
1892 memset(sendcounts, 0,
1893 (size_t)comm_size * (size_t)num_src_fields * sizeof(*sendcounts));
1894
1895 for (size_t i = 0; i < tgt_count; ++i)
1896 sendcounts[(size_t)(recv_stencil_buffer[i].src.rank) * num_src_fields +
1897 recv_stencil_buffer[i].src_field_idx]++;
1898
1900 (size_t)num_src_fields, sendcounts, recvcounts, sdispls, rdispls, comm);
1901
// recompute the per-rank totals and displacements for the second round
1902 saccu = 0, raccu = 0;
1903 for (int i = 0; i < comm_size; ++i) {
1904 total_sdispls[i] = saccu;
1905 total_rdispls[i] = raccu;
1906 total_sendcounts[i] = 0;
1907 total_recvcounts[i] = 0;
1908 for (size_t j = 0; j < num_src_fields; ++j) {
1909 total_sendcounts[i] += sendcounts[num_src_fields * i + j];
1910 total_recvcounts[i] += recvcounts[num_src_fields * i + j];
1911 }
1912 saccu += total_sendcounts[i];
1913 raccu += total_recvcounts[i];
1914 }
1915
1916 send_buffer_size = total_sdispls[comm_size - 1] +
1917 total_sendcounts[comm_size - 1];
1918 recv_buffer_size = total_rdispls[comm_size - 1] +
1919 total_recvcounts[comm_size - 1];
1920
// same layout as stencil_buffer: receive part first, send part behind it
1921 size_t * orig_pos_buffer =
1922 xmalloc((send_buffer_size + recv_buffer_size) * sizeof(*orig_pos_buffer));
1923 size_t * send_orig_pos_buffer = orig_pos_buffer + recv_buffer_size;
1924 size_t * recv_orig_pos_buffer = orig_pos_buffer;
1925
// pack the source orig_pos of every received stencil for its
// (source rank, source field) pair
1926 for (size_t i = 0; i < tgt_count; ++i)
1927 send_orig_pos_buffer[
1928 sdispls[(size_t)(recv_stencil_buffer[i].src.rank) * num_src_fields +
1929 recv_stencil_buffer[i].src_field_idx + 1]++] =
1930 recv_stencil_buffer[i].src.orig_pos;
1931
1932 // inform source processes about their requested points
1934 send_orig_pos_buffer, total_sendcounts, total_sdispls,
1935 recv_orig_pos_buffer, total_recvcounts, total_rdispls,
1936 sizeof(*send_orig_pos_buffer), YAC_MPI_SIZE_T, comm,
1937 "yac_interp_weights_redist_direct_mf", __LINE__);
// the per-rank totals are not needed after the exchange
1938 free(size_t_buffer);
1939
1940 //---------------------------------------------------------------------------
1941 // store received stencils in interp data structure
1942 //---------------------------------------------------------------------------
1943
// note the argument order: the positions received in the second round are
// passed together with recvcounts, the sorted stencils together with
// sendcounts (both per (rank, field) pair)
1945 interp, num_src_fields,
1946 recv_orig_pos_buffer, recvcounts, recv_stencil_buffer, sendcounts,
1947 comm, redist_config);
1948
1949 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
1950 free(orig_pos_buffer);
1951 free(stencil_buffer);
1952 }
1953
// Returns the MPI pack size of a single MPI_DOUBLE on comm.
// stencil and point_info_dt are not used.
// NOTE(review): the function-name line (listing line 1954) is missing from
// this excerpt.
1955 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1956 MPI_Comm comm) {
1957
1958 UNUSED(stencil);
1959 UNUSED(point_info_dt);
1960
1961 int pack_size_value;
1962
1963 yac_mpi_call(MPI_Pack_size(1, MPI_DOUBLE, comm, &pack_size_value), comm);
1964
1965 return pack_size_value;
1966 }
1967
// Returns the pack size reported for the source point of a direct stencil
// (stencil->data.direct.src).
// NOTE(review): the function-name line (listing line 1968) and the name of the
// called helper (listing line 1973) are missing from this excerpt.
1969 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1970 MPI_Comm comm) {
1971
1972 return
1974 &(stencil->data.direct.src), point_info_dt, comm);
1975 }
1976
// Returns the pack size reported for the source points of a sum stencil
// (stencil->data.sum.srcs).
// NOTE(review): the function-name line (listing line 1977) and the name of the
// called helper (listing line 1982) are missing from this excerpt.
1978 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1979 MPI_Comm comm) {
1980
1981 return
1983 stencil->data.sum.srcs, point_info_dt, comm);
1984 }
1985
// Returns the pack size of a weighted-sum stencil: the size reported for its
// source points plus the MPI pack size of one MPI_DOUBLE per source point.
// NOTE(review): the function-name line (listing line 1986), the opening of the
// wrapping call (1991) and the name of the called helper (1997) are missing
// from this excerpt.
1987 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1988 MPI_Comm comm) {
1989
1990 int pack_size_weights;
// the source point count is narrowed to int for MPI_Pack_size
1992 MPI_Pack_size(
1993 (int)(stencil->data.weight_sum.srcs->count),
1994 MPI_DOUBLE, comm, &pack_size_weights), comm);
1995
1996 return
1998 stencil->data.weight_sum.srcs, point_info_dt, comm) +
1999 pack_size_weights;
2000 }
2001
// Returns the pack size of a multi-field direct stencil: the size reported for
// its source point plus the MPI pack size of one YAC_MPI_SIZE_T (the source
// field index).
// NOTE(review): the function-name line (listing line 2002), the opening of the
// wrapping call (2007) and the name of the called helper (2012) are missing
// from this excerpt.
2003 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
2004 MPI_Comm comm) {
2005
2006 int pack_size_src_field_idx;
2008 MPI_Pack_size(
2009 1, YAC_MPI_SIZE_T, comm, &pack_size_src_field_idx), comm);
2010
2011 return
2013 &(stencil->data.direct_mf.src), point_info_dt, comm) +
2014 pack_size_src_field_idx;
2015 }
2016
// Returns the pack size of a multi-field weighted-sum stencil: the size
// reported for its source points plus, per source point, the MPI pack size of
// one MPI_DOUBLE (weight) and one YAC_MPI_SIZE_T (field index).
// NOTE(review): the function-name line (listing line 2017), the openings of
// the wrapping calls (2023, 2026) and the name of the called helper (2031) are
// missing from this excerpt.
2018 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
2019 MPI_Comm comm) {
2020
2021 int pack_size_weights, pack_size_field_indices;
// the source point count is narrowed to int for MPI_Pack_size
2022 int count = (int)(stencil->data.weight_sum_mf.srcs->count);
2024 MPI_Pack_size(
2025 count, MPI_DOUBLE, comm, &pack_size_weights), comm);
2027 MPI_Pack_size(
2028 count, YAC_MPI_SIZE_T, comm, &pack_size_field_indices), comm);
2029
2030 return
2032 stencil->data.weight_sum_mf.srcs, point_info_dt, comm) +
2033 pack_size_weights + pack_size_field_indices;
2034 }
2035
// Returns the pack size of a multi-field sum stencil: the size reported for
// its source points plus the MPI pack size of one YAC_MPI_SIZE_T (field index)
// per source point.
// NOTE(review): the function-name line (listing line 2036), the opening of the
// wrapping call (2041) and the name of the called helper (2047) are missing
// from this excerpt.
2037 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
2038 MPI_Comm comm) {
2039
2040 int pack_size_field_indices;
// the source point count is narrowed to int for MPI_Pack_size
2042 MPI_Pack_size(
2043 (int)(stencil->data.sum_mf.srcs->count),
2044 YAC_MPI_SIZE_T, comm, &pack_size_field_indices), comm);
2045
2046 return
2048 stencil->data.sum_mf.srcs, point_info_dt, comm) +
2049 pack_size_field_indices;
2050 }
2051
// Returns a copy of *stencil whose target is a copy of `point` and whose
// type-specific payload (source points, weights, field indices) is duplicated
// into newly allocated memory, so that the copy does not share these arrays
// with the original.
// NOTE(review): the function-name line (listing line 2052) and the names of
// the helpers that duplicate the source points (listing lines 2079, 2084,
// 2097, 2103, 2117) are missing from this excerpt; the name in the assert
// message below is "copy_interp_weight_stencil".
2053 struct interp_weight_stencil * stencil, struct remote_point point) {
2054
// start from a shallow copy; pointer members are replaced per type below
2055 struct interp_weight_stencil stencil_copy = *stencil;
2056 stencil_copy.tgt = copy_remote_point(point);
2057
2058 YAC_ASSERT(
2059 (stencil->type == FIXED) ||
2060 (stencil->type == DIRECT) ||
2061 (stencil->type == SUM) ||
2062 (stencil->type == WEIGHT_SUM) ||
2063 (stencil->type == DIRECT_MF) ||
2064 (stencil->type == SUM_MF) ||
2065 (stencil->type == WEIGHT_SUM_MF),
2066 "ERROR(copy_interp_weight_stencil): invalid stencil type")
2067
2068 switch (stencil->type) {
2069 case(FIXED):
2070 // nothing to be done
2071 break;
2072 case(DIRECT):
2073 stencil_copy.data.direct.src =
2074 copy_remote_point(stencil->data.direct.src);
2075 break;
2076 case(SUM):
// the weights pointer is cleared through the weight_sum member of the union
// before the sum member is filled
2077 stencil_copy.data.weight_sum.weights = NULL;
2078 stencil_copy.data.sum.srcs =
2080 stencil->data.sum.srcs->data, stencil->data.sum.srcs->count);
2081 break;
2082 case(WEIGHT_SUM): {
2083 stencil_copy.data.weight_sum.srcs =
2085 stencil->data.weight_sum.srcs->data,
2086 stencil->data.weight_sum.srcs->count);
// one weight per source point
2087 size_t weight_size =
2088 stencil->data.weight_sum.srcs->count *
2089 sizeof(*(stencil_copy.data.weight_sum.weights));
2090 stencil_copy.data.weight_sum.weights = xmalloc(weight_size);
2091 memcpy(stencil_copy.data.weight_sum.weights,
2092 stencil->data.weight_sum.weights, weight_size);
2093 break;
2094 }
2095 case(DIRECT_MF):
2096 stencil_copy.data.direct_mf.src =
2098 stencil_copy.data.direct_mf.field_idx =
2099 stencil->data.direct_mf.field_idx;
2100 break;
2101 case(SUM_MF): {
2102 stencil_copy.data.sum_mf.srcs =
2104 stencil->data.sum_mf.srcs->data,
2105 stencil->data.sum_mf.srcs->count);
// one field index per source point
2106 size_t field_indices_size =
2107 stencil->data.sum_mf.srcs->count *
2108 sizeof(*(stencil_copy.data.sum_mf.field_indices));
2109 stencil_copy.data.sum_mf.field_indices = xmalloc(field_indices_size);
2110 memcpy(stencil_copy.data.sum_mf.field_indices,
2111 stencil->data.sum_mf.field_indices, field_indices_size);
2112 break;
2113 }
// default shares the WEIGHT_SUM_MF branch; all other valid types are handled
// by the cases above
2114 default:
2115 case(WEIGHT_SUM_MF): {
2116 stencil_copy.data.weight_sum_mf.srcs =
2118 stencil->data.weight_sum_mf.srcs->data,
2119 stencil->data.weight_sum_mf.srcs->count);
// one weight and one field index per source point
2120 size_t weight_size =
2121 stencil->data.weight_sum_mf.srcs->count *
2122 sizeof(*(stencil_copy.data.weight_sum_mf.weights));
2123 stencil_copy.data.weight_sum_mf.weights = xmalloc(weight_size);
2124 memcpy(stencil_copy.data.weight_sum_mf.weights,
2125 stencil->data.weight_sum_mf.weights, weight_size);
2126 size_t field_indices_size =
2127 stencil->data.weight_sum_mf.srcs->count *
2128 sizeof(*(stencil_copy.data.weight_sum_mf.field_indices));
2129 stencil_copy.data.weight_sum_mf.field_indices =
2130 xmalloc(field_indices_size);
2131 memcpy(stencil_copy.data.weight_sum_mf.field_indices,
2132 stencil->data.weight_sum_mf.field_indices, field_indices_size);
2133 break;
2134 }
2135 };
2136 return stencil_copy;
2137 }
2138
// wcopy_interp_weight_stencil (name taken from the assertion message below;
// the signature line is missing from this listing).
// Returns a copy of *stencil for target `point` with all contributions scaled
// by `weight`:
//  - weight == 1.0: plain copy via copy_interp_weight_stencil
//  - FIXED:         FIXED stencil with value * weight
//  - DIRECT / SUM:  WEIGHT_SUM stencil in which every source gets `weight`
//  - WEIGHT_SUM:    WEIGHT_SUM stencil with weights[i] * weight
// For weight != 1.0 only the four single-field types above are accepted.
2140 struct interp_weight_stencil * stencil, struct remote_point point,
2141 double weight) {
2142
2143 if (weight == 1.0) return copy_interp_weight_stencil(stencil, point);
2144
2145 struct remote_point * srcs;
2146 size_t src_count;
2147 double * weights;
2148
2149 YAC_ASSERT(
2150 (stencil->type == FIXED) ||
2151 (stencil->type == DIRECT) ||
2152 (stencil->type == SUM) ||
2153 (stencil->type == WEIGHT_SUM),
2154 "ERROR(wcopy_interp_weight_stencil): invalid stencil type")
2155
// collect the source points and (optional) weights of the input stencil;
// weights == NULL stands for an implicit weight of 1 per source
2156 switch (stencil->type) {
2157 case (FIXED):
2158 return
2159 (struct interp_weight_stencil) {
2160 .type = FIXED,
2161 .data.fixed.value = stencil->data.fixed.value * weight,
2162 .tgt = copy_remote_point(point)};
2163 case (DIRECT):
2164 src_count = 1;
2165 srcs = &(stencil->data.direct.src);
2166 weights = NULL;
2167 break;
2168 case (SUM):
2169 src_count = stencil->data.sum.srcs->count;
2170 srcs = stencil->data.sum.srcs->data;
2171 weights = NULL;
2172 break;
2173 default:
2174 case (WEIGHT_SUM):
2175 src_count = stencil->data.weight_sum.srcs->count;
2176 srcs = stencil->data.weight_sum.srcs->data;
2177 weights = stencil->data.weight_sum.weights;
2178 break;
2179 };
2180
2181 double * new_weights = xmalloc(src_count * sizeof(*new_weights));
2182 if (weights == NULL)
2183 for (size_t i = 0; i < src_count; ++i) new_weights[i] = weight;
2184 else
2185 for (size_t i = 0; i < src_count; ++i) new_weights[i] = weights[i] * weight;
2186
2187 struct interp_weight_stencil stencil_wcopy;
2188 stencil_wcopy.type = WEIGHT_SUM;
2189 stencil_wcopy.data.weight_sum.srcs = copy_remote_points(srcs, src_count);
2190 stencil_wcopy.data.weight_sum.weights = new_weights;
2191 stencil_wcopy.tgt = copy_remote_point(point);
2192
2193 return stencil_wcopy;
2194}
2195
2196static int compare_w_global_id(const void * a, const void * b) {
2197
2198 int ret = (((struct weighted_global_id *)a)->global_id >
2199 ((struct weighted_global_id *)b)->global_id) -
2200 (((struct weighted_global_id *)a)->global_id <
2201 ((struct weighted_global_id *)b)->global_id);
2202
2203 if (ret) return ret;
2204
2205 return (((struct weighted_global_id *)a)->weight >
2206 ((struct weighted_global_id *)b)->weight) -
2207 (((struct weighted_global_id *)a)->weight <
2208 ((struct weighted_global_id *)b)->weight);
2209}
2210
2211static int compare_remote_point(const void * a, const void * b) {
2212
2213 return ((const struct remote_point*)a)->global_id -
2214 ((const struct remote_point*)b)->global_id;
2215}
2216
2217static void compact_srcs_w(
2218 struct remote_points * srcs, double ** w) {
2219
2220 struct remote_point * data = srcs->data;
2221 size_t count = srcs->count;
2222
2223 struct weighted_global_id * w_global_id =
2224 xmalloc(count * sizeof(*w_global_id));
2225
2226 // extract global ids and weights
2227 for (size_t i = 0; i < count; ++i) {
2228 w_global_id[i].global_id = data[i].global_id;
2229 w_global_id[i].weight = (*w)[i];
2230 }
2231
2232 // sort by global ids and weights
2233 qsort(w_global_id, count, sizeof(*w_global_id), compare_w_global_id);
2234
2235 // sort sources by global ids
2236 qsort(data, count, sizeof(*data), compare_remote_point);
2237
2238 size_t new_count = 0;
2239
2240 // compact sources
2241 for (size_t i = 0; i < count;) {
2242
2243 data[new_count] = data[i];
2244
2245 yac_int curr_global_id = w_global_id[i].global_id;
2246 double curr_weight = w_global_id[i].weight;
2247
2248 ++i;
2249
2250 while((i < count) && (curr_global_id == w_global_id[i].global_id)) {
2251
2252 curr_weight += w_global_id[i].weight;
2253 ++i;
2254 }
2255
2256 (*w)[new_count] = curr_weight;
2257 ++new_count;
2258 }
2259
2260 free(w_global_id);
2261
2262 srcs->data = xrealloc(data, new_count * sizeof(*data));
2263 srcs->count = new_count;
2264 *w = xrealloc(*w, new_count * sizeof(**w));
2265}
2266
// stencils_merge_wsum (name taken from the assertion messages below; the
// signature line is missing from this listing).
// Merges num_stencils stencils of type DIRECT, SUM or WEIGHT_SUM into one
// WEIGHT_SUM stencil. The weight of each source is w[i] for DIRECT/SUM
// inputs and w[i] * <stencil weight> for WEIGHT_SUM inputs; sources with
// identical global ids are merged by compact_srcs_w.
// The tgt member of the returned stencil is not set here.
2268 struct interp_weight_stencil ** stencils, double * w, size_t num_stencils) {
2269
2270 size_t src_count = 0;
2271 size_t point_info_buffer_size = 0;
2272
// first pass: count the source points and the point infos of all sources
// that have more than one point info
2273 for (size_t i = 0; i < num_stencils; ++i) {
2274 size_t curr_src_count;
2275 struct remote_point * srcs;
2276 YAC_ASSERT(
2277 (stencils[i]->type == DIRECT) ||
2278 (stencils[i]->type == SUM) ||
2279 (stencils[i]->type == WEIGHT_SUM),
2280 "ERROR(stencils_merge_wsum): invalid stencil type")
2281 switch (stencils[i]->type) {
2282 case (DIRECT):
2283 curr_src_count = 1;
2284 srcs = &(stencils[i]->data.direct.src);
2285 break;
2286 case (SUM):
2287 curr_src_count = stencils[i]->data.sum.srcs->count;
2288 srcs = stencils[i]->data.sum.srcs->data;
2289 break;
2290 default:
2291 case (WEIGHT_SUM):
2292 curr_src_count = stencils[i]->data.weight_sum.srcs->count;
2293 srcs = stencils[i]->data.weight_sum.srcs->data;
2294 break;
2295 };
2296 src_count += curr_src_count;
2297 for (size_t j = 0, curr_src_data_count; j < curr_src_count; ++j)
2298 if (((curr_src_data_count = srcs[j].data.count)) > 1)
2299 point_info_buffer_size += curr_src_data_count;
2300 }
2301
// the remote_points struct and the buffer for the point infos are
// allocated as one block
2302 struct remote_points * srcs =
2303 xmalloc(point_info_buffer_size * sizeof(struct remote_point_info) +
2304 sizeof(*srcs));
2305 srcs->data = xmalloc(src_count * sizeof(*(srcs->data)));
2306 srcs->count = src_count;
2307 struct remote_point_info * point_info_buffer = &(srcs->buffer[0]);
2308 double * new_w = xmalloc(src_count * sizeof(*new_w));
2309
// second pass: copy the source points (the callee name is on a line missing
// from this listing) and compute the combined weights
2310 for (size_t i = 0, offset = 0; i < num_stencils; ++i) {
2311 size_t curr_src_count;
2312 struct remote_point * curr_srcs;
2313 double * stencil_w;
2314 YAC_ASSERT(
2315 (stencils[i]->type == DIRECT) ||
2316 (stencils[i]->type == SUM) ||
2317 (stencils[i]->type == WEIGHT_SUM),
2318 "ERROR(stencils_merge_wsum): invalid stencil type")
2319 switch (stencils[i]->type) {
2320 case (DIRECT):
2321 curr_src_count = 1;
2322 curr_srcs = &(stencils[i]->data.direct.src);
2323 stencil_w = NULL;
2324 break;
2325 case (SUM):
2326 curr_src_count = stencils[i]->data.sum.srcs->count;
2327 curr_srcs = stencils[i]->data.sum.srcs->data;
2328 stencil_w = NULL;
2329 break;
2330 default:
2331 case (WEIGHT_SUM):
2332 curr_src_count = stencils[i]->data.weight_sum.srcs->count;
2333 curr_srcs = stencils[i]->data.weight_sum.srcs->data;
2334 stencil_w = stencils[i]->data.weight_sum.weights;
2335 break;
2336 };
2338 srcs->data + offset, curr_srcs, curr_src_count, &point_info_buffer);
// stencil_w == NULL stands for an implicit weight of 1 per source
2339 if (stencil_w == NULL)
2340 for (size_t j = 0; j < curr_src_count; ++j, ++offset)
2341 new_w[offset] = w[i];
2342 else
2343 for (size_t j = 0; j < curr_src_count; ++j, ++offset)
2344 new_w[offset] = w[i] * stencil_w[j];
2345 }
2346
2347 compact_srcs_w(srcs, &new_w);
2348
2349 struct interp_weight_stencil merge_stencil;
2350 merge_stencil.type = WEIGHT_SUM;
2351 merge_stencil.data.weight_sum.srcs = srcs;
2352 merge_stencil.data.weight_sum.weights = new_w;
2353
2354 return merge_stencil;
2355}
2356
// stencils_merge_sum (name taken from the assertion messages below; the
// signature line is missing from this listing).
// Merges num_stencils stencils of type DIRECT or SUM into one SUM stencil,
// whose sources are sorted by global id. Duplicate sources are kept.
// If any w[i] differs from 1.0 the work is delegated to stencils_merge_wsum
// and a WEIGHT_SUM stencil is returned instead.
// The tgt member of the returned stencil is not set here.
2358 struct interp_weight_stencil ** stencils, double * w, size_t num_stencils) {
2359
2360 for (size_t i = 0; i < num_stencils; ++i)
2361 if (w[i] != 1.0)
2362 return stencils_merge_wsum(stencils, w, num_stencils);
2363
2364 size_t src_count = 0;
2365 size_t point_info_buffer_size = 0;
2366
// first pass: count the source points and the point infos of all sources
// that have more than one point info
2367 for (size_t i = 0; i < num_stencils; ++i) {
2368 size_t curr_src_count;
2369 struct remote_point * srcs;
2370 YAC_ASSERT(
2371 (stencils[i]->type == DIRECT) ||
2372 (stencils[i]->type == SUM),
2373 "ERROR(stencils_merge_sum): invalid stencil type")
2374 switch (stencils[i]->type) {
2375 case (DIRECT):
2376 curr_src_count = 1;
2377 srcs = &(stencils[i]->data.direct.src);
2378 break;
2379 default:
2380 case (SUM):
2381 curr_src_count = stencils[i]->data.sum.srcs->count;
2382 srcs = stencils[i]->data.sum.srcs->data;
2383 break;
2384 };
2385 src_count += curr_src_count;
2386 for (size_t j = 0, curr_src_data_count; j < curr_src_count; ++j)
2387 if (((curr_src_data_count = srcs[j].data.count)) > 1)
2388 point_info_buffer_size += curr_src_data_count;
2389 }
2390
// the remote_points struct and the buffer for the point infos are
// allocated as one block
2391 struct remote_points * srcs =
2392 xmalloc(point_info_buffer_size * sizeof(struct remote_point_info) +
2393 sizeof(*srcs));
2394 srcs->data = xmalloc(src_count * sizeof(*(srcs->data)));
2395 srcs->count = src_count;
2396 struct remote_point_info * point_info_buffer = &(srcs->buffer[0]);
2397
// second pass: copy the source points of all stencils (the callee name is
// on a line missing from this listing)
2398 for (size_t i = 0, offset = 0; i < num_stencils; ++i) {
2399 size_t curr_src_count;
2400 struct remote_point * curr_srcs;
2401 YAC_ASSERT(
2402 (stencils[i]->type == DIRECT) ||
2403 (stencils[i]->type == SUM),
2404 "ERROR(stencils_merge_sum): invalid stencil type")
2405 switch (stencils[i]->type) {
2406 case (DIRECT):
2407 curr_src_count = 1;
2408 curr_srcs = &(stencils[i]->data.direct.src);
2409 break;
2410 default:
2411 case (SUM):
2412 curr_src_count = stencils[i]->data.sum.srcs->count;
2413 curr_srcs = stencils[i]->data.sum.srcs->data;
2414 break;
2415 };
2417 srcs->data + offset, curr_srcs, curr_src_count, &point_info_buffer);
2418 offset += curr_src_count;
2419 }
2420
2421 qsort(srcs->data, srcs->count, sizeof(*(srcs->data)), compare_remote_point);
2422
2423 struct interp_weight_stencil merge_stencil;
2424 merge_stencil.type = SUM;
2425 merge_stencil.data.sum.srcs = srcs;
2426
2427 return merge_stencil;
2428}
2429
// stencils_merge (name taken from the assertion messages below; the
// signature line is missing from this listing).
// Builds the stencil for target `point` as the weighted combination
// sum_i w[i] * stencils[i]:
//  - a single stencil is handled by wcopy_interp_weight_stencil
//  - FIXED stencils can only be combined with other FIXED stencils; the
//    result is a FIXED stencil with value sum_i w[i] * value_i
//  - if at least one WEIGHT_SUM stencil is involved, stencils_merge_wsum
//    is used
//  - otherwise (only DIRECT and SUM stencils) stencils_merge_sum is used
// Stencils with multiple source fields (*_MF) are rejected.
2431 struct interp_weight_stencil ** stencils, double * w, size_t num_stencils,
2432 struct remote_point point) {
2433
2434 if (num_stencils == 1)
2435 return wcopy_interp_weight_stencil(*stencils, point, *w);
2436
2437 int fixed_count = 0;
2438 int direct_count = 0;
2439 int sum_count = 0;
2440 int wsum_count = 0;
2441 double fixed_value = 0.0;
2442
// count the stencils per type and accumulate the weighted fixed value
2443 for (size_t i = 0; i < num_stencils; ++i) {
2444 YAC_ASSERT(
2445 (stencils[i]->type != DIRECT_MF) &&
2446 (stencils[i]->type != SUM_MF) &&
2447 (stencils[i]->type != WEIGHT_SUM_MF),
2448 "ERROR(stencils_merge): multiple source fields not yet supported")
2449 YAC_ASSERT(
2450 (stencils[i]->type == FIXED) ||
2451 (stencils[i]->type == DIRECT) ||
2452 (stencils[i]->type == SUM) ||
2453 (stencils[i]->type == WEIGHT_SUM),
2454 "ERROR(stencils_merge): unsupported stencil type")
2455 switch (stencils[i]->type) {
2456 case (FIXED):
2457 fixed_value += stencils[i]->data.fixed.value * w[i];
2458 fixed_count++;
2459 break;
2460 case (DIRECT):
2461 direct_count++;
2462 break;
2463 case (SUM):
2464 sum_count++;
2465 break;
2466 default:
2467 case (WEIGHT_SUM):
2468 wsum_count++;
2469 break;
2470 };
2471 }
2472
2473 struct interp_weight_stencil merge_stencil;
2474
2475 YAC_ASSERT(
2476 (fixed_count > 0) || (wsum_count > 0) ||
2477 (sum_count > 0) || (direct_count > 0),
2478 "ERROR(stencils_merge): unknown error")
2479 if (fixed_count > 0) {
2480
2481 YAC_ASSERT(
2482 (direct_count + sum_count + wsum_count) <= 0,
2483 "ERROR(stencils_merge): invalid stencil combination")
2484
// all stencils are FIXED: start from the first one and replace its value;
// tgt is overwritten below
2485 merge_stencil = **stencils;
2486 merge_stencil.data.fixed.value = fixed_value;
2487 } else if (wsum_count > 0)
2488 merge_stencil =
2489 stencils_merge_wsum(stencils, w, num_stencils);
2490 else if ((sum_count > 0) || (direct_count > 0))
2491 merge_stencil =
2492 stencils_merge_sum(stencils, w, num_stencils);
2493
2494 merge_stencil.tgt = copy_remote_point(point);
2495
2496 return merge_stencil;
2497}
2498
// get_stencils_pack_sizes (name taken from the assertion message below; the
// signature line is missing from this listing).
// For i in [0, count) stores in pack_sizes[i] the size computed for
// stencils[pack_order[i]]: the pack size of one MPI_INT (stencil type) plus
// a size for the target point (the callee name is on a line missing from
// this listing) plus the result of the type-specific pack-size routine.
2500 struct interp_weight_stencil * stencils, size_t count, size_t * pack_order,
2501 int * pack_sizes, MPI_Datatype point_info_dt, MPI_Comm comm) {
2502
2503 int pack_size_type;
2504 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_type), comm);
2505
2506 for (size_t i = 0; i < count; ++i) {
2507
2508 struct interp_weight_stencil * curr_stencil = stencils + pack_order[i];
2509 int (*func_pack_size)(
2510 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
2511 MPI_Comm comm);
2512 YAC_ASSERT(
2513 (curr_stencil->type == FIXED) ||
2514 (curr_stencil->type == DIRECT) ||
2515 (curr_stencil->type == SUM) ||
2516 (curr_stencil->type == WEIGHT_SUM) ||
2517 (curr_stencil->type == DIRECT_MF) ||
2518 (curr_stencil->type == SUM_MF) ||
2519 (curr_stencil->type == WEIGHT_SUM_MF),
2520 "ERROR(get_stencils_pack_sizes): invalid stencil type")
// select the pack-size routine that matches the stencil type
2521 switch (curr_stencil->type) {
2522 case(FIXED):
2523 func_pack_size = get_stencil_pack_size_fixed;
2524 break;
2525 case(DIRECT):
2526 func_pack_size = get_stencil_pack_size_direct;
2527 break;
2528 case(SUM):
2529 func_pack_size = get_stencil_pack_size_sum;
2530 break;
2531 default:
2532 case(WEIGHT_SUM):
2533 func_pack_size = get_stencil_pack_size_wsum;
2534 break;
2535 case(DIRECT_MF):
2536 func_pack_size = get_stencil_pack_size_direct_mf;
2537 break;
2538 case(SUM_MF):
2539 func_pack_size = get_stencil_pack_size_sum_mf;
2540 break;
2541 case(WEIGHT_SUM_MF):
2542 func_pack_size = get_stencil_pack_size_wsum_mf;
2543 break;
2544 };
2545 pack_sizes[i] = pack_size_type +
2547 &(curr_stencil->tgt), point_info_dt, comm) +
2548 func_pack_size(curr_stencil, point_info_dt, comm);
2549 }
2550}
2551
// Packs the data of a FIXED stencil (a single double) into buffer at
// *position using MPI_Pack; point_info_dt is not needed for this type.
// The line carrying the function name is missing from this listing;
// presumably this is pack_stencil_fixed, as selected in pack_stencils --
// TODO confirm.
2553 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2554 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2555
2556 UNUSED(point_info_dt);
2557
2558 // fixed value
2560 MPI_Pack(&(stencil->data.fixed.value), 1, MPI_DOUBLE, buffer, buffer_size,
2561 position, comm), comm);
2562}
2563
// Packs the data of a DIRECT stencil (its single source point) into buffer
// at *position. The lines carrying the function name and the callee name
// are missing from this listing; presumably this is pack_stencil_direct, as
// selected in pack_stencils -- TODO confirm.
2565 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2566 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2567
2568 // src
2570 &(stencil->data.direct.src), buffer, buffer_size, position, point_info_dt,
2571 comm);
2572}
2573
// Packs the data of a SUM stencil (its source points) into buffer at
// *position. The lines carrying the function name and the callee name are
// missing from this listing; presumably this is pack_stencil_sum, as
// selected in pack_stencils -- TODO confirm.
2575 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2576 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2577
2578 // srcs
2580 stencil->data.sum.srcs, buffer, buffer_size, position, point_info_dt, comm);
2581}
2582
// Packs the data of a WEIGHT_SUM stencil into buffer at *position: first the
// source points, then one double weight per source point.
// The line carrying the function name is missing from this listing;
// presumably this is pack_stencil_wsum, as selected in pack_stencils --
// TODO confirm.
2584 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2585 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2586
2587 // srcs
2589 stencil->data.weight_sum.srcs, buffer, buffer_size, position,
2590 point_info_dt, comm);
2591 // weights
2593 MPI_Pack(stencil->data.weight_sum.weights,
2594 (int)(stencil->data.weight_sum.srcs->count), MPI_DOUBLE,
2595 buffer, buffer_size, position, comm), comm);
2596}
2597
// Packs the data of a DIRECT_MF stencil into buffer at *position: first the
// source point, then its source field index as one YAC_MPI_SIZE_T.
// The line carrying the function name is missing from this listing;
// presumably this is pack_stencil_direct_mf, as selected in pack_stencils --
// TODO confirm.
2599 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2600 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2601
2602 // src
2604 &(stencil->data.direct_mf.src), buffer, buffer_size, position,
2605 point_info_dt, comm);
2606
2607 // field_idx
2608 size_t temp_field_idx = stencil->data.direct_mf.field_idx;
2610 MPI_Pack(&temp_field_idx, 1, YAC_MPI_SIZE_T,
2611 buffer, buffer_size, position, comm), comm);
2612}
2613
// Packs the data of a SUM_MF stencil into buffer at *position: first the
// source points, then one YAC_MPI_SIZE_T field index per source point.
// The field indices are packed from a temporary copy, which is freed again.
// The line carrying the function name is missing from this listing;
// presumably this is pack_stencil_sum_mf, as selected in pack_stencils --
// TODO confirm.
2615 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2616 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2617
2618 // srcs
2620 stencil->data.sum_mf.srcs, buffer, buffer_size, position,
2621 point_info_dt, comm);
2622
2623 size_t count = stencil->data.sum_mf.srcs->count;
2624 // field_indices
2625 size_t * temp_field_indices = xmalloc(count * sizeof(*temp_field_indices));
2626 for (size_t i = 0; i < count; ++i)
2627 temp_field_indices[i] = stencil->data.sum_mf.field_indices[i];
2629 MPI_Pack(temp_field_indices, (int)count, YAC_MPI_SIZE_T,
2630 buffer, buffer_size, position, comm), comm);
2631 free(temp_field_indices);
2632}
2633
// Packs the data of a WEIGHT_SUM_MF stencil into buffer at *position: first
// the source points, then one double weight per source point, then one
// YAC_MPI_SIZE_T field index per source point.
// The field indices are packed from a temporary copy, which is freed again.
// The line carrying the function name is missing from this listing;
// presumably this is pack_stencil_wsum_mf, as selected in pack_stencils --
// TODO confirm.
2635 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2636 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2637
2638 // srcs
2640 stencil->data.weight_sum_mf.srcs, buffer, buffer_size, position,
2641 point_info_dt, comm);
2642
2643 size_t count = stencil->data.weight_sum_mf.srcs->count;
2644 // weights
2646 MPI_Pack(stencil->data.weight_sum_mf.weights, (int)count, MPI_DOUBLE,
2647 buffer, buffer_size, position, comm), comm);
2648 // field_indices
2649 size_t * temp_field_indices = xmalloc(count * sizeof(*temp_field_indices));
2650 for (size_t i = 0; i < count; ++i)
2651 temp_field_indices[i] = stencil->data.weight_sum_mf.field_indices[i];
2653 MPI_Pack(temp_field_indices, (int)count, YAC_MPI_SIZE_T,
2654 buffer, buffer_size, position, comm), comm);
2655 free(temp_field_indices);
2656}
2657
// Packs the stencils stencils[pack_order[i]], i in [0, count), one after the
// other into a single newly allocated buffer.
// Each stencil is stored as: type (MPI_INT), target point, type-specific
// data.
// On return *pack_data points to the buffer (to be freed by the caller) and
// pack_sizes[i] holds the number of bytes actually used by the i'th packed
// stencil. pack_sizes is first filled with upper bounds by a call whose
// callee name is on a line missing from this listing (the argument list
// matches get_stencils_pack_sizes -- TODO confirm).
2658static void pack_stencils(
2659 struct interp_weight_stencil * stencils, size_t count, size_t * pack_order,
2660 void ** pack_data, int * pack_sizes, MPI_Datatype point_info_dt,
2661 MPI_Comm comm) {
2662
2664 stencils, count, pack_order, pack_sizes, point_info_dt, comm);
2665
// the buffer is allocated based on the computed sizes and is shrunk to the
// size actually used at the end
2666 size_t pack_buffer_size = 0;
2667 for (size_t i = 0; i < count; ++i)
2668 pack_buffer_size += (size_t)(pack_sizes[i]);
2669
2670 void * pack_data_ = xmalloc(pack_buffer_size);
2671 size_t total_pack_size = 0;
2672
2673 for (size_t i = 0; i < count; ++i) {
2674
2675 struct interp_weight_stencil * curr_stencil = stencils + pack_order[i];
2676 void (*func_pack)(
2677 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2678 int * position, MPI_Datatype point_info_dt, MPI_Comm comm);
2679
2680 YAC_ASSERT(
2681 (curr_stencil->type == FIXED) ||
2682 (curr_stencil->type == DIRECT) ||
2683 (curr_stencil->type == SUM) ||
2684 (curr_stencil->type == WEIGHT_SUM) ||
2685 (curr_stencil->type == DIRECT_MF) ||
2686 (curr_stencil->type == SUM_MF) ||
2687 (curr_stencil->type == WEIGHT_SUM_MF),
2688 "ERROR(pack_stencils): invalid stencil type")
// select the pack routine that matches the stencil type
2689 switch (curr_stencil->type) {
2690 default:
2691 case(FIXED):
2692 func_pack = pack_stencil_fixed;
2693 break;
2694 case(DIRECT):
2695 func_pack = pack_stencil_direct;
2696 break;
2697 case(SUM):
2698 func_pack = pack_stencil_sum;
2699 break;
2700 case(WEIGHT_SUM):
2701 func_pack = pack_stencil_wsum;
2702 break;
2703 case(DIRECT_MF):
2704 func_pack = pack_stencil_direct_mf;
2705 break;
2706 case(SUM_MF):
2707 func_pack = pack_stencil_sum_mf;
2708 break;
2709 case(WEIGHT_SUM_MF):
2710 func_pack = pack_stencil_wsum_mf;
2711 break;
2712 };
2713
// every stencil is packed with a position relative to its own start within
// the buffer; the stencils are stored back to back
2714 int position = 0;
2715 int type = (int)curr_stencil->type;
2716 void * buffer = (void*)((char*)pack_data_ + total_pack_size);
2717 int buffer_size = pack_sizes[i];
2718
2719 // type
2721 MPI_Pack(&type, 1, MPI_INT, buffer, buffer_size, &position, comm), comm);
2722 // tgt
2723 yac_remote_point_pack(&(curr_stencil->tgt), buffer, buffer_size,
2724 &position, point_info_dt, comm);
2725 // stencil data
2726 func_pack(curr_stencil, buffer, buffer_size, &position, point_info_dt, comm);
2727
2729 pack_sizes[i] >= position,
2730 "ERROR(pack_stencils): "
2731 "actual pack size is bigger then computed one (%d > %d)",
2732 position, pack_sizes[i]);
2733
// replace the upper bound by the size actually used
2734 pack_sizes[i] = position;
2735 total_pack_size += (size_t)position;
2736 }
2737
2738 *pack_data = xrealloc(pack_data_, total_pack_size);
2739}
2740
// Unpacks the data of a FIXED stencil (a single double) from buffer at
// *position using MPI_Unpack; point_info_dt is not needed for this type.
// The line carrying the function name is missing from this listing;
// presumably this is unpack_stencil_fixed, as selected in unpack_stencils --
// TODO confirm.
2742 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2743 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2744
2745 UNUSED(point_info_dt);
2746
2747 // fixed value
2749 MPI_Unpack(buffer, buffer_size, position, &(stencil->data.fixed.value), 1,
2750 MPI_DOUBLE, comm), comm);
2751}
2752
// Unpacks the data of a DIRECT stencil (its single source point) from buffer
// at *position. The lines carrying the function name and the callee name
// are missing from this listing; presumably this is unpack_stencil_direct,
// as selected in unpack_stencils -- TODO confirm.
2754 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2755 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2756
2757 // src
2759 buffer, buffer_size, position, &stencil->data.direct.src,
2760 point_info_dt, comm);
2761}
2762
// Unpacks the data of a SUM stencil (its source points) from buffer at
// *position. The weights pointer of the weight_sum view of the union is set
// to NULL before the source points are unpacked.
// The lines carrying the function name and the callee name are missing from
// this listing; presumably this is unpack_stencil_sum, as selected in
// unpack_stencils -- TODO confirm.
2764 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2765 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2766
2767 // srcs
2768 stencil->data.weight_sum.weights = NULL;
2770 buffer, buffer_size, position, &(stencil->data.sum.srcs), point_info_dt,
2771 comm);
2772}
2773
// Unpacks the data of a WEIGHT_SUM stencil from buffer at *position: first
// the source points, then one double weight per source point into a newly
// allocated weights array.
// The line carrying the function name is missing from this listing;
// presumably this is unpack_stencil_wsum, as selected in unpack_stencils --
// TODO confirm.
2775 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2776 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2777
2778 // srcs
2780 buffer, buffer_size, position, &(stencil->data.weight_sum.srcs),
2781 point_info_dt, comm);
2782
2783 size_t count = stencil->data.weight_sum.srcs->count;
2784
2785 stencil->data.weight_sum.weights =
2786 xmalloc(count * sizeof(*(stencil->data.weight_sum.weights)));
2787
2788 // weights
2790 MPI_Unpack(buffer, buffer_size, position, stencil->data.weight_sum.weights,
2791 (int)count, MPI_DOUBLE, comm), comm);
2792}
2793
// Unpacks the data of a DIRECT_MF stencil from buffer at *position: first
// the source point, then its source field index (one YAC_MPI_SIZE_T).
// The line carrying the function name is missing from this listing;
// presumably this is unpack_stencil_direct_mf, as selected in
// unpack_stencils -- TODO confirm.
2795 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2796 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2797
2798 // src
2800 buffer, buffer_size, position, &stencil->data.direct_mf.src,
2801 point_info_dt, comm);
2802
2803 // field_idx
2804 size_t temp_field_idx;
2806 MPI_Unpack(
2807 buffer, buffer_size, position, &temp_field_idx,
2808 1, YAC_MPI_SIZE_T, comm), comm);
2809 stencil->data.direct_mf.field_idx = (size_t)temp_field_idx;
2810}
2811
// Unpacks the data of a SUM_MF stencil from buffer at *position: first the
// source points, then one YAC_MPI_SIZE_T field index per source point.
// The field indices are unpacked into a temporary array and then copied
// into the newly allocated field_indices array of the stencil.
// The line carrying the function name is missing from this listing;
// presumably this is unpack_stencil_sum_mf, as selected in unpack_stencils
// -- TODO confirm.
2813 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2814 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2815
2816 // srcs
2818 buffer, buffer_size, position, &(stencil->data.sum_mf.srcs),
2819 point_info_dt, comm);
2820
2821 size_t count = stencil->data.sum_mf.srcs->count;
2822
2823 size_t * temp_field_indices = xmalloc(count * sizeof(*temp_field_indices));
2824 stencil->data.sum_mf.field_indices =
2825 xmalloc(count * sizeof(*(stencil->data.sum_mf.field_indices)));
2826
2827 // field_indices
2829 MPI_Unpack(
2830 buffer, buffer_size, position, temp_field_indices,
2831 (int)count, YAC_MPI_SIZE_T, comm), comm);
2832 for (size_t i = 0; i < count; ++i)
2833 stencil->data.sum_mf.field_indices[i] = (size_t)(temp_field_indices[i]);
2834 free(temp_field_indices);
2835}
2836
// Unpacks the data of a WEIGHT_SUM_MF stencil from buffer at *position:
// first the source points, then one double weight per source point, then
// one YAC_MPI_SIZE_T field index per source point.
// The weights are unpacked into a newly allocated array; the field indices
// are unpacked into a temporary array and then copied to the stencil (the
// line that starts the assignment of the stencil's field_indices array is
// missing from this listing).
// The line carrying the function name is missing as well; presumably this
// is unpack_stencil_wsum_mf, as selected in unpack_stencils -- TODO confirm.
2838 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2839 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2840
2841 // srcs
2843 buffer, buffer_size, position, &(stencil->data.weight_sum_mf.srcs),
2844 point_info_dt, comm);
2845
2846 size_t count = stencil->data.weight_sum_mf.srcs->count;
2847
2848 stencil->data.weight_sum_mf.weights =
2849 xmalloc(count * sizeof(*(stencil->data.weight_sum_mf.weights)));
2850
2851 // weights
2853 MPI_Unpack(
2854 buffer, buffer_size, position, stencil->data.weight_sum_mf.weights,
2855 (int)count, MPI_DOUBLE, comm), comm);
2856
2857 size_t * temp_field_indices = xmalloc(count * sizeof(*temp_field_indices));
2859 xmalloc(count * sizeof(*(stencil->data.weight_sum_mf.field_indices)));
2860
2861 // field_indices
2863 MPI_Unpack(
2864 buffer, buffer_size, position, temp_field_indices,
2865 (int)count, YAC_MPI_SIZE_T, comm), comm);
2866 for (size_t i = 0; i < count; ++i)
2867 stencil->data.weight_sum_mf.field_indices[i] =
2868 (size_t)(temp_field_indices[i]);
2869 free(temp_field_indices);
2870}
2871
// unpack_stencils (name taken from the assertion messages below; the
// signature line is missing from this listing).
// Unpacks `count` stencils, which are stored back to back in packed_data
// (packed_data_size bytes), into stencils[0..count-1].
// Each stencil is read as: type (MPI_INT), target point, type-specific data
// -- the same layout pack_stencils writes.
2873 struct interp_weight_stencil * stencils, size_t count,
2874 void * packed_data, size_t packed_data_size,
2875 MPI_Datatype point_info_dt, MPI_Comm comm) {
2876
2877 for (size_t i = 0, offset = 0; i < count; ++i) {
2878
2879 YAC_ASSERT(
2880 packed_data_size >= offset,
2881 "ERROR(unpack_stencils): invalid offset");
2882
// each stencil is unpacked with a position relative to its own start; the
// remaining buffer size is clamped to INT_MAX, because MPI_Unpack takes
// an int size
2883 int position = 0;
2884 void * curr_buffer = (void*)((unsigned char*)packed_data + offset);
2885 int buffer_size = (int)(MIN(packed_data_size - offset, INT_MAX));
2886 struct interp_weight_stencil * curr_stencil = stencils + i;
2887
2888 int type;
2890 MPI_Unpack(
2891 curr_buffer, buffer_size, &position, &type, 1, MPI_INT, comm), comm);
2892
2893 void (*func_unpack)(
2894 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2895 int * position, MPI_Datatype point_info_dt, MPI_Comm comm);
2896
2897 YAC_ASSERT(
2898 (type == FIXED) ||
2899 (type == DIRECT) || (type == SUM) || (type == WEIGHT_SUM) ||
2900 (type == DIRECT_MF) || (type == SUM_MF) || (type == WEIGHT_SUM_MF),
2901 "ERROR(unpack_stencils): invalid stencil type")
// select the unpack routine that matches the stencil type
2902 switch (type) {
2903 case(FIXED):
2904 func_unpack = unpack_stencil_fixed;
2905 break;
2906 case(DIRECT):
2907 func_unpack = unpack_stencil_direct;
2908 break;
2909 case(SUM):
2910 func_unpack = unpack_stencil_sum;
2911 break;
2912 default:
2913 case(WEIGHT_SUM):
2914 func_unpack = unpack_stencil_wsum;
2915 break;
2916 case(DIRECT_MF):
2917 func_unpack = unpack_stencil_direct_mf;
2918 break;
2919 case(SUM_MF):
2920 func_unpack = unpack_stencil_sum_mf;
2921 break;
2922 case(WEIGHT_SUM_MF):
2923 func_unpack = unpack_stencil_wsum_mf;
2924 break;
2925 };
2926
// NOTE(review): the lines between the next two are missing from this
// listing; presumably they finish the assignment of the stencil type and
// start the call that unpacks the target point -- TODO confirm
2927 curr_stencil->type =
2930 curr_buffer, buffer_size, &position, &(curr_stencil->tgt),
2931 point_info_dt, comm);
2932 func_unpack(
2933 curr_stencil, curr_buffer, buffer_size, &position, point_info_dt, comm);
// advance to the start of the next packed stencil
2934 offset += (size_t)position;
2935 }
2936}
2937
// exchange_stencils (name taken from the assertion message below; the
// signature line is missing from this listing).
// Redistributes stencils between the processes of comm.
//  - stencils:           local stencils
//  - stencil_indices:    indices into stencils of the stencils to be sent;
//                        read as being grouped by destination rank in
//                        ascending rank order
//  - stencil_sendcounts: number of stencils to be sent to each rank
//  - stencil_recvcounts: number of stencils to be received from each rank
// Returns a newly allocated array containing the received stencils in
// ascending order of the source rank. Stencils a process "sends to itself"
// are not packed but copied locally and are inserted at the position that
// corresponds to the own rank.
// Side effects on the arguments: the entries for the own rank in
// stencil_sendcounts and stencil_recvcounts are set to 0 and the local
// entries are removed from stencil_indices.
// Several callee names in the body are on lines missing from this listing.
2939 MPI_Comm comm, struct interp_weight_stencil * stencils,
2940 size_t * stencil_indices,
2941 size_t * stencil_sendcounts, size_t * stencil_recvcounts) {
2942
2943 int comm_rank, comm_size;
2944 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
2945 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
2946
2947 YAC_ASSERT(
2948 stencil_sendcounts[comm_rank] == stencil_recvcounts[comm_rank],
2949 "ERROR(exchange_stencils): error in arguments")
2950
// determine the total number of stencils to be sent/received (without the
// local ones) and the offsets of the local stencils; the offsets are
// accumulated in the first loop and assigned again right after it
2951 size_t send_count = 0, recv_count = 0;
2952 size_t local_send_offset = 0;
2953 size_t local_recv_offset = 0;
2954 size_t local_count = (size_t)(stencil_sendcounts[comm_rank]);
2955 for (int i = 0; i < comm_rank; ++i) {
2956 send_count += stencil_sendcounts[i];
2957 recv_count += stencil_recvcounts[i];
2958 local_send_offset += stencil_sendcounts[i];
2959 local_recv_offset += stencil_recvcounts[i];
2960 }
2961 local_send_offset = send_count;
2962 local_recv_offset = recv_count;
2963 stencil_sendcounts[comm_rank] = 0;
2964 stencil_recvcounts[comm_rank] = 0;
2965 for (int i = comm_rank + 1; i < comm_size; ++i) {
2966 send_count += stencil_sendcounts[i];
2967 recv_count += stencil_recvcounts[i];
2968 }
2969
2970 struct interp_weight_stencil * new_stencils =
2971 xmalloc((recv_count + local_count) * sizeof(*new_stencils));
2972 size_t * local_stencil_indices =
2973 xmalloc(local_count * sizeof(*local_stencil_indices));
2974 memcpy(local_stencil_indices, stencil_indices + local_send_offset,
2975 local_count * sizeof(*local_stencil_indices));
2976
2977 // remove the local stencil indices
2978 memmove(
2979 stencil_indices + local_send_offset,
2980 stencil_indices + local_send_offset + local_count,
2981 (send_count - local_send_offset) * sizeof(*stencil_indices));
2982
2983 // pack the stencils that need to be send to other processes
2984 void * send_buffer;
2985 int * pack_sizes = xmalloc(send_count * sizeof(*pack_sizes));
2986 MPI_Datatype point_info_dt = yac_get_remote_point_info_mpi_datatype(comm);
2988 stencils, send_count, stencil_indices, &send_buffer, pack_sizes,
2989 point_info_dt, comm);
2990
2991 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
2993 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
2994
// convert the number of stencils per rank into the number of packed bytes
// per rank
2995 send_count = 0;
2996 for (int rank = 0; rank < comm_size; ++rank) {
2997 size_t sendcount = 0;
2998 int curr_num_stencils = stencil_sendcounts[rank];
2999 for (int j = 0; j < curr_num_stencils; ++j, ++send_count)
3000 sendcount += (size_t)(pack_sizes[send_count]);
3001 sendcounts[rank] = sendcount;
3002 }
3003 free(pack_sizes);
3004
3006 1, sendcounts, recvcounts, sdispls, rdispls, comm);
3007
3008 size_t recv_size = recvcounts[comm_size - 1] + rdispls[comm_size - 1];
3009
3010 void * recv_buffer = xmalloc(recv_size);
3011
3012 // exchange stencils
3013 yac_alltoallv_packed_p2p(
3014 send_buffer, sendcounts, sdispls+1,
3015 recv_buffer, recvcounts, rdispls, comm, "exchange_stencils", __LINE__);
3016 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
3017 free(send_buffer);
3018
3019 // unpack stencils
3021 new_stencils, recv_count,
3022 recv_buffer, recv_size, point_info_dt, comm);
3023 yac_mpi_call(MPI_Type_free(&point_info_dt), comm);
3024 free(recv_buffer);
3025
// make room for the local stencils at the position of the own rank and
// insert copies of them (the callee name is on a line missing from this
// listing)
3026 memmove(new_stencils + local_recv_offset + local_count,
3027 new_stencils + local_recv_offset ,
3028 (recv_count - local_recv_offset ) * sizeof(*new_stencils));
3029 for (size_t i = 0; i < local_count; ++i, ++local_recv_offset )
3030 new_stencils[local_recv_offset] =
3032 stencils + local_stencil_indices[i],
3033 stencils[local_stencil_indices[i]].tgt);
3034 free(local_stencil_indices);
3035
3036 return new_stencils;
3037}
3038
// yac_interp_weights_get_stencils (name taken from the `routine` string
// below; the signature line is missing from this listing).
// Retrieves stencils from the processes of weights->comm: request i asks
// rank stencil_ranks[i] for its stencil with index stencil_indices[i].
// Returns a newly allocated array of `count` stencils, in which entry i is
// the answer to request i as originally passed in.
// The requested indices are exchanged with yac_alltoallv_size_t_p2p and the
// stencils with exchange_stencils, so presumably all processes of the
// communicator have to call this routine -- TODO confirm.
// NOTE(review): a call on a line missing from this listing receives
// stencil_ranks, stencil_indices and reorder_idx; presumably it sorts the
// requests by rank, which would reorder the caller's arrays -- TODO confirm.
3040 struct yac_interp_weights * weights, size_t * stencil_indices,
3041 int * stencil_ranks, size_t count) {
3042
3043 char const * routine = "yac_interp_weights_get_stencils";
3044
3045 MPI_Comm comm = weights->comm;
3046 int comm_size;
3047 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
3048
3050 count <= INT_MAX, "ERROR(%s): count exceeds INT_MAX", routine);
3051
// reorder_idx remembers the original position of every request
3052 size_t * reorder_idx = xmalloc(count * sizeof(*reorder_idx));
3053 for (size_t i = 0; i < count; ++i) reorder_idx[i] = i;
3054
3056 stencil_ranks, count, stencil_indices, reorder_idx);
3057
3058 // exchange requested stencils indices
3059 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
3061 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
3062 for (size_t i = 0; i < count; ++i) sendcounts[stencil_ranks[i]]++;
3064 1, sendcounts, recvcounts, sdispls, rdispls, comm);
3065 size_t recv_count =
3066 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
// a single buffer holds the indices to be sent followed by the ones to be
// received
3067 size_t * size_t_buffer =
3068 xmalloc((count + recv_count) * sizeof(*size_t_buffer));
3069 size_t * send_stencil_indices = size_t_buffer;
3070 size_t * recv_stencil_indices = size_t_buffer + count;
3071 for (size_t i = 0; i < count; ++i)
3072 send_stencil_indices[i] = stencil_indices[i];
3073 yac_alltoallv_size_t_p2p(
3074 send_stencil_indices, sendcounts, sdispls+1,
3075 recv_stencil_indices, recvcounts, rdispls, comm, routine, __LINE__);
3076
3077 // exchange stencils
3078 size_t * exchange_stencil_indices =
3079 xmalloc(recv_count * sizeof(*exchange_stencil_indices));
3080 for (size_t i = 0; i < recv_count; ++i) {
3082 (size_t)(recv_stencil_indices[i]) < weights->stencils_size,
3083 "ERROR(%s): invalid stencil index", routine);
3084 exchange_stencil_indices[i] = (size_t)(recv_stencil_indices[i]);
3085 }
3086 free(size_t_buffer);
// the roles of the counts are swapped: this process sends the stencils
// that were requested from it (recvcounts) and receives the ones it
// requested itself (sendcounts)
3087 struct interp_weight_stencil * stencils =
3088 exchange_stencils(comm, weights->stencils, exchange_stencil_indices,
3089 recvcounts, sendcounts);
3090 free(exchange_stencil_indices);
3091 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
3092
3093 // sort received stencils into original order
3094 struct interp_weight_stencil * sorted_stencils =
3095 xmalloc(count * sizeof(*sorted_stencils));
3096 for (size_t i = 0; i < count; ++i)
3097 sorted_stencils[reorder_idx[i]] = stencils[i];
3098 free(stencils);
3099 free(reorder_idx);
3100
3101 return sorted_stencils;
3102}
3103
3105 struct interp_weight_stencil * stencils, size_t count);
3106
// Creates one additional stencil per target point by handing a group of
// already existing stencils (together with the matching entries of w) to
// stencils_merge, and appends the results to weights->stencils.
// NOTE(review): the line with the return type and the function name (listing
// line 3107) is absent from this extract.
//
// weights               stencil container; read as merge input and extended
// tgts                  target points (a NULL pointer is treated as no targets)
// num_stencils_per_tgt  number of stencils to be merged for each target
// stencil_indices       for every stencil to be merged: its index in the
//                       stencil array of the rank given in stencil_ranks
// stencil_ranks         for every stencil to be merged: rank holding it
// w                     for every stencil to be merged: value handed to
//                       stencils_merge
3108 struct yac_interp_weights * weights, struct remote_points * tgts,
3109 size_t * num_stencils_per_tgt, size_t * stencil_indices,
3110 int * stencil_ranks, double * w) {
3111
3112 size_t count = (tgts != NULL)?tgts->count:0;
3113 MPI_Comm comm = weights->comm;
3114 int comm_rank, comm_size;
3115 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
3116 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
3117
3118 // count the number of missing stencils
3119 size_t total_num_stencils = 0;
3120 size_t max_num_stencils_per_tgt = 0;
3121 for (size_t i = 0; i < count; ++i) {
3122 size_t curr_num_stencils_per_tgt = num_stencils_per_tgt[i];
3123 if (curr_num_stencils_per_tgt > max_num_stencils_per_tgt)
3124 max_num_stencils_per_tgt = curr_num_stencils_per_tgt;
3125 total_num_stencils += num_stencils_per_tgt[i];
3126 }
// a stencil is "missing" if it is held by a rank other than this one
3127 size_t num_missing_stencils = 0;
3128 for (size_t i = 0; i < total_num_stencils; ++i)
3129 if (stencil_ranks[i] != comm_rank) num_missing_stencils++;
3130
3131 // get missing stencils
3132 size_t * missing_stencil_indices =
3133 xmalloc(num_missing_stencils * sizeof(*missing_stencil_indices));
3134 int * missing_stencil_ranks =
3135 xmalloc(num_missing_stencils * sizeof(*missing_stencil_ranks));
// collect the non-local entries in their order of appearance; the merge loop
// below consumes missing_stencils in exactly this order
3136 for (size_t i = 0, j = 0; i < total_num_stencils; ++i) {
3137 if (stencil_ranks[i] != comm_rank) {
3138 missing_stencil_indices[j] = stencil_indices[i];
3139 missing_stencil_ranks[j] = stencil_ranks[i];
3140 ++j;
3141 }
3142 }
// NOTE(review): the name of the routine called here (listing line 3144) is
// absent from this extract
3143 struct interp_weight_stencil * missing_stencils =
3145 weights, missing_stencil_indices, missing_stencil_ranks,
3146 num_missing_stencils);
3147 free(missing_stencil_ranks);
3148 free(missing_stencil_indices);
3149
3150 // merge stencils to generate new ones
3151 {
// work on local copies of the container fields; they are written back after
// the loop
3152 struct interp_weight_stencil * stencils = weights->stencils;
3153 size_t stencils_array_size = weights->stencils_array_size;
3154 size_t stencils_size = weights->stencils_size;
3155
// scratch array of pointers, sized for the largest group of stencils
3156 struct interp_weight_stencil ** stencils_buffer =
3157 xmalloc(max_num_stencils_per_tgt * sizeof(*stencils_buffer));
3158
// NOTE(review): the start of this statement (listing line 3159) is absent;
// the arguments suggest the stencil array is grown here to hold count more
// entries before pointers into it are taken -- confirm
3160 stencils, stencils_array_size, stencils_size + count);
3161
// j is the read position in missing_stencils and persists across targets
3162 for (size_t i = 0, j = 0; i < count;
3163 ++i, ++stencils_size) {
3164
// stencil_ranks/stencil_indices are advanced per target at the end of the
// loop body, so index k is relative to the current target's group
3165 size_t curr_num_stencils = num_stencils_per_tgt[i];
3166 for (size_t k = 0; k < curr_num_stencils; ++k)
3167 stencils_buffer[k] =
3168 (stencil_ranks[k] == comm_rank)?
3169 (stencils + stencil_indices[k]):(missing_stencils + (j++));
3170
3171 stencils[stencils_size] =
3172 stencils_merge(stencils_buffer, w, curr_num_stencils, tgts->data[i]);
// move all per-stencil input arrays to the group of the next target
3173 w += curr_num_stencils;
3174 stencil_indices += curr_num_stencils;
3175 stencil_ranks += curr_num_stencils;
3176 }
3177
3178 weights->stencils = stencils;
3179 weights->stencils_array_size = stencils_array_size;
3180 weights->stencils_size = stencils_size;
3181
3182 free(stencils_buffer);
3183 }
3184
3185 yac_interp_weight_stencils_delete(missing_stencils, num_missing_stencils);
3186}
3187
3188static int compute_owner(int * ranks, size_t count) {
3189
3190 YAC_ASSERT(count != 0, "ERROR(compute_owner): count == 0")
3191
3192 yac_quicksort_index(ranks, count, NULL);
3193
3194 int best_rank = -1;
3195 size_t best_rank_count = 0;
3196
3197 size_t curr_rank_count = 1;
3198 int prev_rank = ranks[0];
3199
3200 for (size_t i = 1; i < count; ++i, ++curr_rank_count) {
3201 int curr_rank = ranks[i];
3202 if (prev_rank != curr_rank) {
3203 if (curr_rank_count > best_rank_count) {
3204 best_rank = prev_rank;
3205 best_rank_count = curr_rank_count;
3206 }
3207 prev_rank = curr_rank;
3208 curr_rank_count = 0;
3209 }
3210 }
3211
3212 return (curr_rank_count > best_rank_count)?prev_rank:best_rank;
3213}
3214
// Converts count stencils, which all have to be of type stencil_type, into
// the "weighted sum, multi field" representation: per stencil a copy of the
// target point plus an array of links (source point info, weight, source
// field index).
// NOTE(review): the line with the return type and the function name (listing
// line 3215) is absent from this extract; the error strings below name the
// routine generate_w_sum_mf_stencils.
//
// stencils      input stencils (not modified; the target points are copied)
// count         number of input stencils
// stencil_type  type of all input stencils; SUM, WEIGHT_SUM, SUM_MF and
//               WEIGHT_SUM_MF are accepted
// returns       newly allocated result; all links of all stencils live in the
//               buffer that trails the returned object
3216 struct interp_weight_stencil * stencils, size_t count,
3217 enum yac_interp_weight_stencil_type stencil_type) {
3218
3219 // compute total number of links
3220 size_t total_num_links = 0;
3221
3222 for (size_t i = 0; i < count; ++i) {
3223 YAC_ASSERT(
3224 stencils[i].type == stencil_type,
3225 "ERROR(generate_w_sum_mf_stencils): wrong stencil type")
3226 // due to the data layout this works for "sum" and "weight_sum"
3227 total_num_links += stencils[i].data.weight_sum.srcs->count;
3228 }
3229
// NOTE(review): the declaration of temp (listing line 3230) is absent from
// this extract; the allocation covers the object itself plus one buffer
// entry per link
3231 xmalloc(sizeof(*temp) + total_num_links * sizeof(temp->buffer[0]));
3232 struct interp_weight_stencils_wsum_mf * wsum_stencils =
3233 (struct interp_weight_stencils_wsum_mf *)temp;
3234 wsum_stencils->data = xmalloc(count * sizeof(*(wsum_stencils->data)));
3235 wsum_stencils->count = count;
3236
3237 // extract data from stencils
// k is the offset of the current stencil's first link in temp->buffer
3238 for (size_t i = 0, k = 0; i < count; ++i) {
3239 struct interp_weight_stencil_wsum_mf * curr_wsum_stencil =
3240 wsum_stencils->data + i;
3241 struct interp_weight_stencil_wsum_mf_weight * curr_links =
3242 &(temp->buffer[k]);
3243 size_t curr_stencil_size = stencils[i].data.weight_sum.srcs->count;
3244 struct remote_point * curr_srcs = stencils[i].data.weight_sum.srcs->data;
3245 curr_wsum_stencil->tgt = copy_remote_point(stencils[i].tgt);
3246 curr_wsum_stencil->count = curr_stencil_size;
3247 curr_wsum_stencil->data = curr_links;
3248 for (size_t j = 0; j < curr_stencil_size; ++j) {
3249 int curr_count = curr_srcs[j].data.count;
3250 YAC_ASSERT(
3251 curr_count >= 1,
3252 "ERROR(generate_w_sum_mf_stencils): global src id no found")
// if a source point has several point infos, the first one is used
3253 curr_links[j].src =
3254 (curr_count == 1)?
3255 (curr_srcs[j].data.data.single):(curr_srcs[j].data.data.multi[0]);
3256 YAC_ASSERT(
3257 (stencil_type == SUM) || (stencil_type == WEIGHT_SUM) ||
3258 (stencil_type == SUM_MF) || (stencil_type == WEIGHT_SUM_MF),
3259 "ERROR(generate_w_sum_mf_stencils): unsupported stencil type")
// stencil types without weights get a weight of 1.0, types without field
// indices get field index 0
3260 switch(stencil_type) {
3261 default:
3262 case(SUM):
3263 curr_links[j].weight = 1.0;
3264 curr_links[j].src_field_idx = 0;
3265 break;
3266 case(WEIGHT_SUM):
3267 curr_links[j].weight = stencils[i].data.weight_sum.weights[j];
3268 curr_links[j].src_field_idx = 0;
3269 break;
3270 case(SUM_MF):
3271 curr_links[j].weight = 1.0;
3272 curr_links[j].src_field_idx =
3273 stencils[i].data.sum_mf.field_indices[j];
3274 break;
3275 case(WEIGHT_SUM_MF):
3276 curr_links[j].weight = stencils[i].data.weight_sum_mf.weights[j];
3277 curr_links[j].src_field_idx =
3278 stencils[i].data.weight_sum_mf.field_indices[j];
3279 break;
3280 };
3281 }
3282 k += curr_stencil_size;
3283 }
3284
3285 return wsum_stencils;
3286}
3287
// Builds the MPI datatype describing one link of a wsum_mf stencil. It has
// four members: src.rank (MPI_INT), src.orig_pos and src_field_idx (both
// YAC_MPI_SIZE_T) and weight (MPI_DOUBLE).
// The result is obtained from yac_create_resized with sizeof(dummy) as size
// argument; the callers in this file release it with MPI_Type_free.
3288static MPI_Datatype get_wsum_mf_weight_mpi_datatype(MPI_Comm comm) {
3289
// NOTE(review): the declaration of dummy (listing line 3290) is absent from
// this extract; it is only used to compute member offsets and its size
3291 MPI_Datatype dt;
3292 int array_of_blocklengths[] = {1, 1, 1, 1};
// displacement of each member = member address - address of the object
3293 const MPI_Aint array_of_displacements[] =
3294 {(MPI_Aint)(intptr_t)(const void *)&(dummy.src.rank) -
3295 (MPI_Aint)(intptr_t)(const void *)&dummy,
3296 (MPI_Aint)(intptr_t)(const void *)&(dummy.src.orig_pos) -
3297 (MPI_Aint)(intptr_t)(const void *)&dummy,
3298 (MPI_Aint)(intptr_t)(const void *)&(dummy.src_field_idx) -
3299 (MPI_Aint)(intptr_t)(const void *)&dummy,
3300 (MPI_Aint)(intptr_t)(const void *)&(dummy.weight) -
3301 (MPI_Aint)(intptr_t)(const void *)&dummy};
3302 const MPI_Datatype array_of_types[] =
3303 {MPI_INT, YAC_MPI_SIZE_T, YAC_MPI_SIZE_T, MPI_DOUBLE};
// NOTE(review): the start of this statement (listing line 3304) is absent;
// the trailing ", comm)" suggests the call is wrapped in yac_mpi_call
3305 MPI_Type_create_struct(4, array_of_blocklengths, array_of_displacements,
3306 array_of_types, &dt), comm);
3307 return yac_create_resized(dt, sizeof(dummy), comm);
3308}
3309
// Returns an upper bound for the number of bytes needed to pack one wsum_mf
// stencil: one MPI_INT (the link count), stencil->count links of type
// wsum_mf_weight_dt and the target point.
// NOTE(review): the line with the return type and the function name (listing
// line 3310) is absent from this extract.
//
// stencil            stencil whose pack size is requested
// wsum_mf_weight_dt  MPI datatype of a single link
// point_info_dt      MPI datatype handed to yac_remote_point_get_pack_size
// comm               communicator used for the MPI_Pack_size queries
3311 struct interp_weight_stencil_wsum_mf * stencil,
3312 MPI_Datatype wsum_mf_weight_dt, MPI_Datatype point_info_dt, MPI_Comm comm) {
3313
3314 int pack_size_count,
3315 pack_size_weights,
3316 pack_size_tgt;
3317
3318 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_count), comm);
// NOTE(review): the start of this statement (listing line 3319) is absent;
// presumably yac_mpi_call( as in the statement above
3320 MPI_Pack_size(
3321 (int)(stencil->count), wsum_mf_weight_dt, comm, &pack_size_weights), comm);
3322 pack_size_tgt =
3323 yac_remote_point_get_pack_size(&(stencil->tgt), point_info_dt, comm);
3324
3325 return pack_size_count + pack_size_weights + pack_size_tgt;
3326}
3327
// Packs count wsum_mf stencils back to back into one newly allocated buffer.
// NOTE(review): the line with the return type and the function name (listing
// line 3328) is absent from this extract; a call further down in this file
// with matching arguments uses the name pack_stencils_wsum_mf.
//
// wsum_stencils  array from which the stencils are taken
// count          number of stencils to pack
// pack_order     pack_order[i] is the index in wsum_stencils of the i-th
//                stencil to be packed
// pack_data      out: pointer to the pack buffer, shrunk to the bytes used
// pack_sizes     out: packed size in bytes of the i-th packed stencil
// weight_counts  out: number of links of the i-th packed stencil
// comm           communicator used for packing
3329 struct interp_weight_stencil_wsum_mf * wsum_stencils, size_t count,
3330 size_t * pack_order, void ** pack_data, int * pack_sizes,
3331 int * weight_counts, MPI_Comm comm) {
3332
3333 MPI_Datatype wsum_mf_weight_dt = get_wsum_mf_weight_mpi_datatype(comm);
3334 MPI_Datatype point_info_dt = yac_get_remote_point_info_mpi_datatype(comm);
3335
3336 // get the pack sizes and the upper bound for the pack buffer size
3337 size_t temp_total_pack_size = 0;
3338 for (size_t i = 0; i < count; ++i) {
// NOTE(review): the name of the pack size routine called here (listing line
// 3341) is absent from this extract
3339 temp_total_pack_size +=
3340 (pack_sizes[i] =
3342 wsum_stencils + pack_order[i],
3343 wsum_mf_weight_dt, point_info_dt, comm));
3344 }
3345
3346 void * pack_data_ = xmalloc(temp_total_pack_size);
3347 size_t total_pack_size = 0;
3348
3349 // pack the stencils
3350 for (size_t i = 0; i < count; ++i) {
3351
3352 size_t idx = pack_order[i];
3353
// every stencil is packed into its own window that starts where the
// previous one ended; position is relative to that window
3354 int position = 0;
3355 void * buffer = (void*)((unsigned char*)pack_data_ + total_pack_size);
3356 int buffer_size = pack_sizes[i];
3357 int curr_count = wsum_stencils[idx].count;
3358
3359 // tgt
// NOTE(review): the name of the routine called here (listing line 3360) is
// absent from this extract
3361 &(wsum_stencils[idx].tgt), buffer, buffer_size, &position,
3362 point_info_dt, comm);
3363 // weight count
// NOTE(review): listing lines 3364 and 3367 (start of the two statements
// below) are absent; presumably yac_mpi_call(
3365 MPI_Pack(&curr_count, 1, MPI_INT, buffer, buffer_size, &position, comm), comm);
3366 // weights
3368 MPI_Pack(wsum_stencils[idx].data, curr_count, wsum_mf_weight_dt,
3369 buffer, buffer_size, &position, comm), comm);
3370
// replace the upper bound by the number of bytes that were actually written
3371 pack_sizes[i] = position;
3372 weight_counts[i] = curr_count;
3373 total_pack_size += (size_t)position;
3374 }
3375
3376 yac_mpi_call(MPI_Type_free(&point_info_dt), comm);
3377 yac_mpi_call(MPI_Type_free(&wsum_mf_weight_dt), comm);
3378
3379 *pack_data = xrealloc(pack_data_, total_pack_size);
3380}
3381
// Unpacks count wsum_mf stencils that are stored back to back in
// packed_data. Each stencil is read as target point, link count, links.
// NOTE(review): the line with the return type and the function name (listing
// line 3382) is absent from this extract.
//
// wsum_stencils     out: the unpacked stencils (count entries)
// weight_buffer     out: receives the links of all unpacked stencils; the
//                   data pointer of every stencil points into this buffer
// count             number of stencils to unpack
// packed_data       buffer holding the packed stencils
// packed_data_size  size of packed_data in bytes
// comm              communicator used for unpacking
// returns           total number of links written to weight_buffer
3383 struct interp_weight_stencil_wsum_mf * wsum_stencils,
3384 struct interp_weight_stencil_wsum_mf_weight * weight_buffer, size_t count,
3385 void * packed_data, size_t packed_data_size, MPI_Comm comm) {
3386
3387 MPI_Datatype wsum_mf_weight_dt = get_wsum_mf_weight_mpi_datatype(comm);
3388 MPI_Datatype point_info_dt = yac_get_remote_point_info_mpi_datatype(comm);
3389
3390 size_t weight_offset = 0;
// offset is the byte position in packed_data at which the current stencil
// starts
3391 for (size_t i = 0, offset = 0; i < count; ++i) {
3392
3393 int position = 0;
3394 void * curr_buffer = (void*)((char*)packed_data + offset);
3395 int buffer_size = (int)(packed_data_size - offset);
3396 struct interp_weight_stencil_wsum_mf * curr_wsum_stencil =
3397 wsum_stencils + i;
3398
3399 struct remote_point tgt;
3400 struct interp_weight_stencil_wsum_mf_weight * curr_weights =
3401 weight_buffer + weight_offset;
3402 int weight_count;
// NOTE(review): listing lines 3403, 3405 and 3409 (start of the three
// statements below) are absent from this extract
3404 curr_buffer, buffer_size, &position, &tgt, point_info_dt, comm);
3406 MPI_Unpack(curr_buffer, buffer_size, &position,
3407 &weight_count, 1, MPI_INT, comm),
3408 comm);
3410 MPI_Unpack(curr_buffer, buffer_size, &position,
3411 curr_weights, weight_count, wsum_mf_weight_dt, comm), comm);
3412
3413 curr_wsum_stencil->tgt = tgt;
3414 curr_wsum_stencil->data = curr_weights;
3415 curr_wsum_stencil->count = (size_t)weight_count;
3416
3417 weight_offset += (size_t)weight_count;
3418 offset += (size_t)position;
3419 }
3420
3421 yac_mpi_call(MPI_Type_free(&point_info_dt), comm);
3422 yac_mpi_call(MPI_Type_free(&wsum_mf_weight_dt), comm);
3423
3424 return weight_offset;
3425}
3426
// Redistributes wsum_mf stencils among the processes of comm: entry i of
// stencil_owner names the rank that shall receive the stencil with index
// reorder_idx[i]. Stencils assigned to the calling rank are copied locally,
// all others are packed and exchanged.
// NOTE(review): the line with the return type and the function name (listing
// line 3427) is absent from this extract; the string passed to the exchange
// routine below names it redist_wsum_mf_stencils.
//
// comm                communicator
// wsum_stencils_data  stencils currently held by this rank (not freed here)
// stencil_owner       receiving rank per entry; MODIFIED in place
// reorder_idx         stencil index per entry; MODIFIED in place
// num_owners          number of entries in stencil_owner/reorder_idx
// returns             newly allocated stencil set: received stencils first,
//                     followed by the stencils that stayed on this rank
3428 MPI_Comm comm, struct interp_weight_stencils_wsum_mf * wsum_stencils_data,
3429 int * stencil_owner, size_t * reorder_idx, size_t num_owners) {
3430
3431 struct interp_weight_stencil_wsum_mf * wsum_stencils =
3432 wsum_stencils_data->data;
3433
3434 int comm_rank, comm_size;
3435 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
3436 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
3437
// entries that stay on this rank get the owner INT_MAX, so that the sort
// below moves them behind all entries that have to be sent
3438 size_t local_weight_count = 0;
3439 size_t local_count = 0;
3440 for (size_t i = 0; i < num_owners; ++i) {
3441 if (stencil_owner[i] == comm_rank) {
3442 local_weight_count += wsum_stencils[reorder_idx[i]].count;
3443 stencil_owner[i] = INT_MAX;
3444 ++local_count;
3445 }
3446 }
3447 yac_quicksort_index_int_size_t(stencil_owner, num_owners, reorder_idx);
3448
3449 size_t send_count = num_owners - local_count;
3450
3451 // pack the stencils that need to be sent to other processes
3452 void * send_buffer;
// one allocation serves both per-stencil arrays
3453 int * pack_sizes = xmalloc(2 * send_count * sizeof(*pack_sizes));
3454 int * weight_counts = pack_sizes + send_count;
3455 pack_stencils_wsum_mf(wsum_stencils, send_count, reorder_idx, &send_buffer,
3456 pack_sizes, weight_counts, comm);
3457
3458 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
// NOTE(review): the name of the routine called here (listing line 3459) is
// absent; the code below uses three count entries per rank
3460 3, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
3461
// per destination rank: [0] number of stencils, [1] packed bytes,
// [2] number of links
3462 for (size_t i = 0; i < send_count; ++i) {
3463 int curr_rank = stencil_owner[i];
3464 sendcounts[3 * curr_rank + 0]++;
3465 sendcounts[3 * curr_rank + 1] += (size_t)(pack_sizes[i]);
3466 sendcounts[3 * curr_rank + 2] += (size_t)(weight_counts[i]);
3467 }
3468 free(pack_sizes);
3469
3470 // exchange the number of stencils to be exchanged and the total pack sizes
3471 yac_mpi_call(MPI_Alltoall(sendcounts, 3, YAC_MPI_SIZE_T,
3472 recvcounts, 3, YAC_MPI_SIZE_T, comm), comm);
3473
3474 size_t recv_count = 0;
3475 size_t recv_size = 0;
3476 size_t recv_weight_count = 0;
3477 size_t saccu = 0, raccu = 0;
// accumulate the totals and compact the count triples to one byte count per
// rank; writing entry i in place is safe because 3 * i + 1 >= i, i.e. no
// triple is overwritten before it has been read
3478 for (int i = 0; i < comm_size; ++i) {
3479 sdispls[i] = saccu;
3480 rdispls[i] = raccu;
3481 recv_count += recvcounts[3 * i + 0];
3482 recv_size += recvcounts[3 * i + 1];
3483 recv_weight_count += recvcounts[3 * i + 2];
3484 saccu += sendcounts[3 * i + 1];
3485 raccu += recvcounts[3 * i + 1];
3486 sendcounts[i] = sendcounts[3 * i + 1];
3487 recvcounts[i] = recvcounts[3 * i + 1];
3488 }
3489
3490 void * recv_buffer = xmalloc(recv_size);
3491
3492 // exchange stencils
3493 yac_alltoallv_packed_p2p(
3494 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls, comm,
3495 "redist_wsum_mf_stencils", __LINE__);
3496 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
3497 free(send_buffer);
3498
// NOTE(review): the declaration of temp (listing line 3499) is absent from
// this extract; the allocation covers the object itself plus one buffer
// entry per local and per received link
3500 xmalloc(sizeof(*temp) +
3501 (local_weight_count + recv_weight_count) * sizeof(temp->buffer[0]));
3502 struct interp_weight_stencils_wsum_mf * new_wsum_stencils_data =
3503 (struct interp_weight_stencils_wsum_mf *)temp;
3504 struct interp_weight_stencil_wsum_mf * new_wsum_stencils =
3505 ((new_wsum_stencils_data->data =
3506 xmalloc((local_count + recv_count) *
3507 sizeof(*(new_wsum_stencils_data->data)))));
3508 new_wsum_stencils_data->count = local_count + recv_count;
3509
3510 // unpack stencils
// NOTE(review): the name of the unpack routine called here (listing line
// 3512) is absent; its result is used as the number of links unpacked
3511 size_t weight_offset =
3513 new_wsum_stencils, &(temp->buffer[0]), recv_count,
3514 recv_buffer, recv_size, comm);
3515 free(recv_buffer);
// continue behind the received stencils and their links
3516 new_wsum_stencils += recv_count;
3517 struct interp_weight_stencil_wsum_mf_weight * weight_buffer =
3518 &(temp->buffer[weight_offset]);
3519
3520 // copy the stencils that stay locally into the new stencil array
// the local entries are the last local_count entries of reorder_idx; they
// are sorted by stencil index before being copied
3521 yac_quicksort_index_size_t_size_t(reorder_idx + send_count, local_count, NULL);
// this weight_offset shadows the outer variable and is relative to
// weight_buffer
3522 for (size_t i = 0, weight_offset = 0; i < local_count; ++i) {
3523 struct interp_weight_stencil_wsum_mf * curr_wsum_stencil =
3524 wsum_stencils + reorder_idx[i + send_count];
3525 struct interp_weight_stencil_wsum_mf * curr_new_wsum_stencil =
3526 new_wsum_stencils + i;
3527 struct interp_weight_stencil_wsum_mf_weight * curr_new_weights =
3528 weight_buffer + weight_offset;
3529 size_t curr_stencil_size = curr_wsum_stencil->count;
3530 curr_new_wsum_stencil->tgt = copy_remote_point(curr_wsum_stencil->tgt);
3531 curr_new_wsum_stencil->count = curr_stencil_size;
3532 curr_new_wsum_stencil->data = curr_new_weights;
3533 memcpy(curr_new_weights, curr_wsum_stencil->data,
3534 curr_stencil_size * sizeof(*curr_new_weights));
3535 weight_offset += curr_stencil_size;
3536 }
3537
3538 return new_wsum_stencils_data;
3539}
3540
// Redistributes wsum_mf stencils based on their sources: every stencil is
// assigned to the rank that compute_owner selects from the ranks of its
// source links.
// NOTE(review): the line with the return type and the function name (listing
// line 3541) is absent from this extract.
//
// comm                communicator
// wsum_stencils_data  stencils currently held by this rank
// returns             result of the redistribution call below
3542 MPI_Comm comm, struct interp_weight_stencils_wsum_mf * wsum_stencils_data) {
3543
3544 struct interp_weight_stencil_wsum_mf * wsum_stencils =
3545 wsum_stencils_data->data;
3546 size_t count = wsum_stencils_data->count;
3547
3548 // determine maximum stencil size
3549 size_t max_stencil_size = 0;
3550 for (size_t i = 0; i < count; ++i) {
3551 size_t curr_stencil_size = wsum_stencils[i].count;
3552 if (curr_stencil_size > max_stencil_size)
3553 max_stencil_size = curr_stencil_size;
3554 }
3555
3556 // determine source process for each stencil
// one allocation holds the per-stencil owner (count entries) followed by a
// scratch area for the source ranks of a single stencil
3557 int * rank_buffer =
3558 xmalloc((count + max_stencil_size) * sizeof(*rank_buffer));
3559 int * stencil_owner = rank_buffer;
3560 int * stencil_owners = rank_buffer + count;
3561 size_t * reorder_idx = xmalloc(count * sizeof(*reorder_idx));
3562 for (size_t i = 0; i < count; ++i) {
3563 size_t curr_stencil_size = wsum_stencils[i].count;
3564 struct interp_weight_stencil_wsum_mf_weight * curr_weights =
3565 wsum_stencils[i].data;
3566 for (size_t j = 0; j < curr_stencil_size; ++j)
3567 stencil_owners[j] = curr_weights[j].src.rank;
// compute_owner asserts that its count argument is not zero, i.e. every
// stencil needs at least one link
3568 stencil_owner[i] = compute_owner(stencil_owners, curr_stencil_size);
3569 reorder_idx[i] = i;
3570 }
3571
// NOTE(review): the name of the routine called here (listing line 3573) is
// absent from this extract
3572 struct interp_weight_stencils_wsum_mf * new_wsum_stencils_data =
3574 comm, wsum_stencils_data, stencil_owner, reorder_idx, count);
3575
3576 free(reorder_idx);
3577 free(rank_buffer);
3578
3579 return new_wsum_stencils_data;
3580}
3581
3582static int compare_remote_point_info(const void * a, const void * b) {
3583
3584 int ret = ((struct remote_point_info *)a)->rank -
3585 ((struct remote_point_info *)b)->rank;
3586
3587 if (ret) return ret;
3588
3589 return (((struct remote_point_info *)a)->orig_pos >
3590 ((struct remote_point_info *)b)->orig_pos) -
3591 (((struct remote_point_info *)a)->orig_pos <
3592 ((struct remote_point_info *)b)->orig_pos);
3593}
3594
// Redistributes wsum_mf stencils based on their targets: every stencil is
// sent to each distinct rank listed in its target point. Afterwards the
// target of every stencil held by this rank is reduced to a single local
// position; a stencil whose target has several local positions is
// duplicated once per additional position.
// NOTE(review): the line with the return type and the function name (listing
// line 3595) is absent from this extract.
//
// comm                communicator
// wsum_stencils_data  stencils currently held by this rank; the point info
//                     arrays of multi-owner targets are sorted in place
// returns             stencil set after redistribution
3596 MPI_Comm comm, struct interp_weight_stencils_wsum_mf * wsum_stencils_data) {
3597
3598 struct interp_weight_stencil_wsum_mf * wsum_stencils =
3599 wsum_stencils_data->data;
3600 size_t count = wsum_stencils_data->count;
3601
3602 // determine total number of stencils to be sent to other processes
3603 // (a single stencil may be sent to multiple target processes)
3604 size_t total_owner_count = 0;
3605 for (size_t i = 0; i < count; ++i) {
// despite its name, stencil_size is the number of point infos of the target
3606 int stencil_size = wsum_stencils[i].tgt.data.count;
3607 if (stencil_size == 1) {
3608 total_owner_count++;
3609 } else {
3610 struct remote_point_info * tgt_point_infos =
3611 wsum_stencils[i].tgt.data.data.multi;
// NOTE(review): the line with the comparison function (listing line 3614)
// is absent; the loop below relies on entries of the same rank being
// adjacent after sorting
3612 qsort(
3613 tgt_point_infos, stencil_size, sizeof(*tgt_point_infos),
// count each rank once
3615 int prev_rank = INT_MAX;
3616 for (int j = 0; j < stencil_size; ++j) {
3617 int curr_rank = tgt_point_infos[j].rank;
3618 if (curr_rank != prev_rank) {
3619 ++total_owner_count;
3620 prev_rank = curr_rank;
3621 }
3622 }
3623 }
3624 }
3625
// second pass: one (owner, stencil index) pair per distinct target rank
3626 int * stencil_owner = xmalloc(total_owner_count * sizeof(*stencil_owner));
3627 size_t * reorder_idx = xmalloc(total_owner_count * sizeof(*reorder_idx));
3628 for (size_t i = 0, k = 0; i < count; ++i) {
3629 int stencil_size = wsum_stencils[i].tgt.data.count;
3630 if (stencil_size == 1) {
3631 stencil_owner[k] = wsum_stencils[i].tgt.data.data.single.rank;
3632 reorder_idx[k] = i;
3633 ++k;
3634 } else {
3635 struct remote_point_info * tgt_point_infos =
3636 wsum_stencils[i].tgt.data.data.multi;
3637 int prev_rank = INT_MAX;
3638 for (int j = 0; j < stencil_size; ++j) {
3639 int curr_rank = tgt_point_infos[j].rank;
3640 if (curr_rank != prev_rank) {
3641 stencil_owner[k] = tgt_point_infos[j].rank;
3642 reorder_idx[k] = i;
3643 ++k;
3644 prev_rank = curr_rank;
3645 }
3646 }
3647 }
3648 }
3649
// NOTE(review): the name of the routine called here (listing line 3651) is
// absent from this extract
3650 struct interp_weight_stencils_wsum_mf * new_wsum_stencils_data =
3652 comm, wsum_stencils_data, stencil_owner, reorder_idx, total_owner_count);
3653
// from here on wsum_stencils and count refer to the redistributed stencils
3654 wsum_stencils = new_wsum_stencils_data->data;
3655 count = new_wsum_stencils_data->count;
3656
3657 free(reorder_idx);
3658 free(stencil_owner);
3659
3660 if (count == 0) return new_wsum_stencils_data;
3661
3662 int comm_rank;
3663 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
3664
3665 // count total number of local target locations
3666 size_t total_num_tgt_pos = 0;
3667 for (size_t i = 0; i < count; ++i) {
3668 size_t curr_count = wsum_stencils[i].tgt.data.count;
3669 if (curr_count == 1) {
3670 ++total_num_tgt_pos;
3671 } else {
3672 struct remote_point_info * curr_point_infos =
3673 wsum_stencils[i].tgt.data.data.multi;
3674 for (size_t j = 0; j < curr_count; ++j)
3675 if (curr_point_infos[j].rank == comm_rank)
3676 ++total_num_tgt_pos;
3677 }
3678 }
3679
// resize the stencil array to one entry per local target position
3680 if (total_num_tgt_pos != count) {
3681 new_wsum_stencils_data->data =
3682 ((wsum_stencils =
3683 xrealloc(wsum_stencils, total_num_tgt_pos * sizeof(*wsum_stencils))));
3684 new_wsum_stencils_data->count = total_num_tgt_pos;
3685 }
3686
3687 // remove all non local target point information
// offset is the position at which the next duplicated stencil is stored
3688 for (size_t i = 0, offset = count; i < count; ++i) {
3689 size_t curr_count = wsum_stencils[i].tgt.data.count;
3690 if (curr_count > 1) {
3691 struct remote_point_info * curr_point_infos =
3692 wsum_stencils[i].tgt.data.data.multi;
3693 // find first local target point
// NOTE(review): if no entry belongs to this rank, the target keeps
// count > 1 while its point info array is freed below -- this code assumes
// every multi-owner target received here has a local entry; confirm
3694 size_t j;
3695 for (j = 0; j < curr_count; ++j) {
3696 if (curr_point_infos[j].rank == comm_rank) {
3697 wsum_stencils[i].tgt.data.count = 1;
3698 wsum_stencils[i].tgt.data.data.single.rank = comm_rank;
3699 wsum_stencils[i].tgt.data.data.single.orig_pos =
3700 curr_point_infos[j].orig_pos;
3701 break;
3702 }
3703 }
3704 // make a copy for the remaining local target positions
// the struct copy duplicates the data pointer as well, i.e. the copy refers
// to the same links as stencil i
3705 for (j = j + 1; j < curr_count; ++j) {
3706 if (curr_point_infos[j].rank == comm_rank) {
3707 wsum_stencils[offset] = wsum_stencils[i];
3708 wsum_stencils[offset].tgt.data.data.single.orig_pos =
3709 curr_point_infos[j].orig_pos;
3710 ++offset;
3711 }
3712 }
3713 free(curr_point_infos);
3714 }
3715 }
3716
3717 return new_wsum_stencils_data;
3718}
3719
// Generates, for every source field, a redist that transfers the requested
// halo points from the ranks named in halo_points to the calling rank.
// Processes that take part in no message at all return NULL.
//
// halo_points     requested points: rank and original position on that rank
//                 plus the index of the source field
// count           number of entries in halo_points
// num_src_fields  number of source fields (used as length of local arrays)
// comm            communicator; the routine contains MPI_Comm_split calls
//                 on it
// redist_config   configuration handed to the redist constructor
// returns         array with num_src_fields redists, or NULL
// NOTE(review): the write cursor logic below relies on halo_points being
// grouped per (rank, field) block in a suitable order -- confirm at caller.
3720static Xt_redist * generate_halo_redists(
3721 struct remote_point_info_reorder * halo_points, size_t count,
3722 size_t num_src_fields, MPI_Comm comm, Xt_config redist_config) {
3723
3724 int comm_size;
3725 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
3726
3727 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
// NOTE(review): the name of the routine called here (listing line 3728) is
// absent; the buffers are used with num_src_fields entries per rank
3729 num_src_fields, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
// per-rank totals over all source fields, used for the single exchange of
// the requested positions
3730 size_t * size_t_buffer =
3731 xmalloc(4 * (size_t)comm_size * sizeof(*size_t_buffer));
3732 size_t * total_sendcounts = size_t_buffer + 0 * comm_size;
3733 size_t * total_recvcounts = size_t_buffer + 1 * comm_size;
3734 size_t * total_sdispls = size_t_buffer + 2 * comm_size;
3735 size_t * total_rdispls = size_t_buffer + 3 * comm_size;
3736
// count the requests per (rank, source field)
3737 for (size_t i = 0; i < count; ++i)
3738 sendcounts[halo_points[i].data.rank * num_src_fields +
3739 halo_points[i].field_idx]++;
3740
// NOTE(review): the name of the routine called here (listing line 3741) is
// absent; afterwards recvcounts, sdispls and rdispls are read below
3742 num_src_fields, sendcounts, recvcounts, sdispls, rdispls, comm);
3743
3744 size_t saccu = 0, raccu = 0;
3745 for (int i = 0; i < comm_size; ++i) {
3746 total_sdispls[i] = saccu;
3747 total_rdispls[i] = raccu;
3748 total_sendcounts[i] = 0;
3749 total_recvcounts[i] = 0;
3750 for (size_t j = 0; j < num_src_fields; ++j) {
3751 total_sendcounts[i] += sendcounts[num_src_fields * i + j];
3752 total_recvcounts[i] += recvcounts[num_src_fields * i + j];
3753 }
3754 saccu += total_sendcounts[i];
3755 raccu += total_recvcounts[i];
3756 }
3757
3758 size_t recv_count = total_recvcounts[comm_size - 1] +
3759 total_rdispls[comm_size - 1];
3760
// one allocation for: requested positions, local halo offsets, and the
// positions requested from this rank by others
3761 int * exchange_buffer =
3762 xmalloc((2 * count + recv_count) * sizeof(*exchange_buffer));
3763 int * send_buffer = exchange_buffer;
3764 int * reorder_idx = exchange_buffer + count;
3765 int * recv_buffer = exchange_buffer + 2 * count;
3766
3767 // pack the original positions of the requested points
// NOTE(review): variable length array; num_src_fields has to be > 0
3768 size_t num_halo_per_src_field[num_src_fields];
3769 memset(
3770 num_halo_per_src_field, 0,
3771 num_src_fields * sizeof(num_halo_per_src_field[0]));
3772 for (size_t i = 0; i < count; ++i) {
3773 size_t curr_src_field_idx = (size_t)(halo_points[i].field_idx);
// the sdispls entry one past the block is used as running write cursor, so
// that after this loop sdispls[idx] is the start of block idx.
// NOTE(review): this presumes the call on the missing listing line 3741
// stored the start offset of block idx in sdispls[idx + 1] -- confirm
3774 size_t pos = sdispls[(size_t)(halo_points[i].data.rank) * num_src_fields +
3775 curr_src_field_idx + 1]++;
3776 size_t orig_pos = halo_points[i].data.orig_pos;
3777 YAC_ASSERT(
3778 orig_pos <= INT_MAX,
3779 "ERROR(generate_halo_redists): offset not supported by MPI")
3780 send_buffer[pos] = (int)orig_pos;
// halo points are numbered consecutively per source field in input order
3781 reorder_idx[pos] = num_halo_per_src_field[curr_src_field_idx]++;
3782 }
3783
3784 // exchange original positions of the requested points
3785 yac_alltoallv_int_p2p(
3786 send_buffer, total_sendcounts, total_sdispls,
3787 recv_buffer, total_recvcounts, total_rdispls, comm,
3788 "generate_halo_redists", __LINE__);
3789
3790 free(size_t_buffer);
3791
// the roles are swapped on purpose: a request SENT to a rank means that the
// redist will RECEIVE data from it, and vice versa
3792 size_t nsend = 0, nsends[num_src_fields];
3793 size_t nrecv = 0, nrecvs[num_src_fields];
3794 memset(nsends, 0, num_src_fields * sizeof(nsends[0]));
3795 memset(nrecvs, 0, num_src_fields * sizeof(nrecvs[0]));
3796 for (int i = 0; i < comm_size; ++i) {
3797 for (size_t field_idx = 0; field_idx < num_src_fields; ++field_idx) {
3798 if (sendcounts[i * num_src_fields + field_idx] > 0) {
3799 nrecv++;
3800 nrecvs[field_idx]++;
3801 }
3802 if (recvcounts[i * num_src_fields + field_idx] > 0) {
3803 nsend++;
3804 nsends[field_idx]++;
3805 }
3806 }
3807 }
3808
3809 size_t total_num_msg = nsend + nrecv;
3810
// all send messages are stored in front of all receive messages
3811 struct Xt_redist_msg * msgs_buffer =
3812 xmalloc(total_num_msg * sizeof(*msgs_buffer));
3813 struct Xt_redist_msg * send_msgs = msgs_buffer;
3814 struct Xt_redist_msg * recv_msgs = msgs_buffer + nsend;
3815
// the messages are generated field by field, which allows handing out
// contiguous slices per field below; nsend/nrecv shadow the totals here
3816 for (size_t field_idx = 0, nsend = 0, nrecv = 0;
3817 field_idx < num_src_fields; ++field_idx) {
3818 for (int rank = 0; rank < comm_size; ++rank) {
3819 size_t idx = (size_t)rank * num_src_fields + field_idx;
3820 if (sendcounts[idx] > 0) {
3821 recv_msgs[nrecv].rank = rank;
3822 recv_msgs[nrecv].datatype =
3823 xt_mpi_generate_datatype(
3824 reorder_idx + sdispls[idx], sendcounts[idx], MPI_DOUBLE, comm);
3825 nrecv++;
3826 }
3827 if (recvcounts[idx] > 0) {
3828 send_msgs[nsend].rank = rank;
3829 send_msgs[nsend].datatype =
3830 xt_mpi_generate_datatype(
3831 recv_buffer + rdispls[idx], recvcounts[idx], MPI_DOUBLE, comm);
3832 nsend++;
3833 }
3834 }
3835 }
3836
3837 Xt_redist * redist;
3838 MPI_Comm halo_comm;
3839
3840 if (total_num_msg > 0) {
3841
// processes with at least one message end up in the communicator of color 1
3842 yac_mpi_call(MPI_Comm_split(comm, 1, 0, &halo_comm), comm);
3843
3844 int * rank_buffer = xmalloc(2 * total_num_msg * sizeof(*rank_buffer));
3845 int * orig_ranks = rank_buffer;
3846 int * split_ranks = rank_buffer + total_num_msg;
3847
3848 for (size_t i = 0; i < total_num_msg; ++i)
3849 orig_ranks[i] = msgs_buffer[i].rank;
3850
3851 MPI_Group orig_group, split_group;
3852 yac_mpi_call(MPI_Comm_group(comm, &orig_group), comm);
3853 yac_mpi_call(MPI_Comm_group(halo_comm, &split_group), comm);
3854
// the message ranks refer to comm and have to be translated to halo_comm
// NOTE(review): the start of this statement (listing line 3855) is absent
3856 MPI_Group_translate_ranks(orig_group, (int)total_num_msg, orig_ranks,
3857 split_group, split_ranks), halo_comm);
3858
3859 for (size_t i = 0; i < total_num_msg; ++i)
3860 msgs_buffer[i].rank = split_ranks[i];
3861
3862 free(rank_buffer);
3863
3864 yac_mpi_call(MPI_Group_free(&split_group), comm);
3865 yac_mpi_call(MPI_Group_free(&orig_group), comm);
3866
3867 // generate redist
3868 redist = xmalloc(num_src_fields * sizeof(*redist));
3869 if (num_src_fields == 1) {
3870 *redist =
3871 xt_redist_single_array_base_custom_new(
3872 nsend, nrecv, send_msgs, recv_msgs, halo_comm,
3873 redist_config);
3874 } else {
// each field gets its own slice of the send and receive messages
3875 for (size_t field_idx = 0; field_idx < num_src_fields; ++field_idx) {
3876 redist[field_idx] =
3877 xt_redist_single_array_base_custom_new(
3878 nsends[field_idx], nrecvs[field_idx],
3879 send_msgs, recv_msgs, halo_comm,
3880 redist_config);
3881 send_msgs += nsends[field_idx];
3882 recv_msgs += nrecvs[field_idx];
3883 }
3884 }
3885
3886 } else {
// processes without any message still call MPI_Comm_split (color 0) and get
// no redist
3887 yac_mpi_call(MPI_Comm_split(comm, 0, 0, &halo_comm), comm);
3888 redist = NULL;
3889 }
3890
3891 yac_mpi_call(MPI_Comm_free(&halo_comm), comm);
3892 free(exchange_buffer);
3893 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
3894
3895 xt_redist_msg_free(msgs_buffer, total_num_msg, comm);
3896
3897 return redist;
3898}
3899
3900static int compare_rank_pos_reorder_field_idx(const void * a, const void * b) {
3901
3902 int ret = (((struct remote_point_info_reorder *)a)->field_idx >
3903 ((struct remote_point_info_reorder *)b)->field_idx) -
3904 (((struct remote_point_info_reorder *)a)->field_idx <
3905 ((struct remote_point_info_reorder *)b)->field_idx);
3906
3907 if (ret) return ret;
3908
3909 ret = ((struct remote_point_info_reorder *)a)->data.rank -
3910 ((struct remote_point_info_reorder *)b)->data.rank;
3911
3912 if (ret) return ret;
3913
3914 return (((struct remote_point_info_reorder *)a)->data.orig_pos >
3915 ((struct remote_point_info_reorder *)b)->data.orig_pos) -
3916 (((struct remote_point_info_reorder *)a)->data.orig_pos <
3917 ((struct remote_point_info_reorder *)b)->data.orig_pos);
3918}
3919
// Comparison function for two wsum_mf stencils based on their links: the
// links are compared pairwise in order, first by src_field_idx, then by
// src.orig_pos; the first difference decides. Only the first
// MIN(a count, b count) links are inspected, so a stencil whose links are a
// prefix of the other one's compares as equal (0). The rank of the sources
// is not taken into account.
// NOTE(review): the line with the return type and the function name (listing
// line 3920) is absent from this extract.
3921 const void * a, const void * b) {
3922
// NOTE(review): the initialisers of a_ and b_ (listing lines 3924 and 3926)
// are absent from this extract; presumably casts of a and b
3923 struct interp_weight_stencil_wsum_mf * a_ =
3925 struct interp_weight_stencil_wsum_mf * b_ =
3927
3928 size_t count = MIN(a_->count, b_->count);
3929
3930 for (size_t i = 0; i < count; ++i) {
3931 int ret = (a_->data[i].src_field_idx > b_->data[i].src_field_idx) -
3932 (a_->data[i].src_field_idx < b_->data[i].src_field_idx);
3933 if (ret) return ret;
3934 ret = (a_->data[i].src.orig_pos > b_->data[i].src.orig_pos) -
3935 (a_->data[i].src.orig_pos < b_->data[i].src.orig_pos);
3936 if (ret) return ret;
3937 }
3938 return 0;
3939}
3940
// Comparison function for two wsum_mf stencils based on the original
// position of their target point (ascending). Both targets have to consist
// of exactly one point info; this is checked with YAC_ASSERT.
// NOTE(review): the line with the return type and the function name (listing
// line 3941) is absent from this extract; the error string below names it
// compare_interp_weight_stencil_wsum_mf_tgt_orig_pos.
3942 const void * a, const void * b) {
3943
// NOTE(review): the initialisers of a_ and b_ (listing lines 3945 and 3947)
// are absent from this extract; presumably casts of a and b
3944 struct interp_weight_stencil_wsum_mf * a_ =
3946 struct interp_weight_stencil_wsum_mf * b_ =
3948
3949 YAC_ASSERT(
3950 (a_->tgt.data.count == 1) && (b_->tgt.data.count == 1),
3951 "ERROR(compare_interp_weight_stencil_wsum_mf_tgt_orig_pos): invalid data")
3952
3953 size_t a_orig_pos = a_->tgt.data.data.single.orig_pos;
3954 size_t b_orig_pos = b_->tgt.data.data.single.orig_pos;
3955
// yields -1, 0 or 1 without the overflow risk of a subtraction
3956 return (a_orig_pos > b_orig_pos) - (a_orig_pos < b_orig_pos);
3957}
3958
3959static void free_remote_point(struct remote_point point) {
3960
3961 if (point.data.count > 1) free(point.data.data.multi);
3962}
3963
// Generates a redist that sends the i-th local value (MPI_DOUBLE) to every
// (rank, orig_pos) location listed in point_infos[i].
// NOTE(review): the line with the return type and the function name (listing
// line 3964) is absent from this extract; the routine string below names it
// generate_redist_put_double.
//
// point_infos    for each local value the locations it has to be sent to;
//                every entry needs at least one location
// count          number of entries in point_infos
// comm           communicator
// redist_config  configuration handed to the redist constructor
// returns        the generated redist
3965 struct remote_point_infos * point_infos, size_t count, MPI_Comm comm,
3966 Xt_config redist_config) {
3967
3968 char const * routine = "generate_redist_put_double";
3969
3970 int comm_size;
3971 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
3972
3973 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
// NOTE(review): the name of the routine called here (listing line 3974) is
// absent from this extract
3975 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
3976
// count the number of values to be sent to each rank
3977 for (size_t i = 0; i < count; ++i) {
3978 int curr_count = point_infos[i].count;
3979 struct remote_point_info * curr_point_infos =
3980 (curr_count == 1)?
3981 (&(point_infos[i].data.single)):(point_infos[i].data.multi);
// NOTE(review): the start of this statement (listing line 3982) is absent;
// the format arguments suggest an assertion macro
3983 curr_count >= 1, "ERROR(%s): no owner found for global id", routine)
3984 for (int j = 0; j < curr_count; ++j)
3985 sendcounts[curr_point_infos[j].rank]++;
3986 }
3987
// NOTE(review): the name of the routine called here (listing line 3988) is
// absent; afterwards recvcounts, sdispls and rdispls are read below
3989 1, sendcounts, recvcounts, sdispls, rdispls, comm);
3990
// NOTE(review): reading sdispls[comm_size] presumes that sdispls has
// comm_size + 1 entries with the start offset of rank i stored at index
// i + 1 at this point -- confirm against the routine on listing line 3988
3991 size_t send_count =
3992 sdispls[comm_size] + sendcounts[comm_size - 1];
3993 size_t recv_count =
3994 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
3995
// one allocation for: positions to be sent, index of the local value per
// sent position, and the positions received from other ranks
3996 int * exchange_buffer =
3997 xmalloc((2 * send_count + recv_count) * sizeof(*exchange_buffer));
3998 int * send_buffer = exchange_buffer;
3999 int * reorder_idx = exchange_buffer + send_count;
4000 int * recv_buffer = exchange_buffer + 2 * send_count;
4001
4002 // pack the original positions of the points that have to be updated
4003 for (size_t i = 0; i < count; ++i) {
4004 int curr_count = point_infos[i].count;
4005 struct remote_point_info * curr_point_infos =
4006 (curr_count == 1)?
4007 (&(point_infos[i].data.single)):(point_infos[i].data.multi);
4008 for (int j = 0; j < curr_count; ++j) {
// the sdispls entry one past the rank is used as running write cursor, so
// that after this loop sdispls[rank] is the start of the data for rank
4009 size_t pos = sdispls[curr_point_infos[j].rank + 1]++;
4010 size_t orig_pos = curr_point_infos[j].orig_pos;
// NOTE(review): the start of this statement (listing line 4011) is absent;
// the format arguments suggest an assertion macro
4012 orig_pos <= INT_MAX, "ERROR(%s): offset not supported by MPI", routine)
4013 send_buffer[pos] = (int)orig_pos;
4014 reorder_idx[pos] = i;
4015 }
4016 }
4017
4018 // exchange original positions of the points that have to be updated
4019 yac_alltoallv_int_p2p(
4020 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls, comm,
4021 routine, __LINE__);
4022
// one message per rank with a non-zero count
4023 size_t nsend = 0;
4024 size_t nrecv = 0;
4025 for (int i = 0; i < comm_size; ++i) {
4026 if (sendcounts[i] > 0) nsend++;
4027 if (recvcounts[i] > 0) nrecv++;
4028 }
4029
// sizeof(*send_msgs) is used for both arrays; they have the same element
// type, so the size is identical
4030 struct Xt_redist_msg * send_msgs = xmalloc(nsend * sizeof(*send_msgs));
4031 struct Xt_redist_msg * recv_msgs = xmalloc(nrecv * sizeof(*send_msgs));
4032
// nsend/nrecv declared in this loop shadow the totals above and serve as
// write positions
4033 for (int i = 0, nsend = 0, nrecv = 0; i < comm_size; ++i) {
4034 if (sendcounts[i] > 0) {
4035 send_msgs[nsend].rank = i;
// data is taken from the local values selected by reorder_idx
4036 send_msgs[nsend].datatype =
4037 xt_mpi_generate_datatype(
4038 reorder_idx + sdispls[i], sendcounts[i], MPI_DOUBLE, comm);
4039 nsend++;
4040 }
4041 if (recvcounts[i] > 0) {
4042 recv_msgs[nrecv].rank = i;
// data is stored at the positions the sender requested
4043 recv_msgs[nrecv].datatype =
4044 xt_mpi_generate_datatype(
4045 recv_buffer + rdispls[i], recvcounts[i], MPI_DOUBLE, comm);
4046 nrecv++;
4047 }
4048 }
4049
4050 // generate redist
4051 Xt_redist redist =
4052 xt_redist_single_array_base_custom_new(
4053 nsend, nrecv, send_msgs, recv_msgs, comm, redist_config);
4054
4055 free(exchange_buffer);
4056 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
4057
4058 xt_redist_msg_free(recv_msgs, nrecv, comm);
4059 xt_redist_msg_free(send_msgs, nsend, comm);
4060
4061 return redist;
4062}
4063
// Adds a set of flattened weighted-sum stencils to `interp`.
// Halo redists are generated first. Afterwards the stencil data is handed
// either to interp_add_wsum_mf_at_src (together with a result redist built
// from the point infos of the targets) or to interp_add_wsum_mf_at_tgt
// (together with the local position of every target), depending on `reorder`.
// NOTE(review): the lines holding the function name, the declaration of
// `reorder` and the names of two called routines are absent from this listing.
4065 struct remote_point_info_reorder * remote_src_points, size_t halo_size,
4066 size_t num_src_fields, size_t tgt_count,
4067 struct interp_weight_stencil_wsum_mf * tgt_stencils, size_t * num_src_per_tgt,
4068 double * weights, size_t * src_idx, size_t * src_field_idx, MPI_Comm comm,
4070 struct yac_interpolation * interp,
4071 void (*interp_add_wsum_mf_at_src)(
4072 struct yac_interpolation *, Xt_redist *, size_t, size_t *, double *,
4073 size_t *, size_t *, size_t, Xt_redist),
4074 void (*interp_add_wsum_mf_at_tgt)(
4075 struct yac_interpolation *, Xt_redist *, size_t *, size_t, size_t *, double *,
4076 size_t *, size_t *, size_t), Xt_config redist_config) {
4077
4078 // generate halo redists (one per source field)
4079 Xt_redist * halo_redists =
4081 remote_src_points, halo_size, num_src_fields, comm, redist_config);
4082
4083 // add weights to interpolation
4084 if (reorder == YAC_MAPPING_ON_SRC) {
4085
4086 // generate result redist
// the point infos of all targets are copied into a temporary contiguous
// array, which is released again right after the redist has been generated
4087 struct remote_point_infos * tgt_infos =
4088 xmalloc(tgt_count * sizeof(*tgt_infos));
4089 for (size_t i = 0; i < tgt_count; ++i)
4090 tgt_infos[i] = tgt_stencils[i].tgt.data;
4091 Xt_redist result_redist =
4093 tgt_infos, tgt_count, comm, redist_config);
4094 free(tgt_infos);
4095
4096 interp_add_wsum_mf_at_src(
4097 interp, halo_redists, tgt_count, num_src_per_tgt, weights,
4098 src_field_idx, src_idx, num_src_fields, result_redist);
4099
// the result redist is deleted here after the callback returned; a NULL
// handle is skipped
4100 if (result_redist != NULL) xt_redist_delete(result_redist);
4101
4102 } else {
4103
// this path passes exactly one local position per target, therefore every
// target has to carry exactly one point info entry (count == 1)
4104 size_t * tgt_orig_pos = xmalloc(tgt_count * sizeof(*tgt_orig_pos));
4105 for (size_t i = 0; i < tgt_count; ++i) {
4106 YAC_ASSERT(
4107 tgt_stencils[i].tgt.data.count == 1,
4108 "ERROR(interpolation_add_w_sum_mf): currently unsupported target "
4109 "point distribution")
4110 tgt_orig_pos[i] =
4111 (size_t)(tgt_stencils[i].tgt.data.data.single.orig_pos);
4112 }
4113
4114 interp_add_wsum_mf_at_tgt(
4115 interp, halo_redists, tgt_orig_pos, tgt_count,
4116 num_src_per_tgt, weights, src_field_idx, src_idx, num_src_fields);
4117 free(tgt_orig_pos);
4118
4119 }
4120
// release the halo redists (the array pointer itself may be NULL)
4121 if (halo_redists != NULL) {
4122 for (size_t i = 0; i < num_src_fields; ++i)
4123 xt_redist_delete(halo_redists[i]);
4124 free(halo_redists);
4125 }
4126}
4127
// Forwards all arguments except `weights` (explicitly marked as unused) to
// another routine; the argument order of the call differs from the parameter
// order only by the dropped `weights`.
// NOTE(review): the line with this function's name and the line naming the
// callee are absent from this listing.
4129 struct yac_interpolation * interp, Xt_redist * halo_redists,
4130 size_t tgt_count, size_t * num_src_per_tgt, double * weights,
4131 size_t * src_field_idx, size_t * src_idx,
4132 size_t num_src_fields, Xt_redist result_redist) {
4133
4134 UNUSED(weights);
4135
4137 interp, halo_redists, tgt_count, num_src_per_tgt, src_field_idx,
4138 src_idx, num_src_fields, result_redist);
4139}
4140
// Forwards all arguments except `weights` (explicitly marked as unused) to
// another routine that takes the target positions instead of a result redist.
// NOTE(review): the line with this function's name and the line naming the
// callee are absent from this listing.
4142 struct yac_interpolation * interp, Xt_redist * src_redists,
4143 size_t * tgt_pos, size_t tgt_count, size_t * num_src_per_tgt,
4144 double * weights, size_t * src_field_idx, size_t * src_idx,
4145 size_t num_src_fields) {
4146
4147 UNUSED(weights);
4148
4150 interp, src_redists, tgt_pos, tgt_count, num_src_per_tgt,
4151 src_field_idx, src_idx, num_src_fields);
4152}
4153
// Thin wrapper: passes every argument on unchanged, except that the untyped
// `interp` handle is cast to struct yac_interpolation *.
// NOTE(review): the function name line, the callee name and one argument line
// of the call are absent from this listing.
4155 struct remote_point_info_reorder * remote_src_points, size_t halo_size,
4156 size_t num_src_fields, size_t tgt_count,
4157 struct interp_weight_stencil_wsum_mf * tgt_stencils, size_t * num_src_per_tgt,
4158 double * weights, size_t * src_idx, size_t * src_field_idx, MPI_Comm comm,
4159 enum yac_interp_weights_reorder_type reorder, void * interp,
4160 Xt_config redist_config) {
4161
4163 remote_src_points, halo_size, num_src_fields, tgt_count, tgt_stencils,
4164 num_src_per_tgt, weights, src_idx, src_field_idx, comm, reorder,
4165 (struct yac_interpolation *) interp,
4167 redist_config);
4168}
4169
// Single-source-field wrapper: `num_src_fields` is ignored and the constant 1
// is passed on in its place; all other arguments are forwarded unchanged.
// NOTE(review): the function name line and the callee name are absent from
// this listing.
4171 struct remote_point_info_reorder * remote_src_points, size_t halo_size,
4172 size_t num_src_fields, size_t tgt_count,
4173 struct interp_weight_stencil_wsum_mf * tgt_stencils, size_t * num_src_per_tgt,
4174 double * weights, size_t * src_idx, size_t * src_field_idx, MPI_Comm comm,
4175 enum yac_interp_weights_reorder_type reorder, void * interp,
4176 Xt_config redist_config) {
4177
4178 UNUSED(num_src_fields);
4179
4181 remote_src_points, halo_size, 1, tgt_count, tgt_stencils, num_src_per_tgt,
4182 weights, src_idx, src_field_idx, comm, reorder, interp, redist_config);
4183}
4184
// Thin wrapper: passes every argument on, casting the untyped `interp` handle
// to struct yac_interpolation *.
// NOTE(review): the function name line, the callee name and the trailing
// argument lines of the call are absent from this listing, so the remaining
// arguments cannot be determined here.
4186 struct remote_point_info_reorder * remote_src_points, size_t halo_size,
4187 size_t num_src_fields, size_t tgt_count,
4188 struct interp_weight_stencil_wsum_mf * tgt_stencils, size_t * num_src_per_tgt,
4189 double * weights, size_t * src_idx, size_t * src_field_idx, MPI_Comm comm,
4190 enum yac_interp_weights_reorder_type reorder, void * interp,
4191 Xt_config redist_config) {
4192
4194 remote_src_points, halo_size, num_src_fields, tgt_count, tgt_stencils,
4195 num_src_per_tgt, weights, src_idx, src_field_idx, comm, reorder,
4196 (struct yac_interpolation *) interp,
4199}
4200
// Single-source-field wrapper: `num_src_fields` is ignored and the constant 1
// is passed on in its place; all other arguments are forwarded unchanged.
// NOTE(review): the function name line and the callee name are absent from
// this listing.
4202 struct remote_point_info_reorder * remote_src_points, size_t halo_size,
4203 size_t num_src_fields, size_t tgt_count,
4204 struct interp_weight_stencil_wsum_mf * tgt_stencils, size_t * num_src_per_tgt,
4205 double * weights, size_t * src_idx, size_t * src_field_idx, MPI_Comm comm,
4206 enum yac_interp_weights_reorder_type reorder, void * interp,
4207 Xt_config redist_config) {
4208
4209 UNUSED(num_src_fields);
4210
4212 remote_src_points, halo_size, 1, tgt_count, tgt_stencils, num_src_per_tgt,
4213 weights, src_idx, src_field_idx, comm, reorder, interp, redist_config);
4214}
4215
// Redistributes weighted-sum stencils with a routine selected by `reorder`,
// replaces every source point owned by another rank with a position in a halo
// buffer, flattens the stencils into plain arrays and passes them to the
// interp_add_w_sum_mf callback.
// The input wsum_mf_stencils_data is not released here; only the
// redistributed copy and the temporary arrays are freed at the end.
// NOTE(review): the lines holding the function name, the declaration of
// `reorder`, the names of the redistribution routines and the names of the
// qsort comparators are absent from this listing.
4217 MPI_Comm comm, struct interp_weight_stencils_wsum_mf * wsum_mf_stencils_data,
4218 struct yac_interpolation * interp,
4220 void (*interp_add_w_sum_mf)(
4221 struct remote_point_info_reorder *, size_t, size_t, size_t,
4222 struct interp_weight_stencil_wsum_mf *, size_t *, double *, size_t *,
4223 size_t *, MPI_Comm, enum yac_interp_weights_reorder_type, void *,
4224 Xt_config), Xt_config redist_config) {
4225
4226 int comm_rank;
4227 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
4228
4229 // redistribute stencils to respective owners
4230 struct interp_weight_stencils_wsum_mf * (*redist_wsum_mf_stencils)(
4231 MPI_Comm comm, struct interp_weight_stencils_wsum_mf * wsum_mf_stencils_data);
4232 YAC_ASSERT(
4233 (reorder == YAC_MAPPING_ON_SRC) || (reorder == YAC_MAPPING_ON_TGT),
4234 "ERROR(yac_interp_weights_redist_w_sum_mf): invalid reorder type")
4236 (reorder == YAC_MAPPING_ON_SRC)?
4238 struct interp_weight_stencils_wsum_mf * new_wsum_mf_stencils_data =
4239 redist_wsum_mf_stencils(comm, wsum_mf_stencils_data);
4240
4241 size_t wsum_mf_count = new_wsum_mf_stencils_data->count;
4242 struct interp_weight_stencil_wsum_mf * wsum_mf_stencils =
4243 new_wsum_mf_stencils_data->data;
4244
4245 // compute the total number of links
// a link counts as remote if the rank of its source point differs from the
// rank of this process
4246 size_t total_num_links = 0, total_num_remote_weights = 0;
4247 for (size_t i = 0; i < wsum_mf_count; ++i) {
4248 size_t curr_stencil_size = wsum_mf_stencils[i].count;
4249 total_num_links += curr_stencil_size;
4250 for (size_t j = 0; j < curr_stencil_size; ++j)
4251 if (wsum_mf_stencils[i].data[j].src.rank != comm_rank)
4252 ++total_num_remote_weights;
4253 }
4254
4255 // gather all remote source points and determine number of source fields
// reorder_idx stores the index of the stencil a remote point was taken from
4256 struct remote_point_info_reorder * remote_src_points =
4257 xmalloc(total_num_remote_weights * sizeof(*remote_src_points));
4258 size_t num_src_fields = 0;
4259 for (size_t i = 0, k = 0; i < wsum_mf_count; ++i) {
4260 size_t curr_stencil_size = wsum_mf_stencils[i].count;
4261 struct interp_weight_stencil_wsum_mf_weight * curr_weights =
4262 wsum_mf_stencils[i].data;
4263 for (size_t j = 0; j < curr_stencil_size; ++j) {
4264 size_t curr_src_field_idx = curr_weights[j].src_field_idx;
4265 if (curr_src_field_idx >= num_src_fields)
4266 num_src_fields = curr_src_field_idx + 1;
4267 if (curr_weights[j].src.rank != comm_rank) {
4268 remote_src_points[k].data = curr_weights[j].src;
4269 remote_src_points[k].field_idx = curr_src_field_idx;
4270 remote_src_points[k].reorder_idx = i;
4271 ++k;
4272 }
4273 }
4274 }
// after the reduction num_src_fields holds the maximum over all processes
// in comm
4276 MPI_Allreduce(
4277 MPI_IN_PLACE, &num_src_fields, 1, YAC_MPI_SIZE_T, MPI_MAX, comm), comm);
4278
4279 // sort remote points first by field_idx, second by rank, and
4280 // then by orig_pos
4281 qsort(remote_src_points, total_num_remote_weights, sizeof(*remote_src_points),
4283
4284 // update stencils: set owner to -1; set orig_pos to position of respective
4285 // point in halo data
4286 // remove duplicated remote points
// NOTE(review): prev_remote_src_point stays unassigned if there are no
// remote weights; it is only read inside the loop below, which does not
// execute in that case
4287 struct remote_point_info * prev_remote_src_point;
4288 size_t prev_field_idx;
4289 size_t halo_size;
4290 if (total_num_remote_weights > 0) {
4291 prev_remote_src_point = &(remote_src_points[0].data);
4292 prev_field_idx = remote_src_points[0].field_idx;
4293 halo_size = 1;
4294 } else {
4295 prev_field_idx = SIZE_MAX;
4296 halo_size = 0;
4297 }
4298 for (size_t i = 0; i < total_num_remote_weights; ++i) {
4299 struct remote_point_info * curr_remote_src_point =
4300 &(remote_src_points[i].data);
4301 size_t curr_field_idx = remote_src_points[i].field_idx;
4303 prev_remote_src_point, curr_remote_src_point) ||
4304 (prev_field_idx != curr_field_idx)) {
// new distinct (point, field) pair: it is copied to the next free halo
// slot at the front of remote_src_points (in-place compaction)
4305 prev_remote_src_point = curr_remote_src_point;
4306 prev_field_idx = curr_field_idx;
4307 remote_src_points[halo_size].data = *curr_remote_src_point;
4308 remote_src_points[halo_size].field_idx = curr_field_idx;
4309 ++halo_size;
4310 }
// rewrite all entries of the originating stencil that match the current
// point and field; besides rank and orig_pos, src_field_idx is overwritten
// with SIZE_MAX for these entries
4311 struct interp_weight_stencil_wsum_mf * curr_stencil =
4312 wsum_mf_stencils + remote_src_points[i].reorder_idx;
4313 size_t curr_stencil_size = curr_stencil->count;
4314 for (size_t j = 0; j < curr_stencil_size; ++j) {
4316 &(curr_stencil->data[j].src), curr_remote_src_point)) &&
4317 (curr_stencil->data[j].src_field_idx == curr_field_idx)) {
4318 curr_stencil->data[j].src.rank = -1;
4319 curr_stencil->data[j].src.orig_pos = halo_size - 1;
4320 curr_stencil->data[j].src_field_idx = SIZE_MAX;
4321 }
4322 }
4323 }
4324
4325 // sort stencils by their memory access pattern on the local process
4326 qsort(wsum_mf_stencils, wsum_mf_count, sizeof(*wsum_mf_stencils),
4327 (reorder == YAC_MAPPING_ON_SRC)?
4330
4331 size_t * num_src_per_tgt = xmalloc(wsum_mf_count * sizeof(*num_src_per_tgt));
4332 double * weights = xmalloc(total_num_links * sizeof(*weights));
4333 size_t * src_idx = xmalloc(total_num_links * sizeof(*src_idx));
4334 size_t * src_field_idx = xmalloc(total_num_links * sizeof(*src_field_idx));
4335
4336 // extract data from stencil
// k runs over all links of all stencils, i.e. the per-link arrays are the
// concatenation of the stencils in their sorted order
4337 for (size_t i = 0, k = 0; i < wsum_mf_count; ++i) {
4338 size_t curr_stencil_size = wsum_mf_stencils[i].count;
4339 struct interp_weight_stencil_wsum_mf_weight * curr_weights =
4340 wsum_mf_stencils[i].data;
4341 num_src_per_tgt[i] = curr_stencil_size;
4342 for (size_t j = 0; j < curr_stencil_size; ++j, ++k){
4343 weights[k] = curr_weights[j].weight;
4344 src_idx[k] = curr_weights[j].src.orig_pos;
4345 src_field_idx[k] = curr_weights[j].src_field_idx;
4346 }
4347 }
4348
4349 // add data to interpolation
4350 interp_add_w_sum_mf(
4351 remote_src_points, halo_size, num_src_fields, wsum_mf_count,
4352 wsum_mf_stencils, num_src_per_tgt, weights, src_idx, src_field_idx,
4353 comm, reorder, interp, redist_config);
4354
// release the redistributed stencils and all temporary arrays
4355 for (size_t i = 0; i < new_wsum_mf_stencils_data->count; ++i)
4356 free_remote_point(new_wsum_mf_stencils_data->data[i].tgt);
4357 free(new_wsum_mf_stencils_data->data);
4358 free(new_wsum_mf_stencils_data);
4359
4360 free(remote_src_points);
4361 free(src_field_idx);
4362 free(src_idx);
4363 free(weights);
4364 free(num_src_per_tgt);
4365}
4366
// Redistributes weighted-sum stencils with redist_wsum_mf_stencils_tgt,
// replaces every source point (local and remote alike) with a position in a
// per-source-field buffer, flattens the stencils into plain arrays and passes
// them to the interp_add_w_sum_mf callback.
// The input wsum_mf_stencils_data is not released here; only the
// redistributed copy and the temporary arrays are freed at the end.
// NOTE(review): the lines holding the function name and the names of the
// qsort comparators are absent from this listing.
4368 MPI_Comm comm, struct interp_weight_stencils_wsum_mf * wsum_mf_stencils_data,
4369 struct yac_interpolation * interp,
4370 void (*interp_add_w_sum_mf)(
4371 struct remote_point_info_reorder *, size_t, size_t, size_t,
4372 struct interp_weight_stencil_wsum_mf *, size_t *, double *, size_t *,
4373 size_t *, MPI_Comm, void *,
4374 Xt_config), Xt_config redist_config) {
4375
4376 int comm_rank;
4377 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
4378
4379 // redistribute stencils to respective owners
4380 struct interp_weight_stencils_wsum_mf * new_wsum_mf_stencils_data =
4381 redist_wsum_mf_stencils_tgt(comm, wsum_mf_stencils_data);
4382
4383 size_t wsum_mf_count = new_wsum_mf_stencils_data->count;
4384 struct interp_weight_stencil_wsum_mf * wsum_mf_stencils =
4385 new_wsum_mf_stencils_data->data;
4386
4387 // compute the total number of links
4388 size_t total_num_links = 0;
4389 for (size_t i = 0; i < wsum_mf_count; ++i)
4390 total_num_links += wsum_mf_stencils[i].count;
4391
4392 // gather all source points and determine number of source fields
// reorder_idx stores the index of the stencil a source point was taken from
4393 struct remote_point_info_reorder * src_points =
4394 xmalloc(total_num_links * sizeof(*src_points));
4395 size_t num_src_fields = 0;
4396 for (size_t i = 0, k = 0; i < wsum_mf_count; ++i) {
4397 size_t curr_stencil_size = wsum_mf_stencils[i].count;
4398 struct interp_weight_stencil_wsum_mf_weight * curr_weights =
4399 wsum_mf_stencils[i].data;
4400 for (size_t j = 0; j < curr_stencil_size; ++j, ++k) {
4401 size_t curr_src_field_idx = curr_weights[j].src_field_idx;
4402 if (curr_src_field_idx >= num_src_fields)
4403 num_src_fields = curr_src_field_idx + 1;
4404 src_points[k].data = curr_weights[j].src;
4405 src_points[k].field_idx = curr_src_field_idx;
4406 src_points[k].reorder_idx = i;
4407 }
4408 }
// after the reduction num_src_fields holds the maximum over all processes
// in comm
4410 MPI_Allreduce(
4411 MPI_IN_PLACE, &num_src_fields, 1, YAC_MPI_SIZE_T, MPI_MAX, comm), comm);
4412
4413 // sort source points first by field_idx, second by rank, and
4414 // then by orig_pos
4415 qsort(src_points, total_num_links, sizeof(*src_points),
4417
4418 // update stencils: set owner to -1; set orig_pos to position of respective
4419 // point in source field buffer
4420 // remove duplicated source points
// NOTE(review): prev_src_point and prev_field_idx stay unassigned if there
// are no links; they are only read inside the loop below, which does not
// execute in that case
// NOTE(review): src_field_buffer_size is a variable length array of
// num_src_fields elements; a value of zero would not be valid for it —
// confirm that callers guarantee at least one source field
4421 struct remote_point_info * prev_src_point;
4422 size_t prev_field_idx;
4423 size_t num_src_points = 0;
4424 size_t src_field_buffer_size[num_src_fields];
4425 memset(
4426 src_field_buffer_size, 0,
4427 num_src_fields * sizeof(src_field_buffer_size[0]));
4428
4429 if (total_num_links > 0) {
4430 prev_src_point = &(src_points[0].data);
4431 prev_field_idx = src_points[0].field_idx;
4432 src_field_buffer_size[src_points[0].field_idx] = 1;
4433 num_src_points = 1;
4434 }
4435 for (size_t i = 0; i < total_num_links; ++i) {
4436 struct remote_point_info * curr_src_point = &(src_points[i].data);
4437 size_t curr_field_idx = src_points[i].field_idx;
4438 if (compare_remote_point_info(prev_src_point, curr_src_point) ||
4439 (prev_field_idx != curr_field_idx)) {
// new distinct (point, field) pair: it is moved to the next free slot at
// the front of src_points (in-place compaction) and the buffer size of
// its field grows by one
4440 prev_src_point = curr_src_point;
4441 prev_field_idx = curr_field_idx;
4442 if (num_src_points != i) {
4443 src_points[num_src_points].data = *curr_src_point;
4444 src_points[num_src_points].field_idx = curr_field_idx;
4445 }
4446 src_field_buffer_size[curr_field_idx]++;
4447 num_src_points++;
4448 }
// rewrite all entries of the originating stencil that match the current
// point and field; orig_pos becomes the index of the point within the
// buffer of its source field, src_field_idx is left untouched
4449 struct interp_weight_stencil_wsum_mf * curr_stencil =
4450 wsum_mf_stencils + src_points[i].reorder_idx;
4451 size_t curr_stencil_size = curr_stencil->count;
4452 for (size_t j = 0; j < curr_stencil_size; ++j) {
4454 &(curr_stencil->data[j].src), curr_src_point)) &&
4455 (curr_stencil->data[j].src_field_idx == curr_field_idx)) {
4456 curr_stencil->data[j].src.rank = -1;
4457 curr_stencil->data[j].src.orig_pos =
4458 src_field_buffer_size[curr_field_idx] - 1;
4459 }
4460 }
4461 }
4462
4463 // sort stencils by their memory access pattern on the local process
4464 qsort(wsum_mf_stencils, wsum_mf_count, sizeof(*wsum_mf_stencils),
4466
4467 size_t * num_src_per_tgt = xmalloc(wsum_mf_count * sizeof(*num_src_per_tgt));
4468 double * weights = xmalloc(total_num_links * sizeof(*weights));
4469 size_t * src_idx = xmalloc(total_num_links * sizeof(*src_idx));
4470 size_t * src_field_idx = xmalloc(total_num_links * sizeof(*src_field_idx));
4471
4472 // extract data from stencil
// k runs over all links of all stencils, i.e. the per-link arrays are the
// concatenation of the stencils in their sorted order
4473 for (size_t i = 0, k = 0; i < wsum_mf_count; ++i) {
4474 size_t curr_stencil_size = wsum_mf_stencils[i].count;
4475 struct interp_weight_stencil_wsum_mf_weight * curr_weights =
4476 wsum_mf_stencils[i].data;
4477 num_src_per_tgt[i] = curr_stencil_size;
4478 for (size_t j = 0; j < curr_stencil_size; ++j, ++k){
4479 weights[k] = curr_weights[j].weight;
4480 src_idx[k] = curr_weights[j].src.orig_pos;
4481 src_field_idx[k] = curr_weights[j].src_field_idx;
4482 }
4483 }
4484
4485 // add data to interpolation
4486 interp_add_w_sum_mf(
4487 src_points, num_src_points, num_src_fields, wsum_mf_count,
4488 wsum_mf_stencils, num_src_per_tgt, weights, src_idx, src_field_idx,
4489 comm, interp, redist_config);
4490
// release the redistributed stencils and all temporary arrays
4491 for (size_t i = 0; i < new_wsum_mf_stencils_data->count; ++i)
4492 free_remote_point(new_wsum_mf_stencils_data->data[i].tgt);
4493 free(new_wsum_mf_stencils_data->data);
4494 free(new_wsum_mf_stencils_data);
4495
4496 free(src_points);
4497 free(src_field_idx);
4498 free(src_idx);
4499 free(weights);
4500 free(num_src_per_tgt);
4501}
4502
4503static int compare_stencils(const void * a, const void * b) {
4504
4505 return (int)(((struct interp_weight_stencil *)a)->type) -
4506 (int)(((struct interp_weight_stencil *)b)->type);
4507}
4508
4509static Xt_config get_redist_config(
4510 char const * yaxt_exchanger_name, MPI_Comm comm) {
4511
4512 Xt_config redist_config = xt_config_new();
4513
4514 // if no exchanger has been defined yet -> check environment
4515 char * env_exchanger_name = NULL;
4516 if (yaxt_exchanger_name == NULL) {
4517
4518 int rank;
4519 yac_mpi_call(MPI_Comm_rank(comm, &rank), comm);
4520
4521 // environment is only checked on rank 0, results are broadcasted
4522 // to other processes
4523 size_t exchanger_name_len = 0;
4524 if (rank == 0) {
4525
4526 // check if the user provided an exchanger name in the environment
4527 env_exchanger_name = getenv(YAC_YAXT_EXCHANGER_STR);
4528 exchanger_name_len =
4529 ((env_exchanger_name != NULL) && (env_exchanger_name[0] != '\0'))?
4530 strlen(env_exchanger_name):0;
4531 }
4532
4533 // broadcast the length of the exchanger name provided by the user
4534 // through the environment
4536 MPI_Bcast(&exchanger_name_len, 1, YAC_MPI_SIZE_T, 0, comm), comm);
4537
4538 if (exchanger_name_len > 0) {
4539
4540 if (rank == 0)
4541 env_exchanger_name = strdup(env_exchanger_name);
4542 else
4543 env_exchanger_name =
4544 xmalloc((exchanger_name_len + 1) * sizeof(*env_exchanger_name));
4545
4546 // broadcast name of the exchanger
4548 MPI_Bcast(
4549 env_exchanger_name, (int)(exchanger_name_len + 1), MPI_CHAR, 0, comm),
4550 comm);
4551
4552 yaxt_exchanger_name = env_exchanger_name;
4553 }
4554 }
4555
4556 if (yaxt_exchanger_name != NULL) {
4557
4558 // set exchanger
4559 int exchanger_id = xt_exchanger_id_by_name(yaxt_exchanger_name);
4561 exchanger_id >= 0,
4562 "ERROR(get_redist_config): invalid yaxt exchanger name \"%s\"",
4563 yaxt_exchanger_name);
4564 xt_config_set_exchange_method(redist_config, exchanger_id);
4565 }
4566
4567 free(env_exchanger_name);
4568
4569 return redist_config;
4570}
4571
// Builds an interpolation from the stencils stored in `weights`:
// the stencils are sorted by type, counted per type locally and globally,
// and for every type with a non-zero global count the respective part of the
// stencil array is added to the interpolation. The YAXT configuration
// obtained from get_redist_config is deleted again before returning.
// Returns the generated interpolation.
// NOTE(review): the lines holding the function name, the declarations of
// `reorder` and `collection_size`, the constructor call of the interpolation
// and the names of most called routines are absent from this listing.
4573 struct yac_interp_weights * weights,
4576 double scaling_factor, double scaling_summand,
4577 char const * yaxt_exchanger_name) {
4578
4579 struct yac_interpolation * interp =
4582 scaling_factor, scaling_summand);
4583
4584 MPI_Comm comm = weights->comm;
4585
4586 Xt_config redist_config = get_redist_config(yaxt_exchanger_name, comm);
4587
4588 // sort stencils by type
4589 qsort(weights->stencils, weights->stencils_size, sizeof(*(weights->stencils)),
4591
4592 size_t local_stencil_counts[WEIGHT_STENCIL_TYPE_SIZE];
4593 size_t stencils_offsets[WEIGHT_STENCIL_TYPE_SIZE];
4594
4595 // count local number of stencils per type
4596 memset(&(local_stencil_counts[0]), 0, sizeof(local_stencil_counts));
4597 for (size_t i = 0; i < weights->stencils_size; ++i)
4598 local_stencil_counts[(int)(weights->stencils[i].type)]++;
4599
// exclusive prefix sum: stencils_offsets[t] is the index of the first
// stencil of type t within the array sorted by type
4600 for (size_t i = 0, accu = 0; i < (size_t)WEIGHT_STENCIL_TYPE_SIZE; ++i) {
4601 stencils_offsets[i] = accu;
4602 accu += local_stencil_counts[i];
4603 }
4604
4605 size_t global_stencil_counts[WEIGHT_STENCIL_TYPE_SIZE];
4606
4607 // determine global number of stencils per type
4609 MPI_Allreduce(
4610 local_stencil_counts, global_stencil_counts,
4611 (int)WEIGHT_STENCIL_TYPE_SIZE, YAC_MPI_SIZE_T, MPI_SUM, comm), comm);
4612
4613 { // check whether the collection_size is consistent across all processes
// the local value has to be equal to the maximum over all processes
4614 size_t max_collection_size = collection_size;
4616 MPI_Allreduce(
4617 MPI_IN_PLACE, &max_collection_size, 1, YAC_MPI_SIZE_T, MPI_MAX, comm),
4618 comm);
4619 YAC_ASSERT(
4620 (size_t)max_collection_size == collection_size,
4621 "ERROR(yac_interp_weights_get_interpolation): "
4622 "mismatching collection sizes")
4623 }
4624
// all decisions below depend on the global counts, hence every process
// takes the same branches, even if it has no local stencil of a type
// (presumably because the called routines communicate — TODO confirm)
4625 if (global_stencil_counts[FIXED] > 0)
4627 weights->comm, local_stencil_counts[FIXED],
4628 weights->stencils + stencils_offsets[FIXED],
4629 (void*)interp, interp_add_fixed);
4630
4631 if (global_stencil_counts[DIRECT] > 0)
4633 weights->comm, local_stencil_counts[DIRECT],
4634 weights->stencils + stencils_offsets[DIRECT], interp, interp_add_direct,
4635 redist_config);
4636
// the four sum-like types are first converted into stencils of type
// struct interp_weight_stencils_wsum_mf; the converted copy is released
// again once it has been added to the interpolation
4637 if (global_stencil_counts[SUM] > 0) {
4638
4639 struct interp_weight_stencils_wsum_mf * wsum_stencils =
4641 weights->stencils + stencils_offsets[SUM],
4642 (size_t)(local_stencil_counts[SUM]), SUM);
4644 weights->comm, wsum_stencils, interp, reorder,
4645 interpolation_add_sum, redist_config);
4646 for (size_t i = 0; i < wsum_stencils->count; ++i)
4647 free_remote_point(wsum_stencils->data[i].tgt);
4648 free(wsum_stencils->data);
4649 free(wsum_stencils);
4650 }
4651
4652 if (global_stencil_counts[WEIGHT_SUM] > 0) {
4653
4654 struct interp_weight_stencils_wsum_mf * wsum_stencils =
4656 weights->stencils + stencils_offsets[WEIGHT_SUM],
4657 (size_t)(local_stencil_counts[WEIGHT_SUM]), WEIGHT_SUM);
4659 weights->comm, wsum_stencils, interp, reorder,
4660 interpolation_add_wsum, redist_config);
4661 for (size_t i = 0; i < wsum_stencils->count; ++i)
4662 free_remote_point(wsum_stencils->data[i].tgt);
4663 free(wsum_stencils->data);
4664 free(wsum_stencils);
4665 }
4666
4667 if (global_stencil_counts[DIRECT_MF] > 0)
4669 weights->comm, local_stencil_counts[DIRECT_MF],
4670 weights->stencils + stencils_offsets[DIRECT_MF],
4671 interp, interp_add_direct_mf, redist_config);
4672
4673 if (global_stencil_counts[SUM_MF] > 0) {
4674
4675 struct interp_weight_stencils_wsum_mf * sum_mf_stencils =
4677 weights->stencils + stencils_offsets[SUM_MF],
4678 (size_t)(local_stencil_counts[SUM_MF]), SUM_MF);
4680 weights->comm, sum_mf_stencils, interp, reorder,
4681 interpolation_add_sum_mf, redist_config);
4682 for (size_t i = 0; i < sum_mf_stencils->count; ++i)
4683 free_remote_point(sum_mf_stencils->data[i].tgt);
4684 free(sum_mf_stencils->data);
4685 free(sum_mf_stencils);
4686 }
4687
4688 if (global_stencil_counts[WEIGHT_SUM_MF] > 0) {
4689
4690 struct interp_weight_stencils_wsum_mf * wsum_mf_stencils =
4692 weights->stencils + stencils_offsets[WEIGHT_SUM_MF],
4693 (size_t)(local_stencil_counts[WEIGHT_SUM_MF]), WEIGHT_SUM_MF);
4695 weights->comm, wsum_mf_stencils, interp, reorder,
4696 interpolation_add_wsum_mf, redist_config);
4697 for (size_t i = 0; i < wsum_mf_stencils->count; ++i)
4698 free_remote_point(wsum_mf_stencils->data[i].tgt);
4699 free(wsum_mf_stencils->data);
4700 free(wsum_mf_stencils);
4701 }
4702
4703 xt_config_delete(redist_config);
4704
4705 return interp;
4706}
4707
// Generates one Xt_redist per source field from the send and receive
// messages stored in src_field_exchange_data.
// Returns an array of num_src_fields redists allocated with xmalloc, or NULL
// on processes that have neither send nor receive messages.
// MPI_Comm_split is called on comm in both cases (color 1 for processes with
// messages, color 0 otherwise), hence all processes of comm have to call
// this routine.
// NOTE(review): the line holding the function name is absent from this
// listing.
4709 struct yac_src_field_exchange_data * src_field_exchange_data,
4710 size_t num_src_fields, MPI_Comm comm,
4711 Xt_config redist_config) {
4712
4713 int comm_size;
4714 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
4715
// determine the number of messages per source field and the size of the
// biggest receive message
4716 size_t nsends[num_src_fields], nrecvs[num_src_fields];
4717 int max_recv_buffer_size = 0;
4718 for (size_t i = 0; i < num_src_fields; ++i) {
4719 nsends[i] = src_field_exchange_data[i].send.num_msg;
4720 nrecvs[i] = src_field_exchange_data[i].recv.num_msg;
4721 for (size_t j = 0; j < src_field_exchange_data[i].recv.num_msg; ++j) {
4722 if (max_recv_buffer_size < src_field_exchange_data[i].recv.msg[j].count)
4723 max_recv_buffer_size = src_field_exchange_data[i].recv.msg[j].count;
4724 }
4725 }
4726
// send_offsets/recv_offsets: index of the first message of each source field
// within the contiguous send/receive message arrays
4727 size_t nsend = 0, nrecv = 0;
4728 size_t send_offsets[num_src_fields];
4729 size_t recv_offsets[num_src_fields];
4730 for (size_t i = 0; i < num_src_fields; ++i) {
4731 send_offsets[i] = nsend;
4732 recv_offsets[i] = nrecv;
4733 nsend += nsends[i];
4734 nrecv += nrecvs[i];
4735 }
4736
4737 size_t total_num_msg = nsend + nrecv;
4738
// a single allocation holds all send messages followed by all receive
// messages
4739 struct Xt_redist_msg * msgs_buffer =
4740 xmalloc(total_num_msg * sizeof(*msgs_buffer));
4741 struct Xt_redist_msg * send_msgs = msgs_buffer;
4742 struct Xt_redist_msg * recv_msgs = msgs_buffer + nsend;
4743
4744 int * pos_buffer =
4745 xmalloc((size_t)max_recv_buffer_size * sizeof(*pos_buffer));
4746
4747 // generate yaxt send and receive messages
// send_offsets/recv_offsets are advanced while the messages are written,
// i.e. they serve as running write positions in this loop
4748 for (size_t src_field_idx = 0; src_field_idx < num_src_fields;
4749 ++src_field_idx) {
4750
4751 for (size_t send_idx = 0;
4752 send_idx < src_field_exchange_data[src_field_idx].send.num_msg;
4753 ++send_idx) {
4754 send_msgs[send_offsets[src_field_idx]].rank =
4755 src_field_exchange_data[src_field_idx].send.msg[send_idx].rank;
4756 send_msgs[send_offsets[src_field_idx]].datatype =
4757 xt_mpi_generate_datatype(
4758 src_field_exchange_data[src_field_idx].send.msg[send_idx].pos,
4759 src_field_exchange_data[src_field_idx].send.msg[send_idx].count,
4760 MPI_DOUBLE, comm);
4761 send_offsets[src_field_idx]++;
4762 }
4763
4764 for (size_t recv_idx = 0;
4765 recv_idx < src_field_exchange_data[src_field_idx].recv.num_msg;
4766 ++recv_idx) {
// the receive positions are copied into pos_buffer before the datatype
// is generated from them
4767 int count = 0;
4768 for (int i = 0;
4769 i < src_field_exchange_data[src_field_idx].recv.msg[recv_idx].count;
4770 ++i, ++count)
4771 pos_buffer[count] =
4772 src_field_exchange_data[src_field_idx].recv.msg[recv_idx].pos[i];
4773
4774 recv_msgs[recv_offsets[src_field_idx]].rank =
4775 src_field_exchange_data[src_field_idx].recv.msg[recv_idx].rank;
4776 recv_msgs[recv_offsets[src_field_idx]].datatype =
4777 xt_mpi_generate_datatype(pos_buffer, count, MPI_DOUBLE, comm);
4778 recv_offsets[src_field_idx]++;
4779 }
4780 }
4781
4782 free(pos_buffer);
4783
4784 Xt_redist * redists;
4785 MPI_Comm split_comm;
4786
4787 // only processes that have to send/receive data are included in the redist,
4788 // the others receive a dummy redist
4789 if (total_num_msg > 0) {
4790
4791 // generate MPI communicator containing all ranks taking part in the
4792 // exchange
4793 yac_mpi_call(MPI_Comm_split(comm, 1, 0, &split_comm), comm);
4794
4795 int * rank_buffer =
4796 xmalloc(2 * total_num_msg * sizeof(*rank_buffer));
4797 int * orig_ranks = rank_buffer;
4798 int * split_ranks = rank_buffer + total_num_msg;
4799
4800 for (size_t i = 0; i < total_num_msg; ++i)
4801 orig_ranks[i] = msgs_buffer[i].rank;
4802
4803 MPI_Group orig_group, split_group;
4804 yac_mpi_call(MPI_Comm_group(comm, &orig_group), comm);
4805 yac_mpi_call(MPI_Comm_group(split_comm, &split_group), comm);
4806
4807 // translate the ranks in the send/receive messages to ones in the
4808 // split comm
// NOTE(review): total_num_msg is a size_t that is passed as the int count
// argument of MPI_Group_translate_ranks
4810 MPI_Group_translate_ranks(orig_group, total_num_msg, orig_ranks,
4811 split_group, split_ranks), split_comm);
4812
4813 for (size_t i = 0; i < total_num_msg; ++i)
4814 msgs_buffer[i].rank = split_ranks[i];
4815
4816 free(rank_buffer);
4817
4818 yac_mpi_call(MPI_Group_free(&split_group), comm);
4819 yac_mpi_call(MPI_Group_free(&orig_group), comm);
4820
4821 // generate redists
// send_msgs/recv_msgs are moved past the messages of the current source
// field after each redist has been generated
4822 redists = xmalloc(num_src_fields * sizeof(*redists));
4823 for (size_t src_field_idx = 0; src_field_idx < num_src_fields;
4824 ++src_field_idx) {
4825 redists[src_field_idx] =
4826 xt_redist_single_array_base_custom_new(
4827 nsends[src_field_idx], nrecvs[src_field_idx],
4828 send_msgs, recv_msgs, split_comm, redist_config);
4829 send_msgs += nsends[src_field_idx];
4830 recv_msgs += nrecvs[src_field_idx];
4831 }
4832
4833 } else {
// processes without messages still take part in the split (color 0) and
// return NULL instead of a redist array
4834 yac_mpi_call(MPI_Comm_split(comm, 0, 0, &split_comm), comm);
4835 redists = NULL;
4836 }
4837
4838 yac_mpi_call(MPI_Comm_free(&split_comm), comm);
4839 xt_redist_msg_free(msgs_buffer, total_num_msg, comm);
4840
4841 return redists;
4842}
4843
// Initialises *interp_weights_data: the fractional mask fallback value and
// the scaling parameters are taken from the arguments, all counters are set
// to zero and all array pointers to NULL.
// NOTE(review): the line holding the function name is absent from this
// listing.
4845 double frac_mask_fallback_value, double scaling_factor,
4846 double scaling_summand,
4847 struct yac_interp_weights_data * interp_weights_data) {
4848
4849 interp_weights_data->frac_mask_fallback_value = frac_mask_fallback_value;
4850 interp_weights_data->scaling_factor = scaling_factor;
4851 interp_weights_data->scaling_summand = scaling_summand;
4852
// no fixed-value targets yet
4853 interp_weights_data->num_fixed_values = 0;
4854 interp_weights_data->fixed_values = NULL;
4855 interp_weights_data->num_tgt_per_fixed_value = NULL;
4856 interp_weights_data->tgt_idx_fixed = NULL;
4857
// no weighted targets and no source fields yet
4858 interp_weights_data->num_wgt_tgt = 0;
4859 interp_weights_data->wgt_tgt_idx = NULL;
4860 interp_weights_data->num_src_per_tgt = NULL;
4861 interp_weights_data->weights = NULL;
4862 interp_weights_data->src_field_idx = NULL;
4863 interp_weights_data->src_idx = NULL;
4864 interp_weights_data->num_src_fields = 0;
4865 interp_weights_data->src_field_buffer_size = NULL;
4866}
4867
// Releases an array of num_src_fields exchange data entries: for every entry
// the position arrays of all send and receive messages and the message
// arrays themselves are freed, followed by the array of entries.
// NOTE(review): the line holding the function name is absent from this
// listing.
4869 struct yac_src_field_exchange_data * src_field_exchange_data,
4870 size_t num_src_fields) {
4871
4872 for (size_t i = 0; i < num_src_fields; ++i) {
4873 for (size_t j = 0; j < src_field_exchange_data[i].send.num_msg; ++j)
4874 free(src_field_exchange_data[i].send.msg[j].pos);
4875 free(src_field_exchange_data[i].send.msg);
4876 for (size_t j = 0; j < src_field_exchange_data[i].recv.num_msg; ++j)
4877 free(src_field_exchange_data[i].recv.msg[j].pos);
4878 free(src_field_exchange_data[i].recv.msg);
4879 }
4880
4881 free(src_field_exchange_data);
4882}
4883
// Parameter list and body of the routine that the messages below tag as
// "interpolation_raw_add_w_sum_mf".
// NOTE(review): the line with the function name and the opening lines of
// several calls are not part of this listing (the listing numbers jump there).
//
// Visible steps:
//  1. count, per (rank, source field), how many entries of remote_src_points
//     are requested,
//  2. send the original positions of the requested points to the owning ranks,
//  3. hand the exchanged positions to interp_raw->src_field_exchange_data
//     (call name not visible),
//  4. collect the local target positions of all stencils and pass them,
//     together with the weights/source indices, to a call that receives
//     &(interp_raw->interp_weights_data) (call name not visible).
//
// redist_config is accepted but unused here.
4885 struct remote_point_info_reorder * remote_src_points, size_t num_src_points,
4886 size_t num_src_fields, size_t tgt_count,
4887 struct interp_weight_stencil_wsum_mf * tgt_stencils, size_t * num_src_per_tgt,
4888 double * weights, size_t * src_idx, size_t * src_field_idx, MPI_Comm comm,
4889 struct yac_interpolation_raw * interp_raw, Xt_config redist_config) {
4890
4891 UNUSED(redist_config);
4892
4893 int comm_size;
4894 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
4895
4896 yac_src_field_exchange_data_realloc(interp_raw, num_src_fields);
4897
4898 // generate exchange information from remote_src_points
4899
4900 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
4902 num_src_fields, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
// one allocation, split into four per-rank arrays
4903 size_t * size_t_buffer =
4904 xmalloc(4 * (size_t)comm_size * sizeof(*size_t_buffer));
4905 size_t * total_sendcounts = size_t_buffer + 0 * comm_size;
4906 size_t * total_recvcounts = size_t_buffer + 1 * comm_size;
4907 size_t * total_sdispls = size_t_buffer + 2 * comm_size;
4908 size_t * total_rdispls = size_t_buffer + 3 * comm_size;
4909
// sendcounts is indexed by rank * num_src_fields + field_idx
4910 for (size_t i = 0; i < num_src_points; ++i)
4911 sendcounts[remote_src_points[i].data.rank * num_src_fields +
4912 remote_src_points[i].field_idx]++;
4913
4915 num_src_fields, sendcounts, recvcounts, sdispls, rdispls, comm);
4916
// collapse the per-(rank, field) counts into per-rank counts/displacements
4917 size_t total_sendcount = 0, total_recvcount = 0;
4918 for (int i = 0; i < comm_size; ++i) {
4919 total_sdispls[i] = total_sendcount;
4920 total_rdispls[i] = total_recvcount;
4921 total_sendcounts[i] = 0;
4922 total_recvcounts[i] = 0;
4923 for (size_t j = 0; j < num_src_fields; ++j) {
4924 total_sendcounts[i] += sendcounts[num_src_fields * i + j];
4925 total_recvcounts[i] += recvcounts[num_src_fields * i + j];
4926 }
4927 total_sendcount += total_sendcounts[i];
4928 total_recvcount += total_recvcounts[i];
4929 }
4930
4931 size_t recv_count = total_recvcounts[comm_size - 1] +
4932 total_rdispls[comm_size - 1];
4933
// single buffer holding send data, reorder indices and receive data
4934 size_t * exchange_buffer =
4935 xmalloc((2 * num_src_points + recv_count) * sizeof(*exchange_buffer));
4936 size_t * send_buffer = exchange_buffer;
4937 size_t * reorder_idx = exchange_buffer + num_src_points;
4938 size_t * recv_buffer = exchange_buffer + 2 * num_src_points;
4939
4940 // pack the original positions of the requested points
// src_field_buffer_size is a variable-length array; after the loop entry f
// holds the number of requested points that belong to source field f
4941 size_t src_field_buffer_size[num_src_fields];
4942 memset(
4943 src_field_buffer_size, 0,
4944 num_src_fields * sizeof(src_field_buffer_size[0]));
4945 for (size_t i = 0; i < num_src_points; ++i) {
4946 size_t curr_src_field_idx = (size_t)(remote_src_points[i].field_idx);
// NOTE(review): the "+ 1" presumably relies on sdispls having a leading
// extra slot -- confirm against the routine that allocates the buffers
4947 size_t pos = sdispls[(size_t)(remote_src_points[i].data.rank) * num_src_fields +
4948 curr_src_field_idx + 1]++;
4949 send_buffer[pos] = (size_t)remote_src_points[i].data.orig_pos;
4950 reorder_idx[pos] = src_field_buffer_size[curr_src_field_idx]++;
4951 }
4952
4953 // exchange original positions of the requested points
4954 yac_alltoallv_size_t_p2p(
4955 send_buffer, total_sendcounts, total_sdispls,
4956 recv_buffer, total_recvcounts, total_rdispls, comm,
4957 "interpolation_raw_add_w_sum_mf", __LINE__);
4958
4959 free(size_t_buffer);
4960
4962 interp_raw->src_field_exchange_data, num_src_fields, comm,
4963 recvcounts, recv_buffer, sendcounts, reorder_idx,
4965
4966 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
4967 free(exchange_buffer);
4968
4969 size_t * tgt_idx = xmalloc(tgt_count * sizeof(*tgt_idx));
4970
// every target point has to be available exactly once (count == 1);
// its original position is used as target index
4971 for (size_t i = 0; i < tgt_count; ++i) {
4972 YAC_ASSERT(
4973 tgt_stencils[i].tgt.data.count == 1,
4974 "ERROR(interpolation_raw_add_w_sum_mf): currently unsupported "
4975 "target point distribution")
4976 tgt_idx[i] = (size_t)(tgt_stencils[i].tgt.data.data.single.orig_pos);
4977 }
4978
4980 &(interp_raw->interp_weights_data), num_src_fields, tgt_count,
4981 tgt_idx, num_src_per_tgt, weights, src_field_idx, src_idx,
4982 src_field_buffer_size);
4983
4984 free(tgt_idx);
4985}
4986
// Adapter taking the interpolation as "void *": casts interp_raw to
// "struct yac_interpolation_raw *" and forwards all arguments unchanged
// (weights and source field indices included).
// NOTE(review): the lines with this function's name and with the name of the
// called routine are not part of this listing; presumably this is the
// callback used for WEIGHT_SUM_MF stencils -- confirm.
4988 struct remote_point_info_reorder * src_points, size_t num_src_points,
4989 size_t num_src_fields, size_t tgt_count,
4990 struct interp_weight_stencil_wsum_mf * tgt_stencils, size_t * num_src_per_tgt,
4991 double * weights, size_t * src_idx, size_t * src_field_idx, MPI_Comm comm,
4992 void * interp_raw, Xt_config redist_config) {
4993
4995 src_points, num_src_points, num_src_fields, tgt_count, tgt_stencils,
4996 num_src_per_tgt, weights, src_idx, src_field_idx, comm,
4997 (struct yac_interpolation_raw *)interp_raw, redist_config);
4998}
4999
// Adapter taking the interpolation as "void *": ignores the weights argument
// and forwards NULL in its place; the source field indices are passed on.
// NOTE(review): the lines with this function's name and with the name of the
// called routine are not part of this listing; presumably this is the
// callback used for SUM_MF stencils -- confirm.
5001 struct remote_point_info_reorder * src_points, size_t num_src_points,
5002 size_t num_src_fields, size_t tgt_count,
5003 struct interp_weight_stencil_wsum_mf * tgt_stencils, size_t * num_src_per_tgt,
5004 double * weights, size_t * src_idx, size_t * src_field_idx, MPI_Comm comm,
5005 void * interp_raw, Xt_config redist_config) {
5006
5007 UNUSED(weights);
5008
5010 src_points, num_src_points, num_src_fields, tgt_count, tgt_stencils,
5011 num_src_per_tgt, NULL, src_idx, src_field_idx, comm,
5012 (struct yac_interpolation_raw *)interp_raw, redist_config);
5013}
5014
// Adapter taking the interpolation as "void *": ignores the source field
// indices and forwards NULL in their place; the weights are passed on.
// NOTE(review): the lines with this function's name and with the name of the
// called routine are not part of this listing; presumably this is the
// callback used for WEIGHT_SUM stencils -- confirm.
5016 struct remote_point_info_reorder * src_points, size_t num_src_points,
5017 size_t num_src_fields, size_t tgt_count,
5018 struct interp_weight_stencil_wsum_mf * tgt_stencils, size_t * num_src_per_tgt,
5019 double * weights, size_t * src_idx, size_t * src_field_idx, MPI_Comm comm,
5020 void * interp_raw, Xt_config redist_config) {
5021
5022 UNUSED(src_field_idx);
5023
5025 src_points, num_src_points, num_src_fields, tgt_count, tgt_stencils,
5026 num_src_per_tgt, weights, src_idx, NULL, comm,
5027 (struct yac_interpolation_raw *)interp_raw, redist_config);
5028}
5029
// Adapter taking the interpolation as "void *": ignores both the weights and
// the source field indices and forwards NULL for each of them.
// NOTE(review): the lines with this function's name and with the name of the
// called routine are not part of this listing; presumably this is the
// callback used for SUM stencils -- confirm.
5031 struct remote_point_info_reorder * src_points, size_t num_src_points,
5032 size_t num_src_fields, size_t tgt_count,
5033 struct interp_weight_stencil_wsum_mf * tgt_stencils, size_t * num_src_per_tgt,
5034 double * weights, size_t * src_idx, size_t * src_field_idx, MPI_Comm comm,
5035 void * interp_raw, Xt_config redist_config) {
5036
5037 UNUSED(weights);
5038 UNUSED(src_field_idx);
5039
5041 src_points, num_src_points, num_src_fields, tgt_count, tgt_stencils,
5042 num_src_per_tgt, NULL, src_idx, NULL, comm,
5043 (struct yac_interpolation_raw *)interp_raw, redist_config);
5044}
5045
// Parameter list and body of the routine that the messages below tag as
// "yac_interp_weights_get_interpolation_raw".
// NOTE(review): the line with the function name and the opening lines of
// most calls are not part of this listing (the listing numbers jump there).
//
// Visible behaviour:
//  - sorts weights->stencils in place and counts them per stencil type,
//    locally and summed over weights->comm,
//  - aborts (YAC_ASSERT) if collection_size differs between processes,
//  - for every stencil type that occurs on any process, runs a type-specific
//    step that receives the local stencils of that type and the temporary
//    interpolation_raw object,
//  - stores the resulting exchange in *interpolation_exchange and a copy of
//    the weights data in *interp_weights_data,
//  - releases the redists, the source field exchange data and the redist
//    configuration before returning.
//
// All processes of weights->comm have to call this together (it contains
// MPI_Allreduce calls on that communicator).
5047 struct yac_interp_weights * weights,
5048 size_t collection_size, double frac_mask_fallback_value,
5049 double scaling_factor, double scaling_summand,
5050 char const * yaxt_exchanger_name,
5051 struct yac_interpolation_exchange ** interpolation_exchange,
5052 struct yac_interp_weights_data * interp_weights_data) {
5053
5054 struct yac_interpolation_raw interpolation_raw;
5055
5056 interpolation_raw.src_field_exchange_data = NULL;
5058 frac_mask_fallback_value, scaling_factor, scaling_summand,
5059 &interpolation_raw.interp_weights_data);
5060
5061 MPI_Comm comm = weights->comm;
5062
5063 Xt_config redist_config = get_redist_config(yaxt_exchanger_name, comm);
5064
5065 // sort stencils by type
5066 qsort(weights->stencils, weights->stencils_size, sizeof(*(weights->stencils)),
5068
5069 size_t local_stencil_counts[WEIGHT_STENCIL_TYPE_SIZE];
5070 size_t stencils_offsets[WEIGHT_STENCIL_TYPE_SIZE];
5071
5072 // count local number of stencils per type
5073 memset(&(local_stencil_counts[0]), 0, sizeof(local_stencil_counts));
5074 for (size_t i = 0; i < weights->stencils_size; ++i)
5075 local_stencil_counts[(int)(weights->stencils[i].type)]++;
5076
// offset of the first stencil of each type within the sorted array
5077 for (size_t i = 0, accu = 0; i < (size_t)WEIGHT_STENCIL_TYPE_SIZE; ++i) {
5078 stencils_offsets[i] = accu;
5079 accu += local_stencil_counts[i];
5080 }
5081
5082 size_t global_stencil_counts[WEIGHT_STENCIL_TYPE_SIZE];
5083
5084 // determine global number of stencils per type
5086 MPI_Allreduce(
5087 local_stencil_counts, global_stencil_counts,
5088 (int)WEIGHT_STENCIL_TYPE_SIZE, YAC_MPI_SIZE_T, MPI_SUM, comm), comm);
5089
5090 { // check whether the collection_size is consistent across all processes
5091 size_t max_collection_size = collection_size;
5093 MPI_Allreduce(
5094 MPI_IN_PLACE, &max_collection_size, 1, YAC_MPI_SIZE_T, MPI_MAX, comm),
5095 comm);
5096 YAC_ASSERT(
5097 (size_t)max_collection_size == collection_size,
5098 "ERROR(yac_interp_weights_get_interpolation_raw): "
5099 "mismatching collection sizes")
5100 }
5101
// the branches below test the *global* counts, so a process without local
// stencils of a type still takes part in the step for that type
5102 if (global_stencil_counts[FIXED] > 0)
5104 weights->comm, local_stencil_counts[FIXED],
5105 weights->stencils + stencils_offsets[FIXED],
5106 (void*)&interpolation_raw, interp_raw_add_fixed);
5107
5108 if (global_stencil_counts[DIRECT] > 0)
5110 weights->comm, local_stencil_counts[DIRECT],
5111 weights->stencils + stencils_offsets[DIRECT],
5112 (void*)&interpolation_raw, interp_add_direct_raw,
5113 redist_config);
5114
// SUM, WEIGHT_SUM, SUM_MF and WEIGHT_SUM_MF follow the same pattern: build a
// temporary interp_weight_stencils_wsum_mf object, process it, then free the
// target points, the data array and the object itself
5115 if (global_stencil_counts[SUM] > 0) {
5116
5117 struct interp_weight_stencils_wsum_mf * wsum_stencils =
5119 weights->stencils + stencils_offsets[SUM],
5120 (size_t)(local_stencil_counts[SUM]), SUM);
5122 weights->comm, wsum_stencils, (void*)&interpolation_raw,
5123 interpolation_raw_add_sum, redist_config);
5124 for (size_t i = 0; i < wsum_stencils->count; ++i)
5125 free_remote_point(wsum_stencils->data[i].tgt);
5126 free(wsum_stencils->data);
5127 free(wsum_stencils);
5128 }
5129
5130 if (global_stencil_counts[WEIGHT_SUM] > 0) {
5131
5132 struct interp_weight_stencils_wsum_mf * wsum_stencils =
5134 weights->stencils + stencils_offsets[WEIGHT_SUM],
5135 (size_t)(local_stencil_counts[WEIGHT_SUM]), WEIGHT_SUM);
5137 weights->comm, wsum_stencils, (void*)&interpolation_raw,
5138 interpolation_raw_add_wsum, redist_config);
5139 for (size_t i = 0; i < wsum_stencils->count; ++i)
5140 free_remote_point(wsum_stencils->data[i].tgt);
5141 free(wsum_stencils->data);
5142 free(wsum_stencils);
5143 }
5144
5145 if (global_stencil_counts[DIRECT_MF] > 0)
5147 weights->comm, local_stencil_counts[DIRECT_MF],
5148 weights->stencils + stencils_offsets[DIRECT_MF],
5149 (void*)&interpolation_raw, interp_add_direct_mf_raw, redist_config);
5150
5151 if (global_stencil_counts[SUM_MF] > 0) {
5152
5153 struct interp_weight_stencils_wsum_mf * sum_mf_stencils =
5155 weights->stencils + stencils_offsets[SUM_MF],
5156 (size_t)(local_stencil_counts[SUM_MF]), SUM_MF);
5158 weights->comm, sum_mf_stencils, (void*)&interpolation_raw,
5159 interpolation_raw_add_sum_mf, redist_config);
5160 for (size_t i = 0; i < sum_mf_stencils->count; ++i)
5161 free_remote_point(sum_mf_stencils->data[i].tgt);
5162 free(sum_mf_stencils->data);
5163 free(sum_mf_stencils);
5164 }
5165
5166 if (global_stencil_counts[WEIGHT_SUM_MF] > 0) {
5167
5168 struct interp_weight_stencils_wsum_mf * wsum_mf_stencils =
5170 weights->stencils + stencils_offsets[WEIGHT_SUM_MF],
5171 (size_t)(local_stencil_counts[WEIGHT_SUM_MF]), WEIGHT_SUM_MF);
5173 weights->comm, wsum_mf_stencils, (void*)&interpolation_raw,
5174 interpolation_raw_add_wsum_mf, redist_config);
5175 for (size_t i = 0; i < wsum_mf_stencils->count; ++i)
5176 free_remote_point(wsum_mf_stencils->data[i].tgt);
5177 free(wsum_mf_stencils->data);
5178 free(wsum_mf_stencils);
5179 }
5180
// build one redist per source field from the collected exchange data and
// create the exchange object from them
5181 Xt_redist * redists =
5183 interpolation_raw.src_field_exchange_data,
5184 interpolation_raw.interp_weights_data.num_src_fields, comm,
5185 redist_config);
5186 int with_frac_mask = YAC_FRAC_MASK_VALUE_IS_VALID(frac_mask_fallback_value);
5187 *interpolation_exchange =
5189 redists, interpolation_raw.interp_weights_data.num_src_fields,
5190 collection_size, with_frac_mask,
5191 "yac_interp_weights_get_interpolation_raw");
// struct copy: the caller receives the weights data by value
5192 *interp_weights_data = interpolation_raw.interp_weights_data;
5193
// the redists are deleted here, after the exchange object has been created
5194 if (redists != NULL) {
5195 for (size_t i = 0; i < interpolation_raw.interp_weights_data.num_src_fields;
5196 ++i)
5197 xt_redist_delete(redists[i]);
5198 free(redists);
5199 }
5200
5202 interpolation_raw.src_field_exchange_data,
5203 interpolation_raw.interp_weights_data.num_src_fields);
5204
5205 xt_config_delete(redist_config);
5206}
5207
// Parameter list and body of the wrapper that the message below tags as
// "yac_interp_weights_get_interpolation_f2c".
// NOTE(review): the line with the function name, one parameter line and the
// lines naming the called routine are not part of this listing.
//
// reorder arrives as a plain int; it must equal YAC_MAPPING_ON_SRC or
// YAC_MAPPING_ON_TGT and is then cast to
// enum yac_interp_weights_reorder_type. A NULL or empty yaxt_exchanger_name
// is forwarded as NULL.
5209 struct yac_interp_weights * weights, int reorder,
5211 double scaling_factor, double scaling_summand,
5212 char const * yaxt_exchanger_name) {
5213
5214 YAC_ASSERT(
5215 (reorder == YAC_MAPPING_ON_SRC) ||
5216 (reorder == YAC_MAPPING_ON_TGT),
5217 "ERROR(yac_interp_weights_get_interpolation_f2c): "
5218 "reorder type must be of YAC_MAPPING_ON_SRC/YAC_MAPPING_ON_TGT");
5219
5220 return
5222 weights, (enum yac_interp_weights_reorder_type)reorder,
5224 scaling_factor, scaling_summand,
// normalise "no exchanger given": empty string is treated like NULL
5225 ((yaxt_exchanger_name != NULL) && (yaxt_exchanger_name[0] != '\0'))?
5226 yaxt_exchanger_name:NULL);
5227}
5228
5230
// Body of a helper that releases a "points" object: first the array it
// refers to (points->data), then the object itself.
// NOTE(review): the signature line is not part of this listing; the sibling
// code calls free_remote_points() with "struct remote_points *" arguments,
// so this is presumably that helper -- confirm.
5231 free(points->data);
5232 free(points);
5233}
5234
// Frees an array of "count" stencils, including everything each stencil owns.
// NOTE(review): the line with the function name is not part of this listing;
// the error message below is tagged "yac_interp_weights_delete".
//
// Per stencil the type-specific source data is released (source point(s),
// and where present the weights and field index arrays), then the target
// point. FIXED stencils own no source data. Finally the array is freed.
5236 struct interp_weight_stencil * stencils, size_t count) {
5237
5238 for (size_t i = 0 ; i < count; ++i) {
5239
5240 YAC_ASSERT(
5241 (stencils[i].type == DIRECT) ||
5242 (stencils[i].type == SUM) ||
5243 (stencils[i].type == WEIGHT_SUM) ||
5244 (stencils[i].type == DIRECT_MF) ||
5245 (stencils[i].type == SUM_MF) ||
5246 (stencils[i].type == WEIGHT_SUM_MF) ||
5247 (stencils[i].type == FIXED),
5248 "ERROR(yac_interp_weights_delete): invalid stencil type")
5249 switch(stencils[i].type) {
5250
5251 case(DIRECT):
5252 free_remote_point(stencils[i].data.direct.src);
5253 break;
5254 case(SUM):
5255 free_remote_points(stencils[i].data.sum.srcs);
5256 break;
5257 case(WEIGHT_SUM):
5258 free_remote_points(stencils[i].data.weight_sum.srcs);
5259 free(stencils[i].data.weight_sum.weights);
5260 break;
5261 case(DIRECT_MF):
5262 free_remote_point(stencils[i].data.direct_mf.src);
5263 break;
5264 case(SUM_MF):
5265 free_remote_points(stencils[i].data.sum_mf.srcs);
5266 free(stencils[i].data.sum_mf.field_indices);
5267 break;
5268 case (WEIGHT_SUM_MF):
5269 free_remote_points(stencils[i].data.weight_sum_mf.srcs);
5270 free(stencils[i].data.weight_sum_mf.weights);
5271 free(stencils[i].data.weight_sum_mf.field_indices);
5272 break;
// default shares the FIXED branch: nothing type-specific to free
5273 default:
5274 case(FIXED):
5275 break;
5276 };
// the target point is released for every stencil type
5277 free_remote_point(stencils[i].tgt);
5278 }
5279 free(stencils);
5280}
5281
5282#ifdef YAC_NETCDF_ENABLED
// qsort()-compatible comparison of two double values.
//
// a, b: pointers to the doubles to compare
// returns -1 if *a < *b, 1 if *a > *b and 0 otherwise; 0 is also returned
// if one of the values is NaN, because both comparisons are false then
static int compare_double(void const * a, void const * b) {

  double const lhs = *(double const *)a;
  double const rhs = *(double const *)b;

  // difference of the two comparison results: avoids branches and cannot
  // overflow (unlike subtracting the values)
  return (lhs > rhs) - (lhs < rhs);
}
5288
// Parameter list and body of the routine that the messages below tag as
// "create_weight_file".
// NOTE(review): the line with the function name and the opening lines of
// most YAC_HANDLE_ERROR(...) / assertion macros are not part of this listing
// (the listing numbers jump there).
//
// Creates the NetCDF file "filename" (an existing file is overwritten,
// NC_CLOBBER), defines its dimensions, variables and global attributes and
// writes the data that does not depend on the individual links:
//   - num_links_per_src_field (only if num_links > 0)
//   - src_locations / dst_location (as strings from yac_loc2str)
//   - fixed_values / num_dst_per_fixed_value (only if num_fixed_values > 0)
// The link variables (src_address, dst_address, remap_matrix) and
// dst_address_fixed are only defined here, not written.
//
// Slots of dim_weight_id:
//   [0] num_links       [1] num_wgts          [2] num_src_fields
//   [3] max_loc_str_len [4] num_fixed_values  [5] num_fixed_dst
//   [6] src_grid_size   [7] dst_grid_size
// Slots 0/1, 4/5, 6 and 7 are only set if the respective count is > 0.
5293 char const * filename, char const * src_grid_name, char const * tgt_grid_name,
5294 size_t num_fixed_values, double * fixed_values,
5295 size_t * num_tgt_per_fixed_value, size_t num_links,
5296 size_t num_weights_per_link, size_t num_src_fields,
5297 size_t * num_links_per_src_field,
5298 enum yac_location * src_locations, enum yac_location tgt_location,
5299 size_t src_grid_size, size_t tgt_grid_size) {
5300
5301 int ncid;
5302
5303 // create file
5304 yac_nc_create(filename, NC_CLOBBER | NC_64BIT_OFFSET, &ncid);
5305
5306 int dim_weight_id[8];
5307
5308 // define dimensions
5309 if (num_links > 0) {
5310 YAC_HANDLE_ERROR(nc_def_dim(ncid, "num_links", num_links, &dim_weight_id[0]));
5312 num_weights_per_link > 0,
5313 "ERROR(create_weight_file): number of links is %zu but number of "
5314 "weights per link is zero for weight file %s", num_links, filename)
5316 nc_def_dim(ncid, "num_wgts", num_weights_per_link, &dim_weight_id[1]));
5317 }
5319 num_src_fields > 0,
5320 "ERROR(create_weight_file): number of source fields is zero for "
5321 "weight file %s", filename)
5323 nc_def_dim(ncid, "num_src_fields", num_src_fields, &dim_weight_id[2]));
5325 nc_def_dim(
5326 ncid, "max_loc_str_len", YAC_MAX_LOC_STR_LEN, &dim_weight_id[3]));
5327
5328 if (num_fixed_values > 0) {
5330 nc_def_dim(
5331 ncid, "num_fixed_values", num_fixed_values, &dim_weight_id[4]));
// total number of target points that receive a fixed value
5332 size_t num_fixed_dst = 0;
5333 for (size_t i = 0; i < num_fixed_values; ++i)
5334 num_fixed_dst += num_tgt_per_fixed_value[i];
// NOTE(review): the message announces the "number of fixed values" but the
// argument passed for %zu is num_fixed_dst (which is zero whenever this
// fires) -- num_fixed_values was probably intended; confirm
5336 num_fixed_dst > 0,
5337 "ERROR(create_weight_file): number of fixed values is %zu but number "
5338 "of fixed destination points is zero for weight file %s",
5339 num_fixed_dst, filename)
5341 nc_def_dim(ncid, "num_fixed_dst", num_fixed_dst, &dim_weight_id[5]));
5342 }
5343
5344 if (src_grid_size > 0)
5346 nc_def_dim(ncid, "src_grid_size", src_grid_size, &dim_weight_id[6]));
5347
5348 if (tgt_grid_size > 0)
5350 nc_def_dim(ncid, "dst_grid_size", tgt_grid_size, &dim_weight_id[7]));
5351
5352 int var_src_add_id, var_dst_add_id, var_weight_id, var_num_links_id,
5353 src_var_locs_id, tgt_var_loc_id, var_fixed_values_id,
5354 var_num_dst_per_fixed_value_id, var_dst_add_fixed_id;
5355
5356 // define variables
5357 if (num_links > 0) {
// src_address/dst_address use dimension [0] (num_links); remap_matrix uses
// dimensions [0] and [1] (num_links x num_wgts)
5359 nc_def_var(
5360 ncid, "src_address", NC_INT, 1, dim_weight_id, &var_src_add_id));
5362 nc_def_var(
5363 ncid, "dst_address", NC_INT, 1, dim_weight_id, &var_dst_add_id));
5365 nc_def_var(
5366 ncid, "remap_matrix", NC_DOUBLE, 2, dim_weight_id, &var_weight_id));
5368 nc_def_var(ncid, "num_links_per_src_field", NC_INT, 1,
5369 &dim_weight_id[2], &var_num_links_id));
5370 }
// src_locations uses dimensions [2] and [3] (num_src_fields x max_loc_str_len)
5372 nc_def_var(
5373 ncid, "src_locations", NC_CHAR, 2, &dim_weight_id[2], &src_var_locs_id));
5375 nc_def_var(
5376 ncid, "dst_location", NC_CHAR, 1, &dim_weight_id[3], &tgt_var_loc_id));
5377 if (num_fixed_values > 0) {
5379 nc_def_var(ncid, "fixed_values", NC_DOUBLE, 1, &dim_weight_id[4],
5380 &var_fixed_values_id));
5382 nc_def_var(ncid, "num_dst_per_fixed_value", NC_INT, 1, &dim_weight_id[4],
5383 &var_num_dst_per_fixed_value_id));
5385 nc_def_var(ncid, "dst_address_fixed", NC_INT, 1, &dim_weight_id[5],
5386 &var_dst_add_fixed_id));
5387 }
5388
5389 // put attributes
5391 nc_put_att_text(ncid, NC_GLOBAL, "version",
5395 nc_put_att_text(ncid, NC_GLOBAL, "src_grid_name",
5396 strlen(src_grid_name), src_grid_name));
5398 nc_put_att_text(ncid, NC_GLOBAL, "dst_grid_name",
5399 strlen(tgt_grid_name), tgt_grid_name));
5400 {
// the comparison result (0 or 1) selects "FALSE" or "TRUE"
5401 char const * str_logical[2] = {"FALSE", "TRUE"};
5402 YAC_HANDLE_ERROR(nc_put_att_text(ncid, NC_GLOBAL, "contains_links",
5403 strlen(str_logical[num_links > 0]),
5404 str_logical[num_links > 0]));
5405 YAC_HANDLE_ERROR(nc_put_att_text(ncid, NC_GLOBAL, "contains_fixed_dst",
5406 strlen(str_logical[num_fixed_values > 0]),
5407 str_logical[num_fixed_values > 0]));
5408 }
5409
5410 // end definition
5411 YAC_HANDLE_ERROR(nc_enddef(ncid));
5412
5413 // write some basic data
5414
5415 if (num_links > 0) {
// the counts are stored as NC_INT, hence the conversion to int with a
// range check
5416 int * num_links_per_src_field_int =
5417 xmalloc(num_src_fields * sizeof(*num_links_per_src_field_int));
5418 for (size_t i = 0; i < num_src_fields; ++i) {
5419 YAC_ASSERT(
5420 num_links_per_src_field[i] <= INT_MAX,
5421 "ERROR(create_weight_file): "
5422 "number of links per source field too big (not yet supported)")
5423 num_links_per_src_field_int[i] = (int)num_links_per_src_field[i];
5424 }
5426 nc_put_var_int(ncid, var_num_links_id, num_links_per_src_field_int));
5427 free(num_links_per_src_field_int);
5428 }
5429
// one row per source field; only strlen(loc_str) characters are written
5430 for (size_t i = 0; i < num_src_fields; ++i) {
5431 char const * loc_str = yac_loc2str(src_locations[i]);
5432 size_t str_start[2] = {i, 0};
5433 size_t str_count[2] = {1, strlen(loc_str)};
5435 nc_put_vara_text(ncid, src_var_locs_id, str_start, str_count, loc_str));
5436 }
5437
5438 {
5439 char const * loc_str = yac_loc2str(tgt_location);
5440 size_t str_start[1] = {0};
5441 size_t str_count[1] = {strlen(loc_str)};
5443 nc_put_vara_text(ncid, tgt_var_loc_id, str_start, str_count, loc_str));
5444 }
5445 if (num_fixed_values > 0) {
5446
5447 int * num_tgt_per_fixed_value_int =
5448 xmalloc(num_fixed_values * sizeof(*num_tgt_per_fixed_value_int));
// NOTE(review): the index is "unsigned" while num_fixed_values is size_t;
// the other loops in this routine use size_t
5449 for (unsigned i = 0; i < num_fixed_values; ++i) {
5450 YAC_ASSERT(
5451 num_tgt_per_fixed_value[i] <= INT_MAX,
5452 "ERROR(create_weight_file): "
5453 "number of targets per fixed value is too big (not yet supported)")
5454 num_tgt_per_fixed_value_int[i] = (int)num_tgt_per_fixed_value[i];
5455 }
5456 YAC_HANDLE_ERROR(nc_put_var_double(ncid, var_fixed_values_id, fixed_values));
5457 YAC_HANDLE_ERROR(nc_put_var_int(ncid, var_num_dst_per_fixed_value_id,
5458 num_tgt_per_fixed_value_int));
5459 free(num_tgt_per_fixed_value_int);
5460 }
5461
5462 // close file
5463 YAC_HANDLE_ERROR(nc_close(ncid));
5464}
5465
5466static int compare_interp_weight_stencil(const void * a, const void * b) {
5467
5468 int a_is_fixed = (((struct interp_weight_stencil *)a)->type == FIXED);
5469 int b_is_fixed = (((struct interp_weight_stencil *)b)->type == FIXED);
5470 int ret = b_is_fixed - a_is_fixed;
5471
5472 if (ret) return ret;
5473
5474 // if both are fixed stencils
5475 if (a_is_fixed) {
5476
5477 double fixed_value_a =
5478 ((struct interp_weight_stencil *)a)->data.fixed.value;
5479 double fixed_value_b =
5480 ((struct interp_weight_stencil *)b)->data.fixed.value;
5481 ret = (fixed_value_a > fixed_value_b) -
5482 (fixed_value_a < fixed_value_b);
5483 if (ret) return ret;
5484 }
5485
5486 return (((struct interp_weight_stencil *)a)->tgt.global_id >
5487 ((struct interp_weight_stencil *)b)->tgt.global_id) -
5488 (((struct interp_weight_stencil *)a)->tgt.global_id <
5489 ((struct interp_weight_stencil *)b)->tgt.global_id);
5490}
5491
// Determines the smallest and largest target global id over the stencils of
// all processes in comm and returns them through min_tgt_global_id and
// max_tgt_global_id.
// NOTE(review): the line with the function name and the opening line of the
// call wrapping MPI_Allreduce are not part of this listing.
//
// The local minimum is stored as (XT_INT_MAX - min), which turns it into a
// value that grows as the minimum shrinks; that way a single MPI_MAX
// reduction over both entries yields the global minimum and maximum.
// A process without stencils contributes {XT_INT_MAX, XT_INT_MIN}, i.e.
// the neutral values.
5493 struct interp_weight_stencil * stencils, size_t stencils_size,
5494 yac_int * min_tgt_global_id, yac_int * max_tgt_global_id, MPI_Comm comm) {
5495
5496 yac_int min_max[2] = {XT_INT_MAX, XT_INT_MIN};
5497
5498 for (size_t i = 0; i < stencils_size; ++i) {
5499
5500 yac_int curr_id = stencils[i].tgt.global_id;
5501 if (curr_id < min_max[0]) min_max[0] = curr_id;
5502 if (curr_id > min_max[1]) min_max[1] = curr_id;
5503 }
5504
5505 min_max[0] = XT_INT_MAX - min_max[0];
5506
5508 MPI_Allreduce(
5509 MPI_IN_PLACE, min_max, 2, yac_int_dt, MPI_MAX, comm), comm);
5510
// undo the transformation applied before the reduction
5511 *min_tgt_global_id = XT_INT_MAX - min_max[0];
5512 *max_tgt_global_id = min_max[1];
5513}
5514
// Assigns every stencil to one of num_io_procs_int IO processes based on
// the global id of its target point; the result is written to
// io_owner[0 .. stencils_size-1] as an index in [0, num_io_procs_int - 1].
// NOTE(review): the line with the function name is not part of this listing.
//
// The id range [min_tgt_global_id, max_tgt_global_id] is mapped linearly
// onto the IO processes. id_range is at least 1, which avoids a division by
// zero if all ids are identical; MIN() clamps the largest id to the last
// IO process.
5516 struct interp_weight_stencil * stencils, size_t stencils_size,
5517 yac_int min_tgt_global_id, yac_int max_tgt_global_id,
5518 int num_io_procs_int, int * io_owner) {
5519
5520 long long num_io_procs = (long long)num_io_procs_int;
5521 long long id_range =
5522 MAX((long long)(max_tgt_global_id - min_tgt_global_id),1);
5523
5524 for (size_t i = 0; i < stencils_size; ++i)
5525 io_owner[i] =
5526 ((int)(MIN(((long long)(stencils[i].tgt.global_id - min_tgt_global_id) *
5527 num_io_procs) / id_range, num_io_procs - 1)));
5528}
5529
// Collects the distinct fixed values used by the FIXED stencils of all
// processes in comm. On return *fixed_values points to a newly allocated,
// sorted array without duplicates and *num_fixed_values holds its length.
// NOTE(review): the line with the function name, the comparison callbacks
// passed to qsort and the opening lines of the calls wrapping the MPI
// routines are not part of this listing.
//
// Only the leading run of FIXED stencils is read (the scan stops at the
// first stencil of another type), so the caller has to pass the stencils
// with all FIXED ones first.
5531 struct interp_weight_stencil * stencils, size_t stencil_count,
5532 double ** fixed_values, size_t * num_fixed_values, MPI_Comm comm) {
5533
5534 int comm_size;
5535 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
5536
5537 double * local_fixed_values =
5538 xmalloc(stencil_count * sizeof(*local_fixed_values));
5539
5540 int * int_buffer = xmalloc(2 * (size_t)comm_size * sizeof(*int_buffer));
5541 int * recvcounts = int_buffer + 0 * comm_size;
5542 int * rdispls = int_buffer + 1 * comm_size;
5543
5544 size_t local_num_fixed = 0;
5545
5546 // get all local fixed values
// local_num_fixed is advanced together with i and therefore ends up as the
// number of leading FIXED stencils
5547 for (size_t i = 0; i < stencil_count;
5548 ++i, ++local_num_fixed) {
5549 if (stencils[i].type != FIXED) break;
5550 local_fixed_values[i] = stencils[i].data.fixed.value;
5551 }
5552 qsort(local_fixed_values, local_num_fixed, sizeof(*local_fixed_values),
5554 yac_remove_duplicates_double(local_fixed_values, &local_num_fixed);
5555
5556 // get number of fixed values per rank
5557 int local_num_fixed_int = (int)(local_num_fixed);
5559 MPI_Allgather(
5560 &local_num_fixed_int, 1, MPI_INT, recvcounts, 1,MPI_INT, comm), comm);
5561 for (int i = 0, accu = 0; i < comm_size; ++i) {
5562 rdispls[i] = accu;
5563 accu += recvcounts[i];
5564 }
5565
5566 size_t num_all_fixed_values = 0;
5567 for (int i = 0; i < comm_size; ++i)
5568 num_all_fixed_values += (size_t)(recvcounts[i]);
5569
5570 double * all_fixed_values =
5571 xmalloc(num_all_fixed_values * sizeof(*all_fixed_values));
5572
5573 // gather all fixed values
5575 MPI_Allgatherv(
5576 local_fixed_values, local_num_fixed_int, MPI_DOUBLE,
5577 all_fixed_values, recvcounts, rdispls, MPI_DOUBLE, comm), comm);
5578 free(int_buffer);
5579 free(local_fixed_values);
5580
// different ranks may contribute the same value: sort and deduplicate again
5581 qsort(all_fixed_values, num_all_fixed_values, sizeof(*all_fixed_values),
5583 yac_remove_duplicates_double(all_fixed_values, &num_all_fixed_values);
// shrink the array to the number of distinct values
5584 *fixed_values = xrealloc(all_fixed_values,
5585 num_all_fixed_values * sizeof(*all_fixed_values));
5586 *num_fixed_values = num_all_fixed_values;
5587}
5588
5589static size_t get_num_weights_per_link(struct interp_weight_stencil * stencil) {
5590
5591 YAC_ASSERT(
5592 (stencil->type == FIXED) ||
5593 (stencil->type == DIRECT) ||
5594 (stencil->type == SUM) ||
5595 (stencil->type == WEIGHT_SUM) ||
5596 (stencil->type == DIRECT_MF) ||
5597 (stencil->type == SUM_MF) ||
5598 (stencil->type == WEIGHT_SUM_MF),
5599 "ERROR(get_num_weights_per_link): invalid stencil type")
5600
5601 return (stencil->type == FIXED)?0:1;
5602}
5603
// Returns the largest get_num_weights_per_link() value over the stencils of
// all processes in comm (0 if no process has any stencil).
// NOTE(review): the line with the function name/return type and the opening
// line of the call wrapping MPI_Allreduce are not part of this listing.
5605 struct interp_weight_stencil * stencils, size_t stencil_count,
5606 MPI_Comm comm) {
5607
// local maximum
5608 size_t num_weights_per_link = 0;
5609 for (size_t i = 0; i < stencil_count; ++i)
5610 num_weights_per_link =
5611 MAX(num_weights_per_link, get_num_weights_per_link(stencils + i));
5612
// global maximum
5613 size_t num_weights_per_link_64_t = num_weights_per_link;
5615 MPI_Allreduce(
5616 MPI_IN_PLACE, &num_weights_per_link_64_t, 1, YAC_MPI_SIZE_T,
5617 MPI_MAX, comm), comm);
5618 num_weights_per_link = (size_t)num_weights_per_link_64_t;
5619
5620 return num_weights_per_link;
5621}
5622
// Returns how many links of the given stencil refer to source field
// src_field_idx.
// NOTE(review): the line with the function name/return type is not part of
// this listing; the messages below are tagged "get_num_links_per_src_field".
//
// FIXED stencils are rejected. The single-field types (DIRECT, SUM,
// WEIGHT_SUM) attribute all of their links to source field 0; the
// multi-field types count the entries whose field index matches.
// The ';' after the opening brace is an empty statement.
5624 struct interp_weight_stencil * stencil, size_t src_field_idx) {;
5625
5626 YAC_ASSERT(
5627 stencil->type != FIXED,
5628 "ERROR(get_num_links_per_src_field): "
5629 "stencil type FIXED not supported by this routine")
5630 YAC_ASSERT(
5631 (stencil->type == DIRECT) ||
5632 (stencil->type == SUM) ||
5633 (stencil->type == WEIGHT_SUM) ||
5634 (stencil->type == DIRECT_MF) ||
5635 (stencil->type == SUM_MF) ||
5636 (stencil->type == WEIGHT_SUM_MF),
5637 "ERROR(get_num_links_per_src_field): invalid stencil type")
5638 switch (stencil->type) {
// default shares the DIRECT branch
5639 default:
5640 case(DIRECT): return (src_field_idx == 0)?1:0;
5641 case(SUM): return (src_field_idx == 0)?stencil->data.sum.srcs->count:0;
5642 case(WEIGHT_SUM):
5643 return (src_field_idx == 0)?stencil->data.weight_sum.srcs->count:0;
// comparison result: 1 if the stencil's field matches, 0 otherwise
5644 case(DIRECT_MF): return stencil->data.direct_mf.field_idx == src_field_idx;
5645 case(SUM_MF): {
5646 size_t count = 0;
5647 size_t stencil_size = stencil->data.sum_mf.srcs->count;
5648 size_t * field_indices = stencil->data.sum_mf.field_indices;
5649 for (size_t i = 0; i < stencil_size; ++i)
5650 if (field_indices[i] == src_field_idx) ++count;
5651 return count;
5652 }
5653 case(WEIGHT_SUM_MF): {
5654 size_t count = 0;
5655 size_t stencil_size = stencil->data.weight_sum_mf.srcs->count;
5656 size_t * field_indices = stencil->data.weight_sum_mf.field_indices;
5657 for (size_t i = 0; i < stencil_size; ++i)
5658 if (field_indices[i] == src_field_idx) ++count;
5659 return count;
5660 }
5661 };
5662}
5663
// Computes the local counts for the given stencils:
//   num_tgt_per_fixed_value[j]  number of FIXED stencils using fixed_values[j]
//   *num_fixed_tgt              number of FIXED stencils
//   num_links_per_src_field[f]  number of links referring to source field f
//   *num_links                  sum of num_links_per_src_field
// All outputs are reset before counting.
// NOTE(review): the line with the function name is not part of this listing.
5665 struct interp_weight_stencil * stencils, size_t stencil_count,
5666 size_t num_fixed_values, double * fixed_values,
5667 size_t * num_tgt_per_fixed_value,
5668 size_t * num_fixed_tgt, size_t num_src_fields,
5669 size_t * num_links_per_src_field, size_t * num_links) {
5670
5671 *num_fixed_tgt = 0;
5672 *num_links = 0;
5673 for (size_t i = 0; i < num_fixed_values; ++i) num_tgt_per_fixed_value[i] = 0;
5674 for (size_t i = 0; i < num_src_fields; ++i) num_links_per_src_field[i] = 0;
5675
5676 for (size_t i = 0; i < stencil_count; ++i) {
5677 if (stencils[i].type == FIXED) {
5678 double curr_fixed_value = stencils[i].data.fixed.value;
// linear search with exact floating-point comparison; a value that does not
// occur in fixed_values is still counted in *num_fixed_tgt below
5679 for (size_t j = 0; j < num_fixed_values; ++j) {
5680 if (curr_fixed_value == fixed_values[j]) {
5681 num_tgt_per_fixed_value[j]++;
5682 break;
5683 }
5684 }
5685 ++*num_fixed_tgt;
5686 } else {
5687 for (size_t j = 0; j < num_src_fields; ++j) {
5688 num_links_per_src_field[j] +=
5689 get_num_links_per_src_field(stencils + i, j);
5690 }
5691 }
5692 }
5693 for (size_t i = 0; i < num_src_fields; ++i)
5694 *num_links += num_links_per_src_field[i];
5695}
5696
// Computes, for the calling process, the start offsets of its entries in
// the global (all processes of comm) ordering:
//   fixed_offsets[j]  offset of this process' targets for fixed value j
//   link_offsets[f]   offset of this process' links for source field f
// Entries are grouped by fixed value / source field first and by rank within
// each group: offset = entries of lower ranks in the same group + total size
// of all preceding groups.
// NOTE(review): the line with the function name and the opening lines of the
// calls wrapping the MPI routines are not part of this listing.
5698 size_t num_fixed_values, size_t * num_tgt_per_fixed_value,
5699 size_t num_src_fields, size_t * num_links_per_src_field,
5700 size_t * fixed_offsets, size_t * link_offsets, MPI_Comm comm) {
5701
5702 int comm_rank;
5703 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
5704
// one allocation, split into three arrays of "count" entries; each array
// holds the fixed-value entries followed by the source-field entries
5705 size_t count = num_fixed_values + num_src_fields;
5706 size_t * size_t_buffer = xmalloc(3 * count * sizeof(*size_t_buffer));
5707 size_t * global_counts = size_t_buffer + 0 * count;
5708 size_t * local_counts = size_t_buffer + 1 * count;
5709 size_t * offsets = size_t_buffer + 2 * count;
5710
5711 for (size_t i = 0; i < num_fixed_values; ++i)
5712 local_counts[i] = num_tgt_per_fixed_value[i];
5713 for (size_t i = 0; i < num_src_fields; ++i)
5714 local_counts[num_fixed_values + i] = num_links_per_src_field[i];
5715
5717 MPI_Allreduce(local_counts, global_counts, (int)count, YAC_MPI_SIZE_T,
5718 MPI_SUM, comm), comm);
5720 MPI_Exscan(local_counts, offsets, (int)count, YAC_MPI_SIZE_T, MPI_SUM, comm),
5721 comm);
// MPI leaves the MPI_Exscan result on rank 0 undefined; it has to be zero
5722 if (comm_rank == 0) memset(offsets, 0, count * sizeof(*offsets));
5723
5724 for (size_t i = 0, accu = 0; i < num_fixed_values; ++i) {
5725 fixed_offsets[i] = (size_t)(offsets[i]) + accu;
5726 accu += (size_t)(global_counts[i]);
5727 }
// accu restarts at 0: link offsets are independent of the fixed offsets
5728 for (size_t i = 0, accu = 0; i < num_src_fields; ++i) {
5729 link_offsets[i] = (size_t)(offsets[i+num_fixed_values]) + accu;
5730 accu += (size_t)(global_counts[i+num_fixed_values]);
5731 }
5732 free(size_t_buffer);
5733}
5734
5735static int global_id_to_address(yac_int global_id) {
5736
5737 YAC_ASSERT(
5738 (global_id < INT_MAX) && (global_id != XT_INT_MAX),
5739 "ERROR(global_id_to_address): "
5740 "a global id cannot be converted into a address; too big")
5741 return (int)global_id + 1;
5742}
5743
// Writes the address of the target point of every stencil to
// tgt_address[0 .. stencil_count-1], using global_id_to_address() on the
// target's global id.
// NOTE(review): the line with the function name is not part of this listing.
5745 struct interp_weight_stencil * stencils, size_t stencil_count,
5746 int * tgt_address) {
5747
5748 for (size_t i = 0; i < stencil_count; ++i)
5749 tgt_address[i] = global_id_to_address(stencils[i].tgt.global_id);
5750}
5751
// Expands the given (non-FIXED) stencils into per-link arrays: for every
// link the source address, the target address and the weight are written to
// src_address, tgt_address and weight.
// NOTE(review): the line with the function name, the right-hand sides of the
// src_address assignments and the name of the call at the end of the stencil
// loop are not part of this listing; the messages below are tagged
// "stencil_get_link_data".
//
// The output arrays are organised in one section per source field; section
// f starts at the sum of num_links_per_src_field[0 .. f-1]. Within a
// section the links appear in stencil order. Stencil types without explicit
// weights use a weight of 1.0.
5753 struct interp_weight_stencil * stencils, size_t stencil_count,
5754 size_t * num_links_per_src_field, size_t num_src_fields,
5755 int * src_address, int * tgt_address, double * weight) {
5756
// src_field_offsets[f]: next free position in the section of source field f;
// prev_src_field_offsets: copy taken before the current stencil is processed
5757 size_t * src_field_offsets =
5758 xmalloc(2 * num_src_fields * sizeof(*src_field_offsets));
5759 size_t * prev_src_field_offsets = src_field_offsets + num_src_fields;
5760 for (size_t i = 0, accu = 0; i < num_src_fields; ++i) {
5761 src_field_offsets[i] = accu;
5762 accu += num_links_per_src_field[i];
5763 }
5764
5765 struct interp_weight_stencil * curr_stencil = stencils;
5766 for (size_t i = 0; i < stencil_count; ++i, ++curr_stencil) {
5767
5768 memcpy(prev_src_field_offsets, src_field_offsets,
5769 num_src_fields * sizeof(*prev_src_field_offsets));
5770
5771 int curr_tgt_address = global_id_to_address(curr_stencil->tgt.global_id);
5772 YAC_ASSERT(
5773 curr_stencil->type != FIXED,
5774 "ERROR(stencil_get_link_data): this call is invalid for FIXED stencils")
5775 YAC_ASSERT(
5776 (curr_stencil->type == DIRECT) ||
5777 (curr_stencil->type == SUM) ||
5778 (curr_stencil->type == WEIGHT_SUM) ||
5779 (curr_stencil->type == DIRECT_MF) ||
5780 (curr_stencil->type == SUM_MF) ||
5781 (curr_stencil->type == WEIGHT_SUM_MF),
5782 "ERROR(stencil_get_link_data): invalid stencil type")
5783 size_t src_field_offset;
5784 switch (curr_stencil->type) {
// single-field types (DIRECT, SUM, WEIGHT_SUM) always use section 0
5785 default:
5786 case(DIRECT):
5787 src_field_offset = src_field_offsets[0]++;
5788 src_address[src_field_offset] =
5790 tgt_address[src_field_offset] = curr_tgt_address;
5791 weight[src_field_offset] = 1.0;
5792 break;
5793 case(SUM): {
5794 size_t curr_count = curr_stencil->data.sum.srcs->count;
5795 struct remote_point * srcs = curr_stencil->data.sum.srcs->data;
5796 for (size_t k = 0; k < curr_count; ++k) {
5797 src_field_offset = src_field_offsets[0]++;
5798 src_address[src_field_offset] =
5800 tgt_address[src_field_offset] = curr_tgt_address;
5801 weight[src_field_offset] = 1.0;
5802 }
5803 break;
5804 }
5805 case(WEIGHT_SUM): {
5806 size_t curr_count = curr_stencil->data.weight_sum.srcs->count;
5807 struct remote_point * srcs = curr_stencil->data.weight_sum.srcs->data;
5808 double * weights = curr_stencil->data.weight_sum.weights;
5809 for (size_t k = 0; k < curr_count; ++k) {
5810 src_field_offset = src_field_offsets[0]++;
5811 src_address[src_field_offset] =
5813 tgt_address[src_field_offset] = curr_tgt_address;
5814 weight[src_field_offset] = weights[k];
5815 }
5816 break;
5817 }
// multi-field types pick the section from the stored field index
5818 case(DIRECT_MF):
5819 src_field_offset =
5820 src_field_offsets[curr_stencil->data.direct_mf.field_idx]++;
5821 src_address[src_field_offset ] =
5823 tgt_address[src_field_offset ] = curr_tgt_address;
5824 weight[src_field_offset ] = 1.0;
5825 break;
5826 case(SUM_MF): {
5827 size_t curr_count = curr_stencil->data.sum_mf.srcs->count;
5828 struct remote_point * srcs =
5829 curr_stencil->data.sum_mf.srcs->data;
5830 size_t * field_indices = curr_stencil->data.sum_mf.field_indices;
5831 for (size_t k = 0; k < curr_count; ++k) {
5832 src_field_offset = src_field_offsets[field_indices[k]]++;
5833 src_address[src_field_offset] =
5835 tgt_address[src_field_offset] = curr_tgt_address;
5836 weight[src_field_offset] = 1.0;
5837 }
5838 break;
5839 }
5840 case(WEIGHT_SUM_MF): {
5841 size_t curr_count = curr_stencil->data.weight_sum_mf.srcs->count;
5842 struct remote_point * srcs =
5843 curr_stencil->data.weight_sum_mf.srcs->data;
5844 double * weights = curr_stencil->data.weight_sum_mf.weights;
5845 size_t * field_indices = curr_stencil->data.weight_sum_mf.field_indices;
5846 for (size_t k = 0; k < curr_count; ++k) {
5847 src_field_offset = src_field_offsets[field_indices[k]]++;
5848 src_address[src_field_offset] =
5850 tgt_address[src_field_offset] = curr_tgt_address;
5851 weight[src_field_offset] = weights[k];
5852 }
5853 break;
5854 }
5855 };
5856
// post-process, per source field, the links that the current stencil just
// added (range prev offset .. current offset); the call receives the source
// addresses and the matching weights
// NOTE(review): presumably a sort of the links by source address -- the
// name of the called routine is not visible here; confirm
5857 for (size_t j = 0; j < num_src_fields; ++j)
5859 src_address + prev_src_field_offsets[j],
5860 src_field_offsets[j] - prev_src_field_offsets[j],
5861 weight + prev_src_field_offsets[j]);
5862 }
5863 free(src_field_offsets);
5864}
5865
// Sends every stencil to the rank given in owner_ranks[i] and returns the
// stencils received by the calling process in *new_stencils (*new_count
// entries).
// NOTE(review): the line with the function name and the opening lines of two
// calls that receive the count/displacement arrays are not part of this
// listing.
//
// owner_ranks is sorted in place as a side effect. The input stencils are
// not freed here.
5867 MPI_Comm comm, size_t count, struct interp_weight_stencil * stencils,
5868 int * owner_ranks, size_t * new_count,
5869 struct interp_weight_stencil ** new_stencils) {
5870
5871 int comm_rank, comm_size;
5872 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
5873 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
5874
5875 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
5877 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
5878
// count the stencils per destination rank and set up the identity permutation
5879 size_t * stencil_indices = xmalloc(count * sizeof(*stencil_indices));
5880 for (size_t i = 0; i < count; ++i) {
5881 stencil_indices[i] = i;
5882 sendcounts[owner_ranks[i]]++;
5883 }
5884
5886 1, sendcounts, recvcounts, sdispls, rdispls, comm);
5887
5888 // sort the stencil indices by owner rank
5889 yac_quicksort_index_int_size_t(owner_ranks, count, stencil_indices);
5890
// total number of received stencils: count plus displacement of the last rank
5891 *new_count = recvcounts[comm_size - 1] + rdispls[comm_size - 1];
5892 *new_stencils =
5893 exchange_stencils(comm, stencils, stencil_indices, sendcounts, recvcounts);
5894 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
5895 free(stencil_indices);
5896}
5897
5898#endif // YAC_NETCDF_ENABLED
5899
5901 struct yac_interp_weights * weights, char const * filename,
5902 char const * src_grid_name, char const * tgt_grid_name,
5903 size_t src_grid_size, size_t tgt_grid_size,
5904 enum yac_weight_file_on_existing on_existing) {
5905
5906#ifndef YAC_NETCDF_ENABLED
5907
5908 UNUSED(weights);
5909 UNUSED(filename);
5910 UNUSED(src_grid_name);
5911 UNUSED(tgt_grid_name);
5912 UNUSED(src_grid_size);
5913 UNUSED(tgt_grid_size);
5914
5915 die(
5916 "ERROR(yac_interp_weights_write_to_file): "
5917 "YAC is built without the NetCDF support");
5918#else
5919
5920 MPI_Comm comm = weights->comm;
5921 int comm_rank, comm_size;
5922 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
5923 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
5924
5925 // determine processes that will do output
5926 int io_flag;
5927 int * io_ranks;
5928 int num_io_ranks;
5929 yac_get_io_ranks(comm, &io_flag, &io_ranks, &num_io_ranks);
5930
5931 // check for existing weight file
5932 int weight_file_exists =
5933 (io_ranks[0] == comm_rank)?yac_file_exists(filename):0;
5935 MPI_Bcast(&weight_file_exists, 1, MPI_INT, io_ranks[0], comm), comm);
5936
5937 // return if the weight file already exists and is supposed to be kept
5938 if ((on_existing == YAC_WEIGHT_FILE_KEEP) && weight_file_exists) return;
5939 if ((on_existing == YAC_WEIGHT_FILE_ERROR) && weight_file_exists) {
5940 if (io_ranks[0] == comm_rank) {
5941 char const msg_fmt[] =
5942 "ERROR(yac_interp_weights_write_to_file): "
5943 "weight file already exists (%s)";
5944 char msg[strlen(msg_fmt) + strlen(filename)];
5945 sprintf(msg, msg_fmt, filename);
5946 yac_abort(comm, msg, __FILE__, __LINE__);
5947 }
5948 return;
5949 }
5950
5951 // determine range of global ids
5952 yac_int min_tgt_global_id, max_tgt_global_id;
5954 weights->stencils, weights->stencils_size,
5955 &min_tgt_global_id, &max_tgt_global_id, comm);
5956
5957 // determine io owners for all stencils
5958 int * io_owner =
5959 xmalloc(weights->stencils_size * sizeof(*io_owner));
5961 weights->stencils, weights->stencils_size,
5962 min_tgt_global_id, max_tgt_global_id,
5963 num_io_ranks, io_owner);
5964 for (size_t i = 0; i < weights->stencils_size; ++i)
5965 io_owner[i] = io_ranks[io_owner[i]];
5966 free(io_ranks);
5967
5968 size_t io_stencil_count = 0;
5969 struct interp_weight_stencil * io_stencils = NULL;
5970
5971 // redistribute stencils into io decomposition
5973 comm, weights->stencils_size, weights->stencils, io_owner,
5974 &io_stencil_count, &io_stencils);
5975 free(io_owner);
5976
5977 // distribute global grid sizes
5978 size_t grid_sizes[2] = {src_grid_size, tgt_grid_size};
5980 MPI_Allreduce(
5981 MPI_IN_PLACE, grid_sizes, 2, YAC_MPI_SIZE_T, MPI_MAX, comm), comm);
5982 src_grid_size = (size_t)(grid_sizes[0]);
5983 tgt_grid_size = (size_t)(grid_sizes[1]);
5984
5985 MPI_Comm io_comm;
5986 yac_mpi_call(MPI_Comm_split(comm, io_flag, comm_rank, &io_comm), comm);
5987
5988 // all non-io processes exit here, the remaining ones work using their own
5989 // communicator
5990 if (!io_flag) {
5991 yac_mpi_call(MPI_Comm_free(&io_comm), comm);
5992 free(io_stencils);
5993 // ensure that the writing of the weight file is complete
5994 yac_mpi_call(MPI_Barrier(comm), comm);
5995 return;
5996 }
5997
5998 // sort the stencils first by type (fixed first; fixed stencils are sorted
5999 // by their fixed value) and second by tgt id
6000 qsort(io_stencils, io_stencil_count, sizeof(*io_stencils),
6002
6003 yac_mpi_call(MPI_Comm_rank(io_comm, &comm_rank), comm);
6004 yac_mpi_call(MPI_Comm_size(io_comm, &comm_size), comm);
6005
6006 double * fixed_values = NULL;
6007 size_t num_fixed_values = 0;
6009 io_stencils, io_stencil_count, &fixed_values, &num_fixed_values, io_comm);
6010 size_t num_src_fields = weights->num_src_fields;
6011 size_t num_weights_per_link =
6012 stencil_get_num_weights_per_tgt(io_stencils, io_stencil_count, io_comm);
6013
6014 size_t * size_t_buffer =
6015 xmalloc(2 * (num_fixed_values + num_src_fields) * sizeof(*size_t_buffer));
6016 size_t * num_tgt_per_fixed_value = size_t_buffer;
6017 size_t * num_links_per_src_field = size_t_buffer + num_fixed_values;
6018 size_t * fixed_offsets = size_t_buffer + num_fixed_values + num_src_fields;
6019 size_t * link_offsets = size_t_buffer + 2 * num_fixed_values + num_src_fields;
6020
6021 size_t num_fixed_tgt = 0;
6022 size_t num_links = 0;
6024 io_stencils, io_stencil_count, num_fixed_values, fixed_values,
6025 num_tgt_per_fixed_value, &num_fixed_tgt, num_src_fields,
6026 num_links_per_src_field, &num_links);
6027
6029 num_fixed_values, num_tgt_per_fixed_value,
6030 num_src_fields, num_links_per_src_field,
6031 fixed_offsets, link_offsets, io_comm);
6032
6033 if (comm_rank == comm_size - 1) {
6034
6035 size_t * total_num_tgt_per_fixed_value =
6036 xmalloc(num_fixed_values * sizeof(*total_num_tgt_per_fixed_value));
6037 for (size_t i = 0, accu = 0; i < num_fixed_values; ++i) {
6038 total_num_tgt_per_fixed_value[i] =
6039 fixed_offsets[i] + num_tgt_per_fixed_value[i] - accu;
6040 accu += total_num_tgt_per_fixed_value[i];
6041 }
6042 size_t total_num_links = link_offsets[num_src_fields-1] +
6043 num_links_per_src_field[num_src_fields-1];
6044
6045 size_t * total_num_links_per_src_field =
6046 xmalloc(num_src_fields * sizeof(*total_num_links_per_src_field));
6047 for (size_t i = 0, accu = 0; i < num_src_fields; ++i) {
6048 total_num_links_per_src_field[i] =
6049 link_offsets[i] + num_links_per_src_field[i] - accu;
6050 accu += total_num_links_per_src_field[i];
6051 }
6052
6054 filename, src_grid_name, tgt_grid_name,
6055 num_fixed_values, fixed_values, total_num_tgt_per_fixed_value,
6056 total_num_links, num_weights_per_link,
6057 num_src_fields, total_num_links_per_src_field,
6058 weights->src_locations, weights->tgt_location,
6059 src_grid_size, tgt_grid_size);
6060
6061 free(total_num_links_per_src_field);
6062 free(total_num_tgt_per_fixed_value);
6063 }
6064 free(fixed_values);
6065
6066 // ensure that the basic weight file has been written
6067 yac_mpi_call(MPI_Barrier(io_comm), comm);
6068 yac_mpi_call(MPI_Comm_free(&io_comm), comm);
6069
6070 int ncid;
6071
6072 // open weight file
6073 yac_nc_open(filename, NC_WRITE | NC_SHARE, &ncid);
6074
6075 if (num_fixed_tgt > 0) {
6076
6077 int * tgt_address_fixed =
6078 xmalloc(num_fixed_tgt * sizeof(*tgt_address_fixed));
6079 stencil_get_tgt_address(io_stencils, num_fixed_tgt, tgt_address_fixed);
6080
6081 // inquire variable ids
6082 int var_dst_add_fixed_id;
6083 yac_nc_inq_varid(ncid, "dst_address_fixed", &var_dst_add_fixed_id);
6084
6085 // target ids that receive a fixed value to file
6086 for (size_t i = 0, offset = 0; i < num_fixed_values; ++i) {
6087
6088 if (num_tgt_per_fixed_value[i] == 0) continue;
6089
6090 size_t start[1] = {fixed_offsets[i]};
6091 size_t count[1] = {num_tgt_per_fixed_value[i]};
6093 nc_put_vara_int(
6094 ncid, var_dst_add_fixed_id, start, count, tgt_address_fixed + offset));
6095 offset += num_tgt_per_fixed_value[i];
6096 }
6097
6098 free(tgt_address_fixed);
6099 }
6100
6101 if (num_links > 0) {
6102
6103 int * src_address_link = xmalloc(num_links * sizeof(*src_address_link));
6104 int * tgt_address_link = xmalloc(num_links * sizeof(*tgt_address_link));
6105 double * w = xmalloc(num_links * num_weights_per_link * sizeof(*w));
6107 io_stencils + num_fixed_tgt, io_stencil_count - num_fixed_tgt,
6108 num_links_per_src_field, num_src_fields,
6109 src_address_link, tgt_address_link, w);
6110
6111 int var_src_add_id, var_dst_add_id, var_weight_id;
6112 yac_nc_inq_varid(ncid, "src_address", &var_src_add_id);
6113 yac_nc_inq_varid(ncid, "dst_address", &var_dst_add_id);
6114 yac_nc_inq_varid(ncid, "remap_matrix", &var_weight_id);
6115
6116 for (size_t i = 0, offset = 0; i < num_src_fields; ++i) {
6117
6118 if (num_links_per_src_field[i] == 0) continue;
6119
6120 size_t start[2] = {link_offsets[i], 0};
6121 size_t count[2] = {num_links_per_src_field[i], num_weights_per_link};
6122
6124 nc_put_vara_int(
6125 ncid, var_src_add_id, start, count, src_address_link + offset));
6127 nc_put_vara_int(
6128 ncid, var_dst_add_id, start, count, tgt_address_link + offset));
6130 nc_put_vara_double(
6131 ncid, var_weight_id, start, count,
6132 w + num_weights_per_link * offset));
6133
6134 offset += num_links_per_src_field[i];
6135 }
6136
6137 free(w);
6138 free(tgt_address_link);
6139 free(src_address_link);
6140 }
6141
6142 // close weight file
6143 YAC_HANDLE_ERROR(nc_close(ncid));
6144
6145 // ensure that the writing of the weight file is complete
6146 yac_mpi_call(MPI_Barrier(comm), comm);
6147
6148 free(size_t_buffer);
6149 yac_interp_weight_stencils_delete(io_stencils, io_stencil_count);
6150#endif
6151}
6152
6154 struct yac_interp_weights * weights) {
6155
6156 return weights->stencils_size;
6157}
6158
6160 struct yac_interp_weights * weights) {
6161
6162 struct interp_weight_stencil * stencils = weights->stencils;
6163 size_t stencils_size = weights->stencils_size;
6164
6165 yac_int * global_ids = xmalloc(stencils_size * sizeof(*global_ids));
6166
6167 for (size_t i = 0; i < stencils_size; ++i)
6168 global_ids[i] = stencils[i].tgt.global_id;
6169
6170 return global_ids;
6171}
6172
6174 return weights->comm;
6175}
6176
6178
6179 if (weights == NULL) return;
6180
6181 yac_interp_weight_stencils_delete(weights->stencils, weights->stencils_size);
6182 free(weights->src_locations);
6183 free(weights);
6184}
6185
6187 struct yac_interp_weights_data * interp_weights_data) {
6188
6190 YAC_FRAC_MASK_UNDEF, 1.0, 0.0, interp_weights_data);
6191}
6192
6194 struct yac_interp_weights_data interp_weights_data) {
6195
6196#define COPY_ARRAY(DATA, COUNT) \
6197{ \
6198 size_t size = COUNT * sizeof(*(interp_weights_data.DATA)); \
6199 interp_weights_data_copy.DATA = xmalloc(size); \
6200 memcpy(interp_weights_data_copy.DATA, interp_weights_data.DATA, size); \
6201}
6202
6203 struct yac_interp_weights_data interp_weights_data_copy;
6204
6205 interp_weights_data_copy.frac_mask_fallback_value =
6206 interp_weights_data.frac_mask_fallback_value;
6207 interp_weights_data_copy.scaling_factor =
6208 interp_weights_data.scaling_factor;
6209 interp_weights_data_copy.scaling_summand =
6210 interp_weights_data.scaling_summand;
6211
6212 size_t total_num_fixed_tgt = 0;
6213 for (size_t i = 0; i < interp_weights_data.num_fixed_values; ++i)
6214 total_num_fixed_tgt += interp_weights_data.num_tgt_per_fixed_value[i];
6215 interp_weights_data_copy.num_fixed_values =
6216 interp_weights_data.num_fixed_values;
6217 COPY_ARRAY(fixed_values, interp_weights_data.num_fixed_values)
6218 COPY_ARRAY(num_tgt_per_fixed_value, interp_weights_data.num_fixed_values)
6219 COPY_ARRAY(tgt_idx_fixed, total_num_fixed_tgt)
6220
6221 size_t num_weights = 0;
6222 for (size_t i = 0; i < interp_weights_data.num_wgt_tgt; ++i)
6223 num_weights += interp_weights_data.num_src_per_tgt[i];
6224 interp_weights_data_copy.num_wgt_tgt = interp_weights_data.num_wgt_tgt;
6225 COPY_ARRAY(wgt_tgt_idx, interp_weights_data.num_wgt_tgt)
6226 COPY_ARRAY(num_src_per_tgt, interp_weights_data.num_wgt_tgt)
6227 COPY_ARRAY(weights, num_weights)
6228 COPY_ARRAY(src_field_idx, num_weights)
6229 COPY_ARRAY(src_idx, num_weights)
6230 interp_weights_data_copy.num_src_fields =
6231 interp_weights_data.num_src_fields;
6232 COPY_ARRAY(src_field_buffer_size, interp_weights_data.num_src_fields)
6233
6234#undef COPY_ARRAY
6235
6236 return interp_weights_data_copy;
6237}
6238
6240 struct yac_interp_weights_data interp_weights_data) {
6241
6242 free(interp_weights_data.fixed_values);
6243 free(interp_weights_data.num_tgt_per_fixed_value);
6244 free(interp_weights_data.tgt_idx_fixed);
6245
6246 free(interp_weights_data.wgt_tgt_idx);
6247 free(interp_weights_data.num_src_per_tgt);
6248 free(interp_weights_data.weights);
6249 free(interp_weights_data.src_field_idx);
6250 free(interp_weights_data.src_idx);
6251 free(interp_weights_data.src_field_buffer_size);
6252}
#define UNUSED(x)
Definition core.h:73
#define ENSURE_ARRAY_SIZE(arrayp, curr_array_size, req_size)
struct @7::@8 value
enum callback_type type
#define YAC_WEIGHT_FILE_VERSION_STRING
static Xt_redist * generate_src_field_exchange_redists(struct yac_src_field_exchange_data *src_field_exchange_data, size_t num_src_fields, MPI_Comm comm, Xt_config redist_config)
static void interpolation_raw_add_wsum_mf(struct remote_point_info_reorder *src_points, size_t num_src_points, size_t num_src_fields, size_t tgt_count, struct interp_weight_stencil_wsum_mf *tgt_stencils, size_t *num_src_per_tgt, double *weights, size_t *src_idx, size_t *src_field_idx, MPI_Comm comm, void *interp_raw, Xt_config redist_config)
static void interpolation_add_wsum(struct remote_point_info_reorder *remote_src_points, size_t halo_size, size_t num_src_fields, size_t tgt_count, struct interp_weight_stencil_wsum_mf *tgt_stencils, size_t *num_src_per_tgt, double *weights, size_t *src_idx, size_t *src_field_idx, MPI_Comm comm, enum yac_interp_weights_reorder_type reorder, void *interp, Xt_config redist_config)
static MPI_Datatype get_fixed_stencil_mpi_datatype(MPI_Comm comm)
static size_t get_num_links_per_src_field(struct interp_weight_stencil *stencil, size_t src_field_idx)
static int get_stencil_pack_size_direct_mf(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static void interp_add_direct_raw(void *interp, size_t *src_orig_poses, size_t *sendcounts, struct interp_weight_stencil_direct *tgt_stencils, size_t *recvcounts, MPI_Comm comm, Xt_config redist_config)
static void interpolation_add_w_sum_mf(struct remote_point_info_reorder *remote_src_points, size_t halo_size, size_t num_src_fields, size_t tgt_count, struct interp_weight_stencil_wsum_mf *tgt_stencils, size_t *num_src_per_tgt, double *weights, size_t *src_idx, size_t *src_field_idx, MPI_Comm comm, enum yac_interp_weights_reorder_type reorder, struct yac_interpolation *interp, void(*interp_add_wsum_mf_at_src)(struct yac_interpolation *, Xt_redist *, size_t, size_t *, double *, size_t *, size_t *, size_t, Xt_redist), void(*interp_add_wsum_mf_at_tgt)(struct yac_interpolation *, Xt_redist *, size_t *, size_t, size_t *, double *, size_t *, size_t *, size_t), Xt_config redist_config)
static void interpolation_raw_add_sum(struct remote_point_info_reorder *src_points, size_t num_src_points, size_t num_src_fields, size_t tgt_count, struct interp_weight_stencil_wsum_mf *tgt_stencils, size_t *num_src_per_tgt, double *weights, size_t *src_idx, size_t *src_field_idx, MPI_Comm comm, void *interp_raw, Xt_config redist_config)
static void unpack_stencil_wsum_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static MPI_Datatype get_direct_stencil_mpi_datatype(MPI_Comm comm)
static void stencil_determine_tgt_global_id_range(struct interp_weight_stencil *stencils, size_t stencils_size, yac_int *min_tgt_global_id, yac_int *max_tgt_global_id, MPI_Comm comm)
static void unpack_stencil_sum_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_interp_weights_add_fixed(struct yac_interp_weights *weights, struct remote_points *tgts, double fixed_value)
struct yac_interp_weights_data yac_interp_weights_data_copy(struct yac_interp_weights_data interp_weights_data)
static int compare_stencils_direct_mf(const void *a, const void *b)
void yac_interp_weights_add_sum_mf(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_src_per_field_per_tgt, struct remote_point **srcs_per_field, size_t num_src_fields)
static struct remote_points * copy_remote_points_mf(struct remote_point **points, size_t *counts, size_t num_fields)
static struct interp_weight_stencils_wsum_mf * redist_wsum_mf_stencils_tgt(MPI_Comm comm, struct interp_weight_stencils_wsum_mf *wsum_stencils_data)
static int compare_remote_point_info(const void *a, const void *b)
static void yac_src_field_exchange_data_realloc(struct yac_interpolation_raw *interp_raw, size_t num_src_fields)
static int global_id_to_address(yac_int global_id)
static int get_stencil_pack_size_sum(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_interp_weights_data_init(struct yac_interp_weights_data *interp_weights_data)
#define COPY_ARRAY(DATA, COUNT)
static void pack_stencil_direct(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
yac_interp_weight_stencil_type
@ DIRECT
@ WEIGHT_SUM_MF
@ DIRECT_MF
@ SUM_MF
@ SUM
@ FIXED
@ WEIGHT_SUM
@ WEIGHT_STENCIL_TYPE_SIZE
static void interpolation_add_sum_mf(struct remote_point_info_reorder *remote_src_points, size_t halo_size, size_t num_src_fields, size_t tgt_count, struct interp_weight_stencil_wsum_mf *tgt_stencils, size_t *num_src_per_tgt, double *weights, size_t *src_idx, size_t *src_field_idx, MPI_Comm comm, enum yac_interp_weights_reorder_type reorder, void *interp, Xt_config redist_config)
static void pack_stencil_wsum(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static void yac_src_field_exchange_data_msgs_add(struct yac_src_field_exchange_data_msgs *msgs, int rank, size_t count, size_t *pos, size_t offset)
static void free_remote_points(struct remote_points *points)
static void interpolation_add_wsum_mf(struct remote_point_info_reorder *remote_src_points, size_t halo_size, size_t num_src_fields, size_t tgt_count, struct interp_weight_stencil_wsum_mf *tgt_stencils, size_t *num_src_per_tgt, double *weights, size_t *src_idx, size_t *src_field_idx, MPI_Comm comm, enum yac_interp_weights_reorder_type reorder, void *interp, Xt_config redist_config)
static void interpolation_raw_add_w_sum_mf(struct remote_point_info_reorder *remote_src_points, size_t num_src_points, size_t num_src_fields, size_t tgt_count, struct interp_weight_stencil_wsum_mf *tgt_stencils, size_t *num_src_per_tgt, double *weights, size_t *src_idx, size_t *src_field_idx, MPI_Comm comm, struct yac_interpolation_raw *interp_raw, Xt_config redist_config)
static void yac_interp_weight_stencils_delete(struct interp_weight_stencil *stencils, size_t count)
static void interpolation_raw_add_sum_mf(struct remote_point_info_reorder *src_points, size_t num_src_points, size_t num_src_fields, size_t tgt_count, struct interp_weight_stencil_wsum_mf *tgt_stencils, size_t *num_src_per_tgt, double *weights, size_t *src_idx, size_t *src_field_idx, MPI_Comm comm, void *interp_raw, Xt_config redist_config)
static void interpolation_add_sum(struct remote_point_info_reorder *remote_src_points, size_t halo_size, size_t num_src_fields, size_t tgt_count, struct interp_weight_stencil_wsum_mf *tgt_stencils, size_t *num_src_per_tgt, double *weights, size_t *src_idx, size_t *src_field_idx, MPI_Comm comm, enum yac_interp_weights_reorder_type reorder, void *interp, Xt_config redist_config)
static int compare_stencils_fixed(const void *a, const void *b)
static void interp_add_direct_mf_raw(void *interp, size_t num_src_fields, size_t *src_orig_poses, size_t *sendcounts, struct interp_weight_stencil_direct_mf *tgt_stencils, size_t *recvcounts, MPI_Comm comm, Xt_config redist_config)
static struct interp_weight_stencil stencils_merge(struct interp_weight_stencil **stencils, double *w, size_t num_stencils, struct remote_point point)
static int get_stencil_wsum_mf_pack_size(struct interp_weight_stencil_wsum_mf *stencil, MPI_Datatype wsum_mf_weight_dt, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_interp_weights_add_wsum(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_src_per_tgt, struct remote_point *srcs, double *w)
static struct interp_weight_stencil stencils_merge_sum(struct interp_weight_stencil **stencils, double *w, size_t num_stencils)
static int compare_w_global_id(const void *a, const void *b)
static void compact_srcs_w(struct remote_points *srcs, double **w)
void yac_interp_weights_delete(struct yac_interp_weights *weights)
static size_t unpack_stencils_wsum_mf(struct interp_weight_stencil_wsum_mf *wsum_stencils, struct interp_weight_stencil_wsum_mf_weight *weight_buffer, size_t count, void *packed_data, size_t packed_data_size, MPI_Comm comm)
static void pack_stencil_sum(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static void determine_stencils_io_owner(struct interp_weight_stencil *stencils, size_t stencils_size, yac_int min_tgt_global_id, yac_int max_tgt_global_id, int num_io_procs_int, int *io_owner)
static Xt_redist * generate_direct_mf_redists(size_t *src_orig_poses, size_t *sendcounts, struct interp_weight_stencil_direct_mf *tgt_stencils, size_t *recvcounts, size_t num_src_fields, MPI_Comm comm, Xt_config redist_config)
static size_t get_num_weights_per_link(struct interp_weight_stencil *stencil)
static void free_remote_point(struct remote_point point)
static void yac_interp_weights_redist_stencils(MPI_Comm comm, size_t count, struct interp_weight_stencil *stencils, int *owner_ranks, size_t *new_count, struct interp_weight_stencil **new_stencils)
static struct yac_src_field_exchange_data_msg * yac_src_field_exchange_data_msgs_get_msg(struct yac_src_field_exchange_data_msgs *msgs, int rank)
void yac_interp_weights_data_free(struct yac_interp_weights_data interp_weights_data)
static void unpack_stencil_wsum(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_interp_weights_wcopy_weights(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_stencils_per_tgt, size_t *stencil_indices, int *stencil_ranks, double *w)
static int compare_interp_weight_stencil(const void *a, const void *b)
static Xt_config get_redist_config(char const *yaxt_exchanger_name, MPI_Comm comm)
void yac_interp_weights_get_interpolation_raw(struct yac_interp_weights *weights, size_t collection_size, double frac_mask_fallback_value, double scaling_factor, double scaling_summand, char const *yaxt_exchanger_name, struct yac_interpolation_exchange **interpolation_exchange, struct yac_interp_weights_data *interp_weights_data)
static void yac_interp_weights_redist_w_sum_mf(MPI_Comm comm, struct interp_weight_stencils_wsum_mf *wsum_mf_stencils_data, struct yac_interpolation *interp, enum yac_interp_weights_reorder_type reorder, void(*interp_add_w_sum_mf)(struct remote_point_info_reorder *, size_t, size_t, size_t, struct interp_weight_stencil_wsum_mf *, size_t *, double *, size_t *, size_t *, MPI_Comm, enum yac_interp_weights_reorder_type, void *, Xt_config), Xt_config redist_config)
static void pack_stencil_wsum_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_interp_weights_add_wsum_mf(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_src_per_field_per_tgt, struct remote_point **srcs_per_field, double *w, size_t num_src_fields)
static int compare_rank_pos_reorder_field_idx(const void *a, const void *b)
static int get_stencil_pack_size_wsum(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static void pack_stencils(struct interp_weight_stencil *stencils, size_t count, size_t *pack_order, void **pack_data, int *pack_sizes, MPI_Datatype point_info_dt, MPI_Comm comm)
static void yac_interp_weights_redist_fixed(MPI_Comm comm, size_t count, struct interp_weight_stencil *fixed_stencils, void *interp, void(*interp_add_fixed)(void *, double, size_t, size_t *))
static void yac_src_field_exchange_data_init(struct yac_src_field_exchange_data *src_field_exchange_data)
static void interp_add_fixed(void *interp, double fixed_value, size_t count, size_t *tgt_pos)
static void get_stencils_pack_sizes(struct interp_weight_stencil *stencils, size_t count, size_t *pack_order, int *pack_sizes, MPI_Datatype point_info_dt, MPI_Comm comm)
yac_int * yac_interp_weights_get_interp_tgt(struct yac_interp_weights *weights)
static int get_stencil_pack_size_sum_mf(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static void pack_stencil_fixed(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static void yac_src_field_exchange_data_add(struct yac_src_field_exchange_data *src_field_exchange_data, size_t num_src_fields, MPI_Comm comm, size_t *send_msg_sizes, size_t *send_pos, size_t *recv_msg_sizes, size_t *recv_pos, size_t *recv_offsets)
static int get_stencil_pack_size_wsum_mf(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static struct interp_weight_stencil wcopy_interp_weight_stencil(struct interp_weight_stencil *stencil, struct remote_point point, double weight)
static Xt_redist generate_direct_redist(size_t *src_orig_poses, size_t *sendcounts, struct interp_weight_stencil_direct *tgt_stencils, size_t *recvcounts, MPI_Comm comm, Xt_config redist_config)
static struct interp_weight_stencil * yac_interp_weights_get_stencils(struct yac_interp_weights *weights, size_t *stencil_indices, int *stencil_ranks, size_t count)
static void unpack_stencil_direct_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static int compare_stencils(const void *a, const void *b)
static void create_weight_file(char const *filename, char const *src_grid_name, char const *tgt_grid_name, size_t num_fixed_values, double *fixed_values, size_t *num_tgt_per_fixed_value, size_t num_links, size_t num_weights_per_link, size_t num_src_fields, size_t *num_links_per_src_field, enum yac_location *src_locations, enum yac_location tgt_location, size_t src_grid_size, size_t tgt_grid_size)
static int get_stencil_pack_size_direct(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static void stencil_get_counts(struct interp_weight_stencil *stencils, size_t stencil_count, size_t num_fixed_values, double *fixed_values, size_t *num_tgt_per_fixed_value, size_t *num_fixed_tgt, size_t num_src_fields, size_t *num_links_per_src_field, size_t *num_links)
static void copy_remote_points_no_alloc(struct remote_point *points_to, struct remote_point *points_from, size_t count, struct remote_point_info **point_info_buffer_)
struct yac_interpolation * yac_interp_weights_get_interpolation(struct yac_interp_weights *weights, enum yac_interp_weights_reorder_type reorder, size_t collection_size, double frac_mask_fallback_value, double scaling_factor, double scaling_summand, char const *yaxt_exchanger_name)
static void interpolation_raw_add_wsum(struct remote_point_info_reorder *src_points, size_t num_src_points, size_t num_src_fields, size_t tgt_count, struct interp_weight_stencil_wsum_mf *tgt_stencils, size_t *num_src_per_tgt, double *weights, size_t *src_idx, size_t *src_field_idx, MPI_Comm comm, void *interp_raw, Xt_config redist_config)
#define WEIGHT_TOL
static struct remote_points * copy_remote_points(struct remote_point *points, size_t count)
static void interpolation_add_sum_at_src(struct yac_interpolation *interp, Xt_redist *halo_redists, size_t tgt_count, size_t *num_src_per_tgt, double *weights, size_t *src_field_idx, size_t *src_idx, size_t num_src_fields, Xt_redist result_redist)
static Xt_redist generate_redist_put_double(struct remote_point_infos *point_infos, size_t count, MPI_Comm comm, Xt_config redist_config)
struct yac_interpolation * yac_interp_weights_get_interpolation_f2c(struct yac_interp_weights *weights, int reorder, size_t collection_size, double frac_mask_fallback_value, double scaling_factor, double scaling_summand, char const *yaxt_exchanger_name)
static int compare_remote_point(const void *a, const void *b)
static MPI_Datatype get_direct_mf_stencil_mpi_datatype(MPI_Comm comm)
#define YAC_YAXT_EXCHANGER_STR
MPI_Comm yac_interp_weights_get_comm(struct yac_interp_weights *weights)
static MPI_Datatype get_wsum_mf_weight_mpi_datatype(MPI_Comm comm)
static void yac_interp_weights_redist_direct_mf(MPI_Comm comm, size_t count, struct interp_weight_stencil *direct_mf_stencils, void *interp, void(*interp_add_direct_mf)(void *, size_t, size_t *, size_t *, struct interp_weight_stencil_direct_mf *, size_t *, MPI_Comm, Xt_config), Xt_config redist_config)
static struct interp_weight_stencil copy_interp_weight_stencil(struct interp_weight_stencil *stencil, struct remote_point point)
void yac_interp_weights_add_direct(struct yac_interp_weights *weights, struct remote_points *tgts, struct remote_point *srcs)
void yac_interp_weights_add_sum(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_src_per_tgt, struct remote_point *srcs)
static void unpack_stencil_direct(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static int compare_interp_weight_stencil_wsum_mf_tgt_orig_pos(const void *a, const void *b)
static void interp_raw_add_fixed(void *interp, double fixed_value, size_t count, size_t *tgt_pos)
struct yac_interp_weights * yac_interp_weights_new(MPI_Comm comm, enum yac_location tgt_location, enum yac_location *src_locations, size_t num_src_fields)
static void unpack_stencil_sum(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static void yac_interp_weights_redist_direct(MPI_Comm comm, size_t count, struct interp_weight_stencil *direct_stencils, void *interp, void(*interp_add_direct)(void *, size_t *, size_t *, struct interp_weight_stencil_direct *, size_t *, MPI_Comm, Xt_config), Xt_config redist_config)
static struct interp_weight_stencils_wsum_mf * redist_wsum_mf_stencils(MPI_Comm comm, struct interp_weight_stencils_wsum_mf *wsum_stencils_data, int *stencil_owner, size_t *reorder_idx, size_t num_owners)
static struct interp_weight_stencils_wsum_mf * redist_wsum_mf_stencils_src(MPI_Comm comm, struct interp_weight_stencils_wsum_mf *wsum_stencils_data)
static int compare_stencils_direct(const void *a, const void *b)
static void yac_interp_weights_data_init_(double frac_mask_fallback_value, double scaling_factor, double scaling_summand, struct yac_interp_weights_data *interp_weights_data)
static size_t stencil_get_num_weights_per_tgt(struct interp_weight_stencil *stencils, size_t stencil_count, MPI_Comm comm)
static void stencil_get_tgt_address(struct interp_weight_stencil *stencils, size_t stencil_count, int *tgt_address)
static int get_stencil_pack_size_fixed(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static struct remote_point copy_remote_point(struct remote_point point)
static void stencil_get_link_data(struct interp_weight_stencil *stencils, size_t stencil_count, size_t *num_links_per_src_field, size_t num_src_fields, int *src_address, int *tgt_address, double *weight)
static void interp_add_direct(void *interp, size_t *src_orig_poses, size_t *sendcounts, struct interp_weight_stencil_direct *tgt_stencils, size_t *recvcounts, MPI_Comm comm, Xt_config redist_config)
size_t yac_interp_weights_get_interp_count(struct yac_interp_weights *weights)
static struct interp_weight_stencil stencils_merge_wsum(struct interp_weight_stencil **stencils, double *w, size_t num_stencils)
static void pack_stencil_sum_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static Xt_redist * generate_halo_redists(struct remote_point_info_reorder *halo_points, size_t count, size_t num_src_fields, MPI_Comm comm, Xt_config redist_config)
static void pack_stencil_direct_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static void pack_stencils_wsum_mf(struct interp_weight_stencil_wsum_mf *wsum_stencils, size_t count, size_t *pack_order, void **pack_data, int *pack_sizes, int *weight_counts, MPI_Comm comm)
static int compare_interp_weight_stencil_wsum_mf_src_orig_pos(const void *a, const void *b)
static struct interp_weight_stencil * exchange_stencils(MPI_Comm comm, struct interp_weight_stencil *stencils, size_t *stencil_indices, size_t *stencil_sendcounts, size_t *stencil_recvcounts)
static void unpack_stencil_fixed(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static int compare_double(void const *a, void const *b)
static void stencil_xscan_offsets(size_t num_fixed_values, size_t *num_tgt_per_fixed_value, size_t num_src_fields, size_t *num_links_per_src_field, size_t *fixed_offsets, size_t *link_offsets, MPI_Comm comm)
static void interpolation_add_sum_at_tgt(struct yac_interpolation *interp, Xt_redist *src_redists, size_t *tgt_pos, size_t tgt_count, size_t *num_src_per_tgt, double *weights, size_t *src_field_idx, size_t *src_idx, size_t num_src_fields)
static void interp_add_direct_mf(void *interp, size_t num_src_fields, size_t *src_orig_poses, size_t *sendcounts, struct interp_weight_stencil_direct_mf *tgt_stencils, size_t *recvcounts, MPI_Comm comm, Xt_config redist_config)
void yac_interp_weights_write_to_file(struct yac_interp_weights *weights, char const *filename, char const *src_grid_name, char const *tgt_grid_name, size_t src_grid_size, size_t tgt_grid_size, enum yac_weight_file_on_existing on_existing)
static void xt_redist_msg_free(struct Xt_redist_msg *msgs, size_t count, MPI_Comm comm)
void yac_interp_weights_add_direct_mf(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *src_field_indices, struct remote_point **srcs_per_field, size_t num_src_fields)
static void unpack_stencils(struct interp_weight_stencil *stencils, size_t count, void *packed_data, size_t packed_data_size, MPI_Datatype point_info_dt, MPI_Comm comm)
static struct remote_point_info select_src(struct remote_point_infos src)
static struct interp_weight_stencils_wsum_mf * generate_w_sum_mf_stencils(struct interp_weight_stencil *stencils, size_t count, enum yac_interp_weight_stencil_type stencil_type)
static void yac_src_field_exchange_data_free(struct yac_src_field_exchange_data *src_field_exchange_data, size_t num_src_fields)
static int compute_owner(int *ranks, size_t count)
static void yac_interp_weights_redist_w_sum_mf_raw(MPI_Comm comm, struct interp_weight_stencils_wsum_mf *wsum_mf_stencils_data, struct yac_interpolation *interp, void(*interp_add_w_sum_mf)(struct remote_point_info_reorder *, size_t, size_t, size_t, struct interp_weight_stencil_wsum_mf *, size_t *, double *, size_t *, size_t *, MPI_Comm, void *, Xt_config), Xt_config redist_config)
static void stencil_get_fixed_values(struct interp_weight_stencil *stencils, size_t stencil_count, double **fixed_values, size_t *num_fixed_values, MPI_Comm comm)
static void yac_interp_weights_data_set_wgt_tgt(struct yac_interp_weights_data *interp_weights_data, size_t num_src_fields, size_t tgt_count, size_t *tgt_idx, size_t *num_src_per_tgt, double *weights, size_t *src_field_idx, size_t *src_idx, size_t *src_field_buffer_size)
yac_interp_weights_reorder_type
@ YAC_MAPPING_ON_TGT
weights will be applied at target processes
@ YAC_MAPPING_ON_SRC
weights will be applied at source processes
yac_weight_file_on_existing
@ YAC_WEIGHT_FILE_KEEP
keep existing weight file
@ YAC_WEIGHT_FILE_ERROR
error when weight file already exists
void yac_interpolation_add_sum_at_src(struct yac_interpolation *interp, Xt_redist *halo_redists, size_t tgt_count, size_t *num_src_per_tgt, size_t *src_field_idx, size_t *src_idx, size_t num_src_fields, Xt_redist result_redist)
void yac_interpolation_add_weight_sum_mvp_at_tgt(struct yac_interpolation *interp, Xt_redist *src_redists, size_t *tgt_pos, size_t tgt_count, size_t *num_src_per_tgt, double *weights, size_t *src_field_idx, size_t *src_idx, size_t num_src_fields)
struct yac_interpolation * yac_interpolation_new(size_t collection_size, double frac_mask_fallback_value, double scale_factor, double scale_summand)
void yac_interpolation_add_direct_mf(struct yac_interpolation *interp, Xt_redist *redists, size_t num_src_fields)
void yac_interpolation_add_sum_at_tgt(struct yac_interpolation *interp, Xt_redist *src_redists, size_t *tgt_pos, size_t tgt_count, size_t *num_src_per_tgt, size_t *src_field_idx, size_t *src_idx, size_t num_src_fields)
void yac_interpolation_add_fixed(struct yac_interpolation *interp, double value, size_t count, size_t *pos)
double const YAC_FRAC_MASK_UNDEF
void yac_interpolation_add_direct(struct yac_interpolation *interp, Xt_redist redist)
void yac_interpolation_add_weight_sum_mvp_at_src(struct yac_interpolation *interp, Xt_redist *halo_redists, size_t tgt_count, size_t *num_src_per_tgt, double *weights, size_t *src_field_idx, size_t *src_idx, size_t num_src_fields, Xt_redist result_redist)
#define YAC_FRAC_MASK_VALUE_IS_VALID(value)
struct yac_interpolation_exchange * yac_interpolation_exchange_new(Xt_redist *redists, size_t num_fields, size_t collection_size, int with_frac_mask, char const *name)
void yac_get_io_ranks(MPI_Comm comm, int *local_is_io_, int **io_ranks_, int *num_io_ranks_)
Definition io_utils.c:309
void yac_nc_create(const char *path, int cmode, int *ncidp)
Definition io_utils.c:367
void yac_nc_inq_varid(int ncid, char const *name, int *varidp)
Definition io_utils.c:411
void yac_nc_open(const char *path, int omode, int *ncidp)
Definition io_utils.c:350
int yac_file_exists(const char *filename)
Definition utils_core.c:12
#define YAC_HANDLE_ERROR(exp)
Definition io_utils.h:35
char const * yac_loc2str(enum yac_location location)
Definition location.c:32
yac_location
Definition location.h:12
#define YAC_MAX_LOC_STR_LEN
Definition location.h:10
Definition __init__.py:1
add versions of standard API functions not returning on error
#define xrealloc(ptr, size)
Definition ppm_xfuncs.h:67
#define xmalloc(size)
Definition ppm_xfuncs.h:66
void yac_remote_point_pack(struct remote_point *point, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_remote_point_unpack(void *buffer, int buffer_size, int *position, struct remote_point *point, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_remote_points_pack(struct remote_points *points, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_remote_points_unpack(void *buffer, int buffer_size, int *position, struct remote_points **points, MPI_Datatype point_info_dt, MPI_Comm comm)
int yac_remote_points_get_pack_size(struct remote_points *points, MPI_Datatype point_info_dt, MPI_Comm comm)
MPI_Datatype yac_get_remote_point_info_mpi_datatype(MPI_Comm comm)
int yac_remote_point_get_pack_size(struct remote_point *point, MPI_Datatype point_info_dt, MPI_Comm comm)
struct remote_point_info src
struct remote_point_info src
struct interp_weight_stencil_wsum_mf_weight * data
struct interp_weight_stencil::@34::@41 weight_sum_mf
struct remote_points * srcs
struct interp_weight_stencil::@34::@40 sum_mf
struct interp_weight_stencil::@34::@38 weight_sum
struct interp_weight_stencil::@34::@35 fixed
struct remote_point src
struct interp_weight_stencil::@34::@39 direct_mf
struct interp_weight_stencil::@34::@37 sum
enum yac_interp_weight_stencil_type type
struct remote_point tgt
union interp_weight_stencil::@34 data
struct interp_weight_stencil::@34::@36 direct
struct interp_weight_stencils_wsum_mf stencils
struct interp_weight_stencil_wsum_mf_weight buffer[]
struct interp_weight_stencil_wsum_mf * data
struct remote_point_info data
single location information of a point
location information about a point that is located on one or
union remote_point_infos::@46 data
struct remote_point_info single
struct remote_point_info * multi
information (global id and location) about a point that
yac_int global_id
struct remote_point_infos data
structure containing the information (global id and location)
struct remote_point_info buffer[]
struct remote_point * data
struct interp_weight_stencil * stencils
enum yac_location tgt_location
enum yac_location * src_locations
struct yac_interpolation_raw::yac_src_field_exchange_data::yac_src_field_exchange_data_msgs::yac_src_field_exchange_data_msg * msg
struct yac_interpolation_raw::yac_src_field_exchange_data::yac_src_field_exchange_data_msgs send
struct yac_interpolation_raw::yac_src_field_exchange_data::yac_src_field_exchange_data_msgs recv
struct yac_interp_weights_data interp_weights_data
struct yac_interpolation_raw::yac_src_field_exchange_data * src_field_exchange_data
double frac_mask_fallback_value
#define MIN(a, b)
#define MAX(a, b)
void yac_quicksort_index_int_double(int *a, size_t n, double *idx)
void yac_quicksort_index_int_size_t(int *a, size_t n, size_t *idx)
void yac_quicksort_index_int_size_t_size_t(int *a, size_t n, size_t *b, size_t *c)
static void yac_remove_duplicates_double(double *array, size_t *n)
Definition utils_core.h:81
void yac_quicksort_index_size_t_size_t(size_t *a, size_t n, size_t *idx)
void yac_quicksort_index(int *a, size_t n, int *idx)
#define SUM
static struct user_input_data_points ** points
Definition yac.c:150
void yac_abort(MPI_Comm comm, const char *msg, const char *source, int line) __attribute__((noreturn))
#define YAC_ASSERT_F(exp, format,...)
Definition yac_assert.h:19
#define die(msg)
Definition yac_assert.h:12
#define YAC_ASSERT(exp, msg)
Definition yac_assert.h:16
void yac_generate_alltoallv_args(int count, size_t const *sendcounts, size_t *recvcounts, size_t *sdispls, size_t *rdispls, MPI_Comm comm)
Definition yac_mpi.c:577
void yac_free_comm_buffers(size_t *sendcounts, size_t *recvcounts, size_t *sdispls, size_t *rdispls)
Definition yac_mpi.c:633
void yac_get_comm_buffers(int count, size_t **sendcounts, size_t **recvcounts, size_t **sdispls, size_t **rdispls, MPI_Comm comm)
Definition yac_mpi.c:602
MPI_Datatype yac_create_resized(MPI_Datatype dt, size_t new_size, MPI_Comm comm)
Definition yac_mpi.c:556
void yac_alltoallv_p2p(void const *send_buffer, size_t const *sendcounts, size_t const *sdispls, void *recv_buffer, size_t const *recvcounts, size_t const *rdispls, size_t dt_size, MPI_Datatype dt, MPI_Comm comm, char const *caller, int line)
Definition yac_mpi.c:131
#define yac_mpi_call(call, comm)
#define YAC_MPI_SIZE_T
Xt_int yac_int
Definition yac_types.h:15
#define yac_int_dt
Definition yac_types.h:16