YetAnotherCoupler 3.2.0
Loading...
Searching...
No Matches
interp_weights.c
Go to the documentation of this file.
1// Copyright (c) 2024 The YAC Authors
2//
3// SPDX-License-Identifier: BSD-3-Clause
4
5#ifdef HAVE_CONFIG_H
6// Get the definition of the 'restrict' keyword.
7#include "config.h"
8#endif
9
10#include <string.h>
11
12#define WEIGHT_TOL (1e-9)
13
14#include "yac_mpi_internal.h"
16#include "ensure_array_size.h"
17#include "io_utils.h"
18#include "utils_core.h"
19#include "interp_method_file.h"
20
31
33
35 union {
36 struct {
37 double value;
39 struct {
40 struct remote_point src; // src id
42 struct {
43 struct remote_points * srcs; // src ids
44 } sum;
45 struct {
46 struct remote_points * srcs; // src ids
47 double * weights;
49 struct {
50 struct remote_point src; // src id
51 size_t field_idx;
53 struct {
54 struct remote_points * srcs; // src ids
55 size_t * field_indices;
57 struct {
58 struct remote_points * srcs; // src ids
59 double * weights;
60 size_t * field_indices;
63 struct remote_point tgt; //tgt id
64};
65
72
78
83
88
90 double value;
91 uint64_t orig_pos;
92};
93
98
104
109
121
122
128
130 MPI_Comm comm, enum yac_location tgt_location,
132
133 struct yac_interp_weights * weights = xmalloc(1 * sizeof(*weights));
134
135 weights->comm = comm;
136 weights->tgt_location = tgt_location;
137 weights->src_locations = xmalloc(num_src_fields * sizeof(*src_locations));
138 memcpy(
140 num_src_fields * sizeof(*src_locations));
142 weights->stencils = NULL;
143 weights->stencils_array_size = 0;
144 weights->stencils_size = 0;
145
146 return weights;
147}
148
149static inline struct remote_point copy_remote_point(
150 struct remote_point point) {
151
152 int count = point.data.count;
153 if (count > 1) {
154 struct remote_point_info * point_infos =
155 xmalloc((size_t)count * sizeof(*point_infos));
156 memcpy(point_infos, point.data.data.multi,
157 (size_t)count * sizeof(*point_infos));
158 point.data.data.multi = point_infos;
159 }
160 return point;
161}
162
164 struct remote_point * points_to, struct remote_point * points_from,
165 size_t count, struct remote_point_info ** point_info_buffer_) {
166
167 struct remote_point_info * point_info_buffer = *point_info_buffer_;
168
169 for (size_t i = 0; i < count; ++i) {
170 int curr_count = points_from[i].data.count;
171 points_to[i] = points_from[i];
172 if (curr_count > 1) {
173 points_to[i].data.data.multi = point_info_buffer;
174 memcpy(
175 point_info_buffer, points_from[i].data.data.multi,
176 (size_t)curr_count * sizeof(*point_info_buffer));
177 point_info_buffer += curr_count;
178 }
179 }
180 *point_info_buffer_ = point_info_buffer;
181}
182
183static inline struct remote_points * copy_remote_points(
184 struct remote_point * points, size_t count) {
185
186 size_t point_info_buffer_size = 0;
187 for (size_t i = 0; i < count; ++i)
188 if (points[i].data.count > 1)
189 point_info_buffer_size += (size_t)(points[i].data.count);
190
191 struct remote_points * points_copy =
192 xmalloc(point_info_buffer_size * sizeof(struct remote_point_info) +
193 sizeof(*points_copy));
194 points_copy->data = xmalloc(count * sizeof(*(points_copy->data)));
195 points_copy->count = count;
196 struct remote_point_info * point_info_buffer = &(points_copy->buffer[0]);
197
199 points_copy->data, points, count, &point_info_buffer);
200
201 return points_copy;
202}
203
205 struct remote_point ** points, size_t * counts, size_t num_fields) {
206
207 size_t point_info_buffer_size = 0;
208 size_t total_count = 0;
209 for (size_t i = 0; i < num_fields; ++i) {
210 total_count += counts[i];
211 for (size_t j = 0; j < counts[i]; ++j) {
212 if (points[i][j].data.count > 1)
213 point_info_buffer_size += (size_t)(points[i][j].data.count);
214 }
215 }
216
217 struct remote_points * points_copy =
218 xmalloc(point_info_buffer_size * sizeof(struct remote_point_info) +
219 sizeof(*points_copy));
220 points_copy->data = xmalloc(total_count * sizeof(*(points_copy->data)));
221 points_copy->count = total_count;
222 struct remote_point_info * point_info_buffer = &(points_copy->buffer[0]);
223
224 for (size_t i = 0, k = 0; i < num_fields; ++i) {
225 for (size_t j = 0; j < counts[i]; ++j, ++k) {
226 int curr_count = points[i][j].data.count;
227 points_copy->data[k] = points[i][j];
228 if (curr_count > 1) {
229 points_copy->data[k].data.data.multi = point_info_buffer;
230 memcpy(
231 point_info_buffer, points[i][j].data.data.multi,
232 (size_t)curr_count * sizeof(*point_info_buffer));
233 point_info_buffer += curr_count;
234 }
235 }
236 }
237 return points_copy;
238}
239
241 struct yac_interp_weights * weights, struct remote_points * tgts,
242 double fixed_value) {
243
244 struct interp_weight_stencil * stencils = weights->stencils;
245 size_t stencils_array_size = weights->stencils_array_size;
246 size_t stencils_size = weights->stencils_size;
247
248 ENSURE_ARRAY_SIZE(stencils, stencils_array_size, stencils_size + tgts->count);
249
250 for (size_t i = 0; i < tgts->count; ++i, ++stencils_size) {
251
252 stencils[stencils_size].type = FIXED;
253 stencils[stencils_size].tgt = copy_remote_point(tgts->data[i]);
254 stencils[stencils_size].data.fixed.value = fixed_value;
255 }
256
257 weights->stencils = stencils;
258 weights->stencils_array_size = stencils_array_size;
259 weights->stencils_size = stencils_size;
260}
261
263 struct yac_interp_weights * weights, struct remote_points * tgts,
264 size_t * num_src_per_tgt, struct remote_point * srcs, double * w) {
265
266 if (tgts->count == 0) return;
267
268 // determine whether there are zero-weights
269 int pack_flag = 0;
270 for (size_t i = 0, k = 0; (i < tgts->count) && !pack_flag; ++i)
271 for (size_t j = 0; (j < num_src_per_tgt[i]) && !pack_flag; ++j, ++k)
272 pack_flag = (fabs(w[k]) <= WEIGHT_TOL);
273
274 if (pack_flag) {
275
276 for (size_t i = 0, k = 0, l = 0;
277 i < tgts->count; i++) {
278
279 size_t curr_count = num_src_per_tgt[i];
280
281 for (size_t j = 0; j < curr_count; j++, k++) {
282
283 if (fabs(w[k]) < WEIGHT_TOL) {
284 num_src_per_tgt[i]--;
285 } else {
286 if (l != k) {
287 srcs[l] = srcs[k];
288 w[l] = w[k];
289 }
290 ++l;
291 }
292 }
293
294 // if all weights were zero
295 if ((curr_count != 0) && (num_src_per_tgt[i] == 0)) {
296
297 if (l != k) {
298 srcs[l] = srcs[k - curr_count];
299 w[l] = 0.0;
300 }
301 num_src_per_tgt[i] = 1;
302 ++l;
303 }
304 }
305 }
306
307 // check whether all weights are 1.0 and whether the number of source
308 // points per target is one for all targets
309 int flag_weight_one = 1;
310 int flag_count_one = 1;
311 for (size_t i = 0, j = 0;
312 (i < tgts->count) && (flag_weight_one || flag_count_one); ++i) {
313
314 size_t curr_count = num_src_per_tgt[i];
315 flag_count_one &= curr_count == 1;
316
317 for (size_t k = 0; (k < curr_count) && flag_weight_one; ++k, ++j)
318 flag_weight_one &= fabs(w[j] - 1.0) < WEIGHT_TOL;
319 }
320
321 // if all weights are 1.0 -> use more optimised weight type
322 if (flag_weight_one) {
323
324 // if the number of source points for all target points is one
325 if (flag_count_one)
327 else
328 yac_interp_weights_add_sum(weights, tgts, num_src_per_tgt, srcs);
329
330 } else {
331
332 struct interp_weight_stencil * stencils = weights->stencils;
333 size_t stencils_array_size = weights->stencils_array_size;
334 size_t stencils_size = weights->stencils_size;
335
336 ENSURE_ARRAY_SIZE(stencils, stencils_array_size, stencils_size + tgts->count);
337
338 for (size_t i = 0; i < tgts->count; ++i, ++stencils_size) {
339
340 size_t curr_num_src = num_src_per_tgt[i];
341
342 // remove target for which no weights were provided
343 if (curr_num_src == 0) {
344 --stencils_size;
345 continue;
346 }
347
348 double * curr_weights =
349 xmalloc(curr_num_src * sizeof(*curr_weights));
350
351 stencils[stencils_size].type = WEIGHT_SUM;
352 stencils[stencils_size].tgt = copy_remote_point(tgts->data[i]);
353 stencils[stencils_size].data.weight_sum.srcs =
354 copy_remote_points(srcs, curr_num_src);
355 stencils[stencils_size].data.weight_sum.weights = curr_weights;
356 memcpy(curr_weights, w, curr_num_src * sizeof(*curr_weights));
357
358 srcs += curr_num_src;
359 w += curr_num_src;
360 }
361
362 weights->stencils = stencils;
363 weights->stencils_array_size = stencils_array_size;
364 weights->stencils_size = stencils_size;
365 }
366}
367
369 struct yac_interp_weights * weights, struct remote_points * tgts,
370 size_t * num_src_per_tgt, struct remote_point * srcs) {
371
372 if (tgts->count == 0) return;
373
374 // check whether the number of source points per target is one
375 // for all targets
376 int flag_count_one = 1;
377 for (size_t i = 0; i < tgts->count; ++i) {
378 if (num_src_per_tgt[i] != 1) {
379 flag_count_one = 0;
380 break;
381 }
382 }
383
384 if (flag_count_one) {
385
387
388 } else {
389
390 struct interp_weight_stencil * stencils = weights->stencils;
391 size_t stencils_array_size = weights->stencils_array_size;
392 size_t stencils_size = weights->stencils_size;
393
394 ENSURE_ARRAY_SIZE(stencils, stencils_array_size, stencils_size + tgts->count);
395
396 for (size_t i = 0; i < tgts->count; ++i, ++stencils_size) {
397
398 size_t curr_num_src = num_src_per_tgt[i];
399
400 stencils[stencils_size].type = SUM;
401 stencils[stencils_size].tgt = copy_remote_point(tgts->data[i]);
402 stencils[stencils_size].data.weight_sum.srcs =
403 copy_remote_points(srcs, curr_num_src);
404 stencils[stencils_size].data.weight_sum.weights = NULL;
405
406 srcs += curr_num_src;
407 }
408
409 weights->stencils = stencils;
410 weights->stencils_array_size = stencils_array_size;
411 weights->stencils_size = stencils_size;
412 }
413}
414
416 struct yac_interp_weights * weights, struct remote_points * tgts,
417 struct remote_point * srcs) {
418
419 if (tgts->count == 0) return;
420
421 struct interp_weight_stencil * stencils = weights->stencils;
422 size_t stencils_array_size = weights->stencils_array_size;
423 size_t stencils_size = weights->stencils_size;
424
425 ENSURE_ARRAY_SIZE(stencils, stencils_array_size, stencils_size + tgts->count);
426
427 for (size_t i = 0; i < tgts->count; ++i, ++stencils_size) {
428
429 stencils[stencils_size].type = DIRECT;
430 stencils[stencils_size].tgt = copy_remote_point(tgts->data[i]);
431 stencils[stencils_size].data.direct.src = copy_remote_point(srcs[i]);
432 }
433
434 weights->stencils = stencils;
435 weights->stencils_array_size = stencils_array_size;
436 weights->stencils_size = stencils_size;
437}
438
440 struct yac_interp_weights * weights, struct remote_points * tgts,
441 size_t * src_field_indices, struct remote_point ** srcs_per_field,
442 size_t num_src_fields) {
443
444 if (tgts->count == 0) return;
445
446 if (num_src_fields == 1) {
447 yac_interp_weights_add_direct(weights, tgts, srcs_per_field[0]);
448 return;
449 }
450
451 struct interp_weight_stencil * stencils = weights->stencils;
452 size_t stencils_array_size = weights->stencils_array_size;
453 size_t stencils_size = weights->stencils_size;
454
456 stencils, stencils_array_size, stencils_size + tgts->count);
457 stencils += stencils_size;
458
459 size_t srcs_offsets[num_src_fields];
460 memset(srcs_offsets, 0, num_src_fields * sizeof(srcs_offsets[0]));
461
462 for (size_t i = 0; i < tgts->count; ++i) {
463
464 size_t src_field_idx = src_field_indices[i];
465 stencils[i].type = DIRECT_MF;
466 stencils[i].tgt = copy_remote_point(tgts->data[i]);
467 stencils[i].data.direct_mf.src =
469 srcs_per_field[src_field_idx][srcs_offsets[src_field_idx]++]);
470 stencils[i].data.direct_mf.field_idx = src_field_idx;
471 }
472
473 weights->stencils = stencils;
474 weights->stencils_array_size = stencils_array_size;
475 weights->stencils_size += tgts->count;
476}
477
479 struct yac_interp_weights * weights, struct remote_points * tgts,
480 size_t * num_src_per_field_per_tgt, struct remote_point ** srcs_per_field,
481 size_t num_src_fields) {
482
483 if (tgts->count == 0) return;
484
485 if (num_src_fields == 1) {
487 weights, tgts, num_src_per_field_per_tgt, srcs_per_field[0]);
488 return;
489 }
490
491 // check whether the number of source points per target is one
492 // for all targets
493 int flag_count_one = 1;
494 for (size_t i = 0, k = 0; i < tgts->count; ++i) {
495 size_t count = 0;
496 for (size_t j = 0; j < num_src_fields; ++j, ++k)
497 count += num_src_per_field_per_tgt[k];
498 if (count != 1) {
499 flag_count_one = 0;
500 break;
501 }
502 }
503
504 if (flag_count_one) {
505
506 size_t * src_field_indices =
507 xmalloc(tgts->count * sizeof(*src_field_indices));
508
509 for (size_t i = 0, k = 0; i < tgts->count; ++i)
510 for (size_t j = 0; j < num_src_fields; ++j, ++k)
511 if (num_src_per_field_per_tgt[k])
512 src_field_indices[i] = j;
513
515 weights, tgts, src_field_indices, srcs_per_field, num_src_fields);
516
517 free(src_field_indices);
518
519 } else {
520 struct remote_point * curr_srcs_per_field[num_src_fields];
521 memcpy(curr_srcs_per_field, srcs_per_field,
522 num_src_fields * sizeof(*srcs_per_field));
523
524 struct interp_weight_stencil * stencils = weights->stencils;
525 size_t stencils_array_size = weights->stencils_array_size;
526 size_t stencils_size = weights->stencils_size;
527
529 stencils, stencils_array_size, stencils_size + tgts->count);
530
531 for (size_t i = 0; i < tgts->count; ++i, ++stencils_size) {
532
533 size_t * curr_num_src_per_src_field =
534 num_src_per_field_per_tgt + i * num_src_fields;
535 size_t curr_num_src = 0;
536 for (size_t j = 0; j < num_src_fields; ++j)
537 curr_num_src += curr_num_src_per_src_field[j];
538
539 stencils[stencils_size].type = SUM_MF;
540 stencils[stencils_size].tgt = copy_remote_point(tgts->data[i]);
541 stencils[stencils_size].data.sum_mf.field_indices =
542 xmalloc(
543 curr_num_src *
544 sizeof(*(stencils[stencils_size].data.sum_mf.field_indices)));
545 for (size_t j = 0, l = 0; j < num_src_fields; ++j) {
546 size_t curr_num_src = curr_num_src_per_src_field[j];
547 for (size_t k = 0; k < curr_num_src; ++k, ++l) {
548 stencils[stencils_size].data.sum_mf.field_indices[l] = j;
549 }
550 }
551 stencils[stencils_size].data.sum_mf.srcs =
553 curr_srcs_per_field, curr_num_src_per_src_field, num_src_fields);
554
555 for (size_t j = 0; j < num_src_fields; ++j)
556 curr_srcs_per_field[j] += curr_num_src_per_src_field[j];
557 }
558
559 weights->stencils = stencils;
560 weights->stencils_array_size = stencils_array_size;
561 weights->stencils_size = stencils_size;
562 }
563}
564
566 struct yac_interp_weights * weights, struct remote_points * tgts,
567 size_t * num_src_per_field_per_tgt, struct remote_point ** srcs_per_field,
568 double * w, size_t num_src_fields) {
569
570 if (tgts->count == 0) return;
571
572 if (num_src_fields == 1) {
574 weights, tgts, num_src_per_field_per_tgt, srcs_per_field[0], w);
575 return;
576 }
577
578 // check whether all weights are 1.0 and whether the number of source
579 // points per target is one for all targets
580 int flag_weight_one = 1;
581 for (size_t i = 0, j = 0;
582 (i < tgts->count) && flag_weight_one; ++i) {
583
584 for (size_t src_field_idx = 0; src_field_idx < num_src_fields;
585 ++src_field_idx) {
586
587 size_t curr_count =
588 num_src_per_field_per_tgt[i * num_src_fields + src_field_idx];
589
590 for (size_t k = 0; (k < curr_count) && flag_weight_one; ++k, ++j)
591 flag_weight_one &= fabs(w[j] - 1.0) < 1e-12;
592 }
593 }
594
595 // if all weights are 1.0 -> use more optimised weight type
596 if (flag_weight_one) {
597
599 weights, tgts, num_src_per_field_per_tgt, srcs_per_field, num_src_fields);
600
601 } else {
602
603 struct remote_point * curr_srcs_per_field[num_src_fields];
604 memcpy(curr_srcs_per_field, srcs_per_field,
605 num_src_fields * sizeof(*srcs_per_field));
606
607 struct interp_weight_stencil * stencils = weights->stencils;
608 size_t stencils_array_size = weights->stencils_array_size;
609 size_t stencils_size = weights->stencils_size;
610
612 stencils, stencils_array_size, stencils_size + tgts->count);
613
614 for (size_t i = 0; i < tgts->count; ++i, ++stencils_size) {
615
616 size_t * curr_num_src_per_src_field =
617 num_src_per_field_per_tgt + i * num_src_fields;
618 size_t curr_num_weights = 0;
619 for (size_t j = 0; j < num_src_fields; ++j)
620 curr_num_weights += curr_num_src_per_src_field[j];
621 double * curr_weights =
622 xmalloc(curr_num_weights * sizeof(*curr_weights));
623 size_t * field_indices =
624 xmalloc(curr_num_weights * sizeof(*field_indices));
625
626 stencils[stencils_size].type = WEIGHT_SUM_MF;
627 stencils[stencils_size].tgt = copy_remote_point(tgts->data[i]);
628 stencils[stencils_size].data.weight_sum_mf.field_indices = field_indices;
629 for (size_t j = 0, l = 0; j < num_src_fields; ++j) {
630 size_t curr_num_src = curr_num_src_per_src_field[j];
631 for (size_t k = 0; k < curr_num_src; ++k, ++l) field_indices[l] = j;
632 }
633 stencils[stencils_size].data.weight_sum_mf.srcs =
634 copy_remote_points_mf(curr_srcs_per_field, curr_num_src_per_src_field, num_src_fields);
635 stencils[stencils_size].data.weight_sum_mf.weights = curr_weights;
636 memcpy(curr_weights, w, curr_num_weights * sizeof(*curr_weights));
637
638 for (size_t j = 0; j < num_src_fields; ++j)
639 curr_srcs_per_field[j] += curr_num_src_per_src_field[j];
640 w += curr_num_weights;
641 }
642
643 weights->stencils = stencils;
644 weights->stencils_array_size = stencils_array_size;
645 weights->stencils_size = stencils_size;
646 }
647}
648
649static int compare_stencils_fixed(const void * a, const void * b) {
650
651 int ret = (((struct interp_weight_stencil_fixed *)a)->value >
652 ((struct interp_weight_stencil_fixed *)b)->value) -
653 (((struct interp_weight_stencil_fixed *)a)->value <
654 ((struct interp_weight_stencil_fixed *)b)->value);
655
656 if (ret) return ret;
657
658 return (((struct interp_weight_stencil_fixed *)a)->orig_pos >
659 ((struct interp_weight_stencil_fixed *)b)->orig_pos) -
660 (((struct interp_weight_stencil_fixed *)a)->orig_pos <
661 ((struct interp_weight_stencil_fixed *)b)->orig_pos);
662}
663
664static MPI_Datatype get_fixed_stencil_mpi_datatype(MPI_Comm comm) {
665
666 struct interp_weight_stencil_fixed dummy;
667 MPI_Datatype fixed_stencil_dt;
668 int array_of_blocklengths[] = {1, 1};
669 const MPI_Aint array_of_displacements[] =
670 {(MPI_Aint)(intptr_t)(const void *)&(dummy.value) -
671 (MPI_Aint)(intptr_t)(const void *)&dummy,
672 (MPI_Aint)(intptr_t)(const void *)&(dummy.orig_pos) -
673 (MPI_Aint)(intptr_t)(const void *)&dummy};
674 const MPI_Datatype array_of_types[] = {MPI_DOUBLE, MPI_UINT64_T};
676 MPI_Type_create_struct(2, array_of_blocklengths, array_of_displacements,
677 array_of_types, &fixed_stencil_dt), comm);
678 return yac_create_resized(fixed_stencil_dt, sizeof(dummy), comm);
679}
680
682 MPI_Comm comm, uint64_t count,
683 struct interp_weight_stencil * fixed_stencils,
684 struct yac_interpolation * interp) {
685
686 int comm_size;
687 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
688
689 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
691 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
692
693 for (size_t i = 0; i < count; ++i) {
694 int curr_count = fixed_stencils[i].tgt.data.count;
695 struct remote_point_info * curr_point_infos =
696 (curr_count == 1)?
697 (&(fixed_stencils[i].tgt.data.data.single)):
698 (fixed_stencils[i].tgt.data.data.multi);
699 for (int j = 0; j < curr_count; ++j)
700 sendcounts[curr_point_infos[j].rank]++;
701 }
702
704 1, sendcounts, recvcounts, sdispls, rdispls, comm);
705
706 size_t send_buffer_size =
707 sdispls[comm_size] + sendcounts[comm_size - 1];
708 size_t recv_buffer_size =
709 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
710
711 struct interp_weight_stencil_fixed * buffer =
712 xmalloc((send_buffer_size + recv_buffer_size) * sizeof(*buffer));
713 struct interp_weight_stencil_fixed * send_buffer = buffer + recv_buffer_size;
714 struct interp_weight_stencil_fixed * recv_buffer = buffer;
715
716 // pack fixed stencils
717 for (size_t i = 0; i < count; ++i) {
718 int curr_count = fixed_stencils[i].tgt.data.count;
719 struct remote_point_info * curr_point_infos =
720 (curr_count == 1)?
721 (&(fixed_stencils[i].tgt.data.data.single)):
722 (fixed_stencils[i].tgt.data.data.multi);
723 double value = fixed_stencils[i].data.fixed.value;
724 for (int j = 0; j < curr_count; ++j) {
725 size_t pos = sdispls[curr_point_infos[j].rank + 1]++;
726 send_buffer[pos].value = value;
727 send_buffer[pos].orig_pos = curr_point_infos[j].orig_pos;
728 }
729 }
730
731 // create MPI Datatype for exchanging fixed stencils
732 MPI_Datatype stencil_fixed_dt = get_fixed_stencil_mpi_datatype(comm);
733
734 // redistribute fixed stencils
736 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls,
737 sizeof(*send_buffer), stencil_fixed_dt, comm);
738
739 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
740
741 yac_mpi_call(MPI_Type_free(&stencil_fixed_dt), comm);
742
743 if (recv_buffer_size == 0) {
744 free(buffer);
745 return;
746 }
747
748 // sort stencils first by fixed value and second by orig_pos
749 qsort(recv_buffer, recv_buffer_size, sizeof(*recv_buffer),
751
752 size_t * tgt_pos = xmalloc(recv_buffer_size * sizeof(*tgt_pos));
753 for (size_t i = 0; i < recv_buffer_size; ++i)
754 tgt_pos[i] = (size_t)(recv_buffer[i].orig_pos);
755
756 size_t offset = 0, i = 0;
757 while (offset < recv_buffer_size) {
758 double fixed_value = recv_buffer[i].value;
759 while ((i < recv_buffer_size) && (fixed_value == recv_buffer[i].value)) ++i;
760 size_t curr_count = i - offset;
762 interp, fixed_value, curr_count, tgt_pos + offset);
763 offset = i;
764 }
765
766 free(buffer);
767 free(tgt_pos);
768}
769
770static inline struct remote_point_info select_src(
771 struct remote_point_infos src) {
772
773 if (src.count == 1) return src.data.single;
774
775 int min_rank = INT_MAX;
776 size_t min_rank_idx = SIZE_MAX;
777 for (int i = 0; i < src.count; ++i) {
778 if (src.data.multi[i].rank < min_rank) {
779 min_rank = src.data.multi[i].rank;
780 min_rank_idx = i;
781 }
782 }
783
784 return src.data.multi[min_rank_idx];
785}
786
788 struct Xt_redist_msg * msgs, size_t count, MPI_Comm comm) {
789 for (size_t i = 0; i < count; ++i) {
790 MPI_Datatype * dt = &(msgs[i].datatype);
791 if (*dt != MPI_DATATYPE_NULL) yac_mpi_call(MPI_Type_free(dt), comm);
792 }
793 free(msgs);
794}
795
796static Xt_redist generate_direct_redist(
797 uint64_t * src_orig_poses, size_t * sendcounts,
798 struct interp_weight_stencil_direct * tgt_stencils,
799 size_t * recvcounts, MPI_Comm comm) {
800
801 int comm_size;
802 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
803
804 size_t nsend = 0, nrecv = 0;
805 size_t max_buffer_size = 0;
806 for (int i = 0; i < comm_size; ++i) {
807 nsend += sendcounts[i] > 0;
808 nrecv += recvcounts[i] > 0;
809 if (max_buffer_size < sendcounts[i]) max_buffer_size = sendcounts[i];
810 if (max_buffer_size < recvcounts[i]) max_buffer_size = recvcounts[i];
811 }
812
813 size_t total_num_msg = nsend + nrecv;
814
815 struct Xt_redist_msg * msgs_buffer =
816 xmalloc(total_num_msg * sizeof(*msgs_buffer));
817 struct Xt_redist_msg * send_msgs = msgs_buffer;
818 struct Xt_redist_msg * recv_msgs = msgs_buffer + nsend;
819
820 int * pos_buffer = xmalloc((size_t)max_buffer_size * sizeof(*pos_buffer));
821
822 nsend = 0;
823 nrecv = 0;
824 for (int i = 0; i < comm_size; ++i) {
825 if (recvcounts[i] > 0) {
826 for (size_t j = 0; j < recvcounts[i]; ++j)
827 pos_buffer[j] = (int)tgt_stencils[j].orig_pos;
828 tgt_stencils += recvcounts[i];
829 recv_msgs[nrecv].rank = i;
830 recv_msgs[nrecv].datatype =
831 xt_mpi_generate_datatype(pos_buffer, recvcounts[i], MPI_DOUBLE, comm);
832 nrecv++;
833 }
834 if (sendcounts[i] > 0) {
835 for (size_t j = 0; j < sendcounts[i]; ++j)
836 pos_buffer[j] = (int)src_orig_poses[j];
837 src_orig_poses += sendcounts[i];
838 send_msgs[nsend].rank = i;
839 send_msgs[nsend].datatype =
840 xt_mpi_generate_datatype(pos_buffer, sendcounts[i], MPI_DOUBLE, comm);
841 nsend++;
842 }
843 }
844
845 free(pos_buffer);
846
847 Xt_redist redist;
848 MPI_Comm split_comm;
849
850 if (total_num_msg > 0) {
851
852 yac_mpi_call(MPI_Comm_split(comm, 1, 0, &split_comm), comm);
853
854 int * rank_buffer =
855 xmalloc(2 * total_num_msg * sizeof(*rank_buffer));
856 int * orig_ranks = rank_buffer;
857 int * split_ranks = rank_buffer + total_num_msg;
858
859 for (size_t i = 0; i < total_num_msg; ++i)
860 orig_ranks[i] = msgs_buffer[i].rank;
861
862 MPI_Group orig_group, split_group;
863 yac_mpi_call(MPI_Comm_group(comm, &orig_group), comm);
864 yac_mpi_call(MPI_Comm_group(split_comm, &split_group), comm);
865
867 MPI_Group_translate_ranks(orig_group, total_num_msg, orig_ranks,
868 split_group, split_ranks), split_comm);
869
870 for (size_t i = 0; i < total_num_msg; ++i)
871 msgs_buffer[i].rank = split_ranks[i];
872
873 free(rank_buffer);
874
875 yac_mpi_call(MPI_Group_free(&split_group), comm);
876 yac_mpi_call(MPI_Group_free(&orig_group), comm);
877
878 // generate redist
879 redist =
880 xt_redist_single_array_base_new(
881 nsend, nrecv, send_msgs, recv_msgs, split_comm);
882
883 } else {
884 yac_mpi_call(MPI_Comm_split(comm, 0, 0, &split_comm), comm);
885 redist = NULL;
886 }
887
888 yac_mpi_call(MPI_Comm_free(&split_comm), comm);
889 xt_redist_msg_free(msgs_buffer, total_num_msg, comm);
890
891 return redist;
892}
893
894static Xt_redist * generate_direct_mf_redists(
895 uint64_t * src_orig_pos, size_t * sendcounts,
896 struct interp_weight_stencil_direct_mf * tgt_stencils,
897 size_t * recvcounts, size_t num_src_fields, MPI_Comm comm) {
898
899 int comm_size;
900 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
901
902 size_t nsends[num_src_fields], nrecvs[num_src_fields];
903 size_t max_buffer_size = 0;
904 memset(nsends, 0, num_src_fields * sizeof(nsends[0]));
905 memset(nrecvs, 0, num_src_fields * sizeof(nrecvs[0]));
906 for (int i = 0; i < comm_size; ++i) {
907 for (size_t j = 0; j < num_src_fields; ++j) {
908 size_t idx = (size_t)i * num_src_fields + j;
909 if (sendcounts[idx] > 0) nsends[j]++;
910 if (recvcounts[idx] > 0) nrecvs[j]++;
911 if (max_buffer_size < sendcounts[idx]) max_buffer_size = sendcounts[idx];
912 if (max_buffer_size < recvcounts[idx]) max_buffer_size = recvcounts[idx];
913 }
914 }
915
916 size_t nsend = 0, nrecv = 0;
917 size_t send_offsets[num_src_fields];
918 size_t recv_offsets[num_src_fields];
919 for (size_t i = 0; i < num_src_fields; ++i) {
920 send_offsets[i] = nsend;
921 recv_offsets[i] = nrecv;
922 nsend += nsends[i];
923 nrecv += nrecvs[i];
924 }
925
926 size_t total_num_msg = nsend + nrecv;
927
928 struct Xt_redist_msg * msgs_buffer =
929 xmalloc(total_num_msg * sizeof(*msgs_buffer));
930 struct Xt_redist_msg * send_msgs = msgs_buffer;
931 struct Xt_redist_msg * recv_msgs = msgs_buffer + nsend;
932
933 int * pos_buffer = xmalloc(max_buffer_size * sizeof(*pos_buffer));
934
935 for (int i = 0; i < comm_size; ++i) {
936 for (size_t src_field_idx = 0; src_field_idx < num_src_fields;
937 ++src_field_idx) {
938 size_t idx = (size_t)i * num_src_fields + src_field_idx;
939 if (recvcounts[idx] > 0) {
940 for (size_t j = 0; j < recvcounts[idx]; ++j)
941 pos_buffer[j] = (int)tgt_stencils[j].orig_pos;
942 tgt_stencils += recvcounts[idx];
943 recv_msgs[recv_offsets[src_field_idx]].rank = i;
944 recv_msgs[recv_offsets[src_field_idx]].datatype =
945 xt_mpi_generate_datatype(
946 pos_buffer, recvcounts[idx], MPI_DOUBLE, comm);
947 recv_offsets[src_field_idx]++;
948 }
949 if (sendcounts[idx] > 0) {
950 for (size_t j = 0; j < sendcounts[idx]; ++j)
951 pos_buffer[j] = (int)src_orig_pos[j];
952 src_orig_pos += sendcounts[idx];
953 send_msgs[send_offsets[src_field_idx]].rank = i;
954 send_msgs[send_offsets[src_field_idx]].datatype =
955 xt_mpi_generate_datatype(
956 pos_buffer, sendcounts[idx], MPI_DOUBLE, comm);
957 send_offsets[src_field_idx]++;
958 }
959 }
960 }
961
962 free(pos_buffer);
963
964 Xt_redist * redists;
965 MPI_Comm split_comm;
966
967 if (total_num_msg > 0) {
968
969 yac_mpi_call(MPI_Comm_split(comm, 1, 0, &split_comm), comm);
970
971 int * rank_buffer =
972 xmalloc(2 * total_num_msg * sizeof(*rank_buffer));
973 int * orig_ranks = rank_buffer;
974 int * split_ranks = rank_buffer + total_num_msg;
975
976 for (size_t i = 0; i < total_num_msg; ++i)
977 orig_ranks[i] = msgs_buffer[i].rank;
978
979 MPI_Group orig_group, split_group;
980 yac_mpi_call(MPI_Comm_group(comm, &orig_group), comm);
981 yac_mpi_call(MPI_Comm_group(split_comm, &split_group), comm);
982
984 MPI_Group_translate_ranks(orig_group, total_num_msg, orig_ranks,
985 split_group, split_ranks), split_comm);
986
987 for (size_t i = 0; i < total_num_msg; ++i)
988 msgs_buffer[i].rank = split_ranks[i];
989
990 free(rank_buffer);
991
992 yac_mpi_call(MPI_Group_free(&split_group), comm);
993 yac_mpi_call(MPI_Group_free(&orig_group), comm);
994
995 // generate redists
996 redists = xmalloc(num_src_fields * sizeof(*redists));
997 for (size_t src_field_idx = 0; src_field_idx < num_src_fields;
998 ++src_field_idx) {
999 redists[src_field_idx] =
1000 xt_redist_single_array_base_new(
1001 nsends[src_field_idx], nrecvs[src_field_idx],
1002 send_msgs, recv_msgs, split_comm);
1003 send_msgs += nsends[src_field_idx];
1004 recv_msgs += nrecvs[src_field_idx];
1005 }
1006
1007 } else {
1008 yac_mpi_call(MPI_Comm_split(comm, 0, 0, &split_comm), comm);
1009 redists = NULL;
1010 }
1011
1012 yac_mpi_call(MPI_Comm_free(&split_comm), comm);
1013 xt_redist_msg_free(msgs_buffer, total_num_msg, comm);
1014
1015 return redists;
1016}
1017
1018static MPI_Datatype get_direct_stencil_mpi_datatype(MPI_Comm comm) {
1019
1020 struct interp_weight_stencil_direct dummy;
1021 MPI_Datatype direct_stencil_dt;
1022 int array_of_blocklengths[] = {1, 1};
1023 const MPI_Aint array_of_displacements[] =
1024 {(MPI_Aint)(intptr_t)(const void *)&(dummy.src) -
1025 (MPI_Aint)(intptr_t)(const void *)&dummy,
1026 (MPI_Aint)(intptr_t)(const void *)&(dummy.orig_pos) -
1027 (MPI_Aint)(intptr_t)(const void *)&dummy};
1028 MPI_Datatype array_of_types[] =
1029 {yac_get_remote_point_info_mpi_datatype(comm), MPI_UINT64_T};
1031 MPI_Type_create_struct(2, array_of_blocklengths, array_of_displacements,
1032 array_of_types, &direct_stencil_dt), comm);
1033 yac_mpi_call(MPI_Type_free(&(array_of_types[0])), comm);
1034 return yac_create_resized(direct_stencil_dt, sizeof(dummy), comm);
1035}
1036
1037static int compare_stencils_direct(const void * a, const void * b) {
1038
1039 int ret = ((struct interp_weight_stencil_direct *)a)->src.rank -
1040 ((struct interp_weight_stencil_direct *)b)->src.rank;
1041
1042 if (ret) return ret;
1043
1044 return (((struct interp_weight_stencil_direct *)a)->orig_pos >
1045 ((struct interp_weight_stencil_direct *)b)->orig_pos) -
1046 (((struct interp_weight_stencil_direct *)a)->orig_pos <
1047 ((struct interp_weight_stencil_direct *)b)->orig_pos);
1048}
1049
1051 MPI_Comm comm, uint64_t count,
1052 struct interp_weight_stencil * direct_stencils,
1053 struct yac_interpolation * interp) {
1054
1055 int comm_size;
1056 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1057
1058 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
1060 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1061
1062 for (size_t i = 0; i < count; ++i) {
1063 int curr_count = direct_stencils[i].tgt.data.count;
1064 struct remote_point_info * curr_point_info =
1065 (curr_count == 1)?
1066 (&(direct_stencils[i].tgt.data.data.single)):
1067 (direct_stencils[i].tgt.data.data.multi);
1068 for (int j = 0; j < curr_count; ++j)
1069 sendcounts[curr_point_info[j].rank]++;
1070 }
1071
1073 1, sendcounts, recvcounts, sdispls, rdispls, comm);
1074
1075 size_t send_buffer_size =
1076 sdispls[comm_size] + sendcounts[comm_size - 1];
1077 size_t recv_buffer_size =
1078 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
1079 size_t tgt_count = recv_buffer_size;
1080
1081 struct interp_weight_stencil_direct * stencil_buffer =
1082 xmalloc((send_buffer_size + recv_buffer_size) * sizeof(*stencil_buffer));
1083 struct interp_weight_stencil_direct * send_stencil_buffer =
1084 stencil_buffer + recv_buffer_size;
1085 struct interp_weight_stencil_direct * recv_stencil_buffer = stencil_buffer;
1086
1087 // pack direct stencils
1088 for (size_t i = 0; i < count; ++i) {
1089 int curr_count = direct_stencils[i].tgt.data.count;
1090 struct remote_point_info * curr_point_infos =
1091 (curr_count == 1)?
1092 (&(direct_stencils[i].tgt.data.data.single)):
1093 (direct_stencils[i].tgt.data.data.multi);
1094 struct remote_point_info src =
1095 select_src(direct_stencils[i].data.direct.src.data);
1096 for (int j = 0; j < curr_count; ++j) {
1097 size_t pos = sdispls[curr_point_infos[j].rank + 1]++;
1098 send_stencil_buffer[pos].src = src;
1099 send_stencil_buffer[pos].orig_pos = curr_point_infos[j].orig_pos;
1100 }
1101 }
1102
1103 // create MPI Datatype for exchanging direct stencils
1104 MPI_Datatype stencil_direct_dt = get_direct_stencil_mpi_datatype(comm);
1105
1106 // redistribute stencils based on target owners
1108 send_stencil_buffer, sendcounts, sdispls,
1109 recv_stencil_buffer, recvcounts, rdispls,
1110 sizeof(*stencil_buffer), stencil_direct_dt, comm);
1111
1112 yac_mpi_call(MPI_Type_free(&stencil_direct_dt), comm);
1113
1114 // sort stencils based on src rank first and by target orig pos second
1115 qsort(recv_stencil_buffer, tgt_count, sizeof(*recv_stencil_buffer),
1117
1118 memset(sendcounts, 0, (size_t)comm_size * sizeof(*sendcounts));
1119
1120 for (size_t i = 0; i < tgt_count; ++i)
1121 sendcounts[recv_stencil_buffer[i].src.rank]++;
1122
1124 1, sendcounts, recvcounts, sdispls, rdispls, comm);
1125
1126 send_buffer_size = sdispls[comm_size] + sendcounts[comm_size - 1];
1127 recv_buffer_size = rdispls[comm_size - 1] + recvcounts[comm_size - 1];
1128
1129 uint64_t * orig_pos_buffer =
1130 xmalloc((send_buffer_size + recv_buffer_size) * sizeof(*orig_pos_buffer));
1131 uint64_t * send_orig_pos_buffer = orig_pos_buffer + recv_buffer_size;
1132 uint64_t * recv_orig_pos_buffer = orig_pos_buffer;
1133
1134 for (size_t i = 0; i < tgt_count; ++i)
1135 send_orig_pos_buffer[sdispls[recv_stencil_buffer[i].src.rank + 1]++] =
1136 recv_stencil_buffer[i].src.orig_pos;
1137
1138 // inform source processes about their requested points
1140 send_orig_pos_buffer, sendcounts, sdispls,
1141 recv_orig_pos_buffer, recvcounts, rdispls,
1142 sizeof(*send_orig_pos_buffer), MPI_UINT64_T, comm);
1143
1144 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
1145
1146 // generate redist
1147 Xt_redist redist =
1149 recv_orig_pos_buffer, recvcounts, recv_stencil_buffer, sendcounts, comm);
1150 free(orig_pos_buffer);
1151 free(stencil_buffer);
1152
1153 yac_interpolation_add_direct(interp, redist);
1154
1155 if (redist != NULL) xt_redist_delete(redist);
1156}
1157
1158static MPI_Datatype get_direct_mf_stencil_mpi_datatype(MPI_Comm comm) {
1159
1161 MPI_Datatype direct_stencil_mf_dt;
1162 int array_of_blocklengths[] = {1, 1, 1};
1163 const MPI_Aint array_of_displacements[] =
1164 {(MPI_Aint)(intptr_t)(const void *)&(dummy.src) -
1165 (MPI_Aint)(intptr_t)(const void *)&dummy,
1166 (MPI_Aint)(intptr_t)(const void *)&(dummy.src_field_idx) -
1167 (MPI_Aint)(intptr_t)(const void *)&dummy,
1168 (MPI_Aint)(intptr_t)(const void *)&(dummy.orig_pos) -
1169 (MPI_Aint)(intptr_t)(const void *)&dummy};
1170 MPI_Datatype array_of_types[] =
1171 {yac_get_remote_point_info_mpi_datatype(comm), MPI_UINT64_T, MPI_UINT64_T};
1173 MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacements,
1174 array_of_types, &direct_stencil_mf_dt), comm);
1175 yac_mpi_call(MPI_Type_free(&(array_of_types[0])), comm);
1176 return yac_create_resized(direct_stencil_mf_dt, sizeof(dummy), comm);
1177}
1178
1179static int compare_stencils_direct_mf(const void * a, const void * b) {
1180
1181 int ret = ((struct interp_weight_stencil_direct_mf *)a)->src.rank -
1182 ((struct interp_weight_stencil_direct_mf *)b)->src.rank;
1183
1184 if (ret) return ret;
1185
1186 ret = (((struct interp_weight_stencil_direct_mf *)a)->src_field_idx >
1187 ((struct interp_weight_stencil_direct_mf *)b)->src_field_idx) -
1190
1191 if (ret) return ret;
1192
1193 return (((struct interp_weight_stencil_direct_mf *)a)->orig_pos >
1195 (((struct interp_weight_stencil_direct_mf *)a)->orig_pos <
1196 ((struct interp_weight_stencil_direct_mf *)b)->orig_pos);
1197}
1198
1200 MPI_Comm comm, uint64_t count,
1201 struct interp_weight_stencil * direct_mf_stencils,
1202 struct yac_interpolation * interp) {
1203
1204 // determine the number of source fields
1205 uint64_t num_src_fields = 0;
1206 for (size_t i = 0; i < count; ++i) {
1207 uint64_t src_field_idx =
1208 (uint64_t)(direct_mf_stencils[i].data.direct_mf.field_idx);
1209 if (src_field_idx >= num_src_fields) num_src_fields = src_field_idx + 1;
1210 }
1212 MPI_Allreduce(
1213 MPI_IN_PLACE, &num_src_fields, 1, MPI_UINT64_T, MPI_MAX, comm), comm);
1214
1215 int comm_size;
1216 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1217
1218 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
1220 (size_t)num_src_fields, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1221 size_t * size_t_buffer =
1222 xmalloc(4 * (size_t)comm_size * sizeof(*size_t_buffer));
1223 size_t * total_sendcounts = size_t_buffer + 0 * comm_size;
1224 size_t * total_recvcounts = size_t_buffer + 1 * comm_size;
1225 size_t * total_sdispls = size_t_buffer + 2 * comm_size;
1226 size_t * total_rdispls = size_t_buffer + 3 * comm_size;
1227
1228 for (size_t i = 0; i < count; ++i) {
1229 int curr_count = direct_mf_stencils[i].tgt.data.count;
1230 struct remote_point_info * curr_point_info =
1231 (curr_count == 1)?
1232 (&(direct_mf_stencils[i].tgt.data.data.single)):
1233 (direct_mf_stencils[i].tgt.data.data.multi);
1234 uint64_t src_field_idx =
1235 (uint64_t)(direct_mf_stencils[i].data.direct_mf.field_idx);
1236 for (int j = 0; j < curr_count; ++j)
1237 sendcounts[
1238 (uint64_t)(curr_point_info[j].rank) * num_src_fields + src_field_idx]++;
1239 }
1240
1242 (size_t)num_src_fields, sendcounts, recvcounts, sdispls, rdispls, comm);
1243
1244 size_t saccu = 0, raccu = 0;
1245 for (int i = 0; i < comm_size; ++i) {
1246 total_sdispls[i] = saccu;
1247 total_rdispls[i] = raccu;
1248 total_sendcounts[i] = 0;
1249 total_recvcounts[i] = 0;
1250 for (size_t j = 0; j < num_src_fields; ++j) {
1251 total_sendcounts[i] += sendcounts[num_src_fields * i + j];
1252 total_recvcounts[i] += recvcounts[num_src_fields * i + j];
1253 }
1254 saccu += total_sendcounts[i];
1255 raccu += total_recvcounts[i];
1256 }
1257
1258 size_t send_buffer_size = total_sdispls[comm_size - 1] +
1259 total_sendcounts[comm_size - 1];
1260 size_t recv_buffer_size = total_rdispls[comm_size - 1] +
1261 total_recvcounts[comm_size - 1];
1262 size_t tgt_count = recv_buffer_size;
1263
1264 struct interp_weight_stencil_direct_mf * stencil_buffer =
1265 xmalloc((send_buffer_size + recv_buffer_size) * sizeof(*stencil_buffer));
1266 struct interp_weight_stencil_direct_mf * send_stencil_buffer =
1267 stencil_buffer + recv_buffer_size;
1268 struct interp_weight_stencil_direct_mf * recv_stencil_buffer = stencil_buffer;
1269
1270 // pack direct_mf stencils
1271 for (size_t i = 0; i < count; ++i) {
1272 int curr_count = direct_mf_stencils[i].tgt.data.count;
1273 struct remote_point_info * curr_point_infos =
1274 (curr_count == 1)?
1275 (&(direct_mf_stencils[i].tgt.data.data.single)):
1276 (direct_mf_stencils[i].tgt.data.data.multi);
1277 struct remote_point_info src =
1278 select_src(direct_mf_stencils[i].data.direct_mf.src.data);
1279 uint64_t src_field_idx =
1280 (uint64_t)(direct_mf_stencils[i].data.direct_mf.field_idx);
1281 for (int j = 0; j < curr_count; ++j) {
1282 size_t pos =
1283 sdispls[(uint64_t)(curr_point_infos[j].rank) * num_src_fields +
1284 src_field_idx + 1]++;
1285 send_stencil_buffer[pos].src = src;
1286 send_stencil_buffer[pos].src_field_idx = src_field_idx;
1287 send_stencil_buffer[pos].orig_pos = curr_point_infos[j].orig_pos;
1288 }
1289 }
1290
1291 // create MPI Datatype for exchanging direct_mf stencils
1292 MPI_Datatype stencil_direct_mf_dt = get_direct_mf_stencil_mpi_datatype(comm);
1293
1294 // redistribute stencils based on target owners
1296 send_stencil_buffer, total_sendcounts, total_sdispls,
1297 recv_stencil_buffer, total_recvcounts, total_rdispls,
1298 sizeof(*stencil_buffer), stencil_direct_mf_dt, comm);
1299
1300 yac_mpi_call(MPI_Type_free(&stencil_direct_mf_dt), comm);
1301
1302 // sort stencils based on src rank first, src_field_idx, and
1303 // by target orig pos second
1304 qsort(recv_stencil_buffer, tgt_count, sizeof(*recv_stencil_buffer),
1306
1307 memset(sendcounts, 0,
1308 (size_t)comm_size * (size_t)num_src_fields * sizeof(*sendcounts));
1309
1310 for (size_t i = 0; i < tgt_count; ++i)
1311 sendcounts[(uint64_t)(recv_stencil_buffer[i].src.rank) * num_src_fields +
1312 recv_stencil_buffer[i].src_field_idx]++;
1313
1315 (size_t)num_src_fields, sendcounts, recvcounts, sdispls, rdispls, comm);
1316
1317 saccu = 0, raccu = 0;
1318 for (int i = 0; i < comm_size; ++i) {
1319 total_sdispls[i] = saccu;
1320 total_rdispls[i] = raccu;
1321 total_sendcounts[i] = 0;
1322 total_recvcounts[i] = 0;
1323 for (size_t j = 0; j < num_src_fields; ++j) {
1324 total_sendcounts[i] += sendcounts[num_src_fields * i + j];
1325 total_recvcounts[i] += recvcounts[num_src_fields * i + j];
1326 }
1327 saccu += total_sendcounts[i];
1328 raccu += total_recvcounts[i];
1329 }
1330
1331 send_buffer_size = total_sdispls[comm_size - 1] +
1332 total_sendcounts[comm_size - 1];
1333 recv_buffer_size = total_rdispls[comm_size - 1] +
1334 total_recvcounts[comm_size - 1];
1335
1336 uint64_t * orig_pos_buffer =
1337 xmalloc((send_buffer_size + recv_buffer_size) * sizeof(*orig_pos_buffer));
1338 uint64_t * send_orig_pos_buffer = orig_pos_buffer + recv_buffer_size;
1339 uint64_t * recv_orig_pos_buffer = orig_pos_buffer;
1340
1341 for (size_t i = 0; i < tgt_count; ++i)
1342 send_orig_pos_buffer[
1343 sdispls[(uint64_t)(recv_stencil_buffer[i].src.rank) * num_src_fields +
1344 recv_stencil_buffer[i].src_field_idx + 1]++] =
1345 recv_stencil_buffer[i].src.orig_pos;
1346
1347 // inform source processes about their requested points
1349 send_orig_pos_buffer, total_sendcounts, total_sdispls,
1350 recv_orig_pos_buffer, total_recvcounts, total_rdispls,
1351 sizeof(*send_orig_pos_buffer), MPI_UINT64_T, comm);
1352 free(size_t_buffer);
1353
1354 // generate redist
1355 Xt_redist * redists =
1357 recv_orig_pos_buffer, recvcounts, recv_stencil_buffer, sendcounts,
1358 (size_t)num_src_fields, comm);
1359
1360 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
1361 free(orig_pos_buffer);
1362 free(stencil_buffer);
1363
1364 yac_interpolation_add_direct_mf(interp, redists, (size_t)num_src_fields);
1365
1366 if (redists != NULL) {
1367 for (size_t i = 0; i < (size_t)num_src_fields; ++i)
1368 xt_redist_delete(redists[i]);
1369 free(redists);
1370 }
1371}
1372
1374 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1375 MPI_Comm comm) {
1376
1377 UNUSED(stencil);
1378 UNUSED(point_info_dt);
1379
1380 int pack_size_value;
1381
1382 yac_mpi_call(MPI_Pack_size(1, MPI_DOUBLE, comm, &pack_size_value), comm);
1383
1384 return pack_size_value;
1385}
1386
1388 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1389 MPI_Comm comm) {
1390
1391 return
1393 &(stencil->data.direct.src), point_info_dt, comm);
1394}
1395
1397 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1398 MPI_Comm comm) {
1399
1400 return
1402 stencil->data.sum.srcs, point_info_dt, comm);
1403}
1404
1406 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1407 MPI_Comm comm) {
1408
1409 int pack_size_weights;
1411 MPI_Pack_size(
1412 (int)(stencil->data.weight_sum.srcs->count),
1413 MPI_DOUBLE, comm, &pack_size_weights), comm);
1414
1415 return
1417 stencil->data.weight_sum.srcs, point_info_dt, comm) +
1418 pack_size_weights;
1419}
1420
1422 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1423 MPI_Comm comm) {
1424
1425 int pack_size_src_field_idx;
1427 MPI_Pack_size(
1428 1, MPI_UINT64_T, comm, &pack_size_src_field_idx), comm);
1429
1430 return
1432 &(stencil->data.direct_mf.src), point_info_dt, comm) +
1433 pack_size_src_field_idx;
1434}
1435
1437 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1438 MPI_Comm comm) {
1439
1440 int pack_size_weights, pack_size_field_indices;
1441 int count = (int)(stencil->data.weight_sum_mf.srcs->count);
1443 MPI_Pack_size(
1444 count, MPI_DOUBLE, comm, &pack_size_weights), comm);
1446 MPI_Pack_size(
1447 count, MPI_UINT64_T, comm, &pack_size_field_indices), comm);
1448
1449 return
1451 stencil->data.weight_sum_mf.srcs, point_info_dt, comm) +
1452 pack_size_weights + pack_size_field_indices;
1453}
1454
1456 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1457 MPI_Comm comm) {
1458
1459 int pack_size_field_indices;
1461 MPI_Pack_size(
1462 (int)(stencil->data.sum_mf.srcs->count),
1463 MPI_UINT64_T, comm, &pack_size_field_indices), comm);
1464
1465 return
1467 stencil->data.sum_mf.srcs, point_info_dt, comm) +
1468 pack_size_field_indices;
1469}
1470
1472 struct interp_weight_stencil * stencil, struct remote_point point) {
1473
1474 struct interp_weight_stencil stencil_copy = *stencil;
1475 stencil_copy.tgt = copy_remote_point(point);
1476
1477 YAC_ASSERT(
1478 (stencil->type == FIXED) ||
1479 (stencil->type == DIRECT) ||
1480 (stencil->type == SUM) ||
1481 (stencil->type == WEIGHT_SUM) ||
1482 (stencil->type == DIRECT_MF) ||
1483 (stencil->type == SUM_MF) ||
1484 (stencil->type == WEIGHT_SUM_MF),
1485 "ERROR(copy_interp_weight_stencil): invalid stencil type")
1486
1487 switch (stencil->type) {
1488 case(FIXED):
1489 // nothing to be done
1490 break;
1491 case(DIRECT):
1492 stencil_copy.data.direct.src =
1493 copy_remote_point(stencil->data.direct.src);
1494 break;
1495 case(SUM):
1496 stencil_copy.data.weight_sum.weights = NULL;
1497 stencil_copy.data.sum.srcs =
1499 stencil->data.sum.srcs->data, stencil->data.sum.srcs->count);
1500 break;
1501 case(WEIGHT_SUM): {
1502 stencil_copy.data.weight_sum.srcs =
1504 stencil->data.weight_sum.srcs->data,
1505 stencil->data.weight_sum.srcs->count);
1506 size_t weight_size =
1507 stencil->data.weight_sum.srcs->count *
1508 sizeof(*(stencil_copy.data.weight_sum.weights));
1509 stencil_copy.data.weight_sum.weights = xmalloc(weight_size);
1510 memcpy(stencil_copy.data.weight_sum.weights,
1511 stencil->data.weight_sum.weights, weight_size);
1512 break;
1513 }
1514 case(DIRECT_MF):
1515 stencil_copy.data.direct_mf.src =
1517 stencil_copy.data.direct_mf.field_idx =
1518 stencil->data.direct_mf.field_idx;
1519 break;
1520 case(SUM_MF): {
1521 stencil_copy.data.sum_mf.srcs =
1523 stencil->data.sum_mf.srcs->data,
1524 stencil->data.sum_mf.srcs->count);
1525 size_t field_indices_size =
1526 stencil->data.sum_mf.srcs->count *
1527 sizeof(*(stencil_copy.data.sum_mf.field_indices));
1528 stencil_copy.data.sum_mf.field_indices = xmalloc(field_indices_size);
1529 memcpy(stencil_copy.data.sum_mf.field_indices,
1530 stencil->data.sum_mf.field_indices, field_indices_size);
1531 break;
1532 }
1533 default:
1534 case(WEIGHT_SUM_MF): {
1535 stencil_copy.data.weight_sum_mf.srcs =
1537 stencil->data.weight_sum_mf.srcs->data,
1538 stencil->data.weight_sum_mf.srcs->count);
1539 size_t weight_size =
1540 stencil->data.weight_sum_mf.srcs->count *
1541 sizeof(*(stencil_copy.data.weight_sum_mf.weights));
1542 stencil_copy.data.weight_sum_mf.weights = xmalloc(weight_size);
1543 memcpy(stencil_copy.data.weight_sum_mf.weights,
1544 stencil->data.weight_sum_mf.weights, weight_size);
1545 size_t field_indices_size =
1546 stencil->data.weight_sum_mf.srcs->count *
1547 sizeof(*(stencil_copy.data.weight_sum_mf.field_indices));
1548 stencil_copy.data.weight_sum_mf.field_indices =
1549 xmalloc(field_indices_size);
1550 memcpy(stencil_copy.data.weight_sum_mf.field_indices,
1551 stencil->data.weight_sum_mf.field_indices, field_indices_size);
1552 break;
1553 }
1554 };
1555 return stencil_copy;
1556}
1557
1559 struct interp_weight_stencil * stencil, struct remote_point point,
1560 double weight) {
1561
1562 if (weight == 1.0) return copy_interp_weight_stencil(stencil, point);
1563
1564 struct remote_point * srcs;
1565 size_t src_count;
1566 double * weights;
1567
1568 YAC_ASSERT(
1569 (stencil->type == FIXED) ||
1570 (stencil->type == DIRECT) ||
1571 (stencil->type == SUM) ||
1572 (stencil->type == WEIGHT_SUM),
1573 "ERROR(wcopy_interp_weight_stencil): invalid stencil type")
1574
1575 switch (stencil->type) {
1576 case (FIXED):
1577 return
1578 (struct interp_weight_stencil) {
1579 .type = FIXED,
1580 .data.fixed.value = stencil->data.fixed.value * weight,
1581 .tgt = copy_remote_point(point)};
1582 case (DIRECT):
1583 src_count = 1;
1584 srcs = &(stencil->data.direct.src);
1585 weights = NULL;
1586 break;
1587 case (SUM):
1588 src_count = stencil->data.sum.srcs->count;
1589 srcs = stencil->data.sum.srcs->data;
1590 weights = NULL;
1591 break;
1592 default:
1593 case (WEIGHT_SUM):
1594 src_count = stencil->data.weight_sum.srcs->count;
1595 srcs = stencil->data.weight_sum.srcs->data;
1596 weights = stencil->data.weight_sum.weights;
1597 break;
1598 };
1599
1600 double * new_weights = xmalloc(src_count * sizeof(*new_weights));
1601 if (weights == NULL)
1602 for (size_t i = 0; i < src_count; ++i) new_weights[i] = weight;
1603 else
1604 for (size_t i = 0; i < src_count; ++i) new_weights[i] = weights[i] * weight;
1605
1606 struct interp_weight_stencil stencil_wcopy;
1607 stencil_wcopy.type = WEIGHT_SUM;
1608 stencil_wcopy.data.weight_sum.srcs = copy_remote_points(srcs, src_count);
1609 stencil_wcopy.data.weight_sum.weights = new_weights;
1610 stencil_wcopy.tgt = copy_remote_point(point);
1611
1612 return stencil_wcopy;
1613}
1614
1615static int compare_w_global_id(const void * a, const void * b) {
1616
1617 int ret = (((struct weighted_global_id *)a)->global_id >
1618 ((struct weighted_global_id *)b)->global_id) -
1619 (((struct weighted_global_id *)a)->global_id <
1620 ((struct weighted_global_id *)b)->global_id);
1621
1622 if (ret) return ret;
1623
1624 return (((struct weighted_global_id *)a)->weight >
1625 ((struct weighted_global_id *)b)->weight) -
1626 (((struct weighted_global_id *)a)->weight <
1627 ((struct weighted_global_id *)b)->weight);
1628}
1629
1630static int compare_remote_point(const void * a, const void * b) {
1631
1632 return ((const struct remote_point*)a)->global_id -
1633 ((const struct remote_point*)b)->global_id;
1634}
1635
1636static void compact_srcs_w(
1637 struct remote_points * srcs, double ** w) {
1638
1639 struct remote_point * data = srcs->data;
1640 size_t count = srcs->count;
1641
1642 struct weighted_global_id * w_global_id =
1643 xmalloc(count * sizeof(*w_global_id));
1644
1645 // extract global ids and weights
1646 for (size_t i = 0; i < count; ++i) {
1647 w_global_id[i].global_id = data[i].global_id;
1648 w_global_id[i].weight = (*w)[i];
1649 }
1650
1651 // sort by global ids and weights
1652 qsort(w_global_id, count, sizeof(*w_global_id), compare_w_global_id);
1653
1654 // sort sources by global ids
1655 qsort(data, count, sizeof(*data), compare_remote_point);
1656
1657 size_t new_count = 0;
1658
1659 // compact sources
1660 for (size_t i = 0; i < count;) {
1661
1662 data[new_count] = data[i];
1663
1664 yac_int curr_global_id = w_global_id[i].global_id;
1665 double curr_weight = w_global_id[i].weight;
1666
1667 ++i;
1668
1669 while((i < count) && (curr_global_id == w_global_id[i].global_id)) {
1670
1671 curr_weight += w_global_id[i].weight;
1672 ++i;
1673 }
1674
1675 (*w)[new_count] = curr_weight;
1676 ++new_count;
1677 }
1678
1679 srcs->data = xrealloc(data, new_count * sizeof(*data));
1680 srcs->count = new_count;
1681 *w = xrealloc(*w, new_count * sizeof(**w));
1682}
1683
1685 struct interp_weight_stencil ** stencils, double * w, size_t num_stencils) {
1686
1687 size_t src_count = 0;
1688 size_t point_info_buffer_size = 0;
1689
1690 for (size_t i = 0; i < num_stencils; ++i) {
1691 size_t curr_src_count;
1692 struct remote_point * srcs;
1693 YAC_ASSERT(
1694 (stencils[i]->type == DIRECT) ||
1695 (stencils[i]->type == SUM) ||
1696 (stencils[i]->type == WEIGHT_SUM),
1697 "ERROR(stencils_merge_wsum): invalid stencil type")
1698 switch (stencils[i]->type) {
1699 case (DIRECT):
1700 curr_src_count = 1;
1701 srcs = &(stencils[i]->data.direct.src);
1702 break;
1703 case (SUM):
1704 curr_src_count = stencils[i]->data.sum.srcs->count;
1705 srcs = stencils[i]->data.sum.srcs->data;
1706 break;
1707 default:
1708 case (WEIGHT_SUM):
1709 curr_src_count = stencils[i]->data.weight_sum.srcs->count;
1710 srcs = stencils[i]->data.weight_sum.srcs->data;
1711 break;
1712 };
1713 src_count += curr_src_count;
1714 for (size_t j = 0, curr_src_data_count; j < curr_src_count; ++j)
1715 if (((curr_src_data_count = srcs[j].data.count)) > 1)
1716 point_info_buffer_size += curr_src_data_count;
1717 }
1718
1719 struct remote_points * srcs =
1720 xmalloc(point_info_buffer_size * sizeof(struct remote_point_info) +
1721 sizeof(*srcs));
1722 srcs->data = xmalloc(src_count * sizeof(*(srcs->data)));
1723 srcs->count = src_count;
1724 struct remote_point_info * point_info_buffer = &(srcs->buffer[0]);
1725 double * new_w = xmalloc(src_count * sizeof(*new_w));
1726
1727 for (size_t i = 0, offset = 0; i < num_stencils; ++i) {
1728 size_t curr_src_count;
1729 struct remote_point * curr_srcs;
1730 double * stencil_w;
1731 YAC_ASSERT(
1732 (stencils[i]->type == DIRECT) ||
1733 (stencils[i]->type == SUM) ||
1734 (stencils[i]->type == WEIGHT_SUM),
1735 "ERROR(stencils_merge_wsum): invalid stencil type")
1736 switch (stencils[i]->type) {
1737 case (DIRECT):
1738 curr_src_count = 1;
1739 curr_srcs = &(stencils[i]->data.direct.src);
1740 stencil_w = NULL;
1741 break;
1742 case (SUM):
1743 curr_src_count = stencils[i]->data.sum.srcs->count;
1744 curr_srcs = stencils[i]->data.sum.srcs->data;
1745 stencil_w = NULL;
1746 break;
1747 default:
1748 case (WEIGHT_SUM):
1749 curr_src_count = stencils[i]->data.weight_sum.srcs->count;
1750 curr_srcs = stencils[i]->data.weight_sum.srcs->data;
1751 stencil_w = stencils[i]->data.weight_sum.weights;
1752 break;
1753 };
1755 srcs->data + offset, curr_srcs, curr_src_count, &point_info_buffer);
1756 if (stencil_w == NULL)
1757 for (size_t j = 0; j < curr_src_count; ++j, ++offset)
1758 new_w[offset] = w[i];
1759 else
1760 for (size_t j = 0; j < curr_src_count; ++j, ++offset)
1761 new_w[offset] = w[i] * stencil_w[j];
1762 }
1763
1764 compact_srcs_w(srcs, &new_w);
1765
1766 struct interp_weight_stencil merge_stencil;
1767 merge_stencil.type = WEIGHT_SUM;
1768 merge_stencil.data.weight_sum.srcs = srcs;
1769 merge_stencil.data.weight_sum.weights = new_w;
1770
1771 return merge_stencil;
1772}
1773
1775 struct interp_weight_stencil ** stencils, double * w, size_t num_stencils) {
1776
1777 for (size_t i = 0; i < num_stencils; ++i)
1778 if (w[i] != 1.0)
1779 return stencils_merge_wsum(stencils, w, num_stencils);
1780
1781 size_t src_count = 0;
1782 size_t point_info_buffer_size = 0;
1783
1784 for (size_t i = 0; i < num_stencils; ++i) {
1785 size_t curr_src_count;
1786 struct remote_point * srcs;
1787 YAC_ASSERT(
1788 (stencils[i]->type == DIRECT) ||
1789 (stencils[i]->type == SUM),
1790 "ERROR(stencils_merge_sum): invalid stencil type")
1791 switch (stencils[i]->type) {
1792 case (DIRECT):
1793 curr_src_count = 1;
1794 srcs = &(stencils[i]->data.direct.src);
1795 break;
1796 default:
1797 case (SUM):
1798 curr_src_count = stencils[i]->data.sum.srcs->count;
1799 srcs = stencils[i]->data.sum.srcs->data;
1800 break;
1801 };
1802 src_count += curr_src_count;
1803 for (size_t j = 0, curr_src_data_count; j < curr_src_count; ++j)
1804 if (((curr_src_data_count = srcs[j].data.count)) > 1)
1805 point_info_buffer_size += curr_src_data_count;
1806 }
1807
1808 struct remote_points * srcs =
1809 xmalloc(point_info_buffer_size * sizeof(struct remote_point_info) +
1810 sizeof(*srcs));
1811 srcs->data = xmalloc(src_count * sizeof(*(srcs->data)));
1812 srcs->count = src_count;
1813 struct remote_point_info * point_info_buffer = &(srcs->buffer[0]);
1814
1815 for (size_t i = 0, offset = 0; i < num_stencils; ++i) {
1816 size_t curr_src_count;
1817 struct remote_point * curr_srcs;
1818 YAC_ASSERT(
1819 (stencils[i]->type == DIRECT) ||
1820 (stencils[i]->type == SUM),
1821 "ERROR(stencils_merge_sum): invalid stencil type")
1822 switch (stencils[i]->type) {
1823 case (DIRECT):
1824 curr_src_count = 1;
1825 curr_srcs = &(stencils[i]->data.direct.src);
1826 break;
1827 default:
1828 case (SUM):
1829 curr_src_count = stencils[i]->data.sum.srcs->count;
1830 curr_srcs = stencils[i]->data.sum.srcs->data;
1831 break;
1832 };
1834 srcs->data + offset, curr_srcs, curr_src_count, &point_info_buffer);
1835 offset += curr_src_count;
1836 }
1837
1838 qsort(srcs->data, srcs->count, sizeof(*(srcs->data)), compare_remote_point);
1839
1840 struct interp_weight_stencil merge_stencil;
1841 merge_stencil.type = SUM;
1842 merge_stencil.data.sum.srcs = srcs;
1843
1844 return merge_stencil;
1845}
1846
1848 struct interp_weight_stencil ** stencils, double * w, size_t num_stencils,
1849 struct remote_point point) {
1850
1851 if (num_stencils == 1)
1852 return wcopy_interp_weight_stencil(*stencils, point, *w);
1853
1854 int fixed_count = 0;
1855 int direct_count = 0;
1856 int sum_count = 0;
1857 int wsum_count = 0;
1858 double fixed_value = 0.0;
1859
1860 for (size_t i = 0; i < num_stencils; ++i) {
1861 YAC_ASSERT(
1862 (stencils[i]->type != DIRECT_MF) &&
1863 (stencils[i]->type != SUM_MF) &&
1864 (stencils[i]->type != WEIGHT_SUM_MF),
1865 "ERROR(stencils_merge): multiple source fields not yet supported")
1866 YAC_ASSERT(
1867 (stencils[i]->type == FIXED) ||
1868 (stencils[i]->type == DIRECT) ||
1869 (stencils[i]->type == SUM) ||
1870 (stencils[i]->type == WEIGHT_SUM),
1871 "ERROR(stencils_merge): unsupported stencil type")
1872 switch (stencils[i]->type) {
1873 case (FIXED):
1874 fixed_value += stencils[i]->data.fixed.value * w[i];
1875 fixed_count++;
1876 break;
1877 case (DIRECT):
1878 direct_count++;
1879 break;
1880 case (SUM):
1881 sum_count++;
1882 break;
1883 default:
1884 case (WEIGHT_SUM):
1885 wsum_count++;
1886 break;
1887 };
1888 }
1889
1890 struct interp_weight_stencil merge_stencil;
1891
1892 YAC_ASSERT(
1893 (fixed_count > 0) || (wsum_count > 0) ||
1894 (sum_count > 0) || (direct_count > 0),
1895 "ERROR(stencils_merge): unknown error")
1896 if (fixed_count > 0) {
1897
1898 YAC_ASSERT(
1899 (direct_count + sum_count + wsum_count) <= 0,
1900 "ERROR(stencils_merge): invalid stencil combination")
1901
1902 merge_stencil = **stencils;
1903 merge_stencil.data.fixed.value = fixed_value;
1904 } else if (wsum_count > 0)
1905 merge_stencil =
1906 stencils_merge_wsum(stencils, w, num_stencils);
1907 else if ((sum_count > 0) || (direct_count > 0))
1908 merge_stencil =
1909 stencils_merge_sum(stencils, w, num_stencils);
1910
1911 merge_stencil.tgt = copy_remote_point(point);
1912
1913 return merge_stencil;
1914}
1915
1917 struct interp_weight_stencil * stencils, size_t count, size_t * pack_order,
1918 int * pack_sizes, MPI_Datatype point_info_dt, MPI_Comm comm) {
1919
1920 int pack_size_type;
1921 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_type), comm);
1922
1923 for (size_t i = 0; i < count; ++i) {
1924
1925 struct interp_weight_stencil * curr_stencil = stencils + pack_order[i];
1926 int (*func_pack_size)(
1927 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1928 MPI_Comm comm);
1929 YAC_ASSERT(
1930 (curr_stencil->type == FIXED) ||
1931 (curr_stencil->type == DIRECT) ||
1932 (curr_stencil->type == SUM) ||
1933 (curr_stencil->type == WEIGHT_SUM) ||
1934 (curr_stencil->type == DIRECT_MF) ||
1935 (curr_stencil->type == SUM_MF) ||
1936 (curr_stencil->type == WEIGHT_SUM_MF),
1937 "ERROR(get_stencils_pack_sizes): invalid stencil type")
1938 switch (curr_stencil->type) {
1939 case(FIXED):
1940 func_pack_size = get_stencil_pack_size_fixed;
1941 break;
1942 case(DIRECT):
1943 func_pack_size = get_stencil_pack_size_direct;
1944 break;
1945 case(SUM):
1946 func_pack_size = get_stencil_pack_size_sum;
1947 break;
1948 default:
1949 case(WEIGHT_SUM):
1950 func_pack_size = get_stencil_pack_size_wsum;
1951 break;
1952 case(DIRECT_MF):
1953 func_pack_size = get_stencil_pack_size_direct_mf;
1954 break;
1955 case(SUM_MF):
1956 func_pack_size = get_stencil_pack_size_sum_mf;
1957 break;
1958 case(WEIGHT_SUM_MF):
1959 func_pack_size = get_stencil_pack_size_wsum_mf;
1960 break;
1961 };
1962 pack_sizes[i] = pack_size_type +
1964 &(curr_stencil->tgt), point_info_dt, comm) +
1965 func_pack_size(curr_stencil, point_info_dt, comm);
1966 }
1967}
1968
1970 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
1971 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
1972
1973 UNUSED(point_info_dt);
1974
1975 // fixed value
1977 MPI_Pack(&(stencil->data.fixed.value), 1, MPI_DOUBLE, buffer, buffer_size,
1978 position, comm), comm);
1979}
1980
1982 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
1983 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
1984
1985 // src
1987 &(stencil->data.direct.src), buffer, buffer_size, position, point_info_dt,
1988 comm);
1989}
1990
1992 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
1993 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
1994
1995 // srcs
1997 stencil->data.sum.srcs, buffer, buffer_size, position, point_info_dt, comm);
1998}
1999
2001 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2002 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2003
2004 // srcs
2006 stencil->data.weight_sum.srcs, buffer, buffer_size, position,
2007 point_info_dt, comm);
2008 // weights
2010 MPI_Pack(stencil->data.weight_sum.weights,
2011 (int)(stencil->data.weight_sum.srcs->count), MPI_DOUBLE,
2012 buffer, buffer_size, position, comm), comm);
2013}
2014
2016 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2017 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2018
2019 // src
2021 &(stencil->data.direct_mf.src), buffer, buffer_size, position,
2022 point_info_dt, comm);
2023
2024 // field_idx
2025 uint64_t temp_field_idx = (uint64_t)(stencil->data.direct_mf.field_idx);
2027 MPI_Pack(&temp_field_idx, 1, MPI_UINT64_T,
2028 buffer, buffer_size, position, comm), comm);
2029}
2030
2032 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2033 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2034
2035 // srcs
2037 stencil->data.sum_mf.srcs, buffer, buffer_size, position,
2038 point_info_dt, comm);
2039
2040 size_t count = stencil->data.sum_mf.srcs->count;
2041 // field_indices
2042 uint64_t * temp_field_indices = xmalloc(count * sizeof(*temp_field_indices));
2043 for (size_t i = 0; i < count; ++i)
2044 temp_field_indices[i] =
2045 (uint64_t)(stencil->data.sum_mf.field_indices[i]);
2047 MPI_Pack(temp_field_indices, (int)count, MPI_UINT64_T,
2048 buffer, buffer_size, position, comm), comm);
2049 free(temp_field_indices);
2050}
2051
2053 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2054 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2055
2056 // srcs
2058 stencil->data.weight_sum_mf.srcs, buffer, buffer_size, position,
2059 point_info_dt, comm);
2060
2061 size_t count = stencil->data.weight_sum_mf.srcs->count;
2062 // weights
2064 MPI_Pack(stencil->data.weight_sum_mf.weights, (int)count, MPI_DOUBLE,
2065 buffer, buffer_size, position, comm), comm);
2066 // field_indices
2067 uint64_t * temp_field_indices = xmalloc(count * sizeof(*temp_field_indices));
2068 for (size_t i = 0; i < count; ++i)
2069 temp_field_indices[i] =
2070 (uint64_t)(stencil->data.weight_sum_mf.field_indices[i]);
2072 MPI_Pack(temp_field_indices, (int)count, MPI_UINT64_T,
2073 buffer, buffer_size, position, comm), comm);
2074 free(temp_field_indices);
2075}
2076
2077static void pack_stencils(
2078 struct interp_weight_stencil * stencils, size_t count, size_t * pack_order,
2079 void ** pack_data, int * pack_sizes, MPI_Datatype point_info_dt,
2080 MPI_Comm comm) {
2081
2083 stencils, count, pack_order, pack_sizes, point_info_dt, comm);
2084
2085 size_t pack_buffer_size = 0;
2086 for (size_t i = 0; i < count; ++i)
2087 pack_buffer_size += (size_t)(pack_sizes[i]);
2088
2089 void * pack_data_ = xmalloc(pack_buffer_size);
2090 size_t total_pack_size = 0;
2091
2092 for (size_t i = 0; i < count; ++i) {
2093
2094 struct interp_weight_stencil * curr_stencil = stencils + pack_order[i];
2095 void (*func_pack)(
2096 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2097 int * position, MPI_Datatype point_info_dt, MPI_Comm comm);
2098
2099 YAC_ASSERT(
2100 (curr_stencil->type == FIXED) ||
2101 (curr_stencil->type == DIRECT) ||
2102 (curr_stencil->type == SUM) ||
2103 (curr_stencil->type == WEIGHT_SUM) ||
2104 (curr_stencil->type == DIRECT_MF) ||
2105 (curr_stencil->type == SUM_MF) ||
2106 (curr_stencil->type == WEIGHT_SUM_MF),
2107 "ERROR(pack_stencils): invalid stencil type")
2108 switch (curr_stencil->type) {
2109 default:
2110 case(FIXED):
2111 func_pack = pack_stencil_fixed;
2112 break;
2113 case(DIRECT):
2114 func_pack = pack_stencil_direct;
2115 break;
2116 case(SUM):
2117 func_pack = pack_stencil_sum;
2118 break;
2119 case(WEIGHT_SUM):
2120 func_pack = pack_stencil_wsum;
2121 break;
2122 case(DIRECT_MF):
2123 func_pack = pack_stencil_direct_mf;
2124 break;
2125 case(SUM_MF):
2126 func_pack = pack_stencil_sum_mf;
2127 break;
2128 case(WEIGHT_SUM_MF):
2129 func_pack = pack_stencil_wsum_mf;
2130 break;
2131 };
2132
2133 int position = 0;
2134 int type = (int)curr_stencil->type;
2135 void * buffer = (void*)((char*)pack_data_ + total_pack_size);
2136 int buffer_size = pack_sizes[i];
2137
2138 // type
2140 MPI_Pack(&type, 1, MPI_INT, buffer, buffer_size, &position, comm), comm);
2141 // tgt
2142 yac_remote_point_pack(&(curr_stencil->tgt), buffer, buffer_size,
2143 &position, point_info_dt, comm);
2144 // stencil data
2145 func_pack(curr_stencil, buffer, buffer_size, &position, point_info_dt, comm);
2146
2148 pack_sizes[i] >= position,
2149 "ERROR(pack_stencils): "
2150 "actual pack size is bigger then computed one (%d > %d)",
2151 position, pack_sizes[i]);
2152
2153 pack_sizes[i] = position;
2154 total_pack_size += (size_t)position;
2155 }
2156
2157 *pack_data = xrealloc(pack_data_, total_pack_size);
2158}
2159
2161 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2162 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2163
2164 UNUSED(point_info_dt);
2165
2166 // fixed value
2168 MPI_Unpack(buffer, buffer_size, position, &(stencil->data.fixed.value), 1,
2169 MPI_DOUBLE, comm), comm);
2170}
2171
2173 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2174 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2175
2176 // src
2178 buffer, buffer_size, position, &stencil->data.direct.src,
2179 point_info_dt, comm);
2180}
2181
2183 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2184 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2185
2186 // srcs
2187 stencil->data.weight_sum.weights = NULL;
2189 buffer, buffer_size, position, &(stencil->data.sum.srcs), point_info_dt,
2190 comm);
2191}
2192
2194 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2195 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2196
2197 // srcs
2199 buffer, buffer_size, position, &(stencil->data.weight_sum.srcs),
2200 point_info_dt, comm);
2201
2202 size_t count = stencil->data.weight_sum.srcs->count;
2203
2204 stencil->data.weight_sum.weights =
2205 xmalloc(count * sizeof(*(stencil->data.weight_sum.weights)));
2206
2207 // weights
2209 MPI_Unpack(buffer, buffer_size, position, stencil->data.weight_sum.weights,
2210 (int)count, MPI_DOUBLE, comm), comm);
2211}
2212
2214 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2215 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2216
2217 // src
2219 buffer, buffer_size, position, &stencil->data.direct_mf.src,
2220 point_info_dt, comm);
2221
2222 // field_idx
2223 uint64_t temp_field_idx;
2225 MPI_Unpack(
2226 buffer, buffer_size, position, &temp_field_idx,
2227 1, MPI_UINT64_T, comm), comm);
2228 stencil->data.direct_mf.field_idx = (size_t)temp_field_idx;
2229}
2230
2232 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2233 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2234
2235 // srcs
2237 buffer, buffer_size, position, &(stencil->data.sum_mf.srcs),
2238 point_info_dt, comm);
2239
2240 size_t count = stencil->data.sum_mf.srcs->count;
2241
2242 uint64_t * temp_field_indices = xmalloc(count * sizeof(*temp_field_indices));
2243 stencil->data.sum_mf.field_indices =
2244 xmalloc(count * sizeof(*(stencil->data.sum_mf.field_indices)));
2245
2246 // field_indices
2248 MPI_Unpack(
2249 buffer, buffer_size, position, temp_field_indices,
2250 (int)count, MPI_UINT64_T, comm), comm);
2251 for (size_t i = 0; i < count; ++i)
2252 stencil->data.sum_mf.field_indices[i] = (size_t)(temp_field_indices[i]);
2253 free(temp_field_indices);
2254}
2255
2257 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2258 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2259
2260 // srcs
2262 buffer, buffer_size, position, &(stencil->data.weight_sum_mf.srcs),
2263 point_info_dt, comm);
2264
2265 size_t count = stencil->data.weight_sum_mf.srcs->count;
2266
2267 stencil->data.weight_sum_mf.weights =
2268 xmalloc(count * sizeof(*(stencil->data.weight_sum_mf.weights)));
2269
2270 // weights
2272 MPI_Unpack(
2273 buffer, buffer_size, position, stencil->data.weight_sum_mf.weights,
2274 (int)count, MPI_DOUBLE, comm), comm);
2275
2276 uint64_t * temp_field_indices = xmalloc(count * sizeof(*temp_field_indices));
2278 xmalloc(count * sizeof(*(stencil->data.weight_sum_mf.field_indices)));
2279
2280 // field_indices
2282 MPI_Unpack(
2283 buffer, buffer_size, position, temp_field_indices,
2284 (int)count, MPI_UINT64_T, comm), comm);
2285 for (size_t i = 0; i < count; ++i)
2286 stencil->data.weight_sum_mf.field_indices[i] =
2287 (size_t)(temp_field_indices[i]);
2288 free(temp_field_indices);
2289}
2290
2292 struct interp_weight_stencil * stencils, size_t count,
2293 void * packed_data, size_t packed_data_size,
2294 MPI_Datatype point_info_dt, MPI_Comm comm) {
2295
2296 for (size_t i = 0, offset = 0; i < count; ++i) {
2297
2298 YAC_ASSERT(
2299 packed_data_size >= offset,
2300 "ERROR(unpack_stencils): invalid offset");
2301
2302 int position = 0;
2303 void * curr_buffer = (void*)((unsigned char*)packed_data + offset);
2304 int buffer_size = (int)(MIN(packed_data_size - offset, INT_MAX));
2305 struct interp_weight_stencil * curr_stencil = stencils + i;
2306
2307 int type;
2309 MPI_Unpack(
2310 curr_buffer, buffer_size, &position, &type, 1, MPI_INT, comm), comm);
2311
2312 void (*func_unpack)(
2313 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2314 int * position, MPI_Datatype point_info_dt, MPI_Comm comm);
2315
2316 YAC_ASSERT(
2317 (type == FIXED) ||
2318 (type == DIRECT) || (type == SUM) || (type == WEIGHT_SUM) ||
2319 (type == DIRECT_MF) || (type == SUM_MF) || (type == WEIGHT_SUM_MF),
2320 "ERROR(unpack_stencils): invalid stencil type")
2321 switch (type) {
2322 case(FIXED):
2323 func_unpack = unpack_stencil_fixed;
2324 break;
2325 case(DIRECT):
2326 func_unpack = unpack_stencil_direct;
2327 break;
2328 case(SUM):
2329 func_unpack = unpack_stencil_sum;
2330 break;
2331 default:
2332 case(WEIGHT_SUM):
2333 func_unpack = unpack_stencil_wsum;
2334 break;
2335 case(DIRECT_MF):
2336 func_unpack = unpack_stencil_direct_mf;
2337 break;
2338 case(SUM_MF):
2339 func_unpack = unpack_stencil_sum_mf;
2340 break;
2341 case(WEIGHT_SUM_MF):
2342 func_unpack = unpack_stencil_wsum_mf;
2343 break;
2344 };
2345
2346 curr_stencil->type =
2349 curr_buffer, buffer_size, &position, &(curr_stencil->tgt),
2350 point_info_dt, comm);
2351 func_unpack(
2352 curr_stencil, curr_buffer, buffer_size, &position, point_info_dt, comm);
2353 offset += (size_t)position;
2354 }
2355}
2356
2358 MPI_Comm comm, struct interp_weight_stencil * stencils,
2359 size_t * stencil_indices,
2360 size_t * stencil_sendcounts, size_t * stencil_recvcounts) {
2361
2362 int comm_rank, comm_size;
2363 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
2364 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
2365
2366 YAC_ASSERT(
2367 stencil_sendcounts[comm_rank] == stencil_recvcounts[comm_rank],
2368 "ERROR(exchange_stencils): error in arguments")
2369
2370 size_t send_count = 0, recv_count = 0;
2371 size_t local_send_offset = 0;
2372 size_t local_recv_offset = 0;
2373 size_t local_count = (size_t)(stencil_sendcounts[comm_rank]);
2374 for (int i = 0; i < comm_rank; ++i) {
2375 send_count += stencil_sendcounts[i];
2376 recv_count += stencil_recvcounts[i];
2377 local_send_offset += stencil_sendcounts[i];
2378 local_recv_offset += stencil_recvcounts[i];
2379 }
2380 local_send_offset = send_count;
2381 local_recv_offset = recv_count;
2382 stencil_sendcounts[comm_rank] = 0;
2383 stencil_recvcounts[comm_rank] = 0;
2384 for (int i = comm_rank + 1; i < comm_size; ++i) {
2385 send_count += stencil_sendcounts[i];
2386 recv_count += stencil_recvcounts[i];
2387 }
2388
2389 struct interp_weight_stencil * new_stencils =
2390 xmalloc((recv_count + local_count) * sizeof(*new_stencils));
2391 size_t * local_stencil_indices =
2392 xmalloc(local_count * sizeof(*local_stencil_indices));
2393 memcpy(local_stencil_indices, stencil_indices + local_send_offset,
2394 local_count * sizeof(*local_stencil_indices));
2395
2396 // remove the local stencil indices
2397 memmove(
2398 stencil_indices + local_send_offset,
2399 stencil_indices + local_send_offset + local_count,
2400 (send_count - local_send_offset) * sizeof(*stencil_indices));
2401
2402 // pack the stencils that need to be send to other processes
2403 void * send_buffer;
2404 int * pack_sizes = xmalloc(send_count * sizeof(*pack_sizes));
2405 MPI_Datatype point_info_dt = yac_get_remote_point_info_mpi_datatype(comm);
2407 stencils, send_count, stencil_indices, &send_buffer, pack_sizes,
2408 point_info_dt, comm);
2409
2410 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
2412 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
2413
2414 send_count = 0;
2415 for (int rank = 0; rank < comm_size; ++rank) {
2416 size_t sendcount = 0;
2417 int curr_num_stencils = stencil_sendcounts[rank];
2418 for (int j = 0; j < curr_num_stencils; ++j, ++send_count)
2419 sendcount += (size_t)(pack_sizes[send_count]);
2420 sendcounts[rank] = sendcount;
2421 }
2422 free(pack_sizes);
2423
2425 1, sendcounts, recvcounts, sdispls, rdispls, comm);
2426
2427 size_t recv_size = recvcounts[comm_size - 1] + rdispls[comm_size - 1];
2428
2429 void * recv_buffer = xmalloc(recv_size);
2430
2431 // exchange stencils
2432 yac_alltoallv_packed_p2p(
2433 send_buffer, sendcounts, sdispls+1,
2434 recv_buffer, recvcounts, rdispls, comm);
2435 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
2436 free(send_buffer);
2437
2438 // unpack stencils
2440 new_stencils, recv_count,
2441 recv_buffer, recv_size, point_info_dt, comm);
2442 yac_mpi_call(MPI_Type_free(&point_info_dt), comm);
2443 free(recv_buffer);
2444
2445 memmove(new_stencils + local_recv_offset + local_count,
2446 new_stencils + local_recv_offset ,
2447 (recv_count - local_recv_offset ) * sizeof(*new_stencils));
2448 for (size_t i = 0; i < local_count; ++i, ++local_recv_offset )
2449 new_stencils[local_recv_offset] =
2451 stencils + local_stencil_indices[i],
2452 stencils[local_stencil_indices[i]].tgt);
2453 free(local_stencil_indices);
2454
2455 return new_stencils;
2456}
2457
2459 struct yac_interp_weights * weights, size_t * stencil_indices,
2460 int * stencil_ranks, size_t count) {
2461
2462 MPI_Comm comm = weights->comm;
2463 int comm_size;
2464 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
2465
2466 YAC_ASSERT(
2467 count <= INT_MAX,
2468 "ERROR(yac_interp_weights_get_stencils): count exceeds INT_MAX");
2469
2470 size_t * reorder_idx = xmalloc(count * sizeof(*reorder_idx));
2471 for (size_t i = 0; i < count; ++i) reorder_idx[i] = i;
2472
2474 stencil_ranks, count, stencil_indices, reorder_idx);
2475
2476 // exchange requested stencils indices
2477 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
2479 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
2480 for (size_t i = 0; i < count; ++i) sendcounts[stencil_ranks[i]]++;
2482 1, sendcounts, recvcounts, sdispls, rdispls, comm);
2483 size_t recv_count =
2484 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
2485 uint64_t * uint64_t_buffer =
2486 xmalloc((count + recv_count) * sizeof(*uint64_t_buffer));
2487 uint64_t * send_stencil_indices = uint64_t_buffer;
2488 uint64_t * recv_stencil_indices = uint64_t_buffer + count;
2489 for (size_t i = 0; i < count; ++i)
2490 send_stencil_indices[i] = (uint64_t)(stencil_indices[i]);
2491 yac_alltoallv_uint64_p2p(
2492 send_stencil_indices, sendcounts, sdispls+1,
2493 recv_stencil_indices, recvcounts, rdispls, comm);
2494
2495 // exchange stencils
2496 size_t * exchange_stencil_indices =
2497 xmalloc(recv_count * sizeof(*exchange_stencil_indices));
2498 for (size_t i = 0; i < recv_count; ++i) {
2499 YAC_ASSERT(
2500 (size_t)(recv_stencil_indices[i]) < weights->stencils_size,
2501 "ERROR(yac_interp_weights_get_stencils): invalid stencil index");
2502 exchange_stencil_indices[i] = (size_t)(recv_stencil_indices[i]);
2503 }
2504 free(uint64_t_buffer);
2505 struct interp_weight_stencil * stencils =
2506 exchange_stencils(comm, weights->stencils, exchange_stencil_indices,
2507 recvcounts, sendcounts);
2508 free(exchange_stencil_indices);
2509 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
2510
2511 // sort received stencils into original order
2512 struct interp_weight_stencil * sorted_stencils =
2513 xmalloc(count * sizeof(*sorted_stencils));
2514 for (size_t i = 0; i < count; ++i)
2515 sorted_stencils[reorder_idx[i]] = stencils[i];
2516 free(stencils);
2517 free(reorder_idx);
2518
2519 return sorted_stencils;
2520}
2521
2523 struct interp_weight_stencil * stencils, size_t count);
2524
2526 struct yac_interp_weights * weights, struct remote_points * tgts,
2527 size_t * num_stencils_per_tgt, size_t * stencil_indices,
2528 int * stencil_ranks, double * w) {
2529
2530 size_t count = (tgts != NULL)?tgts->count:0;
2531 MPI_Comm comm = weights->comm;
2532 int comm_rank, comm_size;
2533 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
2534 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
2535
2536 // count the number of missing stencils
2537 size_t total_num_stencils = 0;
2538 size_t max_num_stencils_per_tgt = 0;
2539 for (size_t i = 0; i < count; ++i) {
2540 size_t curr_num_stencils_per_tgt = num_stencils_per_tgt[i];
2541 if (curr_num_stencils_per_tgt > max_num_stencils_per_tgt)
2542 max_num_stencils_per_tgt = curr_num_stencils_per_tgt;
2543 total_num_stencils += num_stencils_per_tgt[i];
2544 }
2545 size_t num_missing_stencils = 0;
2546 for (size_t i = 0; i < total_num_stencils; ++i)
2547 if (stencil_ranks[i] != comm_rank) num_missing_stencils++;
2548
2549 // get missing stencils
2550 size_t * missing_stencil_indices =
2551 xmalloc(num_missing_stencils * sizeof(*missing_stencil_indices));
2552 int * missing_stencil_ranks =
2553 xmalloc(num_missing_stencils * sizeof(*missing_stencil_ranks));
2554 for (size_t i = 0, j = 0; i < total_num_stencils; ++i) {
2555 if (stencil_ranks[i] != comm_rank) {
2556 missing_stencil_indices[j] = stencil_indices[i];
2557 missing_stencil_ranks[j] = stencil_ranks[i];
2558 ++j;
2559 }
2560 }
2561 struct interp_weight_stencil * missing_stencils =
2563 weights, missing_stencil_indices, missing_stencil_ranks,
2564 num_missing_stencils);
2565 free(missing_stencil_ranks);
2566 free(missing_stencil_indices);
2567
2568 // merge stencils to generate new ones
2569 {
2570 struct interp_weight_stencil * stencils = weights->stencils;
2571 size_t stencils_array_size = weights->stencils_array_size;
2572 size_t stencils_size = weights->stencils_size;
2573
2574 struct interp_weight_stencil ** stencils_buffer =
2575 xmalloc(max_num_stencils_per_tgt * sizeof(*stencils_buffer));
2576
2578 stencils, stencils_array_size, stencils_size + count);
2579
2580 for (size_t i = 0, j = 0; i < count;
2581 ++i, ++stencils_size) {
2582
2583 size_t curr_num_stencils = num_stencils_per_tgt[i];
2584 for (size_t k = 0; k < curr_num_stencils; ++k)
2585 stencils_buffer[k] =
2586 (stencil_ranks[k] == comm_rank)?
2587 (stencils + stencil_indices[k]):(missing_stencils + (j++));
2588
2589 stencils[stencils_size] =
2590 stencils_merge(stencils_buffer, w, curr_num_stencils, tgts->data[i]);
2591 w += curr_num_stencils;
2592 stencil_indices += curr_num_stencils;
2593 stencil_ranks += curr_num_stencils;
2594 }
2595
2596 weights->stencils = stencils;
2597 weights->stencils_array_size = stencils_array_size;
2598 weights->stencils_size = stencils_size;
2599
2600 free(stencils_buffer);
2601 }
2602
2603 yac_interp_weight_stencils_delete(missing_stencils, num_missing_stencils);
2604}
2605
2606static int compute_owner(int * ranks, size_t count) {
2607
2608 YAC_ASSERT(count != 0, "ERROR(compute_owner): count == 0")
2609
2610 yac_quicksort_index(ranks, count, NULL);
2611
2612 int best_rank = -1;
2613 size_t best_rank_count = 0;
2614
2615 size_t curr_rank_count = 1;
2616 int prev_rank = ranks[0];
2617
2618 for (size_t i = 1; i < count; ++i, ++curr_rank_count) {
2619 int curr_rank = ranks[i];
2620 if (prev_rank != curr_rank) {
2621 if (curr_rank_count > best_rank_count) {
2622 best_rank = prev_rank;
2623 best_rank_count = curr_rank_count;
2624 }
2625 prev_rank = curr_rank;
2626 curr_rank_count = 0;
2627 }
2628 }
2629
2630 return (curr_rank_count > best_rank_count)?prev_rank:best_rank;
2631}
2632
2634 struct interp_weight_stencil * stencils, size_t count,
2635 enum yac_interp_weight_stencil_type stencil_type) {
2636
2637 // compute total number of links
2638 size_t total_num_links = 0;
2639
2640 for (size_t i = 0; i < count; ++i) {
2641 YAC_ASSERT(
2642 stencils[i].type == stencil_type,
2643 "ERROR(generate_w_sum_mf_stencils): wrong stencil type")
2644 // due to the data layout this works for "sum" and "weight_sum"
2645 total_num_links += stencils[i].data.weight_sum.srcs->count;
2646 }
2647
2649 xmalloc(sizeof(*temp) + total_num_links * sizeof(temp->buffer[0]));
2650 struct interp_weight_stencils_wsum_mf * wsum_stencils =
2651 (struct interp_weight_stencils_wsum_mf *)temp;
2652 wsum_stencils->data = xmalloc(count * sizeof(*(wsum_stencils->data)));
2653 wsum_stencils->count = count;
2654
2655 // extract data from stencils
2656 for (size_t i = 0, k = 0; i < count; ++i) {
2657 struct interp_weight_stencil_wsum_mf * curr_wsum_stencil =
2658 wsum_stencils->data + i;
2659 struct interp_weight_stencil_wsum_mf_weight * curr_links =
2660 &(temp->buffer[k]);
2661 size_t curr_stencil_size = stencils[i].data.weight_sum.srcs->count;
2662 struct remote_point * curr_srcs = stencils[i].data.weight_sum.srcs->data;
2663 curr_wsum_stencil->tgt = copy_remote_point(stencils[i].tgt);
2664 curr_wsum_stencil->count = curr_stencil_size;
2665 curr_wsum_stencil->data = curr_links;
2666 for (size_t j = 0; j < curr_stencil_size; ++j) {
2667 int curr_count = curr_srcs[j].data.count;
2668 YAC_ASSERT(
2669 curr_count >= 1,
2670 "ERROR(generate_w_sum_mf_stencils): global src id no found")
2671 curr_links[j].src =
2672 (curr_count == 1)?
2673 (curr_srcs[j].data.data.single):(curr_srcs[j].data.data.multi[0]);
2674 YAC_ASSERT(
2675 (stencil_type == SUM) || (stencil_type == WEIGHT_SUM) ||
2676 (stencil_type == SUM_MF) || (stencil_type == WEIGHT_SUM_MF),
2677 "ERROR(generate_w_sum_mf_stencils): unsupported stencil type")
2678 switch(stencil_type) {
2679 default:
2680 case(SUM):
2681 curr_links[j].weight = 1.0;
2682 curr_links[j].src_field_idx = 0;
2683 break;
2684 case(WEIGHT_SUM):
2685 curr_links[j].weight = stencils[i].data.weight_sum.weights[j];
2686 curr_links[j].src_field_idx = 0;
2687 break;
2688 case(SUM_MF):
2689 curr_links[j].weight = 1.0;
2690 curr_links[j].src_field_idx =
2691 stencils[i].data.sum_mf.field_indices[j];
2692 break;
2693 case(WEIGHT_SUM_MF):
2694 curr_links[j].weight = stencils[i].data.weight_sum_mf.weights[j];
2695 curr_links[j].src_field_idx =
2696 stencils[i].data.weight_sum_mf.field_indices[j];
2697 break;
2698 };
2699 }
2700 k += curr_stencil_size;
2701 }
2702
2703 return wsum_stencils;
2704}
2705
2706static MPI_Datatype get_wsum_mf_weight_mpi_datatype(MPI_Comm comm) {
2707
2709 MPI_Datatype dt;
2710 int array_of_blocklengths[] = {1, 1, 1, 1};
2711 const MPI_Aint array_of_displacements[] =
2712 {(MPI_Aint)(intptr_t)(const void *)&(dummy.src.rank) -
2713 (MPI_Aint)(intptr_t)(const void *)&dummy,
2714 (MPI_Aint)(intptr_t)(const void *)&(dummy.src.orig_pos) -
2715 (MPI_Aint)(intptr_t)(const void *)&dummy,
2716 (MPI_Aint)(intptr_t)(const void *)&(dummy.src_field_idx) -
2717 (MPI_Aint)(intptr_t)(const void *)&dummy,
2718 (MPI_Aint)(intptr_t)(const void *)&(dummy.weight) -
2719 (MPI_Aint)(intptr_t)(const void *)&dummy};
2720 const MPI_Datatype array_of_types[] =
2721 {MPI_INT, MPI_UINT64_T, MPI_UINT64_T, MPI_DOUBLE};
2723 MPI_Type_create_struct(4, array_of_blocklengths, array_of_displacements,
2724 array_of_types, &dt), comm);
2725 return yac_create_resized(dt, sizeof(dummy), comm);
2726}
2727
2729 struct interp_weight_stencil_wsum_mf * stencil,
2730 MPI_Datatype wsum_mf_weight_dt, MPI_Datatype point_info_dt, MPI_Comm comm) {
2731
2732 int pack_size_count,
2733 pack_size_weights,
2734 pack_size_tgt;
2735
2736 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_count), comm);
2738 MPI_Pack_size(
2739 (int)(stencil->count), wsum_mf_weight_dt, comm, &pack_size_weights), comm);
2740 pack_size_tgt =
2741 yac_remote_point_get_pack_size(&(stencil->tgt), point_info_dt, comm);
2742
2743 return pack_size_count + pack_size_weights + pack_size_tgt;
2744}
2745
2747 struct interp_weight_stencil_wsum_mf * wsum_stencils, size_t count,
2748 size_t * pack_order, void ** pack_data, int * pack_sizes,
2749 int * weight_counts, MPI_Comm comm) {
2750
2751 MPI_Datatype wsum_mf_weight_dt = get_wsum_mf_weight_mpi_datatype(comm);
2752 MPI_Datatype point_info_dt = yac_get_remote_point_info_mpi_datatype(comm);
2753
2754 // get the pack sizes and the upper bound for the pack buffer size
2755 size_t temp_total_pack_size = 0;
2756 for (size_t i = 0; i < count; ++i) {
2757 temp_total_pack_size +=
2758 (pack_sizes[i] =
2760 wsum_stencils + pack_order[i],
2761 wsum_mf_weight_dt, point_info_dt, comm));
2762 }
2763
2764 void * pack_data_ = xmalloc(temp_total_pack_size);
2765 size_t total_pack_size = 0;
2766
2767 // pack the stencils
2768 for (size_t i = 0; i < count; ++i) {
2769
2770 size_t idx = pack_order[i];
2771
2772 int position = 0;
2773 void * buffer = (void*)((unsigned char*)pack_data_ + total_pack_size);
2774 int buffer_size = pack_sizes[i];
2775 int curr_count = wsum_stencils[idx].count;
2776
2777 // tgt
2779 &(wsum_stencils[idx].tgt), buffer, buffer_size, &position,
2780 point_info_dt, comm);
2781 // weight count
2783 MPI_Pack(&curr_count, 1, MPI_INT, buffer, buffer_size, &position, comm), comm);
2784 // weights
2786 MPI_Pack(wsum_stencils[idx].data, curr_count, wsum_mf_weight_dt,
2787 buffer, buffer_size, &position, comm), comm);
2788
2789 pack_sizes[i] = position;
2790 weight_counts[i] = curr_count;
2791 total_pack_size += (size_t)position;
2792 }
2793
2794 yac_mpi_call(MPI_Type_free(&point_info_dt), comm);
2795 yac_mpi_call(MPI_Type_free(&wsum_mf_weight_dt), comm);
2796
2797 *pack_data = xrealloc(pack_data_, total_pack_size);
2798}
2799
2801 struct interp_weight_stencil_wsum_mf * wsum_stencils,
2802 struct interp_weight_stencil_wsum_mf_weight * weight_buffer, size_t count,
2803 void * packed_data, size_t packed_data_size, MPI_Comm comm) {
2804
2805 MPI_Datatype wsum_mf_weight_dt = get_wsum_mf_weight_mpi_datatype(comm);
2806 MPI_Datatype point_info_dt = yac_get_remote_point_info_mpi_datatype(comm);
2807
2808 size_t weight_offset = 0;
2809 for (size_t i = 0, offset = 0; i < count; ++i) {
2810
2811 int position = 0;
2812 void * curr_buffer = (void*)((char*)packed_data + offset);
2813 int buffer_size = (int)(packed_data_size - offset);
2814 struct interp_weight_stencil_wsum_mf * curr_wsum_stencil =
2815 wsum_stencils + i;
2816
2817 struct remote_point tgt;
2818 struct interp_weight_stencil_wsum_mf_weight * curr_weights =
2819 weight_buffer + weight_offset;
2820 int weight_count;
2822 curr_buffer, buffer_size, &position, &tgt, point_info_dt, comm);
2824 MPI_Unpack(curr_buffer, buffer_size, &position,
2825 &weight_count, 1, MPI_INT, comm),
2826 comm);
2828 MPI_Unpack(curr_buffer, buffer_size, &position,
2829 curr_weights, weight_count, wsum_mf_weight_dt, comm), comm);
2830
2831 curr_wsum_stencil->tgt = tgt;
2832 curr_wsum_stencil->data = curr_weights;
2833 curr_wsum_stencil->count = (size_t)weight_count;
2834
2835 weight_offset += (size_t)weight_count;
2836 offset += (size_t)position;
2837 }
2838
2839 yac_mpi_call(MPI_Type_free(&point_info_dt), comm);
2840 yac_mpi_call(MPI_Type_free(&wsum_mf_weight_dt), comm);
2841
2842 return weight_offset;
2843}
2844
2846 MPI_Comm comm, struct interp_weight_stencils_wsum_mf * wsum_stencils_data,
2847 int * stencil_owner, size_t * reorder_idx, size_t num_owners) {
2848
2849 struct interp_weight_stencil_wsum_mf * wsum_stencils =
2850 wsum_stencils_data->data;
2851
2852 int comm_rank, comm_size;
2853 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
2854 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
2855
2856 size_t local_weight_count = 0;
2857 size_t local_count = 0;
2858 for (size_t i = 0; i < num_owners; ++i) {
2859 if (stencil_owner[i] == comm_rank) {
2860 local_weight_count += wsum_stencils[reorder_idx[i]].count;
2861 stencil_owner[i] = INT_MAX;
2862 ++local_count;
2863 }
2864 }
2865 yac_quicksort_index_int_size_t(stencil_owner, num_owners, reorder_idx);
2866
2867 size_t send_count = num_owners - local_count;
2868
2869 // pack the stencils that need to be send to other processes
2870 void * send_buffer;
2871 int * pack_sizes = xmalloc(2 * send_count * sizeof(*pack_sizes));
2872 int * weight_counts = pack_sizes + send_count;
2873 pack_stencils_wsum_mf(wsum_stencils, send_count, reorder_idx, &send_buffer,
2874 pack_sizes, weight_counts, comm);
2875
2876 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
2878 3, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
2879
2880 for (size_t i = 0; i < send_count; ++i) {
2881 int curr_rank = stencil_owner[i];
2882 sendcounts[3 * curr_rank + 0]++;
2883 sendcounts[3 * curr_rank + 1] += (size_t)(pack_sizes[i]);
2884 sendcounts[3 * curr_rank + 2] += (size_t)(weight_counts[i]);
2885 }
2886 free(pack_sizes);
2887
2888 // exchange the number of stencils to be exchanged and the total pack sizes
2889 yac_mpi_call(MPI_Alltoall(sendcounts, 3, YAC_MPI_SIZE_T,
2890 recvcounts, 3, YAC_MPI_SIZE_T, comm), comm);
2891
2892 size_t recv_count = 0;
2893 size_t recv_size = 0;
2894 size_t recv_weight_count = 0;
2895 size_t saccu = 0, raccu = 0;
2896 for (int i = 0; i < comm_size; ++i) {
2897 sdispls[i] = saccu;
2898 rdispls[i] = raccu;
2899 recv_count += recvcounts[3 * i + 0];
2900 recv_size += recvcounts[3 * i + 1];
2901 recv_weight_count += recvcounts[3 * i + 2];
2902 saccu += sendcounts[3 * i + 1];
2903 raccu += recvcounts[3 * i + 1];
2904 sendcounts[i] = sendcounts[3 * i + 1];
2905 recvcounts[i] = recvcounts[3 * i + 1];
2906 }
2907
2908 void * recv_buffer = xmalloc(recv_size);
2909
2910 // exchange stencils
2911 yac_alltoallv_packed_p2p(
2912 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls, comm);
2913 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
2914 free(send_buffer);
2915
2917 xmalloc(sizeof(*temp) +
2918 (local_weight_count + recv_weight_count) * sizeof(temp->buffer[0]));
2919 struct interp_weight_stencils_wsum_mf * new_wsum_stencils_data =
2920 (struct interp_weight_stencils_wsum_mf *)temp;
2921 struct interp_weight_stencil_wsum_mf * new_wsum_stencils =
2922 ((new_wsum_stencils_data->data =
2923 xmalloc((local_count + recv_count) *
2924 sizeof(*(new_wsum_stencils_data->data)))));
2925 new_wsum_stencils_data->count = local_count + recv_count;
2926
2927 // unpack stencils
2928 size_t weight_offset =
2930 new_wsum_stencils, &(temp->buffer[0]), recv_count,
2931 recv_buffer, recv_size, comm);
2932 free(recv_buffer);
2933 new_wsum_stencils += recv_count;
2934 struct interp_weight_stencil_wsum_mf_weight * weight_buffer =
2935 &(temp->buffer[weight_offset]);
2936
2937 // copy the stencils that stay locally into the new stencil array
2938 yac_quicksort_index_size_t_size_t(reorder_idx + send_count, local_count, NULL);
2939 for (size_t i = 0, weight_offset = 0; i < local_count; ++i) {
2940 struct interp_weight_stencil_wsum_mf * curr_wsum_stencil =
2941 wsum_stencils + reorder_idx[i + send_count];
2942 struct interp_weight_stencil_wsum_mf * curr_new_wsum_stencil =
2943 new_wsum_stencils + i;
2944 struct interp_weight_stencil_wsum_mf_weight * curr_new_weights =
2945 weight_buffer + weight_offset;
2946 size_t curr_stencil_size = curr_wsum_stencil->count;
2947 curr_new_wsum_stencil->tgt = copy_remote_point(curr_wsum_stencil->tgt);
2948 curr_new_wsum_stencil->count = curr_stencil_size;
2949 curr_new_wsum_stencil->data = curr_new_weights;
2950 memcpy(curr_new_weights, curr_wsum_stencil->data,
2951 curr_stencil_size * sizeof(*curr_new_weights));
2952 weight_offset += curr_stencil_size;
2953 }
2954
2955 return new_wsum_stencils_data;
2956}
2957
2959 MPI_Comm comm, struct interp_weight_stencils_wsum_mf * wsum_stencils_data) {
2960
2961 struct interp_weight_stencil_wsum_mf * wsum_stencils =
2962 wsum_stencils_data->data;
2963 size_t count = wsum_stencils_data->count;
2964
2965 // determine maximum stencil size
2966 size_t max_stencil_size = 0;
2967 for (size_t i = 0; i < count; ++i) {
2968 size_t curr_stencil_size = wsum_stencils[i].count;
2969 if (curr_stencil_size > max_stencil_size)
2970 max_stencil_size = curr_stencil_size;
2971 }
2972
2973 // determine source process for each stencil
2974 int * rank_buffer =
2975 xmalloc((count + max_stencil_size) * sizeof(*rank_buffer));
2976 int * stencil_owner = rank_buffer;
2977 int * stencil_owners = rank_buffer + count;
2978 size_t * reorder_idx = xmalloc(count * sizeof(*reorder_idx));
2979 for (size_t i = 0; i < count; ++i) {
2980 size_t curr_stencil_size = wsum_stencils[i].count;
2981 struct interp_weight_stencil_wsum_mf_weight * curr_weights =
2982 wsum_stencils[i].data;
2983 for (size_t j = 0; j < curr_stencil_size; ++j)
2984 stencil_owners[j] = curr_weights[j].src.rank;
2985 stencil_owner[i] = compute_owner(stencil_owners, curr_stencil_size);
2986 reorder_idx[i] = i;
2987 }
2988
2989 struct interp_weight_stencils_wsum_mf * new_wsum_stencils_data =
2991 comm, wsum_stencils_data, stencil_owner, reorder_idx, count);
2992
2993 free(reorder_idx);
2994 free(rank_buffer);
2995
2996 return new_wsum_stencils_data;
2997}
2998
2999static int compare_remote_point_info(const void * a, const void * b) {
3000
3001 int ret = ((struct remote_point_info *)a)->rank -
3002 ((struct remote_point_info *)b)->rank;
3003
3004 if (ret) return ret;
3005
3006 return (((struct remote_point_info *)a)->orig_pos >
3007 ((struct remote_point_info *)b)->orig_pos) -
3008 (((struct remote_point_info *)a)->orig_pos <
3009 ((struct remote_point_info *)b)->orig_pos);
3010}
3011
3013 MPI_Comm comm, struct interp_weight_stencils_wsum_mf * wsum_stencils_data) {
3014
3015 struct interp_weight_stencil_wsum_mf * wsum_stencils =
3016 wsum_stencils_data->data;
3017 size_t count = wsum_stencils_data->count;
3018
3019 // determine total number of stencils to be sent to other processes
3020 // (a single stencil may be sent to multiple target processes)
3021 size_t total_owner_count = 0;
3022 for (size_t i = 0; i < count; ++i) {
3023 int stencil_size = wsum_stencils[i].tgt.data.count;
3024 if (stencil_size == 1) {
3025 total_owner_count++;
3026 } else {
3027 struct remote_point_info * tgt_point_infos =
3028 wsum_stencils[i].tgt.data.data.multi;
3029 qsort(
3030 tgt_point_infos, stencil_size, sizeof(*tgt_point_infos),
3032 int prev_rank = INT_MAX;
3033 for (int j = 0; j < stencil_size; ++j) {
3034 int curr_rank = tgt_point_infos[j].rank;
3035 if (curr_rank != prev_rank) {
3036 ++total_owner_count;
3037 prev_rank = curr_rank;
3038 }
3039 }
3040 }
3041 }
3042
3043 int * stencil_owner = xmalloc(total_owner_count * sizeof(*stencil_owner));
3044 size_t * reorder_idx = xmalloc(total_owner_count * sizeof(*reorder_idx));
3045 for (size_t i = 0, k = 0; i < count; ++i) {
3046 int stencil_size = wsum_stencils[i].tgt.data.count;
3047 if (stencil_size == 1) {
3048 stencil_owner[k] = wsum_stencils[i].tgt.data.data.single.rank;
3049 reorder_idx[k] = i;
3050 ++k;
3051 } else {
3052 struct remote_point_info * tgt_point_infos =
3053 wsum_stencils[i].tgt.data.data.multi;
3054 int prev_rank = INT_MAX;
3055 for (int j = 0; j < stencil_size; ++j) {
3056 int curr_rank = tgt_point_infos[j].rank;
3057 if (curr_rank != prev_rank) {
3058 stencil_owner[k] = tgt_point_infos[j].rank;
3059 reorder_idx[k] = i;
3060 ++k;
3061 prev_rank = curr_rank;
3062 }
3063 }
3064 }
3065 }
3066
3067 struct interp_weight_stencils_wsum_mf * new_wsum_stencils_data =
3069 comm, wsum_stencils_data, stencil_owner, reorder_idx, total_owner_count);
3070
3071 wsum_stencils = new_wsum_stencils_data->data;
3072 count = new_wsum_stencils_data->count;
3073
3074 free(reorder_idx);
3075 free(stencil_owner);
3076
3077 if (count == 0) return new_wsum_stencils_data;
3078
3079 int comm_rank;
3080 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
3081
3082 // count total number of local target locations
3083 size_t total_num_tgt_pos = 0;
3084 for (size_t i = 0; i < count; ++i) {
3085 size_t curr_count = wsum_stencils[i].tgt.data.count;
3086 if (curr_count == 1) {
3087 ++total_num_tgt_pos;
3088 } else {
3089 struct remote_point_info * curr_point_infos =
3090 wsum_stencils[i].tgt.data.data.multi;
3091 for (size_t j = 0; j < curr_count; ++j)
3092 if (curr_point_infos[j].rank == comm_rank)
3093 ++total_num_tgt_pos;
3094 }
3095 }
3096
3097 if (total_num_tgt_pos != count) {
3098 new_wsum_stencils_data->data =
3099 ((wsum_stencils =
3100 xrealloc(wsum_stencils, total_num_tgt_pos * sizeof(*wsum_stencils))));
3101 new_wsum_stencils_data->count = total_num_tgt_pos;
3102 }
3103
3104 // remove all non local target point information
3105 for (size_t i = 0, offset = count; i < count; ++i) {
3106 size_t curr_count = wsum_stencils[i].tgt.data.count;
3107 if (curr_count > 1) {
3108 struct remote_point_info * curr_point_infos =
3109 wsum_stencils[i].tgt.data.data.multi;
3110 // find first local target point
3111 size_t j;
3112 for (j = 0; j < curr_count; ++j) {
3113 if (curr_point_infos[j].rank == comm_rank) {
3114 wsum_stencils[i].tgt.data.count = 1;
3115 wsum_stencils[i].tgt.data.data.single.rank = comm_rank;
3116 wsum_stencils[i].tgt.data.data.single.orig_pos =
3117 curr_point_infos[j].orig_pos;
3118 break;
3119 }
3120 }
3121 // make a copy for the remaining local target positions
3122 for (j = j + 1; j < curr_count; ++j) {
3123 if (curr_point_infos[j].rank == comm_rank) {
3124 wsum_stencils[offset] = wsum_stencils[i];
3125 wsum_stencils[offset].tgt.data.data.single.orig_pos =
3126 curr_point_infos[j].orig_pos;
3127 ++offset;
3128 }
3129 }
3130 free(curr_point_infos);
3131 }
3132 }
3133
3134 return new_wsum_stencils_data;
3135}
3136
3137static Xt_redist * generate_halo_redists(
3138 struct remote_point_info_reorder * halo_points, size_t count,
3139 size_t num_src_fields, MPI_Comm comm) {
3140
3141 int comm_size;
3142 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
3143
3144 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
3146 num_src_fields, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
3147 size_t * size_t_buffer =
3148 xmalloc(4 * (size_t)comm_size * sizeof(*size_t_buffer));
3149 size_t * total_sendcounts = size_t_buffer + 0 * comm_size;
3150 size_t * total_recvcounts = size_t_buffer + 1 * comm_size;
3151 size_t * total_sdispls = size_t_buffer + 2 * comm_size;
3152 size_t * total_rdispls = size_t_buffer + 3 * comm_size;
3153
3154 for (size_t i = 0; i < count; ++i)
3155 sendcounts[halo_points[i].data.rank * num_src_fields +
3156 halo_points[i].field_idx]++;
3157
3159 num_src_fields, sendcounts, recvcounts, sdispls, rdispls, comm);
3160
3161 size_t saccu = 0, raccu = 0;
3162 for (int i = 0; i < comm_size; ++i) {
3163 total_sdispls[i] = saccu;
3164 total_rdispls[i] = raccu;
3165 total_sendcounts[i] = 0;
3166 total_recvcounts[i] = 0;
3167 for (size_t j = 0; j < num_src_fields; ++j) {
3168 total_sendcounts[i] += sendcounts[num_src_fields * i + j];
3169 total_recvcounts[i] += recvcounts[num_src_fields * i + j];
3170 }
3171 saccu += total_sendcounts[i];
3172 raccu += total_recvcounts[i];
3173 }
3174
3175 size_t recv_count = total_recvcounts[comm_size - 1] +
3176 total_rdispls[comm_size - 1];
3177
3178 int * exchange_buffer =
3179 xmalloc((2 * count + recv_count) * sizeof(*exchange_buffer));
3180 int * send_buffer = exchange_buffer;
3181 int * reorder_idx = exchange_buffer + count;
3182 int * recv_buffer = exchange_buffer + 2 * count;
3183
3184 // pack the original positions of the requested points
3185 size_t num_halo_per_src_field[num_src_fields];
3186 memset(
3187 num_halo_per_src_field, 0,
3188 num_src_fields * sizeof(num_halo_per_src_field[0]));
3189 for (size_t i = 0; i < count; ++i) {
3190 size_t curr_src_field_idx = (size_t)(halo_points[i].field_idx);
3191 size_t pos = sdispls[(size_t)(halo_points[i].data.rank) * num_src_fields +
3192 curr_src_field_idx + 1]++;
3193 uint64_t orig_pos = halo_points[i].data.orig_pos;
3194 YAC_ASSERT(
3195 orig_pos <= INT_MAX,
3196 "ERROR(generate_halo_redists): offset not supported by MPI")
3197 send_buffer[pos] = (int)orig_pos;
3198 reorder_idx[pos] = num_halo_per_src_field[curr_src_field_idx]++;
3199 }
3200
3201 // exchange original positions of the requested points
3202 yac_alltoallv_int_p2p(
3203 send_buffer, total_sendcounts, total_sdispls,
3204 recv_buffer, total_recvcounts, total_rdispls, comm);
3205
3206 free(size_t_buffer);
3207
3208 size_t nsend = 0, nsends[num_src_fields];
3209 size_t nrecv = 0, nrecvs[num_src_fields];
3210 memset(nsends, 0, num_src_fields * sizeof(nsends[0]));
3211 memset(nrecvs, 0, num_src_fields * sizeof(nrecvs[0]));
3212 for (int i = 0; i < comm_size; ++i) {
3213 for (size_t field_idx = 0; field_idx < num_src_fields; ++field_idx) {
3214 if (sendcounts[i * num_src_fields + field_idx] > 0) {
3215 nrecv++;
3216 nrecvs[field_idx]++;
3217 }
3218 if (recvcounts[i * num_src_fields + field_idx] > 0) {
3219 nsend++;
3220 nsends[field_idx]++;
3221 }
3222 }
3223 }
3224
3225 size_t total_num_msg = nsend + nrecv;
3226
3227 struct Xt_redist_msg * msgs_buffer =
3228 xmalloc(total_num_msg * sizeof(*msgs_buffer));
3229 struct Xt_redist_msg * send_msgs = msgs_buffer;
3230 struct Xt_redist_msg * recv_msgs = msgs_buffer + nsend;
3231
3232 for (size_t field_idx = 0, nsend = 0, nrecv = 0;
3233 field_idx < num_src_fields; ++field_idx) {
3234 for (int rank = 0; rank < comm_size; ++rank) {
3235 size_t idx = (size_t)rank * num_src_fields + field_idx;
3236 if (sendcounts[idx] > 0) {
3237 recv_msgs[nrecv].rank = rank;
3238 recv_msgs[nrecv].datatype =
3239 xt_mpi_generate_datatype(
3240 reorder_idx + sdispls[idx], sendcounts[idx], MPI_DOUBLE, comm);
3241 nrecv++;
3242 }
3243 if (recvcounts[idx] > 0) {
3244 send_msgs[nsend].rank = rank;
3245 send_msgs[nsend].datatype =
3246 xt_mpi_generate_datatype(
3247 recv_buffer + rdispls[idx], recvcounts[idx], MPI_DOUBLE, comm);
3248 nsend++;
3249 }
3250 }
3251 }
3252
3253 Xt_redist * redist;
3254 MPI_Comm halo_comm;
3255
3256 if (total_num_msg > 0) {
3257
3258 yac_mpi_call(MPI_Comm_split(comm, 1, 0, &halo_comm), comm);
3259
3260 int * rank_buffer = xmalloc(2 * total_num_msg * sizeof(*rank_buffer));
3261 int * orig_ranks = rank_buffer;
3262 int * split_ranks = rank_buffer + total_num_msg;
3263
3264 for (size_t i = 0; i < total_num_msg; ++i)
3265 orig_ranks[i] = msgs_buffer[i].rank;
3266
3267 MPI_Group orig_group, split_group;
3268 yac_mpi_call(MPI_Comm_group(comm, &orig_group), comm);
3269 yac_mpi_call(MPI_Comm_group(halo_comm, &split_group), comm);
3270
3272 MPI_Group_translate_ranks(orig_group, (int)total_num_msg, orig_ranks,
3273 split_group, split_ranks), halo_comm);
3274
3275 for (size_t i = 0; i < total_num_msg; ++i)
3276 msgs_buffer[i].rank = split_ranks[i];
3277
3278 free(rank_buffer);
3279
3280 yac_mpi_call(MPI_Group_free(&split_group), comm);
3281 yac_mpi_call(MPI_Group_free(&orig_group), comm);
3282
3283 // generate redist
3284 redist = xmalloc(num_src_fields * sizeof(*redist));
3285 if (num_src_fields == 1) {
3286 *redist =
3287 xt_redist_single_array_base_new(
3288 nsend, nrecv, send_msgs, recv_msgs, halo_comm);
3289 } else {
3290 for (size_t field_idx = 0; field_idx < num_src_fields; ++field_idx) {
3291 redist[field_idx] =
3292 xt_redist_single_array_base_new(
3293 nsends[field_idx], nrecvs[field_idx],
3294 send_msgs, recv_msgs, halo_comm);
3295 send_msgs += nsends[field_idx];
3296 recv_msgs += nrecvs[field_idx];
3297 }
3298 }
3299
3300 } else {
3301 yac_mpi_call(MPI_Comm_split(comm, 0, 0, &halo_comm), comm);
3302 redist = NULL;
3303 }
3304
3305 yac_mpi_call(MPI_Comm_free(&halo_comm), comm);
3306 free(exchange_buffer);
3307 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
3308
3309 xt_redist_msg_free(msgs_buffer, total_num_msg, comm);
3310
3311 return redist;
3312}
3313
3314static int compare_rank_pos_reorder_field_idx(const void * a, const void * b) {
3315
3316 int ret = (((struct remote_point_info_reorder *)a)->field_idx >
3317 ((struct remote_point_info_reorder *)b)->field_idx) -
3318 (((struct remote_point_info_reorder *)a)->field_idx <
3319 ((struct remote_point_info_reorder *)b)->field_idx);
3320
3321 if (ret) return ret;
3322
3323 ret = ((struct remote_point_info_reorder *)a)->data.rank -
3324 ((struct remote_point_info_reorder *)b)->data.rank;
3325
3326 if (ret) return ret;
3327
3328 return (((struct remote_point_info_reorder *)a)->data.orig_pos >
3329 ((struct remote_point_info_reorder *)b)->data.orig_pos) -
3330 (((struct remote_point_info_reorder *)a)->data.orig_pos <
3331 ((struct remote_point_info_reorder *)b)->data.orig_pos);
3332}
3333
3335 const void * a, const void * b) {
3336
3337 struct interp_weight_stencil_wsum_mf * a_ =
3339 struct interp_weight_stencil_wsum_mf * b_ =
3341
3342 size_t count = MIN(a_->count, b_->count);
3343
3344 for (size_t i = 0; i < count; ++i) {
3345 int ret = (a_->data[i].src_field_idx > b_->data[i].src_field_idx) -
3346 (a_->data[i].src_field_idx < b_->data[i].src_field_idx);
3347 if (ret) return ret;
3348 ret = (a_->data[i].src.orig_pos > b_->data[i].src.orig_pos) -
3349 (a_->data[i].src.orig_pos < b_->data[i].src.orig_pos);
3350 if (ret) return ret;
3351 }
3352 return 0;
3353}
3354
3356 const void * a, const void * b) {
3357
3358 struct interp_weight_stencil_wsum_mf * a_ =
3360 struct interp_weight_stencil_wsum_mf * b_ =
3362
3363 YAC_ASSERT(
3364 (a_->tgt.data.count == 1) && (b_->tgt.data.count == 1),
3365 "ERROR(compare_interp_weight_stencil_wsum_tgt_orig_pos): invalid data")
3366
3367 size_t a_orig_pos = a_->tgt.data.data.single.orig_pos;
3368 size_t b_orig_pos = b_->tgt.data.data.single.orig_pos;
3369
3370 return (a_orig_pos > b_orig_pos) - (a_orig_pos < b_orig_pos);
3371}
3372
3373static void free_remote_point(struct remote_point point) {
3374
3375 if (point.data.count > 1) free(point.data.data.multi);
3376}
3377
3379 struct remote_point_infos * point_infos, size_t count, MPI_Comm comm) {
3380
3381 int comm_size;
3382 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
3383
3384 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
3386 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
3387
3388 for (size_t i = 0; i < count; ++i) {
3389 int curr_count = point_infos[i].count;
3390 struct remote_point_info * curr_point_infos =
3391 (curr_count == 1)?
3392 (&(point_infos[i].data.single)):(point_infos[i].data.multi);
3393 YAC_ASSERT(
3394 curr_count >= 1,
3395 "ERROR(generate_redist_put_double): no owner found for global id")
3396 for (int j = 0; j < curr_count; ++j)
3397 sendcounts[curr_point_infos[j].rank]++;
3398 }
3399
3401 1, sendcounts, recvcounts, sdispls, rdispls, comm);
3402
3403 size_t send_count =
3404 sdispls[comm_size] + sendcounts[comm_size - 1];
3405 size_t recv_count =
3406 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
3407
3408 int * exchange_buffer =
3409 xmalloc((2 * send_count + recv_count) * sizeof(*exchange_buffer));
3410 int * send_buffer = exchange_buffer;
3411 int * reorder_idx = exchange_buffer + send_count;
3412 int * recv_buffer = exchange_buffer + 2 * send_count;
3413
3414 // pack the original positions of the points that have to updated
3415 for (size_t i = 0; i < count; ++i) {
3416 int curr_count = point_infos[i].count;
3417 struct remote_point_info * curr_point_infos =
3418 (curr_count == 1)?
3419 (&(point_infos[i].data.single)):(point_infos[i].data.multi);
3420 for (int j = 0; j < curr_count; ++j) {
3421 size_t pos = sdispls[curr_point_infos[j].rank + 1]++;
3422 uint64_t orig_pos = curr_point_infos[j].orig_pos;
3423 YAC_ASSERT(
3424 orig_pos <= INT_MAX,
3425 "ERROR(generate_redist_put_double): offset not supported by MPI")
3426 send_buffer[pos] = (int)orig_pos;
3427 reorder_idx[pos] = i;
3428 }
3429 }
3430
3431 // exchange original positions of the points that have to updated
3432 yac_alltoallv_int_p2p(
3433 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls, comm);
3434
3435 size_t nsend = 0;
3436 size_t nrecv = 0;
3437 for (int i = 0; i < comm_size; ++i) {
3438 if (sendcounts[i] > 0) nsend++;
3439 if (recvcounts[i] > 0) nrecv++;
3440 }
3441
3442 struct Xt_redist_msg * send_msgs = xmalloc(nsend * sizeof(*send_msgs));
3443 struct Xt_redist_msg * recv_msgs = xmalloc(nrecv * sizeof(*send_msgs));
3444
3445 for (int i = 0, nsend = 0, nrecv = 0; i < comm_size; ++i) {
3446 if (sendcounts[i] > 0) {
3447 send_msgs[nsend].rank = i;
3448 send_msgs[nsend].datatype =
3449 xt_mpi_generate_datatype(
3450 reorder_idx + sdispls[i], sendcounts[i], MPI_DOUBLE, comm);
3451 nsend++;
3452 }
3453 if (recvcounts[i] > 0) {
3454 recv_msgs[nrecv].rank = i;
3455 recv_msgs[nrecv].datatype =
3456 xt_mpi_generate_datatype(
3457 recv_buffer + rdispls[i], recvcounts[i], MPI_DOUBLE, comm);
3458 nrecv++;
3459 }
3460 }
3461
3462 // generate redist
3463 Xt_redist redist =
3464 xt_redist_single_array_base_new(nsend, nrecv, send_msgs, recv_msgs, comm);
3465
3466 free(exchange_buffer);
3467 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
3468
3469 xt_redist_msg_free(recv_msgs, nrecv, comm);
3470 xt_redist_msg_free(send_msgs, nsend, comm);
3471
3472 return redist;
3473}
3474
3476 MPI_Comm comm, struct interp_weight_stencils_wsum_mf * wsum_mf_stencils_data,
3477 struct yac_interpolation * interp,
3479 enum yac_interp_weight_stencil_type stencil_type) {
3480
3481 int comm_rank;
3482 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
3483
3484 // redistribute stencils to respective owners
3485 struct interp_weight_stencils_wsum_mf * (*redist_wsum_mf_stencils)(
3486 MPI_Comm comm, struct interp_weight_stencils_wsum_mf * wsum_mf_stencils_data);
3487 YAC_ASSERT(
3488 (reorder == YAC_MAPPING_ON_SRC) || (reorder == YAC_MAPPING_ON_TGT),
3489 "ERROR(yac_interp_weights_redist_w_sum_mf): invalid reorder type")
3491 (reorder == YAC_MAPPING_ON_SRC)?
3493 struct interp_weight_stencils_wsum_mf * new_wsum_mf_stencils_data =
3494 redist_wsum_mf_stencils(comm, wsum_mf_stencils_data);
3495
3496 size_t wsum_mf_count = new_wsum_mf_stencils_data->count;
3497 struct interp_weight_stencil_wsum_mf * wsum_mf_stencils =
3498 new_wsum_mf_stencils_data->data;
3499
3500 // compute the total number of links
3501 size_t total_num_links = 0, total_num_remote_weights = 0;
3502 for (size_t i = 0; i < wsum_mf_count; ++i) {
3503 size_t curr_stencil_size = wsum_mf_stencils[i].count;
3504 total_num_links += curr_stencil_size;
3505 for (size_t j = 0; j < curr_stencil_size; ++j)
3506 if (wsum_mf_stencils[i].data[j].src.rank != comm_rank)
3507 ++total_num_remote_weights;
3508 }
3509
3510 // gather all remote source points and determine number of source fields
3511 struct remote_point_info_reorder * remote_src_points =
3512 xmalloc(total_num_remote_weights * sizeof(*remote_src_points));
3513 size_t num_src_fields = 0;
3514 for (size_t i = 0, k = 0; i < wsum_mf_count; ++i) {
3515 size_t curr_stencil_size = wsum_mf_stencils[i].count;
3516 struct interp_weight_stencil_wsum_mf_weight * curr_weights =
3517 wsum_mf_stencils[i].data;
3518 for (size_t j = 0; j < curr_stencil_size; ++j) {
3519 size_t curr_src_field_idx = curr_weights[j].src_field_idx;
3520 if (curr_src_field_idx >= num_src_fields)
3521 num_src_fields = curr_src_field_idx + 1;
3522 if (curr_weights[j].src.rank != comm_rank) {
3523 remote_src_points[k].data = curr_weights[j].src;
3524 remote_src_points[k].field_idx = curr_src_field_idx;
3525 remote_src_points[k].reorder_idx = i;
3526 ++k;
3527 }
3528 }
3529 }
3531 MPI_Allreduce(
3532 MPI_IN_PLACE, &num_src_fields, 1, YAC_MPI_SIZE_T, MPI_MAX, comm), comm);
3533
3534 // sort remote points first by field_idx, second by rank, and
3535 // then by orig_pos
3536 qsort(remote_src_points, total_num_remote_weights, sizeof(*remote_src_points),
3538
3539 // update stencils: set owner to -1; set orig_pos to position of respecitve
3540 // point in halo data
3541 // remove duplicated remote points
3542 struct remote_point_info * prev_remote_src_point;
3543 size_t prev_field_idx;
3544 size_t halo_size;
3545 if (total_num_remote_weights > 0) {
3546 prev_remote_src_point = &(remote_src_points[0].data);
3547 prev_field_idx = remote_src_points[0].field_idx;
3548 halo_size = 1;
3549 } else {
3550 prev_field_idx = SIZE_MAX;
3551 halo_size = 0;
3552 }
3553 for (size_t i = 0; i < total_num_remote_weights; ++i) {
3554 struct remote_point_info * curr_remote_src_point =
3555 &(remote_src_points[i].data);
3556 size_t curr_field_idx = remote_src_points[i].field_idx;
3558 prev_remote_src_point, curr_remote_src_point) ||
3559 (prev_field_idx != curr_field_idx)) {
3560 prev_remote_src_point = curr_remote_src_point;
3561 prev_field_idx = curr_field_idx;
3562 remote_src_points[halo_size].data = *curr_remote_src_point;
3563 remote_src_points[halo_size].field_idx = curr_field_idx;
3564 ++halo_size;
3565 }
3566 struct interp_weight_stencil_wsum_mf * curr_stencil =
3567 wsum_mf_stencils + remote_src_points[i].reorder_idx;
3568 size_t curr_stencil_size = curr_stencil->count;
3569 for (size_t j = 0; j < curr_stencil_size; ++j) {
3571 &(curr_stencil->data[j].src), curr_remote_src_point)) &&
3572 (curr_stencil->data[j].src_field_idx == curr_field_idx)) {
3573 curr_stencil->data[j].src.rank = -1;
3574 curr_stencil->data[j].src.orig_pos = halo_size - 1;
3575 curr_stencil->data[j].src_field_idx = SIZE_MAX;
3576 }
3577 }
3578 }
3579
3580 // sort stencils by their memory access pattern on the local process
3581 qsort(wsum_mf_stencils, wsum_mf_count, sizeof(*wsum_mf_stencils),
3582 (reorder == YAC_MAPPING_ON_SRC)?
3585
3586 size_t * num_src_per_tgt = xmalloc(wsum_mf_count * sizeof(*num_src_per_tgt));
3587 double * weights = xmalloc(total_num_links * sizeof(*weights));
3588 size_t * src_idx = xmalloc(total_num_links * sizeof(*src_idx));
3589 size_t * src_field_idx = xmalloc(total_num_links * sizeof(*src_field_idx));
3590
3591 // extract data from stencil
3592 for (size_t i = 0, k = 0; i < wsum_mf_count; ++i) {
3593 size_t curr_stencil_size = wsum_mf_stencils[i].count;
3594 struct interp_weight_stencil_wsum_mf_weight * curr_weights =
3595 wsum_mf_stencils[i].data;
3596 num_src_per_tgt[i] = curr_stencil_size;
3597 for (size_t j = 0; j < curr_stencil_size; ++j, ++k){
3598 weights[k] = curr_weights[j].weight;
3599 src_idx[k] = curr_weights[j].src.orig_pos;
3600 src_field_idx[k] = curr_weights[j].src_field_idx;
3601 }
3602 }
3603
3604 // generate halo redists (one per source field
3605 Xt_redist * halo_redists =
3607 remote_src_points, halo_size, num_src_fields, comm);
3608
3609 YAC_ASSERT(
3610 (stencil_type == WEIGHT_SUM) ||
3611 (stencil_type == SUM) ||
3612 (stencil_type == WEIGHT_SUM_MF) ||
3613 (stencil_type == SUM_MF),
3614 "ERROR(yac_interp_weights_redist_w_sum_mf): unsupported stencil type");
3615
3616 // add weights to interpolation
3617 if (reorder == YAC_MAPPING_ON_SRC) {
3618
3619 // generate result redist
3620 struct remote_point_infos * tgt_infos =
3621 xmalloc(wsum_mf_count * sizeof(*tgt_infos));
3622 for (size_t i = 0; i < wsum_mf_count; ++i)
3623 tgt_infos[i] = wsum_mf_stencils[i].tgt.data;
3624 Xt_redist result_redist =
3626 tgt_infos, wsum_mf_count, comm);
3627 free(tgt_infos);
3628
3629 switch(stencil_type) {
3630 default:
3631 case (WEIGHT_SUM):
3632 case (WEIGHT_SUM_MF):
3634 interp, halo_redists, wsum_mf_count, num_src_per_tgt, weights,
3635 src_field_idx, src_idx, ((stencil_type==WEIGHT_SUM)?1:num_src_fields),
3636 result_redist);
3637 break;
3638 case (SUM):
3639 case (SUM_MF):
3641 interp, halo_redists, wsum_mf_count, num_src_per_tgt,
3642 src_field_idx, src_idx, ((stencil_type == SUM)?1:num_src_fields),
3643 result_redist);
3644 break;
3645 }
3646
3647 if (result_redist != NULL) xt_redist_delete(result_redist);
3648
3649 } else {
3650
3651 size_t * tgt_orig_pos = xmalloc(wsum_mf_count * sizeof(*tgt_orig_pos));
3652 for (size_t i = 0; i < wsum_mf_count; ++i) {
3653 YAC_ASSERT(
3654 wsum_mf_stencils[i].tgt.data.count == 1,
3655 "ERROR(yac_interp_weights_redist_w_sum): currently unsupported target "
3656 "point distribution")
3657 tgt_orig_pos[i] =
3658 (size_t)(wsum_mf_stencils[i].tgt.data.data.single.orig_pos);
3659 }
3660
3661 switch(stencil_type) {
3662 default:
3663 case (WEIGHT_SUM):
3664 case (WEIGHT_SUM_MF):
3666 interp, halo_redists, tgt_orig_pos, wsum_mf_count,
3667 num_src_per_tgt, weights, src_field_idx, src_idx,
3668 ((stencil_type == WEIGHT_SUM)?1:num_src_fields));
3669 break;
3670 case (SUM):
3671 case (SUM_MF):
3673 interp, halo_redists, tgt_orig_pos, wsum_mf_count,
3674 num_src_per_tgt, src_field_idx, src_idx,
3675 ((stencil_type == SUM)?1:num_src_fields));
3676 break;
3677 }
3678 free(tgt_orig_pos);
3679 }
3680
3681 for (size_t i = 0; i < new_wsum_mf_stencils_data->count; ++i)
3682 free_remote_point(new_wsum_mf_stencils_data->data[i].tgt);
3683 free(new_wsum_mf_stencils_data->data);
3684 free(new_wsum_mf_stencils_data);
3685
3686 free(remote_src_points);
3687 free(src_field_idx);
3688 free(src_idx);
3689 free(weights);
3690 free(num_src_per_tgt);
3691 if (halo_redists != NULL) {
3692 for (size_t i = 0; i < num_src_fields; ++i)
3693 xt_redist_delete(halo_redists[i]);
3694 free(halo_redists);
3695 }
3696}
3697
3698static int compare_stencils(const void * a, const void * b) {
3699
3700 return (int)(((struct interp_weight_stencil *)a)->type) -
3701 (int)(((struct interp_weight_stencil *)b)->type);
3702}
3703
3705 struct yac_interp_weights * weights,
3708 double scaling_factor, double scaling_summand) {
3709
3710 MPI_Comm comm = weights->comm;
3711
3712 // sort stencils by type
3713 qsort(weights->stencils, weights->stencils_size, sizeof(*(weights->stencils)),
3715
3716 uint64_t local_stencil_counts[WEIGHT_STENCIL_TYPE_SIZE];
3717 size_t stencils_offsets[WEIGHT_STENCIL_TYPE_SIZE];
3718
3719 // count local number of stencils per type
3720 memset(&(local_stencil_counts[0]), 0, sizeof(local_stencil_counts));
3721 for (size_t i = 0; i < weights->stencils_size; ++i)
3722 local_stencil_counts[(int)(weights->stencils[i].type)]++;
3723
3724 for (size_t i = 0, accu = 0; i < (size_t)WEIGHT_STENCIL_TYPE_SIZE; ++i) {
3725 stencils_offsets[i] = accu;
3726 accu += local_stencil_counts[i];
3727 }
3728
3729 uint64_t global_stencil_counts[WEIGHT_STENCIL_TYPE_SIZE];
3730
3731 // determine global number of stencils per type
3733 MPI_Allreduce(
3734 local_stencil_counts, global_stencil_counts,
3735 (int)WEIGHT_STENCIL_TYPE_SIZE, MPI_UINT64_T, MPI_SUM, comm), comm);
3736
3737 { // check whether the collection_size is consistant across all processes
3738 uint64_t max_collection_size = (uint64_t)collection_size;
3740 MPI_Allreduce(
3741 MPI_IN_PLACE, &max_collection_size, 1, MPI_UINT64_T, MPI_MAX, comm),
3742 comm);
3743 YAC_ASSERT(
3744 (size_t)max_collection_size == collection_size,
3745 "ERROR(yac_interp_weights_get_interpolation): "
3746 "mismatching collection sizes")
3747 }
3748
3749 struct yac_interpolation * interp =
3752 scaling_factor, scaling_summand);
3753
3754 if (global_stencil_counts[FIXED] > 0)
3756 weights->comm, local_stencil_counts[FIXED],
3757 weights->stencils + stencils_offsets[FIXED], interp);
3758
3759 if (global_stencil_counts[DIRECT] > 0)
3761 weights->comm, local_stencil_counts[DIRECT],
3762 weights->stencils + stencils_offsets[DIRECT], interp);
3763
3764 if (global_stencil_counts[SUM] > 0) {
3765
3766 struct interp_weight_stencils_wsum_mf * wsum_stencils =
3768 weights->stencils + stencils_offsets[SUM],
3769 (size_t)(local_stencil_counts[SUM]), SUM);
3770 YAC_ASSERT(
3771 (reorder == YAC_MAPPING_ON_SRC) || (reorder == YAC_MAPPING_ON_TGT),
3772 "ERROR(yac_interp_weights_get_interpolation): invalid reorder type")
3774 weights->comm, wsum_stencils, interp, reorder, SUM);
3775 for (size_t i = 0; i < wsum_stencils->count; ++i)
3776 free_remote_point(wsum_stencils->data[i].tgt);
3777 free(wsum_stencils->data);
3778 free(wsum_stencils);
3779 }
3780
3781 if (global_stencil_counts[WEIGHT_SUM] > 0) {
3782
3783 struct interp_weight_stencils_wsum_mf * wsum_stencils =
3785 weights->stencils + stencils_offsets[WEIGHT_SUM],
3786 (size_t)(local_stencil_counts[WEIGHT_SUM]), WEIGHT_SUM);
3787 YAC_ASSERT(
3788 (reorder == YAC_MAPPING_ON_SRC) || (reorder == YAC_MAPPING_ON_TGT),
3789 "ERROR(yac_interp_weights_get_interpolation): invalid reorder type")
3791 weights->comm, wsum_stencils, interp, reorder, WEIGHT_SUM);
3792 for (size_t i = 0; i < wsum_stencils->count; ++i)
3793 free_remote_point(wsum_stencils->data[i].tgt);
3794 free(wsum_stencils->data);
3795 free(wsum_stencils);
3796 }
3797
3798 if (global_stencil_counts[DIRECT_MF] > 0)
3800 weights->comm, local_stencil_counts[DIRECT_MF],
3801 weights->stencils + stencils_offsets[DIRECT_MF], interp);
3802
3803 if (global_stencil_counts[SUM_MF] > 0) {
3804
3805 struct interp_weight_stencils_wsum_mf * sum_mf_stencils =
3807 weights->stencils + stencils_offsets[SUM_MF],
3808 (size_t)(local_stencil_counts[SUM_MF]), SUM_MF);
3809 YAC_ASSERT(
3810 (reorder == YAC_MAPPING_ON_SRC) || (reorder == YAC_MAPPING_ON_TGT),
3811 "ERROR(yac_interp_weights_get_interpolation): invalid reorder type")
3813 weights->comm, sum_mf_stencils, interp, reorder, SUM_MF);
3814 for (size_t i = 0; i < sum_mf_stencils->count; ++i)
3815 free_remote_point(sum_mf_stencils->data[i].tgt);
3816 free(sum_mf_stencils->data);
3817 free(sum_mf_stencils);
3818 }
3819
3820 if (global_stencil_counts[WEIGHT_SUM_MF] > 0) {
3821
3822 struct interp_weight_stencils_wsum_mf * wsum_mf_stencils =
3824 weights->stencils + stencils_offsets[WEIGHT_SUM_MF],
3825 (size_t)(local_stencil_counts[WEIGHT_SUM_MF]), WEIGHT_SUM_MF);
3826 YAC_ASSERT(
3827 (reorder == YAC_MAPPING_ON_SRC) || (reorder == YAC_MAPPING_ON_TGT),
3828 "ERROR(yac_interp_weights_get_interpolation): invalid reorder type")
3830 weights->comm, wsum_mf_stencils, interp, reorder, WEIGHT_SUM_MF);
3831 for (size_t i = 0; i < wsum_mf_stencils->count; ++i)
3832 free_remote_point(wsum_mf_stencils->data[i].tgt);
3833 free(wsum_mf_stencils->data);
3834 free(wsum_mf_stencils);
3835 }
3836
3837 return interp;
3838}
3839
3841 struct yac_interp_weights * weights, int reorder,
3843 double scaling_factor, double scaling_summand) {
3844
3845 YAC_ASSERT(
3846 (reorder == YAC_MAPPING_ON_SRC) ||
3847 (reorder == YAC_MAPPING_ON_TGT),
3848 "ERROR(yac_interp_weights_get_interpolation_f2c): "
3849 "reorder type must be of YAC_MAPPING_ON_SRC/YAC_MAPPING_ON_TGT");
3850
3851 return
3853 weights, (enum yac_interp_weights_reorder_type)reorder,
3855 scaling_factor, scaling_summand);
3856}
3857
3859
3860 free(points->data);
3861 free(points);
3862}
3863
3865 struct interp_weight_stencil * stencils, size_t count) {
3866
3867 for (size_t i = 0 ; i < count; ++i) {
3868
3869 YAC_ASSERT(
3870 (stencils[i].type == DIRECT) ||
3871 (stencils[i].type == SUM) ||
3872 (stencils[i].type == WEIGHT_SUM) ||
3873 (stencils[i].type == DIRECT_MF) ||
3874 (stencils[i].type == SUM_MF) ||
3875 (stencils[i].type == WEIGHT_SUM_MF) ||
3876 (stencils[i].type == FIXED),
3877 "ERROR(yac_interp_weights_delete): invalid stencil type")
3878 switch(stencils[i].type) {
3879
3880 case(DIRECT):
3881 free_remote_point(stencils[i].data.direct.src);
3882 break;
3883 case(SUM):
3884 free_remote_points(stencils[i].data.sum.srcs);
3885 break;
3886 case(WEIGHT_SUM):
3887 free_remote_points(stencils[i].data.weight_sum.srcs);
3888 free(stencils[i].data.weight_sum.weights);
3889 break;
3890 case(DIRECT_MF):
3891 free_remote_point(stencils[i].data.direct_mf.src);
3892 break;
3893 case(SUM_MF):
3894 free_remote_points(stencils[i].data.sum_mf.srcs);
3895 free(stencils[i].data.sum_mf.field_indices);
3896 break;
3897 case (WEIGHT_SUM_MF):
3898 free_remote_points(stencils[i].data.weight_sum_mf.srcs);
3899 free(stencils[i].data.weight_sum_mf.weights);
3900 free(stencils[i].data.weight_sum_mf.field_indices);
3901 break;
3902 default:
3903 case(FIXED):
3904 break;
3905 };
3906 free_remote_point(stencils[i].tgt);
3907 }
3908 free(stencils);
3909}
3910
3911#ifdef YAC_NETCDF_ENABLED
3912static int compare_double(void const * a, void const * b) {
3913
3914 return (*(double const *)a > *(double const *)b) -
3915 (*(double const *)a < *(double const *)b);
3916}
3917
3922 char const * filename, char const * src_grid_name, char const * tgt_grid_name,
3923 size_t num_fixed_values, double * fixed_values,
3924 size_t * num_tgt_per_fixed_value, size_t num_links,
3925 size_t num_weights_per_link, size_t num_src_fields,
3926 size_t * num_links_per_src_field,
3927 enum yac_location * src_locations, enum yac_location tgt_location,
3928 size_t src_grid_size, size_t tgt_grid_size) {
3929
3930 int ncid;
3931
3932 // create file
3933 yac_nc_create(filename, NC_CLOBBER | NC_64BIT_OFFSET, &ncid);
3934
3935 int dim_weight_id[8];
3936
3937 // define dimensions
3938 if (num_links > 0) {
3939 YAC_HANDLE_ERROR(nc_def_dim(ncid, "num_links", num_links, &dim_weight_id[0]));
3941 num_weights_per_link > 0,
3942 "ERROR(create_weight_file): number of links is %zu but number of "
3943 "weights per link is zero for weight file %s", num_links, filename)
3945 nc_def_dim(ncid, "num_wgts", num_weights_per_link, &dim_weight_id[1]));
3946 }
3948 num_src_fields > 0,
3949 "ERROR(create_weight_file): number of source fields is zero for "
3950 "weight file %s", filename)
3952 nc_def_dim(ncid, "num_src_fields", num_src_fields, &dim_weight_id[2]));
3954 nc_def_dim(
3955 ncid, "max_loc_str_len", YAC_MAX_LOC_STR_LEN, &dim_weight_id[3]));
3956
3957 if (num_fixed_values > 0) {
3959 nc_def_dim(
3960 ncid, "num_fixed_values", num_fixed_values, &dim_weight_id[4]));
3961 size_t num_fixed_dst = 0;
3962 for (size_t i = 0; i < num_fixed_values; ++i)
3963 num_fixed_dst += num_tgt_per_fixed_value[i];
3965 num_fixed_dst > 0,
3966 "ERROR(create_weight_file): number of fixed values is %zu but number "
3967 "of fixed destination points is zero for weight file %s",
3968 num_fixed_dst, filename)
3970 nc_def_dim(ncid, "num_fixed_dst", num_fixed_dst, &dim_weight_id[5]));
3971 }
3972
3973 if (src_grid_size > 0)
3975 nc_def_dim(ncid, "src_grid_size", src_grid_size, &dim_weight_id[6]));
3976
3977 if (tgt_grid_size > 0)
3979 nc_def_dim(ncid, "dst_grid_size", tgt_grid_size, &dim_weight_id[7]));
3980
3981 int var_src_add_id, var_dst_add_id, var_weight_id, var_num_links_id,
3982 src_var_locs_id, tgt_var_loc_id, var_fixed_values_id,
3983 var_num_dst_per_fixed_value_id, var_dst_add_fixed_id;
3984
3985 // define variables
3986 if (num_links > 0) {
3988 nc_def_var(
3989 ncid, "src_address", NC_INT, 1, dim_weight_id, &var_src_add_id));
3991 nc_def_var(
3992 ncid, "dst_address", NC_INT, 1, dim_weight_id, &var_dst_add_id));
3994 nc_def_var(
3995 ncid, "remap_matrix", NC_DOUBLE, 2, dim_weight_id, &var_weight_id));
3997 nc_def_var(ncid, "num_links_per_src_field", NC_INT, 1,
3998 &dim_weight_id[2], &var_num_links_id));
3999 }
4001 nc_def_var(
4002 ncid, "src_locations", NC_CHAR, 2, &dim_weight_id[2], &src_var_locs_id));
4004 nc_def_var(
4005 ncid, "dst_location", NC_CHAR, 1, &dim_weight_id[3], &tgt_var_loc_id));
4006 if (num_fixed_values > 0) {
4008 nc_def_var(ncid, "fixed_values", NC_DOUBLE, 1, &dim_weight_id[4],
4009 &var_fixed_values_id));
4011 nc_def_var(ncid, "num_dst_per_fixed_value", NC_INT, 1, &dim_weight_id[4],
4012 &var_num_dst_per_fixed_value_id));
4014 nc_def_var(ncid, "dst_address_fixed", NC_INT, 1, &dim_weight_id[5],
4015 &var_dst_add_fixed_id));
4016 }
4017
4018 // put attributes
4020 nc_put_att_text(ncid, NC_GLOBAL, "version",
4024 nc_put_att_text(ncid, NC_GLOBAL, "src_grid_name",
4025 strlen(src_grid_name), src_grid_name));
4027 nc_put_att_text(ncid, NC_GLOBAL, "dst_grid_name",
4028 strlen(tgt_grid_name), tgt_grid_name));
4029 {
4030 char const * str_logical[2] = {"FALSE", "TRUE"};
4031 YAC_HANDLE_ERROR(nc_put_att_text(ncid, NC_GLOBAL, "contains_links",
4032 strlen(str_logical[num_links > 0]),
4033 str_logical[num_links > 0]));
4034 YAC_HANDLE_ERROR(nc_put_att_text(ncid, NC_GLOBAL, "contains_fixed_dst",
4035 strlen(str_logical[num_fixed_values > 0]),
4036 str_logical[num_fixed_values > 0]));
4037 }
4038
4039 // end definition
4040 YAC_HANDLE_ERROR(nc_enddef(ncid));
4041
4042 // write some basic data
4043
4044 if (num_links > 0) {
4045 int * num_links_per_src_field_int =
4046 xmalloc(num_src_fields * sizeof(*num_links_per_src_field_int));
4047 for (size_t i = 0; i < num_src_fields; ++i) {
4048 YAC_ASSERT(
4049 num_links_per_src_field[i] <= INT_MAX,
4050 "ERROR(create_weight_file): "
4051 "number of links per source field too big (not yet supported)")
4052 num_links_per_src_field_int[i] = (int)num_links_per_src_field[i];
4053 }
4055 nc_put_var_int(ncid, var_num_links_id, num_links_per_src_field_int));
4056 free(num_links_per_src_field_int);
4057 }
4058
4059 for (size_t i = 0; i < num_src_fields; ++i) {
4060 char const * loc_str = yac_loc2str(src_locations[i]);
4061 size_t str_start[2] = {i, 0};
4062 size_t str_count[2] = {1, strlen(loc_str)};
4064 nc_put_vara_text(ncid, src_var_locs_id, str_start, str_count, loc_str));
4065 }
4066
4067 {
4068 char const * loc_str = yac_loc2str(tgt_location);
4069 size_t str_start[1] = {0};
4070 size_t str_count[1] = {strlen(loc_str)};
4072 nc_put_vara_text(ncid, tgt_var_loc_id, str_start, str_count, loc_str));
4073 }
4074 if (num_fixed_values > 0) {
4075
4076 int * num_tgt_per_fixed_value_int =
4077 xmalloc(num_fixed_values * sizeof(*num_tgt_per_fixed_value_int));
4078 for (unsigned i = 0; i < num_fixed_values; ++i) {
4079 YAC_ASSERT(
4080 num_tgt_per_fixed_value[i] <= INT_MAX,
4081 "ERROR(create_weight_file): "
4082 "number of targets per fixed value is too big (not yet supported)")
4083 num_tgt_per_fixed_value_int[i] = (int)num_tgt_per_fixed_value[i];
4084 }
4085 YAC_HANDLE_ERROR(nc_put_var_double(ncid, var_fixed_values_id, fixed_values));
4086 YAC_HANDLE_ERROR(nc_put_var_int(ncid, var_num_dst_per_fixed_value_id,
4087 num_tgt_per_fixed_value_int));
4088 free(num_tgt_per_fixed_value_int);
4089 }
4090
4091 // close file
4092 YAC_HANDLE_ERROR(nc_close(ncid));
4093}
4094
4095static int compare_interp_weight_stencil(const void * a, const void * b) {
4096
4097 int a_is_fixed = (((struct interp_weight_stencil *)a)->type == FIXED);
4098 int b_is_fixed = (((struct interp_weight_stencil *)b)->type == FIXED);
4099 int ret = b_is_fixed - a_is_fixed;
4100
4101 if (ret) return ret;
4102
4103 // if both are fixed stencils
4104 if (a_is_fixed) {
4105
4106 double fixed_value_a =
4107 ((struct interp_weight_stencil *)a)->data.fixed.value;
4108 double fixed_value_b =
4109 ((struct interp_weight_stencil *)b)->data.fixed.value;
4110 ret = (fixed_value_a > fixed_value_b) -
4111 (fixed_value_a < fixed_value_b);
4112 if (ret) return ret;
4113 }
4114
4115 return (((struct interp_weight_stencil *)a)->tgt.global_id >
4116 ((struct interp_weight_stencil *)b)->tgt.global_id) -
4117 (((struct interp_weight_stencil *)a)->tgt.global_id <
4118 ((struct interp_weight_stencil *)b)->tgt.global_id);
4119}
4120
4122 struct interp_weight_stencil * stencils, size_t stencils_size,
4123 yac_int * min_tgt_global_id, yac_int * max_tgt_global_id, MPI_Comm comm) {
4124
4125 yac_int min_max[2] = {XT_INT_MAX, XT_INT_MIN};
4126
4127 for (size_t i = 0; i < stencils_size; ++i) {
4128
4129 yac_int curr_id = stencils[i].tgt.global_id;
4130 if (curr_id < min_max[0]) min_max[0] = curr_id;
4131 if (curr_id > min_max[1]) min_max[1] = curr_id;
4132 }
4133
4134 min_max[0] = XT_INT_MAX - min_max[0];
4135
4137 MPI_Allreduce(
4138 MPI_IN_PLACE, min_max, 2, yac_int_dt, MPI_MAX, comm), comm);
4139
4140 *min_tgt_global_id = XT_INT_MAX - min_max[0];
4141 *max_tgt_global_id = min_max[1];
4142}
4143
4145 struct interp_weight_stencil * stencils, size_t stencils_size,
4146 yac_int min_tgt_global_id, yac_int max_tgt_global_id,
4147 int num_io_procs_int, int * io_owner) {
4148
4149 long long num_io_procs = (long long)num_io_procs_int;
4150 long long id_range =
4151 MAX((long long)(max_tgt_global_id - min_tgt_global_id),1);
4152
4153 for (size_t i = 0; i < stencils_size; ++i)
4154 io_owner[i] =
4155 ((int)(MIN(((long long)(stencils[i].tgt.global_id - min_tgt_global_id) *
4156 num_io_procs) / id_range, num_io_procs - 1)));
4157}
4158
4160 struct interp_weight_stencil * stencils, size_t stencil_count,
4161 double ** fixed_values, size_t * num_fixed_values, MPI_Comm comm) {
4162
4163 int comm_size;
4164 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
4165
4166 double * local_fixed_values =
4167 xmalloc(stencil_count * sizeof(*local_fixed_values));
4168
4169 int * int_buffer = xmalloc(2 * (size_t)comm_size * sizeof(*int_buffer));
4170 int * recvcounts = int_buffer + 0 * comm_size;
4171 int * rdispls = int_buffer + 1 * comm_size;
4172
4173 size_t local_num_fixed = 0;
4174
4175 // get all local fixed values
4176 for (size_t i = 0; i < stencil_count;
4177 ++i, ++local_num_fixed) {
4178 if (stencils[i].type != FIXED) break;
4179 local_fixed_values[i] = stencils[i].data.fixed.value;
4180 }
4181 qsort(local_fixed_values, local_num_fixed, sizeof(*local_fixed_values),
4183 yac_remove_duplicates_double(local_fixed_values, &local_num_fixed);
4184
4185 // get number of fixed values per rank
4186 int local_num_fixed_int = (int)(local_num_fixed);
4188 MPI_Allgather(
4189 &local_num_fixed_int, 1, MPI_INT, recvcounts, 1,MPI_INT, comm), comm);
4190 for (int i = 0, accu = 0; i < comm_size; ++i) {
4191 rdispls[i] = accu;
4192 accu += recvcounts[i];
4193 }
4194
4195 size_t num_all_fixed_values = 0;
4196 for (int i = 0; i < comm_size; ++i)
4197 num_all_fixed_values += (size_t)(recvcounts[i]);
4198
4199 double * all_fixed_values =
4200 xmalloc(num_all_fixed_values * sizeof(*all_fixed_values));
4201
4202 // gather all fixed values
4204 MPI_Allgatherv(
4205 local_fixed_values, local_num_fixed_int, MPI_DOUBLE,
4206 all_fixed_values, recvcounts, rdispls, MPI_DOUBLE, comm), comm);
4207 free(int_buffer);
4208 free(local_fixed_values);
4209
4210 qsort(all_fixed_values, num_all_fixed_values, sizeof(*all_fixed_values),
4212 yac_remove_duplicates_double(all_fixed_values, &num_all_fixed_values);
4213 *fixed_values = xrealloc(all_fixed_values,
4214 num_all_fixed_values * sizeof(*all_fixed_values));
4215 *num_fixed_values = num_all_fixed_values;
4216}
4217
4218static size_t get_num_weights_per_link(struct interp_weight_stencil * stencil) {
4219
4220 YAC_ASSERT(
4221 (stencil->type == FIXED) ||
4222 (stencil->type == DIRECT) ||
4223 (stencil->type == SUM) ||
4224 (stencil->type == WEIGHT_SUM) ||
4225 (stencil->type == DIRECT_MF) ||
4226 (stencil->type == SUM_MF) ||
4227 (stencil->type == WEIGHT_SUM_MF),
4228 "ERROR(get_num_weights_per_link): invalid stencil type")
4229
4230 return (stencil->type == FIXED)?0:1;
4231}
4232
4234 struct interp_weight_stencil * stencils, size_t stencil_count,
4235 MPI_Comm comm) {
4236
4237 size_t num_weights_per_link = 0;
4238 for (size_t i = 0; i < stencil_count; ++i)
4239 num_weights_per_link =
4240 MAX(num_weights_per_link, get_num_weights_per_link(stencils + i));
4241
4242 uint64_t num_weights_per_link_64_t = (uint64_t)num_weights_per_link;
4244 MPI_Allreduce(
4245 MPI_IN_PLACE, &num_weights_per_link_64_t, 1, MPI_UINT64_T,
4246 MPI_MAX, comm), comm);
4247 num_weights_per_link = (size_t)num_weights_per_link_64_t;
4248
4249 return num_weights_per_link;
4250}
4251
4253 struct interp_weight_stencil * stencil, size_t src_field_idx) {;
4254
4255 YAC_ASSERT(
4256 stencil->type != FIXED,
4257 "ERROR(get_num_links_per_src_field): "
4258 "stencil type FIXED not supported by this routine")
4259 YAC_ASSERT(
4260 (stencil->type == DIRECT) ||
4261 (stencil->type == SUM) ||
4262 (stencil->type == WEIGHT_SUM) ||
4263 (stencil->type == DIRECT_MF) ||
4264 (stencil->type == SUM_MF) ||
4265 (stencil->type == WEIGHT_SUM_MF),
4266 "ERROR(get_num_links_per_src_field): invalid stencil type")
4267 switch (stencil->type) {
4268 default:
4269 case(DIRECT): return (src_field_idx == 0)?1:0;
4270 case(SUM): return (src_field_idx == 0)?stencil->data.sum.srcs->count:0;
4271 case(WEIGHT_SUM):
4272 return (src_field_idx == 0)?stencil->data.weight_sum.srcs->count:0;
4273 case(DIRECT_MF): return stencil->data.direct_mf.field_idx == src_field_idx;
4274 case(SUM_MF): {
4275 size_t count = 0;
4276 size_t stencil_size = stencil->data.sum_mf.srcs->count;
4277 size_t * field_indices = stencil->data.sum_mf.field_indices;
4278 for (size_t i = 0; i < stencil_size; ++i)
4279 if (field_indices[i] == src_field_idx) ++count;
4280 return count;
4281 }
4282 case(WEIGHT_SUM_MF): {
4283 size_t count = 0;
4284 size_t stencil_size = stencil->data.weight_sum_mf.srcs->count;
4285 size_t * field_indices = stencil->data.weight_sum_mf.field_indices;
4286 for (size_t i = 0; i < stencil_size; ++i)
4287 if (field_indices[i] == src_field_idx) ++count;
4288 return count;
4289 }
4290 };
4291}
4292
4294 struct interp_weight_stencil * stencils, size_t stencil_count,
4295 size_t num_fixed_values, double * fixed_values,
4296 size_t * num_tgt_per_fixed_value,
4297 size_t * num_fixed_tgt, size_t num_src_fields,
4298 size_t * num_links_per_src_field, size_t * num_links) {
4299
4300 *num_fixed_tgt = 0;
4301 *num_links = 0;
4302 for (size_t i = 0; i < num_fixed_values; ++i) num_tgt_per_fixed_value[i] = 0;
4303 for (size_t i = 0; i < num_src_fields; ++i) num_links_per_src_field[i] = 0;
4304
4305 for (size_t i = 0; i < stencil_count; ++i) {
4306 if (stencils[i].type == FIXED) {
4307 double curr_fixed_value = stencils[i].data.fixed.value;
4308 for (size_t j = 0; j < num_fixed_values; ++j) {
4309 if (curr_fixed_value == fixed_values[j]) {
4310 num_tgt_per_fixed_value[j]++;
4311 break;
4312 }
4313 }
4314 ++*num_fixed_tgt;
4315 } else {
4316 for (size_t j = 0; j < num_src_fields; ++j) {
4317 num_links_per_src_field[j] +=
4318 get_num_links_per_src_field(stencils + i, j);
4319 }
4320 }
4321 }
4322 for (size_t i = 0; i < num_src_fields; ++i)
4323 *num_links += num_links_per_src_field[i];
4324}
4325
4327 size_t num_fixed_values, size_t * num_tgt_per_fixed_value,
4328 size_t num_src_fields, size_t * num_links_per_src_field,
4329 size_t * fixed_offsets, size_t * link_offsets, MPI_Comm comm) {
4330
4331 int comm_rank;
4332 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
4333
4334 size_t count = num_fixed_values + num_src_fields;
4335 uint64_t * uint64_t_buffer = xmalloc(3 * count * sizeof(*uint64_t_buffer));
4336 uint64_t * global_counts = uint64_t_buffer + 0 * count;
4337 uint64_t * local_counts = uint64_t_buffer + 1 * count;
4338 uint64_t * offsets = uint64_t_buffer + 2 * count;
4339
4340 for (size_t i = 0; i < num_fixed_values; ++i)
4341 local_counts[i] = (uint64_t)(num_tgt_per_fixed_value[i]);
4342 for (size_t i = 0; i < num_src_fields; ++i)
4343 local_counts[num_fixed_values + i] = (uint64_t)(num_links_per_src_field[i]);
4344
4346 MPI_Allreduce(local_counts, global_counts, (int)count, MPI_UINT64_T,
4347 MPI_SUM, comm), comm);
4349 MPI_Exscan(local_counts, offsets, (int)count, MPI_UINT64_T, MPI_SUM, comm),
4350 comm);
4351 if (comm_rank == 0) memset(offsets, 0, count * sizeof(*offsets));
4352
4353 for (size_t i = 0, accu = 0; i < num_fixed_values; ++i) {
4354 fixed_offsets[i] = (size_t)(offsets[i]) + accu;
4355 accu += (size_t)(global_counts[i]);
4356 }
4357 for (size_t i = 0, accu = 0; i < num_src_fields; ++i) {
4358 link_offsets[i] = (size_t)(offsets[i+num_fixed_values]) + accu;
4359 accu += (size_t)(global_counts[i+num_fixed_values]);
4360 }
4361 free(uint64_t_buffer);
4362}
4363
4364static int global_id_to_address(yac_int global_id) {
4365
4366 YAC_ASSERT(
4367 (global_id < INT_MAX) && (global_id != XT_INT_MAX),
4368 "ERROR(global_id_to_address): "
4369 "a global id cannot be converted into a address; too big")
4370 return (int)global_id + 1;
4371}
4372
4374 struct interp_weight_stencil * stencils, size_t stencil_count,
4375 int * tgt_address) {
4376
4377 for (size_t i = 0; i < stencil_count; ++i)
4378 tgt_address[i] = global_id_to_address(stencils[i].tgt.global_id);
4379}
4380
4382 struct interp_weight_stencil * stencils, size_t stencil_count,
4383 size_t * num_links_per_src_field, size_t num_src_fields,
4384 int * src_address, int * tgt_address, double * weight) {
4385
4386 size_t * src_field_offsets =
4387 xmalloc(2 * num_src_fields * sizeof(*src_field_offsets));
4388 size_t * prev_src_field_offsets = src_field_offsets + num_src_fields;
4389 for (size_t i = 0, accu = 0; i < num_src_fields; ++i) {
4390 src_field_offsets[i] = accu;
4391 accu += num_links_per_src_field[i];
4392 }
4393
4394 struct interp_weight_stencil * curr_stencil = stencils;
4395 for (size_t i = 0; i < stencil_count; ++i, ++curr_stencil) {
4396
4397 memcpy(prev_src_field_offsets, src_field_offsets,
4398 num_src_fields * sizeof(*prev_src_field_offsets));
4399
4400 int curr_tgt_address = global_id_to_address(curr_stencil->tgt.global_id);
4401 YAC_ASSERT(
4402 curr_stencil->type != FIXED,
4403 "ERROR(stencil_get_link_data): this call is invalid for FIXED stencils")
4404 YAC_ASSERT(
4405 (curr_stencil->type == DIRECT) ||
4406 (curr_stencil->type == SUM) ||
4407 (curr_stencil->type == WEIGHT_SUM) ||
4408 (curr_stencil->type == DIRECT_MF) ||
4409 (curr_stencil->type == SUM_MF) ||
4410 (curr_stencil->type == WEIGHT_SUM_MF),
4411 "ERROR(stencil_get_link_data): invalid stencil type")
4412 size_t src_field_offset;
4413 switch (curr_stencil->type) {
4414 default:
4415 case(DIRECT):
4416 src_field_offset = src_field_offsets[0]++;
4417 src_address[src_field_offset] =
4419 tgt_address[src_field_offset] = curr_tgt_address;
4420 weight[src_field_offset] = 1.0;
4421 break;
4422 case(SUM): {
4423 size_t curr_count = curr_stencil->data.sum.srcs->count;
4424 struct remote_point * srcs = curr_stencil->data.sum.srcs->data;
4425 for (size_t k = 0; k < curr_count; ++k) {
4426 src_field_offset = src_field_offsets[0]++;
4427 src_address[src_field_offset] =
4429 tgt_address[src_field_offset] = curr_tgt_address;
4430 weight[src_field_offset] = 1.0;
4431 }
4432 break;
4433 }
4434 case(WEIGHT_SUM): {
4435 size_t curr_count = curr_stencil->data.weight_sum.srcs->count;
4436 struct remote_point * srcs = curr_stencil->data.weight_sum.srcs->data;
4437 double * weights = curr_stencil->data.weight_sum.weights;
4438 for (size_t k = 0; k < curr_count; ++k) {
4439 src_field_offset = src_field_offsets[0]++;
4440 src_address[src_field_offset] =
4442 tgt_address[src_field_offset] = curr_tgt_address;
4443 weight[src_field_offset] = weights[k];
4444 }
4445 break;
4446 }
4447 case(DIRECT_MF):
4448 src_field_offset =
4449 src_field_offsets[curr_stencil->data.direct_mf.field_idx]++;
4450 src_address[src_field_offset ] =
4452 tgt_address[src_field_offset ] = curr_tgt_address;
4453 weight[src_field_offset ] = 1.0;
4454 break;
4455 case(SUM_MF): {
4456 size_t curr_count = curr_stencil->data.sum_mf.srcs->count;
4457 struct remote_point * srcs =
4458 curr_stencil->data.sum_mf.srcs->data;
4459 size_t * field_indices = curr_stencil->data.sum_mf.field_indices;
4460 for (size_t k = 0; k < curr_count; ++k) {
4461 src_field_offset = src_field_offsets[field_indices[k]]++;
4462 src_address[src_field_offset] =
4464 tgt_address[src_field_offset] = curr_tgt_address;
4465 weight[src_field_offset] = 1.0;
4466 }
4467 break;
4468 }
4469 case(WEIGHT_SUM_MF): {
4470 size_t curr_count = curr_stencil->data.weight_sum_mf.srcs->count;
4471 struct remote_point * srcs =
4472 curr_stencil->data.weight_sum_mf.srcs->data;
4473 double * weights = curr_stencil->data.weight_sum_mf.weights;
4474 size_t * field_indices = curr_stencil->data.weight_sum_mf.field_indices;
4475 for (size_t k = 0; k < curr_count; ++k) {
4476 src_field_offset = src_field_offsets[field_indices[k]]++;
4477 src_address[src_field_offset] =
4479 tgt_address[src_field_offset] = curr_tgt_address;
4480 weight[src_field_offset] = weights[k];
4481 }
4482 break;
4483 }
4484 };
4485
4486 for (size_t j = 0; j < num_src_fields; ++j)
4488 src_address + prev_src_field_offsets[j],
4489 src_field_offsets[j] - prev_src_field_offsets[j],
4490 weight + prev_src_field_offsets[j]);
4491 }
4492 free(src_field_offsets);
4493}
4494
4496 MPI_Comm comm, size_t count, struct interp_weight_stencil * stencils,
4497 int * owner_ranks, size_t * new_count,
4498 struct interp_weight_stencil ** new_stencils) {
4499
4500 int comm_rank, comm_size;
4501 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
4502 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
4503
4504 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
4506 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
4507
4508 size_t * stencil_indices = xmalloc(count * sizeof(*stencil_indices));
4509 for (size_t i = 0; i < count; ++i) {
4510 stencil_indices[i] = i;
4511 sendcounts[owner_ranks[i]]++;
4512 }
4513
4515 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4516
4517 // sort the stencil indices by owner rank
4518 yac_quicksort_index_int_size_t(owner_ranks, count, stencil_indices);
4519
4520 *new_count = recvcounts[comm_size - 1] + rdispls[comm_size - 1];
4521 *new_stencils =
4522 exchange_stencils(comm, stencils, stencil_indices, sendcounts, recvcounts);
4523 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
4524 free(stencil_indices);
4525}
4526
4527#endif // YAC_NETCDF_ENABLED
4528
4530 struct yac_interp_weights * weights, char const * filename,
4531 char const * src_grid_name, char const * tgt_grid_name,
4532 size_t src_grid_size, size_t tgt_grid_size) {
4533
4534#ifndef YAC_NETCDF_ENABLED
4535
4536 UNUSED(weights);
4537 UNUSED(filename);
4538 UNUSED(src_grid_name);
4539 UNUSED(tgt_grid_name);
4540 UNUSED(src_grid_size);
4541 UNUSED(tgt_grid_size);
4542
4543 die(
4544 "ERROR(yac_interp_weights_write_to_file): "
4545 "YAC is built without the NetCDF support");
4546#else
4547
4548 MPI_Comm comm = weights->comm;
4549 int comm_rank, comm_size;
4550 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
4551 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
4552
4553 // determine processes that will do output
4554 int io_flag;
4555 int * io_ranks;
4556 int num_io_ranks;
4557 yac_get_io_ranks(comm, &io_flag, &io_ranks, &num_io_ranks);
4558
4559 // determine range of global ids
4560 yac_int min_tgt_global_id, max_tgt_global_id;
4562 weights->stencils, weights->stencils_size,
4563 &min_tgt_global_id, &max_tgt_global_id, comm);
4564
4565 // determine io owners for all stencils
4566 int * io_owner =
4567 xmalloc(weights->stencils_size * sizeof(*io_owner));
4569 weights->stencils, weights->stencils_size,
4570 min_tgt_global_id, max_tgt_global_id,
4571 num_io_ranks, io_owner);
4572 for (size_t i = 0; i < weights->stencils_size; ++i)
4573 io_owner[i] = io_ranks[io_owner[i]];
4574 free(io_ranks);
4575
4576 size_t io_stencil_count = 0;
4577 struct interp_weight_stencil * io_stencils = NULL;
4578
4579 // redistribute stencils into io decomposition
4581 comm, weights->stencils_size, weights->stencils, io_owner,
4582 &io_stencil_count, &io_stencils);
4583 free(io_owner);
4584
4585 // distribute global grid sizes
4586 uint64_t grid_sizes[2] = {(uint64_t)src_grid_size, (uint64_t)tgt_grid_size};
4588 MPI_Allreduce(
4589 MPI_IN_PLACE, grid_sizes, 2, MPI_UINT64_T, MPI_MAX, comm), comm);
4590 src_grid_size = (size_t)(grid_sizes[0]);
4591 tgt_grid_size = (size_t)(grid_sizes[1]);
4592
4593 MPI_Comm io_comm;
4594 yac_mpi_call(MPI_Comm_split(comm, io_flag, comm_rank, &io_comm), comm);
4595
4596 // all non-io processes exit here, the remaining ones work using their own
4597 // communicator
4598 if (!io_flag) {
4599 yac_mpi_call(MPI_Comm_free(&io_comm), comm);
4600 free(io_stencils);
4601 // ensure that the writing of the weight file is complete
4602 yac_mpi_call(MPI_Barrier(comm), comm);
4603 return;
4604 }
4605
4606 // sort the stencils first by type (fixed first; fixed stencils are sorted
4607 // by their fixed value) and second by tgt id
4608 qsort(io_stencils, io_stencil_count, sizeof(*io_stencils),
4610
4611 yac_mpi_call(MPI_Comm_rank(io_comm, &comm_rank), comm);
4612 yac_mpi_call(MPI_Comm_size(io_comm, &comm_size), comm);
4613
4614 double * fixed_values = NULL;
4615 size_t num_fixed_values = 0;
4617 io_stencils, io_stencil_count, &fixed_values, &num_fixed_values, io_comm);
4618 size_t num_src_fields = weights->num_src_fields;
4619 size_t num_weights_per_link =
4620 stencil_get_num_weights_per_tgt(io_stencils, io_stencil_count, io_comm);
4621
4622 size_t * size_t_buffer =
4623 xmalloc(2 * (num_fixed_values + num_src_fields) * sizeof(*size_t_buffer));
4624 size_t * num_tgt_per_fixed_value = size_t_buffer;
4625 size_t * num_links_per_src_field = size_t_buffer + num_fixed_values;
4626 size_t * fixed_offsets = size_t_buffer + num_fixed_values + num_src_fields;
4627 size_t * link_offsets = size_t_buffer + 2 * num_fixed_values + num_src_fields;
4628
4629 size_t num_fixed_tgt = 0;
4630 size_t num_links = 0;
4632 io_stencils, io_stencil_count, num_fixed_values, fixed_values,
4633 num_tgt_per_fixed_value, &num_fixed_tgt, num_src_fields,
4634 num_links_per_src_field, &num_links);
4635
4637 num_fixed_values, num_tgt_per_fixed_value,
4638 num_src_fields, num_links_per_src_field,
4639 fixed_offsets, link_offsets, io_comm);
4640
4641 if (comm_rank == comm_size - 1) {
4642
4643 size_t * total_num_tgt_per_fixed_value =
4644 xmalloc(num_fixed_values * sizeof(*total_num_tgt_per_fixed_value));
4645 for (size_t i = 0, accu = 0; i < num_fixed_values; ++i) {
4646 total_num_tgt_per_fixed_value[i] =
4647 fixed_offsets[i] + num_tgt_per_fixed_value[i] - accu;
4648 accu += total_num_tgt_per_fixed_value[i];
4649 }
4650 size_t total_num_links = link_offsets[num_src_fields-1] +
4651 num_links_per_src_field[num_src_fields-1];
4652
4653 size_t * total_num_links_per_src_field =
4654 xmalloc(num_src_fields * sizeof(*total_num_links_per_src_field));
4655 for (size_t i = 0, accu = 0; i < num_src_fields; ++i) {
4656 total_num_links_per_src_field[i] =
4657 link_offsets[i] + num_links_per_src_field[i] - accu;
4658 accu += total_num_links_per_src_field[i];
4659 }
4660
4662 filename, src_grid_name, tgt_grid_name,
4663 num_fixed_values, fixed_values, total_num_tgt_per_fixed_value,
4664 total_num_links, num_weights_per_link,
4665 num_src_fields, total_num_links_per_src_field,
4666 weights->src_locations, weights->tgt_location,
4667 src_grid_size, tgt_grid_size);
4668
4669 free(total_num_links_per_src_field);
4670 free(total_num_tgt_per_fixed_value);
4671 }
4672 free(fixed_values);
4673
4674 // ensure that the basic weight file has been written
4675 yac_mpi_call(MPI_Barrier(io_comm), comm);
4676 yac_mpi_call(MPI_Comm_free(&io_comm), comm);
4677
4678 int ncid;
4679
4680 // open weight file
4681 yac_nc_open(filename, NC_WRITE | NC_SHARE, &ncid);
4682
4683 if (num_fixed_tgt > 0) {
4684
4685 int * tgt_address_fixed =
4686 xmalloc(num_fixed_tgt * sizeof(*tgt_address_fixed));
4687 stencil_get_tgt_address(io_stencils, num_fixed_tgt, tgt_address_fixed);
4688
4689 // inquire variable ids
4690 int var_dst_add_fixed_id;
4691 yac_nc_inq_varid(ncid, "dst_address_fixed", &var_dst_add_fixed_id);
4692
4693 // target ids that receive a fixed value to file
4694 for (size_t i = 0, offset = 0; i < num_fixed_values; ++i) {
4695
4696 if (num_tgt_per_fixed_value[i] == 0) continue;
4697
4698 size_t start[1] = {fixed_offsets[i]};
4699 size_t count[1] = {num_tgt_per_fixed_value[i]};
4701 nc_put_vara_int(
4702 ncid, var_dst_add_fixed_id, start, count, tgt_address_fixed + offset));
4703 offset += num_tgt_per_fixed_value[i];
4704 }
4705
4706 free(tgt_address_fixed);
4707 }
4708
4709 if (num_links > 0) {
4710
4711 int * src_address_link = xmalloc(num_links * sizeof(*src_address_link));
4712 int * tgt_address_link = xmalloc(num_links * sizeof(*tgt_address_link));
4713 double * w = xmalloc(num_links * num_weights_per_link * sizeof(*w));
4715 io_stencils + num_fixed_tgt, io_stencil_count - num_fixed_tgt,
4716 num_links_per_src_field, num_src_fields,
4717 src_address_link, tgt_address_link, w);
4718
4719 int var_src_add_id, var_dst_add_id, var_weight_id;
4720 yac_nc_inq_varid(ncid, "src_address", &var_src_add_id);
4721 yac_nc_inq_varid(ncid, "dst_address", &var_dst_add_id);
4722 yac_nc_inq_varid(ncid, "remap_matrix", &var_weight_id);
4723
4724 for (size_t i = 0, offset = 0; i < num_src_fields; ++i) {
4725
4726 if (num_links_per_src_field[i] == 0) continue;
4727
4728 size_t start[2] = {link_offsets[i], 0};
4729 size_t count[2] = {num_links_per_src_field[i], num_weights_per_link};
4730
4732 nc_put_vara_int(
4733 ncid, var_src_add_id, start, count, src_address_link + offset));
4735 nc_put_vara_int(
4736 ncid, var_dst_add_id, start, count, tgt_address_link + offset));
4738 nc_put_vara_double(
4739 ncid, var_weight_id, start, count,
4740 w + num_weights_per_link * offset));
4741
4742 offset += num_links_per_src_field[i];
4743 }
4744
4745 free(w);
4746 free(tgt_address_link);
4747 free(src_address_link);
4748 }
4749
4750 // close weight file
4751 YAC_HANDLE_ERROR(nc_close(ncid));
4752
4753 // ensure that the writing of the weight file is complete
4754 yac_mpi_call(MPI_Barrier(comm), comm);
4755
4756 free(size_t_buffer);
4757 yac_interp_weight_stencils_delete(io_stencils, io_stencil_count);
4758#endif
4759}
4760
4762 struct yac_interp_weights * weights) {
4763
4764 return weights->stencils_size;
4765}
4766
4768 struct yac_interp_weights * weights) {
4769
4770 struct interp_weight_stencil * stencils = weights->stencils;
4771 size_t stencils_size = weights->stencils_size;
4772
4773 yac_int * global_ids = xmalloc(stencils_size * sizeof(*global_ids));
4774
4775 for (size_t i = 0; i < stencils_size; ++i)
4776 global_ids[i] = stencils[i].tgt.global_id;
4777
4778 return global_ids;
4779}
4780
4782 return weights->comm;
4783}
4784
4786
4787 if (weights == NULL) return;
4788
4789 yac_interp_weight_stencils_delete(weights->stencils, weights->stencils_size);
4790 free(weights->src_locations);
4791 free(weights);
4792}
#define UNUSED(x)
Definition core.h:71
void yac_remote_point_pack(struct remote_point *point, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
Definition dist_grid.c:536
void yac_remote_point_unpack(void *buffer, int buffer_size, int *position, struct remote_point *point, MPI_Datatype point_info_dt, MPI_Comm comm)
Definition dist_grid.c:548
void yac_remote_points_pack(struct remote_points *points, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
Definition dist_grid.c:592
void yac_remote_points_unpack(void *buffer, int buffer_size, int *position, struct remote_points **points, MPI_Datatype point_info_dt, MPI_Comm comm)
Definition dist_grid.c:611
int yac_remote_points_get_pack_size(struct remote_points *points, MPI_Datatype point_info_dt, MPI_Comm comm)
Definition dist_grid.c:574
MPI_Datatype yac_get_remote_point_info_mpi_datatype(MPI_Comm comm)
Definition dist_grid.c:322
int yac_remote_point_get_pack_size(struct remote_point *point, MPI_Datatype point_info_dt, MPI_Comm comm)
Definition dist_grid.c:507
#define ENSURE_ARRAY_SIZE(arrayp, curr_array_size, req_size)
struct @8::@9 value
enum callback_type type
#define YAC_WEIGHT_FILE_VERSION_STRING
static MPI_Datatype get_fixed_stencil_mpi_datatype(MPI_Comm comm)
static size_t get_num_links_per_src_field(struct interp_weight_stencil *stencil, size_t src_field_idx)
static int get_stencil_pack_size_direct_mf(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static void unpack_stencil_wsum_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static MPI_Datatype get_direct_stencil_mpi_datatype(MPI_Comm comm)
static void stencil_determine_tgt_global_id_range(struct interp_weight_stencil *stencils, size_t stencils_size, yac_int *min_tgt_global_id, yac_int *max_tgt_global_id, MPI_Comm comm)
static void unpack_stencil_sum_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_interp_weights_add_fixed(struct yac_interp_weights *weights, struct remote_points *tgts, double fixed_value)
static int compare_stencils_direct_mf(const void *a, const void *b)
void yac_interp_weights_add_sum_mf(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_src_per_field_per_tgt, struct remote_point **srcs_per_field, size_t num_src_fields)
static struct remote_points * copy_remote_points_mf(struct remote_point **points, size_t *counts, size_t num_fields)
static struct interp_weight_stencils_wsum_mf * redist_wsum_mf_stencils_tgt(MPI_Comm comm, struct interp_weight_stencils_wsum_mf *wsum_stencils_data)
static int compare_remote_point_info(const void *a, const void *b)
static int global_id_to_address(yac_int global_id)
static void yac_interp_weights_redist_direct_mf(MPI_Comm comm, uint64_t count, struct interp_weight_stencil *direct_mf_stencils, struct yac_interpolation *interp)
static int get_stencil_pack_size_sum(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static void pack_stencil_direct(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
yac_interp_weight_stencil_type
@ DIRECT
@ WEIGHT_SUM_MF
@ DIRECT_MF
@ SUM_MF
@ SUM
@ FIXED
@ WEIGHT_SUM
@ WEIGHT_STENCIL_TYPE_SIZE
static void pack_stencil_wsum(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static void free_remote_points(struct remote_points *points)
struct yac_interpolation * yac_interp_weights_get_interpolation(struct yac_interp_weights *weights, enum yac_interp_weights_reorder_type reorder, size_t collection_size, double frac_mask_fallback_value, double scaling_factor, double scaling_summand)
static void yac_interp_weight_stencils_delete(struct interp_weight_stencil *stencils, size_t count)
static int compare_stencils_fixed(const void *a, const void *b)
static struct interp_weight_stencil stencils_merge(struct interp_weight_stencil **stencils, double *w, size_t num_stencils, struct remote_point point)
static int get_stencil_wsum_mf_pack_size(struct interp_weight_stencil_wsum_mf *stencil, MPI_Datatype wsum_mf_weight_dt, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_interp_weights_add_wsum(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_src_per_tgt, struct remote_point *srcs, double *w)
static struct interp_weight_stencil stencils_merge_sum(struct interp_weight_stencil **stencils, double *w, size_t num_stencils)
static int compare_w_global_id(const void *a, const void *b)
static void compact_srcs_w(struct remote_points *srcs, double **w)
void yac_interp_weights_delete(struct yac_interp_weights *weights)