YetAnotherCoupler 3.2.0_a
Loading...
Searching...
No Matches
interp_weights.c
Go to the documentation of this file.
1// Copyright (c) 2024 The YAC Authors
2//
3// SPDX-License-Identifier: BSD-3-Clause
4
5#ifdef HAVE_CONFIG_H
6// Get the definition of the 'restrict' keyword.
7#include "config.h"
8#endif
9
10#include <string.h>
11
12#define WEIGHT_TOL (1e-9)
13
14#include "yac_mpi_internal.h"
16#include "ensure_array_size.h"
17#include "io_utils.h"
18#include "utils_core.h"
19#include "interp_method_file.h"
20
31
33
35 union {
36 struct {
37 double value;
39 struct {
40 struct remote_point src; // src id
42 struct {
43 struct remote_points * srcs; // src ids
44 } sum;
45 struct {
46 struct remote_points * srcs; // src ids
47 double * weights;
49 struct {
50 struct remote_point src; // src id
51 size_t field_idx;
53 struct {
54 struct remote_points * srcs; // src ids
55 size_t * field_indices;
57 struct {
58 struct remote_points * srcs; // src ids
59 double * weights;
60 size_t * field_indices;
63 struct remote_point tgt; //tgt id
64};
65
72
78
83
88
90 double value;
91 uint64_t orig_pos;
92};
93
98
104
109
121
122
128
130 MPI_Comm comm, enum yac_location tgt_location,
132
133 struct yac_interp_weights * weights = xmalloc(1 * sizeof(*weights));
134
135 weights->comm = comm;
136 weights->tgt_location = tgt_location;
137 weights->src_locations = xmalloc(num_src_fields * sizeof(*src_locations));
138 memcpy(
140 num_src_fields * sizeof(*src_locations));
142 weights->stencils = NULL;
143 weights->stencils_array_size = 0;
144 weights->stencils_size = 0;
145
146 return weights;
147}
148
149static inline struct remote_point copy_remote_point(
150 struct remote_point point) {
151
152 int count = point.data.count;
153 if (count > 1) {
154 struct remote_point_info * point_infos =
155 xmalloc((size_t)count * sizeof(*point_infos));
156 memcpy(point_infos, point.data.data.multi,
157 (size_t)count * sizeof(*point_infos));
158 point.data.data.multi = point_infos;
159 }
160 return point;
161}
162
164 struct remote_point * points_to, struct remote_point * points_from,
165 size_t count, struct remote_point_info ** point_info_buffer_) {
166
167 struct remote_point_info * point_info_buffer = *point_info_buffer_;
168
169 for (size_t i = 0; i < count; ++i) {
170 int curr_count = points_from[i].data.count;
171 points_to[i] = points_from[i];
172 if (curr_count > 1) {
173 points_to[i].data.data.multi = point_info_buffer;
174 memcpy(
175 point_info_buffer, points_from[i].data.data.multi,
176 (size_t)curr_count * sizeof(*point_info_buffer));
177 point_info_buffer += curr_count;
178 }
179 }
180 *point_info_buffer_ = point_info_buffer;
181}
182
183static inline struct remote_points * copy_remote_points(
184 struct remote_point * points, size_t count) {
185
186 size_t point_info_buffer_size = 0;
187 for (size_t i = 0; i < count; ++i)
188 if (points[i].data.count > 1)
189 point_info_buffer_size += (size_t)(points[i].data.count);
190
191 struct remote_points * points_copy =
192 xmalloc(point_info_buffer_size * sizeof(struct remote_point_info) +
193 sizeof(*points_copy));
194 points_copy->data = xmalloc(count * sizeof(*(points_copy->data)));
195 points_copy->count = count;
196 struct remote_point_info * point_info_buffer = &(points_copy->buffer[0]);
197
199 points_copy->data, points, count, &point_info_buffer);
200
201 return points_copy;
202}
203
205 struct remote_point ** points, size_t * counts, size_t num_fields) {
206
207 size_t point_info_buffer_size = 0;
208 size_t total_count = 0;
209 for (size_t i = 0; i < num_fields; ++i) {
210 total_count += counts[i];
211 for (size_t j = 0; j < counts[i]; ++j) {
212 if (points[i][j].data.count > 1)
213 point_info_buffer_size += (size_t)(points[i][j].data.count);
214 }
215 }
216
217 struct remote_points * points_copy =
218 xmalloc(point_info_buffer_size * sizeof(struct remote_point_info) +
219 sizeof(*points_copy));
220 points_copy->data = xmalloc(total_count * sizeof(*(points_copy->data)));
221 points_copy->count = total_count;
222 struct remote_point_info * point_info_buffer = &(points_copy->buffer[0]);
223
224 for (size_t i = 0, k = 0; i < num_fields; ++i) {
225 for (size_t j = 0; j < counts[i]; ++j, ++k) {
226 int curr_count = points[i][j].data.count;
227 points_copy->data[k] = points[i][j];
228 if (curr_count > 1) {
229 points_copy->data[k].data.data.multi = point_info_buffer;
230 memcpy(
231 point_info_buffer, points[i][j].data.data.multi,
232 (size_t)curr_count * sizeof(*point_info_buffer));
233 point_info_buffer += curr_count;
234 }
235 }
236 }
237 return points_copy;
238}
239
241 struct yac_interp_weights * weights, struct remote_points * tgts,
242 double fixed_value) {
243
244 struct interp_weight_stencil * stencils = weights->stencils;
245 size_t stencils_array_size = weights->stencils_array_size;
246 size_t stencils_size = weights->stencils_size;
247
248 ENSURE_ARRAY_SIZE(stencils, stencils_array_size, stencils_size + tgts->count);
249
250 for (size_t i = 0; i < tgts->count; ++i, ++stencils_size) {
251
252 stencils[stencils_size].type = FIXED;
253 stencils[stencils_size].tgt = copy_remote_point(tgts->data[i]);
254 stencils[stencils_size].data.fixed.value = fixed_value;
255 }
256
257 weights->stencils = stencils;
258 weights->stencils_array_size = stencils_array_size;
259 weights->stencils_size = stencils_size;
260}
261
263 struct yac_interp_weights * weights, struct remote_points * tgts,
264 size_t * num_src_per_tgt, struct remote_point * srcs, double * w) {
265
266 if (tgts->count == 0) return;
267
268 // determine whether there are zero-weights
269 int pack_flag = 0;
270 for (size_t i = 0, k = 0; (i < tgts->count) && !pack_flag; ++i)
271 for (size_t j = 0; (j < num_src_per_tgt[i]) && !pack_flag; ++j, ++k)
272 pack_flag = (fabs(w[k]) <= WEIGHT_TOL);
273
274 if (pack_flag) {
275
276 for (size_t i = 0, k = 0, l = 0;
277 i < tgts->count; i++) {
278
279 size_t curr_count = num_src_per_tgt[i];
280
281 for (size_t j = 0; j < curr_count; j++, k++) {
282
283 if (fabs(w[k]) < WEIGHT_TOL) {
284 num_src_per_tgt[i]--;
285 } else {
286 if (l != k) {
287 srcs[l] = srcs[k];
288 w[l] = w[k];
289 }
290 ++l;
291 }
292 }
293
294 // if all weights were zero
295 if ((curr_count != 0) && (num_src_per_tgt[i] == 0)) {
296
297 if (l != k) {
298 srcs[l] = srcs[k - curr_count];
299 w[l] = 0.0;
300 }
301 num_src_per_tgt[i] = 1;
302 ++l;
303 }
304 }
305 }
306
307 // check whether all weights are 1.0 and whether the number of source
308 // points per target is one for all targets
309 int flag_weight_one = 1;
310 int flag_count_one = 1;
311 for (size_t i = 0, j = 0;
312 (i < tgts->count) && (flag_weight_one || flag_count_one); ++i) {
313
314 size_t curr_count = num_src_per_tgt[i];
315 flag_count_one &= curr_count == 1;
316
317 for (size_t k = 0; (k < curr_count) && flag_weight_one; ++k, ++j)
318 flag_weight_one &= fabs(w[j] - 1.0) < WEIGHT_TOL;
319 }
320
321 // if all weights are 1.0 -> use more optimised weight type
322 if (flag_weight_one) {
323
324 // if the number of source points for all target points is one
325 if (flag_count_one)
327 else
328 yac_interp_weights_add_sum(weights, tgts, num_src_per_tgt, srcs);
329
330 } else {
331
332 struct interp_weight_stencil * stencils = weights->stencils;
333 size_t stencils_array_size = weights->stencils_array_size;
334 size_t stencils_size = weights->stencils_size;
335
336 ENSURE_ARRAY_SIZE(stencils, stencils_array_size, stencils_size + tgts->count);
337
338 for (size_t i = 0; i < tgts->count; ++i, ++stencils_size) {
339
340 size_t curr_num_src = num_src_per_tgt[i];
341
342 // remove target for which no weights were provided
343 if (curr_num_src == 0) {
344 --stencils_size;
345 continue;
346 }
347
348 double * curr_weights =
349 xmalloc(curr_num_src * sizeof(*curr_weights));
350
351 stencils[stencils_size].type = WEIGHT_SUM;
352 stencils[stencils_size].tgt = copy_remote_point(tgts->data[i]);
353 stencils[stencils_size].data.weight_sum.srcs =
354 copy_remote_points(srcs, curr_num_src);
355 stencils[stencils_size].data.weight_sum.weights = curr_weights;
356 memcpy(curr_weights, w, curr_num_src * sizeof(*curr_weights));
357
358 srcs += curr_num_src;
359 w += curr_num_src;
360 }
361
362 weights->stencils = stencils;
363 weights->stencils_array_size = stencils_array_size;
364 weights->stencils_size = stencils_size;
365 }
366}
367
369 struct yac_interp_weights * weights, struct remote_points * tgts,
370 size_t * num_src_per_tgt, struct remote_point * srcs) {
371
372 if (tgts->count == 0) return;
373
374 // check whether the number of source points per target is one
375 // for all targets
376 int flag_count_one = 1;
377 for (size_t i = 0; i < tgts->count; ++i) {
378 if (num_src_per_tgt[i] != 1) {
379 flag_count_one = 0;
380 break;
381 }
382 }
383
384 if (flag_count_one) {
385
387
388 } else {
389
390 struct interp_weight_stencil * stencils = weights->stencils;
391 size_t stencils_array_size = weights->stencils_array_size;
392 size_t stencils_size = weights->stencils_size;
393
394 ENSURE_ARRAY_SIZE(stencils, stencils_array_size, stencils_size + tgts->count);
395
396 for (size_t i = 0; i < tgts->count; ++i, ++stencils_size) {
397
398 size_t curr_num_src = num_src_per_tgt[i];
399
400 stencils[stencils_size].type = SUM;
401 stencils[stencils_size].tgt = copy_remote_point(tgts->data[i]);
402 stencils[stencils_size].data.weight_sum.srcs =
403 copy_remote_points(srcs, curr_num_src);
404 stencils[stencils_size].data.weight_sum.weights = NULL;
405
406 srcs += curr_num_src;
407 }
408
409 weights->stencils = stencils;
410 weights->stencils_array_size = stencils_array_size;
411 weights->stencils_size = stencils_size;
412 }
413}
414
416 struct yac_interp_weights * weights, struct remote_points * tgts,
417 struct remote_point * srcs) {
418
419 if (tgts->count == 0) return;
420
421 struct interp_weight_stencil * stencils = weights->stencils;
422 size_t stencils_array_size = weights->stencils_array_size;
423 size_t stencils_size = weights->stencils_size;
424
425 ENSURE_ARRAY_SIZE(stencils, stencils_array_size, stencils_size + tgts->count);
426
427 for (size_t i = 0; i < tgts->count; ++i, ++stencils_size) {
428
429 stencils[stencils_size].type = DIRECT;
430 stencils[stencils_size].tgt = copy_remote_point(tgts->data[i]);
431 stencils[stencils_size].data.direct.src = copy_remote_point(srcs[i]);
432 }
433
434 weights->stencils = stencils;
435 weights->stencils_array_size = stencils_array_size;
436 weights->stencils_size = stencils_size;
437}
438
440 struct yac_interp_weights * weights, struct remote_points * tgts,
441 size_t * src_field_indices, struct remote_point ** srcs_per_field,
442 size_t num_src_fields) {
443
444 if (tgts->count == 0) return;
445
446 if (num_src_fields == 1) {
447 yac_interp_weights_add_direct(weights, tgts, srcs_per_field[0]);
448 return;
449 }
450
451 struct interp_weight_stencil * stencils = weights->stencils;
452 size_t stencils_array_size = weights->stencils_array_size;
453 size_t stencils_size = weights->stencils_size;
454
456 stencils, stencils_array_size, stencils_size + tgts->count);
457 stencils += stencils_size;
458
459 size_t srcs_offsets[num_src_fields];
460 memset(srcs_offsets, 0, num_src_fields * sizeof(srcs_offsets[0]));
461
462 for (size_t i = 0; i < tgts->count; ++i) {
463
464 size_t src_field_idx = src_field_indices[i];
465 stencils[i].type = DIRECT_MF;
466 stencils[i].tgt = copy_remote_point(tgts->data[i]);
467 stencils[i].data.direct_mf.src =
469 srcs_per_field[src_field_idx][srcs_offsets[src_field_idx]++]);
470 stencils[i].data.direct_mf.field_idx = src_field_idx;
471 }
472
473 weights->stencils = stencils;
474 weights->stencils_array_size = stencils_array_size;
475 weights->stencils_size += tgts->count;
476}
477
479 struct yac_interp_weights * weights, struct remote_points * tgts,
480 size_t * num_src_per_field_per_tgt, struct remote_point ** srcs_per_field,
481 size_t num_src_fields) {
482
483 if (tgts->count == 0) return;
484
485 if (num_src_fields == 1) {
487 weights, tgts, num_src_per_field_per_tgt, srcs_per_field[0]);
488 return;
489 }
490
491 // check whether the number of source points per target is one
492 // for all targets
493 int flag_count_one = 1;
494 for (size_t i = 0, k = 0; i < tgts->count; ++i) {
495 size_t count = 0;
496 for (size_t j = 0; j < num_src_fields; ++j, ++k)
497 count += num_src_per_field_per_tgt[k];
498 if (count != 1) {
499 flag_count_one = 0;
500 break;
501 }
502 }
503
504 if (flag_count_one) {
505
506 size_t * src_field_indices =
507 xmalloc(tgts->count * sizeof(*src_field_indices));
508
509 for (size_t i = 0, k = 0; i < tgts->count; ++i)
510 for (size_t j = 0; j < num_src_fields; ++j, ++k)
511 if (num_src_per_field_per_tgt[k])
512 src_field_indices[i] = j;
513
515 weights, tgts, src_field_indices, srcs_per_field, num_src_fields);
516
517 free(src_field_indices);
518
519 } else {
520 struct remote_point * curr_srcs_per_field[num_src_fields];
521 memcpy(curr_srcs_per_field, srcs_per_field,
522 num_src_fields * sizeof(*srcs_per_field));
523
524 struct interp_weight_stencil * stencils = weights->stencils;
525 size_t stencils_array_size = weights->stencils_array_size;
526 size_t stencils_size = weights->stencils_size;
527
529 stencils, stencils_array_size, stencils_size + tgts->count);
530
531 for (size_t i = 0; i < tgts->count; ++i, ++stencils_size) {
532
533 size_t * curr_num_src_per_src_field =
534 num_src_per_field_per_tgt + i * num_src_fields;
535 size_t curr_num_src = 0;
536 for (size_t j = 0; j < num_src_fields; ++j)
537 curr_num_src += curr_num_src_per_src_field[j];
538
539 stencils[stencils_size].type = SUM_MF;
540 stencils[stencils_size].tgt = copy_remote_point(tgts->data[i]);
541 stencils[stencils_size].data.sum_mf.field_indices =
542 xmalloc(
543 curr_num_src *
544 sizeof(*(stencils[stencils_size].data.sum_mf.field_indices)));
545 for (size_t j = 0, l = 0; j < num_src_fields; ++j) {
546 size_t curr_num_src = curr_num_src_per_src_field[j];
547 for (size_t k = 0; k < curr_num_src; ++k, ++l) {
548 stencils[stencils_size].data.sum_mf.field_indices[l] = j;
549 }
550 }
551 stencils[stencils_size].data.sum_mf.srcs =
553 curr_srcs_per_field, curr_num_src_per_src_field, num_src_fields);
554
555 for (size_t j = 0; j < num_src_fields; ++j)
556 curr_srcs_per_field[j] += curr_num_src_per_src_field[j];
557 }
558
559 weights->stencils = stencils;
560 weights->stencils_array_size = stencils_array_size;
561 weights->stencils_size = stencils_size;
562 }
563}
564
566 struct yac_interp_weights * weights, struct remote_points * tgts,
567 size_t * num_src_per_field_per_tgt, struct remote_point ** srcs_per_field,
568 double * w, size_t num_src_fields) {
569
570 if (tgts->count == 0) return;
571
572 if (num_src_fields == 1) {
574 weights, tgts, num_src_per_field_per_tgt, srcs_per_field[0], w);
575 return;
576 }
577
578 // check whether all weights are 1.0 and whether the number of source
579 // points per target is one for all targets
580 int flag_weight_one = 1;
581 for (size_t i = 0, j = 0;
582 (i < tgts->count) && flag_weight_one; ++i) {
583
584 for (size_t src_field_idx = 0; src_field_idx < num_src_fields;
585 ++src_field_idx) {
586
587 size_t curr_count =
588 num_src_per_field_per_tgt[i * num_src_fields + src_field_idx];
589
590 for (size_t k = 0; (k < curr_count) && flag_weight_one; ++k, ++j)
591 flag_weight_one &= fabs(w[j] - 1.0) < 1e-12;
592 }
593 }
594
595 // if all weights are 1.0 -> use more optimised weight type
596 if (flag_weight_one) {
597
599 weights, tgts, num_src_per_field_per_tgt, srcs_per_field, num_src_fields);
600
601 } else {
602
603 struct remote_point * curr_srcs_per_field[num_src_fields];
604 memcpy(curr_srcs_per_field, srcs_per_field,
605 num_src_fields * sizeof(*srcs_per_field));
606
607 struct interp_weight_stencil * stencils = weights->stencils;
608 size_t stencils_array_size = weights->stencils_array_size;
609 size_t stencils_size = weights->stencils_size;
610
612 stencils, stencils_array_size, stencils_size + tgts->count);
613
614 for (size_t i = 0; i < tgts->count; ++i, ++stencils_size) {
615
616 size_t * curr_num_src_per_src_field =
617 num_src_per_field_per_tgt + i * num_src_fields;
618 size_t curr_num_weights = 0;
619 for (size_t j = 0; j < num_src_fields; ++j)
620 curr_num_weights += curr_num_src_per_src_field[j];
621 double * curr_weights =
622 xmalloc(curr_num_weights * sizeof(*curr_weights));
623 size_t * field_indices =
624 xmalloc(curr_num_weights * sizeof(*field_indices));
625
626 stencils[stencils_size].type = WEIGHT_SUM_MF;
627 stencils[stencils_size].tgt = copy_remote_point(tgts->data[i]);
628 stencils[stencils_size].data.weight_sum_mf.field_indices = field_indices;
629 for (size_t j = 0, l = 0; j < num_src_fields; ++j) {
630 size_t curr_num_src = curr_num_src_per_src_field[j];
631 for (size_t k = 0; k < curr_num_src; ++k, ++l) field_indices[l] = j;
632 }
633 stencils[stencils_size].data.weight_sum_mf.srcs =
634 copy_remote_points_mf(curr_srcs_per_field, curr_num_src_per_src_field, num_src_fields);
635 stencils[stencils_size].data.weight_sum_mf.weights = curr_weights;
636 memcpy(curr_weights, w, curr_num_weights * sizeof(*curr_weights));
637
638 for (size_t j = 0; j < num_src_fields; ++j)
639 curr_srcs_per_field[j] += curr_num_src_per_src_field[j];
640 w += curr_num_weights;
641 }
642
643 weights->stencils = stencils;
644 weights->stencils_array_size = stencils_array_size;
645 weights->stencils_size = stencils_size;
646 }
647}
648
649static int compare_stencils_fixed(const void * a, const void * b) {
650
651 int ret = (((struct interp_weight_stencil_fixed *)a)->value >
652 ((struct interp_weight_stencil_fixed *)b)->value) -
653 (((struct interp_weight_stencil_fixed *)a)->value <
654 ((struct interp_weight_stencil_fixed *)b)->value);
655
656 if (ret) return ret;
657
658 return (((struct interp_weight_stencil_fixed *)a)->orig_pos >
659 ((struct interp_weight_stencil_fixed *)b)->orig_pos) -
660 (((struct interp_weight_stencil_fixed *)a)->orig_pos <
661 ((struct interp_weight_stencil_fixed *)b)->orig_pos);
662}
663
664static MPI_Datatype get_fixed_stencil_mpi_datatype(MPI_Comm comm) {
665
666 struct interp_weight_stencil_fixed dummy;
667 MPI_Datatype fixed_stencil_dt;
668 int array_of_blocklengths[] = {1, 1};
669 const MPI_Aint array_of_displacements[] =
670 {(MPI_Aint)(intptr_t)(const void *)&(dummy.value) -
671 (MPI_Aint)(intptr_t)(const void *)&dummy,
672 (MPI_Aint)(intptr_t)(const void *)&(dummy.orig_pos) -
673 (MPI_Aint)(intptr_t)(const void *)&dummy};
674 const MPI_Datatype array_of_types[] = {MPI_DOUBLE, MPI_UINT64_T};
676 MPI_Type_create_struct(2, array_of_blocklengths, array_of_displacements,
677 array_of_types, &fixed_stencil_dt), comm);
678 return yac_create_resized(fixed_stencil_dt, sizeof(dummy), comm);
679}
680
682 MPI_Comm comm, uint64_t count,
683 struct interp_weight_stencil * fixed_stencils,
684 struct yac_interpolation * interp) {
685
686 int comm_size;
687 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
688
689 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
691 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
692
693 for (size_t i = 0; i < count; ++i) {
694 int curr_count = fixed_stencils[i].tgt.data.count;
695 struct remote_point_info * curr_point_infos =
696 (curr_count == 1)?
697 (&(fixed_stencils[i].tgt.data.data.single)):
698 (fixed_stencils[i].tgt.data.data.multi);
699 for (int j = 0; j < curr_count; ++j)
700 sendcounts[curr_point_infos[j].rank]++;
701 }
702
704 1, sendcounts, recvcounts, sdispls, rdispls, comm);
705
706 size_t send_buffer_size =
707 sdispls[comm_size] + sendcounts[comm_size - 1];
708 size_t recv_buffer_size =
709 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
710
711 struct interp_weight_stencil_fixed * buffer =
712 xmalloc((send_buffer_size + recv_buffer_size) * sizeof(*buffer));
713 struct interp_weight_stencil_fixed * send_buffer = buffer + recv_buffer_size;
714 struct interp_weight_stencil_fixed * recv_buffer = buffer;
715
716 // pack fixed stencils
717 for (size_t i = 0; i < count; ++i) {
718 int curr_count = fixed_stencils[i].tgt.data.count;
719 struct remote_point_info * curr_point_infos =
720 (curr_count == 1)?
721 (&(fixed_stencils[i].tgt.data.data.single)):
722 (fixed_stencils[i].tgt.data.data.multi);
723 double value = fixed_stencils[i].data.fixed.value;
724 for (int j = 0; j < curr_count; ++j) {
725 size_t pos = sdispls[curr_point_infos[j].rank + 1]++;
726 send_buffer[pos].value = value;
727 send_buffer[pos].orig_pos = curr_point_infos[j].orig_pos;
728 }
729 }
730
731 // create MPI Datatype for exchanging fixed stencils
732 MPI_Datatype stencil_fixed_dt = get_fixed_stencil_mpi_datatype(comm);
733
734 // redistribute fixed stencils
736 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls,
737 sizeof(*send_buffer), stencil_fixed_dt, comm);
738
739 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
740
741 yac_mpi_call(MPI_Type_free(&stencil_fixed_dt), comm);
742
743 if (recv_buffer_size == 0) {
744 free(buffer);
745 return;
746 }
747
748 // sort stencils first by fixed value and second by orig_pos
749 qsort(recv_buffer, recv_buffer_size, sizeof(*recv_buffer),
751
752 size_t * tgt_pos = xmalloc(recv_buffer_size * sizeof(*tgt_pos));
753 for (size_t i = 0; i < recv_buffer_size; ++i)
754 tgt_pos[i] = (size_t)(recv_buffer[i].orig_pos);
755
756 size_t offset = 0, i = 0;
757 while (offset < recv_buffer_size) {
758 double fixed_value = recv_buffer[i].value;
759 while ((i < recv_buffer_size) && (fixed_value == recv_buffer[i].value)) ++i;
760 size_t curr_count = i - offset;
762 interp, fixed_value, curr_count, tgt_pos + offset);
763 offset = i;
764 }
765
766 free(buffer);
767 free(tgt_pos);
768}
769
770static inline struct remote_point_info select_src(
771 struct remote_point_infos src) {
772
773 if (src.count == 1) return src.data.single;
774
775 int min_rank = INT_MAX;
776 size_t min_rank_idx = SIZE_MAX;
777 for (size_t i = 0; i < src.count; ++i) {
778 if (src.data.multi[i].rank < min_rank) {
779 min_rank = src.data.multi[i].rank;
780 min_rank_idx = i;
781 }
782 }
783
784 return src.data.multi[min_rank_idx];
785}
786
788 struct Xt_redist_msg * msgs, size_t count, MPI_Comm comm) {
789 for (size_t i = 0; i < count; ++i) {
790 MPI_Datatype * dt = &(msgs[i].datatype);
791 if (*dt != MPI_DATATYPE_NULL) yac_mpi_call(MPI_Type_free(dt), comm);
792 }
793 free(msgs);
794}
795
796static Xt_redist generate_direct_redist(
797 uint64_t * src_orig_poses, size_t * sendcounts,
798 struct interp_weight_stencil_direct * tgt_stencils,
799 size_t * recvcounts, MPI_Comm comm) {
800
801 int comm_size;
802 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
803
804 size_t nsend = 0, nrecv = 0;
805 size_t max_buffer_size = 0;
806 for (int i = 0; i < comm_size; ++i) {
807 nsend += sendcounts[i] > 0;
808 nrecv += recvcounts[i] > 0;
809 if (max_buffer_size < sendcounts[i]) max_buffer_size = sendcounts[i];
810 if (max_buffer_size < recvcounts[i]) max_buffer_size = recvcounts[i];
811 }
812
813 size_t total_num_msg = nsend + nrecv;
814
815 struct Xt_redist_msg * msgs_buffer =
816 xmalloc(total_num_msg * sizeof(*msgs_buffer));
817 struct Xt_redist_msg * send_msgs = msgs_buffer;
818 struct Xt_redist_msg * recv_msgs = msgs_buffer + nsend;
819
820 int * pos_buffer = xmalloc((size_t)max_buffer_size * sizeof(*pos_buffer));
821
822 nsend = 0;
823 nrecv = 0;
824 for (int i = 0; i < comm_size; ++i) {
825 if (recvcounts[i] > 0) {
826 for (size_t j = 0; j < recvcounts[i]; ++j)
827 pos_buffer[j] = (int)tgt_stencils[j].orig_pos;
828 tgt_stencils += recvcounts[i];
829 recv_msgs[nrecv].rank = i;
830 recv_msgs[nrecv].datatype =
831 xt_mpi_generate_datatype(pos_buffer, recvcounts[i], MPI_DOUBLE, comm);
832 nrecv++;
833 }
834 if (sendcounts[i] > 0) {
835 for (size_t j = 0; j < sendcounts[i]; ++j)
836 pos_buffer[j] = (int)src_orig_poses[j];
837 src_orig_poses += sendcounts[i];
838 send_msgs[nsend].rank = i;
839 send_msgs[nsend].datatype =
840 xt_mpi_generate_datatype(pos_buffer, sendcounts[i], MPI_DOUBLE, comm);
841 nsend++;
842 }
843 }
844
845 free(pos_buffer);
846
847 Xt_redist redist;
848 MPI_Comm split_comm;
849
850 if (total_num_msg > 0) {
851
852 yac_mpi_call(MPI_Comm_split(comm, 1, 0, &split_comm), comm);
853
854 int * rank_buffer =
855 xmalloc(2 * total_num_msg * sizeof(*rank_buffer));
856 int * orig_ranks = rank_buffer;
857 int * split_ranks = rank_buffer + total_num_msg;
858
859 for (size_t i = 0; i < total_num_msg; ++i)
860 orig_ranks[i] = msgs_buffer[i].rank;
861
862 MPI_Group orig_group, split_group;
863 yac_mpi_call(MPI_Comm_group(comm, &orig_group), comm);
864 yac_mpi_call(MPI_Comm_group(split_comm, &split_group), comm);
865
867 MPI_Group_translate_ranks(orig_group, total_num_msg, orig_ranks,
868 split_group, split_ranks), split_comm);
869
870 for (size_t i = 0; i < total_num_msg; ++i)
871 msgs_buffer[i].rank = split_ranks[i];
872
873 free(rank_buffer);
874
875 yac_mpi_call(MPI_Group_free(&split_group), comm);
876 yac_mpi_call(MPI_Group_free(&orig_group), comm);
877
878 // generate redist
879 redist =
880 xt_redist_single_array_base_new(
881 nsend, nrecv, send_msgs, recv_msgs, split_comm);
882
883 } else {
884 yac_mpi_call(MPI_Comm_split(comm, 0, 0, &split_comm), comm);
885 redist = NULL;
886 }
887
888 yac_mpi_call(MPI_Comm_free(&split_comm), comm);
889 xt_redist_msg_free(msgs_buffer, total_num_msg, comm);
890
891 return redist;
892}
893
894static Xt_redist * generate_direct_mf_redists(
895 uint64_t * src_orig_pos, size_t * sendcounts,
896 struct interp_weight_stencil_direct_mf * tgt_stencils,
897 size_t * recvcounts, size_t num_src_fields, MPI_Comm comm) {
898
899 int comm_size;
900 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
901
902 size_t nsends[num_src_fields], nrecvs[num_src_fields];
903 size_t max_buffer_size = 0;
904 memset(nsends, 0, num_src_fields * sizeof(nsends[0]));
905 memset(nrecvs, 0, num_src_fields * sizeof(nrecvs[0]));
906 for (int i = 0; i < comm_size; ++i) {
907 for (size_t j = 0; j < num_src_fields; ++j) {
908 size_t idx = (size_t)i * num_src_fields + j;
909 if (sendcounts[idx] > 0) nsends[j]++;
910 if (recvcounts[idx] > 0) nrecvs[j]++;
911 if (max_buffer_size < sendcounts[idx]) max_buffer_size = sendcounts[idx];
912 if (max_buffer_size < recvcounts[idx]) max_buffer_size = recvcounts[idx];
913 }
914 }
915
916 size_t nsend = 0, nrecv = 0;
917 size_t send_offsets[num_src_fields];
918 size_t recv_offsets[num_src_fields];
919 for (size_t i = 0; i < num_src_fields; ++i) {
920 send_offsets[i] = nsend;
921 recv_offsets[i] = nrecv;
922 nsend += nsends[i];
923 nrecv += nrecvs[i];
924 }
925
926 size_t total_num_msg = nsend + nrecv;
927
928 struct Xt_redist_msg * msgs_buffer =
929 xmalloc(total_num_msg * sizeof(*msgs_buffer));
930 struct Xt_redist_msg * send_msgs = msgs_buffer;
931 struct Xt_redist_msg * recv_msgs = msgs_buffer + nsend;
932
933 int * pos_buffer = xmalloc(max_buffer_size * sizeof(*pos_buffer));
934
935 for (int i = 0; i < comm_size; ++i) {
936 for (size_t src_field_idx = 0; src_field_idx < num_src_fields;
937 ++src_field_idx) {
938 size_t idx = (size_t)i * num_src_fields + src_field_idx;
939 if (recvcounts[idx] > 0) {
940 for (size_t j = 0; j < recvcounts[idx]; ++j)
941 pos_buffer[j] = (int)tgt_stencils[j].orig_pos;
942 tgt_stencils += recvcounts[idx];
943 recv_msgs[recv_offsets[src_field_idx]].rank = i;
944 recv_msgs[recv_offsets[src_field_idx]].datatype =
945 xt_mpi_generate_datatype(
946 pos_buffer, recvcounts[idx], MPI_DOUBLE, comm);
947 recv_offsets[src_field_idx]++;
948 }
949 if (sendcounts[idx] > 0) {
950 for (size_t j = 0; j < sendcounts[idx]; ++j)
951 pos_buffer[j] = (int)src_orig_pos[j];
952 src_orig_pos += sendcounts[idx];
953 send_msgs[send_offsets[src_field_idx]].rank = i;
954 send_msgs[send_offsets[src_field_idx]].datatype =
955 xt_mpi_generate_datatype(
956 pos_buffer, sendcounts[idx], MPI_DOUBLE, comm);
957 send_offsets[src_field_idx]++;
958 }
959 }
960 }
961
962 free(pos_buffer);
963
964 Xt_redist * redists;
965 MPI_Comm split_comm;
966
967 if (total_num_msg > 0) {
968
969 yac_mpi_call(MPI_Comm_split(comm, 1, 0, &split_comm), comm);
970
971 int * rank_buffer =
972 xmalloc(2 * total_num_msg * sizeof(*rank_buffer));
973 int * orig_ranks = rank_buffer;
974 int * split_ranks = rank_buffer + total_num_msg;
975
976 for (size_t i = 0; i < total_num_msg; ++i)
977 orig_ranks[i] = msgs_buffer[i].rank;
978
979 MPI_Group orig_group, split_group;
980 yac_mpi_call(MPI_Comm_group(comm, &orig_group), comm);
981 yac_mpi_call(MPI_Comm_group(split_comm, &split_group), comm);
982
984 MPI_Group_translate_ranks(orig_group, total_num_msg, orig_ranks,
985 split_group, split_ranks), split_comm);
986
987 for (size_t i = 0; i < total_num_msg; ++i)
988 msgs_buffer[i].rank = split_ranks[i];
989
990 free(rank_buffer);
991
992 yac_mpi_call(MPI_Group_free(&split_group), comm);
993 yac_mpi_call(MPI_Group_free(&orig_group), comm);
994
995 // generate redists
996 redists = xmalloc(num_src_fields * sizeof(*redists));
997 for (size_t src_field_idx = 0; src_field_idx < num_src_fields;
998 ++src_field_idx) {
999 redists[src_field_idx] =
1000 xt_redist_single_array_base_new(
1001 nsends[src_field_idx], nrecvs[src_field_idx],
1002 send_msgs, recv_msgs, split_comm);
1003 send_msgs += nsends[src_field_idx];
1004 recv_msgs += nrecvs[src_field_idx];
1005 }
1006
1007 } else {
1008 yac_mpi_call(MPI_Comm_split(comm, 0, 0, &split_comm), comm);
1009 redists = NULL;
1010 }
1011
1012 yac_mpi_call(MPI_Comm_free(&split_comm), comm);
1013 xt_redist_msg_free(msgs_buffer, total_num_msg, comm);
1014
1015 return redists;
1016}
1017
1018static MPI_Datatype get_direct_stencil_mpi_datatype(MPI_Comm comm) {
1019
1020 struct interp_weight_stencil_direct dummy;
1021 MPI_Datatype direct_stencil_dt;
1022 int array_of_blocklengths[] = {1, 1};
1023 const MPI_Aint array_of_displacements[] =
1024 {(MPI_Aint)(intptr_t)(const void *)&(dummy.src) -
1025 (MPI_Aint)(intptr_t)(const void *)&dummy,
1026 (MPI_Aint)(intptr_t)(const void *)&(dummy.orig_pos) -
1027 (MPI_Aint)(intptr_t)(const void *)&dummy};
1028 MPI_Datatype array_of_types[] =
1029 {yac_get_remote_point_info_mpi_datatype(comm), MPI_UINT64_T};
1031 MPI_Type_create_struct(2, array_of_blocklengths, array_of_displacements,
1032 array_of_types, &direct_stencil_dt), comm);
1033 yac_mpi_call(MPI_Type_free(&(array_of_types[0])), comm);
1034 return yac_create_resized(direct_stencil_dt, sizeof(dummy), comm);
1035}
1036
1037static int compare_stencils_direct(const void * a, const void * b) {
1038
1039 int ret = ((struct interp_weight_stencil_direct *)a)->src.rank -
1040 ((struct interp_weight_stencil_direct *)b)->src.rank;
1041
1042 if (ret) return ret;
1043
1044 return (((struct interp_weight_stencil_direct *)a)->orig_pos >
1045 ((struct interp_weight_stencil_direct *)b)->orig_pos) -
1046 (((struct interp_weight_stencil_direct *)a)->orig_pos <
1047 ((struct interp_weight_stencil_direct *)b)->orig_pos);
1048}
1049
1051 MPI_Comm comm, uint64_t count,
1052 struct interp_weight_stencil * direct_stencils,
1053 struct yac_interpolation * interp) {
1054
1055 int comm_size;
1056 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1057
1058 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
1060 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1061
1062 for (size_t i = 0; i < count; ++i) {
1063 int curr_count = direct_stencils[i].tgt.data.count;
1064 struct remote_point_info * curr_point_info =
1065 (curr_count == 1)?
1066 (&(direct_stencils[i].tgt.data.data.single)):
1067 (direct_stencils[i].tgt.data.data.multi);
1068 for (int j = 0; j < curr_count; ++j)
1069 sendcounts[curr_point_info[j].rank]++;
1070 }
1071
1073 1, sendcounts, recvcounts, sdispls, rdispls, comm);
1074
1075 size_t send_buffer_size =
1076 sdispls[comm_size] + sendcounts[comm_size - 1];
1077 size_t recv_buffer_size =
1078 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
1079 size_t tgt_count = recv_buffer_size;
1080
1081 struct interp_weight_stencil_direct * stencil_buffer =
1082 xmalloc((send_buffer_size + recv_buffer_size) * sizeof(*stencil_buffer));
1083 struct interp_weight_stencil_direct * send_stencil_buffer =
1084 stencil_buffer + recv_buffer_size;
1085 struct interp_weight_stencil_direct * recv_stencil_buffer = stencil_buffer;
1086
1087 // pack direct stencils
1088 for (size_t i = 0; i < count; ++i) {
1089 int curr_count = direct_stencils[i].tgt.data.count;
1090 struct remote_point_info * curr_point_infos =
1091 (curr_count == 1)?
1092 (&(direct_stencils[i].tgt.data.data.single)):
1093 (direct_stencils[i].tgt.data.data.multi);
1094 struct remote_point_info src =
1095 select_src(direct_stencils[i].data.direct.src.data);
1096 for (int j = 0; j < curr_count; ++j) {
1097 size_t pos = sdispls[curr_point_infos[j].rank + 1]++;
1098 send_stencil_buffer[pos].src = src;
1099 send_stencil_buffer[pos].orig_pos = curr_point_infos[j].orig_pos;
1100 }
1101 }
1102
1103 // create MPI Datatype for exchanging direct stencils
1104 MPI_Datatype stencil_direct_dt = get_direct_stencil_mpi_datatype(comm);
1105
1106 // redistribute stencils based on target owners
1108 send_stencil_buffer, sendcounts, sdispls,
1109 recv_stencil_buffer, recvcounts, rdispls,
1110 sizeof(*stencil_buffer), stencil_direct_dt, comm);
1111
1112 yac_mpi_call(MPI_Type_free(&stencil_direct_dt), comm);
1113
1114 // sort stencils based on src rank first and by target orig pos second
1115 qsort(recv_stencil_buffer, tgt_count, sizeof(*recv_stencil_buffer),
1117
1118 memset(sendcounts, 0, (size_t)comm_size * sizeof(*sendcounts));
1119
1120 for (size_t i = 0; i < tgt_count; ++i)
1121 sendcounts[recv_stencil_buffer[i].src.rank]++;
1122
1124 1, sendcounts, recvcounts, sdispls, rdispls, comm);
1125
1126 send_buffer_size = sdispls[comm_size] + sendcounts[comm_size - 1];
1127 recv_buffer_size = rdispls[comm_size - 1] + recvcounts[comm_size - 1];
1128
1129 uint64_t * orig_pos_buffer =
1130 xmalloc((send_buffer_size + recv_buffer_size) * sizeof(*orig_pos_buffer));
1131 uint64_t * send_orig_pos_buffer = orig_pos_buffer + recv_buffer_size;
1132 uint64_t * recv_orig_pos_buffer = orig_pos_buffer;
1133
1134 for (size_t i = 0; i < tgt_count; ++i)
1135 send_orig_pos_buffer[sdispls[recv_stencil_buffer[i].src.rank + 1]++] =
1136 recv_stencil_buffer[i].src.orig_pos;
1137
1138 // inform source processes about their requested points
1140 send_orig_pos_buffer, sendcounts, sdispls,
1141 recv_orig_pos_buffer, recvcounts, rdispls,
1142 sizeof(*send_orig_pos_buffer), MPI_UINT64_T, comm);
1143
1144 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
1145
1146 // generate redist
1147 Xt_redist redist =
1149 recv_orig_pos_buffer, recvcounts, recv_stencil_buffer, sendcounts, comm);
1150 free(orig_pos_buffer);
1151 free(stencil_buffer);
1152
1153 yac_interpolation_add_direct(interp, redist);
1154
1155 if (redist != NULL) xt_redist_delete(redist);
1156}
1157
1158static MPI_Datatype get_direct_mf_stencil_mpi_datatype(MPI_Comm comm) {
1159
1161 MPI_Datatype direct_stencil_mf_dt;
1162 int array_of_blocklengths[] = {1, 1, 1};
1163 const MPI_Aint array_of_displacements[] =
1164 {(MPI_Aint)(intptr_t)(const void *)&(dummy.src) -
1165 (MPI_Aint)(intptr_t)(const void *)&dummy,
1166 (MPI_Aint)(intptr_t)(const void *)&(dummy.src_field_idx) -
1167 (MPI_Aint)(intptr_t)(const void *)&dummy,
1168 (MPI_Aint)(intptr_t)(const void *)&(dummy.orig_pos) -
1169 (MPI_Aint)(intptr_t)(const void *)&dummy};
1170 MPI_Datatype array_of_types[] =
1171 {yac_get_remote_point_info_mpi_datatype(comm), MPI_UINT64_T, MPI_UINT64_T};
1173 MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacements,
1174 array_of_types, &direct_stencil_mf_dt), comm);
1175 yac_mpi_call(MPI_Type_free(&(array_of_types[0])), comm);
1176 return yac_create_resized(direct_stencil_mf_dt, sizeof(dummy), comm);
1177}
1178
1179static int compare_stencils_direct_mf(const void * a, const void * b) {
1180
1181 int ret = ((struct interp_weight_stencil_direct_mf *)a)->src.rank -
1182 ((struct interp_weight_stencil_direct_mf *)b)->src.rank;
1183
1184 if (ret) return ret;
1185
1186 ret = (((struct interp_weight_stencil_direct_mf *)a)->src_field_idx >
1187 ((struct interp_weight_stencil_direct_mf *)b)->src_field_idx) -
1190
1191 if (ret) return ret;
1192
1193 return (((struct interp_weight_stencil_direct_mf *)a)->orig_pos >
1195 (((struct interp_weight_stencil_direct_mf *)a)->orig_pos <
1196 ((struct interp_weight_stencil_direct_mf *)b)->orig_pos);
1197}
1198
1200 MPI_Comm comm, uint64_t count,
1201 struct interp_weight_stencil * direct_mf_stencils,
1202 struct yac_interpolation * interp) {
1203
1204 // determine the number of source fields
1205 uint64_t num_src_fields = 0;
1206 for (size_t i = 0; i < count; ++i) {
1207 uint64_t src_field_idx =
1208 (uint64_t)(direct_mf_stencils[i].data.direct_mf.field_idx);
1209 if (src_field_idx >= num_src_fields) num_src_fields = src_field_idx + 1;
1210 }
1212 MPI_Allreduce(
1213 MPI_IN_PLACE, &num_src_fields, 1, MPI_UINT64_T, MPI_MAX, comm), comm);
1214
1215 int comm_size;
1216 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1217
1218 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
1220 (size_t)num_src_fields, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1221 size_t * size_t_buffer =
1222 xmalloc(4 * (size_t)comm_size * sizeof(*size_t_buffer));
1223 size_t * total_sendcounts = size_t_buffer + 0 * comm_size;
1224 size_t * total_recvcounts = size_t_buffer + 1 * comm_size;
1225 size_t * total_sdispls = size_t_buffer + 2 * comm_size;
1226 size_t * total_rdispls = size_t_buffer + 3 * comm_size;
1227
1228 for (size_t i = 0; i < count; ++i) {
1229 int curr_count = direct_mf_stencils[i].tgt.data.count;
1230 struct remote_point_info * curr_point_info =
1231 (curr_count == 1)?
1232 (&(direct_mf_stencils[i].tgt.data.data.single)):
1233 (direct_mf_stencils[i].tgt.data.data.multi);
1234 uint64_t src_field_idx =
1235 (uint64_t)(direct_mf_stencils[i].data.direct_mf.field_idx);
1236 for (int j = 0; j < curr_count; ++j)
1237 sendcounts[
1238 (uint64_t)(curr_point_info[j].rank) * num_src_fields + src_field_idx]++;
1239 }
1240
1242 (size_t)num_src_fields, sendcounts, recvcounts, sdispls, rdispls, comm);
1243
1244 size_t saccu = 0, raccu = 0;
1245 for (int i = 0; i < comm_size; ++i) {
1246 total_sdispls[i] = saccu;
1247 total_rdispls[i] = raccu;
1248 total_sendcounts[i] = 0;
1249 total_recvcounts[i] = 0;
1250 for (size_t j = 0; j < num_src_fields; ++j) {
1251 total_sendcounts[i] += sendcounts[num_src_fields * i + j];
1252 total_recvcounts[i] += recvcounts[num_src_fields * i + j];
1253 }
1254 saccu += total_sendcounts[i];
1255 raccu += total_recvcounts[i];
1256 }
1257
1258 size_t send_buffer_size = total_sdispls[comm_size - 1] +
1259 total_sendcounts[comm_size - 1];
1260 size_t recv_buffer_size = total_rdispls[comm_size - 1] +
1261 total_recvcounts[comm_size - 1];
1262 size_t tgt_count = recv_buffer_size;
1263
1264 struct interp_weight_stencil_direct_mf * stencil_buffer =
1265 xmalloc((send_buffer_size + recv_buffer_size) * sizeof(*stencil_buffer));
1266 struct interp_weight_stencil_direct_mf * send_stencil_buffer =
1267 stencil_buffer + recv_buffer_size;
1268 struct interp_weight_stencil_direct_mf * recv_stencil_buffer = stencil_buffer;
1269
1270 // pack direct_mf stencils
1271 for (size_t i = 0; i < count; ++i) {
1272 int curr_count = direct_mf_stencils[i].tgt.data.count;
1273 struct remote_point_info * curr_point_infos =
1274 (curr_count == 1)?
1275 (&(direct_mf_stencils[i].tgt.data.data.single)):
1276 (direct_mf_stencils[i].tgt.data.data.multi);
1277 struct remote_point_info src =
1278 select_src(direct_mf_stencils[i].data.direct_mf.src.data);
1279 uint64_t src_field_idx =
1280 (uint64_t)(direct_mf_stencils[i].data.direct_mf.field_idx);
1281 for (int j = 0; j < curr_count; ++j) {
1282 size_t pos =
1283 sdispls[(uint64_t)(curr_point_infos[j].rank) * num_src_fields +
1284 src_field_idx + 1]++;
1285 send_stencil_buffer[pos].src = src;
1286 send_stencil_buffer[pos].src_field_idx = src_field_idx;
1287 send_stencil_buffer[pos].orig_pos = curr_point_infos[j].orig_pos;
1288 }
1289 }
1290
1291 // create MPI Datatype for exchanging direct_mf stencils
1292 MPI_Datatype stencil_direct_mf_dt = get_direct_mf_stencil_mpi_datatype(comm);
1293
1294 // redistribute stencils based on target owners
1296 send_stencil_buffer, total_sendcounts, total_sdispls,
1297 recv_stencil_buffer, total_recvcounts, total_rdispls,
1298 sizeof(*stencil_buffer), stencil_direct_mf_dt, comm);
1299
1300 yac_mpi_call(MPI_Type_free(&stencil_direct_mf_dt), comm);
1301
1302 // sort stencils based on src rank first, src_field_idx, and
1303 // by target orig pos second
1304 qsort(recv_stencil_buffer, tgt_count, sizeof(*recv_stencil_buffer),
1306
1307 memset(sendcounts, 0,
1308 (size_t)comm_size * (size_t)num_src_fields * sizeof(*sendcounts));
1309
1310 for (size_t i = 0; i < tgt_count; ++i)
1311 sendcounts[(uint64_t)(recv_stencil_buffer[i].src.rank) * num_src_fields +
1312 recv_stencil_buffer[i].src_field_idx]++;
1313
1315 (size_t)num_src_fields, sendcounts, recvcounts, sdispls, rdispls, comm);
1316
1317 saccu = 0, raccu = 0;
1318 for (int i = 0; i < comm_size; ++i) {
1319 total_sdispls[i] = saccu;
1320 total_rdispls[i] = raccu;
1321 total_sendcounts[i] = 0;
1322 total_recvcounts[i] = 0;
1323 for (size_t j = 0; j < num_src_fields; ++j) {
1324 total_sendcounts[i] += sendcounts[num_src_fields * i + j];
1325 total_recvcounts[i] += recvcounts[num_src_fields * i + j];
1326 }
1327 saccu += total_sendcounts[i];
1328 raccu += total_recvcounts[i];
1329 }
1330
1331 send_buffer_size = total_sdispls[comm_size - 1] +
1332 total_sendcounts[comm_size - 1];
1333 recv_buffer_size = total_rdispls[comm_size - 1] +
1334 total_recvcounts[comm_size - 1];
1335
1336 uint64_t * orig_pos_buffer =
1337 xmalloc((send_buffer_size + recv_buffer_size) * sizeof(*orig_pos_buffer));
1338 uint64_t * send_orig_pos_buffer = orig_pos_buffer + recv_buffer_size;
1339 uint64_t * recv_orig_pos_buffer = orig_pos_buffer;
1340
1341 for (size_t i = 0; i < tgt_count; ++i)
1342 send_orig_pos_buffer[
1343 sdispls[(uint64_t)(recv_stencil_buffer[i].src.rank) * num_src_fields +
1344 recv_stencil_buffer[i].src_field_idx + 1]++] =
1345 recv_stencil_buffer[i].src.orig_pos;
1346
1347 // inform source processes about their requested points
1349 send_orig_pos_buffer, total_sendcounts, total_sdispls,
1350 recv_orig_pos_buffer, total_recvcounts, total_rdispls,
1351 sizeof(*send_orig_pos_buffer), MPI_UINT64_T, comm);
1352 free(size_t_buffer);
1353
1354 // generate redist
1355 Xt_redist * redists =
1357 recv_orig_pos_buffer, recvcounts, recv_stencil_buffer, sendcounts,
1358 (size_t)num_src_fields, comm);
1359
1360 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
1361 free(orig_pos_buffer);
1362 free(stencil_buffer);
1363
1364 yac_interpolation_add_direct_mf(interp, redists, (size_t)num_src_fields);
1365
1366 if (redists != NULL) {
1367 for (size_t i = 0; i < (size_t)num_src_fields; ++i)
1368 xt_redist_delete(redists[i]);
1369 free(redists);
1370 }
1371}
1372
1374 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1375 MPI_Comm comm) {
1376
1377 int pack_size_value;
1378
1379 yac_mpi_call(MPI_Pack_size(1, MPI_DOUBLE, comm, &pack_size_value), comm);
1380
1381 return pack_size_value;
1382}
1383
1385 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1386 MPI_Comm comm) {
1387
1388 return
1390 &(stencil->data.direct.src), point_info_dt, comm);
1391}
1392
1394 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1395 MPI_Comm comm) {
1396
1397 return
1399 stencil->data.sum.srcs, point_info_dt, comm);
1400}
1401
1403 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1404 MPI_Comm comm) {
1405
1406 int pack_size_weights;
1408 MPI_Pack_size(
1409 (int)(stencil->data.weight_sum.srcs->count),
1410 MPI_DOUBLE, comm, &pack_size_weights), comm);
1411
1412 return
1414 stencil->data.weight_sum.srcs, point_info_dt, comm) +
1415 pack_size_weights;
1416}
1417
1419 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1420 MPI_Comm comm) {
1421
1422 int pack_size_src_field_idx;
1424 MPI_Pack_size(
1425 1, MPI_UINT64_T, comm, &pack_size_src_field_idx), comm);
1426
1427 return
1429 &(stencil->data.direct_mf.src), point_info_dt, comm) +
1430 pack_size_src_field_idx;
1431}
1432
1434 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1435 MPI_Comm comm) {
1436
1437 int pack_size_weights, pack_size_field_indices;
1438 int count = (int)(stencil->data.weight_sum_mf.srcs->count);
1440 MPI_Pack_size(
1441 count, MPI_DOUBLE, comm, &pack_size_weights), comm);
1443 MPI_Pack_size(
1444 count, MPI_UINT64_T, comm, &pack_size_field_indices), comm);
1445
1446 return
1448 stencil->data.weight_sum_mf.srcs, point_info_dt, comm) +
1449 pack_size_weights + pack_size_field_indices;
1450}
1451
1453 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1454 MPI_Comm comm) {
1455
1456 int pack_size_field_indices;
1458 MPI_Pack_size(
1459 (int)(stencil->data.sum_mf.srcs->count),
1460 MPI_UINT64_T, comm, &pack_size_field_indices), comm);
1461
1462 return
1464 stencil->data.sum_mf.srcs, point_info_dt, comm) +
1465 pack_size_field_indices;
1466}
1467
1469 struct interp_weight_stencil * stencil, struct remote_point point) {
1470
1471 struct interp_weight_stencil stencil_copy = *stencil;
1472 stencil_copy.tgt = copy_remote_point(point);
1473
1474 YAC_ASSERT(
1475 (stencil->type == FIXED) ||
1476 (stencil->type == DIRECT) ||
1477 (stencil->type == SUM) ||
1478 (stencil->type == WEIGHT_SUM) ||
1479 (stencil->type == DIRECT_MF) ||
1480 (stencil->type == SUM_MF) ||
1481 (stencil->type == WEIGHT_SUM_MF),
1482 "ERROR(copy_interp_weight_stencil): invalid stencil type")
1483
1484 switch (stencil->type) {
1485 case(FIXED):
1486 // nothing to be done
1487 break;
1488 case(DIRECT):
1489 stencil_copy.data.direct.src =
1490 copy_remote_point(stencil->data.direct.src);
1491 break;
1492 case(SUM):
1493 stencil_copy.data.weight_sum.weights = NULL;
1494 stencil_copy.data.sum.srcs =
1496 stencil->data.sum.srcs->data, stencil->data.sum.srcs->count);
1497 break;
1498 case(WEIGHT_SUM): {
1499 stencil_copy.data.weight_sum.srcs =
1501 stencil->data.weight_sum.srcs->data,
1502 stencil->data.weight_sum.srcs->count);
1503 size_t weight_size =
1504 stencil->data.weight_sum.srcs->count *
1505 sizeof(*(stencil_copy.data.weight_sum.weights));
1506 stencil_copy.data.weight_sum.weights = xmalloc(weight_size);
1507 memcpy(stencil_copy.data.weight_sum.weights,
1508 stencil->data.weight_sum.weights, weight_size);
1509 break;
1510 }
1511 case(DIRECT_MF):
1512 stencil_copy.data.direct_mf.src =
1514 stencil_copy.data.direct_mf.field_idx =
1515 stencil->data.direct_mf.field_idx;
1516 break;
1517 case(SUM_MF): {
1518 stencil_copy.data.sum_mf.srcs =
1520 stencil->data.sum_mf.srcs->data,
1521 stencil->data.sum_mf.srcs->count);
1522 size_t field_indices_size =
1523 stencil->data.sum_mf.srcs->count *
1524 sizeof(*(stencil_copy.data.sum_mf.field_indices));
1525 stencil_copy.data.sum_mf.field_indices = xmalloc(field_indices_size);
1526 memcpy(stencil_copy.data.sum_mf.field_indices,
1527 stencil->data.sum_mf.field_indices, field_indices_size);
1528 break;
1529 }
1530 default:
1531 case(WEIGHT_SUM_MF): {
1532 stencil_copy.data.weight_sum_mf.srcs =
1534 stencil->data.weight_sum_mf.srcs->data,
1535 stencil->data.weight_sum_mf.srcs->count);
1536 size_t weight_size =
1537 stencil->data.weight_sum_mf.srcs->count *
1538 sizeof(*(stencil_copy.data.weight_sum_mf.weights));
1539 stencil_copy.data.weight_sum_mf.weights = xmalloc(weight_size);
1540 memcpy(stencil_copy.data.weight_sum_mf.weights,
1541 stencil->data.weight_sum_mf.weights, weight_size);
1542 size_t field_indices_size =
1543 stencil->data.weight_sum_mf.srcs->count *
1544 sizeof(*(stencil_copy.data.weight_sum_mf.field_indices));
1545 stencil_copy.data.weight_sum_mf.field_indices =
1546 xmalloc(field_indices_size);
1547 memcpy(stencil_copy.data.weight_sum_mf.field_indices,
1548 stencil->data.weight_sum_mf.field_indices, field_indices_size);
1549 break;
1550 }
1551 };
1552 return stencil_copy;
1553}
1554
1556 struct interp_weight_stencil * stencil, struct remote_point point,
1557 double weight) {
1558
1559 if (weight == 1.0) return copy_interp_weight_stencil(stencil, point);
1560
1561 struct remote_point * srcs;
1562 size_t src_count;
1563 double * weights;
1564
1565 YAC_ASSERT(
1566 (stencil->type == FIXED) ||
1567 (stencil->type == DIRECT) ||
1568 (stencil->type == SUM) ||
1569 (stencil->type == WEIGHT_SUM),
1570 "ERROR(wcopy_interp_weight_stencil): invalid stencil type")
1571
1572 switch (stencil->type) {
1573 case (FIXED):
1574 return
1575 (struct interp_weight_stencil) {
1576 .type = FIXED,
1577 .data.fixed.value = stencil->data.fixed.value * weight,
1578 .tgt = copy_remote_point(point)};
1579 case (DIRECT):
1580 src_count = 1;
1581 srcs = &(stencil->data.direct.src);
1582 weights = NULL;
1583 break;
1584 case (SUM):
1585 src_count = stencil->data.sum.srcs->count;
1586 srcs = stencil->data.sum.srcs->data;
1587 weights = NULL;
1588 break;
1589 default:
1590 case (WEIGHT_SUM):
1591 src_count = stencil->data.weight_sum.srcs->count;
1592 srcs = stencil->data.weight_sum.srcs->data;
1593 weights = stencil->data.weight_sum.weights;
1594 break;
1595 };
1596
1597 double * new_weights = xmalloc(src_count * sizeof(*new_weights));
1598 if (weights == NULL)
1599 for (size_t i = 0; i < src_count; ++i) new_weights[i] = weight;
1600 else
1601 for (size_t i = 0; i < src_count; ++i) new_weights[i] = weights[i] * weight;
1602
1603 struct interp_weight_stencil stencil_wcopy;
1604 stencil_wcopy.type = WEIGHT_SUM;
1605 stencil_wcopy.data.weight_sum.srcs = copy_remote_points(srcs, src_count);
1606 stencil_wcopy.data.weight_sum.weights = new_weights;
1607 stencil_wcopy.tgt = copy_remote_point(point);
1608
1609 return stencil_wcopy;
1610}
1611
1612static int compare_w_global_id(const void * a, const void * b) {
1613
1614 int ret = (((struct weighted_global_id *)a)->global_id >
1615 ((struct weighted_global_id *)b)->global_id) -
1616 (((struct weighted_global_id *)a)->global_id <
1617 ((struct weighted_global_id *)b)->global_id);
1618
1619 if (ret) return ret;
1620
1621 return (((struct weighted_global_id *)a)->weight >
1622 ((struct weighted_global_id *)b)->weight) -
1623 (((struct weighted_global_id *)a)->weight <
1624 ((struct weighted_global_id *)b)->weight);
1625}
1626
1627static int compare_remote_point(const void * a, const void * b) {
1628
1629 return ((const struct remote_point*)a)->global_id -
1630 ((const struct remote_point*)b)->global_id;
1631}
1632
1633static void compact_srcs_w(
1634 struct remote_points * srcs, double ** w) {
1635
1636 struct remote_point * data = srcs->data;
1637 size_t count = srcs->count;
1638
1639 struct weighted_global_id * w_global_id =
1640 xmalloc(count * sizeof(*w_global_id));
1641
1642 // extract global ids and weights
1643 for (size_t i = 0; i < count; ++i) {
1644 w_global_id[i].global_id = data[i].global_id;
1645 w_global_id[i].weight = (*w)[i];
1646 }
1647
1648 // sort by global ids and weights
1649 qsort(w_global_id, count, sizeof(*w_global_id), compare_w_global_id);
1650
1651 // sort sources by global ids
1652 qsort(data, count, sizeof(*data), compare_remote_point);
1653
1654 size_t new_count = 0;
1655
1656 // compact sources
1657 for (size_t i = 0; i < count;) {
1658
1659 data[new_count] = data[i];
1660
1661 yac_int curr_global_id = w_global_id[i].global_id;
1662 double curr_weight = w_global_id[i].weight;
1663
1664 ++i;
1665
1666 while((i < count) && (curr_global_id == w_global_id[i].global_id)) {
1667
1668 curr_weight += w_global_id[i].weight;
1669 ++i;
1670 }
1671
1672 (*w)[new_count] = curr_weight;
1673 ++new_count;
1674 }
1675
1676 srcs->data = xrealloc(data, new_count * sizeof(*data));
1677 srcs->count = new_count;
1678 *w = xrealloc(*w, new_count * sizeof(**w));
1679}
1680
1682 struct interp_weight_stencil ** stencils, double * w, size_t num_stencils) {
1683
1684 size_t src_count = 0;
1685 size_t point_info_buffer_size = 0;
1686
1687 for (size_t i = 0; i < num_stencils; ++i) {
1688 size_t curr_src_count;
1689 struct remote_point * srcs;
1690 YAC_ASSERT(
1691 (stencils[i]->type == DIRECT) ||
1692 (stencils[i]->type == SUM) ||
1693 (stencils[i]->type == WEIGHT_SUM),
1694 "ERROR(stencils_merge_wsum): invalid stencil type")
1695 switch (stencils[i]->type) {
1696 case (DIRECT):
1697 curr_src_count = 1;
1698 srcs = &(stencils[i]->data.direct.src);
1699 break;
1700 case (SUM):
1701 curr_src_count = stencils[i]->data.sum.srcs->count;
1702 srcs = stencils[i]->data.sum.srcs->data;
1703 break;
1704 default:
1705 case (WEIGHT_SUM):
1706 curr_src_count = stencils[i]->data.weight_sum.srcs->count;
1707 srcs = stencils[i]->data.weight_sum.srcs->data;
1708 break;
1709 };
1710 src_count += curr_src_count;
1711 for (size_t j = 0, curr_src_data_count; j < curr_src_count; ++j)
1712 if (((curr_src_data_count = srcs[j].data.count)) > 1)
1713 point_info_buffer_size += curr_src_data_count;
1714 }
1715
1716 struct remote_points * srcs =
1717 xmalloc(point_info_buffer_size * sizeof(struct remote_point_info) +
1718 sizeof(*srcs));
1719 srcs->data = xmalloc(src_count * sizeof(*(srcs->data)));
1720 srcs->count = src_count;
1721 struct remote_point_info * point_info_buffer = &(srcs->buffer[0]);
1722 double * new_w = xmalloc(src_count * sizeof(*new_w));
1723
1724 for (size_t i = 0, offset = 0; i < num_stencils; ++i) {
1725 size_t curr_src_count;
1726 struct remote_point * curr_srcs;
1727 double * stencil_w;
1728 YAC_ASSERT(
1729 (stencils[i]->type == DIRECT) ||
1730 (stencils[i]->type == SUM) ||
1731 (stencils[i]->type == WEIGHT_SUM),
1732 "ERROR(stencils_merge_wsum): invalid stencil type")
1733 switch (stencils[i]->type) {
1734 case (DIRECT):
1735 curr_src_count = 1;
1736 curr_srcs = &(stencils[i]->data.direct.src);
1737 stencil_w = NULL;
1738 break;
1739 case (SUM):
1740 curr_src_count = stencils[i]->data.sum.srcs->count;
1741 curr_srcs = stencils[i]->data.sum.srcs->data;
1742 stencil_w = NULL;
1743 break;
1744 default:
1745 case (WEIGHT_SUM):
1746 curr_src_count = stencils[i]->data.weight_sum.srcs->count;
1747 curr_srcs = stencils[i]->data.weight_sum.srcs->data;
1748 stencil_w = stencils[i]->data.weight_sum.weights;
1749 break;
1750 };
1752 srcs->data + offset, curr_srcs, curr_src_count, &point_info_buffer);
1753 if (stencil_w == NULL)
1754 for (size_t j = 0; j < curr_src_count; ++j, ++offset)
1755 new_w[offset] = w[i];
1756 else
1757 for (size_t j = 0; j < curr_src_count; ++j, ++offset)
1758 new_w[offset] = w[i] * stencil_w[j];
1759 }
1760
1761 compact_srcs_w(srcs, &new_w);
1762
1763 struct interp_weight_stencil merge_stencil;
1764 merge_stencil.type = WEIGHT_SUM;
1765 merge_stencil.data.weight_sum.srcs = srcs;
1766 merge_stencil.data.weight_sum.weights = new_w;
1767
1768 return merge_stencil;
1769}
1770
1772 struct interp_weight_stencil ** stencils, double * w, size_t num_stencils) {
1773
1774 for (size_t i = 0; i < num_stencils; ++i)
1775 if (w[i] != 1.0)
1776 return stencils_merge_wsum(stencils, w, num_stencils);
1777
1778 size_t src_count = 0;
1779 size_t point_info_buffer_size = 0;
1780
1781 for (size_t i = 0; i < num_stencils; ++i) {
1782 size_t curr_src_count;
1783 struct remote_point * srcs;
1784 YAC_ASSERT(
1785 (stencils[i]->type == DIRECT) ||
1786 (stencils[i]->type == SUM),
1787 "ERROR(stencils_merge_sum): invalid stencil type")
1788 switch (stencils[i]->type) {
1789 case (DIRECT):
1790 curr_src_count = 1;
1791 srcs = &(stencils[i]->data.direct.src);
1792 break;
1793 default:
1794 case (SUM):
1795 curr_src_count = stencils[i]->data.sum.srcs->count;
1796 srcs = stencils[i]->data.sum.srcs->data;
1797 break;
1798 };
1799 src_count += curr_src_count;
1800 for (size_t j = 0, curr_src_data_count; j < curr_src_count; ++j)
1801 if (((curr_src_data_count = srcs[j].data.count)) > 1)
1802 point_info_buffer_size += curr_src_data_count;
1803 }
1804
1805 struct remote_points * srcs =
1806 xmalloc(point_info_buffer_size * sizeof(struct remote_point_info) +
1807 sizeof(*srcs));
1808 srcs->data = xmalloc(src_count * sizeof(*(srcs->data)));
1809 srcs->count = src_count;
1810 struct remote_point_info * point_info_buffer = &(srcs->buffer[0]);
1811
1812 for (size_t i = 0, offset = 0; i < num_stencils; ++i) {
1813 size_t curr_src_count;
1814 struct remote_point * curr_srcs;
1815 YAC_ASSERT(
1816 (stencils[i]->type == DIRECT) ||
1817 (stencils[i]->type == SUM),
1818 "ERROR(stencils_merge_sum): invalid stencil type")
1819 switch (stencils[i]->type) {
1820 case (DIRECT):
1821 curr_src_count = 1;
1822 curr_srcs = &(stencils[i]->data.direct.src);
1823 break;
1824 default:
1825 case (SUM):
1826 curr_src_count = stencils[i]->data.sum.srcs->count;
1827 curr_srcs = stencils[i]->data.sum.srcs->data;
1828 break;
1829 };
1831 srcs->data + offset, curr_srcs, curr_src_count, &point_info_buffer);
1832 offset += curr_src_count;
1833 }
1834
1835 qsort(srcs->data, srcs->count, sizeof(*(srcs->data)), compare_remote_point);
1836
1837 struct interp_weight_stencil merge_stencil;
1838 merge_stencil.type = SUM;
1839 merge_stencil.data.sum.srcs = srcs;
1840
1841 return merge_stencil;
1842}
1843
1845 struct interp_weight_stencil ** stencils, double * w, size_t num_stencils,
1846 struct remote_point point) {
1847
1848 if (num_stencils == 1)
1849 return wcopy_interp_weight_stencil(*stencils, point, *w);
1850
1851 int fixed_count = 0;
1852 int direct_count = 0;
1853 int sum_count = 0;
1854 int wsum_count = 0;
1855 double fixed_value = 0.0;
1856
1857 for (size_t i = 0; i < num_stencils; ++i) {
1858 YAC_ASSERT(
1859 (stencils[i]->type != DIRECT_MF) &&
1860 (stencils[i]->type != SUM_MF) &&
1861 (stencils[i]->type != WEIGHT_SUM_MF),
1862 "ERROR(stencils_merge): multiple source fields not yet supported")
1863 YAC_ASSERT(
1864 (stencils[i]->type == FIXED) ||
1865 (stencils[i]->type == DIRECT) ||
1866 (stencils[i]->type == SUM) ||
1867 (stencils[i]->type == WEIGHT_SUM),
1868 "ERROR(stencils_merge): unsupported stencil type")
1869 switch (stencils[i]->type) {
1870 case (FIXED):
1871 fixed_value += stencils[i]->data.fixed.value * w[i];
1872 fixed_count++;
1873 break;
1874 case (DIRECT):
1875 direct_count++;
1876 break;
1877 case (SUM):
1878 sum_count++;
1879 break;
1880 default:
1881 case (WEIGHT_SUM):
1882 wsum_count++;
1883 break;
1884 };
1885 }
1886
1887 struct interp_weight_stencil merge_stencil;
1888
1889 YAC_ASSERT(
1890 (fixed_count > 0) || (wsum_count > 0) ||
1891 (sum_count > 0) || (direct_count > 0),
1892 "ERROR(stencils_merge): unknown error")
1893 if (fixed_count > 0) {
1894
1895 YAC_ASSERT(
1896 (direct_count + sum_count + wsum_count) <= 0,
1897 "ERROR(stencils_merge): invalid stencil combination")
1898
1899 merge_stencil = **stencils;
1900 merge_stencil.data.fixed.value = fixed_value;
1901 } else if (wsum_count > 0)
1902 merge_stencil =
1903 stencils_merge_wsum(stencils, w, num_stencils);
1904 else if ((sum_count > 0) || (direct_count > 0))
1905 merge_stencil =
1906 stencils_merge_sum(stencils, w, num_stencils);
1907
1908 merge_stencil.tgt = copy_remote_point(point);
1909
1910 return merge_stencil;
1911}
1912
1914 struct interp_weight_stencil * stencils, size_t count, size_t * pack_order,
1915 int * pack_sizes, MPI_Datatype point_info_dt, MPI_Comm comm) {
1916
1917 int pack_size_type;
1918 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_type), comm);
1919
1920 for (size_t i = 0; i < count; ++i) {
1921
1922 struct interp_weight_stencil * curr_stencil = stencils + pack_order[i];
1923 int (*func_pack_size)(
1924 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1925 MPI_Comm comm);
1926 YAC_ASSERT(
1927 (curr_stencil->type == FIXED) ||
1928 (curr_stencil->type == DIRECT) ||
1929 (curr_stencil->type == SUM) ||
1930 (curr_stencil->type == WEIGHT_SUM) ||
1931 (curr_stencil->type == DIRECT_MF) ||
1932 (curr_stencil->type == SUM_MF) ||
1933 (curr_stencil->type == WEIGHT_SUM_MF),
1934 "ERROR(get_stencils_pack_sizes): invalid stencil type")
1935 switch (curr_stencil->type) {
1936 case(FIXED):
1937 func_pack_size = get_stencil_pack_size_fixed;
1938 break;
1939 case(DIRECT):
1940 func_pack_size = get_stencil_pack_size_direct;
1941 break;
1942 case(SUM):
1943 func_pack_size = get_stencil_pack_size_sum;
1944 break;
1945 default:
1946 case(WEIGHT_SUM):
1947 func_pack_size = get_stencil_pack_size_wsum;
1948 break;
1949 case(DIRECT_MF):
1950 func_pack_size = get_stencil_pack_size_direct_mf;
1951 break;
1952 case(SUM_MF):
1953 func_pack_size = get_stencil_pack_size_sum_mf;
1954 break;
1955 case(WEIGHT_SUM_MF):
1956 func_pack_size = get_stencil_pack_size_wsum_mf;
1957 break;
1958 };
1959 pack_sizes[i] = pack_size_type +
1961 &(curr_stencil->tgt), point_info_dt, comm) +
1962 func_pack_size(curr_stencil, point_info_dt, comm);
1963 }
1964}
1965
1967 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
1968 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
1969
1970 // fixed value
1972 MPI_Pack(&(stencil->data.fixed.value), 1, MPI_DOUBLE, buffer, buffer_size,
1973 position, comm), comm);
1974}
1975
1977 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
1978 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
1979
1980 // src
1982 &(stencil->data.direct.src), buffer, buffer_size, position, point_info_dt,
1983 comm);
1984}
1985
1987 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
1988 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
1989
1990 // srcs
1992 stencil->data.sum.srcs, buffer, buffer_size, position, point_info_dt, comm);
1993}
1994
1996 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
1997 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
1998
1999 // srcs
2001 stencil->data.weight_sum.srcs, buffer, buffer_size, position,
2002 point_info_dt, comm);
2003 // weights
2005 MPI_Pack(stencil->data.weight_sum.weights,
2006 (int)(stencil->data.weight_sum.srcs->count), MPI_DOUBLE,
2007 buffer, buffer_size, position, comm), comm);
2008}
2009
2011 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2012 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2013
2014 // src
2016 &(stencil->data.direct_mf.src), buffer, buffer_size, position,
2017 point_info_dt, comm);
2018
2019 // field_idx
2020 uint64_t temp_field_idx = (uint64_t)(stencil->data.direct_mf.field_idx);
2022 MPI_Pack(&temp_field_idx, 1, MPI_UINT64_T,
2023 buffer, buffer_size, position, comm), comm);
2024}
2025
2027 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2028 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2029
2030 // srcs
2032 stencil->data.sum_mf.srcs, buffer, buffer_size, position,
2033 point_info_dt, comm);
2034
2035 size_t count = stencil->data.sum_mf.srcs->count;
2036 // field_indices
2037 uint64_t * temp_field_indices = xmalloc(count * sizeof(*temp_field_indices));
2038 for (size_t i = 0; i < count; ++i)
2039 temp_field_indices[i] =
2040 (uint64_t)(stencil->data.sum_mf.field_indices[i]);
2042 MPI_Pack(temp_field_indices, (int)count, MPI_UINT64_T,
2043 buffer, buffer_size, position, comm), comm);
2044 free(temp_field_indices);
2045}
2046
2048 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2049 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2050
2051 // srcs
2053 stencil->data.weight_sum_mf.srcs, buffer, buffer_size, position,
2054 point_info_dt, comm);
2055
2056 size_t count = stencil->data.weight_sum_mf.srcs->count;
2057 // weights
2059 MPI_Pack(stencil->data.weight_sum_mf.weights, (int)count, MPI_DOUBLE,
2060 buffer, buffer_size, position, comm), comm);
2061 // field_indices
2062 uint64_t * temp_field_indices = xmalloc(count * sizeof(*temp_field_indices));
2063 for (size_t i = 0; i < count; ++i)
2064 temp_field_indices[i] =
2065 (uint64_t)(stencil->data.weight_sum_mf.field_indices[i]);
2067 MPI_Pack(temp_field_indices, (int)count, MPI_UINT64_T,
2068 buffer, buffer_size, position, comm), comm);
2069 free(temp_field_indices);
2070}
2071
2072static void pack_stencils(
2073 struct interp_weight_stencil * stencils, size_t count, size_t * pack_order,
2074 void ** pack_data, int * pack_sizes, MPI_Datatype point_info_dt,
2075 MPI_Comm comm) {
2076
2078 stencils, count, pack_order, pack_sizes, point_info_dt, comm);
2079
2080 size_t pack_buffer_size = 0;
2081 for (size_t i = 0; i < count; ++i)
2082 pack_buffer_size += (size_t)(pack_sizes[i]);
2083
2084 void * pack_data_ = xmalloc(pack_buffer_size);
2085 size_t total_pack_size = 0;
2086
2087 for (size_t i = 0; i < count; ++i) {
2088
2089 struct interp_weight_stencil * curr_stencil = stencils + pack_order[i];
2090 void (*func_pack)(
2091 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2092 int * position, MPI_Datatype point_info_dt, MPI_Comm comm);
2093
2094 YAC_ASSERT(
2095 (curr_stencil->type == FIXED) ||
2096 (curr_stencil->type == DIRECT) ||
2097 (curr_stencil->type == SUM) ||
2098 (curr_stencil->type == WEIGHT_SUM) ||
2099 (curr_stencil->type == DIRECT_MF) ||
2100 (curr_stencil->type == SUM_MF) ||
2101 (curr_stencil->type == WEIGHT_SUM_MF),
2102 "ERROR(pack_stencils): invalid stencil type")
2103 switch (curr_stencil->type) {
2104 default:
2105 case(FIXED):
2106 func_pack = pack_stencil_fixed;
2107 break;
2108 case(DIRECT):
2109 func_pack = pack_stencil_direct;
2110 break;
2111 case(SUM):
2112 func_pack = pack_stencil_sum;
2113 break;
2114 case(WEIGHT_SUM):
2115 func_pack = pack_stencil_wsum;
2116 break;
2117 case(DIRECT_MF):
2118 func_pack = pack_stencil_direct_mf;
2119 break;
2120 case(SUM_MF):
2121 func_pack = pack_stencil_sum_mf;
2122 break;
2123 case(WEIGHT_SUM_MF):
2124 func_pack = pack_stencil_wsum_mf;
2125 break;
2126 };
2127
2128 int position = 0;
2129 int type = (int)curr_stencil->type;
2130 void * buffer = (void*)((char*)pack_data_ + total_pack_size);
2131 int buffer_size = pack_sizes[i];
2132
2133 // type
2135 MPI_Pack(&type, 1, MPI_INT, buffer, buffer_size, &position, comm), comm);
2136 // tgt
2137 yac_remote_point_pack(&(curr_stencil->tgt), buffer, buffer_size,
2138 &position, point_info_dt, comm);
2139 // stencil data
2140 func_pack(curr_stencil, buffer, buffer_size, &position, point_info_dt, comm);
2141
2143 pack_sizes[i] >= position,
2144 "ERROR(pack_stencils): "
2145 "actual pack size is bigger then computed one (%d > %d)",
2146 position, pack_sizes[i]);
2147
2148 pack_sizes[i] = position;
2149 total_pack_size += (size_t)position;
2150 }
2151
2152 *pack_data = xrealloc(pack_data_, total_pack_size);
2153}
2154
2156 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2157 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2158
2159 // fixed value
2161 MPI_Unpack(buffer, buffer_size, position, &(stencil->data.fixed.value), 1,
2162 MPI_DOUBLE, comm), comm);
2163}
2164
2166 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2167 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2168
2169 // src
2171 buffer, buffer_size, position, &stencil->data.direct.src,
2172 point_info_dt, comm);
2173}
2174
2176 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2177 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2178
2179 // srcs
2180 stencil->data.weight_sum.weights = NULL;
2182 buffer, buffer_size, position, &(stencil->data.sum.srcs), point_info_dt,
2183 comm);
2184}
2185
2187 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2188 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2189
2190 // srcs
2192 buffer, buffer_size, position, &(stencil->data.weight_sum.srcs),
2193 point_info_dt, comm);
2194
2195 size_t count = stencil->data.weight_sum.srcs->count;
2196
2197 stencil->data.weight_sum.weights =
2198 xmalloc(count * sizeof(*(stencil->data.weight_sum.weights)));
2199
2200 // weights
2202 MPI_Unpack(buffer, buffer_size, position, stencil->data.weight_sum.weights,
2203 (int)count, MPI_DOUBLE, comm), comm);
2204}
2205
2207 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2208 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2209
2210 // src
2212 buffer, buffer_size, position, &stencil->data.direct_mf.src,
2213 point_info_dt, comm);
2214
2215 // field_idx
2216 uint64_t temp_field_idx;
2218 MPI_Unpack(
2219 buffer, buffer_size, position, &temp_field_idx,
2220 1, MPI_UINT64_T, comm), comm);
2221 stencil->data.direct_mf.field_idx = (size_t)temp_field_idx;
2222}
2223
2225 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2226 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2227
2228 // srcs
2230 buffer, buffer_size, position, &(stencil->data.sum_mf.srcs),
2231 point_info_dt, comm);
2232
2233 size_t count = stencil->data.sum_mf.srcs->count;
2234
2235 uint64_t * temp_field_indices = xmalloc(count * sizeof(*temp_field_indices));
2236 stencil->data.sum_mf.field_indices =
2237 xmalloc(count * sizeof(*(stencil->data.sum_mf.field_indices)));
2238
2239 // field_indices
2241 MPI_Unpack(
2242 buffer, buffer_size, position, temp_field_indices,
2243 (int)count, MPI_UINT64_T, comm), comm);
2244 for (size_t i = 0; i < count; ++i)
2245 stencil->data.sum_mf.field_indices[i] = (size_t)(temp_field_indices[i]);
2246 free(temp_field_indices);
2247}
2248
2250 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2251 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2252
2253 // srcs
2255 buffer, buffer_size, position, &(stencil->data.weight_sum_mf.srcs),
2256 point_info_dt, comm);
2257
2258 size_t count = stencil->data.weight_sum_mf.srcs->count;
2259
2260 stencil->data.weight_sum_mf.weights =
2261 xmalloc(count * sizeof(*(stencil->data.weight_sum_mf.weights)));
2262
2263 // weights
2265 MPI_Unpack(
2266 buffer, buffer_size, position, stencil->data.weight_sum_mf.weights,
2267 (int)count, MPI_DOUBLE, comm), comm);
2268
2269 uint64_t * temp_field_indices = xmalloc(count * sizeof(*temp_field_indices));
2271 xmalloc(count * sizeof(*(stencil->data.weight_sum_mf.field_indices)));
2272
2273 // field_indices
2275 MPI_Unpack(
2276 buffer, buffer_size, position, temp_field_indices,
2277 (int)count, MPI_UINT64_T, comm), comm);
2278 for (size_t i = 0; i < count; ++i)
2279 stencil->data.weight_sum_mf.field_indices[i] =
2280 (size_t)(temp_field_indices[i]);
2281 free(temp_field_indices);
2282}
2283
2285 struct interp_weight_stencil * stencils, size_t count,
2286 void * packed_data, size_t packed_data_size,
2287 MPI_Datatype point_info_dt, MPI_Comm comm) {
2288
2289 for (size_t i = 0, offset = 0; i < count; ++i) {
2290
2291 YAC_ASSERT(
2292 packed_data_size >= offset,
2293 "ERROR(unpack_stencils): invalid offset");
2294
2295 int position = 0;
2296 void * curr_buffer = (void*)((unsigned char*)packed_data + offset);
2297 int buffer_size = (int)(MIN(packed_data_size - offset, INT_MAX));
2298 struct interp_weight_stencil * curr_stencil = stencils + i;
2299
2300 int type;
2302 MPI_Unpack(
2303 curr_buffer, buffer_size, &position, &type, 1, MPI_INT, comm), comm);
2304
2305 void (*func_unpack)(
2306 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2307 int * position, MPI_Datatype point_info_dt, MPI_Comm comm);
2308
2309 YAC_ASSERT(
2310 (type == FIXED) ||
2311 (type == DIRECT) || (type == SUM) || (type == WEIGHT_SUM) ||
2312 (type == DIRECT_MF) || (type == SUM_MF) || (type == WEIGHT_SUM_MF),
2313 "ERROR(unpack_stencils): invalid stencil type")
2314 switch (type) {
2315 case(FIXED):
2316 func_unpack = unpack_stencil_fixed;
2317 break;
2318 case(DIRECT):
2319 func_unpack = unpack_stencil_direct;
2320 break;
2321 case(SUM):
2322 func_unpack = unpack_stencil_sum;
2323 break;
2324 default:
2325 case(WEIGHT_SUM):
2326 func_unpack = unpack_stencil_wsum;
2327 break;
2328 case(DIRECT_MF):
2329 func_unpack = unpack_stencil_direct_mf;
2330 break;
2331 case(SUM_MF):
2332 func_unpack = unpack_stencil_sum_mf;
2333 break;
2334 case(WEIGHT_SUM_MF):
2335 func_unpack = unpack_stencil_wsum_mf;
2336 break;
2337 };
2338
2339 curr_stencil->type =
2342 curr_buffer, buffer_size, &position, &(curr_stencil->tgt),
2343 point_info_dt, comm);
2344 func_unpack(
2345 curr_stencil, curr_buffer, buffer_size, &position, point_info_dt, comm);
2346 offset += (size_t)position;
2347 }
2348}
2349
2351 MPI_Comm comm, struct interp_weight_stencil * stencils,
2352 size_t * stencil_indices,
2353 size_t * stencil_sendcounts, size_t * stencil_recvcounts) {
2354
2355 int comm_rank, comm_size;
2356 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
2357 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
2358
2359 YAC_ASSERT(
2360 stencil_sendcounts[comm_rank] == stencil_recvcounts[comm_rank],
2361 "ERROR(exchange_stencils): error in arguments")
2362
2363 size_t send_count = 0, recv_count = 0;
2364 size_t local_send_offset = 0;
2365 size_t local_recv_offset = 0;
2366 size_t local_count = (size_t)(stencil_sendcounts[comm_rank]);
2367 for (int i = 0; i < comm_rank; ++i) {
2368 send_count += stencil_sendcounts[i];
2369 recv_count += stencil_recvcounts[i];
2370 local_send_offset += stencil_sendcounts[i];
2371 local_recv_offset += stencil_recvcounts[i];
2372 }
2373 local_send_offset = send_count;
2374 local_recv_offset = recv_count;
2375 stencil_sendcounts[comm_rank] = 0;
2376 stencil_recvcounts[comm_rank] = 0;
2377 for (int i = comm_rank + 1; i < comm_size; ++i) {
2378 send_count += stencil_sendcounts[i];
2379 recv_count += stencil_recvcounts[i];
2380 }
2381
2382 struct interp_weight_stencil * new_stencils =
2383 xmalloc((recv_count + local_count) * sizeof(*new_stencils));
2384 size_t * local_stencil_indices =
2385 xmalloc(local_count * sizeof(*local_stencil_indices));
2386 memcpy(local_stencil_indices, stencil_indices + local_send_offset,
2387 local_count * sizeof(*local_stencil_indices));
2388
2389 // remove the local stencil indices
2390 memmove(
2391 stencil_indices + local_send_offset,
2392 stencil_indices + local_send_offset + local_count,
2393 (send_count - local_send_offset) * sizeof(*stencil_indices));
2394
2395 // pack the stencils that need to be send to other processes
2396 void * send_buffer;
2397 int * pack_sizes = xmalloc(send_count * sizeof(*pack_sizes));
2398 MPI_Datatype point_info_dt = yac_get_remote_point_info_mpi_datatype(comm);
2400 stencils, send_count, stencil_indices, &send_buffer, pack_sizes,
2401 point_info_dt, comm);
2402
2403 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
2405 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
2406
2407 send_count = 0;
2408 for (int rank = 0; rank < comm_size; ++rank) {
2409 size_t sendcount = 0;
2410 int curr_num_stencils = stencil_sendcounts[rank];
2411 for (int j = 0; j < curr_num_stencils; ++j, ++send_count)
2412 sendcount += (size_t)(pack_sizes[send_count]);
2413 sendcounts[rank] = sendcount;
2414 }
2415 free(pack_sizes);
2416
2418 1, sendcounts, recvcounts, sdispls, rdispls, comm);
2419
2420 size_t recv_size = recvcounts[comm_size - 1] + rdispls[comm_size - 1];
2421
2422 void * recv_buffer = xmalloc(recv_size);
2423
2424 // exchange stencils
2425 yac_alltoallv_packed_p2p(
2426 send_buffer, sendcounts, sdispls+1,
2427 recv_buffer, recvcounts, rdispls, comm);
2428 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
2429 free(send_buffer);
2430
2431 // unpack stencils
2433 new_stencils, recv_count,
2434 recv_buffer, recv_size, point_info_dt, comm);
2435 yac_mpi_call(MPI_Type_free(&point_info_dt), comm);
2436 free(recv_buffer);
2437
2438 memmove(new_stencils + local_recv_offset + local_count,
2439 new_stencils + local_recv_offset ,
2440 (recv_count - local_recv_offset ) * sizeof(*new_stencils));
2441 for (size_t i = 0; i < local_count; ++i, ++local_recv_offset )
2442 new_stencils[local_recv_offset] =
2444 stencils + local_stencil_indices[i],
2445 stencils[local_stencil_indices[i]].tgt);
2446 free(local_stencil_indices);
2447
2448 return new_stencils;
2449}
2450
2452 struct yac_interp_weights * weights, size_t * stencil_indices,
2453 int * stencil_ranks, size_t count) {
2454
2455 MPI_Comm comm = weights->comm;
2456 int comm_size;
2457 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
2458
2459 YAC_ASSERT(
2460 count <= INT_MAX,
2461 "ERROR(yac_interp_weights_get_stencils): count exceeds INT_MAX");
2462
2463 size_t * reorder_idx = xmalloc(count * sizeof(*reorder_idx));
2464 for (size_t i = 0; i < count; ++i) reorder_idx[i] = i;
2465
2467 stencil_ranks, count, stencil_indices, reorder_idx);
2468
2469 // exchange requested stencils indices
2470 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
2472 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
2473 for (size_t i = 0; i < count; ++i) sendcounts[stencil_ranks[i]]++;
2475 1, sendcounts, recvcounts, sdispls, rdispls, comm);
2476 size_t recv_count =
2477 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
2478 uint64_t * uint64_t_buffer =
2479 xmalloc((count + recv_count) * sizeof(*uint64_t_buffer));
2480 uint64_t * send_stencil_indices = uint64_t_buffer;
2481 uint64_t * recv_stencil_indices = uint64_t_buffer + count;
2482 for (size_t i = 0; i < count; ++i)
2483 send_stencil_indices[i] = (uint64_t)(stencil_indices[i]);
2484 yac_alltoallv_uint64_p2p(
2485 send_stencil_indices, sendcounts, sdispls+1,
2486 recv_stencil_indices, recvcounts, rdispls, comm);
2487
2488 // exchange stencils
2489 size_t * exchange_stencil_indices =
2490 xmalloc(recv_count * sizeof(*exchange_stencil_indices));
2491 for (size_t i = 0; i < recv_count; ++i) {
2492 YAC_ASSERT(
2493 (size_t)(recv_stencil_indices[i]) < weights->stencils_size,
2494 "ERROR(yac_interp_weights_get_stencils): invalid stencil index");
2495 exchange_stencil_indices[i] = (size_t)(recv_stencil_indices[i]);
2496 }
2497 free(uint64_t_buffer);
2498 struct interp_weight_stencil * stencils =
2499 exchange_stencils(comm, weights->stencils, exchange_stencil_indices,
2500 recvcounts, sendcounts);
2501 free(exchange_stencil_indices);
2502 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
2503
2504 // sort received stencils into original order
2505 struct interp_weight_stencil * sorted_stencils =
2506 xmalloc(count * sizeof(*sorted_stencils));
2507 for (size_t i = 0; i < count; ++i)
2508 sorted_stencils[reorder_idx[i]] = stencils[i];
2509 free(stencils);
2510 free(reorder_idx);
2511
2512 return sorted_stencils;
2513}
2514
2516 struct interp_weight_stencil * stencils, size_t count);
2517
2519 struct yac_interp_weights * weights, struct remote_points * tgts,
2520 size_t * num_stencils_per_tgt, size_t * stencil_indices,
2521 int * stencil_ranks, double * w) {
2522
2523 size_t count = (tgts != NULL)?tgts->count:0;
2524 MPI_Comm comm = weights->comm;
2525 int comm_rank, comm_size;
2526 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
2527 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
2528
2529 // count the number of missing stencils
2530 size_t total_num_stencils = 0;
2531 size_t max_num_stencils_per_tgt = 0;
2532 for (size_t i = 0; i < count; ++i) {
2533 size_t curr_num_stencils_per_tgt = num_stencils_per_tgt[i];
2534 if (curr_num_stencils_per_tgt > max_num_stencils_per_tgt)
2535 max_num_stencils_per_tgt = curr_num_stencils_per_tgt;
2536 total_num_stencils += num_stencils_per_tgt[i];
2537 }
2538 size_t num_missing_stencils = 0;
2539 for (size_t i = 0; i < total_num_stencils; ++i)
2540 if (stencil_ranks[i] != comm_rank) num_missing_stencils++;
2541
2542 // get missing stencils
2543 size_t * missing_stencil_indices =
2544 xmalloc(num_missing_stencils * sizeof(*missing_stencil_indices));
2545 int * missing_stencil_ranks =
2546 xmalloc(num_missing_stencils * sizeof(*missing_stencil_ranks));
2547 for (size_t i = 0, j = 0; i < total_num_stencils; ++i) {
2548 if (stencil_ranks[i] != comm_rank) {
2549 missing_stencil_indices[j] = stencil_indices[i];
2550 missing_stencil_ranks[j] = stencil_ranks[i];
2551 ++j;
2552 }
2553 }
2554 struct interp_weight_stencil * missing_stencils =
2556 weights, missing_stencil_indices, missing_stencil_ranks,
2557 num_missing_stencils);
2558 free(missing_stencil_ranks);
2559 free(missing_stencil_indices);
2560
2561 // merge stencils to generate new ones
2562 {
2563 struct interp_weight_stencil * stencils = weights->stencils;
2564 size_t stencils_array_size = weights->stencils_array_size;
2565 size_t stencils_size = weights->stencils_size;
2566
2567 struct interp_weight_stencil ** stencils_buffer =
2568 xmalloc(max_num_stencils_per_tgt * sizeof(*stencils_buffer));
2569
2571 stencils, stencils_array_size, stencils_size + count);
2572
2573 for (size_t i = 0, j = 0; i < count;
2574 ++i, ++stencils_size) {
2575
2576 size_t curr_num_stencils = num_stencils_per_tgt[i];
2577 for (size_t k = 0; k < curr_num_stencils; ++k)
2578 stencils_buffer[k] =
2579 (stencil_ranks[k] == comm_rank)?
2580 (stencils + stencil_indices[k]):(missing_stencils + (j++));
2581
2582 stencils[stencils_size] =
2583 stencils_merge(stencils_buffer, w, curr_num_stencils, tgts->data[i]);
2584 w += curr_num_stencils;
2585 stencil_indices += curr_num_stencils;
2586 stencil_ranks += curr_num_stencils;
2587 }
2588
2589 weights->stencils = stencils;
2590 weights->stencils_array_size = stencils_array_size;
2591 weights->stencils_size = stencils_size;
2592
2593 free(stencils_buffer);
2594 }
2595
2596 yac_interp_weight_stencils_delete(missing_stencils, num_missing_stencils);
2597}
2598
2599static int compute_owner(int * ranks, size_t count) {
2600
2601 YAC_ASSERT(count != 0, "ERROR(compute_owner): count == 0")
2602
2603 yac_quicksort_index(ranks, count, NULL);
2604
2605 int best_rank = -1;
2606 size_t best_rank_count = 0;
2607
2608 size_t curr_rank_count = 1;
2609 int prev_rank = ranks[0];
2610
2611 for (size_t i = 1; i < count; ++i, ++curr_rank_count) {
2612 int curr_rank = ranks[i];
2613 if (prev_rank != curr_rank) {
2614 if (curr_rank_count > best_rank_count) {
2615 best_rank = prev_rank;
2616 best_rank_count = curr_rank_count;
2617 }
2618 prev_rank = curr_rank;
2619 curr_rank_count = 0;
2620 }
2621 }
2622
2623 return (curr_rank_count > best_rank_count)?prev_rank:best_rank;
2624}
2625
2627 struct interp_weight_stencil * stencils, size_t count,
2628 enum yac_interp_weight_stencil_type stencil_type) {
2629
2630 // compute total number of links
2631 size_t total_num_links = 0;
2632
2633 for (size_t i = 0; i < count; ++i) {
2634 YAC_ASSERT(
2635 stencils[i].type == stencil_type,
2636 "ERROR(generate_w_sum_mf_stencils): wrong stencil type")
2637 // due to the data layout this works for "sum" and "weight_sum"
2638 total_num_links += stencils[i].data.weight_sum.srcs->count;
2639 }
2640
2642 xmalloc(sizeof(*temp) + total_num_links * sizeof(temp->buffer[0]));
2643 struct interp_weight_stencils_wsum_mf * wsum_stencils =
2644 (struct interp_weight_stencils_wsum_mf *)temp;
2645 wsum_stencils->data = xmalloc(count * sizeof(*(wsum_stencils->data)));
2646 wsum_stencils->count = count;
2647
2648 // extract data from stencils
2649 for (size_t i = 0, k = 0; i < count; ++i) {
2650 struct interp_weight_stencil_wsum_mf * curr_wsum_stencil =
2651 wsum_stencils->data + i;
2652 struct interp_weight_stencil_wsum_mf_weight * curr_links =
2653 &(temp->buffer[k]);
2654 size_t curr_stencil_size = stencils[i].data.weight_sum.srcs->count;
2655 struct remote_point * curr_srcs = stencils[i].data.weight_sum.srcs->data;
2656 curr_wsum_stencil->tgt = copy_remote_point(stencils[i].tgt);
2657 curr_wsum_stencil->count = curr_stencil_size;
2658 curr_wsum_stencil->data = curr_links;
2659 for (size_t j = 0; j < curr_stencil_size; ++j) {
2660 int curr_count = curr_srcs[j].data.count;
2661 YAC_ASSERT(
2662 curr_count >= 1,
2663 "ERROR(generate_w_sum_mf_stencils): global src id no found")
2664 curr_links[j].src =
2665 (curr_count == 1)?
2666 (curr_srcs[j].data.data.single):(curr_srcs[j].data.data.multi[0]);
2667 YAC_ASSERT(
2668 (stencil_type == SUM) || (stencil_type == WEIGHT_SUM) ||
2669 (stencil_type == SUM_MF) || (stencil_type == WEIGHT_SUM_MF),
2670 "ERROR(generate_w_sum_mf_stencils): unsupported stencil type")
2671 switch(stencil_type) {
2672 default:
2673 case(SUM):
2674 curr_links[j].weight = 1.0;
2675 curr_links[j].src_field_idx = 0;
2676 break;
2677 case(WEIGHT_SUM):
2678 curr_links[j].weight = stencils[i].data.weight_sum.weights[j];
2679 curr_links[j].src_field_idx = 0;
2680 break;
2681 case(SUM_MF):
2682 curr_links[j].weight = 1.0;
2683 curr_links[j].src_field_idx =
2684 stencils[i].data.sum_mf.field_indices[j];
2685 break;
2686 case(WEIGHT_SUM_MF):
2687 curr_links[j].weight = stencils[i].data.weight_sum_mf.weights[j];
2688 curr_links[j].src_field_idx =
2689 stencils[i].data.weight_sum_mf.field_indices[j];
2690 break;
2691 };
2692 }
2693 k += curr_stencil_size;
2694 }
2695
2696 return wsum_stencils;
2697}
2698
2699static MPI_Datatype get_wsum_mf_weight_mpi_datatype(MPI_Comm comm) {
2700
2702 MPI_Datatype dt;
2703 int array_of_blocklengths[] = {1, 1, 1, 1};
2704 const MPI_Aint array_of_displacements[] =
2705 {(MPI_Aint)(intptr_t)(const void *)&(dummy.src.rank) -
2706 (MPI_Aint)(intptr_t)(const void *)&dummy,
2707 (MPI_Aint)(intptr_t)(const void *)&(dummy.src.orig_pos) -
2708 (MPI_Aint)(intptr_t)(const void *)&dummy,
2709 (MPI_Aint)(intptr_t)(const void *)&(dummy.src_field_idx) -
2710 (MPI_Aint)(intptr_t)(const void *)&dummy,
2711 (MPI_Aint)(intptr_t)(const void *)&(dummy.weight) -
2712 (MPI_Aint)(intptr_t)(const void *)&dummy};
2713 const MPI_Datatype array_of_types[] =
2714 {MPI_INT, MPI_UINT64_T, MPI_UINT64_T, MPI_DOUBLE};
2716 MPI_Type_create_struct(4, array_of_blocklengths, array_of_displacements,
2717 array_of_types, &dt), comm);
2718 return yac_create_resized(dt, sizeof(dummy), comm);
2719}
2720
2722 struct interp_weight_stencil_wsum_mf * stencil,
2723 MPI_Datatype wsum_mf_weight_dt, MPI_Datatype point_info_dt, MPI_Comm comm) {
2724
2725 int pack_size_count,
2726 pack_size_weights,
2727 pack_size_tgt;
2728
2729 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_count), comm);
2731 MPI_Pack_size(
2732 (int)(stencil->count), wsum_mf_weight_dt, comm, &pack_size_weights), comm);
2733 pack_size_tgt =
2734 yac_remote_point_get_pack_size(&(stencil->tgt), point_info_dt, comm);
2735
2736 return pack_size_count + pack_size_weights + pack_size_tgt;
2737}
2738
2740 struct interp_weight_stencil_wsum_mf * wsum_stencils, size_t count,
2741 size_t * pack_order, void ** pack_data, int * pack_sizes,
2742 int * weight_counts, MPI_Comm comm) {
2743
2744 MPI_Datatype wsum_mf_weight_dt = get_wsum_mf_weight_mpi_datatype(comm);
2745 MPI_Datatype point_info_dt = yac_get_remote_point_info_mpi_datatype(comm);
2746
2747 // get the pack sizes and the upper bound for the pack buffer size
2748 size_t temp_total_pack_size = 0;
2749 for (size_t i = 0; i < count; ++i) {
2750 temp_total_pack_size +=
2751 (pack_sizes[i] =
2753 wsum_stencils + pack_order[i],
2754 wsum_mf_weight_dt, point_info_dt, comm));
2755 }
2756
2757 void * pack_data_ = xmalloc(temp_total_pack_size);
2758 size_t total_pack_size = 0;
2759
2760 // pack the stencils
2761 for (size_t i = 0; i < count; ++i) {
2762
2763 size_t idx = pack_order[i];
2764
2765 int position = 0;
2766 void * buffer = (void*)((unsigned char*)pack_data_ + total_pack_size);
2767 int buffer_size = pack_sizes[i];
2768 int curr_count = wsum_stencils[idx].count;
2769
2770 // tgt
2772 &(wsum_stencils[idx].tgt), buffer, buffer_size, &position,
2773 point_info_dt, comm);
2774 // weight count
2776 MPI_Pack(&curr_count, 1, MPI_INT, buffer, buffer_size, &position, comm), comm);
2777 // weights
2779 MPI_Pack(wsum_stencils[idx].data, curr_count, wsum_mf_weight_dt,
2780 buffer, buffer_size, &position, comm), comm);
2781
2782 pack_sizes[i] = position;
2783 weight_counts[i] = curr_count;
2784 total_pack_size += (size_t)position;
2785 }
2786
2787 yac_mpi_call(MPI_Type_free(&point_info_dt), comm);
2788 yac_mpi_call(MPI_Type_free(&wsum_mf_weight_dt), comm);
2789
2790 *pack_data = xrealloc(pack_data_, total_pack_size);
2791}
2792
2794 struct interp_weight_stencil_wsum_mf * wsum_stencils,
2795 struct interp_weight_stencil_wsum_mf_weight * weight_buffer, size_t count,
2796 void * packed_data, size_t packed_data_size, MPI_Comm comm) {
2797
2798 MPI_Datatype wsum_mf_weight_dt = get_wsum_mf_weight_mpi_datatype(comm);
2799 MPI_Datatype point_info_dt = yac_get_remote_point_info_mpi_datatype(comm);
2800
2801 size_t weight_offset = 0;
2802 for (size_t i = 0, offset = 0; i < count; ++i) {
2803
2804 int position = 0;
2805 void * curr_buffer = (void*)((char*)packed_data + offset);
2806 int buffer_size = (int)(packed_data_size - offset);
2807 struct interp_weight_stencil_wsum_mf * curr_wsum_stencil =
2808 wsum_stencils + i;
2809
2810 struct remote_point tgt;
2811 struct interp_weight_stencil_wsum_mf_weight * curr_weights =
2812 weight_buffer + weight_offset;
2813 int weight_count;
2815 curr_buffer, buffer_size, &position, &tgt, point_info_dt, comm);
2817 MPI_Unpack(curr_buffer, buffer_size, &position,
2818 &weight_count, 1, MPI_INT, comm),
2819 comm);
2821 MPI_Unpack(curr_buffer, buffer_size, &position,
2822 curr_weights, weight_count, wsum_mf_weight_dt, comm), comm);
2823
2824 curr_wsum_stencil->tgt = tgt;
2825 curr_wsum_stencil->data = curr_weights;
2826 curr_wsum_stencil->count = (size_t)weight_count;
2827
2828 weight_offset += (size_t)weight_count;
2829 offset += (size_t)position;
2830 }
2831
2832 yac_mpi_call(MPI_Type_free(&point_info_dt), comm);
2833 yac_mpi_call(MPI_Type_free(&wsum_mf_weight_dt), comm);
2834
2835 return weight_offset;
2836}
2837
2839 MPI_Comm comm, struct interp_weight_stencils_wsum_mf * wsum_stencils_data,
2840 int * stencil_owner, size_t * reorder_idx, size_t num_owners) {
2841
2842 struct interp_weight_stencil_wsum_mf * wsum_stencils =
2843 wsum_stencils_data->data;
2844
2845 int comm_rank, comm_size;
2846 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
2847 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
2848
2849 size_t local_weight_count = 0;
2850 size_t local_count = 0;
2851 for (size_t i = 0; i < num_owners; ++i) {
2852 if (stencil_owner[i] == comm_rank) {
2853 local_weight_count += wsum_stencils[reorder_idx[i]].count;
2854 stencil_owner[i] = INT_MAX;
2855 ++local_count;
2856 }
2857 }
2858 yac_quicksort_index_int_size_t(stencil_owner, num_owners, reorder_idx);
2859
2860 size_t send_count = num_owners - local_count;
2861
2862 // pack the stencils that need to be send to other processes
2863 void * send_buffer;
2864 int * pack_sizes = xmalloc(2 * send_count * sizeof(*pack_sizes));
2865 int * weight_counts = pack_sizes + send_count;
2866 pack_stencils_wsum_mf(wsum_stencils, send_count, reorder_idx, &send_buffer,
2867 pack_sizes, weight_counts, comm);
2868
2869 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
2871 3, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
2872
2873 for (size_t i = 0; i < send_count; ++i) {
2874 int curr_rank = stencil_owner[i];
2875 sendcounts[3 * curr_rank + 0]++;
2876 sendcounts[3 * curr_rank + 1] += (size_t)(pack_sizes[i]);
2877 sendcounts[3 * curr_rank + 2] += (size_t)(weight_counts[i]);
2878 }
2879 free(pack_sizes);
2880
2881 // exchange the number of stencils to be exchanged and the total pack sizes
2882 yac_mpi_call(MPI_Alltoall(sendcounts, 3, YAC_MPI_SIZE_T,
2883 recvcounts, 3, YAC_MPI_SIZE_T, comm), comm);
2884
2885 size_t recv_count = 0;
2886 size_t recv_size = 0;
2887 size_t recv_weight_count = 0;
2888 size_t saccu = 0, raccu = 0;
2889 for (int i = 0; i < comm_size; ++i) {
2890 sdispls[i] = saccu;
2891 rdispls[i] = raccu;
2892 recv_count += recvcounts[3 * i + 0];
2893 recv_size += recvcounts[3 * i + 1];
2894 recv_weight_count += recvcounts[3 * i + 2];
2895 saccu += sendcounts[3 * i + 1];
2896 raccu += recvcounts[3 * i + 1];
2897 sendcounts[i] = sendcounts[3 * i + 1];
2898 recvcounts[i] = recvcounts[3 * i + 1];
2899 }
2900
2901 void * recv_buffer = xmalloc(recv_size);
2902
2903 // exchange stencils
2904 yac_alltoallv_packed_p2p(
2905 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls, comm);
2906 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
2907 free(send_buffer);
2908
2910 xmalloc(sizeof(*temp) +
2911 (local_weight_count + recv_weight_count) * sizeof(temp->buffer[0]));
2912 struct interp_weight_stencils_wsum_mf * new_wsum_stencils_data =
2913 (struct interp_weight_stencils_wsum_mf *)temp;
2914 struct interp_weight_stencil_wsum_mf * new_wsum_stencils =
2915 ((new_wsum_stencils_data->data =
2916 xmalloc((local_count + recv_count) *
2917 sizeof(*(new_wsum_stencils_data->data)))));
2918 new_wsum_stencils_data->count = local_count + recv_count;
2919
2920 // unpack stencils
2921 size_t weight_offset =
2923 new_wsum_stencils, &(temp->buffer[0]), recv_count,
2924 recv_buffer, recv_size, comm);
2925 free(recv_buffer);
2926 new_wsum_stencils += recv_count;
2927 struct interp_weight_stencil_wsum_mf_weight * weight_buffer =
2928 &(temp->buffer[weight_offset]);
2929
2930 // copy the stencils that stay locally into the new stencil array
2931 yac_quicksort_index_size_t_size_t(reorder_idx + send_count, local_count, NULL);
2932 for (size_t i = 0, weight_offset = 0; i < local_count; ++i) {
2933 struct interp_weight_stencil_wsum_mf * curr_wsum_stencil =
2934 wsum_stencils + reorder_idx[i + send_count];
2935 struct interp_weight_stencil_wsum_mf * curr_new_wsum_stencil =
2936 new_wsum_stencils + i;
2937 struct interp_weight_stencil_wsum_mf_weight * curr_new_weights =
2938 weight_buffer + weight_offset;
2939 size_t curr_stencil_size = curr_wsum_stencil->count;
2940 curr_new_wsum_stencil->tgt = copy_remote_point(curr_wsum_stencil->tgt);
2941 curr_new_wsum_stencil->count = curr_stencil_size;
2942 curr_new_wsum_stencil->data = curr_new_weights;
2943 memcpy(curr_new_weights, curr_wsum_stencil->data,
2944 curr_stencil_size * sizeof(*curr_new_weights));
2945 weight_offset += curr_stencil_size;
2946 }
2947
2948 return new_wsum_stencils_data;
2949}
2950
2952 MPI_Comm comm, struct interp_weight_stencils_wsum_mf * wsum_stencils_data) {
2953
2954 struct interp_weight_stencil_wsum_mf * wsum_stencils =
2955 wsum_stencils_data->data;
2956 size_t count = wsum_stencils_data->count;
2957
2958 // determine maximum stencil size
2959 size_t max_stencil_size = 0;
2960 for (size_t i = 0; i < count; ++i) {
2961 size_t curr_stencil_size = wsum_stencils[i].count;
2962 if (curr_stencil_size > max_stencil_size)
2963 max_stencil_size = curr_stencil_size;
2964 }
2965
2966 // determine source process for each stencil
2967 int * rank_buffer =
2968 xmalloc((count + max_stencil_size) * sizeof(*rank_buffer));
2969 int * stencil_owner = rank_buffer;
2970 int * stencil_owners = rank_buffer + count;
2971 size_t * reorder_idx = xmalloc(count * sizeof(*reorder_idx));
2972 for (size_t i = 0; i < count; ++i) {
2973 size_t curr_stencil_size = wsum_stencils[i].count;
2974 struct interp_weight_stencil_wsum_mf_weight * curr_weights =
2975 wsum_stencils[i].data;
2976 for (size_t j = 0; j < curr_stencil_size; ++j)
2977 stencil_owners[j] = curr_weights[j].src.rank;
2978 stencil_owner[i] = compute_owner(stencil_owners, curr_stencil_size);
2979 reorder_idx[i] = i;
2980 }
2981
2982 struct interp_weight_stencils_wsum_mf * new_wsum_stencils_data =
2984 comm, wsum_stencils_data, stencil_owner, reorder_idx, count);
2985
2986 free(reorder_idx);
2987 free(rank_buffer);
2988
2989 return new_wsum_stencils_data;
2990}
2991
2992static int compare_remote_point_info(const void * a, const void * b) {
2993
2994 int ret = ((struct remote_point_info *)a)->rank -
2995 ((struct remote_point_info *)b)->rank;
2996
2997 if (ret) return ret;
2998
2999 return (((struct remote_point_info *)a)->orig_pos >
3000 ((struct remote_point_info *)b)->orig_pos) -
3001 (((struct remote_point_info *)a)->orig_pos <
3002 ((struct remote_point_info *)b)->orig_pos);
3003}
3004
3006 MPI_Comm comm, struct interp_weight_stencils_wsum_mf * wsum_stencils_data) {
3007
3008 struct interp_weight_stencil_wsum_mf * wsum_stencils =
3009 wsum_stencils_data->data;
3010 size_t count = wsum_stencils_data->count;
3011
3012 // determine total number of stencils to be sent to other processes
3013 // (a single stencil may be sent to multiple target processes)
3014 size_t total_owner_count = 0;
3015 for (size_t i = 0; i < count; ++i) {
3016 int stencil_size = wsum_stencils[i].tgt.data.count;
3017 if (stencil_size == 1) {
3018 total_owner_count++;
3019 } else {
3020 struct remote_point_info * tgt_point_infos =
3021 wsum_stencils[i].tgt.data.data.multi;
3022 qsort(
3023 tgt_point_infos, stencil_size, sizeof(*tgt_point_infos),
3025 int prev_rank = INT_MAX;
3026 for (size_t j = 0; j < stencil_size; ++j) {
3027 int curr_rank = tgt_point_infos[j].rank;
3028 if (curr_rank != prev_rank) {
3029 ++total_owner_count;
3030 prev_rank = curr_rank;
3031 }
3032 }
3033 }
3034 }
3035
3036 int * stencil_owner = xmalloc(total_owner_count * sizeof(*stencil_owner));
3037 size_t * reorder_idx = xmalloc(total_owner_count * sizeof(*reorder_idx));
3038 for (size_t i = 0, k = 0; i < count; ++i) {
3039 int stencil_size = wsum_stencils[i].tgt.data.count;
3040 if (stencil_size == 1) {
3041 stencil_owner[k] = wsum_stencils[i].tgt.data.data.single.rank;
3042 reorder_idx[k] = i;
3043 ++k;
3044 } else {
3045 struct remote_point_info * tgt_point_infos =
3046 wsum_stencils[i].tgt.data.data.multi;
3047 int prev_rank = INT_MAX;
3048 for (int j = 0; j < stencil_size; ++j) {
3049 int curr_rank = tgt_point_infos[j].rank;
3050 if (curr_rank != prev_rank) {
3051 stencil_owner[k] = tgt_point_infos[j].rank;
3052 reorder_idx[k] = i;
3053 ++k;
3054 prev_rank = curr_rank;
3055 }
3056 }
3057 }
3058 }
3059
3060 struct interp_weight_stencils_wsum_mf * new_wsum_stencils_data =
3062 comm, wsum_stencils_data, stencil_owner, reorder_idx, total_owner_count);
3063
3064 wsum_stencils = new_wsum_stencils_data->data;
3065 count = new_wsum_stencils_data->count;
3066
3067 free(reorder_idx);
3068 free(stencil_owner);
3069
3070 if (count == 0) return new_wsum_stencils_data;
3071
3072 int comm_rank;
3073 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
3074
3075 // count total number of local target locations
3076 size_t total_num_tgt_pos = 0;
3077 for (size_t i = 0; i < count; ++i) {
3078 size_t curr_count = wsum_stencils[i].tgt.data.count;
3079 if (curr_count == 1) {
3080 ++total_num_tgt_pos;
3081 } else {
3082 struct remote_point_info * curr_point_infos =
3083 wsum_stencils[i].tgt.data.data.multi;
3084 for (size_t j = 0; j < curr_count; ++j)
3085 if (curr_point_infos[j].rank == comm_rank)
3086 ++total_num_tgt_pos;
3087 }
3088 }
3089
3090 if (total_num_tgt_pos != count) {
3091 new_wsum_stencils_data->data =
3092 ((wsum_stencils =
3093 xrealloc(wsum_stencils, total_num_tgt_pos * sizeof(*wsum_stencils))));
3094 new_wsum_stencils_data->count = total_num_tgt_pos;
3095 }
3096
3097 // remove all non local target point information
3098 for (size_t i = 0, offset = count; i < count; ++i) {
3099 size_t curr_count = wsum_stencils[i].tgt.data.count;
3100 if (curr_count > 1) {
3101 struct remote_point_info * curr_point_infos =
3102 wsum_stencils[i].tgt.data.data.multi;
3103 // find first local target point
3104 size_t j;
3105 for (j = 0; j < curr_count; ++j) {
3106 if (curr_point_infos[j].rank == comm_rank) {
3107 wsum_stencils[i].tgt.data.count = 1;
3108 wsum_stencils[i].tgt.data.data.single.rank = comm_rank;
3109 wsum_stencils[i].tgt.data.data.single.orig_pos =
3110 curr_point_infos[j].orig_pos;
3111 break;
3112 }
3113 }
3114 // make a copy for the remaining local target positions
3115 for (j = j + 1; j < curr_count; ++j) {
3116 if (curr_point_infos[j].rank == comm_rank) {
3117 wsum_stencils[offset] = wsum_stencils[i];
3118 wsum_stencils[offset].tgt.data.data.single.orig_pos =
3119 curr_point_infos[j].orig_pos;
3120 ++offset;
3121 }
3122 }
3123 free(curr_point_infos);
3124 }
3125 }
3126
3127 return new_wsum_stencils_data;
3128}
3129
3130static Xt_redist * generate_halo_redists(
3131 struct remote_point_info_reorder * halo_points, size_t count,
3132 size_t num_src_fields, MPI_Comm comm) {
3133
3134 int comm_size;
3135 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
3136
3137 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
3139 num_src_fields, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
3140 size_t * size_t_buffer =
3141 xmalloc(4 * (size_t)comm_size * sizeof(*size_t_buffer));
3142 size_t * total_sendcounts = size_t_buffer + 0 * comm_size;
3143 size_t * total_recvcounts = size_t_buffer + 1 * comm_size;
3144 size_t * total_sdispls = size_t_buffer + 2 * comm_size;
3145 size_t * total_rdispls = size_t_buffer + 3 * comm_size;
3146
3147 for (size_t i = 0; i < count; ++i)
3148 sendcounts[halo_points[i].data.rank * num_src_fields +
3149 halo_points[i].field_idx]++;
3150
3152 num_src_fields, sendcounts, recvcounts, sdispls, rdispls, comm);
3153
3154 size_t saccu = 0, raccu = 0;
3155 for (int i = 0; i < comm_size; ++i) {
3156 total_sdispls[i] = saccu;
3157 total_rdispls[i] = raccu;
3158 total_sendcounts[i] = 0;
3159 total_recvcounts[i] = 0;
3160 for (size_t j = 0; j < num_src_fields; ++j) {
3161 total_sendcounts[i] += sendcounts[num_src_fields * i + j];
3162 total_recvcounts[i] += recvcounts[num_src_fields * i + j];
3163 }
3164 saccu += total_sendcounts[i];
3165 raccu += total_recvcounts[i];
3166 }
3167
3168 size_t recv_count = total_recvcounts[comm_size - 1] +
3169 total_rdispls[comm_size - 1];
3170
3171 int * exchange_buffer =
3172 xmalloc((2 * count + recv_count) * sizeof(*exchange_buffer));
3173 int * send_buffer = exchange_buffer;
3174 int * reorder_idx = exchange_buffer + count;
3175 int * recv_buffer = exchange_buffer + 2 * count;
3176
3177 // pack the original positions of the requested points
3178 size_t num_halo_per_src_field[num_src_fields];
3179 memset(
3180 num_halo_per_src_field, 0,
3181 num_src_fields * sizeof(num_halo_per_src_field[0]));
3182 for (size_t i = 0; i < count; ++i) {
3183 size_t curr_src_field_idx = (size_t)(halo_points[i].field_idx);
3184 size_t pos = sdispls[(size_t)(halo_points[i].data.rank) * num_src_fields +
3185 curr_src_field_idx + 1]++;
3186 uint64_t orig_pos = halo_points[i].data.orig_pos;
3187 YAC_ASSERT(
3188 orig_pos <= INT_MAX,
3189 "ERROR(generate_halo_redists): offset not supported by MPI")
3190 send_buffer[pos] = (int)orig_pos;
3191 reorder_idx[pos] = num_halo_per_src_field[curr_src_field_idx]++;
3192 }
3193
3194 // exchange original positions of the requested points
3195 yac_alltoallv_int_p2p(
3196 send_buffer, total_sendcounts, total_sdispls,
3197 recv_buffer, total_recvcounts, total_rdispls, comm);
3198
3199 free(size_t_buffer);
3200
3201 size_t nsend = 0, nsends[num_src_fields];
3202 size_t nrecv = 0, nrecvs[num_src_fields];
3203 memset(nsends, 0, num_src_fields * sizeof(nsends[0]));
3204 memset(nrecvs, 0, num_src_fields * sizeof(nrecvs[0]));
3205 for (int i = 0; i < comm_size; ++i) {
3206 for (size_t field_idx = 0; field_idx < num_src_fields; ++field_idx) {
3207 if (sendcounts[i * num_src_fields + field_idx] > 0) {
3208 nrecv++;
3209 nrecvs[field_idx]++;
3210 }
3211 if (recvcounts[i * num_src_fields + field_idx] > 0) {
3212 nsend++;
3213 nsends[field_idx]++;
3214 }
3215 }
3216 }
3217
3218 size_t total_num_msg = nsend + nrecv;
3219
3220 struct Xt_redist_msg * msgs_buffer =
3221 xmalloc(total_num_msg * sizeof(*msgs_buffer));
3222 struct Xt_redist_msg * send_msgs = msgs_buffer;
3223 struct Xt_redist_msg * recv_msgs = msgs_buffer + nsend;
3224
3225 for (size_t field_idx = 0, nsend = 0, nrecv = 0;
3226 field_idx < num_src_fields; ++field_idx) {
3227 for (int rank = 0; rank < comm_size; ++rank) {
3228 size_t idx = (size_t)rank * num_src_fields + field_idx;
3229 if (sendcounts[idx] > 0) {
3230 recv_msgs[nrecv].rank = rank;
3231 recv_msgs[nrecv].datatype =
3232 xt_mpi_generate_datatype(
3233 reorder_idx + sdispls[idx], sendcounts[idx], MPI_DOUBLE, comm);
3234 nrecv++;
3235 }
3236 if (recvcounts[idx] > 0) {
3237 send_msgs[nsend].rank = rank;
3238 send_msgs[nsend].datatype =
3239 xt_mpi_generate_datatype(
3240 recv_buffer + rdispls[idx], recvcounts[idx], MPI_DOUBLE, comm);
3241 nsend++;
3242 }
3243 }
3244 }
3245
3246 Xt_redist * redist;
3247 MPI_Comm halo_comm;
3248
3249 if (total_num_msg > 0) {
3250
3251 yac_mpi_call(MPI_Comm_split(comm, 1, 0, &halo_comm), comm);
3252
3253 int * rank_buffer = xmalloc(2 * total_num_msg * sizeof(*rank_buffer));
3254 int * orig_ranks = rank_buffer;
3255 int * split_ranks = rank_buffer + total_num_msg;
3256
3257 for (size_t i = 0; i < total_num_msg; ++i)
3258 orig_ranks[i] = msgs_buffer[i].rank;
3259
3260 MPI_Group orig_group, split_group;
3261 yac_mpi_call(MPI_Comm_group(comm, &orig_group), comm);
3262 yac_mpi_call(MPI_Comm_group(halo_comm, &split_group), comm);
3263
3265 MPI_Group_translate_ranks(orig_group, (int)total_num_msg, orig_ranks,
3266 split_group, split_ranks), halo_comm);
3267
3268 for (size_t i = 0; i < total_num_msg; ++i)
3269 msgs_buffer[i].rank = split_ranks[i];
3270
3271 free(rank_buffer);
3272
3273 yac_mpi_call(MPI_Group_free(&split_group), comm);
3274 yac_mpi_call(MPI_Group_free(&orig_group), comm);
3275
3276 // generate redist
3277 redist = xmalloc(num_src_fields * sizeof(*redist));
3278 if (num_src_fields == 1) {
3279 *redist =
3280 xt_redist_single_array_base_new(
3281 nsend, nrecv, send_msgs, recv_msgs, halo_comm);
3282 } else {
3283 for (size_t field_idx = 0; field_idx < num_src_fields; ++field_idx) {
3284 redist[field_idx] =
3285 xt_redist_single_array_base_new(
3286 nsends[field_idx], nrecvs[field_idx],
3287 send_msgs, recv_msgs, halo_comm);
3288 send_msgs += nsends[field_idx];
3289 recv_msgs += nrecvs[field_idx];
3290 }
3291 }
3292
3293 } else {
3294 yac_mpi_call(MPI_Comm_split(comm, 0, 0, &halo_comm), comm);
3295 redist = NULL;
3296 }
3297
3298 yac_mpi_call(MPI_Comm_free(&halo_comm), comm);
3299 free(exchange_buffer);
3300 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
3301
3302 xt_redist_msg_free(msgs_buffer, total_num_msg, comm);
3303
3304 return redist;
3305}
3306
3307static int compare_rank_pos_reorder_field_idx(const void * a, const void * b) {
3308
3309 int ret = (((struct remote_point_info_reorder *)a)->field_idx >
3310 ((struct remote_point_info_reorder *)b)->field_idx) -
3311 (((struct remote_point_info_reorder *)a)->field_idx <
3312 ((struct remote_point_info_reorder *)b)->field_idx);
3313
3314 if (ret) return ret;
3315
3316 ret = ((struct remote_point_info_reorder *)a)->data.rank -
3317 ((struct remote_point_info_reorder *)b)->data.rank;
3318
3319 if (ret) return ret;
3320
3321 return (((struct remote_point_info_reorder *)a)->data.orig_pos >
3322 ((struct remote_point_info_reorder *)b)->data.orig_pos) -
3323 (((struct remote_point_info_reorder *)a)->data.orig_pos <
3324 ((struct remote_point_info_reorder *)b)->data.orig_pos);
3325}
3326
3328 const void * a, const void * b) {
3329
3330 struct interp_weight_stencil_wsum_mf * a_ =
3332 struct interp_weight_stencil_wsum_mf * b_ =
3334
3335 size_t count = MIN(a_->count, b_->count);
3336
3337 for (size_t i = 0; i < count; ++i) {
3338 int ret = (a_->data[i].src_field_idx > b_->data[i].src_field_idx) -
3339 (a_->data[i].src_field_idx < b_->data[i].src_field_idx);
3340 if (ret) return ret;
3341 ret = (a_->data[i].src.orig_pos > b_->data[i].src.orig_pos) -
3342 (a_->data[i].src.orig_pos < b_->data[i].src.orig_pos);
3343 if (ret) return ret;
3344 }
3345 return 0;
3346}
3347
3349 const void * a, const void * b) {
3350
3351 struct interp_weight_stencil_wsum_mf * a_ =
3353 struct interp_weight_stencil_wsum_mf * b_ =
3355
3356 YAC_ASSERT(
3357 (a_->tgt.data.count == 1) && (b_->tgt.data.count == 1),
3358 "ERROR(compare_interp_weight_stencil_wsum_tgt_orig_pos): invalid data")
3359
3360 size_t a_orig_pos = a_->tgt.data.data.single.orig_pos;
3361 size_t b_orig_pos = b_->tgt.data.data.single.orig_pos;
3362
3363 return (a_orig_pos > b_orig_pos) - (a_orig_pos < b_orig_pos);
3364}
3365
3366static void free_remote_point(struct remote_point point) {
3367
3368 if (point.data.count > 1) free(point.data.data.multi);
3369}
3370
3372 enum yac_location location, struct remote_point_infos * point_infos,
3373 size_t count, MPI_Comm comm) {
3374
3375 int comm_size;
3376 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
3377
3378 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
3380 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
3381
3382 for (size_t i = 0; i < count; ++i) {
3383 int curr_count = point_infos[i].count;
3384 struct remote_point_info * curr_point_infos =
3385 (curr_count == 1)?
3386 (&(point_infos[i].data.single)):(point_infos[i].data.multi);
3387 YAC_ASSERT(
3388 curr_count >= 1,
3389 "ERROR(generate_redist_put_double): no owner found for global id")
3390 for (int j = 0; j < curr_count; ++j)
3391 sendcounts[curr_point_infos[j].rank]++;
3392 }
3393
3395 1, sendcounts, recvcounts, sdispls, rdispls, comm);
3396
3397 size_t send_count =
3398 sdispls[comm_size] + sendcounts[comm_size - 1];
3399 size_t recv_count =
3400 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
3401
3402 int * exchange_buffer =
3403 xmalloc((2 * send_count + recv_count) * sizeof(*exchange_buffer));
3404 int * send_buffer = exchange_buffer;
3405 int * reorder_idx = exchange_buffer + send_count;
3406 int * recv_buffer = exchange_buffer + 2 * send_count;
3407
3408 // pack the original positions of the points that have to updated
3409 for (size_t i = 0; i < count; ++i) {
3410 int curr_count = point_infos[i].count;
3411 struct remote_point_info * curr_point_infos =
3412 (curr_count == 1)?
3413 (&(point_infos[i].data.single)):(point_infos[i].data.multi);
3414 for (int j = 0; j < curr_count; ++j) {
3415 size_t pos = sdispls[curr_point_infos[j].rank + 1]++;
3416 uint64_t orig_pos = curr_point_infos[j].orig_pos;
3417 YAC_ASSERT(
3418 orig_pos <= INT_MAX,
3419 "ERROR(generate_redist_put_double): offset not supported by MPI")
3420 send_buffer[pos] = (int)orig_pos;
3421 reorder_idx[pos] = i;
3422 }
3423 }
3424
3425 // exchange original positions of the points that have to updated
3426 yac_alltoallv_int_p2p(
3427 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls, comm);
3428
3429 size_t nsend = 0;
3430 size_t nrecv = 0;
3431 for (int i = 0; i < comm_size; ++i) {
3432 if (sendcounts[i] > 0) nsend++;
3433 if (recvcounts[i] > 0) nrecv++;
3434 }
3435
3436 struct Xt_redist_msg * send_msgs = xmalloc(nsend * sizeof(*send_msgs));
3437 struct Xt_redist_msg * recv_msgs = xmalloc(nrecv * sizeof(*send_msgs));
3438
3439 for (int i = 0, nsend = 0, nrecv = 0; i < comm_size; ++i) {
3440 if (sendcounts[i] > 0) {
3441 send_msgs[nsend].rank = i;
3442 send_msgs[nsend].datatype =
3443 xt_mpi_generate_datatype(
3444 reorder_idx + sdispls[i], sendcounts[i], MPI_DOUBLE, comm);
3445 nsend++;
3446 }
3447 if (recvcounts[i] > 0) {
3448 recv_msgs[nrecv].rank = i;
3449 recv_msgs[nrecv].datatype =
3450 xt_mpi_generate_datatype(
3451 recv_buffer + rdispls[i], recvcounts[i], MPI_DOUBLE, comm);
3452 nrecv++;
3453 }
3454 }
3455
3456 // generate redist
3457 Xt_redist redist =
3458 xt_redist_single_array_base_new(nsend, nrecv, send_msgs, recv_msgs, comm);
3459
3460 free(exchange_buffer);
3461 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
3462
3463 xt_redist_msg_free(recv_msgs, nrecv, comm);
3464 xt_redist_msg_free(send_msgs, nsend, comm);
3465
3466 return redist;
3467}
3468
3470 MPI_Comm comm, enum yac_location tgt_location,
3471 struct interp_weight_stencils_wsum_mf * wsum_mf_stencils_data,
3472 struct yac_interpolation * interp,
3474 enum yac_interp_weight_stencil_type stencil_type) {
3475
3476 int comm_rank;
3477 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
3478
3479 // redistribute stencils to respective owners
3480 struct interp_weight_stencils_wsum_mf * (*redist_wsum_mf_stencils)(
3481 MPI_Comm comm, struct interp_weight_stencils_wsum_mf * wsum_mf_stencils_data);
3482 YAC_ASSERT(
3483 (reorder == YAC_MAPPING_ON_SRC) || (reorder == YAC_MAPPING_ON_TGT),
3484 "ERROR(yac_interp_weights_redist_w_sum_mf): invalid reorder type")
3486 (reorder == YAC_MAPPING_ON_SRC)?
3488 struct interp_weight_stencils_wsum_mf * new_wsum_mf_stencils_data =
3489 redist_wsum_mf_stencils(comm, wsum_mf_stencils_data);
3490
3491 size_t wsum_mf_count = new_wsum_mf_stencils_data->count;
3492 struct interp_weight_stencil_wsum_mf * wsum_mf_stencils =
3493 new_wsum_mf_stencils_data->data;
3494
3495 // compute the total number of links
3496 size_t total_num_links = 0, total_num_remote_weights = 0;
3497 for (size_t i = 0; i < wsum_mf_count; ++i) {
3498 size_t curr_stencil_size = wsum_mf_stencils[i].count;
3499 total_num_links += curr_stencil_size;
3500 for (size_t j = 0; j < curr_stencil_size; ++j)
3501 if (wsum_mf_stencils[i].data[j].src.rank != comm_rank)
3502 ++total_num_remote_weights;
3503 }
3504
3505 // gather all remote source points and determine number of source fields
3506 struct remote_point_info_reorder * remote_src_points =
3507 xmalloc(total_num_remote_weights * sizeof(*remote_src_points));
3508 size_t num_src_fields = 0;
3509 for (size_t i = 0, k = 0; i < wsum_mf_count; ++i) {
3510 size_t curr_stencil_size = wsum_mf_stencils[i].count;
3511 struct interp_weight_stencil_wsum_mf_weight * curr_weights =
3512 wsum_mf_stencils[i].data;
3513 for (size_t j = 0; j < curr_stencil_size; ++j) {
3514 size_t curr_src_field_idx = curr_weights[j].src_field_idx;
3515 if (curr_src_field_idx >= num_src_fields)
3516 num_src_fields = curr_src_field_idx + 1;
3517 if (curr_weights[j].src.rank != comm_rank) {
3518 remote_src_points[k].data = curr_weights[j].src;
3519 remote_src_points[k].field_idx = curr_src_field_idx;
3520 remote_src_points[k].reorder_idx = i;
3521 ++k;
3522 }
3523 }
3524 }
3526 MPI_Allreduce(
3527 MPI_IN_PLACE, &num_src_fields, 1, YAC_MPI_SIZE_T, MPI_MAX, comm), comm);
3528
3529 // sort remote points first by field_idx, second by rank, and
3530 // then by orig_pos
3531 qsort(remote_src_points, total_num_remote_weights, sizeof(*remote_src_points),
3533
3534 // update stencils: set owner to -1; set orig_pos to position of respecitve
3535 // point in halo data
3536 // remove duplicated remote points
3537 struct remote_point_info * prev_remote_src_point;
3538 size_t prev_field_idx;
3539 size_t halo_size;
3540 if (total_num_remote_weights > 0) {
3541 prev_remote_src_point = &(remote_src_points[0].data);
3542 prev_field_idx = remote_src_points[0].field_idx;
3543 halo_size = 1;
3544 } else {
3545 prev_field_idx = SIZE_MAX;
3546 halo_size = 0;
3547 }
3548 for (size_t i = 0; i < total_num_remote_weights; ++i) {
3549 struct remote_point_info * curr_remote_src_point =
3550 &(remote_src_points[i].data);
3551 size_t curr_field_idx = remote_src_points[i].field_idx;
3553 prev_remote_src_point, curr_remote_src_point) ||
3554 (prev_field_idx != curr_field_idx)) {
3555 prev_remote_src_point = curr_remote_src_point;
3556 prev_field_idx = curr_field_idx;
3557 remote_src_points[halo_size].data = *curr_remote_src_point;
3558 remote_src_points[halo_size].field_idx = curr_field_idx;
3559 ++halo_size;
3560 }
3561 struct interp_weight_stencil_wsum_mf * curr_stencil =
3562 wsum_mf_stencils + remote_src_points[i].reorder_idx;
3563 size_t curr_stencil_size = curr_stencil->count;
3564 for (size_t j = 0; j < curr_stencil_size; ++j) {
3566 &(curr_stencil->data[j].src), curr_remote_src_point)) &&
3567 (curr_stencil->data[j].src_field_idx == curr_field_idx)) {
3568 curr_stencil->data[j].src.rank = -1;
3569 curr_stencil->data[j].src.orig_pos = halo_size - 1;
3570 curr_stencil->data[j].src_field_idx = SIZE_MAX;
3571 }
3572 }
3573 }
3574
3575 // sort stencils by their memory access pattern on the local process
3576 qsort(wsum_mf_stencils, wsum_mf_count, sizeof(*wsum_mf_stencils),
3577 (reorder == YAC_MAPPING_ON_SRC)?
3580
3581 size_t * num_src_per_tgt = xmalloc(wsum_mf_count * sizeof(*num_src_per_tgt));
3582 double * weights = xmalloc(total_num_links * sizeof(*weights));
3583 size_t * src_idx = xmalloc(total_num_links * sizeof(*src_idx));
3584 size_t * src_field_idx = xmalloc(total_num_links * sizeof(*src_field_idx));
3585
3586 // extract data from stencil
3587 for (size_t i = 0, k = 0; i < wsum_mf_count; ++i) {
3588 size_t curr_stencil_size = wsum_mf_stencils[i].count;
3589 struct interp_weight_stencil_wsum_mf_weight * curr_weights =
3590 wsum_mf_stencils[i].data;
3591 num_src_per_tgt[i] = curr_stencil_size;
3592 for (size_t j = 0; j < curr_stencil_size; ++j, ++k){
3593 weights[k] = curr_weights[j].weight;
3594 src_idx[k] = curr_weights[j].src.orig_pos;
3595 src_field_idx[k] = curr_weights[j].src_field_idx;
3596 }
3597 }
3598
3599 // generate halo redists (one per source field
3600 Xt_redist * halo_redists =
3602 remote_src_points, halo_size, num_src_fields, comm);
3603
3604 YAC_ASSERT(
3605 (stencil_type == WEIGHT_SUM) ||
3606 (stencil_type == SUM) ||
3607 (stencil_type == WEIGHT_SUM_MF) ||
3608 (stencil_type == SUM_MF),
3609 "ERROR(yac_interp_weights_redist_w_sum_mf): unsupported stencil type");
3610
3611 // add weights to interpolation
3612 if (reorder == YAC_MAPPING_ON_SRC) {
3613
3614 // generate result redist
3615 struct remote_point_infos * tgt_infos =
3616 xmalloc(wsum_mf_count * sizeof(*tgt_infos));
3617 for (size_t i = 0; i < wsum_mf_count; ++i)
3618 tgt_infos[i] = wsum_mf_stencils[i].tgt.data;
3619 Xt_redist result_redist =
3621 tgt_location, tgt_infos, wsum_mf_count, comm);
3622 free(tgt_infos);
3623
3624 switch(stencil_type) {
3625 default:
3626 case (WEIGHT_SUM):
3627 case (WEIGHT_SUM_MF):
3629 interp, halo_redists, wsum_mf_count, num_src_per_tgt, weights,
3630 src_field_idx, src_idx, ((stencil_type==WEIGHT_SUM)?1:num_src_fields),
3631 result_redist);
3632 break;
3633 case (SUM):
3634 case (SUM_MF):
3636 interp, halo_redists, wsum_mf_count, num_src_per_tgt,
3637 src_field_idx, src_idx, ((stencil_type == SUM)?1:num_src_fields),
3638 result_redist);
3639 break;
3640 }
3641
3642 if (result_redist != NULL) xt_redist_delete(result_redist);
3643
3644 } else {
3645
3646 size_t * tgt_orig_pos = xmalloc(wsum_mf_count * sizeof(*tgt_orig_pos));
3647 for (size_t i = 0; i < wsum_mf_count; ++i) {
3648 YAC_ASSERT(
3649 wsum_mf_stencils[i].tgt.data.count == 1,
3650 "ERROR(yac_interp_weights_redist_w_sum): currently unsupported target "
3651 "point distribution")
3652 tgt_orig_pos[i] =
3653 (size_t)(wsum_mf_stencils[i].tgt.data.data.single.orig_pos);
3654 }
3655
3656 switch(stencil_type) {
3657 default:
3658 case (WEIGHT_SUM):
3659 case (WEIGHT_SUM_MF):
3661 interp, halo_redists, tgt_orig_pos, wsum_mf_count,
3662 num_src_per_tgt, weights, src_field_idx, src_idx,
3663 ((stencil_type == WEIGHT_SUM)?1:num_src_fields));
3664 break;
3665 case (SUM):
3666 case (SUM_MF):
3668 interp, halo_redists, tgt_orig_pos, wsum_mf_count,
3669 num_src_per_tgt, src_field_idx, src_idx,
3670 ((stencil_type == SUM)?1:num_src_fields));
3671 break;
3672 }
3673 free(tgt_orig_pos);
3674 }
3675
3676 for (size_t i = 0; i < new_wsum_mf_stencils_data->count; ++i)
3677 free_remote_point(new_wsum_mf_stencils_data->data[i].tgt);
3678 free(new_wsum_mf_stencils_data->data);
3679 free(new_wsum_mf_stencils_data);
3680
3681 free(remote_src_points);
3682 free(src_field_idx);
3683 free(src_idx);
3684 free(weights);
3685 free(num_src_per_tgt);
3686 if (halo_redists != NULL) {
3687 for (size_t i = 0; i < num_src_fields; ++i)
3688 xt_redist_delete(halo_redists[i]);
3689 free(halo_redists);
3690 }
3691}
3692
3693static int compare_stencils(const void * a, const void * b) {
3694
3695 return (int)(((struct interp_weight_stencil *)a)->type) -
3696 (int)(((struct interp_weight_stencil *)b)->type);
3697}
3698
3700 struct yac_interp_weights * weights,
3703 double scaling_factor, double scaling_summand) {
3704
3705 MPI_Comm comm = weights->comm;
3706
3707 // sort stencils by type
3708 qsort(weights->stencils, weights->stencils_size, sizeof(*(weights->stencils)),
3710
3711 uint64_t local_stencil_counts[WEIGHT_STENCIL_TYPE_SIZE];
3712 size_t stencils_offsets[WEIGHT_STENCIL_TYPE_SIZE];
3713
3714 // count local number of stencils per type
3715 memset(&(local_stencil_counts[0]), 0, sizeof(local_stencil_counts));
3716 for (size_t i = 0; i < weights->stencils_size; ++i)
3717 local_stencil_counts[(int)(weights->stencils[i].type)]++;
3718
3719 for (size_t i = 0, accu = 0; i < (size_t)WEIGHT_STENCIL_TYPE_SIZE; ++i) {
3720 stencils_offsets[i] = accu;
3721 accu += local_stencil_counts[i];
3722 }
3723
3724 uint64_t global_stencil_counts[WEIGHT_STENCIL_TYPE_SIZE];
3725
3726 // determine global number of stencils per type
3728 MPI_Allreduce(
3729 local_stencil_counts, global_stencil_counts,
3730 (int)WEIGHT_STENCIL_TYPE_SIZE, MPI_UINT64_T, MPI_SUM, comm), comm);
3731
3732 { // check whether the collection_size is consistant across all processes
3733 uint64_t max_collection_size = (uint64_t)collection_size;
3735 MPI_Allreduce(
3736 MPI_IN_PLACE, &max_collection_size, 1, MPI_UINT64_T, MPI_MAX, comm),
3737 comm);
3738 YAC_ASSERT(
3739 (size_t)max_collection_size == collection_size,
3740 "ERROR(yac_interp_weights_get_interpolation): "
3741 "mismatching collection sizes")
3742 }
3743
3744 struct yac_interpolation * interp =
3747 scaling_factor, scaling_summand);
3748
3749 if (global_stencil_counts[FIXED] > 0)
3751 weights->comm, local_stencil_counts[FIXED],
3752 weights->stencils + stencils_offsets[FIXED], interp);
3753
3754 if (global_stencil_counts[DIRECT] > 0)
3756 weights->comm, local_stencil_counts[DIRECT],
3757 weights->stencils + stencils_offsets[DIRECT], interp);
3758
3759 if (global_stencil_counts[SUM] > 0) {
3760
3761 struct interp_weight_stencils_wsum_mf * wsum_stencils =
3763 weights->stencils + stencils_offsets[SUM],
3764 (size_t)(local_stencil_counts[SUM]), SUM);
3765 YAC_ASSERT(
3766 (reorder == YAC_MAPPING_ON_SRC) || (reorder == YAC_MAPPING_ON_TGT),
3767 "ERROR(yac_interp_weights_get_interpolation): invalid reorder type")
3769 weights->comm, weights->tgt_location,
3770 wsum_stencils, interp, reorder, SUM);
3771 for (size_t i = 0; i < wsum_stencils->count; ++i)
3772 free_remote_point(wsum_stencils->data[i].tgt);
3773 free(wsum_stencils->data);
3774 free(wsum_stencils);
3775 }
3776
3777 if (global_stencil_counts[WEIGHT_SUM] > 0) {
3778
3779 struct interp_weight_stencils_wsum_mf * wsum_stencils =
3781 weights->stencils + stencils_offsets[WEIGHT_SUM],
3782 (size_t)(local_stencil_counts[WEIGHT_SUM]), WEIGHT_SUM);
3783 YAC_ASSERT(
3784 (reorder == YAC_MAPPING_ON_SRC) || (reorder == YAC_MAPPING_ON_TGT),
3785 "ERROR(yac_interp_weights_get_interpolation): invalid reorder type")
3787 weights->comm, weights->tgt_location,
3788 wsum_stencils, interp, reorder, WEIGHT_SUM);
3789 for (size_t i = 0; i < wsum_stencils->count; ++i)
3790 free_remote_point(wsum_stencils->data[i].tgt);
3791 free(wsum_stencils->data);
3792 free(wsum_stencils);
3793 }
3794
3795 if (global_stencil_counts[DIRECT_MF] > 0)
3797 weights->comm, local_stencil_counts[DIRECT_MF],
3798 weights->stencils + stencils_offsets[DIRECT_MF], interp);
3799
3800 if (global_stencil_counts[SUM_MF] > 0) {
3801
3802 struct interp_weight_stencils_wsum_mf * sum_mf_stencils =
3804 weights->stencils + stencils_offsets[SUM_MF],
3805 (size_t)(local_stencil_counts[SUM_MF]), SUM_MF);
3806 YAC_ASSERT(
3807 (reorder == YAC_MAPPING_ON_SRC) || (reorder == YAC_MAPPING_ON_TGT),
3808 "ERROR(yac_interp_weights_get_interpolation): invalid reorder type")
3810 weights->comm, weights->tgt_location,
3811 sum_mf_stencils, interp, reorder, SUM_MF);
3812 for (size_t i = 0; i < sum_mf_stencils->count; ++i)
3813 free_remote_point(sum_mf_stencils->data[i].tgt);
3814 free(sum_mf_stencils->data);
3815 free(sum_mf_stencils);
3816 }
3817
3818 if (global_stencil_counts[WEIGHT_SUM_MF] > 0) {
3819
3820 struct interp_weight_stencils_wsum_mf * wsum_mf_stencils =
3822 weights->stencils + stencils_offsets[WEIGHT_SUM_MF],
3823 (size_t)(local_stencil_counts[WEIGHT_SUM_MF]), WEIGHT_SUM_MF);
3824 YAC_ASSERT(
3825 (reorder == YAC_MAPPING_ON_SRC) || (reorder == YAC_MAPPING_ON_TGT),
3826 "ERROR(yac_interp_weights_get_interpolation): invalid reorder type")
3828 weights->comm, weights->tgt_location,
3829 wsum_mf_stencils, interp, reorder, WEIGHT_SUM_MF);
3830 for (size_t i = 0; i < wsum_mf_stencils->count; ++i)
3831 free_remote_point(wsum_mf_stencils->data[i].tgt);
3832 free(wsum_mf_stencils->data);
3833 free(wsum_mf_stencils);
3834 }
3835
3836 return interp;
3837}
3838
3840 struct yac_interp_weights * weights, int reorder,
3842 double scaling_factor, double scaling_summand) {
3843
3844 YAC_ASSERT(
3845 (reorder == YAC_MAPPING_ON_SRC) ||
3846 (reorder == YAC_MAPPING_ON_TGT),
3847 "ERROR(yac_interp_weights_get_interpolation_f2c): "
3848 "reorder type must be of YAC_MAPPING_ON_SRC/YAC_MAPPING_ON_TGT");
3849
3850 return
3852 weights, (enum yac_interp_weights_reorder_type)reorder,
3854 scaling_factor, scaling_summand);
3855}
3856
3858
3859 free(points->data);
3860 free(points);
3861}
3862
3864 struct interp_weight_stencil * stencils, size_t count) {
3865
3866 for (size_t i = 0 ; i < count; ++i) {
3867
3868 YAC_ASSERT(
3869 (stencils[i].type == DIRECT) ||
3870 (stencils[i].type == SUM) ||
3871 (stencils[i].type == WEIGHT_SUM) ||
3872 (stencils[i].type == DIRECT_MF) ||
3873 (stencils[i].type == SUM_MF) ||
3874 (stencils[i].type == WEIGHT_SUM_MF) ||
3875 (stencils[i].type == FIXED),
3876 "ERROR(yac_interp_weights_delete): invalid stencil type")
3877 switch(stencils[i].type) {
3878
3879 case(DIRECT):
3880 free_remote_point(stencils[i].data.direct.src);
3881 break;
3882 case(SUM):
3883 free_remote_points(stencils[i].data.sum.srcs);
3884 break;
3885 case(WEIGHT_SUM):
3886 free_remote_points(stencils[i].data.weight_sum.srcs);
3887 free(stencils[i].data.weight_sum.weights);
3888 break;
3889 case(DIRECT_MF):
3890 free_remote_point(stencils[i].data.direct_mf.src);
3891 break;
3892 case(SUM_MF):
3893 free_remote_points(stencils[i].data.sum_mf.srcs);
3894 free(stencils[i].data.sum_mf.field_indices);
3895 break;
3896 case (WEIGHT_SUM_MF):
3897 free_remote_points(stencils[i].data.weight_sum_mf.srcs);
3898 free(stencils[i].data.weight_sum_mf.weights);
3899 free(stencils[i].data.weight_sum_mf.field_indices);
3900 break;
3901 default:
3902 case(FIXED):
3903 break;
3904 };
3905 free_remote_point(stencils[i].tgt);
3906 }
3907 free(stencils);
3908}
3909
3910#ifdef YAC_NETCDF_ENABLED
3911static int compare_double(void const * a, void const * b) {
3912
3913 return (*(double const *)a > *(double const *)b) -
3914 (*(double const *)a < *(double const *)b);
3915}
3916
3921 char const * filename, char const * src_grid_name, char const * tgt_grid_name,
3922 size_t num_fixed_values, double * fixed_values,
3923 size_t * num_tgt_per_fixed_value, size_t num_links,
3924 size_t num_weights_per_link, size_t num_src_fields,
3925 size_t * num_links_per_src_field,
3926 enum yac_location * src_locations, enum yac_location tgt_location,
3927 size_t src_grid_size, size_t tgt_grid_size) {
3928
3929 int ncid;
3930
3931 // create file
3932 yac_nc_create(filename, NC_CLOBBER | NC_64BIT_OFFSET, &ncid);
3933
3934 int dim_weight_id[8];
3935
3936 // define dimensions
3937 if (num_links > 0) {
3938 YAC_HANDLE_ERROR(nc_def_dim(ncid, "num_links", num_links, &dim_weight_id[0]));
3940 num_weights_per_link > 0,
3941 "ERROR(create_weight_file): number of links is %zu but number of "
3942 "weights per link is zero for weight file %s", num_links, filename)
3944 nc_def_dim(ncid, "num_wgts", num_weights_per_link, &dim_weight_id[1]));
3945 }
3947 num_src_fields > 0,
3948 "ERROR(create_weight_file): number of source fields is zero for "
3949 "weight file %s", filename)
3951 nc_def_dim(ncid, "num_src_fields", num_src_fields, &dim_weight_id[2]));
3953 nc_def_dim(
3954 ncid, "max_loc_str_len", YAC_MAX_LOC_STR_LEN, &dim_weight_id[3]));
3955
3956 if (num_fixed_values > 0) {
3958 nc_def_dim(
3959 ncid, "num_fixed_values", num_fixed_values, &dim_weight_id[4]));
3960 size_t num_fixed_dst = 0;
3961 for (size_t i = 0; i < num_fixed_values; ++i)
3962 num_fixed_dst += num_tgt_per_fixed_value[i];
3964 num_fixed_dst > 0,
3965 "ERROR(create_weight_file): number of fixed values is %zu but number "
3966 "of fixed destination points is zero for weight file %s",
3967 num_fixed_dst, filename)
3969 nc_def_dim(ncid, "num_fixed_dst", num_fixed_dst, &dim_weight_id[5]));
3970 }
3971
3972 if (src_grid_size > 0)
3974 nc_def_dim(ncid, "src_grid_size", src_grid_size, &dim_weight_id[6]));
3975
3976 if (tgt_grid_size > 0)
3978 nc_def_dim(ncid, "dst_grid_size", tgt_grid_size, &dim_weight_id[7]));
3979
3980 int var_src_add_id, var_dst_add_id, var_weight_id, var_num_links_id,
3981 src_var_locs_id, tgt_var_loc_id, var_fixed_values_id,
3982 var_num_dst_per_fixed_value_id, var_dst_add_fixed_id;
3983
3984 // define variables
3985 if (num_links > 0) {
3987 nc_def_var(
3988 ncid, "src_address", NC_INT, 1, dim_weight_id, &var_src_add_id));
3990 nc_def_var(
3991 ncid, "dst_address", NC_INT, 1, dim_weight_id, &var_dst_add_id));
3993 nc_def_var(
3994 ncid, "remap_matrix", NC_DOUBLE, 2, dim_weight_id, &var_weight_id));
3996 nc_def_var(ncid, "num_links_per_src_field", NC_INT, 1,
3997 &dim_weight_id[2], &var_num_links_id));
3998 }
4000 nc_def_var(
4001 ncid, "src_locations", NC_CHAR, 2, &dim_weight_id[2], &src_var_locs_id));
4003 nc_def_var(
4004 ncid, "dst_location", NC_CHAR, 1, &dim_weight_id[3], &tgt_var_loc_id));
4005 if (num_fixed_values > 0) {
4007 nc_def_var(ncid, "fixed_values", NC_DOUBLE, 1, &dim_weight_id[4],
4008 &var_fixed_values_id));
4010 nc_def_var(ncid, "num_dst_per_fixed_value", NC_INT, 1, &dim_weight_id[4],
4011 &var_num_dst_per_fixed_value_id));
4013 nc_def_var(ncid, "dst_address_fixed", NC_INT, 1, &dim_weight_id[5],
4014 &var_dst_add_fixed_id));
4015 }
4016
4017 // put attributes
4019 nc_put_att_text(ncid, NC_GLOBAL, "version",
4023 nc_put_att_text(ncid, NC_GLOBAL, "src_grid_name",
4024 strlen(src_grid_name), src_grid_name));
4026 nc_put_att_text(ncid, NC_GLOBAL, "dst_grid_name",
4027 strlen(tgt_grid_name), tgt_grid_name));
4028 {
4029 char const * str_logical[2] = {"FALSE", "TRUE"};
4030 YAC_HANDLE_ERROR(nc_put_att_text(ncid, NC_GLOBAL, "contains_links",
4031 strlen(str_logical[num_links > 0]),
4032 str_logical[num_links > 0]));
4033 YAC_HANDLE_ERROR(nc_put_att_text(ncid, NC_GLOBAL, "contains_fixed_dst",
4034 strlen(str_logical[num_fixed_values > 0]),
4035 str_logical[num_fixed_values > 0]));
4036 }
4037
4038 // end definition
4039 YAC_HANDLE_ERROR(nc_enddef(ncid));
4040
4041 // write some basic data
4042
4043 if (num_links > 0) {
4044 int * num_links_per_src_field_int =
4045 xmalloc(num_src_fields * sizeof(*num_links_per_src_field_int));
4046 for (size_t i = 0; i < num_src_fields; ++i) {
4047 YAC_ASSERT(
4048 num_links_per_src_field[i] <= INT_MAX,
4049 "ERROR(create_weight_file): "
4050 "number of links per source field too big (not yet supported)")
4051 num_links_per_src_field_int[i] = (int)num_links_per_src_field[i];
4052 }
4054 nc_put_var_int(ncid, var_num_links_id, num_links_per_src_field_int));
4055 free(num_links_per_src_field_int);
4056 }
4057
4058 for (size_t i = 0; i < num_src_fields; ++i) {
4059 char const * loc_str = yac_loc2str(src_locations[i]);
4060 size_t str_start[2] = {i, 0};
4061 size_t str_count[2] = {1, strlen(loc_str)};
4063 nc_put_vara_text(ncid, src_var_locs_id, str_start, str_count, loc_str));
4064 }
4065
4066 {
4067 char const * loc_str = yac_loc2str(tgt_location);
4068 size_t str_start[1] = {0};
4069 size_t str_count[1] = {strlen(loc_str)};
4071 nc_put_vara_text(ncid, tgt_var_loc_id, str_start, str_count, loc_str));
4072 }
4073 if (num_fixed_values > 0) {
4074
4075 int * num_tgt_per_fixed_value_int =
4076 xmalloc(num_fixed_values * sizeof(*num_tgt_per_fixed_value_int));
4077 for (unsigned i = 0; i < num_fixed_values; ++i) {
4078 YAC_ASSERT(
4079 num_tgt_per_fixed_value[i] <= INT_MAX,
4080 "ERROR(create_weight_file): "
4081 "number of targets per fixed value is too big (not yet supported)")
4082 num_tgt_per_fixed_value_int[i] = (int)num_tgt_per_fixed_value[i];
4083 }
4084 YAC_HANDLE_ERROR(nc_put_var_double(ncid, var_fixed_values_id, fixed_values));
4085 YAC_HANDLE_ERROR(nc_put_var_int(ncid, var_num_dst_per_fixed_value_id,
4086 num_tgt_per_fixed_value_int));
4087 free(num_tgt_per_fixed_value_int);
4088 }
4089
4090 // close file
4091 YAC_HANDLE_ERROR(nc_close(ncid));
4092}
4093
4094static int compare_interp_weight_stencil(const void * a, const void * b) {
4095
4096 int a_is_fixed = (((struct interp_weight_stencil *)a)->type == FIXED);
4097 int b_is_fixed = (((struct interp_weight_stencil *)b)->type == FIXED);
4098 int ret = b_is_fixed - a_is_fixed;
4099
4100 if (ret) return ret;
4101
4102 // if both are fixed stencils
4103 if (a_is_fixed) {
4104
4105 double fixed_value_a =
4106 ((struct interp_weight_stencil *)a)->data.fixed.value;
4107 double fixed_value_b =
4108 ((struct interp_weight_stencil *)b)->data.fixed.value;
4109 ret = (fixed_value_a > fixed_value_b) -
4110 (fixed_value_a < fixed_value_b);
4111 if (ret) return ret;
4112 }
4113
4114 return (((struct interp_weight_stencil *)a)->tgt.global_id >
4115 ((struct interp_weight_stencil *)b)->tgt.global_id) -
4116 (((struct interp_weight_stencil *)a)->tgt.global_id <
4117 ((struct interp_weight_stencil *)b)->tgt.global_id);
4118}
4119
4121 struct interp_weight_stencil * stencils, size_t stencils_size,
4122 yac_int * min_tgt_global_id, yac_int * max_tgt_global_id, MPI_Comm comm) {
4123
4124 yac_int min_max[2] = {XT_INT_MAX, XT_INT_MIN};
4125
4126 for (size_t i = 0; i < stencils_size; ++i) {
4127
4128 yac_int curr_id = stencils[i].tgt.global_id;
4129 if (curr_id < min_max[0]) min_max[0] = curr_id;
4130 if (curr_id > min_max[1]) min_max[1] = curr_id;
4131 }
4132
4133 min_max[0] = XT_INT_MAX - min_max[0];
4134
4136 MPI_Allreduce(
4137 MPI_IN_PLACE, min_max, 2, yac_int_dt, MPI_MAX, comm), comm);
4138
4139 *min_tgt_global_id = XT_INT_MAX - min_max[0];
4140 *max_tgt_global_id = min_max[1];
4141}
4142
4144 struct interp_weight_stencil * stencils, size_t stencils_size,
4145 yac_int min_tgt_global_id, yac_int max_tgt_global_id,
4146 int num_io_procs_int, int * io_owner) {
4147
4148 long long num_io_procs = (long long)num_io_procs_int;
4149 long long id_range =
4150 MAX((long long)(max_tgt_global_id - min_tgt_global_id),1);
4151
4152 for (size_t i = 0; i < stencils_size; ++i)
4153 io_owner[i] =
4154 ((int)(MIN(((long long)(stencils[i].tgt.global_id - min_tgt_global_id) *
4155 num_io_procs) / id_range, num_io_procs - 1)));
4156}
4157
4159 struct interp_weight_stencil * stencils, size_t stencil_count,
4160 double ** fixed_values, size_t * num_fixed_values, MPI_Comm comm) {
4161
4162 int comm_size;
4163 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
4164
4165 double * local_fixed_values =
4166 xmalloc(stencil_count * sizeof(*local_fixed_values));
4167
4168 int * int_buffer = xmalloc(2 * (size_t)comm_size * sizeof(*int_buffer));
4169 int * recvcounts = int_buffer + 0 * comm_size;
4170 int * rdispls = int_buffer + 1 * comm_size;
4171
4172 size_t local_num_fixed = 0;
4173
4174 // get all local fixed values
4175 for (size_t i = 0; i < stencil_count;
4176 ++i, ++local_num_fixed) {
4177 if (stencils[i].type != FIXED) break;
4178 local_fixed_values[i] = stencils[i].data.fixed.value;
4179 }
4180 qsort(local_fixed_values, local_num_fixed, sizeof(*local_fixed_values),
4182 yac_remove_duplicates_double(local_fixed_values, &local_num_fixed);
4183
4184 // get number of fixed values per rank
4185 int local_num_fixed_int = (int)(local_num_fixed);
4187 MPI_Allgather(
4188 &local_num_fixed_int, 1, MPI_INT, recvcounts, 1,MPI_INT, comm), comm);
4189 for (int i = 0, accu = 0; i < comm_size; ++i) {
4190 rdispls[i] = accu;
4191 accu += recvcounts[i];
4192 }
4193
4194 size_t num_all_fixed_values = 0;
4195 for (int i = 0; i < comm_size; ++i)
4196 num_all_fixed_values += (size_t)(recvcounts[i]);
4197
4198 double * all_fixed_values =
4199 xmalloc(num_all_fixed_values * sizeof(*all_fixed_values));
4200
4201 // gather all fixed values
4203 MPI_Allgatherv(
4204 local_fixed_values, local_num_fixed_int, MPI_DOUBLE,
4205 all_fixed_values, recvcounts, rdispls, MPI_DOUBLE, comm), comm);
4206 free(int_buffer);
4207 free(local_fixed_values);
4208
4209 qsort(all_fixed_values, num_all_fixed_values, sizeof(*all_fixed_values),
4211 yac_remove_duplicates_double(all_fixed_values, &num_all_fixed_values);
4212 *fixed_values = xrealloc(all_fixed_values,
4213 num_all_fixed_values * sizeof(*all_fixed_values));
4214 *num_fixed_values = num_all_fixed_values;
4215}
4216
4217static size_t get_num_weights_per_link(struct interp_weight_stencil * stencil) {
4218
4219 YAC_ASSERT(
4220 (stencil->type == FIXED) ||
4221 (stencil->type == DIRECT) ||
4222 (stencil->type == SUM) ||
4223 (stencil->type == WEIGHT_SUM) ||
4224 (stencil->type == DIRECT_MF) ||
4225 (stencil->type == SUM_MF) ||
4226 (stencil->type == WEIGHT_SUM_MF),
4227 "ERROR(get_num_weights_per_link): invalid stencil type")
4228
4229 return (stencil->type == FIXED)?0:1;
4230}
4231
4233 struct interp_weight_stencil * stencils, size_t stencil_count,
4234 MPI_Comm comm) {
4235
4236 size_t num_weights_per_link = 0;
4237 for (size_t i = 0; i < stencil_count; ++i)
4238 num_weights_per_link =
4239 MAX(num_weights_per_link, get_num_weights_per_link(stencils + i));
4240
4241 uint64_t num_weights_per_link_64_t = (uint64_t)num_weights_per_link;
4243 MPI_Allreduce(
4244 MPI_IN_PLACE, &num_weights_per_link_64_t, 1, MPI_UINT64_T,
4245 MPI_MAX, comm), comm);
4246 num_weights_per_link = (size_t)num_weights_per_link_64_t;
4247
4248 return num_weights_per_link;
4249}
4250
4252 struct interp_weight_stencil * stencil, size_t src_field_idx) {;
4253
4254 YAC_ASSERT(
4255 stencil->type != FIXED,
4256 "ERROR(get_num_links_per_src_field): "
4257 "stencil type FIXED not supported by this routine")
4258 YAC_ASSERT(
4259 (stencil->type == DIRECT) ||
4260 (stencil->type == SUM) ||
4261 (stencil->type == WEIGHT_SUM) ||
4262 (stencil->type == DIRECT_MF) ||
4263 (stencil->type == SUM_MF) ||
4264 (stencil->type == WEIGHT_SUM_MF),
4265 "ERROR(get_num_links_per_src_field): invalid stencil type")
4266 switch (stencil->type) {
4267 default:
4268 case(DIRECT): return (src_field_idx == 0)?1:0;
4269 case(SUM): return (src_field_idx == 0)?stencil->data.sum.srcs->count:0;
4270 case(WEIGHT_SUM):
4271 return (src_field_idx == 0)?stencil->data.weight_sum.srcs->count:0;
4272 case(DIRECT_MF): return stencil->data.direct_mf.field_idx == src_field_idx;
4273 case(SUM_MF): {
4274 size_t count = 0;
4275 size_t stencil_size = stencil->data.sum_mf.srcs->count;
4276 size_t * field_indices = stencil->data.sum_mf.field_indices;
4277 for (size_t i = 0; i < stencil_size; ++i)
4278 if (field_indices[i] == src_field_idx) ++count;
4279 return count;
4280 }
4281 case(WEIGHT_SUM_MF): {
4282 size_t count = 0;
4283 size_t stencil_size = stencil->data.weight_sum_mf.srcs->count;
4284 size_t * field_indices = stencil->data.weight_sum_mf.field_indices;
4285 for (size_t i = 0; i < stencil_size; ++i)
4286 if (field_indices[i] == src_field_idx) ++count;
4287 return count;
4288 }
4289 };
4290}
4291
4293 struct interp_weight_stencil * stencils, size_t stencil_count,
4294 size_t num_fixed_values, double * fixed_values,
4295 size_t * num_tgt_per_fixed_value,
4296 size_t * num_fixed_tgt, size_t num_src_fields,
4297 size_t * num_links_per_src_field, size_t * num_links) {
4298
4299 *num_fixed_tgt = 0;
4300 *num_links = 0;
4301 for (size_t i = 0; i < num_fixed_values; ++i) num_tgt_per_fixed_value[i] = 0;
4302 for (size_t i = 0; i < num_src_fields; ++i) num_links_per_src_field[i] = 0;
4303
4304 for (size_t i = 0; i < stencil_count; ++i) {
4305 if (stencils[i].type == FIXED) {
4306 double curr_fixed_value = stencils[i].data.fixed.value;
4307 for (size_t j = 0; j < num_fixed_values; ++j) {
4308 if (curr_fixed_value == fixed_values[j]) {
4309 num_tgt_per_fixed_value[j]++;
4310 break;
4311 }
4312 }
4313 ++*num_fixed_tgt;
4314 } else {
4315 for (size_t j = 0; j < num_src_fields; ++j) {
4316 num_links_per_src_field[j] +=
4317 get_num_links_per_src_field(stencils + i, j);
4318 }
4319 }
4320 }
4321 for (size_t i = 0; i < num_src_fields; ++i)
4322 *num_links += num_links_per_src_field[i];
4323}
4324
4326 size_t num_fixed_values, size_t * num_tgt_per_fixed_value,
4327 size_t num_src_fields, size_t * num_links_per_src_field,
4328 size_t * fixed_offsets, size_t * link_offsets, MPI_Comm comm) {
4329
4330 int comm_rank;
4331 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
4332
4333 size_t count = num_fixed_values + num_src_fields;
4334 uint64_t * uint64_t_buffer = xmalloc(3 * count * sizeof(*uint64_t_buffer));
4335 uint64_t * global_counts = uint64_t_buffer + 0 * count;
4336 uint64_t * local_counts = uint64_t_buffer + 1 * count;
4337 uint64_t * offsets = uint64_t_buffer + 2 * count;
4338
4339 for (size_t i = 0; i < num_fixed_values; ++i)
4340 local_counts[i] = (uint64_t)(num_tgt_per_fixed_value[i]);
4341 for (size_t i = 0; i < num_src_fields; ++i)
4342 local_counts[num_fixed_values + i] = (uint64_t)(num_links_per_src_field[i]);
4343
4345 MPI_Allreduce(local_counts, global_counts, (int)count, MPI_UINT64_T,
4346 MPI_SUM, comm), comm);
4348 MPI_Exscan(local_counts, offsets, (int)count, MPI_UINT64_T, MPI_SUM, comm),
4349 comm);
4350 if (comm_rank == 0) memset(offsets, 0, count * sizeof(*offsets));
4351
4352 for (size_t i = 0, accu = 0; i < num_fixed_values; ++i) {
4353 fixed_offsets[i] = (size_t)(offsets[i]) + accu;
4354 accu += (size_t)(global_counts[i]);
4355 }
4356 for (size_t i = 0, accu = 0; i < num_src_fields; ++i) {
4357 link_offsets[i] = (size_t)(offsets[i+num_fixed_values]) + accu;
4358 accu += (size_t)(global_counts[i+num_fixed_values]);
4359 }
4360 free(uint64_t_buffer);
4361}
4362
4363static int global_id_to_address(yac_int global_id) {
4364
4365 YAC_ASSERT(
4366 (global_id < INT_MAX) && (global_id != XT_INT_MAX),
4367 "ERROR(global_id_to_address): "
4368 "a global id cannot be converted into a address; too big")
4369 return (int)global_id + 1;
4370}
4371
4373 struct interp_weight_stencil * stencils, size_t stencil_count,
4374 int * tgt_address) {
4375
4376 for (size_t i = 0; i < stencil_count; ++i)
4377 tgt_address[i] = global_id_to_address(stencils[i].tgt.global_id);
4378}
4379
4381 struct interp_weight_stencil * stencils, size_t stencil_count,
4382 size_t * num_links_per_src_field, size_t num_src_fields,
4383 int * src_address, int * tgt_address, double * weight) {
4384
4385 size_t * src_field_offsets =
4386 xmalloc(2 * num_src_fields * sizeof(*src_field_offsets));
4387 size_t * prev_src_field_offsets = src_field_offsets + num_src_fields;
4388 for (size_t i = 0, accu = 0; i < num_src_fields; ++i) {
4389 src_field_offsets[i] = accu;
4390 accu += num_links_per_src_field[i];
4391 }
4392
4393 struct interp_weight_stencil * curr_stencil = stencils;
4394 for (size_t i = 0; i < stencil_count; ++i, ++curr_stencil) {
4395
4396 memcpy(prev_src_field_offsets, src_field_offsets,
4397 num_src_fields * sizeof(*prev_src_field_offsets));
4398
4399 int curr_tgt_address = global_id_to_address(curr_stencil->tgt.global_id);
4400 YAC_ASSERT(
4401 curr_stencil->type != FIXED,
4402 "ERROR(stencil_get_link_data): this call is invalid for FIXED stencils")
4403 YAC_ASSERT(
4404 (curr_stencil->type == DIRECT) ||
4405 (curr_stencil->type == SUM) ||
4406 (curr_stencil->type == WEIGHT_SUM) ||
4407 (curr_stencil->type == DIRECT_MF) ||
4408 (curr_stencil->type == SUM_MF) ||
4409 (curr_stencil->type == WEIGHT_SUM_MF),
4410 "ERROR(stencil_get_link_data): invalid stencil type")
4411 size_t src_field_offset;
4412 switch (curr_stencil->type) {
4413 default:
4414 case(DIRECT):
4415 src_field_offset = src_field_offsets[0]++;
4416 src_address[src_field_offset] =
4418 tgt_address[src_field_offset] = curr_tgt_address;
4419 weight[src_field_offset] = 1.0;
4420 break;
4421 case(SUM): {
4422 size_t curr_count = curr_stencil->data.sum.srcs->count;
4423 struct remote_point * srcs = curr_stencil->data.sum.srcs->data;
4424 for (size_t k = 0; k < curr_count; ++k) {
4425 src_field_offset = src_field_offsets[0]++;
4426 src_address[src_field_offset] =
4428 tgt_address[src_field_offset] = curr_tgt_address;
4429 weight[src_field_offset] = 1.0;
4430 }
4431 break;
4432 }
4433 case(WEIGHT_SUM): {
4434 size_t curr_count = curr_stencil->data.weight_sum.srcs->count;
4435 struct remote_point * srcs = curr_stencil->data.weight_sum.srcs->data;
4436 double * weights = curr_stencil->data.weight_sum.weights;
4437 for (size_t k = 0; k < curr_count; ++k) {
4438 src_field_offset = src_field_offsets[0]++;
4439 src_address[src_field_offset] =
4441 tgt_address[src_field_offset] = curr_tgt_address;
4442 weight[src_field_offset] = weights[k];
4443 }
4444 break;
4445 }
4446 case(DIRECT_MF):
4447 src_field_offset =
4448 src_field_offsets[curr_stencil->data.direct_mf.field_idx]++;
4449 src_address[src_field_offset ] =
4451 tgt_address[src_field_offset ] = curr_tgt_address;
4452 weight[src_field_offset ] = 1.0;
4453 break;
4454 case(SUM_MF): {
4455 size_t curr_count = curr_stencil->data.sum_mf.srcs->count;
4456 struct remote_point * srcs =
4457 curr_stencil->data.sum_mf.srcs->data;
4458 size_t * field_indices = curr_stencil->data.sum_mf.field_indices;
4459 for (size_t k = 0; k < curr_count; ++k) {
4460 src_field_offset = src_field_offsets[field_indices[k]]++;
4461 src_address[src_field_offset] =
4463 tgt_address[src_field_offset] = curr_tgt_address;
4464 weight[src_field_offset] = 1.0;
4465 }
4466 break;
4467 }
4468 case(WEIGHT_SUM_MF): {
4469 size_t curr_count = curr_stencil->data.weight_sum_mf.srcs->count;
4470 struct remote_point * srcs =
4471 curr_stencil->data.weight_sum_mf.srcs->data;
4472 double * weights = curr_stencil->data.weight_sum_mf.weights;
4473 size_t * field_indices = curr_stencil->data.weight_sum_mf.field_indices;
4474 for (size_t k = 0; k < curr_count; ++k) {
4475 src_field_offset = src_field_offsets[field_indices[k]]++;
4476 src_address[src_field_offset] =
4478 tgt_address[src_field_offset] = curr_tgt_address;
4479 weight[src_field_offset] = weights[k];
4480 }
4481 break;
4482 }
4483 };
4484
4485 for (size_t j = 0; j < num_src_fields; ++j)
4487 src_address + prev_src_field_offsets[j],
4488 src_field_offsets[j] - prev_src_field_offsets[j],
4489 weight + prev_src_field_offsets[j]);
4490 }
4491 free(src_field_offsets);
4492}
4493
4495 MPI_Comm comm, size_t count, struct interp_weight_stencil * stencils,
4496 int * owner_ranks, size_t * new_count,
4497 struct interp_weight_stencil ** new_stencils) {
4498
4499 int comm_rank, comm_size;
4500 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
4501 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
4502
4503 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
4505 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
4506
4507 size_t * stencil_indices = xmalloc(count * sizeof(*stencil_indices));
4508 for (size_t i = 0; i < count; ++i) {
4509 stencil_indices[i] = i;
4510 sendcounts[owner_ranks[i]]++;
4511 }
4512
4514 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4515
4516 // sort the stencil indices by owner rank
4517 yac_quicksort_index_int_size_t(owner_ranks, count, stencil_indices);
4518
4519 *new_count = recvcounts[comm_size - 1] + rdispls[comm_size - 1];
4520 *new_stencils =
4521 exchange_stencils(comm, stencils, stencil_indices, sendcounts, recvcounts);
4522 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
4523 free(stencil_indices);
4524}
4525
4526#endif // YAC_NETCDF_ENABLED
4527
4529 struct yac_interp_weights * weights, char const * filename,
4530 char const * src_grid_name, char const * tgt_grid_name,
4531 size_t src_grid_size, size_t tgt_grid_size) {
4532
4533#ifndef YAC_NETCDF_ENABLED
4534
4535 UNUSED(weights);
4536 UNUSED(filename);
4537 UNUSED(src_grid_name);
4538 UNUSED(tgt_grid_name);
4539 UNUSED(src_grid_size);
4540 UNUSED(tgt_grid_size);
4541
4542 die(
4543 "ERROR(yac_interp_weights_write_to_file): "
4544 "YAC is built without the NetCDF support");
4545#else
4546
4547 MPI_Comm comm = weights->comm;
4548 int comm_rank, comm_size;
4549 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
4550 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
4551
4552 // determine processes that will do output
4553 int io_flag;
4554 int * io_ranks;
4555 int num_io_ranks;
4556 yac_get_io_ranks(comm, &io_flag, &io_ranks, &num_io_ranks);
4557
4558 // determine range of global ids
4559 yac_int min_tgt_global_id, max_tgt_global_id;
4561 weights->stencils, weights->stencils_size,
4562 &min_tgt_global_id, &max_tgt_global_id, comm);
4563
4564 // determine io owners for all stencils
4565 int * io_owner =
4566 xmalloc(weights->stencils_size * sizeof(*io_owner));
4568 weights->stencils, weights->stencils_size,
4569 min_tgt_global_id, max_tgt_global_id,
4570 num_io_ranks, io_owner);
4571 for (size_t i = 0; i < weights->stencils_size; ++i)
4572 io_owner[i] = io_ranks[io_owner[i]];
4573 free(io_ranks);
4574
4575 size_t io_stencil_count = 0;
4576 struct interp_weight_stencil * io_stencils = NULL;
4577
4578 // redistribute stencils into io decomposition
4580 comm, weights->stencils_size, weights->stencils, io_owner,
4581 &io_stencil_count, &io_stencils);
4582 free(io_owner);
4583
4584 // distribute global grid sizes
4585 uint64_t grid_sizes[2] = {(uint64_t)src_grid_size, (uint64_t)tgt_grid_size};
4587 MPI_Allreduce(
4588 MPI_IN_PLACE, grid_sizes, 2, MPI_UINT64_T, MPI_MAX, comm), comm);
4589 src_grid_size = (size_t)(grid_sizes[0]);
4590 tgt_grid_size = (size_t)(grid_sizes[1]);
4591
4592 MPI_Comm io_comm;
4593 yac_mpi_call(MPI_Comm_split(comm, io_flag, comm_rank, &io_comm), comm);
4594
4595 // all non-io processes exit here, the remaining ones work using their own
4596 // communicator
4597 if (!io_flag) {
4598 yac_mpi_call(MPI_Comm_free(&io_comm), comm);
4599 free(io_stencils);
4600 // ensure that the writing of the weight file is complete
4601 yac_mpi_call(MPI_Barrier(comm), comm);
4602 return;
4603 }
4604
4605 // sort the stencils first by type (fixed first; fixed stencils are sorted
4606 // by their fixed value) and second by tgt id
4607 qsort(io_stencils, io_stencil_count, sizeof(*io_stencils),
4609
4610 yac_mpi_call(MPI_Comm_rank(io_comm, &comm_rank), comm);
4611 yac_mpi_call(MPI_Comm_size(io_comm, &comm_size), comm);
4612
4613 double * fixed_values = NULL;
4614 size_t num_fixed_values = 0;
4616 io_stencils, io_stencil_count, &fixed_values, &num_fixed_values, io_comm);
4617 size_t num_src_fields = weights->num_src_fields;
4618 size_t num_weights_per_link =
4619 stencil_get_num_weights_per_tgt(io_stencils, io_stencil_count, io_comm);
4620
4621 size_t * size_t_buffer =
4622 xmalloc(2 * (num_fixed_values + num_src_fields) * sizeof(*size_t_buffer));
4623 size_t * num_tgt_per_fixed_value = size_t_buffer;
4624 size_t * num_links_per_src_field = size_t_buffer + num_fixed_values;
4625 size_t * fixed_offsets = size_t_buffer + num_fixed_values + num_src_fields;
4626 size_t * link_offsets = size_t_buffer + 2 * num_fixed_values + num_src_fields;
4627
4628 size_t num_fixed_tgt = 0;
4629 size_t num_links = 0;
4631 io_stencils, io_stencil_count, num_fixed_values, fixed_values,
4632 num_tgt_per_fixed_value, &num_fixed_tgt, num_src_fields,
4633 num_links_per_src_field, &num_links);
4634
4636 num_fixed_values, num_tgt_per_fixed_value,
4637 num_src_fields, num_links_per_src_field,
4638 fixed_offsets, link_offsets, io_comm);
4639
4640 if (comm_rank == comm_size - 1) {
4641
4642 size_t * total_num_tgt_per_fixed_value =
4643 xmalloc(num_fixed_values * sizeof(*total_num_tgt_per_fixed_value));
4644 for (size_t i = 0, accu = 0; i < num_fixed_values; ++i) {
4645 total_num_tgt_per_fixed_value[i] =
4646 fixed_offsets[i] + num_tgt_per_fixed_value[i] - accu;
4647 accu += total_num_tgt_per_fixed_value[i];
4648 }
4649 size_t total_num_links = link_offsets[num_src_fields-1] +
4650 num_links_per_src_field[num_src_fields-1];
4651
4652 size_t * total_num_links_per_src_field =
4653 xmalloc(num_src_fields * sizeof(*total_num_links_per_src_field));
4654 for (size_t i = 0, accu = 0; i < num_src_fields; ++i) {
4655 total_num_links_per_src_field[i] =
4656 link_offsets[i] + num_links_per_src_field[i] - accu;
4657 accu += total_num_links_per_src_field[i];
4658 }
4659
4661 filename, src_grid_name, tgt_grid_name,
4662 num_fixed_values, fixed_values, total_num_tgt_per_fixed_value,
4663 total_num_links, num_weights_per_link,
4664 num_src_fields, total_num_links_per_src_field,
4665 weights->src_locations, weights->tgt_location,
4666 src_grid_size, tgt_grid_size);
4667
4668 free(total_num_links_per_src_field);
4669 free(total_num_tgt_per_fixed_value);
4670 }
4671 free(fixed_values);
4672
4673 // ensure that the basic weight file has been written
4674 yac_mpi_call(MPI_Barrier(io_comm), comm);
4675 yac_mpi_call(MPI_Comm_free(&io_comm), comm);
4676
4677 int ncid;
4678
4679 // open weight file
4680 yac_nc_open(filename, NC_WRITE | NC_SHARE, &ncid);
4681
4682 if (num_fixed_tgt > 0) {
4683
4684 int * tgt_address_fixed =
4685 xmalloc(num_fixed_tgt * sizeof(*tgt_address_fixed));
4686 stencil_get_tgt_address(io_stencils, num_fixed_tgt, tgt_address_fixed);
4687
4688 // inquire variable ids
4689 int var_dst_add_fixed_id;
4690 yac_nc_inq_varid(ncid, "dst_address_fixed", &var_dst_add_fixed_id);
4691
4692 // target ids that receive a fixed value to file
4693 for (size_t i = 0, offset = 0; i < num_fixed_values; ++i) {
4694
4695 if (num_tgt_per_fixed_value[i] == 0) continue;
4696
4697 size_t start[1] = {fixed_offsets[i]};
4698 size_t count[1] = {num_tgt_per_fixed_value[i]};
4700 nc_put_vara_int(
4701 ncid, var_dst_add_fixed_id, start, count, tgt_address_fixed + offset));
4702 offset += num_tgt_per_fixed_value[i];
4703 }
4704
4705 free(tgt_address_fixed);
4706 }
4707
4708 if (num_links > 0) {
4709
4710 int * src_address_link = xmalloc(num_links * sizeof(*src_address_link));
4711 int * tgt_address_link = xmalloc(num_links * sizeof(*tgt_address_link));
4712 double * w = xmalloc(num_links * num_weights_per_link * sizeof(*w));
4714 io_stencils + num_fixed_tgt, io_stencil_count - num_fixed_tgt,
4715 num_links_per_src_field, num_src_fields,
4716 src_address_link, tgt_address_link, w);
4717
4718 int var_src_add_id, var_dst_add_id, var_weight_id;
4719 yac_nc_inq_varid(ncid, "src_address", &var_src_add_id);
4720 yac_nc_inq_varid(ncid, "dst_address", &var_dst_add_id);
4721 yac_nc_inq_varid(ncid, "remap_matrix", &var_weight_id);
4722
4723 for (size_t i = 0, offset = 0; i < num_src_fields; ++i) {
4724
4725 if (num_links_per_src_field[i] == 0) continue;
4726
4727 size_t start[2] = {link_offsets[i], 0};
4728 size_t count[2] = {num_links_per_src_field[i], num_weights_per_link};
4729
4731 nc_put_vara_int(
4732 ncid, var_src_add_id, start, count, src_address_link + offset));
4734 nc_put_vara_int(
4735 ncid, var_dst_add_id, start, count, tgt_address_link + offset));
4737 nc_put_vara_double(
4738 ncid, var_weight_id, start, count,
4739 w + num_weights_per_link * offset));
4740
4741 offset += num_links_per_src_field[i];
4742 }
4743
4744 free(w);
4745 free(tgt_address_link);
4746 free(src_address_link);
4747 }
4748
4749 // close weight file
4750 YAC_HANDLE_ERROR(nc_close(ncid));
4751
4752 // ensure that the writing of the weight file is complete
4753 yac_mpi_call(MPI_Barrier(comm), comm);
4754
4755 free(size_t_buffer);
4756 yac_interp_weight_stencils_delete(io_stencils, io_stencil_count);
4757#endif
4758}
4759
4761 struct yac_interp_weights * weights) {
4762
4763 return weights->stencils_size;
4764}
4765
4767 struct yac_interp_weights * weights) {
4768
4769 struct interp_weight_stencil * stencils = weights->stencils;
4770 size_t stencils_size = weights->stencils_size;
4771
4772 yac_int * global_ids = xmalloc(stencils_size * sizeof(*global_ids));
4773
4774 for (size_t i = 0; i < stencils_size; ++i)
4775 global_ids[i] = stencils[i].tgt.global_id;
4776
4777 return global_ids;
4778}
4779
4781 return weights->comm;
4782}
4783
4785
4786 if (weights == NULL) return;
4787
4788 yac_interp_weight_stencils_delete(weights->stencils, weights->stencils_size);
4789 free(weights->src_locations);
4790 free(weights);
4791}
#define UNUSED(x)
Definition core.h:71
void yac_remote_point_pack(struct remote_point *point, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
Definition dist_grid.c:536
void yac_remote_point_unpack(void *buffer, int buffer_size, int *position, struct remote_point *point, MPI_Datatype point_info_dt, MPI_Comm comm)
Definition dist_grid.c:548
void yac_remote_points_pack(struct remote_points *points, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
Definition dist_grid.c:592
void yac_remote_points_unpack(void *buffer, int buffer_size, int *position, struct remote_points **points, MPI_Datatype point_info_dt, MPI_Comm comm)
Definition dist_grid.c:611
int yac_remote_points_get_pack_size(struct remote_points *points, MPI_Datatype point_info_dt, MPI_Comm comm)
Definition dist_grid.c:574
MPI_Datatype yac_get_remote_point_info_mpi_datatype(MPI_Comm comm)
Definition dist_grid.c:322
int yac_remote_point_get_pack_size(struct remote_point *point, MPI_Datatype point_info_dt, MPI_Comm comm)
Definition dist_grid.c:507
#define ENSURE_ARRAY_SIZE(arrayp, curr_array_size, req_size)
struct @8::@9 value
enum callback_type type
#define YAC_WEIGHT_FILE_VERSION_STRING
static MPI_Datatype get_fixed_stencil_mpi_datatype(MPI_Comm comm)
static size_t get_num_links_per_src_field(struct interp_weight_stencil *stencil, size_t src_field_idx)
static int get_stencil_pack_size_direct_mf(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static void unpack_stencil_wsum_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static MPI_Datatype get_direct_stencil_mpi_datatype(MPI_Comm comm)
static void stencil_determine_tgt_global_id_range(struct interp_weight_stencil *stencils, size_t stencils_size, yac_int *min_tgt_global_id, yac_int *max_tgt_global_id, MPI_Comm comm)
static void unpack_stencil_sum_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_interp_weights_add_fixed(struct yac_interp_weights *weights, struct remote_points *tgts, double fixed_value)
static int compare_stencils_direct_mf(const void *a, const void *b)
void yac_interp_weights_add_sum_mf(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_src_per_field_per_tgt, struct remote_point **srcs_per_field, size_t num_src_fields)
static struct remote_points * copy_remote_points_mf(struct remote_point **points, size_t *counts, size_t num_fields)
static struct interp_weight_stencils_wsum_mf * redist_wsum_mf_stencils_tgt(MPI_Comm comm, struct interp_weight_stencils_wsum_mf *wsum_stencils_data)
static int compare_remote_point_info(const void *a, const void *b)
static int global_id_to_address(yac_int global_id)
static void yac_interp_weights_redist_direct_mf(MPI_Comm comm, uint64_t count, struct interp_weight_stencil *direct_mf_stencils, struct yac_interpolation *interp)
static int get_stencil_pack_size_sum(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static void pack_stencil_direct(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
yac_interp_weight_stencil_type
@ DIRECT
@ WEIGHT_SUM_MF
@ DIRECT_MF
@ SUM_MF
@ SUM
@ FIXED
@ WEIGHT_SUM
@ WEIGHT_STENCIL_TYPE_SIZE
static Xt_redist generate_redist_put_double(enum yac_location location, struct remote_point_infos *point_infos, size_t count, MPI_Comm comm)
static void pack_stencil_wsum(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static void free_remote_points(struct remote_points *points)
struct yac_interpolation * yac_interp_weights_get_interpolation(struct yac_interp_weights *weights, enum yac_interp_weights_reorder_type reorder, size_t collection_size, double frac_mask_fallback_value, double scaling_factor, double scaling_summand)
static void yac_interp_weight_stencils_delete(struct interp_weight_stencil *stencils, size_t count)
static int compare_stencils_fixed(const void *a, const void *b)
static struct interp_weight_stencil stencils_merge(struct interp_weight_stencil **stencils, double *w, size_t num_stencils, struct remote_point point)
static int get_stencil_wsum_mf_pack_size(struct interp_weight_stencil_wsum_mf *stencil, MPI_Datatype wsum_mf_weight_dt, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_interp_weights_add_wsum(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_src_per_tgt, struct remote_point *srcs, double *w)
static struct interp_weight_stencil stencils_merge_sum(struct interp_weight_stencil **stencils, double *w, size_t num_stencils)
static int compare_w_global_id(const void *a, const void *b)
static void compact_srcs_w(struct remote_points *srcs, double **w)
void yac_interp_weights_delete(struct yac_interp_weights *weights)
static size_t unpack_stencils_wsum_mf(struct interp_weight_stencil_wsum_mf *wsum_stencils, struct interp_weight_stencil_wsum_mf_weight *weight_buffer, size_t count, void *packed_data, size_t packed_data_size, MPI_Comm comm)
static void pack_stencil_sum(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static void determine_stencils_io_owner(struct interp_weight_stencil *stencils, size_t stencils_size, yac_int min_tgt_global_id, yac_int max_tgt_global_id, int num_io_procs_int, int *io_owner)
static Xt_redist * generate_halo_redists(struct remote_point_info_reorder *halo_points, size_t count, size_t num_src_fields, MPI_Comm comm)
static size_t get_num_weights_per_link(struct interp_weight_stencil *stencil)
static void free_remote_point(struct remote_point point)
static void yac_interp_weights_redist_stencils(MPI_Comm comm, size_t count, struct interp_weight_stencil *stencils, int *owner_ranks, size_t *new_count, struct interp_weight_stencil **new_stencils)
static void unpack_stencil_wsum(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_interp_weights_wcopy_weights(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_stencils_per_tgt, size_t *stencil_indices, int *stencil_ranks, double *w)
static int compare_interp_weight_stencil(const void *a, const void *b)
static void pack_stencil_wsum_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_interp_weights_add_wsum_mf(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_src_per_field_per_tgt, struct remote_point **srcs_per_field, double *w, size_t num_src_fields)
static int compare_rank_pos_reorder_field_idx(const void *a, const void *b)
static int get_stencil_pack_size_wsum(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static void pack_stencils(struct interp_weight_stencil *stencils, size_t count, size_t *pack_order, void **pack_data, int *pack_sizes, MPI_Datatype point_info_dt, MPI_Comm comm)
static void get_stencils_pack_sizes(struct interp_weight_stencil *stencils, size_t count, size_t *pack_order, int *pack_sizes, MPI_Datatype point_info_dt, MPI_Comm comm)
yac_int * yac_interp_weights_get_interp_tgt(struct yac_interp_weights *weights)
static int get_stencil_pack_size_sum_mf(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static void pack_stencil_fixed(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static int get_stencil_pack_size_wsum_mf(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static struct interp_weight_stencil wcopy_interp_weight_stencil(struct interp_weight_stencil *stencil, struct remote_point point, double weight)
static struct interp_weight_stencil * yac_interp_weights_get_stencils(struct yac_interp_weights *weights, size_t *stencil_indices, int *stencil_ranks, size_t count)
static void unpack_stencil_direct_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static int compare_stencils(const void *a, const void *b)
static void create_weight_file(char const *filename, char const *src_grid_name, char const *tgt_grid_name, size_t num_fixed_values, double *fixed_values, size_t *num_tgt_per_fixed_value, size_t num_links, size_t num_weights_per_link, size_t num_src_fields, size_t *num_links_per_src_field, enum yac_location *src_locations, enum yac_location tgt_location, size_t src_grid_size, size_t tgt_grid_size)
static int get_stencil_pack_size_direct(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_interp_weights_write_to_file(struct yac_interp_weights *weights, char const *filename, char const *src_grid_name, char const *tgt_grid_name, size_t src_grid_size, size_t tgt_grid_size)
static void stencil_get_counts(struct interp_weight_stencil *stencils, size_t stencil_count, size_t num_fixed_values, double *fixed_values, size_t *num_tgt_per_fixed_value, size_t *num_fixed_tgt, size_t num_src_fields, size_t *num_links_per_src_field, size_t *num_links)
static void copy_remote_points_no_alloc(struct remote_point *points_to, struct remote_point *points_from, size_t count, struct remote_point_info **point_info_buffer_)
#define WEIGHT_TOL
static struct remote_points * copy_remote_points(struct remote_point *points, size_t count)
static int compare_remote_point(const void *a, const void *b)
static MPI_Datatype get_direct_mf_stencil_mpi_datatype(MPI_Comm comm)
struct yac_interpolation * yac_interp_weights_get_interpolation_f2c(struct yac_interp_weights *weights, int reorder, size_t collection_size, double frac_mask_fallback_value, double scaling_factor, double scaling_summand)
MPI_Comm yac_interp_weights_get_comm(struct yac_interp_weights *weights)
static MPI_Datatype get_wsum_mf_weight_mpi_datatype(MPI_Comm comm)
static struct interp_weight_stencil copy_interp_weight_stencil(struct interp_weight_stencil *stencil, struct remote_point point)
void yac_interp_weights_add_direct(struct yac_interp_weights *weights, struct remote_points *tgts, struct remote_point *srcs)
void yac_interp_weights_add_sum(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_src_per_tgt, struct remote_point *srcs)
static void unpack_stencil_direct(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static int compare_interp_weight_stencil_wsum_mf_tgt_orig_pos(const void *a, const void *b)
struct yac_interp_weights * yac_interp_weights_new(MPI_Comm comm, enum yac_location tgt_location, enum yac_location *src_locations, size_t num_src_fields)
static void unpack_stencil_sum(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static struct interp_weight_stencils_wsum_mf * redist_wsum_mf_stencils(MPI_Comm comm, struct interp_weight_stencils_wsum_mf *wsum_stencils_data, int *stencil_owner, size_t *reorder_idx, size_t num_owners)
static struct interp_weight_stencils_wsum_mf * redist_wsum_mf_stencils_src(MPI_Comm comm, struct interp_weight_stencils_wsum_mf *wsum_stencils_data)
static int compare_stencils_direct(const void *a, const void *b)
static size_t stencil_get_num_weights_per_tgt(struct interp_weight_stencil *stencils, size_t stencil_count, MPI_Comm comm)
static void stencil_get_tgt_address(struct interp_weight_stencil *stencils, size_t stencil_count, int *tgt_address)
static void yac_interp_weights_redist_w_sum_mf(MPI_Comm comm, enum yac_location tgt_location, struct interp_weight_stencils_wsum_mf *wsum_mf_stencils_data, struct yac_interpolation *interp, enum yac_interp_weights_reorder_type reorder, enum yac_interp_weight_stencil_type stencil_type)
static int get_stencil_pack_size_fixed(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static struct remote_point copy_remote_point(struct remote_point point)
static void stencil_get_link_data(struct interp_weight_stencil *stencils, size_t stencil_count, size_t *num_links_per_src_field, size_t num_src_fields, int *src_address, int *tgt_address, double *weight)
size_t yac_interp_weights_get_interp_count(struct yac_interp_weights *weights)
static Xt_redist generate_direct_redist(uint64_t *src_orig_poses, size_t *sendcounts, struct interp_weight_stencil_direct *tgt_stencils, size_t *recvcounts, MPI_Comm comm)
static struct interp_weight_stencil stencils_merge_wsum(struct interp_weight_stencil **stencils, double *w, size_t num_stencils)
static void pack_stencil_sum_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static void pack_stencil_direct_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static void pack_stencils_wsum_mf(struct interp_weight_stencil_wsum_mf *wsum_stencils, size_t count, size_t *pack_order, void **pack_data, int *pack_sizes, int *weight_counts, MPI_Comm comm)
static int compare_interp_weight_stencil_wsum_mf_src_orig_pos(const void *a, const void *b)
static struct interp_weight_stencil * exchange_stencils(MPI_Comm comm, struct interp_weight_stencil *stencils, size_t *stencil_indices, size_t *stencil_sendcounts, size_t *stencil_recvcounts)
static void unpack_stencil_fixed(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static void yac_interp_weights_redist_fixed(MPI_Comm comm, uint64_t count, struct interp_weight_stencil *fixed_stencils, struct yac_interpolation *interp)
static Xt_redist * generate_direct_mf_redists(uint64_t *src_orig_pos, size_t *sendcounts, struct interp_weight_stencil_direct_mf *tgt_stencils, size_t *recvcounts, size_t num_src_fields, MPI_Comm comm)
static int compare_double(void const *a, void const *b)
static void stencil_xscan_offsets(size_t num_fixed_values, size_t *num_tgt_per_fixed_value, size_t num_src_fields, size_t *num_links_per_src_field, size_t *fixed_offsets, size_t *link_offsets, MPI_Comm comm)
static void yac_interp_weights_redist_direct(MPI_Comm comm, uint64_t count, struct interp_weight_stencil *direct_stencils, struct yac_interpolation *interp)
static void xt_redist_msg_free(struct Xt_redist_msg *msgs, size_t count, MPI_Comm comm)
void yac_interp_weights_add_direct_mf(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *src_field_indices, struct remote_point **srcs_per_field, size_t num_src_fields)
static void unpack_stencils(struct interp_weight_stencil *stencils, size_t count, void *packed_data, size_t packed_data_size, MPI_Datatype point_info_dt, MPI_Comm comm)
static struct remote_point_info select_src(struct remote_point_infos src)
static struct interp_weight_stencils_wsum_mf * generate_w_sum_mf_stencils(struct interp_weight_stencil *stencils, size_t count, enum yac_interp_weight_stencil_type stencil_type)
static int compute_owner(int *ranks, size_t count)
static void stencil_get_fixed_values(struct interp_weight_stencil *stencils, size_t stencil_count, double **fixed_values, size_t *num_fixed_values, MPI_Comm comm)
yac_interp_weights_reorder_type
@ YAC_MAPPING_ON_TGT
weights will be applied at target processes
@ YAC_MAPPING_ON_SRC
weights will be appied at source processes
void yac_interpolation_add_sum_at_src(struct yac_interpolation *interp, Xt_redist *halo_redists, size_t tgt_count, size_t *num_src_per_tgt, size_t *src_field_idx, size_t *src_idx, size_t num_src_fields, Xt_redist result_redist)
void yac_interpolation_add_weight_sum_mvp_at_tgt(struct yac_interpolation *interp, Xt_redist *src_redists, size_t *tgt_pos, size_t tgt_count, size_t *num_src_per_tgt, double *weights, size_t *src_field_idx, size_t *src_idx, size_t num_src_fields)
struct yac_interpolation * yac_interpolation_new(size_t collection_size, double frac_mask_fallback_value, double scale_factor, double scale_summand)
void yac_interpolation_add_direct_mf(struct yac_interpolation *interp, Xt_redist *redists, size_t num_src_fields)
void yac_interpolation_add_sum_at_tgt(struct yac_interpolation *interp, Xt_redist *src_redists, size_t *tgt_pos, size_t tgt_count, size_t *num_src_per_tgt, size_t *src_field_idx, size_t *src_idx, size_t num_src_fields)
void yac_interpolation_add_fixed(struct yac_interpolation *interp, double value, size_t count, size_t *pos)
void yac_interpolation_add_direct(struct yac_interpolation *interp, Xt_redist redist)
void yac_interpolation_add_weight_sum_mvp_at_src(struct yac_interpolation *interp, Xt_redist *halo_redists, size_t tgt_count, size_t *num_src_per_tgt, double *weights, size_t *src_field_idx, size_t *src_idx, size_t num_src_fields, Xt_redist result_redist)
void yac_get_io_ranks(MPI_Comm comm, int *local_is_io_, int **io_ranks_, int *num_io_ranks_)
Definition io_utils.c:303
void yac_nc_create(const char *path, int cmode, int *ncidp)
Definition io_utils.c:361
void yac_nc_inq_varid(int ncid, char const *name, int *varidp)
Definition io_utils.c:403
void yac_nc_open(const char *path, int omode, int *ncidp)
Definition io_utils.c:344
#define YAC_HANDLE_ERROR(exp)
Definition io_utils.h:30
char const * yac_loc2str(enum yac_location location)
Definition location.c:32
yac_location
Definition location.h:12
#define YAC_MAX_LOC_STR_LEN
Definition location.h:10
Definition __init__.py:1
#define xrealloc(ptr, size)
Definition ppm_xfuncs.h:67
#define xmalloc(size)
Definition ppm_xfuncs.h:66
struct remote_point_info src
struct remote_point_info src
struct interp_weight_stencil_wsum_mf_weight * data
union interp_weight_stencil::@35 data
struct interp_weight_stencil::@35::@39 weight_sum
struct remote_points * srcs
struct interp_weight_stencil::@35::@42 weight_sum_mf
struct interp_weight_stencil::@35::@41 sum_mf
struct remote_point src
struct interp_weight_stencil::@35::@36 fixed
enum yac_interp_weight_stencil_type type
struct remote_point tgt
struct interp_weight_stencil::@35::@38 sum
struct interp_weight_stencil::@35::@40 direct_mf
struct interp_weight_stencil::@35::@37 direct
struct interp_weight_stencils_wsum_mf stencils
struct interp_weight_stencil_wsum_mf_weight buffer[]
struct interp_weight_stencil_wsum_mf * data
struct remote_point_info data
struct remote_point_info single
struct remote_point_info * multi
union remote_point_infos::@1 data
struct remote_point_infos data
struct remote_point_info buffer[]
struct remote_point * data
struct interp_weight_stencil * stencils
enum yac_location tgt_location
enum yac_location * src_locations
double frac_mask_fallback_value
#define MIN(a, b)
#define MAX(a, b)
void yac_quicksort_index_int_double(int *a, size_t n, double *idx)
void yac_quicksort_index_int_size_t(int *a, size_t n, size_t *idx)
void yac_quicksort_index_int_size_t_size_t(int *a, size_t n, size_t *b, size_t *c)
static void yac_remove_duplicates_double(double *array, size_t *n)
Definition utils_core.h:81
void yac_quicksort_index_size_t_size_t(size_t *a, size_t n, size_t *idx)
void yac_quicksort_index(int *a, size_t n, int *idx)
#define SUM
static struct user_input_data_points ** points
Definition yac.c:120
#define YAC_ASSERT_F(exp, format,...)
Definition yac_assert.h:18
#define die(msg)
Definition yac_assert.h:12
#define YAC_ASSERT(exp, msg)
Definition yac_assert.h:15
void yac_generate_alltoallv_args(int count, size_t const *sendcounts, size_t *recvcounts, size_t *sdispls, size_t *rdispls, MPI_Comm comm)
Definition yac_mpi.c:569
void yac_free_comm_buffers(size_t *sendcounts, size_t *recvcounts, size_t *sdispls, size_t *rdispls)
Definition yac_mpi.c:624
void yac_get_comm_buffers(int count, size_t **sendcounts, size_t **recvcounts, size_t **sdispls, size_t **rdispls, MPI_Comm comm)
Definition yac_mpi.c:593
MPI_Datatype yac_create_resized(MPI_Datatype dt, size_t new_size, MPI_Comm comm)
Definition yac_mpi.c:548
void yac_alltoallv_p2p(void const *send_buffer, size_t const *sendcounts, size_t const *sdispls, void *recv_buffer, size_t const *recvcounts, size_t const *rdispls, size_t dt_size, MPI_Datatype dt, MPI_Comm comm)
Definition yac_mpi.c:129
#define yac_mpi_call(call, comm)
#define YAC_MPI_SIZE_T
Xt_int yac_int
Definition yac_types.h:15
#define yac_int_dt
Definition yac_types.h:16