YetAnotherCoupler 3.5.2
Loading...
Searching...
No Matches
interp_weights.c
Go to the documentation of this file.
1// Copyright (c) 2024 The YAC Authors
2//
3// SPDX-License-Identifier: BSD-3-Clause
4
5#ifdef HAVE_CONFIG_H
6// Get the definition of the 'restrict' keyword.
7#include "config.h"
8#endif
9
10#include <string.h>
11
12#define WEIGHT_TOL (1e-9)
13
14#include "yac_mpi_internal.h"
16#include "ensure_array_size.h"
17#include "io_utils.h"
18#include "utils_core.h"
19#include "interp_method_file.h"
20
21#define YAC_YAXT_EXCHANGER_STR "YAC_YAXT_EXCHANGER"
22
33
35
37 union {
38 struct {
39 double value;
41 struct {
42 struct remote_point src; // src id
44 struct {
45 struct remote_points * srcs; // src ids
46 } sum;
47 struct {
48 struct remote_points * srcs; // src ids
49 double * weights;
51 struct {
52 struct remote_point src; // src id
53 size_t field_idx;
55 struct {
56 struct remote_points * srcs; // src ids
57 size_t * field_indices;
59 struct {
60 struct remote_points * srcs; // src ids
61 double * weights;
62 size_t * field_indices;
65 struct remote_point tgt; //tgt id
66};
67
74
80
85
90
92 double value;
93 uint64_t orig_pos;
94};
95
100
106
111
123
129
131 MPI_Comm comm, enum yac_location tgt_location,
133
134 struct yac_interp_weights * weights = xmalloc(1 * sizeof(*weights));
135
136 weights->comm = comm;
137 weights->tgt_location = tgt_location;
138 weights->src_locations = xmalloc(num_src_fields * sizeof(*src_locations));
139 memcpy(
141 num_src_fields * sizeof(*src_locations));
143 weights->stencils = NULL;
144 weights->stencils_array_size = 0;
145 weights->stencils_size = 0;
146
147 return weights;
148}
149
150static inline struct remote_point copy_remote_point(
151 struct remote_point point) {
152
153 int count = point.data.count;
154 if (count > 1) {
155 struct remote_point_info * point_infos =
156 xmalloc((size_t)count * sizeof(*point_infos));
157 memcpy(point_infos, point.data.data.multi,
158 (size_t)count * sizeof(*point_infos));
159 point.data.data.multi = point_infos;
160 }
161 return point;
162}
163
165 struct remote_point * points_to, struct remote_point * points_from,
166 size_t count, struct remote_point_info ** point_info_buffer_) {
167
168 struct remote_point_info * point_info_buffer = *point_info_buffer_;
169
170 for (size_t i = 0; i < count; ++i) {
171 int curr_count = points_from[i].data.count;
172 points_to[i] = points_from[i];
173 if (curr_count > 1) {
174 points_to[i].data.data.multi = point_info_buffer;
175 memcpy(
176 point_info_buffer, points_from[i].data.data.multi,
177 (size_t)curr_count * sizeof(*point_info_buffer));
178 point_info_buffer += curr_count;
179 }
180 }
181 *point_info_buffer_ = point_info_buffer;
182}
183
184static inline struct remote_points * copy_remote_points(
185 struct remote_point * points, size_t count) {
186
187 size_t point_info_buffer_size = 0;
188 for (size_t i = 0; i < count; ++i)
189 if (points[i].data.count > 1)
190 point_info_buffer_size += (size_t)(points[i].data.count);
191
192 struct remote_points * points_copy =
193 xmalloc(point_info_buffer_size * sizeof(struct remote_point_info) +
194 sizeof(*points_copy));
195 points_copy->data = xmalloc(count * sizeof(*(points_copy->data)));
196 points_copy->count = count;
197 struct remote_point_info * point_info_buffer = &(points_copy->buffer[0]);
198
200 points_copy->data, points, count, &point_info_buffer);
201
202 return points_copy;
203}
204
206 struct remote_point ** points, size_t * counts, size_t num_fields) {
207
208 size_t point_info_buffer_size = 0;
209 size_t total_count = 0;
210 for (size_t i = 0; i < num_fields; ++i) {
211 total_count += counts[i];
212 for (size_t j = 0; j < counts[i]; ++j) {
213 if (points[i][j].data.count > 1)
214 point_info_buffer_size += (size_t)(points[i][j].data.count);
215 }
216 }
217
218 struct remote_points * points_copy =
219 xmalloc(point_info_buffer_size * sizeof(struct remote_point_info) +
220 sizeof(*points_copy));
221 points_copy->data = xmalloc(total_count * sizeof(*(points_copy->data)));
222 points_copy->count = total_count;
223 struct remote_point_info * point_info_buffer = &(points_copy->buffer[0]);
224
225 for (size_t i = 0, k = 0; i < num_fields; ++i) {
226 for (size_t j = 0; j < counts[i]; ++j, ++k) {
227 int curr_count = points[i][j].data.count;
228 points_copy->data[k] = points[i][j];
229 if (curr_count > 1) {
230 points_copy->data[k].data.data.multi = point_info_buffer;
231 memcpy(
232 point_info_buffer, points[i][j].data.data.multi,
233 (size_t)curr_count * sizeof(*point_info_buffer));
234 point_info_buffer += curr_count;
235 }
236 }
237 }
238 return points_copy;
239}
240
242 struct yac_interp_weights * weights, struct remote_points * tgts,
243 double fixed_value) {
244
245 struct interp_weight_stencil * stencils = weights->stencils;
246 size_t stencils_array_size = weights->stencils_array_size;
247 size_t stencils_size = weights->stencils_size;
248
249 ENSURE_ARRAY_SIZE(stencils, stencils_array_size, stencils_size + tgts->count);
250
251 for (size_t i = 0; i < tgts->count; ++i, ++stencils_size) {
252
253 stencils[stencils_size].type = FIXED;
254 stencils[stencils_size].tgt = copy_remote_point(tgts->data[i]);
255 stencils[stencils_size].data.fixed.value = fixed_value;
256 }
257
258 weights->stencils = stencils;
259 weights->stencils_array_size = stencils_array_size;
260 weights->stencils_size = stencils_size;
261}
262
264 struct yac_interp_weights * weights, struct remote_points * tgts,
265 size_t * num_src_per_tgt, struct remote_point * srcs, double * w) {
266
267 if (tgts->count == 0) return;
268
269 // determine whether there are zero-weights
270 int pack_flag = 0;
271 for (size_t i = 0, k = 0; (i < tgts->count) && !pack_flag; ++i)
272 for (size_t j = 0; (j < num_src_per_tgt[i]) && !pack_flag; ++j, ++k)
273 pack_flag = (fabs(w[k]) <= WEIGHT_TOL);
274
275 if (pack_flag) {
276
277 for (size_t i = 0, k = 0, l = 0;
278 i < tgts->count; i++) {
279
280 size_t curr_count = num_src_per_tgt[i];
281
282 for (size_t j = 0; j < curr_count; j++, k++) {
283
284 if (fabs(w[k]) < WEIGHT_TOL) {
285 num_src_per_tgt[i]--;
286 } else {
287 if (l != k) {
288 srcs[l] = srcs[k];
289 w[l] = w[k];
290 }
291 ++l;
292 }
293 }
294
295 // if all weights were zero
296 if ((curr_count != 0) && (num_src_per_tgt[i] == 0)) {
297
298 if (l != k) {
299 srcs[l] = srcs[k - curr_count];
300 w[l] = 0.0;
301 }
302 num_src_per_tgt[i] = 1;
303 ++l;
304 }
305 }
306 }
307
308 // check whether all weights are 1.0 and whether the number of source
309 // points per target is one for all targets
310 int flag_weight_one = 1;
311 int flag_count_one = 1;
312 for (size_t i = 0, j = 0;
313 (i < tgts->count) && (flag_weight_one || flag_count_one); ++i) {
314
315 size_t curr_count = num_src_per_tgt[i];
316 flag_count_one &= curr_count == 1;
317
318 for (size_t k = 0; (k < curr_count) && flag_weight_one; ++k, ++j)
319 flag_weight_one &= fabs(w[j] - 1.0) < WEIGHT_TOL;
320 }
321
322 // if all weights are 1.0 -> use more optimised weight type
323 if (flag_weight_one) {
324
325 // if the number of source points for all target points is one
326 if (flag_count_one)
328 else
329 yac_interp_weights_add_sum(weights, tgts, num_src_per_tgt, srcs);
330
331 } else {
332
333 struct interp_weight_stencil * stencils = weights->stencils;
334 size_t stencils_array_size = weights->stencils_array_size;
335 size_t stencils_size = weights->stencils_size;
336
337 ENSURE_ARRAY_SIZE(stencils, stencils_array_size, stencils_size + tgts->count);
338
339 for (size_t i = 0; i < tgts->count; ++i, ++stencils_size) {
340
341 size_t curr_num_src = num_src_per_tgt[i];
342
343 // remove target for which no weights were provided
344 if (curr_num_src == 0) {
345 --stencils_size;
346 continue;
347 }
348
349 double * curr_weights =
350 xmalloc(curr_num_src * sizeof(*curr_weights));
351
352 stencils[stencils_size].type = WEIGHT_SUM;
353 stencils[stencils_size].tgt = copy_remote_point(tgts->data[i]);
354 stencils[stencils_size].data.weight_sum.srcs =
355 copy_remote_points(srcs, curr_num_src);
356 stencils[stencils_size].data.weight_sum.weights = curr_weights;
357 memcpy(curr_weights, w, curr_num_src * sizeof(*curr_weights));
358
359 srcs += curr_num_src;
360 w += curr_num_src;
361 }
362
363 weights->stencils = stencils;
364 weights->stencils_array_size = stencils_array_size;
365 weights->stencils_size = stencils_size;
366 }
367}
368
370 struct yac_interp_weights * weights, struct remote_points * tgts,
371 size_t * num_src_per_tgt, struct remote_point * srcs) {
372
373 if (tgts->count == 0) return;
374
375 // check whether the number of source points per target is one
376 // for all targets
377 int flag_count_one = 1;
378 for (size_t i = 0; i < tgts->count; ++i) {
379 if (num_src_per_tgt[i] != 1) {
380 flag_count_one = 0;
381 break;
382 }
383 }
384
385 if (flag_count_one) {
386
388
389 } else {
390
391 struct interp_weight_stencil * stencils = weights->stencils;
392 size_t stencils_array_size = weights->stencils_array_size;
393 size_t stencils_size = weights->stencils_size;
394
395 ENSURE_ARRAY_SIZE(stencils, stencils_array_size, stencils_size + tgts->count);
396
397 for (size_t i = 0; i < tgts->count; ++i, ++stencils_size) {
398
399 size_t curr_num_src = num_src_per_tgt[i];
400
401 stencils[stencils_size].type = SUM;
402 stencils[stencils_size].tgt = copy_remote_point(tgts->data[i]);
403 stencils[stencils_size].data.weight_sum.srcs =
404 copy_remote_points(srcs, curr_num_src);
405 stencils[stencils_size].data.weight_sum.weights = NULL;
406
407 srcs += curr_num_src;
408 }
409
410 weights->stencils = stencils;
411 weights->stencils_array_size = stencils_array_size;
412 weights->stencils_size = stencils_size;
413 }
414}
415
417 struct yac_interp_weights * weights, struct remote_points * tgts,
418 struct remote_point * srcs) {
419
420 if (tgts->count == 0) return;
421
422 struct interp_weight_stencil * stencils = weights->stencils;
423 size_t stencils_array_size = weights->stencils_array_size;
424 size_t stencils_size = weights->stencils_size;
425
426 ENSURE_ARRAY_SIZE(stencils, stencils_array_size, stencils_size + tgts->count);
427
428 for (size_t i = 0; i < tgts->count; ++i, ++stencils_size) {
429
430 stencils[stencils_size].type = DIRECT;
431 stencils[stencils_size].tgt = copy_remote_point(tgts->data[i]);
432 stencils[stencils_size].data.direct.src = copy_remote_point(srcs[i]);
433 }
434
435 weights->stencils = stencils;
436 weights->stencils_array_size = stencils_array_size;
437 weights->stencils_size = stencils_size;
438}
439
441 struct yac_interp_weights * weights, struct remote_points * tgts,
442 size_t * src_field_indices, struct remote_point ** srcs_per_field,
443 size_t num_src_fields) {
444
445 if (tgts->count == 0) return;
446
447 if (num_src_fields == 1) {
448 yac_interp_weights_add_direct(weights, tgts, srcs_per_field[0]);
449 return;
450 }
451
452 struct interp_weight_stencil * stencils = weights->stencils;
453 size_t stencils_array_size = weights->stencils_array_size;
454 size_t stencils_size = weights->stencils_size;
455
457 stencils, stencils_array_size, stencils_size + tgts->count);
458 stencils += stencils_size;
459
460 size_t srcs_offsets[num_src_fields];
461 memset(srcs_offsets, 0, num_src_fields * sizeof(srcs_offsets[0]));
462
463 for (size_t i = 0; i < tgts->count; ++i) {
464
465 size_t src_field_idx = src_field_indices[i];
466 stencils[i].type = DIRECT_MF;
467 stencils[i].tgt = copy_remote_point(tgts->data[i]);
468 stencils[i].data.direct_mf.src =
470 srcs_per_field[src_field_idx][srcs_offsets[src_field_idx]++]);
471 stencils[i].data.direct_mf.field_idx = src_field_idx;
472 }
473
474 weights->stencils = stencils;
475 weights->stencils_array_size = stencils_array_size;
476 weights->stencils_size += tgts->count;
477}
478
480 struct yac_interp_weights * weights, struct remote_points * tgts,
481 size_t * num_src_per_field_per_tgt, struct remote_point ** srcs_per_field,
482 size_t num_src_fields) {
483
484 if (tgts->count == 0) return;
485
486 if (num_src_fields == 1) {
488 weights, tgts, num_src_per_field_per_tgt, srcs_per_field[0]);
489 return;
490 }
491
492 // check whether the number of source points per target is one
493 // for all targets
494 int flag_count_one = 1;
495 for (size_t i = 0, k = 0; i < tgts->count; ++i) {
496 size_t count = 0;
497 for (size_t j = 0; j < num_src_fields; ++j, ++k)
498 count += num_src_per_field_per_tgt[k];
499 if (count != 1) {
500 flag_count_one = 0;
501 break;
502 }
503 }
504
505 if (flag_count_one) {
506
507 size_t * src_field_indices =
508 xmalloc(tgts->count * sizeof(*src_field_indices));
509
510 for (size_t i = 0, k = 0; i < tgts->count; ++i)
511 for (size_t j = 0; j < num_src_fields; ++j, ++k)
512 if (num_src_per_field_per_tgt[k])
513 src_field_indices[i] = j;
514
516 weights, tgts, src_field_indices, srcs_per_field, num_src_fields);
517
518 free(src_field_indices);
519
520 } else {
521 struct remote_point * curr_srcs_per_field[num_src_fields];
522 memcpy(curr_srcs_per_field, srcs_per_field,
523 num_src_fields * sizeof(*srcs_per_field));
524
525 struct interp_weight_stencil * stencils = weights->stencils;
526 size_t stencils_array_size = weights->stencils_array_size;
527 size_t stencils_size = weights->stencils_size;
528
530 stencils, stencils_array_size, stencils_size + tgts->count);
531
532 for (size_t i = 0; i < tgts->count; ++i, ++stencils_size) {
533
534 size_t * curr_num_src_per_src_field =
535 num_src_per_field_per_tgt + i * num_src_fields;
536 size_t curr_num_src = 0;
537 for (size_t j = 0; j < num_src_fields; ++j)
538 curr_num_src += curr_num_src_per_src_field[j];
539
540 stencils[stencils_size].type = SUM_MF;
541 stencils[stencils_size].tgt = copy_remote_point(tgts->data[i]);
542 stencils[stencils_size].data.sum_mf.field_indices =
543 xmalloc(
544 curr_num_src *
545 sizeof(*(stencils[stencils_size].data.sum_mf.field_indices)));
546 for (size_t j = 0, l = 0; j < num_src_fields; ++j) {
547 size_t curr_num_src = curr_num_src_per_src_field[j];
548 for (size_t k = 0; k < curr_num_src; ++k, ++l) {
549 stencils[stencils_size].data.sum_mf.field_indices[l] = j;
550 }
551 }
552 stencils[stencils_size].data.sum_mf.srcs =
554 curr_srcs_per_field, curr_num_src_per_src_field, num_src_fields);
555
556 for (size_t j = 0; j < num_src_fields; ++j)
557 curr_srcs_per_field[j] += curr_num_src_per_src_field[j];
558 }
559
560 weights->stencils = stencils;
561 weights->stencils_array_size = stencils_array_size;
562 weights->stencils_size = stencils_size;
563 }
564}
565
567 struct yac_interp_weights * weights, struct remote_points * tgts,
568 size_t * num_src_per_field_per_tgt, struct remote_point ** srcs_per_field,
569 double * w, size_t num_src_fields) {
570
571 if (tgts->count == 0) return;
572
573 if (num_src_fields == 1) {
575 weights, tgts, num_src_per_field_per_tgt, srcs_per_field[0], w);
576 return;
577 }
578
579 // check whether all weights are 1.0 and whether the number of source
580 // points per target is one for all targets
581 int flag_weight_one = 1;
582 for (size_t i = 0, j = 0;
583 (i < tgts->count) && flag_weight_one; ++i) {
584
585 for (size_t src_field_idx = 0; src_field_idx < num_src_fields;
586 ++src_field_idx) {
587
588 size_t curr_count =
589 num_src_per_field_per_tgt[i * num_src_fields + src_field_idx];
590
591 for (size_t k = 0; (k < curr_count) && flag_weight_one; ++k, ++j)
592 flag_weight_one &= fabs(w[j] - 1.0) < 1e-12;
593 }
594 }
595
596 // if all weights are 1.0 -> use more optimised weight type
597 if (flag_weight_one) {
598
600 weights, tgts, num_src_per_field_per_tgt, srcs_per_field, num_src_fields);
601
602 } else {
603
604 struct remote_point * curr_srcs_per_field[num_src_fields];
605 memcpy(curr_srcs_per_field, srcs_per_field,
606 num_src_fields * sizeof(*srcs_per_field));
607
608 struct interp_weight_stencil * stencils = weights->stencils;
609 size_t stencils_array_size = weights->stencils_array_size;
610 size_t stencils_size = weights->stencils_size;
611
613 stencils, stencils_array_size, stencils_size + tgts->count);
614
615 for (size_t i = 0; i < tgts->count; ++i, ++stencils_size) {
616
617 size_t * curr_num_src_per_src_field =
618 num_src_per_field_per_tgt + i * num_src_fields;
619 size_t curr_num_weights = 0;
620 for (size_t j = 0; j < num_src_fields; ++j)
621 curr_num_weights += curr_num_src_per_src_field[j];
622 double * curr_weights =
623 xmalloc(curr_num_weights * sizeof(*curr_weights));
624 size_t * field_indices =
625 xmalloc(curr_num_weights * sizeof(*field_indices));
626
627 stencils[stencils_size].type = WEIGHT_SUM_MF;
628 stencils[stencils_size].tgt = copy_remote_point(tgts->data[i]);
629 stencils[stencils_size].data.weight_sum_mf.field_indices = field_indices;
630 for (size_t j = 0, l = 0; j < num_src_fields; ++j) {
631 size_t curr_num_src = curr_num_src_per_src_field[j];
632 for (size_t k = 0; k < curr_num_src; ++k, ++l) field_indices[l] = j;
633 }
634 stencils[stencils_size].data.weight_sum_mf.srcs =
635 copy_remote_points_mf(curr_srcs_per_field, curr_num_src_per_src_field, num_src_fields);
636 stencils[stencils_size].data.weight_sum_mf.weights = curr_weights;
637 memcpy(curr_weights, w, curr_num_weights * sizeof(*curr_weights));
638
639 for (size_t j = 0; j < num_src_fields; ++j)
640 curr_srcs_per_field[j] += curr_num_src_per_src_field[j];
641 w += curr_num_weights;
642 }
643
644 weights->stencils = stencils;
645 weights->stencils_array_size = stencils_array_size;
646 weights->stencils_size = stencils_size;
647 }
648}
649
650static int compare_stencils_fixed(const void * a, const void * b) {
651
652 int ret = (((struct interp_weight_stencil_fixed *)a)->value >
653 ((struct interp_weight_stencil_fixed *)b)->value) -
654 (((struct interp_weight_stencil_fixed *)a)->value <
655 ((struct interp_weight_stencil_fixed *)b)->value);
656
657 if (ret) return ret;
658
659 return (((struct interp_weight_stencil_fixed *)a)->orig_pos >
660 ((struct interp_weight_stencil_fixed *)b)->orig_pos) -
661 (((struct interp_weight_stencil_fixed *)a)->orig_pos <
662 ((struct interp_weight_stencil_fixed *)b)->orig_pos);
663}
664
665static MPI_Datatype get_fixed_stencil_mpi_datatype(MPI_Comm comm) {
666
667 struct interp_weight_stencil_fixed dummy;
668 MPI_Datatype fixed_stencil_dt;
669 int array_of_blocklengths[] = {1, 1};
670 const MPI_Aint array_of_displacements[] =
671 {(MPI_Aint)(intptr_t)(const void *)&(dummy.value) -
672 (MPI_Aint)(intptr_t)(const void *)&dummy,
673 (MPI_Aint)(intptr_t)(const void *)&(dummy.orig_pos) -
674 (MPI_Aint)(intptr_t)(const void *)&dummy};
675 const MPI_Datatype array_of_types[] = {MPI_DOUBLE, MPI_UINT64_T};
677 MPI_Type_create_struct(2, array_of_blocklengths, array_of_displacements,
678 array_of_types, &fixed_stencil_dt), comm);
679 return yac_create_resized(fixed_stencil_dt, sizeof(dummy), comm);
680}
681
683 MPI_Comm comm, uint64_t count,
684 struct interp_weight_stencil * fixed_stencils,
685 struct yac_interpolation * interp) {
686
687 int comm_size;
688 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
689
690 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
692 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
693
694 for (size_t i = 0; i < count; ++i) {
695 int curr_count = fixed_stencils[i].tgt.data.count;
696 struct remote_point_info * curr_point_infos =
697 (curr_count == 1)?
698 (&(fixed_stencils[i].tgt.data.data.single)):
699 (fixed_stencils[i].tgt.data.data.multi);
700 for (int j = 0; j < curr_count; ++j)
701 sendcounts[curr_point_infos[j].rank]++;
702 }
703
705 1, sendcounts, recvcounts, sdispls, rdispls, comm);
706
707 size_t send_buffer_size =
708 sdispls[comm_size] + sendcounts[comm_size - 1];
709 size_t recv_buffer_size =
710 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
711
712 struct interp_weight_stencil_fixed * buffer =
713 xmalloc((send_buffer_size + recv_buffer_size) * sizeof(*buffer));
714 struct interp_weight_stencil_fixed * send_buffer = buffer + recv_buffer_size;
715 struct interp_weight_stencil_fixed * recv_buffer = buffer;
716
717 // pack fixed stencils
718 for (size_t i = 0; i < count; ++i) {
719 int curr_count = fixed_stencils[i].tgt.data.count;
720 struct remote_point_info * curr_point_infos =
721 (curr_count == 1)?
722 (&(fixed_stencils[i].tgt.data.data.single)):
723 (fixed_stencils[i].tgt.data.data.multi);
724 double value = fixed_stencils[i].data.fixed.value;
725 for (int j = 0; j < curr_count; ++j) {
726 size_t pos = sdispls[curr_point_infos[j].rank + 1]++;
727 send_buffer[pos].value = value;
728 send_buffer[pos].orig_pos = curr_point_infos[j].orig_pos;
729 }
730 }
731
732 // create MPI Datatype for exchanging fixed stencils
733 MPI_Datatype stencil_fixed_dt = get_fixed_stencil_mpi_datatype(comm);
734
735 // redistribute fixed stencils
737 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls,
738 sizeof(*send_buffer), stencil_fixed_dt, comm);
739
740 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
741
742 yac_mpi_call(MPI_Type_free(&stencil_fixed_dt), comm);
743
744 if (recv_buffer_size == 0) {
745 free(buffer);
746 return;
747 }
748
749 // sort stencils first by fixed value and second by orig_pos
750 qsort(recv_buffer, recv_buffer_size, sizeof(*recv_buffer),
752
753 size_t * tgt_pos = xmalloc(recv_buffer_size * sizeof(*tgt_pos));
754 for (size_t i = 0; i < recv_buffer_size; ++i)
755 tgt_pos[i] = (size_t)(recv_buffer[i].orig_pos);
756
757 size_t offset = 0, i = 0;
758 while (offset < recv_buffer_size) {
759 double fixed_value = recv_buffer[i].value;
760 while ((i < recv_buffer_size) && (fixed_value == recv_buffer[i].value)) ++i;
761 size_t curr_count = i - offset;
763 interp, fixed_value, curr_count, tgt_pos + offset);
764 offset = i;
765 }
766
767 free(buffer);
768 free(tgt_pos);
769}
770
771static inline struct remote_point_info select_src(
772 struct remote_point_infos src) {
773
774 if (src.count == 1) return src.data.single;
775
776 int min_rank = INT_MAX;
777 size_t min_rank_idx = SIZE_MAX;
778 for (int i = 0; i < src.count; ++i) {
779 if (src.data.multi[i].rank < min_rank) {
780 min_rank = src.data.multi[i].rank;
781 min_rank_idx = i;
782 }
783 }
784
785 return src.data.multi[min_rank_idx];
786}
787
789 struct Xt_redist_msg * msgs, size_t count, MPI_Comm comm) {
790 for (size_t i = 0; i < count; ++i) {
791 MPI_Datatype * dt = &(msgs[i].datatype);
792 if (*dt != MPI_DATATYPE_NULL) yac_mpi_call(MPI_Type_free(dt), comm);
793 }
794 free(msgs);
795}
796
797static Xt_redist generate_direct_redist(
798 uint64_t * src_orig_poses, size_t * sendcounts,
799 struct interp_weight_stencil_direct * tgt_stencils,
800 size_t * recvcounts, MPI_Comm comm, Xt_config redist_config) {
801
802 int comm_size;
803 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
804
805 size_t nsend = 0, nrecv = 0;
806 size_t max_buffer_size = 0;
807 for (int i = 0; i < comm_size; ++i) {
808 nsend += sendcounts[i] > 0;
809 nrecv += recvcounts[i] > 0;
810 if (max_buffer_size < sendcounts[i]) max_buffer_size = sendcounts[i];
811 if (max_buffer_size < recvcounts[i]) max_buffer_size = recvcounts[i];
812 }
813
814 size_t total_num_msg = nsend + nrecv;
815
816 struct Xt_redist_msg * msgs_buffer =
817 xmalloc(total_num_msg * sizeof(*msgs_buffer));
818 struct Xt_redist_msg * send_msgs = msgs_buffer;
819 struct Xt_redist_msg * recv_msgs = msgs_buffer + nsend;
820
821 int * pos_buffer = xmalloc((size_t)max_buffer_size * sizeof(*pos_buffer));
822
823 nsend = 0;
824 nrecv = 0;
825 for (int i = 0; i < comm_size; ++i) {
826 if (recvcounts[i] > 0) {
827 for (size_t j = 0; j < recvcounts[i]; ++j)
828 pos_buffer[j] = (int)tgt_stencils[j].orig_pos;
829 tgt_stencils += recvcounts[i];
830 recv_msgs[nrecv].rank = i;
831 recv_msgs[nrecv].datatype =
832 xt_mpi_generate_datatype(pos_buffer, recvcounts[i], MPI_DOUBLE, comm);
833 nrecv++;
834 }
835 if (sendcounts[i] > 0) {
836 for (size_t j = 0; j < sendcounts[i]; ++j)
837 pos_buffer[j] = (int)src_orig_poses[j];
838 src_orig_poses += sendcounts[i];
839 send_msgs[nsend].rank = i;
840 send_msgs[nsend].datatype =
841 xt_mpi_generate_datatype(pos_buffer, sendcounts[i], MPI_DOUBLE, comm);
842 nsend++;
843 }
844 }
845
846 free(pos_buffer);
847
848 Xt_redist redist;
849 MPI_Comm split_comm;
850
851 if (total_num_msg > 0) {
852
853 yac_mpi_call(MPI_Comm_split(comm, 1, 0, &split_comm), comm);
854
855 int * rank_buffer =
856 xmalloc(2 * total_num_msg * sizeof(*rank_buffer));
857 int * orig_ranks = rank_buffer;
858 int * split_ranks = rank_buffer + total_num_msg;
859
860 for (size_t i = 0; i < total_num_msg; ++i)
861 orig_ranks[i] = msgs_buffer[i].rank;
862
863 MPI_Group orig_group, split_group;
864 yac_mpi_call(MPI_Comm_group(comm, &orig_group), comm);
865 yac_mpi_call(MPI_Comm_group(split_comm, &split_group), comm);
866
868 MPI_Group_translate_ranks(orig_group, total_num_msg, orig_ranks,
869 split_group, split_ranks), split_comm);
870
871 for (size_t i = 0; i < total_num_msg; ++i)
872 msgs_buffer[i].rank = split_ranks[i];
873
874 free(rank_buffer);
875
876 yac_mpi_call(MPI_Group_free(&split_group), comm);
877 yac_mpi_call(MPI_Group_free(&orig_group), comm);
878
879 // generate redist
880 redist =
881 xt_redist_single_array_base_custom_new(
882 nsend, nrecv, send_msgs, recv_msgs, split_comm, redist_config);
883
884 } else {
885 yac_mpi_call(MPI_Comm_split(comm, 0, 0, &split_comm), comm);
886 redist = NULL;
887 }
888
889 yac_mpi_call(MPI_Comm_free(&split_comm), comm);
890 xt_redist_msg_free(msgs_buffer, total_num_msg, comm);
891
892 return redist;
893}
894
895static Xt_redist * generate_direct_mf_redists(
896 uint64_t * src_orig_pos, size_t * sendcounts,
897 struct interp_weight_stencil_direct_mf * tgt_stencils,
898 size_t * recvcounts, size_t num_src_fields, MPI_Comm comm,
899 Xt_config redist_config) {
900
901 int comm_size;
902 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
903
904 size_t nsends[num_src_fields], nrecvs[num_src_fields];
905 size_t max_buffer_size = 0;
906 memset(nsends, 0, num_src_fields * sizeof(nsends[0]));
907 memset(nrecvs, 0, num_src_fields * sizeof(nrecvs[0]));
908 for (int i = 0; i < comm_size; ++i) {
909 for (size_t j = 0; j < num_src_fields; ++j) {
910 size_t idx = (size_t)i * num_src_fields + j;
911 if (sendcounts[idx] > 0) nsends[j]++;
912 if (recvcounts[idx] > 0) nrecvs[j]++;
913 if (max_buffer_size < sendcounts[idx]) max_buffer_size = sendcounts[idx];
914 if (max_buffer_size < recvcounts[idx]) max_buffer_size = recvcounts[idx];
915 }
916 }
917
918 size_t nsend = 0, nrecv = 0;
919 size_t send_offsets[num_src_fields];
920 size_t recv_offsets[num_src_fields];
921 for (size_t i = 0; i < num_src_fields; ++i) {
922 send_offsets[i] = nsend;
923 recv_offsets[i] = nrecv;
924 nsend += nsends[i];
925 nrecv += nrecvs[i];
926 }
927
928 size_t total_num_msg = nsend + nrecv;
929
930 struct Xt_redist_msg * msgs_buffer =
931 xmalloc(total_num_msg * sizeof(*msgs_buffer));
932 struct Xt_redist_msg * send_msgs = msgs_buffer;
933 struct Xt_redist_msg * recv_msgs = msgs_buffer + nsend;
934
935 int * pos_buffer = xmalloc(max_buffer_size * sizeof(*pos_buffer));
936
937 for (int i = 0; i < comm_size; ++i) {
938 for (size_t src_field_idx = 0; src_field_idx < num_src_fields;
939 ++src_field_idx) {
940 size_t idx = (size_t)i * num_src_fields + src_field_idx;
941 if (recvcounts[idx] > 0) {
942 for (size_t j = 0; j < recvcounts[idx]; ++j)
943 pos_buffer[j] = (int)tgt_stencils[j].orig_pos;
944 tgt_stencils += recvcounts[idx];
945 recv_msgs[recv_offsets[src_field_idx]].rank = i;
946 recv_msgs[recv_offsets[src_field_idx]].datatype =
947 xt_mpi_generate_datatype(
948 pos_buffer, recvcounts[idx], MPI_DOUBLE, comm);
949 recv_offsets[src_field_idx]++;
950 }
951 if (sendcounts[idx] > 0) {
952 for (size_t j = 0; j < sendcounts[idx]; ++j)
953 pos_buffer[j] = (int)src_orig_pos[j];
954 src_orig_pos += sendcounts[idx];
955 send_msgs[send_offsets[src_field_idx]].rank = i;
956 send_msgs[send_offsets[src_field_idx]].datatype =
957 xt_mpi_generate_datatype(
958 pos_buffer, sendcounts[idx], MPI_DOUBLE, comm);
959 send_offsets[src_field_idx]++;
960 }
961 }
962 }
963
964 free(pos_buffer);
965
966 Xt_redist * redists;
967 MPI_Comm split_comm;
968
969 if (total_num_msg > 0) {
970
971 yac_mpi_call(MPI_Comm_split(comm, 1, 0, &split_comm), comm);
972
973 int * rank_buffer =
974 xmalloc(2 * total_num_msg * sizeof(*rank_buffer));
975 int * orig_ranks = rank_buffer;
976 int * split_ranks = rank_buffer + total_num_msg;
977
978 for (size_t i = 0; i < total_num_msg; ++i)
979 orig_ranks[i] = msgs_buffer[i].rank;
980
981 MPI_Group orig_group, split_group;
982 yac_mpi_call(MPI_Comm_group(comm, &orig_group), comm);
983 yac_mpi_call(MPI_Comm_group(split_comm, &split_group), comm);
984
986 MPI_Group_translate_ranks(orig_group, total_num_msg, orig_ranks,
987 split_group, split_ranks), split_comm);
988
989 for (size_t i = 0; i < total_num_msg; ++i)
990 msgs_buffer[i].rank = split_ranks[i];
991
992 free(rank_buffer);
993
994 yac_mpi_call(MPI_Group_free(&split_group), comm);
995 yac_mpi_call(MPI_Group_free(&orig_group), comm);
996
997 // generate redists
998 redists = xmalloc(num_src_fields * sizeof(*redists));
999 for (size_t src_field_idx = 0; src_field_idx < num_src_fields;
1000 ++src_field_idx) {
1001 redists[src_field_idx] =
1002 xt_redist_single_array_base_custom_new(
1003 nsends[src_field_idx], nrecvs[src_field_idx],
1004 send_msgs, recv_msgs, split_comm, redist_config);
1005 send_msgs += nsends[src_field_idx];
1006 recv_msgs += nrecvs[src_field_idx];
1007 }
1008
1009 } else {
1010 yac_mpi_call(MPI_Comm_split(comm, 0, 0, &split_comm), comm);
1011 redists = NULL;
1012 }
1013
1014 yac_mpi_call(MPI_Comm_free(&split_comm), comm);
1015 xt_redist_msg_free(msgs_buffer, total_num_msg, comm);
1016
1017 return redists;
1018}
1019
1020static MPI_Datatype get_direct_stencil_mpi_datatype(MPI_Comm comm) {
1021
1022 struct interp_weight_stencil_direct dummy;
1023 MPI_Datatype direct_stencil_dt;
1024 int array_of_blocklengths[] = {1, 1};
1025 const MPI_Aint array_of_displacements[] =
1026 {(MPI_Aint)(intptr_t)(const void *)&(dummy.src) -
1027 (MPI_Aint)(intptr_t)(const void *)&dummy,
1028 (MPI_Aint)(intptr_t)(const void *)&(dummy.orig_pos) -
1029 (MPI_Aint)(intptr_t)(const void *)&dummy};
1030 MPI_Datatype array_of_types[] =
1031 {yac_get_remote_point_info_mpi_datatype(comm), MPI_UINT64_T};
1033 MPI_Type_create_struct(2, array_of_blocklengths, array_of_displacements,
1034 array_of_types, &direct_stencil_dt), comm);
1035 yac_mpi_call(MPI_Type_free(&(array_of_types[0])), comm);
1036 return yac_create_resized(direct_stencil_dt, sizeof(dummy), comm);
1037}
1038
1039static int compare_stencils_direct(const void * a, const void * b) {
1040
1041 int ret = ((struct interp_weight_stencil_direct *)a)->src.rank -
1042 ((struct interp_weight_stencil_direct *)b)->src.rank;
1043
1044 if (ret) return ret;
1045
1046 return (((struct interp_weight_stencil_direct *)a)->orig_pos >
1047 ((struct interp_weight_stencil_direct *)b)->orig_pos) -
1048 (((struct interp_weight_stencil_direct *)a)->orig_pos <
1049 ((struct interp_weight_stencil_direct *)b)->orig_pos);
1050}
1051
1053 MPI_Comm comm, uint64_t count,
1054 struct interp_weight_stencil * direct_stencils,
1055 struct yac_interpolation * interp, Xt_config redist_config) {
1056
1057 int comm_size;
1058 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1059
1060 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
1062 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1063
1064 for (size_t i = 0; i < count; ++i) {
1065 int curr_count = direct_stencils[i].tgt.data.count;
1066 struct remote_point_info * curr_point_info =
1067 (curr_count == 1)?
1068 (&(direct_stencils[i].tgt.data.data.single)):
1069 (direct_stencils[i].tgt.data.data.multi);
1070 for (int j = 0; j < curr_count; ++j)
1071 sendcounts[curr_point_info[j].rank]++;
1072 }
1073
1075 1, sendcounts, recvcounts, sdispls, rdispls, comm);
1076
1077 size_t send_buffer_size =
1078 sdispls[comm_size] + sendcounts[comm_size - 1];
1079 size_t recv_buffer_size =
1080 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
1081 size_t tgt_count = recv_buffer_size;
1082
1083 struct interp_weight_stencil_direct * stencil_buffer =
1084 xmalloc((send_buffer_size + recv_buffer_size) * sizeof(*stencil_buffer));
1085 struct interp_weight_stencil_direct * send_stencil_buffer =
1086 stencil_buffer + recv_buffer_size;
1087 struct interp_weight_stencil_direct * recv_stencil_buffer = stencil_buffer;
1088
1089 // pack direct stencils
1090 for (size_t i = 0; i < count; ++i) {
1091 int curr_count = direct_stencils[i].tgt.data.count;
1092 struct remote_point_info * curr_point_infos =
1093 (curr_count == 1)?
1094 (&(direct_stencils[i].tgt.data.data.single)):
1095 (direct_stencils[i].tgt.data.data.multi);
1096 struct remote_point_info src =
1097 select_src(direct_stencils[i].data.direct.src.data);
1098 for (int j = 0; j < curr_count; ++j) {
1099 size_t pos = sdispls[curr_point_infos[j].rank + 1]++;
1100 send_stencil_buffer[pos].src = src;
1101 send_stencil_buffer[pos].orig_pos = curr_point_infos[j].orig_pos;
1102 }
1103 }
1104
1105 // create MPI Datatype for exchanging direct stencils
1106 MPI_Datatype stencil_direct_dt = get_direct_stencil_mpi_datatype(comm);
1107
1108 // redistribute stencils based on target owners
1110 send_stencil_buffer, sendcounts, sdispls,
1111 recv_stencil_buffer, recvcounts, rdispls,
1112 sizeof(*stencil_buffer), stencil_direct_dt, comm);
1113
1114 yac_mpi_call(MPI_Type_free(&stencil_direct_dt), comm);
1115
1116 // sort stencils based on src rank first and by target orig pos second
1117 qsort(recv_stencil_buffer, tgt_count, sizeof(*recv_stencil_buffer),
1119
1120 memset(sendcounts, 0, (size_t)comm_size * sizeof(*sendcounts));
1121
1122 for (size_t i = 0; i < tgt_count; ++i)
1123 sendcounts[recv_stencil_buffer[i].src.rank]++;
1124
1126 1, sendcounts, recvcounts, sdispls, rdispls, comm);
1127
1128 send_buffer_size = sdispls[comm_size] + sendcounts[comm_size - 1];
1129 recv_buffer_size = rdispls[comm_size - 1] + recvcounts[comm_size - 1];
1130
1131 uint64_t * orig_pos_buffer =
1132 xmalloc((send_buffer_size + recv_buffer_size) * sizeof(*orig_pos_buffer));
1133 uint64_t * send_orig_pos_buffer = orig_pos_buffer + recv_buffer_size;
1134 uint64_t * recv_orig_pos_buffer = orig_pos_buffer;
1135
1136 for (size_t i = 0; i < tgt_count; ++i)
1137 send_orig_pos_buffer[sdispls[recv_stencil_buffer[i].src.rank + 1]++] =
1138 recv_stencil_buffer[i].src.orig_pos;
1139
1140 // inform source processes about their requested points
1142 send_orig_pos_buffer, sendcounts, sdispls,
1143 recv_orig_pos_buffer, recvcounts, rdispls,
1144 sizeof(*send_orig_pos_buffer), MPI_UINT64_T, comm);
1145
1146 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
1147
1148 // generate redist
1149 Xt_redist redist =
1151 recv_orig_pos_buffer, recvcounts, recv_stencil_buffer, sendcounts,
1152 comm, redist_config);
1153 free(orig_pos_buffer);
1154 free(stencil_buffer);
1155
1156 yac_interpolation_add_direct(interp, redist);
1157
1158 if (redist != NULL) xt_redist_delete(redist);
1159}
1160
1161static MPI_Datatype get_direct_mf_stencil_mpi_datatype(MPI_Comm comm) {
1162
1164 MPI_Datatype direct_stencil_mf_dt;
1165 int array_of_blocklengths[] = {1, 1, 1};
1166 const MPI_Aint array_of_displacements[] =
1167 {(MPI_Aint)(intptr_t)(const void *)&(dummy.src) -
1168 (MPI_Aint)(intptr_t)(const void *)&dummy,
1169 (MPI_Aint)(intptr_t)(const void *)&(dummy.src_field_idx) -
1170 (MPI_Aint)(intptr_t)(const void *)&dummy,
1171 (MPI_Aint)(intptr_t)(const void *)&(dummy.orig_pos) -
1172 (MPI_Aint)(intptr_t)(const void *)&dummy};
1173 MPI_Datatype array_of_types[] =
1174 {yac_get_remote_point_info_mpi_datatype(comm), MPI_UINT64_T, MPI_UINT64_T};
1176 MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacements,
1177 array_of_types, &direct_stencil_mf_dt), comm);
1178 yac_mpi_call(MPI_Type_free(&(array_of_types[0])), comm);
1179 return yac_create_resized(direct_stencil_mf_dt, sizeof(dummy), comm);
1180}
1181
1182static int compare_stencils_direct_mf(const void * a, const void * b) {
1183
1184 int ret = ((struct interp_weight_stencil_direct_mf *)a)->src.rank -
1185 ((struct interp_weight_stencil_direct_mf *)b)->src.rank;
1186
1187 if (ret) return ret;
1188
1189 ret = (((struct interp_weight_stencil_direct_mf *)a)->src_field_idx >
1190 ((struct interp_weight_stencil_direct_mf *)b)->src_field_idx) -
1193
1194 if (ret) return ret;
1195
1196 return (((struct interp_weight_stencil_direct_mf *)a)->orig_pos >
1198 (((struct interp_weight_stencil_direct_mf *)a)->orig_pos <
1199 ((struct interp_weight_stencil_direct_mf *)b)->orig_pos);
1200}
1201
1203 MPI_Comm comm, uint64_t count,
1204 struct interp_weight_stencil * direct_mf_stencils,
1205 struct yac_interpolation * interp, Xt_config redist_config) {
1206
1207 // determine the number of source fields
1208 uint64_t num_src_fields = 0;
1209 for (size_t i = 0; i < count; ++i) {
1210 uint64_t src_field_idx =
1211 (uint64_t)(direct_mf_stencils[i].data.direct_mf.field_idx);
1212 if (src_field_idx >= num_src_fields) num_src_fields = src_field_idx + 1;
1213 }
1215 MPI_Allreduce(
1216 MPI_IN_PLACE, &num_src_fields, 1, MPI_UINT64_T, MPI_MAX, comm), comm);
1217
1218 int comm_size;
1219 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1220
1221 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
1223 (size_t)num_src_fields, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1224 size_t * size_t_buffer =
1225 xmalloc(4 * (size_t)comm_size * sizeof(*size_t_buffer));
1226 size_t * total_sendcounts = size_t_buffer + 0 * comm_size;
1227 size_t * total_recvcounts = size_t_buffer + 1 * comm_size;
1228 size_t * total_sdispls = size_t_buffer + 2 * comm_size;
1229 size_t * total_rdispls = size_t_buffer + 3 * comm_size;
1230
1231 for (size_t i = 0; i < count; ++i) {
1232 int curr_count = direct_mf_stencils[i].tgt.data.count;
1233 struct remote_point_info * curr_point_info =
1234 (curr_count == 1)?
1235 (&(direct_mf_stencils[i].tgt.data.data.single)):
1236 (direct_mf_stencils[i].tgt.data.data.multi);
1237 uint64_t src_field_idx =
1238 (uint64_t)(direct_mf_stencils[i].data.direct_mf.field_idx);
1239 for (int j = 0; j < curr_count; ++j)
1240 sendcounts[
1241 (uint64_t)(curr_point_info[j].rank) * num_src_fields + src_field_idx]++;
1242 }
1243
1245 (size_t)num_src_fields, sendcounts, recvcounts, sdispls, rdispls, comm);
1246
1247 size_t saccu = 0, raccu = 0;
1248 for (int i = 0; i < comm_size; ++i) {
1249 total_sdispls[i] = saccu;
1250 total_rdispls[i] = raccu;
1251 total_sendcounts[i] = 0;
1252 total_recvcounts[i] = 0;
1253 for (size_t j = 0; j < num_src_fields; ++j) {
1254 total_sendcounts[i] += sendcounts[num_src_fields * i + j];
1255 total_recvcounts[i] += recvcounts[num_src_fields * i + j];
1256 }
1257 saccu += total_sendcounts[i];
1258 raccu += total_recvcounts[i];
1259 }
1260
1261 size_t send_buffer_size = total_sdispls[comm_size - 1] +
1262 total_sendcounts[comm_size - 1];
1263 size_t recv_buffer_size = total_rdispls[comm_size - 1] +
1264 total_recvcounts[comm_size - 1];
1265 size_t tgt_count = recv_buffer_size;
1266
1267 struct interp_weight_stencil_direct_mf * stencil_buffer =
1268 xmalloc((send_buffer_size + recv_buffer_size) * sizeof(*stencil_buffer));
1269 struct interp_weight_stencil_direct_mf * send_stencil_buffer =
1270 stencil_buffer + recv_buffer_size;
1271 struct interp_weight_stencil_direct_mf * recv_stencil_buffer = stencil_buffer;
1272
1273 // pack direct_mf stencils
1274 for (size_t i = 0; i < count; ++i) {
1275 int curr_count = direct_mf_stencils[i].tgt.data.count;
1276 struct remote_point_info * curr_point_infos =
1277 (curr_count == 1)?
1278 (&(direct_mf_stencils[i].tgt.data.data.single)):
1279 (direct_mf_stencils[i].tgt.data.data.multi);
1280 struct remote_point_info src =
1281 select_src(direct_mf_stencils[i].data.direct_mf.src.data);
1282 uint64_t src_field_idx =
1283 (uint64_t)(direct_mf_stencils[i].data.direct_mf.field_idx);
1284 for (int j = 0; j < curr_count; ++j) {
1285 size_t pos =
1286 sdispls[(uint64_t)(curr_point_infos[j].rank) * num_src_fields +
1287 src_field_idx + 1]++;
1288 send_stencil_buffer[pos].src = src;
1289 send_stencil_buffer[pos].src_field_idx = src_field_idx;
1290 send_stencil_buffer[pos].orig_pos = curr_point_infos[j].orig_pos;
1291 }
1292 }
1293
1294 // create MPI Datatype for exchanging direct_mf stencils
1295 MPI_Datatype stencil_direct_mf_dt = get_direct_mf_stencil_mpi_datatype(comm);
1296
1297 // redistribute stencils based on target owners
1299 send_stencil_buffer, total_sendcounts, total_sdispls,
1300 recv_stencil_buffer, total_recvcounts, total_rdispls,
1301 sizeof(*stencil_buffer), stencil_direct_mf_dt, comm);
1302
1303 yac_mpi_call(MPI_Type_free(&stencil_direct_mf_dt), comm);
1304
1305 // sort stencils based on src rank first, src_field_idx, and
1306 // by target orig pos second
1307 qsort(recv_stencil_buffer, tgt_count, sizeof(*recv_stencil_buffer),
1309
1310 memset(sendcounts, 0,
1311 (size_t)comm_size * (size_t)num_src_fields * sizeof(*sendcounts));
1312
1313 for (size_t i = 0; i < tgt_count; ++i)
1314 sendcounts[(uint64_t)(recv_stencil_buffer[i].src.rank) * num_src_fields +
1315 recv_stencil_buffer[i].src_field_idx]++;
1316
1318 (size_t)num_src_fields, sendcounts, recvcounts, sdispls, rdispls, comm);
1319
1320 saccu = 0, raccu = 0;
1321 for (int i = 0; i < comm_size; ++i) {
1322 total_sdispls[i] = saccu;
1323 total_rdispls[i] = raccu;
1324 total_sendcounts[i] = 0;
1325 total_recvcounts[i] = 0;
1326 for (size_t j = 0; j < num_src_fields; ++j) {
1327 total_sendcounts[i] += sendcounts[num_src_fields * i + j];
1328 total_recvcounts[i] += recvcounts[num_src_fields * i + j];
1329 }
1330 saccu += total_sendcounts[i];
1331 raccu += total_recvcounts[i];
1332 }
1333
1334 send_buffer_size = total_sdispls[comm_size - 1] +
1335 total_sendcounts[comm_size - 1];
1336 recv_buffer_size = total_rdispls[comm_size - 1] +
1337 total_recvcounts[comm_size - 1];
1338
1339 uint64_t * orig_pos_buffer =
1340 xmalloc((send_buffer_size + recv_buffer_size) * sizeof(*orig_pos_buffer));
1341 uint64_t * send_orig_pos_buffer = orig_pos_buffer + recv_buffer_size;
1342 uint64_t * recv_orig_pos_buffer = orig_pos_buffer;
1343
1344 for (size_t i = 0; i < tgt_count; ++i)
1345 send_orig_pos_buffer[
1346 sdispls[(uint64_t)(recv_stencil_buffer[i].src.rank) * num_src_fields +
1347 recv_stencil_buffer[i].src_field_idx + 1]++] =
1348 recv_stencil_buffer[i].src.orig_pos;
1349
1350 // inform source processes about their requested points
1352 send_orig_pos_buffer, total_sendcounts, total_sdispls,
1353 recv_orig_pos_buffer, total_recvcounts, total_rdispls,
1354 sizeof(*send_orig_pos_buffer), MPI_UINT64_T, comm);
1355 free(size_t_buffer);
1356
1357 // generate redist
1358 Xt_redist * redists =
1360 recv_orig_pos_buffer, recvcounts, recv_stencil_buffer, sendcounts,
1361 (size_t)num_src_fields, comm, redist_config);
1362
1363 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
1364 free(orig_pos_buffer);
1365 free(stencil_buffer);
1366
1367 yac_interpolation_add_direct_mf(interp, redists, (size_t)num_src_fields);
1368
1369 if (redists != NULL) {
1370 for (size_t i = 0; i < (size_t)num_src_fields; ++i)
1371 xt_redist_delete(redists[i]);
1372 free(redists);
1373 }
1374}
1375
1377 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1378 MPI_Comm comm) {
1379
1380 UNUSED(stencil);
1381 UNUSED(point_info_dt);
1382
1383 int pack_size_value;
1384
1385 yac_mpi_call(MPI_Pack_size(1, MPI_DOUBLE, comm, &pack_size_value), comm);
1386
1387 return pack_size_value;
1388}
1389
1391 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1392 MPI_Comm comm) {
1393
1394 return
1396 &(stencil->data.direct.src), point_info_dt, comm);
1397}
1398
1400 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1401 MPI_Comm comm) {
1402
1403 return
1405 stencil->data.sum.srcs, point_info_dt, comm);
1406}
1407
1409 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1410 MPI_Comm comm) {
1411
1412 int pack_size_weights;
1414 MPI_Pack_size(
1415 (int)(stencil->data.weight_sum.srcs->count),
1416 MPI_DOUBLE, comm, &pack_size_weights), comm);
1417
1418 return
1420 stencil->data.weight_sum.srcs, point_info_dt, comm) +
1421 pack_size_weights;
1422}
1423
1425 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1426 MPI_Comm comm) {
1427
1428 int pack_size_src_field_idx;
1430 MPI_Pack_size(
1431 1, MPI_UINT64_T, comm, &pack_size_src_field_idx), comm);
1432
1433 return
1435 &(stencil->data.direct_mf.src), point_info_dt, comm) +
1436 pack_size_src_field_idx;
1437}
1438
1440 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1441 MPI_Comm comm) {
1442
1443 int pack_size_weights, pack_size_field_indices;
1444 int count = (int)(stencil->data.weight_sum_mf.srcs->count);
1446 MPI_Pack_size(
1447 count, MPI_DOUBLE, comm, &pack_size_weights), comm);
1449 MPI_Pack_size(
1450 count, MPI_UINT64_T, comm, &pack_size_field_indices), comm);
1451
1452 return
1454 stencil->data.weight_sum_mf.srcs, point_info_dt, comm) +
1455 pack_size_weights + pack_size_field_indices;
1456}
1457
1459 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1460 MPI_Comm comm) {
1461
1462 int pack_size_field_indices;
1464 MPI_Pack_size(
1465 (int)(stencil->data.sum_mf.srcs->count),
1466 MPI_UINT64_T, comm, &pack_size_field_indices), comm);
1467
1468 return
1470 stencil->data.sum_mf.srcs, point_info_dt, comm) +
1471 pack_size_field_indices;
1472}
1473
1475 struct interp_weight_stencil * stencil, struct remote_point point) {
1476
1477 struct interp_weight_stencil stencil_copy = *stencil;
1478 stencil_copy.tgt = copy_remote_point(point);
1479
1480 YAC_ASSERT(
1481 (stencil->type == FIXED) ||
1482 (stencil->type == DIRECT) ||
1483 (stencil->type == SUM) ||
1484 (stencil->type == WEIGHT_SUM) ||
1485 (stencil->type == DIRECT_MF) ||
1486 (stencil->type == SUM_MF) ||
1487 (stencil->type == WEIGHT_SUM_MF),
1488 "ERROR(copy_interp_weight_stencil): invalid stencil type")
1489
1490 switch (stencil->type) {
1491 case(FIXED):
1492 // nothing to be done
1493 break;
1494 case(DIRECT):
1495 stencil_copy.data.direct.src =
1496 copy_remote_point(stencil->data.direct.src);
1497 break;
1498 case(SUM):
1499 stencil_copy.data.weight_sum.weights = NULL;
1500 stencil_copy.data.sum.srcs =
1502 stencil->data.sum.srcs->data, stencil->data.sum.srcs->count);
1503 break;
1504 case(WEIGHT_SUM): {
1505 stencil_copy.data.weight_sum.srcs =
1507 stencil->data.weight_sum.srcs->data,
1508 stencil->data.weight_sum.srcs->count);
1509 size_t weight_size =
1510 stencil->data.weight_sum.srcs->count *
1511 sizeof(*(stencil_copy.data.weight_sum.weights));
1512 stencil_copy.data.weight_sum.weights = xmalloc(weight_size);
1513 memcpy(stencil_copy.data.weight_sum.weights,
1514 stencil->data.weight_sum.weights, weight_size);
1515 break;
1516 }
1517 case(DIRECT_MF):
1518 stencil_copy.data.direct_mf.src =
1520 stencil_copy.data.direct_mf.field_idx =
1521 stencil->data.direct_mf.field_idx;
1522 break;
1523 case(SUM_MF): {
1524 stencil_copy.data.sum_mf.srcs =
1526 stencil->data.sum_mf.srcs->data,
1527 stencil->data.sum_mf.srcs->count);
1528 size_t field_indices_size =
1529 stencil->data.sum_mf.srcs->count *
1530 sizeof(*(stencil_copy.data.sum_mf.field_indices));
1531 stencil_copy.data.sum_mf.field_indices = xmalloc(field_indices_size);
1532 memcpy(stencil_copy.data.sum_mf.field_indices,
1533 stencil->data.sum_mf.field_indices, field_indices_size);
1534 break;
1535 }
1536 default:
1537 case(WEIGHT_SUM_MF): {
1538 stencil_copy.data.weight_sum_mf.srcs =
1540 stencil->data.weight_sum_mf.srcs->data,
1541 stencil->data.weight_sum_mf.srcs->count);
1542 size_t weight_size =
1543 stencil->data.weight_sum_mf.srcs->count *
1544 sizeof(*(stencil_copy.data.weight_sum_mf.weights));
1545 stencil_copy.data.weight_sum_mf.weights = xmalloc(weight_size);
1546 memcpy(stencil_copy.data.weight_sum_mf.weights,
1547 stencil->data.weight_sum_mf.weights, weight_size);
1548 size_t field_indices_size =
1549 stencil->data.weight_sum_mf.srcs->count *
1550 sizeof(*(stencil_copy.data.weight_sum_mf.field_indices));
1551 stencil_copy.data.weight_sum_mf.field_indices =
1552 xmalloc(field_indices_size);
1553 memcpy(stencil_copy.data.weight_sum_mf.field_indices,
1554 stencil->data.weight_sum_mf.field_indices, field_indices_size);
1555 break;
1556 }
1557 };
1558 return stencil_copy;
1559}
1560
1562 struct interp_weight_stencil * stencil, struct remote_point point,
1563 double weight) {
1564
1565 if (weight == 1.0) return copy_interp_weight_stencil(stencil, point);
1566
1567 struct remote_point * srcs;
1568 size_t src_count;
1569 double * weights;
1570
1571 YAC_ASSERT(
1572 (stencil->type == FIXED) ||
1573 (stencil->type == DIRECT) ||
1574 (stencil->type == SUM) ||
1575 (stencil->type == WEIGHT_SUM),
1576 "ERROR(wcopy_interp_weight_stencil): invalid stencil type")
1577
1578 switch (stencil->type) {
1579 case (FIXED):
1580 return
1581 (struct interp_weight_stencil) {
1582 .type = FIXED,
1583 .data.fixed.value = stencil->data.fixed.value * weight,
1584 .tgt = copy_remote_point(point)};
1585 case (DIRECT):
1586 src_count = 1;
1587 srcs = &(stencil->data.direct.src);
1588 weights = NULL;
1589 break;
1590 case (SUM):
1591 src_count = stencil->data.sum.srcs->count;
1592 srcs = stencil->data.sum.srcs->data;
1593 weights = NULL;
1594 break;
1595 default:
1596 case (WEIGHT_SUM):
1597 src_count = stencil->data.weight_sum.srcs->count;
1598 srcs = stencil->data.weight_sum.srcs->data;
1599 weights = stencil->data.weight_sum.weights;
1600 break;
1601 };
1602
1603 double * new_weights = xmalloc(src_count * sizeof(*new_weights));
1604 if (weights == NULL)
1605 for (size_t i = 0; i < src_count; ++i) new_weights[i] = weight;
1606 else
1607 for (size_t i = 0; i < src_count; ++i) new_weights[i] = weights[i] * weight;
1608
1609 struct interp_weight_stencil stencil_wcopy;
1610 stencil_wcopy.type = WEIGHT_SUM;
1611 stencil_wcopy.data.weight_sum.srcs = copy_remote_points(srcs, src_count);
1612 stencil_wcopy.data.weight_sum.weights = new_weights;
1613 stencil_wcopy.tgt = copy_remote_point(point);
1614
1615 return stencil_wcopy;
1616}
1617
1618static int compare_w_global_id(const void * a, const void * b) {
1619
1620 int ret = (((struct weighted_global_id *)a)->global_id >
1621 ((struct weighted_global_id *)b)->global_id) -
1622 (((struct weighted_global_id *)a)->global_id <
1623 ((struct weighted_global_id *)b)->global_id);
1624
1625 if (ret) return ret;
1626
1627 return (((struct weighted_global_id *)a)->weight >
1628 ((struct weighted_global_id *)b)->weight) -
1629 (((struct weighted_global_id *)a)->weight <
1630 ((struct weighted_global_id *)b)->weight);
1631}
1632
1633static int compare_remote_point(const void * a, const void * b) {
1634
1635 return ((const struct remote_point*)a)->global_id -
1636 ((const struct remote_point*)b)->global_id;
1637}
1638
1639static void compact_srcs_w(
1640 struct remote_points * srcs, double ** w) {
1641
1642 struct remote_point * data = srcs->data;
1643 size_t count = srcs->count;
1644
1645 struct weighted_global_id * w_global_id =
1646 xmalloc(count * sizeof(*w_global_id));
1647
1648 // extract global ids and weights
1649 for (size_t i = 0; i < count; ++i) {
1650 w_global_id[i].global_id = data[i].global_id;
1651 w_global_id[i].weight = (*w)[i];
1652 }
1653
1654 // sort by global ids and weights
1655 qsort(w_global_id, count, sizeof(*w_global_id), compare_w_global_id);
1656
1657 // sort sources by global ids
1658 qsort(data, count, sizeof(*data), compare_remote_point);
1659
1660 size_t new_count = 0;
1661
1662 // compact sources
1663 for (size_t i = 0; i < count;) {
1664
1665 data[new_count] = data[i];
1666
1667 yac_int curr_global_id = w_global_id[i].global_id;
1668 double curr_weight = w_global_id[i].weight;
1669
1670 ++i;
1671
1672 while((i < count) && (curr_global_id == w_global_id[i].global_id)) {
1673
1674 curr_weight += w_global_id[i].weight;
1675 ++i;
1676 }
1677
1678 (*w)[new_count] = curr_weight;
1679 ++new_count;
1680 }
1681
1682 free(w_global_id);
1683
1684 srcs->data = xrealloc(data, new_count * sizeof(*data));
1685 srcs->count = new_count;
1686 *w = xrealloc(*w, new_count * sizeof(**w));
1687}
1688
1690 struct interp_weight_stencil ** stencils, double * w, size_t num_stencils) {
1691
1692 size_t src_count = 0;
1693 size_t point_info_buffer_size = 0;
1694
1695 for (size_t i = 0; i < num_stencils; ++i) {
1696 size_t curr_src_count;
1697 struct remote_point * srcs;
1698 YAC_ASSERT(
1699 (stencils[i]->type == DIRECT) ||
1700 (stencils[i]->type == SUM) ||
1701 (stencils[i]->type == WEIGHT_SUM),
1702 "ERROR(stencils_merge_wsum): invalid stencil type")
1703 switch (stencils[i]->type) {
1704 case (DIRECT):
1705 curr_src_count = 1;
1706 srcs = &(stencils[i]->data.direct.src);
1707 break;
1708 case (SUM):
1709 curr_src_count = stencils[i]->data.sum.srcs->count;
1710 srcs = stencils[i]->data.sum.srcs->data;
1711 break;
1712 default:
1713 case (WEIGHT_SUM):
1714 curr_src_count = stencils[i]->data.weight_sum.srcs->count;
1715 srcs = stencils[i]->data.weight_sum.srcs->data;
1716 break;
1717 };
1718 src_count += curr_src_count;
1719 for (size_t j = 0, curr_src_data_count; j < curr_src_count; ++j)
1720 if (((curr_src_data_count = srcs[j].data.count)) > 1)
1721 point_info_buffer_size += curr_src_data_count;
1722 }
1723
1724 struct remote_points * srcs =
1725 xmalloc(point_info_buffer_size * sizeof(struct remote_point_info) +
1726 sizeof(*srcs));
1727 srcs->data = xmalloc(src_count * sizeof(*(srcs->data)));
1728 srcs->count = src_count;
1729 struct remote_point_info * point_info_buffer = &(srcs->buffer[0]);
1730 double * new_w = xmalloc(src_count * sizeof(*new_w));
1731
1732 for (size_t i = 0, offset = 0; i < num_stencils; ++i) {
1733 size_t curr_src_count;
1734 struct remote_point * curr_srcs;
1735 double * stencil_w;
1736 YAC_ASSERT(
1737 (stencils[i]->type == DIRECT) ||
1738 (stencils[i]->type == SUM) ||
1739 (stencils[i]->type == WEIGHT_SUM),
1740 "ERROR(stencils_merge_wsum): invalid stencil type")
1741 switch (stencils[i]->type) {
1742 case (DIRECT):
1743 curr_src_count = 1;
1744 curr_srcs = &(stencils[i]->data.direct.src);
1745 stencil_w = NULL;
1746 break;
1747 case (SUM):
1748 curr_src_count = stencils[i]->data.sum.srcs->count;
1749 curr_srcs = stencils[i]->data.sum.srcs->data;
1750 stencil_w = NULL;
1751 break;
1752 default:
1753 case (WEIGHT_SUM):
1754 curr_src_count = stencils[i]->data.weight_sum.srcs->count;
1755 curr_srcs = stencils[i]->data.weight_sum.srcs->data;
1756 stencil_w = stencils[i]->data.weight_sum.weights;
1757 break;
1758 };
1760 srcs->data + offset, curr_srcs, curr_src_count, &point_info_buffer);
1761 if (stencil_w == NULL)
1762 for (size_t j = 0; j < curr_src_count; ++j, ++offset)
1763 new_w[offset] = w[i];
1764 else
1765 for (size_t j = 0; j < curr_src_count; ++j, ++offset)
1766 new_w[offset] = w[i] * stencil_w[j];
1767 }
1768
1769 compact_srcs_w(srcs, &new_w);
1770
1771 struct interp_weight_stencil merge_stencil;
1772 merge_stencil.type = WEIGHT_SUM;
1773 merge_stencil.data.weight_sum.srcs = srcs;
1774 merge_stencil.data.weight_sum.weights = new_w;
1775
1776 return merge_stencil;
1777}
1778
1780 struct interp_weight_stencil ** stencils, double * w, size_t num_stencils) {
1781
1782 for (size_t i = 0; i < num_stencils; ++i)
1783 if (w[i] != 1.0)
1784 return stencils_merge_wsum(stencils, w, num_stencils);
1785
1786 size_t src_count = 0;
1787 size_t point_info_buffer_size = 0;
1788
1789 for (size_t i = 0; i < num_stencils; ++i) {
1790 size_t curr_src_count;
1791 struct remote_point * srcs;
1792 YAC_ASSERT(
1793 (stencils[i]->type == DIRECT) ||
1794 (stencils[i]->type == SUM),
1795 "ERROR(stencils_merge_sum): invalid stencil type")
1796 switch (stencils[i]->type) {
1797 case (DIRECT):
1798 curr_src_count = 1;
1799 srcs = &(stencils[i]->data.direct.src);
1800 break;
1801 default:
1802 case (SUM):
1803 curr_src_count = stencils[i]->data.sum.srcs->count;
1804 srcs = stencils[i]->data.sum.srcs->data;
1805 break;
1806 };
1807 src_count += curr_src_count;
1808 for (size_t j = 0, curr_src_data_count; j < curr_src_count; ++j)
1809 if (((curr_src_data_count = srcs[j].data.count)) > 1)
1810 point_info_buffer_size += curr_src_data_count;
1811 }
1812
1813 struct remote_points * srcs =
1814 xmalloc(point_info_buffer_size * sizeof(struct remote_point_info) +
1815 sizeof(*srcs));
1816 srcs->data = xmalloc(src_count * sizeof(*(srcs->data)));
1817 srcs->count = src_count;
1818 struct remote_point_info * point_info_buffer = &(srcs->buffer[0]);
1819
1820 for (size_t i = 0, offset = 0; i < num_stencils; ++i) {
1821 size_t curr_src_count;
1822 struct remote_point * curr_srcs;
1823 YAC_ASSERT(
1824 (stencils[i]->type == DIRECT) ||
1825 (stencils[i]->type == SUM),
1826 "ERROR(stencils_merge_sum): invalid stencil type")
1827 switch (stencils[i]->type) {
1828 case (DIRECT):
1829 curr_src_count = 1;
1830 curr_srcs = &(stencils[i]->data.direct.src);
1831 break;
1832 default:
1833 case (SUM):
1834 curr_src_count = stencils[i]->data.sum.srcs->count;
1835 curr_srcs = stencils[i]->data.sum.srcs->data;
1836 break;
1837 };
1839 srcs->data + offset, curr_srcs, curr_src_count, &point_info_buffer);
1840 offset += curr_src_count;
1841 }
1842
1843 qsort(srcs->data, srcs->count, sizeof(*(srcs->data)), compare_remote_point);
1844
1845 struct interp_weight_stencil merge_stencil;
1846 merge_stencil.type = SUM;
1847 merge_stencil.data.sum.srcs = srcs;
1848
1849 return merge_stencil;
1850}
1851
1853 struct interp_weight_stencil ** stencils, double * w, size_t num_stencils,
1854 struct remote_point point) {
1855
1856 if (num_stencils == 1)
1857 return wcopy_interp_weight_stencil(*stencils, point, *w);
1858
1859 int fixed_count = 0;
1860 int direct_count = 0;
1861 int sum_count = 0;
1862 int wsum_count = 0;
1863 double fixed_value = 0.0;
1864
1865 for (size_t i = 0; i < num_stencils; ++i) {
1866 YAC_ASSERT(
1867 (stencils[i]->type != DIRECT_MF) &&
1868 (stencils[i]->type != SUM_MF) &&
1869 (stencils[i]->type != WEIGHT_SUM_MF),
1870 "ERROR(stencils_merge): multiple source fields not yet supported")
1871 YAC_ASSERT(
1872 (stencils[i]->type == FIXED) ||
1873 (stencils[i]->type == DIRECT) ||
1874 (stencils[i]->type == SUM) ||
1875 (stencils[i]->type == WEIGHT_SUM),
1876 "ERROR(stencils_merge): unsupported stencil type")
1877 switch (stencils[i]->type) {
1878 case (FIXED):
1879 fixed_value += stencils[i]->data.fixed.value * w[i];
1880 fixed_count++;
1881 break;
1882 case (DIRECT):
1883 direct_count++;
1884 break;
1885 case (SUM):
1886 sum_count++;
1887 break;
1888 default:
1889 case (WEIGHT_SUM):
1890 wsum_count++;
1891 break;
1892 };
1893 }
1894
1895 struct interp_weight_stencil merge_stencil;
1896
1897 YAC_ASSERT(
1898 (fixed_count > 0) || (wsum_count > 0) ||
1899 (sum_count > 0) || (direct_count > 0),
1900 "ERROR(stencils_merge): unknown error")
1901 if (fixed_count > 0) {
1902
1903 YAC_ASSERT(
1904 (direct_count + sum_count + wsum_count) <= 0,
1905 "ERROR(stencils_merge): invalid stencil combination")
1906
1907 merge_stencil = **stencils;
1908 merge_stencil.data.fixed.value = fixed_value;
1909 } else if (wsum_count > 0)
1910 merge_stencil =
1911 stencils_merge_wsum(stencils, w, num_stencils);
1912 else if ((sum_count > 0) || (direct_count > 0))
1913 merge_stencil =
1914 stencils_merge_sum(stencils, w, num_stencils);
1915
1916 merge_stencil.tgt = copy_remote_point(point);
1917
1918 return merge_stencil;
1919}
1920
1922 struct interp_weight_stencil * stencils, size_t count, size_t * pack_order,
1923 int * pack_sizes, MPI_Datatype point_info_dt, MPI_Comm comm) {
1924
1925 int pack_size_type;
1926 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_type), comm);
1927
1928 for (size_t i = 0; i < count; ++i) {
1929
1930 struct interp_weight_stencil * curr_stencil = stencils + pack_order[i];
1931 int (*func_pack_size)(
1932 struct interp_weight_stencil * stencil, MPI_Datatype point_info_dt,
1933 MPI_Comm comm);
1934 YAC_ASSERT(
1935 (curr_stencil->type == FIXED) ||
1936 (curr_stencil->type == DIRECT) ||
1937 (curr_stencil->type == SUM) ||
1938 (curr_stencil->type == WEIGHT_SUM) ||
1939 (curr_stencil->type == DIRECT_MF) ||
1940 (curr_stencil->type == SUM_MF) ||
1941 (curr_stencil->type == WEIGHT_SUM_MF),
1942 "ERROR(get_stencils_pack_sizes): invalid stencil type")
1943 switch (curr_stencil->type) {
1944 case(FIXED):
1945 func_pack_size = get_stencil_pack_size_fixed;
1946 break;
1947 case(DIRECT):
1948 func_pack_size = get_stencil_pack_size_direct;
1949 break;
1950 case(SUM):
1951 func_pack_size = get_stencil_pack_size_sum;
1952 break;
1953 default:
1954 case(WEIGHT_SUM):
1955 func_pack_size = get_stencil_pack_size_wsum;
1956 break;
1957 case(DIRECT_MF):
1958 func_pack_size = get_stencil_pack_size_direct_mf;
1959 break;
1960 case(SUM_MF):
1961 func_pack_size = get_stencil_pack_size_sum_mf;
1962 break;
1963 case(WEIGHT_SUM_MF):
1964 func_pack_size = get_stencil_pack_size_wsum_mf;
1965 break;
1966 };
1967 pack_sizes[i] = pack_size_type +
1969 &(curr_stencil->tgt), point_info_dt, comm) +
1970 func_pack_size(curr_stencil, point_info_dt, comm);
1971 }
1972}
1973
1975 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
1976 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
1977
1978 UNUSED(point_info_dt);
1979
1980 // fixed value
1982 MPI_Pack(&(stencil->data.fixed.value), 1, MPI_DOUBLE, buffer, buffer_size,
1983 position, comm), comm);
1984}
1985
1987 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
1988 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
1989
1990 // src
1992 &(stencil->data.direct.src), buffer, buffer_size, position, point_info_dt,
1993 comm);
1994}
1995
1997 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
1998 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
1999
2000 // srcs
2002 stencil->data.sum.srcs, buffer, buffer_size, position, point_info_dt, comm);
2003}
2004
2006 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2007 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2008
2009 // srcs
2011 stencil->data.weight_sum.srcs, buffer, buffer_size, position,
2012 point_info_dt, comm);
2013 // weights
2015 MPI_Pack(stencil->data.weight_sum.weights,
2016 (int)(stencil->data.weight_sum.srcs->count), MPI_DOUBLE,
2017 buffer, buffer_size, position, comm), comm);
2018}
2019
2021 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2022 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2023
2024 // src
2026 &(stencil->data.direct_mf.src), buffer, buffer_size, position,
2027 point_info_dt, comm);
2028
2029 // field_idx
2030 uint64_t temp_field_idx = (uint64_t)(stencil->data.direct_mf.field_idx);
2032 MPI_Pack(&temp_field_idx, 1, MPI_UINT64_T,
2033 buffer, buffer_size, position, comm), comm);
2034}
2035
2037 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2038 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2039
2040 // srcs
2042 stencil->data.sum_mf.srcs, buffer, buffer_size, position,
2043 point_info_dt, comm);
2044
2045 size_t count = stencil->data.sum_mf.srcs->count;
2046 // field_indices
2047 uint64_t * temp_field_indices = xmalloc(count * sizeof(*temp_field_indices));
2048 for (size_t i = 0; i < count; ++i)
2049 temp_field_indices[i] =
2050 (uint64_t)(stencil->data.sum_mf.field_indices[i]);
2052 MPI_Pack(temp_field_indices, (int)count, MPI_UINT64_T,
2053 buffer, buffer_size, position, comm), comm);
2054 free(temp_field_indices);
2055}
2056
2058 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2059 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2060
2061 // srcs
2063 stencil->data.weight_sum_mf.srcs, buffer, buffer_size, position,
2064 point_info_dt, comm);
2065
2066 size_t count = stencil->data.weight_sum_mf.srcs->count;
2067 // weights
2069 MPI_Pack(stencil->data.weight_sum_mf.weights, (int)count, MPI_DOUBLE,
2070 buffer, buffer_size, position, comm), comm);
2071 // field_indices
2072 uint64_t * temp_field_indices = xmalloc(count * sizeof(*temp_field_indices));
2073 for (size_t i = 0; i < count; ++i)
2074 temp_field_indices[i] =
2075 (uint64_t)(stencil->data.weight_sum_mf.field_indices[i]);
2077 MPI_Pack(temp_field_indices, (int)count, MPI_UINT64_T,
2078 buffer, buffer_size, position, comm), comm);
2079 free(temp_field_indices);
2080}
2081
2082static void pack_stencils(
2083 struct interp_weight_stencil * stencils, size_t count, size_t * pack_order,
2084 void ** pack_data, int * pack_sizes, MPI_Datatype point_info_dt,
2085 MPI_Comm comm) {
2086
2088 stencils, count, pack_order, pack_sizes, point_info_dt, comm);
2089
2090 size_t pack_buffer_size = 0;
2091 for (size_t i = 0; i < count; ++i)
2092 pack_buffer_size += (size_t)(pack_sizes[i]);
2093
2094 void * pack_data_ = xmalloc(pack_buffer_size);
2095 size_t total_pack_size = 0;
2096
2097 for (size_t i = 0; i < count; ++i) {
2098
2099 struct interp_weight_stencil * curr_stencil = stencils + pack_order[i];
2100 void (*func_pack)(
2101 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2102 int * position, MPI_Datatype point_info_dt, MPI_Comm comm);
2103
2104 YAC_ASSERT(
2105 (curr_stencil->type == FIXED) ||
2106 (curr_stencil->type == DIRECT) ||
2107 (curr_stencil->type == SUM) ||
2108 (curr_stencil->type == WEIGHT_SUM) ||
2109 (curr_stencil->type == DIRECT_MF) ||
2110 (curr_stencil->type == SUM_MF) ||
2111 (curr_stencil->type == WEIGHT_SUM_MF),
2112 "ERROR(pack_stencils): invalid stencil type")
2113 switch (curr_stencil->type) {
2114 default:
2115 case(FIXED):
2116 func_pack = pack_stencil_fixed;
2117 break;
2118 case(DIRECT):
2119 func_pack = pack_stencil_direct;
2120 break;
2121 case(SUM):
2122 func_pack = pack_stencil_sum;
2123 break;
2124 case(WEIGHT_SUM):
2125 func_pack = pack_stencil_wsum;
2126 break;
2127 case(DIRECT_MF):
2128 func_pack = pack_stencil_direct_mf;
2129 break;
2130 case(SUM_MF):
2131 func_pack = pack_stencil_sum_mf;
2132 break;
2133 case(WEIGHT_SUM_MF):
2134 func_pack = pack_stencil_wsum_mf;
2135 break;
2136 };
2137
2138 int position = 0;
2139 int type = (int)curr_stencil->type;
2140 void * buffer = (void*)((char*)pack_data_ + total_pack_size);
2141 int buffer_size = pack_sizes[i];
2142
2143 // type
2145 MPI_Pack(&type, 1, MPI_INT, buffer, buffer_size, &position, comm), comm);
2146 // tgt
2147 yac_remote_point_pack(&(curr_stencil->tgt), buffer, buffer_size,
2148 &position, point_info_dt, comm);
2149 // stencil data
2150 func_pack(curr_stencil, buffer, buffer_size, &position, point_info_dt, comm);
2151
2153 pack_sizes[i] >= position,
2154 "ERROR(pack_stencils): "
2155 "actual pack size is bigger then computed one (%d > %d)",
2156 position, pack_sizes[i]);
2157
2158 pack_sizes[i] = position;
2159 total_pack_size += (size_t)position;
2160 }
2161
2162 *pack_data = xrealloc(pack_data_, total_pack_size);
2163}
2164
2166 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2167 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2168
2169 UNUSED(point_info_dt);
2170
2171 // fixed value
2173 MPI_Unpack(buffer, buffer_size, position, &(stencil->data.fixed.value), 1,
2174 MPI_DOUBLE, comm), comm);
2175}
2176
2178 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2179 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2180
2181 // src
2183 buffer, buffer_size, position, &stencil->data.direct.src,
2184 point_info_dt, comm);
2185}
2186
2188 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2189 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2190
2191 // srcs
2192 stencil->data.weight_sum.weights = NULL;
2194 buffer, buffer_size, position, &(stencil->data.sum.srcs), point_info_dt,
2195 comm);
2196}
2197
2199 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2200 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2201
2202 // srcs
2204 buffer, buffer_size, position, &(stencil->data.weight_sum.srcs),
2205 point_info_dt, comm);
2206
2207 size_t count = stencil->data.weight_sum.srcs->count;
2208
2209 stencil->data.weight_sum.weights =
2210 xmalloc(count * sizeof(*(stencil->data.weight_sum.weights)));
2211
2212 // weights
2214 MPI_Unpack(buffer, buffer_size, position, stencil->data.weight_sum.weights,
2215 (int)count, MPI_DOUBLE, comm), comm);
2216}
2217
2219 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2220 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2221
2222 // src
2224 buffer, buffer_size, position, &stencil->data.direct_mf.src,
2225 point_info_dt, comm);
2226
2227 // field_idx
2228 uint64_t temp_field_idx;
2230 MPI_Unpack(
2231 buffer, buffer_size, position, &temp_field_idx,
2232 1, MPI_UINT64_T, comm), comm);
2233 stencil->data.direct_mf.field_idx = (size_t)temp_field_idx;
2234}
2235
2237 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2238 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2239
2240 // srcs
2242 buffer, buffer_size, position, &(stencil->data.sum_mf.srcs),
2243 point_info_dt, comm);
2244
2245 size_t count = stencil->data.sum_mf.srcs->count;
2246
2247 uint64_t * temp_field_indices = xmalloc(count * sizeof(*temp_field_indices));
2248 stencil->data.sum_mf.field_indices =
2249 xmalloc(count * sizeof(*(stencil->data.sum_mf.field_indices)));
2250
2251 // field_indices
2253 MPI_Unpack(
2254 buffer, buffer_size, position, temp_field_indices,
2255 (int)count, MPI_UINT64_T, comm), comm);
2256 for (size_t i = 0; i < count; ++i)
2257 stencil->data.sum_mf.field_indices[i] = (size_t)(temp_field_indices[i]);
2258 free(temp_field_indices);
2259}
2260
2262 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2263 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2264
2265 // srcs
2267 buffer, buffer_size, position, &(stencil->data.weight_sum_mf.srcs),
2268 point_info_dt, comm);
2269
2270 size_t count = stencil->data.weight_sum_mf.srcs->count;
2271
2272 stencil->data.weight_sum_mf.weights =
2273 xmalloc(count * sizeof(*(stencil->data.weight_sum_mf.weights)));
2274
2275 // weights
2277 MPI_Unpack(
2278 buffer, buffer_size, position, stencil->data.weight_sum_mf.weights,
2279 (int)count, MPI_DOUBLE, comm), comm);
2280
2281 uint64_t * temp_field_indices = xmalloc(count * sizeof(*temp_field_indices));
2283 xmalloc(count * sizeof(*(stencil->data.weight_sum_mf.field_indices)));
2284
2285 // field_indices
2287 MPI_Unpack(
2288 buffer, buffer_size, position, temp_field_indices,
2289 (int)count, MPI_UINT64_T, comm), comm);
2290 for (size_t i = 0; i < count; ++i)
2291 stencil->data.weight_sum_mf.field_indices[i] =
2292 (size_t)(temp_field_indices[i]);
2293 free(temp_field_indices);
2294}
2295
2297 struct interp_weight_stencil * stencils, size_t count,
2298 void * packed_data, size_t packed_data_size,
2299 MPI_Datatype point_info_dt, MPI_Comm comm) {
2300
2301 for (size_t i = 0, offset = 0; i < count; ++i) {
2302
2303 YAC_ASSERT(
2304 packed_data_size >= offset,
2305 "ERROR(unpack_stencils): invalid offset");
2306
2307 int position = 0;
2308 void * curr_buffer = (void*)((unsigned char*)packed_data + offset);
2309 int buffer_size = (int)(MIN(packed_data_size - offset, INT_MAX));
2310 struct interp_weight_stencil * curr_stencil = stencils + i;
2311
2312 int type;
2314 MPI_Unpack(
2315 curr_buffer, buffer_size, &position, &type, 1, MPI_INT, comm), comm);
2316
2317 void (*func_unpack)(
2318 struct interp_weight_stencil * stencil, void * buffer, int buffer_size,
2319 int * position, MPI_Datatype point_info_dt, MPI_Comm comm);
2320
2321 YAC_ASSERT(
2322 (type == FIXED) ||
2323 (type == DIRECT) || (type == SUM) || (type == WEIGHT_SUM) ||
2324 (type == DIRECT_MF) || (type == SUM_MF) || (type == WEIGHT_SUM_MF),
2325 "ERROR(unpack_stencils): invalid stencil type")
2326 switch (type) {
2327 case(FIXED):
2328 func_unpack = unpack_stencil_fixed;
2329 break;
2330 case(DIRECT):
2331 func_unpack = unpack_stencil_direct;
2332 break;
2333 case(SUM):
2334 func_unpack = unpack_stencil_sum;
2335 break;
2336 default:
2337 case(WEIGHT_SUM):
2338 func_unpack = unpack_stencil_wsum;
2339 break;
2340 case(DIRECT_MF):
2341 func_unpack = unpack_stencil_direct_mf;
2342 break;
2343 case(SUM_MF):
2344 func_unpack = unpack_stencil_sum_mf;
2345 break;
2346 case(WEIGHT_SUM_MF):
2347 func_unpack = unpack_stencil_wsum_mf;
2348 break;
2349 };
2350
2351 curr_stencil->type =
2354 curr_buffer, buffer_size, &position, &(curr_stencil->tgt),
2355 point_info_dt, comm);
2356 func_unpack(
2357 curr_stencil, curr_buffer, buffer_size, &position, point_info_dt, comm);
2358 offset += (size_t)position;
2359 }
2360}
2361
2363 MPI_Comm comm, struct interp_weight_stencil * stencils,
2364 size_t * stencil_indices,
2365 size_t * stencil_sendcounts, size_t * stencil_recvcounts) {
2366
2367 int comm_rank, comm_size;
2368 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
2369 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
2370
2371 YAC_ASSERT(
2372 stencil_sendcounts[comm_rank] == stencil_recvcounts[comm_rank],
2373 "ERROR(exchange_stencils): error in arguments")
2374
2375 size_t send_count = 0, recv_count = 0;
2376 size_t local_send_offset = 0;
2377 size_t local_recv_offset = 0;
2378 size_t local_count = (size_t)(stencil_sendcounts[comm_rank]);
2379 for (int i = 0; i < comm_rank; ++i) {
2380 send_count += stencil_sendcounts[i];
2381 recv_count += stencil_recvcounts[i];
2382 local_send_offset += stencil_sendcounts[i];
2383 local_recv_offset += stencil_recvcounts[i];
2384 }
2385 local_send_offset = send_count;
2386 local_recv_offset = recv_count;
2387 stencil_sendcounts[comm_rank] = 0;
2388 stencil_recvcounts[comm_rank] = 0;
2389 for (int i = comm_rank + 1; i < comm_size; ++i) {
2390 send_count += stencil_sendcounts[i];
2391 recv_count += stencil_recvcounts[i];
2392 }
2393
2394 struct interp_weight_stencil * new_stencils =
2395 xmalloc((recv_count + local_count) * sizeof(*new_stencils));
2396 size_t * local_stencil_indices =
2397 xmalloc(local_count * sizeof(*local_stencil_indices));
2398 memcpy(local_stencil_indices, stencil_indices + local_send_offset,
2399 local_count * sizeof(*local_stencil_indices));
2400
2401 // remove the local stencil indices
2402 memmove(
2403 stencil_indices + local_send_offset,
2404 stencil_indices + local_send_offset + local_count,
2405 (send_count - local_send_offset) * sizeof(*stencil_indices));
2406
2407 // pack the stencils that need to be send to other processes
2408 void * send_buffer;
2409 int * pack_sizes = xmalloc(send_count * sizeof(*pack_sizes));
2410 MPI_Datatype point_info_dt = yac_get_remote_point_info_mpi_datatype(comm);
2412 stencils, send_count, stencil_indices, &send_buffer, pack_sizes,
2413 point_info_dt, comm);
2414
2415 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
2417 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
2418
2419 send_count = 0;
2420 for (int rank = 0; rank < comm_size; ++rank) {
2421 size_t sendcount = 0;
2422 int curr_num_stencils = stencil_sendcounts[rank];
2423 for (int j = 0; j < curr_num_stencils; ++j, ++send_count)
2424 sendcount += (size_t)(pack_sizes[send_count]);
2425 sendcounts[rank] = sendcount;
2426 }
2427 free(pack_sizes);
2428
2430 1, sendcounts, recvcounts, sdispls, rdispls, comm);
2431
2432 size_t recv_size = recvcounts[comm_size - 1] + rdispls[comm_size - 1];
2433
2434 void * recv_buffer = xmalloc(recv_size);
2435
2436 // exchange stencils
2437 yac_alltoallv_packed_p2p(
2438 send_buffer, sendcounts, sdispls+1,
2439 recv_buffer, recvcounts, rdispls, comm);
2440 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
2441 free(send_buffer);
2442
2443 // unpack stencils
2445 new_stencils, recv_count,
2446 recv_buffer, recv_size, point_info_dt, comm);
2447 yac_mpi_call(MPI_Type_free(&point_info_dt), comm);
2448 free(recv_buffer);
2449
2450 memmove(new_stencils + local_recv_offset + local_count,
2451 new_stencils + local_recv_offset ,
2452 (recv_count - local_recv_offset ) * sizeof(*new_stencils));
2453 for (size_t i = 0; i < local_count; ++i, ++local_recv_offset )
2454 new_stencils[local_recv_offset] =
2456 stencils + local_stencil_indices[i],
2457 stencils[local_stencil_indices[i]].tgt);
2458 free(local_stencil_indices);
2459
2460 return new_stencils;
2461}
2462
2464 struct yac_interp_weights * weights, size_t * stencil_indices,
2465 int * stencil_ranks, size_t count) {
2466
2467 MPI_Comm comm = weights->comm;
2468 int comm_size;
2469 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
2470
2471 YAC_ASSERT(
2472 count <= INT_MAX,
2473 "ERROR(yac_interp_weights_get_stencils): count exceeds INT_MAX");
2474
2475 size_t * reorder_idx = xmalloc(count * sizeof(*reorder_idx));
2476 for (size_t i = 0; i < count; ++i) reorder_idx[i] = i;
2477
2479 stencil_ranks, count, stencil_indices, reorder_idx);
2480
2481 // exchange requested stencils indices
2482 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
2484 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
2485 for (size_t i = 0; i < count; ++i) sendcounts[stencil_ranks[i]]++;
2487 1, sendcounts, recvcounts, sdispls, rdispls, comm);
2488 size_t recv_count =
2489 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
2490 uint64_t * uint64_t_buffer =
2491 xmalloc((count + recv_count) * sizeof(*uint64_t_buffer));
2492 uint64_t * send_stencil_indices = uint64_t_buffer;
2493 uint64_t * recv_stencil_indices = uint64_t_buffer + count;
2494 for (size_t i = 0; i < count; ++i)
2495 send_stencil_indices[i] = (uint64_t)(stencil_indices[i]);
2496 yac_alltoallv_uint64_p2p(
2497 send_stencil_indices, sendcounts, sdispls+1,
2498 recv_stencil_indices, recvcounts, rdispls, comm);
2499
2500 // exchange stencils
2501 size_t * exchange_stencil_indices =
2502 xmalloc(recv_count * sizeof(*exchange_stencil_indices));
2503 for (size_t i = 0; i < recv_count; ++i) {
2504 YAC_ASSERT(
2505 (size_t)(recv_stencil_indices[i]) < weights->stencils_size,
2506 "ERROR(yac_interp_weights_get_stencils): invalid stencil index");
2507 exchange_stencil_indices[i] = (size_t)(recv_stencil_indices[i]);
2508 }
2509 free(uint64_t_buffer);
2510 struct interp_weight_stencil * stencils =
2511 exchange_stencils(comm, weights->stencils, exchange_stencil_indices,
2512 recvcounts, sendcounts);
2513 free(exchange_stencil_indices);
2514 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
2515
2516 // sort received stencils into original order
2517 struct interp_weight_stencil * sorted_stencils =
2518 xmalloc(count * sizeof(*sorted_stencils));
2519 for (size_t i = 0; i < count; ++i)
2520 sorted_stencils[reorder_idx[i]] = stencils[i];
2521 free(stencils);
2522 free(reorder_idx);
2523
2524 return sorted_stencils;
2525}
2526
2528 struct interp_weight_stencil * stencils, size_t count);
2529
2531 struct yac_interp_weights * weights, struct remote_points * tgts,
2532 size_t * num_stencils_per_tgt, size_t * stencil_indices,
2533 int * stencil_ranks, double * w) {
2534
2535 size_t count = (tgts != NULL)?tgts->count:0;
2536 MPI_Comm comm = weights->comm;
2537 int comm_rank, comm_size;
2538 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
2539 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
2540
2541 // count the number of missing stencils
2542 size_t total_num_stencils = 0;
2543 size_t max_num_stencils_per_tgt = 0;
2544 for (size_t i = 0; i < count; ++i) {
2545 size_t curr_num_stencils_per_tgt = num_stencils_per_tgt[i];
2546 if (curr_num_stencils_per_tgt > max_num_stencils_per_tgt)
2547 max_num_stencils_per_tgt = curr_num_stencils_per_tgt;
2548 total_num_stencils += num_stencils_per_tgt[i];
2549 }
2550 size_t num_missing_stencils = 0;
2551 for (size_t i = 0; i < total_num_stencils; ++i)
2552 if (stencil_ranks[i] != comm_rank) num_missing_stencils++;
2553
2554 // get missing stencils
2555 size_t * missing_stencil_indices =
2556 xmalloc(num_missing_stencils * sizeof(*missing_stencil_indices));
2557 int * missing_stencil_ranks =
2558 xmalloc(num_missing_stencils * sizeof(*missing_stencil_ranks));
2559 for (size_t i = 0, j = 0; i < total_num_stencils; ++i) {
2560 if (stencil_ranks[i] != comm_rank) {
2561 missing_stencil_indices[j] = stencil_indices[i];
2562 missing_stencil_ranks[j] = stencil_ranks[i];
2563 ++j;
2564 }
2565 }
2566 struct interp_weight_stencil * missing_stencils =
2568 weights, missing_stencil_indices, missing_stencil_ranks,
2569 num_missing_stencils);
2570 free(missing_stencil_ranks);
2571 free(missing_stencil_indices);
2572
2573 // merge stencils to generate new ones
2574 {
2575 struct interp_weight_stencil * stencils = weights->stencils;
2576 size_t stencils_array_size = weights->stencils_array_size;
2577 size_t stencils_size = weights->stencils_size;
2578
2579 struct interp_weight_stencil ** stencils_buffer =
2580 xmalloc(max_num_stencils_per_tgt * sizeof(*stencils_buffer));
2581
2583 stencils, stencils_array_size, stencils_size + count);
2584
2585 for (size_t i = 0, j = 0; i < count;
2586 ++i, ++stencils_size) {
2587
2588 size_t curr_num_stencils = num_stencils_per_tgt[i];
2589 for (size_t k = 0; k < curr_num_stencils; ++k)
2590 stencils_buffer[k] =
2591 (stencil_ranks[k] == comm_rank)?
2592 (stencils + stencil_indices[k]):(missing_stencils + (j++));
2593
2594 stencils[stencils_size] =
2595 stencils_merge(stencils_buffer, w, curr_num_stencils, tgts->data[i]);
2596 w += curr_num_stencils;
2597 stencil_indices += curr_num_stencils;
2598 stencil_ranks += curr_num_stencils;
2599 }
2600
2601 weights->stencils = stencils;
2602 weights->stencils_array_size = stencils_array_size;
2603 weights->stencils_size = stencils_size;
2604
2605 free(stencils_buffer);
2606 }
2607
2608 yac_interp_weight_stencils_delete(missing_stencils, num_missing_stencils);
2609}
2610
2611static int compute_owner(int * ranks, size_t count) {
2612
2613 YAC_ASSERT(count != 0, "ERROR(compute_owner): count == 0")
2614
2615 yac_quicksort_index(ranks, count, NULL);
2616
2617 int best_rank = -1;
2618 size_t best_rank_count = 0;
2619
2620 size_t curr_rank_count = 1;
2621 int prev_rank = ranks[0];
2622
2623 for (size_t i = 1; i < count; ++i, ++curr_rank_count) {
2624 int curr_rank = ranks[i];
2625 if (prev_rank != curr_rank) {
2626 if (curr_rank_count > best_rank_count) {
2627 best_rank = prev_rank;
2628 best_rank_count = curr_rank_count;
2629 }
2630 prev_rank = curr_rank;
2631 curr_rank_count = 0;
2632 }
2633 }
2634
2635 return (curr_rank_count > best_rank_count)?prev_rank:best_rank;
2636}
2637
2639 struct interp_weight_stencil * stencils, size_t count,
2640 enum yac_interp_weight_stencil_type stencil_type) {
2641
2642 // compute total number of links
2643 size_t total_num_links = 0;
2644
2645 for (size_t i = 0; i < count; ++i) {
2646 YAC_ASSERT(
2647 stencils[i].type == stencil_type,
2648 "ERROR(generate_w_sum_mf_stencils): wrong stencil type")
2649 // due to the data layout this works for "sum" and "weight_sum"
2650 total_num_links += stencils[i].data.weight_sum.srcs->count;
2651 }
2652
2654 xmalloc(sizeof(*temp) + total_num_links * sizeof(temp->buffer[0]));
2655 struct interp_weight_stencils_wsum_mf * wsum_stencils =
2656 (struct interp_weight_stencils_wsum_mf *)temp;
2657 wsum_stencils->data = xmalloc(count * sizeof(*(wsum_stencils->data)));
2658 wsum_stencils->count = count;
2659
2660 // extract data from stencils
2661 for (size_t i = 0, k = 0; i < count; ++i) {
2662 struct interp_weight_stencil_wsum_mf * curr_wsum_stencil =
2663 wsum_stencils->data + i;
2664 struct interp_weight_stencil_wsum_mf_weight * curr_links =
2665 &(temp->buffer[k]);
2666 size_t curr_stencil_size = stencils[i].data.weight_sum.srcs->count;
2667 struct remote_point * curr_srcs = stencils[i].data.weight_sum.srcs->data;
2668 curr_wsum_stencil->tgt = copy_remote_point(stencils[i].tgt);
2669 curr_wsum_stencil->count = curr_stencil_size;
2670 curr_wsum_stencil->data = curr_links;
2671 for (size_t j = 0; j < curr_stencil_size; ++j) {
2672 int curr_count = curr_srcs[j].data.count;
2673 YAC_ASSERT(
2674 curr_count >= 1,
2675 "ERROR(generate_w_sum_mf_stencils): global src id no found")
2676 curr_links[j].src =
2677 (curr_count == 1)?
2678 (curr_srcs[j].data.data.single):(curr_srcs[j].data.data.multi[0]);
2679 YAC_ASSERT(
2680 (stencil_type == SUM) || (stencil_type == WEIGHT_SUM) ||
2681 (stencil_type == SUM_MF) || (stencil_type == WEIGHT_SUM_MF),
2682 "ERROR(generate_w_sum_mf_stencils): unsupported stencil type")
2683 switch(stencil_type) {
2684 default:
2685 case(SUM):
2686 curr_links[j].weight = 1.0;
2687 curr_links[j].src_field_idx = 0;
2688 break;
2689 case(WEIGHT_SUM):
2690 curr_links[j].weight = stencils[i].data.weight_sum.weights[j];
2691 curr_links[j].src_field_idx = 0;
2692 break;
2693 case(SUM_MF):
2694 curr_links[j].weight = 1.0;
2695 curr_links[j].src_field_idx =
2696 stencils[i].data.sum_mf.field_indices[j];
2697 break;
2698 case(WEIGHT_SUM_MF):
2699 curr_links[j].weight = stencils[i].data.weight_sum_mf.weights[j];
2700 curr_links[j].src_field_idx =
2701 stencils[i].data.weight_sum_mf.field_indices[j];
2702 break;
2703 };
2704 }
2705 k += curr_stencil_size;
2706 }
2707
2708 return wsum_stencils;
2709}
2710
2711static MPI_Datatype get_wsum_mf_weight_mpi_datatype(MPI_Comm comm) {
2712
2714 MPI_Datatype dt;
2715 int array_of_blocklengths[] = {1, 1, 1, 1};
2716 const MPI_Aint array_of_displacements[] =
2717 {(MPI_Aint)(intptr_t)(const void *)&(dummy.src.rank) -
2718 (MPI_Aint)(intptr_t)(const void *)&dummy,
2719 (MPI_Aint)(intptr_t)(const void *)&(dummy.src.orig_pos) -
2720 (MPI_Aint)(intptr_t)(const void *)&dummy,
2721 (MPI_Aint)(intptr_t)(const void *)&(dummy.src_field_idx) -
2722 (MPI_Aint)(intptr_t)(const void *)&dummy,
2723 (MPI_Aint)(intptr_t)(const void *)&(dummy.weight) -
2724 (MPI_Aint)(intptr_t)(const void *)&dummy};
2725 const MPI_Datatype array_of_types[] =
2726 {MPI_INT, MPI_UINT64_T, MPI_UINT64_T, MPI_DOUBLE};
2728 MPI_Type_create_struct(4, array_of_blocklengths, array_of_displacements,
2729 array_of_types, &dt), comm);
2730 return yac_create_resized(dt, sizeof(dummy), comm);
2731}
2732
2734 struct interp_weight_stencil_wsum_mf * stencil,
2735 MPI_Datatype wsum_mf_weight_dt, MPI_Datatype point_info_dt, MPI_Comm comm) {
2736
2737 int pack_size_count,
2738 pack_size_weights,
2739 pack_size_tgt;
2740
2741 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_count), comm);
2743 MPI_Pack_size(
2744 (int)(stencil->count), wsum_mf_weight_dt, comm, &pack_size_weights), comm);
2745 pack_size_tgt =
2746 yac_remote_point_get_pack_size(&(stencil->tgt), point_info_dt, comm);
2747
2748 return pack_size_count + pack_size_weights + pack_size_tgt;
2749}
2750
2752 struct interp_weight_stencil_wsum_mf * wsum_stencils, size_t count,
2753 size_t * pack_order, void ** pack_data, int * pack_sizes,
2754 int * weight_counts, MPI_Comm comm) {
2755
2756 MPI_Datatype wsum_mf_weight_dt = get_wsum_mf_weight_mpi_datatype(comm);
2757 MPI_Datatype point_info_dt = yac_get_remote_point_info_mpi_datatype(comm);
2758
2759 // get the pack sizes and the upper bound for the pack buffer size
2760 size_t temp_total_pack_size = 0;
2761 for (size_t i = 0; i < count; ++i) {
2762 temp_total_pack_size +=
2763 (pack_sizes[i] =
2765 wsum_stencils + pack_order[i],
2766 wsum_mf_weight_dt, point_info_dt, comm));
2767 }
2768
2769 void * pack_data_ = xmalloc(temp_total_pack_size);
2770 size_t total_pack_size = 0;
2771
2772 // pack the stencils
2773 for (size_t i = 0; i < count; ++i) {
2774
2775 size_t idx = pack_order[i];
2776
2777 int position = 0;
2778 void * buffer = (void*)((unsigned char*)pack_data_ + total_pack_size);
2779 int buffer_size = pack_sizes[i];
2780 int curr_count = wsum_stencils[idx].count;
2781
2782 // tgt
2784 &(wsum_stencils[idx].tgt), buffer, buffer_size, &position,
2785 point_info_dt, comm);
2786 // weight count
2788 MPI_Pack(&curr_count, 1, MPI_INT, buffer, buffer_size, &position, comm), comm);
2789 // weights
2791 MPI_Pack(wsum_stencils[idx].data, curr_count, wsum_mf_weight_dt,
2792 buffer, buffer_size, &position, comm), comm);
2793
2794 pack_sizes[i] = position;
2795 weight_counts[i] = curr_count;
2796 total_pack_size += (size_t)position;
2797 }
2798
2799 yac_mpi_call(MPI_Type_free(&point_info_dt), comm);
2800 yac_mpi_call(MPI_Type_free(&wsum_mf_weight_dt), comm);
2801
2802 *pack_data = xrealloc(pack_data_, total_pack_size);
2803}
2804
2806 struct interp_weight_stencil_wsum_mf * wsum_stencils,
2807 struct interp_weight_stencil_wsum_mf_weight * weight_buffer, size_t count,
2808 void * packed_data, size_t packed_data_size, MPI_Comm comm) {
2809
2810 MPI_Datatype wsum_mf_weight_dt = get_wsum_mf_weight_mpi_datatype(comm);
2811 MPI_Datatype point_info_dt = yac_get_remote_point_info_mpi_datatype(comm);
2812
2813 size_t weight_offset = 0;
2814 for (size_t i = 0, offset = 0; i < count; ++i) {
2815
2816 int position = 0;
2817 void * curr_buffer = (void*)((char*)packed_data + offset);
2818 int buffer_size = (int)(packed_data_size - offset);
2819 struct interp_weight_stencil_wsum_mf * curr_wsum_stencil =
2820 wsum_stencils + i;
2821
2822 struct remote_point tgt;
2823 struct interp_weight_stencil_wsum_mf_weight * curr_weights =
2824 weight_buffer + weight_offset;
2825 int weight_count;
2827 curr_buffer, buffer_size, &position, &tgt, point_info_dt, comm);
2829 MPI_Unpack(curr_buffer, buffer_size, &position,
2830 &weight_count, 1, MPI_INT, comm),
2831 comm);
2833 MPI_Unpack(curr_buffer, buffer_size, &position,
2834 curr_weights, weight_count, wsum_mf_weight_dt, comm), comm);
2835
2836 curr_wsum_stencil->tgt = tgt;
2837 curr_wsum_stencil->data = curr_weights;
2838 curr_wsum_stencil->count = (size_t)weight_count;
2839
2840 weight_offset += (size_t)weight_count;
2841 offset += (size_t)position;
2842 }
2843
2844 yac_mpi_call(MPI_Type_free(&point_info_dt), comm);
2845 yac_mpi_call(MPI_Type_free(&wsum_mf_weight_dt), comm);
2846
2847 return weight_offset;
2848}
2849
2851 MPI_Comm comm, struct interp_weight_stencils_wsum_mf * wsum_stencils_data,
2852 int * stencil_owner, size_t * reorder_idx, size_t num_owners) {
2853
2854 struct interp_weight_stencil_wsum_mf * wsum_stencils =
2855 wsum_stencils_data->data;
2856
2857 int comm_rank, comm_size;
2858 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
2859 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
2860
2861 size_t local_weight_count = 0;
2862 size_t local_count = 0;
2863 for (size_t i = 0; i < num_owners; ++i) {
2864 if (stencil_owner[i] == comm_rank) {
2865 local_weight_count += wsum_stencils[reorder_idx[i]].count;
2866 stencil_owner[i] = INT_MAX;
2867 ++local_count;
2868 }
2869 }
2870 yac_quicksort_index_int_size_t(stencil_owner, num_owners, reorder_idx);
2871
2872 size_t send_count = num_owners - local_count;
2873
2874 // pack the stencils that need to be send to other processes
2875 void * send_buffer;
2876 int * pack_sizes = xmalloc(2 * send_count * sizeof(*pack_sizes));
2877 int * weight_counts = pack_sizes + send_count;
2878 pack_stencils_wsum_mf(wsum_stencils, send_count, reorder_idx, &send_buffer,
2879 pack_sizes, weight_counts, comm);
2880
2881 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
2883 3, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
2884
2885 for (size_t i = 0; i < send_count; ++i) {
2886 int curr_rank = stencil_owner[i];
2887 sendcounts[3 * curr_rank + 0]++;
2888 sendcounts[3 * curr_rank + 1] += (size_t)(pack_sizes[i]);
2889 sendcounts[3 * curr_rank + 2] += (size_t)(weight_counts[i]);
2890 }
2891 free(pack_sizes);
2892
2893 // exchange the number of stencils to be exchanged and the total pack sizes
2894 yac_mpi_call(MPI_Alltoall(sendcounts, 3, YAC_MPI_SIZE_T,
2895 recvcounts, 3, YAC_MPI_SIZE_T, comm), comm);
2896
2897 size_t recv_count = 0;
2898 size_t recv_size = 0;
2899 size_t recv_weight_count = 0;
2900 size_t saccu = 0, raccu = 0;
2901 for (int i = 0; i < comm_size; ++i) {
2902 sdispls[i] = saccu;
2903 rdispls[i] = raccu;
2904 recv_count += recvcounts[3 * i + 0];
2905 recv_size += recvcounts[3 * i + 1];
2906 recv_weight_count += recvcounts[3 * i + 2];
2907 saccu += sendcounts[3 * i + 1];
2908 raccu += recvcounts[3 * i + 1];
2909 sendcounts[i] = sendcounts[3 * i + 1];
2910 recvcounts[i] = recvcounts[3 * i + 1];
2911 }
2912
2913 void * recv_buffer = xmalloc(recv_size);
2914
2915 // exchange stencils
2916 yac_alltoallv_packed_p2p(
2917 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls, comm);
2918 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
2919 free(send_buffer);
2920
2922 xmalloc(sizeof(*temp) +
2923 (local_weight_count + recv_weight_count) * sizeof(temp->buffer[0]));
2924 struct interp_weight_stencils_wsum_mf * new_wsum_stencils_data =
2925 (struct interp_weight_stencils_wsum_mf *)temp;
2926 struct interp_weight_stencil_wsum_mf * new_wsum_stencils =
2927 ((new_wsum_stencils_data->data =
2928 xmalloc((local_count + recv_count) *
2929 sizeof(*(new_wsum_stencils_data->data)))));
2930 new_wsum_stencils_data->count = local_count + recv_count;
2931
2932 // unpack stencils
2933 size_t weight_offset =
2935 new_wsum_stencils, &(temp->buffer[0]), recv_count,
2936 recv_buffer, recv_size, comm);
2937 free(recv_buffer);
2938 new_wsum_stencils += recv_count;
2939 struct interp_weight_stencil_wsum_mf_weight * weight_buffer =
2940 &(temp->buffer[weight_offset]);
2941
2942 // copy the stencils that stay locally into the new stencil array
2943 yac_quicksort_index_size_t_size_t(reorder_idx + send_count, local_count, NULL);
2944 for (size_t i = 0, weight_offset = 0; i < local_count; ++i) {
2945 struct interp_weight_stencil_wsum_mf * curr_wsum_stencil =
2946 wsum_stencils + reorder_idx[i + send_count];
2947 struct interp_weight_stencil_wsum_mf * curr_new_wsum_stencil =
2948 new_wsum_stencils + i;
2949 struct interp_weight_stencil_wsum_mf_weight * curr_new_weights =
2950 weight_buffer + weight_offset;
2951 size_t curr_stencil_size = curr_wsum_stencil->count;
2952 curr_new_wsum_stencil->tgt = copy_remote_point(curr_wsum_stencil->tgt);
2953 curr_new_wsum_stencil->count = curr_stencil_size;
2954 curr_new_wsum_stencil->data = curr_new_weights;
2955 memcpy(curr_new_weights, curr_wsum_stencil->data,
2956 curr_stencil_size * sizeof(*curr_new_weights));
2957 weight_offset += curr_stencil_size;
2958 }
2959
2960 return new_wsum_stencils_data;
2961}
2962
2964 MPI_Comm comm, struct interp_weight_stencils_wsum_mf * wsum_stencils_data) {
2965
2966 struct interp_weight_stencil_wsum_mf * wsum_stencils =
2967 wsum_stencils_data->data;
2968 size_t count = wsum_stencils_data->count;
2969
2970 // determine maximum stencil size
2971 size_t max_stencil_size = 0;
2972 for (size_t i = 0; i < count; ++i) {
2973 size_t curr_stencil_size = wsum_stencils[i].count;
2974 if (curr_stencil_size > max_stencil_size)
2975 max_stencil_size = curr_stencil_size;
2976 }
2977
2978 // determine source process for each stencil
2979 int * rank_buffer =
2980 xmalloc((count + max_stencil_size) * sizeof(*rank_buffer));
2981 int * stencil_owner = rank_buffer;
2982 int * stencil_owners = rank_buffer + count;
2983 size_t * reorder_idx = xmalloc(count * sizeof(*reorder_idx));
2984 for (size_t i = 0; i < count; ++i) {
2985 size_t curr_stencil_size = wsum_stencils[i].count;
2986 struct interp_weight_stencil_wsum_mf_weight * curr_weights =
2987 wsum_stencils[i].data;
2988 for (size_t j = 0; j < curr_stencil_size; ++j)
2989 stencil_owners[j] = curr_weights[j].src.rank;
2990 stencil_owner[i] = compute_owner(stencil_owners, curr_stencil_size);
2991 reorder_idx[i] = i;
2992 }
2993
2994 struct interp_weight_stencils_wsum_mf * new_wsum_stencils_data =
2996 comm, wsum_stencils_data, stencil_owner, reorder_idx, count);
2997
2998 free(reorder_idx);
2999 free(rank_buffer);
3000
3001 return new_wsum_stencils_data;
3002}
3003
3004static int compare_remote_point_info(const void * a, const void * b) {
3005
3006 int ret = ((struct remote_point_info *)a)->rank -
3007 ((struct remote_point_info *)b)->rank;
3008
3009 if (ret) return ret;
3010
3011 return (((struct remote_point_info *)a)->orig_pos >
3012 ((struct remote_point_info *)b)->orig_pos) -
3013 (((struct remote_point_info *)a)->orig_pos <
3014 ((struct remote_point_info *)b)->orig_pos);
3015}
3016
3018 MPI_Comm comm, struct interp_weight_stencils_wsum_mf * wsum_stencils_data) {
3019
3020 struct interp_weight_stencil_wsum_mf * wsum_stencils =
3021 wsum_stencils_data->data;
3022 size_t count = wsum_stencils_data->count;
3023
3024 // determine total number of stencils to be sent to other processes
3025 // (a single stencil may be sent to multiple target processes)
3026 size_t total_owner_count = 0;
3027 for (size_t i = 0; i < count; ++i) {
3028 int stencil_size = wsum_stencils[i].tgt.data.count;
3029 if (stencil_size == 1) {
3030 total_owner_count++;
3031 } else {
3032 struct remote_point_info * tgt_point_infos =
3033 wsum_stencils[i].tgt.data.data.multi;
3034 qsort(
3035 tgt_point_infos, stencil_size, sizeof(*tgt_point_infos),
3037 int prev_rank = INT_MAX;
3038 for (int j = 0; j < stencil_size; ++j) {
3039 int curr_rank = tgt_point_infos[j].rank;
3040 if (curr_rank != prev_rank) {
3041 ++total_owner_count;
3042 prev_rank = curr_rank;
3043 }
3044 }
3045 }
3046 }
3047
3048 int * stencil_owner = xmalloc(total_owner_count * sizeof(*stencil_owner));
3049 size_t * reorder_idx = xmalloc(total_owner_count * sizeof(*reorder_idx));
3050 for (size_t i = 0, k = 0; i < count; ++i) {
3051 int stencil_size = wsum_stencils[i].tgt.data.count;
3052 if (stencil_size == 1) {
3053 stencil_owner[k] = wsum_stencils[i].tgt.data.data.single.rank;
3054 reorder_idx[k] = i;
3055 ++k;
3056 } else {
3057 struct remote_point_info * tgt_point_infos =
3058 wsum_stencils[i].tgt.data.data.multi;
3059 int prev_rank = INT_MAX;
3060 for (int j = 0; j < stencil_size; ++j) {
3061 int curr_rank = tgt_point_infos[j].rank;
3062 if (curr_rank != prev_rank) {
3063 stencil_owner[k] = tgt_point_infos[j].rank;
3064 reorder_idx[k] = i;
3065 ++k;
3066 prev_rank = curr_rank;
3067 }
3068 }
3069 }
3070 }
3071
3072 struct interp_weight_stencils_wsum_mf * new_wsum_stencils_data =
3074 comm, wsum_stencils_data, stencil_owner, reorder_idx, total_owner_count);
3075
3076 wsum_stencils = new_wsum_stencils_data->data;
3077 count = new_wsum_stencils_data->count;
3078
3079 free(reorder_idx);
3080 free(stencil_owner);
3081
3082 if (count == 0) return new_wsum_stencils_data;
3083
3084 int comm_rank;
3085 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
3086
3087 // count total number of local target locations
3088 size_t total_num_tgt_pos = 0;
3089 for (size_t i = 0; i < count; ++i) {
3090 size_t curr_count = wsum_stencils[i].tgt.data.count;
3091 if (curr_count == 1) {
3092 ++total_num_tgt_pos;
3093 } else {
3094 struct remote_point_info * curr_point_infos =
3095 wsum_stencils[i].tgt.data.data.multi;
3096 for (size_t j = 0; j < curr_count; ++j)
3097 if (curr_point_infos[j].rank == comm_rank)
3098 ++total_num_tgt_pos;
3099 }
3100 }
3101
3102 if (total_num_tgt_pos != count) {
3103 new_wsum_stencils_data->data =
3104 ((wsum_stencils =
3105 xrealloc(wsum_stencils, total_num_tgt_pos * sizeof(*wsum_stencils))));
3106 new_wsum_stencils_data->count = total_num_tgt_pos;
3107 }
3108
3109 // remove all non local target point information
3110 for (size_t i = 0, offset = count; i < count; ++i) {
3111 size_t curr_count = wsum_stencils[i].tgt.data.count;
3112 if (curr_count > 1) {
3113 struct remote_point_info * curr_point_infos =
3114 wsum_stencils[i].tgt.data.data.multi;
3115 // find first local target point
3116 size_t j;
3117 for (j = 0; j < curr_count; ++j) {
3118 if (curr_point_infos[j].rank == comm_rank) {
3119 wsum_stencils[i].tgt.data.count = 1;
3120 wsum_stencils[i].tgt.data.data.single.rank = comm_rank;
3121 wsum_stencils[i].tgt.data.data.single.orig_pos =
3122 curr_point_infos[j].orig_pos;
3123 break;
3124 }
3125 }
3126 // make a copy for the remaining local target positions
3127 for (j = j + 1; j < curr_count; ++j) {
3128 if (curr_point_infos[j].rank == comm_rank) {
3129 wsum_stencils[offset] = wsum_stencils[i];
3130 wsum_stencils[offset].tgt.data.data.single.orig_pos =
3131 curr_point_infos[j].orig_pos;
3132 ++offset;
3133 }
3134 }
3135 free(curr_point_infos);
3136 }
3137 }
3138
3139 return new_wsum_stencils_data;
3140}
3141
3142static Xt_redist * generate_halo_redists(
3143 struct remote_point_info_reorder * halo_points, size_t count,
3144 size_t num_src_fields, MPI_Comm comm, Xt_config redist_config) {
3145
3146 int comm_size;
3147 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
3148
3149 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
3151 num_src_fields, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
3152 size_t * size_t_buffer =
3153 xmalloc(4 * (size_t)comm_size * sizeof(*size_t_buffer));
3154 size_t * total_sendcounts = size_t_buffer + 0 * comm_size;
3155 size_t * total_recvcounts = size_t_buffer + 1 * comm_size;
3156 size_t * total_sdispls = size_t_buffer + 2 * comm_size;
3157 size_t * total_rdispls = size_t_buffer + 3 * comm_size;
3158
3159 for (size_t i = 0; i < count; ++i)
3160 sendcounts[halo_points[i].data.rank * num_src_fields +
3161 halo_points[i].field_idx]++;
3162
3164 num_src_fields, sendcounts, recvcounts, sdispls, rdispls, comm);
3165
3166 size_t saccu = 0, raccu = 0;
3167 for (int i = 0; i < comm_size; ++i) {
3168 total_sdispls[i] = saccu;
3169 total_rdispls[i] = raccu;
3170 total_sendcounts[i] = 0;
3171 total_recvcounts[i] = 0;
3172 for (size_t j = 0; j < num_src_fields; ++j) {
3173 total_sendcounts[i] += sendcounts[num_src_fields * i + j];
3174 total_recvcounts[i] += recvcounts[num_src_fields * i + j];
3175 }
3176 saccu += total_sendcounts[i];
3177 raccu += total_recvcounts[i];
3178 }
3179
3180 size_t recv_count = total_recvcounts[comm_size - 1] +
3181 total_rdispls[comm_size - 1];
3182
3183 int * exchange_buffer =
3184 xmalloc((2 * count + recv_count) * sizeof(*exchange_buffer));
3185 int * send_buffer = exchange_buffer;
3186 int * reorder_idx = exchange_buffer + count;
3187 int * recv_buffer = exchange_buffer + 2 * count;
3188
3189 // pack the original positions of the requested points
3190 size_t num_halo_per_src_field[num_src_fields];
3191 memset(
3192 num_halo_per_src_field, 0,
3193 num_src_fields * sizeof(num_halo_per_src_field[0]));
3194 for (size_t i = 0; i < count; ++i) {
3195 size_t curr_src_field_idx = (size_t)(halo_points[i].field_idx);
3196 size_t pos = sdispls[(size_t)(halo_points[i].data.rank) * num_src_fields +
3197 curr_src_field_idx + 1]++;
3198 uint64_t orig_pos = halo_points[i].data.orig_pos;
3199 YAC_ASSERT(
3200 orig_pos <= INT_MAX,
3201 "ERROR(generate_halo_redists): offset not supported by MPI")
3202 send_buffer[pos] = (int)orig_pos;
3203 reorder_idx[pos] = num_halo_per_src_field[curr_src_field_idx]++;
3204 }
3205
3206 // exchange original positions of the requested points
3207 yac_alltoallv_int_p2p(
3208 send_buffer, total_sendcounts, total_sdispls,
3209 recv_buffer, total_recvcounts, total_rdispls, comm);
3210
3211 free(size_t_buffer);
3212
3213 size_t nsend = 0, nsends[num_src_fields];
3214 size_t nrecv = 0, nrecvs[num_src_fields];
3215 memset(nsends, 0, num_src_fields * sizeof(nsends[0]));
3216 memset(nrecvs, 0, num_src_fields * sizeof(nrecvs[0]));
3217 for (int i = 0; i < comm_size; ++i) {
3218 for (size_t field_idx = 0; field_idx < num_src_fields; ++field_idx) {
3219 if (sendcounts[i * num_src_fields + field_idx] > 0) {
3220 nrecv++;
3221 nrecvs[field_idx]++;
3222 }
3223 if (recvcounts[i * num_src_fields + field_idx] > 0) {
3224 nsend++;
3225 nsends[field_idx]++;
3226 }
3227 }
3228 }
3229
3230 size_t total_num_msg = nsend + nrecv;
3231
3232 struct Xt_redist_msg * msgs_buffer =
3233 xmalloc(total_num_msg * sizeof(*msgs_buffer));
3234 struct Xt_redist_msg * send_msgs = msgs_buffer;
3235 struct Xt_redist_msg * recv_msgs = msgs_buffer + nsend;
3236
3237 for (size_t field_idx = 0, nsend = 0, nrecv = 0;
3238 field_idx < num_src_fields; ++field_idx) {
3239 for (int rank = 0; rank < comm_size; ++rank) {
3240 size_t idx = (size_t)rank * num_src_fields + field_idx;
3241 if (sendcounts[idx] > 0) {
3242 recv_msgs[nrecv].rank = rank;
3243 recv_msgs[nrecv].datatype =
3244 xt_mpi_generate_datatype(
3245 reorder_idx + sdispls[idx], sendcounts[idx], MPI_DOUBLE, comm);
3246 nrecv++;
3247 }
3248 if (recvcounts[idx] > 0) {
3249 send_msgs[nsend].rank = rank;
3250 send_msgs[nsend].datatype =
3251 xt_mpi_generate_datatype(
3252 recv_buffer + rdispls[idx], recvcounts[idx], MPI_DOUBLE, comm);
3253 nsend++;
3254 }
3255 }
3256 }
3257
3258 Xt_redist * redist;
3259 MPI_Comm halo_comm;
3260
3261 if (total_num_msg > 0) {
3262
3263 yac_mpi_call(MPI_Comm_split(comm, 1, 0, &halo_comm), comm);
3264
3265 int * rank_buffer = xmalloc(2 * total_num_msg * sizeof(*rank_buffer));
3266 int * orig_ranks = rank_buffer;
3267 int * split_ranks = rank_buffer + total_num_msg;
3268
3269 for (size_t i = 0; i < total_num_msg; ++i)
3270 orig_ranks[i] = msgs_buffer[i].rank;
3271
3272 MPI_Group orig_group, split_group;
3273 yac_mpi_call(MPI_Comm_group(comm, &orig_group), comm);
3274 yac_mpi_call(MPI_Comm_group(halo_comm, &split_group), comm);
3275
3277 MPI_Group_translate_ranks(orig_group, (int)total_num_msg, orig_ranks,
3278 split_group, split_ranks), halo_comm);
3279
3280 for (size_t i = 0; i < total_num_msg; ++i)
3281 msgs_buffer[i].rank = split_ranks[i];
3282
3283 free(rank_buffer);
3284
3285 yac_mpi_call(MPI_Group_free(&split_group), comm);
3286 yac_mpi_call(MPI_Group_free(&orig_group), comm);
3287
3288 // generate redist
3289 redist = xmalloc(num_src_fields * sizeof(*redist));
3290 if (num_src_fields == 1) {
3291 *redist =
3292 xt_redist_single_array_base_custom_new(
3293 nsend, nrecv, send_msgs, recv_msgs, halo_comm,
3294 redist_config);
3295 } else {
3296 for (size_t field_idx = 0; field_idx < num_src_fields; ++field_idx) {
3297 redist[field_idx] =
3298 xt_redist_single_array_base_custom_new(
3299 nsends[field_idx], nrecvs[field_idx],
3300 send_msgs, recv_msgs, halo_comm,
3301 redist_config);
3302 send_msgs += nsends[field_idx];
3303 recv_msgs += nrecvs[field_idx];
3304 }
3305 }
3306
3307 } else {
3308 yac_mpi_call(MPI_Comm_split(comm, 0, 0, &halo_comm), comm);
3309 redist = NULL;
3310 }
3311
3312 yac_mpi_call(MPI_Comm_free(&halo_comm), comm);
3313 free(exchange_buffer);
3314 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
3315
3316 xt_redist_msg_free(msgs_buffer, total_num_msg, comm);
3317
3318 return redist;
3319}
3320
3321static int compare_rank_pos_reorder_field_idx(const void * a, const void * b) {
3322
3323 int ret = (((struct remote_point_info_reorder *)a)->field_idx >
3324 ((struct remote_point_info_reorder *)b)->field_idx) -
3325 (((struct remote_point_info_reorder *)a)->field_idx <
3326 ((struct remote_point_info_reorder *)b)->field_idx);
3327
3328 if (ret) return ret;
3329
3330 ret = ((struct remote_point_info_reorder *)a)->data.rank -
3331 ((struct remote_point_info_reorder *)b)->data.rank;
3332
3333 if (ret) return ret;
3334
3335 return (((struct remote_point_info_reorder *)a)->data.orig_pos >
3336 ((struct remote_point_info_reorder *)b)->data.orig_pos) -
3337 (((struct remote_point_info_reorder *)a)->data.orig_pos <
3338 ((struct remote_point_info_reorder *)b)->data.orig_pos);
3339}
3340
3342 const void * a, const void * b) {
3343
3344 struct interp_weight_stencil_wsum_mf * a_ =
3346 struct interp_weight_stencil_wsum_mf * b_ =
3348
3349 size_t count = MIN(a_->count, b_->count);
3350
3351 for (size_t i = 0; i < count; ++i) {
3352 int ret = (a_->data[i].src_field_idx > b_->data[i].src_field_idx) -
3353 (a_->data[i].src_field_idx < b_->data[i].src_field_idx);
3354 if (ret) return ret;
3355 ret = (a_->data[i].src.orig_pos > b_->data[i].src.orig_pos) -
3356 (a_->data[i].src.orig_pos < b_->data[i].src.orig_pos);
3357 if (ret) return ret;
3358 }
3359 return 0;
3360}
3361
3363 const void * a, const void * b) {
3364
3365 struct interp_weight_stencil_wsum_mf * a_ =
3367 struct interp_weight_stencil_wsum_mf * b_ =
3369
3370 YAC_ASSERT(
3371 (a_->tgt.data.count == 1) && (b_->tgt.data.count == 1),
3372 "ERROR(compare_interp_weight_stencil_wsum_tgt_orig_pos): invalid data")
3373
3374 size_t a_orig_pos = a_->tgt.data.data.single.orig_pos;
3375 size_t b_orig_pos = b_->tgt.data.data.single.orig_pos;
3376
3377 return (a_orig_pos > b_orig_pos) - (a_orig_pos < b_orig_pos);
3378}
3379
3380static void free_remote_point(struct remote_point point) {
3381
3382 if (point.data.count > 1) free(point.data.data.multi);
3383}
3384
3386 struct remote_point_infos * point_infos, size_t count, MPI_Comm comm,
3387 Xt_config redist_config) {
3388
3389 int comm_size;
3390 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
3391
3392 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
3394 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
3395
3396 for (size_t i = 0; i < count; ++i) {
3397 int curr_count = point_infos[i].count;
3398 struct remote_point_info * curr_point_infos =
3399 (curr_count == 1)?
3400 (&(point_infos[i].data.single)):(point_infos[i].data.multi);
3401 YAC_ASSERT(
3402 curr_count >= 1,
3403 "ERROR(generate_redist_put_double): no owner found for global id")
3404 for (int j = 0; j < curr_count; ++j)
3405 sendcounts[curr_point_infos[j].rank]++;
3406 }
3407
3409 1, sendcounts, recvcounts, sdispls, rdispls, comm);
3410
3411 size_t send_count =
3412 sdispls[comm_size] + sendcounts[comm_size - 1];
3413 size_t recv_count =
3414 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
3415
3416 int * exchange_buffer =
3417 xmalloc((2 * send_count + recv_count) * sizeof(*exchange_buffer));
3418 int * send_buffer = exchange_buffer;
3419 int * reorder_idx = exchange_buffer + send_count;
3420 int * recv_buffer = exchange_buffer + 2 * send_count;
3421
3422 // pack the original positions of the points that have to updated
3423 for (size_t i = 0; i < count; ++i) {
3424 int curr_count = point_infos[i].count;
3425 struct remote_point_info * curr_point_infos =
3426 (curr_count == 1)?
3427 (&(point_infos[i].data.single)):(point_infos[i].data.multi);
3428 for (int j = 0; j < curr_count; ++j) {
3429 size_t pos = sdispls[curr_point_infos[j].rank + 1]++;
3430 uint64_t orig_pos = curr_point_infos[j].orig_pos;
3431 YAC_ASSERT(
3432 orig_pos <= INT_MAX,
3433 "ERROR(generate_redist_put_double): offset not supported by MPI")
3434 send_buffer[pos] = (int)orig_pos;
3435 reorder_idx[pos] = i;
3436 }
3437 }
3438
3439 // exchange original positions of the points that have to updated
3440 yac_alltoallv_int_p2p(
3441 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls, comm);
3442
3443 size_t nsend = 0;
3444 size_t nrecv = 0;
3445 for (int i = 0; i < comm_size; ++i) {
3446 if (sendcounts[i] > 0) nsend++;
3447 if (recvcounts[i] > 0) nrecv++;
3448 }
3449
3450 struct Xt_redist_msg * send_msgs = xmalloc(nsend * sizeof(*send_msgs));
3451 struct Xt_redist_msg * recv_msgs = xmalloc(nrecv * sizeof(*send_msgs));
3452
3453 for (int i = 0, nsend = 0, nrecv = 0; i < comm_size; ++i) {
3454 if (sendcounts[i] > 0) {
3455 send_msgs[nsend].rank = i;
3456 send_msgs[nsend].datatype =
3457 xt_mpi_generate_datatype(
3458 reorder_idx + sdispls[i], sendcounts[i], MPI_DOUBLE, comm);
3459 nsend++;
3460 }
3461 if (recvcounts[i] > 0) {
3462 recv_msgs[nrecv].rank = i;
3463 recv_msgs[nrecv].datatype =
3464 xt_mpi_generate_datatype(
3465 recv_buffer + rdispls[i], recvcounts[i], MPI_DOUBLE, comm);
3466 nrecv++;
3467 }
3468 }
3469
3470 // generate redist
3471 Xt_redist redist =
3472 xt_redist_single_array_base_custom_new(
3473 nsend, nrecv, send_msgs, recv_msgs, comm, redist_config);
3474
3475 free(exchange_buffer);
3476 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
3477
3478 xt_redist_msg_free(recv_msgs, nrecv, comm);
3479 xt_redist_msg_free(send_msgs, nsend, comm);
3480
3481 return redist;
3482}
3483
3485 MPI_Comm comm, struct interp_weight_stencils_wsum_mf * wsum_mf_stencils_data,
3486 struct yac_interpolation * interp,
3488 enum yac_interp_weight_stencil_type stencil_type, Xt_config redist_config) {
3489
3490 int comm_rank;
3491 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
3492
3493 // redistribute stencils to respective owners
3494 struct interp_weight_stencils_wsum_mf * (*redist_wsum_mf_stencils)(
3495 MPI_Comm comm, struct interp_weight_stencils_wsum_mf * wsum_mf_stencils_data);
3496 YAC_ASSERT(
3497 (reorder == YAC_MAPPING_ON_SRC) || (reorder == YAC_MAPPING_ON_TGT),
3498 "ERROR(yac_interp_weights_redist_w_sum_mf): invalid reorder type")
3500 (reorder == YAC_MAPPING_ON_SRC)?
3502 struct interp_weight_stencils_wsum_mf * new_wsum_mf_stencils_data =
3503 redist_wsum_mf_stencils(comm, wsum_mf_stencils_data);
3504
3505 size_t wsum_mf_count = new_wsum_mf_stencils_data->count;
3506 struct interp_weight_stencil_wsum_mf * wsum_mf_stencils =
3507 new_wsum_mf_stencils_data->data;
3508
3509 // compute the total number of links
3510 size_t total_num_links = 0, total_num_remote_weights = 0;
3511 for (size_t i = 0; i < wsum_mf_count; ++i) {
3512 size_t curr_stencil_size = wsum_mf_stencils[i].count;
3513 total_num_links += curr_stencil_size;
3514 for (size_t j = 0; j < curr_stencil_size; ++j)
3515 if (wsum_mf_stencils[i].data[j].src.rank != comm_rank)
3516 ++total_num_remote_weights;
3517 }
3518
3519 // gather all remote source points and determine number of source fields
3520 struct remote_point_info_reorder * remote_src_points =
3521 xmalloc(total_num_remote_weights * sizeof(*remote_src_points));
3522 size_t num_src_fields = 0;
3523 for (size_t i = 0, k = 0; i < wsum_mf_count; ++i) {
3524 size_t curr_stencil_size = wsum_mf_stencils[i].count;
3525 struct interp_weight_stencil_wsum_mf_weight * curr_weights =
3526 wsum_mf_stencils[i].data;
3527 for (size_t j = 0; j < curr_stencil_size; ++j) {
3528 size_t curr_src_field_idx = curr_weights[j].src_field_idx;
3529 if (curr_src_field_idx >= num_src_fields)
3530 num_src_fields = curr_src_field_idx + 1;
3531 if (curr_weights[j].src.rank != comm_rank) {
3532 remote_src_points[k].data = curr_weights[j].src;
3533 remote_src_points[k].field_idx = curr_src_field_idx;
3534 remote_src_points[k].reorder_idx = i;
3535 ++k;
3536 }
3537 }
3538 }
3540 MPI_Allreduce(
3541 MPI_IN_PLACE, &num_src_fields, 1, YAC_MPI_SIZE_T, MPI_MAX, comm), comm);
3542
3543 // sort remote points first by field_idx, second by rank, and
3544 // then by orig_pos
3545 qsort(remote_src_points, total_num_remote_weights, sizeof(*remote_src_points),
3547
3548 // update stencils: set owner to -1; set orig_pos to position of respecitve
3549 // point in halo data
3550 // remove duplicated remote points
3551 struct remote_point_info * prev_remote_src_point;
3552 size_t prev_field_idx;
3553 size_t halo_size;
3554 if (total_num_remote_weights > 0) {
3555 prev_remote_src_point = &(remote_src_points[0].data);
3556 prev_field_idx = remote_src_points[0].field_idx;
3557 halo_size = 1;
3558 } else {
3559 prev_field_idx = SIZE_MAX;
3560 halo_size = 0;
3561 }
3562 for (size_t i = 0; i < total_num_remote_weights; ++i) {
3563 struct remote_point_info * curr_remote_src_point =
3564 &(remote_src_points[i].data);
3565 size_t curr_field_idx = remote_src_points[i].field_idx;
3567 prev_remote_src_point, curr_remote_src_point) ||
3568 (prev_field_idx != curr_field_idx)) {
3569 prev_remote_src_point = curr_remote_src_point;
3570 prev_field_idx = curr_field_idx;
3571 remote_src_points[halo_size].data = *curr_remote_src_point;
3572 remote_src_points[halo_size].field_idx = curr_field_idx;
3573 ++halo_size;
3574 }
3575 struct interp_weight_stencil_wsum_mf * curr_stencil =
3576 wsum_mf_stencils + remote_src_points[i].reorder_idx;
3577 size_t curr_stencil_size = curr_stencil->count;
3578 for (size_t j = 0; j < curr_stencil_size; ++j) {
3580 &(curr_stencil->data[j].src), curr_remote_src_point)) &&
3581 (curr_stencil->data[j].src_field_idx == curr_field_idx)) {
3582 curr_stencil->data[j].src.rank = -1;
3583 curr_stencil->data[j].src.orig_pos = halo_size - 1;
3584 curr_stencil->data[j].src_field_idx = SIZE_MAX;
3585 }
3586 }
3587 }
3588
3589 // sort stencils by their memory access pattern on the local process
3590 qsort(wsum_mf_stencils, wsum_mf_count, sizeof(*wsum_mf_stencils),
3591 (reorder == YAC_MAPPING_ON_SRC)?
3594
3595 size_t * num_src_per_tgt = xmalloc(wsum_mf_count * sizeof(*num_src_per_tgt));
3596 double * weights = xmalloc(total_num_links * sizeof(*weights));
3597 size_t * src_idx = xmalloc(total_num_links * sizeof(*src_idx));
3598 size_t * src_field_idx = xmalloc(total_num_links * sizeof(*src_field_idx));
3599
3600 // extract data from stencil
3601 for (size_t i = 0, k = 0; i < wsum_mf_count; ++i) {
3602 size_t curr_stencil_size = wsum_mf_stencils[i].count;
3603 struct interp_weight_stencil_wsum_mf_weight * curr_weights =
3604 wsum_mf_stencils[i].data;
3605 num_src_per_tgt[i] = curr_stencil_size;
3606 for (size_t j = 0; j < curr_stencil_size; ++j, ++k){
3607 weights[k] = curr_weights[j].weight;
3608 src_idx[k] = curr_weights[j].src.orig_pos;
3609 src_field_idx[k] = curr_weights[j].src_field_idx;
3610 }
3611 }
3612
3613 // generate halo redists (one per source field
3614 Xt_redist * halo_redists =
3616 remote_src_points, halo_size, num_src_fields, comm, redist_config);
3617
3618 YAC_ASSERT(
3619 (stencil_type == WEIGHT_SUM) ||
3620 (stencil_type == SUM) ||
3621 (stencil_type == WEIGHT_SUM_MF) ||
3622 (stencil_type == SUM_MF),
3623 "ERROR(yac_interp_weights_redist_w_sum_mf): unsupported stencil type");
3624
3625 // add weights to interpolation
3626 if (reorder == YAC_MAPPING_ON_SRC) {
3627
3628 // generate result redist
3629 struct remote_point_infos * tgt_infos =
3630 xmalloc(wsum_mf_count * sizeof(*tgt_infos));
3631 for (size_t i = 0; i < wsum_mf_count; ++i)
3632 tgt_infos[i] = wsum_mf_stencils[i].tgt.data;
3633 Xt_redist result_redist =
3635 tgt_infos, wsum_mf_count, comm, redist_config);
3636 free(tgt_infos);
3637
3638 switch(stencil_type) {
3639 default:
3640 case (WEIGHT_SUM):
3641 case (WEIGHT_SUM_MF):
3643 interp, halo_redists, wsum_mf_count, num_src_per_tgt, weights,
3644 src_field_idx, src_idx, ((stencil_type==WEIGHT_SUM)?1:num_src_fields),
3645 result_redist);
3646 break;
3647 case (SUM):
3648 case (SUM_MF):
3650 interp, halo_redists, wsum_mf_count, num_src_per_tgt,
3651 src_field_idx, src_idx, ((stencil_type == SUM)?1:num_src_fields),
3652 result_redist);
3653 break;
3654 }
3655
3656 if (result_redist != NULL) xt_redist_delete(result_redist);
3657
3658 } else {
3659
3660 size_t * tgt_orig_pos = xmalloc(wsum_mf_count * sizeof(*tgt_orig_pos));
3661 for (size_t i = 0; i < wsum_mf_count; ++i) {
3662 YAC_ASSERT(
3663 wsum_mf_stencils[i].tgt.data.count == 1,
3664 "ERROR(yac_interp_weights_redist_w_sum): currently unsupported target "
3665 "point distribution")
3666 tgt_orig_pos[i] =
3667 (size_t)(wsum_mf_stencils[i].tgt.data.data.single.orig_pos);
3668 }
3669
3670 switch(stencil_type) {
3671 default:
3672 case (WEIGHT_SUM):
3673 case (WEIGHT_SUM_MF):
3675 interp, halo_redists, tgt_orig_pos, wsum_mf_count,
3676 num_src_per_tgt, weights, src_field_idx, src_idx,
3677 ((stencil_type == WEIGHT_SUM)?1:num_src_fields));
3678 break;
3679 case (SUM):
3680 case (SUM_MF):
3682 interp, halo_redists, tgt_orig_pos, wsum_mf_count,
3683 num_src_per_tgt, src_field_idx, src_idx,
3684 ((stencil_type == SUM)?1:num_src_fields));
3685 break;
3686 }
3687 free(tgt_orig_pos);
3688 }
3689
3690 for (size_t i = 0; i < new_wsum_mf_stencils_data->count; ++i)
3691 free_remote_point(new_wsum_mf_stencils_data->data[i].tgt);
3692 free(new_wsum_mf_stencils_data->data);
3693 free(new_wsum_mf_stencils_data);
3694
3695 free(remote_src_points);
3696 free(src_field_idx);
3697 free(src_idx);
3698 free(weights);
3699 free(num_src_per_tgt);
3700 if (halo_redists != NULL) {
3701 for (size_t i = 0; i < num_src_fields; ++i)
3702 xt_redist_delete(halo_redists[i]);
3703 free(halo_redists);
3704 }
3705}
3706
3707static int compare_stencils(const void * a, const void * b) {
3708
3709 return (int)(((struct interp_weight_stencil *)a)->type) -
3710 (int)(((struct interp_weight_stencil *)b)->type);
3711}
3712
3713static Xt_config get_redist_config(
3714 char const * yaxt_exchanger_name, MPI_Comm comm) {
3715
3716 Xt_config redist_config = xt_config_new();
3717
3718 // if no exchanger has been defined yet -> check environment
3719 char * env_exchanger_name = NULL;
3720 if (yaxt_exchanger_name == NULL) {
3721
3722 int rank;
3723 yac_mpi_call(MPI_Comm_rank(comm, &rank), comm);
3724
3725 // environment is only checked on rank 0, results are broadcasted
3726 // to other processes
3727 size_t exchanger_name_len = 0;
3728 if (rank == 0) {
3729
3730 // check if the user provided an exchanger name in the environment
3731 env_exchanger_name = getenv(YAC_YAXT_EXCHANGER_STR);
3732 exchanger_name_len =
3733 ((env_exchanger_name != NULL) && (env_exchanger_name[0] != '\0'))?
3734 strlen(env_exchanger_name):0;
3735 }
3736
3737 // broadcast the length of the exchanger name provided by the user
3738 // through the environment
3740 MPI_Bcast(&exchanger_name_len, 1, YAC_MPI_SIZE_T, 0, comm), comm);
3741
3742 if (exchanger_name_len > 0) {
3743
3744 if (rank == 0)
3745 env_exchanger_name = strdup(env_exchanger_name);
3746 else
3747 env_exchanger_name =
3748 xmalloc((exchanger_name_len + 1) * sizeof(*env_exchanger_name));
3749
3750 // broadcast name of the exchanger
3752 MPI_Bcast(
3753 env_exchanger_name, (int)(exchanger_name_len + 1), MPI_CHAR, 0, comm),
3754 comm);
3755
3756 yaxt_exchanger_name = env_exchanger_name;
3757 }
3758 }
3759
3760 if (yaxt_exchanger_name != NULL) {
3761
3762 // set exchanger
3763 int exchanger_id = xt_exchanger_id_by_name(yaxt_exchanger_name);
3765 exchanger_id >= 0,
3766 "ERROR(get_redist_config): invalid yaxt exchanger name \"%s\"",
3767 yaxt_exchanger_name);
3768 xt_config_set_exchange_method(redist_config, exchanger_id);
3769 }
3770
3771 free(env_exchanger_name);
3772
3773 return redist_config;
3774}
3775
3777 struct yac_interp_weights * weights,
3780 double scaling_factor, double scaling_summand,
3781 char const * yaxt_exchanger_name) {
3782
3783 MPI_Comm comm = weights->comm;
3784
3785 Xt_config redist_config = get_redist_config(yaxt_exchanger_name, comm);
3786
3787 // sort stencils by type
3788 qsort(weights->stencils, weights->stencils_size, sizeof(*(weights->stencils)),
3790
3791 uint64_t local_stencil_counts[WEIGHT_STENCIL_TYPE_SIZE];
3792 size_t stencils_offsets[WEIGHT_STENCIL_TYPE_SIZE];
3793
3794 // count local number of stencils per type
3795 memset(&(local_stencil_counts[0]), 0, sizeof(local_stencil_counts));
3796 for (size_t i = 0; i < weights->stencils_size; ++i)
3797 local_stencil_counts[(int)(weights->stencils[i].type)]++;
3798
3799 for (size_t i = 0, accu = 0; i < (size_t)WEIGHT_STENCIL_TYPE_SIZE; ++i) {
3800 stencils_offsets[i] = accu;
3801 accu += local_stencil_counts[i];
3802 }
3803
3804 uint64_t global_stencil_counts[WEIGHT_STENCIL_TYPE_SIZE];
3805
3806 // determine global number of stencils per type
3808 MPI_Allreduce(
3809 local_stencil_counts, global_stencil_counts,
3810 (int)WEIGHT_STENCIL_TYPE_SIZE, MPI_UINT64_T, MPI_SUM, comm), comm);
3811
3812 { // check whether the collection_size is consistant across all processes
3813 uint64_t max_collection_size = (uint64_t)collection_size;
3815 MPI_Allreduce(
3816 MPI_IN_PLACE, &max_collection_size, 1, MPI_UINT64_T, MPI_MAX, comm),
3817 comm);
3818 YAC_ASSERT(
3819 (size_t)max_collection_size == collection_size,
3820 "ERROR(yac_interp_weights_get_interpolation): "
3821 "mismatching collection sizes")
3822 }
3823
3824 struct yac_interpolation * interp =
3827 scaling_factor, scaling_summand);
3828
3829 if (global_stencil_counts[FIXED] > 0)
3831 weights->comm, local_stencil_counts[FIXED],
3832 weights->stencils + stencils_offsets[FIXED], interp);
3833
3834 if (global_stencil_counts[DIRECT] > 0)
3836 weights->comm, local_stencil_counts[DIRECT],
3837 weights->stencils + stencils_offsets[DIRECT], interp,
3838 redist_config);
3839
3840 if (global_stencil_counts[SUM] > 0) {
3841
3842 struct interp_weight_stencils_wsum_mf * wsum_stencils =
3844 weights->stencils + stencils_offsets[SUM],
3845 (size_t)(local_stencil_counts[SUM]), SUM);
3846 YAC_ASSERT(
3847 (reorder == YAC_MAPPING_ON_SRC) || (reorder == YAC_MAPPING_ON_TGT),
3848 "ERROR(yac_interp_weights_get_interpolation): invalid reorder type")
3850 weights->comm, wsum_stencils, interp, reorder, SUM,
3851 redist_config);
3852 for (size_t i = 0; i < wsum_stencils->count; ++i)
3853 free_remote_point(wsum_stencils->data[i].tgt);
3854 free(wsum_stencils->data);
3855 free(wsum_stencils);
3856 }
3857
3858 if (global_stencil_counts[WEIGHT_SUM] > 0) {
3859
3860 struct interp_weight_stencils_wsum_mf * wsum_stencils =
3862 weights->stencils + stencils_offsets[WEIGHT_SUM],
3863 (size_t)(local_stencil_counts[WEIGHT_SUM]), WEIGHT_SUM);
3864 YAC_ASSERT(
3865 (reorder == YAC_MAPPING_ON_SRC) || (reorder == YAC_MAPPING_ON_TGT),
3866 "ERROR(yac_interp_weights_get_interpolation): invalid reorder type")
3868 weights->comm, wsum_stencils, interp, reorder, WEIGHT_SUM,
3869 redist_config);
3870 for (size_t i = 0; i < wsum_stencils->count; ++i)
3871 free_remote_point(wsum_stencils->data[i].tgt);
3872 free(wsum_stencils->data);
3873 free(wsum_stencils);
3874 }
3875
3876 if (global_stencil_counts[DIRECT_MF] > 0)
3878 weights->comm, local_stencil_counts[DIRECT_MF],
3879 weights->stencils + stencils_offsets[DIRECT_MF], interp,
3880 redist_config);
3881
3882 if (global_stencil_counts[SUM_MF] > 0) {
3883
3884 struct interp_weight_stencils_wsum_mf * sum_mf_stencils =
3886 weights->stencils + stencils_offsets[SUM_MF],
3887 (size_t)(local_stencil_counts[SUM_MF]), SUM_MF);
3888 YAC_ASSERT(
3889 (reorder == YAC_MAPPING_ON_SRC) || (reorder == YAC_MAPPING_ON_TGT),
3890 "ERROR(yac_interp_weights_get_interpolation): invalid reorder type")
3892 weights->comm, sum_mf_stencils, interp, reorder, SUM_MF,
3893 redist_config);
3894 for (size_t i = 0; i < sum_mf_stencils->count; ++i)
3895 free_remote_point(sum_mf_stencils->data[i].tgt);
3896 free(sum_mf_stencils->data);
3897 free(sum_mf_stencils);
3898 }
3899
3900 if (global_stencil_counts[WEIGHT_SUM_MF] > 0) {
3901
3902 struct interp_weight_stencils_wsum_mf * wsum_mf_stencils =
3904 weights->stencils + stencils_offsets[WEIGHT_SUM_MF],
3905 (size_t)(local_stencil_counts[WEIGHT_SUM_MF]), WEIGHT_SUM_MF);
3906 YAC_ASSERT(
3907 (reorder == YAC_MAPPING_ON_SRC) || (reorder == YAC_MAPPING_ON_TGT),
3908 "ERROR(yac_interp_weights_get_interpolation): invalid reorder type")
3910 weights->comm, wsum_mf_stencils, interp, reorder, WEIGHT_SUM_MF,
3911 redist_config);
3912 for (size_t i = 0; i < wsum_mf_stencils->count; ++i)
3913 free_remote_point(wsum_mf_stencils->data[i].tgt);
3914 free(wsum_mf_stencils->data);
3915 free(wsum_mf_stencils);
3916 }
3917
3918 xt_config_delete(redist_config);
3919
3920 return interp;
3921}
3922
3924 struct yac_interp_weights * weights, int reorder,
3926 double scaling_factor, double scaling_summand,
3927 char const * yaxt_exchanger_name) {
3928
3929 YAC_ASSERT(
3930 (reorder == YAC_MAPPING_ON_SRC) ||
3931 (reorder == YAC_MAPPING_ON_TGT),
3932 "ERROR(yac_interp_weights_get_interpolation_f2c): "
3933 "reorder type must be of YAC_MAPPING_ON_SRC/YAC_MAPPING_ON_TGT");
3934
3935 return
3937 weights, (enum yac_interp_weights_reorder_type)reorder,
3939 scaling_factor, scaling_summand,
3940 ((yaxt_exchanger_name != NULL) && (yaxt_exchanger_name[0] != '\0'))?
3941 yaxt_exchanger_name:NULL);
3942}
3943
3945
3946 free(points->data);
3947 free(points);
3948}
3949
3951 struct interp_weight_stencil * stencils, size_t count) {
3952
3953 for (size_t i = 0 ; i < count; ++i) {
3954
3955 YAC_ASSERT(
3956 (stencils[i].type == DIRECT) ||
3957 (stencils[i].type == SUM) ||
3958 (stencils[i].type == WEIGHT_SUM) ||
3959 (stencils[i].type == DIRECT_MF) ||
3960 (stencils[i].type == SUM_MF) ||
3961 (stencils[i].type == WEIGHT_SUM_MF) ||
3962 (stencils[i].type == FIXED),
3963 "ERROR(yac_interp_weights_delete): invalid stencil type")
3964 switch(stencils[i].type) {
3965
3966 case(DIRECT):
3967 free_remote_point(stencils[i].data.direct.src);
3968 break;
3969 case(SUM):
3970 free_remote_points(stencils[i].data.sum.srcs);
3971 break;
3972 case(WEIGHT_SUM):
3973 free_remote_points(stencils[i].data.weight_sum.srcs);
3974 free(stencils[i].data.weight_sum.weights);
3975 break;
3976 case(DIRECT_MF):
3977 free_remote_point(stencils[i].data.direct_mf.src);
3978 break;
3979 case(SUM_MF):
3980 free_remote_points(stencils[i].data.sum_mf.srcs);
3981 free(stencils[i].data.sum_mf.field_indices);
3982 break;
3983 case (WEIGHT_SUM_MF):
3984 free_remote_points(stencils[i].data.weight_sum_mf.srcs);
3985 free(stencils[i].data.weight_sum_mf.weights);
3986 free(stencils[i].data.weight_sum_mf.field_indices);
3987 break;
3988 default:
3989 case(FIXED):
3990 break;
3991 };
3992 free_remote_point(stencils[i].tgt);
3993 }
3994 free(stencils);
3995}
3996
3997#ifdef YAC_NETCDF_ENABLED
3998static int compare_double(void const * a, void const * b) {
3999
4000 return (*(double const *)a > *(double const *)b) -
4001 (*(double const *)a < *(double const *)b);
4002}
4003
4008 char const * filename, char const * src_grid_name, char const * tgt_grid_name,
4009 size_t num_fixed_values, double * fixed_values,
4010 size_t * num_tgt_per_fixed_value, size_t num_links,
4011 size_t num_weights_per_link, size_t num_src_fields,
4012 size_t * num_links_per_src_field,
4013 enum yac_location * src_locations, enum yac_location tgt_location,
4014 size_t src_grid_size, size_t tgt_grid_size) {
4015
4016 int ncid;
4017
4018 // create file
4019 yac_nc_create(filename, NC_CLOBBER | NC_64BIT_OFFSET, &ncid);
4020
4021 int dim_weight_id[8];
4022
4023 // define dimensions
4024 if (num_links > 0) {
4025 YAC_HANDLE_ERROR(nc_def_dim(ncid, "num_links", num_links, &dim_weight_id[0]));
4027 num_weights_per_link > 0,
4028 "ERROR(create_weight_file): number of links is %zu but number of "
4029 "weights per link is zero for weight file %s", num_links, filename)
4031 nc_def_dim(ncid, "num_wgts", num_weights_per_link, &dim_weight_id[1]));
4032 }
4034 num_src_fields > 0,
4035 "ERROR(create_weight_file): number of source fields is zero for "
4036 "weight file %s", filename)
4038 nc_def_dim(ncid, "num_src_fields", num_src_fields, &dim_weight_id[2]));
4040 nc_def_dim(
4041 ncid, "max_loc_str_len", YAC_MAX_LOC_STR_LEN, &dim_weight_id[3]));
4042
4043 if (num_fixed_values > 0) {
4045 nc_def_dim(
4046 ncid, "num_fixed_values", num_fixed_values, &dim_weight_id[4]));
4047 size_t num_fixed_dst = 0;
4048 for (size_t i = 0; i < num_fixed_values; ++i)
4049 num_fixed_dst += num_tgt_per_fixed_value[i];
4051 num_fixed_dst > 0,
4052 "ERROR(create_weight_file): number of fixed values is %zu but number "
4053 "of fixed destination points is zero for weight file %s",
4054 num_fixed_dst, filename)
4056 nc_def_dim(ncid, "num_fixed_dst", num_fixed_dst, &dim_weight_id[5]));
4057 }
4058
4059 if (src_grid_size > 0)
4061 nc_def_dim(ncid, "src_grid_size", src_grid_size, &dim_weight_id[6]));
4062
4063 if (tgt_grid_size > 0)
4065 nc_def_dim(ncid, "dst_grid_size", tgt_grid_size, &dim_weight_id[7]));
4066
4067 int var_src_add_id, var_dst_add_id, var_weight_id, var_num_links_id,
4068 src_var_locs_id, tgt_var_loc_id, var_fixed_values_id,
4069 var_num_dst_per_fixed_value_id, var_dst_add_fixed_id;
4070
4071 // define variables
4072 if (num_links > 0) {
4074 nc_def_var(
4075 ncid, "src_address", NC_INT, 1, dim_weight_id, &var_src_add_id));
4077 nc_def_var(
4078 ncid, "dst_address", NC_INT, 1, dim_weight_id, &var_dst_add_id));
4080 nc_def_var(
4081 ncid, "remap_matrix", NC_DOUBLE, 2, dim_weight_id, &var_weight_id));
4083 nc_def_var(ncid, "num_links_per_src_field", NC_INT, 1,
4084 &dim_weight_id[2], &var_num_links_id));
4085 }
4087 nc_def_var(
4088 ncid, "src_locations", NC_CHAR, 2, &dim_weight_id[2], &src_var_locs_id));
4090 nc_def_var(
4091 ncid, "dst_location", NC_CHAR, 1, &dim_weight_id[3], &tgt_var_loc_id));
4092 if (num_fixed_values > 0) {
4094 nc_def_var(ncid, "fixed_values", NC_DOUBLE, 1, &dim_weight_id[4],
4095 &var_fixed_values_id));
4097 nc_def_var(ncid, "num_dst_per_fixed_value", NC_INT, 1, &dim_weight_id[4],
4098 &var_num_dst_per_fixed_value_id));
4100 nc_def_var(ncid, "dst_address_fixed", NC_INT, 1, &dim_weight_id[5],
4101 &var_dst_add_fixed_id));
4102 }
4103
4104 // put attributes
4106 nc_put_att_text(ncid, NC_GLOBAL, "version",
4110 nc_put_att_text(ncid, NC_GLOBAL, "src_grid_name",
4111 strlen(src_grid_name), src_grid_name));
4113 nc_put_att_text(ncid, NC_GLOBAL, "dst_grid_name",
4114 strlen(tgt_grid_name), tgt_grid_name));
4115 {
4116 char const * str_logical[2] = {"FALSE", "TRUE"};
4117 YAC_HANDLE_ERROR(nc_put_att_text(ncid, NC_GLOBAL, "contains_links",
4118 strlen(str_logical[num_links > 0]),
4119 str_logical[num_links > 0]));
4120 YAC_HANDLE_ERROR(nc_put_att_text(ncid, NC_GLOBAL, "contains_fixed_dst",
4121 strlen(str_logical[num_fixed_values > 0]),
4122 str_logical[num_fixed_values > 0]));
4123 }
4124
4125 // end definition
4126 YAC_HANDLE_ERROR(nc_enddef(ncid));
4127
4128 // write some basic data
4129
4130 if (num_links > 0) {
4131 int * num_links_per_src_field_int =
4132 xmalloc(num_src_fields * sizeof(*num_links_per_src_field_int));
4133 for (size_t i = 0; i < num_src_fields; ++i) {
4134 YAC_ASSERT(
4135 num_links_per_src_field[i] <= INT_MAX,
4136 "ERROR(create_weight_file): "
4137 "number of links per source field too big (not yet supported)")
4138 num_links_per_src_field_int[i] = (int)num_links_per_src_field[i];
4139 }
4141 nc_put_var_int(ncid, var_num_links_id, num_links_per_src_field_int));
4142 free(num_links_per_src_field_int);
4143 }
4144
4145 for (size_t i = 0; i < num_src_fields; ++i) {
4146 char const * loc_str = yac_loc2str(src_locations[i]);
4147 size_t str_start[2] = {i, 0};
4148 size_t str_count[2] = {1, strlen(loc_str)};
4150 nc_put_vara_text(ncid, src_var_locs_id, str_start, str_count, loc_str));
4151 }
4152
4153 {
4154 char const * loc_str = yac_loc2str(tgt_location);
4155 size_t str_start[1] = {0};
4156 size_t str_count[1] = {strlen(loc_str)};
4158 nc_put_vara_text(ncid, tgt_var_loc_id, str_start, str_count, loc_str));
4159 }
4160 if (num_fixed_values > 0) {
4161
4162 int * num_tgt_per_fixed_value_int =
4163 xmalloc(num_fixed_values * sizeof(*num_tgt_per_fixed_value_int));
4164 for (unsigned i = 0; i < num_fixed_values; ++i) {
4165 YAC_ASSERT(
4166 num_tgt_per_fixed_value[i] <= INT_MAX,
4167 "ERROR(create_weight_file): "
4168 "number of targets per fixed value is too big (not yet supported)")
4169 num_tgt_per_fixed_value_int[i] = (int)num_tgt_per_fixed_value[i];
4170 }
4171 YAC_HANDLE_ERROR(nc_put_var_double(ncid, var_fixed_values_id, fixed_values));
4172 YAC_HANDLE_ERROR(nc_put_var_int(ncid, var_num_dst_per_fixed_value_id,
4173 num_tgt_per_fixed_value_int));
4174 free(num_tgt_per_fixed_value_int);
4175 }
4176
4177 // close file
4178 YAC_HANDLE_ERROR(nc_close(ncid));
4179}
4180
4181static int compare_interp_weight_stencil(const void * a, const void * b) {
4182
4183 int a_is_fixed = (((struct interp_weight_stencil *)a)->type == FIXED);
4184 int b_is_fixed = (((struct interp_weight_stencil *)b)->type == FIXED);
4185 int ret = b_is_fixed - a_is_fixed;
4186
4187 if (ret) return ret;
4188
4189 // if both are fixed stencils
4190 if (a_is_fixed) {
4191
4192 double fixed_value_a =
4193 ((struct interp_weight_stencil *)a)->data.fixed.value;
4194 double fixed_value_b =
4195 ((struct interp_weight_stencil *)b)->data.fixed.value;
4196 ret = (fixed_value_a > fixed_value_b) -
4197 (fixed_value_a < fixed_value_b);
4198 if (ret) return ret;
4199 }
4200
4201 return (((struct interp_weight_stencil *)a)->tgt.global_id >
4202 ((struct interp_weight_stencil *)b)->tgt.global_id) -
4203 (((struct interp_weight_stencil *)a)->tgt.global_id <
4204 ((struct interp_weight_stencil *)b)->tgt.global_id);
4205}
4206
4208 struct interp_weight_stencil * stencils, size_t stencils_size,
4209 yac_int * min_tgt_global_id, yac_int * max_tgt_global_id, MPI_Comm comm) {
4210
4211 yac_int min_max[2] = {XT_INT_MAX, XT_INT_MIN};
4212
4213 for (size_t i = 0; i < stencils_size; ++i) {
4214
4215 yac_int curr_id = stencils[i].tgt.global_id;
4216 if (curr_id < min_max[0]) min_max[0] = curr_id;
4217 if (curr_id > min_max[1]) min_max[1] = curr_id;
4218 }
4219
4220 min_max[0] = XT_INT_MAX - min_max[0];
4221
4223 MPI_Allreduce(
4224 MPI_IN_PLACE, min_max, 2, yac_int_dt, MPI_MAX, comm), comm);
4225
4226 *min_tgt_global_id = XT_INT_MAX - min_max[0];
4227 *max_tgt_global_id = min_max[1];
4228}
4229
4231 struct interp_weight_stencil * stencils, size_t stencils_size,
4232 yac_int min_tgt_global_id, yac_int max_tgt_global_id,
4233 int num_io_procs_int, int * io_owner) {
4234
4235 long long num_io_procs = (long long)num_io_procs_int;
4236 long long id_range =
4237 MAX((long long)(max_tgt_global_id - min_tgt_global_id),1);
4238
4239 for (size_t i = 0; i < stencils_size; ++i)
4240 io_owner[i] =
4241 ((int)(MIN(((long long)(stencils[i].tgt.global_id - min_tgt_global_id) *
4242 num_io_procs) / id_range, num_io_procs - 1)));
4243}
4244
4246 struct interp_weight_stencil * stencils, size_t stencil_count,
4247 double ** fixed_values, size_t * num_fixed_values, MPI_Comm comm) {
4248
4249 int comm_size;
4250 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
4251
4252 double * local_fixed_values =
4253 xmalloc(stencil_count * sizeof(*local_fixed_values));
4254
4255 int * int_buffer = xmalloc(2 * (size_t)comm_size * sizeof(*int_buffer));
4256 int * recvcounts = int_buffer + 0 * comm_size;
4257 int * rdispls = int_buffer + 1 * comm_size;
4258
4259 size_t local_num_fixed = 0;
4260
4261 // get all local fixed values
4262 for (size_t i = 0; i < stencil_count;
4263 ++i, ++local_num_fixed) {
4264 if (stencils[i].type != FIXED) break;
4265 local_fixed_values[i] = stencils[i].data.fixed.value;
4266 }
4267 qsort(local_fixed_values, local_num_fixed, sizeof(*local_fixed_values),
4269 yac_remove_duplicates_double(local_fixed_values, &local_num_fixed);
4270
4271 // get number of fixed values per rank
4272 int local_num_fixed_int = (int)(local_num_fixed);
4274 MPI_Allgather(
4275 &local_num_fixed_int, 1, MPI_INT, recvcounts, 1,MPI_INT, comm), comm);
4276 for (int i = 0, accu = 0; i < comm_size; ++i) {
4277 rdispls[i] = accu;
4278 accu += recvcounts[i];
4279 }
4280
4281 size_t num_all_fixed_values = 0;
4282 for (int i = 0; i < comm_size; ++i)
4283 num_all_fixed_values += (size_t)(recvcounts[i]);
4284
4285 double * all_fixed_values =
4286 xmalloc(num_all_fixed_values * sizeof(*all_fixed_values));
4287
4288 // gather all fixed values
4290 MPI_Allgatherv(
4291 local_fixed_values, local_num_fixed_int, MPI_DOUBLE,
4292 all_fixed_values, recvcounts, rdispls, MPI_DOUBLE, comm), comm);
4293 free(int_buffer);
4294 free(local_fixed_values);
4295
4296 qsort(all_fixed_values, num_all_fixed_values, sizeof(*all_fixed_values),
4298 yac_remove_duplicates_double(all_fixed_values, &num_all_fixed_values);
4299 *fixed_values = xrealloc(all_fixed_values,
4300 num_all_fixed_values * sizeof(*all_fixed_values));
4301 *num_fixed_values = num_all_fixed_values;
4302}
4303
4304static size_t get_num_weights_per_link(struct interp_weight_stencil * stencil) {
4305
4306 YAC_ASSERT(
4307 (stencil->type == FIXED) ||
4308 (stencil->type == DIRECT) ||
4309 (stencil->type == SUM) ||
4310 (stencil->type == WEIGHT_SUM) ||
4311 (stencil->type == DIRECT_MF) ||
4312 (stencil->type == SUM_MF) ||
4313 (stencil->type == WEIGHT_SUM_MF),
4314 "ERROR(get_num_weights_per_link): invalid stencil type")
4315
4316 return (stencil->type == FIXED)?0:1;
4317}
4318
4320 struct interp_weight_stencil * stencils, size_t stencil_count,
4321 MPI_Comm comm) {
4322
4323 size_t num_weights_per_link = 0;
4324 for (size_t i = 0; i < stencil_count; ++i)
4325 num_weights_per_link =
4326 MAX(num_weights_per_link, get_num_weights_per_link(stencils + i));
4327
4328 uint64_t num_weights_per_link_64_t = (uint64_t)num_weights_per_link;
4330 MPI_Allreduce(
4331 MPI_IN_PLACE, &num_weights_per_link_64_t, 1, MPI_UINT64_T,
4332 MPI_MAX, comm), comm);
4333 num_weights_per_link = (size_t)num_weights_per_link_64_t;
4334
4335 return num_weights_per_link;
4336}
4337
4339 struct interp_weight_stencil * stencil, size_t src_field_idx) {;
4340
4341 YAC_ASSERT(
4342 stencil->type != FIXED,
4343 "ERROR(get_num_links_per_src_field): "
4344 "stencil type FIXED not supported by this routine")
4345 YAC_ASSERT(
4346 (stencil->type == DIRECT) ||
4347 (stencil->type == SUM) ||
4348 (stencil->type == WEIGHT_SUM) ||
4349 (stencil->type == DIRECT_MF) ||
4350 (stencil->type == SUM_MF) ||
4351 (stencil->type == WEIGHT_SUM_MF),
4352 "ERROR(get_num_links_per_src_field): invalid stencil type")
4353 switch (stencil->type) {
4354 default:
4355 case(DIRECT): return (src_field_idx == 0)?1:0;
4356 case(SUM): return (src_field_idx == 0)?stencil->data.sum.srcs->count:0;
4357 case(WEIGHT_SUM):
4358 return (src_field_idx == 0)?stencil->data.weight_sum.srcs->count:0;
4359 case(DIRECT_MF): return stencil->data.direct_mf.field_idx == src_field_idx;
4360 case(SUM_MF): {
4361 size_t count = 0;
4362 size_t stencil_size = stencil->data.sum_mf.srcs->count;
4363 size_t * field_indices = stencil->data.sum_mf.field_indices;
4364 for (size_t i = 0; i < stencil_size; ++i)
4365 if (field_indices[i] == src_field_idx) ++count;
4366 return count;
4367 }
4368 case(WEIGHT_SUM_MF): {
4369 size_t count = 0;
4370 size_t stencil_size = stencil->data.weight_sum_mf.srcs->count;
4371 size_t * field_indices = stencil->data.weight_sum_mf.field_indices;
4372 for (size_t i = 0; i < stencil_size; ++i)
4373 if (field_indices[i] == src_field_idx) ++count;
4374 return count;
4375 }
4376 };
4377}
4378
4380 struct interp_weight_stencil * stencils, size_t stencil_count,
4381 size_t num_fixed_values, double * fixed_values,
4382 size_t * num_tgt_per_fixed_value,
4383 size_t * num_fixed_tgt, size_t num_src_fields,
4384 size_t * num_links_per_src_field, size_t * num_links) {
4385
4386 *num_fixed_tgt = 0;
4387 *num_links = 0;
4388 for (size_t i = 0; i < num_fixed_values; ++i) num_tgt_per_fixed_value[i] = 0;
4389 for (size_t i = 0; i < num_src_fields; ++i) num_links_per_src_field[i] = 0;
4390
4391 for (size_t i = 0; i < stencil_count; ++i) {
4392 if (stencils[i].type == FIXED) {
4393 double curr_fixed_value = stencils[i].data.fixed.value;
4394 for (size_t j = 0; j < num_fixed_values; ++j) {
4395 if (curr_fixed_value == fixed_values[j]) {
4396 num_tgt_per_fixed_value[j]++;
4397 break;
4398 }
4399 }
4400 ++*num_fixed_tgt;
4401 } else {
4402 for (size_t j = 0; j < num_src_fields; ++j) {
4403 num_links_per_src_field[j] +=
4404 get_num_links_per_src_field(stencils + i, j);
4405 }
4406 }
4407 }
4408 for (size_t i = 0; i < num_src_fields; ++i)
4409 *num_links += num_links_per_src_field[i];
4410}
4411
4413 size_t num_fixed_values, size_t * num_tgt_per_fixed_value,
4414 size_t num_src_fields, size_t * num_links_per_src_field,
4415 size_t * fixed_offsets, size_t * link_offsets, MPI_Comm comm) {
4416
4417 int comm_rank;
4418 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
4419
4420 size_t count = num_fixed_values + num_src_fields;
4421 uint64_t * uint64_t_buffer = xmalloc(3 * count * sizeof(*uint64_t_buffer));
4422 uint64_t * global_counts = uint64_t_buffer + 0 * count;
4423 uint64_t * local_counts = uint64_t_buffer + 1 * count;
4424 uint64_t * offsets = uint64_t_buffer + 2 * count;
4425
4426 for (size_t i = 0; i < num_fixed_values; ++i)
4427 local_counts[i] = (uint64_t)(num_tgt_per_fixed_value[i]);
4428 for (size_t i = 0; i < num_src_fields; ++i)
4429 local_counts[num_fixed_values + i] = (uint64_t)(num_links_per_src_field[i]);
4430
4432 MPI_Allreduce(local_counts, global_counts, (int)count, MPI_UINT64_T,
4433 MPI_SUM, comm), comm);
4435 MPI_Exscan(local_counts, offsets, (int)count, MPI_UINT64_T, MPI_SUM, comm),
4436 comm);
4437 if (comm_rank == 0) memset(offsets, 0, count * sizeof(*offsets));
4438
4439 for (size_t i = 0, accu = 0; i < num_fixed_values; ++i) {
4440 fixed_offsets[i] = (size_t)(offsets[i]) + accu;
4441 accu += (size_t)(global_counts[i]);
4442 }
4443 for (size_t i = 0, accu = 0; i < num_src_fields; ++i) {
4444 link_offsets[i] = (size_t)(offsets[i+num_fixed_values]) + accu;
4445 accu += (size_t)(global_counts[i+num_fixed_values]);
4446 }
4447 free(uint64_t_buffer);
4448}
4449
4450static int global_id_to_address(yac_int global_id) {
4451
4452 YAC_ASSERT(
4453 (global_id < INT_MAX) && (global_id != XT_INT_MAX),
4454 "ERROR(global_id_to_address): "
4455 "a global id cannot be converted into a address; too big")
4456 return (int)global_id + 1;
4457}
4458
4460 struct interp_weight_stencil * stencils, size_t stencil_count,
4461 int * tgt_address) {
4462
4463 for (size_t i = 0; i < stencil_count; ++i)
4464 tgt_address[i] = global_id_to_address(stencils[i].tgt.global_id);
4465}
4466
4468 struct interp_weight_stencil * stencils, size_t stencil_count,
4469 size_t * num_links_per_src_field, size_t num_src_fields,
4470 int * src_address, int * tgt_address, double * weight) {
4471
4472 size_t * src_field_offsets =
4473 xmalloc(2 * num_src_fields * sizeof(*src_field_offsets));
4474 size_t * prev_src_field_offsets = src_field_offsets + num_src_fields;
4475 for (size_t i = 0, accu = 0; i < num_src_fields; ++i) {
4476 src_field_offsets[i] = accu;
4477 accu += num_links_per_src_field[i];
4478 }
4479
4480 struct interp_weight_stencil * curr_stencil = stencils;
4481 for (size_t i = 0; i < stencil_count; ++i, ++curr_stencil) {
4482
4483 memcpy(prev_src_field_offsets, src_field_offsets,
4484 num_src_fields * sizeof(*prev_src_field_offsets));
4485
4486 int curr_tgt_address = global_id_to_address(curr_stencil->tgt.global_id);
4487 YAC_ASSERT(
4488 curr_stencil->type != FIXED,
4489 "ERROR(stencil_get_link_data): this call is invalid for FIXED stencils")
4490 YAC_ASSERT(
4491 (curr_stencil->type == DIRECT) ||
4492 (curr_stencil->type == SUM) ||
4493 (curr_stencil->type == WEIGHT_SUM) ||
4494 (curr_stencil->type == DIRECT_MF) ||
4495 (curr_stencil->type == SUM_MF) ||
4496 (curr_stencil->type == WEIGHT_SUM_MF),
4497 "ERROR(stencil_get_link_data): invalid stencil type")
4498 size_t src_field_offset;
4499 switch (curr_stencil->type) {
4500 default:
4501 case(DIRECT):
4502 src_field_offset = src_field_offsets[0]++;
4503 src_address[src_field_offset] =
4505 tgt_address[src_field_offset] = curr_tgt_address;
4506 weight[src_field_offset] = 1.0;
4507 break;
4508 case(SUM): {
4509 size_t curr_count = curr_stencil->data.sum.srcs->count;
4510 struct remote_point * srcs = curr_stencil->data.sum.srcs->data;
4511 for (size_t k = 0; k < curr_count; ++k) {
4512 src_field_offset = src_field_offsets[0]++;
4513 src_address[src_field_offset] =
4515 tgt_address[src_field_offset] = curr_tgt_address;
4516 weight[src_field_offset] = 1.0;
4517 }
4518 break;
4519 }
4520 case(WEIGHT_SUM): {
4521 size_t curr_count = curr_stencil->data.weight_sum.srcs->count;
4522 struct remote_point * srcs = curr_stencil->data.weight_sum.srcs->data;
4523 double * weights = curr_stencil->data.weight_sum.weights;
4524 for (size_t k = 0; k < curr_count; ++k) {
4525 src_field_offset = src_field_offsets[0]++;
4526 src_address[src_field_offset] =
4528 tgt_address[src_field_offset] = curr_tgt_address;
4529 weight[src_field_offset] = weights[k];
4530 }
4531 break;
4532 }
4533 case(DIRECT_MF):
4534 src_field_offset =
4535 src_field_offsets[curr_stencil->data.direct_mf.field_idx]++;
4536 src_address[src_field_offset ] =
4538 tgt_address[src_field_offset ] = curr_tgt_address;
4539 weight[src_field_offset ] = 1.0;
4540 break;
4541 case(SUM_MF): {
4542 size_t curr_count = curr_stencil->data.sum_mf.srcs->count;
4543 struct remote_point * srcs =
4544 curr_stencil->data.sum_mf.srcs->data;
4545 size_t * field_indices = curr_stencil->data.sum_mf.field_indices;
4546 for (size_t k = 0; k < curr_count; ++k) {
4547 src_field_offset = src_field_offsets[field_indices[k]]++;
4548 src_address[src_field_offset] =
4550 tgt_address[src_field_offset] = curr_tgt_address;
4551 weight[src_field_offset] = 1.0;
4552 }
4553 break;
4554 }
4555 case(WEIGHT_SUM_MF): {
4556 size_t curr_count = curr_stencil->data.weight_sum_mf.srcs->count;
4557 struct remote_point * srcs =
4558 curr_stencil->data.weight_sum_mf.srcs->data;
4559 double * weights = curr_stencil->data.weight_sum_mf.weights;
4560 size_t * field_indices = curr_stencil->data.weight_sum_mf.field_indices;
4561 for (size_t k = 0; k < curr_count; ++k) {
4562 src_field_offset = src_field_offsets[field_indices[k]]++;
4563 src_address[src_field_offset] =
4565 tgt_address[src_field_offset] = curr_tgt_address;
4566 weight[src_field_offset] = weights[k];
4567 }
4568 break;
4569 }
4570 };
4571
4572 for (size_t j = 0; j < num_src_fields; ++j)
4574 src_address + prev_src_field_offsets[j],
4575 src_field_offsets[j] - prev_src_field_offsets[j],
4576 weight + prev_src_field_offsets[j]);
4577 }
4578 free(src_field_offsets);
4579}
4580
4582 MPI_Comm comm, size_t count, struct interp_weight_stencil * stencils,
4583 int * owner_ranks, size_t * new_count,
4584 struct interp_weight_stencil ** new_stencils) {
4585
4586 int comm_rank, comm_size;
4587 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
4588 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
4589
4590 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
4592 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
4593
4594 size_t * stencil_indices = xmalloc(count * sizeof(*stencil_indices));
4595 for (size_t i = 0; i < count; ++i) {
4596 stencil_indices[i] = i;
4597 sendcounts[owner_ranks[i]]++;
4598 }
4599
4601 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4602
4603 // sort the stencil indices by owner rank
4604 yac_quicksort_index_int_size_t(owner_ranks, count, stencil_indices);
4605
4606 *new_count = recvcounts[comm_size - 1] + rdispls[comm_size - 1];
4607 *new_stencils =
4608 exchange_stencils(comm, stencils, stencil_indices, sendcounts, recvcounts);
4609 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
4610 free(stencil_indices);
4611}
4612
4613#endif // YAC_NETCDF_ENABLED
4614
4616 struct yac_interp_weights * weights, char const * filename,
4617 char const * src_grid_name, char const * tgt_grid_name,
4618 size_t src_grid_size, size_t tgt_grid_size) {
4619
4620#ifndef YAC_NETCDF_ENABLED
4621
4622 UNUSED(weights);
4623 UNUSED(filename);
4624 UNUSED(src_grid_name);
4625 UNUSED(tgt_grid_name);
4626 UNUSED(src_grid_size);
4627 UNUSED(tgt_grid_size);
4628
4629 die(
4630 "ERROR(yac_interp_weights_write_to_file): "
4631 "YAC is built without the NetCDF support");
4632#else
4633
4634 MPI_Comm comm = weights->comm;
4635 int comm_rank, comm_size;
4636 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
4637 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
4638
4639 // determine processes that will do output
4640 int io_flag;
4641 int * io_ranks;
4642 int num_io_ranks;
4643 yac_get_io_ranks(comm, &io_flag, &io_ranks, &num_io_ranks);
4644
4645 // determine range of global ids
4646 yac_int min_tgt_global_id, max_tgt_global_id;
4648 weights->stencils, weights->stencils_size,
4649 &min_tgt_global_id, &max_tgt_global_id, comm);
4650
4651 // determine io owners for all stencils
4652 int * io_owner =
4653 xmalloc(weights->stencils_size * sizeof(*io_owner));
4655 weights->stencils, weights->stencils_size,
4656 min_tgt_global_id, max_tgt_global_id,
4657 num_io_ranks, io_owner);
4658 for (size_t i = 0; i < weights->stencils_size; ++i)
4659 io_owner[i] = io_ranks[io_owner[i]];
4660 free(io_ranks);
4661
4662 size_t io_stencil_count = 0;
4663 struct interp_weight_stencil * io_stencils = NULL;
4664
4665 // redistribute stencils into io decomposition
4667 comm, weights->stencils_size, weights->stencils, io_owner,
4668 &io_stencil_count, &io_stencils);
4669 free(io_owner);
4670
4671 // distribute global grid sizes
4672 uint64_t grid_sizes[2] = {(uint64_t)src_grid_size, (uint64_t)tgt_grid_size};
4674 MPI_Allreduce(
4675 MPI_IN_PLACE, grid_sizes, 2, MPI_UINT64_T, MPI_MAX, comm), comm);
4676 src_grid_size = (size_t)(grid_sizes[0]);
4677 tgt_grid_size = (size_t)(grid_sizes[1]);
4678
4679 MPI_Comm io_comm;
4680 yac_mpi_call(MPI_Comm_split(comm, io_flag, comm_rank, &io_comm), comm);
4681
4682 // all non-io processes exit here, the remaining ones work using their own
4683 // communicator
4684 if (!io_flag) {
4685 yac_mpi_call(MPI_Comm_free(&io_comm), comm);
4686 free(io_stencils);
4687 // ensure that the writing of the weight file is complete
4688 yac_mpi_call(MPI_Barrier(comm), comm);
4689 return;
4690 }
4691
4692 // sort the stencils first by type (fixed first; fixed stencils are sorted
4693 // by their fixed value) and second by tgt id
4694 qsort(io_stencils, io_stencil_count, sizeof(*io_stencils),
4696
4697 yac_mpi_call(MPI_Comm_rank(io_comm, &comm_rank), comm);
4698 yac_mpi_call(MPI_Comm_size(io_comm, &comm_size), comm);
4699
4700 double * fixed_values = NULL;
4701 size_t num_fixed_values = 0;
4703 io_stencils, io_stencil_count, &fixed_values, &num_fixed_values, io_comm);
4704 size_t num_src_fields = weights->num_src_fields;
4705 size_t num_weights_per_link =
4706 stencil_get_num_weights_per_tgt(io_stencils, io_stencil_count, io_comm);
4707
4708 size_t * size_t_buffer =
4709 xmalloc(2 * (num_fixed_values + num_src_fields) * sizeof(*size_t_buffer));
4710 size_t * num_tgt_per_fixed_value = size_t_buffer;
4711 size_t * num_links_per_src_field = size_t_buffer + num_fixed_values;
4712 size_t * fixed_offsets = size_t_buffer + num_fixed_values + num_src_fields;
4713 size_t * link_offsets = size_t_buffer + 2 * num_fixed_values + num_src_fields;
4714
4715 size_t num_fixed_tgt = 0;
4716 size_t num_links = 0;
4718 io_stencils, io_stencil_count, num_fixed_values, fixed_values,
4719 num_tgt_per_fixed_value, &num_fixed_tgt, num_src_fields,
4720 num_links_per_src_field, &num_links);
4721
4723 num_fixed_values, num_tgt_per_fixed_value,
4724 num_src_fields, num_links_per_src_field,
4725 fixed_offsets, link_offsets, io_comm);
4726
4727 if (comm_rank == comm_size - 1) {
4728
4729 size_t * total_num_tgt_per_fixed_value =
4730 xmalloc(num_fixed_values * sizeof(*total_num_tgt_per_fixed_value));
4731 for (size_t i = 0, accu = 0; i < num_fixed_values; ++i) {
4732 total_num_tgt_per_fixed_value[i] =
4733 fixed_offsets[i] + num_tgt_per_fixed_value[i] - accu;
4734 accu += total_num_tgt_per_fixed_value[i];
4735 }
4736 size_t total_num_links = link_offsets[num_src_fields-1] +
4737 num_links_per_src_field[num_src_fields-1];
4738
4739 size_t * total_num_links_per_src_field =
4740 xmalloc(num_src_fields * sizeof(*total_num_links_per_src_field));
4741 for (size_t i = 0, accu = 0; i < num_src_fields; ++i) {
4742 total_num_links_per_src_field[i] =
4743 link_offsets[i] + num_links_per_src_field[i] - accu;
4744 accu += total_num_links_per_src_field[i];
4745 }
4746
4748 filename, src_grid_name, tgt_grid_name,
4749 num_fixed_values, fixed_values, total_num_tgt_per_fixed_value,
4750 total_num_links, num_weights_per_link,
4751 num_src_fields, total_num_links_per_src_field,
4752 weights->src_locations, weights->tgt_location,
4753 src_grid_size, tgt_grid_size);
4754
4755 free(total_num_links_per_src_field);
4756 free(total_num_tgt_per_fixed_value);
4757 }
4758 free(fixed_values);
4759
4760 // ensure that the basic weight file has been written
4761 yac_mpi_call(MPI_Barrier(io_comm), comm);
4762 yac_mpi_call(MPI_Comm_free(&io_comm), comm);
4763
4764 int ncid;
4765
4766 // open weight file
4767 yac_nc_open(filename, NC_WRITE | NC_SHARE, &ncid);
4768
4769 if (num_fixed_tgt > 0) {
4770
4771 int * tgt_address_fixed =
4772 xmalloc(num_fixed_tgt * sizeof(*tgt_address_fixed));
4773 stencil_get_tgt_address(io_stencils, num_fixed_tgt, tgt_address_fixed);
4774
4775 // inquire variable ids
4776 int var_dst_add_fixed_id;
4777 yac_nc_inq_varid(ncid, "dst_address_fixed", &var_dst_add_fixed_id);
4778
4779 // target ids that receive a fixed value to file
4780 for (size_t i = 0, offset = 0; i < num_fixed_values; ++i) {
4781
4782 if (num_tgt_per_fixed_value[i] == 0) continue;
4783
4784 size_t start[1] = {fixed_offsets[i]};
4785 size_t count[1] = {num_tgt_per_fixed_value[i]};
4787 nc_put_vara_int(
4788 ncid, var_dst_add_fixed_id, start, count, tgt_address_fixed + offset));
4789 offset += num_tgt_per_fixed_value[i];
4790 }
4791
4792 free(tgt_address_fixed);
4793 }
4794
4795 if (num_links > 0) {
4796
4797 int * src_address_link = xmalloc(num_links * sizeof(*src_address_link));
4798 int * tgt_address_link = xmalloc(num_links * sizeof(*tgt_address_link));
4799 double * w = xmalloc(num_links * num_weights_per_link * sizeof(*w));
4801 io_stencils + num_fixed_tgt, io_stencil_count - num_fixed_tgt,
4802 num_links_per_src_field, num_src_fields,
4803 src_address_link, tgt_address_link, w);
4804
4805 int var_src_add_id, var_dst_add_id, var_weight_id;
4806 yac_nc_inq_varid(ncid, "src_address", &var_src_add_id);
4807 yac_nc_inq_varid(ncid, "dst_address", &var_dst_add_id);
4808 yac_nc_inq_varid(ncid, "remap_matrix", &var_weight_id);
4809
4810 for (size_t i = 0, offset = 0; i < num_src_fields; ++i) {
4811
4812 if (num_links_per_src_field[i] == 0) continue;
4813
4814 size_t start[2] = {link_offsets[i], 0};
4815 size_t count[2] = {num_links_per_src_field[i], num_weights_per_link};
4816
4818 nc_put_vara_int(
4819 ncid, var_src_add_id, start, count, src_address_link + offset));
4821 nc_put_vara_int(
4822 ncid, var_dst_add_id, start, count, tgt_address_link + offset));
4824 nc_put_vara_double(
4825 ncid, var_weight_id, start, count,
4826 w + num_weights_per_link * offset));
4827
4828 offset += num_links_per_src_field[i];
4829 }
4830
4831 free(w);
4832 free(tgt_address_link);
4833 free(src_address_link);
4834 }
4835
4836 // close weight file
4837 YAC_HANDLE_ERROR(nc_close(ncid));
4838
4839 // ensure that the writing of the weight file is complete
4840 yac_mpi_call(MPI_Barrier(comm), comm);
4841
4842 free(size_t_buffer);
4843 yac_interp_weight_stencils_delete(io_stencils, io_stencil_count);
4844#endif
4845}
4846
4848 struct yac_interp_weights * weights) {
4849
4850 return weights->stencils_size;
4851}
4852
4854 struct yac_interp_weights * weights) {
4855
4856 struct interp_weight_stencil * stencils = weights->stencils;
4857 size_t stencils_size = weights->stencils_size;
4858
4859 yac_int * global_ids = xmalloc(stencils_size * sizeof(*global_ids));
4860
4861 for (size_t i = 0; i < stencils_size; ++i)
4862 global_ids[i] = stencils[i].tgt.global_id;
4863
4864 return global_ids;
4865}
4866
4868 return weights->comm;
4869}
4870
4872
4873 if (weights == NULL) return;
4874
4875 yac_interp_weight_stencils_delete(weights->stencils, weights->stencils_size);
4876 free(weights->src_locations);
4877 free(weights);
4878}
#define UNUSED(x)
Definition core.h:73
#define ENSURE_ARRAY_SIZE(arrayp, curr_array_size, req_size)
struct @7::@8 value
enum callback_type type
#define YAC_WEIGHT_FILE_VERSION_STRING
static MPI_Datatype get_fixed_stencil_mpi_datatype(MPI_Comm comm)
static size_t get_num_links_per_src_field(struct interp_weight_stencil *stencil, size_t src_field_idx)
static int get_stencil_pack_size_direct_mf(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static void unpack_stencil_wsum_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static MPI_Datatype get_direct_stencil_mpi_datatype(MPI_Comm comm)
static void stencil_determine_tgt_global_id_range(struct interp_weight_stencil *stencils, size_t stencils_size, yac_int *min_tgt_global_id, yac_int *max_tgt_global_id, MPI_Comm comm)
static void unpack_stencil_sum_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_interp_weights_add_fixed(struct yac_interp_weights *weights, struct remote_points *tgts, double fixed_value)
static int compare_stencils_direct_mf(const void *a, const void *b)
void yac_interp_weights_add_sum_mf(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_src_per_field_per_tgt, struct remote_point **srcs_per_field, size_t num_src_fields)
static struct remote_points * copy_remote_points_mf(struct remote_point **points, size_t *counts, size_t num_fields)
static struct interp_weight_stencils_wsum_mf * redist_wsum_mf_stencils_tgt(MPI_Comm comm, struct interp_weight_stencils_wsum_mf *wsum_stencils_data)
static int compare_remote_point_info(const void *a, const void *b)
static int global_id_to_address(yac_int global_id)
static int get_stencil_pack_size_sum(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static Xt_redist generate_direct_redist(uint64_t *src_orig_poses, size_t *sendcounts, struct interp_weight_stencil_direct *tgt_stencils, size_t *recvcounts, MPI_Comm comm, Xt_config redist_config)
static void pack_stencil_direct(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
yac_interp_weight_stencil_type