12#define WEIGHT_TOL (1e-9)
60 size_t * field_indices;
155 xmalloc((
size_t)count *
sizeof(*point_infos));
156 memcpy(point_infos, point.data.data.multi,
157 (
size_t)count *
sizeof(*point_infos));
158 point.data.data.multi = point_infos;
169 for (
size_t i = 0; i < count; ++i) {
170 int curr_count = points_from[i].
data.
count;
171 points_to[i] = points_from[i];
172 if (curr_count > 1) {
175 point_info_buffer, points_from[i].data.
data.
multi,
176 (
size_t)curr_count *
sizeof(*point_info_buffer));
177 point_info_buffer += curr_count;
180 *point_info_buffer_ = point_info_buffer;
186 size_t point_info_buffer_size = 0;
187 for (
size_t i = 0; i <
count; ++i)
189 point_info_buffer_size += (size_t)(
points[i].
data.count);
193 sizeof(*points_copy));
199 points_copy->
data,
points, count, &point_info_buffer);
207 size_t point_info_buffer_size = 0;
208 size_t total_count = 0;
209 for (
size_t i = 0; i < num_fields; ++i) {
210 total_count += counts[i];
211 for (
size_t j = 0; j < counts[i]; ++j) {
213 point_info_buffer_size += (size_t)(
points[i][j].
data.count);
219 sizeof(*points_copy));
220 points_copy->
data =
xmalloc(total_count *
sizeof(*(points_copy->
data)));
221 points_copy->
count = total_count;
224 for (
size_t i = 0, k = 0; i < num_fields; ++i) {
225 for (
size_t j = 0; j < counts[i]; ++j, ++k) {
226 int curr_count =
points[i][j].data.count;
228 if (curr_count > 1) {
231 point_info_buffer,
points[i][j].data.data.multi,
232 (
size_t)curr_count *
sizeof(*point_info_buffer));
233 point_info_buffer += curr_count;
242 double fixed_value) {
245 size_t stencils_array_size =
weights->stencils_array_size;
246 size_t stencils_size =
weights->stencils_size;
250 for (
size_t i = 0; i < tgts->
count; ++i, ++stencils_size) {
258 weights->stencils_array_size = stencils_array_size;
259 weights->stencils_size = stencils_size;
266 if (tgts->
count == 0)
return;
270 for (
size_t i = 0, k = 0; (i < tgts->
count) && !pack_flag; ++i)
271 for (
size_t j = 0; (j < num_src_per_tgt[i]) && !pack_flag; ++j, ++k)
276 for (
size_t i = 0, k = 0, l = 0;
277 i < tgts->
count; i++) {
279 size_t curr_count = num_src_per_tgt[i];
281 for (
size_t j = 0; j < curr_count; j++, k++) {
284 num_src_per_tgt[i]--;
295 if ((curr_count != 0) && (num_src_per_tgt[i] == 0)) {
301 num_src_per_tgt[i] = 1;
309 int flag_weight_one = 1;
310 int flag_count_one = 1;
311 for (
size_t i = 0, j = 0;
312 (i < tgts->
count) && (flag_weight_one || flag_count_one); ++i) {
314 size_t curr_count = num_src_per_tgt[i];
315 flag_count_one &= curr_count == 1;
317 for (
size_t k = 0; (k < curr_count) && flag_weight_one; ++k, ++j)
318 flag_weight_one &= fabs(w[j] - 1.0) <
WEIGHT_TOL;
322 if (flag_weight_one) {
333 size_t stencils_array_size =
weights->stencils_array_size;
334 size_t stencils_size =
weights->stencils_size;
338 for (
size_t i = 0; i < tgts->
count; ++i, ++stencils_size) {
340 size_t curr_num_src = num_src_per_tgt[i];
343 if (curr_num_src == 0) {
348 double * curr_weights =
349 xmalloc(curr_num_src *
sizeof(*curr_weights));
356 memcpy(curr_weights, w, curr_num_src *
sizeof(*curr_weights));
358 srcs += curr_num_src;
363 weights->stencils_array_size = stencils_array_size;
364 weights->stencils_size = stencils_size;
372 if (tgts->
count == 0)
return;
376 int flag_count_one = 1;
377 for (
size_t i = 0; i < tgts->
count; ++i) {
378 if (num_src_per_tgt[i] != 1) {
384 if (flag_count_one) {
391 size_t stencils_array_size =
weights->stencils_array_size;
392 size_t stencils_size =
weights->stencils_size;
396 for (
size_t i = 0; i < tgts->
count; ++i, ++stencils_size) {
398 size_t curr_num_src = num_src_per_tgt[i];
400 stencils[stencils_size].
type =
SUM;
406 srcs += curr_num_src;
410 weights->stencils_array_size = stencils_array_size;
411 weights->stencils_size = stencils_size;
419 if (tgts->
count == 0)
return;
422 size_t stencils_array_size =
weights->stencils_array_size;
423 size_t stencils_size =
weights->stencils_size;
427 for (
size_t i = 0; i < tgts->
count; ++i, ++stencils_size) {
435 weights->stencils_array_size = stencils_array_size;
436 weights->stencils_size = stencils_size;
441 size_t * src_field_indices,
struct remote_point ** srcs_per_field,
442 size_t num_src_fields) {
444 if (tgts->
count == 0)
return;
446 if (num_src_fields == 1) {
452 size_t stencils_array_size =
weights->stencils_array_size;
453 size_t stencils_size =
weights->stencils_size;
456 stencils, stencils_array_size, stencils_size + tgts->
count);
457 stencils += stencils_size;
459 size_t srcs_offsets[num_src_fields];
460 memset(srcs_offsets, 0, num_src_fields *
sizeof(srcs_offsets[0]));
462 for (
size_t i = 0; i < tgts->
count; ++i) {
464 size_t src_field_idx = src_field_indices[i];
469 srcs_per_field[src_field_idx][srcs_offsets[src_field_idx]++]);
474 weights->stencils_array_size = stencils_array_size;
480 size_t * num_src_per_field_per_tgt,
struct remote_point ** srcs_per_field,
481 size_t num_src_fields) {
483 if (tgts->
count == 0)
return;
485 if (num_src_fields == 1) {
487 weights, tgts, num_src_per_field_per_tgt, srcs_per_field[0]);
493 int flag_count_one = 1;
494 for (
size_t i = 0, k = 0; i < tgts->
count; ++i) {
496 for (
size_t j = 0; j < num_src_fields; ++j, ++k)
497 count += num_src_per_field_per_tgt[k];
504 if (flag_count_one) {
506 size_t * src_field_indices =
509 for (
size_t i = 0, k = 0; i < tgts->
count; ++i)
510 for (
size_t j = 0; j < num_src_fields; ++j, ++k)
511 if (num_src_per_field_per_tgt[k])
512 src_field_indices[i] = j;
515 weights, tgts, src_field_indices, srcs_per_field, num_src_fields);
517 free(src_field_indices);
520 struct remote_point * curr_srcs_per_field[num_src_fields];
521 memcpy(curr_srcs_per_field, srcs_per_field,
522 num_src_fields *
sizeof(*srcs_per_field));
525 size_t stencils_array_size =
weights->stencils_array_size;
526 size_t stencils_size =
weights->stencils_size;
529 stencils, stencils_array_size, stencils_size + tgts->
count);
531 for (
size_t i = 0; i < tgts->
count; ++i, ++stencils_size) {
533 size_t * curr_num_src_per_src_field =
534 num_src_per_field_per_tgt + i * num_src_fields;
535 size_t curr_num_src = 0;
536 for (
size_t j = 0; j < num_src_fields; ++j)
537 curr_num_src += curr_num_src_per_src_field[j];
545 for (
size_t j = 0, l = 0; j < num_src_fields; ++j) {
546 size_t curr_num_src = curr_num_src_per_src_field[j];
547 for (
size_t k = 0; k < curr_num_src; ++k, ++l) {
553 curr_srcs_per_field, curr_num_src_per_src_field, num_src_fields);
555 for (
size_t j = 0; j < num_src_fields; ++j)
556 curr_srcs_per_field[j] += curr_num_src_per_src_field[j];
560 weights->stencils_array_size = stencils_array_size;
561 weights->stencils_size = stencils_size;
567 size_t * num_src_per_field_per_tgt,
struct remote_point ** srcs_per_field,
568 double * w,
size_t num_src_fields) {
570 if (tgts->
count == 0)
return;
572 if (num_src_fields == 1) {
574 weights, tgts, num_src_per_field_per_tgt, srcs_per_field[0], w);
580 int flag_weight_one = 1;
581 for (
size_t i = 0, j = 0;
582 (i < tgts->
count) && flag_weight_one; ++i) {
584 for (
size_t src_field_idx = 0; src_field_idx < num_src_fields;
588 num_src_per_field_per_tgt[i * num_src_fields + src_field_idx];
590 for (
size_t k = 0; (k < curr_count) && flag_weight_one; ++k, ++j)
591 flag_weight_one &= fabs(w[j] - 1.0) < 1e-12;
596 if (flag_weight_one) {
599 weights, tgts, num_src_per_field_per_tgt, srcs_per_field, num_src_fields);
603 struct remote_point * curr_srcs_per_field[num_src_fields];
604 memcpy(curr_srcs_per_field, srcs_per_field,
605 num_src_fields *
sizeof(*srcs_per_field));
608 size_t stencils_array_size =
weights->stencils_array_size;
609 size_t stencils_size =
weights->stencils_size;
612 stencils, stencils_array_size, stencils_size + tgts->
count);
614 for (
size_t i = 0; i < tgts->
count; ++i, ++stencils_size) {
616 size_t * curr_num_src_per_src_field =
617 num_src_per_field_per_tgt + i * num_src_fields;
618 size_t curr_num_weights = 0;
619 for (
size_t j = 0; j < num_src_fields; ++j)
620 curr_num_weights += curr_num_src_per_src_field[j];
621 double * curr_weights =
622 xmalloc(curr_num_weights *
sizeof(*curr_weights));
629 for (
size_t j = 0, l = 0; j < num_src_fields; ++j) {
630 size_t curr_num_src = curr_num_src_per_src_field[j];
631 for (
size_t k = 0; k < curr_num_src; ++k, ++l)
field_indices[l] = j;
636 memcpy(curr_weights, w, curr_num_weights *
sizeof(*curr_weights));
638 for (
size_t j = 0; j < num_src_fields; ++j)
639 curr_srcs_per_field[j] += curr_num_src_per_src_field[j];
640 w += curr_num_weights;
644 weights->stencils_array_size = stencils_array_size;
645 weights->stencils_size = stencils_size;
667 MPI_Datatype fixed_stencil_dt;
668 int array_of_blocklengths[] = {1, 1};
669 const MPI_Aint array_of_displacements[] =
670 {(MPI_Aint)(intptr_t)(
const void *)&(dummy.
value) -
671 (MPI_Aint)(intptr_t)(
const void *)&dummy,
672 (MPI_Aint)(intptr_t)(
const void *)&(dummy.
orig_pos) -
673 (MPI_Aint)(intptr_t)(
const void *)&dummy};
674 const MPI_Datatype array_of_types[] = {MPI_DOUBLE, MPI_UINT64_T};
676 MPI_Type_create_struct(2, array_of_blocklengths, array_of_displacements,
677 array_of_types, &fixed_stencil_dt), comm);
682 MPI_Comm comm, uint64_t count,
689 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
691 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
693 for (
size_t i = 0; i < count; ++i) {
697 (&(fixed_stencils[i].tgt.
data.
data.single)):
699 for (
int j = 0; j < curr_count; ++j)
700 sendcounts[curr_point_infos[j].
rank]++;
704 1, sendcounts, recvcounts, sdispls, rdispls, comm);
706 size_t send_buffer_size =
707 sdispls[comm_size] + sendcounts[comm_size - 1];
708 size_t recv_buffer_size =
709 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
712 xmalloc((send_buffer_size + recv_buffer_size) *
sizeof(*buffer));
717 for (
size_t i = 0; i < count; ++i) {
721 (&(fixed_stencils[i].tgt.
data.
data.single)):
724 for (
int j = 0; j < curr_count; ++j) {
725 size_t pos = sdispls[curr_point_infos[j].
rank + 1]++;
727 send_buffer[pos].
orig_pos = curr_point_infos[j].orig_pos;
736 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls,
737 sizeof(*send_buffer), stencil_fixed_dt, comm);
743 if (recv_buffer_size == 0) {
749 qsort(recv_buffer, recv_buffer_size,
sizeof(*recv_buffer),
752 size_t * tgt_pos =
xmalloc(recv_buffer_size *
sizeof(*tgt_pos));
753 for (
size_t i = 0; i < recv_buffer_size; ++i)
754 tgt_pos[i] = (
size_t)(recv_buffer[i].
orig_pos);
756 size_t offset = 0, i = 0;
757 while (offset < recv_buffer_size) {
758 double fixed_value = recv_buffer[i].
value;
759 while ((i < recv_buffer_size) && (fixed_value == recv_buffer[i].
value)) ++i;
760 size_t curr_count = i - offset;
762 interp, fixed_value, curr_count, tgt_pos + offset);
773 if (
src.count == 1)
return src.data.single;
775 int min_rank = INT_MAX;
776 size_t min_rank_idx = SIZE_MAX;
777 for (
size_t i = 0; i <
src.count; ++i) {
778 if (
src.data.multi[i].rank < min_rank) {
779 min_rank =
src.data.multi[i].rank;
784 return src.data.multi[min_rank_idx];
788 struct Xt_redist_msg * msgs,
size_t count, MPI_Comm comm) {
789 for (
size_t i = 0; i < count; ++i) {
790 MPI_Datatype * dt = &(msgs[i].datatype);
791 if (*dt != MPI_DATATYPE_NULL)
yac_mpi_call(MPI_Type_free(dt), comm);
797 uint64_t * src_orig_poses,
size_t * sendcounts,
799 size_t * recvcounts, MPI_Comm comm) {
804 size_t nsend = 0, nrecv = 0;
805 size_t max_buffer_size = 0;
806 for (
int i = 0; i < comm_size; ++i) {
807 nsend += sendcounts[i] > 0;
808 nrecv += recvcounts[i] > 0;
809 if (max_buffer_size < sendcounts[i]) max_buffer_size = sendcounts[i];
810 if (max_buffer_size < recvcounts[i]) max_buffer_size = recvcounts[i];
813 size_t total_num_msg = nsend + nrecv;
815 struct Xt_redist_msg * msgs_buffer =
816 xmalloc(total_num_msg *
sizeof(*msgs_buffer));
817 struct Xt_redist_msg * send_msgs = msgs_buffer;
818 struct Xt_redist_msg * recv_msgs = msgs_buffer + nsend;
820 int * pos_buffer =
xmalloc((
size_t)max_buffer_size *
sizeof(*pos_buffer));
824 for (
int i = 0; i < comm_size; ++i) {
825 if (recvcounts[i] > 0) {
826 for (
size_t j = 0; j < recvcounts[i]; ++j)
827 pos_buffer[j] = (
int)tgt_stencils[j].
orig_pos;
828 tgt_stencils += recvcounts[i];
829 recv_msgs[nrecv].rank = i;
830 recv_msgs[nrecv].datatype =
831 xt_mpi_generate_datatype(pos_buffer, recvcounts[i], MPI_DOUBLE, comm);
834 if (sendcounts[i] > 0) {
835 for (
size_t j = 0; j < sendcounts[i]; ++j)
836 pos_buffer[j] = (
int)src_orig_poses[j];
837 src_orig_poses += sendcounts[i];
838 send_msgs[nsend].rank = i;
839 send_msgs[nsend].datatype =
840 xt_mpi_generate_datatype(pos_buffer, sendcounts[i], MPI_DOUBLE, comm);
850 if (total_num_msg > 0) {
852 yac_mpi_call(MPI_Comm_split(comm, 1, 0, &split_comm), comm);
855 xmalloc(2 * total_num_msg *
sizeof(*rank_buffer));
856 int * orig_ranks = rank_buffer;
857 int * split_ranks = rank_buffer + total_num_msg;
859 for (
size_t i = 0; i < total_num_msg; ++i)
860 orig_ranks[i] = msgs_buffer[i].rank;
862 MPI_Group orig_group, split_group;
864 yac_mpi_call(MPI_Comm_group(split_comm, &split_group), comm);
867 MPI_Group_translate_ranks(orig_group, total_num_msg, orig_ranks,
868 split_group, split_ranks), split_comm);
870 for (
size_t i = 0; i < total_num_msg; ++i)
871 msgs_buffer[i].rank = split_ranks[i];
880 xt_redist_single_array_base_new(
881 nsend, nrecv, send_msgs, recv_msgs, split_comm);
884 yac_mpi_call(MPI_Comm_split(comm, 0, 0, &split_comm), comm);
895 uint64_t * src_orig_pos,
size_t * sendcounts,
897 size_t * recvcounts,
size_t num_src_fields, MPI_Comm comm) {
902 size_t nsends[num_src_fields], nrecvs[num_src_fields];
903 size_t max_buffer_size = 0;
904 memset(nsends, 0, num_src_fields *
sizeof(nsends[0]));
905 memset(nrecvs, 0, num_src_fields *
sizeof(nrecvs[0]));
906 for (
int i = 0; i < comm_size; ++i) {
907 for (
size_t j = 0; j < num_src_fields; ++j) {
908 size_t idx = (size_t)i * num_src_fields + j;
909 if (sendcounts[idx] > 0) nsends[j]++;
910 if (recvcounts[idx] > 0) nrecvs[j]++;
911 if (max_buffer_size < sendcounts[idx]) max_buffer_size = sendcounts[idx];
912 if (max_buffer_size < recvcounts[idx]) max_buffer_size = recvcounts[idx];
916 size_t nsend = 0, nrecv = 0;
917 size_t send_offsets[num_src_fields];
918 size_t recv_offsets[num_src_fields];
919 for (
size_t i = 0; i < num_src_fields; ++i) {
920 send_offsets[i] = nsend;
921 recv_offsets[i] = nrecv;
926 size_t total_num_msg = nsend + nrecv;
928 struct Xt_redist_msg * msgs_buffer =
929 xmalloc(total_num_msg *
sizeof(*msgs_buffer));
930 struct Xt_redist_msg * send_msgs = msgs_buffer;
931 struct Xt_redist_msg * recv_msgs = msgs_buffer + nsend;
933 int * pos_buffer =
xmalloc(max_buffer_size *
sizeof(*pos_buffer));
935 for (
int i = 0; i < comm_size; ++i) {
936 for (
size_t src_field_idx = 0; src_field_idx < num_src_fields;
938 size_t idx = (size_t)i * num_src_fields + src_field_idx;
939 if (recvcounts[idx] > 0) {
940 for (
size_t j = 0; j < recvcounts[idx]; ++j)
941 pos_buffer[j] = (
int)tgt_stencils[j].
orig_pos;
942 tgt_stencils += recvcounts[idx];
943 recv_msgs[recv_offsets[src_field_idx]].rank = i;
944 recv_msgs[recv_offsets[src_field_idx]].datatype =
945 xt_mpi_generate_datatype(
946 pos_buffer, recvcounts[idx], MPI_DOUBLE, comm);
947 recv_offsets[src_field_idx]++;
949 if (sendcounts[idx] > 0) {
950 for (
size_t j = 0; j < sendcounts[idx]; ++j)
951 pos_buffer[j] = (
int)src_orig_pos[j];
952 src_orig_pos += sendcounts[idx];
953 send_msgs[send_offsets[src_field_idx]].rank = i;
954 send_msgs[send_offsets[src_field_idx]].datatype =
955 xt_mpi_generate_datatype(
956 pos_buffer, sendcounts[idx], MPI_DOUBLE, comm);
957 send_offsets[src_field_idx]++;
967 if (total_num_msg > 0) {
969 yac_mpi_call(MPI_Comm_split(comm, 1, 0, &split_comm), comm);
972 xmalloc(2 * total_num_msg *
sizeof(*rank_buffer));
973 int * orig_ranks = rank_buffer;
974 int * split_ranks = rank_buffer + total_num_msg;
976 for (
size_t i = 0; i < total_num_msg; ++i)
977 orig_ranks[i] = msgs_buffer[i].rank;
979 MPI_Group orig_group, split_group;
981 yac_mpi_call(MPI_Comm_group(split_comm, &split_group), comm);
984 MPI_Group_translate_ranks(orig_group, total_num_msg, orig_ranks,
985 split_group, split_ranks), split_comm);
987 for (
size_t i = 0; i < total_num_msg; ++i)
988 msgs_buffer[i].rank = split_ranks[i];
996 redists =
xmalloc(num_src_fields *
sizeof(*redists));
997 for (
size_t src_field_idx = 0; src_field_idx < num_src_fields;
999 redists[src_field_idx] =
1000 xt_redist_single_array_base_new(
1001 nsends[src_field_idx], nrecvs[src_field_idx],
1002 send_msgs, recv_msgs, split_comm);
1003 send_msgs += nsends[src_field_idx];
1004 recv_msgs += nrecvs[src_field_idx];
1008 yac_mpi_call(MPI_Comm_split(comm, 0, 0, &split_comm), comm);
1021 MPI_Datatype direct_stencil_dt;
1022 int array_of_blocklengths[] = {1, 1};
1023 const MPI_Aint array_of_displacements[] =
1024 {(MPI_Aint)(intptr_t)(
const void *)&(dummy.
src) -
1025 (MPI_Aint)(intptr_t)(
const void *)&dummy,
1026 (MPI_Aint)(intptr_t)(
const void *)&(dummy.
orig_pos) -
1027 (MPI_Aint)(intptr_t)(
const void *)&dummy};
1028 MPI_Datatype array_of_types[] =
1031 MPI_Type_create_struct(2, array_of_blocklengths, array_of_displacements,
1032 array_of_types, &direct_stencil_dt), comm);
1033 yac_mpi_call(MPI_Type_free(&(array_of_types[0])), comm);
1042 if (ret)
return ret;
1051 MPI_Comm comm, uint64_t count,
1058 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
1060 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1062 for (
size_t i = 0; i < count; ++i) {
1066 (&(direct_stencils[i].tgt.
data.
data.single)):
1068 for (
int j = 0; j < curr_count; ++j)
1069 sendcounts[curr_point_info[j].
rank]++;
1073 1, sendcounts, recvcounts, sdispls, rdispls, comm);
1075 size_t send_buffer_size =
1076 sdispls[comm_size] + sendcounts[comm_size - 1];
1077 size_t recv_buffer_size =
1078 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
1079 size_t tgt_count = recv_buffer_size;
1082 xmalloc((send_buffer_size + recv_buffer_size) *
sizeof(*stencil_buffer));
1084 stencil_buffer + recv_buffer_size;
1088 for (
size_t i = 0; i < count; ++i) {
1092 (&(direct_stencils[i].tgt.
data.
data.single)):
1096 for (
int j = 0; j < curr_count; ++j) {
1097 size_t pos = sdispls[curr_point_infos[j].
rank + 1]++;
1098 send_stencil_buffer[pos].
src =
src;
1099 send_stencil_buffer[pos].
orig_pos = curr_point_infos[j].orig_pos;
1108 send_stencil_buffer, sendcounts, sdispls,
1109 recv_stencil_buffer, recvcounts, rdispls,
1110 sizeof(*stencil_buffer), stencil_direct_dt, comm);
1115 qsort(recv_stencil_buffer, tgt_count,
sizeof(*recv_stencil_buffer),
1118 memset(sendcounts, 0, (
size_t)comm_size *
sizeof(*sendcounts));
1120 for (
size_t i = 0; i < tgt_count; ++i)
1121 sendcounts[recv_stencil_buffer[i].
src.rank]++;
1124 1, sendcounts, recvcounts, sdispls, rdispls, comm);
1126 send_buffer_size = sdispls[comm_size] + sendcounts[comm_size - 1];
1127 recv_buffer_size = rdispls[comm_size - 1] + recvcounts[comm_size - 1];
1129 uint64_t * orig_pos_buffer =
1130 xmalloc((send_buffer_size + recv_buffer_size) *
sizeof(*orig_pos_buffer));
1131 uint64_t * send_orig_pos_buffer = orig_pos_buffer + recv_buffer_size;
1132 uint64_t * recv_orig_pos_buffer = orig_pos_buffer;
1134 for (
size_t i = 0; i < tgt_count; ++i)
1135 send_orig_pos_buffer[sdispls[recv_stencil_buffer[i].
src.rank + 1]++] =
1140 send_orig_pos_buffer, sendcounts, sdispls,
1141 recv_orig_pos_buffer, recvcounts, rdispls,
1142 sizeof(*send_orig_pos_buffer), MPI_UINT64_T, comm);
1149 recv_orig_pos_buffer, recvcounts, recv_stencil_buffer, sendcounts, comm);
1150 free(orig_pos_buffer);
1151 free(stencil_buffer);
1155 if (redist != NULL) xt_redist_delete(redist);
1161 MPI_Datatype direct_stencil_mf_dt;
1162 int array_of_blocklengths[] = {1, 1, 1};
1163 const MPI_Aint array_of_displacements[] =
1164 {(MPI_Aint)(intptr_t)(
const void *)&(dummy.
src) -
1165 (MPI_Aint)(intptr_t)(
const void *)&dummy,
1167 (MPI_Aint)(intptr_t)(
const void *)&dummy,
1168 (MPI_Aint)(intptr_t)(
const void *)&(dummy.
orig_pos) -
1169 (MPI_Aint)(intptr_t)(
const void *)&dummy};
1170 MPI_Datatype array_of_types[] =
1173 MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacements,
1174 array_of_types, &direct_stencil_mf_dt), comm);
1175 yac_mpi_call(MPI_Type_free(&(array_of_types[0])), comm);
1184 if (ret)
return ret;
1191 if (ret)
return ret;
1200 MPI_Comm comm, uint64_t count,
1205 uint64_t num_src_fields = 0;
1206 for (
size_t i = 0; i < count; ++i) {
1213 MPI_IN_PLACE, &num_src_fields, 1, MPI_UINT64_T, MPI_MAX, comm), comm);
1218 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
1220 (
size_t)num_src_fields, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1221 size_t * size_t_buffer =
1222 xmalloc(4 * (
size_t)comm_size *
sizeof(*size_t_buffer));
1223 size_t * total_sendcounts = size_t_buffer + 0 * comm_size;
1224 size_t * total_recvcounts = size_t_buffer + 1 * comm_size;
1225 size_t * total_sdispls = size_t_buffer + 2 * comm_size;
1226 size_t * total_rdispls = size_t_buffer + 3 * comm_size;
1228 for (
size_t i = 0; i < count; ++i) {
1232 (&(direct_mf_stencils[i].tgt.
data.
data.single)):
1234 uint64_t src_field_idx =
1236 for (
int j = 0; j < curr_count; ++j)
1238 (uint64_t)(curr_point_info[j].
rank) * num_src_fields + src_field_idx]++;
1242 (
size_t)num_src_fields, sendcounts, recvcounts, sdispls, rdispls, comm);
1244 size_t saccu = 0, raccu = 0;
1245 for (
int i = 0; i < comm_size; ++i) {
1246 total_sdispls[i] = saccu;
1247 total_rdispls[i] = raccu;
1248 total_sendcounts[i] = 0;
1249 total_recvcounts[i] = 0;
1250 for (
size_t j = 0; j < num_src_fields; ++j) {
1251 total_sendcounts[i] += sendcounts[num_src_fields * i + j];
1252 total_recvcounts[i] += recvcounts[num_src_fields * i + j];
1254 saccu += total_sendcounts[i];
1255 raccu += total_recvcounts[i];
1258 size_t send_buffer_size = total_sdispls[comm_size - 1] +
1259 total_sendcounts[comm_size - 1];
1260 size_t recv_buffer_size = total_rdispls[comm_size - 1] +
1261 total_recvcounts[comm_size - 1];
1262 size_t tgt_count = recv_buffer_size;
1265 xmalloc((send_buffer_size + recv_buffer_size) *
sizeof(*stencil_buffer));
1267 stencil_buffer + recv_buffer_size;
1271 for (
size_t i = 0; i < count; ++i) {
1275 (&(direct_mf_stencils[i].tgt.
data.
data.single)):
1279 uint64_t src_field_idx =
1281 for (
int j = 0; j < curr_count; ++j) {
1283 sdispls[(uint64_t)(curr_point_infos[j].
rank) * num_src_fields +
1284 src_field_idx + 1]++;
1285 send_stencil_buffer[pos].
src =
src;
1296 send_stencil_buffer, total_sendcounts, total_sdispls,
1297 recv_stencil_buffer, total_recvcounts, total_rdispls,
1298 sizeof(*stencil_buffer), stencil_direct_mf_dt, comm);
1300 yac_mpi_call(MPI_Type_free(&stencil_direct_mf_dt), comm);
1304 qsort(recv_stencil_buffer, tgt_count,
sizeof(*recv_stencil_buffer),
1307 memset(sendcounts, 0,
1308 (
size_t)comm_size * (
size_t)num_src_fields *
sizeof(*sendcounts));
1310 for (
size_t i = 0; i < tgt_count; ++i)
1311 sendcounts[(uint64_t)(recv_stencil_buffer[i].
src.
rank) * num_src_fields +
1312 recv_stencil_buffer[i].src_field_idx]++;
1315 (
size_t)num_src_fields, sendcounts, recvcounts, sdispls, rdispls, comm);
1317 saccu = 0, raccu = 0;
1318 for (
int i = 0; i < comm_size; ++i) {
1319 total_sdispls[i] = saccu;
1320 total_rdispls[i] = raccu;
1321 total_sendcounts[i] = 0;
1322 total_recvcounts[i] = 0;
1323 for (
size_t j = 0; j < num_src_fields; ++j) {
1324 total_sendcounts[i] += sendcounts[num_src_fields * i + j];
1325 total_recvcounts[i] += recvcounts[num_src_fields * i + j];
1327 saccu += total_sendcounts[i];
1328 raccu += total_recvcounts[i];
1331 send_buffer_size = total_sdispls[comm_size - 1] +
1332 total_sendcounts[comm_size - 1];
1333 recv_buffer_size = total_rdispls[comm_size - 1] +
1334 total_recvcounts[comm_size - 1];
1336 uint64_t * orig_pos_buffer =
1337 xmalloc((send_buffer_size + recv_buffer_size) *
sizeof(*orig_pos_buffer));
1338 uint64_t * send_orig_pos_buffer = orig_pos_buffer + recv_buffer_size;
1339 uint64_t * recv_orig_pos_buffer = orig_pos_buffer;
1341 for (
size_t i = 0; i < tgt_count; ++i)
1342 send_orig_pos_buffer[
1343 sdispls[(uint64_t)(recv_stencil_buffer[i].
src.
rank) * num_src_fields +
1344 recv_stencil_buffer[i].src_field_idx + 1]++] =
1345 recv_stencil_buffer[i].
src.orig_pos;
1349 send_orig_pos_buffer, total_sendcounts, total_sdispls,
1350 recv_orig_pos_buffer, total_recvcounts, total_rdispls,
1351 sizeof(*send_orig_pos_buffer), MPI_UINT64_T, comm);
1352 free(size_t_buffer);
1355 Xt_redist * redists =
1357 recv_orig_pos_buffer, recvcounts, recv_stencil_buffer, sendcounts,
1358 (
size_t)num_src_fields, comm);
1361 free(orig_pos_buffer);
1362 free(stencil_buffer);
1366 if (redists != NULL) {
1367 for (
size_t i = 0; i < (size_t)num_src_fields; ++i)
1368 xt_redist_delete(redists[i]);
1377 int pack_size_value;
1379 yac_mpi_call(MPI_Pack_size(1, MPI_DOUBLE, comm, &pack_size_value), comm);
1381 return pack_size_value;
1406 int pack_size_weights;
1410 MPI_DOUBLE, comm, &pack_size_weights), comm);
1422 int pack_size_src_field_idx;
1425 1, MPI_UINT64_T, comm, &pack_size_src_field_idx), comm);
1430 pack_size_src_field_idx;
1437 int pack_size_weights, pack_size_field_indices;
1441 count, MPI_DOUBLE, comm, &pack_size_weights), comm);
1444 count, MPI_UINT64_T, comm, &pack_size_field_indices), comm);
1449 pack_size_weights + pack_size_field_indices;
1456 int pack_size_field_indices;
1460 MPI_UINT64_T, comm, &pack_size_field_indices), comm);
1465 pack_size_field_indices;
1482 "ERROR(copy_interp_weight_stencil): invalid stencil type")
1484 switch (stencil->
type) {
1503 size_t weight_size =
1522 size_t field_indices_size =
1536 size_t weight_size =
1542 size_t field_indices_size =
1552 return stencil_copy;
1570 "ERROR(wcopy_interp_weight_stencil): invalid stencil type")
1572 switch (stencil->
type) {
1597 double * new_weights =
xmalloc(src_count *
sizeof(*new_weights));
1598 if (weights == NULL)
1599 for (
size_t i = 0; i < src_count; ++i) new_weights[i] = weight;
1601 for (
size_t i = 0; i < src_count; ++i) new_weights[i] = weights[i] * weight;
1609 return stencil_wcopy;
1619 if (ret)
return ret;
1637 size_t count = srcs->
count;
1640 xmalloc(count *
sizeof(*w_global_id));
1643 for (
size_t i = 0; i < count; ++i) {
1645 w_global_id[i].
weight = (*w)[i];
1654 size_t new_count = 0;
1657 for (
size_t i = 0; i < count;) {
1659 data[new_count] = data[i];
1662 double curr_weight = w_global_id[i].
weight;
1666 while((i < count) && (curr_global_id == w_global_id[i].
global_id)) {
1668 curr_weight += w_global_id[i].
weight;
1672 (*w)[new_count] = curr_weight;
1676 srcs->
data =
xrealloc(data, new_count *
sizeof(*data));
1677 srcs->
count = new_count;
1678 *w =
xrealloc(*w, new_count *
sizeof(**w));
1684 size_t src_count = 0;
1685 size_t point_info_buffer_size = 0;
1687 for (
size_t i = 0; i < num_stencils; ++i) {
1688 size_t curr_src_count;
1694 "ERROR(stencils_merge_wsum): invalid stencil type")
1695 switch (stencils[i]->
type) {
1710 src_count += curr_src_count;
1711 for (
size_t j = 0, curr_src_data_count; j < curr_src_count; ++j)
1712 if (((curr_src_data_count = srcs[j].
data.
count)) > 1)
1713 point_info_buffer_size += curr_src_data_count;
1720 srcs->
count = src_count;
1722 double * new_w =
xmalloc(src_count *
sizeof(*new_w));
1724 for (
size_t i = 0, offset = 0; i < num_stencils; ++i) {
1725 size_t curr_src_count;
1732 "ERROR(stencils_merge_wsum): invalid stencil type")
1733 switch (stencils[i]->
type) {
1752 srcs->
data + offset, curr_srcs, curr_src_count, &point_info_buffer);
1753 if (stencil_w == NULL)
1754 for (
size_t j = 0; j < curr_src_count; ++j, ++offset)
1755 new_w[offset] = w[i];
1757 for (
size_t j = 0; j < curr_src_count; ++j, ++offset)
1758 new_w[offset] = w[i] * stencil_w[j];
1768 return merge_stencil;
1774 for (
size_t i = 0; i < num_stencils; ++i)
1778 size_t src_count = 0;
1779 size_t point_info_buffer_size = 0;
1781 for (
size_t i = 0; i < num_stencils; ++i) {
1782 size_t curr_src_count;
1787 "ERROR(stencils_merge_sum): invalid stencil type")
1788 switch (stencils[i]->
type) {
1799 src_count += curr_src_count;
1800 for (
size_t j = 0, curr_src_data_count; j < curr_src_count; ++j)
1801 if (((curr_src_data_count = srcs[j].
data.
count)) > 1)
1802 point_info_buffer_size += curr_src_data_count;
1809 srcs->
count = src_count;
1812 for (
size_t i = 0, offset = 0; i < num_stencils; ++i) {
1813 size_t curr_src_count;
1818 "ERROR(stencils_merge_sum): invalid stencil type")
1819 switch (stencils[i]->
type) {
1831 srcs->
data + offset, curr_srcs, curr_src_count, &point_info_buffer);
1832 offset += curr_src_count;
1841 return merge_stencil;
1848 if (num_stencils == 1)
1851 int fixed_count = 0;
1852 int direct_count = 0;
1855 double fixed_value = 0.0;
1857 for (
size_t i = 0; i < num_stencils; ++i) {
1862 "ERROR(stencils_merge): multiple source fields not yet supported")
1868 "ERROR(stencils_merge): unsupported stencil type")
1869 switch (stencils[i]->
type) {
1890 (fixed_count > 0) || (wsum_count > 0) ||
1891 (sum_count > 0) || (direct_count > 0),
1892 "ERROR(stencils_merge): unknown error")
1893 if (fixed_count > 0) {
1896 (direct_count + sum_count + wsum_count) <= 0,
1897 "ERROR(stencils_merge): invalid stencil combination")
1899 merge_stencil = **stencils;
1901 }
else if (wsum_count > 0)
1904 else if ((sum_count > 0) || (direct_count > 0))
1910 return merge_stencil;
1915 int * pack_sizes, MPI_Datatype point_info_dt, MPI_Comm comm) {
1918 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_type), comm);
1920 for (
size_t i = 0; i < count; ++i) {
1923 int (*func_pack_size)(
1929 (curr_stencil->
type ==
SUM) ||
1934 "ERROR(get_stencils_pack_sizes): invalid stencil type")
1935 switch (curr_stencil->
type) {
1959 pack_sizes[i] = pack_size_type +
1961 &(curr_stencil->
tgt), point_info_dt, comm) +
1962 func_pack_size(curr_stencil, point_info_dt, comm);
1968 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
1972 MPI_Pack(&(stencil->
data.
fixed.
value), 1, MPI_DOUBLE, buffer, buffer_size,
1973 position, comm), comm);
1978 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
1982 &(stencil->
data.
direct.
src), buffer, buffer_size, position, point_info_dt,
1988 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
1992 stencil->
data.
sum.
srcs, buffer, buffer_size, position, point_info_dt, comm);
1997 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2002 point_info_dt, comm);
2007 buffer, buffer_size, position, comm), comm);
2012 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2017 point_info_dt, comm);
2022 MPI_Pack(&temp_field_idx, 1, MPI_UINT64_T,
2023 buffer, buffer_size, position, comm), comm);
2028 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2033 point_info_dt, comm);
2037 uint64_t * temp_field_indices =
xmalloc(count *
sizeof(*temp_field_indices));
2038 for (
size_t i = 0; i < count; ++i)
2039 temp_field_indices[i] =
2042 MPI_Pack(temp_field_indices, (
int)count, MPI_UINT64_T,
2043 buffer, buffer_size, position, comm), comm);
2044 free(temp_field_indices);
2049 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2054 point_info_dt, comm);
2060 buffer, buffer_size, position, comm), comm);
2062 uint64_t * temp_field_indices =
xmalloc(count *
sizeof(*temp_field_indices));
2063 for (
size_t i = 0; i < count; ++i)
2064 temp_field_indices[i] =
2067 MPI_Pack(temp_field_indices, (
int)count, MPI_UINT64_T,
2068 buffer, buffer_size, position, comm), comm);
2069 free(temp_field_indices);
2074 void ** pack_data,
int * pack_sizes, MPI_Datatype point_info_dt,
2078 stencils, count, pack_order, pack_sizes, point_info_dt, comm);
2080 size_t pack_buffer_size = 0;
2081 for (
size_t i = 0; i < count; ++i)
2082 pack_buffer_size += (
size_t)(pack_sizes[i]);
2084 void * pack_data_ =
xmalloc(pack_buffer_size);
2085 size_t total_pack_size = 0;
2087 for (
size_t i = 0; i < count; ++i) {
2092 int * position, MPI_Datatype point_info_dt, MPI_Comm comm);
2097 (curr_stencil->
type ==
SUM) ||
2102 "ERROR(pack_stencils): invalid stencil type")
2103 switch (curr_stencil->
type) {
2129 int type = (int)curr_stencil->
type;
2130 void * buffer = (
void*)((
char*)pack_data_ + total_pack_size);
2131 int buffer_size = pack_sizes[i];
2135 MPI_Pack(&
type, 1, MPI_INT, buffer, buffer_size, &position, comm), comm);
2138 &position, point_info_dt, comm);
2140 func_pack(curr_stencil, buffer, buffer_size, &position, point_info_dt, comm);
2143 pack_sizes[i] >= position,
2144 "ERROR(pack_stencils): "
2145 "actual pack size is bigger then computed one (%d > %d)",
2146 position, pack_sizes[i]);
2148 pack_sizes[i] = position;
2149 total_pack_size += (size_t)position;
2152 *pack_data =
xrealloc(pack_data_, total_pack_size);
2157 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2161 MPI_Unpack(buffer, buffer_size, position, &(stencil->
data.
fixed.
value), 1,
2162 MPI_DOUBLE, comm), comm);
2167 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2172 point_info_dt, comm);
2177 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2182 buffer, buffer_size, position, &(stencil->
data.
sum.
srcs), point_info_dt,
2188 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2193 point_info_dt, comm);
2203 (
int)count, MPI_DOUBLE, comm), comm);
2208 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2213 point_info_dt, comm);
2216 uint64_t temp_field_idx;
2219 buffer, buffer_size, position, &temp_field_idx,
2220 1, MPI_UINT64_T, comm), comm);
2226 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2231 point_info_dt, comm);
2235 uint64_t * temp_field_indices =
xmalloc(count *
sizeof(*temp_field_indices));
2242 buffer, buffer_size, position, temp_field_indices,
2243 (
int)count, MPI_UINT64_T, comm), comm);
2244 for (
size_t i = 0; i < count; ++i)
2246 free(temp_field_indices);
2251 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2256 point_info_dt, comm);
2267 (
int)count, MPI_DOUBLE, comm), comm);
2269 uint64_t * temp_field_indices =
xmalloc(count *
sizeof(*temp_field_indices));
2276 buffer, buffer_size, position, temp_field_indices,
2277 (
int)count, MPI_UINT64_T, comm), comm);
2278 for (
size_t i = 0; i < count; ++i)
2280 (
size_t)(temp_field_indices[i]);
2281 free(temp_field_indices);
2286 void * packed_data,
size_t packed_data_size,
2287 MPI_Datatype point_info_dt, MPI_Comm comm) {
2289 for (
size_t i = 0, offset = 0; i < count; ++i) {
2292 packed_data_size >= offset,
2293 "ERROR(unpack_stencils): invalid offset");
2296 void * curr_buffer = (
void*)((
unsigned char*)packed_data + offset);
2297 int buffer_size = (int)(
MIN(packed_data_size - offset, INT_MAX));
2303 curr_buffer, buffer_size, &position, &
type, 1, MPI_INT, comm), comm);
2305 void (*func_unpack)(
2307 int * position, MPI_Datatype point_info_dt, MPI_Comm comm);
2313 "ERROR(unpack_stencils): invalid stencil type")
2339 curr_stencil->
type =
2342 curr_buffer, buffer_size, &position, &(curr_stencil->
tgt),
2343 point_info_dt, comm);
2345 curr_stencil, curr_buffer, buffer_size, &position, point_info_dt, comm);
2346 offset += (size_t)position;
2352 size_t * stencil_indices,
2353 size_t * stencil_sendcounts,
size_t * stencil_recvcounts) {
2355 int comm_rank, comm_size;
2360 stencil_sendcounts[comm_rank] == stencil_recvcounts[comm_rank],
2361 "ERROR(exchange_stencils): error in arguments")
2363 size_t send_count = 0, recv_count = 0;
2364 size_t local_send_offset = 0;
2365 size_t local_recv_offset = 0;
2366 size_t local_count = (size_t)(stencil_sendcounts[comm_rank]);
2367 for (
int i = 0; i < comm_rank; ++i) {
2368 send_count += stencil_sendcounts[i];
2369 recv_count += stencil_recvcounts[i];
2370 local_send_offset += stencil_sendcounts[i];
2371 local_recv_offset += stencil_recvcounts[i];
2373 local_send_offset = send_count;
2374 local_recv_offset = recv_count;
2375 stencil_sendcounts[comm_rank] = 0;
2376 stencil_recvcounts[comm_rank] = 0;
2377 for (
int i = comm_rank + 1; i < comm_size; ++i) {
2378 send_count += stencil_sendcounts[i];
2379 recv_count += stencil_recvcounts[i];
2383 xmalloc((recv_count + local_count) *
sizeof(*new_stencils));
2384 size_t * local_stencil_indices =
2385 xmalloc(local_count *
sizeof(*local_stencil_indices));
2386 memcpy(local_stencil_indices, stencil_indices + local_send_offset,
2387 local_count *
sizeof(*local_stencil_indices));
2391 stencil_indices + local_send_offset,
2392 stencil_indices + local_send_offset + local_count,
2393 (send_count - local_send_offset) *
sizeof(*stencil_indices));
2397 int * pack_sizes =
xmalloc(send_count *
sizeof(*pack_sizes));
2400 stencils, send_count, stencil_indices, &send_buffer, pack_sizes,
2401 point_info_dt, comm);
2403 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
2405 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
2408 for (
int rank = 0; rank < comm_size; ++rank) {
2409 size_t sendcount = 0;
2410 int curr_num_stencils = stencil_sendcounts[rank];
2411 for (
int j = 0; j < curr_num_stencils; ++j, ++send_count)
2412 sendcount += (
size_t)(pack_sizes[send_count]);
2413 sendcounts[rank] = sendcount;
2418 1, sendcounts, recvcounts, sdispls, rdispls, comm);
2420 size_t recv_size = recvcounts[comm_size - 1] + rdispls[comm_size - 1];
2422 void * recv_buffer =
xmalloc(recv_size);
2425 yac_alltoallv_packed_p2p(
2426 send_buffer, sendcounts, sdispls+1,
2427 recv_buffer, recvcounts, rdispls, comm);
2433 new_stencils, recv_count,
2434 recv_buffer, recv_size, point_info_dt, comm);
2438 memmove(new_stencils + local_recv_offset + local_count,
2439 new_stencils + local_recv_offset ,
2440 (recv_count - local_recv_offset ) *
sizeof(*new_stencils));
2441 for (
size_t i = 0; i < local_count; ++i, ++local_recv_offset )
2442 new_stencils[local_recv_offset] =
2444 stencils + local_stencil_indices[i],
2445 stencils[local_stencil_indices[i]].
tgt);
2446 free(local_stencil_indices);
2448 return new_stencils;
2453 int * stencil_ranks,
size_t count) {
2455 MPI_Comm comm =
weights->comm;
2461 "ERROR(yac_interp_weights_get_stencils): count exceeds INT_MAX");
2463 size_t * reorder_idx =
xmalloc(count *
sizeof(*reorder_idx));
2464 for (
size_t i = 0; i < count; ++i) reorder_idx[i] = i;
2467 stencil_ranks, count, stencil_indices, reorder_idx);
2470 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
2472 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
2473 for (
size_t i = 0; i < count; ++i) sendcounts[stencil_ranks[i]]++;
2475 1, sendcounts, recvcounts, sdispls, rdispls, comm);
2477 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
2478 uint64_t * uint64_t_buffer =
2479 xmalloc((count + recv_count) *
sizeof(*uint64_t_buffer));
2480 uint64_t * send_stencil_indices = uint64_t_buffer;
2481 uint64_t * recv_stencil_indices = uint64_t_buffer + count;
2482 for (
size_t i = 0; i < count; ++i)
2483 send_stencil_indices[i] = (uint64_t)(stencil_indices[i]);
2484 yac_alltoallv_uint64_p2p(
2485 send_stencil_indices, sendcounts, sdispls+1,
2486 recv_stencil_indices, recvcounts, rdispls, comm);
2489 size_t * exchange_stencil_indices =
2490 xmalloc(recv_count *
sizeof(*exchange_stencil_indices));
2491 for (
size_t i = 0; i < recv_count; ++i) {
2493 (
size_t)(recv_stencil_indices[i]) <
weights->stencils_size,
2494 "ERROR(yac_interp_weights_get_stencils): invalid stencil index");
2495 exchange_stencil_indices[i] = (size_t)(recv_stencil_indices[i]);
2497 free(uint64_t_buffer);
2500 recvcounts, sendcounts);
2501 free(exchange_stencil_indices);
2506 xmalloc(count *
sizeof(*sorted_stencils));
2507 for (
size_t i = 0; i < count; ++i)
2508 sorted_stencils[reorder_idx[i]] = stencils[i];
2512 return sorted_stencils;
2520 size_t * num_stencils_per_tgt,
size_t * stencil_indices,
2521 int * stencil_ranks,
double * w) {
2523 size_t count = (tgts != NULL)?tgts->
count:0;
2524 MPI_Comm comm =
weights->comm;
2525 int comm_rank, comm_size;
2530 size_t total_num_stencils = 0;
2531 size_t max_num_stencils_per_tgt = 0;
2532 for (
size_t i = 0; i < count; ++i) {
2533 size_t curr_num_stencils_per_tgt = num_stencils_per_tgt[i];
2534 if (curr_num_stencils_per_tgt > max_num_stencils_per_tgt)
2535 max_num_stencils_per_tgt = curr_num_stencils_per_tgt;
2536 total_num_stencils += num_stencils_per_tgt[i];
2538 size_t num_missing_stencils = 0;
2539 for (
size_t i = 0; i < total_num_stencils; ++i)
2540 if (stencil_ranks[i] != comm_rank) num_missing_stencils++;
2543 size_t * missing_stencil_indices =
2544 xmalloc(num_missing_stencils *
sizeof(*missing_stencil_indices));
2545 int * missing_stencil_ranks =
2546 xmalloc(num_missing_stencils *
sizeof(*missing_stencil_ranks));
2547 for (
size_t i = 0, j = 0; i < total_num_stencils; ++i) {
2548 if (stencil_ranks[i] != comm_rank) {
2549 missing_stencil_indices[j] = stencil_indices[i];
2550 missing_stencil_ranks[j] = stencil_ranks[i];
2556 weights, missing_stencil_indices, missing_stencil_ranks,
2557 num_missing_stencils);
2558 free(missing_stencil_ranks);
2559 free(missing_stencil_indices);
2564 size_t stencils_array_size =
weights->stencils_array_size;
2565 size_t stencils_size =
weights->stencils_size;
2568 xmalloc(max_num_stencils_per_tgt *
sizeof(*stencils_buffer));
2571 stencils, stencils_array_size, stencils_size + count);
2573 for (
size_t i = 0, j = 0; i < count;
2574 ++i, ++stencils_size) {
2576 size_t curr_num_stencils = num_stencils_per_tgt[i];
2577 for (
size_t k = 0; k < curr_num_stencils; ++k)
2578 stencils_buffer[k] =
2579 (stencil_ranks[k] == comm_rank)?
2580 (stencils + stencil_indices[k]):(missing_stencils + (j++));
2582 stencils[stencils_size] =
2584 w += curr_num_stencils;
2585 stencil_indices += curr_num_stencils;
2586 stencil_ranks += curr_num_stencils;
2590 weights->stencils_array_size = stencils_array_size;
2591 weights->stencils_size = stencils_size;
2593 free(stencils_buffer);
2601 YAC_ASSERT(count != 0,
"ERROR(compute_owner): count == 0")
2606 size_t best_rank_count = 0;
2608 size_t curr_rank_count = 1;
2609 int prev_rank = ranks[0];
2611 for (
size_t i = 1; i < count; ++i, ++curr_rank_count) {
2612 int curr_rank = ranks[i];
2613 if (prev_rank != curr_rank) {
2614 if (curr_rank_count > best_rank_count) {
2615 best_rank = prev_rank;
2616 best_rank_count = curr_rank_count;
2618 prev_rank = curr_rank;
2619 curr_rank_count = 0;
2623 return (curr_rank_count > best_rank_count)?prev_rank:best_rank;
2631 size_t total_num_links = 0;
2633 for (
size_t i = 0; i <
count; ++i) {
2635 stencils[i].
type == stencil_type,
2636 "ERROR(generate_w_sum_mf_stencils): wrong stencil type")
2642 xmalloc(
sizeof(*temp) + total_num_links *
sizeof(temp->
buffer[0]));
2649 for (
size_t i = 0, k = 0; i <
count; ++i) {
2651 wsum_stencils->
data + i;
2657 curr_wsum_stencil->
count = curr_stencil_size;
2658 curr_wsum_stencil->
data = curr_links;
2659 for (
size_t j = 0; j < curr_stencil_size; ++j) {
2660 int curr_count = curr_srcs[j].
data.
count;
2663 "ERROR(generate_w_sum_mf_stencils): global src id no found")
2670 "ERROR(generate_w_sum_mf_stencils): unsupported stencil type")
2671 switch(stencil_type) {
2674 curr_links[j].
weight = 1.0;
2682 curr_links[j].
weight = 1.0;
2693 k += curr_stencil_size;
2696 return wsum_stencils;
2703 int array_of_blocklengths[] = {1, 1, 1, 1};
2704 const MPI_Aint array_of_displacements[] =
2705 {(MPI_Aint)(intptr_t)(
const void *)&(dummy.
src.
rank) -
2706 (MPI_Aint)(intptr_t)(
const void *)&dummy,
2707 (MPI_Aint)(intptr_t)(
const void *)&(dummy.
src.
orig_pos) -
2708 (MPI_Aint)(intptr_t)(
const void *)&dummy,
2710 (MPI_Aint)(intptr_t)(
const void *)&dummy,
2711 (MPI_Aint)(intptr_t)(
const void *)&(dummy.
weight) -
2712 (MPI_Aint)(intptr_t)(
const void *)&dummy};
2713 const MPI_Datatype array_of_types[] =
2714 {MPI_INT, MPI_UINT64_T, MPI_UINT64_T, MPI_DOUBLE};
2716 MPI_Type_create_struct(4, array_of_blocklengths, array_of_displacements,
2717 array_of_types, &dt), comm);
2723 MPI_Datatype wsum_mf_weight_dt, MPI_Datatype point_info_dt, MPI_Comm comm) {
2725 int pack_size_count,
2729 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_count), comm);
2732 (
int)(stencil->
count), wsum_mf_weight_dt, comm, &pack_size_weights), comm);
2736 return pack_size_count + pack_size_weights + pack_size_tgt;
2741 size_t * pack_order,
void ** pack_data,
int * pack_sizes,
2742 int * weight_counts, MPI_Comm comm) {
2748 size_t temp_total_pack_size = 0;
2749 for (
size_t i = 0; i < count; ++i) {
2750 temp_total_pack_size +=
2753 wsum_stencils + pack_order[i],
2754 wsum_mf_weight_dt, point_info_dt, comm));
2757 void * pack_data_ =
xmalloc(temp_total_pack_size);
2758 size_t total_pack_size = 0;
2761 for (
size_t i = 0; i < count; ++i) {
2763 size_t idx = pack_order[i];
2766 void * buffer = (
void*)((
unsigned char*)pack_data_ + total_pack_size);
2767 int buffer_size = pack_sizes[i];
2768 int curr_count = wsum_stencils[idx].
count;
2772 &(wsum_stencils[idx].tgt), buffer, buffer_size, &position,
2773 point_info_dt, comm);
2776 MPI_Pack(&curr_count, 1, MPI_INT, buffer, buffer_size, &position, comm), comm);
2779 MPI_Pack(wsum_stencils[idx].data, curr_count, wsum_mf_weight_dt,
2780 buffer, buffer_size, &position, comm), comm);
2782 pack_sizes[i] = position;
2783 weight_counts[i] = curr_count;
2784 total_pack_size += (size_t)position;
2790 *pack_data =
xrealloc(pack_data_, total_pack_size);
2796 void * packed_data,
size_t packed_data_size, MPI_Comm comm) {
2801 size_t weight_offset = 0;
2802 for (
size_t i = 0, offset = 0; i < count; ++i) {
2805 void * curr_buffer = (
void*)((
char*)packed_data + offset);
2806 int buffer_size = (int)(packed_data_size - offset);
2812 weight_buffer + weight_offset;
2815 curr_buffer, buffer_size, &position, &tgt, point_info_dt, comm);
2817 MPI_Unpack(curr_buffer, buffer_size, &position,
2818 &weight_count, 1, MPI_INT, comm),
2821 MPI_Unpack(curr_buffer, buffer_size, &position,
2822 curr_weights, weight_count, wsum_mf_weight_dt, comm), comm);
2824 curr_wsum_stencil->
tgt = tgt;
2825 curr_wsum_stencil->
data = curr_weights;
2826 curr_wsum_stencil->
count = (size_t)weight_count;
2828 weight_offset += (size_t)weight_count;
2829 offset += (size_t)position;
2835 return weight_offset;
2840 int * stencil_owner,
size_t * reorder_idx,
size_t num_owners) {
2843 wsum_stencils_data->
data;
2845 int comm_rank, comm_size;
2849 size_t local_weight_count = 0;
2850 size_t local_count = 0;
2851 for (
size_t i = 0; i < num_owners; ++i) {
2852 if (stencil_owner[i] == comm_rank) {
2853 local_weight_count += wsum_stencils[reorder_idx[i]].
count;
2854 stencil_owner[i] = INT_MAX;
2860 size_t send_count = num_owners - local_count;
2864 int * pack_sizes =
xmalloc(2 * send_count *
sizeof(*pack_sizes));
2865 int * weight_counts = pack_sizes + send_count;
2867 pack_sizes, weight_counts, comm);
2869 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
2871 3, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
2873 for (
size_t i = 0; i < send_count; ++i) {
2874 int curr_rank = stencil_owner[i];
2875 sendcounts[3 * curr_rank + 0]++;
2876 sendcounts[3 * curr_rank + 1] += (size_t)(pack_sizes[i]);
2877 sendcounts[3 * curr_rank + 2] += (size_t)(weight_counts[i]);
2885 size_t recv_count = 0;
2886 size_t recv_size = 0;
2887 size_t recv_weight_count = 0;
2888 size_t saccu = 0, raccu = 0;
2889 for (
int i = 0; i < comm_size; ++i) {
2892 recv_count += recvcounts[3 * i + 0];
2893 recv_size += recvcounts[3 * i + 1];
2894 recv_weight_count += recvcounts[3 * i + 2];
2895 saccu += sendcounts[3 * i + 1];
2896 raccu += recvcounts[3 * i + 1];
2897 sendcounts[i] = sendcounts[3 * i + 1];
2898 recvcounts[i] = recvcounts[3 * i + 1];
2901 void * recv_buffer =
xmalloc(recv_size);
2904 yac_alltoallv_packed_p2p(
2905 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls, comm);
2911 (local_weight_count + recv_weight_count) *
sizeof(temp->
buffer[0]));
2915 ((new_wsum_stencils_data->
data =
2916 xmalloc((local_count + recv_count) *
2917 sizeof(*(new_wsum_stencils_data->
data)))));
2918 new_wsum_stencils_data->
count = local_count + recv_count;
2921 size_t weight_offset =
2923 new_wsum_stencils, &(temp->
buffer[0]), recv_count,
2924 recv_buffer, recv_size, comm);
2926 new_wsum_stencils += recv_count;
2928 &(temp->
buffer[weight_offset]);
2932 for (
size_t i = 0, weight_offset = 0; i < local_count; ++i) {
2934 wsum_stencils + reorder_idx[i + send_count];
2936 new_wsum_stencils + i;
2938 weight_buffer + weight_offset;
2939 size_t curr_stencil_size = curr_wsum_stencil->
count;
2941 curr_new_wsum_stencil->
count = curr_stencil_size;
2942 curr_new_wsum_stencil->
data = curr_new_weights;
2943 memcpy(curr_new_weights, curr_wsum_stencil->
data,
2944 curr_stencil_size *
sizeof(*curr_new_weights));
2945 weight_offset += curr_stencil_size;
2948 return new_wsum_stencils_data;
2955 wsum_stencils_data->
data;
2959 size_t max_stencil_size = 0;
2960 for (
size_t i = 0; i <
count; ++i) {
2961 size_t curr_stencil_size = wsum_stencils[i].
count;
2962 if (curr_stencil_size > max_stencil_size)
2963 max_stencil_size = curr_stencil_size;
2968 xmalloc((
count + max_stencil_size) *
sizeof(*rank_buffer));
2969 int * stencil_owner = rank_buffer;
2970 int * stencil_owners = rank_buffer +
count;
2971 size_t * reorder_idx =
xmalloc(
count *
sizeof(*reorder_idx));
2972 for (
size_t i = 0; i <
count; ++i) {
2973 size_t curr_stencil_size = wsum_stencils[i].
count;
2975 wsum_stencils[i].
data;
2976 for (
size_t j = 0; j < curr_stencil_size; ++j)
2977 stencil_owners[j] = curr_weights[j].
src.rank;
2978 stencil_owner[i] =
compute_owner(stencil_owners, curr_stencil_size);
2984 comm, wsum_stencils_data, stencil_owner, reorder_idx,
count);
2989 return new_wsum_stencils_data;
2997 if (ret)
return ret;
3009 wsum_stencils_data->
data;
3014 size_t total_owner_count = 0;
3015 for (
size_t i = 0; i <
count; ++i) {
3017 if (stencil_size == 1) {
3018 total_owner_count++;
3023 tgt_point_infos, stencil_size,
sizeof(*tgt_point_infos),
3025 int prev_rank = INT_MAX;
3026 for (
size_t j = 0; j < stencil_size; ++j) {
3027 int curr_rank = tgt_point_infos[j].
rank;
3028 if (curr_rank != prev_rank) {
3029 ++total_owner_count;
3030 prev_rank = curr_rank;
3036 int * stencil_owner =
xmalloc(total_owner_count *
sizeof(*stencil_owner));
3037 size_t * reorder_idx =
xmalloc(total_owner_count *
sizeof(*reorder_idx));
3038 for (
size_t i = 0, k = 0; i < count; ++i) {
3040 if (stencil_size == 1) {
3047 int prev_rank = INT_MAX;
3048 for (
int j = 0; j < stencil_size; ++j) {
3049 int curr_rank = tgt_point_infos[j].
rank;
3050 if (curr_rank != prev_rank) {
3051 stencil_owner[k] = tgt_point_infos[j].
rank;
3054 prev_rank = curr_rank;
3062 comm, wsum_stencils_data, stencil_owner, reorder_idx, total_owner_count);
3064 wsum_stencils = new_wsum_stencils_data->
data;
3068 free(stencil_owner);
3070 if (
count == 0)
return new_wsum_stencils_data;
3076 size_t total_num_tgt_pos = 0;
3077 for (
size_t i = 0; i <
count; ++i) {
3079 if (curr_count == 1) {
3080 ++total_num_tgt_pos;
3084 for (
size_t j = 0; j < curr_count; ++j)
3085 if (curr_point_infos[j].
rank == comm_rank)
3086 ++total_num_tgt_pos;
3090 if (total_num_tgt_pos != count) {
3091 new_wsum_stencils_data->
data =
3093 xrealloc(wsum_stencils, total_num_tgt_pos *
sizeof(*wsum_stencils))));
3094 new_wsum_stencils_data->
count = total_num_tgt_pos;
3098 for (
size_t i = 0, offset = count; i < count; ++i) {
3100 if (curr_count > 1) {
3105 for (j = 0; j < curr_count; ++j) {
3106 if (curr_point_infos[j].
rank == comm_rank) {
3115 for (j = j + 1; j < curr_count; ++j) {
3116 if (curr_point_infos[j].
rank == comm_rank) {
3117 wsum_stencils[offset] = wsum_stencils[i];
3123 free(curr_point_infos);
3127 return new_wsum_stencils_data;
3132 size_t num_src_fields, MPI_Comm comm) {
3137 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
3139 num_src_fields, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
3140 size_t * size_t_buffer =
3141 xmalloc(4 * (
size_t)comm_size *
sizeof(*size_t_buffer));
3142 size_t * total_sendcounts = size_t_buffer + 0 * comm_size;
3143 size_t * total_recvcounts = size_t_buffer + 1 * comm_size;
3144 size_t * total_sdispls = size_t_buffer + 2 * comm_size;
3145 size_t * total_rdispls = size_t_buffer + 3 * comm_size;
3147 for (
size_t i = 0; i < count; ++i)
3148 sendcounts[halo_points[i].data.rank * num_src_fields +
3152 num_src_fields, sendcounts, recvcounts, sdispls, rdispls, comm);
3154 size_t saccu = 0, raccu = 0;
3155 for (
int i = 0; i < comm_size; ++i) {
3156 total_sdispls[i] = saccu;
3157 total_rdispls[i] = raccu;
3158 total_sendcounts[i] = 0;
3159 total_recvcounts[i] = 0;
3160 for (
size_t j = 0; j < num_src_fields; ++j) {
3161 total_sendcounts[i] += sendcounts[num_src_fields * i + j];
3162 total_recvcounts[i] += recvcounts[num_src_fields * i + j];
3164 saccu += total_sendcounts[i];
3165 raccu += total_recvcounts[i];
3168 size_t recv_count = total_recvcounts[comm_size - 1] +
3169 total_rdispls[comm_size - 1];
3171 int * exchange_buffer =
3172 xmalloc((2 * count + recv_count) *
sizeof(*exchange_buffer));
3173 int * send_buffer = exchange_buffer;
3174 int * reorder_idx = exchange_buffer + count;
3175 int * recv_buffer = exchange_buffer + 2 * count;
3178 size_t num_halo_per_src_field[num_src_fields];
3180 num_halo_per_src_field, 0,
3181 num_src_fields *
sizeof(num_halo_per_src_field[0]));
3182 for (
size_t i = 0; i < count; ++i) {
3183 size_t curr_src_field_idx = (size_t)(halo_points[i].field_idx);
3184 size_t pos = sdispls[(size_t)(halo_points[i].data.rank) * num_src_fields +
3185 curr_src_field_idx + 1]++;
3189 "ERROR(generate_halo_redists): offset not supported by MPI")
3191 reorder_idx[pos] = num_halo_per_src_field[curr_src_field_idx]++;
3195 yac_alltoallv_int_p2p(
3196 send_buffer, total_sendcounts, total_sdispls,
3197 recv_buffer, total_recvcounts, total_rdispls, comm);
3199 free(size_t_buffer);
3201 size_t nsend = 0, nsends[num_src_fields];
3202 size_t nrecv = 0, nrecvs[num_src_fields];
3203 memset(nsends, 0, num_src_fields *
sizeof(nsends[0]));
3204 memset(nrecvs, 0, num_src_fields *
sizeof(nrecvs[0]));
3205 for (
int i = 0; i < comm_size; ++i) {
3206 for (
size_t field_idx = 0; field_idx < num_src_fields; ++field_idx) {
3207 if (sendcounts[i * num_src_fields + field_idx] > 0) {
3209 nrecvs[field_idx]++;
3211 if (recvcounts[i * num_src_fields + field_idx] > 0) {
3213 nsends[field_idx]++;
3218 size_t total_num_msg = nsend + nrecv;
3220 struct Xt_redist_msg * msgs_buffer =
3221 xmalloc(total_num_msg *
sizeof(*msgs_buffer));
3222 struct Xt_redist_msg * send_msgs = msgs_buffer;
3223 struct Xt_redist_msg * recv_msgs = msgs_buffer + nsend;
3225 for (
size_t field_idx = 0, nsend = 0, nrecv = 0;
3226 field_idx < num_src_fields; ++field_idx) {
3227 for (
int rank = 0; rank < comm_size; ++rank) {
3228 size_t idx = (size_t)rank * num_src_fields + field_idx;
3229 if (sendcounts[idx] > 0) {
3230 recv_msgs[nrecv].rank = rank;
3231 recv_msgs[nrecv].datatype =
3232 xt_mpi_generate_datatype(
3233 reorder_idx + sdispls[idx], sendcounts[idx], MPI_DOUBLE, comm);
3236 if (recvcounts[idx] > 0) {
3237 send_msgs[nsend].rank = rank;
3238 send_msgs[nsend].datatype =
3239 xt_mpi_generate_datatype(
3240 recv_buffer + rdispls[idx], recvcounts[idx], MPI_DOUBLE, comm);
3249 if (total_num_msg > 0) {
3251 yac_mpi_call(MPI_Comm_split(comm, 1, 0, &halo_comm), comm);
3253 int * rank_buffer =
xmalloc(2 * total_num_msg *
sizeof(*rank_buffer));
3254 int * orig_ranks = rank_buffer;
3255 int * split_ranks = rank_buffer + total_num_msg;
3257 for (
size_t i = 0; i < total_num_msg; ++i)
3258 orig_ranks[i] = msgs_buffer[i].rank;
3260 MPI_Group orig_group, split_group;
3262 yac_mpi_call(MPI_Comm_group(halo_comm, &split_group), comm);
3265 MPI_Group_translate_ranks(orig_group, (
int)total_num_msg, orig_ranks,
3266 split_group, split_ranks), halo_comm);
3268 for (
size_t i = 0; i < total_num_msg; ++i)
3269 msgs_buffer[i].rank = split_ranks[i];
3277 redist =
xmalloc(num_src_fields *
sizeof(*redist));
3278 if (num_src_fields == 1) {
3280 xt_redist_single_array_base_new(
3281 nsend, nrecv, send_msgs, recv_msgs, halo_comm);
3283 for (
size_t field_idx = 0; field_idx < num_src_fields; ++field_idx) {
3285 xt_redist_single_array_base_new(
3286 nsends[field_idx], nrecvs[field_idx],
3287 send_msgs, recv_msgs, halo_comm);
3288 send_msgs += nsends[field_idx];
3289 recv_msgs += nrecvs[field_idx];
3294 yac_mpi_call(MPI_Comm_split(comm, 0, 0, &halo_comm), comm);
3299 free(exchange_buffer);
3314 if (ret)
return ret;
3319 if (ret)
return ret;
3328 const void * a,
const void * b) {
3337 for (
size_t i = 0; i <
count; ++i) {
3340 if (ret)
return ret;
3343 if (ret)
return ret;
3349 const void * a,
const void * b) {
3358 "ERROR(compare_interp_weight_stencil_wsum_tgt_orig_pos): invalid data")
3363 return (a_orig_pos > b_orig_pos) - (a_orig_pos < b_orig_pos);
3373 size_t count, MPI_Comm comm) {
3378 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
3380 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
3382 for (
size_t i = 0; i <
count; ++i) {
3383 int curr_count = point_infos[i].
count;
3389 "ERROR(generate_redist_put_double): no owner found for global id")
3390 for (
int j = 0; j < curr_count; ++j)
3391 sendcounts[curr_point_infos[j].
rank]++;
3395 1, sendcounts, recvcounts, sdispls, rdispls, comm);
3398 sdispls[comm_size] + sendcounts[comm_size - 1];
3400 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
3402 int * exchange_buffer =
3403 xmalloc((2 * send_count + recv_count) *
sizeof(*exchange_buffer));
3404 int * send_buffer = exchange_buffer;
3405 int * reorder_idx = exchange_buffer + send_count;
3406 int * recv_buffer = exchange_buffer + 2 * send_count;
3409 for (
size_t i = 0; i < count; ++i) {
3410 int curr_count = point_infos[i].
count;
3414 for (
int j = 0; j < curr_count; ++j) {
3415 size_t pos = sdispls[curr_point_infos[j].
rank + 1]++;
3416 uint64_t
orig_pos = curr_point_infos[j].orig_pos;
3419 "ERROR(generate_redist_put_double): offset not supported by MPI")
3421 reorder_idx[pos] = i;
3426 yac_alltoallv_int_p2p(
3427 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls, comm);
3431 for (
int i = 0; i < comm_size; ++i) {
3432 if (sendcounts[i] > 0) nsend++;
3433 if (recvcounts[i] > 0) nrecv++;
3436 struct Xt_redist_msg * send_msgs =
xmalloc(nsend *
sizeof(*send_msgs));
3437 struct Xt_redist_msg * recv_msgs =
xmalloc(nrecv *
sizeof(*send_msgs));
3439 for (
int i = 0, nsend = 0, nrecv = 0; i < comm_size; ++i) {
3440 if (sendcounts[i] > 0) {
3441 send_msgs[nsend].rank = i;
3442 send_msgs[nsend].datatype =
3443 xt_mpi_generate_datatype(
3444 reorder_idx + sdispls[i], sendcounts[i], MPI_DOUBLE, comm);
3447 if (recvcounts[i] > 0) {
3448 recv_msgs[nrecv].rank = i;
3449 recv_msgs[nrecv].datatype =
3450 xt_mpi_generate_datatype(
3451 recv_buffer + rdispls[i], recvcounts[i], MPI_DOUBLE, comm);
3458 xt_redist_single_array_base_new(nsend, nrecv, send_msgs, recv_msgs, comm);
3460 free(exchange_buffer);
3484 "ERROR(yac_interp_weights_redist_w_sum_mf): invalid reorder type")
3491 size_t wsum_mf_count = new_wsum_mf_stencils_data->
count;
3493 new_wsum_mf_stencils_data->
data;
3496 size_t total_num_links = 0, total_num_remote_weights = 0;
3497 for (
size_t i = 0; i < wsum_mf_count; ++i) {
3498 size_t curr_stencil_size = wsum_mf_stencils[i].
count;
3499 total_num_links += curr_stencil_size;
3500 for (
size_t j = 0; j < curr_stencil_size; ++j)
3501 if (wsum_mf_stencils[i].
data[j].
src.rank != comm_rank)
3502 ++total_num_remote_weights;
3507 xmalloc(total_num_remote_weights *
sizeof(*remote_src_points));
3508 size_t num_src_fields = 0;
3509 for (
size_t i = 0, k = 0; i < wsum_mf_count; ++i) {
3510 size_t curr_stencil_size = wsum_mf_stencils[i].
count;
3512 wsum_mf_stencils[i].
data;
3513 for (
size_t j = 0; j < curr_stencil_size; ++j) {
3515 if (curr_src_field_idx >= num_src_fields)
3516 num_src_fields = curr_src_field_idx + 1;
3517 if (curr_weights[j].
src.rank != comm_rank) {
3518 remote_src_points[k].
data = curr_weights[j].
src;
3519 remote_src_points[k].
field_idx = curr_src_field_idx;
3527 MPI_IN_PLACE, &num_src_fields, 1,
YAC_MPI_SIZE_T, MPI_MAX, comm), comm);
3531 qsort(remote_src_points, total_num_remote_weights,
sizeof(*remote_src_points),
3538 size_t prev_field_idx;
3540 if (total_num_remote_weights > 0) {
3541 prev_remote_src_point = &(remote_src_points[0].
data);
3542 prev_field_idx = remote_src_points[0].
field_idx;
3545 prev_field_idx = SIZE_MAX;
3548 for (
size_t i = 0; i < total_num_remote_weights; ++i) {
3550 &(remote_src_points[i].
data);
3551 size_t curr_field_idx = remote_src_points[i].
field_idx;
3553 prev_remote_src_point, curr_remote_src_point) ||
3554 (prev_field_idx != curr_field_idx)) {
3555 prev_remote_src_point = curr_remote_src_point;
3556 prev_field_idx = curr_field_idx;
3557 remote_src_points[halo_size].
data = *curr_remote_src_point;
3558 remote_src_points[halo_size].
field_idx = curr_field_idx;
3562 wsum_mf_stencils + remote_src_points[i].
reorder_idx;
3563 size_t curr_stencil_size = curr_stencil->
count;
3564 for (
size_t j = 0; j < curr_stencil_size; ++j) {
3566 &(curr_stencil->
data[j].
src), curr_remote_src_point)) &&
3576 qsort(wsum_mf_stencils, wsum_mf_count,
sizeof(*wsum_mf_stencils),
3581 size_t * num_src_per_tgt =
xmalloc(wsum_mf_count *
sizeof(*num_src_per_tgt));
3582 double * weights =
xmalloc(total_num_links *
sizeof(*weights));
3583 size_t * src_idx =
xmalloc(total_num_links *
sizeof(*src_idx));
3584 size_t * src_field_idx =
xmalloc(total_num_links *
sizeof(*src_field_idx));
3587 for (
size_t i = 0, k = 0; i < wsum_mf_count; ++i) {
3588 size_t curr_stencil_size = wsum_mf_stencils[i].
count;
3590 wsum_mf_stencils[i].
data;
3591 num_src_per_tgt[i] = curr_stencil_size;
3592 for (
size_t j = 0; j < curr_stencil_size; ++j, ++k){
3593 weights[k] = curr_weights[j].
weight;
3600 Xt_redist * halo_redists =
3602 remote_src_points, halo_size, num_src_fields, comm);
3606 (stencil_type ==
SUM) ||
3608 (stencil_type ==
SUM_MF),
3609 "ERROR(yac_interp_weights_redist_w_sum_mf): unsupported stencil type");
3616 xmalloc(wsum_mf_count *
sizeof(*tgt_infos));
3617 for (
size_t i = 0; i < wsum_mf_count; ++i)
3618 tgt_infos[i] = wsum_mf_stencils[i].tgt.
data;
3619 Xt_redist result_redist =
3621 tgt_location, tgt_infos, wsum_mf_count, comm);
3624 switch(stencil_type) {
3629 interp, halo_redists, wsum_mf_count, num_src_per_tgt, weights,
3630 src_field_idx, src_idx, ((stencil_type==
WEIGHT_SUM)?1:num_src_fields),
3636 interp, halo_redists, wsum_mf_count, num_src_per_tgt,
3637 src_field_idx, src_idx, ((stencil_type ==
SUM)?1:num_src_fields),
3642 if (result_redist != NULL) xt_redist_delete(result_redist);
3646 size_t * tgt_orig_pos =
xmalloc(wsum_mf_count *
sizeof(*tgt_orig_pos));
3647 for (
size_t i = 0; i < wsum_mf_count; ++i) {
3649 wsum_mf_stencils[i].tgt.
data.count == 1,
3650 "ERROR(yac_interp_weights_redist_w_sum): currently unsupported target "
3651 "point distribution")
3653 (size_t)(wsum_mf_stencils[i].tgt.
data.data.single.orig_pos);
3656 switch(stencil_type) {
3661 interp, halo_redists, tgt_orig_pos, wsum_mf_count,
3662 num_src_per_tgt, weights, src_field_idx, src_idx,
3663 ((stencil_type ==
WEIGHT_SUM)?1:num_src_fields));
3668 interp, halo_redists, tgt_orig_pos, wsum_mf_count,
3669 num_src_per_tgt, src_field_idx, src_idx,
3670 ((stencil_type ==
SUM)?1:num_src_fields));
3676 for (
size_t i = 0; i < new_wsum_mf_stencils_data->
count; ++i)
3678 free(new_wsum_mf_stencils_data->
data);
3679 free(new_wsum_mf_stencils_data);
3681 free(remote_src_points);
3682 free(src_field_idx);
3685 free(num_src_per_tgt);
3686 if (halo_redists != NULL) {
3687 for (
size_t i = 0; i < num_src_fields; ++i)
3688 xt_redist_delete(halo_redists[i]);
3703 double scaling_factor,
double scaling_summand) {
3705 MPI_Comm comm = weights->
comm;
3715 memset(&(local_stencil_counts[0]), 0,
sizeof(local_stencil_counts));
3717 local_stencil_counts[(
int)(weights->
stencils[i].
type)]++;
3720 stencils_offsets[i] = accu;
3721 accu += local_stencil_counts[i];
3729 local_stencil_counts, global_stencil_counts,
3736 MPI_IN_PLACE, &max_collection_size, 1, MPI_UINT64_T, MPI_MAX, comm),
3740 "ERROR(yac_interp_weights_get_interpolation): "
3741 "mismatching collection sizes")
3747 scaling_factor, scaling_summand);
3749 if (global_stencil_counts[
FIXED] > 0)
3751 weights->
comm, local_stencil_counts[
FIXED],
3754 if (global_stencil_counts[
DIRECT] > 0)
3759 if (global_stencil_counts[
SUM] > 0) {
3764 (
size_t)(local_stencil_counts[
SUM]),
SUM);
3767 "ERROR(yac_interp_weights_get_interpolation): invalid reorder type")
3770 wsum_stencils, interp, reorder,
SUM);
3771 for (
size_t i = 0; i < wsum_stencils->
count; ++i)
3773 free(wsum_stencils->
data);
3774 free(wsum_stencils);
3785 "ERROR(yac_interp_weights_get_interpolation): invalid reorder type")
3789 for (
size_t i = 0; i < wsum_stencils->
count; ++i)
3791 free(wsum_stencils->
data);
3792 free(wsum_stencils);
3795 if (global_stencil_counts[
DIRECT_MF] > 0)
3800 if (global_stencil_counts[
SUM_MF] > 0) {
3808 "ERROR(yac_interp_weights_get_interpolation): invalid reorder type")
3811 sum_mf_stencils, interp, reorder,
SUM_MF);
3812 for (
size_t i = 0; i < sum_mf_stencils->
count; ++i)
3814 free(sum_mf_stencils->
data);
3815 free(sum_mf_stencils);
3826 "ERROR(yac_interp_weights_get_interpolation): invalid reorder type")
3830 for (
size_t i = 0; i < wsum_mf_stencils->
count; ++i)
3832 free(wsum_mf_stencils->
data);
3833 free(wsum_mf_stencils);
3842 double scaling_factor,
double scaling_summand) {
3847 "ERROR(yac_interp_weights_get_interpolation_f2c): "
3848 "reorder type must be of YAC_MAPPING_ON_SRC/YAC_MAPPING_ON_TGT");
3854 scaling_factor, scaling_summand);
3866 for (
size_t i = 0 ; i < count; ++i) {
3876 "ERROR(yac_interp_weights_delete): invalid stencil type")
3877 switch(stencils[i].
type) {
3910#ifdef YAC_NETCDF_ENABLED
3913 return (*(
double const *)a > *(
double const *)b) -
3914 (*(
double const *)a < *(
double const *)b);
3921 char const * filename,
char const * src_grid_name,
char const * tgt_grid_name,
3922 size_t num_fixed_values,
double * fixed_values,
3923 size_t * num_tgt_per_fixed_value,
size_t num_links,
3924 size_t num_weights_per_link,
size_t num_src_fields,
3925 size_t * num_links_per_src_field,
3927 size_t src_grid_size,
size_t tgt_grid_size) {
3932 yac_nc_create(filename, NC_CLOBBER | NC_64BIT_OFFSET, &ncid);
3934 int dim_weight_id[8];
3937 if (num_links > 0) {
3938 YAC_HANDLE_ERROR(nc_def_dim(ncid,
"num_links", num_links, &dim_weight_id[0]));
3940 num_weights_per_link > 0,
3941 "ERROR(create_weight_file): number of links is %zu but number of "
3942 "weights per link is zero for weight file %s", num_links, filename)
3944 nc_def_dim(ncid,
"num_wgts", num_weights_per_link, &dim_weight_id[1]));
3948 "ERROR(create_weight_file): number of source fields is zero for "
3949 "weight file %s", filename)
3951 nc_def_dim(ncid,
"num_src_fields", num_src_fields, &dim_weight_id[2]));
3956 if (num_fixed_values > 0) {
3959 ncid,
"num_fixed_values", num_fixed_values, &dim_weight_id[4]));
3960 size_t num_fixed_dst = 0;
3961 for (
size_t i = 0; i < num_fixed_values; ++i)
3962 num_fixed_dst += num_tgt_per_fixed_value[i];
/* The first %zu is described as "number of fixed values", so it must be fed
 * num_fixed_values. num_fixed_dst is the summed per-value target count that
 * the message reports as zero (presumably the asserted condition -- the
 * condition line is not visible here), so printing it would always show 0. */
3965 "ERROR(create_weight_file): number of fixed values is %zu but number "
3966 "of fixed destination points is zero for weight file %s",
3967 num_fixed_values, filename)
3969 nc_def_dim(ncid,
"num_fixed_dst", num_fixed_dst, &dim_weight_id[5]));
3972 if (src_grid_size > 0)
3974 nc_def_dim(ncid,
"src_grid_size", src_grid_size, &dim_weight_id[6]));
3976 if (tgt_grid_size > 0)
3978 nc_def_dim(ncid,
"dst_grid_size", tgt_grid_size, &dim_weight_id[7]));
3980 int var_src_add_id, var_dst_add_id, var_weight_id, var_num_links_id,
3981 src_var_locs_id, tgt_var_loc_id, var_fixed_values_id,
3982 var_num_dst_per_fixed_value_id, var_dst_add_fixed_id;
3985 if (num_links > 0) {
3988 ncid,
"src_address", NC_INT, 1, dim_weight_id, &var_src_add_id));
3991 ncid,
"dst_address", NC_INT, 1, dim_weight_id, &var_dst_add_id));
3994 ncid,
"remap_matrix", NC_DOUBLE, 2, dim_weight_id, &var_weight_id));
3996 nc_def_var(ncid,
"num_links_per_src_field", NC_INT, 1,
3997 &dim_weight_id[2], &var_num_links_id));
4001 ncid,
"src_locations", NC_CHAR, 2, &dim_weight_id[2], &src_var_locs_id));
4004 ncid,
"dst_location", NC_CHAR, 1, &dim_weight_id[3], &tgt_var_loc_id));
4005 if (num_fixed_values > 0) {
4007 nc_def_var(ncid,
"fixed_values", NC_DOUBLE, 1, &dim_weight_id[4],
4008 &var_fixed_values_id));
4010 nc_def_var(ncid,
"num_dst_per_fixed_value", NC_INT, 1, &dim_weight_id[4],
4011 &var_num_dst_per_fixed_value_id));
4013 nc_def_var(ncid,
"dst_address_fixed", NC_INT, 1, &dim_weight_id[5],
4014 &var_dst_add_fixed_id));
4019 nc_put_att_text(ncid, NC_GLOBAL,
"version",
4023 nc_put_att_text(ncid, NC_GLOBAL,
"src_grid_name",
4024 strlen(src_grid_name), src_grid_name));
4026 nc_put_att_text(ncid, NC_GLOBAL,
"dst_grid_name",
4027 strlen(tgt_grid_name), tgt_grid_name));
4029 char const * str_logical[2] = {
"FALSE",
"TRUE"};
4031 strlen(str_logical[num_links > 0]),
4032 str_logical[num_links > 0]));
4034 strlen(str_logical[num_fixed_values > 0]),
4035 str_logical[num_fixed_values > 0]));
4043 if (num_links > 0) {
4044 int * num_links_per_src_field_int =
4045 xmalloc(num_src_fields *
sizeof(*num_links_per_src_field_int));
4046 for (
size_t i = 0; i < num_src_fields; ++i) {
4048 num_links_per_src_field[i] <= INT_MAX,
4049 "ERROR(create_weight_file): "
4050 "number of links per source field too big (not yet supported)")
4051 num_links_per_src_field_int[i] = (int)num_links_per_src_field[i];
4054 nc_put_var_int(ncid, var_num_links_id, num_links_per_src_field_int));
4055 free(num_links_per_src_field_int);
4058 for (
size_t i = 0; i < num_src_fields; ++i) {
4059 char const * loc_str =
yac_loc2str(src_locations[i]);
4060 size_t str_start[2] = {i, 0};
4061 size_t str_count[2] = {1, strlen(loc_str)};
4063 nc_put_vara_text(ncid, src_var_locs_id, str_start, str_count, loc_str));
4068 size_t str_start[1] = {0};
4069 size_t str_count[1] = {strlen(loc_str)};
4071 nc_put_vara_text(ncid, tgt_var_loc_id, str_start, str_count, loc_str));
4073 if (num_fixed_values > 0) {
4075 int * num_tgt_per_fixed_value_int =
4076 xmalloc(num_fixed_values *
sizeof(*num_tgt_per_fixed_value_int));
/* num_fixed_values is a size_t; use a size_t counter (as the loop over
 * num_src_fields in this function does) so the index cannot wrap around
 * before reaching the bound. */
4077 for (
size_t i = 0; i < num_fixed_values; ++i) {
4079 num_tgt_per_fixed_value[i] <= INT_MAX,
4080 "ERROR(create_weight_file): "
4081 "number of targets per fixed value is too big (not yet supported)")
4082 num_tgt_per_fixed_value_int[i] = (int)num_tgt_per_fixed_value[i];
4084 YAC_HANDLE_ERROR(nc_put_var_double(ncid, var_fixed_values_id, fixed_values));
4086 num_tgt_per_fixed_value_int));
4087 free(num_tgt_per_fixed_value_int);
4098 int ret = b_is_fixed - a_is_fixed;
4100 if (ret)
return ret;
4105 double fixed_value_a =
4107 double fixed_value_b =
4109 ret = (fixed_value_a > fixed_value_b) -
4110 (fixed_value_a < fixed_value_b);
4111 if (ret)
return ret;
4122 yac_int * min_tgt_global_id,
yac_int * max_tgt_global_id, MPI_Comm comm) {
4124 yac_int min_max[2] = {XT_INT_MAX, XT_INT_MIN};
4126 for (
size_t i = 0; i < stencils_size; ++i) {
4129 if (curr_id < min_max[0]) min_max[0] = curr_id;
4130 if (curr_id > min_max[1]) min_max[1] = curr_id;
4133 min_max[0] = XT_INT_MAX - min_max[0];
4137 MPI_IN_PLACE, min_max, 2,
yac_int_dt, MPI_MAX, comm), comm);
4139 *min_tgt_global_id = XT_INT_MAX - min_max[0];
4140 *max_tgt_global_id = min_max[1];
4146 int num_io_procs_int,
int * io_owner) {
4148 long long num_io_procs = (
long long)num_io_procs_int;
4149 long long id_range =
4150 MAX((
long long)(max_tgt_global_id - min_tgt_global_id),1);
4152 for (
size_t i = 0; i < stencils_size; ++i)
4154 ((
int)(
MIN(((
long long)(stencils[i].
tgt.
global_id - min_tgt_global_id) *
4155 num_io_procs) / id_range, num_io_procs - 1)));
4160 double ** fixed_values,
size_t * num_fixed_values, MPI_Comm comm) {
4165 double * local_fixed_values =
4166 xmalloc(stencil_count *
sizeof(*local_fixed_values));
4168 int * int_buffer =
xmalloc(2 * (
size_t)comm_size *
sizeof(*int_buffer));
4169 int * recvcounts = int_buffer + 0 * comm_size;
4170 int * rdispls = int_buffer + 1 * comm_size;
4172 size_t local_num_fixed = 0;
4175 for (
size_t i = 0; i < stencil_count;
4176 ++i, ++local_num_fixed) {
4180 qsort(local_fixed_values, local_num_fixed,
sizeof(*local_fixed_values),
4185 int local_num_fixed_int = (int)(local_num_fixed);
4188 &local_num_fixed_int, 1, MPI_INT, recvcounts, 1,MPI_INT, comm), comm);
4189 for (
int i = 0, accu = 0; i < comm_size; ++i) {
4191 accu += recvcounts[i];
4194 size_t num_all_fixed_values = 0;
4195 for (
int i = 0; i < comm_size; ++i)
4196 num_all_fixed_values += (
size_t)(recvcounts[i]);
4198 double * all_fixed_values =
4199 xmalloc(num_all_fixed_values *
sizeof(*all_fixed_values));
4204 local_fixed_values, local_num_fixed_int, MPI_DOUBLE,
4205 all_fixed_values, recvcounts, rdispls, MPI_DOUBLE, comm), comm);
4207 free(local_fixed_values);
4209 qsort(all_fixed_values, num_all_fixed_values,
sizeof(*all_fixed_values),
4212 *fixed_values =
xrealloc(all_fixed_values,
4213 num_all_fixed_values *
sizeof(*all_fixed_values));
4214 *num_fixed_values = num_all_fixed_values;
4227 "ERROR(get_num_weights_per_link): invalid stencil type")
4236 size_t num_weights_per_link = 0;
4237 for (
size_t i = 0; i < stencil_count; ++i)
4238 num_weights_per_link =
4241 uint64_t num_weights_per_link_64_t = (uint64_t)num_weights_per_link;
4244 MPI_IN_PLACE, &num_weights_per_link_64_t, 1, MPI_UINT64_T,
4245 MPI_MAX, comm), comm);
4246 num_weights_per_link = (size_t)num_weights_per_link_64_t;
4248 return num_weights_per_link;
4256 "ERROR(get_num_links_per_src_field): "
4257 "stencil type FIXED not supported by this routine")
4265 "ERROR(get_num_links_per_src_field): invalid stencil type")
4266 switch (stencil->
type) {
4268 case(
DIRECT):
return (src_field_idx == 0)?1:0;
4277 for (
size_t i = 0; i < stencil_size; ++i)
4285 for (
size_t i = 0; i < stencil_size; ++i)
4294 size_t num_fixed_values,
double * fixed_values,
4295 size_t * num_tgt_per_fixed_value,
4296 size_t * num_fixed_tgt,
size_t num_src_fields,
4297 size_t * num_links_per_src_field,
size_t * num_links) {
4301 for (
size_t i = 0; i < num_fixed_values; ++i) num_tgt_per_fixed_value[i] = 0;
4302 for (
size_t i = 0; i < num_src_fields; ++i) num_links_per_src_field[i] = 0;
4304 for (
size_t i = 0; i < stencil_count; ++i) {
4307 for (
size_t j = 0; j < num_fixed_values; ++j) {
4308 if (curr_fixed_value == fixed_values[j]) {
4309 num_tgt_per_fixed_value[j]++;
4315 for (
size_t j = 0; j < num_src_fields; ++j) {
4316 num_links_per_src_field[j] +=
4321 for (
size_t i = 0; i < num_src_fields; ++i)
4322 *num_links += num_links_per_src_field[i];
4326 size_t num_fixed_values,
size_t * num_tgt_per_fixed_value,
4327 size_t num_src_fields,
size_t * num_links_per_src_field,
4328 size_t * fixed_offsets,
size_t * link_offsets, MPI_Comm comm) {
4333 size_t count = num_fixed_values + num_src_fields;
4334 uint64_t * uint64_t_buffer =
xmalloc(3 * count *
sizeof(*uint64_t_buffer));
4335 uint64_t * global_counts = uint64_t_buffer + 0 * count;
4336 uint64_t * local_counts = uint64_t_buffer + 1 * count;
4337 uint64_t * offsets = uint64_t_buffer + 2 * count;
4339 for (
size_t i = 0; i < num_fixed_values; ++i)
4340 local_counts[i] = (uint64_t)(num_tgt_per_fixed_value[i]);
4341 for (
size_t i = 0; i < num_src_fields; ++i)
4342 local_counts[num_fixed_values + i] = (uint64_t)(num_links_per_src_field[i]);
4345 MPI_Allreduce(local_counts, global_counts, (
int)count, MPI_UINT64_T,
4346 MPI_SUM, comm), comm);
4348 MPI_Exscan(local_counts, offsets, (
int)count, MPI_UINT64_T, MPI_SUM, comm),
4350 if (comm_rank == 0) memset(offsets, 0, count *
sizeof(*offsets));
4352 for (
size_t i = 0, accu = 0; i < num_fixed_values; ++i) {
4353 fixed_offsets[i] = (size_t)(offsets[i]) + accu;
4354 accu += (size_t)(global_counts[i]);
4356 for (
size_t i = 0, accu = 0; i < num_src_fields; ++i) {
4357 link_offsets[i] = (size_t)(offsets[i+num_fixed_values]) + accu;
4358 accu += (size_t)(global_counts[i+num_fixed_values]);
4360 free(uint64_t_buffer);
4366 (global_id < INT_MAX) && (global_id != XT_INT_MAX),
4367 "ERROR(global_id_to_address): "
4368 "a global id cannot be converted into a address; too big")
4369 return (
int)global_id + 1;
4374 int * tgt_address) {
4376 for (
size_t i = 0; i < stencil_count; ++i)
4382 size_t * num_links_per_src_field,
size_t num_src_fields,
4383 int * src_address,
int * tgt_address,
double * weight) {
4385 size_t * src_field_offsets =
4386 xmalloc(2 * num_src_fields *
sizeof(*src_field_offsets));
4387 size_t * prev_src_field_offsets = src_field_offsets + num_src_fields;
4388 for (
size_t i = 0, accu = 0; i < num_src_fields; ++i) {
4389 src_field_offsets[i] = accu;
4390 accu += num_links_per_src_field[i];
4394 for (
size_t i = 0; i < stencil_count; ++i, ++curr_stencil) {
4396 memcpy(prev_src_field_offsets, src_field_offsets,
4397 num_src_fields *
sizeof(*prev_src_field_offsets));
4402 "ERROR(stencil_get_link_data): this call is invalid for FIXED stencils")
4405 (curr_stencil->
type ==
SUM) ||
4410 "ERROR(stencil_get_link_data): invalid stencil type")
4411 size_t src_field_offset;
4412 switch (curr_stencil->
type) {
4415 src_field_offset = src_field_offsets[0]++;
4416 src_address[src_field_offset] =
4418 tgt_address[src_field_offset] = curr_tgt_address;
4419 weight[src_field_offset] = 1.0;
4424 for (
size_t k = 0; k < curr_count; ++k) {
4425 src_field_offset = src_field_offsets[0]++;
4426 src_address[src_field_offset] =
4428 tgt_address[src_field_offset] = curr_tgt_address;
4429 weight[src_field_offset] = 1.0;
4437 for (
size_t k = 0; k < curr_count; ++k) {
4438 src_field_offset = src_field_offsets[0]++;
4439 src_address[src_field_offset] =
4441 tgt_address[src_field_offset] = curr_tgt_address;
4442 weight[src_field_offset] = weights[k];
4449 src_address[src_field_offset ] =
4451 tgt_address[src_field_offset ] = curr_tgt_address;
4452 weight[src_field_offset ] = 1.0;
4459 for (
size_t k = 0; k < curr_count; ++k) {
4460 src_field_offset = src_field_offsets[field_indices[k]]++;
4461 src_address[src_field_offset] =
4463 tgt_address[src_field_offset] = curr_tgt_address;
4464 weight[src_field_offset] = 1.0;
4474 for (
size_t k = 0; k < curr_count; ++k) {
4475 src_field_offset = src_field_offsets[field_indices[k]]++;
4476 src_address[src_field_offset] =
4478 tgt_address[src_field_offset] = curr_tgt_address;
4479 weight[src_field_offset] = weights[k];
4485 for (
size_t j = 0; j < num_src_fields; ++j)
4487 src_address + prev_src_field_offsets[j],
4488 src_field_offsets[j] - prev_src_field_offsets[j],
4489 weight + prev_src_field_offsets[j]);
4491 free(src_field_offsets);
4496 int * owner_ranks,
size_t * new_count,
4499 int comm_rank, comm_size;
4503 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
4505 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
4507 size_t * stencil_indices =
xmalloc(count *
sizeof(*stencil_indices));
4508 for (
size_t i = 0; i < count; ++i) {
4509 stencil_indices[i] = i;
4510 sendcounts[owner_ranks[i]]++;
4514 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4519 *new_count = recvcounts[comm_size - 1] + rdispls[comm_size - 1];
4523 free(stencil_indices);
4530 char const * src_grid_name,
char const * tgt_grid_name,
4531 size_t src_grid_size,
size_t tgt_grid_size) {
4533#ifndef YAC_NETCDF_ENABLED
4543 "ERROR(yac_interp_weights_write_to_file): "
4544 "YAC is built without the NetCDF support");
4547 MPI_Comm comm = weights->
comm;
4548 int comm_rank, comm_size;
4559 yac_int min_tgt_global_id, max_tgt_global_id;
4562 &min_tgt_global_id, &max_tgt_global_id, comm);
4569 min_tgt_global_id, max_tgt_global_id,
4570 num_io_ranks, io_owner);
4572 io_owner[i] = io_ranks[io_owner[i]];
4575 size_t io_stencil_count = 0;
4581 &io_stencil_count, &io_stencils);
4585 uint64_t grid_sizes[2] = {(uint64_t)src_grid_size, (uint64_t)tgt_grid_size};
4588 MPI_IN_PLACE, grid_sizes, 2, MPI_UINT64_T, MPI_MAX, comm), comm);
4589 src_grid_size = (size_t)(grid_sizes[0]);
4590 tgt_grid_size = (size_t)(grid_sizes[1]);
4593 yac_mpi_call(MPI_Comm_split(comm, io_flag, comm_rank, &io_comm), comm);
4607 qsort(io_stencils, io_stencil_count,
sizeof(*io_stencils),
4610 yac_mpi_call(MPI_Comm_rank(io_comm, &comm_rank), comm);
4611 yac_mpi_call(MPI_Comm_size(io_comm, &comm_size), comm);
4613 double * fixed_values = NULL;
4614 size_t num_fixed_values = 0;
4616 io_stencils, io_stencil_count, &fixed_values, &num_fixed_values, io_comm);
4617 size_t num_src_fields =
weights->num_src_fields;
4618 size_t num_weights_per_link =
4621 size_t * size_t_buffer =
4622 xmalloc(2 * (num_fixed_values + num_src_fields) *
sizeof(*size_t_buffer));
4623 size_t * num_tgt_per_fixed_value = size_t_buffer;
4624 size_t * num_links_per_src_field = size_t_buffer + num_fixed_values;
4625 size_t * fixed_offsets = size_t_buffer + num_fixed_values + num_src_fields;
4626 size_t * link_offsets = size_t_buffer + 2 * num_fixed_values + num_src_fields;
4628 size_t num_fixed_tgt = 0;
4629 size_t num_links = 0;
4631 io_stencils, io_stencil_count, num_fixed_values, fixed_values,
4632 num_tgt_per_fixed_value, &num_fixed_tgt, num_src_fields,
4633 num_links_per_src_field, &num_links);
4636 num_fixed_values, num_tgt_per_fixed_value,
4637 num_src_fields, num_links_per_src_field,
4638 fixed_offsets, link_offsets, io_comm);
4640 if (comm_rank == comm_size - 1) {
4642 size_t * total_num_tgt_per_fixed_value =
4643 xmalloc(num_fixed_values *
sizeof(*total_num_tgt_per_fixed_value));
4644 for (
size_t i = 0, accu = 0; i < num_fixed_values; ++i) {
4645 total_num_tgt_per_fixed_value[i] =
4646 fixed_offsets[i] + num_tgt_per_fixed_value[i] - accu;
4647 accu += total_num_tgt_per_fixed_value[i];
4649 size_t total_num_links = link_offsets[num_src_fields-1] +
4650 num_links_per_src_field[num_src_fields-1];
4652 size_t * total_num_links_per_src_field =
4653 xmalloc(num_src_fields *
sizeof(*total_num_links_per_src_field));
4654 for (
size_t i = 0, accu = 0; i < num_src_fields; ++i) {
4655 total_num_links_per_src_field[i] =
4656 link_offsets[i] + num_links_per_src_field[i] - accu;
4657 accu += total_num_links_per_src_field[i];
4661 filename, src_grid_name, tgt_grid_name,
4662 num_fixed_values, fixed_values, total_num_tgt_per_fixed_value,
4663 total_num_links, num_weights_per_link,
4664 num_src_fields, total_num_links_per_src_field,
4666 src_grid_size, tgt_grid_size);
4668 free(total_num_links_per_src_field);
4669 free(total_num_tgt_per_fixed_value);
4680 yac_nc_open(filename, NC_WRITE | NC_SHARE, &ncid);
4682 if (num_fixed_tgt > 0) {
4684 int * tgt_address_fixed =
4685 xmalloc(num_fixed_tgt *
sizeof(*tgt_address_fixed));
4689 int var_dst_add_fixed_id;
4693 for (
size_t i = 0, offset = 0; i < num_fixed_values; ++i) {
4695 if (num_tgt_per_fixed_value[i] == 0)
continue;
4697 size_t start[1] = {fixed_offsets[i]};
4698 size_t count[1] = {num_tgt_per_fixed_value[i]};
4701 ncid, var_dst_add_fixed_id, start, count, tgt_address_fixed + offset));
4702 offset += num_tgt_per_fixed_value[i];
4705 free(tgt_address_fixed);
4708 if (num_links > 0) {
4710 int * src_address_link =
xmalloc(num_links *
sizeof(*src_address_link));
4711 int * tgt_address_link =
xmalloc(num_links *
sizeof(*tgt_address_link));
4712 double * w =
xmalloc(num_links * num_weights_per_link *
sizeof(*w));
4714 io_stencils + num_fixed_tgt, io_stencil_count - num_fixed_tgt,
4715 num_links_per_src_field, num_src_fields,
4716 src_address_link, tgt_address_link, w);
4718 int var_src_add_id, var_dst_add_id, var_weight_id;
4723 for (
size_t i = 0, offset = 0; i < num_src_fields; ++i) {
4725 if (num_links_per_src_field[i] == 0)
continue;
4727 size_t start[2] = {link_offsets[i], 0};
4728 size_t count[2] = {num_links_per_src_field[i], num_weights_per_link};
4732 ncid, var_src_add_id, start, count, src_address_link + offset));
4735 ncid, var_dst_add_id, start, count, tgt_address_link + offset));
4738 ncid, var_weight_id, start, count,
4739 w + num_weights_per_link * offset));
4741 offset += num_links_per_src_field[i];
4745 free(tgt_address_link);
4746 free(src_address_link);
4755 free(size_t_buffer);
4763 return weights->stencils_size;
4770 size_t stencils_size =
weights->stencils_size;
4772 yac_int * global_ids =
xmalloc(stencils_size *
sizeof(*global_ids));
4774 for (
size_t i = 0; i < stencils_size; ++i)
void yac_remote_point_pack(struct remote_point *point, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_remote_point_unpack(void *buffer, int buffer_size, int *position, struct remote_point *point, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_remote_points_pack(struct remote_points *points, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_remote_points_unpack(void *buffer, int buffer_size, int *position, struct remote_points **points, MPI_Datatype point_info_dt, MPI_Comm comm)
int yac_remote_points_get_pack_size(struct remote_points *points, MPI_Datatype point_info_dt, MPI_Comm comm)
MPI_Datatype yac_get_remote_point_info_mpi_datatype(MPI_Comm comm)
int yac_remote_point_get_pack_size(struct remote_point *point, MPI_Datatype point_info_dt, MPI_Comm comm)
#define ENSURE_ARRAY_SIZE(arrayp, curr_array_size, req_size)
#define YAC_WEIGHT_FILE_VERSION_STRING
static MPI_Datatype get_fixed_stencil_mpi_datatype(MPI_Comm comm)
static size_t get_num_links_per_src_field(struct interp_weight_stencil *stencil, size_t src_field_idx)
static int get_stencil_pack_size_direct_mf(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static void unpack_stencil_wsum_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static MPI_Datatype get_direct_stencil_mpi_datatype(MPI_Comm comm)
static void stencil_determine_tgt_global_id_range(struct interp_weight_stencil *stencils, size_t stencils_size, yac_int *min_tgt_global_id, yac_int *max_tgt_global_id, MPI_Comm comm)
static void unpack_stencil_sum_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_interp_weights_add_fixed(struct yac_interp_weights *weights, struct remote_points *tgts, double fixed_value)
static int compare_stencils_direct_mf(const void *a, const void *b)
void yac_interp_weights_add_sum_mf(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_src_per_field_per_tgt, struct remote_point **srcs_per_field, size_t num_src_fields)
static struct remote_points * copy_remote_points_mf(struct remote_point **points, size_t *counts, size_t num_fields)
static struct interp_weight_stencils_wsum_mf * redist_wsum_mf_stencils_tgt(MPI_Comm comm, struct interp_weight_stencils_wsum_mf *wsum_stencils_data)
static int compare_remote_point_info(const void *a, const void *b)
static int global_id_to_address(yac_int global_id)
static void yac_interp_weights_redist_direct_mf(MPI_Comm comm, uint64_t count, struct interp_weight_stencil *direct_mf_stencils, struct yac_interpolation *interp)
static int get_stencil_pack_size_sum(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static void pack_stencil_direct(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
yac_interp_weight_stencil_type
@ WEIGHT_STENCIL_TYPE_SIZE
static Xt_redist generate_redist_put_double(enum yac_location location, struct remote_point_infos *point_infos, size_t count, MPI_Comm comm)
static void pack_stencil_wsum(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static void free_remote_points(struct remote_points *points)
struct yac_interpolation * yac_interp_weights_get_interpolation(struct yac_interp_weights *weights, enum yac_interp_weights_reorder_type reorder, size_t collection_size, double frac_mask_fallback_value, double scaling_factor, double scaling_summand)
static void yac_interp_weight_stencils_delete(struct interp_weight_stencil *stencils, size_t count)
static int compare_stencils_fixed(const void *a, const void *b)
static struct interp_weight_stencil stencils_merge(struct interp_weight_stencil **stencils, double *w, size_t num_stencils, struct remote_point point)
static int get_stencil_wsum_mf_pack_size(struct interp_weight_stencil_wsum_mf *stencil, MPI_Datatype wsum_mf_weight_dt, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_interp_weights_add_wsum(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_src_per_tgt, struct remote_point *srcs, double *w)
static struct interp_weight_stencil stencils_merge_sum(struct interp_weight_stencil **stencils, double *w, size_t num_stencils)
static int compare_w_global_id(const void *a, const void *b)
static void compact_srcs_w(struct remote_points *srcs, double **w)
void yac_interp_weights_delete(struct yac_interp_weights *weights)
static size_t unpack_stencils_wsum_mf(struct interp_weight_stencil_wsum_mf *wsum_stencils, struct interp_weight_stencil_wsum_mf_weight *weight_buffer, size_t count, void *packed_data, size_t packed_data_size, MPI_Comm comm)
static void pack_stencil_sum(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static void determine_stencils_io_owner(struct interp_weight_stencil *stencils, size_t stencils_size, yac_int min_tgt_global_id, yac_int max_tgt_global_id, int num_io_procs_int, int *io_owner)
static Xt_redist * generate_halo_redists(struct remote_point_info_reorder *halo_points, size_t count, size_t num_src_fields, MPI_Comm comm)
static size_t get_num_weights_per_link(struct interp_weight_stencil *stencil)
static void free_remote_point(struct remote_point point)
static void yac_interp_weights_redist_stencils(MPI_Comm comm, size_t count, struct interp_weight_stencil *stencils, int *owner_ranks, size_t *new_count, struct interp_weight_stencil **new_stencils)
static void unpack_stencil_wsum(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_interp_weights_wcopy_weights(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_stencils_per_tgt, size_t *stencil_indices, int *stencil_ranks, double *w)
static int compare_interp_weight_stencil(const void *a, const void *b)
static void pack_stencil_wsum_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_interp_weights_add_wsum_mf(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_src_per_field_per_tgt, struct remote_point **srcs_per_field, double *w, size_t num_src_fields)
static int compare_rank_pos_reorder_field_idx(const void *a, const void *b)
static int get_stencil_pack_size_wsum(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static void pack_stencils(struct interp_weight_stencil *stencils, size_t count, size_t *pack_order, void **pack_data, int *pack_sizes, MPI_Datatype point_info_dt, MPI_Comm comm)
static void get_stencils_pack_sizes(struct interp_weight_stencil *stencils, size_t count, size_t *pack_order, int *pack_sizes, MPI_Datatype point_info_dt, MPI_Comm comm)
yac_int * yac_interp_weights_get_interp_tgt(struct yac_interp_weights *weights)
static int get_stencil_pack_size_sum_mf(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static void pack_stencil_fixed(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static int get_stencil_pack_size_wsum_mf(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static struct interp_weight_stencil wcopy_interp_weight_stencil(struct interp_weight_stencil *stencil, struct remote_point point, double weight)
static struct interp_weight_stencil * yac_interp_weights_get_stencils(struct yac_interp_weights *weights, size_t *stencil_indices, int *stencil_ranks, size_t count)
static void unpack_stencil_direct_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static int compare_stencils(const void *a, const void *b)
static void create_weight_file(char const *filename, char const *src_grid_name, char const *tgt_grid_name, size_t num_fixed_values, double *fixed_values, size_t *num_tgt_per_fixed_value, size_t num_links, size_t num_weights_per_link, size_t num_src_fields, size_t *num_links_per_src_field, enum yac_location *src_locations, enum yac_location tgt_location, size_t src_grid_size, size_t tgt_grid_size)
static int get_stencil_pack_size_direct(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_interp_weights_write_to_file(struct yac_interp_weights *weights, char const *filename, char const *src_grid_name, char const *tgt_grid_name, size_t src_grid_size, size_t tgt_grid_size)
static void stencil_get_counts(struct interp_weight_stencil *stencils, size_t stencil_count, size_t num_fixed_values, double *fixed_values, size_t *num_tgt_per_fixed_value, size_t *num_fixed_tgt, size_t num_src_fields, size_t *num_links_per_src_field, size_t *num_links)
static void copy_remote_points_no_alloc(struct remote_point *points_to, struct remote_point *points_from, size_t count, struct remote_point_info **point_info_buffer_)
static struct remote_points * copy_remote_points(struct remote_point *points, size_t count)
static int compare_remote_point(const void *a, const void *b)
static MPI_Datatype get_direct_mf_stencil_mpi_datatype(MPI_Comm comm)
struct yac_interpolation * yac_interp_weights_get_interpolation_f2c(struct yac_interp_weights *weights, int reorder, size_t collection_size, double frac_mask_fallback_value, double scaling_factor, double scaling_summand)
MPI_Comm yac_interp_weights_get_comm(struct yac_interp_weights *weights)
static MPI_Datatype get_wsum_mf_weight_mpi_datatype(MPI_Comm comm)
static struct interp_weight_stencil copy_interp_weight_stencil(struct interp_weight_stencil *stencil, struct remote_point point)
void yac_interp_weights_add_direct(struct yac_interp_weights *weights, struct remote_points *tgts, struct remote_point *srcs)
void yac_interp_weights_add_sum(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_src_per_tgt, struct remote_point *srcs)
static void unpack_stencil_direct(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static int compare_interp_weight_stencil_wsum_mf_tgt_orig_pos(const void *a, const void *b)
struct yac_interp_weights * yac_interp_weights_new(MPI_Comm comm, enum yac_location tgt_location, enum yac_location *src_locations, size_t num_src_fields)
static void unpack_stencil_sum(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static struct interp_weight_stencils_wsum_mf * redist_wsum_mf_stencils(MPI_Comm comm, struct interp_weight_stencils_wsum_mf *wsum_stencils_data, int *stencil_owner, size_t *reorder_idx, size_t num_owners)
static struct interp_weight_stencils_wsum_mf * redist_wsum_mf_stencils_src(MPI_Comm comm, struct interp_weight_stencils_wsum_mf *wsum_stencils_data)
static int compare_stencils_direct(const void *a, const void *b)
static size_t stencil_get_num_weights_per_tgt(struct interp_weight_stencil *stencils, size_t stencil_count, MPI_Comm comm)
static void stencil_get_tgt_address(struct interp_weight_stencil *stencils, size_t stencil_count, int *tgt_address)
static void yac_interp_weights_redist_w_sum_mf(MPI_Comm comm, enum yac_location tgt_location, struct interp_weight_stencils_wsum_mf *wsum_mf_stencils_data, struct yac_interpolation *interp, enum yac_interp_weights_reorder_type reorder, enum yac_interp_weight_stencil_type stencil_type)
static int get_stencil_pack_size_fixed(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static struct remote_point copy_remote_point(struct remote_point point)
static void stencil_get_link_data(struct interp_weight_stencil *stencils, size_t stencil_count, size_t *num_links_per_src_field, size_t num_src_fields, int *src_address, int *tgt_address, double *weight)
size_t yac_interp_weights_get_interp_count(struct yac_interp_weights *weights)
static Xt_redist generate_direct_redist(uint64_t *src_orig_poses, size_t *sendcounts, struct interp_weight_stencil_direct *tgt_stencils, size_t *recvcounts, MPI_Comm comm)
static struct interp_weight_stencil stencils_merge_wsum(struct interp_weight_stencil **stencils, double *w, size_t num_stencils)
static void pack_stencil_sum_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static void pack_stencil_direct_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static void pack_stencils_wsum_mf(struct interp_weight_stencil_wsum_mf *wsum_stencils, size_t count, size_t *pack_order, void **pack_data, int *pack_sizes, int *weight_counts, MPI_Comm comm)
static int compare_interp_weight_stencil_wsum_mf_src_orig_pos(const void *a, const void *b)
static struct interp_weight_stencil * exchange_stencils(MPI_Comm comm, struct interp_weight_stencil *stencils, size_t *stencil_indices, size_t *stencil_sendcounts, size_t *stencil_recvcounts)
static void unpack_stencil_fixed(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static void yac_interp_weights_redist_fixed(MPI_Comm comm, uint64_t count, struct interp_weight_stencil *fixed_stencils, struct yac_interpolation *interp)
static Xt_redist * generate_direct_mf_redists(uint64_t *src_orig_pos, size_t *sendcounts, struct interp_weight_stencil_direct_mf *tgt_stencils, size_t *recvcounts, size_t num_src_fields, MPI_Comm comm)
static int compare_double(void const *a, void const *b)
static void stencil_xscan_offsets(size_t num_fixed_values, size_t *num_tgt_per_fixed_value, size_t num_src_fields, size_t *num_links_per_src_field, size_t *fixed_offsets, size_t *link_offsets, MPI_Comm comm)
static void yac_interp_weights_redist_direct(MPI_Comm comm, uint64_t count, struct interp_weight_stencil *direct_stencils, struct yac_interpolation *interp)
static void xt_redist_msg_free(struct Xt_redist_msg *msgs, size_t count, MPI_Comm comm)
void yac_interp_weights_add_direct_mf(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *src_field_indices, struct remote_point **srcs_per_field, size_t num_src_fields)
static void unpack_stencils(struct interp_weight_stencil *stencils, size_t count, void *packed_data, size_t packed_data_size, MPI_Datatype point_info_dt, MPI_Comm comm)
static struct remote_point_info select_src(struct remote_point_infos src)
static struct interp_weight_stencils_wsum_mf * generate_w_sum_mf_stencils(struct interp_weight_stencil *stencils, size_t count, enum yac_interp_weight_stencil_type stencil_type)
static int compute_owner(int *ranks, size_t count)
static void stencil_get_fixed_values(struct interp_weight_stencil *stencils, size_t stencil_count, double **fixed_values, size_t *num_fixed_values, MPI_Comm comm)
yac_interp_weights_reorder_type
@ YAC_MAPPING_ON_TGT
weights will be applied at target processes
@ YAC_MAPPING_ON_SRC
weights will be applied at source processes
void yac_interpolation_add_sum_at_src(struct yac_interpolation *interp, Xt_redist *halo_redists, size_t tgt_count, size_t *num_src_per_tgt, size_t *src_field_idx, size_t *src_idx, size_t num_src_fields, Xt_redist result_redist)
void yac_interpolation_add_weight_sum_mvp_at_tgt(struct yac_interpolation *interp, Xt_redist *src_redists, size_t *tgt_pos, size_t tgt_count, size_t *num_src_per_tgt, double *weights, size_t *src_field_idx, size_t *src_idx, size_t num_src_fields)
struct yac_interpolation * yac_interpolation_new(size_t collection_size, double frac_mask_fallback_value, double scale_factor, double scale_summand)
void yac_interpolation_add_direct_mf(struct yac_interpolation *interp, Xt_redist *redists, size_t num_src_fields)
void yac_interpolation_add_sum_at_tgt(struct yac_interpolation *interp, Xt_redist *src_redists, size_t *tgt_pos, size_t tgt_count, size_t *num_src_per_tgt, size_t *src_field_idx, size_t *src_idx, size_t num_src_fields)
void yac_interpolation_add_fixed(struct yac_interpolation *interp, double value, size_t count, size_t *pos)
void yac_interpolation_add_direct(struct yac_interpolation *interp, Xt_redist redist)
void yac_interpolation_add_weight_sum_mvp_at_src(struct yac_interpolation *interp, Xt_redist *halo_redists, size_t tgt_count, size_t *num_src_per_tgt, double *weights, size_t *src_field_idx, size_t *src_idx, size_t num_src_fields, Xt_redist result_redist)
void yac_get_io_ranks(MPI_Comm comm, int *local_is_io_, int **io_ranks_, int *num_io_ranks_)
void yac_nc_create(const char *path, int cmode, int *ncidp)
void yac_nc_inq_varid(int ncid, char const *name, int *varidp)
void yac_nc_open(const char *path, int omode, int *ncidp)
#define YAC_HANDLE_ERROR(exp)
char const * yac_loc2str(enum yac_location location)
#define YAC_MAX_LOC_STR_LEN
#define xrealloc(ptr, size)
struct remote_point_info src
struct remote_point_info src
struct remote_point_info src
struct interp_weight_stencil_wsum_mf_weight * data
union interp_weight_stencil::@35 data
struct interp_weight_stencil::@35::@39 weight_sum
struct remote_points * srcs
struct interp_weight_stencil::@35::@42 weight_sum_mf
struct interp_weight_stencil::@35::@41 sum_mf
struct interp_weight_stencil::@35::@36 fixed
enum yac_interp_weight_stencil_type type
struct interp_weight_stencil::@35::@38 sum
struct interp_weight_stencil::@35::@40 direct_mf
struct interp_weight_stencil::@35::@37 direct
struct interp_weight_stencils_wsum_mf stencils
struct interp_weight_stencil_wsum_mf_weight buffer[]
struct interp_weight_stencil_wsum_mf * data
struct remote_point_info data
struct remote_point_info single
struct remote_point_info * multi
union remote_point_infos::@1 data
struct remote_point_infos data
struct remote_point_info buffer[]
struct remote_point * data
struct interp_weight_stencil * stencils
size_t stencils_array_size
enum yac_location tgt_location
enum yac_location * src_locations
double frac_mask_fallback_value
void yac_quicksort_index_int_double(int *a, size_t n, double *idx)
void yac_quicksort_index_int_size_t(int *a, size_t n, size_t *idx)
void yac_quicksort_index_int_size_t_size_t(int *a, size_t n, size_t *b, size_t *c)
static void yac_remove_duplicates_double(double *array, size_t *n)
void yac_quicksort_index_size_t_size_t(size_t *a, size_t n, size_t *idx)
void yac_quicksort_index(int *a, size_t n, int *idx)
static struct user_input_data_points ** points
#define YAC_ASSERT_F(exp, format,...)
#define YAC_ASSERT(exp, msg)
void yac_generate_alltoallv_args(int count, size_t const *sendcounts, size_t *recvcounts, size_t *sdispls, size_t *rdispls, MPI_Comm comm)
void yac_free_comm_buffers(size_t *sendcounts, size_t *recvcounts, size_t *sdispls, size_t *rdispls)
void yac_get_comm_buffers(int count, size_t **sendcounts, size_t **recvcounts, size_t **sdispls, size_t **rdispls, MPI_Comm comm)
MPI_Datatype yac_create_resized(MPI_Datatype dt, size_t new_size, MPI_Comm comm)
void yac_alltoallv_p2p(void const *send_buffer, size_t const *sendcounts, size_t const *sdispls, void *recv_buffer, size_t const *recvcounts, size_t const *rdispls, size_t dt_size, MPI_Datatype dt, MPI_Comm comm)
#define yac_mpi_call(call, comm)