481 size_t * num_src_per_field_per_tgt,
struct remote_point ** srcs_per_field,
482 size_t num_src_fields) {
484 if (tgts->
count == 0)
return;
486 if (num_src_fields == 1) {
488 weights, tgts, num_src_per_field_per_tgt, srcs_per_field[0]);
494 int flag_count_one = 1;
495 for (
size_t i = 0, k = 0; i < tgts->
count; ++i) {
497 for (
size_t j = 0; j < num_src_fields; ++j, ++k)
498 count += num_src_per_field_per_tgt[k];
505 if (flag_count_one) {
507 size_t * src_field_indices =
510 for (
size_t i = 0, k = 0; i < tgts->
count; ++i)
511 for (
size_t j = 0; j < num_src_fields; ++j, ++k)
512 if (num_src_per_field_per_tgt[k])
513 src_field_indices[i] = j;
516 weights, tgts, src_field_indices, srcs_per_field, num_src_fields);
518 free(src_field_indices);
521 struct remote_point * curr_srcs_per_field[num_src_fields];
522 memcpy(curr_srcs_per_field, srcs_per_field,
523 num_src_fields *
sizeof(*srcs_per_field));
526 size_t stencils_array_size =
weights->stencils_array_size;
527 size_t stencils_size =
weights->stencils_size;
530 stencils, stencils_array_size, stencils_size + tgts->
count);
532 for (
size_t i = 0; i < tgts->
count; ++i, ++stencils_size) {
534 size_t * curr_num_src_per_src_field =
535 num_src_per_field_per_tgt + i * num_src_fields;
536 size_t curr_num_src = 0;
537 for (
size_t j = 0; j < num_src_fields; ++j)
538 curr_num_src += curr_num_src_per_src_field[j];
546 for (
size_t j = 0, l = 0; j < num_src_fields; ++j) {
547 size_t curr_num_src = curr_num_src_per_src_field[j];
548 for (
size_t k = 0; k < curr_num_src; ++k, ++l) {
554 curr_srcs_per_field, curr_num_src_per_src_field, num_src_fields);
556 for (
size_t j = 0; j < num_src_fields; ++j)
557 curr_srcs_per_field[j] += curr_num_src_per_src_field[j];
561 weights->stencils_array_size = stencils_array_size;
562 weights->stencils_size = stencils_size;
568 size_t * num_src_per_field_per_tgt,
struct remote_point ** srcs_per_field,
569 double * w,
size_t num_src_fields) {
571 if (tgts->
count == 0)
return;
573 if (num_src_fields == 1) {
575 weights, tgts, num_src_per_field_per_tgt, srcs_per_field[0], w);
581 int flag_weight_one = 1;
582 for (
size_t i = 0, j = 0;
583 (i < tgts->
count) && flag_weight_one; ++i) {
585 for (
size_t src_field_idx = 0; src_field_idx < num_src_fields;
589 num_src_per_field_per_tgt[i * num_src_fields + src_field_idx];
591 for (
size_t k = 0; (k < curr_count) && flag_weight_one; ++k, ++j)
592 flag_weight_one &= fabs(w[j] - 1.0) < 1e-12;
597 if (flag_weight_one) {
600 weights, tgts, num_src_per_field_per_tgt, srcs_per_field, num_src_fields);
604 struct remote_point * curr_srcs_per_field[num_src_fields];
605 memcpy(curr_srcs_per_field, srcs_per_field,
606 num_src_fields *
sizeof(*srcs_per_field));
609 size_t stencils_array_size =
weights->stencils_array_size;
610 size_t stencils_size =
weights->stencils_size;
613 stencils, stencils_array_size, stencils_size + tgts->
count);
615 for (
size_t i = 0; i < tgts->
count; ++i, ++stencils_size) {
617 size_t * curr_num_src_per_src_field =
618 num_src_per_field_per_tgt + i * num_src_fields;
619 size_t curr_num_weights = 0;
620 for (
size_t j = 0; j < num_src_fields; ++j)
621 curr_num_weights += curr_num_src_per_src_field[j];
622 double * curr_weights =
623 xmalloc(curr_num_weights *
sizeof(*curr_weights));
630 for (
size_t j = 0, l = 0; j < num_src_fields; ++j) {
631 size_t curr_num_src = curr_num_src_per_src_field[j];
632 for (
size_t k = 0; k < curr_num_src; ++k, ++l)
field_indices[l] = j;
637 memcpy(curr_weights, w, curr_num_weights *
sizeof(*curr_weights));
639 for (
size_t j = 0; j < num_src_fields; ++j)
640 curr_srcs_per_field[j] += curr_num_src_per_src_field[j];
641 w += curr_num_weights;
645 weights->stencils_array_size = stencils_array_size;
646 weights->stencils_size = stencils_size;
683 MPI_Comm comm, uint64_t count,
690 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
692 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
694 for (
size_t i = 0; i < count; ++i) {
698 (&(fixed_stencils[i].tgt.
data.
data.single)):
700 for (
int j = 0; j < curr_count; ++j)
701 sendcounts[curr_point_infos[j].
rank]++;
705 1, sendcounts, recvcounts, sdispls, rdispls, comm);
707 size_t send_buffer_size =
708 sdispls[comm_size] + sendcounts[comm_size - 1];
709 size_t recv_buffer_size =
710 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
713 xmalloc((send_buffer_size + recv_buffer_size) *
sizeof(*buffer));
718 for (
size_t i = 0; i < count; ++i) {
722 (&(fixed_stencils[i].tgt.
data.
data.single)):
725 for (
int j = 0; j < curr_count; ++j) {
726 size_t pos = sdispls[curr_point_infos[j].
rank + 1]++;
728 send_buffer[pos].
orig_pos = curr_point_infos[j].orig_pos;
737 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls,
738 sizeof(*send_buffer), stencil_fixed_dt, comm);
744 if (recv_buffer_size == 0) {
750 qsort(recv_buffer, recv_buffer_size,
sizeof(*recv_buffer),
753 size_t * tgt_pos =
xmalloc(recv_buffer_size *
sizeof(*tgt_pos));
754 for (
size_t i = 0; i < recv_buffer_size; ++i)
755 tgt_pos[i] = (
size_t)(recv_buffer[i].
orig_pos);
757 size_t offset = 0, i = 0;
758 while (offset < recv_buffer_size) {
759 double fixed_value = recv_buffer[i].
value;
760 while ((i < recv_buffer_size) && (fixed_value == recv_buffer[i].
value)) ++i;
761 size_t curr_count = i - offset;
763 interp, fixed_value, curr_count, tgt_pos + offset);
798 uint64_t * src_orig_poses,
size_t * sendcounts,
800 size_t * recvcounts, MPI_Comm comm, Xt_config redist_config) {
805 size_t nsend = 0, nrecv = 0;
806 size_t max_buffer_size = 0;
807 for (
int i = 0; i < comm_size; ++i) {
808 nsend += sendcounts[i] > 0;
809 nrecv += recvcounts[i] > 0;
810 if (max_buffer_size < sendcounts[i]) max_buffer_size = sendcounts[i];
811 if (max_buffer_size < recvcounts[i]) max_buffer_size = recvcounts[i];
814 size_t total_num_msg = nsend + nrecv;
816 struct Xt_redist_msg * msgs_buffer =
817 xmalloc(total_num_msg *
sizeof(*msgs_buffer));
818 struct Xt_redist_msg * send_msgs = msgs_buffer;
819 struct Xt_redist_msg * recv_msgs = msgs_buffer + nsend;
821 int * pos_buffer =
xmalloc((
size_t)max_buffer_size *
sizeof(*pos_buffer));
825 for (
int i = 0; i < comm_size; ++i) {
826 if (recvcounts[i] > 0) {
827 for (
size_t j = 0; j < recvcounts[i]; ++j)
828 pos_buffer[j] = (
int)tgt_stencils[j].
orig_pos;
829 tgt_stencils += recvcounts[i];
830 recv_msgs[nrecv].rank = i;
831 recv_msgs[nrecv].datatype =
832 xt_mpi_generate_datatype(pos_buffer, recvcounts[i], MPI_DOUBLE, comm);
835 if (sendcounts[i] > 0) {
836 for (
size_t j = 0; j < sendcounts[i]; ++j)
837 pos_buffer[j] = (
int)src_orig_poses[j];
838 src_orig_poses += sendcounts[i];
839 send_msgs[nsend].rank = i;
840 send_msgs[nsend].datatype =
841 xt_mpi_generate_datatype(pos_buffer, sendcounts[i], MPI_DOUBLE, comm);
851 if (total_num_msg > 0) {
853 yac_mpi_call(MPI_Comm_split(comm, 1, 0, &split_comm), comm);
856 xmalloc(2 * total_num_msg *
sizeof(*rank_buffer));
857 int * orig_ranks = rank_buffer;
858 int * split_ranks = rank_buffer + total_num_msg;
860 for (
size_t i = 0; i < total_num_msg; ++i)
861 orig_ranks[i] = msgs_buffer[i].rank;
863 MPI_Group orig_group, split_group;
865 yac_mpi_call(MPI_Comm_group(split_comm, &split_group), comm);
868 MPI_Group_translate_ranks(orig_group, total_num_msg, orig_ranks,
869 split_group, split_ranks), split_comm);
871 for (
size_t i = 0; i < total_num_msg; ++i)
872 msgs_buffer[i].rank = split_ranks[i];
881 xt_redist_single_array_base_custom_new(
882 nsend, nrecv, send_msgs, recv_msgs, split_comm, redist_config);
885 yac_mpi_call(MPI_Comm_split(comm, 0, 0, &split_comm), comm);
896 uint64_t * src_orig_pos,
size_t * sendcounts,
898 size_t * recvcounts,
size_t num_src_fields, MPI_Comm comm,
899 Xt_config redist_config) {
904 size_t nsends[num_src_fields], nrecvs[num_src_fields];
905 size_t max_buffer_size = 0;
906 memset(nsends, 0, num_src_fields *
sizeof(nsends[0]));
907 memset(nrecvs, 0, num_src_fields *
sizeof(nrecvs[0]));
908 for (
int i = 0; i < comm_size; ++i) {
909 for (
size_t j = 0; j < num_src_fields; ++j) {
910 size_t idx = (size_t)i * num_src_fields + j;
911 if (sendcounts[idx] > 0) nsends[j]++;
912 if (recvcounts[idx] > 0) nrecvs[j]++;
913 if (max_buffer_size < sendcounts[idx]) max_buffer_size = sendcounts[idx];
914 if (max_buffer_size < recvcounts[idx]) max_buffer_size = recvcounts[idx];
918 size_t nsend = 0, nrecv = 0;
919 size_t send_offsets[num_src_fields];
920 size_t recv_offsets[num_src_fields];
921 for (
size_t i = 0; i < num_src_fields; ++i) {
922 send_offsets[i] = nsend;
923 recv_offsets[i] = nrecv;
928 size_t total_num_msg = nsend + nrecv;
930 struct Xt_redist_msg * msgs_buffer =
931 xmalloc(total_num_msg *
sizeof(*msgs_buffer));
932 struct Xt_redist_msg * send_msgs = msgs_buffer;
933 struct Xt_redist_msg * recv_msgs = msgs_buffer + nsend;
935 int * pos_buffer =
xmalloc(max_buffer_size *
sizeof(*pos_buffer));
937 for (
int i = 0; i < comm_size; ++i) {
938 for (
size_t src_field_idx = 0; src_field_idx < num_src_fields;
940 size_t idx = (size_t)i * num_src_fields + src_field_idx;
941 if (recvcounts[idx] > 0) {
942 for (
size_t j = 0; j < recvcounts[idx]; ++j)
943 pos_buffer[j] = (
int)tgt_stencils[j].
orig_pos;
944 tgt_stencils += recvcounts[idx];
945 recv_msgs[recv_offsets[src_field_idx]].rank = i;
946 recv_msgs[recv_offsets[src_field_idx]].datatype =
947 xt_mpi_generate_datatype(
948 pos_buffer, recvcounts[idx], MPI_DOUBLE, comm);
949 recv_offsets[src_field_idx]++;
951 if (sendcounts[idx] > 0) {
952 for (
size_t j = 0; j < sendcounts[idx]; ++j)
953 pos_buffer[j] = (
int)src_orig_pos[j];
954 src_orig_pos += sendcounts[idx];
955 send_msgs[send_offsets[src_field_idx]].rank = i;
956 send_msgs[send_offsets[src_field_idx]].datatype =
957 xt_mpi_generate_datatype(
958 pos_buffer, sendcounts[idx], MPI_DOUBLE, comm);
959 send_offsets[src_field_idx]++;
969 if (total_num_msg > 0) {
971 yac_mpi_call(MPI_Comm_split(comm, 1, 0, &split_comm), comm);
974 xmalloc(2 * total_num_msg *
sizeof(*rank_buffer));
975 int * orig_ranks = rank_buffer;
976 int * split_ranks = rank_buffer + total_num_msg;
978 for (
size_t i = 0; i < total_num_msg; ++i)
979 orig_ranks[i] = msgs_buffer[i].rank;
981 MPI_Group orig_group, split_group;
983 yac_mpi_call(MPI_Comm_group(split_comm, &split_group), comm);
986 MPI_Group_translate_ranks(orig_group, total_num_msg, orig_ranks,
987 split_group, split_ranks), split_comm);
989 for (
size_t i = 0; i < total_num_msg; ++i)
990 msgs_buffer[i].rank = split_ranks[i];
998 redists =
xmalloc(num_src_fields *
sizeof(*redists));
999 for (
size_t src_field_idx = 0; src_field_idx < num_src_fields;
1001 redists[src_field_idx] =
1002 xt_redist_single_array_base_custom_new(
1003 nsends[src_field_idx], nrecvs[src_field_idx],
1004 send_msgs, recv_msgs, split_comm, redist_config);
1005 send_msgs += nsends[src_field_idx];
1006 recv_msgs += nrecvs[src_field_idx];
1010 yac_mpi_call(MPI_Comm_split(comm, 0, 0, &split_comm), comm);
1053 MPI_Comm comm, uint64_t count,
1060 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
1062 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1064 for (
size_t i = 0; i < count; ++i) {
1068 (&(direct_stencils[i].tgt.
data.
data.single)):
1070 for (
int j = 0; j < curr_count; ++j)
1071 sendcounts[curr_point_info[j].
rank]++;
1075 1, sendcounts, recvcounts, sdispls, rdispls, comm);
1077 size_t send_buffer_size =
1078 sdispls[comm_size] + sendcounts[comm_size - 1];
1079 size_t recv_buffer_size =
1080 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
1081 size_t tgt_count = recv_buffer_size;
1084 xmalloc((send_buffer_size + recv_buffer_size) *
sizeof(*stencil_buffer));
1086 stencil_buffer + recv_buffer_size;
1090 for (
size_t i = 0; i < count; ++i) {
1094 (&(direct_stencils[i].tgt.
data.
data.single)):
1098 for (
int j = 0; j < curr_count; ++j) {
1099 size_t pos = sdispls[curr_point_infos[j].
rank + 1]++;
1100 send_stencil_buffer[pos].
src =
src;
1101 send_stencil_buffer[pos].
orig_pos = curr_point_infos[j].orig_pos;
1110 send_stencil_buffer, sendcounts, sdispls,
1111 recv_stencil_buffer, recvcounts, rdispls,
1112 sizeof(*stencil_buffer), stencil_direct_dt, comm);
1117 qsort(recv_stencil_buffer, tgt_count,
sizeof(*recv_stencil_buffer),
1120 memset(sendcounts, 0, (
size_t)comm_size *
sizeof(*sendcounts));
1122 for (
size_t i = 0; i < tgt_count; ++i)
1123 sendcounts[recv_stencil_buffer[i].
src.rank]++;
1126 1, sendcounts, recvcounts, sdispls, rdispls, comm);
1128 send_buffer_size = sdispls[comm_size] + sendcounts[comm_size - 1];
1129 recv_buffer_size = rdispls[comm_size - 1] + recvcounts[comm_size - 1];
1131 uint64_t * orig_pos_buffer =
1132 xmalloc((send_buffer_size + recv_buffer_size) *
sizeof(*orig_pos_buffer));
1133 uint64_t * send_orig_pos_buffer = orig_pos_buffer + recv_buffer_size;
1134 uint64_t * recv_orig_pos_buffer = orig_pos_buffer;
1136 for (
size_t i = 0; i < tgt_count; ++i)
1137 send_orig_pos_buffer[sdispls[recv_stencil_buffer[i].
src.rank + 1]++] =
1142 send_orig_pos_buffer, sendcounts, sdispls,
1143 recv_orig_pos_buffer, recvcounts, rdispls,
1144 sizeof(*send_orig_pos_buffer), MPI_UINT64_T, comm);
1151 recv_orig_pos_buffer, recvcounts, recv_stencil_buffer, sendcounts,
1152 comm, redist_config);
1153 free(orig_pos_buffer);
1154 free(stencil_buffer);
1158 if (redist != NULL) xt_redist_delete(redist);
1203 MPI_Comm comm, uint64_t count,
1208 uint64_t num_src_fields = 0;
1209 for (
size_t i = 0; i < count; ++i) {
1216 MPI_IN_PLACE, &num_src_fields, 1, MPI_UINT64_T, MPI_MAX, comm), comm);
1221 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
1223 (
size_t)num_src_fields, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1224 size_t * size_t_buffer =
1225 xmalloc(4 * (
size_t)comm_size *
sizeof(*size_t_buffer));
1226 size_t * total_sendcounts = size_t_buffer + 0 * comm_size;
1227 size_t * total_recvcounts = size_t_buffer + 1 * comm_size;
1228 size_t * total_sdispls = size_t_buffer + 2 * comm_size;
1229 size_t * total_rdispls = size_t_buffer + 3 * comm_size;
1231 for (
size_t i = 0; i < count; ++i) {
1235 (&(direct_mf_stencils[i].tgt.
data.
data.single)):
1237 uint64_t src_field_idx =
1239 for (
int j = 0; j < curr_count; ++j)
1241 (uint64_t)(curr_point_info[j].
rank) * num_src_fields + src_field_idx]++;
1245 (
size_t)num_src_fields, sendcounts, recvcounts, sdispls, rdispls, comm);
1247 size_t saccu = 0, raccu = 0;
1248 for (
int i = 0; i < comm_size; ++i) {
1249 total_sdispls[i] = saccu;
1250 total_rdispls[i] = raccu;
1251 total_sendcounts[i] = 0;
1252 total_recvcounts[i] = 0;
1253 for (
size_t j = 0; j < num_src_fields; ++j) {
1254 total_sendcounts[i] += sendcounts[num_src_fields * i + j];
1255 total_recvcounts[i] += recvcounts[num_src_fields * i + j];
1257 saccu += total_sendcounts[i];
1258 raccu += total_recvcounts[i];
1261 size_t send_buffer_size = total_sdispls[comm_size - 1] +
1262 total_sendcounts[comm_size - 1];
1263 size_t recv_buffer_size = total_rdispls[comm_size - 1] +
1264 total_recvcounts[comm_size - 1];
1265 size_t tgt_count = recv_buffer_size;
1268 xmalloc((send_buffer_size + recv_buffer_size) *
sizeof(*stencil_buffer));
1270 stencil_buffer + recv_buffer_size;
1274 for (
size_t i = 0; i < count; ++i) {
1278 (&(direct_mf_stencils[i].tgt.
data.
data.single)):
1282 uint64_t src_field_idx =
1284 for (
int j = 0; j < curr_count; ++j) {
1286 sdispls[(uint64_t)(curr_point_infos[j].
rank) * num_src_fields +
1287 src_field_idx + 1]++;
1288 send_stencil_buffer[pos].
src =
src;
1299 send_stencil_buffer, total_sendcounts, total_sdispls,
1300 recv_stencil_buffer, total_recvcounts, total_rdispls,
1301 sizeof(*stencil_buffer), stencil_direct_mf_dt, comm);
1303 yac_mpi_call(MPI_Type_free(&stencil_direct_mf_dt), comm);
1307 qsort(recv_stencil_buffer, tgt_count,
sizeof(*recv_stencil_buffer),
1310 memset(sendcounts, 0,
1311 (
size_t)comm_size * (
size_t)num_src_fields *
sizeof(*sendcounts));
1313 for (
size_t i = 0; i < tgt_count; ++i)
1314 sendcounts[(uint64_t)(recv_stencil_buffer[i].
src.
rank) * num_src_fields +
1315 recv_stencil_buffer[i].src_field_idx]++;
1318 (
size_t)num_src_fields, sendcounts, recvcounts, sdispls, rdispls, comm);
1320 saccu = 0, raccu = 0;
1321 for (
int i = 0; i < comm_size; ++i) {
1322 total_sdispls[i] = saccu;
1323 total_rdispls[i] = raccu;
1324 total_sendcounts[i] = 0;
1325 total_recvcounts[i] = 0;
1326 for (
size_t j = 0; j < num_src_fields; ++j) {
1327 total_sendcounts[i] += sendcounts[num_src_fields * i + j];
1328 total_recvcounts[i] += recvcounts[num_src_fields * i + j];
1330 saccu += total_sendcounts[i];
1331 raccu += total_recvcounts[i];
1334 send_buffer_size = total_sdispls[comm_size - 1] +
1335 total_sendcounts[comm_size - 1];
1336 recv_buffer_size = total_rdispls[comm_size - 1] +
1337 total_recvcounts[comm_size - 1];
1339 uint64_t * orig_pos_buffer =
1340 xmalloc((send_buffer_size + recv_buffer_size) *
sizeof(*orig_pos_buffer));
1341 uint64_t * send_orig_pos_buffer = orig_pos_buffer + recv_buffer_size;
1342 uint64_t * recv_orig_pos_buffer = orig_pos_buffer;
1344 for (
size_t i = 0; i < tgt_count; ++i)
1345 send_orig_pos_buffer[
1346 sdispls[(uint64_t)(recv_stencil_buffer[i].
src.
rank) * num_src_fields +
1347 recv_stencil_buffer[i].src_field_idx + 1]++] =
1348 recv_stencil_buffer[i].
src.orig_pos;
1352 send_orig_pos_buffer, total_sendcounts, total_sdispls,
1353 recv_orig_pos_buffer, total_recvcounts, total_rdispls,
1354 sizeof(*send_orig_pos_buffer), MPI_UINT64_T, comm);
1355 free(size_t_buffer);
1358 Xt_redist * redists =
1360 recv_orig_pos_buffer, recvcounts, recv_stencil_buffer, sendcounts,
1361 (
size_t)num_src_fields, comm, redist_config);
1364 free(orig_pos_buffer);
1365 free(stencil_buffer);
1369 if (redists != NULL) {
1370 for (
size_t i = 0; i < (size_t)num_src_fields; ++i)
1371 xt_redist_delete(redists[i]);
2084 void ** pack_data,
int * pack_sizes, MPI_Datatype point_info_dt,
2088 stencils, count, pack_order, pack_sizes, point_info_dt, comm);
2090 size_t pack_buffer_size = 0;
2091 for (
size_t i = 0; i < count; ++i)
2092 pack_buffer_size += (
size_t)(pack_sizes[i]);
2094 void * pack_data_ =
xmalloc(pack_buffer_size);
2095 size_t total_pack_size = 0;
2097 for (
size_t i = 0; i < count; ++i) {
2102 int * position, MPI_Datatype point_info_dt, MPI_Comm comm);
2107 (curr_stencil->
type ==
SUM) ||
2112 "ERROR(pack_stencils): invalid stencil type")
2113 switch (curr_stencil->
type) {
2139 int type = (int)curr_stencil->
type;
2140 void * buffer = (
void*)((
char*)pack_data_ + total_pack_size);
2141 int buffer_size = pack_sizes[i];
2145 MPI_Pack(&
type, 1, MPI_INT, buffer, buffer_size, &position, comm), comm);
2148 &position, point_info_dt, comm);
2150 func_pack(curr_stencil, buffer, buffer_size, &position, point_info_dt, comm);
2153 pack_sizes[i] >= position,
2154 "ERROR(pack_stencils): "
2155 "actual pack size is bigger then computed one (%d > %d)",
2156 position, pack_sizes[i]);
2158 pack_sizes[i] = position;
2159 total_pack_size += (size_t)position;
2162 *pack_data =
xrealloc(pack_data_, total_pack_size);
2364 size_t * stencil_indices,
2365 size_t * stencil_sendcounts,
size_t * stencil_recvcounts) {
2367 int comm_rank, comm_size;
2372 stencil_sendcounts[comm_rank] == stencil_recvcounts[comm_rank],
2373 "ERROR(exchange_stencils): error in arguments")
2375 size_t send_count = 0, recv_count = 0;
2376 size_t local_send_offset = 0;
2377 size_t local_recv_offset = 0;
2378 size_t local_count = (size_t)(stencil_sendcounts[comm_rank]);
2379 for (
int i = 0; i < comm_rank; ++i) {
2380 send_count += stencil_sendcounts[i];
2381 recv_count += stencil_recvcounts[i];
2382 local_send_offset += stencil_sendcounts[i];
2383 local_recv_offset += stencil_recvcounts[i];
2385 local_send_offset = send_count;
2386 local_recv_offset = recv_count;
2387 stencil_sendcounts[comm_rank] = 0;
2388 stencil_recvcounts[comm_rank] = 0;
2389 for (
int i = comm_rank + 1; i < comm_size; ++i) {
2390 send_count += stencil_sendcounts[i];
2391 recv_count += stencil_recvcounts[i];
2395 xmalloc((recv_count + local_count) *
sizeof(*new_stencils));
2396 size_t * local_stencil_indices =
2397 xmalloc(local_count *
sizeof(*local_stencil_indices));
2398 memcpy(local_stencil_indices, stencil_indices + local_send_offset,
2399 local_count *
sizeof(*local_stencil_indices));
2403 stencil_indices + local_send_offset,
2404 stencil_indices + local_send_offset + local_count,
2405 (send_count - local_send_offset) *
sizeof(*stencil_indices));
2409 int * pack_sizes =
xmalloc(send_count *
sizeof(*pack_sizes));
2412 stencils, send_count, stencil_indices, &send_buffer, pack_sizes,
2413 point_info_dt, comm);
2415 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
2417 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
2420 for (
int rank = 0; rank < comm_size; ++rank) {
2421 size_t sendcount = 0;
2422 int curr_num_stencils = stencil_sendcounts[rank];
2423 for (
int j = 0; j < curr_num_stencils; ++j, ++send_count)
2424 sendcount += (
size_t)(pack_sizes[send_count]);
2425 sendcounts[rank] = sendcount;
2430 1, sendcounts, recvcounts, sdispls, rdispls, comm);
2432 size_t recv_size = recvcounts[comm_size - 1] + rdispls[comm_size - 1];
2434 void * recv_buffer =
xmalloc(recv_size);
2437 yac_alltoallv_packed_p2p(
2438 send_buffer, sendcounts, sdispls+1,
2439 recv_buffer, recvcounts, rdispls, comm);
2445 new_stencils, recv_count,
2446 recv_buffer, recv_size, point_info_dt, comm);
2450 memmove(new_stencils + local_recv_offset + local_count,
2451 new_stencils + local_recv_offset ,
2452 (recv_count - local_recv_offset ) *
sizeof(*new_stencils));
2453 for (
size_t i = 0; i < local_count; ++i, ++local_recv_offset )
2454 new_stencils[local_recv_offset] =
2456 stencils + local_stencil_indices[i],
2457 stencils[local_stencil_indices[i]].
tgt);
2458 free(local_stencil_indices);
2460 return new_stencils;
2465 int * stencil_ranks,
size_t count) {
2467 MPI_Comm comm =
weights->comm;
2473 "ERROR(yac_interp_weights_get_stencils): count exceeds INT_MAX");
2475 size_t * reorder_idx =
xmalloc(count *
sizeof(*reorder_idx));
2476 for (
size_t i = 0; i < count; ++i) reorder_idx[i] = i;
2479 stencil_ranks, count, stencil_indices, reorder_idx);
2482 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
2484 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
2485 for (
size_t i = 0; i < count; ++i) sendcounts[stencil_ranks[i]]++;
2487 1, sendcounts, recvcounts, sdispls, rdispls, comm);
2489 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
2490 uint64_t * uint64_t_buffer =
2491 xmalloc((count + recv_count) *
sizeof(*uint64_t_buffer));
2492 uint64_t * send_stencil_indices = uint64_t_buffer;
2493 uint64_t * recv_stencil_indices = uint64_t_buffer + count;
2494 for (
size_t i = 0; i < count; ++i)
2495 send_stencil_indices[i] = (uint64_t)(stencil_indices[i]);
2496 yac_alltoallv_uint64_p2p(
2497 send_stencil_indices, sendcounts, sdispls+1,
2498 recv_stencil_indices, recvcounts, rdispls, comm);
2501 size_t * exchange_stencil_indices =
2502 xmalloc(recv_count *
sizeof(*exchange_stencil_indices));
2503 for (
size_t i = 0; i < recv_count; ++i) {
2505 (
size_t)(recv_stencil_indices[i]) <
weights->stencils_size,
2506 "ERROR(yac_interp_weights_get_stencils): invalid stencil index");
2507 exchange_stencil_indices[i] = (size_t)(recv_stencil_indices[i]);
2509 free(uint64_t_buffer);
2512 recvcounts, sendcounts);
2513 free(exchange_stencil_indices);
2518 xmalloc(count *
sizeof(*sorted_stencils));
2519 for (
size_t i = 0; i < count; ++i)
2520 sorted_stencils[reorder_idx[i]] = stencils[i];
2524 return sorted_stencils;
2532 size_t * num_stencils_per_tgt,
size_t * stencil_indices,
2533 int * stencil_ranks,
double * w) {
2535 size_t count = (tgts != NULL)?tgts->
count:0;
2536 MPI_Comm comm =
weights->comm;
2537 int comm_rank, comm_size;
2542 size_t total_num_stencils = 0;
2543 size_t max_num_stencils_per_tgt = 0;
2544 for (
size_t i = 0; i < count; ++i) {
2545 size_t curr_num_stencils_per_tgt = num_stencils_per_tgt[i];
2546 if (curr_num_stencils_per_tgt > max_num_stencils_per_tgt)
2547 max_num_stencils_per_tgt = curr_num_stencils_per_tgt;
2548 total_num_stencils += num_stencils_per_tgt[i];
2550 size_t num_missing_stencils = 0;
2551 for (
size_t i = 0; i < total_num_stencils; ++i)
2552 if (stencil_ranks[i] != comm_rank) num_missing_stencils++;
2555 size_t * missing_stencil_indices =
2556 xmalloc(num_missing_stencils *
sizeof(*missing_stencil_indices));
2557 int * missing_stencil_ranks =
2558 xmalloc(num_missing_stencils *
sizeof(*missing_stencil_ranks));
2559 for (
size_t i = 0, j = 0; i < total_num_stencils; ++i) {
2560 if (stencil_ranks[i] != comm_rank) {
2561 missing_stencil_indices[j] = stencil_indices[i];
2562 missing_stencil_ranks[j] = stencil_ranks[i];
2568 weights, missing_stencil_indices, missing_stencil_ranks,
2569 num_missing_stencils);
2570 free(missing_stencil_ranks);
2571 free(missing_stencil_indices);
2576 size_t stencils_array_size =
weights->stencils_array_size;
2577 size_t stencils_size =
weights->stencils_size;
2580 xmalloc(max_num_stencils_per_tgt *
sizeof(*stencils_buffer));
2583 stencils, stencils_array_size, stencils_size + count);
2585 for (
size_t i = 0, j = 0; i < count;
2586 ++i, ++stencils_size) {
2588 size_t curr_num_stencils = num_stencils_per_tgt[i];
2589 for (
size_t k = 0; k < curr_num_stencils; ++k)
2590 stencils_buffer[k] =
2591 (stencil_ranks[k] == comm_rank)?
2592 (stencils + stencil_indices[k]):(missing_stencils + (j++));
2594 stencils[stencils_size] =
2596 w += curr_num_stencils;
2597 stencil_indices += curr_num_stencils;
2598 stencil_ranks += curr_num_stencils;
2602 weights->stencils_array_size = stencils_array_size;
2603 weights->stencils_size = stencils_size;
2605 free(stencils_buffer);
2753 size_t * pack_order,
void ** pack_data,
int * pack_sizes,
2754 int * weight_counts, MPI_Comm comm) {
2760 size_t temp_total_pack_size = 0;
2761 for (
size_t i = 0; i < count; ++i) {
2762 temp_total_pack_size +=
2765 wsum_stencils + pack_order[i],
2766 wsum_mf_weight_dt, point_info_dt, comm));
2769 void * pack_data_ =
xmalloc(temp_total_pack_size);
2770 size_t total_pack_size = 0;
2773 for (
size_t i = 0; i < count; ++i) {
2775 size_t idx = pack_order[i];
2778 void * buffer = (
void*)((
unsigned char*)pack_data_ + total_pack_size);
2779 int buffer_size = pack_sizes[i];
2780 int curr_count = wsum_stencils[idx].
count;
2784 &(wsum_stencils[idx].tgt), buffer, buffer_size, &position,
2785 point_info_dt, comm);
2788 MPI_Pack(&curr_count, 1, MPI_INT, buffer, buffer_size, &position, comm), comm);
2791 MPI_Pack(wsum_stencils[idx].data, curr_count, wsum_mf_weight_dt,
2792 buffer, buffer_size, &position, comm), comm);
2794 pack_sizes[i] = position;
2795 weight_counts[i] = curr_count;
2796 total_pack_size += (size_t)position;
2802 *pack_data =
xrealloc(pack_data_, total_pack_size);
2808 void * packed_data,
size_t packed_data_size, MPI_Comm comm) {
2813 size_t weight_offset = 0;
2814 for (
size_t i = 0, offset = 0; i < count; ++i) {
2817 void * curr_buffer = (
void*)((
char*)packed_data + offset);
2818 int buffer_size = (int)(packed_data_size - offset);
2824 weight_buffer + weight_offset;
2827 curr_buffer, buffer_size, &position, &tgt, point_info_dt, comm);
2829 MPI_Unpack(curr_buffer, buffer_size, &position,
2830 &weight_count, 1, MPI_INT, comm),
2833 MPI_Unpack(curr_buffer, buffer_size, &position,
2834 curr_weights, weight_count, wsum_mf_weight_dt, comm), comm);
2836 curr_wsum_stencil->
tgt = tgt;
2837 curr_wsum_stencil->
data = curr_weights;
2838 curr_wsum_stencil->
count = (size_t)weight_count;
2840 weight_offset += (size_t)weight_count;
2841 offset += (size_t)position;
2847 return weight_offset;
2852 int * stencil_owner,
size_t * reorder_idx,
size_t num_owners) {
2855 wsum_stencils_data->
data;
2857 int comm_rank, comm_size;
2861 size_t local_weight_count = 0;
2862 size_t local_count = 0;
2863 for (
size_t i = 0; i < num_owners; ++i) {
2864 if (stencil_owner[i] == comm_rank) {
2865 local_weight_count += wsum_stencils[reorder_idx[i]].
count;
2866 stencil_owner[i] = INT_MAX;
2872 size_t send_count = num_owners - local_count;
2876 int * pack_sizes =
xmalloc(2 * send_count *
sizeof(*pack_sizes));
2877 int * weight_counts = pack_sizes + send_count;
2879 pack_sizes, weight_counts, comm);
2881 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
2883 3, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
2885 for (
size_t i = 0; i < send_count; ++i) {
2886 int curr_rank = stencil_owner[i];
2887 sendcounts[3 * curr_rank + 0]++;
2888 sendcounts[3 * curr_rank + 1] += (size_t)(pack_sizes[i]);
2889 sendcounts[3 * curr_rank + 2] += (size_t)(weight_counts[i]);
2897 size_t recv_count = 0;
2898 size_t recv_size = 0;
2899 size_t recv_weight_count = 0;
2900 size_t saccu = 0, raccu = 0;
2901 for (
int i = 0; i < comm_size; ++i) {
2904 recv_count += recvcounts[3 * i + 0];
2905 recv_size += recvcounts[3 * i + 1];
2906 recv_weight_count += recvcounts[3 * i + 2];
2907 saccu += sendcounts[3 * i + 1];
2908 raccu += recvcounts[3 * i + 1];
2909 sendcounts[i] = sendcounts[3 * i + 1];
2910 recvcounts[i] = recvcounts[3 * i + 1];
2913 void * recv_buffer =
xmalloc(recv_size);
2916 yac_alltoallv_packed_p2p(
2917 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls, comm);
2923 (local_weight_count + recv_weight_count) *
sizeof(temp->
buffer[0]));
2927 ((new_wsum_stencils_data->
data =
2928 xmalloc((local_count + recv_count) *
2929 sizeof(*(new_wsum_stencils_data->
data)))));
2930 new_wsum_stencils_data->
count = local_count + recv_count;
2933 size_t weight_offset =
2935 new_wsum_stencils, &(temp->
buffer[0]), recv_count,
2936 recv_buffer, recv_size, comm);
2938 new_wsum_stencils += recv_count;
2940 &(temp->
buffer[weight_offset]);
2944 for (
size_t i = 0, weight_offset = 0; i < local_count; ++i) {
2946 wsum_stencils + reorder_idx[i + send_count];
2948 new_wsum_stencils + i;
2950 weight_buffer + weight_offset;
2951 size_t curr_stencil_size = curr_wsum_stencil->
count;
2953 curr_new_wsum_stencil->
count = curr_stencil_size;
2954 curr_new_wsum_stencil->
data = curr_new_weights;
2955 memcpy(curr_new_weights, curr_wsum_stencil->
data,
2956 curr_stencil_size *
sizeof(*curr_new_weights));
2957 weight_offset += curr_stencil_size;
2960 return new_wsum_stencils_data;
3021 wsum_stencils_data->
data;
3026 size_t total_owner_count = 0;
3027 for (
size_t i = 0; i <
count; ++i) {
3029 if (stencil_size == 1) {
3030 total_owner_count++;
3035 tgt_point_infos, stencil_size,
sizeof(*tgt_point_infos),
3037 int prev_rank = INT_MAX;
3038 for (
int j = 0; j < stencil_size; ++j) {
3039 int curr_rank = tgt_point_infos[j].
rank;
3040 if (curr_rank != prev_rank) {
3041 ++total_owner_count;
3042 prev_rank = curr_rank;
3048 int * stencil_owner =
xmalloc(total_owner_count *
sizeof(*stencil_owner));
3049 size_t * reorder_idx =
xmalloc(total_owner_count *
sizeof(*reorder_idx));
3050 for (
size_t i = 0, k = 0; i < count; ++i) {
3052 if (stencil_size == 1) {
3059 int prev_rank = INT_MAX;
3060 for (
int j = 0; j < stencil_size; ++j) {
3061 int curr_rank = tgt_point_infos[j].
rank;
3062 if (curr_rank != prev_rank) {
3063 stencil_owner[k] = tgt_point_infos[j].
rank;
3066 prev_rank = curr_rank;
3074 comm, wsum_stencils_data, stencil_owner, reorder_idx, total_owner_count);
3076 wsum_stencils = new_wsum_stencils_data->
data;
3080 free(stencil_owner);
3082 if (
count == 0)
return new_wsum_stencils_data;
3088 size_t total_num_tgt_pos = 0;
3089 for (
size_t i = 0; i <
count; ++i) {
3091 if (curr_count == 1) {
3092 ++total_num_tgt_pos;
3096 for (
size_t j = 0; j < curr_count; ++j)
3097 if (curr_point_infos[j].
rank == comm_rank)
3098 ++total_num_tgt_pos;
3102 if (total_num_tgt_pos != count) {
3103 new_wsum_stencils_data->
data =
3105 xrealloc(wsum_stencils, total_num_tgt_pos *
sizeof(*wsum_stencils))));
3106 new_wsum_stencils_data->
count = total_num_tgt_pos;
3110 for (
size_t i = 0, offset = count; i < count; ++i) {
3112 if (curr_count > 1) {
3117 for (j = 0; j < curr_count; ++j) {
3118 if (curr_point_infos[j].
rank == comm_rank) {
3127 for (j = j + 1; j < curr_count; ++j) {
3128 if (curr_point_infos[j].
rank == comm_rank) {
3129 wsum_stencils[offset] = wsum_stencils[i];
3135 free(curr_point_infos);
3139 return new_wsum_stencils_data;
3144 size_t num_src_fields, MPI_Comm comm, Xt_config redist_config) {
3149 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
3151 num_src_fields, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
3152 size_t * size_t_buffer =
3153 xmalloc(4 * (
size_t)comm_size *
sizeof(*size_t_buffer));
3154 size_t * total_sendcounts = size_t_buffer + 0 * comm_size;
3155 size_t * total_recvcounts = size_t_buffer + 1 * comm_size;
3156 size_t * total_sdispls = size_t_buffer + 2 * comm_size;
3157 size_t * total_rdispls = size_t_buffer + 3 * comm_size;
3159 for (
size_t i = 0; i < count; ++i)
3160 sendcounts[halo_points[i].data.rank * num_src_fields +
3164 num_src_fields, sendcounts, recvcounts, sdispls, rdispls, comm);
3166 size_t saccu = 0, raccu = 0;
3167 for (
int i = 0; i < comm_size; ++i) {
3168 total_sdispls[i] = saccu;
3169 total_rdispls[i] = raccu;
3170 total_sendcounts[i] = 0;
3171 total_recvcounts[i] = 0;
3172 for (
size_t j = 0; j < num_src_fields; ++j) {
3173 total_sendcounts[i] += sendcounts[num_src_fields * i + j];
3174 total_recvcounts[i] += recvcounts[num_src_fields * i + j];
3176 saccu += total_sendcounts[i];
3177 raccu += total_recvcounts[i];
3180 size_t recv_count = total_recvcounts[comm_size - 1] +
3181 total_rdispls[comm_size - 1];
3183 int * exchange_buffer =
3184 xmalloc((2 * count + recv_count) *
sizeof(*exchange_buffer));
3185 int * send_buffer = exchange_buffer;
3186 int * reorder_idx = exchange_buffer + count;
3187 int * recv_buffer = exchange_buffer + 2 * count;
3190 size_t num_halo_per_src_field[num_src_fields];
3192 num_halo_per_src_field, 0,
3193 num_src_fields *
sizeof(num_halo_per_src_field[0]));
3194 for (
size_t i = 0; i < count; ++i) {
3195 size_t curr_src_field_idx = (size_t)(halo_points[i].field_idx);
3196 size_t pos = sdispls[(size_t)(halo_points[i].data.rank) * num_src_fields +
3197 curr_src_field_idx + 1]++;
3201 "ERROR(generate_halo_redists): offset not supported by MPI")
3203 reorder_idx[pos] = num_halo_per_src_field[curr_src_field_idx]++;
3207 yac_alltoallv_int_p2p(
3208 send_buffer, total_sendcounts, total_sdispls,
3209 recv_buffer, total_recvcounts, total_rdispls, comm);
3211 free(size_t_buffer);
3213 size_t nsend = 0, nsends[num_src_fields];
3214 size_t nrecv = 0, nrecvs[num_src_fields];
3215 memset(nsends, 0, num_src_fields *
sizeof(nsends[0]));
3216 memset(nrecvs, 0, num_src_fields *
sizeof(nrecvs[0]));
3217 for (
int i = 0; i < comm_size; ++i) {
3218 for (
size_t field_idx = 0; field_idx < num_src_fields; ++field_idx) {
3219 if (sendcounts[i * num_src_fields + field_idx] > 0) {
3221 nrecvs[field_idx]++;
3223 if (recvcounts[i * num_src_fields + field_idx] > 0) {
3225 nsends[field_idx]++;
3230 size_t total_num_msg = nsend + nrecv;
3232 struct Xt_redist_msg * msgs_buffer =
3233 xmalloc(total_num_msg *
sizeof(*msgs_buffer));
3234 struct Xt_redist_msg * send_msgs = msgs_buffer;
3235 struct Xt_redist_msg * recv_msgs = msgs_buffer + nsend;
3237 for (
size_t field_idx = 0, nsend = 0, nrecv = 0;
3238 field_idx < num_src_fields; ++field_idx) {
3239 for (
int rank = 0; rank < comm_size; ++rank) {
3240 size_t idx = (size_t)rank * num_src_fields + field_idx;
3241 if (sendcounts[idx] > 0) {
3242 recv_msgs[nrecv].rank = rank;
3243 recv_msgs[nrecv].datatype =
3244 xt_mpi_generate_datatype(
3245 reorder_idx + sdispls[idx], sendcounts[idx], MPI_DOUBLE, comm);
3248 if (recvcounts[idx] > 0) {
3249 send_msgs[nsend].rank = rank;
3250 send_msgs[nsend].datatype =
3251 xt_mpi_generate_datatype(
3252 recv_buffer + rdispls[idx], recvcounts[idx], MPI_DOUBLE, comm);
3261 if (total_num_msg > 0) {
3263 yac_mpi_call(MPI_Comm_split(comm, 1, 0, &halo_comm), comm);
3265 int * rank_buffer =
xmalloc(2 * total_num_msg *
sizeof(*rank_buffer));
3266 int * orig_ranks = rank_buffer;
3267 int * split_ranks = rank_buffer + total_num_msg;
3269 for (
size_t i = 0; i < total_num_msg; ++i)
3270 orig_ranks[i] = msgs_buffer[i].rank;
3272 MPI_Group orig_group, split_group;
3274 yac_mpi_call(MPI_Comm_group(halo_comm, &split_group), comm);
3277 MPI_Group_translate_ranks(orig_group, (
int)total_num_msg, orig_ranks,
3278 split_group, split_ranks), halo_comm);
3280 for (
size_t i = 0; i < total_num_msg; ++i)
3281 msgs_buffer[i].rank = split_ranks[i];
3289 redist =
xmalloc(num_src_fields *
sizeof(*redist));
3290 if (num_src_fields == 1) {
3292 xt_redist_single_array_base_custom_new(
3293 nsend, nrecv, send_msgs, recv_msgs, halo_comm,
3296 for (
size_t field_idx = 0; field_idx < num_src_fields; ++field_idx) {
3298 xt_redist_single_array_base_custom_new(
3299 nsends[field_idx], nrecvs[field_idx],
3300 send_msgs, recv_msgs, halo_comm,
3302 send_msgs += nsends[field_idx];
3303 recv_msgs += nrecvs[field_idx];
3308 yac_mpi_call(MPI_Comm_split(comm, 0, 0, &halo_comm), comm);
3313 free(exchange_buffer);
3387 Xt_config redist_config) {
3392 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
3394 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
3396 for (
size_t i = 0; i <
count; ++i) {
3397 int curr_count = point_infos[i].
count;
3403 "ERROR(generate_redist_put_double): no owner found for global id")
3404 for (
int j = 0; j < curr_count; ++j)
3405 sendcounts[curr_point_infos[j].
rank]++;
3409 1, sendcounts, recvcounts, sdispls, rdispls, comm);
3412 sdispls[comm_size] + sendcounts[comm_size - 1];
3414 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
3416 int * exchange_buffer =
3417 xmalloc((2 * send_count + recv_count) *
sizeof(*exchange_buffer));
3418 int * send_buffer = exchange_buffer;
3419 int * reorder_idx = exchange_buffer + send_count;
3420 int * recv_buffer = exchange_buffer + 2 * send_count;
3423 for (
size_t i = 0; i < count; ++i) {
3424 int curr_count = point_infos[i].
count;
3428 for (
int j = 0; j < curr_count; ++j) {
3429 size_t pos = sdispls[curr_point_infos[j].
rank + 1]++;
3430 uint64_t
orig_pos = curr_point_infos[j].orig_pos;
3433 "ERROR(generate_redist_put_double): offset not supported by MPI")
3435 reorder_idx[pos] = i;
3440 yac_alltoallv_int_p2p(
3441 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls, comm);
3445 for (
int i = 0; i < comm_size; ++i) {
3446 if (sendcounts[i] > 0) nsend++;
3447 if (recvcounts[i] > 0) nrecv++;
3450 struct Xt_redist_msg * send_msgs =
xmalloc(nsend *
sizeof(*send_msgs));
3451 struct Xt_redist_msg * recv_msgs =
xmalloc(nrecv *
sizeof(*send_msgs));
3453 for (
int i = 0, nsend = 0, nrecv = 0; i < comm_size; ++i) {
3454 if (sendcounts[i] > 0) {
3455 send_msgs[nsend].rank = i;
3456 send_msgs[nsend].datatype =
3457 xt_mpi_generate_datatype(
3458 reorder_idx + sdispls[i], sendcounts[i], MPI_DOUBLE, comm);
3461 if (recvcounts[i] > 0) {
3462 recv_msgs[nrecv].rank = i;
3463 recv_msgs[nrecv].datatype =
3464 xt_mpi_generate_datatype(
3465 recv_buffer + rdispls[i], recvcounts[i], MPI_DOUBLE, comm);
3472 xt_redist_single_array_base_custom_new(
3473 nsend, nrecv, send_msgs, recv_msgs, comm, redist_config);
3475 free(exchange_buffer);
3498 "ERROR(yac_interp_weights_redist_w_sum_mf): invalid reorder type")
3505 size_t wsum_mf_count = new_wsum_mf_stencils_data->
count;
3507 new_wsum_mf_stencils_data->
data;
3510 size_t total_num_links = 0, total_num_remote_weights = 0;
3511 for (
size_t i = 0; i < wsum_mf_count; ++i) {
3512 size_t curr_stencil_size = wsum_mf_stencils[i].
count;
3513 total_num_links += curr_stencil_size;
3514 for (
size_t j = 0; j < curr_stencil_size; ++j)
3515 if (wsum_mf_stencils[i].
data[j].
src.rank != comm_rank)
3516 ++total_num_remote_weights;
3521 xmalloc(total_num_remote_weights *
sizeof(*remote_src_points));
3522 size_t num_src_fields = 0;
3523 for (
size_t i = 0, k = 0; i < wsum_mf_count; ++i) {
3524 size_t curr_stencil_size = wsum_mf_stencils[i].
count;
3526 wsum_mf_stencils[i].
data;
3527 for (
size_t j = 0; j < curr_stencil_size; ++j) {
3529 if (curr_src_field_idx >= num_src_fields)
3530 num_src_fields = curr_src_field_idx + 1;
3531 if (curr_weights[j].
src.rank != comm_rank) {
3532 remote_src_points[k].
data = curr_weights[j].
src;
3533 remote_src_points[k].
field_idx = curr_src_field_idx;
3541 MPI_IN_PLACE, &num_src_fields, 1,
YAC_MPI_SIZE_T, MPI_MAX, comm), comm);
3545 qsort(remote_src_points, total_num_remote_weights,
sizeof(*remote_src_points),
3552 size_t prev_field_idx;
3554 if (total_num_remote_weights > 0) {
3555 prev_remote_src_point = &(remote_src_points[0].
data);
3556 prev_field_idx = remote_src_points[0].
field_idx;
3559 prev_field_idx = SIZE_MAX;
3562 for (
size_t i = 0; i < total_num_remote_weights; ++i) {
3564 &(remote_src_points[i].
data);
3565 size_t curr_field_idx = remote_src_points[i].
field_idx;
3567 prev_remote_src_point, curr_remote_src_point) ||
3568 (prev_field_idx != curr_field_idx)) {
3569 prev_remote_src_point = curr_remote_src_point;
3570 prev_field_idx = curr_field_idx;
3571 remote_src_points[halo_size].
data = *curr_remote_src_point;
3572 remote_src_points[halo_size].
field_idx = curr_field_idx;
3576 wsum_mf_stencils + remote_src_points[i].
reorder_idx;
3577 size_t curr_stencil_size = curr_stencil->
count;
3578 for (
size_t j = 0; j < curr_stencil_size; ++j) {
3580 &(curr_stencil->
data[j].
src), curr_remote_src_point)) &&
3590 qsort(wsum_mf_stencils, wsum_mf_count,
sizeof(*wsum_mf_stencils),
3595 size_t * num_src_per_tgt =
xmalloc(wsum_mf_count *
sizeof(*num_src_per_tgt));
3596 double * weights =
xmalloc(total_num_links *
sizeof(*weights));
3597 size_t * src_idx =
xmalloc(total_num_links *
sizeof(*src_idx));
3598 size_t * src_field_idx =
xmalloc(total_num_links *
sizeof(*src_field_idx));
3601 for (
size_t i = 0, k = 0; i < wsum_mf_count; ++i) {
3602 size_t curr_stencil_size = wsum_mf_stencils[i].
count;
3604 wsum_mf_stencils[i].
data;
3605 num_src_per_tgt[i] = curr_stencil_size;
3606 for (
size_t j = 0; j < curr_stencil_size; ++j, ++k){
3607 weights[k] = curr_weights[j].
weight;
3614 Xt_redist * halo_redists =
3616 remote_src_points, halo_size, num_src_fields, comm, redist_config);
3620 (stencil_type ==
SUM) ||
3622 (stencil_type ==
SUM_MF),
3623 "ERROR(yac_interp_weights_redist_w_sum_mf): unsupported stencil type");
3630 xmalloc(wsum_mf_count *
sizeof(*tgt_infos));
3631 for (
size_t i = 0; i < wsum_mf_count; ++i)
3632 tgt_infos[i] = wsum_mf_stencils[i].tgt.
data;
3633 Xt_redist result_redist =
3635 tgt_infos, wsum_mf_count, comm, redist_config);
3638 switch(stencil_type) {
3643 interp, halo_redists, wsum_mf_count, num_src_per_tgt, weights,
3644 src_field_idx, src_idx, ((stencil_type==
WEIGHT_SUM)?1:num_src_fields),
3650 interp, halo_redists, wsum_mf_count, num_src_per_tgt,
3651 src_field_idx, src_idx, ((stencil_type ==
SUM)?1:num_src_fields),
3656 if (result_redist != NULL) xt_redist_delete(result_redist);
3660 size_t * tgt_orig_pos =
xmalloc(wsum_mf_count *
sizeof(*tgt_orig_pos));
3661 for (
size_t i = 0; i < wsum_mf_count; ++i) {
3663 wsum_mf_stencils[i].tgt.
data.count == 1,
3664 "ERROR(yac_interp_weights_redist_w_sum): currently unsupported target "
3665 "point distribution")
3667 (size_t)(wsum_mf_stencils[i].tgt.
data.data.single.orig_pos);
3670 switch(stencil_type) {
3675 interp, halo_redists, tgt_orig_pos, wsum_mf_count,
3676 num_src_per_tgt, weights, src_field_idx, src_idx,
3677 ((stencil_type ==
WEIGHT_SUM)?1:num_src_fields));
3682 interp, halo_redists, tgt_orig_pos, wsum_mf_count,
3683 num_src_per_tgt, src_field_idx, src_idx,
3684 ((stencil_type ==
SUM)?1:num_src_fields));
3690 for (
size_t i = 0; i < new_wsum_mf_stencils_data->
count; ++i)
3692 free(new_wsum_mf_stencils_data->
data);
3693 free(new_wsum_mf_stencils_data);
3695 free(remote_src_points);
3696 free(src_field_idx);
3699 free(num_src_per_tgt);
3700 if (halo_redists != NULL) {
3701 for (
size_t i = 0; i < num_src_fields; ++i)
3702 xt_redist_delete(halo_redists[i]);
3714 char const * yaxt_exchanger_name, MPI_Comm comm) {
3716 Xt_config redist_config = xt_config_new();
3719 char * env_exchanger_name = NULL;
3720 if (yaxt_exchanger_name == NULL) {
3727 size_t exchanger_name_len = 0;
3732 exchanger_name_len =
3733 ((env_exchanger_name != NULL) && (env_exchanger_name[0] !=
'\0'))?
3734 strlen(env_exchanger_name):0;
3740 MPI_Bcast(&exchanger_name_len, 1,
YAC_MPI_SIZE_T, 0, comm), comm);
3742 if (exchanger_name_len > 0) {
3745 env_exchanger_name = strdup(env_exchanger_name);
3747 env_exchanger_name =
3748 xmalloc((exchanger_name_len + 1) *
sizeof(*env_exchanger_name));
3753 env_exchanger_name, (
int)(exchanger_name_len + 1), MPI_CHAR, 0, comm),
3756 yaxt_exchanger_name = env_exchanger_name;
3760 if (yaxt_exchanger_name != NULL) {
3763 int exchanger_id = xt_exchanger_id_by_name(yaxt_exchanger_name);
3766 "ERROR(get_redist_config): invalid yaxt exchanger name \"%s\"",
3767 yaxt_exchanger_name);
3768 xt_config_set_exchange_method(redist_config, exchanger_id);
3771 free(env_exchanger_name);
3773 return redist_config;
3780 double scaling_factor,
double scaling_summand,
3781 char const * yaxt_exchanger_name) {
3783 MPI_Comm comm = weights->
comm;
3795 memset(&(local_stencil_counts[0]), 0,
sizeof(local_stencil_counts));
3797 local_stencil_counts[(
int)(weights->
stencils[i].
type)]++;
3800 stencils_offsets[i] = accu;
3801 accu += local_stencil_counts[i];
3809 local_stencil_counts, global_stencil_counts,
3816 MPI_IN_PLACE, &max_collection_size, 1, MPI_UINT64_T, MPI_MAX, comm),
3820 "ERROR(yac_interp_weights_get_interpolation): "
3821 "mismatching collection sizes")
3827 scaling_factor, scaling_summand);
3829 if (global_stencil_counts[
FIXED] > 0)
3831 weights->
comm, local_stencil_counts[
FIXED],
3834 if (global_stencil_counts[
DIRECT] > 0)
3840 if (global_stencil_counts[
SUM] > 0) {
3845 (
size_t)(local_stencil_counts[
SUM]),
SUM);
3848 "ERROR(yac_interp_weights_get_interpolation): invalid reorder type")
3850 weights->
comm, wsum_stencils, interp, reorder,
SUM,
3852 for (
size_t i = 0; i < wsum_stencils->
count; ++i)
3854 free(wsum_stencils->
data);
3855 free(wsum_stencils);
3866 "ERROR(yac_interp_weights_get_interpolation): invalid reorder type")
3870 for (
size_t i = 0; i < wsum_stencils->
count; ++i)
3872 free(wsum_stencils->
data);
3873 free(wsum_stencils);
3876 if (global_stencil_counts[
DIRECT_MF] > 0)
3882 if (global_stencil_counts[
SUM_MF] > 0) {
3890 "ERROR(yac_interp_weights_get_interpolation): invalid reorder type")
3892 weights->
comm, sum_mf_stencils, interp, reorder,
SUM_MF,
3894 for (
size_t i = 0; i < sum_mf_stencils->
count; ++i)
3896 free(sum_mf_stencils->
data);
3897 free(sum_mf_stencils);
3908 "ERROR(yac_interp_weights_get_interpolation): invalid reorder type")
3912 for (
size_t i = 0; i < wsum_mf_stencils->
count; ++i)
3914 free(wsum_mf_stencils->
data);
3915 free(wsum_mf_stencils);
3918 xt_config_delete(redist_config);
4008 char const * filename,
char const * src_grid_name,
char const * tgt_grid_name,
4009 size_t num_fixed_values,
double * fixed_values,
4010 size_t * num_tgt_per_fixed_value,
size_t num_links,
4011 size_t num_weights_per_link,
size_t num_src_fields,
4012 size_t * num_links_per_src_field,
4014 size_t src_grid_size,
size_t tgt_grid_size) {
4019 yac_nc_create(filename, NC_CLOBBER | NC_64BIT_OFFSET, &ncid);
4021 int dim_weight_id[8];
4024 if (num_links > 0) {
4025 YAC_HANDLE_ERROR(nc_def_dim(ncid,
"num_links", num_links, &dim_weight_id[0]));
4027 num_weights_per_link > 0,
4028 "ERROR(create_weight_file): number of links is %zu but number of "
4029 "weights per link is zero for weight file %s", num_links, filename)
4031 nc_def_dim(ncid,
"num_wgts", num_weights_per_link, &dim_weight_id[1]));
4035 "ERROR(create_weight_file): number of source fields is zero for "
4036 "weight file %s", filename)
4038 nc_def_dim(ncid,
"num_src_fields", num_src_fields, &dim_weight_id[2]));
4043 if (num_fixed_values > 0) {
4046 ncid,
"num_fixed_values", num_fixed_values, &dim_weight_id[4]));
4047 size_t num_fixed_dst = 0;
4048 for (
size_t i = 0; i < num_fixed_values; ++i)
4049 num_fixed_dst += num_tgt_per_fixed_value[i];
4052 "ERROR(create_weight_file): number of fixed values is %zu but number "
4053 "of fixed destination points is zero for weight file %s",
4054 num_fixed_dst, filename)
4056 nc_def_dim(ncid,
"num_fixed_dst", num_fixed_dst, &dim_weight_id[5]));
4059 if (src_grid_size > 0)
4061 nc_def_dim(ncid,
"src_grid_size", src_grid_size, &dim_weight_id[6]));
4063 if (tgt_grid_size > 0)
4065 nc_def_dim(ncid,
"dst_grid_size", tgt_grid_size, &dim_weight_id[7]));
4067 int var_src_add_id, var_dst_add_id, var_weight_id, var_num_links_id,
4068 src_var_locs_id, tgt_var_loc_id, var_fixed_values_id,
4069 var_num_dst_per_fixed_value_id, var_dst_add_fixed_id;
4072 if (num_links > 0) {
4075 ncid,
"src_address", NC_INT, 1, dim_weight_id, &var_src_add_id));
4078 ncid,
"dst_address", NC_INT, 1, dim_weight_id, &var_dst_add_id));
4081 ncid,
"remap_matrix", NC_DOUBLE, 2, dim_weight_id, &var_weight_id));
4083 nc_def_var(ncid,
"num_links_per_src_field", NC_INT, 1,
4084 &dim_weight_id[2], &var_num_links_id));
4088 ncid,
"src_locations", NC_CHAR, 2, &dim_weight_id[2], &src_var_locs_id));
4091 ncid,
"dst_location", NC_CHAR, 1, &dim_weight_id[3], &tgt_var_loc_id));
4092 if (num_fixed_values > 0) {
4094 nc_def_var(ncid,
"fixed_values", NC_DOUBLE, 1, &dim_weight_id[4],
4095 &var_fixed_values_id));
4097 nc_def_var(ncid,
"num_dst_per_fixed_value", NC_INT, 1, &dim_weight_id[4],
4098 &var_num_dst_per_fixed_value_id));
4100 nc_def_var(ncid,
"dst_address_fixed", NC_INT, 1, &dim_weight_id[5],
4101 &var_dst_add_fixed_id));
4106 nc_put_att_text(ncid, NC_GLOBAL,
"version",
4110 nc_put_att_text(ncid, NC_GLOBAL,
"src_grid_name",
4111 strlen(src_grid_name), src_grid_name));
4113 nc_put_att_text(ncid, NC_GLOBAL,
"dst_grid_name",
4114 strlen(tgt_grid_name), tgt_grid_name));
4116 char const * str_logical[2] = {
"FALSE",
"TRUE"};
4118 strlen(str_logical[num_links > 0]),
4119 str_logical[num_links > 0]));
4121 strlen(str_logical[num_fixed_values > 0]),
4122 str_logical[num_fixed_values > 0]));
4130 if (num_links > 0) {
4131 int * num_links_per_src_field_int =
4132 xmalloc(num_src_fields *
sizeof(*num_links_per_src_field_int));
4133 for (
size_t i = 0; i < num_src_fields; ++i) {
4135 num_links_per_src_field[i] <= INT_MAX,
4136 "ERROR(create_weight_file): "
4137 "number of links per source field too big (not yet supported)")
4138 num_links_per_src_field_int[i] = (int)num_links_per_src_field[i];
4141 nc_put_var_int(ncid, var_num_links_id, num_links_per_src_field_int));
4142 free(num_links_per_src_field_int);
4145 for (
size_t i = 0; i < num_src_fields; ++i) {
4146 char const * loc_str =
yac_loc2str(src_locations[i]);
4147 size_t str_start[2] = {i, 0};
4148 size_t str_count[2] = {1, strlen(loc_str)};
4150 nc_put_vara_text(ncid, src_var_locs_id, str_start, str_count, loc_str));
4155 size_t str_start[1] = {0};
4156 size_t str_count[1] = {strlen(loc_str)};
4158 nc_put_vara_text(ncid, tgt_var_loc_id, str_start, str_count, loc_str));
4160 if (num_fixed_values > 0) {
4162 int * num_tgt_per_fixed_value_int =
4163 xmalloc(num_fixed_values *
sizeof(*num_tgt_per_fixed_value_int));
4164 for (
unsigned i = 0; i < num_fixed_values; ++i) {
4166 num_tgt_per_fixed_value[i] <= INT_MAX,
4167 "ERROR(create_weight_file): "
4168 "number of targets per fixed value is too big (not yet supported)")
4169 num_tgt_per_fixed_value_int[i] = (int)num_tgt_per_fixed_value[i];
4171 YAC_HANDLE_ERROR(nc_put_var_double(ncid, var_fixed_values_id, fixed_values));
4173 num_tgt_per_fixed_value_int));
4174 free(num_tgt_per_fixed_value_int);
4247 double ** fixed_values,
size_t * num_fixed_values, MPI_Comm comm) {
4252 double * local_fixed_values =
4253 xmalloc(stencil_count *
sizeof(*local_fixed_values));
4255 int * int_buffer =
xmalloc(2 * (
size_t)comm_size *
sizeof(*int_buffer));
4256 int * recvcounts = int_buffer + 0 * comm_size;
4257 int * rdispls = int_buffer + 1 * comm_size;
4259 size_t local_num_fixed = 0;
4262 for (
size_t i = 0; i < stencil_count;
4263 ++i, ++local_num_fixed) {
4267 qsort(local_fixed_values, local_num_fixed,
sizeof(*local_fixed_values),
4272 int local_num_fixed_int = (int)(local_num_fixed);
4275 &local_num_fixed_int, 1, MPI_INT, recvcounts, 1,MPI_INT, comm), comm);
4276 for (
int i = 0, accu = 0; i < comm_size; ++i) {
4278 accu += recvcounts[i];
4281 size_t num_all_fixed_values = 0;
4282 for (
int i = 0; i < comm_size; ++i)
4283 num_all_fixed_values += (
size_t)(recvcounts[i]);
4285 double * all_fixed_values =
4286 xmalloc(num_all_fixed_values *
sizeof(*all_fixed_values));
4291 local_fixed_values, local_num_fixed_int, MPI_DOUBLE,
4292 all_fixed_values, recvcounts, rdispls, MPI_DOUBLE, comm), comm);
4294 free(local_fixed_values);
4296 qsort(all_fixed_values, num_all_fixed_values,
sizeof(*all_fixed_values),
4299 *fixed_values =
xrealloc(all_fixed_values,
4300 num_all_fixed_values *
sizeof(*all_fixed_values));
4301 *num_fixed_values = num_all_fixed_values;
4413 size_t num_fixed_values,
size_t * num_tgt_per_fixed_value,
4414 size_t num_src_fields,
size_t * num_links_per_src_field,
4415 size_t * fixed_offsets,
size_t * link_offsets, MPI_Comm comm) {
4420 size_t count = num_fixed_values + num_src_fields;
4421 uint64_t * uint64_t_buffer =
xmalloc(3 * count *
sizeof(*uint64_t_buffer));
4422 uint64_t * global_counts = uint64_t_buffer + 0 * count;
4423 uint64_t * local_counts = uint64_t_buffer + 1 * count;
4424 uint64_t * offsets = uint64_t_buffer + 2 * count;
4426 for (
size_t i = 0; i < num_fixed_values; ++i)
4427 local_counts[i] = (uint64_t)(num_tgt_per_fixed_value[i]);
4428 for (
size_t i = 0; i < num_src_fields; ++i)
4429 local_counts[num_fixed_values + i] = (uint64_t)(num_links_per_src_field[i]);
4432 MPI_Allreduce(local_counts, global_counts, (
int)count, MPI_UINT64_T,
4433 MPI_SUM, comm), comm);
4435 MPI_Exscan(local_counts, offsets, (
int)count, MPI_UINT64_T, MPI_SUM, comm),
4437 if (comm_rank == 0) memset(offsets, 0, count *
sizeof(*offsets));
4439 for (
size_t i = 0, accu = 0; i < num_fixed_values; ++i) {
4440 fixed_offsets[i] = (size_t)(offsets[i]) + accu;
4441 accu += (size_t)(global_counts[i]);
4443 for (
size_t i = 0, accu = 0; i < num_src_fields; ++i) {
4444 link_offsets[i] = (size_t)(offsets[i+num_fixed_values]) + accu;
4445 accu += (size_t)(global_counts[i+num_fixed_values]);
4447 free(uint64_t_buffer);
4469 size_t * num_links_per_src_field,
size_t num_src_fields,
4470 int * src_address,
int * tgt_address,
double * weight) {
4472 size_t * src_field_offsets =
4473 xmalloc(2 * num_src_fields *
sizeof(*src_field_offsets));
4474 size_t * prev_src_field_offsets = src_field_offsets + num_src_fields;
4475 for (
size_t i = 0, accu = 0; i < num_src_fields; ++i) {
4476 src_field_offsets[i] = accu;
4477 accu += num_links_per_src_field[i];
4481 for (
size_t i = 0; i < stencil_count; ++i, ++curr_stencil) {
4483 memcpy(prev_src_field_offsets, src_field_offsets,
4484 num_src_fields *
sizeof(*prev_src_field_offsets));
4489 "ERROR(stencil_get_link_data): this call is invalid for FIXED stencils")
4492 (curr_stencil->
type ==
SUM) ||
4497 "ERROR(stencil_get_link_data): invalid stencil type")
4498 size_t src_field_offset;
4499 switch (curr_stencil->
type) {
4502 src_field_offset = src_field_offsets[0]++;
4503 src_address[src_field_offset] =
4505 tgt_address[src_field_offset] = curr_tgt_address;
4506 weight[src_field_offset] = 1.0;
4511 for (
size_t k = 0; k < curr_count; ++k) {
4512 src_field_offset = src_field_offsets[0]++;
4513 src_address[src_field_offset] =
4515 tgt_address[src_field_offset] = curr_tgt_address;
4516 weight[src_field_offset] = 1.0;
4524 for (
size_t k = 0; k < curr_count; ++k) {
4525 src_field_offset = src_field_offsets[0]++;
4526 src_address[src_field_offset] =
4528 tgt_address[src_field_offset] = curr_tgt_address;
4529 weight[src_field_offset] = weights[k];
4536 src_address[src_field_offset ] =
4538 tgt_address[src_field_offset ] = curr_tgt_address;
4539 weight[src_field_offset ] = 1.0;
4546 for (
size_t k = 0; k < curr_count; ++k) {
4547 src_field_offset = src_field_offsets[field_indices[k]]++;
4548 src_address[src_field_offset] =
4550 tgt_address[src_field_offset] = curr_tgt_address;
4551 weight[src_field_offset] = 1.0;
4561 for (
size_t k = 0; k < curr_count; ++k) {
4562 src_field_offset = src_field_offsets[field_indices[k]]++;
4563 src_address[src_field_offset] =
4565 tgt_address[src_field_offset] = curr_tgt_address;
4566 weight[src_field_offset] = weights[k];
4572 for (
size_t j = 0; j < num_src_fields; ++j)
4574 src_address + prev_src_field_offsets[j],
4575 src_field_offsets[j] - prev_src_field_offsets[j],
4576 weight + prev_src_field_offsets[j]);
4578 free(src_field_offsets);
4617 char const * src_grid_name,
char const * tgt_grid_name,
4618 size_t src_grid_size,
size_t tgt_grid_size) {
4620#ifndef YAC_NETCDF_ENABLED
4630 "ERROR(yac_interp_weights_write_to_file): "
4631 "YAC is built without the NetCDF support");
4634 MPI_Comm comm = weights->
comm;
4635 int comm_rank, comm_size;
4646 yac_int min_tgt_global_id, max_tgt_global_id;
4649 &min_tgt_global_id, &max_tgt_global_id, comm);
4656 min_tgt_global_id, max_tgt_global_id,
4657 num_io_ranks, io_owner);
4659 io_owner[i] = io_ranks[io_owner[i]];
4662 size_t io_stencil_count = 0;
4668 &io_stencil_count, &io_stencils);
4672 uint64_t grid_sizes[2] = {(uint64_t)src_grid_size, (uint64_t)tgt_grid_size};
4675 MPI_IN_PLACE, grid_sizes, 2, MPI_UINT64_T, MPI_MAX, comm), comm);
4676 src_grid_size = (size_t)(grid_sizes[0]);
4677 tgt_grid_size = (size_t)(grid_sizes[1]);
4680 yac_mpi_call(MPI_Comm_split(comm, io_flag, comm_rank, &io_comm), comm);
4694 qsort(io_stencils, io_stencil_count,
sizeof(*io_stencils),
4697 yac_mpi_call(MPI_Comm_rank(io_comm, &comm_rank), comm);
4698 yac_mpi_call(MPI_Comm_size(io_comm, &comm_size), comm);
4700 double * fixed_values = NULL;
4701 size_t num_fixed_values = 0;
4703 io_stencils, io_stencil_count, &fixed_values, &num_fixed_values, io_comm);
4704 size_t num_src_fields =
weights->num_src_fields;
4705 size_t num_weights_per_link =
4708 size_t * size_t_buffer =
4709 xmalloc(2 * (num_fixed_values + num_src_fields) *
sizeof(*size_t_buffer));
4710 size_t * num_tgt_per_fixed_value = size_t_buffer;
4711 size_t * num_links_per_src_field = size_t_buffer + num_fixed_values;
4712 size_t * fixed_offsets = size_t_buffer + num_fixed_values + num_src_fields;
4713 size_t * link_offsets = size_t_buffer + 2 * num_fixed_values + num_src_fields;
4715 size_t num_fixed_tgt = 0;
4716 size_t num_links = 0;
4718 io_stencils, io_stencil_count, num_fixed_values, fixed_values,
4719 num_tgt_per_fixed_value, &num_fixed_tgt, num_src_fields,
4720 num_links_per_src_field, &num_links);
4723 num_fixed_values, num_tgt_per_fixed_value,
4724 num_src_fields, num_links_per_src_field,
4725 fixed_offsets, link_offsets, io_comm);
4727 if (comm_rank == comm_size - 1) {
4729 size_t * total_num_tgt_per_fixed_value =
4730 xmalloc(num_fixed_values *
sizeof(*total_num_tgt_per_fixed_value));
4731 for (
size_t i = 0, accu = 0; i < num_fixed_values; ++i) {
4732 total_num_tgt_per_fixed_value[i] =
4733 fixed_offsets[i] + num_tgt_per_fixed_value[i] - accu;
4734 accu += total_num_tgt_per_fixed_value[i];
4736 size_t total_num_links = link_offsets[num_src_fields-1] +
4737 num_links_per_src_field[num_src_fields-1];
4739 size_t * total_num_links_per_src_field =
4740 xmalloc(num_src_fields *
sizeof(*total_num_links_per_src_field));
4741 for (
size_t i = 0, accu = 0; i < num_src_fields; ++i) {
4742 total_num_links_per_src_field[i] =
4743 link_offsets[i] + num_links_per_src_field[i] - accu;
4744 accu += total_num_links_per_src_field[i];
4748 filename, src_grid_name, tgt_grid_name,
4749 num_fixed_values, fixed_values, total_num_tgt_per_fixed_value,
4750 total_num_links, num_weights_per_link,
4751 num_src_fields, total_num_links_per_src_field,
4753 src_grid_size, tgt_grid_size);
4755 free(total_num_links_per_src_field);
4756 free(total_num_tgt_per_fixed_value);
4767 yac_nc_open(filename, NC_WRITE | NC_SHARE, &ncid);
4769 if (num_fixed_tgt > 0) {
4771 int * tgt_address_fixed =
4772 xmalloc(num_fixed_tgt *
sizeof(*tgt_address_fixed));
4776 int var_dst_add_fixed_id;
4780 for (
size_t i = 0, offset = 0; i < num_fixed_values; ++i) {
4782 if (num_tgt_per_fixed_value[i] == 0)
continue;
4784 size_t start[1] = {fixed_offsets[i]};
4785 size_t count[1] = {num_tgt_per_fixed_value[i]};
4788 ncid, var_dst_add_fixed_id, start, count, tgt_address_fixed + offset));
4789 offset += num_tgt_per_fixed_value[i];
4792 free(tgt_address_fixed);
4795 if (num_links > 0) {
4797 int * src_address_link =
xmalloc(num_links *
sizeof(*src_address_link));
4798 int * tgt_address_link =
xmalloc(num_links *
sizeof(*tgt_address_link));
4799 double * w =
xmalloc(num_links * num_weights_per_link *
sizeof(*w));
4801 io_stencils + num_fixed_tgt, io_stencil_count - num_fixed_tgt,
4802 num_links_per_src_field, num_src_fields,
4803 src_address_link, tgt_address_link, w);
4805 int var_src_add_id, var_dst_add_id, var_weight_id;
4810 for (
size_t i = 0, offset = 0; i < num_src_fields; ++i) {
4812 if (num_links_per_src_field[i] == 0)
continue;
4814 size_t start[2] = {link_offsets[i], 0};
4815 size_t count[2] = {num_links_per_src_field[i], num_weights_per_link};
4819 ncid, var_src_add_id, start, count, src_address_link + offset));
4822 ncid, var_dst_add_id, start, count, tgt_address_link + offset));
4825 ncid, var_weight_id, start, count,
4826 w + num_weights_per_link * offset));
4828 offset += num_links_per_src_field[i];
4832 free(tgt_address_link);
4833 free(src_address_link);
4842 free(size_t_buffer);