/* Tolerance used when testing whether an interpolation weight equals 1.0
   (applied as `fabs(w[j] - 1.0) < WEIGHT_TOL` in the single-field weight
   scan further down). */
12#define WEIGHT_TOL (1e-9)
/* String constant "YAC_YAXT_EXCHANGER"; where it is consumed is not visible
   in this excerpt. */
23#define YAC_YAXT_EXCHANGER_STR "YAC_YAXT_EXCHANGER"
64 size_t * field_indices;
174 xmalloc((
size_t)count *
sizeof(*point_infos));
175 memcpy(point_infos, point.data.data.multi,
176 (
size_t)count *
sizeof(*point_infos));
177 point.data.data.multi = point_infos;
188 for (
size_t i = 0; i < count; ++i) {
189 int curr_count = points_from[i].
data.
count;
190 points_to[i] = points_from[i];
191 if (curr_count > 1) {
194 point_info_buffer, points_from[i].data.
data.
multi,
195 (
size_t)curr_count *
sizeof(*point_info_buffer));
196 point_info_buffer += curr_count;
199 *point_info_buffer_ = point_info_buffer;
205 size_t point_info_buffer_size = 0;
206 for (
size_t i = 0; i <
count; ++i)
208 point_info_buffer_size += (size_t)(
points[i].
data.count);
212 sizeof(*points_copy));
218 points_copy->
data,
points, count, &point_info_buffer);
226 size_t point_info_buffer_size = 0;
227 size_t total_count = 0;
228 for (
size_t i = 0; i < num_fields; ++i) {
229 total_count += counts[i];
230 for (
size_t j = 0; j < counts[i]; ++j) {
232 point_info_buffer_size += (size_t)(
points[i][j].
data.count);
238 sizeof(*points_copy));
239 points_copy->
data =
xmalloc(total_count *
sizeof(*(points_copy->
data)));
240 points_copy->
count = total_count;
243 for (
size_t i = 0, k = 0; i < num_fields; ++i) {
244 for (
size_t j = 0; j < counts[i]; ++j, ++k) {
245 int curr_count =
points[i][j].data.count;
247 if (curr_count > 1) {
250 point_info_buffer,
points[i][j].data.data.multi,
251 (
size_t)curr_count *
sizeof(*point_info_buffer));
252 point_info_buffer += curr_count;
261 double fixed_value) {
264 size_t stencils_array_size =
weights->stencils_array_size;
265 size_t stencils_size =
weights->stencils_size;
269 for (
size_t i = 0; i < tgts->
count; ++i, ++stencils_size) {
277 weights->stencils_array_size = stencils_array_size;
278 weights->stencils_size = stencils_size;
285 if (tgts->
count == 0)
return;
289 for (
size_t i = 0, k = 0; (i < tgts->
count) && !pack_flag; ++i)
290 for (
size_t j = 0; (j < num_src_per_tgt[i]) && !pack_flag; ++j, ++k)
295 for (
size_t i = 0, k = 0, l = 0;
296 i < tgts->
count; i++) {
298 size_t curr_count = num_src_per_tgt[i];
300 for (
size_t j = 0; j < curr_count; j++, k++) {
303 num_src_per_tgt[i]--;
314 if ((curr_count != 0) && (num_src_per_tgt[i] == 0)) {
320 num_src_per_tgt[i] = 1;
/* Scan the stencils to determine two properties at once:
     flag_weight_one - every inspected weight is within WEIGHT_TOL of 1.0
     flag_count_one  - every inspected target has exactly one source
   Both flags start out set and are only ever cleared. */
328 int flag_weight_one = 1;
329 int flag_count_one = 1;
/* i walks the targets, j is the running index into the flat weight array w.
   The scan is abandoned as soon as both flags have been cleared. */
330 for (
size_t i = 0, j = 0;
331 (i < tgts->
count) && (flag_weight_one || flag_count_one); ++i) {
333 size_t curr_count = num_src_per_tgt[i];
334 flag_count_one &= curr_count == 1;
/* This loop is skipped entirely once flag_weight_one has been cleared, so j
   stops advancing from that point on; w[j] is not read again afterwards. */
336 for (
size_t k = 0; (k < curr_count) && flag_weight_one; ++k, ++j)
337 flag_weight_one &= fabs(w[j] - 1.0) <
WEIGHT_TOL;
/* Taken when no weight outside the tolerance was found; the body of this
   branch is not visible in this excerpt. */
341 if (flag_weight_one) {
352 size_t stencils_array_size =
weights->stencils_array_size;
353 size_t stencils_size =
weights->stencils_size;
357 for (
size_t i = 0; i < tgts->
count; ++i, ++stencils_size) {
359 size_t curr_num_src = num_src_per_tgt[i];
362 if (curr_num_src == 0) {
367 double * curr_weights =
368 xmalloc(curr_num_src *
sizeof(*curr_weights));
375 memcpy(curr_weights, w, curr_num_src *
sizeof(*curr_weights));
377 srcs += curr_num_src;
382 weights->stencils_array_size = stencils_array_size;
383 weights->stencils_size = stencils_size;
391 if (tgts->
count == 0)
return;
395 int flag_count_one = 1;
396 for (
size_t i = 0; i < tgts->
count; ++i) {
397 if (num_src_per_tgt[i] != 1) {
403 if (flag_count_one) {
410 size_t stencils_array_size =
weights->stencils_array_size;
411 size_t stencils_size =
weights->stencils_size;
415 for (
size_t i = 0; i < tgts->
count; ++i, ++stencils_size) {
417 size_t curr_num_src = num_src_per_tgt[i];
419 stencils[stencils_size].
type =
SUM;
425 srcs += curr_num_src;
429 weights->stencils_array_size = stencils_array_size;
430 weights->stencils_size = stencils_size;
438 if (tgts->
count == 0)
return;
441 size_t stencils_array_size =
weights->stencils_array_size;
442 size_t stencils_size =
weights->stencils_size;
446 for (
size_t i = 0; i < tgts->
count; ++i, ++stencils_size) {
454 weights->stencils_array_size = stencils_array_size;
455 weights->stencils_size = stencils_size;
460 size_t * src_field_indices,
struct remote_point ** srcs_per_field,
461 size_t num_src_fields) {
463 if (tgts->
count == 0)
return;
465 if (num_src_fields == 1) {
471 size_t stencils_array_size =
weights->stencils_array_size;
472 size_t stencils_size =
weights->stencils_size;
475 stencils, stencils_array_size, stencils_size + tgts->
count);
476 stencils += stencils_size;
478 size_t srcs_offsets[num_src_fields];
479 memset(srcs_offsets, 0, num_src_fields *
sizeof(srcs_offsets[0]));
481 for (
size_t i = 0; i < tgts->
count; ++i) {
483 size_t src_field_idx = src_field_indices[i];
488 srcs_per_field[src_field_idx][srcs_offsets[src_field_idx]++]);
493 weights->stencils_array_size = stencils_array_size;
499 size_t * num_src_per_field_per_tgt,
struct remote_point ** srcs_per_field,
500 size_t num_src_fields) {
502 if (tgts->
count == 0)
return;
504 if (num_src_fields == 1) {
506 weights, tgts, num_src_per_field_per_tgt, srcs_per_field[0]);
512 int flag_count_one = 1;
513 for (
size_t i = 0, k = 0; i < tgts->
count; ++i) {
515 for (
size_t j = 0; j < num_src_fields; ++j, ++k)
516 count += num_src_per_field_per_tgt[k];
523 if (flag_count_one) {
525 size_t * src_field_indices =
528 for (
size_t i = 0, k = 0; i < tgts->
count; ++i)
529 for (
size_t j = 0; j < num_src_fields; ++j, ++k)
530 if (num_src_per_field_per_tgt[k])
531 src_field_indices[i] = j;
534 weights, tgts, src_field_indices, srcs_per_field, num_src_fields);
536 free(src_field_indices);
539 struct remote_point * curr_srcs_per_field[num_src_fields];
540 memcpy(curr_srcs_per_field, srcs_per_field,
541 num_src_fields *
sizeof(*srcs_per_field));
544 size_t stencils_array_size =
weights->stencils_array_size;
545 size_t stencils_size =
weights->stencils_size;
548 stencils, stencils_array_size, stencils_size + tgts->
count);
550 for (
size_t i = 0; i < tgts->
count; ++i, ++stencils_size) {
552 size_t * curr_num_src_per_src_field =
553 num_src_per_field_per_tgt + i * num_src_fields;
554 size_t curr_num_src = 0;
555 for (
size_t j = 0; j < num_src_fields; ++j)
556 curr_num_src += curr_num_src_per_src_field[j];
564 for (
size_t j = 0, l = 0; j < num_src_fields; ++j) {
565 size_t curr_num_src = curr_num_src_per_src_field[j];
566 for (
size_t k = 0; k < curr_num_src; ++k, ++l) {
572 curr_srcs_per_field, curr_num_src_per_src_field, num_src_fields);
574 for (
size_t j = 0; j < num_src_fields; ++j)
575 curr_srcs_per_field[j] += curr_num_src_per_src_field[j];
579 weights->stencils_array_size = stencils_array_size;
580 weights->stencils_size = stencils_size;
586 size_t * num_src_per_field_per_tgt,
struct remote_point ** srcs_per_field,
587 double * w,
size_t num_src_fields) {
589 if (tgts->
count == 0)
return;
591 if (num_src_fields == 1) {
593 weights, tgts, num_src_per_field_per_tgt, srcs_per_field[0], w);
/* Multi-source-field variant of the "are all weights 1.0?" scan. The outer
   loop stops at the first weight that fails the test. */
599 int flag_weight_one = 1;
600 for (
size_t i = 0, j = 0;
601 (i < tgts->
count) && flag_weight_one; ++i) {
603 for (
size_t src_field_idx = 0; src_field_idx < num_src_fields;
/* The per-target source counts are indexed as
   [target * num_src_fields + source field]. */
607 num_src_per_field_per_tgt[i * num_src_fields + src_field_idx];
/* NOTE(review): this comparison uses the literal 1e-12, whereas the
   single-field scan in this file compares against WEIGHT_TOL, defined as
   (1e-9). When num_src_fields == 1 this routine hands over to the
   single-field routine, so the effective tolerance depends on the number of
   source fields. Confirm which tolerance is intended and use it in both. */
609 for (
size_t k = 0; (k < curr_count) && flag_weight_one; ++k, ++j)
610 flag_weight_one &= fabs(w[j] - 1.0) < 1e-12;
/* All inspected weights passed the test: the call whose argument list
   follows is made without passing w. */
615 if (flag_weight_one) {
618 weights, tgts, num_src_per_field_per_tgt, srcs_per_field, num_src_fields);
622 struct remote_point * curr_srcs_per_field[num_src_fields];
623 memcpy(curr_srcs_per_field, srcs_per_field,
624 num_src_fields *
sizeof(*srcs_per_field));
627 size_t stencils_array_size =
weights->stencils_array_size;
628 size_t stencils_size =
weights->stencils_size;
631 stencils, stencils_array_size, stencils_size + tgts->
count);
633 for (
size_t i = 0; i < tgts->
count; ++i, ++stencils_size) {
635 size_t * curr_num_src_per_src_field =
636 num_src_per_field_per_tgt + i * num_src_fields;
637 size_t curr_num_weights = 0;
638 for (
size_t j = 0; j < num_src_fields; ++j)
639 curr_num_weights += curr_num_src_per_src_field[j];
640 double * curr_weights =
641 xmalloc(curr_num_weights *
sizeof(*curr_weights));
648 for (
size_t j = 0, l = 0; j < num_src_fields; ++j) {
649 size_t curr_num_src = curr_num_src_per_src_field[j];
650 for (
size_t k = 0; k < curr_num_src; ++k, ++l)
field_indices[l] = j;
655 memcpy(curr_weights, w, curr_num_weights *
sizeof(*curr_weights));
657 for (
size_t j = 0; j < num_src_fields; ++j)
658 curr_srcs_per_field[j] += curr_num_src_per_src_field[j];
659 w += curr_num_weights;
663 weights->stencils_array_size = stencils_array_size;
664 weights->stencils_size = stencils_size;
686 MPI_Datatype fixed_stencil_dt;
687 int array_of_blocklengths[] = {1, 1};
688 const MPI_Aint array_of_displacements[] =
689 {(MPI_Aint)(intptr_t)(
const void *)&(dummy.
value) -
690 (MPI_Aint)(intptr_t)(
const void *)&dummy,
691 (MPI_Aint)(intptr_t)(
const void *)&(dummy.
orig_pos) -
692 (MPI_Aint)(intptr_t)(
const void *)&dummy};
693 const MPI_Datatype array_of_types[] = {MPI_DOUBLE,
YAC_MPI_SIZE_T};
695 MPI_Type_create_struct(2, array_of_blocklengths, array_of_displacements,
696 array_of_types, &fixed_stencil_dt), comm);
701 void * interp,
double fixed_value,
size_t count,
size_t * tgt_pos) {
708 void * interp,
double fixed_value,
size_t count,
size_t * tgt_pos) {
729 size_t total_num_fixed_tgt = 0;
735 (total_num_fixed_tgt + count) *
739 tgt_pos, count *
sizeof(*tgt_pos));
745 MPI_Comm comm,
size_t count,
757 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
759 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
762 for (
size_t i = 0; i < count; ++i) {
766 (&(fixed_stencils[i].tgt.
data.
data.single)):
768 for (
int j = 0; j < curr_count; ++j)
769 sendcounts[curr_point_infos[j].
rank]++;
773 1, sendcounts, recvcounts, sdispls, rdispls, comm);
775 size_t send_buffer_size =
776 sdispls[comm_size] + sendcounts[comm_size - 1];
777 size_t recv_buffer_size =
778 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
781 xmalloc((send_buffer_size + recv_buffer_size) *
sizeof(*buffer));
786 for (
size_t i = 0; i < count; ++i) {
790 (&(fixed_stencils[i].tgt.
data.
data.single)):
793 for (
int j = 0; j < curr_count; ++j) {
794 size_t pos = sdispls[curr_point_infos[j].
rank + 1]++;
796 send_buffer[pos].
orig_pos = curr_point_infos[j].orig_pos;
805 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls,
806 sizeof(*send_buffer), stencil_fixed_dt, comm,
807 "yac_interp_weights_redist_fixed", __LINE__);
812 if (recv_buffer_size == 0) {
/* Sort the received fixed-value stencils. The comparison function is on a
   line not visible in this excerpt; the run detection below only groups
   correctly if equal values end up adjacent - TODO confirm the sort key. */
822 qsort(recv_buffer, recv_buffer_size,
sizeof(*recv_buffer),
/* Extract the target positions into a plain size_t array, in sorted order. */
825 size_t * tgt_pos =
xmalloc(recv_buffer_size *
sizeof(*tgt_pos));
826 for (
size_t i = 0; i < recv_buffer_size; ++i)
827 tgt_pos[i] = (
size_t)(recv_buffer[i].
orig_pos);
/* Walk the buffer in runs of identical value: offset marks the start of the
   current run and i is advanced to one past its end. */
829 size_t offset = 0, i = 0;
830 while (offset < recv_buffer_size) {
831 double fixed_value = recv_buffer[i].
value;
/* Values are compared with exact floating-point equality. */
832 while ((i < recv_buffer_size) && (fixed_value == recv_buffer[i].
value)) ++i;
833 size_t curr_count = i - offset;
/* The run [offset, i) is handed over together with its fixed value; the
   callee name and the update of offset are on lines not visible here. */
835 interp, fixed_value, curr_count, tgt_pos + offset);
/* A point with a single location has nothing to choose from: return it. */
847 if (
src.count == 1)
return src.data.single;
/* Otherwise look for the entry of src.data.multi with the lowest rank. */
849 int min_rank = INT_MAX;
850 size_t min_rank_idx = SIZE_MAX;
851 for (
int i = 0; i <
src.count; ++i) {
852 if (
src.data.multi[i].rank < min_rank) {
853 min_rank =
src.data.multi[i].rank;
/* NOTE(review): the assignment to min_rank_idx is on a line not visible in
   this excerpt (presumably min_rank_idx = i - confirm). If the loop above
   never executes (src.count <= 0), min_rank_idx is still SIZE_MAX here and
   the index below is out of bounds; callers apparently guarantee
   src.count >= 1 - confirm. */
858 return src.data.multi[min_rank_idx];
/* msgs:  array of redistribution messages whose datatypes are released
   count: number of entries in msgs
   comm:  communicator passed to yac_mpi_call for error reporting */
862 struct Xt_redist_msg * msgs,
size_t count, MPI_Comm comm) {
863 for (
size_t i = 0; i < count; ++i) {
/* dt points at the handle stored inside msgs[i], so MPI_Type_free operates
   on the stored handle itself rather than on a copy. */
864 MPI_Datatype * dt = &(msgs[i].datatype);
/* Entries that carry no datatype are skipped. */
865 if (*dt != MPI_DATATYPE_NULL)
yac_mpi_call(MPI_Type_free(dt), comm);
884 size_t * src_orig_poses,
size_t * sendcounts,
886 size_t * recvcounts, MPI_Comm comm, Xt_config redist_config) {
891 size_t nsend = 0, nrecv = 0;
892 size_t max_buffer_size = 0;
893 for (
int i = 0; i < comm_size; ++i) {
894 nsend += sendcounts[i] > 0;
895 nrecv += recvcounts[i] > 0;
896 if (max_buffer_size < sendcounts[i]) max_buffer_size = sendcounts[i];
897 if (max_buffer_size < recvcounts[i]) max_buffer_size = recvcounts[i];
900 size_t total_num_msg = nsend + nrecv;
902 struct Xt_redist_msg * msgs_buffer =
903 xmalloc(total_num_msg *
sizeof(*msgs_buffer));
904 struct Xt_redist_msg * send_msgs = msgs_buffer;
905 struct Xt_redist_msg * recv_msgs = msgs_buffer + nsend;
907 int * pos_buffer =
xmalloc((
size_t)max_buffer_size *
sizeof(*pos_buffer));
/* Build one receive and/or one send message per rank that has a non-zero
   count. pos_buffer is scratch space that is refilled for every message.
   NOTE(review): nrecv and nsend are used as write indices here; they are
   presumably reset to zero before this loop and incremented after each
   message on lines not visible in this excerpt - confirm. */
912 for (
int i = 0; i < comm_size; ++i) {
913 if (recvcounts[i] > 0) {
/* NOTE(review): orig_pos is narrowed to int; positions above INT_MAX would
   not survive this cast - confirm the value range. */
914 for (
size_t j = 0; j < recvcounts[i]; ++j)
915 pos_buffer[j] = (
int)tgt_stencils[j].
orig_pos;
/* Advance to the stencils belonging to the next rank. */
916 tgt_stencils += recvcounts[i];
917 recv_msgs[nrecv].rank = i;
918 recv_msgs[nrecv].datatype =
919 xt_mpi_generate_datatype(pos_buffer, recvcounts[i], MPI_DOUBLE, comm);
922 if (sendcounts[i] > 0) {
/* Same narrowing from size_t to int as on the receive side. */
923 for (
size_t j = 0; j < sendcounts[i]; ++j)
924 pos_buffer[j] = (
int)src_orig_poses[j];
/* Advance to the source positions belonging to the next rank. */
925 src_orig_poses += sendcounts[i];
926 send_msgs[nsend].rank = i;
927 send_msgs[nsend].datatype =
928 xt_mpi_generate_datatype(pos_buffer, sendcounts[i], MPI_DOUBLE, comm);
940 if (total_num_msg > 0) {
944 yac_mpi_call(MPI_Comm_split(comm, 1, 0, &split_comm), comm);
947 xmalloc(2 * total_num_msg *
sizeof(*rank_buffer));
948 int * orig_ranks = rank_buffer;
949 int * split_ranks = rank_buffer + total_num_msg;
951 for (
size_t i = 0; i < total_num_msg; ++i)
952 orig_ranks[i] = msgs_buffer[i].rank;
954 MPI_Group orig_group, split_group;
956 yac_mpi_call(MPI_Comm_group(split_comm, &split_group), comm);
961 MPI_Group_translate_ranks(orig_group, total_num_msg, orig_ranks,
962 split_group, split_ranks), split_comm);
964 for (
size_t i = 0; i < total_num_msg; ++i)
965 msgs_buffer[i].rank = split_ranks[i];
974 xt_redist_single_array_base_custom_new(
975 nsend, nrecv, send_msgs, recv_msgs, split_comm, redist_config);
978 yac_mpi_call(MPI_Comm_split(comm, 0, 0, &split_comm), comm);
1004 size_t * src_orig_poses,
size_t * sendcounts,
1006 size_t * recvcounts,
size_t num_src_fields, MPI_Comm comm,
1007 Xt_config redist_config) {
1012 size_t nsends[num_src_fields], nrecvs[num_src_fields];
1013 size_t max_buffer_size = 0;
1014 memset(nsends, 0, num_src_fields *
sizeof(nsends[0]));
1015 memset(nrecvs, 0, num_src_fields *
sizeof(nrecvs[0]));
1016 for (
int i = 0; i < comm_size; ++i) {
1017 for (
size_t j = 0; j < num_src_fields; ++j) {
1018 size_t idx = (size_t)i * num_src_fields + j;
1019 if (sendcounts[idx] > 0) nsends[j]++;
1020 if (recvcounts[idx] > 0) nrecvs[j]++;
1021 if (max_buffer_size < sendcounts[idx]) max_buffer_size = sendcounts[idx];
1022 if (max_buffer_size < recvcounts[idx]) max_buffer_size = recvcounts[idx];
1026 size_t nsend = 0, nrecv = 0;
1027 size_t send_offsets[num_src_fields];
1028 size_t recv_offsets[num_src_fields];
1029 for (
size_t i = 0; i < num_src_fields; ++i) {
1030 send_offsets[i] = nsend;
1031 recv_offsets[i] = nrecv;
1036 size_t total_num_msg = nsend + nrecv;
1038 struct Xt_redist_msg * msgs_buffer =
1039 xmalloc(total_num_msg *
sizeof(*msgs_buffer));
1040 struct Xt_redist_msg * send_msgs = msgs_buffer;
1041 struct Xt_redist_msg * recv_msgs = msgs_buffer + nsend;
1043 int * pos_buffer =
xmalloc(max_buffer_size *
sizeof(*pos_buffer));
1046 for (
int i = 0; i < comm_size; ++i) {
1047 for (
size_t src_field_idx = 0; src_field_idx < num_src_fields;
1049 size_t idx = (size_t)i * num_src_fields + src_field_idx;
1050 if (recvcounts[idx] > 0) {
1051 for (
size_t j = 0; j < recvcounts[idx]; ++j)
1052 pos_buffer[j] = (
int)tgt_stencils[j].
orig_pos;
1053 tgt_stencils += recvcounts[idx];
1054 recv_msgs[recv_offsets[src_field_idx]].rank = i;
1055 recv_msgs[recv_offsets[src_field_idx]].datatype =
1056 xt_mpi_generate_datatype(
1057 pos_buffer, recvcounts[idx], MPI_DOUBLE, comm);
1058 recv_offsets[src_field_idx]++;
1060 if (sendcounts[idx] > 0) {
1061 for (
size_t j = 0; j < sendcounts[idx]; ++j)
1062 pos_buffer[j] = (
int)src_orig_poses[j];
1063 src_orig_poses += sendcounts[idx];
1064 send_msgs[send_offsets[src_field_idx]].rank = i;
1065 send_msgs[send_offsets[src_field_idx]].datatype =
1066 xt_mpi_generate_datatype(
1067 pos_buffer, sendcounts[idx], MPI_DOUBLE, comm);
1068 send_offsets[src_field_idx]++;
1075 Xt_redist * redists;
1076 MPI_Comm split_comm;
1080 if (total_num_msg > 0) {
1084 yac_mpi_call(MPI_Comm_split(comm, 1, 0, &split_comm), comm);
1087 xmalloc(2 * total_num_msg *
sizeof(*rank_buffer));
1088 int * orig_ranks = rank_buffer;
1089 int * split_ranks = rank_buffer + total_num_msg;
1091 for (
size_t i = 0; i < total_num_msg; ++i)
1092 orig_ranks[i] = msgs_buffer[i].rank;
1094 MPI_Group orig_group, split_group;
1096 yac_mpi_call(MPI_Comm_group(split_comm, &split_group), comm);
1101 MPI_Group_translate_ranks(orig_group, total_num_msg, orig_ranks,
1102 split_group, split_ranks), split_comm);
1104 for (
size_t i = 0; i < total_num_msg; ++i)
1105 msgs_buffer[i].rank = split_ranks[i];
1113 redists =
xmalloc(num_src_fields *
sizeof(*redists));
1114 for (
size_t src_field_idx = 0; src_field_idx < num_src_fields;
1116 redists[src_field_idx] =
1117 xt_redist_single_array_base_custom_new(
1118 nsends[src_field_idx], nrecvs[src_field_idx],
1119 send_msgs, recv_msgs, split_comm, redist_config);
1120 send_msgs += nsends[src_field_idx];
1121 recv_msgs += nrecvs[src_field_idx];
1125 yac_mpi_call(MPI_Comm_split(comm, 0, 0, &split_comm), comm);
1138 MPI_Datatype direct_stencil_dt;
1139 int array_of_blocklengths[] = {1, 1};
1140 const MPI_Aint array_of_displacements[] =
1141 {(MPI_Aint)(intptr_t)(
const void *)&(dummy.
src) -
1142 (MPI_Aint)(intptr_t)(
const void *)&dummy,
1143 (MPI_Aint)(intptr_t)(
const void *)&(dummy.
orig_pos) -
1144 (MPI_Aint)(intptr_t)(
const void *)&dummy};
1145 MPI_Datatype array_of_types[] =
1148 MPI_Type_create_struct(2, array_of_blocklengths, array_of_displacements,
1149 array_of_types, &direct_stencil_dt), comm);
1150 yac_mpi_call(MPI_Type_free(&(array_of_types[0])), comm);
1159 if (ret)
return ret;
1168 void * interp,
size_t * src_orig_poses,
size_t * sendcounts,
1170 size_t * recvcounts, MPI_Comm comm, Xt_config redist_config) {
1175 src_orig_poses, sendcounts, tgt_stencils, recvcounts,
1176 comm, redist_config);
1180 if (redist != NULL) xt_redist_delete(redist);
1184 MPI_Comm comm,
size_t count,
1189 size_t *, MPI_Comm, Xt_config), Xt_config redist_config) {
1199 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
1201 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1204 for (
size_t i = 0; i < count; ++i) {
1208 (&(direct_stencils[i].tgt.
data.
data.single)):
1210 for (
int j = 0; j < curr_count; ++j)
1211 sendcounts[curr_point_info[j].
rank]++;
1215 1, sendcounts, recvcounts, sdispls, rdispls, comm);
1217 size_t send_buffer_size =
1218 sdispls[comm_size] + sendcounts[comm_size - 1];
1219 size_t recv_buffer_size =
1220 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
1221 size_t tgt_count = recv_buffer_size;
1224 xmalloc((send_buffer_size + recv_buffer_size) *
sizeof(*stencil_buffer));
1226 stencil_buffer + recv_buffer_size;
1230 for (
size_t i = 0; i < count; ++i) {
1234 (&(direct_stencils[i].tgt.
data.
data.single)):
1238 for (
int j = 0; j < curr_count; ++j) {
1239 size_t pos = sdispls[curr_point_infos[j].
rank + 1]++;
1240 send_stencil_buffer[pos].
src =
src;
1241 send_stencil_buffer[pos].
orig_pos = curr_point_infos[j].orig_pos;
1250 send_stencil_buffer, sendcounts, sdispls,
1251 recv_stencil_buffer, recvcounts, rdispls,
1252 sizeof(*stencil_buffer), stencil_direct_dt, comm,
1253 "yac_interp_weights_redist_direct", __LINE__);
1258 qsort(recv_stencil_buffer, tgt_count,
sizeof(*recv_stencil_buffer),
1265 memset(sendcounts, 0, (
size_t)comm_size *
sizeof(*sendcounts));
1267 for (
size_t i = 0; i < tgt_count; ++i)
1268 sendcounts[recv_stencil_buffer[i].
src.rank]++;
1271 1, sendcounts, recvcounts, sdispls, rdispls, comm);
1273 send_buffer_size = sdispls[comm_size] + sendcounts[comm_size - 1];
1274 recv_buffer_size = rdispls[comm_size - 1] + recvcounts[comm_size - 1];
1276 size_t * orig_pos_buffer =
1277 xmalloc((send_buffer_size + recv_buffer_size) *
sizeof(*orig_pos_buffer));
1278 size_t * send_orig_pos_buffer = orig_pos_buffer + recv_buffer_size;
1279 size_t * recv_orig_pos_buffer = orig_pos_buffer;
1281 for (
size_t i = 0; i < tgt_count; ++i)
1282 send_orig_pos_buffer[sdispls[recv_stencil_buffer[i].
src.rank + 1]++] =
1286 send_orig_pos_buffer, sendcounts, sdispls,
1287 recv_orig_pos_buffer, recvcounts, rdispls,
1289 "yac_interp_weights_redist_direct", __LINE__);
1296 interp, recv_orig_pos_buffer, recvcounts, recv_stencil_buffer, sendcounts,
1297 comm, redist_config);
1300 free(orig_pos_buffer);
1301 free(stencil_buffer);
1307 MPI_Datatype direct_stencil_mf_dt;
1308 int array_of_blocklengths[] = {1, 1, 1};
1309 const MPI_Aint array_of_displacements[] =
1310 {(MPI_Aint)(intptr_t)(
const void *)&(dummy.
src) -
1311 (MPI_Aint)(intptr_t)(
const void *)&dummy,
1313 (MPI_Aint)(intptr_t)(
const void *)&dummy,
1314 (MPI_Aint)(intptr_t)(
const void *)&(dummy.
orig_pos) -
1315 (MPI_Aint)(intptr_t)(
const void *)&dummy};
1316 MPI_Datatype array_of_types[] =
1319 MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacements,
1320 array_of_types, &direct_stencil_mf_dt), comm);
1321 yac_mpi_call(MPI_Type_free(&(array_of_types[0])), comm);
1330 if (ret)
return ret;
1337 if (ret)
return ret;
1346 void * interp,
size_t num_src_fields,
size_t * src_orig_poses,
1348 size_t * recvcounts, MPI_Comm comm, Xt_config redist_config) {
1351 Xt_redist * redists =
1353 src_orig_poses, sendcounts, tgt_stencils, recvcounts,
1354 (
size_t)num_src_fields, comm, redist_config);
1359 if (redists != NULL) {
1360 for (
size_t i = 0; i < (size_t)num_src_fields; ++i)
1361 xt_redist_delete(redists[i]);
1367 struct yac_src_field_exchange_data * src_field_exchange_data) {
1369 src_field_exchange_data->send.msg = NULL;
1370 src_field_exchange_data->send.num_msg = 0;
1371 src_field_exchange_data->recv.msg = NULL;
1372 src_field_exchange_data->recv.num_msg = 0;
1392 i < num_src_fields; ++i) {
1421 size_t num_src_fields,
size_t tgt_count,
size_t * tgt_idx,
1422 size_t * num_src_per_tgt,
double * weights,
size_t *
src_field_idx,
1423 size_t * src_idx,
size_t * src_field_buffer_size) {
1426 size_t num_weights = 0;
1427 if (num_src_per_tgt) {
1428 for (
size_t i = 0; i < tgt_count; ++i) num_weights += num_src_per_tgt[i];
1430 num_weights = tgt_count;
1434 size_t weights_offset = 0;
1435 for (
size_t i = 0; i < interp_weights_data->
num_wgt_tgt; ++i)
1446 tgt_idx, tgt_count *
sizeof(*tgt_idx));
1454 if (num_src_per_tgt) {
1457 num_src_per_tgt, tgt_count *
sizeof(*num_src_per_tgt));
1459 for (
size_t i = 0, j = interp_weights_data->
num_wgt_tgt; i < tgt_count;
1465 interp_weights_data->
weights =
1467 interp_weights_data->
weights, (weights_offset + num_weights) *
1468 sizeof(*(interp_weights_data->
weights)));
1471 interp_weights_data->
weights + weights_offset,
1472 weights, num_weights *
sizeof(*weights));
1474 for (
size_t i = 0, j = weights_offset; i < num_weights; ++i, ++j)
1475 interp_weights_data->
weights[j] = 1.0;
1481 interp_weights_data->
src_field_idx, (weights_offset + num_weights) *
1488 for (
size_t i = 0, j = weights_offset; i < num_weights;
1500 interp_weights_data->
src_idx =
1502 interp_weights_data->
src_idx, (weights_offset + num_weights) *
1503 sizeof(*(interp_weights_data->
src_idx)));
1504 for (
size_t i = 0, j = weights_offset; i < num_weights; ++i, ++j)
1505 interp_weights_data->
src_idx[j] =
1509 for (
size_t i = 0; i < num_src_fields; ++i)
/* Returns a pointer to the entry of msgs->msg that belongs to `rank`. If no
   entry with that rank exists yet, a new empty one (pos = NULL, count = 0)
   is stored at the end of the array and returned instead.
   The function name and the declaration of msg_idx are on lines not visible
   in this excerpt. */
1516static struct yac_src_field_exchange_data_msg *
1518 struct yac_src_field_exchange_data_msgs * msgs,
int rank) {
/* Linear search for an existing message with a matching rank. */
1522 for (; msg_idx < msgs->num_msg; ++msg_idx)
1523 if (msgs->msg[msg_idx].rank == rank)
break;
/* Not found: msg_idx equals the number of messages that were searched. */
1526 if (msg_idx == msgs->num_msg) {
/* NOTE(review): the array is resized to msgs->num_msg elements and element
   msg_idx is written right after. That is only in bounds if num_msg was
   incremented on the omitted line between the check above and this
   xrealloc - confirm. */
1528 msgs->msg =
xrealloc(msgs->msg, msgs->num_msg *
sizeof(*msgs->msg));
1529 msgs->msg[msg_idx] =
1530 (
struct yac_src_field_exchange_data_msg)
1531 {.rank = rank, .pos = NULL, .count = 0};
/* The result points into msgs->msg, which this function may reallocate;
   presumably a pointer obtained earlier must not be kept across another
   call that appends an entry - confirm xrealloc semantics. */
1534 return msgs->msg + msg_idx;
1547 struct yac_src_field_exchange_data_msgs * msgs,
1548 int rank,
size_t count,
size_t * pos,
size_t offset) {
1552 struct yac_src_field_exchange_data_msg * msg =
1557 xrealloc(msg->pos, ((
size_t)msg->count + count) *
sizeof(*msg->pos));
1558 for (
size_t i = 0; i < count; ++i, ++msg->count)
1559 msg->pos[msg->count] = pos[i] + offset;
1581 struct yac_src_field_exchange_data * src_field_exchange_data,
1582 size_t num_src_fields, MPI_Comm comm,
1583 size_t * send_msg_sizes,
size_t * send_pos,
1584 size_t * recv_msg_sizes,
size_t * recv_pos,
1585 size_t * recv_offsets) {
1591 for (
int rank = 0; rank < comm_size; ++rank) {
1593 for (
size_t src_field_idx = 0; src_field_idx < num_src_fields;
1594 ++src_field_idx, ++msg_idx) {
1596 struct yac_src_field_exchange_data * curr_src_field_exchange_data =
1597 src_field_exchange_data + src_field_idx;
1600 size_t send_msg_size = send_msg_sizes[msg_idx];
1601 if (send_msg_sizes[msg_idx] > 0) {
1604 &curr_src_field_exchange_data->send, rank, send_msg_size,
1606 send_pos += send_msg_size;
1610 size_t recv_msg_size = recv_msg_sizes[msg_idx];
1611 if (recv_msg_size > 0) {
1614 &curr_src_field_exchange_data->recv, rank, recv_msg_size,
1615 recv_pos, recv_offsets[src_field_idx]);
1616 recv_pos += recv_msg_size;
1623 void * interp,
size_t * src_orig_poses,
size_t * sendcounts,
1625 size_t * recvcounts, MPI_Comm comm, Xt_config redist_config) {
1635 size_t num_src_fields = 1;
1638 size_t total_sendcounts = 0, total_recvcounts = 0;
1639 for (
int i = 0; i < comm_size; ++i) {
1640 total_sendcounts += sendcounts[i];
1641 total_recvcounts += recvcounts[i];
1644 size_t * size_t_buffer =
1646 (
MAX(total_sendcounts, total_recvcounts) + total_recvcounts) *
1647 sizeof(*size_t_buffer));
1648 size_t * send_pos = size_t_buffer;
1649 size_t * recv_pos = size_t_buffer + total_sendcounts;
1651 for (
size_t i = 0; i < total_sendcounts; ++i)
1652 send_pos[i] = (
size_t)src_orig_poses[i];
1653 for (
size_t i = 0; i < total_recvcounts; ++i)
1659 sendcounts, send_pos, recvcounts, recv_pos,
1662 size_t * tgt_idx = size_t_buffer;
1663 size_t * src_idx = size_t_buffer + total_recvcounts;
1665 for (
size_t i = 0; i < total_recvcounts; ++i) {
1666 tgt_idx[i] = (size_t)(tgt_stencils[i].orig_pos);
1672 size_t * num_src_per_tgt = NULL;
1673 double * weights = NULL;
1674 size_t * src_field_idx = NULL;
1675 size_t src_field_buffer_size = total_recvcounts;
1678 tgt_idx, num_src_per_tgt, weights, src_field_idx, src_idx,
1679 &src_field_buffer_size);
1681 free(size_t_buffer);
1685 void * interp,
size_t num_src_fields,
size_t * src_orig_poses,
1687 size_t * recvcounts, MPI_Comm comm, Xt_config redist_config) {
1699 size_t total_sendcounts = 0, total_recvcounts = 0;
1700 for (
size_t rank = 0, idx = 0; rank < (size_t)comm_size; ++rank) {
1701 for (
size_t src_field_idx = 0; src_field_idx < num_src_fields;
1702 ++src_field_idx, ++idx) {
1703 total_sendcounts += sendcounts[idx];
1704 total_recvcounts += recvcounts[idx];
1708 size_t * size_t_buffer =
1710 (
MAX(total_sendcounts, total_recvcounts) + 2 * total_recvcounts) *
1711 sizeof(*size_t_buffer));
1712 size_t * send_pos = size_t_buffer;
1713 size_t * recv_pos = size_t_buffer + total_sendcounts;
1714 size_t src_field_buffer_size[num_src_fields];
1717 src_field_buffer_size, 0,
1718 num_src_fields *
sizeof(src_field_buffer_size[0]));
1720 for (
size_t i = 0; i < total_sendcounts; ++i)
1721 send_pos[i] = (
size_t)src_orig_poses[i];
1722 for (
size_t rank = 0, idx = 0, recv_pos_idx = 0; rank < (size_t)comm_size;
1724 for (
size_t src_field_idx = 0; src_field_idx < num_src_fields;
1725 ++src_field_idx, ++idx) {
1726 if (recvcounts[idx] > 0) {
1727 for (
size_t i = 0; i < recvcounts[idx]; ++i, ++recv_pos_idx)
1728 recv_pos[recv_pos_idx] = src_field_buffer_size[src_field_idx]++;
1736 sendcounts, send_pos, recvcounts, recv_pos,
1739 size_t * tgt_idx = size_t_buffer;
1740 size_t * src_idx = size_t_buffer + total_recvcounts;
1741 size_t * src_field_idx = size_t_buffer + 2 * total_recvcounts;
1743 src_field_buffer_size, 0,
1744 num_src_fields *
sizeof(src_field_buffer_size[0]));
1746 for (
size_t rank = 0, idx = 0, k = 0; rank < (size_t)comm_size; ++rank) {
1747 for (
size_t j = 0; j < num_src_fields; ++j, ++idx) {
1749 if (recvcounts[idx] > 0) {
1750 for (
size_t i = 0; i < recvcounts[idx]; ++i, ++k) {
1751 tgt_idx[k] = (size_t)(tgt_stencils[k].orig_pos);
1752 src_idx[k] = src_field_buffer_size[j]++;
1753 src_field_idx[k] = j;
1761 size_t * num_src_per_tgt = NULL;
1762 double * weights = NULL;
1765 tgt_idx, num_src_per_tgt, weights, src_field_idx, src_idx,
1766 src_field_buffer_size);
1768 free(size_t_buffer);
1772 MPI_Comm comm,
size_t count,
1776 void *,
size_t,
size_t *,
size_t *,
1778 Xt_config redist_config) {
1786 size_t num_src_fields = 0;
1787 for (
size_t i = 0; i < count; ++i) {
1789 if (src_field_idx >= num_src_fields) num_src_fields = src_field_idx + 1;
1793 MPI_IN_PLACE, &num_src_fields, 1,
YAC_MPI_SIZE_T, MPI_MAX, comm), comm);
1798 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
1800 (
size_t)num_src_fields, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1801 size_t * size_t_buffer =
1802 xmalloc(4 * (
size_t)comm_size *
sizeof(*size_t_buffer));
1803 size_t * total_sendcounts = size_t_buffer + 0 * comm_size;
1804 size_t * total_recvcounts = size_t_buffer + 1 * comm_size;
1805 size_t * total_sdispls = size_t_buffer + 2 * comm_size;
1806 size_t * total_rdispls = size_t_buffer + 3 * comm_size;
1810 for (
size_t i = 0; i < count; ++i) {
1814 (&(direct_mf_stencils[i].tgt.
data.
data.single)):
1817 for (
int j = 0; j < curr_count; ++j)
1819 (
size_t)(curr_point_info[j].
rank) * num_src_fields + src_field_idx]++;
1823 (
size_t)num_src_fields, sendcounts, recvcounts, sdispls, rdispls, comm);
/* Collapse the per-(rank, source field) counts into per-rank totals and
   derive the matching per-rank displacements as running sums. */
1825 size_t saccu = 0, raccu = 0;
1826 for (
int i = 0; i < comm_size; ++i) {
/* The displacement of rank i is the sum of the totals of all lower ranks. */
1827 total_sdispls[i] = saccu;
1828 total_rdispls[i] = raccu;
1829 total_sendcounts[i] = 0;
1830 total_recvcounts[i] = 0;
/* sendcounts and recvcounts are indexed as [rank * num_src_fields + field]. */
1831 for (
size_t j = 0; j < num_src_fields; ++j) {
1832 total_sendcounts[i] += sendcounts[num_src_fields * i + j];
1833 total_recvcounts[i] += recvcounts[num_src_fields * i + j];
1835 saccu += total_sendcounts[i];
1836 raccu += total_recvcounts[i];
/* Overall buffer sizes: displacement of the last rank plus its count.
   Indexing with comm_size - 1 assumes comm_size >= 1. */
1839 size_t send_buffer_size = total_sdispls[comm_size - 1] +
1840 total_sendcounts[comm_size - 1];
1841 size_t recv_buffer_size = total_rdispls[comm_size - 1] +
1842 total_recvcounts[comm_size - 1];
1843 size_t tgt_count = recv_buffer_size;
1846 xmalloc((send_buffer_size + recv_buffer_size) *
sizeof(*stencil_buffer));
1848 stencil_buffer + recv_buffer_size;
1852 for (
size_t i = 0; i < count; ++i) {
1856 (&(direct_mf_stencils[i].tgt.
data.
data.single)):
1861 for (
int j = 0; j < curr_count; ++j) {
1863 sdispls[(size_t)(curr_point_infos[j].
rank) * num_src_fields +
1864 src_field_idx + 1]++;
1865 send_stencil_buffer[pos].
src =
src;
1876 send_stencil_buffer, total_sendcounts, total_sdispls,
1877 recv_stencil_buffer, total_recvcounts, total_rdispls,
1878 sizeof(*stencil_buffer), stencil_direct_mf_dt, comm,
1879 "yac_interp_weights_redist_direct_mf", __LINE__);
1881 yac_mpi_call(MPI_Type_free(&stencil_direct_mf_dt), comm);
1885 qsort(recv_stencil_buffer, tgt_count,
sizeof(*recv_stencil_buffer),
1892 memset(sendcounts, 0,
1893 (
size_t)comm_size * (
size_t)num_src_fields *
sizeof(*sendcounts));
1895 for (
size_t i = 0; i < tgt_count; ++i)
1896 sendcounts[(
size_t)(recv_stencil_buffer[i].
src.
rank) * num_src_fields +
1897 recv_stencil_buffer[i].src_field_idx]++;
1900 (
size_t)num_src_fields, sendcounts, recvcounts, sdispls, rdispls, comm);
1902 saccu = 0, raccu = 0;
1903 for (
int i = 0; i < comm_size; ++i) {
1904 total_sdispls[i] = saccu;
1905 total_rdispls[i] = raccu;
1906 total_sendcounts[i] = 0;
1907 total_recvcounts[i] = 0;
1908 for (
size_t j = 0; j < num_src_fields; ++j) {
1909 total_sendcounts[i] += sendcounts[num_src_fields * i + j];
1910 total_recvcounts[i] += recvcounts[num_src_fields * i + j];
1912 saccu += total_sendcounts[i];
1913 raccu += total_recvcounts[i];
1916 send_buffer_size = total_sdispls[comm_size - 1] +
1917 total_sendcounts[comm_size - 1];
1918 recv_buffer_size = total_rdispls[comm_size - 1] +
1919 total_recvcounts[comm_size - 1];
1921 size_t * orig_pos_buffer =
1922 xmalloc((send_buffer_size + recv_buffer_size) *
sizeof(*orig_pos_buffer));
1923 size_t * send_orig_pos_buffer = orig_pos_buffer + recv_buffer_size;
1924 size_t * recv_orig_pos_buffer = orig_pos_buffer;
1926 for (
size_t i = 0; i < tgt_count; ++i)
1927 send_orig_pos_buffer[
1928 sdispls[(
size_t)(recv_stencil_buffer[i].
src.
rank) * num_src_fields +
1929 recv_stencil_buffer[i].src_field_idx + 1]++] =
1930 recv_stencil_buffer[i].
src.orig_pos;
1934 send_orig_pos_buffer, total_sendcounts, total_sdispls,
1935 recv_orig_pos_buffer, total_recvcounts, total_rdispls,
1937 "yac_interp_weights_redist_direct_mf", __LINE__);
1938 free(size_t_buffer);
1945 interp, num_src_fields,
1946 recv_orig_pos_buffer, recvcounts, recv_stencil_buffer, sendcounts,
1947 comm, redist_config);
1950 free(orig_pos_buffer);
1951 free(stencil_buffer);
1961 int pack_size_value;
1963 yac_mpi_call(MPI_Pack_size(1, MPI_DOUBLE, comm, &pack_size_value), comm);
1965 return pack_size_value;
1990 int pack_size_weights;
1994 MPI_DOUBLE, comm, &pack_size_weights), comm);
2006 int pack_size_src_field_idx;
2014 pack_size_src_field_idx;
2021 int pack_size_weights, pack_size_field_indices;
2025 count, MPI_DOUBLE, comm, &pack_size_weights), comm);
2033 pack_size_weights + pack_size_field_indices;
2040 int pack_size_field_indices;
2049 pack_size_field_indices;
2066 "ERROR(copy_interp_weight_stencil): invalid stencil type")
2068 switch (stencil->
type) {
2087 size_t weight_size =
2106 size_t field_indices_size =
2120 size_t weight_size =
2126 size_t field_indices_size =
2136 return stencil_copy;
2154 "ERROR(wcopy_interp_weight_stencil): invalid stencil type")
2156 switch (stencil->
type) {
2181 double * new_weights =
xmalloc(src_count *
sizeof(*new_weights));
2182 if (weights == NULL)
2183 for (
size_t i = 0; i < src_count; ++i) new_weights[i] = weight;
2185 for (
size_t i = 0; i < src_count; ++i) new_weights[i] = weights[i] * weight;
2193 return stencil_wcopy;
2203 if (ret)
return ret;
2221 size_t count = srcs->
count;
2224 xmalloc(count *
sizeof(*w_global_id));
2227 for (
size_t i = 0; i < count; ++i) {
2229 w_global_id[i].
weight = (*w)[i];
2238 size_t new_count = 0;
2241 for (
size_t i = 0; i < count;) {
2243 data[new_count] = data[i];
2246 double curr_weight = w_global_id[i].
weight;
2250 while((i < count) && (curr_global_id == w_global_id[i].
global_id)) {
2252 curr_weight += w_global_id[i].
weight;
2256 (*w)[new_count] = curr_weight;
2262 srcs->
data =
xrealloc(data, new_count *
sizeof(*data));
2263 srcs->
count = new_count;
2264 *w =
xrealloc(*w, new_count *
sizeof(**w));
2270 size_t src_count = 0;
2271 size_t point_info_buffer_size = 0;
2273 for (
size_t i = 0; i < num_stencils; ++i) {
2274 size_t curr_src_count;
2280 "ERROR(stencils_merge_wsum): invalid stencil type")
2281 switch (stencils[i]->
type) {
2296 src_count += curr_src_count;
2297 for (
size_t j = 0, curr_src_data_count; j < curr_src_count; ++j)
2298 if (((curr_src_data_count = srcs[j].
data.
count)) > 1)
2299 point_info_buffer_size += curr_src_data_count;
2306 srcs->
count = src_count;
2308 double * new_w =
xmalloc(src_count *
sizeof(*new_w));
2310 for (
size_t i = 0, offset = 0; i < num_stencils; ++i) {
2311 size_t curr_src_count;
2318 "ERROR(stencils_merge_wsum): invalid stencil type")
2319 switch (stencils[i]->
type) {
2338 srcs->
data + offset, curr_srcs, curr_src_count, &point_info_buffer);
2339 if (stencil_w == NULL)
2340 for (
size_t j = 0; j < curr_src_count; ++j, ++offset)
2341 new_w[offset] = w[i];
2343 for (
size_t j = 0; j < curr_src_count; ++j, ++offset)
2344 new_w[offset] = w[i] * stencil_w[j];
2354 return merge_stencil;
2360 for (
size_t i = 0; i < num_stencils; ++i)
2364 size_t src_count = 0;
2365 size_t point_info_buffer_size = 0;
2367 for (
size_t i = 0; i < num_stencils; ++i) {
2368 size_t curr_src_count;
2373 "ERROR(stencils_merge_sum): invalid stencil type")
2374 switch (stencils[i]->
type) {
2385 src_count += curr_src_count;
2386 for (
size_t j = 0, curr_src_data_count; j < curr_src_count; ++j)
2387 if (((curr_src_data_count = srcs[j].
data.
count)) > 1)
2388 point_info_buffer_size += curr_src_data_count;
2395 srcs->
count = src_count;
2398 for (
size_t i = 0, offset = 0; i < num_stencils; ++i) {
2399 size_t curr_src_count;
2404 "ERROR(stencils_merge_sum): invalid stencil type")
2405 switch (stencils[i]->
type) {
2417 srcs->
data + offset, curr_srcs, curr_src_count, &point_info_buffer);
2418 offset += curr_src_count;
2427 return merge_stencil;
2434 if (num_stencils == 1)
2437 int fixed_count = 0;
2438 int direct_count = 0;
2441 double fixed_value = 0.0;
2443 for (
size_t i = 0; i < num_stencils; ++i) {
2448 "ERROR(stencils_merge): multiple source fields not yet supported")
2454 "ERROR(stencils_merge): unsupported stencil type")
2455 switch (stencils[i]->
type) {
2476 (fixed_count > 0) || (wsum_count > 0) ||
2477 (sum_count > 0) || (direct_count > 0),
2478 "ERROR(stencils_merge): unknown error")
2479 if (fixed_count > 0) {
2482 (direct_count + sum_count + wsum_count) <= 0,
2483 "ERROR(stencils_merge): invalid stencil combination")
2485 merge_stencil = **stencils;
2487 }
else if (wsum_count > 0)
2490 else if ((sum_count > 0) || (direct_count > 0))
2496 return merge_stencil;
2501 int * pack_sizes, MPI_Datatype point_info_dt, MPI_Comm comm) {
2504 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_type), comm);
2506 for (
size_t i = 0; i < count; ++i) {
2509 int (*func_pack_size)(
2515 (curr_stencil->
type ==
SUM) ||
2520 "ERROR(get_stencils_pack_sizes): invalid stencil type")
2521 switch (curr_stencil->
type) {
2545 pack_sizes[i] = pack_size_type +
2547 &(curr_stencil->
tgt), point_info_dt, comm) +
2548 func_pack_size(curr_stencil, point_info_dt, comm);
2554 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2560 MPI_Pack(&(stencil->
data.
fixed.
value), 1, MPI_DOUBLE, buffer, buffer_size,
2561 position, comm), comm);
2566 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2570 &(stencil->
data.
direct.
src), buffer, buffer_size, position, point_info_dt,
2576 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2580 stencil->
data.
sum.
srcs, buffer, buffer_size, position, point_info_dt, comm);
2585 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2590 point_info_dt, comm);
2595 buffer, buffer_size, position, comm), comm);
2600 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2605 point_info_dt, comm);
2611 buffer, buffer_size, position, comm), comm);
2616 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2621 point_info_dt, comm);
2625 size_t * temp_field_indices =
xmalloc(count *
sizeof(*temp_field_indices));
2626 for (
size_t i = 0; i < count; ++i)
2630 buffer, buffer_size, position, comm), comm);
2631 free(temp_field_indices);
2636 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2641 point_info_dt, comm);
2647 buffer, buffer_size, position, comm), comm);
2649 size_t * temp_field_indices =
xmalloc(count *
sizeof(*temp_field_indices));
2650 for (
size_t i = 0; i < count; ++i)
2654 buffer, buffer_size, position, comm), comm);
2655 free(temp_field_indices);
2660 void ** pack_data,
int * pack_sizes, MPI_Datatype point_info_dt,
2664 stencils, count, pack_order, pack_sizes, point_info_dt, comm);
2666 size_t pack_buffer_size = 0;
2667 for (
size_t i = 0; i < count; ++i)
2668 pack_buffer_size += (
size_t)(pack_sizes[i]);
2670 void * pack_data_ =
xmalloc(pack_buffer_size);
2671 size_t total_pack_size = 0;
2673 for (
size_t i = 0; i < count; ++i) {
2678 int * position, MPI_Datatype point_info_dt, MPI_Comm comm);
2683 (curr_stencil->
type ==
SUM) ||
2688 "ERROR(pack_stencils): invalid stencil type")
2689 switch (curr_stencil->
type) {
2715 int type = (int)curr_stencil->
type;
2716 void * buffer = (
void*)((
char*)pack_data_ + total_pack_size);
2717 int buffer_size = pack_sizes[i];
2721 MPI_Pack(&
type, 1, MPI_INT, buffer, buffer_size, &position, comm), comm);
2724 &position, point_info_dt, comm);
2726 func_pack(curr_stencil, buffer, buffer_size, &position, point_info_dt, comm);
2729 pack_sizes[i] >= position,
2730 "ERROR(pack_stencils): "
2731 "actual pack size is bigger then computed one (%d > %d)",
2732 position, pack_sizes[i]);
2734 pack_sizes[i] = position;
2735 total_pack_size += (size_t)position;
2738 *pack_data =
xrealloc(pack_data_, total_pack_size);
2743 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2749 MPI_Unpack(buffer, buffer_size, position, &(stencil->
data.
fixed.
value), 1,
2750 MPI_DOUBLE, comm), comm);
2755 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2760 point_info_dt, comm);
2765 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2770 buffer, buffer_size, position, &(stencil->
data.
sum.
srcs), point_info_dt,
2776 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2781 point_info_dt, comm);
2791 (
int)count, MPI_DOUBLE, comm), comm);
2796 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2801 point_info_dt, comm);
2804 size_t temp_field_idx;
2807 buffer, buffer_size, position, &temp_field_idx,
2814 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2819 point_info_dt, comm);
2823 size_t * temp_field_indices =
xmalloc(count *
sizeof(*temp_field_indices));
2830 buffer, buffer_size, position, temp_field_indices,
2832 for (
size_t i = 0; i < count; ++i)
2834 free(temp_field_indices);
2839 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
2844 point_info_dt, comm);
2855 (
int)count, MPI_DOUBLE, comm), comm);
2857 size_t * temp_field_indices =
xmalloc(count *
sizeof(*temp_field_indices));
2864 buffer, buffer_size, position, temp_field_indices,
2866 for (
size_t i = 0; i < count; ++i)
2868 (
size_t)(temp_field_indices[i]);
2869 free(temp_field_indices);
2874 void * packed_data,
size_t packed_data_size,
2875 MPI_Datatype point_info_dt, MPI_Comm comm) {
2877 for (
size_t i = 0, offset = 0; i < count; ++i) {
2880 packed_data_size >= offset,
2881 "ERROR(unpack_stencils): invalid offset");
2884 void * curr_buffer = (
void*)((
unsigned char*)packed_data + offset);
2885 int buffer_size = (int)(
MIN(packed_data_size - offset, INT_MAX));
2891 curr_buffer, buffer_size, &position, &
type, 1, MPI_INT, comm), comm);
2893 void (*func_unpack)(
2895 int * position, MPI_Datatype point_info_dt, MPI_Comm comm);
2901 "ERROR(unpack_stencils): invalid stencil type")
2927 curr_stencil->
type =
2930 curr_buffer, buffer_size, &position, &(curr_stencil->
tgt),
2931 point_info_dt, comm);
2933 curr_stencil, curr_buffer, buffer_size, &position, point_info_dt, comm);
2934 offset += (size_t)position;
2940 size_t * stencil_indices,
2941 size_t * stencil_sendcounts,
size_t * stencil_recvcounts) {
2943 int comm_rank, comm_size;
2948 stencil_sendcounts[comm_rank] == stencil_recvcounts[comm_rank],
2949 "ERROR(exchange_stencils): error in arguments")
2951 size_t send_count = 0, recv_count = 0;
2952 size_t local_send_offset = 0;
2953 size_t local_recv_offset = 0;
2954 size_t local_count = (size_t)(stencil_sendcounts[comm_rank]);
2955 for (
int i = 0; i < comm_rank; ++i) {
2956 send_count += stencil_sendcounts[i];
2957 recv_count += stencil_recvcounts[i];
2958 local_send_offset += stencil_sendcounts[i];
2959 local_recv_offset += stencil_recvcounts[i];
2961 local_send_offset = send_count;
2962 local_recv_offset = recv_count;
2963 stencil_sendcounts[comm_rank] = 0;
2964 stencil_recvcounts[comm_rank] = 0;
2965 for (
int i = comm_rank + 1; i < comm_size; ++i) {
2966 send_count += stencil_sendcounts[i];
2967 recv_count += stencil_recvcounts[i];
2971 xmalloc((recv_count + local_count) *
sizeof(*new_stencils));
2972 size_t * local_stencil_indices =
2973 xmalloc(local_count *
sizeof(*local_stencil_indices));
2974 memcpy(local_stencil_indices, stencil_indices + local_send_offset,
2975 local_count *
sizeof(*local_stencil_indices));
2979 stencil_indices + local_send_offset,
2980 stencil_indices + local_send_offset + local_count,
2981 (send_count - local_send_offset) *
sizeof(*stencil_indices));
2985 int * pack_sizes =
xmalloc(send_count *
sizeof(*pack_sizes));
2988 stencils, send_count, stencil_indices, &send_buffer, pack_sizes,
2989 point_info_dt, comm);
2991 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
2993 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
2996 for (
int rank = 0; rank < comm_size; ++rank) {
2997 size_t sendcount = 0;
2998 int curr_num_stencils = stencil_sendcounts[rank];
2999 for (
int j = 0; j < curr_num_stencils; ++j, ++send_count)
3000 sendcount += (
size_t)(pack_sizes[send_count]);
3001 sendcounts[rank] = sendcount;
3006 1, sendcounts, recvcounts, sdispls, rdispls, comm);
3008 size_t recv_size = recvcounts[comm_size - 1] + rdispls[comm_size - 1];
3010 void * recv_buffer =
xmalloc(recv_size);
3013 yac_alltoallv_packed_p2p(
3014 send_buffer, sendcounts, sdispls+1,
3015 recv_buffer, recvcounts, rdispls, comm,
"exchange_stencils", __LINE__);
3021 new_stencils, recv_count,
3022 recv_buffer, recv_size, point_info_dt, comm);
3026 memmove(new_stencils + local_recv_offset + local_count,
3027 new_stencils + local_recv_offset ,
3028 (recv_count - local_recv_offset ) *
sizeof(*new_stencils));
3029 for (
size_t i = 0; i < local_count; ++i, ++local_recv_offset )
3030 new_stencils[local_recv_offset] =
3032 stencils + local_stencil_indices[i],
3033 stencils[local_stencil_indices[i]].
tgt);
3034 free(local_stencil_indices);
3036 return new_stencils;
3041 int * stencil_ranks,
size_t count) {
3043 char const * routine =
"yac_interp_weights_get_stencils";
3045 MPI_Comm comm =
weights->comm;
3050 count <= INT_MAX,
"ERROR(%s): count exceeds INT_MAX", routine);
3052 size_t * reorder_idx =
xmalloc(count *
sizeof(*reorder_idx));
3053 for (
size_t i = 0; i < count; ++i) reorder_idx[i] = i;
3056 stencil_ranks, count, stencil_indices, reorder_idx);
3059 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
3061 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
3062 for (
size_t i = 0; i < count; ++i) sendcounts[stencil_ranks[i]]++;
3064 1, sendcounts, recvcounts, sdispls, rdispls, comm);
3066 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
3067 size_t * size_t_buffer =
3068 xmalloc((count + recv_count) *
sizeof(*size_t_buffer));
3069 size_t * send_stencil_indices = size_t_buffer;
3070 size_t * recv_stencil_indices = size_t_buffer + count;
3071 for (
size_t i = 0; i < count; ++i)
3072 send_stencil_indices[i] = stencil_indices[i];
3073 yac_alltoallv_size_t_p2p(
3074 send_stencil_indices, sendcounts, sdispls+1,
3075 recv_stencil_indices, recvcounts, rdispls, comm, routine, __LINE__);
3078 size_t * exchange_stencil_indices =
3079 xmalloc(recv_count *
sizeof(*exchange_stencil_indices));
3080 for (
size_t i = 0; i < recv_count; ++i) {
3082 (
size_t)(recv_stencil_indices[i]) <
weights->stencils_size,
3083 "ERROR(%s): invalid stencil index", routine);
3084 exchange_stencil_indices[i] = (size_t)(recv_stencil_indices[i]);
3086 free(size_t_buffer);
3089 recvcounts, sendcounts);
3090 free(exchange_stencil_indices);
3095 xmalloc(count *
sizeof(*sorted_stencils));
3096 for (
size_t i = 0; i < count; ++i)
3097 sorted_stencils[reorder_idx[i]] = stencils[i];
3101 return sorted_stencils;
3109 size_t * num_stencils_per_tgt,
size_t * stencil_indices,
3110 int * stencil_ranks,
double * w) {
3112 size_t count = (tgts != NULL)?tgts->
count:0;
3113 MPI_Comm comm =
weights->comm;
3114 int comm_rank, comm_size;
3119 size_t total_num_stencils = 0;
3120 size_t max_num_stencils_per_tgt = 0;
3121 for (
size_t i = 0; i < count; ++i) {
3122 size_t curr_num_stencils_per_tgt = num_stencils_per_tgt[i];
3123 if (curr_num_stencils_per_tgt > max_num_stencils_per_tgt)
3124 max_num_stencils_per_tgt = curr_num_stencils_per_tgt;
3125 total_num_stencils += num_stencils_per_tgt[i];
3127 size_t num_missing_stencils = 0;
3128 for (
size_t i = 0; i < total_num_stencils; ++i)
3129 if (stencil_ranks[i] != comm_rank) num_missing_stencils++;
3132 size_t * missing_stencil_indices =
3133 xmalloc(num_missing_stencils *
sizeof(*missing_stencil_indices));
3134 int * missing_stencil_ranks =
3135 xmalloc(num_missing_stencils *
sizeof(*missing_stencil_ranks));
3136 for (
size_t i = 0, j = 0; i < total_num_stencils; ++i) {
3137 if (stencil_ranks[i] != comm_rank) {
3138 missing_stencil_indices[j] = stencil_indices[i];
3139 missing_stencil_ranks[j] = stencil_ranks[i];
3145 weights, missing_stencil_indices, missing_stencil_ranks,
3146 num_missing_stencils);
3147 free(missing_stencil_ranks);
3148 free(missing_stencil_indices);
3153 size_t stencils_array_size =
weights->stencils_array_size;
3154 size_t stencils_size =
weights->stencils_size;
3157 xmalloc(max_num_stencils_per_tgt *
sizeof(*stencils_buffer));
3160 stencils, stencils_array_size, stencils_size + count);
3162 for (
size_t i = 0, j = 0; i < count;
3163 ++i, ++stencils_size) {
3165 size_t curr_num_stencils = num_stencils_per_tgt[i];
3166 for (
size_t k = 0; k < curr_num_stencils; ++k)
3167 stencils_buffer[k] =
3168 (stencil_ranks[k] == comm_rank)?
3169 (stencils + stencil_indices[k]):(missing_stencils + (j++));
3171 stencils[stencils_size] =
3173 w += curr_num_stencils;
3174 stencil_indices += curr_num_stencils;
3175 stencil_ranks += curr_num_stencils;
3179 weights->stencils_array_size = stencils_array_size;
3180 weights->stencils_size = stencils_size;
3182 free(stencils_buffer);
3190 YAC_ASSERT(count != 0,
"ERROR(compute_owner): count == 0")
3195 size_t best_rank_count = 0;
3197 size_t curr_rank_count = 1;
3198 int prev_rank = ranks[0];
3200 for (
size_t i = 1; i < count; ++i, ++curr_rank_count) {
3201 int curr_rank = ranks[i];
3202 if (prev_rank != curr_rank) {
3203 if (curr_rank_count > best_rank_count) {
3204 best_rank = prev_rank;
3205 best_rank_count = curr_rank_count;
3207 prev_rank = curr_rank;
3208 curr_rank_count = 0;
3212 return (curr_rank_count > best_rank_count)?prev_rank:best_rank;
3220 size_t total_num_links = 0;
3222 for (
size_t i = 0; i <
count; ++i) {
3224 stencils[i].
type == stencil_type,
3225 "ERROR(generate_w_sum_mf_stencils): wrong stencil type")
3231 xmalloc(
sizeof(*temp) + total_num_links *
sizeof(temp->
buffer[0]));
3238 for (
size_t i = 0, k = 0; i <
count; ++i) {
3240 wsum_stencils->
data + i;
3246 curr_wsum_stencil->
count = curr_stencil_size;
3247 curr_wsum_stencil->
data = curr_links;
3248 for (
size_t j = 0; j < curr_stencil_size; ++j) {
3249 int curr_count = curr_srcs[j].
data.
count;
3252 "ERROR(generate_w_sum_mf_stencils): global src id no found")
3259 "ERROR(generate_w_sum_mf_stencils): unsupported stencil type")
3260 switch(stencil_type) {
3263 curr_links[j].
weight = 1.0;
3271 curr_links[j].
weight = 1.0;
3282 k += curr_stencil_size;
3285 return wsum_stencils;
3292 int array_of_blocklengths[] = {1, 1, 1, 1};
3293 const MPI_Aint array_of_displacements[] =
3294 {(MPI_Aint)(intptr_t)(
const void *)&(dummy.
src.
rank) -
3295 (MPI_Aint)(intptr_t)(
const void *)&dummy,
3296 (MPI_Aint)(intptr_t)(
const void *)&(dummy.
src.
orig_pos) -
3297 (MPI_Aint)(intptr_t)(
const void *)&dummy,
3299 (MPI_Aint)(intptr_t)(
const void *)&dummy,
3300 (MPI_Aint)(intptr_t)(
const void *)&(dummy.
weight) -
3301 (MPI_Aint)(intptr_t)(
const void *)&dummy};
3302 const MPI_Datatype array_of_types[] =
3305 MPI_Type_create_struct(4, array_of_blocklengths, array_of_displacements,
3306 array_of_types, &dt), comm);
3312 MPI_Datatype wsum_mf_weight_dt, MPI_Datatype point_info_dt, MPI_Comm comm) {
3314 int pack_size_count,
3318 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_count), comm);
3321 (
int)(stencil->
count), wsum_mf_weight_dt, comm, &pack_size_weights), comm);
3325 return pack_size_count + pack_size_weights + pack_size_tgt;
3330 size_t * pack_order,
void ** pack_data,
int * pack_sizes,
3331 int * weight_counts, MPI_Comm comm) {
3337 size_t temp_total_pack_size = 0;
3338 for (
size_t i = 0; i < count; ++i) {
3339 temp_total_pack_size +=
3342 wsum_stencils + pack_order[i],
3343 wsum_mf_weight_dt, point_info_dt, comm));
3346 void * pack_data_ =
xmalloc(temp_total_pack_size);
3347 size_t total_pack_size = 0;
3350 for (
size_t i = 0; i < count; ++i) {
3352 size_t idx = pack_order[i];
3355 void * buffer = (
void*)((
unsigned char*)pack_data_ + total_pack_size);
3356 int buffer_size = pack_sizes[i];
3357 int curr_count = wsum_stencils[idx].
count;
3361 &(wsum_stencils[idx].tgt), buffer, buffer_size, &position,
3362 point_info_dt, comm);
3365 MPI_Pack(&curr_count, 1, MPI_INT, buffer, buffer_size, &position, comm), comm);
3368 MPI_Pack(wsum_stencils[idx].data, curr_count, wsum_mf_weight_dt,
3369 buffer, buffer_size, &position, comm), comm);
3371 pack_sizes[i] = position;
3372 weight_counts[i] = curr_count;
3373 total_pack_size += (size_t)position;
3379 *pack_data =
xrealloc(pack_data_, total_pack_size);
3385 void * packed_data,
size_t packed_data_size, MPI_Comm comm) {
3390 size_t weight_offset = 0;
3391 for (
size_t i = 0, offset = 0; i < count; ++i) {
3394 void * curr_buffer = (
void*)((
char*)packed_data + offset);
3395 int buffer_size = (int)(packed_data_size - offset);
3401 weight_buffer + weight_offset;
3404 curr_buffer, buffer_size, &position, &tgt, point_info_dt, comm);
3406 MPI_Unpack(curr_buffer, buffer_size, &position,
3407 &weight_count, 1, MPI_INT, comm),
3410 MPI_Unpack(curr_buffer, buffer_size, &position,
3411 curr_weights, weight_count, wsum_mf_weight_dt, comm), comm);
3413 curr_wsum_stencil->
tgt = tgt;
3414 curr_wsum_stencil->
data = curr_weights;
3415 curr_wsum_stencil->
count = (size_t)weight_count;
3417 weight_offset += (size_t)weight_count;
3418 offset += (size_t)position;
3424 return weight_offset;
3429 int * stencil_owner,
size_t * reorder_idx,
size_t num_owners) {
3432 wsum_stencils_data->
data;
3434 int comm_rank, comm_size;
3438 size_t local_weight_count = 0;
3439 size_t local_count = 0;
3440 for (
size_t i = 0; i < num_owners; ++i) {
3441 if (stencil_owner[i] == comm_rank) {
3442 local_weight_count += wsum_stencils[reorder_idx[i]].
count;
3443 stencil_owner[i] = INT_MAX;
3449 size_t send_count = num_owners - local_count;
3453 int * pack_sizes =
xmalloc(2 * send_count *
sizeof(*pack_sizes));
3454 int * weight_counts = pack_sizes + send_count;
3456 pack_sizes, weight_counts, comm);
3458 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
3460 3, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
3462 for (
size_t i = 0; i < send_count; ++i) {
3463 int curr_rank = stencil_owner[i];
3464 sendcounts[3 * curr_rank + 0]++;
3465 sendcounts[3 * curr_rank + 1] += (size_t)(pack_sizes[i]);
3466 sendcounts[3 * curr_rank + 2] += (size_t)(weight_counts[i]);
3474 size_t recv_count = 0;
3475 size_t recv_size = 0;
3476 size_t recv_weight_count = 0;
3477 size_t saccu = 0, raccu = 0;
3478 for (
int i = 0; i < comm_size; ++i) {
3481 recv_count += recvcounts[3 * i + 0];
3482 recv_size += recvcounts[3 * i + 1];
3483 recv_weight_count += recvcounts[3 * i + 2];
3484 saccu += sendcounts[3 * i + 1];
3485 raccu += recvcounts[3 * i + 1];
3486 sendcounts[i] = sendcounts[3 * i + 1];
3487 recvcounts[i] = recvcounts[3 * i + 1];
3490 void * recv_buffer =
xmalloc(recv_size);
3493 yac_alltoallv_packed_p2p(
3494 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls, comm,
3495 "redist_wsum_mf_stencils", __LINE__);
3501 (local_weight_count + recv_weight_count) *
sizeof(temp->
buffer[0]));
3505 ((new_wsum_stencils_data->
data =
3506 xmalloc((local_count + recv_count) *
3507 sizeof(*(new_wsum_stencils_data->
data)))));
3508 new_wsum_stencils_data->
count = local_count + recv_count;
3511 size_t weight_offset =
3513 new_wsum_stencils, &(temp->
buffer[0]), recv_count,
3514 recv_buffer, recv_size, comm);
3516 new_wsum_stencils += recv_count;
3518 &(temp->
buffer[weight_offset]);
3522 for (
size_t i = 0, weight_offset = 0; i < local_count; ++i) {
3524 wsum_stencils + reorder_idx[i + send_count];
3526 new_wsum_stencils + i;
3528 weight_buffer + weight_offset;
3529 size_t curr_stencil_size = curr_wsum_stencil->
count;
3531 curr_new_wsum_stencil->
count = curr_stencil_size;
3532 curr_new_wsum_stencil->
data = curr_new_weights;
3533 memcpy(curr_new_weights, curr_wsum_stencil->
data,
3534 curr_stencil_size *
sizeof(*curr_new_weights));
3535 weight_offset += curr_stencil_size;
3538 return new_wsum_stencils_data;
3545 wsum_stencils_data->
data;
3549 size_t max_stencil_size = 0;
3550 for (
size_t i = 0; i <
count; ++i) {
3551 size_t curr_stencil_size = wsum_stencils[i].
count;
3552 if (curr_stencil_size > max_stencil_size)
3553 max_stencil_size = curr_stencil_size;
3558 xmalloc((
count + max_stencil_size) *
sizeof(*rank_buffer));
3559 int * stencil_owner = rank_buffer;
3560 int * stencil_owners = rank_buffer +
count;
3561 size_t * reorder_idx =
xmalloc(
count *
sizeof(*reorder_idx));
3562 for (
size_t i = 0; i <
count; ++i) {
3563 size_t curr_stencil_size = wsum_stencils[i].
count;
3565 wsum_stencils[i].
data;
3566 for (
size_t j = 0; j < curr_stencil_size; ++j)
3567 stencil_owners[j] = curr_weights[j].
src.rank;
3568 stencil_owner[i] =
compute_owner(stencil_owners, curr_stencil_size);
3574 comm, wsum_stencils_data, stencil_owner, reorder_idx,
count);
3579 return new_wsum_stencils_data;
3587 if (ret)
return ret;
3599 wsum_stencils_data->
data;
3604 size_t total_owner_count = 0;
3605 for (
size_t i = 0; i <
count; ++i) {
3607 if (stencil_size == 1) {
3608 total_owner_count++;
3613 tgt_point_infos, stencil_size,
sizeof(*tgt_point_infos),
3615 int prev_rank = INT_MAX;
3616 for (
int j = 0; j < stencil_size; ++j) {
3617 int curr_rank = tgt_point_infos[j].
rank;
3618 if (curr_rank != prev_rank) {
3619 ++total_owner_count;
3620 prev_rank = curr_rank;
3626 int * stencil_owner =
xmalloc(total_owner_count *
sizeof(*stencil_owner));
3627 size_t * reorder_idx =
xmalloc(total_owner_count *
sizeof(*reorder_idx));
3628 for (
size_t i = 0, k = 0; i < count; ++i) {
3630 if (stencil_size == 1) {
3637 int prev_rank = INT_MAX;
3638 for (
int j = 0; j < stencil_size; ++j) {
3639 int curr_rank = tgt_point_infos[j].
rank;
3640 if (curr_rank != prev_rank) {
3641 stencil_owner[k] = tgt_point_infos[j].
rank;
3644 prev_rank = curr_rank;
3652 comm, wsum_stencils_data, stencil_owner, reorder_idx, total_owner_count);
3654 wsum_stencils = new_wsum_stencils_data->
data;
3658 free(stencil_owner);
3660 if (
count == 0)
return new_wsum_stencils_data;
3666 size_t total_num_tgt_pos = 0;
3667 for (
size_t i = 0; i <
count; ++i) {
3669 if (curr_count == 1) {
3670 ++total_num_tgt_pos;
3674 for (
size_t j = 0; j < curr_count; ++j)
3675 if (curr_point_infos[j].
rank == comm_rank)
3676 ++total_num_tgt_pos;
3680 if (total_num_tgt_pos != count) {
3681 new_wsum_stencils_data->
data =
3683 xrealloc(wsum_stencils, total_num_tgt_pos *
sizeof(*wsum_stencils))));
3684 new_wsum_stencils_data->
count = total_num_tgt_pos;
3688 for (
size_t i = 0, offset = count; i < count; ++i) {
3690 if (curr_count > 1) {
3695 for (j = 0; j < curr_count; ++j) {
3696 if (curr_point_infos[j].
rank == comm_rank) {
3705 for (j = j + 1; j < curr_count; ++j) {
3706 if (curr_point_infos[j].
rank == comm_rank) {
3707 wsum_stencils[offset] = wsum_stencils[i];
3713 free(curr_point_infos);
3717 return new_wsum_stencils_data;
3722 size_t num_src_fields, MPI_Comm comm, Xt_config redist_config) {
3727 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
3729 num_src_fields, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
3730 size_t * size_t_buffer =
3731 xmalloc(4 * (
size_t)comm_size *
sizeof(*size_t_buffer));
3732 size_t * total_sendcounts = size_t_buffer + 0 * comm_size;
3733 size_t * total_recvcounts = size_t_buffer + 1 * comm_size;
3734 size_t * total_sdispls = size_t_buffer + 2 * comm_size;
3735 size_t * total_rdispls = size_t_buffer + 3 * comm_size;
3737 for (
size_t i = 0; i < count; ++i)
3738 sendcounts[halo_points[i].data.rank * num_src_fields +
3742 num_src_fields, sendcounts, recvcounts, sdispls, rdispls, comm);
3744 size_t saccu = 0, raccu = 0;
3745 for (
int i = 0; i < comm_size; ++i) {
3746 total_sdispls[i] = saccu;
3747 total_rdispls[i] = raccu;
3748 total_sendcounts[i] = 0;
3749 total_recvcounts[i] = 0;
3750 for (
size_t j = 0; j < num_src_fields; ++j) {
3751 total_sendcounts[i] += sendcounts[num_src_fields * i + j];
3752 total_recvcounts[i] += recvcounts[num_src_fields * i + j];
3754 saccu += total_sendcounts[i];
3755 raccu += total_recvcounts[i];
3758 size_t recv_count = total_recvcounts[comm_size - 1] +
3759 total_rdispls[comm_size - 1];
3761 int * exchange_buffer =
3762 xmalloc((2 * count + recv_count) *
sizeof(*exchange_buffer));
3763 int * send_buffer = exchange_buffer;
3764 int * reorder_idx = exchange_buffer + count;
3765 int * recv_buffer = exchange_buffer + 2 * count;
3768 size_t num_halo_per_src_field[num_src_fields];
3770 num_halo_per_src_field, 0,
3771 num_src_fields *
sizeof(num_halo_per_src_field[0]));
3772 for (
size_t i = 0; i < count; ++i) {
3773 size_t curr_src_field_idx = (size_t)(halo_points[i].field_idx);
3774 size_t pos = sdispls[(size_t)(halo_points[i].data.rank) * num_src_fields +
3775 curr_src_field_idx + 1]++;
3779 "ERROR(generate_halo_redists): offset not supported by MPI")
3781 reorder_idx[pos] = num_halo_per_src_field[curr_src_field_idx]++;
3785 yac_alltoallv_int_p2p(
3786 send_buffer, total_sendcounts, total_sdispls,
3787 recv_buffer, total_recvcounts, total_rdispls, comm,
3788 "generate_halo_redists", __LINE__);
3790 free(size_t_buffer);
3792 size_t nsend = 0, nsends[num_src_fields];
3793 size_t nrecv = 0, nrecvs[num_src_fields];
3794 memset(nsends, 0, num_src_fields *
sizeof(nsends[0]));
3795 memset(nrecvs, 0, num_src_fields *
sizeof(nrecvs[0]));
3796 for (
int i = 0; i < comm_size; ++i) {
3797 for (
size_t field_idx = 0; field_idx < num_src_fields; ++field_idx) {
3798 if (sendcounts[i * num_src_fields + field_idx] > 0) {
3800 nrecvs[field_idx]++;
3802 if (recvcounts[i * num_src_fields + field_idx] > 0) {
3804 nsends[field_idx]++;
3809 size_t total_num_msg = nsend + nrecv;
3811 struct Xt_redist_msg * msgs_buffer =
3812 xmalloc(total_num_msg *
sizeof(*msgs_buffer));
3813 struct Xt_redist_msg * send_msgs = msgs_buffer;
3814 struct Xt_redist_msg * recv_msgs = msgs_buffer + nsend;
3816 for (
size_t field_idx = 0, nsend = 0, nrecv = 0;
3817 field_idx < num_src_fields; ++field_idx) {
3818 for (
int rank = 0; rank < comm_size; ++rank) {
3819 size_t idx = (size_t)rank * num_src_fields + field_idx;
3820 if (sendcounts[idx] > 0) {
3821 recv_msgs[nrecv].rank = rank;
3822 recv_msgs[nrecv].datatype =
3823 xt_mpi_generate_datatype(
3824 reorder_idx + sdispls[idx], sendcounts[idx], MPI_DOUBLE, comm);
3827 if (recvcounts[idx] > 0) {
3828 send_msgs[nsend].rank = rank;
3829 send_msgs[nsend].datatype =
3830 xt_mpi_generate_datatype(
3831 recv_buffer + rdispls[idx], recvcounts[idx], MPI_DOUBLE, comm);
3840 if (total_num_msg > 0) {
3842 yac_mpi_call(MPI_Comm_split(comm, 1, 0, &halo_comm), comm);
3844 int * rank_buffer =
xmalloc(2 * total_num_msg *
sizeof(*rank_buffer));
3845 int * orig_ranks = rank_buffer;
3846 int * split_ranks = rank_buffer + total_num_msg;
3848 for (
size_t i = 0; i < total_num_msg; ++i)
3849 orig_ranks[i] = msgs_buffer[i].rank;
3851 MPI_Group orig_group, split_group;
3853 yac_mpi_call(MPI_Comm_group(halo_comm, &split_group), comm);
3856 MPI_Group_translate_ranks(orig_group, (
int)total_num_msg, orig_ranks,
3857 split_group, split_ranks), halo_comm);
3859 for (
size_t i = 0; i < total_num_msg; ++i)
3860 msgs_buffer[i].rank = split_ranks[i];
3868 redist =
xmalloc(num_src_fields *
sizeof(*redist));
3869 if (num_src_fields == 1) {
3871 xt_redist_single_array_base_custom_new(
3872 nsend, nrecv, send_msgs, recv_msgs, halo_comm,
3875 for (
size_t field_idx = 0; field_idx < num_src_fields; ++field_idx) {
3877 xt_redist_single_array_base_custom_new(
3878 nsends[field_idx], nrecvs[field_idx],
3879 send_msgs, recv_msgs, halo_comm,
3881 send_msgs += nsends[field_idx];
3882 recv_msgs += nrecvs[field_idx];
3887 yac_mpi_call(MPI_Comm_split(comm, 0, 0, &halo_comm), comm);
3892 free(exchange_buffer);
3907 if (ret)
return ret;
3912 if (ret)
return ret;
3921 const void * a,
const void * b) {
3930 for (
size_t i = 0; i <
count; ++i) {
3933 if (ret)
return ret;
3936 if (ret)
return ret;
3942 const void * a,
const void * b) {
3951 "ERROR(compare_interp_weight_stencil_wsum_mf_tgt_orig_pos): invalid data")
3956 return (a_orig_pos > b_orig_pos) - (a_orig_pos < b_orig_pos);
3966 Xt_config redist_config) {
3968 char const * routine =
"generate_redist_put_double";
3973 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
3975 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
3977 for (
size_t i = 0; i <
count; ++i) {
3978 int curr_count = point_infos[i].
count;
3983 curr_count >= 1,
"ERROR(%s): no owner found for global id", routine)
3984 for (
int j = 0; j < curr_count; ++j)
3985 sendcounts[curr_point_infos[j].
rank]++;
3989 1, sendcounts, recvcounts, sdispls, rdispls, comm);
3992 sdispls[comm_size] + sendcounts[comm_size - 1];
3994 rdispls[comm_size - 1] + recvcounts[comm_size - 1];
3996 int * exchange_buffer =
3997 xmalloc((2 * send_count + recv_count) *
sizeof(*exchange_buffer));
3998 int * send_buffer = exchange_buffer;
3999 int * reorder_idx = exchange_buffer + send_count;
4000 int * recv_buffer = exchange_buffer + 2 * send_count;
4003 for (
size_t i = 0; i < count; ++i) {
4004 int curr_count = point_infos[i].
count;
4008 for (
int j = 0; j < curr_count; ++j) {
4009 size_t pos = sdispls[curr_point_infos[j].
rank + 1]++;
4010 size_t orig_pos = curr_point_infos[j].orig_pos;
4012 orig_pos <= INT_MAX,
"ERROR(%s): offset not supported by MPI", routine)
4014 reorder_idx[pos] = i;
4019 yac_alltoallv_int_p2p(
4020 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls, comm,
4025 for (
int i = 0; i < comm_size; ++i) {
4026 if (sendcounts[i] > 0) nsend++;
4027 if (recvcounts[i] > 0) nrecv++;
4030 struct Xt_redist_msg * send_msgs =
xmalloc(nsend *
sizeof(*send_msgs));
4031 struct Xt_redist_msg * recv_msgs =
xmalloc(nrecv *
sizeof(*send_msgs));
4033 for (
int i = 0, nsend = 0, nrecv = 0; i < comm_size; ++i) {
4034 if (sendcounts[i] > 0) {
4035 send_msgs[nsend].rank = i;
4036 send_msgs[nsend].datatype =
4037 xt_mpi_generate_datatype(
4038 reorder_idx + sdispls[i], sendcounts[i], MPI_DOUBLE, comm);
4041 if (recvcounts[i] > 0) {
4042 recv_msgs[nrecv].rank = i;
4043 recv_msgs[nrecv].datatype =
4044 xt_mpi_generate_datatype(
4045 recv_buffer + rdispls[i], recvcounts[i], MPI_DOUBLE, comm);
4052 xt_redist_single_array_base_custom_new(
4053 nsend, nrecv, send_msgs, recv_msgs, comm, redist_config);
4055 free(exchange_buffer);
4066 size_t num_src_fields,
size_t tgt_count,
4068 double * weights,
size_t * src_idx,
size_t * src_field_idx, MPI_Comm comm,
4071 void (*interp_add_wsum_mf_at_src)(
4073 size_t *,
size_t *,
size_t, Xt_redist),
4074 void (*interp_add_wsum_mf_at_tgt)(
4076 size_t *,
size_t *,
size_t), Xt_config redist_config) {
4079 Xt_redist * halo_redists =
4081 remote_src_points, halo_size, num_src_fields, comm, redist_config);
4088 xmalloc(tgt_count *
sizeof(*tgt_infos));
4089 for (
size_t i = 0; i < tgt_count; ++i)
4090 tgt_infos[i] = tgt_stencils[i].tgt.
data;
4091 Xt_redist result_redist =
4093 tgt_infos, tgt_count, comm, redist_config);
4096 interp_add_wsum_mf_at_src(
4097 interp, halo_redists, tgt_count, num_src_per_tgt, weights,
4098 src_field_idx, src_idx, num_src_fields, result_redist);
4100 if (result_redist != NULL) xt_redist_delete(result_redist);
4104 size_t * tgt_orig_pos =
xmalloc(tgt_count *
sizeof(*tgt_orig_pos));
4105 for (
size_t i = 0; i < tgt_count; ++i) {
4107 tgt_stencils[i].tgt.
data.count == 1,
4108 "ERROR(interpolation_add_w_sum_mf): currently unsupported target "
4109 "point distribution")
4111 (size_t)(tgt_stencils[i].tgt.
data.data.single.orig_pos);
4114 interp_add_wsum_mf_at_tgt(
4115 interp, halo_redists, tgt_orig_pos, tgt_count,
4116 num_src_per_tgt, weights, src_field_idx, src_idx, num_src_fields);
4121 if (halo_redists != NULL) {
4122 for (
size_t i = 0; i < num_src_fields; ++i)
4123 xt_redist_delete(halo_redists[i]);
4130 size_t tgt_count,
size_t * num_src_per_tgt,
double * weights,
4131 size_t * src_field_idx,
size_t * src_idx,
4132 size_t num_src_fields, Xt_redist result_redist) {
4137 interp, halo_redists, tgt_count, num_src_per_tgt, src_field_idx,
4138 src_idx, num_src_fields, result_redist);
4143 size_t * tgt_pos,
size_t tgt_count,
size_t * num_src_per_tgt,
4144 double * weights,
size_t * src_field_idx,
size_t * src_idx,
4145 size_t num_src_fields) {
4150 interp, src_redists, tgt_pos, tgt_count, num_src_per_tgt,
4151 src_field_idx, src_idx, num_src_fields);
4156 size_t num_src_fields,
size_t tgt_count,
4158 double * weights,
size_t * src_idx,
size_t * src_field_idx, MPI_Comm comm,
4160 Xt_config redist_config) {
4163 remote_src_points, halo_size, num_src_fields, tgt_count, tgt_stencils,
4164 num_src_per_tgt, weights, src_idx, src_field_idx, comm, reorder,
4172 size_t num_src_fields,
size_t tgt_count,
4174 double * weights,
size_t * src_idx,
size_t * src_field_idx, MPI_Comm comm,
4176 Xt_config redist_config) {
4181 remote_src_points, halo_size, 1, tgt_count, tgt_stencils, num_src_per_tgt,
4182 weights, src_idx, src_field_idx, comm, reorder, interp, redist_config);
4187 size_t num_src_fields,
size_t tgt_count,
4189 double * weights,
size_t * src_idx,
size_t * src_field_idx, MPI_Comm comm,
4191 Xt_config redist_config) {
4194 remote_src_points, halo_size, num_src_fields, tgt_count, tgt_stencils,
4195 num_src_per_tgt, weights, src_idx, src_field_idx, comm, reorder,
4203 size_t num_src_fields,
size_t tgt_count,
4205 double * weights,
size_t * src_idx,
size_t * src_field_idx, MPI_Comm comm,
4207 Xt_config redist_config) {
4212 remote_src_points, halo_size, 1, tgt_count, tgt_stencils, num_src_per_tgt,
4213 weights, src_idx, src_field_idx, comm, reorder, interp, redist_config);
4220 void (*interp_add_w_sum_mf)(
4224 Xt_config), Xt_config redist_config) {
4234 "ERROR(yac_interp_weights_redist_w_sum_mf): invalid reorder type")
4241 size_t wsum_mf_count = new_wsum_mf_stencils_data->
count;
4243 new_wsum_mf_stencils_data->
data;
4246 size_t total_num_links = 0, total_num_remote_weights = 0;
4247 for (
size_t i = 0; i < wsum_mf_count; ++i) {
4248 size_t curr_stencil_size = wsum_mf_stencils[i].
count;
4249 total_num_links += curr_stencil_size;
4250 for (
size_t j = 0; j < curr_stencil_size; ++j)
4251 if (wsum_mf_stencils[i].
data[j].
src.rank != comm_rank)
4252 ++total_num_remote_weights;
4257 xmalloc(total_num_remote_weights *
sizeof(*remote_src_points));
4258 size_t num_src_fields = 0;
4259 for (
size_t i = 0, k = 0; i < wsum_mf_count; ++i) {
4260 size_t curr_stencil_size = wsum_mf_stencils[i].
count;
4262 wsum_mf_stencils[i].
data;
4263 for (
size_t j = 0; j < curr_stencil_size; ++j) {
4265 if (curr_src_field_idx >= num_src_fields)
4266 num_src_fields = curr_src_field_idx + 1;
4267 if (curr_weights[j].
src.rank != comm_rank) {
4268 remote_src_points[k].
data = curr_weights[j].
src;
4269 remote_src_points[k].
field_idx = curr_src_field_idx;
4277 MPI_IN_PLACE, &num_src_fields, 1,
YAC_MPI_SIZE_T, MPI_MAX, comm), comm);
4281 qsort(remote_src_points, total_num_remote_weights,
sizeof(*remote_src_points),
4288 size_t prev_field_idx;
4290 if (total_num_remote_weights > 0) {
4291 prev_remote_src_point = &(remote_src_points[0].
data);
4292 prev_field_idx = remote_src_points[0].
field_idx;
4295 prev_field_idx = SIZE_MAX;
4298 for (
size_t i = 0; i < total_num_remote_weights; ++i) {
4300 &(remote_src_points[i].
data);
4301 size_t curr_field_idx = remote_src_points[i].
field_idx;
4303 prev_remote_src_point, curr_remote_src_point) ||
4304 (prev_field_idx != curr_field_idx)) {
4305 prev_remote_src_point = curr_remote_src_point;
4306 prev_field_idx = curr_field_idx;
4307 remote_src_points[halo_size].
data = *curr_remote_src_point;
4308 remote_src_points[halo_size].
field_idx = curr_field_idx;
4312 wsum_mf_stencils + remote_src_points[i].
reorder_idx;
4313 size_t curr_stencil_size = curr_stencil->
count;
4314 for (
size_t j = 0; j < curr_stencil_size; ++j) {
4316 &(curr_stencil->
data[j].
src), curr_remote_src_point)) &&
4326 qsort(wsum_mf_stencils, wsum_mf_count,
sizeof(*wsum_mf_stencils),
4331 size_t * num_src_per_tgt =
xmalloc(wsum_mf_count *
sizeof(*num_src_per_tgt));
4332 double * weights =
xmalloc(total_num_links *
sizeof(*weights));
4333 size_t * src_idx =
xmalloc(total_num_links *
sizeof(*src_idx));
4334 size_t * src_field_idx =
xmalloc(total_num_links *
sizeof(*src_field_idx));
4337 for (
size_t i = 0, k = 0; i < wsum_mf_count; ++i) {
4338 size_t curr_stencil_size = wsum_mf_stencils[i].
count;
4340 wsum_mf_stencils[i].
data;
4341 num_src_per_tgt[i] = curr_stencil_size;
4342 for (
size_t j = 0; j < curr_stencil_size; ++j, ++k){
4343 weights[k] = curr_weights[j].
weight;
4350 interp_add_w_sum_mf(
4351 remote_src_points, halo_size, num_src_fields, wsum_mf_count,
4352 wsum_mf_stencils, num_src_per_tgt, weights, src_idx,
src_field_idx,
4353 comm, reorder, interp, redist_config);
4355 for (
size_t i = 0; i < new_wsum_mf_stencils_data->
count; ++i)
4357 free(new_wsum_mf_stencils_data->
data);
4358 free(new_wsum_mf_stencils_data);
4360 free(remote_src_points);
4364 free(num_src_per_tgt);
4370 void (*interp_add_w_sum_mf)(
4373 size_t *, MPI_Comm,
void *,
4374 Xt_config), Xt_config redist_config) {
4383 size_t wsum_mf_count = new_wsum_mf_stencils_data->
count;
4385 new_wsum_mf_stencils_data->
data;
4388 size_t total_num_links = 0;
4389 for (
size_t i = 0; i < wsum_mf_count; ++i)
4390 total_num_links += wsum_mf_stencils[i].
count;
4394 xmalloc(total_num_links *
sizeof(*src_points));
4395 size_t num_src_fields = 0;
4396 for (
size_t i = 0, k = 0; i < wsum_mf_count; ++i) {
4397 size_t curr_stencil_size = wsum_mf_stencils[i].
count;
4399 wsum_mf_stencils[i].
data;
4400 for (
size_t j = 0; j < curr_stencil_size; ++j, ++k) {
4402 if (curr_src_field_idx >= num_src_fields)
4403 num_src_fields = curr_src_field_idx + 1;
4404 src_points[k].
data = curr_weights[j].
src;
4405 src_points[k].
field_idx = curr_src_field_idx;
4411 MPI_IN_PLACE, &num_src_fields, 1,
YAC_MPI_SIZE_T, MPI_MAX, comm), comm);
4415 qsort(src_points, total_num_links,
sizeof(*src_points),
4422 size_t prev_field_idx;
4423 size_t num_src_points = 0;
4424 size_t src_field_buffer_size[num_src_fields];
4426 src_field_buffer_size, 0,
4427 num_src_fields *
sizeof(src_field_buffer_size[0]));
4429 if (total_num_links > 0) {
4430 prev_src_point = &(src_points[0].
data);
4431 prev_field_idx = src_points[0].
field_idx;
4432 src_field_buffer_size[src_points[0].
field_idx] = 1;
4435 for (
size_t i = 0; i < total_num_links; ++i) {
4437 size_t curr_field_idx = src_points[i].
field_idx;
4439 (prev_field_idx != curr_field_idx)) {
4440 prev_src_point = curr_src_point;
4441 prev_field_idx = curr_field_idx;
4442 if (num_src_points != i) {
4443 src_points[num_src_points].
data = *curr_src_point;
4444 src_points[num_src_points].
field_idx = curr_field_idx;
4446 src_field_buffer_size[curr_field_idx]++;
4451 size_t curr_stencil_size = curr_stencil->
count;
4452 for (
size_t j = 0; j < curr_stencil_size; ++j) {
4454 &(curr_stencil->
data[j].
src), curr_src_point)) &&
4458 src_field_buffer_size[curr_field_idx] - 1;
4464 qsort(wsum_mf_stencils, wsum_mf_count,
sizeof(*wsum_mf_stencils),
4467 size_t * num_src_per_tgt =
xmalloc(wsum_mf_count *
sizeof(*num_src_per_tgt));
4468 double * weights =
xmalloc(total_num_links *
sizeof(*weights));
4469 size_t * src_idx =
xmalloc(total_num_links *
sizeof(*src_idx));
4470 size_t * src_field_idx =
xmalloc(total_num_links *
sizeof(*src_field_idx));
4473 for (
size_t i = 0, k = 0; i < wsum_mf_count; ++i) {
4474 size_t curr_stencil_size = wsum_mf_stencils[i].
count;
4476 wsum_mf_stencils[i].
data;
4477 num_src_per_tgt[i] = curr_stencil_size;
4478 for (
size_t j = 0; j < curr_stencil_size; ++j, ++k){
4479 weights[k] = curr_weights[j].
weight;
4486 interp_add_w_sum_mf(
4487 src_points, num_src_points, num_src_fields, wsum_mf_count,
4488 wsum_mf_stencils, num_src_per_tgt, weights, src_idx,
src_field_idx,
4489 comm, interp, redist_config);
4491 for (
size_t i = 0; i < new_wsum_mf_stencils_data->
count; ++i)
4493 free(new_wsum_mf_stencils_data->
data);
4494 free(new_wsum_mf_stencils_data);
4500 free(num_src_per_tgt);
4510 char const * yaxt_exchanger_name, MPI_Comm comm) {
4512 Xt_config redist_config = xt_config_new();
4515 char * env_exchanger_name = NULL;
4516 if (yaxt_exchanger_name == NULL) {
4523 size_t exchanger_name_len = 0;
4528 exchanger_name_len =
4529 ((env_exchanger_name != NULL) && (env_exchanger_name[0] !=
'\0'))?
4530 strlen(env_exchanger_name):0;
4536 MPI_Bcast(&exchanger_name_len, 1,
YAC_MPI_SIZE_T, 0, comm), comm);
4538 if (exchanger_name_len > 0) {
4541 env_exchanger_name = strdup(env_exchanger_name);
4543 env_exchanger_name =
4544 xmalloc((exchanger_name_len + 1) *
sizeof(*env_exchanger_name));
4549 env_exchanger_name, (
int)(exchanger_name_len + 1), MPI_CHAR, 0, comm),
4552 yaxt_exchanger_name = env_exchanger_name;
4556 if (yaxt_exchanger_name != NULL) {
4559 int exchanger_id = xt_exchanger_id_by_name(yaxt_exchanger_name);
4562 "ERROR(get_redist_config): invalid yaxt exchanger name \"%s\"",
4563 yaxt_exchanger_name);
4564 xt_config_set_exchange_method(redist_config, exchanger_id);
4567 free(env_exchanger_name);
4569 return redist_config;
4576 double scaling_factor,
double scaling_summand,
4577 char const * yaxt_exchanger_name) {
4582 scaling_factor, scaling_summand);
4584 MPI_Comm comm = weights->
comm;
4596 memset(&(local_stencil_counts[0]), 0,
sizeof(local_stencil_counts));
4598 local_stencil_counts[(
int)(weights->
stencils[i].
type)]++;
4601 stencils_offsets[i] = accu;
4602 accu += local_stencil_counts[i];
4610 local_stencil_counts, global_stencil_counts,
4617 MPI_IN_PLACE, &max_collection_size, 1,
YAC_MPI_SIZE_T, MPI_MAX, comm),
4621 "ERROR(yac_interp_weights_get_interpolation): "
4622 "mismatching collection sizes")
4625 if (global_stencil_counts[
FIXED] > 0)
4627 weights->
comm, local_stencil_counts[
FIXED],
4631 if (global_stencil_counts[
DIRECT] > 0)
4637 if (global_stencil_counts[
SUM] > 0) {
4642 (
size_t)(local_stencil_counts[
SUM]),
SUM);
4644 weights->
comm, wsum_stencils, interp, reorder,
4646 for (
size_t i = 0; i < wsum_stencils->
count; ++i)
4648 free(wsum_stencils->
data);
4649 free(wsum_stencils);
4659 weights->
comm, wsum_stencils, interp, reorder,
4661 for (
size_t i = 0; i < wsum_stencils->
count; ++i)
4663 free(wsum_stencils->
data);
4664 free(wsum_stencils);
4667 if (global_stencil_counts[
DIRECT_MF] > 0)
4673 if (global_stencil_counts[
SUM_MF] > 0) {
4680 weights->
comm, sum_mf_stencils, interp, reorder,
4682 for (
size_t i = 0; i < sum_mf_stencils->
count; ++i)
4684 free(sum_mf_stencils->
data);
4685 free(sum_mf_stencils);
4695 weights->
comm, wsum_mf_stencils, interp, reorder,
4697 for (
size_t i = 0; i < wsum_mf_stencils->
count; ++i)
4699 free(wsum_mf_stencils->
data);
4700 free(wsum_mf_stencils);
4703 xt_config_delete(redist_config);
4709 struct yac_src_field_exchange_data * src_field_exchange_data,
4710 size_t num_src_fields, MPI_Comm comm,
4711 Xt_config redist_config) {
4716 size_t nsends[num_src_fields], nrecvs[num_src_fields];
4717 int max_recv_buffer_size = 0;
4718 for (
size_t i = 0; i < num_src_fields; ++i) {
4719 nsends[i] = src_field_exchange_data[i].send.num_msg;
4720 nrecvs[i] = src_field_exchange_data[i].recv.num_msg;
4721 for (
size_t j = 0; j < src_field_exchange_data[i].recv.num_msg; ++j) {
4722 if (max_recv_buffer_size < src_field_exchange_data[i].recv.msg[j].count)
4723 max_recv_buffer_size = src_field_exchange_data[i].recv.msg[j].count;
4727 size_t nsend = 0, nrecv = 0;
4728 size_t send_offsets[num_src_fields];
4729 size_t recv_offsets[num_src_fields];
4730 for (
size_t i = 0; i < num_src_fields; ++i) {
4731 send_offsets[i] = nsend;
4732 recv_offsets[i] = nrecv;
4737 size_t total_num_msg = nsend + nrecv;
4739 struct Xt_redist_msg * msgs_buffer =
4740 xmalloc(total_num_msg *
sizeof(*msgs_buffer));
4741 struct Xt_redist_msg * send_msgs = msgs_buffer;
4742 struct Xt_redist_msg * recv_msgs = msgs_buffer + nsend;
4745 xmalloc((
size_t)max_recv_buffer_size *
sizeof(*pos_buffer));
4748 for (
size_t src_field_idx = 0; src_field_idx < num_src_fields;
4751 for (
size_t send_idx = 0;
4752 send_idx < src_field_exchange_data[src_field_idx].send.num_msg;
4754 send_msgs[send_offsets[src_field_idx]].rank =
4755 src_field_exchange_data[src_field_idx].send.msg[send_idx].rank;
4756 send_msgs[send_offsets[src_field_idx]].datatype =
4757 xt_mpi_generate_datatype(
4758 src_field_exchange_data[src_field_idx].send.msg[send_idx].pos,
4759 src_field_exchange_data[src_field_idx].send.msg[send_idx].count,
4761 send_offsets[src_field_idx]++;
4764 for (
size_t recv_idx = 0;
4765 recv_idx < src_field_exchange_data[src_field_idx].recv.num_msg;
4769 i < src_field_exchange_data[src_field_idx].recv.msg[recv_idx].count;
4772 src_field_exchange_data[src_field_idx].recv.msg[recv_idx].pos[i];
4774 recv_msgs[recv_offsets[src_field_idx]].rank =
4775 src_field_exchange_data[src_field_idx].recv.msg[recv_idx].rank;
4776 recv_msgs[recv_offsets[src_field_idx]].datatype =
4777 xt_mpi_generate_datatype(pos_buffer, count, MPI_DOUBLE, comm);
4778 recv_offsets[src_field_idx]++;
4784 Xt_redist * redists;
4785 MPI_Comm split_comm;
4789 if (total_num_msg > 0) {
4793 yac_mpi_call(MPI_Comm_split(comm, 1, 0, &split_comm), comm);
4796 xmalloc(2 * total_num_msg *
sizeof(*rank_buffer));
4797 int * orig_ranks = rank_buffer;
4798 int * split_ranks = rank_buffer + total_num_msg;
4800 for (
size_t i = 0; i < total_num_msg; ++i)
4801 orig_ranks[i] = msgs_buffer[i].rank;
4803 MPI_Group orig_group, split_group;
4805 yac_mpi_call(MPI_Comm_group(split_comm, &split_group), comm);
4810 MPI_Group_translate_ranks(orig_group, total_num_msg, orig_ranks,
4811 split_group, split_ranks), split_comm);
4813 for (
size_t i = 0; i < total_num_msg; ++i)
4814 msgs_buffer[i].rank = split_ranks[i];
4822 redists =
xmalloc(num_src_fields *
sizeof(*redists));
4823 for (
size_t src_field_idx = 0; src_field_idx < num_src_fields;
4825 redists[src_field_idx] =
4826 xt_redist_single_array_base_custom_new(
4827 nsends[src_field_idx], nrecvs[src_field_idx],
4828 send_msgs, recv_msgs, split_comm, redist_config);
4829 send_msgs += nsends[src_field_idx];
4830 recv_msgs += nrecvs[src_field_idx];
4834 yac_mpi_call(MPI_Comm_split(comm, 0, 0, &split_comm), comm);
4845 double frac_mask_fallback_value,
double scaling_factor,
4846 double scaling_summand,
4861 interp_weights_data->
weights = NULL;
4863 interp_weights_data->
src_idx = NULL;
4869 struct yac_src_field_exchange_data * src_field_exchange_data,
4870 size_t num_src_fields) {
4872 for (
size_t i = 0; i < num_src_fields; ++i) {
4873 for (
size_t j = 0; j < src_field_exchange_data[i].send.num_msg; ++j)
4874 free(src_field_exchange_data[i].send.msg[j].pos);
4875 free(src_field_exchange_data[i].send.msg);
4876 for (
size_t j = 0; j < src_field_exchange_data[i].recv.num_msg; ++j)
4877 free(src_field_exchange_data[i].recv.msg[j].pos);
4878 free(src_field_exchange_data[i].recv.msg);
4881 free(src_field_exchange_data);
4886 size_t num_src_fields,
size_t tgt_count,
4888 double * weights,
size_t * src_idx,
size_t * src_field_idx, MPI_Comm comm,
4900 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
4902 num_src_fields, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
4903 size_t * size_t_buffer =
4904 xmalloc(4 * (
size_t)comm_size *
sizeof(*size_t_buffer));
4905 size_t * total_sendcounts = size_t_buffer + 0 * comm_size;
4906 size_t * total_recvcounts = size_t_buffer + 1 * comm_size;
4907 size_t * total_sdispls = size_t_buffer + 2 * comm_size;
4908 size_t * total_rdispls = size_t_buffer + 3 * comm_size;
4910 for (
size_t i = 0; i < num_src_points; ++i)
4911 sendcounts[remote_src_points[i].data.rank * num_src_fields +
4915 num_src_fields, sendcounts, recvcounts, sdispls, rdispls, comm);
4917 size_t total_sendcount = 0, total_recvcount = 0;
4918 for (
int i = 0; i < comm_size; ++i) {
4919 total_sdispls[i] = total_sendcount;
4920 total_rdispls[i] = total_recvcount;
4921 total_sendcounts[i] = 0;
4922 total_recvcounts[i] = 0;
4923 for (
size_t j = 0; j < num_src_fields; ++j) {
4924 total_sendcounts[i] += sendcounts[num_src_fields * i + j];
4925 total_recvcounts[i] += recvcounts[num_src_fields * i + j];
4927 total_sendcount += total_sendcounts[i];
4928 total_recvcount += total_recvcounts[i];
4931 size_t recv_count = total_recvcounts[comm_size - 1] +
4932 total_rdispls[comm_size - 1];
4934 size_t * exchange_buffer =
4935 xmalloc((2 * num_src_points + recv_count) *
sizeof(*exchange_buffer));
4936 size_t * send_buffer = exchange_buffer;
4937 size_t * reorder_idx = exchange_buffer + num_src_points;
4938 size_t * recv_buffer = exchange_buffer + 2 * num_src_points;
4941 size_t src_field_buffer_size[num_src_fields];
4943 src_field_buffer_size, 0,
4944 num_src_fields *
sizeof(src_field_buffer_size[0]));
4945 for (
size_t i = 0; i < num_src_points; ++i) {
4946 size_t curr_src_field_idx = (size_t)(remote_src_points[i].field_idx);
4947 size_t pos = sdispls[(size_t)(remote_src_points[i].data.rank) * num_src_fields +
4948 curr_src_field_idx + 1]++;
4949 send_buffer[pos] = (size_t)remote_src_points[i].data.orig_pos;
4950 reorder_idx[pos] = src_field_buffer_size[curr_src_field_idx]++;
4954 yac_alltoallv_size_t_p2p(
4955 send_buffer, total_sendcounts, total_sdispls,
4956 recv_buffer, total_recvcounts, total_rdispls, comm,
4957 "interpolation_raw_add_w_sum_mf", __LINE__);
4959 free(size_t_buffer);
4963 recvcounts, recv_buffer, sendcounts, reorder_idx,
4967 free(exchange_buffer);
4969 size_t * tgt_idx =
xmalloc(tgt_count *
sizeof(*tgt_idx));
4971 for (
size_t i = 0; i < tgt_count; ++i) {
4973 tgt_stencils[i].tgt.
data.count == 1,
4974 "ERROR(interpolation_raw_add_w_sum_mf): currently unsupported "
4975 "target point distribution")
4976 tgt_idx[i] = (size_t)(tgt_stencils[i].tgt.
data.data.single.orig_pos);
4981 tgt_idx, num_src_per_tgt, weights, src_field_idx, src_idx,
4982 src_field_buffer_size);
4989 size_t num_src_fields,
size_t tgt_count,
4991 double * weights,
size_t * src_idx,
size_t * src_field_idx, MPI_Comm comm,
4992 void * interp_raw, Xt_config redist_config) {
4995 src_points, num_src_points, num_src_fields, tgt_count, tgt_stencils,
4996 num_src_per_tgt, weights, src_idx, src_field_idx, comm,
5002 size_t num_src_fields,
size_t tgt_count,
5004 double * weights,
size_t * src_idx,
size_t * src_field_idx, MPI_Comm comm,
5005 void * interp_raw, Xt_config redist_config) {
5010 src_points, num_src_points, num_src_fields, tgt_count, tgt_stencils,
5011 num_src_per_tgt, NULL, src_idx, src_field_idx, comm,
5017 size_t num_src_fields,
size_t tgt_count,
5019 double * weights,
size_t * src_idx,
size_t * src_field_idx, MPI_Comm comm,
5020 void * interp_raw, Xt_config redist_config) {
5025 src_points, num_src_points, num_src_fields, tgt_count, tgt_stencils,
5026 num_src_per_tgt, weights, src_idx, NULL, comm,
5032 size_t num_src_fields,
size_t tgt_count,
5034 double * weights,
size_t * src_idx,
size_t * src_field_idx, MPI_Comm comm,
5035 void * interp_raw, Xt_config redist_config) {
5041 src_points, num_src_points, num_src_fields, tgt_count, tgt_stencils,
5042 num_src_per_tgt, NULL, src_idx, NULL, comm,
5048 size_t collection_size,
double frac_mask_fallback_value,
5049 double scaling_factor,
double scaling_summand,
5050 char const * yaxt_exchanger_name,
5058 frac_mask_fallback_value, scaling_factor, scaling_summand,
5061 MPI_Comm comm = weights->
comm;
5073 memset(&(local_stencil_counts[0]), 0,
sizeof(local_stencil_counts));
5075 local_stencil_counts[(
int)(weights->
stencils[i].
type)]++;
5078 stencils_offsets[i] = accu;
5079 accu += local_stencil_counts[i];
5087 local_stencil_counts, global_stencil_counts,
5091 size_t max_collection_size = collection_size;
5094 MPI_IN_PLACE, &max_collection_size, 1,
YAC_MPI_SIZE_T, MPI_MAX, comm),
5097 (
size_t)max_collection_size == collection_size,
5098 "ERROR(yac_interp_weights_get_interpolation_raw): "
5099 "mismatching collection sizes")
5102 if (global_stencil_counts[
FIXED] > 0)
5104 weights->
comm, local_stencil_counts[
FIXED],
5108 if (global_stencil_counts[
DIRECT] > 0)
5115 if (global_stencil_counts[
SUM] > 0) {
5120 (
size_t)(local_stencil_counts[
SUM]),
SUM);
5122 weights->
comm, wsum_stencils, (
void*)&interpolation_raw,
5124 for (
size_t i = 0; i < wsum_stencils->
count; ++i)
5126 free(wsum_stencils->
data);
5127 free(wsum_stencils);
5137 weights->
comm, wsum_stencils, (
void*)&interpolation_raw,
5139 for (
size_t i = 0; i < wsum_stencils->
count; ++i)
5141 free(wsum_stencils->
data);
5142 free(wsum_stencils);
5145 if (global_stencil_counts[
DIRECT_MF] > 0)
5151 if (global_stencil_counts[
SUM_MF] > 0) {
5158 weights->
comm, sum_mf_stencils, (
void*)&interpolation_raw,
5160 for (
size_t i = 0; i < sum_mf_stencils->
count; ++i)
5162 free(sum_mf_stencils->
data);
5163 free(sum_mf_stencils);
5173 weights->
comm, wsum_mf_stencils, (
void*)&interpolation_raw,
5175 for (
size_t i = 0; i < wsum_mf_stencils->
count; ++i)
5177 free(wsum_mf_stencils->
data);
5178 free(wsum_mf_stencils);
5181 Xt_redist * redists =
5187 *interpolation_exchange =
5190 collection_size, with_frac_mask,
5191 "yac_interp_weights_get_interpolation_raw");
5194 if (redists != NULL) {
5197 xt_redist_delete(redists[i]);
5205 xt_config_delete(redist_config);
5211 double scaling_factor,
double scaling_summand,
5212 char const * yaxt_exchanger_name) {
5217 "ERROR(yac_interp_weights_get_interpolation_f2c): "
5218 "reorder type must be of YAC_MAPPING_ON_SRC/YAC_MAPPING_ON_TGT");
5224 scaling_factor, scaling_summand,
5225 ((yaxt_exchanger_name != NULL) && (yaxt_exchanger_name[0] !=
'\0'))?
5226 yaxt_exchanger_name:NULL);
5238 for (
size_t i = 0 ; i < count; ++i) {
5248 "ERROR(yac_interp_weights_delete): invalid stencil type")
5249 switch(stencils[i].
type) {
5282#ifdef YAC_NETCDF_ENABLED
5285 return (*(
double const *)a > *(
double const *)b) -
5286 (*(
double const *)a < *(
double const *)b);
5293 char const * filename,
char const * src_grid_name,
char const * tgt_grid_name,
5294 size_t num_fixed_values,
double * fixed_values,
5295 size_t * num_tgt_per_fixed_value,
size_t num_links,
5296 size_t num_weights_per_link,
size_t num_src_fields,
5297 size_t * num_links_per_src_field,
5299 size_t src_grid_size,
size_t tgt_grid_size) {
5304 yac_nc_create(filename, NC_CLOBBER | NC_64BIT_OFFSET, &ncid);
5306 int dim_weight_id[8];
5309 if (num_links > 0) {
5310 YAC_HANDLE_ERROR(nc_def_dim(ncid,
"num_links", num_links, &dim_weight_id[0]));
5312 num_weights_per_link > 0,
5313 "ERROR(create_weight_file): number of links is %zu but number of "
5314 "weights per link is zero for weight file %s", num_links, filename)
5316 nc_def_dim(ncid,
"num_wgts", num_weights_per_link, &dim_weight_id[1]));
5320 "ERROR(create_weight_file): number of source fields is zero for "
5321 "weight file %s", filename)
5323 nc_def_dim(ncid,
"num_src_fields", num_src_fields, &dim_weight_id[2]));
5328 if (num_fixed_values > 0) {
5331 ncid,
"num_fixed_values", num_fixed_values, &dim_weight_id[4]));
5332 size_t num_fixed_dst = 0;
5333 for (
size_t i = 0; i < num_fixed_values; ++i)
5334 num_fixed_dst += num_tgt_per_fixed_value[i];
5337 "ERROR(create_weight_file): number of fixed values is %zu but number "
5338 "of fixed destination points is zero for weight file %s",
5339 num_fixed_dst, filename)
5341 nc_def_dim(ncid,
"num_fixed_dst", num_fixed_dst, &dim_weight_id[5]));
5344 if (src_grid_size > 0)
5346 nc_def_dim(ncid,
"src_grid_size", src_grid_size, &dim_weight_id[6]));
5348 if (tgt_grid_size > 0)
5350 nc_def_dim(ncid,
"dst_grid_size", tgt_grid_size, &dim_weight_id[7]));
5352 int var_src_add_id, var_dst_add_id, var_weight_id, var_num_links_id,
5353 src_var_locs_id, tgt_var_loc_id, var_fixed_values_id,
5354 var_num_dst_per_fixed_value_id, var_dst_add_fixed_id;
5357 if (num_links > 0) {
5360 ncid,
"src_address", NC_INT, 1, dim_weight_id, &var_src_add_id));
5363 ncid,
"dst_address", NC_INT, 1, dim_weight_id, &var_dst_add_id));
5366 ncid,
"remap_matrix", NC_DOUBLE, 2, dim_weight_id, &var_weight_id));
5368 nc_def_var(ncid,
"num_links_per_src_field", NC_INT, 1,
5369 &dim_weight_id[2], &var_num_links_id));
5373 ncid,
"src_locations", NC_CHAR, 2, &dim_weight_id[2], &src_var_locs_id));
5376 ncid,
"dst_location", NC_CHAR, 1, &dim_weight_id[3], &tgt_var_loc_id));
5377 if (num_fixed_values > 0) {
5379 nc_def_var(ncid,
"fixed_values", NC_DOUBLE, 1, &dim_weight_id[4],
5380 &var_fixed_values_id));
5382 nc_def_var(ncid,
"num_dst_per_fixed_value", NC_INT, 1, &dim_weight_id[4],
5383 &var_num_dst_per_fixed_value_id));
5385 nc_def_var(ncid,
"dst_address_fixed", NC_INT, 1, &dim_weight_id[5],
5386 &var_dst_add_fixed_id));
5391 nc_put_att_text(ncid, NC_GLOBAL,
"version",
5395 nc_put_att_text(ncid, NC_GLOBAL,
"src_grid_name",
5396 strlen(src_grid_name), src_grid_name));
5398 nc_put_att_text(ncid, NC_GLOBAL,
"dst_grid_name",
5399 strlen(tgt_grid_name), tgt_grid_name));
5401 char const * str_logical[2] = {
"FALSE",
"TRUE"};
5403 strlen(str_logical[num_links > 0]),
5404 str_logical[num_links > 0]));
5406 strlen(str_logical[num_fixed_values > 0]),
5407 str_logical[num_fixed_values > 0]));
5415 if (num_links > 0) {
5416 int * num_links_per_src_field_int =
5417 xmalloc(num_src_fields *
sizeof(*num_links_per_src_field_int));
5418 for (
size_t i = 0; i < num_src_fields; ++i) {
5420 num_links_per_src_field[i] <= INT_MAX,
5421 "ERROR(create_weight_file): "
5422 "number of links per source field too big (not yet supported)")
5423 num_links_per_src_field_int[i] = (int)num_links_per_src_field[i];
5426 nc_put_var_int(ncid, var_num_links_id, num_links_per_src_field_int));
5427 free(num_links_per_src_field_int);
5430 for (
size_t i = 0; i < num_src_fields; ++i) {
5431 char const * loc_str =
yac_loc2str(src_locations[i]);
5432 size_t str_start[2] = {i, 0};
5433 size_t str_count[2] = {1, strlen(loc_str)};
5435 nc_put_vara_text(ncid, src_var_locs_id, str_start, str_count, loc_str));
5440 size_t str_start[1] = {0};
5441 size_t str_count[1] = {strlen(loc_str)};
5443 nc_put_vara_text(ncid, tgt_var_loc_id, str_start, str_count, loc_str));
5445 if (num_fixed_values > 0) {
5447 int * num_tgt_per_fixed_value_int =
5448 xmalloc(num_fixed_values *
sizeof(*num_tgt_per_fixed_value_int));
5449 for (
unsigned i = 0; i < num_fixed_values; ++i) {
5451 num_tgt_per_fixed_value[i] <= INT_MAX,
5452 "ERROR(create_weight_file): "
5453 "number of targets per fixed value is too big (not yet supported)")
5454 num_tgt_per_fixed_value_int[i] = (int)num_tgt_per_fixed_value[i];
5456 YAC_HANDLE_ERROR(nc_put_var_double(ncid, var_fixed_values_id, fixed_values));
5458 num_tgt_per_fixed_value_int));
5459 free(num_tgt_per_fixed_value_int);
5470 int ret = b_is_fixed - a_is_fixed;
5472 if (ret)
return ret;
5477 double fixed_value_a =
5479 double fixed_value_b =
5481 ret = (fixed_value_a > fixed_value_b) -
5482 (fixed_value_a < fixed_value_b);
5483 if (ret)
return ret;
5494 yac_int * min_tgt_global_id,
yac_int * max_tgt_global_id, MPI_Comm comm) {
5496 yac_int min_max[2] = {XT_INT_MAX, XT_INT_MIN};
5498 for (
size_t i = 0; i < stencils_size; ++i) {
5501 if (curr_id < min_max[0]) min_max[0] = curr_id;
5502 if (curr_id > min_max[1]) min_max[1] = curr_id;
5505 min_max[0] = XT_INT_MAX - min_max[0];
5509 MPI_IN_PLACE, min_max, 2,
yac_int_dt, MPI_MAX, comm), comm);
5511 *min_tgt_global_id = XT_INT_MAX - min_max[0];
5512 *max_tgt_global_id = min_max[1];
5518 int num_io_procs_int,
int * io_owner) {
5520 long long num_io_procs = (
long long)num_io_procs_int;
5521 long long id_range =
5522 MAX((
long long)(max_tgt_global_id - min_tgt_global_id),1);
5524 for (
size_t i = 0; i < stencils_size; ++i)
5526 ((
int)(
MIN(((
long long)(stencils[i].
tgt.
global_id - min_tgt_global_id) *
5527 num_io_procs) / id_range, num_io_procs - 1)));
// Collects the fixed values found in the local stencils, gathers them from all
// processes of comm, and returns the gathered array through fixed_values /
// num_fixed_values. The returned array comes from xrealloc.
// (Function header, the loop body, the comparator arguments and the MPI call
// names were lost in extraction; the function name itself is not visible in
// this listing.)
5532 double ** fixed_values,
size_t * num_fixed_values, MPI_Comm comm) {
// Upper bound: at most one fixed value per local stencil.
5537 double * local_fixed_values =
5538 xmalloc(stencil_count *
sizeof(*local_fixed_values));
// One allocation holds both per-rank arrays: receive counts and displacements.
5540 int * int_buffer =
xmalloc(2 * (
size_t)comm_size *
sizeof(*int_buffer));
5541 int * recvcounts = int_buffer + 0 * comm_size;
5542 int * rdispls = int_buffer + 1 * comm_size;
5544 size_t local_num_fixed = 0;
// The loop advances local_num_fixed together with i; its condition continues
// on a line that is missing here.
5547 for (
size_t i = 0; i < stencil_count;
5548 ++i, ++local_num_fixed) {
// Sort the local values (comparator argument is on a missing line).
5552 qsort(local_fixed_values, local_num_fixed,
sizeof(*local_fixed_values),
// MPI counts are int, hence the narrowing conversion.
5557 int local_num_fixed_int = (int)(local_num_fixed);
// Every rank contributes its local count; recvcounts receives one int per rank.
5560 &local_num_fixed_int, 1, MPI_INT, recvcounts, 1,MPI_INT, comm), comm);
// Running sum over recvcounts (the displacement assignment is on a missing
// line; presumably rdispls[i] = accu -- TODO confirm).
5561 for (
int i = 0, accu = 0; i < comm_size; ++i) {
5563 accu += recvcounts[i];
// Total number of values contributed by all ranks.
5566 size_t num_all_fixed_values = 0;
5567 for (
int i = 0; i < comm_size; ++i)
5568 num_all_fixed_values += (
size_t)(recvcounts[i]);
5570 double * all_fixed_values =
5571 xmalloc(num_all_fixed_values *
sizeof(*all_fixed_values));
// Variable-count gather of the doubles using recvcounts / rdispls.
5576 local_fixed_values, local_num_fixed_int, MPI_DOUBLE,
5577 all_fixed_values, recvcounts, rdispls, MPI_DOUBLE, comm), comm);
5579 free(local_fixed_values);
// Sort the gathered values (comparator argument is on a missing line).
5581 qsort(all_fixed_values, num_all_fixed_values,
sizeof(*all_fixed_values),
// Resize the buffer to the final element count and hand it to the caller.
5584 *fixed_values =
xrealloc(all_fixed_values,
5585 num_all_fixed_values *
sizeof(*all_fixed_values));
5586 *num_fixed_values = num_all_fixed_values;
// Only the assertion message of get_num_weights_per_link survived extraction:
// it is reported when the stencil has an unknown type.
5599 "ERROR(get_num_weights_per_link): invalid stencil type")
// Fragment (presumably stencil_get_num_weights_per_tgt -- TODO confirm):
// computes a per-link weight count over the local stencils and then combines
// it across processes with an MPI_MAX reduction.
5608 size_t num_weights_per_link = 0;
// The right-hand side of the update is on lines missing from this listing.
5609 for (
size_t i = 0; i < stencil_count; ++i)
5610 num_weights_per_link =
// The value is copied into a separate variable for the reduction and copied
// back afterwards (the reduction call itself is mostly missing here).
5613 size_t num_weights_per_link_64_t = num_weights_per_link;
5617 MPI_MAX, comm), comm);
5618 num_weights_per_link = (size_t)num_weights_per_link_64_t;
5620 return num_weights_per_link;
// Fragment of get_num_links_per_src_field: returns how many links a stencil
// contributes to the source field src_field_idx.
// FIXED stencils are rejected by the first assertion message below; unknown
// stencil types by the second.
5628 "ERROR(get_num_links_per_src_field): "
5629 "stencil type FIXED not supported by this routine")
5637 "ERROR(get_num_links_per_src_field): invalid stencil type")
5638 switch (stencil->
type) {
// A DIRECT stencil has exactly one link, and it is counted for field 0 only.
5640 case(
DIRECT):
return (src_field_idx == 0)?1:0;
// Two loops over the stencil entries follow; their case labels and bodies are
// missing from this listing.
5649 for (
size_t i = 0; i < stencil_size; ++i)
5657 for (
size_t i = 0; i < stencil_size; ++i)
// Counts, for the local stencils, how many targets use each fixed value and
// how many links belong to each source field.
// Outputs: num_tgt_per_fixed_value[num_fixed_values],
//          num_links_per_src_field[num_src_fields], *num_links
//          (*num_fixed_tgt is set on lines missing from this listing).
// (Function header and parts of the loop bodies were lost in extraction.)
5666 size_t num_fixed_values,
double * fixed_values,
5667 size_t * num_tgt_per_fixed_value,
5668 size_t * num_fixed_tgt,
size_t num_src_fields,
5669 size_t * num_links_per_src_field,
size_t * num_links) {
// Reset both output count arrays.
5673 for (
size_t i = 0; i < num_fixed_values; ++i) num_tgt_per_fixed_value[i] = 0;
5674 for (
size_t i = 0; i < num_src_fields; ++i) num_links_per_src_field[i] = 0;
5676 for (
size_t i = 0; i < stencil_count; ++i) {
// Linear search for the matching entry in fixed_values; the comparison is
// an exact floating-point equality.
5679 for (
size_t j = 0; j < num_fixed_values; ++j) {
5680 if (curr_fixed_value == fixed_values[j]) {
5681 num_tgt_per_fixed_value[j]++;
// Accumulate the per-field link counts (right-hand side is on a missing line).
5687 for (
size_t j = 0; j < num_src_fields; ++j) {
5688 num_links_per_src_field[j] +=
// Total link count is the sum over all source fields. *num_links is only
// incremented here; its initialisation is not visible in this listing.
5693 for (
size_t i = 0; i < num_src_fields; ++i)
5694 *num_links += num_links_per_src_field[i];
// Computes, for every fixed value and every source field, the global position
// at which this process' entries start.
// Outputs: fixed_offsets[num_fixed_values], link_offsets[num_src_fields].
// (Function header, the comm_rank query and the MPI call wrappers were lost
// in extraction.)
5698 size_t num_fixed_values,
size_t * num_tgt_per_fixed_value,
5699 size_t num_src_fields,
size_t * num_links_per_src_field,
5700 size_t * fixed_offsets,
size_t * link_offsets, MPI_Comm comm) {
// Fixed-value counts and link counts are handled as one concatenated vector of
// length count, so that a single reduction and a single scan cover both.
5705 size_t count = num_fixed_values + num_src_fields;
// One allocation, three arrays of length count each.
5706 size_t * size_t_buffer =
xmalloc(3 * count *
sizeof(*size_t_buffer));
5707 size_t * global_counts = size_t_buffer + 0 * count;
5708 size_t * local_counts = size_t_buffer + 1 * count;
5709 size_t * offsets = size_t_buffer + 2 * count;
// local_counts = [per-fixed-value counts | per-source-field counts]
5711 for (
size_t i = 0; i < num_fixed_values; ++i)
5712 local_counts[i] = num_tgt_per_fixed_value[i];
5713 for (
size_t i = 0; i < num_src_fields; ++i)
5714 local_counts[num_fixed_values + i] = num_links_per_src_field[i];
// global_counts[k]: sum of local_counts[k] over all processes.
5717 MPI_Allreduce(local_counts, global_counts, (
int)count,
YAC_MPI_SIZE_T,
5718 MPI_SUM, comm), comm);
// offsets[k]: sum of local_counts[k] over all lower-ranked processes.
5720 MPI_Exscan(local_counts, offsets, (
int)count,
YAC_MPI_SIZE_T, MPI_SUM, comm),
// MPI_Exscan leaves the receive buffer undefined on rank 0, so it is zeroed
// explicitly there.
5722 if (comm_rank == 0) memset(offsets, 0, count *
sizeof(*offsets));
// Final offset = entries of all preceding fixed values on all processes (accu)
// + entries of the same fixed value on lower-ranked processes (offsets).
5724 for (
size_t i = 0, accu = 0; i < num_fixed_values; ++i) {
5725 fixed_offsets[i] = (size_t)(offsets[i]) + accu;
5726 accu += (size_t)(global_counts[i]);
// Same scheme for the links; accu restarts at 0, so link offsets are counted
// independently of the fixed-value offsets.
5728 for (
size_t i = 0, accu = 0; i < num_src_fields; ++i) {
5729 link_offsets[i] = (size_t)(offsets[i+num_fixed_values]) + accu;
5730 accu += (size_t)(global_counts[i+num_fixed_values]);
5732 free(size_t_buffer);
// Fragment of global_id_to_address: converts a global id into an int address
// by adding 1 to it.
// The assertion condition below requires the id to be below INT_MAX (so that
// id + 1 still fits into int) and different from XT_INT_MAX.
5738 (global_id < INT_MAX) && (global_id != XT_INT_MAX),
5739 "ERROR(global_id_to_address): "
5740 "a global id cannot be converted into a address; too big")
5741 return (
int)global_id + 1;
// Fragment of stencil_get_tgt_address: iterates over all stencil_count stencils
// with tgt_address as the output array. The loop body is missing from this
// listing (presumably one address per stencil -- TODO confirm).
5746 int * tgt_address) {
5748 for (
size_t i = 0; i < stencil_count; ++i)
// Flattens the (non-fixed) stencils into three parallel link arrays:
// src_address, tgt_address and weight. Links are grouped by source field: the
// links of field j start at the sum of num_links_per_src_field[0..j-1].
// (Function header, all case labels and several right-hand sides were lost in
// extraction, so the stencil type of each branch is not visible here.)
5754 size_t * num_links_per_src_field,
size_t num_src_fields,
5755 int * src_address,
int * tgt_address,
double * weight) {
// One allocation, two arrays: the current write position per source field and
// a snapshot of those positions taken before each stencil is processed.
5757 size_t * src_field_offsets =
5758 xmalloc(2 * num_src_fields *
sizeof(*src_field_offsets));
5759 size_t * prev_src_field_offsets = src_field_offsets + num_src_fields;
// Exclusive prefix sum of the per-field link counts.
5760 for (
size_t i = 0, accu = 0; i < num_src_fields; ++i) {
5761 src_field_offsets[i] = accu;
5762 accu += num_links_per_src_field[i];
5766 for (
size_t i = 0; i < stencil_count; ++i, ++curr_stencil) {
// Remember where this stencil's links start in each field.
5768 memcpy(prev_src_field_offsets, src_field_offsets,
5769 num_src_fields *
sizeof(*prev_src_field_offsets));
5774 "ERROR(stencil_get_link_data): this call is invalid for FIXED stencils")
5777 (curr_stencil->
type ==
SUM) ||
5782 "ERROR(stencil_get_link_data): invalid stencil type")
5783 size_t src_field_offset;
5784 switch (curr_stencil->
type) {
// Single link in source field 0 with weight 1.0.
5787 src_field_offset = src_field_offsets[0]++;
5788 src_address[src_field_offset] =
5790 tgt_address[src_field_offset] = curr_tgt_address;
5791 weight[src_field_offset] = 1.0;
// curr_count links in source field 0, each with weight 1.0.
5796 for (
size_t k = 0; k < curr_count; ++k) {
5797 src_field_offset = src_field_offsets[0]++;
5798 src_address[src_field_offset] =
5800 tgt_address[src_field_offset] = curr_tgt_address;
5801 weight[src_field_offset] = 1.0;
// curr_count links in source field 0 with individual weights.
5809 for (
size_t k = 0; k < curr_count; ++k) {
5810 src_field_offset = src_field_offsets[0]++;
5811 src_address[src_field_offset] =
5813 tgt_address[src_field_offset] = curr_tgt_address;
5814 weight[src_field_offset] = weights[k];
// Single link with weight 1.0; the line that selects src_field_offset for
// this branch is missing from this listing.
5821 src_address[src_field_offset ] =
5823 tgt_address[src_field_offset ] = curr_tgt_address;
5824 weight[src_field_offset ] = 1.0;
// curr_count links spread over several source fields (field_indices[k]
// selects the field), each with weight 1.0.
5831 for (
size_t k = 0; k < curr_count; ++k) {
5832 src_field_offset = src_field_offsets[field_indices[k]]++;
5833 src_address[src_field_offset] =
5835 tgt_address[src_field_offset] = curr_tgt_address;
5836 weight[src_field_offset] = 1.0;
// curr_count links spread over several source fields with individual weights.
5846 for (
size_t k = 0; k < curr_count; ++k) {
5847 src_field_offset = src_field_offsets[field_indices[k]]++;
5848 src_address[src_field_offset] =
5850 tgt_address[src_field_offset] = curr_tgt_address;
5851 weight[src_field_offset] = weights[k];
// Per field, a helper is applied to exactly the links this stencil just added
// (range [prev offset, current offset)); it receives the source addresses and
// the matching weights. The helper's name is on a missing line (presumably a
// sort by source address -- TODO confirm).
5857 for (
size_t j = 0; j < num_src_fields; ++j)
5859 src_address + prev_src_field_offsets[j],
5860 src_field_offsets[j] - prev_src_field_offsets[j],
5861 weight + prev_src_field_offsets[j]);
// Frees both offset arrays (they share one allocation).
5863 free(src_field_offsets);
// Fragment of yac_interp_weights_redist_stencils: sends every stencil to the
// process named in owner_ranks and reports the number of received stencils
// through new_count.
// (Function header and the names of the called helpers were lost in
// extraction.)
5868 int * owner_ranks,
size_t * new_count,
5871 int comm_rank, comm_size;
// Communication count/displacement arrays are obtained from a helper whose
// name is on a missing line.
5875 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
5877 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
// Identity index list over the local stencils; at the same time count how many
// stencils go to each owner rank.
5879 size_t * stencil_indices =
xmalloc(count *
sizeof(*stencil_indices));
5880 for (
size_t i = 0; i < count; ++i) {
5881 stencil_indices[i] = i;
5882 sendcounts[owner_ranks[i]]++;
5886 1, sendcounts, recvcounts, sdispls, rdispls, comm);
// Total received = displacement of the last rank + its receive count.
5891 *new_count = recvcounts[comm_size - 1] + rdispls[comm_size - 1];
5895 free(stencil_indices);
// Fragment of yac_interp_weights_write_to_file: redistributes the stencils to a
// set of I/O ranks and writes them in parallel into a NetCDF weight file.
// (Function header and the names of most called helpers were lost in
// extraction; comments describe only what the remaining lines show.)
5902 char const * src_grid_name,
char const * tgt_grid_name,
5903 size_t src_grid_size,
size_t tgt_grid_size,
// Without NetCDF support the routine only reports the error below.
5906#ifndef YAC_NETCDF_ENABLED
5916 "ERROR(yac_interp_weights_write_to_file): "
5917 "YAC is built without the NetCDF support");
5920 MPI_Comm comm = weights->
comm;
5921 int comm_rank, comm_size;
// The existence check result is broadcast from the first I/O rank so that all
// processes take the same decision.
5932 int weight_file_exists =
5935 MPI_Bcast(&weight_file_exists, 1, MPI_INT, io_ranks[0], comm), comm);
// Only the first I/O rank formats the message and aborts.
5940 if (io_ranks[0] == comm_rank) {
5941 char const msg_fmt[] =
5942 "ERROR(yac_interp_weights_write_to_file): "
5943 "weight file already exists (%s)";
// Buffer size: the two characters of "%s" are replaced by filename, so the
// formatted text plus terminating NUL needs
// strlen(msg_fmt) + strlen(filename) - 1 bytes, which fits.
5944 char msg[strlen(msg_fmt) + strlen(filename)];
5945 sprintf(msg, msg_fmt, filename);
5946 yac_abort(comm, msg, __FILE__, __LINE__);
// Global range of target ids, used to assign stencils to I/O ranks.
5952 yac_int min_tgt_global_id, max_tgt_global_id;
5955 &min_tgt_global_id, &max_tgt_global_id, comm);
5962 min_tgt_global_id, max_tgt_global_id,
5963 num_io_ranks, io_owner);
// Translate the I/O process index into the actual rank in comm.
5965 io_owner[i] = io_ranks[io_owner[i]];
// Stencils owned by this process after redistribution.
5968 size_t io_stencil_count = 0;
5974 &io_stencil_count, &io_stencils);
// Use the maximum of the grid sizes passed in by all processes.
5978 size_t grid_sizes[2] = {src_grid_size, tgt_grid_size};
5981 MPI_IN_PLACE, grid_sizes, 2,
YAC_MPI_SIZE_T, MPI_MAX, comm), comm);
5982 src_grid_size = (size_t)(grid_sizes[0]);
5983 tgt_grid_size = (size_t)(grid_sizes[1]);
// Split comm by io_flag (colour); comm_rank is the key, so the relative rank
// order is kept inside io_comm.
5986 yac_mpi_call(MPI_Comm_split(comm, io_flag, comm_rank, &io_comm), comm);
// Sort the local I/O stencils (comparator argument is on a missing line).
6000 qsort(io_stencils, io_stencil_count,
sizeof(*io_stencils),
// From here on comm_rank / comm_size refer to io_comm, not comm.
6003 yac_mpi_call(MPI_Comm_rank(io_comm, &comm_rank), comm);
6004 yac_mpi_call(MPI_Comm_size(io_comm, &comm_size), comm);
6006 double * fixed_values = NULL;
6007 size_t num_fixed_values = 0;
6009 io_stencils, io_stencil_count, &fixed_values, &num_fixed_values, io_comm);
6010 size_t num_src_fields =
weights->num_src_fields;
6011 size_t num_weights_per_link =
// One allocation, four arrays laid out back to back:
//   num_tgt_per_fixed_value [num_fixed_values]
//   num_links_per_src_field [num_src_fields]
//   fixed_offsets           [num_fixed_values]
//   link_offsets            [num_src_fields]
6014 size_t * size_t_buffer =
6015 xmalloc(2 * (num_fixed_values + num_src_fields) *
sizeof(*size_t_buffer));
6016 size_t * num_tgt_per_fixed_value = size_t_buffer;
6017 size_t * num_links_per_src_field = size_t_buffer + num_fixed_values;
6018 size_t * fixed_offsets = size_t_buffer + num_fixed_values + num_src_fields;
6019 size_t * link_offsets = size_t_buffer + 2 * num_fixed_values + num_src_fields;
// Local counts ...
6021 size_t num_fixed_tgt = 0;
6022 size_t num_links = 0;
6024 io_stencils, io_stencil_count, num_fixed_values, fixed_values,
6025 num_tgt_per_fixed_value, &num_fixed_tgt, num_src_fields,
6026 num_links_per_src_field, &num_links);
// ... and the global start positions of the local entries in the file.
6029 num_fixed_values, num_tgt_per_fixed_value,
6030 num_src_fields, num_links_per_src_field,
6031 fixed_offsets, link_offsets, io_comm);
// The last rank of io_comm derives the global totals from its own offsets and
// counts: offset + local count is the end position of entry i, and subtracting
// the running total of the previous entries (accu) leaves the size of entry i.
6033 if (comm_rank == comm_size - 1) {
6035 size_t * total_num_tgt_per_fixed_value =
6036 xmalloc(num_fixed_values *
sizeof(*total_num_tgt_per_fixed_value));
6037 for (
size_t i = 0, accu = 0; i < num_fixed_values; ++i) {
6038 total_num_tgt_per_fixed_value[i] =
6039 fixed_offsets[i] + num_tgt_per_fixed_value[i] - accu;
6040 accu += total_num_tgt_per_fixed_value[i];
// NOTE(review): indexes with num_src_fields-1, which would wrap around if
// num_src_fields were 0 -- presumably at least one source field is
// guaranteed; confirm against yac_interp_weights_new.
6042 size_t total_num_links = link_offsets[num_src_fields-1] +
6043 num_links_per_src_field[num_src_fields-1];
6045 size_t * total_num_links_per_src_field =
6046 xmalloc(num_src_fields *
sizeof(*total_num_links_per_src_field));
6047 for (
size_t i = 0, accu = 0; i < num_src_fields; ++i) {
6048 total_num_links_per_src_field[i] =
6049 link_offsets[i] + num_links_per_src_field[i] - accu;
6050 accu += total_num_links_per_src_field[i];
// The totals are passed to a helper together with the file and grid names
// (its name is on a missing line; presumably create_weight_file -- TODO
// confirm).
6054 filename, src_grid_name, tgt_grid_name,
6055 num_fixed_values, fixed_values, total_num_tgt_per_fixed_value,
6056 total_num_links, num_weights_per_link,
6057 num_src_fields, total_num_links_per_src_field,
6059 src_grid_size, tgt_grid_size);
6061 free(total_num_links_per_src_field);
6062 free(total_num_tgt_per_fixed_value);
// Open the file for writing.
6073 yac_nc_open(filename, NC_WRITE | NC_SHARE, &ncid);
// --- targets with a fixed value ---
6075 if (num_fixed_tgt > 0) {
6077 int * tgt_address_fixed =
6078 xmalloc(num_fixed_tgt *
sizeof(*tgt_address_fixed));
6082 int var_dst_add_fixed_id;
// One write per fixed value: start is the global position in the file, offset
// is the matching position in the local buffer.
6086 for (
size_t i = 0, offset = 0; i < num_fixed_values; ++i) {
6088 if (num_tgt_per_fixed_value[i] == 0)
continue;
6090 size_t start[1] = {fixed_offsets[i]};
6091 size_t count[1] = {num_tgt_per_fixed_value[i]};
6094 ncid, var_dst_add_fixed_id, start, count, tgt_address_fixed + offset));
6095 offset += num_tgt_per_fixed_value[i];
6098 free(tgt_address_fixed);
// --- links (source address, target address, weight) ---
6101 if (num_links > 0) {
6103 int * src_address_link =
xmalloc(num_links *
sizeof(*src_address_link));
6104 int * tgt_address_link =
xmalloc(num_links *
sizeof(*tgt_address_link));
6105 double * w =
xmalloc(num_links * num_weights_per_link *
sizeof(*w));
// The first num_fixed_tgt stencils are skipped here (this presumably relies on
// the sort above placing fixed stencils first -- TODO confirm).
6107 io_stencils + num_fixed_tgt, io_stencil_count - num_fixed_tgt,
6108 num_links_per_src_field, num_src_fields,
6109 src_address_link, tgt_address_link, w);
6111 int var_src_add_id, var_dst_add_id, var_weight_id;
// One set of writes per source field; start/count have a second dimension
// of num_weights_per_link entries per link.
6116 for (
size_t i = 0, offset = 0; i < num_src_fields; ++i) {
6118 if (num_links_per_src_field[i] == 0)
continue;
6120 size_t start[2] = {link_offsets[i], 0};
6121 size_t count[2] = {num_links_per_src_field[i], num_weights_per_link};
6125 ncid, var_src_add_id, start, count, src_address_link + offset));
6128 ncid, var_dst_add_id, start, count, tgt_address_link + offset));
6131 ncid, var_weight_id, start, count,
6132 w + num_weights_per_link * offset));
6134 offset += num_links_per_src_field[i];
6138 free(tgt_address_link);
6139 free(src_address_link);
// Releases the four count/offset arrays (single allocation).
6148 free(size_t_buffer);
// Body of yac_interp_weights_get_interp_count: returns the stencils_size
// member of weights.
6156 return weights->stencils_size;
// Fragment of yac_interp_weights_get_interp_tgt: allocates an array with one
// yac_int per stencil and fills it in a loop whose body is missing from this
// listing (presumably the target global id of each stencil -- TODO confirm).
// The array is allocated with xmalloc; no matching free is visible here.
6163 size_t stencils_size =
weights->stencils_size;
6165 yac_int * global_ids =
xmalloc(stencils_size *
sizeof(*global_ids));
6167 for (
size_t i = 0; i < stencils_size; ++i)
// Fragment of yac_interp_weights_data_copy: builds and returns a copy of
// interp_weights_data in interp_weights_data_copy.
// COPY_ARRAY(DATA, COUNT) allocates COUNT elements for member DATA of the copy
// and memcpy's the original member into it.
// NOTE(review): COUNT is used unparenthesised in the size computation, so an
// argument such as "a + b" would be multiplied incorrectly -- check call sites.
6196#define COPY_ARRAY(DATA, COUNT) \
6198 size_t size = COUNT * sizeof(*(interp_weights_data.DATA)); \
6199 interp_weights_data_copy.DATA = xmalloc(size); \
6200 memcpy(interp_weights_data_copy.DATA, interp_weights_data.DATA, size); \
6206 interp_weights_data.frac_mask_fallback_value;
6208 interp_weights_data.scaling_factor;
6210 interp_weights_data.scaling_summand;
// Total number of fixed targets = sum of the per-fixed-value counts.
6212 size_t total_num_fixed_tgt = 0;
6213 for (
size_t i = 0; i < interp_weights_data.num_fixed_values; ++i)
6214 total_num_fixed_tgt += interp_weights_data.num_tgt_per_fixed_value[i];
6216 interp_weights_data.num_fixed_values;
// Total number of weights = sum of the source counts of all weighted targets.
6221 size_t num_weights = 0;
6222 for (
size_t i = 0; i < interp_weights_data.num_wgt_tgt; ++i)
6223 num_weights += interp_weights_data.num_src_per_tgt[i];
6224 interp_weights_data_copy.
num_wgt_tgt = interp_weights_data.num_wgt_tgt;
6231 interp_weights_data.num_src_fields;
// The copy is returned by value.
6236 return interp_weights_data_copy;
// Fragment of yac_interp_weights_data_free: releases the weights and src_idx
// arrays of interp_weights_data. Frees of other members, if any, are on lines
// missing from this listing.
6248 free(interp_weights_data.
weights);
6250 free(interp_weights_data.
src_idx);
#define ENSURE_ARRAY_SIZE(arrayp, curr_array_size, req_size)
#define YAC_WEIGHT_FILE_VERSION_STRING
static Xt_redist * generate_src_field_exchange_redists(struct yac_src_field_exchange_data *src_field_exchange_data, size_t num_src_fields, MPI_Comm comm, Xt_config redist_config)
static void interpolation_raw_add_wsum_mf(struct remote_point_info_reorder *src_points, size_t num_src_points, size_t num_src_fields, size_t tgt_count, struct interp_weight_stencil_wsum_mf *tgt_stencils, size_t *num_src_per_tgt, double *weights, size_t *src_idx, size_t *src_field_idx, MPI_Comm comm, void *interp_raw, Xt_config redist_config)
static void interpolation_add_wsum(struct remote_point_info_reorder *remote_src_points, size_t halo_size, size_t num_src_fields, size_t tgt_count, struct interp_weight_stencil_wsum_mf *tgt_stencils, size_t *num_src_per_tgt, double *weights, size_t *src_idx, size_t *src_field_idx, MPI_Comm comm, enum yac_interp_weights_reorder_type reorder, void *interp, Xt_config redist_config)
static MPI_Datatype get_fixed_stencil_mpi_datatype(MPI_Comm comm)
static size_t get_num_links_per_src_field(struct interp_weight_stencil *stencil, size_t src_field_idx)
static int get_stencil_pack_size_direct_mf(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static void interp_add_direct_raw(void *interp, size_t *src_orig_poses, size_t *sendcounts, struct interp_weight_stencil_direct *tgt_stencils, size_t *recvcounts, MPI_Comm comm, Xt_config redist_config)
static void interpolation_add_w_sum_mf(struct remote_point_info_reorder *remote_src_points, size_t halo_size, size_t num_src_fields, size_t tgt_count, struct interp_weight_stencil_wsum_mf *tgt_stencils, size_t *num_src_per_tgt, double *weights, size_t *src_idx, size_t *src_field_idx, MPI_Comm comm, enum yac_interp_weights_reorder_type reorder, struct yac_interpolation *interp, void(*interp_add_wsum_mf_at_src)(struct yac_interpolation *, Xt_redist *, size_t, size_t *, double *, size_t *, size_t *, size_t, Xt_redist), void(*interp_add_wsum_mf_at_tgt)(struct yac_interpolation *, Xt_redist *, size_t *, size_t, size_t *, double *, size_t *, size_t *, size_t), Xt_config redist_config)
static void interpolation_raw_add_sum(struct remote_point_info_reorder *src_points, size_t num_src_points, size_t num_src_fields, size_t tgt_count, struct interp_weight_stencil_wsum_mf *tgt_stencils, size_t *num_src_per_tgt, double *weights, size_t *src_idx, size_t *src_field_idx, MPI_Comm comm, void *interp_raw, Xt_config redist_config)
static void unpack_stencil_wsum_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static MPI_Datatype get_direct_stencil_mpi_datatype(MPI_Comm comm)
static void stencil_determine_tgt_global_id_range(struct interp_weight_stencil *stencils, size_t stencils_size, yac_int *min_tgt_global_id, yac_int *max_tgt_global_id, MPI_Comm comm)
static void unpack_stencil_sum_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_interp_weights_add_fixed(struct yac_interp_weights *weights, struct remote_points *tgts, double fixed_value)
struct yac_interp_weights_data yac_interp_weights_data_copy(struct yac_interp_weights_data interp_weights_data)
static int compare_stencils_direct_mf(const void *a, const void *b)
void yac_interp_weights_add_sum_mf(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_src_per_field_per_tgt, struct remote_point **srcs_per_field, size_t num_src_fields)
static struct remote_points * copy_remote_points_mf(struct remote_point **points, size_t *counts, size_t num_fields)
static struct interp_weight_stencils_wsum_mf * redist_wsum_mf_stencils_tgt(MPI_Comm comm, struct interp_weight_stencils_wsum_mf *wsum_stencils_data)
static int compare_remote_point_info(const void *a, const void *b)
static void yac_src_field_exchange_data_realloc(struct yac_interpolation_raw *interp_raw, size_t num_src_fields)
static int global_id_to_address(yac_int global_id)
static int get_stencil_pack_size_sum(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_interp_weights_data_init(struct yac_interp_weights_data *interp_weights_data)
#define COPY_ARRAY(DATA, COUNT)
static void pack_stencil_direct(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
yac_interp_weight_stencil_type
@ WEIGHT_STENCIL_TYPE_SIZE
static void interpolation_add_sum_mf(struct remote_point_info_reorder *remote_src_points, size_t halo_size, size_t num_src_fields, size_t tgt_count, struct interp_weight_stencil_wsum_mf *tgt_stencils, size_t *num_src_per_tgt, double *weights, size_t *src_idx, size_t *src_field_idx, MPI_Comm comm, enum yac_interp_weights_reorder_type reorder, void *interp, Xt_config redist_config)
static void pack_stencil_wsum(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static void yac_src_field_exchange_data_msgs_add(struct yac_src_field_exchange_data_msgs *msgs, int rank, size_t count, size_t *pos, size_t offset)
static void free_remote_points(struct remote_points *points)
static void interpolation_add_wsum_mf(struct remote_point_info_reorder *remote_src_points, size_t halo_size, size_t num_src_fields, size_t tgt_count, struct interp_weight_stencil_wsum_mf *tgt_stencils, size_t *num_src_per_tgt, double *weights, size_t *src_idx, size_t *src_field_idx, MPI_Comm comm, enum yac_interp_weights_reorder_type reorder, void *interp, Xt_config redist_config)
static void interpolation_raw_add_w_sum_mf(struct remote_point_info_reorder *remote_src_points, size_t num_src_points, size_t num_src_fields, size_t tgt_count, struct interp_weight_stencil_wsum_mf *tgt_stencils, size_t *num_src_per_tgt, double *weights, size_t *src_idx, size_t *src_field_idx, MPI_Comm comm, struct yac_interpolation_raw *interp_raw, Xt_config redist_config)
static void yac_interp_weight_stencils_delete(struct interp_weight_stencil *stencils, size_t count)
static void interpolation_raw_add_sum_mf(struct remote_point_info_reorder *src_points, size_t num_src_points, size_t num_src_fields, size_t tgt_count, struct interp_weight_stencil_wsum_mf *tgt_stencils, size_t *num_src_per_tgt, double *weights, size_t *src_idx, size_t *src_field_idx, MPI_Comm comm, void *interp_raw, Xt_config redist_config)
static void interpolation_add_sum(struct remote_point_info_reorder *remote_src_points, size_t halo_size, size_t num_src_fields, size_t tgt_count, struct interp_weight_stencil_wsum_mf *tgt_stencils, size_t *num_src_per_tgt, double *weights, size_t *src_idx, size_t *src_field_idx, MPI_Comm comm, enum yac_interp_weights_reorder_type reorder, void *interp, Xt_config redist_config)
static int compare_stencils_fixed(const void *a, const void *b)
static void interp_add_direct_mf_raw(void *interp, size_t num_src_fields, size_t *src_orig_poses, size_t *sendcounts, struct interp_weight_stencil_direct_mf *tgt_stencils, size_t *recvcounts, MPI_Comm comm, Xt_config redist_config)
static struct interp_weight_stencil stencils_merge(struct interp_weight_stencil **stencils, double *w, size_t num_stencils, struct remote_point point)
static int get_stencil_wsum_mf_pack_size(struct interp_weight_stencil_wsum_mf *stencil, MPI_Datatype wsum_mf_weight_dt, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_interp_weights_add_wsum(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_src_per_tgt, struct remote_point *srcs, double *w)
static struct interp_weight_stencil stencils_merge_sum(struct interp_weight_stencil **stencils, double *w, size_t num_stencils)
static int compare_w_global_id(const void *a, const void *b)
static void compact_srcs_w(struct remote_points *srcs, double **w)
void yac_interp_weights_delete(struct yac_interp_weights *weights)
static size_t unpack_stencils_wsum_mf(struct interp_weight_stencil_wsum_mf *wsum_stencils, struct interp_weight_stencil_wsum_mf_weight *weight_buffer, size_t count, void *packed_data, size_t packed_data_size, MPI_Comm comm)
static void pack_stencil_sum(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static void determine_stencils_io_owner(struct interp_weight_stencil *stencils, size_t stencils_size, yac_int min_tgt_global_id, yac_int max_tgt_global_id, int num_io_procs_int, int *io_owner)
static Xt_redist * generate_direct_mf_redists(size_t *src_orig_poses, size_t *sendcounts, struct interp_weight_stencil_direct_mf *tgt_stencils, size_t *recvcounts, size_t num_src_fields, MPI_Comm comm, Xt_config redist_config)
static size_t get_num_weights_per_link(struct interp_weight_stencil *stencil)
static void free_remote_point(struct remote_point point)
static void yac_interp_weights_redist_stencils(MPI_Comm comm, size_t count, struct interp_weight_stencil *stencils, int *owner_ranks, size_t *new_count, struct interp_weight_stencil **new_stencils)
static struct yac_src_field_exchange_data_msg * yac_src_field_exchange_data_msgs_get_msg(struct yac_src_field_exchange_data_msgs *msgs, int rank)
void yac_interp_weights_data_free(struct yac_interp_weights_data interp_weights_data)
static void unpack_stencil_wsum(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_interp_weights_wcopy_weights(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_stencils_per_tgt, size_t *stencil_indices, int *stencil_ranks, double *w)
static int compare_interp_weight_stencil(const void *a, const void *b)
static Xt_config get_redist_config(char const *yaxt_exchanger_name, MPI_Comm comm)
void yac_interp_weights_get_interpolation_raw(struct yac_interp_weights *weights, size_t collection_size, double frac_mask_fallback_value, double scaling_factor, double scaling_summand, char const *yaxt_exchanger_name, struct yac_interpolation_exchange **interpolation_exchange, struct yac_interp_weights_data *interp_weights_data)
static void yac_interp_weights_redist_w_sum_mf(MPI_Comm comm, struct interp_weight_stencils_wsum_mf *wsum_mf_stencils_data, struct yac_interpolation *interp, enum yac_interp_weights_reorder_type reorder, void(*interp_add_w_sum_mf)(struct remote_point_info_reorder *, size_t, size_t, size_t, struct interp_weight_stencil_wsum_mf *, size_t *, double *, size_t *, size_t *, MPI_Comm, enum yac_interp_weights_reorder_type, void *, Xt_config), Xt_config redist_config)
static void pack_stencil_wsum_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_interp_weights_add_wsum_mf(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_src_per_field_per_tgt, struct remote_point **srcs_per_field, double *w, size_t num_src_fields)
static int compare_rank_pos_reorder_field_idx(const void *a, const void *b)
static int get_stencil_pack_size_wsum(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static void pack_stencils(struct interp_weight_stencil *stencils, size_t count, size_t *pack_order, void **pack_data, int *pack_sizes, MPI_Datatype point_info_dt, MPI_Comm comm)
static void yac_interp_weights_redist_fixed(MPI_Comm comm, size_t count, struct interp_weight_stencil *fixed_stencils, void *interp, void(*interp_add_fixed)(void *, double, size_t, size_t *))
static void yac_src_field_exchange_data_init(struct yac_src_field_exchange_data *src_field_exchange_data)
static void interp_add_fixed(void *interp, double fixed_value, size_t count, size_t *tgt_pos)
static void get_stencils_pack_sizes(struct interp_weight_stencil *stencils, size_t count, size_t *pack_order, int *pack_sizes, MPI_Datatype point_info_dt, MPI_Comm comm)
yac_int * yac_interp_weights_get_interp_tgt(struct yac_interp_weights *weights)
static int get_stencil_pack_size_sum_mf(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static void pack_stencil_fixed(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static void yac_src_field_exchange_data_add(struct yac_src_field_exchange_data *src_field_exchange_data, size_t num_src_fields, MPI_Comm comm, size_t *send_msg_sizes, size_t *send_pos, size_t *recv_msg_sizes, size_t *recv_pos, size_t *recv_offsets)
static int get_stencil_pack_size_wsum_mf(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static struct interp_weight_stencil wcopy_interp_weight_stencil(struct interp_weight_stencil *stencil, struct remote_point point, double weight)
static Xt_redist generate_direct_redist(size_t *src_orig_poses, size_t *sendcounts, struct interp_weight_stencil_direct *tgt_stencils, size_t *recvcounts, MPI_Comm comm, Xt_config redist_config)
static struct interp_weight_stencil * yac_interp_weights_get_stencils(struct yac_interp_weights *weights, size_t *stencil_indices, int *stencil_ranks, size_t count)
static void unpack_stencil_direct_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static int compare_stencils(const void *a, const void *b)
static void create_weight_file(char const *filename, char const *src_grid_name, char const *tgt_grid_name, size_t num_fixed_values, double *fixed_values, size_t *num_tgt_per_fixed_value, size_t num_links, size_t num_weights_per_link, size_t num_src_fields, size_t *num_links_per_src_field, enum yac_location *src_locations, enum yac_location tgt_location, size_t src_grid_size, size_t tgt_grid_size)
static int get_stencil_pack_size_direct(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static void stencil_get_counts(struct interp_weight_stencil *stencils, size_t stencil_count, size_t num_fixed_values, double *fixed_values, size_t *num_tgt_per_fixed_value, size_t *num_fixed_tgt, size_t num_src_fields, size_t *num_links_per_src_field, size_t *num_links)
static void copy_remote_points_no_alloc(struct remote_point *points_to, struct remote_point *points_from, size_t count, struct remote_point_info **point_info_buffer_)
struct yac_interpolation * yac_interp_weights_get_interpolation(struct yac_interp_weights *weights, enum yac_interp_weights_reorder_type reorder, size_t collection_size, double frac_mask_fallback_value, double scaling_factor, double scaling_summand, char const *yaxt_exchanger_name)
static void interpolation_raw_add_wsum(struct remote_point_info_reorder *src_points, size_t num_src_points, size_t num_src_fields, size_t tgt_count, struct interp_weight_stencil_wsum_mf *tgt_stencils, size_t *num_src_per_tgt, double *weights, size_t *src_idx, size_t *src_field_idx, MPI_Comm comm, void *interp_raw, Xt_config redist_config)
static struct remote_points * copy_remote_points(struct remote_point *points, size_t count)
static void interpolation_add_sum_at_src(struct yac_interpolation *interp, Xt_redist *halo_redists, size_t tgt_count, size_t *num_src_per_tgt, double *weights, size_t *src_field_idx, size_t *src_idx, size_t num_src_fields, Xt_redist result_redist)
static Xt_redist generate_redist_put_double(struct remote_point_infos *point_infos, size_t count, MPI_Comm comm, Xt_config redist_config)
struct yac_interpolation * yac_interp_weights_get_interpolation_f2c(struct yac_interp_weights *weights, int reorder, size_t collection_size, double frac_mask_fallback_value, double scaling_factor, double scaling_summand, char const *yaxt_exchanger_name)
static int compare_remote_point(const void *a, const void *b)
static MPI_Datatype get_direct_mf_stencil_mpi_datatype(MPI_Comm comm)
#define YAC_YAXT_EXCHANGER_STR
MPI_Comm yac_interp_weights_get_comm(struct yac_interp_weights *weights)
static MPI_Datatype get_wsum_mf_weight_mpi_datatype(MPI_Comm comm)
static void yac_interp_weights_redist_direct_mf(MPI_Comm comm, size_t count, struct interp_weight_stencil *direct_mf_stencils, void *interp, void(*interp_add_direct_mf)(void *, size_t, size_t *, size_t *, struct interp_weight_stencil_direct_mf *, size_t *, MPI_Comm, Xt_config), Xt_config redist_config)
static struct interp_weight_stencil copy_interp_weight_stencil(struct interp_weight_stencil *stencil, struct remote_point point)
void yac_interp_weights_add_direct(struct yac_interp_weights *weights, struct remote_points *tgts, struct remote_point *srcs)
void yac_interp_weights_add_sum(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_src_per_tgt, struct remote_point *srcs)
static void unpack_stencil_direct(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static int compare_interp_weight_stencil_wsum_mf_tgt_orig_pos(const void *a, const void *b)
static void interp_raw_add_fixed(void *interp, double fixed_value, size_t count, size_t *tgt_pos)
struct yac_interp_weights * yac_interp_weights_new(MPI_Comm comm, enum yac_location tgt_location, enum yac_location *src_locations, size_t num_src_fields)
static void unpack_stencil_sum(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static void yac_interp_weights_redist_direct(MPI_Comm comm, size_t count, struct interp_weight_stencil *direct_stencils, void *interp, void(*interp_add_direct)(void *, size_t *, size_t *, struct interp_weight_stencil_direct *, size_t *, MPI_Comm, Xt_config), Xt_config redist_config)
static struct interp_weight_stencils_wsum_mf * redist_wsum_mf_stencils(MPI_Comm comm, struct interp_weight_stencils_wsum_mf *wsum_stencils_data, int *stencil_owner, size_t *reorder_idx, size_t num_owners)
static struct interp_weight_stencils_wsum_mf * redist_wsum_mf_stencils_src(MPI_Comm comm, struct interp_weight_stencils_wsum_mf *wsum_stencils_data)
static int compare_stencils_direct(const void *a, const void *b)
static void yac_interp_weights_data_init_(double frac_mask_fallback_value, double scaling_factor, double scaling_summand, struct yac_interp_weights_data *interp_weights_data)
static size_t stencil_get_num_weights_per_tgt(struct interp_weight_stencil *stencils, size_t stencil_count, MPI_Comm comm)
static void stencil_get_tgt_address(struct interp_weight_stencil *stencils, size_t stencil_count, int *tgt_address)
static int get_stencil_pack_size_fixed(struct interp_weight_stencil *stencil, MPI_Datatype point_info_dt, MPI_Comm comm)
static struct remote_point copy_remote_point(struct remote_point point)
static void stencil_get_link_data(struct interp_weight_stencil *stencils, size_t stencil_count, size_t *num_links_per_src_field, size_t num_src_fields, int *src_address, int *tgt_address, double *weight)
static void interp_add_direct(void *interp, size_t *src_orig_poses, size_t *sendcounts, struct interp_weight_stencil_direct *tgt_stencils, size_t *recvcounts, MPI_Comm comm, Xt_config redist_config)
size_t yac_interp_weights_get_interp_count(struct yac_interp_weights *weights)
static struct interp_weight_stencil stencils_merge_wsum(struct interp_weight_stencil **stencils, double *w, size_t num_stencils)
static void pack_stencil_sum_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static Xt_redist * generate_halo_redists(struct remote_point_info_reorder *halo_points, size_t count, size_t num_src_fields, MPI_Comm comm, Xt_config redist_config)
static void pack_stencil_direct_mf(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static void pack_stencils_wsum_mf(struct interp_weight_stencil_wsum_mf *wsum_stencils, size_t count, size_t *pack_order, void **pack_data, int *pack_sizes, int *weight_counts, MPI_Comm comm)
static int compare_interp_weight_stencil_wsum_mf_src_orig_pos(const void *a, const void *b)
static struct interp_weight_stencil * exchange_stencils(MPI_Comm comm, struct interp_weight_stencil *stencils, size_t *stencil_indices, size_t *stencil_sendcounts, size_t *stencil_recvcounts)
static void unpack_stencil_fixed(struct interp_weight_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static int compare_double(void const *a, void const *b)
static void stencil_xscan_offsets(size_t num_fixed_values, size_t *num_tgt_per_fixed_value, size_t num_src_fields, size_t *num_links_per_src_field, size_t *fixed_offsets, size_t *link_offsets, MPI_Comm comm)
static void interpolation_add_sum_at_tgt(struct yac_interpolation *interp, Xt_redist *src_redists, size_t *tgt_pos, size_t tgt_count, size_t *num_src_per_tgt, double *weights, size_t *src_field_idx, size_t *src_idx, size_t num_src_fields)
static void interp_add_direct_mf(void *interp, size_t num_src_fields, size_t *src_orig_poses, size_t *sendcounts, struct interp_weight_stencil_direct_mf *tgt_stencils, size_t *recvcounts, MPI_Comm comm, Xt_config redist_config)
void yac_interp_weights_write_to_file(struct yac_interp_weights *weights, char const *filename, char const *src_grid_name, char const *tgt_grid_name, size_t src_grid_size, size_t tgt_grid_size, enum yac_weight_file_on_existing on_existing)
static void xt_redist_msg_free(struct Xt_redist_msg *msgs, size_t count, MPI_Comm comm)
void yac_interp_weights_add_direct_mf(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *src_field_indices, struct remote_point **srcs_per_field, size_t num_src_fields)
static void unpack_stencils(struct interp_weight_stencil *stencils, size_t count, void *packed_data, size_t packed_data_size, MPI_Datatype point_info_dt, MPI_Comm comm)
static struct remote_point_info select_src(struct remote_point_infos src)
static struct interp_weight_stencils_wsum_mf * generate_w_sum_mf_stencils(struct interp_weight_stencil *stencils, size_t count, enum yac_interp_weight_stencil_type stencil_type)
static void yac_src_field_exchange_data_free(struct yac_src_field_exchange_data *src_field_exchange_data, size_t num_src_fields)
static int compute_owner(int *ranks, size_t count)
static void yac_interp_weights_redist_w_sum_mf_raw(MPI_Comm comm, struct interp_weight_stencils_wsum_mf *wsum_mf_stencils_data, struct yac_interpolation *interp, void(*interp_add_w_sum_mf)(struct remote_point_info_reorder *, size_t, size_t, size_t, struct interp_weight_stencil_wsum_mf *, size_t *, double *, size_t *, size_t *, MPI_Comm, void *, Xt_config), Xt_config redist_config)
static void stencil_get_fixed_values(struct interp_weight_stencil *stencils, size_t stencil_count, double **fixed_values, size_t *num_fixed_values, MPI_Comm comm)
static void yac_interp_weights_data_set_wgt_tgt(struct yac_interp_weights_data *interp_weights_data, size_t num_src_fields, size_t tgt_count, size_t *tgt_idx, size_t *num_src_per_tgt, double *weights, size_t *src_field_idx, size_t *src_idx, size_t *src_field_buffer_size)
yac_interp_weights_reorder_type
@ YAC_MAPPING_ON_TGT
weights will be applied at target processes
@ YAC_MAPPING_ON_SRC
weights will be applied at source processes
yac_weight_file_on_existing
@ YAC_WEIGHT_FILE_KEEP
keep existing weight file
@ YAC_WEIGHT_FILE_ERROR
error when weight file already exists
void yac_interpolation_add_sum_at_src(struct yac_interpolation *interp, Xt_redist *halo_redists, size_t tgt_count, size_t *num_src_per_tgt, size_t *src_field_idx, size_t *src_idx, size_t num_src_fields, Xt_redist result_redist)
void yac_interpolation_add_weight_sum_mvp_at_tgt(struct yac_interpolation *interp, Xt_redist *src_redists, size_t *tgt_pos, size_t tgt_count, size_t *num_src_per_tgt, double *weights, size_t *src_field_idx, size_t *src_idx, size_t num_src_fields)
struct yac_interpolation * yac_interpolation_new(size_t collection_size, double frac_mask_fallback_value, double scale_factor, double scale_summand)
void yac_interpolation_add_direct_mf(struct yac_interpolation *interp, Xt_redist *redists, size_t num_src_fields)
void yac_interpolation_add_sum_at_tgt(struct yac_interpolation *interp, Xt_redist *src_redists, size_t *tgt_pos, size_t tgt_count, size_t *num_src_per_tgt, size_t *src_field_idx, size_t *src_idx, size_t num_src_fields)
void yac_interpolation_add_fixed(struct yac_interpolation *interp, double value, size_t count, size_t *pos)
double const YAC_FRAC_MASK_UNDEF
void yac_interpolation_add_direct(struct yac_interpolation *interp, Xt_redist redist)
void yac_interpolation_add_weight_sum_mvp_at_src(struct yac_interpolation *interp, Xt_redist *halo_redists, size_t tgt_count, size_t *num_src_per_tgt, double *weights, size_t *src_field_idx, size_t *src_idx, size_t num_src_fields, Xt_redist result_redist)
#define YAC_FRAC_MASK_VALUE_IS_VALID(value)
struct yac_interpolation_exchange * yac_interpolation_exchange_new(Xt_redist *redists, size_t num_fields, size_t collection_size, int with_frac_mask, char const *name)
void yac_get_io_ranks(MPI_Comm comm, int *local_is_io_, int **io_ranks_, int *num_io_ranks_)
void yac_nc_create(const char *path, int cmode, int *ncidp)
void yac_nc_inq_varid(int ncid, char const *name, int *varidp)
void yac_nc_open(const char *path, int omode, int *ncidp)
int yac_file_exists(const char *filename)
#define YAC_HANDLE_ERROR(exp)
char const * yac_loc2str(enum yac_location location)
#define YAC_MAX_LOC_STR_LEN
add versions of standard API functions not returning on error
#define xrealloc(ptr, size)
void yac_remote_point_pack(struct remote_point *point, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_remote_point_unpack(void *buffer, int buffer_size, int *position, struct remote_point *point, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_remote_points_pack(struct remote_points *points, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_remote_points_unpack(void *buffer, int buffer_size, int *position, struct remote_points **points, MPI_Datatype point_info_dt, MPI_Comm comm)
int yac_remote_points_get_pack_size(struct remote_points *points, MPI_Datatype point_info_dt, MPI_Comm comm)
MPI_Datatype yac_get_remote_point_info_mpi_datatype(MPI_Comm comm)
int yac_remote_point_get_pack_size(struct remote_point *point, MPI_Datatype point_info_dt, MPI_Comm comm)
struct remote_point_info src
struct remote_point_info src
struct remote_point_info src
struct interp_weight_stencil_wsum_mf_weight * data
struct interp_weight_stencil::@34::@41 weight_sum_mf
struct remote_points * srcs
struct interp_weight_stencil::@34::@40 sum_mf
struct interp_weight_stencil::@34::@38 weight_sum
struct interp_weight_stencil::@34::@35 fixed
struct interp_weight_stencil::@34::@39 direct_mf
struct interp_weight_stencil::@34::@37 sum
enum yac_interp_weight_stencil_type type
union interp_weight_stencil::@34 data
struct interp_weight_stencil::@34::@36 direct
struct interp_weight_stencils_wsum_mf stencils
struct interp_weight_stencil_wsum_mf_weight buffer[]
struct interp_weight_stencil_wsum_mf * data
struct remote_point_info data
single location information of a point
location information about a point that is located on one or
union remote_point_infos::@46 data
struct remote_point_info single
struct remote_point_info * multi
information (global id and location) about a point that
struct remote_point_infos data
structure containing the information (global id and location)
struct remote_point_info buffer[]
struct remote_point * data
double frac_mask_fallback_value
size_t * src_field_buffer_size
size_t * num_tgt_per_fixed_value
struct interp_weight_stencil * stencils
size_t stencils_array_size
enum yac_location tgt_location
enum yac_location * src_locations
struct yac_interpolation_raw::yac_src_field_exchange_data::yac_src_field_exchange_data_msgs::yac_src_field_exchange_data_msg * msg
struct yac_interpolation_raw::yac_src_field_exchange_data::yac_src_field_exchange_data_msgs send
struct yac_interpolation_raw::yac_src_field_exchange_data::yac_src_field_exchange_data_msgs recv
struct yac_interp_weights_data interp_weights_data
struct yac_interpolation_raw::yac_src_field_exchange_data * src_field_exchange_data
double frac_mask_fallback_value
void yac_quicksort_index_int_double(int *a, size_t n, double *idx)
void yac_quicksort_index_int_size_t(int *a, size_t n, size_t *idx)
void yac_quicksort_index_int_size_t_size_t(int *a, size_t n, size_t *b, size_t *c)
static void yac_remove_duplicates_double(double *array, size_t *n)
void yac_quicksort_index_size_t_size_t(size_t *a, size_t n, size_t *idx)
void yac_quicksort_index(int *a, size_t n, int *idx)
static struct user_input_data_points ** points
void yac_abort(MPI_Comm comm, const char *msg, const char *source, int line) __attribute__((noreturn))
#define YAC_ASSERT_F(exp, format,...)
#define YAC_ASSERT(exp, msg)
void yac_generate_alltoallv_args(int count, size_t const *sendcounts, size_t *recvcounts, size_t *sdispls, size_t *rdispls, MPI_Comm comm)
void yac_free_comm_buffers(size_t *sendcounts, size_t *recvcounts, size_t *sdispls, size_t *rdispls)
void yac_get_comm_buffers(int count, size_t **sendcounts, size_t **recvcounts, size_t **sdispls, size_t **rdispls, MPI_Comm comm)
MPI_Datatype yac_create_resized(MPI_Datatype dt, size_t new_size, MPI_Comm comm)
void yac_alltoallv_p2p(void const *send_buffer, size_t const *sendcounts, size_t const *sdispls, void *recv_buffer, size_t const *recvcounts, size_t const *rdispls, size_t dt_size, MPI_Datatype dt, MPI_Comm comm, char const *caller, int line)
#define yac_mpi_call(call, comm)