80 char const * weight_file_name, MPI_Comm comm,
81 char const * src_grid_name,
char const * tgt_grid_name,
83 size_t num_src_fields,
84 struct link_data ** links,
size_t * num_links,
85 struct fixed_data ** fixed,
size_t * num_fixed) {
87#ifndef YAC_NETCDF_ENABLED
101 die(
"ERROR(read_weight_file): YAC is built without the NetCDF support");
118 int io_idx = INT_MAX;
120 for (
int i = 0; (i < num_io_ranks) && (io_idx == INT_MAX); ++i)
121 if (io_ranks[i] == rank) io_idx = i;
125 if (!local_is_io)
return;
139 version =
xmalloc(version_len + 1);
140 version[version_len] =
'\0';
145 "ERROR(read_weight_file): "
146 "version string from weight file (\"%s\") does not match "
150 size_t grid_name_len;
152 YAC_HANDLE_ERROR(nc_inq_attlen(ncid, NC_GLOBAL,
"src_grid_name", &grid_name_len));
153 grid_name =
xmalloc(grid_name_len + 1);
154 grid_name[grid_name_len] =
'\0';
155 YAC_HANDLE_ERROR(nc_get_att_text(ncid, NC_GLOBAL,
"src_grid_name", grid_name));
157 (strlen(src_grid_name) == grid_name_len) &&
158 !strncmp(src_grid_name, grid_name, grid_name_len),
159 "ERROR(read_weight_file): source grid name from weight file (\"%s\") "
160 "does not match with the one provided through the interface (\"%s\")",
161 grid_name, src_grid_name)
165 YAC_HANDLE_ERROR(nc_inq_attlen(ncid, NC_GLOBAL,
"dst_grid_name", &grid_name_len));
166 grid_name =
xmalloc(grid_name_len + 1);
167 grid_name[grid_name_len] =
'\0';
168 YAC_HANDLE_ERROR(nc_get_att_text(ncid, NC_GLOBAL,
"dst_grid_name", grid_name));
170 (strlen(tgt_grid_name) == grid_name_len) &&
171 !strncmp(tgt_grid_name, grid_name, grid_name_len),
172 "ERROR(read_weight_file): target grid name from weight file (\"%s\") "
173 "does not match with the one provided through the interface (\"%s\")",
174 grid_name, tgt_grid_name)
185 YAC_HANDLE_ERROR(nc_inq_attlen(ncid, var_id,
"contains_links", &str_link_len));
186 str_link =
xmalloc(str_link_len + 1);
187 str_link[str_link_len] =
'\0';
188 YAC_HANDLE_ERROR(nc_get_att_text(ncid, var_id,
"contains_links", str_link));
190 int contains_links = (strlen(
"TRUE") == str_link_len) &&
191 !strncmp(
"TRUE", str_link, str_link_len);
194 ((strlen(
"FALSE") == str_link_len) &&
195 !strncmp(
"FALSE", str_link, str_link_len)),
196 "ERROR(read_weight_file): invalid global attribute contains_links")
200 if (contains_links) {
204 num_wgts == 1,
"ERROR(read_weight_file): YAC only supports num_wgts == 1")
209 size_t num_links_in_file = 0;
210 if (contains_links) {
214 num_links_in_file != 0,
"ERROR(read_weight_file): no links defined")
218 size_t tmp_num_src_fields = 0;
219 if (contains_links) {
223 tmp_num_src_fields != 0,
224 "ERROR(read_weight_file): no source fields in file")
226 tmp_num_src_fields == num_src_fields,
227 "ERROR(read_weight_file): number of source fields in file (%zu) does not "
228 "match with the number provided through the interface (%zu)",
229 tmp_num_src_fields, num_src_fields)
233 size_t max_loc_str_len;
238 "ERROR(read_weight_file): wrong max location string length in weight file")
242 xmalloc(num_src_fields *
sizeof(*tmp_src_locations));
245 for (
size_t i = 0; i < num_src_fields; ++i) {
249 size_t str_start[2] = {i, 0};
253 nc_get_vara_text(ncid, var_id, str_start, str_count, loc_str));
258 tmp_src_locations[i] == src_locations[i],
259 "ERROR(read_weight_file): source locations in file does not match with "
260 "the locations provided through the interface\n"
261 "location index: %d\n"
262 "location in weight file: %s\n"
263 "location from interface: %s",
267 free(tmp_src_locations);
276 "ERROR(read_weight_file): target locations in file does not match with "
277 "the locations provided through the interface")
280 if (contains_links) {
283 unsigned * num_links_per_src_field =
284 xmalloc(num_src_fields *
sizeof(*num_links_per_src_field));
289 (size_t)(((
long long)io_idx * (
long long)num_links_in_file) /
290 (
long long)num_io_ranks);
292 (size_t)((((
long long)io_idx + 1) * (
long long)num_links_in_file) /
293 (
long long)num_io_ranks) - offset;
297 *links =
xmalloc(count *
sizeof(**links));
300 size_t src_field_idx = 0, src_field_start_offset = 0;
301 while ((src_field_idx < num_src_fields) &&
302 (src_field_start_offset +
303 (
size_t)num_links_per_src_field[src_field_idx] <
306 src_field_start_offset += (size_t)num_links_per_src_field[src_field_idx];
310 src_field_idx < num_src_fields,
311 "ERROR(read_weight_file): problem in num_links_per_src_field")
312 size_t src_field_offset = offset - src_field_start_offset;
313 for (
size_t k = 0; (src_field_idx < num_src_fields) && (k < count);
314 src_field_offset = 0, ++src_field_idx) {
316 for (; (src_field_offset <
317 (size_t)num_links_per_src_field[src_field_idx]) &&
318 (k < count); ++src_field_offset, ++k) {
319 (*links)[k].src_field_idx = src_field_idx;
323 free(num_links_per_src_field);
326 int * address =
xmalloc(count *
sizeof(*address));
329 for (
size_t i = 0; i < count; ++i)
330 (*links)[i].src.global_id = (
yac_int)(address[i] - 1);
334 for (
size_t i = 0; i < count; ++i)
335 (*links)[i].tgt.global_id = (
yac_int)(address[i] - 1);
338 double * weights =
xmalloc(count *
sizeof(*weights));
341 size_t offsets[2] = {offset, 0};
342 size_t counts[2] = {count, 1};
343 YAC_HANDLE_ERROR(nc_get_vara_double(ncid, var_id, offsets, counts, weights));
344 for (
size_t i = 0; i < count; ++i)
345 (*links)[i].weight = weights[i];
355 size_t str_fixed_len;
359 nc_inq_attlen(ncid, var_id,
"contains_fixed_dst", &str_fixed_len));
360 str_fixed =
xmalloc(str_fixed_len + 1);
361 str_fixed[str_fixed_len] =
'\0';
362 YAC_HANDLE_ERROR(nc_get_att_text(ncid, var_id,
"contains_fixed_dst", str_fixed));
364 int contains_fixed = (strlen(
"TRUE") == str_fixed_len) &&
365 !strncmp(
"TRUE", str_fixed, str_fixed_len);
369 ((strlen(
"FALSE") == str_fixed_len) &&
370 !strncmp(
"FALSE", str_fixed, str_fixed_len)),
371 "ERROR(read_weight_file): invalid global attribute contains_fixed_dst")
374 if (contains_fixed) {
377 size_t num_fixed_values;
382 size_t num_fixed_tgt;
387 (size_t)(((
long long)io_idx * (
long long)num_fixed_tgt) /
388 (
long long)num_io_ranks);
390 (size_t)((((
long long)io_idx + 1) * (
long long)num_fixed_tgt) /
391 (
long long)num_io_ranks) - offset;
393 *fixed =
xmalloc(num_fixed_values *
sizeof(**fixed));
394 *num_fixed = num_fixed_values;
396 double * fixed_values =
xmalloc(num_fixed_values *
sizeof(*fixed_values));
397 int * num_tgt_indices_per_fixed_value =
398 xmalloc(num_fixed_values *
sizeof(*num_tgt_indices_per_fixed_value));
399 int * global_tgt_indices =
xmalloc(count *
sizeof(*global_tgt_indices));
403 YAC_HANDLE_ERROR(nc_get_var_int(ncid, var_id, num_tgt_indices_per_fixed_value));
411 nc_get_vara_int(ncid, var_id, &offset, &count, global_tgt_indices));
414 void * tgt_global_id_buffer =
xmalloc(count *
sizeof(*((*fixed)->tgt)));
415 size_t fixed_value_idx = 0, fixed_value_start_offset = 0;
416 while ((fixed_value_idx < num_fixed_values) &&
417 (fixed_value_start_offset +
418 (
size_t)num_tgt_indices_per_fixed_value[fixed_value_idx] <
421 fixed_value_start_offset +=
422 (size_t)num_tgt_indices_per_fixed_value[fixed_value_idx];
423 (*fixed)[fixed_value_idx].value = fixed_values[fixed_value_idx];
424 (*fixed)[fixed_value_idx].num_tgt = 0;
425 (*fixed)[fixed_value_idx].tgt = tgt_global_id_buffer;
429 fixed_value_idx < num_fixed_values,
430 "ERROR(read_weight_file): problem in num_tgt_indices_per_fixed_value")
431 size_t fixed_value_offset = offset - fixed_value_start_offset;
432 for (
size_t k = 0; (fixed_value_idx < num_fixed_values) && (k < count);
433 fixed_value_offset = 0, ++fixed_value_idx) {
435 size_t curr_tgt_count =
436 MIN((
size_t)num_tgt_indices_per_fixed_value[fixed_value_idx] -
437 fixed_value_offset, count - k);
439 (*fixed)[fixed_value_idx].value = fixed_values[fixed_value_idx];
440 (*fixed)[fixed_value_idx].num_tgt = curr_tgt_count;
441 (*fixed)[fixed_value_idx].tgt =
442 (
void*)(((
unsigned char *)tgt_global_id_buffer) +
443 k *
sizeof(*((*fixed)->tgt)));
445 for (
size_t i = 0; i < curr_tgt_count; ++fixed_value_offset, ++k, ++i)
446 (*fixed)[fixed_value_idx].tgt[i].global_id =
447 (
yac_int)(global_tgt_indices[k] - 1);
449 for (; fixed_value_idx < num_fixed_values; ++fixed_value_idx) {
450 (*fixed)[fixed_value_idx].value = fixed_values[fixed_value_idx];
451 (*fixed)[fixed_value_idx].num_tgt = 0;
452 (*fixed)[fixed_value_idx].tgt = NULL;
456 free(global_tgt_indices);
457 free(num_tgt_indices_per_fixed_value);
815 struct yac_interp_grid * interp_grid,
size_t * tgt_points,
size_t tgt_count,
816 size_t * num_interpolated_points,
817 struct link_data ** links,
size_t * num_links,
818 struct fixed_data ** fixed,
size_t * num_fixed) {
826 yac_int * tgt_global_ids =
xmalloc(tgt_count *
sizeof(*tgt_global_ids));
828 interp_grid, tgt_points, tgt_count, tgt_global_ids);
836 size_t * size_t_buffer =
837 xmalloc(4 * (
size_t)comm_size *
sizeof(*size_t_buffer));
838 size_t * total_sendcounts = size_t_buffer + 0 * comm_size;
839 size_t * total_recvcounts = size_t_buffer + 1 * comm_size;
840 size_t * total_sdispls = size_t_buffer + 2 * comm_size;
841 size_t * total_rdispls = size_t_buffer + 3 * comm_size;
842 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
844 3, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
846 for (
size_t i = 0; i < tgt_count; ++i)
847 sendcounts[3 *
compute_bucket(tgt_global_ids[i], comm_size) + 0]++;
848 for (
size_t i = 0; i < *num_links; ++i)
850 for (
size_t i = 0; i < *num_fixed; ++i)
851 for (
size_t j = 0; j < (*fixed)[i].num_tgt; ++j)
859 size_t saccu = 0, raccu = 0;
860 for (
int i = 0; i < comm_size; ++i) {
861 total_sdispls[i] = saccu;
862 sdispls[3*i+0] = saccu;
865 (total_sendcounts[i] =
866 sendcounts[3*i+0] * (size_t)tgt_pack_size));
869 (total_sendcounts[i] +=
870 sendcounts[3*i+1] * (size_t)link_pack_size));
871 total_sendcounts[i] +=
872 sendcounts[3*i+2] * (size_t)fixed_pack_size;
873 total_rdispls[i] = raccu;
874 rdispls[3*i+0] = raccu;
877 (total_recvcounts[i] =
878 recvcounts[3*i+0] * (size_t)tgt_pack_size));
881 (total_recvcounts[i] +=
882 recvcounts[3*i+1] * (size_t)link_pack_size));
883 total_recvcounts[i] +=
884 recvcounts[3*i+2] * (size_t)fixed_pack_size;
885 saccu += total_sendcounts[i];
886 raccu += total_recvcounts[i];
889 size_t send_size = total_sendcounts[comm_size - 1] +
890 total_sdispls[comm_size - 1];
891 size_t recv_size = total_recvcounts[comm_size - 1] +
892 total_rdispls[comm_size - 1];
894 void * pack_buffer =
xmalloc(send_size + recv_size);
895 void * send_pack_buffer = pack_buffer;
896 void * recv_pack_buffer = (
void*)((
unsigned char *)pack_buffer + send_size);
899 for (
size_t i = 0; i < tgt_count; ++i) {
900 yac_int curr_global_id = tgt_global_ids[i];
904 (
void*)((
unsigned char*)send_pack_buffer + sdispls[3*rank+0]),
905 tgt_pack_size, comm);
906 sdispls[3*rank+0] += (size_t)tgt_pack_size;
908 for (
size_t i = 0; i < *num_links; ++i) {
909 struct link_data * curr_link = (*links) + i;
913 (
void*)((
unsigned char*)send_pack_buffer + sdispls[3*rank+1]),
914 link_pack_size, comm);
915 sdispls[3*rank+1] += (size_t)link_pack_size;
917 for (
size_t i = 0; i < *num_fixed; ++i) {
918 double curr_fixed_value = (*fixed)[i].value;
919 size_t curr_num_tgt = (*fixed)[i].num_tgt;
920 for (
size_t j = 0; j < curr_num_tgt; ++j) {
921 yac_int curr_global_id = (*fixed)[i].tgt[j].global_id;
924 curr_fixed_value, curr_global_id,
925 (
void*)((
unsigned char*)send_pack_buffer + sdispls[3*rank+2]),
926 fixed_pack_size, comm);
927 sdispls[3*rank+2] += (size_t)fixed_pack_size;
932 yac_alltoallv_packed_p2p(
933 send_pack_buffer, total_sendcounts, total_sdispls,
934 recv_pack_buffer, total_recvcounts, total_rdispls, comm);
936 size_t recv_tgt_count = 0, recv_link_count = 0, recv_fixed_count = 0;
937 for (
int i = 0; i < comm_size; ++i) {
938 recv_tgt_count += recvcounts[3*i+0];
939 recv_link_count += recvcounts[3*i+1];
940 recv_fixed_count += recvcounts[3*i+2];
943 xrealloc(*links, recv_link_count *
sizeof(*recv_links));
945 xmalloc(recv_fixed_count *
sizeof(*recv_fixed));
947 xmalloc((recv_tgt_count + tgt_count) *
sizeof(*result_buffer));
949 struct temp_result * recv_results = result_buffer + recv_tgt_count;
952 recv_tgt_count = 0, recv_link_count = 0, recv_fixed_count = 0;
953 size_t unpack_offset = 0;
954 for (
int i = 0; i < comm_size; ++i) {
955 size_t curr_num_tgt = recvcounts[3 * i + 0];
956 size_t curr_num_links = recvcounts[3 * i + 1];
957 size_t curr_num_fixed = recvcounts[3 * i + 2];
958 for (
size_t j = 0; j < curr_num_tgt; ++j, ++recv_tgt_count) {
960 (
void*)((
unsigned char *)recv_pack_buffer + unpack_offset),
961 tgt_pack_size, &(send_results[recv_tgt_count].
tgt.
global_id),
963 unpack_offset += (size_t)tgt_pack_size;
964 send_results[recv_tgt_count].
owner = i;
965 send_results[recv_tgt_count].
reorder_idx = recv_tgt_count;
967 for (
int j = 0; j < curr_num_links; ++j, ++recv_link_count) {
969 (
void*)((
unsigned char *)recv_pack_buffer + unpack_offset),
970 link_pack_size, recv_links + recv_link_count, comm);
971 unpack_offset += (size_t)link_pack_size;
973 for (
int j = 0; j < curr_num_fixed; ++j, ++recv_fixed_count) {
975 (
void*)((
unsigned char *)recv_pack_buffer + unpack_offset),
976 fixed_pack_size, recv_fixed + recv_fixed_count, comm);
977 unpack_offset += (size_t)fixed_pack_size;
983 send_results, recv_tgt_count,
sizeof(*send_results),
992 recv_fixed, recv_fixed_count,
sizeof(*recv_fixed),
996 for (
size_t tgt_idx = 0, link_idx = 0, fixed_idx = 0;
997 tgt_idx < recv_tgt_count; ++tgt_idx) {
1001 while ((link_idx < recv_link_count) &&
1002 (recv_links[link_idx].
tgt.
global_id < curr_tgt)) ++link_idx;
1003 while ((fixed_idx < recv_fixed_count) &&
1004 (recv_fixed[fixed_idx].
tgt.
global_id < curr_tgt)) ++fixed_idx;
1007 (link_idx >= recv_link_count) ||
1009 (fixed_idx >= recv_fixed_count) ||
1011 "ERROR(redist_weight_file_data): inconsistent data in weight file;"
1012 "link and fixed data available for a target point")
1013 if ((link_idx < recv_link_count) &&
1016 size_t temp_link_idx = link_idx;
1017 while ((temp_link_idx < recv_link_count) &&
1021 send_results[tgt_idx].
data.
link.
links = recv_links + link_idx;
1022 send_results[tgt_idx].
data.
link.
count = temp_link_idx - link_idx;
1024 }
else if ((fixed_idx < recv_fixed_count) &&
1038 send_results, recv_tgt_count,
sizeof(*send_results),
1041 int * pack_size_buffer =
1042 xmalloc((recv_tgt_count + tgt_count) *
sizeof(*pack_size_buffer));
1043 int * send_pack_sizes = pack_size_buffer;
1044 int * recv_pack_sizes = pack_size_buffer + recv_tgt_count;
1045 for (
size_t i = 0; i < recv_tgt_count; ++i)
1048 saccu = 0, raccu = 0;
1049 for (
int i = 0; i < comm_size; ++i) {
1052 int sendcount = recvcounts[3*i];
1053 int recvcount = sendcounts[3*i];
1054 saccu += (sendcounts[i] = sendcount);
1055 raccu += (recvcounts[i] = recvcount);
1058 yac_alltoallv_int_p2p(
1059 send_pack_sizes, sendcounts, sdispls,
1060 recv_pack_sizes, recvcounts, rdispls, comm);
1062 saccu = 0, raccu = 0;
1063 for (
int i = 0, k = 0, l = 0; i < comm_size; ++i) {
1064 size_t sendcount = sendcounts[i];
1065 size_t recvcount = recvcounts[i];
1068 for (
size_t j = 0; j < sendcount; ++j, ++k)
1069 sendcounts[i] += send_pack_sizes[k];
1070 for (
size_t j = 0; j < recvcount; ++j, ++l)
1071 recvcounts[i] += recv_pack_sizes[l];
1074 saccu += sendcounts[i];
1075 raccu += recvcounts[i];
1077 size_t total_send_pack_size =
1078 sdispls[comm_size-1] + sendcounts[comm_size-1];
1079 size_t total_recv_pack_size =
1080 rdispls[comm_size-1] + recvcounts[comm_size-1];
1083 xrealloc(pack_buffer, total_send_pack_size + total_recv_pack_size);
1084 send_pack_buffer = pack_buffer;
1086 (
void*)((
unsigned char*)pack_buffer + total_send_pack_size);
1088 for (
size_t i = 0, offset = 0; i < recv_tgt_count; ++i) {
1090 send_results + i, (
void*)((
unsigned char*)send_pack_buffer + offset),
1091 send_pack_sizes[i], comm);
1092 offset += (size_t)(send_pack_sizes[i]);
1098 send_pack_buffer, sendcounts, sdispls,
1099 recv_pack_buffer, recvcounts, rdispls,
1100 1, MPI_PACKED, comm);
1103 for (
size_t i = 0, offset = 0; i < tgt_count; ++i) {
1105 (
void*)((
unsigned char*)recv_pack_buffer + offset),
1106 recv_pack_sizes[i], recv_results + i, comm);
1107 offset += (size_t)(recv_pack_sizes[i]);
1109 free(pack_size_buffer);
1113 recv_results, tgt_count,
sizeof(*recv_results),
1116 for (
size_t i = 0; i < tgt_count; ++i)
1121 recv_results, tgt_count,
sizeof(*recv_results),
1124 size_t new_num_links = 0;
1125 size_t new_total_num_links = 0;
1126 size_t new_num_fixed = 0;
1129 while ((i < tgt_count) && (recv_results[i].
type ==
LINK))
1132 while ((i < tgt_count) && (recv_results[i].
type ==
FIXED)) ++i;
1133 new_num_fixed = i - new_num_links;
1138 for (
size_t i = 0, offset = 0; i < new_num_links;
1142 *num_links = new_total_num_links;
1145 if (new_num_fixed > 0) {
1146 recv_results += new_num_links;
1148 recv_results, new_num_fixed,
sizeof(*recv_results),
1150 double curr_fixed_value =
1152 size_t num_fixed_values = 0;
1153 void * tgt_global_id_buffer =
1154 xrealloc((*num_fixed > 0)?(*fixed)->tgt:NULL,
1155 new_num_fixed *
sizeof(*((*fixed)->tgt)));
1157 for (
size_t i = 0, offset = 0; i < new_num_fixed; ++i) {
1158 if (recv_results[i].data.
fixed.
value != curr_fixed_value) {
1160 *fixed =
xrealloc(*fixed, (num_fixed_values + 1) *
sizeof(**fixed));
1161 curr_fixed = *fixed + num_fixed_values;
1163 curr_fixed->
value = curr_fixed_value;
1164 curr_fixed->
tgt = (
void*)(((
unsigned char *)tgt_global_id_buffer) +
1165 offset *
sizeof(*((*fixed)->tgt)));
1171 *num_fixed = num_fixed_values;
1172 recv_results -= new_num_links;
1175 if (*num_fixed > 0) free((*fixed)->tgt);
1181 for (
size_t i = 0; i < tgt_count; ++i)
1183 *num_interpolated_points = new_num_links + new_num_fixed;
1185 for (
size_t i = 0; i < new_num_links; ++i)
1187 free(result_buffer);
1190 free(size_t_buffer);
1191 free(tgt_global_ids);
1252 size_t * tgt_points,
size_t count,
1262 xmalloc(num_src_fields *
sizeof(*src_locations));
1263 for (
size_t i = 0; i < num_src_fields; ++i)
1277 src_locations, tgt_location, num_src_fields,
1278 &links, &num_links, &fixed, &num_fixed);
1280 free(src_locations);
1286 size_t num_interpolated_points;
1288 interp_grid, tgt_points, count, &num_interpolated_points,
1289 &links, &num_links, &fixed, &num_fixed);
1291 size_t * tgt_points_reorder_idx =
1292 xmalloc(num_interpolated_points *
sizeof(*tgt_points_reorder_idx));
1293 for (
size_t i = 0; i < num_interpolated_points; ++i)
1294 tgt_points_reorder_idx[i] = i;
1296 size_t num_fixed_tgt = 0;
1298 for (
size_t i = 0; i < num_fixed; ++i) num_fixed_tgt += fixed[i].
num_tgt;
1300 size_t tgt_points_offset = num_interpolated_points - num_fixed_tgt;
1302 for (
size_t i = 0; i < num_fixed; ++i) {
1307 interp_grid, tgt_points + tgt_points_offset, fixed[i].num_tgt),
1312 tgt_points_offset += fixed[i].
num_tgt;
1318 size_t num_link_tgt = 0;
1319 if (num_links > 0) {
1322 for (
size_t link_idx = 0, tgt_idx = 0; link_idx < num_links; tgt_idx++) {
1325 size_t curr_local_id = tgt_points[tgt_idx];
1327 while ((link_idx < num_links) &&
1328 (links[link_idx].tgt.
global_id == curr_global_id)) {
1338 size_t num_src_fields = links[num_links-1].
src_field_idx + 1;
1341 int int_num_src_fields = (int)num_src_fields;
1344 MPI_Allreduce(MPI_IN_PLACE, &int_num_src_fields, 1, MPI_INT, MPI_MAX,
1346 num_src_fields = (size_t)int_num_src_fields;
1349 yac_int * src_global_ids =
xmalloc(num_links *
sizeof(*src_global_ids));
1350 size_t * src_local_ids =
xmalloc(num_links *
sizeof(*src_local_ids));
1351 size_t src_field_prefix_sum[num_src_fields];
1355 for (
size_t src_field_idx = 0, link_idx = 0, offset = 0;
1356 src_field_idx < num_src_fields; ++src_field_idx) {
1358 size_t curr_num_links = 0;
1359 while ((link_idx < num_links) &&
1360 (links[link_idx].src_field_idx == src_field_idx)) {
1363 src_global_ids[offset + curr_num_links] =
1371 interp_grid, src_field_idx, src_global_ids + offset, curr_num_links,
1372 src_local_ids + offset);
1379 link_idx -= curr_num_links;
1380 if (src_mask != NULL) {
1381 for (
size_t i = 0; i < curr_num_links; ++i, ++link_idx)
1382 links[link_idx].
src.local_id =
1383 (src_mask[src_local_ids[link_idx]])?
1384 src_local_ids[link_idx]:SIZE_MAX;
1386 for (
size_t i = 0; i < curr_num_links; ++i, ++link_idx)
1387 links[link_idx].
src.local_id = src_local_ids[link_idx];
1390 src_field_prefix_sum[src_field_idx] = offset;
1391 offset += curr_num_links;
1398 size_t new_num_links = 0;
1399 int contains_masked_tgt = 0;
1400 num_link_tgt = num_interpolated_points - num_fixed_tgt;
1401 size_t * tgt_local_ids =
xmalloc(num_link_tgt *
sizeof(*tgt_local_ids));
1402 size_t * num_src_per_field_per_tgt =
1403 xcalloc(num_link_tgt * num_src_fields,
sizeof(*num_src_per_field_per_tgt));
1404 size_t num_links_per_src_field[num_src_fields];
1405 memset(num_links_per_src_field, 0,
sizeof(num_links_per_src_field));
1407 for (
size_t link_idx = 0, tgt_idx = 0; link_idx < num_links; ++tgt_idx) {
1409 size_t curr_tgt_local_id = links[link_idx].
tgt.
local_id;
1410 int is_masked = links[link_idx].
src.
local_id == SIZE_MAX;
1411 tgt_points[tgt_idx] = curr_tgt_local_id;
1416 while ((link_idx < num_links) &&
1417 (links[link_idx].tgt.
local_id == curr_tgt_local_id)) ++link_idx;
1418 contains_masked_tgt = 1;
1419 tgt_points_reorder_idx[tgt_idx] += num_interpolated_points;
1423 size_t * curr_num_src_per_field_per_tgt =
1424 num_src_per_field_per_tgt + num_link_tgt * num_src_fields;
1425 if (contains_masked_tgt) {
1426 while ((link_idx < num_links) &&
1427 (links[link_idx].tgt.
local_id == curr_tgt_local_id)) {
1430 src_field_prefix_sum[curr_src_field_idx] +
1431 num_links_per_src_field[curr_src_field_idx] +
1432 curr_num_src_per_field_per_tgt[curr_src_field_idx]++] =
1434 links[new_num_links++] = links[link_idx++];
1438 while ((link_idx < num_links) &&
1439 (links[link_idx].tgt.
local_id == curr_tgt_local_id)) {
1442 src_field_prefix_sum[curr_src_field_idx] +
1443 num_links_per_src_field[curr_src_field_idx] +
1444 curr_num_src_per_field_per_tgt[curr_src_field_idx]++] =
1446 ++link_idx, ++new_num_links;
1449 for (
size_t i = 0; i < num_src_fields; ++i) {
1450 num_links_per_src_field[i] += curr_num_src_per_field_per_tgt[i];
1452 tgt_local_ids[num_link_tgt++] = curr_tgt_local_id;
1455 num_links = new_num_links;
1457 xrealloc(tgt_local_ids, num_link_tgt *
sizeof(*tgt_local_ids));
1458 num_src_per_field_per_tgt =
1460 num_src_per_field_per_tgt,
1461 num_link_tgt * num_src_fields *
sizeof(*num_src_per_field_per_tgt));
1465 for (
size_t src_field_idx = 0; src_field_idx < num_src_fields; ++src_field_idx)
1466 srcs_per_field[src_field_idx] =
1468 interp_grid, src_field_idx, src_local_ids + src_field_prefix_sum[src_field_idx],
1469 num_links_per_src_field[src_field_idx]);
1470 free(src_local_ids);
1476 double * w =
xmalloc(num_links *
sizeof(*w));
1477 for (
size_t link_idx = 0; link_idx < num_links; ++link_idx)
1478 w[link_idx] = links[link_idx].weight;
1483 interp_grid, tgt_local_ids, num_link_tgt),
1484 .count = num_link_tgt};
1485 free(tgt_local_ids);
1488 weights, &tgts, num_src_per_field_per_tgt, srcs_per_field, w, num_src_fields);
1491 for (
size_t i = 0; i < num_src_fields; ++i) free(srcs_per_field[i]);
1492 free(num_src_per_field_per_tgt);
1494 free(src_global_ids);
1497 int num_src_fields = 0;
1500 MPI_Allreduce(MPI_IN_PLACE, &num_src_fields, 1, MPI_INT, MPI_MAX,
1505 for (
int i = 0; i < num_src_fields; ++i)
1511 tgt_points_reorder_idx, num_interpolated_points, tgt_points);
1512 free(tgt_points_reorder_idx);
1514 return num_fixed_tgt + num_link_tgt;