78 char const * weight_file_name, MPI_Comm comm,
79 char const * src_grid_name,
char const * tgt_grid_name,
81 size_t num_src_fields,
82 struct link_data ** links,
size_t * num_links,
83 struct fixed_data ** fixed,
size_t * num_fixed) {
85#ifndef YAC_NETCDF_ENABLED
99 die(
"ERROR(read_weight_file): YAC is built without the NetCDF support");
116 int io_idx = INT_MAX;
118 for (
int i = 0; (i < num_io_ranks) && (io_idx == INT_MAX); ++i)
119 if (io_ranks[i] == rank) io_idx = i;
123 if (!local_is_io)
return;
137 version =
xmalloc(version_len + 1);
138 version[version_len] =
'\0';
143 "ERROR(read_weight_file): "
144 "version string from weight file (\"%s\") does not match "
148 size_t grid_name_len;
150 YAC_HANDLE_ERROR(nc_inq_attlen(ncid, NC_GLOBAL,
"src_grid_name", &grid_name_len));
151 grid_name =
xmalloc(grid_name_len + 1);
152 grid_name[grid_name_len] =
'\0';
153 YAC_HANDLE_ERROR(nc_get_att_text(ncid, NC_GLOBAL,
"src_grid_name", grid_name));
155 (strlen(src_grid_name) == grid_name_len) &&
156 !strncmp(src_grid_name, grid_name, grid_name_len),
157 "ERROR(read_weight_file): source grid name from weight file (\"%s\") "
158 "does not match with the one provided through the interface (\"%s\")",
159 grid_name, src_grid_name)
163 YAC_HANDLE_ERROR(nc_inq_attlen(ncid, NC_GLOBAL,
"dst_grid_name", &grid_name_len));
164 grid_name =
xmalloc(grid_name_len + 1);
165 grid_name[grid_name_len] =
'\0';
166 YAC_HANDLE_ERROR(nc_get_att_text(ncid, NC_GLOBAL,
"dst_grid_name", grid_name));
168 (strlen(tgt_grid_name) == grid_name_len) &&
169 !strncmp(tgt_grid_name, grid_name, grid_name_len),
170 "ERROR(read_weight_file): target grid name from weight file (\"%s\") "
171 "does not match with the one provided through the interface (\"%s\")",
172 grid_name, tgt_grid_name)
183 YAC_HANDLE_ERROR(nc_inq_attlen(ncid, var_id,
"contains_links", &str_link_len));
184 str_link =
xmalloc(str_link_len + 1);
185 str_link[str_link_len] =
'\0';
186 YAC_HANDLE_ERROR(nc_get_att_text(ncid, var_id,
"contains_links", str_link));
188 int contains_links = (strlen(
"TRUE") == str_link_len) &&
189 !strncmp(
"TRUE", str_link, str_link_len);
192 ((strlen(
"FALSE") == str_link_len) &&
193 !strncmp(
"FALSE", str_link, str_link_len)),
194 "ERROR(read_weight_file): invalid global attribute contains_links")
198 if (contains_links) {
202 num_wgts == 1,
"ERROR(read_weight_file): YAC only supports num_wgts == 1")
207 size_t num_links_in_file = 0;
208 if (contains_links) {
212 num_links_in_file != 0,
"ERROR(read_weight_file): no links defined")
216 size_t tmp_num_src_fields = 0;
217 if (contains_links) {
221 tmp_num_src_fields != 0,
222 "ERROR(read_weight_file): no source fields in file")
224 tmp_num_src_fields == num_src_fields,
225 "ERROR(read_weight_file): number of source fields in file (%zu) does not "
226 "match with the number provided through the interface (%zu)",
227 tmp_num_src_fields, num_src_fields)
231 size_t max_loc_str_len;
236 "ERROR(read_weight_file): wrong max location string length in weight file")
240 xmalloc(num_src_fields *
sizeof(*tmp_src_locations));
243 for (
size_t i = 0; i < num_src_fields; ++i) {
247 size_t str_start[2] = {i, 0};
251 nc_get_vara_text(ncid, var_id, str_start, str_count, loc_str));
256 tmp_src_locations[i] == src_locations[i],
257 "ERROR(read_weight_file): source locations in file does not match with "
258 "the locations provided through the interface\n"
259 "location index: %d\n"
260 "location in weight file: %s\n"
261 "location from interface: %s",
265 free(tmp_src_locations);
274 "ERROR(read_weight_file): target locations in file does not match with "
275 "the locations provided through the interface")
278 if (contains_links) {
281 unsigned * num_links_per_src_field =
282 xmalloc(num_src_fields *
sizeof(*num_links_per_src_field));
287 (size_t)(((
long long)io_idx * (
long long)num_links_in_file) /
288 (
long long)num_io_ranks);
290 (size_t)((((
long long)io_idx + 1) * (
long long)num_links_in_file) /
291 (
long long)num_io_ranks) - offset;
295 *links =
xmalloc(count *
sizeof(**links));
298 size_t src_field_idx = 0, src_field_start_offset = 0;
299 while ((src_field_idx < num_src_fields) &&
300 (src_field_start_offset +
301 (
size_t)num_links_per_src_field[src_field_idx] <
304 src_field_start_offset += (size_t)num_links_per_src_field[src_field_idx];
308 src_field_idx < num_src_fields,
309 "ERROR(read_weight_file): problem in num_links_per_src_field")
310 size_t src_field_offset = offset - src_field_start_offset;
311 for (
size_t k = 0; (src_field_idx < num_src_fields) && (k < count);
312 src_field_offset = 0, ++src_field_idx) {
314 for (; (src_field_offset <
315 (size_t)num_links_per_src_field[src_field_idx]) &&
316 (k < count); ++src_field_offset, ++k) {
317 (*links)[k].src_field_idx = src_field_idx;
321 free(num_links_per_src_field);
324 int * address =
xmalloc(count *
sizeof(*address));
327 for (
size_t i = 0; i < count; ++i)
328 (*links)[i].src.global_id = (
yac_int)(address[i] - 1);
332 for (
size_t i = 0; i < count; ++i)
333 (*links)[i].tgt.global_id = (
yac_int)(address[i] - 1);
336 double * weights =
xmalloc(count *
sizeof(*weights));
339 size_t offsets[2] = {offset, 0};
340 size_t counts[2] = {count, 1};
341 YAC_HANDLE_ERROR(nc_get_vara_double(ncid, var_id, offsets, counts, weights));
342 for (
size_t i = 0; i < count; ++i)
343 (*links)[i].weight = weights[i];
353 size_t str_fixed_len;
357 nc_inq_attlen(ncid, var_id,
"contains_fixed_dst", &str_fixed_len));
358 str_fixed =
xmalloc(str_fixed_len + 1);
359 str_fixed[str_fixed_len] =
'\0';
360 YAC_HANDLE_ERROR(nc_get_att_text(ncid, var_id,
"contains_fixed_dst", str_fixed));
362 int contains_fixed = (strlen(
"TRUE") == str_fixed_len) &&
363 !strncmp(
"TRUE", str_fixed, str_fixed_len);
367 ((strlen(
"FALSE") == str_fixed_len) &&
368 !strncmp(
"FALSE", str_fixed, str_fixed_len)),
369 "ERROR(read_weight_file): invalid global attribute contains_fixed_dst")
372 if (contains_fixed) {
375 size_t num_fixed_values;
380 size_t num_fixed_tgt;
385 (size_t)(((
long long)io_idx * (
long long)num_fixed_tgt) /
386 (
long long)num_io_ranks);
388 (size_t)((((
long long)io_idx + 1) * (
long long)num_fixed_tgt) /
389 (
long long)num_io_ranks) - offset;
391 *fixed =
xmalloc(num_fixed_values *
sizeof(**fixed));
392 *num_fixed = num_fixed_values;
394 double * fixed_values =
xmalloc(num_fixed_values *
sizeof(*fixed_values));
395 int * num_tgt_indices_per_fixed_value =
396 xmalloc(num_fixed_values *
sizeof(*num_tgt_indices_per_fixed_value));
397 int * global_tgt_indices =
xmalloc(count *
sizeof(*global_tgt_indices));
401 YAC_HANDLE_ERROR(nc_get_var_int(ncid, var_id, num_tgt_indices_per_fixed_value));
409 nc_get_vara_int(ncid, var_id, &offset, &count, global_tgt_indices));
412 void * tgt_global_id_buffer =
xmalloc(count *
sizeof(*((*fixed)->tgt)));
413 size_t fixed_value_idx = 0, fixed_value_start_offset = 0;
414 while ((fixed_value_idx < num_fixed_values) &&
415 (fixed_value_start_offset +
416 (
size_t)num_tgt_indices_per_fixed_value[fixed_value_idx] <
419 fixed_value_start_offset +=
420 (size_t)num_tgt_indices_per_fixed_value[fixed_value_idx];
421 (*fixed)[fixed_value_idx].value = fixed_values[fixed_value_idx];
422 (*fixed)[fixed_value_idx].num_tgt = 0;
423 (*fixed)[fixed_value_idx].tgt = tgt_global_id_buffer;
427 fixed_value_idx < num_fixed_values,
428 "ERROR(read_weight_file): problem in num_tgt_indices_per_fixed_value")
429 size_t fixed_value_offset = offset - fixed_value_start_offset;
430 for (
size_t k = 0; (fixed_value_idx < num_fixed_values) && (k < count);
431 fixed_value_offset = 0, ++fixed_value_idx) {
433 size_t curr_tgt_count =
434 MIN((
size_t)num_tgt_indices_per_fixed_value[fixed_value_idx] -
435 fixed_value_offset, count - k);
437 (*fixed)[fixed_value_idx].value = fixed_values[fixed_value_idx];
438 (*fixed)[fixed_value_idx].num_tgt = curr_tgt_count;
439 (*fixed)[fixed_value_idx].tgt =
440 (
void*)(((
unsigned char *)tgt_global_id_buffer) +
441 k *
sizeof(*((*fixed)->tgt)));
443 for (
size_t i = 0; i < curr_tgt_count; ++fixed_value_offset, ++k, ++i)
444 (*fixed)[fixed_value_idx].tgt[i].global_id =
445 (
yac_int)(global_tgt_indices[k] - 1);
447 for (; fixed_value_idx < num_fixed_values; ++fixed_value_idx) {
448 (*fixed)[fixed_value_idx].value = fixed_values[fixed_value_idx];
449 (*fixed)[fixed_value_idx].num_tgt = 0;
450 (*fixed)[fixed_value_idx].tgt = NULL;
454 free(global_tgt_indices);
455 free(num_tgt_indices_per_fixed_value);
816 struct yac_interp_grid * interp_grid,
size_t * tgt_points,
size_t tgt_count,
817 size_t * num_interpolated_points,
818 struct link_data ** links,
size_t * num_links,
819 struct fixed_data ** fixed,
size_t * num_fixed) {
827 yac_int * tgt_global_ids =
xmalloc(tgt_count *
sizeof(*tgt_global_ids));
829 interp_grid, tgt_points, tgt_count, tgt_global_ids);
837 size_t * size_t_buffer =
838 xmalloc(4 * (
size_t)comm_size *
sizeof(*size_t_buffer));
839 size_t * total_sendcounts = size_t_buffer + 0 * comm_size;
840 size_t * total_recvcounts = size_t_buffer + 1 * comm_size;
841 size_t * total_sdispls = size_t_buffer + 2 * comm_size;
842 size_t * total_rdispls = size_t_buffer + 3 * comm_size;
843 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
845 3, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
847 for (
size_t i = 0; i < tgt_count; ++i)
848 sendcounts[3 *
compute_bucket(tgt_global_ids[i], comm_size) + 0]++;
849 for (
size_t i = 0; i < *num_links; ++i)
851 for (
size_t i = 0; i < *num_fixed; ++i)
852 for (
size_t j = 0; j < (*fixed)[i].num_tgt; ++j)
860 size_t saccu = 0, raccu = 0;
861 for (
int i = 0; i < comm_size; ++i) {
862 total_sdispls[i] = saccu;
863 sdispls[3*i+0] = saccu;
866 (total_sendcounts[i] =
867 sendcounts[3*i+0] * (size_t)tgt_pack_size));
870 (total_sendcounts[i] +=
871 sendcounts[3*i+1] * (size_t)link_pack_size));
872 total_sendcounts[i] +=
873 sendcounts[3*i+2] * (size_t)fixed_pack_size;
874 total_rdispls[i] = raccu;
875 rdispls[3*i+0] = raccu;
878 (total_recvcounts[i] =
879 recvcounts[3*i+0] * (size_t)tgt_pack_size));
882 (total_recvcounts[i] +=
883 recvcounts[3*i+1] * (size_t)link_pack_size));
884 total_recvcounts[i] +=
885 recvcounts[3*i+2] * (size_t)fixed_pack_size;
886 saccu += total_sendcounts[i];
887 raccu += total_recvcounts[i];
890 size_t send_size = total_sendcounts[comm_size - 1] +
891 total_sdispls[comm_size - 1];
892 size_t recv_size = total_recvcounts[comm_size - 1] +
893 total_rdispls[comm_size - 1];
895 void * pack_buffer =
xmalloc(send_size + recv_size);
896 void * send_pack_buffer = pack_buffer;
897 void * recv_pack_buffer = (
void*)((
unsigned char *)pack_buffer + send_size);
900 for (
size_t i = 0; i < tgt_count; ++i) {
901 yac_int curr_global_id = tgt_global_ids[i];
905 (
void*)((
unsigned char*)send_pack_buffer + sdispls[3*rank+0]),
906 tgt_pack_size, comm);
907 sdispls[3*rank+0] += (size_t)tgt_pack_size;
909 for (
size_t i = 0; i < *num_links; ++i) {
910 struct link_data * curr_link = (*links) + i;
914 (
void*)((
unsigned char*)send_pack_buffer + sdispls[3*rank+1]),
915 link_pack_size, comm);
916 sdispls[3*rank+1] += (size_t)link_pack_size;
918 for (
size_t i = 0; i < *num_fixed; ++i) {
919 double curr_fixed_value = (*fixed)[i].value;
920 size_t curr_num_tgt = (*fixed)[i].num_tgt;
921 for (
size_t j = 0; j < curr_num_tgt; ++j) {
922 yac_int curr_global_id = (*fixed)[i].tgt[j].global_id;
925 curr_fixed_value, curr_global_id,
926 (
void*)((
unsigned char*)send_pack_buffer + sdispls[3*rank+2]),
927 fixed_pack_size, comm);
928 sdispls[3*rank+2] += (size_t)fixed_pack_size;
933 yac_alltoallv_packed_p2p(
934 send_pack_buffer, total_sendcounts, total_sdispls,
935 recv_pack_buffer, total_recvcounts, total_rdispls, comm);
937 size_t recv_tgt_count = 0, recv_link_count = 0, recv_fixed_count = 0;
938 for (
int i = 0; i < comm_size; ++i) {
939 recv_tgt_count += recvcounts[3*i+0];
940 recv_link_count += recvcounts[3*i+1];
941 recv_fixed_count += recvcounts[3*i+2];
944 xrealloc(*links, recv_link_count *
sizeof(*recv_links));
946 xmalloc(recv_fixed_count *
sizeof(*recv_fixed));
948 xmalloc((recv_tgt_count + tgt_count) *
sizeof(*result_buffer));
950 struct temp_result * recv_results = result_buffer + recv_tgt_count;
953 recv_tgt_count = 0, recv_link_count = 0, recv_fixed_count = 0;
954 size_t unpack_offset = 0;
955 for (
int i = 0; i < comm_size; ++i) {
956 size_t curr_num_tgt = recvcounts[3 * i + 0];
957 size_t curr_num_links = recvcounts[3 * i + 1];
958 size_t curr_num_fixed = recvcounts[3 * i + 2];
959 for (
size_t j = 0; j < curr_num_tgt; ++j, ++recv_tgt_count) {
961 (
void*)((
unsigned char *)recv_pack_buffer + unpack_offset),
962 tgt_pack_size, &(send_results[recv_tgt_count].
tgt.
global_id),
964 unpack_offset += (size_t)tgt_pack_size;
965 send_results[recv_tgt_count].
owner = i;
966 send_results[recv_tgt_count].
reorder_idx = recv_tgt_count;
968 for (
size_t j = 0; j < curr_num_links; ++j, ++recv_link_count) {
970 (
void*)((
unsigned char *)recv_pack_buffer + unpack_offset),
971 link_pack_size, recv_links + recv_link_count, comm);
972 unpack_offset += (size_t)link_pack_size;
974 for (
size_t j = 0; j < curr_num_fixed; ++j, ++recv_fixed_count) {
976 (
void*)((
unsigned char *)recv_pack_buffer + unpack_offset),
977 fixed_pack_size, recv_fixed + recv_fixed_count, comm);
978 unpack_offset += (size_t)fixed_pack_size;
984 send_results, recv_tgt_count,
sizeof(*send_results),
993 recv_fixed, recv_fixed_count,
sizeof(*recv_fixed),
997 for (
size_t tgt_idx = 0, link_idx = 0, fixed_idx = 0;
998 tgt_idx < recv_tgt_count; ++tgt_idx) {
1002 while ((link_idx < recv_link_count) &&
1003 (recv_links[link_idx].
tgt.
global_id < curr_tgt)) ++link_idx;
1004 while ((fixed_idx < recv_fixed_count) &&
1005 (recv_fixed[fixed_idx].
tgt.
global_id < curr_tgt)) ++fixed_idx;
1008 (link_idx >= recv_link_count) ||
1010 (fixed_idx >= recv_fixed_count) ||
1012 "ERROR(redist_weight_file_data): inconsistent data in weight file;"
1013 "link and fixed data available for a target point")
1014 if ((link_idx < recv_link_count) &&
1017 size_t temp_link_idx = link_idx;
1018 while ((temp_link_idx < recv_link_count) &&
1022 send_results[tgt_idx].
data.
link.
links = recv_links + link_idx;
1023 send_results[tgt_idx].
data.
link.
count = temp_link_idx - link_idx;
1025 }
else if ((fixed_idx < recv_fixed_count) &&
1039 send_results, recv_tgt_count,
sizeof(*send_results),
1042 int * pack_size_buffer =
1043 xmalloc((recv_tgt_count + tgt_count) *
sizeof(*pack_size_buffer));
1044 int * send_pack_sizes = pack_size_buffer;
1045 int * recv_pack_sizes = pack_size_buffer + recv_tgt_count;
1046 for (
size_t i = 0; i < recv_tgt_count; ++i)
1049 saccu = 0, raccu = 0;
1050 for (
int i = 0; i < comm_size; ++i) {
1053 int sendcount = recvcounts[3*i];
1054 int recvcount = sendcounts[3*i];
1055 saccu += (sendcounts[i] = sendcount);
1056 raccu += (recvcounts[i] = recvcount);
1059 yac_alltoallv_int_p2p(
1060 send_pack_sizes, sendcounts, sdispls,
1061 recv_pack_sizes, recvcounts, rdispls, comm);
1063 saccu = 0, raccu = 0;
1064 for (
int i = 0, k = 0, l = 0; i < comm_size; ++i) {
1065 size_t sendcount = sendcounts[i];
1066 size_t recvcount = recvcounts[i];
1069 for (
size_t j = 0; j < sendcount; ++j, ++k)
1070 sendcounts[i] += send_pack_sizes[k];
1071 for (
size_t j = 0; j < recvcount; ++j, ++l)
1072 recvcounts[i] += recv_pack_sizes[l];
1075 saccu += sendcounts[i];
1076 raccu += recvcounts[i];
1078 size_t total_send_pack_size =
1079 sdispls[comm_size-1] + sendcounts[comm_size-1];
1080 size_t total_recv_pack_size =
1081 rdispls[comm_size-1] + recvcounts[comm_size-1];
1084 xrealloc(pack_buffer, total_send_pack_size + total_recv_pack_size);
1085 send_pack_buffer = pack_buffer;
1087 (
void*)((
unsigned char*)pack_buffer + total_send_pack_size);
1089 for (
size_t i = 0, offset = 0; i < recv_tgt_count; ++i) {
1091 send_results + i, (
void*)((
unsigned char*)send_pack_buffer + offset),
1092 send_pack_sizes[i], comm);
1093 offset += (size_t)(send_pack_sizes[i]);
1099 send_pack_buffer, sendcounts, sdispls,
1100 recv_pack_buffer, recvcounts, rdispls,
1101 1, MPI_PACKED, comm);
1104 for (
size_t i = 0, offset = 0; i < tgt_count; ++i) {
1106 (
void*)((
unsigned char*)recv_pack_buffer + offset),
1107 recv_pack_sizes[i], recv_results + i, comm);
1108 offset += (size_t)(recv_pack_sizes[i]);
1110 free(pack_size_buffer);
1114 recv_results, tgt_count,
sizeof(*recv_results),
1117 for (
size_t i = 0; i < tgt_count; ++i)
1122 recv_results, tgt_count,
sizeof(*recv_results),
1125 size_t new_num_links = 0;
1126 size_t new_total_num_links = 0;
1127 size_t new_num_fixed = 0;
1130 while ((i < tgt_count) && (recv_results[i].
type ==
LINK))
1133 while ((i < tgt_count) && (recv_results[i].
type ==
FIXED)) ++i;
1134 new_num_fixed = i - new_num_links;
1139 for (
size_t i = 0, offset = 0; i < new_num_links;
1143 *num_links = new_total_num_links;
1146 if (new_num_fixed > 0) {
1147 recv_results += new_num_links;
1149 recv_results, new_num_fixed,
sizeof(*recv_results),
1151 double curr_fixed_value =
1153 size_t num_fixed_values = 0;
1154 void * tgt_global_id_buffer =
1155 xrealloc((*num_fixed > 0)?(*fixed)->tgt:NULL,
1156 new_num_fixed *
sizeof(*((*fixed)->tgt)));
1158 for (
size_t i = 0, offset = 0; i < new_num_fixed; ++i) {
1159 if (recv_results[i].data.
fixed.
value != curr_fixed_value) {
1161 *fixed =
xrealloc(*fixed, (num_fixed_values + 1) *
sizeof(**fixed));
1162 curr_fixed = *fixed + num_fixed_values;
1164 curr_fixed->
value = curr_fixed_value;
1165 curr_fixed->
tgt = (
void*)(((
unsigned char *)tgt_global_id_buffer) +
1166 offset *
sizeof(*((*fixed)->tgt)));
1172 *num_fixed = num_fixed_values;
1173 recv_results -= new_num_links;
1176 if (*num_fixed > 0) free((*fixed)->tgt);
1182 for (
size_t i = 0; i < tgt_count; ++i)
1184 *num_interpolated_points = new_num_links + new_num_fixed;
1186 for (
size_t i = 0; i < new_num_links; ++i)
1188 free(result_buffer);
1191 free(size_t_buffer);
1192 free(tgt_global_ids);
1253 size_t * tgt_points,
size_t count,
1263 xmalloc(num_src_fields *
sizeof(*src_locations));
1264 for (
size_t i = 0; i < num_src_fields; ++i)
1270 size_t num_links = 0;
1272 size_t num_fixed = 0;
1279 src_locations, tgt_location, num_src_fields,
1280 &links, &num_links, &fixed, &num_fixed);
1282 free(src_locations);
1288 size_t num_interpolated_points;
1290 interp_grid, tgt_points, count, &num_interpolated_points,
1291 &links, &num_links, &fixed, &num_fixed);
1293 size_t * tgt_points_reorder_idx =
1294 xmalloc(num_interpolated_points *
sizeof(*tgt_points_reorder_idx));
1295 for (
size_t i = 0; i < num_interpolated_points; ++i)
1296 tgt_points_reorder_idx[i] = i;
1298 size_t num_fixed_tgt = 0;
1300 for (
size_t i = 0; i < num_fixed; ++i) num_fixed_tgt += fixed[i].
num_tgt;
1302 size_t tgt_points_offset = num_interpolated_points - num_fixed_tgt;
1304 for (
size_t i = 0; i < num_fixed; ++i) {
1309 interp_grid, tgt_points + tgt_points_offset, fixed[i].num_tgt),
1314 tgt_points_offset += fixed[i].
num_tgt;
1320 size_t num_link_tgt = 0;
1321 if (num_links > 0) {
1324 for (
size_t link_idx = 0, tgt_idx = 0; link_idx < num_links; tgt_idx++) {
1327 size_t curr_local_id = tgt_points[tgt_idx];
1329 while ((link_idx < num_links) &&
1330 (links[link_idx].tgt.
global_id == curr_global_id)) {
1340 size_t num_src_fields = links[num_links-1].
src_field_idx + 1;
1343 int int_num_src_fields = (int)num_src_fields;
1346 MPI_Allreduce(MPI_IN_PLACE, &int_num_src_fields, 1, MPI_INT, MPI_MAX,
1348 num_src_fields = (size_t)int_num_src_fields;
1351 yac_int * src_global_ids =
xmalloc(num_links *
sizeof(*src_global_ids));
1352 size_t * src_local_ids =
xmalloc(num_links *
sizeof(*src_local_ids));
1353 size_t src_field_prefix_sum[num_src_fields];
1357 for (
size_t src_field_idx = 0, link_idx = 0, offset = 0;
1358 src_field_idx < num_src_fields; ++src_field_idx) {
1360 size_t curr_num_links = 0;
1361 while ((link_idx < num_links) &&
1362 (links[link_idx].src_field_idx == src_field_idx)) {
1365 src_global_ids[offset + curr_num_links] =
1373 interp_grid, src_field_idx, src_global_ids + offset, curr_num_links,
1374 src_local_ids + offset);
1381 link_idx -= curr_num_links;
1382 if (src_mask != NULL) {
1383 for (
size_t i = 0; i < curr_num_links; ++i, ++link_idx)
1384 links[link_idx].
src.local_id =
1385 (src_mask[src_local_ids[link_idx]])?
1386 src_local_ids[link_idx]:SIZE_MAX;
1388 for (
size_t i = 0; i < curr_num_links; ++i, ++link_idx)
1389 links[link_idx].
src.local_id = src_local_ids[link_idx];
1392 src_field_prefix_sum[src_field_idx] = offset;
1393 offset += curr_num_links;
1400 size_t new_num_links = 0;
1401 int contains_masked_tgt = 0;
1402 num_link_tgt = num_interpolated_points - num_fixed_tgt;
1403 size_t * tgt_local_ids =
xmalloc(num_link_tgt *
sizeof(*tgt_local_ids));
1404 size_t * num_src_per_field_per_tgt =
1405 xcalloc(num_link_tgt * num_src_fields,
sizeof(*num_src_per_field_per_tgt));
1406 size_t num_links_per_src_field[num_src_fields];
1407 memset(num_links_per_src_field, 0,
sizeof(num_links_per_src_field));
1409 for (
size_t link_idx = 0, tgt_idx = 0; link_idx < num_links; ++tgt_idx) {
1411 size_t curr_tgt_local_id = links[link_idx].
tgt.
local_id;
1412 int is_masked = links[link_idx].
src.
local_id == SIZE_MAX;
1413 tgt_points[tgt_idx] = curr_tgt_local_id;
1418 while ((link_idx < num_links) &&
1419 (links[link_idx].tgt.
local_id == curr_tgt_local_id)) ++link_idx;
1420 contains_masked_tgt = 1;
1421 tgt_points_reorder_idx[tgt_idx] += num_interpolated_points;
1425 size_t * curr_num_src_per_field_per_tgt =
1426 num_src_per_field_per_tgt + num_link_tgt * num_src_fields;
1427 if (contains_masked_tgt) {
1428 while ((link_idx < num_links) &&
1429 (links[link_idx].tgt.
local_id == curr_tgt_local_id)) {
1432 src_field_prefix_sum[curr_src_field_idx] +
1433 num_links_per_src_field[curr_src_field_idx] +
1434 curr_num_src_per_field_per_tgt[curr_src_field_idx]++] =
1436 links[new_num_links++] = links[link_idx++];
1440 while ((link_idx < num_links) &&
1441 (links[link_idx].tgt.
local_id == curr_tgt_local_id)) {
1444 src_field_prefix_sum[curr_src_field_idx] +
1445 num_links_per_src_field[curr_src_field_idx] +
1446 curr_num_src_per_field_per_tgt[curr_src_field_idx]++] =
1448 ++link_idx, ++new_num_links;
1451 for (
size_t i = 0; i < num_src_fields; ++i) {
1452 num_links_per_src_field[i] += curr_num_src_per_field_per_tgt[i];
1454 tgt_local_ids[num_link_tgt++] = curr_tgt_local_id;
1457 num_links = new_num_links;
1459 xrealloc(tgt_local_ids, num_link_tgt *
sizeof(*tgt_local_ids));
1460 num_src_per_field_per_tgt =
1462 num_src_per_field_per_tgt,
1463 num_link_tgt * num_src_fields *
sizeof(*num_src_per_field_per_tgt));
1467 for (
size_t src_field_idx = 0; src_field_idx < num_src_fields; ++src_field_idx)
1468 srcs_per_field[src_field_idx] =
1470 interp_grid, src_field_idx, src_local_ids + src_field_prefix_sum[src_field_idx],
1471 num_links_per_src_field[src_field_idx]);
1472 free(src_local_ids);
1478 double * w =
xmalloc(num_links *
sizeof(*w));
1479 for (
size_t link_idx = 0; link_idx < num_links; ++link_idx)
1480 w[link_idx] = links[link_idx].weight;
1485 interp_grid, tgt_local_ids, num_link_tgt),
1486 .count = num_link_tgt};
1487 free(tgt_local_ids);
1490 weights, &tgts, num_src_per_field_per_tgt, srcs_per_field, w, num_src_fields);
1493 for (
size_t i = 0; i < num_src_fields; ++i) free(srcs_per_field[i]);
1494 free(num_src_per_field_per_tgt);
1496 free(src_global_ids);
1499 int num_src_fields = 0;
1502 MPI_Allreduce(MPI_IN_PLACE, &num_src_fields, 1, MPI_INT, MPI_MAX,
1507 for (
int i = 0; i < num_src_fields; ++i)
1513 tgt_points_reorder_idx, num_interpolated_points, tgt_points);
1514 free(tgt_points_reorder_idx);
1516 return num_fixed_tgt + num_link_tgt;