YetAnotherCoupler 3.5.2
Loading...
Searching...
No Matches
interp_method_file.c
Go to the documentation of this file.
1// Copyright (c) 2024 The YAC Authors
2//
3// SPDX-License-Identifier: BSD-3-Clause
4
5#include <string.h>
6
9#include "dist_grid.h"
10#include "yac_mpi_internal.h"
11#include "io_utils.h"
12
13static size_t do_search_file(struct interp_method * method,
14 struct yac_interp_grid * interp_grid,
15 size_t * tgt_points, size_t count,
16 struct yac_interp_weights * weights);
17static void delete_file(struct interp_method * method);
18
19static struct interp_method_vtable
23
29
30struct link_data {
31 union {
33 size_t local_id;
34 } src, tgt;
35 double weight;
37};
38
39struct fixed_data {
40 double value;
41 struct {
43 } * tgt;
44 size_t num_tgt;
45};
46
48 double value;
49 struct {
51 } tgt;
52};
53
55 enum {
56 LINK = 0,
57 FIXED = 1,
60 union {
61 struct {
62 struct link_data * links;
63 size_t count;
65 struct {
66 double value;
69 struct {
71 size_t local_id;
72 } tgt;
73 int owner;
75};
76
77static void read_weight_file(
78 char const * weight_file_name, MPI_Comm comm,
79 char const * src_grid_name, char const * tgt_grid_name,
80 enum yac_location * src_locations, enum yac_location tgt_location,
81 size_t num_src_fields,
82 struct link_data ** links, size_t * num_links,
83 struct fixed_data ** fixed, size_t * num_fixed) {
84
85#ifndef YAC_NETCDF_ENABLED
86
87 UNUSED(weight_file_name);
88 UNUSED(comm);
89 UNUSED(src_grid_name);
90 UNUSED(tgt_grid_name);
91 UNUSED(src_locations);
92 UNUSED(tgt_location);
93 UNUSED(num_src_fields);
94 UNUSED(links);
95 UNUSED(num_links);
96 UNUSED(fixed);
97 UNUSED(num_fixed);
98
99 die("ERROR(read_weight_file): YAC is built without the NetCDF support");
100#else
101
102 *links = NULL;
103 *num_links = 0;
104 *fixed = NULL;
105 *num_fixed = 0;
106
107 int rank;
108 yac_mpi_call(MPI_Comm_rank(comm, &rank), comm);
109
110 // determine ranks that do input
111 int local_is_io;
112 int * io_ranks;
113 int num_io_ranks;
114 yac_get_io_ranks(comm, &local_is_io, &io_ranks, &num_io_ranks);
115
116 int io_idx = INT_MAX;
117 if (local_is_io)
118 for (int i = 0; (i < num_io_ranks) && (io_idx == INT_MAX); ++i)
119 if (io_ranks[i] == rank) io_idx = i;
120 free(io_ranks);
121
122 // if the local process does not have to read anything from the file
123 if (!local_is_io) return;
124
125 // open weight file on io processes
126 int ncid;
127 yac_nc_open(weight_file_name, NC_NOWRITE, &ncid);
128
129 //\todo use parallel netcdf if available
130
131 int dimid, var_id;
132
133 // global attributes
134 size_t version_len;
135 char * version;
136 YAC_HANDLE_ERROR(nc_inq_attlen(ncid, NC_GLOBAL, "version", &version_len));
137 version = xmalloc(version_len + 1);
138 version[version_len] = '\0';
139 YAC_HANDLE_ERROR(nc_get_att_text(ncid, NC_GLOBAL, "version", version));
141 (strlen(YAC_WEIGHT_FILE_VERSION_STRING) == version_len) &&
142 !strncmp(version, YAC_WEIGHT_FILE_VERSION_STRING, version_len),
143 "ERROR(read_weight_file): "
144 "version string from weight file (\"%s\") does not match "
145 "with YAC version \"%s\"", version, YAC_WEIGHT_FILE_VERSION_STRING)
146 free(version);
147
148 size_t grid_name_len;
149 char * grid_name;
150 YAC_HANDLE_ERROR(nc_inq_attlen(ncid, NC_GLOBAL, "src_grid_name", &grid_name_len));
151 grid_name = xmalloc(grid_name_len + 1);
152 grid_name[grid_name_len] = '\0';
153 YAC_HANDLE_ERROR(nc_get_att_text(ncid, NC_GLOBAL, "src_grid_name", grid_name));
155 (strlen(src_grid_name) == grid_name_len) &&
156 !strncmp(src_grid_name, grid_name, grid_name_len),
157 "ERROR(read_weight_file): source grid name from weight file (\"%s\") "
158 "does not match with the one provided through the interface (\"%s\")",
159 grid_name, src_grid_name)
160
161 free(grid_name);
162
163 YAC_HANDLE_ERROR(nc_inq_attlen(ncid, NC_GLOBAL, "dst_grid_name", &grid_name_len));
164 grid_name = xmalloc(grid_name_len + 1);
165 grid_name[grid_name_len] = '\0';
166 YAC_HANDLE_ERROR(nc_get_att_text(ncid, NC_GLOBAL, "dst_grid_name", grid_name));
168 (strlen(tgt_grid_name) == grid_name_len) &&
169 !strncmp(tgt_grid_name, grid_name, grid_name_len),
170 "ERROR(read_weight_file): target grid name from weight file (\"%s\") "
171 "does not match with the one provided through the interface (\"%s\")",
172 grid_name, tgt_grid_name)
173
174 free(grid_name);
175
176 //---------------
177 // read link data
178 //---------------
179
180 size_t str_link_len;
181 char * str_link;
182 var_id = NC_GLOBAL;
183 YAC_HANDLE_ERROR(nc_inq_attlen(ncid, var_id, "contains_links", &str_link_len));
184 str_link = xmalloc(str_link_len + 1);
185 str_link[str_link_len] = '\0';
186 YAC_HANDLE_ERROR(nc_get_att_text(ncid, var_id, "contains_links", str_link));
187
188 int contains_links = (strlen("TRUE") == str_link_len) &&
189 !strncmp("TRUE", str_link, str_link_len);
191 contains_links ||
192 ((strlen("FALSE") == str_link_len) &&
193 !strncmp("FALSE", str_link, str_link_len)),
194 "ERROR(read_weight_file): invalid global attribute contains_links")
195 free(str_link);
196
197 size_t num_wgts = 0;
198 if (contains_links) {
199 yac_nc_inq_dimid(ncid, "num_wgts", &dimid);
200 YAC_HANDLE_ERROR(nc_inq_dimlen(ncid, dimid, &num_wgts));
202 num_wgts == 1, "ERROR(read_weight_file): YAC only supports num_wgts == 1")
203 }
204
205 // get number of links from file
206
207 size_t num_links_in_file = 0;
208 if (contains_links) {
209 yac_nc_inq_dimid(ncid, "num_links", &dimid);
210 YAC_HANDLE_ERROR(nc_inq_dimlen(ncid, dimid, &num_links_in_file));
212 num_links_in_file != 0, "ERROR(read_weight_file): no links defined")
213 }
214
215 // get number of source fields
216 size_t tmp_num_src_fields = 0;
217 if (contains_links) {
218 yac_nc_inq_dimid(ncid, "num_src_fields", &dimid);
219 YAC_HANDLE_ERROR(nc_inq_dimlen(ncid, dimid, &tmp_num_src_fields));
221 tmp_num_src_fields != 0,
222 "ERROR(read_weight_file): no source fields in file")
224 tmp_num_src_fields == num_src_fields,
225 "ERROR(read_weight_file): number of source fields in file (%zu) does not "
226 "match with the number provided through the interface (%zu)",
227 tmp_num_src_fields, num_src_fields)
228 }
229
230 // get max location string length from file
231 size_t max_loc_str_len;
232 yac_nc_inq_dimid(ncid, "max_loc_str_len", &dimid);
233 YAC_HANDLE_ERROR(nc_inq_dimlen(ncid, dimid, &max_loc_str_len));
235 max_loc_str_len == YAC_MAX_LOC_STR_LEN,
236 "ERROR(read_weight_file): wrong max location string length in weight file")
237
238 // get source locations
239 enum yac_location * tmp_src_locations =
240 xmalloc(num_src_fields * sizeof(*tmp_src_locations));
241 yac_nc_inq_varid(ncid, "src_locations", &var_id);
242
243 for (size_t i = 0; i < num_src_fields; ++i) {
244
245 char loc_str[YAC_MAX_LOC_STR_LEN];
246
247 size_t str_start[2] = {i, 0};
248 size_t str_count[2] = {1, YAC_MAX_LOC_STR_LEN};
249
251 nc_get_vara_text(ncid, var_id, str_start, str_count, loc_str));
252
253 tmp_src_locations[i] = yac_str2loc(loc_str);
254
256 tmp_src_locations[i] == src_locations[i],
257 "ERROR(read_weight_file): source locations in file does not match with "
258 "the locations provided through the interface\n"
259 "location index: %d\n"
260 "location in weight file: %s\n"
261 "location from interface: %s",
262 (int)i, loc_str, yac_loc2str(src_locations[i]))
263 }
264
265 free(tmp_src_locations);
266
267 // get target location
268 yac_nc_inq_varid(ncid, "dst_location", &var_id);
269 {
270 char loc_str[YAC_MAX_LOC_STR_LEN];
271 YAC_HANDLE_ERROR(nc_get_var_text(ncid, var_id, loc_str));
273 tgt_location == yac_str2loc(loc_str),
274 "ERROR(read_weight_file): target locations in file does not match with "
275 "the locations provided through the interface")
276 }
277
278 if (contains_links) {
279
280 // get number of links per source field
281 unsigned * num_links_per_src_field =
282 xmalloc(num_src_fields * sizeof(*num_links_per_src_field));
283 yac_nc_inq_varid(ncid, "num_links_per_src_field", &var_id);
284 YAC_HANDLE_ERROR(nc_get_var_uint(ncid, var_id, num_links_per_src_field));
285
286 size_t offset =
287 (size_t)(((long long)io_idx * (long long)num_links_in_file) /
288 (long long)num_io_ranks);
289 size_t count =
290 (size_t)((((long long)io_idx + 1) * (long long)num_links_in_file) /
291 (long long)num_io_ranks) - offset;
292
293 *num_links = count;
294
295 *links = xmalloc(count * sizeof(**links));
296
297 if (count > 0) {
298 size_t src_field_idx = 0, src_field_start_offset = 0;
299 while ((src_field_idx < num_src_fields) &&
300 (src_field_start_offset +
301 (size_t)num_links_per_src_field[src_field_idx] <
302 offset)) {
303
304 src_field_start_offset += (size_t)num_links_per_src_field[src_field_idx];
305 ++src_field_idx;
306 };
308 src_field_idx < num_src_fields,
309 "ERROR(read_weight_file): problem in num_links_per_src_field")
310 size_t src_field_offset = offset - src_field_start_offset;
311 for (size_t k = 0; (src_field_idx < num_src_fields) && (k < count);
312 src_field_offset = 0, ++src_field_idx) {
313
314 for (; (src_field_offset <
315 (size_t)num_links_per_src_field[src_field_idx]) &&
316 (k < count); ++src_field_offset, ++k) {
317 (*links)[k].src_field_idx = src_field_idx;
318 }
319 }
320 }
321 free(num_links_per_src_field);
322
323 // get links
324 int * address = xmalloc(count * sizeof(*address));
325 yac_nc_inq_varid(ncid, "src_address", &var_id);
326 YAC_HANDLE_ERROR(nc_get_vara_int(ncid, var_id, &offset, &count, address));
327 for (size_t i = 0; i < count; ++i)
328 (*links)[i].src.global_id = (yac_int)(address[i] - 1);
329
330 yac_nc_inq_varid(ncid, "dst_address", &var_id);
331 YAC_HANDLE_ERROR(nc_get_vara_int(ncid, var_id, &offset, &count, address));
332 for (size_t i = 0; i < count; ++i)
333 (*links)[i].tgt.global_id = (yac_int)(address[i] - 1);
334 free(address);
335
336 double * weights = xmalloc(count * sizeof(*weights));
337
338 yac_nc_inq_varid(ncid, "remap_matrix", &var_id);
339 size_t offsets[2] = {offset, 0};
340 size_t counts[2] = {count, 1};
341 YAC_HANDLE_ERROR(nc_get_vara_double(ncid, var_id, offsets, counts, weights));
342 for (size_t i = 0; i < count; ++i)
343 (*links)[i].weight = weights[i];
344
345 free(weights);
346 }
347
348 //----------------
349 // read fixed data
350 //----------------
351
352 // global attributes
353 size_t str_fixed_len;
354 char * str_fixed;
355 var_id = NC_GLOBAL;
357 nc_inq_attlen(ncid, var_id, "contains_fixed_dst", &str_fixed_len));
358 str_fixed = xmalloc(str_fixed_len + 1);
359 str_fixed[str_fixed_len] = '\0';
360 YAC_HANDLE_ERROR(nc_get_att_text(ncid, var_id, "contains_fixed_dst", str_fixed));
361
362 int contains_fixed = (strlen("TRUE") == str_fixed_len) &&
363 !strncmp("TRUE", str_fixed, str_fixed_len);
364
366 contains_fixed ||
367 ((strlen("FALSE") == str_fixed_len) &&
368 !strncmp("FALSE", str_fixed, str_fixed_len)),
369 "ERROR(read_weight_file): invalid global attribute contains_fixed_dst")
370 free(str_fixed);
371
372 if (contains_fixed) {
373
374 // get number of fixed values
375 size_t num_fixed_values;
376 yac_nc_inq_dimid(ncid, "num_fixed_values", &dimid);
377 YAC_HANDLE_ERROR(nc_inq_dimlen(ncid, dimid, &num_fixed_values));
378
379 // get number of fixed target points
380 size_t num_fixed_tgt;
381 yac_nc_inq_dimid(ncid, "num_fixed_dst", &dimid);
382 YAC_HANDLE_ERROR(nc_inq_dimlen(ncid, dimid, &num_fixed_tgt));
383
384 size_t offset =
385 (size_t)(((long long)io_idx * (long long)num_fixed_tgt) /
386 (long long)num_io_ranks);
387 size_t count =
388 (size_t)((((long long)io_idx + 1) * (long long)num_fixed_tgt) /
389 (long long)num_io_ranks) - offset;
390
391 *fixed = xmalloc(num_fixed_values * sizeof(**fixed));
392 *num_fixed = num_fixed_values;
393
394 double * fixed_values = xmalloc(num_fixed_values * sizeof(*fixed_values));
395 int * num_tgt_indices_per_fixed_value =
396 xmalloc(num_fixed_values * sizeof(*num_tgt_indices_per_fixed_value));
397 int * global_tgt_indices = xmalloc(count * sizeof(*global_tgt_indices));
398
399 // get number of fixed target points per fixed value
400 yac_nc_inq_varid(ncid, "num_dst_per_fixed_value", &var_id);
401 YAC_HANDLE_ERROR(nc_get_var_int(ncid, var_id, num_tgt_indices_per_fixed_value));
402
403 // read data
404 yac_nc_inq_varid(ncid, "fixed_values", &var_id);
405 YAC_HANDLE_ERROR(nc_get_var_double(ncid, var_id, fixed_values));
406
407 yac_nc_inq_varid(ncid, "dst_address_fixed", &var_id);
409 nc_get_vara_int(ncid, var_id, &offset, &count, global_tgt_indices));
410
411 if (count > 0) {
412 void * tgt_global_id_buffer = xmalloc(count * sizeof(*((*fixed)->tgt)));
413 size_t fixed_value_idx = 0, fixed_value_start_offset = 0;
414 while ((fixed_value_idx < num_fixed_values) &&
415 (fixed_value_start_offset +
416 (size_t)num_tgt_indices_per_fixed_value[fixed_value_idx] <
417 offset)) {
418
419 fixed_value_start_offset +=
420 (size_t)num_tgt_indices_per_fixed_value[fixed_value_idx];
421 (*fixed)[fixed_value_idx].value = fixed_values[fixed_value_idx];
422 (*fixed)[fixed_value_idx].num_tgt = 0;
423 (*fixed)[fixed_value_idx].tgt = tgt_global_id_buffer;
424 ++fixed_value_idx;
425 };
427 fixed_value_idx < num_fixed_values,
428 "ERROR(read_weight_file): problem in num_tgt_indices_per_fixed_value")
429 size_t fixed_value_offset = offset - fixed_value_start_offset;
430 for (size_t k = 0; (fixed_value_idx < num_fixed_values) && (k < count);
431 fixed_value_offset = 0, ++fixed_value_idx) {
432
433 size_t curr_tgt_count =
434 MIN((size_t)num_tgt_indices_per_fixed_value[fixed_value_idx] -
435 fixed_value_offset, count - k);
436
437 (*fixed)[fixed_value_idx].value = fixed_values[fixed_value_idx];
438 (*fixed)[fixed_value_idx].num_tgt = curr_tgt_count;
439 (*fixed)[fixed_value_idx].tgt =
440 (void*)(((unsigned char *)tgt_global_id_buffer) +
441 k * sizeof(*((*fixed)->tgt)));
442
443 for (size_t i = 0; i < curr_tgt_count; ++fixed_value_offset, ++k, ++i)
444 (*fixed)[fixed_value_idx].tgt[i].global_id =
445 (yac_int)(global_tgt_indices[k] - 1);
446 }
447 for (; fixed_value_idx < num_fixed_values; ++fixed_value_idx) {
448 (*fixed)[fixed_value_idx].value = fixed_values[fixed_value_idx];
449 (*fixed)[fixed_value_idx].num_tgt = 0;
450 (*fixed)[fixed_value_idx].tgt = NULL;
451 }
452 }
453 free(fixed_values);
454 free(global_tgt_indices);
455 free(num_tgt_indices_per_fixed_value);
456 }
457
458 // close file
459 YAC_HANDLE_ERROR(nc_close(ncid));
460#endif
461}
462
463static inline int
464compute_bucket(yac_int value, int comm_size) {
465 return (int)(value / 128) % comm_size;
466}
467
468static int get_tgt_pack_size(MPI_Comm comm) {
469
470 int tgt_pack_size;
471 yac_mpi_call(MPI_Pack_size(1, yac_int_dt, comm, &tgt_pack_size), comm);
472 return tgt_pack_size;
473}
474
475static int get_link_pack_size(MPI_Comm comm) {
476
477 int global_id_pack_size;
478 int weight_pack_size;
479 int src_field_idx_pack_size;
480
482 MPI_Pack_size(1, yac_int_dt, comm, &global_id_pack_size), comm);
484 MPI_Pack_size(1, MPI_DOUBLE, comm, &weight_pack_size), comm);
486 MPI_Pack_size(1, MPI_UINT64_T, comm, &src_field_idx_pack_size), comm);
487
488 return 2 * global_id_pack_size + weight_pack_size +
489 src_field_idx_pack_size;
490}
491
492static int get_fixed_pack_size(MPI_Comm comm) {
493
494 int value_pack_size;
495 int global_id_pack_size;
496 int count_pack_size;
497
499 MPI_Pack_size(1, MPI_DOUBLE, comm, &value_pack_size), comm);
501 MPI_Pack_size(1, yac_int_dt, comm, &global_id_pack_size), comm);
503 MPI_Pack_size(1, MPI_UINT64_T, comm, &count_pack_size), comm);
504
505 return value_pack_size + global_id_pack_size + count_pack_size;
506}
507
508static void pack_tgt(
509 yac_int global_tgt_id, void * buffer, int buffer_size, MPI_Comm comm) {
510
511 int position = 0;
513 MPI_Pack(&global_tgt_id, 1, yac_int_dt, buffer,
514 buffer_size, &position, comm), comm);
515}
516
517static void pack_link(
518 struct link_data * link, void * buffer, int buffer_size, MPI_Comm comm) {
519
520 int position = 0;
522 MPI_Pack(&(link->src.global_id), 1, yac_int_dt, buffer,
523 buffer_size, &position, comm), comm);
525 MPI_Pack(&(link->tgt.global_id), 1, yac_int_dt, buffer,
526 buffer_size, &position, comm), comm);
528 MPI_Pack(&(link->weight), 1, MPI_DOUBLE, buffer,
529 buffer_size, &position, comm), comm);
530 uint64_t temp_src_field_idx = (uint64_t)(link->src_field_idx);
532 MPI_Pack(&temp_src_field_idx, 1, MPI_UINT64_T, buffer,
533 buffer_size, &position, comm), comm);
534
535}
536
537static void pack_fixed(
538 double fixed_value, yac_int global_tgt_id,
539 void * buffer, int buffer_size, MPI_Comm comm) {
540
541 int position = 0;
543 MPI_Pack(&fixed_value, 1, MPI_DOUBLE, buffer,
544 buffer_size, &position, comm), comm);
546 MPI_Pack(&global_tgt_id, 1, yac_int_dt, buffer,
547 buffer_size, &position, comm), comm);
548}
549
550static void unpack_tgt(
551 void * buffer, int buffer_size, yac_int * global_tgt_id, MPI_Comm comm) {
552
553 int position = 0;
555 MPI_Unpack(buffer, buffer_size, &position, global_tgt_id, 1,
556 yac_int_dt, comm), comm);
557}
558
559static void unpack_link(
560 void * buffer, int buffer_size, struct link_data * link, MPI_Comm comm) {
561
562 int position = 0;
564 MPI_Unpack(buffer, buffer_size, &position, &(link->src.global_id), 1,
565 yac_int_dt, comm), comm);
567 MPI_Unpack(buffer, buffer_size, &position, &(link->tgt.global_id), 1,
568 yac_int_dt, comm), comm);
570 MPI_Unpack(buffer, buffer_size, &position, &(link->weight), 1,
571 MPI_DOUBLE, comm), comm);
572 uint64_t temp_src_field_idx;
574 MPI_Unpack(buffer, buffer_size, &position, &temp_src_field_idx, 1,
575 MPI_UINT64_T, comm), comm);
576 link->src_field_idx = (size_t)temp_src_field_idx;
577}
578
579static void unpack_fixed(
580 void * buffer, int buffer_size, struct temp_fixed_data * fixed,
581 MPI_Comm comm) {
582
583 int position = 0;
585 MPI_Unpack(buffer, buffer_size, &position, &(fixed->value), 1,
586 MPI_DOUBLE, comm), comm);
588 MPI_Unpack(buffer, buffer_size, &position, &(fixed->tgt.global_id), 1,
589 yac_int_dt, comm), comm);
590}
591
592static int compare_temp_result_tgt (void const * a, void const * b) {
593
594 struct temp_result const * result_a = (struct temp_result const *)a;
595 struct temp_result const * result_b = (struct temp_result const *)b;
596
597 return (result_a->tgt.global_id > result_b->tgt.global_id) -
598 (result_a->tgt.global_id < result_b->tgt.global_id);
599}
600
601static int compare_link_data_tgt (void const * a, void const * b) {
602
603 struct link_data const * link_a = (struct link_data const *)a;
604 struct link_data const * link_b = (struct link_data const *)b;
605
606 return (link_a->tgt.global_id > link_b->tgt.global_id) -
607 (link_a->tgt.global_id < link_b->tgt.global_id);
608}
609
610static int compare_temp_result_type (void const * a, void const * b) {
611
612 struct temp_result const * result_a = (struct temp_result const *)a;
613 struct temp_result const * result_b = (struct temp_result const *)b;
614
615 return (result_a->type > result_b->type) - (result_a->type < result_b->type);
616}
617
618static int compare_temp_result_fixed (void const * a, void const * b) {
619
620 struct temp_result const * result_a = (struct temp_result const *)a;
621 struct temp_result const * result_b = (struct temp_result const *)b;
622
623 return (result_a->data.fixed.value > result_b->data.fixed.value) -
624 (result_a->data.fixed.value < result_b->data.fixed.value);
625}
626
627static int compare_temp_result_reorder_idx (void const * a, void const * b) {
628
629 struct temp_result const * result_a = (struct temp_result const *)a;
630 struct temp_result const * result_b = (struct temp_result const *)b;
631
632 return (result_a->reorder_idx > result_b->reorder_idx) -
633 (result_a->reorder_idx < result_b->reorder_idx);
634}
635
636static int compare_temp_fixed_data_tgt (void const * a, void const * b) {
637
638 struct temp_fixed_data const * fixed_a = (struct temp_fixed_data const *)a;
639 struct temp_fixed_data const * fixed_b = (struct temp_fixed_data const *)b;
640
641 return (fixed_a->tgt.global_id > fixed_b->tgt.global_id) -
642 (fixed_a->tgt.global_id < fixed_b->tgt.global_id);
643}
644
646 struct temp_result * result, MPI_Comm comm) {
647
648 int type_pack_size;
649 int data_pack_size;
650 int owner_pack_size;
651
653 MPI_Pack_size(1, MPI_INT, comm, &type_pack_size), comm);
655 (result->type == LINK) ||
656 (result->type == FIXED) ||
657 (result->type == NO_INTERP),
658 "ERROR(get_temp_result_pack_size): invalid result type")
659 switch(result->type) {
660 case(LINK): {
661 int count_pack_size;
662 int links_pack_size;
664 MPI_Pack_size(1, MPI_UINT64_T, comm, &count_pack_size), comm);
665 links_pack_size =
666 (int)(result->data.link.count) * get_link_pack_size(comm);
667 data_pack_size = links_pack_size + count_pack_size;
668 break;
669 }
670 case(FIXED): {
671 int fixed_value_pack_size;
673 MPI_Pack_size(1, MPI_DOUBLE, comm, &fixed_value_pack_size), comm);
674 data_pack_size = fixed_value_pack_size;
675 break;
676 }
677 default:
678 case(NO_INTERP): {
679 data_pack_size = 0;
680 break;
681 }
682 };
683 int tgt_global_id_pack_size;
685 MPI_Pack_size(1, yac_int_dt, comm, &tgt_global_id_pack_size), comm);
687 MPI_Pack_size(1, MPI_INT, comm, &owner_pack_size), comm);
688
689 return type_pack_size + data_pack_size + tgt_global_id_pack_size +
690 owner_pack_size;
691}
692
694 struct temp_result * result, void * buffer, int buffer_size, MPI_Comm comm) {
695
696 int position = 0;
697 int temp_type = (int)(result->type);
699 MPI_Pack(&temp_type, 1, MPI_INT, buffer,
700 buffer_size, &position, comm), comm);
702 (result->type == LINK) ||
703 (result->type == FIXED) ||
704 (result->type == NO_INTERP),
705 "ERROR(get_temp_result_pack_size): invalid result type")
706 switch(result->type) {
707 case(LINK): {
708 uint64_t temp_count = (uint64_t)(result->data.link.count);
710 MPI_Pack(&temp_count, 1, MPI_UINT64_T, buffer,
711 buffer_size, &position, comm), comm);
712 for (uint64_t i = 0; i < temp_count; ++i) {
714 MPI_Pack(&(result->data.link.links[i].src.global_id), 1,
715 yac_int_dt, buffer, buffer_size, &position, comm), comm);
717 MPI_Pack(&(result->data.link.links[i].tgt.global_id), 1,
718 yac_int_dt, buffer, buffer_size, &position, comm), comm);
720 MPI_Pack(&(result->data.link.links[i].weight), 1,
721 MPI_DOUBLE, buffer, buffer_size, &position, comm), comm);
722 uint64_t temp_src_field_idx =
723 (uint64_t)result->data.link.links[i].src_field_idx;
725 MPI_Pack(&temp_src_field_idx, 1, MPI_UINT64_T,
726 buffer, buffer_size, &position, comm), comm);
727 }
728 break;
729 }
730 case(FIXED): {
732 MPI_Pack(&(result->data.fixed.value), 1,
733 MPI_DOUBLE, buffer, buffer_size, &position, comm), comm);
734 break;
735 }
736 default:
737 case(NO_INTERP): {
738 break;
739 }
740 };
742 MPI_Pack(&(result->tgt.global_id), 1,
743 yac_int_dt, buffer, buffer_size, &position, comm), comm);
745 MPI_Pack(&(result->owner), 1, MPI_INT,
746 buffer, buffer_size, &position, comm), comm);
747}
748
750 void * buffer, int buffer_size, struct temp_result * result,
751 MPI_Comm comm) {
752
753 int position = 0;
754
755 int temp_type;
757 MPI_Unpack(buffer, buffer_size, &position, &temp_type, 1,
758 MPI_INT, comm), comm);
760 (temp_type == LINK) || (temp_type == FIXED) || (temp_type == NO_INTERP),
761 "ERROR(unpack_temp_result): invalid result type")
762 switch (temp_type) {
763 case(LINK): {
764 result->type = LINK;
765 uint64_t temp_count;
767 MPI_Unpack(buffer, buffer_size, &position, &temp_count, 1,
768 MPI_UINT64_T, comm), comm);
769 result->data.link.count = (size_t)temp_count;
770 result->data.link.links =
771 xmalloc((size_t)temp_count * sizeof(*(result->data.link.links)));
772 for (uint64_t i = 0; i < temp_count; ++i) {
773
775 MPI_Unpack(buffer, buffer_size, &position,
776 &(result->data.link.links[i].src.global_id), 1,
777 yac_int_dt, comm), comm);
779 MPI_Unpack(buffer, buffer_size, &position,
780 &(result->data.link.links[i].tgt.global_id), 1,
781 yac_int_dt, comm), comm);
783 MPI_Unpack(buffer, buffer_size, &position,
784 &(result->data.link.links[i].weight), 1,
785 MPI_DOUBLE, comm), comm);
786 uint64_t temp_src_field_idx;
788 MPI_Unpack(buffer, buffer_size, &position, &temp_src_field_idx, 1,
789 MPI_UINT64_T, comm), comm);
790 result->data.link.links[i].src_field_idx = (size_t)temp_src_field_idx;
791 }
792 break;
793 }
794 case (FIXED): {
795 result->type = FIXED;
797 MPI_Unpack(buffer, buffer_size, &position,
798 &(result->data.fixed.value), 1, MPI_DOUBLE, comm), comm);
799 break;
800 }
801 default:
802 case (NO_INTERP): {
803 result->type = NO_INTERP;
804 break;
805 }
806 };
808 MPI_Unpack(buffer, buffer_size, &position,
809 &(result->tgt.global_id), 1, yac_int_dt, comm), comm);
811 MPI_Unpack(buffer, buffer_size, &position,
812 &(result->owner), 1, MPI_INT, comm), comm);
813}
814
816 struct yac_interp_grid * interp_grid, size_t * tgt_points, size_t tgt_count,
817 size_t * num_interpolated_points,
818 struct link_data ** links, size_t * num_links,
819 struct fixed_data ** fixed, size_t * num_fixed) {
820
821 MPI_Comm comm = yac_interp_grid_get_MPI_Comm(interp_grid);
822
823 int comm_size;
824 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
825
826 // get global ids of target points that have to be interpolated
827 yac_int * tgt_global_ids = xmalloc(tgt_count * sizeof(*tgt_global_ids));
829 interp_grid, tgt_points, tgt_count, tgt_global_ids);
830
831 yac_quicksort_index_yac_int_size_t(tgt_global_ids, tgt_count, tgt_points);
832
833 int tgt_pack_size = get_tgt_pack_size(comm),
834 link_pack_size = get_link_pack_size(comm),
835 fixed_pack_size = get_fixed_pack_size(comm);
836
837 size_t * size_t_buffer =
838 xmalloc(4 * (size_t)comm_size * sizeof(*size_t_buffer));
839 size_t * total_sendcounts = size_t_buffer + 0 * comm_size;
840 size_t * total_recvcounts = size_t_buffer + 1 * comm_size;
841 size_t * total_sdispls = size_t_buffer + 2 * comm_size;
842 size_t * total_rdispls = size_t_buffer + 3 * comm_size;
843 size_t * sendcounts, * recvcounts, * sdispls, *rdispls;
845 3, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
846
847 for (size_t i = 0; i < tgt_count; ++i)
848 sendcounts[3 * compute_bucket(tgt_global_ids[i], comm_size) + 0]++;
849 for (size_t i = 0; i < *num_links; ++i)
850 sendcounts[3 * compute_bucket((*links)[i].tgt.global_id, comm_size) + 1]++;
851 for (size_t i = 0; i < *num_fixed; ++i)
852 for (size_t j = 0; j < (*fixed)[i].num_tgt; ++j)
853 sendcounts[
854 3 * compute_bucket((*fixed)[i].tgt[j].global_id, comm_size) + 2]++;
855
856 // exchange sendcounts
857 yac_mpi_call(MPI_Alltoall(sendcounts, 3, YAC_MPI_SIZE_T,
858 recvcounts, 3, YAC_MPI_SIZE_T, comm), comm);
859
860 size_t saccu = 0, raccu = 0;
861 for (int i = 0; i < comm_size; ++i) {
862 total_sdispls[i] = saccu;
863 sdispls[3*i+0] = saccu;
864 sdispls[3*i+1] =
865 (saccu +
866 (total_sendcounts[i] =
867 sendcounts[3*i+0] * (size_t)tgt_pack_size));
868 sdispls[3*i+2] =
869 (saccu +
870 (total_sendcounts[i] +=
871 sendcounts[3*i+1] * (size_t)link_pack_size));
872 total_sendcounts[i] +=
873 sendcounts[3*i+2] * (size_t)fixed_pack_size;
874 total_rdispls[i] = raccu;
875 rdispls[3*i+0] = raccu;
876 rdispls[3*i+1] =
877 (raccu +
878 (total_recvcounts[i] =
879 recvcounts[3*i+0] * (size_t)tgt_pack_size));
880 rdispls[3*i+2] =
881 (raccu +
882 (total_recvcounts[i] +=
883 recvcounts[3*i+1] * (size_t)link_pack_size));
884 total_recvcounts[i] +=
885 recvcounts[3*i+2] * (size_t)fixed_pack_size;
886 saccu += total_sendcounts[i];
887 raccu += total_recvcounts[i];
888 }
889
890 size_t send_size = total_sendcounts[comm_size - 1] +
891 total_sdispls[comm_size - 1];
892 size_t recv_size = total_recvcounts[comm_size - 1] +
893 total_rdispls[comm_size - 1];
894
895 void * pack_buffer = xmalloc(send_size + recv_size);
896 void * send_pack_buffer = pack_buffer;
897 void * recv_pack_buffer = (void*)((unsigned char *)pack_buffer + send_size);
898
899 // pack data
900 for (size_t i = 0; i < tgt_count; ++i) {
901 yac_int curr_global_id = tgt_global_ids[i];
902 int rank = compute_bucket(curr_global_id, comm_size);
903 pack_tgt(
904 curr_global_id,
905 (void*)((unsigned char*)send_pack_buffer + sdispls[3*rank+0]),
906 tgt_pack_size, comm);
907 sdispls[3*rank+0] += (size_t)tgt_pack_size;
908 }
909 for (size_t i = 0; i < *num_links; ++i) {
910 struct link_data * curr_link = (*links) + i;
911 int rank = compute_bucket(curr_link->tgt.global_id, comm_size);
912 pack_link(
913 curr_link,
914 (void*)((unsigned char*)send_pack_buffer + sdispls[3*rank+1]),
915 link_pack_size, comm);
916 sdispls[3*rank+1] += (size_t)link_pack_size;
917 }
918 for (size_t i = 0; i < *num_fixed; ++i) {
919 double curr_fixed_value = (*fixed)[i].value;
920 size_t curr_num_tgt = (*fixed)[i].num_tgt;
921 for (size_t j = 0; j < curr_num_tgt; ++j) {
922 yac_int curr_global_id = (*fixed)[i].tgt[j].global_id;
923 int rank = compute_bucket(curr_global_id, comm_size);
925 curr_fixed_value, curr_global_id,
926 (void*)((unsigned char*)send_pack_buffer + sdispls[3*rank+2]),
927 fixed_pack_size, comm);
928 sdispls[3*rank+2] += (size_t)fixed_pack_size;
929 }
930 }
931
932 // exchange data
933 yac_alltoallv_packed_p2p(
934 send_pack_buffer, total_sendcounts, total_sdispls,
935 recv_pack_buffer, total_recvcounts, total_rdispls, comm);
936
937 size_t recv_tgt_count = 0, recv_link_count = 0, recv_fixed_count = 0;
938 for (int i = 0; i < comm_size; ++i) {
939 recv_tgt_count += recvcounts[3*i+0];
940 recv_link_count += recvcounts[3*i+1];
941 recv_fixed_count += recvcounts[3*i+2];
942 }
943 struct link_data * recv_links =
944 xrealloc(*links, recv_link_count * sizeof(*recv_links));
945 struct temp_fixed_data * recv_fixed =
946 xmalloc(recv_fixed_count * sizeof(*recv_fixed));
947 struct temp_result * result_buffer =
948 xmalloc((recv_tgt_count + tgt_count) * sizeof(*result_buffer));
949 struct temp_result * send_results = result_buffer;
950 struct temp_result * recv_results = result_buffer + recv_tgt_count;
951
952 // unpack data
953 recv_tgt_count = 0, recv_link_count = 0, recv_fixed_count = 0;
954 size_t unpack_offset = 0;
955 for (int i = 0; i < comm_size; ++i) {
956 size_t curr_num_tgt = recvcounts[3 * i + 0];
957 size_t curr_num_links = recvcounts[3 * i + 1];
958 size_t curr_num_fixed = recvcounts[3 * i + 2];
959 for (size_t j = 0; j < curr_num_tgt; ++j, ++recv_tgt_count) {
961 (void*)((unsigned char *)recv_pack_buffer + unpack_offset),
962 tgt_pack_size, &(send_results[recv_tgt_count].tgt.global_id),
963 comm);
964 unpack_offset += (size_t)tgt_pack_size;
965 send_results[recv_tgt_count].owner = i;
966 send_results[recv_tgt_count].reorder_idx = recv_tgt_count;
967 }
968 for (size_t j = 0; j < curr_num_links; ++j, ++recv_link_count) {
970 (void*)((unsigned char *)recv_pack_buffer + unpack_offset),
971 link_pack_size, recv_links + recv_link_count, comm);
972 unpack_offset += (size_t)link_pack_size;
973 }
974 for (size_t j = 0; j < curr_num_fixed; ++j, ++recv_fixed_count) {
976 (void*)((unsigned char *)recv_pack_buffer + unpack_offset),
977 fixed_pack_size, recv_fixed + recv_fixed_count, comm);
978 unpack_offset += (size_t)fixed_pack_size;
979 }
980 }
981
982 // sort received target point inforation by global target id
983 qsort(
984 send_results, recv_tgt_count, sizeof(*send_results),
986
987 // sort links by global target ids
988 qsort(
989 recv_links, recv_link_count, sizeof(*recv_links), compare_link_data_tgt);
990
991 // sort fixed data by global target ids
992 qsort(
993 recv_fixed, recv_fixed_count, sizeof(*recv_fixed),
995
996 // match target points with linked and fixed data from the weight file
997 for (size_t tgt_idx = 0, link_idx = 0, fixed_idx = 0;
998 tgt_idx < recv_tgt_count; ++tgt_idx) {
999
1000 yac_int curr_tgt = send_results[tgt_idx].tgt.global_id;
1001
1002 while ((link_idx < recv_link_count) &&
1003 (recv_links[link_idx].tgt.global_id < curr_tgt)) ++link_idx;
1004 while ((fixed_idx < recv_fixed_count) &&
1005 (recv_fixed[fixed_idx].tgt.global_id < curr_tgt)) ++fixed_idx;
1006
1007 YAC_ASSERT(
1008 (link_idx >= recv_link_count) ||
1009 (recv_links[link_idx].tgt.global_id != curr_tgt) ||
1010 (fixed_idx >= recv_fixed_count) ||
1011 (recv_fixed[fixed_idx].tgt.global_id != curr_tgt),
1012 "ERROR(redist_weight_file_data): inconsistent data in weight file;"
1013 "link and fixed data available for a target point")
1014 if ((link_idx < recv_link_count) &&
1015 (recv_links[link_idx].tgt.global_id == curr_tgt)) {
1016
1017 size_t temp_link_idx = link_idx;
1018 while ((temp_link_idx < recv_link_count) &&
1019 (recv_links[temp_link_idx].tgt.global_id == curr_tgt))
1020 ++temp_link_idx;
1021 send_results[tgt_idx].type = LINK;
1022 send_results[tgt_idx].data.link.links = recv_links + link_idx;
1023 send_results[tgt_idx].data.link.count = temp_link_idx - link_idx;
1024
1025 } else if ((fixed_idx < recv_fixed_count) &&
1026 (recv_fixed[fixed_idx].tgt.global_id == curr_tgt)) {
1027
1028 send_results[tgt_idx].type = FIXED;
1029 send_results[tgt_idx].data.fixed.value = recv_fixed[fixed_idx].value;
1030
1031 } else {
1032 send_results[tgt_idx].type = NO_INTERP;
1033 }
1034 }
1035 free(recv_fixed);
1036
1037 // sort received target point inforatiom into origional receive order
1038 qsort(
1039 send_results, recv_tgt_count, sizeof(*send_results),
1041
1042 int * pack_size_buffer =
1043 xmalloc((recv_tgt_count + tgt_count) * sizeof(*pack_size_buffer));
1044 int * send_pack_sizes = pack_size_buffer;
1045 int * recv_pack_sizes = pack_size_buffer + recv_tgt_count;
1046 for (size_t i = 0; i < recv_tgt_count; ++i)
1047 send_pack_sizes[i] = get_temp_result_pack_size(send_results + i, comm);
1048
1049 saccu = 0, raccu = 0;
1050 for (int i = 0; i < comm_size; ++i) {
1051 sdispls[i] = saccu;
1052 rdispls[i] = raccu;
1053 int sendcount = recvcounts[3*i];
1054 int recvcount = sendcounts[3*i];
1055 saccu += (sendcounts[i] = sendcount);
1056 raccu += (recvcounts[i] = recvcount);
1057 }
1058
1059 yac_alltoallv_int_p2p(
1060 send_pack_sizes, sendcounts, sdispls,
1061 recv_pack_sizes, recvcounts, rdispls, comm);
1062
1063 saccu = 0, raccu = 0;
1064 for (int i = 0, k = 0, l = 0; i < comm_size; ++i) {
1065 size_t sendcount = sendcounts[i];
1066 size_t recvcount = recvcounts[i];
1067 sendcounts[i] = 0;
1068 recvcounts[i] = 0;
1069 for (size_t j = 0; j < sendcount; ++j, ++k)
1070 sendcounts[i] += send_pack_sizes[k];
1071 for (size_t j = 0; j < recvcount; ++j, ++l)
1072 recvcounts[i] += recv_pack_sizes[l];
1073 sdispls[i] = saccu;
1074 rdispls[i] = raccu;
1075 saccu += sendcounts[i];
1076 raccu += recvcounts[i];
1077 }
1078 size_t total_send_pack_size =
1079 sdispls[comm_size-1] + sendcounts[comm_size-1];
1080 size_t total_recv_pack_size =
1081 rdispls[comm_size-1] + recvcounts[comm_size-1];
1082
1083 pack_buffer =
1084 xrealloc(pack_buffer, total_send_pack_size + total_recv_pack_size);
1085 send_pack_buffer = pack_buffer;
1086 recv_pack_buffer =
1087 (void*)((unsigned char*)pack_buffer + total_send_pack_size);
1088
1089 for (size_t i = 0, offset = 0; i < recv_tgt_count; ++i) {
1091 send_results + i, (void*)((unsigned char*)send_pack_buffer + offset),
1092 send_pack_sizes[i], comm);
1093 offset += (size_t)(send_pack_sizes[i]);
1094 }
1095
1096 // redistribute link and fixed data to processes requiring this data for
1097 // their local target points
1099 send_pack_buffer, sendcounts, sdispls,
1100 recv_pack_buffer, recvcounts, rdispls,
1101 1, MPI_PACKED, comm);
1102
1103 // unpack results
1104 for (size_t i = 0, offset = 0; i < tgt_count; ++i) {
1106 (void*)((unsigned char*)recv_pack_buffer + offset),
1107 recv_pack_sizes[i], recv_results + i, comm);
1108 offset += (size_t)(recv_pack_sizes[i]);
1109 }
1110 free(pack_size_buffer);
1111
1112 // sort received results by global target id
1113 qsort(
1114 recv_results, tgt_count, sizeof(*recv_results),
1116
1117 for (size_t i = 0; i < tgt_count; ++i)
1118 recv_results[i].tgt.local_id = tgt_points[i];
1119
1120 // sort received results by type
1121 qsort(
1122 recv_results, tgt_count, sizeof(*recv_results),
1124
1125 size_t new_num_links = 0;
1126 size_t new_total_num_links = 0;
1127 size_t new_num_fixed = 0;
1128 {
1129 size_t i = 0;
1130 while ((i < tgt_count) && (recv_results[i].type == LINK))
1131 new_total_num_links += recv_results[i++].data.link.count;
1132 new_num_links = i;
1133 while ((i < tgt_count) && (recv_results[i].type == FIXED)) ++i;
1134 new_num_fixed = i - new_num_links;
1135 }
1136
1137 // extract links from results
1138 *links = xrealloc(recv_links, new_total_num_links * sizeof(**links));
1139 for (size_t i = 0, offset = 0; i < new_num_links;
1140 offset += recv_results[i++].data.link.count)
1141 memcpy(*links + offset, recv_results[i].data.link.links,
1142 recv_results[i].data.link.count * sizeof(**links));
1143 *num_links = new_total_num_links;
1144
1145 // extract fixed data from results
1146 if (new_num_fixed > 0) {
1147 recv_results += new_num_links;
1148 qsort(
1149 recv_results, new_num_fixed, sizeof(*recv_results),
1151 double curr_fixed_value =
1152 (recv_results[0].data.fixed.value == 1337.0)?(-1337.0):(1337.0);
1153 size_t num_fixed_values = 0;
1154 void * tgt_global_id_buffer =
1155 xrealloc((*num_fixed > 0)?(*fixed)->tgt:NULL,
1156 new_num_fixed * sizeof(*((*fixed)->tgt)));
1157 struct fixed_data * curr_fixed = NULL;
1158 for (size_t i = 0, offset = 0; i < new_num_fixed; ++i) {
1159 if (recv_results[i].data.fixed.value != curr_fixed_value) {
1160 curr_fixed_value = recv_results[i].data.fixed.value;
1161 *fixed = xrealloc(*fixed, (num_fixed_values + 1) * sizeof(**fixed));
1162 curr_fixed = *fixed + num_fixed_values;
1163 ++num_fixed_values;
1164 curr_fixed->value = curr_fixed_value;
1165 curr_fixed->tgt = (void*)(((unsigned char *)tgt_global_id_buffer) +
1166 offset * sizeof(*((*fixed)->tgt)));
1167 curr_fixed->num_tgt = 0;
1168 }
1169 curr_fixed->tgt[curr_fixed->num_tgt++].global_id =
1170 recv_results[i].tgt.global_id;
1171 }
1172 *num_fixed = num_fixed_values;
1173 recv_results -= new_num_links;
1174
1175 } else {
1176 if (*num_fixed > 0) free((*fixed)->tgt);
1177 free(*fixed);
1178 *fixed = NULL;
1179 *num_fixed = 0;
1180 }
1181
1182 for (size_t i = 0; i < tgt_count; ++i)
1183 tgt_points[i] = recv_results[i].tgt.local_id;
1184 *num_interpolated_points = new_num_links + new_num_fixed;
1185
1186 for (size_t i = 0; i < new_num_links; ++i)
1187 free(recv_results[i].data.link.links);
1188 free(result_buffer);
1189 free(pack_buffer);
1190 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
1191 free(size_t_buffer);
1192 free(tgt_global_ids);
1193}
1194
1195static int compare_link_data_field_idx_src(void const *a, void const *b) {
1196
1197 struct link_data const * link_a = (struct link_data*)a;
1198 struct link_data const * link_b = (struct link_data*)b;
1199
1200 int ret = (link_a->src_field_idx > link_b->src_field_idx) -
1201 (link_a->src_field_idx < link_b->src_field_idx);
1202
1203 if (ret) return ret;
1204
1205 else return (link_a->src.global_id > link_b->src.global_id) -
1206 (link_a->src.global_id < link_b->src.global_id);
1207}
1208
1209// sort links by tgt local id and src ids (SIZE_MAX first; remaining by ascending src ids)
1210static int compare_link_data_tgt_src_mask(void const *a, void const *b) {
1211
1212 struct link_data const * link_a = (struct link_data*)a;
1213 struct link_data const * link_b = (struct link_data*)b;
1214
1215 int ret = (link_a->tgt.local_id > link_b->tgt.local_id) -
1216 (link_a->tgt.local_id < link_b->tgt.local_id);
1217
1218 if (ret) return ret;
1219
1220 if (link_a->src.local_id == SIZE_MAX) return -1;
1221 if (link_b->src.local_id == SIZE_MAX) return 1;
1222
1223 return (link_a->src.local_id > link_b->src.local_id) -
1224 (link_a->src.local_id < link_b->src.local_id);
1225}
1226
1227// sort links by tgt local id, source field index, and src local id
1228static int compare_link_data_tgt_src_field_src_id(void const *a, void const *b) {
1229
1230 struct link_data const * link_a = (struct link_data*)a;
1231 struct link_data const * link_b = (struct link_data*)b;
1232
1233 int ret = (link_a->tgt.local_id > link_b->tgt.local_id) -
1234 (link_a->tgt.local_id < link_b->tgt.local_id);
1235
1236 if (ret) return ret;
1237
1238 ret = (link_a->src_field_idx > link_b->src_field_idx) -
1239 (link_a->src_field_idx < link_b->src_field_idx);
1240
1241 if (ret) return ret;
1242
1243 if (link_a->src.local_id == SIZE_MAX) return -1;
1244 if (link_b->src.local_id == SIZE_MAX) return 1;
1245
1246 return (link_a->src.local_id > link_b->src.local_id) -
1247 (link_a->src.local_id < link_b->src.local_id);
1248}
1249
1250
1251static size_t do_search_file(struct interp_method * method,
1252 struct yac_interp_grid * interp_grid,
1253 size_t * tgt_points, size_t count,
1254 struct yac_interp_weights * weights) {
1255
1256 struct interp_method_file * method_file =
1257 (struct interp_method_file*)(method);
1258
1259 MPI_Comm comm = yac_interp_grid_get_MPI_Comm(interp_grid);
1260
1261 size_t num_src_fields = yac_interp_grid_get_num_src_fields(interp_grid);
1262 enum yac_location * src_locations =
1263 xmalloc(num_src_fields * sizeof(*src_locations));
1264 for (size_t i = 0; i < num_src_fields; ++i)
1265 src_locations[i] = yac_interp_grid_get_src_field_location(interp_grid, i);
1266 enum yac_location tgt_location =
1268
1269 struct link_data * links = NULL;
1270 size_t num_links = 0;
1271 struct fixed_data * fixed = NULL;
1272 size_t num_fixed = 0;
1273
1274 // read data from file
1276 method_file->weight_file_name, comm,
1279 src_locations, tgt_location, num_src_fields,
1280 &links, &num_links, &fixed, &num_fixed);
1281
1282 free(src_locations);
1283
1284 // relocates links and fixed to the processes, which require them to
1285 // interpolate their local target points
1286 // (the tgt_points first contain the link-tgt_points, then the fixed-tgt_points,
1287 // and followed the non-interpolated tgt_points)
1288 size_t num_interpolated_points;
1290 interp_grid, tgt_points, count, &num_interpolated_points,
1291 &links, &num_links, &fixed, &num_fixed);
1292
1293 size_t * tgt_points_reorder_idx =
1294 xmalloc(num_interpolated_points * sizeof(*tgt_points_reorder_idx));
1295 for (size_t i = 0; i < num_interpolated_points; ++i)
1296 tgt_points_reorder_idx[i] = i;
1297
1298 size_t num_fixed_tgt = 0;
1299 if (num_fixed) {
1300 for (size_t i = 0; i < num_fixed; ++i) num_fixed_tgt += fixed[i].num_tgt;
1301
1302 size_t tgt_points_offset = num_interpolated_points - num_fixed_tgt;
1303
1304 for (size_t i = 0; i < num_fixed; ++i) {
1305
1306 struct remote_points tgts = {
1307 .data =
1309 interp_grid, tgt_points + tgt_points_offset, fixed[i].num_tgt),
1310 .count = fixed[i].num_tgt};
1311
1312 yac_interp_weights_add_fixed(weights, &tgts, fixed[i].value);
1313 free(tgts.data);
1314 tgt_points_offset += fixed[i].num_tgt;
1315 }
1316 free(fixed->tgt);
1317 }
1318 free(fixed);
1319
1320 size_t num_link_tgt = 0;
1321 if (num_links > 0) {
1322
1323 // set local target ids for all links
1324 for (size_t link_idx = 0, tgt_idx = 0; link_idx < num_links; tgt_idx++) {
1325
1326 yac_int curr_global_id = links[link_idx].tgt.global_id;
1327 size_t curr_local_id = tgt_points[tgt_idx];
1328
1329 while ((link_idx < num_links) &&
1330 (links[link_idx].tgt.global_id == curr_global_id)) {
1331 links[link_idx].tgt.local_id = curr_local_id;
1332 link_idx++;
1333 }
1334 }
1335
1336 // sort links by field idx and src id
1337 qsort(links, num_links, sizeof(*links), compare_link_data_field_idx_src);
1338
1339 // determine number of source fields
1340 size_t num_src_fields = links[num_links-1].src_field_idx + 1;
1341
1342 {
1343 int int_num_src_fields = (int)num_src_fields;
1344 MPI_Comm comm = yac_interp_grid_get_MPI_Comm(interp_grid);
1346 MPI_Allreduce(MPI_IN_PLACE, &int_num_src_fields, 1, MPI_INT, MPI_MAX,
1347 comm), comm);
1348 num_src_fields = (size_t)int_num_src_fields;
1349 }
1350
1351 yac_int * src_global_ids = xmalloc(num_links * sizeof(*src_global_ids));
1352 size_t * src_local_ids = xmalloc(num_links * sizeof(*src_local_ids));
1353 size_t src_field_prefix_sum[num_src_fields];
1354
1355 // check the mask of the source points and get the local ids for the
1356 // remaining ones
1357 for (size_t src_field_idx = 0, link_idx = 0, offset = 0;
1358 src_field_idx < num_src_fields; ++src_field_idx) {
1359
1360 size_t curr_num_links = 0;
1361 while ((link_idx < num_links) &&
1362 (links[link_idx].src_field_idx == src_field_idx)) {
1363
1364 // get global source ids
1365 src_global_ids[offset + curr_num_links] =
1366 links[link_idx].src.global_id;
1367 ++curr_num_links;
1368 ++link_idx;
1369 }
1370
1371 // get local ids for all source points
1373 interp_grid, src_field_idx, src_global_ids + offset, curr_num_links,
1374 src_local_ids + offset);
1375
1376 // get the mask after the call to yac_interp_grid_src_global_to_local
1377 const_int_pointer src_mask =
1378 yac_interp_grid_get_src_field_mask(interp_grid, src_field_idx);
1379
1380 // check source masks and mask src ids of marked links with SIZE_MAX
1381 link_idx -= curr_num_links;
1382 if (src_mask != NULL) {
1383 for (size_t i = 0; i < curr_num_links; ++i, ++link_idx)
1384 links[link_idx].src.local_id =
1385 (src_mask[src_local_ids[link_idx]])?
1386 src_local_ids[link_idx]:SIZE_MAX;
1387 } else {
1388 for (size_t i = 0; i < curr_num_links; ++i, ++link_idx)
1389 links[link_idx].src.local_id = src_local_ids[link_idx];
1390 }
1391
1392 src_field_prefix_sum[src_field_idx] = offset;
1393 offset += curr_num_links;
1394 }
1395
1396 // sort links by tgt local id and src ids (SIZE_MAX first; remaining by ascending src ids)
1397 qsort(links, num_links, sizeof(*links), compare_link_data_tgt_src_mask);
1398
1399 // pack links (remove links with targets that have at least one masked source point)
1400 size_t new_num_links = 0;
1401 int contains_masked_tgt = 0;
1402 num_link_tgt = num_interpolated_points - num_fixed_tgt;
1403 size_t * tgt_local_ids = xmalloc(num_link_tgt * sizeof(*tgt_local_ids));
1404 size_t * num_src_per_field_per_tgt =
1405 xcalloc(num_link_tgt * num_src_fields, sizeof(*num_src_per_field_per_tgt));
1406 size_t num_links_per_src_field[num_src_fields];
1407 memset(num_links_per_src_field, 0, sizeof(num_links_per_src_field));
1408 num_link_tgt = 0;
1409 for (size_t link_idx = 0, tgt_idx = 0; link_idx < num_links; ++tgt_idx) {
1410
1411 size_t curr_tgt_local_id = links[link_idx].tgt.local_id;
1412 int is_masked = links[link_idx].src.local_id == SIZE_MAX;
1413 tgt_points[tgt_idx] = curr_tgt_local_id;
1414
1415 // if the current target point has a masked source point
1416 if (is_masked) {
1417
1418 while ((link_idx < num_links) &&
1419 (links[link_idx].tgt.local_id == curr_tgt_local_id)) ++link_idx;
1420 contains_masked_tgt = 1;
1421 tgt_points_reorder_idx[tgt_idx] += num_interpolated_points;
1422
1423 // if there already have been target points with masked source points
1424 } else {
1425 size_t * curr_num_src_per_field_per_tgt =
1426 num_src_per_field_per_tgt + num_link_tgt * num_src_fields;
1427 if (contains_masked_tgt) {
1428 while ((link_idx < num_links) &&
1429 (links[link_idx].tgt.local_id == curr_tgt_local_id)) {
1430 size_t curr_src_field_idx = links[link_idx].src_field_idx;
1431 src_local_ids[
1432 src_field_prefix_sum[curr_src_field_idx] +
1433 num_links_per_src_field[curr_src_field_idx] +
1434 curr_num_src_per_field_per_tgt[curr_src_field_idx]++] =
1435 links[link_idx].src.local_id;
1436 links[new_num_links++] = links[link_idx++];
1437 }
1438 } else {
1439
1440 while ((link_idx < num_links) &&
1441 (links[link_idx].tgt.local_id == curr_tgt_local_id)) {
1442 size_t curr_src_field_idx = links[link_idx].src_field_idx;
1443 src_local_ids[
1444 src_field_prefix_sum[curr_src_field_idx] +
1445 num_links_per_src_field[curr_src_field_idx] +
1446 curr_num_src_per_field_per_tgt[curr_src_field_idx]++] =
1447 links[link_idx].src.local_id;
1448 ++link_idx, ++new_num_links;
1449 }
1450 }
1451 for (size_t i = 0; i < num_src_fields; ++i) {
1452 num_links_per_src_field[i] += curr_num_src_per_field_per_tgt[i];
1453 }
1454 tgt_local_ids[num_link_tgt++] = curr_tgt_local_id;
1455 }
1456 }
1457 num_links = new_num_links;
1458 tgt_local_ids =
1459 xrealloc(tgt_local_ids, num_link_tgt * sizeof(*tgt_local_ids));
1460 num_src_per_field_per_tgt =
1461 xrealloc(
1462 num_src_per_field_per_tgt,
1463 num_link_tgt * num_src_fields * sizeof(*num_src_per_field_per_tgt));
1464
1465 // get the remote points for all required source points
1466 struct remote_point * srcs_per_field[num_src_fields];
1467 for (size_t src_field_idx = 0; src_field_idx < num_src_fields; ++src_field_idx)
1468 srcs_per_field[src_field_idx] =
1470 interp_grid, src_field_idx, src_local_ids + src_field_prefix_sum[src_field_idx],
1471 num_links_per_src_field[src_field_idx]);
1472 free(src_local_ids);
1473
1474 // sort links by tgt local id, source field index, and src local id
1475 qsort(links, num_links, sizeof(*links), compare_link_data_tgt_src_field_src_id);
1476
1477 // gather weights
1478 double * w = xmalloc(num_links * sizeof(*w));
1479 for (size_t link_idx = 0; link_idx < num_links; ++link_idx)
1480 w[link_idx] = links[link_idx].weight;
1481
1482 struct remote_points tgts = {
1483 .data =
1485 interp_grid, tgt_local_ids, num_link_tgt),
1486 .count = num_link_tgt};
1487 free(tgt_local_ids);
1488
1490 weights, &tgts, num_src_per_field_per_tgt, srcs_per_field, w, num_src_fields);
1491
1492 free(tgts.data);
1493 for (size_t i = 0; i < num_src_fields; ++i) free(srcs_per_field[i]);
1494 free(num_src_per_field_per_tgt);
1495 free(w);
1496 free(src_global_ids);
1497
1498 } else {
1499 int num_src_fields = 0;
1500 MPI_Comm comm = yac_interp_grid_get_MPI_Comm(interp_grid);
1502 MPI_Allreduce(MPI_IN_PLACE, &num_src_fields, 1, MPI_INT, MPI_MAX,
1503 comm), comm);
1504 // get local ids for all source points
1505 // (since this routine is collective for all processes, we have to call
1506 // this here if the local process received no links)
1507 for (int i = 0; i < num_src_fields; ++i)
1508 yac_interp_grid_src_global_to_local(interp_grid, 0, NULL, 0, NULL);
1509 }
1510 free(links);
1511
1513 tgt_points_reorder_idx, num_interpolated_points, tgt_points);
1514 free(tgt_points_reorder_idx);
1515
1516 return num_fixed_tgt + num_link_tgt;
1517}
1518
1520 char const * weight_file_name) {
1521
1522 struct interp_method_file * method = xmalloc(1 * sizeof(*method));
1523
1525 method->weight_file_name = strdup(weight_file_name);
1526
1527 return (struct interp_method*)method;
1528}
1529
1530static void delete_file(struct interp_method * method) {
1531
1532 struct interp_method_file * method_file =
1533 (struct interp_method_file *)method;
1534 free((void*)(method_file->weight_file_name));
1535 free(method_file);
1536}
#define UNUSED(x)
Definition core.h:73
int const * const_int_pointer
enum yac_location yac_interp_grid_get_tgt_field_location(struct yac_interp_grid *interp_grid)
size_t yac_interp_grid_get_num_src_fields(struct yac_interp_grid *interp_grid)
const_int_pointer yac_interp_grid_get_src_field_mask(struct yac_interp_grid *interp_grid, size_t src_field_idx)
char const * yac_interp_grid_get_src_grid_name(struct yac_interp_grid *interp_grid)
Definition interp_grid.c:77
char const * yac_interp_grid_get_tgt_grid_name(struct yac_interp_grid *interp_grid)
Definition interp_grid.c:82
void yac_interp_grid_get_tgt_global_ids(struct yac_interp_grid *interp_grid, size_t *tgt_points, size_t count, yac_int *tgt_global_ids)
MPI_Comm yac_interp_grid_get_MPI_Comm(struct yac_interp_grid *interp_grid)
struct remote_point * yac_interp_grid_get_tgt_remote_points(struct yac_interp_grid *interp_grid, size_t *tgt_points, size_t count)
enum yac_location yac_interp_grid_get_src_field_location(struct yac_interp_grid *interp_grid, size_t src_field_idx)
struct remote_point * yac_interp_grid_get_src_remote_points(struct yac_interp_grid *interp_grid, size_t src_field_idx, size_t *src_points, size_t count)
void yac_interp_grid_src_global_to_local(struct yac_interp_grid *interp_grid, size_t src_field_idx, yac_int *src_global_ids, size_t count, size_t *src_local_ids)
struct @7::@8 value
enum callback_type type
static int compare_temp_result_fixed(void const *a, void const *b)
static int compare_link_data_tgt(void const *a, void const *b)
static int compare_link_data_tgt_src_mask(void const *a, void const *b)
static int compare_link_data_field_idx_src(void const *a, void const *b)
static int get_fixed_pack_size(MPI_Comm comm)
static int compute_bucket(yac_int value, int comm_size)
static int compare_temp_result_tgt(void const *a, void const *b)
static int get_link_pack_size(MPI_Comm comm)
static void pack_tgt(yac_int global_tgt_id, void *buffer, int buffer_size, MPI_Comm comm)
static void pack_temp_result(struct temp_result *result, void *buffer, int buffer_size, MPI_Comm comm)
static void redist_weight_file_data(struct yac_interp_grid *interp_grid, size_t *tgt_points, size_t tgt_count, size_t *num_interpolated_points, struct link_data **links, size_t *num_links, struct fixed_data **fixed, size_t *num_fixed)
static void unpack_temp_result(void *buffer, int buffer_size, struct temp_result *result, MPI_Comm comm)
static void unpack_fixed(void *buffer, int buffer_size, struct temp_fixed_data *fixed, MPI_Comm comm)
struct interp_method * yac_interp_method_file_new(char const *weight_file_name)
static void pack_link(struct link_data *link, void *buffer, int buffer_size, MPI_Comm comm)
static int compare_temp_result_reorder_idx(void const *a, void const *b)
static void pack_fixed(double fixed_value, yac_int global_tgt_id, void *buffer, int buffer_size, MPI_Comm comm)
static size_t do_search_file(struct interp_method *method, struct yac_interp_grid *interp_grid, size_t *tgt_points, size_t count, struct yac_interp_weights *weights)
static struct interp_method_vtable interp_method_file_vtable
static void unpack_link(void *buffer, int buffer_size, struct link_data *link, MPI_Comm comm)
static int get_temp_result_pack_size(struct temp_result *result, MPI_Comm comm)
static int compare_temp_result_type(void const *a, void const *b)
static void delete_file(struct interp_method *method)
static int compare_temp_fixed_data_tgt(void const *a, void const *b)
static int get_tgt_pack_size(MPI_Comm comm)
static void unpack_tgt(void *buffer, int buffer_size, yac_int *global_tgt_id, MPI_Comm comm)
static void read_weight_file(char const *weight_file_name, MPI_Comm comm, char const *src_grid_name, char const *tgt_grid_name, enum yac_location *src_locations, enum yac_location tgt_location, size_t num_src_fields, struct link_data **links, size_t *num_links, struct fixed_data **fixed, size_t *num_fixed)
static int compare_link_data_tgt_src_field_src_id(void const *a, void const *b)
#define YAC_WEIGHT_FILE_VERSION_STRING
void yac_interp_weights_add_fixed(struct yac_interp_weights *weights, struct remote_points *tgts, double fixed_value)
@ FIXED
void yac_interp_weights_add_wsum_mf(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_src_per_field_per_tgt, struct remote_point **srcs_per_field, double *w, size_t num_src_fields)
void yac_get_io_ranks(MPI_Comm comm, int *local_is_io_, int **io_ranks_, int *num_io_ranks_)
Definition io_utils.c:309
void yac_nc_inq_varid(int ncid, char const *name, int *varidp)
Definition io_utils.c:411
void yac_nc_open(const char *path, int omode, int *ncidp)
Definition io_utils.c:350
void yac_nc_inq_dimid(int ncid, char const *name, int *dimidp)
Definition io_utils.c:385
#define YAC_HANDLE_ERROR(exp)
Definition io_utils.h:30
enum yac_location yac_str2loc(char const *location_str)
Definition location.c:21
char const * yac_loc2str(enum yac_location location)
Definition location.c:32
yac_location
Definition location.h:12
#define YAC_MAX_LOC_STR_LEN
Definition location.h:10
Definition __init__.py:1
#define xrealloc(ptr, size)
Definition ppm_xfuncs.h:67
#define xcalloc(nmemb, size)
Definition ppm_xfuncs.h:64
#define xmalloc(size)
Definition ppm_xfuncs.h:66
struct fixed_data::@12 * tgt
char const * weight_file_name
struct interp_method_vtable * vtable
size_t(* do_search)(struct interp_method *method, struct yac_interp_grid *grid, size_t *tgt_points, size_t count, struct yac_interp_weights *weights)
information (global id and location) about a point that
structure containing the information (global id and location)
struct remote_point * data
struct temp_fixed_data::@13 tgt
union temp_result::@15 data
struct temp_result::@15::@18 fixed
enum temp_result::@14 type
struct temp_result::@16 tgt
struct link_data * links
struct temp_result::@15::@17 link
#define MIN(a, b)
void yac_quicksort_index_yac_int_size_t(yac_int *a, size_t n, size_t *idx)
void yac_quicksort_index_size_t_size_t(size_t *a, size_t n, size_t *idx)
#define YAC_ASSERT_F(exp, format,...)
Definition yac_assert.h:19
#define die(msg)
Definition yac_assert.h:12
#define YAC_ASSERT(exp, msg)
Definition yac_assert.h:16
void yac_free_comm_buffers(size_t *sendcounts, size_t *recvcounts, size_t *sdispls, size_t *rdispls)
Definition yac_mpi.c:627
void yac_get_comm_buffers(int count, size_t **sendcounts, size_t **recvcounts, size_t **sdispls, size_t **rdispls, MPI_Comm comm)
Definition yac_mpi.c:596
void yac_alltoallv_p2p(void const *send_buffer, size_t const *sendcounts, size_t const *sdispls, void *recv_buffer, size_t const *recvcounts, size_t const *rdispls, size_t dt_size, MPI_Datatype dt, MPI_Comm comm)
Definition yac_mpi.c:131
#define yac_mpi_call(call, comm)
#define YAC_MPI_SIZE_T
Xt_int yac_int
Definition yac_types.h:15
#define yac_int_dt
Definition yac_types.h:16