YAC 3.7.0
Yet Another Coupler
Loading...
Searching...
No Matches
interp_method_creep.c
Go to the documentation of this file.
1// Copyright (c) 2024 The YAC Authors
2//
3// SPDX-License-Identifier: BSD-3-Clause
4
5#ifdef HAVE_CONFIG_H
6// Get the definition of the 'restrict' keyword.
7#include "config.h"
8#endif
9
10#include <string.h>
11
13#include "interp_method_creep.h"
14#include "yac_mpi_internal.h"
15#include "ensure_array_size.h"
16
17static size_t do_search_creep(struct interp_method * method,
18 struct yac_interp_grid * interp_grid,
19 size_t * tgt_points, size_t count,
20 struct yac_interp_weights * weights,
21 int * interpolation_complete);
22static void delete_creep(struct interp_method * method);
23
24static struct interp_method_vtable
28
34
36 int rank; // rank of the owner of the stencil
37 uint64_t idx; // index of the stencil that is supposed to be used
38 double weight; // weight of the stencil
39};
40
42 yac_int global_id; // global id of target point for which
43 // the stencil was originally generated
44 size_t count; // number of contributing stencils
45 union {
46 struct stencil_info single,
49};
50
52 struct result_stencil * stencils; // result stencils
53 size_t count; // number of result stencils
55};
56
58 size_t local_id; // local id of target points that is supposed to
59 // be interpolated by the stencil
60 yac_int global_id; // global id of target points that is supposed to
61 // be interpolated by the stencil
62 size_t idx;
64};
65
67 yac_int global_id; // global id of requested point
68 int rank; // rank of process which requested
70};
71
72struct comm_stuff {
73 int rank, size;
75 MPI_Datatype stencil_info_dt;
76 MPI_Comm comm;
77};
78
79static MPI_Datatype yac_get_stencil_info_mpi_datatype(MPI_Comm comm) {
80
81 struct stencil_info dummy;
82 MPI_Datatype stencil_info_dt;
83 int array_of_blocklengths[] = {1, 1, 1};
84 const MPI_Aint array_of_displacements[] =
85 {(MPI_Aint)(intptr_t)(const void *)&(dummy.rank) -
86 (MPI_Aint)(intptr_t)(const void *)&dummy,
87 (MPI_Aint)(intptr_t)(const void *)&(dummy.idx) -
88 (MPI_Aint)(intptr_t)(const void *)&dummy,
89 (MPI_Aint)(intptr_t)(const void *)&(dummy.weight) -
90 (MPI_Aint)(intptr_t)(const void *)&dummy};
91 const MPI_Datatype array_of_types[] =
92 {MPI_INT, MPI_UINT64_T, MPI_DOUBLE};
94 MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacements,
95 array_of_types, &stencil_info_dt), comm);
96 return yac_create_resized(stencil_info_dt, sizeof(dummy), comm);
97}
98
100 void * results, size_t result_count, size_t result_size,
101 struct result_stencil*(*get_stencil)(void*), size_t * pack_order,
102 int * pack_sizes, MPI_Datatype stencil_info_dt, MPI_Comm comm) {
103
104 int pack_size_global_id, pack_size_count;
105 yac_mpi_call(MPI_Pack_size(1, yac_int_dt, comm, &pack_size_global_id), comm);
106 yac_mpi_call(MPI_Pack_size(1, MPI_UINT64_T, comm, &pack_size_count), comm);
107
108 for (size_t i = 0; i < result_count; ++i) {
109
110 struct result_stencil * stencil =
111 (*get_stencil)(
112 (void*)((unsigned char*)results + pack_order[i] * result_size));
113 size_t curr_count = stencil->count;
114 int pack_size_stencils;
116 MPI_Pack_size(
117 (int)curr_count, stencil_info_dt, comm, &pack_size_stencils), comm);
118 pack_sizes[i] = pack_size_global_id +
119 pack_size_count +
120 pack_size_stencils;
121 }
122}
123
125 struct result_stencil * stencil, void * buffer, int buffer_size,
126 int * position, MPI_Datatype stencil_info_dt, MPI_Comm comm) {
127
128 // global_id
130 MPI_Pack(&(stencil->global_id), 1, yac_int_dt,
131 buffer, buffer_size, position, comm), comm);
132
133 // count
134 uint64_t count_uint64_t = (uint64_t)(stencil->count);
136 MPI_Pack(&count_uint64_t, 1, MPI_UINT64_T,
137 buffer, buffer_size, position, comm), comm);
138
139 // stencils
140 struct stencil_info * stencils =
141 (count_uint64_t == 1)?(&(stencil->data.single)):(stencil->data.multi);
143 MPI_Pack(stencils, (int)count_uint64_t, stencil_info_dt,
144 buffer, buffer_size, position, comm), comm);
145}
146
148 void * results, size_t result_count, size_t result_size,
149 struct result_stencil*(*get_stencil)(void*), size_t * pack_order,
150 void ** pack_data, int * pack_sizes, MPI_Datatype stencil_info_dt,
151 MPI_Comm comm) {
152
154 results, result_count, result_size, get_stencil, pack_order,
155 pack_sizes, stencil_info_dt, comm);
156
157 size_t temp_total_pack_size = 0;
158 for (size_t i = 0; i < result_count; ++i)
159 temp_total_pack_size += (size_t)(pack_sizes[i]);
160
162 temp_total_pack_size <= INT_MAX,
163 "ERROR(pack_result_stencils): pack size exceeds INT_MAX")
164
165 void * pack_data_ = xmalloc(temp_total_pack_size);
166 size_t total_pack_size = 0;
167
168 for (size_t i = 0; i < result_count; ++i) {
169
170 struct result_stencil * curr_stencil =
171 (*get_stencil)(
172 (void*)((unsigned char*)results + pack_order[i] * result_size));
173
174 int position = 0;
175 void * buffer = (void*)((char*)pack_data_ + total_pack_size);
176 int buffer_size = (int)(temp_total_pack_size - total_pack_size);
177
178 // stencil
180 curr_stencil, buffer, buffer_size, &position, stencil_info_dt, comm);
181
182 pack_sizes[i] = position;
183 total_pack_size += (size_t)position;
184 }
185
186 *pack_data = pack_data_;
187}
188
190 struct result_stencil * stencil, void * buffer, int buffer_size,
191 int * position, MPI_Datatype stencil_info_dt, MPI_Comm comm,
192 struct stencil_info ** stencil_info_buffer,
193 size_t * stencil_info_buffer_array_size,
194 size_t * stencil_info_buffer_size) {
195
196 // global_id
198 MPI_Unpack(buffer, buffer_size, position,
199 &(stencil->global_id), 1, yac_int_dt, comm), comm);
200
201 // count
202 uint64_t count_uint64_t;
204 MPI_Unpack(buffer, buffer_size, position,
205 &count_uint64_t, 1, MPI_UINT64_T, comm), comm);
206 size_t count = ((stencil->count = (size_t)count_uint64_t));
207
208 // stencils
209 struct stencil_info * stencils;
210 if (count_uint64_t == 1) {
211 stencils = &(stencil->data.single);
212 } else {
214 *stencil_info_buffer, *stencil_info_buffer_array_size,
215 *stencil_info_buffer_size + count);
216 stencils =
217 ((stencil->data.multi =
218 *stencil_info_buffer + *stencil_info_buffer_size));
219 *stencil_info_buffer_size += count;
220 }
222 MPI_Unpack(buffer, buffer_size, position,
223 stencils, (int)count, stencil_info_dt, comm), comm);
224}
225
227 size_t count, void * packed_data, size_t packed_data_size,
228 MPI_Datatype stencil_info_dt, MPI_Comm comm) {
229
230 struct stencil_info * stencil_info_buffer = NULL;
231 size_t stencil_info_buffer_array_size = 0;
232 size_t stencil_info_buffer_size = 0;
233
235 xmalloc(count * sizeof(*result_stencils));
236
237 for (size_t i = 0, offset = 0; i < count; ++i) {
238
239 int position = 0;
240 void * curr_buffer = (void*)((char*)packed_data + offset);
241 int buffer_size = (int)(packed_data_size - offset);
242 struct result_stencil * curr_stencil = result_stencils + i;
243
245 curr_stencil, curr_buffer, buffer_size, &position, stencil_info_dt, comm,
246 &stencil_info_buffer, &stencil_info_buffer_array_size,
247 &stencil_info_buffer_size);
248 offset += (size_t)position;
249 }
250
251 struct result_stencils * results =
252 xmalloc(sizeof(*results) +
253 stencil_info_buffer_size * sizeof(results->stencil_info_buffer[0]));
254 results->stencils = result_stencils;
255 results->count = count;
256 memcpy(&(results->stencil_info_buffer[0]), stencil_info_buffer,
257 stencil_info_buffer_size * sizeof(*stencil_info_buffer));
259 for (size_t i = 0, offset = 0; i < count; ++i) {
260 size_t curr_count = result_stencils[i].count;
261 if (curr_count > 1) {
262 result_stencils[i].data.multi = &(results->stencil_info_buffer[offset]);
263 offset += curr_count;
264 }
265 }
266
267 return results;
268}
269
271 void * results, size_t result_count, size_t result_size,
272 struct result_stencil*(*result_get_stencil)(void*), size_t * pack_order,
273 int * ranks, struct comm_stuff comm) {
274
275 char const * routine = "exchange_interp_results";
276
277 // mark and count local results
278 size_t local_count = 0;
279 for (size_t i = 0; i < result_count; ++i) {
280 if (ranks[i] == comm.rank) {
281 ranks[i] = INT_MAX;
282 ++local_count;
283 }
284 }
285 size_t send_count = result_count - local_count;
286
287 // sort results by rank (local results go to the end of the array)
288 yac_quicksort_index_int_size_t(ranks, result_count, pack_order);
289
290 // pack the result stencils that need to be send to other processes
291 void * send_buffer;
292 int * pack_sizes = xmalloc(send_count * sizeof(*pack_sizes));
294 results, send_count, result_size,
295 result_get_stencil, pack_order, &send_buffer, pack_sizes,
296 comm.stencil_info_dt, comm.comm);
297
298 // set up comm buffers
299 size_t * num_results_per_rank =
300 xmalloc((size_t)comm.size * sizeof(*num_results_per_rank));
301 memset(comm.sendcounts, 0,
302 (size_t)comm.size * sizeof(*comm.sendcounts));
303 size_t j = 0;
304 for (int rank = 0; rank < comm.size; ++rank) {
305 size_t curr_num_results = 0;
306 size_t curr_sendcount = 0;
307 while ((j < send_count) && (ranks[j] == rank)) {
308 curr_sendcount += (size_t)(pack_sizes[j++]);
309 curr_num_results++;
310 }
311 num_results_per_rank[rank] = curr_num_results;
313 curr_sendcount <= INT_MAX, "ERROR(%s): pack size to big", routine)
314 comm.sendcounts[rank] = curr_sendcount;
315 }
316 free(pack_sizes);
318 1, comm.sendcounts, comm.recvcounts,
319 comm.sdispls, comm.rdispls, comm.comm);
321 MPI_Alltoall(MPI_IN_PLACE, 1, YAC_MPI_SIZE_T,
322 num_results_per_rank, 1, YAC_MPI_SIZE_T, comm.comm),
323 comm.comm);
324 size_t recv_count = 0;
325 for (int i = 0; i < comm.size; ++i)
326 recv_count += num_results_per_rank[i];
327 free(num_results_per_rank);
328
329 size_t recv_size = comm.recvcounts[comm.size - 1] +
330 comm.rdispls[comm.size - 1];
331 void * recv_buffer = xmalloc(recv_size);
332
333 // exchange result stencils
334 yac_alltoallv_packed_p2p(
335 send_buffer, comm.sendcounts, comm.sdispls+1,
336 recv_buffer, comm.recvcounts, comm.rdispls, comm.comm,
337 routine, __LINE__);
338 free(send_buffer);
339
340 // unpack stencils
343 recv_count, recv_buffer, recv_size, comm.stencil_info_dt, comm.comm);
344 free(recv_buffer);
345
346 result_stencils->count += local_count;
348 xrealloc(
350 (recv_count + local_count) * sizeof(*(result_stencils->stencils)));
351
352 // add the local result stenils
353 struct result_stencil * local_stencils =
354 result_stencils->stencils + recv_count;
355 pack_order += send_count;
356 for (size_t i = 0; i < local_count; ++i)
357 local_stencils[i] =
358 *(*result_get_stencil)(
359 (void*)((unsigned char*)results + pack_order[i] * result_size));
360
361 return result_stencils;
362}
363
365 void * interp_result) {
366
367 return &(((struct interp_result*)interp_result)->stencil);
368}
369
371 struct yac_interp_grid * interp_grid, struct comm_stuff comm,
372 struct interp_result * interp_results, size_t result_count) {
373
374 // get the the newly interpolated target points
375 size_t * tgt_points = xmalloc(result_count * sizeof(*tgt_points));
376 for (size_t i = 0; i < result_count; ++i)
377 tgt_points[i] = interp_results[i].local_id;
378
379 // get the distributed owners for all targets
380 int * tgt_points_dist_owner =
381 xmalloc(result_count * sizeof(*tgt_points_dist_owner));
383 interp_grid, tgt_points, result_count, tgt_points_dist_owner);
384 size_t * pack_order = tgt_points;
385 for (size_t i = 0; i < result_count; ++i) pack_order[i] = i;
386
387 struct result_stencils * relocated_results =
389 interp_results, result_count, sizeof(*interp_results),
390 interp_result_get_stencil, pack_order, tgt_points_dist_owner,
391 comm);
392 free(pack_order);
393 free(tgt_points_dist_owner);
394
395 return relocated_results;
396}
397
399 struct yac_interp_grid * interp_grid, struct comm_stuff comm,
400 struct yac_interp_weights * interp_weights,
401 struct interp_result ** interp_results, size_t * result_count) {
402
403 // get list of all already interpolated target points
404 size_t num_interpolated_tgt =
406 yac_int * interpolated_tgts_global_ids =
407 yac_interp_weights_get_interp_tgt(interp_weights);
408 size_t * interpolated_tgts_local_ids =
409 xmalloc(num_interpolated_tgt * sizeof(*interpolated_tgts_local_ids));
411 interp_grid, interpolated_tgts_global_ids, num_interpolated_tgt,
412 interpolated_tgts_local_ids);
413
414 // initialise initial interpolation results
415 struct interp_result * initial_interp_results =
416 (num_interpolated_tgt > 0)?
417 xmalloc(num_interpolated_tgt * sizeof(*initial_interp_results)):NULL;
418 for (size_t i = 0; i < num_interpolated_tgt; ++i) {
419 initial_interp_results[i].local_id = interpolated_tgts_local_ids[i];
420 initial_interp_results[i].global_id = interpolated_tgts_global_ids[i];
421 initial_interp_results[i].idx = SIZE_MAX;
422 initial_interp_results[i].stencil.global_id =
423 interpolated_tgts_global_ids[i];
424 initial_interp_results[i].stencil.count = 1;
425 initial_interp_results[i].stencil.data.single.rank = comm.rank;
426 initial_interp_results[i].stencil.data.single.idx = (uint64_t)i;
427 initial_interp_results[i].stencil.data.single.weight = 1.0;
428 }
429 free(interpolated_tgts_local_ids);
430 free(interpolated_tgts_global_ids);
431
432 *interp_results = initial_interp_results;
433 *result_count = num_interpolated_tgt;
434}
435
437 const void * a, const void * b) {
438
439 int ret = (((const struct interp_result *)a)->stencil.count >
440 ((const struct interp_result *)b)->stencil.count) -
441 (((const struct interp_result *)a)->stencil.count <
442 ((const struct interp_result *)b)->stencil.count);
443
444 if (ret) return ret;
445
446 return (((const struct interp_result *)a)->global_id >
447 ((const struct interp_result *)b)->global_id) -
448 (((const struct interp_result *)a)->global_id <
449 ((const struct interp_result *)b)->global_id);
450}
451
453 const void * a, const void * b) {
454
455 return (((const struct interp_result *)a)->local_id >
456 ((const struct interp_result *)b)->local_id) -
457 (((const struct interp_result *)a)->local_id <
458 ((const struct interp_result *)b)->local_id);
459}
460
461static int compare_interp_result_global_id(const void * a, const void * b) {
462
463 return (((const struct interp_result *)a)->global_id >
464 ((const struct interp_result *)b)->global_id) -
465 (((const struct interp_result *)a)->global_id <
466 ((const struct interp_result *)b)->global_id);
467}
468
470 const void * a, const void * b) {
471
472 return (((const struct tgt_request *)a)->stencil.count >
473 ((const struct tgt_request *)b)->stencil.count) -
474 (((const struct tgt_request *)a)->stencil.count <
475 ((const struct tgt_request *)b)->stencil.count);
476}
477
479 const void * a, const void * b) {
480
481 return (((const struct tgt_request *)a)->global_id >
482 ((const struct tgt_request *)b)->global_id) -
483 (((const struct tgt_request *)a)->global_id <
484 ((const struct tgt_request *)b)->global_id);
485}
486
488 const void * a, const void * b) {
489
490 return (((const struct result_stencil *)a)->global_id >
491 ((const struct result_stencil *)b)->global_id) -
492 (((const struct result_stencil *)a)->global_id <
493 ((const struct result_stencil *)b)->global_id);
494}
495
496static inline int compare_yac_int(const void * a, const void * b) {
497
498 return ((*(const yac_int *)a) >
499 (*(const yac_int *)b)) -
500 ((*(const yac_int *)a) <
501 (*(const yac_int *)b));
502}
503
505 struct yac_interp_grid * interp_grid,
506 struct interp_result * interp_results, size_t result_count,
507 struct remote_points * interp_tgt_remote_points,
508 size_t ** num_stencils_per_tgt_, size_t ** stencil_indices_,
509 int ** stencil_ranks_, double ** w_) {
510
511 // sort interpolation results by target local id
512 qsort(interp_results, result_count, sizeof(*interp_results),
514
515 size_t total_num_stencils = 0;
516 size_t * interpolated_tgts_local_ids =
517 xmalloc(result_count * sizeof(*interpolated_tgts_local_ids));
518 for (size_t i = 0; i < result_count; ++i) {
519 interpolated_tgts_local_ids[i] = interp_results[i].local_id;
520 total_num_stencils += interp_results[i].stencil.count;
521 }
522 interp_tgt_remote_points->data =
524 interp_grid, interpolated_tgts_local_ids, result_count);
525 interp_tgt_remote_points->count = result_count;
526 free(interpolated_tgts_local_ids);
527
528 size_t * num_stencils_per_tgt =
529 xmalloc(result_count * sizeof(*num_stencils_per_tgt));
530 size_t * stencil_indices =
531 xmalloc(total_num_stencils * sizeof(*stencil_indices));
532 int * stencil_ranks =
533 xmalloc(total_num_stencils * sizeof(*stencil_ranks));
534 double * w = xmalloc(total_num_stencils * sizeof(*w));
535
536 for (size_t i = 0, j = 0; i < result_count; ++i) {
537
538 size_t curr_count = interp_results[i].stencil.count;
539
540 num_stencils_per_tgt[i] = curr_count;
541
542 if (curr_count == 1) {
543 stencil_indices[j] = interp_results[i].stencil.data.single.idx;
544 stencil_ranks[j] = interp_results[i].stencil.data.single.rank;
545 w[j] = interp_results[i].stencil.data.single.weight;
546 ++j;
547 } else {
548 for (size_t k = 0; k < curr_count; ++k, ++j) {
549 stencil_indices[j] = interp_results[i].stencil.data.multi[k].idx;
550 stencil_ranks[j] = interp_results[i].stencil.data.multi[k].rank;
551 w[j] = interp_results[i].stencil.data.multi[k].weight;
552 }
553 }
554 }
555
556 *num_stencils_per_tgt_ = num_stencils_per_tgt;
557 *stencil_indices_ = stencil_indices;
558 *stencil_ranks_ = stencil_ranks;
559 *w_ = w;
560}
561
563 void * tgt_request) {
564
565 return &(((struct tgt_request*)tgt_request)->stencil);
566}
567
569 struct comm_stuff comm, struct tgt_request * neigh_requests,
570 size_t * request_count_, struct result_stencils * interp_stencils) {
571
572 size_t request_count = *request_count_;
573 size_t match_count = 0;
574
575 struct result_stencil * stencils = interp_stencils->stencils;
576 size_t stencil_count = interp_stencils->count;
577
578 // match current result points with neighbour requests
579 qsort(
580 stencils, stencil_count, sizeof(*stencils),
582 for (size_t i = 0, j = 0; i < stencil_count; ++i) {
583 yac_int curr_global_id = stencils[i].global_id;
584
585 while ((j < request_count) &&
586 (neigh_requests[j].global_id < curr_global_id)) ++j;
587 while ((j < request_count) &&
588 (neigh_requests[j].global_id == curr_global_id)) {
589
590 ++match_count;
591 neigh_requests[j].stencil = stencils[i];
592 ++j;
593 }
594 }
595
596 // sort neighbour request by stencil count
597 // (open requests have a stencil count of 0)
598 qsort(neigh_requests, request_count, sizeof(*neigh_requests),
600
601 request_count -= match_count;
602 *request_count_ = request_count;
603 struct tgt_request * neigh_request_matches = neigh_requests + request_count;
604
605 // sort open neighbour requests by global id
606 qsort(neigh_requests, request_count, sizeof(*neigh_requests),
608
609 size_t * pack_order = xmalloc(match_count * sizeof(*pack_order));
610 int * origin_rank = xmalloc(match_count * sizeof(*origin_rank));
611 for (size_t i = 0; i < match_count; ++i) {
612 pack_order[i] = i;
613 origin_rank[i] = neigh_request_matches[i].rank;
614 }
615
616 struct result_stencils * relocated_results =
618 neigh_request_matches, match_count, sizeof(*neigh_request_matches),
619 tgt_request_get_stencil, pack_order, origin_rank, comm);
620 free(origin_rank);
621 free(pack_order);
622
623 return relocated_results;
624}
625
627 struct yac_interp_grid * interp_grid, size_t * tgt_local_ids,
628 yac_int * tgt_global_ids, size_t count,
629 size_t ** neigh_local_ids_, yac_int ** neigh_to_tgt_global_id_,
630 size_t * total_num_neighbours_) {
631
632 // get neighbour cells for all target cells
633 size_t total_num_neighbours = 0;
634 struct yac_const_basic_grid_data * tgt_basic_grid_data =
636 for (size_t i = 0; i < count; ++i)
637 total_num_neighbours +=
638 tgt_basic_grid_data->num_vertices_per_cell[tgt_local_ids[i]];
639 size_t * neigh_local_ids =
640 xmalloc(total_num_neighbours * sizeof(*neigh_local_ids));
642 interp_grid, tgt_local_ids, count, neigh_local_ids);
643
644 // generate mapping between neighbour global ids and target points
645 yac_int * neigh_to_tgt_global_id =
646 xmalloc(total_num_neighbours * sizeof(*neigh_to_tgt_global_id));
647 for (size_t i = 0, j = 0; i < count; ++i) {
648 int curr_num_neigh =
649 tgt_basic_grid_data->num_vertices_per_cell[tgt_local_ids[i]];
650 yac_int curr_tgt_global_id = tgt_global_ids[i];
651 for (int k = 0; k < curr_num_neigh; ++k, ++j)
652 neigh_to_tgt_global_id[j] = curr_tgt_global_id;
653 }
654
655 *neigh_local_ids_ = neigh_local_ids;
656 *neigh_to_tgt_global_id_ = neigh_to_tgt_global_id;
657 *total_num_neighbours_ = total_num_neighbours;
658}
659
661 struct yac_interp_grid * interp_grid, size_t * tgt_local_ids,
662 yac_int * tgt_global_ids, size_t count,
663 size_t ** neigh_local_ids_, yac_int ** neigh_to_tgt_global_id_,
664 size_t * total_num_neighbours_) {
665
666 int * num_neighs_per_vertex = xmalloc(count * sizeof(*num_neighs_per_vertex));
667 size_t * neigh_vertices;
668
670 interp_grid, tgt_local_ids, count,
671 &neigh_vertices, num_neighs_per_vertex);
672
673 size_t total_num_neighbours = 0;
674 for (size_t i = 0; i < count; ++i)
675 total_num_neighbours += (size_t)(num_neighs_per_vertex[i]);
676
677 // generate mapping between neighbour global ids and target points
678 yac_int * neigh_to_tgt_global_id =
679 xmalloc(total_num_neighbours * sizeof(*neigh_to_tgt_global_id));
680 for (size_t i = 0, j = 0; i < count; ++i) {
681 int curr_num_neigh = num_neighs_per_vertex[i];
682 yac_int curr_tgt_global_id = tgt_global_ids[i];
683 for (int k = 0; k < curr_num_neigh; ++k, ++j)
684 neigh_to_tgt_global_id[j] = curr_tgt_global_id;
685 }
686 free(num_neighs_per_vertex);
687
688 *neigh_local_ids_ = neigh_vertices;
689 *neigh_to_tgt_global_id_ = neigh_to_tgt_global_id;
690 *total_num_neighbours_ = total_num_neighbours;
691}
692
693// extracts basic information about the neighbours of the target points
695 struct yac_interp_grid * interp_grid,
696 size_t * tgt_local_ids, yac_int * tgt_global_ids, size_t count,
697 size_t ** neigh_local_ids_, yac_int ** neigh_global_ids_,
698 yac_int ** neigh_to_tgt_global_id_, size_t * total_num_neighbours_) {
699
700 // get basic neighbour information
701 size_t * neigh_local_ids;
702 yac_int * neigh_to_tgt_global_id;
703 size_t total_num_neighbours;
704 enum yac_location tgt_field_location =
707 (tgt_field_location == YAC_LOC_CELL) ||
708 (tgt_field_location == YAC_LOC_CORNER),
709 "ERROR(get_tgt_neigh_info): unsupported target field location")
710 if (tgt_field_location == YAC_LOC_CELL)
712 interp_grid, tgt_local_ids, tgt_global_ids, count,
713 &neigh_local_ids, &neigh_to_tgt_global_id, &total_num_neighbours);
714 else
716 interp_grid, tgt_local_ids, tgt_global_ids, count,
717 &neigh_local_ids, &neigh_to_tgt_global_id, &total_num_neighbours);
718
719 // remove invalid neighbour indices
721 neigh_local_ids, total_num_neighbours, neigh_to_tgt_global_id);
722 while((total_num_neighbours > 0) &&
723 (neigh_local_ids[total_num_neighbours-1] == SIZE_MAX))
724 --total_num_neighbours;
725
726 // get global ids for all neighbours
727 yac_int * neigh_global_ids =
728 xmalloc(total_num_neighbours * sizeof(*neigh_global_ids));
730 interp_grid, neigh_local_ids, total_num_neighbours, neigh_global_ids);
731
732 *neigh_local_ids_ =
733 xrealloc(neigh_local_ids, total_num_neighbours * sizeof(*neigh_local_ids));
734 *neigh_global_ids_ = neigh_global_ids;
735 *neigh_to_tgt_global_id_ =
736 xrealloc(neigh_to_tgt_global_id,
737 total_num_neighbours * sizeof(*neigh_to_tgt_global_id));
738 *total_num_neighbours_ = total_num_neighbours;
739}
740
741// sends request for target points neighbours to the respective
742// distributed owners
744 struct yac_interp_grid * interp_grid, struct comm_stuff comm,
745 size_t * neigh_local_ids, yac_int * neigh_global_ids,
746 size_t num_neighbours, struct tgt_request ** neigh_requests_,
747 size_t * request_count_) {
748
749 // get the distributed owner of the neighbour target points
750 int * neigh_dist_owner =
751 xmalloc(num_neighbours * sizeof(*neigh_dist_owner));
753 interp_grid, neigh_local_ids, num_neighbours,
754 neigh_dist_owner);
755
756 yac_int * send_neigh_global_ids =
757 xmalloc(num_neighbours * sizeof(*send_neigh_global_ids));
758 memcpy(send_neigh_global_ids, neigh_global_ids,
759 num_neighbours * sizeof(*send_neigh_global_ids));
760
761 // sort send buffer by rank
763 neigh_dist_owner, num_neighbours, send_neigh_global_ids);
764
765 // remove duplicated global ids
766 size_t to = 0, new_to = 0, from = 0, new_from = 0;
767 for (int rank = 0; rank < comm.size; ++rank) {
768 while ((new_from < num_neighbours) &&
769 (neigh_dist_owner[new_from] == rank)) new_from++;
770 size_t curr_count = new_from - from;
771 qsort(send_neigh_global_ids + from, curr_count,
772 sizeof(*send_neigh_global_ids), compare_yac_int);
773 yac_int prev_global_id =
774 (curr_count > 0)?(send_neigh_global_ids[from]-1):0;
775 for (; from < new_from; ++from) {
776 yac_int curr_global_id = send_neigh_global_ids[from];
777 if (prev_global_id != curr_global_id) {
778 send_neigh_global_ids[new_to++] = curr_global_id;
779 prev_global_id = curr_global_id;
780 }
781 }
782 curr_count = new_to - to;
783 to = new_to;
784 comm.sendcounts[rank] = curr_count;
785 }
786 free(neigh_dist_owner);
787
788 // send request for all neighbours
790 1, comm.sendcounts, comm.recvcounts, comm.sdispls, comm.rdispls, comm.comm);
791 size_t request_count = comm.recvcounts[comm.size-1] +
792 comm.rdispls[comm.size-1];
793 yac_int * recv_neigh_global_ids =
794 xmalloc(request_count * sizeof(*recv_neigh_global_ids));
795 yac_alltoallv_yac_int_p2p(
796 send_neigh_global_ids, comm.sendcounts, comm.sdispls+1,
797 recv_neigh_global_ids, comm.recvcounts, comm.rdispls, comm.comm,
798 "send_neigh_request", __LINE__);
799 struct tgt_request * neigh_requests =
800 xmalloc(request_count * sizeof(*neigh_requests));
801 for (int i = 0, k = 0; i < comm.size; ++i) {
802 for (size_t j = 0; j < comm.recvcounts[i]; ++j, ++k) {
803 neigh_requests[k].global_id = recv_neigh_global_ids[k];
804 neigh_requests[k].rank = i;
805 neigh_requests[k].stencil.count = 0;
806 }
807 }
808 qsort(neigh_requests, request_count, sizeof(*neigh_requests),
810 free(send_neigh_global_ids);
811 free(recv_neigh_global_ids);
812
813 *neigh_requests_ = neigh_requests;
814 *request_count_ = request_count;
815}
816
818 size_t * tgt_local_ids, yac_int * tgt_global_ids, size_t count) {
819
820 struct interp_result * interp_results =
821 xmalloc(count * sizeof(*interp_results));
822 for (size_t i = 0; i < count; ++i) {
823 interp_results[i].local_id = tgt_local_ids[i];
824 interp_results[i].global_id = tgt_global_ids[i];
825 interp_results[i].idx = i;
826 interp_results[i].stencil.count = 0;
827 }
828 qsort(interp_results, count, sizeof(*interp_results),
830 return interp_results;
831}
832
834 const void * a, const void * b) {
835
836 int ret = ((struct stencil_info *)a)->rank -
837 ((struct stencil_info *)b)->rank;
838 if (ret) return ret;
839
840 return (((struct stencil_info *)a)->idx >
841 ((struct stencil_info *)b)->idx) -
842 (((struct stencil_info *)a)->idx <
843 ((struct stencil_info *)b)->idx);
844}
845
847 struct result_stencil * neigh_stencils, size_t * stencil_indices,
848 size_t count, yac_int global_id, double weight) {
849
850 size_t stencil_info_count = 0;
851
852 for (size_t i = 0; i < count; ++i)
853 stencil_info_count += neigh_stencils[stencil_indices[i]].count;
854
855 struct result_stencil stencil;
856
857 struct stencil_info * stencil_infos;
858 if (stencil_info_count > 1) {
859 stencil_infos =
860 ((stencil.data.multi =
861 xmalloc(stencil_info_count * sizeof(*stencil_infos))));
862 } else {
863 stencil_infos = &(stencil.data.single);
864 }
865
866 stencil.global_id = global_id;
867 for (size_t i = 0, j = 0; i < count; ++i) {
868 struct result_stencil * curr_stencil =
869 neigh_stencils + stencil_indices[i];
870 size_t curr_stencil_info_count = curr_stencil->count;
871 struct stencil_info * curr_stencil_infos =
872 (curr_stencil_info_count == 1)?
873 (&(curr_stencil->data.single)):(curr_stencil->data.multi);
874 memcpy(stencil_infos + j, curr_stencil_infos,
875 curr_stencil_info_count * sizeof(*stencil_infos));
876 for (size_t k = 0; k < curr_stencil_info_count; ++k, ++j)
877 stencil_infos[j].weight *= weight;
878 }
879
880 // in case we have multiple stencil infos, there may be duplicated
881 // entries from different source
882 // here we remove these duplicated entries
883 if (stencil_info_count > 1) {
884
885 // sort the stencils
886 qsort(stencil_infos, stencil_info_count, sizeof(*stencil_infos),
888
889 // remote duplicated stencils
890 struct stencil_info * prev_stencil_info = stencil_infos,
891 * curr_stencil_info = stencil_infos + 1;
892 size_t new_stencil_info_count = 1;
893 for (size_t i = 1; i < stencil_info_count; ++i, ++curr_stencil_info) {
895 curr_stencil_info, prev_stencil_info)) {
896 if (new_stencil_info_count != i)
897 stencil_infos[new_stencil_info_count] =
898 *curr_stencil_info;
899 ++new_stencil_info_count;
900 prev_stencil_info = curr_stencil_info;
901 } else {
902 stencil_infos[new_stencil_info_count-1].weight +=
903 curr_stencil_info->weight;
904 }
905 }
906 if (new_stencil_info_count != stencil_info_count) {
907 stencil_info_count = new_stencil_info_count;
908 if (new_stencil_info_count == 1) {
909 stencil.data.single = *stencil_infos;
910 free(stencil_infos);
911 } else {
912 stencil.data.multi =
913 xrealloc(
914 stencil_infos, stencil_info_count * sizeof(*stencil_infos));
915 }
916 }
917 }
918 stencil.count = stencil_info_count;
919
920 return stencil;
921}
922
924 struct result_stencils * neigh_answer,
925 yac_int * neigh_global_ids, yac_int * neigh_to_tgt_global_id,
926 size_t * stencil_indices, size_t * num_neighbours_,
927 struct interp_result * interp_results, size_t * num_open_tgt_) {
928
929 size_t num_neighbours = *num_neighbours_;
930 size_t num_open_tgt = *num_open_tgt_;
931
932 struct result_stencil * neigh_stencils = neigh_answer->stencils;
933 size_t answer_count = neigh_answer->count;
934
935 // match received neigh request answers with tgt neighbours
936 // (remove duplicated results)
937 qsort(
938 neigh_stencils, answer_count, sizeof(*neigh_stencils),
940 size_t match_count = 0;
941 for (size_t i = 0, j = 0; i < answer_count; ++i) {
942 yac_int curr_global_id = neigh_stencils[i].global_id;
943
944 while ((j < num_neighbours) &&
945 (neigh_global_ids[j] < curr_global_id)) ++j;
946
947 while ((j < num_neighbours) &&
948 (neigh_global_ids[j] == curr_global_id)) {
949 neigh_global_ids[j] = XT_INT_MAX;
950 stencil_indices[j] = i;
951 ++match_count;
952 ++j;
953 }
954 }
955
956 // move fullfilled neighbour requests to the end of the array
958 neigh_global_ids, num_neighbours, neigh_to_tgt_global_id, stencil_indices);
959 num_neighbours -= match_count;
960 *num_neighbours_ = num_neighbours;
961
962 neigh_to_tgt_global_id += num_neighbours;
963 stencil_indices += num_neighbours;
964
965 // sort matches by target global ids
967 neigh_to_tgt_global_id, match_count, stencil_indices);
968
969 // set stencils for target matches
970 for (size_t i = 0, k = 0; i < match_count;) {
971
972 size_t prev_i = i;
973
974 // count the number of stencils for the current target
975 yac_int curr_tgt_global_id = neigh_to_tgt_global_id[i++];
976 while ((i < match_count) &&
977 (neigh_to_tgt_global_id[i] == curr_tgt_global_id)) ++i;
978 size_t curr_stencil_count = i - prev_i;
979
980 while ((k < num_open_tgt) &&
981 (interp_results[k].global_id < curr_tgt_global_id)) ++k;
982
983 while ((k < num_open_tgt) &&
984 (interp_results[k].global_id == curr_tgt_global_id)) {
985
986 interp_results[k].stencil =
988 neigh_stencils, stencil_indices + prev_i, curr_stencil_count,
989 curr_tgt_global_id, 1.0 / (double)curr_stencil_count);
990 ++k;
991 }
992 }
993
994 // move successfully interpolated target points to the end of
995 // the array
996 qsort(interp_results, num_open_tgt, sizeof(*interp_results),
998 size_t new_num_open_tgt = 0;
999 while ((new_num_open_tgt < num_open_tgt) &&
1000 (interp_results[new_num_open_tgt].stencil.count == 0))
1001 new_num_open_tgt++;
1002 *num_open_tgt_ = new_num_open_tgt;
1003 return num_open_tgt - new_num_open_tgt;
1004}
1005
1007 struct yac_interp_grid * interp_grid) {
1008
1009 struct comm_stuff comm;
1010
1011 comm.comm = yac_interp_grid_get_MPI_Comm(interp_grid);
1013 1, &comm.sendcounts, &comm.recvcounts, &comm.sdispls, &comm.rdispls,
1014 comm.comm);
1015 yac_mpi_call(MPI_Comm_rank(comm.comm, &comm.rank), comm.comm);
1016 yac_mpi_call(MPI_Comm_size(comm.comm, &comm.size), comm.comm);
1017 comm.stencil_info_dt = yac_get_stencil_info_mpi_datatype(comm.comm);
1018
1019 return comm;
1020}
1021
1022static void free_comm_stuff(struct comm_stuff comm) {
1023
1025 comm.sendcounts, comm.recvcounts, comm.sdispls, comm.rdispls);
1026 yac_mpi_call(MPI_Type_free(&comm.stencil_info_dt), comm.comm);
1027}
1028
1029// the local process is the distributed owner for all target points
1030// passed to this routine
1032 struct yac_interp_grid * interp_grid, int const max_creep_distance,
1033 size_t * tgt_points, yac_int * tgt_global_ids, size_t count,
1034 int * interp_flag, struct yac_interp_weights * weights) {
1035
1036 struct comm_stuff comm = init_comm_stuff(interp_grid);
1037
1038 // get information about the neighbours of the target points
1039 size_t * neigh_local_ids;
1040 yac_int * neigh_global_ids;
1041 yac_int * neigh_to_tgt_global_id;
1042 size_t total_num_neighbours;
1044 interp_grid, tgt_points, tgt_global_ids, count,
1045 &neigh_local_ids, &neigh_global_ids, &neigh_to_tgt_global_id,
1046 &total_num_neighbours);
1047 size_t * stencil_indices =
1048 xmalloc(total_num_neighbours * sizeof(*stencil_indices));
1049
1050 // send request for target points neighbours to the respective
1051 // distributed owners
1052 struct tgt_request * neigh_requests;
1053 size_t request_count;
1055 interp_grid, comm, neigh_local_ids, neigh_global_ids, total_num_neighbours,
1056 &neigh_requests, &request_count);
1057 free(neigh_local_ids);
1058
1059 // sort global ids of target point neighbours
1061 neigh_global_ids, total_num_neighbours, neigh_to_tgt_global_id);
1062
1063 // initialise interpolation results
1064 size_t num_open_tgt = count;
1065 struct interp_result * interp_results =
1066 init_interp_results(tgt_points, tgt_global_ids, count);
1067
1068 // get already existing results and relocate them to their respective
1069 // distributed owners
1070 struct interp_result * initial_interp_results;
1071 size_t result_count;
1073 interp_grid, comm, weights, &initial_interp_results, &result_count);
1074
1075 for (int creep_distance = 0;
1076 creep_distance < max_creep_distance; ++creep_distance) {
1077
1078 // check whether there are initial results on any process
1079 int result_flag = result_count > 0;
1081 MPI_Allreduce(
1082 MPI_IN_PLACE, &result_flag, 1, MPI_INT, MPI_MAX, comm.comm), comm.comm);
1083 if (result_flag == 0) break;
1084
1085 // relocate interpolation results to distributed owners of associated
1086 // target points
1087 struct result_stencils * interp_stencils =
1089 interp_grid, comm,
1090 (creep_distance == 0)?
1091 initial_interp_results:(interp_results + num_open_tgt),
1092 result_count);
1093 if (creep_distance == 0) free(initial_interp_results);
1094
1095 // match interpolation results with distributed neighbour requests and
1096 // inform origins of requests about matches
1097 struct result_stencils * neigh_matches =
1099 comm, neigh_requests, &request_count, interp_stencils);
1100
1101 // match received neigh request answers with tgts
1102 result_count =
1104 neigh_matches, neigh_global_ids, neigh_to_tgt_global_id,
1105 stencil_indices, &total_num_neighbours, interp_results,
1106 &num_open_tgt);
1107 free(interp_stencils->stencils);
1108 free(interp_stencils);
1109 free(neigh_matches->stencils);
1110 free(neigh_matches);
1111 } // creep_distance < max_creep_distance
1112
1113 free(neigh_requests);
1114 free(stencil_indices);
1115 free(neigh_global_ids);
1116 free(neigh_to_tgt_global_id);
1117
1118 for (size_t i = 0; i < count; ++i)
1119 interp_flag[interp_results[i].idx] =
1120 interp_results[i].stencil.count > 0;
1121
1122 // copy stencils
1123 struct remote_points interp_tgt_remote_points;
1124 size_t * num_stencils_per_tgt;
1125 int * stencil_ranks;
1126 double * w;
1128 interp_grid, interp_results + num_open_tgt, count - num_open_tgt,
1129 &interp_tgt_remote_points, &num_stencils_per_tgt, &stencil_indices,
1130 &stencil_ranks, &w);
1132 weights, &interp_tgt_remote_points, num_stencils_per_tgt,
1133 stencil_indices, stencil_ranks, w);
1134 free(interp_tgt_remote_points.data);
1135 free(w);
1136 free(num_stencils_per_tgt);
1137 free(stencil_ranks);
1138 free(stencil_indices);
1139 for (size_t i = num_open_tgt; i < count; ++i)
1140 if (interp_results[i].stencil.count > 1)
1141 free(interp_results[i].stencil.data.multi);
1142 free(interp_results);
1143
1144 free_comm_stuff(comm);
1145}
1146
1147static size_t do_search_creep (struct interp_method * method,
1148 struct yac_interp_grid * interp_grid,
1149 size_t * tgt_points, size_t count,
1150 struct yac_interp_weights * weights,
1151 int * interpolation_complete) {
1152
1153 if (*interpolation_complete) return 0;
1154
1155 char const * routine = "do_search_creep";
1156
1157 struct interp_method_creep * creep_method =
1158 (struct interp_method_creep *)method;
1159
1160 int const max_creep_distance = creep_method->max_creep_distance;
1161
1162 if (max_creep_distance == 0) return 0;
1163
1167 "ERROR(%s): unsupported target field location "
1168 "(has to be YAC_LOC_CELL or YAC_LOC_CORNER)", routine)
1169
1170 MPI_Comm comm = yac_interp_grid_get_MPI_Comm(interp_grid);
1171 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
1173 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1174 int comm_rank, comm_size;
1175 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
1176 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1177
1178 // get the distributed owners for all tgts
1179 int * tgt_points_dist_owner =
1180 xmalloc(count * sizeof(*tgt_points_dist_owner));
1182 interp_grid, tgt_points, count, tgt_points_dist_owner);
1183 yac_int * tgt_points_global_ids =
1184 xmalloc(count * sizeof(*tgt_points_global_ids));
1186 interp_grid, tgt_points, count, tgt_points_global_ids);
1187
1188 // determine which of these points are local
1189 size_t local_count = 0;
1190 for (size_t i = 0; i < count; ++i) {
1191 if (tgt_points_dist_owner[i] == comm_rank) {
1192 local_count++;
1193 tgt_points_dist_owner[i] = INT_MAX;
1194 }
1195 }
1196 size_t send_count = count - local_count;
1197
1198 // sort target points (remote points to the start of the array)
1200 tgt_points_dist_owner, count, tgt_points, tgt_points_global_ids);
1201
1202 // relocate tgt_points to distributed owners
1203 for (size_t i = 0; i < send_count; ++i)
1204 sendcounts[tgt_points_dist_owner[i]]++;
1206 1, sendcounts, recvcounts, sdispls, rdispls, comm);
1207 size_t recv_count = recvcounts[comm_size-1] + rdispls[comm_size-1];
1208 yac_int * global_ids_buffer =
1209 xrealloc(
1210 tgt_points_global_ids,
1211 (send_count + local_count + recv_count) * sizeof(*global_ids_buffer));
1212 yac_int * send_global_ids = global_ids_buffer;
1213 yac_int * recv_global_ids = global_ids_buffer + send_count;
1214 yac_alltoallv_yac_int_p2p(
1215 send_global_ids, sendcounts, sdispls+1,
1216 recv_global_ids + local_count, recvcounts, rdispls, comm,
1217 routine, __LINE__);
1218
1219 // get local ids for received global ids
1220 size_t * temp_tgt_points =
1221 xmalloc((local_count + recv_count) * sizeof(*temp_tgt_points));
1222 memcpy(temp_tgt_points, tgt_points + send_count,
1223 local_count * sizeof(*tgt_points));
1225 interp_grid, recv_global_ids + local_count, recv_count,
1226 temp_tgt_points + local_count);
1227
1228 // do the interpolation for the redistributed target points
1229 int * interp_flag_buffer =
1230 xmalloc((send_count + local_count + recv_count) *
1231 sizeof(*interp_flag_buffer));
1232 int * temp_interp_flag = interp_flag_buffer + send_count;
1233 int * interp_flag = interp_flag_buffer;
1234 memset(temp_interp_flag, 0, (local_count + recv_count) * sizeof(*temp_interp_flag));
1236 interp_grid, max_creep_distance, temp_tgt_points, global_ids_buffer + send_count,
1237 local_count + recv_count, temp_interp_flag, weights);
1238 free(global_ids_buffer);
1239 free(temp_tgt_points);
1240
1241 // relocate interp_flag
1242 yac_alltoallv_int_p2p(
1243 temp_interp_flag + local_count, recvcounts, rdispls,
1244 interp_flag, sendcounts, sdispls+1, comm, routine, __LINE__);
1245
1246 // count number of points that can be interpolated and reorder
1247 // tgt_points accordingly (interpolated first)
1248 size_t num_interpolated_tgt = 0;
1249 for (size_t i = 0; i < count; ++i) {
1250 if (interp_flag[i]) {
1251 interp_flag[i] = 0;
1252 ++num_interpolated_tgt;
1253 } else {
1254 interp_flag[i] = 1;
1255 }
1256 }
1257 yac_quicksort_index_int_size_t(interp_flag, count, tgt_points);
1258 free(interp_flag_buffer);
1259 free(tgt_points_dist_owner);
1260
1261 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
1262
1263 return num_interpolated_tgt;
1264}
1265
1266struct interp_method * yac_interp_method_creep_new(int creep_distance) {
1267
1268 struct interp_method_creep * method_creep =
1269 xmalloc(1 * sizeof(*method_creep));
1270
1271 method_creep->vtable = &interp_method_creep_vtable;
1272 method_creep->max_creep_distance =
1273 (creep_distance >= 0)?creep_distance:INT_MAX;
1274
1275 return (struct interp_method*)method_creep;
1276}
1277
1278static void delete_creep(struct interp_method * method) {
1279 free(method);
1280}
#define ENSURE_ARRAY_SIZE(arrayp, curr_array_size, req_size)
void yac_interp_grid_get_tgt_cell_neighbours(struct yac_interp_grid *interp_grid, size_t *tgt_cells, size_t count, size_t *neighbours)
enum yac_location yac_interp_grid_get_tgt_field_location(struct yac_interp_grid *interp_grid)
void yac_interp_grid_get_tgt_global_ids(struct yac_interp_grid *interp_grid, size_t *tgt_points, size_t count, yac_int *tgt_global_ids)
MPI_Comm yac_interp_grid_get_MPI_Comm(struct yac_interp_grid *interp_grid)
struct remote_point * yac_interp_grid_get_tgt_remote_points(struct yac_interp_grid *interp_grid, size_t *tgt_points, size_t count)
void yac_interp_grid_determine_dist_tgt_owners(struct yac_interp_grid *interp_grid, size_t *tgt_indices, size_t count, int *owners)
void yac_interp_grid_get_tgt_vertex_neighbours(struct yac_interp_grid *interp_grid, size_t *vertices, size_t count, size_t **neigh_vertices, int *num_neighs_per_vertex)
struct yac_const_basic_grid_data * yac_interp_grid_get_basic_grid_data_tgt(struct yac_interp_grid *interp_grid)
void yac_interp_grid_tgt_global_to_local(struct yac_interp_grid *interp_grid, yac_int *tgt_global_ids, size_t count, size_t *tgt_local_ids)
static void do_search_creep_2(struct yac_interp_grid *interp_grid, int const max_creep_distance, size_t *tgt_points, yac_int *tgt_global_ids, size_t count, int *interp_flag, struct yac_interp_weights *weights)
static int compare_tgt_request_global_id(const void *a, const void *b)
static void pack_result_stencil(struct result_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype stencil_info_dt, MPI_Comm comm)
static struct result_stencils * unpack_result_stencils(size_t count, void *packed_data, size_t packed_data_size, MPI_Datatype stencil_info_dt, MPI_Comm comm)
static int compare_interp_result_stencil_local_id(const void *a, const void *b)
static int compare_stencil_info(const void *a, const void *b)
static MPI_Datatype yac_get_stencil_info_mpi_datatype(MPI_Comm comm)
static struct result_stencil * tgt_request_get_stencil(void *tgt_request)
static struct result_stencils * update_neigh_requests(struct comm_stuff comm, struct tgt_request *neigh_requests, size_t *request_count_, struct result_stencils *interp_stencils)
static struct result_stencil * interp_result_get_stencil(void *interp_result)
static void get_tgt_neigh_info_vertex(struct yac_interp_grid *interp_grid, size_t *tgt_local_ids, yac_int *tgt_global_ids, size_t count, size_t **neigh_local_ids_, yac_int **neigh_to_tgt_global_id_, size_t *total_num_neighbours_)
static struct result_stencils * exchange_interp_results(void *results, size_t result_count, size_t result_size, struct result_stencil *(*result_get_stencil)(void *), size_t *pack_order, int *ranks, struct comm_stuff comm)
static void free_comm_stuff(struct comm_stuff comm)
static struct comm_stuff init_comm_stuff(struct yac_interp_grid *interp_grid)
static int compare_result_stencil_global_id(const void *a, const void *b)
static int compare_interp_result_stencil(const void *a, const void *b)
static void get_tgt_neigh_info(struct yac_interp_grid *interp_grid, size_t *tgt_local_ids, yac_int *tgt_global_ids, size_t count, size_t **neigh_local_ids_, yac_int **neigh_global_ids_, yac_int **neigh_to_tgt_global_id_, size_t *total_num_neighbours_)
static void get_initial_results(struct yac_interp_grid *interp_grid, struct comm_stuff comm, struct yac_interp_weights *interp_weights, struct interp_result **interp_results, size_t *result_count)
static void extract_interp_info(struct yac_interp_grid *interp_grid, struct interp_result *interp_results, size_t result_count, struct remote_points *interp_tgt_remote_points, size_t **num_stencils_per_tgt_, size_t **stencil_indices_, int **stencil_ranks_, double **w_)
struct interp_method * yac_interp_method_creep_new(int creep_distance)
static struct result_stencil copy_result_stencil_multi(struct result_stencil *neigh_stencils, size_t *stencil_indices, size_t count, yac_int global_id, double weight)
static void pack_result_stencils(void *results, size_t result_count, size_t result_size, struct result_stencil *(*get_stencil)(void *), size_t *pack_order, void **pack_data, int *pack_sizes, MPI_Datatype stencil_info_dt, MPI_Comm comm)
static size_t do_search_creep(struct interp_method *method, struct yac_interp_grid *interp_grid, size_t *tgt_points, size_t count, struct yac_interp_weights *weights, int *interpolation_complete)
static void get_tgt_neigh_info_cell(struct yac_interp_grid *interp_grid, size_t *tgt_local_ids, yac_int *tgt_global_ids, size_t count, size_t **neigh_local_ids_, yac_int **neigh_to_tgt_global_id_, size_t *total_num_neighbours_)
static struct interp_result * init_interp_results(size_t *tgt_local_ids, yac_int *tgt_global_ids, size_t count)
static int compare_yac_int(const void *a, const void *b)
static size_t match_neigh_answers_with_tgts(struct result_stencils *neigh_answer, yac_int *neigh_global_ids, yac_int *neigh_to_tgt_global_id, size_t *stencil_indices, size_t *num_neighbours_, struct interp_result *interp_results, size_t *num_open_tgt_)
static void unpack_result_stencil(struct result_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype stencil_info_dt, MPI_Comm comm, struct stencil_info **stencil_info_buffer, size_t *stencil_info_buffer_array_size, size_t *stencil_info_buffer_size)
static void get_result_stencil_pack_sizes(void *results, size_t result_count, size_t result_size, struct result_stencil *(*get_stencil)(void *), size_t *pack_order, int *pack_sizes, MPI_Datatype stencil_info_dt, MPI_Comm comm)
static struct interp_method_vtable interp_method_creep_vtable
static void send_neigh_request(struct yac_interp_grid *interp_grid, struct comm_stuff comm, size_t *neigh_local_ids, yac_int *neigh_global_ids, size_t num_neighbours, struct tgt_request **neigh_requests_, size_t *request_count_)
static void delete_creep(struct interp_method *method)
static struct result_stencils * relocate_interp_results(struct yac_interp_grid *interp_grid, struct comm_stuff comm, struct interp_result *interp_results, size_t result_count)
static int compare_tgt_request_stencil_count(const void *a, const void *b)
static int compare_interp_result_global_id(const void *a, const void *b)
void yac_interp_weights_wcopy_weights(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_stencils_per_tgt, size_t *stencil_indices, int *stencil_ranks, double *w)
yac_int * yac_interp_weights_get_interp_tgt(struct yac_interp_weights *weights)
size_t yac_interp_weights_get_interp_count(struct yac_interp_weights *weights)
yac_location
Definition location.h:12
@ YAC_LOC_CORNER
Definition location.h:15
@ YAC_LOC_CELL
Definition location.h:14
#define xrealloc(ptr, size)
Definition ppm_xfuncs.h:67
#define xmalloc(size)
Definition ppm_xfuncs.h:66
MPI_Datatype stencil_info_dt
struct interp_method_vtable * vtable
size_t(* do_search)(struct interp_method *method, struct yac_interp_grid *grid, size_t *tgt_points, size_t count, struct yac_interp_weights *weights, int *interpolation_complete)
struct result_stencil stencil
structure containing the information (global id and location)
struct remote_point * data
union result_stencil::@10 data
struct stencil_info single * multi
struct stencil_info stencil_info_buffer[]
struct result_stencil * stencils
struct result_stencil stencil
const const_int_pointer num_vertices_per_cell
void yac_quicksort_index_yac_int_size_t(yac_int *a, size_t n, size_t *idx)
void yac_quicksort_index_int_yac_int(int *a, size_t n, yac_int *idx)
void yac_quicksort_index_yac_int_yac_int_size_t(yac_int *a, size_t n, yac_int *b, size_t *c)
void yac_quicksort_index_size_t_yac_int(size_t *a, size_t n, yac_int *idx)
void yac_quicksort_index_int_size_t(int *a, size_t n, size_t *idx)
void yac_quicksort_index_yac_int_yac_int(yac_int *a, size_t n, yac_int *idx)
void yac_quicksort_index_int_size_t_yac_int(int *a, size_t n, size_t *b, yac_int *c)
#define YAC_ASSERT_F(exp, format,...)
Definition yac_assert.h:19
#define YAC_ASSERT(exp, msg)
Definition yac_assert.h:16
void yac_generate_alltoallv_args(int count, size_t const *sendcounts, size_t *recvcounts, size_t *sdispls, size_t *rdispls, MPI_Comm comm)
Definition yac_mpi.c:577
void yac_free_comm_buffers(size_t *sendcounts, size_t *recvcounts, size_t *sdispls, size_t *rdispls)
Definition yac_mpi.c:633
void yac_get_comm_buffers(int count, size_t **sendcounts, size_t **recvcounts, size_t **sdispls, size_t **rdispls, MPI_Comm comm)
Definition yac_mpi.c:602
MPI_Datatype yac_create_resized(MPI_Datatype dt, size_t new_size, MPI_Comm comm)
Definition yac_mpi.c:556
#define yac_mpi_call(call, comm)
#define YAC_MPI_SIZE_T
Xt_int yac_int
Definition yac_types.h:15
#define yac_int_dt
Definition yac_types.h:16