YetAnotherCoupler 3.2.0_a
Loading...
Searching...
No Matches
interp_method_creep.c
Go to the documentation of this file.
1// Copyright (c) 2024 The YAC Authors
2//
3// SPDX-License-Identifier: BSD-3-Clause
4
5#ifdef HAVE_CONFIG_H
6// Get the definition of the 'restrict' keyword.
7#include "config.h"
8#endif
9
10#include <string.h>
11
13#include "interp_method_creep.h"
14#include "yac_mpi_internal.h"
15#include "ensure_array_size.h"
16
// Forward declarations of the file-local search and delete routines of the
// creep interpolation method.
//
// do_search_creep receives the method object, the interpolation grid, an
// array of `count` target points and the weights object; it returns a size_t
// (presumably the number of target points it handled -- TODO confirm against
// the definition).
17static size_t do_search_creep(struct interp_method * method,
18 struct yac_interp_grid * interp_grid,
19 size_t * tgt_points, size_t count,
20 struct yac_interp_weights * weights);
// delete_creep receives the method object (presumably to release it --
// TODO confirm against the definition).
21static void delete_creep(struct interp_method * method);
22
23static struct interp_method_vtable
27
33
35 int rank; // rank of the owner of the stencil
36 uint64_t idx; // index of the stencil that is supposed to be used
37 double weight; // weight of the stencil
38};
39
41 yac_int global_id; // global id of target point for which
42 // the stencil was originally generated
43 size_t count; // number of contributing stencils
44 union {
45 struct stencil_info single,
48};
49
51 struct result_stencil * stencils; // result stencils
52 size_t count; // number of result stencils
54};
55
57 size_t local_id; // local id of target points that is supposed to
58 // be interpolated by the stencil
59 yac_int global_id; // global id of target points that is supposed to
60 // be interpolated by the stencil
61 size_t idx;
63};
64
66 yac_int global_id; // global id of requested point
67 int rank; // rank of process which requested
69};
70
// Communication state that is passed (by value) to the exchange routines of
// this file.
// NOTE(review): other code in this file also accesses the members
// sendcounts, recvcounts, sdispls and rdispls of this struct.
71struct comm_stuff {
// rank of the local process in `comm` and number of processes in `comm`
// (filled from MPI_Comm_rank / MPI_Comm_size)
72 int rank, size;
// MPI datatype describing a struct stencil_info; obtained from
// yac_get_stencil_info_mpi_datatype and released with MPI_Type_free in
// free_comm_stuff
74 MPI_Datatype stencil_info_dt;
// communicator taken from the interpolation grid
// (yac_interp_grid_get_MPI_Comm)
75 MPI_Comm comm;
76};
77
// Builds the MPI datatype that describes a struct stencil_info.
//
// comm: communicator handed to the MPI error check and to yac_create_resized.
// Returns the datatype produced by yac_create_resized for the struct type
// built here, using sizeof(struct stencil_info) as size argument
// (presumably the resulting extent -- TODO confirm against
// yac_create_resized).
78static MPI_Datatype yac_get_stencil_info_mpi_datatype(MPI_Comm comm) {
79
// a dummy instance is only used to measure the member offsets
80 struct stencil_info dummy;
81 MPI_Datatype stencil_info_dt;
// one element for each of the three members (rank, idx, weight)
82 int array_of_blocklengths[] = {1, 1, 1};
// byte offset of each member relative to the start of the struct
83 const MPI_Aint array_of_displacements[] =
84 {(MPI_Aint)(intptr_t)(const void *)&(dummy.rank) -
85 (MPI_Aint)(intptr_t)(const void *)&dummy,
86 (MPI_Aint)(intptr_t)(const void *)&(dummy.idx) -
87 (MPI_Aint)(intptr_t)(const void *)&dummy,
88 (MPI_Aint)(intptr_t)(const void *)&(dummy.weight) -
89 (MPI_Aint)(intptr_t)(const void *)&dummy};
// MPI types matching int rank, uint64_t idx and double weight
90 const MPI_Datatype array_of_types[] =
91 {MPI_INT, MPI_UINT64_T, MPI_DOUBLE};
// NOTE(review): the trailing ", comm);" indicates that this call is wrapped
// in an error-checking call (presumably yac_mpi_call, as used elsewhere in
// this file) -- confirm the opening of that wrapper is present here.
93 MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacements,
94 array_of_types, &stencil_info_dt), comm);
95 return yac_create_resized(stencil_info_dt, sizeof(dummy), comm);
96}
97
99 void * results, size_t result_count, size_t result_size,
100 struct result_stencil*(*get_stencil)(void*), size_t * pack_order,
101 int * pack_sizes, MPI_Datatype stencil_info_dt, MPI_Comm comm) {
102
103 int pack_size_global_id, pack_size_count;
104 yac_mpi_call(MPI_Pack_size(1, yac_int_dt, comm, &pack_size_global_id), comm);
105 yac_mpi_call(MPI_Pack_size(1, MPI_UINT64_T, comm, &pack_size_count), comm);
106
107 for (size_t i = 0; i < result_count; ++i) {
108
109 struct result_stencil * stencil =
110 (*get_stencil)(
111 (void*)((unsigned char*)results + pack_order[i] * result_size));
112 size_t curr_count = stencil->count;
113 int pack_size_stencils;
115 MPI_Pack_size(
116 (int)curr_count, stencil_info_dt, comm, &pack_size_stencils), comm);
117 pack_sizes[i] = pack_size_global_id +
118 pack_size_count +
119 pack_size_stencils;
120 }
121}
122
124 struct result_stencil * stencil, void * buffer, int buffer_size,
125 int * position, MPI_Datatype stencil_info_dt, MPI_Comm comm) {
126
127 // global_id
129 MPI_Pack(&(stencil->global_id), 1, yac_int_dt,
130 buffer, buffer_size, position, comm), comm);
131
132 // count
133 uint64_t count_uint64_t = (uint64_t)(stencil->count);
135 MPI_Pack(&count_uint64_t, 1, MPI_UINT64_T,
136 buffer, buffer_size, position, comm), comm);
137
138 // stencils
139 struct stencil_info * stencils =
140 (count_uint64_t == 1)?(&(stencil->data.single)):(stencil->data.multi);
142 MPI_Pack(stencils, (int)count_uint64_t, stencil_info_dt,
143 buffer, buffer_size, position, comm), comm);
144}
145
147 void * results, size_t result_count, size_t result_size,
148 struct result_stencil*(*get_stencil)(void*), size_t * pack_order,
149 void ** pack_data, int * pack_sizes, MPI_Datatype stencil_info_dt,
150 MPI_Comm comm) {
151
153 results, result_count, result_size, get_stencil, pack_order,
154 pack_sizes, stencil_info_dt, comm);
155
156 size_t temp_total_pack_size = 0;
157 for (size_t i = 0; i < result_count; ++i)
158 temp_total_pack_size += (size_t)(pack_sizes[i]);
159
160 void * pack_data_ = xmalloc(temp_total_pack_size);
161 size_t total_pack_size = 0;
162
163 for (size_t i = 0; i < result_count; ++i) {
164
165 struct result_stencil * curr_stencil =
166 (*get_stencil)(
167 (void*)((unsigned char*)results + pack_order[i] * result_size));
168
169 int position = 0;
170 void * buffer = (void*)((char*)pack_data_ + total_pack_size);
171 int buffer_size = (int)(temp_total_pack_size - total_pack_size);
172
173 // stencil
175 curr_stencil, buffer, buffer_size, &position, stencil_info_dt, comm);
176
177 pack_sizes[i] = position;
178 total_pack_size += (size_t)position;
179 }
180
181 if (total_pack_size != temp_total_pack_size)
182 pack_data_ = xrealloc(pack_data_, total_pack_size);
183
184 *pack_data = pack_data_;
185}
186
188 struct result_stencil * stencil, void * buffer, int buffer_size,
189 int * position, MPI_Datatype stencil_info_dt, MPI_Comm comm,
190 struct stencil_info ** stencil_info_buffer,
191 size_t * stencil_info_buffer_array_size,
192 size_t * stencil_info_buffer_size) {
193
194 // global_id
196 MPI_Unpack(buffer, buffer_size, position,
197 &(stencil->global_id), 1, yac_int_dt, comm), comm);
198
199 // count
200 uint64_t count_uint64_t;
202 MPI_Unpack(buffer, buffer_size, position,
203 &count_uint64_t, 1, MPI_UINT64_T, comm), comm);
204 size_t count = ((stencil->count = (size_t)count_uint64_t));
205
206 // stencils
207 struct stencil_info * stencils;
208 if (count_uint64_t == 1) {
209 stencils = &(stencil->data.single);
210 } else {
212 *stencil_info_buffer, *stencil_info_buffer_array_size,
213 *stencil_info_buffer_size + count);
214 stencils =
215 ((stencil->data.multi =
216 *stencil_info_buffer + *stencil_info_buffer_size));
217 *stencil_info_buffer_size += count;
218 }
220 MPI_Unpack(buffer, buffer_size, position,
221 stencils, (int)count, stencil_info_dt, comm), comm);
222}
223
225 size_t count, void * packed_data, size_t packed_data_size,
226 MPI_Datatype stencil_info_dt, MPI_Comm comm) {
227
228 struct stencil_info * stencil_info_buffer = NULL;
229 size_t stencil_info_buffer_array_size = 0;
230 size_t stencil_info_buffer_size = 0;
231
233 xmalloc(count * sizeof(*result_stencils));
234
235 for (size_t i = 0, offset = 0; i < count; ++i) {
236
237 int position = 0;
238 void * curr_buffer = (void*)((char*)packed_data + offset);
239 int buffer_size = (int)(packed_data_size - offset);
240 struct result_stencil * curr_stencil = result_stencils + i;
241
243 curr_stencil, curr_buffer, buffer_size, &position, stencil_info_dt, comm,
244 &stencil_info_buffer, &stencil_info_buffer_array_size,
245 &stencil_info_buffer_size);
246 offset += (size_t)position;
247 }
248
249 struct result_stencils * results =
250 xmalloc(sizeof(*results) +
251 stencil_info_buffer_size * sizeof(results->stencil_info_buffer[0]));
252 results->stencils = result_stencils;
253 results->count = count;
254 memcpy(&(results->stencil_info_buffer[0]), stencil_info_buffer,
255 stencil_info_buffer_size * sizeof(*stencil_info_buffer));
257 for (size_t i = 0, offset = 0; i < count; ++i) {
258 size_t curr_count = result_stencils[i].count;
259 if (curr_count > 1) {
260 result_stencils[i].data.multi = &(results->stencil_info_buffer[offset]);
261 offset += curr_count;
262 }
263 }
264
265 return results;
266}
267
269 void * results, size_t result_count, size_t result_size,
270 struct result_stencil*(*result_get_stencil)(void*), size_t * pack_order,
271 int * ranks, struct comm_stuff comm) {
272
273 // mark and count local results
274 size_t local_count = 0;
275 for (size_t i = 0; i < result_count; ++i) {
276 if (ranks[i] == comm.rank) {
277 ranks[i] = INT_MAX;
278 ++local_count;
279 }
280 }
281 size_t send_count = result_count - local_count;
282
283 // sort results by rank (local results go to the end of the array)
284 yac_quicksort_index_int_size_t(ranks, result_count, pack_order);
285
286 // pack the result stencils that need to be sent to other processes
287 void * send_buffer;
288 int * pack_sizes = xmalloc(send_count * sizeof(*pack_sizes));
290 results, send_count, result_size,
291 result_get_stencil, pack_order, &send_buffer, pack_sizes,
292 comm.stencil_info_dt, comm.comm);
293
294 // set up comm buffers
295 size_t * num_results_per_rank =
296 xmalloc((size_t)comm.size * sizeof(*num_results_per_rank));
297 memset(comm.sendcounts, 0,
298 (size_t)comm.size * sizeof(*comm.sendcounts));
299 size_t j = 0;
300 for (int rank = 0; rank < comm.size; ++rank) {
301 size_t curr_num_results = 0;
302 size_t curr_sendcount = 0;
303 while ((j < send_count) && (ranks[j] == rank)) {
304 curr_sendcount += (size_t)(pack_sizes[j++]);
305 curr_num_results++;
306 }
307 num_results_per_rank[rank] = curr_num_results;
309 curr_sendcount <= INT_MAX,
310 "ERROR(exchange_interp_results): pack size to big")
311 comm.sendcounts[rank] = curr_sendcount;
312 }
313 free(pack_sizes);
315 1, comm.sendcounts, comm.recvcounts,
316 comm.sdispls, comm.rdispls, comm.comm);
318 MPI_Alltoall(MPI_IN_PLACE, 1, YAC_MPI_SIZE_T,
319 num_results_per_rank, 1, YAC_MPI_SIZE_T, comm.comm),
320 comm.comm);
321 size_t recv_count = 0;
322 for (int i = 0; i < comm.size; ++i)
323 recv_count += num_results_per_rank[i];
324 free(num_results_per_rank);
325
326 size_t recv_size = comm.recvcounts[comm.size - 1] +
327 comm.rdispls[comm.size - 1];
328 void * recv_buffer = xmalloc(recv_size);
329
330 // exchange result stencils
331 yac_alltoallv_packed_p2p(
332 send_buffer, comm.sendcounts, comm.sdispls+1,
333 recv_buffer, comm.recvcounts, comm.rdispls, comm.comm);
334 free(send_buffer);
335
336 // unpack stencils
339 recv_count, recv_buffer, recv_size, comm.stencil_info_dt, comm.comm);
340 free(recv_buffer);
341
342 result_stencils->count += local_count;
344 xrealloc(
346 (recv_count + local_count) * sizeof(*(result_stencils->stencils)));
347
348 // add the local result stencils
349 struct result_stencil * local_stencils =
350 result_stencils->stencils + recv_count;
351 pack_order += send_count;
352 for (size_t i = 0; i < local_count; ++i)
353 local_stencils[i] =
354 *(*result_get_stencil)(
355 (void*)((unsigned char*)results + pack_order[i] * result_size));
356
357 return result_stencils;
358}
359
361 void * interp_result) {
362
363 return &(((struct interp_result*)interp_result)->stencil);
364}
365
367 struct yac_interp_grid * interp_grid, struct comm_stuff comm,
368 struct interp_result * interp_results, size_t result_count) {
369
370 // get the newly interpolated target points
371 size_t * tgt_points = xmalloc(result_count * sizeof(*tgt_points));
372 for (size_t i = 0; i < result_count; ++i)
373 tgt_points[i] = interp_results[i].local_id;
374
375 // get the distributed owners for all targets
376 int * tgt_points_dist_owner =
377 xmalloc(result_count * sizeof(*tgt_points_dist_owner));
379 interp_grid, tgt_points, result_count, tgt_points_dist_owner);
380 size_t * pack_order = tgt_points;
381 for (size_t i = 0; i < result_count; ++i) pack_order[i] = i;
382
383 struct result_stencils * relocated_results =
385 interp_results, result_count, sizeof(*interp_results),
386 interp_result_get_stencil, pack_order, tgt_points_dist_owner,
387 comm);
388 free(pack_order);
389 free(tgt_points_dist_owner);
390
391 return relocated_results;
392}
393
395 struct yac_interp_grid * interp_grid, struct comm_stuff comm,
396 struct yac_interp_weights * interp_weights,
397 struct interp_result ** interp_results, size_t * result_count) {
398
399 // get list of all already interpolated target points
400 size_t num_interpolated_tgt =
402 yac_int * interpolated_tgts_global_ids =
403 yac_interp_weights_get_interp_tgt(interp_weights);
404 size_t * interpolated_tgts_local_ids =
405 xmalloc(num_interpolated_tgt * sizeof(*interpolated_tgts_local_ids));
407 interp_grid, interpolated_tgts_global_ids, num_interpolated_tgt,
408 interpolated_tgts_local_ids);
409
410 // initialise initial interpolation results
411 struct interp_result * initial_interp_results =
412 (num_interpolated_tgt > 0)?
413 xmalloc(num_interpolated_tgt * sizeof(*initial_interp_results)):NULL;
414 for (size_t i = 0; i < num_interpolated_tgt; ++i) {
415 initial_interp_results[i].local_id = interpolated_tgts_local_ids[i];
416 initial_interp_results[i].global_id = interpolated_tgts_global_ids[i];
417 initial_interp_results[i].idx = SIZE_MAX;
418 initial_interp_results[i].stencil.global_id =
419 interpolated_tgts_global_ids[i];
420 initial_interp_results[i].stencil.count = 1;
421 initial_interp_results[i].stencil.data.single.rank = comm.rank;
422 initial_interp_results[i].stencil.data.single.idx = (uint64_t)i;
423 initial_interp_results[i].stencil.data.single.weight = 1.0;
424 }
425 free(interpolated_tgts_local_ids);
426 free(interpolated_tgts_global_ids);
427
428 *interp_results = initial_interp_results;
429 *result_count = num_interpolated_tgt;
430}
431
433 const void * a, const void * b) {
434
435 int ret = (((const struct interp_result *)a)->stencil.count >
436 ((const struct interp_result *)b)->stencil.count) -
437 (((const struct interp_result *)a)->stencil.count <
438 ((const struct interp_result *)b)->stencil.count);
439
440 if (ret) return ret;
441
442 return (((const struct interp_result *)a)->global_id >
443 ((const struct interp_result *)b)->global_id) -
444 (((const struct interp_result *)a)->global_id <
445 ((const struct interp_result *)b)->global_id);
446}
447
449 const void * a, const void * b) {
450
451 return (((const struct interp_result *)a)->local_id >
452 ((const struct interp_result *)b)->local_id) -
453 (((const struct interp_result *)a)->local_id <
454 ((const struct interp_result *)b)->local_id);
455}
456
457static int compare_interp_result_global_id(const void * a, const void * b) {
458
459 return (((const struct interp_result *)a)->global_id >
460 ((const struct interp_result *)b)->global_id) -
461 (((const struct interp_result *)a)->global_id <
462 ((const struct interp_result *)b)->global_id);
463}
464
466 const void * a, const void * b) {
467
468 return (((const struct tgt_request *)a)->stencil.count >
469 ((const struct tgt_request *)b)->stencil.count) -
470 (((const struct tgt_request *)a)->stencil.count <
471 ((const struct tgt_request *)b)->stencil.count);
472}
473
475 const void * a, const void * b) {
476
477 return (((const struct tgt_request *)a)->global_id >
478 ((const struct tgt_request *)b)->global_id) -
479 (((const struct tgt_request *)a)->global_id <
480 ((const struct tgt_request *)b)->global_id);
481}
482
484 const void * a, const void * b) {
485
486 return (((const struct result_stencil *)a)->global_id >
487 ((const struct result_stencil *)b)->global_id) -
488 (((const struct result_stencil *)a)->global_id <
489 ((const struct result_stencil *)b)->global_id);
490}
491
492static inline int compare_yac_int(const void * a, const void * b) {
493
494 return ((*(const yac_int *)a) >
495 (*(const yac_int *)b)) -
496 ((*(const yac_int *)a) <
497 (*(const yac_int *)b));
498}
499
501 struct yac_interp_grid * interp_grid, struct comm_stuff comm,
502 struct interp_result * interp_results, size_t result_count,
503 struct remote_points * interp_tgt_remote_points,
504 size_t ** num_stencils_per_tgt_, size_t ** stencil_indices_,
505 int ** stencil_ranks_, double ** w_) {
506
507 // sort interpolation results by target local id
508 qsort(interp_results, result_count, sizeof(*interp_results),
510
511 size_t total_num_stencils = 0;
512 size_t * interpolated_tgts_local_ids =
513 xmalloc(result_count * sizeof(*interpolated_tgts_local_ids));
514 for (size_t i = 0; i < result_count; ++i) {
515 interpolated_tgts_local_ids[i] = interp_results[i].local_id;
516 total_num_stencils += interp_results[i].stencil.count;
517 }
518 interp_tgt_remote_points->data =
520 interp_grid, interpolated_tgts_local_ids, result_count);
521 interp_tgt_remote_points->count = result_count;
522 free(interpolated_tgts_local_ids);
523
524 size_t * num_stencils_per_tgt =
525 xmalloc(result_count * sizeof(*num_stencils_per_tgt));
526 size_t * stencil_indices =
527 xmalloc(total_num_stencils * sizeof(*stencil_indices));
528 int * stencil_ranks =
529 xmalloc(total_num_stencils * sizeof(*stencil_ranks));
530 double * w = xmalloc(total_num_stencils * sizeof(*w));
531
532 for (size_t i = 0, j = 0; i < result_count; ++i) {
533
534 size_t curr_count = interp_results[i].stencil.count;
535
536 num_stencils_per_tgt[i] = curr_count;
537
538 if (curr_count == 1) {
539 stencil_indices[j] = interp_results[i].stencil.data.single.idx;
540 stencil_ranks[j] = interp_results[i].stencil.data.single.rank;
541 w[j] = interp_results[i].stencil.data.single.weight;
542 ++j;
543 } else {
544 for (size_t k = 0; k < curr_count; ++k, ++j) {
545 stencil_indices[j] = interp_results[i].stencil.data.multi[k].idx;
546 stencil_ranks[j] = interp_results[i].stencil.data.multi[k].rank;
547 w[j] = interp_results[i].stencil.data.multi[k].weight;
548 }
549 }
550 }
551
552 *num_stencils_per_tgt_ = num_stencils_per_tgt;
553 *stencil_indices_ = stencil_indices;
554 *stencil_ranks_ = stencil_ranks;
555 *w_ = w;
556}
557
559 void * tgt_request) {
560
561 return &(((struct tgt_request*)tgt_request)->stencil);
562}
563
565 struct yac_interp_grid * interp_grid, struct comm_stuff comm,
566 struct tgt_request * neigh_requests, size_t * request_count_,
567 struct result_stencils * interp_stencils) {
568
569 size_t request_count = *request_count_;
570 size_t match_count = 0;
571
572 struct result_stencil * stencils = interp_stencils->stencils;
573 size_t stencil_count = interp_stencils->count;
574
575 // match current result points with neighbour requests
576 qsort(
577 stencils, stencil_count, sizeof(*stencils),
579 for (size_t i = 0, j = 0; i < stencil_count; ++i) {
580 yac_int curr_global_id = stencils[i].global_id;
581
582 while ((j < request_count) &&
583 (neigh_requests[j].global_id < curr_global_id)) ++j;
584 while ((j < request_count) &&
585 (neigh_requests[j].global_id == curr_global_id)) {
586
587 ++match_count;
588 neigh_requests[j].stencil = stencils[i];
589 ++j;
590 }
591 }
592
593 // sort neighbour request by stencil count
594 // (open requests have a stencil count of 0)
595 qsort(neigh_requests, request_count, sizeof(*neigh_requests),
597
598 request_count -= match_count;
599 *request_count_ = request_count;
600 struct tgt_request * neigh_request_matches = neigh_requests + request_count;
601
602 // sort open neighbour requests by global id
603 qsort(neigh_requests, request_count, sizeof(*neigh_requests),
605
606 size_t * pack_order = xmalloc(match_count * sizeof(*pack_order));
607 int * origin_rank = xmalloc(match_count * sizeof(*origin_rank));
608 for (size_t i = 0; i < match_count; ++i) {
609 pack_order[i] = i;
610 origin_rank[i] = neigh_request_matches[i].rank;
611 }
612
613 struct result_stencils * relocated_results =
615 neigh_request_matches, match_count, sizeof(*neigh_request_matches),
616 tgt_request_get_stencil, pack_order, origin_rank, comm);
617 free(origin_rank);
618 free(pack_order);
619
620 return relocated_results;
621}
622
624 struct yac_interp_grid * interp_grid, size_t * tgt_local_ids,
625 yac_int * tgt_global_ids, size_t count,
626 size_t ** neigh_local_ids_, yac_int ** neigh_to_tgt_global_id_,
627 size_t * total_num_neighbours_) {
628
629 // get neighbour cells for all target cells
630 size_t total_num_neighbours = 0;
631 struct yac_const_basic_grid_data * tgt_basic_grid_data =
633 for (size_t i = 0; i < count; ++i)
634 total_num_neighbours +=
635 tgt_basic_grid_data->num_vertices_per_cell[tgt_local_ids[i]];
636 size_t * neigh_local_ids =
637 xmalloc(total_num_neighbours * sizeof(*neigh_local_ids));
639 interp_grid, tgt_local_ids, count, neigh_local_ids);
640
641 // generate mapping between neighbour global ids and target points
642 yac_int * neigh_to_tgt_global_id =
643 xmalloc(total_num_neighbours * sizeof(*neigh_to_tgt_global_id));
644 for (size_t i = 0, j = 0; i < count; ++i) {
645 int curr_num_neigh =
646 tgt_basic_grid_data->num_vertices_per_cell[tgt_local_ids[i]];
647 yac_int curr_tgt_global_id = tgt_global_ids[i];
648 for (int k = 0; k < curr_num_neigh; ++k, ++j)
649 neigh_to_tgt_global_id[j] = curr_tgt_global_id;
650 }
651
652 *neigh_local_ids_ = neigh_local_ids;
653 *neigh_to_tgt_global_id_ = neigh_to_tgt_global_id;
654 *total_num_neighbours_ = total_num_neighbours;
655}
656
658 struct yac_interp_grid * interp_grid, size_t * tgt_local_ids,
659 yac_int * tgt_global_ids, size_t count,
660 size_t ** neigh_local_ids_, yac_int ** neigh_to_tgt_global_id_,
661 size_t * total_num_neighbours_) {
662
663 int * num_neighs_per_vertex = xmalloc(count * sizeof(*num_neighs_per_vertex));
664 size_t * neigh_vertices;
665
667 interp_grid, tgt_local_ids, count,
668 &neigh_vertices, num_neighs_per_vertex);
669
670 size_t total_num_neighbours = 0;
671 for (size_t i = 0; i < count; ++i)
672 total_num_neighbours += (size_t)(num_neighs_per_vertex[i]);
673
674 // generate mapping between neighbour global ids and target points
675 yac_int * neigh_to_tgt_global_id =
676 xmalloc(total_num_neighbours * sizeof(*neigh_to_tgt_global_id));
677 for (size_t i = 0, j = 0; i < count; ++i) {
678 int curr_num_neigh = num_neighs_per_vertex[i];
679 yac_int curr_tgt_global_id = tgt_global_ids[i];
680 for (int k = 0; k < curr_num_neigh; ++k, ++j)
681 neigh_to_tgt_global_id[j] = curr_tgt_global_id;
682 }
683 free(num_neighs_per_vertex);
684
685 *neigh_local_ids_ = neigh_vertices;
686 *neigh_to_tgt_global_id_ = neigh_to_tgt_global_id;
687 *total_num_neighbours_ = total_num_neighbours;
688}
689
690// extracts basic information about the neighbours of the target points
692 struct yac_interp_grid * interp_grid,
693 size_t * tgt_local_ids, yac_int * tgt_global_ids, size_t count,
694 size_t ** neigh_local_ids_, yac_int ** neigh_global_ids_,
695 yac_int ** neigh_to_tgt_global_id_, size_t * total_num_neighbours_) {
696
697 // get basic neighbour information
698 size_t * neigh_local_ids;
699 yac_int * neigh_to_tgt_global_id;
700 size_t total_num_neighbours;
701 enum yac_location tgt_field_location =
704 (tgt_field_location == YAC_LOC_CELL) ||
705 (tgt_field_location == YAC_LOC_CORNER),
706 "ERROR(get_tgt_neigh_info): unsupported target field location")
707 if (tgt_field_location == YAC_LOC_CELL)
709 interp_grid, tgt_local_ids, tgt_global_ids, count,
710 &neigh_local_ids, &neigh_to_tgt_global_id, &total_num_neighbours);
711 else
713 interp_grid, tgt_local_ids, tgt_global_ids, count,
714 &neigh_local_ids, &neigh_to_tgt_global_id, &total_num_neighbours);
715
716 // remove invalid neighbour indices
718 neigh_local_ids, total_num_neighbours, neigh_to_tgt_global_id);
719 while((total_num_neighbours > 0) &&
720 (neigh_local_ids[total_num_neighbours-1] == SIZE_MAX))
721 --total_num_neighbours;
722
723 // get global ids for all neighbours
724 yac_int * neigh_global_ids =
725 xmalloc(total_num_neighbours * sizeof(*neigh_global_ids));
727 interp_grid, neigh_local_ids, total_num_neighbours, neigh_global_ids);
728
729 *neigh_local_ids_ =
730 xrealloc(neigh_local_ids, total_num_neighbours * sizeof(*neigh_local_ids));
731 *neigh_global_ids_ = neigh_global_ids;
732 *neigh_to_tgt_global_id_ =
733 xrealloc(neigh_to_tgt_global_id,
734 total_num_neighbours * sizeof(*neigh_to_tgt_global_id));
735 *total_num_neighbours_ = total_num_neighbours;
736}
737
738// sends request for target points neighbours to the respective
739// distributed owners
741 struct yac_interp_grid * interp_grid, struct comm_stuff comm,
742 size_t * neigh_local_ids, yac_int * neigh_global_ids,
743 size_t num_neighbours, struct tgt_request ** neigh_requests_,
744 size_t * request_count_) {
745
746 // get the distributed owner of the neighbour target points
747 int * neigh_dist_owner =
748 xmalloc(num_neighbours * sizeof(*neigh_dist_owner));
750 interp_grid, neigh_local_ids, num_neighbours,
751 neigh_dist_owner);
752
753 yac_int * send_neigh_global_ids =
754 xmalloc(num_neighbours * sizeof(*send_neigh_global_ids));
755 memcpy(send_neigh_global_ids, neigh_global_ids,
756 num_neighbours * sizeof(*send_neigh_global_ids));
757
758 // sort send buffer by rank
760 neigh_dist_owner, num_neighbours, send_neigh_global_ids);
761
762 // remove duplicated global ids
763 size_t to = 0, new_to = 0, from = 0, new_from = 0;
764 for (int rank = 0; rank < comm.size; ++rank) {
765 while ((new_from < num_neighbours) &&
766 (neigh_dist_owner[new_from] == rank)) new_from++;
767 size_t curr_count = new_from - from;
768 qsort(send_neigh_global_ids + from, curr_count,
769 sizeof(*send_neigh_global_ids), compare_yac_int);
770 yac_int prev_global_id =
771 (curr_count > 0)?(send_neigh_global_ids[from]-1):0;
772 for (; from < new_from; ++from) {
773 yac_int curr_global_id = send_neigh_global_ids[from];
774 if (prev_global_id != curr_global_id) {
775 send_neigh_global_ids[new_to++] = curr_global_id;
776 prev_global_id = curr_global_id;
777 }
778 }
779 curr_count = new_to - to;
780 to = new_to;
781 comm.sendcounts[rank] = curr_count;
782 }
783 free(neigh_dist_owner);
784
785 // send request for all neighbours
787 1, comm.sendcounts, comm.recvcounts, comm.sdispls, comm.rdispls, comm.comm);
788 size_t request_count = comm.recvcounts[comm.size-1] +
789 comm.rdispls[comm.size-1];
790 yac_int * recv_neigh_global_ids =
791 xmalloc(request_count * sizeof(*recv_neigh_global_ids));
792 yac_alltoallv_yac_int_p2p(
793 send_neigh_global_ids, comm.sendcounts, comm.sdispls+1,
794 recv_neigh_global_ids, comm.recvcounts, comm.rdispls, comm.comm);
795 struct tgt_request * neigh_requests =
796 xmalloc(request_count * sizeof(*neigh_requests));
797 for (int i = 0, k = 0; i < comm.size; ++i) {
798 for (size_t j = 0; j < comm.recvcounts[i]; ++j, ++k) {
799 neigh_requests[k].global_id = recv_neigh_global_ids[k];
800 neigh_requests[k].rank = i;
801 neigh_requests[k].stencil.count = 0;
802 }
803 }
804 qsort(neigh_requests, request_count, sizeof(*neigh_requests),
806 free(send_neigh_global_ids);
807 free(recv_neigh_global_ids);
808
809 *neigh_requests_ = neigh_requests;
810 *request_count_ = request_count;
811}
812
814 size_t * tgt_local_ids, yac_int * tgt_global_ids, size_t count) {
815
816 struct interp_result * interp_results =
817 xmalloc(count * sizeof(*interp_results));
818 for (size_t i = 0; i < count; ++i) {
819 interp_results[i].local_id = tgt_local_ids[i];
820 interp_results[i].global_id = tgt_global_ids[i];
821 interp_results[i].idx = i;
822 interp_results[i].stencil.count = 0;
823 }
824 qsort(interp_results, count, sizeof(*interp_results),
826 return interp_results;
827}
828
830 const void * a, const void * b) {
831
832 int ret = ((struct stencil_info *)a)->rank -
833 ((struct stencil_info *)b)->rank;
834 if (ret) return ret;
835
836 return (((struct stencil_info *)a)->idx >
837 ((struct stencil_info *)b)->idx) -
838 (((struct stencil_info *)a)->idx <
839 ((struct stencil_info *)b)->idx);
840}
841
843 struct result_stencil * neigh_stencils, size_t * stencil_indices,
844 size_t count, yac_int global_id, double weight) {
845
846 size_t stencil_info_count = 0;
847
848 for (size_t i = 0; i < count; ++i)
849 stencil_info_count += neigh_stencils[stencil_indices[i]].count;
850
851 struct result_stencil stencil;
852
853 struct stencil_info * stencil_infos;
854 if (stencil_info_count > 1) {
855 stencil_infos =
856 ((stencil.data.multi =
857 xmalloc(stencil_info_count * sizeof(*stencil_infos))));
858 } else {
859 stencil_infos = &(stencil.data.single);
860 }
861
862 stencil.global_id = global_id;
863 for (size_t i = 0, j = 0; i < count; ++i) {
864 struct result_stencil * curr_stencil =
865 neigh_stencils + stencil_indices[i];
866 size_t curr_stencil_info_count = curr_stencil->count;
867 struct stencil_info * curr_stencil_infos =
868 (curr_stencil_info_count == 1)?
869 (&(curr_stencil->data.single)):(curr_stencil->data.multi);
870 memcpy(stencil_infos + j, curr_stencil_infos,
871 curr_stencil_info_count * sizeof(*stencil_infos));
872 for (size_t k = 0; k < curr_stencil_info_count; ++k, ++j)
873 stencil_infos[j].weight *= weight;
874 }
875
876 // in case we have multiple stencil infos, there may be duplicated
877 // entries from different sources
878 // here we remove these duplicated entries
879 if (stencil_info_count > 1) {
880
881 // sort the stencils
882 qsort(stencil_infos, stencil_info_count, sizeof(*stencil_infos),
884
885 // remove duplicated stencils
886 struct stencil_info * prev_stencil_info = stencil_infos,
887 * curr_stencil_info = stencil_infos + 1;
888 size_t new_stencil_info_count = 1;
889 for (size_t i = 1; i < stencil_info_count; ++i, ++curr_stencil_info) {
891 curr_stencil_info, prev_stencil_info)) {
892 if (new_stencil_info_count != i)
893 stencil_infos[new_stencil_info_count] =
894 *curr_stencil_info;
895 ++new_stencil_info_count;
896 prev_stencil_info = curr_stencil_info;
897 } else {
898 stencil_infos[new_stencil_info_count-1].weight +=
899 curr_stencil_info->weight;
900 }
901 }
902 if (new_stencil_info_count != stencil_info_count) {
903 stencil_info_count = new_stencil_info_count;
904 if (new_stencil_info_count == 1) {
905 stencil.data.single = *stencil_infos;
906 free(stencil_infos);
907 } else {
908 stencil.data.multi =
909 xrealloc(
910 stencil_infos, stencil_info_count * sizeof(*stencil_infos));
911 }
912 }
913 }
914 stencil.count = stencil_info_count;
915
916 return stencil;
917}
918
920 struct result_stencils * neigh_answer,
921 yac_int * neigh_global_ids, yac_int * neigh_to_tgt_global_id,
922 size_t * stencil_indices, size_t * num_neighbours_,
923 struct interp_result * interp_results, size_t * num_open_tgt_) {
924
925 size_t num_neighbours = *num_neighbours_;
926 size_t num_open_tgt = *num_open_tgt_;
927
928 struct result_stencil * neigh_stencils = neigh_answer->stencils;
929 size_t answer_count = neigh_answer->count;
930
931 // match received neigh request answers with tgt neighbours
932 // (remove duplicated results)
933 qsort(
934 neigh_stencils, answer_count, sizeof(*neigh_stencils),
936 size_t match_count = 0;
937 for (size_t i = 0, j = 0; i < answer_count; ++i) {
938 yac_int curr_global_id = neigh_stencils[i].global_id;
939
940 while ((j < num_neighbours) &&
941 (neigh_global_ids[j] < curr_global_id)) ++j;
942
943 while ((j < num_neighbours) &&
944 (neigh_global_ids[j] == curr_global_id)) {
945 neigh_global_ids[j] = XT_INT_MAX;
946 stencil_indices[j] = i;
947 ++match_count;
948 ++j;
949 }
950 }
951
952 // move fulfilled neighbour requests to the end of the array
954 neigh_global_ids, num_neighbours, neigh_to_tgt_global_id, stencil_indices);
955 num_neighbours -= match_count;
956 *num_neighbours_ = num_neighbours;
957
958 neigh_to_tgt_global_id += num_neighbours;
959 stencil_indices += num_neighbours;
960
961 // sort matches by target global ids
963 neigh_to_tgt_global_id, match_count, stencil_indices);
964
965 // set stencils for target matches
966 for (size_t i = 0, k = 0; i < match_count;) {
967
968 size_t prev_i = i;
969
970 // count the number of stencils for the current target
971 yac_int curr_tgt_global_id = neigh_to_tgt_global_id[i++];
972 while ((i < match_count) &&
973 (neigh_to_tgt_global_id[i] == curr_tgt_global_id)) ++i;
974 size_t curr_stencil_count = i - prev_i;
975
976 while ((k < num_open_tgt) &&
977 (interp_results[k].global_id < curr_tgt_global_id)) ++k;
978
979 while ((k < num_open_tgt) &&
980 (interp_results[k].global_id == curr_tgt_global_id)) {
981
982 interp_results[k].stencil =
984 neigh_stencils, stencil_indices + prev_i, curr_stencil_count,
985 curr_tgt_global_id, 1.0 / (double)curr_stencil_count);
986 ++k;
987 }
988 }
989
990 // move successfully interpolated target points to the end of
991 // the array
992 qsort(interp_results, num_open_tgt, sizeof(*interp_results),
994 size_t new_num_open_tgt = 0;
995 while ((new_num_open_tgt < num_open_tgt) &&
996 (interp_results[new_num_open_tgt].stencil.count == 0))
997 new_num_open_tgt++;
998 *num_open_tgt_ = new_num_open_tgt;
999 return num_open_tgt - new_num_open_tgt;
1000}
1001
1003 struct yac_interp_grid * interp_grid) {
1004
1005 struct comm_stuff comm;
1006
1007 comm.comm = yac_interp_grid_get_MPI_Comm(interp_grid);
1009 1, &comm.sendcounts, &comm.recvcounts, &comm.sdispls, &comm.rdispls,
1010 comm.comm);
1011 yac_mpi_call(MPI_Comm_rank(comm.comm, &comm.rank), comm.comm);
1012 yac_mpi_call(MPI_Comm_size(comm.comm, &comm.size), comm.comm);
1013 comm.stencil_info_dt = yac_get_stencil_info_mpi_datatype(comm.comm);
1014
1015 return comm;
1016}
1017
1018static void free_comm_stuff(struct comm_stuff comm) {
1019
1021 comm.sendcounts, comm.recvcounts, comm.sdispls, comm.rdispls);
1022 yac_mpi_call(MPI_Type_free(&comm.stencil_info_dt), comm.comm);
1023}
1024
1025// the local process is the distributed owner for all target points
1026// passed to this routine
1028 struct yac_interp_grid * interp_grid, int const max_creep_distance,
1029 size_t * tgt_points, yac_int * tgt_global_ids, size_t count,
1030 int * interp_flag, struct yac_interp_weights * weights) {
1031
1032 struct comm_stuff comm = init_comm_stuff(interp_grid);
1033
1034 // get information about the neighbours of the target points
1035 size_t * neigh_local_ids;
1036 yac_int * neigh_global_ids;
1037 yac_int * neigh_to_tgt_global_id;
1038 size_t total_num_neighbours;
1040 interp_grid, tgt_points, tgt_global_ids, count,
1041 &neigh_local_ids, &neigh_global_ids, &neigh_to_tgt_global_id,
1042 &total_num_neighbours);
1043 size_t * stencil_indices =
1044 xmalloc(total_num_neighbours * sizeof(*stencil_indices));
1045
1046 // send request for target points neighbours to the respective
1047 // distributed owners
1048 struct tgt_request * neigh_requests;
1049 size_t request_count;
1051 interp_grid, comm, neigh_local_ids, neigh_global_ids, total_num_neighbours,
1052 &neigh_requests, &request_count);
1053 free(neigh_local_ids);
1054
1055 // sort global ids of target point neighbours
1057 neigh_global_ids, total_num_neighbours, neigh_to_tgt_global_id);
1058
1059 // initialise interpolation results
1060 size_t num_open_tgt = count;
1061 struct interp_result * interp_results =
1062 init_interp_results(tgt_points, tgt_global_ids, count);
1063
1064 // get already existing results and relocate them to their respective
1065 // distributed owners
1066 struct interp_result * initial_interp_results;
1067 size_t result_count;
1069 interp_grid, comm, weights, &initial_interp_results, &result_count);
1070
1071 for (int creep_distance = 0;
1072 creep_distance < max_creep_distance; ++creep_distance) {
1073
1074 // check whether there are initial results on any process
1075 int result_flag = result_count > 0;
1077 MPI_Allreduce(
1078 MPI_IN_PLACE, &result_flag, 1, MPI_INT, MPI_MAX, comm.comm), comm.comm);
1079 if (result_flag == 0) break;
1080
1081 // relocate interpolation results to distributed owners of associated
1082 // target points
1083 struct result_stencils * interp_stencils =
1085 interp_grid, comm,
1086 (creep_distance == 0)?
1087 initial_interp_results:(interp_results + num_open_tgt),
1088 result_count);
1089 if (creep_distance == 0) free(initial_interp_results);
1090
1091 // match interpolation results with distributed neighbour requests and
1092 // inform origins of requests about matches
1093 struct result_stencils * neigh_matches =
1095 interp_grid, comm, neigh_requests, &request_count,
1096 interp_stencils);
1097
1098 // match received neigh request answers with tgts
1099 result_count =
1101 neigh_matches, neigh_global_ids, neigh_to_tgt_global_id,
1102 stencil_indices, &total_num_neighbours, interp_results,
1103 &num_open_tgt);
1104 free(interp_stencils->stencils);
1105 free(interp_stencils);
1106 free(neigh_matches->stencils);
1107 free(neigh_matches);
1108 } // creep_distance < max_creep_distance
1109
1110 free(neigh_requests);
1111 free(stencil_indices);
1112 free(neigh_global_ids);
1113 free(neigh_to_tgt_global_id);
1114
1115 for (size_t i = 0; i < count; ++i)
1116 interp_flag[interp_results[i].idx] =
1117 interp_results[i].stencil.count > 0;
1118
1119 // copy stencils
1120 struct remote_points interp_tgt_remote_points;
1121 size_t * num_stencils_per_tgt;
1122 int * stencil_ranks;
1123 double * w;
1125 interp_grid, comm, interp_results + num_open_tgt, count - num_open_tgt,
1126 &interp_tgt_remote_points, &num_stencils_per_tgt, &stencil_indices,
1127 &stencil_ranks, &w);
1129 weights, &interp_tgt_remote_points, num_stencils_per_tgt,
1130 stencil_indices, stencil_ranks, w);
1131 free(interp_tgt_remote_points.data);
1132 free(w);
1133 free(num_stencils_per_tgt);
1134 free(stencil_ranks);
1135 free(stencil_indices);
1136 for (size_t i = num_open_tgt; i < count; ++i)
1137 if (interp_results[i].stencil.count > 1)
1138 free(interp_results[i].stencil.data.multi);
1139 free(interp_results);
1140
1141 free_comm_stuff(comm);
1142}
1143
1144static size_t do_search_creep (struct interp_method * method,
1145 struct yac_interp_grid * interp_grid,
1146 size_t * tgt_points, size_t count,
1147 struct yac_interp_weights * weights) {
1148
1149 struct interp_method_creep * creep_method =
1150 (struct interp_method_creep *)method;
1151
1152 int const max_creep_distance = creep_method->max_creep_distance;
1153
1154 if (max_creep_distance == 0) return 0;
1155
1156 YAC_ASSERT(
1159 "ERROR(do_search_creep): unsupported target field location "
1160 "(has to be YAC_LOC_CELL or YAC_LOC_CORNER)")
1161
1162 MPI_Comm comm = yac_interp_grid_get_MPI_Comm(interp_grid);
1163 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
1165 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1166 int comm_rank, comm_size;
1167 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
1168 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1169
1170 // get the distributed owners for all tgts
1171 int * tgt_points_dist_owner =
1172 xmalloc(count * sizeof(*tgt_points_dist_owner));
1174 interp_grid, tgt_points, count, tgt_points_dist_owner);
1175 yac_int * tgt_points_global_ids =
1176 xmalloc(count * sizeof(*tgt_points_global_ids));
1178 interp_grid, tgt_points, count, tgt_points_global_ids);
1179
1180 // determine which of these points are local
1181 size_t local_count = 0;
1182 for (size_t i = 0; i < count; ++i) {
1183 if (tgt_points_dist_owner[i] == comm_rank) {
1184 local_count++;
1185 tgt_points_dist_owner[i] = INT_MAX;
1186 }
1187 }
1188 size_t send_count = count - local_count;
1189
1190 // sort target points (remote points to the start of the array)
1192 tgt_points_dist_owner, count, tgt_points, tgt_points_global_ids);
1193
1194 // relocate tgt_points to distributed owners
1195 for (size_t i = 0; i < send_count; ++i)
1196 sendcounts[tgt_points_dist_owner[i]]++;
1198 1, sendcounts, recvcounts, sdispls, rdispls, comm);
1199 size_t recv_count = recvcounts[comm_size-1] + rdispls[comm_size-1];
1200 yac_int * global_ids_buffer =
1201 xrealloc(
1202 tgt_points_global_ids,
1203 (send_count + local_count + recv_count) * sizeof(*global_ids_buffer));
1204 yac_int * send_global_ids = global_ids_buffer;
1205 yac_int * recv_global_ids = global_ids_buffer + send_count;
1206 yac_alltoallv_yac_int_p2p(
1207 send_global_ids, sendcounts, sdispls+1,
1208 recv_global_ids + local_count, recvcounts, rdispls, comm);
1209
1210 // get local ids for received global ids
1211 size_t * temp_tgt_points =
1212 xmalloc((local_count + recv_count) * sizeof(*temp_tgt_points));
1213 memcpy(temp_tgt_points, tgt_points + send_count,
1214 local_count * sizeof(*tgt_points));
1216 interp_grid, recv_global_ids + local_count, recv_count,
1217 temp_tgt_points + local_count);
1218
1219 // do the interpolation for the redistributed target points
1220 int * interp_flag_buffer =
1221 xmalloc((send_count + local_count + recv_count) *
1222 sizeof(*interp_flag_buffer));
1223 int * temp_interp_flag = interp_flag_buffer + send_count;
1224 int * interp_flag = interp_flag_buffer;
1225 memset(temp_interp_flag, 0, (local_count + recv_count) * sizeof(*temp_interp_flag));
1227 interp_grid, max_creep_distance, temp_tgt_points, global_ids_buffer + send_count,
1228 local_count + recv_count, temp_interp_flag, weights);
1229 free(global_ids_buffer);
1230 free(temp_tgt_points);
1231
1232 // relocate interp_flag
1233 yac_alltoallv_int_p2p(
1234 temp_interp_flag + local_count, recvcounts, rdispls,
1235 interp_flag, sendcounts, sdispls+1, comm);
1236
1237 // count number of points that can be interpolated and reorder
1238 // tgt_points accordingly (interpolated first)
1239 size_t num_interpolated_tgt = 0;
1240 for (size_t i = 0; i < count; ++i) {
1241 if (interp_flag[i]) {
1242 interp_flag[i] = 0;
1243 ++num_interpolated_tgt;
1244 } else {
1245 interp_flag[i] = 1;
1246 }
1247 }
1248 yac_quicksort_index_int_size_t(interp_flag, count, tgt_points);
1249 free(interp_flag_buffer);
1250 free(tgt_points_dist_owner);
1251
1252 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
1253
1254 return num_interpolated_tgt;
1255}
1256
1257struct interp_method * yac_interp_method_creep_new(int creep_distance) {
1258
1259 struct interp_method_creep * method_creep =
1260 xmalloc(1 * sizeof(*method_creep));
1261
1262 method_creep->vtable = &interp_method_creep_vtable;
1263 method_creep->max_creep_distance =
1264 (creep_distance >= 0)?creep_distance:INT_MAX;
1265
1266 return (struct interp_method*)method_creep;
1267}
1268
// Releases a creep interpolation method; the method structure is a single
// allocation, hence a plain free is sufficient.
static void delete_creep(struct interp_method * method) {
  free(method);
}
#define ENSURE_ARRAY_SIZE(arrayp, curr_array_size, req_size)
void yac_interp_grid_get_tgt_cell_neighbours(struct yac_interp_grid *interp_grid, size_t *tgt_cells, size_t count, size_t *neighbours)
enum yac_location yac_interp_grid_get_tgt_field_location(struct yac_interp_grid *interp_grid)
void yac_interp_grid_get_tgt_global_ids(struct yac_interp_grid *interp_grid, size_t *tgt_points, size_t count, yac_int *tgt_global_ids)
MPI_Comm yac_interp_grid_get_MPI_Comm(struct yac_interp_grid *interp_grid)
struct remote_point * yac_interp_grid_get_tgt_remote_points(struct yac_interp_grid *interp_grid, size_t *tgt_points, size_t count)
void yac_interp_grid_determine_dist_tgt_owners(struct yac_interp_grid *interp_grid, size_t *tgt_indices, size_t count, int *owners)
void yac_interp_grid_get_tgt_vertex_neighbours(struct yac_interp_grid *interp_grid, size_t *vertices, size_t count, size_t **neigh_vertices, int *num_neighs_per_vertex)
struct yac_const_basic_grid_data * yac_interp_grid_get_basic_grid_data_tgt(struct yac_interp_grid *interp_grid)
void yac_interp_grid_tgt_global_to_local(struct yac_interp_grid *interp_grid, yac_int *tgt_global_ids, size_t count, size_t *tgt_local_ids)
static void do_search_creep_2(struct yac_interp_grid *interp_grid, int const max_creep_distance, size_t *tgt_points, yac_int *tgt_global_ids, size_t count, int *interp_flag, struct yac_interp_weights *weights)
static int compare_tgt_request_global_id(const void *a, const void *b)
static void pack_result_stencil(struct result_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype stencil_info_dt, MPI_Comm comm)
static struct result_stencils * unpack_result_stencils(size_t count, void *packed_data, size_t packed_data_size, MPI_Datatype stencil_info_dt, MPI_Comm comm)
static int compare_interp_result_stencil_local_id(const void *a, const void *b)
static int compare_stencil_info(const void *a, const void *b)
static MPI_Datatype yac_get_stencil_info_mpi_datatype(MPI_Comm comm)
static struct result_stencil * tgt_request_get_stencil(void *tgt_request)
static struct result_stencil * interp_result_get_stencil(void *interp_result)
static void get_tgt_neigh_info_vertex(struct yac_interp_grid *interp_grid, size_t *tgt_local_ids, yac_int *tgt_global_ids, size_t count, size_t **neigh_local_ids_, yac_int **neigh_to_tgt_global_id_, size_t *total_num_neighbours_)
static struct result_stencils * exchange_interp_results(void *results, size_t result_count, size_t result_size, struct result_stencil *(*result_get_stencil)(void *), size_t *pack_order, int *ranks, struct comm_stuff comm)
static void free_comm_stuff(struct comm_stuff comm)
static struct comm_stuff init_comm_stuff(struct yac_interp_grid *interp_grid)
static int compare_result_stencil_global_id(const void *a, const void *b)
static int compare_interp_result_stencil(const void *a, const void *b)
static void extract_interp_info(struct yac_interp_grid *interp_grid, struct comm_stuff comm, struct interp_result *interp_results, size_t result_count, struct remote_points *interp_tgt_remote_points, size_t **num_stencils_per_tgt_, size_t **stencil_indices_, int **stencil_ranks_, double **w_)
static void get_tgt_neigh_info(struct yac_interp_grid *interp_grid, size_t *tgt_local_ids, yac_int *tgt_global_ids, size_t count, size_t **neigh_local_ids_, yac_int **neigh_global_ids_, yac_int **neigh_to_tgt_global_id_, size_t *total_num_neighbours_)
static void get_initial_results(struct yac_interp_grid *interp_grid, struct comm_stuff comm, struct yac_interp_weights *interp_weights, struct interp_result **interp_results, size_t *result_count)
static size_t do_search_creep(struct interp_method *method, struct yac_interp_grid *interp_grid, size_t *tgt_points, size_t count, struct yac_interp_weights *weights)
struct interp_method * yac_interp_method_creep_new(int creep_distance)
static struct result_stencil copy_result_stencil_multi(struct result_stencil *neigh_stencils, size_t *stencil_indices, size_t count, yac_int global_id, double weight)
static void pack_result_stencils(void *results, size_t result_count, size_t result_size, struct result_stencil *(*get_stencil)(void *), size_t *pack_order, void **pack_data, int *pack_sizes, MPI_Datatype stencil_info_dt, MPI_Comm comm)
static void get_tgt_neigh_info_cell(struct yac_interp_grid *interp_grid, size_t *tgt_local_ids, yac_int *tgt_global_ids, size_t count, size_t **neigh_local_ids_, yac_int **neigh_to_tgt_global_id_, size_t *total_num_neighbours_)
static struct interp_result * init_interp_results(size_t *tgt_local_ids, yac_int *tgt_global_ids, size_t count)
static int compare_yac_int(const void *a, const void *b)
static size_t match_neigh_answers_with_tgts(struct result_stencils *neigh_answer, yac_int *neigh_global_ids, yac_int *neigh_to_tgt_global_id, size_t *stencil_indices, size_t *num_neighbours_, struct interp_result *interp_results, size_t *num_open_tgt_)
static void unpack_result_stencil(struct result_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype stencil_info_dt, MPI_Comm comm, struct stencil_info **stencil_info_buffer, size_t *stencil_info_buffer_array_size, size_t *stencil_info_buffer_size)
static struct result_stencils * update_neigh_requests(struct yac_interp_grid *interp_grid, struct comm_stuff comm, struct tgt_request *neigh_requests, size_t *request_count_, struct result_stencils *interp_stencils)
static void get_result_stencil_pack_sizes(void *results, size_t result_count, size_t result_size, struct result_stencil *(*get_stencil)(void *), size_t *pack_order, int *pack_sizes, MPI_Datatype stencil_info_dt, MPI_Comm comm)
static struct interp_method_vtable interp_method_creep_vtable
static void send_neigh_request(struct yac_interp_grid *interp_grid, struct comm_stuff comm, size_t *neigh_local_ids, yac_int *neigh_global_ids, size_t num_neighbours, struct tgt_request **neigh_requests_, size_t *request_count_)
static void delete_creep(struct interp_method *method)
static struct result_stencils * relocate_interp_results(struct yac_interp_grid *interp_grid, struct comm_stuff comm, struct interp_result *interp_results, size_t result_count)
static int compare_tgt_request_stencil_count(const void *a, const void *b)
static int compare_interp_result_global_id(const void *a, const void *b)
void yac_interp_weights_wcopy_weights(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_stencils_per_tgt, size_t *stencil_indices, int *stencil_ranks, double *w)
yac_int * yac_interp_weights_get_interp_tgt(struct yac_interp_weights *weights)
size_t yac_interp_weights_get_interp_count(struct yac_interp_weights *weights)
yac_location
Definition location.h:12
@ YAC_LOC_CORNER
Definition location.h:15
@ YAC_LOC_CELL
Definition location.h:14
#define xrealloc(ptr, size)
Definition ppm_xfuncs.h:67
#define xmalloc(size)
Definition ppm_xfuncs.h:66
MPI_Datatype stencil_info_dt
struct interp_method_vtable * vtable
size_t(* do_search)(struct interp_method *method, struct yac_interp_grid *grid, size_t *tgt_points, size_t count, struct yac_interp_weights *weights)
struct result_stencil stencil
struct remote_point * data
union result_stencil::@11 data
struct stencil_info single * multi
struct stencil_info stencil_info_buffer[]
struct result_stencil * stencils
struct result_stencil stencil
const_int_pointer num_vertices_per_cell
void yac_quicksort_index_yac_int_size_t(yac_int *a, size_t n, size_t *idx)
void yac_quicksort_index_int_yac_int(int *a, size_t n, yac_int *idx)
void yac_quicksort_index_yac_int_yac_int_size_t(yac_int *a, size_t n, yac_int *b, size_t *c)
void yac_quicksort_index_size_t_yac_int(size_t *a, size_t n, yac_int *idx)
void yac_quicksort_index_int_size_t(int *a, size_t n, size_t *idx)
void yac_quicksort_index_yac_int_yac_int(yac_int *a, size_t n, yac_int *idx)
void yac_quicksort_index_int_size_t_yac_int(int *a, size_t n, size_t *b, yac_int *c)
#define YAC_ASSERT(exp, msg)
Definition yac_assert.h:15
void yac_generate_alltoallv_args(int count, size_t const *sendcounts, size_t *recvcounts, size_t *sdispls, size_t *rdispls, MPI_Comm comm)
Definition yac_mpi.c:569
void yac_free_comm_buffers(size_t *sendcounts, size_t *recvcounts, size_t *sdispls, size_t *rdispls)
Definition yac_mpi.c:624
void yac_get_comm_buffers(int count, size_t **sendcounts, size_t **recvcounts, size_t **sdispls, size_t **rdispls, MPI_Comm comm)
Definition yac_mpi.c:593
MPI_Datatype yac_create_resized(MPI_Datatype dt, size_t new_size, MPI_Comm comm)
Definition yac_mpi.c:548
#define yac_mpi_call(call, comm)
#define YAC_MPI_SIZE_T
Xt_int yac_int
Definition yac_types.h:15
#define yac_int_dt
Definition yac_types.h:16