YetAnotherCoupler 3.5.2
dist_grid.c
1// Copyright (c) 2024 The YAC Authors
2//
3// SPDX-License-Identifier: BSD-3-Clause
4
5#ifdef HAVE_CONFIG_H
6// Get the definition of the 'restrict' keyword.
7#include "config.h"
8#endif
9
10#include <stdlib.h>
11#include <string.h>
12#include <assert.h>
13#include <float.h>
14#include <yaxt.h>
15
16#include "basic_grid.h"
17#include "dist_grid_internal.h"
18#include "geometry.h"
19#include "yac_mpi_internal.h"
20#include "utils_core.h"
21#include "sphere_part.h"
22#include "proc_sphere_part.h"
23#include "ensure_array_size.h"
24#include "interp_grid.h"
25#include "field_data_set.h"
26
27#define CHECK_LOCATION(caller) \
28 YAC_ASSERT_F( \
29 (location == YAC_LOC_CELL) || \
30 (location == YAC_LOC_CORNER) || \
31 (location == YAC_LOC_EDGE), \
32 "ERROR(%s): \"%d\" is not a invalid location", \
33 caller, (int)location)
34
39
44
45// reorder_idx has to be first
52
53// reorder_idx has to be first
61
62struct id_pos {
63 yac_int global_id;
64 uint64_t orig_pos;
65};
66
74
80
89
90// warning: when changing this, ensure that struct yac_const_basic_grid_data is
91// changed accordingly
92struct yac_dist_grid {
93 yac_coordinate_pointer vertex_coordinates; // Cartesian coordinates of all vertices
94 yac_int * ids[3]; // global cell/vertex/edge ids
96 size_t * cell_to_vertex; // vertices of cell i:
97 // cell_to_vertex[cell_to_vertex_offsets[i]]@num_vertices_per_cell[i]
99 size_t * cell_to_edge; // edges of cell i:
100 // cell_to_edge[cell_to_edge_offsets[i]]@num_vertices_per_cell[i]
103 struct bounding_circle * cell_bnd_circles; // bounding circle for all cells
105 struct remote_point_infos * owners[3]; // information for cells/vertices/edges about
106 // their position in the user decomposition
107 size_t total_count[3]; // current number of cells/vertices/edges in the
108 // distributed grid (can increase over time if
109 // for example a NNN search results contains
110 // data from other processes)
111 size_t count[3]; // number of cells/vertices/edges after the initial
112 // generation of the distributed grid
113 int * owner_mask[3]; // each cell/vertex/edge is owned by exactly one
114 // process in the distributed grid
115 yac_int * sorted_ids[3]; // sorted copy of "ids" arrays
116 size_t * sorted_reorder_idx[3]; // sorted_ids[i][j] = ids[i][sorted_reorder_idx[i][j]]
117 // with:
118 // i in [0;2]
119 // j in [0;total_count[i][
121 MPI_Comm comm;
122};
123
132
138
140 struct {
141 size_t local_id;
144 size_t neigh_idx;
145};
146
147// looks up positions of ids in an array of sorted ids
148static void id2idx(
149 char const * caller, yac_int * ids, size_t * idx, size_t num_ids,
150 yac_int * ref_sorted_ids, size_t * ref_sorted_reorder_idx,
151 size_t num_sorted_ids) {
152
153 size_t * reorder = xmalloc(num_ids * sizeof(*reorder));
154 for (size_t i = 0; i < num_ids; ++i) reorder[i] = i;
155
156 yac_quicksort_index_yac_int_size_t(ids, num_ids, reorder);
157
158 for (size_t i = 0, j = 0; i < num_ids; ++i) {
159
160 yac_int curr_id = ids[i];
161 while ((j < num_sorted_ids) && (ref_sorted_ids[j] < curr_id)) ++j;
162 YAC_ASSERT_F(
163 (j < num_sorted_ids) && (ref_sorted_ids[j] == curr_id),
164 "ERROR(%s): id %" XT_INT_FMT " not found", caller, curr_id)
165 idx[reorder[i]] = ref_sorted_reorder_idx[j];
166 }
167
168 free(reorder);
169}
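
// Illustrative, self-contained sketch (not part of this file) of the lookup
// pattern used by id2idx() above: sort the queried ids while remembering
// their original positions, then resolve all of them in one linear sweep over
// the already sorted reference array. Plain int ids stand in for yac_int and
// missing ids are silently skipped instead of asserting.
#include <stdio.h>
#include <stdlib.h>

struct query { int id; size_t orig_pos; };

static int cmp_query(const void * a, const void * b) {
  int ia = ((const struct query *)a)->id, ib = ((const struct query *)b)->id;
  return (ia > ib) - (ia < ib);
}

static void lookup_ids(
  const int * ids, size_t * idx, size_t num_ids,
  const int * sorted_ref_ids, const size_t * ref_reorder, size_t num_ref) {

  struct query * queries = malloc(num_ids * sizeof(*queries));
  for (size_t i = 0; i < num_ids; ++i)
    queries[i] = (struct query){.id = ids[i], .orig_pos = i};
  qsort(queries, num_ids, sizeof(*queries), cmp_query);

  // both sequences are sorted => a single merge-style sweep resolves all ids
  for (size_t i = 0, j = 0; i < num_ids; ++i) {
    while ((j < num_ref) && (sorted_ref_ids[j] < queries[i].id)) ++j;
    if ((j < num_ref) && (sorted_ref_ids[j] == queries[i].id))
      idx[queries[i].orig_pos] = ref_reorder[j];
  }
  free(queries);
}

int main(void) {
  int ref_ids[] = {2, 5, 7, 9};          // sorted copy of some global ids
  size_t ref_reorder[] = {3, 0, 2, 1};   // their positions in the unsorted array
  int ids[] = {7, 2};                    // ids to look up
  size_t idx[2];
  lookup_ids(ids, idx, 2, ref_ids, ref_reorder, 4);
  printf("%zu %zu\n", idx[0], idx[1]);   // prints "2 3"
  return 0;
}
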
170
171// returns the index of the cell vertex with the lowest global id
172static inline size_t get_cell_reference_vertex(
173 struct yac_dist_grid * dist_grid, size_t cell_idx) {
174
175 int num_vertices = dist_grid->num_vertices_per_cell[cell_idx];
176 if (num_vertices == 0) return SIZE_MAX;
177 yac_int * grid_vertex_ids = dist_grid->ids[YAC_LOC_CORNER];
178 size_t * vertices =
179 dist_grid->cell_to_vertex + dist_grid->cell_to_vertex_offsets[cell_idx];
180 // get the cell corner with the smallest global id
181 size_t min_idx = vertices[0];
182 yac_int min_global_id = grid_vertex_ids[min_idx];
183 for (int j = 1; j < num_vertices; ++j) {
184 size_t curr_vertex = vertices[j];
185 yac_int curr_global_id = grid_vertex_ids[curr_vertex];
186 if (min_global_id > curr_global_id) {
187 min_global_id = curr_global_id;
188 min_idx = curr_vertex;
189 }
190 }
191 return min_idx;
192}
193
194// generate cell owner mask (true for all cells belonging to the local part of
195// the distributed directory)
196static int * determine_cell_owner_mask(
197 struct yac_dist_grid * dist_grid, int is_root, int * vertex_owner_mask) {
198
199 size_t num_cells = dist_grid->count[YAC_LOC_CELL];
200 int * cell_owner_mask = xmalloc(num_cells * sizeof(*cell_owner_mask));
201
202 //--------------------------
203 // determine cell owner mask
204 //--------------------------
205 for (size_t i = 0; i < num_cells; ++i) {
206 size_t ref_vertex = get_cell_reference_vertex(dist_grid, i);
207 cell_owner_mask[i] =
208 (ref_vertex != SIZE_MAX)?(vertex_owner_mask[ref_vertex]):is_root;
209 }
210
211 return cell_owner_mask;
212}
213
214// returns the index of the edge vertex with the lowest global id
215static inline size_t get_edge_reference_vertex(
216 struct yac_dist_grid * dist_grid, size_t edge_idx) {
217
218 yac_int * vertex_ids = dist_grid->ids[YAC_LOC_CORNER];
219 size_t * edge_vertices = &(dist_grid->edge_to_vertex[edge_idx][0]);
220 // get the edge corner with the smallest global id
221 return edge_vertices[
222 (vertex_ids[edge_vertices[0]] > vertex_ids[edge_vertices[1]])?1:0];
223}
224
225// generate edge owner mask
226// (each edge is owned by exactly one process in the distributed
227// directory, but may be located on more than one)
228static int * determine_edge_owner_mask(
229 struct yac_dist_grid * dist_grid, int * vertex_owner_mask) {
230
231 size_t num_edges = dist_grid->count[YAC_LOC_EDGE];
232 int * edge_owner_mask = xmalloc(num_edges * sizeof(*edge_owner_mask));
233
234 //-------------------------
235 // determine edge owner mask
236 //-------------------------
237 for (size_t i = 0; i < num_edges; ++i)
238 edge_owner_mask[i] =
239 vertex_owner_mask[get_edge_reference_vertex(dist_grid, i)];
240
241 return edge_owner_mask;
242}
243
244// make sure that each cell/vertex/edge is owned by only one process of
245// the distributed directory
247 struct yac_dist_grid * dist_grid, int comm_rank, int * vertex_owner) {
248
249 // determine distributed owner for vertices in the local part of the
250 // distributed grid
251 int * vertex_owner_mask =
252 (dist_grid->owner_mask[YAC_LOC_CORNER] = vertex_owner);
253 for (size_t i = 0; i < dist_grid->count[YAC_LOC_CORNER]; ++i)
254 vertex_owner_mask[i] = vertex_owner_mask[i] == comm_rank;
255
256 // generate owner mask for cells based on the vertex owner mask
257 dist_grid->owner_mask[YAC_LOC_CELL] =
258 determine_cell_owner_mask(dist_grid, comm_rank == 0, vertex_owner_mask);
259
260 // generate owner mask for edges based on the vertex owner mask
261 dist_grid->owner_mask[YAC_LOC_EDGE] =
262 determine_edge_owner_mask(dist_grid, vertex_owner_mask);
263}
264
265static MPI_Datatype yac_get_id_pos_mpi_datatype(MPI_Comm comm) {
266
267 struct id_pos dummy;
268 MPI_Datatype id_pos_dt;
269 int array_of_blocklengths[] = {1,1};
270 const MPI_Aint array_of_displacements[] =
271 {(MPI_Aint)(intptr_t)(const void *)&(dummy.global_id) -
272 (MPI_Aint)(intptr_t)(const void *)&dummy,
273 (MPI_Aint)(intptr_t)(const void *)&(dummy.orig_pos) -
274 (MPI_Aint)(intptr_t)(const void *)&dummy};
275 const MPI_Datatype array_of_types[] = {yac_int_dt, MPI_UINT64_T};
276 yac_mpi_call(
277 MPI_Type_create_struct(
278 2, array_of_blocklengths, array_of_displacements,
279 array_of_types, &id_pos_dt), comm);
280 return yac_create_resized(id_pos_dt, sizeof(dummy), comm);
281}
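
// Self-contained sketch (not part of this file) of the same idea with
// offsetof() instead of pointer arithmetic on a dummy variable: describe a C
// struct to MPI and resize the datatype to the struct's true extent so arrays
// of it can be communicated directly. The struct and field types below are
// examples, not the YAC definitions.
#include <stddef.h>
#include <stdint.h>
#include <mpi.h>

struct example_id_pos { int global_id; uint64_t orig_pos; };

static MPI_Datatype make_example_id_pos_dt(void) {
  int blocklengths[2] = {1, 1};
  MPI_Aint displacements[2] =
    {offsetof(struct example_id_pos, global_id),
     offsetof(struct example_id_pos, orig_pos)};
  MPI_Datatype types[2] = {MPI_INT, MPI_UINT64_T};
  MPI_Datatype tmp_dt, dt;
  MPI_Type_create_struct(2, blocklengths, displacements, types, &tmp_dt);
  // account for any trailing padding the compiler adds to the struct
  MPI_Type_create_resized(tmp_dt, 0, sizeof(struct example_id_pos), &dt);
  MPI_Type_free(&tmp_dt);
  MPI_Type_commit(&dt);
  return dt;
}

int main(int argc, char ** argv) {
  MPI_Init(&argc, &argv);
  MPI_Datatype dt = make_example_id_pos_dt();
  // dt can now be used in MPI_Send/MPI_Alltoallv/... for example_id_pos arrays
  MPI_Type_free(&dt);
  MPI_Finalize();
  return 0;
}
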
282
283// inserts an element into a sorted array (the caller tracks the growing size)
284static void insert_global_id(yac_int * ids, size_t n, yac_int id) {
285
286 size_t i;
287 for (i = 0; i < n; ++i) if (ids[i] >= id) break;
288 // copy new id into array and move bigger elements one position up
289 if (n != i) memmove(ids + i + 1, ids + i, (n - i) * sizeof(*ids));
290 ids[i] = id;
291}
292
293// inserts an element into an array and increases the corresponding size
294// if the element already exists in the array nothing is done
295static void insert_rank(int * ranks, int * count, int rank) {
296
297 int i;
298 int n = *count;
299
300 for (i = 0; i < n; ++i) if (ranks[i] >= rank) break;
301
302 // if the rank is already in the array
303 if (i != n) {
304 if (ranks[i] == rank) return;
305 else memmove(ranks + i + 1, ranks + i, ((size_t)(n - i)) * sizeof(*ranks));
306 }
307 ranks[i] = rank;
308 *count = n + 1;
309}
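
// Small, self-contained sketch (not part of this file) showing what the two
// insert helpers above do: keep an array sorted while elements are added one
// by one, skipping duplicates as in the insert_rank() variant. Plain int
// values are used; the caller owns a sufficiently large buffer.
#include <stdio.h>
#include <string.h>

static void insert_unique_sorted(int * values, int * count, int value) {
  int n = *count, i;
  for (i = 0; i < n; ++i) if (values[i] >= value) break;
  if ((i < n) && (values[i] == value)) return;  // already present
  // shift bigger elements one position up and insert
  memmove(values + i + 1, values + i, (size_t)(n - i) * sizeof(*values));
  values[i] = value;
  *count = n + 1;
}

int main(void) {
  int ranks[8], count = 0;
  int input[] = {3, 1, 3, 2, 1};
  for (int i = 0; i < 5; ++i) insert_unique_sorted(ranks, &count, input[i]);
  for (int i = 0; i < count; ++i) printf("%d ", ranks[i]);  // prints "1 2 3"
  printf("\n");
  return 0;
}
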
310
311static int compare_n_ids_reorder_ids(
312 const void * a, const void * b) {
313
314 int count_a = ((struct n_ids_reorder const *)a)->count;
315 int count_b = ((struct n_ids_reorder const *)b)->count;
316 yac_int * a_ids = ((struct n_ids_reorder const *)a)->ids;
317 yac_int * b_ids = ((struct n_ids_reorder const *)b)->ids;
318 int ret = count_a - count_b;
319 for (int i = 0; !ret && (i < count_a); ++i)
320 ret = (a_ids[i] > b_ids[i]) - (a_ids[i] < b_ids[i]);
321 return ret;
322}
323
324static int compare_n_ids_reorder_reorder(
325 const void * a, const void * b) {
326
327 return (((struct n_ids_reorder const *)a)->reorder_idx >
328 ((struct n_ids_reorder const *)b)->reorder_idx) -
329 (((struct n_ids_reorder const *)a)->reorder_idx <
330 ((struct n_ids_reorder const *)b)->reorder_idx);
331}
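
// Brief sketch (not part of this file) of the comparison idiom used by the
// qsort comparators above: "(a > b) - (a < b)" yields -1, 0 or +1 without
// risking the overflow that "a - b" can produce for wide integer types.
#include <stdio.h>

static int three_way_compare(long a, long b) {
  return (a > b) - (a < b);
}

int main(void) {
  printf("%d %d %d\n",
         three_way_compare(1, 2),    // -1
         three_way_compare(5, 5),    //  0
         three_way_compare(9, 3));   //  1
  return 0;
}
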
332
333// determines for all cells, in the grid data provided by the user, to
334// which processes they belong according to the decomposition of the
335// distributed grid
337 struct proc_sphere_part_node * proc_sphere_part,
338 struct yac_basic_grid_data * grid_data, MPI_Comm comm, int ** dist_cell_ranks,
339 int * dist_cell_rank_counts, size_t * dist_cell_rank_offsets,
340 int max_num_vertices_per_cell) {
341
342 int comm_size;
343 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
344
345 size_t num_cells = grid_data->num_cells;
346 int * ranks_buffer = xmalloc((size_t)comm_size * sizeof(*ranks_buffer));
347 size_t dist_cell_ranks_array_size = num_cells;
348 int * dist_cell_ranks_ = xmalloc(num_cells * sizeof(*dist_cell_ranks_));
349 size_t offset = 0;
350
351 int * core_cell_mask = grid_data->core_cell_mask;
352
353 // set up a cell buffer required to compute the bounding circle of a cell
354 struct yac_grid_cell cell;
355 cell.coordinates_xyz =
356 xmalloc(
357 (size_t)max_num_vertices_per_cell * sizeof(*(cell.coordinates_xyz)));
358 cell.edge_type =
359 xmalloc((size_t)max_num_vertices_per_cell * sizeof(*(cell.edge_type)));
360
361 size_t * cell_to_vertex = grid_data->cell_to_vertex;
362 size_t * cell_to_vertex_offsets = grid_data->cell_to_vertex_offsets;
363 size_t * cell_to_edge = grid_data->cell_to_edge;
364 size_t * cell_to_edge_offsets = grid_data->cell_to_edge_offsets;
365 yac_coordinate_pointer vertex_coordinates = grid_data->vertex_coordinates;
366 enum yac_edge_type * edge_type = grid_data->edge_type;
367 int * num_vertices_per_cell = grid_data->num_vertices_per_cell;
368
369 // generate a bounding circle for each cell and use it to determine
370 // ranks of the processes that require this cell
371 for (size_t i = 0; i < num_cells; ++i) {
372
373 int rank_count;
374
375 // we only have to consider valid cells
376 if ((core_cell_mask == NULL) || core_cell_mask[i]) {
377
378 cell.num_corners = num_vertices_per_cell[i];
379
380 // if the cell actually has corners
381 if (cell.num_corners > 0) {
382
383 // extract single cell from the grid_data data
384 size_t * curr_cell_to_vertex =
385 cell_to_vertex + cell_to_vertex_offsets[i];
386 size_t * curr_cell_to_edge =
387 cell_to_edge + cell_to_edge_offsets[i];
388 for (int j = 0; j < num_vertices_per_cell[i]; ++j) {
389 yac_coordinate_pointer curr_vertex_coords =
390 vertex_coordinates + curr_cell_to_vertex[j];
391 for (int k = 0; k < 3; ++k)
392 cell.coordinates_xyz[j][k] = (*curr_vertex_coords)[k];
393 cell.edge_type[j] = edge_type[curr_cell_to_edge[j]];
394 }
395
396 // generate bounding circle for the current cell
397 struct bounding_circle bnd_circle;
398 yac_get_cell_bounding_circle(cell, &bnd_circle);
399
400 // determine all processes whose part of the YAC internal
401 // decomposition overlaps with the bounding circle of the
402 // current cell
404 proc_sphere_part, bnd_circle, ranks_buffer, &rank_count);
405
406 } else { // cells without corners are all assigned to process 0
407
408 ranks_buffer[0] = 0;
409 rank_count = 1;
410 }
411
412 } else { // invalid cells are not distributed
413 rank_count = 0;
414 }
415
416 ENSURE_ARRAY_SIZE(dist_cell_ranks_, dist_cell_ranks_array_size,
417 offset + (size_t)rank_count);
418 memcpy(dist_cell_ranks_ + offset, ranks_buffer,
419 (size_t)rank_count * sizeof(*ranks_buffer));
420
421 dist_cell_rank_counts[i] = rank_count;
422 dist_cell_rank_offsets[i] = offset;
423 offset += (size_t)rank_count;
424 }
425
426 // the (n+1)'th entry contains the total number of ranks (SUM(dist_cell_rank_counts))
427 dist_cell_rank_offsets[num_cells] = offset;
428
429 free(cell.edge_type);
430 free(cell.coordinates_xyz);
431 free(ranks_buffer);
432
433 *dist_cell_ranks = dist_cell_ranks_;
434}
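
// Self-contained sketch (not part of this file) of the storage scheme the
// routine above produces: one variable-length list per item packed into a
// single flat buffer, addressed via per-item counts and offsets, with
// offsets[num_items] holding the total size. The "rank" values are made up.
#include <stdio.h>
#include <stdlib.h>

int main(void) {
  size_t num_items = 3;
  int lists[3][4] = {{0, 2}, {1}, {0, 1, 3}};   // example variable-length lists
  int list_sizes[3] = {2, 1, 3};

  int * flat = NULL;
  size_t flat_size = 0;
  int * counts = malloc(num_items * sizeof(*counts));
  size_t * offsets = malloc((num_items + 1) * sizeof(*offsets));

  for (size_t i = 0; i < num_items; ++i) {
    offsets[i] = flat_size;
    counts[i] = list_sizes[i];
    flat = realloc(flat, (flat_size + (size_t)list_sizes[i]) * sizeof(*flat));
    for (int j = 0; j < list_sizes[i]; ++j)
      flat[flat_size + (size_t)j] = lists[i][j];
    flat_size += (size_t)list_sizes[i];
  }
  offsets[num_items] = flat_size;   // total number of stored entries

  // the list of item 2 is flat[offsets[2]] .. flat[offsets[2] + counts[2] - 1]
  for (int j = 0; j < counts[2]; ++j) printf("%d ", flat[offsets[2] + (size_t)j]);
  printf("\n");                      // prints "0 1 3"

  free(offsets); free(counts); free(flat);
  return 0;
}
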
435
436static MPI_Datatype yac_get_id_reorder_coord_coord_mpi_datatype(MPI_Comm comm) {
437
438 struct id_reorder_coord dummy;
439 MPI_Datatype coord_dt;
440 int array_of_blocklengths[] = {3};
441 const MPI_Aint array_of_displacements[] =
442 {(MPI_Aint)(intptr_t)(const void *)&(dummy.coord) -
443 (MPI_Aint)(intptr_t)(const void *)&dummy};
444 const MPI_Datatype array_of_types[] = {MPI_DOUBLE};
445 yac_mpi_call(
446 MPI_Type_create_struct(1, array_of_blocklengths, array_of_displacements,
447 array_of_types, &coord_dt), comm);
448 return yac_create_resized(coord_dt, sizeof(dummy), comm);
449}
450
451static MPI_Datatype yac_get_id_reorder_coord_id_mpi_datatype(MPI_Comm comm) {
452
453 struct id_reorder_coord dummy;
454 MPI_Datatype global_id_dt;
455 int array_of_blocklengths[] = {1};
456 const MPI_Aint array_of_displacements[] =
457 {(MPI_Aint)(intptr_t)(const void *)&(dummy.global_id) -
458 (MPI_Aint)(intptr_t)(const void *)&dummy};
459 const MPI_Datatype array_of_types[] = {yac_int_dt};
460 yac_mpi_call(
461 MPI_Type_create_struct(1, array_of_blocklengths, array_of_displacements,
462 array_of_types, &global_id_dt), comm);
463 return yac_create_resized(global_id_dt, sizeof(dummy), comm);
464}
465
466static int compare_id_reorder_coord_coord(
467 const void * a, const void * b) {
468
469 return compare_coords(((const struct id_reorder_coord *)a)->coord,
470 ((const struct id_reorder_coord *)b)->coord);
471}
472
474 const void * a, const void * b) {
475
476 return (((const struct id_reorder_coord *)a)->reorder_idx >
477 ((const struct id_reorder_coord *)b)->reorder_idx) -
478 (((const struct id_reorder_coord *)a)->reorder_idx <
479 ((const struct id_reorder_coord *)b)->reorder_idx);
480}
481
482// generates global vertex ids (if they are not provided by the user)
483// and returns the distributed owners for each vertex in the provided
484// grid data
485static int * generate_vertex_ids(
486 struct proc_sphere_part_node * proc_sphere_part,
487 struct yac_basic_grid_data * grid, MPI_Comm comm) {
488
489 int * vertex_ranks = xmalloc(grid->num_vertices * sizeof(*vertex_ranks));
490
491 // determine the dist grid ranks for all vertices
493 proc_sphere_part, grid->vertex_coordinates, grid->num_vertices,
494 vertex_ranks);
495
496 // check whether only a subset of the processes have defined
497 // their global ids, which is not supported
498 int ids_available_local =
499 (grid->num_vertices > 0) && (grid->vertex_ids != NULL);
500 int ids_available_global;
501 yac_mpi_call(MPI_Allreduce(
502 &ids_available_local, &ids_available_global, 1,
503 MPI_INT, MPI_MAX, comm), comm);
504
505 // if there are vertices defined locally for the current grid
506 if (grid->num_vertices != 0) {
507 YAC_ASSERT(
508 ids_available_local || !ids_available_global,
509 "ERROR(generate_vertex_ids): inconsistent global ids")
510 }
511
512 // if we do not need to generate the global ids
513 if (ids_available_global) return vertex_ranks;
514
515 int comm_size;
516 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
517
518 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
520 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
521
522 for (size_t i = 0; i < grid->num_vertices; ++i)
523 sendcounts[vertex_ranks[i]]++;
524
526 1, sendcounts, recvcounts, sdispls, rdispls, comm);
527
528 size_t orig_vertex_count = grid->num_vertices;
529 size_t dist_vertex_count =
530 recvcounts[comm_size - 1] + rdispls[comm_size - 1];
531
532 struct id_reorder_coord * id_reorder_coord_buffer =
533 xmalloc((orig_vertex_count + dist_vertex_count) *
534 sizeof(*id_reorder_coord_buffer));
535 struct id_reorder_coord *
536 id_reorder_coord_send_buffer = id_reorder_coord_buffer;
537 struct id_reorder_coord *
538 id_reorder_coord_recv_buffer =
539 id_reorder_coord_buffer + orig_vertex_count;
540
541 // pack send buffer
542 for (size_t i = 0; i < orig_vertex_count; ++i) {
543 size_t pos = sdispls[vertex_ranks[i] + 1]++;
544 id_reorder_coord_send_buffer[pos].reorder_idx = i;
545 for (int j = 0; j < 3; ++j)
546 id_reorder_coord_send_buffer[pos].coord[j] =
547 grid->vertex_coordinates[i][j];
548 }
549
550 MPI_Datatype id_reorder_coord_coord_dt =
551 yac_get_id_reorder_coord_coord_mpi_datatype(comm);
552
553 // exchange data
555 id_reorder_coord_send_buffer, sendcounts, sdispls,
556 id_reorder_coord_recv_buffer, recvcounts, rdispls,
557 sizeof(*id_reorder_coord_send_buffer), id_reorder_coord_coord_dt, comm);
558
559 yac_mpi_call(MPI_Type_free(&id_reorder_coord_coord_dt), comm);
560
561 for (size_t i = 0; i < dist_vertex_count; ++i)
562 id_reorder_coord_recv_buffer[i].reorder_idx = i;
563
564 // sort received vertices based on coordinates
565 qsort(
566 id_reorder_coord_recv_buffer, dist_vertex_count,
567 sizeof(*id_reorder_coord_recv_buffer), compare_id_reorder_coord_coord);
568
569 size_t unique_count = dist_vertex_count > 0;
570 struct id_reorder_coord * prev = id_reorder_coord_recv_buffer;
571
572 // determine number of unique coordinates
573 for (size_t i = 0; i < dist_vertex_count; ++i) {
574 struct id_reorder_coord * curr = id_reorder_coord_recv_buffer + i;
575 if (compare_id_reorder_coord_coord(prev, curr)) {
576 ++unique_count;
577 prev = curr;
578 }
579 curr->global_id = (yac_int)unique_count - 1;
580 }
581
582 YAC_ASSERT(
583 unique_count <= (size_t)XT_INT_MAX,
584 "ERROR(generate_vertex_ids): global_id out of bounds")
585
586 yac_int yac_int_unique_count = (yac_int)unique_count;
587 yac_int id_offset;
588
589 // determine exclusive scan of sum of numbers of unique
590 // coordinates on all ranks
591 yac_mpi_call(MPI_Exscan(&yac_int_unique_count, &id_offset, 1, yac_int_dt,
592 MPI_SUM, comm), comm);
593 int comm_rank;
594 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
595 if (comm_rank == 0) id_offset = 0;
596
597 YAC_ASSERT(
598 ((size_t)id_offset + unique_count) <= (size_t)XT_INT_MAX,
599 "ERROR(generate_vertex_ids): global_id out of bounds")
600
601 // adjust global ids
602 for (size_t i = 0; i < dist_vertex_count; ++i)
603 id_reorder_coord_recv_buffer[i].global_id += id_offset;
604
605 // bring the received vertices back into their original order
606 qsort(id_reorder_coord_recv_buffer, dist_vertex_count,
607 sizeof(*id_reorder_coord_recv_buffer),
609
610 MPI_Datatype id_reorder_coord_id_dt =
611 yac_get_id_reorder_coord_id_mpi_datatype(comm);
612
613 // return generated global ids data
615 id_reorder_coord_recv_buffer, recvcounts, rdispls,
616 id_reorder_coord_send_buffer, sendcounts, sdispls,
617 sizeof(*id_reorder_coord_send_buffer), id_reorder_coord_id_dt, comm);
618
619 yac_mpi_call(MPI_Type_free(&id_reorder_coord_id_dt), comm);
620
621 yac_int * vertex_ids =
622 ((grid->vertex_ids =
623 (grid->num_vertices > 0)?
624 xmalloc(grid->num_vertices * sizeof(*(grid->vertex_ids))):NULL));
625
626 for (size_t i = 0; i < grid->num_vertices; ++i)
627 vertex_ids[id_reorder_coord_send_buffer[i].reorder_idx] =
628 id_reorder_coord_send_buffer[i].global_id;
629
630 free(id_reorder_coord_buffer);
631 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
632
633 return vertex_ranks;
634}
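
// Minimal sketch (not part of this file) of the id numbering step above:
// every rank counts its locally generated items, MPI_Exscan yields the sum of
// the counts on all lower ranks, and that prefix is used as the offset of the
// rank's contiguous block of global ids. Rank 0 receives an undefined value
// from MPI_Exscan and therefore sets its offset to 0 explicitly.
#include <stdio.h>
#include <mpi.h>

int main(int argc, char ** argv) {
  MPI_Init(&argc, &argv);
  int comm_rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank);

  long local_count = 10 + comm_rank;   // example: each rank generated some items
  long id_offset = 0;
  MPI_Exscan(&local_count, &id_offset, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD);
  if (comm_rank == 0) id_offset = 0;   // Exscan leaves rank 0's buffer undefined

  // this rank may now hand out ids id_offset .. id_offset + local_count - 1
  printf("rank %d: ids [%ld, %ld)\n",
         comm_rank, id_offset, id_offset + local_count);

  MPI_Finalize();
  return 0;
}
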
635
636// generate global ids for all cells and edges (if non-existent)
637static void generate_ce_ids(
638 struct yac_basic_grid * grid, int * vertex_ranks,
639 int max_num_vertices_per_cell, MPI_Comm comm) {
640
641 struct yac_basic_grid_data * grid_data = yac_basic_grid_get_data(grid);
642
643 size_t num_cells = grid_data->num_cells;
644 size_t num_edges = grid_data->num_edges;
645
646 yac_int * vertex_ids = grid_data->vertex_ids;
647
648 int comm_rank, comm_size;
649 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
650 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
651
652 // check whether only a subset of the processes have defined
653 // their global ids, which is not supported
654 int ids_available_local[2], ids_available_global[2];
655 ids_available_local[0] =
656 (num_cells > 0) && (grid_data->cell_ids != NULL);
657 ids_available_local[1] =
658 (num_edges > 0) && (grid_data->edge_ids != NULL);
659 yac_mpi_call(MPI_Allreduce(ids_available_local, ids_available_global, 2,
660 MPI_INT, MPI_MAX, comm), comm);
661
662 YAC_ASSERT(
663 (num_cells == 0) ||
664 (ids_available_local[0] == ids_available_global[0]),
665 "ERROR(generate_ce_ids): inconsistent global ids")
666
667 YAC_ASSERT(
668 (num_edges == 0) ||
669 (ids_available_local[1] == ids_available_global[1]),
670 "ERROR(generate_ce_ids): inconsistent global ids")
671
672 // if no ids have to be generated
673 if (ids_available_global[0] && ids_available_global[1]) return;
674
675 int * rank_buffer =
676 xmalloc(
677 (((ids_available_global[0])?0:(num_cells)) +
678 ((ids_available_global[1])?0:(num_edges))) *
679 sizeof(*rank_buffer));
680 int * cell_ranks = rank_buffer;
681 int * edge_ranks =
682 rank_buffer + ((ids_available_global[0])?0:(num_cells));
683
684 size_t * size_t_buffer =
685 xmalloc((8 * (size_t)comm_size + 1) * sizeof(*size_t_buffer));
686 size_t * sendcounts = size_t_buffer + 0 * comm_size;
687 size_t * recvcounts = size_t_buffer + 2 * comm_size;
688 size_t * total_sendcounts = size_t_buffer + 4 * comm_size;
689 size_t * total_recvcounts = size_t_buffer + 5 * comm_size;
690 size_t * total_sdispls = size_t_buffer + 6 * comm_size;
691 size_t * total_rdispls = size_t_buffer + 7 * comm_size + 1;
692 memset(sendcounts, 0, 2 * (size_t)comm_size * sizeof(*sendcounts));
693
694 yac_int * cell_to_vertex_ids = NULL;
695
696 if (!ids_available_global[0]) {
697
699 size_t * cell_to_vertex = grid_data->cell_to_vertex;
701
702 cell_to_vertex_ids =
703 xmalloc(num_cells * max_num_vertices_per_cell *
704 sizeof(*cell_to_vertex_ids));
705
706 for (size_t i = 0; i < num_cells; ++i) {
707
708 int curr_num_vertices = num_vertices_per_cell[i];
709 yac_int * curr_cell_to_vertex_ids =
710 cell_to_vertex_ids + i * max_num_vertices_per_cell;
711
712 int cell_rank;
713 if (curr_num_vertices > 0) {
714 size_t * curr_cell_vertices =
716 size_t min_vertex = curr_cell_vertices[0];
717 curr_cell_to_vertex_ids[0] = vertex_ids[min_vertex];
718 for (int j = 1; j < curr_num_vertices; ++j) {
719 size_t curr_vertex_idx = curr_cell_vertices[j];
720 yac_int curr_vertex_id = vertex_ids[curr_vertex_idx];
721 insert_global_id(curr_cell_to_vertex_ids, j, curr_vertex_id);
722 if (curr_cell_to_vertex_ids[0] == curr_vertex_id)
723 min_vertex = curr_vertex_idx;
724 }
725 cell_rank = vertex_ranks[min_vertex];
726 } else {
727 cell_rank = 0;
728 }
729 for (int j = curr_num_vertices; j < max_num_vertices_per_cell; ++j)
730 curr_cell_to_vertex_ids[j] = XT_INT_MAX;
731
732 sendcounts[2 * ((cell_ranks[i] = cell_rank)) + 0]++;
733 }
734 }
735
736 yac_int * edge_to_vertex_ids = NULL;
737
738 if (!ids_available_global[1]) {
739
740 edge_to_vertex_ids =
741 xmalloc(2 * num_edges * sizeof(*edge_to_vertex_ids));
742 yac_size_t_2_pointer edge_to_vertex = grid_data->edge_to_vertex;
743
744 for (size_t i = 0; i < num_edges; ++i) {
745
746 size_t * curr_edge_to_vertex = edge_to_vertex[i];
747 yac_int * curr_edge_vertex_ids = edge_to_vertex_ids + 2 * i;
748 curr_edge_vertex_ids[0] = vertex_ids[curr_edge_to_vertex[0]];
749 curr_edge_vertex_ids[1] = vertex_ids[curr_edge_to_vertex[1]];
750
751 if (curr_edge_vertex_ids[0] > curr_edge_vertex_ids[1]) {
752 yac_int temp = curr_edge_vertex_ids[0];
753 curr_edge_vertex_ids[0] = curr_edge_vertex_ids[1];
754 curr_edge_vertex_ids[1] = temp;
755 sendcounts[
756 2 * ((edge_ranks[i] = vertex_ranks[curr_edge_to_vertex[1]])) + 1]++;
757 } else {
758 sendcounts[
759 2 * ((edge_ranks[i] = vertex_ranks[curr_edge_to_vertex[0]])) + 1]++;
760 }
761 }
762 }
763
764 // exchange the number of cells and edges
765 yac_mpi_call(MPI_Alltoall(sendcounts, 2, YAC_MPI_SIZE_T,
766 recvcounts, 2, YAC_MPI_SIZE_T, comm), comm);
767
768 total_sdispls[0] = 0;
769 size_t recv_counts[2] = {0,0};
770 size_t saccu = 0, raccu = 0;
771 for (int i = 0; i < comm_size; ++i) {
772 total_sdispls[i+1] = saccu;
773 total_rdispls[i] = raccu;
774 recv_counts[0] += recvcounts[2 * i + 0];
775 recv_counts[1] += recvcounts[2 * i + 1];
776 total_sendcounts[i] = sendcounts[2 * i + 0] *
777 (size_t)max_num_vertices_per_cell +
778 sendcounts[2 * i + 1] * 2;
779 total_recvcounts[i] = recvcounts[2 * i + 0] *
780 (size_t)max_num_vertices_per_cell +
781 recvcounts[2 * i + 1] * 2;
782 saccu += total_sendcounts[i];
783 raccu += total_recvcounts[i];
784 }
785 size_t local_data_count = total_sendcounts[comm_size - 1] +
786 total_sdispls[comm_size];
787 size_t recv_count = total_recvcounts[comm_size - 1] +
788 total_rdispls[comm_size - 1];
789
790 yac_int * yac_int_buffer =
791 xcalloc((local_data_count + recv_count), sizeof(*yac_int_buffer));
792 yac_int * send_buffer = yac_int_buffer;
793 yac_int * recv_buffer = yac_int_buffer + local_data_count;
794
795 // pack send buffer
796 if (!ids_available_global[0])
797 for (size_t i = 0; i < num_cells; ++i)
798 for (int j = 0; j < max_num_vertices_per_cell; ++j)
799 send_buffer[total_sdispls[cell_ranks[i] + 1]++] =
800 cell_to_vertex_ids[i * max_num_vertices_per_cell + j];
801 if (!ids_available_global[1])
802 for (size_t i = 0; i < num_edges; ++i)
803 for (int j = 0; j < 2; ++j)
804 send_buffer[total_sdispls[edge_ranks[i] + 1]++] =
805 edge_to_vertex_ids[2 * i + j];
806
807 free(edge_to_vertex_ids);
808 free(cell_to_vertex_ids);
809
810 // exchange data
811 yac_alltoallv_yac_int_p2p(
812 send_buffer, total_sendcounts, total_sdispls,
813 recv_buffer, total_recvcounts, total_rdispls, comm);
814
815 struct n_ids_reorder * n_ids_reorder_buffer =
816 xmalloc((recv_counts[0] + recv_counts[1]) * sizeof(*n_ids_reorder_buffer));
817 struct n_ids_reorder * n_ids_reorder[2] =
818 {n_ids_reorder_buffer, n_ids_reorder_buffer + recv_counts[0]};
819
820 size_t offset = 0;
821 int index_counts[2] = {max_num_vertices_per_cell, 2};
822 size_t reorder_idx = 0;
823 recv_counts[0] = 0;
824 recv_counts[1] = 0;
825 for (int i = 0; i < comm_size; ++i) {
826 for (int j = 0; j < 2; ++j) {
827 size_t curr_count = recvcounts[2 * i + j];
828 for (size_t k = 0; k < curr_count;
829 ++k, ++reorder_idx, ++recv_counts[j]) {
830 n_ids_reorder[j][recv_counts[j]].count = index_counts[j];
831 n_ids_reorder[j][recv_counts[j]].ids = recv_buffer + offset;
832 n_ids_reorder[j][recv_counts[j]].reorder_idx = reorder_idx;
833 offset += index_counts[j];
834 }
835 }
836 }
837
838 for (int i = 0; i < 2; ++i) {
839
840 if (ids_available_global[i]) continue;
841
842 qsort(n_ids_reorder[i], recv_counts[i], sizeof(*(n_ids_reorder[i])),
843 compare_n_ids_reorder_ids);
844
845 size_t unique_count = recv_counts[i] > 0;
846 struct n_ids_reorder * prev = n_ids_reorder[i];
847 struct n_ids_reorder * curr = n_ids_reorder[i];
848
849 for (size_t j = 0; j < recv_counts[i]; ++j, ++curr) {
850 if (compare_n_ids_reorder_ids(prev, curr)) {
851 ++unique_count;
852 prev = curr;
853 }
854 curr->global_id = (yac_int)(unique_count - 1);
855 }
856
857 YAC_ASSERT(
858 unique_count <= (size_t)XT_INT_MAX,
859 "ERROR(generate_global_ce_ids): global_id out of bounds")
860
861 yac_int yac_int_unique_count = (yac_int)unique_count;
862 yac_int id_offset;
863
864 // determine exclusive scan of sum of numbers of unique ids on all ranks
865 yac_mpi_call(MPI_Exscan(&yac_int_unique_count, &id_offset, 1, yac_int_dt,
866 MPI_SUM, comm), comm);
867 if (comm_rank == 0) id_offset = 0;
868
869 YAC_ASSERT(
870 ((size_t)id_offset + unique_count) <= (size_t)XT_INT_MAX,
871 "ERROR(generate_global_ce_ids): global_id out of bounds")
872
873 // adjust global ids
874 for (size_t j = 0; j < recv_counts[i]; ++j)
875 n_ids_reorder[i][j].global_id += id_offset;
876 }
877 free(yac_int_buffer);
878
879 qsort(n_ids_reorder_buffer, recv_counts[0] + recv_counts[1],
880 sizeof(*n_ids_reorder_buffer), compare_n_ids_reorder_reorder);
881
882 yac_int * global_ids_buffer =
883 xmalloc((recv_counts[0] + recv_counts[1] +
884 ((ids_available_global[0])?0:(num_cells)) +
885 ((ids_available_global[1])?0:(num_edges))) *
886 sizeof(*global_ids_buffer));
887 yac_int * send_global_ids = global_ids_buffer;
888 yac_int * recv_global_ids =
889 global_ids_buffer + recv_counts[0] + recv_counts[1];
890
891 for (size_t i = 0; i < recv_counts[0] + recv_counts[1]; ++i)
892 send_global_ids[i] = n_ids_reorder_buffer[i].global_id;
893 free(n_ids_reorder_buffer);
894
895 // generate count and displs data
896 saccu = 0, raccu = 0;
897 for (int i = 0; i < comm_size; ++i) {
898 total_sdispls[i] = saccu;
899 total_rdispls[i] = raccu;
900 saccu +=
901 ((total_sendcounts[i] = recvcounts[2 * i + 0] + recvcounts[2 * i + 1]));
902 raccu +=
903 ((total_recvcounts[i] = sendcounts[2 * i + 0] + sendcounts[2 * i + 1]));
904 }
905
906 // exchange generated global ids data
907 yac_alltoallv_yac_int_p2p(
908 send_global_ids, total_sendcounts, total_sdispls,
909 recv_global_ids, total_recvcounts, total_rdispls, comm);
910
911 if ((!ids_available_global[0]) && (num_cells > 0))
912 grid_data->cell_ids =
913 xmalloc(num_cells * sizeof(*grid_data->cell_ids));
914 if ((!ids_available_global[1]) && (num_edges > 0))
915 grid_data->edge_ids =
916 xmalloc(num_edges * sizeof(*grid_data->edge_ids));
917
918 // unpack generated global ids
919 if (!ids_available_global[0])
920 for (size_t i = 0; i < num_cells; ++i)
921 grid_data->cell_ids[i] =
922 recv_global_ids[total_rdispls[cell_ranks[i]]++];
923 if (!ids_available_global[1])
924 for (size_t i = 0; i < num_edges; ++i)
925 grid_data->edge_ids[i] =
926 recv_global_ids[total_rdispls[edge_ranks[i]]++];
927
928 free(rank_buffer);
929 free(size_t_buffer);
930 free(global_ids_buffer);
931}
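
// Self-contained sketch (not part of this file) of the edge keying used
// above: an edge is identified by the pair of its vertex ids with the smaller
// id first, so duplicates reported by different cells or processes collapse
// onto the same key after sorting. The example ids are made up.
#include <stdio.h>
#include <stdlib.h>

static int cmp_pairs(const void * a, const void * b) {
  const long * pa = a, * pb = b;
  if (pa[0] != pb[0]) return (pa[0] > pb[0]) - (pa[0] < pb[0]);
  return (pa[1] > pb[1]) - (pa[1] < pb[1]);
}

int main(void) {
  // edges given as (vertex id, vertex id); the 1st and 3rd are the same edge
  long edges[][2] = {{7, 2}, {2, 5}, {2, 7}, {5, 9}};
  size_t num_edges = 4;

  for (size_t i = 0; i < num_edges; ++i)      // canonical order: low id first
    if (edges[i][0] > edges[i][1]) {
      long t = edges[i][0]; edges[i][0] = edges[i][1]; edges[i][1] = t;
    }
  qsort(edges, num_edges, sizeof(*edges), cmp_pairs);

  size_t unique = num_edges > 0;              // count unique keys
  for (size_t i = 1; i < num_edges; ++i)
    if (cmp_pairs(edges[i - 1], edges[i])) ++unique;
  printf("%zu unique edges\n", unique);       // prints "3 unique edges"
  return 0;
}
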
932
933// check core masks for consistency
934// (no valid cell/edge may be connected to an invalid edge/vertex)
935// and generate them if required
936static void check_core_masks(struct yac_basic_grid * grid) {
937
938 struct yac_basic_grid_data * grid_data = yac_basic_grid_get_data(grid);
939
940 int * core_vertex_mask = grid_data->core_vertex_mask;
941 int * core_edge_mask = grid_data->core_edge_mask;
942 int * core_cell_mask = grid_data->core_cell_mask;
943
944 size_t num_edges = grid_data->num_edges;
945 size_t num_cells = grid_data->num_cells;
946
947 //---------------------------------------------------------------
948 // the core mask for vertices is optional and does not have to be
949 // generated if it is missing
950 //---------------------------------------------------------------
951
952 //---------------------------------------------------------------
953 // a core mask for edges is required if the grid contains
954 // edges and a core mask for vertices
955 //---------------------------------------------------------------
956
957 if (core_vertex_mask && (num_edges > 0)) {
958
960
961 // if the grid already contains a core mask for edges -->
962 // check consistency of the mask
963 // (no valid edge can be connected to a masked out vertex)
964 if (core_edge_mask) {
965
966 for (size_t j = 0; j < num_edges; ++j) {
967
968 size_t * curr_vertices = edge_to_vertex[j];
969
970 YAC_ASSERT_F(
971 (!core_edge_mask[j]) ||
972 (core_vertex_mask[curr_vertices[0]] &&
973 core_vertex_mask[curr_vertices[1]]),
974 "ERROR: inconsistent edge core mask for grid \"%s\" "
975 "(edge %" XT_INT_FMT " is valid but one of its vertices is not)",
977 grid_data->edge_ids?grid_data->edge_ids[j]:XT_INT_MAX);
978 }
979
980 } else { // if there is no core mask for edges --> generate one
981
982 core_edge_mask =
983 (grid_data->core_edge_mask =
984 xmalloc(num_edges * sizeof(*core_edge_mask)));
985 for (size_t j = 0; j < num_edges; ++j) {
986 size_t * curr_vertices = edge_to_vertex[j];
987 core_edge_mask[j] =
988 core_vertex_mask[curr_vertices[0]] &
989 core_vertex_mask[curr_vertices[1]];
990 }
991 }
992 }
993
994 //---------------------------------------------------------------
995 // a core mask for cells is required if the grid contains
996 // cells and a core mask for edges
997 //---------------------------------------------------------------
998
999 if (core_edge_mask && (num_cells > 0)) {
1000
1001 size_t * cell_to_edge = grid_data->cell_to_edge;
1002 size_t * cell_to_edge_offsets = grid_data->cell_to_edge_offsets;
1003 int * num_vertices_per_cell = grid_data->num_vertices_per_cell;
1004
1005 // if there is a core mask for cells -->
1006 // check consistency of the mask
1007 // (no valid cell can be connected to a masked out edge)
1008 if (core_cell_mask) {
1009
1010 for (size_t j = 0; j < num_cells; ++j) {
1011
1012 if (!core_cell_mask[j]) continue;
1013
1014 size_t * curr_edges = cell_to_edge + cell_to_edge_offsets[j];
1015 int curr_num_edges = num_vertices_per_cell[j];
1016
1017 for (int k = 0; k < curr_num_edges; ++k) {
1018 YAC_ASSERT_F(
1019 core_edge_mask[curr_edges[k]],
1020 "ERROR: inconsistent cell core mask for grid \"%s\" "
1021 "(cell %" XT_INT_FMT " is valid but edge %" XT_INT_FMT " is not)",
1023 grid_data->cell_ids?grid_data->cell_ids[j]:XT_INT_MAX,
1024 grid_data->edge_ids?grid_data->edge_ids[curr_edges[k]]:XT_INT_MAX);
1025 }
1026 }
1027 } else { // if there is no core mask for cells --> generate one
1028
1029 core_cell_mask =
1030 (grid_data->core_cell_mask =
1031 xmalloc(grid_data->num_cells * sizeof(*core_cell_mask)));
1032 for (size_t j = 0; j < num_cells; ++j) {
1033 int curr_num_edges = num_vertices_per_cell[j];
1034 size_t * curr_edges = cell_to_edge + cell_to_edge_offsets[j];
1035 int mask = 1;
1036 for (int k = 0; k < curr_num_edges; ++k)
1037 mask &= core_edge_mask[curr_edges[k]];
1038 core_cell_mask[j] = mask;
1039 }
1040 }
1041 }
1042}
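
// Tiny sketch (not part of this file) of the mask derivation above: an edge
// is valid exactly if both of its vertices are valid, so the edge core mask
// is the logical AND of the two endpoint flags. The connectivity below is an
// arbitrary example.
#include <stdio.h>

int main(void) {
  int core_vertex_mask[4] = {1, 1, 0, 1};
  size_t edge_to_vertex[3][2] = {{0, 1}, {1, 2}, {3, 0}};
  int core_edge_mask[3];

  for (size_t i = 0; i < 3; ++i)
    core_edge_mask[i] =
      core_vertex_mask[edge_to_vertex[i][0]] &
      core_vertex_mask[edge_to_vertex[i][1]];

  for (size_t i = 0; i < 3; ++i) printf("%d ", core_edge_mask[i]);  // 1 0 1
  printf("\n");
  return 0;
}
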
1043
1044// generate global ids for cells/vertices/edges (if they are missing)
1046 struct proc_sphere_part_node * proc_sphere_part,
1047 struct yac_basic_grid * grid, int ** vertex_ranks_,
1048 int max_num_vertices_per_cell, MPI_Comm comm) {
1049
1050 // generate global ids for all vertices (if non-existent)
1051 int * vertex_ranks =
1053 proc_sphere_part, yac_basic_grid_get_data(grid), comm);
1054
1055 // generate global ids and core masks for all cell and edge
1056 // (if non-existent and required)
1057 generate_ce_ids(grid, vertex_ranks, max_num_vertices_per_cell, comm);
1058
1059 *vertex_ranks_ = vertex_ranks;
1060}
1061
1062// generate edge to cell mapping
1063static yac_size_t_2_pointer generate_edge_to_cell(
1064 const_size_t_pointer cell_to_edge, int * num_edges_per_cell,
1065 int * core_cell_mask, size_t num_cells, size_t num_edges) {
1066
1067 if (num_cells == 0) return NULL;
1068
1069 yac_size_t_2_pointer edge_to_cell = xmalloc(num_edges * sizeof(*edge_to_cell));
1070
1071 for (size_t i = 0; i < num_edges; ++i) {
1072 edge_to_cell[i][0] = SIZE_MAX;
1073 edge_to_cell[i][1] = SIZE_MAX;
1074 }
1075
1076 for (size_t i = 0, offset = 0; i < num_cells; ++i) {
1077
1078 size_t curr_num_edges = num_edges_per_cell[i];
1079 const_size_t_pointer curr_cell_to_edge = cell_to_edge + offset;
1080 offset += curr_num_edges;
1081
1082 if ((core_cell_mask == NULL) || core_cell_mask[i]) {
1083
1084 for (size_t j = 0; j < curr_num_edges; ++j) {
1085
1086 size_t curr_edge = curr_cell_to_edge[j];
1087 size_t * curr_edge_to_cell = edge_to_cell[curr_edge];
1088 curr_edge_to_cell += *curr_edge_to_cell != SIZE_MAX;
1089 YAC_ASSERT_F(
1090 *curr_edge_to_cell == SIZE_MAX,
1091 "ERROR(generate_edge_to_cell): "
1092 "more than two cells point to a single edge "
1093 "(does the grid contain degenrated cells (less than 3 corners) "
1094 "or duplicated cells; "
1095 "these can be masked out using the core mask)\n"
1096 "(num_cells: %zu cell_idx: %zu: num_cell_edge %zu)",
1097 num_cells, i, curr_num_edges)
1098 *curr_edge_to_cell = i;
1099 }
1100 }
1101 }
1102
1103 return edge_to_cell;
1104}
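
// Self-contained sketch (not part of this file) of the mapping built above:
// every edge gets two cell slots initialised to SIZE_MAX; while walking the
// cells, each edge of a cell is written into its first free slot, so interior
// edges end up with two neighbouring cells and boundary edges with one.
#include <stdint.h>
#include <stdio.h>

int main(void) {
  // two triangles sharing edge 1 (edge indices per cell, made-up grid)
  size_t cell_to_edge[2][3] = {{0, 1, 2}, {1, 3, 4}};
  size_t num_cells = 2, num_edges = 5;

  size_t edge_to_cell[5][2];
  for (size_t i = 0; i < num_edges; ++i)
    edge_to_cell[i][0] = edge_to_cell[i][1] = SIZE_MAX;

  for (size_t i = 0; i < num_cells; ++i)
    for (size_t j = 0; j < 3; ++j) {
      size_t * slots = edge_to_cell[cell_to_edge[i][j]];
      slots[(slots[0] != SIZE_MAX) ? 1 : 0] = i;   // first free slot
    }

  printf("edge 1: cells %zu and %zu\n",
         edge_to_cell[1][0], edge_to_cell[1][1]);  // "edge 1: cells 0 and 1"
  return 0;
}
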
1105
1107 yac_size_t_2_pointer edge_to_vertex,
1108 const yac_coordinate_pointer vertex_coordinates, size_t edge_id) {
1109
1110 struct bounding_circle bnd_circle;
1111
1112 size_t * curr_edge_to_vertex = edge_to_vertex[edge_id];
1113 double * vertices[2] =
1114 {vertex_coordinates[curr_edge_to_vertex[0]],
1115 vertex_coordinates[curr_edge_to_vertex[1]]};
1116
1117 bnd_circle.base_vector[0] = vertices[0][0] + vertices[1][0];
1118 bnd_circle.base_vector[1] = vertices[0][1] + vertices[1][1];
1119 bnd_circle.base_vector[2] = vertices[0][2] + vertices[1][2];
1120 normalise_vector(bnd_circle.base_vector);
1121 bnd_circle.inc_angle =
1122 half_angle(get_vector_angle_2(vertices[0], vertices[1]));
1123 bnd_circle.sq_crd = DBL_MAX;
1124
1125 return bnd_circle;
1126}
1127
1129 struct proc_sphere_part_node * proc_sphere_part,
1130 struct yac_basic_grid * grid, MPI_Comm comm,
1131 size_t * dist_cell_rank_offsets, size_t * dist_edge_rank_offsets,
1132 int * num_cell_ranks, int * num_edge_ranks,
1133 int ** rank_buffer, size_t * rank_buffer_array_size) {
1134
1135 struct yac_basic_grid_data * grid_data =
1136 yac_basic_grid_get_data(grid);
1137
1138 int comm_size;
1139 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1140
1141 size_t num_cells = grid_data->num_cells;
1142 size_t num_edges = grid_data->num_edges;
1143 size_t dist_edge_rank_offset = dist_cell_rank_offsets[num_cells];
1144 int * core_edge_mask = grid_data->core_edge_mask;
1145
1146 // compute mapping from edge to cell
1147 yac_size_t_2_pointer edge_to_cell =
1148 generate_edge_to_cell(
1149 grid_data->cell_to_edge, grid_data->num_vertices_per_cell,
1150 NULL, num_cells, num_edges);
1151
1152 // for all edges
1153 for (size_t i = 0; i < num_edges; ++i) {
1154
1155 int edge_rank_count = 0;
1156
1157 // only distribute valid edges
1158 if ((core_edge_mask == NULL) || grid_data->core_edge_mask[i]) {
1159
1160 int cell_rank_counts[2] = {0, 0};
1161 size_t * curr_edge_cells = edge_to_cell[i];
1162
1163 for (int j = 0; j < 2; ++j)
1164 if (curr_edge_cells[j] != SIZE_MAX)
1165 edge_rank_count +=
1166 ((cell_rank_counts[j] = num_cell_ranks[curr_edge_cells[j]]));
1167
1168 // if the edge is connected to at least one cell
1169 if (edge_rank_count > 0) {
1170
1171 ENSURE_ARRAY_SIZE(
1172 *rank_buffer, *rank_buffer_array_size,
1173 dist_edge_rank_offset + edge_rank_count);
1174
1175 int * curr_edge_ranks = *rank_buffer + dist_edge_rank_offset;
1176
1177 // get ranks of connected cells
1178 edge_rank_count = 0;
1179 for (int j = 0; j < 2; ++j) {
1180 if (cell_rank_counts[j] > 0) {
1181 int * cell_ranks =
1182 *rank_buffer + dist_cell_rank_offsets[curr_edge_cells[j]];
1183 for (int k = 0; k < cell_rank_counts[j]; ++k)
1184 insert_rank(
1185 curr_edge_ranks, &edge_rank_count, cell_ranks[k]);
1186 }
1187 }
1188
1189 } else { // if this is a "hanging edge" (not connected to any cell)
1190
1191 ENSURE_ARRAY_SIZE(
1192 *rank_buffer, *rank_buffer_array_size,
1193 dist_edge_rank_offset + comm_size);
1194
1195 int * curr_edge_ranks = *rank_buffer + dist_edge_rank_offset;
1196
1197 // set up a bounding circle around the edge and search for all matching
1198 // ranks based on the YAC internal decomposition
1200 proc_sphere_part,
1202 grid_data->edge_to_vertex, grid_data->vertex_coordinates, i),
1203 curr_edge_ranks, &edge_rank_count);
1204 }
1205 }
1206
1207 dist_edge_rank_offset += (size_t)edge_rank_count;
1208 num_edge_ranks[i] = edge_rank_count;
1209
1210 dist_edge_rank_offsets[i+1] = dist_edge_rank_offset;
1211 }
1212
1213 free(edge_to_cell);
1214}
1215
1218 size_t * vertex_to_edge, int * num_edges_per_vertex) {
1219
1220 memset(
1221 num_edges_per_vertex, 0, num_vertices * sizeof(*num_edges_per_vertex));
1222
1223 for (size_t i = 0; i < num_edges; ++i) {
1224 num_edges_per_vertex[edge_to_vertex[i][0]]++;
1225 num_edges_per_vertex[edge_to_vertex[i][1]]++;
1226 }
1227
1228 size_t * vertex_edges_offsets =
1229 xmalloc((num_vertices + 1) * sizeof(*vertex_edges_offsets));
1230
1231 vertex_edges_offsets[0] = 0;
1232 for (size_t i = 0, offset = 0; i < num_vertices; ++i) {
1233 vertex_edges_offsets[i + 1] = offset;
1234 offset += (size_t)(num_edges_per_vertex[i]);
1235 }
1236
1237 for (size_t i = 0; i < num_edges; ++i) {
1238 for (int j = 0; j < 2; ++j) {
1239 size_t curr_vertex = edge_to_vertex[i][j];
1240 vertex_to_edge[vertex_edges_offsets[curr_vertex+1]] = i;
1241 vertex_edges_offsets[curr_vertex+1]++;
1242 }
1243 }
1244
1245 free(vertex_edges_offsets);
1246}
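
// Self-contained sketch (not part of this file) of the counting technique
// used above to invert edge_to_vertex into a vertex-to-edge adjacency: count
// the edges per vertex, turn the counts into start offsets, then reuse the
// offsets as running write cursors while scattering the edge indices.
#include <stdio.h>
#include <stdlib.h>

int main(void) {
  size_t edge_to_vertex[4][2] = {{0, 1}, {1, 2}, {2, 0}, {1, 3}};
  size_t num_edges = 4, num_vertices = 4;

  size_t * counts = calloc(num_vertices, sizeof(*counts));
  for (size_t i = 0; i < num_edges; ++i) {
    counts[edge_to_vertex[i][0]]++;
    counts[edge_to_vertex[i][1]]++;
  }

  size_t * offsets = malloc((num_vertices + 1) * sizeof(*offsets));
  offsets[0] = 0;
  for (size_t i = 0; i < num_vertices; ++i) offsets[i + 1] = offsets[i] + counts[i];

  size_t * vertex_to_edge = malloc(offsets[num_vertices] * sizeof(*vertex_to_edge));
  size_t * cursor = malloc(num_vertices * sizeof(*cursor));
  for (size_t i = 0; i < num_vertices; ++i) cursor[i] = offsets[i];
  for (size_t i = 0; i < num_edges; ++i)
    for (int j = 0; j < 2; ++j)
      vertex_to_edge[cursor[edge_to_vertex[i][j]]++] = i;

  // edges of vertex 1: vertex_to_edge[offsets[1] .. offsets[2]-1]
  for (size_t k = offsets[1]; k < offsets[2]; ++k) printf("%zu ", vertex_to_edge[k]);
  printf("\n");   // prints "0 1 3"
  free(cursor); free(vertex_to_edge); free(offsets); free(counts);
  return 0;
}
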
1247
1249 int * vertex_ranks, struct yac_basic_grid * grid, MPI_Comm comm,
1250 size_t * dist_edge_rank_offsets, int * num_edge_ranks, int * num_vertex_ranks,
1251 int ** rank_buffer, size_t * rank_buffer_array_size) {
1252
1253 struct yac_basic_grid_data * grid_data = yac_basic_grid_get_data(grid);
1254
1255 int comm_size;
1256 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1257
1258 size_t num_edges = grid_data->num_edges;
1259 size_t num_vertices = grid_data->num_vertices;
1260 size_t vertex_rank_offset = dist_edge_rank_offsets[num_edges];
1261 int * core_vertex_mask = grid_data->core_vertex_mask;
1262
1263 // compute mapping from vertex to edge
1264 size_t * vertex_to_edge = xmalloc(2 * num_edges * sizeof(*vertex_to_edge));
1265 int * num_edges_per_vertex =
1266 xmalloc(num_vertices * sizeof(*num_edges_per_vertex));
1269 vertex_to_edge, num_edges_per_vertex);
1270 size_t * curr_edges = vertex_to_edge;
1271
1272 // for all vertices
1273 for (size_t i = 0; i < num_vertices; ++i) {
1274
1275 int vertex_rank_count = 0;
1276 int curr_num_edges = num_edges_per_vertex[i];
1277
1278 // if this is a valid vertex (not masked out by the core mask)
1279 if ((core_vertex_mask == NULL) || core_vertex_mask[i]) {
1280
1281 for (int j = 0; j < curr_num_edges; ++j)
1282 vertex_rank_count += num_edge_ranks[curr_edges[j]];
1283
1284 // if the vertex is connected to at least one edge
1285 if (vertex_rank_count > 0) {
1286
1287 ENSURE_ARRAY_SIZE(
1288 *rank_buffer, *rank_buffer_array_size,
1289 vertex_rank_offset + vertex_rank_count);
1290
1291 int * curr_vertex_ranks = *rank_buffer + vertex_rank_offset;
1292
1293 // get ranks of connected edges
1294 vertex_rank_count = 0;
1295 for (int j = 0; j < curr_num_edges; ++j) {
1296 size_t curr_edge = curr_edges[j];
1297 int curr_num_edge_ranks = num_edge_ranks[curr_edge];
1298 int * curr_edge_ranks =
1299 *rank_buffer + dist_edge_rank_offsets[curr_edge];
1300 for (int k = 0; k < curr_num_edge_ranks; ++k)
1301 insert_rank(
1302 curr_vertex_ranks, &vertex_rank_count, curr_edge_ranks[k]);
1303 }
1304
1305 } else { // if this is a "hanging vertex" (not connected to any edge)
1306
1307 ENSURE_ARRAY_SIZE(
1308 *rank_buffer, *rank_buffer_array_size, vertex_rank_offset + 1);
1309
1310 int * curr_vertex_ranks = *rank_buffer + vertex_rank_offset;
1311
1312 *curr_vertex_ranks = vertex_ranks[i];
1313 vertex_rank_count = 1;
1314 }
1315 }
1316
1317 vertex_rank_offset += (size_t)vertex_rank_count;
1318 num_vertex_ranks[i] = vertex_rank_count;
1319 curr_edges += curr_num_edges;
1320 }
1321
1322 free(num_edges_per_vertex);
1323 free(vertex_to_edge);
1324}
1325
1327 const void * a, const void * b) {
1328
1329 return (((const struct single_remote_point *)a)->global_id >
1330 ((const struct single_remote_point *)b)->global_id) -
1331 (((const struct single_remote_point *)a)->global_id <
1332 ((const struct single_remote_point *)b)->global_id);
1333}
1334
1335// generate owner information for all cells/vertices/edges that may be
1336// assigned to the local process in the YAC internal decomposition
1337// (dist owners are sorted by global ids)
1339 struct proc_sphere_part_node * proc_sphere_part,
1340 struct yac_basic_grid * grid, int * vertex_ranks,
1341 int max_num_vertices_per_cell, MPI_Comm comm,
1342 struct remote_point_infos ** dist_point_infos, yac_int ** dist_global_ids,
1343 size_t * dist_count) {
1344
1345 struct yac_basic_grid_data * grid_data = yac_basic_grid_get_data(grid);
1346
1347 size_t num_cells = grid_data->num_cells;
1348 size_t num_vertices = grid_data->num_vertices;
1349 size_t num_edges = grid_data->num_edges;
1350
1351 int * rank_buffer;
1352 int * num_ranks_buffer =
1353 xmalloc(
1354 (num_cells + num_vertices + num_edges) * sizeof(*num_ranks_buffer));
1355 int * num_cell_ranks = num_ranks_buffer;
1356 int * num_vertex_ranks = num_ranks_buffer + num_cells;
1357 int * num_edge_ranks = num_ranks_buffer + num_cells + num_vertices;
1358 size_t * dist_rank_offsets =
1359 xmalloc((num_cells + num_edges + 1) * sizeof(*dist_rank_offsets));
1360 size_t * dist_cell_rank_offsets = dist_rank_offsets;
1361 size_t * dist_edge_rank_offsets = dist_rank_offsets + num_cells;
1362
1363 //-------------------------------------------------------------------
1364 // determine for all cells/vertices/edges the ranks of the processes
1365 // that require them according to the YAC internal decomposition
1366 //-------------------------------------------------------------------
1367
1368 // determine for all cells the ranks of the processes whose YAC internal
1369 // partition overlaps with the bounding circle of the respective cell
1371 proc_sphere_part, grid_data, comm,
1372 &rank_buffer, num_cell_ranks, dist_cell_rank_offsets, max_num_vertices_per_cell);
1373
1374 size_t rank_buffer_array_size = dist_cell_rank_offsets[num_cells];
1375
1376 // determine for all edges the ranks of the processes whose YAC internal
1377 // partition overlaps with the respective edge
1378 // (edges connected to a cell use the ranks of the cell;
1379 // edges not connected to any cell use a bounding circle to determine
1380 // the ranks)
1382 proc_sphere_part, grid, comm, dist_cell_rank_offsets, dist_edge_rank_offsets,
1383 num_cell_ranks, num_edge_ranks, &rank_buffer, &rank_buffer_array_size);
1384
1385 // determine for all vertices the ranks of the processes whose YAC internal
1386 // partition overlaps with the respective vertex
1387 // (vertices connected to an edge use the ranks of the edge;
1388 // vertices not connected to any edge directly determine the rank using
1389 // the YAC internal decomposition)
1391 vertex_ranks, grid, comm, dist_edge_rank_offsets,
1392 num_edge_ranks, num_vertex_ranks, &rank_buffer, &rank_buffer_array_size);
1393
1394 int * dist_cell_ranks = rank_buffer;
1395 int * dist_vertex_ranks = rank_buffer + dist_edge_rank_offsets[num_edges];
1396 int * dist_edge_ranks = rank_buffer + dist_cell_rank_offsets[num_cells];
1397
1398 free(dist_rank_offsets);
1399
1400 //-------------------------------------------------------------------
1401 // inform all processes about the cells/vertices/edges that they
1402 // require according to the YAC internal decomposition
1403 //-------------------------------------------------------------------
1404
1405 int comm_size;
1406 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1407
1408 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
1410 3, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1411 size_t * size_t_buffer =
1412 xmalloc(4 * (size_t)comm_size * sizeof(*size_t_buffer));
1413 size_t * total_sendcounts = size_t_buffer + 0 * comm_size;
1414 size_t * total_recvcounts = size_t_buffer + 1 * comm_size;
1415 size_t * total_sdispls = size_t_buffer + 2 * comm_size;
1416 size_t * total_rdispls = size_t_buffer + 3 * comm_size;
1417
1418 struct {
1419 size_t count;
1420 int * ranks;
1421 int * num_ranks;
1422 yac_int * ids;
1423 } cve_data[3] =
1424 {{.count = num_cells,
1425 .ranks = dist_cell_ranks,
1426 .num_ranks = num_cell_ranks,
1427 .ids = grid_data->cell_ids},
1428 {.count = num_vertices,
1429 .ranks = dist_vertex_ranks,
1430 .num_ranks = num_vertex_ranks,
1431 .ids = grid_data->vertex_ids},
1432 {.count = num_edges,
1433 .ranks = dist_edge_ranks,
1434 .num_ranks = num_edge_ranks,
1435 .ids = grid_data->edge_ids}};
1436
1437 // determine number of cells/vertices/edges that have to be
1438 // sent to other processes
1439 for (int location = 0; location < 3; ++location) {
1440 size_t count = cve_data[location].count;
1441 int * ranks = cve_data[location].ranks;
1442 int * num_ranks = cve_data[location].num_ranks;
1443 for (size_t i = 0, k = 0; i < count; ++i) {
1444 int curr_num_ranks = num_ranks[i];
1445 for (int j = 0; j < curr_num_ranks; ++j, ++k)
1446 sendcounts[3 * ranks[k] + location]++;
1447 }
1448 }
1449
1451 3, sendcounts, recvcounts, sdispls, rdispls, comm);
1452
1453 size_t receive_counts[3] = {0,0,0};
1454 size_t saccu = 0, raccu = 0;
1455 for (int i = 0; i < comm_size; ++i) {
1456 total_sdispls[i] = saccu;
1457 total_rdispls[i] = raccu;
1458 total_sendcounts[i] = 0;
1459 total_recvcounts[i] = 0;
1460 for (int location = 0; location < 3; ++location) {
1461 total_sendcounts[i] += sendcounts[3 * i + location];
1462 total_recvcounts[i] += recvcounts[3 * i + location];
1463 receive_counts[location] += recvcounts[3 * i + location];
1464 }
1465 saccu += total_sendcounts[i];
1466 raccu += total_recvcounts[i];
1467 }
1468 size_t local_data_count = total_sendcounts[comm_size - 1] +
1469 total_sdispls[comm_size - 1];
1470 size_t recv_count = total_recvcounts[comm_size - 1] +
1471 total_rdispls[comm_size - 1];
1472
1473 struct id_pos * id_pos_buffer =
1474 xcalloc((local_data_count + recv_count), sizeof(*id_pos_buffer));
1475 struct id_pos * id_pos_send_buffer = id_pos_buffer;
1476 struct id_pos * id_pos_recv_buffer =
1477 id_pos_buffer + local_data_count;
1478
1479 // pack cell/edge/vertex information for distributed owners
1480 for (int location = 0; location < 3; ++location) {
1481 size_t count = cve_data[location].count;
1482 int * ranks = cve_data[location].ranks;
1483 int * num_ranks = cve_data[location].num_ranks;
1484 yac_int * ids = cve_data[location].ids;
1485 for (size_t i = 0, k = 0; i < count; ++i) {
1486 int curr_num_ranks = num_ranks[i];
1487 yac_int global_id = ids[i];
1488 for (int j = 0; j < curr_num_ranks; ++j, ++k) {
1489 size_t pos = sdispls[3 * ranks[k] + location + 1]++;
1490 id_pos_send_buffer[pos].global_id = global_id;
1491 id_pos_send_buffer[pos].orig_pos = i;
1492 }
1493 }
1494 }
1495 free(num_ranks_buffer);
1496 free(rank_buffer);
1497
1498 MPI_Datatype id_pos_dt = yac_get_id_pos_mpi_datatype(comm);
1499
1500 // exchange cell/vertex/edge information for distributed owners
1502 id_pos_send_buffer, total_sendcounts, total_sdispls,
1503 id_pos_recv_buffer, total_recvcounts, total_rdispls,
1504 sizeof(*id_pos_send_buffer), id_pos_dt, comm);
1505
1506 yac_mpi_call(MPI_Type_free(&id_pos_dt), comm);
1507
1508 size_t dist_owner_counts[3] = {0, 0, 0};
1509 for (int i = 0; i < comm_size; ++i)
1510 for (int location = 0; location < 3; ++location)
1511 dist_owner_counts[location] += recvcounts[3 * i + location];
1512 size_t max_dist_owner_count =
1513 MAX(MAX(dist_owner_counts[0], dist_owner_counts[1]), dist_owner_counts[2]);
1514 struct single_remote_point * temp_buffer =
1515 xcalloc(max_dist_owner_count, sizeof(*temp_buffer));
1516
1517 struct remote_point * unique_ids = NULL;
1518
1519 // unpack data
1520 for (int location = 0; location < 3; ++location) {
1521
1522 size_t count = 0;
1523 for (int i = 0; i < comm_size; ++i) {
1524 size_t curr_recvcount = recvcounts[3 * i + location];
1525 struct id_pos * curr_id_pos =
1526 id_pos_recv_buffer + rdispls[3 * i + location];
1527 for (size_t k = 0; k < curr_recvcount; ++k, ++count) {
1528 temp_buffer[count].global_id = curr_id_pos[k].global_id;
1529 temp_buffer[count].data.orig_pos = curr_id_pos[k].orig_pos;
1530 temp_buffer[count].data.rank = i;
1531 }
1532 }
1533
1534 // sort received global ids
1535 qsort(temp_buffer, count, sizeof(*temp_buffer),
1537
1538 unique_ids = xrealloc(unique_ids, count * sizeof(*unique_ids));
1539 size_t num_unique_ids = 0;
1540
1541 // determine unique global ids
1542 yac_int prev_id = (count > 0)?temp_buffer[0].global_id - 1:-1;
1543 for (size_t i = 0; i < count; ++i) {
1544
1545 yac_int curr_id = temp_buffer[i].global_id;
1546 if (curr_id != prev_id) {
1547 prev_id = curr_id;
1548 unique_ids[num_unique_ids].global_id = curr_id;
1549 unique_ids[num_unique_ids].data.count = 1;
1550 num_unique_ids++;
1551 } else {
1552 unique_ids[num_unique_ids-1].data.count++;
1553 }
1554 }
1555
1556 struct remote_point_infos * point_infos =
1557 ((dist_point_infos[location] =
1558 xmalloc(num_unique_ids * sizeof(*(dist_point_infos[location])))));
1559 yac_int * global_ids =
1560 ((dist_global_ids[location] =
1561 xmalloc(num_unique_ids * sizeof(*(dist_global_ids[location])))));
1562 dist_count[location] = num_unique_ids;
1563
1564 // compact received information
1565 // (each global id is only stored once and can have multiple
1566 // original owners)
1567 for (size_t i = 0, l = 0; i < num_unique_ids; ++i) {
1568 global_ids[i] = unique_ids[i].global_id;
1569 int curr_count = unique_ids[i].data.count;
1570 point_infos[i].count = curr_count;
1571 if (curr_count == 1) {
1572 point_infos[i].data.single = temp_buffer[l].data;
1573 ++l;
1574 } else {
1575 point_infos[i].data.multi =
1576 xmalloc(
1577 (size_t)curr_count *
1578 sizeof(*(point_infos[i].data.multi)));
1579 for (int k = 0; k < curr_count; ++k, ++l) {
1580 point_infos[i].data.multi[k] = temp_buffer[l].data;
1581 }
1582 }
1583 }
1584 } // location
1585
1586 free(unique_ids);
1587 free(id_pos_buffer);
1588 free(temp_buffer);
1589 free(size_t_buffer);
1590 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
1591}
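
// Self-contained sketch (not part of this file) of the compaction step above:
// after sorting the received (global id, owner rank) pairs by id, equal ids
// form contiguous runs, and each run is collapsed into one entry that keeps
// the id once together with the number of owners that reported it. The data
// below is made up.
#include <stdio.h>
#include <stdlib.h>

struct pair { long global_id; int rank; };

static int cmp_pair_id(const void * a, const void * b) {
  long ia = ((const struct pair *)a)->global_id;
  long ib = ((const struct pair *)b)->global_id;
  return (ia > ib) - (ia < ib);
}

int main(void) {
  struct pair received[] = {{7, 2}, {3, 0}, {7, 1}, {3, 2}, {9, 1}};
  size_t count = 5;

  qsort(received, count, sizeof(*received), cmp_pair_id);

  long unique_ids[5];
  int owner_counts[5];
  size_t num_unique = 0;
  for (size_t i = 0; i < count; ++i) {
    if ((num_unique == 0) || (unique_ids[num_unique - 1] != received[i].global_id)) {
      unique_ids[num_unique] = received[i].global_id;
      owner_counts[num_unique] = 1;
      ++num_unique;
    } else {
      owner_counts[num_unique - 1]++;
    }
  }

  for (size_t i = 0; i < num_unique; ++i)
    printf("id %ld: %d owner(s)\n", unique_ids[i], owner_counts[i]);
  // prints: id 3: 2 owner(s) / id 7: 2 owner(s) / id 9: 1 owner(s)
  return 0;
}
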
1592
1594 yac_int * global_ids, size_t count,
1595 yac_int ** sorted_global_ids, size_t ** reorder_idx) {
1596
1597 *sorted_global_ids = xmalloc(count * sizeof(**sorted_global_ids));
1598 memcpy(*sorted_global_ids, global_ids, count * sizeof(**sorted_global_ids));
1599 *reorder_idx = xmalloc(count * sizeof(**reorder_idx));
1600 for (size_t i = 0; i < count; ++i) (*reorder_idx)[i] = i;
1601}
1602
1603static Xt_xmap generate_xmap_data(
1604 struct remote_point_infos * point_infos, size_t count, MPI_Comm comm) {
1605
1606 int comm_size;
1607 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1608
1609 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
1611 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1612
1613 for (size_t i = 0; i < count; ++i) {
1614 struct remote_point_info * curr_info =
1615 (point_infos[i].count > 1)?
1616 (point_infos[i].data.multi):
1617 (&(point_infos[i].data.single));
1618 sendcounts[curr_info->rank]++;
1619 }
1620
1622 1, sendcounts, recvcounts, sdispls, rdispls, comm);
1623 size_t num_src_msg = 0, num_dst_msg = 0;
1624 for (int i = 0; i < comm_size; ++i) {
1625 num_src_msg += (recvcounts[i] > 0);
1626 num_dst_msg += (sendcounts[i] > 0);
1627 }
1628
1629 size_t recv_count =
1630 rdispls[comm_size-1] + recvcounts[comm_size-1];
1631
1632 int * pos_buffer =
1633 xmalloc((recv_count + 2 * count) * sizeof(*pos_buffer));
1634 int * src_pos_buffer = pos_buffer;
1635 int * dst_pos_buffer = pos_buffer + recv_count;
1636 int * send_pos_buffer = pos_buffer + recv_count + count;
1637
1638 // pack send buffer
1639 for (size_t i = 0; i < count; ++i) {
1640 struct remote_point_info * curr_info =
1641 (point_infos[i].count > 1)?
1642 (point_infos[i].data.multi):
1643 (&(point_infos[i].data.single));
1644 size_t pos = sdispls[curr_info->rank+1]++;
1645 dst_pos_buffer[pos] = i;
1646 send_pos_buffer[pos] = (int)(curr_info->orig_pos);
1647 }
1648
1649 // redistribute positions of requested data
1650 yac_alltoallv_int_p2p(
1651 send_pos_buffer, sendcounts, sdispls,
1652 src_pos_buffer, recvcounts, rdispls, comm);
1653
1654 struct Xt_com_pos * com_pos =
1655 xmalloc(((size_t)num_src_msg + (size_t)num_dst_msg) * sizeof(*com_pos));
1656 struct Xt_com_pos * src_com = com_pos;
1657 struct Xt_com_pos * dst_com = com_pos + num_src_msg;
1658
1659 // set transfer_pos pointers and transfer_pos counts in com_pos's
1660 num_src_msg = 0;
1661 num_dst_msg = 0;
1662 for (int i = 0; i < comm_size; ++i) {
1663 if (recvcounts[i] > 0) {
1664 src_com[num_src_msg].transfer_pos = src_pos_buffer;
1665 src_com[num_src_msg].num_transfer_pos = recvcounts[i];
1666 src_com[num_src_msg].rank = i;
1667 src_pos_buffer += recvcounts[i];
1668 ++num_src_msg;
1669 }
1670 if (sendcounts[i] > 0) {
1671 dst_com[num_dst_msg].transfer_pos = dst_pos_buffer;
1672 dst_com[num_dst_msg].num_transfer_pos = sendcounts[i];
1673 dst_com[num_dst_msg].rank = i;
1674 dst_pos_buffer += sendcounts[i];
1675 ++num_dst_msg;
1676 }
1677 }
1678 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
1679
1680 Xt_xmap xmap =
1681 xt_xmap_intersection_pos_new(
1682 num_src_msg, src_com, num_dst_msg, dst_com, comm);
1683
1684 free(com_pos);
1685 free(pos_buffer);
1686
1687 return xmap;
1688}
1689
1691 const void * a, const void * b) {
1692
1693 return (((const struct single_remote_point_reorder *)a)->data.global_id >
1694 ((const struct single_remote_point_reorder *)b)->data.global_id) -
1695 (((const struct single_remote_point_reorder *)a)->data.global_id <
1696 ((const struct single_remote_point_reorder *)b)->data.global_id);
1697}
1698
1700 const void * a, const void * b) {
1701
1702 return (((const struct single_remote_point_reorder *)a)->reorder_idx >
1703 ((const struct single_remote_point_reorder *)b)->reorder_idx) -
1704 (((const struct single_remote_point_reorder *)a)->reorder_idx <
1705 ((const struct single_remote_point_reorder *)b)->reorder_idx);
1706}
1707
1708static MPI_Datatype yac_get_coordinate_mpi_datatype(MPI_Comm comm) {
1709
1710 MPI_Datatype coord_dt;
1711 yac_mpi_call(MPI_Type_contiguous(3, MPI_DOUBLE, &coord_dt), comm);
1712 yac_mpi_call(MPI_Type_commit(&coord_dt), comm);
1713 return coord_dt;
1714}
1715
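// redistributes field data (masks and coordinates) from the user
// decomposition to the YAC internal decomposition; the number of masks and
// coordinate sets as well as the mask names are checked for consistency
// across all processes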
1716static struct yac_field_data * field_data_init(
1717 struct yac_field_data * orig_field_data, size_t dist_size,
1718 Xt_redist redist_mask, Xt_redist redist_coords, MPI_Comm comm) {
1719
1720 struct yac_field_data * dist_field_data = yac_field_data_empty_new();
1721
1722 uint64_t counts[2], max_counts[2];
1723 if (orig_field_data != NULL) {
1724 counts[0] = yac_field_data_get_masks_count(orig_field_data);
1725 counts[1] = yac_field_data_get_coordinates_count(orig_field_data);
1726 } else {
1727 counts[0] = 0;
1728 counts[1] = 0;
1729 }
1730 yac_mpi_call(
1731 MPI_Allreduce(
1732 counts, max_counts, 2, MPI_UINT64_T, MPI_MAX, comm), comm);
1733 YAC_ASSERT(
1734 (orig_field_data == NULL) ||
1735 ((counts[0] == max_counts[0]) && (counts[1] == max_counts[1])),
1736 "ERROR(field_data_init): inconsistent number of masks or coordinates")
1737
1738 int * data_available_flag =
1739 xcalloc(2 * max_counts[0] + max_counts[1], sizeof(*data_available_flag));
1740
1741 for (size_t i = 0; i < counts[0]; ++i) {
1742 data_available_flag[i] =
1743 yac_field_data_get_mask_data(orig_field_data, i) != NULL;
1744 data_available_flag[i + counts[0]] =
1745 (yac_field_data_get_mask_name(orig_field_data, i) != NULL)?
1746 ((int)strlen(yac_field_data_get_mask_name(orig_field_data, i))+1):0;
1747 }
1748 for (size_t i = 0; i < counts[1]; ++i)
1749 data_available_flag[i + 2 * counts[0]] =
1750 yac_field_data_get_coordinates_data(orig_field_data, i) != NULL;
1751
1752 yac_mpi_call(
1753 MPI_Allreduce(
1754 MPI_IN_PLACE, data_available_flag,
1755 (int)(2 * max_counts[0] + max_counts[1]), MPI_INT, MPI_MAX, comm), comm);
1756
1757 for (size_t i = 0; i < counts[0]; ++i) {
1758 YAC_ASSERT(
1759 data_available_flag[i] ==
1760 (yac_field_data_get_mask_data(orig_field_data, i) != NULL),
1761 "ERROR(field_data_init): inconsistent availability of masks")
1762 int mask_name_len =
1763 (yac_field_data_get_mask_name(orig_field_data, i) != NULL)?
1764 ((int)strlen(yac_field_data_get_mask_name(orig_field_data, i))+1):0;
1765 YAC_ASSERT(
1766 data_available_flag[i + counts[0]] ==
1767 mask_name_len,
1768 "ERROR(field_data_init): inconsistent mask names")
1769 }
1770
1771 for (size_t i = 0; i < counts[1]; ++i)
1772 YAC_ASSERT(
1773 data_available_flag[i + 2 * counts[0]] ==
1774 (yac_field_data_get_coordinates_data(orig_field_data, i) != NULL),
1775 "ERROR(field_data_init): inconsistent availability of coordinates")
1776
1777 for (uint64_t i = 0; i < max_counts[0]; ++i) {
1778 int * dist_mask = NULL;
1779 if (data_available_flag[i]) {
1780 dist_mask = xmalloc(dist_size * sizeof(*dist_mask));
1781 int const * orig_mask =
1782 (orig_field_data != NULL)?
1783 yac_field_data_get_mask_data(orig_field_data, i):NULL;
1784 xt_redist_s_exchange1(redist_mask, orig_mask, dist_mask);
1785 }
1786
1787 int mask_name_len = data_available_flag[i + max_counts[0]];
1788 char * mask_name = NULL;
1789 if (mask_name_len > 0) {
1790 mask_name = xmalloc((size_t)mask_name_len * sizeof(*mask_name));
1791 if ((orig_field_data != NULL) &&
1792 (yac_field_data_get_mask_name(orig_field_data, i) != NULL))
1793 memcpy(
1794 mask_name, yac_field_data_get_mask_name(orig_field_data, i),
1795 (size_t)mask_name_len);
1796 else
1797 memset(mask_name, 0, (size_t)mask_name_len * sizeof(*mask_name));
1798 yac_mpi_call(
1799 MPI_Allreduce(
1800 MPI_IN_PLACE, mask_name, mask_name_len, MPI_CHAR, MPI_MAX, comm),
1801 comm);
1802 YAC_ASSERT(
1803 (orig_field_data == NULL) ||
1804 (yac_field_data_get_mask_name(orig_field_data, i) == NULL) ||
1805 !memcmp(
1806 yac_field_data_get_mask_name(orig_field_data, i), mask_name,
1807 (size_t)mask_name_len * sizeof(*mask_name)),
1808 "ERROR(field_data_init): inconsistent mask names")
1809 }
1810 yac_field_data_add_mask_nocpy(dist_field_data, dist_mask, mask_name);
1811 }
1812
1813 for (uint64_t i = 0; i < max_counts[1]; ++i) {
1814 yac_coordinate_pointer dist_coordinates = NULL;
1815 if (data_available_flag[i + 2 * max_counts[0]]) {
1816 dist_coordinates = xmalloc(dist_size * sizeof(*dist_coordinates));
1817 yac_const_coordinate_pointer orig_coordinates =
1818 (orig_field_data != NULL)?
1819 yac_field_data_get_coordinates_data(orig_field_data, i):NULL;
1820 xt_redist_s_exchange1(redist_coords, orig_coordinates, dist_coordinates);
1821 }
1822 yac_field_data_add_coordinates_nocpy(dist_field_data, dist_coordinates);
1823 }
1824
1825 free(data_available_flag);
1826
1827 return dist_field_data;
1828}
1829
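// redistributes vertex data (coordinates, owner ranks, and vertex field data)
// from the user decomposition to the YAC internal decomposition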
1830static void redistribute_vertex_data(
1831 struct yac_basic_grid * grid,
1832 struct remote_point_infos * dist_vertex_infos, size_t num_vertices,
1833 MPI_Comm comm, MPI_Datatype dt_coord, int * vertex_ranks,
1834 yac_coordinate_pointer * vertex_coordinates_, int ** vertex_owner_,
1835 struct yac_field_data ** vertex_field_data_) {
1836
1837 struct yac_basic_grid_data * grid_data = yac_basic_grid_get_data(grid);
1838
1839 // generate a yaxt exchange map user -> YAC decomposition
1840 Xt_xmap xmap = generate_xmap_data(dist_vertex_infos, num_vertices, comm);
1841
1842 // generate redistributing objects for vertex data
1843 Xt_redist redist_vertex_coords = xt_redist_p2p_new(xmap, dt_coord);
1844 Xt_redist redist_vertex_int = xt_redist_p2p_new(xmap, MPI_INT);
1845 xt_xmap_delete(xmap);
1846
1847 // get vertex coordinates
1848 yac_coordinate_pointer vertex_coordinates =
1849 xmalloc(num_vertices * sizeof(*vertex_coordinates));
1850 xt_redist_s_exchange1(
1851 redist_vertex_coords, grid_data->vertex_coordinates, vertex_coordinates);
1852
1853 // get owners of all vertices
1854 int * vertex_owner = xmalloc(num_vertices * sizeof(*vertex_owner));
1855 xt_redist_s_exchange1(redist_vertex_int, vertex_ranks, vertex_owner);
1856
1857 // get field data
1858 struct yac_field_data * vertex_field_data =
1859 field_data_init(
1860 yac_basic_grid_get_field_data(grid, YAC_LOC_CORNER), num_vertices,
1861 redist_vertex_int, redist_vertex_coords, comm);
1862
1863 xt_redist_delete(redist_vertex_int);
1864 xt_redist_delete(redist_vertex_coords);
1865
1866 *vertex_coordinates_ = vertex_coordinates;
1867 *vertex_owner_ = vertex_owner;
1868 *vertex_field_data_ = vertex_field_data;
1869}
1870
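// redistributes edge data (edge type, edge-to-vertex connectivity, and edge
// field data) from the user decomposition to the YAC internal decomposition;
// the edge-to-vertex connectivity is exchanged in terms of global vertex ids
// and translated back to local vertex indices using id2idx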
1871static void redistribute_edge_data(
1872 struct yac_basic_grid * grid,
1873 struct remote_point_infos * dist_edge_infos, size_t num_edges, MPI_Comm comm,
1874 MPI_Datatype dt_coord, size_t num_vertices, yac_int * sorted_vertex_ids,
1875 size_t * sorted_vertex_reorder_idx,
1876 yac_size_t_2_pointer * edge_to_vertex_, enum yac_edge_type ** edge_type_,
1877 struct yac_field_data ** edge_field_data_) {
1878
1879 struct yac_basic_grid_data * grid_data = yac_basic_grid_get_data(grid);
1880
1881 // generate a yaxt exchange map user -> YAC decomposition
1882 Xt_xmap xmap = generate_xmap_data(dist_edge_infos, num_edges, comm);
1883
1884 // generate redistributing objects for edge data
1885 MPI_Datatype dt_2yac_int;
1886 yac_mpi_call(MPI_Type_contiguous(2, yac_int_dt, &dt_2yac_int), comm);
1887 yac_mpi_call(MPI_Type_commit(&dt_2yac_int), comm);
1888 Xt_redist redist_edge_int = xt_redist_p2p_new(xmap, MPI_INT);
1889 Xt_redist redist_edge_coords = xt_redist_p2p_new(xmap, dt_coord);
1890 Xt_redist redist_edge_2yac_int = xt_redist_p2p_new(xmap, dt_2yac_int);
1891 yac_mpi_call(MPI_Type_free(&dt_2yac_int), comm);
1892 xt_xmap_delete(xmap);
1893
1894 enum yac_edge_type * edge_type = xmalloc(num_edges * sizeof(*edge_type));
1895 { // get edge types
1896 int * temp_edge_type_src, * temp_edge_type_dst;
1897 if (sizeof(*edge_type) == sizeof(int)) {
1898 temp_edge_type_src = (int*)(grid_data->edge_type);
1899 temp_edge_type_dst = (int*)edge_type;
1900 } else {
1901 temp_edge_type_src =
1902 xmalloc(grid_data->num_edges * sizeof(*temp_edge_type_src));
1903 for (size_t i = 0; i < grid_data->num_edges; ++i)
1904 temp_edge_type_src[i] = (int)(grid_data->edge_type[i]);
1905 temp_edge_type_dst = xmalloc(num_edges * sizeof(*temp_edge_type_dst));
1906 for (size_t i = 0; i < num_edges; ++i)
1907 temp_edge_type_dst[i] = (int)(edge_type[i]);
1908 }
1909
1910 xt_redist_s_exchange1(
1911 redist_edge_int, temp_edge_type_src, temp_edge_type_dst);
1912
1913 if (sizeof(*edge_type) != sizeof(int)) {
1914
1915 for (size_t i = 0; i < num_edges; ++i)
1916 edge_type[i] = (enum yac_edge_type)(temp_edge_type_dst[i]);
1917
1918 free(temp_edge_type_src);
1919 free(temp_edge_type_dst);
1920 }
1921 }
1922
1923 yac_size_t_2_pointer edge_to_vertex =
1924 xmalloc(num_edges * sizeof(*edge_to_vertex));
1925 { // get edge to vertex
1926 yac_int * vertex_id_buffer =
1927 xmalloc(
1928 2 * (grid_data->num_edges + num_edges) * sizeof(*vertex_id_buffer));
1929 yac_int * grid_edge_vertex_ids = vertex_id_buffer;
1930 yac_int * edge_vertex_ids = vertex_id_buffer + 2 * grid_data->num_edges;
1931
1932 size_t * grid_edge_to_vertex = &(grid_data->edge_to_vertex[0][0]);
1933
1934 for (size_t i = 0; i < 2 * grid_data->num_edges; ++i)
1935 grid_edge_vertex_ids[i] =
1936 grid_data->vertex_ids[grid_edge_to_vertex[i]];
1937
1938 xt_redist_s_exchange1(
1939 redist_edge_2yac_int, grid_edge_vertex_ids, edge_vertex_ids);
1940
1941 id2idx(
1942 "redistribute_edge_data", edge_vertex_ids,
1943 &(edge_to_vertex[0][0]), 2 * num_edges,
1944 sorted_vertex_ids, sorted_vertex_reorder_idx, num_vertices);
1945
1946 free(vertex_id_buffer);
1947 }
1948
1949 // get field data
1950 struct yac_field_data * edge_field_data =
1951 field_data_init(
1952 yac_basic_grid_get_field_data(grid, YAC_LOC_EDGE), num_edges,
1953 redist_edge_int, redist_edge_coords, comm);
1954
1955 xt_redist_delete(redist_edge_2yac_int);
1956 xt_redist_delete(redist_edge_coords);
1957 xt_redist_delete(redist_edge_int);
1958
1959 *edge_to_vertex_ = edge_to_vertex;
1960 *edge_type_ = edge_type;
1961 *edge_field_data_ = edge_field_data;
1962}
1963
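// redistributes cell data from the user decomposition to the YAC internal
// decomposition; the cell-to-vertex and cell-to-edge connectivities are
// exchanged as fixed-size blocks of max_num_vertices_per_cell global ids
// (padded with XT_INT_MAX) and compacted afterwards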
1964static void redistribute_cell_data(
1965 struct yac_basic_grid * grid,
1966 struct remote_point_infos * dist_cell_infos, size_t num_cells, MPI_Comm comm,
1967 MPI_Datatype dt_coord,
1968 size_t num_edges, yac_int * sorted_edge_ids,
1969 size_t * sorted_edge_reorder_idx,
1970 size_t num_vertices, yac_int * sorted_vertex_ids,
1971 size_t * sorted_vertex_reorder_idx, int max_num_vertices_per_cell,
1972 size_t ** cell_to_vertex_, size_t ** cell_to_edge_,
1973 int ** num_vertices_per_cell_, struct yac_field_data ** cell_field_data_) {
1974
1975 struct yac_basic_grid_data * grid_data = yac_basic_grid_get_data(grid);
1976
1977 // generate a yaxt exchange map user -> YAC decomposition
1978 Xt_xmap xmap = generate_xmap_data(dist_cell_infos, num_cells, comm);
1979
1980 // generate redistributing objects for cell data
1981 MPI_Datatype dt_yac_ints;
1982 yac_mpi_call(
1983 MPI_Type_contiguous(
1984 max_num_vertices_per_cell, yac_int_dt, &dt_yac_ints), comm);
1985 yac_mpi_call(MPI_Type_commit(&dt_yac_ints), comm);
1986 Xt_redist redist_cell_int = xt_redist_p2p_new(xmap, MPI_INT);
1987 Xt_redist redist_cell_yac_ints = xt_redist_p2p_new(xmap, dt_yac_ints);
1988 Xt_redist redist_cell_coords = xt_redist_p2p_new(xmap, dt_coord);
1989 yac_mpi_call(MPI_Type_free(&dt_yac_ints), comm);
1990 xt_xmap_delete(xmap);
1991
1992 size_t * cell_to_vertex;
1993 size_t * cell_to_edge;
1994 int * num_vertices_per_cell = NULL;
1995 { // get connectivity data
1996 yac_int * id_buffer =
1997 xmalloc(
1998 (size_t)max_num_vertices_per_cell *
1999 (grid_data->num_cells + num_cells) * sizeof(*id_buffer));
2000 yac_int * grid_id_buffer =
2001 id_buffer + (size_t)max_num_vertices_per_cell * num_cells;
2002 size_t total_num_ids = 0;
2003
2004 size_t grid_num_cells = grid_data->num_cells;
2005 int * grid_num_ve_per_cell = grid_data->num_vertices_per_cell;
2006 yac_int * grid_ids = grid_data->vertex_ids;
2007 size_t * grid_cell_to_ve = grid_data->cell_to_vertex;
2008 size_t * grid_cell_to_ve_offsets = grid_data->cell_to_vertex_offsets;
2009 size_t ** cell_to_ve_ = &cell_to_vertex;
2010 yac_int * sorted_ve_ids = sorted_vertex_ids;
2011 size_t * sorted_ve_reorder_idx = sorted_vertex_reorder_idx;
2012 size_t num_ve = num_vertices;
2013 int compact_flag;
2014
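 // the first pass of the following loop handles the cell-to-vertex
 // connectivity, the second pass the cell-to-edge connectivity (the
 // respective pointers are switched at the end of the loop body)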
2015 for (int location = 0; location < 2; ++location) {
2016
2017 // prepare send buffer
2018 for (size_t i = 0, k = 0; i < grid_num_cells; ++i) {
2019 int curr_num_ve_per_cell = grid_num_ve_per_cell[i];
2020 size_t * curr_cell_to_ve = grid_cell_to_ve + grid_cell_to_ve_offsets[i];
2021 for (int j = 0; j < curr_num_ve_per_cell; ++j, ++k)
2022 grid_id_buffer[k] = grid_ids[curr_cell_to_ve[j]];
2023 for (int j = curr_num_ve_per_cell; j < max_num_vertices_per_cell; ++j, ++k)
2024 grid_id_buffer[k] = XT_INT_MAX;
2025 }
2026
2027 // exchange data
2028 xt_redist_s_exchange1(
2029 redist_cell_yac_ints, grid_id_buffer, id_buffer);
2030
2031 if (num_vertices_per_cell == NULL) {
2032 total_num_ids = 0;
2033 compact_flag = 0;
2034 num_vertices_per_cell = xmalloc(num_cells * sizeof(*num_vertices_per_cell));
2035 for (size_t i = 0; i < num_cells; ++i) {
2036 int vertex_count;
2037 yac_int * vertex_ids =
2038 id_buffer + i * (size_t)max_num_vertices_per_cell;
2039 for (vertex_count = 0; vertex_count < max_num_vertices_per_cell;
2040 ++vertex_count)
2041 if (vertex_ids[vertex_count] == XT_INT_MAX) break;
2042 compact_flag |= vertex_count != max_num_vertices_per_cell;
2043 num_vertices_per_cell[i] = vertex_count;
2044 total_num_ids += (size_t)vertex_count;
2045 }
2046 }
2047
2048 // compact data if necessary
2049 if (compact_flag) {
2050 for (size_t i = 0, j = 0; j < total_num_ids; ++i) {
2051 yac_int curr_id = id_buffer[i];
2052 if (curr_id != XT_INT_MAX) {
2053 id_buffer[j] = curr_id;
2054 ++j;
2055 }
2056 }
2057 }
2058
2059 // lookup local ids
2060 size_t * cell_to_ve =
2061 ((*cell_to_ve_ = xmalloc(total_num_ids * sizeof(*cell_to_ve))));
2062 id2idx(
2063 "redistribute_cell_data", id_buffer, cell_to_ve, total_num_ids,
2064 sorted_ve_ids, sorted_ve_reorder_idx, num_ve);
2065
2066 // switch to edge
2067 grid_ids = grid_data->edge_ids;
2068 grid_cell_to_ve = grid_data->cell_to_edge;
2069 grid_cell_to_ve_offsets = grid_data->cell_to_edge_offsets;
2070 cell_to_ve_ = &cell_to_edge;
2071 sorted_ve_ids = sorted_edge_ids;
2072 sorted_ve_reorder_idx = sorted_edge_reorder_idx;
2073 num_ve = num_edges;
2074 }
2075
2076 free(id_buffer);
2077 }
2078
2079 // get field data
2080 struct yac_field_data * cell_field_data =
2081 field_data_init(
2082 yac_basic_grid_get_field_data(grid, YAC_LOC_CELL), num_cells,
2083 redist_cell_int, redist_cell_coords, comm);
2084
2085 xt_redist_delete(redist_cell_coords);
2086 xt_redist_delete(redist_cell_yac_ints);
2087 xt_redist_delete(redist_cell_int);
2088
2089 *cell_to_vertex_ = cell_to_vertex;
2090 *cell_to_edge_ = cell_to_edge;
2091 *num_vertices_per_cell_ = num_vertices_per_cell;
2092 *cell_field_data_ = cell_field_data;
2093}
2094
2095static struct bounding_circle * generate_cell_bnd_circles(
2096 size_t num_cells, int max_num_vertices_per_cell, int * num_vertices_per_cell,
2097 size_t * cell_to_vertex, size_t * cell_to_vertex_offsets,
2098 yac_coordinate_pointer vertex_coordinates,
2099 size_t * cell_to_edge, size_t * cell_to_edge_offsets,
2100 enum yac_edge_type * edge_type) {
2101
2102 struct bounding_circle * cell_bnd_circles =
2103 xmalloc(num_cells * sizeof(*cell_bnd_circles));
2104 {
2105 struct yac_grid_cell cell;
2106 cell.coordinates_xyz = xmalloc((size_t)max_num_vertices_per_cell *
2107 sizeof(*(cell.coordinates_xyz)));
2108 cell.edge_type = xmalloc((size_t)max_num_vertices_per_cell *
2109 sizeof(*(cell.edge_type)));
2110
2111 for (size_t i = 0; i < num_cells; ++i) {
2112 size_t * curr_cell_to_vertex =
2113 cell_to_vertex + cell_to_vertex_offsets[i];
2114 size_t * curr_cell_to_edge =
2115 cell_to_edge + cell_to_edge_offsets[i];
2116 for (int j = 0; j < num_vertices_per_cell[i]; ++j) {
2117 yac_coordinate_pointer curr_vertex_coords =
2118 vertex_coordinates + curr_cell_to_vertex[j];
2119 for (int k = 0; k < 3; ++k)
2120 cell.coordinates_xyz[j][k] = (*curr_vertex_coords)[k];
2121 cell.edge_type[j] = edge_type[curr_cell_to_edge[j]];
2122 }
2123 cell.num_corners = num_vertices_per_cell[i];
2124 if (cell.num_corners > 0)
2125 yac_get_cell_bounding_circle(cell, cell_bnd_circles + i);
2126 else
2127 cell_bnd_circles[i] =
2128 (struct bounding_circle) {
2129 .base_vector = {1.0, 0.0, 0.0},
2130 .inc_angle = SIN_COS_ZERO,
2131 .sq_crd = DBL_MAX};
2132 }
2133 free(cell.edge_type);
2134 free(cell.coordinates_xyz);
2135 }
2136
2137 return cell_bnd_circles;
2138}
2139
2140// compute the global maximum number of vertices per cell
2141// (this is required by various operations that involve the exchange of
2142// cell data; using a varying number of vertices per cell would make
2143// these exchanges much more complicated)
2144static int get_max_num_vertices_per_cell(
2145 struct yac_basic_grid_data * grid_data, MPI_Comm comm) {
2146
2147 size_t num_cells = grid_data->num_cells;
2148 int * num_vertices_per_cell = grid_data->num_vertices_per_cell;
2149 int * core_cell_mask = grid_data->core_cell_mask;
2150
2151 int max_num_vertices_per_cell = 0;
2152
2153 // if there is a core mask for cells
2154 if (core_cell_mask) {
2155 for (size_t i = 0; i < num_cells; ++i)
2156 if (core_cell_mask[i] &&
2157 (num_vertices_per_cell[i] > max_num_vertices_per_cell))
2158 max_num_vertices_per_cell = num_vertices_per_cell[i];
2159 } else {
2160 for (size_t i = 0; i < num_cells; ++i)
2161 if (num_vertices_per_cell[i] > max_num_vertices_per_cell)
2162 max_num_vertices_per_cell = num_vertices_per_cell[i];
2163 }
2164
2165 yac_mpi_call(
2166 MPI_Allreduce(
2167 MPI_IN_PLACE, &max_num_vertices_per_cell, 1, MPI_INT, MPI_MAX, comm),
2168 comm);
2169
2170 return max_num_vertices_per_cell;
2171}
2172
2173static struct yac_dist_grid generate_dist_grid(
2174 struct proc_sphere_part_node * proc_sphere_part,
2175 struct yac_basic_grid * grid, MPI_Comm comm) {
2176
2177 struct yac_basic_grid_data * grid_data = yac_basic_grid_get_data(grid);
2178
2179 int comm_rank;
2180 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
2181
2182 // compute the global maximum number of vertices per cell
2183 int max_num_vertices_per_cell =
2184 get_max_num_vertices_per_cell(grid_data, comm);
2185
2186 // check core masks for consistency
2187 // (should contain no valid cell/edge connected to an invalid edge/vertex)
2188 // and generate it if required
2189 check_core_masks(grid);
2190
2191 int * vertex_ranks; // for each vertex in the user-provided grid data this
2192 // array contains the owner in the YAC internal
2193 // decomposition
2194
2195 // generate global ids for cells/vertices/edges (if they are missing)
2197 proc_sphere_part, grid, &vertex_ranks, max_num_vertices_per_cell, comm);
2198
2199 // generate owner information for all cells/vertices/edges that may be
2200 // assigned to the local process in the YAC internal decomposition
2201 // (dist owners are sorted by global ids)
2202 struct remote_point_infos * dist_point_infos[3];
2203 yac_int * dist_global_ids[3];
2204 size_t dist_count[3];
2206 proc_sphere_part, grid, vertex_ranks, max_num_vertices_per_cell, comm,
2207 dist_point_infos, dist_global_ids, dist_count);
2208 size_t num_cells = dist_count[YAC_LOC_CELL];
2209 size_t num_vertices = dist_count[YAC_LOC_CORNER];
2210 size_t num_edges = dist_count[YAC_LOC_EDGE];
2211 yac_int * cell_ids = dist_global_ids[YAC_LOC_CELL];
2212 yac_int * vertex_ids = dist_global_ids[YAC_LOC_CORNER];
2213 yac_int * edge_ids = dist_global_ids[YAC_LOC_EDGE];
2214
2215 // generate sorted ids (the initial data is already sorted, which makes this
2216 // a simple copy)
2217 yac_int * sorted_cell_ids, * sorted_vertex_ids, * sorted_edge_ids;
2218 size_t * sorted_cell_reorder_idx, * sorted_vertex_reorder_idx,
2219 * sorted_edge_reorder_idx;
2221 cell_ids, num_cells, &sorted_cell_ids, &sorted_cell_reorder_idx);
2223 vertex_ids, num_vertices, &sorted_vertex_ids,
2224 &sorted_vertex_reorder_idx);
2226 edge_ids, num_edges, &sorted_edge_ids, &sorted_edge_reorder_idx);
2227
2228 MPI_Datatype dt_coord = yac_get_coordinate_mpi_datatype(comm);
2229
2230 // redistribute vertex information from user to
2231 // YAC internal decomposition
2232 yac_coordinate_pointer vertex_coordinates;
2233 int * vertex_owner;
2234 struct yac_field_data * vertex_field_data;
2235 redistribute_vertex_data(
2236 grid, dist_point_infos[YAC_LOC_CORNER], num_vertices,
2237 comm, dt_coord, vertex_ranks,
2238 &vertex_coordinates, &vertex_owner, &vertex_field_data);
2239 free(vertex_ranks);
2240
2241 // redistribute edge information from user to
2242 // YAC internal decomposition
2243 yac_size_t_2_pointer edge_to_vertex;
2244 enum yac_edge_type * edge_type;
2245 struct yac_field_data * edge_field_data;
2246 redistribute_edge_data(
2247 grid, dist_point_infos[YAC_LOC_EDGE], num_edges, comm, dt_coord,
2248 num_vertices, sorted_vertex_ids, sorted_vertex_reorder_idx,
2249 &edge_to_vertex, &edge_type, &edge_field_data);
2250
2251 // redistribute cell information from user to
2252 // YAC internal decomposition
2253 size_t * cell_to_vertex;
2254 size_t * cell_to_edge;
2255 int * num_vertices_per_cell;
2256 struct yac_field_data * cell_field_data;
2257 redistribute_cell_data(
2258 grid, dist_point_infos[YAC_LOC_CELL], num_cells, comm, dt_coord,
2259 num_edges, sorted_edge_ids, sorted_edge_reorder_idx,
2260 num_vertices, sorted_vertex_ids, sorted_vertex_reorder_idx,
2261 max_num_vertices_per_cell, &cell_to_vertex, &cell_to_edge,
2262 &num_vertices_per_cell, &cell_field_data);
2263
2264 yac_mpi_call(MPI_Type_free(&dt_coord), comm);
2265
2266 // compute offsets into the cell_to_vertex/cell_to_edge arrays (one shared array suffices, since each cell has as many edges as vertices)
2267 size_t * cell_to_vertex_offsets =
2268 xmalloc(num_cells * sizeof(*cell_to_vertex_offsets));
2269 size_t * cell_to_edge_offsets = cell_to_vertex_offsets;
2270 for (size_t i = 0, accu = 0; i < num_cells; ++i) {
2271 cell_to_vertex_offsets[i] = accu;
2272 accu += (size_t)(num_vertices_per_cell[i]);
2273 }
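 // illustrative example: for num_vertices_per_cell = {3, 4, 3} this yields
 // cell_to_vertex_offsets = {0, 3, 7}, i.e. the vertices of cell i are
 // cell_to_vertex[cell_to_vertex_offsets[i] + 0 ... num_vertices_per_cell[i] - 1]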
2274
2275 struct yac_dist_grid dist_grid =
2276 {.comm = comm,
2277 .vertex_coordinates = vertex_coordinates,
2278 .ids = {cell_ids, vertex_ids, edge_ids},
2279 .total_count = {num_cells, num_vertices, num_edges},
2280 .count = {num_cells, num_vertices, num_edges},
2281 .num_vertices_per_cell = num_vertices_per_cell,
2282 .cell_to_vertex = cell_to_vertex,
2283 .cell_to_vertex_offsets = cell_to_vertex_offsets,
2284 .cell_to_edge = cell_to_edge,
2285 .cell_to_edge_offsets = cell_to_edge_offsets,
2286 .edge_to_vertex = edge_to_vertex,
2287 .cell_bnd_circles =
2288 generate_cell_bnd_circles(
2289 num_cells, max_num_vertices_per_cell, num_vertices_per_cell,
2290 cell_to_vertex, cell_to_vertex_offsets, vertex_coordinates,
2291 cell_to_edge, cell_to_edge_offsets, edge_type),
2292 .edge_type = edge_type,
2293 .owner_mask = {NULL, NULL, NULL},
2294 .sorted_ids = {sorted_cell_ids, sorted_vertex_ids, sorted_edge_ids},
2295 .sorted_reorder_idx =
2296 {sorted_cell_reorder_idx, sorted_vertex_reorder_idx,
2297 sorted_edge_reorder_idx},
2298 .field_data =
2300 cell_field_data, vertex_field_data, edge_field_data),
2301 .owners =
2302 {dist_point_infos[YAC_LOC_CELL],
2303 dist_point_infos[YAC_LOC_CORNER],
2304 dist_point_infos[YAC_LOC_EDGE]}};
2305
2306 // make sure that each cell/vertex/edge is valid only on one process
2307 generate_owner_masks(&dist_grid, comm_rank, vertex_owner);
2308
2309 return dist_grid;
2310}
2311
2312static void setup_search_data(struct yac_dist_grid_pair * dist_grid_pair) {
2313
2314 // build sphere part for vertices
2315 for (int i = 0; i < 2; ++i)
2316 dist_grid_pair->vertex_sphere_part[i] =
2318 dist_grid_pair->dist_grid[i].count[YAC_LOC_CORNER],
2320 (dist_grid_pair->dist_grid[i].vertex_coordinates),
2321 dist_grid_pair->dist_grid[i].ids[YAC_LOC_CORNER]);
2322
2323 // build sphere part for bounding circle of cells
2324 for (int i = 0; i < 2; ++i) {
2325 dist_grid_pair->cell_sphere_part[i] =
2327 dist_grid_pair->dist_grid[i].cell_bnd_circles,
2328 dist_grid_pair->dist_grid[i].count[YAC_LOC_CELL]);
2329 }
2330}
2331
2332static struct proc_sphere_part_node * generate_dist_grid_decomposition(
2333 struct yac_basic_grid * grid_a, struct yac_basic_grid * grid_b,
2334 MPI_Comm comm) {
2335
2336 struct yac_basic_grid_data * grid_data[2] =
2337 {yac_basic_grid_get_data(grid_a), yac_basic_grid_get_data(grid_b)};
2338
2339 // gather vertex coordinates from both grids into a single array
2340 size_t num_vertices = grid_data[0]->num_vertices + grid_data[1]->num_vertices;
2341 struct dist_vertex * vertices = xmalloc(num_vertices * sizeof(*vertices));
2342 for (size_t i = 0, k = 0; i < 2; ++i) {
2343 yac_coordinate_pointer vertex_coordinates = grid_data[i]->vertex_coordinates;
2344 for (size_t j = 0; j < grid_data[i]->num_vertices; ++j, ++k)
2345 memcpy(
2346 vertices[k].coord, vertex_coordinates[j], sizeof(*vertex_coordinates));
2347 }
2348
2349 // redistribute all vertices and build a parallel sphere
2350 // part (process decomposition) from them
2351 struct proc_sphere_part_node * proc_sphere_part =
2352 yac_redistribute_vertices(&vertices, &num_vertices, comm);
2353
2354 free(vertices);
2355
2356 return proc_sphere_part;
2357}
2358
2359struct yac_dist_grid_pair * yac_dist_grid_pair_new(
2360 struct yac_basic_grid * grid_a, struct yac_basic_grid * grid_b,
2361 MPI_Comm comm) {
2362
2363 YAC_ASSERT(
2364 grid_a,
2365 "ERROR(yac_dist_grid_pair_new): "
2366 "NULL is not a valid value for parameter grid_a")
2367 YAC_ASSERT(
2368 grid_b,
2369 "ERROR(yac_dist_grid_pair_new): "
2370 "NULL is not a valid value for parameter grid_b")
2371
2372 YAC_ASSERT(
2373 strcmp(yac_basic_grid_get_name(grid_a),
2374 yac_basic_grid_get_name(grid_b)),
2375 "ERROR(yac_dist_grid_pair_new): identical grid names")
2376
2377 MPI_Comm comm_copy;
2378 yac_mpi_call(MPI_Comm_dup(comm, &comm_copy), comm);
2379 comm = comm_copy;
2380
2381 // ensure same grid ordering on all processes
2382 if (strcmp(yac_basic_grid_get_name(grid_a),
2383 yac_basic_grid_get_name(grid_b)) > 0) {
2384 struct yac_basic_grid * grid_swap = grid_a;
2385 grid_a = grid_b;
2386 grid_b = grid_swap;
2387 }
2388
2389 struct yac_dist_grid_pair * grid_pair = xmalloc(1 * sizeof(*grid_pair));
2390
2391 grid_pair->grid_names[0] = strdup(yac_basic_grid_get_name(grid_a));
2392 grid_pair->grid_names[1] = strdup(yac_basic_grid_get_name(grid_b));
2393 grid_pair->comm = comm;
2394
2395 // generate a decomposition for the distributed grid pair
2396 // with the following properties:
2397 // * each process covers a unique area on the sphere
2398 // * number of cells (from grid_a and/or grid_b) per area
2399 // is roughly the same
2400 grid_pair->proc_sphere_part =
2401 generate_dist_grid_decomposition(grid_a, grid_b, comm);
2402
2403 // redistribute grid_a and grid_b according to decomposition
2404 grid_pair->dist_grid[0] =
2405 generate_dist_grid(grid_pair->proc_sphere_part, grid_a, comm);
2406 grid_pair->dist_grid[1] =
2407 generate_dist_grid(grid_pair->proc_sphere_part, grid_b, comm);
2408
2409 // build search data structures for local cells and vertices in
2410 // distributed grid
2411 setup_search_data(grid_pair);
2412
2413 return grid_pair;
2414}
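// Illustrative usage sketch (not part of this file): the two user-decomposed
// grids and the communicator are assumed to be set up by the caller.
//
//   struct yac_basic_grid * grid_a = ...; // e.g. source grid
//   struct yac_basic_grid * grid_b = ...; // e.g. target grid
//   struct yac_dist_grid_pair * pair =
//     yac_dist_grid_pair_new(grid_a, grid_b, MPI_COMM_WORLD);
//   ... // interpolation setup using the distributed grid pair
//   yac_dist_grid_pair_delete(pair); // matching cleanup (defined further below)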
2415
2416struct yac_dist_grid_pair * yac_dist_grid_pair_new_f2c(
2417 struct yac_basic_grid * grid_a, struct yac_basic_grid * grid_b,
2418 MPI_Fint comm) {
2419
2420 return
2421 yac_dist_grid_pair_new(grid_a, grid_b, MPI_Comm_f2c(comm));
2422}
2423
2425 struct yac_dist_grid_pair * grid_pair) {
2426 return grid_pair->comm;
2427}
2428
2429struct yac_dist_grid * yac_dist_grid_pair_get_dist_grid(
2430 struct yac_dist_grid_pair * grid_pair, char const * grid_name) {
2431
2432 struct yac_dist_grid * dist_grid = NULL;
2433 for (int i = 0; (i < 2) && (dist_grid == NULL); ++i)
2434 if (!strcmp(grid_name, grid_pair->grid_names[i]))
2435 dist_grid = &(grid_pair->dist_grid[i]);
2436 YAC_ASSERT(
2437 dist_grid, "ERROR(yac_dist_grid_pair_get_dist_grid): invalid grid_name")
2438 return dist_grid;
2439}
2440
2442 struct yac_dist_grid * dist_grid) {
2443
2444 return (struct yac_const_basic_grid_data *)dist_grid;
2445}
2446
2447// returns the number of cells/vertices/edges in the distributed directory that
2448// are owned by the local process
2449size_t yac_dist_grid_get_local_count(
2450 struct yac_dist_grid * dist_grid, enum yac_location location) {
2451
2452 CHECK_LOCATION("yac_dist_grid_get_local_count")
2453 size_t local_count = 0;
2454 size_t count = dist_grid->count[location];
2455 int * owner_mask = dist_grid->owner_mask[location];
2456 for (size_t i = 0; i < count; ++i) if (owner_mask[i]) ++local_count;
2457 return local_count;
2458}
2459
2460// returns the current number of cells/vertices/edges in the local
2461// part of the distributed directory (may increase over time, if the local
2462// part has to be extended in order to contain external search results)
2463size_t yac_dist_grid_get_total_count(
2464 struct yac_dist_grid * dist_grid, enum yac_location location) {
2465
2466 CHECK_LOCATION("yac_dist_grid_get_total_count")
2467 size_t * total_count = dist_grid->total_count;
2468 return total_count[location];
2469}
2470
2471// returns the original number of cells/vertices/edges in the local
2472// part of the distributed directory
2473size_t yac_dist_grid_get_count(
2474 struct yac_dist_grid * dist_grid, enum yac_location location) {
2475
2476 CHECK_LOCATION("yac_dist_grid_get_count")
2477 size_t * count = dist_grid->count;
2478 return count[location];
2479}
2480
2481int const * yac_dist_grid_get_owner_mask(
2482 struct yac_dist_grid * dist_grid, enum yac_location location) {
2483
2484 CHECK_LOCATION("yac_dist_grid_get_owner_mask")
2485 int ** owner_mask = dist_grid->owner_mask;
2486 return owner_mask[location];
2487}
2488
2489yac_int const * yac_dist_grid_get_global_ids(
2490 struct yac_dist_grid * dist_grid, enum yac_location location) {
2491
2492 CHECK_LOCATION("yac_dist_grid_get_global_ids")
2493 yac_int ** ids = dist_grid->ids;
2494 return ids[location];
2495}
2496
2497// returns local ids of all points in the distributed directory for the
2498// provided field that are owned by the local process and are not masked
2499// out by the field
2501 struct yac_dist_grid * dist_grid, struct yac_interp_field field,
2502 size_t ** indices, size_t * num_indices) {
2503
2504 int const * field_mask = yac_dist_grid_get_field_mask(dist_grid, field);
2505
2506 size_t count = yac_dist_grid_get_count(dist_grid, field.location);
2507 int const * owner_mask =
2508 yac_dist_grid_get_owner_mask(dist_grid, field.location);
2509
2510 size_t * temp_indices = xmalloc(count * sizeof(*temp_indices));
2511
2512 size_t num_indices_ = 0;
2513
2514 if (field_mask != NULL) {
2515 for (size_t i = 0; i < count; ++i)
2516 if (owner_mask[i] && field_mask[i]) temp_indices[num_indices_++] = i;
2517 } else {
2518 for (size_t i = 0; i < count; ++i)
2519 if (owner_mask[i]) temp_indices[num_indices_++] = i;
2520 }
2521
2522 *indices = xrealloc(temp_indices, num_indices_ * sizeof(**indices));
2523 *num_indices = num_indices_;
2524}
2525
2526struct yac_field_data * yac_dist_grid_get_field_data(
2527 struct yac_dist_grid * dist_grid, enum yac_location location) {
2528
2529 return
2530 yac_field_data_set_get_field_data(dist_grid->field_data, location);
2531}
2532
2533int const * yac_dist_grid_get_field_mask(
2534 struct yac_dist_grid * dist_grid, struct yac_interp_field field) {
2535
2536 if (field.masks_idx == SIZE_MAX) return NULL;
2537
2538 return
2539 (field.masks_idx != SIZE_MAX)?
2540 yac_field_data_get_mask_data(
2541 yac_dist_grid_get_field_data(dist_grid, field.location),
2542 field.masks_idx):NULL;
2543}
2544
2546 struct yac_dist_grid * dist_grid, struct yac_interp_field field) {
2547
2548 yac_const_coordinate_pointer coords =
2549 (field.coordinates_idx != SIZE_MAX)?
2550 yac_field_data_get_coordinates_data(
2551 yac_dist_grid_get_field_data(dist_grid, field.location),
2552 field.coordinates_idx):NULL;
2553
2554 // if no field coordinates are defined, but the location is at the corners
2555 // of the grid cells, return the vertex coordinates
2556 return
2557 ((coords != NULL) || (field.location != YAC_LOC_CORNER))?
2558 coords:((yac_const_coordinate_pointer)(dist_grid->vertex_coordinates));
2559}
2560
2562 struct yac_dist_grid * dist_grid, struct yac_interp_field field) {
2563
2564 size_t count =
2565 yac_dist_grid_get_count(dist_grid, field.location);
2566
2567 int const * field_mask = yac_dist_grid_get_field_mask(dist_grid, field);
2568 if (field_mask == NULL)
2569 return yac_dist_grid_get_local_count(dist_grid, field.location);
2570
2571 int const * owner_mask =
2572 yac_dist_grid_get_owner_mask(dist_grid, field.location);
2573
2574 size_t unmasked_local_count = 0;
2575 for (size_t i = 0; i < count; ++i)
2576 if (owner_mask[i] && field_mask[i]) ++unmasked_local_count;
2577 return unmasked_local_count;
2578}
2579
2581 struct remote_point_infos * point_infos, size_t count) {
2582
2583 for (size_t i = 0; i < count; ++i)
2584 if (point_infos[i].count > 1) free(point_infos[i].data.multi);
2585 free(point_infos);
2586}
2587
2588static void yac_dist_grid_free(struct yac_dist_grid grid) {
2589
2590 free(grid.vertex_coordinates);
2591 free(grid.num_vertices_per_cell);
2592 free(grid.cell_to_vertex);
2593 free(grid.cell_to_vertex_offsets);
2594 free(grid.cell_to_edge);
2595 free(grid.cell_bnd_circles);
2596 free(grid.edge_type);
2597 free(grid.edge_to_vertex);
2598 for (int i = 0; i < 3; ++i) {
2599 free(grid.ids[i]);
2600 free(grid.owner_mask[i]);
2601 free(grid.sorted_ids[i]);
2602 free(grid.sorted_reorder_idx[i]);
2604 }
2606}
2607
2608void yac_dist_grid_pair_delete(
2609 struct yac_dist_grid_pair * grid_pair) {
2610 if (grid_pair == NULL) return;
2611 free(grid_pair->grid_names[0]);
2612 free(grid_pair->grid_names[1]);
2613 yac_mpi_call(MPI_Comm_free(&(grid_pair->comm)), MPI_COMM_WORLD);
2615 for (int i = 0; i < 2; ++i) {
2616 yac_dist_grid_free(grid_pair->dist_grid[i]);
2619 }
2620 free(grid_pair);
2621}
2622
2623static int coord_in_cell(
2624 double coord[3], struct yac_dist_grid * dist_grid,
2625 size_t cell_idx, struct yac_grid_cell * buffer_cell) {
2626
2628 (struct yac_const_basic_grid_data *)dist_grid, cell_idx, buffer_cell);
2629
2630 return
2631 yac_point_in_cell2(
2632 coord, *buffer_cell, dist_grid->cell_bnd_circles[cell_idx]);
2633}
2634
2636 double coord[3], struct yac_dist_grid * dist_grid,
2637 size_t cell_idx, struct yac_grid_cell * buffer_cell) {
2638
2640 (struct yac_const_basic_grid_data *)dist_grid, cell_idx, buffer_cell);
2641 for (size_t i = 0; i < buffer_cell->num_corners; ++i)
2642 buffer_cell->edge_type[i] = YAC_GREAT_CIRCLE_EDGE;
2643
2644 return
2645 yac_point_in_cell2(
2646 coord, *buffer_cell, dist_grid->cell_bnd_circles[cell_idx]);
2647}
2648
2650 struct yac_const_basic_grid_data * grid_data, size_t cell_idx,
2651 struct yac_grid_cell * buffer_cell) {
2652
2653 size_t num_vertices = (size_t)(grid_data->num_vertices_per_cell[cell_idx]);
2654
2655 struct yac_grid_cell cell = *buffer_cell;
2656
2657 if (cell.array_size < num_vertices) {
2658 cell.coordinates_xyz =
2659 xrealloc(cell.coordinates_xyz, num_vertices *
2660 sizeof(*(cell.coordinates_xyz)));
2661 cell.edge_type = xrealloc(cell.edge_type, num_vertices *
2662 sizeof(*(cell.edge_type)));
2663 cell.array_size = num_vertices;
2664 *buffer_cell = cell;
2665 }
2666
2667 for (size_t i = 0; i < num_vertices; ++i) {
2668 size_t vertex_idx =
2669 grid_data->cell_to_vertex[
2670 grid_data->cell_to_vertex_offsets[cell_idx] + i];
2671 cell.coordinates_xyz[i][0] = grid_data->vertex_coordinates[vertex_idx][0];
2672 cell.coordinates_xyz[i][1] = grid_data->vertex_coordinates[vertex_idx][1];
2673 cell.coordinates_xyz[i][2] = grid_data->vertex_coordinates[vertex_idx][2];
2674 size_t edge_idx =
2675 grid_data->cell_to_edge[grid_data->cell_to_edge_offsets[cell_idx] + i];
2676 cell.edge_type[i] = grid_data->edge_type[edge_idx];
2677 }
2678 buffer_cell->num_corners = num_vertices;
2679}
2680
2682 struct yac_dist_grid_pair * grid_pair, char const * grid_name) {
2683
2684 struct bnd_sphere_part_search * search = NULL;
2685
2686 for (int i = 0; (i < 2) && (search == NULL); ++i)
2687 if (!strcmp(grid_name, grid_pair->grid_names[i]))
2688 search = grid_pair->cell_sphere_part[i];
2689 YAC_ASSERT(
2690 search != NULL,
2691 "ERROR(yac_dist_grid_pair_get_cell_sphere_part): invalid grid_name")
2692 return search;
2693}
2694
2696 struct yac_dist_grid_pair * grid_pair, char const * grid_name,
2697 yac_coordinate_pointer search_coords, size_t count, size_t * cells,
2698 int (*coord_in_cell)(
2699 double coord[3], struct yac_dist_grid * dist_grid, size_t cell_idx,
2700 struct yac_grid_cell * buffer_cell)) {
2701
2702 struct bnd_sphere_part_search * cell_sphere_part =
2703 dist_grid_pair_get_cell_sphere_part(grid_pair, grid_name);
2704 struct yac_dist_grid * dist_grid =
2705 yac_dist_grid_pair_get_dist_grid(grid_pair, grid_name);
2706
2707 size_t * temp_cells;
2708 size_t * num_cells_per_coord =
2709 xmalloc(count * sizeof(*num_cells_per_coord));
2710
2711 // search for all matching source cells
2713 cell_sphere_part, search_coords, count, &temp_cells,
2714 num_cells_per_coord);
2715
2716 struct yac_grid_cell buffer_cell;
2717 yac_init_grid_cell(&buffer_cell);
2718
2719 // if we have multiple source cells for a single search coordinate, get the
2720 // source cell with the lowest global id
2721 for (size_t i = 0, k = 0; i < count; ++i) {
2722 size_t curr_num_cells = num_cells_per_coord[i];
2723 if (curr_num_cells == 0) {
2724 cells[i] = SIZE_MAX;
2725 } else if (curr_num_cells == 1) {
2726 if (coord_in_cell(
2727 search_coords[i], dist_grid, temp_cells[k], &buffer_cell))
2728 cells[i] = temp_cells[k];
2729 else
2730 cells[i] = SIZE_MAX;
2731 ++k;
2732 } else {
2733 size_t cell_idx = SIZE_MAX;
2734 yac_int cell_id = XT_INT_MAX;
2735 for (size_t j = 0; j < curr_num_cells; ++j, ++k) {
2736 size_t curr_cell_idx = temp_cells[k];
2737 yac_int curr_cell_id = dist_grid->ids[YAC_LOC_CELL][curr_cell_idx];
2738 if (!coord_in_cell(
2739 search_coords[i], dist_grid, curr_cell_idx, &buffer_cell))
2740 continue;
2741 if (curr_cell_id < cell_id) {
2742 cell_idx = curr_cell_idx;
2743 cell_id = curr_cell_id;
2744 }
2745 }
2746 cells[i] = cell_idx;
2747 }
2748 }
2749
2750 yac_free_grid_cell(&buffer_cell);
2751 free(num_cells_per_coord);
2752 free(temp_cells);
2753}
2754
2755static void lookup_single_remote_point_reorder_locally(
2756 struct yac_dist_grid * dist_grid, enum yac_location location,
2757 struct single_remote_point_reorder * ids, size_t * count, size_t * idx) {
2758
2759 CHECK_LOCATION("lookup_single_remote_point_reorder_locally")
2760
2761 yac_int * sorted_ids = dist_grid->sorted_ids[location];
2762 size_t * reorder_idx = dist_grid->sorted_reorder_idx[location];
2763 size_t num_ids = dist_grid->total_count[location];
2764
2765 size_t count_ = *count;
2766 size_t new_count = 0;
2767
2768 // sort ids by global ids
2769 qsort(ids, count_, sizeof(*ids),
2770 compare_single_remote_point_reorder_global_id);
2771
2772 for (size_t i = 0, j = 0; i < count_; ++i) {
2773 yac_int curr_id = ids[i].data.global_id;
2774 while ((j < num_ids) && (sorted_ids[j] < curr_id)) ++j;
2775 if ((j < num_ids) && (sorted_ids[j] == curr_id)) {
2776 idx[ids[i].reorder_idx] = reorder_idx[j];
2777 } else {
2778 if (i != new_count) ids[new_count] = ids[i];
2779 ++new_count;
2780 }
2781 }
2782
2783 *count = new_count;
2784}
2785
2786static int get_pack_size_field_data(
2787 struct yac_field_data * field_data, MPI_Comm comm) {
2788
2789 int pack_size_field_coord, pack_size_field_mask;
2790
2791 yac_mpi_call(
2792 MPI_Pack_size(3, MPI_DOUBLE, comm, &pack_size_field_coord), comm);
2793 pack_size_field_coord *=
2794 (int)yac_field_data_get_coordinates_count(field_data);
2795
2796 yac_mpi_call(
2797 MPI_Pack_size(1, MPI_INT, comm, &pack_size_field_mask), comm);
2798 pack_size_field_mask *=
2799 (int)yac_field_data_get_masks_count(field_data);
2800
2801 return pack_size_field_coord + pack_size_field_mask;
2802}
2803
2805 struct yac_field_data * cell_field_data,
2806 MPI_Datatype bnd_circle_dt, MPI_Comm comm) {
2807
2808 int pack_size_id,
2809 pack_size_num_vertices,
2810 pack_size_bnd_circle;
2811
2812 // id
2813 yac_mpi_call(MPI_Pack_size(1, yac_int_dt, comm, &pack_size_id), comm);
2814 // num_vertices
2815 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_num_vertices), comm);
2816 // bounding circle
2817 yac_mpi_call(
2818 MPI_Pack_size(1, bnd_circle_dt, comm, &pack_size_bnd_circle), comm);
2819
2820 return pack_size_id + pack_size_num_vertices + pack_size_bnd_circle +
2821 get_pack_size_field_data(cell_field_data, comm);
2822}
2823
2825 struct yac_field_data * vertex_field_data, MPI_Comm comm) {
2826
2827 int pack_size_id,
2828 pack_size_vertex_coords;
2829 // id
2830 yac_mpi_call(MPI_Pack_size(1, yac_int_dt, comm, &pack_size_id), comm);
2831 // vertex coordinates
2832 yac_mpi_call(
2833 MPI_Pack_size(3, MPI_DOUBLE, comm, &pack_size_vertex_coords), comm);
2834
2835 return pack_size_id + pack_size_vertex_coords +
2836 get_pack_size_field_data(vertex_field_data, comm);
2837}
2838
2840 struct yac_field_data * edge_field_data, MPI_Comm comm) {
2841
2842 int pack_size_id,
2843 pack_size_edge_to_vertex,
2844 pack_size_edge_type;
2845 // id
2846 yac_mpi_call(MPI_Pack_size(1, yac_int_dt, comm, &pack_size_id), comm);
2847 // edge type
2848 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_edge_type), comm);
2849 // edge vertex ids
2850 yac_mpi_call(
2851 MPI_Pack_size(2, yac_int_dt, comm, &pack_size_edge_to_vertex), comm);
2852
2853 return pack_size_id + pack_size_edge_type + pack_size_edge_to_vertex +
2854 get_pack_size_field_data(edge_field_data, comm);
2855}
2856
2858 struct yac_dist_grid * dist_grid, uint64_t * pos, size_t count,
2859 int * pack_sizes, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
2860 MPI_Comm comm) {
2861
2862 int pack_size_base_cell =
2865 dist_grid->field_data, YAC_LOC_CELL),
2866 bnd_circle_dt, comm);
2867 int pack_size_base_vertex =
2870 dist_grid->field_data, YAC_LOC_CORNER), comm);
2871 int pack_size_base_edge =
2874 dist_grid->field_data, YAC_LOC_EDGE), comm);
2875
2876 for (size_t i = 0; i < count; ++i) {
2877 size_t idx = (size_t)(pos[i]);
2878 int num_vertices = dist_grid->num_vertices_per_cell[idx];
2879 size_t * curr_vertices =
2880 dist_grid->cell_to_vertex + dist_grid->cell_to_vertex_offsets[idx];
2881 size_t * curr_edges =
2882 dist_grid->cell_to_edge + dist_grid->cell_to_edge_offsets[idx];
2883 int pack_size =
2884 pack_size_base_cell +
2885 num_vertices * (pack_size_base_vertex + pack_size_base_edge) +
2887 dist_grid->owners[YAC_LOC_CELL] + idx, point_info_dt, comm);
2888 for (int j = 0; j < num_vertices; ++j) {
2889 pack_size +=
2891 dist_grid->owners[YAC_LOC_CORNER] + curr_vertices[j],
2892 point_info_dt, comm) +
2894 dist_grid->owners[YAC_LOC_EDGE] + curr_edges[j], point_info_dt, comm);
2895 }
2896 pack_sizes[i] = pack_size;
2897 }
2898}
2899
2901 struct yac_dist_grid * dist_grid, uint64_t * pos, size_t count,
2902 int * pack_sizes, MPI_Datatype point_info_dt, MPI_Comm comm) {
2903
2904 int pack_size_base_vertex =
2907 dist_grid->field_data, YAC_LOC_CORNER), comm);
2908 for (size_t i = 0; i < count; ++i)
2909 pack_sizes[i] =
2910 pack_size_base_vertex +
2912 dist_grid->owners[YAC_LOC_CORNER] + pos[i], point_info_dt, comm);
2913}
2914
2916 struct yac_dist_grid * dist_grid, uint64_t * pos, size_t count,
2917 int * pack_sizes, MPI_Datatype point_info_dt, MPI_Comm comm) {
2918
2919 int pack_size_base_vertex =
2922 dist_grid->field_data, YAC_LOC_CORNER), comm);
2923 int pack_size_base_edge =
2926 dist_grid->field_data, YAC_LOC_EDGE), comm);
2927 for (size_t i = 0; i < count; ++i) {
2928 size_t * curr_vertices = dist_grid->edge_to_vertex[pos[i]];
2929 pack_sizes[i] =
2930 pack_size_base_edge +
2931 2 * pack_size_base_vertex +
2933 dist_grid->owners[YAC_LOC_EDGE] + pos[i], point_info_dt, comm) +
2935 dist_grid->owners[YAC_LOC_CORNER] + curr_vertices[0],
2936 point_info_dt, comm) +
2938 dist_grid->owners[YAC_LOC_CORNER] + curr_vertices[1],
2939 point_info_dt, comm);
2940 }
2941}
2942
2943static void get_pack_sizes(
2944 struct yac_dist_grid * dist_grid, enum yac_location location, uint64_t * pos,
2945 size_t count, int * pack_sizes, MPI_Datatype bnd_circle_dt,
2946 MPI_Datatype point_info_dt, MPI_Comm comm) {
2947
2948 CHECK_LOCATION("get_pack_sizes")
2949
2950 switch(location) {
2951 default:
2952 case(YAC_LOC_CELL):
2954 dist_grid, pos, count, pack_sizes, bnd_circle_dt, point_info_dt, comm);
2955 break;
2956 case(YAC_LOC_CORNER):
2958 dist_grid, pos, count, pack_sizes, point_info_dt, comm);
2959 break;
2960 case(YAC_LOC_EDGE):
2962 dist_grid, pos, count, pack_sizes, point_info_dt, comm);
2963 break;
2964 };
2965}
2966
2968 size_t idx, void * buffer, int buffer_size, int * position,
2969 struct yac_field_data * field_data, MPI_Comm comm) {
2970
2971 size_t coordinates_count =
2972 yac_field_data_get_coordinates_count(field_data);
2973 size_t masks_count =
2974 yac_field_data_get_masks_count(field_data);
2975
2976 // coordinates
2977 for (size_t i = 0; i < coordinates_count; ++i)
2978 yac_mpi_call(
2979 MPI_Pack(
2980 yac_field_data_get_coordinates_data(field_data, i)[idx],
2981 3, MPI_DOUBLE, buffer, buffer_size, position, comm), comm);
2982
2983 // masks
2984 for (size_t i = 0; i < masks_count; ++i)
2985 yac_mpi_call(
2986 MPI_Pack(
2987 yac_field_data_get_mask_data(field_data, i) + idx, 1, MPI_INT, buffer,
2988 buffer_size, position, comm), comm);
2989}
2990
2992 struct yac_dist_grid * dist_grid, size_t idx, void * buffer, int buffer_size,
2993 int * position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
2994 MPI_Comm comm) {
2995
2996 UNUSED(bnd_circle_dt);
2997
2998 // id
2999 yac_mpi_call(
3000 MPI_Pack(dist_grid->ids[YAC_LOC_CORNER] + idx, 1, yac_int_dt, buffer, buffer_size,
3001 position, comm), comm);
3002 // vertex coordinates
3003 yac_mpi_call(
3004 MPI_Pack(&(dist_grid->vertex_coordinates[idx][0]), 3, MPI_DOUBLE, buffer,
3005 buffer_size, position, comm), comm);
3006 // vertex owner
3008 dist_grid->owners[YAC_LOC_CORNER] + idx, buffer, buffer_size, position,
3009 point_info_dt, comm);
3010 // pack field data
3012 idx, buffer, buffer_size, position,
3014}
3015
3017 struct yac_dist_grid * dist_grid, size_t idx, void * buffer, int buffer_size,
3018 int * position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
3019 MPI_Comm comm) {
3020
3021 UNUSED(bnd_circle_dt);
3022
3023 int edge_type = (int)(dist_grid->edge_type[idx]);
3024
3025 // id
3027 MPI_Pack(
3028 dist_grid->ids[YAC_LOC_EDGE] + idx, 1, yac_int_dt, buffer, buffer_size, position,
3029 comm), comm);
3030 // edge type
3032 MPI_Pack(
3033 &edge_type, 1, MPI_INT, buffer, buffer_size, position, comm), comm);
3034 // edge to vertex
3035 yac_int edge_to_vertex[2] = {
3036 dist_grid->ids[YAC_LOC_CORNER][dist_grid->edge_to_vertex[idx][0]],
3037 dist_grid->ids[YAC_LOC_CORNER][dist_grid->edge_to_vertex[idx][1]]};
3039 MPI_Pack(
3040 edge_to_vertex, 2, yac_int_dt, buffer, buffer_size, position, comm),
3041 comm);
3042 // edge owner
3044 dist_grid->owners[YAC_LOC_EDGE] + idx, buffer, buffer_size, position,
3045 point_info_dt, comm);
3046 // pack field data
3048 idx, buffer, buffer_size, position,
3050}
3051
3053 struct yac_dist_grid * dist_grid, size_t idx, void * buffer, int buffer_size,
3054 int * position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
3055 MPI_Comm comm) {
3056
3058 dist_grid, idx, buffer, buffer_size, position,
3059 bnd_circle_dt, point_info_dt, comm);
3060
3061 // pack edge vertices
3062 for (int i = 0; i < 2; ++i)
3064 dist_grid, dist_grid->edge_to_vertex[idx][i],
3065 buffer, buffer_size, position, bnd_circle_dt, point_info_dt, comm);
3066}
3067
3069 struct yac_dist_grid * dist_grid, size_t idx, void * buffer, int buffer_size,
3070 int * position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
3071 MPI_Comm comm) {
3072
3073 int num_vertices = dist_grid->num_vertices_per_cell[idx];
3074
3075 // id
3077 MPI_Pack(
3078 dist_grid->ids[YAC_LOC_CELL] + idx, 1, yac_int_dt, buffer, buffer_size, position,
3079 comm), comm);
3080 // pack field data
3082 idx, buffer, buffer_size, position,
3084 // num_vertices
3086 MPI_Pack(&num_vertices, 1, MPI_INT, buffer,
3087 buffer_size, position, comm), comm);
3088 // bounding_circle
3090 MPI_Pack(dist_grid->cell_bnd_circles + idx, 1, bnd_circle_dt, buffer,
3091 buffer_size, position, comm), comm);
3092 // cell owner
3094 dist_grid->owners[YAC_LOC_CELL] + idx, buffer, buffer_size, position,
3095 point_info_dt, comm);
3096
3097 for (int i = 0; i < num_vertices; ++i) {
3099 dist_grid,
3100 dist_grid->cell_to_vertex[dist_grid->cell_to_vertex_offsets[idx] + i],
3101 buffer, buffer_size, position, bnd_circle_dt, point_info_dt, comm);
3103 dist_grid,
3104 dist_grid->cell_to_edge[dist_grid->cell_to_edge_offsets[idx] + i],
3105 buffer, buffer_size, position, bnd_circle_dt, point_info_dt, comm);
3106 }
3107}
3108
3109static void pack_grid_data(
3110 struct yac_dist_grid * dist_grid, enum yac_location location, uint64_t * pos,
3111 size_t count, void ** pack_data, int * pack_sizes,
3112 MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt, MPI_Comm comm) {
3113
3114 get_pack_sizes(dist_grid, location, pos, count, pack_sizes,
3115 bnd_circle_dt, point_info_dt, comm);
3116
3117 size_t pack_size = 0;
3118 for (size_t i = 0; i < count; ++i) pack_size += (size_t)(pack_sizes[i]);
3119
3120 void * pack_data_ = xmalloc(pack_size);
3121
3122 CHECK_LOCATION("pack_grid_data")
3123
3124 void (*func_pack[3])(
3125 struct yac_dist_grid * dist_grid, size_t idx, void * buffer,
3126 int buffer_size, int * position, MPI_Datatype bnd_circle_dt,
3127 MPI_Datatype point_info_dt, MPI_Comm comm) =
3129
3130 for (size_t i = 0, offset = 0; i < count; ++i) {
3131 int position = 0;
3132 func_pack[location](
3133 dist_grid, pos[i], (char*)pack_data_ + offset, pack_sizes[i],
3134 &position, bnd_circle_dt, point_info_dt, comm);
3135 pack_sizes[i] = position;
3136 offset += (size_t)position;
3137 }
3138
3139 *pack_data = pack_data_;
3140}
3141
3143 void * buffer, int buffer_size, int * position, size_t idx,
3144 struct temp_field_data temp_field_data, MPI_Comm comm) {
3145
3146 for (size_t i = 0; i < temp_field_data.coordinates_count; ++i)
3147 yac_mpi_call(
3148 MPI_Unpack(
3149 buffer, buffer_size, position, temp_field_data.coordinates[i][idx],
3150 3, MPI_DOUBLE, comm), comm);
3151
3152 for (size_t i = 0; i < temp_field_data.masks_count; ++i)
3153 yac_mpi_call(
3154 MPI_Unpack(buffer, buffer_size, position,
3155 temp_field_data.masks[i] + idx, 1, MPI_INT, comm), comm);
3156}
3157
3159 struct global_vertex_reorder * vertex, size_t idx, void * buffer,
3160 int buffer_size, int * position,
3161 struct temp_field_data temp_vertex_field_data,
3162 MPI_Datatype point_info_dt, MPI_Comm comm) {
3163
3164 // id
3165 yac_mpi_call(
3166 MPI_Unpack(buffer, buffer_size, position, &(vertex[idx].global_id), 1,
3167 yac_int_dt, comm), comm);
3168 // vertex coordinates
3169 yac_mpi_call(
3170 MPI_Unpack(buffer, buffer_size, position, &(vertex[idx].coord[0]), 3,
3171 MPI_DOUBLE, comm), comm);
3172 // vertex owners
3173 yac_remote_point_infos_unpack(
3174 buffer, buffer_size, position, &(vertex[idx].owners), point_info_dt, comm);
3175 // unpack field data
3177 buffer, buffer_size, position, idx, temp_vertex_field_data, comm);
3178}
3179
3181 struct global_edge_reorder * edge, size_t idx, void * buffer,
3182 int buffer_size, int * position,
3183 struct temp_field_data temp_edge_field_data,
3184 MPI_Datatype point_info_dt, MPI_Comm comm) {
3185
3186 int edge_type;
3187
3188 // id
3189 yac_mpi_call(
3190 MPI_Unpack(buffer, buffer_size, position, &(edge[idx].global_id), 1,
3191 yac_int_dt, comm), comm);
3192 // edge type
3193 yac_mpi_call(
3194 MPI_Unpack(buffer, buffer_size, position, &edge_type, 1,
3195 MPI_INT, comm), comm);
3196 edge[idx].edge_type = (enum yac_edge_type)edge_type;
3197 // edge to vertex
3198 yac_mpi_call(
3199 MPI_Unpack(buffer, buffer_size, position, edge[idx].edge_to_vertex, 2,
3200 yac_int_dt, comm), comm);
3201 // edge owners
3202 yac_remote_point_infos_unpack(buffer, buffer_size, position,
3203 &(edge[idx].owners), point_info_dt, comm);
3204 // unpack field data
3206 buffer, buffer_size, position, idx, temp_edge_field_data, comm);
3207}
3208
3209static int compare_global_vertex_reorder_global_id(
3210 const void * a, const void * b) {
3211
3212 return (((const struct global_vertex_reorder *)a)->global_id >
3213 ((const struct global_vertex_reorder *)b)->global_id) -
3214 (((const struct global_vertex_reorder *)a)->global_id <
3215 ((const struct global_vertex_reorder *)b)->global_id);
3216}
3217
3218static void add_field_data(
3219 struct yac_field_data * field_data, struct temp_field_data temp_field_data,
3220 void * reorder_idx, size_t reorder_idx_size,
3221 size_t old_count, size_t new_count) {
3222
3223 size_t add_count = new_count - old_count;
3224
3225 for (size_t i = 0; i < temp_field_data.masks_count; ++i) {
3226 int * temp_mask = temp_field_data.masks[i];
3227 int * mask =
3228 xrealloc(
3229 (void*)yac_field_data_get_mask_data(field_data, i),
3230 new_count * sizeof(*mask));
3231 yac_field_data_set_mask_data(field_data, i, mask);
3232 for (size_t i = 0, j = old_count; i < add_count; ++i, ++j) {
3233 size_t idx =
3234 *(size_t*)((unsigned char*)reorder_idx + i * reorder_idx_size);
3235 mask[j] = temp_mask[idx];
3236 }
3237 }
3238
3239 for (size_t i = 0; i < temp_field_data.coordinates_count; ++i) {
3240 yac_coordinate_pointer temp_coordinates = temp_field_data.coordinates[i];
3241 yac_coordinate_pointer coordinates =
3242 xrealloc(
3243 (void*)yac_field_data_get_coordinates_data(field_data, i),
3244 new_count * sizeof(*coordinates));
3245 yac_field_data_set_coordinates_data(field_data, i, coordinates);
3246 for (size_t i = 0, j = old_count; i < add_count; ++i, ++j) {
3247 size_t idx =
3248 *(size_t*)((unsigned char*)reorder_idx + i * reorder_idx_size);
3249 coordinates[j][0] = temp_coordinates[idx][0];
3250 coordinates[j][1] = temp_coordinates[idx][1];
3251 coordinates[j][2] = temp_coordinates[idx][2];
3252 }
3253 }
3254}
3255
3257 struct remote_point_infos * point_infos) {
3258
3259 if (point_infos->count > 1) free(point_infos->data.multi);
3260}
3261
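// integrates additional vertices (e.g. received as part of remote search
// results) into the local part of the distributed grid; vertices that are
// already available locally are only looked up, all others are appended and
// marked as not owned; if idx != NULL, it returns for each input vertex its
// local index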
3262static void yac_dist_grid_add_vertices(
3263 struct yac_dist_grid * dist_grid, struct global_vertex_reorder * vertices,
3264 size_t count, size_t * idx,
3265 struct temp_field_data temp_vertex_field_data) {
3266
3267 if (count == 0) return;
3268
3269 // sort vertices by global id
3270 qsort(vertices, count, sizeof(*vertices),
3271 compare_global_vertex_reorder_global_id);
3272
3273 yac_int * sorted_vertex_ids =
3274 dist_grid->sorted_ids[YAC_LOC_CORNER];
3275 size_t * sorted_vertex_reorder_idx =
3276 dist_grid->sorted_reorder_idx[YAC_LOC_CORNER];
3277
3278 yac_int prev_global_id = vertices[0].global_id - 1;
3279 size_t prev_idx = 0;
3280 size_t add_count = 0;
3281 size_t num_total_vertices = dist_grid->total_count[YAC_LOC_CORNER];
3282
3283 // determine which vertices need to be added to local data
3284 for (size_t i = 0, j = 0; i < count; ++i) {
3285
3286 yac_int curr_global_id = vertices[i].global_id;
3287 size_t curr_reorder_idx = vertices[i].reorder_idx;
3288
3289 // if the current global id is a duplicate
3290 if (prev_global_id == curr_global_id) {
3291 if (idx != NULL) idx[curr_reorder_idx] = prev_idx;
3293 continue;
3294 }
3295 prev_global_id = curr_global_id;
3296
3297 // check whether the current global id is already part of the local
3298 // grid data
3299 while ((j < num_total_vertices) && (sorted_vertex_ids[j] < curr_global_id))
3300 ++j;
3301
3302 // if we found a match in the local data
3303 if ((j < num_total_vertices) && (sorted_vertex_ids[j] == curr_global_id)) {
3304
3305 if (idx != NULL) idx[curr_reorder_idx] = sorted_vertex_reorder_idx[j];
3306 prev_idx = sorted_vertex_reorder_idx[j];
3308
3309 // if we need to add the current vertex to the local data
3310 } else {
3311
3312 if (idx != NULL) idx[curr_reorder_idx] = num_total_vertices + add_count;
3313 prev_idx = num_total_vertices + add_count;
3314 if (add_count != i) vertices[add_count] = vertices[i];
3315 ++add_count;
3316 }
3317 }
3318
3319 size_t new_num_total_vertices = num_total_vertices + add_count;
3320 yac_coordinate_pointer vertex_coordinates =
3321 xrealloc(dist_grid->vertex_coordinates, new_num_total_vertices *
3322 sizeof(*vertex_coordinates));
3323 yac_int * vertex_ids =
3324 xrealloc(dist_grid->ids[YAC_LOC_CORNER],
3325 new_num_total_vertices * sizeof(*vertex_ids));
3326 int * vertex_owner_mask =
3327 xrealloc(dist_grid->owner_mask[YAC_LOC_CORNER], new_num_total_vertices *
3328 sizeof(*vertex_owner_mask));
3329 struct remote_point_infos * vertex_owners =
3330 xrealloc(dist_grid->owners[YAC_LOC_CORNER], new_num_total_vertices *
3331 sizeof(*vertex_owners));
3332 sorted_vertex_ids =
3333 xrealloc(
3334 sorted_vertex_ids, new_num_total_vertices * sizeof(*sorted_vertex_ids));
3335 sorted_vertex_reorder_idx =
3336 xrealloc(
3337 sorted_vertex_reorder_idx, new_num_total_vertices *
3338 sizeof(*sorted_vertex_reorder_idx));
3339
3340 // add the selected vertices to the local grid data
3341 for (size_t i = 0, j = num_total_vertices; i < add_count; ++i, ++j) {
3342
3343 vertex_coordinates[j][0] = vertices[i].coord[0];
3344 vertex_coordinates[j][1] = vertices[i].coord[1];
3345 vertex_coordinates[j][2] = vertices[i].coord[2];
3346 vertex_ids[j] = vertices[i].global_id;
3347 vertex_owner_mask[j] = 0;
3348 vertex_owners[j] = vertices[i].owners;
3349 sorted_vertex_ids[j] = vertices[i].global_id;
3350 sorted_vertex_reorder_idx[j] = j;
3351 }
3352 // add field data
3353 add_field_data(
3354 yac_field_data_set_get_field_data(dist_grid->field_data, YAC_LOC_CORNER),
3355 temp_vertex_field_data, vertices, sizeof(*vertices),
3356 num_total_vertices, new_num_total_vertices);
3357 yac_quicksort_index_yac_int_size_t(
3358 sorted_vertex_ids, new_num_total_vertices, sorted_vertex_reorder_idx);
3359
3360 dist_grid->vertex_coordinates = vertex_coordinates;
3361 dist_grid->ids[YAC_LOC_CORNER] = vertex_ids;
3362 dist_grid->owner_mask[YAC_LOC_CORNER] = vertex_owner_mask;
3363 dist_grid->owners[YAC_LOC_CORNER] = vertex_owners;
3364 dist_grid->sorted_ids[YAC_LOC_CORNER] = sorted_vertex_ids;
3365 dist_grid->sorted_reorder_idx[YAC_LOC_CORNER] = sorted_vertex_reorder_idx;
3366 dist_grid->total_count[YAC_LOC_CORNER] = new_num_total_vertices;
3367}
3368
3369static int compare_global_edge_reorder_global_id(
3370 const void * a, const void * b) {
3371
3372 return (((const struct global_edge_reorder *)a)->global_id >
3373 ((const struct global_edge_reorder *)b)->global_id) -
3374 (((const struct global_edge_reorder *)a)->global_id <
3375 ((const struct global_edge_reorder *)b)->global_id);
3376}
3377
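// integrates additional edges into the local part of the distributed grid
// (analogous to the vertex routine above); the edge-to-vertex connectivity of
// newly added edges is translated from global vertex ids to local vertex
// indices, which requires the referenced vertices to already be present
// locally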
3378static void yac_dist_grid_add_edges(
3379 struct yac_dist_grid * dist_grid, struct global_edge_reorder * edges,
3380 size_t count, size_t * idx, struct temp_field_data temp_edge_field_data) {
3381
3382 if (count == 0) return;
3383
3384 // sort edges by global id
3385 qsort(edges, count, sizeof(*edges), compare_global_edge_reorder_global_id);
3386
3387 yac_int * sorted_edge_ids = dist_grid->sorted_ids[YAC_LOC_EDGE];
3388 size_t * sorted_edge_reorder_idx = dist_grid->sorted_reorder_idx[YAC_LOC_EDGE];
3389
3390 yac_int prev_global_id = edges[0].global_id - 1;
3391 size_t prev_idx = 0;
3392 size_t add_count = 0;
3393 size_t num_total_edges = dist_grid->total_count[YAC_LOC_EDGE];
3394
3395 // determine which edges need to be added to local data
3396 for (size_t i = 0, j = 0; i < count; ++i) {
3397
3398 yac_int curr_global_id = edges[i].global_id;
3399 size_t curr_reorder_idx = edges[i].reorder_idx;
3400
3401 // if the current global id is a duplicate
3402 if (prev_global_id == curr_global_id) {
3403 if (idx != NULL) idx[curr_reorder_idx] = prev_idx;
3405 continue;
3406 }
3407 prev_global_id = curr_global_id;
3408
3409 // check whether the current global id is already part of the local
3410 // grid data
3411 while ((j < num_total_edges) && (sorted_edge_ids[j] < curr_global_id)) ++j;
3412
3413 // if we found a match in the local data
3414 if ((j < num_total_edges) && (sorted_edge_ids[j] == curr_global_id)) {
3415
3416 if (idx != NULL) idx[curr_reorder_idx] = sorted_edge_reorder_idx[j];
3417 prev_idx = sorted_edge_reorder_idx[j];
3419
3420 // if we need to add the current edge to the local data
3421 } else {
3422
3423 if (idx != NULL) idx[curr_reorder_idx] = num_total_edges + add_count;
3424 prev_idx = num_total_edges + add_count;
3425 if (add_count != i) edges[add_count] = edges[i];
3426 ++add_count;
3427 }
3428 }
3429
3430 size_t new_num_total_edges = num_total_edges + add_count;
3431 yac_int * edge_ids =
3432 xrealloc(dist_grid->ids[YAC_LOC_EDGE],
3433 new_num_total_edges * sizeof(*edge_ids));
3434 enum yac_edge_type * edge_type =
3435 xrealloc(dist_grid->edge_type,
3436 new_num_total_edges * sizeof(*edge_type));
3438 xrealloc(dist_grid->edge_to_vertex,
3439 new_num_total_edges * sizeof(*edge_to_vertex));
3440 struct remote_point_infos * edge_owners =
3441 xrealloc(dist_grid->owners[YAC_LOC_EDGE],
3442 new_num_total_edges * sizeof(*edge_owners));
3443 int * edge_owner_mask =
3444 xrealloc(dist_grid->owner_mask[YAC_LOC_EDGE], new_num_total_edges *
3445 sizeof(*edge_owner_mask));
3446 sorted_edge_ids =
3447 xrealloc(
3448 sorted_edge_ids, new_num_total_edges * sizeof(*sorted_edge_ids));
3449 sorted_edge_reorder_idx =
3450 xrealloc(
3451 sorted_edge_reorder_idx, new_num_total_edges *
3452 sizeof(*sorted_edge_reorder_idx));
3453
3454 yac_int * vertex_ids = xmalloc(2 * add_count * sizeof(*vertex_ids));
3455 size_t * reorder = xmalloc(2 * add_count * sizeof(*reorder));
3456
3457 // add the selected edges to the local grid data
3458 for (size_t i = 0, j = num_total_edges; i < add_count; ++i, ++j) {
3459
3460 edge_ids[j] = edges[i].global_id;
3461 edge_type[j] = edges[i].edge_type;
3462 edge_owner_mask[j] = 0;
3463 edge_owners[j] = edges[i].owners;
3464 sorted_edge_ids[j] = edges[i].global_id;
3465 sorted_edge_reorder_idx[j] = j;
3466
3467 vertex_ids[2 * i + 0] = edges[i].edge_to_vertex[0];
3468 vertex_ids[2 * i + 1] = edges[i].edge_to_vertex[1];
3469 reorder[2 * i + 0] = 2 * num_total_edges + 2 * i + 0;
3470 reorder[2 * i + 1] = 2 * num_total_edges + 2 * i + 1;
3471 }
3472 // add field data
3475 temp_edge_field_data, edges, sizeof(*edges),
3476 num_total_edges, new_num_total_edges);
3477 yac_quicksort_index_yac_int_size_t(
3478 sorted_edge_ids, new_num_total_edges, sorted_edge_reorder_idx);
3479
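 // reorder[] holds, for every vertex of a newly added edge, its position in
 // the combined edge_to_vertex array (offset by the 2 * num_total_edges
 // entries that already exist), so that the lookup below can scatter the
 // resolved local vertex indices directly into the correct slots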
3480 { // determine vertex indices for edge_to_vertex
3481 yac_quicksort_index_yac_int_size_t(vertex_ids, 2 * add_count, reorder);
3482 yac_int * sorted_vertex_ids = dist_grid->sorted_ids[YAC_LOC_CORNER];
3483 size_t * sorted_vertex_reorder_idx =
3484 dist_grid->sorted_reorder_idx[YAC_LOC_CORNER];
3485 size_t total_num_vertices = dist_grid->total_count[YAC_LOC_CORNER];
3486 size_t * edge_to_vertex_ = (size_t*)&(edge_to_vertex[0][0]);
3487 // lookup global ids
3488 for (size_t i = 0, j = 0; i < 2 * add_count; ++i) {
3489 yac_int curr_id = vertex_ids[i];
3490 while ((j < total_num_vertices) && (sorted_vertex_ids[j] < curr_id)) ++j;
3491 YAC_ASSERT(
3492 (j < total_num_vertices) && (sorted_vertex_ids[j] == curr_id),
3493 "ERROR(yac_dist_grid_add_edges): vertex id not found")
3494 edge_to_vertex_[reorder[i]] = sorted_vertex_reorder_idx[j];
3495 }
3496 }
3497
3498 free(vertex_ids);
3499 free(reorder);
3500
3501 dist_grid->ids[YAC_LOC_EDGE] = edge_ids;
3502 dist_grid->edge_type = edge_type;
3503 dist_grid->edge_to_vertex = edge_to_vertex;
3504 dist_grid->owners[YAC_LOC_EDGE] = edge_owners;
3505 dist_grid->owner_mask[YAC_LOC_EDGE] = edge_owner_mask;
3506 dist_grid->sorted_ids[YAC_LOC_EDGE] = sorted_edge_ids;
3507 dist_grid->sorted_reorder_idx[YAC_LOC_EDGE] = sorted_edge_reorder_idx;
3508 dist_grid->total_count[YAC_LOC_EDGE] = new_num_total_edges;
3509}
3510
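// integrates the given cells into the local part of the distributed grid;
// duplicated global ids and cells that are already stored locally are skipped
// (the owner information of duplicates is freed); cell_to_vertex and
// cell_to_edge are expected to already contain local vertex/edge indices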
3512 struct yac_dist_grid * dist_grid, yac_int * cell_ids,
3513 int * num_vertices_per_cell, struct bounding_circle * cell_bnd_circles,
3514 size_t count, size_t * cell_to_vertex, size_t * cell_to_edge,
3515 struct remote_point_infos * cell_owners,
3516 struct temp_field_data temp_cell_field_data) {
3517
3518 if (count == 0) return;
3519
3520 size_t * reorder_idx = xmalloc(count * sizeof(*reorder_idx));
3521 for (size_t i = 0; i < count; ++i) reorder_idx[i] = i;
3522
3523 size_t * prescan = xmalloc(count * sizeof(*prescan));
3524 for (size_t i = 0, accu = 0; i < count;
3525 accu += (size_t)(num_vertices_per_cell[i++])) prescan[i] = accu;
3526
3527 // sort cells global ids
3528 yac_quicksort_index_yac_int_size_t(cell_ids, count, reorder_idx);
3529
3530 yac_int * sorted_cell_ids = dist_grid->sorted_ids[YAC_LOC_CELL];
3531 size_t * sorted_cell_reorder_idx =
3532 dist_grid->sorted_reorder_idx[YAC_LOC_CELL];
3533
3534 yac_int prev_global_id = cell_ids[0] - 1;
3535 size_t cell_add_count = 0;
3536 size_t relations_add_count = 0;
3537 size_t num_total_cells = dist_grid->total_count[YAC_LOC_CELL];
3538
3539 // determine which cells need to be added to local data
3540 for (size_t i = 0, j = 0; i < count; ++i) {
3541
3542 yac_int curr_global_id = cell_ids[i];
3543 size_t curr_reorder_idx = reorder_idx[i];
3544
3545 // if the current global id is a duplicate
3546 if (prev_global_id == curr_global_id) {
3547 yac_remote_point_infos_single_free(cell_owners + curr_reorder_idx);
3548 continue;
3549 }
3550 prev_global_id = curr_global_id;
3551
3552 // check whether the current global id is already part of the local
3553 // grid data
3554 while ((j < num_total_cells) && (sorted_cell_ids[j] < curr_global_id)) ++j;
3555
3556 // if we did not find a match in the local data
3557 if ((j >= num_total_cells) || (sorted_cell_ids[j] != curr_global_id)) {
3558
3559 if (cell_add_count != i) {
3560 cell_ids[cell_add_count] = curr_global_id;
3561 reorder_idx[cell_add_count] = curr_reorder_idx;
3562 }
3563 ++cell_add_count;
3564 relations_add_count += (size_t)(num_vertices_per_cell[curr_reorder_idx]);
3565 }
3566 }
3567
3568 size_t new_num_total_cells = num_total_cells + cell_add_count;
3569 size_t num_total_relations =
3570 (num_total_cells > 0)?
3571 (dist_grid->cell_to_vertex_offsets[num_total_cells-1] +
3572 (size_t)(dist_grid->num_vertices_per_cell[num_total_cells-1])):0;
3573 size_t new_num_total_relations = num_total_relations + relations_add_count;
3574 yac_int * new_cell_ids =
3575 xrealloc(dist_grid->ids[YAC_LOC_CELL],
3576 new_num_total_cells * sizeof(*new_cell_ids));
3577 int * new_num_vertices_per_cell =
3578 xrealloc(dist_grid->num_vertices_per_cell, new_num_total_cells *
3579 sizeof(*new_num_vertices_per_cell));
3580 size_t * new_cell_to_vertex =
3581 xrealloc(dist_grid->cell_to_vertex, new_num_total_relations *
3582 sizeof(*new_cell_to_vertex));
3583 size_t * cell_to_vertex_offsets =
3584 xrealloc(dist_grid->cell_to_vertex_offsets, new_num_total_cells *
3585 sizeof(*cell_to_vertex_offsets));
3586 size_t * new_cell_to_edge =
3587 xrealloc(dist_grid->cell_to_edge, new_num_total_relations *
3588 sizeof(*new_cell_to_edge));
3589 struct bounding_circle * new_cell_bnd_circles =
3590 xrealloc(dist_grid->cell_bnd_circles, new_num_total_cells *
3591 sizeof(*new_cell_bnd_circles));
3592 int * cell_owner_mask =
3593 xrealloc(dist_grid->owner_mask[YAC_LOC_CELL],
3594 new_num_total_cells * sizeof(*cell_owner_mask));
3595 struct remote_point_infos * new_cell_owners =
3596 xrealloc(dist_grid->owners[YAC_LOC_CELL],
3597 new_num_total_cells * sizeof(*new_cell_owners));
3598 sorted_cell_ids =
3599 xrealloc(
3600 sorted_cell_ids, new_num_total_cells * sizeof(*sorted_cell_ids));
3601 sorted_cell_reorder_idx =
3602 xrealloc(
3603 sorted_cell_reorder_idx, new_num_total_cells *
3604 sizeof(*sorted_cell_reorder_idx));
3605
3606 // add the selected cells to the local grid data
3607 for (size_t i = 0, j = num_total_cells; i < cell_add_count;
3608 ++i, ++j) {
3609
3610 size_t curr_reorder_idx = reorder_idx[i];
3611 int curr_num_vertices = num_vertices_per_cell[curr_reorder_idx];
3612 size_t curr_relation_idx = prescan[curr_reorder_idx];
3613
3614 new_cell_ids[j] = cell_ids[i];
3615 new_num_vertices_per_cell[j] = curr_num_vertices;
3616 cell_to_vertex_offsets[j] = num_total_relations;
3617 for (int k = 0; k < curr_num_vertices;
3618 ++k, ++num_total_relations, ++curr_relation_idx) {
3619 new_cell_to_vertex[num_total_relations] =
3620 cell_to_vertex[curr_relation_idx];
3621 new_cell_to_edge[num_total_relations] = cell_to_edge[curr_relation_idx];
3622 }
3623 cell_owner_mask[j] = 0;
3624 sorted_cell_ids[j] = cell_ids[i];
3625 sorted_cell_reorder_idx[j] = j;
3626 new_cell_bnd_circles[j] = cell_bnd_circles[curr_reorder_idx];
3627 new_cell_owners[j] = cell_owners[curr_reorder_idx];
3628 }
3629 // add field data
3632 temp_cell_field_data, reorder_idx, sizeof(*reorder_idx),
3633 num_total_cells, new_num_total_cells);
3634 yac_quicksort_index_yac_int_size_t(
3635 sorted_cell_ids, new_num_total_cells, sorted_cell_reorder_idx);
3636
3637 dist_grid->ids[YAC_LOC_CELL] = new_cell_ids;
3638 dist_grid->num_vertices_per_cell = new_num_vertices_per_cell;
3639 dist_grid->cell_to_vertex = new_cell_to_vertex;
3640 dist_grid->cell_to_vertex_offsets = cell_to_vertex_offsets;
3641 dist_grid->cell_to_edge = new_cell_to_edge;
3642 dist_grid->cell_to_edge_offsets = cell_to_vertex_offsets;
3643 dist_grid->owner_mask[YAC_LOC_CELL] = cell_owner_mask;
3644 dist_grid->owners[YAC_LOC_CELL] = new_cell_owners;
3645 dist_grid->sorted_ids[YAC_LOC_CELL] = sorted_cell_ids;
3646 dist_grid->sorted_reorder_idx[YAC_LOC_CELL] = sorted_cell_reorder_idx;
3647 dist_grid->cell_bnd_circles = new_cell_bnd_circles;
3648 dist_grid->total_count[YAC_LOC_CELL] = new_num_total_cells;
3649
3650 free(prescan);
3651 free(reorder_idx);
3652}
3653
3655 struct temp_field_data * temp_field_data, size_t size) {
3656
3657 for (size_t i = 0; i < temp_field_data->masks_count; ++i)
3660 for (size_t i = 0; i < temp_field_data->coordinates_count; ++i)
3664}
3665
3667 struct yac_field_data * field_data, size_t count) {
3668
3670 size_t masks_count = yac_field_data_get_masks_count(field_data);
3672
3678 for (size_t i = 0; i < masks_count; ++i) {
3680 xmalloc(count * sizeof(**temp_field_data.masks));
3682 }
3683
3685 xmalloc(
3688 xmalloc(
3692 for (size_t i = 0; i < coordinates_count; ++i) {
3694 xmalloc(count * sizeof(**temp_field_data.coordinates));
3696 }
3697
3698 return temp_field_data;
3699}
3700
3701static void temp_field_data_free(struct temp_field_data temp_field_data) {
3702
3703 for (size_t i = 0; i < temp_field_data.masks_count; ++i)
3704 free(temp_field_data.masks[i]);
3705 free(temp_field_data.masks);
3707 for (size_t i = 0; i < temp_field_data.coordinates_count; ++i)
3711}
3712
3714 struct yac_dist_grid * dist_grid, size_t count, void * buffer,
3715 int buffer_size, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
3716 MPI_Comm comm) {
3717
3718 yac_int * cell_ids = xmalloc(count * sizeof(*cell_ids));
3719 int * num_vertices_per_cell = xmalloc(count * sizeof(*num_vertices_per_cell));
3720 struct bounding_circle * cell_bnd_circles =
3721 xmalloc(count * sizeof(*cell_bnd_circles));
3722 struct remote_point_infos * cell_owners =
3723 xmalloc(count * sizeof(*cell_owners));
3724
3725 struct global_vertex_reorder * vertices = NULL;
3726 size_t vertices_array_size = 0;
3727 size_t total_num_vertices = 0;
3728
3729 struct global_edge_reorder * edges = NULL;
3730 size_t edges_array_size = 0;
3731
3732 struct temp_field_data temp_cell_field_data =
3735 dist_grid->field_data, YAC_LOC_CELL), count);
3736 struct temp_field_data temp_vertex_field_data =
3739 dist_grid->field_data, YAC_LOC_CORNER), 3 * count);
3740 struct temp_field_data temp_edge_field_data =
3743 dist_grid->field_data, YAC_LOC_EDGE), 3 * count);
3744
3745 for (size_t i = 0, buffer_offset = 0; i < count; ++i) {
3746
3747 int position = 0;
3748 void * curr_buffer = (char*)buffer + buffer_offset;
3749 int num_vertices;
3750
3751 // cell id
3753 MPI_Unpack(curr_buffer, buffer_size, &position, cell_ids + i, 1,
3754 yac_int_dt, comm), comm);
3755 // unpack field data
3757 curr_buffer, buffer_size, &position, i, temp_cell_field_data, comm);
3758 // num vertices
3760 MPI_Unpack(curr_buffer, buffer_size, &position, &num_vertices, 1,
3761 MPI_INT, comm), comm);
3762 // bounding circle
3764 MPI_Unpack(curr_buffer, buffer_size, &position, cell_bnd_circles + i, 1,
3765 bnd_circle_dt, comm), comm);
3766 // cell owners
3768 curr_buffer, buffer_size, &position, cell_owners + i,
3769 point_info_dt, comm);
3770
3771 num_vertices_per_cell[i] = num_vertices;
3772
3774 vertices, vertices_array_size, total_num_vertices + (size_t)num_vertices);
3776 edges, edges_array_size, total_num_vertices + (size_t)num_vertices);
3778 &temp_vertex_field_data, total_num_vertices + (size_t)num_vertices);
3780 &temp_edge_field_data, total_num_vertices + (size_t)num_vertices);
3781
3782 for (int j = 0; j < num_vertices; ++j, ++total_num_vertices) {
3784 vertices, total_num_vertices, curr_buffer, buffer_size, &position,
3785 temp_vertex_field_data, point_info_dt, comm);
3787 edges, total_num_vertices, curr_buffer, buffer_size, &position,
3788 temp_edge_field_data, point_info_dt, comm);
3789 vertices[total_num_vertices].reorder_idx = total_num_vertices;
3790 edges[total_num_vertices].reorder_idx = total_num_vertices;
3791 }
3792
3793 buffer_offset += (size_t)position;
3794 buffer_size -= position;
3795 }
3796
3797 size_t * cell_to_vertex = xmalloc(total_num_vertices * sizeof(*cell_to_vertex));
3798 size_t * cell_to_edge = xmalloc(total_num_vertices * sizeof(*cell_to_edge));
3799
3801 dist_grid, vertices, total_num_vertices, cell_to_vertex,
3802 temp_vertex_field_data);
3804 dist_grid, edges, total_num_vertices, cell_to_edge,
3805 temp_edge_field_data);
3807 dist_grid, cell_ids, num_vertices_per_cell, cell_bnd_circles, count,
3808 cell_to_vertex, cell_to_edge, cell_owners, temp_cell_field_data);
3809
3810 temp_field_data_free(temp_cell_field_data);
3811 temp_field_data_free(temp_vertex_field_data);
3812 temp_field_data_free(temp_edge_field_data);
3813 free(cell_to_edge);
3814 free(cell_to_vertex);
3815 free(vertices);
3816 free(edges);
3817 free(cell_owners);
3818 free(cell_bnd_circles);
3819 free(num_vertices_per_cell);
3820 free(cell_ids);
3821}
3822
3824 struct yac_dist_grid * dist_grid, size_t count, void * buffer,
3825 int buffer_size, MPI_Datatype point_info_dt, MPI_Comm comm) {
3826
3827 struct global_vertex_reorder * vertices = xmalloc(count * sizeof(*vertices));
3828
3829 struct temp_field_data temp_vertex_field_data =
3832 count);
3833
3834 for (size_t i = 0, buffer_offset = 0; i < count; ++i) {
3835
3836 int position = 0;
3837 void * curr_buffer = (char*)buffer + buffer_offset;
3838
3840 vertices, i, curr_buffer, buffer_size, &position,
3841 temp_vertex_field_data, point_info_dt, comm);
3842 vertices[i].reorder_idx = i;
3843
3844 buffer_offset += (size_t)position;
3845 buffer_size -= position;
3846 }
3847
3849 dist_grid, vertices, count, NULL, temp_vertex_field_data);
3850
3851 temp_field_data_free(temp_vertex_field_data);
3852
3853 free(vertices);
3854}
3855
3857 struct yac_dist_grid * dist_grid, size_t count, void * buffer,
3858 int buffer_size, MPI_Datatype point_info_dt, MPI_Comm comm) {
3859
3860 struct global_edge_reorder * edges = xmalloc(count * sizeof(*edges));
3861 struct global_vertex_reorder * vertices =
3862 xmalloc(2 * count * sizeof(*vertices));
3863
3864 struct temp_field_data temp_edge_field_data =
3867 count);
3868 struct temp_field_data temp_vertex_field_data =
3871 2 * count);
3872
3873 for (size_t i = 0, buffer_offset = 0; i < count; ++i) {
3874
3875 int position = 0;
3876 void * curr_buffer = (char*)buffer + buffer_offset;
3877
3879 edges, i, curr_buffer, buffer_size, &position,
3880 temp_edge_field_data, point_info_dt, comm);
3881 edges[i].reorder_idx = i;
3882
3883 for (size_t j = 0; j < 2; ++j)
3885 vertices, 2 * i + j, curr_buffer, buffer_size, &position,
3886 temp_vertex_field_data, point_info_dt, comm);
3887
3888 buffer_offset += (size_t)position;
3889 buffer_size -= position;
3890 }
3891
3893 dist_grid, vertices, 2 * count, NULL, temp_vertex_field_data);
3895 dist_grid, edges, count, NULL, temp_edge_field_data);
3896
3897 temp_field_data_free(temp_vertex_field_data);
3898 temp_field_data_free(temp_edge_field_data);
3899
3900 free(vertices);
3901 free(edges);
3902}
3903
3905 struct yac_dist_grid * dist_grid, enum yac_location location, size_t count,
3906 void * buffer, int buffer_size, MPI_Datatype bnd_circle_dt,
3907 MPI_Datatype point_info_dt, MPI_Comm comm) {
3908
3909 CHECK_LOCATION("unpack_grid_data")
3910
3911 switch(location) {
3912 default:
3913 case(YAC_LOC_CELL):
3915 dist_grid, count, buffer, buffer_size, bnd_circle_dt,
3916 point_info_dt, comm);
3917 break;
3918 case(YAC_LOC_CORNER):
3920 dist_grid, count, buffer, buffer_size, point_info_dt, comm);
3921 break;
3922 case(YAC_LOC_EDGE):
3924 dist_grid, count, buffer, buffer_size, point_info_dt, comm);
3925 break;
3926 };
3927}
3928
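// qsort comparator: orders single_remote_point_reorder entries by the rank of
// the owning process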
3930 const void * a, const void * b) {
3931
3932 return ((const struct single_remote_point_reorder *)a)->data.data.rank -
3933 ((const struct single_remote_point_reorder *)b)->data.data.rank;
3934}
3935
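// converts an array of remote point references (owner rank + original position
// on the owner) into local indices; points that are not yet available in the
// local part of the distributed grid are requested from their owners and
// integrated into it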
3937 struct yac_dist_grid * dist_grid, struct single_remote_point * ids,
3938 size_t count, enum yac_location location, size_t * idx) {
3939
3940 MPI_Comm comm = dist_grid->comm;
3941 int comm_rank, comm_size;
3942 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
3943 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
3944
3945 size_t remote_count = 0;
3946
3947 for (size_t i = 0; i < count; ++i) {
3948 if (ids[i].global_id == XT_INT_MAX) idx[i] = SIZE_MAX;
3949 else if (ids[i].data.rank != comm_rank) ++remote_count;
3950 else idx[i] = ids[i].data.orig_pos;
3951 }
3952
3953 struct single_remote_point_reorder * missing_ids =
3954 xmalloc(remote_count * sizeof(*missing_ids));
3955
3956 for (size_t i = 0, j = 0; i < count; ++i) {
3957 if ((ids[i].data.rank != comm_rank) &&
3958 (ids[i].global_id != XT_INT_MAX)) {
3959 missing_ids[j].data = ids[i];
3960 missing_ids[j].reorder_idx = i;
3961 ++j;
3962 }
3963 }
3964
3965 // check whether we already have some of the missing ids locally
3967 dist_grid, location, missing_ids, &remote_count, idx);
3968
3969 // sort data by owner
3970 qsort(missing_ids, remote_count, sizeof(*missing_ids),
3972
3973 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
3975 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
3976
3977 for (size_t i = 0; i < remote_count; ++i)
3978 sendcounts[missing_ids[i].data.data.rank]++;
3979
3981 1, sendcounts, recvcounts, sdispls, rdispls, comm);
3982
3983 size_t recv_count = rdispls[comm_size-1] + recvcounts[comm_size-1];
3984
3985 uint64_t * uint64_t_buffer =
3986 xmalloc((remote_count + recv_count) * sizeof(*uint64_t_buffer));
3987 uint64_t * orig_pos_send_buffer = uint64_t_buffer;
3988 uint64_t * orig_pos_recv_buffer = uint64_t_buffer + remote_count;
3989
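 // note: the displacement arrays are generated with a one-entry shift; the
 // packing below advances sdispls[rank+1] through the block of each rank, so
 // that afterwards sdispls[i] points to the start of block i and can be passed
 // directly to the exchange routine (the later packed exchange, which does not
 // pack via sdispls[rank+1]++, therefore passes sdispls+1 instead)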
3990 // pack send buffer
3991 for (size_t i = 0; i < remote_count; ++i) {
3992 int rank = missing_ids[i].data.data.rank;
3993 if (rank != comm_rank)
3994 orig_pos_send_buffer[sdispls[rank+1]++] =
3995 (uint64_t)(missing_ids[i].data.data.orig_pos);
3996 }
3997
3998 // redistribute ids
3999 yac_alltoallv_uint64_p2p(
4000 orig_pos_send_buffer, sendcounts, sdispls,
4001 orig_pos_recv_buffer, recvcounts, rdispls, comm);
4002
4003 MPI_Datatype bnd_circle_dt = yac_get_bounding_circle_mpi_datatype(comm);
4004 yac_mpi_call(MPI_Type_commit(&bnd_circle_dt), comm);
4005 MPI_Datatype point_info_dt = yac_get_remote_point_info_mpi_datatype(comm);
4006 yac_mpi_call(MPI_Type_commit(&point_info_dt), comm);
4007
4008 void * packed_send_data = NULL;
4009 int * pack_sizes = xmalloc(recv_count * sizeof(*pack_sizes));
4010
4011 // pack all requested grid data
4013 dist_grid, location, orig_pos_recv_buffer, recv_count,
4014 &packed_send_data, pack_sizes,
4015 bnd_circle_dt, point_info_dt, comm);
4016 free(uint64_t_buffer);
4017
4018 memset(sendcounts, 0, (size_t)comm_size * sizeof(*sendcounts));
4019 for (int i = 0, k = 0; i < comm_size; ++i)
4020 for (size_t j = 0; j < recvcounts[i]; ++j, ++k)
4021 sendcounts[i] += (size_t)(pack_sizes[k]);
4022
4023 free(pack_sizes);
4024
4026 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4027
4028 recv_count = rdispls[comm_size-1] + recvcounts[comm_size-1];
4029
4030 void * packed_recv_data = xmalloc(recv_count);
4031
4032 // redistribute packed grid data
4033 yac_alltoallv_packed_p2p(
4034 packed_send_data, sendcounts, sdispls+1,
4035 packed_recv_data, recvcounts, rdispls, comm);
4036
4037 // unpack requested grid data
4039 dist_grid, location, remote_count, packed_recv_data, (int)recv_count,
4040 bnd_circle_dt, point_info_dt, comm);
4041
4042 yac_mpi_call(MPI_Type_free(&point_info_dt), comm);
4043 yac_mpi_call(MPI_Type_free(&bnd_circle_dt), comm);
4044
4045 // get the local ids for the remaining missing ids
4047 dist_grid, location, missing_ids, &remote_count, idx);
4048
4049 free(missing_ids);
4050 free(packed_recv_data);
4051 free(packed_send_data);
4052 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
4053}
4054
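// builds an MPI datatype describing struct single_remote_point
// (global id, owner rank and original position on the owner process)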
4055static MPI_Datatype yac_get_single_remote_point_mpi_datatype(MPI_Comm comm) {
4056
4057 struct single_remote_point dummy;
4058 MPI_Datatype single_id_owner_dt;
4059 int array_of_blocklengths[] = {1, 1, 1};
4060 const MPI_Aint array_of_displacements[] =
4061 {(MPI_Aint)(intptr_t)(const void *)&(dummy.global_id) -
4062 (MPI_Aint)(intptr_t)(const void *)&dummy,
4063 (MPI_Aint)(intptr_t)(const void *)&(dummy.data.rank) -
4064 (MPI_Aint)(intptr_t)(const void *)&dummy,
4065 (MPI_Aint)(intptr_t)(const void *)&(dummy.data.orig_pos) -
4066 (MPI_Aint)(intptr_t)(const void *)&dummy};
4067 const MPI_Datatype array_of_types[] =
4068 {yac_int_dt, MPI_INT, MPI_UINT64_T};
4070 MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacements,
4071 array_of_types, &single_id_owner_dt), comm);
4072 return yac_create_resized(single_id_owner_dt, sizeof(dummy), comm);
4073}
4074
4075// determines for each search point the matching cell
4077 struct yac_dist_grid_pair * grid_pair, char const * grid_name,
4078 yac_coordinate_pointer search_coords, size_t count, size_t * cells,
4079 int (*coord_in_cell)(
4080 double coord[3], struct yac_dist_grid * dist_grid, size_t cell_idx,
4081 struct yac_grid_cell * buffer_cell)) {
4082
4083 MPI_Comm comm = grid_pair->comm;
4084 int comm_rank, comm_size;
4085 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
4086 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
4087
4088 int * ranks = xmalloc(count * sizeof(*ranks));
4089
4090 //----------------------------------------------------
4091 // match search points with YAC internal decomposition
4092 //----------------------------------------------------
4093
4094 // search for the matching process (according to the YAC
4095 // internal decomposition) for each search point
4097 grid_pair->proc_sphere_part, search_coords, count, ranks);
4098
4099 //---------------------------------------------------------------
4100 // relocate search points according to YAC internal decomposition
4101 //---------------------------------------------------------------
4102
4103 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
4105 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
4106 for (size_t i = 0; i < count; ++i) sendcounts[ranks[i]]++;
4107
4108 size_t local_count = sendcounts[comm_rank];
4109 sendcounts[comm_rank] = 0;
4110
4112 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4113
4114 size_t remote_count = sdispls[comm_size] + sendcounts[comm_size-1];
4115 size_t request_count = rdispls[comm_size-1] + recvcounts[comm_size-1];
4116
4117 yac_coordinate_pointer coord_buffer =
4118 xmalloc((remote_count + request_count + local_count) *
4119 sizeof(*coord_buffer));
4120 yac_coordinate_pointer coord_send_buffer = coord_buffer + 0;
4121 yac_coordinate_pointer coord_recv_buffer = coord_buffer + remote_count;
4122 yac_coordinate_pointer coord_local_buffer =
4123 coord_buffer + remote_count + request_count;
4124
4125 // pack search coordinates
4126 for (size_t i = 0, k = 0; i < count; ++i) {
4127 if (ranks[i] == comm_rank) {
4128 coord_local_buffer[k][0] = search_coords[i][0];
4129 coord_local_buffer[k][1] = search_coords[i][1];
4130 coord_local_buffer[k][2] = search_coords[i][2];
4131 ++k;
4132 } else {
4133 size_t displ = sdispls[ranks[i]+1]++;
4134 coord_send_buffer[displ][0] = search_coords[i][0];
4135 coord_send_buffer[displ][1] = search_coords[i][1];
4136 coord_send_buffer[displ][2] = search_coords[i][2];
4137 }
4138 }
4139
4140 MPI_Datatype dt_coord;
4141 yac_mpi_call(MPI_Type_contiguous(3, MPI_DOUBLE, &dt_coord), comm);
4142 yac_mpi_call(MPI_Type_commit(&dt_coord), comm);
4143
4144 // redistribute search coordinates
4146 coord_send_buffer, sendcounts, sdispls,
4147 coord_recv_buffer, recvcounts, rdispls,
4148 sizeof(*coord_send_buffer), dt_coord, comm);
4149
4150 yac_mpi_call(MPI_Type_free(&dt_coord), comm);
4151
4152 size_t * local_cells =
4153 xmalloc((request_count + local_count) * sizeof(*local_cells));
4154
4155 //-----------------------------------------------
4156 // match search points with locally stored cells,
4157 // which should contain the matching cell
4158 //-----------------------------------------------
4159
4160 // do local search
4162 grid_pair, grid_name, coord_recv_buffer, request_count + local_count,
4163 local_cells, coord_in_cell);
4164
4165 //--------------------------------------------------------------
4166 // return search results (global and local id of matching cells)
4167 // (unmatched points:
4168 // global_id = XT_INT_MAX, local_id = UINT64_MAX)
4169 //--------------------------------------------------------------
4170
4171 struct single_remote_point * single_remote_point_buffer =
4172 xmalloc((remote_count + request_count) *
4173 sizeof(*single_remote_point_buffer));
4174 struct single_remote_point * id_send_buffer = single_remote_point_buffer;
4175 struct single_remote_point * id_recv_buffer = single_remote_point_buffer +
4176 request_count;
4177
4178 struct yac_dist_grid * dist_grid =
4179 yac_dist_grid_pair_get_dist_grid(grid_pair, grid_name);
4180
4181 // pack global ids of found source cells
4182 for (size_t i = 0; i < request_count; ++i) {
4183 size_t cell_idx = local_cells[i];
4184 id_send_buffer[i].data.rank = comm_rank;
4185 if (cell_idx != SIZE_MAX) {
4186 id_send_buffer[i].global_id = dist_grid->ids[YAC_LOC_CELL][cell_idx];
4187 id_send_buffer[i].data.orig_pos = (uint64_t)cell_idx;
4188 } else {
4189 id_send_buffer[i].global_id = XT_INT_MAX;
4190 id_send_buffer[i].data.orig_pos = UINT64_MAX;
4191 }
4192 }
4193
4194 MPI_Datatype single_remote_point_dt =
4196
4197 // redistribute results (global ids of found source cells)
4199 id_send_buffer, recvcounts, rdispls, id_recv_buffer, sendcounts, sdispls,
4200 sizeof(*id_send_buffer), single_remote_point_dt, comm);
4201
4202 yac_mpi_call(MPI_Type_free(&single_remote_point_dt), comm);
4203
4204 size_t * new_local_cells =
4205 xmalloc(remote_count * sizeof(*new_local_cells));
4206
4207 //------------------------------------------------------------------
4208 // extend local part of the distributed grid, such that it contains
4209 // all matching cells and afterwards convert all search results
4210 // from global ids to local ones
4211 //------------------------------------------------------------------
4212
4213 // convert all remote ids to local ones, extend local dist_grid data,
4214 // if necessary
4216 dist_grid, id_recv_buffer, remote_count, YAC_LOC_CELL, new_local_cells);
4217
4218 // extract results from local and remote search
4219 for (size_t i = 0, k = 0; i < count; ++i) {
4220 if (ranks[i] == comm_rank) {
4221 cells[i] = local_cells[request_count + k];
4222 ++k;
4223 } else {
4224 size_t displ = sdispls[ranks[i]]++;
4225 cells[i] = new_local_cells[displ];
4226 }
4227 }
4228
4229 free(new_local_cells);
4230 free(single_remote_point_buffer);
4231 free(local_cells);
4232 free(coord_buffer);
4233 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
4234 free(ranks);
4235}
4236
4238 struct yac_dist_grid_pair * grid_pair, char const * grid_name,
4239 yac_coordinate_pointer search_coords, size_t count, size_t * cells) {
4240
4242 grid_pair, grid_name, search_coords, count, cells, coord_in_cell);
4243}
4244
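// variant of the point search above that uses the coord_in_cell_gc predicate,
// which (judging by its name) treats all cell edges as great circles for the
// point-in-cell test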
4246 struct yac_dist_grid_pair * grid_pair, char const * grid_name,
4247 yac_coordinate_pointer search_coords, size_t count, size_t * cells) {
4248
4250 grid_pair, grid_name, search_coords, count, cells, coord_in_cell_gc);
4251}
4252
4253// generates search data structure for the points of a field
4254// (has to be regenerated for each use, because the local part of
4255// the distributed grid may have been extended by a previous search call)
4257 struct yac_dist_grid * dist_grid, struct yac_interp_field field) {
4258
4259 yac_const_coordinate_pointer field_coords =
4260 yac_dist_grid_get_field_coords(dist_grid, field);
4261 yac_int const * global_ids =
4262 yac_dist_grid_get_global_ids(dist_grid, field.location);
4263 int const * mask = yac_dist_grid_get_field_mask(dist_grid, field);
4264 size_t total_count =
4265 yac_dist_grid_get_total_count(dist_grid, field.location);
4266
4267 if (mask == NULL)
4268 return
4270 total_count, field_coords, global_ids);
4271 else
4272 return
4274 total_count, field_coords, global_ids, mask);
4275}
4276
4277// returns n points from the list of locally owned unmasked points;
4278// the data is returned in a format that can be sent to other processes
4279// (remark: this routine assumes the local part actually contains the
4280// required number of points, which is not checked)
4282 struct yac_dist_grid * dist_grid, struct yac_interp_field field,
4283 int comm_rank, size_t n, struct single_remote_point * points) {
4284
4285 size_t count =
4286 yac_dist_grid_get_count(dist_grid, field.location);
4287 int const * field_mask = yac_dist_grid_get_field_mask(dist_grid, field);
4288 int const * owner_mask =
4289 yac_dist_grid_get_owner_mask(dist_grid, field.location);
4290 yac_int const * global_ids =
4291 yac_dist_grid_get_global_ids(dist_grid, field.location);
4292
4293 if (field_mask == NULL) {
4294
4295 for (size_t i = 0, j = 0; i < count; ++i) {
4296 if (owner_mask[i]) {
4297 points[j].global_id = global_ids[i];
4298 points[j].data.rank = comm_rank;
4299 points[j].data.orig_pos = i;
4300 if (n == ++j) return;
4301 }
4302 }
4303
4304 } else {
4305
4306 for (size_t i = 0, j = 0; i < count; ++i) {
4307 if (owner_mask[i] && field_mask[i]) {
4308 points[j].global_id = global_ids[i];
4309 points[j].data.rank = comm_rank;
4310 points[j].data.orig_pos = i;
4311 if (n == ++j) return;
4312 }
4313 }
4314 }
4315}
4316
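// qsort comparator: orders nnn search results by distance (descending cosine
// of the angle, i.e. closest first) and, for equal distances, by ascending
// global id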
4318 void const * a, void const * b) {
4319
4320 struct nnn_search_result const * result_a =
4321 (struct nnn_search_result const *)a;
4322 struct nnn_search_result const * result_b =
4323 (struct nnn_search_result const *)b;
4324
4325 int ret = (result_a->cos_angle < result_b->cos_angle) -
4326 (result_a->cos_angle > result_b->cos_angle);
4327 if (ret) return ret;
4328 return (result_a->global_id > result_b->global_id) -
4329 (result_a->global_id < result_b->global_id);
4330}
4331
4332// searches for the n nearest points in the locally available data
4334 struct yac_dist_grid * dist_grid, struct yac_interp_field field,
4335 size_t count, yac_coordinate_pointer search_coords, size_t n,
4336 double cos_max_search_distance, size_t * result_points) {
4337
4338 struct point_sphere_part_search * sphere_part =
4339 yac_dist_grid_get_field_sphere_part(dist_grid, field);
4340
4341 // do local search
4342 double * cos_angles = NULL;
4343 size_t cos_angles_array_size = 0;
4344 size_t * temp_result_points = NULL;
4345 size_t temp_result_points_array_size = 0;
4346 size_t * num_temp_results = xmalloc(count * sizeof(*num_temp_results));
4348 sphere_part, count, search_coords, n, &cos_angles,
4349 &cos_angles_array_size, NULL, NULL, &temp_result_points,
4350 &temp_result_points_array_size, num_temp_results);
4351
4353
4354 // get the maximum number of results found per search point (can be more
4355 // than n, if multiple result distances are identical)
4356 size_t max_num_results = 0;
4357 for (size_t i = 0; i < count; ++i)
4358 if (max_num_results < num_temp_results[i])
4359 max_num_results = num_temp_results[i];
4360
4361 struct nnn_search_result * temp_results =
4362 xmalloc(max_num_results * sizeof(*temp_results));
4363
4364 yac_int const * global_ids =
4365 yac_dist_grid_get_global_ids(dist_grid, field.location);
4366
4367 // for all search points
4368 for (size_t i = 0, k = 0; i < count; ++i) {
4369
4370 size_t curr_num_search_results = num_temp_results[i];
4371 size_t curr_num_results = 0;
4372
4373 // extract results
4374 for (size_t j = 0; j < curr_num_search_results; ++j, ++k) {
4375
4376 // if the current search result is close enough
4377 if (cos_angles[k] >= cos_max_search_distance) {
4378
4379 // extract result
4380 size_t curr_local_id = temp_result_points[k];
4381 temp_results[curr_num_results].local_id = curr_local_id;
4382 temp_results[curr_num_results].global_id = global_ids[curr_local_id];
4383 temp_results[curr_num_results].cos_angle = cos_angles[k];
4384 curr_num_results++;
4385 }
4386
4387 }
4388 // sort results (by distance and global id)
4389 qsort(
4390 temp_results, curr_num_results, sizeof(*temp_results),
4392
4393 if (curr_num_results > n) curr_num_results = n;
4394
4395 for (size_t l = 0; l < curr_num_results; ++l)
4396 result_points[i * n + l] = temp_results[l].local_id;
4397 for (size_t l = curr_num_results; l < n; ++l)
4398 result_points[i * n + l] = UINT64_MAX;
4399 }
4400
4401 free(num_temp_results);
4402 free(cos_angles);
4403 free(temp_results);
4404 free(temp_result_points);
4405}
4406
4407static inline int compare_size_t(const void * a, const void * b) {
4408
4409 size_t const * a_ = a, * b_ = b;
4410
4411 return (*a_ > *b_) - (*b_ > *a_);
4412}
4413
4415 struct yac_dist_grid_pair * grid_pair, char const * grid_name,
4416 yac_coordinate_pointer search_coords, size_t count, size_t * local_ids,
4417 size_t n, struct yac_interp_field field, double max_search_distance) {
4418
4419 MPI_Comm comm = grid_pair->comm;
4420 int comm_rank, comm_size;
4421 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
4422 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
4423
4425 (max_search_distance >= 0.0) && (max_search_distance <= M_PI),
4426 "ERROR(yac_dist_grid_pair_do_nnn_search): "
4427 "invalid max_search_distance (%lf)", max_search_distance)
4428
4429 struct sin_cos_angle max_search_distance_angle =
4430 sin_cos_angle_new(sin(max_search_distance), cos(max_search_distance));
4431
4432 struct yac_dist_grid * dist_grid =
4433 yac_dist_grid_pair_get_dist_grid(grid_pair, grid_name);
4434
4435 //---------------------------------------------------------------------------
4436 // At first we have to make sure that each process has at least N unmasked
4437 // points locally available. This enables each process to locally perform a
4438 // first rough nnn search, whose result provides an upper bound for the
4439 // search distance on other processes.
4440 //---------------------------------------------------------------------------
4441
4442 uint64_t unmasked_local_count =
4444
4445 uint64_t * unmasked_local_counts =
4446 xmalloc((size_t)comm_size * sizeof(*unmasked_local_counts));
4447
4448 // exchange the number of locally available unmasked points
4450 MPI_Allgather(
4451 &unmasked_local_count, 1, MPI_UINT64_T,
4452 unmasked_local_counts, 1, MPI_UINT64_T, comm), comm);
4453
4454 // check whether there is a rank with too few source points
4455 int flag = 0;
4456 for (int i = 0; i < comm_size; ++i)
4457 flag |= unmasked_local_counts[i] < (uint64_t)n;
4458
4459 // if ranks with insufficient number of local source points
4460 if (flag) {
4461
4462 uint64_t global_num_unmasked_count = 0;
4463 for (int i = 0; i < comm_size; ++i)
4464 global_num_unmasked_count += unmasked_local_counts[i];
4465
4466 YAC_ASSERT(
4467 (size_t)global_num_unmasked_count >= n,
4468 "ERROR(yac_dist_grid_pair_do_nnn_search): insufficient number of "
4469 "unmasked points")
4470
4471 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
4473 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
4474
4475 // get ranks of processes that have additional data or require data in
4476 // order to have enough source points to do an initial nnn search
4477 int * flag_buffer = xcalloc(2 * (size_t)comm_size, sizeof(*flag_buffer));
4478 int * send_flags = flag_buffer;
4479 int * recv_flags = flag_buffer + comm_size;
4481 grid_pair->proc_sphere_part, unmasked_local_counts, (uint64_t)n,
4482 send_flags, recv_flags, comm_rank, comm_size);
4483 for (int i = 0; i < comm_size; ++i) {
4484 sendcounts[i] = (size_t)send_flags[i];
4485 recvcounts[i] = (size_t)recv_flags[i];
4486 }
4487 free(flag_buffer);
4488
4489 size_t local_send_count = (size_t)(MIN(unmasked_local_count, n));
4490
4491 size_t raccu = 0;
4492 for (int i = 0; i < comm_size; ++i) {
4493 sdispls[i] = 0;
4494 rdispls[i] = raccu;
4495 sendcounts[i] *= local_send_count;
4496 raccu += (recvcounts[i] *= (size_t)(MIN(unmasked_local_counts[i], n)));
4497 }
4498
4499 size_t recv_count = recvcounts[comm_size-1] + rdispls[comm_size-1];
4500
4501 struct single_remote_point * single_remote_point_buffer =
4502 xmalloc(
4503 (local_send_count + recv_count) * sizeof(*single_remote_point_buffer));
4504 struct single_remote_point * local_send_ids = single_remote_point_buffer;
4505 struct single_remote_point * recv_ids =
4506 single_remote_point_buffer + local_send_count;
4507
4508 // get local source points that can be sent to other processes
4510 dist_grid, field, comm_rank, local_send_count, local_send_ids);
4511
4512 MPI_Datatype single_remote_point_dt =
4514
4515 // exchange source points (integrate points into local data)
4517 local_send_ids, sendcounts, sdispls, recv_ids, recvcounts, rdispls,
4518 sizeof(*local_send_ids), single_remote_point_dt, comm);
4519
4520 yac_mpi_call(MPI_Type_free(&single_remote_point_dt), comm);
4521
4522 size_t * dummy = xmalloc(recv_count * sizeof(*dummy));
4523
4524 // convert all remote ids to local ones, extend local dist_grid data,
4525 // if necessary
4527 dist_grid, recv_ids, recv_count, field.location, dummy);
4528
4529 free(dummy);
4530 free(single_remote_point_buffer);
4531 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
4532 }
4533 free(unmasked_local_counts);
4534
4535 //---------------------------------------------------------------------------
4536 // The actual search starts by determining locally the upper bound for
4537 // the distance for all search points
4538 //---------------------------------------------------------------------------
4539
4540 struct sin_cos_angle * ubounds = xmalloc(count * sizeof(*ubounds));
4541
4542 struct point_sphere_part_search * sphere_part =
4543 yac_dist_grid_get_field_sphere_part(dist_grid, field);
4544
4546 sphere_part, count, search_coords, n, ubounds);
4547
4548 //---------------------------------------------------------------------------
4549 // Now for each search point the global decomposition is checked to
4550 // determine whether other processes may contribute data
4551 //---------------------------------------------------------------------------
4552
4553 int * request_ranks = NULL;
4554 size_t request_ranks_array_size = 0;
4555 size_t num_request_ranks = 0;
4556 int * num_requests = xmalloc(count * sizeof(*num_requests));
4557
4558 // for all search points
4559 for (size_t i = 0; i < count; ++i) {
4560
4561 // generate bounding circles for all search points
4562 struct bounding_circle bnd_circle;
4563 memcpy(bnd_circle.base_vector, search_coords[i],
4564 3 * sizeof(search_coords[0][0]));
4565 if (compare_angles(max_search_distance_angle, ubounds[i]) < 0)
4566 ubounds[i] = max_search_distance_angle;
4567
4568 bnd_circle.inc_angle = ubounds[i];
4569
4570 ENSURE_ARRAY_SIZE(request_ranks, request_ranks_array_size,
4571 num_request_ranks + (size_t)comm_size);
4572
4573 // search for processes that might be able to contribute to the search
4574 // results
4575 int * curr_request_ranks = request_ranks + num_request_ranks;
4577 grid_pair->proc_sphere_part, bnd_circle,
4578 curr_request_ranks, num_requests + i);
4579
4580 // remove requests for local process
4581 int new_num_requests = 0;
4582 for (int j = 0; j < num_requests[i]; ++j) {
4583 if (curr_request_ranks[j] == comm_rank) continue;
4584 if (new_num_requests != j)
4585 curr_request_ranks[new_num_requests] = curr_request_ranks[j];
4586 ++new_num_requests;
4587 }
4588
4589 num_request_ranks += (size_t)(num_requests[i] = new_num_requests);
4590 }
4591
4592 //---------------------------------------------------------------------------
4593 // send bounding circles to remote processes and receive potential results
4594 // (center of the bounding circle is the actual search point and the radius
4595 // is the upper bound for the search distance for each point)
4596 //---------------------------------------------------------------------------
4597
4598 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
4600 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
4601
4602 for (size_t i = 0; i < num_request_ranks; ++i) sendcounts[request_ranks[i]]++;
4603
4605 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4606
4607 size_t recv_count = rdispls[comm_size-1] + recvcounts[comm_size-1];
4608
4609 struct bounding_circle * bnd_circles =
4610 xmalloc((num_request_ranks + recv_count) * sizeof(*bnd_circles));
4611 struct bounding_circle * send_bnd_circles = bnd_circles;
4612 struct bounding_circle * recv_bnd_circles = bnd_circles + num_request_ranks;
4613
4614 // pack bounding circles
4615 for (size_t i = 0, k = 0; i < count; ++i) {
4616 for (int j = 0; j < num_requests[i]; ++j, ++k) {
4617 struct bounding_circle * curr_bnd_circle =
4618 send_bnd_circles + sdispls[request_ranks[k]+1];
4619 sdispls[request_ranks[k]+1]++;
4620 memcpy(curr_bnd_circle->base_vector, search_coords[i],
4621 3 * sizeof(search_coords[0][0]));
4622 curr_bnd_circle->inc_angle = ubounds[i];
4623 }
4624 }
4625
4626 free(num_requests);
4627 free(request_ranks);
4628 free(ubounds);
4629
4630 MPI_Datatype bnd_circle_dt = yac_get_bounding_circle_mpi_datatype(comm);
4631
4632 // exchange requests to other processes
4634 send_bnd_circles, sendcounts, sdispls,
4635 recv_bnd_circles, recvcounts, rdispls,
4636 sizeof(*send_bnd_circles), bnd_circle_dt, comm);
4637
4638 yac_mpi_call(MPI_Type_free(&bnd_circle_dt), comm);
4639
4640 //---------------------------------------------------------------------------
4641 // do n nearest neighbour search for the received coordinates and return
4642 // results potentially required by original process
4643 //---------------------------------------------------------------------------
4644
4645 size_t * result_points = NULL;
4646 size_t result_points_array_size = 0;
4647 size_t * num_results_points =
4648 xmalloc(recv_count * sizeof(*num_results_points));
4650 sphere_part, recv_count, recv_bnd_circles, n, &result_points,
4651 &result_points_array_size, num_results_points);
4652
4653 free(bnd_circles);
4655
4656 // compact results
4657 size_t total_num_result_points = 0;
4658 size_t offset = 0, k = 0;
4659 for (int i = 0; i < comm_size; ++i) {
4660 size_t curr_num_result_points = 0;
4661 for (size_t j = 0; j < recvcounts[i]; ++j, ++k)
4662 curr_num_result_points += num_results_points[k];
4663 size_t new_num_result_points = curr_num_result_points;
4664 qsort(
4665 result_points + offset,
4666 new_num_result_points, sizeof(*result_points), compare_size_t);
4668 result_points + offset, &new_num_result_points);
4669 memmove(
4670 result_points + total_num_result_points,
4671 result_points + offset, new_num_result_points *
4672 sizeof(*result_points));
4673 total_num_result_points += new_num_result_points;
4674 offset += curr_num_result_points;
4675 sendcounts[i] = new_num_result_points;
4676 }
4677 free(num_results_points);
4678
4680 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4681 recv_count = rdispls[comm_size-1] + recvcounts[comm_size-1];
4682
4683 yac_int const * global_ids =
4684 yac_dist_grid_get_global_ids(dist_grid, field.location);
4685
4686 struct single_remote_point * single_remote_point_buffer =
4687 xmalloc((total_num_result_points + recv_count) *
4688 sizeof(*single_remote_point_buffer));
4689 struct single_remote_point * id_send_buffer = single_remote_point_buffer;
4690 struct single_remote_point * id_recv_buffer = single_remote_point_buffer +
4691 total_num_result_points;
4692 for (size_t i = 0; i < total_num_result_points; ++i) {
4693 size_t orig_pos = result_points[i];
4694 id_send_buffer[i].global_id = global_ids[orig_pos];
4695 id_send_buffer[i].data.rank = comm_rank;
4696 id_send_buffer[i].data.orig_pos = (uint64_t)orig_pos;
4697 }
4698 free(result_points);
4699
4700 MPI_Datatype single_remote_point_dt =
4702
4703 // exchange results to other processes
4705 id_send_buffer, sendcounts, sdispls+1, id_recv_buffer, recvcounts, rdispls,
4706 sizeof(*id_send_buffer), single_remote_point_dt, comm);
4707 yac_mpi_call(MPI_Type_free(&single_remote_point_dt), comm);
4708 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
4709
4710 //---------------------------------------------------------------------------
4711 // integrate potential results into local part of the distributed grid
4712 //---------------------------------------------------------------------------
4713
4714 // convert all remote ids to local ones, extend local dist_grid data,
4715 // if necessary
4716 size_t * temp_idx = xmalloc(recv_count * sizeof(*temp_idx));
4718 dist_grid, id_recv_buffer, recv_count, field.location, temp_idx);
4719 free(temp_idx);
4720 free(single_remote_point_buffer);
4721
4722 //---------------------------------------------------------------------------
4723 // do the actual nnn search
4724 //---------------------------------------------------------------------------
4725
4727 dist_grid, field, count, search_coords, n, max_search_distance_angle.cos,
4728 local_ids);
4729}
4730
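// for each bounding circle, determines all cells of the selected grid whose
// bounding circles overlap with it (applying the cell field mask, if one is
// provided); matching cells from other processes are integrated into the
// local part of the distributed grid; the local cell indices are returned in
// *cells and the number of matches per circle in num_results_per_bnd_circle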
4732 struct yac_dist_grid_pair * grid_pair, char const * grid_name,
4733 const_bounding_circle_pointer bnd_circles, size_t count, size_t ** cells,
4734 size_t * num_results_per_bnd_circle, struct yac_interp_field field) {
4735
4736 MPI_Comm comm = grid_pair->comm;
4737 int comm_rank, comm_size;
4738 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
4739 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
4740
4741 //---------------------------------------------------------------------------
4742 // match bounding circles with YAC internal decomposition
4743 //---------------------------------------------------------------------------
4744
4745 struct yac_dist_grid * dist_grid =
4746 yac_dist_grid_pair_get_dist_grid(grid_pair, grid_name);
4747
4748 int * num_ranks = xcalloc(count, sizeof(*num_ranks));
4749 int * rank_buffer = NULL;
4750 size_t rank_buffer_size = 0;
4751 size_t rank_buffer_array_size = 0;
4752
4753 for (size_t i = 0; i < count; ++i) {
4754
4755 ENSURE_ARRAY_SIZE(rank_buffer, rank_buffer_array_size,
4756 rank_buffer_size + (size_t)comm_size);
4757
4758 // finds all processes whose core area overlaps with the bounding circle
4759 // of the current cell
4760 // beware: even if the core area of a process does not overlap with the
4761 // bounding circle, it may have core cells that overlap nevertheless
4763 grid_pair->proc_sphere_part, bnd_circles[i],
4764 rank_buffer + rank_buffer_size, num_ranks + i);
4765 rank_buffer_size += (size_t)(num_ranks[i]);
4766 }
4767
4768 //---------------------------------------------------------------------------
4769 // relocate bounding circles according to YAC internal decomposition
4770 //---------------------------------------------------------------------------
4771
4772 size_t * size_t_buffer =
4773 xmalloc(4 * (size_t)comm_size * sizeof(*size_t_buffer));
4774 size_t * result_sendcounts = size_t_buffer + 0 * comm_size;
4775 size_t * result_recvcounts = size_t_buffer + 1 * comm_size;
4776 size_t * result_sdispls = size_t_buffer + 2 * comm_size;
4777 size_t * result_rdispls = size_t_buffer + 3 * comm_size;
4778
4779 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
4781 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
4782
4783 for (size_t i = 0, offset = 0; i < count; ++i) {
4784 int curr_num_ranks = num_ranks[i];
4785 int * ranks = rank_buffer + offset;
4786 offset += (size_t)curr_num_ranks;
4787 for (int j = 0; j < curr_num_ranks; ++j) sendcounts[ranks[j]]++;
4788 }
4789
4790 // local overlaps do not need to be sent around
4791 size_t local_count = sendcounts[comm_rank];
4792 sendcounts[comm_rank] = 0;
4793
4795 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4796
4797 size_t send_count = sdispls[comm_size] + sendcounts[comm_size-1];
4798 size_t recv_count = rdispls[comm_size-1] + recvcounts[comm_size-1];
4799
4800 struct bounding_circle * bnd_circle_buffer =
4801 xmalloc((send_count + recv_count + local_count) *
4802 sizeof(*bnd_circle_buffer));
4803 struct bounding_circle * send_buffer = bnd_circle_buffer;
4804 struct bounding_circle * recv_buffer = bnd_circle_buffer + send_count;
4805 struct bounding_circle * local_buffer =
4806 bnd_circle_buffer + send_count + recv_count;
4807
4808 // pack bounding circles
4809 for (size_t i = 0, offset = 0, local_offset = 0; i < count; ++i) {
4810 int curr_num_ranks = num_ranks[i];
4811 int * ranks = rank_buffer + offset;
4812 offset += (size_t)curr_num_ranks;
4813 for (int j = 0; j < curr_num_ranks; ++j) {
4814 int rank = ranks[j];
4815 if (rank == comm_rank)
4816 local_buffer[local_offset++] = bnd_circles[i];
4817 else
4818 send_buffer[sdispls[rank + 1]++] = bnd_circles[i];
4819 }
4820 }
4821
4822 MPI_Datatype bnd_circle_dt = yac_get_bounding_circle_mpi_datatype(comm);
4823 yac_mpi_call(MPI_Type_commit(&bnd_circle_dt), comm);
4824
4825 // exchange bounding circles
4827 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls,
4828 sizeof(*send_buffer), bnd_circle_dt, comm);
4829
4830 //---------------------------------------------------------------------------
4831 // match bounding circles with the locally stored cells
4832 //---------------------------------------------------------------------------
4833
4834 yac_mpi_call(MPI_Type_free(&bnd_circle_dt), comm);
4835
4836 struct bnd_sphere_part_search * cell_sphere_part =
4837 dist_grid_pair_get_cell_sphere_part(grid_pair, grid_name);
4838
4839 size_t * local_cells = NULL;
4840 size_t * num_local_cells_per_bnd_circle =
4841 xmalloc((recv_count + local_count) *
4842 sizeof(*num_local_cells_per_bnd_circle));
4843
4844 uint64_t * uint64_t_buffer =
4845 xmalloc((send_count + recv_count + local_count) *
4846 sizeof(*uint64_t_buffer));
4847 uint64_t * num_local_cells_per_bnd_circle_uint64_t = uint64_t_buffer;
4848 uint64_t * num_remote_cells_per_bnd_circle =
4849 uint64_t_buffer + recv_count + local_count;
4850
4851 // search for received bounding circles in local data
4853 cell_sphere_part, recv_buffer, recv_count + local_count, &local_cells,
4854 num_local_cells_per_bnd_circle);
4855
4856 //---------------------------------------------------------------------------
4857 // check results (apply cell field mask, if available, and check actual
4858 // overlap of cells with the bounding circle)
4859 //---------------------------------------------------------------------------
4860
4861 int const * field_mask =
4862 (field.location == YAC_LOC_CELL)?
4863 yac_dist_grid_get_field_mask(dist_grid, field):NULL;
4864
4865 struct bounding_circle * cell_bnd_circles = dist_grid->cell_bnd_circles;
4866
4867 // check field mask and actual overlap of bounding circles
4868 for (size_t i = 0, offset = 0, new_offset = 0;
4869 i < recv_count + local_count; ++i) {
4870
4871 struct bounding_circle * curr_bnd_circle = recv_buffer + i;
4872 size_t curr_num_results = num_local_cells_per_bnd_circle[i];
4873
4874 // remove cells whose bounding circles do not overlap with the current one
4875 uint64_t new_num_results = 0;
4876 for (size_t j = 0; j < curr_num_results; ++j, ++offset) {
4877 size_t local_cell_id = local_cells[offset];
4878 if (!yac_extents_overlap(curr_bnd_circle,
4879 cell_bnd_circles + local_cell_id)) continue;
4880 if ((field_mask == NULL) || (field_mask[local_cell_id])) {
4881 if (offset != new_offset) local_cells[new_offset] = local_cell_id;
4882 new_num_results++;
4883 new_offset++;
4884 }
4885 }
4886 num_local_cells_per_bnd_circle_uint64_t[i] = new_num_results;
4887 }
4888 free(num_local_cells_per_bnd_circle);
4889 free(bnd_circle_buffer);
4890
4891 //---------------------------------------------------------------------------
4892 // return results
4893 //---------------------------------------------------------------------------
4894
4895 // exchange number of results per bounding circle
4897 num_local_cells_per_bnd_circle_uint64_t, recvcounts, rdispls,
4898 num_remote_cells_per_bnd_circle, sendcounts, sdispls,
4899 sizeof(*num_local_cells_per_bnd_circle_uint64_t), MPI_UINT64_T, comm);
4900
4901 size_t saccu = 0, raccu = 0, soffset = 0, roffset = 0;
4902 for (int i = 0; i < comm_size; ++i) {
4903
4904 result_sdispls[i] = saccu;
4905 result_rdispls[i] = raccu;
4906
4907 size_t sendcount = recvcounts[i];
4908 size_t recvcount = sendcounts[i];
4909
4910 result_sendcounts[i] = 0;
4911 result_recvcounts[i] = 0;
4912 for (size_t j = 0; j < sendcount; ++j, ++soffset)
4913 result_sendcounts[i] +=
4914 (size_t)(num_local_cells_per_bnd_circle_uint64_t[soffset]);
4915 for (size_t j = 0; j < recvcount; ++j, ++roffset)
4916 result_recvcounts[i] +=
4917 (size_t)(num_remote_cells_per_bnd_circle[roffset]);
4918
4919 saccu += result_sendcounts[i];
4920 raccu += result_recvcounts[i];
4921 }
4922
4923 // count the number of results for bounding circles that had a match on
4924 // their original process
4925 size_t result_local_count = 0;
4926 for (size_t i = recv_count; i < recv_count + local_count; ++i)
4927 result_local_count += (size_t)(num_local_cells_per_bnd_circle_uint64_t[i]);
4928
4929 size_t result_send_count = (size_t)(result_sdispls[comm_size-1]) +
4930 (size_t)(result_sendcounts[comm_size-1]);
4931 size_t result_recv_count = (size_t)(result_rdispls[comm_size-1]) +
4932 (size_t)(result_recvcounts[comm_size-1]);
4933
4934 struct single_remote_point * single_remote_point_buffer =
4935 xmalloc((result_recv_count + result_send_count) *
4936 sizeof(*single_remote_point_buffer));
4937 struct single_remote_point * id_send_buffer = single_remote_point_buffer;
4938 struct single_remote_point * id_recv_buffer = single_remote_point_buffer +
4939 result_send_count;
4940
4941 yac_int * cell_ids = dist_grid->ids[YAC_LOC_CELL];
4942
4943 for (size_t i = 0; i < result_send_count; ++i) {
4944 size_t local_cell_id = local_cells[i];
4945 id_send_buffer[i].global_id = cell_ids[local_cell_id];
4946 id_send_buffer[i].data.rank = comm_rank;
4947 id_send_buffer[i].data.orig_pos = local_cell_id;
4948 }
4949
4950 MPI_Datatype single_remote_point_dt =
4952
4953 // redistribute results (global ids of found source cells)
4955 id_send_buffer, result_sendcounts, result_sdispls,
4956 id_recv_buffer, result_recvcounts, result_rdispls,
4957 sizeof(*id_send_buffer), single_remote_point_dt, comm);
4958
4959 yac_mpi_call(MPI_Type_free(&single_remote_point_dt), comm);
4960
4961 size_t * new_local_cells =
4962 xmalloc((result_recv_count + result_local_count) *
4963 sizeof(*new_local_cells));
4964
4965 memcpy(new_local_cells + result_recv_count,
4966 local_cells + result_send_count,
4967 result_local_count * sizeof(*new_local_cells));
4968 free(local_cells);
4969
4970 //---------------------------------------------------------------------------
4971 // convert results into local ids and update local part of the distributed
4972 // grid if necessary
4973 //---------------------------------------------------------------------------
4974
4975 // convert all remote ids to local ones, extend local dist_grid data,
4976 // if necessary
4978 dist_grid, id_recv_buffer, result_recv_count, YAC_LOC_CELL, new_local_cells);
4979
4980 free(single_remote_point_buffer);
4981
4982 size_t * reorder_idx =
4983 xmalloc((result_recv_count + result_local_count) * sizeof(*reorder_idx));
4984
4985 memset(
4986 num_results_per_bnd_circle, 0, count * sizeof(*num_results_per_bnd_circle));
4987
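 // for every bounding circle, walk through the ranks it was sent to (in the
 // original request order) and record the final position of each of its
 // results; reorder_idx is used below to sort new_local_cells into this
 // per-circle order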
4988 for (size_t i = 0, offset = 0, reorder = 0, local_search_idx = recv_count,
4989 local_offset = result_recv_count; i < count; ++i) {
4990 int curr_num_ranks = num_ranks[i];
4991 int * ranks = rank_buffer + offset;
4992 offset += (size_t)curr_num_ranks;
4993 for (int j = 0; j < curr_num_ranks; ++j) {
4994 int rank = ranks[j];
4995 if (rank == comm_rank) {
4996 uint64_t curr_num_results =
4997 num_local_cells_per_bnd_circle_uint64_t[local_search_idx++];
4998 num_results_per_bnd_circle[i] += (size_t)curr_num_results;
4999 for (uint64_t k = 0; k < curr_num_results; ++k, ++reorder)
5000 reorder_idx[local_offset++] = reorder;
5001 } else {
5002 size_t rank_pos = sdispls[rank]++;
5003 uint64_t curr_num_results = num_remote_cells_per_bnd_circle[rank_pos];
5004 num_results_per_bnd_circle[i] += (size_t)curr_num_results;
5005 for (uint64_t k = 0; k < curr_num_results; ++k, ++reorder)
5006 reorder_idx[result_rdispls[rank]++] = reorder;
5007 }
5008 }
5009 }
5010 free(uint64_t_buffer);
5011 free(num_ranks);
5012 free(rank_buffer);
5013 free(size_t_buffer);
5014 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
5015
5017 reorder_idx, result_recv_count + result_local_count, new_local_cells);
5018 free(reorder_idx);
5019
5020 // remove duplicated results
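// (the same cell may have been found both locally and by one or more remote
// processes)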
5021 for (size_t i = 0, offset = 0, new_offset = 0; i < count; ++i) {
5022
5023 size_t * curr_local_cells = new_local_cells + offset;
5024 size_t curr_num_results_per_bnd_circle = num_results_per_bnd_circle[i];
5025 size_t new_num_results_per_bnd_circle = 0;
5026 size_t prev_cell = SIZE_MAX;
5027 offset += curr_num_results_per_bnd_circle;
5028
5030 curr_local_cells, curr_num_results_per_bnd_circle, NULL);
5031
5032 for (size_t j = 0; j < curr_num_results_per_bnd_circle; ++j) {
5033 size_t curr_cell = curr_local_cells[j];
5034 if (curr_cell != prev_cell) {
5035 new_local_cells[new_offset++] = (prev_cell = curr_cell);
5036 ++new_num_results_per_bnd_circle;
5037 }
5038 }
5039 num_results_per_bnd_circle[i] = new_num_results_per_bnd_circle;
5040 }
5041
5042 *cells = new_local_cells;
5043}
5044
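// For every given cell of the search grid, the following routine determines
// all cells of the result grid whose bounding circle overlaps with the
// bounding circle of the respective search cell.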
5046 struct yac_dist_grid_pair * grid_pair,
5047 char const * search_grid_name, char const * result_grid_name,
5048 size_t * search_cells, size_t count, size_t ** result_cells,
5049 size_t * num_results_per_search_cell, struct yac_interp_field result_field) {
5050
5051 struct bounding_circle * search_bnd_circles =
5052 xmalloc(count * sizeof(*search_bnd_circles));
5053
5054 struct yac_dist_grid * search_dist_grid =
5055 yac_dist_grid_pair_get_dist_grid(grid_pair, search_grid_name);
5056 struct yac_dist_grid * result_dist_grid =
5057 yac_dist_grid_pair_get_dist_grid(grid_pair, result_grid_name);
5058
5059 const_bounding_circle_pointer search_grid_cell_bnd_circles =
5060 search_dist_grid->cell_bnd_circles;
5061
5062 for (size_t i = 0; i < count; ++i)
5063 search_bnd_circles[i] = search_grid_cell_bnd_circles[search_cells[i]];
5064
5066 grid_pair, result_grid_name, search_bnd_circles, count, result_cells,
5067 num_results_per_search_cell, result_field);
5068
5069 size_t total_num_result_cells = 0;
5070
5071 // struct yac_grid_cell search_cell, result_cell;
5072 // yac_init_grid_cell(&search_cell);
5073 // yac_init_grid_cell(&result_cell);
5074
5075 // filter out obvious mismatches
5076 // (currently only the bounding circles are checked)
5077 for (size_t i = 0, offset = 0; i < count; ++i) {
5078
5079 size_t curr_num_results_per_bnd_circle = num_results_per_search_cell[i];
5080 size_t new_num_results_per_search_cell = 0;
5081 size_t * curr_result_cells = *result_cells + offset;
5082 offset += curr_num_results_per_bnd_circle;
5083
5084 // yac_const_basic_grid_data_get_grid_cell(
5085 // (struct yac_const_basic_grid_data *)search_dist_grid,
5086 // search_cells[i], &search_cell);
5087
5088 for (size_t j = 0; j < curr_num_results_per_bnd_circle; ++j) {
5089
5090 size_t curr_result_cell = curr_result_cells[j];
5091
5092 // yac_const_basic_grid_data_get_grid_cell(
5093 // (struct yac_const_basic_grid_data *)result_dist_grid,
5094 // curr_result_cell, &result_cell);
5095
5096 // if (yac_check_overlap_cells2(
5097 // search_cell, search_dist_grid->cell_bnd_circles[search_cells[i]],
5098 // result_cell, result_dist_grid->cell_bnd_circles[curr_result_cell])) {
5099 if (yac_extents_overlap(
5100 search_bnd_circles + i,
5101 result_dist_grid->cell_bnd_circles + curr_result_cell)) {
5102
5103 (*result_cells)[total_num_result_cells++] = curr_result_cell;
5104 ++new_num_results_per_search_cell;
5105 }
5106 }
5107
5108 num_results_per_search_cell[i] = new_num_results_per_search_cell;
5109 }
5110
5111 // yac_free_grid_cell(&result_cell);
5112 // yac_free_grid_cell(&search_cell);
5113
5114 // *result_cells =
5115 // xrealloc(*result_cells, total_num_result_cells * sizeof(**result_cells));
5116 free(search_bnd_circles);
5117}
5118
5120 struct yac_dist_grid * dist_grid, size_t edge_id) {
5121
5122 return
5124 dist_grid->edge_to_vertex, dist_grid->vertex_coordinates, edge_id);
5125}
5126
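// For each cell in the provided list, the following routine determines for
// every one of its edges the neighbouring cell that shares this edge.
// Neighbours that are not available in the local part of the distributed grid
// are collected and retrieved from other processes afterwards.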
5128 struct yac_dist_grid * dist_grid,
5129 struct proc_sphere_part_node * proc_sphere_part,
5130 size_t * cells, size_t count, size_t * neighbours) {
5131
5132 // generate edge to cell
5133 yac_size_t_2_pointer edge_to_cell =
5135 dist_grid->cell_to_edge, dist_grid->num_vertices_per_cell,
5136 NULL, dist_grid->total_count[YAC_LOC_CELL],
5137 dist_grid->total_count[YAC_LOC_EDGE]);
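// (for an edge whose second adjacent cell is not available locally, the
// respective entry contains SIZE_MAX)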
5138
5139 // get maximum number of edges per cell
5140 int max_num_edges_per_cell = 0;
5141 for (size_t i = 0; i < dist_grid->total_count[YAC_LOC_CELL]; ++i)
5142 if (max_num_edges_per_cell < dist_grid->num_vertices_per_cell[i])
5143 max_num_edges_per_cell = dist_grid->num_vertices_per_cell[i];
5144
5145 yac_size_t_2_pointer edge_vertices =
5146 xmalloc((size_t)max_num_edges_per_cell * sizeof(*edge_vertices));
5147
5148 size_t neigh_idx = 0;
5149
5150 struct missing_edge_neighbour * missing_edge_neighbour = NULL;
5151 size_t missing_edge_neighbour_array_size = 0;
5152 size_t num_missing_neighbours = 0;
5153
5154 // for each cell
5155 for (size_t i = 0; i < count; ++i) {
5156
5157 size_t curr_cell = cells[i];
5158
5159 // get all edges
5160 size_t curr_num_edges = dist_grid->num_vertices_per_cell[curr_cell];
5161 size_t const * cell_edges =
5162 dist_grid->cell_to_edge + dist_grid->cell_to_edge_offsets[curr_cell];
5163 for (size_t j = 0; j < curr_num_edges; ++j) {
5164 size_t const * curr_edge_to_vertex =
5165 dist_grid->edge_to_vertex[cell_edges[j]];
5166 edge_vertices[j][0] = curr_edge_to_vertex[0];
5167 edge_vertices[j][1] = curr_edge_to_vertex[1];
5168 }
5169
5170 ENSURE_ARRAY_SIZE(
5171 missing_edge_neighbour, missing_edge_neighbour_array_size,
5172 num_missing_neighbours + curr_num_edges);
5173
5174 // get the neighbour cells by following the edges and vertices around
5175 // the cell
5176 size_t prev_vertex = edge_vertices[0][0];
5177 for (size_t j = 0, edge_idx = 0; j < curr_num_edges; ++j, ++neigh_idx) {
5178
5179 // get the neighbour cell associated with the current edge
5180 size_t curr_edge = cell_edges[edge_idx];
5181 size_t * curr_edge_cells = edge_to_cell[curr_edge];
5182 size_t other_cell = curr_edge_cells[curr_edge_cells[0] == curr_cell];
5183 neighbours[neigh_idx] = other_cell;
5184
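// no neighbour was found in the locally available data; remember this edge so
// that the neighbour can be determined later with the help of other processes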
5185 if (other_cell == SIZE_MAX) {
5186 struct missing_edge_neighbour * curr_miss_neigh =
5187 missing_edge_neighbour + num_missing_neighbours++;
5188 curr_miss_neigh->edge.local_id = curr_edge;
5189 curr_miss_neigh->edge.global_id =
5190 dist_grid->ids[YAC_LOC_EDGE][curr_edge];
5191 curr_miss_neigh->cell.local_id = curr_cell;
5192 curr_miss_neigh->cell.global_id =
5193 dist_grid->ids[YAC_LOC_CELL][curr_cell];
5194 curr_miss_neigh->neigh_idx = neigh_idx;
5195 }
5196
5197 // get an edge that shares a vertex with the current edge
5198 size_t new_edge_idx = SIZE_MAX;
5199 for (size_t k = 0; k < curr_num_edges; ++k) {
5200 if (k == edge_idx) continue;
5201 else if (edge_vertices[k][0] == prev_vertex) {
5202 new_edge_idx = k;
5203 prev_vertex = edge_vertices[k][1];
5204 break;
5205 } else if (edge_vertices[k][1] == prev_vertex) {
5206 new_edge_idx = k;
5207 prev_vertex = edge_vertices[k][0];
5208 break;
5209 }
5210 }
5211 YAC_ASSERT(
5212 new_edge_idx < SIZE_MAX,
5213 "ERROR(yac_basic_grid_data_get_cell_neighbours): "
5214 "inconsistent cell_to_edge/edge_to_vertex data")
5215 edge_idx = new_edge_idx;
5216 }
5217
5218 // check whether we went once completely around the cell
5219 YAC_ASSERT(
5220 prev_vertex == edge_vertices[0][0],
5221 "ERROR(yac_basic_grid_data_get_cell_neighbours): "
5222 "inconsistent cell_to_edge/edge_to_vertex data")
5223 }
5224
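// the neighbours that were not found in the local data (marked with SIZE_MAX
// above) are now determined with the help of the other processes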
5225 { // get the missing neighbours
5226 MPI_Comm comm = dist_grid->comm;
5227 int comm_rank, comm_size;
5228 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
5229 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
5230
5231 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
5232 yac_get_comm_buffers(
5233 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
5234 int * int_buffer =
5235 xmalloc(
5236 ((size_t)comm_size + num_missing_neighbours) * sizeof(*int_buffer));
5237 int * temp_ranks = int_buffer;
5238 int * num_ranks = int_buffer + comm_size;
5239 memset(num_ranks, 0, num_missing_neighbours * sizeof(*num_ranks));
5240
5241 int * rank_buffer = NULL;
5242 size_t rank_buffer_array_size = 0;
5243 size_t rank_buffer_size = 0;
52