YetAnotherCoupler 3.2.0
Loading...
Searching...
No Matches
dist_grid.c
Go to the documentation of this file.
1// Copyright (c) 2024 The YAC Authors
2//
3// SPDX-License-Identifier: BSD-3-Clause
4
5#ifdef HAVE_CONFIG_H
6// Get the definition of the 'restrict' keyword.
7#include "config.h"
8#endif
9
10#include <stdlib.h>
11#include <string.h>
12#include <assert.h>
13#include <float.h>
14#include <yaxt.h>
15
16#include "basic_grid.h"
17#include "dist_grid_internal.h"
18#include "geometry.h"
19#include "yac_mpi_internal.h"
20#include "utils_core.h"
21#include "sphere_part.h"
22#include "proc_sphere_part.h"
23#include "ensure_array_size.h"
24#include "interp_grid.h"
25#include "field_data_set.h"
26
31
36
41
42// reorder_idx has to be first
49
50// reorder_idx has to be first
58
59struct id_pos {
61 uint64_t orig_pos;
62};
63
71
77
86
87// warning: when changing this, ensure that struct yac_const_basic_grid_data is
88// changed accordingly
109
118
123
125 struct {
126 size_t local_id;
129 size_t neigh_idx;
130};
131
132// looks up positions of ids in an array of sorted ids
133static void id2idx(
134 yac_int * ids, size_t * idx, size_t num_ids,
135 yac_int * ref_sorted_ids, size_t num_sorted_ids) {
136
137 size_t * reorder = xmalloc(num_ids * sizeof(*reorder));
138 for (size_t i = 0; i < num_ids; ++i) reorder[i] = i;
139
140 yac_quicksort_index_yac_int_size_t(ids, num_ids, reorder);
141
142 for (size_t i = 0, j = 0; i < num_ids; ++i) {
143
144 yac_int curr_id = ids[i];
145 while ((j < num_sorted_ids) && (ref_sorted_ids[j] < curr_id)) ++j;
147 (j < num_sorted_ids) && (ref_sorted_ids[j] == curr_id),
148 "ERROR(id2idx): id not found")
149 idx[reorder[i]] = j;
150 }
151
152 free(reorder);
153}
154
155// returns the index of the cell vertex with the lowest global id
157 struct yac_dist_grid * dist_grid, size_t cell_idx) {
158
159 int num_vertices = dist_grid->num_vertices_per_cell[cell_idx];
160 if (num_vertices == 0) return SIZE_MAX;
161 yac_int * grid_vertex_ids = dist_grid->ids[YAC_LOC_CORNER];
162 size_t * vertices =
163 dist_grid->cell_to_vertex + dist_grid->cell_to_vertex_offsets[cell_idx];
164 // get the cell corner with the smallest global id
165 size_t min_idx = vertices[0];
166 yac_int min_global_id = grid_vertex_ids[min_idx];
167 for (int j = 1; j < num_vertices; ++j) {
168 size_t curr_vertex = vertices[j];
169 yac_int curr_global_id = grid_vertex_ids[curr_vertex];
170 if (min_global_id > curr_global_id) {
171 min_global_id = curr_global_id;
172 min_idx = curr_vertex;
173 }
174 }
175 return min_idx;
176}
177
178// generate cell core mask (true for all cells belonging to the local part of
179// the distributed directory and the have a valid original owner)
181 struct yac_dist_grid * dist_grid, int is_root, int * vertex_owner_mask) {
182
183 size_t num_cells = dist_grid->count[YAC_LOC_CELL];
184 int * core_cell_mask = xmalloc(num_cells * sizeof(*core_cell_mask));
185
186 //-------------------------
187 // determine cell core mask
188 //-------------------------
189 for (size_t i = 0; i < num_cells; ++i) {
190 size_t ref_vertex = get_cell_reference_vertex(dist_grid, i);
191 core_cell_mask[i] =
192 (dist_grid->owners[YAC_LOC_CELL][i].count > 0) &&
193 ((ref_vertex != SIZE_MAX)?(vertex_owner_mask[ref_vertex]):is_root);
194 }
195 return core_cell_mask;
196}
197
198// returns the index of the edge vertex with the lowest global id
199static inline size_t get_edge_reference_vertex(
200 struct yac_dist_grid * dist_grid, size_t edge_idx) {
201
202 yac_int * vertex_ids = dist_grid->ids[YAC_LOC_CORNER];
203 size_t * edge_vertices = &(dist_grid->edge_to_vertex[edge_idx][0]);
204 // get the edge corner with the smallest global id
205 return edge_vertices[
206 (vertex_ids[edge_vertices[0]] > vertex_ids[edge_vertices[1]])?1:0];
207}
208
209// generate edge core mask (true for all edge belonging to the local part of
210// the distributed directory and the have a valid original owner)
212 struct yac_dist_grid * dist_grid, int * vertex_owner_mask) {
213
214 size_t num_edges = dist_grid->count[YAC_LOC_EDGE];
215 int * core_edge_mask = xmalloc(num_edges * sizeof(*core_edge_mask));
216
217 //-------------------------
218 // determine edge core mask
219 //-------------------------
220 for (size_t i = 0; i < num_edges; ++i)
221 core_edge_mask[i] =
222 (dist_grid->owners[YAC_LOC_EDGE][i].count > 0) &&
223 vertex_owner_mask[get_edge_reference_vertex(dist_grid, i)];
224 return core_edge_mask;
225}
226
227// generate core masks for cells, vertices and edges
229 struct yac_dist_grid * dist_grid,
230 struct proc_sphere_part_node * proc_sphere_part, int comm_rank) {
231
232 // determine distributed owner for vertices in the local part of the
233 // distributed grid
234 int * vertex_owner_mask =
235 xmalloc(dist_grid->count[YAC_LOC_CORNER] * sizeof(*vertex_owner_mask));
237 proc_sphere_part, dist_grid->vertex_coordinates,
238 dist_grid->count[YAC_LOC_CORNER], vertex_owner_mask);
239 for (size_t i = 0; i < dist_grid->count[YAC_LOC_CORNER]; ++i)
240 vertex_owner_mask[i] = vertex_owner_mask[i] == comm_rank;
241
242 // generate core mask for cells
243 dist_grid->core_mask[YAC_LOC_CELL] =
244 determine_cell_core_mask(dist_grid, comm_rank == 0, vertex_owner_mask);
245
246 // generate core mask for edges
247 dist_grid->core_mask[YAC_LOC_EDGE] =
248 determine_edge_core_mask(dist_grid, vertex_owner_mask);
249
250 // generate vertex core mask (true for vertices belonging to the local
251 // process and have an original owner)
252 for (size_t i = 0; i < dist_grid->count[YAC_LOC_CORNER]; ++i)
253 vertex_owner_mask[i] =
254 vertex_owner_mask[i] && (dist_grid->owners[YAC_LOC_CORNER][i].count > 0);
255 dist_grid->core_mask[YAC_LOC_CORNER] = vertex_owner_mask;
256}
257
258static MPI_Datatype yac_get_single_remote_point_mpi_datatype(MPI_Comm comm) {
259
260 struct single_remote_point dummy;
261 MPI_Datatype single_id_owner_dt;
262 int array_of_blocklengths[] = {1, 1, 1};
263 const MPI_Aint array_of_displacements[] =
264 {(MPI_Aint)(intptr_t)(const void *)&(dummy.global_id) -
265 (MPI_Aint)(intptr_t)(const void *)&dummy,
266 (MPI_Aint)(intptr_t)(const void *)&(dummy.data.rank) -
267 (MPI_Aint)(intptr_t)(const void *)&dummy,
268 (MPI_Aint)(intptr_t)(const void *)&(dummy.data.orig_pos) -
269 (MPI_Aint)(intptr_t)(const void *)&dummy};
270 const MPI_Datatype array_of_types[] =
271 {yac_int_dt, MPI_INT, MPI_UINT64_T};
273 MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacements,
274 array_of_types, &single_id_owner_dt), comm);
275 return yac_create_resized(single_id_owner_dt, sizeof(dummy), comm);
276}
277
279 int n, MPI_Comm comm) {
280
281 MPI_Datatype dt_single_remote_point_ =
283 MPI_Datatype dt_single_remote_point__ =
285 dt_single_remote_point_, sizeof(struct single_remote_point_reorder), comm);
286 MPI_Datatype dt_single_remote_point;
288 MPI_Type_contiguous(
289 n, dt_single_remote_point__, &dt_single_remote_point), comm);
290 yac_mpi_call(MPI_Type_free(&dt_single_remote_point__), comm);
291 yac_mpi_call(MPI_Type_commit(&dt_single_remote_point), comm);
292 return dt_single_remote_point;
293}
294
296 const void * a, const void * b) {
297
298 return (((const struct single_remote_point *)a)->global_id >
299 ((const struct single_remote_point *)b)->global_id) -
300 (((const struct single_remote_point *)a)->global_id <
301 ((const struct single_remote_point *)b)->global_id);
302}
303
304static MPI_Datatype yac_get_id_pos_mpi_datatype(MPI_Comm comm) {
305
306 struct id_pos dummy;
307 MPI_Datatype id_pos_dt;
308 int array_of_blocklengths[] = {1,1};
309 const MPI_Aint array_of_displacements[] =
310 {(MPI_Aint)(intptr_t)(const void *)&(dummy.global_id) -
311 (MPI_Aint)(intptr_t)(const void *)&dummy,
312 (MPI_Aint)(intptr_t)(const void *)&(dummy.orig_pos) -
313 (MPI_Aint)(intptr_t)(const void *)&dummy};
314 const MPI_Datatype array_of_types[] = {yac_int_dt, MPI_UINT64_T};
316 MPI_Type_create_struct(
317 2, array_of_blocklengths, array_of_displacements,
318 array_of_types, &id_pos_dt), comm);
319 return yac_create_resized(id_pos_dt, sizeof(dummy), comm);
320}
321
322MPI_Datatype yac_get_remote_point_info_mpi_datatype(MPI_Comm comm) {
323
324 struct remote_point_info dummy;
325 MPI_Datatype remote_point_info_dt;
326 int array_of_blocklengths[] = {1, 1};
327 const MPI_Aint array_of_displacements[] =
328 {(MPI_Aint)(intptr_t)(const void *)&(dummy.rank) -
329 (MPI_Aint)(intptr_t)(const void *)&dummy,
330 (MPI_Aint)(intptr_t)(const void *)&(dummy.orig_pos) -
331 (MPI_Aint)(intptr_t)(const void *)&dummy};
332 const MPI_Datatype array_of_types[] =
333 {MPI_INT, MPI_UINT64_T};
335 MPI_Type_create_struct(2, array_of_blocklengths, array_of_displacements,
336 array_of_types, &remote_point_info_dt), comm);
337 return yac_create_resized(remote_point_info_dt, sizeof(dummy), comm);
338}
339
341 struct remote_point_infos point_infos) {
342
343 int count = point_infos.count;
344 if (count > 1) {
345 struct remote_point_info * temp = xmalloc((size_t)count * sizeof(*temp));
346 memcpy(temp, point_infos.data.multi, (size_t)count * sizeof(*temp));
347 point_infos.data.multi = temp;
348 }
349 return point_infos;
350}
351
353 yac_int * sorted_ids, size_t * reorder_idx, size_t count,
354 struct remote_points * points) {
355
356 struct remote_point_infos * point_infos = xmalloc(count * sizeof(*point_infos));
357
358 size_t id_owner_count = points->count;
359 struct remote_point * points_ = points->data;
360
361 for (size_t i = 0, j = 0; i < count; ++i) {
362
363 yac_int curr_id = sorted_ids[i];
364
365 while ((j < id_owner_count) && (points_[j].global_id < curr_id)) ++j;
366
367 if ((j >= id_owner_count) || (points_[j].global_id != curr_id))
368 point_infos[reorder_idx[i]].count = -1;
369 else
370 point_infos[reorder_idx[i]] = copy_remote_point_infos(points_[j].data);
371 }
372
373 return point_infos;
374}
375
377 yac_int * requested_ids, struct remote_point_infos ** results,
378 size_t * reorder_buffer, size_t request_count,
379 struct remote_points * local_remote_data) {
380
381 struct remote_point * local_remote_points = local_remote_data->data;
382 size_t local_count = local_remote_data->count;
383 static struct remote_point_infos dummy_data =
384 {.count = 0, .data.single = {.rank = INT_MAX, .orig_pos = UINT64_MAX}};
385
386 for (size_t j = 0; j < request_count; ++j) reorder_buffer[j] = j;
387
388 // sort ids for easier owner lookup
390 requested_ids, request_count, reorder_buffer);
391
392 for (size_t i = 0, j = 0; i < request_count; ++i) {
393 yac_int curr_id = requested_ids[i];
394 while ((j < local_count) && (curr_id > local_remote_points[j].global_id))
395 ++j;
396 results[reorder_buffer[i]] =
397 ((j < local_count) && (curr_id == local_remote_points[j].global_id))?
398 &(local_remote_points[j].data):&dummy_data;
399 }
400}
401
403 struct remote_point_infos const * infos, MPI_Datatype point_info_dt,
404 MPI_Comm comm) {
405
406 int pack_size_count,
407 pack_size_data;
408
409 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_count), comm);
411 MPI_Pack_size(infos->count, point_info_dt, comm, &pack_size_data), comm);
412
413 return pack_size_count + pack_size_data;
414}
415
417 struct remote_point_infos ** infos, size_t count, int * pack_sizes,
418 MPI_Datatype point_info_dt, MPI_Comm comm) {
419
420 int pack_size_count;
421
422 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_count), comm);
423
424 for (size_t i = 0; i < count; ++i) {
426 MPI_Pack_size(
427 infos[i]->count, point_info_dt, comm, pack_sizes + i), comm);
428 pack_sizes[i] += pack_size_count;
429 }
430}
431
433 struct remote_point_infos ** infos, int * pack_sizes, size_t count,
434 void * buffer, MPI_Datatype point_info_dt, MPI_Comm comm) {
435
436 for (size_t i = 0; i < count; ++i) {
437
438 int curr_count = infos[i]->count;
439 struct remote_point_info * curr_point_infos =
440 (curr_count == 1)?
441 (&(infos[i]->data.single)):(infos[i]->data.multi);
442
443 int position = 0;
444 int buffer_size = pack_sizes[i];
446 MPI_Pack(&curr_count, 1, MPI_INT, buffer,
447 buffer_size, &position, comm), comm);
449 MPI_Pack(curr_point_infos, curr_count, point_info_dt, buffer,
450 buffer_size, &position, comm), comm);
451 buffer = (void*)(((unsigned char*)buffer) + buffer_size);
452 }
453}
454
456 void * buffer, int buffer_size, int * position,
457 struct remote_point_infos * infos, MPI_Datatype point_info_dt, MPI_Comm comm) {
458
459 int count;
461 MPI_Unpack(buffer, buffer_size, position, &count, 1, MPI_INT, comm), comm);
462
463 infos->count = count;
464
465 if (count > 0) {
466 struct remote_point_info * point_infos;
467 if (count == 1)
468 point_infos = &(infos->data.single);
469 else
470 point_infos =
471 (infos->data.multi = xmalloc((size_t)count * sizeof(*point_infos)));
472
474 MPI_Unpack(buffer, buffer_size, position, point_infos, count,
475 point_info_dt, comm), comm);
476 } else {
477 infos->data.single.rank = INT_MAX;
478 infos->data.single.orig_pos = UINT64_MAX;
479 }
480}
481
483 void * buffer, int buffer_size, int * position,
484 struct remote_point_info * info_buffer, size_t * info_buffer_position,
485 struct remote_point_infos * infos, MPI_Datatype point_info_dt, MPI_Comm comm) {
486
487 int count;
489 MPI_Unpack(buffer, buffer_size, position, &count, 1, MPI_INT, comm), comm);
490
491 infos->count = count;
492
493 struct remote_point_info * point_infos;
494 if (count == 1)
495 point_infos = &(infos->data.single);
496 else {
497 point_infos =
498 (infos->data.multi = info_buffer + *info_buffer_position);
499 *info_buffer_position += count;
500 }
501
503 MPI_Unpack(buffer, buffer_size, position, point_infos, count,
504 point_info_dt, comm), comm);
505}
506
508 struct remote_point * point, MPI_Datatype point_info_dt, MPI_Comm comm) {
509
510 int pack_size_id,
511 pack_size_remote_point_infos;
512
513 yac_mpi_call(MPI_Pack_size(1, yac_int_dt, comm, &pack_size_id), comm);
514 pack_size_remote_point_infos =
515 yac_remote_point_infos_get_pack_size(&(point->data), point_info_dt, comm);
516
517 return pack_size_id + pack_size_remote_point_infos;
518}
519
521 struct remote_point_infos const * infos, void * buffer, int buffer_size,
522 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
523
524 int count = infos->count;
525
526 struct remote_point_info const * info =
527 (count == 1)?(&(infos->data.single)):(infos->data.multi);
528
530 MPI_Pack(&count, 1, MPI_INT, buffer, buffer_size, position, comm), comm);
532 MPI_Pack(info, count, point_info_dt, buffer, buffer_size, position, comm),
533 comm);
534}
535
537 struct remote_point * point, void * buffer, int buffer_size, int * position,
538 MPI_Datatype point_info_dt, MPI_Comm comm) {
539
541 MPI_Pack(&(point->global_id), 1, yac_int_dt, buffer,
542 buffer_size, position, comm), comm);
543
545 &(point->data), buffer, buffer_size, position, point_info_dt, comm);
546}
547
549 void * buffer, int buffer_size, int * position, struct remote_point * point,
550 MPI_Datatype point_info_dt, MPI_Comm comm) {
551
553 MPI_Unpack(
554 buffer, buffer_size, position, &(point->global_id), 1, yac_int_dt, comm),
555 comm);
557 buffer, buffer_size, position, &(point->data), point_info_dt, comm);
558}
559
561 void * buffer, int buffer_size, int * position,
562 struct remote_point_info * info_buffer, size_t * info_buffer_position,
563 struct remote_point * point, MPI_Datatype point_info_dt, MPI_Comm comm) {
564
566 MPI_Unpack(
567 buffer, buffer_size, position, &(point->global_id), 1, yac_int_dt, comm),
568 comm);
570 buffer, buffer_size, position, info_buffer, info_buffer_position,
571 &(point->data), point_info_dt, comm);
572}
573
575 struct remote_points * points, MPI_Datatype point_info_dt, MPI_Comm comm) {
576
577 size_t count = points->count;
578 struct remote_point * points_data = points->data;
579
580 int count_pack_size,
581 remote_points_pack_size;
582
583 yac_mpi_call(MPI_Pack_size(2, MPI_UINT64_T, comm, &count_pack_size), comm);
584 remote_points_pack_size = 0;
585 for (size_t i = 0; i < count; ++i)
586 remote_points_pack_size +=
587 yac_remote_point_get_pack_size(points_data + i, point_info_dt, comm);
588
589 return count_pack_size + remote_points_pack_size;
590}
591
593 struct remote_points * points, void * buffer, int buffer_size, int * position,
594 MPI_Datatype point_info_dt, MPI_Comm comm) {
595
596 size_t count = points->count;
597 struct remote_point * point_data = points->data;
598 uint64_t counts[2] = {(uint64_t)count, 0};
599 for (size_t i = 0; i < count; ++i)
600 if (point_data[i].data.count > 1)
601 counts[1] += (uint64_t)(point_data[i].data.count);
602
604 MPI_Pack(counts, 2, MPI_UINT64_T, buffer,
605 buffer_size, position, comm), comm);
606 for (size_t i = 0; i < count; ++i)
608 point_data + i, buffer, buffer_size, position, point_info_dt, comm);
609}
610
612 void * buffer, int buffer_size, int * position,
613 struct remote_points ** points, MPI_Datatype point_info_dt, MPI_Comm comm) {
614
615 uint64_t counts[2];
616
618 MPI_Unpack(
619 buffer, buffer_size, position, counts, 2, MPI_UINT64_T, comm), comm);
620
621 *points = xmalloc(((size_t)counts[1]) * sizeof(struct remote_point_infos) +
622 sizeof(**points));
623
624 size_t count = ((*points)->count = (size_t)(counts[0]));
625 struct remote_point * point_data =
626 ((*points)->data = xmalloc(count * sizeof(*((*points)->data))));
627 struct remote_point_info * remote_point_info_buffer =
628 &((*points)->buffer[0]);
629
630 for (size_t i = 0, offset = 0; i < count; ++i) {
631
633 buffer, buffer_size, position, remote_point_info_buffer, &offset,
634 point_data + i, point_info_dt, comm);
635 }
636}
637
639 struct yac_dist_grid * dist_grid,
640 struct proc_sphere_part_node * proc_sphere_part,
641 struct remote_points ** dist_owner) {
642
643 MPI_Comm comm = dist_grid->comm;
644
645 int comm_size;
646 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
647
648 for (int location = 0; location < 3; ++location)
649 dist_grid->owners[location] =
651 dist_grid->sorted_ids[location],
652 dist_grid->sorted_reorder_idx[location],
653 dist_grid->count[location], dist_owner[location]);
654
655 size_t num_missing_vertices = 0;
656 size_t num_missing_edges = 0;
657
658 for (size_t i = 0; i < dist_grid->count[YAC_LOC_CELL]; ++i)
660 dist_grid->owners[YAC_LOC_CELL][i].count != -1,
661 "ERROR(generate_dist_grid_remote_point_infos):"
662 "no owner information for a cell available")
663 for (size_t i = 0; i < dist_grid->count[YAC_LOC_CORNER]; ++i)
664 if (dist_grid->owners[YAC_LOC_CORNER][i].count == -1)
665 ++num_missing_vertices;
666 for (size_t i = 0; i < dist_grid->count[YAC_LOC_EDGE]; ++i)
667 if (dist_grid->owners[YAC_LOC_EDGE][i].count == -1) ++num_missing_edges;
668
669 size_t total_num_missing = num_missing_vertices + num_missing_edges;
670 int * ranks_buffer = xmalloc(total_num_missing * sizeof(*ranks_buffer));
671 int * vertex_ranks = ranks_buffer;
672 int * edge_ranks = ranks_buffer + num_missing_vertices;
673 size_t * size_t_buffer =
674 xmalloc((3 * total_num_missing +
675 4 * (size_t)comm_size) * sizeof(*size_t_buffer));
676 size_t * vertex_reorder = size_t_buffer;
677 size_t * edge_reorder = size_t_buffer + num_missing_vertices;
678 size_t * coord_indices = size_t_buffer + total_num_missing;
679 size_t * ranks_idxs = coord_indices;
680 size_t * cve_reorder = size_t_buffer + 2 * total_num_missing;
681 size_t * total_sendcounts = size_t_buffer + 3 * total_num_missing + 0 * comm_size;
682 size_t * total_recvcounts = size_t_buffer + 3 * total_num_missing + 1 * comm_size;
683 size_t * total_sdispls = size_t_buffer + 3 * total_num_missing + 2 * comm_size;
684 size_t * total_rdispls = size_t_buffer + 3 * total_num_missing + 3 * comm_size;
685
686 size_t offset = 0;
687 for (size_t i = 0, j = 0; i < dist_grid->count[YAC_LOC_CORNER]; ++i) {
688 if (dist_grid->owners[YAC_LOC_CORNER][i].count == -1) {
689
690 vertex_reorder[j] = i;
691 ++j;
692 coord_indices[offset] = i;
693 cve_reorder[offset] = offset;
694 ++offset;
695 }
696 }
697 for (size_t i = 0, j = 0; i < dist_grid->count[YAC_LOC_EDGE]; ++i) {
698 if (dist_grid->owners[YAC_LOC_EDGE][i].count == -1) {
699 // compute middle point of edge
700 size_t * vertex_idxes = &(dist_grid->edge_to_vertex[i][0]);
701 yac_int vertex_ids[2] = {dist_grid->ids[YAC_LOC_CORNER][vertex_idxes[0]],
702 dist_grid->ids[YAC_LOC_CORNER][vertex_idxes[1]]};
703 edge_reorder[j] = i;
704 ++j;
705 coord_indices[offset] =
706 vertex_idxes[(vertex_ids[0] > vertex_ids[1])?1:0];
707 cve_reorder[offset] = offset;
708 ++offset;
709 }
710 }
711
713 coord_indices, total_num_missing, cve_reorder);
714
715 size_t num_unique_idxs = 0;
716 for (size_t i = 0, prev_idx = SIZE_MAX; i < total_num_missing; ++i) {
717 size_t curr_idx = coord_indices[i];
718 if (curr_idx != prev_idx) {
719 prev_idx = curr_idx;
720 ++num_unique_idxs;
721 }
722 }
723
724 yac_coordinate_pointer search_coords =
725 xmalloc(num_unique_idxs * sizeof(*search_coords));
726 int * search_ranks = xmalloc(num_unique_idxs * sizeof(*search_ranks));
727
728 for (size_t i = 0, j = 0, prev_idx = SIZE_MAX; i < total_num_missing; ++i) {
729 size_t curr_idx = coord_indices[i];
730 if (curr_idx != prev_idx) {
731 memcpy(search_coords[j], dist_grid->vertex_coordinates[curr_idx],
732 3 * sizeof(double));
733 prev_idx = curr_idx;
734 ++j;
735 }
736 ranks_idxs[i] = j-1;
737 }
738
740 proc_sphere_part, search_coords, num_unique_idxs, search_ranks);
741 free(search_coords);
742
744 cve_reorder, total_num_missing, ranks_idxs);
745
746 offset = 0;
747 for (size_t i = 0; i < num_missing_vertices; ++i, ++offset)
748 vertex_ranks[i] = search_ranks[ranks_idxs[offset]];
749 for (size_t i = 0; i < num_missing_edges; ++i, ++offset)
750 edge_ranks[i] = search_ranks[ranks_idxs[offset]];
751 free(search_ranks);
752
754 vertex_ranks, num_missing_vertices, vertex_reorder);
756 edge_ranks, num_missing_edges, edge_reorder);
757
758 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
760 2, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
761
762 for (size_t i = 0; i < num_missing_vertices; ++i)
763 sendcounts[2 * vertex_ranks[i] + 0]++;
764 for (size_t i = 0; i < num_missing_edges; ++i)
765 sendcounts[2 * edge_ranks[i] + 1]++;
766
768 2, sendcounts, recvcounts, sdispls, rdispls, comm);
769
770 size_t num_requested_ids[2] = {0,0};
771 size_t saccu = 0, raccu = 0;
772 for (int i = 0; i < comm_size; ++i) {
773 total_sdispls[i] = saccu;
774 total_rdispls[i] = raccu;
775 total_sendcounts[i] = 0;
776 total_recvcounts[i] = 0;
777 for (int j = 0; j < 2; ++j) {
778 total_sendcounts[i] += sendcounts[2 * i + j];
779 total_recvcounts[i] += recvcounts[2 * i + j];
780 num_requested_ids[j] += recvcounts[2 * i + j];
781 }
782 saccu += total_sendcounts[i];
783 raccu += total_recvcounts[i];
784 }
785 size_t request_count = total_recvcounts[comm_size - 1] +
786 total_rdispls[comm_size - 1];
787
788 yac_int * exchange_id_buffer =
789 xmalloc(
790 (total_num_missing + 2 * request_count) * sizeof(*exchange_id_buffer));
791 yac_int * id_send_buffer = exchange_id_buffer + request_count;
792 yac_int * id_recv_buffer = exchange_id_buffer + request_count + total_num_missing;
793
794 // pack the ids for which the local process is missing the owner information
795 for (size_t i = 0; i < num_missing_vertices; ++i) {
796 size_t pos = sdispls[2 * vertex_ranks[i] + 0 + 1]++;
797 id_send_buffer[pos] = dist_grid->ids[YAC_LOC_CORNER][vertex_reorder[i]];
798 }
799 for (size_t i = 0; i < num_missing_edges; ++i) {
800 size_t pos = sdispls[2 * edge_ranks[i] + 1 + 1]++;
801 id_send_buffer[pos] = dist_grid->ids[YAC_LOC_EDGE][edge_reorder[i]];
802 }
803
804 // exchange the ids for which the local process is missing the owner information
805 yac_alltoallv_yac_int_p2p(
806 id_send_buffer, total_sendcounts, total_sdispls,
807 id_recv_buffer, total_recvcounts, total_rdispls, comm);
808
809 yac_int * requested_ids[2] =
810 {exchange_id_buffer, exchange_id_buffer + num_requested_ids[0]};
811
812 // unpack data
813 {
814 size_t ids_offset[2] = {0, 0}, offset = 0;
815 for (int i = 0; i < comm_size; ++i) {
816 for (int j = 0; j < 2; ++j) {
817 size_t curr_count = recvcounts[2 * i + j];
818 memcpy(requested_ids[j] + ids_offset[j], id_recv_buffer + offset,
819 curr_count * sizeof(*(requested_ids[0])));
820 ids_offset[j] += curr_count;
821 offset += curr_count;
822 }
823 }
824 }
825
826 struct remote_point_infos ** remote_point_infos_buffer =
827 xmalloc(request_count * sizeof(*remote_point_infos_buffer));
828 struct remote_point_infos ** found_points[2] =
829 {remote_point_infos_buffer,
830 remote_point_infos_buffer + num_requested_ids[0]};
831 struct remote_points * local_remote_points[2] =
832 {dist_owner[YAC_LOC_CORNER], dist_owner[YAC_LOC_EDGE]};
833
834 size_t max_num_requested_ids = MAX(num_requested_ids[0], num_requested_ids[1]);
835 size_t * reorder_buffer = xmalloc(max_num_requested_ids * sizeof(*reorder_buffer));
836 // lookup owners for requested ids
837 for (int i = 0; i < 2; ++i)
839 requested_ids[i], found_points[i], reorder_buffer, num_requested_ids[i],
840 local_remote_points[i]);
841 free(reorder_buffer);
842 free(exchange_id_buffer);
843
844 int * pack_size_buffer =
845 xmalloc((total_num_missing + request_count) * sizeof(*pack_size_buffer));
846 int * send_pack_sizes = pack_size_buffer;
847 int * recv_pack_sizes = pack_size_buffer + request_count;
848 MPI_Datatype point_info_dt = yac_get_remote_point_info_mpi_datatype(comm);
849
850 {
851 size_t offset = 0, offsets[2] = {0, 0};
852 for (int i = 0; i < comm_size; ++i) {
853 for (int j = 0; j < 2; ++j) {
854 size_t curr_count = (size_t)(recvcounts[2 * i + j]);
855 // get pack sizes
857 found_points[j] + offsets[j], curr_count,
858 send_pack_sizes + offset, point_info_dt, comm);
859 offsets[j] += curr_count;
860 offset += curr_count;
861 }
862 }
863 }
864
865 // exchange pack sizes
866 yac_alltoallv_int_p2p(
867 send_pack_sizes, total_recvcounts, total_rdispls,
868 recv_pack_sizes, total_sendcounts, total_sdispls, comm);
869
870 size_t send_buffer_size = 0, recv_buffer_size = 0;
871 for (size_t i = 0; i < request_count; ++i)
872 send_buffer_size += (size_t)(send_pack_sizes[i]);
873 for (size_t i = 0; i < total_num_missing; ++i)
874 recv_buffer_size += (size_t)(recv_pack_sizes[i]);
875
876 void * result_buffer = xmalloc(send_buffer_size + recv_buffer_size);
877 void * send_result_buffer = result_buffer;
878 void * recv_result_buffer = (unsigned char *)result_buffer + send_buffer_size;
879
880 // pack data
881 {
882 size_t send_buffer_size = 0, offset = 0, offsets[2] = {0,0};
883 for (int i = 0; i < comm_size; ++i) {
884 for (int j = 0; j < 2; ++j) {
885 size_t curr_count = recvcounts[2 * i + j];
886 // get pack sizes
888 found_points[j] + offsets[j],
889 send_pack_sizes + offset, curr_count,
890 (void*)((unsigned char*)send_result_buffer + send_buffer_size),
891 point_info_dt, comm);
892 for (size_t k = 0; k < curr_count; ++k)
893 send_buffer_size += send_pack_sizes[offset + k];
894 offsets[j] += curr_count;
895 offset += curr_count;
896 }
897 }
898 }
899
900 {
901 size_t send_offset = 0, recv_offset = 0;
902 size_t saccu = 0, raccu = 0;
903 for (int i = 0; i < comm_size; ++i) {
904 total_sdispls[i] = saccu;
905 total_rdispls[i] = raccu;
906 size_t curr_sendcount = 0, curr_recvcount = 0;
907 for (int k = 0; k < 2; ++k) {
908 for (size_t j = 0; j < recvcounts[2 * i + k]; ++j, ++send_offset)
909 curr_sendcount += (size_t)(send_pack_sizes[send_offset]);
910 for (size_t j = 0; j < sendcounts[2 * i + k]; ++j, ++recv_offset)
911 curr_recvcount += (size_t)(recv_pack_sizes[recv_offset]);
912 }
913 total_sendcounts[i] = curr_sendcount;
914 total_recvcounts[i] = curr_recvcount;
915 saccu += curr_sendcount;
916 raccu += curr_recvcount;
917 }
918 }
919
920 // exchange results
921 yac_alltoallv_packed_p2p(
922 send_result_buffer, total_sendcounts, total_sdispls,
923 recv_result_buffer, total_recvcounts, total_rdispls, comm);
924
925 // unpack results
926 {
927 size_t counts[2] = {0, 0}, count = 0, offset = 0;
929 {dist_grid->owners[YAC_LOC_CORNER], dist_grid->owners[YAC_LOC_EDGE]};
930 size_t * reorder[2] = {vertex_reorder, edge_reorder};
931 for (int i = 0; i < comm_size; ++i) {
932 for (int j = 0; j < 2; ++j) {
933 size_t curr_count = sendcounts[2 * i + j];
934 for (size_t k = 0; k < curr_count; ++k, ++counts[j], ++count) {
935 int curr_buffer_size = recv_pack_sizes[sdispls[2 * i + j] + k];
936 int position = 0;
938 (void*)((unsigned char*)recv_result_buffer + offset),
939 curr_buffer_size, &position,
940 remote_points[j] + reorder[j][counts[j]], point_info_dt, comm);
941 offset += (size_t)(curr_buffer_size);
942 }
943 }
944 }
945 }
946
947 yac_mpi_call(MPI_Type_free(&point_info_dt), comm);
948
949 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
950 free(result_buffer);
951 free(pack_size_buffer);
952 free(remote_point_infos_buffer);
953 free(size_t_buffer);
954 free(ranks_buffer);
955}
956
957static int compare_remote_point(const void * a, const void * b) {
958
959 return ((const struct remote_point*)a)->global_id -
960 ((const struct remote_point*)b)->global_id;
961}
962
963// inserts an element into an array and increases the corresponding size
964static void insert_global_id(yac_int * ids, size_t n, yac_int id) {
965
966 size_t i;
967 for (i = 0; i < n; ++i) if (ids[i] >= id) break;
968 // copy new id into array and move bigger elements one position up
969 if (n != i) memmove(ids + i + 1, ids + i, (n - i) * sizeof(*ids));
970 ids[i] = id;
971}
972
973// inserts an element into an array and increases the corresponding size
974// if the element already exists in the array nothing is done
975static void insert_rank(int * ranks, int * count, int rank) {
976
977 int i;
978 int n = *count;
979
980 for (i = 0; i < n; ++i) if (ranks[i] >= rank) break;
981
982 // if the rank is already in the array
983 if (i != n) {
984 if (ranks[i] == rank) return;
985 else memmove(ranks + i + 1, ranks + i, ((size_t)(n - i)) * sizeof(*ranks));
986 }
987 ranks[i] = rank;
988 *count = n + 1;
989}
990
992 const void * a, const void * b) {
993
994 int count_a = ((struct n_ids_reorder const *)a)->count;
995 int count_b = ((struct n_ids_reorder const *)b)->count;
996 yac_int * a_ids = ((struct n_ids_reorder const *)a)->ids;
997 yac_int * b_ids = ((struct n_ids_reorder const *)b)->ids;
998 int ret = count_a - count_b;
999 for (int i = 0; !ret && (i < count_a); ++i)
1000 ret = (a_ids[i] > b_ids[i]) - (a_ids[i] < b_ids[i]);
1001 return ret;
1002}
1003
1005 const void * a, const void * b) {
1006
1007 return (((struct n_ids_reorder const *)a)->reorder_idx >
1008 ((struct n_ids_reorder const *)b)->reorder_idx) -
1009 (((struct n_ids_reorder const *)a)->reorder_idx <
1010 ((struct n_ids_reorder const *)b)->reorder_idx);
1011}
1012
1013// determines for all cells, in the grid data provided by the user, to
1014// which processes they belong according to the decomposition of the
1015// distributed grid
1017 struct proc_sphere_part_node * proc_sphere_part,
1018 struct yac_basic_grid_data * grid, MPI_Comm comm, int ** dist_cell_ranks,
1019 int * dist_cell_rank_counts, size_t * dist_cell_rank_offsets) {
1020
1021 int comm_size;
1022 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1023
1024 size_t num_cells = grid->num_cells;
1025 int * ranks_buffer = xmalloc((size_t)comm_size * sizeof(*ranks_buffer));
1026 size_t dist_cell_ranks_array_size = num_cells;
1027 int * dist_cell_ranks_ = xmalloc(num_cells * sizeof(*dist_cell_ranks_));
1028 size_t offset = 0;
1029
1030 // determine maximum number of vertices per cell
1031 int max_num_vertices_per_cell = 0;
1032 for (size_t i = 0; i < num_cells; ++i)
1033 if (grid->num_vertices_per_cell[i] > max_num_vertices_per_cell)
1034 max_num_vertices_per_cell = grid->num_vertices_per_cell[i];
1035
1036 struct yac_grid_cell cell;
1037 cell.coordinates_xyz =
1038 xmalloc(
1039 (size_t)max_num_vertices_per_cell * sizeof(*(cell.coordinates_xyz)));
1040 cell.edge_type =
1041 xmalloc((size_t)max_num_vertices_per_cell * sizeof(*(cell.edge_type)));
1042
1043 size_t * cell_to_vertex = grid->cell_to_vertex;
1044 size_t * cell_to_vertex_offsets = grid->cell_to_vertex_offsets;
1045 size_t * cell_to_edge = grid->cell_to_edge;
1046 size_t * cell_to_edge_offsets = grid->cell_to_edge_offsets;
1047 yac_coordinate_pointer vertex_coordinates = grid->vertex_coordinates;
1048 enum yac_edge_type * edge_type = grid->edge_type;
1049 int * num_vertices_per_cell = grid->num_vertices_per_cell;
1050
1051 // generate a bounding circle for each cell and use it to determine
1052 // ranks of the processes that require this cell
1053 for (size_t i = 0; i < num_cells; ++i) {
1054
1055 // get bounding circle for current cell
1056 cell.num_corners = num_vertices_per_cell[i];
1057 size_t * curr_cell_to_vertex =
1058 cell_to_vertex + cell_to_vertex_offsets[i];
1059 size_t * curr_cell_to_edge =
1060 cell_to_edge + cell_to_edge_offsets[i];
1061 for (int j = 0; j < num_vertices_per_cell[i]; ++j) {
1062 yac_coordinate_pointer curr_vertex_coords =
1063 vertex_coordinates + curr_cell_to_vertex[j];
1064 for (int k = 0; k < 3; ++k)
1065 cell.coordinates_xyz[j][k] = (*curr_vertex_coords)[k];
1066 cell.edge_type[j] = edge_type[curr_cell_to_edge[j]];
1067 }
1068 int rank_count;
1069 if (cell.num_corners > 0) {
1070
1071 struct bounding_circle bnd_circle;
1072 yac_get_cell_bounding_circle(cell, &bnd_circle);
1073
1075 proc_sphere_part, bnd_circle, ranks_buffer, &rank_count);
1076 } else {
1077 ranks_buffer[0] = 0;
1078 rank_count = 1;
1079 }
1080
1081 ENSURE_ARRAY_SIZE(dist_cell_ranks_, dist_cell_ranks_array_size,
1082 offset + (size_t)rank_count);
1083 memcpy(dist_cell_ranks_ + offset, ranks_buffer,
1084 (size_t)rank_count * sizeof(*ranks_buffer));
1085 dist_cell_rank_counts[i] = rank_count;
1086 dist_cell_rank_offsets[i] = offset;
1087 offset += (size_t)rank_count;
1088 }
1089
1090 dist_cell_rank_offsets[num_cells] = offset;
1091
1092 free(cell.edge_type);
1093 free(cell.coordinates_xyz);
1094 free(ranks_buffer);
1095
1096 *dist_cell_ranks = dist_cell_ranks_;
1097}
1098
1099static MPI_Datatype yac_get_id_reorder_coord_coord_mpi_datatype(MPI_Comm comm) {
1100
1101 struct id_reorder_coord dummy;
1102 MPI_Datatype coord_dt;
1103 int array_of_blocklengths[] = {3};
1104 const MPI_Aint array_of_displacements[] =
1105 {(MPI_Aint)(intptr_t)(const void *)&(dummy.coord) -
1106 (MPI_Aint)(intptr_t)(const void *)&dummy};
1107 const MPI_Datatype array_of_types[] = {MPI_DOUBLE};
1109 MPI_Type_create_struct(1, array_of_blocklengths, array_of_displacements,
1110 array_of_types, &coord_dt), comm);
1111 return yac_create_resized(coord_dt, sizeof(dummy), comm);
1112}
1113
1114static MPI_Datatype yac_get_id_reorder_coord_id_mpi_datatype(MPI_Comm comm) {
1115
1116 struct id_reorder_coord dummy;
1117 MPI_Datatype global_id_dt;
1118 int array_of_blocklengths[] = {1};
1119 const MPI_Aint array_of_displacements[] =
1120 {(MPI_Aint)(intptr_t)(const void *)&(dummy.global_id) -
1121 (MPI_Aint)(intptr_t)(const void *)&dummy};
1122 const MPI_Datatype array_of_types[] = {yac_int_dt};
1124 MPI_Type_create_struct(1, array_of_blocklengths, array_of_displacements,
1125 array_of_types, &global_id_dt), comm);
1126 return yac_create_resized(global_id_dt, sizeof(dummy), comm);
1127}
1128
1130 const void * a, const void * b) {
1131
1132 return compare_coords(((const struct id_reorder_coord *)a)->coord,
1133 ((const struct id_reorder_coord *)b)->coord);
1134}
1135
1137 const void * a, const void * b) {
1138
1139 return (((const struct id_reorder_coord *)a)->reorder_idx >
1140 ((const struct id_reorder_coord *)b)->reorder_idx) -
1141 (((const struct id_reorder_coord *)a)->reorder_idx <
1142 ((const struct id_reorder_coord *)b)->reorder_idx);
1143}
1144
1145// generates global vertex ids if they are not provied by the user
1146// and returns the distributed owners for each vertex in the provided
1147// grid data
1149 struct proc_sphere_part_node * proc_sphere_part,
1150 struct yac_basic_grid_data * grid, MPI_Comm comm) {
1151
1152 int * vertex_ranks = xmalloc(grid->num_vertices * sizeof(*vertex_ranks));
1153
1154 // determine the dist grid ranks for all vertices
1156 proc_sphere_part, grid->vertex_coordinates, grid->num_vertices,
1157 vertex_ranks);
1158
1159 // check whether only of subset of the processes have defined
1160 // their global ids, which is not supported
1161 int ids_available_local =
1162 (grid->num_vertices > 0) && (grid->vertex_ids != NULL);
1163 int ids_available_global;
1164 yac_mpi_call(MPI_Allreduce(&ids_available_local, &ids_available_global, 1,
1165 MPI_INT, MPI_MAX, comm), comm);
1166
1167 // if there are vertices defined locally for the current grid
1168 if (grid->num_vertices != 0) {
1169 YAC_ASSERT(
1170 ids_available_local || !ids_available_global,
1171 "ERROR(generate_vertex_ids): inconsistent global ids")
1172
1173 // generate core mask if not available
1174 if (grid->core_vertex_mask == NULL) {
1175 int * core_mask =
1176 (grid->core_vertex_mask =
1177 xmalloc(grid->num_vertices * sizeof(*core_mask)));
1178 for (size_t j = 0; j < grid->num_vertices; ++j) core_mask[j] = 1;
1179 }
1180 }
1181
1182 // if we do not need to generate the global ids
1183 if (ids_available_global) return vertex_ranks;
1184
1185 int comm_size;
1186 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1187
1188 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
1190 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1191
1192 for (size_t i = 0; i < grid->num_vertices; ++i)
1193 sendcounts[vertex_ranks[i]]++;
1194
1196 1, sendcounts, recvcounts, sdispls, rdispls, comm);
1197
1198 size_t orig_vertex_count = grid->num_vertices;
1199 size_t dist_vertex_count =
1200 recvcounts[comm_size - 1] + rdispls[comm_size - 1];
1201
1202 struct id_reorder_coord * id_reorder_coord_buffer =
1203 xmalloc((orig_vertex_count + dist_vertex_count) *
1204 sizeof(*id_reorder_coord_buffer));
1205 struct id_reorder_coord *
1206 id_reorder_coord_send_buffer = id_reorder_coord_buffer;
1207 struct id_reorder_coord *
1208 id_reorder_coord_recv_buffer = id_reorder_coord_buffer + orig_vertex_count;
1209
1210 // pack send buffer
1211 for (size_t i = 0; i < orig_vertex_count; ++i) {
1212 size_t pos = sdispls[vertex_ranks[i] + 1]++;
1213 id_reorder_coord_send_buffer[pos].reorder_idx = i;
1214 for (int j = 0; j < 3; ++j)
1215 id_reorder_coord_send_buffer[pos].coord[j] =
1216 grid->vertex_coordinates[i][j];
1217 }
1218
1219 MPI_Datatype id_reorder_coord_coord_dt =
1221
1222 // exchange data
1224 id_reorder_coord_send_buffer, sendcounts, sdispls,
1225 id_reorder_coord_recv_buffer, recvcounts, rdispls,
1226 sizeof(*id_reorder_coord_send_buffer), id_reorder_coord_coord_dt, comm);
1227
1228 yac_mpi_call(MPI_Type_free(&id_reorder_coord_coord_dt), comm);
1229
1230 for (size_t i = 0; i < dist_vertex_count; ++i)
1231 id_reorder_coord_recv_buffer[i].reorder_idx = i;
1232
1233 // sort received vertices based on coordinates
1234 qsort(id_reorder_coord_recv_buffer, dist_vertex_count,
1235 sizeof(*id_reorder_coord_recv_buffer), compare_id_reorder_coord_coord);
1236
1237 size_t unique_count = dist_vertex_count > 0;
1238 struct id_reorder_coord * prev = id_reorder_coord_recv_buffer;
1239
1240 // determine number of unique coordinates
1241 for (size_t i = 0; i < dist_vertex_count; ++i) {
1242 struct id_reorder_coord * curr = id_reorder_coord_recv_buffer + i;
1243 if (compare_id_reorder_coord_coord(prev, curr)) {
1244 ++unique_count;
1245 prev = curr;
1246 }
1247 curr->global_id = (yac_int)unique_count - 1;
1248 }
1249
1250 YAC_ASSERT(
1251 unique_count <= (size_t)XT_INT_MAX,
1252 "ERROR(generate_vertex_ids): global_id out of bounds")
1253
1254 yac_int yac_int_unique_count = (yac_int)unique_count;
1255 yac_int id_offset;
1256
1257 // determine exclusive scan of sum of numbers of unique
1258 // coordinates on all ranks
1259 yac_mpi_call(MPI_Exscan(&yac_int_unique_count, &id_offset, 1, yac_int_dt,
1260 MPI_SUM, comm), comm);
1261 int comm_rank;
1262 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
1263 if (comm_rank == 0) id_offset = 0;
1264
1265 YAC_ASSERT(
1266 ((size_t)id_offset + unique_count) <= (size_t)XT_INT_MAX,
1267 "ERROR(generate_vertex_ids): global_id out of bounds")
1268
1269 // adjust global ids
1270 for (size_t i = 0; i < dist_vertex_count; ++i)
1271 id_reorder_coord_recv_buffer[i].global_id += id_offset;
1272
1273 // return received vertices into original order
1274 qsort(id_reorder_coord_recv_buffer, dist_vertex_count,
1275 sizeof(*id_reorder_coord_recv_buffer),
1277
1278 MPI_Datatype id_reorder_coord_id_dt =
1280
1281 // return generated global ids data
1283 id_reorder_coord_recv_buffer, recvcounts, rdispls,
1284 id_reorder_coord_send_buffer, sendcounts, sdispls,
1285 sizeof(*id_reorder_coord_send_buffer), id_reorder_coord_id_dt, comm);
1286
1287 yac_mpi_call(MPI_Type_free(&id_reorder_coord_id_dt), comm);
1288
1289 yac_int * vertex_ids =
1290 ((grid->vertex_ids =
1291 (grid->num_vertices > 0)?
1292 xmalloc(grid->num_vertices * sizeof(*(grid->vertex_ids))):NULL));
1293
1294 for (size_t i = 0; i < grid->num_vertices; ++i)
1295 vertex_ids[id_reorder_coord_send_buffer[i].reorder_idx] =
1296 id_reorder_coord_send_buffer[i].global_id;
1297
1298 free(id_reorder_coord_buffer);
1299 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
1300
1301 return vertex_ranks;
1302}
1303
1304// generates global cell/edge ids if they are not provied by the user
1306 struct yac_basic_grid_data * grid, int * vertex_ranks, MPI_Comm comm) {
1307
1308 int comm_rank, comm_size;
1309 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
1310 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1311
1312 // check whether only of subset of the processes have defined
1313 // their global ids, which is not supported
1314 int ids_available_local[2], ids_available_global[2];
1315 ids_available_local[0] = (grid->num_cells > 0) && (grid->cell_ids != NULL);
1316 ids_available_local[1] = (grid->num_edges > 0) && (grid->edge_ids != NULL);
1317 yac_mpi_call(MPI_Allreduce(ids_available_local, ids_available_global, 2,
1318 MPI_INT, MPI_MAX, comm), comm);
1319
1320 if (grid->num_cells > 0) {
1321 YAC_ASSERT(
1322 ids_available_local[0] == ids_available_global[0],
1323 "ERROR(generate_ce_ids): inconsistent global ids")
1324 // generate core mask if not available
1325 if (grid->core_cell_mask == NULL) {
1326 int * core_cell_mask =
1327 (grid->core_cell_mask =
1328 xmalloc(grid->num_cells * sizeof(*core_cell_mask)));
1329 for (size_t j = 0; j < grid->num_cells; ++j) core_cell_mask[j] = 1;
1330 }
1331 }
1332 if (grid->num_edges > 0) {
1333 YAC_ASSERT(
1334 ids_available_local[1] == ids_available_global[1],
1335 "ERROR(generate_ce_ids): inconsistent global ids")
1336 // generate core mask if not available
1337 if (grid->core_edge_mask == NULL) {
1338 int * core_edge_mask =
1339 (grid->core_edge_mask =
1340 xmalloc(grid->num_edges * sizeof(*core_edge_mask)));
1341 for (size_t j = 0; j < grid->num_edges; ++j) core_edge_mask[j] = 1;
1342 }
1343 }
1344
1345 // if no ids need to be generated
1346 if (ids_available_global[0] && ids_available_global[1]) return;
1347
1348 int * rank_buffer =
1349 xmalloc(
1350 (((ids_available_global[0])?0:(grid->num_cells)) +
1351 ((ids_available_global[1])?0:(grid->num_edges))) * sizeof(*rank_buffer));
1352 int * cell_ranks = rank_buffer;
1353 int * edge_ranks =
1354 rank_buffer + ((ids_available_global[0])?0:(grid->num_cells));
1355
1356 size_t * size_t_buffer =
1357 xmalloc((8 * (size_t)comm_size + 1) * sizeof(*size_t_buffer));
1358 size_t * sendcounts = size_t_buffer + 0 * comm_size;
1359 size_t * recvcounts = size_t_buffer + 2 * comm_size;
1360 size_t * total_sendcounts = size_t_buffer + 4 * comm_size;
1361 size_t * total_recvcounts = size_t_buffer + 5 * comm_size;
1362 size_t * total_sdispls = size_t_buffer + 6 * comm_size;
1363 size_t * total_rdispls = size_t_buffer + 7 * comm_size + 1;
1364 memset(sendcounts, 0, 2 * (size_t)comm_size * sizeof(*sendcounts));
1365
1366 yac_int * cell_to_vertex_ids = NULL;
1367 int max_num_vertices_per_cell = 0;
1368
1369 if (!ids_available_global[0]) {
1370
1371 { // compute the global maximum number of vertices per cell
1372 for (size_t i = 0; i < grid->num_cells; ++i)
1373 if (grid->num_vertices_per_cell[i] > max_num_vertices_per_cell)
1374 max_num_vertices_per_cell = grid->num_vertices_per_cell[i];
1375 yac_mpi_call(MPI_Allreduce(MPI_IN_PLACE, &max_num_vertices_per_cell, 1,
1376 MPI_INT, MPI_MAX, comm), comm);
1377 }
1378
1379 cell_to_vertex_ids =
1380 xmalloc(grid->num_cells * max_num_vertices_per_cell *
1381 sizeof(*cell_to_vertex_ids));
1382
1383 for (size_t i = 0; i < grid->num_cells; ++i) {
1384
1385 int curr_num_vertices = grid->num_vertices_per_cell[i];
1386 yac_int * curr_cell_to_vertex_ids =
1387 cell_to_vertex_ids + i * max_num_vertices_per_cell;
1388
1389 int cell_rank;
1390 if (curr_num_vertices > 0) {
1391 size_t * curr_cell_vertices =
1392 grid->cell_to_vertex + grid->cell_to_vertex_offsets[i];
1393 size_t min_vertex = curr_cell_vertices[0];
1394 curr_cell_to_vertex_ids[0] = grid->vertex_ids[min_vertex];
1395 for (int j = 1; j < curr_num_vertices; ++j) {
1396 size_t curr_vertex_idx = curr_cell_vertices[j];
1397 yac_int curr_vertex_id = grid->vertex_ids[curr_vertex_idx];
1398 insert_global_id(curr_cell_to_vertex_ids, j, curr_vertex_id);
1399 if (curr_cell_to_vertex_ids[0] == curr_vertex_id)
1400 min_vertex = curr_vertex_idx;
1401 }
1402 cell_rank = vertex_ranks[min_vertex];
1403 } else {
1404 cell_rank = 0;
1405 }
1406 for (int j = curr_num_vertices; j < max_num_vertices_per_cell; ++j)
1407 curr_cell_to_vertex_ids[j] = XT_INT_MAX;
1408
1409 sendcounts[2 * ((cell_ranks[i] = cell_rank)) + 0]++;
1410 }
1411 }
1412
1413 yac_int * edge_to_vertex_ids = NULL;
1414
1415 if (!ids_available_global[1]) {
1416
1417 edge_to_vertex_ids =
1418 xmalloc(2 * grid->num_edges * sizeof(*edge_to_vertex_ids));
1419
1420 for (size_t i = 0; i < grid->num_edges; ++i) {
1421
1422 size_t * curr_edge_to_vertex = &(grid->edge_to_vertex[i][0]);
1423 yac_int * curr_edge_vertex_ids = edge_to_vertex_ids + 2 * i;
1424 curr_edge_vertex_ids[0] = grid->vertex_ids[curr_edge_to_vertex[0]];
1425 curr_edge_vertex_ids[1] = grid->vertex_ids[curr_edge_to_vertex[1]];
1426
1427 if (curr_edge_vertex_ids[0] > curr_edge_vertex_ids[1]) {
1428 yac_int temp = curr_edge_vertex_ids[0];
1429 curr_edge_vertex_ids[0] = curr_edge_vertex_ids[1];
1430 curr_edge_vertex_ids[1] = temp;
1431 sendcounts[
1432 2 * ((edge_ranks[i] = vertex_ranks[curr_edge_to_vertex[1]])) + 1]++;
1433 } else {
1434 sendcounts[
1435 2 * ((edge_ranks[i] = vertex_ranks[curr_edge_to_vertex[0]])) + 1]++;
1436 }
1437 }
1438 }
1439
1440 // exchange the number of cells and edges
1441 yac_mpi_call(MPI_Alltoall(sendcounts, 2, YAC_MPI_SIZE_T,
1442 recvcounts, 2, YAC_MPI_SIZE_T, comm), comm);
1443
1444 total_sdispls[0] = 0;
1445 size_t recv_counts[2] = {0,0};
1446 size_t saccu = 0, raccu = 0;
1447 for (int i = 0; i < comm_size; ++i) {
1448 total_sdispls[i+1] = saccu;
1449 total_rdispls[i] = raccu;
1450 recv_counts[0] += recvcounts[2 * i + 0];
1451 recv_counts[1] += recvcounts[2 * i + 1];
1452 total_sendcounts[i] = sendcounts[2 * i + 0] *
1453 (size_t)max_num_vertices_per_cell +
1454 sendcounts[2 * i + 1] * 2;
1455 total_recvcounts[i] = recvcounts[2 * i + 0] *
1456 (size_t)max_num_vertices_per_cell +
1457 recvcounts[2 * i + 1] * 2;
1458 saccu += total_sendcounts[i];
1459 raccu += total_recvcounts[i];
1460 }
1461 size_t local_data_count = total_sendcounts[comm_size - 1] +
1462 total_sdispls[comm_size];
1463 size_t recv_count = total_recvcounts[comm_size - 1] +
1464 total_rdispls[comm_size - 1];
1465
1466 yac_int * yac_int_buffer =
1467 xcalloc((local_data_count + recv_count), sizeof(*yac_int_buffer));
1468 yac_int * send_buffer = yac_int_buffer;
1469 yac_int * recv_buffer = yac_int_buffer + local_data_count;
1470
1471 // pack send buffer
1472 if (!ids_available_global[0])
1473 for (size_t i = 0; i < grid->num_cells; ++i)
1474 for (int j = 0; j < max_num_vertices_per_cell; ++j)
1475 send_buffer[total_sdispls[cell_ranks[i] + 1]++] =
1476 cell_to_vertex_ids[i * max_num_vertices_per_cell + j];
1477 if (!ids_available_global[1])
1478 for (size_t i = 0; i < grid->num_edges; ++i)
1479 for (int j = 0; j < 2; ++j)
1480 send_buffer[total_sdispls[edge_ranks[i] + 1]++] =
1481 edge_to_vertex_ids[2 * i + j];
1482
1483 free(edge_to_vertex_ids);
1484 free(cell_to_vertex_ids);
1485
1486 // exchange data
1487 yac_alltoallv_yac_int_p2p(
1488 send_buffer, total_sendcounts, total_sdispls,
1489 recv_buffer, total_recvcounts, total_rdispls, comm);
1490
1491 struct n_ids_reorder * n_ids_reorder_buffer =
1492 xmalloc((recv_counts[0] + recv_counts[1]) * sizeof(*n_ids_reorder_buffer));
1493 struct n_ids_reorder * n_ids_reorder[2] =
1494 {n_ids_reorder_buffer, n_ids_reorder_buffer + recv_counts[0]};
1495
1496 size_t offset = 0;
1497 int index_counts[2] = {max_num_vertices_per_cell, 2};
1498 size_t reorder_idx = 0;
1499 recv_counts[0] = 0;
1500 recv_counts[1] = 0;
1501 for (int i = 0; i < comm_size; ++i) {
1502 for (int j = 0; j < 2; ++j) {
1503 size_t curr_count = recvcounts[2 * i + j];
1504 for (size_t k = 0; k < curr_count; ++k, ++reorder_idx, ++recv_counts[j]) {
1505 n_ids_reorder[j][recv_counts[j]].count = index_counts[j];
1506 n_ids_reorder[j][recv_counts[j]].ids = recv_buffer + offset;
1507 n_ids_reorder[j][recv_counts[j]].reorder_idx = reorder_idx;
1508 offset += index_counts[j];
1509 }
1510 }
1511 }
1512
1513 for (int i = 0; i < 2; ++i) {
1514
1515 if (ids_available_global[i]) continue;
1516
1517 qsort(n_ids_reorder[i], recv_counts[i], sizeof(*(n_ids_reorder[i])),
1519
1520 size_t unique_count = recv_counts[i] > 0;
1521 struct n_ids_reorder * prev = n_ids_reorder[i];
1522 struct n_ids_reorder * curr = n_ids_reorder[i];
1523
1524 for (size_t j = 0; j < recv_counts[i]; ++j, ++curr) {
1525 if (compare_n_ids_reorder_ids(prev, curr)) {
1526 ++unique_count;
1527 prev = curr;
1528 }
1529 curr->global_id = (yac_int)(unique_count - 1);
1530 }
1531
1532 YAC_ASSERT(
1533 unique_count <= (size_t)XT_INT_MAX,
1534 "ERROR(generate_global_ce_ids): global_id out of bounds")
1535
1536 yac_int yac_int_unique_count = (yac_int)unique_count;
1537 yac_int id_offset;
1538
1539 // determine exclusive scan of sum of numbers of unique ids on all ranks
1540 yac_mpi_call(MPI_Exscan(&yac_int_unique_count, &id_offset, 1, yac_int_dt,
1541 MPI_SUM, comm), comm);
1542 if (comm_rank == 0) id_offset = 0;
1543
1544 YAC_ASSERT(
1545 ((size_t)id_offset + unique_count) <= (size_t)XT_INT_MAX,
1546 "ERROR(generate_global_ce_ids): global_id out of bounds")
1547
1548 // adjust global ids
1549 for (size_t j = 0; j < recv_counts[i]; ++j)
1550 n_ids_reorder[i][j].global_id += id_offset;
1551 }
1552 free(yac_int_buffer);
1553
1554 qsort(n_ids_reorder_buffer, recv_counts[0] + recv_counts[1],
1555 sizeof(*n_ids_reorder_buffer), compare_n_ids_reorder_reorder);
1556
1557 yac_int * global_ids_buffer =
1558 xmalloc((recv_counts[0] + recv_counts[1] +
1559 ((ids_available_global[0])?0:(grid->num_cells)) +
1560 ((ids_available_global[1])?0:(grid->num_edges))) *
1561 sizeof(*global_ids_buffer));
1562 yac_int * send_global_ids = global_ids_buffer;
1563 yac_int * recv_global_ids =
1564 global_ids_buffer + recv_counts[0] + recv_counts[1];
1565
1566 for (size_t i = 0; i < recv_counts[0] + recv_counts[1]; ++i)
1567 send_global_ids[i] = n_ids_reorder_buffer[i].global_id;
1568 free(n_ids_reorder_buffer);
1569
1570 // generate count and displs data
1571 saccu = 0, raccu = 0;
1572 for (int i = 0; i < comm_size; ++i) {
1573 total_sdispls[i] = saccu;
1574 total_rdispls[i] = raccu;
1575 saccu +=
1576 ((total_sendcounts[i] = recvcounts[2 * i + 0] + recvcounts[2 * i + 1]));
1577 raccu +=
1578 ((total_recvcounts[i] = sendcounts[2 * i + 0] + sendcounts[2 * i + 1]));
1579 }
1580
1581 // exchange generated global ids data
1582 yac_alltoallv_yac_int_p2p(
1583 send_global_ids, total_sendcounts, total_sdispls,
1584 recv_global_ids, total_recvcounts, total_rdispls, comm);
1585
1586 if ((!ids_available_global[0]) && (grid->num_cells > 0))
1587 grid->cell_ids = xmalloc(grid->num_cells * sizeof(*grid->cell_ids));
1588 if ((!ids_available_global[1]) && (grid->num_edges > 0))
1589 grid->edge_ids = xmalloc(grid->num_edges * sizeof(grid->edge_ids));
1590
1591 // unpack generated global ids
1592 if (!ids_available_global[0])
1593 for (size_t i = 0; i < grid->num_cells; ++i)
1594 grid->cell_ids[i] = recv_global_ids[total_rdispls[cell_ranks[i]]++];
1595 if (!ids_available_global[1])
1596 for (size_t i = 0; i < grid->num_edges; ++i)
1597 grid->edge_ids[i] = recv_global_ids[total_rdispls[edge_ranks[i]]++];
1598
1599 free(rank_buffer);
1600 free(size_t_buffer);
1601 free(global_ids_buffer);
1602}
1603
1604// generates global cell/vertex/edge ids if they are not provided by the user
1606 struct proc_sphere_part_node * proc_sphere_part,
1607 struct yac_basic_grid_data * grid, MPI_Comm comm) {
1608
1609 // generate global ids for all vertices (if non-existent)
1610 int * vertex_ranks = generate_vertex_ids(proc_sphere_part, grid, comm);
1611
1612 // generate global ids for all cell and edge (if non-existent)
1613 generate_ce_ids(grid, vertex_ranks, comm);
1614
1615 free(vertex_ranks);
1616}
1617
1618// generate edge to cell mapping
1620 const_size_t_pointer cell_to_edge, const_int_pointer num_edges_per_cell,
1621 int * core_cell_mask, size_t num_cells, size_t num_edges) {
1622
1623 if (num_cells == 0) return NULL;
1624
1625 yac_size_t_2_pointer edge_to_cell = xmalloc(num_edges * sizeof(*edge_to_cell));
1626
1627 for (size_t i = 0; i < num_edges; ++i) {
1628 edge_to_cell[i][0] = SIZE_MAX;
1629 edge_to_cell[i][1] = SIZE_MAX;
1630 }
1631
1632 for (size_t i = 0, offset = 0; i < num_cells; ++i) {
1633
1634 size_t curr_num_edges = num_edges_per_cell[i];
1635 const_size_t_pointer curr_cell_to_edge = cell_to_edge + offset;
1636 offset += curr_num_edges;
1637
1638 if ((core_cell_mask == NULL) || core_cell_mask[i]) {
1639
1640 for (size_t j = 0; j < curr_num_edges; ++j) {
1641
1642 size_t curr_edge = curr_cell_to_edge[j];
1643 size_t * curr_edge_to_cell = edge_to_cell[curr_edge];
1644 curr_edge_to_cell += *curr_edge_to_cell != SIZE_MAX;
1646 *curr_edge_to_cell == SIZE_MAX,
1647 "ERROR(generate_edge_to_cell): "
1648 "more than two cells point to a single edge "
1649 "(does the grid contain degenrated cells (less than 3 corners) "
1650 "or duplicated cells; "
1651 "these can be masked out using the core mask)\n"
1652 "(num_cells: %zu cell_idx: %zu: num_cell_edge %zu)",
1653 num_cells, i, curr_num_edges)
1654 *curr_edge_to_cell = i;
1655 }
1656 }
1657 }
1658
1659 return edge_to_cell;
1660}
1661
1663 void * grid, size_t vertex_idx, size_t ** cells, int * num_cells) {
1664
1665 *cells =
1666 ((struct yac_basic_grid_data *)grid)->vertex_to_cell +
1667 ((struct yac_basic_grid_data *)grid)->vertex_to_cell_offsets[vertex_idx];
1668 *num_cells =
1669 ((struct yac_basic_grid_data *)grid)->num_cells_per_vertex[vertex_idx];
1670}
1671
1672static void get_edge_cells(
1673 void * edge_to_cell, size_t edge_idx, size_t ** cells, int * num_cells) {
1674
1675 *cells = ((yac_size_t_2_pointer)edge_to_cell)[edge_idx];
1676 *num_cells = 2;
1677}
1678
1679static void get_ve_ranks(
1680 int ** rank_buffer, size_t * rank_buffer_size,
1681 size_t * rank_buffer_array_size, int * num_ve_ranks,
1682 int * core_mask, size_t count,
1683 void(*get_cells)(
1684 void * ve_to_cell_data, size_t idx, size_t ** cells, int * num_cells),
1685 void * ve_to_cell_data, int * num_cell_ranks,
1686 size_t * dist_cell_rank_offsets) {
1687
1688 // determine dist ranks for all vertices/edges based on the dist ranks of
1689 // adjacent cells
1690 for (size_t i = 0; i < count; ++i) {
1691
1692 if (core_mask[i]) {
1693
1694 size_t * cells;
1695 int num_cells;
1696 get_cells(ve_to_cell_data, i, &cells, &num_cells);
1697
1698 size_t max_num_ranks = 0;
1699 for (int j = 0; j < num_cells; ++j)
1700 if (cells[j] != SIZE_MAX)
1701 max_num_ranks += (size_t)(num_cell_ranks[cells[j]]);
1702
1703 ENSURE_ARRAY_SIZE(*rank_buffer, *rank_buffer_array_size,
1704 *rank_buffer_size + (size_t)max_num_ranks);
1705
1706 int * curr_ranks = *rank_buffer + *rank_buffer_size;
1707 int curr_num_ranks = 0;
1708
1709 for (int j = 0; j < num_cells; ++j) {
1710
1711 size_t curr_cell_idx = cells[j];
1712 if (curr_cell_idx == SIZE_MAX) continue;
1713
1714 int curr_num_cell_ranks = num_cell_ranks[curr_cell_idx];
1715 int * curr_cell_ranks =
1716 *rank_buffer + dist_cell_rank_offsets[curr_cell_idx];
1717
1718 for (int k = 0; k < curr_num_cell_ranks; ++k)
1719 insert_rank(curr_ranks, &curr_num_ranks, curr_cell_ranks[k]);
1720 }
1721 *rank_buffer_size += (size_t)(num_ve_ranks[i] = curr_num_ranks);
1722 } else {
1723 num_ve_ranks[i] = 0;
1724 }
1725 }
1726}
1727
1728// generates mapping information from the decomposition provided by the user to
1729// one of the distributed grid
1731 struct proc_sphere_part_node * proc_sphere_part,
1732 struct yac_basic_grid_data * grid, MPI_Comm comm) {
1733
1734 // generate global ids if they are missing
1735 generate_global_ids(proc_sphere_part, grid, comm);
1736
1737 // determine dist ranks for all cells based on their bounding circles
1738 int * rank_buffer;
1739 int * num_ranks_buffer =
1740 xmalloc((grid->num_cells + grid->num_vertices + grid->num_edges) *
1741 sizeof(*num_ranks_buffer));
1742 int * num_cell_ranks = num_ranks_buffer;
1743 int * num_vertex_ranks = num_ranks_buffer + grid->num_cells;
1744 int * num_edge_ranks =
1745 num_ranks_buffer + grid->num_cells + grid->num_vertices;
1746 size_t * dist_cell_rank_offsets =
1747 xmalloc((grid->num_cells + 1) * sizeof(*dist_cell_rank_offsets));
1749 proc_sphere_part, grid, comm,
1750 &rank_buffer, num_cell_ranks, dist_cell_rank_offsets);
1751
1752 size_t rank_buffer_size = dist_cell_rank_offsets[grid->num_cells];
1753 size_t rank_buffer_array_size = dist_cell_rank_offsets[grid->num_cells];
1754
1755 // determine dist ranks for all vertices based on the dist ranks of
1756 // adjacent cells
1758 &rank_buffer, &rank_buffer_size, &rank_buffer_array_size, num_vertex_ranks,
1760 (void*)grid, num_cell_ranks, dist_cell_rank_offsets);
1761
1762 // determine dist ranks for all edges based on the dist ranks of
1763 // adjacent cells
1764 yac_size_t_2_pointer edge_to_cell =
1767 grid->core_cell_mask, grid->num_cells, grid->num_edges);
1769 &rank_buffer, &rank_buffer_size, &rank_buffer_array_size, num_edge_ranks,
1771 (void*)edge_to_cell, num_cell_ranks, dist_cell_rank_offsets);
1772 free(edge_to_cell);
1773
1774 int comm_size;
1775 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1776
1777 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
1779 3, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1780 size_t * size_t_buffer =
1781 xmalloc(4 * (size_t)comm_size * sizeof(*size_t_buffer));
1782 size_t * total_sendcounts = size_t_buffer + 0 * comm_size;
1783 size_t * total_recvcounts = size_t_buffer + 1 * comm_size;
1784 size_t * total_sdispls = size_t_buffer + 2 * comm_size;
1785 size_t * total_rdispls = size_t_buffer + 3 * comm_size;
1786
1787 struct {
1788 size_t count;
1789 int * num_ranks;
1790 int * core_mask;
1791 yac_int * ids;
1792 } cve_data[3] =
1793 {{.count = grid->num_cells,
1794 .num_ranks = num_cell_ranks,
1795 .core_mask = grid->core_cell_mask,
1796 .ids = grid->cell_ids},
1797 {.count = grid->num_vertices,
1798 .num_ranks = num_vertex_ranks,
1799 .core_mask = grid->core_vertex_mask,
1800 .ids = grid->vertex_ids},
1801 {.count = grid->num_edges,
1802 .num_ranks = num_edge_ranks,
1803 .core_mask = grid->core_edge_mask,
1804 .ids = grid->edge_ids}};
1805 size_t offset = 0;
1806
1807 // determine number of cells/vertices/edges that have to be
1808 // sent to other processes
1809 for (int location = 0; location < 3; ++location) {
1810 size_t count = cve_data[location].count;
1811 int * num_ranks = cve_data[location].num_ranks;
1812 int * core_mask = cve_data[location].core_mask;
1813 for (size_t i = 0; i < count; ++i) {
1814 int curr_num_ranks = num_ranks[i];
1815 int * ranks = rank_buffer + offset;
1816 offset += (size_t)curr_num_ranks;
1817 if (core_mask[i])
1818 for (int j = 0; j < curr_num_ranks; ++j)
1819 sendcounts[3 * ranks[j] + location]++;
1820 }
1821 }
1822
1824 3, sendcounts, recvcounts, sdispls, rdispls, comm);
1825
1826 size_t receive_counts[3] = {0,0,0};
1827 size_t saccu = 0, raccu = 0;
1828 for (int i = 0; i < comm_size; ++i) {
1829 total_sdispls[i] = saccu;
1830 total_rdispls[i] = raccu;
1831 total_sendcounts[i] = 0;
1832 total_recvcounts[i] = 0;
1833 for (int location = 0; location < 3; ++location) {
1834 total_sendcounts[i] += sendcounts[3 * i + location];
1835 total_recvcounts[i] += recvcounts[3 * i + location];
1836 receive_counts[location] += recvcounts[3 * i + location];
1837 }
1838 saccu += total_sendcounts[i];
1839 raccu += total_recvcounts[i];
1840 }
1841 size_t local_data_count = total_sendcounts[comm_size - 1] +
1842 total_sdispls[comm_size - 1];
1843 size_t recv_count = total_recvcounts[comm_size - 1] +
1844 total_rdispls[comm_size - 1];
1845
1846 struct id_pos * id_pos_buffer =
1847 xcalloc((local_data_count + recv_count), sizeof(*id_pos_buffer));
1848 struct id_pos * id_pos_send_buffer = id_pos_buffer;
1849 struct id_pos * id_pos_recv_buffer =
1850 id_pos_buffer + local_data_count;
1851
1852 // pack cell/vertex/edge information for distributed owners
1853 offset = 0;
1854 for (int location = 0; location < 3; ++location) {
1855 size_t count = cve_data[location].count;
1856 int * num_ranks = cve_data[location].num_ranks;
1857 int * core_mask = cve_data[location].core_mask;
1858 yac_int * ids = cve_data[location].ids;
1859 for (size_t i = 0; i < count; ++i) {
1860 int curr_num_ranks = num_ranks[i];
1861 int * ranks = rank_buffer + offset;
1862 offset += (size_t)curr_num_ranks;
1863 if (core_mask[i]) {
1864 yac_int global_id = ids[i];
1865 for (int j = 0; j < curr_num_ranks; ++j) {
1866 size_t pos = sdispls[3 * ranks[j] + location + 1]++;
1867 id_pos_send_buffer[pos].global_id = global_id;
1868 id_pos_send_buffer[pos].orig_pos = i;
1869 }
1870 }
1871 }
1872 }
1873 free(num_ranks_buffer);
1874 free(rank_buffer);
1875 free(dist_cell_rank_offsets);
1876
1877 MPI_Datatype id_pos_dt = yac_get_id_pos_mpi_datatype(comm);
1878
1879 // exchange cell/vertex/edge information for distributed owners
1881 id_pos_send_buffer, total_sendcounts, total_sdispls,
1882 id_pos_recv_buffer, total_recvcounts, total_rdispls,
1883 sizeof(*id_pos_send_buffer), id_pos_dt, comm);
1884
1885 yac_mpi_call(MPI_Type_free(&id_pos_dt), comm);
1886
1887 size_t dist_owner_counts[3] = {0, 0, 0};
1888 for (int i = 0; i < comm_size; ++i)
1889 for (int location = 0; location < 3; ++location)
1890 dist_owner_counts[location] += recvcounts[3 * i + location];
1891 size_t max_dist_owner_count =
1892 MAX(MAX(dist_owner_counts[0], dist_owner_counts[1]), dist_owner_counts[2]);
1893 struct single_remote_point * temp_buffer =
1894 xcalloc(max_dist_owner_count, sizeof(*temp_buffer));
1895
1896 struct remote_points ** dist_owners = xmalloc(3 * sizeof(*dist_owners));
1897
1898 // unpack data
1899 for (int location = 0; location < 3; ++location) {
1900
1901 size_t count = 0;
1902 for (int i = 0; i < comm_size; ++i) {
1903 size_t curr_recvcount = recvcounts[3 * i + location];
1904 struct id_pos * curr_id_pos =
1905 id_pos_recv_buffer + rdispls[3 * i + location];
1906 for (size_t k = 0; k < curr_recvcount; ++k, ++count) {
1907 temp_buffer[count].global_id = curr_id_pos[k].global_id;
1908 temp_buffer[count].data.orig_pos = curr_id_pos[k].orig_pos;
1909 temp_buffer[count].data.rank = i;
1910 }
1911 }
1912
1913 // sort received global ids
1914 qsort(temp_buffer, count, sizeof(*temp_buffer),
1916
1917 struct remote_point * unique_ids = xmalloc(count * sizeof(*unique_ids));
1918 size_t num_unique_ids = 0;
1919
1920 // determine unique global ids
1921 yac_int prev_id = (count > 0)?temp_buffer[0].global_id - 1:-1;
1922 for (size_t i = 0; i < count; ++i) {
1923
1924 yac_int curr_id = temp_buffer[i].global_id;
1925 if (curr_id != prev_id) {
1926 prev_id = curr_id;
1927 unique_ids[num_unique_ids].global_id = curr_id;
1928 unique_ids[num_unique_ids].data.count = 1;
1929 num_unique_ids++;
1930 } else {
1931 unique_ids[num_unique_ids-1].data.count++;
1932 }
1933 }
1934
1935 size_t remote_point_info_buffer_size = 0;
1936 for (size_t i = 0; i < num_unique_ids; ++i)
1937 if (unique_ids[i].data.count > 1)
1938 remote_point_info_buffer_size +=
1939 (size_t)(unique_ids[i].data.count);
1940
1941 dist_owners[location] =
1942 xmalloc(remote_point_info_buffer_size * sizeof(struct remote_point_info) +
1943 sizeof(**dist_owners));
1944 dist_owners[location]->data =
1945 (unique_ids = xrealloc(unique_ids, num_unique_ids * sizeof(*unique_ids)));
1946 dist_owners[location]->count = num_unique_ids;
1947
1948 for (size_t i = 0, l = 0, offset = 0; i < num_unique_ids; ++i) {
1949 int curr_count = unique_ids[i].data.count;
1950 if (curr_count == 1) {
1951 unique_ids[i].data.data.single = temp_buffer[l].data;
1952 ++l;
1953 } else {
1954 unique_ids[i].data.data.multi = &(dist_owners[location]->buffer[offset]);
1955 for (int k = 0; k < curr_count; ++k, ++l, ++offset) {
1956 dist_owners[location]->buffer[offset] = temp_buffer[l].data;
1957 }
1958 }
1959 }
1960
1961 qsort(
1962 unique_ids, num_unique_ids, sizeof(*unique_ids), compare_remote_point);
1963 }
1964
1965 free(id_pos_buffer);
1966 free(temp_buffer);
1967 free(size_t_buffer);
1968 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
1969
1970 return dist_owners;
1971}
1972
1974 yac_int * ids, size_t count, yac_int ** sorted_ids, size_t ** reorder_idx) {
1975
1976 yac_int * sorted_ids_ = xrealloc(*sorted_ids, count * sizeof(*sorted_ids_));
1977 size_t * reorder_idx_ = xrealloc(*reorder_idx, count * sizeof(*reorder_idx_));
1978
1979 memcpy(sorted_ids_, ids, count * sizeof(*sorted_ids_));
1980 for (size_t i = 0; i < count; ++i) reorder_idx_[i] = i;
1981
1982 yac_quicksort_index_yac_int_size_t(sorted_ids_, count, reorder_idx_);
1983
1984 *sorted_ids = sorted_ids_;
1985 *reorder_idx = reorder_idx_;
1986}
1987
1988static Xt_xmap generate_xmap_data(
1989 void * data, size_t count, MPI_Comm comm,
1990 void(*set_sendcounts)(void*,size_t,size_t*),
1991 void(*pack)(void*,size_t,size_t*,int*,int*)) {
1992
1993 int comm_size;
1994 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1995
1996 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
1998 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1999
2000 set_sendcounts(data, count, sendcounts);
2001
2003 1, sendcounts, recvcounts, sdispls, rdispls, comm);
2004 size_t num_src_msg = 0, num_dst_msg = 0;
2005 for (int i = 0; i < comm_size; ++i) {
2006 num_src_msg += (recvcounts[i] > 0);
2007 num_dst_msg += (sendcounts[i] > 0);
2008 }
2009
2010 size_t recv_count =
2011 rdispls[comm_size-1] + recvcounts[comm_size-1];
2012
2013 int * pos_buffer =
2014 xmalloc((recv_count + 2 * count) * sizeof(*pos_buffer));
2015 int * src_pos_buffer = pos_buffer;
2016 int * dst_pos_buffer = pos_buffer + recv_count;
2017 int * send_pos_buffer = pos_buffer + recv_count + count;
2018
2019 // pack send buffer
2020 pack(data, count, sdispls, dst_pos_buffer, send_pos_buffer);
2021
2022 // redistribute positions of requested data
2023 yac_alltoallv_int_p2p(
2024 send_pos_buffer, sendcounts, sdispls,
2025 src_pos_buffer, recvcounts, rdispls, comm);
2026
2027 struct Xt_com_pos * com_pos =
2028 xmalloc(((size_t)num_src_msg + (size_t)num_dst_msg) * sizeof(*com_pos));
2029 struct Xt_com_pos * src_com = com_pos;
2030 struct Xt_com_pos * dst_com = com_pos + num_src_msg;
2031
2032 // set transfer_pos pointers and transfer_pos counts in com_pos's
2033 num_src_msg = 0;
2034 num_dst_msg = 0;
2035 for (int i = 0; i < comm_size; ++i) {
2036 if (recvcounts[i] > 0) {
2037 src_com[num_src_msg].transfer_pos = src_pos_buffer;
2038 src_com[num_src_msg].num_transfer_pos = recvcounts[i];
2039 src_com[num_src_msg].rank = i;
2040 src_pos_buffer += recvcounts[i];
2041 ++num_src_msg;
2042 }
2043 if (sendcounts[i] > 0) {
2044 dst_com[num_dst_msg].transfer_pos = dst_pos_buffer;
2045 dst_com[num_dst_msg].num_transfer_pos = sendcounts[i];
2046 dst_com[num_dst_msg].rank = i;
2047 dst_pos_buffer += sendcounts[i];
2048 ++num_dst_msg;
2049 }
2050 }
2051 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
2052
2053 Xt_xmap xmap =
2054 xt_xmap_intersection_pos_new(
2055 num_src_msg, src_com, num_dst_msg, dst_com, comm);
2056
2057 free(com_pos);
2058 free(pos_buffer);
2059
2060 return xmap;
2061}
2062
2064 void * data, size_t count, size_t * sendcounts) {
2065
2066 struct remote_point * remote_point_data = (struct remote_point *)data;
2067
2068 for (size_t i = 0; i < count; ++i) {
2069 struct remote_point_info * curr_info =
2070 (remote_point_data[i].data.count > 1)?
2071 (remote_point_data[i].data.data.multi):
2072 (&(remote_point_data[i].data.data.single));
2073 sendcounts[curr_info->rank]++;
2074 }
2075}
2076
2078 void * data, size_t count, size_t * sdispls,
2079 int * dst_pos_buffer, int * send_pos_buffer) {
2080
2081 struct remote_point * remote_point_data = (struct remote_point *)data;
2082
2083 YAC_ASSERT(
2084 count <= INT_MAX,
2085 "ERROR(generate_xmap_pack_remote_points): count exceeds INT_MAX");
2086
2087 for (size_t i = 0; i < count; ++i) {
2088 struct remote_point_info * curr_info =
2089 (remote_point_data[i].data.count > 1)?
2090 (remote_point_data[i].data.data.multi):
2091 (&(remote_point_data[i].data.data.single));
2092 size_t pos = sdispls[curr_info->rank+1]++;
2093 dst_pos_buffer[pos] = i;
2094 send_pos_buffer[pos] = (int)(curr_info->orig_pos);
2095 }
2096}
2097
2099 struct remote_point * remote_point_data, size_t count, MPI_Comm comm) {
2100
2101 return
2102 generate_xmap_data(remote_point_data, count, comm,
2105}
2106
2108 void * data, size_t count, size_t * sendcounts) {
2109
2110 struct remote_point_info * point_infos = (struct remote_point_info *)data;
2111
2112 for (size_t i = 0; i < count; ++i) sendcounts[point_infos[i].rank]++;
2113}
2114
2116 void * data, size_t count, size_t * sdispls,
2117 int * dst_pos_buffer, int * send_pos_buffer) {
2118
2119 struct remote_point_info * point_infos = (struct remote_point_info *)data;
2120
2121 YAC_ASSERT(
2122 count <= INT_MAX,
2123 "ERROR(generate_xmap_pack_remote_point_info): count exceeds INT_MAX");
2124
2125 for (size_t i = 0; i < count; ++i) {
2126 size_t pos = sdispls[point_infos[i].rank+1]++;
2127 dst_pos_buffer[pos] = i;
2128 send_pos_buffer[pos] = (int)(point_infos[i].orig_pos);
2129 }
2130}
2131
2133 struct remote_point_info * point_infos, size_t count, MPI_Comm comm) {
2134
2135 return
2136 generate_xmap_data(point_infos, count, comm,
2139}
2140
2142 const void * a, const void * b) {
2143
2144 return (((const struct single_remote_point_reorder *)a)->data.global_id >
2145 ((const struct single_remote_point_reorder *)b)->data.global_id) -
2146 (((const struct single_remote_point_reorder *)a)->data.global_id <
2147 ((const struct single_remote_point_reorder *)b)->data.global_id);
2148}
2149
2151 const void * a, const void * b) {
2152
2153 return (((const struct single_remote_point_reorder *)a)->reorder_idx >
2154 ((const struct single_remote_point_reorder *)b)->reorder_idx) -
2155 (((const struct single_remote_point_reorder *)a)->reorder_idx <
2156 ((const struct single_remote_point_reorder *)b)->reorder_idx);
2157}
2158
2160 size_t max_num_cell_ve, size_t num_cells, int * num_ve_per_cell,
2161 struct single_remote_point_reorder * cell_ve_point_data,
2162 yac_int ** ids, size_t * num_ids, size_t ** cell_to_ve, Xt_xmap * xmap,
2163 MPI_Comm comm) {
2164
2165 size_t num_total_ve = num_cells * max_num_cell_ve;
2166
2167 size_t total_num_cell_ve = 0;
2168 for (size_t i = 0, k = 0, l = 0; i < num_cells; ++i) {
2169 size_t num_vertices = (size_t)(num_ve_per_cell[i]);
2170 total_num_cell_ve += num_vertices;
2171 size_t j = 0;
2172 for (; j < num_vertices; ++j, ++k, ++l) {
2173 cell_ve_point_data[k].reorder_idx = l;
2174 }
2175 for (; j < max_num_cell_ve; ++j, ++k) {
2176 cell_ve_point_data[k].data.global_id = XT_INT_MAX;
2177 cell_ve_point_data[k].reorder_idx = SIZE_MAX;
2178 }
2179 }
2180 qsort(cell_ve_point_data, num_total_ve, sizeof(*cell_ve_point_data),
2182 yac_int * ids_ = xmalloc(total_num_cell_ve * sizeof(*ids_));
2183 size_t num_ids_ = 0;
2184 size_t * cell_to_ve_ =
2185 xmalloc(total_num_cell_ve * sizeof(*cell_to_ve_));
2186 struct remote_point_info * ve_point_info =
2187 xmalloc(total_num_cell_ve * sizeof(*ve_point_info));
2188 yac_int prev_global_id = XT_INT_MAX;
2189 for (size_t i = 0; i < num_total_ve; ++i) {
2190 yac_int curr_global_id = cell_ve_point_data[i].data.global_id;
2191 size_t curr_reorder_idx = cell_ve_point_data[i].reorder_idx;
2192 if (curr_global_id == XT_INT_MAX) break;
2193 if (curr_global_id != prev_global_id) {
2194 ids_[num_ids_] = (prev_global_id = curr_global_id);
2195 ve_point_info[num_ids_] = cell_ve_point_data[i].data.data;
2196 num_ids_++;
2197 }
2198 cell_to_ve_[curr_reorder_idx] = num_ids_ - 1;
2199 }
2200
2201 *ids = xrealloc(ids_, num_ids_ * sizeof(*ids_));
2202 *num_ids = num_ids_;
2203 *cell_to_ve = cell_to_ve_;
2204 *xmap = generate_xmap_from_remote_point_info(ve_point_info, num_ids_, comm);
2205 free(ve_point_info);
2206}
2207
2208static MPI_Datatype yac_get_coordinate_mpi_datatype(MPI_Comm comm) {
2209
2210 MPI_Datatype coord_dt;
2211 yac_mpi_call(MPI_Type_contiguous(3, MPI_DOUBLE, &coord_dt), comm);
2212 yac_mpi_call(MPI_Type_commit(&coord_dt), comm);
2213 return coord_dt;
2214}
2215
2217 struct yac_field_data * orig_field_data, size_t dist_size,
2218 Xt_redist redist_mask, Xt_redist redist_coords, MPI_Comm comm) {
2219
2220 struct yac_field_data * dist_field_data = yac_field_data_empty_new();
2221
2222 uint64_t counts[2], max_counts[2];
2223 if (orig_field_data != NULL) {
2224 counts[0] = yac_field_data_get_masks_count(orig_field_data);
2225 counts[1] = yac_field_data_get_coordinates_count(orig_field_data);
2226 } else {
2227 counts[0] = 0;
2228 counts[1] = 0;
2229 }
2231 MPI_Allreduce(
2232 counts, max_counts, 2, MPI_UINT64_T, MPI_MAX, comm), comm);
2233 YAC_ASSERT(
2234 (orig_field_data == NULL) ||
2235 ((counts[0] == max_counts[0]) && (counts[1] == max_counts[1])),
2236 "ERROR(field_data_init): inconsistent number of masks or coordinates")
2237
2238 int * data_available_flag =
2239 xcalloc(2 * max_counts[0] + max_counts[1], sizeof(*data_available_flag));
2240
2241 for (size_t i = 0; i < counts[0]; ++i) {
2242 data_available_flag[i] =
2243 yac_field_data_get_mask_data(orig_field_data, i) != NULL;
2244 data_available_flag[i + counts[0]] =
2245 (yac_field_data_get_mask_name(orig_field_data, i) != NULL)?
2246 ((int)strlen(yac_field_data_get_mask_name(orig_field_data, i))+1):0;
2247 }
2248 for (size_t i = 0; i < counts[1]; ++i)
2249 data_available_flag[i + 2 * counts[0]] =
2250 yac_field_data_get_coordinates_data(orig_field_data, i) != NULL;
2251
2253 MPI_Allreduce(
2254 MPI_IN_PLACE, data_available_flag,
2255 (int)(2 * max_counts[0] + max_counts[1]), MPI_INT, MPI_MAX, comm), comm);
2256
2257 for (size_t i = 0; i < counts[0]; ++i) {
2258 YAC_ASSERT(
2259 data_available_flag[i] ==
2260 (yac_field_data_get_mask_data(orig_field_data, i) != NULL),
2261 "ERROR(field_data_init): inconsistent availability of masks")
2262 int mask_name_len =
2263 (yac_field_data_get_mask_name(orig_field_data, i) != NULL)?
2264 ((int)strlen(yac_field_data_get_mask_name(orig_field_data, i))+1):0;
2265 YAC_ASSERT(
2266 data_available_flag[i + counts[0]] ==
2267 mask_name_len,
2268 "ERROR(field_data_init): inconsistent mask names")
2269 }
2270
2271 for (size_t i = 0; i < counts[1]; ++i)
2272 YAC_ASSERT(
2273 data_available_flag[i + 2 * counts[0]] ==
2274 (yac_field_data_get_coordinates_data(orig_field_data, i) != NULL),
2275 "ERROR(field_data_init): inconsistent availability of coordinates")
2276
2277 for (uint64_t i = 0; i < max_counts[0]; ++i) {
2278 int * dist_mask = NULL;
2279 if (data_available_flag[i]) {
2280 dist_mask = xmalloc(dist_size * sizeof(*dist_mask));
2281 int const * orig_mask =
2282 (orig_field_data != NULL)?
2283 yac_field_data_get_mask_data(orig_field_data, i):NULL;
2284 xt_redist_s_exchange1(redist_mask, orig_mask, dist_mask);
2285 }
2286
2287 int mask_name_len = data_available_flag[i + max_counts[0]];
2288 char * mask_name = NULL;
2289 if (mask_name_len > 0) {
2290 mask_name = xmalloc((size_t)mask_name_len * sizeof(*mask_name));
2291 if ((orig_field_data != NULL) &&
2292 (yac_field_data_get_mask_name(orig_field_data, i) != NULL))
2293 memcpy(
2294 mask_name, yac_field_data_get_mask_name(orig_field_data, i),
2295 (size_t)mask_name_len);
2296 else
2297 memset(mask_name, 0, (size_t)mask_name_len * sizeof(*mask_name));
2299 MPI_Allreduce(
2300 MPI_IN_PLACE, mask_name, mask_name_len, MPI_CHAR, MPI_MAX, comm),
2301 comm);
2302 YAC_ASSERT(
2303 (orig_field_data == NULL) ||
2304 (yac_field_data_get_mask_name(orig_field_data, i) == NULL) ||
2305 !memcmp(
2306 yac_field_data_get_mask_name(orig_field_data, i), mask_name,
2307 (size_t)mask_name_len * sizeof(*mask_name)),
2308 "ERROR(field_data_init): inconsistent mask names")
2309 }
2310 yac_field_data_add_mask_nocpy(dist_field_data, dist_mask, mask_name);
2311 }
2312
2313 for (uint64_t i = 0; i < max_counts[1]; ++i) {
2314 yac_coordinate_pointer dist_coordinates = NULL;
2315 if (data_available_flag[i + 2 * max_counts[0]]) {
2316 dist_coordinates = xmalloc(dist_size * sizeof(*dist_coordinates));
2317 yac_const_coordinate_pointer orig_coordinates =
2318 (orig_field_data != NULL)?
2319 yac_field_data_get_coordinates_data(orig_field_data, i):NULL;
2320 xt_redist_s_exchange1(redist_coords, orig_coordinates, dist_coordinates);
2321 }
2322 yac_field_data_add_coordinates_nocpy(dist_field_data, dist_coordinates);
2323 }
2324
2325 free(data_available_flag);
2326
2327 return dist_field_data;
2328}
2329
2331 struct proc_sphere_part_node * proc_sphere_part,
2332 struct yac_basic_grid * grid, MPI_Comm comm) {
2333
2334 struct yac_basic_grid_data * grid_data = yac_basic_grid_get_data(grid);
2335
2336 int comm_rank;
2337 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
2338
2339 int max_num_vertices_per_cell = 0;
2340 { // compute the global maximum number of vertices per cell
2341 for (size_t i = 0; i < grid_data->num_cells; ++i)
2342 if (grid_data->num_vertices_per_cell[i] > max_num_vertices_per_cell)
2343 max_num_vertices_per_cell = grid_data->num_vertices_per_cell[i];
2344 yac_mpi_call(MPI_Allreduce(MPI_IN_PLACE, &max_num_vertices_per_cell, 1,
2345 MPI_INT, MPI_MAX, comm), comm);
2346 }
2347
2348 struct remote_points ** dist_owner =
2349 generate_dist_remote_points(proc_sphere_part, grid_data, comm);
2350
2351 Xt_xmap xmap_cell_orig_to_dist =
2353 dist_owner[YAC_LOC_CELL]->data, dist_owner[YAC_LOC_CELL]->count, comm);
2354
2355 MPI_Datatype dt_single_remote_point =
2357 max_num_vertices_per_cell, comm);
2358 MPI_Datatype dt_coord = yac_get_coordinate_mpi_datatype(comm);
2359
2360 Xt_redist redist_cell_yac_int,
2361 redist_cell_int,
2362 redist_cell_corner_point_data,
2363 redist_cell_coords;
2364 redist_cell_yac_int = xt_redist_p2p_new(xmap_cell_orig_to_dist, yac_int_dt);
2365 redist_cell_int = xt_redist_p2p_new(xmap_cell_orig_to_dist, MPI_INT);
2366 redist_cell_corner_point_data =
2367 xt_redist_p2p_new(xmap_cell_orig_to_dist, dt_single_remote_point);
2368 yac_mpi_call(MPI_Type_free(&dt_single_remote_point), comm);
2369 redist_cell_coords = xt_redist_p2p_new(xmap_cell_orig_to_dist, dt_coord);
2370
2371 xt_xmap_delete(xmap_cell_orig_to_dist);
2372
2373 size_t num_cells = dist_owner[YAC_LOC_CELL]->count;
2374 size_t max_num_cell_corners =
2375 (size_t)max_num_vertices_per_cell * num_cells;
2376 size_t max_num_local_cell_corners =
2377 (size_t)max_num_vertices_per_cell * grid_data->num_cells;
2378 yac_int * cell_ids = xmalloc(num_cells * sizeof(*cell_ids));
2379 int * num_vertices_per_cell =
2380 xmalloc(num_cells * sizeof(*num_vertices_per_cell));
2382 cell_corner_point_data =
2383 xmalloc(2 * (max_num_cell_corners + max_num_local_cell_corners) *
2384 sizeof(*cell_corner_point_data));
2385 struct single_remote_point_reorder * cell_edge_point_data =
2386 cell_corner_point_data + max_num_cell_corners;
2387 struct single_remote_point_reorder * local_cell_corner_point_data =
2388 cell_corner_point_data + 2 * max_num_cell_corners;
2389 struct single_remote_point_reorder * local_cell_edge_point_data =
2390 cell_corner_point_data + 2 * max_num_cell_corners +
2391 max_num_local_cell_corners;
2392 for (size_t i = 0; i < grid_data->num_cells; ++i){
2393 int num_corners = (size_t)(grid_data->num_vertices_per_cell[i]);
2394 size_t * corners = grid_data->cell_to_vertex + grid_data->cell_to_vertex_offsets[i];
2395 size_t * edges = grid_data->cell_to_edge + grid_data->cell_to_edge_offsets[i];
2396 struct single_remote_point_reorder * curr_local_cell_corner_point_data =
2397 local_cell_corner_point_data + i * (size_t)max_num_vertices_per_cell;
2398 struct single_remote_point_reorder * curr_local_cell_edge_point_data =
2399 local_cell_edge_point_data + i * (size_t)max_num_vertices_per_cell;
2400 for (int j = 0; j < num_corners; ++j) {
2401 curr_local_cell_corner_point_data[j].data.global_id = grid_data->vertex_ids[corners[j]];
2402 curr_local_cell_corner_point_data[j].data.data.rank = comm_rank;
2403 curr_local_cell_corner_point_data[j].data.data.orig_pos = corners[j];
2404 curr_local_cell_edge_point_data[j].data.global_id = grid_data->edge_ids[edges[j]];
2405 curr_local_cell_edge_point_data[j].data.data.rank = comm_rank;
2406 curr_local_cell_edge_point_data[j].data.data.orig_pos = edges[j];
2407 }
2408 }
2409
2410 xt_redist_s_exchange1(redist_cell_yac_int, grid_data->cell_ids, cell_ids);
2411 xt_redist_s_exchange1(
2412 redist_cell_int, grid_data->num_vertices_per_cell, num_vertices_per_cell);
2413 xt_redist_s_exchange1(
2414 redist_cell_corner_point_data,
2415 local_cell_corner_point_data, cell_corner_point_data);
2416 xt_redist_s_exchange1(
2417 redist_cell_corner_point_data,
2418 local_cell_edge_point_data, cell_edge_point_data);
2419
2420 struct yac_field_data * cell_field_data =
2423 redist_cell_int, redist_cell_coords, comm);
2424
2425 xt_redist_delete(redist_cell_coords);
2426 xt_redist_delete(redist_cell_corner_point_data);
2427 xt_redist_delete(redist_cell_yac_int);
2428 xt_redist_delete(redist_cell_int);
2429
2430 yac_int * vertex_ids;
2431 size_t num_vertices;
2432 size_t * cell_to_vertex;
2433 size_t * cell_to_vertex_offsets =
2434 xmalloc(num_cells * sizeof(*cell_to_vertex_offsets));
2435 Xt_xmap xmap_vertex_orig_to_dist;
2436 for (size_t i = 0, accu = 0; i < num_cells; ++i) {
2437 cell_to_vertex_offsets[i] = accu;
2438 accu += (size_t)(num_vertices_per_cell[i]);
2439 }
2441 max_num_vertices_per_cell, num_cells, num_vertices_per_cell,
2442 cell_corner_point_data, &vertex_ids, &num_vertices,
2443 &cell_to_vertex, &xmap_vertex_orig_to_dist, comm);
2444
2445 yac_int * edge_ids;
2446 size_t num_edges;
2447 size_t * cell_to_edge;
2448 size_t * cell_to_edge_offsets = cell_to_vertex_offsets;
2449 Xt_xmap xmap_edge_orig_to_dist;
2451 max_num_vertices_per_cell, num_cells, num_vertices_per_cell,
2452 cell_edge_point_data, &edge_ids, &num_edges,
2453 &cell_to_edge, &xmap_edge_orig_to_dist, comm);
2454
2455 free(cell_corner_point_data);
2456
2457 // generate redists for redistribution of vertex data
2458 Xt_redist redist_vertex_coords =
2459 xt_redist_p2p_new(xmap_vertex_orig_to_dist, dt_coord);
2460 Xt_redist redist_vertex_int =
2461 xt_redist_p2p_new(xmap_vertex_orig_to_dist, MPI_INT);
2462 xt_xmap_delete(xmap_vertex_orig_to_dist);
2463
2464 // get vertex coordinates
2465 yac_coordinate_pointer vertex_coordinates =
2466 xmalloc(num_vertices * sizeof(*vertex_coordinates));
2467 xt_redist_s_exchange1(
2468 redist_vertex_coords, grid_data->vertex_coordinates, vertex_coordinates);
2469
2470 struct yac_field_data * vertex_field_data =
2472 yac_basic_grid_get_field_data(grid, YAC_LOC_CORNER), num_vertices,
2473 redist_vertex_int, redist_vertex_coords, comm);
2474
2475 xt_redist_delete(redist_vertex_int);
2476 xt_redist_delete(redist_vertex_coords);
2477
2478 Xt_redist redist_edge_int,
2479 redist_edge_coords,
2480 redist_edge_2yac_int;
2481 { // generate redists for redistribution of edge data
2482
2483 MPI_Datatype dt_2yac_int;
2484 yac_mpi_call(MPI_Type_contiguous(2, yac_int_dt, &dt_2yac_int), comm);
2485 yac_mpi_call(MPI_Type_commit(&dt_2yac_int), comm);
2486
2487 redist_edge_int = xt_redist_p2p_new(xmap_edge_orig_to_dist, MPI_INT);
2488 redist_edge_coords = xt_redist_p2p_new(xmap_edge_orig_to_dist, dt_coord);
2489 redist_edge_2yac_int = xt_redist_p2p_new(xmap_edge_orig_to_dist, dt_2yac_int);
2490
2491 yac_mpi_call(MPI_Type_free(&dt_2yac_int), comm);
2492
2493 }
2494 xt_xmap_delete(xmap_edge_orig_to_dist);
2495
2496 enum yac_edge_type * edge_type = xmalloc(num_edges * sizeof(*edge_type));
2497
2498 { // get edge types
2499 int * temp_edge_type_src, * temp_edge_type_dst;
2500 if (sizeof(*edge_type) == sizeof(int)) {
2501 temp_edge_type_src = (int*)(grid_data->edge_type);
2502 temp_edge_type_dst = (int*)edge_type;
2503 } else {
2504 temp_edge_type_src =
2505 xmalloc(grid_data->num_edges * sizeof(*temp_edge_type_src));
2506 for (size_t i = 0; i < grid_data->num_edges; ++i)
2507 temp_edge_type_src[i] = (int)(grid_data->edge_type[i]);
2508 temp_edge_type_dst = xmalloc(num_edges * sizeof(*temp_edge_type_dst));
2509 for (size_t i = 0; i < num_edges; ++i)
2510 temp_edge_type_dst[i] = (int)(edge_type[i]);
2511 }
2512
2513 xt_redist_s_exchange1(
2514 redist_edge_int, temp_edge_type_src, temp_edge_type_dst);
2515
2516 if (sizeof(*edge_type) != sizeof(int)) {
2517
2518 for (size_t i = 0; i < num_edges; ++i)
2519 edge_type[i] = (enum yac_edge_type)(temp_edge_type_dst[i]);
2520
2521 free(temp_edge_type_src);
2522 free(temp_edge_type_dst);
2523 }
2524 }
2525
2526 struct yac_field_data * edge_field_data =
2529 redist_edge_int, redist_edge_coords, comm);
2530
2531 xt_redist_delete(redist_edge_int);
2532 xt_redist_delete(redist_edge_coords);
2533
2534 struct bounding_circle * cell_bnd_circles =
2535 xmalloc(num_cells * sizeof(*cell_bnd_circles));
2536 {
2537 struct yac_grid_cell cell;
2538 cell.coordinates_xyz = xmalloc((size_t)max_num_vertices_per_cell *
2539 sizeof(*(cell.coordinates_xyz)));
2540 cell.edge_type = xmalloc((size_t)max_num_vertices_per_cell *
2541 sizeof(*(cell.edge_type)));
2542
2543 for (size_t i = 0; i < num_cells; ++i) {
2544 size_t * curr_cell_to_vertex =
2545 cell_to_vertex + cell_to_vertex_offsets[i];
2546 size_t * curr_cell_to_edge =
2547 cell_to_edge + cell_to_edge_offsets[i];
2548 for (int j = 0; j < num_vertices_per_cell[i]; ++j) {
2549 yac_coordinate_pointer curr_vertex_coords =
2550 vertex_coordinates + curr_cell_to_vertex[j];
2551 for (int k = 0; k < 3; ++k)
2552 cell.coordinates_xyz[j][k] = (*curr_vertex_coords)[k];
2553 cell.edge_type[j] = edge_type[curr_cell_to_edge[j]];
2554 }
2555 cell.num_corners = num_vertices_per_cell[i];
2556 if (cell.num_corners > 0)
2557 yac_get_cell_bounding_circle(cell, cell_bnd_circles + i);
2558 else
2559 cell_bnd_circles[i] =
2560 (struct bounding_circle) {
2561 .base_vector = {1.0, 0.0, 0.0},
2562 .inc_angle = SIN_COS_ZERO,
2563 .sq_crd = DBL_MAX};
2564 }
2565 free(cell.edge_type);
2566 free(cell.coordinates_xyz);
2567 }
2568
2569 yac_size_t_2_pointer edge_to_vertex =
2570 xmalloc(num_edges * sizeof(*edge_to_vertex));
2571
2572 // get edge to vertex
2573 {
2574 yac_int * vertex_id_buffer =
2575 xmalloc(
2576 2 * (grid_data->num_edges + num_edges) * sizeof(*vertex_id_buffer));
2577 yac_int * grid_edge_vertex_ids = vertex_id_buffer;
2578 yac_int * edge_vertex_ids = vertex_id_buffer + 2 * grid_data->num_edges;
2579
2580 size_t * grid_edge_to_vertex = &(grid_data->edge_to_vertex[0][0]);
2581
2582 for (size_t i = 0; i < 2 * grid_data->num_edges; ++i)
2583 grid_edge_vertex_ids[i] =
2584 grid_data->vertex_ids[grid_edge_to_vertex[i]];
2585
2586 xt_redist_s_exchange1(
2587 redist_edge_2yac_int, grid_edge_vertex_ids, edge_vertex_ids);
2588
2589 id2idx(edge_vertex_ids, &(edge_to_vertex[0][0]), 2 * num_edges,
2590 vertex_ids, num_vertices);
2591
2592 free(vertex_id_buffer);
2593 }
2594
2595 xt_redist_delete(redist_edge_2yac_int);
2596
2597 yac_mpi_call(MPI_Type_free(&dt_coord), comm);
2598
2599
2600 struct yac_dist_grid dist_grid =
2601 {.comm = comm,
2602 .vertex_coordinates = vertex_coordinates,
2603 .ids = {cell_ids, vertex_ids, edge_ids},
2604 .total_count = {num_cells, num_vertices, num_edges},
2605 .count = {num_cells, num_vertices, num_edges},
2606 .num_vertices_per_cell = num_vertices_per_cell,
2607 .cell_to_vertex = cell_to_vertex,
2608 .cell_to_vertex_offsets = cell_to_vertex_offsets,
2609 .cell_to_edge = cell_to_edge,
2610 .cell_to_edge_offsets = cell_to_edge_offsets,
2611 .edge_to_vertex = edge_to_vertex,
2612 .cell_bnd_circles = cell_bnd_circles,
2613 .edge_type = edge_type,
2614 .sorted_ids = {NULL, NULL, NULL},
2615 .sorted_reorder_idx = {NULL, NULL, NULL}};
2617 cell_ids, num_cells, &(dist_grid.sorted_ids[YAC_LOC_CELL]),
2618 &(dist_grid.sorted_reorder_idx[YAC_LOC_CELL]));
2620 vertex_ids, num_vertices, &(dist_grid.sorted_ids[YAC_LOC_CORNER]),
2621 &(dist_grid.sorted_reorder_idx[YAC_LOC_CORNER]));
2623 edge_ids, num_edges, &(dist_grid.sorted_ids[YAC_LOC_EDGE]),
2624 &(dist_grid.sorted_reorder_idx[YAC_LOC_EDGE]));
2625 dist_grid.field_data =
2627 cell_field_data, vertex_field_data, edge_field_data);
2628
2630 &dist_grid, proc_sphere_part, dist_owner);
2631
2632 generate_core_masks(&dist_grid, proc_sphere_part, comm_rank);
2633
2634 for (int i = 0; i < 3; ++i) {
2635 free(dist_owner[i]->data);
2636 free(dist_owner[i]);
2637 }
2638 free(dist_owner);
2639
2640 return dist_grid;
2641}
2642
2643static void setup_search_data(struct yac_dist_grid_pair * dist_grid_pair) {
2644
2645 // build sphere part for vertices
2646 for (int i = 0; i < 2; ++i)
2647 dist_grid_pair->vertex_sphere_part[i] =
2649 dist_grid_pair->dist_grid[i].count[YAC_LOC_CORNER],
2651 (dist_grid_pair->dist_grid[i].vertex_coordinates),
2652 dist_grid_pair->dist_grid[i].ids[YAC_LOC_CORNER]);
2653
2654 // build sphere part for bounding circle of cells
2655 for (int i = 0; i < 2; ++i) {
2656 dist_grid_pair->cell_sphere_part[i] =
2658 dist_grid_pair->dist_grid[i].cell_bnd_circles,
2659 dist_grid_pair->dist_grid[i].count[YAC_LOC_CELL]);
2660 }
2661}
2662
2663static void get_grid_cells(
2664 struct yac_basic_grid * grid, struct dist_cell * cells) {
2665
2666 struct yac_basic_grid_data * grid_data = yac_basic_grid_get_data(grid);
2667
2668 size_t num_cells = grid_data->num_cells;
2669 for (size_t i = 0; i < num_cells; ++i) {
2670
2671 double * cell_coord = cells[i].coord;
2672 for (int j = 0; j < 3; ++j) cell_coord[j] = 0.0;
2673
2674 if (grid_data->num_vertices_per_cell[i] == 0) continue;
2675
2676 size_t * curr_cell_to_vertex =
2677 grid_data->cell_to_vertex + grid_data->cell_to_vertex_offsets[i];
2678 // compute the middle point of the cell
2679 for (int j = 0; j < grid_data->num_vertices_per_cell[i]; ++j) {
2680 yac_coordinate_pointer curr_vertex_coords =
2681 grid_data->vertex_coordinates + curr_cell_to_vertex[j];
2682 for (int k = 0; k < 3; ++k)
2683 cell_coord[k] += (*curr_vertex_coords)[k];
2684 }
2685 normalise_vector(cell_coord);
2686 }
2687}
2688
2689static struct proc_sphere_part_node *
2691 struct yac_basic_grid * grid_a, struct yac_basic_grid * grid_b,
2692 MPI_Comm comm) {
2693
2694 size_t num_cells_a = yac_basic_grid_get_data(grid_a)->num_cells;
2695 size_t num_cells_b = yac_basic_grid_get_data(grid_b)->num_cells;
2696
2697 // gather cell centers from both grids into single array
2698 size_t num_cells = num_cells_a + num_cells_b;
2699 struct dist_cell * cells = xmalloc(num_cells * sizeof(*cells));
2700 get_grid_cells(grid_a, cells);
2701 get_grid_cells(grid_b, cells + num_cells_a);
2702
2703 // redistribute all cell centers and build parallel sphere
2704 // part for vertices
2705 struct proc_sphere_part_node * proc_sphere_part =
2706 yac_redistribute_cells(&cells, &num_cells, comm);
2707
2708 free(cells);
2709
2710 return proc_sphere_part;
2711}
2712
2714 struct yac_basic_grid * grid_a, struct yac_basic_grid * grid_b,
2715 MPI_Comm comm) {
2716
2717 YAC_ASSERT(
2718 grid_a,
2719 "ERROR(yac_dist_grid_pair_new): "
2720 "NULL is not a valid value for parameter grid_a")
2721 YAC_ASSERT(
2722 grid_b,
2723 "ERROR(yac_dist_grid_pair_new): "
2724 "NULL is not a valid value for parameter grid_b")
2725
2726 YAC_ASSERT(
2727 strcmp(yac_basic_grid_get_name(grid_a),
2728 yac_basic_grid_get_name(grid_b)),
2729 "ERROR(yac_dist_grid_pair_new): identical grid names")
2730
2731 MPI_Comm comm_copy;
2732 yac_mpi_call(MPI_Comm_dup(comm, &comm_copy), comm);
2733 comm = comm_copy;
2734
2735 // ensure same grid ordering on all processes
2736 if (strcmp(yac_basic_grid_get_name(grid_a),
2737 yac_basic_grid_get_name(grid_b)) > 0) {
2738 struct yac_basic_grid * grid_swap = grid_a;
2739 grid_a = grid_b;
2740 grid_b = grid_swap;
2741 }
2742
2743 struct yac_dist_grid_pair * grid_pair = xmalloc(1 * sizeof(*grid_pair));
2744
2745 grid_pair->grid_names[0] = strdup(yac_basic_grid_get_name(grid_a));
2746 grid_pair->grid_names[1] = strdup(yac_basic_grid_get_name(grid_b));
2747 grid_pair->comm = comm;
2748
2749 // generate a decomposition for the distributed grid pair
2750 // with the following properties:
2751 // * each process covers a unique area on the sphere
2752 // * number of cells (from grid_a and/or grid_b) per area
2753 // is roughly the same
2754 grid_pair->proc_sphere_part =
2755 generate_dist_grid_decomposition(grid_a, grid_b, comm);
2756
2757 // redistribute grid_a and grid_b according to decomposition
2758 grid_pair->dist_grid[0] =
2759 generate_dist_grid(grid_pair->proc_sphere_part, grid_a, comm);
2760 grid_pair->dist_grid[1] =
2761 generate_dist_grid(grid_pair->proc_sphere_part, grid_b, comm);
2762
2763 // build search data structures for local cells and vertices in
2764 // distributed grid
2765 setup_search_data(grid_pair);
2766
2767 return grid_pair;
2768}
2769
2771 struct yac_basic_grid * grid_a, struct yac_basic_grid * grid_b,
2772 MPI_Fint comm) {
2773
2774 return
2775 yac_dist_grid_pair_new(grid_a, grid_b, MPI_Comm_f2c(comm));
2776}
2777
2779 struct yac_dist_grid_pair * grid_pair) {
2780 return grid_pair->comm;
2781}
2782
2784 struct yac_dist_grid_pair * grid_pair, char const * grid_name) {
2785
2786 struct yac_dist_grid * dist_grid = NULL;
2787 for (int i = 0; (i < 2) && (dist_grid == NULL); ++i)
2788 if (!strcmp(grid_name, grid_pair->grid_names[i]))
2789 dist_grid = &(grid_pair->dist_grid[i]);
2790 YAC_ASSERT(
2791 dist_grid, "ERROR(yac_dist_grid_pair_get_dist_grid): invalid grid_name")
2792 return dist_grid;
2793}
2794
2796 struct yac_dist_grid * dist_grid) {
2797
2798 return (struct yac_const_basic_grid_data *)dist_grid;
2799}
2800
2802 struct yac_dist_grid * dist_grid, enum yac_location location) {
2803
2804 YAC_ASSERT(
2805 (location == YAC_LOC_CELL) ||
2806 (location == YAC_LOC_CORNER) ||
2807 (location == YAC_LOC_EDGE),
2808 "ERROR(yac_dist_grid_get_local_count): invalid location")
2809 size_t local_count = 0;
2810 size_t count = dist_grid->count[location];
2811 int * core_mask = dist_grid->core_mask[location];
2812 for (size_t i = 0; i < count; ++i) if (core_mask[i]) ++local_count;
2813 return local_count;
2814}
2815
2817 struct yac_dist_grid * dist_grid, enum yac_location location) {
2818
2819 YAC_ASSERT(
2820 (location == YAC_LOC_CELL) ||
2821 (location == YAC_LOC_CORNER) ||
2822 (location == YAC_LOC_EDGE),
2823 "ERROR(yac_dist_grid_get_count): invalid location")
2824 size_t * total_count = dist_grid->total_count;
2825 return total_count[location];
2826}
2827
2829 struct yac_dist_grid * dist_grid, enum yac_location location) {
2830
2831 YAC_ASSERT(
2832 (location == YAC_LOC_CELL) ||
2833 (location == YAC_LOC_CORNER) ||
2834 (location == YAC_LOC_EDGE),
2835 "ERROR(yac_dist_grid_get_core_mask): invalid location")
2836 int ** core_mask = dist_grid->core_mask;
2837 return core_mask[location];
2838}
2839
2841 struct yac_dist_grid * dist_grid, enum yac_location location) {
2842
2843 YAC_ASSERT(
2844 (location == YAC_LOC_CELL) ||
2845 (location == YAC_LOC_CORNER) ||
2846 (location == YAC_LOC_EDGE),
2847 "ERROR(yac_dist_grid_get_global_ids): invalid location")
2848 yac_int ** ids = dist_grid->ids;
2849 return ids[location];
2850}
2851
2853 struct yac_dist_grid * dist_grid, struct yac_interp_field field,
2854 size_t ** indices, size_t * count) {
2855
2856 int const * mask = yac_dist_grid_get_field_mask(dist_grid, field);
2857
2858 size_t total_count = yac_dist_grid_get_count(dist_grid, field.location);
2859 int const * core_mask =
2860 yac_dist_grid_get_core_mask(dist_grid, field.location);
2861
2862 size_t * temp_indices = xmalloc(total_count * sizeof(*temp_indices));
2863
2864 size_t count_ = 0;
2865
2866 if (mask != NULL) {
2867 for (size_t i = 0; i < total_count; ++i)
2868 if (core_mask[i] && mask[i]) temp_indices[count_++] = i;
2869 } else {
2870 for (size_t i = 0; i < total_count; ++i)
2871 if (core_mask[i]) temp_indices[count_++] = i;
2872 }
2873
2874 *indices = xrealloc(temp_indices, count_ * sizeof(**indices));
2875 *count = count_;
2876}
2877
2879 struct yac_dist_grid * dist_grid, enum yac_location location) {
2880
2881 return
2882 yac_field_data_set_get_field_data(dist_grid->field_data, location);
2883}
2884
2886 struct yac_dist_grid * dist_grid, struct yac_interp_field field) {
2887
2888 if (field.masks_idx == SIZE_MAX) return NULL;
2889
2890 return
2891 (field.masks_idx != SIZE_MAX)?
2893 yac_dist_grid_get_field_data(dist_grid, field.location),
2894 field.masks_idx):NULL;
2895}
2896
2898 struct yac_dist_grid * dist_grid, struct yac_interp_field field) {
2899
2901 (field.coordinates_idx != SIZE_MAX)?
2903 yac_dist_grid_get_field_data(dist_grid, field.location),
2904 field.coordinates_idx):NULL;
2905
2906 // if no field coordinates are defined, but the location is at the corners of
2907 // of the grid cells, return coordinates of them
2908 return
2909 ((coords != NULL) || (field.location != YAC_LOC_CORNER))?
2910 coords:((yac_const_coordinate_pointer)(dist_grid->vertex_coordinates));
2911}
2912
2914 struct yac_dist_grid * dist_grid, struct yac_interp_field field) {
2915
2916 size_t total_count = yac_dist_grid_get_count(dist_grid, field.location);
2917
2918 int const * mask = yac_dist_grid_get_field_mask(dist_grid, field);
2919 if (mask == NULL)
2920 return yac_dist_grid_get_local_count(dist_grid, field.location);
2921
2922 int const * core_mask =
2923 yac_dist_grid_get_core_mask(dist_grid, field.location);
2924
2925 size_t unmasked_local_count = 0;
2926 for (size_t i = 0; i < total_count; ++i)
2927 if (core_mask[i] && mask[i]) ++unmasked_local_count;
2928 return unmasked_local_count;
2929}
2930
2932 struct remote_point_infos * point_infos, size_t count) {
2933
2934 for (size_t i = 0; i < count; ++i)
2935 if (point_infos[i].count > 1) free(point_infos[i].data.multi);
2936 free(point_infos);
2937}
2938
2939static void yac_dist_grid_free(struct yac_dist_grid grid) {
2940
2941 free(grid.vertex_coordinates);
2942 free(grid.num_vertices_per_cell);
2943 free(grid.cell_to_vertex);
2944 free(grid.cell_to_vertex_offsets);
2945 free(grid.cell_to_edge);
2946 free(grid.cell_bnd_circles);
2947 free(grid.edge_type);
2948 free(grid.edge_to_vertex);
2949 for (int i = 0; i < 3; ++i) {
2950 free(grid.ids[i]);
2951 free(grid.core_mask[i]);
2952 free(grid.sorted_ids[i]);
2953 free(grid.sorted_reorder_idx[i]);
2955 }
2957}
2958
2960
2961 if (grid_pair == NULL) return;
2962 free(grid_pair->grid_names[0]);
2963 free(grid_pair->grid_names[1]);
2964 yac_mpi_call(MPI_Comm_free(&(grid_pair->comm)), MPI_COMM_WORLD);
2966 for (int i = 0; i < 2; ++i) {
2967 yac_dist_grid_free(grid_pair->dist_grid[i]);
2970 }
2971 free(grid_pair);
2972}
2973
2974static int coord_in_cell(double coord[3], struct yac_dist_grid * dist_grid,
2975 size_t cell_idx, struct yac_grid_cell * buffer_cell) {
2976
2978 (struct yac_const_basic_grid_data *)dist_grid, cell_idx, buffer_cell);
2979
2980 return
2982 coord, *buffer_cell, dist_grid->cell_bnd_circles[cell_idx]);
2983}
2984
2985static int coord_in_cell_gc(double coord[3], struct yac_dist_grid * dist_grid,
2986 size_t cell_idx, struct yac_grid_cell * buffer_cell) {
2987
2989 (struct yac_const_basic_grid_data *)dist_grid, cell_idx, buffer_cell);
2990 for (size_t i = 0; i < buffer_cell->num_corners; ++i)
2991 buffer_cell->edge_type[i] = YAC_GREAT_CIRCLE_EDGE;
2992
2993 return
2995 coord, *buffer_cell, dist_grid->cell_bnd_circles[cell_idx]);
2996}
2997
2999 struct yac_const_basic_grid_data * grid_data, size_t cell_idx,
3000 struct yac_grid_cell * buffer_cell) {
3001
3002 size_t num_vertices = (size_t)(grid_data->num_vertices_per_cell[cell_idx]);
3003
3004 struct yac_grid_cell cell = *buffer_cell;
3005
3006 if (cell.array_size < num_vertices) {
3007 cell.coordinates_xyz =
3008 xrealloc(cell.coordinates_xyz, num_vertices *
3009 sizeof(*(cell.coordinates_xyz)));
3010 cell.edge_type = xrealloc(cell.edge_type, num_vertices *
3011 sizeof(*(cell.edge_type)));
3012 cell.array_size = num_vertices;
3013 *buffer_cell = cell;
3014 }
3015
3016 for (size_t i = 0; i < num_vertices; ++i) {
3017 size_t vertex_idx =
3018 grid_data->cell_to_vertex[
3019 grid_data->cell_to_vertex_offsets[cell_idx] + i];
3020 cell.coordinates_xyz[i][0] = grid_data->vertex_coordinates[vertex_idx][0];
3021 cell.coordinates_xyz[i][1] = grid_data->vertex_coordinates[vertex_idx][1];
3022 cell.coordinates_xyz[i][2] = grid_data->vertex_coordinates[vertex_idx][2];
3023 size_t edge_idx =
3024 grid_data->cell_to_edge[grid_data->cell_to_edge_offsets[cell_idx] + i];
3025 cell.edge_type[i] = grid_data->edge_type[edge_idx];
3026 }
3027 buffer_cell->num_corners = num_vertices;
3028}
3029
3031 struct yac_dist_grid_pair * grid_pair, char const * grid_name) {
3032
3033 struct bnd_sphere_part_search * search = NULL;
3034
3035 for (int i = 0; (i < 2) && (search == NULL); ++i)
3036 if (!strcmp(grid_name, grid_pair->grid_names[i]))
3037 search = grid_pair->cell_sphere_part[i];
3038 YAC_ASSERT(
3039 search != NULL,
3040 "ERROR(yac_dist_grid_pair_get_cell_sphere_part): invalid grid_name")
3041 return search;
3042}
3043
3045 struct yac_dist_grid_pair * grid_pair, char const * grid_name,
3046 yac_coordinate_pointer search_coords, size_t count, size_t * cells,
3047 int (*coord_in_cell)(
3048 double coord[3], struct yac_dist_grid * dist_grid, size_t cell_idx,
3049 struct yac_grid_cell * buffer_cell)) {
3050
3051 struct bnd_sphere_part_search * cell_sphere_part =
3052 dist_grid_pair_get_cell_sphere_part(grid_pair, grid_name);
3053 struct yac_dist_grid * dist_grid =
3054 yac_dist_grid_pair_get_dist_grid(grid_pair, grid_name);
3055
3056 size_t * temp_cells;
3057 size_t * num_cells_per_coord =
3058 xmalloc(count * sizeof(*num_cells_per_coord));
3059
3060 // search for all matching source cells
3062 cell_sphere_part, search_coords, count, &temp_cells,
3063 num_cells_per_coord);
3064
3065 struct yac_grid_cell buffer_cell;
3066 yac_init_grid_cell(&buffer_cell);
3067
3068 // if we have multiple source cells for a single search coordinate, get the
3069 // source cell with the lowest global id
3070 for (size_t i = 0, k = 0; i < count; ++i) {
3071 size_t curr_num_cells = num_cells_per_coord[i];
3072 if (curr_num_cells == 0) {
3073 cells[i] = SIZE_MAX;
3074 } else if (curr_num_cells == 1) {
3075 if (coord_in_cell(
3076 search_coords[i], dist_grid, temp_cells[k], &buffer_cell))
3077 cells[i] = temp_cells[k];
3078 else
3079 cells[i] = SIZE_MAX;
3080 ++k;
3081 } else {
3082 size_t cell_idx = SIZE_MAX;
3083 yac_int cell_id = XT_INT_MAX;
3084 for (size_t j = 0; j < curr_num_cells; ++j, ++k) {
3085 size_t curr_cell_idx = temp_cells[k];
3086 yac_int curr_cell_id = dist_grid->ids[YAC_LOC_CELL][curr_cell_idx];
3087 if (!coord_in_cell(
3088 search_coords[i], dist_grid, curr_cell_idx, &buffer_cell))
3089 continue;
3090 if (curr_cell_id < cell_id) {
3091 cell_idx = curr_cell_idx;
3092 cell_id = curr_cell_id;
3093 }
3094 }
3095 cells[i] = cell_idx;
3096 }
3097 }
3098
3099 yac_free_grid_cell(&buffer_cell);
3100 free(num_cells_per_coord);
3101 free(temp_cells);
3102}
3103
3105 struct yac_dist_grid * dist_grid, enum yac_location location,
3106 struct single_remote_point_reorder * ids, size_t * count, size_t * idx) {
3107
3108 YAC_ASSERT(
3109 (location == YAC_LOC_CELL) ||
3110 (location == YAC_LOC_CORNER) ||
3111 (location == YAC_LOC_EDGE),
3112 "ERROR(lookup_single_remote_point_reorder_locally): invalid location")
3113
3114 yac_int * sorted_ids = dist_grid->sorted_ids[location];
3115 size_t * reorder_idx = dist_grid->sorted_reorder_idx[location];
3116 size_t num_ids = dist_grid->total_count[location];
3117
3118 size_t count_ = *count;
3119 size_t new_count = 0;
3120
3121 // sort ids by global ids
3122 qsort(ids, count_, sizeof(*ids),
3124
3125 for (size_t i = 0, j = 0; i < count_; ++i) {
3126 yac_int curr_id = ids[i].data.global_id;
3127 while ((j < num_ids) && (sorted_ids[j] < curr_id)) ++j;
3128 if ((j < num_ids) && (sorted_ids[j] == curr_id)) {
3129 idx[ids[i].reorder_idx] = reorder_idx[j];
3130 } else {
3131 if (i != new_count) ids[new_count] = ids[i];
3132 ++new_count;
3133 }
3134 }
3135
3136 *count = new_count;
3137}
3138
3140 struct yac_field_data * field_data, MPI_Comm comm) {
3141
3142 int pack_size_field_coord, pack_size_field_mask;
3143
3145 MPI_Pack_size(3, MPI_DOUBLE, comm, &pack_size_field_coord), comm);
3146 pack_size_field_coord *=
3147 (int)yac_field_data_get_coordinates_count(field_data);
3148
3150 MPI_Pack_size(1, MPI_INT, comm, &pack_size_field_mask), comm);
3151 pack_size_field_mask *=
3152 (int)yac_field_data_get_masks_count(field_data);
3153
3154 return pack_size_field_coord + pack_size_field_mask;
3155}
3156
3158 struct yac_field_data * cell_field_data,
3159 MPI_Datatype bnd_circle_dt, MPI_Comm comm) {
3160
3161 int pack_size_id,
3162 pack_size_num_vertices,
3163 pack_size_bnd_circle;
3164
3165 // id
3166 yac_mpi_call(MPI_Pack_size(1, yac_int_dt, comm, &pack_size_id), comm);
3167 // num_vertices
3168 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_num_vertices), comm);
3169 // bounding circle
3171 MPI_Pack_size(1, bnd_circle_dt, comm, &pack_size_bnd_circle), comm);
3172
3173 return pack_size_id + pack_size_num_vertices + pack_size_bnd_circle +
3174 get_pack_size_field_data(cell_field_data, comm);
3175}
3176
3178 struct yac_field_data * vertex_field_data, MPI_Comm comm) {
3179
3180 int pack_size_id,
3181 pack_size_vertex_coords;
3182 // id
3183 yac_mpi_call(MPI_Pack_size(1, yac_int_dt, comm, &pack_size_id), comm);
3184 // vertex coordinates
3186 MPI_Pack_size(3, MPI_DOUBLE, comm, &pack_size_vertex_coords), comm);
3187
3188 return pack_size_id + pack_size_vertex_coords +
3189 get_pack_size_field_data(vertex_field_data, comm);
3190}
3191
3193 struct yac_field_data * edge_field_data, MPI_Comm comm) {
3194
3195 int pack_size_id,
3196 pack_size_edge_to_vertex,
3197 pack_size_edge_type;
3198 // id
3199 yac_mpi_call(MPI_Pack_size(1, yac_int_dt, comm, &pack_size_id), comm);
3200 // edge type
3201 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_edge_type), comm);
3202 // edge vertex ids
3204 MPI_Pack_size(2, yac_int_dt, comm, &pack_size_edge_to_vertex), comm);
3205
3206 return pack_size_id + pack_size_edge_type + pack_size_edge_to_vertex +
3207 get_pack_size_field_data(edge_field_data, comm);
3208}
3209
3211 struct yac_dist_grid * dist_grid, uint64_t * pos, size_t count,
3212 int * pack_sizes, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
3213 MPI_Comm comm) {
3214
3215 int pack_size_base_cell =
3218 dist_grid->field_data, YAC_LOC_CELL),
3219 bnd_circle_dt, comm);
3220 int pack_size_base_vertex =
3223 dist_grid->field_data, YAC_LOC_CORNER), comm);
3224 int pack_size_base_edge =
3227 dist_grid->field_data, YAC_LOC_EDGE), comm);
3228
3229 for (size_t i = 0; i < count; ++i) {
3230 size_t idx = (size_t)(pos[i]);
3231 int num_vertices = dist_grid->num_vertices_per_cell[idx];
3232 size_t * curr_vertices =
3233 dist_grid->cell_to_vertex + dist_grid->cell_to_vertex_offsets[idx];
3234 size_t * curr_edges =
3235 dist_grid->cell_to_edge + dist_grid->cell_to_edge_offsets[idx];
3236 int pack_size =
3237 pack_size_base_cell +
3238 num_vertices * (pack_size_base_vertex + pack_size_base_edge) +
3240 dist_grid->owners[YAC_LOC_CELL] + idx, point_info_dt, comm);
3241 for (int j = 0; j < num_vertices; ++j) {
3242 pack_size +=
3244 dist_grid->owners[YAC_LOC_CORNER] + curr_vertices[j],
3245 point_info_dt, comm) +
3247 dist_grid->owners[YAC_LOC_EDGE] + curr_edges[j], point_info_dt, comm);
3248 }
3249 pack_sizes[i] = pack_size;
3250 }
3251}
3252
3254 struct yac_dist_grid * dist_grid, uint64_t * pos, size_t count,
3255 int * pack_sizes, MPI_Datatype point_info_dt, MPI_Comm comm) {
3256
3257 int pack_size_base_vertex =
3260 dist_grid->field_data, YAC_LOC_CORNER), comm);
3261 for (size_t i = 0; i < count; ++i)
3262 pack_sizes[i] =
3263 pack_size_base_vertex +
3265 dist_grid->owners[YAC_LOC_CORNER] + pos[i], point_info_dt, comm);
3266}
3267
3269 struct yac_dist_grid * dist_grid, uint64_t * pos, size_t count,
3270 int * pack_sizes, MPI_Datatype point_info_dt, MPI_Comm comm) {
3271
3272 int pack_size_base_vertex =
3275 dist_grid->field_data, YAC_LOC_CORNER), comm);
3276 int pack_size_base_edge =
3279 dist_grid->field_data, YAC_LOC_EDGE), comm);
3280 for (size_t i = 0; i < count; ++i) {
3281 size_t * curr_vertices = dist_grid->edge_to_vertex[pos[i]];
3282 pack_sizes[i] =
3283 pack_size_base_edge +
3284 2 * pack_size_base_vertex +
3286 dist_grid->owners[YAC_LOC_EDGE] + pos[i], point_info_dt, comm) +
3288 dist_grid->owners[YAC_LOC_CORNER] + curr_vertices[0],
3289 point_info_dt, comm) +
3291 dist_grid->owners[YAC_LOC_CORNER] + curr_vertices[1],
3292 point_info_dt, comm);
3293 }
3294}
3295
3296static void get_pack_sizes(
3297 struct yac_dist_grid * dist_grid, enum yac_location location, uint64_t * pos,
3298 size_t count, int * pack_sizes, MPI_Datatype bnd_circle_dt,
3299 MPI_Datatype point_info_dt, MPI_Comm comm) {
3300
3301 YAC_ASSERT(
3302 (location == YAC_LOC_CELL) ||
3303 (location == YAC_LOC_CORNER) ||
3304 (location == YAC_LOC_EDGE),
3305 "ERROR(get_pack_sizes): invalid location")
3306
3307 switch(location) {
3308 default:
3309 case(YAC_LOC_CELL):
3311 dist_grid, pos, count, pack_sizes, bnd_circle_dt, point_info_dt, comm);
3312 break;
3313 case(YAC_LOC_CORNER):
3315 dist_grid, pos, count, pack_sizes, point_info_dt, comm);
3316 break;
3317 case(YAC_LOC_EDGE):
3319 dist_grid, pos, count, pack_sizes, point_info_dt, comm);
3320 break;
3321 };
3322}
3323
3325 size_t idx, void * buffer, int buffer_size, int * position,
3326 struct yac_field_data * field_data, MPI_Comm comm) {
3327
3328 size_t coordinates_count =
3330 size_t masks_count =
3332
3333 // coordinates
3334 for (size_t i = 0; i < coordinates_count; ++i)
3336 MPI_Pack(
3337 yac_field_data_get_coordinates_data(field_data, i)[idx],
3338 3, MPI_DOUBLE, buffer, buffer_size, position, comm), comm);
3339
3340 // masks
3341 for (size_t i = 0; i < masks_count; ++i)
3343 MPI_Pack(
3344 yac_field_data_get_mask_data(field_data, i) + idx, 1, MPI_INT, buffer,
3345 buffer_size, position, comm), comm);
3346}
3347
3349 struct yac_dist_grid * dist_grid, size_t idx, void * buffer, int buffer_size,
3350 int * position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
3351 MPI_Comm comm) {
3352
3353 UNUSED(bnd_circle_dt);
3354
3355 // id
3357 MPI_Pack(dist_grid->ids[YAC_LOC_CORNER] + idx, 1, yac_int_dt, buffer, buffer_size,
3358 position, comm), comm);
3359 // vertex coordinates
3361 MPI_Pack(&(dist_grid->vertex_coordinates[idx][0]), 3, MPI_DOUBLE, buffer,
3362 buffer_size, position, comm), comm);
3363 // vertex owner
3365 dist_grid->owners[YAC_LOC_CORNER] + idx, buffer, buffer_size, position,
3366 point_info_dt, comm);
3367 // pack field data
3369 idx, buffer, buffer_size, position,
3371}
3372
3374 struct yac_dist_grid * dist_grid, size_t idx, void * buffer, int buffer_size,
3375 int * position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
3376 MPI_Comm comm) {
3377
3378 UNUSED(bnd_circle_dt);
3379
3380 int edge_type = (int)(dist_grid->edge_type[idx]);
3381
3382 // id
3384 MPI_Pack(
3385 dist_grid->ids[YAC_LOC_EDGE] + idx, 1, yac_int_dt, buffer, buffer_size, position,
3386 comm), comm);
3387 // edge type
3389 MPI_Pack(
3390 &edge_type, 1, MPI_INT, buffer, buffer_size, position, comm), comm);
3391 // edge to vertex
3392 yac_int edge_to_vertex[2] = {
3393 dist_grid->ids[YAC_LOC_CORNER][dist_grid->edge_to_vertex[idx][0]],
3394 dist_grid->ids[YAC_LOC_CORNER][dist_grid->edge_to_vertex[idx][1]]};
3396 MPI_Pack(
3397 edge_to_vertex, 2, yac_int_dt, buffer, buffer_size, position, comm),
3398 comm);
3399 // edge owner
3401 dist_grid->owners[YAC_LOC_EDGE] + idx, buffer, buffer_size, position,
3402 point_info_dt, comm);
3403 // pack field data
3405 idx, buffer, buffer_size, position,
3407}
3408
3410 struct yac_dist_grid * dist_grid, size_t idx, void * buffer, int buffer_size,
3411 int * position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
3412 MPI_Comm comm) {
3413
3415 dist_grid, idx, buffer, buffer_size, position,
3416 bnd_circle_dt, point_info_dt, comm);
3417
3418 // pack edge vertices
3419 for (int i = 0; i < 2; ++i)
3421 dist_grid, dist_grid->edge_to_vertex[idx][i],
3422 buffer, buffer_size, position, bnd_circle_dt, point_info_dt, comm);
3423}
3424
3426 struct yac_dist_grid * dist_grid, size_t idx, void * buffer, int buffer_size,
3427 int * position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
3428 MPI_Comm comm) {
3429
3430 int num_vertices = dist_grid->num_vertices_per_cell[idx];
3431
3432 // id
3434 MPI_Pack(
3435 dist_grid->ids[YAC_LOC_CELL] + idx, 1, yac_int_dt, buffer, buffer_size, position,
3436 comm), comm);
3437 // pack field data
3439 idx, buffer, buffer_size, position,
3441 // num_vertices
3443 MPI_Pack(&num_vertices, 1, MPI_INT, buffer,
3444 buffer_size, position, comm), comm);
3445 // bounding_circle
3447 MPI_Pack(dist_grid->cell_bnd_circles + idx, 1, bnd_circle_dt, buffer,
3448 buffer_size, position, comm), comm);
3449 // cell owner
3451 dist_grid->owners[YAC_LOC_CELL] + idx, buffer, buffer_size, position,
3452 point_info_dt, comm);
3453
3454 for (int i = 0; i < num_vertices; ++i) {
3456 dist_grid,
3457 dist_grid->cell_to_vertex[dist_grid->cell_to_vertex_offsets[idx] + i],
3458 buffer, buffer_size, position, bnd_circle_dt, point_info_dt, comm);
3460 dist_grid,
3461 dist_grid->cell_to_edge[dist_grid->cell_to_edge_offsets[idx] + i],
3462 buffer, buffer_size, position, bnd_circle_dt, point_info_dt, comm);
3463 }
3464}
3465
3466static void pack_grid_data(
3467 struct yac_dist_grid * dist_grid, enum yac_location location, uint64_t * pos,
3468 size_t count, void ** pack_data, int * pack_sizes,
3469 MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt, MPI_Comm comm) {
3470
3471 get_pack_sizes(dist_grid, location, pos, count, pack_sizes,
3472 bnd_circle_dt, point_info_dt, comm);
3473
3474 size_t pack_size = 0;
3475 for (size_t i = 0; i < count; ++i) pack_size += (size_t)(pack_sizes[i]);
3476
3477 void * pack_data_ = xmalloc(pack_size);
3478
3479 YAC_ASSERT(
3480 (location == YAC_LOC_CELL) ||
3481 (location == YAC_LOC_CORNER) ||
3482 (location == YAC_LOC_EDGE),
3483 "ERROR(pack_grid_data): invalid location")
3484
3485 void (*func_pack[3])(
3486 struct yac_dist_grid * dist_grid, size_t idx, void * buffer,
3487 int buffer_size, int * position, MPI_Datatype bnd_circle_dt,
3488 MPI_Datatype point_info_dt, MPI_Comm comm) =
3490
3491 for (size_t i = 0, offset = 0; i < count; ++i) {
3492 int position = 0;
3493 func_pack[location](
3494 dist_grid, pos[i], (char*)pack_data_ + offset, pack_sizes[i],
3495 &position, bnd_circle_dt, point_info_dt, comm);
3496 pack_sizes[i] = position;
3497 offset += (size_t)position;
3498 }
3499
3500 *pack_data = pack_data_;
3501}
3502
3504 void * buffer, int buffer_size, int * position, size_t idx,
3505 struct temp_field_data temp_field_data, MPI_Comm comm) {
3506
3507 for (size_t i = 0; i < temp_field_data.coordinates_count; ++i)
3509 MPI_Unpack(
3510 buffer, buffer_size, position, temp_field_data.coordinates[i][idx],
3511 3, MPI_DOUBLE, comm), comm);
3512
3513 for (size_t i = 0; i < temp_field_data.masks_count; ++i)
3515 MPI_Unpack(buffer, buffer_size, position,
3516 temp_field_data.masks[i] + idx, 1, MPI_INT, comm), comm);
3517}
3518
3520 struct global_vertex_reorder * vertex, size_t idx, void * buffer,
3521 int buffer_size, int * position,
3522 struct temp_field_data temp_vertex_field_data,
3523 MPI_Datatype point_info_dt, MPI_Comm comm) {
3524
3525 // id
3527 MPI_Unpack(buffer, buffer_size, position, &(vertex[idx].global_id), 1,
3528 yac_int_dt, comm), comm);
3529 // vertex coordinates
3531 MPI_Unpack(buffer, buffer_size, position, &(vertex[idx].coord[0]), 3,
3532 MPI_DOUBLE, comm), comm);
3533 // vertex owners
3535 buffer, buffer_size, position, &(vertex[idx].owners), point_info_dt, comm);
3536 // unpack field data
3538 buffer, buffer_size, position, idx, temp_vertex_field_data, comm);
3539}
3540
3542 struct global_edge_reorder * edge, size_t idx, void * buffer,
3543 int buffer_size, int * position,
3544 struct temp_field_data temp_edge_field_data,
3545 MPI_Datatype point_info_dt, MPI_Comm comm) {
3546
3547 int edge_type;
3548
3549 // id
3551 MPI_Unpack(buffer, buffer_size, position, &(edge[idx].global_id), 1,
3552 yac_int_dt, comm), comm);
3553 // edge type
3555 MPI_Unpack(buffer, buffer_size, position, &edge_type, 1,
3556 MPI_INT, comm), comm);
3557 edge[idx].edge_type = (enum yac_edge_type)edge_type;
3558 // edge to vertex
3560 MPI_Unpack(buffer, buffer_size, position, edge[idx].edge_to_vertex, 2,
3561 yac_int_dt, comm), comm);
3562 // edge owners
3563 yac_remote_point_infos_unpack(buffer, buffer_size, position,
3564 &(edge[idx].owners), point_info_dt, comm);
3565 // unpack field data
3567 buffer, buffer_size, position, idx, temp_edge_field_data, comm);
3568}
3569
3571 const void * a, const void * b) {
3572
3573 return (((const struct global_vertex_reorder *)a)->global_id >
3574 ((const struct global_vertex_reorder *)b)->global_id) -
3575 (((const struct global_vertex_reorder *)a)->global_id <
3576 ((const struct global_vertex_reorder *)b)->global_id);
3577}
3578
3579static void add_field_data(
3580 struct yac_field_data * field_data, struct temp_field_data temp_field_data,
3581 void * reorder_idx, size_t reorder_idx_size,
3582 size_t old_count, size_t new_count) {
3583
3584 size_t add_count = new_count - old_count;
3585
3586 for (size_t i = 0; i < temp_field_data.masks_count; ++i) {
3587 int * temp_mask = temp_field_data.masks[i];
3588 int * mask =
3589 xrealloc(
3590 (void*)yac_field_data_get_mask_data(field_data, i),
3591 new_count * sizeof(*mask));
3592 yac_field_data_set_mask_data(field_data, i, mask);
3593 for (size_t i = 0, j = old_count; i < add_count; ++i, ++j) {
3594 size_t idx =
3595 *(size_t*)((unsigned char*)reorder_idx + i * reorder_idx_size);
3596 mask[j] = temp_mask[idx];
3597 }
3598 }
3599
3600 for (size_t i = 0; i < temp_field_data.coordinates_count; ++i) {
3602 yac_coordinate_pointer coordinates =
3603 xrealloc(
3604 (void*)yac_field_data_get_coordinates_data(field_data, i),
3605 new_count * sizeof(*coordinates));
3606 yac_field_data_set_coordinates_data(field_data, i, coordinates);
3607 for (size_t i = 0, j = old_count; i < add_count; ++i, ++j) {
3608 size_t idx =
3609 *(size_t*)((unsigned char*)reorder_idx + i * reorder_idx_size);
3610 coordinates[j][0] = temp_coordinates[idx][0];
3611 coordinates[j][1] = temp_coordinates[idx][1];
3612 coordinates[j][2] = temp_coordinates[idx][2];
3613 }
3614 }
3615}
3616
3618 struct remote_point_infos * point_infos) {
3619
3620 if (point_infos->count > 1) free(point_infos->data.multi);
3621}
3622
3624 struct yac_dist_grid * dist_grid, struct global_vertex_reorder * vertices,
3625 size_t count, size_t * idx,
3626 struct temp_field_data temp_vertex_field_data) {
3627
3628 if (count == 0) return;
3629
3630 // sort vertices global ids
3631 qsort(vertices, count, sizeof(*vertices),
3633
3634 yac_int * sorted_vertex_ids =
3635 dist_grid->sorted_ids[YAC_LOC_CORNER];
3636 size_t * sorted_vertex_reorder_idx =
3638
3639 yac_int prev_global_id = vertices[0].global_id - 1;
3640 size_t prev_idx = 0;
3641 size_t add_count = 0;
3642 size_t num_total_vertices = dist_grid->total_count[YAC_LOC_CORNER];
3643
3644 // determine which vertices need to be added to local data
3645 for (size_t i = 0, j = 0; i < count; ++i) {
3646
3647 yac_int curr_global_id = vertices[i].global_id;
3648 size_t curr_reorder_idx = vertices[i].reorder_idx;
3649
3650 // if the current global id is a duplicate
3651 if (prev_global_id == curr_global_id) {
3652 if (idx != NULL) idx[curr_reorder_idx] = prev_idx;
3654 continue;
3655 }
3656 prev_global_id = curr_global_id;
3657
3658 // check whether the current global id is already part of the local
3659 // grid data
3660 while ((j < num_total_vertices) && (sorted_vertex_ids[j] < curr_global_id))
3661 ++j;
3662
3663 // if we found a match in the local data
3664 if ((j < num_total_vertices) && (sorted_vertex_ids[j] == curr_global_id)) {
3665
3666 if (idx != NULL) idx[curr_reorder_idx] = sorted_vertex_reorder_idx[j];
3667 prev_idx = sorted_vertex_reorder_idx[j];
3669
3670 // if we need to add the current vertex to the local data
3671 } else {
3672
3673 if (idx != NULL) idx[curr_reorder_idx] = num_total_vertices + add_count;
3674 prev_idx = num_total_vertices + add_count;
3675 if (add_count != i) vertices[add_count] = vertices[i];
3676 ++add_count;
3677 }
3678 }
3679
3680 size_t new_num_total_vertices = num_total_vertices + add_count;
3681 yac_coordinate_pointer vertex_coordinates =
3682 xrealloc(dist_grid->vertex_coordinates, new_num_total_vertices *
3683 sizeof(*vertex_coordinates));
3684 yac_int * vertex_ids =
3685 xrealloc(dist_grid->ids[YAC_LOC_CORNER],
3686 new_num_total_vertices * sizeof(*vertex_ids));
3687 int * core_vertex_mask =
3688 xrealloc(dist_grid->core_mask[YAC_LOC_CORNER], new_num_total_vertices *
3689 sizeof(*core_vertex_mask));
3690 struct remote_point_infos * vertex_owners =
3691 xrealloc(dist_grid->owners[YAC_LOC_CORNER], new_num_total_vertices *
3692 sizeof(*vertex_owners));
3693 sorted_vertex_ids =
3694 xrealloc(
3695 sorted_vertex_ids, new_num_total_vertices * sizeof(*sorted_vertex_ids));
3696 sorted_vertex_reorder_idx =
3697 xrealloc(
3698 sorted_vertex_reorder_idx, new_num_total_vertices *
3699 sizeof(*sorted_vertex_reorder_idx));
3700
3701 // add the selected vertices to the local grid data
3702 for (size_t i = 0, j = num_total_vertices; i < add_count; ++i, ++j) {
3703
3704 vertex_coordinates[j][0] = vertices[i].coord[0];
3705 vertex_coordinates[j][1] = vertices[i].coord[1];
3706 vertex_coordinates[j][2] = vertices[i].coord[2];
3707 vertex_ids[j] = vertices[i].global_id;
3708 core_vertex_mask[j] = 0;
3709 vertex_owners[j] = vertices[i].owners;
3710 sorted_vertex_ids[j] = vertices[i].global_id;
3711 sorted_vertex_reorder_idx[j] = j;
3712 }
3713 // add field data
3716 temp_vertex_field_data, vertices, sizeof(*vertices),
3717 num_total_vertices, new_num_total_vertices);
3719 sorted_vertex_ids, new_num_total_vertices, sorted_vertex_reorder_idx);
3720
3721 dist_grid->vertex_coordinates = vertex_coordinates;
3722 dist_grid->ids[YAC_LOC_CORNER] = vertex_ids;
3723 dist_grid->core_mask[YAC_LOC_CORNER] = core_vertex_mask;
3724 dist_grid->owners[YAC_LOC_CORNER] = vertex_owners;
3725 dist_grid->sorted_ids[YAC_LOC_CORNER] = sorted_vertex_ids;
3726 dist_grid->sorted_reorder_idx[YAC_LOC_CORNER] = sorted_vertex_reorder_idx;
3727 dist_grid->total_count[YAC_LOC_CORNER] = new_num_total_vertices;
3728}
3729
3731 const void * a, const void * b) {
3732
3733 return (((const struct global_edge_reorder *)a)->global_id >
3734 ((const struct global_edge_reorder *)b)->global_id) -
3735 (((const struct global_edge_reorder *)a)->global_id <
3736 ((const struct global_edge_reorder *)b)->global_id);
3737}
3738
3740 struct yac_dist_grid * dist_grid, struct global_edge_reorder * edges,
3741 size_t count, size_t * idx, struct temp_field_data temp_edge_field_data) {
3742
3743 if (count == 0) return;
3744
3745 // sort edges global ids
3746 qsort(edges, count, sizeof(*edges), compare_global_edge_reorder_global_id);
3747
3748 yac_int * sorted_edge_ids = dist_grid->sorted_ids[YAC_LOC_EDGE];
3749 size_t * sorted_edge_reorder_idx = dist_grid->sorted_reorder_idx[YAC_LOC_EDGE];
3750
3751 yac_int prev_global_id = edges[0].global_id - 1;
3752 size_t prev_idx = 0;
3753 size_t add_count = 0;
3754 size_t num_total_edges = dist_grid->total_count[YAC_LOC_EDGE];
3755
3756 // determine which edges need to be added to local data
3757 for (size_t i = 0, j = 0; i < count; ++i) {
3758
3759 yac_int curr_global_id = edges[i].global_id;
3760 size_t curr_reorder_idx = edges[i].reorder_idx;
3761
3762 // if the current global id is a duplicate
3763 if (prev_global_id == curr_global_id) {
3764 if (idx != NULL) idx[curr_reorder_idx] = prev_idx;
3766 continue;
3767 }
3768 prev_global_id = curr_global_id;
3769
3770 // check whether the current global id is already part of the local
3771 // grid data
3772 while ((j < num_total_edges) && (sorted_edge_ids[j] < curr_global_id)) ++j;
3773
3774 // if we found a match in the local data
3775 if ((j < num_total_edges) && (sorted_edge_ids[j] == curr_global_id)) {
3776
3777 if (idx != NULL) idx[curr_reorder_idx] = sorted_edge_reorder_idx[j];
3778 prev_idx = sorted_edge_reorder_idx[j];
3780
3781 // if we need to add the current edge to the local data
3782 } else {
3783
3784 if (idx != NULL) idx[curr_reorder_idx] = num_total_edges + add_count;
3785 prev_idx = num_total_edges + add_count;
3786 if (add_count != i) edges[add_count] = edges[i];
3787 ++add_count;
3788 }
3789 }
3790
3791 size_t new_num_total_edges = num_total_edges + add_count;
3792 yac_int * edge_ids =
3793 xrealloc(dist_grid->ids[YAC_LOC_EDGE],
3794 new_num_total_edges * sizeof(*edge_ids));
3795 enum yac_edge_type * edge_type =
3796 xrealloc(dist_grid->edge_type,
3797 new_num_total_edges * sizeof(*edge_type));
3799 xrealloc(dist_grid->edge_to_vertex,
3800 new_num_total_edges * sizeof(*edge_to_vertex));
3801 struct remote_point_infos * edge_owners =
3802 xrealloc(dist_grid->owners[YAC_LOC_EDGE],
3803 new_num_total_edges * sizeof(*edge_owners));
3804 int * core_edge_mask =
3805 xrealloc(dist_grid->core_mask[YAC_LOC_EDGE], new_num_total_edges *
3806 sizeof(*core_edge_mask));
3807 sorted_edge_ids =
3808 xrealloc(
3809 sorted_edge_ids, new_num_total_edges * sizeof(*sorted_edge_ids));
3810 sorted_edge_reorder_idx =
3811 xrealloc(
3812 sorted_edge_reorder_idx, new_num_total_edges *
3813 sizeof(*sorted_edge_reorder_idx));
3814
3815 yac_int * vertex_ids = xmalloc(2 * add_count * sizeof(*vertex_ids));
3816 size_t * reorder = xmalloc(2 * add_count * sizeof(*reorder));
3817
3818 // add the selected edges to the local grid data
3819 for (size_t i = 0, j = num_total_edges; i < add_count; ++i, ++j) {
3820
3821 edge_ids[j] = edges[i].global_id;
3822 edge_type[j] = edges[i].edge_type;
3823 core_edge_mask[j] = 0;
3824 edge_owners[j] = edges[i].owners;
3825 sorted_edge_ids[j] = edges[i].global_id;
3826 sorted_edge_reorder_idx[j] = j;
3827
3828 vertex_ids[2 * i + 0] = edges[i].edge_to_vertex[0];
3829 vertex_ids[2 * i + 1] = edges[i].edge_to_vertex[1];
3830 reorder[2 * i + 0] = 2 * num_total_edges + 2 * i + 0;
3831 reorder[2 * i + 1] = 2 * num_total_edges + 2 * i + 1;
3832 }
3833 // add field data
3836 temp_edge_field_data, edges, sizeof(*edges),
3837 num_total_edges, new_num_total_edges);
3839 sorted_edge_ids, new_num_total_edges, sorted_edge_reorder_idx);
3840
3841 { // determine vertex indices for edge_to_vertex
3842 yac_quicksort_index_yac_int_size_t(vertex_ids, 2 * add_count, reorder);
3843 yac_int * sorted_vertex_ids = dist_grid->sorted_ids[YAC_LOC_CORNER];
3844 size_t * sorted_vertex_reorder_idx =
3846 size_t total_num_vertices = dist_grid->total_count[YAC_LOC_CORNER];
3847 size_t * edge_to_vertex_ = (size_t*)&(edge_to_vertex[0][0]);
3848 // lookup global ids
3849 for (size_t i = 0, j = 0; i < 2 * add_count; ++i) {
3850 yac_int curr_id = vertex_ids[i];
3851 while ((j < total_num_vertices) && (sorted_vertex_ids[j] < curr_id)) ++j;
3852 YAC_ASSERT(
3853 (j < total_num_vertices) && (sorted_vertex_ids[j] == curr_id),
3854 "ERROR(yac_dist_grid_add_edges): vertex id not found")
3855 edge_to_vertex_[reorder[i]] = sorted_vertex_reorder_idx[j];
3856 }
3857 }
3858
3859 free(vertex_ids);
3860 free(reorder);
3861
3862 dist_grid->ids[YAC_LOC_EDGE] = edge_ids;
3863 dist_grid->edge_type = edge_type;
3864 dist_grid->edge_to_vertex = edge_to_vertex;
3865 dist_grid->owners[YAC_LOC_EDGE] = edge_owners;
3866 dist_grid->core_mask[YAC_LOC_EDGE] = core_edge_mask;
3867 dist_grid->sorted_ids[YAC_LOC_EDGE] = sorted_edge_ids;
3868 dist_grid->sorted_reorder_idx[YAC_LOC_EDGE] = sorted_edge_reorder_idx;
3869 dist_grid->total_count[YAC_LOC_EDGE] = new_num_total_edges;
3870}
3871
3873 struct yac_dist_grid * dist_grid, yac_int * cell_ids,
3874 int * num_vertices_per_cell, struct bounding_circle * cell_bnd_circles,
3875 size_t count, size_t * cell_to_vertex, size_t * cell_to_edge,
3876 struct remote_point_infos * cell_owners,
3877 struct temp_field_data temp_cell_field_data) {
3878
3879 if (count == 0) return;
3880
3881 size_t * reorder_idx = xmalloc(count * sizeof(reorder_idx));
3882 for (size_t i = 0; i < count; ++i) reorder_idx[i] = i;
3883
3884 size_t * prescan = xmalloc(count * sizeof(*prescan));
3885 for (size_t i = 0, accu = 0; i < count;
3886 accu += (size_t)(num_vertices_per_cell[i++])) prescan[i] = accu;
3887
3888 // sort cells global ids
3889 yac_quicksort_index_yac_int_size_t(cell_ids, count, reorder_idx);
3890
3891 yac_int * sorted_cell_ids = dist_grid->sorted_ids[YAC_LOC_CELL];
3892 size_t * sorted_cell_reorder_idx =
3893 dist_grid->sorted_reorder_idx[YAC_LOC_CELL];
3894
3895 yac_int prev_global_id = cell_ids[0] - 1;
3896 size_t cell_add_count = 0;
3897 size_t relations_add_count = 0;
3898 size_t num_total_cells = dist_grid->total_count[YAC_LOC_CELL];
3899
3900 // determine which cells need to be added to local data
3901 for (size_t i = 0, j = 0; i < count; ++i) {
3902
3903 yac_int curr_global_id = cell_ids[i];
3904 size_t curr_reorder_idx = reorder_idx[i];
3905
3906 // if the current global id is a duplicate
3907 if (prev_global_id == curr_global_id) {
3908 yac_remote_point_infos_single_free(cell_owners + curr_reorder_idx);
3909 continue;
3910 }
3911 prev_global_id = curr_global_id;
3912
3913 // check whether the current global id is already part of the local
3914 // grid data
3915 while ((j < num_total_cells) && (sorted_cell_ids[j] < curr_global_id)) ++j;
3916
3917 // if we did not find a match in the local data
3918 if ((j >= num_total_cells) || (sorted_cell_ids[j] != curr_global_id)) {
3919
3920 if (cell_add_count != i) {
3921 cell_ids[cell_add_count] = curr_global_id;
3922 reorder_idx[cell_add_count] = curr_reorder_idx;
3923 }
3924 ++cell_add_count;
3925 relations_add_count += (size_t)(num_vertices_per_cell[curr_reorder_idx]);
3926 }
3927 }
3928
3929 size_t new_num_total_cells = num_total_cells + cell_add_count;
3930 size_t num_total_relations =
3931 (num_total_cells > 0)?
3932 (dist_grid->cell_to_vertex_offsets[num_total_cells-1] +
3933 (size_t)(dist_grid->num_vertices_per_cell[num_total_cells-1])):0;
3934 size_t new_num_total_relations = num_total_relations + relations_add_count;
3935 yac_int * new_cell_ids =
3936 xrealloc(dist_grid->ids[YAC_LOC_CELL],
3937 new_num_total_cells * sizeof(*new_cell_ids));
3938 int * new_num_vertices_per_cell =
3939 xrealloc(dist_grid->num_vertices_per_cell, new_num_total_cells *
3940 sizeof(*new_num_vertices_per_cell));
3941 size_t * new_cell_to_vertex =
3942 xrealloc(dist_grid->cell_to_vertex, new_num_total_relations *
3943 sizeof(*new_cell_to_vertex));
3944 size_t * cell_to_vertex_offsets =
3945 xrealloc(dist_grid->cell_to_vertex_offsets, new_num_total_cells *
3946 sizeof(*cell_to_vertex_offsets));
3947 size_t * new_cell_to_edge =
3948 xrealloc(dist_grid->cell_to_edge, new_num_total_relations *
3949 sizeof(*new_cell_to_edge));
3950 struct bounding_circle * new_cell_bnd_circles =
3951 xrealloc(dist_grid->cell_bnd_circles, new_num_total_cells *
3952 sizeof(*new_cell_bnd_circles));
3953 int * core_cell_mask =
3954 xrealloc(dist_grid->core_mask[YAC_LOC_CELL],
3955 new_num_total_cells * sizeof(*core_cell_mask));
3956 struct remote_point_infos * new_cell_owners =
3957 xrealloc(dist_grid->owners[YAC_LOC_CELL],
3958 new_num_total_cells * sizeof(*cell_owners));
3959 sorted_cell_ids =
3960 xrealloc(
3961 sorted_cell_ids, new_num_total_cells * sizeof(*sorted_cell_ids));
3962 sorted_cell_reorder_idx =
3963 xrealloc(
3964 sorted_cell_reorder_idx, new_num_total_cells *
3965 sizeof(*sorted_cell_reorder_idx));
3966
3967 // add the selected cells to the local grid data
3968 for (size_t i = 0, j = num_total_cells; i < cell_add_count;
3969 ++i, ++j) {
3970
3971 size_t curr_reorder_idx = reorder_idx[i];
3972 int curr_num_vertices = num_vertices_per_cell[curr_reorder_idx];
3973 size_t curr_relation_idx = prescan[curr_reorder_idx];
3974
3975 new_cell_ids[j] = cell_ids[i];
3976 new_num_vertices_per_cell[j] = curr_num_vertices;
3977 cell_to_vertex_offsets[j] = num_total_relations;
3978 for (int j = 0; j < curr_num_vertices;
3979 ++j, ++num_total_relations, ++curr_relation_idx) {
3980 new_cell_to_vertex[num_total_relations] =
3981 cell_to_vertex[curr_relation_idx];
3982 new_cell_to_edge[num_total_relations] = cell_to_edge[curr_relation_idx];
3983 }
3984 core_cell_mask[j] = 0;
3985 sorted_cell_ids[j] = cell_ids[i];
3986 sorted_cell_reorder_idx[j] = j;
3987 new_cell_bnd_circles[j] = cell_bnd_circles[curr_reorder_idx];
3988 new_cell_owners[j] = cell_owners[curr_reorder_idx];
3989 }
3990 // add field data
3993 temp_cell_field_data, reorder_idx, sizeof(*reorder_idx),
3994 num_total_cells, new_num_total_cells);
3996 sorted_cell_ids, new_num_total_cells, sorted_cell_reorder_idx);
3997
3998 dist_grid->ids[YAC_LOC_CELL] = new_cell_ids;
3999 dist_grid->num_vertices_per_cell = new_num_vertices_per_cell;
4000 dist_grid->cell_to_vertex = new_cell_to_vertex;
4001 dist_grid->cell_to_vertex_offsets = cell_to_vertex_offsets;
4002 dist_grid->cell_to_edge = new_cell_to_edge;
4003 dist_grid->cell_to_edge_offsets = cell_to_vertex_offsets;
4004 dist_grid->core_mask[YAC_LOC_CELL] = core_cell_mask;
4005 dist_grid->owners[YAC_LOC_CELL] = new_cell_owners;
4006 dist_grid->sorted_ids[YAC_LOC_CELL] = sorted_cell_ids;
4007 dist_grid->sorted_reorder_idx[YAC_LOC_CELL] = sorted_cell_reorder_idx;
4008 dist_grid->cell_bnd_circles = new_cell_bnd_circles;
4009 dist_grid->total_count[YAC_LOC_CELL] = new_num_total_cells;
4010
4011 free(prescan);
4012 free(reorder_idx);
4013}
4014
4016 struct temp_field_data * temp_field_data, size_t size) {
4017
4018 for (size_t i = 0; i < temp_field_data->masks_count; ++i)
4021 for (size_t i = 0; i < temp_field_data->coordinates_count; ++i)
4025}
4026
4028 struct yac_field_data * field_data, size_t count) {
4029
4031 size_t masks_count = yac_field_data_get_masks_count(field_data);
4033
4039 for (size_t i = 0; i < masks_count; ++i) {
4041 xmalloc(count * sizeof(**temp_field_data.masks));
4043 }
4044
4046 xmalloc(
4049 xmalloc(
4053 for (size_t i = 0; i < coordinates_count; ++i) {
4055 xmalloc(count * sizeof(**temp_field_data.coordinates));
4057 }
4058
4059 return temp_field_data;
4060}
4061
4063
4064 for (size_t i = 0; i < temp_field_data.masks_count; ++i)
4065 free(temp_field_data.masks[i]);
4066 free(temp_field_data.masks);
4068 for (size_t i = 0; i < temp_field_data.coordinates_count; ++i)
4072}
4073
4075 struct yac_dist_grid * dist_grid, size_t count, void * buffer,
4076 int buffer_size, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
4077 MPI_Comm comm) {
4078
4079 yac_int * cell_ids = xmalloc(count * sizeof(*cell_ids));
4080 int * num_vertices_per_cell = xmalloc(count * sizeof(*num_vertices_per_cell));
4081 struct bounding_circle * cell_bnd_circles =
4082 xmalloc(count * sizeof(*cell_bnd_circles));
4083 struct remote_point_infos * cell_owners =
4084 xmalloc(count * sizeof(*cell_owners));
4085
4086 struct global_vertex_reorder * vertices = NULL;
4087 size_t vertices_array_size = 0;
4088 size_t total_num_vertices = 0;
4089
4090 struct global_edge_reorder * edges = NULL;
4091 size_t edges_array_size = 0;
4092
4093 struct temp_field_data temp_cell_field_data =
4096 dist_grid->field_data, YAC_LOC_CELL), count);
4097 struct temp_field_data temp_vertex_field_data =
4100 dist_grid->field_data, YAC_LOC_CORNER), 3 * count);
4101 struct temp_field_data temp_edge_field_data =
4104 dist_grid->field_data, YAC_LOC_EDGE), 3 * count);
4105
4106 for (size_t i = 0, buffer_offset = 0; i < count; ++i) {
4107
4108 int position = 0;
4109 void * curr_buffer = (char*)buffer + buffer_offset;
4110 int num_vertices;
4111
4112 // cell id
4114 MPI_Unpack(curr_buffer, buffer_size, &position, cell_ids + i, 1,
4115 yac_int_dt, comm), comm);
4116 // unpack field data
4118 curr_buffer, buffer_size, &position, i, temp_cell_field_data, comm);
4119 // num vertices
4121 MPI_Unpack(curr_buffer, buffer_size, &position, &num_vertices, 1,
4122 MPI_INT, comm), comm);
4123 // bounding circle
4125 MPI_Unpack(curr_buffer, buffer_size, &position, cell_bnd_circles + i, 1,
4126 bnd_circle_dt, comm), comm);
4127 // cell owners
4129 curr_buffer, buffer_size, &position, cell_owners + i,
4130 point_info_dt, comm);
4131
4132 num_vertices_per_cell[i] = num_vertices;
4133
4135 vertices, vertices_array_size, total_num_vertices + (size_t)num_vertices);
4137 edges, edges_array_size, total_num_vertices + (size_t)num_vertices);
4139 &temp_vertex_field_data, total_num_vertices + (size_t)num_vertices);
4141 &temp_edge_field_data, total_num_vertices + (size_t)num_vertices);
4142
4143 for (int j = 0; j < num_vertices; ++j, ++total_num_vertices) {
4145 vertices, total_num_vertices, curr_buffer, buffer_size, &position,
4146 temp_vertex_field_data, point_info_dt, comm);
4148 edges, total_num_vertices, curr_buffer, buffer_size, &position,
4149 temp_edge_field_data, point_info_dt, comm);
4150 vertices[total_num_vertices].reorder_idx = total_num_vertices;
4151 edges[total_num_vertices].reorder_idx = total_num_vertices;
4152 }
4153
4154 buffer_offset += (size_t)position;
4155 buffer_size -= position;
4156 }
4157
4158 size_t * cell_to_vertex = xmalloc(total_num_vertices * sizeof(*cell_to_vertex));
4159 size_t * cell_to_edge = xmalloc(total_num_vertices * sizeof(*cell_to_edge));
4160
4162 dist_grid, vertices, total_num_vertices, cell_to_vertex,
4163 temp_vertex_field_data);
4165 dist_grid, edges, total_num_vertices, cell_to_edge,
4166 temp_edge_field_data);
4168 dist_grid, cell_ids, num_vertices_per_cell, cell_bnd_circles, count,
4169 cell_to_vertex, cell_to_edge, cell_owners, temp_cell_field_data);
4170
4171 temp_field_data_free(temp_cell_field_data);
4172 temp_field_data_free(temp_vertex_field_data);
4173 temp_field_data_free(temp_edge_field_data);
4174 free(cell_to_edge);
4175 free(cell_to_vertex);
4176 free(vertices);
4177 free(edges);
4178 free(cell_owners);
4179 free(cell_bnd_circles);
4180 free(num_vertices_per_cell);
4181 free(cell_ids);
4182}
4183
4185 struct yac_dist_grid * dist_grid, size_t count, void * buffer,
4186 int buffer_size, MPI_Datatype point_info_dt, MPI_Comm comm) {
4187
4188 struct global_vertex_reorder * vertices = xmalloc(count * sizeof(*vertices));
4189
4190 struct temp_field_data temp_vertex_field_data =
4193 count);
4194
4195 for (size_t i = 0, buffer_offset = 0; i < count; ++i) {
4196
4197 int position = 0;
4198 void * curr_buffer = (char*)buffer + buffer_offset;
4199
4201 vertices, i, curr_buffer, buffer_size, &position,
4202 temp_vertex_field_data, point_info_dt, comm);
4203 vertices[i].reorder_idx = i;
4204
4205 buffer_offset += (size_t)position;
4206 buffer_size -= position;
4207 }
4208
4210 dist_grid, vertices, count, NULL, temp_vertex_field_data);
4211
4212 temp_field_data_free(temp_vertex_field_data);
4213
4214 free(vertices);
4215}
4216
4218 struct yac_dist_grid * dist_grid, size_t count, void * buffer,
4219 int buffer_size, MPI_Datatype point_info_dt, MPI_Comm comm) {
4220
4221 struct global_edge_reorder * edges = xmalloc(count * sizeof(*edges));
4222 struct global_vertex_reorder * vertices =
4223 xmalloc(2 * count * sizeof(*vertices));
4224
4225 struct temp_field_data temp_edge_field_data =
4228 count);
4229 struct temp_field_data temp_vertex_field_data =
4232 2 * count);
4233
4234 for (size_t i = 0, buffer_offset = 0; i < count; ++i) {
4235
4236 int position = 0;
4237 void * curr_buffer = (char*)buffer + buffer_offset;
4238
4240 edges, i, curr_buffer, buffer_size, &position,
4241 temp_edge_field_data, point_info_dt, comm);
4242 edges[i].reorder_idx = i;
4243
4244 for (size_t j = 0; j < 2; ++j)
4246 vertices, 2 * i + j, curr_buffer, buffer_size, &position,
4247 temp_vertex_field_data, point_info_dt, comm);
4248
4249 buffer_offset += (size_t)position;
4250 buffer_size -= position;
4251 }
4252
4254 dist_grid, vertices, 2 * count, NULL, temp_vertex_field_data);
4256 dist_grid, edges, count, NULL, temp_edge_field_data);
4257
4258 temp_field_data_free(temp_vertex_field_data);
4259 temp_field_data_free(temp_edge_field_data);
4260
4261 free(vertices);
4262 free(edges);
4263}
4264
4266 struct yac_dist_grid * dist_grid, enum yac_location location, size_t count,
4267 void * buffer, int buffer_size, MPI_Datatype bnd_circle_dt,
4268 MPI_Datatype point_info_dt, MPI_Comm comm) {
4269
4270 YAC_ASSERT(
4271 (location == YAC_LOC_CELL) ||
4272 (location == YAC_LOC_CORNER) ||
4273 (location == YAC_LOC_EDGE),
4274 "ERROR(unpack_grid_data): invalid location")
4275
4276 switch(location) {
4277 default:
4278 case(YAC_LOC_CELL):
4280 dist_grid, count, buffer, buffer_size, bnd_circle_dt,
4281 point_info_dt, comm);
4282 break;
4283 case(YAC_LOC_CORNER):
4285 dist_grid, count, buffer, buffer_size, point_info_dt, comm);
4286 break;
4287 case(YAC_LOC_EDGE):
4289 dist_grid, count, buffer, buffer_size, point_info_dt, comm);
4290 break;
4291 };
4292}
4293
4295 const void * a, const void * b) {
4296
4297 return ((const struct single_remote_point_reorder *)a)->data.data.rank -
4298 ((const struct single_remote_point_reorder *)b)->data.data.rank;
4299}
4300
4302 struct yac_dist_grid * dist_grid, struct single_remote_point * ids,
4303 size_t count, enum yac_location location, size_t * idx) {
4304
4305 MPI_Comm comm = dist_grid->comm;
4306 int comm_rank, comm_size;
4307 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
4308 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
4309
4310 size_t remote_count = 0;
4311
4312 for (size_t i = 0; i < count; ++i) {
4313 if (ids[i].global_id == XT_INT_MAX) idx[i] = SIZE_MAX;
4314 else if (ids[i].data.rank != comm_rank) ++remote_count;
4315 else idx[i] = ids[i].data.orig_pos;
4316 }
4317
4318 struct single_remote_point_reorder * missing_ids =
4319 xmalloc(remote_count * sizeof(*missing_ids));
4320
4321 for (size_t i = 0, j = 0; i < count; ++i) {
4322 if ((ids[i].data.rank != comm_rank) &&
4323 (ids[i].global_id != XT_INT_MAX)) {
4324 missing_ids[j].data = ids[i];
4325 missing_ids[j].reorder_idx = i;
4326 ++j;
4327 }
4328 }
4329
4330 // check whether we already have some of the missing ids locally
4332 dist_grid, location, missing_ids, &remote_count, idx);
4333
4334 // sort data by owner
4335 qsort(missing_ids, remote_count, sizeof(*missing_ids),
4337
4338 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
4340 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
4341
4342 for (size_t i = 0; i < remote_count; ++i)
4343 sendcounts[missing_ids[i].data.data.rank]++;
4344
4346 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4347
4348 size_t recv_count = rdispls[comm_size-1] + recvcounts[comm_size-1];
4349
4350 uint64_t * uint64_t_buffer =
4351 xmalloc((remote_count + recv_count) * sizeof(*uint64_t_buffer));
4352 uint64_t * orig_pos_send_buffer = uint64_t_buffer;
4353 uint64_t * orig_pos_recv_buffer = uint64_t_buffer + remote_count;
4354
4355 // pack send buffer
4356 for (size_t i = 0; i < remote_count; ++i) {
4357 int rank = missing_ids[i].data.data.rank;
4358 if (rank != comm_rank)
4359 orig_pos_send_buffer[sdispls[rank+1]++] =
4360 (uint64_t)(missing_ids[i].data.data.orig_pos);
4361 }
4362
4363 // redistribute ids
4364 yac_alltoallv_uint64_p2p(
4365 orig_pos_send_buffer, sendcounts, sdispls,
4366 orig_pos_recv_buffer, recvcounts, rdispls, comm);
4367
4368 MPI_Datatype bnd_circle_dt = yac_get_bounding_circle_mpi_datatype(comm);
4369 yac_mpi_call(MPI_Type_commit(&bnd_circle_dt), comm);
4370 MPI_Datatype point_info_dt = yac_get_remote_point_info_mpi_datatype(comm);
4371 yac_mpi_call(MPI_Type_commit(&point_info_dt), comm);
4372
4373 void * packed_send_data = NULL;
4374 int * pack_sizes = xmalloc(recv_count * sizeof(*pack_sizes));
4375
4376 // pack all requested grid data
4378 dist_grid, location, orig_pos_recv_buffer, recv_count,
4379 &packed_send_data, pack_sizes,
4380 bnd_circle_dt, point_info_dt, comm);
4381 free(uint64_t_buffer);
4382
4383 memset(sendcounts, 0, (size_t)comm_size * sizeof(*sendcounts));
4384 for (int i = 0, k = 0; i < comm_size; ++i)
4385 for (size_t j = 0; j < recvcounts[i]; ++j, ++k)
4386 sendcounts[i] += (size_t)(pack_sizes[k]);
4387
4388 free(pack_sizes);
4389
4391 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4392
4393 recv_count = rdispls[comm_size-1] + recvcounts[comm_size-1];
4394
4395 void * packed_recv_data = xmalloc(recv_count);
4396
4397 // redistribute packed grid data
4398 yac_alltoallv_packed_p2p(
4399 packed_send_data, sendcounts, sdispls+1,
4400 packed_recv_data, recvcounts, rdispls, comm);
4401
4402 // unpack requested grid data
4404 dist_grid, location, remote_count, packed_recv_data, (int)recv_count,
4405 bnd_circle_dt, point_info_dt, comm);
4406
4407 yac_mpi_call(MPI_Type_free(&point_info_dt), comm);
4408 yac_mpi_call(MPI_Type_free(&bnd_circle_dt), comm);
4409
4410 // get the local ids for the remaining missing ids
4412 dist_grid, location, missing_ids, &remote_count, idx);
4413
4414 free(missing_ids);
4415 free(packed_recv_data);
4416 free(packed_send_data);
4417 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
4418}
4419
4421 struct yac_dist_grid_pair * grid_pair, char const * grid_name,
4422 yac_coordinate_pointer search_coords, size_t count, size_t * cells,
4423 int (*coord_in_cell)(
4424 double coord[3], struct yac_dist_grid * dist_grid, size_t cell_idx,
4425 struct yac_grid_cell * buffer_cell)) {
4426
4427 MPI_Comm comm = grid_pair->comm;
4428 int comm_rank, comm_size;
4429 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
4430 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
4431
4432 int * ranks = xmalloc(count * sizeof(ranks));
4433
4434 // search for every point in the process tree
4436 grid_pair->proc_sphere_part, search_coords, count, ranks);
4437
4438 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
4440 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
4441 for (size_t i = 0; i < count; ++i) sendcounts[ranks[i]]++;
4442
4443 size_t local_count = sendcounts[comm_rank];
4444 sendcounts[comm_rank] = 0;
4445
4447 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4448
4449 size_t remote_count = sdispls[comm_size] + sendcounts[comm_size-1];
4450 size_t request_count = rdispls[comm_size-1] + recvcounts[comm_size-1];
4451
4452 yac_coordinate_pointer coord_buffer =
4453 xmalloc((remote_count + request_count + local_count) *
4454 sizeof(*coord_buffer));
4455 yac_coordinate_pointer coord_send_buffer = coord_buffer + 0;
4456 yac_coordinate_pointer coord_recv_buffer = coord_buffer + remote_count;
4457 yac_coordinate_pointer coord_local_buffer =
4458 coord_buffer + remote_count + request_count;
4459
4460 // pack search coordinates
4461 for (size_t i = 0, k = 0; i < count; ++i) {
4462 if (ranks[i] == comm_rank) {
4463 coord_local_buffer[k][0] = search_coords[i][0];
4464 coord_local_buffer[k][1] = search_coords[i][1];
4465 coord_local_buffer[k][2] = search_coords[i][2];
4466 ++k;
4467 } else {
4468 size_t displ = sdispls[ranks[i]+1]++;
4469 coord_send_buffer[displ][0] = search_coords[i][0];
4470 coord_send_buffer[displ][1] = search_coords[i][1];
4471 coord_send_buffer[displ][2] = search_coords[i][2];
4472 }
4473 }
4474
4475 MPI_Datatype dt_coord;
4476 yac_mpi_call(MPI_Type_contiguous(3, MPI_DOUBLE, &dt_coord), comm);
4477 yac_mpi_call(MPI_Type_commit(&dt_coord), comm);
4478
4479 // redistribute search coordinates
4481 coord_send_buffer, sendcounts, sdispls,
4482 coord_recv_buffer, recvcounts, rdispls,
4483 sizeof(*coord_send_buffer), dt_coord, comm);
4484
4485 yac_mpi_call(MPI_Type_free(&dt_coord), comm);
4486
4487 size_t * local_cells =
4488 xmalloc((request_count + local_count) * sizeof(*local_cells));
4489
4490 // do local search
4492 grid_pair, grid_name, coord_recv_buffer, request_count + local_count,
4493 local_cells, coord_in_cell);
4494
4495 struct single_remote_point * single_remote_point_buffer =
4496 xmalloc((remote_count + request_count) *
4497 sizeof(*single_remote_point_buffer));
4498 struct single_remote_point * id_send_buffer = single_remote_point_buffer;
4499 struct single_remote_point * id_recv_buffer = single_remote_point_buffer +
4500 request_count;
4501
4502 struct yac_dist_grid * dist_grid =
4503 yac_dist_grid_pair_get_dist_grid(grid_pair, grid_name);
4504
4505 // pack global ids of found source cells
4506 for (size_t i = 0; i < request_count; ++i) {
4507 size_t cell_idx = local_cells[i];
4508 id_send_buffer[i].data.rank = comm_rank;
4509 if (cell_idx != SIZE_MAX) {
4510 id_send_buffer[i].global_id = dist_grid->ids[YAC_LOC_CELL][cell_idx];
4511 id_send_buffer[i].data.orig_pos = (uint64_t)cell_idx;
4512 } else {
4513 id_send_buffer[i].global_id = XT_INT_MAX;
4514 id_send_buffer[i].data.orig_pos = UINT64_MAX;
4515 }
4516 }
4517
4518 MPI_Datatype single_remote_point_dt =
4520
4521 // redistribute results (global ids of found source cells)
4523 id_send_buffer, recvcounts, rdispls, id_recv_buffer, sendcounts, sdispls,
4524 sizeof(*id_send_buffer), single_remote_point_dt, comm);
4525
4526 yac_mpi_call(MPI_Type_free(&single_remote_point_dt), comm);
4527
4528 size_t * new_local_cells =
4529 xmalloc(remote_count * sizeof(*new_local_cells));
4530
4531 // convert all remote ids to local ones, extend local dist_grid data,
4532 // if necessary
4534 dist_grid, id_recv_buffer, remote_count, YAC_LOC_CELL, new_local_cells);
4535
4536 // extract results from local and remote search
4537 for (size_t i = 0, k = 0; i < count; ++i) {
4538 if (ranks[i] == comm_rank) {
4539 cells[i] = local_cells[request_count + k];
4540 ++k;
4541 } else {
4542 size_t displ = sdispls[ranks[i]]++;
4543 cells[i] = new_local_cells[displ];
4544 }
4545 }
4546
4547 free(new_local_cells);
4548 free(single_remote_point_buffer);
4549 free(local_cells);
4550 free(coord_buffer);
4551 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
4552 free(ranks);
4553}
4554
4556 struct yac_dist_grid_pair * grid_pair, char const * grid_name,
4557 yac_coordinate_pointer search_coords, size_t count, size_t * cells) {
4558
4560 grid_pair, grid_name, search_coords, count, cells, coord_in_cell);
4561}
4562
4564 struct yac_dist_grid_pair * grid_pair, char const * grid_name,
4565 yac_coordinate_pointer search_coords, size_t count, size_t * cells) {
4566
4568 grid_pair, grid_name, search_coords, count, cells, coord_in_cell_gc);
4569}
4570
4572 struct yac_dist_grid * dist_grid, struct yac_interp_field field) {
4573
4574 yac_const_coordinate_pointer field_coords =
4575 yac_dist_grid_get_field_coords(dist_grid, field);
4576 yac_int const * global_ids =
4577 yac_dist_grid_get_global_ids(dist_grid, field.location);
4578 int const * mask = yac_dist_grid_get_field_mask(dist_grid, field);
4579 size_t total_count = yac_dist_grid_get_count(dist_grid, field.location);
4580
4581 if (mask == NULL)
4582 return
4584 total_count, field_coords, global_ids);
4585 else
4586 return
4588 total_count, field_coords, global_ids, mask);
4589}
4590
4592 struct yac_dist_grid * dist_grid, struct yac_interp_field field,
4593 int comm_rank, size_t n, struct single_remote_point * points) {
4594
4595 size_t total_count = yac_dist_grid_get_count(dist_grid, field.location);
4596 int const * mask = yac_dist_grid_get_field_mask(dist_grid, field);
4597 int const * core_mask =
4598 yac_dist_grid_get_core_mask(dist_grid, field.location);
4599 yac_int const * global_ids =
4600 yac_dist_grid_get_global_ids(dist_grid, field.location);
4601
4602 if (mask == NULL) {
4603
4604 for (size_t i = 0, j = 0; i < total_count; ++i) {
4605 if (core_mask[i]) {
4606 points[j].global_id = global_ids[i];
4607 points[j].data.rank = comm_rank;
4608 points[j].data.orig_pos = i;
4609 if (n == ++j) return;
4610 }
4611 }
4612
4613 } else {
4614
4615 for (size_t i = 0, j = 0; i < total_count; ++i) {
4616 if (core_mask[i] && mask[i]) {
4617 points[j].global_id = global_ids[i];
4618 points[j].data.rank = comm_rank;
4619 points[j].data.orig_pos = i;
4620 if (n == ++j) return;
4621 }
4622 }
4623 }
4624}
4625
4627 size_t n, MPI_Comm comm) {
4628
4629 struct nnn_search_result dummy;
4630 MPI_Datatype single_remote_point_dt =
4632 single_nnn_search_result_dt_,
4633 nnn_search_result_dt;
4634 int array_of_blocklengths[] = {1, 1};
4635 const MPI_Aint array_of_displacements[] =
4636 {(MPI_Aint)(intptr_t)(const void *)&(dummy.point_info) -
4637 (MPI_Aint)(intptr_t)(const void *)&dummy,
4638 (MPI_Aint)(intptr_t)(const void *)&(dummy.cos_angle) -
4639 (MPI_Aint)(intptr_t)(const void *)&dummy};
4640 const MPI_Datatype array_of_types[] = {single_remote_point_dt, MPI_DOUBLE};
4642 MPI_Type_create_struct(
4643 2, array_of_blocklengths, array_of_displacements,
4644 array_of_types, &single_nnn_search_result_dt_), comm);
4645 MPI_Datatype single_nnn_search_result_dt =
4646 yac_create_resized(single_nnn_search_result_dt_, sizeof(dummy), comm);
4648 MPI_Type_contiguous(
4649 (int)n, single_nnn_search_result_dt, &nnn_search_result_dt), comm);
4650 yac_mpi_call(MPI_Type_commit(&nnn_search_result_dt), comm);
4651 yac_mpi_call(MPI_Type_free(&single_nnn_search_result_dt), comm);
4652 yac_mpi_call(MPI_Type_free(&single_remote_point_dt), comm);
4653 return nnn_search_result_dt;
4654}
4655
4657 void const * a, void const * b) {
4658
4659 struct nnn_search_result const * result_a =
4660 (struct nnn_search_result const *)a;
4661 struct nnn_search_result const * result_b =
4662 (struct nnn_search_result const *)b;
4663
4664 int ret = (result_a->cos_angle < result_b->cos_angle) -
4665 (result_a->cos_angle > result_b->cos_angle);
4666 if (ret) return ret;
4667 return (result_a->point_info.global_id > result_b->point_info.global_id) -
4668 (result_a->point_info.global_id < result_b->point_info.global_id);
4669}
4670
4672 struct nnn_search_result * array, size_t array_size,
4673 struct nnn_search_result * insert, size_t insert_size) {
4674
4675 if (insert_size == 0) return;
4676
4677 // sort arrays first by cos_angle and second by global id
4678 qsort(array, array_size, sizeof(*array),
4680 qsort(insert, insert_size, sizeof(*insert),
4682
4683 struct nnn_search_result temp_results[array_size];
4684 memcpy(temp_results, array, array_size * sizeof(*array));
4685
4686 for (size_t i = 0, j = 0, k = 0; k < array_size; ++k) {
4687
4688 int cmp =
4689 (j < insert_size)?
4691 &temp_results[i], insert + j):(-1);
4692
4693 if (cmp <= 0) {
4694 if (k != i) array[k] = temp_results[i];
4695 ++i;
4696 } else {
4697 array[k] = insert[j];
4698 }
4699
4700 j += cmp >= 0;
4701 }
4702}
4703
4705 struct yac_dist_grid_pair * grid_pair, char const * grid_name,
4706 yac_coordinate_pointer search_coords, size_t count, size_t * local_ids,
4707 size_t n, struct yac_interp_field field) {
4708
4709 MPI_Comm comm = grid_pair->comm;
4710 int comm_rank, comm_size;
4711 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
4712 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
4713
4714 struct yac_dist_grid * dist_grid =
4715 yac_dist_grid_pair_get_dist_grid(grid_pair, grid_name);
4716
4717 uint64_t unmasked_local_count =
4719
4720 uint64_t * unmasked_local_counts =
4721 xmalloc((size_t)comm_size * sizeof(*unmasked_local_counts));
4722
4723 // exchange number of local source points and target points
4725 MPI_Allgather(
4726 &unmasked_local_count, 1, MPI_UINT64_T,
4727 unmasked_local_counts, 1, MPI_UINT64_T, comm), comm);
4728
4729 // check whether there is a rank with too few source points
4730 int flag = 0;
4731 for (int i = 0; i < comm_size; ++i)
4732 flag |= unmasked_local_counts[i] < (uint64_t)n;
4733
4734 // if ranks with insufficient number of local source points
4735 if (flag) {
4736
4737 uint64_t global_num_unmasked_count = 0;
4738 for (int i = 0; i < comm_size; ++i)
4739 global_num_unmasked_count += unmasked_local_counts[i];
4740
4741 YAC_ASSERT(
4742 (size_t)global_num_unmasked_count >= n,
4743 "ERROR(yac_dist_grid_pair_do_nnn_search): insufficient number of "
4744 "unmasked points")
4745
4746 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
4748 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
4749
4750 // get ranks of processes that have additional data or require data in
4751 // order to have enough source points to do a initial nnn search
4752 int * flag_buffer = xcalloc(2 * (size_t)comm_size, sizeof(*flag_buffer));
4753 int * send_flags = flag_buffer;
4754 int * recv_flags = flag_buffer + comm_size;
4756 grid_pair->proc_sphere_part, unmasked_local_counts, (uint64_t)n,
4757 send_flags, recv_flags, comm_rank, comm_size);
4758 for (int i = 0; i < comm_size; ++i) {
4759 sendcounts[i] = (size_t)send_flags[i];
4760 recvcounts[i] = (size_t)recv_flags[i];
4761 }
4762 free(flag_buffer);
4763
4764 size_t local_send_count = (size_t)(MIN(unmasked_local_count, n));
4765
4766 size_t raccu = 0;
4767 for (int i = 0; i < comm_size; ++i) {
4768 sdispls[i] = 0;
4769 rdispls[i] = raccu;
4770 sendcounts[i] *= local_send_count;
4771 raccu += (recvcounts[i] *= (int)(MIN(unmasked_local_counts[i], n)));
4772 }
4773
4774 size_t recv_count = recvcounts[comm_size-1] + rdispls[comm_size-1];
4775
4776 struct single_remote_point * single_remote_point_buffer =
4777 xmalloc(
4778 (local_send_count + recv_count) * sizeof(*single_remote_point_buffer));
4779 struct single_remote_point * local_send_ids = single_remote_point_buffer;
4780 struct single_remote_point * recv_ids =
4781 single_remote_point_buffer + local_send_count;
4782
4783 // get local source points that can be sent to other processes
4785 dist_grid, field, comm_rank, local_send_count, local_send_ids);
4786
4787 MPI_Datatype single_remote_point_dt =
4789
4790 // exchange source points (integrate points into local data)
4792 local_send_ids, sendcounts, sdispls, recv_ids, recvcounts, rdispls,
4793 sizeof(*local_send_ids), single_remote_point_dt, comm);
4794
4795 yac_mpi_call(MPI_Type_free(&single_remote_point_dt), comm);
4796
4797 size_t * dummy = xmalloc(recv_count * sizeof(*dummy));
4798
4799 // convert all remote ids to local ones, extend local dist_grid data,
4800 // if necessary
4802 dist_grid, recv_ids, recv_count, field.location, dummy);
4803
4804 free(dummy);
4805 free(single_remote_point_buffer);
4806 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
4807 }
4808 free(unmasked_local_counts);
4809
4810 struct point_sphere_part_search * sphere_part =
4811 yac_dist_grid_get_field_sphere_part(dist_grid, field);
4812
4813 double * cos_angles = NULL;
4814 size_t cos_angles_array_size = 0;
4815 yac_coordinate_pointer result_coords = NULL;
4816 size_t result_coords_array_size = 0;
4817 size_t * result_points = NULL;
4818 size_t result_points_array_size = 0;
4819 size_t * num_results = xcalloc(count, sizeof(*num_results));
4820
4821 // do local search
4823 sphere_part, count, search_coords, n, &cos_angles, &cos_angles_array_size,
4824 &result_coords, &result_coords_array_size, &result_points,
4825 &result_points_array_size, num_results);
4826
4827 size_t max_num_results = 0;
4828 for (size_t i = 0; i < count; ++i)
4829 if (max_num_results < num_results[i]) max_num_results = num_results[i];
4830
4831 // extract results (only the local core points)
4832 struct nnn_search_result * results = xmalloc(n * count * sizeof(*results));
4833 yac_int const * global_ids =
4834 yac_dist_grid_get_global_ids(dist_grid, field.location);
4835
4836 struct nnn_search_result * temp_results =
4837 xmalloc(max_num_results * sizeof(*temp_results));
4838
4839 for (size_t i = 0, k = 0; i < count; ++i) {
4840
4841 struct nnn_search_result * curr_results = results + i * n;
4842 for (size_t j = 0; j < n; ++j, ++k) {
4843 size_t curr_local_id = result_points[k];
4844 curr_results[j].point_info.global_id = global_ids[curr_local_id];
4845 curr_results[j].point_info.data.rank = comm_rank;
4846 curr_results[j].point_info.data.orig_pos = (uint64_t)curr_local_id;
4847 curr_results[j].cos_angle = cos_angles[k];
4848 }
4849
4850 for (size_t j = n; j < num_results[i]; ++j, ++k) {
4851 size_t curr_local_id = result_points[k];
4852 temp_results[j-n].point_info.global_id = global_ids[curr_local_id];
4853 temp_results[j-n].point_info.data.rank = comm_rank;
4854 temp_results[j-n].point_info.data.orig_pos = (uint64_t)curr_local_id;
4855 temp_results[j-n].cos_angle = cos_angles[k];
4856 }
4857 insert_nnn_result_points(curr_results, n, temp_results, num_results[i] - n);
4858 }
4859
4860 yac_const_coordinate_pointer field_coords =
4861 yac_dist_grid_get_field_coords(dist_grid, field);
4862 int * request_ranks = NULL;
4863 size_t request_ranks_array_size = 0;
4864 size_t num_request_ranks = 0;
4865 int * num_requests = xmalloc(count * sizeof(*num_requests));
4866
4867 // for all search points
4868 for (size_t i = 0; i < count; ++i) {
4869
4870 // generate bounding circles for all search points
4871 struct bounding_circle result_bnd_circle;
4872 memcpy(result_bnd_circle.base_vector, search_coords[i],
4873 3 * sizeof(search_coords[0][0]));
4874 result_bnd_circle.inc_angle =
4876 search_coords[i],
4877 field_coords[results[(i + 1) * n - 1].point_info.data.orig_pos]);
4878
4879 ENSURE_ARRAY_SIZE(request_ranks, request_ranks_array_size,
4880 num_request_ranks + (size_t)comm_size);
4881
4882 // search for processes that might be able to contribute to the search
4883 // results
4884 int * curr_request_ranks = request_ranks + num_request_ranks;
4886 grid_pair->proc_sphere_part, result_bnd_circle,
4887 curr_request_ranks, num_requests + i);
4888
4889 // remove requests for local process
4890 int new_num_requests = 0;
4891 for (int j = 0; j < num_requests[i]; ++j) {
4892 if (curr_request_ranks[j] == comm_rank) continue;
4893 if (new_num_requests != j)
4894 curr_request_ranks[new_num_requests] = curr_request_ranks[j];
4895 ++new_num_requests;
4896 }
4897
4898 num_request_ranks += (size_t)(num_requests[i] = new_num_requests);
4899 }
4900
4901 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
4903 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
4904
4905 for (size_t i = 0; i < num_request_ranks; ++i) sendcounts[request_ranks[i]]++;
4906
4908 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4909
4910 size_t recv_count = rdispls[comm_size-1] + recvcounts[comm_size-1];
4911
4912 yac_coordinate_pointer coord_buffer =
4913 xmalloc((num_request_ranks + recv_count) * sizeof(*coord_buffer));
4914 yac_coordinate_pointer send_request_coords = coord_buffer;
4915 yac_coordinate_pointer recv_request_coords = coord_buffer + num_request_ranks;
4916
4917 // pack bounding circles
4918 for (size_t i = 0, k = 0; i < count; ++i)
4919 for (int j = 0; j < num_requests[i]; ++j, ++k)
4920 memcpy(send_request_coords[sdispls[request_ranks[k]+1]++],
4921 search_coords[i], 3 * sizeof(double));
4922
4923 MPI_Datatype coord_dt = yac_get_coordinate_mpi_datatype(comm);
4924
4925 // exchange requests to other processes
4927 send_request_coords, sendcounts, sdispls,
4928 recv_request_coords, recvcounts, rdispls,
4929 sizeof(*send_request_coords), coord_dt, comm);
4930
4931 yac_mpi_call(MPI_Type_free(&coord_dt), comm);
4932
4933 num_results = xrealloc(num_results, recv_count * sizeof(*num_results));
4934 memset(num_results, 0, recv_count * sizeof(*num_results));
4935
4936 // do local search for requests
4938 sphere_part, recv_count, recv_request_coords, n, &cos_angles,
4939 &cos_angles_array_size, &result_coords, &result_coords_array_size,
4940 &result_points, &result_points_array_size, num_results);
4942 free(coord_buffer);
4943
4944 for (size_t i = 0; i < recv_count; ++i)
4945 if (max_num_results < num_results[i]) max_num_results = num_results[i];
4946 temp_results =
4947 xrealloc(temp_results, max_num_results * sizeof(*temp_results));
4948
4949 // extract results (only the ones, that are local core points)
4950 struct nnn_search_result * request_results =
4951 xmalloc(n * (num_request_ranks + recv_count) * sizeof(*request_results));
4952 struct nnn_search_result * send_request_results = request_results;
4953 struct nnn_search_result * recv_request_results =
4954 request_results + n * recv_count;
4955
4956 for (size_t i = 0, k = 0; i < recv_count; ++i) {
4957
4958 struct nnn_search_result * curr_results = send_request_results + i * n;
4959 for (size_t j = 0; j < n; ++j, ++k) {
4960 size_t curr_local_id = result_points[k];
4961 curr_results[j].point_info.global_id = global_ids[curr_local_id];
4962 curr_results[j].point_info.data.rank = comm_rank;
4963 curr_results[j].point_info.data.orig_pos = (uint64_t)curr_local_id;
4964 curr_results[j].cos_angle = cos_angles[k];
4965 }
4966
4967 for (size_t j = n; j < num_results[i]; ++j, ++k) {
4968 size_t curr_local_id = result_points[k];
4969 temp_results[j-n].point_info.global_id = global_ids[curr_local_id];
4970 temp_results[j-n].point_info.data.rank = comm_rank;
4971 temp_results[j-n].point_info.data.orig_pos = (uint64_t)curr_local_id;
4972 temp_results[j-n].cos_angle = cos_angles[k];
4973 }
4974 insert_nnn_result_points(curr_results, n, temp_results, num_results[i]-n);
4975 }
4976
4977 free(cos_angles);
4978 free(result_coords);
4979 free(result_points);
4980 free(num_results);
4981
4982 MPI_Datatype nnn_search_result_dt =
4984
4985 // return results
4987 send_request_results, recvcounts, rdispls,
4988 recv_request_results, sendcounts, sdispls,
4989 n * sizeof(*send_request_results), nnn_search_result_dt, comm);
4990
4991 yac_mpi_call(MPI_Type_free(&nnn_search_result_dt), comm);
4992
4993 // unpack results of remote search and integrate them into local results
4994 for (size_t i = 0, k = 0; i < count; ++i) {
4995 struct nnn_search_result * curr_results = results + n * i;
4996 for (int j = 0; j < num_requests[i]; ++k, ++j) {
4997 size_t pos = (size_t)(sdispls[request_ranks[k]]++);
4998 struct nnn_search_result * curr_request_results =
4999 recv_request_results + n * pos;
5000 insert_nnn_result_points(curr_results, n, curr_request_results, n);
5001 }
5002 }
5003 free(request_ranks);
5004 free(request_results);
5005 free(num_requests);
5006 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
5007
5008 size_t remote_count = 0;
5009 for (size_t i = 0; i < n * count; ++i)
5010 if (results[i].point_info.data.rank != comm_rank) ++remote_count;
5011
5012 // gather remote points from search results
5014 xmalloc(remote_count * sizeof(*remote_points));
5015 for (size_t i = 0, k = 0; i < n * count; ++i)
5016 if (results[i].point_info.data.rank != comm_rank)
5017 remote_points[k++] = results[i].point_info;
5018
5019 // convert all remote ids to local ones, extend local dist_grid data,
5020 // if necessary
5022 dist_grid, remote_points, remote_count, field.location,
5023 local_ids + n * count - remote_count);
5024
5025 free(remote_points);
5026
5027 // gather final result
5028 for (size_t i = 0, offset = n * count - remote_count; i < n * count; ++i) {
5029 if (results[i].point_info.data.rank == comm_rank)
5030 local_ids[i] = (size_t)(results[i].point_info.data.orig_pos);
5031 else local_ids[i] = local_ids[offset++];
5032 }
5033 free(temp_results);
5034 free(results);
5035}
5036
5038 struct yac_dist_grid_pair * grid_pair, char const * grid_name,
5039 const_bounding_circle_pointer bnd_circles, size_t count, size_t ** cells,
5040 size_t * num_results_per_bnd_circle, struct yac_interp_field field) {
5041
5042 MPI_Comm comm = grid_pair->comm;
5043 int comm_rank, comm_size;
5044 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
5045 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
5046
5047 struct yac_dist_grid * dist_grid =
5048 yac_dist_grid_pair_get_dist_grid(grid_pair, grid_name);
5049
5050 int * num_ranks = xcalloc(count, sizeof(*num_ranks));
5051 int * rank_buffer = NULL;
5052 size_t rank_buffer_size = 0;
5053 size_t rank_buffer_array_size = 0;
5054
5055 for (size_t i = 0; i < count; ++i) {
5056
5057 ENSURE_ARRAY_SIZE(rank_buffer, rank_buffer_array_size,
5058 rank_buffer_size + (size_t)comm_size);
5059
5060 // finds all processes whose core area overlaps with the bounding circle
5061 // of the current cell
5062 // beware: even if the core are of a process does not overlap with the
5063 // bounding circle, it may have core cells that overlap nevertheless
5065 grid_pair->proc_sphere_part, bnd_circles[i],
5066 rank_buffer + rank_buffer_size, num_ranks + i);
5067 rank_buffer_size += (size_t)(num_ranks[i]);
5068 }
5069
5070 size_t * size_t_buffer =
5071 xmalloc(4 * (size_t)comm_size * sizeof(*size_t_buffer));
5072 size_t * result_sendcounts = size_t_buffer + 0 * comm_size;
5073 size_t * result_recvcounts = size_t_buffer + 1 * comm_size;
5074 size_t * result_sdispls = size_t_buffer + 2 * comm_size;
5075 size_t * result_rdispls = size_t_buffer + 3 * comm_size;
5076
5077 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
5079 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
5080
5081 for (size_t i = 0, offset = 0; i < count; ++i) {
5082 int curr_num_ranks = num_ranks[i];
5083 int * ranks = rank_buffer + offset;
5084 offset += (size_t)curr_num_ranks;
5085 for (int j = 0; j < curr_num_ranks; ++j) sendcounts[ranks[j]]++;
5086 }
5087
5088 // local overlaps do not need to be send around
5089 size_t local_count = sendcounts[comm_rank];
5090 sendcounts[comm_rank] = 0;
5091
5093 1, sendcounts, recvcounts, sdispls, rdispls, comm);
5094
5095 size_t send_count = sdispls[comm_size] + sendcounts[comm_size-1];
5096 size_t recv_count = rdispls[comm_size-1] + recvcounts[comm_size-1];
5097
5098 struct bounding_circle * bnd_circle_buffer =
5099 xmalloc((send_count + recv_count + local_count) *
5100 sizeof(*bnd_circle_buffer));
5101 struct bounding_circle * send_buffer = bnd_circle_buffer;
5102 struct bounding_circle * recv_buffer = bnd_circle_buffer + send_count;
5103 struct bounding_circle * local_buffer =
5104 bnd_circle_buffer + send_count + recv_count;
5105
5106 // pack bounding circles
5107 for (size_t i = 0, offset = 0, local_offset = 0; i < count; ++i) {
5108 int curr_num_ranks = num_ranks[i];
5109 int * ranks = rank_buffer + offset;
5110 offset += (size_t)curr_num_ranks;
5111 for (int j = 0; j < curr_num_ranks; ++j) {
5112 int rank = ranks[j];
5113 if (rank == comm_rank)
5114 local_buffer[local_offset++] = bnd_circles[i];
5115 else
5116 send_buffer[sdispls[rank + 1]++] = bnd_circles[i];
5117 }
5118 }
5119
5120 MPI_Datatype bnd_circle_dt = yac_get_bounding_circle_mpi_datatype(comm);
5121 yac_mpi_call(MPI_Type_commit(&bnd_circle_dt), comm);
5122
5123 // exchange bounding circles
5125 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls,
5126 sizeof(*send_buffer), bnd_circle_dt, comm);
5127
5128 yac_mpi_call(MPI_Type_free(&bnd_circle_dt), comm);
5129
5130 struct bnd_sphere_part_search * cell_sphere_part =
5131 dist_grid_pair_get_cell_sphere_part(grid_pair, grid_name);
5132
5133 size_t * local_cells = NULL;
5134 size_t * num_local_cells_per_bnd_circle =
5135 xmalloc((recv_count + local_count) *
5136 sizeof(*num_local_cells_per_bnd_circle));
5137
5138 uint64_t * uint64_t_buffer =
5139 xmalloc((send_count + recv_count + local_count) *
5140 sizeof(*uint64_t_buffer));
5141 uint64_t * num_local_cells_per_bnd_circle_uint64_t = uint64_t_buffer;
5142 uint64_t * num_remote_cells_per_bnd_circle =
5143 uint64_t_buffer + recv_count + local_count;
5144
5145 // search for received bounding circles in local data
5147 cell_sphere_part, recv_buffer, recv_count + local_count, &local_cells,
5148 num_local_cells_per_bnd_circle);
5149
5150 int const * field_mask =
5151 (field.location == YAC_LOC_CELL)?
5152 yac_dist_grid_get_field_mask(dist_grid, field):NULL;
5153
5154 struct bounding_circle * cell_bnd_circles = dist_grid->cell_bnd_circles;
5155
5156 // check field mask and actual overlap of bounding circles
5157 for (size_t i = 0, offset = 0, new_offset = 0;
5158 i < recv_count + local_count; ++i) {
5159
5160 struct bounding_circle * curr_bnd_circle = recv_buffer + i;
5161 size_t curr_num_results = num_local_cells_per_bnd_circle[i];
5162
5163 // remove cells whose bounding circle do not overlap with the current one
5164</