YetAnotherCoupler 2.4.3
dist_grid.c
Go to the documentation of this file.
1
11/*
12 * Keywords:
13 * Maintainer: Moritz Hanke <hanke@dkrz.de>
14 * URL: https://dkrz-sw.gitlab-pages.dkrz.de/yac/
15 *
16 * This file is part of YAC.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are
20 * met:
21 *
22 * Redistributions of source code must retain the above copyright notice,
23 * this list of conditions and the following disclaimer.
24 *
25 * Redistributions in binary form must reproduce the above copyright
26 * notice, this list of conditions and the following disclaimer in the
27 * documentation and/or other materials provided with the distribution.
28 *
29 * Neither the name of the DKRZ GmbH nor the names of its contributors
30 * may be used to endorse or promote products derived from this software
31 * without specific prior written permission.
32 *
33 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
34 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
35 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
36 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
37 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
38 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
39 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
40 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
41 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
42 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
43 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44 */
45
46#ifdef HAVE_CONFIG_H
47// Get the definition of the 'restrict' keyword.
48#include "config.h"
49#endif
50
51#include <stdlib.h>
52#include <string.h>
53#include <assert.h>
54#include <float.h>
55#include <yaxt.h>
56
57#include "grid.h"
58#include "dist_grid.h"
59#include "geometry.h"
60#include "yac_mpi.h"
61#include "utils.h"
62#include "sphere_part.h"
63#include "ensure_array_size.h"
64#include "interp_grid.h"
65
69};
70
74};
75
79};
80
81// reorder_idx has to be first
85 double coord[3];
87};
88
89// reorder_idx has to be first
96};
97
100 uint64_t orig_pos;
101 double coord[3];
102};
103
104struct id_pos {
106 uint64_t orig_pos;
107};
108
110
111 int count;
115};
116
120 double coord[3];
121};
122
124 int ** masks;
130};
131
132// warning: when changing this, ensure that struct const_basic_grid_data is
133// changed accordingly
134struct dist_grid {
142 size_t * cell_to_edge;
147 struct remote_point_infos * cell_owners; // original cell owner(s)
148 struct remote_point_infos * vertex_owners; // original vertex owner(s)
149 struct remote_point_infos * edge_owners; // original edge owner(s)
153 size_t num_cells;
155 size_t num_edges;
166 MPI_Comm comm;
167};
168
171 int grid_ids[2];
175 MPI_Comm comm;
176};
177
180 double cos_angle;
181};
182
184 struct {
185 size_t local_id;
188 size_t neigh_idx;
189};
190
191// looks up positions of ids in an array of sorted ids
192static void id2idx(
193 yac_int * ids, size_t * idx, size_t num_ids,
194 yac_int * ref_sorted_ids, size_t num_sorted_ids) {
195
196 size_t * reorder = xmalloc(num_ids * sizeof(*reorder));
197 for (size_t i = 0; i < num_ids; ++i) reorder[i] = i;
198
199 yac_quicksort_index_yac_int_size_t(ids, num_ids, reorder);
200
201 for (size_t i = 0, j = 0; i < num_ids; ++i) {
202
203 yac_int curr_id = ids[i];
204 while ((j < num_sorted_ids) && (ref_sorted_ids[j] < curr_id)) ++j;
206 (j < num_sorted_ids) && (ref_sorted_ids[j] == curr_id),
207 "ERROR(id2idx): id not found")
208 idx[reorder[i]] = j;
209 }
210
211 free(reorder);
212}
213
215 struct proc_sphere_part_node * proc_sphere_part, int comm_rank,
216 coordinate_pointer vertex_coordinates, size_t count) {
217
218 int * core_mask = xcalloc(count, sizeof(*core_mask));
219
221 proc_sphere_part, vertex_coordinates, count, core_mask);
222
223 for (size_t i = 0; i < count; ++i)
224 core_mask[i] = core_mask[i] == comm_rank;
225
226 return core_mask;
227}
228
230 struct dist_grid * grid, size_t cell_idx) {
231
232 int num_vertices = grid->num_vertices_per_cell[cell_idx];
233 if (num_vertices == 0) return SIZE_MAX;
234 yac_int * grid_vertex_ids = grid->vertex_ids;
235 size_t * vertices =
236 grid->cell_to_vertex + grid->cell_to_vertex_offsets[cell_idx];
237 // get the cell corner with the smallest global id
238 size_t min_idx = vertices[0];
239 yac_int min_global_id = grid_vertex_ids[min_idx];
240 for (int j = 1; j < num_vertices; ++j) {
241 size_t curr_vertex = vertices[j];
242 yac_int curr_global_id = grid_vertex_ids[curr_vertex];
243 if (min_global_id > curr_global_id) {
244 min_global_id = curr_global_id;
245 min_idx = curr_vertex;
246 }
247 }
248 return min_idx;
249}
250
251// determines the core mask for the cells in the distributed grid
252// the core mask value is equal to that of the core mask value of the
253// cell vertex with the lowest global id
255 int const * vertex_core_mask, struct dist_grid * dist_grid, int is_root) {
256
257 size_t num_cells = dist_grid->num_cells;
258 int * core_cell_mask = xmalloc(num_cells * sizeof(core_cell_mask));
259
260 //-------------------------
261 // determine cell core mask
262 //-------------------------
263 for (size_t i = 0; i < num_cells; ++i) {
264 size_t ref_vertex = get_cell_reference_vertex(dist_grid, i);
265 core_cell_mask[i] =
266 (ref_vertex != SIZE_MAX)?(vertex_core_mask[ref_vertex]):is_root;
267 }
268 return core_cell_mask;
269}
270
271static inline size_t get_edge_reference_vertex(
272 struct dist_grid * grid, size_t edge_idx) {
273
274 yac_int * vertex_ids = grid->vertex_ids;
275 size_t * edge_vertices = &(grid->edge_to_vertex[edge_idx][0]);
276 // get the edge corner with the smallest global id
277 return edge_vertices[
278 (vertex_ids[edge_vertices[0]] > vertex_ids[edge_vertices[1]])?1:0];
279}
280
281// determines the core mask for the edges in the distributed grid
282// the core mask value is equal to that of the core mask value of the
283// edge vertex with the lowest global id
285 int const * vertex_core_mask, struct dist_grid * dist_grid) {
286
287 size_t num_edges = dist_grid->num_edges;
288 int * core_edge_mask = xmalloc(num_edges * sizeof(core_edge_mask));
289
290 //-------------------------
291 // determine edge core mask
292 //-------------------------
293 for (size_t i = 0; i < num_edges; ++i)
294 core_edge_mask[i] =
295 vertex_core_mask[get_edge_reference_vertex(dist_grid, i)];
296 return core_edge_mask;
297}
298
299static MPI_Datatype yac_get_single_remote_point_mpi_datatype(MPI_Comm comm) {
300
301 struct single_remote_point dummy;
302 MPI_Datatype single_id_owner_dt;
303 int array_of_blocklengths[] = {1, 1, 1};
304 const MPI_Aint array_of_displacements[] =
305 {(MPI_Aint)((void*)&(dummy.global_id) - (void*)&dummy),
306 (MPI_Aint)((void*)&(dummy.data.rank) - (void*)&dummy),
307 (MPI_Aint)((void*)&(dummy.data.orig_pos) - (void*)&dummy)};
308 const MPI_Datatype array_of_types[] =
309 {Xt_int_dt, MPI_INT, MPI_UINT64_T};
311 MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacements,
312 array_of_types, &single_id_owner_dt), comm);
313 return yac_create_resized(single_id_owner_dt, sizeof(dummy), comm);
314}
315
317 int n, MPI_Comm comm) {
318
319 MPI_Datatype dt_single_remote_point_ =
321 MPI_Datatype dt_single_remote_point__ =
323 dt_single_remote_point_, sizeof(struct single_remote_point_reorder), comm);
324 MPI_Datatype dt_single_remote_point;
326 MPI_Type_contiguous(
327 n, dt_single_remote_point__, &dt_single_remote_point), comm);
328 yac_mpi_call(MPI_Type_free(&dt_single_remote_point__), comm);
329 yac_mpi_call(MPI_Type_commit(&dt_single_remote_point), comm);
330 return dt_single_remote_point;
331}
332
334 const void * a, const void * b) {
335
336 return (((const struct single_remote_point *)a)->global_id >
337 ((const struct single_remote_point *)b)->global_id) -
338 (((const struct single_remote_point *)a)->global_id <
339 ((const struct single_remote_point *)b)->global_id);
340}
341
342static size_t determine_local_count(size_t count, int * owner) {
343
344 size_t local_count = 0;
345 for (size_t i = 0; i < count; ++i) if (owner[i]) ++local_count;
346 return local_count;
347}
348
349static MPI_Datatype yac_get_id_pos_mpi_datatype(MPI_Comm comm) {
350
351 struct id_pos dummy;
352 MPI_Datatype id_pos_dt;
353 int array_of_blocklengths[] = {1,1};
354 const MPI_Aint array_of_displacements[] =
355 {(MPI_Aint)((void*)&(dummy.global_id) - (void*)&dummy),
356 (MPI_Aint)((void*)&(dummy.orig_pos) - (void*)&dummy)};
357 const MPI_Datatype array_of_types[] = {Xt_int_dt, MPI_UINT64_T};
359 MPI_Type_create_struct(
360 2, array_of_blocklengths, array_of_displacements,
361 array_of_types, &id_pos_dt), comm);
362 return yac_create_resized(id_pos_dt, sizeof(dummy), comm);
363}
364
365MPI_Datatype yac_get_remote_point_info_mpi_datatype(MPI_Comm comm) {
366
367 struct remote_point_info dummy;
368 MPI_Datatype remote_point_info_dt;
369 int array_of_blocklengths[] = {1, 1};
370 const MPI_Aint array_of_displacements[] =
371 {(MPI_Aint)((void*)&(dummy.rank) - (void*)&dummy),
372 (MPI_Aint)((void*)&(dummy.orig_pos) - (void*)&dummy)};
373 const MPI_Datatype array_of_types[] =
374 {MPI_INT, MPI_UINT64_T};
376 MPI_Type_create_struct(2, array_of_blocklengths, array_of_displacements,
377 array_of_types, &remote_point_info_dt), comm);
378 return yac_create_resized(remote_point_info_dt, sizeof(dummy), comm);
379}
380
382 struct remote_point_infos point_infos) {
383
384 int count = point_infos.count;
385 if (count > 1) {
386 struct remote_point_info * temp = xmalloc((size_t)count * sizeof(*temp));
387 memcpy(temp, point_infos.data.multi, (size_t)count * sizeof(*temp));
388 point_infos.data.multi = temp;
389 }
390 return point_infos;
391}
392
394 yac_int * sorted_ids, size_t * reorder_idx, size_t count,
395 struct remote_points * points) {
396
397 struct remote_point_infos * point_infos = xmalloc(count * sizeof(*point_infos));
398
399 size_t id_owner_count = points->count;
400 struct remote_point * points_ = points->data;
401
402 for (size_t i = 0, j = 0; i < count; ++i) {
403
404 yac_int curr_id = sorted_ids[i];
405
406 while ((j < id_owner_count) && (points_[j].global_id < curr_id)) ++j;
407
408 if ((j >= id_owner_count) || (points_[j].global_id != curr_id))
409 point_infos[reorder_idx[i]].count = -1;
410 else
411 point_infos[reorder_idx[i]] = copy_remote_point_infos(points_[j].data);
412 }
413
414 return point_infos;
415}
416
418 yac_int * requested_ids, struct remote_point_infos ** results,
419 size_t * reorder_buffer, size_t request_count,
420 struct remote_points * local_remote_data) {
421
422 struct remote_point * local_remote_points = local_remote_data->data;
423 size_t local_count = local_remote_data->count;
424 static struct remote_point_infos dummy_data =
425 {.count = 0, .data.single = {.rank = INT_MAX, .orig_pos = UINT64_MAX}};
426
427 for (size_t j = 0; j < request_count; ++j) reorder_buffer[j] = j;
428
429 // sort ids for easier owner lookup
431 requested_ids, request_count, reorder_buffer);
432
433 for (size_t i = 0, j = 0; i < request_count; ++i) {
434 yac_int curr_id = requested_ids[i];
435 while ((j < local_count) && (curr_id > local_remote_points[j].global_id))
436 ++j;
437 results[reorder_buffer[i]] =
438 ((j < local_count) && (curr_id == local_remote_points[j].global_id))?
439 &(local_remote_points[j].data):&dummy_data;
440 }
441}
442
444 struct remote_point_infos const * infos, MPI_Datatype point_info_dt,
445 MPI_Comm comm) {
446
447 int pack_size_count,
448 pack_size_data;
449
450 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_count), comm);
452 MPI_Pack_size(infos->count, point_info_dt, comm, &pack_size_data), comm);
453
454 return pack_size_count + pack_size_data;
455}
456
458 struct remote_point_infos ** infos, size_t count, int * pack_sizes,
459 MPI_Datatype point_info_dt, MPI_Comm comm) {
460
461 int pack_size_count;
462
463 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_count), comm);
464
465 for (size_t i = 0; i < count; ++i) {
467 MPI_Pack_size(
468 infos[i]->count, point_info_dt, comm, pack_sizes + i), comm);
469 pack_sizes[i] += pack_size_count;
470 }
471}
472
474 struct remote_point_infos ** infos, int * pack_sizes, size_t count,
475 void * buffer, MPI_Datatype point_info_dt, MPI_Comm comm) {
476
477 for (size_t i = 0; i < count; ++i) {
478
479 int curr_count = infos[i]->count;
480 struct remote_point_info * curr_point_infos =
481 (curr_count == 1)?
482 (&(infos[i]->data.single)):(infos[i]->data.multi);
483
484 int position = 0;
485 int buffer_size = pack_sizes[i];
487 MPI_Pack(&curr_count, 1, MPI_INT, buffer,
488 buffer_size, &position, comm), comm);
490 MPI_Pack(curr_point_infos, curr_count, point_info_dt, buffer,
491 buffer_size, &position, comm), comm);
492 buffer = (void*)(((unsigned char*)buffer) + buffer_size);
493 }
494}
495
497 void * buffer, int buffer_size, int * position,
498 struct remote_point_infos * infos, MPI_Datatype point_info_dt, MPI_Comm comm) {
499
500 int count;
502 MPI_Unpack(buffer, buffer_size, position, &count, 1, MPI_INT, comm), comm);
503
504 infos->count = count;
505
506 if (count > 0) {
507 struct remote_point_info * point_infos;
508 if (count == 1)
509 point_infos = &(infos->data.single);
510 else
511 point_infos =
512 (infos->data.multi = xmalloc((size_t)count * sizeof(*point_infos)));
513
515 MPI_Unpack(buffer, buffer_size, position, point_infos, count,
516 point_info_dt, comm), comm);
517 } else {
518 infos->data.single.rank = INT_MAX;
519 infos->data.single.orig_pos = UINT64_MAX;
520 }
521}
522
524 void * buffer, int buffer_size, int * position,
525 struct remote_point_info * info_buffer, size_t * info_buffer_position,
526 struct remote_point_infos * infos, MPI_Datatype point_info_dt, MPI_Comm comm) {
527
528 int count;
530 MPI_Unpack(buffer, buffer_size, position, &count, 1, MPI_INT, comm), comm);
531
532 infos->count = count;
533
534 struct remote_point_info * point_infos;
535 if (count == 1)
536 point_infos = &(infos->data.single);
537 else {
538 point_infos =
539 (infos->data.multi = info_buffer + *info_buffer_position);
540 *info_buffer_position += count;
541 }
542
544 MPI_Unpack(buffer, buffer_size, position, point_infos, count,
545 point_info_dt, comm), comm);
546}
547
549 struct remote_point * point, MPI_Datatype point_info_dt, MPI_Comm comm) {
550
551 int pack_size_id,
552 pack_size_remote_point_infos;
553
554 yac_mpi_call(MPI_Pack_size(1, Xt_int_dt, comm, &pack_size_id), comm);
555 pack_size_remote_point_infos =
556 yac_remote_point_infos_get_pack_size(&(point->data), point_info_dt, comm);
557
558 return pack_size_id + pack_size_remote_point_infos;
559}
560
562 struct remote_point_infos const * infos, void * buffer, int buffer_size,
563 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
564
565 int count = infos->count;
566
567 struct remote_point_info const * info =
568 (count == 1)?(&(infos->data.single)):(infos->data.multi);
569
571 MPI_Pack(&count, 1, MPI_INT, buffer, buffer_size, position, comm), comm);
573 MPI_Pack(info, count, point_info_dt, buffer, buffer_size, position, comm),
574 comm);
575}
576
578 struct remote_point * point, void * buffer, int buffer_size, int * position,
579 MPI_Datatype point_info_dt, MPI_Comm comm) {
580
582 MPI_Pack(&(point->global_id), 1, Xt_int_dt, buffer,
583 buffer_size, position, comm), comm);
584
586 &(point->data), buffer, buffer_size, position, point_info_dt, comm);
587}
588
590 void * buffer, int buffer_size, int * position, struct remote_point * point,
591 MPI_Datatype point_info_dt, MPI_Comm comm) {
592
594 MPI_Unpack(
595 buffer, buffer_size, position, &(point->global_id), 1, Xt_int_dt, comm),
596 comm);
598 buffer, buffer_size, position, &(point->data), point_info_dt, comm);
599}
600
602 void * buffer, int buffer_size, int * position,
603 struct remote_point_info * info_buffer, size_t * info_buffer_position,
604 struct remote_point * point, MPI_Datatype point_info_dt, MPI_Comm comm) {
605
607 MPI_Unpack(
608 buffer, buffer_size, position, &(point->global_id), 1, Xt_int_dt, comm),
609 comm);
611 buffer, buffer_size, position, info_buffer, info_buffer_position,
612 &(point->data), point_info_dt, comm);
613}
614
616 struct remote_points * points, MPI_Datatype point_info_dt, MPI_Comm comm) {
617
618 size_t count = points->count;
619 struct remote_point * points_data = points->data;
620
621 int count_pack_size,
622 remote_points_pack_size;
623
624 yac_mpi_call(MPI_Pack_size(2, MPI_UINT64_T, comm, &count_pack_size), comm);
625 remote_points_pack_size = 0;
626 for (size_t i = 0; i < count; ++i)
627 remote_points_pack_size +=
628 yac_remote_point_get_pack_size(points_data + i, point_info_dt, comm);
629
630 return count_pack_size + remote_points_pack_size;
631}
632
634 struct remote_points * points, void * buffer, int buffer_size, int * position,
635 MPI_Datatype point_info_dt, MPI_Comm comm) {
636
637 size_t count = points->count;
638 struct remote_point * point_data = points->data;
639 uint64_t counts[2] = {(uint64_t)count, 0};
640 for (size_t i = 0; i < count; ++i)
641 if (point_data[i].data.count > 1)
642 counts[1] += (uint64_t)(point_data[i].data.count);
643
645 MPI_Pack(counts, 2, MPI_UINT64_T, buffer,
646 buffer_size, position, comm), comm);
647 for (size_t i = 0; i < count; ++i)
649 point_data + i, buffer, buffer_size, position, point_info_dt, comm);
650}
651
653 void * buffer, int buffer_size, int * position,
654 struct remote_points ** points, MPI_Datatype point_info_dt, MPI_Comm comm) {
655
656 uint64_t counts[2];
657
659 MPI_Unpack(
660 buffer, buffer_size, position, counts, 2, MPI_UINT64_T, comm), comm);
661
662 *points = xmalloc(((size_t)counts[1]) * sizeof(struct remote_point_infos) +
663 sizeof(**points));
664
665 size_t count = ((*points)->count = (size_t)(counts[0]));
666 struct remote_point * point_data =
667 ((*points)->data = xmalloc(count * sizeof(*((*points)->data))));
668 struct remote_point_info * remote_point_info_buffer =
669 &((*points)->buffer[0]);
670
671 for (size_t i = 0, offset = 0; i < count; ++i) {
672
674 buffer, buffer_size, position, remote_point_info_buffer, &offset,
675 point_data + i, point_info_dt, comm);
676 }
677}
678
680 struct dist_grid * dist_grid, struct proc_sphere_part_node * proc_sphere_part,
681 struct remote_points * dist_cell_owner,
682 struct remote_points * dist_vertex_owner,
683 struct remote_points * dist_edge_owner) {
684
685 MPI_Comm comm = dist_grid->comm;
686
687 int comm_size;
688 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
689
693 dist_grid->num_cells, dist_cell_owner);
697 dist_grid->num_vertices, dist_vertex_owner);
701 dist_grid->num_edges, dist_edge_owner);
702
703 size_t num_missing_vertices = 0;
704 size_t num_missing_edges = 0;
705
706 for (size_t i = 0; i < dist_grid->num_cells; ++i)
708 dist_grid->cell_owners[i].count != -1,
709 "ERROR(generate_dist_grid_remote_point_infos):"
710 "no owner information for a cell available")
711 for (size_t i = 0; i < dist_grid->num_vertices; ++i)
712 if (dist_grid->vertex_owners[i].count == -1) ++num_missing_vertices;
713 for (size_t i = 0; i < dist_grid->num_edges; ++i)
714 if (dist_grid->edge_owners[i].count == -1) ++num_missing_edges;
715
716 size_t total_num_missing = num_missing_vertices + num_missing_edges;
717 int * ranks_buffer = xmalloc(total_num_missing * sizeof(*ranks_buffer));
718 int * vertex_ranks = ranks_buffer;
719 int * edge_ranks = ranks_buffer + num_missing_vertices;
720 size_t * size_t_buffer =
721 xmalloc(3 * total_num_missing * sizeof(*size_t_buffer));
722 size_t * vertex_reorder = size_t_buffer;
723 size_t * edge_reorder = size_t_buffer + num_missing_vertices;
724 size_t * coord_indices = size_t_buffer + total_num_missing;
725 size_t * ranks_idxs = coord_indices;
726 size_t * cve_reorder = size_t_buffer + 2 * total_num_missing;
727
728 size_t offset = 0;
729 for (size_t i = 0, j = 0; i < dist_grid->num_vertices; ++i) {
730 if (dist_grid->vertex_owners[i].count == -1) {
731
732 vertex_reorder[j] = i;
733 ++j;
734 coord_indices[offset] = i;
735 cve_reorder[offset] = offset;
736 ++offset;
737 }
738 }
739 for (size_t i = 0, j = 0; i < dist_grid->num_edges; ++i) {
740 if (dist_grid->edge_owners[i].count == -1) {
741 // compute middle point of edge
742 size_t * vertex_idxes = &(dist_grid->edge_to_vertex[i][0]);
743 yac_int vertex_ids[2] = {dist_grid->vertex_ids[vertex_idxes[0]],
744 dist_grid->vertex_ids[vertex_idxes[1]]};
745 edge_reorder[j] = i;
746 ++j;
747 coord_indices[offset] =
748 vertex_idxes[(vertex_ids[0] > vertex_ids[1])?1:0];
749 cve_reorder[offset] = offset;
750 ++offset;
751 }
752 }
753
755 coord_indices, total_num_missing, cve_reorder);
756
757 size_t num_unique_idxs = 0;
758 for (size_t i = 0, prev_idx = SIZE_MAX; i < total_num_missing; ++i) {
759 size_t curr_idx = coord_indices[i];
760 if (curr_idx != prev_idx) {
761 prev_idx = curr_idx;
762 ++num_unique_idxs;
763 }
764 }
765
766 coordinate_pointer search_coords =
767 xmalloc(num_unique_idxs * sizeof(*search_coords));
768 int * search_ranks = xmalloc(num_unique_idxs * sizeof(*search_ranks));
769
770 for (size_t i = 0, j = 0, prev_idx = SIZE_MAX; i < total_num_missing; ++i) {
771 size_t curr_idx = coord_indices[i];
772 if (curr_idx != prev_idx) {
773 memcpy(search_coords[j], dist_grid->vertex_coordinates[curr_idx],
774 3 * sizeof(double));
775 prev_idx = curr_idx;
776 ++j;
777 }
778 ranks_idxs[i] = j-1;
779 }
780
782 proc_sphere_part, search_coords, num_unique_idxs, search_ranks);
783 free(search_coords);
784
786 cve_reorder, total_num_missing, ranks_idxs);
787
788 offset = 0;
789 for (size_t i = 0; i < num_missing_vertices; ++i, ++offset)
790 vertex_ranks[i] = search_ranks[ranks_idxs[offset]];
791 for (size_t i = 0; i < num_missing_edges; ++i, ++offset)
792 edge_ranks[i] = search_ranks[ranks_idxs[offset]];
793 free(search_ranks);
794
796 vertex_ranks, num_missing_vertices, vertex_reorder);
798 edge_ranks, num_missing_edges, edge_reorder);
799
800 int * sendcounts, * recvcounts, * sdispls, * rdispls;
802 2, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
803 int * int_buffer =
804 xmalloc(4 * (size_t)comm_size * sizeof(*int_buffer));
805 int * total_sendcounts = int_buffer + 0 * comm_size;
806 int * total_recvcounts = int_buffer + 1 * comm_size;
807 int * total_sdispls = int_buffer + 2 * comm_size;
808 int * total_rdispls = int_buffer + 3 * comm_size;
809
810 for (size_t i = 0; i < num_missing_vertices; ++i)
811 sendcounts[2 * vertex_ranks[i] + 0]++;
812 for (size_t i = 0; i < num_missing_edges; ++i)
813 sendcounts[2 * edge_ranks[i] + 1]++;
814
816 2, sendcounts, recvcounts, sdispls, rdispls, comm);
817
818 size_t num_requested_ids[2] = {0,0};
819 for (int i = 0, saccu = 0, raccu = 0; i < comm_size; ++i) {
820 total_sdispls[i] = saccu;
821 total_rdispls[i] = raccu;
822 total_sendcounts[i] = 0;
823 total_recvcounts[i] = 0;
824 for (int j = 0; j < 2; ++j) {
825 total_sendcounts[i] += sendcounts[2 * i + j];
826 total_recvcounts[i] += recvcounts[2 * i + j];
827 num_requested_ids[j] += (size_t)(recvcounts[2 * i + j]);
828 }
829 saccu += total_sendcounts[i];
830 raccu += total_recvcounts[i];
831 }
832 size_t request_count = (size_t)(total_recvcounts[comm_size - 1]) +
833 (size_t)(total_rdispls[comm_size - 1]);
834
835 yac_int * exchange_id_buffer =
836 xmalloc(
837 (total_num_missing + 2 * request_count) * sizeof(*exchange_id_buffer));
838 yac_int * id_send_buffer = exchange_id_buffer + request_count;
839 yac_int * id_recv_buffer = exchange_id_buffer + request_count + total_num_missing;
840
841 // pack the ids for which the local process is missing the owner information
842 for (size_t i = 0; i < num_missing_vertices; ++i) {
843 int pos = sdispls[2 * vertex_ranks[i] + 0 + 1]++;
844 id_send_buffer[pos] = dist_grid->vertex_ids[vertex_reorder[i]];
845 }
846 for (size_t i = 0; i < num_missing_edges; ++i) {
847 int pos = sdispls[2 * edge_ranks[i] + 1 + 1]++;
848 id_send_buffer[pos] = dist_grid->edge_ids[edge_reorder[i]];
849 }
850
851 // exchange the ids for which the local process is missing the owner information
853 id_send_buffer, total_sendcounts, total_sdispls,
854 id_recv_buffer, total_recvcounts, total_rdispls, comm);
855
856 yac_int * requested_ids[2] =
857 {exchange_id_buffer, exchange_id_buffer + num_requested_ids[0]};
858
859 // unpack data
860 {
861 size_t ids_offset[2] = {0, 0}, offset = 0;
862 for (int i = 0; i < comm_size; ++i) {
863 for (int j = 0; j < 2; ++j) {
864 size_t curr_count = recvcounts[2 * i + j];
865 memcpy(requested_ids[j] + ids_offset[j], id_recv_buffer + offset,
866 curr_count * sizeof(*(requested_ids[0])));
867 ids_offset[j] += curr_count;
868 offset += curr_count;
869 }
870 }
871 }
872
873 struct remote_point_infos ** remote_point_infos_buffer =
874 xmalloc(request_count * sizeof(*remote_point_infos_buffer));
875 struct remote_point_infos ** found_points[2] =
876 {remote_point_infos_buffer,
877 remote_point_infos_buffer + num_requested_ids[0]};
878 struct remote_points * local_remote_points[2] =
879 {dist_vertex_owner, dist_edge_owner};
880
881 size_t max_num_requested_ids = MAX(num_requested_ids[0], num_requested_ids[1]);
882 size_t * reorder_buffer = xmalloc(max_num_requested_ids * sizeof(*reorder_buffer));
883 // lookup owners for requested ids
884 for (int i = 0; i < 2; ++i)
886 requested_ids[i], found_points[i], reorder_buffer, num_requested_ids[i],
887 local_remote_points[i]);
888 free(reorder_buffer);
889 free(exchange_id_buffer);
890
891 int * pack_size_buffer =
892 xmalloc((total_num_missing + request_count) * sizeof(*pack_size_buffer));
893 int * send_pack_sizes = pack_size_buffer;
894 int * recv_pack_sizes = pack_size_buffer + request_count;
895 MPI_Datatype point_info_dt = yac_get_remote_point_info_mpi_datatype(comm);
896
897 {
898 size_t offset = 0, offsets[2] = {0, 0};
899 for (int i = 0; i < comm_size; ++i) {
900 for (int j = 0; j < 2; ++j) {
901 size_t curr_count = (size_t)(recvcounts[2 * i + j]);
902 // get pack sizes
904 found_points[j] + offsets[j], curr_count,
905 send_pack_sizes + offset, point_info_dt, comm);
906 offsets[j] += curr_count;
907 offset += curr_count;
908 }
909 }
910 }
911
912 // exchange pack sizes
914 send_pack_sizes, total_recvcounts, total_rdispls,
915 recv_pack_sizes, total_sendcounts, total_sdispls, comm);
916
917 size_t send_buffer_size = 0, recv_buffer_size = 0;
918 for (size_t i = 0; i < request_count; ++i)
919 send_buffer_size += (size_t)(send_pack_sizes[i]);
920 for (size_t i = 0; i < total_num_missing; ++i)
921 recv_buffer_size += (size_t)(recv_pack_sizes[i]);
922
923 void * result_buffer = xmalloc(send_buffer_size + recv_buffer_size);
924 void * send_result_buffer = result_buffer;
925 void * recv_result_buffer = (unsigned char *)result_buffer + send_buffer_size;
926
927 // pack data
928 {
929 size_t send_buffer_size = 0, offset = 0, offsets[2] = {0,0};
930 for (int i = 0; i < comm_size; ++i) {
931 for (int j = 0; j < 2; ++j) {
932 size_t curr_count = (size_t)(recvcounts[2 * i + j]);
933 // get pack sizes
935 found_points[j] + offsets[j],
936 send_pack_sizes + offset, curr_count,
937 (void*)((unsigned char*)send_result_buffer + send_buffer_size),
938 point_info_dt, comm);
939 for (size_t k = 0; k < curr_count; ++k)
940 send_buffer_size += send_pack_sizes[offset + k];
941 offsets[j] += curr_count;
942 offset += curr_count;
943 }
944 }
945 }
946
947 {
948 size_t send_offset = 0, recv_offset = 0;
949 size_t saccu = 0, raccu = 0;
950 for (int i = 0; i < comm_size; ++i) {
951 total_sdispls[i] = (int)saccu;
952 total_rdispls[i] = (int)raccu;
953 int curr_sendcount = 0, curr_recvcount = 0;
954 for (int k = 0; k < 2; ++k) {
955 for (int j = 0; j < recvcounts[2 * i + k]; ++j, ++send_offset)
956 curr_sendcount += (size_t)(send_pack_sizes[send_offset]);
957 for (int j = 0; j < sendcounts[2 * i + k]; ++j, ++recv_offset)
958 curr_recvcount += (size_t)(recv_pack_sizes[recv_offset]);
959 }
960 total_sendcounts[i] = (int)curr_sendcount;
961 total_recvcounts[i] = (int)curr_recvcount;
962
964 (saccu <= INT_MAX) && (raccu <= INT_MAX) &&
965 (curr_sendcount <= INT_MAX) && (curr_recvcount <= INT_MAX),
966 "ERROR(generate_dist_grid_remote_point_infos): "
967 "int argument to MPI is bigger than INT_MAX")
968
969 saccu += curr_sendcount;
970 raccu += curr_recvcount;
971 }
972 }
973
974 // exchange results
976 send_result_buffer, total_sendcounts, total_sdispls,
977 recv_result_buffer, total_recvcounts, total_rdispls, 1, MPI_PACKED, comm);
978
979 // unpack results
980 {
981 size_t counts[2] = {0, 0}, count = 0, offset = 0;
984 size_t * reorder[2] = {vertex_reorder, edge_reorder};
985 for (int i = 0; i < comm_size; ++i) {
986 for (int j = 0; j < 2; ++j) {
987 size_t curr_count = sendcounts[2 * i + j];
988 for (size_t k = 0; k < curr_count; ++k, ++counts[j], ++count) {
989 int curr_buffer_size =
990 recv_pack_sizes[(size_t)(sdispls[2 * i + j]) + k];
991 int position = 0;
993 (void*)((unsigned char*)recv_result_buffer + offset),
994 curr_buffer_size, &position,
995 remote_points[j] + reorder[j][counts[j]], point_info_dt, comm);
996 offset += (size_t)(curr_buffer_size);
997 }
998 }
999 }
1000 }
1001
1002 yac_mpi_call(MPI_Type_free(&point_info_dt), comm);
1003
1004 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
1005 free(int_buffer);
1006 free(result_buffer);
1007 free(pack_size_buffer);
1008 free(remote_point_infos_buffer);
1009 free(size_t_buffer);
1010 free(ranks_buffer);
1011}
1012
1013static int compare_remote_point(const void * a, const void * b) {
1014
1015 return ((const struct remote_point*)a)->global_id -
1016 ((const struct remote_point*)b)->global_id;
1017}
1018
1019// inserts an element into an array and increases the corresponding size
1020// if the element already exists in the array nothing is done
1021static void insert_global_id(yac_int * ids, size_t n, yac_int id) {
1022
1023 size_t i;
1024 for (i = 0; i < n; ++i) if (ids[i] >= id) break;
1025 // copy new id into array and move bigger elements one position up
1026 if (n != i) memmove(ids + i + 1, ids + i, (n - i) * sizeof(*ids));
1027 ids[i] = id;
1028}
1029
1030// inserts an element into an array and increases the corresponding size
1031// if the element already exists in the array nothing is done
1032static void insert_rank(int * ranks, int * count, int rank) {
1033
1034 int i;
1035 int n = *count;
1036
1037 for (i = 0; i < n; ++i) if (ranks[i] >= rank) break;
1038
1039 // if the rank is already in the array
1040 if (i != n) {
1041 if (ranks[i] == rank) return;
1042 else memmove(ranks + i + 1, ranks + i, ((size_t)(n - i)) * sizeof(*ranks));
1043 }
1044 ranks[i] = rank;
1045 *count = n + 1;
1046}
1047
1049 const void * a, const void * b) {
1050
1051 int count_a = ((struct n_ids_reorder const *)a)->count;
1052 int count_b = ((struct n_ids_reorder const *)b)->count;
1053 yac_int * a_ids = ((struct n_ids_reorder const *)a)->ids;
1054 yac_int * b_ids = ((struct n_ids_reorder const *)b)->ids;
1055 int ret = count_a - count_b;
1056 for (int i = 0; !ret && (i < count_a); ++i)
1057 ret = (a_ids[i] > b_ids[i]) - (a_ids[i] < b_ids[i]);
1058 return ret;
1059}
1060
1062 const void * a, const void * b) {
1063
1064 return (((struct n_ids_reorder const *)a)->reorder_idx >
1065 ((struct n_ids_reorder const *)b)->reorder_idx) -
1066 (((struct n_ids_reorder const *)a)->reorder_idx <
1067 ((struct n_ids_reorder const *)b)->reorder_idx);
1068}
1069
1071 struct proc_sphere_part_node * proc_sphere_part,
1072 struct basic_grid_data * grid, MPI_Comm comm, int ** dist_cell_ranks,
1073 int ** dist_cell_rank_counts, size_t ** dist_cell_rank_offsets) {
1074
1075 int comm_size;
1076 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1077
1078 size_t num_cells = grid->num_cells;
1079 int * ranks_buffer = xmalloc((size_t)comm_size * sizeof(*ranks_buffer));
1080 size_t dist_cell_ranks_array_size = num_cells;
1081 int * dist_cell_ranks_ = xmalloc(num_cells * sizeof(*dist_cell_ranks_));
1082 int * dist_cell_rank_counts_ =
1083 (*dist_cell_rank_counts =
1084 xmalloc(num_cells * sizeof(*dist_cell_rank_counts_)));
1085 size_t * dist_cell_rank_offsets_ =
1086 (*dist_cell_rank_offsets =
1087 xmalloc(num_cells * sizeof(*dist_cell_rank_offsets_)));
1088 size_t offset = 0;
1089
1090 // determine maximum number of vertices per cell
1091 int max_num_vertices_per_cell = 0;
1092 for (size_t i = 0; i < num_cells; ++i)
1093 if (grid->num_vertices_per_cell[i] > max_num_vertices_per_cell)
1094 max_num_vertices_per_cell = grid->num_vertices_per_cell[i];
1095
1096 struct grid_cell cell;
1097 cell.coordinates_xyz =
1098 xmalloc(
1099 (size_t)max_num_vertices_per_cell * sizeof(*(cell.coordinates_xyz)));
1100 cell.edge_type =
1101 xmalloc((size_t)max_num_vertices_per_cell * sizeof(*(cell.edge_type)));
1102
1103 size_t * cell_to_vertex = grid->cell_to_vertex;
1104 size_t * cell_to_vertex_offsets = grid->cell_to_vertex_offsets;
1105 size_t * cell_to_edge = grid->cell_to_edge;
1106 size_t * cell_to_edge_offsets = grid->cell_to_edge_offsets;
1107 coordinate_pointer vertex_coordinates = grid->vertex_coordinates;
1108 enum yac_edge_type * edge_type = grid->edge_type;
1109 int * num_vertices_per_cell = grid->num_vertices_per_cell;
1110
1111 for (size_t i = 0; i < num_cells; ++i) {
1112
1113 // get bounding circle for current cell
1114 cell.num_corners = num_vertices_per_cell[i];
1115 size_t * curr_cell_to_vertex =
1116 cell_to_vertex + cell_to_vertex_offsets[i];
1117 size_t * curr_cell_to_edge =
1118 cell_to_edge + cell_to_edge_offsets[i];
1119 for (int j = 0; j < num_vertices_per_cell[i]; ++j) {
1120 coordinate_pointer curr_vertex_coords =
1121 vertex_coordinates + curr_cell_to_vertex[j];
1122 for (int k = 0; k < 3; ++k)
1123 cell.coordinates_xyz[j][k] = (*curr_vertex_coords)[k];
1124 cell.edge_type[j] = edge_type[curr_cell_to_edge[j]];
1125 }
1126 int rank_count;
1127 if (cell.num_corners > 0) {
1128
1129 struct bounding_circle bnd_circle;
1130 yac_get_cell_bounding_circle(cell, &bnd_circle);
1131
1133 proc_sphere_part, bnd_circle, ranks_buffer, &rank_count);
1134 } else {
1135 ranks_buffer[0] = 0;
1136 rank_count = 1;
1137 }
1138
1139 ENSURE_ARRAY_SIZE(dist_cell_ranks_, dist_cell_ranks_array_size,
1140 offset + (size_t)rank_count);
1141 memcpy(dist_cell_ranks_ + offset, ranks_buffer,
1142 (size_t)rank_count * sizeof(*ranks_buffer));
1143 dist_cell_rank_counts_[i] = rank_count;
1144 dist_cell_rank_offsets_[i] = offset;
1145 offset += (size_t)rank_count;
1146 }
1147
1148 free(cell.edge_type);
1149 free(cell.coordinates_xyz);
1150 free(ranks_buffer);
1151
1152 *dist_cell_ranks =
1153 xrealloc(dist_cell_ranks_, offset * sizeof(*dist_cell_ranks_));
1154}
1155
1156static MPI_Datatype yac_get_id_reorder_coord_coord_mpi_datatype(MPI_Comm comm) {
1157
1158 struct id_reorder_coord dummy;
1159 MPI_Datatype coord_dt;
1160 int array_of_blocklengths[] = {3};
1161 const MPI_Aint array_of_displacements[] =
1162 {(MPI_Aint)((void*)&(dummy.coord) - (void*)&dummy)};
1163 const MPI_Datatype array_of_types[] = {MPI_DOUBLE};
1165 MPI_Type_create_struct(1, array_of_blocklengths, array_of_displacements,
1166 array_of_types, &coord_dt), comm);
1167 return yac_create_resized(coord_dt, sizeof(dummy), comm);
1168}
1169
1170static MPI_Datatype yac_get_id_reorder_coord_id_mpi_datatype(MPI_Comm comm) {
1171
1172 struct id_reorder_coord dummy;
1173 MPI_Datatype global_id_dt;
1174 int array_of_blocklengths[] = {1};
1175 const MPI_Aint array_of_displacements[] =
1176 {(MPI_Aint)((void*)&(dummy.global_id) - (void*)&dummy)};
1177 const MPI_Datatype array_of_types[] = {Xt_int_dt};
1179 MPI_Type_create_struct(1, array_of_blocklengths, array_of_displacements,
1180 array_of_types, &global_id_dt), comm);
1181 return yac_create_resized(global_id_dt, sizeof(dummy), comm);
1182}
1183
1185 const void * a, const void * b) {
1186
1187 return compare_coords(((const struct id_reorder_coord *)a)->coord,
1188 ((const struct id_reorder_coord *)b)->coord);
1189}
1190
1192 const void * a, const void * b) {
1193
1194 return (((const struct id_reorder_coord *)a)->reorder_idx >
1195 ((const struct id_reorder_coord *)b)->reorder_idx) -
1196 (((const struct id_reorder_coord *)a)->reorder_idx <
1197 ((const struct id_reorder_coord *)b)->reorder_idx);
1198}
1199
1201 struct proc_sphere_part_node * proc_sphere_part,
1202 struct basic_grid_data * grid, MPI_Comm comm) {
1203
1204 int * vertex_ranks = xmalloc(grid->num_vertices * sizeof(*vertex_ranks));
1205
1206 // determine the dist grid ranks for all vertices
1208 proc_sphere_part, grid->vertex_coordinates, grid->num_vertices,
1209 vertex_ranks);
1210
1211 // check whether only of subset of the processes have defined
1212 // their global ids, which is not supported
1213 int ids_available_local =
1214 (grid->num_vertices > 0) && (grid->vertex_ids != NULL);
1215 int ids_available_global;
1216 yac_mpi_call(MPI_Allreduce(&ids_available_local, &ids_available_global, 1,
1217 MPI_INT, MPI_MAX, comm), comm);
1218
1219 // if there are vertices defined locally for the current grid
1220 if (grid->num_vertices != 0) {
1221 YAC_ASSERT(
1222 ids_available_local || !ids_available_global,
1223 "ERROR(generate_dist_remote_points_vertex): inconsistent global ids")
1224
1225 // generate core mask if not available
1226 if (grid->core_vertex_mask == NULL) {
1227 int * core_mask =
1228 (grid->core_vertex_mask =
1229 xmalloc(grid->num_vertices * sizeof(*core_mask)));
1230 for (size_t j = 0; j < grid->num_vertices; ++j) core_mask[j] = 1;
1231 }
1232 }
1233
1234 // if we do not need to generate the global ids
1235 if (ids_available_global) return vertex_ranks;
1236
1237 int comm_size;
1238 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1239
1240 int * sendcounts, * recvcounts, * sdispls, * rdispls;
1242 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1243
1244 for (size_t i = 0; i < grid->num_vertices; ++i)
1245 sendcounts[vertex_ranks[i]]++;
1246
1248 1, sendcounts, recvcounts, sdispls, rdispls, comm);
1249
1250 size_t orig_vertex_count = grid->num_vertices;
1251 size_t dist_vertex_count = (size_t)(recvcounts[comm_size - 1]) +
1252 (size_t)(rdispls[comm_size - 1]);
1253
1254 struct id_reorder_coord * id_reorder_coord_buffer =
1255 xmalloc((orig_vertex_count + dist_vertex_count) *
1256 sizeof(*id_reorder_coord_buffer));
1257 struct id_reorder_coord *
1258 id_reorder_coord_send_buffer = id_reorder_coord_buffer;
1259 struct id_reorder_coord *
1260 id_reorder_coord_recv_buffer = id_reorder_coord_buffer + orig_vertex_count;
1261
1262 // pack send buffer
1263 for (size_t i = 0; i < orig_vertex_count; ++i) {
1264 int pos = sdispls[vertex_ranks[i] + 1]++;
1265 id_reorder_coord_send_buffer[pos].reorder_idx = i;
1266 for (int j = 0; j < 3; ++j)
1267 id_reorder_coord_send_buffer[pos].coord[j] =
1268 grid->vertex_coordinates[i][j];
1269 }
1270
1271 MPI_Datatype id_reorder_coord_coord_dt =
1273
1274 // exchange data
1276 id_reorder_coord_send_buffer, sendcounts, sdispls,
1277 id_reorder_coord_recv_buffer, recvcounts, rdispls,
1278 sizeof(*id_reorder_coord_send_buffer), id_reorder_coord_coord_dt, comm);
1279
1280 yac_mpi_call(MPI_Type_free(&id_reorder_coord_coord_dt), comm);
1281
1282 for (size_t i = 0; i < dist_vertex_count; ++i)
1283 id_reorder_coord_recv_buffer[i].reorder_idx = i;
1284
1285 // sort received vertices based on coordinates
1286 qsort(id_reorder_coord_recv_buffer, dist_vertex_count,
1287 sizeof(*id_reorder_coord_recv_buffer), compare_id_reorder_coord_coord);
1288
1289 size_t unique_count = dist_vertex_count > 0;
1290 struct id_reorder_coord * prev = id_reorder_coord_recv_buffer;
1291
1292 // determine number of unique coordinates
1293 for (size_t i = 0; i < dist_vertex_count; ++i) {
1294 struct id_reorder_coord * curr = id_reorder_coord_recv_buffer + i;
1295 if (compare_id_reorder_coord_coord(prev, curr)) {
1296 ++unique_count;
1297 prev = curr;
1298 }
1299 curr->global_id = (yac_int)unique_count - 1;
1300 }
1301
1302 YAC_ASSERT(
1303 unique_count <= (size_t)XT_INT_MAX,
1304 "ERROR(generate_vertex_ids): global_id out of bounds")
1305
1306 yac_int yac_int_unique_count = (yac_int)unique_count;
1307 yac_int id_offset;
1308
1309 // determine exclusive scan of sum of numbers of unique
1310 // coordinates on all ranks
1311 yac_mpi_call(MPI_Exscan(&yac_int_unique_count, &id_offset, 1, Xt_int_dt,
1312 MPI_SUM, comm), comm);
1313 int comm_rank;
1314 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
1315 if (comm_rank == 0) id_offset = 0;
1316
1317 YAC_ASSERT(
1318 ((size_t)id_offset + unique_count) <= (size_t)XT_INT_MAX,
1319 "ERROR(generate_vertex_ids): global_id out of bounds")
1320
1321 // adjust global ids
1322 for (size_t i = 0; i < dist_vertex_count; ++i)
1323 id_reorder_coord_recv_buffer[i].global_id += id_offset;
1324
1325 // return received vertices into original order
1326 qsort(id_reorder_coord_recv_buffer, dist_vertex_count,
1327 sizeof(*id_reorder_coord_recv_buffer),
1329
1330 MPI_Datatype id_reorder_coord_id_dt =
1332
1333 // return generated global ids data
1335 id_reorder_coord_recv_buffer, recvcounts, rdispls,
1336 id_reorder_coord_send_buffer, sendcounts, sdispls,
1337 sizeof(*id_reorder_coord_send_buffer), id_reorder_coord_id_dt, comm);
1338
1339 yac_mpi_call(MPI_Type_free(&id_reorder_coord_id_dt), comm);
1340
1341 yac_int * vertex_ids =
1342 ((grid->vertex_ids =
1343 (grid->num_vertices > 0)?
1344 xmalloc(grid->num_vertices * sizeof(*(grid->vertex_ids))):NULL));
1345
1346 for (size_t i = 0; i < grid->num_vertices; ++i)
1347 vertex_ids[id_reorder_coord_send_buffer[i].reorder_idx] =
1348 id_reorder_coord_send_buffer[i].global_id;
1349
1350 free(id_reorder_coord_buffer);
1351 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
1352
1353 return vertex_ranks;
1354}
1355
1357 struct proc_sphere_part_node * proc_sphere_part,
1358 struct basic_grid_data * grid, int * vertex_ranks, MPI_Comm comm) {
1359
1360 int comm_rank, comm_size;
1361 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
1362 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1363
1364 // check whether only of subset of the processes have defined
1365 // their global ids, which is not supported
1366 int ids_available_local[2], ids_available_global[2];
1367 ids_available_local[0] = (grid->num_cells > 0) && (grid->cell_ids != NULL);
1368 ids_available_local[1] = (grid->num_edges > 0) && (grid->edge_ids != NULL);
1369 yac_mpi_call(MPI_Allreduce(ids_available_local, ids_available_global, 2,
1370 MPI_INT, MPI_MAX, comm), comm);
1371
1372 if (grid->num_cells > 0) {
1373 YAC_ASSERT(
1374 ids_available_local[0] == ids_available_global[0],
1375 "ERROR(generate_ce_ids): inconsistent global ids")
1376 // generate core mask if not available
1377 if (grid->core_cell_mask == NULL) {
1378 int * core_cell_mask =
1379 (grid->core_cell_mask =
1380 xmalloc(grid->num_cells * sizeof(*core_cell_mask)));
1381 for (size_t j = 0; j < grid->num_cells; ++j) core_cell_mask[j] = 1;
1382 }
1383 }
1384 if (grid->num_edges > 0) {
1385 YAC_ASSERT(
1386 ids_available_local[1] == ids_available_global[1],
1387 "ERROR(generate_ce_ids): inconsistent global ids")
1388 // generate core mask if not available
1389 if (grid->core_edge_mask == NULL) {
1390 int * core_edge_mask =
1391 (grid->core_edge_mask =
1392 xmalloc(grid->num_edges * sizeof(*core_edge_mask)));
1393 for (size_t j = 0; j < grid->num_edges; ++j) core_edge_mask[j] = 1;
1394 }
1395 }
1396
1397 // if no ids need to be generated
1398 if (ids_available_global[0] && ids_available_global[1]) return;
1399
1400 int * rank_buffer =
1401 xmalloc(
1402 (((ids_available_global[0])?0:(grid->num_cells)) +
1403 ((ids_available_global[1])?0:(grid->num_edges))) * sizeof(*rank_buffer));
1404 int * cell_ranks = rank_buffer;
1405 int * edge_ranks =
1406 rank_buffer + ((ids_available_global[0])?0:(grid->num_cells));
1407
1408 int * int_buffer =
1409 xmalloc((8 * (size_t)comm_size + 1) * sizeof(*int_buffer));
1410 int * sendcounts = int_buffer + 0 * comm_size;
1411 int * recvcounts = int_buffer + 2 * comm_size;
1412 int * total_sendcounts = int_buffer + 4 * comm_size;
1413 int * total_recvcounts = int_buffer + 5 * comm_size;
1414 int * total_sdispls = int_buffer + 6 * comm_size;
1415 int * total_rdispls = int_buffer + 7 * comm_size + 1;
1416 memset(sendcounts, 0, 2 * (size_t)comm_size * sizeof(*sendcounts));
1417
1418 yac_int * cell_to_vertex_ids = NULL;
1419 int max_num_vertices_per_cell = 0;
1420
1421 if (!ids_available_global[0]) {
1422
1423 { // compute the global maximum number of vertices per cell
1424 for (size_t i = 0; i < grid->num_cells; ++i)
1425 if (grid->num_vertices_per_cell[i] > max_num_vertices_per_cell)
1426 max_num_vertices_per_cell = grid->num_vertices_per_cell[i];
1427 yac_mpi_call(MPI_Allreduce(MPI_IN_PLACE, &max_num_vertices_per_cell, 1,
1428 MPI_INT, MPI_MAX, comm), comm);
1429 }
1430
1431 cell_to_vertex_ids =
1432 xmalloc(grid->num_cells * max_num_vertices_per_cell *
1433 sizeof(*cell_to_vertex_ids));
1434
1435 for (size_t i = 0; i < grid->num_cells; ++i) {
1436
1437 size_t curr_num_vertices = grid->num_vertices_per_cell[i];
1438 yac_int * curr_cell_to_vertex_ids =
1439 cell_to_vertex_ids + i * max_num_vertices_per_cell;
1440
1441 int cell_rank;
1442 if (curr_num_vertices > 0) {
1443 size_t * curr_cell_vertices =
1444 grid->cell_to_vertex + grid->cell_to_vertex_offsets[i];
1445 size_t min_vertex = curr_cell_vertices[0];
1446 curr_cell_to_vertex_ids[0] = grid->vertex_ids[min_vertex];
1447 for (size_t j = 1; j < curr_num_vertices; ++j) {
1448 size_t curr_vertex_idx = curr_cell_vertices[j];
1449 yac_int curr_vertex_id = grid->vertex_ids[curr_vertex_idx];
1450 insert_global_id(curr_cell_to_vertex_ids, j, curr_vertex_id);
1451 if (curr_cell_to_vertex_ids[0] == curr_vertex_id)
1452 min_vertex = curr_vertex_idx;
1453 }
1454 cell_rank = vertex_ranks[min_vertex];
1455 } else {
1456 cell_rank = 0;
1457 }
1458 for (size_t j = curr_num_vertices; j < max_num_vertices_per_cell; ++j)
1459 curr_cell_to_vertex_ids[j] = XT_INT_MAX;
1460
1461 sendcounts[2 * ((cell_ranks[i] = cell_rank)) + 0]++;
1462 }
1463 }
1464
1465 yac_int * edge_to_vertex_ids = NULL;
1466
1467 if (!ids_available_global[1]) {
1468
1469 edge_to_vertex_ids =
1470 xmalloc(2 * grid->num_edges * sizeof(*edge_to_vertex_ids));
1471
1472 for (size_t i = 0; i < grid->num_edges; ++i) {
1473
1474 size_t * curr_edge_to_vertex = &(grid->edge_to_vertex[i][0]);
1475 yac_int * curr_edge_vertex_ids = edge_to_vertex_ids + 2 * i;
1476 curr_edge_vertex_ids[0] = grid->vertex_ids[curr_edge_to_vertex[0]];
1477 curr_edge_vertex_ids[1] = grid->vertex_ids[curr_edge_to_vertex[1]];
1478
1479 if (curr_edge_vertex_ids[0] > curr_edge_vertex_ids[1]) {
1480 yac_int temp = curr_edge_vertex_ids[0];
1481 curr_edge_vertex_ids[0] = curr_edge_vertex_ids[1];
1482 curr_edge_vertex_ids[1] = temp;
1483 sendcounts[
1484 2 * ((edge_ranks[i] = vertex_ranks[curr_edge_to_vertex[1]])) + 1]++;
1485 } else {
1486 sendcounts[
1487 2 * ((edge_ranks[i] = vertex_ranks[curr_edge_to_vertex[0]])) + 1]++;
1488 }
1489 }
1490 }
1491
1492 // exchange the number of cells and edges
1493 yac_mpi_call(MPI_Alltoall(sendcounts, 2, MPI_INT,
1494 recvcounts, 2, MPI_INT, comm), comm);
1495
1496 total_sdispls[0] = 0;
1497 size_t recv_counts[2] = {0,0};
1498 for (int i = 0, saccu = 0, raccu = 0; i < comm_size; ++i) {
1499 total_sdispls[i+1] = saccu;
1500 total_rdispls[i] = raccu;
1501 recv_counts[0] += recvcounts[2 * i + 0];
1502 recv_counts[1] += recvcounts[2 * i + 1];
1503 total_sendcounts[i] = sendcounts[2 * i + 0] * max_num_vertices_per_cell +
1504 sendcounts[2 * i + 1] * 2;
1505 total_recvcounts[i] = recvcounts[2 * i + 0] * max_num_vertices_per_cell +
1506 recvcounts[2 * i + 1] * 2;
1507 saccu += total_sendcounts[i];
1508 raccu += total_recvcounts[i];
1509 }
1510 size_t local_data_count = (size_t)(total_sendcounts[comm_size - 1]) +
1511 (size_t)(total_sdispls[comm_size]);
1512 size_t recv_count = (size_t)(total_recvcounts[comm_size - 1]) +
1513 (size_t)(total_rdispls[comm_size - 1]);
1514
1515 yac_int * yac_int_buffer =
1516 xmalloc((local_data_count + recv_count) * sizeof(*yac_int_buffer));
1517 yac_int * send_buffer = yac_int_buffer;
1518 yac_int * recv_buffer = yac_int_buffer + local_data_count;
1519
1520 // pack send buffer
1521 if (!ids_available_global[0])
1522 for (size_t i = 0; i < grid->num_cells; ++i)
1523 for (int j = 0; j < max_num_vertices_per_cell; ++j)
1524 send_buffer[total_sdispls[cell_ranks[i] + 1]++] =
1525 cell_to_vertex_ids[i * max_num_vertices_per_cell + j];
1526 if (!ids_available_global[1])
1527 for (size_t i = 0; i < grid->num_edges; ++i)
1528 for (int j = 0; j < 2; ++j)
1529 send_buffer[total_sdispls[edge_ranks[i] + 1]++] =
1530 edge_to_vertex_ids[2 * i + j];
1531
1532 free(edge_to_vertex_ids);
1533 free(cell_to_vertex_ids);
1534
1535 // exchange data
1537 send_buffer, total_sendcounts, total_sdispls,
1538 recv_buffer, total_recvcounts, total_rdispls,
1539 sizeof(*send_buffer), Xt_int_dt, comm);
1540
1541 struct n_ids_reorder * n_ids_reorder_buffer =
1542 xmalloc((recv_counts[0] + recv_counts[1]) * sizeof(*n_ids_reorder_buffer));
1543 struct n_ids_reorder * n_ids_reorder[2] =
1544 {n_ids_reorder_buffer, n_ids_reorder_buffer + recv_counts[0]};
1545
1546 size_t offset = 0;
1547 int index_counts[2] = {max_num_vertices_per_cell, 2};
1548 size_t reorder_idx = 0;
1549 recv_counts[0] = 0;
1550 recv_counts[1] = 0;
1551 for (int i = 0; i < comm_size; ++i) {
1552 for (int j = 0; j < 2; ++j) {
1553 int curr_count = recvcounts[2 * i + j];
1554 for (int k = 0; k < curr_count; ++k, ++reorder_idx, ++recv_counts[j]) {
1555 n_ids_reorder[j][recv_counts[j]].count = index_counts[j];
1556 n_ids_reorder[j][recv_counts[j]].ids = recv_buffer + offset;
1557 n_ids_reorder[j][recv_counts[j]].reorder_idx = reorder_idx;
1558 offset += index_counts[j];
1559 }
1560 }
1561 }
1562
1563 for (int i = 0; i < 2; ++i) {
1564
1565 if (ids_available_global[i]) continue;
1566
1567 qsort(n_ids_reorder[i], recv_counts[i], sizeof(*(n_ids_reorder[i])),
1569
1570 size_t unique_count = recv_counts[i] > 0;
1571 struct n_ids_reorder * prev = n_ids_reorder[i];
1572 struct n_ids_reorder * curr = n_ids_reorder[i];
1573
1574 for (size_t j = 0; j < recv_counts[i]; ++j, ++curr) {
1575 if (compare_n_ids_reorder_ids(prev, curr)) {
1576 ++unique_count;
1577 prev = curr;
1578 }
1579 curr->global_id = (yac_int)(unique_count - 1);
1580 }
1581
1582 YAC_ASSERT(
1583 unique_count <= (size_t)XT_INT_MAX,
1584 "ERROR(generate_global_ce_ids): global_id out of bounds")
1585
1586 yac_int yac_int_unique_count = (yac_int)unique_count;
1587 yac_int id_offset;
1588
1589 // determine exclusive scan of sum of numbers of unique ids on all ranks
1590 yac_mpi_call(MPI_Exscan(&yac_int_unique_count, &id_offset, 1, Xt_int_dt,
1591 MPI_SUM, comm), comm);
1592 if (comm_rank == 0) id_offset = 0;
1593
1594 YAC_ASSERT(
1595 ((size_t)id_offset + unique_count) <= (size_t)XT_INT_MAX,
1596 "ERROR(generate_global_ce_ids): global_id out of bounds")
1597
1598 // adjust global ids
1599 for (size_t j = 0; j < recv_counts[i]; ++j)
1600 n_ids_reorder[i][j].global_id += id_offset;
1601 }
1602 free(yac_int_buffer);
1603
1604 qsort(n_ids_reorder_buffer, recv_counts[0] + recv_counts[1],
1605 sizeof(*n_ids_reorder_buffer), compare_n_ids_reorder_reorder);
1606
1607 yac_int * global_ids_buffer =
1608 xmalloc((recv_counts[0] + recv_counts[1] +
1609 ((ids_available_global[0])?0:(grid->num_cells)) +
1610 ((ids_available_global[1])?0:(grid->num_edges))) *
1611 sizeof(*global_ids_buffer));
1612 yac_int * send_global_ids = global_ids_buffer;
1613 yac_int * recv_global_ids =
1614 global_ids_buffer + recv_counts[0] + recv_counts[1];
1615
1616 for (size_t i = 0; i < recv_counts[0] + recv_counts[1]; ++i)
1617 send_global_ids[i] = n_ids_reorder_buffer[i].global_id;
1618 free(n_ids_reorder_buffer);
1619
1620 // generate count and displs data
1621 for (int i = 0, saccu = 0, raccu = 0; i < comm_size; ++i) {
1622 total_sdispls[i] = saccu;
1623 total_rdispls[i] = raccu;
1624 saccu +=
1625 ((total_sendcounts[i] = recvcounts[2 * i + 0] + recvcounts[2 * i + 1]));
1626 raccu +=
1627 ((total_recvcounts[i] = sendcounts[2 * i + 0] + sendcounts[2 * i + 1]));
1628 }
1629
1630 // exchange generated global ids data
1632 send_global_ids, total_sendcounts, total_sdispls,
1633 recv_global_ids, total_recvcounts, total_rdispls,
1634 sizeof(*send_global_ids), Xt_int_dt, comm);
1635
1636 if ((!ids_available_global[0]) && (grid->num_cells > 0))
1637 grid->cell_ids = xmalloc(grid->num_cells * sizeof(*grid->cell_ids));
1638 if ((!ids_available_global[1]) && (grid->num_edges > 0))
1639 grid->edge_ids = xmalloc(grid->num_edges * sizeof(grid->edge_ids));
1640
1641 // unpack generated global ids
1642 if (!ids_available_global[0])
1643 for (size_t i = 0; i < grid->num_cells; ++i)
1644 grid->cell_ids[i] = recv_global_ids[total_rdispls[cell_ranks[i]]++];
1645 if (!ids_available_global[1])
1646 for (size_t i = 0; i < grid->num_edges; ++i)
1647 grid->edge_ids[i] = recv_global_ids[total_rdispls[edge_ranks[i]]++];
1648
1649 free(rank_buffer);
1650 free(int_buffer);
1651 free(global_ids_buffer);
1652}
1653
1655 struct proc_sphere_part_node * proc_sphere_part,
1656 struct basic_grid_data * grid, MPI_Comm comm) {
1657
1658 // generate global ids for all vertices (if non-existent)
1659 int * vertex_ranks = generate_vertex_ids(proc_sphere_part, grid, comm);
1660
1661 // generate global ids for all cell and edge (if non-existent)
1662 generate_ce_ids(proc_sphere_part, grid, vertex_ranks, comm);
1663
1664 free(vertex_ranks);
1665}
1666
1668 const_size_t_pointer cell_to_edge, const_int_pointer num_edges_per_cell,
1669 int * core_cell_mask, size_t num_cells, size_t num_edges) {
1670
1671 if (num_cells == 0) return NULL;
1672
1673 size_t_2_pointer edge_to_cell = xmalloc(num_edges * sizeof(*edge_to_cell));
1674
1675 for (size_t i = 0; i < num_edges; ++i) {
1676 edge_to_cell[i][0] = SIZE_MAX;
1677 edge_to_cell[i][1] = SIZE_MAX;
1678 }
1679
1680 for (size_t i = 0, offset = 0; i < num_cells; ++i) {
1681
1682 size_t curr_num_edges = num_edges_per_cell[i];
1683 const_size_t_pointer curr_cell_to_edge = cell_to_edge + offset;
1684 offset += curr_num_edges;
1685
1686 if ((core_cell_mask == NULL) || core_cell_mask[i]) {
1687
1688 for (size_t j = 0; j < curr_num_edges; ++j) {
1689
1690 size_t curr_edge = curr_cell_to_edge[j];
1691 size_t * curr_edge_to_cell = edge_to_cell[curr_edge];
1692 curr_edge_to_cell += *curr_edge_to_cell != SIZE_MAX;
1693 YAC_ASSERT(
1694 *curr_edge_to_cell == SIZE_MAX,
1695 "ERROR(generate_edge_to_cell): "
1696 "more than two cells point to a single edge")
1697 *curr_edge_to_cell = i;
1698 }
1699 }
1700 }
1701
1702 return edge_to_cell;
1703}
1704
1706 struct proc_sphere_part_node * proc_sphere_part,
1707 struct basic_grid_data * grid,
1708 struct remote_points ** dist_cell_owner,
1709 struct remote_points ** dist_vertex_owner,
1710 struct remote_points ** dist_edge_owner, MPI_Comm comm) {
1711
1712 // generate global ids if they are missing
1713 generate_global_ids(proc_sphere_part, grid, comm);
1714
1715 // determine dist ranks for all cells based on their bounding circles
1716 int * dist_cell_ranks;
1717 int * dist_cell_rank_counts;
1718 size_t * dist_cell_rank_offsets;
1720 proc_sphere_part, grid, comm,
1721 &dist_cell_ranks, &dist_cell_rank_counts, &dist_cell_rank_offsets);
1722
1723 int * rank_buffer = NULL;
1724 size_t rank_buffer_size = 0;
1725 size_t rank_buffer_array_size = 0;
1726
1727 int * num_ranks_buffer =
1728 xrealloc(dist_cell_rank_counts,
1729 (grid->num_cells + grid->num_vertices + grid->num_edges) *
1730 sizeof(*num_ranks_buffer));
1731 int * num_cell_ranks = num_ranks_buffer;
1732 int * num_vertex_ranks = num_ranks_buffer + grid->num_cells;
1733 int * num_edge_ranks =
1734 num_ranks_buffer + grid->num_cells + grid->num_vertices;
1735
1736 for (size_t i = 0; i < grid->num_vertices; ++i) {
1737 if (grid->core_vertex_mask[i]) {
1738 int num_cells = grid->num_cells_per_vertex[i];
1739 size_t * cells =
1740 grid->vertex_to_cell + grid->vertex_to_cell_offsets[i];
1741
1742 size_t max_num_ranks = 0;
1743 for (int j = 0; j < num_cells; ++j)
1744 max_num_ranks += (size_t)(num_cell_ranks[cells[j]]);
1745
1746 ENSURE_ARRAY_SIZE(rank_buffer, rank_buffer_array_size,
1747 rank_buffer_size + (size_t)max_num_ranks);
1748
1749 int * curr_ranks = rank_buffer + rank_buffer_size;
1750 int curr_num_ranks = 0;
1751
1752 for (int j = 0; j < num_cells; ++j) {
1753
1754 size_t curr_cell_idx = cells[j];
1755 int curr_num_cell_ranks = num_cell_ranks[curr_cell_idx];
1756 int * curr_cell_ranks =
1757 dist_cell_ranks + dist_cell_rank_offsets[curr_cell_idx];
1758
1759 for (int k = 0; k < curr_num_cell_ranks; ++k)
1760 insert_rank(curr_ranks, &curr_num_ranks, curr_cell_ranks[k]);
1761 }
1762 rank_buffer_size += (size_t)(num_vertex_ranks[i] = curr_num_ranks);
1763 } else {
1764 num_vertex_ranks[i] = 0;
1765 }
1766 }
1767
1768 size_t_2_pointer edge_to_cell =
1771 grid->core_cell_mask, grid->num_cells, grid->num_edges);
1772
1773 for (size_t i = 0; i < grid->num_edges; ++i) {
1774 if (grid->core_edge_mask[i]) {
1775
1776 size_t max_num_ranks = 0;
1777 for (int j = 0; j < 2; ++j)
1778 if (edge_to_cell[i][j] != SIZE_MAX)
1779 max_num_ranks += (size_t)(num_cell_ranks[edge_to_cell[i][j]]);
1780
1781 ENSURE_ARRAY_SIZE(rank_buffer, rank_buffer_array_size,
1782 rank_buffer_size + (size_t)max_num_ranks);
1783
1784 int * curr_ranks = rank_buffer + rank_buffer_size;
1785 int curr_num_ranks = 0;
1786
1787 for (int j = 0; j < 2; ++j) {
1788
1789 size_t curr_cell_idx = edge_to_cell[i][j];
1790 if (curr_cell_idx == SIZE_MAX) continue;
1791
1792 int curr_num_cell_ranks = num_cell_ranks[curr_cell_idx];
1793 int * curr_cell_ranks =
1794 dist_cell_ranks + dist_cell_rank_offsets[curr_cell_idx];
1795
1796 for (int k = 0; k < curr_num_cell_ranks; ++k)
1797 insert_rank(curr_ranks, &curr_num_ranks, curr_cell_ranks[k]);
1798 }
1799 rank_buffer_size += (size_t)(num_edge_ranks[i] = curr_num_ranks);
1800 } else {
1801 num_edge_ranks[i] = 0;
1802 }
1803 }
1804 free(edge_to_cell);
1805
1806 int comm_size;
1807 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
1808
1809 int * sendcounts, * recvcounts, * sdispls, * rdispls;
1811 3, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1812 int * int_buffer =
1813 xmalloc(4 * (size_t)comm_size * sizeof(*int_buffer));
1814 int * total_sendcounts = int_buffer + 0 * comm_size;
1815 int * total_recvcounts = int_buffer + 1 * comm_size;
1816 int * total_sdispls = int_buffer + 2 * comm_size;
1817 int * total_rdispls = int_buffer + 3 * comm_size;
1818
1819 for (size_t i = 0; i < grid->num_cells; ++i) {
1820 if (grid->core_cell_mask[i]) {
1821 int curr_num_ranks = num_cell_ranks[i];
1822 int * ranks = dist_cell_ranks + dist_cell_rank_offsets[i];
1823 for (int j = 0; j < curr_num_ranks; ++j) sendcounts[3 * ranks[j] + 0]++;
1824 }
1825 }
1826 size_t offset = 0;
1827 for (size_t i = 0; i < grid->num_vertices; ++i) {
1828 if (grid->core_vertex_mask[i]) {
1829 int curr_num_ranks = num_vertex_ranks[i];
1830 int * ranks = rank_buffer + offset;
1831 offset += (size_t)curr_num_ranks;
1832 for (int j = 0; j < curr_num_ranks; ++j) sendcounts[3 * ranks[j] + 1]++;
1833 }
1834 }
1835 for (size_t i = 0; i < grid->num_edges; ++i) {
1836 if (grid->core_edge_mask[i]) {
1837 int curr_num_ranks = num_edge_ranks[i];
1838 int * ranks = rank_buffer + offset;
1839 offset += (size_t)curr_num_ranks;
1840 for (int j = 0; j < curr_num_ranks; ++j) sendcounts[3 * ranks[j] + 2]++;
1841 }
1842 }
1843
1845 3, sendcounts, recvcounts, sdispls, rdispls, comm);
1846
1847 size_t receive_counts[3] = {0,0,0};
1848 for (int i = 0, saccu = 0, raccu = 0; i < comm_size; ++i) {
1849 total_sdispls[i] = saccu;
1850 total_rdispls[i] = raccu;
1851 total_sendcounts[i] = 0;
1852 total_recvcounts[i] = 0;
1853 for (int j = 0; j < 3; ++j) {
1854 total_sendcounts[i] += sendcounts[3 * i + j];
1855 total_recvcounts[i] += recvcounts[3 * i + j];
1856 receive_counts[j] += (size_t)(recvcounts[3 * i + j]);
1857 }
1858 saccu += total_sendcounts[i];
1859 raccu += total_recvcounts[i];
1860 }
1861 size_t local_data_count = (size_t)(total_sendcounts[comm_size - 1]) +
1862 (size_t)(total_sdispls[comm_size - 1]);
1863 size_t recv_count = (size_t)(total_recvcounts[comm_size - 1]) +
1864 (size_t)(total_rdispls[comm_size - 1]);
1865
1866 struct id_pos * id_pos_buffer =
1867 xmalloc((local_data_count + recv_count) * sizeof(*id_pos_buffer));
1868 struct id_pos * id_pos_send_buffer = id_pos_buffer;
1869 struct id_pos * id_pos_recv_buffer =
1870 id_pos_buffer + local_data_count;
1871
1872 // pack send buffer
1873 for (size_t i = 0; i < grid->num_cells; ++i) {
1874 if (grid->core_cell_mask[i]) {
1875 int curr_num_ranks = num_cell_ranks[i];
1876 int * ranks = dist_cell_ranks + dist_cell_rank_offsets[i];
1877 yac_int global_id = grid->cell_ids[i];
1878 for (int j = 0; j < curr_num_ranks; ++j) {
1879 int pos = sdispls[3 * ranks[j] + 0 + 1]++;
1880 id_pos_send_buffer[pos].global_id = global_id;
1881 id_pos_send_buffer[pos].orig_pos = i;
1882 }
1883 }
1884 }
1885 offset = 0;
1886 for (size_t i = 0; i < grid->num_vertices; ++i) {
1887 if (grid->core_vertex_mask[i]) {
1888 int curr_num_ranks = num_vertex_ranks[i];
1889 int * ranks = rank_buffer + offset;
1890 offset += (size_t)curr_num_ranks;
1891 yac_int global_id = grid->vertex_ids[i];
1892 for (int j = 0; j < curr_num_ranks; ++j) {
1893 int pos = sdispls[3 * ranks[j] + 1 + 1]++;
1894 id_pos_send_buffer[pos].global_id = global_id;
1895 id_pos_send_buffer[pos].orig_pos = i;
1896 }
1897 }
1898 }
1899 for (size_t i = 0; i < grid->num_edges; ++i) {
1900 if (grid->core_edge_mask[i]) {
1901 int curr_num_ranks = num_edge_ranks[i];
1902 int * ranks = rank_buffer + offset;
1903 offset += (size_t)curr_num_ranks;
1904 yac_int global_id = grid->edge_ids[i];
1905 for (int j = 0; j < curr_num_ranks; ++j) {
1906 int pos = sdispls[3 * ranks[j] + 2 + 1]++;
1907 id_pos_send_buffer[pos].global_id = global_id;
1908 id_pos_send_buffer[pos].orig_pos = i;
1909 }
1910 }
1911 }
1912 free(num_ranks_buffer);
1913 free(rank_buffer);
1914 free(dist_cell_ranks);
1915 free(dist_cell_rank_offsets);
1916
1917 MPI_Datatype id_pos_dt = yac_get_id_pos_mpi_datatype(comm);
1918
1919 // exchange data
1921 id_pos_send_buffer, total_sendcounts, total_sdispls,
1922 id_pos_recv_buffer, total_recvcounts, total_rdispls,
1923 sizeof(*id_pos_send_buffer), id_pos_dt, comm);
1924
1925 yac_mpi_call(MPI_Type_free(&id_pos_dt), comm);
1926
1927 size_t dist_owner_counts[3] = {0, 0, 0};
1928 for (int i = 0; i < comm_size; ++i)
1929 for (int j = 0; j < 3; ++j)
1930 dist_owner_counts[j] += (size_t)(recvcounts[3 * i + j]);
1931 size_t max_dist_owner_count =
1932 MAX(MAX(dist_owner_counts[0], dist_owner_counts[1]), dist_owner_counts[2]);
1933 struct single_remote_point * temp_buffer =
1934 xmalloc(max_dist_owner_count * sizeof(*temp_buffer));
1935
1936 struct remote_points * dist_owners[3];
1937
1938 // unpack data
1939 for (int i = 0; i < 3; ++i) {
1940
1941 size_t count = 0;
1942 for (int j = 0; j < comm_size; ++j) {
1943 int curr_recvcount = recvcounts[3 * j + i];
1944 struct id_pos * curr_id_pos =
1945 id_pos_recv_buffer + rdispls[3 * j + i];
1946 for (int k = 0; k < curr_recvcount; ++k, ++count) {
1947 temp_buffer[count].global_id = curr_id_pos[k].global_id;
1948 temp_buffer[count].data.orig_pos = curr_id_pos[k].orig_pos;
1949 temp_buffer[count].data.rank = j;
1950 }
1951 }
1952
1953 // sort received global ids
1954 qsort(temp_buffer, count, sizeof(*temp_buffer),
1956
1957 struct remote_point * unique_ids = xmalloc(count * sizeof(*unique_ids));
1958 size_t num_unique_ids = 0;
1959
1960 // determine unique global ids
1961 yac_int prev_id = (count > 0)?temp_buffer[0].global_id - 1:-1;
1962 for (size_t j = 0; j < count; ++j) {
1963
1964 yac_int curr_id = temp_buffer[j].global_id;
1965 if (curr_id != prev_id) {
1966 prev_id = curr_id;
1967 unique_ids[num_unique_ids].global_id = curr_id;
1968 unique_ids[num_unique_ids].data.count = 1;
1969 num_unique_ids++;
1970 } else {
1971 unique_ids[num_unique_ids-1].data.count++;
1972 }
1973 }
1974
1975 size_t remote_point_info_buffer_size = 0;
1976 for (size_t j = 0; j < num_unique_ids; ++j)
1977 if (unique_ids[j].data.count > 1)
1978 remote_point_info_buffer_size +=
1979 (size_t)(unique_ids[j].data.count);
1980
1981 dist_owners[i] =
1982 xmalloc(remote_point_info_buffer_size * sizeof(struct remote_point_info) +
1983 sizeof(*(dist_owners[0])));
1984 dist_owners[i]->data =
1985 (unique_ids = xrealloc(unique_ids, num_unique_ids * sizeof(*unique_ids)));
1986 dist_owners[i]->count = num_unique_ids;
1987
1988 for (size_t j = 0, l = 0, offset = 0; j < num_unique_ids; ++j) {
1989 int curr_count = unique_ids[j].data.count;
1990 if (curr_count == 1) {
1991 unique_ids[j].data.data.single = temp_buffer[l].data;
1992 ++l;
1993 } else {
1994 unique_ids[j].data.data.multi = &(dist_owners[i]->buffer[offset]);
1995 for (int k = 0; k < curr_count; ++k, ++l, ++offset) {
1996 dist_owners[i]->buffer[offset] = temp_buffer[l].data;
1997 }
1998 }
1999 }
2000
2001 qsort(
2002 unique_ids, num_unique_ids, sizeof(*unique_ids), compare_remote_point);
2003 }
2004
2005 free(id_pos_buffer);
2006 free(temp_buffer);
2007 free(int_buffer);
2008 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
2009
2010 *dist_cell_owner = dist_owners[0];
2011 *dist_vertex_owner = dist_owners[1];
2012 *dist_edge_owner = dist_owners[2];
2013}
2014
2016 yac_int * ids, size_t count, yac_int ** sorted_ids, size_t ** reorder_idx) {
2017
2018 yac_int * sorted_ids_ = xrealloc(*sorted_ids, count * sizeof(*sorted_ids_));
2019 size_t * reorder_idx_ = xrealloc(*reorder_idx, count * sizeof(*reorder_idx_));
2020
2021 memcpy(sorted_ids_, ids, count * sizeof(*sorted_ids_));
2022 for (size_t i = 0; i < count; ++i) reorder_idx_[i] = i;
2023
2024 yac_quicksort_index_yac_int_size_t(sorted_ids_, count, reorder_idx_);
2025
2026 *sorted_ids = sorted_ids_;
2027 *reorder_idx = reorder_idx_;
2028}
2029
2030static Xt_xmap generate_xmap_data(
2031 void * data, size_t count, MPI_Comm comm,
2032 void(*set_sendcounts)(void*,size_t,int*),
2033 void(*pack)(void*,size_t,int*,int*,int*)) {
2034
2035 int comm_size;
2036 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
2037
2038 int * sendcounts, * recvcounts, * sdispls, * rdispls;
2040 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
2041
2042 set_sendcounts(data, count, sendcounts);
2043
2045 1, sendcounts, recvcounts, sdispls, rdispls, comm);
2046 int num_src_msg = 0, num_dst_msg = 0;
2047 for (int i = 0; i < comm_size; ++i) {
2048 num_src_msg += (recvcounts[i] > 0);
2049 num_dst_msg += (sendcounts[i] > 0);
2050 }
2051
2052 size_t recv_count =
2053 (size_t)(rdispls[comm_size-1]) + (size_t)(recvcounts[comm_size-1]);
2054
2055 int * pos_buffer =
2056 xmalloc((recv_count + 2 * count) * sizeof(*pos_buffer));
2057 int * src_pos_buffer = pos_buffer;
2058 int * dst_pos_buffer = pos_buffer + recv_count;
2059 int * send_pos_buffer = pos_buffer + recv_count + count;
2060
2061 // pack send buffer
2062 pack(data, count, sdispls, dst_pos_buffer, send_pos_buffer);
2063
2064 // redistribute positions of requested data
2066 send_pos_buffer, sendcounts, sdispls,
2067 src_pos_buffer, recvcounts, rdispls, comm);
2068
2069 struct Xt_com_pos * com_pos =
2070 xmalloc(((size_t)num_src_msg + (size_t)num_dst_msg) * sizeof(*com_pos));
2071 struct Xt_com_pos * src_com = com_pos;
2072 struct Xt_com_pos * dst_com = com_pos + num_src_msg;
2073
2074 // set transfer_pos pointers and transfer_pos counts in com_pos's
2075 num_src_msg = 0;
2076 num_dst_msg = 0;
2077 for (int i = 0; i < comm_size; ++i) {
2078 if (recvcounts[i] > 0) {
2079 src_com[num_src_msg].transfer_pos = src_pos_buffer;
2080 src_com[num_src_msg].num_transfer_pos = recvcounts[i];
2081 src_com[num_src_msg].rank = i;
2082 src_pos_buffer += recvcounts[i];
2083 ++num_src_msg;
2084 }
2085 if (sendcounts[i] > 0) {
2086 dst_com[num_dst_msg].transfer_pos = dst_pos_buffer;
2087 dst_com[num_dst_msg].num_transfer_pos = sendcounts[i];
2088 dst_com[num_dst_msg].rank = i;
2089 dst_pos_buffer += sendcounts[i];
2090 ++num_dst_msg;
2091 }
2092 }
2093 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
2094
2095 Xt_xmap xmap =
2096 xt_xmap_intersection_pos_new(
2097 num_src_msg, src_com, num_dst_msg, dst_com, comm);
2098
2099 free(com_pos);
2100 free(pos_buffer);
2101
2102 return xmap;
2103}
2104
2106 void * data, size_t count, int * sendcounts) {
2107
2108 struct remote_point * remote_point_data = (struct remote_point *)data;
2109
2110 for (size_t i = 0; i < count; ++i) {
2111 struct remote_point_info * curr_info =
2112 (remote_point_data[i].data.count > 1)?
2113 (remote_point_data[i].data.data.multi):
2114 (&(remote_point_data[i].data.data.single));
2115 sendcounts[curr_info->rank]++;
2116 }
2117}
2118
2120 void * data, size_t count, int * sdispls,
2121 int * dst_pos_buffer, int * send_pos_buffer) {
2122
2123 struct remote_point * remote_point_data = (struct remote_point *)data;
2124
2125 for (size_t i = 0; i < count; ++i) {
2126 struct remote_point_info * curr_info =
2127 (remote_point_data[i].data.count > 1)?
2128 (remote_point_data[i].data.data.multi):
2129 (&(remote_point_data[i].data.data.single));
2130 int pos = sdispls[curr_info->rank+1]++;
2131 dst_pos_buffer[pos] = (int)i;
2132 send_pos_buffer[pos] = (int)(curr_info->orig_pos);
2133 }
2134}
2135
2137 struct remote_point * remote_point_data, size_t count, MPI_Comm comm) {
2138
2139 return
2140 generate_xmap_data(remote_point_data, count, comm,
2143}
2144
2146 void * data, size_t count, int * sendcounts) {
2147
2148 struct remote_point_info * point_infos = (struct remote_point_info *)data;
2149
2150 for (size_t i = 0; i < count; ++i) sendcounts[point_infos[i].rank]++;
2151}
2152
2154 void * data, size_t count, int * sdispls,
2155 int * dst_pos_buffer, int * send_pos_buffer) {
2156
2157 struct remote_point_info * point_infos = (struct remote_point_info *)data;
2158
2159 for (size_t i = 0; i < count; ++i) {
2160 int pos = sdispls[point_infos[i].rank+1]++;
2161 dst_pos_buffer[pos] = (int)i;
2162 send_pos_buffer[pos] = (int)(point_infos[i].orig_pos);
2163 }
2164}
2165
2167 struct remote_point_info * point_infos, size_t count, MPI_Comm comm) {
2168
2169 return
2170 generate_xmap_data(point_infos, count, comm,
2173}
2174
2176 const void * a, const void * b) {
2177
2178 return (((const struct single_remote_point_reorder *)a)->data.global_id >
2179 ((const struct single_remote_point_reorder *)b)->data.global_id) -
2180 (((const struct single_remote_point_reorder *)a)->data.global_id <
2181 ((const struct single_remote_point_reorder *)b)->data.global_id);
2182}
2183
2185 const void * a, const void * b) {
2186
2187 return (((const struct single_remote_point_reorder *)a)->reorder_idx >
2188 ((const struct single_remote_point_reorder *)b)->reorder_idx) -
2189 (((const struct single_remote_point_reorder *)a)->reorder_idx <
2190 ((const struct single_remote_point_reorder *)b)->reorder_idx);
2191}
2192
2194 size_t max_num_cell_ve, size_t num_cells, int * num_ve_per_cell,
2195 struct single_remote_point_reorder * cell_ve_point_data,
2196 yac_int ** ids, size_t * num_ids, size_t ** cell_to_ve, Xt_xmap * xmap,
2197 MPI_Comm comm) {
2198
2199 size_t num_total_ve = num_cells * max_num_cell_ve;
2200
2201 size_t total_num_cell_ve = 0;
2202 for (size_t i = 0, k = 0, l = 0; i < num_cells; ++i) {
2203 size_t num_vertices = (size_t)(num_ve_per_cell[i]);
2204 total_num_cell_ve += num_vertices;
2205 size_t j = 0;
2206 for (; j < num_vertices; ++j, ++k, ++l) {
2207 cell_ve_point_data[k].reorder_idx = l;
2208 }
2209 for (; j < max_num_cell_ve; ++j, ++k) {
2210 cell_ve_point_data[k].data.global_id = XT_INT_MAX;
2211 cell_ve_point_data[k].reorder_idx = SIZE_MAX;
2212 }
2213 }
2214 qsort(cell_ve_point_data, num_total_ve, sizeof(*cell_ve_point_data),
2216 yac_int * ids_ = xmalloc(total_num_cell_ve * sizeof(*ids_));
2217 size_t num_ids_ = 0;
2218 size_t * cell_to_ve_ =
2219 xmalloc(total_num_cell_ve * sizeof(*cell_to_ve_));
2220 struct remote_point_info * ve_point_info =
2221 xmalloc(total_num_cell_ve * sizeof(*ve_point_info));
2222 yac_int prev_global_id = XT_INT_MAX;
2223 for (size_t i = 0; i < num_total_ve; ++i) {
2224 yac_int curr_global_id = cell_ve_point_data[i].data.global_id;
2225 size_t curr_reorder_idx = cell_ve_point_data[i].reorder_idx;
2226 if (curr_global_id == XT_INT_MAX) break;
2227 if (curr_global_id != prev_global_id) {
2228 ids_[num_ids_] = (prev_global_id = curr_global_id);
2229 ve_point_info[num_ids_] = cell_ve_point_data[i].data.data;
2230 num_ids_++;
2231 }
2232 cell_to_ve_[curr_reorder_idx] = num_ids_ - 1;
2233 }
2234
2235 *ids = xrealloc(ids_, num_ids_ * sizeof(*ids_));
2236 *num_ids = num_ids_;
2237 *cell_to_ve = cell_to_ve_;
2238 *xmap = generate_xmap_from_remote_point_info(ve_point_info, num_ids_, comm);
2239 free(ve_point_info);
2240}
2241
2242static MPI_Datatype yac_get_coordinate_mpi_datatype(MPI_Comm comm) {
2243
2244 MPI_Datatype coord_dt;
2245 yac_mpi_call(MPI_Type_contiguous(3, MPI_DOUBLE, &coord_dt), comm);
2246 yac_mpi_call(MPI_Type_commit(&coord_dt), comm);
2247 return coord_dt;
2248}
2249
2251 struct yac_field_data * orig_field_data, size_t dist_size,
2252 Xt_redist redist_mask, Xt_redist redist_coords, MPI_Comm comm) {
2253
2254 struct yac_field_data dist_field_data = yac_field_data_init();
2255
2256 uint64_t counts[2], max_counts[2];
2257 if (orig_field_data != NULL) {
2258 counts[0] = orig_field_data->masks_count;
2259 counts[1] = orig_field_data->coordinates_count;
2260 } else {
2261 counts[0] = 0;
2262 counts[1] = 0;
2263 }
2265 MPI_Allreduce(
2266 counts, max_counts, 2, MPI_UINT64_T, MPI_MAX, comm), comm);
2267 YAC_ASSERT(
2268 (orig_field_data == NULL) ||
2269 ((counts[0] == max_counts[0]) && (counts[1] == max_counts[1])),
2270 "ERROR(field_data_init): inconsistent number of masks or coordinates")
2271
2272 int * data_available_flag =
2273 xcalloc(max_counts[0] + max_counts[1], sizeof(*data_available_flag));
2274
2275 if (orig_field_data != NULL) {
2276 for (size_t i = 0; i < orig_field_data->masks_count; ++i)
2277 data_available_flag[i] = orig_field_data->masks[i] != NULL;
2278 for (size_t i = 0; i < orig_field_data->coordinates_count; ++i)
2279 data_available_flag[i + orig_field_data->masks_count] =
2280 orig_field_data->coordinates[i] != NULL;
2281 }
2282
2284 MPI_Allreduce(
2285 MPI_IN_PLACE, data_available_flag,
2286 (int)(max_counts[0] + max_counts[1]), MPI_INT, MPI_MAX, comm), comm);
2287
2288 if (orig_field_data != NULL) {
2289 for (size_t i = 0; i < orig_field_data->masks_count; ++i)
2290 YAC_ASSERT(
2291 data_available_flag[i] == (orig_field_data->masks[i] != NULL),
2292 "ERROR(field_data_init): inconsistent availability of masks")
2293
2294 for (size_t i = 0; i < orig_field_data->coordinates_count; ++i)
2295 YAC_ASSERT(
2296 data_available_flag[i + orig_field_data->masks_count] ==
2297 (orig_field_data->coordinates[i] != NULL),
2298 "ERROR(field_data_init): inconsistent availability of coordinates")
2299 }
2300
2301 for (uint64_t i = 0; i < max_counts[0]; ++i) {
2302 int * dist_mask = NULL;
2303 if (data_available_flag[i]) {
2304 dist_mask = xmalloc(dist_size * sizeof(*dist_mask));
2305 int * orig_mask = (orig_field_data != NULL)?orig_field_data->masks[i]:NULL;
2306 xt_redist_s_exchange1(redist_mask, orig_mask, dist_mask);
2307 }
2308 yac_field_data_add_mask_nocpy(&dist_field_data, dist_mask);
2309 }
2310
2311 for (uint64_t i = 0; i < max_counts[1]; ++i) {
2312 coordinate_pointer dist_coordinates = NULL;
2313 if (data_available_flag[i + max_counts[0]]) {
2314 dist_coordinates = xmalloc(dist_size * sizeof(*dist_coordinates));
2315 coordinate_pointer orig_coordinates =
2316 (orig_field_data != NULL)?orig_field_data->coordinates[i]:NULL;
2317 xt_redist_s_exchange1(redist_coords, orig_coordinates, dist_coordinates);
2318 }
2319 yac_field_data_add_coordinates_nocpy(&dist_field_data, dist_coordinates);
2320 }
2321
2322 free(data_available_flag);
2323
2324 return dist_field_data;
2325}
2326
2328 struct dist_grid * dist_grid, struct proc_sphere_part_node * proc_sphere_part,
2329 struct yac_basic_grid * grid, MPI_Comm comm) {
2330
2331 struct basic_grid_data * grid_data = yac_basic_grid_get_data(grid);
2332
2333 int comm_rank;
2334 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
2335
2336 int max_num_vertices_per_cell = 0;
2337 { // compute the global maximum number of vertices per cell
2338 for (size_t i = 0; i < grid_data->num_cells; ++i)
2339 if (grid_data->num_vertices_per_cell[i] > max_num_vertices_per_cell)
2340 max_num_vertices_per_cell = grid_data->num_vertices_per_cell[i];
2341 yac_mpi_call(MPI_Allreduce(MPI_IN_PLACE, &max_num_vertices_per_cell, 1,
2342 MPI_INT, MPI_MAX, comm), comm);
2343 }
2344
2345 struct remote_points * dist_cell_owner,
2346 * dist_vertex_owner,
2347 * dist_edge_owner;
2348
2350 proc_sphere_part, grid_data,
2351 &dist_cell_owner, &dist_vertex_owner, &dist_edge_owner, comm);
2352
2353 Xt_xmap xmap_cell_orig_to_dist =
2355 dist_cell_owner->data, dist_cell_owner->count, comm);
2356
2357 MPI_Datatype dt_single_remote_point =
2359 max_num_vertices_per_cell, comm);
2360 MPI_Datatype dt_coord = yac_get_coordinate_mpi_datatype(comm);
2361
2362 Xt_redist redist_cell_yac_int,
2363 redist_cell_int,
2364 redist_cell_corner_point_data,
2365 redist_cell_coords;
2366 redist_cell_yac_int = xt_redist_p2p_new(xmap_cell_orig_to_dist, Xt_int_dt);
2367 redist_cell_int = xt_redist_p2p_new(xmap_cell_orig_to_dist, MPI_INT);
2368 redist_cell_corner_point_data =
2369 xt_redist_p2p_new(xmap_cell_orig_to_dist, dt_single_remote_point);
2370 yac_mpi_call(MPI_Type_free(&dt_single_remote_point), comm);
2371 redist_cell_coords = xt_redist_p2p_new(xmap_cell_orig_to_dist, dt_coord);
2372
2373 xt_xmap_delete(xmap_cell_orig_to_dist);
2374
2375 size_t num_cells = dist_cell_owner->count;
2376 size_t max_num_cell_corners =
2377 (size_t)max_num_vertices_per_cell * num_cells;
2378 size_t max_num_local_cell_corners =
2379 (size_t)max_num_vertices_per_cell * grid_data->num_cells;
2380 yac_int * cell_ids = xmalloc(num_cells * sizeof(*cell_ids));
2381 int * num_vertices_per_cell =
2382 xmalloc(num_cells * sizeof(*num_vertices_per_cell));
2384 cell_corner_point_data =
2385 xmalloc(2 * (max_num_cell_corners + max_num_local_cell_corners) *
2386 sizeof(*cell_corner_point_data));
2387 struct single_remote_point_reorder * cell_edge_point_data =
2388 cell_corner_point_data + max_num_cell_corners;
2389 struct single_remote_point_reorder * local_cell_corner_point_data =
2390 cell_corner_point_data + 2 * max_num_cell_corners;
2391 struct single_remote_point_reorder * local_cell_edge_point_data =
2392 cell_corner_point_data + 2 * max_num_cell_corners +
2393 max_num_local_cell_corners;
2394 for (size_t i = 0; i < grid_data->num_cells; ++i){
2395 int num_corners = (size_t)(grid_data->num_vertices_per_cell[i]);
2396 size_t * corners = grid_data->cell_to_vertex + grid_data->cell_to_vertex_offsets[i];
2397 size_t * edges = grid_data->cell_to_edge + grid_data->cell_to_edge_offsets[i];
2398 struct single_remote_point_reorder * curr_local_cell_corner_point_data =
2399 local_cell_corner_point_data + i * (size_t)max_num_vertices_per_cell;
2400 struct single_remote_point_reorder * curr_local_cell_edge_point_data =
2401 local_cell_edge_point_data + i * (size_t)max_num_vertices_per_cell;
2402 for (size_t j = 0; j < num_corners; ++j) {
2403 curr_local_cell_corner_point_data[j].data.global_id = grid_data->vertex_ids[corners[j]];
2404 curr_local_cell_corner_point_data[j].data.data.rank = comm_rank;
2405 curr_local_cell_corner_point_data[j].data.data.orig_pos = corners[j];
2406 curr_local_cell_edge_point_data[j].data.global_id = grid_data->edge_ids[edges[j]];
2407 curr_local_cell_edge_point_data[j].data.data.rank = comm_rank;
2408 curr_local_cell_edge_point_data[j].data.data.orig_pos = edges[j];
2409 }
2410 }
2411
2412 xt_redist_s_exchange1(redist_cell_yac_int, grid_data->cell_ids, cell_ids);
2413 xt_redist_s_exchange1(
2414 redist_cell_int, grid_data->num_vertices_per_cell, num_vertices_per_cell);
2415 xt_redist_s_exchange1(
2416 redist_cell_corner_point_data,
2417 local_cell_corner_point_data, cell_corner_point_data);
2418 xt_redist_s_exchange1(
2419 redist_cell_corner_point_data,
2420 local_cell_edge_point_data, cell_edge_point_data);
2421
2422 struct yac_field_data cell_field_data =
2424 yac_basic_grid_get_field_data(grid, CELL), num_cells,
2425 redist_cell_int, redist_cell_coords, comm);
2426
2427 xt_redist_delete(redist_cell_coords);
2428 xt_redist_delete(redist_cell_corner_point_data);
2429 xt_redist_delete(redist_cell_yac_int);
2430 xt_redist_delete(redist_cell_int);
2431
2432 yac_int * vertex_ids;
2433 size_t num_vertices;
2434 size_t * cell_to_vertex;
2435 size_t * cell_to_vertex_offsets =
2436 xmalloc(num_cells * sizeof(*cell_to_vertex_offsets));
2437 Xt_xmap xmap_vertex_orig_to_dist;
2438 for (size_t i = 0, accu = 0; i < num_cells; ++i) {
2439 cell_to_vertex_offsets[i] = accu;
2440 accu += (size_t)(num_vertices_per_cell[i]);
2441 }
2443 max_num_vertices_per_cell, num_cells, num_vertices_per_cell,
2444 cell_corner_point_data, &vertex_ids, &num_vertices,
2445 &cell_to_vertex, &xmap_vertex_orig_to_dist, comm);
2446
2447 yac_int * edge_ids;
2448 size_t num_edges;
2449 size_t * cell_to_edge;
2450 size_t * cell_to_edge_offsets = cell_to_vertex_offsets;
2451 Xt_xmap xmap_edge_orig_to_dist;
2453 max_num_vertices_per_cell, num_cells, num_vertices_per_cell,
2454 cell_edge_point_data, &edge_ids, &num_edges,
2455 &cell_to_edge, &xmap_edge_orig_to_dist, comm);
2456
2457 free(cell_corner_point_data);
2458
2459 // generate redists for redistribution of vertex data
2460 Xt_redist redist_vertex_coords =
2461 xt_redist_p2p_new(xmap_vertex_orig_to_dist, dt_coord);
2462 Xt_redist redist_vertex_int =
2463 xt_redist_p2p_new(xmap_vertex_orig_to_dist, MPI_INT);
2464 xt_xmap_delete(xmap_vertex_orig_to_dist);
2465
2466 // get vertex coordinates
2467 coordinate_pointer vertex_coordinates =
2468 xmalloc(num_vertices * sizeof(*vertex_coordinates));
2469 xt_redist_s_exchange1(
2470 redist_vertex_coords, grid_data->vertex_coordinates, vertex_coordinates);
2471
2472 struct yac_field_data vertex_field_data =
2474 yac_basic_grid_get_field_data(grid, CORNER), num_vertices,
2475 redist_vertex_int, redist_vertex_coords, comm);
2476
2477 xt_redist_delete(redist_vertex_int);
2478 xt_redist_delete(redist_vertex_coords);
2479
2480 Xt_redist redist_edge_int,
2481 redist_edge_coords,
2482 redist_edge_2yac_int;
2483 { // generate redists for redistribution of edge data
2484
2485 MPI_Datatype dt_2yac_int;
2486 yac_mpi_call(MPI_Type_contiguous(2, Xt_int_dt, &dt_2yac_int), comm);
2487 yac_mpi_call(MPI_Type_commit(&dt_2yac_int), comm);
2488
2489 redist_edge_int = xt_redist_p2p_new(xmap_edge_orig_to_dist, MPI_INT);
2490 redist_edge_coords = xt_redist_p2p_new(xmap_edge_orig_to_dist, dt_coord);
2491 redist_edge_2yac_int = xt_redist_p2p_new(xmap_edge_orig_to_dist, dt_2yac_int);
2492
2493 yac_mpi_call(MPI_Type_free(&dt_2yac_int), comm);
2494
2495 }
2496 xt_xmap_delete(xmap_edge_orig_to_dist);
2497
2498 enum yac_edge_type * edge_type = xmalloc(num_edges * sizeof(*edge_type));
2499
2500 { // get edge types
2501 int * temp_edge_type_src, * temp_edge_type_dst;
2502 if (sizeof(*edge_type) == sizeof(int)) {
2503 temp_edge_type_src = (int*)(grid_data->edge_type);
2504 temp_edge_type_dst = (int*)edge_type;
2505 } else {
2506 temp_edge_type_src =
2507 xmalloc(grid_data->num_edges * sizeof(*temp_edge_type_src));
2508 for (size_t i = 0; i < grid_data->num_edges; ++i)
2509 temp_edge_type_src[i] = (int)(grid_data->edge_type[i]);
2510 temp_edge_type_dst = xmalloc(num_edges * sizeof(*temp_edge_type_dst));
2511 for (size_t i = 0; i < num_edges; ++i)
2512 temp_edge_type_dst[i] = (int)(edge_type[i]);
2513 }
2514
2515 xt_redist_s_exchange1(
2516 redist_edge_int, temp_edge_type_src, temp_edge_type_dst);
2517
2518 if (sizeof(*edge_type) != sizeof(int)) {
2519
2520 for (size_t i = 0; i < num_edges; ++i)
2521 edge_type[i] = (enum yac_edge_type)(temp_edge_type_dst[i]);
2522
2523 free(temp_edge_type_src);
2524 free(temp_edge_type_dst);
2525 }
2526 }
2527
2528 struct yac_field_data edge_field_data =
2530 yac_basic_grid_get_field_data(grid, EDGE), num_edges,
2531 redist_edge_int, redist_edge_coords, comm);
2532
2533 xt_redist_delete(redist_edge_int);
2534 xt_redist_delete(redist_edge_coords);
2535
2536 struct bounding_circle * cell_bnd_circles =
2537 xmalloc(num_cells * sizeof(*cell_bnd_circles));
2538 {
2539 struct grid_cell cell;
2540 cell.coordinates_xyz = xmalloc((size_t)max_num_vertices_per_cell *
2541 sizeof(*(cell.coordinates_xyz)));
2542 cell.edge_type = xmalloc((size_t)max_num_vertices_per_cell *
2543 sizeof(*(cell.edge_type)));
2544
2545 for (size_t i = 0; i < num_cells; ++i) {
2546 size_t * curr_cell_to_vertex =
2547 cell_to_vertex + cell_to_vertex_offsets[i];
2548 size_t * curr_cell_to_edge =
2549 cell_to_edge + cell_to_edge_offsets[i];
2550 for (int j = 0; j < num_vertices_per_cell[i]; ++j) {
2551 coordinate_pointer curr_vertex_coords =
2552 vertex_coordinates + curr_cell_to_vertex[j];
2553 for (int k = 0; k < 3; ++k)
2554 cell.coordinates_xyz[j][k] = (*curr_vertex_coords)[k];
2555 cell.edge_type[j] = edge_type[curr_cell_to_edge[j]];
2556 }
2557 cell.num_corners = num_vertices_per_cell[i];
2558 if (cell.num_corners > 0)
2559 yac_get_cell_bounding_circle(cell, cell_bnd_circles + i);
2560 else
2561 cell_bnd_circles[i] =
2562 (struct bounding_circle) {
2563 .base_vector = {1.0, 0.0, 0.0},
2564 .inc_angle = SIN_COS_ZERO,
2565 .sq_crd = DBL_MAX};
2566 }
2567 free(cell.edge_type);
2568 free(cell.coordinates_xyz);
2569 }
2570
2571 size_t_2_pointer edge_to_vertex =
2572 xmalloc(num_edges * sizeof(*edge_to_vertex));
2573
2574 // get edge to vertex
2575 {
2576 yac_int * vertex_id_buffer =
2577 xmalloc(
2578 2 * (grid_data->num_edges + num_edges) * sizeof(*vertex_id_buffer));
2579 yac_int * grid_edge_vertex_ids = vertex_id_buffer;
2580 yac_int * edge_vertex_ids = vertex_id_buffer + 2 * grid_data->num_edges;
2581
2582 size_t * grid_edge_to_vertex = &(grid_data->edge_to_vertex[0][0]);
2583
2584 for (size_t i = 0; i < 2 * grid_data->num_edges; ++i)
2585 grid_edge_vertex_ids[i] =
2586 grid_data->vertex_ids[grid_edge_to_vertex[i]];
2587
2588 xt_redist_s_exchange1(
2589 redist_edge_2yac_int, grid_edge_vertex_ids, edge_vertex_ids);
2590
2591 id2idx(edge_vertex_ids, &(edge_to_vertex[0][0]), 2 * num_edges,
2592 vertex_ids, num_vertices);
2593
2594 free(vertex_id_buffer);
2595 }
2596
2597 xt_redist_delete(redist_edge_2yac_int);
2598
2599 size_t num_total_cells = num_cells;
2600 size_t num_total_vertices = num_vertices;
2601 size_t num_total_edges = num_edges;
2602
2603 yac_mpi_call(MPI_Type_free(&dt_coord), comm);
2604
2605 dist_grid->comm = comm;
2606 dist_grid->vertex_coordinates = vertex_coordinates;
2607 dist_grid->cell_ids = cell_ids;
2608 dist_grid->vertex_ids = vertex_ids;
2609 dist_grid->edge_ids = edge_ids;
2610 dist_grid->num_cells = num_cells;
2611 dist_grid->num_vertices = num_vertices;
2612 dist_grid->num_edges = num_edges;
2613 dist_grid->num_vertices_per_cell = num_vertices_per_cell;
2614 dist_grid->cell_to_vertex = cell_to_vertex;
2615 dist_grid->cell_to_vertex_offsets = cell_to_vertex_offsets;
2616 dist_grid->cell_to_edge = cell_to_edge;
2617 dist_grid->cell_to_edge_offsets = cell_to_edge_offsets;
2618 dist_grid->edge_to_vertex = edge_to_vertex;
2619 dist_grid->cell_bnd_circles = cell_bnd_circles;
2620 dist_grid->edge_type = edge_type;
2621 dist_grid->sorted_cell_ids = NULL;
2623 dist_grid->sorted_edge_ids = NULL;
2627 generate_sorted_ids(cell_ids, num_cells, &(dist_grid->sorted_cell_ids),
2629 generate_sorted_ids(vertex_ids, num_vertices, &(dist_grid->sorted_vertex_ids),
2631 generate_sorted_ids(edge_ids, num_edges, &(dist_grid->sorted_edge_ids),
2635 proc_sphere_part, comm_rank, vertex_coordinates, num_total_vertices);
2638 dist_grid->core_vertex_mask, dist_grid, comm_rank == 0);
2641 dist_grid->num_total_cells = num_total_cells;
2642 dist_grid->num_total_vertices = num_total_vertices;
2643 dist_grid->num_total_edges = num_total_edges;
2644 dist_grid->field_data.cell = cell_field_data;
2645 dist_grid->field_data.vertex = vertex_field_data;
2646 dist_grid->field_data.edge = edge_field_data;
2647
2649 dist_grid, proc_sphere_part,
2650 dist_cell_owner, dist_vertex_owner, dist_edge_owner);
2651
2652 free(dist_cell_owner->data);
2653 free(dist_cell_owner);
2654 free(dist_vertex_owner->data);
2655 free(dist_vertex_owner);
2656 free(dist_edge_owner->data);
2657 free(dist_edge_owner);
2658}
2659
2661
2662 // build sphere part for vertices
2663 for (int i = 0; i < 2; ++i)
2669
2670 // build sphere part for bounding circle of cells
2671 for (int i = 0; i < 2; ++i) {
2676 }
2677}
2678
2679static void get_grid_cells(
2680 struct yac_basic_grid * grid, struct dist_cell * cells, MPI_Comm comm) {
2681
2682 struct basic_grid_data * grid_data = yac_basic_grid_get_data(grid);
2683
2684 size_t num_cells = grid_data->num_cells;
2685 for (size_t i = 0; i < num_cells; ++i) {
2686
2687 double * cell_coord = cells[i].coord;
2688 for (int j = 0; j < 3; ++j) cell_coord[j] = 0.0;
2689
2690 if (grid_data->num_vertices_per_cell[i] == 0) continue;
2691
2692 size_t * curr_cell_to_vertex =
2693 grid_data->cell_to_vertex + grid_data->cell_to_vertex_offsets[i];
2694 // compute the middle point of the cell
2695 for (int j = 0; j < grid_data->num_vertices_per_cell[i]; ++j) {
2696 coordinate_pointer curr_vertex_coords =
2697 grid_data->vertex_coordinates + curr_cell_to_vertex[j];
2698 for (int k = 0; k < 3; ++k)
2699 cell_coord[k] += (*curr_vertex_coords)[k];
2700 }
2701 normalise_vector(cell_coord);
2702 }
2703}
2704
2706 struct yac_basic_grid * grid_a, int grid_id_a,
2707 struct yac_basic_grid * grid_b, int grid_id_b, MPI_Comm comm) {
2708
2709 YAC_ASSERT(
2710 grid_id_a != grid_id_b,"ERROR(yac_dist_grid_pair_new): identical grid ids")
2711
2712 MPI_Comm comm_copy;
2713 yac_mpi_call(MPI_Comm_dup(comm, &comm_copy), comm);
2714 comm = comm_copy;
2715
2716 if (grid_id_a > grid_id_b) {
2717 int grid_id_swap = grid_id_a;
2718 grid_id_a = grid_id_b;
2719 grid_id_b = grid_id_swap;
2720 struct yac_basic_grid * grid_swap = grid_a;
2721 grid_a = grid_b;
2722 grid_b = grid_swap;
2723 }
2724
2725 struct dist_grid_pair * grid_pair = xmalloc(1 * sizeof(*grid_pair));
2726
2727 size_t num_cells_a = yac_basic_grid_get_data(grid_a)->num_cells;
2728 size_t num_cells_b = yac_basic_grid_get_data(grid_b)->num_cells;
2729
2730 size_t num_cells = num_cells_a + num_cells_b;
2731 struct dist_cell * cells = xmalloc(num_cells * sizeof(*cells));
2732 get_grid_cells(grid_a, cells, comm);
2733 get_grid_cells(grid_b, cells + num_cells_a, comm);
2734
2735 grid_pair->grid_ids[0] = grid_id_a;
2736 grid_pair->grid_ids[1] = grid_id_b;
2737 grid_pair->comm = comm;
2738
2739 // redistribute all points and build parallel sphere part for vertices
2740 grid_pair->proc_sphere_part =
2741 yac_redistribute_cells(&cells, &num_cells, comm);
2742 free(cells);
2743
2744 // generate dist_grids
2746 &(grid_pair->dist_grid[0]), grid_pair->proc_sphere_part, grid_a, comm);
2748 &(grid_pair->dist_grid[1]), grid_pair->proc_sphere_part, grid_b, comm);
2749
2750 // build search data structures for local cells and vertices in
2751 // distributed grid
2752 setup_search_data(grid_pair);
2753
2754 return grid_pair;
2755}
2756
2758 return grid_pair->comm;
2759}
2760
2762 struct dist_grid_pair * grid_pair, int grid_id) {
2763
2764 struct dist_grid * grid = NULL;
2765 for (int i = 0; (i < 2) && (grid == NULL); ++i)
2766 if (grid_id == grid_pair->grid_ids[i])
2767 grid = &(grid_pair->dist_grid[i]);
2768 YAC_ASSERT(
2769 grid != NULL, "ERROR(yac_dist_grid_pair_get_dist_grid): invalid grid_id")
2770 return grid;
2771}
2772
2774 struct dist_grid * dist_grid) {
2775
2776 return (struct const_basic_grid_data *)dist_grid;
2777}
2778
2780 struct dist_grid * dist_grid, enum yac_location location) {
2781
2782 YAC_ASSERT(
2783 (location == CELL) || (location == CORNER) || (location == EDGE),
2784 "ERROR(yac_dist_grid_get_local_count): invalid location")
2785 switch (location) {
2786 default:
2787 case(CELL):
2788 return
2791 case (CORNER):
2792 return
2795 case (EDGE):
2796 return
2799 };
2800}
2801
2803 struct dist_grid * dist_grid, enum yac_location location) {
2804
2805 YAC_ASSERT(
2806 (location == CELL) || (location == CORNER) || (location == EDGE),
2807 "ERROR(yac_dist_grid_get_count): invalid location")
2808 switch (location) {
2809 default:
2810 case(CELL): return dist_grid->num_total_cells;
2811 case (CORNER): return dist_grid->num_total_vertices;
2812 case (EDGE): return dist_grid->num_total_edges;
2813 };
2814}
2815
2817 struct dist_grid * dist_grid, enum yac_location location) {
2818
2819 YAC_ASSERT(
2820 (location == CELL) || (location == CORNER) || (location == EDGE),
2821 "ERROR(yac_dist_grid_get_core_mask): invalid location")
2822 switch (location) {
2823 default:
2824 case(CELL): return dist_grid->core_cell_mask;
2825 case (CORNER): return dist_grid->core_vertex_mask;
2826 case (EDGE): return dist_grid->core_edge_mask;
2827 };
2828}
2829
2831 struct dist_grid * dist_grid, enum yac_location location) {
2832
2833 YAC_ASSERT(
2834 (location == CELL) || (location == CORNER) || (location == EDGE),
2835 "ERROR(yac_dist_grid_get_global_ids): invalid location")
2836 switch (location) {
2837 default:
2838 case(CELL): return dist_grid->cell_ids;
2839 case (CORNER): return dist_grid->vertex_ids;
2840 case (EDGE): return dist_grid->edge_ids;
2841 };
2842}
2843
2845 struct dist_grid * dist_grid, struct interp_field field,
2846 size_t ** indices, size_t * count) {
2847
2848 int const * mask = yac_dist_grid_get_field_mask(dist_grid, field);
2849
2850 size_t total_count = yac_dist_grid_get_count(dist_grid, field.location);
2851 int const * core_mask =
2853
2854 size_t * temp_indices = xmalloc(total_count * sizeof(*temp_indices));
2855
2856 size_t count_ = 0;
2857
2858 if (mask != NULL) {
2859 for (size_t i = 0; i < total_count; ++i)
2860 if (core_mask[i] && mask[i]) temp_indices[count_++] = i;
2861 } else {
2862 for (size_t i = 0; i < total_count; ++i)
2863 if (core_mask[i]) temp_indices[count_++] = i;
2864 }
2865
2866 *indices = xrealloc(temp_indices, count_ * sizeof(**indices));
2867 *count = count_;
2868}
2869
2871 struct dist_grid * dist_grid, enum yac_location location) {
2872
2873 YAC_ASSERT(
2874 (location == CELL) || (location == CORNER) || (location == EDGE),
2875 "ERROR(yac_dist_grid_get_field_data): invalid location")
2876
2877 switch (location) {
2878 default:
2879 case(CELL): return &(dist_grid->field_data.cell);
2880 case(CORNER): return &(dist_grid->field_data.vertex);
2881 case(EDGE): return &(dist_grid->field_data.edge);
2882 };
2883}
2884
2886 struct dist_grid * dist_grid, struct interp_field field) {
2887
2888 if (field.masks_idx == SIZE_MAX) return NULL;
2889
2890 struct yac_field_data * data =
2892
2893 YAC_ASSERT(
2894 field.masks_idx < data->masks_count,
2895 "ERROR(yac_dist_grid_get_field_mask): invalid mask_idx")
2896
2897 return data->masks[field.masks_idx];
2898}
2899
2901 struct dist_grid * dist_grid, struct interp_field field) {
2902
2903 coordinate_pointer coords = NULL;
2904
2905 if (field.coordinates_idx != SIZE_MAX) {
2906
2907 struct yac_field_data * data =
2909
2910 YAC_ASSERT(
2911 field.coordinates_idx < data->coordinates_count,
2912 "ERROR(yac_dist_grid_get_field_coords): invalid coord_idx")
2913
2914 coords = data->coordinates[field.coordinates_idx];
2915 }
2916
2917 // if no field coordinates are defined, but the location is at the corners of
2918 // of the grid cells, return coordinates of them
2919 if ((coords == NULL) && (field.location == CORNER))
2920 coords = dist_grid->vertex_coordinates;
2921
2922 return coords;
2923}
2924
2926 struct dist_grid * dist_grid, struct interp_field field) {
2927
2928 size_t total_count = yac_dist_grid_get_count(dist_grid, field.location);
2929
2930 int const * mask = yac_dist_grid_get_field_mask(dist_grid, field);
2931 if (mask == NULL)
2933
2934 int const * core_mask =
2936
2937 size_t unmasked_local_count = 0;
2938 for (size_t i = 0; i < total_count; ++i)
2939 if (core_mask[i] && mask[i]) ++unmasked_local_count;
2940 return unmasked_local_count;
2941}
2942
2944 struct remote_point_infos * point_infos, size_t count) {
2945
2946 for (size_t i = 0; i < count; ++i)
2947 if (point_infos[i].count > 1) free(point_infos[i].data.multi);
2948 free(point_infos);
2949}
2950
2951static void yac_dist_grid_free(struct dist_grid grid) {
2952
2953 free(grid.vertex_coordinates);
2954 free(grid.cell_ids);
2955 free(grid.vertex_ids);
2956 free(grid.edge_ids);
2957 free(grid.num_vertices_per_cell);
2958 free(grid.cell_to_vertex);
2959 free(grid.cell_to_vertex_offsets);
2960 free(grid.cell_to_edge);
2961 free(grid.cell_bnd_circles);
2962 free(grid.edge_type);
2963 free(grid.edge_to_vertex);
2964 free(grid.core_cell_mask);
2965 free(grid.core_vertex_mask);
2966 free(grid.core_edge_mask);
2967 free(grid.sorted_cell_ids);
2968 free(grid.sorted_vertex_ids);
2969 free(grid.sorted_edge_ids);
2970 free(grid.sorted_cell_reorder_idx);
2971 free(grid.sorted_vertex_reorder_idx);
2972 free(grid.sorted_edge_reorder_idx);
2977}
2978
2980
2981 if (grid_pair == NULL) return;
2982 yac_mpi_call(MPI_Comm_free(&(grid_pair->comm)), MPI_COMM_WORLD);
2984 for (int i = 0; i < 2; ++i) {
2985 yac_dist_grid_free(grid_pair->dist_grid[i]);
2988 }
2989 free(grid_pair);
2990}
2991
2992static int coord_in_cell(double coord[3], struct dist_grid * grid,
2993 size_t cell_idx, struct grid_cell * buffer_cell) {
2994
2996 (struct const_basic_grid_data *)grid, cell_idx, buffer_cell);
2997
2998 return
2999 yac_point_in_cell2(coord, *buffer_cell, grid->cell_bnd_circles[cell_idx]);
3000}
3001
3002static int coord_in_cell_gc(double coord[3], struct dist_grid * grid,
3003 size_t cell_idx, struct grid_cell * buffer_cell) {
3004
3006 (struct const_basic_grid_data *)grid, cell_idx, buffer_cell);
3007 for (size_t i = 0; i < buffer_cell->num_corners; ++i)
3008 buffer_cell->edge_type[i] = GREAT_CIRCLE_EDGE;
3009
3010 return
3011 yac_point_in_cell2(coord, *buffer_cell, grid->cell_bnd_circles[cell_idx]);
3012}
3013
3015 struct const_basic_grid_data * grid_data, size_t cell_idx,
3016 struct grid_cell * buffer_cell) {
3017
3018 size_t num_vertices = (size_t)(grid_data->num_vertices_per_cell[cell_idx]);
3019
3020 struct grid_cell cell = *buffer_cell;
3021
3022 if (cell.array_size < num_vertices) {
3023 cell.coordinates_xyz =
3024 xrealloc(cell.coordinates_xyz, num_vertices *
3025 sizeof(*(cell.coordinates_xyz)));
3026 cell.edge_type = xrealloc(cell.edge_type, num_vertices *
3027 sizeof(*(cell.edge_type)));
3028 cell.array_size = num_vertices;
3029 *buffer_cell = cell;
3030 }
3031
3032 for (size_t i = 0; i < num_vertices; ++i) {
3033 size_t vertex_idx =
3034 grid_data->cell_to_vertex[
3035 grid_data->cell_to_vertex_offsets[cell_idx] + i];
3036 cell.coordinates_xyz[i][0] = grid_data->vertex_coordinates[vertex_idx][0];
3037 cell.coordinates_xyz[i][1] = grid_data->vertex_coordinates[vertex_idx][1];
3038 cell.coordinates_xyz[i][2] = grid_data->vertex_coordinates[vertex_idx][2];
3039 size_t edge_idx =
3040 grid_data->cell_to_edge[grid_data->cell_to_edge_offsets[cell_idx] + i];
3041 cell.edge_type[i] = grid_data->edge_type[edge_idx];
3042 }
3043 buffer_cell->num_corners = num_vertices;
3044}
3045
3047 struct dist_grid_pair * grid_pair, int grid_id) {
3048
3049 struct bnd_sphere_part_search * search = NULL;
3050
3051 for (int i = 0; (i < 2) && (search == NULL); ++i)
3052 if (grid_id == grid_pair->grid_ids[i])
3053 search = grid_pair->cell_sphere_part[i];
3054 YAC_ASSERT(
3055 search != NULL,
3056 "ERROR(yac_dist_grid_pair_get_cell_sphere_part): invalid grid_id")
3057 return search;
3058}
3059
3061 struct dist_grid_pair * grid_pair, int grid_id,
3062 coordinate_pointer search_coords, size_t count, size_t * cells,
3063 int (*coord_in_cell)(
3064 double coord[3], struct dist_grid * grid, size_t cell_idx,
3065 struct grid_cell * buffer_cell)) {
3066
3067 struct bnd_sphere_part_search * cell_sphere_part =
3068 dist_grid_pair_get_cell_sphere_part(grid_pair, grid_id);
3069 struct dist_grid * grid =
3070 yac_dist_grid_pair_get_dist_grid(grid_pair, grid_id);
3071
3072 size_t * temp_cells;
3073 size_t * num_cells_per_coord =
3074 xmalloc(count * sizeof(*num_cells_per_coord));
3075
3076 // search for all matching source cells
3078 cell_sphere_part, search_coords, count, &temp_cells,
3079 num_cells_per_coord);
3080
3081 struct grid_cell buffer_cell;
3082 yac_init_grid_cell(&buffer_cell);
3083
3084 // if we have multiple source cells for a single search coordinate, get the
3085 // source cell with the lowest global id
3086 for (size_t i = 0, k = 0; i < count; ++i) {
3087 size_t curr_num_cells = num_cells_per_coord[i];
3088 if (curr_num_cells == 0) {
3089 cells[i] = SIZE_MAX;
3090 } else if (curr_num_cells == 1) {
3091 if (coord_in_cell(
3092 search_coords[i], grid, temp_cells[k], &buffer_cell))
3093 cells[i] = temp_cells[k];
3094 else
3095 cells[i] = SIZE_MAX;
3096 ++k;
3097 } else {
3098 size_t cell_idx = SIZE_MAX;
3099 yac_int cell_id = XT_INT_MAX;
3100 for (size_t j = 0; j < curr_num_cells; ++j, ++k) {
3101 size_t curr_cell_idx = temp_cells[k];
3102 yac_int curr_cell_id = grid->cell_ids[curr_cell_idx];
3103 if (!coord_in_cell(
3104 search_coords[i], grid, curr_cell_idx, &buffer_cell))
3105 continue;
3106 if (curr_cell_id < cell_id) {
3107 cell_idx = curr_cell_idx;
3108 cell_id = curr_cell_id;
3109 }
3110 }
3111 cells[i] = cell_idx;
3112 }
3113 }
3114
3115 yac_free_grid_cell(&buffer_cell);
3116 free(num_cells_per_coord);
3117 free(temp_cells);
3118}
3119
3121 struct dist_grid * grid, enum yac_location location,
3122 struct single_remote_point_reorder * ids, size_t * count, size_t * idx) {
3123
3124 yac_int * sorted_ids;
3125 size_t * reorder_idx;
3126 size_t num_ids;
3127
3128 YAC_ASSERT(
3129 (location == CELL) || (location == CORNER) || (location == EDGE),
3130 "ERROR(lookup_single_remote_point_reorder_locally): invalid location")
3131
3132 // get required grid data
3133 switch (location) {
3134 default:
3135 case (CELL):
3136 sorted_ids = grid->sorted_cell_ids;
3137 reorder_idx = grid->sorted_cell_reorder_idx;
3138 num_ids = grid->num_total_cells;
3139 break;
3140 case (CORNER):
3141 sorted_ids = grid->sorted_vertex_ids;
3142 reorder_idx = grid->sorted_vertex_reorder_idx;
3143 num_ids = grid->num_total_vertices;
3144 break;
3145 case (EDGE):
3146 sorted_ids = grid->sorted_edge_ids;
3147 reorder_idx = grid->sorted_edge_reorder_idx;
3148 num_ids = grid->num_total_edges;
3149 break;
3150 };
3151
3152 size_t count_ = *count;
3153 size_t new_count = 0;
3154
3155 // sort ids by global ids
3156 qsort(ids, count_, sizeof(*ids),
3158
3159 for (size_t i = 0, j = 0; i < count_; ++i) {
3160 yac_int curr_id = ids[i].data.global_id;
3161 while ((j < num_ids) && (sorted_ids[j] < curr_id)) ++j;
3162 if ((j < num_ids) && (sorted_ids[j] == curr_id)) {
3163 idx[ids[i].reorder_idx] = reorder_idx[j];
3164 } else {
3165 if (i != new_count) ids[new_count] = ids[i];
3166 ++new_count;
3167 }
3168 }
3169
3170 *count = new_count;
3171}
3172
3174 struct yac_field_data field_data, MPI_Comm comm) {
3175
3176 int pack_size_field_coord, pack_size_field_mask;
3177
3179 MPI_Pack_size(3, MPI_DOUBLE, comm, &pack_size_field_coord), comm);
3180 pack_size_field_coord *= (int)field_data.coordinates_count;
3181
3183 MPI_Pack_size(1, MPI_INT, comm, &pack_size_field_mask), comm);
3184 pack_size_field_mask *= (int)field_data.masks_count;
3185
3186 return pack_size_field_coord + pack_size_field_mask;
3187}
3188
3190 struct yac_field_data cell_field_data,
3191 MPI_Datatype bnd_circle_dt, MPI_Comm comm) {
3192
3193 int pack_size_id,
3194 pack_size_num_vertices,
3195 pack_size_bnd_circle;
3196
3197 // id
3198 yac_mpi_call(MPI_Pack_size(1, Xt_int_dt, comm, &pack_size_id), comm);
3199 // num_vertices
3200 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_num_vertices), comm);
3201 // bounding circle
3203 MPI_Pack_size(1, bnd_circle_dt, comm, &pack_size_bnd_circle), comm);
3204
3205 return pack_size_id + pack_size_num_vertices + pack_size_bnd_circle +
3206 get_pack_size_field_data(cell_field_data, comm);
3207}
3208
3210 struct yac_field_data vertex_field_data, MPI_Comm comm) {
3211
3212 int pack_size_id,
3213 pack_size_vertex_coords;
3214 // id
3215 yac_mpi_call(MPI_Pack_size(1, Xt_int_dt, comm, &pack_size_id), comm);
3216 // vertex coordinates
3218 MPI_Pack_size(3, MPI_DOUBLE, comm, &pack_size_vertex_coords), comm);
3219
3220 return pack_size_id + pack_size_vertex_coords +
3221 get_pack_size_field_data(vertex_field_data, comm);
3222}
3223
3225 struct yac_field_data edge_field_data, MPI_Comm comm) {
3226
3227 int pack_size_id,
3228 pack_size_edge_to_vertex,
3229 pack_size_edge_type;
3230 // id
3231 yac_mpi_call(MPI_Pack_size(1, Xt_int_dt, comm, &pack_size_id), comm);
3232 // edge type
3233 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_edge_type), comm);
3234 // edge vertex ids
3236 MPI_Pack_size(2, Xt_int_dt, comm, &pack_size_edge_to_vertex), comm);
3237
3238 return pack_size_id + pack_size_edge_type + pack_size_edge_to_vertex +
3239 get_pack_size_field_data(edge_field_data, comm);
3240}
3241
3243 struct dist_grid * grid, uint64_t * pos, size_t count, int * pack_sizes,
3244 MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt, MPI_Comm comm) {
3245
3246 int pack_size_base_cell =
3247 get_pack_size_base_cell(grid->field_data.cell, bnd_circle_dt, comm);
3248 int pack_size_base_vertex =
3249 get_pack_size_base_vertex(grid->field_data.vertex, comm);
3250 int pack_size_base_edge =
3252
3253 for (size_t i = 0; i < count; ++i) {
3254 size_t idx = (size_t)(pos[i]);
3255 int num_vertices = grid->num_vertices_per_cell[idx];
3256 size_t * curr_vertices =
3257 grid->cell_to_vertex + grid->cell_to_vertex_offsets[idx];
3258 size_t * curr_edges =
3259 grid->cell_to_edge + grid->cell_to_edge_offsets[idx];
3260 int pack_size =
3261 pack_size_base_cell +
3262 num_vertices * (pack_size_base_vertex + pack_size_base_edge) +
3264 grid->cell_owners + idx, point_info_dt, comm);
3265 for (int j = 0; j < num_vertices; ++j) {
3266 pack_size +=
3268 grid->vertex_owners + curr_vertices[j], point_info_dt, comm) +
3270 grid->edge_owners + curr_edges[j], point_info_dt, comm);
3271 }
3272 pack_sizes[i] = pack_size;
3273 }
3274}
3275
3277 struct dist_grid * grid, uint64_t * pos, size_t count, int * pack_sizes,
3278 MPI_Datatype point_info_dt, MPI_Comm comm) {
3279
3280 int pack_size_base_vertex =
3281 get_pack_size_base_vertex(grid->field_data.vertex, comm);
3282 for (size_t i = 0; i < count; ++i)
3283 pack_sizes[i] =
3284 pack_size_base_vertex +
3286 grid->vertex_owners + pos[i], point_info_dt, comm);
3287}
3288
3290 struct dist_grid * grid, uint64_t * pos, size_t count, int * pack_sizes,
3291 MPI_Datatype point_info_dt, MPI_Comm comm) {
3292
3293 int pack_size_base_vertex =
3294 get_pack_size_base_vertex(grid->field_data.vertex, comm);
3295 int pack_size_base_edge =
3297 for (size_t i = 0; i < count; ++i) {
3298 size_t * curr_vertices = grid->edge_to_vertex[pos[i]];
3299 pack_sizes[i] =
3300 pack_size_base_edge +
3301 2 * pack_size_base_vertex +
3303 grid->edge_owners + pos[i], point_info_dt, comm) +
3305 grid->vertex_owners + curr_vertices[0], point_info_dt, comm) +
3307 grid->vertex_owners + curr_vertices[1], point_info_dt, comm);
3308 }
3309}
3310
3311static void get_pack_sizes(
3312 struct dist_grid * grid, enum yac_location location, uint64_t * pos,
3313 size_t count, int * pack_sizes, MPI_Datatype bnd_circle_dt,
3314 MPI_Datatype point_info_dt, MPI_Comm comm) {
3315
3316 YAC_ASSERT(
3317 (location == CELL) || (location == CORNER) || (location == EDGE),
3318 "ERROR(get_pack_sizes): invalid location")
3319
3320 switch(location) {
3321 default:
3322 case(CELL):
3324 grid, pos, count, pack_sizes, bnd_circle_dt, point_info_dt, comm);
3325 break;
3326 case(CORNER):
3328 grid, pos, count, pack_sizes, point_info_dt, comm);
3329 break;
3330 case(EDGE):
3332 grid, pos, count, pack_sizes, point_info_dt, comm);
3333 break;
3334 };
3335}
3336
3338 size_t idx, void * buffer, int buffer_size, int * position,
3339 struct yac_field_data field_data, MPI_Comm comm) {
3340
3341 // coordinates
3342 for (size_t i = 0; i < field_data.coordinates_count; ++i)
3344 MPI_Pack(field_data.coordinates[i][idx], 3, MPI_DOUBLE, buffer,
3345 buffer_size, position, comm), comm);
3346
3347 // masks
3348 for (size_t i = 0; i < field_data.masks_count; ++i)
3350 MPI_Pack(field_data.masks[i] + idx, 1, MPI_INT, buffer,
3351 buffer_size, position, comm), comm);
3352}
3353
3355 struct dist_grid * grid, size_t idx, void * buffer, int buffer_size,
3356 int * position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
3357 MPI_Comm comm) {
3358
3359 UNUSED(bnd_circle_dt);
3360
3361 // id
3363 MPI_Pack(grid->vertex_ids + idx, 1, Xt_int_dt, buffer, buffer_size,
3364 position, comm), comm);
3365 // vertex coordinates
3367 MPI_Pack(&(grid->vertex_coordinates[idx][0]), 3, MPI_DOUBLE, buffer,
3368 buffer_size, position, comm), comm);
3369 // vertex owner
3371 grid->vertex_owners + idx, buffer, buffer_size, position,
3372 point_info_dt, comm);
3373 // pack field data
3375 idx, buffer, buffer_size, position, grid->field_data.vertex, comm);
3376}
3377
3379 struct dist_grid * grid, size_t idx, void * buffer, int buffer_size,
3380 int * position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
3381 MPI_Comm comm) {
3382
3383 UNUSED(bnd_circle_dt);
3384
3385 int edge_type = (int)(grid->edge_type[idx]);
3386
3387 // id
3389 MPI_Pack(grid->edge_ids + idx, 1, Xt_int_dt, buffer, buffer_size, position,
3390 comm), comm);
3391 // edge type
3393 MPI_Pack(
3394 &edge_type, 1, MPI_INT, buffer, buffer_size, position, comm), comm);
3395 // edge to vertex
3396 yac_int edge_to_vertex[2] = {
3397 grid->vertex_ids[grid->edge_to_vertex[idx][0]],
3398 grid->vertex_ids[grid->edge_to_vertex[idx][1]]};
3400 MPI_Pack(
3401 edge_to_vertex, 2, Xt_int_dt, buffer, buffer_size, position, comm), comm);
3402 // edge owner
3404 grid->edge_owners + idx, buffer, buffer_size, position,
3405 point_info_dt, comm);
3406 // pack field data
3408 idx, buffer, buffer_size, position, grid->field_data.edge, comm);
3409}
3410
3412 struct dist_grid * grid, size_t idx, void * buffer, int buffer_size,
3413 int * position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
3414 MPI_Comm comm) {
3415
3417 grid, idx, buffer, buffer_size, position,
3418 bnd_circle_dt, point_info_dt, comm);
3419
3420 // pack edge vertices
3421 for (int i = 0; i < 2; ++i)
3423 grid, grid->edge_to_vertex[idx][i],
3424 buffer, buffer_size, position, bnd_circle_dt, point_info_dt, comm);
3425}
3426
3428 struct dist_grid * grid, size_t idx, void * buffer, int buffer_size,
3429 int * position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
3430 MPI_Comm comm) {
3431
3432 int num_vertices = grid->num_vertices_per_cell[idx];
3433
3434 // id
3436 MPI_Pack(grid->cell_ids + idx, 1, Xt_int_dt, buffer, buffer_size, position,
3437 comm), comm);
3438 // pack field data
3440 idx, buffer, buffer_size, position, grid->field_data.cell, comm);
3441 // num_vertices
3443 MPI_Pack(&num_vertices, 1, MPI_INT, buffer,
3444 buffer_size, position, comm), comm);
3445 // bounding_circle
3447 MPI_Pack(grid->cell_bnd_circles + idx, 1, bnd_circle_dt, buffer,
3448 buffer_size, position, comm), comm);
3449 // cell owner
3451 grid->cell_owners + idx, buffer, buffer_size, position,
3452 point_info_dt, comm);
3453
3454 for (int i = 0; i < num_vertices; ++i) {
3456 grid, grid->cell_to_vertex[grid->cell_to_vertex_offsets[idx] + i],
3457 buffer, buffer_size, position, bnd_circle_dt, point_info_dt, comm);
3459 grid, grid->cell_to_edge[grid->cell_to_edge_offsets[idx] + i],
3460 buffer, buffer_size, position, bnd_circle_dt, point_info_dt, comm);
3461 }
3462}
3463
3464static void pack_grid_data(
3465 struct dist_grid * grid, enum yac_location location, uint64_t * pos,
3466 size_t count, void ** pack_data, int * pack_sizes,
3467 MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt, MPI_Comm comm) {
3468
3469 get_pack_sizes(grid, location, pos, count, pack_sizes,
3470 bnd_circle_dt, point_info_dt, comm);
3471
3472 size_t pack_size = 0;
3473 for (size_t i = 0; i < count; ++i) pack_size += (size_t)(pack_sizes[i]);
3474
3475 void * pack_data_ = xmalloc(pack_size);
3476
3477 void (*func_pack)(
3478 struct dist_grid * grid, size_t idx, void * buffer, int buffer_size,
3479 int * position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
3480 MPI_Comm comm);
3481
3482 YAC_ASSERT(
3483 (location == CELL) || (location == CORNER) || (location == EDGE),
3484 "ERROR(pack_grid_data): invalid location")
3485
3486 switch(location) {
3487 default:
3488 case(CELL):
3489 func_pack = pack_grid_data_cell;
3490 break;
3491 case(CORNER):
3492 func_pack = pack_grid_data_vertex;
3493 break;
3494 case(EDGE):
3495 func_pack = pack_grid_data_edge;
3496 break;
3497 };
3498
3499 for (size_t i = 0, offset = 0; i < count; ++i) {
3500 int position = 0;
3501 func_pack(grid, pos[i], (char*)pack_data_ + offset, pack_sizes[i],
3502 &position, bnd_circle_dt, point_info_dt, comm);
3503 pack_sizes[i] = position;
3504 offset += (size_t)position;
3505 }
3506
3507 *pack_data = pack_data_;
3508}
3509
3511 void * buffer, int buffer_size, int * position, size_t idx,
3512 struct temp_field_data temp_field_data, MPI_Comm comm) {
3513
3514 for (size_t i = 0; i < temp_field_data.coordinates_count; ++i)
3516 MPI_Unpack(
3517 buffer, buffer_size, position, temp_field_data.coordinates[i][idx],
3518 3, MPI_DOUBLE, comm), comm);
3519
3520 for (size_t i = 0; i < temp_field_data.masks_count; ++i)
3522 MPI_Unpack(buffer, buffer_size, position,
3523 temp_field_data.masks[i] + idx, 1, MPI_INT, comm), comm);
3524}
3525
3527 struct global_vertex_reorder * vertex, size_t idx, void * buffer,
3528 int buffer_size, int * position,
3529 struct temp_field_data temp_vertex_field_data,
3530 MPI_Datatype point_info_dt, MPI_Comm comm) {
3531
3532 // id
3534 MPI_Unpack(buffer, buffer_size, position, &(vertex[idx].global_id), 1,
3535 Xt_int_dt, comm), comm);
3536 // vertex coordinates
3538 MPI_Unpack(buffer, buffer_size, position, &(vertex[idx].coord[0]), 3,
3539 MPI_DOUBLE, comm), comm);
3540 // vertex owners
3542 buffer, buffer_size, position, &(vertex[idx].owners), point_info_dt, comm);
3543 // unpack field data
3545 buffer, buffer_size, position, idx, temp_vertex_field_data, comm);
3546}
3547
3549 struct global_edge_reorder * edge, size_t idx, void * buffer,
3550 int buffer_size, int * position,
3551 struct temp_field_data temp_edge_field_data,
3552 MPI_Datatype point_info_dt, MPI_Comm comm) {
3553
3554 int edge_type;
3555
3556 // id
3558 MPI_Unpack(buffer, buffer_size, position, &(edge[idx].global_id), 1,
3559 Xt_int_dt, comm), comm);
3560 // edge type
3562 MPI_Unpack(buffer, buffer_size, position, &edge_type, 1,
3563 MPI_INT, comm), comm);
3564 edge[idx].edge_type = (enum yac_edge_type)edge_type;
3565 // edge to vertex
3567 MPI_Unpack(buffer, buffer_size, position, edge[idx].edge_to_vertex, 2,
3568 Xt_int_dt, comm), comm);
3569 // edge owners
3570 yac_remote_point_infos_unpack(buffer, buffer_size, position,
3571 &(edge[idx].owners), point_info_dt, comm);
3572 // unpack field data
3574 buffer, buffer_size, position, idx, temp_edge_field_data, comm);
3575}
3576
3578 const void * a, const void * b) {
3579
3580 return (((const struct global_vertex_reorder *)a)->global_id >
3581 ((const struct global_vertex_reorder *)b)->global_id) -
3582 (((const struct global_vertex_reorder *)a)->global_id <
3583 ((const struct global_vertex_reorder *)b)->global_id);
3584}
3585
3586static void add_field_data(
3587 struct yac_field_data * field_data, struct temp_field_data temp_field_data,
3588 void * reorder_idx, size_t reorder_idx_size,
3589 size_t old_count, size_t new_count) {
3590
3591 size_t add_count = new_count - old_count;
3592
3593 for (size_t i = 0; i < temp_field_data.masks_count; ++i) {
3594 int * temp_mask = temp_field_data.masks[i];
3595 int * mask =
3596 ((field_data->masks[i] =
3597 xrealloc(field_data->masks[i], new_count * sizeof(*mask))));
3598 for (size_t i = 0, j = old_count; i < add_count; ++i, ++j) {
3599 size_t idx =
3600 *(size_t*)((unsigned char*)reorder_idx + i * reorder_idx_size);
3601 mask[j] = temp_mask[idx];
3602 }
3603 }
3604
3605 for (size_t i = 0; i < temp_field_data.coordinates_count; ++i) {
3606 coordinate_pointer temp_coordinates = temp_field_data.coordinates[i];
3607 coordinate_pointer coordinates =
3608 ((field_data->coordinates[i] =
3609 xrealloc(
3610 field_data->coordinates[i], new_count * sizeof(*coordinates))));
3611 for (size_t i = 0, j = old_count; i < add_count; ++i, ++j) {
3612 size_t idx =
3613 *(size_t*)((unsigned char*)reorder_idx + i * reorder_idx_size);
3614 coordinates[j][0] = temp_coordinates[idx][0];
3615 coordinates[j][1] = temp_coordinates[idx][1];
3616 coordinates[j][2] = temp_coordinates[idx][2];
3617 }
3618 }
3619}
3620
3622 struct remote_point_infos * point_infos) {
3623
3624 if (point_infos->count > 1) free(point_infos->data.multi);
3625}
3626
3628 struct dist_grid * grid, struct global_vertex_reorder * vertices,
3629 size_t count, size_t * idx,
3630 struct temp_field_data temp_vertex_field_data) {
3631
3632 if (count == 0) return;
3633
3634 // sort vertices global ids
3635 qsort(vertices, count, sizeof(*vertices),
3637
3638 yac_int * sorted_vertex_ids = grid->sorted_vertex_ids;
3639 size_t * sorted_vertex_reorder_idx = grid->sorted_vertex_reorder_idx;
3640
3641 yac_int prev_global_id = vertices[0].global_id - 1;
3642 size_t prev_idx = 0;
3643 size_t add_count = 0;
3644 size_t num_total_vertices = grid->num_total_vertices;
3645
3646 // determine which vertices need to be added to local data
3647 for (size_t i = 0, j = 0; i < count; ++i) {
3648
3649 yac_int curr_global_id = vertices[i].global_id;
3650 size_t curr_reorder_idx = vertices[i].reorder_idx;
3651
3652 // if the current global id is a duplicate
3653 if (prev_global_id == curr_global_id) {
3654 if (idx != NULL) idx[curr_reorder_idx] = prev_idx;
3656 continue;
3657 }
3658 prev_global_id = curr_global_id;
3659
3660 // check whether the current global id is already part of the local
3661 // grid data
3662 while ((j < num_total_vertices) && (sorted_vertex_ids[j] < curr_global_id))
3663 ++j;
3664
3665 // if we found a match in the local data
3666 if ((j < num_total_vertices) && (sorted_vertex_ids[j] == curr_global_id)) {
3667
3668 if (idx != NULL) idx[curr_reorder_idx] = sorted_vertex_reorder_idx[j];
3669 prev_idx = sorted_vertex_reorder_idx[j];
3671
3672 // if we need to add the current vertex to the local data
3673 } else {
3674
3675 if (idx != NULL) idx[curr_reorder_idx] = num_total_vertices + add_count;
3676 prev_idx = num_total_vertices + add_count;
3677 if (add_count != i) vertices[add_count] = vertices[i];
3678 ++add_count;
3679 }
3680 }
3681
3682 size_t new_num_total_vertices = num_total_vertices + add_count;
3683 coordinate_pointer vertex_coordinates =
3684 xrealloc(grid->vertex_coordinates, new_num_total_vertices *
3685 sizeof(*vertex_coordinates));
3686 yac_int * vertex_ids =
3687 xrealloc(grid->vertex_ids, new_num_total_vertices * sizeof(*vertex_ids));
3688 int * core_vertex_mask =
3689 xrealloc(grid->core_vertex_mask, new_num_total_vertices *
3690 sizeof(*core_vertex_mask));
3691 struct remote_point_infos * vertex_owners =
3692 xrealloc(grid->vertex_owners, new_num_total_vertices *
3693 sizeof(*vertex_owners));
3694 sorted_vertex_ids =
3695 xrealloc(
3696 sorted_vertex_ids, new_num_total_vertices * sizeof(*sorted_vertex_ids));
3697 sorted_vertex_reorder_idx =
3698 xrealloc(
3699 sorted_vertex_reorder_idx, new_num_total_vertices *
3700 sizeof(*sorted_vertex_reorder_idx));
3701
3702 // add the selected vertices to the local grid data
3703 for (size_t i = 0, j = num_total_vertices; i < add_count; ++i, ++j) {
3704
3705 vertex_coordinates[j][0] = vertices[i].coord[0];
3706 vertex_coordinates[j][1] = vertices[i].coord[1];
3707 vertex_coordinates[j][2] = vertices[i].coord[2];
3708 vertex_ids[j] = vertices[i].global_id;
3709 core_vertex_mask[j] = 0;
3710 vertex_owners[j] = vertices[i].owners;
3711 sorted_vertex_ids[j] = vertices[i].global_id;
3712 sorted_vertex_reorder_idx[j] = j;
3713 }
3714 // add field data
3716 &(grid->field_data.vertex), temp_vertex_field_data,
3717 vertices, sizeof(*vertices),
3718 num_total_vertices, new_num_total_vertices);
3720 sorted_vertex_ids, new_num_total_vertices, sorted_vertex_reorder_idx);
3721
3722 grid->vertex_coordinates = vertex_coordinates;
3723 grid->vertex_ids = vertex_ids;
3724 grid->core_vertex_mask = core_vertex_mask;
3725 grid->vertex_owners = vertex_owners;
3726 grid->sorted_vertex_ids = sorted_vertex_ids;
3727 grid->sorted_vertex_reorder_idx = sorted_vertex_reorder_idx;
3728 grid->num_total_vertices = new_num_total_vertices;
3729}
3730
3732 const void * a, const void * b) {
3733
3734 return (((const struct global_edge_reorder *)a)->global_id >
3735 ((const struct global_edge_reorder *)b)->global_id) -
3736 (((const struct global_edge_reorder *)a)->global_id <
3737 ((const struct global_edge_reorder *)b)->global_id);
3738}
3739
3741 struct dist_grid * grid, struct global_edge_reorder * edges,
3742 size_t count, size_t * idx, struct temp_field_data temp_edge_field_data) {
3743
3744 if (count == 0) return;
3745
3746 // sort edges global ids
3747 qsort(edges, count, sizeof(*edges), compare_global_edge_reorder_global_id);
3748
3749 yac_int * sorted_edge_ids = grid->sorted_edge_ids;
3750 size_t * sorted_edge_reorder_idx = grid->sorted_edge_reorder_idx;
3751
3752 yac_int prev_global_id = edges[0].global_id - 1;
3753 size_t prev_idx = 0;
3754 size_t add_count = 0;
3755 size_t num_total_edges = grid->num_total_edges;
3756
3757 // determine which edges need to be added to local data
3758 for (size_t i = 0, j = 0; i < count; ++i) {
3759
3760 yac_int curr_global_id = edges[i].global_id;
3761 size_t curr_reorder_idx = edges[i].reorder_idx;
3762
3763 // if the current global id is a duplicate
3764 if (prev_global_id == curr_global_id) {
3765 if (idx != NULL) idx[curr_reorder_idx] = prev_idx;
3767 continue;
3768 }
3769 prev_global_id = curr_global_id;
3770
3771 // check whether the current global id is already part of the local
3772 // grid data
3773 while ((j < num_total_edges) && (sorted_edge_ids[j] < curr_global_id)) ++j;
3774
3775 // if we found a match in the local data
3776 if ((j < num_total_edges) && (sorted_edge_ids[j] == curr_global_id)) {
3777
3778 if (idx != NULL) idx[curr_reorder_idx] = sorted_edge_reorder_idx[j];
3779 prev_idx = sorted_edge_reorder_idx[j];
3781
3782 // if we need to add the current edge to the local data
3783 } else {
3784
3785 if (idx != NULL) idx[curr_reorder_idx] = num_total_edges + add_count;
3786 prev_idx = num_total_edges + add_count;
3787 if (add_count != i) edges[add_count] = edges[i];
3788 ++add_count;
3789 }
3790 }
3791
3792 size_t new_num_total_edges = num_total_edges + add_count;
3793 yac_int * edge_ids =
3794 xrealloc(grid->edge_ids, new_num_total_edges * sizeof(*edge_ids));
3795 enum yac_edge_type * edge_type =
3796 xrealloc(grid->edge_type, new_num_total_edges * sizeof(*edge_type));
3798 xrealloc(grid->edge_to_vertex, new_num_total_edges * sizeof(*edge_to_vertex));
3799 struct remote_point_infos * edge_owners =
3800 xrealloc(grid->edge_owners, new_num_total_edges * sizeof(*edge_owners));
3801 int * core_edge_mask =
3802 xrealloc(grid->core_edge_mask, new_num_total_edges *
3803 sizeof(*core_edge_mask));
3804 sorted_edge_ids =
3805 xrealloc(
3806 sorted_edge_ids, new_num_total_edges * sizeof(*sorted_edge_ids));
3807 sorted_edge_reorder_idx =
3808 xrealloc(
3809 sorted_edge_reorder_idx, new_num_total_edges *
3810 sizeof(*sorted_edge_reorder_idx));
3811
3812 yac_int * vertex_ids = xmalloc(2 * add_count * sizeof(*vertex_ids));
3813 size_t * reorder = xmalloc(2 * add_count * sizeof(*reorder));
3814
3815 // add the selected edges to the local grid data
3816 for (size_t i = 0, j = num_total_edges; i < add_count; ++i, ++j) {
3817
3818 edge_ids[j] = edges[i].global_id;
3819 edge_type[j] = edges[i].edge_type;
3820 core_edge_mask[j] = 0;
3821 edge_owners[j] = edges[i].owners;
3822 sorted_edge_ids[j] = edges[i].global_id;
3823 sorted_edge_reorder_idx[j] = j;
3824
3825 vertex_ids[2 * i + 0] = edges[i].edge_to_vertex[0];
3826 vertex_ids[2 * i + 1] = edges[i].edge_to_vertex[1];
3827 reorder[2 * i + 0] = 2 * num_total_edges + 2 * i + 0;
3828 reorder[2 * i + 1] = 2 * num_total_edges + 2 * i + 1;
3829 }
3830 // add field data
3832 &(grid->field_data.edge), temp_edge_field_data, edges, sizeof(*edges),
3833 num_total_edges, new_num_total_edges);
3835 sorted_edge_ids, new_num_total_edges, sorted_edge_reorder_idx);
3836
3837 { // determine vertex indices for edge_to_vertex
3838 yac_quicksort_index_yac_int_size_t(vertex_ids, 2 * add_count, reorder);
3839 yac_int * sorted_vertex_ids = grid->sorted_vertex_ids;
3840 size_t * sorted_vertex_reorder_idx = grid->sorted_vertex_reorder_idx;
3841 size_t total_num_vertices = grid->num_total_vertices;
3842 size_t * edge_to_vertex_ = (size_t*)&(edge_to_vertex[0][0]);
3843 // lookup global ids
3844 for (size_t i = 0, j = 0; i < 2 * add_count; ++i) {
3845 yac_int curr_id = vertex_ids[i];
3846 while ((j < total_num_vertices) && (sorted_vertex_ids[j] < curr_id)) ++j;
3847 YAC_ASSERT(
3848 (j < total_num_vertices) && (sorted_vertex_ids[j] == curr_id),
3849 "ERROR(yac_dist_grid_add_edges): vertex id not found")
3850 edge_to_vertex_[reorder[i]] = sorted_vertex_reorder_idx[j];
3851 }
3852 }
3853
3854 free(vertex_ids);
3855 free(reorder);
3856
3857 grid->edge_ids = edge_ids;
3858 grid->edge_type = edge_type;
3859 grid->edge_to_vertex = edge_to_vertex;
3860 grid->edge_owners = edge_owners;
3861 grid->core_edge_mask = core_edge_mask;
3862 grid->sorted_edge_ids = sorted_edge_ids;
3863 grid->sorted_edge_reorder_idx = sorted_edge_reorder_idx;
3864 grid->num_total_edges = new_num_total_edges;
3865}
3866
3868 struct dist_grid * grid, yac_int * cell_ids, int * num_vertices_per_cell,
3869 struct bounding_circle * cell_bnd_circles, size_t count,
3870 size_t * cell_to_vertex, size_t * cell_to_edge,
3871 struct remote_point_infos * cell_owners,
3872 struct temp_field_data temp_cell_field_data) {
3873
3874 if (count == 0) return;
3875
3876 size_t * reorder_idx = xmalloc(count * sizeof(reorder_idx));
3877 for (size_t i = 0; i < count; ++i) reorder_idx[i] = i;
3878
3879 size_t * prescan = xmalloc(count * sizeof(*prescan));
3880 for (size_t i = 0, accu = 0; i < count;
3881 accu += (size_t)(num_vertices_per_cell[i++])) prescan[i] = accu;
3882
3883 // sort cells global ids
3884 yac_quicksort_index_yac_int_size_t(cell_ids, count, reorder_idx);
3885
3886 yac_int * sorted_cell_ids = grid->sorted_cell_ids;
3887 size_t * sorted_cell_reorder_idx = grid->sorted_cell_reorder_idx;
3888
3889 yac_int prev_global_id = cell_ids[0] - 1;
3890 size_t cell_add_count = 0;
3891 size_t relations_add_count = 0;
3892 size_t num_total_cells = grid->num_total_cells;
3893
3894 // determine which cells need to be added to local data
3895 for (size_t i = 0, j = 0; i < count; ++i) {
3896
3897 yac_int curr_global_id = cell_ids[i];
3898 size_t curr_reorder_idx = reorder_idx[i];
3899
3900 // if the current global id is a duplicate
3901 if (prev_global_id == curr_global_id) {
3902 yac_remote_point_infos_single_free(cell_owners + curr_reorder_idx);
3903 continue;
3904 }
3905 prev_global_id = curr_global_id;
3906
3907 // check whether the current global id is already part of the local
3908 // grid data
3909 while ((j < num_total_cells) && (sorted_cell_ids[j] < curr_global_id)) ++j;
3910
3911 // if we did not find a match in the local data
3912 if ((j >= num_total_cells) || (sorted_cell_ids[j] != curr_global_id)) {
3913
3914 if (cell_add_count != i) {
3915 cell_ids[cell_add_count] = curr_global_id;
3916 reorder_idx[cell_add_count] = curr_reorder_idx;
3917 }
3918 ++cell_add_count;
3919 relations_add_count += (size_t)(num_vertices_per_cell[i]);
3920 }
3921 }
3922
3923 size_t new_num_total_cells = num_total_cells + cell_add_count;
3924 size_t num_total_relations =
3925 (num_total_cells > 0)?
3926 (grid->cell_to_vertex_offsets[num_total_cells-1] +
3927 (size_t)(grid->num_vertices_per_cell[num_total_cells-1])):0;
3928 size_t new_num_total_relations = num_total_relations + relations_add_count;
3929 yac_int * new_cell_ids =
3930 xrealloc(grid->cell_ids, new_num_total_cells * sizeof(*new_cell_ids));
3931 int * new_num_vertices_per_cell =
3932 xrealloc(grid->num_vertices_per_cell, new_num_total_cells *
3933 sizeof(*new_num_vertices_per_cell));
3934 size_t * new_cell_to_vertex =
3935 xrealloc(grid->cell_to_vertex, new_num_total_relations *
3936 sizeof(*new_cell_to_vertex));
3937 size_t * cell_to_vertex_offsets =
3938 xrealloc(grid->cell_to_vertex_offsets, new_num_total_cells *
3939 sizeof(*cell_to_vertex_offsets));
3940 size_t * new_cell_to_edge =
3941 xrealloc(grid->cell_to_edge, new_num_total_relations *
3942 sizeof(*new_cell_to_edge));
3943 struct bounding_circle * new_cell_bnd_circles =
3944 xrealloc(grid->cell_bnd_circles, new_num_total_cells *
3945 sizeof(*new_cell_bnd_circles));
3946 int * core_cell_mask =
3947 xrealloc(grid->core_cell_mask, new_num_total_cells * sizeof(*core_cell_mask));
3948 struct remote_point_infos * new_cell_owners =
3949 xrealloc(grid->cell_owners, new_num_total_cells * sizeof(*cell_owners));
3950 sorted_cell_ids =
3951 xrealloc(
3952 sorted_cell_ids, new_num_total_cells * sizeof(*sorted_cell_ids));
3953 sorted_cell_reorder_idx =
3954 xrealloc(
3955 sorted_cell_reorder_idx, new_num_total_cells *
3956 sizeof(*sorted_cell_reorder_idx));
3957
3958 // add the selected cells to the local grid data
3959 for (size_t i = 0, j = num_total_cells; i < cell_add_count;
3960 ++i, ++j) {
3961
3962 size_t curr_reorder_idx = reorder_idx[i];
3963 int curr_num_vertices = num_vertices_per_cell[curr_reorder_idx];
3964 size_t curr_relation_idx = prescan[curr_reorder_idx];
3965
3966 new_cell_ids[j] = cell_ids[i];
3967 new_num_vertices_per_cell[j] = curr_num_vertices;
3968 cell_to_vertex_offsets[j] = num_total_relations;
3969 for (int j = 0; j < curr_num_vertices;
3970 ++j, ++num_total_relations, ++curr_relation_idx) {
3971 new_cell_to_vertex[num_total_relations] =
3972 cell_to_vertex[curr_relation_idx];
3973 new_cell_to_edge[num_total_relations] = cell_to_edge[curr_relation_idx];
3974 }
3975 core_cell_mask[j] = 0;
3976 sorted_cell_ids[j] = cell_ids[i];
3977 sorted_cell_reorder_idx[j] = j;
3978 new_cell_bnd_circles[j] = cell_bnd_circles[curr_reorder_idx];
3979 new_cell_owners[j] = cell_owners[curr_reorder_idx];
3980 }
3981 // add field data
3983 &(grid->field_data.cell), temp_cell_field_data,
3984 reorder_idx, sizeof(*reorder_idx),
3985 num_total_cells, new_num_total_cells);
3987 sorted_cell_ids, new_num_total_cells, sorted_cell_reorder_idx);
3988
3989 grid->cell_ids = new_cell_ids;
3990 grid->num_vertices_per_cell = new_num_vertices_per_cell;
3991 grid->cell_to_vertex = new_cell_to_vertex;
3992 grid->cell_to_vertex_offsets = cell_to_vertex_offsets;
3993 grid->cell_to_edge = new_cell_to_edge;
3994 grid->cell_to_edge_offsets = cell_to_vertex_offsets;
3995 grid->core_cell_mask = core_cell_mask;
3996 grid->cell_owners = new_cell_owners;
3997 grid->sorted_cell_ids = sorted_cell_ids;
3998 grid->sorted_cell_reorder_idx = sorted_cell_reorder_idx;
3999 grid->cell_bnd_circles = new_cell_bnd_circles;
4000 grid->num_total_cells = new_num_total_cells;
4001
4002 free(prescan);
4003 free(reorder_idx);
4004}
4005
4007 struct temp_field_data * temp_field_data, size_t size) {
4008
4009 for (size_t i = 0; i < temp_field_data->masks_count; ++i)
4012 for (size_t i = 0; i < temp_field_data->coordinates_count; ++i)
4016}
4017
4019 struct yac_field_data field_data, size_t count) {
4020
4022
4024 xmalloc(field_data.masks_count * sizeof(*temp_field_data.masks));
4026 xmalloc(
4027 field_data.masks_count * sizeof(*temp_field_data.masks_array_sizes));
4028 temp_field_data.masks_count = field_data.masks_count;
4029 for (size_t i = 0; i < field_data.masks_count; ++i) {
4031 xmalloc(count * sizeof(**temp_field_data.masks));
4033 }
4034
4036 xmalloc(
4037 field_data.coordinates_count * sizeof(*temp_field_data.coordinates));
4039 xmalloc(
4040 field_data.coordinates_count *
4042 temp_field_data.coordinates_count = field_data.coordinates_count;
4043 for (size_t i = 0; i < field_data.coordinates_count; ++i) {
4045 xmalloc(count * sizeof(**temp_field_data.coordinates));
4047 }
4048
4049 return temp_field_data;
4050}
4051
4053
4054 for (size_t i = 0; i < temp_field_data.masks_count; ++i)
4055 free(temp_field_data.masks[i]);
4056 free(temp_field_data.masks);
4058 for (size_t i = 0; i < temp_field_data.coordinates_count; ++i)
4062}
4063
4065 struct dist_grid * grid, size_t count, void * buffer, int buffer_size,
4066 MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt, MPI_Comm comm) {
4067
4068 yac_int * cell_ids = xmalloc(count * sizeof(*cell_ids));
4069 int * num_vertices_per_cell = xmalloc(count * sizeof(*num_vertices_per_cell));
4070 struct bounding_circle * cell_bnd_circles =
4071 xmalloc(count * sizeof(*cell_bnd_circles));
4072 struct remote_point_infos * cell_owners =
4073 xmalloc(count * sizeof(*cell_owners));
4074
4075 struct global_vertex_reorder * vertices = NULL;
4076 size_t vertices_array_size = 0;
4077 size_t total_num_vertices = 0;
4078
4079 struct global_edge_reorder * edges = NULL;
4080 size_t edges_array_size = 0;
4081
4082 struct temp_field_data temp_cell_field_data =
4083 temp_field_data_init(grid->field_data.cell, count);
4084 struct temp_field_data temp_vertex_field_data =
4085 temp_field_data_init(grid->field_data.vertex, 3 * count);
4086 struct temp_field_data temp_edge_field_data =
4087 temp_field_data_init(grid->field_data.edge, 3 * count);
4088
4089 for (size_t i = 0, buffer_offset = 0; i < count; ++i) {
4090
4091 int position = 0;
4092 void * curr_buffer = (char*)buffer + buffer_offset;
4093 int num_vertices;
4094
4095 // cell id
4097 MPI_Unpack(curr_buffer, buffer_size, &position, cell_ids + i, 1,
4098 Xt_int_dt, comm), comm);
4099 // unpack field data
4101 curr_buffer, buffer_size, &position, i, temp_cell_field_data, comm);
4102 // num vertices
4104 MPI_Unpack(curr_buffer, buffer_size, &position, &num_vertices, 1,
4105 MPI_INT, comm), comm);
4106 // bounding circle
4108 MPI_Unpack(curr_buffer, buffer_size, &position, cell_bnd_circles + i, 1,
4109 bnd_circle_dt, comm), comm);
4110 // cell owners
4112 curr_buffer, buffer_size, &position, cell_owners + i,
4113 point_info_dt, comm);
4114
4115 num_vertices_per_cell[i] = num_vertices;
4116
4118 vertices, vertices_array_size, total_num_vertices + (size_t)num_vertices);
4120 edges, edges_array_size, total_num_vertices + (size_t)num_vertices);
4122 &temp_vertex_field_data, total_num_vertices + (size_t)num_vertices);
4124 &temp_edge_field_data, total_num_vertices + (size_t)num_vertices);
4125
4126 for (int j = 0; j < num_vertices; ++j, ++total_num_vertices) {
4128 vertices, total_num_vertices, curr_buffer, buffer_size, &position,
4129 temp_vertex_field_data, point_info_dt, comm);
4131 edges, total_num_vertices, curr_buffer, buffer_size, &position,
4132 temp_edge_field_data, point_info_dt, comm);
4133 vertices[total_num_vertices].reorder_idx = total_num_vertices;
4134 edges[total_num_vertices].reorder_idx = total_num_vertices;
4135 }
4136
4137 buffer_offset += (size_t)position;
4138 buffer_size -= position;
4139 }
4140
4141 size_t * cell_to_vertex = xmalloc(total_num_vertices * sizeof(*cell_to_vertex));
4142 size_t * cell_to_edge = xmalloc(total_num_vertices * sizeof(*cell_to_edge));
4143
4145 grid, vertices, total_num_vertices, cell_to_vertex,
4146 temp_vertex_field_data);
4148 grid, edges, total_num_vertices, cell_to_edge,
4149 temp_edge_field_data);
4151 grid, cell_ids, num_vertices_per_cell, cell_bnd_circles, count,
4152 cell_to_vertex, cell_to_edge, cell_owners, temp_cell_field_data);
4153
4154 temp_field_data_free(temp_cell_field_data);
4155 temp_field_data_free(temp_vertex_field_data);
4156 temp_field_data_free(temp_edge_field_data);
4157 free(cell_to_edge);
4158 free(cell_to_vertex);
4159 free(vertices);
4160 free(edges);
4161 free(cell_owners);
4162 free(cell_bnd_circles);
4163 free(num_vertices_per_cell);
4164 free(cell_ids);
4165}
4166
4168 struct dist_grid * grid, size_t count, void * buffer, int buffer_size,
4169 MPI_Datatype point_info_dt, MPI_Comm comm) {
4170
4171 struct global_vertex_reorder * vertices = xmalloc(count * sizeof(*vertices));
4172
4173 struct temp_field_data temp_vertex_field_data =
4174 temp_field_data_init(grid->field_data.vertex, count);
4175
4176 for (size_t i = 0, buffer_offset = 0; i < count; ++i) {
4177
4178 int position = 0;
4179 void * curr_buffer = (char*)buffer + buffer_offset;
4180
4182 vertices, i, curr_buffer, buffer_size, &position,
4183 temp_vertex_field_data, point_info_dt, comm);
4184 vertices[i].reorder_idx = i;
4185
4186 buffer_offset += (size_t)position;
4187 buffer_size -= position;
4188 }
4189
4191 grid, vertices, count, NULL, temp_vertex_field_data);
4192
4193 temp_field_data_free(temp_vertex_field_data);
4194
4195 free(vertices);
4196}
4197
4199 struct dist_grid * grid, size_t count, void * buffer, int buffer_size,
4200 MPI_Datatype point_info_dt, MPI_Comm comm) {
4201
4202 struct global_edge_reorder * edges = xmalloc(count * sizeof(*edges));
4203 struct global_vertex_reorder * vertices =
4204 xmalloc(2 * count * sizeof(*vertices));
4205
4206 struct temp_field_data temp_edge_field_data =
4207 temp_field_data_init(grid->field_data.edge, count);
4208 struct temp_field_data temp_vertex_field_data =
4209 temp_field_data_init(grid->field_data.vertex, 2 * count);
4210
4211 for (size_t i = 0, buffer_offset = 0; i < count; ++i) {
4212
4213 int position = 0;
4214 void * curr_buffer = (char*)buffer + buffer_offset;
4215
4217 edges, i, curr_buffer, buffer_size, &position,
4218 temp_edge_field_data, point_info_dt, comm);
4219 edges[i].reorder_idx = i;
4220
4221 for (size_t j = 0; j < 2; ++j)
4223 vertices, 2 * i + j, curr_buffer, buffer_size, &position,
4224 temp_vertex_field_data, point_info_dt, comm);
4225
4226 buffer_offset += (size_t)position;
4227 buffer_size -= position;
4228 }
4229
4231 grid, vertices, 2 * count, NULL, temp_vertex_field_data);
4233 grid, edges, count, NULL, temp_edge_field_data);
4234
4235 temp_field_data_free(temp_vertex_field_data);
4236 temp_field_data_free(temp_edge_field_data);
4237
4238 free(vertices);
4239 free(edges);
4240}
4241
4243 struct dist_grid * grid, enum yac_location location, size_t count,
4244 void * buffer, int buffer_size, MPI_Datatype bnd_circle_dt,
4245 MPI_Datatype point_info_dt, MPI_Comm comm) {
4246
4247 YAC_ASSERT(
4248 (location == CELL) || (location == CORNER) || (location == EDGE),
4249 "ERROR(unpack_grid_data): invalid location")
4250
4251 switch(location) {
4252 default:
4253 case(CELL):
4255 grid, count, buffer, buffer_size, bnd_circle_dt, point_info_dt, comm);
4256 break;
4257 case(CORNER):
4259 grid, count, buffer, buffer_size, point_info_dt, comm);
4260 break;
4261 case(EDGE):
4263 grid, count, buffer, buffer_size, point_info_dt, comm);
4264 break;
4265 };
4266}
4267
4269 const void * a, const void * b) {
4270
4271 return ((const struct single_remote_point_reorder *)a)->data.data.rank -
4272 ((const struct single_remote_point_reorder *)b)->data.data.rank;
4273}
4274
4276 struct dist_grid * grid, struct single_remote_point * ids, size_t count,
4277 enum yac_location location, size_t * idx) {
4278
4279 MPI_Comm comm = grid->comm;
4280 int comm_rank, comm_size;
4281 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
4282 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
4283
4284 size_t remote_count = 0;
4285
4286 for (size_t i = 0; i < count; ++i) {
4287 if (ids[i].global_id == XT_INT_MAX) idx[i] = SIZE_MAX;
4288 else if (ids[i].data.rank != comm_rank) ++remote_count;
4289 else idx[i] = ids[i].data.orig_pos;
4290 }
4291
4292 struct single_remote_point_reorder * missing_ids =
4293 xmalloc(remote_count * sizeof(*missing_ids));
4294
4295 for (size_t i = 0, j = 0; i < count; ++i) {
4296 if ((ids[i].data.rank != comm_rank) &&
4297 (ids[i].global_id != XT_INT_MAX)) {
4298 missing_ids[j].data = ids[i];
4299 missing_ids[j].reorder_idx = i;
4300 ++j;
4301 }
4302 }
4303
4304 // check whether we already have some of the missing ids locally
4306 grid, location, missing_ids, &remote_count, idx);
4307
4308 // sort data by owner
4309 qsort(missing_ids, remote_count, sizeof(*missing_ids),
4311
4312 int * sendcounts, * recvcounts, * sdispls, * rdispls;
4314 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
4315
4316 for (size_t i = 0; i < remote_count; ++i)
4317 sendcounts[missing_ids[i].data.data.rank]++;
4318
4320 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4321
4322 size_t recv_count =
4323 (size_t)(rdispls[comm_size-1]) + (size_t)(recvcounts[comm_size-1]);
4324
4325 uint64_t * uint64_t_buffer =
4326 xmalloc((remote_count + recv_count) * sizeof(*uint64_t_buffer));
4327 uint64_t * orig_pos_send_buffer = uint64_t_buffer;
4328 uint64_t * orig_pos_recv_buffer = uint64_t_buffer + remote_count;
4329
4330 // pack send buffer
4331 for (size_t i = 0; i < remote_count; ++i) {
4332 int rank = missing_ids[i].data.data.rank;
4333 if (rank != comm_rank)
4334 orig_pos_send_buffer[sdispls[rank+1]++] =
4335 (uint64_t)(missing_ids[i].data.data.orig_pos);
4336 }
4337
4338 // redistribute ids
4340 orig_pos_send_buffer, sendcounts, sdispls,
4341 orig_pos_recv_buffer, recvcounts, rdispls, comm);
4342
4343 MPI_Datatype bnd_circle_dt = yac_get_bounding_circle_mpi_datatype(comm);
4344 yac_mpi_call(MPI_Type_commit(&bnd_circle_dt), comm);
4345 MPI_Datatype point_info_dt = yac_get_remote_point_info_mpi_datatype(comm);
4346 yac_mpi_call(MPI_Type_commit(&point_info_dt), comm);
4347
4348 void * packed_send_data = NULL;
4349 int * pack_sizes = xmalloc(recv_count * sizeof(*pack_sizes));
4350
4351 // pack all requested grid data
4353 grid, location, orig_pos_recv_buffer, recv_count,
4354 &packed_send_data, pack_sizes,
4355 bnd_circle_dt, point_info_dt, comm);
4356 free(uint64_t_buffer);
4357
4358 memset(sendcounts, 0, (size_t)comm_size * sizeof(*sendcounts));
4359 for (int i = 0, k = 0; i < comm_size; ++i)
4360 for (int j = 0; j < recvcounts[i]; ++j, ++k)
4361 sendcounts[i] += pack_sizes[k];
4362
4363 free(pack_sizes);
4364
4366 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4367
4368 recv_count =
4369 (size_t)(rdispls[comm_size-1]) + (size_t)(recvcounts[comm_size-1]);
4370
4371 void * packed_recv_data = xmalloc(recv_count);
4372
4373 // redistribute packed grid data
4375 packed_send_data, sendcounts, sdispls+1,
4376 packed_recv_data, recvcounts, rdispls, comm);
4377
4378 // unpack requested grid data
4380 grid, location, remote_count, packed_recv_data, (int)recv_count,
4381 bnd_circle_dt, point_info_dt, comm);
4382
4383 yac_mpi_call(MPI_Type_free(&point_info_dt), comm);
4384 yac_mpi_call(MPI_Type_free(&bnd_circle_dt), comm);
4385
4386 // get the local ids for the remaining missing ids
4388 grid, location, missing_ids, &remote_count, idx);
4389
4390 free(missing_ids);
4391 free(packed_recv_data);
4392 free(packed_send_data);
4393 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
4394}
4395
4397 struct dist_grid_pair * grid_pair, int grid_id,
4398 coordinate_pointer search_coords, size_t count, size_t * cells,
4399 int (*coord_in_cell)(
4400 double coord[3], struct dist_grid * grid, size_t cell_idx,
4401 struct grid_cell * buffer_cell)) {
4402
4403 MPI_Comm comm = grid_pair->comm;
4404 int comm_rank, comm_size;
4405 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
4406 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
4407
4408 int * ranks = xmalloc(count * sizeof(ranks));
4409
4410 // search for every point in the process tree
4412 grid_pair->proc_sphere_part, search_coords, count, ranks);
4413
4414 int * sendcounts, * recvcounts, * sdispls, * rdispls;
4416 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
4417 for (size_t i = 0; i < count; ++i) sendcounts[ranks[i]]++;
4418
4419 size_t local_count = sendcounts[comm_rank];
4420 sendcounts[comm_rank] = 0;
4421
4423 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4424
4425 size_t remote_count = sdispls[comm_size] + sendcounts[comm_size-1];
4426 size_t request_count = rdispls[comm_size-1] + recvcounts[comm_size-1];
4427
4428 coordinate_pointer coord_buffer =
4429 xmalloc((remote_count + request_count + local_count) *
4430 sizeof(*coord_buffer));
4431 coordinate_pointer coord_send_buffer = coord_buffer + 0;
4432 coordinate_pointer coord_recv_buffer = coord_buffer + remote_count;
4433 coordinate_pointer coord_local_buffer =
4434 coord_buffer + remote_count + request_count;
4435
4436 // pack search coordinates
4437 for (size_t i = 0, k = 0; i < count; ++i) {
4438 if (ranks[i] == comm_rank) {
4439 coord_local_buffer[k][0] = search_coords[i][0];
4440 coord_local_buffer[k][1] = search_coords[i][1];
4441 coord_local_buffer[k][2] = search_coords[i][2];
4442 ++k;
4443 } else {
4444 int displ = sdispls[ranks[i]+1]++;
4445 coord_send_buffer[displ][0] = search_coords[i][0];
4446 coord_send_buffer[displ][1] = search_coords[i][1];
4447 coord_send_buffer[displ][2] = search_coords[i][2];
4448 }
4449 }
4450
4451 MPI_Datatype dt_coord;
4452 yac_mpi_call(MPI_Type_contiguous(3, MPI_DOUBLE, &dt_coord), comm);
4453 yac_mpi_call(MPI_Type_commit(&dt_coord), comm);
4454
4455 // redistribute search coordinates
4457 coord_send_buffer, sendcounts, sdispls,
4458 coord_recv_buffer, recvcounts, rdispls,
4459 sizeof(*coord_send_buffer), dt_coord, comm);
4460
4461 yac_mpi_call(MPI_Type_free(&dt_coord), comm);
4462
4463 size_t * local_cells =
4464 xmalloc((request_count + local_count) * sizeof(*local_cells));
4465
4466 // do local search
4468 grid_pair, grid_id, coord_recv_buffer, request_count + local_count,
4469 local_cells, coord_in_cell);
4470
4471 struct single_remote_point * single_remote_point_buffer =
4472 xmalloc((remote_count + request_count) *
4473 sizeof(*single_remote_point_buffer));
4474 struct single_remote_point * id_send_buffer = single_remote_point_buffer;
4475 struct single_remote_point * id_recv_buffer = single_remote_point_buffer +
4476 request_count;
4477
4478 struct dist_grid * grid =
4479 yac_dist_grid_pair_get_dist_grid(grid_pair, grid_id);
4480
4481 // pack global ids of found source cells
4482 for (size_t i = 0; i < request_count; ++i) {
4483 size_t cell_idx = local_cells[i];
4484 id_send_buffer[i].data.rank = comm_rank;
4485 if (cell_idx != SIZE_MAX) {
4486 id_send_buffer[i].global_id = grid->cell_ids[cell_idx];
4487 id_send_buffer[i].data.orig_pos = (uint64_t)cell_idx;
4488 } else {
4489 id_send_buffer[i].global_id = XT_INT_MAX;
4490 id_send_buffer[i].data.orig_pos = UINT64_MAX;
4491 }
4492 }
4493
4494 MPI_Datatype single_remote_point_dt =
4496
4497 // redistribute results (global ids of found source cells)
4499 id_send_buffer, recvcounts, rdispls, id_recv_buffer, sendcounts, sdispls,
4500 sizeof(*id_send_buffer), single_remote_point_dt, comm);
4501
4502 yac_mpi_call(MPI_Type_free(&single_remote_point_dt), comm);
4503
4504 size_t * new_local_cells =
4505 xmalloc(remote_count * sizeof(*new_local_cells));
4506
4507 // convert all remote ids to local ones, extend local dist_grid data,
4508 // if necessary
4510 grid, id_recv_buffer, remote_count, CELL, new_local_cells);
4511
4512 // extract results from local and remote search
4513 for (size_t i = 0, k = 0; i < count; ++i) {
4514 if (ranks[i] == comm_rank) {
4515 cells[i] = local_cells[request_count + k];
4516 ++k;
4517 } else {
4518 int displ = sdispls[ranks[i]]++;
4519 cells[i] = new_local_cells[displ];
4520 }
4521 }
4522
4523 free(new_local_cells);
4524 free(single_remote_point_buffer);
4525 free(local_cells);
4526 free(coord_buffer);
4527 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
4528 free(ranks);
4529}
4530
4532 struct dist_grid_pair * grid_pair, int grid_id,
4533 coordinate_pointer search_coords, size_t count, size_t * cells) {
4534
4536 grid_pair, grid_id, search_coords, count, cells, coord_in_cell);
4537}
4538
4540 struct dist_grid_pair * grid_pair, int grid_id,
4541 coordinate_pointer search_coords, size_t count, size_t * cells) {
4542
4544 grid_pair, grid_id, search_coords, count, cells, coord_in_cell_gc);
4545}
4546
4548 struct dist_grid * grid, struct interp_field field) {
4549
4550 coordinate_pointer field_coords =
4551 yac_dist_grid_get_field_coords(grid, field);
4552 yac_int const * global_ids =
4554 int const * mask = yac_dist_grid_get_field_mask(grid, field);
4555 size_t total_count = yac_dist_grid_get_count(grid, field.location);
4556
4557 if (mask == NULL)
4558 return
4560 total_count, field_coords, global_ids);
4561 else
4562 return
4564 total_count, field_coords, global_ids, mask);
4565}
4566
4568 struct dist_grid * grid, struct interp_field field, int comm_rank,
4569 size_t n, struct single_remote_point * points) {
4570
4571 size_t total_count = yac_dist_grid_get_count(grid, field.location);
4572 int const * mask = yac_dist_grid_get_field_mask(grid, field);
4573 int const * core_mask =
4575 yac_int const * global_ids =
4577
4578 if (mask == NULL) {
4579
4580 for (size_t i = 0, j = 0; i < total_count; ++i) {
4581 if (core_mask[i]) {
4582 points[j].global_id = global_ids[i];
4583 points[j].data.rank = comm_rank;
4584 points[j].data.orig_pos = i;
4585 if (n == ++j) return;
4586 }
4587 }
4588
4589 } else {
4590
4591 for (size_t i = 0, j = 0; i < total_count; ++i) {
4592 if (core_mask[i] && mask[i]) {
4593 points[j].global_id = global_ids[i];
4594 points[j].data.rank = comm_rank;
4595 points[j].data.orig_pos = i;
4596 if (n == ++j) return;
4597 }
4598 }
4599 }
4600}
4601
4603 size_t n, MPI_Comm comm) {
4604
4605 struct nnn_search_result dummy;
4606 MPI_Datatype single_remote_point_dt =
4608 single_nnn_search_result_dt_,
4609 nnn_search_result_dt;
4610 int array_of_blocklengths[] = {1, 1};
4611 const MPI_Aint array_of_displacements[] =
4612 {(MPI_Aint)((void*)&(dummy.point_info) - (void*)&dummy),
4613 (MPI_Aint)((void*)&(dummy.cos_angle) - (void*)&dummy)};
4614 const MPI_Datatype array_of_types[] = {single_remote_point_dt, MPI_DOUBLE};
4616 MPI_Type_create_struct(
4617 2, array_of_blocklengths, array_of_displacements,
4618 array_of_types, &single_nnn_search_result_dt_), comm);
4619 MPI_Datatype single_nnn_search_result_dt =
4620 yac_create_resized(single_nnn_search_result_dt_, sizeof(dummy), comm);
4622 MPI_Type_contiguous(
4623 (int)n, single_nnn_search_result_dt, &nnn_search_result_dt), comm);
4624 yac_mpi_call(MPI_Type_commit(&nnn_search_result_dt), comm);
4625 yac_mpi_call(MPI_Type_free(&single_nnn_search_result_dt), comm);
4626 yac_mpi_call(MPI_Type_free(&single_remote_point_dt), comm);
4627 return nnn_search_result_dt;
4628}
4629
4631 void const * a, void const * b) {
4632
4633 struct nnn_search_result const * result_a =
4634 (struct nnn_search_result const *)a;
4635 struct nnn_search_result const * result_b =
4636 (struct nnn_search_result const *)b;
4637
4638 int ret = (result_a->cos_angle < result_b->cos_angle) -
4639 (result_a->cos_angle > result_b->cos_angle);
4640 if (ret) return ret;
4641 return (result_a->point_info.global_id > result_b->point_info.global_id) -
4642 (result_a->point_info.global_id < result_b->point_info.global_id);
4643}
4644
4646 struct nnn_search_result * array, size_t array_size,
4647 struct nnn_search_result * insert, size_t insert_size) {
4648
4649 if (insert_size == 0) return;
4650
4651 // sort arrays first by cos_angle and second by global id
4652 qsort(array, array_size, sizeof(*array),
4654 qsort(insert, insert_size, sizeof(*insert),
4656
4657 struct nnn_search_result temp_results[array_size];
4658 memcpy(temp_results, array, array_size * sizeof(*array));
4659
4660 for (size_t i = 0, j = 0, k = 0; k < array_size; ++k) {
4661
4662 int cmp =
4663 (j < insert_size)?
4665 &temp_results[i], insert + j):(-1);
4666
4667 if (cmp <= 0) {
4668 if (k != i) array[k] = temp_results[i];
4669 ++i;
4670 } else {
4671 array[k] = insert[j];
4672 }
4673
4674 j += cmp >= 0;
4675 }
4676}
4677
4679 struct dist_grid_pair * grid_pair, int grid_id,
4680 coordinate_pointer search_coords, size_t count, size_t * local_ids,
4681 size_t n, struct interp_field field) {
4682
4683 MPI_Comm comm = grid_pair->comm;
4684 int comm_rank, comm_size;
4685 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
4686 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
4687
4688 struct dist_grid * grid =
4689 yac_dist_grid_pair_get_dist_grid(grid_pair, grid_id);
4690
4691 uint64_t unmasked_local_count =
4693
4694 uint64_t * unmasked_local_counts =
4695 xmalloc((size_t)comm_size * sizeof(*unmasked_local_counts));
4696
4697 // exchange number of local source points and target points
4699 MPI_Allgather(
4700 &unmasked_local_count, 1, MPI_UINT64_T,
4701 unmasked_local_counts, 1, MPI_UINT64_T, comm), comm);
4702
4703 // check whether there is a rank with too few source points
4704 int flag = 0;
4705 for (int i = 0; i < comm_size; ++i)
4706 flag |= unmasked_local_counts[i] < (uint64_t)n;
4707
4708 // if ranks with insufficient number of local source points
4709 if (flag) {
4710
4711 uint64_t global_num_unmasked_count = 0;
4712 for (int i = 0; i < comm_size; ++i)
4713 global_num_unmasked_count += unmasked_local_counts[i];
4714
4715 YAC_ASSERT(
4716 (size_t)global_num_unmasked_count >= n,
4717 "ERROR(yac_dist_grid_pair_do_nnn_search): insufficient number of "
4718 "unmasked points")
4719
4720 int * sendcounts, * recvcounts, * sdispls, * rdispls;
4722 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
4723 memset(recvcounts, 0, (size_t)comm_size * sizeof(*recvcounts));
4724
4725 // get ranks of processes that have additional data or require data in
4726 // order to have enough source points to do a initial nnn search
4728 grid_pair->proc_sphere_part, unmasked_local_counts, (uint64_t)n,
4729 sendcounts, recvcounts, comm_rank, comm_size);
4730
4731 size_t local_send_count = (size_t)(MIN(unmasked_local_count, n));
4732
4733 for (int i = 0, raccu = 0; i < comm_size; ++i) {
4734 sdispls[i] = 0;
4735 rdispls[i] = raccu;
4736 sendcounts[i] *= (int)local_send_count;
4737 raccu += (recvcounts[i] *= (int)(MIN(unmasked_local_counts[i], n)));
4738 }
4739
4740 size_t recv_count =
4741 (size_t)(recvcounts[comm_size-1]) + (size_t)(rdispls[comm_size-1]);
4742
4743 struct single_remote_point * single_remote_point_buffer =
4744 xmalloc(
4745 (local_send_count + recv_count) * sizeof(*single_remote_point_buffer));
4746 struct single_remote_point * local_send_ids = single_remote_point_buffer;
4747 struct single_remote_point * recv_ids =
4748 single_remote_point_buffer + local_send_count;
4749
4750 // get local source points that can be sent to other processes
4752 grid, field, comm_rank, local_send_count, local_send_ids);
4753
4754 MPI_Datatype single_remote_point_dt =
4756
4757 // exchange source points (integrate points into local data)
4759 local_send_ids, sendcounts, sdispls, recv_ids, recvcounts, rdispls,
4760 sizeof(*local_send_ids), single_remote_point_dt, comm);
4761
4762 yac_mpi_call(MPI_Type_free(&single_remote_point_dt), comm);
4763
4764 size_t * dummy = xmalloc(recv_count * sizeof(*dummy));
4765
4766 // convert all remote ids to local ones, extend local dist_grid data,
4767 // if necessary
4769 grid, recv_ids, recv_count, field.location, dummy);
4770
4771 free(dummy);
4772 free(single_remote_point_buffer);
4773 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
4774 }
4775 free(unmasked_local_counts);
4776
4777 struct point_sphere_part_search * sphere_part =
4779
4780 double * cos_angles = NULL;
4781 size_t cos_angles_array_size = 0;
4782 coordinate_pointer result_coords = NULL;
4783 size_t result_coords_array_size = 0;
4784 size_t * result_points = NULL;
4785 size_t result_points_array_size = 0;
4786 size_t * num_results = xcalloc(count, sizeof(*num_results));
4787
4788 // do local search
4790 sphere_part, count, search_coords, n, &cos_angles, &cos_angles_array_size,
4791 &result_coords, &result_coords_array_size, &result_points,
4792 &result_points_array_size, num_results);
4793
4794 size_t max_num_results = 0;
4795 for (size_t i = 0; i < count; ++i)
4796 if (max_num_results < num_results[i]) max_num_results = num_results[i];
4797
4798 // extract results (only the local core points)
4799 struct nnn_search_result * results = xmalloc(n * count * sizeof(*results));
4800 yac_int const * global_ids =
4802
4803 struct nnn_search_result * temp_results =
4804 xmalloc(max_num_results * sizeof(*temp_results));
4805
4806 for (size_t i = 0, k = 0; i < count; ++i) {
4807
4808 struct nnn_search_result * curr_results = results + i * n;
4809 for (size_t j = 0; j < n; ++j, ++k) {
4810 size_t curr_local_id = result_points[k];
4811 curr_results[j].point_info.global_id = global_ids[curr_local_id];
4812 curr_results[j].point_info.data.rank = comm_rank;
4813 curr_results[j].point_info.data.orig_pos = (uint64_t)curr_local_id;
4814 curr_results[j].cos_angle = cos_angles[k];
4815 }
4816
4817 for (size_t j = n; j < num_results[i]; ++j, ++k) {
4818 size_t curr_local_id = result_points[k];
4819 temp_results[j-n].point_info.global_id = global_ids[curr_local_id];
4820 temp_results[j-n].point_info.data.rank = comm_rank;
4821 temp_results[j-n].point_info.data.orig_pos = (uint64_t)curr_local_id;
4822 temp_results[j-n].cos_angle = cos_angles[k];
4823 }
4824 insert_nnn_result_points(curr_results, n, temp_results, num_results[i] - n);
4825 }
4826
4827 coordinate_pointer field_coords =
4828 yac_dist_grid_get_field_coords(grid, field);
4829 int * request_ranks = NULL;
4830 size_t request_ranks_array_size = 0;
4831 size_t num_request_ranks = 0;
4832 int * num_requests = xmalloc(count * sizeof(*num_requests));
4833
4834 // for all search points
4835 for (size_t i = 0; i < count; ++i) {
4836
4837 // generate bounding circles for all search points
4838 struct bounding_circle result_bnd_circle;
4839 memcpy(result_bnd_circle.base_vector, search_coords[i],
4840 3 * sizeof(search_coords[0][0]));
4841 result_bnd_circle.inc_angle =
4843 search_coords[i],
4844 field_coords[results[(i + 1) * n - 1].point_info.data.orig_pos]);
4845
4846 ENSURE_ARRAY_SIZE(request_ranks, request_ranks_array_size,
4847 num_request_ranks + (size_t)comm_size);
4848
4849 // search for processes that might be able to contribute to the search
4850 // results
4851 int * curr_request_ranks = request_ranks + num_request_ranks;
4853 grid_pair->proc_sphere_part, result_bnd_circle,
4854 curr_request_ranks, num_requests + i);
4855
4856 // remove requests for local process
4857 int new_num_requests = 0;
4858 for (int j = 0; j < num_requests[i]; ++j) {
4859 if (curr_request_ranks[j] == comm_rank) continue;
4860 if (new_num_requests != j)
4861 curr_request_ranks[new_num_requests] = curr_request_ranks[j];
4862 ++new_num_requests;
4863 }
4864
4865 num_request_ranks += (size_t)(num_requests[i] = new_num_requests);
4866 }
4867
4868 int * sendcounts, * recvcounts, * sdispls, * rdispls;
4870 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
4871
4872 for (size_t i = 0; i < num_request_ranks; ++i) sendcounts[request_ranks[i]]++;
4873
4875 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4876 int num_src_msg = 0, num_dst_msg = 0;
4877 for (int i = 0; i < comm_size; ++i) {
4878 num_src_msg += (recvcounts[i] > 0);
4879 num_dst_msg += (sendcounts[i] > 0);
4880 }
4881
4882 size_t recv_count =
4883 (size_t)(rdispls[comm_size-1]) + (size_t)(recvcounts[comm_size-1]);
4884
4885 coordinate_pointer coord_buffer =
4886 xmalloc((num_request_ranks + recv_count) * sizeof(*coord_buffer));
4887 coordinate_pointer send_request_coords = coord_buffer;
4888 coordinate_pointer recv_request_coords = coord_buffer + num_request_ranks;
4889
4890 // pack bounding circles
4891 for (size_t i = 0, k = 0; i < count; ++i)
4892 for (size_t j = 0; j < num_requests[i]; ++j, ++k)
4893 memcpy(send_request_coords[sdispls[request_ranks[k]+1]++],
4894 search_coords[i], 3 * sizeof(double));
4895
4896 MPI_Datatype coord_dt = yac_get_coordinate_mpi_datatype(comm);
4897
4898 // exchange requests to other processes
4900 send_request_coords, sendcounts, sdispls,
4901 recv_request_coords, recvcounts, rdispls,
4902 sizeof(*send_request_coords), coord_dt, comm);
4903
4904 yac_mpi_call(MPI_Type_free(&coord_dt), comm);
4905
4906 num_results = xrealloc(num_results, recv_count * sizeof(*num_results));
4907 memset(num_results, 0, recv_count * sizeof(*num_results));
4908
4909 // do local search for requests
4911 sphere_part, recv_count, recv_request_coords, n, &cos_angles,
4912 &cos_angles_array_size, &result_coords, &result_coords_array_size,
4913 &result_points, &result_points_array_size, num_results);
4915 free(coord_buffer);
4916
4917 for (size_t i = 0; i < recv_count; ++i)
4918 if (max_num_results < num_results[i]) max_num_results = num_results[i];
4919 temp_results =
4920 xrealloc(temp_results, max_num_results * sizeof(*temp_results));
4921
4922 // extract results (only the ones, that are local core points)
4923 struct nnn_search_result * request_results =
4924 xmalloc(n * (num_request_ranks + recv_count) * sizeof(*request_results));
4925 struct nnn_search_result * send_request_results = request_results;
4926 struct nnn_search_result * recv_request_results =
4927 request_results + n * recv_count;
4928
4929 for (size_t i = 0, k = 0; i < recv_count; ++i) {
4930
4931 struct nnn_search_result * curr_results = send_request_results + i * n;
4932 for (size_t j = 0; j < n; ++j, ++k) {
4933 size_t curr_local_id = result_points[k];
4934 curr_results[j].point_info.global_id = global_ids[curr_local_id];
4935 curr_results[j].point_info.data.rank = comm_rank;
4936 curr_results[j].point_info.data.orig_pos = (uint64_t)curr_local_id;
4937 curr_results[j].cos_angle = cos_angles[k];
4938 }
4939
4940 for (size_t j = n; j < num_results[i]; ++j, ++k) {
4941 size_t curr_local_id = result_points[k];
4942 temp_results[j-n].point_info.global_id = global_ids[curr_local_id];
4943 temp_results[j-n].point_info.data.rank = comm_rank;
4944 temp_results[j-n].point_info.data.orig_pos = (uint64_t)curr_local_id;
4945 temp_results[j-n].cos_angle = cos_angles[k];
4946 }
4947 insert_nnn_result_points(curr_results, n, temp_results, num_results[i]-n);
4948 }
4949
4950 free(cos_angles);
4951 free(result_coords);
4952 free(result_points);
4953 free(num_results);
4954
4955 MPI_Datatype nnn_search_result_dt =
4957
4958 // return results
4960 send_request_results, recvcounts, rdispls,
4961 recv_request_results, sendcounts, sdispls,
4962 n * sizeof(*send_request_results), nnn_search_result_dt, comm);
4963
4964 yac_mpi_call(MPI_Type_free(&nnn_search_result_dt), comm);
4965
4966 // unpack results of remote search and integrate them into local results
4967 for (size_t i = 0, k = 0; i < count; ++i) {
4968 struct nnn_search_result * curr_results = results + n * i;
4969 for (size_t j = 0; j < num_requests[i]; ++k, ++j) {
4970 size_t pos = (size_t)(sdispls[request_ranks[k]]++);
4971 struct nnn_search_result * curr_request_results =
4972 recv_request_results + n * pos;
4973 insert_nnn_result_points(curr_results, n, curr_request_results, n);
4974 }
4975 }
4976 free(request_ranks);
4977 free(request_results);
4978 free(num_requests);
4979 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
4980
4981 size_t remote_count = 0;
4982 for (size_t i = 0; i < n * count; ++i)
4983 if (results[i].point_info.data.rank != comm_rank) ++remote_count;
4984
4985 // gather remote points from search results
4987 xmalloc(remote_count * sizeof(*remote_points));
4988 for (size_t i = 0, k = 0; i < n * count; ++i)
4989 if (results[i].point_info.data.rank != comm_rank)
4990 remote_points[k++] = results[i].point_info;
4991
4992 // convert all remote ids to local ones, extend local dist_grid data,
4993 // if necessary
4995 grid, remote_points, remote_count, field.location,
4996 local_ids + n * count - remote_count);
4997
4998 free(remote_points);
4999
5000 // gather final result
5001 for (size_t i = 0, offset = n * count - remote_count; i < n * count; ++i) {
5002 if (results[i].point_info.data.rank == comm_rank)
5003 local_ids[i] = (size_t)(results[i].point_info.data.orig_pos);
5004 else local_ids[i] = local_ids[offset++];
5005 }
5006 free(temp_results);
5007 free(results);
5008}
5009
5011 struct dist_grid_pair * grid_pair, int grid_id,
5012 const_bounding_circle_pointer bnd_circles, size_t count, size_t ** cells,
5013 size_t * num_results_per_bnd_circle, struct interp_field field) {
5014
5015 MPI_Comm comm = grid_pair->comm;
5016 int comm_rank, comm_size;
5017 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
5018 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
5019
5020 struct dist_grid * dist_grid =
5021 yac_dist_grid_pair_get_dist_grid(grid_pair, grid_id);
5022
5023 int * num_ranks = xcalloc(count, sizeof(*num_ranks));
5024 int * rank_buffer = NULL;
5025 size_t rank_buffer_size = 0;
5026 size_t rank_buffer_array_size = 0;
5027
5028 for (size_t i = 0; i < count; ++i) {
5029
5030 ENSURE_ARRAY_SIZE(rank_buffer, rank_buffer_array_size,
5031 rank_buffer_size + (size_t)comm_size);
5032
5033 // finds all processes whose core area overlaps with the bounding circle
5034 // of the current cell
5035 // beware: even if the core are of a process does not overlap with the
5036 // bounding circle, it may have core cells that overlap nevertheless
5038 grid_pair->proc_sphere_part, bnd_circles[i],
5039 rank_buffer + rank_buffer_size, num_ranks + i);
5040 rank_buffer_size += (size_t)(num_ranks[i]);
5041 }
5042
5043 int * int_buffer =
5044 xmalloc(4 * (size_t)comm_size * sizeof(*int_buffer));
5045 int * result_sendcounts = int_buffer + 0 * comm_size;
5046 int * result_recvcounts = int_buffer + 1 * comm_size;
5047 int * result_sdispls = int_buffer + 2 * comm_size;
5048 int * result_rdispls = int_buffer + 3 * comm_size;
5049
5050 int * sendcounts, * recvcounts, * sdispls, * rdispls;
5052 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
5053
5054 for (size_t i = 0, offset = 0; i < count; ++i) {
5055 int curr_num_ranks = num_ranks[i];
5056 int * ranks = rank_buffer + offset;
5057 offset += (size_t)curr_num_ranks;
5058 for (int j = 0; j < curr_num_ranks; ++j) sendcounts[ranks[j]]++;
5059 }
5060
5061 // local overlaps do not need to be send around
5062 size_t local_count = (size_t)(sendcounts[comm_rank]);
5063 sendcounts[comm_rank] = 0;
5064
5066 1, sendcounts, recvcounts, sdispls, rdispls, comm);
5067
5068 size_t send_count =
5069 (size_t)(sdispls[comm_size]) + (size_t)(sendcounts[comm_size-1]);
5070 size_t recv_count =
5071 (size_t)(rdispls[comm_size-1]) + (size_t)(recvcounts[comm_size-1]);
5072
5073 struct bounding_circle * bnd_circle_buffer =
5074 xmalloc((send_count + recv_count + local_count) *
5075 sizeof(*bnd_circle_buffer));
5076 struct bounding_circle * send_buffer = bnd_circle_buffer;
5077 struct bounding_circle * recv_buffer = bnd_circle_buffer + send_count;
5078 struct bounding_circle * local_buffer =
5079 bnd_circle_buffer + send_count + recv_count;
5080
5081 // pack bounding circles
5082 for (size_t i = 0, offset = 0, local_offset = 0; i < count; ++i) {
5083 int curr_num_ranks = num_ranks[i];
5084 int * ranks = rank_buffer + offset;
5085 offset += (size_t)curr_num_ranks;
5086 for (int j = 0; j < curr_num_ranks; ++j) {
5087 int rank = ranks[j];
5088 if (rank == comm_rank)
5089 local_buffer[local_offset++] = bnd_circles[i];
5090 else
5091 send_buffer[sdispls[rank + 1]++] = bnd_circles[i];
5092 }
5093 }
5094
5095 MPI_Datatype bnd_circle_dt = yac_get_bounding_circle_mpi_datatype(comm);
5096 yac_mpi_call(MPI_Type_commit(&bnd_circle_dt), comm);
5097
5098 // exchange bounding circles
5100 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls,
5101 sizeof(*send_buffer), bnd_circle_dt, comm);
5102
5103 yac_mpi_call(MPI_Type_free(&bnd_circle_dt), comm);
5104
5105 struct bnd_sphere_part_search * cell_sphere_part =
5106 dist_grid_pair_get_cell_sphere_part(grid_pair, grid_id);
5107
5108 size_t * local_cells = NULL;
5109 size_t * num_local_cells_per_bnd_circle =
5110 xmalloc((recv_count + local_count) *
5111 sizeof(*num_local_cells_per_bnd_circle));
5112
5113 uint64_t * uint64_t_buffer =
5114 xmalloc((send_count + recv_count + local_count) *
5115 sizeof(*uint64_t_buffer));
5116 uint64_t * num_local_cells_per_bnd_circle_uint64_t = uint64_t_buffer;
5117 uint64_t * num_remote_cells_per_bnd_circle =
5118 uint64_t_buffer + recv_count + local_count;
5119
5120 // search for received bounding circles in local data
5122 cell_sphere_part, recv_buffer, recv_count + local_count, &local_cells,
5123 num_local_cells_per_bnd_circle);
5124
5125 int const * field_mask =
5126 (field.location == CELL)?
5128
5129 struct bounding_circle * cell_bnd_circles = dist_grid->cell_bnd_circles;
5130
5131 // check field mask and actual overlap of bounding circles
5132 for (size_t i = 0, offset = 0, new_offset = 0;
5133 i < recv_count + local_count; ++i) {
5134
5135 struct bounding_circle * curr_bnd_circle = recv_buffer + i;
5136 size_t curr_num_results = num_local_cells_per_bnd_circle[i];
5137
5138 // remove cells whose bounding circle do not overlap with the current one
5139 uint64_t new_num_results = 0;
5140 for (size_t j = 0; j < curr_num_results; ++j, ++offset) {
5141 size_t local_cell_id = local_cells[offset];
5142 if (!yac_extents_overlap(curr_bnd_circle,
5143 cell_bnd_circles + local_cell_id)) continue;
5144 if ((field_mask == NULL) || (field_mask[local_cell_id])) {
5145 if (offset != new_offset) local_cells[new_offset] = local_cell_id;
5146 new_num_results++;
5147 new_offset++;
5148 }
5149 }
5150 num_local_cells_per_bnd_circle_uint64_t[i] = new_num_results;
5151 }
5152 free(num_local_cells_per_bnd_circle);
5153 free(bnd_circle_buffer);
5154
5155 // exchange number of results per bounding circle
5157 num_local_cells_per_bnd_circle_uint64_t, recvcounts, rdispls,
5158 num_remote_cells_per_bnd_circle, sendcounts, sdispls,
5159 sizeof(*num_local_cells_per_bnd_circle_uint64_t), MPI_UINT64_T, comm);
5160
5161 for (int i = 0, saccu = 0, raccu = 0, soffset = 0, roffset = 0;
5162 i < comm_size; ++i) {
5163
5164 result_sdispls[i] = saccu;
5165 result_rdispls[i] = raccu;
5166
5167 int sendcount = recvcounts[i];
5168 int recvcount = sendcounts[i];
5169
5170 result_sendcounts[i] = 0;
5171 result_recvcounts[i] = 0;
5172 for (int j = 0; j < sendcount; ++j, ++soffset)
5173 result_sendcounts[i] +=
5174 (int)(num_local_cells_per_bnd_circle_uint64_t[soffset]);
5175 for (int j = 0; j < recvcount; ++j, ++roffset)
5176 result_recvcounts[i] += (int)(num_remote_cells_per_bnd_circle[roffset]);
5177
5178 saccu += result_sendcounts[i];
5179 raccu += result_recvcounts[i];
5180 }
5181
5182 // count the number of results for bounding circles, which had a match with#
5183 // their original process
5184 size_t result_local_count = 0;
5185 for (size_t i = recv_count; i < recv_count + local_count; ++i)
5186 result_local_count += (size_t)(num_local_cells_per_bnd_circle_uint64_t[i]);
5187
5188 size_t result_send_count = (size_t)(result_sdispls[comm_size-1]) +
5189 (size_t)(result_sendcounts[comm_size-1]);
5190 size_t result_recv_count = (size_t)(result_rdispls[comm_size-1]) +
5191 (size_t)(result_recvcounts[comm_size-1]);
5192
5193 struct single_remote_point * single_remote_point_buffer =
5194 xmalloc((result_recv_count + result_send_count) *
5195 sizeof(*single_remote_point_buffer));
5196 struct single_remote_point * id_send_buffer = single_remote_point_buffer;
5197 struct single_remote_point * id_recv_buffer = single_remote_point_buffer +
5198 result_send_count;
5199
5200 yac_int * cell_ids = dist_grid->cell_ids;
5201
5202 for (size_t i = 0; i < result_send_count; ++i) {
5203 size_t local_cell_id = local_cells[i];
5204 id_send_buffer[i].global_id = cell_ids[local_cell_id];
5205 id_send_buffer[i].data.rank = comm_rank;
5206 id_send_buffer[i].data.orig_pos = local_cell_id;
5207 }
5208
5209 MPI_Datatype single_remote_point_dt =
5211
5212 // redistribute results (global ids of found source cells)
5214 id_send_buffer, result_sendcounts, result_sdispls,
5215 id_recv_buffer, result_recvcounts, result_rdispls,
5216 sizeof(*id_send_buffer), single_remote_point_dt, comm);
5217
5218 yac_mpi_call(MPI_Type_free(&single_remote_point_dt), comm);
5219
5220 size_t * new_local_cells =
5221 xmalloc((result_recv_count + result_local_count) *
5222 sizeof(*new_local_cells));
5223
5224 memcpy(new_local_cells + result_recv_count,
5225 local_cells + result_send_count,
5226 result_local_count * sizeof(*new_local_cells));
5227 free(local_cells);
5228
5229 // convert all remote ids to local ones, extend local dist_grid data,
5230 // if necessary
5232 dist_grid, id_recv_buffer, result_recv_count, CELL, new_local_cells);
5233
5234 free(single_remote_point_buffer);
5235
5236 size_t * reorder_idx =
5237 xmalloc((result_recv_count + result_local_count) * sizeof(*reorder_idx));
5238
5239 memset(
5240 num_results_per_bnd_circle, 0, count * sizeof(*num_results_per_bnd_circle));
5241
5242 for (size_t i = 0, offset = 0, reorder = 0, local_search_idx = recv_count,
5243 local_offset = result_recv_count; i < count; ++i) {
5244 int curr_num_ranks = num_ranks[i];
5245 int * ranks = rank_buffer + offset;
5246 offset += (size_t)curr_num_ranks;
5247 for (int j = 0; j < curr_num_ranks; ++j) {
5248 int rank = ranks[j];
5249 if (rank == comm_rank) {
5250 uint64_t curr_num_results =
5251 num_local_cells_per_bnd_circle_uint64_t[local_search_idx++];
5252 num_results_per_bnd_circle[i] += (size_t)curr_num_results;
5253 for (uint64_t k = 0; k < curr_num_results; ++k, ++reorder)
5254 reorder_idx[local_offset++] = reorder;
5255 } else {
5256 int rank_pos = sdispls[rank]++;
5257 uint64_t curr_num_results = num_remote_cells_per_bnd_circle[rank_pos];
5258 num_results_per_bnd_circle[i] += (size_t)curr_num_results;
5259 for (uint64_t k = 0; k < curr_num_results; ++k, ++reorder)
5260 reorder_idx[result_rdispls[rank]++] = reorder;
5261 }
5262 }
5263 }
5264 free(uint64_t_buffer);
5265 free(num_ranks);
5266 free(rank_buffer);
5267 free(int_buffer);
5268 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
5269
5271 reorder_idx, result_recv_count + result_local_count, new_local_cells);
5272 free(reorder_idx);
5273
5274 // remove duplicated results
5275 for (size_t i = 0, offset = 0, new_offset = 0; i < count; ++i) {
5276
5277 size_t * curr_local_cells = new_local_cells + offset;
5278 size_t curr_num_results_per_bnd_circle = num_results_per_bnd_circle[i];
5279 size_t new_num_results_per_bnd_circle = 0;
5280 size_t prev_cell = SIZE_MAX;
5281 offset += curr_num_results_per_bnd_circle;
5282
5284 curr_local_cells, curr_num_results_per_bnd_circle, NULL);
5285
5286 for (size_t j = 0; j < curr_num_results_per_bnd_circle; ++j) {
5287 size_t curr_cell = curr_local_cells[j];
5288 if (curr_cell != prev_cell) {
5289 new_local_cells[new_offset++] = (prev_cell = curr_cell);
5290 ++new_num_results_per_bnd_circle;
5291 }
5292 }
5293 num_results_per_bnd_circle[i] = new_num_results_per_bnd_circle;
5294 }
5295
5296 *cells = new_local_cells;
5297}
5298
5300 struct dist_grid_pair * grid_pair, int search_grid_id, int result_grid_id,
5301 size_t * search_cells, size_t count, size_t ** result_cells,
5302 size_t * num_results_per_search_cell, struct