YetAnotherCoupler 3.2.0_a
proc_sphere_part.c
// Copyright (c) 2024 The YAC Authors
//
// SPDX-License-Identifier: BSD-3-Clause

#include "string.h"
#include "proc_sphere_part.h"
#include "yac_mpi_internal.h"
#ifdef SCOREP_USER_ENABLE
#include "scorep/SCOREP_User.h"
#endif // SCOREP_USER_ENABLE

enum splicomm_tags {
  DATA_SIZE_TAG,
  DATA_TAG,
};

struct proc_sphere_part_node_data {
  union {
    struct proc_sphere_part_node * node;
    int rank;
  } data;
  int is_leaf;
};

struct proc_sphere_part_node {
  struct proc_sphere_part_node_data U, T;
  double gc_norm_vector[3];
};

struct neigh_search_data {
  int * ranks;
  int num_ranks;
  struct proc_sphere_part_node * node;
};

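// Note added for clarity, not part of the original file: the decomposition
// forms a binary tree. Every inner node splits the sphere along the great
// circle whose plane has normal gc_norm_vector; U holds the cells with
// dot(coord, gc_norm_vector) <= 0, T those with dot > 0, and a leaf stores
// the rank that owns the corresponding part of the sphere.
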
static int get_proc_sphere_part_node_data_pack_size(
  struct proc_sphere_part_node_data node_data, MPI_Comm comm);

static int get_proc_sphere_part_node_pack_size(
  struct proc_sphere_part_node node, MPI_Comm comm) {

  int vec_pack_size;
  yac_mpi_call(
    MPI_Pack_size(3, MPI_DOUBLE, comm, &vec_pack_size), comm);

  return vec_pack_size +
         get_proc_sphere_part_node_data_pack_size(node.U, comm) +
         get_proc_sphere_part_node_data_pack_size(node.T, comm);
}

static int get_proc_sphere_part_node_data_pack_size(
  struct proc_sphere_part_node_data node_data, MPI_Comm comm) {

  int int_pack_size;
  yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &int_pack_size), comm);

  int data_size = int_pack_size;
  if (node_data.is_leaf)
    data_size += int_pack_size;
  else
    data_size +=
      get_proc_sphere_part_node_pack_size(*(node_data.data.node), comm);

  return data_size;
}

static void pack_proc_sphere_part_node_data(
  struct proc_sphere_part_node_data node_data, void * pack_buffer,
  int pack_buffer_size, int * position, MPI_Comm comm);

static void pack_proc_sphere_part_node(
  struct proc_sphere_part_node * node, void * pack_buffer,
  int pack_buffer_size, int * position, MPI_Comm comm) {

  yac_mpi_call(MPI_Pack(&(node->gc_norm_vector[0]), 3, MPI_DOUBLE, pack_buffer,
                        pack_buffer_size, position, comm), comm);
  pack_proc_sphere_part_node_data(
    node->U, pack_buffer, pack_buffer_size, position, comm);
  pack_proc_sphere_part_node_data(
    node->T, pack_buffer, pack_buffer_size, position, comm);
}

static void pack_proc_sphere_part_node_data(
  struct proc_sphere_part_node_data node_data, void * pack_buffer,
  int pack_buffer_size, int * position, MPI_Comm comm) {

  yac_mpi_call(MPI_Pack(&(node_data.is_leaf), 1, MPI_INT, pack_buffer,
                        pack_buffer_size, position, comm), comm);

  if (node_data.is_leaf)
    yac_mpi_call(MPI_Pack(&(node_data.data.rank), 1, MPI_INT, pack_buffer,
                          pack_buffer_size, position, comm), comm);
  else
    pack_proc_sphere_part_node(node_data.data.node, pack_buffer,
                               pack_buffer_size, position, comm);
}

static struct proc_sphere_part_node_data unpack_proc_sphere_part_node_data(
  void * pack_buffer, int pack_buffer_size, int * position,
  MPI_Comm comm);

static struct proc_sphere_part_node * unpack_proc_sphere_part_node(
  void * pack_buffer, int pack_buffer_size, int * position,
  MPI_Comm comm) {

  struct proc_sphere_part_node * node = xmalloc(1 * sizeof(*node));

  yac_mpi_call(
    MPI_Unpack(pack_buffer, pack_buffer_size, position,
               &(node->gc_norm_vector[0]), 3, MPI_DOUBLE, comm), comm);

  node->U = unpack_proc_sphere_part_node_data(pack_buffer, pack_buffer_size,
                                              position, comm);
  node->T = unpack_proc_sphere_part_node_data(pack_buffer, pack_buffer_size,
                                              position, comm);

  return node;
}

static struct proc_sphere_part_node_data unpack_proc_sphere_part_node_data(
  void * pack_buffer, int pack_buffer_size, int * position,
  MPI_Comm comm) {

  struct proc_sphere_part_node_data node_data;

  yac_mpi_call(
    MPI_Unpack(pack_buffer, pack_buffer_size, position,
               &(node_data.is_leaf), 1, MPI_INT, comm), comm);

  if (node_data.is_leaf)
    yac_mpi_call(
      MPI_Unpack(pack_buffer, pack_buffer_size, position,
                 &(node_data.data.rank), 1, MPI_INT, comm), comm);
  else
    node_data.data.node =
      unpack_proc_sphere_part_node(
        pack_buffer, pack_buffer_size, position, comm);

  return node_data;
}

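// Illustrative sketch, not part of the original file: how the pack-size,
// pack, and unpack routines above compose into a serialize/deserialize
// round trip of a tree. The helper name round_trip_node is hypothetical.
static struct proc_sphere_part_node * round_trip_node(
  struct proc_sphere_part_node * node, MPI_Comm comm) {

  // size and fill a contiguous MPI_PACKED buffer
  int pack_buffer_size = get_proc_sphere_part_node_pack_size(*node, comm);
  void * pack_buffer = xmalloc((size_t)pack_buffer_size);
  int position = 0;
  pack_proc_sphere_part_node(
    node, pack_buffer, pack_buffer_size, &position, comm);

  // unpacking consumes the buffer in the same order it was written
  position = 0;
  struct proc_sphere_part_node * copy =
    unpack_proc_sphere_part_node(
      pack_buffer, pack_buffer_size, &position, comm);
  free(pack_buffer);
  return copy;
}
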
static struct proc_sphere_part_node_data get_remote_data(
  struct proc_sphere_part_node_data local_data,
  struct yac_group_comm local_group_comm,
  struct yac_group_comm remote_group_comm) {

  int comm_rank;
  MPI_Comm comm = local_group_comm.comm;
  yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);

  int data_size;
  void * recv_buffer;

  int order = local_group_comm.start < remote_group_comm.start;

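  // Note added for clarity: 'order' serializes the two exchanges below.
  // The group with the lower start rank broadcasts its packed tree in the
  // first round and receives in the second, while the other group does the
  // opposite, so the paired root broadcasts cannot deadlock.
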
  for (int i = 0; i < 2; ++i) {

    if (order == i) {

      yac_bcast_group(
        &data_size, 1, MPI_INT, remote_group_comm.start, local_group_comm);
      recv_buffer = xmalloc((size_t)data_size);
      yac_bcast_group(recv_buffer, data_size, MPI_PACKED,
                      remote_group_comm.start, local_group_comm);

    } else {
      if (comm_rank == local_group_comm.start) {

        // pack local_data
        int pack_buffer_size =
          get_proc_sphere_part_node_data_pack_size(local_data, comm);
        void * pack_buffer = xmalloc((size_t)pack_buffer_size);
        int position = 0;
        pack_proc_sphere_part_node_data(
          local_data, pack_buffer, pack_buffer_size, &position, comm);

        // broadcast data size to other group
        yac_bcast_group(
          &position, 1, MPI_INT, local_group_comm.start, remote_group_comm);

        // broadcast packed local_data to the other group
        yac_bcast_group(pack_buffer, position, MPI_PACKED,
                        local_group_comm.start, remote_group_comm);
        free(pack_buffer);
      }
    }
  }

  // unpack node data
  int position = 0;
  struct proc_sphere_part_node_data other_data =
    unpack_proc_sphere_part_node_data(
      recv_buffer, data_size, &position, comm);
  free(recv_buffer);

  return other_data;
}

static void compute_redist_recvcounts_rdispls(
  uint64_t comm_rank, uint64_t split_rank, uint64_t comm_size,
  uint64_t global_sizes[2], uint64_t (*all_bucket_sizes)[2],
  int * counts, int * displs, size_t * recv_count) {

  int color = comm_rank >= split_rank;

  uint64_t split_comm_size = split_rank;
  uint64_t split_comm_rank = comm_rank;
  if (color) {
    split_comm_rank -= split_comm_size;
    split_comm_size = comm_size - split_comm_size;
  }

  uint64_t global_size = global_sizes[color];
  uint64_t local_interval_start =
    (global_size * split_comm_rank + split_comm_size - 1) /
    split_comm_size;
  uint64_t local_interval_end =
    (global_size * (split_comm_rank+1) + split_comm_size - 1) /
    split_comm_size;

  *recv_count = (size_t)(local_interval_end - local_interval_start);

  for (uint64_t i = 0, start_idx = 0; i < comm_size; ++i) {

    uint64_t next_start_idx = start_idx + all_bucket_sizes[i][color];
    uint64_t interval_start = MAX(start_idx, local_interval_start);
    uint64_t interval_end = MIN(next_start_idx, local_interval_end);

    if (interval_start < interval_end) {

      uint64_t count = interval_end - interval_start;
      uint64_t disp = interval_start - local_interval_start;

      YAC_ASSERT(
        (count <= INT_MAX) && (disp <= INT_MAX),
        "ERROR(compute_redist_recvcounts_rdispls): invalid interval")

      counts[i] = (int)count;
      displs[i] = (int)disp;
    } else {
      counts[i] = 0;
      displs[i] = 0;
    }

    start_idx = next_start_idx;
  }
}

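// Worked example for the interval formulas above (values are illustrative):
// global_size = 10 cells on split_comm_size = 3 ranks gives the target
// intervals [0,4), [4,7), and [7,10), i.e. ceil(10*r/3) as interval start
// for rank r; a rank then receives from every process whose bucket of cells
// overlaps its own target interval.
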
static void compute_redist_sendcounts_sdispls(
  uint64_t comm_rank, uint64_t split_rank, uint64_t comm_size,
  uint64_t global_sizes[2], uint64_t (*all_bucket_sizes)[2],
  uint64_t U_size, int * counts, int * displs) {

  uint64_t local_interval_start[2] = {0, 0};
  uint64_t local_interval_end[2];
  for (uint64_t i = 0; i < comm_rank; ++i)
    for (int j = 0; j < 2; ++j)
      local_interval_start[j] += all_bucket_sizes[i][j];
  for (int j = 0; j < 2; ++j)
    local_interval_end[j] =
      local_interval_start[j] + all_bucket_sizes[comm_rank][j];

  uint64_t comm_sizes[2] = {split_rank, comm_size - split_rank};

  for (uint64_t i = 0, start_idx[2] = {0,0}; i < comm_size; ++i) {

    int color = i >= split_rank;
    uint64_t global_size = global_sizes[color];
    uint64_t split_comm_rank = i - (color?(split_rank):0);
    uint64_t split_comm_size = comm_sizes[color];
    uint64_t next_start_idx =
      (global_size * (split_comm_rank + 1) + split_comm_size - 1) /
      split_comm_size;
    uint64_t interval_start = MAX(start_idx[color], local_interval_start[color]);
    uint64_t interval_end = MIN(next_start_idx, local_interval_end[color]);

    if (interval_start < interval_end) {

      uint64_t count = interval_end - interval_start;
      uint64_t disp = interval_start - local_interval_start[color] +
                      ((color)?(U_size):(0));

      YAC_ASSERT(
        (count <= INT_MAX) && (disp <= INT_MAX),
        "ERROR(compute_redist_sendcounts_sdispls): invalid interval")

287
288 counts[i] = (int)count;
289 displs[i] = (int)disp;
290 } else {
291 counts[i] = 0;
292 displs[i] = 0;
293 }
294
295 start_idx[color] = next_start_idx;
296 }
297}
298
static int compare_dist_cells_coord(const void * a, const void * b) {

  return memcmp(&(((const struct dist_cell *)a)->coord[0]),
                &(((const struct dist_cell *)b)->coord[0]), 3 * sizeof(double));
}

static void remove_duplicated_cells(
  struct dist_cell * cells, size_t * num_cells) {

  if (*num_cells == 0) return;

  //-------------------------
  // sort cells by coordinate
  //-------------------------

  qsort(cells, *num_cells, sizeof(*cells), compare_dist_cells_coord);

  size_t old_num_cells = *num_cells;
  struct dist_cell * last_new_cell = cells;
  for (size_t i = 1; i < old_num_cells; ++i) {
    struct dist_cell * curr_cell = cells + i;
    if (compare_dist_cells_coord(last_new_cell, curr_cell)) {
      ++last_new_cell;
      if (last_new_cell != curr_cell) *last_new_cell = *curr_cell;
    }
  }
  *num_cells = (size_t)(last_new_cell - cells) + 1;
}

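// Sketch of the compaction above (illustrative): after qsort, duplicates
// are adjacent, so the coordinate sequence {A, A, B, B, C} collapses in
// place to {A, B, C} and *num_cells drops from 5 to 3.
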
static struct proc_sphere_part_node * redistribute_cells_recursive(
  struct dist_cell ** cells, size_t * num_cells,
  struct yac_group_comm group_comm, double prev_gc_norm_vector[3],
  MPI_Datatype dist_cell_dt) {

#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_DEFINE( local_balance_point_region )
SCOREP_USER_REGION_DEFINE( global_balance_point_region )
SCOREP_USER_REGION_DEFINE( splitting_region )
SCOREP_USER_REGION_DEFINE( comm_split_region )
SCOREP_USER_REGION_DEFINE( redist_data_region )
#endif

  int group_rank = yac_group_comm_get_rank(group_comm);
  int group_size = yac_group_comm_get_size(group_comm);

  remove_duplicated_cells(*cells, num_cells);

  //--------------------
  // compute split plane
  //--------------------
#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_BEGIN(
  local_balance_point_region, "local balance point",
  SCOREP_USER_REGION_TYPE_COMMON )
#endif
  // compute local balance point
  double balance_point[3] = {0.0, 0.0, 0.0};
  for (size_t i = 0; i < *num_cells; ++i) {
    double * cell_coord = (*cells)[i].coord;
    for (int j = 0; j < 3; ++j) balance_point[j] += cell_coord[j];
  }
#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_END( local_balance_point_region )
SCOREP_USER_REGION_BEGIN(
  global_balance_point_region, "global balance point",
  SCOREP_USER_REGION_TYPE_COMMON )
#endif

  // compute global balance point (make sure that the allreduce operation
  // generates bit-identical results on all processes)
  yac_allreduce_sum_dble(&(balance_point[0]), 3, group_comm);

  if ((fabs(balance_point[0]) > 1e-9) ||
      (fabs(balance_point[1]) > 1e-9) ||
      (fabs(balance_point[2]) > 1e-9)) {
    normalise_vector(balance_point);
  } else {
    balance_point[0] = prev_gc_norm_vector[2];
    balance_point[1] = prev_gc_norm_vector[0];
    balance_point[2] = prev_gc_norm_vector[1];
  }

  double gc_norm_vector[3];
  crossproduct_kahan(balance_point, prev_gc_norm_vector, gc_norm_vector);

  if ((fabs(gc_norm_vector[0]) > 1e-9) ||
      (fabs(gc_norm_vector[1]) > 1e-9) ||
      (fabs(gc_norm_vector[2]) > 1e-9)) {
    normalise_vector(gc_norm_vector);
  } else {
    gc_norm_vector[0] = prev_gc_norm_vector[2];
    gc_norm_vector[1] = prev_gc_norm_vector[0];
    gc_norm_vector[2] = prev_gc_norm_vector[1];
  }

#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_END( global_balance_point_region )
#endif

  //-----------------
  // split local data
  //-----------------

#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_BEGIN( splitting_region, "splitting data", SCOREP_USER_REGION_TYPE_COMMON )
#endif

  // angle between a cell coord and the great circle plane:
  // acos(dot(gc_norm_vector, cell_coord)) = angle(gc_norm_vector, cell_coord)
  // acos(dot(gc_norm_vector, cell_coord)) - PI/2 = angle(gc_plane, cell_coord)
  // dot <= 0.0 -> U list
  // dot > 0.0 -> T list

  struct dist_cell * left = *cells, * right = *cells + *num_cells - 1;

  // sort such that all cells for the U list come first, followed by the
  // cells of the T list
  while (1) {
    // find a cell that does not belong into the U list
    while (left <= right) {
      double * curr_coordinates_xyz = left->coord;
      double dot = curr_coordinates_xyz[0] * gc_norm_vector[0] +
                   curr_coordinates_xyz[1] * gc_norm_vector[1] +
                   curr_coordinates_xyz[2] * gc_norm_vector[2];

      // if (angle < M_PI_2)
      if (dot > 0.0) break;
      ++left;
    }

    // find a cell that does not belong into the T list
    while (left < right) {
      double * curr_coordinates_xyz = right->coord;
      double dot = curr_coordinates_xyz[0] * gc_norm_vector[0] +
                   curr_coordinates_xyz[1] * gc_norm_vector[1] +
                   curr_coordinates_xyz[2] * gc_norm_vector[2];

      // if (angle >= M_PI_2)
      if (dot <= 0.0) break;
      --right;
    }

    if (left < right) {
      struct dist_cell tmp_cell = *left;
      *left = *right;
      *right = tmp_cell;
      ++left;
      --right;
    } else {
      break;
    }
  }

  uint64_t U_size = (uint64_t)(left - *cells);
  uint64_t T_size = (uint64_t)(*num_cells) - U_size;

#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_END( splitting_region )
#endif

  // there is no MPI datatype for size_t -> we use uint64_t here
  uint64_t bucket_sizes[2] = {U_size, T_size};

  // exchange local U/T sizes between all processes
  uint64_t (*all_bucket_sizes)[2] =
    xmalloc((size_t)group_size * sizeof(*all_bucket_sizes));
  yac_allgather_uint64(
    &(bucket_sizes[0]), &(all_bucket_sizes[0][0]), 2, group_comm);

  // determine global U/T sizes
  uint64_t global_bucket_sizes[2] = {0, 0};
  for (int i = 0; i < group_size; ++i)
    for (int j = 0; j < 2; ++j)
      global_bucket_sizes[j] += all_bucket_sizes[i][j];
  uint64_t global_num_cells = global_bucket_sizes[0] + global_bucket_sizes[1];

  //----------------------
  // split into two groups
  //----------------------
#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_BEGIN(
  comm_split_region, "creating splitcomm", SCOREP_USER_REGION_TYPE_COMMON )
#endif
  // determine processor groups
  int split_rank =
    MIN(
      (int)MAX(
        ((global_bucket_sizes[0] * (uint64_t)group_size + global_num_cells/2) /
         global_num_cells), 1),
      group_size - 1);

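  // Illustrative example: for group_size = 4 with global bucket sizes
  // U = 30 and T = 70, (30 * 4 + 50) / 100 = 1, so split_rank = 1: one rank
  // is assigned to the U group and three to the T group; the MAX/MIN clamp
  // guarantees that neither group is empty.
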
  // generate processor groups
  struct yac_group_comm local_group_comm, remote_group_comm;
  yac_group_comm_split(
    group_comm, split_rank, &local_group_comm, &remote_group_comm);

#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_END( comm_split_region )
#endif
  //------------------
  // redistribute data
  //------------------

  int * int_buffer = xmalloc(4 * (size_t)group_size * sizeof(*int_buffer));
  int * sendcounts = int_buffer + 0 * group_size;
  int * recvcounts = int_buffer + 1 * group_size;
  int * sdispls = int_buffer + 2 * group_size;
  int * rdispls = int_buffer + 3 * group_size;

#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_BEGIN(
  redist_data_region, "data redistribution", SCOREP_USER_REGION_TYPE_COMMON )
#endif
  // compute send and receive counts and respective ranks for data
  // redistribution
  compute_redist_sendcounts_sdispls(
    (uint64_t)group_rank, (uint64_t)split_rank, (uint64_t)group_size,
    global_bucket_sizes, all_bucket_sizes, U_size, sendcounts, sdispls);
  size_t new_num_cells;
  compute_redist_recvcounts_rdispls(
    (uint64_t)group_rank, (uint64_t)split_rank, (uint64_t)group_size,
    global_bucket_sizes, all_bucket_sizes, recvcounts, rdispls, &new_num_cells);

  // exchange data between both groups
  struct dist_cell * new_cells = xmalloc(new_num_cells * sizeof(*new_cells));
  yac_alltoallv_p2p_group(
    *cells, sendcounts, sdispls, new_cells, recvcounts, rdispls,
    sizeof(**cells), dist_cell_dt, group_comm);
#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_END( redist_data_region )
#endif
  free(*cells);
  *cells = new_cells;
  *num_cells = new_num_cells;

  free(int_buffer);
  free(all_bucket_sizes);

  //----------
  // recursion
  //----------

  // redistribute data within local group
  struct proc_sphere_part_node_data local_data;

  if (yac_group_comm_get_size(local_group_comm) > 1) {
    local_data.data.node =
      redistribute_cells_recursive(
        cells, num_cells, local_group_comm, gc_norm_vector, dist_cell_dt);
    local_data.is_leaf = 0;
  } else {

    remove_duplicated_cells(*cells, num_cells);
    local_data.data.rank = yac_group_comm_get_global_rank(group_comm);
    local_data.is_leaf = 1;
  }

  // get proc_sphere_part_node_data from remote group
  struct proc_sphere_part_node_data remote_data =
    get_remote_data(local_data, local_group_comm, remote_group_comm);

  // generate node
  struct proc_sphere_part_node * node = xmalloc(1 * sizeof(*node));
  if (group_rank < split_rank) {
    node->U = local_data;
    node->T = remote_data;
  } else {
    node->U = remote_data;
    node->T = local_data;
  }
  for (int i = 0; i < 3; ++i) node->gc_norm_vector[i] = gc_norm_vector[i];

  return node;
}

static MPI_Datatype yac_get_dist_cell_mpi_datatype(MPI_Comm comm) {

  MPI_Datatype coord_dt;
  yac_mpi_call(MPI_Type_contiguous(3, MPI_DOUBLE, &coord_dt), comm);
  return yac_create_resized(coord_dt, sizeof(struct dist_cell), comm);
}

struct proc_sphere_part_node * yac_redistribute_cells(
  struct dist_cell ** cells, size_t * num_cells, MPI_Comm comm) {

  double base_gc_norm_vector[3] = {0.0,0.0,1.0};

  int comm_rank, comm_size;
  yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
  yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);

  uint64_t global_num_cells = (uint64_t)*num_cells;
  yac_mpi_call(
    MPI_Allreduce(
      MPI_IN_PLACE, &global_num_cells, 1, MPI_UINT64_T, MPI_SUM, comm), comm);

  struct proc_sphere_part_node * proc_sphere_part;

  if ((comm_size > 1) && (global_num_cells > 0)) {

    MPI_Datatype dist_cell_dt = yac_get_dist_cell_mpi_datatype(comm);
    struct yac_group_comm group_comm = yac_group_comm_new(comm);
    proc_sphere_part =
      redistribute_cells_recursive(cells, num_cells, group_comm,
                                   base_gc_norm_vector, dist_cell_dt);
    yac_group_comm_delete(group_comm);

    yac_mpi_call(MPI_Type_free(&dist_cell_dt), comm);
  } else {
    proc_sphere_part = xmalloc(1 * sizeof(*proc_sphere_part));
    proc_sphere_part->U.data.rank = 0;
    proc_sphere_part->U.is_leaf = 1;
    proc_sphere_part->T.data.rank = 0;
    proc_sphere_part->T.is_leaf = 1;
    proc_sphere_part->gc_norm_vector[0] = base_gc_norm_vector[0];
    proc_sphere_part->gc_norm_vector[1] = base_gc_norm_vector[1];
    proc_sphere_part->gc_norm_vector[2] = base_gc_norm_vector[2];
  }

  return proc_sphere_part;
}

static int is_serial_node(struct proc_sphere_part_node * node) {
  return (node->U.is_leaf) && (node->T.is_leaf) &&
         (node->U.data.rank == 0) && (node->T.data.rank == 0);
}

void yac_proc_sphere_part_do_point_search(
  struct proc_sphere_part_node * node, yac_coordinate_pointer search_coords,
  size_t count, int * ranks) {

  if (is_serial_node(node)) {
    for (size_t i = 0; i < count; ++i) ranks[i] = 0;
    return;
  }

  for (size_t i = 0; i < count; ++i) {

    double curr_coord[3] =
      {search_coords[i][0], search_coords[i][1], search_coords[i][2]};

    struct proc_sphere_part_node * curr_node = node;

    while (1) {

      // get the angle between the norm vector of the split plane and the
      // current search coordinate
      struct sin_cos_angle angle =
        get_vector_angle_2(curr_coord, curr_node->gc_norm_vector);

      if (angle.cos <= 0.0) {
        if (curr_node->U.is_leaf) {
          ranks[i] = curr_node->U.data.rank;
          break;
        } else {
          curr_node = curr_node->U.data.node;
          continue;
        }
      } else {
        if (curr_node->T.is_leaf) {
          ranks[i] = curr_node->T.data.rank;
          break;
        } else {
          curr_node = curr_node->T.data.node;
          continue;
        }
      }
    }
  }
}

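// Illustrative sketch, not part of the original file: querying a minimal
// two-leaf decomposition (all values below are hypothetical).
static void point_search_example(void) {

  // one split along the equator plane: rank 0 owns the southern half (U),
  // rank 1 the northern half (T)
  struct proc_sphere_part_node root =
    {.U = {.data = {.rank = 0}, .is_leaf = 1},
     .T = {.data = {.rank = 1}, .is_leaf = 1},
     .gc_norm_vector = {0.0, 0.0, 1.0}};

  double search_coords[2][3] = {{0.0, 0.0, 1.0}, {0.0, 0.0, -1.0}};
  int ranks[2];
  yac_proc_sphere_part_do_point_search(&root, search_coords, 2, ranks);
  // the first point lies on the T side of the split plane (dot > 0.0), the
  // second one on the U side (dot <= 0.0) -> ranks[0] == 1, ranks[1] == 0
}
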
static void bnd_circle_search(
  struct proc_sphere_part_node * node, struct bounding_circle bnd_circle,
  int * ranks, int * rank_count) {

  double dot = bnd_circle.base_vector[0] * node->gc_norm_vector[0] +
               bnd_circle.base_vector[1] * node->gc_norm_vector[1] +
               bnd_circle.base_vector[2] * node->gc_norm_vector[2];

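  // Reasoning behind the two tests below (note added for clarity):
  // dot = cos(angle(base_vector, gc_norm_vector)), and cos is decreasing
  // on [0, PI], so
  //   angle < PI/2 + inc_angle  <=>  dot > cos(PI/2 + inc_angle) = -sin(inc_angle)
  //   angle > PI/2 - inc_angle  <=>  dot < cos(PI/2 - inc_angle) =  sin(inc_angle)
  // both can hold at once, in which case both subtrees are searched
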
  // angle < M_PI_2 + bnd_circle.inc_angle
  if (dot > - bnd_circle.inc_angle.sin) {

    if (node->T.is_leaf) {

      ranks[*rank_count] = node->T.data.rank;
      ++*rank_count;

    } else {
      bnd_circle_search(node->T.data.node, bnd_circle, ranks, rank_count);
    }
  }

  // angle > M_PI_2 - bnd_circle.inc_angle
  if (dot < bnd_circle.inc_angle.sin) {

    if (node->U.is_leaf) {

      ranks[*rank_count] = node->U.data.rank;
      ++*rank_count;

    } else {
      bnd_circle_search(node->U.data.node, bnd_circle, ranks, rank_count);
    }
  }
}

void yac_proc_sphere_part_do_bnd_circle_search(
  struct proc_sphere_part_node * node, struct bounding_circle bnd_circle,
  int * ranks, int * rank_count) {

  YAC_ASSERT(
    compare_angles(bnd_circle.inc_angle, SIN_COS_M_PI) == -1,
    "ERROR(yac_proc_sphere_part_do_bnd_circle_search): angle is >= PI")

  // special case in which the proc_sphere_part_node only contains a single
  // rank
  if (is_serial_node(node)) {
    ranks[0] = 0;
    *rank_count = 1;
  } else {
    *rank_count = 0;
    bnd_circle_search(node, bnd_circle, ranks, rank_count);
  }
}

static int get_leaf_ranks(struct proc_sphere_part_node * node, int * ranks) {

  int curr_size;

  if (node->U.is_leaf) {
    *ranks = node->U.data.rank;
    curr_size = 1;
  } else {
    curr_size = get_leaf_ranks(node->U.data.node, ranks);
  }
  if (node->T.is_leaf) {
    ranks[curr_size++] = node->T.data.rank;
  } else {
    curr_size += get_leaf_ranks(node->T.data.node, ranks + curr_size);
  }

  return curr_size;
}

static void get_neigh_ranks(
  struct proc_sphere_part_node * node, uint64_t * leaf_sizes, uint64_t min_size,
  uint64_t ** inner_node_sizes, int * send_flags, int * recv_flags,
  int comm_rank, struct neigh_search_data * last_valid_node) {

  int curr_node_is_valid = (**inner_node_sizes >= min_size);

  struct neigh_search_data * curr_valid_node;
  struct neigh_search_data temp_valid_node;

  int * ranks = last_valid_node->ranks;

  if (curr_node_is_valid) {
    temp_valid_node.ranks = ranks;
    temp_valid_node.num_ranks = 0;
    temp_valid_node.node = node;
    curr_valid_node = &temp_valid_node;
    last_valid_node->num_ranks = 0;
  } else {
    curr_valid_node = last_valid_node;
  }

  for (int j = 0; j < 2; ++j) {

    struct proc_sphere_part_node_data node_data = (j == 0)?(node->U):(node->T);

    if (node_data.is_leaf) {

      int rank = node_data.data.rank;

      if (leaf_sizes[rank] < min_size) {

        if (curr_valid_node->num_ranks == 0)
          curr_valid_node->num_ranks =
            get_leaf_ranks(curr_valid_node->node, ranks);

        // if the current leaf is the local process
        if (rank == comm_rank)
          for (int i = 0; i < curr_valid_node->num_ranks; ++i)
            recv_flags[ranks[i]] = 1;

        // if the process of the current leaf requires data from the
        // local process
        for (int i = 0; i < curr_valid_node->num_ranks; ++i) {
          if (ranks[i] == comm_rank) {
            send_flags[rank] = 1;
            break;
          }
        }
      }

    } else {
      ++*inner_node_sizes;
      get_neigh_ranks(
        node_data.data.node, leaf_sizes, min_size, inner_node_sizes,
        send_flags, recv_flags, comm_rank, curr_valid_node);
    }
  }
}

static uint64_t determine_node_sizes(
  struct proc_sphere_part_node * node, uint64_t * leaf_sizes,
  uint64_t ** inner_node_sizes) {

  uint64_t * curr_inner_node_size = *inner_node_sizes;
  uint64_t node_size;
  if (node->U.is_leaf) {
    node_size = leaf_sizes[node->U.data.rank];
  } else {
    ++*inner_node_sizes;
    node_size =
      determine_node_sizes(node->U.data.node, leaf_sizes, inner_node_sizes);
  }
  if (node->T.is_leaf) {
    node_size += leaf_sizes[node->T.data.rank];
  } else {
    ++*inner_node_sizes;
    node_size +=
      determine_node_sizes(node->T.data.node, leaf_sizes, inner_node_sizes);
  }
  return (*curr_inner_node_size = node_size);
}

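// Note added for clarity: determine_node_sizes and get_neigh_ranks advance
// *inner_node_sizes in the same preorder traversal (current node, then U
// subtree, then T subtree), so the sizes recorded here are read back for
// the matching inner nodes during the neighbour search below.
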
void yac_proc_sphere_part_get_neigh_ranks(
  struct proc_sphere_part_node * node, uint64_t * leaf_sizes,
  uint64_t min_size, int * send_flags, int * recv_flags,
  int comm_rank, int comm_size) {

  uint64_t * inner_node_sizes =
    xcalloc((size_t)comm_size, sizeof(*inner_node_sizes));

  uint64_t * temp_inner_node_sizes = inner_node_sizes;
  determine_node_sizes(node, leaf_sizes, &temp_inner_node_sizes);

  YAC_ASSERT(
    *inner_node_sizes >= min_size,
    "ERROR(yac_proc_sphere_part_get_neigh_ranks): sum of global leaf sizes "
    "is < min_size")

  struct neigh_search_data search_data = {
    .ranks = xmalloc((size_t)comm_size * sizeof(int)),
    .num_ranks = 0,
    .node = node
  };

  temp_inner_node_sizes = inner_node_sizes;
  get_neigh_ranks(node, leaf_sizes, min_size, &temp_inner_node_sizes,
                  send_flags, recv_flags, comm_rank, &search_data);

  send_flags[comm_rank] = 0;
  recv_flags[comm_rank] = 0;

  free(search_data.ranks);
  free(inner_node_sizes);
}