YetAnotherCoupler 3.5.2
proc_sphere_part.c

// Copyright (c) 2024 The YAC Authors
//
// SPDX-License-Identifier: BSD-3-Clause

#include <string.h>
#include "proc_sphere_part.h"
#include "yac_mpi_internal.h"
#ifdef SCOREP_USER_ENABLE
#include "scorep/SCOREP_User.h"
#endif // SCOREP_USER_ENABLE

enum splicomm_tags {
  DATA_SIZE_TAG,
  DATA_TAG,
};

struct proc_sphere_part_node_data {
  union {
    struct proc_sphere_part_node * node;
    int rank;
  } data;
  int is_leaf;
};

struct proc_sphere_part_node {
  struct proc_sphere_part_node_data U, T;
  double gc_norm_vector[3];
};

struct neigh_search_data {
  int * ranks;
  int num_ranks;
  struct proc_sphere_part_node * node;
};

static int get_proc_sphere_part_node_data_pack_size(
  struct proc_sphere_part_node_data node_data, MPI_Comm comm);

static int get_proc_sphere_part_node_pack_size(
  struct proc_sphere_part_node node, MPI_Comm comm) {

  int vec_pack_size;
  yac_mpi_call(
    MPI_Pack_size(3, MPI_DOUBLE, comm, &vec_pack_size), comm);

  return vec_pack_size +
         get_proc_sphere_part_node_data_pack_size(node.U, comm) +
         get_proc_sphere_part_node_data_pack_size(node.T, comm);
}

static int get_proc_sphere_part_node_data_pack_size(
  struct proc_sphere_part_node_data node_data, MPI_Comm comm) {

  int int_pack_size;
  yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &int_pack_size), comm);

  int data_size = int_pack_size; // is_leaf flag
  if (node_data.is_leaf)
    data_size += int_pack_size; // rank
  else
    data_size +=
      get_proc_sphere_part_node_pack_size(*(node_data.data.node), comm);

  return data_size;
}
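
// The two size helpers above are mutually recursive and mirror the pack
// routines below: a node accounts for its three gc_norm_vector doubles plus
// the pack sizes of its U and T entries; an entry accounts for one int
// (is_leaf) plus either another int (rank) or a complete child node. As a
// worked example (not taken from the original sources): a root node whose U
// and T entries are both leaves packs into
//   vec_pack_size + 2 * (int_pack_size + int_pack_size)
// bytes, i.e. one 3-double vector and four ints.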

static void pack_proc_sphere_part_node_data(
  struct proc_sphere_part_node_data node_data, void * pack_buffer,
  int pack_buffer_size, int * position, MPI_Comm comm);

static void pack_proc_sphere_part_node(
  struct proc_sphere_part_node * node, void * pack_buffer,
  int pack_buffer_size, int * position, MPI_Comm comm) {

  yac_mpi_call(MPI_Pack(&(node->gc_norm_vector[0]), 3, MPI_DOUBLE, pack_buffer,
                        pack_buffer_size, position, comm), comm);
  pack_proc_sphere_part_node_data(
    node->U, pack_buffer, pack_buffer_size, position, comm);
  pack_proc_sphere_part_node_data(
    node->T, pack_buffer, pack_buffer_size, position, comm);
}

static void pack_proc_sphere_part_node_data(
  struct proc_sphere_part_node_data node_data, void * pack_buffer,
  int pack_buffer_size, int * position, MPI_Comm comm) {

  yac_mpi_call(MPI_Pack(&(node_data.is_leaf), 1, MPI_INT, pack_buffer,
                        pack_buffer_size, position, comm), comm);

  if (node_data.is_leaf)
    yac_mpi_call(MPI_Pack(&(node_data.data.rank), 1, MPI_INT, pack_buffer,
                          pack_buffer_size, position, comm), comm);
  else
    pack_proc_sphere_part_node(node_data.data.node, pack_buffer,
                               pack_buffer_size, position, comm);
}
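
// Wire layout produced by the pack routines above (and consumed in the same
// order by the unpack routines below): for every node the gc_norm_vector is
// written first, followed by the serialised U entry and then the serialised
// T entry; every entry starts with its is_leaf flag, followed by either the
// leaf rank or the recursively packed child node. The recursion depth equals
// the depth of the proc_sphere_part tree.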

static struct proc_sphere_part_node_data unpack_proc_sphere_part_node_data(
  void * pack_buffer, int pack_buffer_size, int * position,
  MPI_Comm comm);

static struct proc_sphere_part_node * unpack_proc_sphere_part_node(
  void * pack_buffer, int pack_buffer_size, int * position,
  MPI_Comm comm) {

  struct proc_sphere_part_node * node = xmalloc(1 * sizeof(*node));

  yac_mpi_call(
    MPI_Unpack(pack_buffer, pack_buffer_size, position,
               &(node->gc_norm_vector[0]), 3, MPI_DOUBLE, comm), comm);

  node->U = unpack_proc_sphere_part_node_data(pack_buffer, pack_buffer_size,
                                              position, comm);
  node->T = unpack_proc_sphere_part_node_data(pack_buffer, pack_buffer_size,
                                              position, comm);

  return node;
}

static struct proc_sphere_part_node_data unpack_proc_sphere_part_node_data(
  void * pack_buffer, int pack_buffer_size, int * position,
  MPI_Comm comm) {

  struct proc_sphere_part_node_data node_data;

  yac_mpi_call(
    MPI_Unpack(pack_buffer, pack_buffer_size, position,
               &(node_data.is_leaf), 1, MPI_INT, comm), comm);

  if (node_data.is_leaf)
    yac_mpi_call(
      MPI_Unpack(pack_buffer, pack_buffer_size, position,
                 &(node_data.data.rank), 1, MPI_INT, comm), comm);
  else
    node_data.data.node =
      unpack_proc_sphere_part_node(
        pack_buffer, pack_buffer_size, position, comm);

  return node_data;
}

static struct proc_sphere_part_node_data get_remote_data(
  struct proc_sphere_part_node_data local_data,
  struct yac_group_comm local_group_comm,
  struct yac_group_comm remote_group_comm) {

  int comm_rank;
  MPI_Comm comm = local_group_comm.comm;
  yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);

  int data_size;
  void * recv_buffer = NULL;

  int order = local_group_comm.start < remote_group_comm.start;

  for (int i = 0; i < 2; ++i) {

    if (order == i) {

      yac_bcast_group(
        &data_size, 1, MPI_INT, remote_group_comm.start, local_group_comm);
      recv_buffer = xmalloc((size_t)data_size);
      yac_bcast_group(recv_buffer, data_size, MPI_PACKED,
                      remote_group_comm.start, local_group_comm);

    } else {
      if (comm_rank == local_group_comm.start) {

        // pack local_data
        int pack_buffer_size =
          get_proc_sphere_part_node_data_pack_size(local_data, comm);
        void * pack_buffer = xmalloc((size_t)pack_buffer_size);
        int position = 0;
        pack_proc_sphere_part_node_data(
          local_data, pack_buffer, pack_buffer_size, &position, comm);

        // broadcast data size to the other group
        yac_bcast_group(
          &position, 1, MPI_INT, local_group_comm.start, remote_group_comm);

        // broadcast the packed local_data to the other group
        yac_bcast_group(pack_buffer, position, MPI_PACKED,
                        local_group_comm.start, remote_group_comm);
        free(pack_buffer);
      }
    }
  }

  // unpack node data received from the other group
  int position = 0;
  struct proc_sphere_part_node_data other_data =
    unpack_proc_sphere_part_node_data(
      recv_buffer, data_size, &position, comm);
  free(recv_buffer);

  return other_data;
}
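
// get_remote_data exchanges the serialised trees of two sibling process
// groups: the root of each group packs its proc_sphere_part_node_data and
// broadcasts first the packed size and then the packed buffer into the other
// group (yac_bcast_group with the remote root as broadcast root), while all
// ranks of the receiving group call the matching receive broadcasts. The
// comparison of the group start ranks determines which group broadcasts in
// which of the two loop iterations, so both sides agree on the order without
// an additional handshake.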

static void compute_redist_recvcounts_rdispls(
  uint64_t comm_rank, uint64_t split_rank, uint64_t comm_size,
  uint64_t global_sizes[2], uint64_t (*all_bucket_sizes)[2],
  int * counts, int * displs, size_t * recv_count) {

  int color = comm_rank >= split_rank;

  uint64_t split_comm_size = split_rank;
  uint64_t split_comm_rank = comm_rank;
  if (color) {
    split_comm_rank -= split_comm_size;
    split_comm_size = comm_size - split_comm_size;
  }

  uint64_t global_size = global_sizes[color];
  uint64_t local_interval_start =
    (global_size * split_comm_rank + split_comm_size - 1) /
    split_comm_size;
  uint64_t local_interval_end =
    (global_size * (split_comm_rank + 1) + split_comm_size - 1) /
    split_comm_size;

  *recv_count = (size_t)(local_interval_end - local_interval_start);

  for (uint64_t i = 0, start_idx = 0; i < comm_size; ++i) {

    uint64_t next_start_idx = start_idx + all_bucket_sizes[i][color];
    uint64_t interval_start = MAX(start_idx, local_interval_start);
    uint64_t interval_end = MIN(next_start_idx, local_interval_end);

    if (interval_start < interval_end) {

      uint64_t count = interval_end - interval_start;
      uint64_t disp = interval_start - local_interval_start;

      YAC_ASSERT(
        (count <= INT_MAX) && (disp <= INT_MAX),
        "ERROR(compute_redist_recvcounts_rdispls): invalid interval")

      counts[i] = (int)count;
      displs[i] = (int)disp;
    } else {
      counts[i] = 0;
      displs[i] = 0;
    }

    start_idx = next_start_idx;
  }
}
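
// The target intervals above follow from the ceiling division
//   interval(r) = [ (g * r + s - 1) / s, (g * (r + 1) + s - 1) / s )
// with g = global_size and s = split_comm_size. A small worked example (not
// taken from the original sources): g = 10 vertices distributed over s = 4
// ranks yields the intervals [0,3), [3,5), [5,8), [8,10), i.e. local sizes
// 3, 2, 3, 2. The loop then intersects the local target interval with the
// bucket range of every sender to obtain the per-rank receive counts and
// displacements.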

static void compute_redist_sendcounts_sdispls(
  uint64_t comm_rank, uint64_t split_rank, uint64_t comm_size,
  uint64_t global_sizes[2], uint64_t (*all_bucket_sizes)[2],
  uint64_t U_size, int * counts, int * displs) {

  uint64_t local_interval_start[2] = {0, 0};
  uint64_t local_interval_end[2];
  for (uint64_t i = 0; i < comm_rank; ++i)
    for (int j = 0; j < 2; ++j)
      local_interval_start[j] += all_bucket_sizes[i][j];
  for (int j = 0; j < 2; ++j)
    local_interval_end[j] =
      local_interval_start[j] + all_bucket_sizes[comm_rank][j];

  uint64_t comm_sizes[2] = {split_rank, comm_size - split_rank};

  for (uint64_t i = 0, start_idx[2] = {0, 0}; i < comm_size; ++i) {

    int color = i >= split_rank;
    uint64_t global_size = global_sizes[color];
    uint64_t split_comm_rank = i - (color ? split_rank : 0);
    uint64_t split_comm_size = comm_sizes[color];
    uint64_t next_start_idx =
      (global_size * (split_comm_rank + 1) + split_comm_size - 1) /
      split_comm_size;
    uint64_t interval_start =
      MAX(start_idx[color], local_interval_start[color]);
    uint64_t interval_end = MIN(next_start_idx, local_interval_end[color]);

    if (interval_start < interval_end) {

      uint64_t count = interval_end - interval_start;
      uint64_t disp = interval_start - local_interval_start[color] +
                      (color ? U_size : 0);

      YAC_ASSERT(
        (count <= INT_MAX) && (disp <= INT_MAX),
        "ERROR(compute_redist_sendcounts_sdispls): invalid interval")

      counts[i] = (int)count;
      displs[i] = (int)disp;
    } else {
      counts[i] = 0;
      displs[i] = 0;
    }

    start_idx[color] = next_start_idx;
  }
}

static int compare_dist_vertices_coord(const void * a, const void * b) {

  return
    memcmp(&(((const struct dist_vertex *)a)->coord[0]),
           &(((const struct dist_vertex *)b)->coord[0]), 3 * sizeof(double));
}

static void remove_duplicated_vertices(
  struct dist_vertex * vertices, size_t * num_vertices) {

  if (*num_vertices == 0) return;

  //-----------------------------
  // sort vertices by coordinates
  //-----------------------------

  qsort(
    vertices, *num_vertices, sizeof(*vertices), compare_dist_vertices_coord);

  size_t old_num_vertices = *num_vertices;
  struct dist_vertex * last_new_vertex = vertices;
  for (size_t i = 1; i < old_num_vertices; ++i) {
    struct dist_vertex * curr_vertex = vertices + i;
    if (compare_dist_vertices_coord(last_new_vertex, curr_vertex)) {
      ++last_new_vertex;
      if (last_new_vertex != curr_vertex) *last_new_vertex = *curr_vertex;
    }
  }
  *num_vertices = (size_t)(last_new_vertex - vertices) + 1;
}
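
// Duplicates are detected by a bytewise comparison (memcmp) of the three
// coordinate doubles: after sorting, only bitwise-identical coordinates are
// merged in place; coordinates that differ by rounding noise are kept as
// distinct vertices.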

static struct proc_sphere_part_node * redistribute_vertices_recursive(
  struct dist_vertex ** vertices, size_t * num_vertices,
  struct yac_group_comm group_comm, double prev_gc_norm_vector[3],
  MPI_Datatype dist_vertex_dt) {

#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_DEFINE( local_balance_point_region )
SCOREP_USER_REGION_DEFINE( global_balance_point_region )
SCOREP_USER_REGION_DEFINE( splitting_region )
SCOREP_USER_REGION_DEFINE( comm_split_region )
SCOREP_USER_REGION_DEFINE( redist_data_region )
#endif

  int group_rank = yac_group_comm_get_rank(group_comm);
  int group_size = yac_group_comm_get_size(group_comm);

  remove_duplicated_vertices(*vertices, num_vertices);

  //--------------------
  // compute split plane
  //--------------------
#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_BEGIN(
  local_balance_point_region, "local balance point",
  SCOREP_USER_REGION_TYPE_COMMON )
#endif
  // compute local balance point
  double balance_point[3] = {0.0, 0.0, 0.0};
  for (size_t i = 0; i < *num_vertices; ++i) {
    double * vertex_coord = (*vertices)[i].coord;
    for (int j = 0; j < 3; ++j) balance_point[j] += vertex_coord[j];
  }
#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_END( local_balance_point_region )
SCOREP_USER_REGION_BEGIN(
  global_balance_point_region, "global balance point",
  SCOREP_USER_REGION_TYPE_COMMON )
#endif

  // compute global balance point (make sure that the allreduce operation
  // generates bit-identical results on all processes)
  yac_allreduce_sum_dble(&(balance_point[0]), 3, group_comm);

  if ((fabs(balance_point[0]) > 1e-9) ||
      (fabs(balance_point[1]) > 1e-9) ||
      (fabs(balance_point[2]) > 1e-9)) {
    normalise_vector(balance_point);
  } else {
    balance_point[0] = prev_gc_norm_vector[2];
    balance_point[1] = prev_gc_norm_vector[0];
    balance_point[2] = prev_gc_norm_vector[1];
  }

  double gc_norm_vector[3];
  crossproduct_kahan(balance_point, prev_gc_norm_vector, gc_norm_vector);

  if ((fabs(gc_norm_vector[0]) > 1e-9) ||
      (fabs(gc_norm_vector[1]) > 1e-9) ||
      (fabs(gc_norm_vector[2]) > 1e-9)) {
    normalise_vector(gc_norm_vector);
  } else {
    gc_norm_vector[0] = prev_gc_norm_vector[2];
    gc_norm_vector[1] = prev_gc_norm_vector[0];
    gc_norm_vector[2] = prev_gc_norm_vector[1];
  }
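
  // Split-plane construction: the (normalised) sum of all vertex coordinates
  // serves as a balance point, and the Kahan cross product of this point with
  // the previous split normal yields the normal of the new splitting great
  // circle, i.e. a plane through the origin that contains the balance point
  // and is perpendicular to the previous cut. The component permutations above
  // are fallbacks for the degenerate cases in which the balance point is
  // numerically zero or (anti)parallel to the previous normal.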

#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_END( global_balance_point_region )
#endif

  //-----------------
  // split local data
  //-----------------

#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_BEGIN( splitting_region, "splitting data", SCOREP_USER_REGION_TYPE_COMMON )
#endif

  // angle between a vertex coord and the great circle plane:
  // acos(dot(gc_norm_vector, vertex_coord)) = angle(gc_norm_vector, vertex_coord)
  // acos(dot(gc_norm_vector, vertex_coord)) - PI/2 = angle(gc_plane, vertex_coord)
  // dot <= 0.0 -> U list
  // dot >  0.0 -> T list

  struct dist_vertex * left = *vertices, * right = *vertices + *num_vertices - 1;

  // sort such that all vertices for the U list come first, followed by the
  // vertices of the T list
  while (1) {
    // find a vertex that does not belong into the U list
    while (left <= right) {
      double * curr_coordinates_xyz = left->coord;
      double dot = curr_coordinates_xyz[0] * gc_norm_vector[0] +
                   curr_coordinates_xyz[1] * gc_norm_vector[1] +
                   curr_coordinates_xyz[2] * gc_norm_vector[2];

      // if (angle < M_PI_2)
      if (dot > 0.0) break;
      ++left;
    }

    // find a vertex that does not belong into the T list
    while (left < right) {
      double * curr_coordinates_xyz = right->coord;
      double dot = curr_coordinates_xyz[0] * gc_norm_vector[0] +
                   curr_coordinates_xyz[1] * gc_norm_vector[1] +
                   curr_coordinates_xyz[2] * gc_norm_vector[2];

      // if (angle >= M_PI_2)
      if (dot <= 0.0) break;
      --right;
    }

    if (left < right) {
      struct dist_vertex tmp_vertex = *left;
      *left = *right;
      *right = tmp_vertex;
      ++left;
      --right;
    } else {
      break;
    }
  }

  uint64_t U_size = (uint64_t)(left - *vertices);
  uint64_t T_size = (uint64_t)(*num_vertices) - U_size;
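
  // The two-pointer pass above is a Hoare-style partition, not a full sort:
  // afterwards (*vertices)[0 .. U_size-1] hold dot <= 0.0 (U side of the
  // split plane) and (*vertices)[U_size .. *num_vertices-1] hold dot > 0.0
  // (T side); the relative order within each side is arbitrary.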

#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_END( splitting_region )
#endif

  // there is no MPI datatype for size_t -> we use uint64_t here
  uint64_t bucket_sizes[2] = {U_size, T_size};

  // exchange local U/T sizes between all processes
  uint64_t (*all_bucket_sizes)[2] =
    xmalloc((size_t)group_size * sizeof(*all_bucket_sizes));
  yac_allgather_uint64(
    &(bucket_sizes[0]), &(all_bucket_sizes[0][0]), 2, group_comm);

  // determine global U/T sizes
  uint64_t global_bucket_sizes[2] = {0, 0};
  for (int i = 0; i < group_size; ++i)
    for (int j = 0; j < 2; ++j)
      global_bucket_sizes[j] += all_bucket_sizes[i][j];
  uint64_t global_num_vertices =
    global_bucket_sizes[0] + global_bucket_sizes[1];

  //----------------------
  // split into two groups
  //----------------------
#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_BEGIN(
  comm_split_region, "creating splitcomm", SCOREP_USER_REGION_TYPE_COMMON )
#endif
  // determine processor groups
  int split_rank =
    MIN(
      (int)MAX(
        ((global_bucket_sizes[0] * (uint64_t)group_size +
          global_num_vertices/2) / global_num_vertices), 1),
      group_size - 1);
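
  // split_rank is essentially round(group_size * |U| / (|U| + |T|)): the
  // "+ global_num_vertices/2" term implements the rounding, and the
  // MAX(..., 1) / MIN(..., group_size - 1) clamps guarantee that neither
  // process group ends up empty.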

  // generate processor groups
  struct yac_group_comm local_group_comm, remote_group_comm;
  yac_group_comm_split(
    group_comm, split_rank, &local_group_comm, &remote_group_comm);

#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_END( comm_split_region )
#endif
  //------------------
  // redistribute data
  //------------------

  int * int_buffer = xmalloc(4 * (size_t)group_size * sizeof(*int_buffer));
  int * sendcounts = int_buffer + 0 * group_size;
  int * recvcounts = int_buffer + 1 * group_size;
  int * sdispls = int_buffer + 2 * group_size;
  int * rdispls = int_buffer + 3 * group_size;

#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_BEGIN(
  redist_data_region, "data redistribution", SCOREP_USER_REGION_TYPE_COMMON )
#endif
  // compute send and receive counts and respective displacements for the
  // data redistribution
  compute_redist_sendcounts_sdispls(
    (uint64_t)group_rank, (uint64_t)split_rank, (uint64_t)group_size,
    global_bucket_sizes, all_bucket_sizes, U_size, sendcounts, sdispls);
  size_t new_num_vertices;
  compute_redist_recvcounts_rdispls(
    (uint64_t)group_rank, (uint64_t)split_rank, (uint64_t)group_size,
    global_bucket_sizes, all_bucket_sizes, recvcounts, rdispls,
    &new_num_vertices);

  // exchange data between both groups
  struct dist_vertex * new_vertices =
    xmalloc(new_num_vertices * sizeof(*new_vertices));
  yac_alltoallv_p2p_group(
    *vertices, sendcounts, sdispls, new_vertices, recvcounts, rdispls,
    sizeof(**vertices), dist_vertex_dt, group_comm);
#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_END( redist_data_region )
#endif
  free(*vertices);
  *vertices = new_vertices;
  *num_vertices = new_num_vertices;

  free(int_buffer);
  free(all_bucket_sizes);

  //----------
  // recursion
  //----------

  // redistribute data within local group
  struct proc_sphere_part_node_data local_data;

  if (yac_group_comm_get_size(local_group_comm) > 1) {
    local_data.data.node =
      redistribute_vertices_recursive(
        vertices, num_vertices, local_group_comm, gc_norm_vector,
        dist_vertex_dt);
    local_data.is_leaf = 0;
  } else {

    remove_duplicated_vertices(*vertices, num_vertices);
    local_data.data.rank = yac_group_comm_get_global_rank(group_comm);
    local_data.is_leaf = 1;
  }

  // get proc_sphere_part_node_data from remote group
  struct proc_sphere_part_node_data remote_data =
    get_remote_data(local_data, local_group_comm, remote_group_comm);

  // generate node
  struct proc_sphere_part_node * node = xmalloc(1 * sizeof(*node));
  if (group_rank < split_rank) {
    node->U = local_data;
    node->T = remote_data;
  } else {
    node->U = remote_data;
    node->T = local_data;
  }
  for (int i = 0; i < 3; ++i) node->gc_norm_vector[i] = gc_norm_vector[i];

  return node;
}
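
// Each recursion level bisects both the vertex set (across the splitting great
// circle) and the process group (at split_rank). The recursion ends once the
// local group consists of a single process, which becomes a leaf carrying its
// global rank; the subtree of the sibling group is obtained via
// get_remote_data, so on return every participating process holds the complete
// partition tree.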

static MPI_Datatype yac_get_dist_vertex_mpi_datatype(MPI_Comm comm) {

  MPI_Datatype coord_dt;
  yac_mpi_call(MPI_Type_contiguous(3, MPI_DOUBLE, &coord_dt), comm);
  return yac_create_resized(coord_dt, sizeof(struct dist_vertex), comm);
}

struct proc_sphere_part_node * yac_redistribute_vertices(
  struct dist_vertex ** vertices, size_t * num_vertices, MPI_Comm comm) {

  double base_gc_norm_vector[3] = {0.0, 0.0, 1.0};

  int comm_rank, comm_size;
  yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
  yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);

  uint64_t global_num_vertices = (uint64_t)*num_vertices;
  yac_mpi_call(
    MPI_Allreduce(
      MPI_IN_PLACE, &global_num_vertices, 1, MPI_UINT64_T, MPI_SUM, comm), comm);

  struct proc_sphere_part_node * proc_sphere_part;

  if ((comm_size > 1) && (global_num_vertices > 0)) {

    MPI_Datatype dist_vertex_dt = yac_get_dist_vertex_mpi_datatype(comm);
    struct yac_group_comm group_comm = yac_group_comm_new(comm);
    proc_sphere_part =
      redistribute_vertices_recursive(
        vertices, num_vertices, group_comm, base_gc_norm_vector,
        dist_vertex_dt);
    yac_group_comm_delete(group_comm);

    yac_mpi_call(MPI_Type_free(&dist_vertex_dt), comm);
  } else {
    proc_sphere_part = xmalloc(1 * sizeof(*proc_sphere_part));
    proc_sphere_part->U.data.rank = 0;
    proc_sphere_part->U.is_leaf = 1;
    proc_sphere_part->T.data.rank = 0;
    proc_sphere_part->T.is_leaf = 1;
    proc_sphere_part->gc_norm_vector[0] = base_gc_norm_vector[0];
    proc_sphere_part->gc_norm_vector[1] = base_gc_norm_vector[1];
    proc_sphere_part->gc_norm_vector[2] = base_gc_norm_vector[2];
  }

  return proc_sphere_part;
}
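
// A minimal usage sketch (illustrative only, not part of the library; the
// caller-side names "comm", "coords" and "num_coords" are hypothetical, and
// error handling is omitted):
//
//   struct dist_vertex * vertices = xmalloc(num_coords * sizeof(*vertices));
//   for (size_t i = 0; i < num_coords; ++i)
//     memcpy(vertices[i].coord, coords[i], 3 * sizeof(double));
//   size_t num_vertices = num_coords;
//
//   struct proc_sphere_part_node * part =
//     yac_redistribute_vertices(&vertices, &num_vertices, comm);
//
//   // "vertices" now holds the locally assigned subset; the tree can be
//   // queried, e.g. with yac_proc_sphere_part_do_point_search, and is
//   // presumably released with yac_proc_sphere_part_node_delete(part)
//
//   free(vertices);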

static int is_serial_node(struct proc_sphere_part_node * node) {
  return (node->U.is_leaf) && (node->T.is_leaf) &&
         (node->U.data.rank == 0) && (node->T.data.rank == 0);
}

void yac_proc_sphere_part_do_point_search(
  struct proc_sphere_part_node * node, yac_coordinate_pointer search_coords,
  size_t count, int * ranks) {

  if (is_serial_node(node)) {
    for (size_t i = 0; i < count; ++i) ranks[i] = 0;
    return;
  }

  for (size_t i = 0; i < count; ++i) {

    double curr_coord[3] =
      {search_coords[i][0], search_coords[i][1], search_coords[i][2]};

    struct proc_sphere_part_node * curr_node = node;

    while (1) {

      // get the angle between the norm vector of the split plane and the
      // search coordinate
      struct sin_cos_angle angle =
        get_vector_angle_2(curr_coord, curr_node->gc_norm_vector);

      if (angle.cos <= 0.0) {
        if (curr_node->U.is_leaf) {
          ranks[i] = curr_node->U.data.rank;
          break;
        } else {
          curr_node = curr_node->U.data.node;
          continue;
        }
      } else {
        if (curr_node->T.is_leaf) {
          ranks[i] = curr_node->T.data.rank;
          break;
        } else {
          curr_node = curr_node->T.data.node;
          continue;
        }
      }
    }
  }
}

static void bnd_circle_search(
  struct proc_sphere_part_node * node, struct bounding_circle bnd_circle,
  int * ranks, int * rank_count) {

  double dot = bnd_circle.base_vector[0] * node->gc_norm_vector[0] +
               bnd_circle.base_vector[1] * node->gc_norm_vector[1] +
               bnd_circle.base_vector[2] * node->gc_norm_vector[2];

  // angle < M_PI_2 + bnd_circle.inc_angle
  if (dot > - bnd_circle.inc_angle.sin) {

    if (node->T.is_leaf) {

      ranks[*rank_count] = node->T.data.rank;
      ++*rank_count;

    } else {
      bnd_circle_search(node->T.data.node, bnd_circle, ranks, rank_count);
    }
  }

  // angle > M_PI_2 - bnd_circle.inc_angle
  if (dot < bnd_circle.inc_angle.sin) {

    if (node->U.is_leaf) {

      ranks[*rank_count] = node->U.data.rank;
      ++*rank_count;

    } else {
      bnd_circle_search(node->U.data.node, bnd_circle, ranks, rank_count);
    }
  }
}
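
// The two dot-product tests above follow from dot = cos(a), where a is the
// angle between the bounding-circle base point and the split-plane normal:
// the circle can overlap the T side (dot(coord, gc_norm_vector) > 0.0) iff
// a < PI/2 + inc_angle, i.e. cos(a) > cos(PI/2 + inc_angle) = -sin(inc_angle),
// and it can overlap the U side iff a > PI/2 - inc_angle, i.e.
// cos(a) < sin(inc_angle).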

static void bnd_circle_search_big_angle(
  struct proc_sphere_part_node * node, struct bounding_circle bnd_circle,
  int * ranks, int * rank_count) {

  if (node->T.is_leaf) {

    ranks[*rank_count] = node->T.data.rank;
    ++*rank_count;

  } else {
    bnd_circle_search_big_angle(
      node->T.data.node, bnd_circle, ranks, rank_count);
  }

  if (node->U.is_leaf) {

    ranks[*rank_count] = node->U.data.rank;
    ++*rank_count;

  } else {
    bnd_circle_search_big_angle(
      node->U.data.node, bnd_circle, ranks, rank_count);
  }
}

void yac_proc_sphere_part_do_bnd_circle_search(
  struct proc_sphere_part_node * node, struct bounding_circle bnd_circle,
  int * ranks, int * rank_count) {

  YAC_ASSERT(
    compare_angles(bnd_circle.inc_angle, SIN_COS_M_PI) == -1,
    "ERROR(yac_proc_sphere_part_do_bnd_circle_search): angle is >= PI")

  // special case in which the proc_sphere_part_node only contains a single
  // rank
  if (is_serial_node(node)) {
    ranks[0] = 0;
    *rank_count = 1;
  } else if (bnd_circle.inc_angle.cos <= 0.0) {
    *rank_count = 0;
    bnd_circle_search_big_angle(node, bnd_circle, ranks, rank_count);
  } else {
    *rank_count = 0;
    bnd_circle_search(node, bnd_circle, ranks, rank_count);
  }
}

static int get_leaf_ranks(struct proc_sphere_part_node * node, int * ranks) {

  int curr_size;

  if (node->U.is_leaf) {
    *ranks = node->U.data.rank;
    curr_size = 1;
  } else {
    curr_size = get_leaf_ranks(node->U.data.node, ranks);
  }
  if (node->T.is_leaf) {
    ranks[curr_size++] = node->T.data.rank;
  } else {
    curr_size += get_leaf_ranks(node->T.data.node, ranks + curr_size);
  }

  return curr_size;
}

static void get_neigh_ranks(
  struct proc_sphere_part_node * node, uint64_t * leaf_sizes, uint64_t min_size,
  uint64_t ** inner_node_sizes, int * send_flags, int * recv_flags,
  int comm_rank, struct neigh_search_data * last_valid_node) {

  int curr_node_is_valid = (**inner_node_sizes >= min_size);

  struct neigh_search_data * curr_valid_node;
  struct neigh_search_data temp_valid_node;

  int * ranks = last_valid_node->ranks;

  if (curr_node_is_valid) {
    temp_valid_node.ranks = ranks;
    temp_valid_node.num_ranks = 0;
    temp_valid_node.node = node;
    curr_valid_node = &temp_valid_node;
    last_valid_node->num_ranks = 0;
  } else {
    curr_valid_node = last_valid_node;
  }

  for (int j = 0; j < 2; ++j) {

    struct proc_sphere_part_node_data node_data = (j == 0)?(node->U):(node->T);

    if (node_data.is_leaf) {

      int rank = node_data.data.rank;

      if (leaf_sizes[rank] < min_size) {

        if (curr_valid_node->num_ranks == 0)
          curr_valid_node->num_ranks =
            get_leaf_ranks(curr_valid_node->node, ranks);

        // if the current leaf is the local process
        if (rank == comm_rank)
          for (int i = 0; i < curr_valid_node->num_ranks; ++i)
            recv_flags[ranks[i]] = 1;

        // if the process of the current leaf requires data from the
        // local process
        for (int i = 0; i < curr_valid_node->num_ranks; ++i) {
          if (ranks[i] == comm_rank) {
            send_flags[rank] = 1;
            break;
          }
        }
      }

    } else {
      ++*inner_node_sizes;
      get_neigh_ranks(
        node_data.data.node, leaf_sizes, min_size, inner_node_sizes,
        send_flags, recv_flags, comm_rank, curr_valid_node);
    }
  }
}

static uint64_t determine_node_sizes(
  struct proc_sphere_part_node * node, uint64_t * leaf_sizes,
  uint64_t ** inner_node_sizes) {

  uint64_t * curr_inner_node_size = *inner_node_sizes;
  uint64_t node_size;
  if (node->U.is_leaf) {
    node_size = leaf_sizes[node->U.data.rank];
  } else {
    ++*inner_node_sizes;
    node_size =
      determine_node_sizes(node->U.data.node, leaf_sizes, inner_node_sizes);
  }
  if (node->T.is_leaf) {
    node_size += leaf_sizes[node->T.data.rank];
  } else {
    ++*inner_node_sizes;
    node_size +=
      determine_node_sizes(node->T.data.node, leaf_sizes, inner_node_sizes);
  }
  return (*curr_inner_node_size = node_size);
}
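
// determine_node_sizes fills inner_node_sizes in the same traversal order in
// which get_neigh_ranks consumes it: the running pointer is advanced once per
// non-leaf child, U before T, so both functions see the same pre-order
// sequence of inner nodes. Each entry ends up holding the sum of the leaf
// sizes of all ranks below the corresponding inner node.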

void yac_proc_sphere_part_get_neigh_ranks(
  struct proc_sphere_part_node * node, uint64_t * leaf_sizes,
  uint64_t min_size, int * send_flags, int * recv_flags,
  int comm_rank, int comm_size) {

  uint64_t * inner_node_sizes =
    xcalloc((size_t)comm_size, sizeof(*inner_node_sizes));

  uint64_t * temp_inner_node_sizes = inner_node_sizes;
  determine_node_sizes(node, leaf_sizes, &temp_inner_node_sizes);

  YAC_ASSERT(
    *inner_node_sizes >= min_size,
    "ERROR(yac_proc_sphere_part_get_neigh_ranks): sum of global leaf sizes "
    "is < min_size")

  struct neigh_search_data search_data = {
    .ranks = xmalloc((size_t)comm_size * sizeof(int)),
    .num_ranks = 0,
    .node = node
  };

  temp_inner_node_sizes = inner_node_sizes;
  get_neigh_ranks(node, leaf_sizes, min_size, &temp_inner_node_sizes,
                  send_flags, recv_flags, comm_rank, &search_data);

  send_flags[comm_rank] = 0;
  recv_flags[comm_rank] = 0;

  free(search_data.ranks);
  free(inner_node_sizes);
}
894
static struct sin_cos_angle get_vector_angle_2(double const a[3], double const b[3])
Definition geometry.h:415
static const struct sin_cos_angle SIN_COS_M_PI
Definition geometry.h:49
static void crossproduct_kahan(double const a[], double const b[], double cross[])
Definition geometry.h:332
static int compare_angles(struct sin_cos_angle a, struct sin_cos_angle b)
Definition geometry.h:428
static void normalise_vector(double v[])
Definition geometry.h:689
#define xcalloc(nmemb, size)
Definition ppm_xfuncs.h:64
#define xmalloc(size)
Definition ppm_xfuncs.h:66
struct proc_sphere_part_node * yac_redistribute_vertices(struct dist_vertex **vertices, size_t *num_vertices, MPI_Comm comm)
static int get_leaf_ranks(struct proc_sphere_part_node *node, int *ranks)
void yac_proc_sphere_part_get_neigh_ranks(struct proc_sphere_part_node *node, uint64_t *leaf_sizes, uint64_t min_size, int *send_flags, int *recv_flags, int comm_rank, int comm_size)
static void compute_redist_sendcounts_sdispls(uint64_t comm_rank, uint64_t split_rank, uint64_t comm_size, uint64_t global_sizes[2], uint64_t(*all_bucket_sizes)[2], uint64_t U_size, int *counts, int *displs)
void yac_proc_sphere_part_do_bnd_circle_search(struct proc_sphere_part_node *node, struct bounding_circle bnd_circle, int *ranks, int *rank_count)
static void compute_redist_recvcounts_rdispls(uint64_t comm_rank, uint64_t split_rank, uint64_t comm_size, uint64_t global_sizes[2], uint64_t(*all_bucket_sizes)[2], int *counts, int *displs, size_t *recv_count)
static uint64_t determine_node_sizes(struct proc_sphere_part_node *node, uint64_t *leaf_sizes, uint64_t **inner_node_sizes)
void yac_proc_sphere_part_do_point_search(struct proc_sphere_part_node *node, yac_coordinate_pointer search_coords, size_t count, int *ranks)
static int compare_dist_vertices_coord(const void *a, const void *b)
static void bnd_circle_search(struct proc_sphere_part_node *node, struct bounding_circle bnd_circle, int *ranks, int *rank_count)
static int is_serial_node(struct proc_sphere_part_node *node)
splicomm_tags
@ DATA_SIZE_TAG
@ DATA_TAG
static struct proc_sphere_part_node_data get_remote_data(struct proc_sphere_part_node_data local_data, struct yac_group_comm local_group_comm, struct yac_group_comm remote_group_comm)
void yac_proc_sphere_part_node_delete(struct proc_sphere_part_node *node)
static void get_neigh_ranks(struct proc_sphere_part_node *node, uint64_t *leaf_sizes, uint64_t min_size, uint64_t **inner_node_sizes, int *send_flags, int *recv_flags, int comm_rank, struct neigh_search_data *last_valid_node)
static MPI_Datatype yac_get_dist_vertex_mpi_datatype(MPI_Comm comm)
static void pack_proc_sphere_part_node(struct proc_sphere_part_node *node, void *pack_buffer, int pack_buffer_size, int *position, MPI_Comm comm)
static int get_proc_sphere_part_node_pack_size(struct proc_sphere_part_node node, MPI_Comm comm)
static void pack_proc_sphere_part_node_data(struct proc_sphere_part_node_data node_data, void *pack_buffer, int pack_buffer_size, int *position, MPI_Comm comm)
static struct proc_sphere_part_node * redistribute_vertices_recursive(struct dist_vertex **vertices, size_t *num_vertices, struct yac_group_comm group_comm, double prev_gc_norm_vector[3], MPI_Datatype dist_vertex_dt)
static int get_proc_sphere_part_node_data_pack_size(struct proc_sphere_part_node_data node_data, MPI_Comm comm)
static void remove_duplicated_vertices(struct dist_vertex *vertices, size_t *num_vertices)
static struct proc_sphere_part_node * unpack_proc_sphere_part_node(void *pack_buffer, int pack_buffer_size, int *position, MPI_Comm comm)
static struct proc_sphere_part_node_data unpack_proc_sphere_part_node_data(void *pack_buffer, int pack_buffer_size, int *position, MPI_Comm comm)
static void bnd_circle_search_big_angle(struct proc_sphere_part_node *node, struct bounding_circle bnd_circle, int *ranks, int *rank_count)
struct sin_cos_angle inc_angle
angle between the middle point and the boundary of the spherical cap
Definition geometry.h:61
double base_vector[3]
Definition geometry.h:59
double coord[3]
struct proc_sphere_part_node * node
struct proc_sphere_part_node * node
union proc_sphere_part_node_data::@45 data
struct proc_sphere_part_node_data U T
double sin
Definition geometry.h:41
double cos
Definition geometry.h:41
#define MIN(a, b)
#define MAX(a, b)
#define YAC_ASSERT(exp, msg)
Definition yac_assert.h:16
void yac_alltoallv_p2p_group(void const *send_buffer, int const *sendcounts, int const *sdispls, void *recv_buffer, int const *recvcounts, int const *rdispls, size_t dt_size, MPI_Datatype dt, struct yac_group_comm group_comm)
Definition yac_mpi.c:236
int yac_group_comm_get_global_rank(struct yac_group_comm group_comm)
Definition yac_mpi.c:493
int yac_group_comm_get_rank(struct yac_group_comm group_comm)
Definition yac_mpi.c:485
void yac_group_comm_split(struct yac_group_comm group_comm, int split_rank, struct yac_group_comm *local_group_comm, struct yac_group_comm *remote_group_comm)
Definition yac_mpi.c:505
struct yac_group_comm yac_group_comm_new(MPI_Comm comm)
Definition yac_mpi.c:470
void yac_allreduce_sum_dble(double *buffer, int count, struct yac_group_comm group_comm)
Definition yac_mpi.c:289
int yac_group_comm_get_size(struct yac_group_comm group_comm)
Definition yac_mpi.c:489
MPI_Datatype yac_create_resized(MPI_Datatype dt, size_t new_size, MPI_Comm comm)
Definition yac_mpi.c:550
void yac_bcast_group(void *buffer, int count, MPI_Datatype datatype, int root, struct yac_group_comm group_comm)
Definition yac_mpi.c:405
void yac_allgather_uint64(const uint64_t *sendbuf, uint64_t *recvbuf, int count, struct yac_group_comm group_comm)
Definition yac_mpi.c:361
void yac_group_comm_delete(struct yac_group_comm group_comm)
Definition yac_mpi.c:480
#define yac_mpi_call(call, comm)
double(* yac_coordinate_pointer)[3]
Definition yac_types.h:19