YAC 3.7.0
Yet Another Coupler
Loading...
Searching...
No Matches
proc_sphere_part.c
Go to the documentation of this file.
1// Copyright (c) 2024 The YAC Authors
2//
3// SPDX-License-Identifier: BSD-3-Clause
4
5#include "string.h"
6#include "proc_sphere_part.h"
7#include "remote_point.h"
8#include "ensure_array_size.h"
9#include "yac_mpi_internal.h"
10#ifdef SCOREP_USER_ENABLE
11#include "scorep/SCOREP_User.h"
12#endif // SCOREP_USER_ENABLE
13
18
19// WARNING: before changing this datatype ensure that the MPI datatype created
20// for this still matches its data layout
28
32 int * sdispls;
33 int * rdispls;
34};
35
37
39 union {
41 int rank;
44};
49
55
57 struct proc_sphere_part_node_data node_data, MPI_Comm comm);
58
60 struct proc_sphere_part_node node, MPI_Comm comm) {
61
62 int vec_pack_size;
64 MPI_Pack_size(3, MPI_DOUBLE, comm, &vec_pack_size), comm);
65
66 return vec_pack_size +
69}
70
// Returns the MPI_Pack buffer size (in bytes) required for one
// proc_sphere_part_node_data value: the is_leaf flag plus either the
// leaf rank (one int) or the packed size of the referenced child node.
// NOTE(review): this listing is an extraction of the original file; the
// function-name line and the recursive pack-size call (original lines
// 71 and 82) were dropped by the extraction — confirm against the
// upstream source before editing.
72 struct proc_sphere_part_node_data node_data, MPI_Comm comm) {
73
74 int int_pack_size;
// packed size of a single MPI_INT (used for both the is_leaf flag and
// the leaf rank)
75 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &int_pack_size), comm);
76
// the is_leaf discriminator is always packed
77 int data_size = int_pack_size;
78 if (node_data.is_leaf)
// leaf: one additional int (the owner rank)
79 data_size += int_pack_size;
80 else
// interior node: add the packed size of the child subtree
// (recursive pack-size call dropped by the extraction, original line 82)
81 data_size +=
83
84 return data_size;
85}
86
88 struct proc_sphere_part_node_data node_data, void * pack_buffer,
89 int pack_buffer_size, int * position, MPI_Comm comm);
90
92 struct proc_sphere_part_node * node, void * pack_buffer,
93 int pack_buffer_size, int * position, MPI_Comm comm) {
94
95 yac_mpi_call(MPI_Pack(&(node->gc_norm_vector[0]), 3, MPI_DOUBLE, pack_buffer,
96 pack_buffer_size, position, comm), comm);
98 node->U, pack_buffer, pack_buffer_size, position, comm);
100 node->T, pack_buffer, pack_buffer_size, position, comm);
101}
102
// Packs a proc_sphere_part_node_data value into pack_buffer at *position:
// first the is_leaf flag, then either the leaf rank or, recursively, the
// child node.  Mirrors the unpack routine further below.
// NOTE(review): the signature's first line (original line 103) was dropped
// by the extraction of this listing.
104 struct proc_sphere_part_node_data node_data, void * pack_buffer,
105 int pack_buffer_size, int * position, MPI_Comm comm) {
106
// leaf/interior discriminator is packed unconditionally
107 yac_mpi_call(MPI_Pack(&(node_data.is_leaf), 1, MPI_INT, pack_buffer,
108 pack_buffer_size, position, comm), comm);
109
110 if (node_data.is_leaf)
// leaf: pack the owning rank
111 yac_mpi_call(MPI_Pack(&(node_data.data.rank), 1, MPI_INT, pack_buffer,
112 pack_buffer_size, position, comm), comm);
113 else
// interior: recursively pack the whole child subtree
114 pack_proc_sphere_part_node(node_data.data.node, pack_buffer,
115 pack_buffer_size, position, comm);
116}
117
119 void * pack_buffer, int pack_buffer_size, int * position,
120 MPI_Comm comm);
121
123 void * pack_buffer, int pack_buffer_size, int * position,
124 MPI_Comm comm) {
125
126 struct proc_sphere_part_node * node = xmalloc(1 * sizeof(*node));
127
129 MPI_Unpack(pack_buffer, pack_buffer_size, position,
130 &(node->gc_norm_vector[0]), 3, MPI_DOUBLE, comm), comm);
131
132 node->U = unpack_proc_sphere_part_node_data(pack_buffer, pack_buffer_size,
133 position, comm);
134 node->T = unpack_proc_sphere_part_node_data(pack_buffer, pack_buffer_size,
135 position, comm);
136
137 return node;
138}
139
// Unpacks a proc_sphere_part_node_data value from pack_buffer at *position
// (inverse of the pack routine above): reads the is_leaf flag, then either
// the leaf rank or, recursively, a heap-allocated child node whose
// ownership passes to the caller.
// NOTE(review): the signature's first line and the call-wrapper lines
// (original lines 140, 146, 151 and 156) were dropped by the extraction of
// this listing — confirm against the upstream source before editing.
141 void * pack_buffer, int pack_buffer_size, int * position,
142 MPI_Comm comm) {
143
144 struct proc_sphere_part_node_data node_data;
145
// unpack the leaf/interior discriminator
147 MPI_Unpack(pack_buffer, pack_buffer_size, position,
148 &(node_data.is_leaf), 1, MPI_INT, comm), comm);
149
150 if (node_data.is_leaf)
// leaf: unpack the owning rank
152 MPI_Unpack(pack_buffer, pack_buffer_size, position,
153 &(node_data.data.rank), 1, MPI_INT, comm), comm);
154 else
// interior: recursively unpack the child subtree (allocated on the heap)
155 node_data.data.node =
157 pack_buffer, pack_buffer_size, position, comm);
158
159 return node_data;
160}
161
163 struct proc_sphere_part_node_data local_data,
164 struct yac_group_comm local_group_comm,
165 struct yac_group_comm remote_group_comm) {
166
167 int comm_rank;
168 MPI_Comm comm = local_group_comm.comm;
169 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
170
171 int data_size;
172 void * recv_buffer = NULL;
173
174 int order = local_group_comm.start < remote_group_comm.start;
175
176 for (int i = 0; i < 2; ++i) {
177
178 if (order == i) {
179
181 &data_size, 1, MPI_INT, remote_group_comm.start, local_group_comm);
182 recv_buffer = xmalloc((size_t)data_size);
183 yac_bcast_group(recv_buffer, data_size, MPI_PACKED,
184 remote_group_comm.start, local_group_comm);
185
186 } else {
187 if (comm_rank == local_group_comm.start) {
188
189 // pack local_data
190 int pack_buffer_size =
192 void * pack_buffer = xmalloc((size_t)pack_buffer_size);
193 int position = 0;
195 local_data, pack_buffer, pack_buffer_size, &position, comm);
196
197 // broadcast data size to other group
199 &position, 1, MPI_INT, local_group_comm.start, remote_group_comm);
200
201 // broadcast remote_data to other root
202 yac_bcast_group(pack_buffer, position, MPI_PACKED,
203 local_group_comm.start, remote_group_comm);
204 free(pack_buffer);
205 }
206 }
207 }
208
209 // unpack node data
210 int position = 0;
211 struct proc_sphere_part_node_data other_data =
213 recv_buffer, data_size, &position, comm);
214 free(recv_buffer);
215
216 return other_data;
217}
218
// Computes, for the calling rank, the per-source receive counts and
// displacements for the all-to-all redistribution that follows a split of
// the process group at split_rank.  Ranks < split_rank ("color" 0) receive
// a share of the U bucket, ranks >= split_rank ("color" 1) a share of the
// T bucket; within its colour group each rank receives one contiguous
// interval of the concatenated global element stream.
// NOTE(review): the signature's first line and an assertion-wrapper line
// (original lines 219 and 259) were dropped by the extraction of this
// listing.
220 int comm_rank, int split_rank, int comm_size,
221 size_t global_sizes[2], size_t (*all_bucket_sizes)[2],
222 int * counts, int * displs, size_t * recv_count) {
223
// colour: 0 = lower group (U data), 1 = upper group (T data)
224 int color = comm_rank >= split_rank;
225
// translate the global rank into (rank, size) within its colour group
226 int split_comm_size = split_rank;
227 int split_comm_rank = comm_rank;
228 if (color) {
229 split_comm_rank -= split_comm_size;
230 split_comm_size = comm_size - split_comm_size;
231 }
232
// ceiling-style even partition of the colour group's global element count:
// rank r receives [ceil(N*r/P), ceil(N*(r+1)/P)[
233 size_t global_size = global_sizes[color];
234 size_t local_interval_start =
235 (size_t)
236 (((long long)global_size * (long long)split_comm_rank +
237 (long long)(split_comm_size - 1)) /
238 (long long)split_comm_size);
239 size_t local_interval_end =
240 (size_t)
241 (((long long)global_size * (long long)(split_comm_rank+1) +
242 (long long)(split_comm_size - 1)) /
243 (long long)split_comm_size);
244
245 *recv_count = (size_t)(local_interval_end - local_interval_start);
246
// walk over all source ranks and intersect each rank's bucket slice
// [start_idx, next_start_idx[ with the local receive interval
247 size_t start_idx = 0;
248 for (int i = 0; i < comm_size; ++i) {
249
250 size_t next_start_idx = start_idx + all_bucket_sizes[i][color];
251 size_t interval_start = MAX(start_idx, local_interval_start);
252 size_t interval_end = MIN(next_start_idx, local_interval_end);
253
254 if (interval_start < interval_end) {
255
256 size_t count = interval_end - interval_start;
257 size_t disp = interval_start - local_interval_start;
258
// (assertion wrapper dropped by the extraction, original line 259)
260 (count <= INT_MAX) && (disp <= INT_MAX),
261 "ERROR(compute_redist_recvcounts_rdispls): invalid interval")
262
263 counts[i] = (int)count;
264 displs[i] = (int)disp;
265 } else {
// no overlap with rank i's slice
266 counts[i] = 0;
267 displs[i] = 0;
268 }
269
270 start_idx = next_start_idx;
271 }
272}
273
// Computes, for the calling rank, the per-destination send counts and
// displacements matching compute_redist_recvcounts_rdispls above: the
// local data is laid out as U elements followed by T elements (hence the
// U_size offset for colour 1), and each destination rank i receives the
// intersection of its target interval with the local element range.
// NOTE(review): the signature's first line and an assertion-wrapper line
// (original lines 274 and 313) were dropped by the extraction of this
// listing.
275 int comm_rank, int split_rank, int comm_size,
276 size_t global_sizes[2], size_t (*all_bucket_sizes)[2],
277 int * counts, int * displs) {
278
// local T data starts after the local U data in the send buffer
279 size_t U_size = all_bucket_sizes[comm_rank][0];
280
// global element range covered by the local rank, per bucket (colour)
281 size_t local_interval_start[2] = {0, 0};
282 size_t local_interval_end[2];
283 for (int i = 0; i < comm_rank; ++i)
284 for (int j = 0; j < 2; ++j)
285 local_interval_start[j] += all_bucket_sizes[i][j];
286 for (int j = 0; j < 2; ++j)
287 local_interval_end[j] =
288 local_interval_start[j] + all_bucket_sizes[comm_rank][j];
289
// sizes of the two colour groups after the split
290 int comm_sizes[2] = {split_rank, comm_size - split_rank};
291
292 size_t start_idx[2] = {0,0};
293 for (int i = 0; i < comm_size; ++i) {
294
// destination rank i belongs to colour group 0 or 1
295 int color = i >= split_rank;
296 size_t global_size = global_sizes[color];
297 int split_comm_rank = i - (color?(split_rank):0);
298 int split_comm_size = comm_sizes[color];
// end of rank i's target interval: ceil(N*(r+1)/P), same formula as in
// compute_redist_recvcounts_rdispls
299 size_t next_start_idx =
300 (size_t)(
301 ((long long)global_size * (long long)(split_comm_rank + 1) +
302 (long long)(split_comm_size - 1)) /
303 (long long)split_comm_size);
304 size_t interval_start = MAX(start_idx[color], local_interval_start[color]);
305 size_t interval_end = MIN(next_start_idx, local_interval_end[color]);
306
307 if (interval_start < interval_end) {
308
309 size_t count = interval_end - interval_start;
// displacement within the local send buffer (T data is offset by U_size)
310 size_t disp = interval_start - local_interval_start[color] +
311 ((color)?(U_size):(0));
312
// (assertion wrapper dropped by the extraction, original line 313)
314 (count <= INT_MAX) && (disp <= INT_MAX),
315 "ERROR(compute_redist_sendcounts_sdispls): invalid interval")
316
317 counts[i] = (int)count;
318 displs[i] = (int)disp;
319 } else {
// nothing to send to rank i
320 counts[i] = 0;
321 displs[i] = 0;
322 }
323
324 start_idx[color] = next_start_idx;
325 }
326}
327
// Applies the permutation reorder_idx to data in place using
// cycle-following (O(count) time, O(1) extra space).  After the call,
// data is permuted and reorder_idx has been overwritten with the identity
// permutation — callers must not reuse its previous contents.
// NOTE(review): the signature's first line (original line 328) was dropped
// by the extraction of this listing.
329 size_t * reorder_idx, size_t count, struct remote_point_info * data) {
330
331 // this routine assumes that all entries in reorder_idx are unique and in the
332 // range [0;count[
333
334 // for all elements
335 for (size_t i = 0; i < count; ++i) {
336
// elements already in place start no cycle (and broken cycles were
// already fixed up by earlier iterations, which reset their indices)
337 if (reorder_idx[i] != i) {
338
339 size_t j = i;
340 struct remote_point_info temp = data[i];
341
// follow the cycle starting at i, shifting each element into place and
// marking visited positions by writing the identity index
342 while(1) {
343 { // swap(j, reorder_idx[j]}
344 size_t swap = reorder_idx[j];
345 reorder_idx[j] = j;
346 j = swap;
347 }
348 if (j == i) break;
349 { // swap(temp, data[j]
350 struct remote_point_info swap = data[j];
351 data[j] = temp;
352 temp = swap;
353 }
354 };
355
// close the cycle: the displaced first element lands at position i
356 data[i] = temp;
357 }
358 }
359}
360
// qsort comparator for struct dist_vertex: orders first by grid index,
// then — within one grid — by global id when available, otherwise by
// coordinate.  XT_INT_MAX is the sentinel for "no global id"; vertices of
// one grid either all carry ids or none do (see the callers that set
// global_id from global_ids_missing), so comparing a's sentinel alone is
// sufficient here.
// NOTE(review): the name of the coordinate-comparison call (original line
// 375) was dropped by the extraction of this listing — presumably a
// lexicographic coordinate comparator; confirm against the upstream source.
361static int compare_dist_vertices(const void * a, const void * b) {
362
363 int ret;
364
// primary key: grid index (0 or 1, so the subtraction cannot overflow)
365 if ((ret = ((const struct dist_vertex *)a)->grid_idx -
366 ((const struct dist_vertex *)b)->grid_idx)) return ret;
// secondary key: global id, compared via the (x > y) - (x < y) idiom to
// avoid overflow of a direct subtraction
367 if (((const struct dist_vertex *)a)->global_id != XT_INT_MAX)
368 return
369 (((const struct dist_vertex *)a)->global_id >
370 ((const struct dist_vertex *)b)->global_id) -
371 (((const struct dist_vertex *)a)->global_id <
372 ((const struct dist_vertex *)b)->global_id);
373 else
// fallback: compare by coordinate (call name dropped by the extraction)
374 return
376 ((const struct dist_vertex *)a)->coord,
377 ((const struct dist_vertex *)b)->coord);
378}
379
381 struct dist_vertex * vertices, size_t * num_vertices,
382 struct remote_point_info * owners, size_t num_owners,
383 size_t ** owners_reorder_idx, size_t * owners_reorder_idx_array_size) {
384
385 if (*num_vertices == 0) return;
386
388 *owners_reorder_idx, *owners_reorder_idx_array_size, num_owners);
389
390 // initialise owner offset
391 {
392 size_t owner_offset = 0;
393 for (size_t i = 0; i < *num_vertices; ++i) {
394 vertices[i].owner_offset = owner_offset;
395 owner_offset += (size_t)(vertices[i].num_owners);
396 }
399 "ERROR(remove_duplicated_vertices): internal error "
400 "(owner_offset != num_owners)");
401 }
402
403 // sort vertices by grid id and global id, if available otherwise compare
404 // coordinates
405 qsort(vertices, *num_vertices, sizeof(*vertices), compare_dist_vertices);
406 // on GPU/NEC VE do separate sorts for each criterion using radix sort
407
408 size_t old_num_vertices = *num_vertices;
409 size_t new_num_vertices = 0;
410 struct dist_vertex dummy_vertex = {.grid_idx = INT_MAX};
411 struct dist_vertex * prev_vertex = &dummy_vertex;
412 struct dist_vertex * curr_vertex = vertices;
413 size_t * owners_reorder_idx_ = *owners_reorder_idx;
414 for (size_t i = 0, reorder_idx = 0; i < old_num_vertices;
415 ++i, ++curr_vertex) {
416 size_t owner_offset = curr_vertex->owner_offset;
417 for (int j = 0; j < curr_vertex->num_owners;
418 ++j, ++owner_offset, ++reorder_idx)
419 owners_reorder_idx_[owner_offset] = reorder_idx;
420 if (compare_dist_vertices(prev_vertex, curr_vertex)) {
421 prev_vertex = vertices + new_num_vertices;
422 ++new_num_vertices;
423 if (prev_vertex != curr_vertex) *prev_vertex = *curr_vertex;
424 } else {
425 prev_vertex->num_owners += curr_vertex->num_owners;
426 }
427 }
428 *num_vertices = new_num_vertices;
429
430 // reorder owners according to new vertex order
431 reorder_data_remote_point_info(owners_reorder_idx_, num_owners, owners);
432}
433
435 struct dist_vertex ** vertices, size_t * num_vertices,
436 struct remote_point_info ** owners, size_t * num_owners,
437 size_t global_bucket_sizes[2], size_t (*all_bucket_sizes)[2],
438 int split_rank, struct comm_buffers comm_buffers,
439 size_t ** owners_reorder_idx, size_t * owners_reorder_idx_array_size,
440 MPI_Datatype dist_vertex_dt, MPI_Datatype remote_point_info_dt,
441 struct yac_group_comm group_comm) {
442
443#ifdef SCOREP_USER_ENABLE
444SCOREP_USER_REGION_DEFINE( redist_data_region )
445SCOREP_USER_REGION_BEGIN(
446 redist_data_region, "data redistribution", SCOREP_USER_REGION_TYPE_COMMON )
447#endif
448
449 int group_rank = yac_group_comm_get_rank(group_comm);
450 int group_size = yac_group_comm_get_size(group_comm);
451
452 // compute send and receive counts and respective ranks for data
453 // redistribution
455 group_rank, split_rank, group_size, global_bucket_sizes, all_bucket_sizes,
457 size_t new_num_vertices;
459 group_rank, split_rank, group_size, global_bucket_sizes, all_bucket_sizes,
460 comm_buffers.recvcounts, comm_buffers.rdispls, &new_num_vertices);
461
462 // redistribute vertices
463 struct dist_vertex * new_vertices =
464 xmalloc(new_num_vertices * sizeof(*new_vertices));
468 sizeof(**vertices), dist_vertex_dt, group_comm);
469
470 // adjust comm_buffers for redistribution of owner data
471 size_t new_num_owners;
472 {
473 size_t saccu = 0, raccu = 0, send_vertex_idx = 0, recv_vertex_idx = 0;
474 for (int i = 0; i < group_size; ++i) {
475 size_t send_count = 0, recv_count = 0;
476 for (int j = 0; j < comm_buffers.sendcounts[i]; ++j, ++send_vertex_idx)
477 send_count += (size_t)((*vertices)[send_vertex_idx].num_owners);
478 for (int j = 0; j < comm_buffers.recvcounts[i]; ++j, ++recv_vertex_idx)
479 recv_count += (size_t)(new_vertices[recv_vertex_idx].num_owners);
481 (saccu <= INT_MAX) && (raccu <= INT_MAX),
482 "ERROR(redistribute_dist_vertices): displacement exceeds INT_MAX");
484 (send_count <= INT_MAX) && (recv_count <= INT_MAX),
485 "ERROR(redistribute_dist_vertices): counts exceeds INT_MAX");
486 comm_buffers.sendcounts[i] = (int)send_count;
487 comm_buffers.recvcounts[i] = (int)recv_count;
488 comm_buffers.sdispls[i] = (int)saccu;
489 comm_buffers.rdispls[i] = (int)raccu;
490 saccu += send_count;
491 raccu += recv_count;
492 }
494 saccu == *num_owners,
495 "ERROR(redistribute_dist_vertices): inconsistent owner count");
496 new_num_owners = raccu;
497 }
498
499 // redistribute owner data
500 struct remote_point_info * new_owners =
501 xmalloc(new_num_owners * sizeof(*new_owners));
505 sizeof(**owners), remote_point_info_dt, group_comm);
506
507 free(*vertices);
508 free(*owners);
509 *vertices = new_vertices;
510 *num_vertices = new_num_vertices;
511 *owners = new_owners;
512 *num_owners = new_num_owners;
513
514 // check for duplicated vertices
515 // (identified either by global id (if available) or coordinates)
517 *vertices, num_vertices, *owners, *num_owners,
518 owners_reorder_idx, owners_reorder_idx_array_size);
519
520#ifdef SCOREP_USER_ENABLE
521SCOREP_USER_REGION_END( redist_data_region )
522#endif
523}
524
526 struct dist_vertex ** vertices, size_t * num_vertices,
527 struct remote_point_info ** owners, size_t * num_owners,
528 size_t (*all_bucket_sizes)[2], struct comm_buffers comm_buffers,
529 size_t ** reorder_idx, size_t * reorder_idx_array_size,
530 int ** list_flag_, size_t * list_flag_array_size,
531 MPI_Datatype dist_vertex_dt, MPI_Datatype remote_point_info_dt,
532 struct yac_group_comm group_comm, double prev_gc_norm_vector[3]) {
533
534#ifdef SCOREP_USER_ENABLE
535SCOREP_USER_REGION_DEFINE( local_balance_point_region )
536SCOREP_USER_REGION_DEFINE( global_balance_point_region )
537SCOREP_USER_REGION_DEFINE( splitting_region )
538SCOREP_USER_REGION_DEFINE( comm_split_region )
539#endif
540
541 int group_rank = yac_group_comm_get_rank(group_comm);
542 int group_size = yac_group_comm_get_size(group_comm);
543
544 //--------------------
545 // compute split plane
546 //--------------------
547
548#ifdef SCOREP_USER_ENABLE
549SCOREP_USER_REGION_BEGIN(
550 local_balance_point_region, "local balance point",
551 SCOREP_USER_REGION_TYPE_COMMON )
552#endif
553 // compute local balance point
554 double balance_point[3] = {0.0, 0.0, 0.0};
555 for (size_t i = 0; i < *num_vertices; ++i) {
556 double * vertex_coord = (*vertices)[i].coord;
557 for (int j = 0; j < 3; ++j) balance_point[j] += vertex_coord[j];
558 }
559#ifdef SCOREP_USER_ENABLE
560SCOREP_USER_REGION_END( local_balance_point_region )
561SCOREP_USER_REGION_BEGIN(
562 global_balance_point_region, "global balance point",
563 SCOREP_USER_REGION_TYPE_COMMON )
564#endif
565
566 // compute global balance point (make sure that the allreduce operation
567 // generates bit-identical results on all processes)
568 yac_allreduce_sum_dble(&(balance_point[0]), 3, group_comm);
569
570 // check whether the computed balance_point is unambiguous, otherwise use
571 // a point which is perpendicularly to the previous split plane
572 if ((fabs(balance_point[0]) > 1e-9) ||
573 (fabs(balance_point[1]) > 1e-9) ||
574 (fabs(balance_point[2]) > 1e-9)) {
575 normalise_vector(balance_point);
576 } else {
577 balance_point[0] = prev_gc_norm_vector[2];
578 balance_point[1] = prev_gc_norm_vector[0];
579 balance_point[2] = prev_gc_norm_vector[1];
580 }
581
582 // compute norm vector of new split plane
583 double gc_norm_vector[3];
584 crossproduct_kahan(balance_point, prev_gc_norm_vector, gc_norm_vector);
585
586 // check whether the computed norm vector is unambiguous, otherwise use
587 // a norm vector which is perpendicularly to the previous split plane
588 if ((fabs(gc_norm_vector[0]) > 1e-9) ||
589 (fabs(gc_norm_vector[1]) > 1e-9) ||
590 (fabs(gc_norm_vector[2]) > 1e-9)) {
592 } else {
593 gc_norm_vector[0] = prev_gc_norm_vector[2];
594 gc_norm_vector[1] = prev_gc_norm_vector[0];
595 gc_norm_vector[2] = prev_gc_norm_vector[1];
596 }
597
598#ifdef SCOREP_USER_ENABLE
599SCOREP_USER_REGION_END( global_balance_point_region )
600#endif
601
602 //-----------------
603 // split local data
604 //-----------------
605
606#ifdef SCOREP_USER_ENABLE
607SCOREP_USER_REGION_BEGIN( splitting_region, "splitting data", SCOREP_USER_REGION_TYPE_COMMON )
608#endif
609
610 ENSURE_ARRAY_SIZE(*list_flag_, *list_flag_array_size, *num_vertices);
611 int * list_flag = *list_flag_;
612
613 // angle between a vertex coord and the great circle plane:
614 // acos(dot(gc_norm_vector, vertex_coord)) = angle(gc_norm_vector, vertex_coord)
615 // acos(dot(gc_norm_vector, vertex_coord)) - PI/2 = angle(gc_plane, vertex_coord)
616 // dot <= 0.0 -> U list
617 // dot > 0.0 -> T list
618
619 // compute for each vertex the list it belongs to
620 struct dist_vertex * vertices_ = *vertices;
621 size_t num_vertices_ = *num_vertices;
623 {
625 for (size_t i = 0; i < num_vertices_; ++i) {
626 double * curr_coordinates_xyz = vertices_[i].coord;
627 double dot = curr_coordinates_xyz[0] * gc_norm_vector[0] +
628 curr_coordinates_xyz[1] * gc_norm_vector[1] +
629 curr_coordinates_xyz[2] * gc_norm_vector[2];
630
631 // if (angle >= M_PI_2)
632 list_flag[i] = dot <= 0.0;
633 }
634 }
635 size_t U_size = 0, T_size = 0;
636 for (size_t i = 0; i < num_vertices_; ++i) {
637 if (list_flag[i]) ++U_size;
638 else ++T_size;
639 }
640
641 // initialise owner offset
642 for (size_t i = 0, owner_offset = 0; i < num_vertices_; ++i) {
643 vertices_[i].owner_offset = owner_offset;
644 owner_offset += (size_t)(vertices_[i].num_owners);
645 }
646
647 // The number of T-vertices among the first U-size number of vertices in the
648 // array is equal to the number of U-vertices in the remaining array. These
649 // have to be exchanged in order to get a sorted vertex array
650
651 // search for all T-vertices in the U-part of the array and swap them with a
652 // U-vertex in the T-part
653 for (size_t i = 0, j = U_size; i < U_size; ++i) {
654 // if the current vertex belongs to the T-list
655 if (!list_flag[i]) {
656 // search for a matching U-vertex
657 for (;!list_flag[j];++j);
658 struct dist_vertex temp_vertex = vertices_[i];
659 vertices_[i] = vertices_[j];
660 vertices_[j] = temp_vertex;
661 ++j;
662 }
663 }
664
665 // reorder owners according to new vertex order
666 ENSURE_ARRAY_SIZE(*reorder_idx, *reorder_idx_array_size, *num_owners);
667 size_t * reorder_idx_ = *reorder_idx;
668 for (size_t i = 0, k = 0; i < num_vertices_; ++i) {
669 size_t offset = vertices_[i].owner_offset;
670 int num_owners = vertices_[i].num_owners;
671 for (int j = 0; j < num_owners; ++j, ++k, ++offset) {
672 reorder_idx_[offset] = k;
673 }
674 }
675 reorder_data_remote_point_info(reorder_idx_, *num_owners, *owners);
676
677#ifdef SCOREP_USER_ENABLE
678SCOREP_USER_REGION_END( splitting_region )
679#endif
680
681 size_t bucket_sizes[2] = {U_size, T_size};
682
683 // exchange local U/T sizes between all processes
685 &(bucket_sizes[0]), &(all_bucket_sizes[0][0]), 2, group_comm);
686
687 // determine global U/T sizes
688 size_t global_bucket_sizes[2] = {0, 0};
689 for (int i = 0; i < group_size; ++i)
690 for (int j = 0; j < 2; ++j)
691 global_bucket_sizes[j] += all_bucket_sizes[i][j];
692 size_t global_num_vertices =
693 global_bucket_sizes[0] + global_bucket_sizes[1];
694
695 //----------------------
696 // split into two groups
697 //----------------------
698#ifdef SCOREP_USER_ENABLE
699SCOREP_USER_REGION_BEGIN(
700 comm_split_region, "creating splitcomm", SCOREP_USER_REGION_TYPE_COMMON )
701#endif
702 // determine processor groups
703 int split_rank =
704 MIN(
705 (int)MAX(
706 ((global_bucket_sizes[0] * (size_t)group_size +
707 global_num_vertices/2) / global_num_vertices), 1),
708 group_size - 1);
709
710 // generate processor groups
711 struct yac_group_comm local_group_comm, remote_group_comm;
713 group_comm, split_rank, &local_group_comm, &remote_group_comm);
714
715#ifdef SCOREP_USER_ENABLE
716SCOREP_USER_REGION_END( comm_split_region )
717#endif
718
719 //------------------
720 // redistribute data
721 //------------------
722
724 vertices, num_vertices, owners, num_owners,
725 global_bucket_sizes, all_bucket_sizes, split_rank, comm_buffers,
726 reorder_idx, reorder_idx_array_size, dist_vertex_dt, remote_point_info_dt,
727 group_comm);
728
729 //----------
730 // recursion
731 //----------
732
733 // generate proc_sphere_part node for remaining data
734 struct proc_sphere_part_node_data local_data;
735
736 if (yac_group_comm_get_size(local_group_comm) > 1) {
737 local_data.data.node =
739 vertices, num_vertices, owners, num_owners,
740 all_bucket_sizes, comm_buffers, reorder_idx, reorder_idx_array_size,
741 list_flag_, list_flag_array_size, dist_vertex_dt, remote_point_info_dt,
742 local_group_comm, gc_norm_vector);
743 local_data.is_leaf = 0;
744 } else {
745
746 local_data.data.rank = yac_group_comm_get_global_rank(group_comm);
747 local_data.is_leaf = 1;
748 }
749
750 // get proc_sphere_part_node_data from remote group
751 struct proc_sphere_part_node_data remote_data =
752 get_remote_data(local_data, local_group_comm, remote_group_comm);
753
754 // generate node
755 struct proc_sphere_part_node * node = xmalloc(1 * sizeof(*node));
756 if (group_rank < split_rank) {
757 node->U = local_data;
758 node->T = remote_data;
759 } else {
760 node->U = remote_data;
761 node->T = local_data;
762 }
763 memcpy(node->gc_norm_vector, gc_norm_vector, sizeof(gc_norm_vector));
764
765 return node;
766}
767
768static MPI_Datatype yac_get_dist_vertex_mpi_datatype(MPI_Comm comm) {
769
770 struct dist_vertex dummy;
771 MPI_Datatype dist_vertex_dt;
772 int array_of_blocklengths[] = {3, 1, 1, 1, 1};
773 const MPI_Aint array_of_displacements[] =
774 {(MPI_Aint)(intptr_t)(const void *)&(dummy.coord[0]) -
775 (MPI_Aint)(intptr_t)(const void *)&dummy,
776 (MPI_Aint)(intptr_t)(const void *)&(dummy.grid_idx) -
777 (MPI_Aint)(intptr_t)(const void *)&dummy,
778 (MPI_Aint)(intptr_t)(const void *)&(dummy.global_id) -
779 (MPI_Aint)(intptr_t)(const void *)&dummy,
780 (MPI_Aint)(intptr_t)(const void *)&(dummy.num_owners) -
781 (MPI_Aint)(intptr_t)(const void *)&dummy};
782 const MPI_Datatype array_of_types[] =
783 {MPI_DOUBLE, MPI_INT, yac_int_dt, MPI_INT};
785 MPI_Type_create_struct(4, array_of_blocklengths, array_of_displacements,
786 array_of_types, &dist_vertex_dt), comm);
787 return yac_create_resized(dist_vertex_dt, sizeof(dummy), comm);
788}
789
791 struct dist_vertex * dist_vertices, size_t num_dist_vertices,
792 struct remote_point_info * dist_owners,
793 size_t ** reorder_idx, size_t * reorder_idx_array_size,
794 yac_int **global_vertex_ids[2], int * global_ids_missing,
795 int **vertex_ranks[2], size_t * num_vertices, MPI_Comm comm) {
796
797 // Here we assume that the vertices are sorted first by their grid index and
798 // second by global id/coordinate (depending on availability).
799 // Additionally, the vertices should contain no duplications.
800
801 int comm_rank, comm_size;
802 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
803 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
804
805 // count the number of vertices per grid
806 size_t num_vertices_per_grid[2] = {0, 0};
807 size_t num_owners_per_grid[2] = {0, 0};
808 for (size_t i = 0; i < num_dist_vertices; ++i) {
809 num_vertices_per_grid[dist_vertices[i].grid_idx]++;
810 num_owners_per_grid[dist_vertices[i].grid_idx] +=
811 (size_t)(dist_vertices[i].num_owners);
812 }
813
814 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
816 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
817
819 *reorder_idx, *reorder_idx_array_size,
820 MAX((num_owners_per_grid[0] + num_vertices[0]),
821 (num_owners_per_grid[1] + num_vertices[1])));
822 yac_int * global_vertex_ids_buffer =
823 xmalloc(
824 MAX(
825 (global_ids_missing[0]?(num_owners_per_grid[0] + num_vertices[0]):0),
826 (global_ids_missing[1]?(num_owners_per_grid[1] + num_vertices[1]):0)) *
827 sizeof(*global_vertex_ids_buffer));
828
829 // for both grids
830 for (int grid_idx = 0; grid_idx < 2; ++grid_idx) {
831
832 struct dist_vertex * dist_grid_vertices =
833 dist_vertices + ((grid_idx == 0)?0:num_vertices_per_grid[0]);
834 struct remote_point_info * dist_grid_owners =
835 dist_owners + ((grid_idx == 0)?0:num_owners_per_grid[0]);
836
837 // if the user did not provide global vertex ids for the current grid
838 yac_int id_offset = 0;
839 if (global_ids_missing[grid_idx]) {
840
842 num_vertices_per_grid[grid_idx] <= (size_t)XT_INT_MAX,
843 "ERROR(inform_dist_vertex_owners): global_id out of bounds");
844
845 // determine exclusive scan of sum of numbers of unique
846 // coordinates on all ranks
847 yac_int yac_int_num_vertices = (yac_int)num_vertices_per_grid[grid_idx];
848 yac_mpi_call(MPI_Exscan(&yac_int_num_vertices, &id_offset, 1, yac_int_dt,
849 MPI_SUM, comm), comm);
850 if (comm_rank == 0) id_offset = 0;
851
853 ((size_t)id_offset + num_vertices_per_grid[grid_idx]) <=
854 (size_t)XT_INT_MAX,
855 "ERROR(inform_dist_vertex_owners): global_id out of bounds")
856 }
857
858 memset(sendcounts, 0, (size_t)(comm_size + 1) * sizeof(*sendcounts));
859
860 // determine send counts for vertex information
861 for (size_t i = 0; i < num_owners_per_grid[grid_idx]; ++i)
862 sendcounts[dist_grid_owners[i].rank]++;
864 1, sendcounts, recvcounts, sdispls, rdispls, comm);
865
866 size_t * send_reorder_idx = *reorder_idx;
867 size_t * recv_reorder_idx = *reorder_idx + num_owners_per_grid[grid_idx];
868 yac_int * send_global_vertex_ids = global_vertex_ids_buffer;
869 yac_int * recv_global_vertex_ids = global_vertex_ids_buffer +
870 num_owners_per_grid[grid_idx];
871
872 for (size_t i = 0, k = 0; i < num_vertices_per_grid[grid_idx]; ++i) {
873
874 int num_vertex_owners = dist_grid_vertices[i].num_owners;
875
876 for (int j = 0; j < num_vertex_owners; ++j, ++k) {
877
878 size_t pos = sdispls[dist_grid_owners[k].rank + 1]++;
879 send_reorder_idx[pos] = dist_grid_owners[k].orig_pos;
880 if (global_ids_missing[grid_idx])
881 send_global_vertex_ids[pos] = id_offset + (yac_int)i;
882 }
883 }
884
885 // exchange reorder idx
887 send_reorder_idx, sendcounts, sdispls,
888 recv_reorder_idx, recvcounts, rdispls,
889 sizeof(*send_reorder_idx), YAC_MPI_SIZE_T, comm,
890 "inform_dist_vertex_owners", __LINE__);
891
892 // generate vertex ranks
893 {
894 int * curr_vertex_ranks =
895 xmalloc(num_vertices[grid_idx] * sizeof(*curr_vertex_ranks));
896 size_t j = 0;
897 for (int rank = 0; rank < comm_size; ++rank)
898 for (size_t i = 0; i < recvcounts[rank]; ++i, ++j)
899 curr_vertex_ranks[recv_reorder_idx[j]] = rank;
900 *(vertex_ranks[grid_idx]) = curr_vertex_ranks;
901 }
902
903 // exchange and set global ids (if not provided by the user)
904 if (global_ids_missing[grid_idx]) {
905
907 send_global_vertex_ids, sendcounts, sdispls,
908 recv_global_vertex_ids, recvcounts, rdispls,
909 sizeof(*send_global_vertex_ids), yac_int_dt, comm,
910 "inform_dist_vertex_owners", __LINE__);
911
912 yac_int * curr_global_vertex_ids =
913 xmalloc(num_vertices[grid_idx] * sizeof(*curr_global_vertex_ids));
914
915 for (size_t i = 0; i < num_vertices[grid_idx]; ++i)
916 curr_global_vertex_ids[recv_reorder_idx[i]] = recv_global_vertex_ids[i];
917
918 *(global_vertex_ids[grid_idx]) = curr_global_vertex_ids;
919 }
920 }
921
922 free(global_vertex_ids_buffer);
923
924 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
925}
926
928 yac_coordinate_pointer vertex_coordinates[2], size_t * num_vertices,
929 struct proc_sphere_part_node ** proc_sphere_part,
930 yac_int **global_vertex_ids_[2], int **vertex_ranks[2], MPI_Comm comm) {
931
932 // check whether there are user-provided global vertex ids
933 yac_int *global_vertex_ids[2] =
934 {*(global_vertex_ids_[0]), *(global_vertex_ids_[1])};
935 int global_ids_missing[2] =
936 {(num_vertices[0] > 0) && (global_vertex_ids[0] == NULL),
937 (num_vertices[1] > 0) && (global_vertex_ids[1] == NULL)};
939 MPI_Allreduce(MPI_IN_PLACE, global_ids_missing, 2, MPI_INT, MPI_MAX, comm),
940 comm);
941
942 size_t total_num_vertices = num_vertices[0] + num_vertices[1];
943
944 int comm_rank, comm_size;
945 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
946 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
947
948 double base_gc_norm_vector[3] = {0.0,0.0,1.0};
949
950 int vertices_available = total_num_vertices > 0;
952 MPI_Allreduce(
953 MPI_IN_PLACE, &vertices_available, 1, MPI_INT, MPI_MAX, comm), comm);
954
955 if ((comm_size > 1) && vertices_available) {
956
957 // generate basic owner information for all vertices
958 struct remote_point_info * dist_owners =
959 xmalloc(total_num_vertices * sizeof(*dist_owners));
960 for (size_t i = 0; i < num_vertices[0]; ++i)
961 dist_owners[i] =
962 (struct remote_point_info){.rank = comm_rank, .orig_pos = i};
963 for (size_t i = 0, j = num_vertices[0]; i < num_vertices[1]; ++i, ++j)
964 dist_owners[j] =
965 (struct remote_point_info){.rank = comm_rank, .orig_pos = i};
966
967 // generate vertex information for all vertices
968 struct dist_vertex * dist_vertices =
969 xmalloc(total_num_vertices * sizeof(*dist_vertices));
970 {
971 size_t k = 0;
972 // for both grids
973 for (int i = 0; i < 2; ++i) {
974 // for all vertices of the grid
975 for (size_t j = 0; j < num_vertices[i]; ++j, ++k) {
976 memcpy(
977 dist_vertices[k].coord, vertex_coordinates[i][j],
978 3 * sizeof(vertex_coordinates[i][j][0]));
979 dist_vertices[k].grid_idx = i;
980 dist_vertices[k].global_id =
981 (global_ids_missing[i])?XT_INT_MAX:global_vertex_ids[i][j];
982 dist_vertices[k].num_owners = 1;
983 }
984 }
985 }
986
987 // set up buffers for generation of proc_sphere_part
988 size_t num_dist_vertices = total_num_vertices;
989 size_t num_dist_owners = total_num_vertices;
990 size_t (*all_bucket_sizes)[2] =
991 xmalloc((size_t)comm_size * sizeof(*(all_bucket_sizes)));
994 xmalloc(4 * (size_t)comm_size * sizeof(*(comm_buffers.sendcounts)));
998 size_t * reorder_idx = NULL, reorder_idx_array_size = 0;
999 int * list_flag = NULL;
1000 size_t list_flag_array_size = 0;
1001 MPI_Datatype dist_vertex_dt = yac_get_dist_vertex_mpi_datatype(comm);
1002 MPI_Datatype remote_point_info_dt =
1004 struct yac_group_comm group_comm = yac_group_comm_new(comm);
1005
1006 // initial redistribute of all vertices
1007 // (in case one of the two grids has significantly more vertices per process
1008 // than the other, this improves the load balance in the initial step)
1010 (size_t[2]){total_num_vertices,0}, &(all_bucket_sizes[0][0]), 2,
1011 group_comm);
1012 size_t global_num_vertices = 0;
1013 for (int i = 0; i < comm_size; ++i)
1014 global_num_vertices += all_bucket_sizes[i][0];
1016 &dist_vertices, &num_dist_vertices, &dist_owners, &num_dist_owners,
1017 (size_t[2]){global_num_vertices, 0}, all_bucket_sizes, comm_size,
1018 comm_buffers, &reorder_idx, &reorder_idx_array_size,
1019 dist_vertex_dt, remote_point_info_dt, group_comm);
1020
1021 // generate proc_sphere_part
1022 *proc_sphere_part =
1024 &dist_vertices, &num_dist_vertices, &dist_owners, &num_dist_owners,
1025 all_bucket_sizes, comm_buffers, &reorder_idx, &reorder_idx_array_size,
1026 &list_flag, &list_flag_array_size, dist_vertex_dt, remote_point_info_dt,
1027 group_comm, base_gc_norm_vector);
1028
1029 // cleanup
1030 yac_group_comm_delete(group_comm);
1031 yac_mpi_call(MPI_Type_free(&remote_point_info_dt), comm);
1032 yac_mpi_call(MPI_Type_free(&dist_vertex_dt), comm);
1033 free(list_flag);
1035 free(all_bucket_sizes);
1036
1037 // return information about distributed vertices to original owners
1039 dist_vertices, num_dist_vertices, dist_owners,
1040 &reorder_idx, &reorder_idx_array_size, global_vertex_ids_,
1041 global_ids_missing, vertex_ranks, num_vertices, comm);
1042
1043 // cleanup
1044 free(reorder_idx);
1045 free(dist_owners);
1046 free(dist_vertices);
1047
1048 } else {
1049 *proc_sphere_part = xmalloc(1 * sizeof(**proc_sphere_part));
1050 (*proc_sphere_part)->U.data.rank = 0;
1051 (*proc_sphere_part)->U.is_leaf = 1;
1052 (*proc_sphere_part)->T.data.rank = 0;
1053 (*proc_sphere_part)->T.is_leaf = 1;
1054 (*proc_sphere_part)->gc_norm_vector[0] = base_gc_norm_vector[0];
1055 (*proc_sphere_part)->gc_norm_vector[1] = base_gc_norm_vector[1];
1056 (*proc_sphere_part)->gc_norm_vector[2] = base_gc_norm_vector[2];
1057
1058 // generate global ids and vertex ranks
1059 for (int grid_idx = 0; grid_idx < 2; ++grid_idx) {
1060 *(vertex_ranks[grid_idx]) =
1061 xmalloc(num_vertices[grid_idx] * sizeof(**(vertex_ranks[grid_idx])));
1062 for (size_t i = 0; i < num_vertices[grid_idx]; ++i)
1063 (*(vertex_ranks[grid_idx]))[i] = 0;
1064 if (global_ids_missing[grid_idx]) {
1065 *(global_vertex_ids_[grid_idx]) =
1066 xmalloc(
1067 num_vertices[grid_idx] * sizeof(**(global_vertex_ids_[grid_idx])));
1068 for (size_t i = 0; i < num_vertices[grid_idx]; ++i)
1069 (*(global_vertex_ids_[grid_idx]))[i] = (yac_int)i;
1070 }
1071 }
1072 }
1073}
1074
1075static int is_serial_node(struct proc_sphere_part_node * node) {
1076 return (node->U.is_leaf) && (node->T.is_leaf) &&
1077 (node->U.data.rank == 0) && (node->T.data.rank == 0);
1078}
1079
1080// #define YAC_NEC_EXPERIMENTAL
1081#ifdef YAC_NEC_EXPERIMENTAL
1082// the following code may be better for a vector machine
1083static void yac_proc_sphere_part_do_point_search_recursive(
1084 struct proc_sphere_part_node * node, yac_coordinate_pointer search_coords,
1085 size_t * search_idx, size_t * temp_search_idx, int * flag,
1086 size_t count, int * ranks) {
1087
1088 for (size_t i = 0; i < count; ++i) {
1089
1090 // compute cos angle between current search point and norm vector
1091 double dot = search_coords[search_idx[i]][0] * node->gc_norm_vector[0] +
1092 search_coords[search_idx[i]][1] * node->gc_norm_vector[1] +
1093 search_coords[search_idx[i]][2] * node->gc_norm_vector[2];
1094
1095 // if (angle >= M_PI_2)
1096 flag[i] = (dot <= 0.0);
1097 }
1098
1099 size_t u_size = 0, t_size = 0;
1100 for (size_t i = 0; i < count; ++i) {
1101 if (flag[i]) search_idx[u_size++] = search_idx[i];
1102 else temp_search_idx[t_size++] = search_idx[i];
1103 }
1104
1105 if (node->U.is_leaf) {
1106 size_t rank = node->U.data.rank;;
1107 for (size_t i = 0; i < u_size; ++i) ranks[search_idx[i]] = rank;
1108 } else {
1109 yac_proc_sphere_part_do_point_search_recursive(
1110 node->U.data.node, search_coords, search_idx, temp_search_idx + t_size,
1111 flag, u_size, ranks);
1112 }
1113
1114 if (node->T.is_leaf) {
1115 size_t rank = node->T.data.rank;
1116 for (size_t i = 0; i < t_size; ++i) ranks[temp_search_idx[i]] = rank;
1117 } else {
1118 yac_proc_sphere_part_do_point_search_recursive(
1119 node->T.data.node, search_coords, temp_search_idx, search_idx + u_size,
1120 flag, t_size, ranks);
1121 }
1122}
1123#endif // YAC_NEC_EXPERIMENTAL
1124
1126 struct proc_sphere_part_node * node, yac_coordinate_pointer search_coords,
1127 size_t count, int * ranks) {
1128
1129 if (is_serial_node(node)) {
1130 for (size_t i = 0; i < count; ++i) ranks[i] = 0;
1131 return;
1132 }
1133
1134#ifdef YAC_NEC_EXPERIMENTAL
1135
1136 size_t * search_idx = xmalloc(2 * count * sizeof(*search_idx));
1137 for (size_t i = 0; i < count; ++i) search_idx[i] = i;
1138 int * flag = xmalloc(count * sizeof(*flag));
1139 yac_proc_sphere_part_do_point_search_recursive(
1140 node, search_coords, search_idx, search_idx + count, flag, count, ranks);
1141 free(flag);
1142 free(search_idx);
1143
1144#else
1145
1147 {
1149 for (size_t i = 0; i < count; ++i) {
1150
1151 double * curr_coord = search_coords[i];
1152
1153 struct proc_sphere_part_node * curr_node = node;
1154
1155 while (1) {
1156
1157 // compute cos angle between current search point and norm vector
1158 double dot = curr_coord[0] * curr_node->gc_norm_vector[0] +
1159 curr_coord[1] * curr_node->gc_norm_vector[1] +
1160 curr_coord[2] * curr_node->gc_norm_vector[2];
1161
1162 // if (angle >= M_PI_2)
1163 if (dot <= 0.0) {
1164 if (curr_node->U.is_leaf) {
1165 ranks[i] = curr_node->U.data.rank;
1166 break;
1167 } else {
1168 curr_node = curr_node->U.data.node;
1169 continue;
1170 }
1171 // if (angle < M_PI_2)
1172 } else {
1173 if (curr_node->T.is_leaf) {
1174 ranks[i] = curr_node->T.data.rank;
1175 break;
1176 } else {
1177 curr_node = curr_node->T.data.node;
1178 continue;
1179 }
1180 }
1181 }
1182 }
1183 }
1184#endif // YAC_NEC_EXPERIMENTAL
1185}
1186
1188 struct proc_sphere_part_node * node, struct bounding_circle bnd_circle,
1189 int * ranks, int * rank_count) {
1190
1191 double dot = bnd_circle.base_vector[0] * node->gc_norm_vector[0] +
1192 bnd_circle.base_vector[1] * node->gc_norm_vector[1] +
1193 bnd_circle.base_vector[2] * node->gc_norm_vector[2];
1194
1195 // angle < M_PI_2 + bnd_circle.inc_angle
1196 if (dot > - bnd_circle.inc_angle.sin) {
1197
1198 if (node->T.is_leaf) {
1199
1200 ranks[*rank_count] = node->T.data.rank;
1201 ++*rank_count;
1202
1203 } else {
1204 bnd_circle_search(node->T.data.node, bnd_circle, ranks, rank_count);
1205 }
1206 }
1207
1208 // angle > M_PI_2 - bnd_circle.inc_angle
1209 if (dot < bnd_circle.inc_angle.sin) {
1210
1211 if (node->U.is_leaf) {
1212
1213 ranks[*rank_count] = node->U.data.rank;
1214 ++*rank_count;
1215
1216 } else {
1217 bnd_circle_search(node->U.data.node, bnd_circle, ranks, rank_count);
1218 }
1219 }
1220}
1221
1223 struct proc_sphere_part_node * node, struct bounding_circle bnd_circle,
1224 int * ranks, int * rank_count) {
1225
1226 if (node->T.is_leaf) {
1227
1228 ranks[*rank_count] = node->T.data.rank;
1229 ++*rank_count;
1230
1231 } else {
1233 node->T.data.node, bnd_circle, ranks, rank_count);
1234 }
1235
1236 if (node->U.is_leaf) {
1237
1238 ranks[*rank_count] = node->U.data.rank;
1239 ++*rank_count;
1240
1241 } else {
1243 node->U.data.node, bnd_circle, ranks, rank_count);
1244 }
1245}
1246
1248 struct proc_sphere_part_node * node, struct bounding_circle bnd_circle,
1249 int * ranks, int * rank_count) {
1250
1251 YAC_ASSERT(
1252 compare_angles(bnd_circle.inc_angle, SIN_COS_M_PI) == -1,
1253 "ERROR(yac_proc_sphere_part_do_bnd_circle_search): angle is >= PI")
1254
1255 // special case in which the proc_sphere_part_node only contains a single
1256 // rank
1257 if (is_serial_node(node)) {
1258 ranks[0] = 0;
1259 *rank_count = 1;
1260 } else if (bnd_circle.inc_angle.cos <= 0.0) {
1261 *rank_count = 0;
1262 bnd_circle_search_big_angle(node, bnd_circle, ranks, rank_count);
1263 } else {
1264 *rank_count = 0;
1265 bnd_circle_search(node, bnd_circle, ranks, rank_count);
1266 }
1267}
1268
1269static int get_leaf_ranks(struct proc_sphere_part_node * node, int * ranks) {
1270
1271 int curr_size;
1272
1273 if (node->U.is_leaf) {
1274 *ranks = node->U.data.rank;
1275 curr_size = 1;
1276 } else {
1277 curr_size = get_leaf_ranks(node->U.data.node, ranks);
1278 }
1279 if (node->T.is_leaf) {
1280 ranks[curr_size++] = node->T.data.rank;
1281 } else {
1282 curr_size += get_leaf_ranks(node->T.data.node, ranks + curr_size);
1283 }
1284
1285 return curr_size;
1286}
1287
1289 struct proc_sphere_part_node * node, uint64_t * leaf_sizes, uint64_t min_size,
1290 uint64_t ** inner_node_sizes, int * send_flags, int * recv_flags,
1291 int comm_rank, struct neigh_search_data * last_valid_node) {
1292
1293 int curr_node_is_valid = (**inner_node_sizes >= min_size);
1294
1295 struct neigh_search_data * curr_valid_node;
1296 struct neigh_search_data temp_valid_node;
1297
1298 int * ranks = last_valid_node->ranks;
1299
1300 if (curr_node_is_valid) {
1301 temp_valid_node.ranks = ranks;
1302 temp_valid_node.num_ranks = 0;
1303 temp_valid_node.node = node;
1304 curr_valid_node = &temp_valid_node;
1305 last_valid_node->num_ranks = 0;
1306 } else {
1307 curr_valid_node = last_valid_node;
1308 }
1309
1310 for (int j = 0; j < 2; ++j) {
1311
1312 struct proc_sphere_part_node_data node_data = (j == 0)?(node->U):(node->T);
1313
1314 if (node_data.is_leaf) {
1315
1316 int rank = node_data.data.rank;
1317
1318 if (leaf_sizes[rank] < min_size) {
1319
1320 if (curr_valid_node->num_ranks == 0)
1321 curr_valid_node->num_ranks =
1322 get_leaf_ranks(curr_valid_node->node, ranks);
1323
1324 // if the current leaf is the local process
1325 if (rank == comm_rank)
1326 for (int i = 0; i < curr_valid_node->num_ranks; ++i)
1327 recv_flags[ranks[i]] = 1;
1328
1329 // if the process of the current leaf required data from the
1330 // local process
1331 for (int i = 0; i < curr_valid_node->num_ranks; ++i) {
1332 if (ranks[i] == comm_rank) {
1333 send_flags[rank] = 1;
1334 break;
1335 }
1336 }
1337 }
1338
1339 } else {
1340 ++*inner_node_sizes;
1342 node_data.data.node, leaf_sizes, min_size, inner_node_sizes,
1343 send_flags, recv_flags, comm_rank, curr_valid_node);
1344 }
1345 }
1346}
1347
1348static uint64_t determine_node_sizes(
1349 struct proc_sphere_part_node * node, uint64_t * leaf_sizes,
1350 uint64_t ** inner_node_sizes) {
1351
1352 uint64_t * curr_inner_node_size = *inner_node_sizes;
1353 uint64_t node_size;
1354 if (node->U.is_leaf) {
1355 node_size = leaf_sizes[node->U.data.rank];
1356 } else {
1357 ++*inner_node_sizes;
1358 node_size =
1359 determine_node_sizes(node->U.data.node, leaf_sizes, inner_node_sizes);
1360 }
1361 if (node->T.is_leaf) {
1362 node_size += leaf_sizes[node->T.data.rank];
1363 } else {
1364 ++*inner_node_sizes;
1365 node_size +=
1366 determine_node_sizes(node->T.data.node, leaf_sizes, inner_node_sizes);
1367 }
1368 return (*curr_inner_node_size = node_size);
1369}
1370
1372 struct proc_sphere_part_node * node, uint64_t * leaf_sizes,
1373 uint64_t min_size, int * send_flags, int * recv_flags,
1374 int comm_rank, int comm_size) {
1375
1376 uint64_t * inner_node_sizes =
1377 xcalloc((size_t)comm_size, sizeof(*inner_node_sizes));
1378
1379 uint64_t * temp_inner_node_sizes = inner_node_sizes;
1380 determine_node_sizes(node, leaf_sizes, &temp_inner_node_sizes);
1381
1382 YAC_ASSERT(
1383 *inner_node_sizes >= min_size,
1384 "ERROR(yac_proc_sphere_part_get_neigh_ranks): sum of global leaf sizes "
1385 "is < min_size")
1386
1387 struct neigh_search_data search_data = {
1388 .ranks = xmalloc((size_t)comm_size * sizeof(int)),
1389 .num_ranks = 0,
1390 .node = node
1391 };
1392
1393 temp_inner_node_sizes = inner_node_sizes;
1394 get_neigh_ranks(node, leaf_sizes, min_size, &temp_inner_node_sizes,
1395 send_flags, recv_flags, comm_rank, &search_data);
1396
1397 send_flags[comm_rank] = 0;
1398 recv_flags[comm_rank] = 0;
1399
1400 free(search_data.ranks);
1401 free(inner_node_sizes);
1402}
1403
1405
1406 if (!(node->U.is_leaf)) yac_proc_sphere_part_node_delete(node->U.data.node);
1408 free(node);
1409}
#define ENSURE_ARRAY_SIZE(arrayp, curr_array_size, req_size)
static int compare_coords(double const *a, double const *b)
Definition geometry.h:644
static const struct sin_cos_angle SIN_COS_M_PI
Definition geometry.h:49
static void crossproduct_kahan(double const a[], double const b[], double cross[])
Definition geometry.h:318
static int compare_angles(struct sin_cos_angle a, struct sin_cos_angle b)
Definition geometry.h:410
static void normalise_vector(double v[])
Definition geometry.h:671
#define xcalloc(nmemb, size)
Definition ppm_xfuncs.h:64
#define xmalloc(size)
Definition ppm_xfuncs.h:66
static int get_leaf_ranks(struct proc_sphere_part_node *node, int *ranks)
void yac_proc_sphere_part_get_neigh_ranks(struct proc_sphere_part_node *node, uint64_t *leaf_sizes, uint64_t min_size, int *send_flags, int *recv_flags, int comm_rank, int comm_size)
static struct proc_sphere_part_node * generate_proc_sphere_part_node_recursive(struct dist_vertex **vertices, size_t *num_vertices, struct remote_point_info **owners, size_t *num_owners, size_t(*all_bucket_sizes)[2], struct comm_buffers comm_buffers, size_t **reorder_idx, size_t *reorder_idx_array_size, int **list_flag_, size_t *list_flag_array_size, MPI_Datatype dist_vertex_dt, MPI_Datatype remote_point_info_dt, struct yac_group_comm group_comm, double prev_gc_norm_vector[3])
void yac_proc_sphere_part_do_bnd_circle_search(struct proc_sphere_part_node *node, struct bounding_circle bnd_circle, int *ranks, int *rank_count)
static uint64_t determine_node_sizes(struct proc_sphere_part_node *node, uint64_t *leaf_sizes, uint64_t **inner_node_sizes)
void yac_proc_sphere_part_do_point_search(struct proc_sphere_part_node *node, yac_coordinate_pointer search_coords, size_t count, int *ranks)
static void bnd_circle_search(struct proc_sphere_part_node *node, struct bounding_circle bnd_circle, int *ranks, int *rank_count)
static void reorder_data_remote_point_info(size_t *reorder_idx, size_t count, struct remote_point_info *data)
static void compute_redist_recvcounts_rdispls(int comm_rank, int split_rank, int comm_size, size_t global_sizes[2], size_t(*all_bucket_sizes)[2], int *counts, int *displs, size_t *recv_count)
static int is_serial_node(struct proc_sphere_part_node *node)
splicomm_tags
@ DATA_SIZE_TAG
@ DATA_TAG
static struct proc_sphere_part_node_data get_remote_data(struct proc_sphere_part_node_data local_data, struct yac_group_comm local_group_comm, struct yac_group_comm remote_group_comm)
void yac_proc_sphere_part_node_delete(struct proc_sphere_part_node *node)
static void get_neigh_ranks(struct proc_sphere_part_node *node, uint64_t *leaf_sizes, uint64_t min_size, uint64_t **inner_node_sizes, int *send_flags, int *recv_flags, int comm_rank, struct neigh_search_data *last_valid_node)
static MPI_Datatype yac_get_dist_vertex_mpi_datatype(MPI_Comm comm)
void yac_proc_sphere_part_new(yac_coordinate_pointer vertex_coordinates[2], size_t *num_vertices, struct proc_sphere_part_node **proc_sphere_part, yac_int **global_vertex_ids_[2], int **vertex_ranks[2], MPI_Comm comm)
static void pack_proc_sphere_part_node(struct proc_sphere_part_node *node, void *pack_buffer, int pack_buffer_size, int *position, MPI_Comm comm)
static int get_proc_sphere_part_node_pack_size(struct proc_sphere_part_node node, MPI_Comm comm)
static void pack_proc_sphere_part_node_data(struct proc_sphere_part_node_data node_data, void *pack_buffer, int pack_buffer_size, int *position, MPI_Comm comm)
static void remove_duplicated_vertices(struct dist_vertex *vertices, size_t *num_vertices, struct remote_point_info *owners, size_t num_owners, size_t **owners_reorder_idx, size_t *owners_reorder_idx_array_size)
static void compute_redist_sendcounts_sdispls(int comm_rank, int split_rank, int comm_size, size_t global_sizes[2], size_t(*all_bucket_sizes)[2], int *counts, int *displs)
static void inform_dist_vertex_owners(struct dist_vertex *dist_vertices, size_t num_dist_vertices, struct remote_point_info *dist_owners, size_t **reorder_idx, size_t *reorder_idx_array_size, yac_int **global_vertex_ids[2], int *global_ids_missing, int **vertex_ranks[2], size_t *num_vertices, MPI_Comm comm)
static int get_proc_sphere_part_node_data_pack_size(struct proc_sphere_part_node_data node_data, MPI_Comm comm)
static int compare_dist_vertices(const void *a, const void *b)
static struct proc_sphere_part_node * unpack_proc_sphere_part_node(void *pack_buffer, int pack_buffer_size, int *position, MPI_Comm comm)
static struct proc_sphere_part_node_data unpack_proc_sphere_part_node_data(void *pack_buffer, int pack_buffer_size, int *position, MPI_Comm comm)
static void redistribute_dist_vertices(struct dist_vertex **vertices, size_t *num_vertices, struct remote_point_info **owners, size_t *num_owners, size_t global_bucket_sizes[2], size_t(*all_bucket_sizes)[2], int split_rank, struct comm_buffers comm_buffers, size_t **owners_reorder_idx, size_t *owners_reorder_idx_array_size, MPI_Datatype dist_vertex_dt, MPI_Datatype remote_point_info_dt, struct yac_group_comm group_comm)
static void bnd_circle_search_big_angle(struct proc_sphere_part_node *node, struct bounding_circle bnd_circle, int *ranks, int *rank_count)
MPI_Datatype yac_get_remote_point_info_mpi_datatype(MPI_Comm comm)
struct sin_cos_angle inc_angle
angle between the middle point and the boundary of the spherical cap
Definition geometry.h:61
double base_vector[3]
Definition geometry.h:59
double coord[3]
struct proc_sphere_part_node * node
struct proc_sphere_part_node * node
union proc_sphere_part_node_data::@45 data
struct proc_sphere_part_node_data U T
single location information of a point
double sin
Definition geometry.h:41
double cos
Definition geometry.h:41
#define YAC_OMP_PARALLEL
#define MIN(a, b)
#define YAC_OMP_FOR
#define MAX(a, b)
#define YAC_ASSERT(exp, msg)
Definition yac_assert.h:16
void yac_alltoallv_p2p_group(void const *send_buffer, int const *sendcounts, int const *sdispls, void *recv_buffer, int const *recvcounts, int const *rdispls, size_t dt_size, MPI_Datatype dt, struct yac_group_comm group_comm)
Definition yac_mpi.c:242
int yac_group_comm_get_global_rank(struct yac_group_comm group_comm)
Definition yac_mpi.c:499
void yac_generate_alltoallv_args(int count, size_t const *sendcounts, size_t *recvcounts, size_t *sdispls, size_t *rdispls, MPI_Comm comm)
Definition yac_mpi.c:577
void yac_free_comm_buffers(size_t *sendcounts, size_t *recvcounts, size_t *sdispls, size_t *rdispls)
Definition yac_mpi.c:633
int yac_group_comm_get_rank(struct yac_group_comm group_comm)
Definition yac_mpi.c:491
void yac_group_comm_split(struct yac_group_comm group_comm, int split_rank, struct yac_group_comm *local_group_comm, struct yac_group_comm *remote_group_comm)
Definition yac_mpi.c:511
struct yac_group_comm yac_group_comm_new(MPI_Comm comm)
Definition yac_mpi.c:476
void yac_allreduce_sum_dble(double *buffer, int count, struct yac_group_comm group_comm)
Definition yac_mpi.c:295
void yac_get_comm_buffers(int count, size_t **sendcounts, size_t **recvcounts, size_t **sdispls, size_t **rdispls, MPI_Comm comm)
Definition yac_mpi.c:602
int yac_group_comm_get_size(struct yac_group_comm group_comm)
Definition yac_mpi.c:495
MPI_Datatype yac_create_resized(MPI_Datatype dt, size_t new_size, MPI_Comm comm)
Definition yac_mpi.c:556
void yac_bcast_group(void *buffer, int count, MPI_Datatype datatype, int root, struct yac_group_comm group_comm)
Definition yac_mpi.c:411
void yac_allgather_size_t(const size_t *sendbuf, size_t *recvbuf, int count, struct yac_group_comm group_comm)
Definition yac_mpi.c:367
void yac_group_comm_delete(struct yac_group_comm group_comm)
Definition yac_mpi.c:486
void yac_alltoallv_p2p(void const *send_buffer, size_t const *sendcounts, size_t const *sdispls, void *recv_buffer, size_t const *recvcounts, size_t const *rdispls, size_t dt_size, MPI_Datatype dt, MPI_Comm comm, char const *caller, int line)
Definition yac_mpi.c:131
#define yac_mpi_call(call, comm)
#define YAC_MPI_SIZE_T
Xt_int yac_int
Definition yac_types.h:15
#define yac_int_dt
Definition yac_types.h:16
double(* yac_coordinate_pointer)[3]
Definition yac_types.h:19