#ifdef SCOREP_USER_ENABLE
#include "scorep/SCOREP_User.h"
#endif
    MPI_Pack_size(3, MPI_DOUBLE, comm, &vec_pack_size), comm);

  return vec_pack_size +

  yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &int_pack_size), comm);

  int data_size = int_pack_size;

    data_size += int_pack_size;
  int pack_buffer_size, int * position, MPI_Comm comm);

  int pack_buffer_size, int * position, MPI_Comm comm) {

      pack_buffer_size, position, comm), comm);

    node->U, pack_buffer, pack_buffer_size, position, comm);

    node->T, pack_buffer, pack_buffer_size, position, comm);

  int pack_buffer_size, int * position, MPI_Comm comm) {

      pack_buffer_size, position, comm), comm);

      pack_buffer_size, position, comm), comm);

      pack_buffer_size, position, comm);
  void * pack_buffer, int pack_buffer_size, int * position,

  void * pack_buffer, int pack_buffer_size, int * position,

    MPI_Unpack(pack_buffer, pack_buffer_size, position,

  void * pack_buffer, int pack_buffer_size, int * position,

    MPI_Unpack(pack_buffer, pack_buffer_size, position,
               &(node_data.is_leaf), 1, MPI_INT, comm), comm);

    MPI_Unpack(pack_buffer, pack_buffer_size, position,
               &(node_data.data.rank), 1, MPI_INT, comm), comm);

      pack_buffer, pack_buffer_size, position, comm);
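/*
 * Illustrative sketch (not part of the original file): the pack/unpack helpers
 * above follow the standard MPI_Pack_size / MPI_Pack / MPI_Unpack round trip.
 * The toy struct and function below are hypothetical and only show the
 * pattern; an initialized MPI communicator is assumed.
 */
#include <stdlib.h>
#include <mpi.h>

struct toy_node { int is_leaf; int rank; };

static void toy_pack_unpack_roundtrip(struct toy_node in, MPI_Comm comm) {

  /* query how much buffer space two packed ints need */
  int int_pack_size;
  MPI_Pack_size(1, MPI_INT, comm, &int_pack_size);
  int buffer_size = 2 * int_pack_size;
  void * buffer = malloc((size_t)buffer_size);

  /* pack both members back to back; MPI advances "position" */
  int position = 0;
  MPI_Pack(&in.is_leaf, 1, MPI_INT, buffer, buffer_size, &position, comm);
  MPI_Pack(&in.rank, 1, MPI_INT, buffer, buffer_size, &position, comm);

  /* unpacking replays the same sequence with a fresh position */
  struct toy_node out;
  position = 0;
  MPI_Unpack(buffer, buffer_size, &position, &out.is_leaf, 1, MPI_INT, comm);
  MPI_Unpack(buffer, buffer_size, &position, &out.rank, 1, MPI_INT, comm);

  free(buffer);
  (void)out;
}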
  MPI_Comm comm = local_group_comm.comm;

  void * recv_buffer = NULL;

  int order = local_group_comm.start < remote_group_comm.start;

  for (int i = 0; i < 2; ++i) {

        &data_size, 1, MPI_INT, remote_group_comm.start, local_group_comm);
      recv_buffer = xmalloc((size_t)data_size);

        remote_group_comm.start, local_group_comm);

    if (comm_rank == local_group_comm.start) {

      int pack_buffer_size =

      void * pack_buffer = xmalloc((size_t)pack_buffer_size);

        local_data, pack_buffer, pack_buffer_size, &position, comm);

        &position, 1, MPI_INT, local_group_comm.start, remote_group_comm);

        local_group_comm.start, remote_group_comm);

    recv_buffer, data_size, &position, comm);
  int comm_rank, int split_rank, int comm_size,
  size_t global_sizes[2], size_t (*all_bucket_sizes)[2],
  int * counts, int * displs, size_t * recv_count) {

  int color = comm_rank >= split_rank;

  int split_comm_size = split_rank;
  int split_comm_rank = comm_rank;
  if (color) {
    split_comm_rank -= split_comm_size;
    split_comm_size = comm_size - split_comm_size;
  }

  size_t global_size = global_sizes[color];
  size_t local_interval_start =
    (((long long)global_size * (long long)split_comm_rank +
      (long long)(split_comm_size - 1)) /
     (long long)split_comm_size);
  size_t local_interval_end =
    (((long long)global_size * (long long)(split_comm_rank + 1) +
      (long long)(split_comm_size - 1)) /
     (long long)split_comm_size);

  *recv_count = (size_t)(local_interval_end - local_interval_start);

  size_t start_idx = 0;
  for (int i = 0; i < comm_size; ++i) {

    size_t next_start_idx = start_idx + all_bucket_sizes[i][color];
    size_t interval_start = MAX(start_idx, local_interval_start);
    size_t interval_end = MIN(next_start_idx, local_interval_end);

    if (interval_start < interval_end) {

      size_t count = interval_end - interval_start;
      size_t disp = interval_start - local_interval_start;

      YAC_ASSERT(
        (count <= INT_MAX) && (disp <= INT_MAX),
        "ERROR(compute_redist_recvcounts_rdispls): invalid interval")

      counts[i] = (int)count;
      displs[i] = (int)disp;

    start_idx = next_start_idx;
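/*
 * Illustrative sketch (not part of the original file): the interval bounds
 * above are a ceiling division, i.e. rank r of n ranks in its bucket owns the
 * global index range [ceil(global_size*r/n), ceil(global_size*(r+1)/n)).
 * A standalone example, assuming nothing beyond the C standard library:
 */
#include <stdio.h>

static void toy_print_intervals(void) {
  long long global_size = 10, n = 4;
  for (long long r = 0; r < n; ++r) {
    long long start = (global_size * r + (n - 1)) / n;        /* ceil(10*r/4)     */
    long long end   = (global_size * (r + 1) + (n - 1)) / n;  /* ceil(10*(r+1)/4) */
    printf("rank %lld gets [%lld, %lld)\n", r, start, end);
  }
  /* prints [0,3), [3,5), [5,8), [8,10): interval sizes 3,2,3,2 sum to 10 */
}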
  int comm_rank, int split_rank, int comm_size,
  size_t global_sizes[2], size_t (*all_bucket_sizes)[2],
  int * counts, int * displs) {

  size_t U_size = all_bucket_sizes[comm_rank][0];

  size_t local_interval_start[2] = {0, 0};
  size_t local_interval_end[2];
  for (int i = 0; i < comm_rank; ++i)
    for (int j = 0; j < 2; ++j)
      local_interval_start[j] += all_bucket_sizes[i][j];
  for (int j = 0; j < 2; ++j)
    local_interval_end[j] =
      local_interval_start[j] + all_bucket_sizes[comm_rank][j];

  int comm_sizes[2] = {split_rank, comm_size - split_rank};

  size_t start_idx[2] = {0,0};
  for (int i = 0; i < comm_size; ++i) {

    int color = i >= split_rank;
    size_t global_size = global_sizes[color];
    int split_comm_rank = i - (color?(split_rank):0);
    int split_comm_size = comm_sizes[color];
    size_t next_start_idx =
      (size_t)(
        ((long long)global_size * (long long)(split_comm_rank + 1) +
         (long long)(split_comm_size - 1)) /
        (long long)split_comm_size);
    size_t interval_start =
      MAX(start_idx[color], local_interval_start[color]);
    size_t interval_end =
      MIN(next_start_idx, local_interval_end[color]);

    if (interval_start < interval_end) {

      size_t count = interval_end - interval_start;
      size_t disp = interval_start - local_interval_start[color] +
                    ((color)?(U_size):(0));

      YAC_ASSERT(
        (count <= INT_MAX) && (disp <= INT_MAX),
        "ERROR(compute_redist_sendcounts_sdispls): invalid interval")

      counts[i] = (int)count;
      displs[i] = (int)disp;

    start_idx[color] = next_start_idx;
  for (size_t i = 0; i < count; ++i) {

    if (reorder_idx[i] != i) {

        size_t swap = reorder_idx[j];
  if ((ret = ((const struct dist_vertex *)a)->grid_idx -
             ((const struct dist_vertex *)b)->grid_idx)) return ret;
  if (((const struct dist_vertex *)a)->global_id != XT_INT_MAX)
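/*
 * Illustrative sketch (not part of the original file): a qsort-style
 * comparator that orders records by a primary key and treats a sentinel value
 * as "no key", similar in spirit to the grid_idx/global_id comparison above.
 * The struct and sentinel below are hypothetical.
 */
#include <stdlib.h>
#include <limits.h>

struct toy_vertex { int grid_idx; long global_id; };
#define TOY_ID_MISSING LONG_MAX

static int compare_toy_vertices(const void * a, const void * b) {
  const struct toy_vertex * va = (const struct toy_vertex *)a;
  const struct toy_vertex * vb = (const struct toy_vertex *)b;
  if (va->grid_idx != vb->grid_idx)        /* primary key: grid index */
    return (va->grid_idx > vb->grid_idx) - (va->grid_idx < vb->grid_idx);
  /* entries without a global id (sentinel) sort behind entries that have one */
  if (va->global_id == TOY_ID_MISSING)
    return (vb->global_id == TOY_ID_MISSING) ? 0 : 1;
  if (vb->global_id == TOY_ID_MISSING) return -1;
  return (va->global_id > vb->global_id) - (va->global_id < vb->global_id);
}
/* usage: qsort(vertices, n, sizeof(*vertices), compare_toy_vertices); */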
  struct dist_vertex * vertices, size_t * num_vertices,
  size_t ** owners_reorder_idx, size_t * owners_reorder_idx_array_size) {

  if (*num_vertices == 0) return;

    *owners_reorder_idx, *owners_reorder_idx_array_size, num_owners);

  for (size_t i = 0; i < *num_vertices; ++i) {

    "ERROR(remove_duplicated_vertices): internal error "
    "(owner_offset != num_owners)");

  size_t old_num_vertices = *num_vertices;
  size_t new_num_vertices = 0;

  size_t * owners_reorder_idx_ = *owners_reorder_idx;
  for (size_t i = 0, reorder_idx = 0; i < old_num_vertices;
       ++i, ++curr_vertex) {

      prev_vertex = vertices + new_num_vertices;

      if (prev_vertex != curr_vertex) *prev_vertex = *curr_vertex;

  *num_vertices = new_num_vertices;
  struct dist_vertex ** vertices, size_t * num_vertices,
  size_t global_bucket_sizes[2], size_t (*all_bucket_sizes)[2],
  size_t ** owners_reorder_idx, size_t * owners_reorder_idx_array_size,
  MPI_Datatype dist_vertex_dt, MPI_Datatype remote_point_info_dt,

#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_DEFINE( redist_data_region )
SCOREP_USER_REGION_BEGIN(
  redist_data_region, "data redistribution", SCOREP_USER_REGION_TYPE_COMMON )
#endif

    group_rank, split_rank, group_size, global_bucket_sizes, all_bucket_sizes,

  size_t new_num_vertices;

    group_rank, split_rank, group_size, global_bucket_sizes, all_bucket_sizes,

    xmalloc(new_num_vertices * sizeof(*new_vertices));

    sizeof(**vertices), dist_vertex_dt, group_comm);

  size_t new_num_owners;

  size_t saccu = 0, raccu = 0, send_vertex_idx = 0, recv_vertex_idx = 0;
  for (int i = 0; i < group_size; ++i) {

    size_t send_count = 0, recv_count = 0;

      send_count += (size_t)((*vertices)[send_vertex_idx].num_owners);

      recv_count += (size_t)(new_vertices[recv_vertex_idx].num_owners);

    YAC_ASSERT(
      (saccu <= INT_MAX) && (raccu <= INT_MAX),
      "ERROR(redistribute_dist_vertices): displacement exceeds INT_MAX");
    YAC_ASSERT(
      (send_count <= INT_MAX) && (recv_count <= INT_MAX),
      "ERROR(redistribute_dist_vertices): counts exceed INT_MAX");

    "ERROR(redistribute_dist_vertices): inconsistent owner count");
  new_num_owners = raccu;

    xmalloc(new_num_owners * sizeof(*new_owners));

    sizeof(**owners), remote_point_info_dt, group_comm);

  *vertices = new_vertices;
  *num_vertices = new_num_vertices;
  *owners = new_owners;
  *num_owners = new_num_owners;

    *vertices, num_vertices, *owners, *num_owners,
    owners_reorder_idx, owners_reorder_idx_array_size);

#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_END( redist_data_region )
#endif
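/*
 * Illustrative sketch (not part of the original file): the count/displacement
 * bookkeeping above feeds an all-to-all-v style exchange.  A minimal plain-MPI
 * version of that pattern, assuming sendcounts[] has already been filled with
 * the number of elements destined for each rank:
 */
#include <mpi.h>
#include <stdlib.h>

static void toy_exchange_ints(const int * sendbuf, const int * sendcounts,
                              int * recvcounts, MPI_Comm comm) {
  int comm_size;
  MPI_Comm_size(comm, &comm_size);

  /* every rank learns how much it will receive from every other rank */
  MPI_Alltoall(sendcounts, 1, MPI_INT, recvcounts, 1, MPI_INT, comm);

  /* displacements are exclusive prefix sums of the counts */
  int * sdispls = malloc((size_t)comm_size * sizeof(*sdispls));
  int * rdispls = malloc((size_t)comm_size * sizeof(*rdispls));
  sdispls[0] = 0; rdispls[0] = 0;
  for (int i = 1; i < comm_size; ++i) {
    sdispls[i] = sdispls[i-1] + sendcounts[i-1];
    rdispls[i] = rdispls[i-1] + recvcounts[i-1];
  }

  int recv_total = rdispls[comm_size-1] + recvcounts[comm_size-1];
  int * recvbuf = malloc((size_t)recv_total * sizeof(*recvbuf));
  MPI_Alltoallv(sendbuf, sendcounts, sdispls, MPI_INT,
                recvbuf, recvcounts, rdispls, MPI_INT, comm);

  free(recvbuf); free(rdispls); free(sdispls);
}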
  struct dist_vertex ** vertices, size_t * num_vertices,
  size_t ** reorder_idx, size_t * reorder_idx_array_size,
  int ** list_flag_, size_t * list_flag_array_size,
  MPI_Datatype dist_vertex_dt, MPI_Datatype remote_point_info_dt,
  struct yac_group_comm group_comm, double prev_gc_norm_vector[3]) {

#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_DEFINE( local_balance_point_region )
SCOREP_USER_REGION_DEFINE( global_balance_point_region )
SCOREP_USER_REGION_DEFINE( splitting_region )
SCOREP_USER_REGION_DEFINE( comm_split_region )
#endif

#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_BEGIN(
  local_balance_point_region, "local balance point",
  SCOREP_USER_REGION_TYPE_COMMON )
#endif
  double balance_point[3] = {0.0, 0.0, 0.0};
  for (size_t i = 0; i < *num_vertices; ++i) {
    double * vertex_coord = (*vertices)[i].coord;
    for (int j = 0; j < 3; ++j) balance_point[j] += vertex_coord[j];

#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_END( local_balance_point_region )
SCOREP_USER_REGION_BEGIN(
  global_balance_point_region, "global balance point",
  SCOREP_USER_REGION_TYPE_COMMON )
#endif

  if ((fabs(balance_point[0]) > 1e-9) ||
      (fabs(balance_point[1]) > 1e-9) ||
      (fabs(balance_point[2]) > 1e-9)) {

    balance_point[0] = prev_gc_norm_vector[2];
    balance_point[1] = prev_gc_norm_vector[0];
    balance_point[2] = prev_gc_norm_vector[1];

#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_END( global_balance_point_region )
#endif
#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_BEGIN( splitting_region,
  "splitting data", SCOREP_USER_REGION_TYPE_COMMON )
#endif

  int * list_flag = *list_flag_;

  size_t num_vertices_ = *num_vertices;

  for (size_t i = 0; i < num_vertices_; ++i) {
    double * curr_coordinates_xyz = vertices_[i].coord;
    double dot = curr_coordinates_xyz[0] * gc_norm_vector[0] +
                 curr_coordinates_xyz[1] * gc_norm_vector[1] +
                 curr_coordinates_xyz[2] * gc_norm_vector[2];

    list_flag[i] = dot <= 0.0;

  size_t U_size = 0, T_size = 0;
  for (size_t i = 0; i < num_vertices_; ++i) {
    if (list_flag[i]) ++U_size;

  for (size_t i = 0, owner_offset = 0; i < num_vertices_; ++i) {

  for (size_t i = 0, j = U_size; i < U_size; ++i) {

      for (;!list_flag[j];++j);

      vertices_[i] = vertices_[j];
      vertices_[j] = temp_vertex;

  size_t * reorder_idx_ = *reorder_idx;
  for (size_t i = 0, k = 0; i < num_vertices_; ++i) {

    for (int j = 0; j < num_owners; ++j, ++k, ++offset) {
      reorder_idx_[offset] = k;

#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_END( splitting_region )
#endif
  size_t bucket_sizes[2] = {U_size, T_size};

    &(bucket_sizes[0]), &(all_bucket_sizes[0][0]), 2, group_comm);

  size_t global_bucket_sizes[2] = {0, 0};
  for (int i = 0; i < group_size; ++i)
    for (int j = 0; j < 2; ++j)
      global_bucket_sizes[j] += all_bucket_sizes[i][j];
  size_t global_num_vertices =
    global_bucket_sizes[0] + global_bucket_sizes[1];

#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_BEGIN(
  comm_split_region, "creating splitcomm", SCOREP_USER_REGION_TYPE_COMMON )
#endif

      ((global_bucket_sizes[0] * (size_t)group_size +
        global_num_vertices/2) / global_num_vertices), 1),

    group_comm, split_rank, &local_group_comm, &remote_group_comm);

#ifdef SCOREP_USER_ENABLE
SCOREP_USER_REGION_END( comm_split_region )
#endif

    vertices, num_vertices, owners, num_owners,
    global_bucket_sizes, all_bucket_sizes, split_rank, comm_buffers,
    reorder_idx, reorder_idx_array_size, dist_vertex_dt, remote_point_info_dt,

    vertices, num_vertices, owners, num_owners,
    all_bucket_sizes, comm_buffers, reorder_idx, reorder_idx_array_size,
    list_flag_, list_flag_array_size, dist_vertex_dt, remote_point_info_dt,
    local_group_comm, gc_norm_vector);
  if (group_rank < split_rank) {
    node->U = local_data;
    node->T = remote_data;
  } else {
    node->U = remote_data;
    node->T = local_data;
  }
  MPI_Datatype dist_vertex_dt;
  int array_of_blocklengths[] = {3, 1, 1, 1, 1};
  const MPI_Aint array_of_displacements[] =
    {(MPI_Aint)(intptr_t)(const void *)&(dummy.coord[0]) -
       (MPI_Aint)(intptr_t)(const void *)&dummy,
     (MPI_Aint)(intptr_t)(const void *)&(dummy.grid_idx) -
       (MPI_Aint)(intptr_t)(const void *)&dummy,
     (MPI_Aint)(intptr_t)(const void *)&(dummy.global_id) -
       (MPI_Aint)(intptr_t)(const void *)&dummy,
     (MPI_Aint)(intptr_t)(const void *)&(dummy.num_owners) -
       (MPI_Aint)(intptr_t)(const void *)&dummy};
  const MPI_Datatype array_of_types[] =

    MPI_Type_create_struct(4, array_of_blocklengths, array_of_displacements,
                           array_of_types, &dist_vertex_dt), comm);
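/*
 * Illustrative sketch (not part of the original file): the usual recipe for
 * describing a C struct to MPI, as used above, is MPI_Type_create_struct
 * followed by MPI_Type_create_resized (so the extent matches sizeof(struct))
 * and MPI_Type_commit.  The struct below is hypothetical.
 */
#include <stddef.h>
#include <mpi.h>

struct toy_vertex_rec { double coord[3]; int grid_idx; };

static MPI_Datatype make_toy_vertex_dt(void) {
  int blocklengths[2] = {3, 1};
  MPI_Aint displacements[2] = {offsetof(struct toy_vertex_rec, coord),
                               offsetof(struct toy_vertex_rec, grid_idx)};
  MPI_Datatype types[2] = {MPI_DOUBLE, MPI_INT};
  MPI_Datatype tmp_dt, dt;
  MPI_Type_create_struct(2, blocklengths, displacements, types, &tmp_dt);
  MPI_Type_create_resized(
    tmp_dt, 0, (MPI_Aint)sizeof(struct toy_vertex_rec), &dt);
  MPI_Type_free(&tmp_dt);
  MPI_Type_commit(&dt);
  return dt;
}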
  struct dist_vertex * dist_vertices, size_t num_dist_vertices,
  size_t ** reorder_idx, size_t * reorder_idx_array_size,
  yac_int **global_vertex_ids[2], int * global_ids_missing,
  int **vertex_ranks[2], size_t * num_vertices, MPI_Comm comm) {

  int comm_rank, comm_size;

  size_t num_vertices_per_grid[2] = {0, 0};
  size_t num_owners_per_grid[2] = {0, 0};
  for (size_t i = 0; i < num_dist_vertices; ++i) {
    num_vertices_per_grid[dist_vertices[i].grid_idx]++;
    num_owners_per_grid[dist_vertices[i].grid_idx] +=

  size_t * sendcounts, * recvcounts, * sdispls, * rdispls;

    1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);

    *reorder_idx, *reorder_idx_array_size,
    MAX((num_owners_per_grid[0] + num_vertices[0]),
        (num_owners_per_grid[1] + num_vertices[1])));
  yac_int * global_vertex_ids_buffer =

      (global_ids_missing[0]?(num_owners_per_grid[0] + num_vertices[0]):0),
      (global_ids_missing[1]?(num_owners_per_grid[1] + num_vertices[1]):0)) *
    sizeof(*global_vertex_ids_buffer));
      dist_vertices + ((grid_idx == 0)?0:num_vertices_per_grid[0]);

      dist_owners + ((grid_idx == 0)?0:num_owners_per_grid[0]);

    if (global_ids_missing[grid_idx]) {

        num_vertices_per_grid[grid_idx] <= (size_t)XT_INT_MAX,
        "ERROR(inform_dist_vertex_owners): global_id out of bounds");

      yac_int yac_int_num_vertices = (yac_int)num_vertices_per_grid[grid_idx];

          MPI_SUM, comm), comm);
      if (comm_rank == 0) id_offset = 0;

        ((size_t)id_offset + num_vertices_per_grid[grid_idx]) <=

        "ERROR(inform_dist_vertex_owners): global_id out of bounds")
    memset(sendcounts, 0, (size_t)(comm_size + 1) * sizeof(*sendcounts));

    for (size_t i = 0; i < num_owners_per_grid[grid_idx]; ++i)
      sendcounts[dist_grid_owners[i].rank]++;

      1, sendcounts, recvcounts, sdispls, rdispls, comm);

    size_t * send_reorder_idx = *reorder_idx;
    size_t * recv_reorder_idx = *reorder_idx + num_owners_per_grid[grid_idx];
    yac_int * send_global_vertex_ids = global_vertex_ids_buffer;
    yac_int * recv_global_vertex_ids = global_vertex_ids_buffer +
                                       num_owners_per_grid[grid_idx];

    for (size_t i = 0, k = 0; i < num_vertices_per_grid[grid_idx]; ++i) {

      int num_vertex_owners = dist_grid_vertices[i].num_owners;

      for (int j = 0; j < num_vertex_owners; ++j, ++k) {

        size_t pos = sdispls[dist_grid_owners[k].rank + 1]++;
        send_reorder_idx[pos] = dist_grid_owners[k].orig_pos;
        if (global_ids_missing[grid_idx])
          send_global_vertex_ids[pos] = id_offset + (yac_int)i;

      send_reorder_idx, sendcounts, sdispls,
      recv_reorder_idx, recvcounts, rdispls,
      "inform_dist_vertex_owners", __LINE__);
    int * curr_vertex_ranks =
      xmalloc(num_vertices[grid_idx] * sizeof(*curr_vertex_ranks));

      for (size_t i = 0; i < recvcounts[rank]; ++i, ++j)
        curr_vertex_ranks[recv_reorder_idx[j]] = rank;
    *(vertex_ranks[grid_idx]) = curr_vertex_ranks;

    if (global_ids_missing[grid_idx]) {

        send_global_vertex_ids, sendcounts, sdispls,
        recv_global_vertex_ids, recvcounts, rdispls,
        sizeof(*send_global_vertex_ids), yac_int_dt, comm,
        "inform_dist_vertex_owners", __LINE__);

      yac_int * curr_global_vertex_ids =
        xmalloc(num_vertices[grid_idx] * sizeof(*curr_global_vertex_ids));

      for (size_t i = 0; i < num_vertices[grid_idx]; ++i)
        curr_global_vertex_ids[recv_reorder_idx[i]] = recv_global_vertex_ids[i];

      *(global_vertex_ids[grid_idx]) = curr_global_vertex_ids;

  free(global_vertex_ids_buffer);
  yac_int **global_vertex_ids_[2], int **vertex_ranks[2], MPI_Comm comm) {

  yac_int *global_vertex_ids[2] =
    {*(global_vertex_ids_[0]), *(global_vertex_ids_[1])};
  int global_ids_missing[2] =
    {(num_vertices[0] > 0) && (global_vertex_ids[0] == NULL),
     (num_vertices[1] > 0) && (global_vertex_ids[1] == NULL)};

    MPI_Allreduce(MPI_IN_PLACE, global_ids_missing, 2, MPI_INT, MPI_MAX, comm),

  size_t total_num_vertices = num_vertices[0] + num_vertices[1];

  int comm_rank, comm_size;

  double base_gc_norm_vector[3] = {0.0,0.0,1.0};

  int vertices_available = total_num_vertices > 0;

    MPI_IN_PLACE, &vertices_available, 1, MPI_INT, MPI_MAX, comm), comm);
  if ((comm_size > 1) && vertices_available) {

      xmalloc(total_num_vertices * sizeof(*dist_owners));
    for (size_t i = 0; i < num_vertices[0]; ++i)

    for (size_t i = 0, j = num_vertices[0]; i < num_vertices[1]; ++i, ++j)

      xmalloc(total_num_vertices * sizeof(*dist_vertices));

    for (int i = 0; i < 2; ++i) {

      for (size_t j = 0; j < num_vertices[i]; ++j, ++k) {

          dist_vertices[k].coord, vertex_coordinates[i][j],
          3 * sizeof(vertex_coordinates[i][j][0]));

          (global_ids_missing[i])?XT_INT_MAX:global_vertex_ids[i][j];

    size_t num_dist_vertices = total_num_vertices;
    size_t num_dist_owners = total_num_vertices;
    size_t (*all_bucket_sizes)[2] =
      xmalloc((size_t)comm_size * sizeof(*(all_bucket_sizes)));
    size_t * reorder_idx = NULL, reorder_idx_array_size = 0;
    int * list_flag = NULL;
    size_t list_flag_array_size = 0;

    MPI_Datatype remote_point_info_dt =

      (size_t[2]){total_num_vertices,0}, &(all_bucket_sizes[0][0]), 2,

    size_t global_num_vertices = 0;
    for (int i = 0; i < comm_size; ++i)
      global_num_vertices += all_bucket_sizes[i][0];

      &dist_vertices, &num_dist_vertices, &dist_owners, &num_dist_owners,
      (size_t[2]){global_num_vertices, 0}, all_bucket_sizes, comm_size,
      dist_vertex_dt, remote_point_info_dt, group_comm);

      &dist_vertices, &num_dist_vertices, &dist_owners, &num_dist_owners,
      all_bucket_sizes, comm_buffers, &reorder_idx, &reorder_idx_array_size,
      &list_flag, &list_flag_array_size, dist_vertex_dt, remote_point_info_dt,
      group_comm, base_gc_norm_vector);

    free(all_bucket_sizes);

      dist_vertices, num_dist_vertices, dist_owners,
      &reorder_idx, &reorder_idx_array_size, global_vertex_ids_,
      global_ids_missing, vertex_ranks, num_vertices, comm);

    free(dist_vertices);
    *proc_sphere_part = xmalloc(1 * sizeof(**proc_sphere_part));
    (*proc_sphere_part)->U.data.rank = 0;
    (*proc_sphere_part)->U.is_leaf = 1;
    (*proc_sphere_part)->T.data.rank = 0;
    (*proc_sphere_part)->T.is_leaf = 1;
    (*proc_sphere_part)->gc_norm_vector[0] = base_gc_norm_vector[0];
    (*proc_sphere_part)->gc_norm_vector[1] = base_gc_norm_vector[1];
    (*proc_sphere_part)->gc_norm_vector[2] = base_gc_norm_vector[2];
    for (int grid_idx = 0; grid_idx < 2; ++grid_idx) {
      *(vertex_ranks[grid_idx]) =
        xmalloc(num_vertices[grid_idx] * sizeof(**(vertex_ranks[grid_idx])));
      for (size_t i = 0; i < num_vertices[grid_idx]; ++i)
        (*(vertex_ranks[grid_idx]))[i] = 0;
      if (global_ids_missing[grid_idx]) {
        *(global_vertex_ids_[grid_idx]) =

            num_vertices[grid_idx] * sizeof(**(global_vertex_ids_[grid_idx])));
        for (size_t i = 0; i < num_vertices[grid_idx]; ++i)
          (*(global_vertex_ids_[grid_idx]))[i] = (yac_int)i;
  return (node->U.is_leaf) && (node->T.is_leaf) &&
         (node->U.data.rank == 0) && (node->T.data.rank == 0);
#ifdef YAC_NEC_EXPERIMENTAL

static void yac_proc_sphere_part_do_point_search_recursive(

  size_t * search_idx, size_t * temp_search_idx, int * flag,
  size_t count, int * ranks) {

  for (size_t i = 0; i < count; ++i) {

    double dot = search_coords[search_idx[i]][0] * node->gc_norm_vector[0] +

    flag[i] = (dot <= 0.0);

  size_t u_size = 0, t_size = 0;
  for (size_t i = 0; i < count; ++i) {
    if (flag[i]) search_idx[u_size++] = search_idx[i];
    else temp_search_idx[t_size++] = search_idx[i];

  if (node->U.is_leaf) {
    size_t rank = node->U.data.rank;
    for (size_t i = 0; i < u_size; ++i) ranks[search_idx[i]] = rank;

    yac_proc_sphere_part_do_point_search_recursive(
      node->U.data.node, search_coords, search_idx, temp_search_idx + t_size,
      flag, u_size, ranks);

    for (size_t i = 0; i < t_size; ++i) ranks[temp_search_idx[i]] = rank;

    yac_proc_sphere_part_do_point_search_recursive(
      node->T.data.node, search_coords, temp_search_idx, search_idx + u_size,
      flag, t_size, ranks);
  size_t count, int * ranks) {

    for (size_t i = 0; i < count; ++i) ranks[i] = 0;

#ifdef YAC_NEC_EXPERIMENTAL

  size_t * search_idx = xmalloc(2 * count * sizeof(*search_idx));
  for (size_t i = 0; i < count; ++i) search_idx[i] = i;
  int * flag = xmalloc(count * sizeof(*flag));
  yac_proc_sphere_part_do_point_search_recursive(
    node, search_coords, search_idx, search_idx + count, flag, count, ranks);

  for (size_t i = 0; i < count; ++i) {

    double * curr_coord = search_coords[i];

      if (curr_node->U.is_leaf) {
        ranks[i] = curr_node->U.data.rank;

        curr_node = curr_node->U.data.node;
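/*
 * Illustrative sketch (not part of the original file): the point search above
 * walks the binary tree by the sign of the dot product between the search
 * point and the node's great-circle normal vector.  A generic, self-contained
 * version of that descent, with hypothetical node and member names:
 */
struct toy_tree_node {
  double norm[3];
  int is_leaf[2];                       /* [0] = "U" side, [1] = "T" side */
  union { int rank; struct toy_tree_node * child; } data[2];
};

static int toy_point_to_rank(struct toy_tree_node const * node,
                             double const coord[3]) {
  for (;;) {
    double dot = coord[0] * node->norm[0] +
                 coord[1] * node->norm[1] +
                 coord[2] * node->norm[2];
    int side = (dot <= 0.0) ? 0 : 1;    /* which half-space the point lies in */
    if (node->is_leaf[side]) return node->data[side].rank;
    node = node->data[side].child;      /* descend into the matching subtree */
  }
}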
  int * ranks, int * rank_count) {

  if (node->U.is_leaf) {

    ranks[*rank_count] = node->U.data.rank;

  int * ranks, int * rank_count) {

      node->T.data.node, bnd_circle, ranks, rank_count);

  if (node->U.is_leaf) {

    ranks[*rank_count] = node->U.data.rank;

      node->U.data.node, bnd_circle, ranks, rank_count);

  int * ranks, int * rank_count) {

    "ERROR(yac_proc_sphere_part_do_bnd_circle_search): angle is >= PI")

  if (node->U.is_leaf) {
    *ranks = node->U.data.rank;
  uint64_t ** inner_node_sizes, int * send_flags, int * recv_flags,

  int curr_node_is_valid = (**inner_node_sizes >= min_size);

  if (curr_node_is_valid) {

    curr_valid_node = &temp_valid_node;

    curr_valid_node = last_valid_node;

  for (int j = 0; j < 2; ++j) {

      if (leaf_sizes[rank] < min_size) {

        if (rank == comm_rank)
          for (int i = 0; i < curr_valid_node->num_ranks; ++i)
            recv_flags[ranks[i]] = 1;

        for (int i = 0; i < curr_valid_node->num_ranks; ++i) {
          if (ranks[i] == comm_rank) {
            send_flags[rank] = 1;

      ++*inner_node_sizes;

        node_data.data.node, leaf_sizes, min_size, inner_node_sizes,
        send_flags, recv_flags, comm_rank, curr_valid_node);
  uint64_t ** inner_node_sizes) {

  uint64_t * curr_inner_node_size = *inner_node_sizes;

  if (node->U.is_leaf) {
    node_size = leaf_sizes[node->U.data.rank];

  ++*inner_node_sizes;

    ++*inner_node_sizes;

  return (*curr_inner_node_size = node_size);
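/*
 * Illustrative sketch (not part of the original file): determine_node_sizes
 * above accumulates subtree sizes bottom-up while advancing through a
 * pre-allocated array of inner-node sizes.  The same idea on a plain binary
 * tree, with hypothetical names: a node's size is the sum of the sizes of its
 * two children, where leaves contribute a per-rank count.
 */
#include <stdint.h>

struct toy_size_node {
  int is_leaf[2];
  union { int rank; struct toy_size_node * child; } data[2];
};

static uint64_t toy_node_size(struct toy_size_node const * node,
                              uint64_t const * leaf_sizes) {
  uint64_t size = 0;
  for (int side = 0; side < 2; ++side)
    size += node->is_leaf[side]
              ? leaf_sizes[node->data[side].rank]                  /* leaf: per-rank count */
              : toy_node_size(node->data[side].child, leaf_sizes); /* inner node: recurse  */
  return size;
}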
  uint64_t min_size, int * send_flags, int * recv_flags,
  int comm_rank, int comm_size) {

  uint64_t * inner_node_sizes =
    xcalloc((size_t)comm_size, sizeof(*inner_node_sizes));

  uint64_t * temp_inner_node_sizes = inner_node_sizes;

    *inner_node_sizes >= min_size,
    "ERROR(yac_proc_sphere_part_get_neigh_ranks): sum of global leaf sizes "

  temp_inner_node_sizes = inner_node_sizes;

    send_flags, recv_flags, comm_rank, &search_data);

  send_flags[comm_rank] = 0;
  recv_flags[comm_rank] = 0;

  free(search_data.ranks);
  free(inner_node_sizes);