134 yac_int * ids,
size_t * idx,
size_t num_ids,
135 yac_int * ref_sorted_ids,
size_t num_sorted_ids) {
137 size_t * reorder =
xmalloc(num_ids *
sizeof(*reorder));
138 for (
size_t i = 0; i < num_ids; ++i) reorder[i] = i;
142 for (
size_t i = 0, j = 0; i < num_ids; ++i) {
145 while ((j < num_sorted_ids) && (ref_sorted_ids[j] < curr_id)) ++j;
147 (j < num_sorted_ids) && (ref_sorted_ids[j] == curr_id),
148 "ERROR(id2idx): id not found")
160 if (num_vertices == 0)
return SIZE_MAX;
165 size_t min_idx = vertices[0];
166 yac_int min_global_id = grid_vertex_ids[min_idx];
167 for (
int j = 1; j < num_vertices; ++j) {
168 size_t curr_vertex = vertices[j];
169 yac_int curr_global_id = grid_vertex_ids[curr_vertex];
170 if (min_global_id > curr_global_id) {
171 min_global_id = curr_global_id;
172 min_idx = curr_vertex;
181 struct yac_dist_grid * dist_grid,
int is_root,
int * vertex_owner_mask) {
184 int * core_cell_mask =
xmalloc(num_cells *
sizeof(*core_cell_mask));
189 for (
size_t i = 0; i < num_cells; ++i) {
193 ((ref_vertex != SIZE_MAX)?(vertex_owner_mask[ref_vertex]):is_root);
195 return core_cell_mask;
203 size_t * edge_vertices = &(dist_grid->
edge_to_vertex[edge_idx][0]);
205 return edge_vertices[
206 (vertex_ids[edge_vertices[0]] > vertex_ids[edge_vertices[1]])?1:0];
215 int * core_edge_mask =
xmalloc(num_edges *
sizeof(*core_edge_mask));
220 for (
size_t i = 0; i < num_edges; ++i)
224 return core_edge_mask;
234 int * vertex_owner_mask =
240 vertex_owner_mask[i] = vertex_owner_mask[i] == comm_rank;
253 vertex_owner_mask[i] =
261 MPI_Datatype single_id_owner_dt;
262 int array_of_blocklengths[] = {1, 1, 1};
263 const MPI_Aint array_of_displacements[] =
264 {(MPI_Aint)(intptr_t)(
const void *)&(dummy.
global_id) -
265 (MPI_Aint)(intptr_t)(
const void *)&dummy,
266 (MPI_Aint)(intptr_t)(
const void *)&(dummy.
data.
rank) -
267 (MPI_Aint)(intptr_t)(
const void *)&dummy,
268 (MPI_Aint)(intptr_t)(
const void *)&(dummy.
data.
orig_pos) -
269 (MPI_Aint)(intptr_t)(
const void *)&dummy};
270 const MPI_Datatype array_of_types[] =
273 MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacements,
274 array_of_types, &single_id_owner_dt), comm);
279 int n, MPI_Comm comm) {
281 MPI_Datatype dt_single_remote_point_ =
283 MPI_Datatype dt_single_remote_point__ =
286 MPI_Datatype dt_single_remote_point;
289 n, dt_single_remote_point__, &dt_single_remote_point), comm);
290 yac_mpi_call(MPI_Type_free(&dt_single_remote_point__), comm);
291 yac_mpi_call(MPI_Type_commit(&dt_single_remote_point), comm);
292 return dt_single_remote_point;
296 const void * a,
const void * b) {
307 MPI_Datatype id_pos_dt;
308 int array_of_blocklengths[] = {1,1};
309 const MPI_Aint array_of_displacements[] =
310 {(MPI_Aint)(intptr_t)(
const void *)&(dummy.
global_id) -
311 (MPI_Aint)(intptr_t)(
const void *)&dummy,
312 (MPI_Aint)(intptr_t)(
const void *)&(dummy.
orig_pos) -
313 (MPI_Aint)(intptr_t)(
const void *)&dummy};
314 const MPI_Datatype array_of_types[] = {
yac_int_dt, MPI_UINT64_T};
316 MPI_Type_create_struct(
317 2, array_of_blocklengths, array_of_displacements,
318 array_of_types, &id_pos_dt), comm);
325 MPI_Datatype remote_point_info_dt;
326 int array_of_blocklengths[] = {1, 1};
327 const MPI_Aint array_of_displacements[] =
328 {(MPI_Aint)(intptr_t)(
const void *)&(dummy.
rank) -
329 (MPI_Aint)(intptr_t)(
const void *)&dummy,
330 (MPI_Aint)(intptr_t)(
const void *)&(dummy.
orig_pos) -
331 (MPI_Aint)(intptr_t)(
const void *)&dummy};
332 const MPI_Datatype array_of_types[] =
333 {MPI_INT, MPI_UINT64_T};
335 MPI_Type_create_struct(2, array_of_blocklengths, array_of_displacements,
336 array_of_types, &remote_point_info_dt), comm);
346 memcpy(temp, point_infos.data.multi, (
size_t)
count *
sizeof(*temp));
347 point_infos.data.multi = temp;
353 yac_int * sorted_ids,
size_t * reorder_idx,
size_t count,
358 size_t id_owner_count =
points->count;
361 for (
size_t i = 0, j = 0; i < count; ++i) {
363 yac_int curr_id = sorted_ids[i];
365 while ((j < id_owner_count) && (points_[j].
global_id < curr_id)) ++j;
367 if ((j >= id_owner_count) || (points_[j].
global_id != curr_id))
368 point_infos[reorder_idx[i]].
count = -1;
378 size_t * reorder_buffer,
size_t request_count,
382 size_t local_count = local_remote_data->
count;
384 {.
count = 0, .data.single = {.rank = INT_MAX, .orig_pos = UINT64_MAX}};
386 for (
size_t j = 0; j < request_count; ++j) reorder_buffer[j] = j;
390 requested_ids, request_count, reorder_buffer);
392 for (
size_t i = 0, j = 0; i < request_count; ++i) {
393 yac_int curr_id = requested_ids[i];
394 while ((j < local_count) && (curr_id > local_remote_points[j].global_id))
396 results[reorder_buffer[i]] =
397 ((j < local_count) && (curr_id == local_remote_points[j].global_id))?
398 &(local_remote_points[j].
data):&dummy_data;
409 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_count), comm);
411 MPI_Pack_size(infos->
count, point_info_dt, comm, &pack_size_data), comm);
413 return pack_size_count + pack_size_data;
418 MPI_Datatype point_info_dt, MPI_Comm comm) {
422 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_count), comm);
424 for (
size_t i = 0; i <
count; ++i) {
427 infos[i]->
count, point_info_dt, comm, pack_sizes + i), comm);
428 pack_sizes[i] += pack_size_count;
434 void * buffer, MPI_Datatype point_info_dt, MPI_Comm comm) {
436 for (
size_t i = 0; i <
count; ++i) {
438 int curr_count = infos[i]->
count;
444 int buffer_size = pack_sizes[i];
446 MPI_Pack(&curr_count, 1, MPI_INT, buffer,
447 buffer_size, &position, comm), comm);
449 MPI_Pack(curr_point_infos, curr_count, point_info_dt, buffer,
450 buffer_size, &position, comm), comm);
451 buffer = (
void*)(((
unsigned char*)buffer) + buffer_size);
456 void * buffer,
int buffer_size,
int * position,
461 MPI_Unpack(buffer, buffer_size, position, &count, 1, MPI_INT, comm), comm);
463 infos->
count = count;
474 MPI_Unpack(buffer, buffer_size, position, point_infos, count,
475 point_info_dt, comm), comm);
483 void * buffer,
int buffer_size,
int * position,
489 MPI_Unpack(buffer, buffer_size, position, &count, 1, MPI_INT, comm), comm);
491 infos->
count = count;
498 (infos->
data.
multi = info_buffer + *info_buffer_position);
499 *info_buffer_position += count;
503 MPI_Unpack(buffer, buffer_size, position, point_infos, count,
504 point_info_dt, comm), comm);
508 struct remote_point * point, MPI_Datatype point_info_dt, MPI_Comm comm) {
511 pack_size_remote_point_infos;
514 pack_size_remote_point_infos =
517 return pack_size_id + pack_size_remote_point_infos;
522 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
524 int count = infos->
count;
530 MPI_Pack(&count, 1, MPI_INT, buffer, buffer_size, position, comm), comm);
532 MPI_Pack(info, count, point_info_dt, buffer, buffer_size, position, comm),
537 struct remote_point * point,
void * buffer,
int buffer_size,
int * position,
538 MPI_Datatype point_info_dt, MPI_Comm comm) {
542 buffer_size, position, comm), comm);
545 &(point->
data), buffer, buffer_size, position, point_info_dt, comm);
549 void * buffer,
int buffer_size,
int * position,
struct remote_point * point,
550 MPI_Datatype point_info_dt, MPI_Comm comm) {
557 buffer, buffer_size, position, &(point->
data), point_info_dt, comm);
561 void * buffer,
int buffer_size,
int * position,
563 struct remote_point * point, MPI_Datatype point_info_dt, MPI_Comm comm) {
570 buffer, buffer_size, position, info_buffer, info_buffer_position,
571 &(point->
data), point_info_dt, comm);
577 size_t count =
points->count;
581 remote_points_pack_size;
583 yac_mpi_call(MPI_Pack_size(2, MPI_UINT64_T, comm, &count_pack_size), comm);
584 remote_points_pack_size = 0;
585 for (
size_t i = 0; i < count; ++i)
586 remote_points_pack_size +=
589 return count_pack_size + remote_points_pack_size;
594 MPI_Datatype point_info_dt, MPI_Comm comm) {
596 size_t count =
points->count;
598 uint64_t counts[2] = {(uint64_t)count, 0};
599 for (
size_t i = 0; i < count; ++i)
601 counts[1] += (uint64_t)(point_data[i].
data.
count);
604 MPI_Pack(counts, 2, MPI_UINT64_T, buffer,
605 buffer_size, position, comm), comm);
606 for (
size_t i = 0; i < count; ++i)
608 point_data + i, buffer, buffer_size, position, point_info_dt, comm);
612 void * buffer,
int buffer_size,
int * position,
619 buffer, buffer_size, position, counts, 2, MPI_UINT64_T, comm), comm);
624 size_t count = ((*points)->count = (size_t)(counts[0]));
626 ((*points)->data =
xmalloc(count *
sizeof(*((*points)->data))));
628 &((*points)->buffer[0]);
630 for (
size_t i = 0, offset = 0; i < count; ++i) {
633 buffer, buffer_size, position, remote_point_info_buffer, &offset,
634 point_data + i, point_info_dt, comm);
643 MPI_Comm comm = dist_grid->
comm;
648 for (
int location = 0; location < 3; ++location)
649 dist_grid->
owners[location] =
653 dist_grid->
count[location], dist_owner[location]);
655 size_t num_missing_vertices = 0;
656 size_t num_missing_edges = 0;
661 "ERROR(generate_dist_grid_remote_point_infos):"
662 "no owner information for a cell available")
665 ++num_missing_vertices;
669 size_t total_num_missing = num_missing_vertices + num_missing_edges;
670 int * ranks_buffer =
xmalloc(total_num_missing *
sizeof(*ranks_buffer));
671 int * vertex_ranks = ranks_buffer;
672 int * edge_ranks = ranks_buffer + num_missing_vertices;
673 size_t * size_t_buffer =
674 xmalloc((3 * total_num_missing +
675 4 * (
size_t)comm_size) *
sizeof(*size_t_buffer));
676 size_t * vertex_reorder = size_t_buffer;
677 size_t * edge_reorder = size_t_buffer + num_missing_vertices;
678 size_t * coord_indices = size_t_buffer + total_num_missing;
679 size_t * ranks_idxs = coord_indices;
680 size_t * cve_reorder = size_t_buffer + 2 * total_num_missing;
681 size_t * total_sendcounts = size_t_buffer + 3 * total_num_missing + 0 * comm_size;
682 size_t * total_recvcounts = size_t_buffer + 3 * total_num_missing + 1 * comm_size;
683 size_t * total_sdispls = size_t_buffer + 3 * total_num_missing + 2 * comm_size;
684 size_t * total_rdispls = size_t_buffer + 3 * total_num_missing + 3 * comm_size;
690 vertex_reorder[j] = i;
692 coord_indices[offset] = i;
693 cve_reorder[offset] = offset;
705 coord_indices[offset] =
706 vertex_idxes[(vertex_ids[0] > vertex_ids[1])?1:0];
707 cve_reorder[offset] = offset;
713 coord_indices, total_num_missing, cve_reorder);
715 size_t num_unique_idxs = 0;
716 for (
size_t i = 0, prev_idx = SIZE_MAX; i < total_num_missing; ++i) {
717 size_t curr_idx = coord_indices[i];
718 if (curr_idx != prev_idx) {
725 xmalloc(num_unique_idxs *
sizeof(*search_coords));
726 int * search_ranks =
xmalloc(num_unique_idxs *
sizeof(*search_ranks));
728 for (
size_t i = 0, j = 0, prev_idx = SIZE_MAX; i < total_num_missing; ++i) {
729 size_t curr_idx = coord_indices[i];
730 if (curr_idx != prev_idx) {
740 proc_sphere_part, search_coords, num_unique_idxs, search_ranks);
744 cve_reorder, total_num_missing, ranks_idxs);
747 for (
size_t i = 0; i < num_missing_vertices; ++i, ++offset)
748 vertex_ranks[i] = search_ranks[ranks_idxs[offset]];
749 for (
size_t i = 0; i < num_missing_edges; ++i, ++offset)
750 edge_ranks[i] = search_ranks[ranks_idxs[offset]];
754 vertex_ranks, num_missing_vertices, vertex_reorder);
756 edge_ranks, num_missing_edges, edge_reorder);
758 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
760 2, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
762 for (
size_t i = 0; i < num_missing_vertices; ++i)
763 sendcounts[2 * vertex_ranks[i] + 0]++;
764 for (
size_t i = 0; i < num_missing_edges; ++i)
765 sendcounts[2 * edge_ranks[i] + 1]++;
768 2, sendcounts, recvcounts, sdispls, rdispls, comm);
770 size_t num_requested_ids[2] = {0,0};
771 size_t saccu = 0, raccu = 0;
772 for (
int i = 0; i < comm_size; ++i) {
773 total_sdispls[i] = saccu;
774 total_rdispls[i] = raccu;
775 total_sendcounts[i] = 0;
776 total_recvcounts[i] = 0;
777 for (
int j = 0; j < 2; ++j) {
778 total_sendcounts[i] += sendcounts[2 * i + j];
779 total_recvcounts[i] += recvcounts[2 * i + j];
780 num_requested_ids[j] += recvcounts[2 * i + j];
782 saccu += total_sendcounts[i];
783 raccu += total_recvcounts[i];
785 size_t request_count = total_recvcounts[comm_size - 1] +
786 total_rdispls[comm_size - 1];
790 (total_num_missing + 2 * request_count) *
sizeof(*exchange_id_buffer));
791 yac_int * id_send_buffer = exchange_id_buffer + request_count;
792 yac_int * id_recv_buffer = exchange_id_buffer + request_count + total_num_missing;
795 for (
size_t i = 0; i < num_missing_vertices; ++i) {
796 size_t pos = sdispls[2 * vertex_ranks[i] + 0 + 1]++;
799 for (
size_t i = 0; i < num_missing_edges; ++i) {
800 size_t pos = sdispls[2 * edge_ranks[i] + 1 + 1]++;
805 yac_alltoallv_yac_int_p2p(
806 id_send_buffer, total_sendcounts, total_sdispls,
807 id_recv_buffer, total_recvcounts, total_rdispls, comm);
810 {exchange_id_buffer, exchange_id_buffer + num_requested_ids[0]};
814 size_t ids_offset[2] = {0, 0}, offset = 0;
815 for (
int i = 0; i < comm_size; ++i) {
816 for (
int j = 0; j < 2; ++j) {
817 size_t curr_count = recvcounts[2 * i + j];
818 memcpy(requested_ids[j] + ids_offset[j], id_recv_buffer + offset,
819 curr_count *
sizeof(*(requested_ids[0])));
820 ids_offset[j] += curr_count;
821 offset += curr_count;
827 xmalloc(request_count *
sizeof(*remote_point_infos_buffer));
829 {remote_point_infos_buffer,
830 remote_point_infos_buffer + num_requested_ids[0]};
834 size_t max_num_requested_ids =
MAX(num_requested_ids[0], num_requested_ids[1]);
835 size_t * reorder_buffer =
xmalloc(max_num_requested_ids *
sizeof(*reorder_buffer));
837 for (
int i = 0; i < 2; ++i)
839 requested_ids[i], found_points[i], reorder_buffer, num_requested_ids[i],
840 local_remote_points[i]);
841 free(reorder_buffer);
842 free(exchange_id_buffer);
844 int * pack_size_buffer =
845 xmalloc((total_num_missing + request_count) *
sizeof(*pack_size_buffer));
846 int * send_pack_sizes = pack_size_buffer;
847 int * recv_pack_sizes = pack_size_buffer + request_count;
851 size_t offset = 0, offsets[2] = {0, 0};
852 for (
int i = 0; i < comm_size; ++i) {
853 for (
int j = 0; j < 2; ++j) {
854 size_t curr_count = (size_t)(recvcounts[2 * i + j]);
857 found_points[j] + offsets[j], curr_count,
858 send_pack_sizes + offset, point_info_dt, comm);
859 offsets[j] += curr_count;
860 offset += curr_count;
866 yac_alltoallv_int_p2p(
867 send_pack_sizes, total_recvcounts, total_rdispls,
868 recv_pack_sizes, total_sendcounts, total_sdispls, comm);
870 size_t send_buffer_size = 0, recv_buffer_size = 0;
871 for (
size_t i = 0; i < request_count; ++i)
872 send_buffer_size += (
size_t)(send_pack_sizes[i]);
873 for (
size_t i = 0; i < total_num_missing; ++i)
874 recv_buffer_size += (
size_t)(recv_pack_sizes[i]);
876 void * result_buffer =
xmalloc(send_buffer_size + recv_buffer_size);
877 void * send_result_buffer = result_buffer;
878 void * recv_result_buffer = (
unsigned char *)result_buffer + send_buffer_size;
882 size_t send_buffer_size = 0, offset = 0, offsets[2] = {0,0};
883 for (
int i = 0; i < comm_size; ++i) {
884 for (
int j = 0; j < 2; ++j) {
885 size_t curr_count = recvcounts[2 * i + j];
888 found_points[j] + offsets[j],
889 send_pack_sizes + offset, curr_count,
890 (
void*)((
unsigned char*)send_result_buffer + send_buffer_size),
891 point_info_dt, comm);
892 for (
size_t k = 0; k < curr_count; ++k)
893 send_buffer_size += send_pack_sizes[offset + k];
894 offsets[j] += curr_count;
895 offset += curr_count;
901 size_t send_offset = 0, recv_offset = 0;
902 size_t saccu = 0, raccu = 0;
903 for (
int i = 0; i < comm_size; ++i) {
904 total_sdispls[i] = saccu;
905 total_rdispls[i] = raccu;
906 size_t curr_sendcount = 0, curr_recvcount = 0;
907 for (
int k = 0; k < 2; ++k) {
908 for (
size_t j = 0; j < recvcounts[2 * i + k]; ++j, ++send_offset)
909 curr_sendcount += (
size_t)(send_pack_sizes[send_offset]);
910 for (
size_t j = 0; j < sendcounts[2 * i + k]; ++j, ++recv_offset)
911 curr_recvcount += (
size_t)(recv_pack_sizes[recv_offset]);
913 total_sendcounts[i] = curr_sendcount;
914 total_recvcounts[i] = curr_recvcount;
915 saccu += curr_sendcount;
916 raccu += curr_recvcount;
921 yac_alltoallv_packed_p2p(
922 send_result_buffer, total_sendcounts, total_sdispls,
923 recv_result_buffer, total_recvcounts, total_rdispls, comm);
927 size_t counts[2] = {0, 0},
count = 0, offset = 0;
930 size_t * reorder[2] = {vertex_reorder, edge_reorder};
931 for (
int i = 0; i < comm_size; ++i) {
932 for (
int j = 0; j < 2; ++j) {
933 size_t curr_count = sendcounts[2 * i + j];
934 for (
size_t k = 0; k < curr_count; ++k, ++counts[j], ++
count) {
935 int curr_buffer_size = recv_pack_sizes[sdispls[2 * i + j] + k];
938 (
void*)((
unsigned char*)recv_result_buffer + offset),
939 curr_buffer_size, &position,
940 remote_points[j] + reorder[j][counts[j]], point_info_dt, comm);
941 offset += (size_t)(curr_buffer_size);
951 free(pack_size_buffer);
952 free(remote_point_infos_buffer);
967 for (i = 0; i < n; ++i)
if (ids[i] >=
id)
break;
969 if (n != i) memmove(ids + i + 1, ids + i, (n - i) *
sizeof(*ids));
980 for (i = 0; i < n; ++i)
if (ranks[i] >= rank)
break;
984 if (ranks[i] == rank)
return;
985 else memmove(ranks + i + 1, ranks + i, ((
size_t)(n - i)) *
sizeof(*ranks));
992 const void * a,
const void * b) {
998 int ret = count_a - count_b;
999 for (
int i = 0; !ret && (i < count_a); ++i)
1000 ret = (a_ids[i] > b_ids[i]) - (a_ids[i] < b_ids[i]);
1005 const void * a,
const void * b) {
1019 int * dist_cell_rank_counts,
size_t * dist_cell_rank_offsets) {
1025 int * ranks_buffer =
xmalloc((
size_t)comm_size *
sizeof(*ranks_buffer));
1026 size_t dist_cell_ranks_array_size = num_cells;
1027 int * dist_cell_ranks_ =
xmalloc(num_cells *
sizeof(*dist_cell_ranks_));
1031 int max_num_vertices_per_cell = 0;
1032 for (
size_t i = 0; i < num_cells; ++i)
1053 for (
size_t i = 0; i < num_cells; ++i) {
1057 size_t * curr_cell_to_vertex =
1058 cell_to_vertex + cell_to_vertex_offsets[i];
1059 size_t * curr_cell_to_edge =
1060 cell_to_edge + cell_to_edge_offsets[i];
1061 for (
int j = 0; j < num_vertices_per_cell[i]; ++j) {
1063 vertex_coordinates + curr_cell_to_vertex[j];
1064 for (
int k = 0; k < 3; ++k)
1075 proc_sphere_part, bnd_circle, ranks_buffer, &rank_count);
1077 ranks_buffer[0] = 0;
1082 offset + (
size_t)rank_count);
1083 memcpy(dist_cell_ranks_ + offset, ranks_buffer,
1084 (
size_t)rank_count *
sizeof(*ranks_buffer));
1085 dist_cell_rank_counts[i] = rank_count;
1086 dist_cell_rank_offsets[i] = offset;
1087 offset += (size_t)rank_count;
1090 dist_cell_rank_offsets[num_cells] = offset;
1096 *dist_cell_ranks = dist_cell_ranks_;
1102 MPI_Datatype coord_dt;
1103 int array_of_blocklengths[] = {3};
1104 const MPI_Aint array_of_displacements[] =
1105 {(MPI_Aint)(intptr_t)(
const void *)&(dummy.
coord) -
1106 (MPI_Aint)(intptr_t)(
const void *)&dummy};
1107 const MPI_Datatype array_of_types[] = {MPI_DOUBLE};
1109 MPI_Type_create_struct(1, array_of_blocklengths, array_of_displacements,
1110 array_of_types, &coord_dt), comm);
1117 MPI_Datatype global_id_dt;
1118 int array_of_blocklengths[] = {1};
1119 const MPI_Aint array_of_displacements[] =
1120 {(MPI_Aint)(intptr_t)(
const void *)&(dummy.
global_id) -
1121 (MPI_Aint)(intptr_t)(
const void *)&dummy};
1122 const MPI_Datatype array_of_types[] = {
yac_int_dt};
1124 MPI_Type_create_struct(1, array_of_blocklengths, array_of_displacements,
1125 array_of_types, &global_id_dt), comm);
1130 const void * a,
const void * b) {
1137 const void * a,
const void * b) {
1161 int ids_available_local =
1163 int ids_available_global;
1164 yac_mpi_call(MPI_Allreduce(&ids_available_local, &ids_available_global, 1,
1165 MPI_INT, MPI_MAX, comm), comm);
1170 ids_available_local || !ids_available_global,
1171 "ERROR(generate_vertex_ids): inconsistent global ids")
1178 for (
size_t j = 0; j < grid->
num_vertices; ++j) core_mask[j] = 1;
1183 if (ids_available_global)
return vertex_ranks;
1188 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
1190 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1193 sendcounts[vertex_ranks[i]]++;
1196 1, sendcounts, recvcounts, sdispls, rdispls, comm);
1199 size_t dist_vertex_count =
1200 recvcounts[comm_size - 1] + rdispls[comm_size - 1];
1203 xmalloc((orig_vertex_count + dist_vertex_count) *
1204 sizeof(*id_reorder_coord_buffer));
1206 id_reorder_coord_send_buffer = id_reorder_coord_buffer;
1208 id_reorder_coord_recv_buffer = id_reorder_coord_buffer + orig_vertex_count;
1211 for (
size_t i = 0; i < orig_vertex_count; ++i) {
1212 size_t pos = sdispls[vertex_ranks[i] + 1]++;
1213 id_reorder_coord_send_buffer[pos].
reorder_idx = i;
1214 for (
int j = 0; j < 3; ++j)
1215 id_reorder_coord_send_buffer[pos].
coord[j] =
1219 MPI_Datatype id_reorder_coord_coord_dt =
1224 id_reorder_coord_send_buffer, sendcounts, sdispls,
1225 id_reorder_coord_recv_buffer, recvcounts, rdispls,
1226 sizeof(*id_reorder_coord_send_buffer), id_reorder_coord_coord_dt, comm);
1228 yac_mpi_call(MPI_Type_free(&id_reorder_coord_coord_dt), comm);
1230 for (
size_t i = 0; i < dist_vertex_count; ++i)
1234 qsort(id_reorder_coord_recv_buffer, dist_vertex_count,
1237 size_t unique_count = dist_vertex_count > 0;
1241 for (
size_t i = 0; i < dist_vertex_count; ++i) {
1251 unique_count <= (
size_t)XT_INT_MAX,
1252 "ERROR(generate_vertex_ids): global_id out of bounds")
1260 MPI_SUM, comm), comm);
1263 if (comm_rank == 0) id_offset = 0;
1266 ((
size_t)id_offset + unique_count) <= (
size_t)XT_INT_MAX,
1267 "ERROR(generate_vertex_ids): global_id out of bounds")
1270 for (
size_t i = 0; i < dist_vertex_count; ++i)
1271 id_reorder_coord_recv_buffer[i].
global_id += id_offset;
1274 qsort(id_reorder_coord_recv_buffer, dist_vertex_count,
1275 sizeof(*id_reorder_coord_recv_buffer),
1278 MPI_Datatype id_reorder_coord_id_dt =
1283 id_reorder_coord_recv_buffer, recvcounts, rdispls,
1284 id_reorder_coord_send_buffer, sendcounts, sdispls,
1285 sizeof(*id_reorder_coord_send_buffer), id_reorder_coord_id_dt, comm);
1287 yac_mpi_call(MPI_Type_free(&id_reorder_coord_id_dt), comm);
1295 vertex_ids[id_reorder_coord_send_buffer[i].
reorder_idx] =
1296 id_reorder_coord_send_buffer[i].
global_id;
1298 free(id_reorder_coord_buffer);
1301 return vertex_ranks;
1309 int comm_rank, comm_size;
1315 int ids_available_local[2], ids_available_global[2];
1318 yac_mpi_call(MPI_Allreduce(ids_available_local, ids_available_global, 2,
1319 MPI_INT, MPI_MAX, comm), comm);
1323 ids_available_local[0] == ids_available_global[0],
1324 "ERROR(generate_ce_ids): inconsistent global ids")
1327 int * core_cell_mask =
1330 for (
size_t j = 0; j < grid->
num_cells; ++j) core_cell_mask[j] = 1;
1335 ids_available_local[1] == ids_available_global[1],
1336 "ERROR(generate_ce_ids): inconsistent global ids")
1339 int * core_edge_mask =
1342 for (
size_t j = 0; j < grid->
num_edges; ++j) core_edge_mask[j] = 1;
1347 if (ids_available_global[0] && ids_available_global[1])
return;
1351 (((ids_available_global[0])?0:(grid->
num_cells)) +
1352 ((ids_available_global[1])?0:(grid->
num_edges))) *
sizeof(*rank_buffer));
1353 int * cell_ranks = rank_buffer;
1355 rank_buffer + ((ids_available_global[0])?0:(grid->
num_cells));
1357 size_t * size_t_buffer =
1358 xmalloc((8 * (
size_t)comm_size + 1) *
sizeof(*size_t_buffer));
1359 size_t * sendcounts = size_t_buffer + 0 * comm_size;
1360 size_t * recvcounts = size_t_buffer + 2 * comm_size;
1361 size_t * total_sendcounts = size_t_buffer + 4 * comm_size;
1362 size_t * total_recvcounts = size_t_buffer + 5 * comm_size;
1363 size_t * total_sdispls = size_t_buffer + 6 * comm_size;
1364 size_t * total_rdispls = size_t_buffer + 7 * comm_size + 1;
1365 memset(sendcounts, 0, 2 * (
size_t)comm_size *
sizeof(*sendcounts));
1367 yac_int * cell_to_vertex_ids = NULL;
1368 int max_num_vertices_per_cell = 0;
1370 if (!ids_available_global[0]) {
1373 for (
size_t i = 0; i < grid->
num_cells; ++i)
1376 yac_mpi_call(MPI_Allreduce(MPI_IN_PLACE, &max_num_vertices_per_cell, 1,
1377 MPI_INT, MPI_MAX, comm), comm);
1380 cell_to_vertex_ids =
1382 sizeof(*cell_to_vertex_ids));
1384 for (
size_t i = 0; i < grid->
num_cells; ++i) {
1387 yac_int * curr_cell_to_vertex_ids =
1388 cell_to_vertex_ids + i * max_num_vertices_per_cell;
1391 if (curr_num_vertices > 0) {
1392 size_t * curr_cell_vertices =
1394 size_t min_vertex = curr_cell_vertices[0];
1395 curr_cell_to_vertex_ids[0] = grid->
vertex_ids[min_vertex];
1396 for (
size_t j = 1; j < curr_num_vertices; ++j) {
1397 size_t curr_vertex_idx = curr_cell_vertices[j];
1400 if (curr_cell_to_vertex_ids[0] == curr_vertex_id)
1401 min_vertex = curr_vertex_idx;
1403 cell_rank = vertex_ranks[min_vertex];
1407 for (
size_t j = curr_num_vertices; j < max_num_vertices_per_cell; ++j)
1408 curr_cell_to_vertex_ids[j] = XT_INT_MAX;
1410 sendcounts[2 * ((cell_ranks[i] = cell_rank)) + 0]++;
1414 yac_int * edge_to_vertex_ids = NULL;
1416 if (!ids_available_global[1]) {
1418 edge_to_vertex_ids =
1421 for (
size_t i = 0; i < grid->
num_edges; ++i) {
1424 yac_int * curr_edge_vertex_ids = edge_to_vertex_ids + 2 * i;
1425 curr_edge_vertex_ids[0] = grid->
vertex_ids[curr_edge_to_vertex[0]];
1426 curr_edge_vertex_ids[1] = grid->
vertex_ids[curr_edge_to_vertex[1]];
1428 if (curr_edge_vertex_ids[0] > curr_edge_vertex_ids[1]) {
1429 yac_int temp = curr_edge_vertex_ids[0];
1430 curr_edge_vertex_ids[0] = curr_edge_vertex_ids[1];
1431 curr_edge_vertex_ids[1] = temp;
1433 2 * ((edge_ranks[i] = vertex_ranks[curr_edge_to_vertex[1]])) + 1]++;
1436 2 * ((edge_ranks[i] = vertex_ranks[curr_edge_to_vertex[0]])) + 1]++;
1445 total_sdispls[0] = 0;
1446 size_t recv_counts[2] = {0,0};
1447 size_t saccu = 0, raccu = 0;
1448 for (
int i = 0; i < comm_size; ++i) {
1449 total_sdispls[i+1] = saccu;
1450 total_rdispls[i] = raccu;
1451 recv_counts[0] += recvcounts[2 * i + 0];
1452 recv_counts[1] += recvcounts[2 * i + 1];
1453 total_sendcounts[i] = sendcounts[2 * i + 0] *
1454 (size_t)max_num_vertices_per_cell +
1455 sendcounts[2 * i + 1] * 2;
1456 total_recvcounts[i] = recvcounts[2 * i + 0] *
1457 (size_t)max_num_vertices_per_cell +
1458 recvcounts[2 * i + 1] * 2;
1459 saccu += total_sendcounts[i];
1460 raccu += total_recvcounts[i];
1462 size_t local_data_count = total_sendcounts[comm_size - 1] +
1463 total_sdispls[comm_size];
1464 size_t recv_count = total_recvcounts[comm_size - 1] +
1465 total_rdispls[comm_size - 1];
1468 xmalloc((local_data_count + recv_count) *
sizeof(*yac_int_buffer));
1469 yac_int * send_buffer = yac_int_buffer;
1470 yac_int * recv_buffer = yac_int_buffer + local_data_count;
1473 if (!ids_available_global[0])
1474 for (
size_t i = 0; i < grid->
num_cells; ++i)
1475 for (
int j = 0; j < max_num_vertices_per_cell; ++j)
1476 send_buffer[total_sdispls[cell_ranks[i] + 1]++] =
1477 cell_to_vertex_ids[i * max_num_vertices_per_cell + j];
1478 if (!ids_available_global[1])
1479 for (
size_t i = 0; i < grid->
num_edges; ++i)
1480 for (
int j = 0; j < 2; ++j)
1481 send_buffer[total_sdispls[edge_ranks[i] + 1]++] =
1482 edge_to_vertex_ids[2 * i + j];
1484 free(edge_to_vertex_ids);
1485 free(cell_to_vertex_ids);
1488 yac_alltoallv_yac_int_p2p(
1489 send_buffer, total_sendcounts, total_sdispls,
1490 recv_buffer, total_recvcounts, total_rdispls, comm);
1493 xmalloc((recv_counts[0] + recv_counts[1]) *
sizeof(*n_ids_reorder_buffer));
1495 {n_ids_reorder_buffer, n_ids_reorder_buffer + recv_counts[0]};
1498 int index_counts[2] = {max_num_vertices_per_cell, 2};
1502 for (
int i = 0; i < comm_size; ++i) {
1503 for (
int j = 0; j < 2; ++j) {
1504 size_t curr_count = recvcounts[2 * i + j];
1505 for (
size_t k = 0; k < curr_count; ++k, ++
reorder_idx, ++recv_counts[j]) {
1509 offset += index_counts[j];
1514 for (
int i = 0; i < 2; ++i) {
1516 if (ids_available_global[i])
continue;
1521 size_t unique_count = recv_counts[i] > 0;
1525 for (
size_t j = 0; j < recv_counts[i]; ++j, ++curr) {
1534 unique_count <= (
size_t)XT_INT_MAX,
1535 "ERROR(generate_global_ce_ids): global_id out of bounds")
1542 MPI_SUM, comm), comm);
1543 if (comm_rank == 0) id_offset = 0;
1546 ((
size_t)id_offset + unique_count) <= (
size_t)XT_INT_MAX,
1547 "ERROR(generate_global_ce_ids): global_id out of bounds")
1550 for (
size_t j = 0; j < recv_counts[i]; ++j)
1553 free(yac_int_buffer);
1555 qsort(n_ids_reorder_buffer, recv_counts[0] + recv_counts[1],
1559 xmalloc((recv_counts[0] + recv_counts[1] +
1560 ((ids_available_global[0])?0:(grid->
num_cells)) +
1561 ((ids_available_global[1])?0:(grid->
num_edges))) *
1562 sizeof(*global_ids_buffer));
1563 yac_int * send_global_ids = global_ids_buffer;
1565 global_ids_buffer + recv_counts[0] + recv_counts[1];
1567 for (
size_t i = 0; i < recv_counts[0] + recv_counts[1]; ++i)
1568 send_global_ids[i] = n_ids_reorder_buffer[i].
global_id;
1569 free(n_ids_reorder_buffer);
1572 saccu = 0, raccu = 0;
1573 for (
int i = 0; i < comm_size; ++i) {
1574 total_sdispls[i] = saccu;
1575 total_rdispls[i] = raccu;
1577 ((total_sendcounts[i] = recvcounts[2 * i + 0] + recvcounts[2 * i + 1]));
1579 ((total_recvcounts[i] = sendcounts[2 * i + 0] + sendcounts[2 * i + 1]));
1583 yac_alltoallv_yac_int_p2p(
1584 send_global_ids, total_sendcounts, total_sdispls,
1585 recv_global_ids, total_recvcounts, total_rdispls, comm);
1587 if ((!ids_available_global[0]) && (grid->
num_cells > 0))
1589 if ((!ids_available_global[1]) && (grid->
num_edges > 0))
1593 if (!ids_available_global[0])
1594 for (
size_t i = 0; i < grid->
num_cells; ++i)
1595 grid->
cell_ids[i] = recv_global_ids[total_rdispls[cell_ranks[i]]++];
1596 if (!ids_available_global[1])
1597 for (
size_t i = 0; i < grid->
num_edges; ++i)
1598 grid->
edge_ids[i] = recv_global_ids[total_rdispls[edge_ranks[i]]++];
1601 free(size_t_buffer);
1602 free(global_ids_buffer);
1622 int * core_cell_mask,
size_t num_cells,
size_t num_edges) {
1624 if (num_cells == 0)
return NULL;
1628 for (
size_t i = 0; i < num_edges; ++i) {
1629 edge_to_cell[i][0] = SIZE_MAX;
1630 edge_to_cell[i][1] = SIZE_MAX;
1633 for (
size_t i = 0, offset = 0; i < num_cells; ++i) {
1635 size_t curr_num_edges = num_edges_per_cell[i];
1637 offset += curr_num_edges;
1639 if ((core_cell_mask == NULL) || core_cell_mask[i]) {
1641 for (
size_t j = 0; j < curr_num_edges; ++j) {
1643 size_t curr_edge = curr_cell_to_edge[j];
1644 size_t * curr_edge_to_cell = edge_to_cell[curr_edge];
1645 curr_edge_to_cell += *curr_edge_to_cell != SIZE_MAX;
1647 *curr_edge_to_cell == SIZE_MAX,
1648 "ERROR(generate_edge_to_cell): "
1649 "more than two cells point to a single edge "
1650 "(does the grid contain degenrated cells (less than 3 corners) "
1651 "or duplicated cells; "
1652 "these can be masked out using the core mask)\n"
1653 "(num_cells: %zu cell_idx: %zu: num_cell_edge %zu)",
1654 num_cells, i, curr_num_edges)
1655 *curr_edge_to_cell = i;
1660 return edge_to_cell;
1664 void * grid,
size_t vertex_idx,
size_t ** cells,
int * num_cells) {
1674 void * edge_to_cell,
size_t edge_idx,
size_t ** cells,
int *
num_cells) {
1681 int ** rank_buffer,
size_t * rank_buffer_size,
1682 size_t * rank_buffer_array_size,
int * num_ve_ranks,
1683 int * core_mask,
size_t count,
1685 void * ve_to_cell_data,
size_t idx,
size_t ** cells,
int *
num_cells),
1686 void * ve_to_cell_data,
int * num_cell_ranks,
1687 size_t * dist_cell_rank_offsets) {
1691 for (
size_t i = 0; i < count; ++i) {
1697 get_cells(ve_to_cell_data, i, &cells, &
num_cells);
1699 size_t max_num_ranks = 0;
1701 if (cells[j] != SIZE_MAX)
1702 max_num_ranks += (size_t)(num_cell_ranks[cells[j]]);
1705 *rank_buffer_size + (
size_t)max_num_ranks);
1707 int * curr_ranks = *rank_buffer + *rank_buffer_size;
1708 int curr_num_ranks = 0;
1712 size_t curr_cell_idx = cells[j];
1713 if (curr_cell_idx == SIZE_MAX)
continue;
1715 int curr_num_cell_ranks = num_cell_ranks[curr_cell_idx];
1716 int * curr_cell_ranks =
1717 *rank_buffer + dist_cell_rank_offsets[curr_cell_idx];
1719 for (
int k = 0; k < curr_num_cell_ranks; ++k)
1720 insert_rank(curr_ranks, &curr_num_ranks, curr_cell_ranks[k]);
1722 *rank_buffer_size += (size_t)(num_ve_ranks[i] = curr_num_ranks);
1724 num_ve_ranks[i] = 0;
1740 int * num_ranks_buffer =
1742 sizeof(*num_ranks_buffer));
1743 int * num_cell_ranks = num_ranks_buffer;
1744 int * num_vertex_ranks = num_ranks_buffer + grid->
num_cells;
1745 int * num_edge_ranks =
1747 size_t * dist_cell_rank_offsets =
1750 proc_sphere_part, grid, comm,
1751 &rank_buffer, num_cell_ranks, dist_cell_rank_offsets);
1753 size_t rank_buffer_size = dist_cell_rank_offsets[grid->
num_cells];
1754 size_t rank_buffer_array_size = dist_cell_rank_offsets[grid->
num_cells];
1759 &rank_buffer, &rank_buffer_size, &rank_buffer_array_size, num_vertex_ranks,
1761 (
void*)grid, num_cell_ranks, dist_cell_rank_offsets);
1770 &rank_buffer, &rank_buffer_size, &rank_buffer_array_size, num_edge_ranks,
1772 (
void*)edge_to_cell, num_cell_ranks, dist_cell_rank_offsets);
1778 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
1780 3, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1781 size_t * size_t_buffer =
1782 xmalloc(4 * (
size_t)comm_size *
sizeof(*size_t_buffer));
1783 size_t * total_sendcounts = size_t_buffer + 0 * comm_size;
1784 size_t * total_recvcounts = size_t_buffer + 1 * comm_size;
1785 size_t * total_sdispls = size_t_buffer + 2 * comm_size;
1786 size_t * total_rdispls = size_t_buffer + 3 * comm_size;
1795 .num_ranks = num_cell_ranks,
1799 .num_ranks = num_vertex_ranks,
1803 .num_ranks = num_edge_ranks,
1810 for (
int location = 0; location < 3; ++location) {
1811 size_t count = cve_data[location].count;
1812 int * num_ranks = cve_data[location].num_ranks;
1813 int * core_mask = cve_data[location].core_mask;
1814 for (
size_t i = 0; i < count; ++i) {
1815 int curr_num_ranks = num_ranks[i];
1816 int * ranks = rank_buffer + offset;
1817 offset += (size_t)curr_num_ranks;
1819 for (
int j = 0; j < curr_num_ranks; ++j)
1820 sendcounts[3 * ranks[j] + location]++;
1825 3, sendcounts, recvcounts, sdispls, rdispls, comm);
1827 size_t receive_counts[3] = {0,0,0};
1828 size_t saccu = 0, raccu = 0;
1829 for (
int i = 0; i < comm_size; ++i) {
1830 total_sdispls[i] = saccu;
1831 total_rdispls[i] = raccu;
1832 total_sendcounts[i] = 0;
1833 total_recvcounts[i] = 0;
1834 for (
int location = 0; location < 3; ++location) {
1835 total_sendcounts[i] += sendcounts[3 * i + location];
1836 total_recvcounts[i] += recvcounts[3 * i + location];
1837 receive_counts[location] += recvcounts[3 * i + location];
1839 saccu += total_sendcounts[i];
1840 raccu += total_recvcounts[i];
1842 size_t local_data_count = total_sendcounts[comm_size - 1] +
1843 total_sdispls[comm_size - 1];
1844 size_t recv_count = total_recvcounts[comm_size - 1] +
1845 total_rdispls[comm_size - 1];
1847 struct id_pos * id_pos_buffer =
1848 xmalloc((local_data_count + recv_count) *
sizeof(*id_pos_buffer));
1849 struct id_pos * id_pos_send_buffer = id_pos_buffer;
1850 struct id_pos * id_pos_recv_buffer =
1851 id_pos_buffer + local_data_count;
1855 for (
int location = 0; location < 3; ++location) {
1856 size_t count = cve_data[location].count;
1857 int * num_ranks = cve_data[location].num_ranks;
1858 int * core_mask = cve_data[location].core_mask;
1859 yac_int * ids = cve_data[location].ids;
1860 for (
size_t i = 0; i < count; ++i) {
1861 int curr_num_ranks = num_ranks[i];
1862 int * ranks = rank_buffer + offset;
1863 offset += (size_t)curr_num_ranks;
1866 for (
int j = 0; j < curr_num_ranks; ++j) {
1867 size_t pos = sdispls[3 * ranks[j] + location + 1]++;
1869 id_pos_send_buffer[pos].
orig_pos = i;
1874 free(num_ranks_buffer);
1876 free(dist_cell_rank_offsets);
1882 id_pos_send_buffer, total_sendcounts, total_sdispls,
1883 id_pos_recv_buffer, total_recvcounts, total_rdispls,
1884 sizeof(*id_pos_send_buffer), id_pos_dt, comm);
1888 size_t dist_owner_counts[3] = {0, 0, 0};
1889 for (
int i = 0; i < comm_size; ++i)
1890 for (
int location = 0; location < 3; ++location)
1891 dist_owner_counts[location] += recvcounts[3 * i + location];
1892 size_t max_dist_owner_count =
1893 MAX(
MAX(dist_owner_counts[0], dist_owner_counts[1]), dist_owner_counts[2]);
1895 xmalloc(max_dist_owner_count *
sizeof(*temp_buffer));
1900 for (
int location = 0; location < 3; ++location) {
1903 for (
int i = 0; i < comm_size; ++i) {
1904 size_t curr_recvcount = recvcounts[3 * i + location];
1905 struct id_pos * curr_id_pos =
1906 id_pos_recv_buffer + rdispls[3 * i + location];
1907 for (
size_t k = 0; k < curr_recvcount; ++k, ++count) {
1915 qsort(temp_buffer, count,
sizeof(*temp_buffer),
1919 size_t num_unique_ids = 0;
1923 for (
size_t i = 0; i < count; ++i) {
1926 if (curr_id != prev_id) {
1928 unique_ids[num_unique_ids].
global_id = curr_id;
1929 unique_ids[num_unique_ids].
data.
count = 1;
1932 unique_ids[num_unique_ids-1].
data.
count++;
1936 size_t remote_point_info_buffer_size = 0;
1937 for (
size_t i = 0; i < num_unique_ids; ++i)
1939 remote_point_info_buffer_size +=
1942 dist_owners[location] =
1944 sizeof(**dist_owners));
1945 dist_owners[location]->
data =
1946 (unique_ids =
xrealloc(unique_ids, num_unique_ids *
sizeof(*unique_ids)));
1947 dist_owners[location]->
count = num_unique_ids;
1949 for (
size_t i = 0, l = 0, offset = 0; i < num_unique_ids; ++i) {
1950 int curr_count = unique_ids[i].
data.
count;
1951 if (curr_count == 1) {
1956 for (
int k = 0; k < curr_count; ++k, ++l, ++offset) {
1957 dist_owners[location]->
buffer[offset] = temp_buffer[l].
data;
1966 free(id_pos_buffer);
1968 free(size_t_buffer);
1975 yac_int * ids,
size_t count,
yac_int ** sorted_ids,
size_t ** reorder_idx) {
1977 yac_int * sorted_ids_ =
xrealloc(*sorted_ids, count *
sizeof(*sorted_ids_));
1978 size_t * reorder_idx_ =
xrealloc(*reorder_idx, count *
sizeof(*reorder_idx_));
1980 memcpy(sorted_ids_, ids, count *
sizeof(*sorted_ids_));
1981 for (
size_t i = 0; i < count; ++i) reorder_idx_[i] = i;
1985 *sorted_ids = sorted_ids_;
1986 *reorder_idx = reorder_idx_;
1990 void *
data,
size_t count, MPI_Comm comm,
1991 void(*set_sendcounts)(
void*,
size_t,
size_t*),
1992 void(*pack)(
void*,
size_t,
size_t*,
int*,
int*)) {
1997 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
1999 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
2001 set_sendcounts(
data, count, sendcounts);
2004 1, sendcounts, recvcounts, sdispls, rdispls, comm);
2005 size_t num_src_msg = 0, num_dst_msg = 0;
2006 for (
int i = 0; i < comm_size; ++i) {
2007 num_src_msg += (recvcounts[i] > 0);
2008 num_dst_msg += (sendcounts[i] > 0);
2012 rdispls[comm_size-1] + recvcounts[comm_size-1];
2015 xmalloc((recv_count + 2 * count) *
sizeof(*pos_buffer));
2016 int * src_pos_buffer = pos_buffer;
2017 int * dst_pos_buffer = pos_buffer + recv_count;
2018 int * send_pos_buffer = pos_buffer + recv_count + count;
2021 pack(
data, count, sdispls, dst_pos_buffer, send_pos_buffer);
2024 yac_alltoallv_int_p2p(
2025 send_pos_buffer, sendcounts, sdispls,
2026 src_pos_buffer, recvcounts, rdispls, comm);
2028 struct Xt_com_pos * com_pos =
2029 xmalloc(((
size_t)num_src_msg + (
size_t)num_dst_msg) *
sizeof(*com_pos));
2030 struct Xt_com_pos * src_com = com_pos;
2031 struct Xt_com_pos * dst_com = com_pos + num_src_msg;
2036 for (
int i = 0; i < comm_size; ++i) {
2037 if (recvcounts[i] > 0) {
2038 src_com[num_src_msg].transfer_pos = src_pos_buffer;
2039 src_com[num_src_msg].num_transfer_pos = recvcounts[i];
2040 src_com[num_src_msg].rank = i;
2041 src_pos_buffer += recvcounts[i];
2044 if (sendcounts[i] > 0) {
2045 dst_com[num_dst_msg].transfer_pos = dst_pos_buffer;
2046 dst_com[num_dst_msg].num_transfer_pos = sendcounts[i];
2047 dst_com[num_dst_msg].rank = i;
2048 dst_pos_buffer += sendcounts[i];
2055 xt_xmap_intersection_pos_new(
2056 num_src_msg, src_com, num_dst_msg, dst_com, comm);
2065 void * data,
size_t count,
size_t * sendcounts) {
2069 for (
size_t i = 0; i < count; ++i) {
2074 sendcounts[curr_info->
rank]++;
2079 void * data,
size_t count,
size_t * sdispls,
2080 int * dst_pos_buffer,
int * send_pos_buffer) {
2086 "ERROR(generate_xmap_pack_remote_points): count exceeds INT_MAX");
2088 for (
size_t i = 0; i < count; ++i) {
2093 size_t pos = sdispls[curr_info->
rank+1]++;
2094 dst_pos_buffer[pos] = i;
2095 send_pos_buffer[pos] = (int)(curr_info->
orig_pos);
2100 struct remote_point * remote_point_data,
size_t count, MPI_Comm comm) {
2109 void * data,
size_t count,
size_t * sendcounts) {
2113 for (
size_t i = 0; i < count; ++i) sendcounts[point_infos[i].
rank]++;
2117 void * data,
size_t count,
size_t * sdispls,
2118 int * dst_pos_buffer,
int * send_pos_buffer) {
2124 "ERROR(generate_xmap_pack_remote_point_info): count exceeds INT_MAX");
2126 for (
size_t i = 0; i < count; ++i) {
2127 size_t pos = sdispls[point_infos[i].
rank+1]++;
2128 dst_pos_buffer[pos] = i;
2129 send_pos_buffer[pos] = (int)(point_infos[i].
orig_pos);
2143 const void * a,
const void * b) {
2152 const void * a,
const void * b) {
2161 size_t max_num_cell_ve,
size_t num_cells,
int * num_ve_per_cell,
2163 yac_int ** ids,
size_t * num_ids,
size_t ** cell_to_ve, Xt_xmap * xmap,
2166 size_t num_total_ve = num_cells * max_num_cell_ve;
2168 size_t total_num_cell_ve = 0;
2169 for (
size_t i = 0, k = 0, l = 0; i < num_cells; ++i) {
2170 size_t num_vertices = (size_t)(num_ve_per_cell[i]);
2171 total_num_cell_ve += num_vertices;
2173 for (; j < num_vertices; ++j, ++k, ++l) {
2176 for (; j < max_num_cell_ve; ++j, ++k) {
2181 qsort(cell_ve_point_data, num_total_ve,
sizeof(*cell_ve_point_data),
2184 size_t num_ids_ = 0;
2185 size_t * cell_to_ve_ =
2186 xmalloc(total_num_cell_ve *
sizeof(*cell_to_ve_));
2188 xmalloc(total_num_cell_ve *
sizeof(*ve_point_info));
2189 yac_int prev_global_id = XT_INT_MAX;
2190 for (
size_t i = 0; i < num_total_ve; ++i) {
2192 size_t curr_reorder_idx = cell_ve_point_data[i].
reorder_idx;
2193 if (curr_global_id == XT_INT_MAX)
break;
2194 if (curr_global_id != prev_global_id) {
2195 ids_[num_ids_] = (prev_global_id = curr_global_id);
2196 ve_point_info[num_ids_] = cell_ve_point_data[i].
data.
data;
2199 cell_to_ve_[curr_reorder_idx] = num_ids_ - 1;
2202 *ids =
xrealloc(ids_, num_ids_ *
sizeof(*ids_));
2203 *num_ids = num_ids_;
2204 *cell_to_ve = cell_to_ve_;
2206 free(ve_point_info);
2211 MPI_Datatype coord_dt;
2212 yac_mpi_call(MPI_Type_contiguous(3, MPI_DOUBLE, &coord_dt), comm);
2219 Xt_redist redist_mask, Xt_redist redist_coords, MPI_Comm comm) {
2223 uint64_t counts[2], max_counts[2];
2224 if (orig_field_data != NULL) {
2233 counts, max_counts, 2, MPI_UINT64_T, MPI_MAX, comm), comm);
2235 (orig_field_data == NULL) ||
2236 ((counts[0] == max_counts[0]) && (counts[1] == max_counts[1])),
2237 "ERROR(field_data_init): inconsistent number of masks or coordinates")
2239 int * data_available_flag =
2240 xcalloc(2 * max_counts[0] + max_counts[1],
sizeof(*data_available_flag));
2242 for (
size_t i = 0; i < counts[0]; ++i) {
2243 data_available_flag[i] =
2245 data_available_flag[i + counts[0]] =
2249 for (
size_t i = 0; i < counts[1]; ++i)
2250 data_available_flag[i + 2 * counts[0]] =
2255 MPI_IN_PLACE, data_available_flag,
2256 (
int)(2 * max_counts[0] + max_counts[1]), MPI_INT, MPI_MAX, comm), comm);
2258 for (
size_t i = 0; i < counts[0]; ++i) {
2260 data_available_flag[i] ==
2262 "ERROR(field_data_init): inconsistent availability of masks")
2267 data_available_flag[i + counts[0]] ==
2269 "ERROR(field_data_init): inconsistent mask names")
2272 for (
size_t i = 0; i < counts[1]; ++i)
2274 data_available_flag[i + 2 * counts[0]] ==
2276 "ERROR(field_data_init): inconsistent availability of coordinates")
2278 for (uint64_t i = 0; i < max_counts[0]; ++i) {
2279 int * dist_mask = NULL;
2280 if (data_available_flag[i]) {
2281 dist_mask =
xmalloc(dist_size *
sizeof(*dist_mask));
2282 int const * orig_mask =
2283 (orig_field_data != NULL)?
2285 xt_redist_s_exchange1(redist_mask, orig_mask, dist_mask);
2288 int mask_name_len = data_available_flag[i + max_counts[0]];
2289 char * mask_name = NULL;
2290 if (mask_name_len > 0) {
2291 mask_name =
xmalloc((
size_t)mask_name_len *
sizeof(*mask_name));
2292 if ((orig_field_data != NULL) &&
2296 (
size_t)mask_name_len);
2298 memset(mask_name, 0, (
size_t)mask_name_len *
sizeof(*mask_name));
2301 MPI_IN_PLACE, mask_name, mask_name_len, MPI_CHAR, MPI_MAX, comm),
2304 (orig_field_data == NULL) ||
2308 (
size_t)mask_name_len *
sizeof(*mask_name)),
2309 "ERROR(field_data_init): inconsistent mask names")
2314 for (uint64_t i = 0; i < max_counts[1]; ++i) {
2316 if (data_available_flag[i + 2 * max_counts[0]]) {
2317 dist_coordinates =
xmalloc(dist_size *
sizeof(*dist_coordinates));
2319 (orig_field_data != NULL)?
2321 xt_redist_s_exchange1(redist_coords, orig_coordinates, dist_coordinates);
2326 free(data_available_flag);
2328 return dist_field_data;
2340 int max_num_vertices_per_cell = 0;
2342 for (
size_t i = 0; i < grid_data->
num_cells; ++i)
2345 yac_mpi_call(MPI_Allreduce(MPI_IN_PLACE, &max_num_vertices_per_cell, 1,
2346 MPI_INT, MPI_MAX, comm), comm);
2352 Xt_xmap xmap_cell_orig_to_dist =
2356 MPI_Datatype dt_single_remote_point =
2358 max_num_vertices_per_cell, comm);
2361 Xt_redist redist_cell_yac_int,
2363 redist_cell_corner_point_data,
2365 redist_cell_yac_int = xt_redist_p2p_new(xmap_cell_orig_to_dist,
yac_int_dt);
2366 redist_cell_int = xt_redist_p2p_new(xmap_cell_orig_to_dist, MPI_INT);
2367 redist_cell_corner_point_data =
2368 xt_redist_p2p_new(xmap_cell_orig_to_dist, dt_single_remote_point);
2369 yac_mpi_call(MPI_Type_free(&dt_single_remote_point), comm);
2370 redist_cell_coords = xt_redist_p2p_new(xmap_cell_orig_to_dist, dt_coord);
2372 xt_xmap_delete(xmap_cell_orig_to_dist);
2375 size_t max_num_cell_corners =
2376 (size_t)max_num_vertices_per_cell * num_cells;
2377 size_t max_num_local_cell_corners =
2378 (size_t)max_num_vertices_per_cell * grid_data->
num_cells;
2380 int * num_vertices_per_cell =
2381 xmalloc(num_cells *
sizeof(*num_vertices_per_cell));
2383 cell_corner_point_data =
2384 xmalloc(2 * (max_num_cell_corners + max_num_local_cell_corners) *
2385 sizeof(*cell_corner_point_data));
2387 cell_corner_point_data + max_num_cell_corners;
2389 cell_corner_point_data + 2 * max_num_cell_corners;
2391 cell_corner_point_data + 2 * max_num_cell_corners +
2392 max_num_local_cell_corners;
2393 for (
size_t i = 0; i < grid_data->
num_cells; ++i){
2398 local_cell_corner_point_data + i * (size_t)max_num_vertices_per_cell;
2400 local_cell_edge_point_data + i * (size_t)max_num_vertices_per_cell;
2401 for (
size_t j = 0; j < num_corners; ++j) {
2403 curr_local_cell_corner_point_data[j].
data.
data.
rank = comm_rank;
2406 curr_local_cell_edge_point_data[j].
data.
data.
rank = comm_rank;
2411 xt_redist_s_exchange1(redist_cell_yac_int, grid_data->
cell_ids, cell_ids);
2412 xt_redist_s_exchange1(
2414 xt_redist_s_exchange1(
2415 redist_cell_corner_point_data,
2416 local_cell_corner_point_data, cell_corner_point_data);
2417 xt_redist_s_exchange1(
2418 redist_cell_corner_point_data,
2419 local_cell_edge_point_data, cell_edge_point_data);
2424 redist_cell_int, redist_cell_coords, comm);
2426 xt_redist_delete(redist_cell_coords);
2427 xt_redist_delete(redist_cell_corner_point_data);
2428 xt_redist_delete(redist_cell_yac_int);
2429 xt_redist_delete(redist_cell_int);
2432 size_t num_vertices;
2433 size_t * cell_to_vertex;
2434 size_t * cell_to_vertex_offsets =
2435 xmalloc(num_cells *
sizeof(*cell_to_vertex_offsets));
2436 Xt_xmap xmap_vertex_orig_to_dist;
2437 for (
size_t i = 0, accu = 0; i < num_cells; ++i) {
2438 cell_to_vertex_offsets[i] = accu;
2439 accu += (size_t)(num_vertices_per_cell[i]);
2442 max_num_vertices_per_cell, num_cells, num_vertices_per_cell,
2443 cell_corner_point_data, &vertex_ids, &num_vertices,
2444 &cell_to_vertex, &xmap_vertex_orig_to_dist, comm);
2448 size_t * cell_to_edge;
2449 size_t * cell_to_edge_offsets = cell_to_vertex_offsets;
2450 Xt_xmap xmap_edge_orig_to_dist;
2452 max_num_vertices_per_cell, num_cells, num_vertices_per_cell,
2453 cell_edge_point_data, &edge_ids, &num_edges,
2454 &cell_to_edge, &xmap_edge_orig_to_dist, comm);
2456 free(cell_corner_point_data);
2459 Xt_redist redist_vertex_coords =
2460 xt_redist_p2p_new(xmap_vertex_orig_to_dist, dt_coord);
2461 Xt_redist redist_vertex_int =
2462 xt_redist_p2p_new(xmap_vertex_orig_to_dist, MPI_INT);
2463 xt_xmap_delete(xmap_vertex_orig_to_dist);
2467 xmalloc(num_vertices *
sizeof(*vertex_coordinates));
2468 xt_redist_s_exchange1(
2474 redist_vertex_int, redist_vertex_coords, comm);
2476 xt_redist_delete(redist_vertex_int);
2477 xt_redist_delete(redist_vertex_coords);
2479 Xt_redist redist_edge_int,
2481 redist_edge_2yac_int;
2484 MPI_Datatype dt_2yac_int;
2488 redist_edge_int = xt_redist_p2p_new(xmap_edge_orig_to_dist, MPI_INT);
2489 redist_edge_coords = xt_redist_p2p_new(xmap_edge_orig_to_dist, dt_coord);
2490 redist_edge_2yac_int = xt_redist_p2p_new(xmap_edge_orig_to_dist, dt_2yac_int);
2495 xt_xmap_delete(xmap_edge_orig_to_dist);
2500 int * temp_edge_type_src, * temp_edge_type_dst;
2501 if (
sizeof(*edge_type) ==
sizeof(
int)) {
2502 temp_edge_type_src = (
int*)(grid_data->
edge_type);
2503 temp_edge_type_dst = (
int*)edge_type;
2505 temp_edge_type_src =
2507 for (
size_t i = 0; i < grid_data->
num_edges; ++i)
2508 temp_edge_type_src[i] = (
int)(grid_data->
edge_type[i]);
2509 temp_edge_type_dst =
xmalloc(num_edges *
sizeof(*temp_edge_type_dst));
2510 for (
size_t i = 0; i < num_edges; ++i)
2511 temp_edge_type_dst[i] = (
int)(edge_type[i]);
2514 xt_redist_s_exchange1(
2515 redist_edge_int, temp_edge_type_src, temp_edge_type_dst);
2517 if (
sizeof(*edge_type) !=
sizeof(
int)) {
2519 for (
size_t i = 0; i < num_edges; ++i)
2522 free(temp_edge_type_src);
2523 free(temp_edge_type_dst);
2530 redist_edge_int, redist_edge_coords, comm);
2532 xt_redist_delete(redist_edge_int);
2533 xt_redist_delete(redist_edge_coords);
2536 xmalloc(num_cells *
sizeof(*cell_bnd_circles));
2544 for (
size_t i = 0; i < num_cells; ++i) {
2545 size_t * curr_cell_to_vertex =
2546 cell_to_vertex + cell_to_vertex_offsets[i];
2547 size_t * curr_cell_to_edge =
2548 cell_to_edge + cell_to_edge_offsets[i];
2549 for (
int j = 0; j < num_vertices_per_cell[i]; ++j) {
2551 vertex_coordinates + curr_cell_to_vertex[j];
2552 for (
int k = 0; k < 3; ++k)
2560 cell_bnd_circles[i] =
2571 xmalloc(num_edges *
sizeof(*edge_to_vertex));
2577 2 * (grid_data->
num_edges + num_edges) *
sizeof(*vertex_id_buffer));
2578 yac_int * grid_edge_vertex_ids = vertex_id_buffer;
2581 size_t * grid_edge_to_vertex = &(grid_data->
edge_to_vertex[0][0]);
2583 for (
size_t i = 0; i < 2 * grid_data->
num_edges; ++i)
2584 grid_edge_vertex_ids[i] =
2585 grid_data->
vertex_ids[grid_edge_to_vertex[i]];
2587 xt_redist_s_exchange1(
2588 redist_edge_2yac_int, grid_edge_vertex_ids, edge_vertex_ids);
2590 id2idx(edge_vertex_ids, &(edge_to_vertex[0][0]), 2 * num_edges,
2591 vertex_ids, num_vertices);
2593 free(vertex_id_buffer);
2596 xt_redist_delete(redist_edge_2yac_int);
2604 .ids = {cell_ids, vertex_ids, edge_ids},
2605 .total_count = {num_cells, num_vertices, num_edges},
2606 .count = {num_cells, num_vertices, num_edges},
2615 .sorted_ids = {NULL, NULL, NULL},
2616 .sorted_reorder_idx = {NULL, NULL, NULL}};
2628 cell_field_data, vertex_field_data, edge_field_data);
2631 &dist_grid, proc_sphere_part, dist_owner);
2635 for (
int i = 0; i < 3; ++i) {
2636 free(dist_owner[i]->data);
2637 free(dist_owner[i]);
2647 for (
int i = 0; i < 2; ++i)
2656 for (
int i = 0; i < 2; ++i) {
2670 for (
size_t i = 0; i <
num_cells; ++i) {
2672 double * cell_coord = cells[i].
coord;
2673 for (
int j = 0; j < 3; ++j) cell_coord[j] = 0.0;
2677 size_t * curr_cell_to_vertex =
2683 for (
int k = 0; k < 3; ++k)
2684 cell_coord[k] += (*curr_vertex_coords)[k];
2699 size_t num_cells = num_cells_a + num_cells_b;
2711 return proc_sphere_part;
2720 "ERROR(yac_dist_grid_pair_new): "
2721 "NULL is not a valid value for parameter grid_a")
2724 "ERROR(yac_dist_grid_pair_new): "
2725 "NULL is not a valid value for parameter grid_b")
2730 "ERROR(yac_dist_grid_pair_new): identical grid names")
2781 return grid_pair->
comm;
2788 for (
int i = 0; (i < 2) && (dist_grid == NULL); ++i)
2789 if (!strcmp(grid_name, grid_pair->
grid_names[i]))
2792 dist_grid,
"ERROR(yac_dist_grid_pair_get_dist_grid): invalid grid_name")
2809 "ERROR(yac_dist_grid_get_local_count): invalid location")
2810 size_t local_count = 0;
2812 int * core_mask = dist_grid->
core_mask[location];
2813 for (
size_t i = 0; i <
count; ++i)
if (core_mask[i]) ++local_count;
2824 "ERROR(yac_dist_grid_get_count): invalid location")
2835 "ERROR(yac_dist_grid_get_core_mask): invalid location")
2846 "ERROR(yac_dist_grid_get_global_ids): invalid location")
2847 return dist_grid->
ids[location];
2852 size_t ** indices,
size_t *
count) {
2857 int const * core_mask =
2860 size_t * temp_indices =
xmalloc(total_count *
sizeof(*temp_indices));
2865 for (
size_t i = 0; i < total_count; ++i)
2866 if (core_mask[i] && mask[i]) temp_indices[count_++] = i;
2868 for (
size_t i = 0; i < total_count; ++i)
2869 if (core_mask[i]) temp_indices[count_++] = i;
2872 *indices =
xrealloc(temp_indices, count_ *
sizeof(**indices));
2886 if (field.
masks_idx == SIZE_MAX)
return NULL;
2920 int const * core_mask =
2923 size_t unmasked_local_count = 0;
2924 for (
size_t i = 0; i < total_count; ++i)
2925 if (core_mask[i] && mask[i]) ++unmasked_local_count;
2926 return unmasked_local_count;
2932 for (
size_t i = 0; i < count; ++i)
2933 if (point_infos[i].count > 1) free(point_infos[i].
data.
multi);
2947 for (
int i = 0; i < 3; ++i) {
2959 if (grid_pair == NULL)
return;
2964 for (
int i = 0; i < 2; ++i) {
2973 size_t cell_idx,
struct grid_cell * buffer_cell) {
2984 size_t cell_idx,
struct grid_cell * buffer_cell) {
2988 for (
size_t i = 0; i < buffer_cell->
num_corners; ++i)
3011 *buffer_cell = cell;
3014 for (
size_t i = 0; i < num_vertices; ++i) {
3033 for (
int i = 0; (i < 2) && (search == NULL); ++i)
3034 if (!strcmp(grid_name, grid_pair->
grid_names[i]))
3038 "ERROR(yac_dist_grid_pair_get_cell_sphere_part): invalid grid_name")
3046 double coord[3],
struct yac_dist_grid * dist_grid,
size_t cell_idx,
3054 size_t * temp_cells;
3055 size_t * num_cells_per_coord =
3060 cell_sphere_part, search_coords,
count, &temp_cells,
3061 num_cells_per_coord);
3068 for (
size_t i = 0, k = 0; i < count; ++i) {
3069 size_t curr_num_cells = num_cells_per_coord[i];
3070 if (curr_num_cells == 0) {
3071 cells[i] = SIZE_MAX;
3072 }
else if (curr_num_cells == 1) {
3074 search_coords[i], dist_grid, temp_cells[k], &buffer_cell))
3075 cells[i] = temp_cells[k];
3077 cells[i] = SIZE_MAX;
3080 size_t cell_idx = SIZE_MAX;
3082 for (
size_t j = 0; j < curr_num_cells; ++j, ++k) {
3083 size_t curr_cell_idx = temp_cells[k];
3086 search_coords[i], dist_grid, curr_cell_idx, &buffer_cell))
3088 if (curr_cell_id < cell_id) {
3089 cell_idx = curr_cell_idx;
3090 cell_id = curr_cell_id;
3093 cells[i] = cell_idx;
3098 free(num_cells_per_coord);
3110 "ERROR(lookup_single_remote_point_reorder_locally): invalid location")
3114 size_t num_ids = dist_grid->
total_count[location];
3116 size_t count_ = *count;
3117 size_t new_count = 0;
3120 qsort(ids, count_,
sizeof(*ids),
3123 for (
size_t i = 0, j = 0; i < count_; ++i) {
3125 while ((j < num_ids) && (sorted_ids[j] < curr_id)) ++j;
3126 if ((j < num_ids) && (sorted_ids[j] == curr_id)) {
3129 if (i != new_count) ids[new_count] = ids[i];
3140 int pack_size_field_coord, pack_size_field_mask;
3143 MPI_Pack_size(3, MPI_DOUBLE, comm, &pack_size_field_coord), comm);
3144 pack_size_field_coord *=
3148 MPI_Pack_size(1, MPI_INT, comm, &pack_size_field_mask), comm);
3149 pack_size_field_mask *=
3152 return pack_size_field_coord + pack_size_field_mask;
3157 MPI_Datatype bnd_circle_dt, MPI_Comm comm) {
3160 pack_size_num_vertices,
3161 pack_size_bnd_circle;
3166 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_num_vertices), comm);
3169 MPI_Pack_size(1, bnd_circle_dt, comm, &pack_size_bnd_circle), comm);
3171 return pack_size_id + pack_size_num_vertices + pack_size_bnd_circle +
3179 pack_size_vertex_coords;
3184 MPI_Pack_size(3, MPI_DOUBLE, comm, &pack_size_vertex_coords), comm);
3186 return pack_size_id + pack_size_vertex_coords +
3194 pack_size_edge_to_vertex,
3195 pack_size_edge_type;
3199 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_edge_type), comm);
3202 MPI_Pack_size(2,
yac_int_dt, comm, &pack_size_edge_to_vertex), comm);
3204 return pack_size_id + pack_size_edge_type + pack_size_edge_to_vertex +
3209 struct yac_dist_grid * dist_grid, uint64_t * pos,
size_t count,
3210 int * pack_sizes, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
3213 int pack_size_base_cell =
3217 bnd_circle_dt, comm);
3218 int pack_size_base_vertex =
3222 int pack_size_base_edge =
3227 for (
size_t i = 0; i < count; ++i) {
3228 size_t idx = (size_t)(pos[i]);
3230 size_t * curr_vertices =
3232 size_t * curr_edges =
3235 pack_size_base_cell +
3236 num_vertices * (pack_size_base_vertex + pack_size_base_edge) +
3239 for (
int j = 0; j < num_vertices; ++j) {
3243 point_info_dt, comm) +
3247 pack_sizes[i] = pack_size;
3252 struct yac_dist_grid * dist_grid, uint64_t * pos,
size_t count,
3253 int * pack_sizes, MPI_Datatype point_info_dt, MPI_Comm comm) {
3255 int pack_size_base_vertex =
3259 for (
size_t i = 0; i < count; ++i)
3261 pack_size_base_vertex +
3267 struct yac_dist_grid * dist_grid, uint64_t * pos,
size_t count,
3268 int * pack_sizes, MPI_Datatype point_info_dt, MPI_Comm comm) {
3270 int pack_size_base_vertex =
3274 int pack_size_base_edge =
3278 for (
size_t i = 0; i < count; ++i) {
3281 pack_size_base_edge +
3282 2 * pack_size_base_vertex +
3287 point_info_dt, comm) +
3290 point_info_dt, comm);
3296 size_t count,
int * pack_sizes, MPI_Datatype bnd_circle_dt,
3297 MPI_Datatype point_info_dt, MPI_Comm comm) {
3303 "ERROR(get_pack_sizes): invalid location")
3309 dist_grid, pos, count, pack_sizes, bnd_circle_dt, point_info_dt, comm);
3313 dist_grid, pos, count, pack_sizes, point_info_dt, comm);
3317 dist_grid, pos, count, pack_sizes, point_info_dt, comm);
3323 size_t idx,
void * buffer,
int buffer_size,
int * position,
3326 size_t coordinates_count =
3328 size_t masks_count =
3332 for (
size_t i = 0; i < coordinates_count; ++i)
3336 3, MPI_DOUBLE, buffer, buffer_size, position, comm), comm);
3339 for (
size_t i = 0; i < masks_count; ++i)
3343 buffer_size, position, comm), comm);
3347 struct yac_dist_grid * dist_grid,
size_t idx,
void * buffer,
int buffer_size,
3348 int * position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
3356 position, comm), comm);
3360 buffer_size, position, comm), comm);
3364 point_info_dt, comm);
3367 idx, buffer, buffer_size, position,
3372 struct yac_dist_grid * dist_grid,
size_t idx,
void * buffer,
int buffer_size,
3373 int * position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
3388 &
edge_type, 1, MPI_INT, buffer, buffer_size, position, comm), comm);
3395 edge_to_vertex, 2,
yac_int_dt, buffer, buffer_size, position, comm),
3400 point_info_dt, comm);
3403 idx, buffer, buffer_size, position,
3408 struct yac_dist_grid * dist_grid,
size_t idx,
void * buffer,
int buffer_size,
3409 int * position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
3413 dist_grid, idx, buffer, buffer_size, position,
3414 bnd_circle_dt, point_info_dt, comm);
3417 for (
int i = 0; i < 2; ++i)
3420 buffer, buffer_size, position, bnd_circle_dt, point_info_dt, comm);
3424 struct yac_dist_grid * dist_grid,
size_t idx,
void * buffer,
int buffer_size,
3425 int * position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
3437 idx, buffer, buffer_size, position,
3441 MPI_Pack(&num_vertices, 1, MPI_INT, buffer,
3442 buffer_size, position, comm), comm);
3446 buffer_size, position, comm), comm);
3450 point_info_dt, comm);
3452 for (
int i = 0; i < num_vertices; ++i) {
3456 buffer, buffer_size, position, bnd_circle_dt, point_info_dt, comm);
3460 buffer, buffer_size, position, bnd_circle_dt, point_info_dt, comm);
3466 size_t count,
void ** pack_data,
int * pack_sizes,
3467 MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt, MPI_Comm comm) {
3470 bnd_circle_dt, point_info_dt, comm);
3472 size_t pack_size = 0;
3473 for (
size_t i = 0; i < count; ++i) pack_size += (
size_t)(pack_sizes[i]);
3475 void * pack_data_ =
xmalloc(pack_size);
3481 "ERROR(pack_grid_data): invalid location")
3483 void (*func_pack[3])(
3484 struct yac_dist_grid * dist_grid,
size_t idx,
void * buffer,
3485 int buffer_size,
int * position, MPI_Datatype bnd_circle_dt,
3486 MPI_Datatype point_info_dt, MPI_Comm
comm) =
3489 for (
size_t i = 0, offset = 0; i <
count; ++i) {
3491 func_pack[location](
3492 dist_grid, pos[i], (
char*)pack_data_ + offset, pack_sizes[i],
3493 &position, bnd_circle_dt, point_info_dt,
comm);
3494 pack_sizes[i] = position;
3495 offset += (size_t)position;
3498 *pack_data = pack_data_;
3502 void * buffer,
int buffer_size,
int * position,
size_t idx,
3513 MPI_Unpack(buffer, buffer_size, position,
3519 int buffer_size,
int * position,
3521 MPI_Datatype point_info_dt, MPI_Comm
comm) {
3525 MPI_Unpack(buffer, buffer_size, position, &(vertex[idx].global_id), 1,
3529 MPI_Unpack(buffer, buffer_size, position, &(vertex[idx].coord[0]), 3,
3533 buffer, buffer_size, position, &(vertex[idx].
owners), point_info_dt,
comm);
3536 buffer, buffer_size, position, idx, temp_vertex_field_data,
comm);
3541 int buffer_size,
int * position,
3543 MPI_Datatype point_info_dt, MPI_Comm
comm) {
3549 MPI_Unpack(buffer, buffer_size, position, &(edge[idx].global_id), 1,
3553 MPI_Unpack(buffer, buffer_size, position, &
edge_type, 1,
3558 MPI_Unpack(buffer, buffer_size, position, edge[idx].
edge_to_vertex, 2,
3565 buffer, buffer_size, position, idx, temp_edge_field_data,
comm);
3569 const void * a,
const void * b) {
3580 size_t old_count,
size_t new_count) {
3582 size_t add_count = new_count - old_count;
3589 new_count *
sizeof(*mask));
3591 for (
size_t i = 0, j = old_count; i < add_count; ++i, ++j) {
3593 *(
size_t*)((
unsigned char*)
reorder_idx + i * reorder_idx_size);
3594 mask[j] = temp_mask[idx];
3603 new_count *
sizeof(*coordinates));
3605 for (
size_t i = 0, j = old_count; i < add_count; ++i, ++j) {
3607 *(
size_t*)((
unsigned char*)
reorder_idx + i * reorder_idx_size);
3608 coordinates[j][0] = temp_coordinates[idx][0];
3609 coordinates[j][1] = temp_coordinates[idx][1];
3610 coordinates[j][2] = temp_coordinates[idx][2];
3623 size_t count,
size_t * idx,
3626 if (count == 0)
return;
3629 qsort(vertices, count,
sizeof(*vertices),
3634 size_t * sorted_vertex_reorder_idx =
3638 size_t prev_idx = 0;
3639 size_t add_count = 0;
3643 for (
size_t i = 0, j = 0; i < count; ++i) {
3646 size_t curr_reorder_idx = vertices[i].
reorder_idx;
3649 if (prev_global_id == curr_global_id) {
3650 if (idx != NULL) idx[curr_reorder_idx] = prev_idx;
3654 prev_global_id = curr_global_id;
3658 while ((j < num_total_vertices) && (sorted_vertex_ids[j] < curr_global_id))
3662 if ((j < num_total_vertices) && (sorted_vertex_ids[j] == curr_global_id)) {
3664 if (idx != NULL) idx[curr_reorder_idx] = sorted_vertex_reorder_idx[j];
3665 prev_idx = sorted_vertex_reorder_idx[j];
3671 if (idx != NULL) idx[curr_reorder_idx] = num_total_vertices + add_count;
3672 prev_idx = num_total_vertices + add_count;
3673 if (add_count != i) vertices[add_count] = vertices[i];
3678 size_t new_num_total_vertices = num_total_vertices + add_count;
3681 sizeof(*vertex_coordinates));
3684 new_num_total_vertices *
sizeof(*vertex_ids));
3685 int * core_vertex_mask =
3687 sizeof(*core_vertex_mask));
3690 sizeof(*vertex_owners));
3693 sorted_vertex_ids, new_num_total_vertices *
sizeof(*sorted_vertex_ids));
3694 sorted_vertex_reorder_idx =
3696 sorted_vertex_reorder_idx, new_num_total_vertices *
3697 sizeof(*sorted_vertex_reorder_idx));
3700 for (
size_t i = 0, j = num_total_vertices; i < add_count; ++i, ++j) {
3702 vertex_coordinates[j][0] = vertices[i].
coord[0];
3703 vertex_coordinates[j][1] = vertices[i].
coord[1];
3704 vertex_coordinates[j][2] = vertices[i].
coord[2];
3706 core_vertex_mask[j] = 0;
3707 vertex_owners[j] = vertices[i].
owners;
3708 sorted_vertex_ids[j] = vertices[i].
global_id;
3709 sorted_vertex_reorder_idx[j] = j;
3714 temp_vertex_field_data, vertices,
sizeof(*vertices),
3715 num_total_vertices, new_num_total_vertices);
3717 sorted_vertex_ids, new_num_total_vertices, sorted_vertex_reorder_idx);
3729 const void * a,
const void * b) {
3739 size_t count,
size_t * idx,
struct temp_field_data temp_edge_field_data) {
3741 if (count == 0)
return;
3750 size_t prev_idx = 0;
3751 size_t add_count = 0;
3755 for (
size_t i = 0, j = 0; i < count; ++i) {
3761 if (prev_global_id == curr_global_id) {
3762 if (idx != NULL) idx[curr_reorder_idx] = prev_idx;
3766 prev_global_id = curr_global_id;
3770 while ((j < num_total_edges) && (sorted_edge_ids[j] < curr_global_id)) ++j;
3773 if ((j < num_total_edges) && (sorted_edge_ids[j] == curr_global_id)) {
3775 if (idx != NULL) idx[curr_reorder_idx] = sorted_edge_reorder_idx[j];
3776 prev_idx = sorted_edge_reorder_idx[j];
3782 if (idx != NULL) idx[curr_reorder_idx] = num_total_edges + add_count;
3783 prev_idx = num_total_edges + add_count;
3784 if (add_count != i) edges[add_count] = edges[i];
3789 size_t new_num_total_edges = num_total_edges + add_count;
3792 new_num_total_edges *
sizeof(*edge_ids));
3795 new_num_total_edges *
sizeof(*
edge_type));
3801 new_num_total_edges *
sizeof(*edge_owners));
3802 int * core_edge_mask =
3804 sizeof(*core_edge_mask));
3807 sorted_edge_ids, new_num_total_edges *
sizeof(*sorted_edge_ids));
3808 sorted_edge_reorder_idx =
3810 sorted_edge_reorder_idx, new_num_total_edges *
3811 sizeof(*sorted_edge_reorder_idx));
3813 yac_int * vertex_ids =
xmalloc(2 * add_count *
sizeof(*vertex_ids));
3814 size_t * reorder =
xmalloc(2 * add_count *
sizeof(*reorder));
3817 for (
size_t i = 0, j = num_total_edges; i < add_count; ++i, ++j) {
3821 core_edge_mask[j] = 0;
3822 edge_owners[j] = edges[i].
owners;
3823 sorted_edge_ids[j] = edges[i].
global_id;
3824 sorted_edge_reorder_idx[j] = j;
3828 reorder[2 * i + 0] = 2 * num_total_edges + 2 * i + 0;
3829 reorder[2 * i + 1] = 2 * num_total_edges + 2 * i + 1;
3834 temp_edge_field_data, edges,
sizeof(*edges),
3835 num_total_edges, new_num_total_edges);
3837 sorted_edge_ids, new_num_total_edges, sorted_edge_reorder_idx);
3842 size_t * sorted_vertex_reorder_idx =
3845 size_t * edge_to_vertex_ = (
size_t*)&(edge_to_vertex[0][0]);
3847 for (
size_t i = 0, j = 0; i < 2 * add_count; ++i) {
3848 yac_int curr_id = vertex_ids[i];
3849 while ((j < total_num_vertices) && (sorted_vertex_ids[j] < curr_id)) ++j;
3851 (j < total_num_vertices) && (sorted_vertex_ids[j] == curr_id),
3852 "ERROR(yac_dist_grid_add_edges): vertex id not found")
3853 edge_to_vertex_[reorder[i]] = sorted_vertex_reorder_idx[j];
3872 int * num_vertices_per_cell,
struct bounding_circle * cell_bnd_circles,
3873 size_t count,
size_t * cell_to_vertex,
size_t * cell_to_edge,
3877 if (
count == 0)
return;
3879 size_t * reorder_idx =
xmalloc(
count *
sizeof(reorder_idx));
3880 for (
size_t i = 0; i <
count; ++i) reorder_idx[i] = i;
3883 for (
size_t i = 0, accu = 0; i <
count;
3884 accu += (size_t)(num_vertices_per_cell[i++])) prescan[i] = accu;
3890 size_t * sorted_cell_reorder_idx =
3893 yac_int prev_global_id = cell_ids[0] - 1;
3894 size_t cell_add_count = 0;
3895 size_t relations_add_count = 0;
3899 for (
size_t i = 0, j = 0; i <
count; ++i) {
3901 yac_int curr_global_id = cell_ids[i];
3902 size_t curr_reorder_idx = reorder_idx[i];
3905 if (prev_global_id == curr_global_id) {
3909 prev_global_id = curr_global_id;
3913 while ((j < num_total_cells) && (sorted_cell_ids[j] < curr_global_id)) ++j;
3916 if ((j >= num_total_cells) || (sorted_cell_ids[j] != curr_global_id)) {
3918 if (cell_add_count != i) {
3919 cell_ids[cell_add_count] = curr_global_id;
3920 reorder_idx[cell_add_count] = curr_reorder_idx;
3923 relations_add_count += (size_t)(num_vertices_per_cell[curr_reorder_idx]);
3927 size_t new_num_total_cells = num_total_cells + cell_add_count;
3928 size_t num_total_relations =
3929 (num_total_cells > 0)?
3932 size_t new_num_total_relations = num_total_relations + relations_add_count;
3935 new_num_total_cells *
sizeof(*new_cell_ids));
3936 int * new_num_vertices_per_cell =
3938 sizeof(*new_num_vertices_per_cell));
3939 size_t * new_cell_to_vertex =
3941 sizeof(*new_cell_to_vertex));
3942 size_t * cell_to_vertex_offsets =
3944 sizeof(*cell_to_vertex_offsets));
3945 size_t * new_cell_to_edge =
3947 sizeof(*new_cell_to_edge));
3950 sizeof(*new_cell_bnd_circles));
3951 int * core_cell_mask =
3953 new_num_total_cells *
sizeof(*core_cell_mask));
3956 new_num_total_cells *
sizeof(*cell_owners));
3959 sorted_cell_ids, new_num_total_cells *
sizeof(*sorted_cell_ids));
3960 sorted_cell_reorder_idx =
3962 sorted_cell_reorder_idx, new_num_total_cells *
3963 sizeof(*sorted_cell_reorder_idx));
3966 for (
size_t i = 0, j = num_total_cells; i < cell_add_count;
3969 size_t curr_reorder_idx = reorder_idx[i];
3970 int curr_num_vertices = num_vertices_per_cell[curr_reorder_idx];
3971 size_t curr_relation_idx = prescan[curr_reorder_idx];
3973 new_cell_ids[j] = cell_ids[i];
3974 new_num_vertices_per_cell[j] = curr_num_vertices;
3975 cell_to_vertex_offsets[j] = num_total_relations;
3976 for (
int j = 0; j < curr_num_vertices;
3977 ++j, ++num_total_relations, ++curr_relation_idx) {
3978 new_cell_to_vertex[num_total_relations] =
3979 cell_to_vertex[curr_relation_idx];
3980 new_cell_to_edge[num_total_relations] = cell_to_edge[curr_relation_idx];
3982 core_cell_mask[j] = 0;
3983 sorted_cell_ids[j] = cell_ids[i];
3984 sorted_cell_reorder_idx[j] = j;
3985 new_cell_bnd_circles[j] = cell_bnd_circles[curr_reorder_idx];
3986 new_cell_owners[j] = cell_owners[curr_reorder_idx];
3991 temp_cell_field_data, reorder_idx,
sizeof(*reorder_idx),
3992 num_total_cells, new_num_total_cells);
3994 sorted_cell_ids, new_num_total_cells, sorted_cell_reorder_idx);
4073 struct yac_dist_grid * dist_grid,
size_t count,
void * buffer,
4074 int buffer_size, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
4078 int * num_vertices_per_cell =
xmalloc(count *
sizeof(*num_vertices_per_cell));
4080 xmalloc(count *
sizeof(*cell_bnd_circles));
4085 size_t vertices_array_size = 0;
4086 size_t total_num_vertices = 0;
4089 size_t edges_array_size = 0;
4104 for (
size_t i = 0, buffer_offset = 0; i < count; ++i) {
4107 void * curr_buffer = (
char*)buffer + buffer_offset;
4112 MPI_Unpack(curr_buffer, buffer_size, &position, cell_ids + i, 1,
4116 curr_buffer, buffer_size, &position, i, temp_cell_field_data, comm);
4119 MPI_Unpack(curr_buffer, buffer_size, &position, &num_vertices, 1,
4120 MPI_INT, comm), comm);
4123 MPI_Unpack(curr_buffer, buffer_size, &position, cell_bnd_circles + i, 1,
4124 bnd_circle_dt, comm), comm);
4127 curr_buffer, buffer_size, &position, cell_owners + i,
4128 point_info_dt, comm);
4130 num_vertices_per_cell[i] = num_vertices;
4133 vertices, vertices_array_size, total_num_vertices + (
size_t)num_vertices);
4135 edges, edges_array_size, total_num_vertices + (
size_t)num_vertices);
4137 &temp_vertex_field_data, total_num_vertices + (
size_t)num_vertices);
4139 &temp_edge_field_data, total_num_vertices + (
size_t)num_vertices);
4141 for (
int j = 0; j < num_vertices; ++j, ++total_num_vertices) {
4143 vertices, total_num_vertices, curr_buffer, buffer_size, &position,
4144 temp_vertex_field_data, point_info_dt, comm);
4146 edges, total_num_vertices, curr_buffer, buffer_size, &position,
4147 temp_edge_field_data, point_info_dt, comm);
4148 vertices[total_num_vertices].
reorder_idx = total_num_vertices;
4149 edges[total_num_vertices].
reorder_idx = total_num_vertices;
4152 buffer_offset += (size_t)position;
4153 buffer_size -= position;
4156 size_t * cell_to_vertex =
xmalloc(total_num_vertices *
sizeof(*cell_to_vertex));
4157 size_t * cell_to_edge =
xmalloc(total_num_vertices *
sizeof(*cell_to_edge));
4160 dist_grid, vertices, total_num_vertices, cell_to_vertex,
4161 temp_vertex_field_data);
4163 dist_grid, edges, total_num_vertices, cell_to_edge,
4164 temp_edge_field_data);
4166 dist_grid, cell_ids, num_vertices_per_cell, cell_bnd_circles, count,
4167 cell_to_vertex, cell_to_edge, cell_owners, temp_cell_field_data);
4173 free(cell_to_vertex);
4177 free(cell_bnd_circles);
4178 free(num_vertices_per_cell);
4183 struct yac_dist_grid * dist_grid,
size_t count,
void * buffer,
4184 int buffer_size, MPI_Datatype point_info_dt, MPI_Comm comm) {
4193 for (
size_t i = 0, buffer_offset = 0; i < count; ++i) {
4196 void * curr_buffer = (
char*)buffer + buffer_offset;
4199 vertices, i, curr_buffer, buffer_size, &position,
4200 temp_vertex_field_data, point_info_dt, comm);
4203 buffer_offset += (size_t)position;
4204 buffer_size -= position;
4208 dist_grid, vertices, count, NULL, temp_vertex_field_data);
4216 struct yac_dist_grid * dist_grid,
size_t count,
void * buffer,
4217 int buffer_size, MPI_Datatype point_info_dt, MPI_Comm comm) {
4221 xmalloc(2 * count *
sizeof(*vertices));
4232 for (
size_t i = 0, buffer_offset = 0; i < count; ++i) {
4235 void * curr_buffer = (
char*)buffer + buffer_offset;
4238 edges, i, curr_buffer, buffer_size, &position,
4239 temp_edge_field_data, point_info_dt, comm);
4242 for (
size_t j = 0; j < 2; ++j)
4244 vertices, 2 * i + j, curr_buffer, buffer_size, &position,
4245 temp_vertex_field_data, point_info_dt, comm);
4247 buffer_offset += (size_t)position;
4248 buffer_size -= position;
4252 dist_grid, vertices, 2 * count, NULL, temp_vertex_field_data);
4254 dist_grid, edges, count, NULL, temp_edge_field_data);
4265 void * buffer,
int buffer_size, MPI_Datatype bnd_circle_dt,
4266 MPI_Datatype point_info_dt, MPI_Comm comm) {
4272 "ERROR(unpack_grid_data): invalid location")
4278 dist_grid, count, buffer, buffer_size, bnd_circle_dt,
4279 point_info_dt, comm);
4283 dist_grid, count, buffer, buffer_size, point_info_dt, comm);
4287 dist_grid, count, buffer, buffer_size, point_info_dt, comm);
4293 const void * a,
const void * b) {
4301 size_t count,
enum yac_location location,
size_t * idx) {
4303 MPI_Comm comm = dist_grid->
comm;
4304 int comm_rank, comm_size;
4308 size_t remote_count = 0;
4310 for (
size_t i = 0; i < count; ++i) {
4311 if (ids[i].global_id == XT_INT_MAX) idx[i] = SIZE_MAX;
4312 else if (ids[i].
data.rank != comm_rank) ++remote_count;
4317 xmalloc(remote_count *
sizeof(*missing_ids));
4319 for (
size_t i = 0, j = 0; i < count; ++i) {
4320 if ((ids[i].
data.rank != comm_rank) &&
4322 missing_ids[j].
data = ids[i];
4330 dist_grid, location, missing_ids, &remote_count, idx);
4333 qsort(missing_ids, remote_count,
sizeof(*missing_ids),
4336 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
4338 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
4340 for (
size_t i = 0; i < remote_count; ++i)
4344 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4346 size_t recv_count = rdispls[comm_size-1] + recvcounts[comm_size-1];
4348 uint64_t * uint64_t_buffer =
4349 xmalloc((remote_count + recv_count) *
sizeof(*uint64_t_buffer));
4350 uint64_t * orig_pos_send_buffer = uint64_t_buffer;
4351 uint64_t * orig_pos_recv_buffer = uint64_t_buffer + remote_count;
4354 for (
size_t i = 0; i < remote_count; ++i) {
4356 if (rank != comm_rank)
4357 orig_pos_send_buffer[sdispls[rank+1]++] =
4362 yac_alltoallv_uint64_p2p(
4363 orig_pos_send_buffer, sendcounts, sdispls,
4364 orig_pos_recv_buffer, recvcounts, rdispls, comm);
4371 void * packed_send_data = NULL;
4372 int * pack_sizes =
xmalloc(recv_count *
sizeof(*pack_sizes));
4376 dist_grid, location, orig_pos_recv_buffer, recv_count,
4377 &packed_send_data, pack_sizes,
4378 bnd_circle_dt, point_info_dt, comm);
4379 free(uint64_t_buffer);
4381 memset(sendcounts, 0, (
size_t)comm_size *
sizeof(*sendcounts));
4382 for (
int i = 0, k = 0; i < comm_size; ++i)
4383 for (
size_t j = 0; j < recvcounts[i]; ++j, ++k)
4384 sendcounts[i] += (
size_t)(pack_sizes[k]);
4389 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4391 recv_count = rdispls[comm_size-1] + recvcounts[comm_size-1];
4393 void * packed_recv_data =
xmalloc(recv_count);
4396 yac_alltoallv_packed_p2p(
4397 packed_send_data, sendcounts, sdispls+1,
4398 packed_recv_data, recvcounts, rdispls, comm);
4402 dist_grid, location, remote_count, packed_recv_data, (
int)recv_count,
4403 bnd_circle_dt, point_info_dt, comm);
4410 dist_grid, location, missing_ids, &remote_count, idx);
4413 free(packed_recv_data);
4414 free(packed_send_data);
4422 double coord[3],
struct yac_dist_grid * dist_grid,
size_t cell_idx,
4425 MPI_Comm comm = grid_pair->
comm;
4426 int comm_rank, comm_size;
4430 int * ranks =
xmalloc(count *
sizeof(ranks));
4436 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
4438 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
4439 for (
size_t i = 0; i < count; ++i) sendcounts[ranks[i]]++;
4441 size_t local_count = sendcounts[comm_rank];
4442 sendcounts[comm_rank] = 0;
4445 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4447 size_t remote_count = sdispls[comm_size] + sendcounts[comm_size-1];
4448 size_t request_count = rdispls[comm_size-1] + recvcounts[comm_size-1];
4451 xmalloc((remote_count + request_count + local_count) *
4452 sizeof(*coord_buffer));
4456 coord_buffer + remote_count + request_count;
4459 for (
size_t i = 0, k = 0; i < count; ++i) {
4460 if (ranks[i] == comm_rank) {
4461 coord_local_buffer[k][0] = search_coords[i][0];
4462 coord_local_buffer[k][1] = search_coords[i][1];
4463 coord_local_buffer[k][2] = search_coords[i][2];
4466 size_t displ = sdispls[ranks[i]+1]++;
4467 coord_send_buffer[displ][0] = search_coords[i][0];
4468 coord_send_buffer[displ][1] = search_coords[i][1];
4469 coord_send_buffer[displ][2] = search_coords[i][2];
4473 MPI_Datatype dt_coord;
4474 yac_mpi_call(MPI_Type_contiguous(3, MPI_DOUBLE, &dt_coord), comm);
4479 coord_send_buffer, sendcounts, sdispls,
4480 coord_recv_buffer, recvcounts, rdispls,
4481 sizeof(*coord_send_buffer), dt_coord, comm);
4485 size_t * local_cells =
4486 xmalloc((request_count + local_count) *
sizeof(*local_cells));
4490 grid_pair, grid_name, coord_recv_buffer, request_count + local_count,
4494 xmalloc((remote_count + request_count) *
4495 sizeof(*single_remote_point_buffer));
4504 for (
size_t i = 0; i < request_count; ++i) {
4505 size_t cell_idx = local_cells[i];
4506 id_send_buffer[i].
data.
rank = comm_rank;
4507 if (cell_idx != SIZE_MAX) {
4511 id_send_buffer[i].
global_id = XT_INT_MAX;
4516 MPI_Datatype single_remote_point_dt =
4521 id_send_buffer, recvcounts, rdispls, id_recv_buffer, sendcounts, sdispls,
4522 sizeof(*id_send_buffer), single_remote_point_dt,
comm);
4526 size_t * new_local_cells =
4527 xmalloc(remote_count *
sizeof(*new_local_cells));
4532 dist_grid, id_recv_buffer, remote_count,
YAC_LOC_CELL, new_local_cells);
4535 for (
size_t i = 0, k = 0; i <
count; ++i) {
4536 if (ranks[i] == comm_rank) {
4537 cells[i] = local_cells[request_count + k];
4540 size_t displ = sdispls[ranks[i]]++;
4541 cells[i] = new_local_cells[displ];
4545 free(new_local_cells);
4546 free(single_remote_point_buffer);
4582 total_count, field_coords, global_ids);
4586 total_count, field_coords, global_ids, mask);
4595 int const * core_mask =
4602 for (
size_t i = 0, j = 0; i < total_count; ++i) {
4604 points[j].global_id = global_ids[i];
4605 points[j].data.rank = comm_rank;
4606 points[j].data.orig_pos = i;
4607 if (n == ++j)
return;
4613 for (
size_t i = 0, j = 0; i < total_count; ++i) {
4614 if (core_mask[i] && mask[i]) {
4615 points[j].global_id = global_ids[i];
4616 points[j].data.rank = comm_rank;
4617 points[j].data.orig_pos = i;
4618 if (n == ++j)
return;
4625 size_t n, MPI_Comm comm) {
4628 MPI_Datatype single_remote_point_dt =
4630 single_nnn_search_result_dt_,
4631 nnn_search_result_dt;
4632 int array_of_blocklengths[] = {1, 1};
4633 const MPI_Aint array_of_displacements[] =
4634 {(MPI_Aint)(intptr_t)(
const void *)&(dummy.
point_info) -
4635 (MPI_Aint)(intptr_t)(
const void *)&dummy,
4636 (MPI_Aint)(intptr_t)(
const void *)&(dummy.
cos_angle) -
4637 (MPI_Aint)(intptr_t)(
const void *)&dummy};
4638 const MPI_Datatype array_of_types[] = {single_remote_point_dt, MPI_DOUBLE};
4640 MPI_Type_create_struct(
4641 2, array_of_blocklengths, array_of_displacements,
4642 array_of_types, &single_nnn_search_result_dt_), comm);
4643 MPI_Datatype single_nnn_search_result_dt =
4646 MPI_Type_contiguous(
4647 (
int)n, single_nnn_search_result_dt, &nnn_search_result_dt), comm);
4648 yac_mpi_call(MPI_Type_commit(&nnn_search_result_dt), comm);
4649 yac_mpi_call(MPI_Type_free(&single_nnn_search_result_dt), comm);
4650 yac_mpi_call(MPI_Type_free(&single_remote_point_dt), comm);
4651 return nnn_search_result_dt;
4655 void const * a,
void const * b) {
4664 if (ret)
return ret;
4673 if (insert_size == 0)
return;
4676 qsort(array, array_size,
sizeof(*array),
4678 qsort(insert, insert_size,
sizeof(*insert),
4682 memcpy(temp_results, array, array_size *
sizeof(*array));
4684 for (
size_t i = 0, j = 0, k = 0; k < array_size; ++k) {
4689 &temp_results[i], insert + j):(-1);
4692 if (k != i) array[k] = temp_results[i];
4695 array[k] = insert[j];
4707 MPI_Comm comm = grid_pair->
comm;
4708 int comm_rank, comm_size;
4715 uint64_t unmasked_local_count =
4718 uint64_t * unmasked_local_counts =
4719 xmalloc((
size_t)comm_size *
sizeof(*unmasked_local_counts));
4724 &unmasked_local_count, 1, MPI_UINT64_T,
4725 unmasked_local_counts, 1, MPI_UINT64_T,
comm),
comm);
4729 for (
int i = 0; i < comm_size; ++i)
4730 flag |= unmasked_local_counts[i] < (uint64_t)n;
4735 uint64_t global_num_unmasked_count = 0;
4736 for (
int i = 0; i < comm_size; ++i)
4737 global_num_unmasked_count += unmasked_local_counts[i];
4740 (
size_t)global_num_unmasked_count >= n,
4741 "ERROR(yac_dist_grid_pair_do_nnn_search): insufficient number of "
4744 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
4746 1, &sendcounts, &recvcounts, &sdispls, &rdispls,
comm);
4750 int * flag_buffer =
xcalloc(2 * (
size_t)comm_size,
sizeof(*flag_buffer));
4751 int * send_flags = flag_buffer;
4752 int * recv_flags = flag_buffer + comm_size;
4755 send_flags, recv_flags, comm_rank, comm_size);
4756 for (
int i = 0; i < comm_size; ++i) {
4757 sendcounts[i] = (size_t)send_flags[i];
4758 recvcounts[i] = (size_t)recv_flags[i];
4762 size_t local_send_count = (size_t)(
MIN(unmasked_local_count, n));
4765 for (
int i = 0; i < comm_size; ++i) {
4768 sendcounts[i] *= local_send_count;
4769 raccu += (recvcounts[i] *= (int)(
MIN(unmasked_local_counts[i], n)));
4772 size_t recv_count = recvcounts[comm_size-1] + rdispls[comm_size-1];
4776 (local_send_count + recv_count) *
sizeof(*single_remote_point_buffer));
4779 single_remote_point_buffer + local_send_count;
4783 dist_grid, field, comm_rank, local_send_count, local_send_ids);
4785 MPI_Datatype single_remote_point_dt =
4790 local_send_ids, sendcounts, sdispls, recv_ids, recvcounts, rdispls,
4791 sizeof(*local_send_ids), single_remote_point_dt, comm);
4793 yac_mpi_call(MPI_Type_free(&single_remote_point_dt), comm);
4795 size_t * dummy =
xmalloc(recv_count *
sizeof(*dummy));
4800 dist_grid, recv_ids, recv_count, field.
location, dummy);
4803 free(single_remote_point_buffer);
4806 free(unmasked_local_counts);
4811 double * cos_angles = NULL;
4812 size_t cos_angles_array_size = 0;
4814 size_t result_coords_array_size = 0;
4815 size_t * result_points = NULL;
4816 size_t result_points_array_size = 0;
4817 size_t * num_results =
xcalloc(count,
sizeof(*num_results));
4821 sphere_part, count, search_coords, n, &cos_angles, &cos_angles_array_size,
4822 &result_coords, &result_coords_array_size, &result_points,
4823 &result_points_array_size, num_results);
4825 size_t max_num_results = 0;
4826 for (
size_t i = 0; i < count; ++i)
4827 if (max_num_results < num_results[i]) max_num_results = num_results[i];
4835 xmalloc(max_num_results *
sizeof(*temp_results));
4837 for (
size_t i = 0, k = 0; i < count; ++i) {
4840 for (
size_t j = 0; j < n; ++j, ++k) {
4841 size_t curr_local_id = result_points[k];
4845 curr_results[j].
cos_angle = cos_angles[k];
4848 for (
size_t j = n; j < num_results[i]; ++j, ++k) {
4849 size_t curr_local_id = result_points[k];
4853 temp_results[j-n].
cos_angle = cos_angles[k];
4860 int * request_ranks = NULL;
4861 size_t request_ranks_array_size = 0;
4862 size_t num_request_ranks = 0;
4863 int * num_requests =
xmalloc(count *
sizeof(*num_requests));
4866 for (
size_t i = 0; i < count; ++i) {
4870 memcpy(result_bnd_circle.
base_vector, search_coords[i],
4871 3 *
sizeof(search_coords[0][0]));
4875 field_coords[results[(i + 1) * n - 1].point_info.data.orig_pos]);
4878 num_request_ranks + (
size_t)comm_size);
4882 int * curr_request_ranks = request_ranks + num_request_ranks;
4885 curr_request_ranks, num_requests + i);
4888 int new_num_requests = 0;
4889 for (
int j = 0; j < num_requests[i]; ++j) {
4890 if (curr_request_ranks[j] == comm_rank)
continue;
4891 if (new_num_requests != j)
4892 curr_request_ranks[new_num_requests] = curr_request_ranks[j];
4896 num_request_ranks += (size_t)(num_requests[i] = new_num_requests);
4899 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
4901 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
4903 for (
size_t i = 0; i < num_request_ranks; ++i) sendcounts[request_ranks[i]]++;
4906 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4908 size_t recv_count = rdispls[comm_size-1] + recvcounts[comm_size-1];
4911 xmalloc((num_request_ranks + recv_count) *
sizeof(*coord_buffer));
4916 for (
size_t i = 0, k = 0; i < count; ++i)
4917 for (
size_t j = 0; j < num_requests[i]; ++j, ++k)
4918 memcpy(send_request_coords[sdispls[request_ranks[k]+1]++],
4919 search_coords[i], 3 *
sizeof(
double));
4925 send_request_coords, sendcounts, sdispls,
4926 recv_request_coords, recvcounts, rdispls,
4927 sizeof(*send_request_coords), coord_dt, comm);
4931 num_results =
xrealloc(num_results, recv_count *
sizeof(*num_results));
4932 memset(num_results, 0, recv_count *
sizeof(*num_results));
4936 sphere_part, recv_count, recv_request_coords, n, &cos_angles,
4937 &cos_angles_array_size, &result_coords, &result_coords_array_size,
4938 &result_points, &result_points_array_size, num_results);
4942 for (
size_t i = 0; i < recv_count; ++i)
4943 if (max_num_results < num_results[i]) max_num_results = num_results[i];
4945 xrealloc(temp_results, max_num_results *
sizeof(*temp_results));
4949 xmalloc(n * (num_request_ranks + recv_count) *
sizeof(*request_results));
4952 request_results + n * recv_count;
4954 for (
size_t i = 0, k = 0; i < recv_count; ++i) {
4957 for (
size_t j = 0; j < n; ++j, ++k) {
4958 size_t curr_local_id = result_points[k];
4962 curr_results[j].
cos_angle = cos_angles[k];
4965 for (
size_t j = n; j < num_results[i]; ++j, ++k) {
4966 size_t curr_local_id = result_points[k];
4970 temp_results[j-n].
cos_angle = cos_angles[k];
4976 free(result_coords);
4977 free(result_points);
4980 MPI_Datatype nnn_search_result_dt =
4985 send_request_results, recvcounts, rdispls,
4986 recv_request_results, sendcounts, sdispls,
4987 n *
sizeof(*send_request_results), nnn_search_result_dt, comm);
4989 yac_mpi_call(MPI_Type_free(&nnn_search_result_dt), comm);
4992 for (
size_t i = 0, k = 0; i < count; ++i) {
4994 for (
size_t j = 0; j < num_requests[i]; ++k, ++j) {
4995 size_t pos = (size_t)(sdispls[request_ranks[k]]++);
4997 recv_request_results + n * pos;
5001 free(request_ranks);
5002 free(request_results);
5006 size_t remote_count = 0;
5007 for (
size_t i = 0; i < n * count; ++i)
5013 for (
size_t i = 0, k = 0; i < n * count; ++i)
5014 if (results[i].point_info.data.rank != comm_rank)
5021 local_ids + n * count - remote_count);
5026 for (
size_t i = 0, offset = n * count - remote_count; i < n * count; ++i) {
5027 if (results[i].point_info.data.rank == comm_rank)
5028 local_ids[i] = (size_t)(results[i].point_info.data.orig_pos);
5029 else local_ids[i] = local_ids[offset++];
5040 MPI_Comm comm = grid_pair->
comm;
5041 int comm_rank, comm_size;
5049 int * rank_buffer = NULL;
5050 size_t rank_buffer_size = 0;
5051 size_t rank_buffer_array_size = 0;
5053 for (
size_t i = 0; i <
count; ++i) {
5056 rank_buffer_size + (
size_t)comm_size);
5064 rank_buffer + rank_buffer_size, num_ranks + i);
5065 rank_buffer_size += (size_t)(num_ranks[i]);
5068 size_t * size_t_buffer =
5069 xmalloc(4 * (
size_t)comm_size *
sizeof(*size_t_buffer));
5070 size_t * result_sendcounts = size_t_buffer + 0 * comm_size;
5071 size_t * result_recvcounts = size_t_buffer + 1 * comm_size;
5072 size_t * result_sdispls = size_t_buffer + 2 * comm_size;
5073 size_t * result_rdispls = size_t_buffer + 3 * comm_size;
5075 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
5077 1, &sendcounts, &recvcounts, &sdispls, &rdispls,
comm);
5079 for (
size_t i = 0, offset = 0; i <
count; ++i) {
5080 int curr_num_ranks = num_ranks[i];
5081 int * ranks = rank_buffer + offset;
5082 offset += (size_t)curr_num_ranks;
5083 for (
int j = 0; j < curr_num_ranks; ++j) sendcounts[ranks[j]]++;
5087 size_t local_count = sendcounts[comm_rank];
5088 sendcounts[comm_rank] = 0;
5091 1, sendcounts, recvcounts, sdispls, rdispls,
comm);
5093 size_t send_count = sdispls[comm_size] + sendcounts[comm_size-1];
5094 size_t recv_count = rdispls[comm_size-1] + recvcounts[comm_size-1];
5097 xmalloc((send_count + recv_count + local_count) *
5098 sizeof(*bnd_circle_buffer));
5100 struct bounding_circle * recv_buffer = bnd_circle_buffer + send_count;
5102 bnd_circle_buffer + send_count + recv_count;
5105 for (
size_t i = 0, offset = 0, local_offset = 0; i < count; ++i) {
5106 int curr_num_ranks = num_ranks[i];
5107 int * ranks = rank_buffer + offset;
5108 offset += (size_t)curr_num_ranks;
5109 for (
int j = 0; j < curr_num_ranks; ++j) {
5110 int rank = ranks[j];
5111 if (rank == comm_rank)
5112 local_buffer[local_offset++] = bnd_circles[i];
5114 send_buffer[sdispls[rank + 1]++] = bnd_circles[i];
5123 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls,
5124 sizeof(*send_buffer), bnd_circle_dt, comm);
5131 size_t * local_cells = NULL;
5132 size_t * num_local_cells_per_bnd_circle =
5133 xmalloc((recv_count + local_count) *
5134 sizeof(*num_local_cells_per_bnd_circle));
5136 uint64_t * uint64_t_buffer =
5137 xmalloc((send_count + recv_count + local_count) *
5138 sizeof(*uint64_t_buffer));
5139 uint64_t * num_local_cells_per_bnd_circle_uint64_t = uint64_t_buffer;
5140 uint64_t * num_remote_cells_per_bnd_circle =
5141 uint64_t_buffer + recv_count + local_count;
5145 cell_sphere_part, recv_buffer, recv_count + local_count, &local_cells,
5146 num_local_cells_per_bnd_circle);
5148 int const * field_mask =
5155 for (
size_t i = 0, offset = 0, new_offset = 0;
5156 i < recv_count + local_count; ++i) {
5159 size_t curr_num_results = num_local_cells_per_bnd_circle[i];
5162 uint64_t new_num_results = 0;
5163 for (
size_t j = 0; j < curr_num_results; ++j, ++offset) {
5164 size_t local_cell_id = local_cells[offset];
5166 cell_bnd_circles + local_cell_id))
continue;
5167 if ((field_mask == NULL) || (field_mask[local_cell_id])) {
5168 if (offset != new_offset) local_cells[new_offset] = local_cell_id;
5173 num_local_cells_per_bnd_circle_uint64_t[i] = new_num_results;
5175 free(num_local_cells_per_bnd_circle);
5176 free(bnd_circle_buffer);
5180 num_local_cells_per_bnd_circle_uint64_t, recvcounts, rdispls,
5181 num_remote_cells_per_bnd_circle, sendcounts, sdispls,
5182 sizeof(*num_local_cells_per_bnd_circle_uint64_t), MPI_UINT64_T, comm);
5184 size_t saccu = 0, raccu = 0, soffset = 0, roffset = 0;
5185 for (
int i = 0; i < comm_size; ++i) {
5187 result_sdispls[i] = saccu;
5188 result_rdispls[i] = raccu;
5190 size_t sendcount = recvcounts[i];
5191 size_t recvcount = sendcounts[i];
5193 result_sendcounts[i] = 0;
5194 result_recvcounts[i] = 0;
5195 for (
size_t j = 0; j < sendcount; ++j, ++soffset)
5196 result_sendcounts[i] +=
5197 (
size_t)(num_local_cells_per_bnd_circle_uint64_t[soffset]);
5198 for (
size_t j = 0; j < recvcount; ++j, ++roffset)
5199 result_recvcounts[i] +=
5200 (
size_t)(num_remote_cells_per_bnd_circle[roffset]);
5202 saccu += result_sendcounts[i];
5203 raccu += result_recvcounts[i];
5208 size_t result_local_count = 0;
5209 for (
size_t i = recv_count; i < recv_count + local_count; ++i)
5210 result_local_count += (
size_t)(num_local_cells_per_bnd_circle_uint64_t[i]);
5212 size_t result_send_count = (size_t)(result_sdispls[comm_size-1]) +
5213 (size_t)(result_sendcounts[comm_size-1]);
5214 size_t result_recv_count = (size_t)(result_rdispls[comm_size-1]) +
5215 (size_t)(result_recvcounts[comm_size-1]);
5218 xmalloc((result_recv_count + result_send_count) *
5219 sizeof(*single_remote_point_buffer));
5226 for (
size_t i = 0; i < result_send_count; ++i) {
5227 size_t local_cell_id = local_cells[i];
5228 id_send_buffer[i].
global_id = cell_ids[local_cell_id];
5229 id_send_buffer[i].
data.
rank = comm_rank;
5233 MPI_Datatype single_remote_point_dt =
5238 id_send_buffer, result_sendcounts, result_sdispls,
5239 id_recv_buffer, result_recvcounts, result_rdispls,
5240 sizeof(*id_send_buffer), single_remote_point_dt, comm);
5242 yac_mpi_call(MPI_Type_free(&single_remote_point_dt), comm);
5244 size_t * new_local_cells =
5245 xmalloc((result_recv_count + result_local_count) *
5246 sizeof(*new_local_cells));
5248 memcpy(new_local_cells + result_recv_count,
5249 local_cells + result_send_count,
5250 result_local_count *
sizeof(*new_local_cells));
5256 dist_grid, id_recv_buffer, result_recv_count,
YAC_LOC_CELL, new_local_cells);
5258 free(single_remote_point_buffer);
5260 size_t * reorder_idx =
5261 xmalloc((result_recv_count + result_local_count) *
sizeof(*reorder_idx));
5264 num_results_per_bnd_circle, 0, count *
sizeof(*num_results_per_bnd_circle));
5266 for (
size_t i = 0, offset = 0, reorder = 0, local_search_idx = recv_count,
5267 local_offset = result_recv_count; i < count; ++i) {
5268 int curr_num_ranks = num_ranks[i];
5269 int * ranks = rank_buffer + offset;
5270 offset += (size_t)curr_num_ranks;
5271 for (
int j = 0; j < curr_num_ranks; ++j) {
5272 int rank = ranks[j];
5273 if (rank == comm_rank) {
5274 uint64_t curr_num_results =
5275 num_local_cells_per_bnd_circle_uint64_t[local_search_idx++];
5276 num_results_per_bnd_circle[i] += (size_t)curr_num_results;
5277 for (uint64_t k = 0; k < curr_num_results; ++k, ++reorder)
5278 reorder_idx[local_offset++] = reorder;
5280 size_t rank_pos = sdispls[rank]++;
5281 uint64_t curr_num_results = num_remote_cells_per_bnd_circle[rank_pos];
5282 num_results_per_bnd_circle[i] += (size_t)curr_num_results;
5283 for (uint64_t k = 0; k < curr_num_results; ++k, ++reorder)
5284 reorder_idx[result_rdispls[rank]++] = reorder;
5288 free(uint64_t_buffer);
5291 free(size_t_buffer);
5295 reorder_idx, result_recv_count + result_local_count, new_local_cells);
5299 for (
size_t i = 0, offset = 0, new_offset = 0; i < count; ++i) {
5301 size_t * curr_local_cells = new_local_cells + offset;
5302 size_t curr_num_results_per_bnd_circle = num_results_per_bnd_circle[i];
5303 size_t new_num_results_per_bnd_circle = 0;
5304 size_t prev_cell = SIZE_MAX;
5305 offset += curr_num_results_per_bnd_circle;
5308 curr_local_cells, curr_num_results_per_bnd_circle, NULL);
5310 for (
size_t j = 0; j < curr_num_results_per_bnd_circle; ++j) {
5311 size_t curr_cell = curr_local_cells[j];
5312 if (curr_cell != prev_cell) {
5313 new_local_cells[new_offset++] = (prev_cell = curr_cell);
5314 ++new_num_results_per_bnd_circle;
5317 num_results_per_bnd_circle[i] = new_num_results_per_bnd_circle;
5320 *cells = new_local_cells;
5325 char const * search_grid_name,
char const * result_grid_name,
5326 size_t * search_cells,
size_t count,
size_t ** result_cells,
5327 size_t * num_results_per_search_cell,
struct yac_interp_field result_field) {
5330 xmalloc(count *
sizeof(*search_bnd_circles));
5340 for (
size_t i = 0; i <
count; ++i)
5341 search_bnd_circles[i] = search_grid_cell_bnd_circles[search_cells[i]];
5344 grid_pair, result_grid_name, search_bnd_circles,
count, result_cells,
5345 num_results_per_search_cell, result_field);
5347 size_t total_num_result_cells = 0;
5355 for (
size_t i = 0, offset = 0; i <
count; ++i) {
5357 size_t curr_num_results_per_bnd_circle = num_results_per_search_cell[i];
5358 size_t new_num_results_per_search_cell = 0;
5359 size_t * curr_result_cells = *result_cells + offset;
5360 offset += curr_num_results_per_bnd_circle;
5366 for (
size_t j = 0; j < curr_num_results_per_bnd_circle; ++j) {
5368 size_t curr_result_cell = curr_result_cells[j];
5378 search_bnd_circles + i,
5381 (*result_cells)[total_num_result_cells++] = curr_result_cell;
5382 ++new_num_results_per_search_cell;
5386 num_results_per_search_cell[i] = new_num_results_per_search_cell;
5394 free(search_bnd_circles);
5402 size_t * edge_to_vertex = &(dist_grid->edge_to_vertex[edge_id][0]);
5403 double * vertices[2] =
5404 {&(dist_grid->vertex_coordinates[edge_to_vertex[0]][0]),
5405 &(dist_grid->vertex_coordinates[edge_to_vertex[1]][0])};
5407 bnd_circle.
base_vector[0] = vertices[0][0] + vertices[1][0];
5408 bnd_circle.
base_vector[1] = vertices[0][1] + vertices[1][1];
5409 bnd_circle.
base_vector[2] = vertices[0][2] + vertices[1][2];
5413 bnd_circle.
sq_crd = DBL_MAX;
5421 size_t * cells,
size_t count,
size_t * neighbours) {
5431 size_t max_num_edges_per_cell = 0;
5433 if (max_num_edges_per_cell < dist_grid->num_vertices_per_cell[i])
5437 xmalloc(max_num_edges_per_cell *
sizeof(*edge_vertices));
5439 size_t neigh_idx = 0;
5442 size_t missing_edge_neighbour_array_size = 0;
5443 size_t num_missing_neighbours = 0;
5446 for (
size_t i = 0; i < count; ++i) {
5448 size_t curr_cell = cells[i];
5452 size_t const * cell_edges =
5454 for (
size_t j = 0; j < curr_num_edges; ++j) {
5455 size_t const * curr_edge_to_vertex =
5457 edge_vertices[j][0] = curr_edge_to_vertex[0];
5458 edge_vertices[j][1] = curr_edge_to_vertex[1];
5463 num_missing_neighbours + curr_num_edges);
5467 size_t prev_vertex = edge_vertices[0][0];
5468 for (
size_t j = 0, edge_idx = 0; j < curr_num_edges; ++j, ++
neigh_idx) {
5471 size_t curr_edge = cell_edges[edge_idx];
5472 size_t * curr_edge_cells = edge_to_cell[curr_edge];
5473 size_t other_cell = curr_edge_cells[curr_edge_cells[0] == curr_cell];
5476 if (other_cell == SIZE_MAX) {
5489 size_t new_edge_idx = SIZE_MAX;
5490 for (
size_t k = 0; k < curr_num_edges; ++k) {
5491 if (k == edge_idx)
continue;
5492 else if (edge_vertices[k][0] == prev_vertex) {
5494 prev_vertex = edge_vertices[k][1];
5496 }
else if (edge_vertices[k][1] == prev_vertex) {
5498 prev_vertex = edge_vertices[k][0];
5503 new_edge_idx < SIZE_MAX,
5504 "ERROR(yac_basic_grid_data_get_cell_neighbours): "
5505 "inconsistent cell_to_edge/edge_to_vertex data")
5506 edge_idx = new_edge_idx;
5511 prev_vertex == edge_vertices[0][0],
5512 "ERROR(yac_basic_grid_data_get_cell_neighbours): "
5513 "inconsistent cell_to_edge/edge_to_vertex data")
5517 MPI_Comm comm = dist_grid->
comm;
5518 int comm_rank, comm_size;
5522 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
5524 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
5527 ((
size_t)comm_size + num_missing_neighbours) *
sizeof(*int_buffer));
5528 int * temp_ranks = int_buffer;
5529 int * num_ranks = int_buffer + comm_size;
5530 memset(num_ranks, 0, num_missing_neighbours *
sizeof(*num_ranks));
5532 int * rank_buffer = NULL;
5533 size_t rank_buffer_array_size = 0;
5534 size_t rank_buffer_size = 0;
5536 for (
size_t i = 0; i < num_missing_neighbours; ++i) {
5543 temp_ranks, &curr_num_ranks);
5546 rank_buffer_size + (
size_t)curr_num_ranks);
5548 for (
int j = 0; j < curr_num_ranks; ++j) {
5549 int curr_rank = temp_ranks[j];
5550 if (curr_rank != comm_rank) {
5551 sendcounts[curr_rank] += 2;
5553 rank_buffer[rank_buffer_size++] = curr_rank;
5559 1, sendcounts, recvcounts, sdispls, rdispls, comm);
5562 (sdispls[comm_size] + sendcounts[comm_size-1])/2;
5564 (rdispls[comm_size-1] + recvcounts[comm_size-1])/2;
5567 xmalloc(2 * (send_count + recv_count) *
sizeof(*yac_int_buffer));
5568 yac_int * send_buffer = yac_int_buffer;
5569 yac_int * recv_buffer = yac_int_buffer + 2 * send_count;
5570 size_t * result_reorder_idx =
5571 xmalloc(send_count *
sizeof(*result_reorder_idx));
5574 for (
size_t i = 0, k = 0, rank_offset = 0; i < num_missing_neighbours; ++i) {
5576 int * curr_rank_buffer = rank_buffer + rank_offset;
5577 int curr_num_ranks = num_ranks[i];
5578 rank_offset += (size_t)curr_num_ranks;
5580 for (
int j = 0; j < curr_num_ranks; ++j, ++k) {
5582 int rank = curr_rank_buffer[j];
5584 if (rank == comm_rank)
continue;
5586 size_t pos = sdispls[rank + 1];
5587 sdispls[rank + 1] += 2;
5598 yac_alltoallv_yac_int_p2p(
5599 send_buffer, sendcounts, sdispls,
5600 recv_buffer, recvcounts, rdispls, comm);
5603 xmalloc(recv_count *
sizeof(*request_edge_ids));
5604 size_t * reorder_idx =
xmalloc(recv_count *
sizeof(*reorder_idx));
5607 (send_count + recv_count) *
sizeof(*point_info_buffer));
5610 point_info_buffer + recv_count;
5611 for (
size_t i = 0; i < recv_count; ++i) {
5612 request_edge_ids[i] = recv_buffer[2 * i + 0];
5617 request_edge_ids, recv_count, reorder_idx);
5619 size_t * sorted_edge_reorder_idx =
5625 for (
size_t i = 0, j = 0; i < recv_count; ++i) {
5627 yac_int curr_edge_id = request_edge_ids[i];
5628 size_t curr_reorder_idx = reorder_idx[i];
5630 while ((j < num_edges) && (sorted_edge_ids[j] < curr_edge_id)) ++j;
5633 if ((j >= num_edges) || (sorted_edge_ids[j] != curr_edge_id)) {
5634 point_send_buffer[curr_reorder_idx] =
5637 .data = {.rank = comm_rank, .orig_pos = UINT64_MAX}};
5642 yac_int available_edge_cell_id = recv_buffer[2 * curr_reorder_idx + 1];
5644 size_t * local_edge_cell_ids = edge_to_cell[sorted_edge_reorder_idx[j]];
5645 yac_int global_edge_cell_ids[2];
5646 for (
int k = 0; k < 2; ++k)
5647 global_edge_cell_ids[k] =
5648 (local_edge_cell_ids[k] == SIZE_MAX)?
5649 XT_INT_MAX:cell_ids[local_edge_cell_ids[k]];
5651 int missing_idx = global_edge_cell_ids[0] == available_edge_cell_id;
5655 (global_edge_cell_ids[missing_idx^1] == available_edge_cell_id) ||
5656 (global_edge_cell_ids[missing_idx^1] == XT_INT_MAX),
5657 "ERROR(yac_dist_grid_get_cell_neighbours): "
5658 "inconsistent cell edge grid data")
5660 point_send_buffer[curr_reorder_idx].
global_id =
5661 global_edge_cell_ids[missing_idx];
5662 point_send_buffer[curr_reorder_idx].
data.
rank = comm_rank;
5664 (local_edge_cell_ids[missing_idx] == SIZE_MAX)?
5665 (uint64_t)UINT64_MAX:local_edge_cell_ids[missing_idx];
5668 free(request_edge_ids);
5669 free(yac_int_buffer);
5671 for (
int i = 0; i < comm_size; ++i) {
5678 MPI_Datatype single_remote_point_dt =
5682 point_send_buffer, recvcounts, rdispls,
5683 point_recv_buffer, sendcounts, sdispls,
5684 sizeof(*point_send_buffer), single_remote_point_dt, comm);
5686 yac_mpi_call(MPI_Type_free(&single_remote_point_dt), comm);
5689 xmalloc(send_count *
sizeof(*results));
5691 for (
size_t i = 0; i < send_count; ++i) {
5692 results[i].
data = point_recv_buffer[i];
5696 qsort(results, send_count,
sizeof(*results),
5700 size_t result_count = 0;
5701 yac_int prev_global_id = XT_INT_MAX;
5702 for (
size_t i = 0, prev_reorder_idx = SIZE_MAX; i < send_count; ++i) {
5706 if (curr_global_id == XT_INT_MAX)
continue;
5709 if (curr_reorder_idx != prev_reorder_idx){
5711 results[result_count++] = results[i];
5712 prev_reorder_idx = curr_reorder_idx;
5713 prev_global_id = curr_global_id;
5718 prev_global_id == curr_global_id,
5719 "ERROR(yac_dist_grid_get_cell_neighbours): "
5720 "inconsistent cell edge data")
5723 for (
size_t i = 0; i < result_count; ++i) {
5724 point_send_buffer[i] = results[i].
data;
5729 size_t * local_ids =
xmalloc(result_count *
sizeof(*local_ids));
5732 dist_grid, point_send_buffer, result_count,
YAC_LOC_CELL, local_ids);
5734 for (
size_t i = 0; i < result_count; ++i)
5735 neighbours[result_reorder_idx[i]] = local_ids[i];
5738 free(result_reorder_idx);
5739 free(point_send_buffer);
5744 free(edge_vertices);
5750 size_t * cells,
size_t count,
size_t * neighbours) {
5759 size_t *
points,
size_t count) {
5767 "ERROR(yac_dist_grid_get_remote_points): invalid location")
5768 yac_int * global_ids = dist_grid->
ids[location];
5771 for (
size_t i = 0; i <
count; ++i) {
5781 return (
int)(
value / 128) % comm_size;
5786 int global_id_pack_size;
5789 MPI_Pack_size(1,
yac_int_dt, comm, &global_id_pack_size), comm);
5791 return global_id_pack_size;
5795 yac_int global_id,
void * buffer,
int buffer_size,
int * position,
5800 buffer_size, position, comm), comm);
5804 void * buffer,
int buffer_size,
int * position,
yac_int * global_id,
5808 MPI_Unpack(buffer, buffer_size, position, global_id, 1,
5813 MPI_Datatype single_remote_point_dt, MPI_Comm comm) {
5818 MPI_Pack_size(1, single_remote_point_dt, comm, &pack_size), comm);
5825 void * buffer,
int buffer_size,
int * position,
5826 MPI_Datatype single_remote_point_dt, MPI_Comm comm) {
5829 MPI_Pack(point, 1, single_remote_point_dt, buffer,
5830 buffer_size, position, comm), comm);
5834 void * buffer,
int buffer_size,
int * position,
5839 MPI_Unpack(buffer, buffer_size, position, point, 1,
5840 single_remote_point_dt, comm), comm);
5845 yac_int * global_ids,
size_t count,
size_t * local_ids) {
5847 MPI_Comm comm = dist_grid->
comm;
5848 int comm_rank, comm_size;
5852 size_t * size_t_buffer =
5853 xmalloc((8 * (
size_t)comm_size + 1) *
sizeof(*size_t_buffer));
5854 size_t * sendcounts = size_t_buffer + 0 * comm_size;
5855 size_t * recvcounts = size_t_buffer + 2 * comm_size;
5856 size_t * sdispls = size_t_buffer + 4 * comm_size;
5857 size_t * rdispls = size_t_buffer + 5 * comm_size + 1;
5858 size_t * total_sendcounts = size_t_buffer + 6 * comm_size + 1;
5859 size_t * total_recvcounts = size_t_buffer + 7 * comm_size + 1;
5860 memset(sendcounts, 0, 2 * (
size_t)comm_size *
sizeof(*sendcounts));
5862 size_t * core_points, core_count;
5865 .coordinates_idx = SIZE_MAX,
5866 .masks_idx = SIZE_MAX};
5868 dist_grid, dummy_interp_field, &core_points, &core_count);
5869 yac_int const * grid_global_ids =
5872 int * rank_buffer =
xmalloc((count + core_count) *
sizeof(*rank_buffer));
5873 int * core_point_ranks = rank_buffer;
5874 int * global_id_ranks = rank_buffer + core_count;
5876 size_t * global_id_reorder_idx =
5877 xmalloc(count *
sizeof(*global_id_reorder_idx));
5879 for (
size_t i = 0; i < count; ++i) {
5882 sendcounts[2 * rank + 0]++;
5883 global_id_reorder_idx[i] = i;
5888 for (
size_t i = 0; i < core_count; ++i) {
5889 size_t point_idx = core_points[i];
5891 (core_point_ranks[i] =
5893 sendcounts[2 * rank + 1]++;
5904 size_t recv_core_count = 0;
5905 for (
int i = 0; i < comm_size; ++i)
5906 recv_core_count += recvcounts[2*i+1];
5908 MPI_Datatype single_remote_point_dt =
5911 int core_point_pack_size =
5914 for (
int i = 0; i < comm_size; ++i) {
5915 total_sendcounts[i] = sendcounts[2*i+0] * (size_t)global_id_pack_size +
5916 sendcounts[2*i+1] * (
size_t)core_point_pack_size;
5917 total_recvcounts[i] = recvcounts[2*i+0] * (size_t)global_id_pack_size +
5918 recvcounts[2*i+1] * (
size_t)core_point_pack_size;
5921 size_t saccu = 0, raccu = 0;
5923 for (
int i = 0; i < comm_size; ++i) {
5924 sdispls[i+1] = saccu;
5926 saccu += total_sendcounts[i];
5927 raccu += total_recvcounts[i];
5930 size_t send_size = sdispls[comm_size] + total_sendcounts[comm_size-1];
5931 size_t recv_size = rdispls[comm_size-1] + total_recvcounts[comm_size-1];
5932 void * pack_buffer =
xmalloc(send_size + recv_size);
5933 void * send_buffer = pack_buffer;
5934 void * recv_buffer = (
void*)((
unsigned char *)pack_buffer + send_size);
5937 for (
size_t i = 0; i < count; ++i) {
5938 yac_int curr_global_id = global_ids[global_id_reorder_idx[i]];
5939 int rank = global_id_ranks[i];
5940 size_t pos = sdispls[rank + 1];
5943 curr_global_id, (
void*)((
unsigned char*)send_buffer + pos),
5944 global_id_pack_size, &position, comm);
5945 sdispls[rank + 1] += global_id_pack_size;
5947 for (
size_t i = 0; i < core_count; ++i) {
5948 size_t point_idx = core_points[i];
5949 int rank = core_point_ranks[i];
5950 size_t pos = sdispls[rank + 1];
5952 curr_core_point.
global_id = grid_global_ids[point_idx];
5953 curr_core_point.
data.
rank = comm_rank;
5958 (
void*)((
unsigned char*)send_buffer + pos),
5959 core_point_pack_size, &position, single_remote_point_dt, comm);
5960 sdispls[rank + 1] += core_point_pack_size;
5966 yac_alltoallv_packed_p2p(
5967 send_buffer, total_sendcounts, sdispls,
5968 recv_buffer, total_recvcounts, rdispls, comm);
5970 size_t num_requested_ids = 0;
5971 for(
int i = 0; i < comm_size; ++i)
5972 num_requested_ids += recvcounts[2*i+0];
5974 yac_int * request_global_ids =
5975 xmalloc(num_requested_ids *
sizeof(*request_global_ids));
5976 size_t * reorder_idx =
5977 xmalloc(num_requested_ids *
sizeof(*reorder_idx));
5979 xmalloc((recv_core_count + num_requested_ids + count) *
5980 sizeof(*point_info_buffer));
5983 point_info_buffer + recv_core_count;
5985 point_info_buffer + recv_core_count + num_requested_ids;
5988 num_requested_ids = 0;
5989 recv_core_count = 0;
5990 for (
int i = 0; i < comm_size; ++i) {
5992 size_t curr_num_requested_ids = recvcounts[2*i+0];
5993 size_t curr_num_core_points = recvcounts[2*i+1];
5995 for (
size_t j = 0; j < curr_num_requested_ids; ++j, ++num_requested_ids) {
5999 recv_buffer, global_id_pack_size, &position,
6000 request_global_ids + num_requested_ids, comm);
6001 reorder_idx[num_requested_ids] = num_requested_ids;
6002 recv_buffer = (
void*)((
unsigned char*)recv_buffer + global_id_pack_size);
6004 for (
size_t j = 0; j < curr_num_core_points; ++j, ++recv_core_count) {
6008 recv_buffer, core_point_pack_size, &position,
6009 recv_core_points + recv_core_count, single_remote_point_dt, comm);
6010 recv_buffer = (
void*)((
unsigned char*)recv_buffer + core_point_pack_size);
6017 request_global_ids, num_requested_ids, reorder_idx);
6020 qsort(recv_core_points, recv_core_count,
sizeof(*recv_core_points),
6024 for (
size_t i = 0, j = 0; i < num_requested_ids; ++i) {
6026 yac_int curr_global_id = request_global_ids[i];
6027 while ((j < recv_core_count) &&
6028 (recv_core_points[j].
global_id < curr_global_id)) ++j;
6031 (j < recv_core_count) &&
6032 (recv_core_points[j].
global_id == curr_global_id),
6033 "ERROR(yac_dist_grid_global_to_local): "
6034 "no matching core point found for global id %zu",
6035 (
size_t)curr_global_id)
6037 send_point_info[reorder_idx[i]] = recv_core_points[j];
6040 free(request_global_ids);
6042 saccu = 0, raccu = 0;
6043 for (
int i = 0; i < comm_size; ++i) {
6047 int recvcount = sendcounts[2*i+0];
6048 int sendcount = recvcounts[2*i+0];
6049 saccu += (sendcounts[i] = sendcount);
6050 raccu += (recvcounts[i] = recvcount);
6055 send_point_info, sendcounts, sdispls,
6056 recv_point_info, recvcounts, rdispls,
6057 sizeof(*send_point_info), single_remote_point_dt, comm);
6059 free(size_t_buffer);
6060 yac_mpi_call(MPI_Type_free(&single_remote_point_dt), comm);
6063 dist_grid, recv_point_info, count, location, local_ids);
6067 free(global_id_reorder_idx);
6068 free(point_info_buffer);
6073 size_t * vertices,
size_t count,
size_t ** cells,
6081 xmalloc(count *
sizeof(*vertex_bnd_circles));
6086 for (
size_t i = 0; i < count; ++i) {
6087 memcpy(vertex_bnd_circles[i].base_vector,
6089 vertex_bnd_circles[i].
inc_angle = sin_cos_high_tol;
6090 vertex_bnd_circles[i].
sq_crd = DBL_MAX;
6095 grid_pair, grid_name, vertex_bnd_circles, count,
6096 cells, num_cells_per_vertex, field);
6097 free(vertex_bnd_circles);
6100 size_t total_num_cells = 0;
6101 for (
size_t i = 0, k = 0; i < count; ++i) {
6103 size_t curr_vertex = vertices[i];
6104 size_t curr_num_cells_per_vertex = num_cells_per_vertex[i];
6106 size_t new_num_cells_per_vertex = 0;
6109 for (
size_t j = 0; j < curr_num_cells_per_vertex; ++j, ++k) {
6111 size_t curr_cell = (*cells)[k];
6112 size_t * curr_cell_vertices =
6117 size_t vertex_idx = 0;
6118 for (; vertex_idx < curr_cell_size; ++vertex_idx)
6119 if (curr_cell_vertices[vertex_idx] == curr_vertex)
break;
6122 if (vertex_idx == curr_cell_size)
continue;
6124 if (total_num_cells != k)
6125 (*cells)[total_num_cells] = curr_cell;
6126 ++new_num_cells_per_vertex;
6130 num_cells_per_vertex[i] = new_num_cells_per_vertex;
6133 *cells =
xrealloc(*cells, total_num_cells *
sizeof(**cells));
6159 size_t * vertices,
size_t count,
size_t ** cells,
6166 size_t * result_cells;
6167 size_t * num_result_per_vertex =
6170 grid_pair, grid_name, vertices,
count,
6171 &result_cells, num_result_per_vertex, field);
6173 size_t max_num_cell_per_vertex = 0;
6174 for (
size_t i = 0; i <
count; ++i)
6175 if (num_result_per_vertex[i] > max_num_cell_per_vertex)
6176 max_num_cell_per_vertex = num_result_per_vertex[i];
6177 size_t (*neigh_vertices)[2] =
6178 xmalloc(max_num_cell_per_vertex *
sizeof(*neigh_vertices));
6179 size_t * temp_vertex_cell =
6180 xmalloc(max_num_cell_per_vertex *
sizeof(*temp_vertex_cell));
6182 xmalloc(max_num_cell_per_vertex *
sizeof(*global_cell_ids));
6185 for (
size_t i = 0, offset = 0, new_offset = 0; i <
count; ++i) {
6187 size_t curr_vertex = vertices[i];
6188 size_t * curr_cells = result_cells + offset;
6189 size_t curr_num_cells_per_vertex = num_result_per_vertex[i];
6192 for (
size_t j = 0; j < curr_num_cells_per_vertex; ++j) {
6194 size_t curr_cell = curr_cells[j];
6195 size_t * curr_cell_vertices =
6199 size_t vertex_idx = 0;
6200 for (; vertex_idx < curr_cell_size; ++vertex_idx)
6201 if (curr_cell_vertices[vertex_idx] == curr_vertex)
break;
6204 neigh_vertices[j][0] =
6205 curr_cell_vertices[((vertex_idx + curr_cell_size) - 1)%curr_cell_size];
6206 neigh_vertices[j][1] =
6207 curr_cell_vertices[(vertex_idx + 1)%curr_cell_size];
6209 if (new_offset != offset) result_cells[new_offset + j] = curr_cell;
6212 offset += curr_num_cells_per_vertex;
6213 curr_cells = result_cells + new_offset;
6216 temp_vertex_cell[0] = curr_cells[0];
6217 size_t start_neigh_vertex = neigh_vertices[0][0];
6218 size_t prev_vertex = neigh_vertices[0][1];
6219 for (
size_t j = 1, prev_cell_idx = 0; j < curr_num_cells_per_vertex; ++j) {
6222 for (k = 0; k < curr_num_cells_per_vertex; ++k) {
6224 if (k == prev_cell_idx)
continue;
6226 int flag = neigh_vertices[k][0] == prev_vertex;
6227 if (flag || (neigh_vertices[k][1] == prev_vertex)) {
6228 temp_vertex_cell[j] = curr_cells[k];
6230 prev_vertex = neigh_vertices[k][flag];
6237 if (k == curr_num_cells_per_vertex) {
6238 curr_num_cells_per_vertex = 0;
6242 if ((prev_vertex != start_neigh_vertex) ||
6243 (curr_num_cells_per_vertex < 3))
6244 curr_num_cells_per_vertex = 0;
6246 new_offset += curr_num_cells_per_vertex;
6247 num_cells_per_vertex[i] = (int)curr_num_cells_per_vertex;
6249 if (curr_num_cells_per_vertex == 0)
continue;
6252 yac_int min_global_cell_id = XT_INT_MAX;
6253 size_t min_global_cell_id_idx = SIZE_MAX;
6254 for (
size_t j = 0; j < curr_num_cells_per_vertex; ++j) {
6256 ((global_cell_ids[j] =
6258 if (curr_global_cell_id < min_global_cell_id) {
6259 min_global_cell_id = curr_global_cell_id;
6260 min_global_cell_id_idx = j;
6267 ((min_global_cell_id_idx + curr_num_cells_per_vertex) - 1)%
6268 curr_num_cells_per_vertex] >
6270 (min_global_cell_id_idx + 1)%curr_num_cells_per_vertex])?-1:1;
6273 for (
size_t j = 0; j < curr_num_cells_per_vertex; ++j)
6276 ((
int)(min_global_cell_id_idx + curr_num_cells_per_vertex) +
6277 (
int)j * order)%(
int)curr_num_cells_per_vertex];
6280 *cells = result_cells;
6281 free(num_result_per_vertex);
6282 free(global_cell_ids);
6283 free(temp_vertex_cell);
6284 free(neigh_vertices);
6289 size_t const * a_ = a, * b_ = b;
6291 return (*a_ > *b_) - (*b_ > *a_);
6296 size_t * vertices,
size_t count,
size_t ** neigh_vertices_,
6302 size_t * result_cells;
6303 size_t * num_result_per_vertex =
6306 grid_pair, grid_name, vertices,
count, &result_cells,
6307 num_result_per_vertex, field);
6309 size_t total_num_neigh = 0;
6310 size_t max_num_neigh = 0;
6311 for (
size_t i = 0; i <
count; ++i) {
6312 total_num_neigh += num_result_per_vertex[i];
6313 if (num_result_per_vertex[i] > max_num_neigh)
6314 max_num_neigh = num_result_per_vertex[i];
6317 int const * vertex_mask = NULL;
6321 size_t * neigh_vertices =
6322 xmalloc(total_num_neigh *
sizeof(*neigh_vertices));
6323 size_t * temp_neigh_vertices =
6324 xmalloc(2 * max_num_neigh *
sizeof(*temp_neigh_vertices));
6325 total_num_neigh = 0;
6328 for (
size_t i = 0, offset = 0; i <
count; ++i) {
6330 size_t curr_vertex = vertices[i];
6331 size_t * curr_cells = result_cells + offset;
6332 size_t curr_num_cells_per_vertex = num_result_per_vertex[i];
6334 size_t curr_num_neigh_vertices = 0;
6337 for (
size_t j = 0; j < curr_num_cells_per_vertex; ++j) {
6339 size_t curr_cell = curr_cells[j];
6340 size_t * curr_cell_vertices =
6345 size_t vertex_idx = 0;
6346 for (; vertex_idx < curr_cell_size; ++vertex_idx)
6347 if (curr_cell_vertices[vertex_idx] == curr_vertex)
break;
6350 size_t neigh_vertex_idx =
6351 curr_cell_vertices[((vertex_idx + curr_cell_size) - 1)%curr_cell_size];
6352 if ((vertex_mask != NULL) && (vertex_mask[neigh_vertex_idx]))
6353 temp_neigh_vertices[curr_num_neigh_vertices++] = neigh_vertex_idx;
6355 curr_cell_vertices[(vertex_idx + 1)%curr_cell_size];
6356 if ((vertex_mask != NULL) && (vertex_mask[neigh_vertex_idx]))
6357 temp_neigh_vertices[curr_num_neigh_vertices++] = neigh_vertex_idx;
6360 qsort(temp_neigh_vertices, curr_num_neigh_vertices,
6363 temp_neigh_vertices, &curr_num_neigh_vertices);
6364 memcpy(neigh_vertices + total_num_neigh, temp_neigh_vertices,
6365 curr_num_neigh_vertices *
sizeof(*neigh_vertices));
6367 total_num_neigh += curr_num_neigh_vertices;
6368 num_neighs_per_vertex[i] = curr_num_neigh_vertices;
6370 offset += curr_num_cells_per_vertex;
6372 free(temp_neigh_vertices);
6375 free(num_result_per_vertex);
6377 *neigh_vertices_ = neigh_vertices;
6382 size_t * vertices,
size_t count,
size_t ** vertex_to_cell,
6383 size_t * num_cells_per_vertex) {
6391 .coordinates_idx = SIZE_MAX,
6392 .masks_idx = SIZE_MAX};
6394 grid_pair, grid_name, vertices, count, vertex_to_cell,
6395 num_cells_per_vertex, dummy_interp_field);
6397 size_t max_num_cells_per_vertex = 0;
6398 for (
size_t i = 0; i < count; ++i)
6399 if (num_cells_per_vertex[i] > max_num_cells_per_vertex)
6400 max_num_cells_per_vertex = num_cells_per_vertex[i];
6403 xmalloc(max_num_cells_per_vertex *
sizeof(*global_id_buffer));
6408 (count == 0) || (global_cell_ids != NULL),
6409 "ERROR(yac_dist_grid_pair_get_corner_cells): no global cell ids")
6412 for (
size_t i = 0, offset = 0; i < count; ++i) {
6414 size_t curr_num_cells_per_vertex = num_cells_per_vertex[i];
6415 size_t * curr_vertex_to_cell = *vertex_to_cell + offset;
6416 offset += curr_num_cells_per_vertex;
6419 for (
size_t j = 0; j < curr_num_cells_per_vertex; ++j)
6420 global_id_buffer[j] = global_cell_ids[curr_vertex_to_cell[j]];
6424 global_id_buffer, curr_num_cells_per_vertex, curr_vertex_to_cell);
6427 free(global_id_buffer);
6432 size_t * cells,
size_t count,
6433 size_t ** vertex_to_cell,
size_t ** vertex_to_cell_offsets_,
6440 size_t * temp_cells =
xmalloc(
count *
sizeof(*temp_cells));
6441 int * required_vertices =
6443 memcpy(temp_cells, cells,
count *
sizeof(*cells));
6445 for (
size_t i = 0, prev_cell = SIZE_MAX; i <
count; ++i) {
6446 size_t curr_cell = temp_cells[i];
6447 if (curr_cell == SIZE_MAX)
break;
6448 if (curr_cell != prev_cell) {
6449 prev_cell = curr_cell;
6451 size_t const * curr_vertices =
6453 for (
size_t j = 0; j < curr_num_vertices; ++j)
6454 required_vertices[curr_vertices[j]] = 1;
6460 size_t num_unique_vertices = 0;
6462 if (required_vertices[i]) ++num_unique_vertices;
6463 size_t * unique_vertices =
6464 xmalloc(num_unique_vertices *
sizeof(*unique_vertices));
6466 if (required_vertices[i]) unique_vertices[j++] = i;
6467 free(required_vertices);
6470 int * num_cells_per_vertex =
6471 xcalloc(num_unique_vertices,
sizeof(*num_cells_per_vertex));
6473 grid_pair, grid_name, unique_vertices, num_unique_vertices,
6474 vertex_to_cell, num_cells_per_vertex, field);
6476 int * grid_num_cells_per_vertex =
6479 sizeof(*grid_num_cells_per_vertex));
6481 for (
size_t i = 0; i < num_unique_vertices; ++i)
6482 grid_num_cells_per_vertex[unique_vertices[i]] =
6483 num_cells_per_vertex[i];
6484 free(num_cells_per_vertex);
6485 free(unique_vertices);
6487 size_t * vertex_to_cell_offsets =
6490 sizeof(*vertex_to_cell_offsets));
6493 vertex_to_cell_offsets[i] = offset;
6494 offset += grid_num_cells_per_vertex[i];
6497 *vertex_to_cell_offsets_ = vertex_to_cell_offsets;
6498 *num_cells_per_vertex_ = grid_num_cells_per_vertex;
6502 size_t **
points,
size_t * reorder_idx,
int * ranks,
size_t count,
6503 enum yac_location location,
size_t local_count,
size_t recv_count,
6506 size_t * sendcounts,
size_t * sdispls,
size_t * recvcounts,
size_t * rdispls,
6507 MPI_Datatype single_remote_point_dt, MPI_Comm
comm,
6516 size_t * old_points = *
points;
6517 size_t * new_points =
6518 xmalloc((local_count + recv_count) *
sizeof(*new_points));
6520 for (
size_t i = 0, j = 0, k = 0; i <
count; ++i) {
6521 size_t idx = reorder_idx[i];
6522 size_t curr_point = old_points[idx];
6523 int curr_rank = ranks[i];
6524 if (curr_rank == comm_rank) {
6525 new_points[j++] = curr_point;
6527 id_send_buffer[k].
global_id = global_ids[curr_point];
6528 id_send_buffer[k].
data.
rank = comm_rank;
6536 id_send_buffer, sendcounts, sdispls,
6537 id_recv_buffer, recvcounts, rdispls,
6538 sizeof(*id_send_buffer), single_remote_point_dt,
comm);
6543 dist_grid, id_recv_buffer, recv_count, location,
6544 new_points + local_count);
6551 double ** weights,
size_t * reorder_idx,
int * ranks,
size_t count,
6552 size_t send_count,
size_t local_count,
size_t recv_count,
6553 size_t * sendcounts,
size_t * sdispls,
size_t * recvcounts,
size_t * rdispls,
6559 double * old_weights = *weights;
6560 double * send_buffer =
xmalloc(send_count *
sizeof(*send_buffer));
6561 double * recv_buffer =
6562 xmalloc((local_count + recv_count) *
sizeof(*recv_buffer));
6564 for (
size_t i = 0, j = 0, k = 0; i <
count; ++i) {
6565 size_t idx = reorder_idx[i];
6566 double curr_weight = old_weights[idx];
6567 int curr_rank = ranks[i];
6568 if (curr_rank == comm_rank) recv_buffer[j++] = curr_weight;
6569 else send_buffer[k++] = curr_weight;
6574 send_buffer, sendcounts, sdispls,
6575 recv_buffer + local_count, recvcounts, rdispls,
6576 sizeof(*send_buffer), MPI_DOUBLE,
comm);
6579 *weights = recv_buffer;
6586 size_t * vertices,
size_t count,
int * ranks) {
6588 size_t * reorder_idx =
xmalloc(
count *
sizeof(*reorder_idx));
6589 for (
size_t i = 0; i <
count; ++i) reorder_idx[i] = i;
6593 for (valid_count = 0;
6594 (valid_count <
count) && (vertices[valid_count] != SIZE_MAX);
6597 for (
size_t i = valid_count; i <
count; ++i) ranks[reorder_idx[i]] = 0;
6599 size_t unique_count = 0;
6600 size_t prev_vertex = SIZE_MAX;
6601 for (
size_t i = 0; i < valid_count; ++i) {
6602 size_t curr_vertex = vertices[i];
6603 if (curr_vertex != prev_vertex) {
6604 prev_vertex = curr_vertex;
6611 xmalloc(unique_count *
sizeof(*search_coords));
6613 prev_vertex = SIZE_MAX;
6614 for (
size_t i = 0, j = 0; i < valid_count; ++i) {
6615 size_t curr_vertex = vertices[i];
6616 if (curr_vertex != prev_vertex) {
6617 prev_vertex = curr_vertex;
6619 search_coords[j++], grid_coords[curr_vertex], 3 *
sizeof(
double));
6623 int * temp_ranks =
xmalloc(unique_count *
sizeof(*temp_ranks));
6625 proc_sphere_part, search_coords, unique_count, temp_ranks);
6627 prev_vertex = SIZE_MAX;
6628 for (
size_t i = 0, j = 0; i < valid_count; ++i) {
6629 size_t curr_vertex = vertices[i];
6630 if (curr_vertex != prev_vertex) {
6631 prev_vertex = curr_vertex;
6634 ranks[reorder_idx[i]] = temp_ranks[j-1];
6639 free(search_coords);
6646 size_t * indices,
size_t count,
int * ranks,
6647 size_t (*get_ce_reference_vertex)(
struct yac_dist_grid *,
size_t)) {
6649 size_t * reorder_idx =
xmalloc(
count *
sizeof(*reorder_idx));
6650 for (
size_t i = 0; i <
count; ++i) reorder_idx[i] = i;
6653 size_t unique_count = 0;
6654 size_t prev_index = SIZE_MAX;
6655 for (
size_t i = 0; i <
count; ++i) {
6656 size_t curr_index = indices[i];
6657 if (curr_index != prev_index) {
6658 prev_index = curr_index;
6663 size_t * ref_vertices =
xmalloc(unique_count *
sizeof(*ref_vertices));
6664 prev_index = SIZE_MAX;
6665 for (
size_t i = 0, j = 0; i <
count; ++i) {
6666 size_t curr_index = indices[i];
6667 if (curr_index != prev_index) {
6668 prev_index = curr_index;
6670 get_ce_reference_vertex(dist_grid, curr_index);
6674 int * temp_ranks =
xmalloc(unique_count *
sizeof(*temp_ranks));
6676 dist_grid, proc_sphere_part, ref_vertices, unique_count, temp_ranks);
6679 prev_index = SIZE_MAX;
6680 for (
size_t i = 0, j = 0; i <
count; ++i) {
6681 size_t curr_index = indices[i];
6682 if (curr_index != prev_index) {
6683 prev_index = curr_index;
6686 ranks[reorder_idx[i]] = temp_ranks[j-1];
6697 size_t * cells,
size_t count,
int * ranks) {
6700 dist_grid, proc_sphere_part, cells,
count, ranks,
6707 size_t * edges,
size_t count,
int * ranks) {
6710 dist_grid, proc_sphere_part, edges,
count, ranks,
6725 "ERROR(yac_dist_grid_pair_determine_dist_owner): invalid location")
6727 void (*determine_dist_owner[3])(
6730 size_t * cells,
size_t count,
int * ranks) =
6734 determine_dist_owner[location](
6751 "ERROR(yac_dist_grid_pair_determine_orig_owner): invalid location");
6752 orig_owners = dist_grid->
owners[location];
6754 for (
size_t i = 0; i <
count; ++i) {
6759 int curr_count = curr_orig_owner->
count;
6760 if (curr_count == 1) {
6764 for (
int j = 1; j < curr_count; ++j) {
6766 if (min_rank > curr_rank) min_rank = curr_rank;
6769 ranks[i] = min_rank;
6775 char const * grid_name_a,
size_t ** points_a,
enum yac_location location_a,
6776 char const * grid_name_b,
size_t ** points_b,
enum yac_location location_b,
6777 double ** weights,
size_t *
count) {
6779 size_t count_ = *
count;
6781 MPI_Comm comm = grid_pair->
comm;
6782 int comm_rank, comm_size;
6787 int weight_flag_local =
6788 (count_ > 0) && (weights != NULL) && (*weights != NULL);
6790 yac_mpi_call(MPI_Allreduce(&weight_flag_local, &weight_flag, 1,
6791 MPI_INT, MPI_MAX, comm), comm);
6795 (count_ <= 0) || weight_flag_local || !weight_flag,
6796 "ERROR(yac_dist_grid_pair_relocate_point_pairs): weights")
6799 int * ranks =
xmalloc(count_ *
sizeof(ranks));
6800 size_t * reorder_idx =
xmalloc(count_ *
sizeof(reorder_idx));
6802 char const * grid_name = (a_is_ref)?grid_name_a:grid_name_b;
6803 size_t *
points = (a_is_ref)?*points_a:*points_b;
6804 enum yac_location location = (a_is_ref)?location_a:location_b;
6807 grid_pair, grid_name,
points, count_, location, ranks);
6810 grid_pair, grid_name,
points, count_, location, ranks);
6812 for (
size_t i = 0; i < count_; ++i) reorder_idx[i] = i;
6815 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
6817 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
6818 for (
size_t i = 0; i < count_; ++i) sendcounts[ranks[i]]++;
6820 size_t local_count = sendcounts[comm_rank];
6821 sendcounts[comm_rank] = 0;
6824 1, sendcounts, recvcounts, sdispls, rdispls, comm);
6826 size_t send_count = sdispls[comm_size] + sendcounts[comm_size-1];
6827 size_t recv_count = rdispls[comm_size-1] + recvcounts[comm_size-1];
6830 xmalloc((send_count + recv_count) *
6831 sizeof(*single_remote_point_buffer));
6836 MPI_Datatype single_remote_point_dt =
6840 points_a, reorder_idx, ranks, count_, location_a,
6841 local_count, recv_count, id_send_buffer, id_recv_buffer,
6842 sendcounts, sdispls + 1, recvcounts, rdispls,
6843 single_remote_point_dt, comm,
6846 points_b, reorder_idx, ranks, count_, location_b,
6847 local_count, recv_count, id_send_buffer, id_recv_buffer,
6848 sendcounts, sdispls + 1, recvcounts, rdispls,
6849 single_remote_point_dt, comm,
6851 yac_mpi_call(MPI_Type_free(&single_remote_point_dt), comm);
6855 weights, reorder_idx, ranks, count_,
6856 send_count, local_count, recv_count,
6857 sendcounts, sdispls + 1, recvcounts, rdispls, comm);
6860 free(single_remote_point_buffer);
6864 *count = local_count + recv_count;
struct yac_field_data * yac_basic_grid_get_field_data(struct yac_basic_grid *grid, enum yac_location location)
struct yac_basic_grid_data * yac_basic_grid_get_data(struct yac_basic_grid *grid)
char const * yac_basic_grid_get_name(struct yac_basic_grid *grid)
void yac_get_cell_bounding_circle(struct grid_cell cell, struct bounding_circle *bnd_circle)
int yac_extents_overlap(struct bounding_circle *extent_a, struct bounding_circle *extent_b)
int yac_point_in_cell2(double point_coords[3], struct grid_cell cell, struct bounding_circle bnd_circle)
static void yac_remote_point_infos_get_pack_sizes(struct remote_point_infos **infos, size_t count, int *pack_sizes, MPI_Datatype point_info_dt, MPI_Comm comm)
static int compare_n_ids_reorder_ids(const void *a, const void *b)
static void generate_xmap_pack_remote_point_info(void *data, size_t count, size_t *sdispls, int *dst_pos_buffer, int *send_pos_buffer)
void yac_dist_grid_pair_do_point_search_gc(struct yac_dist_grid_pair *grid_pair, char const *grid_name, yac_coordinate_pointer search_coords, size_t count, size_t *cells)
void yac_dist_grid_pair_delete(struct yac_dist_grid_pair *grid_pair)
yac_const_coordinate_pointer yac_dist_grid_get_field_coords(struct yac_dist_grid *dist_grid, struct yac_interp_field field)
void yac_dist_grid_determine_dist_ce_owner(struct yac_dist_grid *dist_grid, struct proc_sphere_part_node *proc_sphere_part, size_t *indices, size_t count, int *ranks, size_t(*get_ce_reference_vertex)(struct yac_dist_grid *, size_t))
static int get_pack_size_base_cell(struct yac_field_data *cell_field_data, MPI_Datatype bnd_circle_dt, MPI_Comm comm)
static int compare_single_remote_point_reorder_global_id(const void *a, const void *b)
static void lookup_remote_points(yac_int *requested_ids, struct remote_point_infos **results, size_t *reorder_buffer, size_t request_count, struct remote_points *local_remote_data)
static void generate_sorted_ids(yac_int *ids, size_t count, yac_int **sorted_ids, size_t **reorder_idx)
static void unpack_global_id(void *buffer, int buffer_size, int *position, yac_int *global_id, MPI_Comm comm)
size_t yac_dist_grid_get_unmasked_local_count(struct yac_dist_grid *dist_grid, struct yac_interp_field field)
static struct point_sphere_part_search * yac_dist_grid_get_field_sphere_part(struct yac_dist_grid *dist_grid, struct yac_interp_field field)
void yac_const_basic_grid_data_get_grid_cell(struct yac_const_basic_grid_data *grid_data, size_t cell_idx, struct grid_cell *buffer_cell)
static void pack_grid_data_vertex(struct yac_dist_grid *dist_grid, size_t idx, void *buffer, int buffer_size, int *position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt, MPI_Comm comm)
static void ensure_temp_field_data_sizes(struct temp_field_data *temp_field_data, size_t size)
void yac_remote_point_pack(struct remote_point *point, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static void yac_dist_grid_get_cell_neighbours(struct yac_dist_grid *dist_grid, struct proc_sphere_part_node *proc_sphere_part, size_t *cells, size_t count, size_t *neighbours)
static int get_pack_size_base_edge(struct yac_field_data *edge_field_data, MPI_Comm comm)
static MPI_Datatype yac_get_id_reorder_coord_coord_mpi_datatype(MPI_Comm comm)
void yac_dist_grid_pair_determine_dist_owner(struct yac_dist_grid_pair *grid_pair, char const *grid_name, size_t *points, size_t count, enum yac_location location, int *ranks)
void yac_dist_grid_get_local_unmasked_points(struct yac_dist_grid *dist_grid, struct yac_interp_field field, size_t **indices, size_t *count)
void yac_dist_grid_pair_do_point_search(struct yac_dist_grid_pair *grid_pair, char const *grid_name, yac_coordinate_pointer search_coords, size_t count, size_t *cells)
size_t yac_dist_grid_get_local_count(struct yac_dist_grid *dist_grid, enum yac_location location)
static void generate_ce_ids(struct proc_sphere_part_node *proc_sphere_part, struct yac_basic_grid_data *grid, int *vertex_ranks, MPI_Comm comm)
void yac_remote_point_unpack(void *buffer, int buffer_size, int *position, struct remote_point *point, MPI_Datatype point_info_dt, MPI_Comm comm)
static void insert_nnn_result_points(struct nnn_search_result *array, size_t array_size, struct nnn_search_result *insert, size_t insert_size)
static int yac_remote_point_infos_get_pack_size(struct remote_point_infos const *infos, MPI_Datatype point_info_dt, MPI_Comm comm)
static int compare_size_t(const void *a, const void *b)
static void temp_field_data_free(struct temp_field_data temp_field_data)
void yac_dist_grid_pair_get_vertex_neighbours(struct yac_dist_grid_pair *grid_pair, char const *grid_name, size_t *vertices, size_t count, size_t **neigh_vertices_, int *num_neighs_per_vertex, struct yac_interp_field field)
static struct yac_field_data * field_data_init(struct yac_field_data *orig_field_data, size_t dist_size, Xt_redist redist_mask, Xt_redist redist_coords, MPI_Comm comm)
static struct remote_points ** generate_dist_remote_points(struct proc_sphere_part_node *proc_sphere_part, struct yac_basic_grid_data *grid, MPI_Comm comm)
static struct yac_dist_grid generate_dist_grid(struct proc_sphere_part_node *proc_sphere_part, struct yac_basic_grid *grid, MPI_Comm comm)
static int compare_id_reorder_coord_coord(const void *a, const void *b)
static int coord_in_cell(double coord[3], struct yac_dist_grid *dist_grid, size_t cell_idx, struct grid_cell *buffer_cell)
static int compare_single_remote_point_reorder_reorder_idx(const void *a, const void *b)
void yac_dist_grid_determine_dist_cell_owner(struct yac_dist_grid *dist_grid, struct proc_sphere_part_node *proc_sphere_part, size_t *cells, size_t count, int *ranks)
static void unpack_field_data(void *buffer, int buffer_size, int *position, size_t idx, struct temp_field_data temp_field_data, MPI_Comm comm)
static void dist_grid_pair_do_point_search_local(struct yac_dist_grid_pair *grid_pair, char const *grid_name, yac_coordinate_pointer search_coords, size_t count, size_t *cells, int(*coord_in_cell)(double coord[3], struct yac_dist_grid *dist_grid, size_t cell_idx, struct grid_cell *buffer_cell))
static yac_size_t_2_pointer generate_edge_to_cell(const_size_t_pointer cell_to_edge, const_int_pointer num_edges_per_cell, int *core_cell_mask, size_t num_cells, size_t num_edges)
static int get_pack_size_field_data(struct yac_field_data *field_data, MPI_Comm comm)
static struct remote_point_infos copy_remote_point_infos(struct remote_point_infos point_infos)
static MPI_Datatype yac_get_coordinate_mpi_datatype(MPI_Comm comm)
static int compute_bucket(yac_int value, int comm_size)
static size_t yac_dist_grid_get_count(struct yac_dist_grid *dist_grid, enum yac_location location)
static void add_field_data(struct yac_field_data *field_data, struct temp_field_data temp_field_data, void *reorder_idx, size_t reorder_idx_size, size_t old_count, size_t new_count)
static int get_single_remote_point_pack_size(MPI_Datatype single_remote_point_dt, MPI_Comm comm)
static void get_pack_sizes_edge(struct yac_dist_grid *dist_grid, uint64_t *pos, size_t count, int *pack_sizes, MPI_Datatype point_info_dt, MPI_Comm comm)
static void yac_remote_point_infos_unpack(void *buffer, int buffer_size, int *position, struct remote_point_infos *infos, MPI_Datatype point_info_dt, MPI_Comm comm)
static void generate_xmap_set_sendcounts_remote_points(void *data, size_t count, size_t *sendcounts)
struct yac_dist_grid * yac_dist_grid_pair_get_dist_grid(struct yac_dist_grid_pair *grid_pair, char const *grid_name)
static struct remote_point_infos * generate_remote_point_infos(yac_int *sorted_ids, size_t *reorder_idx, size_t count, struct remote_points *points)
static int compare_single_remote_point_reorder_owner(const void *a, const void *b)
static int * determine_edge_core_mask(struct yac_dist_grid *dist_grid, int *vertex_owner_mask)
static void insert_global_id(yac_int *ids, size_t n, yac_int id)
static void generate_dist_grid_remote_point_infos(struct yac_dist_grid *dist_grid, struct proc_sphere_part_node *proc_sphere_part, struct remote_points **dist_owner)
static void get_vertex_cells(void *grid, size_t vertex_idx, size_t **cells, int *num_cells)
static int * determine_cell_core_mask(struct yac_dist_grid *dist_grid, int is_root, int *vertex_owner_mask)
static void get_pack_sizes_vertex(struct yac_dist_grid *dist_grid, uint64_t *pos, size_t count, int *pack_sizes, MPI_Datatype point_info_dt, MPI_Comm comm)
static void relocate_weights(double **weights, size_t *reorder_idx, int *ranks, size_t count, size_t send_count, size_t local_count, size_t recv_count, size_t *sendcounts, size_t *sdispls, size_t *recvcounts, size_t *rdispls, MPI_Comm comm)
static void get_grid_cells(struct yac_basic_grid *grid, struct dist_cell *cells, MPI_Comm comm)
static void setup_search_data(struct yac_dist_grid_pair *dist_grid_pair)
static void generate_global_ids(struct proc_sphere_part_node *proc_sphere_part, struct yac_basic_grid_data *grid, MPI_Comm comm)
static struct temp_field_data temp_field_data_init(struct yac_field_data *field_data, size_t count)
static void get_ve_ranks(int **rank_buffer, size_t *rank_buffer_size, size_t *rank_buffer_array_size, int *num_ve_ranks, int *core_mask, size_t count, void(*get_cells)(void *ve_to_cell_data, size_t idx, size_t **cells, int *num_cells), void *ve_to_cell_data, int *num_cell_ranks, size_t *dist_cell_rank_offsets)
struct yac_dist_grid_pair * yac_dist_grid_pair_new(struct yac_basic_grid *grid_a, struct yac_basic_grid *grid_b, MPI_Comm comm)
int const * yac_dist_grid_get_field_mask(struct yac_dist_grid *dist_grid, struct yac_interp_field field)
static void relocate_points(size_t **points, size_t *reorder_idx, int *ranks, size_t count, enum yac_location location, size_t local_count, size_t recv_count, struct single_remote_point *id_send_buffer, struct single_remote_point *id_recv_buffer, size_t *sendcounts, size_t *sdispls, size_t *recvcounts, size_t *rdispls, MPI_Datatype single_remote_point_dt, MPI_Comm comm, struct yac_dist_grid *dist_grid)
static void get_pack_sizes_cell(struct yac_dist_grid *dist_grid, uint64_t *pos, size_t count, int *pack_sizes, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt, MPI_Comm comm)
static int compare_single_remote_point_global_id(const void *a, const void *b)
static void yac_dist_grid_add_vertices(struct yac_dist_grid *dist_grid, struct global_vertex_reorder *vertices, size_t count, size_t *idx, struct temp_field_data temp_vertex_field_data)
static int get_global_id_pack_size(MPI_Comm comm)
void yac_remote_points_pack(struct remote_points *points, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_dist_grid_determine_dist_vertex_owner(struct yac_dist_grid *dist_grid, struct proc_sphere_part_node *proc_sphere_part, size_t *vertices, size_t count, int *ranks)
static int * generate_vertex_ids(struct proc_sphere_part_node *proc_sphere_part, struct yac_basic_grid_data *grid, MPI_Comm comm)
void yac_remote_points_unpack(void *buffer, int buffer_size, int *position, struct remote_points **points, MPI_Datatype point_info_dt, MPI_Comm comm)
static MPI_Datatype yac_get_n_single_remote_point_reorder_mpi_datatype(int n, MPI_Comm comm)
static void unpack_grid_data_edge(struct global_edge_reorder *edge, size_t idx, void *buffer, int buffer_size, int *position, struct temp_field_data temp_edge_field_data, MPI_Datatype point_info_dt, MPI_Comm comm)
static int get_pack_size_base_vertex(struct yac_field_data *vertex_field_data, MPI_Comm comm)
int yac_remote_points_get_pack_size(struct remote_points *points, MPI_Datatype point_info_dt, MPI_Comm comm)
static void yac_dist_grid_get_n_unmasked_local_points(struct yac_dist_grid *dist_grid, struct yac_interp_field field, int comm_rank, size_t n, struct single_remote_point *points)
static void generate_core_masks(struct yac_dist_grid *dist_grid, struct proc_sphere_part_node *proc_sphere_part, int comm_rank)
struct yac_dist_grid_pair * yac_dist_grid_pair_new_f2c(struct yac_basic_grid *grid_a, struct yac_basic_grid *grid_b, MPI_Fint comm)
static int coord_in_cell_gc(double coord[3], struct yac_dist_grid *dist_grid, size_t cell_idx, struct grid_cell *buffer_cell)
static void compute_basic_ve_data(size_t max_num_cell_ve, size_t num_cells, int *num_ve_per_cell, struct single_remote_point_reorder *cell_ve_point_data, yac_int **ids, size_t *num_ids, size_t **cell_to_ve, Xt_xmap *xmap, MPI_Comm comm)
static size_t get_cell_reference_vertex(struct yac_dist_grid *dist_grid, size_t cell_idx)
static void yac_single_remote_point_unpack(void *buffer, int buffer_size, int *position, struct single_remote_point *point, MPI_Datatype single_remote_point_dt, MPI_Comm comm)
void yac_dist_grid_pair_get_cell_neighbours(struct yac_dist_grid_pair *grid_pair, char const *grid_name, size_t *cells, size_t count, size_t *neighbours)
MPI_Datatype yac_get_remote_point_info_mpi_datatype(MPI_Comm comm)
static void yac_remote_point_infos_single_free(struct remote_point_infos *point_infos)
void yac_dist_grid_determine_dist_edge_owner(struct yac_dist_grid *dist_grid, struct proc_sphere_part_node *proc_sphere_part, size_t *edges, size_t count, int *ranks)
static void get_dist_vertex_cells(struct yac_dist_grid_pair *grid_pair, char const *grid_name, size_t *vertices, size_t count, size_t **cells, size_t *num_cells_per_vertex, struct yac_interp_field field)
struct remote_point * yac_dist_grid_get_remote_points(struct yac_dist_grid *dist_grid, enum yac_location location, size_t *points, size_t count)
struct yac_const_basic_grid_data * yac_dist_grid_get_basic_grid_data(struct yac_dist_grid *dist_grid)
void yac_dist_grid_pair_relocate_point_pairs(struct yac_dist_grid_pair *grid_pair, int a_is_ref, int to_dist_owner, char const *grid_name_a, size_t **points_a, enum yac_location location_a, char const *grid_name_b, size_t **points_b, enum yac_location location_b, double **weights, size_t *count)
static MPI_Datatype yac_get_id_reorder_coord_id_mpi_datatype(MPI_Comm comm)
static int const * yac_dist_grid_get_core_mask(struct yac_dist_grid *dist_grid, enum yac_location location)
static void pack_grid_data_edge(struct yac_dist_grid *dist_grid, size_t idx, void *buffer, int buffer_size, int *position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt, MPI_Comm comm)
static int compare_remote_point(const void *a, const void *b)
static size_t get_edge_reference_vertex(struct yac_dist_grid *dist_grid, size_t edge_idx)
static void insert_rank(int *ranks, int *count, int rank)
static void yac_remote_point_infos_pack_single(struct remote_point_infos const *infos, void *buffer, int buffer_size, int *position, MPI_Datatype point_info_dt, MPI_Comm comm)
static void pack_field_data(size_t idx, void *buffer, int buffer_size, int *position, struct yac_field_data *field_data, MPI_Comm comm)
static void yac_dist_grid_add_cells(struct yac_dist_grid *dist_grid, yac_int *cell_ids, int *num_vertices_per_cell, struct bounding_circle *cell_bnd_circles, size_t count, size_t *cell_to_vertex, size_t *cell_to_edge, struct remote_point_infos *cell_owners, struct temp_field_data temp_cell_field_data)
void yac_dist_grid_pair_do_bnd_circle_search(struct yac_dist_grid_pair *grid_pair, char const *grid_name, const_bounding_circle_pointer bnd_circles, size_t count, size_t **cells, size_t *num_results_per_bnd_circle, struct yac_interp_field field)
static void get_edge_cells(void *edge_to_cell, size_t edge_idx, size_t **cells, int *num_cells)
void yac_dist_grid_pair_do_cell_search(struct yac_dist_grid_pair *grid_pair, char const *search_grid_name, char const *result_grid_name, size_t *search_cells, size_t count, size_t **result_cells, size_t *num_results_per_search_cell, struct yac_interp_field result_field)
static int compare_n_ids_reorder_reorder(const void *a, const void *b)
static MPI_Datatype yac_get_id_pos_mpi_datatype(MPI_Comm comm)
static void pack_grid_data(struct yac_dist_grid *dist_grid, enum yac_location location, uint64_t *pos, size_t count, void **pack_data, int *pack_sizes, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt, MPI_Comm comm)
static void determine_dist_cell_ranks(struct proc_sphere_part_node *proc_sphere_part, struct yac_basic_grid_data *grid, MPI_Comm comm, int **dist_cell_ranks, int *dist_cell_rank_counts, size_t *dist_cell_rank_offsets)
static void yac_remote_point_infos_unpack_info_buffer(void *buffer, int buffer_size, int *position, struct remote_point_info *info_buffer, size_t *info_buffer_position, struct remote_point_infos *infos, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_remote_point_unpack_info_buffer(void *buffer, int buffer_size, int *position, struct remote_point_info *info_buffer, size_t *info_buffer_position, struct remote_point *point, MPI_Datatype point_info_dt, MPI_Comm comm)
MPI_Comm yac_dist_grid_pair_get_MPI_Comm(struct yac_dist_grid_pair *grid_pair)
static void pack_global_id(yac_int global_id, void *buffer, int buffer_size, int *position, MPI_Comm comm)
static void yac_dist_grid_pair_get_aux_grid_cells(struct yac_dist_grid_pair *grid_pair, char const *grid_name, size_t *vertices, size_t count, size_t **cells, int *num_cells_per_vertex, struct yac_interp_field field)
static int compare_global_vertex_reorder_global_id(const void *a, const void *b)
static yac_int const * yac_dist_grid_get_global_ids(struct yac_dist_grid *dist_grid, enum yac_location location)
static Xt_xmap generate_xmap_data(void *data, size_t count, MPI_Comm comm, void(*set_sendcounts)(void *, size_t, size_t *), void(*pack)(void *, size_t, size_t *, int *, int *))
static void pack_grid_data_cell(struct yac_dist_grid *dist_grid, size_t idx, void *buffer, int buffer_size, int *position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt, MPI_Comm comm)
static void unpack_grid_data_vertex(struct global_vertex_reorder *vertex, size_t idx, void *buffer, int buffer_size, int *position, struct temp_field_data temp_vertex_field_data, MPI_Datatype point_info_dt, MPI_Comm comm)
static void unpack_grid_data(struct yac_dist_grid *dist_grid, enum yac_location location, size_t count, void *buffer, int buffer_size, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_dist_grid_global_to_local(struct yac_dist_grid *dist_grid, enum yac_location location, yac_int *global_ids, size_t count, size_t *local_ids)
static void unpack_grid_data_vertices(struct yac_dist_grid *dist_grid, size_t count, void *buffer, int buffer_size, MPI_Datatype point_info_dt, MPI_Comm comm)
static void yac_remote_point_infos_free(struct remote_point_infos *point_infos, size_t count)
static struct proc_sphere_part_node * generate_dist_grid_decomposition(struct yac_basic_grid *grid_a, struct yac_basic_grid *grid_b, MPI_Comm comm)
static int compare_id_reorder_coord_reorder_idx(const void *a, const void *b)
static int compare_nnn_search_results_cos_angle(void const *a, void const *b)
static void yac_remote_point_infos_pack(struct remote_point_infos **infos, int *pack_sizes, size_t count, void *buffer, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_dist_grid_pair_determine_orig_owner(struct yac_dist_grid_pair *grid_pair, char const *grid_name, size_t *points, size_t count, enum yac_location location, int *ranks)
void yac_dist_grid_pair_get_aux_grid(struct yac_dist_grid_pair *grid_pair, char const *grid_name, size_t *cells, size_t count, size_t **vertex_to_cell, size_t **vertex_to_cell_offsets_, int **num_cells_per_vertex_, struct yac_interp_field field)
static MPI_Datatype yac_get_nnn_search_result_mpi_datatype(size_t n, MPI_Comm comm)
static void generate_xmap_pack_remote_points(void *data, size_t count, size_t *sdispls, int *dst_pos_buffer, int *send_pos_buffer)
static void unpack_grid_data_cells(struct yac_dist_grid *dist_grid, size_t count, void *buffer, int buffer_size, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_dist_grid_pair_do_point_search_(struct yac_dist_grid_pair *grid_pair, char const *grid_name, yac_coordinate_pointer search_coords, size_t count, size_t *cells, int(*coord_in_cell)(double coord[3], struct yac_dist_grid *dist_grid, size_t cell_idx, struct grid_cell *buffer_cell))
static void yac_dist_grid_add_edges(struct yac_dist_grid *dist_grid, struct global_edge_reorder *edges, size_t count, size_t *idx, struct temp_field_data temp_edge_field_data)
static void yac_dist_grid_single_remote_point_to_local(struct yac_dist_grid *dist_grid, struct single_remote_point *ids, size_t count, enum yac_location location, size_t *idx)
static struct bounding_circle compute_edge_bnd_circle(struct yac_dist_grid *dist_grid, size_t edge_id)
void yac_dist_grid_pair_get_corner_cells(struct yac_dist_grid_pair *grid_pair, char const *grid_name, size_t *vertices, size_t count, size_t **vertex_to_cell, size_t *num_cells_per_vertex)
static int compare_global_edge_reorder_global_id(const void *a, const void *b)
static void id2idx(yac_int *ids, size_t *idx, size_t num_ids, yac_int *ref_sorted_ids, size_t num_sorted_ids)
static Xt_xmap generate_xmap_from_remote_point_info(struct remote_point_info *point_infos, size_t count, MPI_Comm comm)
static Xt_xmap generate_xmap_from_remote_points(struct remote_point *remote_point_data, size_t count, MPI_Comm comm)
int yac_remote_point_get_pack_size(struct remote_point *point, MPI_Datatype point_info_dt, MPI_Comm comm)
void yac_dist_grid_pair_do_nnn_search(struct yac_dist_grid_pair *grid_pair, char const *grid_name, yac_coordinate_pointer search_coords, size_t count, size_t *local_ids, size_t n, struct yac_interp_field field)
static void unpack_grid_data_edges(struct yac_dist_grid *dist_grid, size_t count, void *buffer, int buffer_size, MPI_Datatype point_info_dt, MPI_Comm comm)
static void generate_xmap_set_sendcounts_remote_point_info(void *data, size_t count, size_t *sendcounts)
static struct yac_field_data * yac_dist_grid_get_field_data(struct yac_dist_grid *dist_grid, enum yac_location location)
static void get_pack_sizes(struct yac_dist_grid *dist_grid, enum yac_location location, uint64_t *pos, size_t count, int *pack_sizes, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt, MPI_Comm comm)
static void yac_single_remote_point_pack(struct single_remote_point *point, void *buffer, int buffer_size, int *position, MPI_Datatype single_remote_point_dt, MPI_Comm comm)
static MPI_Datatype yac_get_single_remote_point_mpi_datatype(MPI_Comm comm)
static struct bnd_sphere_part_search * dist_grid_pair_get_cell_sphere_part(struct yac_dist_grid_pair *grid_pair, char const *grid_name)
static void pack_grid_data_edge_(struct yac_dist_grid *dist_grid, size_t idx, void *buffer, int buffer_size, int *position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt, MPI_Comm comm)
static void yac_dist_grid_free(struct yac_dist_grid grid)
static void lookup_single_remote_point_reorder_locally(struct yac_dist_grid *dist_grid, enum yac_location location, struct single_remote_point_reorder *ids, size_t *count, size_t *idx)
struct bounding_circle const *const const_bounding_circle_pointer
size_t const *const const_size_t_pointer
int const *const const_int_pointer
#define ENSURE_ARRAY_SIZE(arrayp, curr_array_size, req_size)
size_t yac_field_data_get_masks_count(struct yac_field_data *field_data)
void yac_field_data_set_mask_data(struct yac_field_data *field_data, size_t mask_idx, int *mask_data)
size_t yac_field_data_get_coordinates_count(struct yac_field_data *field_data)
yac_const_coordinate_pointer yac_field_data_get_coordinates_data(struct yac_field_data *field_data, size_t coordinates_idx)
int const * yac_field_data_get_mask_data(struct yac_field_data *field_data, size_t mask_idx)
char const * yac_field_data_get_mask_name(struct yac_field_data *field_data, size_t mask_idx)
void yac_field_data_set_coordinates_data(struct yac_field_data *field_data, size_t coordinates_idx, yac_coordinate_pointer coordinates_data)
size_t yac_field_data_add_mask_nocpy(struct yac_field_data *field_data, int const *mask, char const *mask_name)
struct yac_field_data * yac_field_data_empty_new()
size_t yac_field_data_add_coordinates_nocpy(struct yac_field_data *field_data, yac_coordinate_pointer coordinates)
struct yac_field_data_set * yac_field_data_set_new(struct yac_field_data *cell_field_data, struct yac_field_data *vertex_field_data, struct yac_field_data *edge_field_data)
void yac_field_data_set_delete(struct yac_field_data_set *field_data_set)
struct yac_field_data * yac_field_data_set_get_field_data(struct yac_field_data_set *field_data_set, enum yac_location location)
static struct sin_cos_angle get_vector_angle_2(double const a[3], double const b[3])
static int compare_coords(double const *a, double const *b)
static const struct sin_cos_angle SIN_COS_ZERO
static void normalise_vector(double v[])
static struct sin_cos_angle half_angle(struct sin_cos_angle angle)
void yac_init_grid_cell(struct grid_cell *cell)
void yac_free_grid_cell(struct grid_cell *cell)
@ GREAT_CIRCLE_EDGE
great circle
#define xrealloc(ptr, size)
#define xcalloc(nmemb, size)
void yac_proc_sphere_part_get_neigh_ranks(struct proc_sphere_part_node *node, uint64_t *leaf_sizes, uint64_t min_size, int *send_flags, int *recv_flags, int comm_rank, int comm_size)
void yac_proc_sphere_part_do_bnd_circle_search(struct proc_sphere_part_node *node, struct bounding_circle bnd_circle, int *ranks, int *rank_count)
void yac_proc_sphere_part_do_point_search(struct proc_sphere_part_node *node, yac_coordinate_pointer search_coords, size_t count, int *ranks)
void yac_proc_sphere_part_node_delete(struct proc_sphere_part_node *node)
struct proc_sphere_part_node * yac_redistribute_cells(struct dist_cell **cells, size_t *num_cells, MPI_Comm comm)
void yac_bnd_sphere_part_search_do_bnd_circle_search(struct bnd_sphere_part_search *search, struct bounding_circle *bnd_circles, size_t count, size_t **cells, size_t *num_cells_per_bnd_circle)
struct bnd_sphere_part_search * yac_bnd_sphere_part_search_new(struct bounding_circle *circles, size_t num_circles)
void yac_delete_point_sphere_part_search(struct point_sphere_part_search *search)
struct point_sphere_part_search * yac_point_sphere_part_search_mask_new(size_t num_points, yac_const_coordinate_pointer coordinates_xyz, yac_int const *ids, int const *mask)
void yac_bnd_sphere_part_search_delete(struct bnd_sphere_part_search *search)
struct point_sphere_part_search * yac_point_sphere_part_search_new(size_t num_points, yac_const_coordinate_pointer coordinates_xyz, yac_int const *ids)
void yac_bnd_sphere_part_search_do_point_search(struct bnd_sphere_part_search *search, yac_coordinate_pointer coordinates_xyz, size_t count, size_t **cells, size_t *num_cells_per_coordinate)
void yac_point_sphere_part_search_NNN(struct point_sphere_part_search *search, size_t num_points, double(*coordinates_xyz)[3], size_t n, double **cos_angles, size_t *cos_angles_array_size, double(**result_coordinates_xyz)[3], size_t *result_coordinates_xyz_array_size, size_t **local_point_ids, size_t *local_point_ids_array_size, size_t *num_local_point_ids)
algorithm for searching cells and points on a grid
struct sin_cos_angle inc_angle
angle between the middle point and the boundary of the spherical cap
enum yac_edge_type edge_type
struct remote_point_infos owners
yac_int edge_to_vertex[2]
struct remote_point_infos owners
double(* coordinates_xyz)[3]
enum yac_edge_type * edge_type
struct missing_edge_neighbour::@0 cell
struct missing_edge_neighbour::@0 edge
struct single_remote_point point_info
struct remote_point_info single
struct remote_point_info * multi
union remote_point_infos::@1 data
struct remote_point_infos data
struct remote_point_info buffer[]
struct remote_point * data
struct single_remote_point data
struct remote_point_info data
yac_coordinate_pointer * coordinates
size_t * masks_array_sizes
size_t * coordinates_array_sizes
yac_coordinate_pointer vertex_coordinates
size_t * cell_to_edge_offsets
yac_size_t_2_pointer edge_to_vertex
enum yac_edge_type * edge_type
size_t * cell_to_vertex_offsets
int * num_vertices_per_cell
const_int_pointer num_vertices_per_cell
const_size_t_pointer cell_to_vertex_offsets
const_size_t_pointer cell_to_vertex
const_size_t_pointer cell_to_edge
const_size_t_pointer cell_to_edge_offsets
const_yac_edge_type_pointer edge_type
yac_const_coordinate_pointer vertex_coordinates
struct point_sphere_part_search * vertex_sphere_part[2]
struct proc_sphere_part_node * proc_sphere_part
struct yac_dist_grid dist_grid[2]
struct bnd_sphere_part_search * cell_sphere_part[2]
size_t * sorted_reorder_idx[3]
struct yac_field_data_set * field_data
struct bounding_circle * cell_bnd_circles
struct remote_point_infos * owners[3]
enum yac_edge_type * edge_type
yac_coordinate_pointer vertex_coordinates
size_t * cell_to_edge_offsets
size_t * cell_to_vertex_offsets
yac_size_t_2_pointer edge_to_vertex
int * num_vertices_per_cell
enum yac_location location
#define YAC_ASSERT_F(exp, format,...)
#define YAC_ASSERT(exp, msg)
void yac_quicksort_index_yac_int_size_t(yac_int *a, size_t n, size_t *idx)
void yac_quicksort_index_int_size_t(int *a, size_t n, size_t *idx)
static void yac_remove_duplicates_size_t(size_t *array, size_t *n)
void yac_quicksort_index_size_t_size_t(size_t *a, size_t n, size_t *idx)
static struct user_input_data_points ** points
void yac_generate_alltoallv_args(int count, size_t const *sendcounts, size_t *recvcounts, size_t *sdispls, size_t *rdispls, MPI_Comm comm)
void yac_free_comm_buffers(size_t *sendcounts, size_t *recvcounts, size_t *sdispls, size_t *rdispls)
MPI_Datatype yac_get_bounding_circle_mpi_datatype(MPI_Comm comm)
void yac_get_comm_buffers(int count, size_t **sendcounts, size_t **recvcounts, size_t **sdispls, size_t **rdispls, MPI_Comm comm)
MPI_Datatype yac_create_resized(MPI_Datatype dt, size_t new_size, MPI_Comm comm)
void yac_alltoallv_p2p(void const *send_buffer, size_t const *sendcounts, size_t const *sdispls, void *recv_buffer, size_t const *recvcounts, size_t const *rdispls, size_t dt_size, MPI_Datatype dt, MPI_Comm comm)
#define yac_mpi_call(call, comm)
size_t(* yac_size_t_2_pointer)[2]
double const (*const yac_const_coordinate_pointer)[3]
double(* yac_coordinate_pointer)[3]