193 yac_int * ids,
size_t * idx,
size_t num_ids,
194 yac_int * ref_sorted_ids,
size_t num_sorted_ids) {
196 size_t * reorder =
xmalloc(num_ids *
sizeof(*reorder));
197 for (
size_t i = 0; i < num_ids; ++i) reorder[i] = i;
201 for (
size_t i = 0, j = 0; i < num_ids; ++i) {
204 while ((j < num_sorted_ids) && (ref_sorted_ids[j] < curr_id)) ++j;
206 (j < num_sorted_ids) && (ref_sorted_ids[j] == curr_id),
207 "ERROR(id2idx): id not found")
218 int * core_mask =
xcalloc(count,
sizeof(*core_mask));
221 proc_sphere_part, vertex_coordinates, count, core_mask);
223 for (
size_t i = 0; i < count; ++i)
224 core_mask[i] = core_mask[i] == comm_rank;
230 struct dist_grid * grid,
size_t cell_idx) {
233 if (num_vertices == 0)
return SIZE_MAX;
238 size_t min_idx = vertices[0];
239 yac_int min_global_id = grid_vertex_ids[min_idx];
240 for (
int j = 1; j < num_vertices; ++j) {
241 size_t curr_vertex = vertices[j];
242 yac_int curr_global_id = grid_vertex_ids[curr_vertex];
243 if (min_global_id > curr_global_id) {
244 min_global_id = curr_global_id;
245 min_idx = curr_vertex;
258 int * core_cell_mask =
xmalloc(num_cells *
sizeof(core_cell_mask));
263 for (
size_t i = 0; i < num_cells; ++i) {
266 (ref_vertex != SIZE_MAX)?(vertex_core_mask[ref_vertex]):is_root;
268 return core_cell_mask;
272 struct dist_grid * grid,
size_t edge_idx) {
277 return edge_vertices[
278 (vertex_ids[edge_vertices[0]] > vertex_ids[edge_vertices[1]])?1:0];
288 int * core_edge_mask =
xmalloc(num_edges *
sizeof(core_edge_mask));
293 for (
size_t i = 0; i < num_edges; ++i)
296 return core_edge_mask;
302 MPI_Datatype single_id_owner_dt;
303 int array_of_blocklengths[] = {1, 1, 1};
304 const MPI_Aint array_of_displacements[] =
305 {(MPI_Aint)((
void*)&(dummy.
global_id) - (
void*)&dummy),
306 (MPI_Aint)((
void*)&(dummy.
data.
rank) - (
void*)&dummy),
307 (MPI_Aint)((
void*)&(dummy.
data.
orig_pos) - (
void*)&dummy)};
308 const MPI_Datatype array_of_types[] =
309 {Xt_int_dt, MPI_INT, MPI_UINT64_T};
311 MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacements,
312 array_of_types, &single_id_owner_dt), comm);
317 int n, MPI_Comm comm) {
319 MPI_Datatype dt_single_remote_point_ =
321 MPI_Datatype dt_single_remote_point__ =
324 MPI_Datatype dt_single_remote_point;
327 n, dt_single_remote_point__, &dt_single_remote_point), comm);
328 yac_mpi_call(MPI_Type_free(&dt_single_remote_point__), comm);
329 yac_mpi_call(MPI_Type_commit(&dt_single_remote_point), comm);
330 return dt_single_remote_point;
334 const void * a,
const void * b) {
344 size_t local_count = 0;
345 for (
size_t i = 0; i < count; ++i)
if (owner[i]) ++local_count;
352 MPI_Datatype id_pos_dt;
353 int array_of_blocklengths[] = {1,1};
354 const MPI_Aint array_of_displacements[] =
355 {(MPI_Aint)((
void*)&(dummy.
global_id) - (
void*)&dummy),
356 (MPI_Aint)((
void*)&(dummy.
orig_pos) - (
void*)&dummy)};
357 const MPI_Datatype array_of_types[] = {Xt_int_dt, MPI_UINT64_T};
359 MPI_Type_create_struct(
360 2, array_of_blocklengths, array_of_displacements,
361 array_of_types, &id_pos_dt), comm);
368 MPI_Datatype remote_point_info_dt;
369 int array_of_blocklengths[] = {1, 1};
370 const MPI_Aint array_of_displacements[] =
371 {(MPI_Aint)((
void*)&(dummy.
rank) - (
void*)&dummy),
372 (MPI_Aint)((
void*)&(dummy.
orig_pos) - (
void*)&dummy)};
373 const MPI_Datatype array_of_types[] =
374 {MPI_INT, MPI_UINT64_T};
376 MPI_Type_create_struct(2, array_of_blocklengths, array_of_displacements,
377 array_of_types, &remote_point_info_dt), comm);
384 int count = point_infos.count;
387 memcpy(temp, point_infos.data.multi, (
size_t)
count *
sizeof(*temp));
388 point_infos.data.multi = temp;
394 yac_int * sorted_ids,
size_t * reorder_idx,
size_t count,
399 size_t id_owner_count =
points->count;
402 for (
size_t i = 0, j = 0; i < count; ++i) {
404 yac_int curr_id = sorted_ids[i];
406 while ((j < id_owner_count) && (points_[j].
global_id < curr_id)) ++j;
408 if ((j >= id_owner_count) || (points_[j].
global_id != curr_id))
409 point_infos[reorder_idx[i]].
count = -1;
419 size_t * reorder_buffer,
size_t request_count,
423 size_t local_count = local_remote_data->
count;
425 {.
count = 0, .data.single = {.rank = INT_MAX, .orig_pos = UINT64_MAX}};
427 for (
size_t j = 0; j < request_count; ++j) reorder_buffer[j] = j;
431 requested_ids, request_count, reorder_buffer);
433 for (
size_t i = 0, j = 0; i < request_count; ++i) {
434 yac_int curr_id = requested_ids[i];
435 while ((j < local_count) && (curr_id > local_remote_points[j].global_id))
437 results[reorder_buffer[i]] =
438 ((j < local_count) && (curr_id == local_remote_points[j].global_id))?
439 &(local_remote_points[j].
data):&dummy_data;
450 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_count), comm);
452 MPI_Pack_size(infos->
count, point_info_dt, comm, &pack_size_data), comm);
454 return pack_size_count + pack_size_data;
459 MPI_Datatype point_info_dt, MPI_Comm comm) {
463 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_count), comm);
465 for (
size_t i = 0; i <
count; ++i) {
468 infos[i]->
count, point_info_dt, comm, pack_sizes + i), comm);
469 pack_sizes[i] += pack_size_count;
475 void * buffer, MPI_Datatype point_info_dt, MPI_Comm comm) {
477 for (
size_t i = 0; i <
count; ++i) {
479 int curr_count = infos[i]->
count;
485 int buffer_size = pack_sizes[i];
487 MPI_Pack(&curr_count, 1, MPI_INT, buffer,
488 buffer_size, &position, comm), comm);
490 MPI_Pack(curr_point_infos, curr_count, point_info_dt, buffer,
491 buffer_size, &position, comm), comm);
492 buffer = (
void*)(((
unsigned char*)buffer) + buffer_size);
497 void * buffer,
int buffer_size,
int * position,
502 MPI_Unpack(buffer, buffer_size, position, &count, 1, MPI_INT, comm), comm);
504 infos->
count = count;
515 MPI_Unpack(buffer, buffer_size, position, point_infos, count,
516 point_info_dt, comm), comm);
524 void * buffer,
int buffer_size,
int * position,
530 MPI_Unpack(buffer, buffer_size, position, &count, 1, MPI_INT, comm), comm);
532 infos->
count = count;
539 (infos->
data.
multi = info_buffer + *info_buffer_position);
540 *info_buffer_position += count;
544 MPI_Unpack(buffer, buffer_size, position, point_infos, count,
545 point_info_dt, comm), comm);
549 struct remote_point * point, MPI_Datatype point_info_dt, MPI_Comm comm) {
552 pack_size_remote_point_infos;
554 yac_mpi_call(MPI_Pack_size(1, Xt_int_dt, comm, &pack_size_id), comm);
555 pack_size_remote_point_infos =
558 return pack_size_id + pack_size_remote_point_infos;
563 int * position, MPI_Datatype point_info_dt, MPI_Comm comm) {
565 int count = infos->
count;
571 MPI_Pack(&count, 1, MPI_INT, buffer, buffer_size, position, comm), comm);
573 MPI_Pack(info, count, point_info_dt, buffer, buffer_size, position, comm),
578 struct remote_point * point,
void * buffer,
int buffer_size,
int * position,
579 MPI_Datatype point_info_dt, MPI_Comm comm) {
582 MPI_Pack(&(point->
global_id), 1, Xt_int_dt, buffer,
583 buffer_size, position, comm), comm);
586 &(point->
data), buffer, buffer_size, position, point_info_dt, comm);
590 void * buffer,
int buffer_size,
int * position,
struct remote_point * point,
591 MPI_Datatype point_info_dt, MPI_Comm comm) {
595 buffer, buffer_size, position, &(point->
global_id), 1, Xt_int_dt, comm),
598 buffer, buffer_size, position, &(point->
data), point_info_dt, comm);
602 void * buffer,
int buffer_size,
int * position,
604 struct remote_point * point, MPI_Datatype point_info_dt, MPI_Comm comm) {
608 buffer, buffer_size, position, &(point->
global_id), 1, Xt_int_dt, comm),
611 buffer, buffer_size, position, info_buffer, info_buffer_position,
612 &(point->
data), point_info_dt, comm);
618 size_t count =
points->count;
622 remote_points_pack_size;
624 yac_mpi_call(MPI_Pack_size(2, MPI_UINT64_T, comm, &count_pack_size), comm);
625 remote_points_pack_size = 0;
626 for (
size_t i = 0; i < count; ++i)
627 remote_points_pack_size +=
630 return count_pack_size + remote_points_pack_size;
635 MPI_Datatype point_info_dt, MPI_Comm comm) {
637 size_t count =
points->count;
639 uint64_t counts[2] = {(uint64_t)count, 0};
640 for (
size_t i = 0; i < count; ++i)
642 counts[1] += (uint64_t)(point_data[i].
data.
count);
645 MPI_Pack(counts, 2, MPI_UINT64_T, buffer,
646 buffer_size, position, comm), comm);
647 for (
size_t i = 0; i < count; ++i)
649 point_data + i, buffer, buffer_size, position, point_info_dt, comm);
653 void * buffer,
int buffer_size,
int * position,
660 buffer, buffer_size, position, counts, 2, MPI_UINT64_T, comm), comm);
665 size_t count = ((*points)->count = (size_t)(counts[0]));
667 ((*points)->data =
xmalloc(count *
sizeof(*((*points)->data))));
669 &((*points)->buffer[0]);
671 for (
size_t i = 0, offset = 0; i < count; ++i) {
674 buffer, buffer_size, position, remote_point_info_buffer, &offset,
675 point_data + i, point_info_dt, comm);
703 size_t num_missing_vertices = 0;
704 size_t num_missing_edges = 0;
709 "ERROR(generate_dist_grid_remote_point_infos):"
710 "no owner information for a cell available")
716 size_t total_num_missing = num_missing_vertices + num_missing_edges;
717 int * ranks_buffer =
xmalloc(total_num_missing *
sizeof(*ranks_buffer));
718 int * vertex_ranks = ranks_buffer;
719 int * edge_ranks = ranks_buffer + num_missing_vertices;
720 size_t * size_t_buffer =
721 xmalloc(3 * total_num_missing *
sizeof(*size_t_buffer));
722 size_t * vertex_reorder = size_t_buffer;
723 size_t * edge_reorder = size_t_buffer + num_missing_vertices;
724 size_t * coord_indices = size_t_buffer + total_num_missing;
725 size_t * ranks_idxs = coord_indices;
726 size_t * cve_reorder = size_t_buffer + 2 * total_num_missing;
732 vertex_reorder[j] = i;
734 coord_indices[offset] = i;
735 cve_reorder[offset] = offset;
747 coord_indices[offset] =
748 vertex_idxes[(vertex_ids[0] > vertex_ids[1])?1:0];
749 cve_reorder[offset] = offset;
755 coord_indices, total_num_missing, cve_reorder);
757 size_t num_unique_idxs = 0;
758 for (
size_t i = 0, prev_idx = SIZE_MAX; i < total_num_missing; ++i) {
759 size_t curr_idx = coord_indices[i];
760 if (curr_idx != prev_idx) {
767 xmalloc(num_unique_idxs *
sizeof(*search_coords));
768 int * search_ranks =
xmalloc(num_unique_idxs *
sizeof(*search_ranks));
770 for (
size_t i = 0, j = 0, prev_idx = SIZE_MAX; i < total_num_missing; ++i) {
771 size_t curr_idx = coord_indices[i];
772 if (curr_idx != prev_idx) {
782 proc_sphere_part, search_coords, num_unique_idxs, search_ranks);
786 cve_reorder, total_num_missing, ranks_idxs);
789 for (
size_t i = 0; i < num_missing_vertices; ++i, ++offset)
790 vertex_ranks[i] = search_ranks[ranks_idxs[offset]];
791 for (
size_t i = 0; i < num_missing_edges; ++i, ++offset)
792 edge_ranks[i] = search_ranks[ranks_idxs[offset]];
796 vertex_ranks, num_missing_vertices, vertex_reorder);
798 edge_ranks, num_missing_edges, edge_reorder);
800 int * sendcounts, * recvcounts, * sdispls, * rdispls;
802 2, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
804 xmalloc(4 * (
size_t)comm_size *
sizeof(*int_buffer));
805 int * total_sendcounts = int_buffer + 0 * comm_size;
806 int * total_recvcounts = int_buffer + 1 * comm_size;
807 int * total_sdispls = int_buffer + 2 * comm_size;
808 int * total_rdispls = int_buffer + 3 * comm_size;
810 for (
size_t i = 0; i < num_missing_vertices; ++i)
811 sendcounts[2 * vertex_ranks[i] + 0]++;
812 for (
size_t i = 0; i < num_missing_edges; ++i)
813 sendcounts[2 * edge_ranks[i] + 1]++;
816 2, sendcounts, recvcounts, sdispls, rdispls, comm);
818 size_t num_requested_ids[2] = {0,0};
819 for (
int i = 0, saccu = 0, raccu = 0; i < comm_size; ++i) {
820 total_sdispls[i] = saccu;
821 total_rdispls[i] = raccu;
822 total_sendcounts[i] = 0;
823 total_recvcounts[i] = 0;
824 for (
int j = 0; j < 2; ++j) {
825 total_sendcounts[i] += sendcounts[2 * i + j];
826 total_recvcounts[i] += recvcounts[2 * i + j];
827 num_requested_ids[j] += (size_t)(recvcounts[2 * i + j]);
829 saccu += total_sendcounts[i];
830 raccu += total_recvcounts[i];
832 size_t request_count = (size_t)(total_recvcounts[comm_size - 1]) +
833 (size_t)(total_rdispls[comm_size - 1]);
837 (total_num_missing + 2 * request_count) *
sizeof(*exchange_id_buffer));
838 yac_int * id_send_buffer = exchange_id_buffer + request_count;
839 yac_int * id_recv_buffer = exchange_id_buffer + request_count + total_num_missing;
842 for (
size_t i = 0; i < num_missing_vertices; ++i) {
843 int pos = sdispls[2 * vertex_ranks[i] + 0 + 1]++;
846 for (
size_t i = 0; i < num_missing_edges; ++i) {
847 int pos = sdispls[2 * edge_ranks[i] + 1 + 1]++;
853 id_send_buffer, total_sendcounts, total_sdispls,
854 id_recv_buffer, total_recvcounts, total_rdispls, comm);
857 {exchange_id_buffer, exchange_id_buffer + num_requested_ids[0]};
861 size_t ids_offset[2] = {0, 0}, offset = 0;
862 for (
int i = 0; i < comm_size; ++i) {
863 for (
int j = 0; j < 2; ++j) {
864 size_t curr_count = recvcounts[2 * i + j];
865 memcpy(requested_ids[j] + ids_offset[j], id_recv_buffer + offset,
866 curr_count *
sizeof(*(requested_ids[0])));
867 ids_offset[j] += curr_count;
868 offset += curr_count;
874 xmalloc(request_count *
sizeof(*remote_point_infos_buffer));
876 {remote_point_infos_buffer,
877 remote_point_infos_buffer + num_requested_ids[0]};
879 {dist_vertex_owner, dist_edge_owner};
881 size_t max_num_requested_ids =
MAX(num_requested_ids[0], num_requested_ids[1]);
882 size_t * reorder_buffer =
xmalloc(max_num_requested_ids *
sizeof(*reorder_buffer));
884 for (
int i = 0; i < 2; ++i)
886 requested_ids[i], found_points[i], reorder_buffer, num_requested_ids[i],
887 local_remote_points[i]);
888 free(reorder_buffer);
889 free(exchange_id_buffer);
891 int * pack_size_buffer =
892 xmalloc((total_num_missing + request_count) *
sizeof(*pack_size_buffer));
893 int * send_pack_sizes = pack_size_buffer;
894 int * recv_pack_sizes = pack_size_buffer + request_count;
898 size_t offset = 0, offsets[2] = {0, 0};
899 for (
int i = 0; i < comm_size; ++i) {
900 for (
int j = 0; j < 2; ++j) {
901 size_t curr_count = (size_t)(recvcounts[2 * i + j]);
904 found_points[j] + offsets[j], curr_count,
905 send_pack_sizes + offset, point_info_dt, comm);
906 offsets[j] += curr_count;
907 offset += curr_count;
914 send_pack_sizes, total_recvcounts, total_rdispls,
915 recv_pack_sizes, total_sendcounts, total_sdispls, comm);
917 size_t send_buffer_size = 0, recv_buffer_size = 0;
918 for (
size_t i = 0; i < request_count; ++i)
919 send_buffer_size += (
size_t)(send_pack_sizes[i]);
920 for (
size_t i = 0; i < total_num_missing; ++i)
921 recv_buffer_size += (
size_t)(recv_pack_sizes[i]);
923 void * result_buffer =
xmalloc(send_buffer_size + recv_buffer_size);
924 void * send_result_buffer = result_buffer;
925 void * recv_result_buffer = (
unsigned char *)result_buffer + send_buffer_size;
929 size_t send_buffer_size = 0, offset = 0, offsets[2] = {0,0};
930 for (
int i = 0; i < comm_size; ++i) {
931 for (
int j = 0; j < 2; ++j) {
932 size_t curr_count = (size_t)(recvcounts[2 * i + j]);
935 found_points[j] + offsets[j],
936 send_pack_sizes + offset, curr_count,
937 (
void*)((
unsigned char*)send_result_buffer + send_buffer_size),
938 point_info_dt, comm);
939 for (
size_t k = 0; k < curr_count; ++k)
940 send_buffer_size += send_pack_sizes[offset + k];
941 offsets[j] += curr_count;
942 offset += curr_count;
948 size_t send_offset = 0, recv_offset = 0;
949 size_t saccu = 0, raccu = 0;
950 for (
int i = 0; i < comm_size; ++i) {
951 total_sdispls[i] = (int)saccu;
952 total_rdispls[i] = (int)raccu;
953 int curr_sendcount = 0, curr_recvcount = 0;
954 for (
int k = 0; k < 2; ++k) {
955 for (
int j = 0; j < recvcounts[2 * i + k]; ++j, ++send_offset)
956 curr_sendcount += (
size_t)(send_pack_sizes[send_offset]);
957 for (
int j = 0; j < sendcounts[2 * i + k]; ++j, ++recv_offset)
958 curr_recvcount += (
size_t)(recv_pack_sizes[recv_offset]);
960 total_sendcounts[i] = (int)curr_sendcount;
961 total_recvcounts[i] = (int)curr_recvcount;
964 (saccu <= INT_MAX) && (raccu <= INT_MAX) &&
965 (curr_sendcount <= INT_MAX) && (curr_recvcount <= INT_MAX),
966 "ERROR(generate_dist_grid_remote_point_infos): "
967 "int argument to MPI is bigger than INT_MAX")
969 saccu += curr_sendcount;
970 raccu += curr_recvcount;
976 send_result_buffer, total_sendcounts, total_sdispls,
977 recv_result_buffer, total_recvcounts, total_rdispls, 1, MPI_PACKED, comm);
981 size_t counts[2] = {0, 0},
count = 0, offset = 0;
984 size_t * reorder[2] = {vertex_reorder, edge_reorder};
985 for (
int i = 0; i < comm_size; ++i) {
986 for (
int j = 0; j < 2; ++j) {
987 size_t curr_count = sendcounts[2 * i + j];
988 for (
size_t k = 0; k < curr_count; ++k, ++counts[j], ++
count) {
989 int curr_buffer_size =
990 recv_pack_sizes[(size_t)(sdispls[2 * i + j]) + k];
993 (
void*)((
unsigned char*)recv_result_buffer + offset),
994 curr_buffer_size, &position,
995 remote_points[j] + reorder[j][counts[j]], point_info_dt, comm);
996 offset += (size_t)(curr_buffer_size);
1006 free(result_buffer);
1007 free(pack_size_buffer);
1008 free(remote_point_infos_buffer);
1009 free(size_t_buffer);
1024 for (i = 0; i < n; ++i)
if (ids[i] >=
id)
break;
1026 if (n != i) memmove(ids + i + 1, ids + i, (n - i) *
sizeof(*ids));
1037 for (i = 0; i < n; ++i)
if (ranks[i] >= rank)
break;
1041 if (ranks[i] == rank)
return;
1042 else memmove(ranks + i + 1, ranks + i, ((
size_t)(n - i)) *
sizeof(*ranks));
1049 const void * a,
const void * b) {
1055 int ret = count_a - count_b;
1056 for (
int i = 0; !ret && (i < count_a); ++i)
1057 ret = (a_ids[i] > b_ids[i]) - (a_ids[i] < b_ids[i]);
1062 const void * a,
const void * b) {
1073 int ** dist_cell_rank_counts,
size_t ** dist_cell_rank_offsets) {
1079 int * ranks_buffer =
xmalloc((
size_t)comm_size *
sizeof(*ranks_buffer));
1080 size_t dist_cell_ranks_array_size = num_cells;
1081 int * dist_cell_ranks_ =
xmalloc(num_cells *
sizeof(*dist_cell_ranks_));
1082 int * dist_cell_rank_counts_ =
1083 (*dist_cell_rank_counts =
1084 xmalloc(num_cells *
sizeof(*dist_cell_rank_counts_)));
1085 size_t * dist_cell_rank_offsets_ =
1086 (*dist_cell_rank_offsets =
1087 xmalloc(num_cells *
sizeof(*dist_cell_rank_offsets_)));
1091 int max_num_vertices_per_cell = 0;
1092 for (
size_t i = 0; i < num_cells; ++i)
1111 for (
size_t i = 0; i < num_cells; ++i) {
1115 size_t * curr_cell_to_vertex =
1116 cell_to_vertex + cell_to_vertex_offsets[i];
1117 size_t * curr_cell_to_edge =
1118 cell_to_edge + cell_to_edge_offsets[i];
1119 for (
int j = 0; j < num_vertices_per_cell[i]; ++j) {
1121 vertex_coordinates + curr_cell_to_vertex[j];
1122 for (
int k = 0; k < 3; ++k)
1133 proc_sphere_part, bnd_circle, ranks_buffer, &rank_count);
1135 ranks_buffer[0] = 0;
1140 offset + (
size_t)rank_count);
1141 memcpy(dist_cell_ranks_ + offset, ranks_buffer,
1142 (
size_t)rank_count *
sizeof(*ranks_buffer));
1143 dist_cell_rank_counts_[i] = rank_count;
1144 dist_cell_rank_offsets_[i] = offset;
1145 offset += (size_t)rank_count;
1153 xrealloc(dist_cell_ranks_, offset *
sizeof(*dist_cell_ranks_));
1159 MPI_Datatype coord_dt;
1160 int array_of_blocklengths[] = {3};
1161 const MPI_Aint array_of_displacements[] =
1162 {(MPI_Aint)((
void*)&(dummy.
coord) - (
void*)&dummy)};
1163 const MPI_Datatype array_of_types[] = {MPI_DOUBLE};
1165 MPI_Type_create_struct(1, array_of_blocklengths, array_of_displacements,
1166 array_of_types, &coord_dt), comm);
1173 MPI_Datatype global_id_dt;
1174 int array_of_blocklengths[] = {1};
1175 const MPI_Aint array_of_displacements[] =
1176 {(MPI_Aint)((
void*)&(dummy.
global_id) - (
void*)&dummy)};
1177 const MPI_Datatype array_of_types[] = {Xt_int_dt};
1179 MPI_Type_create_struct(1, array_of_blocklengths, array_of_displacements,
1180 array_of_types, &global_id_dt), comm);
1185 const void * a,
const void * b) {
1192 const void * a,
const void * b) {
1213 int ids_available_local =
1215 int ids_available_global;
1216 yac_mpi_call(MPI_Allreduce(&ids_available_local, &ids_available_global, 1,
1217 MPI_INT, MPI_MAX, comm), comm);
1222 ids_available_local || !ids_available_global,
1223 "ERROR(generate_dist_remote_points_vertex): inconsistent global ids")
1230 for (
size_t j = 0; j < grid->
num_vertices; ++j) core_mask[j] = 1;
1235 if (ids_available_global)
return vertex_ranks;
1240 int * sendcounts, * recvcounts, * sdispls, * rdispls;
1242 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1245 sendcounts[vertex_ranks[i]]++;
1248 1, sendcounts, recvcounts, sdispls, rdispls, comm);
1251 size_t dist_vertex_count = (size_t)(recvcounts[comm_size - 1]) +
1252 (size_t)(rdispls[comm_size - 1]);
1255 xmalloc((orig_vertex_count + dist_vertex_count) *
1256 sizeof(*id_reorder_coord_buffer));
1258 id_reorder_coord_send_buffer = id_reorder_coord_buffer;
1260 id_reorder_coord_recv_buffer = id_reorder_coord_buffer + orig_vertex_count;
1263 for (
size_t i = 0; i < orig_vertex_count; ++i) {
1264 int pos = sdispls[vertex_ranks[i] + 1]++;
1265 id_reorder_coord_send_buffer[pos].
reorder_idx = i;
1266 for (
int j = 0; j < 3; ++j)
1267 id_reorder_coord_send_buffer[pos].
coord[j] =
1271 MPI_Datatype id_reorder_coord_coord_dt =
1276 id_reorder_coord_send_buffer, sendcounts, sdispls,
1277 id_reorder_coord_recv_buffer, recvcounts, rdispls,
1278 sizeof(*id_reorder_coord_send_buffer), id_reorder_coord_coord_dt, comm);
1280 yac_mpi_call(MPI_Type_free(&id_reorder_coord_coord_dt), comm);
1282 for (
size_t i = 0; i < dist_vertex_count; ++i)
1286 qsort(id_reorder_coord_recv_buffer, dist_vertex_count,
1289 size_t unique_count = dist_vertex_count > 0;
1293 for (
size_t i = 0; i < dist_vertex_count; ++i) {
1303 unique_count <= (
size_t)XT_INT_MAX,
1304 "ERROR(generate_vertex_ids): global_id out of bounds")
1311 yac_mpi_call(MPI_Exscan(&yac_int_unique_count, &id_offset, 1, Xt_int_dt,
1312 MPI_SUM, comm), comm);
1315 if (comm_rank == 0) id_offset = 0;
1318 ((
size_t)id_offset + unique_count) <= (
size_t)XT_INT_MAX,
1319 "ERROR(generate_vertex_ids): global_id out of bounds")
1322 for (
size_t i = 0; i < dist_vertex_count; ++i)
1323 id_reorder_coord_recv_buffer[i].
global_id += id_offset;
1326 qsort(id_reorder_coord_recv_buffer, dist_vertex_count,
1327 sizeof(*id_reorder_coord_recv_buffer),
1330 MPI_Datatype id_reorder_coord_id_dt =
1335 id_reorder_coord_recv_buffer, recvcounts, rdispls,
1336 id_reorder_coord_send_buffer, sendcounts, sdispls,
1337 sizeof(*id_reorder_coord_send_buffer), id_reorder_coord_id_dt, comm);
1339 yac_mpi_call(MPI_Type_free(&id_reorder_coord_id_dt), comm);
1347 vertex_ids[id_reorder_coord_send_buffer[i].
reorder_idx] =
1348 id_reorder_coord_send_buffer[i].
global_id;
1350 free(id_reorder_coord_buffer);
1353 return vertex_ranks;
1360 int comm_rank, comm_size;
1366 int ids_available_local[2], ids_available_global[2];
1369 yac_mpi_call(MPI_Allreduce(ids_available_local, ids_available_global, 2,
1370 MPI_INT, MPI_MAX, comm), comm);
1374 ids_available_local[0] == ids_available_global[0],
1375 "ERROR(generate_ce_ids): inconsistent global ids")
1378 int * core_cell_mask =
1381 for (
size_t j = 0; j < grid->
num_cells; ++j) core_cell_mask[j] = 1;
1386 ids_available_local[1] == ids_available_global[1],
1387 "ERROR(generate_ce_ids): inconsistent global ids")
1390 int * core_edge_mask =
1393 for (
size_t j = 0; j < grid->
num_edges; ++j) core_edge_mask[j] = 1;
1398 if (ids_available_global[0] && ids_available_global[1])
return;
1402 (((ids_available_global[0])?0:(grid->
num_cells)) +
1403 ((ids_available_global[1])?0:(grid->
num_edges))) *
sizeof(*rank_buffer));
1404 int * cell_ranks = rank_buffer;
1406 rank_buffer + ((ids_available_global[0])?0:(grid->
num_cells));
1409 xmalloc((8 * (
size_t)comm_size + 1) *
sizeof(*int_buffer));
1410 int * sendcounts = int_buffer + 0 * comm_size;
1411 int * recvcounts = int_buffer + 2 * comm_size;
1412 int * total_sendcounts = int_buffer + 4 * comm_size;
1413 int * total_recvcounts = int_buffer + 5 * comm_size;
1414 int * total_sdispls = int_buffer + 6 * comm_size;
1415 int * total_rdispls = int_buffer + 7 * comm_size + 1;
1416 memset(sendcounts, 0, 2 * (
size_t)comm_size *
sizeof(*sendcounts));
1418 yac_int * cell_to_vertex_ids = NULL;
1419 int max_num_vertices_per_cell = 0;
1421 if (!ids_available_global[0]) {
1424 for (
size_t i = 0; i < grid->
num_cells; ++i)
1427 yac_mpi_call(MPI_Allreduce(MPI_IN_PLACE, &max_num_vertices_per_cell, 1,
1428 MPI_INT, MPI_MAX, comm), comm);
1431 cell_to_vertex_ids =
1433 sizeof(*cell_to_vertex_ids));
1435 for (
size_t i = 0; i < grid->
num_cells; ++i) {
1438 yac_int * curr_cell_to_vertex_ids =
1439 cell_to_vertex_ids + i * max_num_vertices_per_cell;
1442 if (curr_num_vertices > 0) {
1443 size_t * curr_cell_vertices =
1445 size_t min_vertex = curr_cell_vertices[0];
1446 curr_cell_to_vertex_ids[0] = grid->
vertex_ids[min_vertex];
1447 for (
size_t j = 1; j < curr_num_vertices; ++j) {
1448 size_t curr_vertex_idx = curr_cell_vertices[j];
1451 if (curr_cell_to_vertex_ids[0] == curr_vertex_id)
1452 min_vertex = curr_vertex_idx;
1454 cell_rank = vertex_ranks[min_vertex];
1458 for (
size_t j = curr_num_vertices; j < max_num_vertices_per_cell; ++j)
1459 curr_cell_to_vertex_ids[j] = XT_INT_MAX;
1461 sendcounts[2 * ((cell_ranks[i] = cell_rank)) + 0]++;
1465 yac_int * edge_to_vertex_ids = NULL;
1467 if (!ids_available_global[1]) {
1469 edge_to_vertex_ids =
1472 for (
size_t i = 0; i < grid->
num_edges; ++i) {
1475 yac_int * curr_edge_vertex_ids = edge_to_vertex_ids + 2 * i;
1476 curr_edge_vertex_ids[0] = grid->
vertex_ids[curr_edge_to_vertex[0]];
1477 curr_edge_vertex_ids[1] = grid->
vertex_ids[curr_edge_to_vertex[1]];
1479 if (curr_edge_vertex_ids[0] > curr_edge_vertex_ids[1]) {
1480 yac_int temp = curr_edge_vertex_ids[0];
1481 curr_edge_vertex_ids[0] = curr_edge_vertex_ids[1];
1482 curr_edge_vertex_ids[1] = temp;
1484 2 * ((edge_ranks[i] = vertex_ranks[curr_edge_to_vertex[1]])) + 1]++;
1487 2 * ((edge_ranks[i] = vertex_ranks[curr_edge_to_vertex[0]])) + 1]++;
1494 recvcounts, 2, MPI_INT, comm), comm);
1496 total_sdispls[0] = 0;
1497 size_t recv_counts[2] = {0,0};
1498 for (
int i = 0, saccu = 0, raccu = 0; i < comm_size; ++i) {
1499 total_sdispls[i+1] = saccu;
1500 total_rdispls[i] = raccu;
1501 recv_counts[0] += recvcounts[2 * i + 0];
1502 recv_counts[1] += recvcounts[2 * i + 1];
1503 total_sendcounts[i] = sendcounts[2 * i + 0] * max_num_vertices_per_cell +
1504 sendcounts[2 * i + 1] * 2;
1505 total_recvcounts[i] = recvcounts[2 * i + 0] * max_num_vertices_per_cell +
1506 recvcounts[2 * i + 1] * 2;
1507 saccu += total_sendcounts[i];
1508 raccu += total_recvcounts[i];
1510 size_t local_data_count = (size_t)(total_sendcounts[comm_size - 1]) +
1511 (size_t)(total_sdispls[comm_size]);
1512 size_t recv_count = (size_t)(total_recvcounts[comm_size - 1]) +
1513 (size_t)(total_rdispls[comm_size - 1]);
1516 xmalloc((local_data_count + recv_count) *
sizeof(*yac_int_buffer));
1517 yac_int * send_buffer = yac_int_buffer;
1518 yac_int * recv_buffer = yac_int_buffer + local_data_count;
1521 if (!ids_available_global[0])
1522 for (
size_t i = 0; i < grid->
num_cells; ++i)
1523 for (
int j = 0; j < max_num_vertices_per_cell; ++j)
1524 send_buffer[total_sdispls[cell_ranks[i] + 1]++] =
1525 cell_to_vertex_ids[i * max_num_vertices_per_cell + j];
1526 if (!ids_available_global[1])
1527 for (
size_t i = 0; i < grid->
num_edges; ++i)
1528 for (
int j = 0; j < 2; ++j)
1529 send_buffer[total_sdispls[edge_ranks[i] + 1]++] =
1530 edge_to_vertex_ids[2 * i + j];
1532 free(edge_to_vertex_ids);
1533 free(cell_to_vertex_ids);
1537 send_buffer, total_sendcounts, total_sdispls,
1538 recv_buffer, total_recvcounts, total_rdispls,
1539 sizeof(*send_buffer), Xt_int_dt, comm);
1542 xmalloc((recv_counts[0] + recv_counts[1]) *
sizeof(*n_ids_reorder_buffer));
1544 {n_ids_reorder_buffer, n_ids_reorder_buffer + recv_counts[0]};
1547 int index_counts[2] = {max_num_vertices_per_cell, 2};
1551 for (
int i = 0; i < comm_size; ++i) {
1552 for (
int j = 0; j < 2; ++j) {
1553 int curr_count = recvcounts[2 * i + j];
1554 for (
int k = 0; k < curr_count; ++k, ++
reorder_idx, ++recv_counts[j]) {
1558 offset += index_counts[j];
1563 for (
int i = 0; i < 2; ++i) {
1565 if (ids_available_global[i])
continue;
1570 size_t unique_count = recv_counts[i] > 0;
1574 for (
size_t j = 0; j < recv_counts[i]; ++j, ++curr) {
1583 unique_count <= (
size_t)XT_INT_MAX,
1584 "ERROR(generate_global_ce_ids): global_id out of bounds")
1590 yac_mpi_call(MPI_Exscan(&yac_int_unique_count, &id_offset, 1, Xt_int_dt,
1591 MPI_SUM, comm), comm);
1592 if (comm_rank == 0) id_offset = 0;
1595 ((
size_t)id_offset + unique_count) <= (
size_t)XT_INT_MAX,
1596 "ERROR(generate_global_ce_ids): global_id out of bounds")
1599 for (
size_t j = 0; j < recv_counts[i]; ++j)
1602 free(yac_int_buffer);
1604 qsort(n_ids_reorder_buffer, recv_counts[0] + recv_counts[1],
1608 xmalloc((recv_counts[0] + recv_counts[1] +
1609 ((ids_available_global[0])?0:(grid->
num_cells)) +
1610 ((ids_available_global[1])?0:(grid->
num_edges))) *
1611 sizeof(*global_ids_buffer));
1612 yac_int * send_global_ids = global_ids_buffer;
1614 global_ids_buffer + recv_counts[0] + recv_counts[1];
1616 for (
size_t i = 0; i < recv_counts[0] + recv_counts[1]; ++i)
1617 send_global_ids[i] = n_ids_reorder_buffer[i].
global_id;
1618 free(n_ids_reorder_buffer);
1621 for (
int i = 0, saccu = 0, raccu = 0; i < comm_size; ++i) {
1622 total_sdispls[i] = saccu;
1623 total_rdispls[i] = raccu;
1625 ((total_sendcounts[i] = recvcounts[2 * i + 0] + recvcounts[2 * i + 1]));
1627 ((total_recvcounts[i] = sendcounts[2 * i + 0] + sendcounts[2 * i + 1]));
1632 send_global_ids, total_sendcounts, total_sdispls,
1633 recv_global_ids, total_recvcounts, total_rdispls,
1634 sizeof(*send_global_ids), Xt_int_dt, comm);
1636 if ((!ids_available_global[0]) && (grid->
num_cells > 0))
1638 if ((!ids_available_global[1]) && (grid->
num_edges > 0))
1642 if (!ids_available_global[0])
1643 for (
size_t i = 0; i < grid->
num_cells; ++i)
1644 grid->
cell_ids[i] = recv_global_ids[total_rdispls[cell_ranks[i]]++];
1645 if (!ids_available_global[1])
1646 for (
size_t i = 0; i < grid->
num_edges; ++i)
1647 grid->
edge_ids[i] = recv_global_ids[total_rdispls[edge_ranks[i]]++];
1651 free(global_ids_buffer);
1669 int * core_cell_mask,
size_t num_cells,
size_t num_edges) {
1671 if (num_cells == 0)
return NULL;
1675 for (
size_t i = 0; i < num_edges; ++i) {
1676 edge_to_cell[i][0] = SIZE_MAX;
1677 edge_to_cell[i][1] = SIZE_MAX;
1680 for (
size_t i = 0, offset = 0; i < num_cells; ++i) {
1682 size_t curr_num_edges = num_edges_per_cell[i];
1684 offset += curr_num_edges;
1686 if ((core_cell_mask == NULL) || core_cell_mask[i]) {
1688 for (
size_t j = 0; j < curr_num_edges; ++j) {
1690 size_t curr_edge = curr_cell_to_edge[j];
1691 size_t * curr_edge_to_cell = edge_to_cell[curr_edge];
1692 curr_edge_to_cell += *curr_edge_to_cell != SIZE_MAX;
1694 *curr_edge_to_cell == SIZE_MAX,
1695 "ERROR(generate_edge_to_cell): "
1696 "more than two cells point to a single edge")
1697 *curr_edge_to_cell = i;
1702 return edge_to_cell;
1716 int * dist_cell_ranks;
1717 int * dist_cell_rank_counts;
1718 size_t * dist_cell_rank_offsets;
1720 proc_sphere_part, grid, comm,
1721 &dist_cell_ranks, &dist_cell_rank_counts, &dist_cell_rank_offsets);
1723 int * rank_buffer = NULL;
1724 size_t rank_buffer_size = 0;
1725 size_t rank_buffer_array_size = 0;
1727 int * num_ranks_buffer =
1730 sizeof(*num_ranks_buffer));
1731 int * num_cell_ranks = num_ranks_buffer;
1732 int * num_vertex_ranks = num_ranks_buffer + grid->
num_cells;
1733 int * num_edge_ranks =
1742 size_t max_num_ranks = 0;
1743 for (
int j = 0; j < num_cells; ++j)
1744 max_num_ranks += (
size_t)(num_cell_ranks[cells[j]]);
1747 rank_buffer_size + (
size_t)max_num_ranks);
1749 int * curr_ranks = rank_buffer + rank_buffer_size;
1750 int curr_num_ranks = 0;
1752 for (
int j = 0; j < num_cells; ++j) {
1754 size_t curr_cell_idx = cells[j];
1755 int curr_num_cell_ranks = num_cell_ranks[curr_cell_idx];
1756 int * curr_cell_ranks =
1757 dist_cell_ranks + dist_cell_rank_offsets[curr_cell_idx];
1759 for (
int k = 0; k < curr_num_cell_ranks; ++k)
1760 insert_rank(curr_ranks, &curr_num_ranks, curr_cell_ranks[k]);
1762 rank_buffer_size += (size_t)(num_vertex_ranks[i] = curr_num_ranks);
1764 num_vertex_ranks[i] = 0;
1773 for (
size_t i = 0; i < grid->
num_edges; ++i) {
1776 size_t max_num_ranks = 0;
1777 for (
int j = 0; j < 2; ++j)
1778 if (edge_to_cell[i][j] != SIZE_MAX)
1779 max_num_ranks += (size_t)(num_cell_ranks[edge_to_cell[i][j]]);
1782 rank_buffer_size + (
size_t)max_num_ranks);
1784 int * curr_ranks = rank_buffer + rank_buffer_size;
1785 int curr_num_ranks = 0;
1787 for (
int j = 0; j < 2; ++j) {
1789 size_t curr_cell_idx = edge_to_cell[i][j];
1790 if (curr_cell_idx == SIZE_MAX)
continue;
1792 int curr_num_cell_ranks = num_cell_ranks[curr_cell_idx];
1793 int * curr_cell_ranks =
1794 dist_cell_ranks + dist_cell_rank_offsets[curr_cell_idx];
1796 for (
int k = 0; k < curr_num_cell_ranks; ++k)
1797 insert_rank(curr_ranks, &curr_num_ranks, curr_cell_ranks[k]);
1799 rank_buffer_size += (size_t)(num_edge_ranks[i] = curr_num_ranks);
1801 num_edge_ranks[i] = 0;
1809 int * sendcounts, * recvcounts, * sdispls, * rdispls;
1811 3, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1813 xmalloc(4 * (
size_t)comm_size *
sizeof(*int_buffer));
1814 int * total_sendcounts = int_buffer + 0 * comm_size;
1815 int * total_recvcounts = int_buffer + 1 * comm_size;
1816 int * total_sdispls = int_buffer + 2 * comm_size;
1817 int * total_rdispls = int_buffer + 3 * comm_size;
1819 for (
size_t i = 0; i < grid->
num_cells; ++i) {
1821 int curr_num_ranks = num_cell_ranks[i];
1822 int * ranks = dist_cell_ranks + dist_cell_rank_offsets[i];
1823 for (
int j = 0; j < curr_num_ranks; ++j) sendcounts[3 * ranks[j] + 0]++;
1829 int curr_num_ranks = num_vertex_ranks[i];
1830 int * ranks = rank_buffer + offset;
1831 offset += (size_t)curr_num_ranks;
1832 for (
int j = 0; j < curr_num_ranks; ++j) sendcounts[3 * ranks[j] + 1]++;
1835 for (
size_t i = 0; i < grid->
num_edges; ++i) {
1837 int curr_num_ranks = num_edge_ranks[i];
1838 int * ranks = rank_buffer + offset;
1839 offset += (size_t)curr_num_ranks;
1840 for (
int j = 0; j < curr_num_ranks; ++j) sendcounts[3 * ranks[j] + 2]++;
1845 3, sendcounts, recvcounts, sdispls, rdispls, comm);
1847 size_t receive_counts[3] = {0,0,0};
1848 for (
int i = 0, saccu = 0, raccu = 0; i < comm_size; ++i) {
1849 total_sdispls[i] = saccu;
1850 total_rdispls[i] = raccu;
1851 total_sendcounts[i] = 0;
1852 total_recvcounts[i] = 0;
1853 for (
int j = 0; j < 3; ++j) {
1854 total_sendcounts[i] += sendcounts[3 * i + j];
1855 total_recvcounts[i] += recvcounts[3 * i + j];
1856 receive_counts[j] += (size_t)(recvcounts[3 * i + j]);
1858 saccu += total_sendcounts[i];
1859 raccu += total_recvcounts[i];
1861 size_t local_data_count = (size_t)(total_sendcounts[comm_size - 1]) +
1862 (size_t)(total_sdispls[comm_size - 1]);
1863 size_t recv_count = (size_t)(total_recvcounts[comm_size - 1]) +
1864 (size_t)(total_rdispls[comm_size - 1]);
1866 struct id_pos * id_pos_buffer =
1867 xmalloc((local_data_count + recv_count) *
sizeof(*id_pos_buffer));
1868 struct id_pos * id_pos_send_buffer = id_pos_buffer;
1869 struct id_pos * id_pos_recv_buffer =
1870 id_pos_buffer + local_data_count;
1873 for (
size_t i = 0; i < grid->
num_cells; ++i) {
1875 int curr_num_ranks = num_cell_ranks[i];
1876 int * ranks = dist_cell_ranks + dist_cell_rank_offsets[i];
1878 for (
int j = 0; j < curr_num_ranks; ++j) {
1879 int pos = sdispls[3 * ranks[j] + 0 + 1]++;
1881 id_pos_send_buffer[pos].
orig_pos = i;
1888 int curr_num_ranks = num_vertex_ranks[i];
1889 int * ranks = rank_buffer + offset;
1890 offset += (size_t)curr_num_ranks;
1892 for (
int j = 0; j < curr_num_ranks; ++j) {
1893 int pos = sdispls[3 * ranks[j] + 1 + 1]++;
1895 id_pos_send_buffer[pos].
orig_pos = i;
1899 for (
size_t i = 0; i < grid->
num_edges; ++i) {
1901 int curr_num_ranks = num_edge_ranks[i];
1902 int * ranks = rank_buffer + offset;
1903 offset += (size_t)curr_num_ranks;
1905 for (
int j = 0; j < curr_num_ranks; ++j) {
1906 int pos = sdispls[3 * ranks[j] + 2 + 1]++;
1908 id_pos_send_buffer[pos].
orig_pos = i;
1912 free(num_ranks_buffer);
1914 free(dist_cell_ranks);
1915 free(dist_cell_rank_offsets);
1921 id_pos_send_buffer, total_sendcounts, total_sdispls,
1922 id_pos_recv_buffer, total_recvcounts, total_rdispls,
1923 sizeof(*id_pos_send_buffer), id_pos_dt, comm);
1927 size_t dist_owner_counts[3] = {0, 0, 0};
1928 for (
int i = 0; i < comm_size; ++i)
1929 for (
int j = 0; j < 3; ++j)
1930 dist_owner_counts[j] += (
size_t)(recvcounts[3 * i + j]);
1931 size_t max_dist_owner_count =
1932 MAX(
MAX(dist_owner_counts[0], dist_owner_counts[1]), dist_owner_counts[2]);
1934 xmalloc(max_dist_owner_count *
sizeof(*temp_buffer));
1939 for (
int i = 0; i < 3; ++i) {
1942 for (
int j = 0; j < comm_size; ++j) {
1943 int curr_recvcount = recvcounts[3 * j + i];
1944 struct id_pos * curr_id_pos =
1945 id_pos_recv_buffer + rdispls[3 * j + i];
1946 for (
int k = 0; k < curr_recvcount; ++k, ++count) {
1954 qsort(temp_buffer, count,
sizeof(*temp_buffer),
1958 size_t num_unique_ids = 0;
1962 for (
size_t j = 0; j < count; ++j) {
1965 if (curr_id != prev_id) {
1967 unique_ids[num_unique_ids].
global_id = curr_id;
1968 unique_ids[num_unique_ids].
data.
count = 1;
1971 unique_ids[num_unique_ids-1].
data.
count++;
1975 size_t remote_point_info_buffer_size = 0;
1976 for (
size_t j = 0; j < num_unique_ids; ++j)
1978 remote_point_info_buffer_size +=
1983 sizeof(*(dist_owners[0])));
1984 dist_owners[i]->
data =
1985 (unique_ids =
xrealloc(unique_ids, num_unique_ids *
sizeof(*unique_ids)));
1986 dist_owners[i]->
count = num_unique_ids;
1988 for (
size_t j = 0, l = 0, offset = 0; j < num_unique_ids; ++j) {
1989 int curr_count = unique_ids[j].
data.
count;
1990 if (curr_count == 1) {
1995 for (
int k = 0; k < curr_count; ++k, ++l, ++offset) {
1996 dist_owners[i]->
buffer[offset] = temp_buffer[l].
data;
2005 free(id_pos_buffer);
2010 *dist_cell_owner = dist_owners[0];
2011 *dist_vertex_owner = dist_owners[1];
2012 *dist_edge_owner = dist_owners[2];
2016 yac_int * ids,
size_t count,
yac_int ** sorted_ids,
size_t ** reorder_idx) {
2018 yac_int * sorted_ids_ =
xrealloc(*sorted_ids, count *
sizeof(*sorted_ids_));
2019 size_t * reorder_idx_ =
xrealloc(*reorder_idx, count *
sizeof(*reorder_idx_));
2021 memcpy(sorted_ids_, ids, count *
sizeof(*sorted_ids_));
2022 for (
size_t i = 0; i < count; ++i) reorder_idx_[i] = i;
2026 *sorted_ids = sorted_ids_;
2027 *reorder_idx = reorder_idx_;
2031 void *
data,
size_t count, MPI_Comm comm,
2032 void(*set_sendcounts)(
void*,
size_t,
int*),
2033 void(*pack)(
void*,
size_t,
int*,
int*,
int*)) {
2038 int * sendcounts, * recvcounts, * sdispls, * rdispls;
2040 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
2042 set_sendcounts(
data, count, sendcounts);
2045 1, sendcounts, recvcounts, sdispls, rdispls, comm);
2046 int num_src_msg = 0, num_dst_msg = 0;
2047 for (
int i = 0; i < comm_size; ++i) {
2048 num_src_msg += (recvcounts[i] > 0);
2049 num_dst_msg += (sendcounts[i] > 0);
2053 (size_t)(rdispls[comm_size-1]) + (size_t)(recvcounts[comm_size-1]);
2056 xmalloc((recv_count + 2 * count) *
sizeof(*pos_buffer));
2057 int * src_pos_buffer = pos_buffer;
2058 int * dst_pos_buffer = pos_buffer + recv_count;
2059 int * send_pos_buffer = pos_buffer + recv_count + count;
2062 pack(
data, count, sdispls, dst_pos_buffer, send_pos_buffer);
2066 send_pos_buffer, sendcounts, sdispls,
2067 src_pos_buffer, recvcounts, rdispls, comm);
2069 struct Xt_com_pos * com_pos =
2070 xmalloc(((
size_t)num_src_msg + (
size_t)num_dst_msg) *
sizeof(*com_pos));
2071 struct Xt_com_pos * src_com = com_pos;
2072 struct Xt_com_pos * dst_com = com_pos + num_src_msg;
2077 for (
int i = 0; i < comm_size; ++i) {
2078 if (recvcounts[i] > 0) {
2079 src_com[num_src_msg].transfer_pos = src_pos_buffer;
2080 src_com[num_src_msg].num_transfer_pos = recvcounts[i];
2081 src_com[num_src_msg].rank = i;
2082 src_pos_buffer += recvcounts[i];
2085 if (sendcounts[i] > 0) {
2086 dst_com[num_dst_msg].transfer_pos = dst_pos_buffer;
2087 dst_com[num_dst_msg].num_transfer_pos = sendcounts[i];
2088 dst_com[num_dst_msg].rank = i;
2089 dst_pos_buffer += sendcounts[i];
2096 xt_xmap_intersection_pos_new(
2097 num_src_msg, src_com, num_dst_msg, dst_com, comm);
2106 void * data,
size_t count,
int * sendcounts) {
2110 for (
size_t i = 0; i < count; ++i) {
2115 sendcounts[curr_info->
rank]++;
2120 void * data,
size_t count,
int * sdispls,
2121 int * dst_pos_buffer,
int * send_pos_buffer) {
2125 for (
size_t i = 0; i < count; ++i) {
2130 int pos = sdispls[curr_info->
rank+1]++;
2131 dst_pos_buffer[pos] = (int)i;
2132 send_pos_buffer[pos] = (int)(curr_info->
orig_pos);
2137 struct remote_point * remote_point_data,
size_t count, MPI_Comm comm) {
2146 void * data,
size_t count,
int * sendcounts) {
2150 for (
size_t i = 0; i < count; ++i) sendcounts[point_infos[i].
rank]++;
2154 void * data,
size_t count,
int * sdispls,
2155 int * dst_pos_buffer,
int * send_pos_buffer) {
2159 for (
size_t i = 0; i < count; ++i) {
2160 int pos = sdispls[point_infos[i].
rank+1]++;
2161 dst_pos_buffer[pos] = (int)i;
2162 send_pos_buffer[pos] = (int)(point_infos[i].
orig_pos);
2176 const void * a,
const void * b) {
2185 const void * a,
const void * b) {
2194 size_t max_num_cell_ve,
size_t num_cells,
int * num_ve_per_cell,
2196 yac_int ** ids,
size_t * num_ids,
size_t ** cell_to_ve, Xt_xmap * xmap,
2199 size_t num_total_ve = num_cells * max_num_cell_ve;
2201 size_t total_num_cell_ve = 0;
2202 for (
size_t i = 0, k = 0, l = 0; i < num_cells; ++i) {
2203 size_t num_vertices = (size_t)(num_ve_per_cell[i]);
2204 total_num_cell_ve += num_vertices;
2206 for (; j < num_vertices; ++j, ++k, ++l) {
2209 for (; j < max_num_cell_ve; ++j, ++k) {
2214 qsort(cell_ve_point_data, num_total_ve,
sizeof(*cell_ve_point_data),
2217 size_t num_ids_ = 0;
2218 size_t * cell_to_ve_ =
2219 xmalloc(total_num_cell_ve *
sizeof(*cell_to_ve_));
2221 xmalloc(total_num_cell_ve *
sizeof(*ve_point_info));
2222 yac_int prev_global_id = XT_INT_MAX;
2223 for (
size_t i = 0; i < num_total_ve; ++i) {
2225 size_t curr_reorder_idx = cell_ve_point_data[i].
reorder_idx;
2226 if (curr_global_id == XT_INT_MAX)
break;
2227 if (curr_global_id != prev_global_id) {
2228 ids_[num_ids_] = (prev_global_id = curr_global_id);
2229 ve_point_info[num_ids_] = cell_ve_point_data[i].
data.
data;
2232 cell_to_ve_[curr_reorder_idx] = num_ids_ - 1;
2235 *ids =
xrealloc(ids_, num_ids_ *
sizeof(*ids_));
2236 *num_ids = num_ids_;
2237 *cell_to_ve = cell_to_ve_;
2239 free(ve_point_info);
2244 MPI_Datatype coord_dt;
2245 yac_mpi_call(MPI_Type_contiguous(3, MPI_DOUBLE, &coord_dt), comm);
2252 Xt_redist redist_mask, Xt_redist redist_coords, MPI_Comm comm) {
2256 uint64_t counts[2], max_counts[2];
2257 if (orig_field_data != NULL) {
2266 counts, max_counts, 2, MPI_UINT64_T, MPI_MAX, comm), comm);
2268 (orig_field_data == NULL) ||
2269 ((counts[0] == max_counts[0]) && (counts[1] == max_counts[1])),
2270 "ERROR(field_data_init): inconsistent number of masks or coordinates")
2272 int * data_available_flag =
2273 xcalloc(max_counts[0] + max_counts[1],
sizeof(*data_available_flag));
2275 if (orig_field_data != NULL) {
2276 for (
size_t i = 0; i < orig_field_data->
masks_count; ++i)
2277 data_available_flag[i] = orig_field_data->
masks[i] != NULL;
2279 data_available_flag[i + orig_field_data->
masks_count] =
2285 MPI_IN_PLACE, data_available_flag,
2286 (
int)(max_counts[0] + max_counts[1]), MPI_INT, MPI_MAX, comm), comm);
2288 if (orig_field_data != NULL) {
2289 for (
size_t i = 0; i < orig_field_data->
masks_count; ++i)
2291 data_available_flag[i] == (orig_field_data->
masks[i] != NULL),
2292 "ERROR(field_data_init): inconsistent availability of masks")
2296 data_available_flag[i + orig_field_data->
masks_count] ==
2298 "ERROR(field_data_init): inconsistent availability of coordinates")
2301 for (uint64_t i = 0; i < max_counts[0]; ++i) {
2302 int * dist_mask = NULL;
2303 if (data_available_flag[i]) {
2304 dist_mask =
xmalloc(dist_size *
sizeof(*dist_mask));
2305 int * orig_mask = (orig_field_data != NULL)?orig_field_data->
masks[i]:NULL;
2306 xt_redist_s_exchange1(redist_mask, orig_mask, dist_mask);
2311 for (uint64_t i = 0; i < max_counts[1]; ++i) {
2313 if (data_available_flag[i + max_counts[0]]) {
2314 dist_coordinates =
xmalloc(dist_size *
sizeof(*dist_coordinates));
2316 (orig_field_data != NULL)?orig_field_data->
coordinates[i]:NULL;
2317 xt_redist_s_exchange1(redist_coords, orig_coordinates, dist_coordinates);
2322 free(data_available_flag);
2324 return dist_field_data;
2336 int max_num_vertices_per_cell = 0;
2338 for (
size_t i = 0; i < grid_data->
num_cells; ++i)
2341 yac_mpi_call(MPI_Allreduce(MPI_IN_PLACE, &max_num_vertices_per_cell, 1,
2342 MPI_INT, MPI_MAX, comm), comm);
2346 * dist_vertex_owner,
2350 proc_sphere_part, grid_data,
2351 &dist_cell_owner, &dist_vertex_owner, &dist_edge_owner, comm);
2353 Xt_xmap xmap_cell_orig_to_dist =
2355 dist_cell_owner->
data, dist_cell_owner->
count, comm);
2357 MPI_Datatype dt_single_remote_point =
2359 max_num_vertices_per_cell, comm);
2362 Xt_redist redist_cell_yac_int,
2364 redist_cell_corner_point_data,
2366 redist_cell_yac_int = xt_redist_p2p_new(xmap_cell_orig_to_dist, Xt_int_dt);
2367 redist_cell_int = xt_redist_p2p_new(xmap_cell_orig_to_dist, MPI_INT);
2368 redist_cell_corner_point_data =
2369 xt_redist_p2p_new(xmap_cell_orig_to_dist, dt_single_remote_point);
2370 yac_mpi_call(MPI_Type_free(&dt_single_remote_point), comm);
2371 redist_cell_coords = xt_redist_p2p_new(xmap_cell_orig_to_dist, dt_coord);
2373 xt_xmap_delete(xmap_cell_orig_to_dist);
2375 size_t num_cells = dist_cell_owner->
count;
2376 size_t max_num_cell_corners =
2377 (size_t)max_num_vertices_per_cell * num_cells;
2378 size_t max_num_local_cell_corners =
2379 (size_t)max_num_vertices_per_cell * grid_data->
num_cells;
2381 int * num_vertices_per_cell =
2382 xmalloc(num_cells *
sizeof(*num_vertices_per_cell));
2384 cell_corner_point_data =
2385 xmalloc(2 * (max_num_cell_corners + max_num_local_cell_corners) *
2386 sizeof(*cell_corner_point_data));
2388 cell_corner_point_data + max_num_cell_corners;
2390 cell_corner_point_data + 2 * max_num_cell_corners;
2392 cell_corner_point_data + 2 * max_num_cell_corners +
2393 max_num_local_cell_corners;
2394 for (
size_t i = 0; i < grid_data->
num_cells; ++i){
2399 local_cell_corner_point_data + i * (size_t)max_num_vertices_per_cell;
2401 local_cell_edge_point_data + i * (size_t)max_num_vertices_per_cell;
2402 for (
size_t j = 0; j < num_corners; ++j) {
2404 curr_local_cell_corner_point_data[j].
data.
data.
rank = comm_rank;
2407 curr_local_cell_edge_point_data[j].
data.
data.
rank = comm_rank;
2412 xt_redist_s_exchange1(redist_cell_yac_int, grid_data->
cell_ids, cell_ids);
2413 xt_redist_s_exchange1(
2415 xt_redist_s_exchange1(
2416 redist_cell_corner_point_data,
2417 local_cell_corner_point_data, cell_corner_point_data);
2418 xt_redist_s_exchange1(
2419 redist_cell_corner_point_data,
2420 local_cell_edge_point_data, cell_edge_point_data);
2425 redist_cell_int, redist_cell_coords, comm);
2427 xt_redist_delete(redist_cell_coords);
2428 xt_redist_delete(redist_cell_corner_point_data);
2429 xt_redist_delete(redist_cell_yac_int);
2430 xt_redist_delete(redist_cell_int);
2433 size_t num_vertices;
2434 size_t * cell_to_vertex;
2435 size_t * cell_to_vertex_offsets =
2436 xmalloc(num_cells *
sizeof(*cell_to_vertex_offsets));
2437 Xt_xmap xmap_vertex_orig_to_dist;
2438 for (
size_t i = 0, accu = 0; i < num_cells; ++i) {
2439 cell_to_vertex_offsets[i] = accu;
2440 accu += (size_t)(num_vertices_per_cell[i]);
2443 max_num_vertices_per_cell, num_cells, num_vertices_per_cell,
2444 cell_corner_point_data, &vertex_ids, &num_vertices,
2445 &cell_to_vertex, &xmap_vertex_orig_to_dist, comm);
2449 size_t * cell_to_edge;
2450 size_t * cell_to_edge_offsets = cell_to_vertex_offsets;
2451 Xt_xmap xmap_edge_orig_to_dist;
2453 max_num_vertices_per_cell, num_cells, num_vertices_per_cell,
2454 cell_edge_point_data, &edge_ids, &num_edges,
2455 &cell_to_edge, &xmap_edge_orig_to_dist, comm);
2457 free(cell_corner_point_data);
2460 Xt_redist redist_vertex_coords =
2461 xt_redist_p2p_new(xmap_vertex_orig_to_dist, dt_coord);
2462 Xt_redist redist_vertex_int =
2463 xt_redist_p2p_new(xmap_vertex_orig_to_dist, MPI_INT);
2464 xt_xmap_delete(xmap_vertex_orig_to_dist);
2468 xmalloc(num_vertices *
sizeof(*vertex_coordinates));
2469 xt_redist_s_exchange1(
2475 redist_vertex_int, redist_vertex_coords, comm);
2477 xt_redist_delete(redist_vertex_int);
2478 xt_redist_delete(redist_vertex_coords);
2480 Xt_redist redist_edge_int,
2482 redist_edge_2yac_int;
2485 MPI_Datatype dt_2yac_int;
2486 yac_mpi_call(MPI_Type_contiguous(2, Xt_int_dt, &dt_2yac_int), comm);
2489 redist_edge_int = xt_redist_p2p_new(xmap_edge_orig_to_dist, MPI_INT);
2490 redist_edge_coords = xt_redist_p2p_new(xmap_edge_orig_to_dist, dt_coord);
2491 redist_edge_2yac_int = xt_redist_p2p_new(xmap_edge_orig_to_dist, dt_2yac_int);
2496 xt_xmap_delete(xmap_edge_orig_to_dist);
2501 int * temp_edge_type_src, * temp_edge_type_dst;
2502 if (
sizeof(*edge_type) ==
sizeof(
int)) {
2503 temp_edge_type_src = (
int*)(grid_data->
edge_type);
2504 temp_edge_type_dst = (
int*)edge_type;
2506 temp_edge_type_src =
2508 for (
size_t i = 0; i < grid_data->
num_edges; ++i)
2509 temp_edge_type_src[i] = (
int)(grid_data->
edge_type[i]);
2510 temp_edge_type_dst =
xmalloc(num_edges *
sizeof(*temp_edge_type_dst));
2511 for (
size_t i = 0; i < num_edges; ++i)
2512 temp_edge_type_dst[i] = (
int)(edge_type[i]);
2515 xt_redist_s_exchange1(
2516 redist_edge_int, temp_edge_type_src, temp_edge_type_dst);
2518 if (
sizeof(*edge_type) !=
sizeof(
int)) {
2520 for (
size_t i = 0; i < num_edges; ++i)
2523 free(temp_edge_type_src);
2524 free(temp_edge_type_dst);
2531 redist_edge_int, redist_edge_coords, comm);
2533 xt_redist_delete(redist_edge_int);
2534 xt_redist_delete(redist_edge_coords);
2537 xmalloc(num_cells *
sizeof(*cell_bnd_circles));
2545 for (
size_t i = 0; i < num_cells; ++i) {
2546 size_t * curr_cell_to_vertex =
2547 cell_to_vertex + cell_to_vertex_offsets[i];
2548 size_t * curr_cell_to_edge =
2549 cell_to_edge + cell_to_edge_offsets[i];
2550 for (
int j = 0; j < num_vertices_per_cell[i]; ++j) {
2552 vertex_coordinates + curr_cell_to_vertex[j];
2553 for (
int k = 0; k < 3; ++k)
2561 cell_bnd_circles[i] =
2572 xmalloc(num_edges *
sizeof(*edge_to_vertex));
2578 2 * (grid_data->
num_edges + num_edges) *
sizeof(*vertex_id_buffer));
2579 yac_int * grid_edge_vertex_ids = vertex_id_buffer;
2582 size_t * grid_edge_to_vertex = &(grid_data->
edge_to_vertex[0][0]);
2584 for (
size_t i = 0; i < 2 * grid_data->
num_edges; ++i)
2585 grid_edge_vertex_ids[i] =
2586 grid_data->
vertex_ids[grid_edge_to_vertex[i]];
2588 xt_redist_s_exchange1(
2589 redist_edge_2yac_int, grid_edge_vertex_ids, edge_vertex_ids);
2591 id2idx(edge_vertex_ids, &(edge_to_vertex[0][0]), 2 * num_edges,
2592 vertex_ids, num_vertices);
2594 free(vertex_id_buffer);
2597 xt_redist_delete(redist_edge_2yac_int);
2599 size_t num_total_cells = num_cells;
2600 size_t num_total_vertices = num_vertices;
2601 size_t num_total_edges = num_edges;
2635 proc_sphere_part, comm_rank, vertex_coordinates, num_total_vertices);
2650 dist_cell_owner, dist_vertex_owner, dist_edge_owner);
2652 free(dist_cell_owner->
data);
2653 free(dist_cell_owner);
2654 free(dist_vertex_owner->
data);
2655 free(dist_vertex_owner);
2656 free(dist_edge_owner->
data);
2657 free(dist_edge_owner);
2663 for (
int i = 0; i < 2; ++i)
2671 for (
int i = 0; i < 2; ++i) {
2685 for (
size_t i = 0; i <
num_cells; ++i) {
2687 double * cell_coord = cells[i].
coord;
2688 for (
int j = 0; j < 3; ++j) cell_coord[j] = 0.0;
2692 size_t * curr_cell_to_vertex =
2698 for (
int k = 0; k < 3; ++k)
2699 cell_coord[k] += (*curr_vertex_coords)[k];
2710 grid_id_a != grid_id_b,
"ERROR(yac_dist_grid_pair_new): identical grid ids")
2716 if (grid_id_a > grid_id_b) {
2717 int grid_id_swap = grid_id_a;
2718 grid_id_a = grid_id_b;
2719 grid_id_b = grid_id_swap;
2730 size_t num_cells = num_cells_a + num_cells_b;
2735 grid_pair->
grid_ids[0] = grid_id_a;
2736 grid_pair->
grid_ids[1] = grid_id_b;
2737 grid_pair->
comm = comm;
2758 return grid_pair->
comm;
2765 for (
int i = 0; (i < 2) && (grid == NULL); ++i)
2766 if (grid_id == grid_pair->
grid_ids[i])
2769 grid != NULL,
"ERROR(yac_dist_grid_pair_get_dist_grid): invalid grid_id")
2783 (location ==
CELL) || (location ==
CORNER) || (location ==
EDGE),
2784 "ERROR(yac_dist_grid_get_local_count): invalid location")
2806 (location ==
CELL) || (location ==
CORNER) || (location ==
EDGE),
2807 "ERROR(yac_dist_grid_get_count): invalid location")
2820 (location ==
CELL) || (location ==
CORNER) || (location ==
EDGE),
2821 "ERROR(yac_dist_grid_get_core_mask): invalid location")
2834 (location ==
CELL) || (location ==
CORNER) || (location ==
EDGE),
2835 "ERROR(yac_dist_grid_get_global_ids): invalid location")
2846 size_t ** indices,
size_t * count) {
2851 int const * core_mask =
2854 size_t * temp_indices =
xmalloc(total_count *
sizeof(*temp_indices));
2859 for (
size_t i = 0; i < total_count; ++i)
2860 if (core_mask[i] && mask[i]) temp_indices[count_++] = i;
2862 for (
size_t i = 0; i < total_count; ++i)
2863 if (core_mask[i]) temp_indices[count_++] = i;
2866 *indices =
xrealloc(temp_indices, count_ *
sizeof(**indices));
2874 (location ==
CELL) || (location ==
CORNER) || (location ==
EDGE),
2875 "ERROR(yac_dist_grid_get_field_data): invalid location")
2888 if (field.
masks_idx == SIZE_MAX)
return NULL;
2895 "ERROR(yac_dist_grid_get_field_mask): invalid mask_idx")
2912 "ERROR(yac_dist_grid_get_field_coords): invalid coord_idx")
2934 int const * core_mask =
2937 size_t unmasked_local_count = 0;
2938 for (
size_t i = 0; i < total_count; ++i)
2939 if (core_mask[i] && mask[i]) ++unmasked_local_count;
2940 return unmasked_local_count;
2946 for (
size_t i = 0; i < count; ++i)
2947 if (point_infos[i].count > 1) free(point_infos[i].data.
multi);
2981 if (grid_pair == NULL)
return;
2984 for (
int i = 0; i < 2; ++i) {
2993 size_t cell_idx,
struct grid_cell * buffer_cell) {
3003 size_t cell_idx,
struct grid_cell * buffer_cell) {
3007 for (
size_t i = 0; i < buffer_cell->
num_corners; ++i)
3029 *buffer_cell = cell;
3032 for (
size_t i = 0; i < num_vertices; ++i) {
3051 for (
int i = 0; (i < 2) && (search == NULL); ++i)
3052 if (grid_id == grid_pair->
grid_ids[i])
3056 "ERROR(yac_dist_grid_pair_get_cell_sphere_part): invalid grid_id")
3064 double coord[3],
struct dist_grid * grid,
size_t cell_idx,
3072 size_t * temp_cells;
3073 size_t * num_cells_per_coord =
3074 xmalloc(count *
sizeof(*num_cells_per_coord));
3078 cell_sphere_part, search_coords, count, &temp_cells,
3079 num_cells_per_coord);
3086 for (
size_t i = 0, k = 0; i < count; ++i) {
3087 size_t curr_num_cells = num_cells_per_coord[i];
3088 if (curr_num_cells == 0) {
3089 cells[i] = SIZE_MAX;
3090 }
else if (curr_num_cells == 1) {
3092 search_coords[i], grid, temp_cells[k], &buffer_cell))
3093 cells[i] = temp_cells[k];
3095 cells[i] = SIZE_MAX;
3098 size_t cell_idx = SIZE_MAX;
3100 for (
size_t j = 0; j < curr_num_cells; ++j, ++k) {
3101 size_t curr_cell_idx = temp_cells[k];
3104 search_coords[i], grid, curr_cell_idx, &buffer_cell))
3106 if (curr_cell_id < cell_id) {
3107 cell_idx = curr_cell_idx;
3108 cell_id = curr_cell_id;
3111 cells[i] = cell_idx;
3116 free(num_cells_per_coord);
3125 size_t * reorder_idx;
3129 (location ==
CELL) || (location ==
CORNER) || (location ==
EDGE),
3130 "ERROR(lookup_single_remote_point_reorder_locally): invalid location")
3152 size_t count_ = *count;
3153 size_t new_count = 0;
3156 qsort(ids, count_,
sizeof(*ids),
3159 for (
size_t i = 0, j = 0; i < count_; ++i) {
3161 while ((j < num_ids) && (sorted_ids[j] < curr_id)) ++j;
3162 if ((j < num_ids) && (sorted_ids[j] == curr_id)) {
3165 if (i != new_count) ids[new_count] = ids[i];
3176 int pack_size_field_coord, pack_size_field_mask;
3179 MPI_Pack_size(3, MPI_DOUBLE, comm, &pack_size_field_coord), comm);
3183 MPI_Pack_size(1, MPI_INT, comm, &pack_size_field_mask), comm);
3184 pack_size_field_mask *= (int)field_data.
masks_count;
3186 return pack_size_field_coord + pack_size_field_mask;
3191 MPI_Datatype bnd_circle_dt, MPI_Comm comm) {
3194 pack_size_num_vertices,
3195 pack_size_bnd_circle;
3198 yac_mpi_call(MPI_Pack_size(1, Xt_int_dt, comm, &pack_size_id), comm);
3200 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_num_vertices), comm);
3203 MPI_Pack_size(1, bnd_circle_dt, comm, &pack_size_bnd_circle), comm);
3205 return pack_size_id + pack_size_num_vertices + pack_size_bnd_circle +
3213 pack_size_vertex_coords;
3215 yac_mpi_call(MPI_Pack_size(1, Xt_int_dt, comm, &pack_size_id), comm);
3218 MPI_Pack_size(3, MPI_DOUBLE, comm, &pack_size_vertex_coords), comm);
3220 return pack_size_id + pack_size_vertex_coords +
3228 pack_size_edge_to_vertex,
3229 pack_size_edge_type;
3231 yac_mpi_call(MPI_Pack_size(1, Xt_int_dt, comm, &pack_size_id), comm);
3233 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &pack_size_edge_type), comm);
3236 MPI_Pack_size(2, Xt_int_dt, comm, &pack_size_edge_to_vertex), comm);
3238 return pack_size_id + pack_size_edge_type + pack_size_edge_to_vertex +
3243 struct dist_grid * grid, uint64_t * pos,
size_t count,
int * pack_sizes,
3244 MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt, MPI_Comm comm) {
3246 int pack_size_base_cell =
3248 int pack_size_base_vertex =
3250 int pack_size_base_edge =
3253 for (
size_t i = 0; i < count; ++i) {
3254 size_t idx = (size_t)(pos[i]);
3256 size_t * curr_vertices =
3258 size_t * curr_edges =
3261 pack_size_base_cell +
3262 num_vertices * (pack_size_base_vertex + pack_size_base_edge) +
3265 for (
int j = 0; j < num_vertices; ++j) {
3268 grid->
vertex_owners + curr_vertices[j], point_info_dt, comm) +
3270 grid->
edge_owners + curr_edges[j], point_info_dt, comm);
3272 pack_sizes[i] = pack_size;
3277 struct dist_grid * grid, uint64_t * pos,
size_t count,
int * pack_sizes,
3278 MPI_Datatype point_info_dt, MPI_Comm comm) {
3280 int pack_size_base_vertex =
3282 for (
size_t i = 0; i < count; ++i)
3284 pack_size_base_vertex +
3290 struct dist_grid * grid, uint64_t * pos,
size_t count,
int * pack_sizes,
3291 MPI_Datatype point_info_dt, MPI_Comm comm) {
3293 int pack_size_base_vertex =
3295 int pack_size_base_edge =
3297 for (
size_t i = 0; i < count; ++i) {
3300 pack_size_base_edge +
3301 2 * pack_size_base_vertex +
3303 grid->
edge_owners + pos[i], point_info_dt, comm) +
3305 grid->
vertex_owners + curr_vertices[0], point_info_dt, comm) +
3307 grid->
vertex_owners + curr_vertices[1], point_info_dt, comm);
3313 size_t count,
int * pack_sizes, MPI_Datatype bnd_circle_dt,
3314 MPI_Datatype point_info_dt, MPI_Comm comm) {
3317 (location ==
CELL) || (location ==
CORNER) || (location ==
EDGE),
3318 "ERROR(get_pack_sizes): invalid location")
3324 grid, pos, count, pack_sizes, bnd_circle_dt, point_info_dt, comm);
3328 grid, pos, count, pack_sizes, point_info_dt, comm);
3332 grid, pos, count, pack_sizes, point_info_dt, comm);
3338 size_t idx,
void * buffer,
int buffer_size,
int * position,
3344 MPI_Pack(field_data.
coordinates[i][idx], 3, MPI_DOUBLE, buffer,
3345 buffer_size, position, comm), comm);
3348 for (
size_t i = 0; i < field_data.
masks_count; ++i)
3350 MPI_Pack(field_data.
masks[i] + idx, 1, MPI_INT, buffer,
3351 buffer_size, position, comm), comm);
3355 struct dist_grid * grid,
size_t idx,
void * buffer,
int buffer_size,
3356 int * position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
3363 MPI_Pack(grid->
vertex_ids + idx, 1, Xt_int_dt, buffer, buffer_size,
3364 position, comm), comm);
3368 buffer_size, position, comm), comm);
3372 point_info_dt, comm);
3375 idx, buffer, buffer_size, position, grid->
field_data.vertex, comm);
3379 struct dist_grid * grid,
size_t idx,
void * buffer,
int buffer_size,
3380 int * position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
3389 MPI_Pack(grid->
edge_ids + idx, 1, Xt_int_dt, buffer, buffer_size, position,
3394 &
edge_type, 1, MPI_INT, buffer, buffer_size, position, comm), comm);
3401 edge_to_vertex, 2, Xt_int_dt, buffer, buffer_size, position, comm), comm);
3404 grid->
edge_owners + idx, buffer, buffer_size, position,
3405 point_info_dt, comm);
3408 idx, buffer, buffer_size, position, grid->
field_data.
edge, comm);
3412 struct dist_grid * grid,
size_t idx,
void * buffer,
int buffer_size,
3413 int * position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
3417 grid, idx, buffer, buffer_size, position,
3418 bnd_circle_dt, point_info_dt, comm);
3421 for (
int i = 0; i < 2; ++i)
3424 buffer, buffer_size, position, bnd_circle_dt, point_info_dt, comm);
3428 struct dist_grid * grid,
size_t idx,
void * buffer,
int buffer_size,
3429 int * position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
3436 MPI_Pack(grid->
cell_ids + idx, 1, Xt_int_dt, buffer, buffer_size, position,
3440 idx, buffer, buffer_size, position, grid->
field_data.cell, comm);
3443 MPI_Pack(&num_vertices, 1, MPI_INT, buffer,
3444 buffer_size, position, comm), comm);
3448 buffer_size, position, comm), comm);
3451 grid->
cell_owners + idx, buffer, buffer_size, position,
3452 point_info_dt, comm);
3454 for (
int i = 0; i < num_vertices; ++i) {
3457 buffer, buffer_size, position, bnd_circle_dt, point_info_dt, comm);
3460 buffer, buffer_size, position, bnd_circle_dt, point_info_dt, comm);
3466 size_t count,
void ** pack_data,
int * pack_sizes,
3467 MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt, MPI_Comm comm) {
3470 bnd_circle_dt, point_info_dt, comm);
3472 size_t pack_size = 0;
3473 for (
size_t i = 0; i < count; ++i) pack_size += (
size_t)(pack_sizes[i]);
3475 void * pack_data_ =
xmalloc(pack_size);
3478 struct dist_grid * grid,
size_t idx,
void * buffer,
int buffer_size,
3479 int * position, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
3483 (location ==
CELL) || (location ==
CORNER) || (location ==
EDGE),
3484 "ERROR(pack_grid_data): invalid location")
3499 for (
size_t i = 0, offset = 0; i < count; ++i) {
3501 func_pack(grid, pos[i], (
char*)pack_data_ + offset, pack_sizes[i],
3502 &position, bnd_circle_dt, point_info_dt,
comm);
3503 pack_sizes[i] = position;
3504 offset += (size_t)position;
3507 *pack_data = pack_data_;
3511 void * buffer,
int buffer_size,
int * position,
size_t idx,
3522 MPI_Unpack(buffer, buffer_size, position,
3528 int buffer_size,
int * position,
3530 MPI_Datatype point_info_dt, MPI_Comm
comm) {
3534 MPI_Unpack(buffer, buffer_size, position, &(vertex[idx].global_id), 1,
3538 MPI_Unpack(buffer, buffer_size, position, &(vertex[idx].coord[0]), 3,
3542 buffer, buffer_size, position, &(vertex[idx].owners), point_info_dt,
comm);
3545 buffer, buffer_size, position, idx, temp_vertex_field_data,
comm);
3550 int buffer_size,
int * position,
3552 MPI_Datatype point_info_dt, MPI_Comm
comm) {
3558 MPI_Unpack(buffer, buffer_size, position, &(edge[idx].global_id), 1,
3562 MPI_Unpack(buffer, buffer_size, position, &
edge_type, 1,
3567 MPI_Unpack(buffer, buffer_size, position, edge[idx].
edge_to_vertex, 2,
3571 &(edge[idx].owners), point_info_dt,
comm);
3574 buffer, buffer_size, position, idx, temp_edge_field_data,
comm);
3578 const void * a,
const void * b) {
3589 size_t old_count,
size_t new_count) {
3591 size_t add_count = new_count - old_count;
3596 ((field_data->
masks[i] =
3597 xrealloc(field_data->
masks[i], new_count *
sizeof(*mask))));
3598 for (
size_t i = 0, j = old_count; i < add_count; ++i, ++j) {
3600 *(
size_t*)((
unsigned char*)
reorder_idx + i * reorder_idx_size);
3601 mask[j] = temp_mask[idx];
3610 field_data->
coordinates[i], new_count *
sizeof(*coordinates))));
3611 for (
size_t i = 0, j = old_count; i < add_count; ++i, ++j) {
3613 *(
size_t*)((
unsigned char*)
reorder_idx + i * reorder_idx_size);
3614 coordinates[j][0] = temp_coordinates[idx][0];
3615 coordinates[j][1] = temp_coordinates[idx][1];
3616 coordinates[j][2] = temp_coordinates[idx][2];
3629 size_t count,
size_t * idx,
3632 if (count == 0)
return;
3635 qsort(vertices, count,
sizeof(*vertices),
3642 size_t prev_idx = 0;
3643 size_t add_count = 0;
3647 for (
size_t i = 0, j = 0; i < count; ++i) {
3650 size_t curr_reorder_idx = vertices[i].
reorder_idx;
3653 if (prev_global_id == curr_global_id) {
3654 if (idx != NULL) idx[curr_reorder_idx] = prev_idx;
3658 prev_global_id = curr_global_id;
3662 while ((j < num_total_vertices) && (sorted_vertex_ids[j] < curr_global_id))
3666 if ((j < num_total_vertices) && (sorted_vertex_ids[j] == curr_global_id)) {
3668 if (idx != NULL) idx[curr_reorder_idx] = sorted_vertex_reorder_idx[j];
3669 prev_idx = sorted_vertex_reorder_idx[j];
3675 if (idx != NULL) idx[curr_reorder_idx] = num_total_vertices + add_count;
3676 prev_idx = num_total_vertices + add_count;
3677 if (add_count != i) vertices[add_count] = vertices[i];
3682 size_t new_num_total_vertices = num_total_vertices + add_count;
3685 sizeof(*vertex_coordinates));
3688 int * core_vertex_mask =
3690 sizeof(*core_vertex_mask));
3693 sizeof(*vertex_owners));
3696 sorted_vertex_ids, new_num_total_vertices *
sizeof(*sorted_vertex_ids));
3697 sorted_vertex_reorder_idx =
3699 sorted_vertex_reorder_idx, new_num_total_vertices *
3700 sizeof(*sorted_vertex_reorder_idx));
3703 for (
size_t i = 0, j = num_total_vertices; i < add_count; ++i, ++j) {
3705 vertex_coordinates[j][0] = vertices[i].
coord[0];
3706 vertex_coordinates[j][1] = vertices[i].
coord[1];
3707 vertex_coordinates[j][2] = vertices[i].
coord[2];
3709 core_vertex_mask[j] = 0;
3710 vertex_owners[j] = vertices[i].
owners;
3711 sorted_vertex_ids[j] = vertices[i].
global_id;
3712 sorted_vertex_reorder_idx[j] = j;
3716 &(grid->
field_data.vertex), temp_vertex_field_data,
3717 vertices,
sizeof(*vertices),
3718 num_total_vertices, new_num_total_vertices);
3720 sorted_vertex_ids, new_num_total_vertices, sorted_vertex_reorder_idx);
3732 const void * a,
const void * b) {
3742 size_t count,
size_t * idx,
struct temp_field_data temp_edge_field_data) {
3744 if (count == 0)
return;
3753 size_t prev_idx = 0;
3754 size_t add_count = 0;
3758 for (
size_t i = 0, j = 0; i < count; ++i) {
3764 if (prev_global_id == curr_global_id) {
3765 if (idx != NULL) idx[curr_reorder_idx] = prev_idx;
3769 prev_global_id = curr_global_id;
3773 while ((j < num_total_edges) && (sorted_edge_ids[j] < curr_global_id)) ++j;
3776 if ((j < num_total_edges) && (sorted_edge_ids[j] == curr_global_id)) {
3778 if (idx != NULL) idx[curr_reorder_idx] = sorted_edge_reorder_idx[j];
3779 prev_idx = sorted_edge_reorder_idx[j];
3785 if (idx != NULL) idx[curr_reorder_idx] = num_total_edges + add_count;
3786 prev_idx = num_total_edges + add_count;
3787 if (add_count != i) edges[add_count] = edges[i];
3792 size_t new_num_total_edges = num_total_edges + add_count;
3801 int * core_edge_mask =
3803 sizeof(*core_edge_mask));
3806 sorted_edge_ids, new_num_total_edges *
sizeof(*sorted_edge_ids));
3807 sorted_edge_reorder_idx =
3809 sorted_edge_reorder_idx, new_num_total_edges *
3810 sizeof(*sorted_edge_reorder_idx));
3812 yac_int * vertex_ids =
xmalloc(2 * add_count *
sizeof(*vertex_ids));
3813 size_t * reorder =
xmalloc(2 * add_count *
sizeof(*reorder));
3816 for (
size_t i = 0, j = num_total_edges; i < add_count; ++i, ++j) {
3820 core_edge_mask[j] = 0;
3821 edge_owners[j] = edges[i].
owners;
3822 sorted_edge_ids[j] = edges[i].
global_id;
3823 sorted_edge_reorder_idx[j] = j;
3827 reorder[2 * i + 0] = 2 * num_total_edges + 2 * i + 0;
3828 reorder[2 * i + 1] = 2 * num_total_edges + 2 * i + 1;
3832 &(grid->
field_data.
edge), temp_edge_field_data, edges,
sizeof(*edges),
3833 num_total_edges, new_num_total_edges);
3835 sorted_edge_ids, new_num_total_edges, sorted_edge_reorder_idx);
3842 size_t * edge_to_vertex_ = (
size_t*)&(edge_to_vertex[0][0]);
3844 for (
size_t i = 0, j = 0; i < 2 * add_count; ++i) {
3845 yac_int curr_id = vertex_ids[i];
3846 while ((j < total_num_vertices) && (sorted_vertex_ids[j] < curr_id)) ++j;
3848 (j < total_num_vertices) && (sorted_vertex_ids[j] == curr_id),
3849 "ERROR(yac_dist_grid_add_edges): vertex id not found")
3850 edge_to_vertex_[reorder[i]] = sorted_vertex_reorder_idx[j];
3870 size_t * cell_to_vertex,
size_t * cell_to_edge,
3874 if (
count == 0)
return;
3876 size_t * reorder_idx =
xmalloc(
count *
sizeof(reorder_idx));
3877 for (
size_t i = 0; i <
count; ++i) reorder_idx[i] = i;
3880 for (
size_t i = 0, accu = 0; i <
count;
3881 accu += (size_t)(num_vertices_per_cell[i++])) prescan[i] = accu;
3889 yac_int prev_global_id = cell_ids[0] - 1;
3890 size_t cell_add_count = 0;
3891 size_t relations_add_count = 0;
3895 for (
size_t i = 0, j = 0; i <
count; ++i) {
3897 yac_int curr_global_id = cell_ids[i];
3898 size_t curr_reorder_idx = reorder_idx[i];
3901 if (prev_global_id == curr_global_id) {
3905 prev_global_id = curr_global_id;
3909 while ((j < num_total_cells) && (sorted_cell_ids[j] < curr_global_id)) ++j;
3912 if ((j >= num_total_cells) || (sorted_cell_ids[j] != curr_global_id)) {
3914 if (cell_add_count != i) {
3915 cell_ids[cell_add_count] = curr_global_id;
3916 reorder_idx[cell_add_count] = curr_reorder_idx;
3919 relations_add_count += (size_t)(num_vertices_per_cell[i]);
3923 size_t new_num_total_cells = num_total_cells + cell_add_count;
3924 size_t num_total_relations =
3925 (num_total_cells > 0)?
3928 size_t new_num_total_relations = num_total_relations + relations_add_count;
3931 int * new_num_vertices_per_cell =
3933 sizeof(*new_num_vertices_per_cell));
3934 size_t * new_cell_to_vertex =
3936 sizeof(*new_cell_to_vertex));
3937 size_t * cell_to_vertex_offsets =
3939 sizeof(*cell_to_vertex_offsets));
3940 size_t * new_cell_to_edge =
3942 sizeof(*new_cell_to_edge));
3945 sizeof(*new_cell_bnd_circles));
3946 int * core_cell_mask =
3952 sorted_cell_ids, new_num_total_cells *
sizeof(*sorted_cell_ids));
3953 sorted_cell_reorder_idx =
3955 sorted_cell_reorder_idx, new_num_total_cells *
3956 sizeof(*sorted_cell_reorder_idx));
3959 for (
size_t i = 0, j = num_total_cells; i < cell_add_count;
3962 size_t curr_reorder_idx = reorder_idx[i];
3963 int curr_num_vertices = num_vertices_per_cell[curr_reorder_idx];
3964 size_t curr_relation_idx = prescan[curr_reorder_idx];
3966 new_cell_ids[j] = cell_ids[i];
3967 new_num_vertices_per_cell[j] = curr_num_vertices;
3968 cell_to_vertex_offsets[j] = num_total_relations;
3969 for (
int j = 0; j < curr_num_vertices;
3970 ++j, ++num_total_relations, ++curr_relation_idx) {
3971 new_cell_to_vertex[num_total_relations] =
3972 cell_to_vertex[curr_relation_idx];
3973 new_cell_to_edge[num_total_relations] = cell_to_edge[curr_relation_idx];
3975 core_cell_mask[j] = 0;
3976 sorted_cell_ids[j] = cell_ids[i];
3977 sorted_cell_reorder_idx[j] = j;
3978 new_cell_bnd_circles[j] = cell_bnd_circles[curr_reorder_idx];
3979 new_cell_owners[j] = cell_owners[curr_reorder_idx];
3983 &(grid->
field_data.cell), temp_cell_field_data,
3984 reorder_idx,
sizeof(*reorder_idx),
3985 num_total_cells, new_num_total_cells);
3987 sorted_cell_ids, new_num_total_cells, sorted_cell_reorder_idx);
4029 for (
size_t i = 0; i < field_data.masks_count; ++i) {
4040 field_data.coordinates_count *
4043 for (
size_t i = 0; i < field_data.coordinates_count; ++i) {
4065 struct dist_grid * grid,
size_t count,
void * buffer,
int buffer_size,
4066 MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt, MPI_Comm comm) {
4069 int * num_vertices_per_cell =
xmalloc(count *
sizeof(*num_vertices_per_cell));
4071 xmalloc(count *
sizeof(*cell_bnd_circles));
4076 size_t vertices_array_size = 0;
4077 size_t total_num_vertices = 0;
4080 size_t edges_array_size = 0;
4089 for (
size_t i = 0, buffer_offset = 0; i < count; ++i) {
4092 void * curr_buffer = (
char*)buffer + buffer_offset;
4097 MPI_Unpack(curr_buffer, buffer_size, &position, cell_ids + i, 1,
4098 Xt_int_dt, comm), comm);
4101 curr_buffer, buffer_size, &position, i, temp_cell_field_data, comm);
4104 MPI_Unpack(curr_buffer, buffer_size, &position, &num_vertices, 1,
4105 MPI_INT, comm), comm);
4108 MPI_Unpack(curr_buffer, buffer_size, &position, cell_bnd_circles + i, 1,
4109 bnd_circle_dt, comm), comm);
4112 curr_buffer, buffer_size, &position, cell_owners + i,
4113 point_info_dt, comm);
4115 num_vertices_per_cell[i] = num_vertices;
4118 vertices, vertices_array_size, total_num_vertices + (
size_t)num_vertices);
4120 edges, edges_array_size, total_num_vertices + (
size_t)num_vertices);
4122 &temp_vertex_field_data, total_num_vertices + (
size_t)num_vertices);
4124 &temp_edge_field_data, total_num_vertices + (
size_t)num_vertices);
4126 for (
int j = 0; j < num_vertices; ++j, ++total_num_vertices) {
4128 vertices, total_num_vertices, curr_buffer, buffer_size, &position,
4129 temp_vertex_field_data, point_info_dt, comm);
4131 edges, total_num_vertices, curr_buffer, buffer_size, &position,
4132 temp_edge_field_data, point_info_dt, comm);
4133 vertices[total_num_vertices].
reorder_idx = total_num_vertices;
4134 edges[total_num_vertices].
reorder_idx = total_num_vertices;
4137 buffer_offset += (size_t)position;
4138 buffer_size -= position;
4141 size_t * cell_to_vertex =
xmalloc(total_num_vertices *
sizeof(*cell_to_vertex));
4142 size_t * cell_to_edge =
xmalloc(total_num_vertices *
sizeof(*cell_to_edge));
4145 grid, vertices, total_num_vertices, cell_to_vertex,
4146 temp_vertex_field_data);
4148 grid, edges, total_num_vertices, cell_to_edge,
4149 temp_edge_field_data);
4151 grid, cell_ids, num_vertices_per_cell, cell_bnd_circles, count,
4152 cell_to_vertex, cell_to_edge, cell_owners, temp_cell_field_data);
4158 free(cell_to_vertex);
4162 free(cell_bnd_circles);
4163 free(num_vertices_per_cell);
4168 struct dist_grid * grid,
size_t count,
void * buffer,
int buffer_size,
4169 MPI_Datatype point_info_dt, MPI_Comm comm) {
4176 for (
size_t i = 0, buffer_offset = 0; i < count; ++i) {
4179 void * curr_buffer = (
char*)buffer + buffer_offset;
4182 vertices, i, curr_buffer, buffer_size, &position,
4183 temp_vertex_field_data, point_info_dt, comm);
4186 buffer_offset += (size_t)position;
4187 buffer_size -= position;
4191 grid, vertices, count, NULL, temp_vertex_field_data);
4199 struct dist_grid * grid,
size_t count,
void * buffer,
int buffer_size,
4200 MPI_Datatype point_info_dt, MPI_Comm comm) {
4204 xmalloc(2 * count *
sizeof(*vertices));
4211 for (
size_t i = 0, buffer_offset = 0; i < count; ++i) {
4214 void * curr_buffer = (
char*)buffer + buffer_offset;
4217 edges, i, curr_buffer, buffer_size, &position,
4218 temp_edge_field_data, point_info_dt, comm);
4221 for (
size_t j = 0; j < 2; ++j)
4223 vertices, 2 * i + j, curr_buffer, buffer_size, &position,
4224 temp_vertex_field_data, point_info_dt, comm);
4226 buffer_offset += (size_t)position;
4227 buffer_size -= position;
4231 grid, vertices, 2 * count, NULL, temp_vertex_field_data);
4233 grid, edges, count, NULL, temp_edge_field_data);
4244 void * buffer,
int buffer_size, MPI_Datatype bnd_circle_dt,
4245 MPI_Datatype point_info_dt, MPI_Comm comm) {
4248 (location ==
CELL) || (location ==
CORNER) || (location ==
EDGE),
4249 "ERROR(unpack_grid_data): invalid location")
4255 grid, count, buffer, buffer_size, bnd_circle_dt, point_info_dt, comm);
4259 grid, count, buffer, buffer_size, point_info_dt, comm);
4263 grid, count, buffer, buffer_size, point_info_dt, comm);
4269 const void * a,
const void * b) {
4279 MPI_Comm comm = grid->
comm;
4280 int comm_rank, comm_size;
4284 size_t remote_count = 0;
4286 for (
size_t i = 0; i < count; ++i) {
4287 if (ids[i].global_id == XT_INT_MAX) idx[i] = SIZE_MAX;
4288 else if (ids[i].
data.rank != comm_rank) ++remote_count;
4293 xmalloc(remote_count *
sizeof(*missing_ids));
4295 for (
size_t i = 0, j = 0; i < count; ++i) {
4296 if ((ids[i].
data.rank != comm_rank) &&
4298 missing_ids[j].
data = ids[i];
4306 grid, location, missing_ids, &remote_count, idx);
4309 qsort(missing_ids, remote_count,
sizeof(*missing_ids),
4312 int * sendcounts, * recvcounts, * sdispls, * rdispls;
4314 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
4316 for (
size_t i = 0; i < remote_count; ++i)
4320 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4323 (size_t)(rdispls[comm_size-1]) + (size_t)(recvcounts[comm_size-1]);
4325 uint64_t * uint64_t_buffer =
4326 xmalloc((remote_count + recv_count) *
sizeof(*uint64_t_buffer));
4327 uint64_t * orig_pos_send_buffer = uint64_t_buffer;
4328 uint64_t * orig_pos_recv_buffer = uint64_t_buffer + remote_count;
4331 for (
size_t i = 0; i < remote_count; ++i) {
4333 if (rank != comm_rank)
4334 orig_pos_send_buffer[sdispls[rank+1]++] =
4340 orig_pos_send_buffer, sendcounts, sdispls,
4341 orig_pos_recv_buffer, recvcounts, rdispls, comm);
4348 void * packed_send_data = NULL;
4349 int * pack_sizes =
xmalloc(recv_count *
sizeof(*pack_sizes));
4353 grid, location, orig_pos_recv_buffer, recv_count,
4354 &packed_send_data, pack_sizes,
4355 bnd_circle_dt, point_info_dt, comm);
4356 free(uint64_t_buffer);
4358 memset(sendcounts, 0, (
size_t)comm_size *
sizeof(*sendcounts));
4359 for (
int i = 0, k = 0; i < comm_size; ++i)
4360 for (
int j = 0; j < recvcounts[i]; ++j, ++k)
4361 sendcounts[i] += pack_sizes[k];
4366 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4369 (size_t)(rdispls[comm_size-1]) + (size_t)(recvcounts[comm_size-1]);
4371 void * packed_recv_data =
xmalloc(recv_count);
4375 packed_send_data, sendcounts, sdispls+1,
4376 packed_recv_data, recvcounts, rdispls, comm);
4380 grid, location, remote_count, packed_recv_data, (
int)recv_count,
4381 bnd_circle_dt, point_info_dt, comm);
4388 grid, location, missing_ids, &remote_count, idx);
4391 free(packed_recv_data);
4392 free(packed_send_data);
4400 double coord[3],
struct dist_grid * grid,
size_t cell_idx,
4403 MPI_Comm comm = grid_pair->
comm;
4404 int comm_rank, comm_size;
4408 int * ranks =
xmalloc(count *
sizeof(ranks));
4414 int * sendcounts, * recvcounts, * sdispls, * rdispls;
4416 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
4417 for (
size_t i = 0; i < count; ++i) sendcounts[ranks[i]]++;
4419 size_t local_count = sendcounts[comm_rank];
4420 sendcounts[comm_rank] = 0;
4423 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4425 size_t remote_count = sdispls[comm_size] + sendcounts[comm_size-1];
4426 size_t request_count = rdispls[comm_size-1] + recvcounts[comm_size-1];
4429 xmalloc((remote_count + request_count + local_count) *
4430 sizeof(*coord_buffer));
4434 coord_buffer + remote_count + request_count;
4437 for (
size_t i = 0, k = 0; i < count; ++i) {
4438 if (ranks[i] == comm_rank) {
4439 coord_local_buffer[k][0] = search_coords[i][0];
4440 coord_local_buffer[k][1] = search_coords[i][1];
4441 coord_local_buffer[k][2] = search_coords[i][2];
4444 int displ = sdispls[ranks[i]+1]++;
4445 coord_send_buffer[displ][0] = search_coords[i][0];
4446 coord_send_buffer[displ][1] = search_coords[i][1];
4447 coord_send_buffer[displ][2] = search_coords[i][2];
4451 MPI_Datatype dt_coord;
4452 yac_mpi_call(MPI_Type_contiguous(3, MPI_DOUBLE, &dt_coord), comm);
4457 coord_send_buffer, sendcounts, sdispls,
4458 coord_recv_buffer, recvcounts, rdispls,
4459 sizeof(*coord_send_buffer), dt_coord, comm);
4463 size_t * local_cells =
4464 xmalloc((request_count + local_count) *
sizeof(*local_cells));
4468 grid_pair, grid_id, coord_recv_buffer, request_count + local_count,
4472 xmalloc((remote_count + request_count) *
4473 sizeof(*single_remote_point_buffer));
4482 for (
size_t i = 0; i < request_count; ++i) {
4483 size_t cell_idx = local_cells[i];
4484 id_send_buffer[i].
data.
rank = comm_rank;
4485 if (cell_idx != SIZE_MAX) {
4489 id_send_buffer[i].
global_id = XT_INT_MAX;
4494 MPI_Datatype single_remote_point_dt =
4499 id_send_buffer, recvcounts, rdispls, id_recv_buffer, sendcounts, sdispls,
4500 sizeof(*id_send_buffer), single_remote_point_dt,
comm);
4504 size_t * new_local_cells =
4505 xmalloc(remote_count *
sizeof(*new_local_cells));
4510 grid, id_recv_buffer, remote_count,
CELL, new_local_cells);
4513 for (
size_t i = 0, k = 0; i < count; ++i) {
4514 if (ranks[i] == comm_rank) {
4515 cells[i] = local_cells[request_count + k];
4518 int displ = sdispls[ranks[i]]++;
4519 cells[i] = new_local_cells[displ];
4523 free(new_local_cells);
4524 free(single_remote_point_buffer);
4536 grid_pair, grid_id, search_coords, count, cells,
coord_in_cell);
4560 total_count, field_coords, global_ids);
4564 total_count, field_coords, global_ids, mask);
4573 int const * core_mask =
4580 for (
size_t i = 0, j = 0; i < total_count; ++i) {
4582 points[j].global_id = global_ids[i];
4583 points[j].data.rank = comm_rank;
4584 points[j].data.orig_pos = i;
4585 if (n == ++j)
return;
4591 for (
size_t i = 0, j = 0; i < total_count; ++i) {
4592 if (core_mask[i] && mask[i]) {
4593 points[j].global_id = global_ids[i];
4594 points[j].data.rank = comm_rank;
4595 points[j].data.orig_pos = i;
4596 if (n == ++j)
return;
4603 size_t n, MPI_Comm comm) {
4606 MPI_Datatype single_remote_point_dt =
4608 single_nnn_search_result_dt_,
4609 nnn_search_result_dt;
4610 int array_of_blocklengths[] = {1, 1};
4611 const MPI_Aint array_of_displacements[] =
4612 {(MPI_Aint)((
void*)&(dummy.
point_info) - (
void*)&dummy),
4613 (MPI_Aint)((
void*)&(dummy.
cos_angle) - (
void*)&dummy)};
4614 const MPI_Datatype array_of_types[] = {single_remote_point_dt, MPI_DOUBLE};
4616 MPI_Type_create_struct(
4617 2, array_of_blocklengths, array_of_displacements,
4618 array_of_types, &single_nnn_search_result_dt_), comm);
4619 MPI_Datatype single_nnn_search_result_dt =
4622 MPI_Type_contiguous(
4623 (
int)n, single_nnn_search_result_dt, &nnn_search_result_dt), comm);
4624 yac_mpi_call(MPI_Type_commit(&nnn_search_result_dt), comm);
4625 yac_mpi_call(MPI_Type_free(&single_nnn_search_result_dt), comm);
4626 yac_mpi_call(MPI_Type_free(&single_remote_point_dt), comm);
4627 return nnn_search_result_dt;
4631 void const * a,
void const * b) {
4640 if (ret)
return ret;
4649 if (insert_size == 0)
return;
4652 qsort(array, array_size,
sizeof(*array),
4654 qsort(insert, insert_size,
sizeof(*insert),
4658 memcpy(temp_results, array, array_size *
sizeof(*array));
4660 for (
size_t i = 0, j = 0, k = 0; k < array_size; ++k) {
4665 &temp_results[i], insert + j):(-1);
4668 if (k != i) array[k] = temp_results[i];
4671 array[k] = insert[j];
4683 MPI_Comm comm = grid_pair->
comm;
4684 int comm_rank, comm_size;
4691 uint64_t unmasked_local_count =
4694 uint64_t * unmasked_local_counts =
4695 xmalloc((
size_t)comm_size *
sizeof(*unmasked_local_counts));
4700 &unmasked_local_count, 1, MPI_UINT64_T,
4701 unmasked_local_counts, 1, MPI_UINT64_T,
comm),
comm);
4705 for (
int i = 0; i < comm_size; ++i)
4706 flag |= unmasked_local_counts[i] < (uint64_t)n;
4711 uint64_t global_num_unmasked_count = 0;
4712 for (
int i = 0; i < comm_size; ++i)
4713 global_num_unmasked_count += unmasked_local_counts[i];
4716 (
size_t)global_num_unmasked_count >= n,
4717 "ERROR(yac_dist_grid_pair_do_nnn_search): insufficient number of "
4720 int * sendcounts, * recvcounts, * sdispls, * rdispls;
4722 1, &sendcounts, &recvcounts, &sdispls, &rdispls,
comm);
4723 memset(recvcounts, 0, (
size_t)comm_size *
sizeof(*recvcounts));
4729 sendcounts, recvcounts, comm_rank, comm_size);
4731 size_t local_send_count = (size_t)(
MIN(unmasked_local_count, n));
4733 for (
int i = 0, raccu = 0; i < comm_size; ++i) {
4736 sendcounts[i] *= (int)local_send_count;
4737 raccu += (recvcounts[i] *= (int)(
MIN(unmasked_local_counts[i], n)));
4741 (size_t)(recvcounts[comm_size-1]) + (size_t)(rdispls[comm_size-1]);
4745 (local_send_count + recv_count) *
sizeof(*single_remote_point_buffer));
4748 single_remote_point_buffer + local_send_count;
4752 grid, field, comm_rank, local_send_count, local_send_ids);
4754 MPI_Datatype single_remote_point_dt =
4759 local_send_ids, sendcounts, sdispls, recv_ids, recvcounts, rdispls,
4760 sizeof(*local_send_ids), single_remote_point_dt, comm);
4762 yac_mpi_call(MPI_Type_free(&single_remote_point_dt), comm);
4764 size_t * dummy =
xmalloc(recv_count *
sizeof(*dummy));
4769 grid, recv_ids, recv_count, field.
location, dummy);
4772 free(single_remote_point_buffer);
4775 free(unmasked_local_counts);
4780 double * cos_angles = NULL;
4781 size_t cos_angles_array_size = 0;
4783 size_t result_coords_array_size = 0;
4784 size_t * result_points = NULL;
4785 size_t result_points_array_size = 0;
4786 size_t * num_results =
xcalloc(count,
sizeof(*num_results));
4790 sphere_part, count, search_coords, n, &cos_angles, &cos_angles_array_size,
4791 &result_coords, &result_coords_array_size, &result_points,
4792 &result_points_array_size, num_results);
4794 size_t max_num_results = 0;
4795 for (
size_t i = 0; i < count; ++i)
4796 if (max_num_results < num_results[i]) max_num_results = num_results[i];
4804 xmalloc(max_num_results *
sizeof(*temp_results));
4806 for (
size_t i = 0, k = 0; i < count; ++i) {
4809 for (
size_t j = 0; j < n; ++j, ++k) {
4810 size_t curr_local_id = result_points[k];
4814 curr_results[j].
cos_angle = cos_angles[k];
4817 for (
size_t j = n; j < num_results[i]; ++j, ++k) {
4818 size_t curr_local_id = result_points[k];
4822 temp_results[j-n].
cos_angle = cos_angles[k];
4829 int * request_ranks = NULL;
4830 size_t request_ranks_array_size = 0;
4831 size_t num_request_ranks = 0;
4832 int * num_requests =
xmalloc(count *
sizeof(*num_requests));
4835 for (
size_t i = 0; i < count; ++i) {
4839 memcpy(result_bnd_circle.
base_vector, search_coords[i],
4840 3 *
sizeof(search_coords[0][0]));
4844 field_coords[results[(i + 1) * n - 1].point_info.data.orig_pos]);
4847 num_request_ranks + (
size_t)comm_size);
4851 int * curr_request_ranks = request_ranks + num_request_ranks;
4854 curr_request_ranks, num_requests + i);
4857 int new_num_requests = 0;
4858 for (
int j = 0; j < num_requests[i]; ++j) {
4859 if (curr_request_ranks[j] == comm_rank)
continue;
4860 if (new_num_requests != j)
4861 curr_request_ranks[new_num_requests] = curr_request_ranks[j];
4865 num_request_ranks += (size_t)(num_requests[i] = new_num_requests);
4868 int * sendcounts, * recvcounts, * sdispls, * rdispls;
4870 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
4872 for (
size_t i = 0; i < num_request_ranks; ++i) sendcounts[request_ranks[i]]++;
4875 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4876 int num_src_msg = 0, num_dst_msg = 0;
4877 for (
int i = 0; i < comm_size; ++i) {
4878 num_src_msg += (recvcounts[i] > 0);
4879 num_dst_msg += (sendcounts[i] > 0);
4883 (size_t)(rdispls[comm_size-1]) + (size_t)(recvcounts[comm_size-1]);
4886 xmalloc((num_request_ranks + recv_count) *
sizeof(*coord_buffer));
4891 for (
size_t i = 0, k = 0; i < count; ++i)
4892 for (
size_t j = 0; j < num_requests[i]; ++j, ++k)
4893 memcpy(send_request_coords[sdispls[request_ranks[k]+1]++],
4894 search_coords[i], 3 *
sizeof(
double));
4900 send_request_coords, sendcounts, sdispls,
4901 recv_request_coords, recvcounts, rdispls,
4902 sizeof(*send_request_coords), coord_dt, comm);
4906 num_results =
xrealloc(num_results, recv_count *
sizeof(*num_results));
4907 memset(num_results, 0, recv_count *
sizeof(*num_results));
4911 sphere_part, recv_count, recv_request_coords, n, &cos_angles,
4912 &cos_angles_array_size, &result_coords, &result_coords_array_size,
4913 &result_points, &result_points_array_size, num_results);
4917 for (
size_t i = 0; i < recv_count; ++i)
4918 if (max_num_results < num_results[i]) max_num_results = num_results[i];
4920 xrealloc(temp_results, max_num_results *
sizeof(*temp_results));
4924 xmalloc(n * (num_request_ranks + recv_count) *
sizeof(*request_results));
4927 request_results + n * recv_count;
4929 for (
size_t i = 0, k = 0; i < recv_count; ++i) {
4932 for (
size_t j = 0; j < n; ++j, ++k) {
4933 size_t curr_local_id = result_points[k];
4937 curr_results[j].
cos_angle = cos_angles[k];
4940 for (
size_t j = n; j < num_results[i]; ++j, ++k) {
4941 size_t curr_local_id = result_points[k];
4945 temp_results[j-n].
cos_angle = cos_angles[k];
4951 free(result_coords);
4952 free(result_points);
4955 MPI_Datatype nnn_search_result_dt =
4960 send_request_results, recvcounts, rdispls,
4961 recv_request_results, sendcounts, sdispls,
4962 n *
sizeof(*send_request_results), nnn_search_result_dt, comm);
4964 yac_mpi_call(MPI_Type_free(&nnn_search_result_dt), comm);
4967 for (
size_t i = 0, k = 0; i < count; ++i) {
4969 for (
size_t j = 0; j < num_requests[i]; ++k, ++j) {
4970 size_t pos = (size_t)(sdispls[request_ranks[k]]++);
4972 recv_request_results + n * pos;
4976 free(request_ranks);
4977 free(request_results);
4981 size_t remote_count = 0;
4982 for (
size_t i = 0; i < n * count; ++i)
4988 for (
size_t i = 0, k = 0; i < n * count; ++i)
4989 if (results[i].point_info.data.rank != comm_rank)
4996 local_ids + n * count - remote_count);
5001 for (
size_t i = 0, offset = n * count - remote_count; i < n * count; ++i) {
5002 if (results[i].point_info.data.rank == comm_rank)
5003 local_ids[i] = (size_t)(results[i].point_info.data.orig_pos);
5004 else local_ids[i] = local_ids[offset++];
5013 size_t * num_results_per_bnd_circle,
struct interp_field field) {
5015 MPI_Comm comm = grid_pair->
comm;
5016 int comm_rank, comm_size;
5023 int * num_ranks =
xcalloc(count,
sizeof(*num_ranks));
5024 int * rank_buffer = NULL;
5025 size_t rank_buffer_size = 0;
5026 size_t rank_buffer_array_size = 0;
5028 for (
size_t i = 0; i < count; ++i) {
5031 rank_buffer_size + (
size_t)comm_size);
5039 rank_buffer + rank_buffer_size, num_ranks + i);
5040 rank_buffer_size += (size_t)(num_ranks[i]);
5044 xmalloc(4 * (
size_t)comm_size *
sizeof(*int_buffer));
5045 int * result_sendcounts = int_buffer + 0 * comm_size;
5046 int * result_recvcounts = int_buffer + 1 * comm_size;
5047 int * result_sdispls = int_buffer + 2 * comm_size;
5048 int * result_rdispls = int_buffer + 3 * comm_size;
5050 int * sendcounts, * recvcounts, * sdispls, * rdispls;
5052 1, &sendcounts, &recvcounts, &sdispls, &rdispls,
comm);
5054 for (
size_t i = 0, offset = 0; i < count; ++i) {
5055 int curr_num_ranks = num_ranks[i];
5056 int * ranks = rank_buffer + offset;
5057 offset += (size_t)curr_num_ranks;
5058 for (
int j = 0; j < curr_num_ranks; ++j) sendcounts[ranks[j]]++;
5062 size_t local_count = (size_t)(sendcounts[comm_rank]);
5063 sendcounts[comm_rank] = 0;
5066 1, sendcounts, recvcounts, sdispls, rdispls,
comm);
5069 (size_t)(sdispls[comm_size]) + (size_t)(sendcounts[comm_size-1]);
5071 (size_t)(rdispls[comm_size-1]) + (size_t)(recvcounts[comm_size-1]);
5074 xmalloc((send_count + recv_count + local_count) *
5075 sizeof(*bnd_circle_buffer));
5077 struct bounding_circle * recv_buffer = bnd_circle_buffer + send_count;
5079 bnd_circle_buffer + send_count + recv_count;
5082 for (
size_t i = 0, offset = 0, local_offset = 0; i < count; ++i) {
5083 int curr_num_ranks = num_ranks[i];
5084 int * ranks = rank_buffer + offset;
5085 offset += (size_t)curr_num_ranks;
5086 for (
int j = 0; j < curr_num_ranks; ++j) {
5087 int rank = ranks[j];
5088 if (rank == comm_rank)
5089 local_buffer[local_offset++] = bnd_circles[i];
5091 send_buffer[sdispls[rank + 1]++] = bnd_circles[i];
5100 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls,
5101 sizeof(*send_buffer), bnd_circle_dt, comm);
5108 size_t * local_cells = NULL;
5109 size_t * num_local_cells_per_bnd_circle =
5110 xmalloc((recv_count + local_count) *
5111 sizeof(*num_local_cells_per_bnd_circle));
5113 uint64_t * uint64_t_buffer =
5114 xmalloc((send_count + recv_count + local_count) *
5115 sizeof(*uint64_t_buffer));
5116 uint64_t * num_local_cells_per_bnd_circle_uint64_t = uint64_t_buffer;
5117 uint64_t * num_remote_cells_per_bnd_circle =
5118 uint64_t_buffer + recv_count + local_count;
5122 cell_sphere_part, recv_buffer, recv_count + local_count, &local_cells,
5123 num_local_cells_per_bnd_circle);
5125 int const * field_mask =
5132 for (
size_t i = 0, offset = 0, new_offset = 0;
5133 i < recv_count + local_count; ++i) {
5136 size_t curr_num_results = num_local_cells_per_bnd_circle[i];
5139 uint64_t new_num_results = 0;
5140 for (
size_t j = 0; j < curr_num_results; ++j, ++offset) {
5141 size_t local_cell_id = local_cells[offset];
5143 cell_bnd_circles + local_cell_id))
continue;
5144 if ((field_mask == NULL) || (field_mask[local_cell_id])) {
5145 if (offset != new_offset) local_cells[new_offset] = local_cell_id;
5150 num_local_cells_per_bnd_circle_uint64_t[i] = new_num_results;
5152 free(num_local_cells_per_bnd_circle);
5153 free(bnd_circle_buffer);
5157 num_local_cells_per_bnd_circle_uint64_t, recvcounts, rdispls,
5158 num_remote_cells_per_bnd_circle, sendcounts, sdispls,
5159 sizeof(*num_local_cells_per_bnd_circle_uint64_t), MPI_UINT64_T, comm);
5161 for (
int i = 0, saccu = 0, raccu = 0, soffset = 0, roffset = 0;
5162 i < comm_size; ++i) {
5164 result_sdispls[i] = saccu;
5165 result_rdispls[i] = raccu;
5167 int sendcount = recvcounts[i];
5168 int recvcount = sendcounts[i];
5170 result_sendcounts[i] = 0;
5171 result_recvcounts[i] = 0;
5172 for (
int j = 0; j < sendcount; ++j, ++soffset)
5173 result_sendcounts[i] +=
5174 (
int)(num_local_cells_per_bnd_circle_uint64_t[soffset]);
5175 for (
int j = 0; j < recvcount; ++j, ++roffset)
5176 result_recvcounts[i] += (
int)(num_remote_cells_per_bnd_circle[roffset]);
5178 saccu += result_sendcounts[i];
5179 raccu += result_recvcounts[i];
5184 size_t result_local_count = 0;
5185 for (
size_t i = recv_count; i < recv_count + local_count; ++i)
5186 result_local_count += (
size_t)(num_local_cells_per_bnd_circle_uint64_t[i]);
5188 size_t result_send_count = (size_t)(result_sdispls[comm_size-1]) +
5189 (size_t)(result_sendcounts[comm_size-1]);
5190 size_t result_recv_count = (size_t)(result_rdispls[comm_size-1]) +
5191 (size_t)(result_recvcounts[comm_size-1]);
5194 xmalloc((result_recv_count + result_send_count) *
5195 sizeof(*single_remote_point_buffer));
5202 for (
size_t i = 0; i < result_send_count; ++i) {
5203 size_t local_cell_id = local_cells[i];
5204 id_send_buffer[i].
global_id = cell_ids[local_cell_id];
5205 id_send_buffer[i].
data.
rank = comm_rank;
5209 MPI_Datatype single_remote_point_dt =
5214 id_send_buffer, result_sendcounts, result_sdispls,
5215 id_recv_buffer, result_recvcounts, result_rdispls,
5216 sizeof(*id_send_buffer), single_remote_point_dt, comm);
5218 yac_mpi_call(MPI_Type_free(&single_remote_point_dt), comm);
5220 size_t * new_local_cells =
5221 xmalloc((result_recv_count + result_local_count) *
5222 sizeof(*new_local_cells));
5224 memcpy(new_local_cells + result_recv_count,
5225 local_cells + result_send_count,
5226 result_local_count *
sizeof(*new_local_cells));
5232 dist_grid, id_recv_buffer, result_recv_count,
CELL, new_local_cells);
5234 free(single_remote_point_buffer);
5236 size_t * reorder_idx =
5237 xmalloc((result_recv_count + result_local_count) *
sizeof(*reorder_idx));
5240 num_results_per_bnd_circle, 0, count *
sizeof(*num_results_per_bnd_circle));
5242 for (
size_t i = 0, offset = 0, reorder = 0, local_search_idx = recv_count,
5243 local_offset = result_recv_count; i < count; ++i) {
5244 int curr_num_ranks = num_ranks[i];
5245 int * ranks = rank_buffer + offset;
5246 offset += (size_t)curr_num_ranks;
5247 for (
int j = 0; j < curr_num_ranks; ++j) {
5248 int rank = ranks[j];
5249 if (rank == comm_rank) {
5250 uint64_t curr_num_results =
5251 num_local_cells_per_bnd_circle_uint64_t[local_search_idx++];
5252 num_results_per_bnd_circle[i] += (size_t)curr_num_results;
5253 for (uint64_t k = 0; k < curr_num_results; ++k, ++reorder)
5254 reorder_idx[local_offset++] = reorder;
5256 int rank_pos = sdispls[rank]++;
5257 uint64_t curr_num_results = num_remote_cells_per_bnd_circle[rank_pos];
5258 num_results_per_bnd_circle[i] += (size_t)curr_num_results;
5259 for (uint64_t k = 0; k < curr_num_results; ++k, ++reorder)
5260 reorder_idx[result_rdispls[rank]++] = reorder;
5264 free(uint64_t_buffer);
5271 reorder_idx, result_recv_count + result_local_count, new_local_cells);
5275 for (
size_t i = 0, offset = 0, new_offset = 0; i < count; ++i) {
5277 size_t * curr_local_cells = new_local_cells + offset;
5278 size_t curr_num_results_per_bnd_circle = num_results_per_bnd_circle[i];
5279 size_t new_num_results_per_bnd_circle = 0;
5280 size_t prev_cell = SIZE_MAX;
5281 offset += curr_num_results_per_bnd_circle;
5284 curr_local_cells, curr_num_results_per_bnd_circle, NULL);
5286 for (
size_t j = 0; j < curr_num_results_per_bnd_circle; ++j) {
5287 size_t curr_cell = curr_local_cells[j];
5288 if (curr_cell != prev_cell) {
5289 new_local_cells[new_offset++] = (prev_cell = curr_cell);
5290 ++new_num_results_per_bnd_circle;
5293 num_results_per_bnd_circle[i] = new_num_results_per_bnd_circle;
5296 *cells = new_local_cells;
5300 struct dist_grid_pair * grid_pair,
int search_grid_id,
int result_grid_id,
5301 size_t * search_cells,
size_t count,
size_t ** result_cells,
5302 size_t * num_results_per_search_cell,
struct