498 int ids_available_local =
500 int ids_available_global;
502 &ids_available_local, &ids_available_global, 1,
503 MPI_INT, MPI_MAX, comm), comm);
508 ids_available_local || !ids_available_global,
509 "ERROR(generate_vertex_ids): inconsistent global ids")
513 if (ids_available_global)
return vertex_ranks;
518 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
520 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
523 sendcounts[vertex_ranks[i]]++;
526 1, sendcounts, recvcounts, sdispls, rdispls, comm);
529 size_t dist_vertex_count =
530 recvcounts[comm_size - 1] + rdispls[comm_size - 1];
533 xmalloc((orig_vertex_count + dist_vertex_count) *
534 sizeof(*id_reorder_coord_buffer));
536 id_reorder_coord_send_buffer = id_reorder_coord_buffer;
538 id_reorder_coord_recv_buffer =
539 id_reorder_coord_buffer + orig_vertex_count;
542 for (
size_t i = 0; i < orig_vertex_count; ++i) {
543 size_t pos = sdispls[vertex_ranks[i] + 1]++;
545 for (
int j = 0; j < 3; ++j)
546 id_reorder_coord_send_buffer[pos].
coord[j] =
550 MPI_Datatype id_reorder_coord_coord_dt =
555 id_reorder_coord_send_buffer, sendcounts, sdispls,
556 id_reorder_coord_recv_buffer, recvcounts, rdispls,
557 sizeof(*id_reorder_coord_send_buffer), id_reorder_coord_coord_dt, comm);
559 yac_mpi_call(MPI_Type_free(&id_reorder_coord_coord_dt), comm);
561 for (
size_t i = 0; i < dist_vertex_count; ++i)
566 id_reorder_coord_recv_buffer, dist_vertex_count,
569 size_t unique_count = dist_vertex_count > 0;
573 for (
size_t i = 0; i < dist_vertex_count; ++i) {
583 unique_count <= (
size_t)XT_INT_MAX,
584 "ERROR(generate_vertex_ids): global_id out of bounds")
592 MPI_SUM, comm), comm);
595 if (comm_rank == 0) id_offset = 0;
598 ((
size_t)id_offset + unique_count) <= (
size_t)XT_INT_MAX,
599 "ERROR(generate_vertex_ids): global_id out of bounds")
602 for (
size_t i = 0; i < dist_vertex_count; ++i)
603 id_reorder_coord_recv_buffer[i].
global_id += id_offset;
606 qsort(id_reorder_coord_recv_buffer, dist_vertex_count,
607 sizeof(*id_reorder_coord_recv_buffer),
610 MPI_Datatype id_reorder_coord_id_dt =
615 id_reorder_coord_recv_buffer, recvcounts, rdispls,
616 id_reorder_coord_send_buffer, sendcounts, sdispls,
617 sizeof(*id_reorder_coord_send_buffer), id_reorder_coord_id_dt, comm);
619 yac_mpi_call(MPI_Type_free(&id_reorder_coord_id_dt), comm);
627 vertex_ids[id_reorder_coord_send_buffer[i].
reorder_idx] =
628 id_reorder_coord_send_buffer[i].
global_id;
630 free(id_reorder_coord_buffer);
639 int max_num_vertices_per_cell, MPI_Comm comm) {
648 int comm_rank, comm_size;
654 int ids_available_local[2], ids_available_global[2];
655 ids_available_local[0] =
657 ids_available_local[1] =
659 yac_mpi_call(MPI_Allreduce(ids_available_local, ids_available_global, 2,
660 MPI_INT, MPI_MAX, comm), comm);
664 (ids_available_local[0] == ids_available_global[0]),
665 "ERROR(generate_ce_ids): inconsistent global ids")
669 (ids_available_local[1] == ids_available_global[1]),
670 "ERROR(generate_ce_ids): inconsistent global ids")
673 if (ids_available_global[0] && ids_available_global[1])
return;
677 (((ids_available_global[0])?0:(
num_cells)) +
678 ((ids_available_global[1])?0:(
num_edges))) *
679 sizeof(*rank_buffer));
680 int * cell_ranks = rank_buffer;
682 rank_buffer + ((ids_available_global[0])?0:(
num_cells));
684 size_t * size_t_buffer =
685 xmalloc((8 * (
size_t)comm_size + 1) *
sizeof(*size_t_buffer));
686 size_t * sendcounts = size_t_buffer + 0 * comm_size;
687 size_t * recvcounts = size_t_buffer + 2 * comm_size;
688 size_t * total_sendcounts = size_t_buffer + 4 * comm_size;
689 size_t * total_recvcounts = size_t_buffer + 5 * comm_size;
690 size_t * total_sdispls = size_t_buffer + 6 * comm_size;
691 size_t * total_rdispls = size_t_buffer + 7 * comm_size + 1;
692 memset(sendcounts, 0, 2 * (
size_t)comm_size *
sizeof(*sendcounts));
694 yac_int * cell_to_vertex_ids = NULL;
696 if (!ids_available_global[0]) {
704 sizeof(*cell_to_vertex_ids));
709 yac_int * curr_cell_to_vertex_ids =
710 cell_to_vertex_ids + i * max_num_vertices_per_cell;
713 if (curr_num_vertices > 0) {
714 size_t * curr_cell_vertices =
716 size_t min_vertex = curr_cell_vertices[0];
717 curr_cell_to_vertex_ids[0] =
vertex_ids[min_vertex];
718 for (
int j = 1; j < curr_num_vertices; ++j) {
719 size_t curr_vertex_idx = curr_cell_vertices[j];
722 if (curr_cell_to_vertex_ids[0] == curr_vertex_id)
723 min_vertex = curr_vertex_idx;
725 cell_rank = vertex_ranks[min_vertex];
729 for (
int j = curr_num_vertices; j < max_num_vertices_per_cell; ++j)
730 curr_cell_to_vertex_ids[j] = XT_INT_MAX;
732 sendcounts[2 * ((cell_ranks[i] = cell_rank)) + 0]++;
736 yac_int * edge_to_vertex_ids = NULL;
738 if (!ids_available_global[1]) {
747 yac_int * curr_edge_vertex_ids = edge_to_vertex_ids + 2 * i;
748 curr_edge_vertex_ids[0] =
vertex_ids[curr_edge_to_vertex[0]];
749 curr_edge_vertex_ids[1] =
vertex_ids[curr_edge_to_vertex[1]];
751 if (curr_edge_vertex_ids[0] > curr_edge_vertex_ids[1]) {
752 yac_int temp = curr_edge_vertex_ids[0];
753 curr_edge_vertex_ids[0] = curr_edge_vertex_ids[1];
754 curr_edge_vertex_ids[1] = temp;
756 2 * ((edge_ranks[i] = vertex_ranks[curr_edge_to_vertex[1]])) + 1]++;
759 2 * ((edge_ranks[i] = vertex_ranks[curr_edge_to_vertex[0]])) + 1]++;
768 total_sdispls[0] = 0;
769 size_t recv_counts[2] = {0,0};
770 size_t saccu = 0, raccu = 0;
771 for (
int i = 0; i < comm_size; ++i) {
772 total_sdispls[i+1] = saccu;
773 total_rdispls[i] = raccu;
774 recv_counts[0] += recvcounts[2 * i + 0];
775 recv_counts[1] += recvcounts[2 * i + 1];
776 total_sendcounts[i] = sendcounts[2 * i + 0] *
777 (size_t)max_num_vertices_per_cell +
778 sendcounts[2 * i + 1] * 2;
779 total_recvcounts[i] = recvcounts[2 * i + 0] *
780 (size_t)max_num_vertices_per_cell +
781 recvcounts[2 * i + 1] * 2;
782 saccu += total_sendcounts[i];
783 raccu += total_recvcounts[i];
785 size_t local_data_count = total_sendcounts[comm_size - 1] +
786 total_sdispls[comm_size];
787 size_t recv_count = total_recvcounts[comm_size - 1] +
788 total_rdispls[comm_size - 1];
791 xcalloc((local_data_count + recv_count),
sizeof(*yac_int_buffer));
792 yac_int * send_buffer = yac_int_buffer;
793 yac_int * recv_buffer = yac_int_buffer + local_data_count;
796 if (!ids_available_global[0])
798 for (
int j = 0; j < max_num_vertices_per_cell; ++j)
799 send_buffer[total_sdispls[cell_ranks[i] + 1]++] =
800 cell_to_vertex_ids[i * max_num_vertices_per_cell + j];
801 if (!ids_available_global[1])
803 for (
int j = 0; j < 2; ++j)
804 send_buffer[total_sdispls[edge_ranks[i] + 1]++] =
805 edge_to_vertex_ids[2 * i + j];
807 free(edge_to_vertex_ids);
808 free(cell_to_vertex_ids);
811 yac_alltoallv_yac_int_p2p(
812 send_buffer, total_sendcounts, total_sdispls,
813 recv_buffer, total_recvcounts, total_rdispls, comm);
816 xmalloc((recv_counts[0] + recv_counts[1]) *
sizeof(*n_ids_reorder_buffer));
818 {n_ids_reorder_buffer, n_ids_reorder_buffer + recv_counts[0]};
821 int index_counts[2] = {max_num_vertices_per_cell, 2};
825 for (
int i = 0; i < comm_size; ++i) {
826 for (
int j = 0; j < 2; ++j) {
827 size_t curr_count = recvcounts[2 * i + j];
828 for (
size_t k = 0; k < curr_count;
833 offset += index_counts[j];
838 for (
int i = 0; i < 2; ++i) {
840 if (ids_available_global[i])
continue;
845 size_t unique_count = recv_counts[i] > 0;
849 for (
size_t j = 0; j < recv_counts[i]; ++j, ++curr) {
858 unique_count <= (
size_t)XT_INT_MAX,
859 "ERROR(generate_global_ce_ids): global_id out of bounds")
866 MPI_SUM, comm), comm);
867 if (comm_rank == 0) id_offset = 0;
870 ((
size_t)id_offset + unique_count) <= (
size_t)XT_INT_MAX,
871 "ERROR(generate_global_ce_ids): global_id out of bounds")
874 for (
size_t j = 0; j < recv_counts[i]; ++j)
877 free(yac_int_buffer);
879 qsort(n_ids_reorder_buffer, recv_counts[0] + recv_counts[1],
883 xmalloc((recv_counts[0] + recv_counts[1] +
884 ((ids_available_global[0])?0:(num_cells)) +
885 ((ids_available_global[1])?0:(num_edges))) *
886 sizeof(*global_ids_buffer));
887 yac_int * send_global_ids = global_ids_buffer;
889 global_ids_buffer + recv_counts[0] + recv_counts[1];
891 for (
size_t i = 0; i < recv_counts[0] + recv_counts[1]; ++i)
892 send_global_ids[i] = n_ids_reorder_buffer[i].
global_id;
893 free(n_ids_reorder_buffer);
896 saccu = 0, raccu = 0;
897 for (
int i = 0; i < comm_size; ++i) {
898 total_sdispls[i] = saccu;
899 total_rdispls[i] = raccu;
901 ((total_sendcounts[i] = recvcounts[2 * i + 0] + recvcounts[2 * i + 1]));
903 ((total_recvcounts[i] = sendcounts[2 * i + 0] + sendcounts[2 * i + 1]));
907 yac_alltoallv_yac_int_p2p(
908 send_global_ids, total_sendcounts, total_sdispls,
909 recv_global_ids, total_recvcounts, total_rdispls, comm);
911 if ((!ids_available_global[0]) && (num_cells > 0))
914 if ((!ids_available_global[1]) && (num_edges > 0))
919 if (!ids_available_global[0])
920 for (
size_t i = 0; i < num_cells; ++i)
922 recv_global_ids[total_rdispls[cell_ranks[i]]++];
923 if (!ids_available_global[1])
924 for (
size_t i = 0; i < num_edges; ++i)
926 recv_global_ids[total_rdispls[edge_ranks[i]]++];
930 free(global_ids_buffer);
1341 int max_num_vertices_per_cell, MPI_Comm comm,
1343 size_t * dist_count) {
1352 int * num_ranks_buffer =
1355 int * num_cell_ranks = num_ranks_buffer;
1356 int * num_vertex_ranks = num_ranks_buffer +
num_cells;
1358 size_t * dist_rank_offsets =
1360 size_t * dist_cell_rank_offsets = dist_rank_offsets;
1361 size_t * dist_edge_rank_offsets = dist_rank_offsets +
num_cells;
1371 proc_sphere_part, grid_data, comm,
1372 &rank_buffer, num_cell_ranks, dist_cell_rank_offsets, max_num_vertices_per_cell);
1374 size_t rank_buffer_array_size = dist_cell_rank_offsets[
num_cells];
1382 proc_sphere_part, grid, comm, dist_cell_rank_offsets, dist_edge_rank_offsets,
1383 num_cell_ranks, num_edge_ranks, &rank_buffer, &rank_buffer_array_size);
1391 vertex_ranks, grid, comm, dist_edge_rank_offsets,
1392 num_edge_ranks, num_vertex_ranks, &rank_buffer, &rank_buffer_array_size);
1394 int * dist_cell_ranks = rank_buffer;
1395 int * dist_vertex_ranks = rank_buffer + dist_edge_rank_offsets[
num_edges];
1396 int * dist_edge_ranks = rank_buffer + dist_cell_rank_offsets[
num_cells];
1398 free(dist_rank_offsets);
1408 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
1410 3, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1411 size_t * size_t_buffer =
1412 xmalloc(4 * (
size_t)comm_size *
sizeof(*size_t_buffer));
1413 size_t * total_sendcounts = size_t_buffer + 0 * comm_size;
1414 size_t * total_recvcounts = size_t_buffer + 1 * comm_size;
1415 size_t * total_sdispls = size_t_buffer + 2 * comm_size;
1416 size_t * total_rdispls = size_t_buffer + 3 * comm_size;
1424 {{.count = num_cells,
1425 .ranks = dist_cell_ranks,
1426 .num_ranks = num_cell_ranks,
1428 {.count = num_vertices,
1429 .ranks = dist_vertex_ranks,
1430 .num_ranks = num_vertex_ranks,
1432 {.count = num_edges,
1433 .ranks = dist_edge_ranks,
1434 .num_ranks = num_edge_ranks,
1439 for (
int location = 0; location < 3; ++location) {
1440 size_t count = cve_data[location].count;
1441 int * ranks = cve_data[location].ranks;
1442 int * num_ranks = cve_data[location].num_ranks;
1443 for (
size_t i = 0, k = 0; i < count; ++i) {
1444 int curr_num_ranks = num_ranks[i];
1445 for (
int j = 0; j < curr_num_ranks; ++j, ++k)
1446 sendcounts[3 * ranks[k] + location]++;
1451 3, sendcounts, recvcounts, sdispls, rdispls, comm);
1453 size_t receive_counts[3] = {0,0,0};
1454 size_t saccu = 0, raccu = 0;
1455 for (
int i = 0; i < comm_size; ++i) {
1456 total_sdispls[i] = saccu;
1457 total_rdispls[i] = raccu;
1458 total_sendcounts[i] = 0;
1459 total_recvcounts[i] = 0;
1460 for (
int location = 0; location < 3; ++location) {
1461 total_sendcounts[i] += sendcounts[3 * i + location];
1462 total_recvcounts[i] += recvcounts[3 * i + location];
1463 receive_counts[location] += recvcounts[3 * i + location];
1465 saccu += total_sendcounts[i];
1466 raccu += total_recvcounts[i];
1468 size_t local_data_count = total_sendcounts[comm_size - 1] +
1469 total_sdispls[comm_size - 1];
1470 size_t recv_count = total_recvcounts[comm_size - 1] +
1471 total_rdispls[comm_size - 1];
1473 struct id_pos * id_pos_buffer =
1474 xcalloc((local_data_count + recv_count),
sizeof(*id_pos_buffer));
1475 struct id_pos * id_pos_send_buffer = id_pos_buffer;
1476 struct id_pos * id_pos_recv_buffer =
1477 id_pos_buffer + local_data_count;
1480 for (
int location = 0; location < 3; ++location) {
1481 size_t count = cve_data[location].count;
1482 int * ranks = cve_data[location].ranks;
1483 int * num_ranks = cve_data[location].num_ranks;
1484 yac_int * ids = cve_data[location].ids;
1485 for (
size_t i = 0, k = 0; i < count; ++i) {
1486 int curr_num_ranks = num_ranks[i];
1488 for (
int j = 0; j < curr_num_ranks; ++j, ++k) {
1489 size_t pos = sdispls[3 * ranks[k] + location + 1]++;
1491 id_pos_send_buffer[pos].
orig_pos = i;
1495 free(num_ranks_buffer);
1502 id_pos_send_buffer, total_sendcounts, total_sdispls,
1503 id_pos_recv_buffer, total_recvcounts, total_rdispls,
1504 sizeof(*id_pos_send_buffer), id_pos_dt, comm);
1508 size_t dist_owner_counts[3] = {0, 0, 0};
1509 for (
int i = 0; i < comm_size; ++i)
1510 for (
int location = 0; location < 3; ++location)
1511 dist_owner_counts[location] += recvcounts[3 * i + location];
1512 size_t max_dist_owner_count =
1513 MAX(
MAX(dist_owner_counts[0], dist_owner_counts[1]), dist_owner_counts[2]);
1515 xcalloc(max_dist_owner_count,
sizeof(*temp_buffer));
1520 for (
int location = 0; location < 3; ++location) {
1523 for (
int i = 0; i < comm_size; ++i) {
1524 size_t curr_recvcount = recvcounts[3 * i + location];
1525 struct id_pos * curr_id_pos =
1526 id_pos_recv_buffer + rdispls[3 * i + location];
1527 for (
size_t k = 0; k < curr_recvcount; ++k, ++count) {
1535 qsort(temp_buffer, count,
sizeof(*temp_buffer),
1538 unique_ids =
xrealloc(unique_ids, count *
sizeof(*unique_ids));
1539 size_t num_unique_ids = 0;
1543 for (
size_t i = 0; i < count; ++i) {
1546 if (curr_id != prev_id) {
1548 unique_ids[num_unique_ids].
global_id = curr_id;
1549 unique_ids[num_unique_ids].
data.
count = 1;
1552 unique_ids[num_unique_ids-1].
data.
count++;
1557 ((dist_point_infos[location] =
1558 xmalloc(num_unique_ids *
sizeof(*(dist_point_infos[location])))));
1560 ((dist_global_ids[location] =
1561 xmalloc(num_unique_ids *
sizeof(*(dist_global_ids[location])))));
1562 dist_count[location] = num_unique_ids;
1567 for (
size_t i = 0, l = 0; i < num_unique_ids; ++i) {
1568 global_ids[i] = unique_ids[i].
global_id;
1569 int curr_count = unique_ids[i].
data.
count;
1570 point_infos[i].
count = curr_count;
1571 if (curr_count == 1) {
1577 (
size_t)curr_count *
1579 for (
int k = 0; k < curr_count; ++k, ++l) {
1587 free(id_pos_buffer);
1589 free(size_t_buffer);
1609 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
1611 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1613 for (
size_t i = 0; i <
count; ++i) {
1615 (point_infos[i].
count > 1)?
1616 (point_infos[i].data.
multi):
1618 sendcounts[curr_info->
rank]++;
1622 1, sendcounts, recvcounts, sdispls, rdispls, comm);
1623 size_t num_src_msg = 0, num_dst_msg = 0;
1624 for (
int i = 0; i < comm_size; ++i) {
1625 num_src_msg += (recvcounts[i] > 0);
1626 num_dst_msg += (sendcounts[i] > 0);
1630 rdispls[comm_size-1] + recvcounts[comm_size-1];
1633 xmalloc((recv_count + 2 * count) *
sizeof(*pos_buffer));
1634 int * src_pos_buffer = pos_buffer;
1635 int * dst_pos_buffer = pos_buffer + recv_count;
1636 int * send_pos_buffer = pos_buffer + recv_count + count;
1639 for (
size_t i = 0; i < count; ++i) {
1641 (point_infos[i].
count > 1)?
1642 (point_infos[i].data.
multi):
1644 size_t pos = sdispls[curr_info->
rank+1]++;
1645 dst_pos_buffer[pos] = i;
1646 send_pos_buffer[pos] = (int)(curr_info->
orig_pos);
1650 yac_alltoallv_int_p2p(
1651 send_pos_buffer, sendcounts, sdispls,
1652 src_pos_buffer, recvcounts, rdispls, comm);
1654 struct Xt_com_pos * com_pos =
1655 xmalloc(((
size_t)num_src_msg + (
size_t)num_dst_msg) *
sizeof(*com_pos));
1656 struct Xt_com_pos * src_com = com_pos;
1657 struct Xt_com_pos * dst_com = com_pos + num_src_msg;
1662 for (
int i = 0; i < comm_size; ++i) {
1663 if (recvcounts[i] > 0) {
1664 src_com[num_src_msg].transfer_pos = src_pos_buffer;
1665 src_com[num_src_msg].num_transfer_pos = recvcounts[i];
1666 src_com[num_src_msg].rank = i;
1667 src_pos_buffer += recvcounts[i];
1670 if (sendcounts[i] > 0) {
1671 dst_com[num_dst_msg].transfer_pos = dst_pos_buffer;
1672 dst_com[num_dst_msg].num_transfer_pos = sendcounts[i];
1673 dst_com[num_dst_msg].rank = i;
1674 dst_pos_buffer += sendcounts[i];
1681 xt_xmap_intersection_pos_new(
1682 num_src_msg, src_com, num_dst_msg, dst_com, comm);
1718 Xt_redist redist_mask, Xt_redist redist_coords, MPI_Comm comm) {
1722 uint64_t counts[2], max_counts[2];
1723 if (orig_field_data != NULL) {
1732 counts, max_counts, 2, MPI_UINT64_T, MPI_MAX, comm), comm);
1734 (orig_field_data == NULL) ||
1735 ((counts[0] == max_counts[0]) && (counts[1] == max_counts[1])),
1736 "ERROR(field_data_init): inconsistent number of masks or coordinates")
1738 int * data_available_flag =
1739 xcalloc(2 * max_counts[0] + max_counts[1],
sizeof(*data_available_flag));
1741 for (
size_t i = 0; i < counts[0]; ++i) {
1742 data_available_flag[i] =
1744 data_available_flag[i + counts[0]] =
1748 for (
size_t i = 0; i < counts[1]; ++i)
1749 data_available_flag[i + 2 * counts[0]] =
1754 MPI_IN_PLACE, data_available_flag,
1755 (
int)(2 * max_counts[0] + max_counts[1]), MPI_INT, MPI_MAX, comm), comm);
1757 for (
size_t i = 0; i < counts[0]; ++i) {
1759 data_available_flag[i] ==
1761 "ERROR(field_data_init): inconsistent availability of masks")
1766 data_available_flag[i + counts[0]] ==
1768 "ERROR(field_data_init): inconsistent mask names")
1771 for (
size_t i = 0; i < counts[1]; ++i)
1773 data_available_flag[i + 2 * counts[0]] ==
1775 "ERROR(field_data_init): inconsistent availability of coordinates")
1777 for (uint64_t i = 0; i < max_counts[0]; ++i) {
1778 int * dist_mask = NULL;
1779 if (data_available_flag[i]) {
1780 dist_mask =
xmalloc(dist_size *
sizeof(*dist_mask));
1781 int const * orig_mask =
1782 (orig_field_data != NULL)?
1784 xt_redist_s_exchange1(redist_mask, orig_mask, dist_mask);
1787 int mask_name_len = data_available_flag[i + max_counts[0]];
1788 char * mask_name = NULL;
1789 if (mask_name_len > 0) {
1790 mask_name =
xmalloc((
size_t)mask_name_len *
sizeof(*mask_name));
1791 if ((orig_field_data != NULL) &&
1795 (
size_t)mask_name_len);
1797 memset(mask_name, 0, (
size_t)mask_name_len *
sizeof(*mask_name));
1800 MPI_IN_PLACE, mask_name, mask_name_len, MPI_CHAR, MPI_MAX, comm),
1803 (orig_field_data == NULL) ||
1807 (
size_t)mask_name_len *
sizeof(*mask_name)),
1808 "ERROR(field_data_init): inconsistent mask names")
1813 for (uint64_t i = 0; i < max_counts[1]; ++i) {
1815 if (data_available_flag[i + 2 * max_counts[0]]) {
1816 dist_coordinates =
xmalloc(dist_size *
sizeof(*dist_coordinates));
1818 (orig_field_data != NULL)?
1820 xt_redist_s_exchange1(redist_coords, orig_coordinates, dist_coordinates);
1825 free(data_available_flag);
1827 return dist_field_data;
1874 MPI_Datatype dt_coord,
size_t num_vertices,
yac_int * sorted_vertex_ids,
1875 size_t * sorted_vertex_reorder_idx,
1885 MPI_Datatype dt_2yac_int;
1888 Xt_redist redist_edge_int = xt_redist_p2p_new(xmap, MPI_INT);
1889 Xt_redist redist_edge_coords = xt_redist_p2p_new(xmap, dt_coord);
1890 Xt_redist redist_edge_2yac_int = xt_redist_p2p_new(xmap, dt_2yac_int);
1892 xt_xmap_delete(xmap);
1896 int * temp_edge_type_src, * temp_edge_type_dst;
1897 if (
sizeof(*
edge_type) ==
sizeof(
int)) {
1898 temp_edge_type_src = (
int*)(grid_data->
edge_type);
1901 temp_edge_type_src =
1903 for (
size_t i = 0; i < grid_data->
num_edges; ++i)
1904 temp_edge_type_src[i] = (
int)(grid_data->
edge_type[i]);
1907 temp_edge_type_dst[i] = (
int)(
edge_type[i]);
1910 xt_redist_s_exchange1(
1911 redist_edge_int, temp_edge_type_src, temp_edge_type_dst);
1913 if (
sizeof(*
edge_type) !=
sizeof(
int)) {
1918 free(temp_edge_type_src);
1919 free(temp_edge_type_dst);
1929 yac_int * grid_edge_vertex_ids = vertex_id_buffer;
1932 size_t * grid_edge_to_vertex = &(grid_data->
edge_to_vertex[0][0]);
1934 for (
size_t i = 0; i < 2 * grid_data->
num_edges; ++i)
1935 grid_edge_vertex_ids[i] =
1936 grid_data->
vertex_ids[grid_edge_to_vertex[i]];
1938 xt_redist_s_exchange1(
1939 redist_edge_2yac_int, grid_edge_vertex_ids, edge_vertex_ids);
1942 "redistribute_edge_data", edge_vertex_ids,
1944 sorted_vertex_ids, sorted_vertex_reorder_idx,
num_vertices);
1946 free(vertex_id_buffer);
1953 redist_edge_int, redist_edge_coords, comm);
1955 xt_redist_delete(redist_edge_2yac_int);
1956 xt_redist_delete(redist_edge_coords);
1957 xt_redist_delete(redist_edge_int);
1959 *edge_to_vertex_ = edge_to_vertex;
1960 *edge_type_ = edge_type;
1961 *edge_field_data_ = edge_field_data;
1967 MPI_Datatype dt_coord,
1968 size_t num_edges,
yac_int * sorted_edge_ids,
1969 size_t * sorted_edge_reorder_idx,
1970 size_t num_vertices,
yac_int * sorted_vertex_ids,
1971 size_t * sorted_vertex_reorder_idx,
int max_num_vertices_per_cell,
1972 size_t ** cell_to_vertex_,
size_t ** cell_to_edge_,
1973 int ** num_vertices_per_cell_,
struct yac_field_data ** cell_field_data_) {
1981 MPI_Datatype dt_yac_ints;
1983 MPI_Type_contiguous(
1984 max_num_vertices_per_cell,
yac_int_dt, &dt_yac_ints), comm);
1986 Xt_redist redist_cell_int = xt_redist_p2p_new(xmap, MPI_INT);
1987 Xt_redist redist_cell_yac_ints = xt_redist_p2p_new(xmap, dt_yac_ints);
1988 Xt_redist redist_cell_coords = xt_redist_p2p_new(xmap, dt_coord);
1990 xt_xmap_delete(xmap);
1998 (
size_t)max_num_vertices_per_cell *
2001 id_buffer + (size_t)max_num_vertices_per_cell *
num_cells;
2002 size_t total_num_ids = 0;
2004 size_t grid_num_cells = grid_data->
num_cells;
2010 yac_int * sorted_ve_ids = sorted_vertex_ids;
2011 size_t * sorted_ve_reorder_idx = sorted_vertex_reorder_idx;
2015 for (
int location = 0; location < 2; ++location) {
2018 for (
size_t i = 0, k = 0; i < grid_num_cells; ++i) {
2019 int curr_num_ve_per_cell = grid_num_ve_per_cell[i];
2020 size_t * curr_cell_to_ve = grid_cell_to_ve + grid_cell_to_ve_offests[i];
2021 for (
int j = 0; j < curr_num_ve_per_cell; ++j, ++k)
2022 grid_id_buffer[k] = grid_ids[curr_cell_to_ve[j]];
2023 for (
int j = curr_num_ve_per_cell; j < max_num_vertices_per_cell; ++j, ++k)
2024 grid_id_buffer[k] = XT_INT_MAX;
2028 xt_redist_s_exchange1(
2029 redist_cell_yac_ints, grid_id_buffer, id_buffer);
2035 for (
size_t i = 0; i <
num_cells; ++i) {
2038 id_buffer + i * (size_t)max_num_vertices_per_cell;
2039 for (vertex_count = 0; vertex_count < max_num_vertices_per_cell;
2041 if (
vertex_ids[vertex_count] == XT_INT_MAX)
break;
2042 compact_flag |= vertex_count != max_num_vertices_per_cell;
2044 total_num_ids += (size_t)vertex_count;
2050 for (
size_t i = 0, j = 0; j < total_num_ids; ++i) {
2051 yac_int curr_id = id_buffer[i];
2052 if (curr_id != XT_INT_MAX) {
2053 id_buffer[j] = curr_id;
2060 size_t * cell_to_ve =
2061 ((*cell_to_ve_ =
xmalloc(total_num_ids *
sizeof(*cell_to_ve))));
2063 "redistribute_cell_data", id_buffer, cell_to_ve, total_num_ids,
2064 sorted_ve_ids, sorted_ve_reorder_idx, num_ve);
2071 sorted_ve_ids = sorted_edge_ids;
2072 sorted_ve_reorder_idx = sorted_edge_reorder_idx;
2083 redist_cell_int, redist_cell_coords, comm);
2085 xt_redist_delete(redist_cell_coords);
2086 xt_redist_delete(redist_cell_yac_ints);
2087 xt_redist_delete(redist_cell_int);
2089 *cell_to_vertex_ = cell_to_vertex;
2090 *cell_to_edge_ = cell_to_edge;
2091 *num_vertices_per_cell_ = num_vertices_per_cell;
2092 *cell_field_data_ = cell_field_data;
2183 int max_num_vertices_per_cell =
2197 proc_sphere_part, grid, &vertex_ranks, max_num_vertices_per_cell, comm);
2204 size_t dist_count[3];
2206 proc_sphere_part, grid, vertex_ranks, max_num_vertices_per_cell, comm,
2207 dist_point_infos, dist_global_ids, dist_count);
2217 yac_int * sorted_cell_ids, * sorted_vertex_ids, * sorted_edge_ids;
2218 size_t * sorted_cell_reorder_idx, * sorted_vertex_reorder_idx,
2219 * sorted_edge_reorder_idx;
2221 cell_ids, num_cells, &sorted_cell_ids, &sorted_cell_reorder_idx);
2223 vertex_ids, num_vertices, &sorted_vertex_ids,
2224 &sorted_vertex_reorder_idx);
2226 edge_ids, num_edges, &sorted_edge_ids, &sorted_edge_reorder_idx);
2237 comm, dt_coord, vertex_ranks,
2238 &vertex_coordinates, &vertex_owner, &vertex_field_data);
2247 grid, dist_point_infos[
YAC_LOC_EDGE], num_edges, comm, dt_coord,
2248 num_vertices, sorted_vertex_ids, sorted_vertex_reorder_idx,
2249 &edge_to_vertex, &edge_type, &edge_field_data);
2253 size_t * cell_to_vertex;
2254 size_t * cell_to_edge;
2255 int * num_vertices_per_cell;
2258 grid, dist_point_infos[
YAC_LOC_CELL], num_cells, comm, dt_coord,
2259 num_edges, sorted_edge_ids, sorted_edge_reorder_idx,
2260 num_vertices, sorted_vertex_ids, sorted_vertex_reorder_idx,
2261 max_num_vertices_per_cell, &cell_to_vertex, &cell_to_edge,
2262 &num_vertices_per_cell, &cell_field_data);
2267 size_t * cell_to_vertex_offsets =
2268 xmalloc(num_cells *
sizeof(*cell_to_vertex_offsets));
2269 size_t * cell_to_edge_offsets = cell_to_vertex_offsets;
2270 for (
size_t i = 0, accu = 0; i < num_cells; ++i) {
2271 cell_to_vertex_offsets[i] = accu;
2272 accu += (size_t)(num_vertices_per_cell[i]);
2278 .ids = {cell_ids, vertex_ids, edge_ids},
2279 .total_count = {num_cells, num_vertices, num_edges},
2280 .count = {num_cells, num_vertices, num_edges},
2293 .owner_mask = {NULL, NULL, NULL},
2294 .sorted_ids = {sorted_cell_ids, sorted_vertex_ids, sorted_edge_ids},
2295 .sorted_reorder_idx =
2296 {sorted_cell_reorder_idx, sorted_vertex_reorder_idx,
2297 sorted_edge_reorder_idx},
2300 cell_field_data, vertex_field_data, edge_field_data),
3264 size_t count,
size_t * idx,
3267 if (count == 0)
return;
3270 qsort(vertices, count,
sizeof(*vertices),
3275 size_t * sorted_vertex_reorder_idx =
3279 size_t prev_idx = 0;
3280 size_t add_count = 0;
3284 for (
size_t i = 0, j = 0; i < count; ++i) {
3287 size_t curr_reorder_idx = vertices[i].
reorder_idx;
3290 if (prev_global_id == curr_global_id) {
3291 if (idx != NULL) idx[curr_reorder_idx] = prev_idx;
3295 prev_global_id = curr_global_id;
3299 while ((j < num_total_vertices) && (sorted_vertex_ids[j] < curr_global_id))
3303 if ((j < num_total_vertices) && (sorted_vertex_ids[j] == curr_global_id)) {
3305 if (idx != NULL) idx[curr_reorder_idx] = sorted_vertex_reorder_idx[j];
3306 prev_idx = sorted_vertex_reorder_idx[j];
3312 if (idx != NULL) idx[curr_reorder_idx] = num_total_vertices + add_count;
3313 prev_idx = num_total_vertices + add_count;
3314 if (add_count != i) vertices[add_count] = vertices[i];
3319 size_t new_num_total_vertices = num_total_vertices + add_count;
3322 sizeof(*vertex_coordinates));
3325 new_num_total_vertices *
sizeof(*vertex_ids));
3326 int * vertex_owner_mask =
3328 sizeof(*vertex_owner_mask));
3331 sizeof(*vertex_owners));
3334 sorted_vertex_ids, new_num_total_vertices *
sizeof(*sorted_vertex_ids));
3335 sorted_vertex_reorder_idx =
3337 sorted_vertex_reorder_idx, new_num_total_vertices *
3338 sizeof(*sorted_vertex_reorder_idx));
3341 for (
size_t i = 0, j = num_total_vertices; i < add_count; ++i, ++j) {
3343 vertex_coordinates[j][0] = vertices[i].
coord[0];
3344 vertex_coordinates[j][1] = vertices[i].
coord[1];
3345 vertex_coordinates[j][2] = vertices[i].
coord[2];
3347 vertex_owner_mask[j] = 0;
3348 vertex_owners[j] = vertices[i].
owners;
3349 sorted_vertex_ids[j] = vertices[i].
global_id;
3350 sorted_vertex_reorder_idx[j] = j;
3355 temp_vertex_field_data, vertices,
sizeof(*vertices),
3356 num_total_vertices, new_num_total_vertices);
3358 sorted_vertex_ids, new_num_total_vertices, sorted_vertex_reorder_idx);
3380 size_t count,
size_t * idx,
struct temp_field_data temp_edge_field_data) {
3382 if (count == 0)
return;
3391 size_t prev_idx = 0;
3392 size_t add_count = 0;
3396 for (
size_t i = 0, j = 0; i < count; ++i) {
3402 if (prev_global_id == curr_global_id) {
3403 if (idx != NULL) idx[curr_reorder_idx] = prev_idx;
3407 prev_global_id = curr_global_id;
3411 while ((j < num_total_edges) && (sorted_edge_ids[j] < curr_global_id)) ++j;
3414 if ((j < num_total_edges) && (sorted_edge_ids[j] == curr_global_id)) {
3416 if (idx != NULL) idx[curr_reorder_idx] = sorted_edge_reorder_idx[j];
3417 prev_idx = sorted_edge_reorder_idx[j];
3423 if (idx != NULL) idx[curr_reorder_idx] = num_total_edges + add_count;
3424 prev_idx = num_total_edges + add_count;
3425 if (add_count != i) edges[add_count] = edges[i];
3430 size_t new_num_total_edges = num_total_edges + add_count;
3433 new_num_total_edges *
sizeof(*edge_ids));
3436 new_num_total_edges *
sizeof(*
edge_type));
3442 new_num_total_edges *
sizeof(*edge_owners));
3443 int * edge_owner_mask =
3445 sizeof(*edge_owner_mask));
3448 sorted_edge_ids, new_num_total_edges *
sizeof(*sorted_edge_ids));
3449 sorted_edge_reorder_idx =
3451 sorted_edge_reorder_idx, new_num_total_edges *
3452 sizeof(*sorted_edge_reorder_idx));
3454 yac_int * vertex_ids =
xmalloc(2 * add_count *
sizeof(*vertex_ids));
3455 size_t * reorder =
xmalloc(2 * add_count *
sizeof(*reorder));
3458 for (
size_t i = 0, j = num_total_edges; i < add_count; ++i, ++j) {
3462 edge_owner_mask[j] = 0;
3463 edge_owners[j] = edges[i].
owners;
3464 sorted_edge_ids[j] = edges[i].
global_id;
3465 sorted_edge_reorder_idx[j] = j;
3469 reorder[2 * i + 0] = 2 * num_total_edges + 2 * i + 0;
3470 reorder[2 * i + 1] = 2 * num_total_edges + 2 * i + 1;
3475 temp_edge_field_data, edges,
sizeof(*edges),
3476 num_total_edges, new_num_total_edges);
3478 sorted_edge_ids, new_num_total_edges, sorted_edge_reorder_idx);
3483 size_t * sorted_vertex_reorder_idx =
3486 size_t * edge_to_vertex_ = (
size_t*)&(edge_to_vertex[0][0]);
3488 for (
size_t i = 0, j = 0; i < 2 * add_count; ++i) {
3489 yac_int curr_id = vertex_ids[i];
3490 while ((j < total_num_vertices) && (sorted_vertex_ids[j] < curr_id)) ++j;
3492 (j < total_num_vertices) && (sorted_vertex_ids[j] == curr_id),
3493 "ERROR(yac_dist_grid_add_edges): vertex id not found")
3494 edge_to_vertex_[reorder[i]] = sorted_vertex_reorder_idx[j];
3513 int * num_vertices_per_cell,
struct bounding_circle * cell_bnd_circles,
3514 size_t count,
size_t * cell_to_vertex,
size_t * cell_to_edge,
3518 if (
count == 0)
return;
3520 size_t * reorder_idx =
xmalloc(
count *
sizeof(reorder_idx));
3521 for (
size_t i = 0; i <
count; ++i) reorder_idx[i] = i;
3524 for (
size_t i = 0, accu = 0; i <
count;
3525 accu += (size_t)(num_vertices_per_cell[i++])) prescan[i] = accu;
3531 size_t * sorted_cell_reorder_idx =
3534 yac_int prev_global_id = cell_ids[0] - 1;
3535 size_t cell_add_count = 0;
3536 size_t relations_add_count = 0;
3540 for (
size_t i = 0, j = 0; i <
count; ++i) {
3542 yac_int curr_global_id = cell_ids[i];
3543 size_t curr_reorder_idx = reorder_idx[i];
3546 if (prev_global_id == curr_global_id) {
3550 prev_global_id = curr_global_id;
3554 while ((j < num_total_cells) && (sorted_cell_ids[j] < curr_global_id)) ++j;
3557 if ((j >= num_total_cells) || (sorted_cell_ids[j] != curr_global_id)) {
3559 if (cell_add_count != i) {
3560 cell_ids[cell_add_count] = curr_global_id;
3561 reorder_idx[cell_add_count] = curr_reorder_idx;
3564 relations_add_count += (size_t)(num_vertices_per_cell[curr_reorder_idx]);
3568 size_t new_num_total_cells = num_total_cells + cell_add_count;
3569 size_t num_total_relations =
3570 (num_total_cells > 0)?
3573 size_t new_num_total_relations = num_total_relations + relations_add_count;
3576 new_num_total_cells *
sizeof(*new_cell_ids));
3577 int * new_num_vertices_per_cell =
3579 sizeof(*new_num_vertices_per_cell));
3580 size_t * new_cell_to_vertex =
3582 sizeof(*new_cell_to_vertex));
3583 size_t * cell_to_vertex_offsets =
3585 sizeof(*cell_to_vertex_offsets));
3586 size_t * new_cell_to_edge =
3588 sizeof(*new_cell_to_edge));
3591 sizeof(*new_cell_bnd_circles));
3592 int * cell_owner_mask =
3594 new_num_total_cells *
sizeof(*cell_owner_mask));
3597 new_num_total_cells *
sizeof(*cell_owners));
3600 sorted_cell_ids, new_num_total_cells *
sizeof(*sorted_cell_ids));
3601 sorted_cell_reorder_idx =
3603 sorted_cell_reorder_idx, new_num_total_cells *
3604 sizeof(*sorted_cell_reorder_idx));
3607 for (
size_t i = 0, j = num_total_cells; i < cell_add_count;
3610 size_t curr_reorder_idx = reorder_idx[i];
3611 int curr_num_vertices = num_vertices_per_cell[curr_reorder_idx];
3612 size_t curr_relation_idx = prescan[curr_reorder_idx];
3614 new_cell_ids[j] = cell_ids[i];
3615 new_num_vertices_per_cell[j] = curr_num_vertices;
3616 cell_to_vertex_offsets[j] = num_total_relations;
3617 for (
int j = 0; j < curr_num_vertices;
3618 ++j, ++num_total_relations, ++curr_relation_idx) {
3619 new_cell_to_vertex[num_total_relations] =
3620 cell_to_vertex[curr_relation_idx];
3621 new_cell_to_edge[num_total_relations] = cell_to_edge[curr_relation_idx];
3623 cell_owner_mask[j] = 0;
3624 sorted_cell_ids[j] = cell_ids[i];
3625 sorted_cell_reorder_idx[j] = j;
3626 new_cell_bnd_circles[j] = cell_bnd_circles[curr_reorder_idx];
3627 new_cell_owners[j] = cell_owners[curr_reorder_idx];
3632 temp_cell_field_data, reorder_idx,
sizeof(*reorder_idx),
3633 num_total_cells, new_num_total_cells);
3635 sorted_cell_ids, new_num_total_cells, sorted_cell_reorder_idx);
3714 struct yac_dist_grid * dist_grid,
size_t count,
void * buffer,
3715 int buffer_size, MPI_Datatype bnd_circle_dt, MPI_Datatype point_info_dt,
3719 int * num_vertices_per_cell =
xmalloc(count *
sizeof(*num_vertices_per_cell));
3721 xmalloc(count *
sizeof(*cell_bnd_circles));
3726 size_t vertices_array_size = 0;
3727 size_t total_num_vertices = 0;
3730 size_t edges_array_size = 0;
3745 for (
size_t i = 0, buffer_offset = 0; i < count; ++i) {
3748 void * curr_buffer = (
char*)buffer + buffer_offset;
3753 MPI_Unpack(curr_buffer, buffer_size, &position, cell_ids + i, 1,
3757 curr_buffer, buffer_size, &position, i, temp_cell_field_data, comm);
3760 MPI_Unpack(curr_buffer, buffer_size, &position, &num_vertices, 1,
3761 MPI_INT, comm), comm);
3764 MPI_Unpack(curr_buffer, buffer_size, &position, cell_bnd_circles + i, 1,
3765 bnd_circle_dt, comm), comm);
3768 curr_buffer, buffer_size, &position, cell_owners + i,
3769 point_info_dt, comm);
3771 num_vertices_per_cell[i] = num_vertices;
3774 vertices, vertices_array_size, total_num_vertices + (
size_t)num_vertices);
3776 edges, edges_array_size, total_num_vertices + (
size_t)num_vertices);
3778 &temp_vertex_field_data, total_num_vertices + (
size_t)num_vertices);
3780 &temp_edge_field_data, total_num_vertices + (
size_t)num_vertices);
3782 for (
int j = 0; j < num_vertices; ++j, ++total_num_vertices) {
3784 vertices, total_num_vertices, curr_buffer, buffer_size, &position,
3785 temp_vertex_field_data, point_info_dt, comm);
3787 edges, total_num_vertices, curr_buffer, buffer_size, &position,
3788 temp_edge_field_data, point_info_dt, comm);
3789 vertices[total_num_vertices].
reorder_idx = total_num_vertices;
3790 edges[total_num_vertices].
reorder_idx = total_num_vertices;
3793 buffer_offset += (size_t)position;
3794 buffer_size -= position;
3797 size_t * cell_to_vertex =
xmalloc(total_num_vertices *
sizeof(*cell_to_vertex));
3798 size_t * cell_to_edge =
xmalloc(total_num_vertices *
sizeof(*cell_to_edge));
3801 dist_grid, vertices, total_num_vertices, cell_to_vertex,
3802 temp_vertex_field_data);
3804 dist_grid, edges, total_num_vertices, cell_to_edge,
3805 temp_edge_field_data);
3807 dist_grid, cell_ids, num_vertices_per_cell, cell_bnd_circles, count,
3808 cell_to_vertex, cell_to_edge, cell_owners, temp_cell_field_data);
3814 free(cell_to_vertex);
3818 free(cell_bnd_circles);
3819 free(num_vertices_per_cell);
3938 size_t count,
enum yac_location location,
size_t * idx) {
3940 MPI_Comm comm = dist_grid->
comm;
3941 int comm_rank, comm_size;
3945 size_t remote_count = 0;
3947 for (
size_t i = 0; i < count; ++i) {
3948 if (ids[i].global_id == XT_INT_MAX) idx[i] = SIZE_MAX;
3949 else if (ids[i].
data.rank != comm_rank) ++remote_count;
3954 xmalloc(remote_count *
sizeof(*missing_ids));
3956 for (
size_t i = 0, j = 0; i < count; ++i) {
3957 if ((ids[i].
data.rank != comm_rank) &&
3959 missing_ids[j].
data = ids[i];
3967 dist_grid, location, missing_ids, &remote_count, idx);
3970 qsort(missing_ids, remote_count,
sizeof(*missing_ids),
3973 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
3975 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
3977 for (
size_t i = 0; i < remote_count; ++i)
3981 1, sendcounts, recvcounts, sdispls, rdispls, comm);
3983 size_t recv_count = rdispls[comm_size-1] + recvcounts[comm_size-1];
3985 uint64_t * uint64_t_buffer =
3986 xmalloc((remote_count + recv_count) *
sizeof(*uint64_t_buffer));
3987 uint64_t * orig_pos_send_buffer = uint64_t_buffer;
3988 uint64_t * orig_pos_recv_buffer = uint64_t_buffer + remote_count;
3991 for (
size_t i = 0; i < remote_count; ++i) {
3993 if (rank != comm_rank)
3994 orig_pos_send_buffer[sdispls[rank+1]++] =
3999 yac_alltoallv_uint64_p2p(
4000 orig_pos_send_buffer, sendcounts, sdispls,
4001 orig_pos_recv_buffer, recvcounts, rdispls, comm);
4008 void * packed_send_data = NULL;
4009 int * pack_sizes =
xmalloc(recv_count *
sizeof(*pack_sizes));
4013 dist_grid, location, orig_pos_recv_buffer, recv_count,
4014 &packed_send_data, pack_sizes,
4015 bnd_circle_dt, point_info_dt, comm);
4016 free(uint64_t_buffer);
4018 memset(sendcounts, 0, (
size_t)comm_size *
sizeof(*sendcounts));
4019 for (
int i = 0, k = 0; i < comm_size; ++i)
4020 for (
size_t j = 0; j < recvcounts[i]; ++j, ++k)
4021 sendcounts[i] += (
size_t)(pack_sizes[k]);
4026 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4028 recv_count = rdispls[comm_size-1] + recvcounts[comm_size-1];
4030 void * packed_recv_data =
xmalloc(recv_count);
4033 yac_alltoallv_packed_p2p(
4034 packed_send_data, sendcounts, sdispls+1,
4035 packed_recv_data, recvcounts, rdispls, comm);
4039 dist_grid, location, remote_count, packed_recv_data, (
int)recv_count,
4040 bnd_circle_dt, point_info_dt, comm);
4047 dist_grid, location, missing_ids, &remote_count, idx);
4050 free(packed_recv_data);
4051 free(packed_send_data);
4080 double coord[3],
struct yac_dist_grid * dist_grid,
size_t cell_idx,
4083 MPI_Comm comm = grid_pair->
comm;
4084 int comm_rank, comm_size;
4088 int * ranks =
xmalloc(count *
sizeof(ranks));
4103 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
4105 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
4106 for (
size_t i = 0; i < count; ++i) sendcounts[ranks[i]]++;
4108 size_t local_count = sendcounts[comm_rank];
4109 sendcounts[comm_rank] = 0;
4112 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4114 size_t remote_count = sdispls[comm_size] + sendcounts[comm_size-1];
4115 size_t request_count = rdispls[comm_size-1] + recvcounts[comm_size-1];
4118 xmalloc((remote_count + request_count + local_count) *
4119 sizeof(*coord_buffer));
4123 coord_buffer + remote_count + request_count;
4126 for (
size_t i = 0, k = 0; i < count; ++i) {
4127 if (ranks[i] == comm_rank) {
4128 coord_local_buffer[k][0] = search_coords[i][0];
4129 coord_local_buffer[k][1] = search_coords[i][1];
4130 coord_local_buffer[k][2] = search_coords[i][2];
4133 size_t displ = sdispls[ranks[i]+1]++;
4134 coord_send_buffer[displ][0] = search_coords[i][0];
4135 coord_send_buffer[displ][1] = search_coords[i][1];
4136 coord_send_buffer[displ][2] = search_coords[i][2];
4140 MPI_Datatype dt_coord;
4141 yac_mpi_call(MPI_Type_contiguous(3, MPI_DOUBLE, &dt_coord), comm);
4146 coord_send_buffer, sendcounts, sdispls,
4147 coord_recv_buffer, recvcounts, rdispls,
4148 sizeof(*coord_send_buffer), dt_coord, comm);
4152 size_t * local_cells =
4153 xmalloc((request_count + local_count) *
sizeof(*local_cells));
4162 grid_pair, grid_name, coord_recv_buffer, request_count + local_count,
4172 xmalloc((remote_count + request_count) *
4173 sizeof(*single_remote_point_buffer));
4182 for (
size_t i = 0; i < request_count; ++i) {
4183 size_t cell_idx = local_cells[i];
4184 id_send_buffer[i].
data.
rank = comm_rank;
4185 if (cell_idx != SIZE_MAX) {
4189 id_send_buffer[i].
global_id = XT_INT_MAX;
4194 MPI_Datatype single_remote_point_dt =
4199 id_send_buffer, recvcounts, rdispls, id_recv_buffer, sendcounts, sdispls,
4200 sizeof(*id_send_buffer), single_remote_point_dt,
comm);
4204 size_t * new_local_cells =
4205 xmalloc(remote_count *
sizeof(*new_local_cells));
4216 dist_grid, id_recv_buffer, remote_count,
YAC_LOC_CELL, new_local_cells);
4219 for (
size_t i = 0, k = 0; i <
count; ++i) {
4220 if (ranks[i] == comm_rank) {
4221 cells[i] = local_cells[request_count + k];
4224 size_t displ = sdispls[ranks[i]]++;
4225 cells[i] = new_local_cells[displ];
4229 free(new_local_cells);
4230 free(single_remote_point_buffer);
4419 MPI_Comm comm = grid_pair->
comm;
4420 int comm_rank, comm_size;
4425 (max_search_distance >= 0.0) && (max_search_distance <= M_PI),
4426 "ERROR(yac_dist_grid_pair_do_nnn_search): "
4427 "invalid max_search_distance (%lf)", max_search_distance)
4442 uint64_t unmasked_local_count =
4445 uint64_t * unmasked_local_counts =
4446 xmalloc((
size_t)comm_size *
sizeof(*unmasked_local_counts));
4451 &unmasked_local_count, 1, MPI_UINT64_T,
4452 unmasked_local_counts, 1, MPI_UINT64_T,
comm),
comm);
4456 for (
int i = 0; i < comm_size; ++i)
4457 flag |= unmasked_local_counts[i] < (uint64_t)n;
4462 uint64_t global_num_unmasked_count = 0;
4463 for (
int i = 0; i < comm_size; ++i)
4464 global_num_unmasked_count += unmasked_local_counts[i];
4467 (
size_t)global_num_unmasked_count >= n,
4468 "ERROR(yac_dist_grid_pair_do_nnn_search): insufficient number of "
4471 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
4473 1, &sendcounts, &recvcounts, &sdispls, &rdispls,
comm);
4477 int * flag_buffer =
xcalloc(2 * (
size_t)comm_size,
sizeof(*flag_buffer));
4478 int * send_flags = flag_buffer;
4479 int * recv_flags = flag_buffer + comm_size;
4482 send_flags, recv_flags, comm_rank, comm_size);
4483 for (
int i = 0; i < comm_size; ++i) {
4484 sendcounts[i] = (size_t)send_flags[i];
4485 recvcounts[i] = (size_t)recv_flags[i];
4489 size_t local_send_count = (size_t)(
MIN(unmasked_local_count, n));
4492 for (
int i = 0; i < comm_size; ++i) {
4495 sendcounts[i] *= local_send_count;
4496 raccu += (recvcounts[i] *= (int)(
MIN(unmasked_local_counts[i], n)));
4499 size_t recv_count = recvcounts[comm_size-1] + rdispls[comm_size-1];
4503 (local_send_count + recv_count) *
sizeof(*single_remote_point_buffer));
4506 single_remote_point_buffer + local_send_count;
4510 dist_grid, field, comm_rank, local_send_count, local_send_ids);
4512 MPI_Datatype single_remote_point_dt =
4517 local_send_ids, sendcounts, sdispls, recv_ids, recvcounts, rdispls,
4518 sizeof(*local_send_ids), single_remote_point_dt, comm);
4520 yac_mpi_call(MPI_Type_free(&single_remote_point_dt), comm);
4522 size_t * dummy =
xmalloc(recv_count *
sizeof(*dummy));
4527 dist_grid, recv_ids, recv_count, field.
location, dummy);
4530 free(single_remote_point_buffer);
4533 free(unmasked_local_counts);
4546 sphere_part, count, search_coords, n, ubounds);
4553 int * request_ranks = NULL;
4554 size_t request_ranks_array_size = 0;
4555 size_t num_request_ranks = 0;
4556 int * num_requests =
xmalloc(count *
sizeof(*num_requests));
4559 for (
size_t i = 0; i < count; ++i) {
4564 3 *
sizeof(search_coords[0][0]));
4566 ubounds[i] = max_search_distance_angle;
4571 num_request_ranks + (
size_t)comm_size);
4575 int * curr_request_ranks = request_ranks + num_request_ranks;
4578 curr_request_ranks, num_requests + i);
4581 int new_num_requests = 0;
4582 for (
int j = 0; j < num_requests[i]; ++j) {
4583 if (curr_request_ranks[j] == comm_rank)
continue;
4584 if (new_num_requests != j)
4585 curr_request_ranks[new_num_requests] = curr_request_ranks[j];
4589 num_request_ranks += (size_t)(num_requests[i] = new_num_requests);
4598 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
4600 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
4602 for (
size_t i = 0; i < num_request_ranks; ++i) sendcounts[request_ranks[i]]++;
4605 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4607 size_t recv_count = rdispls[comm_size-1] + recvcounts[comm_size-1];
4610 xmalloc((num_request_ranks + recv_count) *
sizeof(*bnd_circles));
4612 struct bounding_circle * recv_bnd_circles = bnd_circles + num_request_ranks;
4615 for (
size_t i = 0, k = 0; i < count; ++i) {
4616 for (
int j = 0; j < num_requests[i]; ++j, ++k) {
4618 send_bnd_circles + sdispls[request_ranks[k]+1];
4619 sdispls[request_ranks[k]+1]++;
4620 memcpy(curr_bnd_circle->
base_vector, search_coords[i],
4621 3 *
sizeof(search_coords[0][0]));
4622 curr_bnd_circle->
inc_angle = ubounds[i];
4627 free(request_ranks);
4634 send_bnd_circles, sendcounts, sdispls,
4635 recv_bnd_circles, recvcounts, rdispls,
4636 sizeof(*send_bnd_circles), bnd_circle_dt, comm);
4645 size_t * result_points = NULL;
4646 size_t result_points_array_size = 0;
4647 size_t * num_results_points =
4648 xmalloc(recv_count *
sizeof(*num_results_points));
4650 sphere_part, recv_count, recv_bnd_circles, n, &result_points,
4651 &result_points_array_size, num_results_points);
4657 size_t total_num_result_points = 0;
4658 size_t offset = 0, k = 0;
4659 for (
int i = 0; i < comm_size; ++i) {
4660 size_t curr_num_result_points = 0;
4661 for (
size_t j = 0; j < recvcounts[i]; ++j, ++k)
4662 curr_num_result_points += num_results_points[k];
4663 size_t new_num_result_points = curr_num_result_points;
4665 result_points + offset,
4668 result_points + offset, &new_num_result_points);
4670 result_points + total_num_result_points,
4671 result_points + offset, new_num_result_points *
4672 sizeof(*result_points));
4673 total_num_result_points += new_num_result_points;
4674 offset += curr_num_result_points;
4675 sendcounts[i] = new_num_result_points;
4677 free(num_results_points);
4680 1, sendcounts, recvcounts, sdispls, rdispls, comm);
4681 recv_count = rdispls[comm_size-1] + recvcounts[comm_size-1];
4687 xmalloc((total_num_result_points + recv_count) *
4688 sizeof(*single_remote_point_buffer));
4691 total_num_result_points;
4692 for (
size_t i = 0; i < total_num_result_points; ++i) {
4693 size_t orig_pos = result_points[i];
4694 id_send_buffer[i].
global_id = global_ids[orig_pos];
4695 id_send_buffer[i].
data.
rank = comm_rank;
4698 free(result_points);
4700 MPI_Datatype single_remote_point_dt =
4705 id_send_buffer, sendcounts, sdispls+1, id_recv_buffer, recvcounts, rdispls,
4706 sizeof(*id_send_buffer), single_remote_point_dt, comm);
4707 yac_mpi_call(MPI_Type_free(&single_remote_point_dt), comm);
4716 size_t * temp_idx =
xmalloc(recv_count *
sizeof(*temp_idx));
4718 dist_grid, id_recv_buffer, recv_count, field.
location, temp_idx);
4720 free(single_remote_point_buffer);
4727 dist_grid, field, count, search_coords, n, max_search_distance_angle.
cos,
4736 MPI_Comm comm = grid_pair->
comm;
4737 int comm_rank, comm_size;
4749 int * rank_buffer = NULL;
4750 size_t rank_buffer_size = 0;
4751 size_t rank_buffer_array_size = 0;
4753 for (
size_t i = 0; i <
count; ++i) {
4756 rank_buffer_size + (
size_t)comm_size);
4764 rank_buffer + rank_buffer_size, num_ranks + i);
4765 rank_buffer_size += (size_t)(num_ranks[i]);
4772 size_t * size_t_buffer =
4773 xmalloc(4 * (
size_t)comm_size *
sizeof(*size_t_buffer));
4774 size_t * result_sendcounts = size_t_buffer + 0 * comm_size;
4775 size_t * result_recvcounts = size_t_buffer + 1 * comm_size;
4776 size_t * result_sdispls = size_t_buffer + 2 * comm_size;
4777 size_t * result_rdispls = size_t_buffer + 3 * comm_size;
4779 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
4781 1, &sendcounts, &recvcounts, &sdispls, &rdispls,
comm);
4783 for (
size_t i = 0, offset = 0; i <
count; ++i) {
4784 int curr_num_ranks = num_ranks[i];
4785 int * ranks = rank_buffer + offset;
4786 offset += (size_t)curr_num_ranks;
4787 for (
int j = 0; j < curr_num_ranks; ++j) sendcounts[ranks[j]]++;
4791 size_t local_count = sendcounts[comm_rank];
4792 sendcounts[comm_rank] = 0;
4795 1, sendcounts, recvcounts, sdispls, rdispls,
comm);
4797 size_t send_count = sdispls[comm_size] + sendcounts[comm_size-1];
4798 size_t recv_count = rdispls[comm_size-1] + recvcounts[comm_size-1];
4801 xmalloc((send_count + recv_count + local_count) *
4802 sizeof(*bnd_circle_buffer));
4804 struct bounding_circle * recv_buffer = bnd_circle_buffer + send_count;
4806 bnd_circle_buffer + send_count + recv_count;
4809 for (
size_t i = 0, offset = 0, local_offset = 0; i < count; ++i) {
4810 int curr_num_ranks = num_ranks[i];
4811 int * ranks = rank_buffer + offset;
4812 offset += (size_t)curr_num_ranks;
4813 for (
int j = 0; j < curr_num_ranks; ++j) {
4814 int rank = ranks[j];
4815 if (rank == comm_rank)
4816 local_buffer[local_offset++] = bnd_circles[i];
4818 send_buffer[sdispls[rank + 1]++] = bnd_circles[i];
4827 send_buffer, sendcounts, sdispls, recv_buffer, recvcounts, rdispls,
4828 sizeof(*send_buffer), bnd_circle_dt, comm);
4839 size_t * local_cells = NULL;
4840 size_t * num_local_cells_per_bnd_circle =
4841 xmalloc((recv_count + local_count) *
4842 sizeof(*num_local_cells_per_bnd_circle));
4844 uint64_t * uint64_t_buffer =
4845 xmalloc((send_count + recv_count + local_count) *
4846 sizeof(*uint64_t_buffer));
4847 uint64_t * num_local_cells_per_bnd_circle_uint64_t = uint64_t_buffer;
4848 uint64_t * num_remote_cells_per_bnd_circle =
4849 uint64_t_buffer + recv_count + local_count;
4853 cell_sphere_part, recv_buffer, recv_count + local_count, &local_cells,
4854 num_local_cells_per_bnd_circle);
4861 int const * field_mask =
4868 for (
size_t i = 0, offset = 0, new_offset = 0;
4869 i < recv_count + local_count; ++i) {
4872 size_t curr_num_results = num_local_cells_per_bnd_circle[i];
4875 uint64_t new_num_results = 0;
4876 for (
size_t j = 0; j < curr_num_results; ++j, ++offset) {
4877 size_t local_cell_id = local_cells[offset];
4879 cell_bnd_circles + local_cell_id))
continue;
4880 if ((field_mask == NULL) || (field_mask[local_cell_id])) {
4881 if (offset != new_offset) local_cells[new_offset] = local_cell_id;
4886 num_local_cells_per_bnd_circle_uint64_t[i] = new_num_results;
4888 free(num_local_cells_per_bnd_circle);
4889 free(bnd_circle_buffer);
4897 num_local_cells_per_bnd_circle_uint64_t, recvcounts, rdispls,
4898 num_remote_cells_per_bnd_circle, sendcounts, sdispls,
4899 sizeof(*num_local_cells_per_bnd_circle_uint64_t), MPI_UINT64_T, comm);
4901 size_t saccu = 0, raccu = 0, soffset = 0, roffset = 0;
4902 for (
int i = 0; i < comm_size; ++i) {
4904 result_sdispls[i] = saccu;
4905 result_rdispls[i] = raccu;
4907 size_t sendcount = recvcounts[i];
4908 size_t recvcount = sendcounts[i];
4910 result_sendcounts[i] = 0;
4911 result_recvcounts[i] = 0;
4912 for (
size_t j = 0; j < sendcount; ++j, ++soffset)
4913 result_sendcounts[i] +=
4914 (
size_t)(num_local_cells_per_bnd_circle_uint64_t[soffset]);
4915 for (
size_t j = 0; j < recvcount; ++j, ++roffset)
4916 result_recvcounts[i] +=
4917 (
size_t)(num_remote_cells_per_bnd_circle[roffset]);
4919 saccu += result_sendcounts[i];
4920 raccu += result_recvcounts[i];
4925 size_t result_local_count = 0;
4926 for (
size_t i = recv_count; i < recv_count + local_count; ++i)
4927 result_local_count += (
size_t)(num_local_cells_per_bnd_circle_uint64_t[i]);
4929 size_t result_send_count = (size_t)(result_sdispls[comm_size-1]) +
4930 (size_t)(result_sendcounts[comm_size-1]);
4931 size_t result_recv_count = (size_t)(result_rdispls[comm_size-1]) +
4932 (size_t)(result_recvcounts[comm_size-1]);
4935 xmalloc((result_recv_count + result_send_count) *
4936 sizeof(*single_remote_point_buffer));
4943 for (
size_t i = 0; i < result_send_count; ++i) {
4944 size_t local_cell_id = local_cells[i];
4945 id_send_buffer[i].
global_id = cell_ids[local_cell_id];
4946 id_send_buffer[i].
data.
rank = comm_rank;
4950 MPI_Datatype single_remote_point_dt =
4955 id_send_buffer, result_sendcounts, result_sdispls,
4956 id_recv_buffer, result_recvcounts, result_rdispls,
4957 sizeof(*id_send_buffer), single_remote_point_dt, comm);
4959 yac_mpi_call(MPI_Type_free(&single_remote_point_dt), comm);
4961 size_t * new_local_cells =
4962 xmalloc((result_recv_count + result_local_count) *
4963 sizeof(*new_local_cells));
4965 memcpy(new_local_cells + result_recv_count,
4966 local_cells + result_send_count,
4967 result_local_count *
sizeof(*new_local_cells));
4978 dist_grid, id_recv_buffer, result_recv_count,
YAC_LOC_CELL, new_local_cells);
4980 free(single_remote_point_buffer);
4982 size_t * reorder_idx =
4983 xmalloc((result_recv_count + result_local_count) *
sizeof(*reorder_idx));
4986 num_results_per_bnd_circle, 0, count *
sizeof(*num_results_per_bnd_circle));
4988 for (
size_t i = 0, offset = 0, reorder = 0, local_search_idx = recv_count,
4989 local_offset = result_recv_count; i < count; ++i) {
4990 int curr_num_ranks = num_ranks[i];
4991 int * ranks = rank_buffer + offset;
4992 offset += (size_t)curr_num_ranks;
4993 for (
int j = 0; j < curr_num_ranks; ++j) {
4994 int rank = ranks[j];
4995 if (rank == comm_rank) {
4996 uint64_t curr_num_results =
4997 num_local_cells_per_bnd_circle_uint64_t[local_search_idx++];
4998 num_results_per_bnd_circle[i] += (size_t)curr_num_results;
4999 for (uint64_t k = 0; k < curr_num_results; ++k, ++reorder)
5000 reorder_idx[local_offset++] = reorder;
5002 size_t rank_pos = sdispls[rank]++;
5003 uint64_t curr_num_results = num_remote_cells_per_bnd_circle[rank_pos];
5004 num_results_per_bnd_circle[i] += (size_t)curr_num_results;
5005 for (uint64_t k = 0; k < curr_num_results; ++k, ++reorder)
5006 reorder_idx[result_rdispls[rank]++] = reorder;
5010 free(uint64_t_buffer);
5013 free(size_t_buffer);
5017 reorder_idx, result_recv_count + result_local_count, new_local_cells);
5021 for (
size_t i = 0, offset = 0, new_offset = 0; i < count; ++i) {
5023 size_t * curr_local_cells = new_local_cells + offset;
5024 size_t curr_num_results_per_bnd_circle = num_results_per_bnd_circle[i];
5025 size_t new_num_results_per_bnd_circle = 0;
5026 size_t prev_cell = SIZE_MAX;
5027 offset += curr_num_results_per_bnd_circle;
5030 curr_local_cells, curr_num_results_per_bnd_circle, NULL);
5032 for (
size_t j = 0; j < curr_num_results_per_bnd_circle; ++j) {
5033 size_t curr_cell = curr_local_cells[j];
5034 if (curr_cell != prev_cell) {
5035 new_local_cells[new_offset++] = (prev_cell = curr_cell);
5036 ++new_num_results_per_bnd_circle;
5039 num_results_per_bnd_circle[i] = new_num_results_per_bnd_circle;
5042 *cells = new_local_cells;
5130 size_t * cells,
size_t count,
size_t * neighbours) {
5140 int max_num_edges_per_cell = 0;
5142 if (max_num_edges_per_cell < dist_grid->num_vertices_per_cell[i])
5146 xmalloc((
size_t)max_num_edges_per_cell *
sizeof(*edge_vertices));
5148 size_t neigh_idx = 0;
5151 size_t missing_edge_neighbour_array_size = 0;
5152 size_t num_missing_neighbours = 0;
5155 for (
size_t i = 0; i < count; ++i) {
5157 size_t curr_cell = cells[i];
5161 size_t const * cell_edges =
5163 for (
size_t j = 0; j < curr_num_edges; ++j) {
5164 size_t const * curr_edge_to_vertex =
5166 edge_vertices[j][0] = curr_edge_to_vertex[0];
5167 edge_vertices[j][1] = curr_edge_to_vertex[1];
5172 num_missing_neighbours + curr_num_edges);
5176 size_t prev_vertex = edge_vertices[0][0];
5177 for (
size_t j = 0, edge_idx = 0; j < curr_num_edges; ++j, ++
neigh_idx) {
5180 size_t curr_edge = cell_edges[edge_idx];
5181 size_t * curr_edge_cells = edge_to_cell[curr_edge];
5182 size_t other_cell = curr_edge_cells[curr_edge_cells[0] == curr_cell];
5185 if (other_cell == SIZE_MAX) {
5198 size_t new_edge_idx = SIZE_MAX;
5199 for (
size_t k = 0; k < curr_num_edges; ++k) {
5200 if (k == edge_idx)
continue;
5201 else if (edge_vertices[k][0] == prev_vertex) {
5203 prev_vertex = edge_vertices[k][1];
5205 }
else if (edge_vertices[k][1] == prev_vertex) {
5207 prev_vertex = edge_vertices[k][0];
5212 new_edge_idx < SIZE_MAX,
5213 "ERROR(yac_basic_grid_data_get_cell_neighbours): "
5214 "inconsistent cell_to_edge/edge_to_vertex data")
5215 edge_idx = new_edge_idx;
5220 prev_vertex == edge_vertices[0][0],
5221 "ERROR(yac_basic_grid_data_get_cell_neighbours): "
5222 "inconsistent cell_to_edge/edge_to_vertex data")
5226 MPI_Comm comm = dist_grid->
comm;
5227 int comm_rank, comm_size;
5231 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
5233 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
5236 ((
size_t)comm_size + num_missing_neighbours) *
sizeof(*int_buffer));
5237 int * temp_ranks = int_buffer;
5238 int * num_ranks = int_buffer + comm_size;
5239 memset(num_ranks, 0, num_missing_neighbours *
sizeof(*num_ranks));
5241 int * rank_buffer = NULL;
5242 size_t rank_buffer_array_size = 0;
5243 size_t rank_buffer_size = 0;