249 const char * filename, MPI_Comm comm,
int * nbr_vertices,
int * nbr_cells,
250 int ** num_vertices_per_cell,
int **
cell_to_vertex,
int ** global_cell_ids,
251 int ** cell_owner,
int ** global_vertex_ids,
int ** vertex_owner,
252 double ** x_vertices,
double ** y_vertices,
253 double ** x_cells,
double ** y_cells,
int ** cell_msk) {
256 ((x_cells == NULL) && (y_cells == NULL)) ||
257 ((x_cells != NULL) && (y_cells != NULL)),
258 "ERROR(yac_read_icon_grid_information_parallel): "
259 "arguments x_cells and y_cells have to be either both NULL or "
260 "both different from NULL");
262 const int with_cell_coords = x_cells != NULL;
263 const int with_cell_mask = cell_msk != NULL;
265 int comm_rank, comm_size;
267 MPI_Comm_rank(comm, &comm_rank);
268 MPI_Comm_size(comm, &comm_size);
270 int local_is_io, * io_ranks, num_io_ranks;
273 size_t ncells, nvertices, nv, ne;
275 size_t read_local_start_cell = 0;
276 size_t read_num_local_cells = 0;
277 size_t read_local_start_vertex = 0;
278 size_t read_num_local_vertices = 0;
279 double * read_cell_lon = NULL;
280 double * read_cell_lat = NULL;
281 int * read_cell_mask = NULL;
282 double * read_vertex_lon = NULL;
283 double * read_vertex_lat = NULL;
284 int * read_dist_vertex_of_cell = NULL;
285 int * read_dist_cells_of_vertex = NULL;
289 unsigned long io_proc_idx = ULONG_MAX;
290 for (
int i = 0; (i < num_io_ranks) && (io_proc_idx == ULONG_MAX); ++i)
291 if (io_ranks[i] == comm_rank)
292 io_proc_idx = (
unsigned long)i;
310 read_local_start_cell =
311 ((
unsigned long)ncells * io_proc_idx) / (
unsigned long)num_io_ranks;
312 read_num_local_cells =
313 ((
unsigned long)ncells * (io_proc_idx+1)) / (
unsigned long)num_io_ranks -
314 (
unsigned long)read_local_start_cell;
315 read_local_start_vertex =
316 ((
unsigned long)nvertices * io_proc_idx) / (
unsigned long)num_io_ranks;
317 read_num_local_vertices =
318 ((
unsigned long)nvertices * (io_proc_idx+1)) / (
unsigned long)num_io_ranks -
319 (
unsigned long)read_local_start_vertex;
324 xmalloc(read_num_local_cells *
sizeof(*read_cell_lon)):NULL;
327 xmalloc(read_num_local_cells *
sizeof(*read_cell_lat)):NULL;
330 xmalloc(read_num_local_cells *
sizeof(*read_cell_mask)):NULL;
331 read_vertex_lon =
xmalloc(read_num_local_vertices *
sizeof(*read_vertex_lon));
332 read_vertex_lat =
xmalloc(read_num_local_vertices *
sizeof(*read_vertex_lat));
333 read_dist_vertex_of_cell =
334 xmalloc(read_num_local_cells * nv *
sizeof(*read_dist_vertex_of_cell));
335 read_dist_cells_of_vertex =
336 xmalloc(read_num_local_vertices * ne *
sizeof(*read_dist_cells_of_vertex));
338 if (with_cell_coords) {
341 &read_num_local_cells, read_cell_lon));
345 &read_num_local_cells, read_cell_lat));
348 if (with_cell_mask) {
351 &read_num_local_cells, read_cell_mask));
355 &read_num_local_vertices, read_vertex_lon));
356 convert_to_rad(ncid, varid, read_vertex_lon, read_num_local_vertices);
359 &read_num_local_vertices, read_vertex_lat));
360 convert_to_rad(ncid, varid, read_vertex_lat, read_num_local_vertices);
363 ne*read_num_local_vertices) *
sizeof(*
buffer));
365 size_t tmp_start[2] = {0, read_local_start_cell};
366 size_t tmp_count[2] = {nv, read_num_local_cells};
369 for (
size_t i = 0; i < read_num_local_cells; ++i)
370 for (
size_t j = 0; j < nv; ++j)
371 read_dist_vertex_of_cell[i * nv + j] =
372 buffer[i + j * read_num_local_cells];
375 size_t tmp_start[2] = {0, read_local_start_vertex};
376 size_t tmp_count[2] = {ne, read_num_local_vertices};
379 for (
size_t i = 0; i < read_num_local_vertices; ++i)
380 for (
size_t j = 0; j < ne; ++j)
381 read_dist_cells_of_vertex[i * ne + j] =
382 buffer[i + j * read_num_local_vertices];
387 for (
size_t i = 0; i < read_num_local_cells * nv; ++i)
388 if (read_dist_vertex_of_cell[i] > 0) read_dist_vertex_of_cell[i]--;
389 for (
size_t i = 0; i < read_num_local_vertices * ne; ++i)
390 read_dist_cells_of_vertex[i]--;
396 with_cell_coords?
xmalloc(1 *
sizeof(*read_cell_lon)):NULL;
398 with_cell_coords?
xmalloc(1 *
sizeof(*read_cell_lat)):NULL;
400 with_cell_mask?
xmalloc(1 *
sizeof(*read_cell_mask)):NULL;
401 read_vertex_lon =
xmalloc(1 *
sizeof(*read_vertex_lon));
402 read_vertex_lat =
xmalloc(1 *
sizeof(*read_vertex_lat));
403 read_dist_vertex_of_cell =
xmalloc(1 *
sizeof(*read_dist_vertex_of_cell));
404 read_dist_cells_of_vertex =
xmalloc(1 *
sizeof(*read_dist_cells_of_vertex));
411 if (comm_rank == 0) tmp = (int)ncells;
412 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
413 ncells = (size_t)tmp;
414 if (comm_rank == 0) tmp = (int)nvertices;
415 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
416 nvertices = (size_t)tmp;
417 if (comm_rank == 0) tmp = (int)nv;
418 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
420 if (comm_rank == 0) tmp = (int)ne;
421 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
426 size_t local_start_cell =
427 ((
unsigned long)ncells * (
unsigned long)comm_rank) /
428 (
unsigned long)comm_size;
429 size_t num_local_cells =
430 ((
unsigned long)ncells * ((
unsigned long)comm_rank+1)) /
431 (
unsigned long)comm_size - (
unsigned long)local_start_cell;
432 size_t local_start_vertex =
433 ((
unsigned long)nvertices * (
unsigned long)comm_rank) /
434 (
unsigned long)comm_size;
435 size_t num_local_vertices =
436 ((
unsigned long)nvertices * ((
unsigned long)comm_rank+1)) /
437 (
unsigned long)comm_size - (
unsigned long)local_start_vertex;
446 int * dist_vertex_of_cell =
447 xmalloc(num_local_cells * nv *
sizeof(*dist_vertex_of_cell));
449 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
450 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
452 for (
size_t i = 0; i < read_num_local_cells; ++i)
455 read_local_start_cell + i, ncells, comm_size)]++;
457 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
459 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
460 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
461 int send_accum = 0, recv_accum = 0;
462 for (
int i = 0; i < comm_size; ++i) {
463 send_displ[i] = send_accum;
464 recv_displ[i] = recv_accum;
465 send_accum += send_count[i];
466 recv_accum += recv_count[i];
469 if (with_cell_coords) {
470 MPI_Alltoallv(read_cell_lon, send_count, send_displ, MPI_DOUBLE,
471 cell_lon, recv_count, recv_displ, MPI_DOUBLE, comm);
472 MPI_Alltoallv(read_cell_lat, send_count, send_displ, MPI_DOUBLE,
473 cell_lat, recv_count, recv_displ, MPI_DOUBLE, comm);
475 if (with_cell_mask) {
476 MPI_Alltoallv(read_cell_mask, send_count, send_displ, MPI_INT,
477 cell_mask, recv_count, recv_displ, MPI_INT, comm);
480 for (
int i = 0; i < comm_size; ++i) {
487 MPI_Alltoallv(read_dist_vertex_of_cell, send_count, send_displ, MPI_INT,
488 dist_vertex_of_cell, recv_count, recv_displ, MPI_INT, comm);
496 free(read_cell_mask);
497 free(read_dist_vertex_of_cell);
501 double * vertex_lon =
xmalloc(num_local_vertices *
sizeof(*vertex_lon));
502 double * vertex_lat =
xmalloc(num_local_vertices *
sizeof(*vertex_lat));
503 int * dist_cells_of_vertex =
504 xmalloc(num_local_vertices * ne *
sizeof(*dist_cells_of_vertex));
506 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
507 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
509 for (
size_t i = 0; i < read_num_local_vertices; ++i)
512 read_local_start_vertex + i, nvertices, comm_size)]++;
514 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
516 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
517 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
518 int send_accum = 0, recv_accum = 0;
519 for (
int i = 0; i < comm_size; ++i) {
520 send_displ[i] = send_accum;
521 recv_displ[i] = recv_accum;
522 send_accum += send_count[i];
523 recv_accum += recv_count[i];
526 MPI_Alltoallv(read_vertex_lon, send_count, send_displ, MPI_DOUBLE,
527 vertex_lon, recv_count, recv_displ, MPI_DOUBLE, comm);
528 MPI_Alltoallv(read_vertex_lat, send_count, send_displ, MPI_DOUBLE,
529 vertex_lat, recv_count, recv_displ, MPI_DOUBLE, comm);
531 for (
int i = 0; i < comm_size; ++i) {
538 MPI_Alltoallv(read_dist_cells_of_vertex, send_count, send_displ, MPI_INT,
539 dist_cells_of_vertex, recv_count, recv_displ, MPI_INT, comm);
545 free(read_vertex_lon);
546 free(read_vertex_lat);
547 free(read_dist_cells_of_vertex);
551 size_t num_core_vertices = num_local_cells * nv;
552 int * core_vertices =
xmalloc(num_core_vertices *
sizeof(*core_vertices));
554 memcpy(core_vertices, dist_vertex_of_cell,
555 num_core_vertices *
sizeof(*core_vertices));
559 xrealloc(core_vertices, num_core_vertices *
sizeof(*core_vertices));
563 int * cells_of_vertex_core =
564 xmalloc(num_core_vertices * ne *
sizeof(*cells_of_vertex_core));
565 int * dist_vertex_owner =
566 xmalloc(num_local_vertices *
sizeof(*dist_vertex_owner));
568 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
569 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
571 for (
size_t i = 0; i < num_core_vertices; ++i)
572 send_count[((
unsigned long)(core_vertices[i]) *
573 (
unsigned long)comm_size + (
unsigned long)comm_size - 1) /
574 (
unsigned long)nvertices]++;
576 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
578 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
579 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
580 int send_accum = 0, recv_accum = 0;
581 for (
int i = 0; i < comm_size; ++i) {
582 send_displ[i] = send_accum;
583 recv_displ[i] = recv_accum;
584 send_accum += send_count[i];
585 recv_accum += recv_count[i];
588 int num_core_vertices_remote = 0;
589 for (
int i = 0; i < comm_size; ++i)
590 num_core_vertices_remote += recv_count[i];
592 int * remote_vertex_buffer =
593 xmalloc(num_core_vertices_remote *
sizeof(*remote_vertex_buffer));
595 MPI_Alltoallv(core_vertices, send_count, send_displ, MPI_INT,
596 remote_vertex_buffer, recv_count, recv_displ, MPI_INT, comm);
598 for (
size_t i = 0; i < num_local_vertices; ++i) dist_vertex_owner[i] = -1;
600 for (
int i = 0, j = 0; i < comm_size; ++i)
601 for (
int k = 0; k < recv_count[i]; ++k, ++j)
602 dist_vertex_owner[remote_vertex_buffer[j] - local_start_vertex] = i;
604 int * send_cell_of_vertex =
605 xmalloc(num_core_vertices_remote * ne *
sizeof(*send_cell_of_vertex));
607 for (
int i = 0, l = 0, m = 0; i < comm_size; ++i) {
608 for (
int j = 0; j < recv_count[i]; ++j, ++l) {
609 int idx = remote_vertex_buffer[l] - local_start_vertex;
610 for (
size_t k = 0; k < ne; ++k, ++m)
611 send_cell_of_vertex[m] = dist_cells_of_vertex[idx * ne + k];
614 free(dist_cells_of_vertex);
616 for (
int i = 0; i < comm_size; ++i) {
623 MPI_Alltoallv(send_cell_of_vertex, recv_count, recv_displ, MPI_INT,
624 cells_of_vertex_core, send_count, send_displ, MPI_INT, comm);
626 free(send_cell_of_vertex);
627 free(remote_vertex_buffer);
636 size_t num_halo_cells = 0;
638 int * tmp_cells = cells_of_vertex_core;
639 size_t num_tmp_cells = num_core_vertices * ne;
644 if ((num_tmp_cells > 0) && (tmp_cells[0] == -1)) {
649 halo_cells =
xmalloc((num_tmp_cells - num_local_cells) *
sizeof(*halo_cells));
652 for (; i < num_tmp_cells && tmp_cells[i] < (int)local_start_cell; ++i)
653 halo_cells[num_halo_cells++] = tmp_cells[i];
654 i += num_local_cells;
655 for (; i < num_tmp_cells; ++i) halo_cells[num_halo_cells++] = tmp_cells[i];
657 assert(num_halo_cells == num_tmp_cells - num_local_cells);
659 free(cells_of_vertex_core);
663 size_t num_all_local_vertices = num_halo_cells * nv + num_core_vertices;
664 int * all_cell_to_vertex;
665 int * all_local_vertices =
xrealloc(core_vertices, num_all_local_vertices *
666 sizeof(*all_local_vertices));
667 if (with_cell_coords) {
673 if (with_cell_mask) {
678 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
679 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
681 for (
size_t i = 0; i < num_halo_cells; ++i)
682 send_count[((
unsigned long)(halo_cells[i]) *
683 (
unsigned long)comm_size + (
unsigned long)comm_size - 1) /
684 (
unsigned long)ncells]++;
686 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
688 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
689 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
690 int send_accum = 0, recv_accum = 0;
691 for (
int i = 0; i < comm_size; ++i) {
692 send_displ[i] = send_accum;
693 recv_displ[i] = recv_accum;
694 send_accum += send_count[i];
695 recv_accum += recv_count[i];
698 int num_halo_cells_remote = 0;
699 for (
int i = 0; i < comm_size; ++i)
700 num_halo_cells_remote += recv_count[i];
702 int * remote_halo_cell_buffer =
703 xmalloc(num_halo_cells_remote *
sizeof(*remote_halo_cell_buffer));
705 MPI_Alltoallv(halo_cells, send_count, send_displ, MPI_INT,
706 remote_halo_cell_buffer, recv_count, recv_displ, MPI_INT,
709 int * send_halo_cell_vertices =
710 xmalloc(num_halo_cells_remote * nv *
sizeof(*send_halo_cell_vertices));
711 double * send_cell_lon =
713 xmalloc(num_halo_cells_remote *
sizeof(*send_cell_lon)):NULL;
714 double * send_cell_lat =
716 xmalloc(num_halo_cells_remote *
sizeof(*send_cell_lat)):NULL;
717 int * send_cell_mask =
719 xmalloc(num_halo_cells_remote *
sizeof(*send_cell_mask)):NULL;
721 for (
int i = 0, l = 0, m = 0; i < comm_size; ++i) {
722 for (
int j = 0; j < recv_count[i]; ++j, ++l) {
723 int idx = remote_halo_cell_buffer[l] - local_start_cell;
724 for (
size_t k = 0; k < nv; ++k, ++m)
725 send_halo_cell_vertices[m] = dist_vertex_of_cell[idx * nv + k];
728 if (with_cell_coords) {
729 for (
int i = 0, l = 0; i < comm_size; ++i) {
730 for (
int j = 0; j < recv_count[i]; ++j, ++l) {
731 int idx = remote_halo_cell_buffer[l] - local_start_cell;
737 if (with_cell_mask) {
738 for (
int i = 0, l = 0; i < comm_size; ++i) {
739 for (
int j = 0; j < recv_count[i]; ++j, ++l) {
740 int idx = remote_halo_cell_buffer[l] - local_start_cell;
746 if (with_cell_coords) {
747 MPI_Alltoallv(send_cell_lon, recv_count, recv_displ, MPI_DOUBLE,
748 cell_lon + num_local_cells, send_count, send_displ,
750 MPI_Alltoallv(send_cell_lat, recv_count, recv_displ, MPI_DOUBLE,
751 cell_lat + num_local_cells, send_count, send_displ,
754 if (with_cell_mask) {
755 MPI_Alltoallv(send_cell_mask, recv_count, recv_displ, MPI_INT,
756 cell_mask + num_local_cells, send_count, send_displ,
760 for (
int i = 0; i < comm_size; ++i) {
768 xrealloc(dist_vertex_of_cell, (num_local_cells + num_halo_cells) * nv *
769 sizeof(*all_cell_to_vertex));
771 MPI_Alltoallv(send_halo_cell_vertices, recv_count, recv_displ, MPI_INT,
772 all_cell_to_vertex + num_local_cells * nv, send_count,
773 send_displ, MPI_INT, comm);
775 memcpy(all_local_vertices + num_core_vertices,
776 all_cell_to_vertex + num_local_cells * nv,
777 num_halo_cells * nv *
sizeof(*all_local_vertices));
779 free(send_cell_mask);
782 free(send_halo_cell_vertices);
783 free(remote_halo_cell_buffer);
790 all_local_vertices, num_all_local_vertices, NULL);
795 double * all_vertex_lon =
796 xmalloc(num_all_local_vertices *
sizeof(*all_vertex_lon));
797 double * all_vertex_lat =
798 xmalloc(num_all_local_vertices *
sizeof(*all_vertex_lat));
799 int * all_local_vertices_owner =
800 xmalloc(num_all_local_vertices *
sizeof(*all_local_vertices_owner));
802 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
803 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
805 for (
size_t i = 0; i < num_all_local_vertices; ++i)
806 send_count[((
unsigned long)(all_local_vertices[i]) *
807 (
unsigned long)comm_size + (
unsigned long)comm_size - 1) /
808 (
unsigned long)nvertices]++;
810 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
812 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
813 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
814 int send_accum = 0, recv_accum = 0;
815 for (
int i = 0; i < comm_size; ++i) {
816 send_displ[i] = send_accum;
817 recv_displ[i] = recv_accum;
818 send_accum += send_count[i];
819 recv_accum += recv_count[i];
822 int num_all_local_vertices_remote = 0;
823 for (
int i = 0; i < comm_size; ++i)
824 num_all_local_vertices_remote += recv_count[i];
826 int * remote_vertex_buffer =
827 xmalloc(num_all_local_vertices_remote *
sizeof(*remote_vertex_buffer));
829 MPI_Alltoallv(all_local_vertices, send_count, send_displ, MPI_INT,
830 remote_vertex_buffer, recv_count, recv_displ, MPI_INT, comm);
832 int * send_vertex_owner = remote_vertex_buffer;
833 double * send_vertex_lon =
834 xmalloc(num_all_local_vertices_remote *
sizeof(*send_vertex_lon));
835 double * send_vertex_lat =
836 xmalloc(num_all_local_vertices_remote *
sizeof(*send_vertex_lat));
838 for (
int i = 0, l = 0; i < comm_size; ++i) {
839 for (
int j = 0; j < recv_count[i]; ++j, ++l) {
840 int idx = remote_vertex_buffer[l] - local_start_vertex;
841 send_vertex_owner[l] = dist_vertex_owner[idx];
842 send_vertex_lon[l] = vertex_lon[idx];
843 send_vertex_lat[l] = vertex_lat[idx];
847 free(dist_vertex_owner);
851 MPI_Alltoallv(send_vertex_owner, recv_count, recv_displ, MPI_INT,
852 all_local_vertices_owner, send_count, send_displ, MPI_INT,
854 MPI_Alltoallv(send_vertex_lon, recv_count, recv_displ, MPI_DOUBLE,
855 all_vertex_lon, send_count, send_displ, MPI_DOUBLE, comm);
856 MPI_Alltoallv(send_vertex_lat, recv_count, recv_displ, MPI_DOUBLE,
857 all_vertex_lat, send_count, send_displ, MPI_DOUBLE, comm);
859 free(send_vertex_lat);
860 free(send_vertex_lon);
861 free(send_vertex_owner);
870 size_t vertex_count = nv * (num_local_cells + num_halo_cells);
871 int * permutation =
xmalloc(vertex_count *
sizeof(*permutation));
872 for (
size_t i = 0; i < vertex_count; ++i) permutation[i] = (
int)i;
876 for (
size_t i = 0, j = 0; i < vertex_count; ++i) {
877 while (all_local_vertices[j] != all_cell_to_vertex[i]) ++j;
878 all_cell_to_vertex[i] = (int)j;
885 *nbr_vertices = num_all_local_vertices;
886 *nbr_cells = num_local_cells + num_halo_cells;
888 *num_vertices_per_cell =
889 xmalloc((num_local_cells + num_halo_cells) *
sizeof(**num_vertices_per_cell));
890 for (
size_t i = 0; i < num_local_cells + num_halo_cells; ++i)
891 (*num_vertices_per_cell)[i] = nv;
896 xmalloc((num_local_cells + num_halo_cells) *
sizeof(**global_cell_ids));
897 for (
size_t i = 0; i < num_local_cells; ++i)
898 (*global_cell_ids)[i] = local_start_cell + i;
899 memcpy(*global_cell_ids + num_local_cells, halo_cells,
900 num_halo_cells *
sizeof(*halo_cells));
901 *global_vertex_ids =
xrealloc(all_local_vertices, num_all_local_vertices *
902 sizeof(*all_local_vertices));
905 xmalloc((num_local_cells + num_halo_cells) *
sizeof(**cell_owner));
906 for (
size_t i = 0; i < num_local_cells; ++i)
907 (*cell_owner)[i] = -1;
908 for (
size_t i = 0; i < num_halo_cells; ++i)
909 (*cell_owner)[num_local_cells + i] =
910 ((
unsigned long)(halo_cells[i]) * (
unsigned long)comm_size +
911 (
unsigned long)comm_size - 1) / (
unsigned long)ncells;
914 for (
size_t i = 0; i < num_all_local_vertices; ++i)
915 if (all_local_vertices_owner[i] == comm_rank)
916 all_local_vertices_owner[i] = -1;
917 *vertex_owner = all_local_vertices_owner;
919 *x_vertices = all_vertex_lon;
920 *y_vertices = all_vertex_lat;
921 if (with_cell_coords) {
925 if (with_cell_mask) *cell_msk =
cell_mask;
959 const char * filename, MPI_Comm comm,
960 double ** x_vertices,
double ** y_vertices,
962 size_t *
num_cells,
size_t * num_vertices,
size_t * num_edges,
963 int ** num_vertices_per_cell,
int ** num_cells_per_vertex,
964 size_t **
cell_to_vertex,
size_t ** cell_to_edge,
size_t ** vertex_to_cell,
966 double ** x_cells,
double ** y_cells,
int ** cell_msk) {
969 ((x_cells == NULL) && (y_cells == NULL)) ||
970 ((x_cells != NULL) && (y_cells != NULL)),
971 "ERROR(yac_read_icon_grid_information_parallel_2): "
972 "arguments x_cells and y_cells have to be either both NULL or "
973 "both different from NULL");
975 const int with_cell_coords = x_cells != NULL;
976 const int with_cell_mask = cell_msk != NULL;
978 int comm_rank, comm_size;
980 MPI_Comm_rank(comm, &comm_rank);
981 MPI_Comm_size(comm, &comm_size);
983 int local_is_io, * io_ranks, num_io_ranks;
986 size_t ncells, nvertices, nedges, nv, ne;
988 size_t read_local_start_cell = 0;
989 size_t read_num_local_cells = 0;
990 size_t read_local_start_vertex = 0;
991 size_t read_num_local_vertices = 0;
992 size_t read_local_start_edge = 0;
993 size_t read_num_local_edges = 0;
994 double * read_cell_lon = NULL;
995 double * read_cell_lat = NULL;
996 int * read_cell_mask = NULL;
997 double * read_vertex_lon = NULL;
998 double * read_vertex_lat = NULL;
999 int * read_dist_vertex_of_cell = NULL;
1000 int * read_dist_edge_of_cell = NULL;
1001 int * read_dist_edge_vertices = NULL;
1005 unsigned long io_proc_idx = ULONG_MAX;
1006 for (
int i = 0; (i < num_io_ranks) && (io_proc_idx == ULONG_MAX); ++i)
1007 if (io_ranks[i] == comm_rank)
1008 io_proc_idx = (
unsigned long)i;
1028 read_local_start_cell =
1029 ((
unsigned long)ncells * io_proc_idx) / (
unsigned long)num_io_ranks;
1030 read_num_local_cells =
1031 ((
unsigned long)ncells * (io_proc_idx+1)) / (
unsigned long)num_io_ranks -
1032 (
unsigned long)read_local_start_cell;
1033 read_local_start_vertex =
1034 ((
unsigned long)nvertices * io_proc_idx) / (
unsigned long)num_io_ranks;
1035 read_num_local_vertices =
1036 ((
unsigned long)nvertices * (io_proc_idx+1)) / (
unsigned long)num_io_ranks -
1037 (
unsigned long)read_local_start_vertex;
1038 read_local_start_edge =
1039 ((
unsigned long)nedges * io_proc_idx) / (
unsigned long)num_io_ranks;
1040 read_num_local_edges =
1041 ((
unsigned long)nedges * (io_proc_idx+1)) / (
unsigned long)num_io_ranks -
1042 (
unsigned long)read_local_start_edge;
1045 read_vertex_lon =
xmalloc(read_num_local_vertices *
sizeof(*read_vertex_lon));
1046 read_vertex_lat =
xmalloc(read_num_local_vertices *
sizeof(*read_vertex_lat));
1049 xmalloc(read_num_local_cells *
sizeof(*read_cell_lon)):NULL;
1052 xmalloc(read_num_local_cells *
sizeof(*read_cell_lat)):NULL;
1055 xmalloc(read_num_local_cells *
sizeof(*read_cell_mask)):NULL;
1056 read_dist_vertex_of_cell =
1057 xmalloc(read_num_local_cells * nv *
sizeof(*read_dist_vertex_of_cell));
1058 read_dist_edge_of_cell =
1059 xmalloc(read_num_local_cells * nv *
sizeof(*read_dist_edge_of_cell));
1060 read_dist_edge_vertices =
1061 xmalloc(read_num_local_edges * 2 *
sizeof(*read_dist_edge_vertices));
1065 &read_num_local_vertices, read_vertex_lon));
1066 convert_to_rad(ncid, varid, read_vertex_lon, read_num_local_vertices);
1069 &read_num_local_vertices, read_vertex_lat));
1070 convert_to_rad(ncid, varid, read_vertex_lat, read_num_local_vertices);
1071 if (with_cell_coords) {
1075 ncid, varid, &read_local_start_cell,
1076 &read_num_local_cells, read_cell_lon));
1077 convert_to_rad(ncid, varid, read_cell_lon, read_num_local_cells);
1081 ncid, varid, &read_local_start_cell,
1082 &read_num_local_cells, read_cell_lat));
1083 convert_to_rad(ncid, varid, read_cell_lat, read_num_local_cells);
1085 if (read_cell_mask) {
1089 ncid, varid, &read_local_start_cell,
1090 &read_num_local_cells, read_cell_mask));
1095 MAX(nv*read_num_local_cells, 2*read_num_local_edges) *
1098 size_t tmp_start[2] = {0, read_local_start_cell};
1099 size_t tmp_count[2] = {nv, read_num_local_cells};
1102 for (
size_t i = 0; i < read_num_local_cells; ++i)
1103 for (
size_t j = 0; j < nv; ++j)
1104 read_dist_vertex_of_cell[i * nv + j] =
1105 buffer[i + j * read_num_local_cells];
1108 for (
size_t i = 0; i < read_num_local_cells; ++i)
1109 for (
size_t j = 0; j < nv; ++j)
1110 read_dist_edge_of_cell[i * nv + j] =
1111 buffer[i + j * read_num_local_cells];
1114 size_t tmp_start[2] = {0, read_local_start_edge};
1115 size_t tmp_count[2] = {2, read_num_local_edges};
1118 for (
size_t i = 0; i < read_num_local_edges; ++i)
1119 for (
int j = 0; j < 2; ++j)
1120 read_dist_edge_vertices[i * 2 + j] =
1121 buffer[i + j * read_num_local_edges];
1126 for (
size_t i = 0; i < read_num_local_cells * nv; ++i)
1127 if (read_dist_vertex_of_cell[i] > 0) read_dist_vertex_of_cell[i]--;
1128 for (
size_t i = 0; i < read_num_local_cells * nv; ++i)
1129 if (read_dist_edge_of_cell[i] > 0) read_dist_edge_of_cell[i]--;
1130 for (
size_t i = 0; i < read_num_local_edges * 2; ++i)
1131 if (read_dist_edge_vertices[i] > 0) read_dist_edge_vertices[i]--;
1136 read_cell_lon = with_cell_coords?
xmalloc(1 *
sizeof(*read_cell_lon)):NULL;
1137 read_cell_lat = with_cell_coords?
xmalloc(1 *
sizeof(*read_cell_lat)):NULL;
1138 read_cell_mask = with_cell_mask?
xmalloc(1 *
sizeof(*read_cell_mask)):NULL;
1139 read_vertex_lon =
xmalloc(1 *
sizeof(*read_vertex_lon));
1140 read_vertex_lat =
xmalloc(1 *
sizeof(*read_vertex_lat));
1141 read_dist_vertex_of_cell =
xmalloc(1 *
sizeof(*read_dist_vertex_of_cell));
1142 read_dist_edge_of_cell =
xmalloc(1 *
sizeof(*read_dist_edge_of_cell));
1143 read_dist_edge_vertices =
xmalloc(1 *
sizeof(*read_dist_edge_vertices));
1150 if (comm_rank == 0) tmp = (int)ncells;
1151 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
1152 ncells = (size_t)tmp;
1153 if (comm_rank == 0) tmp = (int)nvertices;
1154 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
1155 nvertices = (size_t)tmp;
1156 if (comm_rank == 0) tmp = (int)nedges;
1157 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
1158 nedges = (size_t)tmp;
1159 if (comm_rank == 0) tmp = (int)nv;
1160 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
1162 if (comm_rank == 0) tmp = (int)ne;
1163 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
1168 size_t local_start_cell =
1169 ((
unsigned long)ncells * (
unsigned long)comm_rank) /
1170 (
unsigned long)comm_size;
1171 size_t num_local_cells =
1172 ((
unsigned long)ncells * ((
unsigned long)comm_rank+1)) /
1173 (
unsigned long)comm_size - (
unsigned long)local_start_cell;
1174 size_t local_start_vertex =
1175 ((
unsigned long)nvertices * (
unsigned long)comm_rank) /
1176 (
unsigned long)comm_size;
1177 size_t num_local_vertices =
1178 ((
unsigned long)nvertices * ((
unsigned long)comm_rank+1)) /
1179 (
unsigned long)comm_size - (
unsigned long)local_start_vertex;
1180 size_t local_start_edge =
1181 ((
unsigned long)nedges * (
unsigned long)comm_rank) /
1182 (
unsigned long)comm_size;
1183 size_t num_local_edges =
1184 ((
unsigned long)nedges * ((
unsigned long)comm_rank+1)) /
1185 (
unsigned long)comm_size - (
unsigned long)local_start_edge;
1188 if (with_cell_coords) {
1189 *x_cells =
xmalloc(num_local_cells *
sizeof(**x_cells));
1190 *y_cells =
xmalloc(num_local_cells *
sizeof(**y_cells));
1192 if (with_cell_mask) {
1193 *cell_msk =
xmalloc(num_local_cells *
sizeof(**cell_msk));
1195 int * dist_vertex_of_cell =
1196 xmalloc(num_local_cells * nv *
sizeof(*dist_vertex_of_cell));
1197 int * dist_edge_of_cell =
1198 xmalloc(num_local_cells * nv *
sizeof(*dist_edge_of_cell));
1200 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
1201 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
1203 for (
unsigned i = 0; i < read_num_local_cells; ++i)
1206 read_local_start_cell + i, ncells, comm_size)]++;
1208 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
1210 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
1211 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
1212 int send_accum = 0, recv_accum = 0;
1213 for (
int i = 0; i < comm_size; ++i) {
1214 send_displ[i] = send_accum;
1215 recv_displ[i] = recv_accum;
1216 send_accum += send_count[i];
1217 recv_accum += recv_count[i];
1220 if (with_cell_coords) {
1221 MPI_Alltoallv(read_cell_lon, send_count, send_displ, MPI_DOUBLE,
1222 *x_cells, recv_count, recv_displ, MPI_DOUBLE, comm);
1223 MPI_Alltoallv(read_cell_lat, send_count, send_displ, MPI_DOUBLE,
1224 *y_cells, recv_count, recv_displ, MPI_DOUBLE, comm);
1226 if (with_cell_mask) {
1227 MPI_Alltoallv(read_cell_mask, send_count, send_displ, MPI_INT,
1228 *cell_msk, recv_count, recv_displ, MPI_INT, comm);
1231 for (
int i = 0; i < comm_size; ++i) {
1232 send_count[i] *= nv;
1233 send_displ[i] *= nv;
1234 recv_count[i] *= nv;
1235 recv_displ[i] *= nv;
1238 MPI_Alltoallv(read_dist_vertex_of_cell, send_count, send_displ, MPI_INT,
1239 dist_vertex_of_cell, recv_count, recv_displ, MPI_INT, comm);
1240 MPI_Alltoallv(read_dist_edge_of_cell, send_count, send_displ, MPI_INT,
1241 dist_edge_of_cell, recv_count, recv_displ, MPI_INT, comm);
1247 free(read_dist_vertex_of_cell);
1248 free(read_dist_edge_of_cell);
1249 free(read_cell_mask);
1250 free(read_cell_lat);
1251 free(read_cell_lon);
1255 double * vertex_lon =
xmalloc(num_local_vertices *
sizeof(*vertex_lon));
1256 double * vertex_lat =
xmalloc(num_local_vertices *
sizeof(*vertex_lat));
1258 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
1259 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
1261 for (
unsigned i = 0; i < read_num_local_vertices; ++i)
1264 read_local_start_vertex + i, nvertices, comm_size)]++;
1266 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
1268 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
1269 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
1270 int send_accum = 0, recv_accum = 0;
1271 for (
int i = 0; i < comm_size; ++i) {
1272 send_displ[i] = send_accum;
1273 recv_displ[i] = recv_accum;
1274 send_accum += send_count[i];
1275 recv_accum += recv_count[i];
1278 MPI_Alltoallv(read_vertex_lon, send_count, send_displ, MPI_DOUBLE,
1279 vertex_lon, recv_count, recv_displ, MPI_DOUBLE, comm);
1280 MPI_Alltoallv(read_vertex_lat, send_count, send_displ, MPI_DOUBLE,
1281 vertex_lat, recv_count, recv_displ, MPI_DOUBLE, comm);
1287 free(read_vertex_lon);
1288 free(read_vertex_lat);
1292 int * dist_edge_vertices =
1293 xmalloc(num_local_edges * 2 *
sizeof(*dist_edge_vertices));
1295 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
1296 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
1298 for (
unsigned i = 0; i < read_num_local_edges; ++i)
1301 read_local_start_edge + i, nedges, comm_size)] += 2;
1303 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
1305 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
1306 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
1307 int send_accum = 0, recv_accum = 0;
1308 for (
int i = 0; i < comm_size; ++i) {
1309 send_displ[i] = send_accum;
1310 recv_displ[i] = recv_accum;
1311 send_accum += send_count[i];
1312 recv_accum += recv_count[i];
1315 MPI_Alltoallv(read_dist_edge_vertices, send_count, send_displ, MPI_INT,
1316 dist_edge_vertices, recv_count, recv_displ, MPI_INT, comm);
1322 free(read_dist_edge_vertices);
1328 size_t num_core_vertices;
1330 size_t N = num_local_cells * nv;
1331 *vertex_ids =
xmalloc(
N *
sizeof(**vertex_ids));
1332 *num_cells_per_vertex =
xmalloc(
N *
sizeof(**num_cells_per_vertex));
1333 *vertex_to_cell =
xmalloc(
N *
sizeof(**vertex_to_cell));
1335 for (
size_t i = 0; i <
N; ++i)
1336 (*vertex_ids)[i] = (
yac_int)dist_vertex_of_cell[i];
1337 size_t * permutation = *vertex_to_cell;
1338 for (
size_t i = 0; i <
N; ++i) permutation[i] = i;
1341 yac_int prev_vertex_id = XT_INT_MAX;
1342 num_core_vertices = 0;
1343 for (
size_t i = 0; i <
N; ++i) {
1344 yac_int curr_vertex_id = (*vertex_ids)[i];
1345 if (prev_vertex_id == curr_vertex_id) {
1346 (*num_cells_per_vertex)[num_core_vertices-1]++;
1348 (*num_cells_per_vertex)[num_core_vertices] = 1;
1349 (*vertex_ids)[num_core_vertices] = (prev_vertex_id = curr_vertex_id);
1350 ++num_core_vertices;
1352 (*cell_to_vertex)[permutation[i]] = num_core_vertices-1;
1353 permutation[i] /= nv;
1356 xrealloc(*vertex_ids, num_core_vertices *
sizeof(**vertex_ids));
1357 *num_cells_per_vertex =
1359 num_core_vertices *
sizeof(**num_cells_per_vertex));
1360 free(dist_vertex_of_cell);
1365 size_t num_core_edges;
1367 size_t N = num_local_cells * nv;
1368 *edge_ids =
xmalloc(
N *
sizeof(**edge_ids));
1369 size_t * permutation =
xmalloc(
N *
sizeof(*permutation));
1370 *cell_to_edge =
xmalloc(
N *
sizeof(**cell_to_edge));
1371 for (
size_t i = 0; i <
N; ++i)
1372 (*edge_ids)[i] = (
yac_int)dist_edge_of_cell[i];
1373 for (
size_t i = 0; i <
N; ++i) permutation[i] = i;
1376 yac_int prev_edge_id = XT_INT_MAX;
1378 for (
size_t i = 0; i <
N; ++i) {
1379 yac_int curr_edge_id = (*edge_ids)[i];
1380 if (prev_edge_id != curr_edge_id)
1381 (*edge_ids)[num_core_edges++] = (prev_edge_id = curr_edge_id);
1382 (*cell_to_edge)[permutation[i]] = num_core_edges-1;
1383 permutation[i] /= nv;
1387 xrealloc(*edge_ids, num_core_edges *
sizeof(**edge_ids));
1388 free(dist_edge_of_cell);
1393 *x_vertices =
xmalloc(num_core_vertices *
sizeof(**x_vertices));
1394 *y_vertices =
xmalloc(num_core_vertices *
sizeof(**y_vertices));
1395 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
1396 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
1398 for (
size_t i = 0; i < num_core_vertices; ++i)
1399 send_count[((
unsigned long)((*vertex_ids)[i]) *
1400 (
unsigned long)comm_size + (
unsigned long)comm_size - 1) /
1401 (
unsigned long)nvertices]++;
1403 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
1405 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
1406 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
1407 int send_accum = 0, recv_accum = 0;
1408 for (
int i = 0; i < comm_size; ++i) {
1409 send_displ[i] = send_accum;
1410 recv_displ[i] = recv_accum;
1411 send_accum += send_count[i];
1412 recv_accum += recv_count[i];
1415 int num_all_local_vertices_remote = 0;
1416 for (
int i = 0; i < comm_size; ++i)
1417 num_all_local_vertices_remote += recv_count[i];
1419 yac_int * remote_vertex_buffer =
1420 xmalloc(num_all_local_vertices_remote *
sizeof(*remote_vertex_buffer));
1423 *vertex_ids, send_count, send_displ,
yac_int_dt,
1424 remote_vertex_buffer, recv_count, recv_displ,
yac_int_dt, comm);
1426 double * send_vertex_lon =
1427 xmalloc(num_all_local_vertices_remote *
sizeof(*send_vertex_lon));
1428 double * send_vertex_lat =
1429 xmalloc(num_all_local_vertices_remote *
sizeof(*send_vertex_lat));
1431 for (
int i = 0, l = 0; i < comm_size; ++i) {
1432 for (
int j = 0; j < recv_count[i]; ++j, ++l) {
1433 size_t idx = (size_t)(remote_vertex_buffer[l]) - local_start_vertex;
1434 send_vertex_lon[l] = vertex_lon[idx];
1435 send_vertex_lat[l] = vertex_lat[idx];
1439 free(remote_vertex_buffer);
1443 MPI_Alltoallv(send_vertex_lon, recv_count, recv_displ, MPI_DOUBLE,
1444 *x_vertices, send_count, send_displ, MPI_DOUBLE, comm);
1445 MPI_Alltoallv(send_vertex_lat, recv_count, recv_displ, MPI_DOUBLE,
1446 *y_vertices, send_count, send_displ, MPI_DOUBLE, comm);
1448 free(send_vertex_lat);
1449 free(send_vertex_lon);
1458 xmalloc(2 * num_core_edges *
sizeof(**edge_to_vertex));
1460 int * local_edge_to_vertex =
1461 xmalloc(2 * num_core_edges *
sizeof(*local_edge_to_vertex));
1462 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
1463 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
1465 for (
size_t i = 0; i < num_core_edges; ++i)
1466 send_count[((
unsigned long)((*edge_ids)[i]) *
1467 (
unsigned long)comm_size + (
unsigned long)comm_size - 1) /
1468 (
unsigned long)nedges]++;
1470 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
1472 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
1473 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
1474 int send_accum = 0, recv_accum = 0;
1475 for (
int i = 0; i < comm_size; ++i) {
1476 send_displ[i] = send_accum;
1477 recv_displ[i] = recv_accum;
1478 send_accum += send_count[i];
1479 recv_accum += recv_count[i];
1482 int num_all_local_edges_remote = 0;
1483 for (
int i = 0; i < comm_size; ++i)
1484 num_all_local_edges_remote += recv_count[i];
1486 yac_int * remote_edge_buffer =
1487 xmalloc(num_all_local_edges_remote *
sizeof(*remote_edge_buffer));
1490 *edge_ids, send_count, send_displ,
yac_int_dt,
1491 remote_edge_buffer, recv_count, recv_displ,
yac_int_dt, comm);
1493 int * send_edge_vertices =
1494 xmalloc(2 * num_all_local_edges_remote *
sizeof(*send_edge_vertices));
1496 for (
int i = 0, l = 0; i < comm_size; ++i) {
1497 for (
int j = 0; j < recv_count[i]; ++j, ++l) {
1498 size_t idx = (size_t)(remote_edge_buffer[l]) - local_start_edge;
1499 send_edge_vertices[2*l+0] = dist_edge_vertices[2*idx+0];
1500 send_edge_vertices[2*l+1] = dist_edge_vertices[2*idx+1];
1509 free(remote_edge_buffer);
1510 free(dist_edge_vertices);
1512 MPI_Alltoallv(send_edge_vertices, recv_count, recv_displ, MPI_INT,
1513 local_edge_to_vertex, send_count, send_displ, MPI_INT, comm);
1515 size_t * permutation =
xmalloc(2 * num_core_edges *
sizeof(*permutation));
1516 for (
size_t i = 0; i < 2 * num_core_edges; ++i) permutation[i] = i;
1519 local_edge_to_vertex, 2 * num_core_edges, permutation);
1521 for (
size_t i = 0, j = 0; i < 2 * num_core_edges; ++i) {
1523 while ((j < nvertices) && ((*vertex_ids)[j] < curr_vertex)) ++j;
1525 (j < nvertices) && ((*vertex_ids)[j] == curr_vertex),
1526 "ERROR(yac_read_icon_basic_grid_data_parallel): vertex id missmatch")
1527 (*edge_to_vertex)[permutation[i]] = j;
1531 free(send_edge_vertices);
1536 free(local_edge_to_vertex);
1540 *cell_ids =
xmalloc(num_local_cells *
sizeof(**cell_ids));
1541 for (
size_t i = 0; i < num_local_cells; ++i)
1542 (*cell_ids)[i] = (
yac_int)(local_start_cell + i);
1545 *num_vertices_per_cell =
1546 xmalloc(num_local_cells *
sizeof(**num_vertices_per_cell));
1547 for (
size_t i = 0; i < num_local_cells; ++i) (*num_vertices_per_cell)[i] = nv;
1550 *edge_type =
xmalloc(num_core_edges *
sizeof(**edge_type));
1551 for (
size_t i = 0; i < num_core_edges; ++i)
1555 *num_vertices = num_core_vertices;
1556 *num_edges = num_core_edges;