252 const char * filename, MPI_Comm comm,
int * nbr_vertices,
int * nbr_cells,
253 int ** num_vertices_per_cell,
int **
cell_to_vertex,
int ** global_cell_ids,
254 int ** cell_owner,
int ** global_vertex_ids,
int ** vertex_owner,
255 double ** x_vertices,
double ** y_vertices,
256 double ** x_cells,
double ** y_cells,
int ** cell_msk) {
259 ((x_cells == NULL) && (y_cells == NULL)) ||
260 ((x_cells != NULL) && (y_cells != NULL)),
261 "ERROR(yac_read_icon_grid_information_parallel): "
262 "arguments x_cells and y_cells have to be either both NULL or "
263 "both different from NULL");
265 const int with_cell_coords = x_cells != NULL;
266 const int with_cell_mask = cell_msk != NULL;
268 int comm_rank, comm_size;
270 MPI_Comm_rank(comm, &comm_rank);
271 MPI_Comm_size(comm, &comm_size);
273 int local_is_io, * io_ranks, num_io_ranks;
276 size_t ncells, nvertices, nv, ne;
278 size_t read_local_start_cell = 0;
279 size_t read_num_local_cells = 0;
280 size_t read_local_start_vertex = 0;
281 size_t read_num_local_vertices = 0;
282 double * read_cell_lon = NULL;
283 double * read_cell_lat = NULL;
284 int * read_cell_mask = NULL;
285 double * read_vertex_lon = NULL;
286 double * read_vertex_lat = NULL;
287 int * read_dist_vertex_of_cell = NULL;
288 int * read_dist_cells_of_vertex = NULL;
292 unsigned long io_proc_idx = ULONG_MAX;
293 for (
int i = 0; (i < num_io_ranks) && (io_proc_idx == ULONG_MAX); ++i)
294 if (io_ranks[i] == comm_rank)
295 io_proc_idx = (
unsigned long)i;
313 read_local_start_cell =
314 ((
unsigned long)ncells * io_proc_idx) / (
unsigned long)num_io_ranks;
315 read_num_local_cells =
316 ((
unsigned long)ncells * (io_proc_idx+1)) / (
unsigned long)num_io_ranks -
317 (
unsigned long)read_local_start_cell;
318 read_local_start_vertex =
319 ((
unsigned long)nvertices * io_proc_idx) / (
unsigned long)num_io_ranks;
320 read_num_local_vertices =
321 ((
unsigned long)nvertices * (io_proc_idx+1)) / (
unsigned long)num_io_ranks -
322 (
unsigned long)read_local_start_vertex;
327 xmalloc(read_num_local_cells *
sizeof(*read_cell_lon)):NULL;
330 xmalloc(read_num_local_cells *
sizeof(*read_cell_lat)):NULL;
333 xmalloc(read_num_local_cells *
sizeof(*read_cell_mask)):NULL;
334 read_vertex_lon =
xmalloc(read_num_local_vertices *
sizeof(*read_vertex_lon));
335 read_vertex_lat =
xmalloc(read_num_local_vertices *
sizeof(*read_vertex_lat));
336 read_dist_vertex_of_cell =
337 xmalloc(read_num_local_cells * nv *
sizeof(*read_dist_vertex_of_cell));
338 read_dist_cells_of_vertex =
339 xmalloc(read_num_local_vertices * ne *
sizeof(*read_dist_cells_of_vertex));
341 if (with_cell_coords) {
344 &read_num_local_cells, read_cell_lon));
348 &read_num_local_cells, read_cell_lat));
351 if (with_cell_mask) {
354 &read_num_local_cells, read_cell_mask));
358 &read_num_local_vertices, read_vertex_lon));
359 convert_to_rad(ncid, varid, read_vertex_lon, read_num_local_vertices);
362 &read_num_local_vertices, read_vertex_lat));
363 convert_to_rad(ncid, varid, read_vertex_lat, read_num_local_vertices);
366 ne*read_num_local_vertices) *
sizeof(*
buffer));
368 size_t tmp_start[2] = {0, read_local_start_cell};
369 size_t tmp_count[2] = {nv, read_num_local_cells};
372 for (
size_t i = 0; i < read_num_local_cells; ++i)
373 for (
size_t j = 0; j < nv; ++j)
374 read_dist_vertex_of_cell[i * nv + j] =
375 buffer[i + j * read_num_local_cells];
378 size_t tmp_start[2] = {0, read_local_start_vertex};
379 size_t tmp_count[2] = {ne, read_num_local_vertices};
382 for (
size_t i = 0; i < read_num_local_vertices; ++i)
383 for (
size_t j = 0; j < ne; ++j)
384 read_dist_cells_of_vertex[i * ne + j] =
385 buffer[i + j * read_num_local_vertices];
390 for (
size_t i = 0; i < read_num_local_cells * nv; ++i)
391 if (read_dist_vertex_of_cell[i] > 0) read_dist_vertex_of_cell[i]--;
392 for (
size_t i = 0; i < read_num_local_vertices * ne; ++i)
393 read_dist_cells_of_vertex[i]--;
399 with_cell_coords?
xmalloc(1 *
sizeof(*read_cell_lon)):NULL;
401 with_cell_coords?
xmalloc(1 *
sizeof(*read_cell_lat)):NULL;
403 with_cell_mask?
xmalloc(1 *
sizeof(*read_cell_mask)):NULL;
404 read_vertex_lon =
xmalloc(1 *
sizeof(*read_vertex_lon));
405 read_vertex_lat =
xmalloc(1 *
sizeof(*read_vertex_lat));
406 read_dist_vertex_of_cell =
xmalloc(1 *
sizeof(*read_dist_vertex_of_cell));
407 read_dist_cells_of_vertex =
xmalloc(1 *
sizeof(*read_dist_cells_of_vertex));
414 if (comm_rank == 0) tmp = (int)ncells;
415 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
416 ncells = (size_t)tmp;
417 if (comm_rank == 0) tmp = (int)nvertices;
418 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
419 nvertices = (size_t)tmp;
420 if (comm_rank == 0) tmp = (int)nv;
421 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
423 if (comm_rank == 0) tmp = (int)ne;
424 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
429 size_t local_start_cell =
430 ((
unsigned long)ncells * (
unsigned long)comm_rank) /
431 (
unsigned long)comm_size;
432 size_t num_local_cells =
433 ((
unsigned long)ncells * ((
unsigned long)comm_rank+1)) /
434 (
unsigned long)comm_size - (
unsigned long)local_start_cell;
435 size_t local_start_vertex =
436 ((
unsigned long)nvertices * (
unsigned long)comm_rank) /
437 (
unsigned long)comm_size;
438 size_t num_local_vertices =
439 ((
unsigned long)nvertices * ((
unsigned long)comm_rank+1)) /
440 (
unsigned long)comm_size - (
unsigned long)local_start_vertex;
449 int * dist_vertex_of_cell =
450 xmalloc(num_local_cells * nv *
sizeof(*dist_vertex_of_cell));
452 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
453 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
455 for (
size_t i = 0; i < read_num_local_cells; ++i)
458 read_local_start_cell + i, ncells, comm_size)]++;
460 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
462 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
463 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
464 int send_accum = 0, recv_accum = 0;
465 for (
int i = 0; i < comm_size; ++i) {
466 send_displ[i] = send_accum;
467 recv_displ[i] = recv_accum;
468 send_accum += send_count[i];
469 recv_accum += recv_count[i];
472 if (with_cell_coords) {
473 MPI_Alltoallv(read_cell_lon, send_count, send_displ, MPI_DOUBLE,
474 cell_lon, recv_count, recv_displ, MPI_DOUBLE, comm);
475 MPI_Alltoallv(read_cell_lat, send_count, send_displ, MPI_DOUBLE,
476 cell_lat, recv_count, recv_displ, MPI_DOUBLE, comm);
478 if (with_cell_mask) {
479 MPI_Alltoallv(read_cell_mask, send_count, send_displ, MPI_INT,
480 cell_mask, recv_count, recv_displ, MPI_INT, comm);
483 for (
int i = 0; i < comm_size; ++i) {
490 MPI_Alltoallv(read_dist_vertex_of_cell, send_count, send_displ, MPI_INT,
491 dist_vertex_of_cell, recv_count, recv_displ, MPI_INT, comm);
499 free(read_cell_mask);
500 free(read_dist_vertex_of_cell);
504 double * vertex_lon =
xmalloc(num_local_vertices *
sizeof(*vertex_lon));
505 double * vertex_lat =
xmalloc(num_local_vertices *
sizeof(*vertex_lat));
506 int * dist_cells_of_vertex =
507 xmalloc(num_local_vertices * ne *
sizeof(*dist_cells_of_vertex));
509 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
510 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
512 for (
size_t i = 0; i < read_num_local_vertices; ++i)
515 read_local_start_vertex + i, nvertices, comm_size)]++;
517 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
519 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
520 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
521 int send_accum = 0, recv_accum = 0;
522 for (
int i = 0; i < comm_size; ++i) {
523 send_displ[i] = send_accum;
524 recv_displ[i] = recv_accum;
525 send_accum += send_count[i];
526 recv_accum += recv_count[i];
529 MPI_Alltoallv(read_vertex_lon, send_count, send_displ, MPI_DOUBLE,
530 vertex_lon, recv_count, recv_displ, MPI_DOUBLE, comm);
531 MPI_Alltoallv(read_vertex_lat, send_count, send_displ, MPI_DOUBLE,
532 vertex_lat, recv_count, recv_displ, MPI_DOUBLE, comm);
534 for (
int i = 0; i < comm_size; ++i) {
541 MPI_Alltoallv(read_dist_cells_of_vertex, send_count, send_displ, MPI_INT,
542 dist_cells_of_vertex, recv_count, recv_displ, MPI_INT, comm);
548 free(read_vertex_lon);
549 free(read_vertex_lat);
550 free(read_dist_cells_of_vertex);
554 size_t num_core_vertices = num_local_cells * nv;
555 int * core_vertices =
xmalloc(num_core_vertices *
sizeof(*core_vertices));
557 memcpy(core_vertices, dist_vertex_of_cell,
558 num_core_vertices *
sizeof(*core_vertices));
562 xrealloc(core_vertices, num_core_vertices *
sizeof(*core_vertices));
566 int * cells_of_vertex_core =
567 xmalloc(num_core_vertices * ne *
sizeof(*cells_of_vertex_core));
568 int * dist_vertex_owner =
569 xmalloc(num_local_vertices *
sizeof(*dist_vertex_owner));
571 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
572 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
574 for (
size_t i = 0; i < num_core_vertices; ++i)
575 send_count[((
unsigned long)(core_vertices[i]) *
576 (
unsigned long)comm_size + (
unsigned long)comm_size - 1) /
577 (
unsigned long)nvertices]++;
579 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
581 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
582 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
583 int send_accum = 0, recv_accum = 0;
584 for (
int i = 0; i < comm_size; ++i) {
585 send_displ[i] = send_accum;
586 recv_displ[i] = recv_accum;
587 send_accum += send_count[i];
588 recv_accum += recv_count[i];
591 int num_core_vertices_remote = 0;
592 for (
int i = 0; i < comm_size; ++i)
593 num_core_vertices_remote += recv_count[i];
595 int * remote_vertex_buffer =
596 xmalloc(num_core_vertices_remote *
sizeof(*remote_vertex_buffer));
598 MPI_Alltoallv(core_vertices, send_count, send_displ, MPI_INT,
599 remote_vertex_buffer, recv_count, recv_displ, MPI_INT, comm);
601 for (
size_t i = 0; i < num_local_vertices; ++i) dist_vertex_owner[i] = -1;
603 for (
int i = 0, j = 0; i < comm_size; ++i)
604 for (
int k = 0; k < recv_count[i]; ++k, ++j)
605 dist_vertex_owner[remote_vertex_buffer[j] - local_start_vertex] = i;
607 int * send_cell_of_vertex =
608 xmalloc(num_core_vertices_remote * ne *
sizeof(*send_cell_of_vertex));
610 for (
int i = 0, l = 0, m = 0; i < comm_size; ++i) {
611 for (
int j = 0; j < recv_count[i]; ++j, ++l) {
612 int idx = remote_vertex_buffer[l] - local_start_vertex;
613 for (
size_t k = 0; k < ne; ++k, ++m)
614 send_cell_of_vertex[m] = dist_cells_of_vertex[idx * ne + k];
617 free(dist_cells_of_vertex);
619 for (
int i = 0; i < comm_size; ++i) {
626 MPI_Alltoallv(send_cell_of_vertex, recv_count, recv_displ, MPI_INT,
627 cells_of_vertex_core, send_count, send_displ, MPI_INT, comm);
629 free(send_cell_of_vertex);
630 free(remote_vertex_buffer);
639 size_t num_halo_cells = 0;
641 int * tmp_cells = cells_of_vertex_core;
642 size_t num_tmp_cells = num_core_vertices * ne;
647 if ((num_tmp_cells > 0) && (tmp_cells[0] == -1)) {
652 halo_cells =
xmalloc((num_tmp_cells - num_local_cells) *
sizeof(*halo_cells));
655 for (; i < num_tmp_cells && tmp_cells[i] < (int)local_start_cell; ++i)
656 halo_cells[num_halo_cells++] = tmp_cells[i];
657 i += num_local_cells;
658 for (; i < num_tmp_cells; ++i) halo_cells[num_halo_cells++] = tmp_cells[i];
660 assert(num_halo_cells == num_tmp_cells - num_local_cells);
662 free(cells_of_vertex_core);
666 size_t num_all_local_vertices = num_halo_cells * nv + num_core_vertices;
667 int * all_cell_to_vertex;
668 int * all_local_vertices =
xrealloc(core_vertices, num_all_local_vertices *
669 sizeof(*all_local_vertices));
670 if (with_cell_coords) {
676 if (with_cell_mask) {
681 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
682 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
684 for (
size_t i = 0; i < num_halo_cells; ++i)
685 send_count[((
unsigned long)(halo_cells[i]) *
686 (
unsigned long)comm_size + (
unsigned long)comm_size - 1) /
687 (
unsigned long)ncells]++;
689 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
691 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
692 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
693 int send_accum = 0, recv_accum = 0;
694 for (
int i = 0; i < comm_size; ++i) {
695 send_displ[i] = send_accum;
696 recv_displ[i] = recv_accum;
697 send_accum += send_count[i];
698 recv_accum += recv_count[i];
701 int num_halo_cells_remote = 0;
702 for (
int i = 0; i < comm_size; ++i)
703 num_halo_cells_remote += recv_count[i];
705 int * remote_halo_cell_buffer =
706 xmalloc(num_halo_cells_remote *
sizeof(*remote_halo_cell_buffer));
708 MPI_Alltoallv(halo_cells, send_count, send_displ, MPI_INT,
709 remote_halo_cell_buffer, recv_count, recv_displ, MPI_INT,
712 int * send_halo_cell_vertices =
713 xmalloc(num_halo_cells_remote * nv *
sizeof(*send_halo_cell_vertices));
714 double * send_cell_lon =
716 xmalloc(num_halo_cells_remote *
sizeof(*send_cell_lon)):NULL;
717 double * send_cell_lat =
719 xmalloc(num_halo_cells_remote *
sizeof(*send_cell_lat)):NULL;
720 int * send_cell_mask =
722 xmalloc(num_halo_cells_remote *
sizeof(*send_cell_mask)):NULL;
724 for (
int i = 0, l = 0, m = 0; i < comm_size; ++i) {
725 for (
int j = 0; j < recv_count[i]; ++j, ++l) {
726 int idx = remote_halo_cell_buffer[l] - local_start_cell;
727 for (
size_t k = 0; k < nv; ++k, ++m)
728 send_halo_cell_vertices[m] = dist_vertex_of_cell[idx * nv + k];
731 if (with_cell_coords) {
732 for (
int i = 0, l = 0; i < comm_size; ++i) {
733 for (
int j = 0; j < recv_count[i]; ++j, ++l) {
734 int idx = remote_halo_cell_buffer[l] - local_start_cell;
740 if (with_cell_mask) {
741 for (
int i = 0, l = 0; i < comm_size; ++i) {
742 for (
int j = 0; j < recv_count[i]; ++j, ++l) {
743 int idx = remote_halo_cell_buffer[l] - local_start_cell;
749 if (with_cell_coords) {
750 MPI_Alltoallv(send_cell_lon, recv_count, recv_displ, MPI_DOUBLE,
751 cell_lon + num_local_cells, send_count, send_displ,
753 MPI_Alltoallv(send_cell_lat, recv_count, recv_displ, MPI_DOUBLE,
754 cell_lat + num_local_cells, send_count, send_displ,
757 if (with_cell_mask) {
758 MPI_Alltoallv(send_cell_mask, recv_count, recv_displ, MPI_INT,
759 cell_mask + num_local_cells, send_count, send_displ,
763 for (
int i = 0; i < comm_size; ++i) {
771 xrealloc(dist_vertex_of_cell, (num_local_cells + num_halo_cells) * nv *
772 sizeof(*all_cell_to_vertex));
774 MPI_Alltoallv(send_halo_cell_vertices, recv_count, recv_displ, MPI_INT,
775 all_cell_to_vertex + num_local_cells * nv, send_count,
776 send_displ, MPI_INT, comm);
778 memcpy(all_local_vertices + num_core_vertices,
779 all_cell_to_vertex + num_local_cells * nv,
780 num_halo_cells * nv *
sizeof(*all_local_vertices));
782 free(send_cell_mask);
785 free(send_halo_cell_vertices);
786 free(remote_halo_cell_buffer);
793 all_local_vertices, num_all_local_vertices, NULL);
798 double * all_vertex_lon =
799 xmalloc(num_all_local_vertices *
sizeof(*all_vertex_lon));
800 double * all_vertex_lat =
801 xmalloc(num_all_local_vertices *
sizeof(*all_vertex_lat));
802 int * all_local_vertices_owner =
803 xmalloc(num_all_local_vertices *
sizeof(*all_local_vertices_owner));
805 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
806 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
808 for (
size_t i = 0; i < num_all_local_vertices; ++i)
809 send_count[((
unsigned long)(all_local_vertices[i]) *
810 (
unsigned long)comm_size + (
unsigned long)comm_size - 1) /
811 (
unsigned long)nvertices]++;
813 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
815 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
816 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
817 int send_accum = 0, recv_accum = 0;
818 for (
int i = 0; i < comm_size; ++i) {
819 send_displ[i] = send_accum;
820 recv_displ[i] = recv_accum;
821 send_accum += send_count[i];
822 recv_accum += recv_count[i];
825 int num_all_local_vertices_remote = 0;
826 for (
int i = 0; i < comm_size; ++i)
827 num_all_local_vertices_remote += recv_count[i];
829 int * remote_vertex_buffer =
830 xmalloc(num_all_local_vertices_remote *
sizeof(*remote_vertex_buffer));
832 MPI_Alltoallv(all_local_vertices, send_count, send_displ, MPI_INT,
833 remote_vertex_buffer, recv_count, recv_displ, MPI_INT, comm);
835 int * send_vertex_owner = remote_vertex_buffer;
836 double * send_vertex_lon =
837 xmalloc(num_all_local_vertices_remote *
sizeof(*send_vertex_lon));
838 double * send_vertex_lat =
839 xmalloc(num_all_local_vertices_remote *
sizeof(*send_vertex_lat));
841 for (
int i = 0, l = 0; i < comm_size; ++i) {
842 for (
int j = 0; j < recv_count[i]; ++j, ++l) {
843 int idx = remote_vertex_buffer[l] - local_start_vertex;
844 send_vertex_owner[l] = dist_vertex_owner[idx];
845 send_vertex_lon[l] = vertex_lon[idx];
846 send_vertex_lat[l] = vertex_lat[idx];
850 free(dist_vertex_owner);
854 MPI_Alltoallv(send_vertex_owner, recv_count, recv_displ, MPI_INT,
855 all_local_vertices_owner, send_count, send_displ, MPI_INT,
857 MPI_Alltoallv(send_vertex_lon, recv_count, recv_displ, MPI_DOUBLE,
858 all_vertex_lon, send_count, send_displ, MPI_DOUBLE, comm);
859 MPI_Alltoallv(send_vertex_lat, recv_count, recv_displ, MPI_DOUBLE,
860 all_vertex_lat, send_count, send_displ, MPI_DOUBLE, comm);
862 free(send_vertex_lat);
863 free(send_vertex_lon);
864 free(send_vertex_owner);
873 size_t vertex_count = nv * (num_local_cells + num_halo_cells);
874 int * permutation =
xmalloc(vertex_count *
sizeof(*permutation));
875 for (
size_t i = 0; i < vertex_count; ++i) permutation[i] = (
int)i;
879 for (
size_t i = 0, j = 0; i < vertex_count; ++i) {
880 while (all_local_vertices[j] != all_cell_to_vertex[i]) ++j;
881 all_cell_to_vertex[i] = (int)j;
888 *nbr_vertices = num_all_local_vertices;
889 *nbr_cells = num_local_cells + num_halo_cells;
891 *num_vertices_per_cell =
892 xmalloc((num_local_cells + num_halo_cells) *
sizeof(**num_vertices_per_cell));
893 for (
size_t i = 0; i < num_local_cells + num_halo_cells; ++i)
894 (*num_vertices_per_cell)[i] = nv;
899 xmalloc((num_local_cells + num_halo_cells) *
sizeof(**global_cell_ids));
900 for (
size_t i = 0; i < num_local_cells; ++i)
901 (*global_cell_ids)[i] = local_start_cell + i;
902 memcpy(*global_cell_ids + num_local_cells, halo_cells,
903 num_halo_cells *
sizeof(*halo_cells));
904 *global_vertex_ids =
xrealloc(all_local_vertices, num_all_local_vertices *
905 sizeof(*all_local_vertices));
908 xmalloc((num_local_cells + num_halo_cells) *
sizeof(**cell_owner));
909 for (
size_t i = 0; i < num_local_cells; ++i)
910 (*cell_owner)[i] = -1;
911 for (
size_t i = 0; i < num_halo_cells; ++i)
912 (*cell_owner)[num_local_cells + i] =
913 ((
unsigned long)(halo_cells[i]) * (
unsigned long)comm_size +
914 (
unsigned long)comm_size - 1) / (
unsigned long)ncells;
917 for (
size_t i = 0; i < num_all_local_vertices; ++i)
918 if (all_local_vertices_owner[i] == comm_rank)
919 all_local_vertices_owner[i] = -1;
920 *vertex_owner = all_local_vertices_owner;
922 *x_vertices = all_vertex_lon;
923 *y_vertices = all_vertex_lat;
924 if (with_cell_coords) {
928 if (with_cell_mask) *cell_msk =
cell_mask;
962 const char * filename, MPI_Comm comm,
963 double ** x_vertices,
double ** y_vertices,
965 size_t *
num_cells,
size_t * num_vertices,
size_t * num_edges,
966 int ** num_vertices_per_cell,
int ** num_cells_per_vertex,
967 size_t **
cell_to_vertex,
size_t ** cell_to_edge,
size_t ** vertex_to_cell,
969 double ** x_cells,
double ** y_cells,
int ** cell_msk) {
972 ((x_cells == NULL) && (y_cells == NULL)) ||
973 ((x_cells != NULL) && (y_cells != NULL)),
974 "ERROR(yac_read_icon_grid_information_parallel_2): "
975 "arguments x_cells and y_cells have to be either both NULL or "
976 "both different from NULL");
978 const int with_cell_coords = x_cells != NULL;
979 const int with_cell_mask = cell_msk != NULL;
981 int comm_rank, comm_size;
983 MPI_Comm_rank(comm, &comm_rank);
984 MPI_Comm_size(comm, &comm_size);
986 int local_is_io, * io_ranks, num_io_ranks;
989 size_t ncells, nvertices, nedges, nv, ne;
991 size_t read_local_start_cell = 0;
992 size_t read_num_local_cells = 0;
993 size_t read_local_start_vertex = 0;
994 size_t read_num_local_vertices = 0;
995 size_t read_local_start_edge = 0;
996 size_t read_num_local_edges = 0;
997 double * read_cell_lon = NULL;
998 double * read_cell_lat = NULL;
999 int * read_cell_mask = NULL;
1000 double * read_vertex_lon = NULL;
1001 double * read_vertex_lat = NULL;
1002 int * read_dist_vertex_of_cell = NULL;
1003 int * read_dist_edge_of_cell = NULL;
1004 int * read_dist_edge_vertices = NULL;
1008 unsigned long io_proc_idx = ULONG_MAX;
1009 for (
int i = 0; (i < num_io_ranks) && (io_proc_idx == ULONG_MAX); ++i)
1010 if (io_ranks[i] == comm_rank)
1011 io_proc_idx = (
unsigned long)i;
1031 read_local_start_cell =
1032 ((
unsigned long)ncells * io_proc_idx) / (
unsigned long)num_io_ranks;
1033 read_num_local_cells =
1034 ((
unsigned long)ncells * (io_proc_idx+1)) / (
unsigned long)num_io_ranks -
1035 (
unsigned long)read_local_start_cell;
1036 read_local_start_vertex =
1037 ((
unsigned long)nvertices * io_proc_idx) / (
unsigned long)num_io_ranks;
1038 read_num_local_vertices =
1039 ((
unsigned long)nvertices * (io_proc_idx+1)) / (
unsigned long)num_io_ranks -
1040 (
unsigned long)read_local_start_vertex;
1041 read_local_start_edge =
1042 ((
unsigned long)nedges * io_proc_idx) / (
unsigned long)num_io_ranks;
1043 read_num_local_edges =
1044 ((
unsigned long)nedges * (io_proc_idx+1)) / (
unsigned long)num_io_ranks -
1045 (
unsigned long)read_local_start_edge;
1048 read_vertex_lon =
xmalloc(read_num_local_vertices *
sizeof(*read_vertex_lon));
1049 read_vertex_lat =
xmalloc(read_num_local_vertices *
sizeof(*read_vertex_lat));
1052 xmalloc(read_num_local_cells *
sizeof(*read_cell_lon)):NULL;
1055 xmalloc(read_num_local_cells *
sizeof(*read_cell_lat)):NULL;
1058 xmalloc(read_num_local_cells *
sizeof(*read_cell_mask)):NULL;
1059 read_dist_vertex_of_cell =
1060 xmalloc(read_num_local_cells * nv *
sizeof(*read_dist_vertex_of_cell));
1061 read_dist_edge_of_cell =
1062 xmalloc(read_num_local_cells * nv *
sizeof(*read_dist_edge_of_cell));
1063 read_dist_edge_vertices =
1064 xmalloc(read_num_local_edges * 2 *
sizeof(*read_dist_edge_vertices));
1068 &read_num_local_vertices, read_vertex_lon));
1069 convert_to_rad(ncid, varid, read_vertex_lon, read_num_local_vertices);
1072 &read_num_local_vertices, read_vertex_lat));
1073 convert_to_rad(ncid, varid, read_vertex_lat, read_num_local_vertices);
1074 if (with_cell_coords) {
1078 ncid, varid, &read_local_start_cell,
1079 &read_num_local_cells, read_cell_lon));
1080 convert_to_rad(ncid, varid, read_cell_lon, read_num_local_cells);
1084 ncid, varid, &read_local_start_cell,
1085 &read_num_local_cells, read_cell_lat));
1086 convert_to_rad(ncid, varid, read_cell_lat, read_num_local_cells);
1088 if (read_cell_mask) {
1092 ncid, varid, &read_local_start_cell,
1093 &read_num_local_cells, read_cell_mask));
1098 MAX(nv*read_num_local_cells, 2*read_num_local_edges) *
1101 size_t tmp_start[2] = {0, read_local_start_cell};
1102 size_t tmp_count[2] = {nv, read_num_local_cells};
1105 for (
size_t i = 0; i < read_num_local_cells; ++i)
1106 for (
size_t j = 0; j < nv; ++j)
1107 read_dist_vertex_of_cell[i * nv + j] =
1108 buffer[i + j * read_num_local_cells];
1111 for (
size_t i = 0; i < read_num_local_cells; ++i)
1112 for (
size_t j = 0; j < nv; ++j)
1113 read_dist_edge_of_cell[i * nv + j] =
1114 buffer[i + j * read_num_local_cells];
1117 size_t tmp_start[2] = {0, read_local_start_edge};
1118 size_t tmp_count[2] = {2, read_num_local_edges};
1121 for (
size_t i = 0; i < read_num_local_edges; ++i)
1122 for (
int j = 0; j < 2; ++j)
1123 read_dist_edge_vertices[i * 2 + j] =
1124 buffer[i + j * read_num_local_edges];
1129 for (
size_t i = 0; i < read_num_local_cells * nv; ++i)
1130 if (read_dist_vertex_of_cell[i] > 0) read_dist_vertex_of_cell[i]--;
1131 for (
size_t i = 0; i < read_num_local_cells * nv; ++i)
1132 if (read_dist_edge_of_cell[i] > 0) read_dist_edge_of_cell[i]--;
1133 for (
size_t i = 0; i < read_num_local_edges * 2; ++i)
1134 if (read_dist_edge_vertices[i] > 0) read_dist_edge_vertices[i]--;
1139 read_cell_lon = with_cell_coords?
xmalloc(1 *
sizeof(*read_cell_lon)):NULL;
1140 read_cell_lat = with_cell_coords?
xmalloc(1 *
sizeof(*read_cell_lat)):NULL;
1141 read_cell_mask = with_cell_mask?
xmalloc(1 *
sizeof(*read_cell_mask)):NULL;
1142 read_vertex_lon =
xmalloc(1 *
sizeof(*read_vertex_lon));
1143 read_vertex_lat =
xmalloc(1 *
sizeof(*read_vertex_lat));
1144 read_dist_vertex_of_cell =
xmalloc(1 *
sizeof(*read_dist_vertex_of_cell));
1145 read_dist_edge_of_cell =
xmalloc(1 *
sizeof(*read_dist_edge_of_cell));
1146 read_dist_edge_vertices =
xmalloc(1 *
sizeof(*read_dist_edge_vertices));
1153 if (comm_rank == 0) tmp = (int)ncells;
1154 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
1155 ncells = (size_t)tmp;
1156 if (comm_rank == 0) tmp = (int)nvertices;
1157 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
1158 nvertices = (size_t)tmp;
1159 if (comm_rank == 0) tmp = (int)nedges;
1160 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
1161 nedges = (size_t)tmp;
1162 if (comm_rank == 0) tmp = (int)nv;
1163 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
1165 if (comm_rank == 0) tmp = (int)ne;
1166 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
1171 size_t local_start_cell =
1172 ((
unsigned long)ncells * (
unsigned long)comm_rank) /
1173 (
unsigned long)comm_size;
1174 size_t num_local_cells =
1175 ((
unsigned long)ncells * ((
unsigned long)comm_rank+1)) /
1176 (
unsigned long)comm_size - (
unsigned long)local_start_cell;
1177 size_t local_start_vertex =
1178 ((
unsigned long)nvertices * (
unsigned long)comm_rank) /
1179 (
unsigned long)comm_size;
1180 size_t num_local_vertices =
1181 ((
unsigned long)nvertices * ((
unsigned long)comm_rank+1)) /
1182 (
unsigned long)comm_size - (
unsigned long)local_start_vertex;
1183 size_t local_start_edge =
1184 ((
unsigned long)nedges * (
unsigned long)comm_rank) /
1185 (
unsigned long)comm_size;
1186 size_t num_local_edges =
1187 ((
unsigned long)nedges * ((
unsigned long)comm_rank+1)) /
1188 (
unsigned long)comm_size - (
unsigned long)local_start_edge;
1191 if (with_cell_coords) {
1192 *x_cells =
xmalloc(num_local_cells *
sizeof(**x_cells));
1193 *y_cells =
xmalloc(num_local_cells *
sizeof(**y_cells));
1195 if (with_cell_mask) {
1196 *cell_msk =
xmalloc(num_local_cells *
sizeof(**cell_msk));
1198 int * dist_vertex_of_cell =
1199 xmalloc(num_local_cells * nv *
sizeof(*dist_vertex_of_cell));
1200 int * dist_edge_of_cell =
1201 xmalloc(num_local_cells * nv *
sizeof(*dist_edge_of_cell));
1203 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
1204 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
1206 for (
unsigned i = 0; i < read_num_local_cells; ++i)
1209 read_local_start_cell + i, ncells, comm_size)]++;
1211 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
1213 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
1214 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
1215 int send_accum = 0, recv_accum = 0;
1216 for (
int i = 0; i < comm_size; ++i) {
1217 send_displ[i] = send_accum;
1218 recv_displ[i] = recv_accum;
1219 send_accum += send_count[i];
1220 recv_accum += recv_count[i];
1223 if (with_cell_coords) {
1224 MPI_Alltoallv(read_cell_lon, send_count, send_displ, MPI_DOUBLE,
1225 *x_cells, recv_count, recv_displ, MPI_DOUBLE, comm);
1226 MPI_Alltoallv(read_cell_lat, send_count, send_displ, MPI_DOUBLE,
1227 *y_cells, recv_count, recv_displ, MPI_DOUBLE, comm);
1229 if (with_cell_mask) {
1230 MPI_Alltoallv(read_cell_mask, send_count, send_displ, MPI_INT,
1231 *cell_msk, recv_count, recv_displ, MPI_INT, comm);
1234 for (
int i = 0; i < comm_size; ++i) {
1235 send_count[i] *= nv;
1236 send_displ[i] *= nv;
1237 recv_count[i] *= nv;
1238 recv_displ[i] *= nv;
1241 MPI_Alltoallv(read_dist_vertex_of_cell, send_count, send_displ, MPI_INT,
1242 dist_vertex_of_cell, recv_count, recv_displ, MPI_INT, comm);
1243 MPI_Alltoallv(read_dist_edge_of_cell, send_count, send_displ, MPI_INT,
1244 dist_edge_of_cell, recv_count, recv_displ, MPI_INT, comm);
1250 free(read_dist_vertex_of_cell);
1251 free(read_dist_edge_of_cell);
1252 free(read_cell_mask);
1253 free(read_cell_lat);
1254 free(read_cell_lon);
1258 double * vertex_lon =
xmalloc(num_local_vertices *
sizeof(*vertex_lon));
1259 double * vertex_lat =
xmalloc(num_local_vertices *
sizeof(*vertex_lat));
1261 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
1262 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
1264 for (
unsigned i = 0; i < read_num_local_vertices; ++i)
1267 read_local_start_vertex + i, nvertices, comm_size)]++;
1269 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
1271 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
1272 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
1273 int send_accum = 0, recv_accum = 0;
1274 for (
int i = 0; i < comm_size; ++i) {
1275 send_displ[i] = send_accum;
1276 recv_displ[i] = recv_accum;
1277 send_accum += send_count[i];
1278 recv_accum += recv_count[i];
1281 MPI_Alltoallv(read_vertex_lon, send_count, send_displ, MPI_DOUBLE,
1282 vertex_lon, recv_count, recv_displ, MPI_DOUBLE, comm);
1283 MPI_Alltoallv(read_vertex_lat, send_count, send_displ, MPI_DOUBLE,
1284 vertex_lat, recv_count, recv_displ, MPI_DOUBLE, comm);
1290 free(read_vertex_lon);
1291 free(read_vertex_lat);
1295 int * dist_edge_vertices =
1296 xmalloc(num_local_edges * 2 *
sizeof(*dist_edge_vertices));
1298 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
1299 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
1301 for (
unsigned i = 0; i < read_num_local_edges; ++i)
1304 read_local_start_edge + i, nedges, comm_size)] += 2;
1306 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
1308 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
1309 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
1310 int send_accum = 0, recv_accum = 0;
1311 for (
int i = 0; i < comm_size; ++i) {
1312 send_displ[i] = send_accum;
1313 recv_displ[i] = recv_accum;
1314 send_accum += send_count[i];
1315 recv_accum += recv_count[i];
1318 MPI_Alltoallv(read_dist_edge_vertices, send_count, send_displ, MPI_INT,
1319 dist_edge_vertices, recv_count, recv_displ, MPI_INT, comm);
1325 free(read_dist_edge_vertices);
1331 size_t num_core_vertices;
1333 size_t N = num_local_cells * nv;
1334 *vertex_ids =
xmalloc(
N *
sizeof(**vertex_ids));
1335 *num_cells_per_vertex =
xmalloc(
N *
sizeof(**num_cells_per_vertex));
1336 *vertex_to_cell =
xmalloc(
N *
sizeof(**vertex_to_cell));
1338 for (
size_t i = 0; i <
N; ++i)
1339 (*vertex_ids)[i] = (
yac_int)dist_vertex_of_cell[i];
1340 size_t * permutation = *vertex_to_cell;
1341 for (
size_t i = 0; i <
N; ++i) permutation[i] = i;
1344 yac_int prev_vertex_id = YAC_INT_MAX;
1345 num_core_vertices = 0;
1346 for (
size_t i = 0; i <
N; ++i) {
1347 yac_int curr_vertex_id = (*vertex_ids)[i];
1348 if (prev_vertex_id == curr_vertex_id) {
1349 (*num_cells_per_vertex)[num_core_vertices-1]++;
1351 (*num_cells_per_vertex)[num_core_vertices] = 1;
1352 (*vertex_ids)[num_core_vertices] = (prev_vertex_id = curr_vertex_id);
1353 ++num_core_vertices;
1355 (*cell_to_vertex)[permutation[i]] = num_core_vertices-1;
1356 permutation[i] /= nv;
1359 xrealloc(*vertex_ids, num_core_vertices *
sizeof(**vertex_ids));
1360 *num_cells_per_vertex =
1362 num_core_vertices *
sizeof(**num_cells_per_vertex));
1363 free(dist_vertex_of_cell);
1368 size_t num_core_edges;
1370 size_t N = num_local_cells * nv;
1371 *edge_ids =
xmalloc(
N *
sizeof(**edge_ids));
1372 size_t * permutation =
xmalloc(
N *
sizeof(*permutation));
1373 *cell_to_edge =
xmalloc(
N *
sizeof(**cell_to_edge));
1374 for (
size_t i = 0; i <
N; ++i)
1375 (*edge_ids)[i] = (
yac_int)dist_edge_of_cell[i];
1376 for (
size_t i = 0; i <
N; ++i) permutation[i] = i;
1379 yac_int prev_edge_id = YAC_INT_MAX;
1381 for (
size_t i = 0; i <
N; ++i) {
1382 yac_int curr_edge_id = (*edge_ids)[i];
1383 if (prev_edge_id != curr_edge_id)
1384 (*edge_ids)[num_core_edges++] = (prev_edge_id = curr_edge_id);
1385 (*cell_to_edge)[permutation[i]] = num_core_edges-1;
1386 permutation[i] /= nv;
1390 xrealloc(*edge_ids, num_core_edges *
sizeof(**edge_ids));
1391 free(dist_edge_of_cell);
1396 *x_vertices =
xmalloc(num_core_vertices *
sizeof(**x_vertices));
1397 *y_vertices =
xmalloc(num_core_vertices *
sizeof(**y_vertices));
1398 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
1399 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
1401 for (
size_t i = 0; i < num_core_vertices; ++i)
1402 send_count[((
unsigned long)((*vertex_ids)[i]) *
1403 (
unsigned long)comm_size + (
unsigned long)comm_size - 1) /
1404 (
unsigned long)nvertices]++;
1406 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
1408 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
1409 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
1410 int send_accum = 0, recv_accum = 0;
1411 for (
int i = 0; i < comm_size; ++i) {
1412 send_displ[i] = send_accum;
1413 recv_displ[i] = recv_accum;
1414 send_accum += send_count[i];
1415 recv_accum += recv_count[i];
1418 int num_all_local_vertices_remote = 0;
1419 for (
int i = 0; i < comm_size; ++i)
1420 num_all_local_vertices_remote += recv_count[i];
1422 yac_int * remote_vertex_buffer =
1423 xmalloc(num_all_local_vertices_remote *
sizeof(*remote_vertex_buffer));
1426 *vertex_ids, send_count, send_displ,
yac_int_dt,
1427 remote_vertex_buffer, recv_count, recv_displ,
yac_int_dt, comm);
1429 double * send_vertex_lon =
1430 xmalloc(num_all_local_vertices_remote *
sizeof(*send_vertex_lon));
1431 double * send_vertex_lat =
1432 xmalloc(num_all_local_vertices_remote *
sizeof(*send_vertex_lat));
1434 for (
int i = 0, l = 0; i < comm_size; ++i) {
1435 for (
int j = 0; j < recv_count[i]; ++j, ++l) {
1436 size_t idx = (size_t)(remote_vertex_buffer[l]) - local_start_vertex;
1437 send_vertex_lon[l] = vertex_lon[idx];
1438 send_vertex_lat[l] = vertex_lat[idx];
1442 free(remote_vertex_buffer);
1446 MPI_Alltoallv(send_vertex_lon, recv_count, recv_displ, MPI_DOUBLE,
1447 *x_vertices, send_count, send_displ, MPI_DOUBLE, comm);
1448 MPI_Alltoallv(send_vertex_lat, recv_count, recv_displ, MPI_DOUBLE,
1449 *y_vertices, send_count, send_displ, MPI_DOUBLE, comm);
1451 free(send_vertex_lat);
1452 free(send_vertex_lon);
1461 xmalloc(2 * num_core_edges *
sizeof(**edge_to_vertex));
1463 int * local_edge_to_vertex =
1464 xmalloc(2 * num_core_edges *
sizeof(*local_edge_to_vertex));
1465 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
1466 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
1468 for (
size_t i = 0; i < num_core_edges; ++i)
1469 send_count[((
unsigned long)((*edge_ids)[i]) *
1470 (
unsigned long)comm_size + (
unsigned long)comm_size - 1) /
1471 (
unsigned long)nedges]++;
1473 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
1475 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
1476 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
1477 int send_accum = 0, recv_accum = 0;
1478 for (
int i = 0; i < comm_size; ++i) {
1479 send_displ[i] = send_accum;
1480 recv_displ[i] = recv_accum;
1481 send_accum += send_count[i];
1482 recv_accum += recv_count[i];
1485 int num_all_local_edges_remote = 0;
1486 for (
int i = 0; i < comm_size; ++i)
1487 num_all_local_edges_remote += recv_count[i];
1489 yac_int * remote_edge_buffer =
1490 xmalloc(num_all_local_edges_remote *
sizeof(*remote_edge_buffer));
1493 *edge_ids, send_count, send_displ,
yac_int_dt,
1494 remote_edge_buffer, recv_count, recv_displ,
yac_int_dt, comm);
1496 int * send_edge_vertices =
1497 xmalloc(2 * num_all_local_edges_remote *
sizeof(*send_edge_vertices));
1499 for (
int i = 0, l = 0; i < comm_size; ++i) {
1500 for (
int j = 0; j < recv_count[i]; ++j, ++l) {
1501 size_t idx = (size_t)(remote_edge_buffer[l]) - local_start_edge;
1502 send_edge_vertices[2*l+0] = dist_edge_vertices[2*idx+0];
1503 send_edge_vertices[2*l+1] = dist_edge_vertices[2*idx+1];
1512 free(remote_edge_buffer);
1513 free(dist_edge_vertices);
1515 MPI_Alltoallv(send_edge_vertices, recv_count, recv_displ, MPI_INT,
1516 local_edge_to_vertex, send_count, send_displ, MPI_INT, comm);
1518 size_t * permutation =
xmalloc(2 * num_core_edges *
sizeof(*permutation));
1519 for (
size_t i = 0; i < 2 * num_core_edges; ++i) permutation[i] = i;
1522 local_edge_to_vertex, 2 * num_core_edges, permutation);
1524 for (
size_t i = 0, j = 0; i < 2 * num_core_edges; ++i) {
1526 while ((j < nvertices) && ((*vertex_ids)[j] < curr_vertex)) ++j;
1528 (j < nvertices) && ((*vertex_ids)[j] == curr_vertex),
1529 "ERROR(yac_read_icon_basic_grid_data_parallel): vertex id missmatch")
1530 (*edge_to_vertex)[permutation[i]] = j;
1534 free(send_edge_vertices);
1539 free(local_edge_to_vertex);
1543 *cell_ids =
xmalloc(num_local_cells *
sizeof(**cell_ids));
1544 for (
size_t i = 0; i < num_local_cells; ++i)
1545 (*cell_ids)[i] = (
yac_int)(local_start_cell + i);
1548 *num_vertices_per_cell =
1549 xmalloc(num_local_cells *
sizeof(**num_vertices_per_cell));
1550 for (
size_t i = 0; i < num_local_cells; ++i) (*num_vertices_per_cell)[i] = nv;
1553 *edge_type =
xmalloc(num_core_edges *
sizeof(**edge_type));
1554 for (
size_t i = 0; i < num_core_edges; ++i)
1558 *num_vertices = num_core_vertices;
1559 *num_edges = num_core_edges;