265 const char * filename, MPI_Comm comm,
int * nbr_vertices,
int * nbr_cells,
266 int ** num_vertices_per_cell,
int ** cell_to_vertex,
int ** global_cell_ids,
267 int ** cell_owner,
int ** global_vertex_ids,
int ** vertex_owner,
268 double ** x_vertices,
double ** y_vertices,
269 double ** x_cells,
double ** y_cells,
int ** cell_msk) {
271#ifndef YAC_NETCDF_ENABLED
277 UNUSED(num_vertices_per_cell);
281 UNUSED(global_vertex_ids);
289 "ERROR(yac_read_icon_grid_information_parallel): "
290 "YAC is built without the NetCDF support");
293 int comm_rank, comm_size;
295 MPI_Comm_rank(comm, &comm_rank);
296 MPI_Comm_size(comm, &comm_size);
298 int local_is_io, * io_ranks, num_io_ranks;
301 size_t ncells, nvertices;
303 size_t read_local_start_cell = 0;
304 size_t read_num_local_cells = 0;
305 size_t read_local_start_vertex = 0;
306 size_t read_num_local_vertices = 0;
307 double * read_cell_lon = NULL;
308 double * read_cell_lat = NULL;
309 int * read_cell_mask = NULL;
310 double * read_vertex_lon = NULL;
311 double * read_vertex_lat = NULL;
312 int * read_dist_vertex_of_cell = NULL;
313 int * read_dist_cells_of_vertex = NULL;
317 unsigned long io_proc_idx = ULONG_MAX;
318 for (
int i = 0; (i < num_io_ranks) && (io_proc_idx == ULONG_MAX); ++i)
319 if (io_ranks[i] == comm_rank)
320 io_proc_idx = (
unsigned long)i;
334 read_local_start_cell =
335 ((
unsigned long)ncells * io_proc_idx) / (
unsigned long)num_io_ranks;
336 read_num_local_cells =
337 ((
unsigned long)ncells * (io_proc_idx+1)) / (
unsigned long)num_io_ranks -
338 (
unsigned long)read_local_start_cell;
339 read_local_start_vertex =
340 ((
unsigned long)nvertices * io_proc_idx) / (
unsigned long)num_io_ranks;
341 read_num_local_vertices =
342 ((
unsigned long)nvertices * (io_proc_idx+1)) / (
unsigned long)num_io_ranks -
343 (
unsigned long)read_local_start_vertex;
346 read_cell_lon =
xmalloc(read_num_local_cells *
sizeof(*read_cell_lon));
347 read_cell_lat =
xmalloc(read_num_local_cells *
sizeof(*read_cell_lat));
348 read_cell_mask =
xmalloc(read_num_local_cells *
sizeof(*read_cell_mask));
349 read_vertex_lon =
xmalloc(read_num_local_vertices *
sizeof(*read_vertex_lon));
350 read_vertex_lat =
xmalloc(read_num_local_vertices *
sizeof(*read_vertex_lat));
351 read_dist_vertex_of_cell =
352 xmalloc(read_num_local_cells * 3 *
sizeof(*read_dist_vertex_of_cell));
353 read_dist_cells_of_vertex =
354 xmalloc(read_num_local_vertices * 6 *
sizeof(*read_dist_cells_of_vertex));
358 &read_num_local_cells, read_cell_lon));
362 &read_num_local_cells, read_cell_lat));
366 &read_num_local_cells, read_cell_mask));
369 &read_num_local_vertices, read_vertex_lon));
370 convert_to_rad(ncid, varid, read_vertex_lon, read_num_local_vertices);
373 &read_num_local_vertices, read_vertex_lat));
374 convert_to_rad(ncid, varid, read_vertex_lat, read_num_local_vertices);
376 int * buffer =
xmalloc(
MAX(3*read_num_local_cells,
377 6*read_num_local_vertices) *
sizeof(*buffer));
379 size_t tmp_start[2] = {0, read_local_start_cell};
380 size_t tmp_count[2] = {3, read_num_local_cells};
382 YAC_HANDLE_ERROR(nc_get_vara_int(ncid, varid, tmp_start, tmp_count, buffer));
383 for (
size_t i = 0; i < read_num_local_cells; ++i)
384 for (
int j = 0; j < 3; ++j)
385 read_dist_vertex_of_cell[i * 3 + j] =
386 buffer[i + j * read_num_local_cells];
389 size_t tmp_start[2] = {0, read_local_start_vertex};
390 size_t tmp_count[2] = {6, read_num_local_vertices};
392 YAC_HANDLE_ERROR(nc_get_vara_int(ncid, varid, tmp_start, tmp_count, buffer));
393 for (
size_t i = 0; i < read_num_local_vertices; ++i)
394 for (
int j = 0; j < 6; ++j)
395 read_dist_cells_of_vertex[i * 6 + j] =
396 buffer[i + j * read_num_local_vertices];
401 for (
size_t i = 0; i < read_num_local_cells * 3; ++i)
402 if (read_dist_vertex_of_cell[i] > 0) read_dist_vertex_of_cell[i]--;
403 for (
size_t i = 0; i < read_num_local_vertices * 6; ++i)
404 read_dist_cells_of_vertex[i]--;
409 read_cell_lon =
xmalloc(1 *
sizeof(*read_cell_lon));
410 read_cell_lat =
xmalloc(1 *
sizeof(*read_cell_lat));
411 read_cell_mask =
xmalloc(1 *
sizeof(*read_cell_mask));
412 read_vertex_lon =
xmalloc(1 *
sizeof(*read_vertex_lon));
413 read_vertex_lat =
xmalloc(1 *
sizeof(*read_vertex_lat));
414 read_dist_vertex_of_cell =
xmalloc(1 *
sizeof(*read_dist_vertex_of_cell));
415 read_dist_cells_of_vertex =
xmalloc(1 *
sizeof(*read_dist_cells_of_vertex));
422 if (comm_rank == 0) tmp = (int)ncells;
423 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
424 ncells = (size_t)tmp;
425 if (comm_rank == 0) tmp = (int)nvertices;
426 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
427 nvertices = (size_t)tmp;
431 size_t local_start_cell =
432 ((
unsigned long)ncells * (
unsigned long)comm_rank) /
433 (
unsigned long)comm_size;
434 size_t num_local_cells =
435 ((
unsigned long)ncells * ((
unsigned long)comm_rank+1)) /
436 (
unsigned long)comm_size - (
unsigned long)local_start_cell;
437 size_t local_start_vertex =
438 ((
unsigned long)nvertices * (
unsigned long)comm_rank) /
439 (
unsigned long)comm_size;
440 size_t num_local_vertices =
441 ((
unsigned long)nvertices * ((
unsigned long)comm_rank+1)) /
442 (
unsigned long)comm_size - (
unsigned long)local_start_vertex;
445 double * cell_lon =
xmalloc(num_local_cells *
sizeof(*cell_lon));
446 double * cell_lat =
xmalloc(num_local_cells *
sizeof(*cell_lat));
447 int * cell_mask =
xmalloc(num_local_cells *
sizeof(*cell_mask));
448 int * dist_vertex_of_cell =
449 xmalloc(num_local_cells * 3 *
sizeof(*dist_vertex_of_cell));
451 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
452 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
454 for (
size_t i = 0; i < read_num_local_cells; ++i)
457 read_local_start_cell + i, ncells, comm_size)]++;
459 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
461 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
462 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
463 int send_accum = 0, recv_accum = 0;
464 for (
int i = 0; i < comm_size; ++i) {
465 send_displ[i] = send_accum;
466 recv_displ[i] = recv_accum;
467 send_accum += send_count[i];
468 recv_accum += recv_count[i];
471 MPI_Alltoallv(read_cell_lon, send_count, send_displ, MPI_DOUBLE,
472 cell_lon, recv_count, recv_displ, MPI_DOUBLE, comm);
473 MPI_Alltoallv(read_cell_lat, send_count, send_displ, MPI_DOUBLE,
474 cell_lat, recv_count, recv_displ, MPI_DOUBLE, comm);
475 MPI_Alltoallv(read_cell_mask, send_count, send_displ, MPI_INT,
476 cell_mask, recv_count, recv_displ, MPI_INT, comm);
478 for (
int i = 0; i < comm_size; ++i) {
485 MPI_Alltoallv(read_dist_vertex_of_cell, send_count, send_displ, MPI_INT,
486 dist_vertex_of_cell, recv_count, recv_displ, MPI_INT, comm);
494 free(read_cell_mask);
495 free(read_dist_vertex_of_cell);
499 double * vertex_lon =
xmalloc(num_local_vertices *
sizeof(*vertex_lon));
500 double * vertex_lat =
xmalloc(num_local_vertices *
sizeof(*vertex_lat));
501 int * dist_cells_of_vertex =
502 xmalloc(num_local_vertices * 6 *
sizeof(*dist_cells_of_vertex));
504 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
505 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
507 for (
size_t i = 0; i < read_num_local_vertices; ++i)
510 read_local_start_vertex + i, nvertices, comm_size)]++;
512 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
514 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
515 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
516 int send_accum = 0, recv_accum = 0;
517 for (
int i = 0; i < comm_size; ++i) {
518 send_displ[i] = send_accum;
519 recv_displ[i] = recv_accum;
520 send_accum += send_count[i];
521 recv_accum += recv_count[i];
524 MPI_Alltoallv(read_vertex_lon, send_count, send_displ, MPI_DOUBLE,
525 vertex_lon, recv_count, recv_displ, MPI_DOUBLE, comm);
526 MPI_Alltoallv(read_vertex_lat, send_count, send_displ, MPI_DOUBLE,
527 vertex_lat, recv_count, recv_displ, MPI_DOUBLE, comm);
529 for (
int i = 0; i < comm_size; ++i) {
536 MPI_Alltoallv(read_dist_cells_of_vertex, send_count, send_displ, MPI_INT,
537 dist_cells_of_vertex, recv_count, recv_displ, MPI_INT, comm);
543 free(read_vertex_lon);
544 free(read_vertex_lat);
545 free(read_dist_cells_of_vertex);
549 size_t num_core_vertices = num_local_cells * 3;
550 int * core_vertices =
xmalloc(num_core_vertices *
sizeof(*core_vertices));
552 memcpy(core_vertices, dist_vertex_of_cell,
553 num_core_vertices *
sizeof(*core_vertices));
557 xrealloc(core_vertices, num_core_vertices *
sizeof(*core_vertices));
561 int * cells_of_vertex_core =
562 xmalloc(num_core_vertices * 6 *
sizeof(*cells_of_vertex_core));
563 int * dist_vertex_owner =
564 xmalloc(num_local_vertices *
sizeof(*dist_vertex_owner));
566 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
567 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
569 for (
size_t i = 0; i < num_core_vertices; ++i)
570 send_count[((
unsigned long)(core_vertices[i]) *
571 (
unsigned long)comm_size + (
unsigned long)comm_size - 1) /
572 (
unsigned long)nvertices]++;
574 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
576 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
577 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
578 int send_accum = 0, recv_accum = 0;
579 for (
int i = 0; i < comm_size; ++i) {
580 send_displ[i] = send_accum;
581 recv_displ[i] = recv_accum;
582 send_accum += send_count[i];
583 recv_accum += recv_count[i];
586 int num_core_vertices_remote = 0;
587 for (
int i = 0; i < comm_size; ++i)
588 num_core_vertices_remote += recv_count[i];
590 int * remote_vertex_buffer =
591 xmalloc(num_core_vertices_remote *
sizeof(*remote_vertex_buffer));
593 MPI_Alltoallv(core_vertices, send_count, send_displ, MPI_INT,
594 remote_vertex_buffer, recv_count, recv_displ, MPI_INT, comm);
596 for (
size_t i = 0; i < num_local_vertices; ++i) dist_vertex_owner[i] = -1;
598 for (
int i = 0, j = 0; i < comm_size; ++i)
599 for (
int k = 0; k < recv_count[i]; ++k, ++j)
600 dist_vertex_owner[remote_vertex_buffer[j] - local_start_vertex] = i;
602 int * send_cell_of_vertex =
603 xmalloc(num_core_vertices_remote * 6 *
sizeof(*send_cell_of_vertex));
605 for (
int i = 0, l = 0, m = 0; i < comm_size; ++i) {
606 for (
int j = 0; j < recv_count[i]; ++j, ++l) {
607 int idx = remote_vertex_buffer[l] - local_start_vertex;
608 for (
int k = 0; k < 6; ++k, ++m)
609 send_cell_of_vertex[m] = dist_cells_of_vertex[idx * 6 + k];
612 free(dist_cells_of_vertex);
614 for (
int i = 0; i < comm_size; ++i) {
621 MPI_Alltoallv(send_cell_of_vertex, recv_count, recv_displ, MPI_INT,
622 cells_of_vertex_core, send_count, send_displ, MPI_INT, comm);
624 free(send_cell_of_vertex);
625 free(remote_vertex_buffer);
634 size_t num_halo_cells = 0;
636 int * tmp_cells = cells_of_vertex_core;
637 size_t num_tmp_cells = num_core_vertices * 6;
642 if ((num_tmp_cells > 0) && (tmp_cells[0] == -1)) {
647 halo_cells =
xmalloc((num_tmp_cells - num_local_cells) *
sizeof(*halo_cells));
650 for (; i < num_tmp_cells && tmp_cells[i] < (int)local_start_cell; ++i)
651 halo_cells[num_halo_cells++] = tmp_cells[i];
652 i += num_local_cells;
653 for (; i < num_tmp_cells; ++i) halo_cells[num_halo_cells++] = tmp_cells[i];
655 assert(num_halo_cells == num_tmp_cells - num_local_cells);
657 free(cells_of_vertex_core);
661 size_t num_all_local_vertices = num_halo_cells * 3 + num_core_vertices;
662 int * all_cell_to_vertex;
663 int * all_local_vertices =
xrealloc(core_vertices, num_all_local_vertices *
664 sizeof(*all_local_vertices));
665 cell_lon =
xrealloc(cell_lon, (num_local_cells + num_halo_cells) *
667 cell_lat =
xrealloc(cell_lat, (num_local_cells + num_halo_cells) *
669 cell_mask =
xrealloc(cell_mask, (num_local_cells + num_halo_cells) *
672 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
673 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
675 for (
size_t i = 0; i < num_halo_cells; ++i)
676 send_count[((
unsigned long)(halo_cells[i]) *
677 (
unsigned long)comm_size + (
unsigned long)comm_size - 1) /
678 (
unsigned long)ncells]++;
680 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
682 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
683 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
684 int send_accum = 0, recv_accum = 0;
685 for (
int i = 0; i < comm_size; ++i) {
686 send_displ[i] = send_accum;
687 recv_displ[i] = recv_accum;
688 send_accum += send_count[i];
689 recv_accum += recv_count[i];
692 int num_halo_cells_remote = 0;
693 for (
int i = 0; i < comm_size; ++i)
694 num_halo_cells_remote += recv_count[i];
696 int * remote_halo_cell_buffer =
697 xmalloc(num_halo_cells_remote *
sizeof(*remote_halo_cell_buffer));
699 MPI_Alltoallv(halo_cells, send_count, send_displ, MPI_INT,
700 remote_halo_cell_buffer, recv_count, recv_displ, MPI_INT,
703 int * send_halo_cell_vertices =
704 xmalloc(num_halo_cells_remote * 3 *
sizeof(*send_halo_cell_vertices));
705 double * send_cell_lon =
706 xmalloc(num_halo_cells_remote *
sizeof(*send_cell_lon));
707 double * send_cell_lat =
708 xmalloc(num_halo_cells_remote *
sizeof(*send_cell_lat));
709 int * send_cell_mask =
710 xmalloc(num_halo_cells_remote *
sizeof(*send_cell_mask));
712 for (
int i = 0, l = 0, m = 0; i < comm_size; ++i) {
713 for (
int j = 0; j < recv_count[i]; ++j, ++l) {
714 int idx = remote_halo_cell_buffer[l] - local_start_cell;
715 for (
int k = 0; k < 3; ++k, ++m)
716 send_halo_cell_vertices[m] = dist_vertex_of_cell[idx * 3 + k];
717 send_cell_lon[l] = cell_lon[idx];
718 send_cell_lat[l] = cell_lat[idx];
719 send_cell_mask[l] = cell_mask[idx];
723 MPI_Alltoallv(send_cell_lon, recv_count, recv_displ, MPI_DOUBLE,
724 cell_lon + num_local_cells, send_count, send_displ,
726 MPI_Alltoallv(send_cell_lat, recv_count, recv_displ, MPI_DOUBLE,
727 cell_lat + num_local_cells, send_count, send_displ,
729 MPI_Alltoallv(send_cell_mask, recv_count, recv_displ, MPI_INT,
730 cell_mask + num_local_cells, send_count, send_displ,
733 for (
int i = 0; i < comm_size; ++i) {
741 xrealloc(dist_vertex_of_cell, (num_local_cells + num_halo_cells) * 3 *
742 sizeof(*all_cell_to_vertex));
744 MPI_Alltoallv(send_halo_cell_vertices, recv_count, recv_displ, MPI_INT,
745 all_cell_to_vertex + num_local_cells * 3, send_count,
746 send_displ, MPI_INT, comm);
748 memcpy(all_local_vertices + num_core_vertices,
749 all_cell_to_vertex + num_local_cells * 3,
750 num_halo_cells * 3 *
sizeof(*all_local_vertices));
752 free(send_cell_mask);
755 free(send_halo_cell_vertices);
756 free(remote_halo_cell_buffer);
763 all_local_vertices, num_all_local_vertices, NULL);
768 double * all_vertex_lon =
769 xmalloc(num_all_local_vertices *
sizeof(*all_vertex_lon));
770 double * all_vertex_lat =
771 xmalloc(num_all_local_vertices *
sizeof(*all_vertex_lat));
772 int * all_local_vertices_owner =
773 xmalloc(num_all_local_vertices *
sizeof(*all_local_vertices_owner));
775 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
776 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
778 for (
size_t i = 0; i < num_all_local_vertices; ++i)
779 send_count[((
unsigned long)(all_local_vertices[i]) *
780 (
unsigned long)comm_size + (
unsigned long)comm_size - 1) /
781 (
unsigned long)nvertices]++;
783 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
785 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
786 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
787 int send_accum = 0, recv_accum = 0;
788 for (
int i = 0; i < comm_size; ++i) {
789 send_displ[i] = send_accum;
790 recv_displ[i] = recv_accum;
791 send_accum += send_count[i];
792 recv_accum += recv_count[i];
795 int num_all_local_vertices_remote = 0;
796 for (
int i = 0; i < comm_size; ++i)
797 num_all_local_vertices_remote += recv_count[i];
799 int * remote_vertex_buffer =
800 xmalloc(num_all_local_vertices_remote *
sizeof(*remote_vertex_buffer));
802 MPI_Alltoallv(all_local_vertices, send_count, send_displ, MPI_INT,
803 remote_vertex_buffer, recv_count, recv_displ, MPI_INT, comm);
805 int * send_vertex_owner = remote_vertex_buffer;
806 double * send_vertex_lon =
807 xmalloc(num_all_local_vertices_remote *
sizeof(*send_vertex_lon));
808 double * send_vertex_lat =
809 xmalloc(num_all_local_vertices_remote *
sizeof(*send_vertex_lat));
811 for (
int i = 0, l = 0; i < comm_size; ++i) {
812 for (
int j = 0; j < recv_count[i]; ++j, ++l) {
813 int idx = remote_vertex_buffer[l] - local_start_vertex;
814 send_vertex_owner[l] = dist_vertex_owner[idx];
815 send_vertex_lon[l] = vertex_lon[idx];
816 send_vertex_lat[l] = vertex_lat[idx];
820 free(dist_vertex_owner);
824 MPI_Alltoallv(send_vertex_owner, recv_count, recv_displ, MPI_INT,
825 all_local_vertices_owner, send_count, send_displ, MPI_INT,
827 MPI_Alltoallv(send_vertex_lon, recv_count, recv_displ, MPI_DOUBLE,
828 all_vertex_lon, send_count, send_displ, MPI_DOUBLE, comm);
829 MPI_Alltoallv(send_vertex_lat, recv_count, recv_displ, MPI_DOUBLE,
830 all_vertex_lat, send_count, send_displ, MPI_DOUBLE, comm);
832 free(send_vertex_lat);
833 free(send_vertex_lon);
834 free(send_vertex_owner);
843 size_t vertex_count = 3 * (num_local_cells + num_halo_cells);
844 int * permutation =
xmalloc(vertex_count *
sizeof(*permutation));
845 for (
size_t i = 0; i < vertex_count; ++i) permutation[i] = (
int)i;
849 for (
size_t i = 0, j = 0; i < vertex_count; ++i) {
850 while (all_local_vertices[j] != all_cell_to_vertex[i]) ++j;
851 all_cell_to_vertex[i] = (int)j;
858 *nbr_vertices = num_all_local_vertices;
859 *nbr_cells = num_local_cells + num_halo_cells;
861 *num_vertices_per_cell =
862 xmalloc((num_local_cells + num_halo_cells) *
sizeof(**num_vertices_per_cell));
863 for (
size_t i = 0; i < num_local_cells + num_halo_cells; ++i)
864 (*num_vertices_per_cell)[i] = 3;
866 *cell_to_vertex = all_cell_to_vertex;
869 xmalloc((num_local_cells + num_halo_cells) *
sizeof(**global_cell_ids));
870 for (
size_t i = 0; i < num_local_cells; ++i)
871 (*global_cell_ids)[i] = local_start_cell + i;
872 memcpy(*global_cell_ids + num_local_cells, halo_cells,
873 num_halo_cells *
sizeof(*halo_cells));
874 *global_vertex_ids =
xrealloc(all_local_vertices, num_all_local_vertices *
875 sizeof(*all_local_vertices));
878 xmalloc((num_local_cells + num_halo_cells) *
sizeof(**cell_owner));
879 for (
size_t i = 0; i < num_local_cells; ++i)
880 (*cell_owner)[i] = -1;
881 for (
size_t i = 0; i < num_halo_cells; ++i)
882 (*cell_owner)[num_local_cells + i] =
883 ((
unsigned long)(halo_cells[i]) * (
unsigned long)comm_size +
884 (
unsigned long)comm_size - 1) / (
unsigned long)ncells;
887 for (
size_t i = 0; i < num_all_local_vertices; ++i)
888 if (all_local_vertices_owner[i] == comm_rank)
889 all_local_vertices_owner[i] = -1;
890 *vertex_owner = all_local_vertices_owner;
892 *x_vertices = all_vertex_lon;
893 *y_vertices = all_vertex_lat;
896 *cell_msk = cell_mask;
931 const char * filename, MPI_Comm comm) {
933#ifndef YAC_NETCDF_ENABLED
938 "ERROR(yac_read_icon_basic_grid_data_parallel): "
939 "YAC is built without the NetCDF support");
942 int comm_rank, comm_size;
944 MPI_Comm_rank(comm, &comm_rank);
945 MPI_Comm_size(comm, &comm_size);
947 int local_is_io, * io_ranks, num_io_ranks;
950 size_t ncells, nvertices, nedges;
952 size_t read_local_start_cell = 0;
953 size_t read_num_local_cells = 0;
954 size_t read_local_start_vertex = 0;
955 size_t read_num_local_vertices = 0;
956 size_t read_local_start_edge = 0;
957 size_t read_num_local_edges = 0;
958 double * read_vertex_lon = NULL;
959 double * read_vertex_lat = NULL;
960 int * read_dist_vertex_of_cell = NULL;
961 int * read_dist_edge_of_cell = NULL;
962 int * read_dist_edge_vertices = NULL;
966 unsigned long io_proc_idx = ULONG_MAX;
967 for (
int i = 0; (i < num_io_ranks) && (io_proc_idx == ULONG_MAX); ++i)
968 if (io_ranks[i] == comm_rank)
969 io_proc_idx = (
unsigned long)i;
985 read_local_start_cell =
986 ((
unsigned long)ncells * io_proc_idx) / (
unsigned long)num_io_ranks;
987 read_num_local_cells =
988 ((
unsigned long)ncells * (io_proc_idx+1)) / (
unsigned long)num_io_ranks -
989 (
unsigned long)read_local_start_cell;
990 read_local_start_vertex =
991 ((
unsigned long)nvertices * io_proc_idx) / (
unsigned long)num_io_ranks;
992 read_num_local_vertices =
993 ((
unsigned long)nvertices * (io_proc_idx+1)) / (
unsigned long)num_io_ranks -
994 (
unsigned long)read_local_start_vertex;
995 read_local_start_edge =
996 ((
unsigned long)nedges * io_proc_idx) / (
unsigned long)num_io_ranks;
997 read_num_local_edges =
998 ((
unsigned long)nedges * (io_proc_idx+1)) / (
unsigned long)num_io_ranks -
999 (
unsigned long)read_local_start_edge;
1002 read_vertex_lon =
xmalloc(read_num_local_vertices *
sizeof(*read_vertex_lon));
1003 read_vertex_lat =
xmalloc(read_num_local_vertices *
sizeof(*read_vertex_lat));
1004 read_dist_vertex_of_cell =
1005 xmalloc(read_num_local_cells * 3 *
sizeof(*read_dist_vertex_of_cell));
1006 read_dist_edge_of_cell =
1007 xmalloc(read_num_local_cells * 3 *
sizeof(*read_dist_edge_of_cell));
1008 read_dist_edge_vertices =
1009 xmalloc(read_num_local_edges * 2 *
sizeof(*read_dist_edge_vertices));
1013 &read_num_local_vertices, read_vertex_lon));
1014 convert_to_rad(ncid, varid, read_vertex_lon, read_num_local_vertices);
1017 &read_num_local_vertices, read_vertex_lat));
1018 convert_to_rad(ncid, varid, read_vertex_lat, read_num_local_vertices);
1022 MAX(3*read_num_local_cells, 2*read_num_local_edges) *
1025 size_t tmp_start[2] = {0, read_local_start_cell};
1026 size_t tmp_count[2] = {3, read_num_local_cells};
1028 YAC_HANDLE_ERROR(nc_get_vara_int(ncid, varid, tmp_start, tmp_count, buffer));
1029 for (
size_t i = 0; i < read_num_local_cells; ++i)
1030 for (
int j = 0; j < 3; ++j)
1031 read_dist_vertex_of_cell[i * 3 + j] =
1032 buffer[i + j * read_num_local_cells];
1034 YAC_HANDLE_ERROR(nc_get_vara_int(ncid, varid, tmp_start, tmp_count, buffer));
1035 for (
size_t i = 0; i < read_num_local_cells; ++i)
1036 for (
int j = 0; j < 3; ++j)
1037 read_dist_edge_of_cell[i * 3 + j] =
1038 buffer[i + j * read_num_local_cells];
1041 size_t tmp_start[2] = {0, read_local_start_edge};
1042 size_t tmp_count[2] = {2, read_num_local_edges};
1044 YAC_HANDLE_ERROR(nc_get_vara_int(ncid, varid, tmp_start, tmp_count, buffer));
1045 for (
size_t i = 0; i < read_num_local_edges; ++i)
1046 for (
int j = 0; j < 2; ++j)
1047 read_dist_edge_vertices[i * 2 + j] =
1048 buffer[i + j * read_num_local_edges];
1053 for (
size_t i = 0; i < read_num_local_cells * 3; ++i)
1054 if (read_dist_vertex_of_cell[i] > 0) read_dist_vertex_of_cell[i]--;
1055 for (
size_t i = 0; i < read_num_local_cells * 3; ++i)
1056 if (read_dist_edge_of_cell[i] > 0) read_dist_edge_of_cell[i]--;
1057 for (
size_t i = 0; i < read_num_local_edges * 2; ++i)
1058 if (read_dist_edge_vertices[i] > 0) read_dist_edge_vertices[i]--;
1063 read_vertex_lon =
xmalloc(1 *
sizeof(*read_vertex_lon));
1064 read_vertex_lat =
xmalloc(1 *
sizeof(*read_vertex_lat));
1065 read_dist_vertex_of_cell =
xmalloc(1 *
sizeof(*read_dist_vertex_of_cell));
1066 read_dist_edge_of_cell =
xmalloc(1 *
sizeof(*read_dist_edge_of_cell));
1067 read_dist_edge_vertices =
xmalloc(1 *
sizeof(*read_dist_edge_vertices));
1074 if (comm_rank == 0) tmp = (int)ncells;
1075 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
1076 ncells = (size_t)tmp;
1077 if (comm_rank == 0) tmp = (int)nvertices;
1078 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
1079 nvertices = (size_t)tmp;
1080 if (comm_rank == 0) tmp = (int)nedges;
1081 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
1082 nedges = (size_t)tmp;
1086 size_t local_start_cell =
1087 ((
unsigned long)ncells * (
unsigned long)comm_rank) /
1088 (
unsigned long)comm_size;
1089 size_t num_local_cells =
1090 ((
unsigned long)ncells * ((
unsigned long)comm_rank+1)) /
1091 (
unsigned long)comm_size - (
unsigned long)local_start_cell;
1092 size_t local_start_vertex =
1093 ((
unsigned long)nvertices * (
unsigned long)comm_rank) /
1094 (
unsigned long)comm_size;
1095 size_t num_local_vertices =
1096 ((
unsigned long)nvertices * ((
unsigned long)comm_rank+1)) /
1097 (
unsigned long)comm_size - (
unsigned long)local_start_vertex;
1098 size_t local_start_edge =
1099 ((
unsigned long)nedges * (
unsigned long)comm_rank) /
1100 (
unsigned long)comm_size;
1101 size_t num_local_edges =
1102 ((
unsigned long)nedges * ((
unsigned long)comm_rank+1)) /
1103 (
unsigned long)comm_size - (
unsigned long)local_start_edge;
1106 int * dist_vertex_of_cell =
1107 xmalloc(num_local_cells * 3 *
sizeof(*dist_vertex_of_cell));
1108 int * dist_edge_of_cell =
1109 xmalloc(num_local_cells * 3 *
sizeof(*dist_edge_of_cell));
1111 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
1112 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
1114 for (
unsigned i = 0; i < read_num_local_cells; ++i)
1117 read_local_start_cell + i, ncells, comm_size)] += 3;
1119 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
1121 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
1122 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
1123 int send_accum = 0, recv_accum = 0;
1124 for (
int i = 0; i < comm_size; ++i) {
1125 send_displ[i] = send_accum;
1126 recv_displ[i] = recv_accum;
1127 send_accum += send_count[i];
1128 recv_accum += recv_count[i];
1131 MPI_Alltoallv(read_dist_vertex_of_cell, send_count, send_displ, MPI_INT,
1132 dist_vertex_of_cell, recv_count, recv_displ, MPI_INT, comm);
1133 MPI_Alltoallv(read_dist_edge_of_cell, send_count, send_displ, MPI_INT,
1134 dist_edge_of_cell, recv_count, recv_displ, MPI_INT, comm);
1140 free(read_dist_vertex_of_cell);
1141 free(read_dist_edge_of_cell);
1145 double * vertex_lon =
xmalloc(num_local_vertices *
sizeof(*vertex_lon));
1146 double * vertex_lat =
xmalloc(num_local_vertices *
sizeof(*vertex_lat));
1148 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
1149 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
1151 for (
unsigned i = 0; i < read_num_local_vertices; ++i)
1154 read_local_start_vertex + i, nvertices, comm_size)]++;
1156 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
1158 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
1159 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
1160 int send_accum = 0, recv_accum = 0;
1161 for (
int i = 0; i < comm_size; ++i) {
1162 send_displ[i] = send_accum;
1163 recv_displ[i] = recv_accum;
1164 send_accum += send_count[i];
1165 recv_accum += recv_count[i];
1168 MPI_Alltoallv(read_vertex_lon, send_count, send_displ, MPI_DOUBLE,
1169 vertex_lon, recv_count, recv_displ, MPI_DOUBLE, comm);
1170 MPI_Alltoallv(read_vertex_lat, send_count, send_displ, MPI_DOUBLE,
1171 vertex_lat, recv_count, recv_displ, MPI_DOUBLE, comm);
1177 free(read_vertex_lon);
1178 free(read_vertex_lat);
1182 int * dist_edge_vertices =
1183 xmalloc(num_local_edges * 2 *
sizeof(*dist_edge_vertices));
1185 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
1186 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
1188 for (
unsigned i = 0; i < read_num_local_edges; ++i)
1191 read_local_start_edge + i, nedges, comm_size)] += 2;
1193 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
1195 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
1196 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
1197 int send_accum = 0, recv_accum = 0;
1198 for (
int i = 0; i < comm_size; ++i) {
1199 send_displ[i] = send_accum;
1200 recv_displ[i] = recv_accum;
1201 send_accum += send_count[i];
1202 recv_accum += recv_count[i];
1205 MPI_Alltoallv(read_dist_edge_vertices, send_count, send_displ, MPI_INT,
1206 dist_edge_vertices, recv_count, recv_displ, MPI_INT, comm);
1212 free(read_dist_edge_vertices);
1218 size_t num_core_vertices;
1224 size_t N = num_local_cells * 3;
1225 core_vertices =
xmalloc(N *
sizeof(*core_vertices));
1229 for (
size_t i = 0; i < N; ++i)
1230 core_vertices[i] = (
yac_int)dist_vertex_of_cell[i];
1232 for (
size_t i = 0; i < N; ++i) permutation[i] = i;
1235 yac_int prev_vertex_id = XT_INT_MAX;
1236 num_core_vertices = 0;
1237 for (
size_t i = 0; i < N; ++i) {
1238 yac_int curr_vertex_id = core_vertices[i];
1239 if (prev_vertex_id == curr_vertex_id) {
1243 core_vertices[num_core_vertices] = (prev_vertex_id = curr_vertex_id);
1244 ++num_core_vertices;
1247 permutation[i] /= 3;
1250 xrealloc(core_vertices, num_core_vertices *
sizeof(*core_vertices));
1254 free(dist_vertex_of_cell);
1259 size_t num_core_edges;
1263 size_t N = num_local_cells * 3;
1264 core_edges =
xmalloc(N *
sizeof(*core_edges));
1265 size_t * permutation =
xmalloc(N *
sizeof(*permutation));
1267 for (
size_t i = 0; i < N; ++i)
1268 core_edges[i] = (
yac_int)dist_edge_of_cell[i];
1269 for (
size_t i = 0; i < N; ++i) permutation[i] = i;
1272 yac_int prev_edge_id = XT_INT_MAX;
1274 for (
size_t i = 0; i < N; ++i) {
1275 yac_int curr_edge_id = core_edges[i];
1276 if (prev_edge_id != curr_edge_id)
1277 core_edges[num_core_edges++] = (prev_edge_id = curr_edge_id);
1279 permutation[i] /= 3;
1283 xrealloc(core_edges, num_core_edges *
sizeof(*core_edges));
1284 free(dist_edge_of_cell);
1291 double * local_vertex_lon =
1292 xmalloc(num_core_vertices *
sizeof(*local_vertex_lon));
1293 double * local_vertex_lat =
1294 xmalloc(num_core_vertices *
sizeof(*local_vertex_lat));
1295 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
1296 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
1298 for (
size_t i = 0; i < num_core_vertices; ++i)
1299 send_count[((
unsigned long)(core_vertices[i]) *
1300 (
unsigned long)comm_size + (
unsigned long)comm_size - 1) /
1301 (
unsigned long)nvertices]++;
1303 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
1305 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
1306 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
1307 int send_accum = 0, recv_accum = 0;
1308 for (
int i = 0; i < comm_size; ++i) {
1309 send_displ[i] = send_accum;
1310 recv_displ[i] = recv_accum;
1311 send_accum += send_count[i];
1312 recv_accum += recv_count[i];
1315 int num_all_local_vertices_remote = 0;
1316 for (
int i = 0; i < comm_size; ++i)
1317 num_all_local_vertices_remote += recv_count[i];
1319 yac_int * remote_vertex_buffer =
1320 xmalloc(num_all_local_vertices_remote *
sizeof(*remote_vertex_buffer));
1323 core_vertices, send_count, send_displ,
yac_int_dt,
1324 remote_vertex_buffer, recv_count, recv_displ,
yac_int_dt, comm);
1326 double * send_vertex_lon =
1327 xmalloc(num_all_local_vertices_remote *
sizeof(*send_vertex_lon));
1328 double * send_vertex_lat =
1329 xmalloc(num_all_local_vertices_remote *
sizeof(*send_vertex_lat));
1331 for (
int i = 0, l = 0; i < comm_size; ++i) {
1332 for (
int j = 0; j < recv_count[i]; ++j, ++l) {
1333 size_t idx = (size_t)(remote_vertex_buffer[l]) - local_start_vertex;
1334 send_vertex_lon[l] = vertex_lon[idx];
1335 send_vertex_lat[l] = vertex_lat[idx];
1339 free(remote_vertex_buffer);
1343 MPI_Alltoallv(send_vertex_lon, recv_count, recv_displ, MPI_DOUBLE,
1344 local_vertex_lon, send_count, send_displ, MPI_DOUBLE, comm);
1345 MPI_Alltoallv(send_vertex_lat, recv_count, recv_displ, MPI_DOUBLE,
1346 local_vertex_lat, send_count, send_displ, MPI_DOUBLE, comm);
1348 for (
unsigned i = 0; i < num_core_vertices; ++i)
1352 free(send_vertex_lat);
1353 free(send_vertex_lon);
1358 free(local_vertex_lon);
1359 free(local_vertex_lat);
1366 int * local_edge_to_vertex =
1367 xmalloc(2 * num_core_edges *
sizeof(*local_edge_to_vertex));
1368 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
1369 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
1371 for (
size_t i = 0; i < num_core_edges; ++i)
1372 send_count[((
unsigned long)(core_edges[i]) *
1373 (
unsigned long)comm_size + (
unsigned long)comm_size - 1) /
1374 (
unsigned long)nedges]++;
1376 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
1378 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
1379 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
1380 int send_accum = 0, recv_accum = 0;
1381 for (
int i = 0; i < comm_size; ++i) {
1382 send_displ[i] = send_accum;
1383 recv_displ[i] = recv_accum;
1384 send_accum += send_count[i];
1385 recv_accum += recv_count[i];
1388 int num_all_local_edges_remote = 0;
1389 for (
int i = 0; i < comm_size; ++i)
1390 num_all_local_edges_remote += recv_count[i];
1392 yac_int * remote_edge_buffer =
1393 xmalloc(num_all_local_edges_remote *
sizeof(*remote_edge_buffer));
1396 core_edges, send_count, send_displ,
yac_int_dt,
1397 remote_edge_buffer, recv_count, recv_displ,
yac_int_dt, comm);
1399 int * send_edge_vertices =
1400 xmalloc(2 * num_all_local_edges_remote *
sizeof(*send_edge_vertices));
1402 for (
int i = 0, l = 0; i < comm_size; ++i) {
1403 for (
int j = 0; j < recv_count[i]; ++j, ++l) {
1404 size_t idx = (size_t)(remote_edge_buffer[l]) - local_start_edge;
1405 send_edge_vertices[2*l+0] = dist_edge_vertices[2*idx+0];
1406 send_edge_vertices[2*l+1] = dist_edge_vertices[2*idx+1];
1415 free(remote_edge_buffer);
1416 free(dist_edge_vertices);
1418 MPI_Alltoallv(send_edge_vertices, recv_count, recv_displ, MPI_INT,
1419 local_edge_to_vertex, send_count, send_displ, MPI_INT, comm);
1421 size_t * permutation =
xmalloc(2 * num_core_edges *
sizeof(*permutation));
1422 for (
size_t i = 0; i < 2 * num_core_edges; ++i) permutation[i] = i;
1425 local_edge_to_vertex, 2 * num_core_edges, permutation);
1427 for (
size_t i = 0, j = 0; i < 2 * num_core_edges; ++i) {
1429 while ((j < nvertices) && (core_vertices[j] < curr_vertex)) ++j;
1431 (j < nvertices) && (core_vertices[j] == curr_vertex),
1432 "ERROR(yac_read_icon_basic_grid_data_parallel): vertex id missmatch")
1437 free(send_edge_vertices);
1442 free(local_edge_to_vertex);
1447 for (
size_t i = 0; i < num_local_cells; ++i)