249 const char * filename, MPI_Comm comm,
int * nbr_vertices,
int * nbr_cells,
250 int ** num_vertices_per_cell,
int ** cell_to_vertex,
int ** global_cell_ids,
251 int ** cell_owner,
int ** global_vertex_ids,
int ** vertex_owner,
252 double ** x_vertices,
double ** y_vertices,
253 double ** x_cells,
double ** y_cells,
int ** cell_msk) {
255 int comm_rank, comm_size;
257 MPI_Comm_rank(comm, &comm_rank);
258 MPI_Comm_size(comm, &comm_size);
260 int local_is_io, * io_ranks, num_io_ranks;
263 size_t ncells, nvertices, nv, ne;
265 size_t read_local_start_cell = 0;
266 size_t read_num_local_cells = 0;
267 size_t read_local_start_vertex = 0;
268 size_t read_num_local_vertices = 0;
269 double * read_cell_lon = NULL;
270 double * read_cell_lat = NULL;
271 int * read_cell_mask = NULL;
272 double * read_vertex_lon = NULL;
273 double * read_vertex_lat = NULL;
274 int * read_dist_vertex_of_cell = NULL;
275 int * read_dist_cells_of_vertex = NULL;
279 unsigned long io_proc_idx = ULONG_MAX;
280 for (
int i = 0; (i < num_io_ranks) && (io_proc_idx == ULONG_MAX); ++i)
281 if (io_ranks[i] == comm_rank)
282 io_proc_idx = (
unsigned long)i;
300 read_local_start_cell =
301 ((
unsigned long)ncells * io_proc_idx) / (
unsigned long)num_io_ranks;
302 read_num_local_cells =
303 ((
unsigned long)ncells * (io_proc_idx+1)) / (
unsigned long)num_io_ranks -
304 (
unsigned long)read_local_start_cell;
305 read_local_start_vertex =
306 ((
unsigned long)nvertices * io_proc_idx) / (
unsigned long)num_io_ranks;
307 read_num_local_vertices =
308 ((
unsigned long)nvertices * (io_proc_idx+1)) / (
unsigned long)num_io_ranks -
309 (
unsigned long)read_local_start_vertex;
312 read_cell_lon =
xmalloc(read_num_local_cells *
sizeof(*read_cell_lon));
313 read_cell_lat =
xmalloc(read_num_local_cells *
sizeof(*read_cell_lat));
314 read_cell_mask =
xmalloc(read_num_local_cells *
sizeof(*read_cell_mask));
315 read_vertex_lon =
xmalloc(read_num_local_vertices *
sizeof(*read_vertex_lon));
316 read_vertex_lat =
xmalloc(read_num_local_vertices *
sizeof(*read_vertex_lat));
317 read_dist_vertex_of_cell =
318 xmalloc(read_num_local_cells * nv *
sizeof(*read_dist_vertex_of_cell));
319 read_dist_cells_of_vertex =
320 xmalloc(read_num_local_vertices * ne *
sizeof(*read_dist_cells_of_vertex));
324 &read_num_local_cells, read_cell_lon));
328 &read_num_local_cells, read_cell_lat));
332 &read_num_local_cells, read_cell_mask));
335 &read_num_local_vertices, read_vertex_lon));
336 convert_to_rad(ncid, varid, read_vertex_lon, read_num_local_vertices);
339 &read_num_local_vertices, read_vertex_lat));
340 convert_to_rad(ncid, varid, read_vertex_lat, read_num_local_vertices);
342 int * buffer =
xmalloc(
MAX(nv*read_num_local_cells,
343 ne*read_num_local_vertices) *
sizeof(*buffer));
345 size_t tmp_start[2] = {0, read_local_start_cell};
346 size_t tmp_count[2] = {nv, read_num_local_cells};
348 YAC_HANDLE_ERROR(nc_get_vara_int(ncid, varid, tmp_start, tmp_count, buffer));
349 for (
size_t i = 0; i < read_num_local_cells; ++i)
350 for (
size_t j = 0; j < nv; ++j)
351 read_dist_vertex_of_cell[i * nv + j] =
352 buffer[i + j * read_num_local_cells];
355 size_t tmp_start[2] = {0, read_local_start_vertex};
356 size_t tmp_count[2] = {ne, read_num_local_vertices};
358 YAC_HANDLE_ERROR(nc_get_vara_int(ncid, varid, tmp_start, tmp_count, buffer));
359 for (
size_t i = 0; i < read_num_local_vertices; ++i)
360 for (
size_t j = 0; j < ne; ++j)
361 read_dist_cells_of_vertex[i * ne + j] =
362 buffer[i + j * read_num_local_vertices];
367 for (
size_t i = 0; i < read_num_local_cells * nv; ++i)
368 if (read_dist_vertex_of_cell[i] > 0) read_dist_vertex_of_cell[i]--;
369 for (
size_t i = 0; i < read_num_local_vertices * ne; ++i)
370 read_dist_cells_of_vertex[i]--;
375 read_cell_lon =
xmalloc(1 *
sizeof(*read_cell_lon));
376 read_cell_lat =
xmalloc(1 *
sizeof(*read_cell_lat));
377 read_cell_mask =
xmalloc(1 *
sizeof(*read_cell_mask));
378 read_vertex_lon =
xmalloc(1 *
sizeof(*read_vertex_lon));
379 read_vertex_lat =
xmalloc(1 *
sizeof(*read_vertex_lat));
380 read_dist_vertex_of_cell =
xmalloc(1 *
sizeof(*read_dist_vertex_of_cell));
381 read_dist_cells_of_vertex =
xmalloc(1 *
sizeof(*read_dist_cells_of_vertex));
388 if (comm_rank == 0) tmp = (int)ncells;
389 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
390 ncells = (size_t)tmp;
391 if (comm_rank == 0) tmp = (int)nvertices;
392 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
393 nvertices = (size_t)tmp;
394 if (comm_rank == 0) tmp = (int)nv;
395 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
397 if (comm_rank == 0) tmp = (int)ne;
398 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
403 size_t local_start_cell =
404 ((
unsigned long)ncells * (
unsigned long)comm_rank) /
405 (
unsigned long)comm_size;
406 size_t num_local_cells =
407 ((
unsigned long)ncells * ((
unsigned long)comm_rank+1)) /
408 (
unsigned long)comm_size - (
unsigned long)local_start_cell;
409 size_t local_start_vertex =
410 ((
unsigned long)nvertices * (
unsigned long)comm_rank) /
411 (
unsigned long)comm_size;
412 size_t num_local_vertices =
413 ((
unsigned long)nvertices * ((
unsigned long)comm_rank+1)) /
414 (
unsigned long)comm_size - (
unsigned long)local_start_vertex;
417 double * cell_lon =
xmalloc(num_local_cells *
sizeof(*cell_lon));
418 double * cell_lat =
xmalloc(num_local_cells *
sizeof(*cell_lat));
419 int * cell_mask =
xmalloc(num_local_cells *
sizeof(*cell_mask));
420 int * dist_vertex_of_cell =
421 xmalloc(num_local_cells * nv *
sizeof(*dist_vertex_of_cell));
423 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
424 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
426 for (
size_t i = 0; i < read_num_local_cells; ++i)
429 read_local_start_cell + i, ncells, comm_size)]++;
431 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
433 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
434 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
435 int send_accum = 0, recv_accum = 0;
436 for (
int i = 0; i < comm_size; ++i) {
437 send_displ[i] = send_accum;
438 recv_displ[i] = recv_accum;
439 send_accum += send_count[i];
440 recv_accum += recv_count[i];
443 MPI_Alltoallv(read_cell_lon, send_count, send_displ, MPI_DOUBLE,
444 cell_lon, recv_count, recv_displ, MPI_DOUBLE, comm);
445 MPI_Alltoallv(read_cell_lat, send_count, send_displ, MPI_DOUBLE,
446 cell_lat, recv_count, recv_displ, MPI_DOUBLE, comm);
447 MPI_Alltoallv(read_cell_mask, send_count, send_displ, MPI_INT,
448 cell_mask, recv_count, recv_displ, MPI_INT, comm);
450 for (
int i = 0; i < comm_size; ++i) {
457 MPI_Alltoallv(read_dist_vertex_of_cell, send_count, send_displ, MPI_INT,
458 dist_vertex_of_cell, recv_count, recv_displ, MPI_INT, comm);
466 free(read_cell_mask);
467 free(read_dist_vertex_of_cell);
471 double * vertex_lon =
xmalloc(num_local_vertices *
sizeof(*vertex_lon));
472 double * vertex_lat =
xmalloc(num_local_vertices *
sizeof(*vertex_lat));
473 int * dist_cells_of_vertex =
474 xmalloc(num_local_vertices * ne *
sizeof(*dist_cells_of_vertex));
476 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
477 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
479 for (
size_t i = 0; i < read_num_local_vertices; ++i)
482 read_local_start_vertex + i, nvertices, comm_size)]++;
484 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
486 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
487 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
488 int send_accum = 0, recv_accum = 0;
489 for (
int i = 0; i < comm_size; ++i) {
490 send_displ[i] = send_accum;
491 recv_displ[i] = recv_accum;
492 send_accum += send_count[i];
493 recv_accum += recv_count[i];
496 MPI_Alltoallv(read_vertex_lon, send_count, send_displ, MPI_DOUBLE,
497 vertex_lon, recv_count, recv_displ, MPI_DOUBLE, comm);
498 MPI_Alltoallv(read_vertex_lat, send_count, send_displ, MPI_DOUBLE,
499 vertex_lat, recv_count, recv_displ, MPI_DOUBLE, comm);
501 for (
int i = 0; i < comm_size; ++i) {
508 MPI_Alltoallv(read_dist_cells_of_vertex, send_count, send_displ, MPI_INT,
509 dist_cells_of_vertex, recv_count, recv_displ, MPI_INT, comm);
515 free(read_vertex_lon);
516 free(read_vertex_lat);
517 free(read_dist_cells_of_vertex);
521 size_t num_core_vertices = num_local_cells * nv;
522 int * core_vertices =
xmalloc(num_core_vertices *
sizeof(*core_vertices));
524 memcpy(core_vertices, dist_vertex_of_cell,
525 num_core_vertices *
sizeof(*core_vertices));
529 xrealloc(core_vertices, num_core_vertices *
sizeof(*core_vertices));
533 int * cells_of_vertex_core =
534 xmalloc(num_core_vertices * ne *
sizeof(*cells_of_vertex_core));
535 int * dist_vertex_owner =
536 xmalloc(num_local_vertices *
sizeof(*dist_vertex_owner));
538 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
539 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
541 for (
size_t i = 0; i < num_core_vertices; ++i)
542 send_count[((
unsigned long)(core_vertices[i]) *
543 (
unsigned long)comm_size + (
unsigned long)comm_size - 1) /
544 (
unsigned long)nvertices]++;
546 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
548 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
549 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
550 int send_accum = 0, recv_accum = 0;
551 for (
int i = 0; i < comm_size; ++i) {
552 send_displ[i] = send_accum;
553 recv_displ[i] = recv_accum;
554 send_accum += send_count[i];
555 recv_accum += recv_count[i];
558 int num_core_vertices_remote = 0;
559 for (
int i = 0; i < comm_size; ++i)
560 num_core_vertices_remote += recv_count[i];
562 int * remote_vertex_buffer =
563 xmalloc(num_core_vertices_remote *
sizeof(*remote_vertex_buffer));
565 MPI_Alltoallv(core_vertices, send_count, send_displ, MPI_INT,
566 remote_vertex_buffer, recv_count, recv_displ, MPI_INT, comm);
568 for (
size_t i = 0; i < num_local_vertices; ++i) dist_vertex_owner[i] = -1;
570 for (
int i = 0, j = 0; i < comm_size; ++i)
571 for (
int k = 0; k < recv_count[i]; ++k, ++j)
572 dist_vertex_owner[remote_vertex_buffer[j] - local_start_vertex] = i;
574 int * send_cell_of_vertex =
575 xmalloc(num_core_vertices_remote * ne *
sizeof(*send_cell_of_vertex));
577 for (
int i = 0, l = 0, m = 0; i < comm_size; ++i) {
578 for (
int j = 0; j < recv_count[i]; ++j, ++l) {
579 int idx = remote_vertex_buffer[l] - local_start_vertex;
580 for (
size_t k = 0; k < ne; ++k, ++m)
581 send_cell_of_vertex[m] = dist_cells_of_vertex[idx * ne + k];
584 free(dist_cells_of_vertex);
586 for (
int i = 0; i < comm_size; ++i) {
593 MPI_Alltoallv(send_cell_of_vertex, recv_count, recv_displ, MPI_INT,
594 cells_of_vertex_core, send_count, send_displ, MPI_INT, comm);
596 free(send_cell_of_vertex);
597 free(remote_vertex_buffer);
606 size_t num_halo_cells = 0;
608 int * tmp_cells = cells_of_vertex_core;
609 size_t num_tmp_cells = num_core_vertices * ne;
614 if ((num_tmp_cells > 0) && (tmp_cells[0] == -1)) {
619 halo_cells =
xmalloc((num_tmp_cells - num_local_cells) *
sizeof(*halo_cells));
622 for (; i < num_tmp_cells && tmp_cells[i] < (int)local_start_cell; ++i)
623 halo_cells[num_halo_cells++] = tmp_cells[i];
624 i += num_local_cells;
625 for (; i < num_tmp_cells; ++i) halo_cells[num_halo_cells++] = tmp_cells[i];
627 assert(num_halo_cells == num_tmp_cells - num_local_cells);
629 free(cells_of_vertex_core);
633 size_t num_all_local_vertices = num_halo_cells * nv + num_core_vertices;
634 int * all_cell_to_vertex;
635 int * all_local_vertices =
xrealloc(core_vertices, num_all_local_vertices *
636 sizeof(*all_local_vertices));
637 cell_lon =
xrealloc(cell_lon, (num_local_cells + num_halo_cells) *
639 cell_lat =
xrealloc(cell_lat, (num_local_cells + num_halo_cells) *
641 cell_mask =
xrealloc(cell_mask, (num_local_cells + num_halo_cells) *
644 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
645 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
647 for (
size_t i = 0; i < num_halo_cells; ++i)
648 send_count[((
unsigned long)(halo_cells[i]) *
649 (
unsigned long)comm_size + (
unsigned long)comm_size - 1) /
650 (
unsigned long)ncells]++;
652 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
654 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
655 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
656 int send_accum = 0, recv_accum = 0;
657 for (
int i = 0; i < comm_size; ++i) {
658 send_displ[i] = send_accum;
659 recv_displ[i] = recv_accum;
660 send_accum += send_count[i];
661 recv_accum += recv_count[i];
664 int num_halo_cells_remote = 0;
665 for (
int i = 0; i < comm_size; ++i)
666 num_halo_cells_remote += recv_count[i];
668 int * remote_halo_cell_buffer =
669 xmalloc(num_halo_cells_remote *
sizeof(*remote_halo_cell_buffer));
671 MPI_Alltoallv(halo_cells, send_count, send_displ, MPI_INT,
672 remote_halo_cell_buffer, recv_count, recv_displ, MPI_INT,
675 int * send_halo_cell_vertices =
676 xmalloc(num_halo_cells_remote * nv *
sizeof(*send_halo_cell_vertices));
677 double * send_cell_lon =
678 xmalloc(num_halo_cells_remote *
sizeof(*send_cell_lon));
679 double * send_cell_lat =
680 xmalloc(num_halo_cells_remote *
sizeof(*send_cell_lat));
681 int * send_cell_mask =
682 xmalloc(num_halo_cells_remote *
sizeof(*send_cell_mask));
684 for (
int i = 0, l = 0, m = 0; i < comm_size; ++i) {
685 for (
int j = 0; j < recv_count[i]; ++j, ++l) {
686 int idx = remote_halo_cell_buffer[l] - local_start_cell;
687 for (
size_t k = 0; k < nv; ++k, ++m)
688 send_halo_cell_vertices[m] = dist_vertex_of_cell[idx * nv + k];
689 send_cell_lon[l] = cell_lon[idx];
690 send_cell_lat[l] = cell_lat[idx];
691 send_cell_mask[l] = cell_mask[idx];
695 MPI_Alltoallv(send_cell_lon, recv_count, recv_displ, MPI_DOUBLE,
696 cell_lon + num_local_cells, send_count, send_displ,
698 MPI_Alltoallv(send_cell_lat, recv_count, recv_displ, MPI_DOUBLE,
699 cell_lat + num_local_cells, send_count, send_displ,
701 MPI_Alltoallv(send_cell_mask, recv_count, recv_displ, MPI_INT,
702 cell_mask + num_local_cells, send_count, send_displ,
705 for (
int i = 0; i < comm_size; ++i) {
713 xrealloc(dist_vertex_of_cell, (num_local_cells + num_halo_cells) * nv *
714 sizeof(*all_cell_to_vertex));
716 MPI_Alltoallv(send_halo_cell_vertices, recv_count, recv_displ, MPI_INT,
717 all_cell_to_vertex + num_local_cells * nv, send_count,
718 send_displ, MPI_INT, comm);
720 memcpy(all_local_vertices + num_core_vertices,
721 all_cell_to_vertex + num_local_cells * nv,
722 num_halo_cells * nv *
sizeof(*all_local_vertices));
724 free(send_cell_mask);
727 free(send_halo_cell_vertices);
728 free(remote_halo_cell_buffer);
735 all_local_vertices, num_all_local_vertices, NULL);
740 double * all_vertex_lon =
741 xmalloc(num_all_local_vertices *
sizeof(*all_vertex_lon));
742 double * all_vertex_lat =
743 xmalloc(num_all_local_vertices *
sizeof(*all_vertex_lat));
744 int * all_local_vertices_owner =
745 xmalloc(num_all_local_vertices *
sizeof(*all_local_vertices_owner));
747 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
748 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
750 for (
size_t i = 0; i < num_all_local_vertices; ++i)
751 send_count[((
unsigned long)(all_local_vertices[i]) *
752 (
unsigned long)comm_size + (
unsigned long)comm_size - 1) /
753 (
unsigned long)nvertices]++;
755 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
757 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
758 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
759 int send_accum = 0, recv_accum = 0;
760 for (
int i = 0; i < comm_size; ++i) {
761 send_displ[i] = send_accum;
762 recv_displ[i] = recv_accum;
763 send_accum += send_count[i];
764 recv_accum += recv_count[i];
767 int num_all_local_vertices_remote = 0;
768 for (
int i = 0; i < comm_size; ++i)
769 num_all_local_vertices_remote += recv_count[i];
771 int * remote_vertex_buffer =
772 xmalloc(num_all_local_vertices_remote *
sizeof(*remote_vertex_buffer));
774 MPI_Alltoallv(all_local_vertices, send_count, send_displ, MPI_INT,
775 remote_vertex_buffer, recv_count, recv_displ, MPI_INT, comm);
777 int * send_vertex_owner = remote_vertex_buffer;
778 double * send_vertex_lon =
779 xmalloc(num_all_local_vertices_remote *
sizeof(*send_vertex_lon));
780 double * send_vertex_lat =
781 xmalloc(num_all_local_vertices_remote *
sizeof(*send_vertex_lat));
783 for (
int i = 0, l = 0; i < comm_size; ++i) {
784 for (
int j = 0; j < recv_count[i]; ++j, ++l) {
785 int idx = remote_vertex_buffer[l] - local_start_vertex;
786 send_vertex_owner[l] = dist_vertex_owner[idx];
787 send_vertex_lon[l] = vertex_lon[idx];
788 send_vertex_lat[l] = vertex_lat[idx];
792 free(dist_vertex_owner);
796 MPI_Alltoallv(send_vertex_owner, recv_count, recv_displ, MPI_INT,
797 all_local_vertices_owner, send_count, send_displ, MPI_INT,
799 MPI_Alltoallv(send_vertex_lon, recv_count, recv_displ, MPI_DOUBLE,
800 all_vertex_lon, send_count, send_displ, MPI_DOUBLE, comm);
801 MPI_Alltoallv(send_vertex_lat, recv_count, recv_displ, MPI_DOUBLE,
802 all_vertex_lat, send_count, send_displ, MPI_DOUBLE, comm);
804 free(send_vertex_lat);
805 free(send_vertex_lon);
806 free(send_vertex_owner);
815 size_t vertex_count = nv * (num_local_cells + num_halo_cells);
816 int * permutation =
xmalloc(vertex_count *
sizeof(*permutation));
817 for (
size_t i = 0; i < vertex_count; ++i) permutation[i] = (
int)i;
821 for (
size_t i = 0, j = 0; i < vertex_count; ++i) {
822 while (all_local_vertices[j] != all_cell_to_vertex[i]) ++j;
823 all_cell_to_vertex[i] = (int)j;
830 *nbr_vertices = num_all_local_vertices;
831 *nbr_cells = num_local_cells + num_halo_cells;
833 *num_vertices_per_cell =
834 xmalloc((num_local_cells + num_halo_cells) *
sizeof(**num_vertices_per_cell));
835 for (
size_t i = 0; i < num_local_cells + num_halo_cells; ++i)
836 (*num_vertices_per_cell)[i] = nv;
838 *cell_to_vertex = all_cell_to_vertex;
841 xmalloc((num_local_cells + num_halo_cells) *
sizeof(**global_cell_ids));
842 for (
size_t i = 0; i < num_local_cells; ++i)
843 (*global_cell_ids)[i] = local_start_cell + i;
844 memcpy(*global_cell_ids + num_local_cells, halo_cells,
845 num_halo_cells *
sizeof(*halo_cells));
846 *global_vertex_ids =
xrealloc(all_local_vertices, num_all_local_vertices *
847 sizeof(*all_local_vertices));
850 xmalloc((num_local_cells + num_halo_cells) *
sizeof(**cell_owner));
851 for (
size_t i = 0; i < num_local_cells; ++i)
852 (*cell_owner)[i] = -1;
853 for (
size_t i = 0; i < num_halo_cells; ++i)
854 (*cell_owner)[num_local_cells + i] =
855 ((
unsigned long)(halo_cells[i]) * (
unsigned long)comm_size +
856 (
unsigned long)comm_size - 1) / (
unsigned long)ncells;
859 for (
size_t i = 0; i < num_all_local_vertices; ++i)
860 if (all_local_vertices_owner[i] == comm_rank)
861 all_local_vertices_owner[i] = -1;
862 *vertex_owner = all_local_vertices_owner;
864 *x_vertices = all_vertex_lon;
865 *y_vertices = all_vertex_lat;
868 *cell_msk = cell_mask;
902 const char * filename, MPI_Comm comm) {
904 int comm_rank, comm_size;
906 MPI_Comm_rank(comm, &comm_rank);
907 MPI_Comm_size(comm, &comm_size);
909 int local_is_io, * io_ranks, num_io_ranks;
912 size_t ncells, nvertices, nedges, nv, ne;
914 size_t read_local_start_cell = 0;
915 size_t read_num_local_cells = 0;
916 size_t read_local_start_vertex = 0;
917 size_t read_num_local_vertices = 0;
918 size_t read_local_start_edge = 0;
919 size_t read_num_local_edges = 0;
920 double * read_vertex_lon = NULL;
921 double * read_vertex_lat = NULL;
922 int * read_dist_vertex_of_cell = NULL;
923 int * read_dist_edge_of_cell = NULL;
924 int * read_dist_edge_vertices = NULL;
928 unsigned long io_proc_idx = ULONG_MAX;
929 for (
int i = 0; (i < num_io_ranks) && (io_proc_idx == ULONG_MAX); ++i)
930 if (io_ranks[i] == comm_rank)
931 io_proc_idx = (
unsigned long)i;
951 read_local_start_cell =
952 ((
unsigned long)ncells * io_proc_idx) / (
unsigned long)num_io_ranks;
953 read_num_local_cells =
954 ((
unsigned long)ncells * (io_proc_idx+1)) / (
unsigned long)num_io_ranks -
955 (
unsigned long)read_local_start_cell;
956 read_local_start_vertex =
957 ((
unsigned long)nvertices * io_proc_idx) / (
unsigned long)num_io_ranks;
958 read_num_local_vertices =
959 ((
unsigned long)nvertices * (io_proc_idx+1)) / (
unsigned long)num_io_ranks -
960 (
unsigned long)read_local_start_vertex;
961 read_local_start_edge =
962 ((
unsigned long)nedges * io_proc_idx) / (
unsigned long)num_io_ranks;
963 read_num_local_edges =
964 ((
unsigned long)nedges * (io_proc_idx+1)) / (
unsigned long)num_io_ranks -
965 (
unsigned long)read_local_start_edge;
968 read_vertex_lon =
xmalloc(read_num_local_vertices *
sizeof(*read_vertex_lon));
969 read_vertex_lat =
xmalloc(read_num_local_vertices *
sizeof(*read_vertex_lat));
970 read_dist_vertex_of_cell =
971 xmalloc(read_num_local_cells * nv *
sizeof(*read_dist_vertex_of_cell));
972 read_dist_edge_of_cell =
973 xmalloc(read_num_local_cells * nv *
sizeof(*read_dist_edge_of_cell));
974 read_dist_edge_vertices =
975 xmalloc(read_num_local_edges * 2 *
sizeof(*read_dist_edge_vertices));
979 &read_num_local_vertices, read_vertex_lon));
980 convert_to_rad(ncid, varid, read_vertex_lon, read_num_local_vertices);
983 &read_num_local_vertices, read_vertex_lat));
984 convert_to_rad(ncid, varid, read_vertex_lat, read_num_local_vertices);
988 MAX(nv*read_num_local_cells, 2*read_num_local_edges) *
991 size_t tmp_start[2] = {0, read_local_start_cell};
992 size_t tmp_count[2] = {nv, read_num_local_cells};
994 YAC_HANDLE_ERROR(nc_get_vara_int(ncid, varid, tmp_start, tmp_count, buffer));
995 for (
size_t i = 0; i < read_num_local_cells; ++i)
996 for (
size_t j = 0; j < nv; ++j)
997 read_dist_vertex_of_cell[i * nv + j] =
998 buffer[i + j * read_num_local_cells];
1000 YAC_HANDLE_ERROR(nc_get_vara_int(ncid, varid, tmp_start, tmp_count, buffer));
1001 for (
size_t i = 0; i < read_num_local_cells; ++i)
1002 for (
size_t j = 0; j < nv; ++j)
1003 read_dist_edge_of_cell[i * nv + j] =
1004 buffer[i + j * read_num_local_cells];
1007 size_t tmp_start[2] = {0, read_local_start_edge};
1008 size_t tmp_count[2] = {2, read_num_local_edges};
1010 YAC_HANDLE_ERROR(nc_get_vara_int(ncid, varid, tmp_start, tmp_count, buffer));
1011 for (
size_t i = 0; i < read_num_local_edges; ++i)
1012 for (
int j = 0; j < 2; ++j)
1013 read_dist_edge_vertices[i * 2 + j] =
1014 buffer[i + j * read_num_local_edges];
1019 for (
size_t i = 0; i < read_num_local_cells * nv; ++i)
1020 if (read_dist_vertex_of_cell[i] > 0) read_dist_vertex_of_cell[i]--;
1021 for (
size_t i = 0; i < read_num_local_cells * nv; ++i)
1022 if (read_dist_edge_of_cell[i] > 0) read_dist_edge_of_cell[i]--;
1023 for (
size_t i = 0; i < read_num_local_edges * 2; ++i)
1024 if (read_dist_edge_vertices[i] > 0) read_dist_edge_vertices[i]--;
1029 read_vertex_lon =
xmalloc(1 *
sizeof(*read_vertex_lon));
1030 read_vertex_lat =
xmalloc(1 *
sizeof(*read_vertex_lat));
1031 read_dist_vertex_of_cell =
xmalloc(1 *
sizeof(*read_dist_vertex_of_cell));
1032 read_dist_edge_of_cell =
xmalloc(1 *
sizeof(*read_dist_edge_of_cell));
1033 read_dist_edge_vertices =
xmalloc(1 *
sizeof(*read_dist_edge_vertices));
1040 if (comm_rank == 0) tmp = (int)ncells;
1041 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
1042 ncells = (size_t)tmp;
1043 if (comm_rank == 0) tmp = (int)nvertices;
1044 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
1045 nvertices = (size_t)tmp;
1046 if (comm_rank == 0) tmp = (int)nedges;
1047 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
1048 nedges = (size_t)tmp;
1049 if (comm_rank == 0) tmp = (int)nv;
1050 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
1052 if (comm_rank == 0) tmp = (int)ne;
1053 MPI_Bcast(&tmp, 1, MPI_INT, 0, comm);
1058 size_t local_start_cell =
1059 ((
unsigned long)ncells * (
unsigned long)comm_rank) /
1060 (
unsigned long)comm_size;
1061 size_t num_local_cells =
1062 ((
unsigned long)ncells * ((
unsigned long)comm_rank+1)) /
1063 (
unsigned long)comm_size - (
unsigned long)local_start_cell;
1064 size_t local_start_vertex =
1065 ((
unsigned long)nvertices * (
unsigned long)comm_rank) /
1066 (
unsigned long)comm_size;
1067 size_t num_local_vertices =
1068 ((
unsigned long)nvertices * ((
unsigned long)comm_rank+1)) /
1069 (
unsigned long)comm_size - (
unsigned long)local_start_vertex;
1070 size_t local_start_edge =
1071 ((
unsigned long)nedges * (
unsigned long)comm_rank) /
1072 (
unsigned long)comm_size;
1073 size_t num_local_edges =
1074 ((
unsigned long)nedges * ((
unsigned long)comm_rank+1)) /
1075 (
unsigned long)comm_size - (
unsigned long)local_start_edge;
1078 int * dist_vertex_of_cell =
1079 xmalloc(num_local_cells * nv *
sizeof(*dist_vertex_of_cell));
1080 int * dist_edge_of_cell =
1081 xmalloc(num_local_cells * nv *
sizeof(*dist_edge_of_cell));
1083 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
1084 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
1086 for (
unsigned i = 0; i < read_num_local_cells; ++i)
1089 read_local_start_cell + i, ncells, comm_size)] += nv;
1091 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
1093 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
1094 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
1095 int send_accum = 0, recv_accum = 0;
1096 for (
int i = 0; i < comm_size; ++i) {
1097 send_displ[i] = send_accum;
1098 recv_displ[i] = recv_accum;
1099 send_accum += send_count[i];
1100 recv_accum += recv_count[i];
1103 MPI_Alltoallv(read_dist_vertex_of_cell, send_count, send_displ, MPI_INT,
1104 dist_vertex_of_cell, recv_count, recv_displ, MPI_INT, comm);
1105 MPI_Alltoallv(read_dist_edge_of_cell, send_count, send_displ, MPI_INT,
1106 dist_edge_of_cell, recv_count, recv_displ, MPI_INT, comm);
1112 free(read_dist_vertex_of_cell);
1113 free(read_dist_edge_of_cell);
1117 double * vertex_lon =
xmalloc(num_local_vertices *
sizeof(*vertex_lon));
1118 double * vertex_lat =
xmalloc(num_local_vertices *
sizeof(*vertex_lat));
1120 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
1121 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
1123 for (
unsigned i = 0; i < read_num_local_vertices; ++i)
1126 read_local_start_vertex + i, nvertices, comm_size)]++;
1128 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
1130 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
1131 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
1132 int send_accum = 0, recv_accum = 0;
1133 for (
int i = 0; i < comm_size; ++i) {
1134 send_displ[i] = send_accum;
1135 recv_displ[i] = recv_accum;
1136 send_accum += send_count[i];
1137 recv_accum += recv_count[i];
1140 MPI_Alltoallv(read_vertex_lon, send_count, send_displ, MPI_DOUBLE,
1141 vertex_lon, recv_count, recv_displ, MPI_DOUBLE, comm);
1142 MPI_Alltoallv(read_vertex_lat, send_count, send_displ, MPI_DOUBLE,
1143 vertex_lat, recv_count, recv_displ, MPI_DOUBLE, comm);
1149 free(read_vertex_lon);
1150 free(read_vertex_lat);
1154 int * dist_edge_vertices =
1155 xmalloc(num_local_edges * 2 *
sizeof(*dist_edge_vertices));
1157 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
1158 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
1160 for (
unsigned i = 0; i < read_num_local_edges; ++i)
1163 read_local_start_edge + i, nedges, comm_size)] += 2;
1165 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
1167 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
1168 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
1169 int send_accum = 0, recv_accum = 0;
1170 for (
int i = 0; i < comm_size; ++i) {
1171 send_displ[i] = send_accum;
1172 recv_displ[i] = recv_accum;
1173 send_accum += send_count[i];
1174 recv_accum += recv_count[i];
1177 MPI_Alltoallv(read_dist_edge_vertices, send_count, send_displ, MPI_INT,
1178 dist_edge_vertices, recv_count, recv_displ, MPI_INT, comm);
1184 free(read_dist_edge_vertices);
1190 size_t num_core_vertices;
1196 size_t N = num_local_cells * nv;
1197 core_vertices =
xmalloc(N *
sizeof(*core_vertices));
1201 for (
size_t i = 0; i < N; ++i)
1202 core_vertices[i] = (
yac_int)dist_vertex_of_cell[i];
1204 for (
size_t i = 0; i < N; ++i) permutation[i] = i;
1207 yac_int prev_vertex_id = XT_INT_MAX;
1208 num_core_vertices = 0;
1209 for (
size_t i = 0; i < N; ++i) {
1210 yac_int curr_vertex_id = core_vertices[i];
1211 if (prev_vertex_id == curr_vertex_id) {
1215 core_vertices[num_core_vertices] = (prev_vertex_id = curr_vertex_id);
1216 ++num_core_vertices;
1219 permutation[i] /= nv;
1222 xrealloc(core_vertices, num_core_vertices *
sizeof(*core_vertices));
1226 free(dist_vertex_of_cell);
1231 size_t num_core_edges;
1235 size_t N = num_local_cells * nv;
1236 core_edges =
xmalloc(N *
sizeof(*core_edges));
1237 size_t * permutation =
xmalloc(N *
sizeof(*permutation));
1239 for (
size_t i = 0; i < N; ++i)
1240 core_edges[i] = (
yac_int)dist_edge_of_cell[i];
1241 for (
size_t i = 0; i < N; ++i) permutation[i] = i;
1244 yac_int prev_edge_id = XT_INT_MAX;
1246 for (
size_t i = 0; i < N; ++i) {
1247 yac_int curr_edge_id = core_edges[i];
1248 if (prev_edge_id != curr_edge_id)
1249 core_edges[num_core_edges++] = (prev_edge_id = curr_edge_id);
1251 permutation[i] /= nv;
1255 xrealloc(core_edges, num_core_edges *
sizeof(*core_edges));
1256 free(dist_edge_of_cell);
1263 double * local_vertex_lon =
1264 xmalloc(num_core_vertices *
sizeof(*local_vertex_lon));
1265 double * local_vertex_lat =
1266 xmalloc(num_core_vertices *
sizeof(*local_vertex_lat));
1267 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
1268 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
1270 for (
size_t i = 0; i < num_core_vertices; ++i)
1271 send_count[((
unsigned long)(core_vertices[i]) *
1272 (
unsigned long)comm_size + (
unsigned long)comm_size - 1) /
1273 (
unsigned long)nvertices]++;
1275 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
1277 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
1278 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
1279 int send_accum = 0, recv_accum = 0;
1280 for (
int i = 0; i < comm_size; ++i) {
1281 send_displ[i] = send_accum;
1282 recv_displ[i] = recv_accum;
1283 send_accum += send_count[i];
1284 recv_accum += recv_count[i];
1287 int num_all_local_vertices_remote = 0;
1288 for (
int i = 0; i < comm_size; ++i)
1289 num_all_local_vertices_remote += recv_count[i];
1291 yac_int * remote_vertex_buffer =
1292 xmalloc(num_all_local_vertices_remote *
sizeof(*remote_vertex_buffer));
1295 core_vertices, send_count, send_displ,
yac_int_dt,
1296 remote_vertex_buffer, recv_count, recv_displ,
yac_int_dt, comm);
1298 double * send_vertex_lon =
1299 xmalloc(num_all_local_vertices_remote *
sizeof(*send_vertex_lon));
1300 double * send_vertex_lat =
1301 xmalloc(num_all_local_vertices_remote *
sizeof(*send_vertex_lat));
1303 for (
int i = 0, l = 0; i < comm_size; ++i) {
1304 for (
int j = 0; j < recv_count[i]; ++j, ++l) {
1305 size_t idx = (size_t)(remote_vertex_buffer[l]) - local_start_vertex;
1306 send_vertex_lon[l] = vertex_lon[idx];
1307 send_vertex_lat[l] = vertex_lat[idx];
1311 free(remote_vertex_buffer);
1315 MPI_Alltoallv(send_vertex_lon, recv_count, recv_displ, MPI_DOUBLE,
1316 local_vertex_lon, send_count, send_displ, MPI_DOUBLE, comm);
1317 MPI_Alltoallv(send_vertex_lat, recv_count, recv_displ, MPI_DOUBLE,
1318 local_vertex_lat, send_count, send_displ, MPI_DOUBLE, comm);
1320 for (
unsigned i = 0; i < num_core_vertices; ++i)
1324 free(send_vertex_lat);
1325 free(send_vertex_lon);
1330 free(local_vertex_lon);
1331 free(local_vertex_lat);
1338 int * local_edge_to_vertex =
1339 xmalloc(2 * num_core_edges *
sizeof(*local_edge_to_vertex));
1340 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
1341 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
1343 for (
size_t i = 0; i < num_core_edges; ++i)
1344 send_count[((
unsigned long)(core_edges[i]) *
1345 (
unsigned long)comm_size + (
unsigned long)comm_size - 1) /
1346 (
unsigned long)nedges]++;
1348 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
1350 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
1351 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
1352 int send_accum = 0, recv_accum = 0;
1353 for (
int i = 0; i < comm_size; ++i) {
1354 send_displ[i] = send_accum;
1355 recv_displ[i] = recv_accum;
1356 send_accum += send_count[i];
1357 recv_accum += recv_count[i];
1360 int num_all_local_edges_remote = 0;
1361 for (
int i = 0; i < comm_size; ++i)
1362 num_all_local_edges_remote += recv_count[i];
1364 yac_int * remote_edge_buffer =
1365 xmalloc(num_all_local_edges_remote *
sizeof(*remote_edge_buffer));
1368 core_edges, send_count, send_displ,
yac_int_dt,
1369 remote_edge_buffer, recv_count, recv_displ,
yac_int_dt, comm);
1371 int * send_edge_vertices =
1372 xmalloc(2 * num_all_local_edges_remote *
sizeof(*send_edge_vertices));
1374 for (
int i = 0, l = 0; i < comm_size; ++i) {
1375 for (
int j = 0; j < recv_count[i]; ++j, ++l) {
1376 size_t idx = (size_t)(remote_edge_buffer[l]) - local_start_edge;
1377 send_edge_vertices[2*l+0] = dist_edge_vertices[2*idx+0];
1378 send_edge_vertices[2*l+1] = dist_edge_vertices[2*idx+1];
1387 free(remote_edge_buffer);
1388 free(dist_edge_vertices);
1390 MPI_Alltoallv(send_edge_vertices, recv_count, recv_displ, MPI_INT,
1391 local_edge_to_vertex, send_count, send_displ, MPI_INT, comm);
1393 size_t * permutation =
xmalloc(2 * num_core_edges *
sizeof(*permutation));
1394 for (
size_t i = 0; i < 2 * num_core_edges; ++i) permutation[i] = i;
1397 local_edge_to_vertex, 2 * num_core_edges, permutation);
1399 for (
size_t i = 0, j = 0; i < 2 * num_core_edges; ++i) {
1401 while ((j < nvertices) && (core_vertices[j] < curr_vertex)) ++j;
1403 (j < nvertices) && (core_vertices[j] == curr_vertex),
1404 "ERROR(yac_read_icon_basic_grid_data_parallel): vertex id missmatch")
1409 free(send_edge_vertices);
1414 free(local_edge_to_vertex);
1419 for (
size_t i = 0; i < num_local_cells; ++i)