49 size_t * num_elem,
size_t * num_nodes,
50 int ** num_nodes_per_elem,
int ** num_elem_per_node,
51 size_t ** elem_to_node,
size_t ** node_to_elem) {
53 int comm_rank, comm_size;
55 MPI_Comm_rank(comm, &comm_rank);
56 MPI_Comm_size(comm, &comm_size);
58 int local_is_io, * io_ranks, num_io_ranks;
61 size_t num_global_nodes, num_global_elem, num_nod_per_elem;
63 size_t read_local_start_elem = 0;
64 size_t read_num_local_elems = 0;
65 size_t read_local_start_node = 0;
66 size_t read_num_local_nodes = 0;
69 int * read_dist_elem_to_node = NULL;
73 unsigned long io_proc_idx = ULONG_MAX;
74 for (
int i = 0; (i < num_io_ranks) && (io_proc_idx == ULONG_MAX); ++i)
75 if (io_ranks[i] == comm_rank)
76 io_proc_idx = (
unsigned long)i;
84 size_t num_el_blk, num_el_in_blk1;
98 "ERROR(yac_read_exodus_grid_information_parallel): "
99 "reader currently only supports a single block of elements");
101 num_global_elem == num_el_in_blk1,
102 "ERROR(yac_read_exodus_grid_information_parallel): "
103 "total number of elements and number of elements in first block "
104 "of elements do not match");
107 read_local_start_elem =
108 ((
unsigned long)num_global_elem * io_proc_idx) / (
unsigned long)num_io_ranks;
109 read_num_local_elems =
110 ((
unsigned long)num_global_elem * (io_proc_idx+1)) / (
unsigned long)num_io_ranks -
111 (
unsigned long)read_local_start_elem;
112 read_local_start_node =
113 ((
unsigned long)num_global_nodes * io_proc_idx) / (
unsigned long)num_io_ranks;
114 read_num_local_nodes =
115 ((
unsigned long)num_global_nodes * (io_proc_idx+1)) / (
unsigned long)num_io_ranks -
116 (
unsigned long)read_local_start_node;
119 double * read_node_coord_x =
120 xmalloc(read_num_local_nodes *
sizeof(*read_node_coord_x));
121 double * read_node_coord_y =
122 xmalloc(read_num_local_nodes *
sizeof(*read_node_coord_y));
123 double * read_node_coord_z =
124 xmalloc(read_num_local_nodes *
sizeof(*read_node_coord_z));;
129 ncid, varid, (
size_t[]){0, read_local_start_node},
130 (
size_t[]){1,read_num_local_nodes}, read_node_coord_x));
133 ncid, varid, (
size_t[]){1, read_local_start_node},
134 (
size_t[]){1,read_num_local_nodes}, read_node_coord_y));
137 ncid, varid, (
size_t[]){2, read_local_start_node},
138 (
size_t[]){1,read_num_local_nodes}, read_node_coord_z));
140 xmalloc(read_num_local_nodes *
sizeof(*read_node_coords));
141 for (
size_t i = 0; i < read_num_local_nodes; ++i) {
142 read_node_coords[i][0] = read_node_coord_x[i];
143 read_node_coords[i][1] = read_node_coord_y[i];
144 read_node_coords[i][2] = read_node_coord_z[i];
146 free(read_node_coord_z);
147 free(read_node_coord_y);
148 free(read_node_coord_x);
150 read_dist_elem_to_node =
152 read_num_local_elems * num_nod_per_elem *
153 sizeof(*read_dist_elem_to_node));
157 ncid, varid, (
size_t[]){read_local_start_elem, 0},
158 (
size_t[]){read_num_local_elems, num_nod_per_elem},
159 read_dist_elem_to_node));
160 for (
size_t i = 0; i < read_num_local_elems * num_nod_per_elem; ++i)
161 read_dist_elem_to_node[i]--;
166 read_node_coords =
xmalloc(1 *
sizeof(*read_node_coords));
167 read_dist_elem_to_node =
xmalloc(1 *
sizeof(*read_dist_elem_to_node));
174 if (comm_rank == 0) tmp = num_global_nodes;
176 num_global_nodes = tmp;
177 if (comm_rank == 0) tmp = num_global_elem;
179 num_global_elem = tmp;
180 if (comm_rank == 0) tmp = num_nod_per_elem;
182 num_nod_per_elem = tmp;
186 size_t local_start_elem =
187 ((
unsigned long)num_global_elem * (
unsigned long)comm_rank) /
188 (
unsigned long)comm_size;
189 size_t num_local_elems =
190 ((
unsigned long)num_global_elem * ((
unsigned long)comm_rank+1)) /
191 (
unsigned long)comm_size - (
unsigned long)local_start_elem;
192 size_t local_start_node =
193 ((
unsigned long)num_global_nodes * (
unsigned long)comm_rank) /
194 (
unsigned long)comm_size;
195 size_t num_local_nodes =
196 ((
unsigned long)num_global_nodes * ((
unsigned long)comm_rank+1)) /
197 (
unsigned long)comm_size - (
unsigned long)local_start_node;
200 int * dist_elem_to_node =
201 xmalloc(num_local_elems * num_nod_per_elem *
sizeof(*dist_elem_to_node));
203 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
204 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
206 for (
size_t i = 0; i < read_num_local_elems; ++i)
209 read_local_start_elem + i, num_global_elem, comm_size)] +=
212 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
214 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
215 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
216 int send_accum = 0, recv_accum = 0;
217 for (
int i = 0; i < comm_size; ++i) {
218 send_displ[i] = send_accum;
219 recv_displ[i] = recv_accum;
220 send_accum += send_count[i];
221 recv_accum += recv_count[i];
224 MPI_Alltoallv(read_dist_elem_to_node, send_count, send_displ, MPI_INT,
225 dist_elem_to_node, recv_count, recv_displ, MPI_INT, comm);
231 free(read_dist_elem_to_node);
236 xmalloc(num_local_nodes *
sizeof(*dist_node_coords));
238 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
239 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
241 for (
size_t i = 0; i < read_num_local_nodes; ++i)
244 read_local_start_node + i, num_global_nodes, comm_size)] += 3;
246 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
248 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
249 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
250 int send_accum = 0, recv_accum = 0;
251 for (
int i = 0; i < comm_size; ++i) {
252 send_displ[i] = send_accum;
253 recv_displ[i] = recv_accum;
254 send_accum += send_count[i];
255 recv_accum += recv_count[i];
258 MPI_Alltoallv(read_node_coords, send_count, send_displ, MPI_DOUBLE,
259 dist_node_coords, recv_count, recv_displ, MPI_DOUBLE, comm);
265 free(read_node_coords);
270 size_t num_core_nodes;
272 size_t N = num_local_elems * num_nod_per_elem;
273 *node_ids =
xmalloc(
N *
sizeof(**node_ids));
274 *num_elem_per_node =
xmalloc(
N *
sizeof(**num_elem_per_node));
275 *elem_to_node =
xmalloc(
N *
sizeof(**elem_to_node));
276 *node_to_elem =
xmalloc(
N *
sizeof(**node_to_elem));
277 for (
size_t i = 0; i <
N; ++i)
278 (*node_ids)[i] = (
yac_int)dist_elem_to_node[i];
279 size_t * permutation = *node_to_elem;
280 for (
size_t i = 0; i <
N; ++i) permutation[i] = i;
283 yac_int prev_node_id = YAC_INT_MAX;
285 for (
size_t i = 0; i <
N; ++i) {
286 yac_int curr_node_id = (*node_ids)[i];
287 if (prev_node_id == curr_node_id) {
288 (*num_elem_per_node)[num_core_nodes-1]++;
290 (*num_elem_per_node)[num_core_nodes] = 1;
291 (*node_ids)[num_core_nodes] = (prev_node_id = curr_node_id);
294 (*elem_to_node)[permutation[i]] = num_core_nodes-1;
295 permutation[i] /= num_nod_per_elem;
298 xrealloc(*node_ids, num_core_nodes *
sizeof(**node_ids));
301 num_core_nodes *
sizeof(**num_elem_per_node));
302 free(dist_elem_to_node);
307 *node_coords =
xmalloc(num_core_nodes *
sizeof(**node_coords));
308 int * send_count =
xcalloc(comm_size,
sizeof(*send_count));
309 int * recv_count =
xcalloc(comm_size,
sizeof(*recv_count));
311 for (
size_t i = 0; i < num_core_nodes; ++i)
314 (*node_ids)[i], num_global_nodes, comm_size)]++;
316 MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);
318 int * send_displ =
xmalloc(comm_size *
sizeof(*send_displ));
319 int * recv_displ =
xmalloc(comm_size *
sizeof(*recv_displ));
320 int send_accum = 0, recv_accum = 0;
321 for (
int i = 0; i < comm_size; ++i) {
322 send_displ[i] = send_accum;
323 recv_displ[i] = recv_accum;
324 send_accum += send_count[i];
325 recv_accum += recv_count[i];
328 int num_all_local_nodes_remote = 0;
329 for (
int i = 0; i < comm_size; ++i)
330 num_all_local_nodes_remote += recv_count[i];
333 xmalloc(num_all_local_nodes_remote *
sizeof(*remote_node_buffer));
336 *node_ids, send_count, send_displ,
yac_int_dt,
337 remote_node_buffer, recv_count, recv_displ,
yac_int_dt, comm);
340 xmalloc(num_all_local_nodes_remote *
sizeof(*send_node_coords));
342 for (
int i = 0, l = 0; i < comm_size; ++i) {
343 for (
int j = 0; j < recv_count[i]; ++j, ++l) {
344 size_t idx = (size_t)(remote_node_buffer[l]) - local_start_node;
345 send_node_coords[l][0] = dist_node_coords[idx][0];
346 send_node_coords[l][1] = dist_node_coords[idx][1];
347 send_node_coords[l][2] = dist_node_coords[idx][2];
355 free(remote_node_buffer);
356 free(dist_node_coords);
358 MPI_Alltoallv(send_node_coords, recv_count, recv_displ, MPI_DOUBLE,
359 *node_coords, send_count, send_displ, MPI_DOUBLE, comm);
361 free(send_node_coords);
369 *elem_ids =
xmalloc(num_local_elems *
sizeof(**elem_ids));
370 for (
size_t i = 0; i < num_local_elems; ++i)
371 (*elem_ids)[i] = (
yac_int)(local_start_elem + i);
374 *num_nodes_per_elem =
375 xmalloc(num_local_elems *
sizeof(**num_nodes_per_elem));
376 for (
size_t i = 0; i < num_local_elems; ++i)
377 (*num_nodes_per_elem)[i] = num_nod_per_elem;
379 *num_elem = num_local_elems;
380 *num_nodes = num_core_nodes;
421 const char * filename,
int use_ll_edges, MPI_Comm comm) {
428 int * num_nodes_per_elem;
429 int * num_elem_per_node;
430 size_t * elem_to_node;
431 size_t * node_to_elem;
434 filename, comm, &node_coords, &elem_ids, &node_ids, &num_elem, &num_nodes,
435 &num_nodes_per_elem, &num_elem_per_node, &elem_to_node, &node_to_elem);
438 size_t * elem_to_edge;
445 size_t max_num_edges = 0;
446 for (
size_t i = 0; i < num_elem; ++i)
447 max_num_edges += num_nodes_per_elem[i];
451 xmalloc(max_num_edges *
sizeof(*temp_edges));
452 for (
size_t i = 0, offset = 0, k = 0; i < num_elem; ++i) {
453 size_t * curr_elem_to_node = elem_to_node + offset;
454 size_t curr_num_edges = num_nodes_per_elem[i];
455 offset += curr_num_edges;
456 for (
size_t j = 0; j < curr_num_edges; ++j, ++k) {
458 curr_elem_to_node[j] > curr_elem_to_node[(j+1)%curr_num_edges];
459 temp_edges[k].
node[order] = curr_elem_to_node[j];
460 temp_edges[k].
node[order^1] = curr_elem_to_node[(j+1)%curr_num_edges];
464 qsort(temp_edges, max_num_edges,
468 elem_to_edge =
xmalloc(max_num_edges *
sizeof(*elem_to_edge));
471 for (
size_t i = 0, prev_indices[2] = {SIZE_MAX, SIZE_MAX};
472 i < max_num_edges; ++i) {
475 if ((prev_indices[0] != temp_edges[i].
node[0]) ||
476 (prev_indices[1] != temp_edges[i].
node[1])) {
478 prev_indices[0] = temp_edges[i].
node[0];
479 prev_indices[1] = temp_edges[i].
node[1];
480 edge_to_node[num_edges][0] = prev_indices[0];
481 edge_to_node[num_edges][1] = prev_indices[1];
485 elem_to_edge[curr_elem_to_edge_idx] = num_edges - 1;
488 xrealloc(edge_to_node, num_edges *
sizeof(*edge_to_node));
490 edge_type =
xmalloc(num_edges *
sizeof(*edge_type));
492 for (
size_t i = 0; i < num_edges; ++i) {
493 double * edge_vertex_a = node_coords[edge_to_node[i][0]];
494 double * edge_vertex_b = node_coords[edge_to_node[i][1]];
495 int vertex_a_is_pole =
check_pole(edge_vertex_a);
496 int vertex_b_is_pole =
check_pole(edge_vertex_b);
498 (vertex_a_is_pole ^ vertex_b_is_pole) ||
499 (!vertex_a_is_pole && !vertex_b_is_pole &&
502 (vertex_a_is_pole && vertex_b_is_pole) ||
505 is_lon_edge || is_lat_edge,
506 "ERROR(yac_read_exodus_basic_grid_data_parallel): "
507 "\"use_ll_edges == true\" but edge is neither lon nor lat "
508 "((%lf,%lf,%lf),(%lf,%lf,%lf))",
509 edge_vertex_a[0], edge_vertex_a[1], edge_vertex_a[2],
510 edge_vertex_b[0], edge_vertex_b[1], edge_vertex_b[2]);
514 for (
size_t i = 0; i < num_edges; ++i)
520 grid_data.vertex_coordinates = node_coords;
531 grid_data.num_cells_per_vertex = num_elem_per_node;
541 grid_data.num_total_vertices = num_nodes;