YetAnotherCoupler 3.2.0_a
Loading...
Searching...
No Matches
duplicate_stencils.c
Go to the documentation of this file.
1// Copyright (c) 2024 The YAC Authors
2//
3// SPDX-License-Identifier: BSD-3-Clause
4
5#include <string.h>
6#include <mpi.h>
7
9#include "basic_grid.h"
10#include "utils_common.h"
11#include "yac_mpi_internal.h"
12
19
20struct stencil_info {
22 size_t idx;
23 int rank;
24};
25
26static MPI_Datatype get_dup_info_mpi_datatype(MPI_Comm comm) {
27
28 struct dup_info dummy;
29 MPI_Datatype dup_info_dt;
30 int array_of_blocklengths[] = {1, 1, 1, 1};
31 const MPI_Aint array_of_displacements[] =
32 {(MPI_Aint)(intptr_t)(const void *)&(dummy.orig_global_id) -
33 (MPI_Aint)(intptr_t)(const void *)&dummy,
34 (MPI_Aint)(intptr_t)(const void *)&(dummy.duplicated_global_id) -
35 (MPI_Aint)(intptr_t)(const void *)&dummy,
36 (MPI_Aint)(intptr_t)(const void *)&(dummy.duplicated_orig_pos) -
37 (MPI_Aint)(intptr_t)(const void *)&dummy,
38 (MPI_Aint)(intptr_t)(const void *)&(dummy.duplicated_rank) -
39 (MPI_Aint)(intptr_t)(const void *)&dummy};
40 const MPI_Datatype array_of_types[] =
43 MPI_Type_create_struct(4, array_of_blocklengths, array_of_displacements,
44 array_of_types, &dup_info_dt), comm);
45 return yac_create_resized(dup_info_dt, sizeof(dummy), comm);
46}
47
48static MPI_Datatype get_stencil_info_mpi_datatype(MPI_Comm comm) {
49
50 struct stencil_info dummy;
51 MPI_Datatype stencil_info_dt;
52 int array_of_blocklengths[] = {1, 1, 1};
53 const MPI_Aint array_of_displacements[] =
54 {(MPI_Aint)(intptr_t)(const void *)&(dummy.tgt_global_id) -
55 (MPI_Aint)(intptr_t)(const void *)&dummy,
56 (MPI_Aint)(intptr_t)(const void *)&(dummy.idx) -
57 (MPI_Aint)(intptr_t)(const void *)&dummy,
58 (MPI_Aint)(intptr_t)(const void *)&(dummy.rank) -
59 (MPI_Aint)(intptr_t)(const void *)&dummy};
60 const MPI_Datatype array_of_types[] =
61 {yac_int_dt, YAC_MPI_SIZE_T, MPI_INT};
63 MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacements,
64 array_of_types, &stencil_info_dt), comm);
65 return yac_create_resized(stencil_info_dt, sizeof(dummy), comm);
66}
67
69 const void * a, const void * b) {
70
71 return (((const struct dup_info *)a)->orig_global_id >
72 ((const struct dup_info *)b)->orig_global_id) -
73 (((const struct dup_info *)a)->orig_global_id <
74 ((const struct dup_info *)b)->orig_global_id);
75}
76
78 const void * a, const void * b) {
79
80 return (((const struct stencil_info *)a)->tgt_global_id >
81 ((const struct stencil_info *)b)->tgt_global_id) -
82 (((const struct stencil_info *)a)->tgt_global_id <
83 ((const struct stencil_info *)b)->tgt_global_id);
84}
85
86static inline int compute_bucket(yac_int value, int comm_size) {
87 return (int)(value / 128) % comm_size;
88}
89
91 struct yac_interp_weights * weights, struct yac_basic_grid * tgt_grid,
92 yac_int * tgt_orig_global_id, size_t * tgt_duplicated_idx,
93 size_t nbr_duplicated, enum yac_location location) {
94
96 location == YAC_LOC_CELL,
97 "ERROR(yac_duplicate_stencils): only supported for cells");
98
99 yac_int * duplicated_global_ids =
100 xmalloc(nbr_duplicated * sizeof(*duplicated_global_ids));
101
102 struct yac_basic_grid_data * grid_data =
103 yac_basic_grid_get_data(tgt_grid);
104 yac_int const * cell_ids = grid_data->cell_ids;
105
106 // get global ids of all duplicated target cells
107 for (size_t i = 0; i < nbr_duplicated; ++i)
108 duplicated_global_ids[i] = cell_ids[tgt_duplicated_idx[i]];
109
110 // get target global ids of all local stencils
111 size_t stencil_count = yac_interp_weights_get_interp_count(weights);
112 yac_int * stencil_tgt_ids =
114
115 MPI_Comm comm = yac_interp_weights_get_comm(weights);
116 int comm_rank, comm_size;
117 yac_mpi_call(MPI_Comm_rank(comm, &comm_rank), comm);
118 yac_mpi_call(MPI_Comm_size(comm, &comm_size), comm);
119
120 int * int_buffer =
121 xmalloc((nbr_duplicated + stencil_count) * sizeof(*int_buffer));
122 int * dup_dist_ranks = int_buffer;
123 int * stencil_dist_ranks = int_buffer + nbr_duplicated;
124
125 // determine the bucket ranks
126 for (size_t i = 0; i < nbr_duplicated; ++i)
127 dup_dist_ranks[i] = compute_bucket(tgt_orig_global_id[i], comm_size);
128 for (size_t i = 0; i < stencil_count; ++i)
129 stencil_dist_ranks[i] = compute_bucket(stencil_tgt_ids[i], comm_size);
130
131 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
133 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
134
135 // set up counts and displs for redistribution of information about
136 // duplicated points
137 for (size_t i = 0; i < nbr_duplicated; ++i)
138 sendcounts[dup_dist_ranks[i]]++;
140 1, sendcounts, recvcounts, sdispls, rdispls, comm);
141
142 size_t dup_info_count = recvcounts[comm_size - 1] + rdispls[comm_size - 1];
143 struct dup_info * dup_info_buffer =
144 xmalloc((nbr_duplicated + dup_info_count) * sizeof(*dup_info_buffer));
145 struct dup_info * dup_send_buffer = dup_info_buffer + dup_info_count;
146 struct dup_info * dup_recv_buffer = dup_info_buffer;
147
148 // pack information about duplicated points
149 for (size_t i = 0; i < nbr_duplicated; ++i) {
150 size_t pos = sdispls[dup_dist_ranks[i]+1]++;
151 dup_send_buffer[pos].orig_global_id = tgt_orig_global_id[i];
152 dup_send_buffer[pos].duplicated_global_id = duplicated_global_ids[i];
153 dup_send_buffer[pos].duplicated_orig_pos = tgt_duplicated_idx[i];
154 dup_send_buffer[pos].duplicated_rank = comm_rank;
155 }
156
157 // redistribute information about duplicated points
158 MPI_Datatype dup_info_dt = get_dup_info_mpi_datatype(comm);
159 yac_mpi_call(MPI_Type_commit(&dup_info_dt), comm);
161 dup_send_buffer, sendcounts, sdispls,
162 dup_recv_buffer, recvcounts, rdispls,
163 sizeof(*dup_send_buffer), dup_info_dt, comm);
164 yac_mpi_call(MPI_Type_free(&dup_info_dt), comm);
165 dup_recv_buffer =
166 xrealloc(dup_recv_buffer, dup_info_count * sizeof(*dup_recv_buffer));
167 qsort(
168 dup_recv_buffer, dup_info_count, sizeof(*dup_recv_buffer),
170
171 // set up counts and displs for redistribution of stencil information
172 memset(sendcounts, 0, (size_t)comm_size * sizeof(*sendcounts));
173 for (size_t i = 0; i < stencil_count; ++i)
174 sendcounts[stencil_dist_ranks[i]]++;
176 1, sendcounts, recvcounts, sdispls, rdispls, comm);
177
178 size_t stencil_info_count = recvcounts[comm_size - 1] + rdispls[comm_size - 1];
179 struct stencil_info * stencil_info_buffer =
180 xmalloc((stencil_count + stencil_info_count) * sizeof(*stencil_info_buffer));
181 struct stencil_info * stencil_send_buffer =
182 stencil_info_buffer + stencil_info_count;
183 struct stencil_info * stencil_recv_buffer = stencil_info_buffer;
184
185 // pack stencil information
186 for (size_t i = 0; i < stencil_count; ++i) {
187 size_t pos = sdispls[stencil_dist_ranks[i]+1]++;
188 stencil_send_buffer[pos].tgt_global_id = stencil_tgt_ids[i];
189 stencil_send_buffer[pos].idx = i;
190 stencil_send_buffer[pos].rank = comm_rank;
191 }
192 free(stencil_tgt_ids);
193
194 // redistribute stencil information
195 MPI_Datatype stencil_info_dt = get_stencil_info_mpi_datatype(comm);
196 yac_mpi_call(MPI_Type_commit(&stencil_info_dt), comm);
198 stencil_send_buffer, sendcounts, sdispls,
199 stencil_recv_buffer, recvcounts, rdispls,
200 sizeof(*stencil_send_buffer), stencil_info_dt, comm);
201 yac_mpi_call(MPI_Type_free(&stencil_info_dt), comm);
202 stencil_recv_buffer =
203 xrealloc(
204 stencil_recv_buffer, stencil_info_count * sizeof(*stencil_recv_buffer));
205 qsort(
206 stencil_recv_buffer, stencil_info_count, sizeof(*stencil_recv_buffer),
208
209 // count number of duplicated targets for which at least one stencil exists
210 // and the number of stencils that match with a duplicated points
211 size_t dup_match_count = 0;
212 size_t stencil_match_count = 0;
213 for (size_t i = 0, j = 0; i < dup_info_count; ++i) {
214 yac_int curr_orig_global_id = dup_recv_buffer[i].orig_global_id;
215 while ((j < stencil_info_count) &&
216 (stencil_recv_buffer[j].tgt_global_id < curr_orig_global_id)) ++j;
217 if ((j < stencil_info_count) &&
218 (stencil_recv_buffer[j].tgt_global_id == curr_orig_global_id))
219 ++dup_match_count;
220 size_t k = j;
221 while ((k < stencil_info_count) &&
222 (stencil_recv_buffer[k].tgt_global_id == curr_orig_global_id)) {
223 ++k;
224 ++stencil_match_count;
225 }
226 }
227
228 struct remote_points dup_remote_points;
229 dup_remote_points.data =
230 xmalloc(dup_match_count * sizeof(*(dup_remote_points.data)));
231 dup_remote_points.count = dup_match_count;
232 size_t * num_stencils_per_tgt =
233 xmalloc(stencil_match_count * sizeof(*num_stencils_per_tgt));
234 size_t * stencil_indices =
235 xmalloc(stencil_match_count * sizeof(*stencil_indices));
236 int * stencil_ranks =
237 xmalloc(stencil_match_count * sizeof(*stencil_ranks));
238 double * w = xmalloc(stencil_match_count * sizeof(*w));
239
240 // match duplicated targets with stencils
241 dup_match_count = 0;
242 stencil_match_count = 0;
243 for (size_t i = 0, j = 0; i < dup_info_count; ++i) {
244 yac_int curr_orig_global_id = dup_recv_buffer[i].orig_global_id;
245
246 // search for a matching stencil
247 while ((j < stencil_info_count) &&
248 (stencil_recv_buffer[j].tgt_global_id < curr_orig_global_id)) ++j;
249
250 // if we found a matching stencil
251 if ((j < stencil_info_count) &&
252 (stencil_recv_buffer[j].tgt_global_id == curr_orig_global_id)) {
253
254 size_t curr_num_stencils_per_tgt = 0;
255
256 // for the current duplicated point -> get all matching stencils
257 size_t k = j;
258 while ((k < stencil_info_count) &&
259 (stencil_recv_buffer[k].tgt_global_id == curr_orig_global_id)) {
260 stencil_indices[stencil_match_count] = stencil_recv_buffer[k].idx;
261 stencil_ranks[stencil_match_count] = stencil_recv_buffer[k].rank;
262 w[stencil_match_count] = 1.0;
263 ++k;
264 ++stencil_match_count;
265 ++curr_num_stencils_per_tgt;
266 }
267
268 dup_remote_points.data[dup_match_count].global_id =
269 dup_recv_buffer[i].duplicated_global_id;
270 dup_remote_points.data[dup_match_count].data.count = 1;
271 dup_remote_points.data[dup_match_count].data.data.single.rank =
272 dup_recv_buffer[i].duplicated_rank;
273 dup_remote_points.data[dup_match_count].data.data.single.orig_pos =
274 (uint64_t)(dup_recv_buffer[i].duplicated_orig_pos);
275 num_stencils_per_tgt[dup_match_count] = curr_num_stencils_per_tgt;
276 ++dup_match_count;
277 }
278 }
279
280 // duplicate stencils
282 weights, &dup_remote_points, num_stencils_per_tgt,
283 stencil_indices, stencil_ranks, w);
284
285 // clean up
286 free(w);
287 free(stencil_ranks);
288 free(stencil_indices);
289 free(num_stencils_per_tgt);
290 free(stencil_recv_buffer);
291 free(dup_remote_points.data);
292 free(dup_recv_buffer);
293 yac_free_comm_buffers(sendcounts, recvcounts, sdispls, rdispls);
294 free(int_buffer);
295 free(duplicated_global_ids);
296}
297
299 struct yac_interp_weights * weights, struct yac_basic_grid * tgt_grid,
300 yac_int * tgt_orig_global_id, size_t * tgt_duplicated_idx,
301 size_t nbr_duplicated, int location) {
302
304 weights, tgt_grid, tgt_orig_global_id, tgt_duplicated_idx,
305 nbr_duplicated, yac_get_location(location));
306}
struct yac_basic_grid_data * yac_basic_grid_get_data(struct yac_basic_grid *grid)
Definition basic_grid.c:132
static MPI_Datatype get_stencil_info_mpi_datatype(MPI_Comm comm)
static MPI_Datatype get_dup_info_mpi_datatype(MPI_Comm comm)
static int compute_bucket(yac_int value, int comm_size)
static int compare_stencil_info_tgt_global_id(const void *a, const void *b)
void yac_duplicate_stencils(struct yac_interp_weights *weights, struct yac_basic_grid *tgt_grid, yac_int *tgt_orig_global_id, size_t *tgt_duplicated_idx, size_t nbr_duplicated, enum yac_location location)
void yac_duplicate_stencils_f2c(struct yac_interp_weights *weights, struct yac_basic_grid *tgt_grid, yac_int *tgt_orig_global_id, size_t *tgt_duplicated_idx, size_t nbr_duplicated, int location)
static int compare_dup_info_orig_global_ids(const void *a, const void *b)
struct @8::@9 value
void yac_interp_weights_wcopy_weights(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_stencils_per_tgt, size_t *stencil_indices, int *stencil_ranks, double *w)
yac_int * yac_interp_weights_get_interp_tgt(struct yac_interp_weights *weights)
MPI_Comm yac_interp_weights_get_comm(struct yac_interp_weights *weights)
size_t yac_interp_weights_get_interp_count(struct yac_interp_weights *weights)
enum yac_location yac_get_location(int const location)
Definition location.c:44
yac_location
Definition location.h:12
@ YAC_LOC_CELL
Definition location.h:14
#define xrealloc(ptr, size)
Definition ppm_xfuncs.h:67
#define xmalloc(size)
Definition ppm_xfuncs.h:66
size_t duplicated_orig_pos
yac_int duplicated_global_id
yac_int orig_global_id
struct remote_point_info single
union remote_point_infos::@1 data
struct remote_point_infos data
struct remote_point * data
#define YAC_ASSERT(exp, msg)
Definition yac_assert.h:15
void yac_generate_alltoallv_args(int count, size_t const *sendcounts, size_t *recvcounts, size_t *sdispls, size_t *rdispls, MPI_Comm comm)
Definition yac_mpi.c:569
void yac_free_comm_buffers(size_t *sendcounts, size_t *recvcounts, size_t *sdispls, size_t *rdispls)
Definition yac_mpi.c:624
void yac_get_comm_buffers(int count, size_t **sendcounts, size_t **recvcounts, size_t **sdispls, size_t **rdispls, MPI_Comm comm)
Definition yac_mpi.c:593
MPI_Datatype yac_create_resized(MPI_Datatype dt, size_t new_size, MPI_Comm comm)
Definition yac_mpi.c:548
void yac_alltoallv_p2p(void const *send_buffer, size_t const *sendcounts, size_t const *sdispls, void *recv_buffer, size_t const *recvcounts, size_t const *rdispls, size_t dt_size, MPI_Datatype dt, MPI_Comm comm)
Definition yac_mpi.c:129
#define yac_mpi_call(call, comm)
#define YAC_MPI_SIZE_T
Xt_int yac_int
Definition yac_types.h:15
#define yac_int_dt
Definition yac_types.h:16