92 yac_int * tgt_orig_global_id,
size_t * tgt_duplicated_idx,
97 "ERROR(yac_duplicate_stencils): only supported for cells");
99 yac_int * duplicated_global_ids =
100 xmalloc(nbr_duplicated *
sizeof(*duplicated_global_ids));
107 for (
size_t i = 0; i < nbr_duplicated; ++i)
108 duplicated_global_ids[i] =
cell_ids[tgt_duplicated_idx[i]];
116 int comm_rank, comm_size;
121 xmalloc((nbr_duplicated + stencil_count) *
sizeof(*int_buffer));
122 int * dup_dist_ranks = int_buffer;
123 int * stencil_dist_ranks = int_buffer + nbr_duplicated;
126 for (
size_t i = 0; i < nbr_duplicated; ++i)
127 dup_dist_ranks[i] =
compute_bucket(tgt_orig_global_id[i], comm_size);
128 for (
size_t i = 0; i < stencil_count; ++i)
129 stencil_dist_ranks[i] =
compute_bucket(stencil_tgt_ids[i], comm_size);
131 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
133 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
137 for (
size_t i = 0; i < nbr_duplicated; ++i)
138 sendcounts[dup_dist_ranks[i]]++;
140 1, sendcounts, recvcounts, sdispls, rdispls, comm);
142 size_t dup_info_count = recvcounts[comm_size - 1] + rdispls[comm_size - 1];
144 xmalloc((nbr_duplicated + dup_info_count) *
sizeof(*dup_info_buffer));
145 struct dup_info * dup_send_buffer = dup_info_buffer + dup_info_count;
146 struct dup_info * dup_recv_buffer = dup_info_buffer;
149 for (
size_t i = 0; i < nbr_duplicated; ++i) {
150 size_t pos = sdispls[dup_dist_ranks[i]+1]++;
161 dup_send_buffer, sendcounts, sdispls,
162 dup_recv_buffer, recvcounts, rdispls,
163 sizeof(*dup_send_buffer), dup_info_dt, comm);
166 xrealloc(dup_recv_buffer, dup_info_count *
sizeof(*dup_recv_buffer));
168 dup_recv_buffer, dup_info_count,
sizeof(*dup_recv_buffer),
172 memset(sendcounts, 0, (
size_t)comm_size *
sizeof(*sendcounts));
173 for (
size_t i = 0; i < stencil_count; ++i)
174 sendcounts[stencil_dist_ranks[i]]++;
176 1, sendcounts, recvcounts, sdispls, rdispls, comm);
178 size_t stencil_info_count = recvcounts[comm_size - 1] + rdispls[comm_size - 1];
180 xmalloc((stencil_count + stencil_info_count) *
sizeof(*stencil_info_buffer));
182 stencil_info_buffer + stencil_info_count;
183 struct stencil_info * stencil_recv_buffer = stencil_info_buffer;
186 for (
size_t i = 0; i < stencil_count; ++i) {
187 size_t pos = sdispls[stencil_dist_ranks[i]+1]++;
189 stencil_send_buffer[pos].
idx = i;
190 stencil_send_buffer[pos].
rank = comm_rank;
192 free(stencil_tgt_ids);
198 stencil_send_buffer, sendcounts, sdispls,
199 stencil_recv_buffer, recvcounts, rdispls,
200 sizeof(*stencil_send_buffer), stencil_info_dt, comm);
202 stencil_recv_buffer =
204 stencil_recv_buffer, stencil_info_count *
sizeof(*stencil_recv_buffer));
206 stencil_recv_buffer, stencil_info_count,
sizeof(*stencil_recv_buffer),
211 size_t dup_match_count = 0;
212 size_t stencil_match_count = 0;
213 for (
size_t i = 0, j = 0; i < dup_info_count; ++i) {
215 while ((j < stencil_info_count) &&
216 (stencil_recv_buffer[j].
tgt_global_id < curr_orig_global_id)) ++j;
217 if ((j < stencil_info_count) &&
218 (stencil_recv_buffer[j].
tgt_global_id == curr_orig_global_id))
221 while ((k < stencil_info_count) &&
222 (stencil_recv_buffer[k].
tgt_global_id == curr_orig_global_id)) {
224 ++stencil_match_count;
229 dup_remote_points.
data =
230 xmalloc(dup_match_count *
sizeof(*(dup_remote_points.
data)));
231 dup_remote_points.
count = dup_match_count;
232 size_t * num_stencils_per_tgt =
233 xmalloc(stencil_match_count *
sizeof(*num_stencils_per_tgt));
234 size_t * stencil_indices =
235 xmalloc(stencil_match_count *
sizeof(*stencil_indices));
236 int * stencil_ranks =
237 xmalloc(stencil_match_count *
sizeof(*stencil_ranks));
238 double * w =
xmalloc(stencil_match_count *
sizeof(*w));
242 stencil_match_count = 0;
243 for (
size_t i = 0, j = 0; i < dup_info_count; ++i) {
247 while ((j < stencil_info_count) &&
248 (stencil_recv_buffer[j].tgt_global_id < curr_orig_global_id)) ++j;
251 if ((j < stencil_info_count) &&
252 (stencil_recv_buffer[j].tgt_global_id == curr_orig_global_id)) {
254 size_t curr_num_stencils_per_tgt = 0;
258 while ((k < stencil_info_count) &&
259 (stencil_recv_buffer[k].tgt_global_id == curr_orig_global_id)) {
260 stencil_indices[stencil_match_count] = stencil_recv_buffer[k].
idx;
261 stencil_ranks[stencil_match_count] = stencil_recv_buffer[k].
rank;
262 w[stencil_match_count] = 1.0;
264 ++stencil_match_count;
265 ++curr_num_stencils_per_tgt;
274 (uint64_t)(dup_recv_buffer[i].duplicated_orig_pos);
275 num_stencils_per_tgt[dup_match_count] = curr_num_stencils_per_tgt;
282 weights, &dup_remote_points, num_stencils_per_tgt,
283 stencil_indices, stencil_ranks, w);
288 free(stencil_indices);
289 free(num_stencils_per_tgt);
290 free(stencil_recv_buffer);
291 free(dup_remote_points.
data);
292 free(dup_recv_buffer);
295 free(duplicated_global_ids);