92 yac_int * tgt_orig_global_id,
size_t * tgt_duplicated_idx,
95 char const * routine =
"yac_duplicate_stencils";
97 yac_int * duplicated_global_ids =
98 xmalloc(nbr_duplicated *
sizeof(*duplicated_global_ids));
106 "ERROR(%s): invalid location", routine)
123 for (
size_t i = 0; i < nbr_duplicated; ++i)
124 duplicated_global_ids[i] = point_ids[tgt_duplicated_idx[i]];
132 int comm_rank, comm_size;
137 xmalloc((nbr_duplicated + stencil_count) *
sizeof(*int_buffer));
138 int * dup_dist_ranks = int_buffer;
139 int * stencil_dist_ranks = int_buffer + nbr_duplicated;
142 for (
size_t i = 0; i < nbr_duplicated; ++i)
143 dup_dist_ranks[i] =
compute_bucket(tgt_orig_global_id[i], comm_size);
144 for (
size_t i = 0; i < stencil_count; ++i)
145 stencil_dist_ranks[i] =
compute_bucket(stencil_tgt_ids[i], comm_size);
147 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
149 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
153 for (
size_t i = 0; i < nbr_duplicated; ++i)
154 sendcounts[dup_dist_ranks[i]]++;
156 1, sendcounts, recvcounts, sdispls, rdispls, comm);
158 size_t dup_info_count = recvcounts[comm_size - 1] + rdispls[comm_size - 1];
160 xmalloc((nbr_duplicated + dup_info_count) *
sizeof(*dup_info_buffer));
161 struct dup_info * dup_send_buffer = dup_info_buffer + dup_info_count;
162 struct dup_info * dup_recv_buffer = dup_info_buffer;
165 for (
size_t i = 0; i < nbr_duplicated; ++i) {
166 size_t pos = sdispls[dup_dist_ranks[i]+1]++;
177 dup_send_buffer, sendcounts, sdispls,
178 dup_recv_buffer, recvcounts, rdispls,
179 sizeof(*dup_send_buffer), dup_info_dt, comm, routine, __LINE__);
182 xrealloc(dup_recv_buffer, dup_info_count *
sizeof(*dup_recv_buffer));
184 dup_recv_buffer, dup_info_count,
sizeof(*dup_recv_buffer),
188 memset(sendcounts, 0, (
size_t)comm_size *
sizeof(*sendcounts));
189 for (
size_t i = 0; i < stencil_count; ++i)
190 sendcounts[stencil_dist_ranks[i]]++;
192 1, sendcounts, recvcounts, sdispls, rdispls, comm);
194 size_t stencil_info_count = recvcounts[comm_size - 1] + rdispls[comm_size - 1];
196 xmalloc((stencil_count + stencil_info_count) *
sizeof(*stencil_info_buffer));
198 stencil_info_buffer + stencil_info_count;
199 struct stencil_info * stencil_recv_buffer = stencil_info_buffer;
202 for (
size_t i = 0; i < stencil_count; ++i) {
203 size_t pos = sdispls[stencil_dist_ranks[i]+1]++;
205 stencil_send_buffer[pos].
idx = i;
206 stencil_send_buffer[pos].
rank = comm_rank;
208 free(stencil_tgt_ids);
214 stencil_send_buffer, sendcounts, sdispls,
215 stencil_recv_buffer, recvcounts, rdispls,
216 sizeof(*stencil_send_buffer), stencil_info_dt, comm, routine, __LINE__);
218 stencil_recv_buffer =
220 stencil_recv_buffer, stencil_info_count *
sizeof(*stencil_recv_buffer));
222 stencil_recv_buffer, stencil_info_count,
sizeof(*stencil_recv_buffer),
227 size_t dup_match_count = 0;
228 size_t stencil_match_count = 0;
229 for (
size_t i = 0, j = 0; i < dup_info_count; ++i) {
231 while ((j < stencil_info_count) &&
232 (stencil_recv_buffer[j].
tgt_global_id < curr_orig_global_id)) ++j;
233 if ((j < stencil_info_count) &&
234 (stencil_recv_buffer[j].
tgt_global_id == curr_orig_global_id))
237 while ((k < stencil_info_count) &&
238 (stencil_recv_buffer[k].
tgt_global_id == curr_orig_global_id)) {
240 ++stencil_match_count;
245 dup_remote_points.
data =
246 xmalloc(dup_match_count *
sizeof(*(dup_remote_points.
data)));
247 dup_remote_points.
count = dup_match_count;
248 size_t * num_stencils_per_tgt =
249 xmalloc(stencil_match_count *
sizeof(*num_stencils_per_tgt));
250 size_t * stencil_indices =
251 xmalloc(stencil_match_count *
sizeof(*stencil_indices));
252 int * stencil_ranks =
253 xmalloc(stencil_match_count *
sizeof(*stencil_ranks));
254 double * w =
xmalloc(stencil_match_count *
sizeof(*w));
258 stencil_match_count = 0;
259 for (
size_t i = 0, j = 0; i < dup_info_count; ++i) {
263 while ((j < stencil_info_count) &&
264 (stencil_recv_buffer[j].tgt_global_id < curr_orig_global_id)) ++j;
267 if ((j < stencil_info_count) &&
268 (stencil_recv_buffer[j].tgt_global_id == curr_orig_global_id)) {
270 size_t curr_num_stencils_per_tgt = 0;
274 while ((k < stencil_info_count) &&
275 (stencil_recv_buffer[k].tgt_global_id == curr_orig_global_id)) {
276 stencil_indices[stencil_match_count] = stencil_recv_buffer[k].
idx;
277 stencil_ranks[stencil_match_count] = stencil_recv_buffer[k].
rank;
278 w[stencil_match_count] = 1.0;
280 ++stencil_match_count;
281 ++curr_num_stencils_per_tgt;
290 (uint64_t)(dup_recv_buffer[i].duplicated_orig_pos);
291 num_stencils_per_tgt[dup_match_count] = curr_num_stencils_per_tgt;
298 weights, &dup_remote_points, num_stencils_per_tgt,
299 stencil_indices, stencil_ranks, w);
304 free(stencil_indices);
305 free(num_stencils_per_tgt);
306 free(stencil_recv_buffer);
307 free(dup_remote_points.
data);
308 free(dup_recv_buffer);
311 free(duplicated_global_ids);