124 size_t * tgt_points,
size_t count,
131 int comm_rank, comm_size;
138 interp_grid, tgt_points, count, tgt_coords);
141 size_t * src_cells =
xmalloc(count *
sizeof(*src_cells));
143 interp_grid, tgt_coords, count, src_cells);
152 size_t num_unique_src_cells = 0;
153 size_t * src_to_unique_src =
154 xmalloc(temp_result_count *
sizeof(*src_to_unique_src));
155 for (
size_t i = 0, prev_src_cell = SIZE_MAX; i < temp_result_count; ++i) {
156 size_t curr_src_cell = src_cells[i];
157 if (curr_src_cell != prev_src_cell) {
158 src_cells[num_unique_src_cells++] = curr_src_cell;
159 prev_src_cell = curr_src_cell;
161 src_to_unique_src[i] = num_unique_src_cells - 1;
167 interp_grid,
YAC_LOC_CELL, src_cells, num_unique_src_cells);
171 int * orig_src_cell_ranks =
172 xmalloc(num_unique_src_cells *
sizeof(*orig_src_cell_ranks));
173 size_t * orig_src_cell_pos =
174 xmalloc(num_unique_src_cells *
sizeof(*orig_src_cell_pos));
175 for (
size_t i = 0; i < num_unique_src_cells; ++i)
177 src_remote_points + i, orig_src_cell_ranks + i, orig_src_cell_pos + i);
180 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
182 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
184 for (
size_t i = 0; i < temp_result_count; ++i)
185 sendcounts[orig_src_cell_ranks[src_to_unique_src[i]]]++;
187 1, sendcounts, recvcounts, sdispls, rdispls, comm);
188 size_t request_count = recvcounts[comm_size - 1] + rdispls[comm_size - 1];
194 xmalloc((temp_result_count + request_count) *
sizeof(*request_buffer));
197 request_buffer + temp_result_count;
198 size_t * new_tgt_points =
199 xmalloc(temp_result_count *
sizeof(*new_tgt_points));
200 for (
size_t i = 0; i < temp_result_count; ++i) {
201 size_t unique_src_cell_idx = src_to_unique_src[i];
202 size_t pos = sdispls[orig_src_cell_ranks[unique_src_cell_idx] + 1]++;
204 orig_src_cell_pos[unique_src_cell_idx];
206 src_remote_points[unique_src_cell_idx].
global_id;
207 memcpy(request_send_buffer[pos].
tgt_coord, tgt_field_coords[tgt_points[i]],
209 new_tgt_points[pos] = tgt_points[i];
211 free(src_remote_points);
212 free(orig_src_cell_pos);
213 free(src_to_unique_src);
214 free(orig_src_cell_ranks);
217 memcpy(tgt_points, new_tgt_points, temp_result_count *
sizeof(*tgt_points));
218 free(new_tgt_points);
223 request_send_buffer, sendcounts, sdispls,
224 request_recv_buffer, recvcounts, rdispls,
225 sizeof(*request_recv_buffer), request_data_dt, comm);
232 uint64_t * uint64_t_buffer =
233 xmalloc(num_src_fields * (request_count + temp_result_count) *
234 sizeof(*uint64_t_buffer));
235 uint64_t * temp_num_results_per_src_field_per_tgt = uint64_t_buffer;
236 uint64_t * num_results_per_src_field_per_tgt_uint64 =
237 uint64_t_buffer + num_src_fields * request_count;
238 yac_int * temp_global_ids = NULL;
239 size_t temp_global_ids_array_size = 0;
240 double * temp_w = NULL;
241 size_t temp_w_array_size = 0;
242 size_t temp_weights_count = 0;
248 "ERROR(do_search_callback): "
249 "no callback routine defined on source process")
252 for (
size_t i = 0, k = 0; i < request_count; ++i) {
255 int const * curr_global_result_points[num_src_fields];
256 double * curr_result_weights[num_src_fields];
257 size_t curr_result_counts[num_src_fields];
259 (
double const *)(request_recv_buffer[i].
tgt_coord),
262 curr_global_result_points, curr_result_weights, curr_result_counts,
266 for (
size_t j = 0; j < num_src_fields; ++j, ++k) {
267 size_t curr_count = curr_result_counts[j];
268 temp_num_results_per_src_field_per_tgt[k] = (uint64_t)curr_count;
270 temp_global_ids, temp_global_ids_array_size,
271 temp_weights_count + curr_count);
273 temp_w, temp_w_array_size,
274 temp_weights_count + curr_count);
275 for (
size_t l = 0; l < curr_count; ++l)
276 temp_global_ids[temp_weights_count + l] =
277 (
yac_int)(curr_global_result_points[j][l]);
278 memcpy(temp_w + temp_weights_count, curr_result_weights[j],
279 curr_count *
sizeof(*curr_result_weights));
280 temp_weights_count += curr_count;
283 free(request_buffer);
286 for (
int i = 0; i < comm_size; ++i) {
287 sendcounts[i] *= num_src_fields;
288 recvcounts[i] *= num_src_fields;
289 sdispls[i] *= num_src_fields;
290 rdispls[i] *= num_src_fields;
292 yac_alltoallv_uint64_p2p(
293 temp_num_results_per_src_field_per_tgt, recvcounts, rdispls,
294 num_results_per_src_field_per_tgt_uint64, sendcounts, sdispls, comm);
298 size_t num_weights = 0;
299 size_t * total_num_results_per_src_field =
300 xcalloc(num_src_fields,
sizeof(*total_num_results_per_src_field));
302 uint64_t * curr_num_send_results = temp_num_results_per_src_field_per_tgt;
303 uint64_t * curr_num_recv_results = num_results_per_src_field_per_tgt_uint64;
304 size_t saccu = 0, raccu = 0;
305 for (
int i = 0; i < comm_size; ++i) {
306 size_t num_send_results = recvcounts[i] / num_src_fields;
307 size_t num_recv_results = sendcounts[i] / num_src_fields;
308 size_t num_send_weights = 0;
309 size_t num_recv_weights = 0;
310 for (
size_t j = 0; j < num_send_results; ++j)
311 for (
size_t k = 0; k < num_src_fields; ++k, ++curr_num_send_results)
312 num_send_weights += (
size_t)*curr_num_send_results;
313 for (
size_t j = 0; j < num_recv_results; ++j) {
314 for (
size_t k = 0; k < num_src_fields; ++k, ++curr_num_recv_results) {
315 size_t curr_count = (size_t)*curr_num_recv_results;
316 num_recv_weights += curr_count;
317 total_num_results_per_src_field[k] += curr_count;
322 sendcounts[i] = num_send_weights;
323 recvcounts[i] = num_recv_weights;
324 saccu += num_send_weights;
325 raccu += num_recv_weights;
326 num_weights += num_recv_weights;
330 double * w =
xmalloc(num_weights *
sizeof(*w));
331 yac_int * global_ids =
xmalloc(num_weights *
sizeof(*global_ids));
334 yac_alltoallv_dble_p2p(
335 temp_w, sendcounts, sdispls, w, recvcounts, rdispls, comm);
336 yac_alltoallv_yac_int_p2p(
337 temp_global_ids, sendcounts, sdispls,
338 global_ids, recvcounts, rdispls, comm);
341 free(temp_global_ids);
344 size_t * interpolated_flag =
345 xmalloc(temp_result_count *
sizeof(*interpolated_flag));
346 size_t result_count = 0;
347 for (
size_t i = 0, k = 0; i < temp_result_count; ++i) {
349 for (
size_t j = 0; j < num_src_fields; ++j, ++k)
350 flag |= (num_results_per_src_field_per_tgt_uint64[k] > 0);
352 if (result_count != i)
354 num_results_per_src_field_per_tgt_uint64 + result_count * num_src_fields,
355 num_results_per_src_field_per_tgt_uint64 + i * num_src_fields,
356 num_src_fields *
sizeof(*num_results_per_src_field_per_tgt_uint64));
357 interpolated_flag[i] = result_count++;
359 interpolated_flag[i] = SIZE_MAX;
366 interpolated_flag, temp_result_count, tgt_points);
367 free(interpolated_flag);
370 size_t * global_id_reorder_idx =
371 xmalloc((num_weights + num_src_fields) *
sizeof(*global_id_reorder_idx));
372 size_t * src_field_displ = global_id_reorder_idx + num_weights;
373 size_t max_num_results_per_src_field = 0;
374 size_t * num_results_per_src_field_per_tgt =
375 xmalloc(result_count * num_src_fields *
376 sizeof(*num_results_per_src_field_per_tgt));
377 for (
size_t i = 0, accu = 0; i < num_src_fields; ++i) {
378 src_field_displ[i] = accu;
379 accu += total_num_results_per_src_field[i];
380 if (max_num_results_per_src_field < total_num_results_per_src_field[i])
381 max_num_results_per_src_field = total_num_results_per_src_field[i];
383 for (
size_t i = 0, k = 0, l = 0; i < result_count; ++i) {
384 for (
size_t j = 0; j < num_src_fields; ++j, ++k) {
385 num_results_per_src_field_per_tgt[k] =
386 (size_t)(num_results_per_src_field_per_tgt_uint64[k]);
388 (size_t)(num_results_per_src_field_per_tgt_uint64[k]);
389 for (
size_t m = 0; m < curr_count; ++m, ++l)
390 global_id_reorder_idx[l] = src_field_displ[j]++;
394 global_id_reorder_idx, num_weights, global_ids);
395 free(uint64_t_buffer);
396 free(global_id_reorder_idx);
400 size_t * result_point_buffer =
401 xmalloc(max_num_results_per_src_field *
sizeof(*result_point_buffer));
402 for (
size_t i = 0, offset = 0; i < num_src_fields; ++i) {
403 size_t curr_count = total_num_results_per_src_field[i];
405 interp_grid, i, global_ids + offset, curr_count, result_point_buffer);
408 interp_grid, i, result_point_buffer, curr_count);
409 offset += curr_count;
411 free(result_point_buffer);
413 free(total_num_results_per_src_field);
418 interp_grid, tgt_points, result_count),
419 .count = result_count};
423 weights, &tgts, num_results_per_src_field_per_tgt, srcs_per_field, w,
427 for (
size_t i = 0; i < num_src_fields; ++i) free(srcs_per_field[i]);
428 free(num_results_per_src_field_per_tgt);