125 size_t * tgt_points,
size_t count,
127 int * interpolation_complete) {
129 if (*interpolation_complete)
return 0;
131 char const * routine =
"do_search_callback";
137 int comm_rank, comm_size;
144 interp_grid, tgt_points, count, tgt_coords);
147 size_t * src_cells =
xmalloc(count *
sizeof(*src_cells));
149 interp_grid, tgt_coords, count, src_cells);
158 size_t num_unique_src_cells = 0;
159 size_t * src_to_unique_src =
160 xmalloc(temp_result_count *
sizeof(*src_to_unique_src));
161 for (
size_t i = 0, prev_src_cell = SIZE_MAX; i < temp_result_count; ++i) {
162 size_t curr_src_cell = src_cells[i];
163 if (curr_src_cell != prev_src_cell) {
164 src_cells[num_unique_src_cells++] = curr_src_cell;
165 prev_src_cell = curr_src_cell;
167 src_to_unique_src[i] = num_unique_src_cells - 1;
173 interp_grid,
YAC_LOC_CELL, src_cells, num_unique_src_cells);
177 int * orig_src_cell_ranks =
178 xmalloc(num_unique_src_cells *
sizeof(*orig_src_cell_ranks));
179 size_t * orig_src_cell_pos =
180 xmalloc(num_unique_src_cells *
sizeof(*orig_src_cell_pos));
181 for (
size_t i = 0; i < num_unique_src_cells; ++i)
183 src_remote_points + i, orig_src_cell_ranks + i, orig_src_cell_pos + i);
186 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
188 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
190 for (
size_t i = 0; i < temp_result_count; ++i)
191 sendcounts[orig_src_cell_ranks[src_to_unique_src[i]]]++;
193 1, sendcounts, recvcounts, sdispls, rdispls, comm);
194 size_t request_count = recvcounts[comm_size - 1] + rdispls[comm_size - 1];
200 xmalloc((temp_result_count + request_count) *
sizeof(*request_buffer));
203 request_buffer + temp_result_count;
204 size_t * new_tgt_points =
205 xmalloc(temp_result_count *
sizeof(*new_tgt_points));
206 for (
size_t i = 0; i < temp_result_count; ++i) {
207 size_t unique_src_cell_idx = src_to_unique_src[i];
208 size_t pos = sdispls[orig_src_cell_ranks[unique_src_cell_idx] + 1]++;
210 orig_src_cell_pos[unique_src_cell_idx];
212 src_remote_points[unique_src_cell_idx].
global_id;
213 memcpy(request_send_buffer[pos].
tgt_coord, tgt_field_coords[tgt_points[i]],
215 new_tgt_points[pos] = tgt_points[i];
217 free(src_remote_points);
218 free(orig_src_cell_pos);
219 free(src_to_unique_src);
220 free(orig_src_cell_ranks);
223 memcpy(tgt_points, new_tgt_points, temp_result_count *
sizeof(*tgt_points));
224 free(new_tgt_points);
229 request_send_buffer, sendcounts, sdispls,
230 request_recv_buffer, recvcounts, rdispls,
231 sizeof(*request_recv_buffer), request_data_dt, comm, routine, __LINE__);
238 uint64_t * uint64_t_buffer =
239 xmalloc(num_src_fields * (request_count + temp_result_count) *
240 sizeof(*uint64_t_buffer));
241 uint64_t * temp_num_results_per_src_field_per_tgt = uint64_t_buffer;
242 uint64_t * num_results_per_src_field_per_tgt_uint64 =
243 uint64_t_buffer + num_src_fields * request_count;
244 yac_int * temp_global_ids = NULL;
245 size_t temp_global_ids_array_size = 0;
246 double * temp_w = NULL;
247 size_t temp_w_array_size = 0;
248 size_t temp_weights_count = 0;
254 "ERROR(%s): no callback routine defined on source process", routine)
257 for (
size_t i = 0, k = 0; i < request_count; ++i) {
260 int const * curr_global_result_points[num_src_fields];
261 double * curr_result_weights[num_src_fields];
262 size_t curr_result_counts[num_src_fields];
264 (
double const *)(request_recv_buffer[i].
tgt_coord),
267 curr_global_result_points, curr_result_weights, curr_result_counts,
271 for (
size_t j = 0; j < num_src_fields; ++j, ++k) {
272 size_t curr_count = curr_result_counts[j];
273 temp_num_results_per_src_field_per_tgt[k] = (uint64_t)curr_count;
275 temp_global_ids, temp_global_ids_array_size,
276 temp_weights_count + curr_count);
278 temp_w, temp_w_array_size,
279 temp_weights_count + curr_count);
280 for (
size_t l = 0; l < curr_count; ++l)
281 temp_global_ids[temp_weights_count + l] =
282 (
yac_int)(curr_global_result_points[j][l]);
283 memcpy(temp_w + temp_weights_count, curr_result_weights[j],
284 curr_count *
sizeof(*curr_result_weights));
285 temp_weights_count += curr_count;
288 free(request_buffer);
291 for (
int i = 0; i < comm_size; ++i) {
292 sendcounts[i] *= num_src_fields;
293 recvcounts[i] *= num_src_fields;
294 sdispls[i] *= num_src_fields;
295 rdispls[i] *= num_src_fields;
297 yac_alltoallv_uint64_p2p(
298 temp_num_results_per_src_field_per_tgt, recvcounts, rdispls,
299 num_results_per_src_field_per_tgt_uint64, sendcounts, sdispls, comm,
304 size_t num_weights = 0;
305 size_t * total_num_results_per_src_field =
306 xcalloc(num_src_fields,
sizeof(*total_num_results_per_src_field));
308 uint64_t * curr_num_send_results = temp_num_results_per_src_field_per_tgt;
309 uint64_t * curr_num_recv_results = num_results_per_src_field_per_tgt_uint64;
310 size_t saccu = 0, raccu = 0;
311 for (
int i = 0; i < comm_size; ++i) {
312 size_t num_send_results = recvcounts[i] / num_src_fields;
313 size_t num_recv_results = sendcounts[i] / num_src_fields;
314 size_t num_send_weights = 0;
315 size_t num_recv_weights = 0;
316 for (
size_t j = 0; j < num_send_results; ++j)
317 for (
size_t k = 0; k < num_src_fields; ++k, ++curr_num_send_results)
318 num_send_weights += (
size_t)*curr_num_send_results;
319 for (
size_t j = 0; j < num_recv_results; ++j) {
320 for (
size_t k = 0; k < num_src_fields; ++k, ++curr_num_recv_results) {
321 size_t curr_count = (size_t)*curr_num_recv_results;
322 num_recv_weights += curr_count;
323 total_num_results_per_src_field[k] += curr_count;
328 sendcounts[i] = num_send_weights;
329 recvcounts[i] = num_recv_weights;
330 saccu += num_send_weights;
331 raccu += num_recv_weights;
332 num_weights += num_recv_weights;
336 double * w =
xmalloc(num_weights *
sizeof(*w));
337 yac_int * global_ids =
xmalloc(num_weights *
sizeof(*global_ids));
340 yac_alltoallv_dble_p2p(
341 temp_w, sendcounts, sdispls, w, recvcounts, rdispls, comm,
343 yac_alltoallv_yac_int_p2p(
344 temp_global_ids, sendcounts, sdispls,
345 global_ids, recvcounts, rdispls, comm, routine, __LINE__);
348 free(temp_global_ids);
351 size_t * interpolated_flag =
352 xmalloc(temp_result_count *
sizeof(*interpolated_flag));
353 size_t result_count = 0;
354 for (
size_t i = 0, k = 0; i < temp_result_count; ++i) {
356 for (
size_t j = 0; j < num_src_fields; ++j, ++k)
357 flag |= (num_results_per_src_field_per_tgt_uint64[k] > 0);
359 if (result_count != i)
361 num_results_per_src_field_per_tgt_uint64 + result_count * num_src_fields,
362 num_results_per_src_field_per_tgt_uint64 + i * num_src_fields,
363 num_src_fields *
sizeof(*num_results_per_src_field_per_tgt_uint64));
364 interpolated_flag[i] = result_count++;
366 interpolated_flag[i] = SIZE_MAX;
373 interpolated_flag, temp_result_count, tgt_points);
374 free(interpolated_flag);
377 size_t * global_id_reorder_idx =
378 xmalloc((num_weights + num_src_fields) *
sizeof(*global_id_reorder_idx));
379 size_t * src_field_displ = global_id_reorder_idx + num_weights;
380 size_t max_num_results_per_src_field = 0;
381 size_t * num_results_per_src_field_per_tgt =
382 xmalloc(result_count * num_src_fields *
383 sizeof(*num_results_per_src_field_per_tgt));
384 for (
size_t i = 0, accu = 0; i < num_src_fields; ++i) {
385 src_field_displ[i] = accu;
386 accu += total_num_results_per_src_field[i];
387 if (max_num_results_per_src_field < total_num_results_per_src_field[i])
388 max_num_results_per_src_field = total_num_results_per_src_field[i];
390 for (
size_t i = 0, k = 0, l = 0; i < result_count; ++i) {
391 for (
size_t j = 0; j < num_src_fields; ++j, ++k) {
392 num_results_per_src_field_per_tgt[k] =
393 (size_t)(num_results_per_src_field_per_tgt_uint64[k]);
395 (size_t)(num_results_per_src_field_per_tgt_uint64[k]);
396 for (
size_t m = 0; m < curr_count; ++m, ++l)
397 global_id_reorder_idx[l] = src_field_displ[j]++;
401 global_id_reorder_idx, num_weights, global_ids);
402 free(uint64_t_buffer);
403 free(global_id_reorder_idx);
407 size_t * result_point_buffer =
408 xmalloc(max_num_results_per_src_field *
sizeof(*result_point_buffer));
409 for (
size_t i = 0, offset = 0; i < num_src_fields; ++i) {
410 size_t curr_count = total_num_results_per_src_field[i];
412 interp_grid, i, global_ids + offset, curr_count, result_point_buffer);
415 interp_grid, i, result_point_buffer, curr_count);
416 offset += curr_count;
418 free(result_point_buffer);
420 free(total_num_results_per_src_field);
425 interp_grid, tgt_points, result_count),
426 .count = result_count};
430 weights, &tgts, num_results_per_src_field_per_tgt, srcs_per_field, w,
434 for (
size_t i = 0; i < num_src_fields; ++i) free(srcs_per_field[i]);
435 free(num_results_per_src_field_per_tgt);