19 size_t * tgt_points,
size_t count,
21 int * interpolation_complete);
/* Fragment (listing lines 82-95): builds the MPI struct datatype that
 * describes one stencil entry made of the members rank, idx and weight.
 * NOTE(review): the enclosing function header and the declaration of
 * `dummy` are missing from this listing -- presumably this is the body of
 * yac_get_stencil_info_mpi_datatype; confirm against the full file. */
82 MPI_Datatype stencil_info_dt;
/* each of the three members contributes exactly one element */
83 int array_of_blocklengths[] = {1, 1, 1};
/* byte offset of every member: address of the member minus the address
 * of the enclosing `dummy` object */
84 const MPI_Aint array_of_displacements[] =
85 {(MPI_Aint)(intptr_t)(
const void *)&(dummy.
rank) -
86 (MPI_Aint)(intptr_t)(
const void *)&dummy,
87 (MPI_Aint)(intptr_t)(
const void *)&(dummy.
idx) -
88 (MPI_Aint)(intptr_t)(
const void *)&dummy,
89 (MPI_Aint)(intptr_t)(
const void *)&(dummy.
weight) -
90 (MPI_Aint)(intptr_t)(
const void *)&dummy};
/* MPI element types for rank, idx and weight, in the same order as the
 * displacements above */
91 const MPI_Datatype array_of_types[] =
92 {MPI_INT, MPI_UINT64_T, MPI_DOUBLE};
/* the trailing `, comm)` closes a wrapper call whose opening line is not
 * in this listing (presumably yac_mpi_call) */
94 MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacements,
95 array_of_types, &stencil_info_dt), comm);
/* Fragment (listing lines 100-118) of a routine that fills pack_sizes[i]
 * for each of the result_count results.
 * NOTE(review): the function name line and several body lines are missing
 * from this listing -- presumably get_result_stencil_pack_sizes; confirm. */
100 void * results,
size_t result_count,
size_t result_size,
102 int * pack_sizes, MPI_Datatype stencil_info_dt, MPI_Comm comm) {
104 int pack_size_global_id, pack_size_count;
/* pack size of a single MPI_UINT64_T value, stored in pack_size_count */
106 yac_mpi_call(MPI_Pack_size(1, MPI_UINT64_T, comm, &pack_size_count), comm);
108 for (
size_t i = 0; i < result_count; ++i) {
/* results is addressed bytewise: entry pack_order[i], stride result_size */
112 (
void*)((
unsigned char*)results + pack_order[i] * result_size));
113 size_t curr_count = stencil->
count;
114 int pack_size_stencils;
/* arguments of a call on a missing line (presumably MPI_Pack_size):
 * size of curr_count elements of stencil_info_dt -> pack_size_stencils */
117 (
int)curr_count, stencil_info_dt, comm, &pack_size_stencils), comm);
/* the remaining summands are on lines missing from this listing */
118 pack_sizes[i] = pack_size_global_id +
/* Fragment (listing lines 126-144) that appends one stencil to an MPI pack
 * buffer: a value packed on partly missing lines, then the entry count,
 * then the entries themselves. `position` is advanced by each MPI_Pack.
 * NOTE(review): presumably pack_result_stencil; the leading parameters and
 * several statements are missing from this listing. */
126 int * position, MPI_Datatype stencil_info_dt, MPI_Comm comm) {
/* tail of a pack call whose first arguments are on missing lines */
131 buffer, buffer_size, position, comm), comm);
/* the entry count is transferred as a fixed-width uint64_t */
134 uint64_t count_uint64_t = (uint64_t)(stencil->
count);
136 MPI_Pack(&count_uint64_t, 1, MPI_UINT64_T,
137 buffer, buffer_size, position, comm), comm);
/* a count of exactly 1 selects the address of data.single, any other
 * count selects the pointer data.multi */
141 (count_uint64_t == 1)?(&(stencil->
data.single)):(stencil->
data.
multi);
/* pack all entries as elements of stencil_info_dt */
143 MPI_Pack(stencils, (
int)count_uint64_t, stencil_info_dt,
144 buffer, buffer_size, position, comm), comm);
/* Fragment (listing lines 148-186): packs the stencils of result_count
 * results into one freshly allocated buffer that is returned through
 * *pack_data; pack_sizes[i] ends up holding the bytes used by result i.
 * NOTE(review): presumably pack_result_stencils (the name appears in the
 * error message below); several lines are missing from this listing. */
148 void * results,
size_t result_count,
size_t result_size,
150 void ** pack_data,
int * pack_sizes, MPI_Datatype stencil_info_dt,
/* arguments of a call on a missing line (presumably
 * get_result_stencil_pack_sizes, which would fill pack_sizes) */
154 results, result_count, result_size, get_stencil, pack_order,
155 pack_sizes, stencil_info_dt, comm);
/* sum of all per-result sizes = capacity of the buffer allocated below */
157 size_t temp_total_pack_size = 0;
158 for (
size_t i = 0; i < result_count; ++i)
159 temp_total_pack_size += (
size_t)(pack_sizes[i]);
/* buffer sizes are passed on as int further down, hence the INT_MAX limit */
162 temp_total_pack_size <= INT_MAX,
163 "ERROR(pack_result_stencils): pack size exceeds INT_MAX")
165 void * pack_data_ =
xmalloc(temp_total_pack_size);
/* number of bytes written into pack_data_ so far */
166 size_t total_pack_size = 0;
168 for (
size_t i = 0; i < result_count; ++i) {
/* entry pack_order[i] of results, addressed bytewise with stride
 * result_size */
172 (
void*)((
unsigned char*)results + pack_order[i] * result_size));
/* pack directly behind the data written so far; the remaining capacity is
 * handed over as int */
175 void * buffer = (
void*)((
char*)pack_data_ + total_pack_size);
176 int buffer_size = (int)(temp_total_pack_size - total_pack_size);
/* arguments of the per-stencil pack call (call name on a missing line);
 * NOTE(review): the initialisation of `position` is not shown */
180 curr_stencil, buffer, buffer_size, &position, stencil_info_dt, comm);
/* overwrite the earlier size with the value of position after packing */
182 pack_sizes[i] = position;
183 total_pack_size += (size_t)position;
/* hand the buffer to the caller */
186 *pack_data = pack_data_;
/* Fragment (listing lines 191-223): unpacks one stencil from an MPI pack
 * buffer -- a first value (target on a missing line), the entry count and
 * then the entries.
 * NOTE(review): presumably unpack_result_stencil; the leading parameters
 * and several statements are missing from this listing. */
191 int * position, MPI_Datatype stencil_info_dt, MPI_Comm comm,
193 size_t * stencil_info_buffer_array_size,
194 size_t * stencil_info_buffer_size) {
/* first unpack call; its output arguments are on a missing line */
198 MPI_Unpack(buffer, buffer_size, position,
202 uint64_t count_uint64_t;
204 MPI_Unpack(buffer, buffer_size, position,
205 &count_uint64_t, 1, MPI_UINT64_T, comm), comm);
/* store the count in the stencil and keep a local size_t copy */
206 size_t count = ((stencil->
count = (size_t)count_uint64_t));
/* a single entry is unpacked straight into data.single */
210 if (count_uint64_t == 1) {
211 stencils = &(stencil->
data.single);
/* arguments of a call on a missing line (presumably ENSURE_ARRAY_SIZE)
 * asking for room for `count` additional entries in the shared buffer */
214 *stencil_info_buffer, *stencil_info_buffer_array_size,
215 *stencil_info_buffer_size + count);
/* the entries go to the current end of the shared buffer, whose fill
 * level then grows by count */
218 *stencil_info_buffer + *stencil_info_buffer_size));
219 *stencil_info_buffer_size += count;
/* unpack `count` elements of stencil_info_dt into the chosen location */
222 MPI_Unpack(buffer, buffer_size, position,
223 stencils, (
int)count, stencil_info_dt, comm), comm);
/* Fragment (listing lines 227-263): unpacks `count` stencils from
 * packed_data in two passes.
 * NOTE(review): presumably unpack_result_stencils; the function name line,
 * the allocation of the result and parts of both loops are missing. */
227 size_t count,
void * packed_data,
size_t packed_data_size,
228 MPI_Datatype stencil_info_dt, MPI_Comm comm) {
/* bookkeeping of the shared stencil-entry buffer, passed by address to the
 * per-stencil unpack call below */
231 size_t stencil_info_buffer_array_size = 0;
232 size_t stencil_info_buffer_size = 0;
/* first pass: offset = number of bytes of packed_data consumed so far */
237 for (
size_t i = 0, offset = 0; i <
count; ++i) {
240 void * curr_buffer = (
void*)((
char*)packed_data + offset);
241 int buffer_size = (int)(packed_data_size - offset);
/* arguments of the per-stencil unpack call (call name on a missing line) */
245 curr_stencil, curr_buffer, buffer_size, &position, stencil_info_dt, comm,
246 &stencil_info_buffer, &stencil_info_buffer_array_size,
247 &stencil_info_buffer_size);
248 offset += (size_t)position;
/* second pass: here offset counts stencil entries, not bytes; it advances
 * by curr_count (apparently only for stencils with more than one entry --
 * the statement in between is missing from this listing) */
259 for (
size_t i = 0, offset = 0; i <
count; ++i) {
261 if (curr_count > 1) {
263 offset += curr_count;
/* Fragment (listing lines 271-359) of exchange_interp_results (name taken
 * from the `routine` string below): results destined for other ranks are
 * packed and exchanged, results destined for this rank are read directly.
 * NOTE(review): many lines, including the remaining parameters and the
 * return statement, are missing from this listing. */
271 void * results,
size_t result_count,
size_t result_size,
272 struct result_stencil*(*result_get_stencil)(
void*),
size_t * pack_order,
/* name used in error messages */
275 char const * routine =
"exchange_interp_results";
/* results whose rank equals comm.rank are treated as local; the body of
 * the if is missing from this listing */
278 size_t local_count = 0;
279 for (
size_t i = 0; i < result_count; ++i) {
280 if (ranks[i] == comm.
rank) {
/* everything that is not local has to be sent */
285 size_t send_count = result_count - local_count;
292 int * pack_sizes =
xmalloc(send_count *
sizeof(*pack_sizes));
/* arguments of the pack call (name on a missing line): only the first
 * send_count entries of pack_order are packed into send_buffer */
294 results, send_count, result_size,
295 result_get_stencil, pack_order, &send_buffer, pack_sizes,
/* one counter per rank of the communicator */
299 size_t * num_results_per_rank =
300 xmalloc((
size_t)comm.
size *
sizeof(*num_results_per_rank));
/* walks the first send_count entries of ranks rank by rank and sums their
 * pack sizes; NOTE(review): this grouping relies on those entries being
 * ordered by rank -- presumably ensured on lines not shown */
304 for (
int rank = 0; rank < comm.
size; ++rank) {
305 size_t curr_num_results = 0;
306 size_t curr_sendcount = 0;
307 while ((j < send_count) && (ranks[j] == rank)) {
308 curr_sendcount += (size_t)(pack_sizes[j++]);
311 num_results_per_rank[rank] = curr_num_results;
/* per-rank byte count must fit into an int */
313 curr_sendcount <= INT_MAX,
"ERROR(%s): pack size to big", routine)
/* total number of results over all ranks; NOTE(review): presumably
 * num_results_per_rank was exchanged on the missing lines before this sum */
324 size_t recv_count = 0;
325 for (
int i = 0; i < comm.
size; ++i)
326 recv_count += num_results_per_rank[i];
327 free(num_results_per_rank);
331 void * recv_buffer =
xmalloc(recv_size);
/* exchange of the packed data; arguments are on missing lines */
334 yac_alltoallv_packed_p2p(
/* skip the send_count entries that were packed; the remaining local_count
 * entries of pack_order are read directly through result_get_stencil */
355 pack_order += send_count;
356 for (
size_t i = 0; i < local_count; ++i)
358 *(*result_get_stencil)(
359 (
void*)((
unsigned char*)results + pack_order[i] * result_size));
/* Fragment (listing lines 372-395): determines an owner rank for every
 * interpolation result and passes the results on for redistribution.
 * NOTE(review): presumably relocate_interp_results; the leading parameters
 * and the names of the called functions are on missing lines. */
372 struct interp_result * interp_results,
size_t result_count) {
/* local ids of all results */
375 size_t * tgt_points =
xmalloc(result_count *
sizeof(*tgt_points));
376 for (
size_t i = 0; i < result_count; ++i)
377 tgt_points[i] = interp_results[i].local_id;
380 int * tgt_points_dist_owner =
381 xmalloc(result_count *
sizeof(*tgt_points_dist_owner));
/* arguments of a call on a missing line (presumably
 * yac_interp_grid_determine_dist_tgt_owners) filling tgt_points_dist_owner */
383 interp_grid, tgt_points, result_count, tgt_points_dist_owner);
/* the tgt_points allocation is reused as pack order and initialised with
 * the identity permutation */
384 size_t * pack_order = tgt_points;
385 for (
size_t i = 0; i < result_count; ++i) pack_order[i] = i;
/* arguments of the exchange call (name and remaining arguments missing) */
389 interp_results, result_count,
sizeof(*interp_results),
393 free(tgt_points_dist_owner);
395 return relocated_results;
/* Fragment (listing lines 401-433): creates one interp_result per already
 * interpolated target and returns the array and its length through
 * *interp_results and *result_count.
 * NOTE(review): presumably get_initial_results; the initialisers of the
 * first two variables and other lines are missing from this listing. */
401 struct interp_result ** interp_results,
size_t * result_count) {
/* initialisers are on missing lines */
404 size_t num_interpolated_tgt =
406 yac_int * interpolated_tgts_global_ids =
408 size_t * interpolated_tgts_local_ids =
409 xmalloc(num_interpolated_tgt *
sizeof(*interpolated_tgts_local_ids));
/* arguments of a call on a missing line (presumably
 * yac_interp_grid_tgt_global_to_local) filling the local ids */
411 interp_grid, interpolated_tgts_global_ids, num_interpolated_tgt,
412 interpolated_tgts_local_ids);
/* no allocation at all when there is nothing to store */
416 (num_interpolated_tgt > 0)?
417 xmalloc(num_interpolated_tgt *
sizeof(*initial_interp_results)):NULL;
418 for (
size_t i = 0; i < num_interpolated_tgt; ++i) {
419 initial_interp_results[i].
local_id = interpolated_tgts_local_ids[i];
420 initial_interp_results[i].
global_id = interpolated_tgts_global_ids[i];
/* SIZE_MAX as idx; NOTE(review): presumably marks "no position in the
 * caller's target list" -- confirm */
421 initial_interp_results[i].
idx = SIZE_MAX;
/* right-hand side of an assignment whose target is on a missing line */
423 interpolated_tgts_global_ids[i];
/* the single stencil entry refers to element i with weight 1.0 */
426 initial_interp_results[i].
stencil.
data.single.idx = (uint64_t)i;
427 initial_interp_results[i].
stencil.
data.single.weight = 1.0;
429 free(interpolated_tgts_local_ids);
430 free(interpolated_tgts_global_ids);
/* ownership of the array passes to the caller */
432 *interp_results = initial_interp_results;
433 *result_count = num_interpolated_tgt;
437 const void * a,
const void * b) {
439 int ret = (((
const struct interp_result *)a)->stencil.count >
453 const void * a,
const void * b) {
470 const void * a,
const void * b) {
479 const void * a,
const void * b) {
488 const void * a,
const void * b) {
498 return ((*(
const yac_int *)a) >
/* Fragment (listing lines 508-558): flattens the stencils of result_count
 * interpolation results into separate arrays (entries per target, stencil
 * indices, stencil ranks, weights) and fills interp_tgt_remote_points.
 * NOTE(review): presumably extract_interp_info; the leading parameters,
 * the qsort comparator and parts of the loops are on missing lines. */
508 size_t ** num_stencils_per_tgt_,
size_t ** stencil_indices_,
509 int ** stencil_ranks_,
double ** w_) {
/* the results are sorted first (comparator not shown) */
512 qsort(interp_results, result_count,
sizeof(*interp_results),
/* NOTE(review): total_num_stencils is presumably accumulated inside the
 * loop below on a line that is missing from this listing */
515 size_t total_num_stencils = 0;
516 size_t * interpolated_tgts_local_ids =
517 xmalloc(result_count *
sizeof(*interpolated_tgts_local_ids));
518 for (
size_t i = 0; i < result_count; ++i) {
519 interpolated_tgts_local_ids[i] = interp_results[i].
local_id;
/* data is produced by a call on a missing line (presumably
 * yac_interp_grid_get_tgt_remote_points) from the collected local ids */
522 interp_tgt_remote_points->
data =
524 interp_grid, interpolated_tgts_local_ids, result_count);
525 interp_tgt_remote_points->
count = result_count;
526 free(interpolated_tgts_local_ids);
/* one counter per result, one index/rank/weight per stencil entry */
528 size_t * num_stencils_per_tgt =
529 xmalloc(result_count *
sizeof(*num_stencils_per_tgt));
530 size_t * stencil_indices =
531 xmalloc(total_num_stencils *
sizeof(*stencil_indices));
532 int * stencil_ranks =
533 xmalloc(total_num_stencils *
sizeof(*stencil_ranks));
534 double * w =
xmalloc(total_num_stencils *
sizeof(*w));
/* i runs over results, j over the flattened stencil entries */
536 for (
size_t i = 0, j = 0; i < result_count; ++i) {
540 num_stencils_per_tgt[i] = curr_count;
/* a single entry is read from data.single */
542 if (curr_count == 1) {
543 stencil_indices[j] = interp_results[i].
stencil.
data.single.idx;
544 stencil_ranks[j] = interp_results[i].
stencil.
data.single.rank;
545 w[j] = interp_results[i].
stencil.
data.single.weight;
/* loop over several entries; its body is missing from this listing */
548 for (
size_t k = 0; k < curr_count; ++k, ++j) {
/* return the arrays to the caller; the assignment to *w_ is not part of
 * this listing */
556 *num_stencils_per_tgt_ = num_stencils_per_tgt;
557 *stencil_indices_ = stencil_indices;
558 *stencil_ranks_ = stencil_ranks;
/* Fragment (listing lines 572-623): matches newly available stencils with
 * pending neighbour requests by global id, removes the matched requests
 * from the open list and forwards them for redistribution.
 * NOTE(review): presumably update_neigh_requests; the function header,
 * the sort comparators and several statements are on missing lines. */
572 size_t request_count = *request_count_;
573 size_t match_count = 0;
576 size_t stencil_count = interp_stencils->
count;
/* arguments of a sort call on the stencils (call name and comparator are
 * on missing lines) */
580 stencils, stencil_count,
sizeof(*stencils),
/* simultaneous forward scan over stencils (i) and requests (j);
 * NOTE(review): relies on both being ordered by global id */
582 for (
size_t i = 0, j = 0; i < stencil_count; ++i) {
/* skip requests with a smaller global id */
585 while ((j < request_count) &&
586 (neigh_requests[j].
global_id < curr_global_id)) ++j;
/* every request with the same global id receives a copy of stencil i */
587 while ((j < request_count) &&
588 (neigh_requests[j].
global_id == curr_global_id)) {
591 neigh_requests[j].
stencil = stencils[i];
/* re-sort the requests (comparator not shown); afterwards the matched
 * requests are taken from the end of the array */
598 qsort(neigh_requests, request_count,
sizeof(*neigh_requests),
/* only the unmatched requests remain open for the caller */
601 request_count -= match_count;
602 *request_count_ = request_count;
603 struct tgt_request * neigh_request_matches = neigh_requests + request_count;
/* sort the remaining open requests again (comparator not shown) */
606 qsort(neigh_requests, request_count,
sizeof(*neigh_requests),
609 size_t * pack_order =
xmalloc(match_count *
sizeof(*pack_order));
610 int * origin_rank =
xmalloc(match_count *
sizeof(*origin_rank));
611 for (
size_t i = 0; i < match_count; ++i) {
/* rank stored in the matched request */
613 origin_rank[i] = neigh_request_matches[i].
rank;
/* arguments of the exchange call (name and remaining arguments missing) */
618 neigh_request_matches, match_count,
sizeof(*neigh_request_matches),
623 return relocated_results;
/* Fragment (listing lines 629-657): collects the neighbour local ids of
 * `count` targets and, for every neighbour slot, the global id of the
 * target it belongs to.
 * NOTE(review): presumably get_tgt_neigh_info_cell; the leading parameters
 * and the source of the per-target neighbour counts are on missing lines. */
629 size_t ** neigh_local_ids_,
yac_int ** neigh_to_tgt_global_id_,
630 size_t * total_num_neighbours_) {
633 size_t total_num_neighbours = 0;
/* sum of the per-target neighbour counts (summand on a missing line) */
636 for (
size_t i = 0; i <
count; ++i)
637 total_num_neighbours +=
639 size_t * neigh_local_ids =
640 xmalloc(total_num_neighbours *
sizeof(*neigh_local_ids));
/* arguments of a call on a missing line (presumably
 * yac_interp_grid_get_tgt_cell_neighbours) filling neigh_local_ids */
642 interp_grid, tgt_local_ids,
count, neigh_local_ids);
645 yac_int * neigh_to_tgt_global_id =
646 xmalloc(total_num_neighbours *
sizeof(*neigh_to_tgt_global_id));
/* i runs over targets, j over the flattened neighbour slots */
647 for (
size_t i = 0, j = 0; i <
count; ++i) {
650 yac_int curr_tgt_global_id = tgt_global_ids[i];
/* each neighbour slot of target i records that target's global id */
651 for (
int k = 0; k < curr_num_neigh; ++k, ++j)
652 neigh_to_tgt_global_id[j] = curr_tgt_global_id;
/* ownership of both arrays passes to the caller */
655 *neigh_local_ids_ = neigh_local_ids;
656 *neigh_to_tgt_global_id_ = neigh_to_tgt_global_id;
657 *total_num_neighbours_ = total_num_neighbours;
/* Fragment (listing lines 663-690): same output as the cell variant, but
 * the neighbour list and the per-target neighbour counts are produced by
 * one call that allocates the neighbour array itself.
 * NOTE(review): presumably get_tgt_neigh_info_vertex; the leading
 * parameters and the name of the called function are on missing lines. */
663 size_t ** neigh_local_ids_,
yac_int ** neigh_to_tgt_global_id_,
664 size_t * total_num_neighbours_) {
666 int * num_neighs_per_vertex =
xmalloc(
count *
sizeof(*num_neighs_per_vertex));
/* set by the call below, which receives its address */
667 size_t * neigh_vertices;
/* arguments of a call on a missing line (presumably
 * yac_interp_grid_get_tgt_vertex_neighbours) */
670 interp_grid, tgt_local_ids,
count,
671 &neigh_vertices, num_neighs_per_vertex);
673 size_t total_num_neighbours = 0;
674 for (
size_t i = 0; i <
count; ++i)
675 total_num_neighbours += (
size_t)(num_neighs_per_vertex[i]);
678 yac_int * neigh_to_tgt_global_id =
679 xmalloc(total_num_neighbours *
sizeof(*neigh_to_tgt_global_id));
/* i runs over targets, j over the flattened neighbour slots */
680 for (
size_t i = 0, j = 0; i <
count; ++i) {
681 int curr_num_neigh = num_neighs_per_vertex[i];
682 yac_int curr_tgt_global_id = tgt_global_ids[i];
/* each neighbour slot of target i records that target's global id */
683 for (
int k = 0; k < curr_num_neigh; ++k, ++j)
684 neigh_to_tgt_global_id[j] = curr_tgt_global_id;
/* the counts are only needed locally */
686 free(num_neighs_per_vertex);
/* ownership of both arrays passes to the caller */
688 *neigh_local_ids_ = neigh_vertices;
689 *neigh_to_tgt_global_id_ = neigh_to_tgt_global_id;
690 *total_num_neighbours_ = total_num_neighbours;
/* Fragment (listing lines 696-738) of get_tgt_neigh_info (name taken from
 * the error message below): gathers neighbour information for `count`
 * targets, drops trailing SIZE_MAX entries and returns local ids, global
 * ids and the owning target's global id for every remaining neighbour.
 * NOTE(review): the first parameter, the branch conditions and the names
 * of the called functions are on missing lines. */
696 size_t * tgt_local_ids,
yac_int * tgt_global_ids,
size_t count,
697 size_t ** neigh_local_ids_,
yac_int ** neigh_global_ids_,
698 yac_int ** neigh_to_tgt_global_id_,
size_t * total_num_neighbours_) {
/* filled by one of the two calls below */
701 size_t * neigh_local_ids;
702 yac_int * neigh_to_tgt_global_id;
703 size_t total_num_neighbours;
/* message of a check whose condition is on a missing line */
709 "ERROR(get_tgt_neigh_info): unsupported target field location")
/* two alternative calls with identical arguments (presumably the cell and
 * the vertex variant, selected by the target field location) */
712 interp_grid, tgt_local_ids, tgt_global_ids,
count,
713 &neigh_local_ids, &neigh_to_tgt_global_id, &total_num_neighbours);
716 interp_grid, tgt_local_ids, tgt_global_ids,
count,
717 &neigh_local_ids, &neigh_to_tgt_global_id, &total_num_neighbours);
/* arguments of a call on a missing line (presumably a sort keyed on
 * neigh_local_ids that carries neigh_to_tgt_global_id along) */
721 neigh_local_ids, total_num_neighbours, neigh_to_tgt_global_id);
/* cut off all trailing entries whose local id is SIZE_MAX;
 * NOTE(review): SIZE_MAX presumably marks a non-existing neighbour */
722 while((total_num_neighbours > 0) &&
723 (neigh_local_ids[total_num_neighbours-1] == SIZE_MAX))
724 --total_num_neighbours;
728 xmalloc(total_num_neighbours *
sizeof(*neigh_global_ids));
/* arguments of a call on a missing line that fills neigh_global_ids for
 * the remaining local ids */
730 interp_grid, neigh_local_ids, total_num_neighbours, neigh_global_ids);
/* shrink the arrays to the reduced length before returning them */
733 xrealloc(neigh_local_ids, total_num_neighbours *
sizeof(*neigh_local_ids));
734 *neigh_global_ids_ = neigh_global_ids;
735 *neigh_to_tgt_global_id_ =
737 total_num_neighbours *
sizeof(*neigh_to_tgt_global_id));
738 *total_num_neighbours_ = total_num_neighbours;
/* Fragment (listing lines 745-814) of send_neigh_request (name taken from
 * the string passed to the exchange call): sends the distinct neighbour
 * global ids to their owner ranks and turns the ids received from other
 * ranks into a tgt_request array returned through *neigh_requests_.
 * NOTE(review): the leading parameters, comparators and several statements
 * are on missing lines. */
745 size_t * neigh_local_ids,
yac_int * neigh_global_ids,
746 size_t num_neighbours,
struct tgt_request ** neigh_requests_,
747 size_t * request_count_) {
750 int * neigh_dist_owner =
751 xmalloc(num_neighbours *
sizeof(*neigh_dist_owner));
/* arguments of a call on a missing line (presumably
 * yac_interp_grid_determine_dist_tgt_owners) */
753 interp_grid, neigh_local_ids, num_neighbours,
/* work on a copy so the caller's neigh_global_ids stays untouched */
756 yac_int * send_neigh_global_ids =
757 xmalloc(num_neighbours *
sizeof(*send_neigh_global_ids));
758 memcpy(send_neigh_global_ids, neigh_global_ids,
759 num_neighbours *
sizeof(*send_neigh_global_ids));
/* arguments of a call on a missing line (presumably a sort by owner rank
 * that carries the global ids along) */
763 neigh_dist_owner, num_neighbours, send_neigh_global_ids);
/* from/new_from delimit the ids of the current rank in the input,
 * to/new_to delimit the de-duplicated ids written back in place */
766 size_t to = 0, new_to = 0, from = 0, new_from = 0;
767 for (
int rank = 0; rank < comm.
size; ++rank) {
/* advance new_from past all entries owned by this rank */
768 while ((new_from < num_neighbours) &&
769 (neigh_dist_owner[new_from] == rank)) new_from++;
770 size_t curr_count = new_from - from;
/* sort this rank's ids (comparator on a missing line) */
771 qsort(send_neigh_global_ids + from, curr_count,
/* initial "previous" id: first id minus one, so the first id always
 * differs from it */
774 (curr_count > 0)?(send_neigh_global_ids[from]-1):0;
/* keep each distinct id once, compacting towards the front */
775 for (; from < new_from; ++from) {
776 yac_int curr_global_id = send_neigh_global_ids[from];
777 if (prev_global_id != curr_global_id) {
778 send_neigh_global_ids[new_to++] = curr_global_id;
779 prev_global_id = curr_global_id;
/* number of distinct ids kept for this rank */
782 curr_count = new_to - to;
786 free(neigh_dist_owner);
/* request_count is set on a missing line */
793 yac_int * recv_neigh_global_ids =
794 xmalloc(request_count *
sizeof(*recv_neigh_global_ids));
795 yac_alltoallv_yac_int_p2p(
798 "send_neigh_request", __LINE__);
800 xmalloc(request_count *
sizeof(*neigh_requests));
/* i = sending rank, k = running index over all received ids */
801 for (
int i = 0, k = 0; i < comm.
size; ++i) {
802 for (
size_t j = 0; j < comm.
recvcounts[i]; ++j, ++k) {
/* remember which id was requested and by which rank */
803 neigh_requests[k].
global_id = recv_neigh_global_ids[k];
804 neigh_requests[k].
rank = i;
/* sort the requests (comparator on a missing line) */
808 qsort(neigh_requests, request_count,
sizeof(*neigh_requests),
810 free(send_neigh_global_ids);
811 free(recv_neigh_global_ids);
/* ownership of the request array passes to the caller */
813 *neigh_requests_ = neigh_requests;
814 *request_count_ = request_count;
/* Fragment (listing lines 818-830): allocates `count` interp_result
 * entries, fills them from the given id arrays and returns the sorted
 * array.
 * NOTE(review): presumably init_interp_results; the function name line,
 * the declaration of interp_results and the qsort comparator are missing. */
818 size_t * tgt_local_ids,
yac_int * tgt_global_ids,
size_t count) {
821 xmalloc(count *
sizeof(*interp_results));
822 for (
size_t i = 0; i < count; ++i) {
823 interp_results[i].
local_id = tgt_local_ids[i];
824 interp_results[i].
global_id = tgt_global_ids[i];
/* remember the position in the input arrays; sorting below reorders the
 * entries */
825 interp_results[i].
idx = i;
828 qsort(interp_results, count,
sizeof(*interp_results),
/* the caller owns the returned array */
830 return interp_results;
834 const void * a,
const void * b) {
/* Fragment (listing lines 850-918): builds one stencil from the entries of
 * `count` neighbour stencils, then merges entries that compare equal by
 * adding up their weights.
 * NOTE(review): presumably copy_result_stencil_multi; the function header,
 * the comparator calls and several statements are on missing lines. */
850 size_t stencil_info_count = 0;
/* total number of entries over all selected neighbour stencils */
852 for (
size_t i = 0; i <
count; ++i)
853 stencil_info_count += neigh_stencils[stencil_indices[i]].
count;
/* more than one entry: heap storage (assignment target on a missing
 * line); otherwise the entry lives in stencil.data.single */
858 if (stencil_info_count > 1) {
861 xmalloc(stencil_info_count *
sizeof(*stencil_infos))));
863 stencil_infos = &(stencil.
data.single);
/* i runs over neighbour stencils, j over the copied entries */
867 for (
size_t i = 0, j = 0; i < count; ++i) {
869 neigh_stencils + stencil_indices[i];
870 size_t curr_stencil_info_count = curr_stencil->
count;
/* source selection depending on single/multi storage (operands on a
 * missing line) */
872 (curr_stencil_info_count == 1)?
874 memcpy(stencil_infos + j, curr_stencil_infos,
875 curr_stencil_info_count *
sizeof(*stencil_infos));
/* per-entry adjustment; the loop body is missing from this listing */
876 for (
size_t k = 0; k < curr_stencil_info_count; ++k, ++j)
/* merging is only needed when there are at least two entries */
883 if (stencil_info_count > 1) {
/* sort so that entries that compare equal become adjacent */
886 qsort(stencil_infos, stencil_info_count,
sizeof(*stencil_infos),
/* prev = last entry kept, curr = entry under inspection */
890 struct stencil_info * prev_stencil_info = stencil_infos,
891 * curr_stencil_info = stencil_infos + 1;
892 size_t new_stencil_info_count = 1;
893 for (
size_t i = 1; i < stencil_info_count; ++i, ++curr_stencil_info) {
/* arguments of a comparison call on a missing line */
895 curr_stencil_info, prev_stencil_info)) {
/* keep the entry, moving it forward only when a gap has opened */
896 if (new_stencil_info_count != i)
897 stencil_infos[new_stencil_info_count] =
899 ++new_stencil_info_count;
900 prev_stencil_info = curr_stencil_info;
/* otherwise add the weight to the last kept entry */
902 stencil_infos[new_stencil_info_count-1].
weight +=
903 curr_stencil_info->weight;
/* entries were merged: adopt the reduced count */
906 if (new_stencil_info_count != stencil_info_count) {
907 stencil_info_count = new_stencil_info_count;
/* a single remaining entry is copied into data.single */
908 if (new_stencil_info_count == 1) {
909 stencil.
data.single = *stencil_infos;
/* arguments of a call on a missing line (presumably xrealloc shrinking
 * the entry array to the reduced count) */
914 stencil_infos, stencil_info_count *
sizeof(*stencil_infos));
918 stencil.
count = stencil_info_count;
/* Fragment (listing lines 926-1003): assigns received neighbour stencils
 * to the still open targets and returns how many targets left the open
 * list (num_open_tgt minus the new open count).
 * NOTE(review): presumably match_neigh_answers_with_tgts; the leading
 * parameters, sort calls/comparators and several statements are missing. */
926 size_t * stencil_indices,
size_t * num_neighbours_,
927 struct interp_result * interp_results,
size_t * num_open_tgt_) {
929 size_t num_neighbours = *num_neighbours_;
930 size_t num_open_tgt = *num_open_tgt_;
933 size_t answer_count = neigh_answer->
count;
/* arguments of a sort call on the answers (name/comparator missing) */
938 neigh_stencils, answer_count,
sizeof(*neigh_stencils),
940 size_t match_count = 0;
/* simultaneous forward scan over answers (i) and neighbour ids (j);
 * NOTE(review): relies on both being ordered by global id */
941 for (
size_t i = 0, j = 0; i < answer_count; ++i) {
944 while ((j < num_neighbours) &&
945 (neigh_global_ids[j] < curr_global_id)) ++j;
947 while ((j < num_neighbours) &&
948 (neigh_global_ids[j] == curr_global_id)) {
/* mark the neighbour as answered by overwriting its id with XT_INT_MAX
 * and remember which answer belongs to it */
949 neigh_global_ids[j] = XT_INT_MAX;
950 stencil_indices[j] = i;
/* arguments of a call on a missing line (presumably a sort keyed on
 * neigh_global_ids, which would move the XT_INT_MAX entries to the end) */
958 neigh_global_ids, num_neighbours, neigh_to_tgt_global_id, stencil_indices);
/* the answered neighbours are no longer open */
959 num_neighbours -= match_count;
960 *num_neighbours_ = num_neighbours;
/* continue with the match_count entries behind the open ones */
962 neigh_to_tgt_global_id += num_neighbours;
963 stencil_indices += num_neighbours;
/* arguments of a call on a missing line (presumably a sort of the matches
 * by target global id) */
967 neigh_to_tgt_global_id, match_count, stencil_indices);
/* i walks the matches in groups of equal target global id, k walks the
 * open targets */
970 for (
size_t i = 0, k = 0; i < match_count;) {
975 yac_int curr_tgt_global_id = neigh_to_tgt_global_id[i++];
976 while ((i < match_count) &&
977 (neigh_to_tgt_global_id[i] == curr_tgt_global_id)) ++i;
/* number of matches belonging to this target (prev_i is set on a missing
 * line) */
978 size_t curr_stencil_count = i - prev_i;
980 while ((k < num_open_tgt) &&
981 (interp_results[k].
global_id < curr_tgt_global_id)) ++k;
983 while ((k < num_open_tgt) &&
984 (interp_results[k].
global_id == curr_tgt_global_id)) {
/* arguments of a call on a missing line: every contributing stencil gets
 * the same weight 1/curr_stencil_count */
988 neigh_stencils, stencil_indices + prev_i, curr_stencil_count,
989 curr_tgt_global_id, 1.0 / (
double)curr_stencil_count);
/* re-sort the targets (comparator missing); the leading entries with an
 * empty stencil are counted as still open below */
996 qsort(interp_results, num_open_tgt,
sizeof(*interp_results),
998 size_t new_num_open_tgt = 0;
999 while ((new_num_open_tgt < num_open_tgt) &&
1000 (interp_results[new_num_open_tgt].stencil.count == 0))
1002 *num_open_tgt_ = new_num_open_tgt;
1003 return num_open_tgt - new_num_open_tgt;
/* Fragment (listing lines 1033-1142): driver of the iterative search. It
 * gathers neighbour information for `count` targets, registers neighbour
 * requests, iterates at most max_creep_distance times, writes the
 * per-target flags into interp_flag and hands the stencils found to the
 * weights object.
 * NOTE(review): presumably do_search_creep_2; the function header, most
 * call names and several declarations are on missing lines. */
1033 size_t * tgt_points,
yac_int * tgt_global_ids,
size_t count,
1039 size_t * neigh_local_ids;
1041 yac_int * neigh_to_tgt_global_id;
1042 size_t total_num_neighbours;
/* arguments of a call on a missing line (presumably get_tgt_neigh_info) */
1044 interp_grid, tgt_points, tgt_global_ids, count,
1045 &neigh_local_ids, &neigh_global_ids, &neigh_to_tgt_global_id,
1046 &total_num_neighbours);
/* one slot per neighbour */
1047 size_t * stencil_indices =
1048 xmalloc(total_num_neighbours *
sizeof(*stencil_indices));
1053 size_t request_count;
/* arguments of a call on a missing line (presumably send_neigh_request) */
1055 interp_grid, comm, neigh_local_ids, neigh_global_ids, total_num_neighbours,
1056 &neigh_requests, &request_count);
/* local ids are not used after this point in the visible code */
1057 free(neigh_local_ids);
/* arguments of a call on a missing line (presumably a sort by neighbour
 * global id that carries neigh_to_tgt_global_id along) */
1061 neigh_global_ids, total_num_neighbours, neigh_to_tgt_global_id);
/* initially every target is open */
1064 size_t num_open_tgt = count;
1071 size_t result_count;
/* arguments of a call on a missing line (presumably get_initial_results) */
1073 interp_grid, comm, weights, &initial_interp_results, &result_count);
1075 for (
int creep_distance = 0;
1076 creep_distance < max_creep_distance; ++creep_distance) {
/* 1 if this rank has results */
1079 int result_flag = result_count > 0;
/* in-place MPI_MAX reduction of the flag over comm.comm (call name on a
 * missing line): the flag stays 1 as long as any rank has results */
1082 MPI_IN_PLACE, &result_flag, 1, MPI_INT, MPI_MAX, comm.
comm), comm.
comm);
/* no rank has results: stop iterating */
1083 if (result_flag == 0)
break;
/* first iteration uses the initial results, later ones the entries of
 * interp_results behind the open targets */
1090 (creep_distance == 0)?
1091 initial_interp_results:(interp_results + num_open_tgt),
/* the initial results are only needed in the first iteration */
1093 if (creep_distance == 0) free(initial_interp_results);
/* arguments of a call on a missing line (presumably
 * update_neigh_requests) */
1099 comm, neigh_requests, &request_count, interp_stencils);
/* arguments of a call on a missing line (presumably
 * match_neigh_answers_with_tgts) */
1104 neigh_matches, neigh_global_ids, neigh_to_tgt_global_id,
1105 stencil_indices, &total_num_neighbours, interp_results,
1108 free(interp_stencils);
1110 free(neigh_matches);
/* release the search bookkeeping */
1113 free(neigh_requests);
1114 free(stencil_indices);
1115 free(neigh_global_ids);
1116 free(neigh_to_tgt_global_id);
/* idx maps each result back to its position in the caller's arrays; a
 * target is flagged when its stencil is non-empty */
1118 for (
size_t i = 0; i <
count; ++i)
1119 interp_flag[interp_results[i].idx] =
1120 interp_results[i].stencil.count > 0;
1124 size_t * num_stencils_per_tgt;
1125 int * stencil_ranks;
/* arguments of a call on a missing line (presumably extract_interp_info)
 * for the results behind the open targets; note that the already freed
 * stencil_indices variable is reused as an output pointer here */
1128 interp_grid, interp_results + num_open_tgt,
count - num_open_tgt,
1129 &interp_tgt_remote_points, &num_stencils_per_tgt, &stencil_indices,
1130 &stencil_ranks, &w);
/* arguments of a call on a missing line (presumably
 * yac_interp_weights_wcopy_weights) */
1132 weights, &interp_tgt_remote_points, num_stencils_per_tgt,
1133 stencil_indices, stencil_ranks, w);
1134 free(interp_tgt_remote_points.
data);
1136 free(num_stencils_per_tgt);
1137 free(stencil_ranks);
1138 free(stencil_indices);
/* only stencils with more than one entry own heap storage (data.multi) */
1139 for (
size_t i = num_open_tgt; i <
count; ++i)
1140 if (interp_results[i].stencil.count > 1)
1141 free(interp_results[i].stencil.data.multi);
1142 free(interp_results);
/* Fragment (listing lines 1149-1263) of do_search_creep (name taken from
 * the `routine` string below): redistributes the target points to their
 * owner ranks, runs the search there, sends the per-point flags back and
 * returns the number of flagged points.
 * NOTE(review): the leading parameters, most call names and several
 * statements are on missing lines. */
1149 size_t * tgt_points,
size_t count,
1151 int * interpolation_complete) {
/* nothing to do once interpolation is marked complete */
1153 if (*interpolation_complete)
return 0;
/* name used in error messages */
1155 char const * routine =
"do_search_creep";
/* message of a check whose condition is on a missing line */
1167 "ERROR(%s): unsupported target field location "
1168 "(has to be YAC_LOC_CELL or YAC_LOC_CORNER)", routine)
1171 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
/* arguments of a call on a missing line (presumably yac_get_comm_buffers) */
1173 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1174 int comm_rank, comm_size;
1179 int * tgt_points_dist_owner =
1180 xmalloc(count *
sizeof(*tgt_points_dist_owner));
/* arguments of a call on a missing line (presumably
 * yac_interp_grid_determine_dist_tgt_owners) */
1182 interp_grid, tgt_points, count, tgt_points_dist_owner);
1183 yac_int * tgt_points_global_ids =
1184 xmalloc(count *
sizeof(*tgt_points_global_ids));
/* arguments of a call on a missing line (presumably
 * yac_interp_grid_get_tgt_global_ids) */
1186 interp_grid, tgt_points, count, tgt_points_global_ids);
/* points owned by this rank get owner INT_MAX; NOTE(review): presumably so
 * that the sort below places them behind all points that must be sent */
1189 size_t local_count = 0;
1190 for (
size_t i = 0; i < count; ++i) {
1191 if (tgt_points_dist_owner[i] == comm_rank) {
1193 tgt_points_dist_owner[i] = INT_MAX;
1196 size_t send_count = count - local_count;
/* arguments of a call on a missing line (presumably a sort by owner that
 * carries tgt_points and the global ids along) */
1200 tgt_points_dist_owner, count, tgt_points, tgt_points_global_ids);
/* number of points to send to each rank */
1203 for (
size_t i = 0; i < send_count; ++i)
1204 sendcounts[tgt_points_dist_owner[i]]++;
/* arguments of a call on a missing line (presumably
 * yac_generate_alltoallv_args) */
1206 1, sendcounts, recvcounts, sdispls, rdispls, comm);
/* total receive count = last displacement + last count */
1207 size_t recv_count = recvcounts[comm_size-1] + rdispls[comm_size-1];
/* the global id array is resized (call name on a missing line) to hold
 * three consecutive sections: send | local | recv */
1210 tgt_points_global_ids,
1211 (send_count + local_count + recv_count) *
sizeof(*global_ids_buffer));
1212 yac_int * send_global_ids = global_ids_buffer;
1213 yac_int * recv_global_ids = global_ids_buffer + send_count;
/* received ids are written behind the local section */
1214 yac_alltoallv_yac_int_p2p(
1215 send_global_ids, sendcounts, sdispls+1,
1216 recv_global_ids + local_count, recvcounts, rdispls, comm,
/* local ids of the points handled on this rank: local ones first, then
 * the received ones */
1220 size_t * temp_tgt_points =
1221 xmalloc((local_count + recv_count) *
sizeof(*temp_tgt_points));
1222 memcpy(temp_tgt_points, tgt_points + send_count,
1223 local_count *
sizeof(*tgt_points));
/* arguments of a call on a missing line (presumably
 * yac_interp_grid_tgt_global_to_local) for the received ids */
1225 interp_grid, recv_global_ids + local_count, recv_count,
1226 temp_tgt_points + local_count);
/* flag buffer with the same send | local | recv layout */
1229 int * interp_flag_buffer =
1230 xmalloc((send_count + local_count + recv_count) *
1231 sizeof(*interp_flag_buffer));
/* temp_interp_flag covers local + recv, interp_flag starts at the send
 * section, so interp_flag[0..count) spans send + local */
1232 int * temp_interp_flag = interp_flag_buffer + send_count;
1233 int * interp_flag = interp_flag_buffer;
1234 memset(temp_interp_flag, 0, (local_count + recv_count) *
sizeof(*temp_interp_flag));
/* tail of the search call (presumably do_search_creep_2) on the local and
 * received points */
1237 local_count + recv_count, temp_interp_flag, weights);
1238 free(global_ids_buffer);
1239 free(temp_tgt_points);
/* reverse exchange: flags of received points go back to the ranks that
 * sent them and land in the send section of interp_flag */
1242 yac_alltoallv_int_p2p(
1243 temp_interp_flag + local_count, recvcounts, rdispls,
1244 interp_flag, sendcounts, sdispls+1, comm, routine, __LINE__);
/* count the flagged points; further statements of the if body are on
 * missing lines */
1248 size_t num_interpolated_tgt = 0;
1249 for (
size_t i = 0; i < count; ++i) {
1250 if (interp_flag[i]) {
1252 ++num_interpolated_tgt;
1258 free(interp_flag_buffer);
1259 free(tgt_points_dist_owner);
1263 return num_interpolated_tgt;
/* Fragment (listing lines 1269-1273): allocation of one method object and
 * selection of its creep distance.
 * NOTE(review): presumably part of yac_interp_method_creep_new; the
 * function header and both assignment targets are on missing lines. */
1269 xmalloc(1 *
sizeof(*method_creep));
/* a negative creep_distance is replaced by INT_MAX */
1273 (creep_distance >= 0)?creep_distance:INT_MAX;
#define ENSURE_ARRAY_SIZE(arrayp, curr_array_size, req_size)
void yac_interp_grid_get_tgt_cell_neighbours(struct yac_interp_grid *interp_grid, size_t *tgt_cells, size_t count, size_t *neighbours)
enum yac_location yac_interp_grid_get_tgt_field_location(struct yac_interp_grid *interp_grid)
void yac_interp_grid_get_tgt_global_ids(struct yac_interp_grid *interp_grid, size_t *tgt_points, size_t count, yac_int *tgt_global_ids)
MPI_Comm yac_interp_grid_get_MPI_Comm(struct yac_interp_grid *interp_grid)
struct remote_point * yac_interp_grid_get_tgt_remote_points(struct yac_interp_grid *interp_grid, size_t *tgt_points, size_t count)
void yac_interp_grid_determine_dist_tgt_owners(struct yac_interp_grid *interp_grid, size_t *tgt_indices, size_t count, int *owners)
void yac_interp_grid_get_tgt_vertex_neighbours(struct yac_interp_grid *interp_grid, size_t *vertices, size_t count, size_t **neigh_vertices, int *num_neighs_per_vertex)
struct yac_const_basic_grid_data * yac_interp_grid_get_basic_grid_data_tgt(struct yac_interp_grid *interp_grid)
void yac_interp_grid_tgt_global_to_local(struct yac_interp_grid *interp_grid, yac_int *tgt_global_ids, size_t count, size_t *tgt_local_ids)
static void do_search_creep_2(struct yac_interp_grid *interp_grid, int const max_creep_distance, size_t *tgt_points, yac_int *tgt_global_ids, size_t count, int *interp_flag, struct yac_interp_weights *weights)
static int compare_tgt_request_global_id(const void *a, const void *b)
static void pack_result_stencil(struct result_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype stencil_info_dt, MPI_Comm comm)
static struct result_stencils * unpack_result_stencils(size_t count, void *packed_data, size_t packed_data_size, MPI_Datatype stencil_info_dt, MPI_Comm comm)
static int compare_interp_result_stencil_local_id(const void *a, const void *b)
static int compare_stencil_info(const void *a, const void *b)
static MPI_Datatype yac_get_stencil_info_mpi_datatype(MPI_Comm comm)
static struct result_stencil * tgt_request_get_stencil(void *tgt_request)
static struct result_stencils * update_neigh_requests(struct comm_stuff comm, struct tgt_request *neigh_requests, size_t *request_count_, struct result_stencils *interp_stencils)
static struct result_stencil * interp_result_get_stencil(void *interp_result)
static void get_tgt_neigh_info_vertex(struct yac_interp_grid *interp_grid, size_t *tgt_local_ids, yac_int *tgt_global_ids, size_t count, size_t **neigh_local_ids_, yac_int **neigh_to_tgt_global_id_, size_t *total_num_neighbours_)
static struct result_stencils * exchange_interp_results(void *results, size_t result_count, size_t result_size, struct result_stencil *(*result_get_stencil)(void *), size_t *pack_order, int *ranks, struct comm_stuff comm)
static void free_comm_stuff(struct comm_stuff comm)
static struct comm_stuff init_comm_stuff(struct yac_interp_grid *interp_grid)
static int compare_result_stencil_global_id(const void *a, const void *b)
static int compare_interp_result_stencil(const void *a, const void *b)
static void get_tgt_neigh_info(struct yac_interp_grid *interp_grid, size_t *tgt_local_ids, yac_int *tgt_global_ids, size_t count, size_t **neigh_local_ids_, yac_int **neigh_global_ids_, yac_int **neigh_to_tgt_global_id_, size_t *total_num_neighbours_)
static void get_initial_results(struct yac_interp_grid *interp_grid, struct comm_stuff comm, struct yac_interp_weights *interp_weights, struct interp_result **interp_results, size_t *result_count)
static void extract_interp_info(struct yac_interp_grid *interp_grid, struct interp_result *interp_results, size_t result_count, struct remote_points *interp_tgt_remote_points, size_t **num_stencils_per_tgt_, size_t **stencil_indices_, int **stencil_ranks_, double **w_)
struct interp_method * yac_interp_method_creep_new(int creep_distance)
static struct result_stencil copy_result_stencil_multi(struct result_stencil *neigh_stencils, size_t *stencil_indices, size_t count, yac_int global_id, double weight)
static void pack_result_stencils(void *results, size_t result_count, size_t result_size, struct result_stencil *(*get_stencil)(void *), size_t *pack_order, void **pack_data, int *pack_sizes, MPI_Datatype stencil_info_dt, MPI_Comm comm)
static size_t do_search_creep(struct interp_method *method, struct yac_interp_grid *interp_grid, size_t *tgt_points, size_t count, struct yac_interp_weights *weights, int *interpolation_complete)
static void get_tgt_neigh_info_cell(struct yac_interp_grid *interp_grid, size_t *tgt_local_ids, yac_int *tgt_global_ids, size_t count, size_t **neigh_local_ids_, yac_int **neigh_to_tgt_global_id_, size_t *total_num_neighbours_)
static struct interp_result * init_interp_results(size_t *tgt_local_ids, yac_int *tgt_global_ids, size_t count)
static int compare_yac_int(const void *a, const void *b)
static size_t match_neigh_answers_with_tgts(struct result_stencils *neigh_answer, yac_int *neigh_global_ids, yac_int *neigh_to_tgt_global_id, size_t *stencil_indices, size_t *num_neighbours_, struct interp_result *interp_results, size_t *num_open_tgt_)
static void unpack_result_stencil(struct result_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype stencil_info_dt, MPI_Comm comm, struct stencil_info **stencil_info_buffer, size_t *stencil_info_buffer_array_size, size_t *stencil_info_buffer_size)
static void get_result_stencil_pack_sizes(void *results, size_t result_count, size_t result_size, struct result_stencil *(*get_stencil)(void *), size_t *pack_order, int *pack_sizes, MPI_Datatype stencil_info_dt, MPI_Comm comm)
static struct interp_method_vtable interp_method_creep_vtable
static void send_neigh_request(struct yac_interp_grid *interp_grid, struct comm_stuff comm, size_t *neigh_local_ids, yac_int *neigh_global_ids, size_t num_neighbours, struct tgt_request **neigh_requests_, size_t *request_count_)
static void delete_creep(struct interp_method *method)
static struct result_stencils * relocate_interp_results(struct yac_interp_grid *interp_grid, struct comm_stuff comm, struct interp_result *interp_results, size_t result_count)
static int compare_tgt_request_stencil_count(const void *a, const void *b)
static int compare_interp_result_global_id(const void *a, const void *b)
void yac_interp_weights_wcopy_weights(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_stencils_per_tgt, size_t *stencil_indices, int *stencil_ranks, double *w)
yac_int * yac_interp_weights_get_interp_tgt(struct yac_interp_weights *weights)
size_t yac_interp_weights_get_interp_count(struct yac_interp_weights *weights)
#define xrealloc(ptr, size)
MPI_Datatype stencil_info_dt
struct interp_method_vtable * vtable
size_t(* do_search)(struct interp_method *method, struct yac_interp_grid *grid, size_t *tgt_points, size_t count, struct yac_interp_weights *weights, int *interpolation_complete)
struct result_stencil stencil
structure containing the information (global id and location)
struct remote_point * data
union result_stencil::@10 data
struct stencil_info single * multi
struct stencil_info stencil_info_buffer[]
struct result_stencil * stencils
struct result_stencil stencil
const const_int_pointer num_vertices_per_cell
void yac_quicksort_index_yac_int_size_t(yac_int *a, size_t n, size_t *idx)
void yac_quicksort_index_int_yac_int(int *a, size_t n, yac_int *idx)
void yac_quicksort_index_yac_int_yac_int_size_t(yac_int *a, size_t n, yac_int *b, size_t *c)
void yac_quicksort_index_size_t_yac_int(size_t *a, size_t n, yac_int *idx)
void yac_quicksort_index_int_size_t(int *a, size_t n, size_t *idx)
void yac_quicksort_index_yac_int_yac_int(yac_int *a, size_t n, yac_int *idx)
void yac_quicksort_index_int_size_t_yac_int(int *a, size_t n, size_t *b, yac_int *c)
#define YAC_ASSERT_F(exp, format,...)
#define YAC_ASSERT(exp, msg)
void yac_generate_alltoallv_args(int count, size_t const *sendcounts, size_t *recvcounts, size_t *sdispls, size_t *rdispls, MPI_Comm comm)
void yac_free_comm_buffers(size_t *sendcounts, size_t *recvcounts, size_t *sdispls, size_t *rdispls)
void yac_get_comm_buffers(int count, size_t **sendcounts, size_t **recvcounts, size_t **sdispls, size_t **rdispls, MPI_Comm comm)
MPI_Datatype yac_create_resized(MPI_Datatype dt, size_t new_size, MPI_Comm comm)
#define yac_mpi_call(call, comm)