19 size_t * tgt_points,
size_t count,
81 MPI_Datatype stencil_info_dt;
82 int array_of_blocklengths[] = {1, 1, 1};
83 const MPI_Aint array_of_displacements[] =
84 {(MPI_Aint)(intptr_t)(
const void *)&(dummy.
rank) -
85 (MPI_Aint)(intptr_t)(
const void *)&dummy,
86 (MPI_Aint)(intptr_t)(
const void *)&(dummy.
idx) -
87 (MPI_Aint)(intptr_t)(
const void *)&dummy,
88 (MPI_Aint)(intptr_t)(
const void *)&(dummy.
weight) -
89 (MPI_Aint)(intptr_t)(
const void *)&dummy};
90 const MPI_Datatype array_of_types[] =
91 {MPI_INT, MPI_UINT64_T, MPI_DOUBLE};
93 MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacements,
94 array_of_types, &stencil_info_dt), comm);
99 void * results,
size_t result_count,
size_t result_size,
101 int * pack_sizes, MPI_Datatype stencil_info_dt, MPI_Comm comm) {
103 int pack_size_global_id, pack_size_count;
105 yac_mpi_call(MPI_Pack_size(1, MPI_UINT64_T, comm, &pack_size_count), comm);
107 for (
size_t i = 0; i < result_count; ++i) {
111 (
void*)((
unsigned char*)results + pack_order[i] * result_size));
112 size_t curr_count = stencil->
count;
113 int pack_size_stencils;
116 (
int)curr_count, stencil_info_dt, comm, &pack_size_stencils), comm);
117 pack_sizes[i] = pack_size_global_id +
125 int * position, MPI_Datatype stencil_info_dt, MPI_Comm comm) {
130 buffer, buffer_size, position, comm), comm);
133 uint64_t count_uint64_t = (uint64_t)(stencil->
count);
135 MPI_Pack(&count_uint64_t, 1, MPI_UINT64_T,
136 buffer, buffer_size, position, comm), comm);
140 (count_uint64_t == 1)?(&(stencil->
data.single)):(stencil->
data.
multi);
142 MPI_Pack(stencils, (
int)count_uint64_t, stencil_info_dt,
143 buffer, buffer_size, position, comm), comm);
147 void * results,
size_t result_count,
size_t result_size,
149 void ** pack_data,
int * pack_sizes, MPI_Datatype stencil_info_dt,
153 results, result_count, result_size, get_stencil, pack_order,
154 pack_sizes, stencil_info_dt, comm);
156 size_t temp_total_pack_size = 0;
157 for (
size_t i = 0; i < result_count; ++i)
158 temp_total_pack_size += (
size_t)(pack_sizes[i]);
160 void * pack_data_ =
xmalloc(temp_total_pack_size);
161 size_t total_pack_size = 0;
163 for (
size_t i = 0; i < result_count; ++i) {
167 (
void*)((
unsigned char*)results + pack_order[i] * result_size));
170 void * buffer = (
void*)((
char*)pack_data_ + total_pack_size);
171 int buffer_size = (int)(temp_total_pack_size - total_pack_size);
175 curr_stencil, buffer, buffer_size, &position, stencil_info_dt, comm);
177 pack_sizes[i] = position;
178 total_pack_size += (size_t)position;
181 if (total_pack_size != temp_total_pack_size)
182 pack_data_ =
xrealloc(pack_data_, total_pack_size);
184 *pack_data = pack_data_;
189 int * position, MPI_Datatype stencil_info_dt, MPI_Comm comm,
191 size_t * stencil_info_buffer_array_size,
192 size_t * stencil_info_buffer_size) {
196 MPI_Unpack(buffer, buffer_size, position,
200 uint64_t count_uint64_t;
202 MPI_Unpack(buffer, buffer_size, position,
203 &count_uint64_t, 1, MPI_UINT64_T, comm), comm);
204 size_t count = ((stencil->
count = (size_t)count_uint64_t));
208 if (count_uint64_t == 1) {
209 stencils = &(stencil->
data.single);
212 *stencil_info_buffer, *stencil_info_buffer_array_size,
213 *stencil_info_buffer_size + count);
216 *stencil_info_buffer + *stencil_info_buffer_size));
217 *stencil_info_buffer_size += count;
220 MPI_Unpack(buffer, buffer_size, position,
221 stencils, (
int)count, stencil_info_dt, comm), comm);
225 size_t count,
void * packed_data,
size_t packed_data_size,
226 MPI_Datatype stencil_info_dt, MPI_Comm comm) {
229 size_t stencil_info_buffer_array_size = 0;
230 size_t stencil_info_buffer_size = 0;
235 for (
size_t i = 0, offset = 0; i <
count; ++i) {
238 void * curr_buffer = (
void*)((
char*)packed_data + offset);
239 int buffer_size = (int)(packed_data_size - offset);
243 curr_stencil, curr_buffer, buffer_size, &position, stencil_info_dt, comm,
244 &stencil_info_buffer, &stencil_info_buffer_array_size,
245 &stencil_info_buffer_size);
246 offset += (size_t)position;
257 for (
size_t i = 0, offset = 0; i <
count; ++i) {
259 if (curr_count > 1) {
261 offset += curr_count;
269 void * results,
size_t result_count,
size_t result_size,
270 struct result_stencil*(*result_get_stencil)(
void*),
size_t * pack_order,
274 size_t local_count = 0;
275 for (
size_t i = 0; i < result_count; ++i) {
276 if (ranks[i] == comm.
rank) {
281 size_t send_count = result_count - local_count;
288 int * pack_sizes =
xmalloc(send_count *
sizeof(*pack_sizes));
290 results, send_count, result_size,
291 result_get_stencil, pack_order, &send_buffer, pack_sizes,
295 size_t * num_results_per_rank =
296 xmalloc((
size_t)comm.
size *
sizeof(*num_results_per_rank));
300 for (
int rank = 0; rank < comm.
size; ++rank) {
301 size_t curr_num_results = 0;
302 size_t curr_sendcount = 0;
303 while ((j < send_count) && (ranks[j] == rank)) {
304 curr_sendcount += (size_t)(pack_sizes[j++]);
307 num_results_per_rank[rank] = curr_num_results;
309 curr_sendcount <= INT_MAX,
310 "ERROR(exchange_interp_results): pack size to big")
321 size_t recv_count = 0;
322 for (
int i = 0; i < comm.
size; ++i)
323 recv_count += num_results_per_rank[i];
324 free(num_results_per_rank);
328 void * recv_buffer =
xmalloc(recv_size);
331 yac_alltoallv_packed_p2p(
351 pack_order += send_count;
352 for (
size_t i = 0; i < local_count; ++i)
354 *(*result_get_stencil)(
355 (
void*)((
unsigned char*)results + pack_order[i] * result_size));
368 struct interp_result * interp_results,
size_t result_count) {
371 size_t * tgt_points =
xmalloc(result_count *
sizeof(*tgt_points));
372 for (
size_t i = 0; i < result_count; ++i)
373 tgt_points[i] = interp_results[i].local_id;
376 int * tgt_points_dist_owner =
377 xmalloc(result_count *
sizeof(*tgt_points_dist_owner));
379 interp_grid, tgt_points, result_count, tgt_points_dist_owner);
380 size_t * pack_order = tgt_points;
381 for (
size_t i = 0; i < result_count; ++i) pack_order[i] = i;
385 interp_results, result_count,
sizeof(*interp_results),
389 free(tgt_points_dist_owner);
391 return relocated_results;
397 struct interp_result ** interp_results,
size_t * result_count) {
400 size_t num_interpolated_tgt =
402 yac_int * interpolated_tgts_global_ids =
404 size_t * interpolated_tgts_local_ids =
405 xmalloc(num_interpolated_tgt *
sizeof(*interpolated_tgts_local_ids));
407 interp_grid, interpolated_tgts_global_ids, num_interpolated_tgt,
408 interpolated_tgts_local_ids);
412 (num_interpolated_tgt > 0)?
413 xmalloc(num_interpolated_tgt *
sizeof(*initial_interp_results)):NULL;
414 for (
size_t i = 0; i < num_interpolated_tgt; ++i) {
415 initial_interp_results[i].
local_id = interpolated_tgts_local_ids[i];
416 initial_interp_results[i].
global_id = interpolated_tgts_global_ids[i];
417 initial_interp_results[i].
idx = SIZE_MAX;
419 interpolated_tgts_global_ids[i];
422 initial_interp_results[i].
stencil.
data.single.idx = (uint64_t)i;
423 initial_interp_results[i].
stencil.
data.single.weight = 1.0;
425 free(interpolated_tgts_local_ids);
426 free(interpolated_tgts_global_ids);
428 *interp_results = initial_interp_results;
429 *result_count = num_interpolated_tgt;
433 const void * a,
const void * b) {
435 int ret = (((
const struct interp_result *)a)->stencil.count >
449 const void * a,
const void * b) {
466 const void * a,
const void * b) {
475 const void * a,
const void * b) {
484 const void * a,
const void * b) {
494 return ((*(
const yac_int *)a) >
504 size_t ** num_stencils_per_tgt_,
size_t ** stencil_indices_,
505 int ** stencil_ranks_,
double ** w_) {
508 qsort(interp_results, result_count,
sizeof(*interp_results),
511 size_t total_num_stencils = 0;
512 size_t * interpolated_tgts_local_ids =
513 xmalloc(result_count *
sizeof(*interpolated_tgts_local_ids));
514 for (
size_t i = 0; i < result_count; ++i) {
515 interpolated_tgts_local_ids[i] = interp_results[i].
local_id;
518 interp_tgt_remote_points->
data =
520 interp_grid, interpolated_tgts_local_ids, result_count);
521 interp_tgt_remote_points->
count = result_count;
522 free(interpolated_tgts_local_ids);
524 size_t * num_stencils_per_tgt =
525 xmalloc(result_count *
sizeof(*num_stencils_per_tgt));
526 size_t * stencil_indices =
527 xmalloc(total_num_stencils *
sizeof(*stencil_indices));
528 int * stencil_ranks =
529 xmalloc(total_num_stencils *
sizeof(*stencil_ranks));
530 double * w =
xmalloc(total_num_stencils *
sizeof(*w));
532 for (
size_t i = 0, j = 0; i < result_count; ++i) {
536 num_stencils_per_tgt[i] = curr_count;
538 if (curr_count == 1) {
539 stencil_indices[j] = interp_results[i].
stencil.
data.single.idx;
540 stencil_ranks[j] = interp_results[i].
stencil.
data.single.rank;
541 w[j] = interp_results[i].
stencil.
data.single.weight;
544 for (
size_t k = 0; k < curr_count; ++k, ++j) {
552 *num_stencils_per_tgt_ = num_stencils_per_tgt;
553 *stencil_indices_ = stencil_indices;
554 *stencil_ranks_ = stencil_ranks;
566 struct tgt_request * neigh_requests,
size_t * request_count_,
569 size_t request_count = *request_count_;
570 size_t match_count = 0;
573 size_t stencil_count = interp_stencils->
count;
577 stencils, stencil_count,
sizeof(*stencils),
579 for (
size_t i = 0, j = 0; i < stencil_count; ++i) {
582 while ((j < request_count) &&
583 (neigh_requests[j].
global_id < curr_global_id)) ++j;
584 while ((j < request_count) &&
585 (neigh_requests[j].
global_id == curr_global_id)) {
588 neigh_requests[j].
stencil = stencils[i];
595 qsort(neigh_requests, request_count,
sizeof(*neigh_requests),
598 request_count -= match_count;
599 *request_count_ = request_count;
600 struct tgt_request * neigh_request_matches = neigh_requests + request_count;
603 qsort(neigh_requests, request_count,
sizeof(*neigh_requests),
606 size_t * pack_order =
xmalloc(match_count *
sizeof(*pack_order));
607 int * origin_rank =
xmalloc(match_count *
sizeof(*origin_rank));
608 for (
size_t i = 0; i < match_count; ++i) {
610 origin_rank[i] = neigh_request_matches[i].
rank;
615 neigh_request_matches, match_count,
sizeof(*neigh_request_matches),
620 return relocated_results;
626 size_t ** neigh_local_ids_,
yac_int ** neigh_to_tgt_global_id_,
627 size_t * total_num_neighbours_) {
630 size_t total_num_neighbours = 0;
633 for (
size_t i = 0; i <
count; ++i)
634 total_num_neighbours +=
636 size_t * neigh_local_ids =
637 xmalloc(total_num_neighbours *
sizeof(*neigh_local_ids));
639 interp_grid, tgt_local_ids,
count, neigh_local_ids);
642 yac_int * neigh_to_tgt_global_id =
643 xmalloc(total_num_neighbours *
sizeof(*neigh_to_tgt_global_id));
644 for (
size_t i = 0, j = 0; i <
count; ++i) {
647 yac_int curr_tgt_global_id = tgt_global_ids[i];
648 for (
int k = 0; k < curr_num_neigh; ++k, ++j)
649 neigh_to_tgt_global_id[j] = curr_tgt_global_id;
652 *neigh_local_ids_ = neigh_local_ids;
653 *neigh_to_tgt_global_id_ = neigh_to_tgt_global_id;
654 *total_num_neighbours_ = total_num_neighbours;
660 size_t ** neigh_local_ids_,
yac_int ** neigh_to_tgt_global_id_,
661 size_t * total_num_neighbours_) {
663 int * num_neighs_per_vertex =
xmalloc(
count *
sizeof(*num_neighs_per_vertex));
664 size_t * neigh_vertices;
667 interp_grid, tgt_local_ids,
count,
668 &neigh_vertices, num_neighs_per_vertex);
670 size_t total_num_neighbours = 0;
671 for (
size_t i = 0; i <
count; ++i)
672 total_num_neighbours += (
size_t)(num_neighs_per_vertex[i]);
675 yac_int * neigh_to_tgt_global_id =
676 xmalloc(total_num_neighbours *
sizeof(*neigh_to_tgt_global_id));
677 for (
size_t i = 0, j = 0; i <
count; ++i) {
678 int curr_num_neigh = num_neighs_per_vertex[i];
679 yac_int curr_tgt_global_id = tgt_global_ids[i];
680 for (
int k = 0; k < curr_num_neigh; ++k, ++j)
681 neigh_to_tgt_global_id[j] = curr_tgt_global_id;
683 free(num_neighs_per_vertex);
685 *neigh_local_ids_ = neigh_vertices;
686 *neigh_to_tgt_global_id_ = neigh_to_tgt_global_id;
687 *total_num_neighbours_ = total_num_neighbours;
693 size_t * tgt_local_ids,
yac_int * tgt_global_ids,
size_t count,
694 size_t ** neigh_local_ids_,
yac_int ** neigh_global_ids_,
695 yac_int ** neigh_to_tgt_global_id_,
size_t * total_num_neighbours_) {
698 size_t * neigh_local_ids;
699 yac_int * neigh_to_tgt_global_id;
700 size_t total_num_neighbours;
706 "ERROR(get_tgt_neigh_info): unsupported target field location")
709 interp_grid, tgt_local_ids, tgt_global_ids,
count,
710 &neigh_local_ids, &neigh_to_tgt_global_id, &total_num_neighbours);
713 interp_grid, tgt_local_ids, tgt_global_ids,
count,
714 &neigh_local_ids, &neigh_to_tgt_global_id, &total_num_neighbours);
718 neigh_local_ids, total_num_neighbours, neigh_to_tgt_global_id);
719 while((total_num_neighbours > 0) &&
720 (neigh_local_ids[total_num_neighbours-1] == SIZE_MAX))
721 --total_num_neighbours;
725 xmalloc(total_num_neighbours *
sizeof(*neigh_global_ids));
727 interp_grid, neigh_local_ids, total_num_neighbours, neigh_global_ids);
730 xrealloc(neigh_local_ids, total_num_neighbours *
sizeof(*neigh_local_ids));
731 *neigh_global_ids_ = neigh_global_ids;
732 *neigh_to_tgt_global_id_ =
734 total_num_neighbours *
sizeof(*neigh_to_tgt_global_id));
735 *total_num_neighbours_ = total_num_neighbours;
742 size_t * neigh_local_ids,
yac_int * neigh_global_ids,
743 size_t num_neighbours,
struct tgt_request ** neigh_requests_,
744 size_t * request_count_) {
747 int * neigh_dist_owner =
748 xmalloc(num_neighbours *
sizeof(*neigh_dist_owner));
750 interp_grid, neigh_local_ids, num_neighbours,
753 yac_int * send_neigh_global_ids =
754 xmalloc(num_neighbours *
sizeof(*send_neigh_global_ids));
755 memcpy(send_neigh_global_ids, neigh_global_ids,
756 num_neighbours *
sizeof(*send_neigh_global_ids));
760 neigh_dist_owner, num_neighbours, send_neigh_global_ids);
763 size_t to = 0, new_to = 0, from = 0, new_from = 0;
764 for (
int rank = 0; rank < comm.
size; ++rank) {
765 while ((new_from < num_neighbours) &&
766 (neigh_dist_owner[new_from] == rank)) new_from++;
767 size_t curr_count = new_from - from;
768 qsort(send_neigh_global_ids + from, curr_count,
771 (curr_count > 0)?(send_neigh_global_ids[from]-1):0;
772 for (; from < new_from; ++from) {
773 yac_int curr_global_id = send_neigh_global_ids[from];
774 if (prev_global_id != curr_global_id) {
775 send_neigh_global_ids[new_to++] = curr_global_id;
776 prev_global_id = curr_global_id;
779 curr_count = new_to - to;
783 free(neigh_dist_owner);
790 yac_int * recv_neigh_global_ids =
791 xmalloc(request_count *
sizeof(*recv_neigh_global_ids));
792 yac_alltoallv_yac_int_p2p(
796 xmalloc(request_count *
sizeof(*neigh_requests));
797 for (
int i = 0, k = 0; i < comm.
size; ++i) {
798 for (
size_t j = 0; j < comm.
recvcounts[i]; ++j, ++k) {
799 neigh_requests[k].
global_id = recv_neigh_global_ids[k];
800 neigh_requests[k].
rank = i;
804 qsort(neigh_requests, request_count,
sizeof(*neigh_requests),
806 free(send_neigh_global_ids);
807 free(recv_neigh_global_ids);
809 *neigh_requests_ = neigh_requests;
810 *request_count_ = request_count;
814 size_t * tgt_local_ids,
yac_int * tgt_global_ids,
size_t count) {
817 xmalloc(count *
sizeof(*interp_results));
818 for (
size_t i = 0; i < count; ++i) {
819 interp_results[i].
local_id = tgt_local_ids[i];
820 interp_results[i].
global_id = tgt_global_ids[i];
821 interp_results[i].
idx = i;
824 qsort(interp_results, count,
sizeof(*interp_results),
826 return interp_results;
830 const void * a,
const void * b) {
846 size_t stencil_info_count = 0;
848 for (
size_t i = 0; i <
count; ++i)
849 stencil_info_count += neigh_stencils[stencil_indices[i]].
count;
854 if (stencil_info_count > 1) {
857 xmalloc(stencil_info_count *
sizeof(*stencil_infos))));
859 stencil_infos = &(stencil.
data.single);
863 for (
size_t i = 0, j = 0; i < count; ++i) {
865 neigh_stencils + stencil_indices[i];
866 size_t curr_stencil_info_count = curr_stencil->
count;
868 (curr_stencil_info_count == 1)?
870 memcpy(stencil_infos + j, curr_stencil_infos,
871 curr_stencil_info_count *
sizeof(*stencil_infos));
872 for (
size_t k = 0; k < curr_stencil_info_count; ++k, ++j)
879 if (stencil_info_count > 1) {
882 qsort(stencil_infos, stencil_info_count,
sizeof(*stencil_infos),
886 struct stencil_info * prev_stencil_info = stencil_infos,
887 * curr_stencil_info = stencil_infos + 1;
888 size_t new_stencil_info_count = 1;
889 for (
size_t i = 1; i < stencil_info_count; ++i, ++curr_stencil_info) {
891 curr_stencil_info, prev_stencil_info)) {
892 if (new_stencil_info_count != i)
893 stencil_infos[new_stencil_info_count] =
895 ++new_stencil_info_count;
896 prev_stencil_info = curr_stencil_info;
898 stencil_infos[new_stencil_info_count-1].
weight +=
899 curr_stencil_info->weight;
902 if (new_stencil_info_count != stencil_info_count) {
903 stencil_info_count = new_stencil_info_count;
904 if (new_stencil_info_count == 1) {
905 stencil.
data.single = *stencil_infos;
910 stencil_infos, stencil_info_count *
sizeof(*stencil_infos));
914 stencil.
count = stencil_info_count;
922 size_t * stencil_indices,
size_t * num_neighbours_,
923 struct interp_result * interp_results,
size_t * num_open_tgt_) {
925 size_t num_neighbours = *num_neighbours_;
926 size_t num_open_tgt = *num_open_tgt_;
929 size_t answer_count = neigh_answer->
count;
934 neigh_stencils, answer_count,
sizeof(*neigh_stencils),
936 size_t match_count = 0;
937 for (
size_t i = 0, j = 0; i < answer_count; ++i) {
940 while ((j < num_neighbours) &&
941 (neigh_global_ids[j] < curr_global_id)) ++j;
943 while ((j < num_neighbours) &&
944 (neigh_global_ids[j] == curr_global_id)) {
945 neigh_global_ids[j] = XT_INT_MAX;
946 stencil_indices[j] = i;
954 neigh_global_ids, num_neighbours, neigh_to_tgt_global_id, stencil_indices);
955 num_neighbours -= match_count;
956 *num_neighbours_ = num_neighbours;
958 neigh_to_tgt_global_id += num_neighbours;
959 stencil_indices += num_neighbours;
963 neigh_to_tgt_global_id, match_count, stencil_indices);
966 for (
size_t i = 0, k = 0; i < match_count;) {
971 yac_int curr_tgt_global_id = neigh_to_tgt_global_id[i++];
972 while ((i < match_count) &&
973 (neigh_to_tgt_global_id[i] == curr_tgt_global_id)) ++i;
974 size_t curr_stencil_count = i - prev_i;
976 while ((k < num_open_tgt) &&
977 (interp_results[k].
global_id < curr_tgt_global_id)) ++k;
979 while ((k < num_open_tgt) &&
980 (interp_results[k].
global_id == curr_tgt_global_id)) {
984 neigh_stencils, stencil_indices + prev_i, curr_stencil_count,
985 curr_tgt_global_id, 1.0 / (
double)curr_stencil_count);
992 qsort(interp_results, num_open_tgt,
sizeof(*interp_results),
994 size_t new_num_open_tgt = 0;
995 while ((new_num_open_tgt < num_open_tgt) &&
996 (interp_results[new_num_open_tgt].stencil.count == 0))
998 *num_open_tgt_ = new_num_open_tgt;
999 return num_open_tgt - new_num_open_tgt;
1029 size_t * tgt_points,
yac_int * tgt_global_ids,
size_t count,
1035 size_t * neigh_local_ids;
1037 yac_int * neigh_to_tgt_global_id;
1038 size_t total_num_neighbours;
1040 interp_grid, tgt_points, tgt_global_ids, count,
1041 &neigh_local_ids, &neigh_global_ids, &neigh_to_tgt_global_id,
1042 &total_num_neighbours);
1043 size_t * stencil_indices =
1044 xmalloc(total_num_neighbours *
sizeof(*stencil_indices));
1049 size_t request_count;
1051 interp_grid, comm, neigh_local_ids, neigh_global_ids, total_num_neighbours,
1052 &neigh_requests, &request_count);
1053 free(neigh_local_ids);
1057 neigh_global_ids, total_num_neighbours, neigh_to_tgt_global_id);
1060 size_t num_open_tgt = count;
1067 size_t result_count;
1069 interp_grid, comm, weights, &initial_interp_results, &result_count);
1071 for (
int creep_distance = 0;
1072 creep_distance < max_creep_distance; ++creep_distance) {
1075 int result_flag = result_count > 0;
1078 MPI_IN_PLACE, &result_flag, 1, MPI_INT, MPI_MAX, comm.
comm), comm.
comm);
1079 if (result_flag == 0)
break;
1086 (creep_distance == 0)?
1087 initial_interp_results:(interp_results + num_open_tgt),
1089 if (creep_distance == 0) free(initial_interp_results);
1095 interp_grid, comm, neigh_requests, &request_count,
1101 neigh_matches, neigh_global_ids, neigh_to_tgt_global_id,
1102 stencil_indices, &total_num_neighbours, interp_results,
1105 free(interp_stencils);
1107 free(neigh_matches);
1110 free(neigh_requests);
1111 free(stencil_indices);
1112 free(neigh_global_ids);
1113 free(neigh_to_tgt_global_id);
1115 for (
size_t i = 0; i <
count; ++i)
1116 interp_flag[interp_results[i].idx] =
1117 interp_results[i].stencil.count > 0;
1121 size_t * num_stencils_per_tgt;
1122 int * stencil_ranks;
1125 interp_grid, comm, interp_results + num_open_tgt,
count - num_open_tgt,
1126 &interp_tgt_remote_points, &num_stencils_per_tgt, &stencil_indices,
1127 &stencil_ranks, &w);
1129 weights, &interp_tgt_remote_points, num_stencils_per_tgt,
1130 stencil_indices, stencil_ranks, w);
1131 free(interp_tgt_remote_points.
data);
1133 free(num_stencils_per_tgt);
1134 free(stencil_ranks);
1135 free(stencil_indices);
1136 for (
size_t i = num_open_tgt; i <
count; ++i)
1137 if (interp_results[i].stencil.count > 1)
1138 free(interp_results[i].stencil.data.multi);
1139 free(interp_results);
1146 size_t * tgt_points,
size_t count,
1159 "ERROR(do_search_creep): unsupported target field location "
1160 "(has to be YAC_LOC_CELL or YAC_LOC_CORNER)")
1163 size_t * sendcounts, * recvcounts, * sdispls, * rdispls;
1165 1, &sendcounts, &recvcounts, &sdispls, &rdispls, comm);
1166 int comm_rank, comm_size;
1171 int * tgt_points_dist_owner =
1172 xmalloc(count *
sizeof(*tgt_points_dist_owner));
1174 interp_grid, tgt_points, count, tgt_points_dist_owner);
1175 yac_int * tgt_points_global_ids =
1176 xmalloc(count *
sizeof(*tgt_points_global_ids));
1178 interp_grid, tgt_points, count, tgt_points_global_ids);
1181 size_t local_count = 0;
1182 for (
size_t i = 0; i < count; ++i) {
1183 if (tgt_points_dist_owner[i] == comm_rank) {
1185 tgt_points_dist_owner[i] = INT_MAX;
1188 size_t send_count = count - local_count;
1192 tgt_points_dist_owner, count, tgt_points, tgt_points_global_ids);
1195 for (
size_t i = 0; i < send_count; ++i)
1196 sendcounts[tgt_points_dist_owner[i]]++;
1198 1, sendcounts, recvcounts, sdispls, rdispls, comm);
1199 size_t recv_count = recvcounts[comm_size-1] + rdispls[comm_size-1];
1202 tgt_points_global_ids,
1203 (send_count + local_count + recv_count) *
sizeof(*global_ids_buffer));
1204 yac_int * send_global_ids = global_ids_buffer;
1205 yac_int * recv_global_ids = global_ids_buffer + send_count;
1206 yac_alltoallv_yac_int_p2p(
1207 send_global_ids, sendcounts, sdispls+1,
1208 recv_global_ids + local_count, recvcounts, rdispls, comm);
1211 size_t * temp_tgt_points =
1212 xmalloc((local_count + recv_count) *
sizeof(*temp_tgt_points));
1213 memcpy(temp_tgt_points, tgt_points + send_count,
1214 local_count *
sizeof(*tgt_points));
1216 interp_grid, recv_global_ids + local_count, recv_count,
1217 temp_tgt_points + local_count);
1220 int * interp_flag_buffer =
1221 xmalloc((send_count + local_count + recv_count) *
1222 sizeof(*interp_flag_buffer));
1223 int * temp_interp_flag = interp_flag_buffer + send_count;
1224 int * interp_flag = interp_flag_buffer;
1225 memset(temp_interp_flag, 0, (local_count + recv_count) *
sizeof(*temp_interp_flag));
1228 local_count + recv_count, temp_interp_flag, weights);
1229 free(global_ids_buffer);
1230 free(temp_tgt_points);
1233 yac_alltoallv_int_p2p(
1234 temp_interp_flag + local_count, recvcounts, rdispls,
1235 interp_flag, sendcounts, sdispls+1, comm);
1239 size_t num_interpolated_tgt = 0;
1240 for (
size_t i = 0; i < count; ++i) {
1241 if (interp_flag[i]) {
1243 ++num_interpolated_tgt;
1249 free(interp_flag_buffer);
1250 free(tgt_points_dist_owner);
1254 return num_interpolated_tgt;
1260 xmalloc(1 *
sizeof(*method_creep));
1264 (creep_distance >= 0)?creep_distance:INT_MAX;
#define ENSURE_ARRAY_SIZE(arrayp, curr_array_size, req_size)
void yac_interp_grid_get_tgt_cell_neighbours(struct yac_interp_grid *interp_grid, size_t *tgt_cells, size_t count, size_t *neighbours)
enum yac_location yac_interp_grid_get_tgt_field_location(struct yac_interp_grid *interp_grid)
void yac_interp_grid_get_tgt_global_ids(struct yac_interp_grid *interp_grid, size_t *tgt_points, size_t count, yac_int *tgt_global_ids)
MPI_Comm yac_interp_grid_get_MPI_Comm(struct yac_interp_grid *interp_grid)
struct remote_point * yac_interp_grid_get_tgt_remote_points(struct yac_interp_grid *interp_grid, size_t *tgt_points, size_t count)
void yac_interp_grid_determine_dist_tgt_owners(struct yac_interp_grid *interp_grid, size_t *tgt_indices, size_t count, int *owners)
void yac_interp_grid_get_tgt_vertex_neighbours(struct yac_interp_grid *interp_grid, size_t *vertices, size_t count, size_t **neigh_vertices, int *num_neighs_per_vertex)
struct yac_const_basic_grid_data * yac_interp_grid_get_basic_grid_data_tgt(struct yac_interp_grid *interp_grid)
void yac_interp_grid_tgt_global_to_local(struct yac_interp_grid *interp_grid, yac_int *tgt_global_ids, size_t count, size_t *tgt_local_ids)
static void do_search_creep_2(struct yac_interp_grid *interp_grid, int const max_creep_distance, size_t *tgt_points, yac_int *tgt_global_ids, size_t count, int *interp_flag, struct yac_interp_weights *weights)
static int compare_tgt_request_global_id(const void *a, const void *b)
static void pack_result_stencil(struct result_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype stencil_info_dt, MPI_Comm comm)
static struct result_stencils * unpack_result_stencils(size_t count, void *packed_data, size_t packed_data_size, MPI_Datatype stencil_info_dt, MPI_Comm comm)
static int compare_interp_result_stencil_local_id(const void *a, const void *b)
static int compare_stencil_info(const void *a, const void *b)
static MPI_Datatype yac_get_stencil_info_mpi_datatype(MPI_Comm comm)
static struct result_stencil * tgt_request_get_stencil(void *tgt_request)
static struct result_stencil * interp_result_get_stencil(void *interp_result)
static void get_tgt_neigh_info_vertex(struct yac_interp_grid *interp_grid, size_t *tgt_local_ids, yac_int *tgt_global_ids, size_t count, size_t **neigh_local_ids_, yac_int **neigh_to_tgt_global_id_, size_t *total_num_neighbours_)
static struct result_stencils * exchange_interp_results(void *results, size_t result_count, size_t result_size, struct result_stencil *(*result_get_stencil)(void *), size_t *pack_order, int *ranks, struct comm_stuff comm)
static void free_comm_stuff(struct comm_stuff comm)
static struct comm_stuff init_comm_stuff(struct yac_interp_grid *interp_grid)
static int compare_result_stencil_global_id(const void *a, const void *b)
static int compare_interp_result_stencil(const void *a, const void *b)
static void extract_interp_info(struct yac_interp_grid *interp_grid, struct comm_stuff comm, struct interp_result *interp_results, size_t result_count, struct remote_points *interp_tgt_remote_points, size_t **num_stencils_per_tgt_, size_t **stencil_indices_, int **stencil_ranks_, double **w_)
static void get_tgt_neigh_info(struct yac_interp_grid *interp_grid, size_t *tgt_local_ids, yac_int *tgt_global_ids, size_t count, size_t **neigh_local_ids_, yac_int **neigh_global_ids_, yac_int **neigh_to_tgt_global_id_, size_t *total_num_neighbours_)
static void get_initial_results(struct yac_interp_grid *interp_grid, struct comm_stuff comm, struct yac_interp_weights *interp_weights, struct interp_result **interp_results, size_t *result_count)
static size_t do_search_creep(struct interp_method *method, struct yac_interp_grid *interp_grid, size_t *tgt_points, size_t count, struct yac_interp_weights *weights)
struct interp_method * yac_interp_method_creep_new(int creep_distance)
static struct result_stencil copy_result_stencil_multi(struct result_stencil *neigh_stencils, size_t *stencil_indices, size_t count, yac_int global_id, double weight)
static void pack_result_stencils(void *results, size_t result_count, size_t result_size, struct result_stencil *(*get_stencil)(void *), size_t *pack_order, void **pack_data, int *pack_sizes, MPI_Datatype stencil_info_dt, MPI_Comm comm)
static void get_tgt_neigh_info_cell(struct yac_interp_grid *interp_grid, size_t *tgt_local_ids, yac_int *tgt_global_ids, size_t count, size_t **neigh_local_ids_, yac_int **neigh_to_tgt_global_id_, size_t *total_num_neighbours_)
static struct interp_result * init_interp_results(size_t *tgt_local_ids, yac_int *tgt_global_ids, size_t count)
static int compare_yac_int(const void *a, const void *b)
static size_t match_neigh_answers_with_tgts(struct result_stencils *neigh_answer, yac_int *neigh_global_ids, yac_int *neigh_to_tgt_global_id, size_t *stencil_indices, size_t *num_neighbours_, struct interp_result *interp_results, size_t *num_open_tgt_)
static void unpack_result_stencil(struct result_stencil *stencil, void *buffer, int buffer_size, int *position, MPI_Datatype stencil_info_dt, MPI_Comm comm, struct stencil_info **stencil_info_buffer, size_t *stencil_info_buffer_array_size, size_t *stencil_info_buffer_size)
static struct result_stencils * update_neigh_requests(struct yac_interp_grid *interp_grid, struct comm_stuff comm, struct tgt_request *neigh_requests, size_t *request_count_, struct result_stencils *interp_stencils)
static void get_result_stencil_pack_sizes(void *results, size_t result_count, size_t result_size, struct result_stencil *(*get_stencil)(void *), size_t *pack_order, int *pack_sizes, MPI_Datatype stencil_info_dt, MPI_Comm comm)
static struct interp_method_vtable interp_method_creep_vtable
static void send_neigh_request(struct yac_interp_grid *interp_grid, struct comm_stuff comm, size_t *neigh_local_ids, yac_int *neigh_global_ids, size_t num_neighbours, struct tgt_request **neigh_requests_, size_t *request_count_)
static void delete_creep(struct interp_method *method)
static struct result_stencils * relocate_interp_results(struct yac_interp_grid *interp_grid, struct comm_stuff comm, struct interp_result *interp_results, size_t result_count)
static int compare_tgt_request_stencil_count(const void *a, const void *b)
static int compare_interp_result_global_id(const void *a, const void *b)
void yac_interp_weights_wcopy_weights(struct yac_interp_weights *weights, struct remote_points *tgts, size_t *num_stencils_per_tgt, size_t *stencil_indices, int *stencil_ranks, double *w)
yac_int * yac_interp_weights_get_interp_tgt(struct yac_interp_weights *weights)
size_t yac_interp_weights_get_interp_count(struct yac_interp_weights *weights)
#define xrealloc(ptr, size)
MPI_Datatype stencil_info_dt
struct interp_method_vtable * vtable
size_t(* do_search)(struct interp_method *method, struct yac_interp_grid *grid, size_t *tgt_points, size_t count, struct yac_interp_weights *weights)
struct result_stencil stencil
struct remote_point * data
union result_stencil::@11 data
struct stencil_info single * multi
struct stencil_info stencil_info_buffer[]
struct result_stencil * stencils
struct result_stencil stencil
const_int_pointer num_vertices_per_cell
void yac_quicksort_index_yac_int_size_t(yac_int *a, size_t n, size_t *idx)
void yac_quicksort_index_int_yac_int(int *a, size_t n, yac_int *idx)
void yac_quicksort_index_yac_int_yac_int_size_t(yac_int *a, size_t n, yac_int *b, size_t *c)
void yac_quicksort_index_size_t_yac_int(size_t *a, size_t n, yac_int *idx)
void yac_quicksort_index_int_size_t(int *a, size_t n, size_t *idx)
void yac_quicksort_index_yac_int_yac_int(yac_int *a, size_t n, yac_int *idx)
void yac_quicksort_index_int_size_t_yac_int(int *a, size_t n, size_t *b, yac_int *c)
#define YAC_ASSERT(exp, msg)
void yac_generate_alltoallv_args(int count, size_t const *sendcounts, size_t *recvcounts, size_t *sdispls, size_t *rdispls, MPI_Comm comm)
void yac_free_comm_buffers(size_t *sendcounts, size_t *recvcounts, size_t *sdispls, size_t *rdispls)
void yac_get_comm_buffers(int count, size_t **sendcounts, size_t **recvcounts, size_t **sdispls, size_t **rdispls, MPI_Comm comm)
MPI_Datatype yac_create_resized(MPI_Datatype dt, size_t new_size, MPI_Comm comm)
#define yac_mpi_call(call, comm)