36 MPI_Initialized(&mpi_initialized);
38 return mpi_initialized;
45 "ERROR(yac_yaxt_init): MPI has not yet been initialised");
49 "ERROR(yac_yaxt_init): yaxt was initialised by YAC. \n"
50 "In case there are multiple instances of YAC in parallel, the user has "
51 "to initialise yaxt such that it is available on all processes that "
82 "ERROR(yac_mpi_init_core): "
83 "could not determine MPI data type for size_t "
84 "(sizeof(size_t): %zu; sizeof(%s): %zu)",
120 MPI_Comm_rank(comm, &rank);
122 char error_string[MPI_MAX_ERROR_STRING];
123 int length_of_error_string, error_class;
125 MPI_Error_class(error_code, &error_class);
126 MPI_Error_string(error_class, error_string, &length_of_error_string);
127 fprintf(stderr,
"%3d: %s\n", rank, error_string);
128 MPI_Abort(comm, error_code);
133 void const *
send_buffer,
size_t const * sendcounts,
size_t const * sdispls,
134 void *
recv_buffer,
size_t const * recvcounts,
size_t const * rdispls,
135 size_t dt_size, MPI_Datatype dt, MPI_Comm comm,
char const * caller,
int line) {
137#define USE_P2P_ALLTOALLV
138#ifdef USE_P2P_ALLTOALLV
139 int comm_rank, comm_size;
144 for (
int i = 0; i < comm_size; ++i)
145 req_count += (sendcounts[i] > 0) + (recvcounts[i] > 0);
146 MPI_Request * req =
xmalloc((
size_t)req_count *
sizeof(*req));
149 for (
int j = 0, lb = comm_rank, ub = comm_size; j < 2;
150 ++j, lb = 0, ub = comm_rank) {
151 for (
int i = lb; i < ub; ++i) {
152 if (sendcounts[i] > 0) {
154 sendcounts[i] <= INT_MAX,
155 "ERROR(%s(%d)::yac_alltoallv_p2p): "
156 "sendcounts[%d] = %zu exceeds INT_MAX (%d)",
157 caller, line, i, sendcounts[i], (
int)INT_MAX)
161 dt_size * sdispls[i]),
162 (
int)(sendcounts[i]), dt, i, 0,
163 comm, req + req_count), comm);
166 if (recvcounts[i] > 0) {
168 recvcounts[i] <= INT_MAX,
169 "ERROR(%s(%d)::yac_alltoallv_p2p): "
170 "recvcounts[%d] = %zu exceeds INT_MAX (%d)",
171 caller, line, i, recvcounts[i], (
int)INT_MAX)
175 dt_size * rdispls[i]),
176 (
int)(recvcounts[i]), dt, i, 0,
177 comm, req + req_count), comm);
182 yac_mpi_call(MPI_Waitall(req_count, req, MPI_STATUSES_IGNORE), comm);
187 int * int_buffer =
xmalloc(4 * comm_size *
sizeof(*int_buffer));
188 int * int_sendcounts = int_buffer + 0 * comm_size;
189 int * int_sdispls = int_buffer + 1 * comm_size;
190 int * int_recvcounts = int_buffer + 2 * comm_size;
191 int * int_rdispls = int_buffer + 3 * comm_size;
192 for (
int i = 0; i < comm_size; ++i) {
194 sendcounts[i] <= INT_MAX,
195 "ERROR(%s(%d)::yac_alltoallv_p2p): "
196 "sendcounts[%d] = %zu exceeds INT_MAX (%d)",
197 caller, line, i, sendcounts[i], (
int)INT_MAX)
199 sdispls[i] <= INT_MAX,
200 "ERROR(%s(%d)::yac_alltoallv_p2p): "
201 "sdispls[%d] = %zu exceeds INT_MAX (%d)",
202 caller, line, i, sdispls[i], (
int)INT_MAX)
204 recvcounts[i] <= INT_MAX,
205 "ERROR(%s(%d)::yac_alltoallv_p2p): "
206 "recvcounts[%d] = %zu exceeds INT_MAX (%d)",
207 caller, line, i, recvcounts[i], (
int)INT_MAX)
209 rdispls[i] <= INT_MAX,
210 "ERROR(%s(%d)::yac_alltoallv_p2p): "
211 "rdispls[%d] = %zu exceeds INT_MAX (%d)",
212 caller, line, i, rdispls[i], (
int)INT_MAX)
213 int_sendcounts[i] = (int)(sendcounts[i]);
214 int_sdispls[i] = (int)(sdispls[i]);
215 int_recvcounts[i] = (int)(recvcounts[i]);
216 int_rdispls[i] = (int)(rdispls[i]);
219 MPI_Alltoallv(
send_buffer, int_sendcounts, int_sdispls, dt,
220 recv_buffer, int_recvcounts, int_rdispls, dt, comm), comm);
225#define YAC_ALLTOALL_P2P_TYPE(NAME, TYPE, TYPE_SIZE, MPI_TYPE) \
226 void yac_alltoallv_ ## NAME ## _p2p( \
227 TYPE const * send_buffer, size_t const * sendcounts, size_t const * sdispls, \
228 TYPE * recv_buffer, size_t const * recvcounts, size_t const * rdispls, \
229 MPI_Comm comm, char const * caller, int line) { \
231 (void const *)send_buffer, sendcounts, sdispls, \
232 (void *)recv_buffer, recvcounts, rdispls, \
233 TYPE_SIZE, MPI_TYPE, comm, caller, line); \
244 void const *
send_buffer,
int const * sendcounts,
int const * sdispls,
245 void *
recv_buffer,
int const * recvcounts,
int const * rdispls,
246 size_t dt_size, MPI_Datatype dt, struct
yac_group_comm group_comm) {
248 MPI_Comm comm = group_comm.comm;
251 int rank = comm_rank - group_comm.start;
254 for (
int i = 0; i < group_comm.size; ++i)
255 req_count += (sendcounts[i] > 0) + (recvcounts[i] > 0);
256 MPI_Request * req =
xmalloc((
size_t)req_count *
sizeof(*req));
259 for (
int j = 0, lb = rank, ub = group_comm.size; j < 2;
260 ++j, lb = 0, ub = rank) {
261 for (
int i = lb; i < ub; ++i) {
262 if (sendcounts[i] > 0) {
267 dt_size * (
size_t)(sdispls[i])),
268 sendcounts[i], dt, i + group_comm.start, 0,
269 comm, req + req_count), comm);
272 if (recvcounts[i] > 0) {
276 dt_size * (
size_t)(rdispls[i])),
277 recvcounts[i], dt, i + group_comm.start, 0,
278 comm, req + req_count), comm);
283 yac_mpi_call(MPI_Waitall(req_count, req, MPI_STATUSES_IGNORE), comm);
290 while(power < x) power *= 2;
302 int rank = comm_rank - group_comm.
start;
304 int rem = group_comm.
size - pof2;
308 if (rank < 2 * rem) {
314 group_comm.
comm, MPI_STATUS_IGNORE), group_comm.
comm);
320 (
void const *)
buffer, count, MPI_DOUBLE, rank + 1 + group_comm.
start,
321 0, group_comm.
comm), group_comm.
comm);
325 my_rank = rank - rem;
329 while (
mask < pof2) {
330 int newdst = my_rank ^
mask;
332 if (newdst < rem) dst = newdst * 2 + 1;
333 else dst = newdst + rem;
336 (
void const*)
buffer, count, MPI_DOUBLE, dst + group_comm.
start, 0,
338 group_comm.
comm, MPI_STATUS_IGNORE),
345 if (rank < 2 * rem) {
349 (
void const*)
buffer, count, MPI_DOUBLE, rank - 1 + group_comm.
start,
350 0, group_comm.
comm), group_comm.
comm);
354 (
void*)
buffer, count, MPI_DOUBLE, rank + 1 + group_comm.
start, 0,
355 group_comm.
comm, MPI_STATUS_IGNORE), group_comm.
comm);
361 if (x <= 1)
return 0;
363 while (x >>= 1) ++l2;
369 const size_t * sendbuf,
size_t * recvbuf,
int count,
374 int rank = comm_rank - group_comm.
start;
376 size_t * temp =
xmalloc((
size_t)group_comm.
size * (
size_t)count *
sizeof(*temp));
379 memcpy(temp, sendbuf, (
size_t)count *
sizeof(*temp));
381 int curr_len = count;
383 for (
int r = 0; r < lg2; ++r) {
384 int dst = (rank - nblk + group_comm.
size) % group_comm.
size;
385 int src = (rank + nblk) % group_comm.
size;
390 src + group_comm.
start, 0, group_comm.
comm, MPI_STATUS_IGNORE),
395 int rest = count * group_comm.
size - curr_len;
396 int dst = (rank - nblk + group_comm.
size) % group_comm.
size;
397 int src = (rank + nblk) % group_comm.
size;
402 src + group_comm.
start, 0, group_comm.
comm, MPI_STATUS_IGNORE),
404 memcpy(recvbuf + (
size_t)count * (
size_t)rank,
405 temp, (
size_t)count * (
size_t)(group_comm.
size - rank) *
sizeof(*temp));
406 memcpy(recvbuf, temp + (
size_t)count * (
size_t)(group_comm.
size - rank),
407 (
size_t)count * (
size_t)rank *
sizeof(*temp));
413 void *
buffer,
int count, MPI_Datatype datatype,
int root,
418 int rank = comm_rank - group_comm.
start;
421 if ((root < group_comm.
start) ||
422 (root >= group_comm.
start + group_comm.
size)) {
424 if (comm_rank == root) {
427 (
void const*)
buffer, count, datatype, group_comm.
start, 0,
430 }
else if (comm_rank == group_comm.
start) {
433 buffer, count, datatype, root, 0, group_comm.
comm,
434 MPI_STATUS_IGNORE), group_comm.
comm);
438 root -= group_comm.
start;
444 int temp_rank = (group_comm.
size + rank - root) % group_comm.
size;
447 while (bit <= temp_rank) bit <<= 1;
451 (((temp_rank ^ bit) + root) % group_comm.
size) + group_comm.
start;
454 MPI_Recv(
buffer, count, datatype, src_rank, 0, group_comm.
comm,
455 MPI_STATUS_IGNORE), group_comm.
comm);
459 int temp_rank = (group_comm.
size + rank - root) % group_comm.
size;
460 int bit = 1, send_rank;
462 while(bit <= temp_rank) bit <<= 1;
464 while ((send_rank = temp_rank | bit) < group_comm.
size) {
468 send_rank = ((send_rank + root) % group_comm.
size) + group_comm.
start;
472 (
void const*)
buffer, count, datatype, send_rank, 0, group_comm.
comm),
480 group_comm.
start = 0;
497 return group_comm.
size;
522 (split_rank >= 0) && (split_rank < group_comm.
size),
523 "ERROR(yac_group_comm_split): invalid split rank")
526 int size[2] = {split_rank, group_comm.
size - split_rank};
527 int local_idx = (comm_rank - group_comm.
start) >= split_rank;
530 local_group_comm->
size =
size[local_idx];
532 remote_group_comm->
start =
start[local_idx^1];
533 remote_group_comm->
size =
size[local_idx^1];
540 MPI_Datatype bnd_circle_dt;
541 int array_of_blocklengths[] = {3, 1, 1};
542 const MPI_Aint array_of_displacements[] =
543 {(MPI_Aint)(intptr_t)(
const void *)&(dummy.
base_vector[0]) -
544 (MPI_Aint)(intptr_t)(
const void *)&dummy,
545 (MPI_Aint)(intptr_t)(
const void *)&(dummy.
inc_angle.
sin) -
546 (MPI_Aint)(intptr_t)(
const void *)&dummy,
547 (MPI_Aint)(intptr_t)(
const void *)&(dummy.
inc_angle.
cos) -
548 (MPI_Aint)(intptr_t)(
const void *)&dummy};
549 const MPI_Datatype array_of_types[] =
550 {MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE};
552 MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacements,
553 array_of_types, &bnd_circle_dt), comm);
558 MPI_Datatype dt,
size_t new_size, MPI_Comm comm) {
560 MPI_Datatype resized_dt;
562#define OPENMPI_WORKAROUND
563#ifdef OPENMPI_WORKAROUND
565 MPI_Type_get_extent(dt, &lb, &extent);
567 MPI_Type_create_resized(dt, lb, (MPI_Aint)new_size, &resized_dt), comm);
570 MPI_Type_create_resized(dt, 0, (MPI_Aint)new_size, &resized_dt), comm);
572#undef OPENMPI_WORKAROUND
579 int count,
size_t const * sendcounts,
size_t * recvcounts,
580 size_t * sdispls,
size_t * rdispls, MPI_Comm comm) {
594 size_t iter_count = (size_t)(count * comm_size);
595 for (
size_t i = 0, saccu = 0, raccu = 0; i < iter_count; ++i) {
596 sdispls[i+1] = saccu;
598 saccu += sendcounts[i];
599 raccu += recvcounts[i];
604 int count,
size_t ** sendcounts,
size_t ** recvcounts,
605 size_t ** sdispls,
size_t ** rdispls, MPI_Comm comm) {
610 size_t * comm_buffer_;
614 4 * (
size_t)count * (
size_t)comm_size + 1);
620 (4 * (
size_t)count * (
size_t)comm_size + 1) *
sizeof(*comm_buffer_));
623 size_t offset = (size_t)count * (
size_t)comm_size;
624 *sendcounts = comm_buffer_ + 0 * offset;
625 *recvcounts = comm_buffer_ + 1 * offset;
626 *rdispls = comm_buffer_ + 2 * offset;
627 *sdispls = comm_buffer_ + 3 * offset;
631 comm_buffer_, 0, (
size_t)count * (
size_t)comm_size *
sizeof(*comm_buffer_));
635 size_t * sendcounts,
size_t * recvcounts,
636 size_t * sdispls,
size_t * rdispls) {
647 char const * caller,
char const *
string, MPI_Comm comm,
int allow_null) {
650 (
string != NULL) || allow_null,
651 "ERROR(%s::yac_string_get_pack_size): "
652 "NULL string not allowed when allow_null is false", caller);
654 size_t len = (
string == NULL)?0:strlen(
string);
657 "ERROR(%s::yac_string_get_pack_size): string too long", caller);
659 int strlen_pack_size, string_pack_size;
660 yac_mpi_call(MPI_Pack_size(1, MPI_INT, comm, &strlen_pack_size), comm);
662 MPI_Pack_size((
int)len, MPI_CHAR, comm, &string_pack_size), comm);
664 return (
size_t)strlen_pack_size + (size_t)string_pack_size;
668 char const * caller,
char const *
string,
void *
buffer,
int buffer_size,
669 int * position, MPI_Comm comm,
int allow_null) {
672 (
string != NULL) || allow_null,
673 "ERROR(%s::yac_string_pack): "
674 "NULL string not allowed when allow_null is false", caller)
678 if (
string == NULL) {
679 len_int = allow_null ? -1 : 0;
681 size_t len = strlen(
string);
683 len <= INT_MAX,
"ERROR(%s::yac_string_pack): string too long", caller)
688 MPI_Pack(&len_int, 1, MPI_INT,
buffer, buffer_size, position, comm), comm);
692 MPI_Pack(
string, len_int, MPI_CHAR,
buffer, buffer_size, position, comm),
698 void const *
buffer,
int buffer_size,
int * position, MPI_Comm comm) {
702 MPI_Unpack(
buffer, buffer_size, position, &string_len, 1, MPI_INT, comm),
705 char *
string = NULL;
707 if (string_len > -1) {
709 string =
xmalloc((
size_t)string_len + 1);
710 if (string_len > 0) {
713 buffer, buffer_size, position,
string, string_len, MPI_CHAR, comm),
716 string[string_len] =
'\0';
#define YAC_ASSERT(exp, msg)
#define ENSURE_ARRAY_SIZE(arrayp, curr_array_size, req_size)
struct sin_cos_angle inc_angle — angle between the middle point and the boundary of the spherical cap
#define YAC_ASSERT_F(exp, format,...)
static int mpi_initialised_by_yac
#define YAC_ALLTOALL_P2P_TYPE(NAME, TYPE, TYPE_SIZE, MPI_TYPE)
void yac_alltoallv_p2p_group(void const *send_buffer, int const *sendcounts, int const *sdispls, void *recv_buffer, int const *recvcounts, int const *rdispls, size_t dt_size, MPI_Datatype dt, struct yac_group_comm group_comm)
int yac_group_comm_get_global_rank(struct yac_group_comm group_comm)
void yac_generate_alltoallv_args(int count, size_t const *sendcounts, size_t *recvcounts, size_t *sdispls, size_t *rdispls, MPI_Comm comm)
static int yaxt_initialised_by_yac
void yac_free_comm_buffers(size_t *sendcounts, size_t *recvcounts, size_t *sdispls, size_t *rdispls)
int yac_group_comm_get_rank(struct yac_group_comm group_comm)
int yac_mpi_is_initialised()
void yac_group_comm_split(struct yac_group_comm group_comm, int split_rank, struct yac_group_comm *local_group_comm, struct yac_group_comm *remote_group_comm)
MPI_Datatype yac_get_bounding_circle_mpi_datatype(MPI_Comm comm)
static size_t * comm_buffer
int yac_group_comm_get_global_size(struct yac_group_comm group_comm)
void yac_yaxt_init(MPI_Comm comm)
struct yac_group_comm yac_group_comm_new(MPI_Comm comm)
void yac_mpi_error(int error_code, MPI_Comm comm)
static size_t comm_buffer_array_size
static int yaxt_init_count
void yac_allreduce_sum_dble(double *buffer, int count, struct yac_group_comm group_comm)
void yac_yaxt_init_f2c(MPI_Fint comm)
void yac_get_comm_buffers(int count, size_t **sendcounts, size_t **recvcounts, size_t **sdispls, size_t **rdispls, MPI_Comm comm)
int yac_group_comm_get_size(struct yac_group_comm group_comm)
size_t yac_string_get_pack_size(char const *caller, char const *string, MPI_Comm comm, int allow_null)
Compute number of bytes required to pack a string for MPI transport.
MPI_Datatype yac_create_resized(MPI_Datatype dt, size_t new_size, MPI_Comm comm)
void yac_bcast_group(void *buffer, int count, MPI_Datatype datatype, int root, struct yac_group_comm group_comm)
char * yac_string_unpack(void const *buffer, int buffer_size, int *position, MPI_Comm comm)
Unpack a C string from a buffer packed with yac_string_pack.
void yac_allgather_size_t(const size_t *sendbuf, size_t *recvbuf, int count, struct yac_group_comm group_comm)
static int nearest_power_of_two(int x)
static int comm_buffer_in_use
void yac_string_pack(char const *caller, char const *string, void *buffer, int buffer_size, int *position, MPI_Comm comm, int allow_null)
Pack a C string into a provided buffer using MPI_Pack semantics.
static void yac_yaxt_cleanup()
void yac_group_comm_delete(struct yac_group_comm group_comm)
void yac_alltoallv_p2p(void const *send_buffer, size_t const *sendcounts, size_t const *sdispls, void *recv_buffer, size_t const *recvcounts, size_t const *rdispls, size_t dt_size, MPI_Datatype dt, MPI_Comm comm, char const *caller, int line)
#define yac_mpi_call(call, comm)
#define YAC_MPI_SIZE_T_TYPE