// NOTE(review): this chunk is a garbled listing -- the leading integers
// (119, 121, ...) are embedded line numbers from the extraction, not code,
// and the enclosing function's signature lies above the visible range
// (presumably an MPI error handler receiving `error_code` and `comm` --
// confirm against the full file).
119 MPI_Comm_rank(comm, &rank);
// Translate the MPI error code into its error class and a readable string.
121 char error_string[MPI_MAX_ERROR_STRING];
122 int length_of_error_string, error_class;
124 MPI_Error_class(error_code, &error_class);
125 MPI_Error_string(error_class, error_string, &length_of_error_string);
// Report "<rank>: <message>" on stderr, then tear down the communicator.
126 fprintf(stderr,
"%3d: %s\n", rank, error_string);
127 MPI_Abort(comm, error_code);
132 void const * send_buffer,
size_t const * sendcounts,
size_t const * sdispls,
133 void * recv_buffer,
size_t const * recvcounts,
size_t const * rdispls,
134 size_t dt_size, MPI_Datatype dt, MPI_Comm comm,
char const * caller,
int line) {
136#define USE_P2P_ALLTOALLV
137#ifdef USE_P2P_ALLTOALLV
138 int comm_rank, comm_size;
143 for (
int i = 0; i < comm_size; ++i)
144 req_count += (sendcounts[i] > 0) + (recvcounts[i] > 0);
145 MPI_Request * req =
xmalloc((
size_t)req_count *
sizeof(*req));
148 for (
int j = 0, lb = comm_rank, ub = comm_size; j < 2;
149 ++j, lb = 0, ub = comm_rank) {
150 for (
int i = lb; i < ub; ++i) {
151 if (sendcounts[i] > 0) {
153 sendcounts[i] <= INT_MAX,
154 "ERROR(%s(%d)::yac_alltoallv_p2p): "
155 "sendcounts[%d] = %zu exceeds INT_MAX (%d)",
156 caller, line, i, sendcounts[i], (
int)INT_MAX)
159 (
void const *)((
unsigned char *)send_buffer +
160 dt_size * sdispls[i]),
161 (
int)(sendcounts[i]), dt, i, 0,
162 comm, req + req_count), comm);
165 if (recvcounts[i] > 0) {
167 recvcounts[i] <= INT_MAX,
168 "ERROR(%s(%d)::yac_alltoallv_p2p): "
169 "recvcounts[%d] = %zu exceeds INT_MAX (%d)",
170 caller, line, i, recvcounts[i], (
int)INT_MAX)
173 (
void *)((
unsigned char *)recv_buffer +
174 dt_size * rdispls[i]),
175 (
int)(recvcounts[i]), dt, i, 0,
176 comm, req + req_count), comm);
181 yac_mpi_call(MPI_Waitall(req_count, req, MPI_STATUSES_IGNORE), comm);
186 int * int_buffer =
xmalloc(4 * comm_size *
sizeof(*int_buffer));
187 int * int_sendcounts = int_buffer + 0 * comm_size;
188 int * int_sdispls = int_buffer + 1 * comm_size;
189 int * int_recvcounts = int_buffer + 2 * comm_size;
190 int * int_rdispls = int_buffer + 3 * comm_size;
191 for (
int i = 0; i < comm_size; ++i) {
193 sendcounts[i] <= INT_MAX,
194 "ERROR(%s(%d)::yac_alltoallv_p2p): "
195 "sendcounts[%d] = %zu exceeds INT_MAX (%d)",
196 caller, line, i, sendcounts[i], (
int)INT_MAX)
198 sdispls[i] <= INT_MAX,
199 "ERROR(%s(%d)::yac_alltoallv_p2p): "
200 "sdispls[%d] = %zu exceeds INT_MAX (%d)",
201 caller, line, i, sdispls[i], (
int)INT_MAX)
203 recvcounts[i] <= INT_MAX,
204 "ERROR(%s(%d)::yac_alltoallv_p2p): "
205 "recvcounts[%d] = %zu exceeds INT_MAX (%d)",
206 caller, line, i, recvcounts[i], (
int)INT_MAX)
208 rdispls[i] <= INT_MAX,
209 "ERROR(%s(%d)::yac_alltoallv_p2p): "
210 "rdispls[%d] = %zu exceeds INT_MAX (%d)",
211 caller, line, i, rdispls[i], (
int)INT_MAX)
212 int_sendcounts[i] = (int)(sendcounts[i]);
213 int_sdispls[i] = (int)(sdispls[i]);
214 int_recvcounts[i] = (int)(recvcounts[i]);
215 int_rdispls[i] = (int)(rdispls[i]);
218 MPI_Alltoallv(send_buffer, int_sendcounts, int_sdispls, dt,
219 recv_buffer, int_recvcounts, int_rdispls, dt, comm), comm);
243 void const * send_buffer,
int const * sendcounts,
int const * sdispls,
244 void * recv_buffer,
int const * recvcounts,
int const * rdispls,
245 size_t dt_size, MPI_Datatype dt, struct
yac_group_comm group_comm) {
247 MPI_Comm comm = group_comm.comm;
250 int rank = comm_rank - group_comm.start;
253 for (
int i = 0; i < group_comm.size; ++i)
254 req_count += (sendcounts[i] > 0) + (recvcounts[i] > 0);
255 MPI_Request * req =
xmalloc((
size_t)req_count *
sizeof(*req));
258 for (
int j = 0, lb = rank, ub = group_comm.size; j < 2;
259 ++j, lb = 0, ub = rank) {
260 for (
int i = lb; i < ub; ++i) {
261 if (sendcounts[i] > 0) {
265 (
void const *)((
unsigned char *)send_buffer +
266 dt_size * (
size_t)(sdispls[i])),
267 sendcounts[i], dt, i + group_comm.start, 0,
268 comm, req + req_count), comm);
271 if (recvcounts[i] > 0) {
274 (
void *)((
unsigned char *)recv_buffer +
275 dt_size * (
size_t)(rdispls[i])),
276 recvcounts[i], dt, i + group_comm.start, 0,
277 comm, req + req_count), comm);
282 yac_mpi_call(MPI_Waitall(req_count, req, MPI_STATUSES_IGNORE), comm);
// NOTE(review): garbled listing fragment -- the leading integers are
// embedded line numbers and the enclosing function's signature is above
// the visible range. The pattern matches a recursive-doubling allreduce
// (sum) over `count` doubles within a process group -- confirm against
// the full file.
301 int rank = comm_rank - group_comm.
start;
// rem = number of ranks beyond the largest power of two (pof2) in the group
303 int rem = group_comm.
size - pof2;
305 double * recv_buffer =
xmalloc((
size_t)count *
sizeof(*recv_buffer));
// Fold the first 2*rem ranks down to pof2 participants: one rank of each
// pair receives and accumulates its partner's data, the other sends its
// data away and drops out of the main exchange.
307 if (rank < 2 * rem) {
312 (
void*)recv_buffer, count, MPI_DOUBLE, rank - 1 + group_comm.
start, 0,
313 group_comm.
comm, MPI_STATUS_IGNORE), group_comm.
comm);
314 for (
int i = 0; i < count; ++i) buffer[i] += recv_buffer[i];
319 (
void const *)buffer, count, MPI_DOUBLE, rank + 1 + group_comm.
start,
320 0, group_comm.
comm), group_comm.
comm);
// compacted rank among the pof2 remaining participants
324 my_rank = rank - rem;
// Recursive doubling: exchange with partner my_rank ^ mask and
// accumulate, doubling mask each round until mask reaches pof2.
328 while (mask < pof2) {
329 int newdst = my_rank ^ mask;
// map the compacted partner rank back to a real group rank
331 if (newdst < rem) dst = newdst * 2 + 1;
332 else dst = newdst + rem;
335 (
void const*)buffer, count, MPI_DOUBLE, dst + group_comm.
start, 0,
336 (
void*)recv_buffer, count, MPI_DOUBLE, dst + group_comm.
start, 0,
337 group_comm.
comm, MPI_STATUS_IGNORE),
339 for (
int i = 0; i < count; ++i) buffer[i] += recv_buffer[i];
// Finally, send the finished result back to the folded-out ranks.
344 if (rank < 2 * rem) {
348 (
void const*)buffer, count, MPI_DOUBLE, rank - 1 + group_comm.
start,
349 0, group_comm.
comm), group_comm.
comm);
353 (
void*)buffer, count, MPI_DOUBLE, rank + 1 + group_comm.
start, 0,
354 group_comm.
comm, MPI_STATUS_IGNORE), group_comm.
comm);
// NOTE(review): garbled listing fragment of a group broadcast routine;
// the function-name line above is missing (visible parameters:
// buffer / count / datatype / root). The code pattern matches a
// binomial-tree broadcast within a process group -- confirm against the
// full file.
412 void * buffer,
int count, MPI_Datatype datatype,
int root,
417 int rank = comm_rank - group_comm.
start;
// If root lies outside [start, start+size), first hand the data over to
// the group's first rank, which then acts as the effective root.
420 if ((root < group_comm.
start) ||
421 (root >= group_comm.
start + group_comm.
size)) {
423 if (comm_rank == root) {
426 (
void const*)buffer, count, datatype, group_comm.
start, 0,
429 }
else if (comm_rank == group_comm.
start) {
432 buffer, count, datatype, root, 0, group_comm.
comm,
433 MPI_STATUS_IGNORE), group_comm.
comm);
// from here on root is expressed as a group-relative rank
437 root -= group_comm.
start;
// temp_rank: position in a ring rotated so that root sits at 0
443 int temp_rank = (group_comm.
size + rank - root) % group_comm.
size;
// highest set bit of the rotated rank identifies the parent to receive from
446 while (bit <= temp_rank) bit <<= 1;
450 (((temp_rank ^ bit) + root) % group_comm.
size) + group_comm.
start;
453 MPI_Recv(buffer, count, datatype, src_rank, 0, group_comm.
comm,
454 MPI_STATUS_IGNORE), group_comm.
comm);
// Forward to children: ranks temp_rank | bit for each higher bit still
// inside the group.
458 int temp_rank = (group_comm.
size + rank - root) % group_comm.
size;
459 int bit = 1, send_rank;
461 while(bit <= temp_rank) bit <<= 1;
463 while ((send_rank = temp_rank | bit) < group_comm.
size) {
// rotate the child's ring position back to a real communicator rank
467 send_rank = ((send_rank + root) % group_comm.
size) + group_comm.
start;
471 (
void const*)buffer, count, datatype, send_rank, 0, group_comm.
comm),
// NOTE(review): garbled listing fragment; the enclosing function's
// signature is above the visible range. Builds an MPI struct datatype
// for a bounding-circle record with three double-typed pieces:
// base_vector[3], inc_angle.sin and inc_angle.cos (offsets taken
// relative to a `dummy` instance declared outside the visible range).
539 MPI_Datatype bnd_circle_dt;
540 int array_of_blocklengths[] = {3, 1, 1};
// Field offsets computed manually as pointer differences; the intptr_t
// round-trip keeps the subtraction well-defined for MPI_Aint.
541 const MPI_Aint array_of_displacements[] =
542 {(MPI_Aint)(intptr_t)(
const void *)&(dummy.
base_vector[0]) -
543 (MPI_Aint)(intptr_t)(
const void *)&dummy,
544 (MPI_Aint)(intptr_t)(
const void *)&(dummy.
inc_angle.
sin) -
545 (MPI_Aint)(intptr_t)(
const void *)&dummy,
546 (MPI_Aint)(intptr_t)(
const void *)&(dummy.
inc_angle.
cos) -
547 (MPI_Aint)(intptr_t)(
const void *)&dummy};
548 const MPI_Datatype array_of_types[] =
549 {MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE};
551 MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacements,
552 array_of_types, &bnd_circle_dt), comm);
// NOTE(review): garbled fragment; the function-name line is missing.
// Takes a datatype, a new extent in bytes, and a communicator, and
// produces a resized copy of the datatype in `resized_dt`.
557 MPI_Datatype dt,
size_t new_size, MPI_Comm comm) {
559 MPI_Datatype resized_dt;
// Open MPI workaround: preserve the datatype's original lower bound
// instead of forcing it to 0 when resizing -- TODO confirm the
// underlying Open MPI issue this works around.
561#define OPENMPI_WORKAROUND
562#ifdef OPENMPI_WORKAROUND
564 MPI_Type_get_extent(dt, &lb, &extent);
566 MPI_Type_create_resized(dt, lb, (MPI_Aint)new_size, &resized_dt), comm);
// alternative path (disabled above): always resize with lower bound 0
569 MPI_Type_create_resized(dt, 0, (MPI_Aint)new_size, &resized_dt), comm);
571#undef OPENMPI_WORKAROUND
void yac_alltoallv_p2p_group(void const *send_buffer, int const *sendcounts, int const *sdispls, void *recv_buffer, int const *recvcounts, int const *rdispls, size_t dt_size, MPI_Datatype dt, struct yac_group_comm group_comm)
void yac_alltoallv_p2p(void const *send_buffer, size_t const *sendcounts, size_t const *sdispls, void *recv_buffer, size_t const *recvcounts, size_t const *rdispls, size_t dt_size, MPI_Datatype dt, MPI_Comm comm, char const *caller, int line)