5#ifndef INTERPOLATION_UTILS_H
6#define INTERPOLATION_UTILS_H
14 double const * restrict ** src_fields,
15 double const * restrict ** src_frac_masks,
16 double const * restrict * remote_src_fields,
17 double const * restrict * remote_src_frac_masks,
18 double * restrict * tgt_field,
19 size_t const * restrict tgt_pos,
20 size_t tgt_count,
size_t const * restrict prefix_num_src_per_tgt,
21 double const * restrict weights,
22 size_t const * restrict src_field_idx,
23 size_t const * restrict src_idx,
24 size_t num_src_fields,
size_t collection_size,
25 double frac_mask_fallback_value,
26 double scale_factor,
double scale_summand) {
29 if (tgt_count == 0)
return;
31#define FRAC_MASK_TOL (1e-12)
33#define COMPUTE_TGT_FIELD_WGT_FRAC_(TGT_POS, WEIGHT, SCALE) \
35 for (size_t l = 0; l < collection_size; ++l) { \
36 double const * restrict * curr_local_field_data = \
37 src_fields?src_fields[l]:NULL; \
38 double const * restrict * curr_local_frac_mask_data = \
39 src_frac_masks?src_frac_masks[l]:NULL; \
40 double const * restrict curr_remote_field_data = \
41 remote_src_fields[l * num_src_fields]; \
42 double const * restrict curr_remote_frac_mask_data = \
43 remote_src_frac_masks[l * num_src_fields]; \
44 double * restrict curr_tgt_field = tgt_field[l]; \
48 for (size_t i = 0; i < tgt_count; ++i) { \
49 double result = 0.0; \
50 double frac_weight_sum = 0.0; \
51 size_t const k_bound = prefix_num_src_per_tgt[i+1]; \
52 for (size_t k = prefix_num_src_per_tgt[i]; k < k_bound; ++k) { \
53 double const * restrict frac_mask_data; \
54 double const * restrict src_field_data; \
55 if (src_field_idx[k] == SIZE_MAX) { \
56 frac_mask_data = curr_remote_frac_mask_data; \
57 src_field_data = curr_remote_field_data; \
60 curr_local_frac_mask_data[src_field_idx[k]]; \
61 src_field_data = curr_local_field_data[src_field_idx[k]]; \
63 result += src_field_data[src_idx[k]] * (WEIGHT); \
64 frac_weight_sum += frac_mask_data[src_idx[k]] * (WEIGHT); \
66 curr_tgt_field[(TGT_POS)] = \
67 (fabs(frac_weight_sum) > FRAC_MASK_TOL)? \
68 (SCALE(result / frac_weight_sum)): \
69 frac_mask_fallback_value; \
75#define COMPUTE_TGT_FIELD_WGT_NOFRAC_(TGT_POS, WEIGHT, SCALE) \
77 for (size_t l = 0; l < collection_size; ++l) { \
78 double const * restrict * curr_local_field_data = \
79 src_fields?src_fields[l]:NULL; \
80 double const * restrict curr_remote_field_data = \
81 remote_src_fields[l * num_src_fields]; \
82 double * restrict curr_tgt_field = tgt_field[l]; \
86 for (size_t i = 0; i < tgt_count; ++i) { \
87 double result = 0.0; \
88 size_t const k_bound = prefix_num_src_per_tgt[i+1]; \
89 for (size_t k = prefix_num_src_per_tgt[i]; k < k_bound; ++k) { \
90 double const * restrict src_field_data; \
91 if (src_field_idx[k] == SIZE_MAX) { \
92 src_field_data = curr_remote_field_data; \
94 src_field_data = curr_local_field_data[src_field_idx[k]]; \
96 result += src_field_data[src_idx[k]] * (WEIGHT); \
98 curr_tgt_field[(TGT_POS)] = SCALE(result); \
104#define COMPUTE_TGT_FIELD_WGT(COMPUTE, SCALE) \
106 if (weights != NULL) { \
107 if (tgt_pos != NULL) COMPUTE(tgt_pos[i], weights[k], SCALE) \
108 else COMPUTE(i, weights[k], SCALE) \
110 if (tgt_pos != NULL) COMPUTE(tgt_pos[i], 1.0, SCALE) \
111 else COMPUTE(i, 1.0, SCALE) \
115#define COMPUTE_TGT_FIELD_WGT_FRAC(SCALE) \
116 COMPUTE_TGT_FIELD_WGT(COMPUTE_TGT_FIELD_WGT_FRAC_, SCALE)
118#define COMPUTE_TGT_FIELD_WGT_NOFRAC(SCALE) \
119 COMPUTE_TGT_FIELD_WGT(COMPUTE_TGT_FIELD_WGT_NOFRAC_, SCALE)
121#define NO_SCALING(RESULT) (RESULT)
122#define MULT(RESULT) ((RESULT) * scale_factor)
123#define ADD(RESULT) ((RESULT) + scale_summand)
124#define MULT_ADD(RESULT) ((RESULT) * scale_factor + scale_summand)
126#define COMPUTE_FIELD(COMPUTE_FIELD_FRAC, COMPUTE_FIELD_NOFRAC) \
128 if (YAC_FRAC_MASK_VALUE_IS_VALID(frac_mask_fallback_value)) { \
129 if (scale_factor == 1.0) { \
130 if (scale_summand == 0.0) COMPUTE_FIELD_FRAC(NO_SCALING) \
131 else COMPUTE_FIELD_FRAC(ADD) \
133 if (scale_summand == 0.0) COMPUTE_FIELD_FRAC(MULT) \
134 else COMPUTE_FIELD_FRAC(MULT_ADD) \
137 if (scale_factor == 1.0) { \
138 if (scale_summand == 0.0) COMPUTE_FIELD_NOFRAC(NO_SCALING) \
139 else COMPUTE_FIELD_NOFRAC(ADD) \
141 if (scale_summand == 0.0) COMPUTE_FIELD_NOFRAC(MULT) \
142 else COMPUTE_FIELD_NOFRAC(MULT_ADD) \
149#undef COMPUTE_TGT_FIELD_WGT
150#undef COMPUTE_TGT_FIELD_WGT_FRAC
151#undef COMPUTE_TGT_FIELD_WGT_NOFRAC
152#undef COMPUTE_TGT_FIELD_WGT_FRAC_
153#undef COMPUTE_TGT_FIELD_WGT_NOFRAC_
157 double const * restrict ** src_fields,
158 double const * restrict ** src_frac_masks,
159 double * restrict * tgt_field,
160 size_t * restrict tgt_buffer_sizes,
161 size_t num_src_fields,
size_t collection_size,
162 double frac_mask_fallback_value,
163 double scale_factor,
double scale_summand) {
165#define COMPUTE_TGT_FIELD_FRAC(SCALE) \
167 for (size_t i = 0; i < collection_size; ++i) { \
168 for (size_t j = 0; j < num_src_fields; ++j) { \
169 memcpy(tgt_field[i * num_src_fields + j], \
170 src_fields[i][j], tgt_buffer_sizes[j]); \
171 size_t const tgt_count = \
172 tgt_buffer_sizes[j] / sizeof(***src_fields); \
176 for (size_t k = 0; k < tgt_count; ++k) { \
177 if (fabs(src_frac_masks[i][j][k]) > FRAC_MASK_TOL) \
178 tgt_field[i * num_src_fields + j][k] = \
180 tgt_field[i * num_src_fields + j][k] / \
181 src_frac_masks[i][j][k]); \
183 tgt_field[i * num_src_fields + j][k] = \
184 frac_mask_fallback_value; \
191#define COMPUTE_TGT_FIELD_NOFRAC(SCALE) \
193 for (size_t i = 0; i < collection_size; ++i) { \
194 for (size_t j = 0; j < num_src_fields; ++j) { \
195 memcpy(tgt_field[i * num_src_fields + j], \
196 src_fields[i][j], tgt_buffer_sizes[j]); \
197 size_t const tgt_count = \
198 tgt_buffer_sizes[j] / sizeof(***src_fields); \
202 for (size_t k = 0; k < tgt_count; ++k) \
203 tgt_field[i * num_src_fields + j][k] = \
204 SCALE(tgt_field[i * num_src_fields + j][k]); \
212#undef COMPUTE_TGT_FIELD_NOFRAC
213#undef COMPUTE_TGT_FIELD_FRAC
223#define CHECK_WITH_FRAC_MASK(ROUTINE) \
226 YAC_FRAC_MASK_VALUE_IS_VALID(frac_mask_fallback_value), \
228 "with_frac_mask does not match value provided to constructor\n" \
229 "(frac_mask_fallback_value = %lf with_frac_mask = %d " \
230 "with_frac_mask(constructor) %d)", (ROUTINE), \
231 frac_mask_fallback_value, \
232 frac_mask_fallback_value != YAC_FRAC_MASK_NO_VALUE, with_frac_mask)
246 Xt_redist * redists,
size_t num_fields,
size_t collection_size,
250 Xt_redist * redists,
size_t * min_buffer_sizes,
size_t num_fields,
255 size_t collection_size);
#define COMPUTE_TGT_FIELD_WGT_FRAC(SCALE)
struct yac_interpolation_buffer yac_interpolation_buffer_copy(struct yac_interpolation_buffer buffer, size_t num_fields, size_t collection_size)
#define COMPUTE_FIELD(COMPUTE_FIELD_FRAC, COMPUTE_FIELD_NOFRAC)
static void compute_tgt_field(double const *restrict **src_fields, double const *restrict **src_frac_masks, double *restrict *tgt_field, size_t *restrict tgt_buffer_sizes, size_t num_src_fields, size_t collection_size, double frac_mask_fallback_value, double scale_factor, double scale_summand)
#define COMPUTE_TGT_FIELD_FRAC(SCALE)
struct yac_interpolation_buffer yac_interpolation_buffer_init_2(Xt_redist *redists, size_t *min_buffer_sizes, size_t num_fields, size_t collection_size, enum yac_interpolation_buffer_type type)
static void compute_tgt_field_wgt(double const *restrict **src_fields, double const *restrict **src_frac_masks, double const *restrict *remote_src_fields, double const *restrict *remote_src_frac_masks, double *restrict *tgt_field, size_t const *restrict tgt_pos, size_t tgt_count, size_t const *restrict prefix_num_src_per_tgt, double const *restrict weights, size_t const *restrict src_field_idx, size_t const *restrict src_idx, size_t num_src_fields, size_t collection_size, double frac_mask_fallback_value, double scale_factor, double scale_summand)
void yac_interpolation_buffer_free(struct yac_interpolation_buffer *buffer)
#define COMPUTE_TGT_FIELD_NOFRAC(SCALE)
#define COMPUTE_TGT_FIELD_WGT_NOFRAC(SCALE)
yac_interpolation_buffer_type
struct yac_interpolation_buffer yac_interpolation_buffer_init(Xt_redist *redists, size_t num_fields, size_t collection_size, enum yac_interpolation_buffer_type type)
add versions of standard API functions not returning on error