Yet Another eXchange Tool 0.11.2
Loading...
Searching...
No Matches
xt_arithmetic_util.h
Go to the documentation of this file.
1
12/*
13 * Keywords:
14 * Maintainer: Jörg Behrens <behrens@dkrz.de>
15 * Moritz Hanke <hanke@dkrz.de>
16 * Thomas Jahns <jahns@dkrz.de>
17 * URL: https://dkrz-sw.gitlab-pages.dkrz.de/yaxt/
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions are
21 * met:
22 *
23 * Redistributions of source code must retain the above copyright notice,
24 * this list of conditions and the following disclaimer.
25 *
26 * Redistributions in binary form must reproduce the above copyright
27 * notice, this list of conditions and the following disclaimer in the
28 * documentation and/or other materials provided with the distribution.
29 *
30 * Neither the name of the DKRZ GmbH nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
35 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
36 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
37 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
38 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
39 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
40 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
41 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
42 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
43 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
44 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 */
46#ifndef XT_ARITHMETIC_UTIL_H
47#define XT_ARITHMETIC_UTIL_H
48
49#ifdef HAVE_CONFIG_H
50#include "config.h"
51#endif
52
53#include <assert.h>
54#include <limits.h>
55#include <stddef.h>
56
57#if HAVE_DECL___LZCNT || HAVE_DECL___LZCNT64 || HAVE_DECL___LZCNT16
58# include <intrin.h>
59#endif
60
61#include "xt/xt_core.h"
62
63enum {
64 xt_int_bits = sizeof (Xt_int) * CHAR_BIT,
65};
66
67/* simple operations on Xt_int */
71static inline Xt_int
73{
74#if (-1 >> 1) == -1
75 return ((x >> (sizeof (Xt_int) * CHAR_BIT - 1)) |
76 (Xt_int)((Xt_uint)(~x) >> (sizeof (Xt_int) * CHAR_BIT - 1)));
77#else
78 return (Xt_int)((x >= 0) - (x < 0));
79#endif
80}
81
85static inline int
86isign(int x)
87{
88#if (-1 >> 1) == -1
89 return ((x >> (sizeof (x) * CHAR_BIT - 1)) |
90 (int)((unsigned int)(~x) >> (sizeof (x) * CHAR_BIT - 1)));
91#else
92 return (x >= 0) - (x < 0);
93#endif
94}
95
99static inline int
101{
102#if (-1 >> 1) == -1
103 return x >> (sizeof (int) * CHAR_BIT - 1);
104#else
105#warning Unusual behaviour of shift operator detected.
106 return (x < 0) * ~0;
107#endif
108}
109
113static inline MPI_Aint
114asign_mask(MPI_Aint x)
115{
116#if (-1 >> 1) == -1
117 return x >> (sizeof (MPI_Aint) * CHAR_BIT - 1);
118#else
119#warning Unusual behaviour of shift operator detected.
120 return (x < 0) * ~(MPI_Aint)0;
121#endif
122}
123
124
128static inline Xt_int
130{
131#if (-1 >> 1) == -1
132 return x >> (sizeof (x) * CHAR_BIT - 1);
133#else
134#warning Unusual behaviour of shift operator detected.
135 return (x < 0) * ~(Xt_int)0;
136#endif
137}
138
142static inline long long
143llsign(long long x)
144{
145#if (-1 >> 1) == -1
146 return ((x >> (sizeof (x) * CHAR_BIT - 1)) |
147 (long long)((unsigned long long)(~x) >> (sizeof (x) * CHAR_BIT - 1)));
148#else
149 return (x >= 0) - (x < 0);
150#endif
151}
152
156static inline long long
157llsign_mask(long long x)
158{
159#if (-1 >> 1) == -1
160 return x >> (sizeof (x) * CHAR_BIT - 1);
161#else
162#warning Unusual behaviour of shift operator detected.
163 return (x < 0) * ~0LL;
164#endif
165}
166
170static inline int
171imin(int a, int b)
172{
173 return a <= b ? a : b;
174}
175
176/* return number of leading zeroes in an Xt_uint */
177#ifdef XT_INT_CLZ
178#define xinlz(v) XT_INT_CLZ(v)
179#else
180static inline int
182{
183 int c = 0;
184#if SIZEOF_XT_INT * CHAR_BIT == 64
185 if (v <= UINT64_C(0x00000000ffffffff)) {c += 32; v <<= 32;}
186 if (v <= UINT64_C(0x0000ffffffffffff)) {c += 16; v <<= 16;}
187 if (v <= UINT64_C(0x00ffffffffffffff)) {c += 8; v <<= 8;}
188 if (v <= UINT64_C(0x0fffffffffffffff)) {c += 4; v <<= 4;}
189 if (v <= UINT64_C(0x3fffffffffffffff)) {c += 2; v <<= 2;}
190 if (v <= UINT64_C(0x7fffffffffffffff)) {c += 1;}
191#elif SIZEOF_XT_INT * CHAR_BIT == 32
192 if (v <= 0x0000ffffUL) {c += 16; v <<= 16;}
193 if (v <= 0x00ffffffUL) {c += 8; v <<= 8;}
194 if (v <= 0x0fffffffUL) {c += 4; v <<= 4;}
195 if (v <= 0x3fffffffUL) {c += 2; v <<= 2;}
196 if (v <= 0x7fffffffUL) {c += 1;}
197#elif SIZEOF_XT_INT * CHAR_BIT == 16
198 if (v <= 0x00ffU) {c += 8; v <<= 8;}
199 if (v <= 0x0fffU) {c += 4; v <<= 4;}
200 if (v <= 0x3fffU) {c += 2; v <<= 2;}
201 if (v <= 0x7fffU) {c += 1;}
202#else
203#error "Unexpected size of Xt_int.\n"
204#endif
205 return c;
206}
207#endif
208
209/* return number of trailing zeroes in an Xt_uint */
210#ifdef XT_INT_CTZ
211#define xintz(v) XT_INT_CTZ(v)
212#else
213static inline int
215{
216 Xt_uint lc = (Xt_uint)((v ^ (v - 1)) >> 1);
217 return lc ? xt_int_bits - xinlz(lc) : 0;
218}
219#endif
220
221#ifdef HAVE_ASM_BSR
222static inline size_t
223next_2_pow(size_t v)
224{
225 enum {
226 size_t_bits = sizeof (size_t) * CHAR_BIT,
227 };
228 size_t r;
229 if (v <= 1) {
230 r = 1;
231 } else {
232 size_t ms1bpos;
233#if SIZEOF_LONG == 8
234 __asm__ ("bsrq %1, %0" : "=r" (ms1bpos) : "r" (v-1));
235#elif SIZEOF_LONG == 4
236 __asm__ ("bsrl %1, %0" : "=r" (ms1bpos) : "r" (v-1));
237#else
238#error "Unexpected size of size_t!"
239#endif
240 r = (size_t)1 << (ms1bpos+1);
241 }
242 return r;
243}
244#elif HAVE_DECL___BUILTIN_CLZL
245#define clzl(v) (__builtin_clzl(v))
246#elif HAVE_DECL___LZCNT && SIZEOF_LONG == SIZEOF_INT
247#define clzl(v) ((int)(__lzcnt(v)))
248#elif HAVE_DECL___LZCNT64 && SIZEOF_LONG == 8 && CHAR_BIT == 8
249#define clzl(v) ((int)(__lzcnt64(v)))
250#else
251static inline size_t
252next_2_pow(size_t v)
253{
254 v--;
255 v |= v >> 1;
256 v |= v >> 2;
257 v |= v >> 4;
258 v |= v >> 8;
259 v |= v >> 16;
260#if SIZEOF_SIZE_T * CHAR_BIT == 64
261 v |= v >> 32;
262#endif
263 return v+1;
264}
265#endif
266#ifdef clzl
267static inline size_t
268next_2_pow(size_t v)
269{
270 enum {
271 size_t_bits = sizeof (size_t) * CHAR_BIT,
272 };
273 size_t r;
274 if (v == 0) {
275 r = 1;
276 } else {
277 r = clzl(v-1);
278 r = (size_t)1 << (size_t_bits - r);
279 }
280 return r;
281}
282
283#endif
284
285static inline Xt_int
287{
288 return (Xt_int)((a - b) & -(a >= b));
289}
290
291
292// For signed integers, a similar method follows.
293//
294// Given c > 1 and odd, compute m such that (c * m) mod 2^n == 1
295// Then if c divides x (x%c ==0), the quotient is given by q = x/c == x*m mod 2^n
296//
297// x can range from ⎡-2^(n-1)/c⎤ * c, ... -c, 0, c, ... ⎣(2^(n-1) - 1)/c⎦ * c
298// Thus, x*m mod 2^n is ⎡-2^(n-1)/c⎤, ... -2, -1, 0, 1, 2, ... ⎣(2^(n-1) - 1)/c⎦
299//
300// So, x is a multiple of c if and only if:
301// ⎡-2^(n-1)/c⎤ <= x*m mod 2^n <= ⎣(2^(n-1) - 1)/c⎦
302//
303// Since c > 1 and odd, this can be simplified by
304// ⎡-2^(n-1)/c⎤ == ⎡(-2^(n-1) + 1)/c⎤ == -⎣(2^(n-1) - 1)/c⎦
305//
306// -⎣(2^(n-1) - 1)/c⎦ <= x*m mod 2^n <= ⎣(2^(n-1) - 1)/c⎦
307//
308// To extend this to even integers, consider c = d0 * 2^k where d0 is odd.
309// We can test whether x is divisible by both d0 and 2^k.
310//
311// Let m be such that (d0 * m) mod 2^n == 1.
312// Let q = x*m mod 2^n. Then c divides x if:
313//
314// -⎣(2^(n-1) - 1)/d0⎦ <= q <= ⎣(2^(n-1) - 1)/d0⎦ and q ends in at least k 0-bits
315//
316// To transform this to a single comparison, we use the following theorem (ZRS in Hacker's Delight).
317//
318// For a >= 0 the following conditions are equivalent:
319// 1) -a <= x <= a and x ends in at least k 0-bits
320// 2) RotRight(x+a', k) <= ⎣2a'/2^k⎦
321//
322// Where a' = a & -2^k (a with its right k bits set to zero)
323//
324// To see that 1 & 2 are equivalent, note that -a <= x <= a is equivalent to
325// -a' <= x <= a' if and only if x ends in at least k 0-bits. Adding -a' to each side gives,
326// 0 <= x + a' <= 2a' and x + a' ends in at least k 0-bits if and only if x does since a' has
327// k 0-bits by definition. We can use theorem ZRU above with x -> x + a' and a -> 2a' giving 1) == 2).
328//
329// Let m be such that (d0 * m) mod 2^n == 1.
330// Let q = x*m mod 2^n.
331// Let a' = ⎣(2^(n-1) - 1)/d0⎦ & -2^k
332//
333// Then the divisibility test is:
334//
335// RotRight(q+a', k) <= ⎣2a'/2^k⎦
336//
337// Note that the calculation is performed using unsigned integers.
338// Since a' can have n-1 bits, 2a' may have n bits and there is no
339// risk of overflow.
340
341#if defined XT_USE_FAST_DIVISIBLE_TEST \
342 || (defined __ICC && defined __OPTIMIZE__) \
343 || (defined __GNUC__ && defined __OPTIMIZE__) \
344 || (defined __PGI)
345#undef XT_USE_FAST_DIVISIBLE_TEST
346#define XT_USE_FAST_DIVISIBLE_TEST
347
348struct xt_fast_div_coeff {
349 Xt_uint bs; // trailingZeros(c)
350 Xt_uint minv; // minv * (c>>bs) mod 2^n == 1 multiplicative inverse of
351 // odd portion modulo 2^n
352 Xt_uint a; // ⎣(2^(n-1) - 1)/ (c>>bs)⎦ & -(1<<bs) additive constant
353 Xt_uint max; // ⎣(2 a) / (1<<bs)⎦ max value to for divisibility
354};
355
356static inline struct xt_fast_div_coeff
357get_fast_div_coeff(Xt_uint d)
358{
359 assert(d > 0);
360 // the initial values work for d == 1
361 struct xt_fast_div_coeff coeff = { .bs = 0, .minv = 0, .a = 0, .max = 0 };
362 if (d > 1) {
363 int bs = xintz(d);
364 Xt_uint d0 = (Xt_uint)(d >> bs); // the odd portion of the divisor
365
366 Xt_uint mask = (Xt_uint)(~(Xt_uint)0);
367
368 // Calculate the multiplicative inverse via Newton's method.
369 // Quadratic convergence doubles the number of correct bits per iteration.
370 Xt_uint m = d0; // initial guess correct to 3-bits d0*d0 mod 8 == 1
371 m = (Xt_uint)(m * (2 - m*d0)); // 6-bits
372 m = (Xt_uint)(m * (2 - m*d0)); // 12-bits
373 m = (Xt_uint)(m * (2 - m*d0)); // 24-bits
374#if SIZEOF_XT_INT * CHAR_BIT > 24
375 m = (Xt_uint)(m * (2 - m*d0)); // 48-bits
376#if SIZEOF_XT_INT * CHAR_BIT > 48
377 m = (Xt_uint)(m * (2 - m*d0)); // 96-bits >= 64-bits
378#endif
379#endif
380 Xt_uint a = (Xt_uint)(((mask >> 1) / d0) & (Xt_uint)(-(1 << bs)));
381 Xt_uint max = (Xt_uint)((2 * a) >> bs);
382
383 coeff.bs = (Xt_uint)bs;
384 coeff.minv = m;
385 coeff.a = a;
386 coeff.max = max;
387 }
388 return coeff;
389}
390
391static inline int
392fast_divisible(struct xt_fast_div_coeff coeff, Xt_int i)
393{
394 Xt_uint k = coeff.bs;
395 Xt_uint mul = (Xt_uint)((Xt_uint)i * coeff.minv + coeff.a);
396 Xt_uint rot = (Xt_uint)((mul >> k)
397 | (mul<<((xt_int_bits-k)&(xt_int_bits-1))));
398 return rot <= coeff.max;
399}
400
401#define is_divisible(divisor, coeff, dividend) fast_divisible(coeff, dividend)
402#else
403#define is_divisible(divisor, coeff, dividend) (divisor==0 || (dividend)%(divisor)==0)
404#endif
405
406#endif
407
408/*
409 * Local Variables:
410 * c-basic-offset: 2
411 * coding: utf-8
412 * indent-tabs-mode: nil
413 * show-trailing-whitespace: t
414 * require-trailing-newline: t
415 * End:
416 */
@ xt_int_bits
static int isign(int x)
static int isign_mask(int x)
static Xt_int Xt_isign_mask(Xt_int x)
static MPI_Aint asign_mask(MPI_Aint x)
static size_t next_2_pow(size_t v)
static int xinlz(Xt_uint v)
static int imin(int a, int b)
static int xintz(Xt_uint v)
static Xt_int Xt_isign(Xt_int x)
static long long llsign(long long x)
static long long llsign_mask(long long x)
static Xt_int Xt_doz(Xt_int a, Xt_int b)
base definitions header file
XT_INT Xt_int
Definition xt_core.h:72
unsigned XT_INT Xt_uint
Definition xt_core.h:74