Yet Another eXchange Tool 0.11.2
Loading...
Searching...
No Matches
xt_arithmetic_long.h
Go to the documentation of this file.
1
12/*
13 * Keywords:
14 * Maintainer: Jörg Behrens <behrens@dkrz.de>
15 * Moritz Hanke <hanke@dkrz.de>
16 * Thomas Jahns <jahns@dkrz.de>
17 * URL: https://dkrz-sw.gitlab-pages.dkrz.de/yaxt/
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions are
21 * met:
22 *
23 * Redistributions of source code must retain the above copyright notice,
24 * this list of conditions and the following disclaimer.
25 *
26 * Redistributions in binary form must reproduce the above copyright
27 * notice, this list of conditions and the following disclaimer in the
28 * documentation and/or other materials provided with the distribution.
29 *
30 * Neither the name of the DKRZ GmbH nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
35 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
36 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
37 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
38 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
39 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
40 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
41 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
42 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
43 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
44 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 */
46#ifndef XT_ARITHMETIC_LONG_H
47#define XT_ARITHMETIC_LONG_H
48
49#ifdef HAVE_CONFIG_H
50#include "config.h"
51#endif
52
53#include <stdbool.h>
54
55#include "xt/xt_core.h"
56#include "xt_arithmetic_util.h"
57
58/* since the computation of the next overlap of two stripes might
59 * overflow Xt_int (and indeed need 3*xt_int_bits to represent), what
60 * follows is an implementation of double- and triple-length integer
61 * arithmetic, depending on whether a type twice the size of Xt_int
62 * is available.
63 *
64 * The implementation follows recipes from Henry S. Warren, Jr.;
65 * Hacker's Delight, 2-16 and 8-2 (first edition)
66 */
67#ifdef XT_LONG
68typedef XT_LONG Xt_long;
69typedef XT_ULONG Xt_ulong;
73static inline int
75{
76 return (x >= 0) - (x < 0);
77}
78
79static inline Xt_long
81{
82 return x < 0 ? -x : x;
83}
84
85static inline Xt_uint
87{
88 return (Xt_uint)(x >> xt_int_bits);
89}
90
91static inline Xt_uint
93{
94 return (Xt_uint)x;
95}
96
97
98/* can a be represented as Xt_int? */
99static inline bool
101{
102 return (a <= XT_INT_MAX) & (a >= XT_INT_MIN);
103}
104
105static inline Xt_long
106xiimul(Xt_int a, Xt_int b)
107{
108 return (Xt_long)a * b;
109}
110
111typedef struct { Xt_uint hi; Xt_ulong midlo; } Xt_tword;
112
113static inline Xt_uint
114xthi(Xt_tword x)
115{
116 return x.hi;
117}
118
119static inline Xt_uint
121{
122 return (Xt_uint)(x.midlo >> xt_int_bits);
123}
124
125static inline Xt_uint
126xtlo(Xt_tword x)
127{
128 return (Xt_uint)x.midlo;
129}
130
131static inline Xt_tword
132xlimulu(Xt_long a, Xt_uint b)
133{
134 Xt_tword r = { 0U, 0U };
135
136 Xt_ulong t = (Xt_ulong)(Xt_uint)a*b;
137 Xt_uint lo = (Xt_uint)t;
138 t = (Xt_ulong)((Xt_ulong)a >> xt_int_bits)*b + (t >> xt_int_bits);
139 Xt_uint mid = (Xt_uint)t;
140 r.hi = (Xt_uint)(t >> xt_int_bits);
141
142 // Now r has the unsigned product. Correct by
143 // subtracting b*2**(xt_int_bits*2) if a < 0
144
145 if (a < 0) {
146 r.hi = (Xt_uint)(r.hi - (Xt_uint)b);
147 }
148 r.midlo = ((Xt_ulong)mid << xt_int_bits) | lo;
149 return r;
150}
151
152static inline Xt_tword
154{
155 Xt_tword r = { 0U, 0U };
156
157 Xt_ulong t = (Xt_ulong)(Xt_uint)a*(Xt_uint)b;
158 Xt_uint lo = (Xt_uint)t;
159 t = (Xt_ulong)((Xt_ulong)a >> xt_int_bits)*(Xt_uint)b + (t >> xt_int_bits);
160 Xt_uint mid = (Xt_uint)t;
161 r.hi = (Xt_uint)(t >> xt_int_bits);
162
163 // Now r has the unsigned product. Correct by
164 // subtracting b*2**(xt_int_bits*2) if a < 0, and
165 // subtracting a*2**xt_int_bits if b < 0.
166
167 if (a < 0) {
168 r.hi = (Xt_uint)(r.hi - (Xt_uint)b);
169 }
170 if (b < 0) {
171 t = (Xt_ulong)mid - (Xt_uint)a;
172 mid = (Xt_uint)t;
173 Xt_uint borrow = (Xt_uint)t >> (xt_int_bits-1);
174 r.hi = (Xt_uint)(r.hi - (Xt_uint)((Xt_ulong)a >> xt_int_bits) - borrow);
175 }
176 r.midlo = ((Xt_ulong)mid << xt_int_bits) | lo;
177 return r;
178}
179
180static inline int
182{
183 return (a.hi == b.hi) & (a.midlo == b.midlo);
184}
185
186static inline int
188{
189 return a == b;
190}
191
192// Computes the magic number for signed division.
193// Adapted from Henry S. Warren, Hacker's Delight, 2nd ed., 10-1
194struct Xt_muldiv {
195 Xt_int den; // denominator
196 Xt_int M; // Magic number
197 int s; // and shift amount.
198};
199
200static inline struct Xt_muldiv
201xt_get_mulinv(Xt_int d)
202{
203 // Must have 2 <= d <= 2**(xt_int_bits-1)-1
204 // or -2**(xt_int_bits-1) <= d <= -2.
205 Xt_uint delta;
206 const Xt_uint msbs = (Xt_uint)1 << (xt_int_bits-1); // most-significant
207 // bit set
208 // => 2**(xt_int_bits-1)
209
210 Xt_uint ad = (Xt_uint)(XT_INT_ABS(d));
211 Xt_uint t = (Xt_uint)(msbs + ((Xt_uint)d >> (xt_int_bits - 1)));
212 Xt_uint anc = (Xt_uint)(t - 1 - t%ad); // Absolute value of nc.
213 Xt_int p = xt_int_bits - 1; // Init. p.
214 Xt_uint q1 = msbs/anc; // Init. q1 = 2**p/|nc|.
215 Xt_uint r1 = (Xt_uint)(msbs - q1*anc); // Init. r1 = rem(2**p, |nc|).
216 Xt_uint q2 = msbs/ad; // Init. q2 = 2**p/|d|.
217 Xt_uint r2 = (Xt_uint)(msbs - q2*ad); // Init. r2 = rem(2**p, |d|).
218 do {
219 ++p;
220 q1 = (Xt_uint)(2*q1); // Update q1 = 2**p/|nc|.
221 r1 = (Xt_uint)(2*r1); // Update r1 = rem(2**p, |nc|).
222 if (r1 >= anc) { // (Must be an unsigned comparison here).
223 ++q1;
224 r1 = (Xt_uint)(r1 - anc);
225 }
226 q2 = (Xt_uint)(2*q2); // Update q2 = 2**p/|d|.
227 r2 = (Xt_uint)(2*r2); // Update r2 = rem(2**p, |d|).
228 if (r2 >= ad) { // (Must be an unsigned comparison here).
229 ++q2;
230 r2 = (Xt_uint)(r2 - ad);
231 }
232 delta = (Xt_uint)(ad - r2);
233 } while (q1 < delta || (q1 == delta && r1 == 0));
234
235 struct Xt_muldiv mag
236 = { .M = (Xt_int)(d < 1 ? -(Xt_int)(q2 + 1) : (Xt_int)(q2 + 1)),
237 .den = d,
238 .s = (int)(p - xt_int_bits) };
239 return mag;
240}
241
242static inline XT_INT_DIV_T
243fast_division(Xt_int num, struct Xt_muldiv id)
244{
245 Xt_int M = id.M; // Magic num, see xt_get_mulinv
246 Xt_int snum
247 = (Xt_int)(id.den == 1 || (id.den > 0 && M < 0)
248 ? num : id.den < 0 && M > 0 ? -num : 0); // sign-adjusted numerator
249 Xt_int q
250 = (Xt_int)((Xt_long)M * num >> xt_int_bits); //q = floor(M*n/2**32).
251 q = (Xt_int)(q + snum); //q = floor(M*n/2**32) +/- n.
252 q = (Xt_int)(q >> id.s); //q = floor(q/4).
253 q = (Xt_int)(q + (q < 0)); //q is negative (n is positive).
254 Xt_int t = (Xt_int)(q * id.den); // Compute remainder from
255 Xt_int r = (Xt_int)(num - t); // r = n - q*(-7).
256 return (XT_INT_DIV_T){ .quot = q, .rem = r };
257}
258
259#define Xt_div(num, muldiv, den) fast_division(num, muldiv)
260
261#else
262/* stores in one-complement form */
263typedef struct { Xt_uint hi, lo; } Xt_long;
265
269static inline int
271{
272 int sign_bit = (int)(x.hi >> (xt_int_bits - 1));
273 return (sign_bit^1) - sign_bit;
274}
275
276static inline Xt_long
278{
279 Xt_long r = { .lo = (Xt_uint)a,
280 .hi = (Xt_uint)(Xt_isign_mask(a)) };
281 return r;
282}
283
284static inline Xt_long
286{
287 Xt_uint sign_mask = (Xt_uint)(Xt_isign_mask((Xt_int)a.hi));
288 Xt_uint borrow = sign_mask & (Xt_uint)(-(Xt_int)a.lo != 0);
289 Xt_long r = { .hi = ((a.hi + sign_mask) ^ sign_mask) - borrow,
290 .lo = (a.lo + sign_mask) ^ sign_mask };
291 return r;
292}
293
294static inline Xt_uint
296{
297 return x.hi;
298}
299
300static inline Xt_uint
302{
303 return x.lo;
304}
305
306static inline Xt_long
307xlnegate(Xt_long a, bool negate)
308{
309 Xt_uint borrow = (Xt_uint)((Xt_uint)negate & (Xt_uint)(-(Xt_int)a.lo != 0));
310 Xt_long r = { .hi = (a.hi ^ (Xt_uint)-(Xt_int)negate) + negate - borrow,
311 .lo = (a.lo ^ (Xt_uint)-(Xt_int)negate) + negate };
312 return r;
313}
314
315/* is a represntable as Xt_int? */
316static inline bool
318{
319 return Xt_isign_mask((Xt_int)a.lo) == (Xt_int)a.hi;
320}
321
322static inline Xt_long
324{
325 Xt_uint al = (Xt_uint)a, bl = (Xt_uint)b,
326 ah = (Xt_uint)(Xt_isign_mask(a)), bh = (Xt_uint)(Xt_isign_mask(b));
327 Xt_uint carry = ((al & bl) | ((al | bl) & ~(al + bl))) >> (xt_int_bits-1);
328 Xt_long r = { .lo = al + bl, .hi = ah + bh + carry };
329 return r;
330}
331
332static inline Xt_long
334{
335 Xt_uint al = a.lo, bl = (Xt_uint)b,
336 ah = a.hi, bh = (Xt_uint)(Xt_isign_mask(b));
337 Xt_uint carry = ((al & bl) | ((al | bl) & ~(al + bl))) >> (xt_int_bits-1);
338 Xt_long r = { .lo = al + bl, .hi = ah + bh + carry };
339 return r;
340}
341
342/* returns a incremented by value of b, i.e. zero or one */
343static inline Xt_long
344xlinc(Xt_long a, bool b)
345{
346 Xt_uint al = a.lo, bl = (Xt_uint)b, ah = a.hi;
347 Xt_uint carry = bl & !~al;
348 Xt_long r = { .lo = al + bl, .hi = ah + carry };
349 return r;
350}
351
352static inline Xt_long
354{
355 Xt_uint al = a.lo, bl = b.lo, ah = a.hi, bh = b.hi;
356 Xt_uint carry = ((al & bl) | ((al | bl) & ~(al + bl))) >> (xt_int_bits-1);
357 Xt_long r = { .lo = al + bl, .hi = ah + bh + carry };
358 return r;
359}
360
361static inline Xt_long
363{
364 Xt_uint al = (Xt_uint)a, bl = (Xt_uint)b,
365 ah = (Xt_uint)(Xt_isign_mask(a)), bh = (Xt_uint)(Xt_isign_mask(b));
366 Xt_uint carry = ((~al & bl) | ((~(al ^ bl)) & (al - bl))) >> (xt_int_bits-1);
367 Xt_long r = { .lo = al - bl, .hi = ah - bh - carry };
368 return r;
369}
370
371static inline Xt_long
373{
374 Xt_uint al = a.lo, bl = b.lo, ah = a.hi, bh = b.hi;
375 Xt_uint carry = ((~al & bl) | ((~(al ^ bl)) & (al - bl))) >> (xt_int_bits-1);
376 Xt_long r = { .lo = al - bl, .hi = ah - bh - carry };
377 return r;
378}
379
380static inline Xt_long
382{
383 Xt_uint al = a.lo, ah = a.hi;
384 Xt_uint carry
385 = ((~al & (Xt_uint)b) | ((~(al ^ (Xt_uint)b)) & (al - (Xt_uint)b)))
386 >> (xt_int_bits-1);
387 Xt_long r = { .lo = al - (Xt_uint)b, .hi = ah - carry };
388 return r;
389}
390
391static inline Xt_long
393{
394 Xt_uint al = (Xt_uint)a, bl = b.lo,
395 ah = (Xt_uint)(Xt_isign_mask(a)), bh = b.hi;
396 Xt_uint carry = ((~al & bl) | ((~(al ^ bl)) & (al - bl))) >> (xt_int_bits-1);
397 Xt_long r = { .lo = al - bl, .hi = ah - bh - carry };
398 return r;
399}
400
401enum {
403};
404
405/* this is implemented following H.S.Warren Hacker's Delight mulhs (8-2) */
406static inline Xt_long
408{
409 const Xt_uint lo_mask = ((Xt_uint)1 << xt_hint_bits) - 1U;
410 Xt_uint a_lo = (Xt_uint)a & lo_mask,
411 b_lo = (Xt_uint)b & lo_mask,
412 a_hi = (Xt_uint)(a >> xt_hint_bits),
413 b_hi = (Xt_uint)(b >> xt_hint_bits),
414 lo_prod = a_lo*b_lo;
415 Xt_int t = (Xt_int)(a_hi*b_lo + (lo_prod >> xt_hint_bits));
416 Xt_int w1 = (Xt_int)((Xt_uint)t & lo_mask),
417 w2 = t >> xt_hint_bits;
418 w1 = (Xt_int)(a_lo*b_hi + (Xt_uint)w1);
419 Xt_long r = { .hi = a_hi * b_hi + (Xt_uint)w2 + (Xt_uint)(w1 >> xt_hint_bits),
420 .lo = (Xt_uint)(a * b) };
421 return r;
422}
423
424typedef struct { Xt_uint hi, mid, lo; } Xt_tword;
425
426static inline Xt_uint
428{
429 return x.hi;
430}
431
432static inline Xt_uint
434{
435 return x.mid;
436}
437
438static inline Xt_uint
440{
441 return x.lo;
442}
443
444static inline Xt_tword
446{
447 const Xt_uint lo_mask = ((Xt_uint)1 << xt_hint_bits) - 1U;
448 Xt_tword r;
449
450 Xt_uint bl = (Xt_uint)b & lo_mask, bh = (Xt_uint)b >> xt_hint_bits,
451 a0 = a.lo & lo_mask, a1 = a.lo >> xt_hint_bits,
452 a2 = a.hi & lo_mask, a3 = a.hi >> xt_hint_bits;
453 Xt_uint t = a0*bl;
454 Xt_uint accum = t & lo_mask;
455 Xt_uint k;
456 t = a1*bl + (t >> xt_hint_bits);
457 r.lo = accum | (t << xt_hint_bits);
458 k = t >> xt_hint_bits;
459 t = a2*bl + k;
460 accum = t & lo_mask;
461 k = t >> xt_hint_bits;
462 t = a3*bl + k;
463 r.mid = accum | (t << xt_hint_bits);
464 r.hi = t >> xt_hint_bits;
465 t = a0*bh + (r.lo >> xt_hint_bits);
466 r.lo = (r.lo & lo_mask) | (t << xt_hint_bits);
467 k = t >> xt_hint_bits;
468 t = a1*bh + (r.mid & lo_mask) + k;
469 accum = t & lo_mask;
470 k = t >> xt_hint_bits;
471 t = a2*bh + (r.mid >> xt_hint_bits) + k;
472 r.mid = accum | (t << xt_hint_bits);
473 k = t >> xt_hint_bits;
474 r.hi += a3*bh + k;
475
476 // Now r has the unsigned product. Correct by
477 // subtracting b*2**(xt_int_bits*2) if a < 0, and
478 // subtracting a*2**xt_int_bits if b < 0.
479
480 if ((Xt_int)a.hi < 0) {
481 r.hi -= (Xt_uint)b;
482 }
483 if (b < 0) {
484 t = r.mid - a.lo;
485 r.mid = (Xt_uint)t;
486 Xt_uint borrow = t >> (xt_int_bits-1);
487 r.hi -= a.hi + borrow;
488 }
489 return r;
490}
491
492typedef struct { Xt_int quot, rem; } Xt_idiv;
493typedef struct { Xt_long quot, rem; } Xt_ldiv;
494
495/* divide long unsigned value a by b, from
496 * H.S.Warren, Hacker's Delight,
497 * this code is only valid if the result fits an Xt_uint
498 */
499/* This version is divlu1 except with outer loop unrolled, and array un
500changed into local variables. Several of the variables below could be
501"short," but having them fullwords gives better code on gcc/Intel.
502Statistics: Based on one million executions of this program, with
503uniformly distributed random values for the dividend and divisor, the
504number of times in each loop per execution of the program is:
505
506again1: 0.4060
507again2: 0.3469
508
509This is the version that's in the hacker book. */
510static inline Xt_idiv
512{
513 const Xt_uint lo_mask = ((Xt_uint)1 << xt_hint_bits) - 1U;
514 const Xt_uint base = (Xt_uint)1 << xt_hint_bits; // Number base
515 Xt_uint un1, un0, // Norm. dividend LSD's.
516 ah = a.hi, al = a.lo,
517 vn1, vn0, // Norm. divisor digits.
518 q1, q0, // Quotient digits.
519 un32, un21, un10, // Dividend digit pairs.
520 rhat; // A remainder.
521
522 if (ah >= b) // overflow, set remainder to impossible value
523 return (Xt_idiv){ .quot = (Xt_int)~(Xt_uint)0, .rem = (Xt_int)~(Xt_uint)0 };
524
525 // Shift amount for norm.
526 int s = xinlz(b); // 0 <= s <= xt_int_bits.
527 enum { int_bits = sizeof (int) * CHAR_BIT };
528 b = b << s; // Normalize divisor.
529 vn1 = b >> xt_hint_bits; // Break divisor up into
530 vn0 = b & lo_mask; // two half-words
531
532 un32 = (ah << s)
533 | ((al >> (xt_int_bits - s)) & (Xt_uint)(-s >> (int_bits-1)));
534 un10 = al << s; // Shift dividend left.
535
536 un1 = un10 >> xt_hint_bits; // Break right half of
537 un0 = un10 & lo_mask; // dividend into two digits.
538
539 q1 = un32/vn1; // Compute the first
540 rhat = un32 - q1*vn1; // quotient digit, q1.
541 while (q1 >= base || q1*vn0 > base*rhat + un1) {
542 q1 -= 1U;
543 rhat += vn1;
544 if (rhat >= base) break;
545 }
546
547 un21 = un32*base + un1 - q1*b; // Multiply and subtract.
548
549 q0 = un21/vn1; // Compute the second
550 rhat = un21 - q0*vn1; // quotient digit, q0.
551 while (q0 >= base || q0*vn0 > base*rhat + un0) {
552 q0 -= 1U;
553 rhat += vn1;
554 if (rhat >= base) break;
555 }
556
557 return (Xt_idiv){ .quot = (Xt_int)(q1*base + q0),
558 .rem = (Xt_int)((un21*base + un0 - q0*b) >> s)};
559}
560
561typedef XT_SHORT Xt_short;
562typedef unsigned XT_SHORT Xt_ushort;
563
564
565/* q[0], r[0], u[0], and v[0] contain the LEAST significant halfwords.
566(The sequence is in little-endian order).
567
568This first version is a fairly precise implementation of Knuth's
569Algorithm D, for a binary computer with base b = 2**16. The caller
570supplies
571 1. Space q for the quotient, m - n + 1 halfwords (at least one).
572 2. Space r for the remainder (optional), n halfwords.
573 3. The dividend u, m halfwords, m >= 1.
574 4. The divisor v, n halfwords, n >= 2.
575The most significant digit of the divisor, v[n-1], must be nonzero. The
576dividend u may have leading zeros; this just makes the algorithm take
577longer and makes the quotient contain more leading zeros. A value of
578NULL may be given for the address of the remainder to signify that the
579caller does not want the remainder.
580 The program does not alter the input parameters u and v.
581 The quotient and remainder returned may have leading zeros. The
582function itself returns a value of 0 for success and 1 for invalid
583parameters (e.g., division by 0).
584 For now, we must have m >= n. Knuth's Algorithm D also requires
585that the dividend be at least as long as the divisor. (In his terms,
586m >= 0 (unstated). Therefore m+n >= n.) */
587static inline void
588xdivmnu(Xt_ushort *restrict q, Xt_ushort *restrict r,
589 const Xt_ushort *restrict u, const Xt_ushort *restrict v,
590 int m, int n) {
591
592 const Xt_uint lo_mask = ((Xt_uint)1 << xt_hint_bits) - 1U;
593 const Xt_uint base = (Xt_uint)1 << xt_hint_bits; // Number base
594 Xt_int s, t;
595
596 if (m < n || n <= 0 || v[n-1] == 0)
597 return; // Return if invalid param.
598
599 if (n == 1) { // Take care of
600 Xt_uint k = 0; // the case of a
601 for (int j = m - 1; j >= 0; j--) { // single-digit
602 q[j] = (Xt_ushort)((k*base + u[j])/v[0]); // divisor here.
603 k = (k*base + (Xt_int)u[j]) - (Xt_int)(q[j]*v[0]);
604 }
605 if (r != NULL) r[0] = (Xt_ushort)k;
606 return;
607 }
608
609 // Normalize by shifting v left just enough so that
610 // its high-order bit is on, and shift u left the
611 // same amount. We may have to append a high-order
612 // digit on the dividend; we do that unconditionally.
613
614 s = xinlz(v[n-1]) - xt_hint_bits; // 0 <= s <= 15.
615 Xt_ushort vn[n]; /* normalized v */
616 for (int i = n - 1; i > 0; i--)
617 vn[i] = (v[i] << s) | (v[i-1] >> (xt_hint_bits-s));
618 vn[0] = v[0] << s;
619
620 Xt_ushort un[m + 1]; /* normalized u */
621 un[m] = u[m-1] >> (xt_hint_bits-s);
622 for (int i = m - 1; i > 0; i--)
623 un[i] = (u[i] << s) | (u[i-1] >> (xt_hint_bits-s));
624 un[0] = u[0] << s;
625
626 for (int j = m - n; j >= 0; j--) { // Main loop.
627 // Compute estimated quotient digit qhat of q[j].
628 Xt_uint qhat = (un[j+n]*base + un[j+n-1])/vn[n-1],
629 // and remainder
630 rhat = (un[j+n]*base + un[j+n-1]) - qhat*vn[n-1];
631 while (qhat >= base || qhat*vn[n-2] > base*rhat + un[j+n-2]) {
632 --qhat;
633 rhat += vn[n-1];
634 if (rhat >= base) break;
635 }
636
637 // Multiply and subtract.
638 Xt_int k = 0;
639 for (int i = 0; i < n; i++) {
640 Xt_uint p = qhat*vn[i]; // Product of two digits.
641 t = un[i+j] - k - (Xt_int)(p & lo_mask);
642 un[i+j] = (Xt_ushort)t;
643 k = (Xt_int)(p >> xt_hint_bits) - (t >> xt_hint_bits);
644 }
645 t = un[j+n] - k;
646 un[j+n] = (Xt_ushort)t;
647
648 q[j] = (Xt_ushort)qhat; // Store quotient digit.
649 if (t < 0) { // If we subtracted too
650 q[j] = q[j] - 1; // much, add back.
651 k = 0;
652 for (int i = 0; i < n; i++) {
653 t = un[i+j] + vn[i] + k;
654 un[i+j] = (Xt_ushort)t;
655 k = t >> xt_hint_bits;
656 }
657 un[j+n] = (Xt_ushort)(un[j+n] + k);
658 }
659 } // End j.
660 // If the caller wants the remainder, unnormalize
661 // it and pass it back.
662 if (r != NULL) {
663 for (int i = 0; i < n; i++)
664 r[i] = (un[i] >> s) | (un[i+1] << (xt_hint_bits-s));
665 }
666}
667
668static inline void
670{
671 a[0] = (Xt_ushort)u.lo;
672 a[1] = (Xt_ushort)(u.lo >> xt_hint_bits);
673 a[2] = (Xt_ushort)u.hi;
674 a[3] = (Xt_ushort)(u.hi >> xt_hint_bits);
675}
676
677static inline Xt_long
679{
680 Xt_long u;
681 u.lo = (Xt_uint)a[0] | (Xt_uint)a[1] << xt_hint_bits;
682 u.hi = (Xt_uint)a[2] | (Xt_uint)a[3] << xt_hint_bits;
683 return u;
684}
685
686static inline Xt_ldiv
688{
689 const Xt_uint lo_mask = ((Xt_uint)1 << xt_hint_bits) - 1U;
690 const Xt_uint base = (Xt_uint)1 << xt_hint_bits; // Number base
691
692 Xt_uint qhat; // Estimated quotient digit.
693 Xt_uint rhat; // A remainder.
694 Xt_ldiv d;
695
696 if (v.hi == 0U) { // Take care of the case of a single-word
697 d.quot.hi = u.hi/v.lo; // divisor here.
698 Xt_uint k = u.hi - d.quot.hi*v.lo;
699 Xt_idiv dl = xlidivu((Xt_ulong){ .hi = k, .lo = u.lo }, v.lo);
700 d.quot.lo = (Xt_uint)dl.quot; // divisor here.
701 d.rem = xllsub((Xt_long){ .hi = k, .lo = u.lo },
702 xiimul((Xt_int)d.quot.lo, (Xt_int)v.lo));
703 } else if (u.hi < v.hi || (u.hi == v.hi && u.lo < v.lo)) {
704 d.quot = (Xt_long){ 0U, 0U };
705 d.rem = u;
706 } else {
707 // Normalize by shifting v left just enough so that
708 // its high-order bit is on, and shift u left the
709 // same amount. We may have to append a high-order
710 // digit on the dividend; we do that unconditionally.
711
712 int s = xinlz(v.hi); // 0 <= s <= xt_int_bits.
713 Xt_ulong // Normalized form of u, v.
714 vn = { .hi = (v.hi << s) | (v.lo >> (xt_int_bits-s)),
715 .lo = v.lo << s };
717 un = { .hi = u.hi >> (xt_int_bits-s),
718 .mid = (u.hi << s) | (u.lo >> (xt_int_bits-s)),
719 .lo = u.lo << s };
720 // the normalized divisor has 4, the dividend 6 half-words
721 if ((u.hi >= base) == (v.hi >= base)) {
722 // Compute estimate qhat of q[j].
723 qhat = un.hi/(vn.hi >> xt_hint_bits);
724 rhat = un.hi - qhat * (vn.hi >> xt_hint_bits);
725 while (qhat >= base || qhat*(vn.hi & lo_mask) > base*rhat + (un.mid >> xt_hint_bits)) {
726 --qhat;
727 rhat += (vn.hi >> xt_hint_bits);
728 if (rhat >= base) break;
729 }
730
731 // Multiply and subtract.
732 Xt_uint p = qhat * (vn.lo & lo_mask);
733 Xt_int t = (Xt_int)((un.lo & lo_mask) - (p & lo_mask));
734 Xt_uint accum = (Xt_uint)t & lo_mask;
735 Xt_int k = (Xt_int)(p >> xt_hint_bits) - (Xt_int)(t >> xt_hint_bits);
736 p = qhat * (vn.lo >> xt_hint_bits);
737 t = (Xt_int)(un.lo >> xt_hint_bits) - k - (Xt_int)(p & lo_mask);
738 un.lo = accum | ((Xt_uint)t << xt_hint_bits);
739 k = (Xt_int)(p >> xt_hint_bits) - (t >> xt_hint_bits);
740 p = qhat * (vn.hi & lo_mask);
741 t = (Xt_int)(un.mid & lo_mask) - k - (Xt_int)(p & lo_mask);
742 accum = (Xt_uint)t & lo_mask;
743 k = (Xt_int)(p >> xt_hint_bits) - (t >> xt_hint_bits);
744 p = qhat * (vn.hi >> xt_hint_bits);
745 t = (Xt_int)(un.mid >> xt_hint_bits) - k - (Xt_int)(p & lo_mask);
746 un.mid = accum | ((Xt_uint)t << xt_hint_bits);
747 k = (Xt_int)(p >> xt_hint_bits) - (t >> xt_hint_bits);
748 t = (Xt_int)((un.hi & lo_mask) - (Xt_uint)k);
749 un.hi = (un.hi & ~lo_mask) + (Xt_uint)t;
750
751 d.quot.hi = 0U;
752 d.quot.lo = qhat; // Store quotient digit.
753 if (t < 0) { // If we subtracted too
754 --d.quot.lo; // much, add back.
755 t = (Xt_int)(un.lo & lo_mask) + (Xt_int)(vn.lo & lo_mask);
756 accum = (Xt_uint)t & lo_mask;
757 k = t >> xt_hint_bits;
758 t = (Xt_int)((un.lo >> xt_hint_bits) + (vn.lo >> xt_hint_bits)
759 + (Xt_uint)k);
760 un.lo = accum | ((Xt_uint)t << xt_hint_bits);
761 k = t >> xt_hint_bits;
762 t = (Xt_int)((un.mid & lo_mask) + (vn.hi & lo_mask) + (Xt_uint)k);
763 accum = (Xt_uint)t & lo_mask;
764 k = t >> xt_hint_bits;
765 t = (Xt_int)((un.mid >> xt_hint_bits) + (un.hi >> xt_hint_bits)
766 + (Xt_uint)k);
767 un.mid = accum | ((Xt_uint)t << xt_hint_bits);
768 k = t >> xt_hint_bits;
769 un.hi += (Xt_uint)k;
770 }
771 // unnormalize the remainder and pass it back.
772 d.rem = (Xt_long){ .lo = (un.lo >> s) | (un.mid << (xt_int_bits - s)),
773 .hi = (un.mid >> s) | (un.hi << (xt_int_bits - s)) };
774 } else {
775 Xt_ushort ua[4], va[4], qa[4] = { 0U }, ra[4] = { 0U };
776 xl2a(ua, u);
777 xl2a(va, v);
778 int m = 4, n = 4;
779 while (ua[m-1] == 0U) --m;
780 while (va[n-1] == 0U) --n;
781 xdivmnu(qa, ra, ua, va, m, n);
782 d.quot = xa2l(qa);
783 d.rem = xa2l(ra);
784 }
785 }
786 return d;
787}
788
789
790/* compute a divided by b */
791static inline Xt_ldiv
793{
794 /* todo: implement */
795 (void)a;
796 (void)b;
797 Xt_ldiv r = { { 0, 0 }, { 0, 0 } };
798 return r;
799}
800
801static inline Xt_ldiv
803{
804 Xt_long au = xlabs(a), bu = xlabs(b);
805 Xt_ldiv r = xlldivu(au, bu);
806 bool negate = (Xt_int)(a.hi ^ b.hi) < 0;
807 r.quot = xlnegate(r.quot, negate);
808 r.rem = xlnegate(r.rem, negate);
809 return r;
810}
811
815static inline int
817{
818 Xt_uint al = a.lo, bl = (Xt_uint)b;
819 Xt_int ah = (Xt_int)a.hi, bh = Xt_isign_mask(b);
820 int r = ah > bh || (ah == bh && al > bl);
821 return r;
822}
823
827static inline int
829{
830 Xt_uint al = a.lo, bl = (Xt_uint)b;
831 Xt_int ah = (Xt_int)a.hi, bh = Xt_isign_mask(b);
832 int r = ah > bh || (ah == bh && al >= bl);
833 return r;
834}
835
839static inline int
841{
842 Xt_uint al = a.lo, bl = (Xt_uint)b;
843 Xt_int ah = (Xt_int)a.hi, bh = Xt_isign_mask(b);
844 int r = ah < bh || (ah == bh && al <= bl);
845 return r;
846}
847
851static inline int
853{
854 Xt_uint al = a.lo, bl = (Xt_uint)b;
855 Xt_int ah = (Xt_int)a.hi, bh = Xt_isign_mask(b);
856 int r = ah < bh || (ah == bh && al < bl);
857 return r;
858}
859
860static inline int
862{
863 return ((a.hi == b.hi) & (a.lo == b.lo));
864}
865
866static inline int
868{
869 return (a.hi == b.hi) & (a.mid == b.mid) & (a.lo == b.lo);
870}
871
872#define Xt_div(num, muldiv, den) XT_INT_DIV((num), (den))
873
874#endif
875
876#endif
877/*
878 * Local Variables:
879 * coding: utf-8
880 * c-file-style: "Java"
881 * c-basic-offset: 2
882 * indent-tabs-mode: nil
883 * show-trailing-whitespace: t
884 * require-trailing-newline: t
885 * license-project-url: "https://dkrz-sw.gitlab-pages.dkrz.de/yaxt/"
886 * license-default: "bsd"
887 * End:
888 */
static Xt_ldiv xlldivu(Xt_ulong u, Xt_ulong v)
static Xt_long xlisub(Xt_long a, Xt_int b)
@ xt_hint_bits
static Xt_uint xtlo(Xt_tword x)
static Xt_tword xlimul(Xt_long a, Xt_int b)
Xt_long Xt_ulong
XT_SHORT Xt_short
static int xlicmp_gt(Xt_long a, Xt_int b)
static Xt_long xlladd(Xt_long a, Xt_long b)
static Xt_long xiisub(Xt_int a, Xt_int b)
static Xt_long xi2l(Xt_int a)
static Xt_ldiv xlldiv(Xt_long a, Xt_long b)
static Xt_uint xlhi(Xt_long x)
static Xt_long xliadd(Xt_long a, Xt_int b)
static int xttcmp_eq(Xt_tword a, Xt_tword b)
static Xt_uint xtmid(Xt_tword x)
static Xt_long xllsub(Xt_long a, Xt_long b)
static Xt_uint xthi(Xt_tword x)
static Xt_uint xllo(Xt_long x)
static void xl2a(Xt_ushort a[4], Xt_long u)
static int xllcmp_eq(Xt_long a, Xt_long b)
unsigned XT_SHORT Xt_ushort
static Xt_idiv xlidivu(Xt_ulong a, Xt_uint b)
static void xdivmnu(Xt_ushort *restrict q, Xt_ushort *restrict r, const Xt_ushort *restrict u, const Xt_ushort *restrict v, int m, int n)
static int xlicmp_lt(Xt_long a, Xt_int b)
static Xt_long xa2l(Xt_ushort a[4])
static Xt_long xiimul(Xt_int a, Xt_int b)
static bool xl_is_in_xt_int_range(Xt_long a)
static Xt_ldiv xtidiv(Xt_tword a, Xt_int b)
static Xt_long xlabs(Xt_long a)
static Xt_long xilsub(Xt_int a, Xt_long b)
static Xt_long xlinc(Xt_long a, bool b)
static Xt_long xlnegate(Xt_long a, bool negate)
static int xlsign(Xt_long x)
static int xlicmp_le(Xt_long a, Xt_int b)
static Xt_long xiiadd(Xt_int a, Xt_int b)
static int xlicmp_ge(Xt_long a, Xt_int b)
@ xt_int_bits
static Xt_int Xt_isign_mask(Xt_int x)
static int xinlz(Xt_uint v)
base definitions header file
#define XT_INT_MIN
Definition xt_core.h:78
#define XT_INT_MAX
Definition xt_core.h:77
XT_INT Xt_int
Definition xt_core.h:72
unsigned XT_INT Xt_uint
Definition xt_core.h:74