libzahl

big integer library
git clone git://git.suckless.org/libzahl
Log | Files | Refs | README | LICENSE

commit f3b969b6991f154a1fde1ea6b4488320ed0b486f
parent 92be5631d8e319babf5cca49f53ea5e692c54793
Author: Mattias Andrée <maandree@kth.se>
Date:   Tue, 15 Mar 2016 11:40:46 +0100

Optimise zsetup, zgcd, zmul, and zsqr and add -flto

Signed-off-by: Mattias Andrée <maandree@kth.se>

Diffstat:
MTODO | 7+++++++
Mbench/benchmark.c | 4++++
Mconfig.mk | 2+-
Msrc/internals.h | 95+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
Msrc/zadd.c | 27+++++++++++++++++++++++++++
Msrc/zgcd.c | 58++++++++++++++++++++++++----------------------------------
Msrc/zmul.c | 31+++++++------------------------
Msrc/zsetup.c | 6+++---
Msrc/zsqr.c | 21+++------------------
Msrc/zsub.c | 22++++++++++++++++++++--
Msrc/zunsetup.c | 2+-
Mzahl.h | 3+++
12 files changed, 171 insertions(+), 107 deletions(-)

diff --git a/TODO b/TODO @@ -8,3 +8,10 @@ Add zranddist value based on % for fitness to bound Test big endian Test always having used > 0 for zero Test negative/non-negative instead of sign + +Test optimisation of zmul: + bc = [(Hb * Hc) << (m2 << 1)] + + [(Hb * Hc) << m2] + - [(Hb - Lb)(Hc - Lc) << m2] + + [(Lb * Lc) << m2] + + (Lb * Lc) diff --git a/bench/benchmark.c b/bench/benchmark.c @@ -85,14 +85,18 @@ main(int argc, char *argv[]) BENCHMARK(zlsh(c, a, 76), 1); BENCHMARK(zrsh(c, a, 76), 1); BENCHMARK(ztrunc(c, a, 76), 1); + BENCHMARK(ztrunc(c, c, 76), 1); BENCHMARK(zsplit(c, d, a, 76), 1); BENCHMARK(zcmpmag(a, b), 1); BENCHMARK(zcmp(a, b), 1); BENCHMARK(zcmpi(a, 1000000000LL), 1); BENCHMARK(zcmpu(a, 1000000000ULL), 1); BENCHMARK(zbset(c, a, 76, 1), 1); + BENCHMARK(zbset(a, a, 76, 1), 1); BENCHMARK(zbset(c, a, 76, 0), 1); + BENCHMARK(zbset(c, c, 76, 0), 1); BENCHMARK(zbset(c, a, 76, -1), 1); + BENCHMARK(zbset(a, a, 76, -1), 1); BENCHMARK(zbtest(a, 76), 1); BENCHMARK(zgcd(c, a, b), 0); BENCHMARK(zmul(c, a, b), 0); diff --git a/config.mk b/config.mk @@ -14,5 +14,5 @@ RANLIB = ranlib # you have to add -DSECURE_RANDOM_PATHNAME=... to CPPFLAGS. CPPFLAGS = -D_DEFAULT_SOURCE -D_BSD_SOURCE -D_XOPEN_SOURCE=700 -CFLAGS = -std=c99 -O3 -Wall -pedantic +CFLAGS = -std=c99 -flto -O3 -Wall -pedantic LDFLAGS = -s diff --git a/src/internals.h b/src/internals.h @@ -45,29 +45,29 @@ #endif #define LIST_TEMPS\ - X(libzahl_tmp_cmp)\ - X(libzahl_tmp_str_num)\ - X(libzahl_tmp_str_mag)\ - X(libzahl_tmp_str_div)\ - X(libzahl_tmp_str_rem)\ - X(libzahl_tmp_gcd_u)\ - X(libzahl_tmp_gcd_v)\ - X(libzahl_tmp_sub)\ - X(libzahl_tmp_modmul)\ - X(libzahl_tmp_div)\ - X(libzahl_tmp_mod)\ - X(libzahl_tmp_pow_b)\ - X(libzahl_tmp_pow_c)\ - X(libzahl_tmp_pow_d)\ - X(libzahl_tmp_modsqr)\ - X(libzahl_tmp_divmod_a)\ - X(libzahl_tmp_divmod_b)\ - X(libzahl_tmp_divmod_d)\ - X(libzahl_tmp_ptest_x)\ - X(libzahl_tmp_ptest_a)\ - X(libzahl_tmp_ptest_d)\ - X(libzahl_tmp_ptest_n1)\ - X(libzahl_tmp_ptest_n4) + X(libzahl_tmp_cmp, 1)\ + X(libzahl_tmp_str_num, 0)\ + X(libzahl_tmp_str_mag, 0)\ + X(libzahl_tmp_str_div, 0)\ + X(libzahl_tmp_str_rem, 0)\ + X(libzahl_tmp_gcd_u, 0)\ + X(libzahl_tmp_gcd_v, 0)\ + X(libzahl_tmp_sub, 0)\ + X(libzahl_tmp_modmul, 0)\ + X(libzahl_tmp_div, 0)\ + X(libzahl_tmp_mod, 0)\ + X(libzahl_tmp_pow_b, 0)\ + X(libzahl_tmp_pow_c, 0)\ + X(libzahl_tmp_pow_d, 0)\ + X(libzahl_tmp_modsqr, 0)\ + X(libzahl_tmp_divmod_a, 0)\ + X(libzahl_tmp_divmod_b, 0)\ + X(libzahl_tmp_divmod_d, 0)\ + X(libzahl_tmp_ptest_x, 0)\ + X(libzahl_tmp_ptest_a, 0)\ + X(libzahl_tmp_ptest_d, 0)\ + X(libzahl_tmp_ptest_n1, 0)\ + X(libzahl_tmp_ptest_n4, 0) #define LIST_CONSTS\ X(libzahl_const_1e19, zsetu, 10000000000000000000ULL) /* The largest power of 10 < 2⁶⁴. */\ @@ -75,7 +75,7 @@ X(libzahl_const_2, zsetu, 2)\ X(libzahl_const_4, zsetu, 4) -#define X(x) extern z_t x; +#define X(x, s) extern z_t x; LIST_TEMPS #undef X #define X(x, f, v) extern z_t x; @@ -185,3 +185,50 @@ libzahl_add_overflow(zahl_char_t *rp, zahl_char_t a, zahl_char_t b) return carry; } #endif + +static inline void +zrsh_taint(z_t a, size_t bits) +{ + size_t i, chars, cbits; + + if (unlikely(!bits)) + return; + if (unlikely(zzero(a))) + return; + + chars = FLOOR_BITS_TO_CHARS(bits); + + if (unlikely(chars >= a->used || zbits(a) <= bits)) { + SET_SIGNUM(a, 0); + return; + } + + bits = BITS_IN_LAST_CHAR(bits); + cbits = BITS_PER_CHAR - bits; + + if (likely(chars)) { + a->used -= chars; + a->chars += chars; + } + + if (unlikely(bits)) { /* This if statement is very important in C. */ + a->chars[0] >>= bits; + for (i = 1; i < a->used; i++) { + a->chars[i - 1] |= a->chars[i] << cbits; + a->chars[i] >>= bits; + } + TRIM_NONZERO(a); + } +} + +static inline void +zswap_tainted_unsigned(z_t a, z_t b) +{ + z_t t; + t->used = b->used; + b->used = a->used; + a->used = t->used; + t->chars = b->chars; + b->chars = a->chars; + a->chars = t->chars; +} diff --git a/src/zadd.c b/src/zadd.c @@ -72,6 +72,33 @@ zadd_unsigned(z_t a, z_t b, z_t c) } void +zadd_unsigned_assign(z_t a, z_t b) +{ + size_t size, n; + + if (unlikely(zzero(a))) { + zabs(a, b); + return; + } else if (unlikely(zzero(b))) { + return; + } + + size = MAX(a->used, b->used); + n = a->used + b->used - size; + + ENSURE_SIZE(a, size + 1); + a->chars[size] = 0; + + if (a->used < b->used) { + n = b->used; + zmemset(a->chars + a->used, 0, n - a->used); + } + zadd_impl(a, b, n); + + SET_SIGNUM(a, 1); +} + +void zadd(z_t a, z_t b, z_t c) { if (unlikely(zzero(b))) { diff --git a/src/zgcd.c b/src/zgcd.c @@ -12,14 +12,11 @@ zgcd(z_t a, z_t b, z_t c) * Binary GCD algorithm. */ - size_t shifts = 0, i = 0, min; - zahl_char_t uv, bit; - int neg; + size_t shifts; + zahl_char_t *u_orig, *v_orig; + size_t u_lsb, v_lsb; + int neg, cmpmag; - if (unlikely(!zcmp(b, c))) { - SET(a, b); - return; - } if (unlikely(zzero(b))) { SET(a, c); return; @@ -29,37 +26,30 @@ zgcd(z_t a, z_t b, z_t c) return; } - zabs(u, b); - zabs(v, c); neg = znegative2(b, c); - min = MIN(u->used, v->used); - for (; i < min; i++) { - uv = u->chars[i] | v->chars[i]; - for (bit = 1; bit; bit <<= 1, shifts++) - if (uv & bit) - goto loop_done; + u_lsb = zlsb(b); + v_lsb = zlsb(c); + shifts = MIN(u_lsb, v_lsb); + zrsh(u, b, u_lsb); + zrsh(v, c, v_lsb); + + u_orig = u->chars; + v_orig = v->chars; + + for (;;) { + if (likely((cmpmag = zcmpmag(u, v)) >= 0)) { + if (unlikely(cmpmag == 0)) + break; + zswap_tainted_unsigned(u, v); + } + zsub_positive_assign(v, u); + zrsh_taint(v, zlsb(v)); } - for (; i < u->used; i++) - for (bit = 1; bit; bit <<= 1, shifts++) - if (u->chars[i] & bit) - goto loop_done; - for (; i < v->used; i++) - for (bit = 1; bit; bit <<= 1, shifts++) - if (v->chars[i] & bit) - goto loop_done; -loop_done: - zrsh(u, u, shifts); - zrsh(v, v, shifts); - - zrsh(u, u, zlsb(u)); - do { - zrsh(v, v, zlsb(v)); - if (zcmpmag(u, v) > 0) /* Both are non-negative. */ - zswap(u, v); - zsub_unsigned(v, v, u); - } while (!zzero(v)); zlsh(a, u, shifts); SET_SIGNUM(a, neg ? -1 : 1); + + u->chars = u_orig; + v->chars = v_orig; } diff --git a/src/zmul.c b/src/zmul.c @@ -57,39 +57,22 @@ zmul(z_t a, z_t b, z_t c) zsplit(b_high, b_low, b, m2); zsplit(c_high, c_low, c, m2); -#if 1 + zmul(z0, b_low, c_low); zmul(z2, b_high, c_high); - zadd(b_low, b_low, b_high); - zadd(c_low, c_low, c_high); + zadd_unsigned_assign(b_low, b_high); + zadd_unsigned_assign(c_low, c_high); zmul(z1, b_low, c_low); - zsub(z1, z1, z0); - zsub(z1, z1, z2); + zsub_nonnegative_assign(z1, z0); + zsub_nonnegative_assign(z1, z2); zlsh(z1, z1, m2); m2 <<= 1; - zlsh(z2, z2, m2); - - zadd(a, z2, z1); - zadd(a, a, z0); -#else - zmul(z0, b_low, c_low); - zmul(z2, b_high, c_high); - zsub(b_low, b_high, b_low); - zsub(c_low, c_high, c_low); - zmul(z1, b_low, c_low); - - zlsh(z0, z0, m2 + 1); - zlsh(z1, z1, m2); zlsh(a, z2, m2); - m2 <<= 1; - zlsh(z2, z2, m2); - zadd(z2, z2, a); + zadd_unsigned_assign(a, z1); + zadd_unsigned_assign(a, z0); - zsub(a, z2, z1); - zadd(a, a, z0); -#endif zfree(z0); zfree(z1); diff --git a/src/zsetup.c b/src/zsetup.c @@ -1,7 +1,7 @@ /* See LICENSE file for copyright and license details. */ #include "internals.h" -#define X(x) z_t x; +#define X(x, s) z_t x; LIST_TEMPS #undef X #define X(x, f, v) z_t x; @@ -30,8 +30,8 @@ zsetup(jmp_buf env) memset(libzahl_pool_n, 0, sizeof(libzahl_pool_n)); memset(libzahl_pool_alloc, 0, sizeof(libzahl_pool_alloc)); -#define X(x)\ - zinit(x), zsetu(x, 1); +#define X(x, s)\ + zinit(x); if (s) zsetu(x, 1); LIST_TEMPS; #undef X #define X(x, f, v)\ diff --git a/src/zsqr.c b/src/zsqr.c @@ -42,32 +42,17 @@ zsqr(z_t a, z_t b) zsplit(high, low, b, m2); -#if 1 + zsqr(z0, low); zsqr(z2, high); zmul(z1, low, high); zlsh(z1, z1, m2 + 1); m2 <<= 1; - zlsh(z2, z2, m2); - - zadd(a, z2, z1); - zadd(a, a, z0); -#else - zsqr(z0, low); - zsqr(z2, high); - zmul(z1, low, low); - - zlsh(z0, z0, m2 + 1); - zlsh(z1, z1, m2 + 1); zlsh(a, z2, m2); - m2 <<= 1; - zlsh(z2, z2, m2); - zadd(z2, z2, a); + zadd_unsigned_assign(a, z1); + zadd_unsigned_assign(a, z0); - zsub(a, z2, z1); - zadd(a, a, z0); -#endif zfree(z0); zfree(z1); diff --git a/src/zsub.c b/src/zsub.c @@ -46,7 +46,7 @@ libzahl_zsub_unsigned(z_t a, z_t b, z_t c) SET_SIGNUM(a, 0); return; } - n = MIN(b->used, c->used); + n = b->used; if (a == b) { zset(libzahl_tmp_sub, b); SET(a, c); @@ -56,7 +56,7 @@ libzahl_zsub_unsigned(z_t a, z_t b, z_t c) zsub_impl(a, b, n); } } else { - n = MIN(b->used, c->used); + n = c->used; if (unlikely(a == c)) { zset(libzahl_tmp_sub, c); SET(a, b); @@ -77,6 +77,24 @@ zsub_unsigned(z_t a, z_t b, z_t c) } void +zsub_nonnegative_assign(z_t a, z_t b) +{ + if (unlikely(zzero(b))) { + zabs(a, a); + } else if (unlikely(!zcmpmag(a, b))) { + SET_SIGNUM(a, 0); + } else { + zsub_impl(a, b, b->used); + } +} + +void +zsub_positive_assign(z_t a, z_t b) +{ + zsub_impl(a, b, b->used); +} + +void zsub(z_t a, z_t b, z_t c) { if (unlikely(zzero(b))) { diff --git a/src/zunsetup.c b/src/zunsetup.c @@ -8,7 +8,7 @@ zunsetup(void) size_t i; if (libzahl_set_up) { libzahl_set_up = 0; -#define X(x)\ +#define X(x, s)\ free(x->chars); LIST_TEMPS; #undef X diff --git a/zahl.h b/zahl.h @@ -119,6 +119,9 @@ void zmodpowu(z_t, z_t, unsigned long long int, z_t); /* These are used internally and may be removed in a future version. */ void zadd_unsigned(z_t, z_t, z_t); /* a := |b| + |c| */ void zsub_unsigned(z_t, z_t, z_t); /* a := |b| - |c| */ +void zadd_unsigned_assign(z_t, z_t); /* a := |a| + |b| */ +void zsub_nonnegative_assign(z_t, z_t); /* a := a - b, assuming a ≥ b ≥ 0 */ +void zsub_positive_assign(z_t, z_t); /* a := a - b, assuming a > b > 0 */ /* Bitwise operations. */