libzahl

big integer library
git clone git://git.suckless.org/libzahl
Log | Files | Refs | README | LICENSE

commit 83d95da004c8cc2387a4070b781a71a0c6433faa
parent 599a71a058b8913a4d166485fff6b964247763e9
Author: Mattias Andrée <maandree@kth.se>
Date:   Sat, 30 Apr 2016 05:47:05 +0200

Some optimisations

Signed-off-by: Mattias Andrée <maandree@kth.se>

Diffstat:
MMakefile | 2+-
MSTATUS | 10+++++-----
Msrc/internals.h | 9+++++++++
Msrc/zand.c | 22++++++++++++++++------
Msrc/zor.c | 35++++++++++++++++++++++-------------
Dsrc/zset.c | 16----------------
Msrc/zsub.c | 7+++----
Msrc/zxor.c | 48++++++++++++++++++++++++++++++++----------------
Mzahl-inlines.h | 14++++++++++++++
Mzahl.h | 3++-
10 files changed, 104 insertions(+), 62 deletions(-)

diff --git a/Makefile b/Makefile @@ -33,7 +33,6 @@ FUN =\ zptest\ zrand\ zrsh\ - zset\ zsets\ zsetup\ zsqr\ @@ -60,6 +59,7 @@ INLINE_FUN =\ zodd\ zodd_nonzero\ zsave\ + zset\ zseti\ zsetu\ zsignum\ diff --git a/STATUS b/STATUS @@ -14,11 +14,11 @@ zabs(a, b) .............. fastest zabs(a, a) .............. tomsfastmath is faster zadd_unsigned ........... fastest (faster than all others' zadd) zsub_unsigned ........... fastest (faster than all others' zsub) -zadd .................... 94 % of tomsfastmath, 90 % libtommath, 86 % of hebimath -zsub .................... 97 % of tomsfastmath, 95 % hebimath, 93 % of libtommath -zand .................... 55 % of tomsfastmath -zor ..................... 46 % of tomsfastmath -zxor .................... 57 % of tomsfastmath +zadd .................... fastest +zsub .................... 98 % of libtommath +zand .................... 77 % of tomsfastmath +zor ..................... 65 % of tomsfastmath +zxor .................... 87 % of tomsfastmath znot .................... fastest zeven ................... fastest (shared with gmp, libtommath, and tomsfastmath) zodd .................... fastest (shared with gmp, libtommath, and tomsfastmath) diff --git a/src/internals.h b/src/internals.h @@ -94,6 +94,8 @@ extern void *libzahl_temp_allocation; #define SWAP(a, b, t, m) ((t)->m = (a)->m, (a)->m = (b)->m, (b)->m = (t)->m) #define MIN(a, b) ((a) < (b) ? (a) : (b)) #define MAX(a, b) ((a) > (b) ? (a) : (b)) +#define MIN_MAX_1(min, max, a, b) ((min) = MIN(a, b), (max) = MAX(a, b)) +#define MIN_MAX_2(min, max, a, b) ((min) = (a) + (b) - ((max) = MAX(a, b))) #define znegative(a) (zsignum(a) < 0) #define znegative1(a, b) ((zsignum(a) | zsignum(b)) < 0) #define znegative2(a, b) ((zsignum(a) & zsignum(b)) < 0) @@ -114,6 +116,13 @@ zzero1(z_t a, z_t b) return zzero(a) || zzero(b); } +O2 static inline void +zmemcpy_range(register zahl_char_t *restrict d, register const zahl_char_t *restrict s, size_t i, size_t n) +{ + for (; i < n; i++) + d[i] = s[i]; +} + static void libzahl_failure(int error) { diff --git a/src/zand.c b/src/zand.c @@ -2,14 +2,25 @@ #include "internals.h" -static inline O2 void -zand_impl(zahl_char_t *restrict a, const zahl_char_t *restrict b, size_t n) +O2 static inline void +zand_impl_3(register zahl_char_t *restrict a, + register const zahl_char_t *restrict b, size_t n) { size_t i; for (i = 0; i < n; i++) a[i] &= b[i]; } +static inline void +zand_impl_4(register zahl_char_t *restrict a, + register const zahl_char_t *restrict b, + register const zahl_char_t *restrict c, size_t n) +{ + size_t i; + for (i = 0; i < n; i++) + a[i] = b[i] & c[i]; +} + void zand(z_t a, z_t b, z_t c) { @@ -25,13 +36,12 @@ zand(z_t a, z_t b, z_t c) a->used = MIN(b->used, c->used); if (a == b) { - zand_impl(a->chars, c->chars, a->used); + zand_impl_3(a->chars, c->chars, a->used); } else if (unlikely(a == c)) { - zand_impl(a->chars, b->chars, a->used); + zand_impl_3(a->chars, b->chars, a->used); } else { ENSURE_SIZE(a, a->used); - zmemcpy(a->chars, c->chars, a->used); - zand_impl(a->chars, b->chars, a->used); + zand_impl_4(a->chars, b->chars, c->chars, a->used); } TRIM_AND_SIGN(a, zpositive1(b, c) * 2 - 1); diff --git a/src/zor.c b/src/zor.c @@ -2,14 +2,27 @@ #include "internals.h" -static inline O2 void -zor_impl(zahl_char_t *restrict a, const zahl_char_t *restrict b, size_t n) +O2 static inline void +zor_impl_3(register zahl_char_t *restrict a, + register const zahl_char_t *restrict b, size_t n) { size_t i; for (i = 0; i < n; i++) a[i] |= b[i]; } +static inline void +zor_impl_5(register zahl_char_t *restrict a, + register const zahl_char_t *restrict b, size_t n, + register const zahl_char_t *restrict c, size_t m) +{ + size_t i; + for (i = 0; i < n; i++) + a[i] = b[i] | c[i]; + for (; i < m; i++) + a[i] = c[i]; +} + void zor(z_t a, z_t b, z_t c) { @@ -23,25 +36,21 @@ zor(z_t a, z_t b, z_t c) return; } - m = MAX(b->used, c->used); - n = b->used + c->used - m; - + MIN_MAX_1(n, m, b->used, c->used); ENSURE_SIZE(a, m); if (a == b) { + zor_impl_3(a->chars, c->chars, n); if (a->used < c->used) - zmemcpy(a->chars + n, c->chars + n, m - n); - zor_impl(a->chars, c->chars, n); + zmemcpy_range(a->chars, c->chars, n, m); } else if (unlikely(a == c)) { + zor_impl_3(a->chars, b->chars, n); if (a->used < b->used) - zmemcpy(a->chars + n, b->chars + n, m - n); - zor_impl(a->chars, b->chars, n); + zmemcpy_range(a->chars, b->chars, n, m); } else if (m == b->used) { - zmemcpy(a->chars, b->chars, m); - zor_impl(a->chars, c->chars, n); + zor_impl_5(a->chars, c->chars, n, b->chars, m); } else { - zmemcpy(a->chars, c->chars, m); - zor_impl(a->chars, b->chars, n); + zor_impl_5(a->chars, b->chars, n, c->chars, m); } a->used = m; diff --git a/src/zset.c b/src/zset.c @@ -1,16 +0,0 @@ -/* See LICENSE file for copyright and license details. */ -#include "internals.h" - - -void -zset(z_t a, z_t b) -{ - if (unlikely(b->sign == 0)) { - a->sign = 0; - } else { - a->sign = b->sign; - a->used = b->used; - ENSURE_SIZE(a, b->used); - zmemcpy(a->chars, b->chars, b->used); - } -} diff --git a/src/zsub.c b/src/zsub.c @@ -79,13 +79,12 @@ zsub_unsigned(z_t a, z_t b, z_t c) void zsub_nonnegative_assign(z_t a, z_t b) { - if (unlikely(zzero(b))) { + if (unlikely(zzero(b))) zabs(a, a); - } else if (unlikely(!zcmpmag(a, b))) { + else if (unlikely(!zcmpmag(a, b))) SET_SIGNUM(a, 0); - } else { + else zsub_impl(a, b, b->used); - } } void diff --git a/src/zxor.c b/src/zxor.c @@ -2,18 +2,33 @@ #include "internals.h" -static inline void -zxor_impl(zahl_char_t *restrict a, const zahl_char_t *restrict b, size_t n) +O2 static inline void +zxor_impl_3(register zahl_char_t *restrict a, + register const zahl_char_t *restrict b, size_t n) { size_t i; for (i = 0; i < n; i++) a[i] ^= b[i]; } +static inline void +zxor_impl_5(register zahl_char_t *restrict a, + register const zahl_char_t *restrict b, size_t n, + register const zahl_char_t *restrict c, size_t m) +{ + size_t i; + for (i = 0; i < n; i++) + a[i] = b[i] ^ c[i]; + for (; i < m; i++) + a[i] = c[i]; +} + void zxor(z_t a, z_t b, z_t c) { - size_t n, m; + size_t n, m, bn, cn; + const zahl_char_t *restrict bc; + const zahl_char_t *restrict cc; if (unlikely(zzero(b))) { SET(a, c); @@ -23,25 +38,26 @@ zxor(z_t a, z_t b, z_t c) return; } - m = MAX(b->used, c->used); - n = b->used + c->used - m; + bn = b->used; + bc = b->chars; + cn = c->used; + cc = c->chars; + MIN_MAX_1(n, m, bn, cn); ENSURE_SIZE(a, m); if (a == b) { - if (a->used < c->used) - zmemcpy(a->chars + n, c->chars + n, m - n); - zxor_impl(a->chars, c->chars, n); + zxor_impl_3(a->chars, cc, n); + if (a->used < cn) + zmemcpy_range(a->chars, cc, n, m); } else if (unlikely(a == c)) { - if (a->used < b->used) - zmemcpy(a->chars + n, b->chars + n, m - n); - zxor_impl(a->chars, b->chars, n); - } else if (m == b->used) { - zmemcpy(a->chars, b->chars, m); - zxor_impl(a->chars, c->chars, n); + zxor_impl_3(a->chars, bc, n); + if (a->used < bn) + zmemcpy_range(a->chars, bc, n, m); + } else if (m == bn) { + zxor_impl_5(a->chars, cc, n, bc, m); } else { - zmemcpy(a->chars, c->chars, m); - zxor_impl(a->chars, b->chars, n); + zxor_impl_5(a->chars, bc, n, cc, m); } a->used = m; diff --git a/zahl-inlines.h b/zahl-inlines.h @@ -19,6 +19,20 @@ ZAHL_INLINE void zabs(z_t a, z_t b) { ZAHL_SET(a, b); a->sign = !!a->sign; } ZAHL_INLINE void +zset(z_t a, z_t b) +{ + if (ZAHL_UNLIKELY(b->sign == 0)) { + a->sign = 0; + } else { + a->sign = b->sign; + a->used = b->used; + ZAHL_ENSURE_SIZE(a, b->used); + libzahl_memcpy(a->chars, b->chars, b->used); + } +} + + +ZAHL_INLINE void zswap(z_t a, z_t b) { /* Almost three times faster than the naïve method. */ diff --git a/zahl.h b/zahl.h @@ -72,10 +72,11 @@ size_t zload(z_t, const void *); /* Restore a from b, and return number o /* Assignment functions. */ -void zset(z_t, z_t); /* a := b */ +ZAHL_INLINE void zset(z_t, z_t); /* a := b */ ZAHL_INLINE void zsetu(z_t, uint64_t); /* a := b */ ZAHL_INLINE void zseti(z_t, int64_t); /* a := b */ + /* Comparison functions. */ ZAHL_INLINE int zcmp(z_t, z_t); /* signum (a - b) */