libzahl

big integer library
git clone git://git.suckless.org/libzahl
Log | Files | Refs | README | LICENSE

commit 4bba6e7176632b4d760ba9362a1515552471d741
parent ecf7b161df9de7a868edc055d2c1dd84bf371b9d
Author: Mattias Andrée <maandree@kth.se>
Date:   Fri, 29 Apr 2016 21:54:39 +0200

Some optimisations, fix refsheet, and disable const/pure attributes in gmp in benchmark

Signed-off-by: Mattias Andrée <maandree@kth.se>

Diffstat:
MMakefile | 34+++++++++++++++++-----------------
MSTATUS | 16++++++++--------
Mbench/libgmp.h | 2++
Mdoc/refsheet.tex | 4++--
Msrc/internals.h | 31+++++++++++++++----------------
Msrc/zbset.c | 6+++---
Msrc/zmul.c | 4++--
Dsrc/zsave.c | 16----------------
Dsrc/zsetu.c | 16----------------
Dsrc/zsplit.c | 22----------------------
Mzahl-inlines.h | 68+++++++++++++++++++++++++++++++++++++++++++++++++-------------------
Mzahl-internals.h | 19+++++++++++++++++++
Mzahl.h | 15++++++++++++---
13 files changed, 129 insertions(+), 124 deletions(-)

diff --git a/Makefile b/Makefile @@ -2,8 +2,8 @@ include config.mk HDR_PUBLIC =\ zahl.h\ - zahl-internals.h\ - zahl-inlines.h + zahl-inlines.h\ + zahl-internals.h HDR_PRIVATE =\ src/internals.h @@ -33,12 +33,9 @@ FUN =\ zptest\ zrand\ zrsh\ - zsave\ zset\ zsets\ - zsetu\ zsetup\ - zsplit\ zsqr\ zstr\ zstr_length\ @@ -48,24 +45,27 @@ FUN =\ zxor INLINE_FUN =\ - zinit\ - zswap\ - zeven\ - zodd\ - zeven_nonzero\ - zodd_nonzero\ - zzero\ - zsignum\ zabs\ - zneg\ - zlsb\ zbits\ - zseti\ + zbtest\ zcmp\ zcmpi\ zcmpmag\ zcmpu\ - zbtest + zeven\ + zeven_nonzero\ + zinit\ + zlsb\ + zneg\ + zodd\ + zodd_nonzero\ + zsave\ + zseti\ + zsetu\ + zsignum\ + zsplit\ + zswap\ + zzero DOC =\ refsheet.pdf diff --git a/STATUS b/STATUS @@ -16,7 +16,7 @@ zadd_unsigned ........... fastest (faster than all others' zadd) zsub_unsigned ........... fastest (faster than all others' zsub) zadd .................... 87 % of tomsfastmath, 83 % libtommath, 80 % of hebimath zsub .................... 97 % of tomsfastmath, 95 % hebimath, 93 % of libtommath -zand .................... 93 % of gmp, 49 % of tomsfastmath +zand .................... 49 % of tomsfastmath zor ..................... 36 % of tomsfastmath zxor .................... 51 % of tomsfastmath znot .................... fastest @@ -26,16 +26,16 @@ zeven_nonzero ........... fastest (shared with gmp, libtommath, and tomsfastmath zodd_nonzero ............ fastest (shared with gmp, libtommath, and tomsfastmath) zzero ................... fastest (shared with gmp and libtommath) zsignum ................. fastest (shared with gmp) -zbits ................... gmp is faster, because of bug in libzahl -zlsb .................... fastest (shared with gmp) +zbits ................... fastest +zlsb .................... fastest zswap ................... fastest zlsh .................... fastest zrsh .................... fastest ztrunc(a, b, c) ......... fastest ztrunc(a, a, b) ......... fastest -zsplit .................. 95 % of gmp -zcmpmag ................. gmp is faster -zcmp .................... 94 % of tomsfastmath, 81 % of hebimath, gmp is even faster (zcmpmag) +zsplit .................. fastest +zcmpmag ................. fastest +zcmp .................... 94 % of tomsfastmath, 81 % of hebimath (zcmpmag) zcmpi ................... fastest zcmpu ................... fastest zbset(a, b, 1) .......... fastest @@ -44,8 +44,8 @@ zbset(a, b, 0) .......... fastest zbset(a, a, 0) .......... fastest zbset(a, b, -1) ......... fastest zbset(a, a, -1) ......... fastest -zbtest .................. fastest (shared with gmp) -zgcd .................... 26 % of gmp (zcmpmag) +zbtest .................. fastest +zgcd .................... 17 % of gmp (zcmpmag) zmul .................... slowest zsqr .................... slowest (zmul) zmodmul(big mod) ........ slowest ((zmul, zmod)) diff --git a/bench/libgmp.h b/bench/libgmp.h @@ -1,3 +1,5 @@ +#define __GMP_NO_ATTRIBUTE_CONST_PURE + #include <gmp.h> #include <setjmp.h> diff --git a/doc/refsheet.tex b/doc/refsheet.tex @@ -106,10 +106,10 @@ Get string length of $a$ & {\tt zstr\_length(a, b)} & returns {\tt size\_ \textbf{Marshallisation} & {} & {} \\ Marshal $a$ into $b$ & {\tt zsave(a, b)} & returns {\tt size\_t} number of saved bytes, \\ -{} & {} & $~~~~~$ {\tt b} is a {\tt char *\_t} \\ +{} & {} & $~~~~~$ {\tt b} is a {\tt void *\_t} \\ Get marshal-size of $a$ & {\tt zsave(a, NULL)} & returns {\tt size\_t} \\ Unmarshal $a$ from $b$ & {\tt zload(a, b)} & returns {\tt size\_t} number of read bytes, \\ -{} & {} & $~~~~~$ {\tt b} is a {\tt const char *\_t} \\ +{} & {} & $~~~~~$ {\tt b} is a {\tt const void *\_t} \\ \\ \textbf{Number theory} & {} & {} \\ diff --git a/src/internals.h b/src/internals.h @@ -105,11 +105,12 @@ extern void *libzahl_temp_allocation; #define SET_SIGNUM(a, signum) ZAHL_SET_SIGNUM(a, signum) #define SET(a, b) ZAHL_SET(a, b) -#define ENSURE_SIZE(a, n) do { if ((a)->alloced < (n)) libzahl_realloc(a, (n)); } while (0) +#define ENSURE_SIZE(a, n) ZAHL_ENSURE_SIZE(a, n) #define TRIM(a) ZAHL_TRIM(a) #define TRIM_NONZERO(a) ZAHL_TRIM_NONZERO(a) #define TRIM_AND_ZERO(a) ZAHL_TRIM_AND_ZERO(a) #define TRIM_AND_SIGN(a, s) ZAHL_TRIM_AND_SIGN(a, s) +#define SWAP(a, b, t, m) ((t)->m = (a)->m, (a)->m = (b)->m, (b)->m = (t)->m) #define MIN(a, b) ((a) < (b) ? (a) : (b)) #define MAX(a, b) ((a) > (b) ? (a) : (b)) #define znegative(a) (zsignum(a) < 0) @@ -120,8 +121,9 @@ extern void *libzahl_temp_allocation; #define zpositive2(a, b) (zsignum(a) + zsignum(b) == 2) #define zzero2(a, b) (!(zsignum(a) | zsignum(b))) #define zmemmove(d, s, n) memmove((d), (s), (n) * sizeof(zahl_char_t)) +#define zmemcpy(d, s, n) libzahl_memcpy(d, s, n) +#define zmemset(a, v, n) libzahl_memset(a, v, n) -void libzahl_realloc(z_t a, size_t need); void zmul_impl(z_t a, z_t b, z_t c); void zsqr_impl(z_t a, z_t b); @@ -151,20 +153,6 @@ libzahl_memfailure(void) libzahl_failure(errno); } -static inline void -zmemcpy(zahl_char_t *restrict d, const zahl_char_t *restrict s, register size_t n) -{ - while (n--) - d[n] = s[n]; -} - -static inline void -zmemset(zahl_char_t *a, register zahl_char_t v, register size_t n) -{ - while (n--) - a[n] = v; -} - /* * libzahl_msb_nz_zu * ^^^ ^^ ^^ @@ -218,6 +206,17 @@ libzahl_add_overflow(zahl_char_t *rp, zahl_char_t a, zahl_char_t b) #endif static inline void +zsplit_pz(z_t high, z_t low, z_t a, size_t delim) +{ + if (unlikely(zzero(a))) { + SET_SIGNUM(high, 0); + SET_SIGNUM(low, 0); + } else { + zsplit(high, low, a, delim); + } +} + +static inline void zrsh_taint(z_t a, size_t bits) { size_t i, chars, cbits; diff --git a/src/zbset.c b/src/zbset.c @@ -23,14 +23,14 @@ void -zbset_impl_set(z_t a, size_t bit) +zbset_ll_set(z_t a, size_t bit) { PROLOGUE(1); a->chars[chars] |= mask; } void -zbset_impl_clear(z_t a, size_t bit) +zbset_ll_clear(z_t a, size_t bit) { PROLOGUE(0); a->chars[chars] &= ~mask; @@ -38,7 +38,7 @@ zbset_impl_clear(z_t a, size_t bit) } void -zbset_impl_flip(z_t a, size_t bit) +zbset_ll_flip(z_t a, size_t bit) { PROLOGUE(1); a->chars[chars] ^= mask; diff --git a/src/zmul.c b/src/zmul.c @@ -53,8 +53,8 @@ zmul_impl(z_t a, z_t b, z_t c) zinit_temp(c_high); zinit_temp(c_low); - zsplit(b_high, b_low, b, m2); - zsplit(c_high, c_low, c, m2); + zsplit_pz(b_high, b_low, b, m2); + zsplit_pz(c_high, c_low, c, m2); zmul_impl(z0, b_low, c_low); diff --git a/src/zsave.c b/src/zsave.c @@ -1,16 +0,0 @@ -/* See LICENSE file for copyright and license details. */ -#include "internals.h" - - -size_t -zsave(z_t a, void *buffer) -{ - if (buffer) { - char *buf = buffer; - *((int *)buf) = a->sign, buf += sizeof(int); - *((size_t *)buf) = a->used, buf += sizeof(size_t); - if (likely(!zzero(a))) - zmemcpy((zahl_char_t *)buf, a->chars, a->used); - } - return sizeof(int) + sizeof(size_t) + (zzero(a) ? 0 : a->used * sizeof(zahl_char_t)); -} diff --git a/src/zsetu.c b/src/zsetu.c @@ -1,16 +0,0 @@ -/* See LICENSE file for copyright and license details. */ -#include "internals.h" - - -void -zsetu(z_t a, uint64_t b) -{ - if (!b) { - SET_SIGNUM(a, 0); - return; - } - ENSURE_SIZE(a, 1); - SET_SIGNUM(a, 1); - a->chars[0] = (zahl_char_t)b; - a->used = 1; -} diff --git a/src/zsplit.c b/src/zsplit.c @@ -1,22 +0,0 @@ -/* See LICENSE file for copyright and license details. */ -#include "internals.h" - - -void -zsplit(z_t high, z_t low, z_t a, size_t delim) -{ - if (unlikely(zzero(a))) { - /* This is for performance. */ - SET_SIGNUM(high, 0); - SET_SIGNUM(low, 0); - return; - } - - if (unlikely(high == a)) { - ztrunc(low, a, delim); - zrsh(high, a, delim); - } else { - zrsh(high, a, delim); - ztrunc(low, a, delim); - } -} diff --git a/zahl-inlines.h b/zahl-inlines.h @@ -14,19 +14,12 @@ ZAHL_INLINE void zneg(z_t a, z_t b) { ZAHL_SET(a, b); a->sign = -a->sign; } ZAHL_INLINE void zswap(z_t a, z_t b) { + /* Almost three times faster than the naïve method. */ z_t t; - t->sign = a->sign; - a->sign = b->sign; - b->sign = t->sign; - t->used = b->used; - b->used = a->used; - a->used = t->used; - t->alloced = a->alloced; - a->alloced = b->alloced; - b->alloced = t->alloced; - t->chars = b->chars; - b->chars = a->chars; - a->chars = t->chars; + ZAHL_SWAP(a, b, t, sign); + ZAHL_SWAP(b, a, t, used); + ZAHL_SWAP(a, b, t, alloced); + ZAHL_SWAP(b, a, t, chars); } @@ -42,6 +35,20 @@ zseti(z_t a, int64_t b) } +ZAHL_INLINE void +zsetu(z_t a, uint64_t b) +{ + if (!b) { + ZAHL_SET_SIGNUM(a, 0); + return; + } + ZAHL_ENSURE_SIZE(a, 1); + ZAHL_SET_SIGNUM(a, 1); + a->chars[0] = (zahl_char_t)b; + a->used = 1; +} + + ZAHL_INLINE size_t zlsb(z_t a) { @@ -140,10 +147,6 @@ zcmpi(z_t a, int64_t b) } -void zbset_impl_set(z_t a, size_t bit); -void zbset_impl_clear(z_t a, size_t bit); -void zbset_impl_flip(z_t a, size_t bit); - ZAHL_INLINE void zbset(z_t a, z_t b, size_t bit, int action) { @@ -174,11 +177,11 @@ fallback: #endif if (action > 0) - zbset_impl_set(a, bit); + zbset_ll_set(a, bit); else if (action < 0) - zbset_impl_flip(a, bit); + zbset_ll_flip(a, bit); else - zbset_impl_clear(a, bit); + zbset_ll_clear(a, bit); } @@ -196,3 +199,30 @@ zbtest(z_t a, size_t bit) bit &= ZAHL_BITS_IN_LAST_CHAR(bit); return (a->chars[chars] >> bit) & 1; } + + +ZAHL_INLINE void +zsplit(z_t high, z_t low, z_t a, size_t delim) +{ + if (ZAHL_UNLIKELY(high == a)) { + ztrunc(low, a, delim); + zrsh(high, a, delim); + } else { + zrsh(high, a, delim); + ztrunc(low, a, delim); + } +} + + +ZAHL_INLINE size_t +zsave(z_t a, void *buffer) +{ + if (ZAHL_LIKELY(buffer)) { + char *buf = buffer; + *((int *)buf) = a->sign, buf += sizeof(int); + *((size_t *)buf) = a->used, buf += sizeof(size_t); + if (ZAHL_LIKELY(!zzero(a))) + libzahl_memcpy((zahl_char_t *)buf, a->chars, a->used); + } + return sizeof(int) + sizeof(size_t) + (zzero(a) ? 0 : a->used * sizeof(zahl_char_t)); +} diff --git a/zahl-internals.h b/zahl-internals.h @@ -32,10 +32,12 @@ #define ZAHL_SET_SIGNUM(a, signum) ((a)->sign = (signum)) #define ZAHL_SET(a, b) do { if ((a) != (b)) zset(a, b); } while (0) +#define ZAHL_ENSURE_SIZE(a, n) do { if ((a)->alloced < (n)) libzahl_realloc(a, (n)); } while (0) #define ZAHL_TRIM(a) for (; (a)->used && !(a)->chars[(a)->used - 1]; (a)->used--) #define ZAHL_TRIM_NONZERO(a) for (; !(a)->chars[(a)->used - 1]; (a)->used--) #define ZAHL_TRIM_AND_ZERO(a) do { ZAHL_TRIM(a); if (!(a)->used) ZAHL_SET_SIGNUM(a, 0); } while (0) #define ZAHL_TRIM_AND_SIGN(a, s) do { ZAHL_TRIM(a); ZAHL_SET_SIGNUM(a, (a)->used ? (s) : 0); } while (0) +#define ZAHL_SWAP(a, b, t, m) ((t)->m = (a)->m, (a)->m = (b)->m, (b)->m = (t)->m) #if defined(__GNUC__) || defined(__clang__) @@ -72,3 +74,20 @@ struct zahl { size_t alloced; zahl_char_t *chars; }; + + +void libzahl_realloc(struct zahl *, size_t); + +static inline void +libzahl_memcpy(zahl_char_t *restrict d, const zahl_char_t *restrict s, register size_t n) +{ + while (n--) + d[n] = s[n]; +} + +static inline void +libzahl_memset(zahl_char_t *a, register zahl_char_t v, register size_t n) +{ + while (n--) + a[n] = v; +} diff --git a/zahl.h b/zahl.h @@ -66,14 +66,14 @@ void zunsetup(void); /* Free resources used by libzahl */ ZAHL_INLINE void zinit(z_t); /* Prepare a for use. */ ZAHL_INLINE void zswap(z_t, z_t); /* (a, b) := (b, a) */ void zfree(z_t); /* Free resources in a. */ -size_t zsave(z_t, void *); /* Store a into b (if !!b), and return number of written bytes. */ +ZAHL_INLINE size_t zsave(z_t, void *); /* Store a into b (if !!b), and return number of written bytes. */ size_t zload(z_t, const void *); /* Restore a from b, and return number of read bytes. */ /* Assignment functions. */ void zset(z_t, z_t); /* a := b */ -void zsetu(z_t, uint64_t); /* a := b */ +ZAHL_INLINE void zsetu(z_t, uint64_t); /* a := b */ ZAHL_INLINE void zseti(z_t, int64_t); /* a := b */ /* Comparison functions. */ @@ -119,7 +119,8 @@ void znot(z_t, z_t); /* a := ~b */ void zlsh(z_t, z_t, size_t); /* a := b << c */ void zrsh(z_t, z_t, size_t); /* a := b >> c */ void ztrunc(z_t, z_t, size_t); /* a := b & ((1 << c) - 1) */ -void zsplit(z_t, z_t, z_t, size_t); /* a := c >> d, b := c - (a << d) */ +ZAHL_INLINE void zsplit(z_t, z_t, z_t, size_t); + /* a := c >> d, b := c - (a << d) */ ZAHL_INLINE int zbtest(z_t, size_t); /* (a >> b) & 1 */ ZAHL_INLINE size_t zlsb(z_t); /* Index of first set bit, SIZE_MAX if none are set. */ ZAHL_INLINE size_t zbits(z_t); /* ⌊log₂ |a|⌋ + 1, 1 if a = 0 */ @@ -165,6 +166,14 @@ void zperror(const char *); /* Identical to perror(3p) except it sup +/* Low-level functions. [Do not count on these to be retained between different versions of libzahl.] */ + +void zbset_ll_set(z_t, size_t); /* zbset(a, a, b, 1) */ +void zbset_ll_clear(z_t, size_t); /* zbset(a, a, b, 0) */ +void zbset_ll_flip(z_t, size_t); /* zbset(a, a, b, -1) */ + + + #include "zahl-inlines.h" #endif