libzahl

big integer library
git clone git://git.suckless.org/libzahl
Log | Files | Refs | README | LICENSE

commit 40b860777616071997ec035783eeea402ffb1ae2
parent d0565fe373f559312be54b6bc8d74aa7fd34fe2c
Author: Mattias Andrée <maandree@kth.se>
Date:   Tue,  3 May 2016 14:03:33 +0200

Optimise libzahl_memcpy and libzahl_memset

Signed-off-by: Mattias Andrée <maandree@kth.se>

Diffstat:
M STATUS | 2 +-
M TODO | 3 ++-
M zahl-internals.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++------
3 files changed, 53 insertions(+), 8 deletions(-)

diff --git a/STATUS b/STATUS @@ -6,7 +6,7 @@ left column. Double-parenthesis means there may be a better way to do it. Inside square-brackets, there are some comments on multi-bit comparisons. -zset .................... fastest [until ~750, then gmp, also tomsfastmath after ~2750] +zset .................... fastest [always with gcc, unless ~250 with clang] zseti ................... tomsfastmath is faster [always] zsetu ................... tomsfastmath is faster [always] zneg(a, b) .............. fastest [until ~300, then gmp] diff --git a/TODO b/TODO @@ -5,9 +5,10 @@ Add zsets_radix Add zstr_radix Test big endian -Test always having used > 0 for zero +Test always having .used > 0 for zero Test negative/non-negative instead of sign Test long .sign +Test always having .chars % 4 == 0 Test optimisation of zmul: bc = [(Hb * Hc) << (m2 << 1)] diff --git a/zahl-internals.h b/zahl-internals.h @@ -109,18 +109,62 @@ struct zahl { void libzahl_realloc(struct zahl *, size_t); -ZAHL_O2 ZAHL_INLINE void +ZAHL_INLINE void libzahl_memcpy(register zahl_char_t *restrict d, register const zahl_char_t *restrict s, size_t n) { size_t i; - for (i = 0; i < n; i++) - d[i] = s[i]; + if (n <= 4) { + if (n >= 1) + d[0] = s[0]; + if (n >= 2) + d[1] = s[1]; + if (n >= 3) + d[2] = s[2]; + if (n >= 4) + d[3] = s[3]; + } else { + for (i = 0; (i += 4) <= n;) { + d[i - 1] = s[i - 1]; + d[i - 2] = s[i - 2]; + d[i - 3] = s[i - 3]; + d[i - 4] = s[i - 4]; + } + if (i > n) { + i -= 4; + if (i < n) + d[i] = s[i], i++; + if (i < n) + d[i] = s[i], i++; + if (i < n) + d[i] = s[i], i++; + if (i < n) + d[i] = s[i], i++; + } + } } -ZAHL_O2 ZAHL_INLINE void +ZAHL_INLINE void libzahl_memset(register zahl_char_t *a, register zahl_char_t v, size_t n) { size_t i; - for (i = 0; i < n; i++) - a[i] = v; + if (n <= 4) { + if (n >= 1) + a[0] = v; + if (n >= 2) + a[1] = v; + if (n >= 3) + a[2] = v; + if (n >= 4) + a[3] = v; + } else { + for (i = 0; (i += 4) <= n;) { + a[i - 1] = v; + a[i - 2] = v; + a[i - 3] = v; + a[i - 4] = v; + } + if (i > n) + for (i -= 4; i < n; i++) + a[i] = v; + } }