libzahl

big integer library
git clone git://git.suckless.org/libzahl
Log | Files | Refs | README | LICENSE

commit 1f8023e18a6dc7b950826810b392fdd46bcb0d45
parent 49ba96599a73ab591d1d9978d13eeb856f256de7
Author: Mattias Andrée <maandree@kth.se>
Date:   Wed,  4 May 2016 14:33:27 +0200

Optimise zand, zor, and zxor

Signed-off-by: Mattias Andrée <maandree@kth.se>

Diffstat:
M src/internals.h | 28 ++++++++++++++++++++++++++++
M src/zand.c | 25 +++----------------------
M src/zor.c | 31 ++++++-------------------------
M src/zxor.c | 31 ++++++-------------------------
4 files changed, 43 insertions(+), 72 deletions(-)

diff --git a/src/internals.h b/src/internals.h
@@ -343,3 +343,31 @@ zfree_temp(z_t a)
 }
 
 /* } */
+
+#define ZMEM_OP(a, b, c, n, OP) \
+	do { /* a[i] = b[i] OP c[i] for every i in [0, n) */ \
+		zahl_char_t *a__ = (a); \
+		const zahl_char_t *b__ = (b); \
+		const zahl_char_t *c__ = (c); \
+		size_t i__, n__ = (n); \
+		if (n__ <= 4) { \
+			if (n__ >= 1) \
+				a__[0] = b__[0] OP c__[0]; \
+			if (n__ >= 2) \
+				a__[1] = b__[1] OP c__[1]; \
+			if (n__ >= 3) \
+				a__[2] = b__[2] OP c__[2]; \
+			if (n__ >= 4) \
+				a__[3] = b__[3] OP c__[3]; \
+		} else { \
+			for (i__ = 0; (i__ += 4) <= n__;) { /* `<=`: also take the last full group when n % 4 == 0 */ \
+				a__[i__ - 1] = b__[i__ - 1] OP c__[i__ - 1]; \
+				a__[i__ - 2] = b__[i__ - 2] OP c__[i__ - 2]; \
+				a__[i__ - 3] = b__[i__ - 3] OP c__[i__ - 3]; \
+				a__[i__ - 4] = b__[i__ - 4] OP c__[i__ - 4]; \
+			} \
+			if (i__ > n__) /* tail: the remaining n % 4 elements */ \
+				for (i__ -= 4; i__ < n__; i__++) \
+					a__[i__] = b__[i__] OP c__[i__]; \
+		} \
+	} while (0)
diff --git a/src/zand.c b/src/zand.c
@@ -2,25 +2,6 @@
 #include "internals.h"
 
 
-O2 static inline void
-zand_impl_3(register zahl_char_t *restrict a,
-            register const zahl_char_t *restrict b, size_t n)
-{
-	size_t i;
-	for (i = 0; i < n; i++)
-		a[i] &= b[i];
-}
-
-static inline void
-zand_impl_4(register zahl_char_t *restrict a,
-            register const zahl_char_t *restrict b,
-            register const zahl_char_t *restrict c, size_t n)
-{
-	size_t i;
-	for (i = 0; i < n; i++)
-		a[i] = b[i] & c[i];
-}
-
 void
 zand(z_t a, z_t b, z_t c)
 {
@@ -36,12 +17,12 @@ zand(z_t a, z_t b, z_t c)
 	a->used = MIN(b->used, c->used);
 
 	if (a == b) {
-		zand_impl_3(a->chars, c->chars, a->used);
+		ZMEM_OP(a->chars, a->chars, c->chars, a->used, &);
 	} else if (unlikely(a == c)) {
-		zand_impl_3(a->chars, b->chars, a->used);
+		ZMEM_OP(a->chars, a->chars, b->chars, a->used, &);
 	} else {
 		ENSURE_SIZE(a, a->used);
-		zand_impl_4(a->chars, b->chars, c->chars, a->used);
+		ZMEM_OP(a->chars, b->chars, c->chars, a->used, &);
 	}
 
 	TRIM_AND_SIGN(a, zpositive1(b, c) * 2 - 1);
diff --git a/src/zor.c b/src/zor.c
@@ -2,27 +2,6 @@
 #include "internals.h"
 
 
-O2 static inline void
-zor_impl_3(register zahl_char_t *restrict a,
-           register const zahl_char_t *restrict b, size_t n)
-{
-	size_t i;
-	for (i = 0; i < n; i++)
-		a[i] |= b[i];
-}
-
-static inline void
-zor_impl_5(register zahl_char_t *restrict a,
-           register const zahl_char_t *restrict b, size_t n,
-           register const zahl_char_t *restrict c, size_t m)
-{
-	size_t i;
-	for (i = 0; i < n; i++)
-		a[i] = b[i] | c[i];
-	for (; i < m; i++)
-		a[i] = c[i];
-}
-
 void
 zor(z_t a, z_t b, z_t c)
 {
@@ -40,17 +19,19 @@ zor(z_t a, z_t b, z_t c)
 	ENSURE_SIZE(a, m);
 
 	if (a == b) {
-		zor_impl_3(a->chars, c->chars, n);
+		ZMEM_OP(a->chars, a->chars, c->chars, n, |);
 		if (a->used < c->used)
 			zmemcpy_range(a->chars, c->chars, n, m);
 	} else if (unlikely(a == c)) {
-		zor_impl_3(a->chars, b->chars, n);
+		ZMEM_OP(a->chars, a->chars, b->chars, n, |);
 		if (a->used < b->used)
 			zmemcpy_range(a->chars, b->chars, n, m);
 	} else if (m == b->used) {
-		zor_impl_5(a->chars, c->chars, n, b->chars, m);
+		ZMEM_OP(a->chars, c->chars, b->chars, n, |);
+		zmemcpy_range(a->chars, b->chars, n, m);
 	} else {
-		zor_impl_5(a->chars, b->chars, n, c->chars, m);
+		ZMEM_OP(a->chars, b->chars, c->chars, n, |);
+		zmemcpy_range(a->chars, c->chars, n, m);
 	}
 
 	a->used = m;
diff --git a/src/zxor.c b/src/zxor.c
@@ -2,27 +2,6 @@
 #include "internals.h"
 
 
-O2 static inline void
-zxor_impl_3(register zahl_char_t *restrict a,
-            register const zahl_char_t *restrict b, size_t n)
-{
-	size_t i;
-	for (i = 0; i < n; i++)
-		a[i] ^= b[i];
-}
-
-static inline void
-zxor_impl_5(register zahl_char_t *restrict a,
-            register const zahl_char_t *restrict b, size_t n,
-            register const zahl_char_t *restrict c, size_t m)
-{
-	size_t i;
-	for (i = 0; i < n; i++)
-		a[i] = b[i] ^ c[i];
-	for (; i < m; i++)
-		a[i] = c[i];
-}
-
 void
 zxor(z_t a, z_t b, z_t c)
 {
@@ -47,17 +26,19 @@ zxor(z_t a, z_t b, z_t c)
 	ENSURE_SIZE(a, m);
 
 	if (a == b) {
-		zxor_impl_3(a->chars, cc, n);
+		ZMEM_OP(a->chars, a->chars, cc, n, ^);
 		if (a->used < cn)
 			zmemcpy_range(a->chars, cc, n, m);
 	} else if (unlikely(a == c)) {
-		zxor_impl_3(a->chars, bc, n);
+		ZMEM_OP(a->chars, a->chars, bc, n, ^); /* a == c: read a->chars, cc may predate ENSURE_SIZE */
 		if (a->used < bn)
 			zmemcpy_range(a->chars, bc, n, m);
 	} else if (m == bn) {
-		zxor_impl_5(a->chars, cc, n, bc, m);
+		ZMEM_OP(a->chars, c->chars, b->chars, n, ^);
+		zmemcpy_range(a->chars, b->chars, n, m);
 	} else {
-		zxor_impl_5(a->chars, bc, n, cc, m);
+		ZMEM_OP(a->chars, b->chars, c->chars, n, ^);
+		zmemcpy_range(a->chars, c->chars, n, m);
 	}
 
 	a->used = m;