commit fbace74784b115d24441d2a67b932a96011d7012
parent b8f7e263d28edb721f18f64d3553947710ff9fce
Author: Mattias Andrée <maandree@kth.se>
Date: Wed, 4 May 2016 21:04:52 +0200
Optimise zlsh
Signed-off-by: Mattias Andrée <maandree@kth.se>
Diffstat:
3 files changed, 49 insertions(+), 15 deletions(-)
diff --git a/src/internals.h b/src/internals.h
@@ -4,6 +4,7 @@
#include <errno.h>
#include <stdlib.h>
#include <string.h>
+#include <unistd.h>
/* clang pretends to be GCC... */
#if defined(__GNUC__) && defined(__clang__)
@@ -103,7 +104,6 @@ extern void *libzahl_temp_allocation;
#define zpositive1(a, b) (zpositive(a) + zpositive(b) > 0)
#define zpositive2(a, b) (zsignum(a) + zsignum(b) == 2)
#define zzero2(a, b) (!(zsignum(a) | zsignum(b)))
-#define zmemmove(d, s, n) memmove((d), (s), (n) * sizeof(zahl_char_t))
#define zmemcpy(d, s, n) libzahl_memcpy(d, s, n)
#define zmemset(a, v, n) libzahl_memset(a, v, n)
@@ -245,12 +245,8 @@ static inline void
zswap_tainted_unsigned(z_t a, z_t b)
{
z_t t;
- t->used = b->used;
- b->used = a->used;
- a->used = t->used;
- t->chars = b->chars;
- b->chars = a->chars;
- a->chars = t->chars;
+ SWAP(a, b, t, used);
+ SWAP(b, a, t, chars);
}
static inline void
@@ -398,3 +394,40 @@ zfree_temp(z_t a)
a__[i__] = OP(b__[i__]); \
} \
} while (0)
+
+/**
+ * Copy `n_` elements from `s` to `d`, back to front (highest index first).
+ *
+ * Because the last element is written first, the two regions are allowed
+ * to overlap as long as `d` does not start before `s`; zlsh depends on
+ * this when it shifts a number in place (`a == b`). The pointers are
+ * therefore deliberately NOT `restrict`-qualified: modifying, through one
+ * restrict pointer, an object that is also read through another one is
+ * undefined behaviour.
+ *
+ * @param  d   Destination, must have room for at least `n_` elements
+ * @param  s   Source, must hold at least `n_` elements
+ * @param  n_  Number of elements to copy
+ */
+static inline void
+zmemcpyb(register zahl_char_t *d, register const zahl_char_t *s, size_t n_)
+{
+	/* Signed counter so that the main loop can detect stepping below zero. */
+	ssize_t i, n = (ssize_t)n_;
+	/* First copy the 0 to 3 topmost elements that do not fill a group of four. */
+	switch (n & 3) {
+	case 3:
+		d[n - 1] = s[n - 1];
+		d[n - 2] = s[n - 2];
+		d[n - 3] = s[n - 3];
+		break;
+	case 2:
+		d[n - 1] = s[n - 1];
+		d[n - 2] = s[n - 2];
+		break;
+	case 1:
+		d[n - 1] = s[n - 1];
+		break;
+	default:
+		break;
+	}
+	/* Then copy the remaining elements in groups of four, still descending. */
+	for (i = n & ~3; (i -= 4) >= 0;) {
+		d[i + 3] = s[i + 3];
+		d[i + 2] = s[i + 2];
+		d[i + 1] = s[i + 1];
+		d[i + 0] = s[i + 0];
+	}
+}
+
+/**
+ * Copy `n` elements from `s` to `d`, choosing the copy routine from the
+ * relative position of the two pointers.
+ *
+ * When `d` is not below `s` the copy is done back to front with zmemcpyb;
+ * otherwise it is delegated to zmemcpy.
+ *
+ * @param  d  Destination
+ * @param  s  Source
+ * @param  n  Number of elements to copy
+ */
+static inline void
+zmemmove(register zahl_char_t *d, register const zahl_char_t *s, size_t n)
+{
+	if (!(d < s)) {
+		/* Destination at or above source: copy from the top down. */
+		zmemcpyb(d, s, n);
+		return;
+	}
+	/* Destination below source. */
+	zmemcpy(d, s, n);
+}
diff --git a/src/zlsh.c b/src/zlsh.c
@@ -18,10 +18,11 @@ zlsh(z_t a, z_t b, size_t bits)
cbits = BITS_PER_CHAR - bits;
ENSURE_SIZE(a, b->used + chars + 1);
- if (likely(a == b))
- zmemmove(a->chars + chars, b->chars, b->used);
- else
+ if (likely(a == b)) {
+ zmemcpyb(a->chars + chars, b->chars, b->used);
+ } else {
zmemcpy(a->chars + chars, b->chars, b->used);
+ }
zmemset(a->chars, 0, chars);
a->used = b->used + chars;
diff --git a/zahl-internals.h b/zahl-internals.h
@@ -110,7 +110,7 @@ struct zahl {
void libzahl_realloc(struct zahl *, size_t);
ZAHL_INLINE void
-libzahl_memcpy(register zahl_char_t *restrict d, register const zahl_char_t *restrict s, size_t n)
+libzahl_memcpy(register zahl_char_t *d, register const zahl_char_t *s, size_t n)
{
size_t i;
if (n <= 4) {
@@ -124,10 +124,10 @@ libzahl_memcpy(register zahl_char_t *restrict d, register const zahl_char_t *res
d[3] = s[3];
} else {
for (i = 0; (i += 4) <= n;) {
- d[i - 1] = s[i - 1];
- d[i - 2] = s[i - 2];
- d[i - 3] = s[i - 3];
d[i - 4] = s[i - 4];
+ d[i - 3] = s[i - 3];
+ d[i - 2] = s[i - 2];
+ d[i - 1] = s[i - 1];
}
if (i > n) {
i -= 4;
@@ -138,7 +138,7 @@ libzahl_memcpy(register zahl_char_t *restrict d, register const zahl_char_t *res
if (i < n)
d[i] = s[i], i++;
if (i < n)
- d[i] = s[i], i++;
+ d[i] = s[i];
}
}
}