commit b0e210a02b8576828ac64e8b97bd565fd28c8748
parent a02ba0b0602fb1d6caa5ce332a0c3483c3e4c4aa
Author: Mattias Andrée <maandree@kth.se>
Date: Fri, 6 May 2016 13:48:43 +0200
Make zmul and zsqr (which call low-level functions) inline
Signed-off-by: Mattias Andrée <maandree@kth.se>
Diffstat:
7 files changed, 49 insertions(+), 48 deletions(-)
diff --git a/src/internals.h b/src/internals.h
@@ -107,9 +107,6 @@ extern void *libzahl_temp_allocation;
#define zmemcpy(d, s, n) libzahl_memcpy(d, s, n)
#define zmemset(a, v, n) libzahl_memset(a, v, n)
-void zmul_impl(z_t a, z_t b, z_t c);
-void zsqr_impl(z_t a, z_t b);
-
static inline int
zzero1(z_t a, z_t b)
{
diff --git a/src/zmul.c b/src/zmul.c
@@ -3,7 +3,7 @@
static inline void
-zmul_impl_single_char(z_t a, z_t b, z_t c)
+zmul_ll_single_char(z_t a, z_t b, z_t c)
{
ENSURE_SIZE(a, 1);
a->used = 1;
@@ -12,7 +12,7 @@ zmul_impl_single_char(z_t a, z_t b, z_t c)
}
void
-zmul_impl(z_t a, z_t b, z_t c)
+zmul_ll(z_t a, z_t b, z_t c)
{
/*
* Karatsuba algorithm
@@ -41,7 +41,7 @@ zmul_impl(z_t a, z_t b, z_t c)
m2 = b == c ? m : zbits(c);
if (m + m2 <= BITS_PER_CHAR) {
- zmul_impl_single_char(a, b, c);
+ zmul_ll_single_char(a, b, c);
return;
}
@@ -57,11 +57,11 @@ zmul_impl(z_t a, z_t b, z_t c)
zsplit_pz(c_high, c_low, c, m2);
- zmul_impl(z0, b_low, c_low);
+ zmul_ll(z0, b_low, c_low);
zadd_unsigned_assign(b_low, b_high);
zadd_unsigned_assign(c_low, c_high);
- zmul_impl(z1, b_low, c_low);
- zmul_impl(z2, b_high, c_high);
+ zmul_ll(z1, b_low, c_low);
+ zmul_ll(z2, b_high, c_high);
zsub_nonnegative_assign(z1, z0);
zsub_nonnegative_assign(z1, z2);
@@ -78,15 +78,3 @@ zmul_impl(z_t a, z_t b, z_t c)
zfree_temp(b_low);
zfree_temp(b_high);
}
-
-void
-zmul(z_t a, z_t b, z_t c)
-{
- int b_sign, c_sign;
- b_sign = b->sign, b->sign *= b_sign;
- c_sign = c->sign, c->sign *= c_sign;
- zmul_impl(a, b, c);
- c->sign *= c_sign;
- b->sign *= b_sign;
- SET_SIGNUM(a, zsignum(b) * zsignum(c));
-}
diff --git a/src/zpow.c b/src/zpow.c
@@ -46,15 +46,15 @@ zpow(z_t a, z_t b, z_t c)
x = tc->chars[i];
for (j = BITS_PER_CHAR; j--; x >>= 1) {
if (x & 1)
- zmul_impl(a, a, tb);
- zsqr_impl(tb, tb);
+ zmul_ll(a, a, tb);
+ zsqr_ll(tb, tb);
}
}
x = tc->chars[i];
for (; x; x >>= 1) {
if (x & 1)
- zmul_impl(a, a, tb);
- zsqr_impl(tb, tb);
+ zmul_ll(a, a, tb);
+ zsqr_ll(tb, tb);
}
if (neg)
diff --git a/src/zpowu.c b/src/zpowu.c
@@ -24,11 +24,11 @@ zpowu(z_t a, z_t b, unsigned long long int c)
zsetu(a, 1);
if (c & 1)
- zmul_impl(a, a, tb);
+ zmul_ll(a, a, tb);
while (c >>= 1) {
- zsqr_impl(tb, tb);
+ zsqr_ll(tb, tb);
if (c & 1)
- zmul_impl(a, a, tb);
+ zmul_ll(a, a, tb);
}
if (neg)
diff --git a/src/zsqr.c b/src/zsqr.c
@@ -3,7 +3,7 @@
static inline void
-zsqr_impl_single_char(z_t a, z_t b)
+zsqr_ll_single_char(z_t a, z_t b)
{
ENSURE_SIZE(a, 1);
a->used = 1;
@@ -12,7 +12,7 @@ zsqr_impl_single_char(z_t a, z_t b)
}
void
-zsqr_impl(z_t a, z_t b)
+zsqr_ll(z_t a, z_t b)
{
/*
* Karatsuba algorithm, optimised for equal factors.
@@ -26,7 +26,7 @@ zsqr_impl(z_t a, z_t b)
bits = zbits(b);
if (bits <= BITS_PER_CHAR / 2) {
- zsqr_impl_single_char(a, b);
+ zsqr_ll_single_char(a, b);
return;
}
@@ -47,18 +47,18 @@ zsqr_impl(z_t a, z_t b)
if (unlikely(zzero(low))) {
- zsqr_impl(z2, high);
+ zsqr_ll(z2, high);
zlsh(a, z2, bits << 1);
} else {
zinit_temp(z0);
zinit_temp(z1);
- zsqr_impl(z0, low);
+ zsqr_ll(z0, low);
- zmul_impl(z1, low, high);
+ zmul_ll(z1, low, high);
zlsh(z1, z1, bits + 1);
- zsqr_impl(z2, high);
+ zsqr_ll(z2, high);
zlsh(a, z2, bits << 1);
zadd_unsigned_assign(a, z1);
@@ -68,14 +68,3 @@ zsqr_impl(z_t a, z_t b)
zfree_temp(z0);
}
}
-
-void
-zsqr(z_t a, z_t b)
-{
- if (unlikely(zzero(b))) {
- SET_SIGNUM(a, 0);
- } else {
- zsqr_impl(a, b);
- SET_SIGNUM(a, 1);
- }
-}
diff --git a/zahl.h b/zahl.h
@@ -91,12 +91,12 @@ ZAHL_INLINE void zabs(z_t, z_t); /* a := |b| */
ZAHL_INLINE void zneg(z_t, z_t); /* a := -b */
void zadd(z_t, z_t, z_t); /* a := b + c */
void zsub(z_t, z_t, z_t); /* a := b - c */
-void zmul(z_t, z_t, z_t); /* a := b * c */
+ZAHL_INLINE void zmul(z_t, z_t, z_t); /* a := b * c */
void zmodmul(z_t, z_t, z_t, z_t); /* a := (b * c) % d */
void zdiv(z_t, z_t, z_t); /* a := b / c */
void zdivmod(z_t, z_t, z_t, z_t); /* a := c / d, b = c % d */
void zmod(z_t, z_t, z_t); /* a := b % c */
-void zsqr(z_t, z_t); /* a := b² */
+ZAHL_INLINE void zsqr(z_t, z_t); /* a := b² */
void zmodsqr(z_t, z_t, z_t); /* a := b² % c */
void zpow(z_t, z_t, z_t); /* a := b ↑ c */
void zmodpow(z_t, z_t, z_t, z_t); /* a := (b ↑ c) % d */
@@ -172,6 +172,8 @@ void zperror(const char *); /* Identical to perror(3p) except it sup
void zbset_ll_set(z_t, size_t); /* zbset(a, a, b, 1) */
void zbset_ll_clear(z_t, size_t); /* zbset(a, a, b, 0) */
void zbset_ll_flip(z_t, size_t); /* zbset(a, a, b, -1) */
+void zmul_ll(z_t, z_t, z_t); /* zmul for non-negative operands */
+void zsqr_ll(z_t, z_t); /* zsqr for non-negative operand */
diff --git a/zahl/inlines.h b/zahl/inlines.h
@@ -266,3 +266,28 @@ zsave(z_t a, void *buffer)
}
return sizeof(int) + sizeof(size_t) + (zzero(a) ? 0 : a->used * sizeof(zahl_char_t));
}
+
+
+ZAHL_INLINE void
+zmul(z_t a, z_t b, z_t c) /* a := b * c; wrapper that handles signs around zmul_ll */
+{
+ int b_sign, c_sign;
+ b_sign = b->sign, b->sign *= b_sign; /* save b's sign; sign * sign cannot be negative */
+ c_sign = c->sign, c->sign *= c_sign; /* same for c (also works when b and c alias) */
+ zmul_ll(a, b, c); /* declared in zahl.h as "zmul for non-negative operands" */
+ c->sign = c_sign; /* restore first: the next statement queries zsignum(b) and zsignum(c) */
+ b->sign = b_sign;
+ ZAHL_SET_SIGNUM(a, zsignum(b) * zsignum(c)); /* a's sign is set from the product of the operands' signums */
+}
+
+
+ZAHL_INLINE void
+zsqr(z_t a, z_t b) /* a := b²; wrapper around zsqr_ll */
+{
+ if (ZAHL_UNLIKELY(zzero(b))) { /* zero operand: zsqr_ll is not called at all */
+ ZAHL_SET_SIGNUM(a, 0);
+ } else {
+ zsqr_ll(a, b); /* NOTE(review): documented as "for non-negative operand", yet b's sign is not cleared here as zmul does - confirm zsqr_ll ignores it */
+ ZAHL_SET_SIGNUM(a, 1); /* the square of a non-zero value is positive */
+ }
+}