diff options
author | MITSUNARI Shigeo <herumi@nifty.com> | 2016-10-04 09:34:19 +0800 |
---|---|---|
committer | MITSUNARI Shigeo <herumi@nifty.com> | 2016-10-04 09:34:19 +0800 |
commit | 7a0e8e67c9183acd114adcdd71f8902c2a0d7ff5 (patch) | |
tree | 646666f2fd6c862383d3f18276d2845e4aa96a30 | |
parent | 28e818641bc7665f9da7b21112b568e74ff37362 (diff) | |
download | dexon-mcl-7a0e8e67c9183acd114adcdd71f8902c2a0d7ff5.tar.gz dexon-mcl-7a0e8e67c9183acd114adcdd71f8902c2a0d7ff5.tar.zst dexon-mcl-7a0e8e67c9183acd114adcdd71f8902c2a0d7ff5.zip |
use AddPre
-rw-r--r-- | include/mcl/op.hpp | 2 | ||||
-rw-r--r-- | sample/large.cpp | 22 | ||||
-rw-r--r-- | src/fp.cpp | 88 | ||||
-rw-r--r-- | src/fp_proto.hpp | 19 | ||||
-rw-r--r-- | src/low_gmp.hpp | 29 | ||||
-rw-r--r-- | test/low_test.cpp | 6 |
6 files changed, 114 insertions, 52 deletions
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp index 2694ee4..f318570 100644 --- a/include/mcl/op.hpp +++ b/include/mcl/op.hpp @@ -40,6 +40,8 @@ typedef void (*void3u)(Unit*, const Unit*, const Unit*); typedef void (*void4u)(Unit*, const Unit*, const Unit*, const Unit*); typedef int (*int2u)(Unit*, const Unit*); +typedef Unit (*u3u)(Unit*, const Unit*, const Unit*); + struct Block { const Unit *p; // pointer to original FpT.v_ size_t n; diff --git a/sample/large.cpp b/sample/large.cpp index b363440..72de4a2 100644 --- a/sample/large.cpp +++ b/sample/large.cpp @@ -23,19 +23,19 @@ void mulPre768(Unit *pz, const Unit *px, const Unit *py) ad + bc = (a + b)(c + d) - ac - bd */ const size_t H = N / 2; - low_mul<H>(pz, px, py); // bd - low_mul<H>(pz + N, px + H, py + H); // ac + low_mul_G<H>(pz, px, py); // bd + low_mul_G<H>(pz + N, px + H, py + H); // ac Unit a_b[H + 1]; Unit c_d[H + 1]; - a_b[H] = low_add<H>(a_b, px, px + H); // a + b - c_d[H] = low_add<H>(c_d, py, py + H); // c + d + a_b[H] = low_addNC_G<H>(a_b, px, px + H); // a + b + c_d[H] = low_addNC_G<H>(c_d, py, py + H); // c + d Unit work[N + H] = {}; - low_mul<H>(work, a_b, c_d); - if (c_d[H]) low_add<H + 1>(work + H, work + H, c_d); - if (a_b[H]) low_add<H + 1>(work + H, work + H, a_b); - work[N] -= low_sub<H>(work, work, pz); - work[N] -= low_sub<H>(work, work, pz + N); - low_add<H + N>(pz + H, pz + H, work); + low_mul_G<H>(work, a_b, c_d); + if (c_d[H]) low_addNC_G<H + 1>(work + H, work + H, c_d); + if (a_b[H]) low_addNC_G<H + 1>(work + H, work + H, a_b); + work[N] -= low_subNC_G<H>(work, work, pz); + work[N] -= low_subNC_G<H>(work, work, pz + N); + low_addNC_G<H + N>(pz + H, pz + H, work); } void testMul() { @@ -44,7 +44,7 @@ void testMul() ux[i] = -i * i + 5; uy[i] = -i * i + 9; } - low_mul<12>(a, ux, uy); + low_mul_G<12>(a, ux, uy); mulPre768(b, ux, uy); for (size_t i = 0; i < N * 2; i++) { if (a[i] != b[i]) { @@ -138,19 +138,19 @@ struct OpeFunc { } static inline void fp_addC(Unit *z, const Unit *x, const Unit *y, const Unit *p) { - if (low_add<N>(z, x, y)) { - low_sub<N>(z, z, p); + if (AddPre<N, GTag>::f(z, x, y)) { + low_subNC_G<N>(z, z, p); return; } Unit tmp[N]; - if (low_sub<N>(tmp, z, p) == 0) { + if (low_subNC_G<N>(tmp, z, p) == 0) { memcpy(z, tmp, sizeof(tmp)); } } static inline void fp_subC(Unit *z, const Unit *x, const Unit *y, const Unit *p) { - if (low_sub<N>(z, x, y)) { - low_add<N>(z, z, p); + if (low_subNC_G<N>(z, x, y)) { + AddPre<N, GTag>::f(z, z, p); } } /* @@ -158,54 +158,54 @@ struct OpeFunc { */ static inline void fpDbl_addC(Unit *z, const Unit *x, const Unit *y, const Unit *p) { - if (low_add<N * 2>(z, x, y)) { - low_sub<N>(z + N, z + N, p); + if (AddPre<N * 2, GTag>::f(z, x, y)) { + low_subNC_G<N>(z + N, z + N, p); return; } Unit tmp[N]; - if (low_sub<N>(tmp, z + N, p) == 0) { + if (low_subNC_G<N>(tmp, z + N, p) == 0) { memcpy(z + N, tmp, sizeof(tmp)); } } static inline void fpDbl_subC(Unit *z, const Unit *x, const Unit *y, const Unit *p) { - if (low_sub<N * 2>(z, x, y)) { - low_add<N>(z + N, z + N, p); + if (low_subNC_G<N * 2>(z, x, y)) { + AddPre<N, GTag>::f(z + N, z + N, p); } } // z[N] <- x[N] + y[N] without carry static inline void fp_addNCC(Unit *z, const Unit *x, const Unit *y) { - low_add<N>(z, x, y); + AddPre<N, GTag>::f(z, x, y); } static inline void fp_subNCC(Unit *z, const Unit *x, const Unit *y) { - low_sub<N>(z, x, y); + low_subNC_G<N>(z, x, y); } // z[N + 1] <- x[N] * y static inline void fp_mul_UnitPreC(Unit *z, const Unit *x, Unit y) { - low_mul_Unit<N>(z, x, y); + low_mul_Unit_G<N>(z, x, y); } // z[N * 2] <- x[N] * y[N] static inline void fpDbl_mulPreC(Unit *z, const Unit *x, const Unit *y) { - low_mul<N>(z, x, y); + low_mul_G<N>(z, x, y); } // y[N * 2] <- x[N]^2 static inline void fpDbl_sqrPreC(Unit *y, const Unit *x) { - low_sqr<N>(y, x); + low_sqr_G<N>(y, x); } // y[N] <- x[N + 1] mod p[N] static inline void fpN1_modC(Unit *y, const Unit *x, const Unit *p) { - low_N1_mod<N>(y, x, p); + low_N1_mod_G<N>(y, x, p); } // y[N] <- x[N * 2] mod p[N] static inline void fpDbl_modC(Unit *y, const Unit *x, const Unit *p) { - low_mod<N>(y, x, p); + low_mod_G<N>(y, x, p); } // z[N] <- mont(x[N], y[N]) static inline void fp_mulMontC(Unit *z, const Unit *x, const Unit *y, const Unit *p) @@ -218,25 +218,25 @@ struct OpeFunc { const Unit rp = p[-1]; Unit buf[N * 2 + 2]; Unit *c = buf; - low_mul_Unit<N>(c, x, y[0]); // x * y[0] + low_mul_Unit_G<N>(c, x, y[0]); // x * y[0] Unit q = c[0] * rp; Unit t[N + 2]; - low_mul_Unit<N>(t, p, q); // p * q + low_mul_Unit_G<N>(t, p, q); // p * q t[N + 1] = 0; // always zero - c[N + 1] = low_add<N + 1>(c, c, t); + c[N + 1] = AddPre<N + 1, GTag>::f(c, c, t); c++; for (size_t i = 1; i < N; i++) { - low_mul_Unit<N>(t, x, y[i]); - c[N + 1] = low_add<N + 1>(c, c, t); + low_mul_Unit_G<N>(t, x, y[i]); + c[N + 1] = AddPre<N + 1, GTag>::f(c, c, t); q = c[0] * rp; - low_mul_Unit<N>(t, p, q); - low_add<N + 2>(c, c, t); + low_mul_Unit_G<N>(t, p, q); + AddPre<N + 2, GTag>::f(c, c, t); c++; } if (c[N]) { - low_sub<N>(z, c, p); + low_subNC_G<N>(z, c, p); } else { - if (low_sub<N>(z, c, p)) { + if (low_subNC_G<N>(z, c, p)) { memcpy(z, c, N * sizeof(Unit)); } } @@ -254,20 +254,20 @@ struct OpeFunc { clearArray(t, N + 1, N * 2); Unit *c = buf; Unit q = xy[0] * rp; - low_mul_Unit<N>(t, p, q); - buf[N * 2] = low_add<N * 2>(buf, xy, t); + low_mul_Unit_G<N>(t, p, q); + buf[N * 2] = AddPre<N * 2, GTag>::f(buf, xy, t); c++; for (size_t i = 1; i < N; i++) { q = c[0] * rp; - low_mul_Unit<N>(t, p, q); + low_mul_Unit_G<N>(t, p, q); // QQQ mpn_add_n((mp_limb_t*)c, (const mp_limb_t*)c, (const mp_limb_t*)t, N * 2 + 1 - i); c++; } if (c[N]) { - low_sub<N>(z, c, p); + low_subNC_G<N>(z, c, p); } else { - if (low_sub<N>(z, c, p)) { + if (low_subNC_G<N>(z, c, p)) { memcpy(z, c, N * sizeof(Unit)); } } @@ -296,6 +296,32 @@ struct OpeFunc { fpDbl_sqrPreC(xx, x); fpDbl_modMontC(y, xx, p); } +#if 0 //#ifdef MCL_USE_LLVM + static inline void fp_mul_UnitL(Unit *z, const Unit *x, Unit y, const Unit *p) + { + Unit xy[N + 1]; + fp_mul_UnitPreC(xy, x, y); + fpN1_modC(z, xy, p); + } + static inline void fp_mulL(Unit *z, const Unit *x, const Unit *y, const Unit *p) + { + Unit xy[N * 2]; + fpDbl_mulPreC(xy, x, y); + fpDbl_modC(z, xy, p); + } + static inline void fp_sqrL(Unit *y, const Unit *x, const Unit *p) + { + Unit xx[N * 2]; + fpDbl_sqrPreC(xx, x); + fpDbl_modC(y, xx, p); + } + static inline void fp_sqrMontL(Unit *y, const Unit *x, const Unit *p) + { + Unit xx[N * 2]; + fpDbl_sqrPreC(xx, x); + fpDbl_modMontC(y, xx, p); + } +#endif static inline void fp_invOpC(Unit *y, const Unit *x, const Op& op) { mpz_class my; diff --git a/src/fp_proto.hpp b/src/fp_proto.hpp index e100fde..99763e6 100644 --- a/src/fp_proto.hpp +++ b/src/fp_proto.hpp @@ -8,6 +8,25 @@ */ #include <mcl/op.hpp> +namespace mcl { namespace fp { + +// (carry, z[N]) <- x[N] + y[N] +template<size_t N, class Tag>class AddPre { static const u3u f; }; +// (carry, z[N]) <- x[N] - y[N] +template<size_t N, class Tag>class SubPre { static const u3u f; }; +// z[N * 2] <- x[N] * y[N] +template<size_t N, class Tag>class MulPre { static const void3u f; }; +// z[N * 2] <- x[N] * x[N] +template<size_t N, class Tag>class SqrPre { static const void2u f; }; +// z[N + 1] <- x[N] * y +template<size_t N, class Tag>class Mul_UnitPre { static const void2uI f; }; +// z[N] <- x[N + 1] % p[N] +template<size_t N, class Tag>class N1_Mod { static const void3u f; }; +// z[N] <- x[N * 2] % p[N] +template<size_t N, class Tag>class Dbl_Mod { static const void3u f; }; + +} } // mcl::fp + #ifdef MCL_USE_LLVM extern "C" { diff --git a/src/low_gmp.hpp b/src/low_gmp.hpp index e14697b..01a8a5b 100644 --- a/src/low_gmp.hpp +++ b/src/low_gmp.hpp @@ -1,45 +1,60 @@ #pragma once #include <mcl/op.hpp> +#include "fp_proto.hpp" namespace mcl { namespace fp { +struct GTag; + +template<size_t N> +struct AddPre<N, GTag> { + static inline Unit addPre(Unit *z, const Unit *x, const Unit *y) + { + return mpn_add_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N); + } + static const u3u f; +}; + +template<size_t N> +const u3u AddPre<N, GTag>::f = &AddPre<N, GTag>::addPre; + template<size_t N> -Unit low_add(Unit *z, const Unit *x, const Unit *y) +inline Unit low_addNC_G(Unit *z, const Unit *x, const Unit *y) { return mpn_add_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N); } template<size_t N> -Unit low_sub(Unit *z, const Unit *x, const Unit *y) +inline Unit low_subNC_G(Unit *z, const Unit *x, const Unit *y) { return mpn_sub_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N); } // Z[N * 2] template<size_t N> -void low_mul(Unit *z, const Unit *x, const Unit *y) +inline void low_mul_G(Unit *z, const Unit *x, const Unit *y) { return mpn_mul_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N); } template<size_t N> -void low_sqr(Unit *y, const Unit *x) +inline void low_sqr_G(Unit *y, const Unit *x) { return mpn_sqr((mp_limb_t*)y, (const mp_limb_t*)x, N); } // Z[N + 1] template<size_t N> -void low_mul_Unit(Unit *z, const Unit *x, Unit y) +inline void low_mul_Unit_G(Unit *z, const Unit *x, Unit y) { z[N] = mpn_mul_1((mp_limb_t*)z, (const mp_limb_t*)x, N, y); } // y[N] <- X[N + 1] mod p[N] template<size_t N> -void low_N1_mod(Unit *y, const Unit *x, const Unit *p) +inline void low_N1_mod_G(Unit *y, const Unit *x, const Unit *p) { mp_limb_t q[2]; // not used mpn_tdiv_qr(q, (mp_limb_t*)y, 0, (const mp_limb_t*)x, N + 1, (const mp_limb_t*)p, N); } // y[N] <- X[N * 2] mod p[N] template<size_t N> -void low_mod(Unit *y, const Unit *x, const Unit *p) +inline void low_mod_G(Unit *y, const Unit *x, const Unit *p) { mp_limb_t q[N + 1]; // not used mpn_tdiv_qr(q, (mp_limb_t*)y, 0, (const mp_limb_t*)x, N * 2, (const mp_limb_t*)p, N); diff --git a/test/low_test.cpp b/test/low_test.cpp index 1b5de8d..e1ae420 100644 --- a/test/low_test.cpp +++ b/test/low_test.cpp @@ -45,11 +45,11 @@ void bench() Unit w[N]; rg.read(x, N); rg.read(y, N); - low_add<N>(z, x, y); + low_addNC_G<N>(z, x, y); addNC<bit>(w, x, y); CYBOZU_TEST_EQUAL_ARRAY(z, w, N); - low_sub<N>(z, x, y); + low_subNC_G<N>(z, x, y); subNC<bit>(w, x, y); CYBOZU_TEST_EQUAL_ARRAY(z, w, N); } @@ -82,7 +82,7 @@ CYBOZU_TEST_AUTO(addNC) Unit w[N]; rg.read(x, N); rg.read(y, N); - low_add<N>(z, x, y); + low_addNC_G<N>(z, x, y); addNC<bit>(w, x, y); CYBOZU_TEST_EQUAL_ARRAY(z, w, N); add_test(w, x, y); |