diff options
author | MITSUNARI Shigeo <herumi@nifty.com> | 2016-11-21 14:19:08 +0800 |
---|---|---|
committer | MITSUNARI Shigeo <herumi@nifty.com> | 2016-11-21 14:19:08 +0800 |
commit | 4bc7bb18bb2b4476ba2982b82ae640611ac34b8b (patch) | |
tree | 62b5fc53f91fada5feeb5873fa95b57fb4645c09 | |
parent | 0024ca3aadd071bbd5e91a717e6a2127c22fd578 (diff) | |
download | tangerine-mcl-4bc7bb18bb2b4476ba2982b82ae640611ac34b8b.tar.gz tangerine-mcl-4bc7bb18bb2b4476ba2982b82ae640611ac34b8b.tar.zst tangerine-mcl-4bc7bb18bb2b4476ba2982b82ae640611ac34b8b.zip |
shortcut of mulUnit
-rw-r--r-- | include/mcl/fp.hpp | 22 | ||||
-rw-r--r-- | sample/rawbench.cpp | 33 | ||||
-rw-r--r-- | src/fp.cpp | 2 | ||||
-rw-r--r-- | src/low_func.hpp | 35 |
4 files changed, 60 insertions, 32 deletions
diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp index 57e5cfa..068774f 100644 --- a/include/mcl/fp.hpp +++ b/include/mcl/fp.hpp @@ -450,28 +450,6 @@ public: init(mstr, mode); } static inline size_t getModBitLen() { return getBitSize(); } -#if 0 -private: - static inline void fp_mulUnitW(Unit *z, const Unit *x, Unit y, const Unit *p) - { - Unit xy[maxSize + 1]; - op_.fp_mulUnitPre(xy, x, y); - // z[N] <- xy[N + 1] % p[N] - op_.fpN1_mod(z, xy, p); - } - static inline void fp_mulW(Unit *z, const Unit *x, const Unit *y, const Unit *p) - { - Unit xy[maxSize * 2]; - op_.fpDbl_mulPre(xy, x, y); - op_.fpDbl_mod(z, xy, p); - } - static inline void fp_sqrW(Unit *y, const Unit *x, const Unit *p) - { - Unit xx[maxSize * 2]; - op_.fpDbl_sqrPre(xx, x); - op_.fpDbl_mod(y, xx, p); - } -#endif }; template<class tag, size_t maxBitSize> fp::Op FpT<tag, maxBitSize>::op_; diff --git a/sample/rawbench.cpp b/sample/rawbench.cpp index 2f071a5..608a57e 100644 --- a/sample/rawbench.cpp +++ b/sample/rawbench.cpp @@ -11,11 +11,22 @@ typedef mcl::FpDblT<Fp> FpDbl; typedef mcl::Fp6T<Fp> Fp6; typedef mcl::Fp12T<Fp> Fp12; +typedef mcl::fp::Unit Unit; + +void mul9(const mcl::fp::Op& op, Unit *y, const Unit *x, const Unit *p) +{ + const size_t maxN = sizeof(Fp) / sizeof(Unit); + Unit tmp[maxN]; + op.fp_add(tmp, x, x, p); // 2x + op.fp_add(tmp, tmp, tmp, p); // 4x + op.fp_add(tmp, tmp, tmp, p); // 8x + op.fp_add(y, tmp, x, p); // 9x +} + void benchRaw(const char *p, mcl::fp::Mode mode) { Fp::init(p, mode); Fp2::init(1); - typedef mcl::fp::Unit Unit; const size_t maxN = sizeof(Fp) / sizeof(Unit); const mcl::fp::Op& op = Fp::getOp(); cybozu::XorShift rg; @@ -32,7 +43,9 @@ void benchRaw(const char *p, mcl::fp::Mode mode) double fp_addT, fp_subT; double fp_addPreT, fp_subPreT; double fp_sqrT, fp_mulT; - double fp_mulUnitT, fp_mulUnitPreT; + double fp_mulUnitT; + double mul9T; + double fp_mulUnitPreT; double fpN1_modT; double fpDbl_addT, fpDbl_subT; double fpDbl_sqrPreT, fpDbl_mulPreT, fpDbl_modT; @@ -43,8 +56,9 @@ void benchRaw(const char *p, mcl::fp::Mode mode) CYBOZU_BENCH_T(fp_subPreT, op.fp_subPre, uz, uy, ux); CYBOZU_BENCH_T(fp_sqrT, op.fp_sqr, uz, ux, op.p); CYBOZU_BENCH_T(fp_mulT, op.fp_mul, uz, ux, uy, op.p); - CYBOZU_BENCH_T(fp_mulUnitT, op.fp_mulUnit, uz, ux, 12345678, op.p); - CYBOZU_BENCH_T(fp_mulUnitPreT, op.fp_mulUnitPre, ux, ux, 12345678); + CYBOZU_BENCH_T(fp_mulUnitT, op.fp_mulUnit, uz, ux, 9, op.p); + CYBOZU_BENCH_T(mul9T, mul9, op, uz, ux, op.p); + CYBOZU_BENCH_T(fp_mulUnitPreT, op.fp_mulUnitPre, ux, ux, 9); CYBOZU_BENCH_T(fpN1_modT, op.fpN1_mod, ux, uy, op.p); CYBOZU_BENCH_T(fpDbl_addT, op.fpDbl_add, uz, ux, uy, op.p); CYBOZU_BENCH_T(fpDbl_subT, op.fpDbl_sub, uz, uy, ux, op.p); @@ -62,7 +76,9 @@ void benchRaw(const char *p, mcl::fp::Mode mode) "fp_add", "fp_sub", "addPre", "subPre", "fp_sqr", "fp_mul", - "mulUnit", "mulUnitP", + "mulUnit", + "mul9", + "mulUnitP", "fpN1_mod", "D_add", "D_sub", "D_sqrPre", "D_mulPre", "D_mod", @@ -76,7 +92,9 @@ void benchRaw(const char *p, mcl::fp::Mode mode) fp_addT, fp_subT, fp_addPreT, fp_subPreT, fp_sqrT, fp_mulT, - fp_mulUnitT, fp_mulUnitPreT, + fp_mulUnitT, + mul9T, + fp_mulUnitPreT, fpN1_modT, fpDbl_addT, fpDbl_subT, fpDbl_sqrPreT, fpDbl_mulPreT, fpDbl_modT, @@ -116,7 +134,8 @@ int main(int argc, char *argv[]) // N = 4 "0x0000000000000001000000000000000000000000000000000000000000000085", // min prime - "0x2523648240000001ba344d80000000086121000000000013a700000000000013", + "0x2523648240000001ba344d80000000086121000000000013a700000000000013", // BN254 + "0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47", // Snark "0x7523648240000001ba344d80000000086121000000000013a700000000000017", "0x800000000000000000000000000000000000000000000000000000000000005f", "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff43", // max prime @@ -177,7 +177,7 @@ void setOpSub(Op& op) op.fp_sqr = Sqr<N, Tag>::f; op.fpDbl_mod = Dbl_Mod<N, Tag>::f; } - op.fp_mulUnit = Mul_Unit<N, Tag>::f; + op.fp_mulUnit = MulUnit<N, Tag>::f; op.fpDbl_mulPre = MulPre<N, Tag>::f; op.fpDbl_sqrPre = SqrPre<N, Tag>::f; op.fp_mulUnitPre = MulUnitPre<N, Tag>::f; diff --git a/src/low_func.hpp b/src/low_func.hpp index 559f25f..2785f67 100644 --- a/src/low_func.hpp +++ b/src/low_func.hpp @@ -8,6 +8,7 @@ */ #include <mcl/op.hpp> #include <mcl/util.hpp> +#include <cybozu/bit_operation.hpp> #ifdef _MSC_VER #pragma warning(push) @@ -291,18 +292,48 @@ const void3u N1_Mod<N, Tag>::f = N1_Mod<N, Tag>::func; // z[N] <- (x[N] * y) % p[N] template<size_t N, class Tag = Gtag> -struct Mul_Unit { +struct MulUnit { static inline void func(Unit *z, const Unit *x, Unit y, const Unit *p) { Unit xy[N + 1]; MulUnitPre<N, Tag>::f(xy, x, y); +#if 1 + Unit len = UnitBitSize - 1 - cybozu::bsr(p[N - 1]); + Unit v = xy[N]; + if (N > 1 && len < 3 && v < 0xff) { + for (;;) { + if (len == 0) { + v = xy[N]; + } else { + v = (xy[N] << len) | (xy[N - 1] >> (UnitBitSize - len)); + } + if (v == 0) break; + if (v == 1) { + xy[N] -= SubPre<N, Tag>::f(xy, xy, p); + } else { + Unit t[N + 1]; + MulUnitPre<N, Tag>::f(t, p, v); + SubPre<N + 1, Tag>::f(xy, xy, t); + } + } + for (;;) { + if (SubPre<N, Tag>::f(z, xy, p)) { + copyC<N>(z, xy); + return; + } + if (SubPre<N, Tag>::f(xy, z, p)) { + return; + } + } + } +#endif N1_Mod<N, Tag>::f(z, xy, p); } static const void2uIu f; }; template<size_t N, class Tag> -const void2uIu Mul_Unit<N, Tag>::f = Mul_Unit<N, Tag>::func; +const void2uIu MulUnit<N, Tag>::f = MulUnit<N, Tag>::func; // z[N] <- x[N * 2] % p[N] template<size_t N, class Tag = Gtag> |