aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMITSUNARI Shigeo <herumi@nifty.com>2016-11-21 14:19:08 +0800
committerMITSUNARI Shigeo <herumi@nifty.com>2016-11-21 14:19:08 +0800
commit4bc7bb18bb2b4476ba2982b82ae640611ac34b8b (patch)
tree62b5fc53f91fada5feeb5873fa95b57fb4645c09
parent0024ca3aadd071bbd5e91a717e6a2127c22fd578 (diff)
downloadtangerine-mcl-4bc7bb18bb2b4476ba2982b82ae640611ac34b8b.tar.gz
tangerine-mcl-4bc7bb18bb2b4476ba2982b82ae640611ac34b8b.tar.zst
tangerine-mcl-4bc7bb18bb2b4476ba2982b82ae640611ac34b8b.zip
shortcut of mulUnit
-rw-r--r--include/mcl/fp.hpp22
-rw-r--r--sample/rawbench.cpp33
-rw-r--r--src/fp.cpp2
-rw-r--r--src/low_func.hpp35
4 files changed, 60 insertions, 32 deletions
diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
index 57e5cfa..068774f 100644
--- a/include/mcl/fp.hpp
+++ b/include/mcl/fp.hpp
@@ -450,28 +450,6 @@ public:
init(mstr, mode);
}
static inline size_t getModBitLen() { return getBitSize(); }
-#if 0
-private:
- static inline void fp_mulUnitW(Unit *z, const Unit *x, Unit y, const Unit *p)
- {
- Unit xy[maxSize + 1];
- op_.fp_mulUnitPre(xy, x, y);
- // z[N] <- xy[N + 1] % p[N]
- op_.fpN1_mod(z, xy, p);
- }
- static inline void fp_mulW(Unit *z, const Unit *x, const Unit *y, const Unit *p)
- {
- Unit xy[maxSize * 2];
- op_.fpDbl_mulPre(xy, x, y);
- op_.fpDbl_mod(z, xy, p);
- }
- static inline void fp_sqrW(Unit *y, const Unit *x, const Unit *p)
- {
- Unit xx[maxSize * 2];
- op_.fpDbl_sqrPre(xx, x);
- op_.fpDbl_mod(y, xx, p);
- }
-#endif
};
template<class tag, size_t maxBitSize> fp::Op FpT<tag, maxBitSize>::op_;
diff --git a/sample/rawbench.cpp b/sample/rawbench.cpp
index 2f071a5..608a57e 100644
--- a/sample/rawbench.cpp
+++ b/sample/rawbench.cpp
@@ -11,11 +11,22 @@ typedef mcl::FpDblT<Fp> FpDbl;
typedef mcl::Fp6T<Fp> Fp6;
typedef mcl::Fp12T<Fp> Fp12;
+typedef mcl::fp::Unit Unit;
+
+void mul9(const mcl::fp::Op& op, Unit *y, const Unit *x, const Unit *p)
+{
+ const size_t maxN = sizeof(Fp) / sizeof(Unit);
+ Unit tmp[maxN];
+ op.fp_add(tmp, x, x, p); // 2x
+ op.fp_add(tmp, tmp, tmp, p); // 4x
+ op.fp_add(tmp, tmp, tmp, p); // 8x
+ op.fp_add(y, tmp, x, p); // 9x
+}
+
void benchRaw(const char *p, mcl::fp::Mode mode)
{
Fp::init(p, mode);
Fp2::init(1);
- typedef mcl::fp::Unit Unit;
const size_t maxN = sizeof(Fp) / sizeof(Unit);
const mcl::fp::Op& op = Fp::getOp();
cybozu::XorShift rg;
@@ -32,7 +43,9 @@ void benchRaw(const char *p, mcl::fp::Mode mode)
double fp_addT, fp_subT;
double fp_addPreT, fp_subPreT;
double fp_sqrT, fp_mulT;
- double fp_mulUnitT, fp_mulUnitPreT;
+ double fp_mulUnitT;
+ double mul9T;
+ double fp_mulUnitPreT;
double fpN1_modT;
double fpDbl_addT, fpDbl_subT;
double fpDbl_sqrPreT, fpDbl_mulPreT, fpDbl_modT;
@@ -43,8 +56,9 @@ void benchRaw(const char *p, mcl::fp::Mode mode)
CYBOZU_BENCH_T(fp_subPreT, op.fp_subPre, uz, uy, ux);
CYBOZU_BENCH_T(fp_sqrT, op.fp_sqr, uz, ux, op.p);
CYBOZU_BENCH_T(fp_mulT, op.fp_mul, uz, ux, uy, op.p);
- CYBOZU_BENCH_T(fp_mulUnitT, op.fp_mulUnit, uz, ux, 12345678, op.p);
- CYBOZU_BENCH_T(fp_mulUnitPreT, op.fp_mulUnitPre, ux, ux, 12345678);
+ CYBOZU_BENCH_T(fp_mulUnitT, op.fp_mulUnit, uz, ux, 9, op.p);
+ CYBOZU_BENCH_T(mul9T, mul9, op, uz, ux, op.p);
+ CYBOZU_BENCH_T(fp_mulUnitPreT, op.fp_mulUnitPre, ux, ux, 9);
CYBOZU_BENCH_T(fpN1_modT, op.fpN1_mod, ux, uy, op.p);
CYBOZU_BENCH_T(fpDbl_addT, op.fpDbl_add, uz, ux, uy, op.p);
CYBOZU_BENCH_T(fpDbl_subT, op.fpDbl_sub, uz, uy, ux, op.p);
@@ -62,7 +76,9 @@ void benchRaw(const char *p, mcl::fp::Mode mode)
"fp_add", "fp_sub",
"addPre", "subPre",
"fp_sqr", "fp_mul",
- "mulUnit", "mulUnitP",
+ "mulUnit",
+ "mul9",
+ "mulUnitP",
"fpN1_mod",
"D_add", "D_sub",
"D_sqrPre", "D_mulPre", "D_mod",
@@ -76,7 +92,9 @@ void benchRaw(const char *p, mcl::fp::Mode mode)
fp_addT, fp_subT,
fp_addPreT, fp_subPreT,
fp_sqrT, fp_mulT,
- fp_mulUnitT, fp_mulUnitPreT,
+ fp_mulUnitT,
+ mul9T,
+ fp_mulUnitPreT,
fpN1_modT,
fpDbl_addT, fpDbl_subT,
fpDbl_sqrPreT, fpDbl_mulPreT, fpDbl_modT,
@@ -116,7 +134,8 @@ int main(int argc, char *argv[])
// N = 4
"0x0000000000000001000000000000000000000000000000000000000000000085", // min prime
- "0x2523648240000001ba344d80000000086121000000000013a700000000000013",
+ "0x2523648240000001ba344d80000000086121000000000013a700000000000013", // BN254
+ "0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47", // Snark
"0x7523648240000001ba344d80000000086121000000000013a700000000000017",
"0x800000000000000000000000000000000000000000000000000000000000005f",
"0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff43", // max prime
diff --git a/src/fp.cpp b/src/fp.cpp
index 3c62fd5..9fb8649 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -177,7 +177,7 @@ void setOpSub(Op& op)
op.fp_sqr = Sqr<N, Tag>::f;
op.fpDbl_mod = Dbl_Mod<N, Tag>::f;
}
- op.fp_mulUnit = Mul_Unit<N, Tag>::f;
+ op.fp_mulUnit = MulUnit<N, Tag>::f;
op.fpDbl_mulPre = MulPre<N, Tag>::f;
op.fpDbl_sqrPre = SqrPre<N, Tag>::f;
op.fp_mulUnitPre = MulUnitPre<N, Tag>::f;
diff --git a/src/low_func.hpp b/src/low_func.hpp
index 559f25f..2785f67 100644
--- a/src/low_func.hpp
+++ b/src/low_func.hpp
@@ -8,6 +8,7 @@
*/
#include <mcl/op.hpp>
#include <mcl/util.hpp>
+#include <cybozu/bit_operation.hpp>
#ifdef _MSC_VER
#pragma warning(push)
@@ -291,18 +292,48 @@ const void3u N1_Mod<N, Tag>::f = N1_Mod<N, Tag>::func;
// z[N] <- (x[N] * y) % p[N]
template<size_t N, class Tag = Gtag>
-struct Mul_Unit {
+struct MulUnit {
static inline void func(Unit *z, const Unit *x, Unit y, const Unit *p)
{
Unit xy[N + 1];
MulUnitPre<N, Tag>::f(xy, x, y);
+#if 1
+ Unit len = UnitBitSize - 1 - cybozu::bsr(p[N - 1]);
+ Unit v = xy[N];
+ if (N > 1 && len < 3 && v < 0xff) {
+ for (;;) {
+ if (len == 0) {
+ v = xy[N];
+ } else {
+ v = (xy[N] << len) | (xy[N - 1] >> (UnitBitSize - len));
+ }
+ if (v == 0) break;
+ if (v == 1) {
+ xy[N] -= SubPre<N, Tag>::f(xy, xy, p);
+ } else {
+ Unit t[N + 1];
+ MulUnitPre<N, Tag>::f(t, p, v);
+ SubPre<N + 1, Tag>::f(xy, xy, t);
+ }
+ }
+ for (;;) {
+ if (SubPre<N, Tag>::f(z, xy, p)) {
+ copyC<N>(z, xy);
+ return;
+ }
+ if (SubPre<N, Tag>::f(xy, z, p)) {
+ return;
+ }
+ }
+ }
+#endif
N1_Mod<N, Tag>::f(z, xy, p);
}
static const void2uIu f;
};
template<size_t N, class Tag>
-const void2uIu Mul_Unit<N, Tag>::f = Mul_Unit<N, Tag>::func;
+const void2uIu MulUnit<N, Tag>::f = MulUnit<N, Tag>::func;
// z[N] <- x[N * 2] % p[N]
template<size_t N, class Tag = Gtag>