diff options
author | MITSUNARI Shigeo <herumi@nifty.com> | 2018-07-11 14:44:46 +0800 |
---|---|---|
committer | MITSUNARI Shigeo <herumi@nifty.com> | 2018-07-11 15:29:49 +0800 |
commit | c57760ea54c180d2e02422e1ec37e4864bcc8aab (patch) | |
tree | f42286fe89ac9932ac82f47f07d99f0e471d051a /include | |
parent | 602a2df220c2a4544ef00550b50d24fc78b4d1d9 (diff) | |
download | dexon-mcl-c57760ea54c180d2e02422e1ec37e4864bcc8aab.tar.gz dexon-mcl-c57760ea54c180d2e02422e1ec37e4864bcc8aab.tar.zst dexon-mcl-c57760ea54c180d2e02422e1ec37e4864bcc8aab.zip |
add fp2Dbl_mulPre
Diffstat (limited to 'include')
-rw-r--r-- | include/mcl/fp_tower.hpp | 154 | ||||
-rw-r--r-- | include/mcl/op.hpp | 2 |
2 files changed, 76 insertions, 80 deletions
diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index b6a5229..318003e 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -134,6 +134,55 @@ public: } void operator+=(const FpDblT& x) { add(*this, *this, x); } void operator-=(const FpDblT& x) { sub(*this, *this, x); } + /* + Fp2Dbl::mulPre by FpDblT + @note mod of NIST_P192 is fast + */ + static void fp2Dbl_mulPreW(Unit *z, const Unit *x, const Unit *y) + { + const Fp *px = reinterpret_cast<const Fp*>(x); + const Fp *py = reinterpret_cast<const Fp*>(y); + const Fp& a = px[0]; + const Fp& b = px[1]; + const Fp& c = py[0]; + const Fp& d = py[1]; + FpDblT& d0 = reinterpret_cast<FpDblT*>(z)[0]; + FpDblT& d1 = reinterpret_cast<FpDblT*>(z)[1]; + FpDblT d2; + Fp s, t; + Fp::add(s, a, b); + Fp::add(t, c, d); + FpDblT::mulPre(d1, s, t); // (a + b)(c + d) + FpDblT::mulPre(d0, a, c); + FpDblT::mulPre(d2, b, d); + FpDblT::sub(d1, d1, d0); // (a + b)(c + d) - ac + FpDblT::sub(d1, d1, d2); // (a + b)(c + d) - ac - bd + FpDblT::sub(d0, d0, d2); // ac - bd + } + /* + Fp2Dbl::mulPre by FpDblT with No Carry + */ + static void fp2Dbl_mulPreNoCarryW(Unit *z, const Unit *x, const Unit *y) + { + const Fp *px = reinterpret_cast<const Fp*>(x); + const Fp *py = reinterpret_cast<const Fp*>(y); + const Fp& a = px[0]; + const Fp& b = px[1]; + const Fp& c = py[0]; + const Fp& d = py[1]; + FpDblT& d0 = reinterpret_cast<FpDblT*>(z)[0]; + FpDblT& d1 = reinterpret_cast<FpDblT*>(z)[1]; + FpDblT d2; + Fp s, t; + Fp::addPre(s, a, b); + Fp::addPre(t, c, d); + FpDblT::mulPre(d1, s, t); // (a + b)(c + d) + FpDblT::mulPre(d0, a, c); + FpDblT::mulPre(d2, b, d); + FpDblT::subPre(d1, d1, d0); // (a + b)(c + d) - ac + FpDblT::subPre(d1, d1, d2); // (a + b)(c + d) - ac - bd + FpDblT::sub(d0, d0, d2); // ac - bd + } }; template<class Fp> struct Fp12T; @@ -328,16 +377,25 @@ public: mcl::fp::Op& op = Fp::op_; op.fp2_add = fp2_addW; op.fp2_sub = fp2_subW; - if (op.isFastMod) { - op.fp2_mul = fp2_mulW; - } else if (!op.isFullBit) { - if (0 && sizeof(Fp) * 8 == op.N * fp::UnitBitSize && op.fp2_mulNF) { - op.fp2_mul = fp2_mulNFW; + if (op.fp2Dbl_mulPre == 0) { + if (op.isFullBit) { + op.fp2Dbl_mulPre = FpDblT<Fp>::fp2Dbl_mulPreW; } else { - op.fp2_mul = fp2_mulUseDblUseNCW; + op.fp2Dbl_mulPre = FpDblT<Fp>::fp2Dbl_mulPreNoCarryW; + } + } + if (op.fp2_mul == 0) { + if (op.isFastMod) { + op.fp2_mul = fp2_mulW; + } else if (!op.isFullBit) { + if (0 && sizeof(Fp) * 8 == op.N * fp::UnitBitSize && op.fp2_mulNF) { + op.fp2_mul = fp2_mulNFW; + } else { + op.fp2_mul = fp2_mulW; + } + } else { + op.fp2_mul = fp2_mulW; } - } else { - op.fp2_mul = fp2_mulUseDblW; } op.fp2_neg = fp2_negW; op.fp2_inv = fp2_invW; @@ -439,6 +497,7 @@ private: Fp::neg(py[0], px[0]); Fp::neg(py[1], px[1]); } +#if 0 /* x = a + bi, y = c + di, i^2 = -1 z = xy = (a + bi)(c + di) = (ac - bd) + (ad + bc)i @@ -464,58 +523,19 @@ private: Fp::sub(pz[1], t1, ac); pz[1] -= bd; } - /* - # of mod = 2 - @note mod of NIST_P192 is fast - */ - static void fp2_mulUseDblW(Unit *z, const Unit *x, const Unit *y) - { - const Fp *px = reinterpret_cast<const Fp*>(x); - const Fp *py = reinterpret_cast<const Fp*>(y); - const Fp& a = px[0]; - const Fp& b = px[1]; - const Fp& c = py[0]; - const Fp& d = py[1]; - FpDbl d0, d1, d2; - Fp s, t; - Fp::add(s, a, b); - Fp::add(t, c, d); - FpDbl::mulPre(d0, s, t); // (a + b)(c + d) - FpDbl::mulPre(d1, a, c); - FpDbl::mulPre(d2, b, d); - FpDbl::sub(d0, d0, d1); // (a + b)(c + d) - ac - FpDbl::sub(d0, d0, d2); // (a + b)(c + d) - ac - bd - Fp *pz = reinterpret_cast<Fp*>(z); - FpDbl::mod(pz[1], d0); - FpDbl::sub(d1, d1, d2); // ac - bd - FpDbl::mod(pz[0], d1); // set z0 - } +#endif static void fp2_mulNFW(Unit *z, const Unit *x, const Unit *y) { const fp::Op& op = Fp::op_; op.fp2_mulNF(z, x, y, op.p); } - static void fp2_mulUseDblUseNCW(Unit *z, const Unit *x, const Unit *y) + static void fp2_mulW(Unit *z, const Unit *x, const Unit *y) { - const Fp *px = reinterpret_cast<const Fp*>(x); - const Fp *py = reinterpret_cast<const Fp*>(y); - const Fp& a = px[0]; - const Fp& b = px[1]; - const Fp& c = py[0]; - const Fp& d = py[1]; - FpDbl d0, d1, d2; - Fp s, t; - Fp::addPre(s, a, b); - Fp::addPre(t, c, d); - FpDbl::mulPre(d0, s, t); // (a + b)(c + d) - FpDbl::mulPre(d1, a, c); - FpDbl::mulPre(d2, b, d); - FpDbl::subPre(d0, d0, d1); // (a + b)(c + d) - ac - FpDbl::subPre(d0, d0, d2); // (a + b)(c + d) - ac - bd + FpDbl d[2]; + Fp::getOp().fp2Dbl_mulPre(reinterpret_cast<Unit*>(d), x, y); Fp *pz = reinterpret_cast<Fp*>(z); - FpDbl::mod(pz[1], d0); - FpDbl::sub(d1, d1, d2); // ac - bd - FpDbl::mod(pz[0], d1); // set z0 + FpDbl::mod(pz[0], d[0]); + FpDbl::mod(pz[1], d[1]); } /* x = a + bi, i^2 = -1 @@ -665,33 +685,7 @@ struct Fp2DblT { } static void mulPre(Fp2DblT& z, const Fp2& x, const Fp2& y) { - const Fp& a = x.a; - const Fp& b = x.b; - const Fp& c = y.a; - const Fp& d = y.b; - if (Fp::isFullBit()) { - FpDbl BD; - Fp s, t; - Fp::add(s, a, b); // s = a + b - Fp::add(t, c, d); // t = c + d - FpDbl::mulPre(BD, b, d); // BD = bd - FpDbl::mulPre(z.a, a, c); // z.a = ac - FpDbl::mulPre(z.b, s, t); // z.b = st - FpDbl::sub(z.b, z.b, z.a); // z.b = st - ac - FpDbl::sub(z.b, z.b, BD); // z.b = st - ac - bd = ad + bc - FpDbl::sub(z.a, z.a, BD); // ac - bd - } else { - FpDbl BD; - Fp s, t; - Fp::addPre(s, a, b); // s = a + b - Fp::addPre(t, c, d); // t = c + d - FpDbl::mulPre(BD, b, d); // BD = bd - FpDbl::mulPre(z.a, a, c); // z.a = ac - FpDbl::mulPre(z.b, s, t); // z.b = st - FpDbl::subPre(z.b, z.b, z.a); // z.b = st - ac - FpDbl::subPre(z.b, z.b, BD); // z.b = st - ac - bd = ad + bc - FpDbl::sub(z.a, z.a, BD); // ac - bd - } + Fp::getOp().fp2Dbl_mulPre((fp::Unit*)&z, (const fp::Unit*)&x, (const fp::Unit*)&y); } static void mod(Fp2& y, const Fp2DblT& x) { diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp index 4ee70aa..82874b1 100644 --- a/include/mcl/op.hpp +++ b/include/mcl/op.hpp @@ -208,6 +208,7 @@ struct Op { u3u fp_subPre; // without modulo p u3u fpDbl_addPre; u3u fpDbl_subPre; + void3u fp2Dbl_mulPre; /* for Fp2 = F[u] / (u^2 + 1) x = a + bu @@ -284,6 +285,7 @@ struct Op { fp_subPre = 0; fpDbl_addPre = 0; fpDbl_subPre = 0; + fp2Dbl_mulPre = 0; xi_a = 0; fp2_add = 0; |