diff options
author | MITSUNARI Shigeo <herumi@nifty.com> | 2016-10-14 16:08:04 +0800 |
---|---|---|
committer | MITSUNARI Shigeo <herumi@nifty.com> | 2016-10-14 16:08:04 +0800 |
commit | d142ba7253c5f353e6a6ee35dcc5dc59270dc2dd (patch) | |
tree | 95cad9599e1d513bc3964faeb609398a685ffd15 | |
parent | 2e5146418cb7daddb4f5a9d649ba65f838c3679b (diff) | |
download | dexon-mcl-d142ba7253c5f353e6a6ee35dcc5dc59270dc2dd.tar.gz dexon-mcl-d142ba7253c5f353e6a6ee35dcc5dc59270dc2dd.tar.zst dexon-mcl-d142ba7253c5f353e6a6ee35dcc5dc59270dc2dd.zip |
rename addNC, subNC to addPre, subPre
-rw-r--r-- | include/mcl/fp.hpp | 4 | ||||
-rw-r--r-- | include/mcl/fp_tower.hpp | 32 | ||||
-rw-r--r-- | include/mcl/op.hpp | 16 | ||||
-rw-r--r-- | sample/large.cpp | 20 | ||||
-rw-r--r-- | sample/rawbench.cpp | 16 | ||||
-rw-r--r-- | src/asm/low_arm.s | 36 | ||||
-rw-r--r-- | src/asm/low_x86-64.asm | 98 | ||||
-rw-r--r-- | src/fp.cpp | 14 | ||||
-rw-r--r-- | src/fp_generator.hpp | 26 | ||||
-rw-r--r-- | src/fp_proto.hpp | 50 | ||||
-rw-r--r-- | src/gen.cpp | 44 | ||||
-rw-r--r-- | test/fp_test.cpp | 8 | ||||
-rw-r--r-- | test/fp_tower_test.cpp | 4 | ||||
-rw-r--r-- | test/low_test.cpp | 64 |
14 files changed, 205 insertions, 227 deletions
diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp index 43749d9..ad916ee 100644 --- a/include/mcl/fp.hpp +++ b/include/mcl/fp.hpp @@ -330,8 +330,8 @@ public: } static inline void add(FpT& z, const FpT& x, const FpT& y) { op_.fp_add(z.v_, x.v_, y.v_, op_.p); } static inline void sub(FpT& z, const FpT& x, const FpT& y) { op_.fp_sub(z.v_, x.v_, y.v_, op_.p); } - static inline void addNC(FpT& z, const FpT& x, const FpT& y) { op_.fp_addNC(z.v_, x.v_, y.v_); } - static inline void subNC(FpT& z, const FpT& x, const FpT& y) { op_.fp_subNC(z.v_, x.v_, y.v_); } + static inline void addPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_addPre(z.v_, x.v_, y.v_); } + static inline void subPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_subPre(z.v_, x.v_, y.v_); } static inline void mul(FpT& z, const FpT& x, const FpT& y) { op_.fp_mul(z.v_, x.v_, y.v_, op_.p); } static inline void mul_Unit(FpT& z, const FpT& x, const Unit y) { op_.fp_mul_Unit(z.v_, x.v_, y, op_.p); } static inline void inv(FpT& y, const FpT& x) { op_.fp_invOp(y.v_, x.v_, op_); } diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index eea7b1e..0e7b53b 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -42,8 +42,8 @@ public: } static void add(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_add(z.v_, x.v_, y.v_, Fp::op_.p); } static void sub(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p); } - static void addNC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addNC(z.v_, x.v_, y.v_); } - static void subNC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subNC(z.v_, x.v_, y.v_); } + static void addPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); } + static void subPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); } /* mul(z, x, y) = mulPre(xy, x, y) + mod(z, xy) */ @@ -267,13 +267,13 @@ private: const Fp& d = py[1]; FpDbl d0, d1, d2; Fp s, t; - Fp::addNC(s, a, b); - Fp::addNC(t, c, d); + Fp::addPre(s, a, b); + Fp::addPre(t, c, d); FpDbl::mulPre(d0, s, t); // (a + b)(c + d) FpDbl::mulPre(d1, a, c); FpDbl::mulPre(d2, b, d); - FpDbl::subNC(d0, d0, d1); // (a + b)(c + d) - ac - FpDbl::subNC(d0, d0, d2); // (a + b)(c + d) - ac - bd + FpDbl::subPre(d0, d0, d1); // (a + b)(c + d) - ac + FpDbl::subPre(d0, d0, d2); // (a + b)(c + d) - ac - bd Fp *pz = reinterpret_cast<Fp*>(z); FpDbl::mod(pz[1], d0); FpDbl::sub(d1, d1, d2); // ac - bd @@ -300,9 +300,9 @@ private: #else Fp t1, t2; FpDbl d1, d2; - Fp::addNC(t1, b, b); // 2b + Fp::addPre(t1, b, b); // 2b FpDbl::mulPre(d2, t1, a); // 2ab - Fp::addNC(t1, a, b); // a + b + Fp::addPre(t1, a, b); // a + b Fp::sub(t2, a, b); // a - b FpDbl::mulPre(d1, t1, t2); // (a + b)(a - b) FpDbl::mod(py[0], d1); @@ -375,20 +375,20 @@ struct Fp2T<Fp>::Dbl { FpDbl::add(z.a, x.a, y.a); FpDbl::add(z.b, x.b, y.b); } - static void addNC(Dbl& z, const Dbl& x, const Dbl& y) + static void addPre(Dbl& z, const Dbl& x, const Dbl& y) { - FpDbl::addNC(z.a, x.a, y.a); - FpDbl::addNC(z.b, x.b, y.b); + FpDbl::addPre(z.a, x.a, y.a); + FpDbl::addPre(z.b, x.b, y.b); } static void sub(Dbl& z, const Dbl& x, const Dbl& y) { FpDbl::sub(z.a, x.a, y.a); FpDbl::sub(z.b, x.b, y.b); } - static void subNC(Dbl& z, const Dbl& x, const Dbl& y) + static void subPre(Dbl& z, const Dbl& x, const Dbl& y) { - FpDbl::subNC(z.a, x.a, y.a); - FpDbl::subNC(z.b, x.b, y.b); + FpDbl::subPre(z.a, x.a, y.a); + FpDbl::subPre(z.b, x.b, y.b); } static void neg(Dbl& y, const Dbl& x) { @@ -398,9 +398,9 @@ struct Fp2T<Fp>::Dbl { static void sqr(Dbl& y, const Fp2T& x) { Fp t1, t2; - Fp::addNC(t1, x.b, x.b); // 2b + Fp::addPre(t1, x.b, x.b); // 2b FpDbl::mulPre(y.b, t1, x.a); // 2ab - Fp::addNC(t1, x.a, x.b); // a + b + Fp::addPre(t1, x.a, x.b); // a + b Fp::sub(t2, x.a, x.b); // a - b FpDbl::mulPre(y.a, t1, t2); // (a + b)(a - b) } diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp index 0deb027..bc11c50 100644 --- a/include/mcl/op.hpp +++ b/include/mcl/op.hpp @@ -109,10 +109,10 @@ struct Op { void4u fpDbl_sub; void3u fpDbl_mod; - u3u fp_addNC; // without modulo p - u3u fp_subNC; // without modulo p - u3u fpDbl_addNC; - u3u fpDbl_subNC; + u3u fp_addPre; // without modulo p + u3u fp_subPre; // without modulo p + u3u fpDbl_addPre; + u3u fpDbl_subPre; /* for Fp2 = F[u] / (u^2 + 1) x = a + bu @@ -176,10 +176,10 @@ struct Op { fpDbl_sub = 0; fpDbl_mod = 0; - fp_addNC = 0; - fp_subNC = 0; - fpDbl_addNC = 0; - fpDbl_subNC = 0; + fp_addPre = 0; + fp_subPre = 0; + fpDbl_addPre = 0; + fpDbl_subPre = 0; xi_a = 0; fp2_add = 0; diff --git a/sample/large.cpp b/sample/large.cpp index d7a627c..2aa4489 100644 --- a/sample/large.cpp +++ b/sample/large.cpp @@ -27,15 +27,15 @@ void mulPre768(Unit *pz, const Unit *px, const Unit *py) low_mul_G<H>(pz + N, px + H, py + H); // ac Unit a_b[H + 1]; Unit c_d[H + 1]; - a_b[H] = low_addNC_G<H>(a_b, px, px + H); // a + b - c_d[H] = low_addNC_G<H>(c_d, py, py + H); // c + d + a_b[H] = low_addPre_G<H>(a_b, px, px + H); // a + b + c_d[H] = low_addPre_G<H>(c_d, py, py + H); // c + d Unit work[N + H] = {}; low_mul_G<H>(work, a_b, c_d); - if (c_d[H]) low_addNC_G<H + 1>(work + H, work + H, c_d); - if (a_b[H]) low_addNC_G<H + 1>(work + H, work + H, a_b); - work[N] -= low_subNC_G<H>(work, work, pz); - work[N] -= low_subNC_G<H>(work, work, pz + N); - low_addNC_G<H + N>(pz + H, pz + H, work); + if (c_d[H]) low_addPre_G<H + 1>(work + H, work + H, c_d); + if (a_b[H]) low_addPre_G<H + 1>(work + H, work + H, a_b); + work[N] -= low_subPre_G<H>(work, work, pz); + work[N] -= low_subPre_G<H>(work, work, pz + N); + low_addPre_G<H + N>(pz + H, pz + H, work); } void testMul() { @@ -110,9 +110,9 @@ void test(const std::string& pStr, mcl::fp::Mode mode) CYBOZU_BENCH("sqrPre", op.fpDbl_sqrPre, ux, ux); CYBOZU_BENCH("add", op.fpDbl_add, ux, ux, ux, op.p); CYBOZU_BENCH("sub", op.fpDbl_sub, ux, ux, ux, op.p); - if (op.fpDbl_addNC) { - CYBOZU_BENCH("addNC", op.fpDbl_addNC, ux, ux, ux); - CYBOZU_BENCH("subNC", op.fpDbl_subNC, ux, ux, ux); + if (op.fpDbl_addPre) { + CYBOZU_BENCH("addPre", op.fpDbl_addPre, ux, ux, ux); + CYBOZU_BENCH("subPre", op.fpDbl_subPre, ux, ux, ux); } CYBOZU_BENCH("mont", op.fpDbl_mod, ux, ux, op.p); CYBOZU_BENCH("mul", Fp::mul, x, x, x); diff --git a/sample/rawbench.cpp b/sample/rawbench.cpp index e911019..eab0e7c 100644 --- a/sample/rawbench.cpp +++ b/sample/rawbench.cpp @@ -30,7 +30,7 @@ void benchRaw(const char *p, mcl::fp::Mode mode) memcpy(uy, fy.getUnit(), sizeof(Unit) * op.N); memcpy(ux + op.N, fx.getUnit(), sizeof(Unit) * op.N); double fp_addT, fp_subT; - double fp_addNCT, fp_subNCT; + double fp_addPreT, fp_subPreT; double fp_sqrT, fp_mulT; double fp_mul_UnitT, fp_mul_UnitPreT; double fpDbl_addT, fpDbl_subT; @@ -38,12 +38,12 @@ void benchRaw(const char *p, mcl::fp::Mode mode) double fp2_sqrT, fp2_mulT; CYBOZU_BENCH_T(fp_addT, op.fp_add, uz, ux, uy, op.p); CYBOZU_BENCH_T(fp_subT, op.fp_sub, uz, uy, ux, op.p); - if (op.fp_addNC) { - CYBOZU_BENCH_T(fp_addNCT, op.fp_addNC, uz, ux, uy); - CYBOZU_BENCH_T(fp_subNCT, op.fp_subNC, uz, uy, ux); + if (op.fp_addPre) { + CYBOZU_BENCH_T(fp_addPreT, op.fp_addPre, uz, ux, uy); + CYBOZU_BENCH_T(fp_subPreT, op.fp_subPre, uz, uy, ux); } else { - fp_addNCT = 0; - fp_subNCT = 0; + fp_addPreT = 0; + fp_subPreT = 0; } CYBOZU_BENCH_T(fp_sqrT, op.fp_sqr, uz, ux, op.p); CYBOZU_BENCH_T(fp_mulT, op.fp_mul, uz, ux, uy, op.p); @@ -63,7 +63,7 @@ void benchRaw(const char *p, mcl::fp::Mode mode) printf("%s\n", mcl::fp::ModeToStr(mode)); const char *tStrTbl[] = { "fp_add", "fp_sub", - "addNC", "subNC", + "addPre", "subPre", "fp_sqr", "fp_mul", "mulUnit", "mulUnitP", "D_add", "D_sub", @@ -76,7 +76,7 @@ void benchRaw(const char *p, mcl::fp::Mode mode) printf("\n"); const double tTbl[] = { fp_addT, fp_subT, - fp_addNCT, fp_subNCT, + fp_addPreT, fp_subPreT, fp_sqrT, fp_mulT, fp_mul_UnitT, fp_mul_UnitPreT, fpDbl_addT, fpDbl_subT, diff --git a/src/asm/low_arm.s b/src/asm/low_arm.s index a655520..1ed2a12 100644 --- a/src/asm/low_arm.s +++ b/src/asm/low_arm.s @@ -1,8 +1,8 @@ .arch armv7-a .align 2 - .global mcl_fp_addNC64 -mcl_fp_addNC64: + .global mcl_fp_addPre64 +mcl_fp_addPre64: ldm r1, {r3, r12} ldm r2, {r1, r2} adds r1, r1, r3 @@ -12,8 +12,8 @@ mcl_fp_addNC64: .align 2 - .global mcl_fp_addNC96 -mcl_fp_addNC96: + .global mcl_fp_addPre96 +mcl_fp_addPre96: push {r4, lr} ldm r1, {r1, r3, r12} ldm r2, {r2, r4, lr} @@ -26,8 +26,8 @@ mcl_fp_addNC96: # slower .align 2 - .global mcl_fp_addNC96_2 -mcl_fp_addNC96_2: + .global mcl_fp_addPre96_2 +mcl_fp_addPre96_2: ldr r3, [r1], #4 ldr r12, [r2], #4 adds r3, r3, r12 @@ -40,9 +40,9 @@ mcl_fp_addNC96_2: stm r0, {r1, r3} bx lr - .globl mcl_fp_addNC128 + .globl mcl_fp_addPre128 .align 2 -mcl_fp_addNC128: +mcl_fp_addPre128: push {r4, lr} ldm r1!, {r3, r4} ldm r2!, {r12, lr} @@ -58,9 +58,9 @@ mcl_fp_addNC128: bx lr # almost same - .globl mcl_fp_addNC128_2 + .globl mcl_fp_addPre128_2 .align 2 -cl_fp_addNC128_2: +cl_fp_addPre128_2: push {r4, r5, r6, lr} ldm r1, {r1, r3, r4, r5} ldm r2, {r2, r6, r12, lr} @@ -72,9 +72,9 @@ cl_fp_addNC128_2: pop {r4, r5, r6, lr} bx lr - .globl mcl_fp_addNC160 + .globl mcl_fp_addPre160 .align 2 -mcl_fp_addNC160: +mcl_fp_addPre160: push {r4, lr} ldm r1!, {r3, r4} ldm r2!, {r12, lr} @@ -90,9 +90,9 @@ mcl_fp_addNC160: pop {r4, lr} bx lr - .globl mcl_fp_addNC192 + .globl mcl_fp_addPre192 .align 2 -mcl_fp_addNC192: +mcl_fp_addPre192: push {r4, r5, r6, lr} ldm r1!, {r3, r4, r5} ldm r2!, {r6, r12, lr} @@ -110,9 +110,9 @@ mcl_fp_addNC192: pop {r4, r5, r6, lr} bx lr - .globl mcl_fp_addNC224 + .globl mcl_fp_addPre224 .align 2 -mcl_fp_addNC224: +mcl_fp_addPre224: push {r4, r5, r6, lr} ldm r1!, {r3, r4, r5} ldm r2!, {r6, r12, lr} @@ -131,9 +131,9 @@ mcl_fp_addNC224: pop {r4, r5, r6, lr} bx lr - .globl mcl_fp_addNC256 + .globl mcl_fp_addPre256 .align 2 -mcl_fp_addNC256: +mcl_fp_addPre256: push {r4, r5, r6, r7, r8, lr} ldm r1!, {r3, r4, r5, r6} ldm r2!, {r7, r8, r12, lr} diff --git a/src/asm/low_x86-64.asm b/src/asm/low_x86-64.asm index cb6947e..b09b9dc 100644 --- a/src/asm/low_x86-64.asm +++ b/src/asm/low_x86-64.asm @@ -21,7 +21,7 @@ global %1 segment .text -%imacro addNC 1 +%imacro addPre 1 mov rax, [p2org] add rax, [p3org] mov [p1org], rax @@ -53,54 +53,54 @@ segment .text ret %endmacro -proc mcl_fp_addNC64 - addNC 0 -proc mcl_fp_addNC128 - addNC 1 -proc mcl_fp_addNC192 - addNC 2 -proc mcl_fp_addNC256 - addNC 3 -proc mcl_fp_addNC320 - addNC 4 -proc mcl_fp_addNC384 - addNC 5 -proc mcl_fp_addNC448 - addNC 6 -proc mcl_fp_addNC512 - addNC 7 -proc mcl_fp_addNC576 - addNC 8 -proc mcl_fp_addNC640 - addNC 9 -proc mcl_fp_addNC704 - addNC 10 -proc mcl_fp_addNC768 - addNC 11 -proc mcl_fp_addNC832 - addNC 12 -proc mcl_fp_addNC896 - addNC 13 -proc mcl_fp_addNC960 - addNC 14 -proc mcl_fp_addNC1024 - addNC 15 -proc mcl_fp_addNC1088 - addNC 16 -proc mcl_fp_addNC1152 - addNC 17 -proc mcl_fp_addNC1216 - addNC 18 -proc mcl_fp_addNC1280 - addNC 19 -proc mcl_fp_addNC1344 - addNC 20 -proc mcl_fp_addNC1408 - addNC 21 -proc mcl_fp_addNC1472 - addNC 22 -proc mcl_fp_addNC1536 - addNC 23 +proc mcl_fp_addPre64 + addPre 0 +proc mcl_fp_addPre128 + addPre 1 +proc mcl_fp_addPre192 + addPre 2 +proc mcl_fp_addPre256 + addPre 3 +proc mcl_fp_addPre320 + addPre 4 +proc mcl_fp_addPre384 + addPre 5 +proc mcl_fp_addPre448 + addPre 6 +proc mcl_fp_addPre512 + addPre 7 +proc mcl_fp_addPre576 + addPre 8 +proc mcl_fp_addPre640 + addPre 9 +proc mcl_fp_addPre704 + addPre 10 +proc mcl_fp_addPre768 + addPre 11 +proc mcl_fp_addPre832 + addPre 12 +proc mcl_fp_addPre896 + addPre 13 +proc mcl_fp_addPre960 + addPre 14 +proc mcl_fp_addPre1024 + addPre 15 +proc mcl_fp_addPre1088 + addPre 16 +proc mcl_fp_addPre1152 + addPre 17 +proc mcl_fp_addPre1216 + addPre 18 +proc mcl_fp_addPre1280 + addPre 19 +proc mcl_fp_addPre1344 + addPre 20 +proc mcl_fp_addPre1408 + addPre 21 +proc mcl_fp_addPre1472 + addPre 22 +proc mcl_fp_addPre1536 + addPre 23 proc mcl_fp_subNC64 subNC 0 @@ -109,8 +109,8 @@ Mode StrToMode(const std::string& s) #ifdef MCL_USE_LLVM #define MCL_DEF_LLVM_FUNC(n) \ -template<>const u3u AddNC<n, Ltag>::f = &mcl_fp_addNC ## n ## L; \ -template<>const u3u SubNC<n, Ltag>::f = &mcl_fp_subNC ## n ## L; \ +template<>const u3u AddPre<n, Ltag>::f = &mcl_fp_addPre ## n ## L; \ +template<>const u3u SubPre<n, Ltag>::f = &mcl_fp_subPre ## n ## L; \ template<>const void3u MulPre<n, Ltag>::f = &mcl_fpDbl_mulPre ## n ## L; \ template<>const void2u SqrPre<n, Ltag>::f = &mcl_fpDbl_sqrPre ## n ## L; \ template<>const void2uI Mul_UnitPre<n, Ltag>::f = &mcl_fp_mul_UnitPre ## n ## L; \ @@ -178,7 +178,7 @@ static void fp_invMontOpC(Unit *y, const Unit *x, const Op& op) } /* - large (N * 2) specification of AddNC, SubNC + large (N * 2) specification of AddPre, SubPre */ template<size_t N, bool enable> struct SetFpDbl { @@ -190,8 +190,8 @@ struct SetFpDbl<N, true> { static inline void exec(Op& op) { if (!op.isFullBit) { - op.fpDbl_addNC = AddNC<N * 2, Ltag>::f; - op.fpDbl_subNC = SubNC<N * 2, Ltag>::f; + op.fpDbl_addPre = AddPre<N * 2, Ltag>::f; + op.fpDbl_subPre = SubPre<N * 2, Ltag>::f; } } }; @@ -219,8 +219,8 @@ void setOpSub(Op& op) op.fpDbl_add = DblAdd<N, Tag>::f; op.fpDbl_sub = DblSub<N, Tag>::f; if (!op.isFullBit) { - op.fp_addNC = AddNC<N, Tag>::f; - op.fp_subNC = SubNC<N, Tag>::f; + op.fp_addPre = AddPre<N, Tag>::f; + op.fp_subPre = SubPre<N, Tag>::f; } SetFpDbl<N, enableFpDbl>::exec(op); } diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp index 8aba77b..489fe40 100644 --- a/src/fp_generator.hpp +++ b/src/fp_generator.hpp @@ -200,15 +200,15 @@ struct FpGenerator : Xbyak::CodeGenerator { gen_fp_sub(); if (op.isFullBit) { - op.fp_addNC = 0; - op.fp_subNC = 0; + op.fp_addPre = 0; + op.fp_subPre = 0; } else { align(16); - op.fp_addNC = getCurr<u3u>(); - gen_addSubNC(true, pn_); + op.fp_addPre = getCurr<u3u>(); + gen_addSubPre(true, pn_); align(16); - op.fp_subNC = getCurr<u3u>(); - gen_addSubNC(false, pn_); + op.fp_subPre = getCurr<u3u>(); + gen_addSubPre(false, pn_); } align(16); shr1_ = getCurr<void2op>(); @@ -248,15 +248,15 @@ struct FpGenerator : Xbyak::CodeGenerator { op.fpDbl_sub = getCurr<void4u>(); gen_fpDbl_sub(); if (op.isFullBit) { - op.fpDbl_addNC = 0; - op.fpDbl_subNC = 0; + op.fpDbl_addPre = 0; + op.fpDbl_subPre = 0; } else { align(16); - op.fpDbl_addNC = getCurr<u3u>(); - gen_addSubNC(true, pn_ * 2); + op.fpDbl_addPre = getCurr<u3u>(); + gen_addSubPre(true, pn_ * 2); align(16); - op.fpDbl_subNC = getCurr<u3u>(); - gen_addSubNC(false, pn_ * 2); + op.fpDbl_subPre = getCurr<u3u>(); + gen_addSubPre(false, pn_ * 2); } if (op.N == 2 || op.N == 3 || op.N == 4) { align(16); @@ -274,7 +274,7 @@ struct FpGenerator : Xbyak::CodeGenerator { gen_fpDbl_sqrPre(op); } } - void gen_addSubNC(bool isAdd, int n) + void gen_addSubPre(bool isAdd, int n) { StackFrame sf(this, 3); if (isAdd) { diff --git a/src/fp_proto.hpp b/src/fp_proto.hpp index 571ae37..799fe6f 100644 --- a/src/fp_proto.hpp +++ b/src/fp_proto.hpp @@ -35,7 +35,7 @@ void copyC(Unit *y, const Unit *x) // (carry, z[N]) <- x[N] + y[N] template<size_t N, class Tag = Gtag> -struct AddNC { +struct AddPre { static inline Unit func(Unit *z, const Unit *x, const Unit *y) { return mpn_add_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N); @@ -43,11 +43,11 @@ struct AddNC { static const u3u f; }; template<size_t N, class Tag> -const u3u AddNC<N, Tag>::f = &AddNC<N, Tag>::func; +const u3u AddPre<N, Tag>::f = &AddPre<N, Tag>::func; // (carry, z[N]) <- x[N] - y[N] template<size_t N, class Tag = Gtag> -struct SubNC { +struct SubPre { static inline Unit func(Unit *z, const Unit *x, const Unit *y) { return mpn_sub_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N); @@ -56,7 +56,7 @@ struct SubNC { }; template<size_t N, class Tag> -const u3u SubNC<N, Tag>::f = &SubNC<N, Tag>::func; +const u3u SubPre<N, Tag>::f = &SubPre<N, Tag>::func; // y[N] <- (-x[N]) % p[N] template<size_t N, class Tag = Gtag> @@ -67,7 +67,7 @@ struct Neg { if (x != y) clearC<N>(y); return; } - SubNC<N, Tag>::f(y, p, x); + SubPre<N, Tag>::f(y, p, x); } static const void3u f; }; @@ -162,12 +162,12 @@ template<size_t N, class Tag = Gtag> struct Add { static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p) { - if (AddNC<N, Tag>::f(z, x, y)) { - SubNC<N, Tag>::f(z, z, p); + if (AddPre<N, Tag>::f(z, x, y)) { + SubPre<N, Tag>::f(z, z, p); return; } Unit tmp[N]; - if (SubNC<N, Tag>::f(tmp, z, p) == 0) { + if (SubPre<N, Tag>::f(tmp, z, p) == 0) { memcpy(z, tmp, sizeof(tmp)); } } @@ -182,8 +182,8 @@ template<size_t N, class Tag = Gtag> struct Sub { static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p) { - if (SubNC<N, Tag>::f(z, x, y)) { - AddNC<N, Tag>::f(z, z, p); + if (SubPre<N, Tag>::f(z, x, y)) { + AddPre<N, Tag>::f(z, z, p); } } static const void4u f; @@ -197,12 +197,12 @@ template<size_t N, class Tag = Gtag> struct DblAdd { static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p) { - if (AddNC<N * 2, Tag>::f(z, x, y)) { - SubNC<N, Tag>::f(z + N, z + N, p); + if (AddPre<N * 2, Tag>::f(z, x, y)) { + SubPre<N, Tag>::f(z + N, z + N, p); return; } Unit tmp[N]; - if (SubNC<N, Tag>::f(tmp, z + N, p) == 0) { + if (SubPre<N, Tag>::f(tmp, z + N, p) == 0) { memcpy(z + N, tmp, sizeof(tmp)); } } @@ -217,8 +217,8 @@ template<size_t N, class Tag = Gtag> struct DblSub { static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p) { - if (SubNC<N * 2, Tag>::f(z, x, y)) { - AddNC<N, Tag>::f(z + N, z + N, p); + if (SubPre<N * 2, Tag>::f(z, x, y)) { + AddPre<N, Tag>::f(z + N, z + N, p); } } static const void4u f; @@ -248,20 +248,20 @@ struct Mont { Unit t[N + 2]; Mul_UnitPre<N, Tag>::f(t, p, q); // p * q t[N + 1] = 0; // always zero - c[N + 1] = AddNC<N + 1, Tag>::f(c, c, t); + c[N + 1] = AddPre<N + 1, Tag>::f(c, c, t); c++; for (size_t i = 1; i < N; i++) { Mul_UnitPre<N, Tag>::f(t, x, y[i]); - c[N + 1] = AddNC<N + 1, Tag>::f(c, c, t); + c[N + 1] = AddPre<N + 1, Tag>::f(c, c, t); q = c[0] * rp; Mul_UnitPre<N, Tag>::f(t, p, q); - AddNC<N + 2, Tag>::f(c, c, t); + AddPre<N + 2, Tag>::f(c, c, t); c++; } if (c[N]) { - SubNC<N, Tag>::f(z, c, p); + SubPre<N, Tag>::f(z, c, p); } else { - if (SubNC<N, Tag>::f(z, c, p)) { + if (SubPre<N, Tag>::f(z, c, p)) { memcpy(z, c, N * sizeof(Unit)); } } @@ -288,7 +288,7 @@ struct MontRed { Unit *c = buf; Unit q = xy[0] * rp; Mul_UnitPre<N, Tag>::f(t, p, q); - buf[N * 2] = AddNC<N * 2, Tag>::f(buf, xy, t); + buf[N * 2] = AddPre<N * 2, Tag>::f(buf, xy, t); c++; for (size_t i = 1; i < N; i++) { q = c[0] * rp; @@ -298,9 +298,9 @@ struct MontRed { c++; } if (c[N]) { - SubNC<N, Tag>::f(z, c, p); + SubPre<N, Tag>::f(z, c, p); } else { - if (SubNC<N, Tag>::f(z, c, p)) { + if (SubPre<N, Tag>::f(z, c, p)) { memcpy(z, c, N * sizeof(Unit)); } } @@ -365,8 +365,8 @@ const void3u Sqr<N, Tag>::f = Sqr<N, Tag>::func; #define MCL_FP_DEF_FUNC_SUB(n, suf) \ void mcl_fp_add ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \ void mcl_fp_sub ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \ -mcl::fp::Unit mcl_fp_addNC ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \ -mcl::fp::Unit mcl_fp_subNC ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \ +mcl::fp::Unit mcl_fp_addPre ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \ +mcl::fp::Unit mcl_fp_subPre ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \ void mcl_fp_mul_UnitPre ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, mcl::fp::Unit y); \ void mcl_fpDbl_mulPre ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \ void mcl_fpDbl_sqrPre ## n ## suf(mcl::fp::Unit* y, const mcl::fp::Unit* x); \ diff --git a/src/gen.cpp b/src/gen.cpp index 54f66e9..8014dac 100644 --- a/src/gen.cpp +++ b/src/gen.cpp @@ -23,8 +23,8 @@ struct Code : public mcl::Generator { Function makeNIST_P192; Function mcl_fpDbl_mod_NIST_P192; Function mcl_fp_sqr_NIST_P192; - FunctionMap mcl_fp_addNCM; - FunctionMap mcl_fp_subNCM; + FunctionMap mcl_fp_addPreM; + FunctionMap mcl_fp_subPreM; FunctionMap mcl_fp_addM; FunctionMap mcl_fp_subM; FunctionMap mulPvM; @@ -293,7 +293,7 @@ struct Code : public mcl::Generator { t = trunc(t, unit); return t; } - void gen_mcl_fp_addsubNC(bool isAdd) + void gen_mcl_fp_addsubPre(bool isAdd) { resetGlobalIdx(); Operand r(Int, unit); @@ -302,15 +302,15 @@ struct Code : public mcl::Generator { Operand py(IntPtr, bit); std::string name; if (isAdd) { - name = "mcl_fp_addNC" + cybozu::itoa(N) + "L"; - mcl_fp_addNCM[N] = Function(name, r, pz, px, py); - verifyAndSetPrivate(mcl_fp_addNCM[N]); - beginFunc(mcl_fp_addNCM[N]); + name = "mcl_fp_addPre" + cybozu::itoa(N) + "L"; + mcl_fp_addPreM[N] = Function(name, r, pz, px, py); + verifyAndSetPrivate(mcl_fp_addPreM[N]); + beginFunc(mcl_fp_addPreM[N]); } else { - name = "mcl_fp_subNC" + cybozu::itoa(N) + "L"; - mcl_fp_subNCM[N] = Function(name, r, pz, px, py); - verifyAndSetPrivate(mcl_fp_subNCM[N]); - beginFunc(mcl_fp_subNCM[N]); + name = "mcl_fp_subPre" + cybozu::itoa(N) + "L"; + mcl_fp_subPreM[N] = Function(name, r, pz, px, py); + verifyAndSetPrivate(mcl_fp_subPreM[N]); + beginFunc(mcl_fp_subPreM[N]); } Operand x = zext(load(px), bit + unit); Operand y = zext(load(py), bit + unit); @@ -328,7 +328,7 @@ struct Code : public mcl::Generator { endFunc(); } #if 0 // void-return version - void gen_mcl_fp_addsubNC(bool isAdd) + void gen_mcl_fp_addsubPre(bool isAdd) { resetGlobalIdx(); Operand pz(IntPtr, bit); @@ -336,15 +336,15 @@ struct Code : public mcl::Generator { Operand py(IntPtr, bit); std::string name; if (isAdd) { - name = "mcl_fp_addNC" + cybozu::itoa(bit) + "L"; - mcl_fp_addNCM[bit] = Function(name, Void, pz, px, py); - verifyAndSetPrivate(mcl_fp_addNCM[bit]); - beginFunc(mcl_fp_addNCM[bit]); + name = "mcl_fp_addPre" + cybozu::itoa(bit) + "L"; + mcl_fp_addPreM[bit] = Function(name, Void, pz, px, py); + verifyAndSetPrivate(mcl_fp_addPreM[bit]); + beginFunc(mcl_fp_addPreM[bit]); } else { - name = "mcl_fp_subNC" + cybozu::itoa(bit) + "L"; - mcl_fp_subNCM[bit] = Function(name, Void, pz, px, py); - verifyAndSetPrivate(mcl_fp_subNCM[bit]); - beginFunc(mcl_fp_subNCM[bit]); + name = "mcl_fp_subPre" + cybozu::itoa(bit) + "L"; + mcl_fp_subPreM[bit] = Function(name, Void, pz, px, py); + verifyAndSetPrivate(mcl_fp_subPreM[bit]); + beginFunc(mcl_fp_subPreM[bit]); } Operand x = load(px); Operand y = load(py); @@ -762,8 +762,8 @@ struct Code : public mcl::Generator { } void gen_all() { - gen_mcl_fp_addsubNC(true); - gen_mcl_fp_addsubNC(false); + gen_mcl_fp_addsubPre(true); + gen_mcl_fp_addsubPre(false); } void gen_addsub() { diff --git a/test/fp_test.cpp b/test/fp_test.cpp index 67aeb69..cf6dad0 100644 --- a/test/fp_test.cpp +++ b/test/fp_test.cpp @@ -383,15 +383,15 @@ void opeTest() } if (!Fp::isFullBit()) { Fp x(5), y(3), z; - Fp::addNC(z, x, y); + Fp::addPre(z, x, y); if (Fp::compareRaw(z, Fp::getP()) >= 0) { - Fp::subNC(z, z, Fp::getP()); + Fp::subPre(z, z, Fp::getP()); } CYBOZU_TEST_EQUAL(z, Fp(8)); if (Fp::compareRaw(x, y) < 0) { - Fp::addNC(x, x, Fp::getP()); + Fp::addPre(x, x, Fp::getP()); } - Fp::subNC(x, x, y); + Fp::subPre(x, x, y); CYBOZU_TEST_EQUAL(x, Fp(2)); } } diff --git a/test/fp_tower_test.cpp b/test/fp_tower_test.cpp index 6166c96..165f635 100644 --- a/test/fp_tower_test.cpp +++ b/test/fp_tower_test.cpp @@ -272,12 +272,12 @@ void testFpDbl() z.getMpz(mz); CYBOZU_TEST_EQUAL(mz, mo); if (!Fp::isFullBit()) { - FpDbl::addNC(z, x, y); + FpDbl::addPre(z, x, y); mo = mx + my; z.getMpz(mz); CYBOZU_TEST_EQUAL(mz, mo); if (mx >= my) { - FpDbl::subNC(z, x, y); + FpDbl::subPre(z, x, y); mo = mx - my; z.getMpz(mz); CYBOZU_TEST_EQUAL(mz, mo); diff --git a/test/low_test.cpp b/test/low_test.cpp index 2dbeae1..316aa9d 100644 --- a/test/low_test.cpp +++ b/test/low_test.cpp @@ -11,28 +11,6 @@ cybozu::XorShift rg; extern "C" void add_test(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y); -template<size_t N> -void addNC(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y); - -template<size_t N> -void subNC(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y); - -#define DEF_FUNC(BIT) \ - template<> void addNC<BIT>(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y) { mcl_fp_addNC ## BIT(z, x, y); } \ - template<> void subNC<BIT>(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y) { mcl_fp_subNC ## BIT(z, x, y); } - -DEF_FUNC(64) -DEF_FUNC(128) -DEF_FUNC(192) -DEF_FUNC(256) -DEF_FUNC(320) -DEF_FUNC(384) -DEF_FUNC(448) -DEF_FUNC(512) -//DEF_FUNC(96) -//DEF_FUNC(160) -//DEF_FUNC(224) - template<size_t bit> void bench() { @@ -44,33 +22,33 @@ void bench() Unit w[N]; rg.read(x, N); rg.read(y, N); - low_addNC_G<N>(z, x, y); - addNC<bit>(w, x, y); + AddPre<N, Gtag>::f(z, x, y); + AddPre<N, Ltag>::f(w, x, y); CYBOZU_TEST_EQUAL_ARRAY(z, w, N); - low_subNC_G<N>(z, x, y); - subNC<bit>(w, x, y); + SubPre<N, Gtag>::f(z, x, y); + SubPre<N, Ltag>::f(w, x, y); CYBOZU_TEST_EQUAL_ARRAY(z, w, N); } const std::string bitS = cybozu::itoa(bit); std::string name; - name = "add" + bitS; CYBOZU_BENCH(name.c_str(), addNC<bit>, x, x, y); - name = "sub" + bitS; CYBOZU_BENCH(name.c_str(), subNC<bit>, x, x, y); + name = "add" + bitS; CYBOZU_BENCH(name.c_str(), (AddPre<N, Ltag>::f), x, x, y); + name = "sub" + bitS; CYBOZU_BENCH(name.c_str(), (SubPre<N, Ltag>::f), x, x, y); } -CYBOZU_TEST_AUTO(addNC64) { bench<64>(); } -CYBOZU_TEST_AUTO(addNC128) { bench<128>(); } -CYBOZU_TEST_AUTO(addNC192) { bench<192>(); } -CYBOZU_TEST_AUTO(addNC256) { bench<256>(); } -CYBOZU_TEST_AUTO(addNC320) { bench<320>(); } -CYBOZU_TEST_AUTO(addNC384) { bench<384>(); } -CYBOZU_TEST_AUTO(addNC448) { bench<448>(); } -CYBOZU_TEST_AUTO(addNC512) { bench<512>(); } -//CYBOZU_TEST_AUTO(addNC96) { bench<96>(); } -//CYBOZU_TEST_AUTO(addNC160) { bench<160>(); } -//CYBOZU_TEST_AUTO(addNC224) { bench<224>(); } +CYBOZU_TEST_AUTO(addPre64) { bench<64>(); } +CYBOZU_TEST_AUTO(addPre128) { bench<128>(); } +CYBOZU_TEST_AUTO(addPre192) { bench<192>(); } +CYBOZU_TEST_AUTO(addPre256) { bench<256>(); } +CYBOZU_TEST_AUTO(addPre320) { bench<320>(); } +CYBOZU_TEST_AUTO(addPre384) { bench<384>(); } +CYBOZU_TEST_AUTO(addPre448) { bench<448>(); } +CYBOZU_TEST_AUTO(addPre512) { bench<512>(); } +//CYBOZU_TEST_AUTO(addPre96) { bench<96>(); } +//CYBOZU_TEST_AUTO(addPre160) { bench<160>(); } +//CYBOZU_TEST_AUTO(addPre224) { bench<224>(); } #if 0 -CYBOZU_TEST_AUTO(addNC) +CYBOZU_TEST_AUTO(addPre) { using namespace mcl::fp; const size_t bit = 128; @@ -81,14 +59,14 @@ CYBOZU_TEST_AUTO(addNC) Unit w[N]; rg.read(x, N); rg.read(y, N); - low_addNC_G<N>(z, x, y); - addNC<bit>(w, x, y); + low_addPre_G<N>(z, x, y); + addPre<bit>(w, x, y); CYBOZU_TEST_EQUAL_ARRAY(z, w, N); add_test(w, x, y); CYBOZU_TEST_EQUAL_ARRAY(z, w, N); } std::string name = "add" + cybozu::itoa(bit); - CYBOZU_BENCH(name.c_str(), addNC<bit>, x, x, y); + CYBOZU_BENCH(name.c_str(), addPre<bit>, x, x, y); CYBOZU_BENCH("add", add_test, x, x, y); } #endif |