about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author MITSUNARI Shigeo <herumi@nifty.com> 2016-10-14 16:08:04 +0800
committer MITSUNARI Shigeo <herumi@nifty.com> 2016-10-14 16:08:04 +0800
commit d142ba7253c5f353e6a6ee35dcc5dc59270dc2dd (patch)
tree 95cad9599e1d513bc3964faeb609398a685ffd15
parent 2e5146418cb7daddb4f5a9d649ba65f838c3679b (diff)
download dexon-mcl-d142ba7253c5f353e6a6ee35dcc5dc59270dc2dd.tar.gz
dexon-mcl-d142ba7253c5f353e6a6ee35dcc5dc59270dc2dd.tar.zst
dexon-mcl-d142ba7253c5f353e6a6ee35dcc5dc59270dc2dd.zip
rename addNC, subNC to addPre, subPre
-rw-r--r-- include/mcl/fp.hpp 4
-rw-r--r-- include/mcl/fp_tower.hpp 32
-rw-r--r-- include/mcl/op.hpp 16
-rw-r--r-- sample/large.cpp 20
-rw-r--r-- sample/rawbench.cpp 16
-rw-r--r-- src/asm/low_arm.s 36
-rw-r--r-- src/asm/low_x86-64.asm 98
-rw-r--r-- src/fp.cpp 14
-rw-r--r-- src/fp_generator.hpp 26
-rw-r--r-- src/fp_proto.hpp 50
-rw-r--r-- src/gen.cpp 44
-rw-r--r-- test/fp_test.cpp 8
-rw-r--r-- test/fp_tower_test.cpp 4
-rw-r--r-- test/low_test.cpp 64
14 files changed, 205 insertions, 227 deletions
diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
index 43749d9..ad916ee 100644
--- a/include/mcl/fp.hpp
+++ b/include/mcl/fp.hpp
@@ -330,8 +330,8 @@ public:
}
static inline void add(FpT& z, const FpT& x, const FpT& y) { op_.fp_add(z.v_, x.v_, y.v_, op_.p); }
static inline void sub(FpT& z, const FpT& x, const FpT& y) { op_.fp_sub(z.v_, x.v_, y.v_, op_.p); }
- static inline void addNC(FpT& z, const FpT& x, const FpT& y) { op_.fp_addNC(z.v_, x.v_, y.v_); }
- static inline void subNC(FpT& z, const FpT& x, const FpT& y) { op_.fp_subNC(z.v_, x.v_, y.v_); }
+ static inline void addPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_addPre(z.v_, x.v_, y.v_); }
+ static inline void subPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_subPre(z.v_, x.v_, y.v_); }
static inline void mul(FpT& z, const FpT& x, const FpT& y) { op_.fp_mul(z.v_, x.v_, y.v_, op_.p); }
static inline void mul_Unit(FpT& z, const FpT& x, const Unit y) { op_.fp_mul_Unit(z.v_, x.v_, y, op_.p); }
static inline void inv(FpT& y, const FpT& x) { op_.fp_invOp(y.v_, x.v_, op_); }
diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index eea7b1e..0e7b53b 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -42,8 +42,8 @@ public:
}
static void add(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_add(z.v_, x.v_, y.v_, Fp::op_.p); }
static void sub(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p); }
- static void addNC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addNC(z.v_, x.v_, y.v_); }
- static void subNC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subNC(z.v_, x.v_, y.v_); }
+ static void addPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); }
+ static void subPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); }
/*
mul(z, x, y) = mulPre(xy, x, y) + mod(z, xy)
*/
@@ -267,13 +267,13 @@ private:
const Fp& d = py[1];
FpDbl d0, d1, d2;
Fp s, t;
- Fp::addNC(s, a, b);
- Fp::addNC(t, c, d);
+ Fp::addPre(s, a, b);
+ Fp::addPre(t, c, d);
FpDbl::mulPre(d0, s, t); // (a + b)(c + d)
FpDbl::mulPre(d1, a, c);
FpDbl::mulPre(d2, b, d);
- FpDbl::subNC(d0, d0, d1); // (a + b)(c + d) - ac
- FpDbl::subNC(d0, d0, d2); // (a + b)(c + d) - ac - bd
+ FpDbl::subPre(d0, d0, d1); // (a + b)(c + d) - ac
+ FpDbl::subPre(d0, d0, d2); // (a + b)(c + d) - ac - bd
Fp *pz = reinterpret_cast<Fp*>(z);
FpDbl::mod(pz[1], d0);
FpDbl::sub(d1, d1, d2); // ac - bd
@@ -300,9 +300,9 @@ private:
#else
Fp t1, t2;
FpDbl d1, d2;
- Fp::addNC(t1, b, b); // 2b
+ Fp::addPre(t1, b, b); // 2b
FpDbl::mulPre(d2, t1, a); // 2ab
- Fp::addNC(t1, a, b); // a + b
+ Fp::addPre(t1, a, b); // a + b
Fp::sub(t2, a, b); // a - b
FpDbl::mulPre(d1, t1, t2); // (a + b)(a - b)
FpDbl::mod(py[0], d1);
@@ -375,20 +375,20 @@ struct Fp2T<Fp>::Dbl {
FpDbl::add(z.a, x.a, y.a);
FpDbl::add(z.b, x.b, y.b);
}
- static void addNC(Dbl& z, const Dbl& x, const Dbl& y)
+ static void addPre(Dbl& z, const Dbl& x, const Dbl& y)
{
- FpDbl::addNC(z.a, x.a, y.a);
- FpDbl::addNC(z.b, x.b, y.b);
+ FpDbl::addPre(z.a, x.a, y.a);
+ FpDbl::addPre(z.b, x.b, y.b);
}
static void sub(Dbl& z, const Dbl& x, const Dbl& y)
{
FpDbl::sub(z.a, x.a, y.a);
FpDbl::sub(z.b, x.b, y.b);
}
- static void subNC(Dbl& z, const Dbl& x, const Dbl& y)
+ static void subPre(Dbl& z, const Dbl& x, const Dbl& y)
{
- FpDbl::subNC(z.a, x.a, y.a);
- FpDbl::subNC(z.b, x.b, y.b);
+ FpDbl::subPre(z.a, x.a, y.a);
+ FpDbl::subPre(z.b, x.b, y.b);
}
static void neg(Dbl& y, const Dbl& x)
{
@@ -398,9 +398,9 @@ struct Fp2T<Fp>::Dbl {
static void sqr(Dbl& y, const Fp2T& x)
{
Fp t1, t2;
- Fp::addNC(t1, x.b, x.b); // 2b
+ Fp::addPre(t1, x.b, x.b); // 2b
FpDbl::mulPre(y.b, t1, x.a); // 2ab
- Fp::addNC(t1, x.a, x.b); // a + b
+ Fp::addPre(t1, x.a, x.b); // a + b
Fp::sub(t2, x.a, x.b); // a - b
FpDbl::mulPre(y.a, t1, t2); // (a + b)(a - b)
}
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 0deb027..bc11c50 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -109,10 +109,10 @@ struct Op {
void4u fpDbl_sub;
void3u fpDbl_mod;
- u3u fp_addNC; // without modulo p
- u3u fp_subNC; // without modulo p
- u3u fpDbl_addNC;
- u3u fpDbl_subNC;
+ u3u fp_addPre; // without modulo p
+ u3u fp_subPre; // without modulo p
+ u3u fpDbl_addPre;
+ u3u fpDbl_subPre;
/*
for Fp2 = F[u] / (u^2 + 1)
x = a + bu
@@ -176,10 +176,10 @@ struct Op {
fpDbl_sub = 0;
fpDbl_mod = 0;
- fp_addNC = 0;
- fp_subNC = 0;
- fpDbl_addNC = 0;
- fpDbl_subNC = 0;
+ fp_addPre = 0;
+ fp_subPre = 0;
+ fpDbl_addPre = 0;
+ fpDbl_subPre = 0;
xi_a = 0;
fp2_add = 0;
diff --git a/sample/large.cpp b/sample/large.cpp
index d7a627c..2aa4489 100644
--- a/sample/large.cpp
+++ b/sample/large.cpp
@@ -27,15 +27,15 @@ void mulPre768(Unit *pz, const Unit *px, const Unit *py)
low_mul_G<H>(pz + N, px + H, py + H); // ac
Unit a_b[H + 1];
Unit c_d[H + 1];
- a_b[H] = low_addNC_G<H>(a_b, px, px + H); // a + b
- c_d[H] = low_addNC_G<H>(c_d, py, py + H); // c + d
+ a_b[H] = low_addPre_G<H>(a_b, px, px + H); // a + b
+ c_d[H] = low_addPre_G<H>(c_d, py, py + H); // c + d
Unit work[N + H] = {};
low_mul_G<H>(work, a_b, c_d);
- if (c_d[H]) low_addNC_G<H + 1>(work + H, work + H, c_d);
- if (a_b[H]) low_addNC_G<H + 1>(work + H, work + H, a_b);
- work[N] -= low_subNC_G<H>(work, work, pz);
- work[N] -= low_subNC_G<H>(work, work, pz + N);
- low_addNC_G<H + N>(pz + H, pz + H, work);
+ if (c_d[H]) low_addPre_G<H + 1>(work + H, work + H, c_d);
+ if (a_b[H]) low_addPre_G<H + 1>(work + H, work + H, a_b);
+ work[N] -= low_subPre_G<H>(work, work, pz);
+ work[N] -= low_subPre_G<H>(work, work, pz + N);
+ low_addPre_G<H + N>(pz + H, pz + H, work);
}
void testMul()
{
@@ -110,9 +110,9 @@ void test(const std::string& pStr, mcl::fp::Mode mode)
CYBOZU_BENCH("sqrPre", op.fpDbl_sqrPre, ux, ux);
CYBOZU_BENCH("add", op.fpDbl_add, ux, ux, ux, op.p);
CYBOZU_BENCH("sub", op.fpDbl_sub, ux, ux, ux, op.p);
- if (op.fpDbl_addNC) {
- CYBOZU_BENCH("addNC", op.fpDbl_addNC, ux, ux, ux);
- CYBOZU_BENCH("subNC", op.fpDbl_subNC, ux, ux, ux);
+ if (op.fpDbl_addPre) {
+ CYBOZU_BENCH("addPre", op.fpDbl_addPre, ux, ux, ux);
+ CYBOZU_BENCH("subPre", op.fpDbl_subPre, ux, ux, ux);
}
CYBOZU_BENCH("mont", op.fpDbl_mod, ux, ux, op.p);
CYBOZU_BENCH("mul", Fp::mul, x, x, x);
diff --git a/sample/rawbench.cpp b/sample/rawbench.cpp
index e911019..eab0e7c 100644
--- a/sample/rawbench.cpp
+++ b/sample/rawbench.cpp
@@ -30,7 +30,7 @@ void benchRaw(const char *p, mcl::fp::Mode mode)
memcpy(uy, fy.getUnit(), sizeof(Unit) * op.N);
memcpy(ux + op.N, fx.getUnit(), sizeof(Unit) * op.N);
double fp_addT, fp_subT;
- double fp_addNCT, fp_subNCT;
+ double fp_addPreT, fp_subPreT;
double fp_sqrT, fp_mulT;
double fp_mul_UnitT, fp_mul_UnitPreT;
double fpDbl_addT, fpDbl_subT;
@@ -38,12 +38,12 @@ void benchRaw(const char *p, mcl::fp::Mode mode)
double fp2_sqrT, fp2_mulT;
CYBOZU_BENCH_T(fp_addT, op.fp_add, uz, ux, uy, op.p);
CYBOZU_BENCH_T(fp_subT, op.fp_sub, uz, uy, ux, op.p);
- if (op.fp_addNC) {
- CYBOZU_BENCH_T(fp_addNCT, op.fp_addNC, uz, ux, uy);
- CYBOZU_BENCH_T(fp_subNCT, op.fp_subNC, uz, uy, ux);
+ if (op.fp_addPre) {
+ CYBOZU_BENCH_T(fp_addPreT, op.fp_addPre, uz, ux, uy);
+ CYBOZU_BENCH_T(fp_subPreT, op.fp_subPre, uz, uy, ux);
} else {
- fp_addNCT = 0;
- fp_subNCT = 0;
+ fp_addPreT = 0;
+ fp_subPreT = 0;
}
CYBOZU_BENCH_T(fp_sqrT, op.fp_sqr, uz, ux, op.p);
CYBOZU_BENCH_T(fp_mulT, op.fp_mul, uz, ux, uy, op.p);
@@ -63,7 +63,7 @@ void benchRaw(const char *p, mcl::fp::Mode mode)
printf("%s\n", mcl::fp::ModeToStr(mode));
const char *tStrTbl[] = {
"fp_add", "fp_sub",
- "addNC", "subNC",
+ "addPre", "subPre",
"fp_sqr", "fp_mul",
"mulUnit", "mulUnitP",
"D_add", "D_sub",
@@ -76,7 +76,7 @@ void benchRaw(const char *p, mcl::fp::Mode mode)
printf("\n");
const double tTbl[] = {
fp_addT, fp_subT,
- fp_addNCT, fp_subNCT,
+ fp_addPreT, fp_subPreT,
fp_sqrT, fp_mulT,
fp_mul_UnitT, fp_mul_UnitPreT,
fpDbl_addT, fpDbl_subT,
diff --git a/src/asm/low_arm.s b/src/asm/low_arm.s
index a655520..1ed2a12 100644
--- a/src/asm/low_arm.s
+++ b/src/asm/low_arm.s
@@ -1,8 +1,8 @@
.arch armv7-a
.align 2
- .global mcl_fp_addNC64
-mcl_fp_addNC64:
+ .global mcl_fp_addPre64
+mcl_fp_addPre64:
ldm r1, {r3, r12}
ldm r2, {r1, r2}
adds r1, r1, r3
@@ -12,8 +12,8 @@ mcl_fp_addNC64:
.align 2
- .global mcl_fp_addNC96
-mcl_fp_addNC96:
+ .global mcl_fp_addPre96
+mcl_fp_addPre96:
push {r4, lr}
ldm r1, {r1, r3, r12}
ldm r2, {r2, r4, lr}
@@ -26,8 +26,8 @@ mcl_fp_addNC96:
# slower
.align 2
- .global mcl_fp_addNC96_2
-mcl_fp_addNC96_2:
+ .global mcl_fp_addPre96_2
+mcl_fp_addPre96_2:
ldr r3, [r1], #4
ldr r12, [r2], #4
adds r3, r3, r12
@@ -40,9 +40,9 @@ mcl_fp_addNC96_2:
stm r0, {r1, r3}
bx lr
- .globl mcl_fp_addNC128
+ .globl mcl_fp_addPre128
.align 2
-mcl_fp_addNC128:
+mcl_fp_addPre128:
push {r4, lr}
ldm r1!, {r3, r4}
ldm r2!, {r12, lr}
@@ -58,9 +58,9 @@ mcl_fp_addNC128:
bx lr
# almost same
- .globl mcl_fp_addNC128_2
+ .globl mcl_fp_addPre128_2
.align 2
-cl_fp_addNC128_2:
+cl_fp_addPre128_2:
push {r4, r5, r6, lr}
ldm r1, {r1, r3, r4, r5}
ldm r2, {r2, r6, r12, lr}
@@ -72,9 +72,9 @@ cl_fp_addNC128_2:
pop {r4, r5, r6, lr}
bx lr
- .globl mcl_fp_addNC160
+ .globl mcl_fp_addPre160
.align 2
-mcl_fp_addNC160:
+mcl_fp_addPre160:
push {r4, lr}
ldm r1!, {r3, r4}
ldm r2!, {r12, lr}
@@ -90,9 +90,9 @@ mcl_fp_addNC160:
pop {r4, lr}
bx lr
- .globl mcl_fp_addNC192
+ .globl mcl_fp_addPre192
.align 2
-mcl_fp_addNC192:
+mcl_fp_addPre192:
push {r4, r5, r6, lr}
ldm r1!, {r3, r4, r5}
ldm r2!, {r6, r12, lr}
@@ -110,9 +110,9 @@ mcl_fp_addNC192:
pop {r4, r5, r6, lr}
bx lr
- .globl mcl_fp_addNC224
+ .globl mcl_fp_addPre224
.align 2
-mcl_fp_addNC224:
+mcl_fp_addPre224:
push {r4, r5, r6, lr}
ldm r1!, {r3, r4, r5}
ldm r2!, {r6, r12, lr}
@@ -131,9 +131,9 @@ mcl_fp_addNC224:
pop {r4, r5, r6, lr}
bx lr
- .globl mcl_fp_addNC256
+ .globl mcl_fp_addPre256
.align 2
-mcl_fp_addNC256:
+mcl_fp_addPre256:
push {r4, r5, r6, r7, r8, lr}
ldm r1!, {r3, r4, r5, r6}
ldm r2!, {r7, r8, r12, lr}
diff --git a/src/asm/low_x86-64.asm b/src/asm/low_x86-64.asm
index cb6947e..b09b9dc 100644
--- a/src/asm/low_x86-64.asm
+++ b/src/asm/low_x86-64.asm
@@ -21,7 +21,7 @@ global %1
segment .text
-%imacro addNC 1
+%imacro addPre 1
mov rax, [p2org]
add rax, [p3org]
mov [p1org], rax
@@ -53,54 +53,54 @@ segment .text
ret
%endmacro
-proc mcl_fp_addNC64
- addNC 0
-proc mcl_fp_addNC128
- addNC 1
-proc mcl_fp_addNC192
- addNC 2
-proc mcl_fp_addNC256
- addNC 3
-proc mcl_fp_addNC320
- addNC 4
-proc mcl_fp_addNC384
- addNC 5
-proc mcl_fp_addNC448
- addNC 6
-proc mcl_fp_addNC512
- addNC 7
-proc mcl_fp_addNC576
- addNC 8
-proc mcl_fp_addNC640
- addNC 9
-proc mcl_fp_addNC704
- addNC 10
-proc mcl_fp_addNC768
- addNC 11
-proc mcl_fp_addNC832
- addNC 12
-proc mcl_fp_addNC896
- addNC 13
-proc mcl_fp_addNC960
- addNC 14
-proc mcl_fp_addNC1024
- addNC 15
-proc mcl_fp_addNC1088
- addNC 16
-proc mcl_fp_addNC1152
- addNC 17
-proc mcl_fp_addNC1216
- addNC 18
-proc mcl_fp_addNC1280
- addNC 19
-proc mcl_fp_addNC1344
- addNC 20
-proc mcl_fp_addNC1408
- addNC 21
-proc mcl_fp_addNC1472
- addNC 22
-proc mcl_fp_addNC1536
- addNC 23
+proc mcl_fp_addPre64
+ addPre 0
+proc mcl_fp_addPre128
+ addPre 1
+proc mcl_fp_addPre192
+ addPre 2
+proc mcl_fp_addPre256
+ addPre 3
+proc mcl_fp_addPre320
+ addPre 4
+proc mcl_fp_addPre384
+ addPre 5
+proc mcl_fp_addPre448
+ addPre 6
+proc mcl_fp_addPre512
+ addPre 7
+proc mcl_fp_addPre576
+ addPre 8
+proc mcl_fp_addPre640
+ addPre 9
+proc mcl_fp_addPre704
+ addPre 10
+proc mcl_fp_addPre768
+ addPre 11
+proc mcl_fp_addPre832
+ addPre 12
+proc mcl_fp_addPre896
+ addPre 13
+proc mcl_fp_addPre960
+ addPre 14
+proc mcl_fp_addPre1024
+ addPre 15
+proc mcl_fp_addPre1088
+ addPre 16
+proc mcl_fp_addPre1152
+ addPre 17
+proc mcl_fp_addPre1216
+ addPre 18
+proc mcl_fp_addPre1280
+ addPre 19
+proc mcl_fp_addPre1344
+ addPre 20
+proc mcl_fp_addPre1408
+ addPre 21
+proc mcl_fp_addPre1472
+ addPre 22
+proc mcl_fp_addPre1536
+ addPre 23
proc mcl_fp_subNC64
subNC 0
diff --git a/src/fp.cpp b/src/fp.cpp
index baa38f5..4891049 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -109,8 +109,8 @@ Mode StrToMode(const std::string& s)
#ifdef MCL_USE_LLVM
#define MCL_DEF_LLVM_FUNC(n) \
-template<>const u3u AddNC<n, Ltag>::f = &mcl_fp_addNC ## n ## L; \
-template<>const u3u SubNC<n, Ltag>::f = &mcl_fp_subNC ## n ## L; \
+template<>const u3u AddPre<n, Ltag>::f = &mcl_fp_addPre ## n ## L; \
+template<>const u3u SubPre<n, Ltag>::f = &mcl_fp_subPre ## n ## L; \
template<>const void3u MulPre<n, Ltag>::f = &mcl_fpDbl_mulPre ## n ## L; \
template<>const void2u SqrPre<n, Ltag>::f = &mcl_fpDbl_sqrPre ## n ## L; \
template<>const void2uI Mul_UnitPre<n, Ltag>::f = &mcl_fp_mul_UnitPre ## n ## L; \
@@ -178,7 +178,7 @@ static void fp_invMontOpC(Unit *y, const Unit *x, const Op& op)
}
/*
- large (N * 2) specification of AddNC, SubNC
+ large (N * 2) specification of AddPre, SubPre
*/
template<size_t N, bool enable>
struct SetFpDbl {
@@ -190,8 +190,8 @@ struct SetFpDbl<N, true> {
static inline void exec(Op& op)
{
if (!op.isFullBit) {
- op.fpDbl_addNC = AddNC<N * 2, Ltag>::f;
- op.fpDbl_subNC = SubNC<N * 2, Ltag>::f;
+ op.fpDbl_addPre = AddPre<N * 2, Ltag>::f;
+ op.fpDbl_subPre = SubPre<N * 2, Ltag>::f;
}
}
};
@@ -219,8 +219,8 @@ void setOpSub(Op& op)
op.fpDbl_add = DblAdd<N, Tag>::f;
op.fpDbl_sub = DblSub<N, Tag>::f;
if (!op.isFullBit) {
- op.fp_addNC = AddNC<N, Tag>::f;
- op.fp_subNC = SubNC<N, Tag>::f;
+ op.fp_addPre = AddPre<N, Tag>::f;
+ op.fp_subPre = SubPre<N, Tag>::f;
}
SetFpDbl<N, enableFpDbl>::exec(op);
}
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 8aba77b..489fe40 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -200,15 +200,15 @@ struct FpGenerator : Xbyak::CodeGenerator {
gen_fp_sub();
if (op.isFullBit) {
- op.fp_addNC = 0;
- op.fp_subNC = 0;
+ op.fp_addPre = 0;
+ op.fp_subPre = 0;
} else {
align(16);
- op.fp_addNC = getCurr<u3u>();
- gen_addSubNC(true, pn_);
+ op.fp_addPre = getCurr<u3u>();
+ gen_addSubPre(true, pn_);
align(16);
- op.fp_subNC = getCurr<u3u>();
- gen_addSubNC(false, pn_);
+ op.fp_subPre = getCurr<u3u>();
+ gen_addSubPre(false, pn_);
}
align(16);
shr1_ = getCurr<void2op>();
@@ -248,15 +248,15 @@ struct FpGenerator : Xbyak::CodeGenerator {
op.fpDbl_sub = getCurr<void4u>();
gen_fpDbl_sub();
if (op.isFullBit) {
- op.fpDbl_addNC = 0;
- op.fpDbl_subNC = 0;
+ op.fpDbl_addPre = 0;
+ op.fpDbl_subPre = 0;
} else {
align(16);
- op.fpDbl_addNC = getCurr<u3u>();
- gen_addSubNC(true, pn_ * 2);
+ op.fpDbl_addPre = getCurr<u3u>();
+ gen_addSubPre(true, pn_ * 2);
align(16);
- op.fpDbl_subNC = getCurr<u3u>();
- gen_addSubNC(false, pn_ * 2);
+ op.fpDbl_subPre = getCurr<u3u>();
+ gen_addSubPre(false, pn_ * 2);
}
if (op.N == 2 || op.N == 3 || op.N == 4) {
align(16);
@@ -274,7 +274,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
gen_fpDbl_sqrPre(op);
}
}
- void gen_addSubNC(bool isAdd, int n)
+ void gen_addSubPre(bool isAdd, int n)
{
StackFrame sf(this, 3);
if (isAdd) {
diff --git a/src/fp_proto.hpp b/src/fp_proto.hpp
index 571ae37..799fe6f 100644
--- a/src/fp_proto.hpp
+++ b/src/fp_proto.hpp
@@ -35,7 +35,7 @@ void copyC(Unit *y, const Unit *x)
// (carry, z[N]) <- x[N] + y[N]
template<size_t N, class Tag = Gtag>
-struct AddNC {
+struct AddPre {
static inline Unit func(Unit *z, const Unit *x, const Unit *y)
{
return mpn_add_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N);
@@ -43,11 +43,11 @@ struct AddNC {
static const u3u f;
};
template<size_t N, class Tag>
-const u3u AddNC<N, Tag>::f = &AddNC<N, Tag>::func;
+const u3u AddPre<N, Tag>::f = &AddPre<N, Tag>::func;
// (carry, z[N]) <- x[N] - y[N]
template<size_t N, class Tag = Gtag>
-struct SubNC {
+struct SubPre {
static inline Unit func(Unit *z, const Unit *x, const Unit *y)
{
return mpn_sub_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N);
@@ -56,7 +56,7 @@ struct SubNC {
};
template<size_t N, class Tag>
-const u3u SubNC<N, Tag>::f = &SubNC<N, Tag>::func;
+const u3u SubPre<N, Tag>::f = &SubPre<N, Tag>::func;
// y[N] <- (-x[N]) % p[N]
template<size_t N, class Tag = Gtag>
@@ -67,7 +67,7 @@ struct Neg {
if (x != y) clearC<N>(y);
return;
}
- SubNC<N, Tag>::f(y, p, x);
+ SubPre<N, Tag>::f(y, p, x);
}
static const void3u f;
};
@@ -162,12 +162,12 @@ template<size_t N, class Tag = Gtag>
struct Add {
static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p)
{
- if (AddNC<N, Tag>::f(z, x, y)) {
- SubNC<N, Tag>::f(z, z, p);
+ if (AddPre<N, Tag>::f(z, x, y)) {
+ SubPre<N, Tag>::f(z, z, p);
return;
}
Unit tmp[N];
- if (SubNC<N, Tag>::f(tmp, z, p) == 0) {
+ if (SubPre<N, Tag>::f(tmp, z, p) == 0) {
memcpy(z, tmp, sizeof(tmp));
}
}
@@ -182,8 +182,8 @@ template<size_t N, class Tag = Gtag>
struct Sub {
static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p)
{
- if (SubNC<N, Tag>::f(z, x, y)) {
- AddNC<N, Tag>::f(z, z, p);
+ if (SubPre<N, Tag>::f(z, x, y)) {
+ AddPre<N, Tag>::f(z, z, p);
}
}
static const void4u f;
@@ -197,12 +197,12 @@ template<size_t N, class Tag = Gtag>
struct DblAdd {
static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p)
{
- if (AddNC<N * 2, Tag>::f(z, x, y)) {
- SubNC<N, Tag>::f(z + N, z + N, p);
+ if (AddPre<N * 2, Tag>::f(z, x, y)) {
+ SubPre<N, Tag>::f(z + N, z + N, p);
return;
}
Unit tmp[N];
- if (SubNC<N, Tag>::f(tmp, z + N, p) == 0) {
+ if (SubPre<N, Tag>::f(tmp, z + N, p) == 0) {
memcpy(z + N, tmp, sizeof(tmp));
}
}
@@ -217,8 +217,8 @@ template<size_t N, class Tag = Gtag>
struct DblSub {
static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p)
{
- if (SubNC<N * 2, Tag>::f(z, x, y)) {
- AddNC<N, Tag>::f(z + N, z + N, p);
+ if (SubPre<N * 2, Tag>::f(z, x, y)) {
+ AddPre<N, Tag>::f(z + N, z + N, p);
}
}
static const void4u f;
@@ -248,20 +248,20 @@ struct Mont {
Unit t[N + 2];
Mul_UnitPre<N, Tag>::f(t, p, q); // p * q
t[N + 1] = 0; // always zero
- c[N + 1] = AddNC<N + 1, Tag>::f(c, c, t);
+ c[N + 1] = AddPre<N + 1, Tag>::f(c, c, t);
c++;
for (size_t i = 1; i < N; i++) {
Mul_UnitPre<N, Tag>::f(t, x, y[i]);
- c[N + 1] = AddNC<N + 1, Tag>::f(c, c, t);
+ c[N + 1] = AddPre<N + 1, Tag>::f(c, c, t);
q = c[0] * rp;
Mul_UnitPre<N, Tag>::f(t, p, q);
- AddNC<N + 2, Tag>::f(c, c, t);
+ AddPre<N + 2, Tag>::f(c, c, t);
c++;
}
if (c[N]) {
- SubNC<N, Tag>::f(z, c, p);
+ SubPre<N, Tag>::f(z, c, p);
} else {
- if (SubNC<N, Tag>::f(z, c, p)) {
+ if (SubPre<N, Tag>::f(z, c, p)) {
memcpy(z, c, N * sizeof(Unit));
}
}
@@ -288,7 +288,7 @@ struct MontRed {
Unit *c = buf;
Unit q = xy[0] * rp;
Mul_UnitPre<N, Tag>::f(t, p, q);
- buf[N * 2] = AddNC<N * 2, Tag>::f(buf, xy, t);
+ buf[N * 2] = AddPre<N * 2, Tag>::f(buf, xy, t);
c++;
for (size_t i = 1; i < N; i++) {
q = c[0] * rp;
@@ -298,9 +298,9 @@ struct MontRed {
c++;
}
if (c[N]) {
- SubNC<N, Tag>::f(z, c, p);
+ SubPre<N, Tag>::f(z, c, p);
} else {
- if (SubNC<N, Tag>::f(z, c, p)) {
+ if (SubPre<N, Tag>::f(z, c, p)) {
memcpy(z, c, N * sizeof(Unit));
}
}
@@ -365,8 +365,8 @@ const void3u Sqr<N, Tag>::f = Sqr<N, Tag>::func;
#define MCL_FP_DEF_FUNC_SUB(n, suf) \
void mcl_fp_add ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \
void mcl_fp_sub ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \
-mcl::fp::Unit mcl_fp_addNC ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
-mcl::fp::Unit mcl_fp_subNC ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
+mcl::fp::Unit mcl_fp_addPre ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
+mcl::fp::Unit mcl_fp_subPre ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
void mcl_fp_mul_UnitPre ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, mcl::fp::Unit y); \
void mcl_fpDbl_mulPre ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
void mcl_fpDbl_sqrPre ## n ## suf(mcl::fp::Unit* y, const mcl::fp::Unit* x); \
diff --git a/src/gen.cpp b/src/gen.cpp
index 54f66e9..8014dac 100644
--- a/src/gen.cpp
+++ b/src/gen.cpp
@@ -23,8 +23,8 @@ struct Code : public mcl::Generator {
Function makeNIST_P192;
Function mcl_fpDbl_mod_NIST_P192;
Function mcl_fp_sqr_NIST_P192;
- FunctionMap mcl_fp_addNCM;
- FunctionMap mcl_fp_subNCM;
+ FunctionMap mcl_fp_addPreM;
+ FunctionMap mcl_fp_subPreM;
FunctionMap mcl_fp_addM;
FunctionMap mcl_fp_subM;
FunctionMap mulPvM;
@@ -293,7 +293,7 @@ struct Code : public mcl::Generator {
t = trunc(t, unit);
return t;
}
- void gen_mcl_fp_addsubNC(bool isAdd)
+ void gen_mcl_fp_addsubPre(bool isAdd)
{
resetGlobalIdx();
Operand r(Int, unit);
@@ -302,15 +302,15 @@ struct Code : public mcl::Generator {
Operand py(IntPtr, bit);
std::string name;
if (isAdd) {
- name = "mcl_fp_addNC" + cybozu::itoa(N) + "L";
- mcl_fp_addNCM[N] = Function(name, r, pz, px, py);
- verifyAndSetPrivate(mcl_fp_addNCM[N]);
- beginFunc(mcl_fp_addNCM[N]);
+ name = "mcl_fp_addPre" + cybozu::itoa(N) + "L";
+ mcl_fp_addPreM[N] = Function(name, r, pz, px, py);
+ verifyAndSetPrivate(mcl_fp_addPreM[N]);
+ beginFunc(mcl_fp_addPreM[N]);
} else {
- name = "mcl_fp_subNC" + cybozu::itoa(N) + "L";
- mcl_fp_subNCM[N] = Function(name, r, pz, px, py);
- verifyAndSetPrivate(mcl_fp_subNCM[N]);
- beginFunc(mcl_fp_subNCM[N]);
+ name = "mcl_fp_subPre" + cybozu::itoa(N) + "L";
+ mcl_fp_subPreM[N] = Function(name, r, pz, px, py);
+ verifyAndSetPrivate(mcl_fp_subPreM[N]);
+ beginFunc(mcl_fp_subPreM[N]);
}
Operand x = zext(load(px), bit + unit);
Operand y = zext(load(py), bit + unit);
@@ -328,7 +328,7 @@ struct Code : public mcl::Generator {
endFunc();
}
#if 0 // void-return version
- void gen_mcl_fp_addsubNC(bool isAdd)
+ void gen_mcl_fp_addsubPre(bool isAdd)
{
resetGlobalIdx();
Operand pz(IntPtr, bit);
@@ -336,15 +336,15 @@ struct Code : public mcl::Generator {
Operand py(IntPtr, bit);
std::string name;
if (isAdd) {
- name = "mcl_fp_addNC" + cybozu::itoa(bit) + "L";
- mcl_fp_addNCM[bit] = Function(name, Void, pz, px, py);
- verifyAndSetPrivate(mcl_fp_addNCM[bit]);
- beginFunc(mcl_fp_addNCM[bit]);
+ name = "mcl_fp_addPre" + cybozu::itoa(bit) + "L";
+ mcl_fp_addPreM[bit] = Function(name, Void, pz, px, py);
+ verifyAndSetPrivate(mcl_fp_addPreM[bit]);
+ beginFunc(mcl_fp_addPreM[bit]);
} else {
- name = "mcl_fp_subNC" + cybozu::itoa(bit) + "L";
- mcl_fp_subNCM[bit] = Function(name, Void, pz, px, py);
- verifyAndSetPrivate(mcl_fp_subNCM[bit]);
- beginFunc(mcl_fp_subNCM[bit]);
+ name = "mcl_fp_subPre" + cybozu::itoa(bit) + "L";
+ mcl_fp_subPreM[bit] = Function(name, Void, pz, px, py);
+ verifyAndSetPrivate(mcl_fp_subPreM[bit]);
+ beginFunc(mcl_fp_subPreM[bit]);
}
Operand x = load(px);
Operand y = load(py);
@@ -762,8 +762,8 @@ struct Code : public mcl::Generator {
}
void gen_all()
{
- gen_mcl_fp_addsubNC(true);
- gen_mcl_fp_addsubNC(false);
+ gen_mcl_fp_addsubPre(true);
+ gen_mcl_fp_addsubPre(false);
}
void gen_addsub()
{
diff --git a/test/fp_test.cpp b/test/fp_test.cpp
index 67aeb69..cf6dad0 100644
--- a/test/fp_test.cpp
+++ b/test/fp_test.cpp
@@ -383,15 +383,15 @@ void opeTest()
}
if (!Fp::isFullBit()) {
Fp x(5), y(3), z;
- Fp::addNC(z, x, y);
+ Fp::addPre(z, x, y);
if (Fp::compareRaw(z, Fp::getP()) >= 0) {
- Fp::subNC(z, z, Fp::getP());
+ Fp::subPre(z, z, Fp::getP());
}
CYBOZU_TEST_EQUAL(z, Fp(8));
if (Fp::compareRaw(x, y) < 0) {
- Fp::addNC(x, x, Fp::getP());
+ Fp::addPre(x, x, Fp::getP());
}
- Fp::subNC(x, x, y);
+ Fp::subPre(x, x, y);
CYBOZU_TEST_EQUAL(x, Fp(2));
}
}
diff --git a/test/fp_tower_test.cpp b/test/fp_tower_test.cpp
index 6166c96..165f635 100644
--- a/test/fp_tower_test.cpp
+++ b/test/fp_tower_test.cpp
@@ -272,12 +272,12 @@ void testFpDbl()
z.getMpz(mz);
CYBOZU_TEST_EQUAL(mz, mo);
if (!Fp::isFullBit()) {
- FpDbl::addNC(z, x, y);
+ FpDbl::addPre(z, x, y);
mo = mx + my;
z.getMpz(mz);
CYBOZU_TEST_EQUAL(mz, mo);
if (mx >= my) {
- FpDbl::subNC(z, x, y);
+ FpDbl::subPre(z, x, y);
mo = mx - my;
z.getMpz(mz);
CYBOZU_TEST_EQUAL(mz, mo);
diff --git a/test/low_test.cpp b/test/low_test.cpp
index 2dbeae1..316aa9d 100644
--- a/test/low_test.cpp
+++ b/test/low_test.cpp
@@ -11,28 +11,6 @@ cybozu::XorShift rg;
extern "C" void add_test(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y);
-template<size_t N>
-void addNC(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y);
-
-template<size_t N>
-void subNC(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y);
-
-#define DEF_FUNC(BIT) \
- template<> void addNC<BIT>(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y) { mcl_fp_addNC ## BIT(z, x, y); } \
- template<> void subNC<BIT>(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y) { mcl_fp_subNC ## BIT(z, x, y); }
-
-DEF_FUNC(64)
-DEF_FUNC(128)
-DEF_FUNC(192)
-DEF_FUNC(256)
-DEF_FUNC(320)
-DEF_FUNC(384)
-DEF_FUNC(448)
-DEF_FUNC(512)
-//DEF_FUNC(96)
-//DEF_FUNC(160)
-//DEF_FUNC(224)
-
template<size_t bit>
void bench()
{
@@ -44,33 +22,33 @@ void bench()
Unit w[N];
rg.read(x, N);
rg.read(y, N);
- low_addNC_G<N>(z, x, y);
- addNC<bit>(w, x, y);
+ AddPre<N, Gtag>::f(z, x, y);
+ AddPre<N, Ltag>::f(w, x, y);
CYBOZU_TEST_EQUAL_ARRAY(z, w, N);
- low_subNC_G<N>(z, x, y);
- subNC<bit>(w, x, y);
+ SubPre<N, Gtag>::f(z, x, y);
+ SubPre<N, Ltag>::f(w, x, y);
CYBOZU_TEST_EQUAL_ARRAY(z, w, N);
}
const std::string bitS = cybozu::itoa(bit);
std::string name;
- name = "add" + bitS; CYBOZU_BENCH(name.c_str(), addNC<bit>, x, x, y);
- name = "sub" + bitS; CYBOZU_BENCH(name.c_str(), subNC<bit>, x, x, y);
+ name = "add" + bitS; CYBOZU_BENCH(name.c_str(), (AddPre<N, Ltag>::f), x, x, y);
+ name = "sub" + bitS; CYBOZU_BENCH(name.c_str(), (SubPre<N, Ltag>::f), x, x, y);
}
-CYBOZU_TEST_AUTO(addNC64) { bench<64>(); }
-CYBOZU_TEST_AUTO(addNC128) { bench<128>(); }
-CYBOZU_TEST_AUTO(addNC192) { bench<192>(); }
-CYBOZU_TEST_AUTO(addNC256) { bench<256>(); }
-CYBOZU_TEST_AUTO(addNC320) { bench<320>(); }
-CYBOZU_TEST_AUTO(addNC384) { bench<384>(); }
-CYBOZU_TEST_AUTO(addNC448) { bench<448>(); }
-CYBOZU_TEST_AUTO(addNC512) { bench<512>(); }
-//CYBOZU_TEST_AUTO(addNC96) { bench<96>(); }
-//CYBOZU_TEST_AUTO(addNC160) { bench<160>(); }
-//CYBOZU_TEST_AUTO(addNC224) { bench<224>(); }
+CYBOZU_TEST_AUTO(addPre64) { bench<64>(); }
+CYBOZU_TEST_AUTO(addPre128) { bench<128>(); }
+CYBOZU_TEST_AUTO(addPre192) { bench<192>(); }
+CYBOZU_TEST_AUTO(addPre256) { bench<256>(); }
+CYBOZU_TEST_AUTO(addPre320) { bench<320>(); }
+CYBOZU_TEST_AUTO(addPre384) { bench<384>(); }
+CYBOZU_TEST_AUTO(addPre448) { bench<448>(); }
+CYBOZU_TEST_AUTO(addPre512) { bench<512>(); }
+//CYBOZU_TEST_AUTO(addPre96) { bench<96>(); }
+//CYBOZU_TEST_AUTO(addPre160) { bench<160>(); }
+//CYBOZU_TEST_AUTO(addPre224) { bench<224>(); }
#if 0
-CYBOZU_TEST_AUTO(addNC)
+CYBOZU_TEST_AUTO(addPre)
{
using namespace mcl::fp;
const size_t bit = 128;
@@ -81,14 +59,14 @@ CYBOZU_TEST_AUTO(addNC)
Unit w[N];
rg.read(x, N);
rg.read(y, N);
- low_addNC_G<N>(z, x, y);
- addNC<bit>(w, x, y);
+ low_addPre_G<N>(z, x, y);
+ addPre<bit>(w, x, y);
CYBOZU_TEST_EQUAL_ARRAY(z, w, N);
add_test(w, x, y);
CYBOZU_TEST_EQUAL_ARRAY(z, w, N);
}
std::string name = "add" + cybozu::itoa(bit);
- CYBOZU_BENCH(name.c_str(), addNC<bit>, x, x, y);
+ CYBOZU_BENCH(name.c_str(), addPre<bit>, x, x, y);
CYBOZU_BENCH("add", add_test, x, x, y);
}
#endif