about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author MITSUNARI Shigeo <herumi@nifty.com> 2018-10-31 15:55:16 +0800
committer MITSUNARI Shigeo <herumi@nifty.com> 2018-10-31 15:55:16 +0800
commit 98a32e5c1610d795e700b73a5cccdd8ac5c0a42d (patch)
tree f47e5e2731bc29ca7051d6e551555396cee93c98
parent 8ac1d066e4c42d5bd8a9658c94a505204eae0ce8 (diff)
download tangerine-mcl-98a32e5c1610d795e700b73a5cccdd8ac5c0a42d.tar.gz
tangerine-mcl-98a32e5c1610d795e700b73a5cccdd8ac5c0a42d.tar.zst
tangerine-mcl-98a32e5c1610d795e700b73a5cccdd8ac5c0a42d.zip
mulPre6 + mont is a little faster
-rw-r--r-- src/fp_generator.hpp 35
-rw-r--r-- test/bls12_test.cpp 4
2 files changed, 25 insertions, 14 deletions
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 05ab384..446260d 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -315,11 +315,11 @@ private:
func = gen_fpDbl_sqrPre(op);
if (func) op.fpDbl_sqrPreA_ = reinterpret_cast<void2u>(func);
- if (op.N > 4) return;
- align(16);
- op.fp_mul = getCurr<void4u>(); // used in toMont/fromMont
- op.fp_mulA_ = getCurr<void3u>();
- gen_mul();
+ func = gen_mul();
+ if (func) {
+ op.fp_mul = reinterpret_cast<void4u>(func); // used in toMont/fromMont
+ op.fp_mulA_ = reinterpret_cast<void3u>(func);
+ }
if (op.N > 4) return;
align(16);
op.fp_sqrA_ = getCurr<void2u>();
@@ -817,19 +817,25 @@ private:
mov(ptr [pz + (pn_ - 1) * 8], *t0);
return func;
}
- void gen_mul()
+ const void* gen_mul()
{
+ align(16);
+ const void* func = getCurr<void*>();
if (op_->primeMode == PM_NIST_P192) {
StackFrame sf(this, 3, 10 | UseRDX, 8 * 6);
mulPre3(rsp, sf.p[1], sf.p[2], sf.t);
fpDbl_mod_NIST_P192(sf.p[0], rsp, sf.t);
+ return func;
}
if (pn_ == 3) {
gen_montMul3();
- } else if (pn_ == 4) {
+ return func;
+ }
+ if (pn_ == 4) {
gen_montMul4();
-#if 1
- } else if (pn_ == 6 && useAdx_) {
+ return func;
+ }
+ if (pn_ == 6 && useAdx_) {
// gen_montMul6(p_, rp_);
StackFrame sf(this, 3, 10 | UseRDX, (1 + 12) * 8);
mov(ptr[rsp + 12 * 8], gp0);
@@ -838,12 +844,15 @@ private:
mov(gp0, ptr[rsp + 12 * 8]);
mov(gp1, rsp);
call(fpDbl_modL);
-#endif
- } else if (pn_ <= 9) {
+ return func;
+ }
+#if 0
+ if (pn_ <= 9) {
gen_montMulN(p_, rp_, pn_);
- } else {
- throw cybozu::Exception("mcl:FpGenerator:gen_mul:not implemented for") << pn_;
+ return func;
}
+#endif
+ return 0;
}
/*
@input (z, xy)
diff --git a/test/bls12_test.cpp b/test/bls12_test.cpp
index 5de000b..de29a14 100644
--- a/test/bls12_test.cpp
+++ b/test/bls12_test.cpp
@@ -696,9 +696,11 @@ if(0){
// exit(1);
}
// CYBOZU_BENCH_C("subDbl", 10000000, FpDbl::sub, dx, dx, dx);
- CYBOZU_BENCH_C("mul", 10000000 / n, f, xv, yv, xv);
CYBOZU_BENCH_C("mulPre", 100000000, FpDbl::mulPre, dx, xv[0], yv[0]);
CYBOZU_BENCH_C("sqrPre", 100000000, FpDbl::sqrPre, dx, xv[0]);
+ CYBOZU_BENCH_C("mod ", 100000000, FpDbl::mod, xv[0], dx);
+ CYBOZU_BENCH_C("mul ", 100000000, Fp::mul, xv[0], yv[0], xv[0]);
+ CYBOZU_BENCH_C("sqr ", 100000000, Fp::sqr, xv[0], xv[0]);
return 0;
#endif
return cybozu::test::autoRun.run(argc, argv);