aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMITSUNARI Shigeo <herumi@nifty.com>2018-11-05 16:14:35 +0800
committerMITSUNARI Shigeo <herumi@nifty.com>2018-11-05 16:14:35 +0800
commitf4b4382433d66d89ced6712ab4edb60957fef009 (patch)
treec34977ef75c9724c565fdb4a85e5d6fddd8dde58
parent7c3a15b0f8b0ba7f0f0a8a8d778feee46bbd1325 (diff)
downloaddexon-mcl-f4b4382433d66d89ced6712ab4edb60957fef009.tar.gz
dexon-mcl-f4b4382433d66d89ced6712ab4edb60957fef009.tar.zst
dexon-mcl-f4b4382433d66d89ced6712ab4edb60957fef009.zip
add Fp2::add for bls12
-rw-r--r--src/fp_generator.hpp36
-rw-r--r--test/bls12_test.cpp12
2 files changed, 41 insertions, 7 deletions
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 4628ff8..2dca191 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -324,17 +324,15 @@ private:
if (func) {
op.fp_sqrA_ = reinterpret_cast<void2u>(func);
}
- if (op.N > 4) return;
if (op.primeMode != PM_NIST_P192 && op.N <= 4) { // support general op.N but not fast for op.N > 4
align(16);
op.fp_preInv = getCurr<int2u>();
gen_preInv();
}
+ op.fp2_addA_ = gen_fp2_add();
+
if (op.N == 4 && !isFullBit_) {
align(16);
- op.fp2_addA_ = getCurr<void3u>();
- gen_fp2_add4();
- align(16);
op.fp2_subA_ = getCurr<void3u>();
gen_fp2_sub4();
align(16);
@@ -3505,6 +3503,36 @@ private:
gen_raw_fp_add(sf.p[0], sf.p[1], sf.p[2], sf.t, false);
gen_raw_fp_add(sf.p[0] + FpByte_, sf.p[1] + FpByte_, sf.p[2] + FpByte_, sf.t, false);
}
+ void gen_fp2_add6() // emits the Fp2 addition routine used when pn_ == 6: two gen_raw_fp_add6 calls, at offsets 0 and FpByte_
+ {
+ assert(!isFullBit_); // this generator must only be used when isFullBit_ is false
+ StackFrame sf(this, 3, 10); // NOTE(review): presumably 3 pointer arguments and 10 temporary registers -- confirm against StackFrame
+ const Reg64& pz = sf.p[0]; // first argument register; passed first to gen_raw_fp_add6 (presumably the destination -- confirm)
+ const Reg64& px = sf.p[1]; // second argument register
+ const Reg64& py = sf.p[2]; // third argument register
+ Pack t1 = sf.t.sub(0, 6); // NOTE(review): presumably the first six temporaries -- confirm Pack::sub semantics
+ Pack t2 = sf.t.sub(6); // NOTE(review): presumably the remaining temporaries starting at index 6
+ t2.append(rax); // extend the second scratch pack with rax
+ t2.append(px); // px is also handed over as scratch, so its value is destroyed by the add below
+ movq(xm0, px); // save px in xm0 before it gets clobbered
+ gen_raw_fp_add6(pz, px, py, 0, t1, t2, false); // add at byte offset 0
+ movq(px, xm0); // restore px for the second add
+ gen_raw_fp_add6(pz, px, py, FpByte_, t1, t2, false); // add at byte offset FpByte_
+ }
+ void3u gen_fp2_add() // emits the Fp2 add routine matching pn_ and returns its entry address, or 0 when no generator applies
+ {
+ align(16); // executed unconditionally, even when the function ends up returning 0
+ void3u func = getCurr<void3u>(); // entry point = current emit position, captured after the alignment and before any code is emitted
+ if (pn_ == 4 && !isFullBit_) {
+ gen_fp2_add4(); // pn_ == 4 variant
+ return func;
+ }
+ if (pn_ == 6 && !isFullBit_) {
+ gen_fp2_add6(); // pn_ == 6 variant
+ return func;
+ }
+ return 0; // no generator for this pn_ / isFullBit_ combination; this function emitted nothing after align(16)
+ }
void gen_fp2_sub4()
{
assert(!isFullBit_);
diff --git a/test/bls12_test.cpp b/test/bls12_test.cpp
index 42a013d..8722e76 100644
--- a/test/bls12_test.cpp
+++ b/test/bls12_test.cpp
@@ -687,6 +687,11 @@ int main(int argc, char *argv[])
}
FpDbl dx;
FpDbl::mulPre(dx, xv[0], yv[0]);
+ Fp2 x2, y2;
+ x2.a.setByCSPRNG(rg);
+ x2.b.setByCSPRNG(rg);
+ y2.a.setByCSPRNG(rg);
+ y2.b.setByCSPRNG(rg);
if(0){
puts("----------");
xv[0].dump();
@@ -695,12 +700,13 @@ if(0){
puts("----------");
// exit(1);
}
-// CYBOZU_BENCH_C("subDbl", 10000000, FpDbl::sub, dx, dx, dx);
+ CYBOZU_BENCH_C("Fp2::add", 10000000, Fp2::add, x2, x2, y2);
+ CYBOZU_BENCH_C("Fp2::sub", 10000000, Fp2::sub, x2, x2, y2);
// CYBOZU_BENCH_C("mulPre", 100000000, FpDbl::mulPre, dx, xv[0], yv[0]);
// CYBOZU_BENCH_C("sqrPre", 100000000, FpDbl::sqrPre, dx, xv[0]);
// CYBOZU_BENCH_C("mod ", 100000000, FpDbl::mod, xv[0], dx);
- CYBOZU_BENCH_C("mul ", 100000000, Fp::mul, xv[0], yv[0], xv[0]);
- CYBOZU_BENCH_C("sqr ", 100000000, Fp::sqr, xv[0], xv[0]);
+// CYBOZU_BENCH_C("mul ", 100000000, Fp::mul, xv[0], yv[0], xv[0]);
+// CYBOZU_BENCH_C("sqr ", 100000000, Fp::sqr, xv[0], xv[0]);
return 0;
#endif
return cybozu::test::autoRun.run(argc, argv);