diff options
author | MITSUNARI Shigeo <herumi@nifty.com> | 2018-08-08 17:23:26 +0800 |
---|---|---|
committer | MITSUNARI Shigeo <herumi@nifty.com> | 2018-08-08 17:23:26 +0800 |
commit | bd9f75d265c4ab79e90c133af15532312efb76ca (patch) | |
tree | 0c193d59913d34eff8586258ab0dab65ca954af9 | |
parent | 42710833307dd9c863be16bddf3754c2ff92ecc9 (diff) | |
download | tangerine-mcl-bd9f75d265c4ab79e90c133af15532312efb76ca.tar.gz tangerine-mcl-bd9f75d265c4ab79e90c133af15532312efb76ca.tar.zst tangerine-mcl-bd9f75d265c4ab79e90c133af15532312efb76ca.zip |
fail fp_tower_test
-rw-r--r-- | include/mcl/fp_tower.hpp | 26 |
-rw-r--r-- | src/fp_generator.hpp | 10 |
2 files changed, 30 insertions, 6 deletions
Review notes (reviewer-added; not part of the committed patch text):
  * fp_tower.hpp, hunk at line 397: op.fp2_sqr is now set to op.fp2_sqrA_
    whenever that pointer is non-null and to fp2_sqrW otherwise; the choice
    no longer sits inside the xi_a == 1 branch.
  * fp_tower.hpp, fp2_sqrW: adds two disabled (#if 0) debug sections that call
    Fp::getOp().fp2_sqrA_ on the same input and print every word where the
    two results differ. The committed patch printed copyX[i] inside the
    j loop, which repeats a single word eight times; this copy prints
    copyX[j] so the whole saved input is dumped.
  * fp_generator.hpp: re-enables op.fp2_sqrA_ = getCurr<void2u>() and
    gen_fp2_sqr(), and adds a disabled (#if 0) alternative built from
    gen_raw_fp_add / gen_raw_fp_sub next to the existing code path.
  * Line breaks were restored from a one-line rendering. Leading whitespace
    and the position of blank context lines are reconstructed, so re-check
    them before feeding this to patch / git apply.

diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index 89d7fa0..27b2bfc 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -397,14 +397,15 @@ public:
 				op.fp2_mul = fp2_mulW;
 			}
 		}
+		if (op.fp2_sqrA_) {
+			op.fp2_sqr = op.fp2_sqrA_;
+		} else {
+			op.fp2_sqr = fp2_sqrW;
+		}
 		op.fp2_neg = fp2_negW;
 		op.fp2_inv = fp2_invW;
-		op.fp2_sqr = fp2_sqrW;
 		if (xi_a == 1) {
 			op.fp2_mul_xi = fp2_mul_xi_1_1i;
-			if (op.fp2_sqrA_) {
-				op.fp2_sqr = op.fp2_sqrA_;
-			}
 		} else {
 			op.fp2_mul_xi = fp2_mul_xiW;
 		}
@@ -546,6 +547,11 @@ private:
 	*/
 	static void fp2_sqrW(Unit *y, const Unit *x)
 	{
+#if 0
+		Unit xx[8], copyX[8];
+		memcpy(copyX, x, sizeof(copyX));
+		Fp::getOp().fp2_sqrA_(xx, x);
+#endif
 		const Fp *px = reinterpret_cast<const Fp*>(x);
 		Fp *py = reinterpret_cast<Fp*>(y);
 		const Fp& a = px[0];
@@ -569,6 +575,18 @@ private:
 		FpDbl::mod(py[0], d1);
 		FpDbl::mod(py[1], d2);
 #endif
+#if 0
+		for (int i = 0; i < 8; i++) {
+			if (y[i] != xx[i]) {
+				printf("ERR %d %016llx %016llx\n", i, (long long)y[i], (long long)xx[i]);
+				printf("X\n");
+				for (int j = 0; j < 8; j++) {
+					printf("%016llx ", (long long)copyX[j]);
+				}
+				puts("");
+			}
+		}
+#endif
 	}
 	/*
 		xi = xi_a + i
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 6024043..e723e96 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -376,8 +376,8 @@ struct Code : Xbyak::CodeGenerator {
 			op.fp2_mul = getCurr<void3u>();
 			gen_fp2_mul();
 			align(16);
-//			op.fp2_sqrA_ = getCurr<void2u>();
-//			gen_fp2_sqr();
+			op.fp2_sqrA_ = getCurr<void2u>();
+			gen_fp2_sqr();
 		}
 	}
 	void gen_addSubPre(bool isAdd, int n)
@@ -2918,6 +2918,11 @@ private:
 		mov(gp2, ptr [x]);
 		call(fp_mulL_);
 
+#if 0
+		mov(gp0, ptr [x]);
+		gen_raw_fp_add(t2, gp0, gp0 + FpByte_, sf.t, false);
+		gen_raw_fp_sub(t3, gp0, gp0 + FpByte_, sf.t, false);
+#else
 		Pack a = sf.t.sub(0, 4);
 		Pack b = sf.t.sub(4, 4);
 		mov(gp0, ptr [x]);
@@ -2936,6 +2941,7 @@ private:
 		add_rm(a, gp1);
 		sub_rr(a, b);
 		store_mr(t3, a);
+#endif
 
 		mov(gp0, ptr [y]);
 		lea(gp1, ptr [t2]);