about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author MITSUNARI Shigeo <herumi@nifty.com> 2018-08-09 16:29:14 +0800
committer MITSUNARI Shigeo <herumi@nifty.com> 2018-08-09 16:29:14 +0800
commit d957961c79bc6e05602ec9ae6317d824fc1c4e67 (patch)
tree d3ee37afbe867b5948bf8c107e0ac019d4549a06
parent bd9f75d265c4ab79e90c133af15532312efb76ca (diff)
download tangerine-mcl-d957961c79bc6e05602ec9ae6317d824fc1c4e67.tar.gz
tangerine-mcl-d957961c79bc6e05602ec9ae6317d824fc1c4e67.tar.zst
tangerine-mcl-d957961c79bc6e05602ec9ae6317d824fc1c4e67.zip
fp2_sqr is ok
-rw-r--r-- include/mcl/fp_tower.hpp 17
-rw-r--r-- src/fp_generator.hpp 66
2 files changed, 37 insertions, 46 deletions
diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index 27b2bfc..955fcfd 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -547,11 +547,6 @@ private:
*/
static void fp2_sqrW(Unit *y, const Unit *x)
{
-#if 0
- Unit xx[8], copyX[8];
- memcpy(copyX, x, sizeof(copyX));
- Fp::getOp().fp2_sqrA_(xx, x);
-#endif
const Fp *px = reinterpret_cast<const Fp*>(x);
Fp *py = reinterpret_cast<Fp*>(y);
const Fp& a = px[0];
@@ -575,18 +570,6 @@ private:
FpDbl::mod(py[0], d1);
FpDbl::mod(py[1], d2);
#endif
-#if 0
- for (int i = 0; i < 8; i++) {
- if (y[i] != xx[i]) {
- printf("ERR %d %016llx %016llx\n", i, (long long)y[i], (long long)xx[i]);
- printf("X\n");
- for (int j = 0; j < 8; j++) {
- printf("%016llx ", (long long)copyX[i]);
- }
- puts("");
- }
- }
-#endif
}
/*
xi = xi_a + i
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index e723e96..c570548 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -2900,48 +2900,56 @@ private:
const Ext1 t1(FpByte_, rsp, 2 * 8);
const Ext1 t2(FpByte_, rsp, t1.next);
const Ext1 t3(FpByte_, rsp, t2.next);
+ bool nocarry = (p_[pn_ - 1] >> 62) == 0;
StackFrame sf(this, 3, 10 | UseRDX, t3.next);
mov(ptr [y], gp0);
mov(ptr [x], gp1);
// t1 = b + b
lea(gp0, ptr [t1]);
- for (int i = 0; i < 4; i++) {
- mov(rax, ptr [gp1 + FpByte_ + i * 8]);
- if (i == 0) {
- add(rax, rax);
- } else {
- adc(rax, rax);
+ if (nocarry) {
+ for (int i = 0; i < 4; i++) {
+ mov(rax, ptr [gp1 + FpByte_ + i * 8]);
+ if (i == 0) {
+ add(rax, rax);
+ } else {
+ adc(rax, rax);
+ }
+ mov(ptr [gp0 + i * 8], rax);
}
- mov(ptr [gp0 + i * 8], rax);
+ } else {
+ gen_raw_fp_add(gp0, gp1 + FpByte_, gp1 + FpByte_, sf.t, false);
}
+ // t1 = 2ab
mov(gp1, gp0);
mov(gp2, ptr [x]);
call(fp_mulL_);
-#if 0
- mov(gp0, ptr [x]);
- gen_raw_fp_add(t2, gp0, gp0 + FpByte_, sf.t, false);
- gen_raw_fp_sub(t3, gp0, gp0 + FpByte_, sf.t, false);
-#else
- Pack a = sf.t.sub(0, 4);
- Pack b = sf.t.sub(4, 4);
- mov(gp0, ptr [x]);
- load_rm(a, gp0);
- load_rm(b, gp0 + FpByte_);
- for (int i = 0; i < 4; i++) {
- mov(rax, a[i]);
- if (i == 0) {
- add(rax, b[i]);
- } else {
- adc(rax, b[i]);
+ if (nocarry) {
+ Pack a = sf.t.sub(0, 4);
+ Pack b = sf.t.sub(4, 4);
+ mov(gp0, ptr [x]);
+ load_rm(a, gp0);
+ load_rm(b, gp0 + FpByte_);
+ // t2 = a + b
+ for (int i = 0; i < 4; i++) {
+ mov(rax, a[i]);
+ if (i == 0) {
+ add(rax, b[i]);
+ } else {
+ adc(rax, b[i]);
+ }
+ mov(ptr [(RegExp)t2 + i * 8], rax);
}
- mov(ptr [(RegExp)t2 + i * 8], rax);
+ // t3 = a + p - b
+ mov(gp1, (size_t)p_);
+ add_rm(a, gp1);
+ sub_rr(a, b);
+ store_mr(t3, a);
+ } else {
+ mov(gp0, ptr [x]);
+ gen_raw_fp_add(t2, gp0, gp0 + FpByte_, sf.t, false);
+ gen_raw_fp_sub(t3, gp0, gp0 + FpByte_, sf.t, false);
}
- mov(gp1, (size_t)p_);
- add_rm(a, gp1);
- sub_rr(a, b);
- store_mr(t3, a);
-#endif
mov(gp0, ptr [y]);
lea(gp1, ptr [t2]);