aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMITSUNARI Shigeo <herumi@nifty.com>2019-03-06 15:47:23 +0800
committerMITSUNARI Shigeo <herumi@nifty.com>2019-03-06 15:47:23 +0800
commitda49e9158cc858d16f2277e39bd86c0c714ce30f (patch)
tree9daedfc33bd4a2ef9266c395a2d78bd9dd66fb55
parentfa696b564b0f7a0ae671b260e4159fea88d4bf5a (diff)
downloaddexon-mcl-da49e9158cc858d16f2277e39bd86c0c714ce30f.tar.gz
dexon-mcl-da49e9158cc858d16f2277e39bd86c0c714ce30f.tar.zst
dexon-mcl-da49e9158cc858d16f2277e39bd86c0c714ce30f.zip
optimize Vint::div for small denominator
-rw-r--r--include/mcl/vint.hpp54
-rw-r--r--test/vint_test.cpp64
2 files changed, 113 insertions, 5 deletions
diff --git a/include/mcl/vint.hpp b/include/mcl/vint.hpp
index c2a7a10..f9e1859 100644
--- a/include/mcl/vint.hpp
+++ b/include/mcl/vint.hpp
@@ -568,6 +568,7 @@ void divNM(T *q, size_t qn, T *r, const T *x, size_t xn, const T *y, size_t yn)
yn = getRealSize(y, yn);
if (x == y) {
assert(xn == yn);
+ x_is_y:
clearN(r, rn);
if (q) {
q[0] = 1;
@@ -579,6 +580,7 @@ void divNM(T *q, size_t qn, T *r, const T *x, size_t xn, const T *y, size_t yn)
/*
if y > x then q = 0 and r = x
*/
+ q_is_zero:
copyN(r, x, xn);
clearN(r + xn, rn - xn);
if (q) clearN(q, qn);
@@ -598,11 +600,61 @@ void divNM(T *q, size_t qn, T *r, const T *x, size_t xn, const T *y, size_t yn)
clearN(r + 1, rn - 1);
return;
}
+ const size_t yTopBit = cybozu::bsr(y[yn - 1]);
assert(yn >= 2);
+ if (xn == yn) {
+ const size_t xTopBit = cybozu::bsr(x[xn - 1]);
+ if (xTopBit < yTopBit) goto q_is_zero;
+ if (yTopBit == xTopBit) {
+ int ret = compareNM(x, xn, y, yn);
+ if (ret == 0) goto x_is_y;
+ if (ret < 0) goto q_is_zero;
+ if (r) {
+ subN(r, x, y, yn);
+ }
+ if (q) {
+ q[0] = 1;
+ clearN(q + 1, qn - 1);
+ }
+ return;
+ }
+ assert(xTopBit > yTopBit);
+ // fast reduction for larger than fullbit-2 size p
+ if (yTopBit >= sizeof(T) * 8 - 3) {
+ T *xx = (T*)CYBOZU_ALLOCA(sizeof(T) * xn);
+ T qv = 0;
+ if (yTopBit == sizeof(T) * 8 - 2) {
+ copyN(xx, x, xn);
+ } else {
+ qv = x[xn - 1] >> (yTopBit + 1);
+ mulu1(xx, y, yn, qv);
+ subN(xx, x, xx, xn);
+ xn = getRealSize(xx, xn);
+ }
+ for (;;) {
+ T ret = subN(xx, xx, y, yn);
+ if (ret) {
+ addN(xx, xx, y, yn);
+ break;
+ }
+ qv++;
+ xn = getRealSize(xx, xn);
+ }
+ if (r) {
+ copyN(r, xx, xn);
+ clearN(r + xn, rn - xn);
+ }
+ if (q) {
+ q[0] = qv;
+ clearN(q + 1, qn - 1);
+ }
+ return;
+ }
+ }
/*
bitwise left shift x and y to adjust MSB of y[yn - 1] = 1
*/
- const size_t shift = sizeof(T) * 8 - 1 - cybozu::bsr(y[yn - 1]);
+ const size_t shift = sizeof(T) * 8 - 1 - yTopBit;
T *xx = (T*)CYBOZU_ALLOCA(sizeof(T) * (xn + 1));
const T *yy;
if (shift) {
diff --git a/test/vint_test.cpp b/test/vint_test.cpp
index 0eea8a9..ca4cdbf 100644
--- a/test/vint_test.cpp
+++ b/test/vint_test.cpp
@@ -551,14 +551,70 @@ CYBOZU_TEST_AUTO(quotRem)
"0xfffffffffffff0000000000000000000000000000000000000000000000000000000000000001",
"521481209941628322292632858916605385658190900090571826892867289394157573281830188869820088065",
},
+ {
+ "0x1230000000000000456",
+ "0x1230000000000000457",
+ "0x1230000000000000456",
+ },
+ {
+ "0x1230000000000000456",
+ "0x1230000000000000456",
+ "0",
+ },
+ {
+ "0x1230000000000000456",
+ "0x1230000000000000455",
+ "1",
+ },
+ {
+ "0x1230000000000000456",
+ "0x2000000000000000000",
+ "0x1230000000000000456",
+ },
+ {
+ "0xffffffffffffffffffffffffffffffff",
+ "0x80000000000000000000000000000000",
+ "0x7fffffffffffffffffffffffffffffff",
+ },
+ {
+ "0xffffffffffffffffffffffffffffffff",
+ "0x7fffffffffffffffffffffffffffffff",
+ "1",
+ },
+ {
+ "0xffffffffffffffffffffffffffffffff",
+ "0x70000000000000000000000000000000",
+ "0x1fffffffffffffffffffffffffffffff",
+ },
+ {
+ "0xffffffffffffffffffffffffffffffff",
+ "0x30000000000000000000000000000000",
+ "0x0fffffffffffffffffffffffffffffff",
+ },
+ {
+ "0xffffffffffffffffffffffffffffffff",
+ "0x10000000000000000000000000000000",
+ "0x0fffffffffffffffffffffffffffffff",
+ },
+ {
+ "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
+ "0x2523648240000001ba344d80000000086121000000000013a700000000000013",
+ "0x212ba4f27ffffff5a2c62effffffffcdb939ffffffffff8a15ffffffffffff8d",
+ },
+ {
+ "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
+ "0x2523648240000001ba344d8000000007ff9f800000000010a10000000000000d",
+ "0x212ba4f27ffffff5a2c62effffffffd00242ffffffffff9c39ffffffffffffb1",
+ },
};
- mcl::Vint x, y, r;
+ mcl::Vint x, y, q, r1, r2;
for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
x.setStr(tbl[i].x);
y.setStr(tbl[i].y);
- r.setStr(tbl[i].r);
- x %= y;
- CYBOZU_TEST_EQUAL(x, r);
+ r1.setStr(tbl[i].r);
+ mcl::Vint::divMod(&q, r2, x, y);
+ CYBOZU_TEST_EQUAL(r1, r2);
+ CYBOZU_TEST_EQUAL(x, q * y + r2);
}
}