aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: MITSUNARI Shigeo <herumi@nifty.com>, 2016-03-28 11:52:36 +0800
committer: MITSUNARI Shigeo <herumi@nifty.com>, 2016-03-28 11:52:36 +0800
commit: df18baf717d23ea42dd2fa62b60165f78cba3977 (patch)
tree: 7ad477d0bfe0895c62c98b9943435a2bcbf65331
parent: e0ce6909f8bcf4ef6f546311ad346e64cc07bc7b (diff)
download: dexon-mcl-df18baf717d23ea42dd2fa62b60165f78cba3977.tar.gz
          dexon-mcl-df18baf717d23ea42dd2fa62b60165f78cba3977.tar.zst
          dexon-mcl-df18baf717d23ea42dd2fa62b60165f78cba3977.zip
mod_NIST_P521 by llvm
-rw-r--r-- include/mcl/fp.hpp | 4
-rw-r--r-- include/mcl/op.hpp | 3
-rw-r--r-- sample/bench.cpp | 2
-rw-r--r-- src/fp.cpp | 11
-rw-r--r-- src/fp_proto.hpp | 2
-rw-r--r-- src/gen.py | 5
-rw-r--r-- src/once.txt | 47
-rw-r--r-- test/base_test.cpp | 6
-rw-r--r-- test/fp_test.cpp | 33
9 files changed, 86 insertions, 27 deletions
diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
index 05983c7..4dbf8f0 100644
--- a/include/mcl/fp.hpp
+++ b/include/mcl/fp.hpp
@@ -492,13 +492,13 @@ private:
{
Unit xy[maxSize * 2];
op_.fpDbl_mulPre(xy, x, y);
- fpDbl_modW(z, xy);
+ op_.fpDbl_mod(z, xy);
}
static inline void fp_sqrW(Unit *y, const Unit *x)
{
Unit xx[maxSize * 2];
op_.fpDbl_sqrPre(xx, x);
- fpDbl_modW(y, xx);
+ op_.fpDbl_mod(y, xx);
}
static inline void fp_negW(Unit *y, const Unit *x)
{
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index a1aa4ad..ccfffff 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -53,7 +53,8 @@ enum Mode {
enum PrimeMode {
PM_GENERIC = 0,
- PM_NICT_P192
+ PM_NICT_P192,
+ PM_NICT_P521,
};
static inline const char *ModeToStr(Mode mode)
diff --git a/sample/bench.cpp b/sample/bench.cpp
index a1bc151..636256e 100644
--- a/sample/bench.cpp
+++ b/sample/bench.cpp
@@ -12,6 +12,7 @@ typedef mcl::EcT<Fp> Ec;
const char *getModeStr(mcl::fp::Mode mode)
{
switch (mode) {
+ case mcl::fp::FP_AUTO: return "auto";
case mcl::fp::FP_GMP: return "gmp";
case mcl::fp::FP_LLVM: return "llvm";
case mcl::fp::FP_LLVM_MONT: return "llvm+mont";
@@ -122,6 +123,7 @@ void benchEc(size_t bitSize, int mode, mcl::ec::Mode ecMode)
};
for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
if (bitSize != 0 && tbl[i].bitSize != bitSize) continue;
+ benchEcSub(tbl[i], mcl::fp::FP_AUTO, ecMode);
if (mode & (1 << 0)) benchEcSub(tbl[i], mcl::fp::FP_GMP, ecMode);
#ifdef MCL_USE_LLVM
if (mode & (1 << 1)) benchEcSub(tbl[i], mcl::fp::FP_LLVM, ecMode);
diff --git a/src/fp.cpp b/src/fp.cpp
index cf25956..496cfa8 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -415,8 +415,14 @@ void Op::init(const std::string& mstr, int base, size_t maxBitSize, Mode mode)
if ((mode == FP_AUTO || mode == FP_LLVM || mode == FP_XBYAK)
&& mp == mpz_class("0xfffffffffffffffffffffffffffffffeffffffffffffffff")) {
primeMode = PM_NICT_P192;
+ isMont = false;
+ isFastMod = true;
}
- if (primeMode == PM_NICT_P192) {
+#endif
+#if defined(MCL_USE_LLVM)
+ if ((mode == FP_AUTO || mode == FP_LLVM)
+ && mp == mpz_class("0x1ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff")) {
+ primeMode = PM_NICT_P521;
isMont = false;
isFastMod = true;
}
@@ -452,6 +458,9 @@ void Op::init(const std::string& mstr, int base, size_t maxBitSize, Mode mode)
fp_sqr = &mcl_fp_sqr_NIST_P192;
fpDbl_mod = &mcl_fpDbl_mod_NIST_P192;
}
+ if (primeMode == PM_NICT_P521) {
+ fpDbl_mod = &mcl_fpDbl_mod_NIST_P521;
+ }
#endif
fp::initForMont(*this, p, mode);
sq.set(mp);
diff --git a/src/fp_proto.hpp b/src/fp_proto.hpp
index e5cba74..4e766b6 100644
--- a/src/fp_proto.hpp
+++ b/src/fp_proto.hpp
@@ -60,6 +60,8 @@ void mcl_fpDbl_mod_NIST_P192(mcl::fp::Unit*, const mcl::fp::Unit*);
void mcl_fp_mul_NIST_P192(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*);
void mcl_fp_sqr_NIST_P192(mcl::fp::Unit*, const mcl::fp::Unit*);
+void mcl_fpDbl_mod_NIST_P521(mcl::fp::Unit*, const mcl::fp::Unit*);
+
}
#endif
diff --git a/src/gen.py b/src/gen.py
index acdd1ab..b91fc0c 100644
--- a/src/gen.py
+++ b/src/gen.py
@@ -42,7 +42,6 @@ def parseFor(s, envG):
v
@define i = 0
<exp>
- exp
@define i = 1
<exp>
@define i = 2
@@ -68,8 +67,8 @@ def parseFor(s, envG):
p = RE_FOR.search(stripped)
if p:
v = p.group(1).strip()
- b = eval(p.group(2), envG)
- e = eval(p.group(3), envG)
+ b = eval(p.group(2), envG, envL)
+ e = eval(p.group(3), envG, envL)
sub = ""
inFor = True
else:
diff --git a/src/once.txt b/src/once.txt
index 825733a..947d14a 100644
--- a/src/once.txt
+++ b/src/once.txt
@@ -109,21 +109,36 @@ define void @mcl_fp_mul_NIST_P192(i192* %pz, i192* %px, i192* %py) {
; p = (1 << 521) - 1
; x = [H:L]
; x % p = (L + H) % p
-@if unit == 64
-@define bit = 576
-@else
-@define bit = 544
-@endif
-@define b2 = bit * 2
-@define bu = bit + unit
-define void @mcl_fpDbl_mod_NIST_P521(i$(bit)* %py, i$(bit)* %px) {
- %L0 = load i$(bit)* %px
- %px1 = getelementptr i$(bit)* %px, i32 1
- %H0 = load i$(bit)* %px1
- %L = zext i$(bit) %L0 to i$(bu)
- %H = zext i$(bit) %H0 to i$(bu)
- %t = add i$(bu) %L, %H
- %t1 = lshr i$(bu) %t, $(bu-1)
- %t2 = add i$(bu) %t, %t1
+@define len = 521
+@define n = len / unit
+@define round = unit * (n + 1)
+@define round2 = unit * (n * 2 + 1)
+@define rem = len - n * unit
+@define mask = ((1 << unit) - (1 << rem))
+define void @mcl_fpDbl_mod_NIST_P521(i$(round)* %py, i$(round2)* %px) {
+ %x = load i$(round2)* %px
+ %L0 = trunc i$(round2) %x to i$(len)
+ %L = zext i$(len) %L0 to i$(round)
+ %H0 = lshr i$(round2) %x, $(len)
+ %H = trunc i$(round2) %H0 to i$(round) ; x = [H:L]
+ %t = add i$(round) %L, %H ; t = L + H
+ %t0 = lshr i$(round) %t, $(len)
+ %t1 = and i$(round) %t0, 1
+ %t2 = add i$(round) %t, %t1
+ %t3 = trunc i$(round) %t2 to i$(len)
+ %z0 = zext i$(len) %t3 to i$(round)
+ %z1 = call i$(unit) @extract$(round)(i$(round) %z0, i$(round) $(n * unit))
+ %m0 = or i$(unit) %z1, $(mask)
+@for i, 0, n
+ %s$(i) = call i$(unit) @extract$(round)(i$(round) %z0, i$(round) $(unit*i))
+ %m$(i+1) = and i$(unit) %m$(i), %s$(i)
+@endfor
+ %c = icmp eq i$(unit) %m$(n), -1
+ br i1 %c, label %zero, label %nonzero
+zero:
+ store i$(round) 0, i$(round)* %py
+ ret void
+nonzero:
+ store i$(round) %z0, i$(round)* %py
ret void
}
diff --git a/test/base_test.cpp b/test/base_test.cpp
index 7167763..0110f92 100644
--- a/test/base_test.cpp
+++ b/test/base_test.cpp
@@ -1,14 +1,14 @@
+// not compiled
#include <map>
-#define MCL_USE_LLVM
#include <mcl/op.hpp>
#include <cybozu/test.hpp>
#include <cybozu/benchmark.hpp>
#include <cybozu/xorshift.hpp>
#include <cybozu/bit_operation.hpp>
-#include "conversion.hpp"
+#include "../src/conversion.hpp"
#include <mcl/fp.hpp>
-#include "fp_generator.hpp"
+#include "../src/fp_generator.hpp"
#if (CYBOZU_HOST == CYBOZU_HOST_INTEL) && (CYBOZU_OS_BIT == 64)
#define USE_XBYAK
static mcl::FpGenerator fg;
diff --git a/test/fp_test.cpp b/test/fp_test.cpp
index 3a1240c..c2c23a4 100644
--- a/test/fp_test.cpp
+++ b/test/fp_test.cpp
@@ -2,6 +2,7 @@
#include <cybozu/test.hpp>
#include <mcl/fp.hpp>
#include <cybozu/benchmark.hpp>
+#include "../src/fp_proto.hpp"
#include <time.h>
#ifdef _MSC_VER
@@ -484,4 +485,34 @@ CYBOZU_TEST_AUTO(getStr)
}
}
-
+#ifdef MCL_USE_LLVM
+CYBOZU_TEST_AUTO(mod_NIST_P521)
+{
+ const size_t len = 521;
+ const size_t N = len / mcl::fp::UnitBitSize;
+ const char *tbl[] = {
+ "0",
+ "0xffffffff",
+ "0x1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff0",
+ "0x1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe",
+ "0x1ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
+ "0x20000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
+ "0x20000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001",
+ "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff00111423424",
+ "0x11111111111111112222222222222222333333333333333344444444444444445555555555555555666666666666666677777777777777778888888888888888aaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbccccccccccccccccddddddddddddddddeeeeeeeeeeeeeeeeffffffffffffffff1234712341234123412341234123412341234",
+ "0x3ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
+ };
+ const mpz_class p("0x1ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff");
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ mpz_class x(tbl[i]);
+ mcl::fp::Unit in[N * 2 + 1];
+ mcl::fp::Unit my[N + 1];
+ mcl::Gmp::getArray(in, N * 2 + 1, x);
+ mcl_fpDbl_mod_NIST_P521(my, in);
+ mpz_class y = x % p;
+ mcl::fp::Unit ok[N + 1];
+ mcl::Gmp::getArray(ok, N + 1, y);
+ CYBOZU_TEST_ASSERT(memcmp(my, ok, sizeof(my)) == 0);
+ }
+}
+#endif