aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMITSUNARI Shigeo <herumi@nifty.com>2015-05-05 09:52:44 +0800
committerMITSUNARI Shigeo <herumi@nifty.com>2015-05-05 09:52:44 +0800
commit14fd1d125d21f47c2539a0e820e2a417e0c715f8 (patch)
tree4fccb395f51a3f9c1202a783665ce396b649eeb9
downloadtangerine-mcl-14fd1d125d21f47c2539a0e820e2a417e0c715f8.tar.gz
tangerine-mcl-14fd1d125d21f47c2539a0e820e2a417e0c715f8.tar.zst
tangerine-mcl-14fd1d125d21f47c2539a0e820e2a417e0c715f8.zip
from mie
-rw-r--r--COPYRIGHT47
-rw-r--r--Makefile20
-rw-r--r--common.mk105
-rw-r--r--common.props26
-rw-r--r--debug.props14
-rw-r--r--include/mcl/ec.hpp585
-rw-r--r--include/mcl/ecparam.hpp161
-rw-r--r--include/mcl/fp.hpp446
-rw-r--r--include/mcl/fp_base.hpp527
-rw-r--r--include/mcl/fp_generator.hpp1675
-rw-r--r--include/mcl/fp_util.hpp294
-rw-r--r--include/mcl/gmp_util.hpp378
-rw-r--r--include/mcl/mont_fp.hpp463
-rw-r--r--include/mcl/operator.hpp118
-rw-r--r--include/mcl/power.hpp181
-rw-r--r--include/mcl/tagmultigr.hpp39
-rw-r--r--mcl.sln25
-rw-r--r--release.props12
-rw-r--r--sample/Makefile23
-rw-r--r--sample/ecdh_smpl.cpp69
-rw-r--r--sample/random_smpl.cpp29
-rw-r--r--src/Makefile42
-rw-r--r--src/all.txt7
-rw-r--r--src/gen.py187
-rw-r--r--src/long.txt54
-rw-r--r--src/mul.txt81
-rw-r--r--src/once.txt74
-rw-r--r--src/short.txt46
-rw-r--r--test/Makefile42
-rw-r--r--test/base_test.cpp392
-rw-r--r--test/ec_test.cpp397
-rw-r--r--test/fp_generator_test.cpp222
-rw-r--r--test/fp_test.cpp465
-rw-r--r--test/fp_util_test.cpp191
-rw-r--r--test/mk32.sh1
-rw-r--r--test/mont_fp_test.cpp809
-rw-r--r--test/proj/ec_test/ec_test.vcxproj88
-rw-r--r--test/proj/fp_test/fp_test.vcxproj91
-rw-r--r--test/sq_test.cpp21
39 files changed, 8447 insertions, 0 deletions
diff --git a/COPYRIGHT b/COPYRIGHT
new file mode 100644
index 0000000..bfe54da
--- /dev/null
+++ b/COPYRIGHT
@@ -0,0 +1,47 @@
+
+Copyright (c) 2015 MITSUNARI Shigeo
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+Neither the name of the copyright owner nor the names of its contributors may
+be used to endorse or promote products derived from this software without
+specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+THE POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+ソースコード形式かバイナリ形式か、変更するかしないかを問わず、以下の条件を満た
+す場合に限り、再頒布および使用が許可されます。
+
+ソースコードを再頒布する場合、上記の著作権表示、本条件一覧、および下記免責条項
+を含めること。
+バイナリ形式で再頒布する場合、頒布物に付属のドキュメント等の資料に、上記の著作
+権表示、本条件一覧、および下記免責条項を含めること。
+書面による特別の許可なしに、本ソフトウェアから派生した製品の宣伝または販売促進
+に、著作権者の名前またはコントリビューターの名前を使用してはならない。
+本ソフトウェアは、著作権者およびコントリビューターによって「現状のまま」提供さ
+れており、明示黙示を問わず、商業的な使用可能性、および特定の目的に対する適合性
+に関する暗黙の保証も含め、またそれに限定されない、いかなる保証もありません。
+著作権者もコントリビューターも、事由のいかんを問わず、 損害発生の原因いかんを
+問わず、かつ責任の根拠が契約であるか厳格責任であるか(過失その他の)不法行為で
+あるかを問わず、仮にそのような損害が発生する可能性を知らされていたとしても、
+本ソフトウェアの使用によって発生した(代替品または代用サービスの調達、使用の
+喪失、データの喪失、利益の喪失、業務の中断も含め、またそれに限定されない)直接
+損害、間接損害、偶発的な損害、特別損害、懲罰的損害、または結果損害について、
+一切責任を負わないものとします。
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..383af2b
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,20 @@
+include common.mk
+
+# Default goal: create ./bin and build everything under test/ and sample/.
+all:
+	$(MKDIR) bin
+	$(MAKE) -C test
+	$(MAKE) -C sample
+
+# Delegates to the `test` goal of test/Makefile.
+test:
+	$(MAKE) -C test test
+
+# Delegates to the `test` goal of sample/Makefile.
+sample:
+	$(MAKE) -C sample test
+
+# NOTE: src/ is not cleaned; the `$(MAKE) -C src clean` step was disabled by the author.
+clean:
+	$(MAKE) -C test clean
+	$(MAKE) -C sample clean
+
+# Every goal here is a command, not a file. `test` and `sample` are also
+# directory names, so without this they would always look up to date.
+.PHONY: all test sample clean
+
diff --git a/common.mk b/common.mk
new file mode 100644
index 0000000..b861db1
--- /dev/null
+++ b/common.mk
@@ -0,0 +1,105 @@
+# Toolchain / platform probes. `:=` makes each $(shell ...) run once, at
+# parse time, instead of on every expansion of the variable.
+GCC_VER:=$(shell $(PRE)$(CC) -dumpversion)
+UNAME_S:=$(shell uname -s)
+ifeq ($(UNAME_S),Linux)
+  OS=Linux
+endif
+# librt is linked on every platform except Darwin.
+ifneq ($(UNAME_S),Darwin)
+  LDFLAGS += -lrt
+endif
+CP = cp -f
+AR = ar r
+MKDIR=mkdir -p
+RM=rm -fr
+CFLAGS_OPT+=-fomit-frame-pointer -DNDEBUG
+ifeq ($(CXX),clang++)
+  CFLAGS_OPT+=-O3
+else
+  # -Ofast only when the reported version is newer than 4.6.0.
+  # The version is compared numerically (major*10000 + minor*100 + patch):
+  # `expr $(GCC_VER) \> 4.6.0` is a *string* comparison and mis-orders
+  # two-digit majors such as "10.2.0" or a bare "11".
+  # An empty GCC_VER (no compiler found) evaluates to 0 and selects -O3.
+  ifeq ($(shell echo $(GCC_VER) | awk -F. '{ print ($$1*10000 + $$2*100 + $$3 > 40600) }'),1)
+    CFLAGS_OPT+=-Ofast
+  else
+    CFLAGS_OPT+=-O3
+  endif
+endif
+CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith
+CFLAGS+= -g -D_FILE_OFFSET_BITS=64
+CFLAGS+=$(CFLAGS_WARN)
+# Target word size: 32, 64 (default), or 0 to pass no -m flag at all.
+BIT?=64
+ifeq ($(BIT),32)
+  CPU?=x86
+else
+  ifeq ($(BIT),64)
+    CPU?=x64
+  endif
+endif
+ifeq ($(BIT),0)
+  BIT_OPT=
+else
+  BIT_OPT=-m$(BIT)
+endif
+# MARCH, when given, is appended verbatim; otherwise -march=native is used
+# for compilers newer than 4.2.1.
+ifeq ($(MARCH),)
+  # Numeric comparison (major*10000 + minor*100 + patch); a string comparison
+  # via `expr ... \> 4.2.1` would reject "10.2.0" or "11".
+  ifeq ($(shell echo $(GCC_VER) | awk -F. '{ print ($$1*10000 + $$2*100 + $$3 > 40201) }'),1)
+    CFLAGS+=-march=native
+  endif
+else
+  CFLAGS+=$(MARCH)
+endif
+
+# Debug is the default configuration; RELEASE=1 selects the optimized one.
+DEBUG=1
+ifeq ($(RELEASE),1)
+  DEBUG=0
+endif
+
+ifneq ($(DEBUG),0)
+  # debug: objects in ./debug, executables get a "d" suffix
+  OBJDIR=debug
+  OBJSUF=d
+  ifeq ($(OS),Linux)
+    LDFLAGS+=-rdynamic
+  endif
+else
+  # release: objects in ./release, no suffix, optimization flags enabled
+  OBJDIR=release
+  OBJSUF=
+  CFLAGS+=$(CFLAGS_OPT)
+endif
+
+####################################################
+
+# Use $(BIT_OPT) rather than a raw -m$(BIT): BIT=0 means "no -m flag" and must not turn into -m0.
+# libgmpxx depends on libgmp, so it has to precede it on the link line.
+LDFLAGS += -lpthread $(BIT_OPT) -lgmpxx -lgmp
+
+####################################################
+
+# Absolute directory (with trailing slash) of the makefile being read right now,
+# i.e. of this file while it is being included.
+TOPDIR:=$(realpath $(dir $(lastword $(MAKEFILE_LIST))))/
+EXTDIR:=$(TOPDIR)../cybozulib_ext/
+# Header search path: this project, then the sibling cybozulib and xbyak trees.
+CFLAGS+= $(addprefix -I$(TOPDIR),include ../cybozulib/include/ ../xbyak/) $(BIT_OPT) $(INC_DIR)
+# $$ORIGIN reaches the linker as the literal $ORIGIN.
+LDFLAGS+= -L$(TOPDIR)lib $(BIT_OPT) -Wl,-rpath,'$$ORIGIN/../lib' $(LD_DIR)
+
+MKDEP = sh -ec '$(PRE)$(CC) -MM $(CFLAGS) $< | sed "s@\($*\)\.o[ :]*@$(OBJDIR)/\1.o $@ : @g" > $@; [ -s $@ ] || rm -f $@; touch $@'
+
+CLEAN=$(RM) $(TARGET) $(OBJDIR)
+
+# Run every program in $(TARGET), collect its "ctest:name" summary line in
+# result.txt, print the summaries that are not "ng=0, exception=0" and FAIL
+# the recipe if there is any; previously a failing summary was printed but
+# the recipe still exited with status 0.
+define UNIT_TEST
+sh -ec 'for i in $(TARGET); do $$i|grep "ctest:name"; done' > result.txt
+if grep -v "ng=0, exception=0" result.txt; then exit 1; else echo "all unit tests are ok"; fi
+endef
+
+define SAMPLE_TEST
+sh -ec 'for i in $(TARGET); do $$i; done'
+endef
+
+.SUFFIXES: .cpp .d .exe
+
+# Compile one translation unit into the per-configuration object directory.
+$(OBJDIR)/%.o: %.cpp
+	$(PRE)$(CXX) -c $< -o $@ $(CFLAGS)
+
+# Generate the header dependency file for one source.
+# $(OBJDIR) is an order-only prerequisite: it must exist, but its timestamp
+# (which changes whenever a file is added to it) must not force the .d files
+# to be regenerated.
+$(OBJDIR)/%.d: %.cpp | $(OBJDIR)
+	@$(MKDEP)
+
+# Link bin/<name>[d].exe from its single object plus $(LIBS).
+$(TOPDIR)bin/%$(OBJSUF).exe: $(OBJDIR)/%.o $(LIBS)
+	$(PRE)$(CXX) $< -o $@ $(LIBS) $(LDFLAGS)
+
+# Lists derived from $(SRC), which the including makefile is expected to set.
+OBJS=$(addprefix $(OBJDIR)/,$(SRC:.cpp=.o))
+
+DEPEND_FILE=$(addprefix $(OBJDIR)/, $(SRC:.cpp=.d))
+TEST_FILE=$(addprefix $(TOPDIR)bin/, $(SRC:.cpp=$(OBJSUF).exe))
+
+.PHONY: test
+
diff --git a/common.props b/common.props
new file mode 100644
index 0000000..8ec3f67
--- /dev/null
+++ b/common.props
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ImportGroup Label="PropertySheets" />
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup>
+ <OutDir>$(SolutionDir)bin\</OutDir>
+ </PropertyGroup>
+ <ItemDefinitionGroup>
+ <ClCompile>
+ <AdditionalIncludeDirectories>$(SolutionDir)../cybozulib/include;$(SolutionDir)../cybozulib_ext/mpir/include;$(SolutionDir)include</AdditionalIncludeDirectories>
+ </ClCompile>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup>
+ <ClCompile>
+ <WarningLevel>Level4</WarningLevel>
+ <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ <PrecompiledHeaderFile />
+ <PrecompiledHeaderOutputFile />
+ <PreprocessorDefinitions>_MBCS;%(PreprocessorDefinitions);NOMINMAX</PreprocessorDefinitions>
+ </ClCompile>
+ <Link>
+ <AdditionalLibraryDirectories>$(SolutionDir)../cybozulib_ext/mpir/lib</AdditionalLibraryDirectories>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemGroup />
+</Project> \ No newline at end of file
diff --git a/debug.props b/debug.props
new file mode 100644
index 0000000..d261c8d
--- /dev/null
+++ b/debug.props
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ImportGroup Label="PropertySheets" />
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup>
+ <TargetName>$(ProjectName)d</TargetName>
+ </PropertyGroup>
+ <ItemDefinitionGroup>
+ <ClCompile>
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+ </ClCompile>
+ </ItemDefinitionGroup>
+ <ItemGroup />
+</Project> \ No newline at end of file
diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
new file mode 100644
index 0000000..8b70b70
--- /dev/null
+++ b/include/mcl/ec.hpp
@@ -0,0 +1,585 @@
+#pragma once
+/**
+ @file
+ @brief elliptic curve
+ @author MITSUNARI Shigeo(@herumi)
+ @license modified new BSD license
+ http://opensource.org/licenses/BSD-3-Clause
+*/
+#include <sstream>
+#include <cybozu/exception.hpp>
+#include <cybozu/bitvector.hpp>
+#include <mcl/operator.hpp>
+#include <mcl/power.hpp>
+#include <mcl/gmp_util.hpp>
+
+namespace mcl {
+
+#define MCL_EC_USE_AFFINE 0
+#define MCL_EC_USE_PROJ 1
+#define MCL_EC_USE_JACOBI 2
+
+//#define MCL_EC_COORD MCL_EC_USE_JACOBI
+//#define MCL_EC_COORD MCL_EC_USE_PROJ
+#ifndef MCL_EC_COORD
+ #define MCL_EC_COORD MCL_EC_USE_PROJ
+#endif
+/*
+ elliptic curve
+ y^2 = x^3 + ax + b (affine)
+ y^2 = x^3 + axz^4 + bz^6 (Jacobi) x = X/Z^2, y = Y/Z^3
+*/
+template<class _Fp>
+class EcT : public ope::addsub<EcT<_Fp>,
+ ope::comparable<EcT<_Fp>,
+ ope::hasNegative<EcT<_Fp> > > > {
+ enum {
+ zero,
+ minus3,
+ generic
+ };
+public:
+ typedef _Fp Fp;
+ typedef typename Fp::BlockType BlockType;
+#if MCL_EC_COORD == MCL_EC_USE_AFFINE
+ Fp x, y;
+ bool inf_;
+#else
+ mutable Fp x, y, z;
+#endif
+ static Fp a_;
+ static Fp b_;
+ static int specialA_;
+ static bool compressedExpression_;
+#if MCL_EC_COORD == MCL_EC_USE_AFFINE
+ EcT() : inf_(true) {}
+#else
+ EcT() { z.clear(); }
+#endif
+ EcT(const Fp& _x, const Fp& _y)
+ {
+ set(_x, _y);
+ }
+ void normalize() const
+ {
+#if MCL_EC_COORD == MCL_EC_USE_JACOBI
+ if (isZero() || z == 1) return;
+ Fp rz, rz2;
+ Fp::inv(rz, z);
+ rz2 = rz * rz;
+ x *= rz2;
+ y *= rz2 * rz;
+ z = 1;
+#elif MCL_EC_COORD == MCL_EC_USE_PROJ
+ if (isZero() || z == 1) return;
+ Fp rz;
+ Fp::inv(rz, z);
+ x *= rz;
+ y *= rz;
+ z = 1;
+#endif
+ }
+
+ static inline void setParam(const std::string& astr, const std::string& bstr)
+ {
+ a_.fromStr(astr);
+ b_.fromStr(bstr);
+ if (a_.isZero()) {
+ specialA_ = zero;
+ } else if (a_ == -3) {
+ specialA_ = minus3;
+ } else {
+ specialA_ = generic;
+ }
+ }
+ static inline bool isValid(const Fp& _x, const Fp& _y)
+ {
+ return _y * _y == (_x * _x + a_) * _x + b_;
+ }
+ void set(const Fp& _x, const Fp& _y, bool verify = true)
+ {
+ if (verify && !isValid(_x, _y)) throw cybozu::Exception("ec:EcT:set") << _x << _y;
+ x = _x; y = _y;
+#if MCL_EC_COORD == MCL_EC_USE_AFFINE
+ inf_ = false;
+#else
+ z = 1;
+#endif
+ }
+ void clear()
+ {
+#if MCL_EC_COORD == MCL_EC_USE_AFFINE
+ inf_ = true;
+#else
+ z = 0;
+#endif
+ x.clear();
+ y.clear();
+ }
+
+ static inline void dbl(EcT& R, const EcT& P, bool verifyInf = true)
+ {
+ if (verifyInf) {
+ if (P.isZero()) {
+ R.clear(); return;
+ }
+ }
+#if MCL_EC_COORD == MCL_EC_USE_JACOBI
+ Fp S, M, t, y2;
+ Fp::square(y2, P.y);
+ Fp::mul(S, P.x, y2);
+ S += S;
+ S += S;
+ Fp::square(M, P.x);
+ switch (specialA_) {
+ case zero:
+ Fp::add(t, M, M);
+ M += t;
+ break;
+ case minus3:
+ Fp::square(t, P.z);
+ Fp::square(t, t);
+ M -= t;
+ Fp::add(t, M, M);
+ M += t;
+ break;
+ case generic:
+ default:
+ Fp::square(t, P.z);
+ Fp::square(t, t);
+ t *= a_;
+ t += M;
+ M += M;
+ M += t;
+ break;
+ }
+ Fp::square(R.x, M);
+ R.x -= S;
+ R.x -= S;
+ Fp::mul(R.z, P.y, P.z);
+ R.z += R.z;
+ Fp::square(y2, y2);
+ y2 += y2;
+ y2 += y2;
+ y2 += y2;
+ Fp::sub(R.y, S, R.x);
+ R.y *= M;
+ R.y -= y2;
+#elif MCL_EC_COORD == MCL_EC_USE_PROJ
+ Fp w, t, h;
+ switch (specialA_) {
+ case zero:
+ Fp::square(w, P.x);
+ Fp::add(t, w, w);
+ w += t;
+ break;
+ case minus3:
+ Fp::square(w, P.x);
+ Fp::square(t, P.z);
+ w -= t;
+ Fp::add(t, w, w);
+ w += t;
+ break;
+ case generic:
+ default:
+ Fp::square(w, P.z);
+ w *= a_;
+ Fp::square(t, P.x);
+ w += t;
+ w += t;
+ w += t; // w = a z^2 + 3x^2
+ break;
+ }
+ Fp::mul(R.z, P.y, P.z); // s = yz
+ Fp::mul(t, R.z, P.x);
+ t *= P.y; // xys
+ t += t;
+ t += t; // 4(xys) ; 4B
+ Fp::square(h, w);
+ h -= t;
+ h -= t; // w^2 - 8B
+ Fp::mul(R.x, h, R.z);
+ t -= h; // h is free
+ t *= w;
+ Fp::square(w, P.y);
+ R.x += R.x;
+ R.z += R.z;
+ Fp::square(h, R.z);
+ w *= h;
+ R.z *= h;
+ Fp::sub(R.y, t, w);
+ R.y -= w;
+#else
+ Fp t, s;
+ Fp::square(t, P.x);
+ Fp::add(s, t, t);
+ t += s;
+ t += a_;
+ Fp::add(s, P.y, P.y);
+ t /= s;
+ Fp::square(s, t);
+ s -= P.x;
+ Fp x3;
+ Fp::sub(x3, s, P.x);
+ Fp::sub(s, P.x, x3);
+ s *= t;
+ Fp::sub(R.y, s, P.y);
+ R.x = x3;
+ R.inf_ = false;
+#endif
+ }
+	/*
+		R = P + Q
+		the point at infinity is the neutral element;
+		P == Q is forwarded to dbl(), P == -Q yields the point at infinity
+	*/
+	static inline void add(EcT& R, const EcT& P, const EcT& Q)
+	{
+		if (P.isZero()) { R = Q; return; }
+		if (Q.isZero()) { R = P; return; }
+#if MCL_EC_COORD == MCL_EC_USE_JACOBI
+		Fp r, U1, S1, H, H3;
+		Fp::square(r, P.z);
+		Fp::square(S1, Q.z);
+		Fp::mul(U1, P.x, S1); // U1 = Px Qz^2
+		Fp::mul(H, Q.x, r);
+		H -= U1; // H = Qx Pz^2 - U1
+		r *= P.z;
+		S1 *= Q.z;
+		S1 *= P.y; // S1 = Py Qz^3
+		Fp::mul(r, Q.y, r);
+		r -= S1; // r = Qy Pz^3 - S1
+		if (H.isZero()) {
+			// same x : Q is P or -P
+			if (r.isZero()) {
+				dbl(R, P, false);
+			} else {
+				R.clear();
+			}
+			return;
+		}
+		Fp::mul(R.z, P.z, Q.z);
+		R.z *= H;
+		Fp::square(H3, H); // H^2
+		Fp::square(R.y, r); // r^2
+		U1 *= H3; // U1 H^2
+		H3 *= H; // H^3
+		R.y -= U1;
+		R.y -= U1;
+		Fp::sub(R.x, R.y, H3);
+		U1 -= R.x;
+		U1 *= r;
+		H3 *= S1;
+		Fp::sub(R.y, U1, H3);
+#elif MCL_EC_COORD == MCL_EC_USE_PROJ
+		Fp r, PyQz, v, A, vv;
+		Fp::mul(r, P.x, Q.z);
+		Fp::mul(PyQz, P.y, Q.z);
+		Fp::mul(A, Q.y, P.z);
+		Fp::mul(v, Q.x, P.z);
+		v -= r; // v = Qx Pz - Px Qz
+		if (v.isZero()) {
+			// same x : Q is P or -P
+			Fp::add(vv, A, PyQz);
+			if (vv.isZero()) {
+				R.clear();
+			} else {
+				dbl(R, P, false);
+			}
+			return;
+		}
+		Fp::sub(R.y, A, PyQz);
+		Fp::square(A, R.y);
+		Fp::square(vv, v);
+		r *= vv;
+		vv *= v;
+		Fp::mul(R.z, P.z, Q.z);
+		A *= R.z;
+		R.z *= vv;
+		A -= vv;
+		vv *= PyQz;
+		A -= r;
+		A -= r;
+		Fp::mul(R.x, v, A);
+		r -= A;
+		R.y *= r;
+		R.y -= vv;
+#else
+		Fp t;
+		Fp::sub(t, Q.x, P.x);
+		if (t.isZero()) {
+			/*
+				same x : Q is P or -P
+				comparing only the y-coordinates is not enough because two
+				points with different x can have opposite y
+			*/
+			Fp::neg(t, Q.y);
+			if (P.y == t) {
+				// Q == -P (this also covers P == Q with y == 0)
+				R.clear();
+			} else {
+				dbl(R, P, false);
+			}
+			return;
+		}
+		Fp s;
+		Fp::sub(s, Q.y, P.y);
+		Fp::div(t, s, t); // slope of the line through P and Q
+		R.inf_ = false;
+		Fp x3;
+		Fp::square(x3, t);
+		x3 -= P.x;
+		x3 -= Q.x;
+		Fp::sub(s, P.x, x3);
+		s *= t;
+		Fp::sub(R.y, s, P.y);
+		R.x = x3;
+#endif
+	}
+	/*
+		R = P - Q, computed as P + (-Q)
+	*/
+	static inline void sub(EcT& R, const EcT& P, const EcT& Q)
+	{
+		EcT nQ;
+		neg(nQ, Q);
+		add(R, P, nQ);
+	}
+ static inline void neg(EcT& R, const EcT& P)
+ {
+ if (P.isZero()) {
+ R.clear();
+ return;
+ }
+#if MCL_EC_COORD == MCL_EC_USE_AFFINE
+ R.inf_ = false;
+ R.x = P.x;
+ Fp::neg(R.y, P.y);
+#else
+ R.x = P.x;
+ Fp::neg(R.y, P.y);
+ R.z = P.z;
+#endif
+ }
+ template<class N>
+ static inline void power(EcT& z, const EcT& x, const N& y)
+ {
+ power_impl::power(z, x, y);
+ }
+	/*
+		total order on points
+		the point at infinity is smaller than every other point;
+		otherwise points are ordered by x first and by y when the x are equal
+		@return 0 if P == Q, a negative value if P < Q, a positive value if P > Q
+		@note P and Q are normalized before they are compared
+	*/
+	static inline int compare(const EcT& P, const EcT& Q)
+	{
+		P.normalize();
+		Q.normalize();
+		const bool pIsInf = P.isZero();
+		const bool qIsInf = Q.isZero();
+		if (pIsInf || qIsInf) {
+			if (pIsInf && qIsInf) return 0;
+			return pIsInf ? -1 : 1;
+		}
+		const int cx = _Fp::compare(P.x, Q.x);
+		if (cx != 0) return cx > 0 ? 1 : -1;
+		return _Fp::compare(P.y, Q.y);
+	}
+ bool isZero() const
+ {
+#if MCL_EC_COORD == MCL_EC_USE_AFFINE
+ return inf_;
+#else
+ return z.isZero();
+#endif
+ }
+ friend inline std::ostream& operator<<(std::ostream& os, const EcT& self)
+ {
+ if (self.isZero()) {
+ return os << '0';
+ } else {
+ self.normalize();
+ os << self.x.toStr(16) << '_';
+ if (compressedExpression_) {
+ return os << Fp::isYodd(self.y);
+ } else {
+ return os << self.y.toStr(16);
+ }
+ }
+ }
+	/*
+		read one whitespace-delimited token and parse it as a point
+		"0"         : the point at infinity
+		"<x>_<y>"   : x and y in hex (compressedExpression_ == false)
+		"<x>_<0|1>" : x in hex and the parity of y (compressedExpression_ == true)
+		@note throws cybozu::Exception if '_' is missing or the parity field is
+		not exactly one of '0' / '1'; self is assigned only after the
+		whole token has been parsed
+	*/
+	friend inline std::istream& operator>>(std::istream& is, EcT& self)
+	{
+		std::string str;
+		is >> str;
+		if (str == "0") {
+			self.clear();
+			return is;
+		}
+		const size_t pos = str.find('_');
+		if (pos == std::string::npos) throw cybozu::Exception("EcT:operator>>:bad format") << str;
+		// parse into temporaries so that an exception leaves self untouched
+		Fp px, py;
+		px.fromStr(str.substr(0, pos), 16);
+		if (compressedExpression_) {
+			// check the length before looking at str[pos + 1]
+			if (str.size() != pos + 2 || (str[pos + 1] != '0' && str[pos + 1] != '1')) {
+				throw cybozu::Exception("EcT:operator>>:bad y") << str;
+			}
+			getYfromX(py, px, str[pos + 1] == '1');
+		} else {
+			py.fromStr(str.substr(pos + 1), 16);
+		}
+		self.x = px;
+		self.y = py;
+#if MCL_EC_COORD == MCL_EC_USE_AFFINE
+		self.inf_ = false;
+#else
+		self.z = 1;
+#endif
+		return is;
+	}
+ static inline void setCompressedExpression(bool compressedExpression)
+ {
+ compressedExpression_ = compressedExpression;
+ }
+ /*
+ append to bv(not clear bv)
+ */
+ void appendToBitVec(cybozu::BitVector& bv) const
+ {
+#if MCL_EC_COORD == MCL_EC_USE_AFFINE
+ #error "not implemented"
+#else
+ normalize();
+ const size_t bitLen = _Fp::getModBitLen();
+ /*
+ elem |x|y|z|
+ size n n 1 if not compressed
+ size n 1 1 if compressed
+ */
+ const size_t maxBitLen = compressedExpression_ ? (bitLen + 1 + 1) : (bitLen * 2 + 1);
+ if (isZero()) {
+ bv.resize(bv.size() + maxBitLen);
+ return;
+ }
+ x.appendToBitVec(bv);
+ if (compressedExpression_) {
+ bv.append(Fp::isYodd(y), 1);
+ } else {
+ y.appendToBitVec(bv);
+ }
+ bv.append(1, 1); // z = 1
+#endif
+ }
+ void fromBitVec(const cybozu::BitVector& bv)
+ {
+#if MCL_EC_COORD == MCL_EC_USE_AFFINE
+ #error "not implemented"
+#else
+ const size_t bitLen = _Fp::getModBitLen();
+ const size_t maxBitLen = compressedExpression_ ? (bitLen + 1 + 1) : (bitLen * 2 + 1);
+ if (bv.size() != maxBitLen) {
+ throw cybozu::Exception("EcT:fromBitVec:bad size") << bv.size() << maxBitLen;
+ }
+ if (!bv.get(maxBitLen - 1)) { // if z = 0
+ clear();
+ return;
+ }
+ cybozu::BitVector t;
+ bv.extract(t, 0, bitLen);
+ x.fromBitVec(t);
+ if (compressedExpression_) {
+ bool odd = bv.get(bitLen); // y
+ getYfromX(y, x, odd);
+ } else {
+ bv.extract(t, bitLen, bitLen);
+ y.fromBitVec(t);
+ }
+ z = 1;
+#endif
+ }
+	/*
+		bit size of the |x|y|z| layout used by appendToBitVec() / fromBitVec()
+		n + 1 + 1 if compressed, n + n + 1 otherwise, where n = _Fp::getModBitLen()
+	*/
+	static inline size_t getBitVecSize()
+	{
+		const size_t n = _Fp::getModBitLen();
+		return compressedExpression_ ? (n + 2) : (n * 2 + 1);
+	}
+ static inline void getYfromX(Fp& y, const Fp& x, bool isYodd)
+ {
+ Fp t;
+ Fp::square(t, x);
+ t += a_;
+ t *= x;
+ t += b_;
+ Fp::squareRoot(y, t);
+ if (Fp::isYodd(y) ^ isYodd) {
+ Fp::neg(y, y);
+ }
+ }
+};
+
+template<class T>
+struct TagMultiGr<EcT<T> > {
+ static void square(EcT<T>& z, const EcT<T>& x)
+ {
+ EcT<T>::dbl(z, x);
+ }
+ static void mul(EcT<T>& z, const EcT<T>& x, const EcT<T>& y)
+ {
+ EcT<T>::add(z, x, y);
+ }
+ static void inv(EcT<T>& z, const EcT<T>& x)
+ {
+ EcT<T>::neg(z, x);
+ }
+ static void div(EcT<T>& z, const EcT<T>& x, const EcT<T>& y)
+ {
+ EcT<T>::sub(z, x, y);
+ }
+ static void init(EcT<T>& x)
+ {
+ x.clear();
+ }
+};
+
+template<class _Fp> _Fp EcT<_Fp>::a_;
+template<class _Fp> _Fp EcT<_Fp>::b_;
+template<class _Fp> int EcT<_Fp>::specialA_;
+template<class _Fp> bool EcT<_Fp>::compressedExpression_;
+
+struct EcParam { // one named curve; every number is a string, decimal or with a 0x prefix for hex
+ const char *name; // curve name
+ const char *p; // the prime p (see bitLen)
+ const char *a; // coefficient a of y^2 = x^3 + ax + b
+ const char *b; // coefficient b of y^2 = x^3 + ax + b
+ const char *gx; // NOTE(review): presumably x-coordinate of the base point -- confirm against users of EcParam
+ const char *gy; // NOTE(review): presumably y-coordinate of the base point -- confirm against users of EcParam
+ const char *n; // NOTE(review): presumably the order of the base point -- confirm against users of EcParam
+ size_t bitLen; // bit length of p
+};
+
+} // mcl
+
+namespace std { CYBOZU_NAMESPACE_TR1_BEGIN
+template<class T> struct hash;
+
+template<class _Fp>
+struct hash<mcl::EcT<_Fp> > {
+ size_t operator()(const mcl::EcT<_Fp>& P) const
+ {
+ if (P.isZero()) return 0;
+ P.normalize();
+ uint64_t v = hash<_Fp>()(P.x);
+ v = hash<_Fp>()(P.y, v);
+ return static_cast<size_t>(v);
+ }
+};
+
+CYBOZU_NAMESPACE_TR1_END } // std
diff --git a/include/mcl/ecparam.hpp b/include/mcl/ecparam.hpp
new file mode 100644
index 0000000..a5206e9
--- /dev/null
+++ b/include/mcl/ecparam.hpp
@@ -0,0 +1,161 @@
+#pragma once
+/**
+ @file
+ @brief Elliptic curve parameter
+ @author MITSUNARI Shigeo(@herumi)
+ @license modified new BSD license
+ http://opensource.org/licenses/BSD-3-Clause
+*/
+#include <mcl/ec.hpp>
+
+namespace mcl { namespace ecparam {
+
+const struct mcl::EcParam secp160k1 = {
+ "secp160k1",
+ "0xfffffffffffffffffffffffffffffffeffffac73",
+ "0",
+ "7",
+ "0x3b4c382ce37aa192a4019e763036f4f5dd4d7ebb",
+ "0x938cf935318fdced6bc28286531733c3f03c4fee",
+ "0x100000000000000000001b8fa16dfab9aca16b6b3",
+ 160
+};
+// p=2^160 + 7
+const struct mcl::EcParam p160_1 = {
+ "p160_1",
+ "0x10000000000000000000000000000000000000007",
+ "10",
+ "1343632762150092499701637438970764818528075565078",
+ "1",
+ "1236612389951462151661156731535316138439983579284",
+ "1461501637330902918203683518218126812711137002561",
+ 161
+};
+const struct mcl::EcParam secp192k1 = {
+ "secp192k1",
+ "0xfffffffffffffffffffffffffffffffffffffffeffffee37",
+ "0",
+ "3",
+ "0xdb4ff10ec057e9ae26b07d0280b7f4341da5d1b1eae06c7d",
+ "0x9b2f2f6d9c5628a7844163d015be86344082aa88d95e2f9d",
+ "0xfffffffffffffffffffffffe26f2fc170f69466a74defd8d",
+ 192
+};
+const struct mcl::EcParam secp224k1 = {
+ "secp224k1",
+ "0xfffffffffffffffffffffffffffffffffffffffffffffffeffffe56d",
+ "0",
+ "5",
+ "0xa1455b334df099df30fc28a169a467e9e47075a90f7e650eb6b7a45c",
+ "0x7e089fed7fba344282cafbd6f7e319f7c0b0bd59e2ca4bdb556d61a5",
+ "0x10000000000000000000000000001dce8d2ec6184caf0a971769fb1f7",
+ 224
+};
+const struct mcl::EcParam secp256k1 = {
+ "secp256k1",
+ "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f",
+ "0",
+ "7",
+ "0x79be667ef9dcbbac55a06295ce870b07029bfcdb2dce28d959f2815b16f81798",
+ "0x483ada7726a3c4655da4fbfc0e1108a8fd17b448a68554199c47d08ffb10d4b8",
+ "0xfffffffffffffffffffffffffffffffebaaedce6af48a03bbfd25e8cd0364141",
+ 256
+};
+const struct mcl::EcParam secp384r1 = {
+ "secp384r1",
+ "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff",
+ "-3",
+ "0xb3312fa7e23ee7e4988e056be3f82d19181d9c6efe8141120314088f5013875ac656398d8a2ed19d2a85c8edd3ec2aef",
+ "0xaa87ca22be8b05378eb1c71ef320ad746e1d3b628ba79b9859f741e082542a385502f25dbf55296c3a545e3872760ab7",
+ "0x3617de4a96262c6f5d9e98bf9292dc29f8f41dbd289a147ce9da3113b5f0b8c00a60b1ce1d7e819d7a431d7c90ea0e5f",
+ "0xffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf581a0db248b0a77aecec196accc52973",
+ 384
+};
+const struct mcl::EcParam secp521r1 = {
+ "secp521r1",
+ "0x1ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
+ "-3",
+ "0x51953eb9618e1c9a1f929a21a0b68540eea2da725b99b315f3b8b489918ef109e156193951ec7e937b1652c0bd3bb1bf073573df883d2c34f1ef451fd46b503f00",
+ "0xc6858e06b70404e9cd9e3ecb662395b4429c648139053fb521f828af606b4d3dbaa14b5e77efe75928fe1dc127a2ffa8de3348b3c1856a429bf97e7e31c2e5bd66",
+ "0x11839296a789a3bc0045c8a5fb42c7d1bd998f54449579b446817afbd17273e662c97ee72995ef42640c550b9013fad0761353c7086a272c24088be94769fd16650",
+ "0x1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386409",
+ 521
+};
+const struct mcl::EcParam NIST_P192 = {
+ "NIST_P192",
+ "0xfffffffffffffffffffffffffffffffeffffffffffffffff",
+ "-3",
+ "0x64210519e59c80e70fa7e9ab72243049feb8deecc146b9b1",
+ "0x188da80eb03090f67cbf20eb43a18800f4ff0afd82ff1012",
+ "0x07192b95ffc8da78631011ed6b24cdd573f977a11e794811",
+ "0xffffffffffffffffffffffff99def836146bc9b1b4d22831",
+ 192
+};
+const struct mcl::EcParam NIST_P224 = {
+ "NIST_P224",
+ "0xffffffffffffffffffffffffffffffff000000000000000000000001",
+ "-3",
+ "0xb4050a850c04b3abf54132565044b0b7d7bfd8ba270b39432355ffb4",
+ "0xb70e0cbd6bb4bf7f321390b94a03c1d356c21122343280d6115c1d21",
+ "0xbd376388b5f723fb4c22dfe6cd4375a05a07476444d5819985007e34",
+ "0xffffffffffffffffffffffffffff16a2e0b8f03e13dd29455c5c2a3d",
+ 224
+};
+const struct mcl::EcParam NIST_P256 = {
+ "NIST_P256",
+ "0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff",
+ "-3",
+ "0x5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b",
+ "0x6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296",
+ "0x4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5",
+ "0xffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551",
+ 256
+};
+// same parameters as secp384r1
+const struct mcl::EcParam NIST_P384 = {
+ "NIST_P384",
+ "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff",
+ "-3",
+ "0xb3312fa7e23ee7e4988e056be3f82d19181d9c6efe8141120314088f5013875ac656398d8a2ed19d2a85c8edd3ec2aef",
+ "0xaa87ca22be8b05378eb1c71ef320ad746e1d3b628ba79b9859f741e082542a385502f25dbf55296c3a545e3872760ab7",
+ "0x3617de4a96262c6f5d9e98bf9292dc29f8f41dbd289a147ce9da3113b5f0b8c00a60b1ce1d7e819d7a431d7c90ea0e5f",
+ "0xffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf581a0db248b0a77aecec196accc52973",
+ 384
+};
+// same parameters as secp521r1 (b is written with a leading zero here)
+const struct mcl::EcParam NIST_P521 = {
+ "NIST_P521",
+ "0x1ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
+ "-3",
+ "0x051953eb9618e1c9a1f929a21a0b68540eea2da725b99b315f3b8b489918ef109e156193951ec7e937b1652c0bd3bb1bf073573df883d2c34f1ef451fd46b503f00",
+ "0xc6858e06b70404e9cd9e3ecb662395b4429c648139053fb521f828af606b4d3dbaa14b5e77efe75928fe1dc127a2ffa8de3348b3c1856a429bf97e7e31c2e5bd66",
+ "0x11839296a789a3bc0045c8a5fb42c7d1bd998f54449579b446817afbd17273e662c97ee72995ef42640c550b9013fad0761353c7086a272c24088be94769fd16650",
+ "0x1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386409",
+ 521
+};
+
+} // mcl::ecparam
+
+/*
+	look up a predefined curve by its name field (exact match)
+	@param name [in] e.g. "secp256k1" or "NIST_P256"
+	@return pointer to the matching constant in mcl::ecparam
+	@note throws cybozu::Exception if no entry of the table has that name;
+	ecparam::p160_1 is not part of the table
+*/
+static inline const mcl::EcParam* getEcParam(const std::string& name)
+{
+	static const mcl::EcParam *tbl[] = {
+		&ecparam::secp160k1,
+		&ecparam::secp192k1,
+		&ecparam::secp224k1,
+		&ecparam::secp256k1,
+		&ecparam::secp384r1,
+		&ecparam::secp521r1,
+
+		&ecparam::NIST_P192,
+		&ecparam::NIST_P224,
+		&ecparam::NIST_P256,
+		&ecparam::NIST_P384,
+		&ecparam::NIST_P521,
+	};
+	const mcl::EcParam *const *const end = tbl + CYBOZU_NUM_OF_ARRAY(tbl);
+	for (const mcl::EcParam *const *cur = tbl; cur != end; ++cur) {
+		if (name == (*cur)->name) return *cur;
+	}
+	throw cybozu::Exception("mcl::getEcParam:not support name") << name;
+}
+
+} // mcl
diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
new file mode 100644
index 0000000..a8b980b
--- /dev/null
+++ b/include/mcl/fp.hpp
@@ -0,0 +1,446 @@
+#pragma once
+/**
+ @file
+ @brief finite field class
+ @author MITSUNARI Shigeo(@herumi)
+ @license modified new BSD license
+ http://opensource.org/licenses/BSD-3-Clause
+*/
+#include <sstream>
+#include <vector>
+#ifdef _MSC_VER
+ #pragma warning(push)
+ #pragma warning(disable : 4127)
+ #ifndef NOMINMAX
+ #define NOMINMAX
+ #endif
+#endif
+#if defined(_WIN64) || defined(__x86_64__)
+// #define USE_MONT_FP
+#endif
+#include <cybozu/hash.hpp>
+#include <cybozu/itoa.hpp>
+#include <cybozu/atoi.hpp>
+#include <cybozu/bitvector.hpp>
+#include <mcl/fp_base.hpp>
+#include <mcl/fp_util.hpp>
+#include <mcl/gmp_util.hpp>
+#include <mcl/power.hpp>
+
+#ifndef MCL_FP_BLOCK_MAX_BIT_N
+ #define MCL_FP_BLOCK_MAX_BIT_N 521
+#endif
+
+namespace mcl {
+
+/*
+ view of the value of an FpT as filled in by FpT::getBlock()
+ p points either to the FpT's own v_ or, when op_.fromMont had to convert
+ the value, to the v_ member of this struct; n is op_.N
+*/
+struct Block {
+ typedef fp::Unit Unit;
+ const Unit *p; // pointer to original FpT.v_ (or to v_ below when a converted copy was made)
+ size_t n; // number of Units at p
+ static const size_t UnitByteN = sizeof(Unit);
+ // number of Units needed to hold MCL_FP_BLOCK_MAX_BIT_N bits, rounded up
+ static const size_t maxUnitN = (MCL_FP_BLOCK_MAX_BIT_N + UnitByteN * 8 - 1) / (UnitByteN * 8);
+ Unit v_[maxUnitN]; // buffer that receives the converted copy
+};
+
+template<class tag = fp::TagDefault, size_t maxBitN = MCL_FP_BLOCK_MAX_BIT_N>
+class FpT {
+ typedef fp::Unit Unit;
+ static const size_t UnitByteN = sizeof(Unit);
+ static const size_t maxUnitN = (maxBitN + UnitByteN * 8 - 1) / (UnitByteN * 8);
+ static fp::Op op_;
+ static mcl::SquareRoot sq_;
+ static size_t pBitLen_;
+ template<class tag2, size_t maxBitN2> friend class FpT;
+ Unit v_[maxUnitN];
+public:
+ // return pointer to array v_[]
+ // (the Units exactly as stored; op_.fromMont is not applied)
+ const Unit *getUnit() const { return v_; }
+ // number of Units used by the current modulus (op_.N)
+ size_t getUnitN() const { return op_.N; }
+ typedef Unit BlockType;
+ /*
+ print the op_.N stored Units to stdout in hex, highest index first
+ (v_ is printed as stored; op_.fromMont is not applied)
+ */
+ void dump() const
+ {
+ const size_t N = op_.N;
+ for (size_t i = 0; i < N; i++) {
+ // %llx expects unsigned long long; the former (long long) cast mismatched the format
+ printf("%016llx ", (unsigned long long)v_[N - 1 - i]);
+ }
+ printf("\n");
+ }
+ /*
+ set the modulus shared by every FpT<tag, maxBitN> and select the implementation
+ (MontFp when USE_MONT_FP is defined, otherwise FixedFp) by the bit length of the modulus
+ @param mstr [in] string of the modulus; must not carry a minus sign
+ @param base [in] radix hint forwarded to inFromStr
+ @note throws cybozu::Exception if mstr is minus or cannot be parsed,
+ if its bit length exceeds maxBitN, if Gmp::getRaw() returns 0,
+ or if the selected implementation uses more Units than v_[] holds
+ */
+ static inline void setModulo(const std::string& mstr, int base = 0)
+ {
+ bool isMinus;
+ mpz_class mp;
+ inFromStr(mp, &isMinus, mstr, base);
+ // the old text said "mstr is not minus" although it is raised when mstr IS minus
+ if (isMinus) throw cybozu::Exception("mcl:FpT:setModulo:mstr must not be minus") << mstr;
+ pBitLen_ = Gmp::getBitLen(mp);
+ if (pBitLen_ > maxBitN) throw cybozu::Exception("mcl:FpT:setModulo:too large bitLen") << pBitLen_ << maxBitN;
+ Unit p[maxUnitN] = {};
+ const size_t n = Gmp::getRaw(p, maxUnitN, mp);
+ if (n == 0) throw cybozu::Exception("mcl:FpT:setModulo:bad mstr") << mstr;
+#ifdef USE_MONT_FP
+ if (pBitLen_ <= 128) { op_ = fp::MontFp<tag, 128>::init(p); }
+#if CYBOZU_OS_BIT == 32
+ else if (pBitLen_ <= 160) { static fp::MontFp<tag, 160> f; op_ = f.init(p); }
+#endif
+ else if (pBitLen_ <= 192) { static fp::MontFp<tag, 192> f; op_ = f.init(p); }
+#if CYBOZU_OS_BIT == 32
+ else if (pBitLen_ <= 224) { static fp::MontFp<tag, 224> f; op_ = f.init(p); }
+#endif
+ else if (pBitLen_ <= 256) { static fp::MontFp<tag, 256> f; op_ = f.init(p); }
+ else if (pBitLen_ <= 384) { static fp::MontFp<tag, 384> f; op_ = f.init(p); }
+ else if (pBitLen_ <= 448) { static fp::MontFp<tag, 448> f; op_ = f.init(p); }
+#if CYBOZU_OS_BIT == 32
+ else if (pBitLen_ <= 544) { static fp::MontFp<tag, 544> f; op_ = f.init(p); }
+#else
+ else if (pBitLen_ <= 576) { static fp::MontFp<tag, 576> f; op_ = f.init(p); }
+#endif
+ else { static fp::MontFp<tag, maxBitN> f; op_ = f.init(p); }
+#else
+ if (pBitLen_ <= 128) { op_ = fp::FixedFp<tag, 128>::init(p); }
+#if CYBOZU_OS_BIT == 32
+ else if (pBitLen_ <= 160) { static fp::FixedFp<tag, 160> f; op_ = f.init(p); }
+#endif
+ else if (pBitLen_ <= 192) { static fp::FixedFp<tag, 192> f; op_ = f.init(p); }
+#if CYBOZU_OS_BIT == 32
+ else if (pBitLen_ <= 224) { static fp::FixedFp<tag, 224> f; op_ = f.init(p); }
+#endif
+ else if (pBitLen_ <= 256) { static fp::FixedFp<tag, 256> f; op_ = f.init(p); }
+ else if (pBitLen_ <= 384) { static fp::FixedFp<tag, 384> f; op_ = f.init(p); }
+ else if (pBitLen_ <= 448) { static fp::FixedFp<tag, 448> f; op_ = f.init(p); }
+#if CYBOZU_OS_BIT == 32
+ else if (pBitLen_ <= 544) { static fp::FixedFp<tag, 544> f; op_ = f.init(p); }
+#else
+ else if (pBitLen_ <= 576) { static fp::FixedFp<tag, 576> f; op_ = f.init(p); }
+#endif
+ else { static fp::FixedFp<tag, maxBitN> f; op_ = f.init(p); }
+#endif
+ // The implementation is chosen by rounding the bit length up to a supported size,
+ // so with a non-default maxBitN it can use more Units than v_[maxUnitN] has.
+ // assert() vanishes in release builds, hence the explicit check as well.
+ if (op_.N > maxUnitN) throw cybozu::Exception("mcl:FpT:setModulo:too large N") << op_.N << maxUnitN;
+ assert(op_.N <= maxUnitN);
+ sq_.set(mp);
+ }
+ // write the current modulus (op_.mp) into pstr via Gmp::toStr
+ static inline void getModulo(std::string& pstr)
+ {
+ Gmp::toStr(pstr, op_.mp);
+ }
+ // true if the lowest bit of the value of x (as returned by getBlock) is set
+ static inline bool isYodd(const FpT& x)
+ {
+ Block b;
+ x.getBlock(b);
+ return (b.p[0] & 1) == 1;
+ }
+ /*
+ ask sq_ for a square root of x
+ @param y [out] receives the root; left untouched when sq_.get() reports failure
+ @param x [in] value whose root is wanted
+ @return the result of sq_.get()
+ */
+ static inline bool squareRoot(FpT& y, const FpT& x)
+ {
+ mpz_class in, out;
+ x.toGmp(in);
+ if (sq_.get(out, in)) {
+ y.fromGmp(out);
+ return true;
+ }
+ return false;
+ }
+ // default constructor : v_ is left uninitialized
+ FpT() {}
+ // copy the stored Units of x through op_.copy
+ FpT(const FpT& x)
+ {
+ op_.copy(v_, x.v_);
+ }
+ FpT& operator=(const FpT& x)
+ {
+ op_.copy(v_, x.v_);
+ return *this;
+ }
+ // set the value to zero through op_.clear
+ void clear()
+ {
+ op_.clear(v_);
+ }
+ // implicit conversion from an integer; see operator=(int64_t)
+ FpT(int64_t x) { operator=(x); }
+ // parse str with fromStr (throws cybozu::Exception on bad or too large input)
+ explicit FpT(const std::string& str, int base = 0)
+ {
+ fromStr(str, base);
+ }
+ /*
+ set the value from a signed integer
+ |x| is stored in the low Unit(s), negated through neg() when x < 0,
+ then converted by toMont
+ NOTE(review): |x| is stored without reduction by the modulus -- confirm callers keep |x| below it
+ */
+ FpT& operator=(int64_t x)
+ {
+ clear();
+ if (x) {
+ // take |x| in unsigned arithmetic : -x overflows int64_t when x is INT64_MIN
+ const uint64_t y = x < 0 ? uint64_t(0) - uint64_t(x) : uint64_t(x);
+ if (sizeof(Unit) == 8) {
+ v_[0] = (Unit)y;
+ } else {
+ v_[0] = (uint32_t)y;
+ v_[1] = (uint32_t)(y >> 32);
+ }
+ if (x < 0) neg(*this, *this);
+ toMont(*this, *this);
+ }
+ return *this;
+ }
+ // y = op_.toMont(x); y is not written when op_.toMont is null
+ // (Op() zeroes the pointer, FixedFp::init leaves it so, MontFp::init sets it)
+ void toMont(FpT& y, const FpT& x)
+ {
+ if (op_.toMont) op_.toMont(y.v_, x.v_);
+ }
+ // y = op_.fromMont(x); y is not written when op_.fromMont is null
+ void fromMont(FpT& y, const FpT& x)
+ {
+ if (op_.fromMont) op_.fromMont(y.v_, x.v_);
+ }
+ /*
+ set the value from a string
+ when inFromStr reports a minus sign the negation of the magnitude is stored
+ @note throws cybozu::Exception if the magnitude is not smaller than the modulus
+ (or if inFromStr rejects str)
+ */
+ void fromStr(const std::string& str, int base = 0)
+ {
+ mpz_class mag;
+ bool negative;
+ inFromStr(mag, &negative, str, base);
+ if (mag >= op_.mp) throw cybozu::Exception("fp:FpT:fromStr:large str") << str;
+ fp::local::toArray(v_, op_.N, mag.get_mpz_t());
+ if (negative) neg(*this, *this);
+ toMont(*this, *this);
+ }
+ // alias of fromStr
+ void set(const std::string& str, int base = 0) { fromStr(str, base); }
+ /*
+ set the value from n elements of type S copied byte-wise into v_
+ bytes of v_ beyond the input are zero-filled
+ @note throws cybozu::Exception if the input is longer than op_.N Units
+ or if the resulting value fails isValid()
+ */
+ template<class S>
+ void setRaw(const S *inBuf, size_t n)
+ {
+ const size_t fpByteN = sizeof(Unit) * op_.N;
+ const size_t srcByteN = sizeof(S) * n;
+ if (srcByteN > fpByteN) throw cybozu::Exception("setRaw:bad n") << n << fpByteN;
+ assert(srcByteN <= fpByteN);
+ char *dst = (char *)v_;
+ memcpy(dst, inBuf, srcByteN);
+ memset(dst + srcByteN, 0, fpByteN - srcByteN);
+ if (!isValid()) throw cybozu::Exception("setRaw:large value");
+ toMont(*this, *this);
+ }
+ /*
+ copy the value (as returned by getBlock) into outBuf
+ @param outBuf [out] destination of n elements of type S
+ @return number of elements of S written, including zero padding of the last one
+ @note throws cybozu::Exception if outBuf is shorter than op_.N Units
+ */
+ template<class S>
+ size_t getRaw(S *outBuf, size_t n) const
+ {
+ const size_t fpByteN = sizeof(Unit) * op_.N;
+ const size_t dstByteN = sizeof(S) * n;
+ if (dstByteN < fpByteN) throw cybozu::Exception("getRaw:bad n") << n << fpByteN;
+ assert(dstByteN >= fpByteN);
+ Block b;
+ getBlock(b);
+ char *dst = (char *)outBuf;
+ memcpy(dst, b.p, fpByteN);
+ // round up to whole elements of S and clear the bytes past the value
+ const size_t writeN = (fpByteN + sizeof(S) - 1) / sizeof(S);
+ const size_t padByteN = writeN * sizeof(S) - fpByteN;
+ memset(dst + fpByteN, 0, padByteN);
+ return writeN;
+ }
+ /*
+ expose the value through b
+ when op_.fromMont is set the converted value is written to b.v_ and b.p points there,
+ otherwise b.p points directly at this object's v_
+ */
+ void getBlock(Block& b) const
+ {
+ assert(maxUnitN <= Block::maxUnitN);
+ const Unit *src = &v_[0];
+ if (op_.fromMont) {
+ op_.fromMont(b.v_, v_);
+ src = &b.v_[0];
+ }
+ b.p = src;
+ b.n = op_.N;
+ }
+ /*
+ fill v_ through fp::getRandVal(v_, rg, op_.p, pBitLen_), then apply fromMont
+ NOTE(review): setRaw, fromStr and operator=(int64_t) call toMont after storing
+ a value, whereas this calls fromMont -- confirm which conversion is intended
+ */
+ template<class RG>
+ void setRand(RG& rg)
+ {
+ fp::getRandVal(v_, rg, op_.p, pBitLen_);
+ fromMont(*this, *this);
+ }
+ /*
+ convert the n Units at x to a string
+ @param base [in] 10, 16 or 2
+ @param withPrefix [in] forwarded to toStr16 / toStr2; not used for base 10
+ @note throws cybozu::Exception for any other base
+ */
+ static inline void toStr(std::string& str, const Unit *x, size_t n, int base = 10, bool withPrefix = false)
+ {
+ if (base == 16) {
+ mcl::fp::toStr16(str, x, n, withPrefix);
+ return;
+ }
+ if (base == 2) {
+ mcl::fp::toStr2(str, x, n, withPrefix);
+ return;
+ }
+ if (base != 10) {
+ throw cybozu::Exception("fp:FpT:toStr:bad base") << base;
+ }
+ // decimal goes through GMP
+ mpz_class t;
+ Gmp::setRaw(t, x, n);
+ Gmp::toStr(str, t, 10);
+ }
+ // write the value (as returned by getBlock) to str; see the static toStr for base
+ void toStr(std::string& str, int base = 10, bool withPrefix = false) const
+ {
+ Block b;
+ getBlock(b);
+ toStr(str, b.p, b.n, base, withPrefix);
+ }
+ // same as above but returns the string
+ std::string toStr(int base = 10, bool withPrefix = false) const
+ {
+ std::string str;
+ toStr(str, base, withPrefix);
+ return str;
+ }
+ // x = the value (as returned by getBlock) as mpz_class
+ void toGmp(mpz_class& x) const
+ {
+ Block b;
+ getBlock(b);
+ Gmp::setRaw(x, b.p, b.n);
+ }
+ mpz_class toGmp() const
+ {
+ mpz_class x;
+ toGmp(x);
+ return x;
+ }
+ // set the value from x through setRaw (which throws if x does not pass isValid())
+ void fromGmp(const mpz_class& x)
+ {
+ setRaw(Gmp::getBlock(x), Gmp::getBlockSize(x));
+ }
+ // thin wrappers over the function pointers in op_ (installed by setModulo)
+ static inline void add(FpT& z, const FpT& x, const FpT& y) { op_.add(z.v_, x.v_, y.v_); }
+ static inline void sub(FpT& z, const FpT& x, const FpT& y) { op_.sub(z.v_, x.v_, y.v_); }
+ static inline void mul(FpT& z, const FpT& x, const FpT& y) { op_.mul(z.v_, x.v_, y.v_); }
+ static inline void inv(FpT& y, const FpT& x) { op_.inv(y.v_, x.v_); }
+ static inline void neg(FpT& y, const FpT& x) { op_.neg(y.v_, x.v_); }
+ static inline void square(FpT& y, const FpT& x) { op_.square(y.v_, x.v_); }
+ // z = x * inv(y)
+ static inline void div(FpT& z, const FpT& x, const FpT& y)
+ {
+ FpT rev;
+ inv(rev, y);
+ mul(z, x, rev);
+ }
+ /*
+ z = x raised to the exponent held in y[0..yn)
+ y[0] is processed first, bit 0 upward, with t squared after every bit
+ z is assigned only at the end, so it may be the same object as x
+ yn == 0 gives z = 1
+ */
+ static inline void powerArray(FpT& z, const FpT& x, const Unit *y, size_t yn)
+ {
+ FpT out(1);
+ FpT t(x);
+ for (size_t i = 0; i < yn; i++) {
+ const Unit v = y[i];
+ int m = (int)sizeof(Unit) * 8;
+ if (i == yn - 1) {
+ // last Unit : do not process the zero bits above its highest set bit
+ while (m > 0 && (v & (Unit(1) << (m - 1))) == 0) {
+ m--;
+ }
+ }
+ for (int j = 0; j < m; j++) {
+ if (v & (Unit(1) << j)) {
+ out *= t;
+ }
+ t *= t;
+ }
+ }
+ z = out;
+ }
+ // z = x ^ y where the exponent is the value of another FpT (read through getBlock)
+ template<class tag2, size_t maxBitN2>
+ static inline void power(FpT& z, const FpT& x, const FpT<tag2, maxBitN2>& y)
+ {
+ Block b;
+ y.getBlock(b);
+ powerArray(z, x, b.p, b.n);
+ }
+ // z = x ^ y for a non-negative int; throws cybozu::Exception when y < 0
+ static inline void power(FpT& z, const FpT& x, int y)
+ {
+ if (y < 0) throw cybozu::Exception("FpT:power with negative y is not support") << y;
+ const Unit u = y;
+ powerArray(z, x, &u, 1);
+ }
+ /*
+ z = x ^ y for a non-negative mpz_class exponent
+ @note throws cybozu::Exception when y < 0
+ */
+ static inline void power(FpT& z, const FpT& x, const mpz_class& y)
+ {
+ if (y < 0) throw cybozu::Exception("FpT:power with negative y is not support") << y;
+ // the length must come from the exponent y; it was taken from the base x (an FpT) before
+ powerArray(z, x, Gmp::getBlock(y), Gmp::getBlockSize(y));
+ }
+ // result of op_.isZero on the stored Units
+ bool isZero() const { return op_.isZero(v_); }
+ /*
+ append to bv(not clear bv)
+ pBitLen_ bits of the value (as returned by getBlock) are appended
+ */
+ void appendToBitVec(cybozu::BitVector& bv) const
+ {
+ Block b;
+ getBlock(b);
+ bv.append(b.p, pBitLen_);
+ }
+ // true if the stored Units, compared as an op_.N-Unit integer, are less than the modulus op_.p
+ bool isValid() const
+ {
+ return fp::local::compareArray(v_, op_.p, op_.N) < 0;
+ }
+ // set the value from bv through setRaw; bv must hold exactly pBitLen_ bits or cybozu::Exception is thrown
+ void fromBitVec(const cybozu::BitVector& bv)
+ {
+ if (bv.size() != pBitLen_) throw cybozu::Exception("FpT:fromBitVec:bad size") << bv.size() << pBitLen_;
+ setRaw(bv.getBlock(), bv.getBlockSize());
+ }
+ // both return the bit length of the modulus recorded by setModulo
+ static inline size_t getModBitLen() { return pBitLen_; }
+ static inline size_t getBitVecSize() { return pBitLen_; }
+ // equality compares the first op_.N stored Units
+ bool operator==(const FpT& rhs) const { return fp::local::isEqualArray(v_, rhs.v_, op_.N); }
+ bool operator!=(const FpT& rhs) const { return !operator==(rhs); }
+ // value-returning and compound operators built on add / sub / mul / div / neg
+ inline friend FpT operator+(const FpT& x, const FpT& y) { FpT z; add(z, x, y); return z; }
+ inline friend FpT operator-(const FpT& x, const FpT& y) { FpT z; sub(z, x, y); return z; }
+ inline friend FpT operator*(const FpT& x, const FpT& y) { FpT z; mul(z, x, y); return z; }
+ inline friend FpT operator/(const FpT& x, const FpT& y) { FpT z; div(z, x, y); return z; }
+ FpT& operator+=(const FpT& x) { add(*this, *this, x); return *this; }
+ FpT& operator-=(const FpT& x) { sub(*this, *this, x); return *this; }
+ FpT& operator*=(const FpT& x) { mul(*this, *this, x); return *this; }
+ FpT& operator/=(const FpT& x) { div(*this, *this, x); return *this; }
+ FpT operator-() const { FpT x; neg(x, *this); return x; }
+ /*
+ write self to os in hex when std::ios_base::hex is set, otherwise in decimal;
+ std::ios_base::showbase requests the prefix
+ @note throws cybozu::Exception when std::ios_base::oct is set
+ */
+ friend inline std::ostream& operator<<(std::ostream& os, const FpT& self)
+ {
+ const std::ios_base::fmtflags flags = os.flags();
+ if (flags & std::ios_base::oct) throw cybozu::Exception("fpT:operator<<:oct is not supported");
+ const bool isHex = (flags & std::ios_base::hex) != 0;
+ const bool withPrefix = (flags & std::ios_base::showbase) != 0;
+ return os << self.toStr(isHex ? 16 : 10, withPrefix);
+ }
+ /*
+ read one whitespace-delimited token from is and parse it with fromStr
+ base 16 is passed when std::ios_base::hex is set, otherwise 0
+ @note throws cybozu::Exception when std::ios_base::oct is set
+ */
+ friend inline std::istream& operator>>(std::istream& is, FpT& self)
+ {
+ const std::ios_base::fmtflags flags = is.flags();
+ if (flags & std::ios_base::oct) throw cybozu::Exception("fpT:operator>>:oct is not supported");
+ std::string token;
+ is >> token;
+ self.fromStr(token, (flags & std::ios_base::hex) ? 16 : 0);
+ return is;
+ }
+ /*
+ not supported:
+ getBitLen, operator<, operator>
+ */
+ /*
+ QQQ : should be removed
+ placeholder that always returns false; it does not order values (use compare())
+ */
+ bool operator<(const FpT&) const { return false; }
+ /*
+ compare the values of x and y (as returned by getBlock) as xb.n-Unit integers
+ @return -1 if x < y, 1 if x > y, 0 if equal
+ */
+ static inline int compare(const FpT& x, const FpT& y)
+ {
+ Block xb, yb;
+ x.getBlock(xb);
+ y.getBlock(yb);
+ return fp::local::compareArray(xb.p, yb.p, xb.n);
+ }
+private:
+ /*
+ parse str into x
+ fp::verifyStr receives isMinus and &base and returns the text handed to Gmp::fromStr
+ @note throws cybozu::Exception when Gmp::fromStr reports failure
+ */
+ static inline void inFromStr(mpz_class& x, bool *isMinus, const std::string& str, int base)
+ {
+ const char *digits = fp::verifyStr(isMinus, &base, str);
+ const bool ok = Gmp::fromStr(x, digits, base);
+ if (ok) return;
+ throw cybozu::Exception("fp:FpT:inFromStr") << str;
+ }
+};
+
+template<class tag, size_t maxBitN> fp::Op FpT<tag, maxBitN>::op_;
+template<class tag, size_t maxBitN> mcl::SquareRoot FpT<tag, maxBitN>::sq_;
+template<class tag, size_t maxBitN> size_t FpT<tag, maxBitN>::pBitLen_;
+
+namespace power_impl {
+
+/*
+ z = x ^ y for a group element x and an FpT exponent y
+ the exponent is read through y.getBlock() and handed to mcl::power_impl::powerArray
+*/
+template<class G, class tag, size_t bitN, template<class _tag, size_t _bitN>class FpT>
+void power(G& z, const G& x, const FpT<tag, bitN>& y)
+{
+ Block b;
+ y.getBlock(b);
+ mcl::power_impl::powerArray(z, x, b.p, b.n);
+}
+
+} // mcl::power_impl
+} // mcl
+
+namespace std { CYBOZU_NAMESPACE_TR1_BEGIN
+template<class T> struct hash;
+
+/*
+ hash of an FpT : cybozu::hash64 over the Units returned by getUnit()
+ (the Units as stored, getUnitN() of them), seeded with v
+*/
+template<class tag, size_t maxBitN>
+struct hash<mcl::FpT<tag, maxBitN> > : public std::unary_function<mcl::FpT<tag, maxBitN>, size_t> {
+ size_t operator()(const mcl::FpT<tag, maxBitN>& x, uint64_t v = 0) const
+ {
+ return static_cast<size_t>(cybozu::hash64(x.getUnit(), x.getUnitN(), v));
+ }
+};
+
+CYBOZU_NAMESPACE_TR1_END } // std::tr1
+
+// must use the same guard as the #pragma warning(push) at the top of this file (_MSC_VER);
+// _WIN32 is also defined by non-MSVC Windows compilers, which would see a pop without a push
+#ifdef _MSC_VER
+ #pragma warning(pop)
+#endif
diff --git a/include/mcl/fp_base.hpp b/include/mcl/fp_base.hpp
new file mode 100644
index 0000000..0fb174f
--- /dev/null
+++ b/include/mcl/fp_base.hpp
@@ -0,0 +1,527 @@
+#pragma once
+/**
+ @file
+ @brief basic operation
+ @author MITSUNARI Shigeo(@herumi)
+ @license modified new BSD license
+ http://opensource.org/licenses/BSD-3-Clause
+*/
+#ifdef _MSC_VER
+ #pragma warning(push)
+ #pragma warning(disable : 4616)
+ #pragma warning(disable : 4800)
+ #pragma warning(disable : 4244)
+ #pragma warning(disable : 4127)
+ #pragma warning(disable : 4512)
+ #pragma warning(disable : 4146)
+#endif
+#include <stdint.h>
+#include <assert.h>
+#include <mcl/gmp_util.hpp>
+#ifdef _MSC_VER
+ #pragma warning(pop)
+#endif
+#include <cybozu/inttype.hpp>
+#ifdef USE_MONT_FP
+#include <mcl/fp_generator.hpp>
+#endif
+
+namespace mcl { namespace fp {
+
+#if defined(CYBOZU_OS_BIT) && (CYBOZU_OS_BIT == 32)
+typedef uint32_t Unit;
+#else
+typedef uint64_t Unit;
+#endif
+
+// function pointer types for operations on Unit arrays; the name gives the
+// return type and the number of pointer arguments (first argument is the non-const one)
+typedef void (*void1op)(Unit*);
+typedef void (*void2op)(Unit*, const Unit*);
+typedef void (*void3op)(Unit*, const Unit*, const Unit*);
+typedef void (*void4op)(Unit*, const Unit*, const Unit*, const Unit*);
+typedef int (*int2op)(Unit*, const Unit*);
+// like void4op with one extra Unit passed by value
+typedef void (*void4Iop)(Unit*, const Unit*, const Unit*, const Unit*, Unit);
+
+} } // mcl::fp
+
+#ifdef MCL_USE_LLVM
+
+extern "C" {
+
+// declare the external routines for one operand size `len`:
+// mcl_fp_add<len>S/L, mcl_fp_sub<len>S/L, mcl_fp_mul<len>pre and mcl_fp_mont<len>
+// (FixedFp passes z, x, y and the modulus p_ to the add/sub routines)
+#define MCL_FP_DEF_FUNC(len) \
+void mcl_fp_add ## len ## S(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); \
+void mcl_fp_add ## len ## L(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); \
+void mcl_fp_sub ## len ## S(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); \
+void mcl_fp_sub ## len ## L(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); \
+void mcl_fp_mul ## len ## pre(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); \
+void mcl_fp_mont ## len(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, mcl::fp::Unit);
+
+MCL_FP_DEF_FUNC(128)
+MCL_FP_DEF_FUNC(192)
+MCL_FP_DEF_FUNC(256)
+MCL_FP_DEF_FUNC(320)
+MCL_FP_DEF_FUNC(384)
+MCL_FP_DEF_FUNC(448)
+MCL_FP_DEF_FUNC(512)
+#if CYBOZU_OS_BIT == 32
+MCL_FP_DEF_FUNC(160)
+MCL_FP_DEF_FUNC(224)
+MCL_FP_DEF_FUNC(288)
+MCL_FP_DEF_FUNC(352)
+MCL_FP_DEF_FUNC(416)
+MCL_FP_DEF_FUNC(480)
+MCL_FP_DEF_FUNC(544)
+#else
+MCL_FP_DEF_FUNC(576)
+#endif
+
+void mcl_fp_mul_NIST_P192(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*);
+
+}
+
+#endif
+
+namespace mcl { namespace fp {
+
+namespace local {
+
+/*
+ compare x[0..n) with y[0..n) starting from index n - 1 downward
+ @return -1 if x < y, 1 if x > y, 0 if every Unit is equal (also when n is 0)
+*/
+inline int compareArray(const Unit* x, const Unit* y, size_t n)
+{
+ while (n > 0) {
+ n--;
+ const Unit a = x[n];
+ const Unit b = y[n];
+ if (a != b) return a < b ? -1 : 1;
+ }
+ return 0;
+}
+
+// true if x[i] == y[i] for every i in [0, n)
+inline bool isEqualArray(const Unit* x, const Unit* y, size_t n)
+{
+ const Unit *const end = x + n;
+ while (x != end) {
+ if (*x++ != *y++) return false;
+ }
+ return true;
+}
+
+// true if every Unit of x[0..n) is zero
+inline bool isZeroArray(const Unit *x, size_t n)
+{
+ size_t pos = 0;
+ while (pos < n && x[pos] == 0) pos++;
+ return pos == n;
+}
+
+// set x[begin..end) to zero; nothing is written when begin >= end
+inline void clearArray(Unit *x, size_t begin, size_t end)
+{
+ size_t pos = begin;
+ while (pos < end) x[pos++] = 0;
+}
+
+// copy x[0..n) to y[0..n), lowest index first
+inline void copyArray(Unit *y, const Unit *x, size_t n)
+{
+ size_t pos = 0;
+ while (pos != n) {
+ y[pos] = x[pos];
+ pos++;
+ }
+}
+
+/*
+ copy the limbs of x into y[0..yn) and zero-fill the remaining Units
+ x must be non-negative and use at most yn limbs (checked by assert only)
+ the limb pointer is reinterpreted as Unit*, so mp_limb_t and Unit must have
+ the same size (FixedFp::setModulo asserts this)
+*/
+inline void toArray(Unit *y, size_t yn, const mpz_srcptr x)
+{
+ const int xn = x->_mp_size;
+ assert(xn >= 0);
+ const Unit* xp = (const Unit*)x->_mp_d;
+ assert(xn <= (int)yn);
+ copyArray(y, xp, xn);
+ clearArray(y, xn, yn);
+}
+
+} // mcl::fp::local
+struct TagDefault;
+
+/*
+ table of operations for one modulus, returned by FixedFp::init / MontFp::init
+ and stored in FpT::op_; every pointer is null until an init() fills it in
+*/
+struct Op {
+ mpz_class mp; // modulus as mpz_class
+ const Unit* p; // modulus as Unit array (points into the implementation's p_)
+ size_t N; // number of Units per element
+ bool (*isZero)(const Unit*);
+ void1op clear;
+ void2op neg;
+ void2op inv;
+ void2op square;
+ void2op copy;
+ void3op add;
+ void3op sub;
+ void3op mul;
+ // for Montgomery
+ // (left null by FixedFp::init, set by MontFp::init; FpT skips the conversion when null)
+ void2op toMont;
+ void2op fromMont;
+ Op()
+ : p(0), N(0), isZero(0), clear(0), neg(0), inv(0)
+ , square(0), copy(0),add(0), sub(0), mul(0), toMont(0), fromMont(0)
+ {
+ }
+};
+
+template<class tag, size_t bitN>
+struct FixedFp {
+ typedef fp::Unit Unit;
+ static const size_t N = (bitN + sizeof(Unit) * 8 - 1) / (sizeof(Unit) * 8);
+ static mpz_class mp_;
+ static Unit p_[N];
+ // store the modulus : p_ receives the N Units at p, mp_ the same value as mpz_class
+ static inline void setModulo(const Unit* p)
+ {
+ assert(N >= 2);
+ // Unit arrays are handed to GMP as limb arrays (see set_mpz_t)
+ assert(sizeof(mp_limb_t) == sizeof(Unit));
+ copy(p_, p);
+ Gmp::setRaw(mp_, p, N);
+ }
+ /*
+ make z refer to the n Units at p without copying them
+ _mp_size is n minus the number of zero Units at the high end, _mp_alloc is n
+ z only borrows p (constness is cast away)
+ */
+ static inline void set_mpz_t(mpz_t& z, const Unit* p, int n = (int)N)
+ {
+ z->_mp_alloc = n;
+ int i = n;
+ while (i > 0 && p[i - 1] == 0) {
+ i--;
+ }
+ z->_mp_size = i;
+ z->_mp_d = (mp_limb_t*)const_cast<Unit*>(p);
+ }
+ // make z the value 0 with the n Units at p as its limb storage (used as an output buffer)
+ static inline void set_zero(mpz_t& z, Unit *p, size_t n)
+ {
+ z->_mp_alloc = (int)n;
+ z->_mp_size = 0;
+ z->_mp_d = (mp_limb_t*)p;
+ }
+ // x[0..N) = 0
+ static inline void clear(Unit *x)
+ {
+ local::clearArray(x, 0, N);
+ }
+ // y[0..N) = x[0..N)
+ static inline void copy(Unit *y, const Unit *x)
+ {
+ local::copyArray(y, x, N);
+ }
+ /*
+ z = x + y, with the modulus subtracted once when the sum is not below it
+ the sum is computed by GMP into the local buffer ret, then copied to z
+ */
+ static inline void add(Unit *z, const Unit *x, const Unit *y)
+ {
+ Unit ret[N + 2]; // not N + 1
+ mpz_t mz, mx, my;
+ set_zero(mz, ret, N + 2);
+ set_mpz_t(mx, x);
+ set_mpz_t(my, y);
+ mpz_add(mz, mx, my);
+ if (mpz_cmp(mz, mp_.get_mpz_t()) >= 0) {
+ mpz_sub(mz, mz, mp_.get_mpz_t());
+ }
+ local::toArray(z, N, mz);
+ }
+// wrappers that bind the modulus p_ to the external mcl_fp_add<len>S/L and
+// mcl_fp_sub<len>S/L routines so that they fit the void3op signature
+// (only built with MCL_USE_LLVM; the set of sizes depends on CYBOZU_OS_BIT)
+#ifdef MCL_USE_LLVM
+#if CYBOZU_OS_BIT == 64
+ static inline void add128(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add128S(z, x, y, p_); }
+ static inline void sub128(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub128S(z, x, y, p_); }
+ static inline void add192(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add192S(z, x, y, p_); }
+ static inline void sub192(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub192S(z, x, y, p_); }
+ static inline void add256(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add256S(z, x, y, p_); }
+ static inline void sub256(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub256S(z, x, y, p_); }
+ static inline void add384(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add384L(z, x, y, p_); }
+ static inline void sub384(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub384L(z, x, y, p_); }
+
+ static inline void add576(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add576L(z, x, y, p_); }
+ static inline void sub576(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub576L(z, x, y, p_); }
+#else
+ static inline void add128(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add128S(z, x, y, p_); }
+ static inline void sub128(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub128S(z, x, y, p_); }
+ static inline void add192(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add192L(z, x, y, p_); }
+ static inline void sub192(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub192L(z, x, y, p_); }
+ static inline void add256(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add256L(z, x, y, p_); }
+ static inline void sub256(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub256L(z, x, y, p_); }
+ static inline void add384(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add384L(z, x, y, p_); }
+ static inline void sub384(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub384L(z, x, y, p_); }
+
+ static inline void add160(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add160L(z, x, y, p_); }
+ static inline void sub160(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub160L(z, x, y, p_); }
+ static inline void add224(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add224L(z, x, y, p_); }
+ static inline void sub224(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub224L(z, x, y, p_); }
+ static inline void add544(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add544L(z, x, y, p_); }
+ static inline void sub544(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub544L(z, x, y, p_); }
+#endif
+#endif
+ /*
+ z = x - y, with the modulus added once when the difference is negative
+ the difference is computed by GMP into the local buffer ret, then copied to z
+ */
+ static inline void sub(Unit *z, const Unit *x, const Unit *y)
+ {
+ Unit ret[N + 1];
+ mpz_t mz, mx, my;
+ set_zero(mz, ret, N + 1);
+ set_mpz_t(mx, x);
+ set_mpz_t(my, y);
+ mpz_sub(mz, mx, my);
+ if (mpz_sgn(mz) < 0) {
+ mpz_add(mz, mz, mp_.get_mpz_t());
+ }
+ local::toArray(z, N, mz);
+ }
+ /*
+ z = (x * y) mod p
+ with MCL_USE_LLVM the product is formed by mcl_fp_mul<len>pre for the listed
+ sizes and reduced by mod(); every other case uses mpz_mul + mpz_mod
+ NOTE(review): the routine is selected by "bitN <= len"; for a bitN that is not
+ one of the listed sizes the routine would be wider than N Units -- confirm
+ only matching sizes are instantiated
+ */
+ static inline void mul(Unit *z, const Unit *x, const Unit *y)
+ {
+ Unit ret[N * 2];
+#ifdef MCL_USE_LLVM
+#if CYBOZU_OS_BIT == 64
+ if (bitN <= 128) { mcl_fp_mul128pre(ret, x, y); mod(z, ret); return; }
+ if (bitN <= 192) { mcl_fp_mul192pre(ret, x, y); mod(z, ret); return; }
+ if (bitN <= 256) { mcl_fp_mul256pre(ret, x, y); mod(z, ret); return; }
+ if (bitN <= 384) { mcl_fp_mul384pre(ret, x, y); mod(z, ret); return; }
+// if (bitN <= 576) { mcl_fp_mul576pre(ret, x, y); mod(z, ret); return; }
+#else
+ if (bitN <= 128) { mcl_fp_mul128pre(ret, x, y); mod(z, ret); return; }
+ if (bitN <= 160) { mcl_fp_mul160pre(ret, x, y); mod(z, ret); return; }
+ if (bitN <= 192) { mcl_fp_mul192pre(ret, x, y); mod(z, ret); return; }
+ if (bitN <= 224) { mcl_fp_mul224pre(ret, x, y); mod(z, ret); return; }
+// if (bitN <= 256) { mcl_fp_mul256pre(ret, x, y); mod(z, ret); return; }
+// if (bitN <= 384) { mcl_fp_mul384pre(ret, x, y); mod(z, ret); return; }
+// if (bitN <= 544) { mcl_fp_mul544pre(ret, x, y); mod(z, ret); return; }
+#endif
+#endif
+#if 0
+ pre_mul(ret, x, y);
+ mod(z, ret);
+#else
+ // GMP path : the 2N-Unit local buffer ret backs the product
+ mpz_t mx, my, mz;
+ set_zero(mz, ret, N * 2);
+ set_mpz_t(mx, x);
+ set_mpz_t(my, y);
+ mpz_mul(mz, mx, my);
+ mpz_mod(mz, mz, mp_.get_mpz_t());
+ local::toArray(z, N, mz);
+#endif
+ }
+ // z[0..2N) = x * y without reduction; z itself is used as the GMP output buffer
+ static inline void pre_mul(Unit *z, const Unit *x, const Unit *y)
+ {
+ mpz_t mx, my, mz;
+ set_zero(mz, z, N * 2);
+ set_mpz_t(mx, x);
+ set_mpz_t(my, y);
+ mpz_mul(mz, mx, my);
+ local::toArray(z, N * 2, mz);
+ }
+ // x[N * 2] -> y[N]
+ // y = x mod p; GMP writes into y directly and the Units above the result are cleared
+ static inline void mod(Unit *y, const Unit *x)
+ {
+ mpz_t mx, my;
+ set_mpz_t(mx, x, N * 2);
+ set_mpz_t(my, y, N);
+ mpz_mod(my, mx, mp_.get_mpz_t());
+ local::clearArray(y, my->_mp_size, N);
+ }
+ // z = x * x mod p (implemented with mul)
+ static inline void square(Unit *z, const Unit *x)
+ {
+ mul(z, x, x); // QQQ : use powMod with 2?
+ }
+ /*
+ y = inverse of x modulo p computed by mpz_invert
+ NOTE(review): the return value of mpz_invert is not checked, so a
+ non-invertible x (e.g. 0) is not reported -- confirm callers exclude it
+ */
+ static inline void inv(Unit *y, const Unit *x)
+ {
+ mpz_class my;
+ mpz_t mx;
+ set_mpz_t(mx, x);
+ mpz_invert(my.get_mpz_t(), mx, mp_.get_mpz_t());
+ local::toArray(y, N, my.get_mpz_t());
+ }
+ // true if every Unit of x[0..N) is zero
+ static inline bool isZero(const Unit *x)
+ {
+ return local::isZeroArray(x, N);
+ }
+ // y = -x mod p : zero stays zero, otherwise y = p - x
+ static inline void neg(Unit *y, const Unit *x)
+ {
+ if (isZero(x)) {
+ // when x and y are the same buffer it already holds zero
+ if (x != y) clear(y);
+ return;
+ }
+ sub(y, p_, x);
+ }
+ /*
+ store the modulus p (N Units) and return the table of operations for it
+ toMont / fromMont are left null (no Montgomery conversion for FixedFp)
+ with MCL_USE_LLVM, add / sub use the external routine whose operand width
+ equals N Units, and the generic GMP add / sub when there is no such routine
+ */
+ static inline Op init(const Unit *p)
+ {
+ setModulo(p);
+ Op op;
+ op.N = N;
+ op.isZero = &isZero;
+ op.clear = &clear;
+ op.neg = &neg;
+ op.inv = &inv;
+ op.square = &square;
+ op.copy = &copy;
+#ifdef MCL_USE_LLVM
+ printf("fp2 use llvm bitN=%zu\n", bitN); // bitN is size_t : %zu, not %zd
+ // The external routines work on a fixed number of Units, so one may be used only
+ // when it is exactly as wide as this instance. Selecting by "bitN <= size" gave
+ // e.g. the 448-bit instance the 576-bit (544-bit on 32-bit) routine, which reads
+ // past p_[N] and writes past the N Units of the result.
+ const size_t unitBitN = N * sizeof(Unit) * 8;
+ if (unitBitN == 128) {
+ op.add = &add128;
+ op.sub = &sub128;
+ } else
+#if CYBOZU_OS_BIT == 32
+ if (unitBitN == 160) {
+ op.add = &add160;
+ op.sub = &sub160;
+ } else
+#endif
+ if (unitBitN == 192) {
+ op.add = &add192;
+ op.sub = &sub192;
+ } else
+#if CYBOZU_OS_BIT == 32
+ if (unitBitN == 224) {
+ op.add = &add224;
+ op.sub = &sub224;
+ } else
+#endif
+ if (unitBitN == 256) {
+ op.add = &add256;
+ op.sub = &sub256;
+ } else
+ if (unitBitN == 384) {
+ op.add = &add384;
+ op.sub = &sub384;
+ } else
+#if CYBOZU_OS_BIT == 64
+ if (unitBitN == 576) {
+ op.add = &add576;
+ op.sub = &sub576;
+ } else
+#else
+ if (unitBitN == 544) {
+ op.add = &add544;
+ op.sub = &sub544;
+ } else
+#endif
+#endif
+ {
+ // generic GMP based implementation (also the only one without MCL_USE_LLVM)
+ op.add = &add;
+ op.sub = &sub;
+ }
+#ifdef MCL_USE_LLVM
+ if (mp_ == mpz_class("0xfffffffffffffffffffffffffffffffeffffffffffffffff")) {
+ op.mul = &mcl_fp_mul_NIST_P192; // slower than MontFp192
+ } else
+#endif
+ {
+ op.mul = &mul;
+ }
+ op.mp = mp_;
+ op.p = &p_[0];
+ return op;
+ }
+};
+
+template<class tag, size_t bitN> mpz_class FixedFp<tag, bitN>::mp_;
+template<class tag, size_t bitN> fp::Unit FixedFp<tag, bitN>::p_[FixedFp<tag, bitN>::N];
+
+#ifdef USE_MONT_FP
+template<class tag, size_t bitN>
+struct MontFp {
+ typedef fp::Unit Unit;
+ static const size_t N = (bitN + sizeof(Unit) * 8 - 1) / (sizeof(Unit) * 8);
+ static const size_t invTblN = N * sizeof(Unit) * 8 * 2;
+ static mpz_class mp_;
+// static mcl::SquareRoot sq_;
+ static Unit p_[N];
+ static Unit one_[N];
+ static Unit R_[N]; // (1 << (N * 64)) % p
+ static Unit RR_[N]; // (R * R) % p
+ static Unit invTbl_[invTblN][N];
+ static size_t modBitLen_;
+ static FpGenerator fg_;
+ static void3op add_;
+ static void3op mul_;
+
+ // y[0..N) = x (x must be non-negative and fit in N Units, see local::toArray)
+ static inline void fromRawGmp(Unit *y, const mpz_class& x)
+ {
+ local::toArray(y, N, x.get_mpz_t());
+ }
+ /*
+ store the modulus and derive the Montgomery constants
+ one_ = 1, R_ = 2^(N * 64) mod p, RR_ = R_^2 mod p
+ then let fg_ generate code for this modulus and fetch its add / mul entry points
+ */
+ static inline void setModulo(const Unit *p)
+ {
+ copy(p_, p);
+ Gmp::setRaw(mp_, p, N);
+// sq_.set(pOrg_);
+
+ mpz_class t = 1;
+ fromRawGmp(one_, t);
+ t = (t << (N * 64)) % mp_;
+ fromRawGmp(R_, t);
+ t = (t * t) % mp_;
+ fromRawGmp(RR_, t);
+ fg_.init(p_, N);
+
+ add_ = Xbyak::CastTo<void3op>(fg_.add_);
+ mul_ = Xbyak::CastTo<void3op>(fg_.mul_);
+ }
+ /*
+ fill the table used by invC
+ t starts as toMont(2) and is doubled with add_ after every store, so
+ invTbl[invTblN - 1 - i] holds 2^(i + 1) converted by toMont
+ must be called after setModulo (uses add_ and, through toMont, mul_ and RR_)
+ */
+ static void initInvTbl(Unit invTbl[invTblN][N])
+ {
+ Unit t[N];
+ clear(t);
+ t[0] = 2;
+ toMont(t, t);
+ for (int i = 0; i < invTblN; i++) {
+ copy(invTbl[invTblN - 1 - i], t);
+ add_(t, t, t);
+ }
+ }
+ // x[0..N) = 0
+ static inline void clear(Unit *x)
+ {
+ local::clearArray(x, 0, N);
+ }
+ // y[0..N) = x[0..N)
+ static inline void copy(Unit *y, const Unit *x)
+ {
+ local::copyArray(y, x, N);
+ }
+ // true if every Unit of x[0..N) is zero
+ static inline bool isZero(const Unit *x)
+ {
+ return local::isZeroArray(x, N);
+ }
+ /*
+ y = inverse of x, built from the generated preInv routine and the table invTbl_
+ NOTE(review): k indexes invTbl_, which has invTblN rows; the range of k
+ returned by preInv is not visible here -- confirm k < invTblN
+ */
+ static inline void invC(Unit *y, const Unit *x)
+ {
+ const int2op preInv = Xbyak::CastTo<int2op>(fg_.preInv_);
+ Unit r[N];
+ int k = preInv(r, x);
+ /*
+ xr = 2^k
+ R = 2^(N * 64)
+ get r2^(-k)R^2 = r 2^(N * 64 * 2 - k)
+ */
+ mul_(y, r, invTbl_[k]);
+ }
+ // y = mul_(x, x); used by init when fg_ provides no square routine
+ static inline void squareC(Unit *y, const Unit *x)
+ {
+ mul_(y, x, x);
+ }
+ // convert to the internal form : y = mul_(x, RR_)
+ static inline void toMont(Unit *y, const Unit *x)
+ {
+ mul_(y, x, RR_);
+ }
+ // convert back from the internal form : y = mul_(x, one_)
+ static inline void fromMont(Unit *y, const Unit *x)
+ {
+ mul_(y, x, one_);
+ }
+ /*
+ store the modulus p, build the table of operations from the generated code
+ (squareC is used when fg_ supplies no square routine) and fill invTbl_
+ prints "use MontFp2" to stdout on every call
+ */
+ static inline Op init(const Unit *p)
+ {
+puts("use MontFp2");
+ setModulo(p);
+ Op op;
+ op.N = N;
+ op.isZero = &isZero;
+ op.clear = &clear;
+ op.neg = Xbyak::CastTo<void2op>(fg_.neg_);
+ op.inv = &invC;
+ op.square = Xbyak::CastTo<void2op>(fg_.sqr_);
+ if (op.square == 0) op.square = &squareC;
+ op.copy = &copy;
+ op.add = add_;
+ op.sub = Xbyak::CastTo<void3op>(fg_.sub_);
+ op.mul = mul_;
+ op.mp = mp_;
+ op.p = &p_[0];
+ op.toMont = &toMont;
+ op.fromMont = &fromMont;
+
+// shr1 = Xbyak::CastTo<void2op>(fg_.shr1_);
+// addNc = Xbyak::CastTo<bool3op>(fg_.addNc_);
+// subNc = Xbyak::CastTo<bool3op>(fg_.subNc_);
+ // needs add_ / mul_ / RR_, which setModulo above has just set
+ initInvTbl(invTbl_);
+ return op;
+ }
+};
+template<class tag, size_t bitN> mpz_class MontFp<tag, bitN>::mp_;
+template<class tag, size_t bitN> fp::Unit MontFp<tag, bitN>::p_[MontFp<tag, bitN>::N];
+template<class tag, size_t bitN> fp::Unit MontFp<tag, bitN>::one_[MontFp<tag, bitN>::N];
+template<class tag, size_t bitN> fp::Unit MontFp<tag, bitN>::R_[MontFp<tag, bitN>::N];
+template<class tag, size_t bitN> fp::Unit MontFp<tag, bitN>::RR_[MontFp<tag, bitN>::N];
+template<class tag, size_t bitN> fp::Unit MontFp<tag, bitN>::invTbl_[MontFp<tag, bitN>::invTblN][MontFp<tag, bitN>::N];
+template<class tag, size_t bitN> size_t MontFp<tag, bitN>::modBitLen_;
+template<class tag, size_t bitN> FpGenerator MontFp<tag, bitN>::fg_;
+template<class tag, size_t bitN> void3op MontFp<tag, bitN>::add_;
+template<class tag, size_t bitN> void3op MontFp<tag, bitN>::mul_;
+#endif
+
+} } // mcl::fp
diff --git a/include/mcl/fp_generator.hpp b/include/mcl/fp_generator.hpp
new file mode 100644
index 0000000..9820ca9
--- /dev/null
+++ b/include/mcl/fp_generator.hpp
@@ -0,0 +1,1675 @@
+#pragma once
+/**
+ @file
+ @brief Fp generator
+ @author MITSUNARI Shigeo(@herumi)
+ @license modified new BSD license
+ http://opensource.org/licenses/BSD-3-Clause
+*/
+#include <stdio.h>
+#include <assert.h>
+#include <cybozu/exception.hpp>
+
+namespace mcl {
+
+namespace montgomery {
+
+/*
+	compute pp such that p * pp = -1 mod M,
+	where p is prime and M = 1 << 64 (or 1 << 32), i.e. M = 2^(bit size of T).
+	pp is built one bit per iteration, lowest bit first.
+	@param pLow [in] p mod M
+	@return pp
+	T is uint32_t or uint64_t
+*/
+template<class T>
+T getCoff(T pLow)
+{
+	const size_t bitLen = sizeof(T) * 8;
+	T pp = 0;
+	T acc = 0; // its lowest bit decides whether bit i of pp is set
+	for (size_t i = 0; i < bitLen; i++) {
+		const bool isEven = (acc & 1) == 0;
+		if (isEven) {
+			acc += pLow;
+			pp += T(T(1) << i);
+		}
+		acc >>= 1;
+	}
+	return pp;
+}
+
+} } // mcl::montgomery
+
+#if (CYBOZU_HOST == CYBOZU_HOST_INTEL) && (CYBOZU_OS_BIT == 64)
+
+#ifndef XBYAK_NO_OP_NAMES
+ #define XBYAK_NO_OP_NAMES
+#endif
+#include <xbyak/xbyak.h>
+#include <xbyak/xbyak_util.h>
+
+namespace mcl {
+
+namespace fp_gen_local {
+
+/*
+	reference to one 64-bit element that lives either in a register (r_ != 0)
+	or in memory at *m_ + offset_ * sizeof(size_t).
+	only pointers are stored, so the referenced Reg64 / RegExp must outlive this object.
+*/
+class MemReg {
+ const Xbyak::Reg64 *r_;
+ const Xbyak::RegExp *m_;
+ size_t offset_;
+public:
+ MemReg(const Xbyak::Reg64 *r, const Xbyak::RegExp *m, size_t offset) : r_(r), m_(m), offset_(offset) {}
+ // true if the element is held in a register
+ bool isReg() const { return r_ != 0; }
+ // dereferences r_ : call only when isReg()
+ const Xbyak::Reg64& getReg() const { return *r_; }
+ // dereferences m_ : call only when !isReg()
+ Xbyak::RegExp getMem() const { return *m_ + offset_ * sizeof(size_t); }
+};
+
+/*
+	a sequence of 64-bit elements held partly in registers and partly in memory:
+	elements [0, p.size()) are the registers of p,
+	elements [p.size(), size()) are the qwords at m, m + 8, ...
+*/
+struct MixPack {
+ static const size_t useAll = 100; // value of useRegNum that means "no limit"
+ Xbyak::util::Pack p; // register part
+ Xbyak::RegExp m; // address of the first memory element
+ size_t mn; // number of memory elements
+ MixPack() : mn(0) {}
+ MixPack(Xbyak::util::Pack& remain, size_t& rspPos, size_t n, size_t useRegNum = useAll)
+ {
+ init(remain, rspPos, n, useRegNum);
+ }
+ /*
+ take registers from the front of remain and put the rest of the n elements at rsp + rspPos
+ @param remain [in/out] free registers; the ones taken are removed from it
+ @param rspPos [in/out] stack offset in bytes; advanced by 8 for every memory element
+ @param n [in] total number of elements
+ @param useRegNum [in] upper bound of the number of registers to take (useAll : no bound)
+ */
+ void init(Xbyak::util::Pack& remain, size_t& rspPos, size_t n, size_t useRegNum = useAll)
+ {
+ size_t pn = std::min(remain.size(), n);
+ if (useRegNum != useAll && useRegNum < pn) pn = useRegNum;
+ this->mn = n - pn;
+ this->m = Xbyak::util::rsp + rspPos;
+ this->p = remain.sub(0, pn);
+ remain = remain.sub(pn);
+ rspPos += mn * 8;
+ }
+ // total number of elements (registers + memory)
+ size_t size() const { return p.size() + mn; }
+ // true if element n is a register
+ bool isReg(size_t n) const { return n < p.size(); }
+ // register of element n; n must be in the register part
+ const Xbyak::Reg64& getReg(size_t n) const
+ {
+ assert(n < p.size());
+ return p[n];
+ }
+ // address of element n; n must be in the memory part
+ Xbyak::RegExp getMem(size_t n) const
+ {
+ const size_t pn = p.size();
+ assert(pn <= n && n < size());
+ return m + (int)((n - pn) * sizeof(size_t));
+ }
+ // element n as MemReg; the result points into this object (p and m), so it must not outlive it
+ MemReg operator[](size_t n) const
+ {
+ const size_t pn = p.size();
+ // the offset n - pn is meaningful only for the memory case; MemReg ignores it for registers
+ return MemReg((n < pn) ? &p[n] : 0, (n < pn) ? 0 : &m, n - pn);
+ }
+ // drop the last element : a memory element if any, otherwise the last register
+ void removeLast()
+ {
+ if (!size()) throw cybozu::Exception("MixPack:removeLast:empty");
+ if (mn > 0) {
+ mn--;
+ } else {
+ p = p.sub(0, p.size() - 1);
+ }
+ }
+ /*
+ replace Mem with r if possible
+ emits "mov r, [m]" so that the first memory element moves into r, which is appended to the register part
+ @return false if there is no memory element (nothing is emitted)
+ */
+ bool replaceMemWith(Xbyak::CodeGenerator *code, const Xbyak::Reg64& r)
+ {
+ if (mn == 0) return false;
+ p.append(r);
+ code->mov(r, code->ptr [m]);
+ m = m + 8;
+ mn--;
+ return true;
+ }
+};
+
+} // fp_gen_local
+
+/*
+	op(r, rm);
+	r : reg
+	rm : Reg/Mem (an object with isReg() / getReg() / getMem(), such as MemReg)
+	emits the register form if rm.isReg(), the qword memory form otherwise.
+	@note expands to a bare if/else statement and is written without a trailing ';' at its call sites;
+	rm is evaluated more than once
+*/
+#define MCL_FP_GEN_OP_RM(op, r, rm) \
+if (rm.isReg()) { \
+ op(r, rm.getReg()); \
+} else { \
+ op(r, qword [rm.getMem()]); \
+}
+
+/*
+	op(rm, r);
+	rm : Reg/Mem (an object with isReg() / getReg() / getMem(), such as MemReg)
+	r : reg (g_test() also passes an immediate here)
+	emits the register form if rm.isReg(), the qword memory form otherwise.
+	@note expands to a bare if/else statement and is written without a trailing ';' at its call sites;
+	rm is evaluated more than once
+*/
+#define MCL_FP_GEN_OP_MR(op, rm, r) \
+if (rm.isReg()) { \
+ op(rm.getReg(), r); \
+} else { \
+ op(qword [rm.getMem()], r); \
+}
+
+struct FpGenerator : Xbyak::CodeGenerator {
+ typedef Xbyak::RegExp RegExp;
+ typedef Xbyak::Reg64 Reg64;
+ typedef Xbyak::Xmm Xmm;
+ typedef Xbyak::Operand Operand;
+ typedef Xbyak::util::StackFrame StackFrame;
+ typedef Xbyak::util::Pack Pack;
+ typedef fp_gen_local::MixPack MixPack;
+ typedef fp_gen_local::MemReg MemReg;
+ static const int UseRDX = Xbyak::util::UseRDX;
+ static const int UseRCX = Xbyak::util::UseRCX;
+ Xbyak::util::Cpu cpu_;
+ bool useMulx_;
+ const uint64_t *p_;
+ uint64_t pp_;
+ int pn_;
+ bool isFullBit_;
+ // add/sub without carry. return true if overflow
+ typedef bool (*bool3op)(uint64_t*, const uint64_t*, const uint64_t*);
+
+ // add/sub with mod
+ typedef void (*void3op)(uint64_t*, const uint64_t*, const uint64_t*);
+
+ // mul without carry. return top of z
+ typedef uint64_t (*uint3opI)(uint64_t*, const uint64_t*, uint64_t);
+
+ // neg
+ typedef void (*void2op)(uint64_t*, const uint64_t*);
+
+ // preInv
+ typedef int (*int2op)(uint64_t*, const uint64_t*);
+ bool3op addNc_;
+ bool3op subNc_;
+ void3op add_;
+ void3op sub_;
+ void3op mul_;
+ uint3opI mulI_;
+ void2op sqr_;
+ void2op neg_;
+ void2op shr1_;
+ int2op preInv_;
+ /*
+ 	allocate a code buffer of 4096 * 8 bytes and clear every generated-function pointer.
+ 	the pointers stay null until init() is called.
+ 	sqr_ is cleared here as well : MontFp::init compares it with 0 to select the fallback.
+ */
+ FpGenerator()
+ 	: CodeGenerator(4096 * 8)
+ 	, p_(0)
+ 	, pp_(0)
+ 	, pn_(0)
+ 	, isFullBit_(false)
+ 	, addNc_(0)
+ 	, subNc_(0)
+ 	, add_(0)
+ 	, sub_(0)
+ 	, mul_(0)
+ 	, mulI_(0)
+ 	, sqr_(0)
+ 	, neg_(0)
+ 	, shr1_(0)
+ 	, preInv_(0)
+ {
+ 	// mulx is available only when the CPU supports BMI2
+ 	useMulx_ = cpu_.has(Xbyak::util::Cpu::tBMI2);
+ }
+ /*
+ 	generate all routines for the prime p; any code generated before is discarded.
+ 	@param p [in] pointer to prime (p[0] is the lowest unit, p[pn - 1] the highest)
+ 	@param pn [in] length of prime in 64-bit units (>= 2)
+ 	@note p is stored and its address is embedded in the generated code,
+ 	so the array must stay valid while the generated functions are used
+ 	@note throws cybozu::Exception if pn < 2 or if a generator does not support pn
+ */
+ void init(const uint64_t *p, int pn)
+ {
+ 	if (pn < 2) throw cybozu::Exception("mcl:FpGenerator:small pn") << pn;
+ 	p_ = p;
+ 	pp_ = montgomery::getCoff(p[0]);
+ 	pn_ = pn;
+ 	// true if the top bit of the highest unit is set; then sums can carry out of pn_ units
+ 	isFullBit_ = (p_[pn_ - 1] >> 63) != 0;
+//	printf("p=%p, pn_=%d, isFullBit_=%d\n", p_, pn_, isFullBit_);
+
+ 	setSize(0); // reset code
+ 	align(16);
+ 	addNc_ = getCurr<bool3op>();
+ 	gen_addSubNc(true);
+ 	align(16);
+ 	subNc_ = getCurr<bool3op>();
+ 	gen_addSubNc(false);
+ 	align(16);
+ 	add_ = getCurr<void3op>();
+ 	gen_addMod();
+ 	align(16);
+ 	sub_ = getCurr<void3op>();
+ 	gen_sub();
+ 	align(16);
+ 	neg_ = getCurr<void2op>();
+ 	gen_neg();
+ 	align(16);
+ 	mulI_ = getCurr<uint3opI>();
+ 	gen_mulI();
+ 	align(16);
+ 	mul_ = getCurr<void3op>();
+ 	gen_mul();
+ 	align(16);
+ 	sqr_ = getCurr<void2op>();
+ 	if (!gen_sqr()) {
+ 		// no dedicated square routine for this pn_
+ 		sqr_ = 0;
+ 	}
+ 	align(16);
+ 	shr1_ = getCurr<void2op>();
+ 	gen_shr1();
+ 	// align this entry point like all the others
+ 	align(16);
+ 	preInv_ = getCurr<int2op>();
+ 	gen_preInv();
+ }
+ /*
+ 	emit bool f(pz, px, py) : pz[] = px[] + py[] if isAdd, px[] - py[] otherwise,
+ 	over pn_ units and without modular reduction.
+ 	the generated function returns the last carry/borrow (0 or 1).
+ */
+ void gen_addSubNc(bool isAdd)
+ {
+ 	StackFrame sf(this, 3);
+ 	const Reg64& pz = sf.p[0];
+ 	const Reg64& px = sf.p[1];
+ 	const Reg64& py = sf.p[2];
+ 	if (isAdd) {
+ 		gen_raw_add(pz, px, py, rax);
+ 	} else {
+ 		gen_raw_sub(pz, px, py, rax);
+ 	}
+ 	// CF of the last adc/sbb becomes the return value
+ 	setc(al);
+ 	movzx(eax, al);
+ }
+ /*
+ 	pz[] = px[] + py[] over pn_ units
+ 	t : work register (destroyed); CF after the emitted code is the carry of the top unit
+ */
+ void gen_raw_add(const RegExp& pz, const RegExp& px, const RegExp& py, const Reg64& t)
+ {
+ 	// lowest unit starts the carry chain
+ 	mov(t, ptr [px]);
+ 	add(t, ptr [py]);
+ 	mov(ptr [pz], t);
+ 	// upper units continue it with adc
+ 	for (int unit = 1; unit < pn_; unit++) {
+ 		const int offset = unit * 8;
+ 		mov(t, ptr [px + offset]);
+ 		adc(t, ptr [py + offset]);
+ 		mov(ptr [pz + offset], t);
+ 	}
+ }
+ /*
+ 	pz[] = px[] - py[] over pn_ units
+ 	t : work register (destroyed); CF after the emitted code is the borrow of the top unit
+ */
+ void gen_raw_sub(const RegExp& pz, const RegExp& px, const RegExp& py, const Reg64& t)
+ {
+ 	// lowest unit starts the borrow chain
+ 	mov(t, ptr [px]);
+ 	sub(t, ptr [py]);
+ 	mov(ptr [pz], t);
+ 	// upper units continue it with sbb
+ 	for (int unit = 1; unit < pn_; unit++) {
+ 		const int offset = unit * 8;
+ 		mov(t, ptr [px + offset]);
+ 		sbb(t, ptr [py + offset]);
+ 		mov(ptr [pz + offset], t);
+ 	}
+ }
+ /*
+ pz[] = -px[]
+ i.e. pz[] = 0 if px[] is zero, pz[] = p_[] - px[] otherwise
+ t0, t1 : work registers (destroyed)
+ */
+ void gen_raw_neg(const RegExp& pz, const RegExp& px, const Reg64& t0, const Reg64& t1)
+ {
+ inLocalLabel();
+ mov(t0, ptr [px]);
+ test(t0, t0);
+ jnz(".neg");
+ if (pn_ > 1) {
+ // t0 is zero here; OR the remaining units into it to test whether all of px[] is zero
+ for (int i = 1; i < pn_; i++) {
+ or_(t0, ptr [px + i * 8]);
+ }
+ jnz(".neg");
+ }
+ // zero
+ // t0 is still zero, so it is used as the value to store
+ for (int i = 0; i < pn_; i++) {
+ mov(ptr [pz + i * 8], t0);
+ }
+ jmp(".exit");
+ L(".neg");
+ // t1 = address of the prime; pz[] = p_[] - px[]
+ mov(t1, (size_t)p_);
+ gen_raw_sub(pz, t1, px, t0);
+ L(".exit");
+ outLocalLabel();
+ }
+ /*
+ (rdx:pz[0..n-1]) = px[0..n-1] * y
+ the low n units go to pz[], the top unit is left in rdx
+ use t, rax, rdx
+ if n > 2
+ use
+ wk[0] if useMulx_
+ wk[0..n-2] otherwise
+ */
+ void gen_raw_mulI(const RegExp& pz, const RegExp& px, const Reg64& y, const MixPack& wk, const Reg64& t, size_t n)
+ {
+ assert(n >= 2);
+ if (n == 2) {
+ // two units : no work area needed, t keeps the high half of px[0] * y
+ mov(rax, ptr [px]);
+ mul(y);
+ mov(ptr [pz], rax);
+ mov(t, rdx);
+ mov(rax, ptr [px + 8]);
+ mul(y);
+ add(rax, t);
+ adc(rdx, 0);
+ mov(ptr [pz + 8], rax);
+ return;
+ }
+ if (useMulx_) {
+ assert(wk.size() > 0 && wk.isReg(0));
+ const Reg64& t1 = wk.getReg(0);
+ // mulx(H, L, x) = [H:L] = x * rdx
+ mov(rdx, y);
+ mulx(t1, rax, ptr [px]); // [t1:rax] = px[0] * y
+ mov(ptr [pz], rax);
+ // pt0 receives the new high half, pt1 holds the previous one; they alternate between t and t1
+ const Reg64 *pt0 = &t;
+ const Reg64 *pt1 = &t1;
+ for (size_t i = 1; i < n - 1; i++) {
+ mulx(*pt0, rax, ptr [px + i * 8]);
+ if (i == 1) {
+ add(rax, *pt1);
+ } else {
+ adc(rax, *pt1);
+ }
+ mov(ptr [pz + i * 8], rax);
+ std::swap(pt0, pt1);
+ }
+ // last unit : the high half goes directly to rdx
+ mulx(rdx, rax, ptr [px + (n - 1) * 8]);
+ adc(rax, *pt1);
+ mov(ptr [pz + (n - 1) * 8], rax);
+ adc(rdx, 0);
+ return;
+ }
+ assert(wk.size() >= n - 1);
+ // first pass : all partial products; low halves to pz[i], high halves to wk[i]
+ // (the last product stays in rdx:rax)
+ for (size_t i = 0; i < n; i++) {
+ mov(rax, ptr [px + i * 8]);
+ mul(y);
+ if (i < n - 1) {
+ mov(ptr [pz + i * 8], rax);
+ g_mov(wk[i], rdx);
+ }
+ }
+ // second pass : add the high halves into the next unit as one carry chain
+ for (size_t i = 1; i < n - 1; i++) {
+ mov(t, ptr [pz + i * 8]);
+ if (i == 1) {
+ g_add(t, wk[i - 1]);
+ } else {
+ g_adc(t, wk[i - 1]);
+ }
+ mov(ptr [pz + i * 8], t);
+ }
+ g_adc(rax, wk[n - 2]);
+ mov(ptr [pz + (n - 1) * 8], rax);
+ adc(rdx, 0);
+ }
+ /*
+ emit uint64_t f(pz, px, y) : pz[0..pn_-1] = low pn_ units of px[] * y,
+ the return value is the top unit (see gen_raw_mulI)
+ */
+ void gen_mulI()
+ {
+ assert(pn_ >= 2);
+ // with mulx only t and one work register are needed; otherwise wk holds pn_ - 1 high halves
+ const int regNum = useMulx_ ? 2 : (1 + std::min(pn_ - 1, 8));
+ const int stackSize = useMulx_ ? 0 : (pn_ - 1) * 8;
+ StackFrame sf(this, 3, regNum | UseRDX, stackSize);
+ const Reg64& pz = sf.p[0];
+ const Reg64& px = sf.p[1];
+ const Reg64& y = sf.p[2];
+ size_t rspPos = 0;
+ // sf.t[0] is passed as t, the other temporaries form the work area
+ Pack remain = sf.t.sub(1);
+ MixPack wk(remain, rspPos, pn_ - 1);
+ gen_raw_mulI(pz, px, y, wk, sf.t[0], pn_);
+ // return the top unit
+ mov(rax, rdx);
+ }
+ /*
+ 	pz[] = px[] for n units, copied through the work register t (destroyed)
+ */
+ void gen_mov(const RegExp& pz, const RegExp& px, const Reg64& t, int n)
+ {
+ 	for (int unit = 0; unit < n; unit++) {
+ 		const int offset = unit * 8;
+ 		mov(t, ptr [px + offset]);
+ 		mov(ptr [pz + offset], t);
+ 	}
+ }
+ /*
+ emit void f(pz, px, py) for pn_ == 3 without branches :
+ the sum x + y and the value x + y - p are both computed in registers,
+ and cmovc keeps the sum when the subtraction borrows
+ */
+ void gen_addMod3()
+ {
+ StackFrame sf(this, 3, 7);
+ const Reg64& pz = sf.p[0];
+ const Reg64& px = sf.p[1];
+ const Reg64& py = sf.p[2];
+
+ const Reg64& t0 = sf.t[0];
+ const Reg64& t1 = sf.t[1];
+ const Reg64& t2 = sf.t[2];
+ const Reg64& t3 = sf.t[3];
+ const Reg64& t4 = sf.t[4];
+ const Reg64& t5 = sf.t[5];
+ const Reg64& t6 = sf.t[6];
+
+ // t6 receives the carry out of the 3-unit sum
+ xor_(t6, t6);
+ load_rm(Pack(t2, t1, t0), px);
+ add_rm(Pack(t2, t1, t0), py);
+ // mov does not change CF, so the carry of the addition is still valid for the adc below
+ mov_rr(Pack(t5, t4, t3), Pack(t2, t1, t0));
+ adc(t6, 0);
+ mov(rax, (size_t)p_);
+ sub_rm(Pack(t5, t4, t3), rax);
+ sbb(t6, 0);
+ // CF is set if (carry:sum) < p : keep the unreduced sum
+ cmovc(t5, t2);
+ cmovc(t4, t1);
+ cmovc(t3, t0);
+ store_mr(pz, Pack(t5, t4, t3));
+ }
+ /*
+ emit void f(pz, px, py) for 2 <= n <= 4 without branches :
+ z = x - y, then add (p AND mask) where mask is all ones if the subtraction borrowed, 0 otherwise
+ */
+ void gen_subMod_le4(int n)
+ {
+ assert(2 <= n && n <= 4);
+ StackFrame sf(this, 3, (n - 1) * 2);
+ const Reg64& pz = sf.p[0];
+ const Reg64& px = sf.p[1];
+ const Reg64& py = sf.p[2];
+
+ // px and rax serve as the n-th register of rx and ry
+ Pack rx = sf.t.sub(0, n - 1);
+ rx.append(px); // rx = [px, t1, t0]
+ Pack ry = sf.t.sub(n - 1, n - 1);
+ ry.append(rax); // ry = [rax, t3, t2]
+
+ load_rm(rx, px); // destroy px
+ sub_rm(rx, py);
+#if 0
+ sbb(ry[0], ry[0]); // ry[0] = (x >= y) ? 0 : -1
+ for (int i = 1; i < n; i++) mov(ry[i], ry[0]);
+ mov(py, (size_t)p_);
+ for (int i = 0; i < n; i++) and_(ry[i], qword [py + 8 * i]);
+ add_rr(rx, ry);
+#else
+ // a little faster
+ sbb(py, py); // py = (x >= y) ? 0 : -1
+ mov(rax, (size_t)p_);
+ load_rm(ry, rax); // destroy rax
+ for (size_t i = 0; i < ry.size(); i++) {
+ and_(ry[i], py);
+ }
+ add_rr(rx, ry);
+#endif
+ store_mr(pz, rx);
+ }
+ /*
+ emit void f(pz, px, py) : pz[] = px[] + py[], followed by one conditional subtraction of p.
+ pn_ == 3 uses the branch-free gen_addMod3();
+ the other sizes use jumps, and pn_ * 8 bytes of stack when MCL_USE_JMP is not defined
+ */
+ void gen_addMod()
+ {
+ if (pn_ == 3) {
+ gen_addMod3();
+ return;
+ }
+ StackFrame sf(this, 3, 0, pn_ * 8);
+ const Reg64& pz = sf.p[0];
+ const Reg64& px = sf.p[1];
+ const Reg64& py = sf.p[2];
+ const Xbyak::CodeGenerator::LabelType jmpMode = pn_ < 5 ? T_AUTO : T_NEAR;
+
+ inLocalLabel();
+ gen_raw_add(pz, px, py, rax);
+ mov(px, (size_t)p_); // destroy px
+ if (isFullBit_) {
+ // the sum can carry out of pn_ units only for a full-bit prime; then it must be reduced
+ // (mov above does not change CF)
+ jc(".over", jmpMode);
+ }
+#ifdef MCL_USE_JMP
+ // compare pz[] with p[] from the top unit down
+ for (int i = 0; i < pn_; i++) {
+ mov(py, ptr [pz + (pn_ - 1 - i) * 8]); // destroy py
+ cmp(py, ptr [px + (pn_ - 1 - i) * 8]);
+ jc(".exit", jmpMode);
+ jnz(".over", jmpMode);
+ }
+ L(".over");
+ gen_raw_sub(pz, pz, px, rax);
+ L(".exit");
+#else
+ // [rsp] = pz[] - p[]; on borrow pz[] is already the result, otherwise copy the difference
+ gen_raw_sub(rsp, pz, px, rax);
+ jc(".exit", jmpMode);
+ gen_mov(pz, rsp, rax, pn_);
+ if (isFullBit_) {
+ jmp(".exit", jmpMode);
+ L(".over");
+ gen_raw_sub(pz, pz, px, rax);
+ }
+ L(".exit");
+#endif
+ outLocalLabel();
+ }
+ /*
+ 	emit void f(pz, px, py) : pz[] = px[] - py[], and p is added back if the subtraction borrows.
+ 	pn_ <= 4 is delegated to the branch-free gen_subMod_le4()
+ */
+ void gen_sub()
+ {
+ 	if (pn_ <= 4) {
+ 		gen_subMod_le4(pn_);
+ 		return;
+ 	}
+ 	StackFrame sf(this, 3);
+ 	const Reg64& pz = sf.p[0];
+ 	const Reg64& px = sf.p[1];
+ 	const Reg64& py = sf.p[2];
+
+ 	inLocalLabel();
+ 	gen_raw_sub(pz, px, py, rax);
+ 	// pn_ >= 5 here, so the jump is always a near one
+ 	jnc(".exit", T_NEAR);
+ 	mov(px, (size_t)p_); // destroy px
+ 	gen_raw_add(pz, pz, px, rax);
+ 	L(".exit");
+ 	outLocalLabel();
+ }
+ /*
+ 	emit void f(pz, px) : pz[] = -px[] (see gen_raw_neg)
+ */
+ void gen_neg()
+ {
+ 	StackFrame sf(this, 2, 2);
+ 	// p[0] = pz, p[1] = px; the two temporaries are work registers
+ 	gen_raw_neg(sf.p[0], sf.p[1], sf.t[0], sf.t[1]);
+ }
+ /*
+ emit void f(pz, px) : pz[] = px[] >> 1 over pn_ units
+ */
+ void gen_shr1()
+ {
+ const int c = 1;
+ StackFrame sf(this, 2, 1);
+ // t0 holds the unit being shifted, t1 the next higher unit; they alternate
+ const Reg64 *t0 = &rax;
+ const Reg64 *t1 = &sf.t[0];
+ const Reg64& pz = sf.p[0];
+ const Reg64& px = sf.p[1];
+ mov(*t0, ptr [px]);
+ for (int i = 0; i < pn_ - 1; i++) {
+ mov(*t1, ptr [px + 8 * (i + 1)]);
+ // shift the low bits of the higher unit into the current one
+ shrd(*t0, *t1, c);
+ mov(ptr [pz + i * 8], *t0);
+ std::swap(t0, t1);
+ }
+ // top unit : zeros are shifted in
+ shr(*t0, c);
+ mov(ptr [pz + (pn_ - 1) * 8], *t0);
+ }
+ /*
+ 	emit the Montgomery multiplication for pn_ units :
+ 	dedicated generators for 3 and 4 units, the generic one up to 9 units.
+ 	throws cybozu::Exception for larger pn_
+ */
+ void gen_mul()
+ {
+ 	switch (pn_) {
+ 	case 3:
+ 		gen_montMul3(p_, pp_);
+ 		return;
+ 	case 4:
+ 		gen_montMul4(p_, pp_);
+ 		return;
+ 	default:
+ 		break;
+ 	}
+ 	if (pn_ > 9) {
+ 		throw cybozu::Exception("mcl:FpGenerator:gen_mul:not implemented for") << pn_;
+ 	}
+ 	gen_montMulN(p_, pp_, pn_);
+ }
+ /*
+ 	emit the Montgomery squaring if a dedicated generator exists (only pn_ == 3)
+ 	@return true if code was generated
+ */
+ bool gen_sqr()
+ {
+ 	if (pn_ != 3) return false;
+ 	gen_montSqr3(p_, pp_);
+ 	return true;
+ }
+ /*
+ input (pz[], px[], py[])
+ z[] <- montgomery(x[], y[])
+ generic version for n units; the accumulator pc[] and the work areas are on the stack
+ */
+ void gen_montMulN(const uint64_t *p, uint64_t pp, int n)
+ {
+ assert(2 <= pn_ && pn_ <= 9);
+ const int regNum = useMulx_ ? 4 : 3 + std::min(n - 1, 7);
+ const int stackSize = (n * 3 + (isFullBit_ ? 2 : 1)) * 8;
+ StackFrame sf(this, 3, regNum | UseRDX, stackSize);
+ const Reg64& pz = sf.p[0];
+ const Reg64& px = sf.p[1];
+ const Reg64& py = sf.p[2];
+ const Reg64& y = sf.t[0];
+ const Reg64& pAddr = sf.t[1];
+ const Reg64& t = sf.t[2];
+ Pack remain = sf.t.sub(3);
+ size_t rspPos = 0;
+
+ // pw1 takes the remaining registers first and spills the rest to the stack
+ MixPack pw1(remain, rspPos, n - 1);
+ const RegExp pw2 = rsp + rspPos; // pw2[0..n-1]
+ const RegExp pc = pw2 + n * 8; // pc[0..n+1]
+ mov(pAddr, (size_t)p);
+
+ // one step per unit of y; the last argument tells montgomeryN_1 that this is the first step
+ for (int i = 0; i < n; i++) {
+ mov(y, ptr [py + i * 8]);
+ montgomeryN_1(pp, n, pc, px, y, pAddr, t, pw1, pw2, i == 0);
+ }
+ // pz[] = pc[] - p[]
+ gen_raw_sub(pz, pc, pAddr, t);
+ // for a full-bit prime the extra unit pc[n] takes part in the borrow
+ if (isFullBit_) sbb(qword[pc + n * 8], 0);
+ jnc("@f");
+ // borrow : pc[] < p[], so the unreduced value is the result
+ for (int i = 0; i < n; i++) {
+ mov(t, ptr [pc + i * 8]);
+ mov(ptr [pz + i * 8], t);
+ }
+ L("@@");
+ }
+ /*
+ input (z, x, y) = (p0, p1, p2)
+ z[0..3] <- montgomery(x[0..3], y[0..3])
+ destroy gt0, ..., gt9, xm0, xm1, p2
+ xm0 keeps the output pointer, xm1 keeps the pointer to y; xm2 is passed to montgomery4_1
+ */
+ void gen_montMul4(const uint64_t *p, uint64_t pp)
+ {
+ StackFrame sf(this, 3, 10 | UseRDX);
+ const Reg64& p0 = sf.p[0];
+ const Reg64& p1 = sf.p[1];
+ const Reg64& p2 = sf.p[2];
+
+ const Reg64& t0 = sf.t[0];
+ const Reg64& t1 = sf.t[1];
+ const Reg64& t2 = sf.t[2];
+ const Reg64& t3 = sf.t[3];
+ const Reg64& t4 = sf.t[4];
+ const Reg64& t5 = sf.t[5];
+ const Reg64& t6 = sf.t[6];
+ const Reg64& t7 = sf.t[7];
+ const Reg64& t8 = sf.t[8];
+ const Reg64& t9 = sf.t[9];
+
+ movq(xm0, p0); // save p0
+ // from here p0 holds the address of the prime and p2 the current unit of y
+ mov(p0, (uint64_t)p);
+ movq(xm1, p2);
+ mov(p2, ptr [p2]);
+ // the accumulator registers are passed rotated by one position on every step
+ montgomery4_1(pp, t0, t7, t3, t2, t1, p1, p2, p0, t4, t5, t6, t8, t9, true, xm2);
+
+ movq(p2, xm1);
+ mov(p2, ptr [p2 + 8]);
+ montgomery4_1(pp, t1, t0, t7, t3, t2, p1, p2, p0, t4, t5, t6, t8, t9, false, xm2);
+
+ movq(p2, xm1);
+ mov(p2, ptr [p2 + 16]);
+ montgomery4_1(pp, t2, t1, t0, t7, t3, p1, p2, p0, t4, t5, t6, t8, t9, false, xm2);
+
+ movq(p2, xm1);
+ mov(p2, ptr [p2 + 24]);
+ montgomery4_1(pp, t3, t2, t1, t0, t7, p1, p2, p0, t4, t5, t6, t8, t9, false, xm2);
+ // [t7:t3:t2:t1:t0]
+
+ // keep a copy of the unreduced value, subtract p, and restore the copy on borrow
+ mov(t4, t0);
+ mov(t5, t1);
+ mov(t6, t2);
+ mov(rdx, t3);
+ sub_rm(Pack(t3, t2, t1, t0), p0);
+ if (isFullBit_) sbb(t7, 0);
+ cmovc(t0, t4);
+ cmovc(t1, t5);
+ cmovc(t2, t6);
+ cmovc(t3, rdx);
+
+ movq(p0, xm0); // load p0
+ store_mr(p0, Pack(t3, t2, t1, t0));
+ }
+ /*
+ input (z, x, y) = (p0, p1, p2)
+ z[0..2] <- montgomery(x[0..2], y[0..2])
+ destroy gt0, ..., gt9, xm0, xm1, p2
+ NOTE(review): this body itself only uses xm0 (to keep p0) and reads y through p2;
+ whether montgomery3_1 touches xm1 or p2 is not visible here -- confirm
+ */
+ void gen_montMul3(const uint64_t *p, uint64_t pp)
+ {
+ StackFrame sf(this, 3, 10 | UseRDX);
+ const Reg64& p0 = sf.p[0];
+ const Reg64& p1 = sf.p[1];
+ const Reg64& p2 = sf.p[2];
+
+ const Reg64& t0 = sf.t[0];
+ const Reg64& t1 = sf.t[1];
+ const Reg64& t2 = sf.t[2];
+ const Reg64& t3 = sf.t[3];
+ const Reg64& t4 = sf.t[4];
+ const Reg64& t5 = sf.t[5];
+ const Reg64& t6 = sf.t[6];
+ const Reg64& t7 = sf.t[7];
+ const Reg64& t8 = sf.t[8];
+ const Reg64& t9 = sf.t[9];
+
+ movq(xm0, p0); // save p0
+ // t7 = address of the prime, t9 = current unit of y; p0 is handed to montgomery3_1 as a work register
+ mov(t7, (uint64_t)p);
+ mov(t9, ptr [p2]);
+ // c3, c2, c1, c0, px, y, p,
+ montgomery3_1(pp, t0, t3, t2, t1, p1, t9, t7, t4, t5, t6, t8, p0, true);
+ mov(t9, ptr [p2 + 8]);
+ montgomery3_1(pp, t1, t0, t3, t2, p1, t9, t7, t4, t5, t6, t8, p0, false);
+
+ mov(t9, ptr [p2 + 16]);
+ montgomery3_1(pp, t2, t1, t0, t3, p1, t9, t7, t4, t5, t6, t8, p0, false);
+
+ // [(t3):t2:t1:t0]
+ // keep a copy of the unreduced value, subtract p, and restore the copy on borrow
+ mov(t4, t0);
+ mov(t5, t1);
+ mov(t6, t2);
+ sub_rm(Pack(t2, t1, t0), t7);
+ if (isFullBit_) sbb(t3, 0);
+ cmovc(t0, t4);
+ cmovc(t1, t5);
+ cmovc(t2, t6);
+ movq(p0, xm0);
+ store_mr(p0, Pack(t2, t1, t0));
+ }
+ /*
+ input (pz, px)
+ z[0..2] <- montgomery(px[0..2], px[0..2])
+ destroy gt0, ..., gt9, xm0, xm1, p2
+ the cross products x0*x1, x0*x2 and x1*x2 are computed once by mul3x1_sqr1/2
+ and kept in the 16 * 3 bytes of stack for the later steps
+ */
+ void gen_montSqr3(const uint64_t *p, uint64_t pp)
+ {
+ StackFrame sf(this, 3, 10 | UseRDX, 16 * 3);
+ const Reg64& pz = sf.p[0];
+ const Reg64& px = sf.p[1];
+// const Reg64& py = sf.p[2]; // not used
+
+ const Reg64& t0 = sf.t[0];
+ const Reg64& t1 = sf.t[1];
+ const Reg64& t2 = sf.t[2];
+ const Reg64& t3 = sf.t[3];
+ const Reg64& t4 = sf.t[4];
+ const Reg64& t5 = sf.t[5];
+ const Reg64& t6 = sf.t[6];
+ const Reg64& t7 = sf.t[7];
+ const Reg64& t8 = sf.t[8];
+ const Reg64& t9 = sf.t[9];
+
+ movq(xm0, pz); // save pz
+ // t7 = address of the prime; pz is handed to montgomery3_sub as a work register
+ mov(t7, (uint64_t)p);
+ mov(t9, ptr [px]);
+ // step 1 : x[] * x0
+ mul3x1_sqr1(px, t9, t3, t2, t1, t0);
+ mov(t0, rdx);
+ montgomery3_sub(pp, t0, t9, t2, t1, px, t3, t7, t4, t5, t6, t8, pz, true);
+
+ // step 2 : accumulate x[] * x1
+ mov(t3, ptr [px + 8]);
+ mul3x1_sqr2(px, t3, t6, t5, t4);
+ add_rr(Pack(t1, t0, t9, t2), Pack(rdx, rax, t5, t4));
+ if (isFullBit_) setc(pz.cvt8());
+ montgomery3_sub(pp, t1, t3, t9, t2, px, t0, t7, t4, t5, t6, t8, pz, false);
+
+ // step 3 : accumulate x[] * x2
+ mov(t0, ptr [px + 16]);
+ mul3x1_sqr3(t0, t5, t4);
+ add_rr(Pack(t2, t1, t3, t9), Pack(rdx, rax, t5, t4));
+ if (isFullBit_) setc(pz.cvt8());
+ montgomery3_sub(pp, t2, t0, t3, t9, px, t1, t7, t4, t5, t6, t8, pz, false);
+
+ // [t9:t2:t0:t3]
+ // keep a copy of the unreduced value, subtract p, and restore the copy on borrow
+ mov(t4, t3);
+ mov(t5, t0);
+ mov(t6, t2);
+ sub_rm(Pack(t2, t0, t3), t7);
+ if (isFullBit_) sbb(t9, 0);
+ cmovc(t3, t4);
+ cmovc(t0, t5);
+ cmovc(t2, t6);
+ movq(pz, xm0);
+ store_mr(pz, Pack(t2, t0, t3));
+ }
+ /*
+ 	print "debug " followed by n 64-bit units as one hexadecimal number,
+ 	highest unit (ptr[n - 1]) first, and a newline
+ */
+ static inline void debug_put_inner(const uint64_t *ptr, int n)
+ {
+ 	printf("debug ");
+ 	for (int i = 0; i < n; i++) {
+ 		// %llx takes an unsigned long long argument
+ 		printf("%016llx", (unsigned long long)ptr[n - 1 - i]);
+ 	}
+ 	printf("\n");
+ }
+#ifdef _MSC_VER
+ /*
+ debug helper (Windows x64 only) : emit code that calls debug_put_inner(m, n)
+ and restores rax, rcx, rdx, r8, r9, r10, r11 afterwards.
+ the registers are saved in a single static buffer, so the emitted code is not reentrant.
+ NOTE(review): after push(rax) the original rax is at [rsp], but the code reads [rsp + 8] as "org rax".
+ this matches debug_put_mp(), which pushes rax itself before calling; for any other caller it looks wrong -- confirm
+ */
+ void debug_put(const RegExp& m, int n)
+ {
+ assert(n <= 8);
+ static uint64_t regBuf[7];
+
+ push(rax);
+ mov(rax, (size_t)regBuf);
+ mov(ptr [rax + 8 * 0], rcx);
+ mov(ptr [rax + 8 * 1], rdx);
+ mov(ptr [rax + 8 * 2], r8);
+ mov(ptr [rax + 8 * 3], r9);
+ mov(ptr [rax + 8 * 4], r10);
+ mov(ptr [rax + 8 * 5], r11);
+ mov(rcx, ptr [rsp + 8]); // org rax
+ mov(ptr [rax + 8 * 6], rcx); // save
+ mov(rcx, ptr [rax + 8 * 0]); // org rcx
+ pop(rax);
+
+ // arguments : rcx = address m, rdx = n; 32 bytes are reserved on the stack around the call
+ lea(rcx, ptr [m]);
+ mov(rdx, n);
+ mov(rax, (size_t)debug_put_inner);
+ sub(rsp, 32);
+ call(rax);
+ add(rsp, 32);
+
+ push(rax);
+ mov(rax, (size_t)regBuf);
+ mov(rcx, ptr [rax + 8 * 0]);
+ mov(rdx, ptr [rax + 8 * 1]);
+ mov(r8, ptr [rax + 8 * 2]);
+ mov(r9, ptr [rax + 8 * 3]);
+ mov(r10, ptr [rax + 8 * 4]);
+ mov(r11, ptr [rax + 8 * 5]);
+ mov(rax, ptr [rax + 8 * 6]);
+ // discard the value pushed above; rax was reloaded from regBuf
+ add(rsp, 8);
+ }
+#endif
+ /*
+ z >>= c
+ t : work register, used to load the next unit when it is in memory
+ z must not be empty (n - 1 is computed on size_t)
+ @note shrd(r/m, r, imm)
+ */
+ void shr_mp(const MixPack& z, uint8_t c, const Reg64& t)
+ {
+ const size_t n = z.size();
+ for (size_t i = 0; i < n - 1; i++) {
+ // the source operand of shrd must be a register
+ const Reg64 *p;
+ if (z.isReg(i + 1)) {
+ p = &z.getReg(i + 1);
+ } else {
+ mov(t, ptr [z.getMem(i + 1)]);
+ p = &t;
+ }
+ if (z.isReg(i)) {
+ shrd(z.getReg(i), *p, c);
+ } else {
+ shrd(qword [z.getMem(i)], *p, c);
+ }
+ }
+ // top unit : zeros are shifted in
+ if (z.isReg(n - 1)) {
+ shr(z.getReg(n - 1), c);
+ } else {
+ shr(qword [z.getMem(n - 1)], c);
+ }
+ }
+ /*
+ 	z *= 2, computed as z += z
+ 	t : work register for memory-to-memory units
+ */
+ void twice_mp(const MixPack& z, const Reg64& t)
+ {
+ 	const MemReg low = z[0];
+ 	g_add(low, low, t);
+ 	const size_t n = z.size();
+ 	for (size_t i = 1; i < n; i++) {
+ 		const MemReg zi = z[i];
+ 		g_adc(zi, zi, t);
+ 	}
+ }
+ /*
+ 	z += x (same number of units)
+ 	t : work register for memory-to-memory units
+ */
+ void add_mp(const MixPack& z, const MixPack& x, const Reg64& t)
+ {
+ 	assert(z.size() == x.size());
+ 	// lowest unit starts the carry chain
+ 	g_add(z[0], x[0], t);
+ 	const size_t n = z.size();
+ 	for (size_t i = 1; i < n; i++) {
+ 		g_adc(z[i], x[i], t);
+ 	}
+ }
+ /*
+ 	mz[] += mx[] for n units, both in memory
+ 	t : work register (destroyed)
+ */
+ void add_m_m(const RegExp& mz, const RegExp& mx, const Reg64& t, int n)
+ {
+ 	for (int unit = 0; unit < n; unit++) {
+ 		const int offset = unit * 8;
+ 		mov(t, ptr [mx + offset]);
+ 		if (unit > 0) {
+ 			adc(ptr [mz + offset], t);
+ 		} else {
+ 			add(ptr [mz + offset], t);
+ 		}
+ 	}
+ }
+ /*
+ 	mz[] = mx[] - y over y.size() units
+ 	t : work register (destroyed)
+ */
+ void sub_m_mp_m(const RegExp& mz, const RegExp& mx, const MixPack& y, const Reg64& t)
+ {
+ 	const size_t n = y.size();
+ 	for (size_t i = 0; i < n; i++) {
+ 		const bool isLowest = (i == 0);
+ 		mov(t, ptr [mx + i * 8]);
+ 		if (y.isReg(i)) {
+ 			const Reg64& yi = y.getReg(i);
+ 			if (isLowest) {
+ 				sub(t, yi);
+ 			} else {
+ 				sbb(t, yi);
+ 			}
+ 		} else {
+ 			const RegExp yi = y.getMem(i);
+ 			if (isLowest) {
+ 				sub(t, ptr [yi]);
+ 			} else {
+ 				sbb(t, ptr [yi]);
+ 			}
+ 		}
+ 		mov(ptr [mz + i * 8], t);
+ 	}
+ }
+ /*
+ 	z -= x (same number of units)
+ 	t : work register for memory-to-memory units
+ */
+ void sub_mp(const MixPack& z, const MixPack& x, const Reg64& t)
+ {
+ 	assert(z.size() == x.size());
+ 	// lowest unit starts the borrow chain
+ 	g_sub(z[0], x[0], t);
+ 	const size_t n = z.size();
+ 	for (size_t i = 1; i < n; i++) {
+ 		g_sbb(z[i], x[i], t);
+ 	}
+ }
+ /*
+ z -= px[]
+ t : work register, used only for the units of z that are in memory
+ */
+ void sub_mp_m(const MixPack& z, const RegExp& px, const Reg64& t)
+ {
+ // lowest unit starts the borrow chain
+ if (z.isReg(0)) {
+ sub(z.getReg(0), ptr [px]);
+ } else {
+ mov(t, ptr [px]);
+ sub(ptr [z.getMem(0)], t);
+ }
+ for (size_t i = 1, n = z.size(); i < n; i++) {
+ if (z.isReg(i)) {
+ sbb(z.getReg(i), ptr [px + i * 8]);
+ } else {
+ mov(t, ptr [px + i * 8]);
+ sbb(ptr [z.getMem(i)], t);
+ }
+ }
+ }
+ /*
+ 	m[] = z
+ 	t : work register for the units of z that are in memory
+ */
+ void store_mp(const RegExp& m, const MixPack& z, const Reg64& t)
+ {
+ 	const size_t n = z.size();
+ 	for (size_t i = 0; i < n; i++) {
+ 		const RegExp dst = m + i * 8;
+ 		if (!z.isReg(i)) {
+ 			// memory to memory goes through t
+ 			mov(t, ptr [z.getMem(i)]);
+ 			mov(ptr [dst], t);
+ 			continue;
+ 		}
+ 		mov(ptr [dst], z.getReg(i));
+ 	}
+ }
+ /*
+ 	z = m[]
+ 	t : work register for the units of z that are in memory
+ */
+ void load_mp(const MixPack& z, const RegExp& m, const Reg64& t)
+ {
+ 	const size_t n = z.size();
+ 	for (size_t i = 0; i < n; i++) {
+ 		const RegExp src = m + i * 8;
+ 		if (!z.isReg(i)) {
+ 			// memory to memory goes through t
+ 			mov(t, ptr [src]);
+ 			mov(ptr [z.getMem(i)], t);
+ 			continue;
+ 		}
+ 		mov(z.getReg(i), ptr [src]);
+ 	}
+ }
+ // every unit of z = t (gen_preInv uses it with a zeroed register to clear z)
+ void set_mp(const MixPack& z, const Reg64& t)
+ {
+ for (size_t i = 0, n = z.size(); i < n; i++) {
+ MCL_FP_GEN_OP_MR(mov, z[i], t)
+ }
+ }
+ /*
+ z = x for z.size() units
+ t : work register, used only when both units are in memory
+ */
+ void mov_mp(const MixPack& z, const MixPack& x, const Reg64& t)
+ {
+ for (size_t i = 0, n = z.size(); i < n; i++) {
+ const MemReg zi = z[i], xi = x[i];
+ if (z.isReg(i)) {
+ MCL_FP_GEN_OP_RM(mov, zi.getReg(), xi)
+ } else {
+ if (x.isReg(i)) {
+ mov(ptr [z.getMem(i)], x.getReg(i));
+ } else {
+ mov(t, ptr [x.getMem(i)]);
+ mov(ptr [z.getMem(i)], t);
+ }
+ }
+ }
+ }
+#ifdef _MSC_VER
+ /*
+ debug helper : emit code that copies mp to a static buffer and prints n units of it with debug_put().
+ xm0 is used to keep rax while the copy is made.
+ NOTE(review): store_mp() writes mp.size() units into buf[10] whatever n is -- callers must keep mp.size() <= 10
+ */
+ void debug_put_mp(const MixPack& mp, int n, const Reg64& t)
+ {
+ // terminates the process (at code generation time) for n >= 10
+ if (n >= 10) exit(1);
+ static uint64_t buf[10];
+ movq(xm0, rax);
+ mov(rax, (size_t)buf);
+ store_mp(rax, mp, t);
+ movq(rax, xm0);
+ push(rax);
+ mov(rax, (size_t)buf);
+ debug_put(rax, n);
+ pop(rax);
+ }
+#endif
+
+ // label name made of label followed by the text form of n; gen_preInv uses it to get one label set per unrolled block
+ std::string mkLabel(const char *label, int n) const
+ {
+ return std::string(label) + Xbyak::Label::toStr(n);
+ }
+ /*
+ int k = preInvC(pr, px)
+ emit int f(pr, px) : a binary (shift and subtract) loop on u = p, v = x, r = 0, s = 1
+ that counts its iterations in k (rax) and finally stores p - r (corrected into range) to pr[].
+ NOTE(review): this looks like the first phase of Kaliski's Montgomery inverse
+ ("almost inverse", r = x^-1 2^k mod p) -- confirm against the C version preInvC
+ */
+ void gen_preInv()
+ {
+ assert(pn_ >= 2);
+ const int freeRegNum = 13;
+ if (pn_ > 9) {
+ throw cybozu::Exception("mcl:FpGenerator:gen_preInv:large pn_") << pn_;
+ }
+ StackFrame sf(this, 2, 10 | UseRDX | UseRCX, (std::max<int>(0, pn_ * 5 - freeRegNum) + 1 + (isFullBit_ ? 1 : 0)) * 8);
+ const Reg64& pr = sf.p[0];
+ const Reg64& px = sf.p[1];
+ const Reg64& t = rcx;
+ /*
+ k = rax, t = rcx : temporary
+ use rdx, pr, px in main loop, so we can use 13 registers
+ v = t[0, pn_) : all registers
+ */
+ size_t rspPos = 0;
+
+ assert(sf.t.size() >= (size_t)pn_);
+ Pack remain = sf.t;
+
+ // rr, ss, vv, uu take registers from remain in this order and spill the rest to the stack;
+ // rdx, px and pr are added to the pool as soon as they are free
+ const MixPack rr(remain, rspPos, pn_);
+ remain.append(rdx);
+ const MixPack ss(remain, rspPos, pn_);
+ remain.append(px);
+ const int rSize = (int)remain.size();
+ // vv takes at most half of what is left, so that some registers remain for uu
+ MixPack vv(remain, rspPos, pn_, rSize > 0 ? rSize / 2 : -1);
+ remain.append(pr);
+ MixPack uu(remain, rspPos, pn_);
+
+ const RegExp keep_pr = rsp + rspPos;
+ rspPos += 8;
+ const RegExp rTop = rsp + rspPos; // used if isFullBit_
+
+ inLocalLabel();
+ // pr may be reused as a work register, so the output pointer is kept on the stack
+ mov(ptr [keep_pr], pr);
+ mov(rax, px);
+ // px is free from here
+ load_mp(vv, rax, t); // v = x
+ mov(rax, (size_t)p_);
+ load_mp(uu, rax, t); // u = p_
+ // k = 0
+ xor_(rax, rax);
+ // rTop = 0
+ if (isFullBit_) {
+ mov(ptr [rTop], rax);
+ }
+ // r = 0;
+ set_mp(rr, rax);
+ // s = 1
+ set_mp(ss, rax);
+ if (ss.isReg(0)) {
+ mov(ss.getReg(0), 1);
+ } else {
+ mov(qword [ss.getMem(0)], 1);
+ }
+#if 0
+ L(".lp");
+ or_mp(vv, t);
+ jz(".exit", T_NEAR);
+
+ g_test(uu[0], 1);
+ jz(".u_even", T_NEAR);
+ g_test(vv[0], 1);
+ jz(".v_even", T_NEAR);
+ for (int i = pn_ - 1; i >= 0; i--) {
+ g_cmp(vv[i], uu[i], t);
+ jc(".v_lt_u", T_NEAR);
+ if (i > 0) jnz(".v_ge_u", T_NEAR);
+ }
+
+ L(".v_ge_u");
+ sub_mp(vv, uu, t);
+ add_mp(ss, rr, t);
+ L(".v_even");
+ shr_mp(vv, 1, t);
+ twice_mp(rr, t);
+ if (isFullBit_) {
+ sbb(t, t);
+ mov(ptr [rTop], t);
+ }
+ inc(rax);
+ jmp(".lp", T_NEAR);
+ L(".v_lt_u");
+ sub_mp(uu, vv, t);
+ add_mp(rr, ss, t);
+ if (isFullBit_) {
+ sbb(t, t);
+ mov(ptr [rTop], t);
+ }
+ L(".u_even");
+ shr_mp(uu, 1, t);
+ twice_mp(ss, t);
+ inc(rax);
+ jmp(".lp", T_NEAR);
+#else
+ // the loop above is emitted once per length cn = pn_, ..., 1 :
+ // when the top units of both u and v are zero, control moves to the copy for cn - 1,
+ // which handles one unit less of u and v
+ for (int cn = pn_; cn > 0; cn--) {
+ const std::string _lp = mkLabel(".lp", cn);
+ const std::string _u_v_odd = mkLabel(".u_v_odd", cn);
+ const std::string _u_even = mkLabel(".u_even", cn);
+ const std::string _v_even = mkLabel(".v_even", cn);
+ const std::string _v_ge_u = mkLabel(".v_ge_u", cn);
+ const std::string _v_lt_u = mkLabel(".v_lt_u", cn);
+ L(_lp);
+ // finished when v == 0
+ or_mp(vv, t);
+ jz(".exit", T_NEAR);
+
+ g_test(uu[0], 1);
+ jz(_u_even, T_NEAR);
+ g_test(vv[0], 1);
+ jz(_v_even, T_NEAR);
+ L(_u_v_odd);
+ if (cn > 1) {
+ isBothZero(vv[cn - 1], uu[cn - 1], t);
+ jz(mkLabel(".u_v_odd", cn - 1), T_NEAR);
+ }
+ // compare v with u from the top unit down
+ for (int i = cn - 1; i >= 0; i--) {
+ g_cmp(vv[i], uu[i], t);
+ jc(_v_lt_u, T_NEAR);
+ if (i > 0) jnz(_v_ge_u, T_NEAR);
+ }
+
+ L(_v_ge_u);
+ sub_mp(vv, uu, t);
+ add_mp(ss, rr, t);
+ L(_v_even);
+ shr_mp(vv, 1, t);
+ twice_mp(rr, t);
+ if (isFullBit_) {
+ // t = 0 or -1 according to the carry out of r; remembered in rTop
+ sbb(t, t);
+ mov(ptr [rTop], t);
+ }
+ inc(rax);
+ jmp(_lp, T_NEAR);
+ L(_v_lt_u);
+ sub_mp(uu, vv, t);
+ add_mp(rr, ss, t);
+ if (isFullBit_) {
+ sbb(t, t);
+ mov(ptr [rTop], t);
+ }
+ L(_u_even);
+ shr_mp(uu, 1, t);
+ twice_mp(ss, t);
+ inc(rax);
+ jmp(_lp, T_NEAR);
+
+ // the next copy works on one unit less (cn > 0 always holds inside this loop)
+ if (cn > 0) {
+ vv.removeLast();
+ uu.removeLast();
+ }
+ }
+#endif
+ L(".exit");
+ // s is no longer needed : its first two registers become work registers
+ assert(ss.isReg(0) && ss.isReg(1));
+ const Reg64& t2 = ss.getReg(0);
+ const Reg64& t3 = ss.getReg(1);
+
+ mov(t2, (size_t)p_);
+ if (isFullBit_) {
+ // r overflowed pn_ units : subtract p once
+ mov(t, ptr [rTop]);
+ test(t, t);
+ jz("@f");
+ sub_mp_m(rr, t2, t);
+ L("@@");
+ }
+ mov(t3, ptr [keep_pr]);
+ // pr[] = p[] - rr
+ sub_m_mp_m(t3, t2, rr, t);
+ jnc("@f");
+ // pr[] += p[]
+ add_m_m(t3, t2, t, pn_);
+ L("@@");
+ outLocalLabel();
+ }
+ /*
+ 	r = c, using the shorter 32-bit form when the upper 32 bits of c are zero
+ 	(the 32-bit register with the same index as r is the destination in that case)
+ */
+ void mov32c(const Reg64& r, uint64_t c)
+ {
+ 	const bool fitsIn32 = (c >> 32) == 0;
+ 	if (fitsIn32) {
+ 		mov(Xbyak::Reg32(r.getIdx()), (uint32_t)c);
+ 		return;
+ 	}
+ 	mov(r, c);
+ }
+private:
+ FpGenerator(const FpGenerator&);
+ void operator=(const FpGenerator&);
+ /*
+ 	emit op(op1, op2) where op1 is a register and op2 is a register or a qword in memory
+ */
+ void make_op_rm(void (Xbyak::CodeGenerator::*op)(const Xbyak::Operand&, const Xbyak::Operand&), const Reg64& op1, const MemReg& op2)
+ {
+ 	if (!op2.isReg()) {
+ 		(this->*op)(op1, qword [op2.getMem()]);
+ 		return;
+ 	}
+ 	(this->*op)(op1, op2.getReg());
+ }
+ /*
+ 	emit op(op1, op2) where op1 is a register or a qword in memory and op2 is a register
+ */
+ void make_op_mr(void (Xbyak::CodeGenerator::*op)(const Xbyak::Operand&, const Xbyak::Operand&), const MemReg& op1, const Reg64& op2)
+ {
+ 	if (!op1.isReg()) {
+ 		(this->*op)(qword [op1.getMem()], op2);
+ 		return;
+ 	}
+ 	(this->*op)(op1.getReg(), op2);
+ }
+ /*
+ 	emit op(op1, op2) for any combination of register and memory operands
+ 	t : work register, loaded with op2 (and destroyed) only when both operands are in memory
+ */
+ void make_op(void (Xbyak::CodeGenerator::*op)(const Xbyak::Operand&, const Xbyak::Operand&), const MemReg& op1, const MemReg& op2, const Reg64& t)
+ {
+ 	// (R, R) and (R, M)
+ 	if (op1.isReg()) {
+ 		make_op_rm(op, op1.getReg(), op2);
+ 		return;
+ 	}
+ 	// (M, R)
+ 	if (op2.isReg()) {
+ 		(this->*op)(ptr [op1.getMem()], op2.getReg());
+ 		return;
+ 	}
+ 	// (M, M) : x86 has no memory-to-memory form, so go through t
+ 	mov(t, ptr [op2.getMem()]);
+ 	(this->*op)(ptr [op1.getMem()], t);
+ }
+ // emit add/adc/sub/sbb/cmp with operands (op1, op2) for any register/memory combination;
+ // t is loaded with op2 only when both operands are in memory (see make_op)
+ void g_add(const MemReg& op1, const MemReg& op2, const Reg64& t) { make_op(&Xbyak::CodeGenerator::add, op1, op2, t); }
+ void g_adc(const MemReg& op1, const MemReg& op2, const Reg64& t) { make_op(&Xbyak::CodeGenerator::adc, op1, op2, t); }
+ void g_sub(const MemReg& op1, const MemReg& op2, const Reg64& t) { make_op(&Xbyak::CodeGenerator::sub, op1, op2, t); }
+ void g_sbb(const MemReg& op1, const MemReg& op2, const Reg64& t) { make_op(&Xbyak::CodeGenerator::sbb, op1, op2, t); }
+ void g_cmp(const MemReg& op1, const MemReg& op2, const Reg64& t) { make_op(&Xbyak::CodeGenerator::cmp, op1, op2, t); }
+ // r |= op, where op is a register or a qword in memory
+ void g_or(const Reg64& r, const MemReg& op) { make_op_rm(&Xbyak::CodeGenerator::or_, r, op); }
+ /*
+ emit test with the operands op1 and op2 for any register/memory combination.
+ the operands may be exchanged so that a register operand comes second.
+ t : work register, used only when both operands are in memory
+ */
+ void g_test(const MemReg& op1, const MemReg& op2, const Reg64& t)
+ {
+ const MemReg *pop1 = &op1;
+ const MemReg *pop2 = &op2;
+ // after this swap pop1 is a register whenever at least one of the operands is
+ if (!pop1->isReg()) {
+ std::swap(pop1, pop2);
+ }
+ // (M, M), (R, M), (R, R)
+ if (pop1->isReg()) {
+ MCL_FP_GEN_OP_MR(test, (*pop2), pop1->getReg())
+ } else {
+ mov(t, ptr [pop1->getMem()]);
+ test(ptr [pop2->getMem()], t);
+ }
+ }
+ // op = r, where op is a register or a qword in memory
+ void g_mov(const MemReg& op, const Reg64& r)
+ {
+ make_op_mr(&Xbyak::CodeGenerator::mov, op, r);
+ }
+ // r = op, where op is a register or a qword in memory
+ void g_mov(const Reg64& r, const MemReg& op)
+ {
+ make_op_rm(&Xbyak::CodeGenerator::mov, r, op);
+ }
+ // r += mr / r += mr + CF, where mr is a register or a qword in memory
+ void g_add(const Reg64& r, const MemReg& mr) { MCL_FP_GEN_OP_RM(add, r, mr) }
+ void g_adc(const Reg64& r, const MemReg& mr) { MCL_FP_GEN_OP_RM(adc, r, mr) }
+ // t = op1 | op2; ZF is set by the or exactly when both operands are zero
+ void isBothZero(const MemReg& op1, const MemReg& op2, const Reg64& t)
+ {
+ g_mov(t, op1);
+ g_or(t, op2);
+ }
+ // emit test op, imm, where op is a register or a qword in memory
+ void g_test(const MemReg& op, int imm)
+ {
+ MCL_FP_GEN_OP_MR(test, op, imm)
+ }
+ /*
+ 	z[] = x[] (register packs of the same size)
+ */
+ void mov_rr(const Pack& z, const Pack& x)
+ {
+ 	assert(z.size() == x.size());
+ 	const size_t n = x.size();
+ 	for (size_t i = 0; i < n; i++) {
+ 		mov(z[i], x[i]);
+ 	}
+ }
+ /*
+ 	m[] = x[] : store the registers of x to consecutive qwords at m
+ */
+ void store_mr(const RegExp& m, const Pack& x)
+ {
+ 	const size_t n = x.size();
+ 	for (size_t i = 0; i < n; i++) {
+ 		mov(ptr [m + i * 8], x[i]);
+ 	}
+ }
+ /*
+ 	z[] = m[] : load the registers of z from consecutive qwords at m.
+ 	the loads are emitted in index order, so a register used in m may also be
+ 	the last register of z (gen_subMod_le4 relies on this)
+ */
+ void load_rm(const Pack& z, const RegExp& m)
+ {
+ 	const size_t n = z.size();
+ 	for (size_t i = 0; i < n; i++) {
+ 		mov(z[i], ptr [m + i * 8]);
+ 	}
+ }
+ /*
+ 	z[] += x[] (register packs of the same size); CF is the carry of the top unit
+ */
+ void add_rr(const Pack& z, const Pack& x)
+ {
+ 	// lowest unit starts the carry chain
+ 	add(z[0], x[0]);
+ 	assert(z.size() == x.size());
+ 	const size_t n = z.size();
+ 	for (size_t i = 1; i < n; i++) {
+ 		adc(z[i], x[i]);
+ 	}
+ }
+ /*
+ 	z[] -= x[] (register packs of the same size); CF is the borrow of the top unit
+ */
+ void sub_rr(const Pack& z, const Pack& x)
+ {
+ 	// lowest unit starts the borrow chain
+ 	sub(z[0], x[0]);
+ 	assert(z.size() == x.size());
+ 	const size_t n = z.size();
+ 	for (size_t i = 1; i < n; i++) {
+ 		sbb(z[i], x[i]);
+ 	}
+ }
+ /*
+ 	z[] += m[] : add consecutive qwords at m to the registers of z
+ */
+ void add_rm(const Pack& z, const RegExp& m)
+ {
+ 	// lowest unit starts the carry chain
+ 	add(z[0], ptr [m + 8 * 0]);
+ 	const size_t n = z.size();
+ 	for (size_t i = 1; i < n; i++) {
+ 		adc(z[i], ptr [m + i * 8]);
+ 	}
+ }
+ /*
+ 	z[] -= m[] : subtract consecutive qwords at m from the registers of z
+ */
+ void sub_rm(const Pack& z, const RegExp& m)
+ {
+ 	// lowest unit starts the borrow chain
+ 	sub(z[0], ptr [m + 8 * 0]);
+ 	const size_t n = z.size();
+ 	for (size_t i = 1; i < n; i++) {
+ 		sbb(z[i], ptr [m + i * 8]);
+ 	}
+ }
+ /*
+ t = OR of all z[i] (t is not written when z is a single register)
+ ZF = z is zero
+ */
+ void or_mp(const MixPack& z, const Reg64& t)
+ {
+ const size_t n = z.size();
+ if (n == 1) {
+ // a single unit : test sets ZF without needing the OR
+ if (z.isReg(0)) {
+ test(z.getReg(0), z.getReg(0));
+ } else {
+ mov(t, ptr [z.getMem(0)]);
+ test(t, t);
+ }
+ } else {
+ g_mov(t, z[0]);
+ for (size_t i = 1; i < n; i++) {
+ g_or(t, z[i]);
+ }
+ }
+ }
+ /*
+ [rdx:x:t1:t0] <- py[2:1:0] * x
+ destroy x, t
+ rax is also destroyed; t2 is used only when mulx is not available
+ */
+ void mul3x1(const RegExp& py, const Reg64& x, const Reg64& t2, const Reg64& t1, const Reg64& t0, const Reg64& t)
+ {
+ if (useMulx_) {
+ // mulx(H, L, x) = [H:L] = x * rdx
+ /*
+ rdx:x
+ t:t1
+ rax:t0
+ */
+ mov(rdx, x);
+ mulx(rax, t0, ptr [py]); // [rax:t0] = py[0] * x
+ mulx(t, t1, ptr [py + 8]); // [t:t1] = py[1] * x
+ add(t1, rax);
+ // mulx does not change the flags, so the carry of the add above survives until the adc
+ mulx(rdx, x, ptr [py + 8 * 2]);
+ adc(x, t);
+ adc(rdx, 0);
+ } else {
+ mov(rax, ptr [py]);
+ mul(x);
+ mov(t0, rax);
+ mov(t1, rdx);
+ mov(rax, ptr [py + 8]);
+ mul(x);
+ mov(t, rax);
+ mov(t2, rdx);
+ mov(rax, ptr [py + 8 * 2]);
+ mul(x);
+ /*
+ rdx:rax
+ t2:t
+ t1:t0
+ */
+ add(t1, t);
+ adc(rax, t2);
+ adc(rdx, 0);
+ // the third unit of the result is returned in x
+ mov(x, rax);
+ }
+ }
+ /*
+ [x2:x1:x0] * x0
+ result : [rdx:t2:t1:t0]; rax and t are destroyed.
+ x0 * x1 and x0 * x2 are also saved at [rsp + 0 .. rsp + 31] for mul3x1_sqr2 / mul3x1_sqr3
+ */
+ void mul3x1_sqr1(const RegExp& px, const Reg64& x0, const Reg64& t2, const Reg64& t1, const Reg64& t0, const Reg64& t)
+ {
+ mov(rax, x0);
+ mul(x0);
+ mov(t0, rax);
+ mov(t1, rdx);
+ mov(rax, ptr [px + 8]);
+ mul(x0);
+ mov(ptr [rsp + 0 * 8], rax); // (x0 * x1)_L
+ mov(ptr [rsp + 1 * 8], rdx); // (x0 * x1)_H
+ mov(t, rax);
+ mov(t2, rdx);
+ mov(rax, ptr [px + 8 * 2]);
+ mul(x0);
+ mov(ptr [rsp + 2 * 8], rax); // (x0 * x2)_L
+ mov(ptr [rsp + 3 * 8], rdx); // (x0 * x2)_H
+ /*
+ rdx:rax
+ t2:t
+ t1:t0
+ */
+ add(t1, t);
+ adc(t2, rax);
+ adc(rdx, 0);
+ }
+ /*
+ [x2:x1:x0] * x1
+ */
+ void mul3x1_sqr2(const RegExp& px, const Reg64& x1, const Reg64& t2, const Reg64& t1, const Reg64& t0)
+ {
+ mov(t0, ptr [rsp + 0 * 8]);// (x0 * x1)_L
+ mov(rax, x1);
+ mul(x1);
+ mov(t1, rax);
+ mov(t2, rdx);
+ mov(rax, ptr [px + 8 * 2]);
+ mul(x1);
+ mov(ptr [rsp + 4 * 8], rax); // (x1 * x2)_L
+ mov(ptr [rsp + 5 * 8], rdx); // (x1 * x2)_H
+ /*
+ rdx:rax
+ t2:t1
+ t:t0
+ */
+ add(t1, ptr [rsp + 1 * 8]); // (x0 * x1)_H
+ adc(rax, t2);
+ adc(rdx, 0);
+ }
+ /*
+ [rdx:rax:t1:t0] = [x2:x1:x0] * x2
+ */
+ void mul3x1_sqr3(const Reg64& x2, const Reg64& t1, const Reg64& t0)
+ {
+ mov(rax, x2);
+ mul(x2);
+ /*
+ rdx:rax
+ t2:t
+ t1:t0
+ */
+ mov(t0, ptr [rsp + 2 * 8]); // (x0 * x2)_L
+ mov(t1, ptr [rsp + 3 * 8]); // (x0 * x2)_H
+ add(t1, ptr [rsp + 4 * 8]); // (x1 * x2)_L
+ adc(rax, ptr [rsp + 5 * 8]); // (x1 * x2)_H
+ adc(rdx, 0);
+ }
+
+ /*
+ c = [c3:y:c1:c0] = c + x[2..0] * y
+ q = uint64_t(c0 * pp)
+ c = (c + q * p) >> 64
+ input [c3:c2:c1:c0], px, y, p
+ output [c0:c3:c2:c1] ; c0 == 0 unless isFullBit_
+
+ @note use rax, rdx, destroy y
+ */
+ void montgomery3_sub(uint64_t pp, const Reg64& c3, const Reg64& c2, const Reg64& c1, const Reg64& c0,
+ const Reg64& /*px*/, const Reg64& y, const Reg64& p,
+ const Reg64& t0, const Reg64& t1, const Reg64& t2, const Reg64& t3, const Reg64& t4, bool isFirst)
+ {
+ // input [c3:y:c1:0]
+ // [t4:c3:y:c1:c0]
+ // t4 = 0 or 1 if isFullBit_, = 0 otherwise
+ mov(rax, pp);
+ mul(c0); // q = rax
+ mov(c2, rax);
+ mul3x1(p, c2, t2, t1, t0, t3);
+ // [rdx:c2:t1:t0] = p * q
+ add(c0, t0); // always c0 is zero
+ adc(c1, t1);
+ adc(c2, y);
+ adc(c3, rdx);
+ if (isFullBit_) {
+ if (isFirst) {
+ setc(c0.cvt8());
+ } else {
+ adc(c0.cvt8(), t4.cvt8());
+ }
+ }
+ }
+ /*
+ c = [c3:c2:c1:c0]
+ c += x[2..0] * y
+ q = uint64_t(c0 * pp)
+ c = (c + q * p) >> 64
+ input [c3:c2:c1:c0], px, y, p
+ output [c0:c3:c2:c1] ; c0 == 0 unless isFullBit_
+
+ @note use rax, rdx, destroy y
+ */
+ void montgomery3_1(uint64_t pp, const Reg64& c3, const Reg64& c2, const Reg64& c1, const Reg64& c0,
+ const Reg64& px, const Reg64& y, const Reg64& p,
+ const Reg64& t0, const Reg64& t1, const Reg64& t2, const Reg64& t3, const Reg64& t4, bool isFirst)
+ {
+ if (isFirst) {
+ mul3x1(px, y, c2, c1, c0, c3);
+ mov(c3, rdx);
+ // [c3:y:c1:c0] = px[2..0] * y
+ } else {
+ mul3x1(px, y, t2, t1, t0, t3);
+ // [rdx:y:t1:t0] = px[2..0] * y
+ add_rr(Pack(c3, y, c1, c0), Pack(rdx, c2, t1, t0));
+ if (isFullBit_) setc(t4.cvt8());
+ }
+ montgomery3_sub(pp, c3, c2, c1, c0, px, y, p, t0, t1, t2, t3, t4, isFirst);
+ }
+ /*
+ pc[0..n] += x[0..n-1] * y ; pc[] = 0 if isFirst
+ pc[n + 1] is temporary used if isFullBit_
+ q = uint64_t(pc[0] * pp)
+ pc[] = (pc[] + q * p) >> 64
+ input : pc[], px[], y, p[], pw1[], pw2[]
+ output : pc[0..n] ; if isFullBit_
+ pc[0..n-1] ; if !isFullBit_
+ destroy y
+ use
+ pw1[0] if useMulx_
+ pw1[0..n-2] otherwise
+ pw2[0..n-1]
+ */
+ void montgomeryN_1(uint64_t pp, int n, const RegExp& pc, const RegExp& px, const Reg64& y, const Reg64& p, const Reg64& t, const MixPack& pw1, const RegExp& pw2, bool isFirst)
+ {
+ // pc[] += x[] * y
+ if (isFirst) {
+ gen_raw_mulI(pc, px, y, pw1, t, n);
+ mov(ptr [pc + n * 8], rdx);
+ } else {
+ gen_raw_mulI(pw2, px, y, pw1, t, n);
+ mov(t, ptr [pw2 + 0 * 8]);
+ add(ptr [pc + 0 * 8], t);
+ for (int i = 1; i < n; i++) {
+ mov(t, ptr [pw2 + i * 8]);
+ adc(ptr [pc + i * 8], t);
+ }
+ adc(ptr [pc + n * 8], rdx);
+ if (isFullBit_) {
+ mov(t, 0);
+ adc(t, 0);
+ mov(qword [pc + (n + 1) * 8], t);
+ }
+ }
+ mov(rax, pp);
+ mul(qword [pc]);
+ mov(y, rax); // y = q
+ gen_raw_mulI(pw2, p, y, pw1, t, n);
+ // c[] = (c[] + pw2[]) >> 64
+ mov(t, ptr [pw2 + 0 * 8]);
+ add(t, ptr [pc + 0 * 8]);
+ for (int i = 1; i < n; i++) {
+ mov(t, ptr [pw2 + i * 8]);
+ adc(t, ptr [pc + i * 8]);
+ mov(ptr [pc + (i - 1) * 8], t);
+ }
+ adc(rdx, ptr [pc + n * 8]);
+ mov(ptr [pc + (n - 1) * 8], rdx);
+ if (isFullBit_) {
+ if (isFirst) {
+ mov(t, 0);
+ } else {
+ mov(t, ptr [pc + (n + 1) * 8]);
+ }
+ adc(t, 0);
+ mov(qword [pc + n * 8], t);
+ } else {
+ xor_(eax, eax);
+ mov(ptr [pc + n * 8], rax);
+ }
+ }
+ /*
+ [rdx:x:t2:t1:t0] <- py[3:2:1:0] * x
+ (four words at py times the single word x ; five-word result, t0 is the lowest word)
+ destroy x, t
+ rax is overwritten in both paths
+ t and t3 are used as temporaries only when useMulx_ is false
+ */
+ void mul4x1(const RegExp& py, const Reg64& x, const Reg64& t3, const Reg64& t2, const Reg64& t1, const Reg64& t0, const Reg64& t)
+ {
+ if (useMulx_) {
+ // mulx(H, L, m) = [H:L] = m * rdx
+ mov(rdx, x);
+ mulx(t1, t0, ptr [py + 8 * 0]); // [t1:t0] = py[0] * x
+ mulx(t2, rax, ptr [py + 8 * 1]); // [t2:rax] = py[1] * x
+ add(t1, rax);
+ mulx(x, rax, ptr [py + 8 * 2]); // [x:rax] = py[2] * x
+ adc(t2, rax);
+ mulx(rdx, rax, ptr [py + 8 * 3]); // [rdx:rax] = py[3] * x
+ adc(x, rax);
+ adc(rdx, 0); // fold the last carry into the top word
+ } else {
+ mov(rax, ptr [py]);
+ mul(x);
+ mov(t0, rax);
+ mov(t1, rdx); // [t1:t0] = py[0] * x
+ mov(rax, ptr [py + 8]);
+ mul(x);
+ mov(t, rax);
+ mov(t2, rdx); // [t2:t] = py[1] * x
+ mov(rax, ptr [py + 8 * 2]);
+ mul(x);
+ mov(t3, rax); // low word of py[2] * x
+ // x is reused for the high word of py[2] * x, so the multiplier moves to rax first
+ mov(rax, x);
+ mov(x, rdx);
+ mul(qword [py + 8 * 3]); // [rdx:rax] = py[3] * (original x)
+ add(t1, t);
+ adc(t2, t3);
+ adc(x, rax);
+ adc(rdx, 0);
+ }
+ }
+
+ /*
+ c = [c4:c3:c2:c1:c0]
+ c += x[3..0] * y
+ q = uint64_t(c0 * pp)
+ c = (c + q * p) >> 64
+ input [c4:c3:c2:c1:c0], px, y, p
+ output [c0:c4:c3:c2:c1]
+
+ @note use rax, rdx, destroy y
+ use xt if isFullBit_
+ */
+ void montgomery4_1(uint64_t pp, const Reg64& c4, const Reg64& c3, const Reg64& c2, const Reg64& c1, const Reg64& c0,
+ const Reg64& px, const Reg64& y, const Reg64& p,
+ const Reg64& t0, const Reg64& t1, const Reg64& t2, const Reg64& t3, const Reg64& t4, bool isFirst, const Xmm& xt)
+ {
+ if (isFirst) {
+ mul4x1(px, y, c3, c2, c1, c0, c4);
+ mov(c4, rdx);
+ // [c4:y:c2:c1:c0] = px[3..0] * y
+ } else {
+ mul4x1(px, y, t3, t2, t1, t0, t4);
+ // [rdx:y:t2:t1:t0] = px[3..0] * y
+ if (isFullBit_) {
+ movq(xt, px);
+ xor_(px, px);
+ }
+ add_rr(Pack(c4, y, c2, c1, c0), Pack(rdx, c3, t2, t1, t0));
+ if (isFullBit_) {
+ adc(px, 0);
+ }
+ }
+ // [px:c4:y:c2:c1:c0]
+ // px = 0 or 1 if isFullBit_, = 0 otherwise
+ mov(rax, pp);
+ mul(c0); // q = rax
+ mov(c3, rax);
+ mul4x1(p, c3, t3, t2, t1, t0, t4);
+ add(c0, t0); // always c0 is zero
+ adc(c1, t1);
+ adc(c2, t2);
+ adc(c3, y);
+ adc(c4, rdx);
+ if (isFullBit_) {
+ if (isFirst) {
+ adc(c0, 0);
+ } else {
+ adc(c0, px);
+ movq(px, xt);
+ }
+ }
+ }
+};
+
+} // mcl
+
+#endif
diff --git a/include/mcl/fp_util.hpp b/include/mcl/fp_util.hpp
new file mode 100644
index 0000000..7419672
--- /dev/null
+++ b/include/mcl/fp_util.hpp
@@ -0,0 +1,294 @@
+#pragma once
+#include <vector>
+#include <cybozu/itoa.hpp>
+#include <cybozu/atoi.hpp>
+#include <cybozu/bitvector.hpp>
+/**
+ @file
+ @brief utility of Fp
+ @author MITSUNARI Shigeo(@herumi)
+ @license modified new BSD license
+ http://opensource.org/licenses/BSD-3-Clause
+*/
+
+namespace mcl { namespace fp {
+
+#if defined(CYBOZU_OS_BIT) && (CYBOZU_OS_BIT == 32)
+ typedef uint32_t BlockType;
+#else
+ typedef uint64_t BlockType;
+#endif
+
+/*
+    set (b = true) or clear (b = false) the bit at position bitLen of buf[]
+    position 0 is the least significant bit of buf[0]
+*/
+template<class S>
+void setBlockBit(S *buf, size_t bitLen, bool b)
+{
+    const size_t bitsPerBlock = sizeof(S) * 8;
+    S& block = buf[bitLen / bitsPerBlock];
+    const S mask = S(S(1) << (bitLen % bitsPerBlock));
+    block = b ? S(block | mask) : S(block & ~mask);
+}
+/*
+    return the bit at position bitLen of buf[]
+    position 0 is the least significant bit of buf[0]
+*/
+template<class S>
+bool getBlockBit(const S *buf, size_t bitLen)
+{
+    const size_t bitsPerBlock = sizeof(S) * 8;
+    const S block = buf[bitLen / bitsPerBlock];
+    const S mask = S(S(1) << (bitLen % bitsPerBlock));
+    return (block & mask) != 0;
+}
+/*
+    write x[0..n) to str as a hexadecimal string
+    x[0] is the least significant block ; n == 0 is treated as the value 0
+    blocks above the highest non-zero block are not printed
+    str starts with "0x" if withPrefix
+*/
+template<class T>
+void toStr16(std::string& str, const T *x, size_t n, bool withPrefix = false)
+{
+    // index of the highest non-zero block (0 if every block is zero)
+    size_t topIdx = n > 1 ? n - 1 : 0;
+    while (topIdx > 0 && !x[topIdx]) topIdx--;
+    const T top = n == 0 ? 0 : x[topIdx];
+    const size_t topLen = cybozu::getHexLength(top);
+    const size_t digitsPerBlock = sizeof(T) * 2;
+    const size_t prefixLen = withPrefix ? 2 : 0;
+    str.resize(prefixLen + topIdx * digitsPerBlock + topLen);
+    if (withPrefix) {
+        str[0] = '0';
+        str[1] = 'x';
+    }
+    // the top block uses topLen digits, every lower block uses the full width
+    cybozu::itohex(&str[prefixLen], topLen, top, false);
+    size_t pos = prefixLen + topLen;
+    for (size_t i = topIdx; i > 0; i--) {
+        cybozu::itohex(&str[pos], digitsPerBlock, x[i - 1], false);
+        pos += digitsPerBlock;
+    }
+}
+
+/*
+    write x[0..n) to str as a binary string
+    x[0] is the least significant block ; n == 0 is treated as the value 0
+    blocks above the highest non-zero block are not printed
+    str starts with "0b" if withPrefix
+*/
+template<class T>
+void toStr2(std::string& str, const T *x, size_t n, bool withPrefix)
+{
+    // index of the highest non-zero block (0 if every block is zero)
+    size_t topIdx = n > 1 ? n - 1 : 0;
+    while (topIdx > 0 && !x[topIdx]) topIdx--;
+    const T top = n == 0 ? 0 : x[topIdx];
+    const size_t topLen = cybozu::getBinLength(top);
+    const size_t digitsPerBlock = sizeof(T) * 8;
+    const size_t prefixLen = withPrefix ? 2 : 0;
+    str.resize(prefixLen + topIdx * digitsPerBlock + topLen);
+    if (withPrefix) {
+        str[0] = '0';
+        str[1] = 'b';
+    }
+    // the top block uses topLen digits, every lower block uses the full width
+    cybozu::itobin(&str[prefixLen], topLen, top);
+    size_t pos = prefixLen + topLen;
+    for (size_t i = topIdx; i > 0; i--) {
+        cybozu::itobin(&str[pos], digitsPerBlock, x[i - 1]);
+        pos += digitsPerBlock;
+    }
+}
+
+/*
+ convert hex string to x[0..xn)
+ hex string = [0-9a-fA-F]+
+ the string is read with its most significant digit first ; its last unitLen digits go to x[0]
+ the blocks of x above the converted value are set to zero
+ throw cybozu::Exception if strLen is zero, if xn is too small for the string,
+ or if cybozu::hextoi reports a failure
+*/
+template<class T>
+void fromStr16(T *x, size_t xn, const char *str, size_t strLen)
+{
+ if (strLen == 0) throw cybozu::Exception("fp:fromStr16:strLen is zero");
+ const size_t unitLen = sizeof(T) * 2; // hex digits per block
+ const size_t q = strLen / unitLen; // number of full blocks
+ const size_t r = strLen % unitLen; // digits of the partial top block (0 if none)
+ const size_t requireSize = q + (r ? 1 : 0);
+ if (xn < requireSize) throw cybozu::Exception("fp:fromStr16:short size") << xn << requireSize;
+ // full blocks : x[i] is taken from the i-th group of unitLen digits counted from the end of str
+ for (size_t i = 0; i < q; i++) {
+ bool b;
+ x[i] = cybozu::hextoi(&b, &str[r + (q - 1 - i) * unitLen], unitLen);
+ if (!b) throw cybozu::Exception("fp:fromStr16:bad char") << cybozu::exception::makeString(str, strLen);
+ }
+ // the leading r digits form the top block
+ if (r) {
+ bool b;
+ x[q] = cybozu::hextoi(&b, str, r);
+ if (!b) throw cybozu::Exception("fp:fromStr16:bad char") << cybozu::exception::makeString(str, strLen);
+ }
+ for (size_t i = requireSize; i < xn; i++) x[i] = 0;
+}
+
+/*
+ parse the optional sign and the optional base prefix of str
+ @param isMinus [out] true if str starts with '-'
+ @param base [inout] "0x" requires base to be 0 or 16 and sets it to 16,
+ "0b" requires base to be 0 or 2 and sets it to 2,
+ a base that is still 0 afterwards is set to 10
+ @return pointer into str.c_str() just after the sign and the prefix
+ @note throw cybozu::Exception if the prefix contradicts base
+ or if nothing follows the sign and the prefix
+ @note only the lower case prefix characters 'x' and 'b' are recognized
+*/
+inline const char *verifyStr(bool *isMinus, int *base, const std::string& str)
+{
+ const char *p = str.c_str();
+ if (*p == '-') {
+ *isMinus = true;
+ p++;
+ } else {
+ *isMinus = false;
+ }
+ // p[1] is safe to read : p[0] == '0' means the terminator is at p[1] or later
+ if (p[0] == '0') {
+ if (p[1] == 'x') {
+ if (*base != 0 && *base != 16) {
+ throw cybozu::Exception("fp:verifyStr:bad base") << *base << str;
+ }
+ *base = 16;
+ p += 2;
+ } else if (p[1] == 'b') {
+ if (*base != 0 && *base != 2) {
+ throw cybozu::Exception("fp:verifyStr:bad base") << *base << str;
+ }
+ *base = 2;
+ p += 2;
+ }
+ }
+ if (*base == 0) *base = 10;
+ if (*p == '\0') throw cybozu::Exception("fp:verifyStr:str is empty");
+ return p;
+}
+
+/*
+    number of blocks of type S needed to hold x bits
+    (x divided by the bit size of S, rounded up)
+*/
+template<class S>
+size_t getRoundNum(size_t x)
+{
+    const size_t bitsPerBlock = sizeof(S) * 8;
+    const size_t biased = x + bitsPerBlock - 1;
+    return biased / bitsPerBlock;
+}
+
+/*
+    compare x[0, n) with y[0, n) as multi-block integers (index n - 1 is the most significant block)
+    return 1 if x > y, -1 if x < y, 0 if equal (also for n == 0)
+*/
+template<class S>
+int compareArray(const S* x, const S* y, size_t n)
+{
+    // scan from the most significant block ; the first difference decides
+    for (size_t i = n; i > 0; i--) {
+        const S lhs = x[i - 1];
+        const S rhs = y[i - 1];
+        if (lhs != rhs) return lhs > rhs ? 1 : -1;
+    }
+    return 0;
+}
+
+/*
+ get random value less than in[]
+ n = (bitLen + sizeof(S) * 8 - 1) / (sizeof(S) * 8)
+ input in[0..n)
+ output out[0..n)
+ 0 <= out < in
+ rg.read(out, n) is called repeatedly ; a candidate is masked to bitLen bits
+ and accepted only if it is less than in[]
+ NOTE : the loop never ends if in[] is zero, because no candidate can be less than it
+*/
+template<class RG, class S>
+inline void getRandVal(S *out, RG& rg, const S *in, size_t bitLen)
+{
+ const size_t unitBitSize = sizeof(S) * 8;
+ const size_t n = getRoundNum<S>(bitLen);
+ const size_t rem = bitLen & (unitBitSize - 1); // bits used in the top block (0 means all of them)
+ for (;;) {
+ rg.read(out, n);
+ // drop the bits above bitLen in the top block
+ if (rem > 0) out[n - 1] &= (S(1) << rem) - 1;
+ if (compareArray(out, in, n) < 0) return;
+ }
+}
+
+/*
+ z[] = (x[] << shift) | y
+ @param z [out] z[0..n)
+ @param x [in] x[0..n)
+ @param n [in] length of x, z
+ @param shift [in] 0 <= shift < (sizeof(S) * 8)
+ @param y [in]
+ @return (x[] << shift)[n]
+*/
+template<class S>
+S shiftLeftOr(S* z, const S* x, size_t n, size_t shift, S y = 0)
+{
+ if (n == 0) {
+ throw cybozu::Exception("fp:shiftLeftOr:bad n");
+ }
+ if (shift == 0) {
+ for (size_t i = n - 1; i > 0; i--) {
+ z[i] = x[i];
+ }
+ z[0] = x[0] | y;
+ return 0;
+ }
+ const size_t unitSize = sizeof(S) * 8;
+ if (shift >= unitSize) {
+ throw cybozu::Exception("fp:shiftLeftOr:large shift") << shift;
+ }
+ const size_t rev = unitSize - shift;
+ S ret = x[n - 1] >> rev;
+ for (size_t i = n - 1; i > 0; i--) {
+ z[i] = (x[i] << shift) | (x[i - 1] >> rev);
+ }
+ z[0] = (x[0] << shift) | y;
+ return ret;
+}
+/*
+ z[] = x[] >> shift
+ @param z [out] z[0..n)
+ @param x [in] x[0..n)
+ @param n [in] length of x, z ; nothing is done if n == 0
+ @param shift [in] 0 <= shift < (sizeof(S) * 8)
+ @note throw cybozu::Exception if shift is too large (not checked when n == 0)
+*/
+template<class S>
+void shiftRight(S* z, const S* x, size_t n, size_t shift)
+{
+ if (n == 0) return;
+ if (shift == 0) {
+ // plain copy ; this also avoids the shift by unitSize bits that (t << rev) would be below
+ for (size_t i = 0; i < n; i++) {
+ z[i] = x[i];
+ }
+ return;
+ }
+ const size_t unitSize = sizeof(S) * 8;
+ if (shift >= unitSize) {
+ throw cybozu::Exception("fp:shiftRight:large shift") << shift;
+ }
+ const size_t rev = unitSize - shift;
+ // x[i + 1] is read before z[i] is written
+ S prev = x[0];
+ for (size_t i = 0; i < n - 1; i++) {
+ S t = x[i + 1];
+ z[i] = (prev >> shift) | (t << rev);
+ prev = t;
+ }
+ z[n - 1] = prev >> shift;
+}
+
+/*
+    split bv into chunks of width bits and append them to v
+    chunk i is bv.extract(i * width, width) ; a shorter last chunk holds the remaining bits
+    @param v [inout] chunks are added with push_back ; existing elements are kept
+    @param bv [in] source bit vector
+    @param width [in] chunk size in bits ; 1 <= width <= bit size of Vec::value_type
+    @return bit size of the last chunk (width if bv.size() is a multiple of width)
+    @note throw cybozu::Exception if width is out of range
+*/
+template<class Vec, class T>
+size_t splitBitVec(Vec& v, const cybozu::BitVectorT<T>& bv, size_t width)
+{
+    // width == 0 is rejected too : it would divide by zero below
+    if (width == 0 || width > sizeof(typename Vec::value_type) * 8) {
+        throw cybozu::Exception("fp:splitBitVec:bad width") << width;
+    }
+    const size_t q = bv.size() / width;
+    const size_t r = bv.size() % width;
+    for (size_t i = 0; i < q; i++) {
+        v.push_back(bv.extract(i * width, width));
+    }
+    if (r > 0) {
+        v.push_back(bv.extract(q * width, r));
+    }
+    return r ? r : width;
+}
+
+/*
+    rebuild bv from the chunks in v (inverse of splitBitVec)
+    @param bv [out] cleared first, then filled by appending the chunks in order
+    @param v [in] chunks ; bv is left empty if v is empty
+    @param width [in] bit size of every chunk except the last one
+    @param lastWidth [in] bit size of the last chunk
+    @note throw cybozu::Exception if width exceeds the bit size of Vec::value_type
+*/
+template<class Vec, class T>
+void concatBitVec(cybozu::BitVectorT<T>& bv, const Vec& v, size_t width, size_t lastWidth)
+{
+    if (width > sizeof(typename Vec::value_type) * 8) {
+        throw cybozu::Exception("fp:concatBitVec:bad width") << width;
+    }
+    bv.clear();
+    // v.size() - 1 would wrap around for an empty v
+    if (v.size() == 0) return;
+    const size_t last = v.size() - 1;
+    for (size_t i = 0; i < last; i++) {
+        bv.append(v[i], width);
+    }
+    bv.append(v[last], lastWidth);
+}
+
+} // mcl::fp
+
+} // mcl
diff --git a/include/mcl/gmp_util.hpp b/include/mcl/gmp_util.hpp
new file mode 100644
index 0000000..c29c870
--- /dev/null
+++ b/include/mcl/gmp_util.hpp
@@ -0,0 +1,378 @@
+#pragma once
+/**
+ @file
+ @brief util function for gmp
+ @author MITSUNARI Shigeo(@herumi)
+ @license modified new BSD license
+ http://opensource.org/licenses/BSD-3-Clause
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <vector>
+#include <assert.h>
+#ifdef _MSC_VER
+ #pragma warning(push)
+ #pragma warning(disable : 4616)
+ #pragma warning(disable : 4800)
+ #pragma warning(disable : 4244)
+ #pragma warning(disable : 4127)
+ #pragma warning(disable : 4512)
+ #pragma warning(disable : 4146)
+#endif
+#include <gmpxx.h>
+#include <stdint.h>
+#ifdef _MSC_VER
+ #pragma warning(pop)
+#endif
+#ifdef _MSC_VER
+#if _MSC_VER == 1900
+#ifdef _DEBUG
+#pragma comment(lib, "14/mpird.lib")
+#pragma comment(lib, "14/mpirxxd.lib")
+#else
+#pragma comment(lib, "14/mpir.lib")
+#pragma comment(lib, "14/mpirxx.lib")
+#endif
+#elif _MSC_VER == 1800
+#ifdef _DEBUG
+#pragma comment(lib, "12/mpird.lib")
+#pragma comment(lib, "12/mpirxxd.lib")
+#else
+#pragma comment(lib, "12/mpir.lib")
+#pragma comment(lib, "12/mpirxx.lib")
+#endif
+#else
+#ifdef _DEBUG
+#pragma comment(lib, "mpird.lib")
+#pragma comment(lib, "mpirxxd.lib")
+#else
+#pragma comment(lib, "mpir.lib")
+#pragma comment(lib, "mpirxx.lib")
+#endif
+#endif
+#endif
+#include <mcl/operator.hpp>
+
+namespace mcl {
+
+struct Gmp {
+ typedef mpz_class ImplType;
+#if CYBOZU_OS_BIT == 64
+ typedef uint64_t BlockType;
+#else
+ typedef uint32_t BlockType;
+#endif
+ // z = [buf[n-1]:..:buf[1]:buf[0]]
+ // eg. buf[] = {0x12345678, 0xaabbccdd}; => z = 0xaabbccdd12345678;
+ template<class T>
+ static void setRaw(mpz_class& z, const T *buf, size_t n)
+ {
+ mpz_import(z.get_mpz_t(), n, -1, sizeof(*buf), 0, 0, buf);
+ }
+ /*
+ return positive written size
+ return 0 if failure
+ */
+ template<class T>
+ static size_t getRaw(T *buf, size_t maxSize, const mpz_class& x)
+ {
+ const size_t totalSize = sizeof(T) * maxSize;
+ if (getBitLen(x) > totalSize * 8) return 0;
+ memset(buf, 0, sizeof(*buf) * maxSize);
+ size_t size;
+ mpz_export(buf, &size, -1, sizeof(T), 0, 0, x.get_mpz_t());
+ // if x == 0, then size = 0 for gmp, size = 1 for mpir
+ return size == 0 ? 1 : size;
+ }
+ static inline void set(mpz_class& z, uint64_t x)
+ {
+ setRaw(z, &x, 1);
+ }
+ static inline bool fromStr(mpz_class& z, const std::string& str, int base = 0)
+ {
+ return z.set_str(str, base) == 0;
+ }
+ static inline void toStr(std::string& str, const mpz_class& z, int base = 10)
+ {
+ str = z.get_str(base);
+ }
+ static inline void add(mpz_class& z, const mpz_class& x, const mpz_class& y)
+ {
+ mpz_add(z.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t());
+ }
+ static inline void add(mpz_class& z, const mpz_class& x, unsigned int y)
+ {
+ mpz_add_ui(z.get_mpz_t(), x.get_mpz_t(), y);
+ }
+ static inline void sub(mpz_class& z, const mpz_class& x, const mpz_class& y)
+ {
+ mpz_sub(z.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t());
+ }
+ static inline void sub(mpz_class& z, const mpz_class& x, unsigned int y)
+ {
+ mpz_sub_ui(z.get_mpz_t(), x.get_mpz_t(), y);
+ }
+ static inline void mul(mpz_class& z, const mpz_class& x, const mpz_class& y)
+ {
+ mpz_mul(z.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t());
+ }
+ static inline void square(mpz_class& z, const mpz_class& x)
+ {
+ mpz_mul(z.get_mpz_t(), x.get_mpz_t(), x.get_mpz_t());
+ }
+ static inline void mul(mpz_class& z, const mpz_class& x, unsigned int y)
+ {
+ mpz_mul_ui(z.get_mpz_t(), x.get_mpz_t(), y);
+ }
+ static inline void divmod(mpz_class& q, mpz_class& r, const mpz_class& x, const mpz_class& y)
+ {
+ mpz_divmod(q.get_mpz_t(), r.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t());
+ }
+ static inline void div(mpz_class& q, const mpz_class& x, const mpz_class& y)
+ {
+ mpz_div(q.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t());
+ }
+ static inline void div(mpz_class& q, const mpz_class& x, unsigned int y)
+ {
+ mpz_div_ui(q.get_mpz_t(), x.get_mpz_t(), y);
+ }
+ static inline void mod(mpz_class& r, const mpz_class& x, const mpz_class& m)
+ {
+ mpz_mod(r.get_mpz_t(), x.get_mpz_t(), m.get_mpz_t());
+ }
+ static inline void mod(mpz_class& r, const mpz_class& x, unsigned int m)
+ {
+ mpz_mod_ui(r.get_mpz_t(), x.get_mpz_t(), m);
+ }
+ static inline void clear(mpz_class& z)
+ {
+ mpz_set_ui(z.get_mpz_t(), 0);
+ }
+ static inline bool isZero(const mpz_class& z)
+ {
+ return mpz_sgn(z.get_mpz_t()) == 0;
+ }
+ static inline bool isNegative(const mpz_class& z)
+ {
+ return mpz_sgn(z.get_mpz_t()) < 0;
+ }
+ static inline void neg(mpz_class& z, const mpz_class& x)
+ {
+ mpz_neg(z.get_mpz_t(), x.get_mpz_t());
+ }
+ static inline int compare(const mpz_class& x, const mpz_class & y)
+ {
+ return mpz_cmp(x.get_mpz_t(), y.get_mpz_t());
+ }
+ static inline int compare(const mpz_class& x, int y)
+ {
+ return mpz_cmp_si(x.get_mpz_t(), y);
+ }
+ template<class T>
+ static inline void addMod(mpz_class& z, const mpz_class& x, const T& y, const mpz_class& m)
+ {
+ add(z, x, y);
+ if (compare(z, m) >= 0) {
+ sub(z, z, m);
+ }
+ }
+ template<class T>
+ static inline void subMod(mpz_class& z, const mpz_class& x, const T& y, const mpz_class& m)
+ {
+ sub(z, x, y);
+ if (!isNegative(z)) return;
+ add(z, z, m);
+ }
+ template<class T>
+ static inline void mulMod(mpz_class& z, const mpz_class& x, const T& y, const mpz_class& m)
+ {
+ mul(z, x, y);
+ mod(z, z, m);
+ }
+ static inline void squareMod(mpz_class& z, const mpz_class& x, const mpz_class& m)
+ {
+ square(z, x);
+ mod(z, z, m);
+ }
+ // z = x^y (y >= 0)
+ static inline void pow(mpz_class& z, const mpz_class& x, unsigned int y)
+ {
+ mpz_pow_ui(z.get_mpz_t(), x.get_mpz_t(), y);
+ }
+ // z = x^y mod m (y >=0)
+ static inline void powMod(mpz_class& z, const mpz_class& x, const mpz_class& y, const mpz_class& m)
+ {
+ mpz_powm(z.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t(), m.get_mpz_t());
+ }
+ // z = 1/x mod m
+ static inline void invMod(mpz_class& z, const mpz_class& x, const mpz_class& m)
+ {
+ mpz_invert(z.get_mpz_t(), x.get_mpz_t(), m.get_mpz_t());
+ }
+ // z = lcm(x, y)
+ static inline void lcm(mpz_class& z, const mpz_class& x, const mpz_class& y)
+ {
+ mpz_lcm(z.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t());
+ }
+ static inline mpz_class lcm(const mpz_class& x, const mpz_class& y)
+ {
+ mpz_class z;
+ lcm(z, x, y);
+ return z;
+ }
+ // z = gcd(x, y)
+ static inline void gcd(mpz_class& z, const mpz_class& x, const mpz_class& y)
+ {
+ mpz_gcd(z.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t());
+ }
+ static inline mpz_class gcd(const mpz_class& x, const mpz_class& y)
+ {
+ mpz_class z;
+ gcd(z, x, y);
+ return z;
+ }
+ /*
+ Legendre symbol (a/p), computed by mpz_legendre
+ assume p : odd prime
+ return 1 if x^2 = a mod p for some x (a != 0 mod p)
+ return -1 if x^2 != a mod p for any x
+ return 0 if a = 0 mod p
+ */
+ static inline int legendre(const mpz_class& a, const mpz_class& p)
+ {
+ return mpz_legendre(a.get_mpz_t(), p.get_mpz_t());
+ }
+ static inline bool isPrime(const mpz_class& x)
+ {
+ return mpz_probab_prime_p(x.get_mpz_t(), 25) != 0;
+ }
+ static inline size_t getBitLen(const mpz_class& x)
+ {
+ return mpz_sizeinbase(x.get_mpz_t(), 2);
+ }
+ static inline BlockType getBlock(const mpz_class& x, size_t i)
+ {
+ return x.get_mpz_t()->_mp_d[i];
+ }
+ static inline const BlockType *getBlock(const mpz_class& x)
+ {
+ return reinterpret_cast<const BlockType*>(x.get_mpz_t()->_mp_d);
+ }
+ static inline size_t getBlockSize(const mpz_class& x)
+ {
+ assert(x.get_mpz_t()->_mp_size >= 0);
+ return x.get_mpz_t()->_mp_size;
+ }
+ /*
+ set z to a random value of exactly bitLen bits (the top bit is always set)
+ @param z [out] result
+ @param bitLen [in] must be greater than 1 ; checked only by assert
+ @param rg [in] random generator ; rg.read(ptr, n) is called once with a uint32_t buffer of n elements
+ */
+ template<class RG>
+ static inline void getRand(mpz_class& z, size_t bitLen, RG& rg)
+ {
+ assert(bitLen > 1);
+ const size_t rem = bitLen & 31; // bits used in the top 32-bit word (0 means all 32)
+ const size_t n = (bitLen + 31) / 32; // number of 32-bit words
+ std::vector<uint32_t> buf(n);
+ rg.read(buf.data(), n);
+ uint32_t v = buf[n - 1];
+ if (rem == 0) {
+ // the top word is fully used : only force its highest bit
+ v |= 1U << 31;
+ } else {
+ // keep the low rem bits and force bit rem - 1, the highest bit of the result
+ v &= (1U << rem) - 1;
+ v |= 1U << (rem - 1);
+ }
+ buf[n - 1] = v;
+ Gmp::setRaw(z, &buf[0], n);
+ }
+ template<class RG>
+ static void getRandPrime(mpz_class& z, size_t bitLen, RG& rg, bool setSecondBit = false, bool mustBe3mod4 = false)
+ {
+ assert(bitLen > 2);
+ do {
+ getRand(z, bitLen, rg);
+ if (setSecondBit) {
+ z |= mpz_class(1) << (bitLen - 2);
+ }
+ if (mustBe3mod4) {
+ z |= 3;
+ }
+ } while (!(isPrime(z)));
+ }
+};
+
+/*
+    Tonelli-Shanks
+    square root modulo an odd prime p
+    usage : call set(p) once, then get(x, a) to solve x^2 = a mod p
+*/
+class SquareRoot {
+    bool isPrime; // result of the primality test done by set()
+    mpz_class p; // modulus ; stored by set() only if it is prime
+    mpz_class g; // quadratic nonresidue mod p
+    int r;
+    mpz_class q; // p - 1 = 2^r q
+    mpz_class s; // s = g^q
+public:
+    SquareRoot() : isPrime(false) {}
+    /*
+        precompute g, r, q, s for the modulus p
+        throw cybozu::Exception if p <= 2
+        nothing is thrown here if p is not prime ; get() throws instead
+    */
+    void set(const mpz_class& p)
+    {
+        if (p <= 2) throw cybozu::Exception("SquareRoot:bad p") << p;
+        isPrime = Gmp::isPrime(p);
+        if (!isPrime) return; // don't throw until get() is called
+        this->p = p;
+        // g is quadratic nonresidue
+        g = 2;
+        while (Gmp::legendre(g, p) > 0) {
+            g++;
+        }
+        // p - 1 = 2^r q, q is odd
+        r = 0;
+        q = p - 1;
+        while ((q & 1) == 0) {
+            r++;
+            q /= 2;
+        }
+        Gmp::powMod(s, g, q, p);
+    }
+    /*
+        solve x^2 = a mod p
+        @param x [out] a square root of a ; not modified if false is returned
+        @param a [in] x and a may be the same object
+        @return false if a is not a square mod p
+        @note throw cybozu::Exception if set() was not called with a prime
+    */
+    bool get(mpz_class& x, const mpz_class& a) const
+    {
+        if (!isPrime) throw cybozu::Exception("SquareRoot:get:not prime") << p;
+        const int symbol = Gmp::legendre(a, p);
+        if (symbol < 0) return false;
+        if (symbol == 0) {
+            // a = 0 mod p : the root is 0
+            // the loop below cannot handle it : d would be 0 and dd would never become 1
+            x = 0;
+            return true;
+        }
+        if (r == 1) {
+            // p = 3 mod 4 : a^((p + 1) / 4) is a root
+            Gmp::powMod(x, a, (p + 1) / 4, p);
+            return true;
+        }
+        mpz_class c = s, d;
+        int e = r;
+        Gmp::powMod(d, a, q, p);
+        Gmp::powMod(x, a, (q + 1) / 2, p); // destroy a if &x == &a
+        while (d != 1) {
+            // i = smallest positive integer such that d^(2^i) = 1
+            int i = 1;
+            mpz_class dd = (d * d) % p;
+            while (dd != 1) {
+                dd = (dd * dd) % p;
+                i++;
+            }
+            // b = c^(2^(e - i - 1))
+            mpz_class b = 1;
+            b <<= e - i - 1;
+            Gmp::powMod(b, c, b, p);
+            x = (x * b) % p;
+            c = (b * b) % p;
+            d = (d * c) % p;
+            e = i;
+        }
+        return true;
+    }
+};
+
+namespace ope {
+
+/*
+ specialization of Optimized for mpz_class
+ powMod is forwarded to mpz_powm ; init does nothing
+*/
+template<>
+struct Optimized<mpz_class> {
+ void init(const mpz_class&) {}
+ bool hasPowMod() const { return true; }
+ // z = x^y mod m
+ static void powMod(mpz_class& z, const mpz_class& x, const mpz_class& y, const mpz_class& m)
+ {
+ mpz_powm(z.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t(), m.get_mpz_t());
+ }
+};
+
+} // mcl::ope
+
+} // mcl
diff --git a/include/mcl/mont_fp.hpp b/include/mcl/mont_fp.hpp
new file mode 100644
index 0000000..1b539bf
--- /dev/null
+++ b/include/mcl/mont_fp.hpp
@@ -0,0 +1,463 @@
+#pragma once
+/**
+ @file
+ @brief Fp with montgomery(EXPERIMENTAL IMPLEMENTATION)
+ @author MITSUNARI Shigeo(@herumi)
+ @license modified new BSD license
+ http://opensource.org/licenses/BSD-3-Clause
+
+ @note this class should be merged to FpT
+*/
+#include <sstream>
+#include <vector>
+#include <mcl/gmp_util.hpp>
+#include <mcl/fp.hpp>
+#include <mcl/fp_generator.hpp>
+
+namespace mcl {
+
+template<size_t N, class tag = fp_local::TagDefault>
+class MontFpT : public ope::addsub<MontFpT<N, tag>,
+ ope::mulable<MontFpT<N, tag>,
+ ope::invertible<MontFpT<N, tag>,
+ ope::hasNegative<MontFpT<N, tag>,
+ ope::hasIO<MontFpT<N, tag> > > > > > {
+
+ static mpz_class pOrg_;
+ static mcl::SquareRoot sq_;
+ static MontFpT p_;
+ static MontFpT one_;
+ static MontFpT R_; // (1 << (N * 64)) % p
+ static MontFpT RR_; // (R * R) % p
+ static MontFpT invTbl_[N * 64 * 2];
+ static size_t modBitLen_;
+public:
+ static FpGenerator fg_;
+private:
+ uint64_t v_[N];
+ void fromRawGmp(const mpz_class& x)
+ {
+ if (Gmp::getRaw(v_, N, x) == 0) {
+ throw cybozu::Exception("MontFpT:fromRawGmp") << x;
+ }
+ }
+ /*
+     set v_ from buf[] so that the stored value is less than p
+     buf is masked with modBitLen_ by fp::maskBuffer, copied to the low bytes of v_,
+     and p is subtracted once if the value is not less than p
+     the words of v_ that buf does not cover are set to zero
+     NOTE(review): assumes fp::maskBuffer clears the bits at and above modBitLen_ - confirm
+     @param buf [inout] not empty ; must fit into v_
+ */
+ template<class S>
+ void setMaskMod(std::vector<S>& buf)
+ {
+     assert(!buf.empty());
+     // buf is copied into v_ below, so it must not be larger than v_
+     assert(buf.size() * sizeof(S) <= sizeof(v_));
+     fp::maskBuffer(&buf[0], buf.size(), modBitLen_);
+     // the default constructor leaves v_ uninitialized ; clear the words memcpy does not write
+     clear();
+     memcpy(v_, &buf[0], buf.size() * sizeof(S));
+     if (compare(*this, p_) >= 0) {
+         // a value below 2^modBitLen_ is below 2p, so one subtraction is enough
+         subNc(*this, *this, p_);
+     }
+     assert(compare(*this, p_) < 0);
+ }
+ /*
+ fill invTbl[0..n) with powers of two, n = N * 64 * 2
+ invTbl[k] = 2^(n - k) as a MontFpT value (built from MontFpT(2) by repeated doubling)
+ inv() multiplies by invTbl[k]
+ */
+ static void initInvTbl(MontFpT *invTbl)
+ {
+ MontFpT t(2);
+ const int n = N * 64 * 2;
+ for (int i = 0; i < n; i++) {
+ // t = 2^(i + 1) here
+ invTbl[n - 1 - i] = t;
+ t += t;
+ }
+ }
+ typedef void (*void3op)(MontFpT&, const MontFpT&, const MontFpT&);
+ typedef bool (*bool3op)(MontFpT&, const MontFpT&, const MontFpT&);
+ typedef void (*void2op)(MontFpT&, const MontFpT&);
+ typedef int (*int2op)(MontFpT&, const MontFpT&);
+public:
+ static const size_t BlockSize = N;
+ typedef uint64_t BlockType;
+ MontFpT() {}
+ MontFpT(int x) { operator=(x); }
+ MontFpT(uint64_t x) { operator=(x); }
+ explicit MontFpT(const std::string& str, int base = 0)
+ {
+ fromStr(str, base);
+ }
+ /*
+     set the value from the integer x
+     |x| is stored in v_ and multiplied by RR_ ; the result is negated by neg() if x is negative
+     x == 0 just clears v_
+ */
+ MontFpT& operator=(int x)
+ {
+     if (x == 0) {
+         clear();
+         return *this;
+     }
+     // |x| is computed in unsigned arithmetic ; abs(x) is undefined for INT_MIN
+     uint64_t absX = uint64_t(x);
+     if (x < 0) absX = 0 - absX;
+     v_[0] = absX;
+     for (size_t i = 1; i < N; i++) v_[i] = 0;
+     mul(*this, *this, RR_);
+     if (x < 0) {
+         neg(*this, *this);
+     }
+     return *this;
+ }
+ MontFpT& operator=(uint64_t x)
+ {
+ v_[0] = x;
+ for (size_t i = 1; i < N; i++) v_[i] = 0;
+ mul(*this, *this, RR_);
+ return *this;
+ }
+ void fromStr(const std::string& str, int base = 0)
+ {
+ bool isMinus;
+ const char *p = fp::verifyStr(&isMinus, &base, str);
+
+ if (base == 16) {
+ MontFpT t;
+ mcl::fp::fromStr16(t.v_, N, p, str.size() - (p - str.c_str()));
+ if (compare(t, p_) >= 0) throw cybozu::Exception("fp:MontFpT:str is too large") << str;
+ mul(*this, t, RR_);
+ } else {
+ mpz_class t;
+ if (!Gmp::fromStr(t, p, base)) {
+ throw cybozu::Exception("fp:MontFpT:fromStr") << str;
+ }
+ toMont(*this, t);
+ }
+ if (isMinus) {
+ neg(*this, *this);
+ }
+ }
+ /*
+     print the raw words of v_ to stdout in hexadecimal, highest index first,
+     each as 16 digits followed by a space, then a newline
+ */
+ void put() const
+ {
+     for (size_t i = N; i > 0; i--) {
+         // uint64_t is not always unsigned long long ; cast so the argument matches %llx
+         printf("%016llx ", (unsigned long long)v_[i - 1]);
+     }
+     printf("\n");
+ }
+ void set(const std::string& str, int base = 0) { fromStr(str, base); }
+ void toStr(std::string& str, int base = 10, bool withPrefix = false) const
+ {
+ if (isZero()) {
+ str = "0";
+ return;
+ }
+ if (base == 16 || base == 2) {
+ MontFpT t;
+ mul(t, *this, one_);
+ if (base == 16) {
+ mcl::fp::toStr16(str, t.v_, N, withPrefix);
+ } else {
+ mcl::fp::toStr2(str, t.v_, N, withPrefix);
+ }
+ return;
+ }
+ if (base != 10) throw cybozu::Exception("fp:MontFpT:toStr:bad base") << base;
+ // QQQ : remove conversion to gmp
+ mpz_class t;
+ fromMont(t, *this);
+ Gmp::toStr(str, t, base);
+ }
+ std::string toStr(int base = 10, bool withPrefix = false) const
+ {
+ std::string str;
+ toStr(str, base, withPrefix);
+ return str;
+ }
+ void clear()
+ {
+ for (size_t i = 0; i < N; i++) v_[i] = 0;
+ }
+ template<class RG>
+ void setRand(RG& rg)
+ {
+ fp::getRandVal(v_, rg, p_.v_, modBitLen_);
+ }
+ template<class S>
+ void setRaw(const S *inBuf, size_t n)
+ {
+ n = std::min(n, fp::getRoundNum<S>(modBitLen_));
+ if (n == 0) {
+ clear();
+ return;
+ }
+ std::vector<S> buf(inBuf, inBuf + n);
+ setMaskMod(buf);
+ }
+ static inline void setModulo(const std::string& pstr, int base = 0)
+ {
+ bool isMinus;
+ const char *p = fp::verifyStr(&isMinus, &base, pstr);
+ if (isMinus) throw cybozu::Exception("MontFp:setModulo:mstr is not pinus") << pstr;
+ if (!Gmp::fromStr(pOrg_, p, base)) {
+ throw cybozu::Exception("fp:MontFpT:setModulo") << pstr << base;
+ }
+ modBitLen_ = Gmp::getBitLen(pOrg_);
+ if (fp::getRoundNum<uint64_t>(modBitLen_) > N) {
+ throw cybozu::Exception("MontFp:setModulo:bad prime length") << pstr;
+ }
+ p_.fromRawGmp(pOrg_);
+ sq_.set(pOrg_);
+
+ mpz_class t = 1;
+ one_.fromRawGmp(t);
+ t = (t << (N * 64)) % pOrg_;
+ R_.fromRawGmp(t);
+ t = (t * t) % pOrg_;
+ RR_.fromRawGmp(t);
+ fg_.init(p_.v_, N);
+ add = Xbyak::CastTo<void3op>(fg_.add_);
+ sub = Xbyak::CastTo<void3op>(fg_.sub_);
+ mul = Xbyak::CastTo<void3op>(fg_.mul_);
+ square = Xbyak::CastTo<void2op>(fg_.sqr_);
+ if (square == 0) square = squareC;
+ neg = Xbyak::CastTo<void2op>(fg_.neg_);
+ shr1 = Xbyak::CastTo<void2op>(fg_.shr1_);
+ addNc = Xbyak::CastTo<bool3op>(fg_.addNc_);
+ subNc = Xbyak::CastTo<bool3op>(fg_.subNc_);
+ preInv = Xbyak::CastTo<int2op>(fg_.preInv_);
+ initInvTbl(invTbl_);
+ }
+ static inline void getModulo(std::string& pstr)
+ {
+ Gmp::toStr(pstr, pOrg_);
+ }
+ static inline bool isYodd(const MontFpT& y)
+ {
+#if 0
+ return (y.v_[0] & 1) == 1;
+#else
+ MontFpT t; // QQQ : is necessary?
+ mul(t, y, one_);
+ return (t.v_[0] & 1) == 1;
+#endif
+ }
+ static inline bool squareRoot(MontFpT& y, const MontFpT& x)
+ {
+ mpz_class t;
+ fromMont(t, x);
+ if (!sq_.get(t, t)) return false;
+ toMont(y, t);
+ return true;
+ }
+ static inline void fromMont(mpz_class& z, const MontFpT& x)
+ {
+ MontFpT t;
+ mul(t, x, one_);
+ Gmp::setRaw(z, t.v_, N);
+ }
+ static inline void toMont(MontFpT& z, const mpz_class& x)
+ {
+ if (x >= pOrg_) throw cybozu::Exception("fp:MontFpT:toMont:large x") << x;
+ MontFpT t;
+ t.fromRawGmp(x);
+ mul(z, t, RR_);
+ }
+ static void3op add;
+ static void3op sub;
+ static void3op mul;
+ static void2op square;
+ static void2op neg;
+ static void2op shr1;
+ static bool3op addNc;
+ static bool3op subNc;
+ static int2op preInv;
+ static inline void squareC(MontFpT& z, const MontFpT& x)
+ {
+ mul(z, x, x);
+ }
+ static inline int preInvC(MontFpT& r, const MontFpT& x)
+ {
+ MontFpT u, v, s;
+ u = p_;
+ v = x;
+ r.clear();
+ s.clear(); s.v_[0] = 1; // s is real 1
+ int k = 0;
+ // u, v : Pack, r, s : mem
+ bool rTop = false;
+ LP:
+ if (v.isZero()) goto EXIT;
+ if ((u.v_[0] & 1) == 0) {
+ goto U_EVEN;
+ }
+ if ((v.v_[0] & 1) == 0) {
+ goto V_EVEN;
+ }
+ if (compare(v, u) < 0) {
+ goto V_LT_U;
+ }
+ subNc(v, v, u); // sub_rr
+ addNc(s, s, r); // add_mm
+ V_EVEN:
+ shr1(v, v); // shr1_r
+ rTop = addNc(r, r, r); // twice_m
+ k++;
+ goto LP;
+ V_LT_U:
+ subNc(u, u, v); // sub_rr
+ rTop = addNc(r, r, s); // add_mm
+ U_EVEN:
+ shr1(u, u); // shr1_r
+ addNc(s, s, s); // twice_m
+ k++;
+ goto LP;
+ EXIT:;
+ if (rTop) subNc(r, r, p_);
+ if (subNc(r, p_, r)) {
+ addNc(r, r, p_);
+ }
+ return k;
+ }
+ static inline void inv(MontFpT& z, const MontFpT& x)
+ {
+#if 1
+ MontFpT r;
+#if 1
+ int k = preInv(r, x);
+#else
+ MontFpT s;
+ int h = preInvC(s, x);
+ int k = preInv(r, x);
+ if (r != s || k != h) {
+ std::cout << std::hex;
+ PUT(x);
+ PUT(r);
+ PUT(s);
+ printf("k=%d, h=%d\n", k, h);
+ exit(1);
+ }
+#endif
+ /*
+ xr = 2^k
+ R = 2^(N * 64)
+ get r2^(-k)R^2 = r 2^(N * 64 * 2 - k)
+ */
+ mul(z, r, invTbl_[k]);
+#else
+ mpz_class t;
+ fromMont(t, x);
+ Gmp::invMod(t, t, pOrg_);
+ toMont(z, t);
+#endif
+ }
+ static inline void div(MontFpT& z, const MontFpT& x, const MontFpT& y)
+ {
+ MontFpT ry;
+ inv(ry, y);
+ mul(z, x, ry);
+ }
+ // The helpers below are compiled out by "#if 0".
+ // They refer to x.v and to Gmp block accessors, whereas the live code of this class uses the v_ array.
+#if 0
+ static inline BlockType getBlock(const MontFpT& x, size_t i)
+ {
+ return Gmp::getBlock(x.v, i);
+ }
+ static inline const BlockType *getBlock(const MontFpT& x)
+ {
+ return Gmp::getBlock(x.v);
+ }
+ static inline size_t getBlockSize(const MontFpT& x)
+ {
+ return Gmp::getBlockSize(x.v);
+ }
+ static inline void shr(MontFpT& z, const MontFpT& x, size_t n)
+ {
+ z.v = x.v >> n;
+ }
+#endif
+ /*
+ append modBitLen_ bits of this value to bv (the existing contents of bv are kept)
+ the value is multiplied by one_ before it is written out
+ NOTE(review): presumably this leaves Montgomery form - confirm
+ */
+ void appendToBitVec(cybozu::BitVector& bv) const
+ {
+ MontFpT plain;
+ mul(plain, *this, one_);
+ bv.append(plain.v_, modBitLen_);
+ }
+ /*
+ set this value from the modBitLen_ bits stored in bv
+ the raw bits are multiplied by RR_ (NOTE(review): presumably the conversion to Montgomery form - confirm)
+ @param bv [in] bit vector of exactly modBitLen_ bits holding x with 0 <= x < p
+ @throw cybozu::Exception if bv.size() != modBitLen_ or if x >= p
+ */
+ void fromBitVec(const cybozu::BitVector& bv)
+ {
+ const size_t bitLen = bv.size();
+ if (bitLen != modBitLen_) throw cybozu::Exception("MontFp:fromBitVec:bad size") << bitLen << modBitLen_;
+ const size_t blockN = cybozu::RoundupBit<BlockType>(bitLen);
+ const MontFpT* src;
+ MontFpT t;
+ if (blockN == N) {
+ // bv already holds N blocks, so they are read in place
+ src = (const MontFpT*)bv.getBlock();
+ } else {
+ // shorter input : copy it and zero-fill the upper blocks
+ cybozu::CopyBit(t.v_, bv.getBlock(), bitLen);
+ for (size_t i = blockN; i < N; i++) t.v_[i] = 0;
+ src = &t;
+ }
+ // the range check has to look at the raw input : comparing after mul() would test
+ // the converted product instead of x, so x >= p could slip through
+ if (compare(*src, p_) >= 0) {
+ throw cybozu::Exception("MontFpT:fromBitVec:large x") << *src << p_;
+ }
+ mul(*this, *src, RR_);
+ }
+ // number of bits read by fromBitVec / written by appendToBitVec (= modBitLen_)
+ static inline size_t getBitVecSize() { return modBitLen_; }
+ // compare the N words of x.v_ and y.v_ with fp::compareArray;
+ // callers in this class test the result against 0 (< 0, == 0, >= 0)
+ static inline int compare(const MontFpT& x, const MontFpT& y)
+ {
+ return fp::compareArray(x.v_, y.v_, N);
+ }
+ // true if every word of x.v_ is zero
+ static inline bool isZero(const MontFpT& x)
+ {
+ // cheap early exit on the lowest word
+ if (x.v_[0] != 0) return false;
+ uint64_t acc = 0;
+ size_t i = 1;
+ while (i < N) {
+ acc |= x.v_[i];
+ i++;
+ }
+ return acc == 0;
+ }
+ // member form of the check above
+ bool isZero() const { return MontFpT::isZero(*this); }
+ // z = x^y; the work is delegated to power_impl::power
+ template<class Z>
+ static void power(MontFpT& z, const MontFpT& x, const Z& y)
+ {
+ mcl::power_impl::power(z, x, y);
+ }
+ // read-only access to the internal word array v_ (used by the hash specialization for this type)
+ const uint64_t* getInnerValue() const { return v_; }
+ // equality is decided by compare() on the internal arrays
+ bool operator==(const MontFpT& rhs) const { return compare(*this, rhs) == 0; }
+ bool operator!=(const MontFpT& rhs) const { return compare(*this, rhs) != 0; }
+ // bit length of the modulus as stored in modBitLen_
+ static inline size_t getModBitLen() { return modBitLen_; }
+ /*
+ convert x to uint64_t
+ x is multiplied by one_ first; the result must fit in the lowest word
+ @param err [out] if not null, receives true when the value does not fit (0 is returned), false otherwise
+ @throw cybozu::Exception if the value does not fit and err is null
+ */
+ static inline uint64_t cvtInt(const MontFpT& x, bool *err = 0)
+ {
+ MontFpT plain;
+ mul(plain, x, one_);
+ bool tooLarge = false;
+ for (size_t i = 1; i < N; i++) {
+ if (plain.v_[i] != 0) {
+ tooLarge = true;
+ break;
+ }
+ }
+ if (tooLarge) {
+ if (err == 0) throw cybozu::Exception("MontFp:cvtInt:too large") << x;
+ *err = true;
+ return 0;
+ }
+ if (err) *err = false;
+ return plain.v_[0];
+ }
+ // member form of the conversion above
+ uint64_t cvtInt(bool *err = 0) const { return cvtInt(*this, err); }
+};
+
+// out-of-class definitions of the static data members of MontFpT<N, tag>
+template<size_t N, class tag>mpz_class MontFpT<N, tag>::pOrg_;
+template<size_t N, class tag>mcl::SquareRoot MontFpT<N, tag>::sq_;
+template<size_t N, class tag>MontFpT<N, tag> MontFpT<N, tag>::p_;
+template<size_t N, class tag>MontFpT<N, tag> MontFpT<N, tag>::one_;
+template<size_t N, class tag>MontFpT<N, tag> MontFpT<N, tag>::R_;
+template<size_t N, class tag>MontFpT<N, tag> MontFpT<N, tag>::RR_;
+// table indexed by the k returned from preInv (see inv()); it has N * 64 * 2 entries
+// NOTE(review): confirm that k can never reach N * 64 * 2
+template<size_t N, class tag>MontFpT<N, tag> MontFpT<N, tag>::invTbl_[N * 64 * 2];
+template<size_t N, class tag>FpGenerator MontFpT<N, tag>::fg_;
+template<size_t N, class tag>size_t MontFpT<N, tag>::modBitLen_;
+
+// out-of-class definitions of the static operation members (types void3op / void2op / bool3op / int2op)
+template<size_t N, class tag>typename MontFpT<N, tag>::void3op MontFpT<N, tag>::add;
+template<size_t N, class tag>typename MontFpT<N, tag>::void3op MontFpT<N, tag>::sub;
+template<size_t N, class tag>typename MontFpT<N, tag>::void3op MontFpT<N, tag>::mul;
+template<size_t N, class tag>typename MontFpT<N, tag>::void2op MontFpT<N, tag>::square;
+template<size_t N, class tag>typename MontFpT<N, tag>::void2op MontFpT<N, tag>::neg;
+template<size_t N, class tag>typename MontFpT<N, tag>::void2op MontFpT<N, tag>::shr1;
+template<size_t N, class tag>typename MontFpT<N, tag>::bool3op MontFpT<N, tag>::addNc;
+template<size_t N, class tag>typename MontFpT<N, tag>::bool3op MontFpT<N, tag>::subNc;
+template<size_t N, class tag>typename MontFpT<N, tag>::int2op MontFpT<N, tag>::preInv;
+
+} // mcl
+
+// hash support for mcl::MontFpT, placed in the namespace opened by CYBOZU_NAMESPACE_TR1_BEGIN inside std
+namespace std { CYBOZU_NAMESPACE_TR1_BEGIN
+template<class T> struct hash;
+
+template<size_t N, class tag>
+struct hash<mcl::MontFpT<N, tag> > {
+ // hashes the N words returned by getInnerValue() with cybozu::hash64; v is passed through as its third argument
+ size_t operator()(const mcl::MontFpT<N, tag>& x, uint64_t v = 0) const
+ {
+ return static_cast<size_t>(cybozu::hash64(x.getInnerValue(), N, v));
+ }
+};
+
+CYBOZU_NAMESPACE_TR1_END } // std::tr1
diff --git a/include/mcl/operator.hpp b/include/mcl/operator.hpp
new file mode 100644
index 0000000..f5d0df3
--- /dev/null
+++ b/include/mcl/operator.hpp
@@ -0,0 +1,118 @@
+#pragma once
+/**
+ @file
+ @brief operator
+ @author MITSUNARI Shigeo(@herumi)
+ @license modified new BSD license
+ http://opensource.org/licenses/BSD-3-Clause
+*/
+#include <ios>
+#include <cybozu/exception.hpp>
+
+// MCL_FORCE_INLINE : compiler-specific "force inline" specifier; a definition supplied by the user is kept.
+// On _WIN32 the warning state is pushed and warning 4714 is disabled;
+// the matching pop at the end of this file is commented out.
+#ifdef _WIN32
+ #ifndef MCL_FORCE_INLINE
+ #define MCL_FORCE_INLINE __forceinline
+ #endif
+ #pragma warning(push)
+ #pragma warning(disable : 4714)
+#else
+ #ifndef MCL_FORCE_INLINE
+ #define MCL_FORCE_INLINE __attribute__((always_inline))
+ #endif
+#endif
+
+namespace mcl { namespace ope {
+
+// empty class used as the default base (template parameter E) of the operator mix-ins in this file
+template<class T>
+struct Empty {};
+
+/*
+ mix-in that gives T the six relational operators
+ T must have a static compare(x, y) whose result is tested against 0
+*/
+template<class T, class E = Empty<T> >
+struct comparable : E {
+ friend MCL_FORCE_INLINE bool operator==(const T& x, const T& y) { return T::compare(x, y) == 0; }
+ friend MCL_FORCE_INLINE bool operator!=(const T& x, const T& y) { return T::compare(x, y) != 0; }
+
+ friend MCL_FORCE_INLINE bool operator<(const T& x, const T& y) { return T::compare(x, y) < 0; }
+ friend MCL_FORCE_INLINE bool operator<=(const T& x, const T& y) { return T::compare(x, y) <= 0; }
+ friend MCL_FORCE_INLINE bool operator>(const T& x, const T& y) { return T::compare(x, y) > 0; }
+ friend MCL_FORCE_INLINE bool operator>=(const T& x, const T& y) { return T::compare(x, y) >= 0; }
+};
+
+/*
+ mix-in that gives T the operators +=, -=, + and -
+ T must have static add(z, x, y) and sub(z, x, y)
+*/
+template<class T, class E = Empty<T> >
+struct addsub : E {
+ template<class S> MCL_FORCE_INLINE T& operator+=(const S& rhs)
+ {
+ T& self = static_cast<T&>(*this);
+ const T& lhs = static_cast<const T&>(*this);
+ T::add(self, lhs, rhs);
+ return self;
+ }
+ template<class S> MCL_FORCE_INLINE T& operator-=(const S& rhs)
+ {
+ T& self = static_cast<T&>(*this);
+ const T& lhs = static_cast<const T&>(*this);
+ T::sub(self, lhs, rhs);
+ return self;
+ }
+ template<class S> friend MCL_FORCE_INLINE T operator+(const T& a, const S& b)
+ {
+ T sum;
+ T::add(sum, a, b);
+ return sum;
+ }
+ template<class S> friend MCL_FORCE_INLINE T operator-(const T& a, const S& b)
+ {
+ T diff;
+ T::sub(diff, a, b);
+ return diff;
+ }
+};
+
+/*
+ mix-in that gives T the operators *= and *
+ T must have static mul(z, x, y)
+*/
+template<class T, class E = Empty<T> >
+struct mulable : E {
+ template<class S> MCL_FORCE_INLINE T& operator*=(const S& rhs)
+ {
+ T& self = static_cast<T&>(*this);
+ const T& lhs = static_cast<const T&>(*this);
+ T::mul(self, lhs, rhs);
+ return self;
+ }
+ template<class S> friend MCL_FORCE_INLINE T operator*(const T& a, const S& b)
+ {
+ T prod;
+ T::mul(prod, a, b);
+ return prod;
+ }
+};
+
+/*
+ mix-in that gives T the operators /= and /
+ T must have static inv(z, x) and mul(z, x, y); a / b is computed as (1 / b) * a
+*/
+template<class T, class E = Empty<T> >
+struct invertible : E {
+ MCL_FORCE_INLINE T& operator/=(const T& rhs)
+ {
+ T& self = static_cast<T&>(*this);
+ const T& lhs = static_cast<const T&>(*this);
+ T rhsInv;
+ T::inv(rhsInv, rhs);
+ T::mul(self, lhs, rhsInv);
+ return self;
+ }
+ friend MCL_FORCE_INLINE T operator/(const T& a, const T& b)
+ {
+ T q;
+ T::inv(q, b);
+ T::mul(q, q, a);
+ return q;
+ }
+};
+
+/*
+ mix-in that gives T the unary minus operator
+ T must have static neg(z, x)
+*/
+template<class T, class E = Empty<T> >
+struct hasNegative : E {
+ MCL_FORCE_INLINE T operator-() const
+ {
+ const T& self = static_cast<const T&>(*this);
+ T negated;
+ T::neg(negated, self);
+ return negated;
+ }
+};
+
+/*
+ mix-in that gives T the stream operators << and >>
+ T must have toStr(str, base, showBase) and fromStr(str, base)
+ the oct flag is rejected with cybozu::Exception; the hex flag selects base 16
+ otherwise output uses base 10 and input passes base 0 to fromStr
+*/
+template<class T, class E = Empty<T> >
+struct hasIO : E {
+ friend inline std::ostream& operator<<(std::ostream& os, const T& self)
+ {
+ const std::ios_base::fmtflags flags = os.flags();
+ if (flags & std::ios_base::oct) throw cybozu::Exception("fpT:operator<<:oct is not supported");
+ int base = 10;
+ if (flags & std::ios_base::hex) base = 16;
+ const bool showBase = (flags & std::ios_base::showbase) != 0;
+ std::string text;
+ self.toStr(text, base, showBase);
+ os << text;
+ return os;
+ }
+ friend inline std::istream& operator>>(std::istream& is, T& self)
+ {
+ const std::ios_base::fmtflags flags = is.flags();
+ if (flags & std::ios_base::oct) throw cybozu::Exception("fpT:operator>>:oct is not supported");
+ int base = 0;
+ if (flags & std::ios_base::hex) base = 16;
+ std::string text;
+ is >> text;
+ self.fromStr(text, base);
+ return is;
+ }
+};
+
+/*
+ default set of optimization hooks : both has* queries return false and every operation is empty
+*/
+template<class T>
+struct Optimized {
+ void init(const T&) {}
+ // modular multiplication hooks
+ bool hasMulMod() const { return false; }
+ static void mulMod(T&, const T&, const T&) {}
+ static void mulMod(T&, const T&, unsigned int) {}
+ // modular power hooks
+ bool hasPowMod() const { return false; }
+ static void powMod(T&, const T&, const T&, const T&) {}
+};
+
+} } // mcl::ope
+
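+#ifdef _WIN32
+// NOTE: the pop matching the "#pragma warning(push)" near the top of this file is
+// commented out, so the suppression of warning 4714 stays in effect after this header.
+// #pragma warning(pop)
+#endif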
diff --git a/include/mcl/power.hpp b/include/mcl/power.hpp
new file mode 100644
index 0000000..27fd15e
--- /dev/null
+++ b/include/mcl/power.hpp
@@ -0,0 +1,181 @@
+#pragma once
+/**
+ @file
+ @brief power
+ @author MITSUNARI Shigeo(@herumi)
+ @license modified new BSD license
+ http://opensource.org/licenses/BSD-3-Clause
+*/
+#include <assert.h>
+#include <cybozu/bit_operation.hpp>
+#include <mcl/tagmultigr.hpp>
+#ifdef _MSC_VER
+ #pragma warning(push)
+ #pragma warning(disable : 4616)
+ #pragma warning(disable : 4800)
+ #pragma warning(disable : 4244)
+ #pragma warning(disable : 4127)
+ #pragma warning(disable : 4512)
+ #pragma warning(disable : 4146)
+#endif
+#include <gmpxx.h>
+#ifdef _MSC_VER
+ #pragma warning(pop)
+#endif
+
+namespace mcl {
+
+namespace power_impl {
+
+/*
+ TagInt<F> : uniform access to the blocks of an exponent of type F
+ the generic version forwards each call to the static member of F with the same name
+*/
+template<class F>
+struct TagInt {
+ typedef typename F::BlockType BlockType;
+ static size_t getBlockSize(const F& x) { return F::getBlockSize(x); }
+ static BlockType getBlock(const F& x, size_t i) { return F::getBlock(x, i); }
+ static const BlockType* getBlock(const F& x) { return F::getBlock(x); }
+ static size_t getBitLen(const F& x) { return F::getBitLen(x); }
+ // in-place right shift : x >>= n
+ static void shr(F& x, size_t n) { F::shr(x, x, n); }
+};
+
+/*
+ TagInt<int> : an int exponent is handled as a single block
+*/
+template<>
+struct TagInt<int> {
+ typedef int BlockType;
+ // always one block; the return type is size_t like in the other TagInt versions
+ static size_t getBlockSize(int)
+ {
+ return 1;
+ }
+ // i must be 0 because there is only one block
+ static BlockType getBlock(int x, size_t i)
+ {
+ assert(i == 0);
+ cybozu::disable_warning_unused_variable(i);
+ return x;
+ }
+ static const BlockType* getBlock(const int& x)
+ {
+ return &x;
+ }
+ // bit length of x; 0 is reported as 1 bit
+ static size_t getBitLen(int x)
+ {
+ return x == 0 ? 1 : cybozu::bsr(x) + 1;
+ }
+ static void shr(int& x, size_t n)
+ {
+ x >>= n;
+ }
+};
+
+/*
+ TagInt<size_t> : a size_t exponent is handled as a single block
+*/
+template<>
+struct TagInt<size_t> {
+ typedef size_t BlockType;
+ static size_t getBlockSize(size_t) { return 1; }
+ // i must be 0 because there is only one block
+ static BlockType getBlock(size_t x, size_t i)
+ {
+ assert(i == 0);
+ cybozu::disable_warning_unused_variable(i);
+ return x;
+ }
+ static const BlockType* getBlock(const size_t& x) { return &x; }
+ // bit length of x; 0 is reported as 1 bit
+ static size_t getBitLen(size_t x)
+ {
+ if (x == 0) return 1;
+ return cybozu::bsr<size_t>(x) + 1;
+ }
+ static void shr(size_t& x, size_t n) { x >>= n; }
+};
+
+/*
+ TagInt<mpz_class> : block access to a GMP integer
+ the block count and the indexed block are taken from the absolute value
+ (mpz_size / mpz_getlimbn), so a negative x does not produce a negative size
+*/
+template<>
+struct TagInt<mpz_class> {
+ typedef mp_limb_t BlockType;
+ // number of limbs of |x|
+ static size_t getBlockSize(const mpz_class& x)
+ {
+ return mpz_size(x.get_mpz_t());
+ }
+ // i-th limb of |x| (least significant limb is 0); mpz_getlimbn returns 0 if i is out of range
+ static BlockType getBlock(const mpz_class& x, size_t i)
+ {
+ return mpz_getlimbn(x.get_mpz_t(), static_cast<mp_size_t>(i));
+ }
+ // pointer to the limb array of x
+ static const BlockType* getBlock(const mpz_class& x)
+ {
+ return x.get_mpz_t()->_mp_d;
+ }
+ static size_t getBitLen(const mpz_class& x)
+ {
+ return mpz_sizeinbase(x.get_mpz_t(), 2);
+ }
+ static void shr(mpz_class& x, size_t n)
+ {
+ x >>= n;
+ }
+};
+
+/*
+ z = x^y where the exponent is given as n blocks y[0..n-1], least significant block first
+ the bits are scanned from the lowest one : the accumulator is multiplied by the current
+ power of x when the bit is set, and that power is squared after every bit
+ an empty exponent (n == 0) gives the value set by TagMultiGr<G>::init
+*/
+template<class G, class BlockType>
+void powerArray(G& z, const G& x, const BlockType *y, size_t n)
+{
+ typedef TagMultiGr<G> TagG;
+ G acc;
+ TagG::init(acc);
+ G base(x);
+ for (size_t pos = 0; pos < n; pos++) {
+ const BlockType blk = y[pos];
+ int useBits = (int)sizeof(BlockType) * 8;
+ if (pos + 1 == n) {
+ // top block : skip its leading zero bits to avoid unused multiplication
+ while (useBits > 0 && (blk & (BlockType(1) << (useBits - 1))) == 0) {
+ useBits--;
+ }
+ }
+ for (int bit = 0; bit < useBits; bit++) {
+ if (blk & (BlockType(1) << bit)) {
+ TagG::mul(acc, acc, base);
+ }
+ TagG::square(base, base);
+ }
+ }
+ z = acc;
+}
+
+/*
+ z = x^_y
+ _y == 0 gives the value set by TagMultiGr<G>::init and _y == 1 gives x
+ for a negative _y the power is computed for -_y and the result is inverted
+*/
+template<class G, class F>
+void power(G& z, const G& x, const F& _y)
+{
+ typedef TagMultiGr<G> TagG;
+ typedef power_impl::TagInt<F> TagI;
+ if (_y == 0) { TagG::init(z); return; }
+ if (_y == 1) { z = x; return; }
+ const bool negative = _y < 0;
+ const F& absY = negative ? -_y : _y;
+ powerArray(z, x, TagI::getBlock(absY), TagI::getBlockSize(absY));
+ if (!negative) return;
+ TagG::inv(z, z);
+}
+
+} } // mcl::power_impl
diff --git a/include/mcl/tagmultigr.hpp b/include/mcl/tagmultigr.hpp
new file mode 100644
index 0000000..51add36
--- /dev/null
+++ b/include/mcl/tagmultigr.hpp
@@ -0,0 +1,39 @@
+#pragma once
+/**
+ @file
+ @brief TagMultiGr
+ @author MITSUNARI Shigeo(@herumi)
+ @license modified new BSD license
+ http://opensource.org/licenses/BSD-3-Clause
+*/
+#include <assert.h>
+
+namespace mcl {
+
+// default tag : G is used as a multiplicative group through its static mul / inv / div
+template<class G>
+struct TagMultiGr {
+ // set x to 1
+ static void init(G& x) { x = 1; }
+ static void mul(G& z, const G& x, const G& y) { G::mul(z, x, y); }
+ // z = x * x, done with G::mul
+ static void square(G& z, const G& x) { G::mul(z, x, x); }
+ static void div(G& z, const G& x, const G& y) { G::div(z, x, y); }
+ static void inv(G& z, const G& x) { G::inv(z, x); }
+};
+
+} // mcl
+
diff --git a/mcl.sln b/mcl.sln
new file mode 100644
index 0000000..e0edbbb
--- /dev/null
+++ b/mcl.sln
@@ -0,0 +1,25 @@
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Express 2012 for Windows Desktop
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fp_test", "test\proj\fp_test\fp_test.vcxproj", "{51266DE6-B57B-4AE3-B85C-282F170E1728}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ec_test", "test\proj\ec_test\ec_test.vcxproj", "{46B6E88E-739A-406B-9F68-BC46C5950FA3}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|x64 = Debug|x64
+ Release|x64 = Release|x64
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {51266DE6-B57B-4AE3-B85C-282F170E1728}.Debug|x64.ActiveCfg = Debug|x64
+ {51266DE6-B57B-4AE3-B85C-282F170E1728}.Debug|x64.Build.0 = Debug|x64
+ {51266DE6-B57B-4AE3-B85C-282F170E1728}.Release|x64.ActiveCfg = Release|x64
+ {51266DE6-B57B-4AE3-B85C-282F170E1728}.Release|x64.Build.0 = Release|x64
+ {46B6E88E-739A-406B-9F68-BC46C5950FA3}.Debug|x64.ActiveCfg = Debug|x64
+ {46B6E88E-739A-406B-9F68-BC46C5950FA3}.Debug|x64.Build.0 = Debug|x64
+ {46B6E88E-739A-406B-9F68-BC46C5950FA3}.Release|x64.ActiveCfg = Release|x64
+ {46B6E88E-739A-406B-9F68-BC46C5950FA3}.Release|x64.Build.0 = Release|x64
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+EndGlobal
diff --git a/release.props b/release.props
new file mode 100644
index 0000000..88b0830
--- /dev/null
+++ b/release.props
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ImportGroup Label="PropertySheets" />
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup />
+ <ItemDefinitionGroup>
+ <ClCompile>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ </ClCompile>
+ </ItemDefinitionGroup>
+ <ItemGroup />
+</Project> \ No newline at end of file
diff --git a/sample/Makefile b/sample/Makefile
new file mode 100644
index 0000000..96678a6
--- /dev/null
+++ b/sample/Makefile
@@ -0,0 +1,23 @@
+# Build the sample programs; TEST_FILE, SAMPLE_TEST, MKDIR, CLEAN, OBJDIR and
+# DEPEND_FILE are expected to come from ../common.mk.
+include ../common.mk
+
+TARGET=$(TEST_FILE)
+LIBS=
+
+SRC=$(wildcard *.cpp)
+
+# these targets are commands, not files
+.PHONY: all test clean
+
+all: $(TARGET)
+
+test: $(TARGET)
+ @$(SAMPLE_TEST)
+
+$(OBJDIR):
+ @$(MKDIR) $(OBJDIR)
+
+clean:
+ $(CLEAN)
+
+# LIBS is empty in this file, so this rule names no target at present
+$(LIBS):
+ $(MAKE) -C ../src
+
+-include $(DEPEND_FILE)
+
diff --git a/sample/ecdh_smpl.cpp b/sample/ecdh_smpl.cpp
new file mode 100644
index 0000000..91bb9f6
--- /dev/null
+++ b/sample/ecdh_smpl.cpp
@@ -0,0 +1,69 @@
+/*
+ sample of Elliptic Curve Diffie-Hellman key sharing
+*/
+#include <iostream>
+#include <fstream>
+#include <cybozu/random_generator.hpp>
+#include <mcl/fp.hpp>
+#include <mcl/gmp_util.hpp>
+#include <mcl/ecparam.hpp>
+#include <mcl/ec.hpp>
+#include <mcl/fp.hpp>
+typedef mcl::FpT<> Fp;
+
+struct ZnTag;
+
+typedef mcl::EcT<Fp> Ec;
+typedef mcl::FpT<ZnTag> Zn;
+
+/*
+ run one ECDH key exchange between Alice and Bob on secp192k1
+ @return 0 if both sides derive the same point, 1 otherwise
+*/
+int main()
+{
+ cybozu::RandomGenerator rg;
+ /*
+ system setup with the parameter secp192k1 recommended by SECG
+ Ec is an elliptic curve over Fp
+ the cyclic group of <P> is isomorphic to Zn
+ */
+ const mcl::EcParam& para = mcl::ecparam::secp192k1;
+ Zn::setModulo(para.n);
+ Fp::setModulo(para.p);
+ Ec::setParam(para.a, para.b);
+ const Ec P(Fp(para.gx), Fp(para.gy));
+
+ /*
+ Alice sets up a private key a and a public key aP
+ */
+ Zn a;
+ Ec aP;
+
+ a.setRand(rg);
+ Ec::power(aP, P, a); // aP = a * P;
+
+ std::cout << "aP=" << aP << std::endl;
+
+ /*
+ Bob sets up a private key b and a public key bP
+ */
+ Zn b;
+ Ec bP;
+
+ b.setRand(rg);
+ Ec::power(bP, P, b); // bP = b * P;
+
+ std::cout << "bP=" << bP << std::endl;
+
+ Ec abP, baP;
+
+ // Alice uses bP (Bob's public key) and a (Alice's private key)
+ Ec::power(abP, bP, a); // abP = a * (bP)
+
+ // Bob uses aP (Alice's public key) and b (Bob's private key)
+ Ec::power(baP, aP, b); // baP = b * (aP)
+
+ if (abP == baP) {
+ std::cout << "key sharing succeed:" << abP << std::endl;
+ return 0;
+ }
+ // the two shared points must agree; report the failure through the exit status as well
+ std::cout << "ERR(not here)" << std::endl;
+ return 1;
+}
+
diff --git a/sample/random_smpl.cpp b/sample/random_smpl.cpp
new file mode 100644
index 0000000..19944de
--- /dev/null
+++ b/sample/random_smpl.cpp
@@ -0,0 +1,29 @@
+#include <mcl/fp.hpp>
+#include <mcl/gmp_util.hpp>
+#include <mcl/ecparam.hpp>
+#include <cybozu/random_generator.hpp>
+#include <map>
+#include <mcl/fp.hpp>
+typedef mcl::FpT<> Fp;
+
+typedef std::map<std::string, int> Map;
+
+/*
+ draw 10000 random Fp values and print how often each hexadecimal string occurred
+ the modulus is argv[1] when exactly one argument is given, otherwise the prime of secp192k1
+*/
+int main(int argc, char *argv[])
+{
+ cybozu::RandomGenerator rg;
+ const char *p = (argc == 2) ? argv[1] : mcl::ecparam::secp192k1.p;
+ Fp::setModulo(p);
+ Fp x;
+ printf("p=%s\n", p);
+ Map histogram;
+ int remaining = 10000;
+ while (remaining > 0) {
+ x.setRand(rg);
+ ++histogram[x.toStr(16)];
+ remaining--;
+ }
+ Map::const_iterator it = histogram.begin();
+ const Map::const_iterator last = histogram.end();
+ while (it != last) {
+ printf("%s %d\n", it->first.c_str(), it->second);
+ ++it;
+ }
+}
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 0000000..37c0873
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,42 @@
+# Generate LLVM IR with gen.py from the *.txt templates and compile it to assembly.
+# VER is the suffix of the LLVM tool names (llc-3.5, opt-3.5, ...).
+VER=-3.5
+LLC=llc$(VER)
+OPT=opt$(VER)
+DIS=llvm-dis$(VER)
+ASM=llvm-as$(VER)
+OPT_LLC= $(OPT) -O3 -o - | $(LLC) -O3 -o -
+
+SRC = once.txt all.txt long.txt short.txt mul.txt
+TARGET=x64.s x86.s arm.s arm64.s
+AFLAGS=-mattr=bmi2
+
+# these targets only run commands (the assembly goes to stdout) and create no file of that name
+.PHONY: all x64 x86 arm64 arm opt clean
+
+all: $(TARGET)
+
+base64.ll: gen.py $(SRC)
+ python gen.py 64
+
+base32.ll: gen.py $(SRC)
+ python gen.py 32
+
+x64: base64.ll
+ $(LLC) base64.ll -o - -x86-asm-syntax=intel
+x86: base32.ll
+ $(LLC) base32.ll -o - -x86-asm-syntax=intel -march=x86
+arm64: base64.ll
+ $(LLC) base64.ll -o - -march=aarch64
+
+arm: base32.ll
+ $(LLC) base32.ll -o - -march=arm
+
+opt: base64.ll
+ cat base64.ll|$(OPT_LLC) -x86-asm-syntax=intel $(AFLAGS)
+
+x64.s: base64.ll
+ cat base64.ll|$(OPT_LLC) $(AFLAGS) > x64.s
+x86.s: base32.ll
+ cat base32.ll|$(OPT_LLC) $(AFLAGS) -march=x86 > x86.s
+arm.s: base32.ll
+ cat base32.ll|$(OPT_LLC) -march=arm > arm.s
+# arm64.s is produced from base64.ll, so that file is the prerequisite
+arm64.s: base64.ll
+ cat base64.ll|$(OPT_LLC) -march=aarch64 > arm64.s
+clean:
+ rm -rf base*.ll *.s
+
diff --git a/src/all.txt b/src/all.txt
new file mode 100644
index 0000000..0dbd4f9
--- /dev/null
+++ b/src/all.txt
@@ -0,0 +1,7 @@
+declare { i$(bit), i1 } @llvm.usub.with.overflow.i$(bit)(i$(bit) %x, i$(bit) %y)
+
+; extract : returns the low unit bits of (x >> shift) for an integer of width bit + unit
+define i$(unit) @extract$(bit+unit)(i$(bit+unit) %x, i$(bit+unit) %shift) {
+ %t0 = lshr i$(bit+unit) %x, %shift
+ %t1 = trunc i$(bit+unit) %t0 to i$(unit)
+ ret i$(unit) %t1
+}
diff --git a/src/gen.py b/src/gen.py
new file mode 100644
index 0000000..acdd1ab
--- /dev/null
+++ b/src/gen.py
@@ -0,0 +1,187 @@
+import sys, re
+
+# @for <var>, <begin>, <end>
+RE_FOR = re.compile(r'@for\s+(\w+)\s*,\s*([^ ]+)\s*,\s*([^ ]+)')
+# $(<exp>)
+RE_VAL = re.compile(r'\$\(([^)]+)\)')
+# @define <var>=<exp>
+RE_DEFINE = re.compile(r'@define\s+(\w+)\s*=(.*)')
+# @if <exp>
+RE_IF = re.compile(r'@if\s+(.*)')
+# @elif <exp>
+RE_ELIF = re.compile(r'@elif\s+(.*)')
+
+# Replace every $(<exp>) found in s by str(eval(<exp>)), evaluated with
+# envG as globals and envL as locals, and return the resulting string.
+# NOTE(review): envL={} is a mutable default argument; here it is only handed to eval().
+def evalStr(s, envG, envL={}):
+ def eval2str(x):
+ s = x.group(1)
+ v = eval(s, envG, envL)
+ return str(v)
+ s = RE_VAL.sub(eval2str, s)
+ return s
+
+def parseDefine(s, envG, envL):
+ """
+ if s is @define statement, then update envL and return True
+ otherwise return False
+
+ s must start with "@define <var> = <exp>" (RE_DEFINE is applied with match);
+ <exp> is evaluated with envG / envL and stored as envL[<var>]
+ """
+ p = RE_DEFINE.match(s)
+ if not p:
+ return False
+ lhs = p.group(1).strip()
+ rhs = p.group(2).strip()
+ envL[lhs] = eval(rhs, envG, envL)
+ return True
+
+def parseFor(s, envG):
+ """
+ expand every @for ... @endfor block of s and return the new text
+
+ @for i, 0, 3
+ <exp>
+ @endfor
+
+ |
+ v
+ @define i = 0
+ <exp>
+ @define i = 1
+ <exp>
+ @define i = 2
+ <exp>
+
+ the bounds <begin> and <end> are evaluated with envG only
+ """
+ out = ""
+ inFor = False
+ envL = {}
+ for line in s.split('\n'):
+ stripped = line.strip()
+ # save @define for parseIf
+ # (the @define line itself is still copied to the output below)
+ parseDefine(stripped, envG, envL)
+ if inFor:
+ if line.strip() == '@endfor':
+ inFor = False
+ # emit the collected body once per loop value, each preceded by a @define of the loop variable
+ for i in xrange(b, e):
+ out += "@define %s = %d\n" % (v, i)
+ out += sub
+ else:
+ sub += line + '\n'
+ else:
+ p = RE_FOR.search(stripped)
+ if p:
+ v = p.group(1).strip()
+ b = eval(p.group(2), envG)
+ e = eval(p.group(3), envG)
+ sub = ""
+ inFor = True
+ else:
+ out += line + '\n'
+ return out
+
+# Process @define / @if / @elif / @else / @endif lines of s and return the remaining text.
+# @define lines update envL and are removed; lines inside a branch whose condition is
+# false are dropped; every emitted line goes through evalStr to expand $(<exp>).
+# NOTE(review): @elif simply re-evaluates the condition, so a true @elif after a true @if
+# emits both branches, and @else only negates the most recent condition - confirm this is intended.
+def parseIf(s, envG):
+ out = ""
+ IF_INIT = 0
+ IF_IF = 1
+ IF_ELSE = 2
+ ifState = IF_INIT
+ ifVar = False
+ # available variables in $(<exp>)
+ envL = {}
+ def evalIntLoc(s):
+ return eval(s, envG, envL)
+ for line in s.split('\n'):
+ stripped = line.strip()
+ # remove @define
+ if parseDefine(stripped, envG, envL):
+ continue
+ if ifState == IF_INIT:
+ p = RE_IF.match(stripped)
+ if p:
+ ifState = IF_IF
+ ifVar = evalIntLoc(p.group(1))
+ continue
+ elif ifState == IF_IF:
+ if stripped == '@endif':
+ ifState = IF_INIT
+ continue
+ elif stripped == '@else':
+ ifState = IF_ELSE
+ ifVar = not ifVar
+ continue
+ p = RE_ELIF.match(stripped)
+ if p:
+ ifVar = evalIntLoc(p.group(1))
+ continue
+ if not ifVar:
+ continue
+ elif ifState == IF_ELSE:
+ if stripped == '@endif':
+ ifState = IF_INIT
+ continue
+ if not ifVar:
+ continue
+ else:
+ raise Exception('bad state', ifState)
+ out += evalStr(line, envG, envL) + '\n'
+ return out
+
+def parse(s, unitL, bitL):
+ """
+ expand the template text s for the given unit and bit widths
+
+ replace "$(<exp>)" by the string of its evaluated value
+
+ @for <var>, <begin>, <end>
+ ...
+ @endfor
+
+ REMARK : @for is not nestable
+
+ @define <var> = <exp>
+ REMARK : var is global
+
+ @if <exp>
+ @elif <exp>
+ @else
+ @endif
+
+ REMARK : @if is not nestable
+ """
+ # available variables in $(<exp>)
+ # N is the number of units per value (bitL / unitL is evaluated on ints)
+ envG = {
+ 'unit' : unitL,
+ 'bit' : bitL,
+ 'N' : bitL / unitL,
+ }
+ s = parseFor(s, envG)
+ s = parseIf(s, envG)
+ return s
+
+# Read the template file inLame and write its expansion to fo once for every bit width in bitLL.
+def gen(fo, inLame, unitL, bitLL):
+ fi = open(inLame, 'r')
+ s = fi.read()
+ fi.close()
+ for bitL in bitLL:
+ t = parse(s, unitL, bitL)
+ fo.write(t)
+
+# Entry point : "gen.py [32|64]" writes base32.ll or base64.ll (the unit size defaults to 64).
+# once.txt is expanded once with bit = unit * 2; the other templates are expanded
+# for every multiple of the unit size up to 576 (mul.txt skips the first width).
+# The print statement below is Python 2 syntax.
+def main():
+ argv = sys.argv
+ args = len(argv)
+ unitL = 64
+ if args == 2:
+ unitL = int(argv[1])
+ if unitL not in [32, 64]:
+ print "bad unitL", unitL
+ exit(1)
+
+ outLame = 'base%d.ll' % unitL
+ fo = open(outLame, 'w')
+# gen(fo, 't.txt', unitL, [unitL * 4])
+# exit(1)
+ gen(fo, 'once.txt', unitL, [unitL * 2])
+
+ bitLL = range(unitL, 576 + 1, unitL)
+ gen(fo, 'all.txt', unitL, bitLL)
+ gen(fo, 'short.txt', unitL, bitLL)
+ gen(fo, 'long.txt', unitL, bitLL)
+ gen(fo, 'mul.txt', unitL, bitLL[1:])
+ fo.close()
+
+if __name__ == "__main__":
+ main()
diff --git a/src/long.txt b/src/long.txt
new file mode 100644
index 0000000..31082a7
--- /dev/null
+++ b/src/long.txt
@@ -0,0 +1,54 @@
+; modular addition, branching version : z = x + y, or x + y - p when that is not negative
+; x + y is stored first; the sign bit of the widened x + y - p selects the branch,
+; and the nocarry branch overwrites z with x + y - p
+define void @mcl_fp_add$(bit)L(i$(bit)* %pz, i$(bit)* %px, i$(bit)* %py, i$(bit)* %pp) {
+ %x = load i$(bit)* %px
+ %y = load i$(bit)* %py
+ %p = load i$(bit)* %pp
+ %x1 = zext i$(bit) %x to i$(bit+unit)
+ %y1 = zext i$(bit) %y to i$(bit+unit)
+ %p1 = zext i$(bit) %p to i$(bit+unit)
+ %t0 = add i$(bit+unit) %x1, %y1 ; x + y
+ %t1 = trunc i$(bit+unit) %t0 to i$(bit)
+ store i$(bit) %t1, i$(bit)* %pz
+ %vc = sub i$(bit+unit) %t0, %p1
+ %c = lshr i$(bit+unit) %vc, $(bit+unit-1)
+ %c1 = trunc i$(bit+unit) %c to i1
+ br i1 %c1, label %carry, label %nocarry
+nocarry:
+ %v = trunc i$(bit+unit) %vc to i$(bit)
+ store i$(bit) %v, i$(bit)* %pz
+ ret void
+carry:
+ ret void
+}
+
+; subtract with borrow : returns the pair { low bits of x - y, borrow word }
+; the borrow word is the upper part of the widened difference : zero when x >= y, all ones when x < y
+define internal { i$(bit), i$(unit) } @local_sbb$(bit)(i$(bit) %x, i$(bit) %y) {
+ %x1 = zext i$(bit) %x to i$(bit+unit)
+ %y1 = zext i$(bit) %y to i$(bit+unit)
+ %v1 = sub i$(bit+unit) %x1, %y1
+ %v = trunc i$(bit+unit) %v1 to i$(bit)
+ %c = lshr i$(bit+unit) %v1, $(bit)
+ %c1 = trunc i$(bit+unit) %c to i$(unit)
+ %r1 = insertvalue { i$(bit), i$(unit) } undef, i$(bit) %v, 0
+ %r2 = insertvalue { i$(bit), i$(unit) } %r1, i$(unit) %c1, 1
+ ret { i$(bit), i$(unit) } %r2
+}
+
+; modular subtraction, branching version : z = x - y, plus p when the subtraction borrows
+; x - y is stored first; the sign bit of the widened difference selects the branch,
+; and the carry branch overwrites z with x - y + p
+define void @mcl_fp_sub$(bit)L(i$(bit)* %pz, i$(bit)* %px, i$(bit)* %py, i$(bit)* %pp) {
+ %x = load i$(bit)* %px
+ %y = load i$(bit)* %py
+ %x1 = zext i$(bit) %x to i$(bit+unit)
+ %y1 = zext i$(bit) %y to i$(bit+unit)
+ %vc = sub i$(bit+unit) %x1, %y1
+ %v = trunc i$(bit+unit) %vc to i$(bit)
+ %c = lshr i$(bit+unit) %vc, $(bit+unit-1)
+ %c1 = trunc i$(bit+unit) %c to i1
+ store i$(bit) %v, i$(bit)* %pz
+ br i1 %c1, label %carry, label %nocarry
+nocarry:
+ ret void
+carry:
+ %p = load i$(bit)* %pp
+ %t = add i$(bit) %v, %p ; x - y + p
+ store i$(bit) %t, i$(bit)* %pz
+ ret void
+}
+
diff --git a/src/mul.txt b/src/mul.txt
new file mode 100644
index 0000000..4621c7a
--- /dev/null
+++ b/src/mul.txt
@@ -0,0 +1,81 @@
+; multiply a bit-wide x by a unit-wide y and return the full product (bit + unit wide)
+; x is cut into N unit-wide parts; each part times y is widened, shifted to its position and summed
+; the function is marked noinline when N > 4
+@define bu = bit + unit
+define private i$(bu) @mul$(bit)x$(unit)(i$(bit) %x, i$(unit) %y)
+@if N > 4
+noinline
+@endif
+{
+@for i, 0, N
+ %x$(i) = call i$(unit) @extract$(bit)(i$(bit) %x, i$(bit) $(unit*i))
+ %x$(i)y = call i$(unit*2) @mul$(unit)x$(unit)(i$(unit) %x$(i), i$(unit) %y)
+ %x$(i)y0 = zext i$(unit*2) %x$(i)y to i$(bu)
+@endfor
+@for i, 1, N
+ %x$(i)y1 = shl i$(bu) %x$(i)y0, $(unit*i)
+@endfor
+ %t0 = add i$(bu) %x0y0, %x1y1
+@for i, 1, N-1
+ %t$(i) = add i$(bu) %t$(i-1), %x$(i+1)y1
+@endfor
+ ret i$(bu) %t$(N-2)
+}
+; full multiplication without reduction : pz receives x * y as 2N unit-wide words
+; one row (x times one word of y) is added per step; the lowest word of each running sum
+; is stored, and the last running sum is stored as a whole starting at word N-1
+define void @mcl_fp_mul$(bit)pre(i$(unit)* %pz, i$(bit)* %px, i$(bit)* %py) {
+ %x = load i$(bit)* %px
+ %y = load i$(bit)* %py
+@for i, 0, N
+ %y$(i) = call i$(unit) @extract$(bit)(i$(bit) %y, i$(bit) $(unit*i))
+@endfor
+ %sum0 = call i$(bu) @mul$(bit)x$(unit)(i$(bit) %x, i$(unit) %y0)
+ %t0 = trunc i$(bu) %sum0 to i$(unit)
+ store i$(unit) %t0, i$(unit)* %pz
+@for i, 1, N
+
+ %s$(i-1) = lshr i$(bu) %sum$(i-1), $(unit)
+ %xy$(i) = call i$(bu) @mul$(bit)x$(unit)(i$(bit) %x, i$(unit) %y$(i))
+ %sum$(i) = add i$(bu) %s$(i-1), %xy$(i)
+ %z$(i) = getelementptr i$(unit)* %pz, i32 $(i)
+ @if i < N - 1
+ %ts$(i) = trunc i$(bu) %sum$(i) to i$(unit)
+ store i$(unit) %ts$(i), i$(unit)* %z$(i)
+ @endif
+@endfor
+ %p = bitcast i$(unit)* %z$(N-1) to i$(bu)*
+ store i$(bu) %sum$(N-1), i$(bu)* %p
+ ret void
+}
+
+; word-by-word multiply-and-reduce of x and y modulo p
+; per word of y : add x times that word to the accumulator, take q = (low word) * r,
+; add p * q and shift the accumulator right by one unit
+; at the end p is subtracted once unless that would go negative
+; NOTE(review): this looks like Montgomery multiplication with r = -1/p mod 2^unit - confirm
+@define bu = bit + unit
+@define bu2 = bit + unit * 2
+define void @mcl_fp_mont$(bit)(i$(bit)* %pz, i$(bit)* %px, i$(unit)* %py, i$(bit)* %pp, i$(unit) %r) {
+ %p = load i$(bit)* %pp
+ %x = load i$(bit)* %px
+
+@for i, 0, N
+ %py$(i) = getelementptr i$(unit)* %py, i$(unit) $(i)
+ %y$(i) = load i$(unit)* %py$(i)
+ %xy$(i) = call i$(bu) @mul$(bit)x$(unit)(i$(bit) %x, i$(unit) %y$(i))
+@if i == 0
+ %a0 = zext i$(bu) %xy0 to i$(bu2)
+
+ %at$(i) = trunc i$(bu) %xy$(i) to i$(unit)
+@else
+ %xye$(i) = zext i$(bu) %xy$(i) to i$(bu2)
+ %a$(i) = add i$(bu2) %s$(i-1), %xye$(i)
+ %at$(i) = trunc i$(bu2) %a$(i) to i$(unit)
+@endif
+ %q$(i) = mul i$(unit) %at$(i), %r
+ %pq$(i) = call i$(bu) @mul$(bit)x$(unit)(i$(bit) %p, i$(unit) %q$(i))
+ %pqe$(i) = zext i$(bu) %pq$(i) to i$(bu2)
+ %t$(i) = add i$(bu2) %a$(i), %pqe$(i)
+ %s$(i) = lshr i$(bu2) %t$(i), $(unit)
+@endfor
+ %v = trunc i$(bu2) %s$(N-1) to i$(bu)
+ %pe = zext i$(bit) %p to i$(bu)
+ %vc = sub i$(bu) %v, %pe
+ %c = lshr i$(bu) %vc, $(bit)
+ %c1 = trunc i$(bu) %c to i1
+ %z = select i1 %c1, i$(bu) %v, i$(bu) %vc
+ %zt = trunc i$(bu) %z to i$(bit)
+ store i$(bit) %zt, i$(bit)* %pz
+ ret void
+}
+
diff --git a/src/once.txt b/src/once.txt
new file mode 100644
index 0000000..501fb2b
--- /dev/null
+++ b/src/once.txt
@@ -0,0 +1,74 @@
+
+; full product of two unit-wide integers, returned with twice the width
+define i$(unit*2) @mul$(unit)x$(unit)(i$(unit) %x, i$(unit) %y) {
+ %x0 = zext i$(unit) %x to i$(unit*2)
+ %y0 = zext i$(unit) %y to i$(unit*2)
+ %z = mul i$(unit*2) %x0, %y0
+ ret i$(unit*2) %z
+}
+
+; NIST_P192
+; 0xfffffffffffffffffffffffffffffffeffffffffffffffff
+;
+; 0 1 2
+; ffffffffffffffff fffffffffffffffe ffffffffffffffff
+;
+; p = (1 << 192) - (1 << 64) - 1
+; (1 << 192) % p = (1 << 64) + 1
+;
+; L : 192bit
+; Hi: 64bit
+; x = [H:L] = [H2:H1:H0:L]
+; mod p
+; x = L + H + (H << 64)
+; = L + H + [H1:H0:0] + H2 + (H2 << 64)
+;[e:t] = L + H + [H1:H0:H2] + [H2:0] ; 2bit(e) over
+; = t + e + (e << 64)
+
+; returns the low 64 bits of (x >> shift) for a 192-bit x
+define internal i64 @extract192to64(i192 %x, i192 %shift) {
+ %t0 = lshr i192 %x, %shift
+ %t1 = trunc i192 %t0 to i64
+ ret i64 %t1
+}
+
+; fold a 384-bit value [H:L] stored at px (two consecutive i192) into 192 bits, following the
+; formulas in the comment block above : t2 = L + H + [H1:H0:H2] + [H2:0], then the overflow e
+; of t2 above 192 bits is added back as e + (e << 64)
+; NOTE(review): the result is not compared with p after the last additions - confirm whether
+; the output is required to be fully reduced and whether the last sum can exceed 192 bits
+define internal void @modNIST_P192(i192* %out, i192* %px) {
+ %L192 = load i192* %px
+ %L = zext i192 %L192 to i256
+
+ %pH = getelementptr i192* %px, i32 1
+ %H192 = load i192* %pH
+ %H = zext i192 %H192 to i256
+
+ %H10_ = shl i192 %H192, 64
+ %H10 = zext i192 %H10_ to i256
+
+ %H2_ = call i64 @extract192to64(i192 %H192, i192 128)
+ %H2 = zext i64 %H2_ to i256
+ %H102 = or i256 %H10, %H2
+
+ %H2s = shl i256 %H2, 64
+
+ %t0 = add i256 %L, %H
+ %t1 = add i256 %t0, %H102
+ %t2 = add i256 %t1, %H2s
+
+ %e = lshr i256 %t2, 192
+ %t3 = trunc i256 %t2 to i192
+ %e1 = trunc i256 %e to i192
+
+
+ %t4 = add i192 %t3, %e1
+ %e2 = shl i192 %e1, 64
+ %t5 = add i192 %t4, %e2
+
+ store i192 %t5, i192* %out
+
+ ret void
+}
+
+; z = x * y folded with modNIST_P192 : the double-width product is written to a
+; stack buffer of two i192 by mcl_fp_mul192pre and then reduced into pz
+define void @mcl_fp_mul_NIST_P192(i192* %pz, i192* %px, i192* %py) {
+ %buf = alloca i192, i32 2
+ %p = bitcast i192* %buf to i$(unit)*
+ call void @mcl_fp_mul192pre(i$(unit)* %p, i192* %px, i192* %py)
+ call void @modNIST_P192(i192* %pz, i192* %buf)
+ ret void
+}
diff --git a/src/short.txt b/src/short.txt
new file mode 100644
index 0000000..931a63f
--- /dev/null
+++ b/src/short.txt
@@ -0,0 +1,46 @@
+; modular addition, branch-free version : z = x + y, or x + y - p when that is not negative
+; the borrow bit of the widened x + y - p selects between the two candidates
+define void @mcl_fp_add$(bit)S(i$(bit)* %pz, i$(bit)* %px, i$(bit)* %py, i$(bit)* %pp) {
+entry:
+ %x = load i$(bit)* %px
+ %y = load i$(bit)* %py
+ %p = load i$(bit)* %pp
+ %x1 = zext i$(bit) %x to i$(bit+unit)
+ %y1 = zext i$(bit) %y to i$(bit+unit)
+ %p1 = zext i$(bit) %p to i$(bit+unit)
+ %t0 = add i$(bit+unit) %x1, %y1 ; x + y
+ %t1 = sub i$(bit+unit) %t0, %p1 ; x + y - p
+ %t2 = lshr i$(bit+unit) %t1, $(bit)
+ %t3 = trunc i$(bit+unit) %t2 to i1
+ %t4 = select i1 %t3, i$(bit+unit) %t0, i$(bit+unit) %t1
+ %t5 = trunc i$(bit+unit) %t4 to i$(bit)
+ store i$(bit) %t5, i$(bit)* %pz
+ ret void
+}
+
+; subtract with borrow : returns the pair { low bits of x - y, borrow word }
+; the borrow word is the upper part of the widened difference : zero when x >= y, all ones when x < y
+define internal { i$(bit), i$(unit) } @mcl_local_sbb$(bit)(i$(bit) %x, i$(bit) %y) {
+ %x1 = zext i$(bit) %x to i$(bit+unit)
+ %y1 = zext i$(bit) %y to i$(bit+unit)
+ %v1 = sub i$(bit+unit) %x1, %y1
+ %v = trunc i$(bit+unit) %v1 to i$(bit)
+ %c = lshr i$(bit+unit) %v1, $(bit)
+ %c1 = trunc i$(bit+unit) %c to i$(unit)
+ %r1 = insertvalue { i$(bit), i$(unit) } undef, i$(bit) %v, 0
+ %r2 = insertvalue { i$(bit), i$(unit) } %r1, i$(unit) %c1, 1
+ ret { i$(bit), i$(unit) } %r2
+}
+
+; modular subtraction, branch-free version : z = x - y, plus p when the subtraction borrows
+; the sign bit of the widened difference selects p or 0 as the value added back
+define void @mcl_fp_sub$(bit)S(i$(bit)* %pz, i$(bit)* %px, i$(bit)* %py, i$(bit)* %pp) {
+ %x = load i$(bit)* %px
+ %y = load i$(bit)* %py
+ %x1 = zext i$(bit) %x to i$(bit+unit)
+ %y1 = zext i$(bit) %y to i$(bit+unit)
+ %vc = sub i$(bit+unit) %x1, %y1
+ %v = trunc i$(bit+unit) %vc to i$(bit)
+ %c = lshr i$(bit+unit) %vc, $(bit+unit-1)
+ %c1 = trunc i$(bit+unit) %c to i1
+ %p = load i$(bit)* %pp
+ %a = select i1 %c1, i$(bit) %p, i$(bit) 0
+ %v1 = add i$(bit) %v, %a
+ store i$(bit) %v1, i$(bit)* %pz
+ ret void
+}
+
diff --git a/test/Makefile b/test/Makefile
new file mode 100644
index 0000000..b1e01dd
--- /dev/null
+++ b/test/Makefile
@@ -0,0 +1,42 @@
+# Build and run the unit tests; TEST_FILE, UNIT_TEST, MKDIR, CLEAN, OBJDIR, CPU and
+# DEPEND_FILE are expected to come from ../common.mk.
+include ../common.mk
+
+ifeq ($(USE_MONT_FP),1)
+ CFLAGS += -DUSE_MONT_FP
+endif
+
+# USE_LLVM=1 links the assembly generated in ../src for the current CPU.
+# The macro is MCL_USE_LLVM, the name that test/base_test.cpp defines before including mcl/fp_base.hpp.
+# NOTE(review): SRC is assigned with "=" further down, which discards the ASM_SRC appended here;
+# the object is still linked through LDFLAGS - confirm which is intended.
+ifeq ($(USE_LLVM),1)
+ CFLAGS += -DMCL_USE_LLVM
+ ASM_SRC=../src/$(CPU).s
+ ASM_OBJ=$(ASM_SRC:.s=.o)
+ SRC+=$(ASM_SRC)
+ LDFLAGS+=$(ASM_OBJ)
+endif
+
+TARGET=$(TEST_FILE)
+LIBS=
+
+SRC=fp_test.cpp ec_test.cpp fp_util_test.cpp
+ifeq ($(CPU),x64)
+ SRC+=fp_generator_test.cpp mont_fp_test.cpp
+endif
+
+# these targets are commands, not files
+.PHONY: all test clean
+
+all: $(TARGET)
+
+test: $(TARGET) $(ASM_OBJ)
+ @$(UNIT_TEST)
+
+$(OBJDIR):
+ @$(MKDIR) $(OBJDIR)
+
+clean:
+ $(CLEAN)
+
+# LIBS is empty in this file, so this rule names no target at present
+$(LIBS):
+ $(MAKE) -C ../src
+
+-include $(DEPEND_FILE)
+
+ifeq ($(USE_LLVM),1)
+$(ASM_OBJ): $(ASM_SRC)
+ $(CXX) $< -o $@ -c
+endif
diff --git a/test/base_test.cpp b/test/base_test.cpp
new file mode 100644
index 0000000..29a177f
--- /dev/null
+++ b/test/base_test.cpp
@@ -0,0 +1,392 @@
+#include <map>
+#define MCL_USE_LLVM
+#include <mcl/fp_base.hpp>
+#include <cybozu/test.hpp>
+#include <cybozu/benchmark.hpp>
+#include <cybozu/xorshift.hpp>
+#include <cybozu/bit_operation.hpp>
+#include <mcl/fp_util.hpp>
+#include <mcl/fp.hpp>
+
+#include <mcl/fp_generator.hpp>
+#if (CYBOZU_HOST == CYBOZU_HOST_INTEL) && (CYBOZU_OS_BIT == 64)
+ #define USE_XBYAK
+ static mcl::FpGenerator fg;
+#endif
+#define PUT(x) std::cout << #x "=" << (x) << std::endl
+
+const size_t MAX_N = 32;
+typedef mcl::fp::Unit Unit;
+
+// return the number of Units needed to hold bitLen bits (rounded up)
+size_t getUnitN(size_t bitLen)
+{
+	const size_t unitBits = sizeof(Unit) * 8;
+	return (bitLen + unitBits - 1) / unitBits;
+}
+
+void setMpz(mpz_class& mx, const Unit *x, size_t n) // mx <- the n-block array x (forwards to mcl::Gmp::setRaw)
+{
+ mcl::Gmp::setRaw(mx, x, n);
+}
+void getMpz(Unit *x, size_t n, const mpz_class& mx) // x[0..n) <- mx (forwards to mcl::fp::local::toArray)
+{
+ mcl::fp::local::toArray(x, n, mx.get_mpz_t());
+}
+
+/*
+	GMP based reference implementation of Montgomery multiplication
+	M = 2^(bits of Unit), n_ = number of blocks of p, R = M^n_
+*/
+struct Montgomery {
+	mpz_class p_;
+	mpz_class R_; // (1 << (n_ * bits of Unit)) % p
+	mpz_class RR_; // (R * R) % p
+	Unit r_; // p * r = -1 mod M
+	size_t n_;
+	Montgomery() : r_(0), n_(0) {}
+	explicit Montgomery(const mpz_class& p)
+	{
+		p_ = p;
+		r_ = mcl::montgomery::getCoff(mcl::Gmp::getBlock(p, 0));
+		n_ = mcl::Gmp::getBlockSize(p);
+		// use the same radix as mul(), which shifts by sizeof(Unit) * 8 bits per block
+		R_ = 1;
+		R_ = (R_ << (n_ * sizeof(Unit) * 8)) % p_;
+		RR_ = (R_ * R_) % p_;
+	}
+
+	// x <- mul(x, R^2) ; converts x to Montgomery form
+	void toMont(mpz_class& x) const { mul(x, x, RR_); }
+	// x <- mul(x, 1) ; converts x back from Montgomery form
+	void fromMont(mpz_class& x) const { mul(x, x, 1); }
+
+	// z[n_] <- mul(x[n_], y[n_]) on raw block arrays
+	void mont(Unit *z, const Unit *x, const Unit *y) const
+	{
+		mpz_class mx, my;
+		setMpz(mx, x, n_);
+		setMpz(my, y, n_);
+		mul(mx, mx, my);
+		getMpz(z, n_, mx);
+	}
+	/*
+		z <- Montgomery product of x and y
+		one block of y is consumed per step and the accumulator is shifted down by one Unit
+		NOTE(review): a single final subtraction is used, so x, y < p is presumably required
+	*/
+	void mul(mpz_class& z, const mpz_class& x, const mpz_class& y) const
+	{
+#if 1
+		const size_t ySize = mcl::Gmp::getBlockSize(y);
+		mpz_class c = y == 0 ? mpz_class(0) : x * mcl::Gmp::getBlock(y, 0);
+		Unit q = c == 0 ? 0 : mcl::Gmp::getBlock(c, 0) * r_;
+		c += p_ * q;
+		c >>= sizeof(Unit) * 8;
+		for (size_t i = 1; i < n_; i++) {
+			if (i < ySize) {
+				c += x * mcl::Gmp::getBlock(y, i);
+			}
+			Unit q = c == 0 ? 0 : mcl::Gmp::getBlock(c, 0) * r_;
+			c += p_ * q;
+			c >>= sizeof(Unit) * 8;
+		}
+		if (c >= p_) {
+			c -= p_;
+		}
+		z = c;
+#else
+		// alternative: multiply first, then reduce one Unit at a time
+		z = x * y;
+		const size_t zSize = mcl::Gmp::getBlockSize(z);
+		for (size_t i = 0; i < n_; i++) {
+			if (i < zSize) {
+				Unit q = mcl::Gmp::getBlock(z, 0) * r_;
+				z += p_ * (mp_limb_t)q;
+			}
+			z >>= sizeof(Unit) * 8;
+		}
+		if (z >= p_) {
+			z -= p_;
+		}
+#endif
+	}
+};
+
+// print msg followed by x[n] in hex, most significant block first
+void put(const char *msg, const Unit *x, size_t n)
+{
+	printf("%s ", msg);
+	// %llx expects an unsigned long long argument
+	for (size_t i = 0; i < n; i++) printf("%016llx ", (unsigned long long)x[n - 1 - i]);
+	printf("\n");
+}
+void verifyEqual(const Unit *x, const Unit *y, size_t n, const char *file, int line) // compare x[n] with y[n]; on mismatch dump both and exit(1)
+{
+ bool ok = mcl::fp::local::isEqualArray(x, y, n);
+ CYBOZU_TEST_ASSERT(ok); // report the result through the test framework as well
+ if (ok) return;
+ printf("%s:%d\n", file, line); // call site supplied by VERIFY_EQUAL
+ put("L", x, n);
+ put("R", y, n);
+ exit(1);
+}
+#define VERIFY_EQUAL(x, y, n) verifyEqual(x, y, n, __FILE__, __LINE__) // pass the caller's file and line to verifyEqual
+
+// reference modular addition: z[n] <- x[n] + y[n], minus p[n] if the sum is >= p
+void addC(Unit *z, const Unit *x, const Unit *y, const Unit *p, size_t n)
+{
+	mpz_class a, b, mod;
+	setMpz(a, x, n);
+	setMpz(b, y, n);
+	setMpz(mod, p, n);
+	mpz_class sum = a + b;
+	if (sum >= mod) {
+		sum -= mod;
+	}
+	getMpz(z, n, sum);
+}
+// reference modular subtraction: z[n] <- x[n] - y[n], plus p[n] if the difference is negative
+void subC(Unit *z, const Unit *x, const Unit *y, const Unit *p, size_t n)
+{
+	mpz_class a, b, mod;
+	setMpz(a, x, n);
+	setMpz(b, y, n);
+	setMpz(mod, p, n);
+	mpz_class diff = a - b;
+	if (diff < 0) {
+		diff += mod;
+	}
+	getMpz(z, n, diff);
+}
+static inline void set_zero(mpz_t& z, Unit *p, size_t n) // make z a zero-sized mpz_t whose limb storage is the caller's buffer p of n limbs
+{
+ z->_mp_alloc = (int)n;
+ z->_mp_size = 0;
+ z->_mp_d = (mp_limb_t*)p; // z only borrows p; nothing here is allocated by GMP
+}
+// make z a read-only view of p[n] ; the size excludes the zero limbs at the top
+static inline void set_mpz_t(mpz_t& z, const Unit* p, int n)
+{
+	int used = n;
+	for (; used > 0; used--) {
+		if (p[used - 1] != 0) break;
+	}
+	z->_mp_alloc = n;
+	z->_mp_size = used;
+	z->_mp_d = (mp_limb_t*)p;
+}
+
+// z[2n] <- x[n] * y[n]
+void mulPreC(Unit *z, const Unit *x, const Unit *y, size_t n)
+{
+#if 1
+ mpz_t mx, my, mz;
+ set_zero(mz, z, n * 2); // mz uses z itself as limb storage (2n limbs)
+ set_mpz_t(mx, x, n);
+ set_mpz_t(my, y, n);
+ mpz_mul(mz, mx, my);
+ mcl::fp::local::toArray(z, n * 2, mz); // presumably fills the limbs above the product size; verify against toArray
+#else
+ mpz_class mx, my;
+ setMpz(mx, x, n);
+ setMpz(my, y, n);
+ mx *= my;
+ getMpz(z, n * 2, mx);
+#endif
+}
+
+// y[n] <- x[2n] mod p[n]
+void modC(Unit *y, const Unit *x, const Unit *p, size_t n)
+{
+	mpz_t mx, my, mp;
+	set_mpz_t(mx, x, n * 2);
+	// y is output only: wrap it as an empty value instead of scanning its (possibly uninitialized) contents
+	set_zero(my, y, n);
+	set_mpz_t(mp, p, n);
+	mpz_mod(my, mx, mp);
+	// presumably clears y[size of remainder .. n) ; verify against clearArray
+	mcl::fp::local::clearArray(y, my->_mp_size, n);
+}
+
+void mul(Unit *z, const Unit *x, const Unit *y, const Unit *p, size_t n) // z[n] <- (x[n] * y[n]) mod p[n], computed with GMP
+{
+ Unit ret[MAX_N * 2]; // scratch limbs for the double-width product
+ mpz_t mx, my, mz, mp;
+ set_zero(mz, ret, MAX_N * 2);
+ set_mpz_t(mx, x, n);
+ set_mpz_t(my, y, n);
+ set_mpz_t(mp, p, n);
+ mpz_mul(mz, mx, my);
+ mpz_mod(mz, mz, mp);
+ mcl::fp::local::toArray(z, n, mz);
+}
+
+typedef mcl::fp::void3op void3op;
+typedef mcl::fp::void4op void4op;
+typedef mcl::fp::void4Iop void4Iop;
+
+const struct FuncOp { // one row per supported operand width: the generated functions exercised by test()
+ size_t bitLen;
+ void4op addS;
+ void4op addL;
+ void4op subS;
+ void4op subL;
+ void3op mulPre;
+ void4Iop mont;
+} gFuncOpTbl[] = {
+ { 128, mcl_fp_add128S, mcl_fp_add128L, mcl_fp_sub128S, mcl_fp_sub128L, mcl_fp_mul128pre, mcl_fp_mont128 },
+ { 192, mcl_fp_add192S, mcl_fp_add192L, mcl_fp_sub192S, mcl_fp_sub192L, mcl_fp_mul192pre, mcl_fp_mont192 },
+ { 256, mcl_fp_add256S, mcl_fp_add256L, mcl_fp_sub256S, mcl_fp_sub256L, mcl_fp_mul256pre, mcl_fp_mont256 },
+ { 320, mcl_fp_add320S, mcl_fp_add320L, mcl_fp_sub320S, mcl_fp_sub320L, mcl_fp_mul320pre, mcl_fp_mont320 },
+ { 384, mcl_fp_add384S, mcl_fp_add384L, mcl_fp_sub384S, mcl_fp_sub384L, mcl_fp_mul384pre, mcl_fp_mont384 },
+ { 448, mcl_fp_add448S, mcl_fp_add448L, mcl_fp_sub448S, mcl_fp_sub448L, mcl_fp_mul448pre, mcl_fp_mont448 },
+ { 512, mcl_fp_add512S, mcl_fp_add512L, mcl_fp_sub512S, mcl_fp_sub512L, mcl_fp_mul512pre, mcl_fp_mont512 },
+#if CYBOZU_OS_BIT == 32 // widths that are multiples of 32 but not of 64 exist only in the 32-bit build
+ { 160, mcl_fp_add160S, mcl_fp_add160L, mcl_fp_sub160S, mcl_fp_sub160L, mcl_fp_mul160pre, mcl_fp_mont160 },
+ { 224, mcl_fp_add224S, mcl_fp_add224L, mcl_fp_sub224S, mcl_fp_sub224L, mcl_fp_mul224pre, mcl_fp_mont224 },
+ { 288, mcl_fp_add288S, mcl_fp_add288L, mcl_fp_sub288S, mcl_fp_sub288L, mcl_fp_mul288pre, mcl_fp_mont288 },
+ { 352, mcl_fp_add352S, mcl_fp_add352L, mcl_fp_sub352S, mcl_fp_sub352L, mcl_fp_mul352pre, mcl_fp_mont352 },
+ { 416, mcl_fp_add416S, mcl_fp_add416L, mcl_fp_sub416S, mcl_fp_sub416L, mcl_fp_mul416pre, mcl_fp_mont416 },
+ { 480, mcl_fp_add480S, mcl_fp_add480L, mcl_fp_sub480S, mcl_fp_sub480L, mcl_fp_mul480pre, mcl_fp_mont480 },
+ { 544, mcl_fp_add544S, mcl_fp_add544L, mcl_fp_sub544S, mcl_fp_sub544L, mcl_fp_mul544pre, mcl_fp_mont544 },
+#else
+ { 576, mcl_fp_add576S, mcl_fp_add576L, mcl_fp_sub576S, mcl_fp_sub576L, mcl_fp_mul576pre, mcl_fp_mont576 },
+#endif
+};
+
+/*
+	return the entry of gFuncOpTbl with the smallest bitLen that is >= the requested bitLen
+	print an error and exit(1) if bitLen is larger than every entry
+*/
+FuncOp getFuncOp(size_t bitLen)
+{
+	typedef std::map<size_t, FuncOp> Map;
+	static Map map;
+	if (map.empty()) {
+		// index the table by bitLen once ; std::map keeps the keys ordered
+		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(gFuncOpTbl); i++) {
+			map[gFuncOpTbl[i].bitLen] = gFuncOpTbl[i];
+		}
+	}
+	// first key that is not less than bitLen
+	Map::const_iterator i = map.lower_bound(bitLen);
+	if (i != map.end()) {
+		return i->second;
+	}
+	printf("ERR bitLen=%d\n", (int)bitLen);
+	exit(1);
+}
+
+void test(const Unit *p, size_t bitLen) // check the generated add/sub/mulPre/mont functions for modulus p against the GMP references
+{
+ printf("bitLen %d\n", (int)bitLen);
+ const size_t n = getUnitN(bitLen);
+#ifdef NDEBUG
+ bool doBench = true;
+#else
+ bool doBench = false;
+#endif
+ const FuncOp funcOp = getFuncOp(bitLen);
+ const void4op addS = funcOp.addS;
+ const void4op addL = funcOp.addL;
+ const void4op subS = funcOp.subS;
+ const void4op subL = funcOp.subL;
+ const void3op mulPre = funcOp.mulPre;
+ const void4Iop mont = funcOp.mont;
+
+ mcl::fp::Unit x[MAX_N], y[MAX_N];
+ mcl::fp::Unit z[MAX_N], w[MAX_N];
+ mcl::fp::Unit z2[MAX_N * 2];
+ mcl::fp::Unit w2[MAX_N * 2];
+ cybozu::XorShift rg;
+ mcl::fp::getRandVal(x, rg, p, bitLen);
+ mcl::fp::getRandVal(y, rg, p, bitLen);
+ const size_t C = 10;
+
+ addC(z, x, y, p, n); // z : reference result, w : result of the function under test
+ addS(w, x, y, p);
+ VERIFY_EQUAL(z, w, n);
+ for (size_t i = 0; i < C; i++) {
+ addC(z, y, z, p, n);
+ addS(w, y, w, p);
+ VERIFY_EQUAL(z, w, n);
+ addC(z, y, z, p, n);
+ addL(w, y, w, p);
+ VERIFY_EQUAL(z, w, n);
+ subC(z, x, z, p, n);
+ subS(w, x, w, p);
+ VERIFY_EQUAL(z, w, n);
+ subC(z, x, z, p, n);
+ subL(w, x, w, p);
+ VERIFY_EQUAL(z, w, n);
+ mulPreC(z2, x, z, n);
+ mulPre(w2, x, z);
+ VERIFY_EQUAL(z2, w2, n * 2);
+ }
+ {
+ mpz_class mp;
+ setMpz(mp, p, n);
+ Montgomery m(mp);
+#ifdef USE_XBYAK
+ if (bitLen > 128) fg.init(p, n);
+#endif
+ /*
+ real mont
+ 0 0
+ 1 R^-1
+ R 1
+ -1 -R^-1
+ -R -1
+ */
+ mpz_class t = 1;
+ const mpz_class R = (t << (n * sizeof(Unit) * 8)) % mp; // n counts Units (see getUnitN), so the radix is 2^(Unit bits * n)
+ const mpz_class tbl[] = {
+ 0, 1, R, mp - 1, mp - R
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ const mpz_class& mx = tbl[i];
+ for (size_t j = i; j < CYBOZU_NUM_OF_ARRAY(tbl); j++) { // every unordered pair of special values
+ const mpz_class& my = tbl[j];
+ getMpz(x, n, mx);
+ getMpz(y, n, my);
+ m.mont(z, x, y);
+ mont(w, x, y, p, m.r_);
+ VERIFY_EQUAL(z, w, n);
+#ifdef USE_XBYAK
+ if (bitLen > 128) {
+ fg.mul_(w, x, y);
+ VERIFY_EQUAL(z, w, n);
+ }
+#endif
+ }
+ }
+ if (doBench) {
+// CYBOZU_BENCH("montC", m.mont, x, y, x);
+ CYBOZU_BENCH("montA ", mont, x, y, x, p, m.r_);
+ }
+ }
+ if (doBench) {
+// CYBOZU_BENCH("addS", addS, x, y, x, p); // slow
+// CYBOZU_BENCH("subS", subS, x, y, x, p);
+// CYBOZU_BENCH("addL", addL, x, y, x, p);
+// CYBOZU_BENCH("subL", subL, x, y, x, p);
+ CYBOZU_BENCH("mulPreA", mulPre, w2, y, x);
+ CYBOZU_BENCH("mulPreC", mulPreC, w2, y, x, n);
+ CYBOZU_BENCH("modC ", modC, x, w2, p, n);
+ }
+#ifdef USE_XBYAK
+ if (bitLen <= 128) return; // fg is only initialized for bitLen > 128 in this function
+ if (doBench) {
+ fg.init(p, n);
+ CYBOZU_BENCH("addA ", fg.add_, x, y, x);
+ CYBOZU_BENCH("subA ", fg.sub_, x, y, x);
+// CYBOZU_BENCH("mulA", fg.mul_, x, y, x);
+ }
+#endif
+ printf("mont test %d\n", (int)bitLen);
+}
+
+CYBOZU_TEST_AUTO(all) // run test() for every modulus in the table below
+{
+ const struct {
+ size_t n; // number of 64-bit words of p in use
+ const uint64_t p[9]; // modulus, least significant word first
+ } tbl[] = {
+// { 2, { 0xf000000000000001, 1, } },
+ { 2, { 0x000000000000001d, 0x8000000000000000, } },
+ { 3, { 0x000000000000012b, 0x0000000000000000, 0x0000000080000000, } },
+// { 3, { 0x0f69466a74defd8d, 0xfffffffe26f2fc17, 0x07ffffffffffffff, } },
+// { 3, { 0x7900342423332197, 0x1234567890123456, 0x1480948109481904, } },
+ { 3, { 0x0f69466a74defd8d, 0xfffffffe26f2fc17, 0xffffffffffffffff, } },
+// { 4, { 0x7900342423332197, 0x4242342420123456, 0x1234567892342342, 0x1480948109481904, } },
+// { 4, { 0x0f69466a74defd8d, 0xfffffffe26f2fc17, 0x17ffffffffffffff, 0x1513423423423415, } },
+ { 4, { 0xa700000000000013, 0x6121000000000013, 0xba344d8000000008, 0x2523648240000001, } },
+// { 5, { 0x0000000000000009, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x8000000000000000, } },
+ { 5, { 0xfffffffffffffc97, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, } },
+// { 6, { 0x4720422423332197, 0x0034230847204720, 0x3456789012345679, 0x4820984290482212, 0x9482094820948209, 0x0194810841094810, } },
+// { 6, { 0x7204224233321972, 0x0342308472047204, 0x4567890123456790, 0x0948204204243123, 0x2098420984209482, 0x2093482094810948, } },
+ { 6, { 0x00000000ffffffff, 0xffffffff00000000, 0xfffffffffffffffe, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, } },
+// { 7, { 0x0000000000000063, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x8000000000000000, } },
+ { 7, { 0x000000000fffcff1, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, } },
+ { 8, { 0xffffffffffffd0c9, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, } },
+ { 9, { 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0x00000000000001ff, } },
+// { 9, { 0x4720422423332197, 0x0034230847204720, 0x3456789012345679, 0x2498540975555312, 0x9482904924029424, 0x0948209842098402, 0x1098410948109482, 0x0820958209582094, 0x0000000000000029, } },
+// { 9, { 0x0f69466a74defd8d, 0xfffffffe26f2fc17, 0x7fffffffffffffff, 0x8572938572398583, 0x5732057823857293, 0x9820948205872380, 0x3409238420492034, 0x9483842098340298, 0x0000000000000003, } },
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ const size_t n = tbl[i].n;
+ const size_t bitLen = (n - 1) * 64 + cybozu::bsr<uint64_t>(tbl[i].p[n - 1]) + 1; // full lower words plus the bits used in the top word
+ test((const Unit*)tbl[i].p, bitLen);
+ }
+}
+
diff --git a/test/ec_test.cpp b/test/ec_test.cpp
new file mode 100644
index 0000000..1255a1d
--- /dev/null
+++ b/test/ec_test.cpp
@@ -0,0 +1,397 @@
+#define PUT(x) std::cout << #x "=" << (x) << std::endl
+#define CYBOZU_TEST_DISABLE_AUTO_RUN
+#include <cybozu/test.hpp>
+#include <cybozu/benchmark.hpp>
+#include <mcl/gmp_util.hpp>
+
+#include <mcl/fp.hpp>
+typedef mcl::FpT<> Fp_3;
+typedef mcl::FpT<> Fp_4;
+typedef mcl::FpT<> Fp_6;
+typedef mcl::FpT<> Fp_9;
+#include <mcl/ec.hpp>
+#include <mcl/ecparam.hpp>
+#include <time.h>
+
+struct tagZn;
+typedef mcl::FpT<tagZn> Zn;
+
+template<class Fp>
+struct Test { // test suite for mcl::EcT<Fp> on one curve parameter set
+ typedef mcl::EcT<Fp> Ec;
+ const mcl::EcParam& para;
+ Test(const mcl::EcParam& para) // set the global modulus of Fp and Zn and the curve coefficients from para
+ : para(para)
+ {
+ Fp::setModulo(para.p);
+ Zn::setModulo(para.n);
+ Ec::setParam(para.a, para.b);
+// CYBOZU_TEST_EQUAL(para.bitLen, Fp(-1).getBitLen());
+ }
+ void cstr() const // a default constructed point is zero and so is its negation
+ {
+ Ec O;
+ CYBOZU_TEST_ASSERT(O.isZero());
+ Ec P;
+ Ec::neg(P, O);
+ CYBOZU_TEST_EQUAL(P, O);
+ }
+ void ope() const // neg / add / dbl / power identities on the point (gx, gy)
+ {
+ Fp x(para.gx);
+ Fp y(para.gy);
+ Zn n = 0; // 0 in Zn, whose modulus is para.n ; n - 1 below is therefore para.n - 1
+ CYBOZU_TEST_ASSERT(Ec::isValid(x, y));
+ Ec P(x, y), Q, R, O;
+ {
+ Ec::neg(Q, P);
+ CYBOZU_TEST_EQUAL(Q.x, P.x);
+ CYBOZU_TEST_EQUAL(Q.y, -P.y);
+
+ R = P + Q;
+ CYBOZU_TEST_ASSERT(R.isZero());
+
+ R = P + O;
+ CYBOZU_TEST_EQUAL(R, P);
+ R = O + P;
+ CYBOZU_TEST_EQUAL(R, P);
+ }
+
+ {
+ Ec::dbl(R, P);
+ Ec R2 = P + P;
+ CYBOZU_TEST_EQUAL(R, R2);
+ {
+ Ec P2 = P;
+ Ec::dbl(P2, P2);
+ CYBOZU_TEST_EQUAL(P2, R2);
+ }
+ Ec R3L = R2 + P;
+ Ec R3R = P + R2;
+ CYBOZU_TEST_EQUAL(R3L, R3R);
+ {
+ Ec RR = R2;
+ RR = RR + P;
+ CYBOZU_TEST_EQUAL(RR, R3L);
+ RR = R2;
+ RR = P + RR;
+ CYBOZU_TEST_EQUAL(RR, R3L);
+ RR = P;
+ RR = RR + RR;
+ CYBOZU_TEST_EQUAL(RR, R2);
+ }
+ Ec::power(R, P, 2);
+ CYBOZU_TEST_EQUAL(R, R2);
+ Ec R4L = R3L + R2; // 3P + 2P
+ Ec R4R = R2 + R3L;
+ CYBOZU_TEST_EQUAL(R4L, R4R);
+ Ec::power(R, P, 5);
+ CYBOZU_TEST_EQUAL(R, R4L);
+ }
+ {
+ R = P;
+ for (int i = 0; i < 10; i++) {
+ R += P;
+ }
+ Ec R2;
+ Ec::power(R2, P, 11);
+ CYBOZU_TEST_EQUAL(R, R2);
+ }
+ Ec::power(R, P, n - 1);
+ CYBOZU_TEST_EQUAL(R, -P);
+ R += P; // Ec::power(R, P, n);
+ CYBOZU_TEST_ASSERT(R.isZero());
+ }
+
+ void power() const // Ec::power(Q, P, i) equals P added i times, for i in [0, 100)
+ {
+ Fp x(para.gx);
+ Fp y(para.gy);
+ Ec P(x, y);
+ Ec Q;
+ Ec R;
+ for (int i = 0; i < 100; i++) {
+ Ec::power(Q, P, i);
+ CYBOZU_TEST_EQUAL(Q, R);
+ R += P;
+ }
+ }
+
+ void neg_power() const // same as power() with negative exponents and repeated subtraction
+ {
+ Fp x(para.gx);
+ Fp y(para.gy);
+ Ec P(x, y);
+ Ec Q;
+ Ec R;
+ for (int i = 0; i < 100; i++) {
+ Ec::power(Q, P, -i);
+ CYBOZU_TEST_EQUAL(Q, R);
+ R -= P;
+ }
+ }
+ void squareRoot() const // Ec::getYfromX recovers gy and -gy from gx and the result of Fp::isYodd
+ {
+ Fp x(para.gx);
+ Fp y(para.gy);
+ bool odd = Fp::isYodd(y);
+ Fp yy;
+ Ec::getYfromX(yy, x, odd);
+ CYBOZU_TEST_EQUAL(yy, y);
+ Fp::neg(y, y);
+ odd = Fp::isYodd(y);
+ yy.clear();
+ Ec::getYfromX(yy, x, odd);
+ CYBOZU_TEST_EQUAL(yy, y);
+ }
+ void power_fp() const // same as power() with the exponent given as a Zn element
+ {
+ Fp x(para.gx);
+ Fp y(para.gy);
+ Ec P(x, y);
+ Ec Q;
+ Ec R;
+ for (int i = 0; i < 100; i++) {
+ Ec::power(Q, P, Zn(i));
+ CYBOZU_TEST_EQUAL(Q, R);
+ R += P;
+ }
+ }
+ void binaryExpression() const // appendToBitVec / fromBitVec round trip for P, -P and zero, in both expression modes
+ {
+ puts("test binaryExpression");
+ const Fp x(para.gx);
+ const Fp y(para.gy);
+ Ec P(x, y);
+ Ec Q;
+ // not compressed
+ Ec::setCompressedExpression(false);
+ {
+ cybozu::BitVector bv;
+ P.appendToBitVec(bv);
+ Q.fromBitVec(bv);
+ CYBOZU_TEST_EQUAL(P, Q);
+ }
+ {
+ P = -P;
+ cybozu::BitVector bv;
+ P.appendToBitVec(bv);
+ Q.fromBitVec(bv);
+ CYBOZU_TEST_EQUAL(P, Q);
+ }
+ P.clear();
+ {
+ cybozu::BitVector bv;
+ P.appendToBitVec(bv);
+ Q.fromBitVec(bv);
+ CYBOZU_TEST_EQUAL(P, Q);
+ }
+ // compressed
+ Ec::setCompressedExpression(true);
+ P.set(x, y);
+ {
+ cybozu::BitVector bv;
+ P.appendToBitVec(bv);
+ Q.fromBitVec(bv);
+ CYBOZU_TEST_EQUAL(P, Q);
+ }
+ {
+ P = -P;
+ cybozu::BitVector bv;
+ P.appendToBitVec(bv);
+ Q.fromBitVec(bv);
+ CYBOZU_TEST_EQUAL(P, Q);
+ }
+ P.clear();
+ {
+ cybozu::BitVector bv;
+ P.appendToBitVec(bv);
+ Q.fromBitVec(bv);
+ CYBOZU_TEST_EQUAL(P, Q);
+ }
+ }
+ void str() const // stream output / input round trip for P, -P and zero, in both expression modes
+ {
+ puts("test str");
+ const Fp x(para.gx);
+ const Fp y(para.gy);
+ Ec P(x, y);
+ Ec Q;
+ // not compressed
+ Ec::setCompressedExpression(false);
+ {
+ std::stringstream ss;
+ ss << P;
+ ss >> Q;
+ CYBOZU_TEST_EQUAL(P, Q);
+ }
+ {
+ P = -P;
+ std::stringstream ss;
+ ss << P;
+ ss >> Q;
+ CYBOZU_TEST_EQUAL(P, Q);
+ }
+ P.clear();
+ {
+ std::stringstream ss;
+ ss << P;
+ ss >> Q;
+ CYBOZU_TEST_EQUAL(P, Q);
+ }
+ // compressed
+ Ec::setCompressedExpression(true);
+ P.set(x, y);
+ {
+ std::stringstream ss;
+ ss << P;
+ ss >> Q;
+ CYBOZU_TEST_EQUAL(P, Q);
+ }
+ {
+ P = -P;
+ std::stringstream ss;
+ ss << P;
+ ss >> Q;
+ CYBOZU_TEST_EQUAL(P, Q);
+ }
+ P.clear();
+ {
+ std::stringstream ss;
+ ss << P;
+ ss >> Q;
+ CYBOZU_TEST_EQUAL(P, Q);
+ }
+ }
+
+ template<class F>
+ void test(F f, const char *msg) const // time N calls of f(Q, P, Q) with clock() and print the average in usec ; not called from run()
+ {
+ const int N = 300000;
+ Fp x(para.gx);
+ Fp y(para.gy);
+ Ec P(x, y);
+ Ec Q = P + P + P;
+ clock_t begin = clock();
+ for (int i = 0; i < N; i++) {
+ f(Q, P, Q);
+ }
+ clock_t end = clock();
+ printf("%s %.2fusec\n", msg, (end - begin) / double(CLOCKS_PER_SEC) / N * 1e6);
+ }
+ /*
+ add 8.71usec -> 6.94
+ sub 6.80usec -> 4.84
+ dbl 9.59usec -> 7.75
+ pos 2730usec -> 2153
+ */
+ void bench() const // benchmark add / sub / dbl / power with CYBOZU_BENCH
+ {
+ Fp x(para.gx);
+ Fp y(para.gy);
+ Ec P(x, y);
+ Ec Q = P + P + P;
+ CYBOZU_BENCH("add", Ec::add, Q, P, Q);
+ CYBOZU_BENCH("sub", Ec::sub, Q, P, Q);
+ CYBOZU_BENCH("dbl", Ec::dbl, P, P);
+ Zn z("-3");
+ CYBOZU_BENCH("pow", Ec::power, P, P, z);
+ }
+/*
+Affine : sandy-bridge
+add 3.17usec
+sub 2.43usec
+dbl 3.32usec
+pow 905.00usec
+Jacobi
+add 2.34usec
+sub 2.65usec
+dbl 1.56usec
+pow 499.00usec
+*/
+ void run() const // run every check ; the benchmark only when NDEBUG is defined
+ {
+ cstr();
+ ope();
+ power();
+ neg_power();
+ power_fp();
+ binaryExpression();
+ squareRoot();
+ str();
+#ifdef NDEBUG
+ bench();
+#endif
+ }
+private:
+ Test(const Test&); // declared but not defined: copying is disabled
+ void operator=(const Test&);
+};
+
+// print the name of each of the paraNum parameter sets in para and run Test<Fp> on it
+template<class Fp>
+void test_sub(const mcl::EcParam *para, size_t paraNum)
+{
+	const mcl::EcParam *const end = para + paraNum;
+	for (const mcl::EcParam *cur = para; cur != end; ++cur) {
+		puts(cur->name);
+		Test<Fp> t(*cur);
+		t.run();
+	}
+}
+
+int g_partial = -1;
+
+CYBOZU_TEST_AUTO(all) // run the curve groups whose bit (3, 4, 6 or 9) is set in g_partial
+{
+#ifdef USE_MONT_FP
+ puts("use MontFp");
+#else
+ puts("use GMP");
+#endif
+ if (g_partial & (1 << 3)) {
+ const struct mcl::EcParam para3[] = {
+ // mcl::ecparam::p160_1,
+ mcl::ecparam::secp160k1,
+ mcl::ecparam::secp192k1,
+ mcl::ecparam::NIST_P192,
+ };
+ test_sub<Fp_3>(para3, CYBOZU_NUM_OF_ARRAY(para3));
+ }
+
+ if (g_partial & (1 << 4)) {
+ const struct mcl::EcParam para4[] = {
+ mcl::ecparam::secp224k1,
+ mcl::ecparam::secp256k1,
+ mcl::ecparam::NIST_P224,
+ mcl::ecparam::NIST_P256,
+ };
+ test_sub<Fp_4>(para4, CYBOZU_NUM_OF_ARRAY(para4));
+ }
+
+ if (g_partial & (1 << 6)) {
+ const struct mcl::EcParam para6[] = {
+ // mcl::ecparam::secp384r1,
+ mcl::ecparam::NIST_P384,
+ };
+ test_sub<Fp_6>(para6, CYBOZU_NUM_OF_ARRAY(para6));
+ }
+
+ if (g_partial & (1 << 9)) {
+ const struct mcl::EcParam para9[] = {
+ // mcl::ecparam::secp521r1,
+ mcl::ecparam::NIST_P521,
+ };
+ test_sub<Fp_9>(para9, CYBOZU_NUM_OF_ARRAY(para9));
+ }
+}
+
+/*
+	usage: ec_test [n ...]
+	each n sets bit n of g_partial ; the test above runs the groups for bits 3, 4, 6 and 9
+	all bits are set if no argument is given
+*/
+int main(int argc, char *argv[])
+{
+	if (argc == 1) {
+		g_partial = -1;
+	} else {
+		g_partial = 0;
+		for (int i = 1; i < argc; i++) {
+			const int bit = atoi(argv[i]);
+			// shifting 1 by a negative count or into/past the sign bit of int is undefined
+			if (bit < 0 || bit >= int(sizeof(int) * 8) - 1) {
+				printf("ignore invalid group %s\n", argv[i]);
+				continue;
+			}
+			g_partial |= 1 << bit;
+		}
+	}
+	return cybozu::test::autoRun.run(argc, argv);
+}
diff --git a/test/fp_generator_test.cpp b/test/fp_generator_test.cpp
new file mode 100644
index 0000000..9a61ab2
--- /dev/null
+++ b/test/fp_generator_test.cpp
@@ -0,0 +1,222 @@
+#include <cybozu/test.hpp>
+#if CYBOZU_OS_BIT == 32
+// not support
+#else
+#include <mcl/gmp_util.hpp>
+#include <stdint.h>
+#include <string>
+#include <cybozu/itoa.hpp>
+#include <mcl/fp_generator.hpp>
+#include <mcl/fp.hpp>
+#include <iostream>
+#include <cybozu/xorshift.hpp>
+#include <cybozu/benchmark.hpp>
+
+typedef mcl::FpT<> Fp;
+
+const int MAX_N = 4;
+
+const char *primeTable[] = {
+ "7fffffffffffffffffffffffffffffff", // 127bit(not full)
+ "ffffffffffffffffffffffffffffff61", // 128bit(full)
+ "fffffffffffffffffffffffffffffffffffffffeffffee37", // 192bit(full)
+ "2523648240000001ba344d80000000086121000000000013a700000000000013", // 254bit(not full)
+};
+
+/*
+	copy the limbs of x into p and clear the rest of p
+	p is an output buffer of MAX_N elements
+	return the number of uint64_t elements occupied by x
+	exit(1) if x does not fit in MAX_N elements
+*/
+int convertToArray(uint64_t *p, const mpz_class& x)
+{
+	mpz_srcptr raw = x.get_mpz_t();
+	const int pn = int(sizeof(mp_limb_t) * raw->_mp_size / sizeof(*p));
+	if (pn > MAX_N) {
+		printf("pn(%d) is too large\n", pn);
+		exit(1);
+	}
+	const uint64_t *src = (const uint64_t*)raw->_mp_d;
+	for (int i = 0; i < MAX_N; i++) {
+		p[i] = (i < pn) ? src[i] : 0;
+	}
+	return pn;
+}
+int convertToArray(uint64_t *p, const char *pStr) // parse pStr as hexadecimal and store it in p ; returns what the mpz_class overload returns
+{
+ mpz_class x;
+ x.set_str(pStr, 16);
+ return convertToArray(p, x);
+}
+
+/*
+	fixed capacity array of MAX_N uint64_t passed as a raw operand to FpGenerator
+	vn is the number of elements in use ; only v[0..vn) is compared and printed
+*/
+struct Int {
+	int vn;
+	uint64_t v[MAX_N];
+	Int()
+		: vn(0)
+	{
+	}
+	// exit(1) if n exceeds the capacity
+	explicit Int(int n)
+	{
+		if (n > MAX_N) {
+			printf("vn(%d) is too large\n", n);
+			exit(1);
+		}
+		vn = n;
+	}
+	// str is hexadecimal
+	void set(const char *str) { convertToArray(v, str); }
+	void set(const Fp& rhs)
+	{
+		convertToArray(v, rhs.toGmp());
+	}
+	// copy vn elements from x
+	void set(const uint64_t* x)
+	{
+		std::copy(x, x + vn, v);
+	}
+	void fromStr(const char *str)
+	{
+		convertToArray(v, str);
+	}
+	// concatenation of itohex of each element in use, most significant first
+	std::string toStr() const
+	{
+		std::string ret;
+		for (int i = vn - 1; i >= 0; i--) {
+			ret += cybozu::itohex(v[i], false);
+		}
+		return ret;
+	}
+	void put(const char *msg = "") const
+	{
+		if (msg) {
+			printf("%s=", msg);
+		}
+		const std::string s = toStr();
+		printf("%s\n", s.c_str());
+	}
+	bool operator==(const Int& rhs) const
+	{
+		return vn == rhs.vn && std::equal(v, v + vn, rhs.v);
+	}
+	bool operator!=(const Int& rhs) const { return !(*this == rhs); }
+	// compare with the value of rhs converted to an Int of the same vn
+	bool operator==(const Fp& rhs) const
+	{
+		Int t(vn);
+		t.set(rhs);
+		return *this == t;
+	}
+	bool operator!=(const Fp& rhs) const { return !(*this == rhs); }
+};
+// write x.toStr() to os
+static inline std::ostream& operator<<(std::ostream& os, const Int& x)
+{
+	os << x.toStr();
+	return os;
+}
+
+void testAddSub(const mcl::FpGenerator& fg, int pn) // fg.add_ / fg.sub_ on raw arrays must track Fp += / -=
+{
+ Fp x, y;
+ Int mx(pn), my(pn);
+ x.fromStr("0x8811aabb23427cc");
+ y.fromStr("0x8811aabb23427cc11");
+ mx.set(x);
+ my.set(y);
+ for (int i = 0; i < 30; i++) { // repeated doubling
+ CYBOZU_TEST_EQUAL(mx, x);
+ x += x;
+ fg.add_(mx.v, mx.v, mx.v);
+ }
+ for (int i = 0; i < 30; i++) { // repeated addition of y
+ CYBOZU_TEST_EQUAL(mx, x);
+ x += y;
+ fg.add_(mx.v, mx.v, my.v);
+ }
+ for (int i = 0; i < 30; i++) { // repeated subtraction of x from y
+ CYBOZU_TEST_EQUAL(my, y);
+ y -= x;
+ fg.sub_(my.v, my.v, mx.v);
+ }
+}
+
+void testNeg(const mcl::FpGenerator& fg, int pn) // fg.neg_ on the raw array must agree with -x on Fp
+{
+ Fp x;
+ Int mx(pn);
+ const char *tbl[] = {
+ "0",
+ "0x12346",
+ "0x11223344556677881122334455667788",
+ "0x0abbccddeeffaabb0000000000000000",
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ x.fromStr(tbl[i]);
+ mx.set(x);
+ x = -x;
+ fg.neg_(mx.v, mx.v);
+ CYBOZU_TEST_EQUAL(mx, x);
+ }
+}
+
+void testMulI(const mcl::FpGenerator& fg, int pn) // fg.mulI_ (pn words times one 64-bit word) must match the GMP product
+{
+ cybozu::XorShift rg;
+ for (int i = 0; i < 100; i++) {
+ uint64_t x[MAX_N];
+ uint64_t z[MAX_N + 1];
+ rg.read(x, pn);
+ uint64_t y = rg.get64();
+ mpz_class mx;
+ mcl::Gmp::setRaw(mx, x, pn);
+ mpz_class my;
+ mcl::Gmp::set(my, y);
+ mx *= my;
+ uint64_t d = fg.mulI_(z, x, y);
+ z[pn] = d; // the return value is used as the top word of the product
+ mcl::Gmp::setRaw(my, z, pn + 1);
+ CYBOZU_TEST_EQUAL(mx, my);
+ }
+ {
+ uint64_t x[MAX_N];
+ uint64_t z[MAX_N + 1];
+ rg.read(x, pn);
+ uint64_t y = rg.get64();
+ CYBOZU_BENCH_C("mulI", 10000000, fg.mulI_, z, x, y);
+ }
+}
+
+void testShr1(const mcl::FpGenerator& fg, int pn) // fg.shr1_ must match a 1-bit right shift done with GMP
+{
+ cybozu::XorShift rg;
+ for (int i = 0; i < 100; i++) {
+ uint64_t x[MAX_N];
+ uint64_t z[MAX_N];
+ rg.read(x, pn);
+ mpz_class mx;
+ mcl::Gmp::setRaw(mx, x, pn);
+ mx >>= 1;
+ fg.shr1_(z, x);
+ mpz_class my;
+ mcl::Gmp::setRaw(my, z, pn);
+ CYBOZU_TEST_EQUAL(mx, my);
+ }
+}
+
+// set the hexadecimal prime pStr as the modulus of Fp, initialize a FpGenerator with it and run every check
+void test(const char *pStr)
+{
+	typedef void (*TestFunc)(const mcl::FpGenerator&, int);
+	static const TestFunc checks[] = { testAddSub, testNeg, testMulI, testShr1 };
+
+	Fp::setModulo(pStr, 16);
+	uint64_t p[MAX_N];
+	const int pn = convertToArray(p, pStr);
+	printf("pn=%d\n", pn);
+	mcl::FpGenerator fg;
+	fg.init(p, pn);
+	for (size_t i = 0; i < sizeof(checks) / sizeof(checks[0]); i++) {
+		checks[i](fg, pn);
+	}
+}
+
+CYBOZU_TEST_AUTO(all) // run test() for every prime in primeTable
+{
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(primeTable); i++) {
+ printf("test prime i=%d\n", (int)i);
+ test(primeTable[i]);
+ }
+}
+#endif
diff --git a/test/fp_test.cpp b/test/fp_test.cpp
new file mode 100644
index 0000000..aac80a1
--- /dev/null
+++ b/test/fp_test.cpp
@@ -0,0 +1,465 @@
+#define PUT(x) std::cout << #x "=" << (x) << std::endl
+#include <cybozu/test.hpp>
+#include <mcl/fp.hpp>
+#include <cybozu/benchmark.hpp>
+#include <time.h>
+
+#ifdef _MSC_VER
+ #pragma warning(disable: 4127) // const condition
+#endif
+
+typedef mcl::FpT<> Fp;
+
+const int m = 65537;
+struct Init { // fixture: sets the modulus of Fp to m
+ Init()
+ {
+ std::ostringstream ms;
+ ms << m; // setModulo is given the decimal string of m
+ Fp::setModulo(ms.str());
+ }
+};
+
+CYBOZU_TEST_SETUP_FIXTURE(Init);
+
+#ifndef MCL_ONLY_BENCH
+CYBOZU_TEST_AUTO(cstr) // construction and assignment from string / int / Fp, and toStr
+{
+ const struct {
+ const char *str;
+ int val; // expected value, already reduced mod m
+ } tbl[] = {
+ { "0", 0 },
+ { "1", 1 },
+ { "123", 123 },
+ { "0x123", 0x123 },
+ { "0b10101", 21 },
+ { "-123", m - 123 },
+ { "-0x123", m - 0x123 },
+ { "-0b10101", m - 21 },
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ // string cstr
+ Fp x(tbl[i].str);
+ CYBOZU_TEST_EQUAL(x, tbl[i].val);
+
+ // int cstr
+ Fp y(tbl[i].val);
+ CYBOZU_TEST_EQUAL(y, x);
+
+ // copy cstr
+ Fp z(x);
+ CYBOZU_TEST_EQUAL(z, x);
+
+ // assign int
+ Fp w;
+ w = tbl[i].val;
+ CYBOZU_TEST_EQUAL(w, x);
+
+ // assign self
+ Fp u;
+ u = w;
+ CYBOZU_TEST_EQUAL(u, x);
+
+ // conv
+ std::ostringstream os;
+ os << tbl[i].val;
+
+ std::string str;
+ x.toStr(str);
+ CYBOZU_TEST_EQUAL(str, os.str());
+ }
+}
+
+CYBOZU_TEST_AUTO(fromStr) // fromStr with an explicit base, with and without a 0b / 0x prefix
+{
+ const struct {
+ const char *in;
+ int out;
+ int base;
+ } tbl[] = {
+ { "100", 100, 0 }, // set base = 10 if base = 0
+ { "100", 4, 2 },
+ { "100", 256, 16 },
+ { "0b100", 4, 0 },
+ { "0b100", 4, 2 },
+ { "0x100", 256, 0 },
+ { "0x100", 256, 16 },
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ Fp x;
+ x.fromStr(tbl[i].in, tbl[i].base);
+ CYBOZU_TEST_EQUAL(x, tbl[i].out);
+ }
+ // conflict prefix with base
+ Fp x;
+ CYBOZU_TEST_EXCEPTION(x.fromStr("0b100", 16), cybozu::Exception);
+ CYBOZU_TEST_EXCEPTION(x.fromStr("0x100", 2), cybozu::Exception);
+}
+
+CYBOZU_TEST_AUTO(stream) // operator>> in the default and std::hex stream modes
+{
+ const struct {
+ const char *in;
+ int out10; // expected value when read in the default mode
+ int out16; // expected value when read after std::hex
+ } tbl[] = {
+ { "100", 100, 256 }, // set base = 10 if base = 0
+ { "0x100", 256, 256 },
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ {
+ std::istringstream is(tbl[i].in);
+ Fp x;
+ is >> x;
+ CYBOZU_TEST_EQUAL(x, tbl[i].out10);
+ }
+ {
+ std::istringstream is(tbl[i].in);
+ Fp x;
+ is >> std::hex >> x;
+ CYBOZU_TEST_EQUAL(x, tbl[i].out16);
+ }
+ }
+ std::istringstream is("0b100");
+ Fp x;
+ CYBOZU_TEST_EXCEPTION(is >> std::hex >> x, cybozu::Exception); // binary prefix conflicts with std::hex
+}
+
+CYBOZU_TEST_AUTO(conv) // the same value written in base 2, 10 and 16 parses equal and prints back
+{
+ const char *bin = "0b1001000110100";
+ const char *hex = "0x1234";
+ const char *dec = "4660";
+ Fp b(bin);
+ Fp h(hex);
+ Fp d(dec);
+ CYBOZU_TEST_EQUAL(b, h);
+ CYBOZU_TEST_EQUAL(b, d);
+
+ std::string str;
+ b.toStr(str, 2, true); // third argument true: the output carries the prefix
+ CYBOZU_TEST_EQUAL(str, bin);
+ b.toStr(str);
+ CYBOZU_TEST_EQUAL(str, dec);
+ b.toStr(str, 16, true);
+ CYBOZU_TEST_EQUAL(str, hex);
+}
+
+CYBOZU_TEST_AUTO(compare) // operator== and operator!= ; only cmp == 0 versus cmp != 0 is checked
+{
+ const struct {
+ int lhs;
+ int rhs;
+ int cmp;
+ } tbl[] = {
+ { 0, 0, 0 },
+ { 1, 0, 1 },
+ { 0, 1, -1 },
+ { -1, 0, 1 }, // m-1, 0
+ { 0, -1, -1 }, // 0, m-1
+ { 123, 456, -1 },
+ { 456, 123, 1 },
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ const Fp x(tbl[i].lhs);
+ const Fp y(tbl[i].rhs);
+ const int cmp = tbl[i].cmp;
+ if (cmp == 0) {
+ CYBOZU_TEST_EQUAL(x, y);
+ } else {
+ CYBOZU_TEST_ASSERT(x != y);
+ }
+ }
+ {
+ Fp x(5);
+ CYBOZU_TEST_ASSERT(x == 5);
+ }
+}
+
+CYBOZU_TEST_AUTO(modulo) // getModulo returns the decimal string of m
+{
+ std::ostringstream ms;
+ ms << m;
+
+ std::string str;
+ Fp::getModulo(str);
+ CYBOZU_TEST_EQUAL(str, ms.str());
+}
+
+CYBOZU_TEST_AUTO(ope) // add / sub / mul / square / inv / div, as static functions and as operators
+{
+ const struct {
+ int x;
+ int y;
+ int add; // x + y
+ int sub; // x - y
+ int mul; // x * y
+ int sqr; // x^2
+ } tbl[] = {
+ { 0, 1, 1, m - 1, 0, 0 },
+ { 9, 5, 14, 4, 45, 81 },
+ { 10, 13, 23, m - 3, 130, 100 },
+ { 2000, 1000, 3000, 1000, (2000 * 1000) % m, (2000 * 2000) % m },
+ { 12345, 9999, 12345 + 9999, 12345 - 9999, (12345 * 9999) % m, (12345 * 12345) % m },
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ const Fp x(tbl[i].x);
+ const Fp y(tbl[i].y);
+ Fp z;
+ Fp::add(z, x, y);
+ CYBOZU_TEST_EQUAL(z, tbl[i].add);
+ Fp::sub(z, x, y);
+ CYBOZU_TEST_EQUAL(z, tbl[i].sub);
+ Fp::mul(z, x, y);
+ CYBOZU_TEST_EQUAL(z, tbl[i].mul);
+
+ Fp r;
+ Fp::inv(r, y);
+ Fp::mul(z, z, r); // (x * y) * y^-1 must give x back
+ CYBOZU_TEST_EQUAL(z, tbl[i].x);
+ z = x + y;
+ CYBOZU_TEST_EQUAL(z, tbl[i].add);
+ z = x - y;
+ CYBOZU_TEST_EQUAL(z, tbl[i].sub);
+ z = x * y;
+ CYBOZU_TEST_EQUAL(z, tbl[i].mul);
+
+ Fp::square(z, x);
+ CYBOZU_TEST_EQUAL(z, tbl[i].sqr);
+
+ z = x / y;
+ z *= y;
+ CYBOZU_TEST_EQUAL(z, tbl[i].x);
+ }
+}
+
+struct tag2;
+
+CYBOZU_TEST_AUTO(power) // Fp::power with an int exponent, an Fp exponent and an exponent of another FpT type
+{
+ Fp x, y, z;
+ x = 12345;
+ z = 1;
+ for (int i = 0; i < 100; i++) {
+ Fp::power(y, x, i);
+ CYBOZU_TEST_EQUAL(y, z); // z holds x^i built by repeated multiplication
+ z *= x;
+ }
+ typedef mcl::FpT<tag2, 128> Fp2;
+ Fp2::setModulo("1009");
+ x = 5;
+ Fp2 n = 3;
+ z = 3;
+ Fp::power(x, x, z);
+ CYBOZU_TEST_EQUAL(x, 125);
+ x = 5;
+ Fp::power(x, x, n);
+ CYBOZU_TEST_EQUAL(x, 125);
+}
+
+CYBOZU_TEST_AUTO(power_fp) // Fp::power with the exponent given as Fp(i), for i in [0, 100)
+{
+ Fp x, y, z;
+ x = 12345;
+ z = 1;
+ for (int i = 0; i < 100; i++) {
+ Fp::power(y, x, Fp(i));
+ CYBOZU_TEST_EQUAL(y, z);
+ z *= x;
+ }
+}
+
+struct TagAnother;
+
+CYBOZU_TEST_AUTO(another) // an FpT with a different tag has its own modulus: 3 * 9 = 27 = 1 mod 13
+{
+ typedef mcl::FpT<TagAnother, 128> G;
+ G::setModulo("13");
+ G a = 3;
+ G b = 9;
+ a *= b;
+ CYBOZU_TEST_EQUAL(a, 1);
+}
+
+
+CYBOZU_TEST_AUTO(setRaw) // setRaw from little-endian element arrays ; changes the modulus of Fp
+{
+ Fp::setModulo("1000000000000000000117");
+ char b1[] = { 0x56, 0x34, 0x12 };
+ Fp x;
+ x.setRaw(b1, 3);
+ CYBOZU_TEST_EQUAL(x, 0x123456);
+ int b2[] = { 0x12, 0x34 };
+ x.setRaw(b2, 2);
+ CYBOZU_TEST_EQUAL(x, Fp("0x3400000012"));
+ x.fromStr("0xffffffffffff");
+
+ Fp::setModulo("0x10000000000001234567a5");
+ const struct {
+ uint32_t buf[3];
+ size_t bufN; // number of elements of buf handed to setRaw
+ const char *expected;
+ } tbl[] = {
+ { { 0x234567a4, 0x00000001, 0x00100000}, 1, "0x234567a4" },
+ { { 0x234567a4, 0x00000001, 0x00100000}, 2, "0x1234567a4" },
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ x.setRaw(tbl[i].buf, tbl[i].bufN);
+ CYBOZU_TEST_EQUAL(x, Fp(tbl[i].expected));
+ }
+ uint32_t large[3] = { 0x234567a5, 0x00000001, 0x00100000}; // exactly the modulus set above
+ CYBOZU_TEST_EXCEPTION(x.setRaw(large, 3), cybozu::Exception);
+}
+
+
+CYBOZU_TEST_AUTO(set64bit) // construction from positive and negative int64_t ; changes the modulus of Fp
+{
+ Fp::setModulo("0x1000000000000000000f");
+ const struct {
+ const char *p; // expected value as a string
+ int64_t i;
+ } tbl[] = {
+ { "0x1234567812345678", int64_t(0x1234567812345678ull) },
+ { "0xfffedcba987edcba997", -int64_t(0x1234567812345678ull) },
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ Fp x(tbl[i].p);
+ Fp y(tbl[i].i);
+ CYBOZU_TEST_EQUAL(x, y);
+ }
+}
+
+CYBOZU_TEST_AUTO(getRaw) // mcl::Gmp::getRaw into a uint32_t buffer: returned length and contents
+{
+ const struct {
+ const char *s;
+ uint32_t v[4]; // expected elements, least significant first
+ size_t vn; // expected return value of getRaw
+ } tbl[] = {
+ { "0", { 0, 0, 0, 0 }, 1 },
+ { "1234", { 1234, 0, 0, 0 }, 1 },
+ { "0xaabbccdd12345678", { 0x12345678, 0xaabbccdd, 0, 0 }, 2 },
+ { "0x11112222333344445555666677778888", { 0x77778888, 0x55556666, 0x33334444, 0x11112222 }, 4 },
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ mpz_class x(tbl[i].s);
+ const size_t bufN = 8;
+ uint32_t buf[bufN];
+ size_t n = mcl::Gmp::getRaw(buf, bufN, x);
+ CYBOZU_TEST_EQUAL(n, tbl[i].vn);
+ CYBOZU_TEST_EQUAL_ARRAY(buf, tbl[i].v, n);
+ }
+}
+
+CYBOZU_TEST_AUTO(toStr) // Fp::toStr in base 16 must match mcl::Gmp::toStr ; changes the modulus of Fp
+{
+ const char *tbl[] = {
+ "0x0",
+ "0x5",
+ "0x123",
+ "0x123456789012345679adbc",
+ "0xffffffff26f2fc170f69466a74defd8d",
+ "0x100000000000000000000000000000033",
+ "0x11ee12312312940000000000000000000000000002342343"
+ };
+ Fp::setModulo("0xfffffffffffffffffffffffe26f2fc170f69466a74defd8d");
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ mpz_class x(tbl[i]);
+ Fp y(tbl[i]);
+ std::string xs, ys;
+ mcl::Gmp::toStr(xs, x, 16);
+ y.toStr(ys, 16);
+ CYBOZU_TEST_EQUAL(xs, ys);
+ }
+}
+
+CYBOZU_TEST_AUTO(binaryRepl) // appendToBitVec / fromBitVec round trip and the block layout of the bit vector
+{
+ const struct {
+ const char *s;
+ size_t n; // number of 32-bit elements of v to compare
+ uint32_t v[6];
+ } tbl[] = {
+ { "0", 0, { 0, 0, 0, 0, 0 } },
+ { "1234", 1, { 1234, 0, 0, 0, 0 } },
+ { "0xaabbccdd12345678", 2, { 0x12345678, 0xaabbccdd, 0, 0, 0 } },
+ { "0x11112222333344445555666677778888", 4, { 0x77778888, 0x55556666, 0x33334444, 0x11112222, 0 } },
+ { "0x9911112222333344445555666677778888", 5, { 0x77778888, 0x55556666, 0x33334444, 0x11112222, 0x99, 0 } },
+ };
+ Fp::setModulo("0xfffffffffffffffffffffffe26f2fc170f69466a74defd8d");
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ Fp x(tbl[i].s);
+ cybozu::BitVector bv;
+ x.appendToBitVec(bv);
+ CYBOZU_TEST_EQUAL(bv.size(), Fp::getModBitLen());
+ CYBOZU_TEST_EQUAL(bv.size(), Fp::getBitVecSize());
+ const Fp::BlockType *block = bv.getBlock();
+ if (sizeof(Fp::BlockType) == 4) {
+ CYBOZU_TEST_EQUAL_ARRAY(block, tbl[i].v, tbl[i].n);
+ } else {
+ const size_t n = (tbl[i].n + 1) / 2; // pairs of 32-bit elements per wider block
+ for (size_t j = 0; j < n; j++) {
+ uint64_t v = (uint64_t(tbl[i].v[j * 2 + 1]) << 32) | tbl[i].v[j * 2];
+ CYBOZU_TEST_EQUAL(block[j], v);
+ }
+ }
+ Fp y;
+ y.fromBitVec(bv);
+ CYBOZU_TEST_EQUAL(x, y);
+ }
+}
+#endif
+
+#ifdef NDEBUG
+void benchSub(const char *pStr, const char *xStr, const char *yStr) // benchmark the Fp operations for modulus pStr with operands xStr and yStr
+ try
+{
+ Fp::setModulo(pStr);
+ Fp x(xStr);
+ Fp y(yStr);
+
+ CYBOZU_BENCH("add", Fp::add, x, x, x);
+ CYBOZU_BENCH("sub", Fp::sub, x, x, y);
+ CYBOZU_BENCH("mul", Fp::mul, x, x, x);
+ CYBOZU_BENCH("square", Fp::square, x, x);
+ CYBOZU_BENCH("inv", x += y;Fp::inv, x, x); // avoid same jmp
+ CYBOZU_BENCH("div", x += y;Fp::div, x, y, x);
+ puts("");
+} catch (std::exception& e) { // function-try-block: any std::exception is printed and swallowed
+ printf("ERR %s\n", e.what());
+}
+
+// square 76clk@sandy
+CYBOZU_TEST_AUTO(bench3) // benchSub with a 48 hex digit modulus
+{
+ const char *pStr = "0xfffffffffffffffffffffffe26f2fc170f69466a74defd8d";
+ const char *xStr = "0x148094810948190412345678901234567900342423332197";
+ const char *yStr = "0x7fffffffffffffffffffffe26f2fc170f69466a74defd8d";
+ benchSub(pStr, xStr, yStr);
+}
+
+CYBOZU_TEST_AUTO(bench4) // benchSub with a 64 hex digit modulus
+{
+ const char *pStr = "0x2523648240000001ba344d80000000086121000000000013a700000000000013";
+ const char *xStr = "0x1480948109481904123456789234234242423424201234567900342423332197";
+ const char *yStr = "0x151342342342341517fffffffffffffffffffffe26f2fc170f69466a74defd8d";
+ benchSub(pStr, xStr, yStr);
+}
+
+CYBOZU_TEST_AUTO(bench6) // benchSub with a 96 hex digit modulus
+{
+ const char *pStr = "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff";
+ const char *xStr = "0x19481084109481094820948209482094820984290482212345678901234567900342308472047204720422423332197";
+ const char *yStr = "0x209348209481094820984209842094820948204204243123456789012345679003423084720472047204224233321972";
+ benchSub(pStr, xStr, yStr);
+}
+
+CYBOZU_TEST_AUTO(bench9) // benchSub with the largest modulus of the four bench cases
+{
+ const char *pStr = "0x1ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff";
+ const char *xStr = "0x2908209582095820941098410948109482094820984209840294829049240294242498540975555312345678901234567900342308472047204720422423332197";
+ const char *yStr = "0x3948384209834029834092384204920349820948205872380573205782385729385729385723985837ffffffffffffffffffffffe26f2fc170f69466a74defd8d";
+ benchSub(pStr, xStr, yStr);
+}
+#endif
diff --git a/test/fp_util_test.cpp b/test/fp_util_test.cpp
new file mode 100644
index 0000000..28d94ed
--- /dev/null
+++ b/test/fp_util_test.cpp
@@ -0,0 +1,191 @@
+#define PUT(x) std::cout << #x "=" << (x) << std::endl
+#include <mcl/fp_util.hpp>
+#include <cybozu/test.hpp>
+
+CYBOZU_TEST_AUTO(toStr16) // table-driven check of mcl::fp::toStr16 on arrays of 32-bit words
+{
+ const struct {
+ uint32_t x[4]; // input words; the expected strings treat x[0] as the least significant word
+ size_t n; // number of words of x handed to toStr16
+ const char *str; // expected hex digits without any prefix
+ } tbl[] = {
+ { { 0, 0, 0, 0 }, 0, "0" }, // n == 0 is expected to print "0"
+ { { 0x123, 0, 0, 0 }, 1, "123" },
+ { { 0x12345678, 0xaabbcc, 0, 0 }, 2, "aabbcc12345678" },
+ { { 0, 0x12, 0x234a, 0 }, 3, "234a0000001200000000" }, // lower words are zero-padded to 8 digits
+ { { 1, 2, 0xffffffff, 0x123abc }, 4, "123abcffffffff0000000200000001" },
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ std::string str;
+ mcl::fp::toStr16(str, tbl[i].x, tbl[i].n, false); // false: result must carry no "0x" prefix
+ CYBOZU_TEST_EQUAL(str, tbl[i].str);
+ mcl::fp::toStr16(str, tbl[i].x, tbl[i].n, true); // true: result must start with "0x"
+ CYBOZU_TEST_EQUAL(str, std::string("0x") + tbl[i].str);
+ }
+}
+
+// CYBOZU_TEST_AUTO(toStr2) // QQQ
+// CYBOZU_TEST_AUTO(verifyStr) // QQQ
+
+CYBOZU_TEST_AUTO(fromStr16) // table-driven check of mcl::fp::fromStr16 parsing unprefixed hex into 64-bit words
+{
+ const struct {
+ const char *str; // hex digits without "0x"; the last row mixes upper and lower case
+ uint64_t x[4]; // expected words, x[0] being the least significant
+ } tbl[] = {
+ { "0", { 0, 0, 0, 0 } },
+ { "5", { 5, 0, 0, 0 } },
+ { "123", { 0x123, 0, 0, 0 } },
+ { "123456789012345679adbc", { uint64_t(0x789012345679adbcull), 0x123456, 0, 0 } },
+ { "ffffffff26f2fc170f69466a74defd8d", { uint64_t(0x0f69466a74defd8dull), uint64_t(0xffffffff26f2fc17ull), 0, 0 } },
+ { "100000000000000000000000000000033", { uint64_t(0x0000000000000033ull), 0, 1, 0 } },
+ { "11ee12312312940000000000000000000000000002342343", { uint64_t(0x0000000002342343ull), uint64_t(0x0000000000000000ull), uint64_t(0x11ee123123129400ull), 0 } },
+ { "1234567890abcdefABCDEF123456789aba32134723424242424", { uint64_t(0x2134723424242424ull), uint64_t(0xDEF123456789aba3ull), uint64_t(0x4567890abcdefABCull), 0x123 } },
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ const size_t xN = 4;
+ uint64_t x[xN]; // deliberately not pre-zeroed: fromStr16 is expected to write every word
+ mcl::fp::fromStr16(x, xN, tbl[i].str, strlen(tbl[i].str));
+ for (size_t j = 0; j < xN; j++) { // all xN words are compared, including the unused upper ones
+ CYBOZU_TEST_EQUAL(x[j], tbl[i].x[j]);
+ }
+ }
+}
+
+CYBOZU_TEST_AUTO(compareArray) // table-driven check of mcl::fp::compareArray on 32-bit word arrays
+{
+ const struct {
+ uint32_t a[4]; // left operand words
+ uint32_t b[4]; // right operand words
+ size_t n; // number of words to compare
+ int expect; // expected result: 1, 0 or -1
+ } tbl[] = {
+ { { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, 0, 0 }, // n == 0 is expected to compare equal
+ { { 1, 0, 0, 0 }, { 0, 0, 0, 0 }, 1, 1 },
+ { { 0, 0, 0, 0 }, { 1, 0, 0, 0 }, 1, -1 },
+ { { 1, 0, 0, 0 }, { 1, 0, 0, 0 }, 1, 0 },
+ { { 3, 1, 1, 0 }, { 2, 1, 1, 0 }, 4, 1 },
+ { { 9, 2, 1, 1 }, { 1, 3, 1, 1 }, 4, -1 }, // index 1 outweighs index 0: higher index is more significant
+ { { 1, 7, 8, 4 }, { 1, 7, 8, 9 }, 3, 0 }, // words at index >= n must be ignored
+ { { 1, 7, 8, 4 }, { 1, 7, 8, 9 }, 4, -1 },
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ int e = mcl::fp::compareArray(tbl[i].a, tbl[i].b, tbl[i].n);
+ CYBOZU_TEST_EQUAL(e, tbl[i].expect);
+ }
+}
+
+struct Rand { // deterministic stand-in for a random generator: replays a fixed sequence of 32-bit words
+ std::vector<uint32_t> v; // the words to hand out
+ size_t pos; // index in v of the next word to hand out
+ int count; // number of successful read() calls so far
+ void read(uint32_t *x, size_t n) // copies the next n words into x; throws cybozu::Exception if fewer than n remain
+ {
+ if (v.size() < pos + n) throw cybozu::Exception("Rand:get:bad n") << v.size() << pos << n;
+ std::copy(v.begin() + pos, v.begin() + pos + n, x);
+ pos += n;
+ count++;
+ }
+ Rand(const uint32_t *x, size_t n) // stores a copy of x[0..n) and resets pos and count
+ : pos(0)
+ , count(0)
+ {
+ for (size_t i = 0; i < n; i++) {
+ v.push_back(x[i]);
+ }
+ }
+};
+
+CYBOZU_TEST_AUTO(getRandVal) // table-driven check of mcl::fp::getRandVal using the replaying Rand generator
+{
+ const size_t rn = 8;
+ const struct {
+ uint32_t r[rn]; // words the generator will hand out, in order
+ uint32_t mod[2]; // modulus words passed to getRandVal
+ size_t bitLen; // bit length passed to getRandVal
+ int count; // expected number of Rand::read calls
+ uint32_t expect[2]; // expected output words
+ } tbl[] = {
+ { { 1, 2, 3, 4, 5, 6, 7, 8 }, { 5, 6 }, 64, 1, { 1, 2 } },
+ { { 0xfffffffc, 0x7, 3, 4, 5, 6, 7, 8 }, { 0xfffffffe, 0x3 }, 34, 1, { 0xfffffffc, 0x3 } }, // 0x7 comes back as 0x3: consistent with masking to bitLen bits
+ { { 0xfffffffc, 0x7, 3, 4, 5, 6, 7, 8 }, { 0xfffffffb, 0x3 }, 34, 2, { 3, 0 } }, // two reads expected: consistent with redrawing when the first value is not below mod
+ { { 2, 3, 5, 7, 4, 3, 0, 3 }, { 1, 0x3 }, 34, 4, { 0, 3 } }, // four reads expected before a value is accepted
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ Rand rg(tbl[i].r, rn);
+ uint32_t out[2];
+ mcl::fp::getRandVal(out, rg, tbl[i].mod, tbl[i].bitLen);
+ CYBOZU_TEST_EQUAL(out[0], tbl[i].expect[0]);
+ CYBOZU_TEST_EQUAL(out[1], tbl[i].expect[1]);
+ CYBOZU_TEST_EQUAL(rg.count, tbl[i].count); // also pins how many times the generator was consulted
+ }
+}
+
+CYBOZU_TEST_AUTO(shiftLeftOr) // table-driven check of mcl::fp::shiftLeftOr on 32-bit word arrays
+{
+ const struct {
+ uint32_t x[4]; // input words
+ size_t n; // number of words processed
+ size_t shift; // left shift in bits
+ uint32_t y; // value expected to be OR-ed into the lowest output word
+ uint32_t z[4]; // expected output words
+ uint32_t ret; // expected return value: the bits shifted out of the top word
+ } tbl[] = {
+ { { 0x12345678, 0, 0, 0 }, 1, 0, 0, { 0x12345678, 0, 0, 0 }, 0 }, // shift == 0 leaves x unchanged
+ { { 0x12345678, 0, 0, 0 }, 1, 1, 0, { 0x2468acf0, 0, 0, 0 }, 0 },
+ { { 0xf2345678, 0, 0, 0 }, 1, 1, 5, { 0xe468acf5, 0, 0, 0 }, 1 }, // top bit is shifted out and returned; y fills the low bits
+ { { 0x12345678, 0x9abcdef0, 0x11112222, 0xffccaaee }, 4, 19, 0x1234, { 0xb3c01234, 0xf78091a2, 0x1114d5e6, 0x57708889 }, 0x7fe65 },
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ uint32_t z[4]; // only the first n words are compared below
+ uint32_t ret = mcl::fp::shiftLeftOr(z, tbl[i].x, tbl[i].n, tbl[i].shift, tbl[i].y);
+ CYBOZU_TEST_EQUAL_ARRAY(z, tbl[i].z, tbl[i].n);
+ CYBOZU_TEST_EQUAL(ret, tbl[i].ret);
+ }
+}
+
+CYBOZU_TEST_AUTO(shiftRight) // table-driven check of mcl::fp::shiftRight on 32-bit word arrays
+{
+ const struct {
+ uint32_t x[4]; // input words
+ size_t n; // number of words processed
+ size_t shift; // right shift in bits
+ uint32_t z[4]; // expected output words
+ } tbl[] = {
+ { { 0x12345678, 0, 0, 0 }, 4, 0, { 0x12345678, 0, 0, 0 } }, // shift == 0 leaves x unchanged
+ { { 0x12345678, 0xaaaabbbb, 0xffeebbcc, 0xfeba9874 }, 4, 1, { 0x891a2b3c, 0x55555ddd, 0x7ff75de6, 0x7f5d4c3a } }, // bits cross word boundaries
+ { { 0x12345678, 0xaaaabbbb, 0xffeebbcc, 0xfeba9874 }, 4, 18, { 0xaeeec48d, 0xaef32aaa, 0xa61d3ffb, 0x3fae } },
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ uint32_t z[4];
+ mcl::fp::shiftRight(z, tbl[i].x, tbl[i].n, tbl[i].shift);
+ CYBOZU_TEST_EQUAL_ARRAY(z, tbl[i].z, tbl[i].n);
+ }
+}
+
+CYBOZU_TEST_AUTO(splitBitVec) // round-trip check: splitBitVec followed by concatBitVec must rebuild the bit vector
+{
+ uint32_t tbl[] = { 0x12345678, 0xaaaabbbb, 0xffeebbcc };
+ typedef cybozu::BitVectorT<uint32_t> BitVec;
+ typedef std::vector<int> IntVec;
+ BitVec bv;
+ bv.append(tbl, sizeof(tbl) * 8); // start from all 96 bits of tbl
+ for (size_t len = bv.size(); len > 0; len--) { // try every length from 96 bits down to 1
+ bv.resize(len);
+ for (size_t w = 1; w < 18; w++) { // try every chunk width from 1 to 17 bits
+ IntVec iv;
+ size_t last = mcl::fp::splitBitVec(iv, bv, w);
+ size_t q = len / w;
+ size_t r = len % w; // NOTE(review): r is computed but never asserted; presumably meant to be compared with last -- confirm
+ if (r == 0) {
+ r = w;
+ } else {
+ q++; // a partial final chunk adds one element, so q == ceil(len / w)
+ }
+ CYBOZU_TEST_EQUAL(iv.size(), q);
+ BitVec bv2;
+ mcl::fp::concatBitVec(bv2, iv, w, last);
+ CYBOZU_TEST_ASSERT(bv == bv2);
+ }
+ }
+}
diff --git a/test/mk32.sh b/test/mk32.sh
new file mode 100644
index 0000000..4d5f607
--- /dev/null
+++ b/test/mk32.sh
@@ -0,0 +1 @@
+g++ -O3 -march=native base_test.cpp ../src/x86.s -m32 -I ~/32/include/ -I ../include/ -I ../../xbyak/ -I ../../cybozulib/include ~/32/lib/libgmpxx.a ~/32/lib/libgmp.a -I ~/32/lib -DNDEBUG # 32-bit build of base_test; libgmpxx.a is listed before libgmp.a because static archives are resolved left to right and libgmpxx depends on libgmp
diff --git a/test/mont_fp_test.cpp b/test/mont_fp_test.cpp
new file mode 100644
index 0000000..6e13e4d
--- /dev/null
+++ b/test/mont_fp_test.cpp
@@ -0,0 +1,809 @@
+#define PUT(x) std::cout << #x "=" << (x) << std::endl
+#include <cybozu/test.hpp>
+#include <cybozu/benchmark.hpp>
+#include <time.h>
+
+#define USE_MONT_FP
+#include <mcl/fp.hpp>
+typedef mcl::FpT<> Zn;
+typedef mcl::FpT<> MontFp3;
+typedef mcl::FpT<> MontFp4;
+typedef mcl::FpT<> MontFp6;
+typedef mcl::FpT<> MontFp9;
+
+struct Montgomery { // GMP-based reference Montgomery multiplication, used by customTest as a cross-check
+ typedef mcl::Gmp::BlockType BlockType;
+ mpz_class p_; // modulus
+ mpz_class R_; // (1 << (pn_ * sizeof(BlockType) * 8)) % p
+ mpz_class RR_; // (R * R) % p
+ BlockType pp_; // p * pp = -1 mod M, where M = 1 << (sizeof(BlockType) * 8)
+ size_t pn_; // number of BlockType words in p
+ Montgomery() : pp_(0), pn_(0) {} // zero the scalar members so a default-constructed object holds no indeterminate values
+ explicit Montgomery(const mpz_class& p) // precomputes pp_, pn_, R_ and RR_ for modulus p
+ {
+ p_ = p;
+ pp_ = mcl::montgomery::getCoff(mcl::Gmp::getBlock(p, 0));
+ pn_ = mcl::Gmp::getBlockSize(p);
+ R_ = 1;
+ R_ = (R_ << (pn_ * sizeof(BlockType) * 8)) % p_; // R must equal the total shift applied by mul(); a fixed 64 is only right when BlockType is 64 bits wide
+ RR_ = (R_ * R_) % p_;
+ }
+
+ void toMont(mpz_class& x) const { mul(x, x, RR_); } // x <- x * R mod p, because mul() divides by R once
+ void fromMont(mpz_class& x) const { mul(x, x, 1); } // x <- x / R mod p
+
+ void mul(mpz_class& z, const mpz_class& x, const mpz_class& y) const // z <- x * y / R mod p for operands already below p
+ {
+#if 0 // disabled variant that interleaves the multiplication by each word of y with the reduction
+ const size_t ySize = mcl::Gmp::getBlockSize(y);
+ mpz_class c = x * mcl::Gmp::getBlock(y, 0);
+ BlockType q = mcl::Gmp::getBlock(c, 0) * pp_;
+ c += p_ * q;
+ c >>= sizeof(BlockType) * 8;
+ for (size_t i = 1; i < pn_; i++) {
+ if (i < ySize) {
+ c += x * mcl::Gmp::getBlock(y, i);
+ }
+ BlockType q = mcl::Gmp::getBlock(c, 0) * pp_;
+ c += p_ * q;
+ c >>= sizeof(BlockType) * 8;
+ }
+ if (c >= p_) {
+ c -= p_;
+ }
+ z = c;
+#else // active variant: full product first, then pn_ word-wise reduction steps
+ z = x * y;
+ for (size_t i = 0; i < pn_; i++) {
+ BlockType q = mcl::Gmp::getBlock(z, 0) * pp_; // q makes the lowest word of z + p * q zero
+ z += p_ * (mp_limb_t)q;
+ z >>= sizeof(BlockType) * 8; // exact division by one word
+ }
+ if (z >= p_) { // a single conditional subtraction brings z below p
+ z -= p_;
+ }
+#endif
+ }
+};
+
+template<class T>
+mpz_class toGmp(const T& x) // converts x to mpz_class through its toStr() text form
+{
+ mpz_class value;
+ std::string text = x.toStr();
+ mcl::Gmp::fromStr(value, text);
+ return value;
+}
+
+template<class T>
+std::string toStr(const T& x) // returns whatever operator<< writes for x
+{
+ std::ostringstream oss;
+ oss << x;
+ return oss.str();
+}
+
+template<class T, class U>
+T castTo(const U& x) // builds a T from the streamed text form of x via T::fromStr
+{
+ T converted;
+ converted.fromStr(toStr(x));
+ return converted;
+}
+
+template<class T>
+void putRaw(const T& x) // prints the T::BlockSize words of x.getInnerValue() as hex, highest index first
+{
+ const uint64_t *p = x.getInnerValue();
+ for (size_t i = 0, n = T::BlockSize; i < n; i++) {
+ printf("%016llx", (unsigned long long)p[n - 1 - i]); // cast needed: uint64_t is not unsigned long long on every ABI, and %llx requires exactly that type
+ }
+ printf("\n");
+}
+
+template<size_t N>
+void put(const uint64_t (&x)[N]) // prints the N words of x as one hex string, highest index first
+{
+ for (size_t i = 0; i < N; i++) {
+ printf("%016llx", (unsigned long long)x[N - 1 - i]); // cast needed: uint64_t is not unsigned long long on every ABI, and %llx requires exactly that type
+ }
+ printf("\n");
+}
+
+template<size_t N>
+struct Test {
+ typedef mcl::FpT<> Fp;
+ mpz_class m;
+ void run(const char *p) // sets modulus p and runs every sub-test of this struct in a fixed order
+ {
+ Fp::setModulo(p);
+ m = p; // keep the modulus as mpz_class for the sub-tests that need it
+ Zn::setModulo(p); // NOTE(review): Zn and Fp are both typedefs of mcl::FpT<> in this file, so this sets the same type again -- confirm intent
+ edge();
+ cstr();
+ toStr();
+ fromStr();
+ stream();
+ conv();
+ compare();
+ modulo();
+ ope();
+ cvtInt();
+ power();
+ neg_power();
+ power_Zn();
+ setRaw();
+ set64bit();
+ getRaw();
+ binaryExp();
+ bench();
+ }
+ void cstr() // checks construction and assignment of Fp from strings and ints
+ {
+ const struct {
+ const char *str; // textual input, with optional 0x / 0b prefix
+ int val; // the same value as an int
+ } tbl[] = {
+ { "0", 0 },
+ { "1", 1 },
+ { "123", 123 },
+ { "0x123", 0x123 },
+ { "0b10101", 21 },
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ // string cstr
+ Fp x(tbl[i].str);
+ CYBOZU_TEST_EQUAL(x, tbl[i].val);
+
+ // int cstr
+ Fp y(tbl[i].val);
+ CYBOZU_TEST_EQUAL(y, x);
+
+ // copy cstr
+ Fp z(x);
+ CYBOZU_TEST_EQUAL(z, x);
+
+ // assign int
+ Fp w;
+ w = tbl[i].val;
+ CYBOZU_TEST_EQUAL(w, x);
+
+ // assign self
+ Fp u;
+ u = w; // copy-assignment from another Fp
+ CYBOZU_TEST_EQUAL(u, x);
+
+ // conv
+ std::ostringstream os;
+ os << tbl[i].val; // decimal text of the int is the expected toStr output
+
+ std::string str;
+ x.toStr(str);
+ CYBOZU_TEST_EQUAL(str, os.str());
+ }
+ const struct {
+ const char *str; // negative literal
+ int val; // its absolute value
+ } tbl2[] = {
+ { "-123", 123 },
+ { "-0x123", 0x123 },
+ { "-0b10101", 21 },
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl2); i++) {
+ Fp x(tbl2[i].str);
+ x = -x; // negating the parsed negative value must give the positive one
+ CYBOZU_TEST_EQUAL(x, tbl2[i].val);
+ }
+ }
+ void toStr() // checks Fp::toStr and operator<< for 0 and 123 in bases 10, 2 and 16
+ {
+ Fp x(0);
+ std::string str;
+ str = x.toStr();
+ CYBOZU_TEST_EQUAL(str, "0");
+ str = x.toStr(2, true); // zero is expected to print as plain "0" even when a prefix is requested
+ CYBOZU_TEST_EQUAL(str, "0");
+ str = x.toStr(2, false);
+ CYBOZU_TEST_EQUAL(str, "0");
+ str = x.toStr(16, true);
+ CYBOZU_TEST_EQUAL(str, "0");
+ str = x.toStr(16, false);
+ CYBOZU_TEST_EQUAL(str, "0");
+
+ x = 123;
+ str = x.toStr(); // default arguments give decimal
+ CYBOZU_TEST_EQUAL(str, "123");
+ str = x.toStr(2, true); // second argument true adds the 0b / 0x prefix
+ CYBOZU_TEST_EQUAL(str, "0b1111011");
+ str = x.toStr(2, false);
+ CYBOZU_TEST_EQUAL(str, "1111011");
+ str = x.toStr(16, true);
+ CYBOZU_TEST_EQUAL(str, "0x7b");
+ str = x.toStr(16, false);
+ CYBOZU_TEST_EQUAL(str, "7b");
+
+ {
+ std::ostringstream os;
+ os << x; // default stream state: decimal
+ CYBOZU_TEST_EQUAL(os.str(), "123");
+ }
+ {
+ std::ostringstream os;
+ os << std::hex << std::showbase << x; // std::showbase must add the "0x" prefix
+ CYBOZU_TEST_EQUAL(os.str(), "0x7b");
+ }
+ {
+ std::ostringstream os;
+ os << std::hex << x;
+ CYBOZU_TEST_EQUAL(os.str(), "7b");
+ }
+ }
+
+ void fromStr() // checks Fp::fromStr with explicit bases and with 0b / 0x prefixes
+ {
+ const struct {
+ const char *in; // text to parse
+ int out; // expected value
+ int base; // base argument passed to fromStr
+ } tbl[] = {
+ { "100", 100, 0 }, // set base = 10 if base = 0
+ { "100", 4, 2 },
+ { "100", 256, 16 },
+ { "0b100", 4, 0 },
+ { "0b100", 4, 2 },
+ { "0x100", 256, 0 },
+ { "0x100", 256, 16 },
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ Fp x;
+ x.fromStr(tbl[i].in, tbl[i].base);
+ CYBOZU_TEST_EQUAL(x, tbl[i].out);
+ }
+ // conflict prefix with base
+ Fp x;
+ CYBOZU_TEST_EXCEPTION(x.fromStr("0b100", 16), cybozu::Exception); // binary prefix with base 16 must throw
+ CYBOZU_TEST_EXCEPTION(x.fromStr("0x100", 2), cybozu::Exception); // hex prefix with base 2 must throw
+ }
+
+ void stream() // checks operator>> for Fp with the default and the std::hex stream state
+ {
+ const struct {
+ const char *in; // text fed to the stream
+ int out10; // expected value when read with default flags
+ int out16; // expected value when read after std::hex
+ } tbl[] = {
+ { "100", 100, 256 }, // set base = 10 if base = 0
+ { "0x100", 256, 256 },
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ {
+ std::istringstream is(tbl[i].in);
+ Fp x;
+ is >> x;
+ CYBOZU_TEST_EQUAL(x, tbl[i].out10);
+ }
+ {
+ std::istringstream is(tbl[i].in);
+ Fp x;
+ is >> std::hex >> x;
+ CYBOZU_TEST_EQUAL(x, tbl[i].out16);
+ }
+ }
+ std::istringstream is("0b100");
+ Fp x;
+ CYBOZU_TEST_EXCEPTION(is >> std::hex >> x, cybozu::Exception); // binary prefix read in hex mode must throw
+ }
+ void edge() // currently a no-op: the whole body is compiled out by the #if 0 below
+ {
+#if 0 // disabled: multiplies pairs of edge values through Fp::toMont / Fp::fromMont and compares with GMP
+ std::cout << std::hex;
+ /*
+ real mont
+ 0 0
+ 1 R^-1
+ R 1
+ -1 -R^-1
+ -R -1
+ */
+ mpz_class t = 1;
+ const mpz_class R = (t << (N * 64)) % m;
+ const mpz_class tbl[] = {
+ 0, 1, R, m - 1, m - R
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ const mpz_class& x = tbl[i];
+ for (size_t j = i; j < CYBOZU_NUM_OF_ARRAY(tbl); j++) {
+ const mpz_class& y = tbl[j];
+ mpz_class z = (x * y) % m; // reference product computed with GMP
+ Fp xx, yy;
+ Fp::toMont(xx, x);
+ Fp::toMont(yy, y);
+ Fp zz = xx * yy;
+ mpz_class t;
+ Fp::fromMont(t, zz);
+ CYBOZU_TEST_EQUAL(z, t);
+ }
+ }
+ std::cout << std::dec;
+#endif
+ }
+
+ void conv() // checks that one value parses identically from binary, hex and decimal text and prints back to each
+ {
+ const char *bin = "0b100100011010001010110011110001001000000010010001101000101011001111000100100000001001000110100010101100111100010010000";
+ const char *hex = "0x123456789012345678901234567890";
+ const char *dec = "94522879687365475552814062743484560";
+ Fp b(bin);
+ Fp h(hex);
+ Fp d(dec);
+ CYBOZU_TEST_EQUAL(b, h);
+ CYBOZU_TEST_EQUAL(b, d);
+
+ std::string str;
+ b.toStr(str, 2, true); // prefixed binary must reproduce bin exactly
+ CYBOZU_TEST_EQUAL(str, bin);
+ b.toStr(str); // default arguments must reproduce dec
+ CYBOZU_TEST_EQUAL(str, dec);
+ b.toStr(str, 16, true); // prefixed hex must reproduce hex exactly
+ CYBOZU_TEST_EQUAL(str, hex);
+ }
+
+ void compare() // checks operator== / operator!= of Fp on small values
+ {
+ const struct {
+ int lhs;
+ int rhs;
+ int cmp; // only zero versus non-zero is used below; the sign is never checked
+ } tbl[] = {
+ { 0, 0, 0 },
+ { 1, 0, 1 },
+ { 0, 1, -1 },
+ { -1, 0, 1 }, // m-1, 0
+ { 0, -1, -1 }, // 0, m-1
+ { 123, 456, -1 },
+ { 456, 123, 1 },
+ { 5, 5, 0 },
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ const Fp x(tbl[i].lhs);
+ const Fp y(tbl[i].rhs);
+ const int cmp = tbl[i].cmp;
+ if (cmp == 0) {
+ CYBOZU_TEST_EQUAL(x, y);
+ } else {
+ CYBOZU_TEST_ASSERT(x != y); // ordering is not tested, only inequality
+ }
+ }
+ }
+
+ void modulo() // checks that Fp::getModulo returns the decimal text of the modulus set in run()
+ {
+ std::ostringstream ms;
+ ms << m; // expected text: the mpz_class modulus streamed with default flags
+
+ std::string str;
+ Fp::getModulo(str);
+ CYBOZU_TEST_EQUAL(str, ms.str());
+ }
+
+ void ope() // checks add, sub, mul, square, inv and div of Fp, in function form and in operator form
+ {
+ const struct {
+ Zn x;
+ Zn y;
+ Zn add; // x + y
+ Zn sub; // x - y
+ Zn mul; // x * y
+ Zn sqr; // x * x
+ } tbl[] = {
+ { 0, 1, 1, -1, 0, 0 },
+ { 9, 7, 16, 2, 63, 81 },
+ { 10, 13, 23, -3, 130, 100 },
+ { 2000, -1000, 1000, 3000, -2000000, 4000000 },
+ { -12345, -9999, -(12345 + 9999), - 12345 + 9999, 12345 * 9999, 12345 * 12345 },
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ const Fp x(castTo<Fp>(tbl[i].x)); // castTo converts through the text form
+ const Fp y(castTo<Fp>(tbl[i].y));
+ Fp z;
+ Fp::add(z, x, y);
+ CYBOZU_TEST_EQUAL(z, castTo<Fp>(tbl[i].add));
+ Fp::sub(z, x, y);
+ CYBOZU_TEST_EQUAL(z, castTo<Fp>(tbl[i].sub));
+ Fp::mul(z, x, y);
+ CYBOZU_TEST_EQUAL(z, castTo<Fp>(tbl[i].mul));
+
+ Fp r;
+ Fp::inv(r, y);
+ Zn rr = 1 / tbl[i].y; // reference inverse computed with Zn
+ CYBOZU_TEST_EQUAL(r, castTo<Fp>(rr));
+ Fp::mul(z, z, r); // z holds x * y here, so multiplying by 1 / y must give x back
+ CYBOZU_TEST_EQUAL(z, castTo<Fp>(tbl[i].x));
+
+ z = x + y;
+ CYBOZU_TEST_EQUAL(z, castTo<Fp>(tbl[i].add));
+ z = x - y;
+ CYBOZU_TEST_EQUAL(z, castTo<Fp>(tbl[i].sub));
+ z = x * y;
+ CYBOZU_TEST_EQUAL(z, castTo<Fp>(tbl[i].mul));
+ Fp::square(z, x);
+ CYBOZU_TEST_EQUAL(z, castTo<Fp>(tbl[i].sqr));
+
+ z = x / y;
+ z *= y; // (x / y) * y must give x back
+ CYBOZU_TEST_EQUAL(z, castTo<Fp>(tbl[i].x));
+ }
+ }
+ void cvtInt() // currently a no-op: the whole body is compiled out by the #if 0 below
+ {
+#if 0 // disabled: checks x.cvtInt() for a small value and its error reporting for a value too large for uint64_t
+ Fp x;
+ x = 12345;
+ uint64_t y = x.cvtInt();
+ CYBOZU_TEST_EQUAL(y, 12345u);
+ x.fromStr("123456789012342342342342342");
+ CYBOZU_TEST_EXCEPTION(x.cvtInt(), cybozu::Exception);
+ bool err = false;
+ CYBOZU_TEST_NO_EXCEPTION(x.cvtInt(&err));
+ CYBOZU_TEST_ASSERT(err);
+#endif
+ }
+
+ void power() // checks Fp::power(y, x, i) against repeated multiplication for i = 0..99
+ {
+ Fp x, y, z;
+ x = 12345;
+ z = 1; // z tracks x^i by repeated multiplication
+ for (int i = 0; i < 100; i++) {
+ Fp::power(y, x, i);
+ CYBOZU_TEST_EQUAL(y, z);
+ z *= x;
+ }
+ }
+
+ void neg_power() // checks Fp::power with exponents 0, -1, ..., -99 against repeated multiplication by 1 / x
+ {
+ Fp x, y, z;
+ x = 12345;
+ z = 1; // z tracks (1 / x)^i
+ Fp rx = 1 / x;
+ for (int i = 0; i < 100; i++) {
+ Fp::power(y, x, -i);
+ CYBOZU_TEST_EQUAL(y, z);
+ z *= rx;
+ }
+ }
+
+ void power_Zn() // same as power(), but the exponent is passed as a Zn element instead of an int
+ {
+ Fp x, y, z;
+ x = 12345;
+ z = 1; // z tracks x^i by repeated multiplication
+ for (int i = 0; i < 100; i++) {
+ Fp::power(y, x, Zn(i));
+ CYBOZU_TEST_EQUAL(y, z);
+ z *= x;
+ }
+ }
+
+ void setRaw() // currently a no-op: the whole body is compiled out by the #if 0 below
+ {
+ // QQQ
+#if 0 // disabled: sets Fp from raw char and int arrays and checks the resulting value
+ char b1[] = { 0x56, 0x34, 0x12 };
+ Fp x;
+ x.setRaw(b1, 3);
+ CYBOZU_TEST_EQUAL(x, 0x123456);
+ int b2[] = { 0x12, 0x34 };
+ x.setRaw(b2, 2);
+ CYBOZU_TEST_EQUAL(x, Fp("0x3400000012"));
+#endif
+ }
+ void binaryExp() // checks the bit-vector export and import of Fp (appendToBitVec / fromBitVec) against GMP raw words
+ {
+ puts("binaryExp");
+ for (int i = 2; i < 7; i++) {
+ mpz_class g = m / i; // sample values m/2 .. m/6, all below the modulus
+ Fp x, y;
+// Fp::toMont(x, g);
+ x.fromGmp(g);
+ cybozu::BitVector bv;
+ x.appendToBitVec(bv);
+ uint64_t buf[N];
+ mcl::Gmp::getRaw(buf, N, g); // reference words taken directly from GMP
+ CYBOZU_TEST_EQUAL(bv.getBlockSize(), N);
+ CYBOZU_TEST_EQUAL(bv.size(), Fp::getModBitLen());
+ CYBOZU_TEST_EQUAL(bv.size(), Fp::getBitVecSize());
+ const uint64_t *p = bv.getBlock();
+ CYBOZU_TEST_EQUAL_ARRAY(p, buf, N);
+ }
+ const mpz_class yy("0x1255556666777788881111222233334444");
+ if (yy > m) { // skip the second half when the fixed sample exceeds the modulus
+ return;
+ }
+ Fp y;
+// Fp::toMont(y, yy);
+ y.fromGmp(yy);
+ uint64_t b1[N] = { uint64_t(0x1111222233334444ull), uint64_t(0x5555666677778888ull), 0x12 }; // the words of yy, lowest first; remaining entries are zero
+ Fp x;
+ cybozu::BitVector bv;
+ bv.append(b1, Fp::getModBitLen());
+ x.fromBitVec(bv);
+ CYBOZU_TEST_EQUAL(x, y);
+ bv.clear();
+ x.appendToBitVec(bv); // exporting again must reproduce the original words
+ const uint64_t *b2 = bv.getBlock();
+ CYBOZU_TEST_EQUAL_ARRAY(b1, b2, N);
+ }
+
+ void set64bit() // checks that constructing Fp from a uint64_t matches constructing it from the same hex string
+ {
+ const struct {
+ const char *p; // value as hex text
+ uint64_t i; // the same value as a 64-bit integer
+ } tbl[] = {
+ { "0x1234567812345678", uint64_t(0x1234567812345678ull) },
+ { "0xaaaaaaaaaaaaaaaa", uint64_t(0xaaaaaaaaaaaaaaaaull) }, // top bit set
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ Fp x(tbl[i].p);
+ Fp y(tbl[i].i);
+ CYBOZU_TEST_EQUAL(x, y);
+ }
+ }
+
+ void getRaw() // checks mcl::Gmp::getRaw when exporting an mpz_class into 32-bit words
+ {
+ const struct {
+ const char *s; // value as text
+ uint32_t v[4]; // expected words, lowest first
+ size_t vn; // expected return value: number of words written
+ } tbl[] = {
+ { "0", { 0, 0, 0, 0 }, 1 }, // zero is expected to occupy one word
+ { "1234", { 1234, 0, 0, 0 }, 1 },
+ { "0xaabbccdd12345678", { 0x12345678, 0xaabbccdd, 0, 0 }, 2 },
+ { "0x11112222333344445555666677778888", { 0x77778888, 0x55556666, 0x33334444, 0x11112222 }, 4 },
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ mpz_class x(tbl[i].s);
+ const size_t bufN = 8;
+ uint32_t buf[bufN];
+ size_t n = mcl::Gmp::getRaw(buf, bufN, x);
+ CYBOZU_TEST_EQUAL(n, tbl[i].vn);
+ for (size_t j = 0; j < n; j++) { // only the n words reported as written are compared
+ CYBOZU_TEST_EQUAL(buf[j], tbl[i].v[j]);
+ }
+ }
+ }
+ void bench() // runs CYBOZU_BENCH on add, sub, mul, square and div for the current modulus; asserts nothing
+ {
+ Fp x("-123456789");
+ Fp y("-0x7ffffffff");
+ CYBOZU_BENCH("add", operator+, x, x);
+ CYBOZU_BENCH("sub", operator-, x, y);
+ CYBOZU_BENCH("mul", operator*, x, x);
+ CYBOZU_BENCH("sqr", Fp::square, x, x);
+ CYBOZU_BENCH("div", y += x; operator/, x, y); // the y += x statement is part of the macro argument, so y changes between divisions
+ }
+};
+
+void customTest(const char *pStr, const char *xStr, const char *yStr) // computes x * y mod p three ways and checks that the printed results agree
+{
+#if 0 // disabled debugging aid: prints the inverse of 1 for a fixed modulus and exits
+ {
+ pStr = "0xfffffffffffffffffffffffffffffffffffffffeffffee37",
+ MontFp3::setModulo(pStr);
+ static uint64_t x[3] = { 1, 0, 0 };
+ uint64_t z[3];
+std::cout<<std::hex;
+ MontFp3::inv(*(MontFp3*)z, *(const MontFp3*)x);
+put(z);
+ exit(1);
+ }
+#endif
+#if 0 // disabled debugging aid: compares a raw mul_ result with the Montgomery reference on fixed words, then exits
+ std::cout << std::hex;
+ uint64_t x[9] = { 0xff7fffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0x1ff };
+ uint64_t y[9] = { 0xff7fffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0x1ff };
+ uint64_t z1[9], z2[9];
+ MontFp9::setModulo(pStr);
+ MontFp9::fg_.mul_(z2, x, y);
+ put(z2);
+ {
+ puts("C");
+ mpz_class p(pStr);
+ Montgomery mont(p);
+ mpz_class xx, yy;
+ mcl::Gmp::setRaw(xx, x, CYBOZU_NUM_OF_ARRAY(x));
+ mcl::Gmp::setRaw(yy, y, CYBOZU_NUM_OF_ARRAY(y));
+ mpz_class z;
+ mont.mul(z, xx, yy);
+ std::cout << std::hex << z << std::endl;
+ }
+ exit(1);
+#else // active path
+ std::string rOrg, rC, rAsm; // results from Zn, from the Montgomery reference struct, and from MontFp9
+ Zn::setModulo(pStr);
+ Zn s(xStr), t(yStr);
+ s *= t;
+ rOrg = toStr(s);
+ {
+ puts("C");
+ mpz_class p(pStr);
+ Montgomery mont(p);
+ mpz_class x(xStr), y(yStr);
+ mont.toMont(x);
+ mont.toMont(y);
+ mpz_class z;
+ mont.mul(z, x, y);
+ mont.fromMont(z); // back to the ordinary representation before printing
+ rC = toStr(z);
+ }
+
+ puts("asm");
+ MontFp9::setModulo(pStr);
+ MontFp9 x(xStr), y(yStr);
+ x *= y;
+ rAsm = toStr(x);
+ CYBOZU_TEST_EQUAL(rOrg, rC);
+ CYBOZU_TEST_EQUAL(rOrg, rAsm); // NOTE(review): Zn and MontFp9 are both typedefs of mcl::FpT<> in this file, so this compares a type with itself -- confirm intent
+#endif
+}
+
+CYBOZU_TEST_AUTO(customTest) // runs the customTest() function on each table row
+{
+ const struct {
+ const char *p; // modulus string
+ const char *x; // first operand string
+ const char *y; // second operand string
+ } tbl[] = {
+ {
+ "0x1ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
+// "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff",
+// "0xfffffffffffffffffffffffffffffffffffffffeffffee37",
+ "0x1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe", // x = p - 1
+ "0x1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe" // y = p - 1
+ },
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ customTest(tbl[i].p, tbl[i].x, tbl[i].y);
+ }
+}
+
+CYBOZU_TEST_AUTO(test3) // runs the full Test<3> suite once per modulus in the table
+{
+ Test<3> test;
+ const char *tbl[] = { // every entry is written with 48 hex digits
+ "0x000000000000000100000000000000000000000000000033", // min prime
+ "0x00000000fffffffffffffffffffffffffffffffeffffac73",
+ "0x0000000100000000000000000001b8fa16dfab9aca16b6b3",
+ "0x000000010000000000000000000000000000000000000007",
+ "0x30000000000000000000000000000000000000000000002b",
+ "0x70000000000000000000000000000000000000000000001f",
+ "0x800000000000000000000000000000000000000000000005",
+ "0xfffffffffffffffffffffffffffffffffffffffeffffee37",
+ "0xfffffffffffffffffffffffe26f2fc170f69466a74defd8d",
+ "0xffffffffffffffffffffffffffffffffffffffffffffff13", // max prime
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ printf("prime=%s\n", tbl[i]); // log which modulus is under test
+ test.run(tbl[i]);
+ }
+}
+
+CYBOZU_TEST_AUTO(test4) // runs the full Test<4> suite once per modulus in the table
+{
+ Test<4> test;
+ const char *tbl[] = { // every entry is written with 64 hex digits
+ "0x0000000000000001000000000000000000000000000000000000000000000085", // min prime
+ "0x2523648240000001ba344d80000000086121000000000013a700000000000013",
+ "0x7523648240000001ba344d80000000086121000000000013a700000000000017",
+ "0x800000000000000000000000000000000000000000000000000000000000005f",
+ "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff43", // max prime
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ printf("prime=%s\n", tbl[i]); // log which modulus is under test
+ test.run(tbl[i]);
+ }
+}
+
+CYBOZU_TEST_AUTO(test6) // runs the full Test<6> suite on a single 96-hex-digit modulus
+{
+ Test<6> test;
+ const char *tbl[] = {
+ "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff",
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ printf("prime=%s\n", tbl[i]); // log which modulus is under test
+ test.run(tbl[i]);
+ }
+}
+
+CYBOZU_TEST_AUTO(test9) // runs the full Test<9> suite on a single modulus
+{
+ Test<9> test;
+ const char *tbl[] = {
+ "0x1ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
+ };
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ printf("prime=%s\n", tbl[i]); // log which modulus is under test
+ test.run(tbl[i]);
+ }
+}
+
+CYBOZU_TEST_AUTO(toStr16) // checks that MontFp3::toStr in base 16 matches mcl::Gmp::toStr for the same values
+{
+ const char *tbl[] = {
+ "0x0",
+ "0x5",
+ "0x123",
+ "0x123456789012345679adbc",
+ "0xffffffff26f2fc170f69466a74defd8d",
+ "0x100000000000000000000000000000033",
+ "0x11ee12312312940000000000000000000000000002342343"
+ };
+ MontFp3::setModulo("0xffffffffffffffffffffffffffffffffffffffffffffff13");
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ std::string str, str2;
+ MontFp3 x(tbl[i]);
+ x.toStr(str, 16); // hex text from the field element
+ mpz_class y(tbl[i]);
+ mcl::Gmp::toStr(str2, y, 16); // hex text from GMP, used as the reference
+ CYBOZU_TEST_EQUAL(str, str2);
+ }
+}
+
+#if 0
+CYBOZU_TEST_AUTO(toStr16bench) // compiled out by the enclosing #if 0: times MontFp3::toStr against mcl::Gmp::toStr
+{
+ const char *tbl[] = {
+ "0x0",
+ "0x5",
+ "0x123",
+ "0x123456789012345679adbc",
+ "0xffffffff26f2fc170f69466a74defd8d",
+ "0x100000000000000000000000000000033",
+ "0x11ee12312312940000000000000000000000000002342343"
+ };
+ const int C = 500000; // iteration count passed to CYBOZU_BENCH_C
+ MontFp3::setModulo("0xffffffffffffffffffffffffffffffffffffffffffffff13");
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ std::string str, str2;
+ MontFp3 x(tbl[i]);
+ CYBOZU_BENCH_C("Mont:toStr", C, x.toStr, str, 16);
+ mpz_class y(tbl[i]);
+ CYBOZU_BENCH_C("Gmp:toStr ", C, mcl::Gmp::toStr, str2, y, 16);
+ str2.insert(0, "0x"); // prepend the prefix to the GMP text before comparing
+ CYBOZU_TEST_EQUAL(str, str2);
+ }
+}
+
+CYBOZU_TEST_AUTO(fromStr16bench) // compiled out by the enclosing #if 0: times MontFp3::fromStr against mcl::Gmp::fromStr
+{
+ const char *tbl[] = {
+ "0x0",
+ "0x5",
+ "0x123",
+ "0x123456789012345679adbc",
+ "0xffffffff26f2fc170f69466a74defd8d",
+ "0x100000000000000000000000000000033",
+ "0x11ee12312312940000000000000000000000000002342343"
+ };
+ const int C = 500000; // iteration count passed to CYBOZU_BENCH_C
+ MontFp3::setModulo("0xffffffffffffffffffffffffffffffffffffffffffffff13");
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ std::string str = tbl[i];
+ MontFp3 x;
+ CYBOZU_BENCH_C("Mont:fromStr", C, x.fromStr, str);
+
+ mpz_class y;
+ str.erase(0, 2); // drop the "0x" prefix before handing the text to GMP with an explicit base
+ CYBOZU_BENCH_C("Gmp:fromStr ", C, mcl::Gmp::fromStr, y, str, 16);
+ x.toStr(str, 16);
+ std::string str2;
+ mcl::Gmp::toStr(str2, y, 16);
+ str2.insert(0, "0x");
+ CYBOZU_TEST_EQUAL(str, str2);
+ }
+}
+#endif
diff --git a/test/proj/ec_test/ec_test.vcxproj b/test/proj/ec_test/ec_test.vcxproj
new file mode 100644
index 0000000..b141754
--- /dev/null
+++ b/test/proj/ec_test/ec_test.vcxproj
@@ -0,0 +1,88 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{46B6E88E-739A-406B-9F68-BC46C5950FA3}</ProjectGuid>
+ <Keyword>Win32Proj</Keyword>
+ <RootNamespace>ec_test</RootNamespace>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <PlatformToolset>v110</PlatformToolset>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <PlatformToolset>v110</PlatformToolset>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(SolutionDir)common.props" />
+ <Import Project="$(SolutionDir)debug.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(SolutionDir)common.props" />
+ <Import Project="$(SolutionDir)release.props" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <LinkIncremental>true</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <LinkIncremental>false</LinkIncremental>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <ClCompile Include="$(SolutionDir)test\ec_test.cpp" />
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project> \ No newline at end of file
diff --git a/test/proj/fp_test/fp_test.vcxproj b/test/proj/fp_test/fp_test.vcxproj
new file mode 100644
index 0000000..a77dc21
--- /dev/null
+++ b/test/proj/fp_test/fp_test.vcxproj
@@ -0,0 +1,91 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{51266DE6-B57B-4AE3-B85C-282F170E1728}</ProjectGuid>
+ <Keyword>Win32Proj</Keyword>
+ <RootNamespace>fp_test</RootNamespace>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <PlatformToolset>v110</PlatformToolset>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <PlatformToolset>v110</PlatformToolset>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(SolutionDir)common.props" />
+ <Import Project="$(SolutionDir)debug.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(SolutionDir)common.props" />
+ <Import Project="$(SolutionDir)release.props" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <LinkIncremental>true</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <LinkIncremental>false</LinkIncremental>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(SolutionDir)../xbyak/;$(SolutionDir)../cybozulib/include;$(SolutionDir)../cybozulib_ext/mpir/include;$(SolutionDir)include</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(SolutionDir)../xbyak/;$(SolutionDir)../cybozulib/include;$(SolutionDir)../cybozulib_ext/mpir/include;$(SolutionDir)include</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <ClCompile Include="$(SolutionDir)test\fp_test.cpp" />
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project> \ No newline at end of file
diff --git a/test/sq_test.cpp b/test/sq_test.cpp
new file mode 100644
index 0000000..6174be6
--- /dev/null
+++ b/test/sq_test.cpp
@@ -0,0 +1,21 @@
+#include <mcl/gmp_util.hpp>
+#include <cybozu/test.hpp>
+#include <iostream>
+
+CYBOZU_TEST_AUTO(sqrt) // checks mcl::SquareRoot: every root it reports must square back to the input modulo p
+{
+ const int tbl[] = { 3, 5, 7, 11, 13, 17, 19, 257, 997, 1031 }; // moduli to test
+ mcl::SquareRoot sq;
+ for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+ const mpz_class p = tbl[i];
+ sq.set(p);
+ for (mpz_class a = 1; a < p; a++) { // exhaustively try every non-zero residue
+ mpz_class x;
+ if (sq.get(x, a)) { // only values for which get() reports success are verified; a false result is not checked
+ mpz_class y;
+ y = (x * x) % p;
+ CYBOZU_TEST_EQUAL(a, y);
+ }
+ }
+ }
+}