use AddPre

author: MITSUNARI Shigeo <herumi@nifty.com> 2016-10-04 09:34:19 +0800
committer: MITSUNARI Shigeo <herumi@nifty.com> 2016-10-04 09:34:19 +0800
commit: 7a0e8e67c9183acd114adcdd71f8902c2a0d7ff5 (patch)
tree: 646666f2fd6c862383d3f18276d2845e4aa96a30
parent: 28e818641bc7665f9da7b21112b568e74ff37362 (diff)
download: dexon-mcl-7a0e8e67c9183acd114adcdd71f8902c2a0d7ff5.tar.gz
dexon-mcl-7a0e8e67c9183acd114adcdd71f8902c2a0d7ff5.tar.zst
dexon-mcl-7a0e8e67c9183acd114adcdd71f8902c2a0d7ff5.zip
6 files changed, 114 insertions, 52 deletions
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 2694ee4..f318570 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -40,6 +40,8 @@ typedef void (*void3u)(Unit*, const Unit*, const Unit*);
 typedef void (*void4u)(Unit*, const Unit*, const Unit*, const Unit*);
 typedef int (*int2u)(Unit*, const Unit*);
 
+typedef Unit (*u3u)(Unit*, const Unit*, const Unit*);
+
 struct Block {
 	const Unit *p; // pointer to original FpT.v_
 	size_t n;
diff --git a/sample/large.cpp b/sample/large.cpp
index b363440..72de4a2 100644
--- a/sample/large.cpp
+++ b/sample/large.cpp
@@ -23,19 +23,19 @@ void mulPre768(Unit *pz, const Unit *px, const Unit *py)
 		ad + bc = (a + b)(c + d) - ac - bd
 	*/
 	const size_t H = N / 2;
-	low_mul<H>(pz, px, py); // bd
-	low_mul<H>(pz + N, px + H, py + H); // ac
+	low_mul_G<H>(pz, px, py); // bd
+	low_mul_G<H>(pz + N, px + H, py + H); // ac
 	Unit a_b[H + 1];
 	Unit c_d[H + 1];
-	a_b[H] = low_add<H>(a_b, px, px + H); // a + b
-	c_d[H] = low_add<H>(c_d, py, py + H); // c + d
+	a_b[H] = low_addNC_G<H>(a_b, px, px + H); // a + b
+	c_d[H] = low_addNC_G<H>(c_d, py, py + H); // c + d
 	Unit work[N + H] = {};
-	low_mul<H>(work, a_b, c_d);
-	if (c_d[H]) low_add<H + 1>(work + H, work + H, c_d);
-	if (a_b[H]) low_add<H + 1>(work + H, work + H, a_b);
-	work[N] -= low_sub<H>(work, work, pz);
-	work[N] -= low_sub<H>(work, work, pz + N);
-	low_add<H + N>(pz + H, pz + H, work);
+	low_mul_G<H>(work, a_b, c_d);
+	if (c_d[H]) low_addNC_G<H + 1>(work + H, work + H, c_d);
+	if (a_b[H]) low_addNC_G<H + 1>(work + H, work + H, a_b);
+	work[N] -= low_subNC_G<H>(work, work, pz);
+	work[N] -= low_subNC_G<H>(work, work, pz + N);
+	low_addNC_G<H + N>(pz + H, pz + H, work);
 }
 void testMul()
 {
@@ -44,7 +44,7 @@ void testMul()
 		ux[i] = -i * i + 5;
 		uy[i] = -i * i + 9;
 	}
-	low_mul<12>(a, ux, uy);
+	low_mul_G<12>(a, ux, uy);
 	mulPre768(b, ux, uy);
 	for (size_t i = 0; i < N * 2; i++) {
 		if (a[i] != b[i]) {
diff --git a/src/fp.cpp b/src/fp.cpp
index 451c484..ac533ce 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -138,19 +138,19 @@ struct OpeFunc {
 	}
 	static inline void fp_addC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
 	{
-		if (low_add<N>(z, x, y)) {
-			low_sub<N>(z, z, p);
+		if (AddPre<N, GTag>::f(z, x, y)) {
+			low_subNC_G<N>(z, z, p);
 			return;
 		}
 		Unit tmp[N];
-		if (low_sub<N>(tmp, z, p) == 0) {
+		if (low_subNC_G<N>(tmp, z, p) == 0) {
 			memcpy(z, tmp, sizeof(tmp));
 		}
 	}
 	static inline void fp_subC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
 	{
-		if (low_sub<N>(z, x, y)) {
-			low_add<N>(z, z, p);
+		if (low_subNC_G<N>(z, x, y)) {
+			AddPre<N, GTag>::f(z, z, p);
 		}
 	}
 	/*
@@ -158,54 +158,54 @@ struct OpeFunc {
 	*/
 	static inline void fpDbl_addC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
 	{
-		if (low_add<N * 2>(z, x, y)) {
-			low_sub<N>(z + N, z + N, p);
+		if (AddPre<N * 2, GTag>::f(z, x, y)) {
+			low_subNC_G<N>(z + N, z + N, p);
 			return;
 		}
 		Unit tmp[N];
-		if (low_sub<N>(tmp, z + N, p) == 0) {
+		if (low_subNC_G<N>(tmp, z + N, p) == 0) {
 			memcpy(z + N, tmp, sizeof(tmp));
 		}
 	}
 	static inline void fpDbl_subC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
 	{
-		if (low_sub<N * 2>(z, x, y)) {
-			low_add<N>(z + N, z + N, p);
+		if (low_subNC_G<N * 2>(z, x, y)) {
+			AddPre<N, GTag>::f(z + N, z + N, p);
 		}
 	}
 	// z[N] <- x[N] + y[N] without carry
 	static inline void fp_addNCC(Unit *z, const Unit *x, const Unit *y)
 	{
-		low_add<N>(z, x, y);
+		AddPre<N, GTag>::f(z, x, y);
 	}
 	static inline void fp_subNCC(Unit *z, const Unit *x, const Unit *y)
 	{
-		low_sub<N>(z, x, y);
+		low_subNC_G<N>(z, x, y);
 	}
 	// z[N + 1] <- x[N] * y
 	static inline void fp_mul_UnitPreC(Unit *z, const Unit *x, Unit y)
 	{
-		low_mul_Unit<N>(z, x, y);
+		low_mul_Unit_G<N>(z, x, y);
 	}
 	// z[N * 2] <- x[N] * y[N]
 	static inline void fpDbl_mulPreC(Unit *z, const Unit *x, const Unit *y)
 	{
-		low_mul<N>(z, x, y);
+		low_mul_G<N>(z, x, y);
 	}
 	// y[N * 2] <- x[N]^2
 	static inline void fpDbl_sqrPreC(Unit *y, const Unit *x)
 	{
-		low_sqr<N>(y, x);
+		low_sqr_G<N>(y, x);
 	}
 	// y[N] <- x[N + 1] mod p[N]
 	static inline void fpN1_modC(Unit *y, const Unit *x, const Unit *p)
 	{
-		low_N1_mod<N>(y, x, p);
+		low_N1_mod_G<N>(y, x, p);
 	}
 	// y[N] <- x[N * 2] mod p[N]
 	static inline void fpDbl_modC(Unit *y, const Unit *x, const Unit *p)
 	{
-		low_mod<N>(y, x, p);
+		low_mod_G<N>(y, x, p);
 	}
 	// z[N] <- mont(x[N], y[N])
 	static inline void fp_mulMontC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
@@ -218,25 +218,25 @@ struct OpeFunc {
 		const Unit rp = p[-1];
 		Unit buf[N * 2 + 2];
 		Unit *c = buf;
-		low_mul_Unit<N>(c, x, y[0]); // x * y[0]
+		low_mul_Unit_G<N>(c, x, y[0]); // x * y[0]
 		Unit q = c[0] * rp;
 		Unit t[N + 2];
-		low_mul_Unit<N>(t, p, q); // p * q
+		low_mul_Unit_G<N>(t, p, q); // p * q
 		t[N + 1] = 0; // always zero
-		c[N + 1] = low_add<N + 1>(c, c, t);
+		c[N + 1] = AddPre<N + 1, GTag>::f(c, c, t);
 		c++;
 		for (size_t i = 1; i < N; i++) {
-			low_mul_Unit<N>(t, x, y[i]);
-			c[N + 1] = low_add<N + 1>(c, c, t);
+			low_mul_Unit_G<N>(t, x, y[i]);
+			c[N + 1] = AddPre<N + 1, GTag>::f(c, c, t);
 			q = c[0] * rp;
-			low_mul_Unit<N>(t, p, q);
-			low_add<N + 2>(c, c, t);
+			low_mul_Unit_G<N>(t, p, q);
+			AddPre<N + 2, GTag>::f(c, c, t);
 			c++;
 		}
 		if (c[N]) {
-			low_sub<N>(z, c, p);
+			low_subNC_G<N>(z, c, p);
 		} else {
-			if (low_sub<N>(z, c, p)) {
+			if (low_subNC_G<N>(z, c, p)) {
 				memcpy(z, c, N * sizeof(Unit));
 			}
 		}
@@ -254,20 +254,20 @@ struct OpeFunc {
 		clearArray(t, N + 1, N * 2);
 		Unit *c = buf;
 		Unit q = xy[0] * rp;
-		low_mul_Unit<N>(t, p, q);
-		buf[N * 2] = low_add<N * 2>(buf, xy, t);
+		low_mul_Unit_G<N>(t, p, q);
+		buf[N * 2] = AddPre<N * 2, GTag>::f(buf, xy, t);
 		c++;
 		for (size_t i = 1; i < N; i++) {
 			q = c[0] * rp;
-			low_mul_Unit<N>(t, p, q);
+			low_mul_Unit_G<N>(t, p, q);
 			// QQQ
 			mpn_add_n((mp_limb_t*)c, (const mp_limb_t*)c, (const mp_limb_t*)t, N * 2 + 1 - i);
 			c++;
 		}
 		if (c[N]) {
-			low_sub<N>(z, c, p);
+			low_subNC_G<N>(z, c, p);
 		} else {
-			if (low_sub<N>(z, c, p)) {
+			if (low_subNC_G<N>(z, c, p)) {
 				memcpy(z, c, N * sizeof(Unit));
 			}
 		}
@@ -296,6 +296,32 @@ struct OpeFunc {
 		fpDbl_sqrPreC(xx, x);
 		fpDbl_modMontC(y, xx, p);
 	}
+#if 0 // compiled out; the commented-out guard suggests MCL_USE_LLVM is the intended condition (TODO confirm)
+	static inline void fp_mul_UnitL(Unit *z, const Unit *x, Unit y, const Unit *p) // z[N] <- (x[N] * y) mod p[N]
+	{
+		Unit xy[N + 1]; // unreduced product of x[N] and the single unit y
+		fp_mul_UnitPreC(xy, x, y); // xy[N + 1] <- x[N] * y
+		fpN1_modC(z, xy, p); // z[N] <- xy[N + 1] mod p[N]
+	}
+	static inline void fp_mulL(Unit *z, const Unit *x, const Unit *y, const Unit *p) // z[N] <- (x[N] * y[N]) mod p[N]
+	{
+		Unit xy[N * 2]; // unreduced double-width product
+		fpDbl_mulPreC(xy, x, y); // xy[N * 2] <- x[N] * y[N]
+		fpDbl_modC(z, xy, p); // z[N] <- xy[N * 2] mod p[N]
+	}
+	static inline void fp_sqrL(Unit *y, const Unit *x, const Unit *p) // y[N] <- (x[N]^2) mod p[N]
+	{
+		Unit xx[N * 2]; // unreduced double-width square
+		fpDbl_sqrPreC(xx, x); // xx[N * 2] <- x[N]^2
+		fpDbl_modC(y, xx, p); // y[N] <- xx[N * 2] mod p[N]
+	}
+	static inline void fp_sqrMontL(Unit *y, const Unit *x, const Unit *p) // square, then reduce with fpDbl_modMontC
+	{
+		Unit xx[N * 2]; // unreduced double-width square
+		fpDbl_sqrPreC(xx, x); // xx[N * 2] <- x[N]^2
+		fpDbl_modMontC(y, xx, p); // reduction step; presumably Montgomery reduction, judging by the name (confirm)
+	}
+#endif
 	static inline void fp_invOpC(Unit *y, const Unit *x, const Op& op)
 	{
 		mpz_class my;
diff --git a/src/fp_proto.hpp b/src/fp_proto.hpp
index e100fde..99763e6 100644
--- a/src/fp_proto.hpp
+++ b/src/fp_proto.hpp
@@ -8,6 +8,25 @@
 */
 #include <mcl/op.hpp>
 
+namespace mcl { namespace fp {
+// Per-backend (Tag) tables of low-level primitives; declared as struct so that the static member f is public.
+// (carry, z[N]) <- x[N] + y[N]
+template<size_t N, class Tag>struct AddPre { static const u3u f; };
+// (carry, z[N]) <- x[N] - y[N]
+template<size_t N, class Tag>struct SubPre { static const u3u f; };
+// z[N * 2] <- x[N] * y[N]
+template<size_t N, class Tag>struct MulPre { static const void3u f; };
+// z[N * 2] <- x[N] * x[N]
+template<size_t N, class Tag>struct SqrPre { static const void2u f; };
+// z[N + 1] <- x[N] * y
+template<size_t N, class Tag>struct Mul_UnitPre { static const void2uI f; };
+// z[N] <- x[N + 1] % p[N]
+template<size_t N, class Tag>struct N1_Mod { static const void3u f; };
+// z[N] <- x[N * 2] % p[N]
+template<size_t N, class Tag>struct Dbl_Mod { static const void3u f; };
+
+} } // mcl::fp
+
 #ifdef MCL_USE_LLVM
 
 extern "C" {
diff --git a/src/low_gmp.hpp b/src/low_gmp.hpp
index e14697b..01a8a5b 100644
--- a/src/low_gmp.hpp
+++ b/src/low_gmp.hpp
@@ -1,45 +1,60 @@
 #pragma once
 #include <mcl/op.hpp>
+#include "fp_proto.hpp"
 
 namespace mcl { namespace fp {
 
+struct GTag; // tag type naming the mpn_*-based implementation below; only declared here, never defined in this hunk
+// AddPre specialization for GTag: (carry, z[N]) <- x[N] + y[N], computed by mpn_add_n over N limbs
+template<size_t N>
+struct AddPre<N, GTag> {
+	static inline Unit addPre(Unit *z, const Unit *x, const Unit *y) // returns whatever mpn_add_n returns (the carry-out)
+	{
+		return mpn_add_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N); // NOTE(review): casts assume Unit and mp_limb_t are layout-compatible -- confirm
+	}
+	static const u3u f; // function pointer through which callers invoke the addition
+};
+// f is bound to addPre, so AddPre<N, GTag>::f(z, x, y) forwards to the function above
+template<size_t N>
+const u3u AddPre<N, GTag>::f = &AddPre<N, GTag>::addPre;
+
 template<size_t N>
-Unit low_add(Unit *z, const Unit *x, const Unit *y)
+inline Unit low_addNC_G(Unit *z, const Unit *x, const Unit *y)
 {
 	return mpn_add_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N);
 }
 template<size_t N>
-Unit low_sub(Unit *z, const Unit *x, const Unit *y)
+inline Unit low_subNC_G(Unit *z, const Unit *x, const Unit *y)
 {
 	return mpn_sub_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N);
 }
 // Z[N * 2]
 template<size_t N>
-void low_mul(Unit *z, const Unit *x, const Unit *y)
+inline void low_mul_G(Unit *z, const Unit *x, const Unit *y)
 {
 	return mpn_mul_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N);
 }
 template<size_t N>
-void low_sqr(Unit *y, const Unit *x)
+inline void low_sqr_G(Unit *y, const Unit *x)
 {
 	return mpn_sqr((mp_limb_t*)y, (const mp_limb_t*)x, N);
 }
 // Z[N + 1]
 template<size_t N>
-void low_mul_Unit(Unit *z, const Unit *x, Unit y)
+inline void low_mul_Unit_G(Unit *z, const Unit *x, Unit y)
 {
 	z[N] = mpn_mul_1((mp_limb_t*)z, (const mp_limb_t*)x, N, y);
 }
 // y[N] <- X[N + 1] mod p[N]
 template<size_t N>
-void low_N1_mod(Unit *y, const Unit *x, const Unit *p)
+inline void low_N1_mod_G(Unit *y, const Unit *x, const Unit *p)
 {
 	mp_limb_t q[2]; // not used
 	mpn_tdiv_qr(q, (mp_limb_t*)y, 0, (const mp_limb_t*)x, N + 1, (const mp_limb_t*)p, N);
 }
 // y[N] <- X[N * 2] mod p[N]
 template<size_t N>
-void low_mod(Unit *y, const Unit *x, const Unit *p)
+inline void low_mod_G(Unit *y, const Unit *x, const Unit *p)
 {
 	mp_limb_t q[N + 1]; // not used
 	mpn_tdiv_qr(q, (mp_limb_t*)y, 0, (const mp_limb_t*)x, N * 2, (const mp_limb_t*)p, N);
diff --git a/test/low_test.cpp b/test/low_test.cpp
index 1b5de8d..e1ae420 100644
--- a/test/low_test.cpp
+++ b/test/low_test.cpp
@@ -45,11 +45,11 @@ void bench()
 		Unit w[N];
 		rg.read(x, N);
 		rg.read(y, N);
-		low_add<N>(z, x, y);
+		low_addNC_G<N>(z, x, y);
 		addNC<bit>(w, x, y);
 		CYBOZU_TEST_EQUAL_ARRAY(z, w, N);
 
-		low_sub<N>(z, x, y);
+		low_subNC_G<N>(z, x, y);
 		subNC<bit>(w, x, y);
 		CYBOZU_TEST_EQUAL_ARRAY(z, w, N);
 	}
@@ -82,7 +82,7 @@ CYBOZU_TEST_AUTO(addNC)
 		Unit w[N];
 		rg.read(x, N);
 		rg.read(y, N);
-		low_add<N>(z, x, y);
+		low_addNC_G<N>(z, x, y);
 		addNC<bit>(w, x, y);
 		CYBOZU_TEST_EQUAL_ARRAY(z, w, N);
 		add_test(w, x, y);
author	MITSUNARI Shigeo <herumi@nifty.com>	2016-10-04 09:34:19 +0800
committer	MITSUNARI Shigeo <herumi@nifty.com>	2016-10-04 09:34:19 +0800
commit	7a0e8e67c9183acd114adcdd71f8902c2a0d7ff5 (patch)
tree	646666f2fd6c862383d3f18276d2845e4aa96a30
parent	28e818641bc7665f9da7b21112b568e74ff37362 (diff)
download	dexon-mcl-7a0e8e67c9183acd114adcdd71f8902c2a0d7ff5.tar.gz dexon-mcl-7a0e8e67c9183acd114adcdd71f8902c2a0d7ff5.tar.zst dexon-mcl-7a0e8e67c9183acd114adcdd71f8902c2a0d7ff5.zip