aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMITSUNARI Shigeo <herumi@nifty.com>2016-10-04 09:34:19 +0800
committerMITSUNARI Shigeo <herumi@nifty.com>2016-10-04 09:34:19 +0800
commit7a0e8e67c9183acd114adcdd71f8902c2a0d7ff5 (patch)
tree646666f2fd6c862383d3f18276d2845e4aa96a30
parent28e818641bc7665f9da7b21112b568e74ff37362 (diff)
downloaddexon-mcl-7a0e8e67c9183acd114adcdd71f8902c2a0d7ff5.tar.gz
dexon-mcl-7a0e8e67c9183acd114adcdd71f8902c2a0d7ff5.tar.zst
dexon-mcl-7a0e8e67c9183acd114adcdd71f8902c2a0d7ff5.zip
use AddPre
-rw-r--r--include/mcl/op.hpp2
-rw-r--r--sample/large.cpp22
-rw-r--r--src/fp.cpp88
-rw-r--r--src/fp_proto.hpp19
-rw-r--r--src/low_gmp.hpp29
-rw-r--r--test/low_test.cpp6
6 files changed, 114 insertions, 52 deletions
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 2694ee4..f318570 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -40,6 +40,8 @@ typedef void (*void3u)(Unit*, const Unit*, const Unit*);
typedef void (*void4u)(Unit*, const Unit*, const Unit*, const Unit*);
typedef int (*int2u)(Unit*, const Unit*);
+typedef Unit (*u3u)(Unit*, const Unit*, const Unit*);
+
struct Block {
const Unit *p; // pointer to original FpT.v_
size_t n;
diff --git a/sample/large.cpp b/sample/large.cpp
index b363440..72de4a2 100644
--- a/sample/large.cpp
+++ b/sample/large.cpp
@@ -23,19 +23,19 @@ void mulPre768(Unit *pz, const Unit *px, const Unit *py)
ad + bc = (a + b)(c + d) - ac - bd
*/
const size_t H = N / 2;
- low_mul<H>(pz, px, py); // bd
- low_mul<H>(pz + N, px + H, py + H); // ac
+ low_mul_G<H>(pz, px, py); // bd
+ low_mul_G<H>(pz + N, px + H, py + H); // ac
Unit a_b[H + 1];
Unit c_d[H + 1];
- a_b[H] = low_add<H>(a_b, px, px + H); // a + b
- c_d[H] = low_add<H>(c_d, py, py + H); // c + d
+ a_b[H] = low_addNC_G<H>(a_b, px, px + H); // a + b
+ c_d[H] = low_addNC_G<H>(c_d, py, py + H); // c + d
Unit work[N + H] = {};
- low_mul<H>(work, a_b, c_d);
- if (c_d[H]) low_add<H + 1>(work + H, work + H, c_d);
- if (a_b[H]) low_add<H + 1>(work + H, work + H, a_b);
- work[N] -= low_sub<H>(work, work, pz);
- work[N] -= low_sub<H>(work, work, pz + N);
- low_add<H + N>(pz + H, pz + H, work);
+ low_mul_G<H>(work, a_b, c_d);
+ if (c_d[H]) low_addNC_G<H + 1>(work + H, work + H, c_d);
+ if (a_b[H]) low_addNC_G<H + 1>(work + H, work + H, a_b);
+ work[N] -= low_subNC_G<H>(work, work, pz);
+ work[N] -= low_subNC_G<H>(work, work, pz + N);
+ low_addNC_G<H + N>(pz + H, pz + H, work);
}
void testMul()
{
@@ -44,7 +44,7 @@ void testMul()
ux[i] = -i * i + 5;
uy[i] = -i * i + 9;
}
- low_mul<12>(a, ux, uy);
+ low_mul_G<12>(a, ux, uy);
mulPre768(b, ux, uy);
for (size_t i = 0; i < N * 2; i++) {
if (a[i] != b[i]) {
diff --git a/src/fp.cpp b/src/fp.cpp
index 451c484..ac533ce 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -138,19 +138,19 @@ struct OpeFunc {
}
static inline void fp_addC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
{
- if (low_add<N>(z, x, y)) {
- low_sub<N>(z, z, p);
+ if (AddPre<N, GTag>::f(z, x, y)) {
+ low_subNC_G<N>(z, z, p);
return;
}
Unit tmp[N];
- if (low_sub<N>(tmp, z, p) == 0) {
+ if (low_subNC_G<N>(tmp, z, p) == 0) {
memcpy(z, tmp, sizeof(tmp));
}
}
static inline void fp_subC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
{
- if (low_sub<N>(z, x, y)) {
- low_add<N>(z, z, p);
+ if (low_subNC_G<N>(z, x, y)) {
+ AddPre<N, GTag>::f(z, z, p);
}
}
/*
@@ -158,54 +158,54 @@ struct OpeFunc {
*/
static inline void fpDbl_addC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
{
- if (low_add<N * 2>(z, x, y)) {
- low_sub<N>(z + N, z + N, p);
+ if (AddPre<N * 2, GTag>::f(z, x, y)) {
+ low_subNC_G<N>(z + N, z + N, p);
return;
}
Unit tmp[N];
- if (low_sub<N>(tmp, z + N, p) == 0) {
+ if (low_subNC_G<N>(tmp, z + N, p) == 0) {
memcpy(z + N, tmp, sizeof(tmp));
}
}
static inline void fpDbl_subC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
{
- if (low_sub<N * 2>(z, x, y)) {
- low_add<N>(z + N, z + N, p);
+ if (low_subNC_G<N * 2>(z, x, y)) {
+ AddPre<N, GTag>::f(z + N, z + N, p);
}
}
// z[N] <- x[N] + y[N] without carry
static inline void fp_addNCC(Unit *z, const Unit *x, const Unit *y)
{
- low_add<N>(z, x, y);
+ AddPre<N, GTag>::f(z, x, y);
}
static inline void fp_subNCC(Unit *z, const Unit *x, const Unit *y)
{
- low_sub<N>(z, x, y);
+ low_subNC_G<N>(z, x, y);
}
// z[N + 1] <- x[N] * y
static inline void fp_mul_UnitPreC(Unit *z, const Unit *x, Unit y)
{
- low_mul_Unit<N>(z, x, y);
+ low_mul_Unit_G<N>(z, x, y);
}
// z[N * 2] <- x[N] * y[N]
static inline void fpDbl_mulPreC(Unit *z, const Unit *x, const Unit *y)
{
- low_mul<N>(z, x, y);
+ low_mul_G<N>(z, x, y);
}
// y[N * 2] <- x[N]^2
static inline void fpDbl_sqrPreC(Unit *y, const Unit *x)
{
- low_sqr<N>(y, x);
+ low_sqr_G<N>(y, x);
}
// y[N] <- x[N + 1] mod p[N]
static inline void fpN1_modC(Unit *y, const Unit *x, const Unit *p)
{
- low_N1_mod<N>(y, x, p);
+ low_N1_mod_G<N>(y, x, p);
}
// y[N] <- x[N * 2] mod p[N]
static inline void fpDbl_modC(Unit *y, const Unit *x, const Unit *p)
{
- low_mod<N>(y, x, p);
+ low_mod_G<N>(y, x, p);
}
// z[N] <- mont(x[N], y[N])
static inline void fp_mulMontC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
@@ -218,25 +218,25 @@ struct OpeFunc {
const Unit rp = p[-1];
Unit buf[N * 2 + 2];
Unit *c = buf;
- low_mul_Unit<N>(c, x, y[0]); // x * y[0]
+ low_mul_Unit_G<N>(c, x, y[0]); // x * y[0]
Unit q = c[0] * rp;
Unit t[N + 2];
- low_mul_Unit<N>(t, p, q); // p * q
+ low_mul_Unit_G<N>(t, p, q); // p * q
t[N + 1] = 0; // always zero
- c[N + 1] = low_add<N + 1>(c, c, t);
+ c[N + 1] = AddPre<N + 1, GTag>::f(c, c, t);
c++;
for (size_t i = 1; i < N; i++) {
- low_mul_Unit<N>(t, x, y[i]);
- c[N + 1] = low_add<N + 1>(c, c, t);
+ low_mul_Unit_G<N>(t, x, y[i]);
+ c[N + 1] = AddPre<N + 1, GTag>::f(c, c, t);
q = c[0] * rp;
- low_mul_Unit<N>(t, p, q);
- low_add<N + 2>(c, c, t);
+ low_mul_Unit_G<N>(t, p, q);
+ AddPre<N + 2, GTag>::f(c, c, t);
c++;
}
if (c[N]) {
- low_sub<N>(z, c, p);
+ low_subNC_G<N>(z, c, p);
} else {
- if (low_sub<N>(z, c, p)) {
+ if (low_subNC_G<N>(z, c, p)) {
memcpy(z, c, N * sizeof(Unit));
}
}
@@ -254,20 +254,20 @@ struct OpeFunc {
clearArray(t, N + 1, N * 2);
Unit *c = buf;
Unit q = xy[0] * rp;
- low_mul_Unit<N>(t, p, q);
- buf[N * 2] = low_add<N * 2>(buf, xy, t);
+ low_mul_Unit_G<N>(t, p, q);
+ buf[N * 2] = AddPre<N * 2, GTag>::f(buf, xy, t);
c++;
for (size_t i = 1; i < N; i++) {
q = c[0] * rp;
- low_mul_Unit<N>(t, p, q);
+ low_mul_Unit_G<N>(t, p, q);
// QQQ
mpn_add_n((mp_limb_t*)c, (const mp_limb_t*)c, (const mp_limb_t*)t, N * 2 + 1 - i);
c++;
}
if (c[N]) {
- low_sub<N>(z, c, p);
+ low_subNC_G<N>(z, c, p);
} else {
- if (low_sub<N>(z, c, p)) {
+ if (low_subNC_G<N>(z, c, p)) {
memcpy(z, c, N * sizeof(Unit));
}
}
@@ -296,6 +296,32 @@ struct OpeFunc {
fpDbl_sqrPreC(xx, x);
fpDbl_modMontC(y, xx, p);
}
+#if 0 //#ifdef MCL_USE_LLVM
+ static inline void fp_mul_UnitL(Unit *z, const Unit *x, Unit y, const Unit *p)
+ {
+ Unit xy[N + 1];
+ fp_mul_UnitPreC(xy, x, y);
+ fpN1_modC(z, xy, p);
+ }
+ static inline void fp_mulL(Unit *z, const Unit *x, const Unit *y, const Unit *p)
+ {
+ Unit xy[N * 2];
+ fpDbl_mulPreC(xy, x, y);
+ fpDbl_modC(z, xy, p);
+ }
+ static inline void fp_sqrL(Unit *y, const Unit *x, const Unit *p)
+ {
+ Unit xx[N * 2];
+ fpDbl_sqrPreC(xx, x);
+ fpDbl_modC(y, xx, p);
+ }
+ static inline void fp_sqrMontL(Unit *y, const Unit *x, const Unit *p)
+ {
+ Unit xx[N * 2];
+ fpDbl_sqrPreC(xx, x);
+ fpDbl_modMontC(y, xx, p);
+ }
+#endif
static inline void fp_invOpC(Unit *y, const Unit *x, const Op& op)
{
mpz_class my;
diff --git a/src/fp_proto.hpp b/src/fp_proto.hpp
index e100fde..99763e6 100644
--- a/src/fp_proto.hpp
+++ b/src/fp_proto.hpp
@@ -8,6 +8,25 @@
*/
#include <mcl/op.hpp>
+namespace mcl { namespace fp {
+
+// (carry, z[N]) <- x[N] + y[N]
+template<size_t N, class Tag>class AddPre { static const u3u f; };
+// (borrow, z[N]) <- x[N] - y[N]
+template<size_t N, class Tag>class SubPre { static const u3u f; };
+// z[N * 2] <- x[N] * y[N]
+template<size_t N, class Tag>class MulPre { static const void3u f; };
+// z[N * 2] <- x[N] * x[N]
+template<size_t N, class Tag>class SqrPre { static const void2u f; };
+// z[N + 1] <- x[N] * y
+template<size_t N, class Tag>class Mul_UnitPre { static const void2uI f; };
+// z[N] <- x[N + 1] % p[N]
+template<size_t N, class Tag>class N1_Mod { static const void3u f; };
+// z[N] <- x[N * 2] % p[N]
+template<size_t N, class Tag>class Dbl_Mod { static const void3u f; };
+
+} } // mcl::fp
+
#ifdef MCL_USE_LLVM
extern "C" {
diff --git a/src/low_gmp.hpp b/src/low_gmp.hpp
index e14697b..01a8a5b 100644
--- a/src/low_gmp.hpp
+++ b/src/low_gmp.hpp
@@ -1,45 +1,60 @@
#pragma once
#include <mcl/op.hpp>
+#include "fp_proto.hpp"
namespace mcl { namespace fp {
+struct GTag;
+
+template<size_t N>
+struct AddPre<N, GTag> {
+ static inline Unit addPre(Unit *z, const Unit *x, const Unit *y)
+ {
+ return mpn_add_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N);
+ }
+ static const u3u f;
+};
+
+template<size_t N>
+const u3u AddPre<N, GTag>::f = &AddPre<N, GTag>::addPre;
+
template<size_t N>
-Unit low_add(Unit *z, const Unit *x, const Unit *y)
+inline Unit low_addNC_G(Unit *z, const Unit *x, const Unit *y)
{
return mpn_add_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N);
}
template<size_t N>
-Unit low_sub(Unit *z, const Unit *x, const Unit *y)
+inline Unit low_subNC_G(Unit *z, const Unit *x, const Unit *y)
{
return mpn_sub_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N);
}
// z[N * 2] <- x[N] * y[N]
template<size_t N>
-void low_mul(Unit *z, const Unit *x, const Unit *y)
+inline void low_mul_G(Unit *z, const Unit *x, const Unit *y)
{
return mpn_mul_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N);
}
template<size_t N>
-void low_sqr(Unit *y, const Unit *x)
+inline void low_sqr_G(Unit *y, const Unit *x)
{
return mpn_sqr((mp_limb_t*)y, (const mp_limb_t*)x, N);
}
// z[N + 1] <- x[N] * y
template<size_t N>
-void low_mul_Unit(Unit *z, const Unit *x, Unit y)
+inline void low_mul_Unit_G(Unit *z, const Unit *x, Unit y)
{
z[N] = mpn_mul_1((mp_limb_t*)z, (const mp_limb_t*)x, N, y);
}
// y[N] <- x[N + 1] mod p[N]
template<size_t N>
-void low_N1_mod(Unit *y, const Unit *x, const Unit *p)
+inline void low_N1_mod_G(Unit *y, const Unit *x, const Unit *p)
{
mp_limb_t q[2]; // not used
mpn_tdiv_qr(q, (mp_limb_t*)y, 0, (const mp_limb_t*)x, N + 1, (const mp_limb_t*)p, N);
}
// y[N] <- x[N * 2] mod p[N]
template<size_t N>
-void low_mod(Unit *y, const Unit *x, const Unit *p)
+inline void low_mod_G(Unit *y, const Unit *x, const Unit *p)
{
mp_limb_t q[N + 1]; // not used
mpn_tdiv_qr(q, (mp_limb_t*)y, 0, (const mp_limb_t*)x, N * 2, (const mp_limb_t*)p, N);
diff --git a/test/low_test.cpp b/test/low_test.cpp
index 1b5de8d..e1ae420 100644
--- a/test/low_test.cpp
+++ b/test/low_test.cpp
@@ -45,11 +45,11 @@ void bench()
Unit w[N];
rg.read(x, N);
rg.read(y, N);
- low_add<N>(z, x, y);
+ low_addNC_G<N>(z, x, y);
addNC<bit>(w, x, y);
CYBOZU_TEST_EQUAL_ARRAY(z, w, N);
- low_sub<N>(z, x, y);
+ low_subNC_G<N>(z, x, y);
subNC<bit>(w, x, y);
CYBOZU_TEST_EQUAL_ARRAY(z, w, N);
}
@@ -82,7 +82,7 @@ CYBOZU_TEST_AUTO(addNC)
Unit w[N];
rg.read(x, N);
rg.read(y, N);
- low_add<N>(z, x, y);
+ low_addNC_G<N>(z, x, y);
addNC<bit>(w, x, y);
CYBOZU_TEST_EQUAL_ARRAY(z, w, N);
add_test(w, x, y);