about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author MITSUNARI Shigeo <herumi@nifty.com> 2016-09-21 17:52:47 +0800
committer MITSUNARI Shigeo <herumi@nifty.com> 2016-09-21 17:52:47 +0800
commit 398c2e54af0834c0834d2451cde86f57cdb1e8ab (patch)
tree a372b21e6483cb73b8da9a79b77fe797da0c786d
parent af95cd7145139399b7aa22ecf9bc174ae3cace2e (diff)
download dexon-mcl-398c2e54af0834c0834d2451cde86f57cdb1e8ab.tar.gz
dexon-mcl-398c2e54af0834c0834d2451cde86f57cdb1e8ab.tar.zst
dexon-mcl-398c2e54af0834c0834d2451cde86f57cdb1e8ab.zip
add subNC x64 asm
-rw-r--r-- src/asm/low_x86-64.asm 65
-rw-r--r-- test/low_test.cpp 70
2 files changed, 101 insertions, 34 deletions
diff --git a/src/asm/low_x86-64.asm b/src/asm/low_x86-64.asm
index 72faa69..cb6947e 100644
--- a/src/asm/low_x86-64.asm
+++ b/src/asm/low_x86-64.asm
@@ -37,6 +37,22 @@ segment .text
ret
%endmacro
+%imacro subNC 1
+ mov rax, [p2org]
+ sub rax, [p3org]
+ mov [p1org], rax
+%assign i 1
+%rep %1
+ mov rax, [p2org + i * 8]
+ sbb rax, [p3org + i * 8]
+ mov [p1org + i * 8], rax
+%assign i (i+1)
+%endrep
+ setc al
+ movzx eax, al
+ ret
+%endmacro
+
proc mcl_fp_addNC64
addNC 0
proc mcl_fp_addNC128
@@ -86,3 +102,52 @@ proc mcl_fp_addNC1472
proc mcl_fp_addNC1536
addNC 23
+proc mcl_fp_subNC64
+ subNC 0
+proc mcl_fp_subNC128
+ subNC 1
+proc mcl_fp_subNC192
+ subNC 2
+proc mcl_fp_subNC256
+ subNC 3
+proc mcl_fp_subNC320
+ subNC 4
+proc mcl_fp_subNC384
+ subNC 5
+proc mcl_fp_subNC448
+ subNC 6
+proc mcl_fp_subNC512
+ subNC 7
+proc mcl_fp_subNC576
+ subNC 8
+proc mcl_fp_subNC640
+ subNC 9
+proc mcl_fp_subNC704
+ subNC 10
+proc mcl_fp_subNC768
+ subNC 11
+proc mcl_fp_subNC832
+ subNC 12
+proc mcl_fp_subNC896
+ subNC 13
+proc mcl_fp_subNC960
+ subNC 14
+proc mcl_fp_subNC1024
+ subNC 15
+proc mcl_fp_subNC1088
+ subNC 16
+proc mcl_fp_subNC1152
+ subNC 17
+proc mcl_fp_subNC1216
+ subNC 18
+proc mcl_fp_subNC1280
+ subNC 19
+proc mcl_fp_subNC1344
+ subNC 20
+proc mcl_fp_subNC1408
+ subNC 21
+proc mcl_fp_subNC1472
+ subNC 22
+proc mcl_fp_subNC1536
+ subNC 23
+
diff --git a/test/low_test.cpp b/test/low_test.cpp
index c3e413d..1b5de8d 100644
--- a/test/low_test.cpp
+++ b/test/low_test.cpp
@@ -10,36 +10,32 @@
cybozu::XorShift rg;
-extern "C" void mcl_fp_addNC64(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y);
-extern "C" void mcl_fp_addNC96(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y);
-extern "C" void mcl_fp_addNC128(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y);
-extern "C" void mcl_fp_addNC160(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y);
-extern "C" void mcl_fp_addNC192(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y);
-extern "C" void mcl_fp_addNC224(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y);
-extern "C" void mcl_fp_addNC256(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y);
extern "C" void add_test(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y);
template<size_t N>
void addNC(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y);
-#define DEF_ADD(BIT) template<> void addNC<BIT>(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y) { mcl_fp_addNC ## BIT(z, x, y); }
+template<size_t N>
+void subNC(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y);
-DEF_ADD(64)
-DEF_ADD(128)
-DEF_ADD(192)
-DEF_ADD(256)
-DEF_ADD(320)
-DEF_ADD(384)
-DEF_ADD(448)
-DEF_ADD(512)
-//DEF_ADD(96)
-//DEF_ADD(160)
-//DEF_ADD(224)
+#define DEF_FUNC(BIT) \
+ template<> void addNC<BIT>(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y) { mcl_fp_addNC ## BIT(z, x, y); } \
+ template<> void subNC<BIT>(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y) { mcl_fp_subNC ## BIT(z, x, y); }
-#define CAT(S, BIT) "S##BIT"
+DEF_FUNC(64)
+DEF_FUNC(128)
+DEF_FUNC(192)
+DEF_FUNC(256)
+DEF_FUNC(320)
+DEF_FUNC(384)
+DEF_FUNC(448)
+DEF_FUNC(512)
+//DEF_FUNC(96)
+//DEF_FUNC(160)
+//DEF_FUNC(224)
template<size_t bit>
-void benchAdd()
+void bench()
{
using namespace mcl::fp;
const size_t N = bit / UnitBitSize;
@@ -52,22 +48,28 @@ void benchAdd()
low_add<N>(z, x, y);
addNC<bit>(w, x, y);
CYBOZU_TEST_EQUAL_ARRAY(z, w, N);
+
+ low_sub<N>(z, x, y);
+ subNC<bit>(w, x, y);
+ CYBOZU_TEST_EQUAL_ARRAY(z, w, N);
}
- std::string name = "add" + cybozu::itoa(bit);
- CYBOZU_BENCH(name.c_str(), addNC<bit>, x, x, y);
+ const std::string bitS = cybozu::itoa(bit);
+ std::string name;
+ name = "add" + bitS; CYBOZU_BENCH(name.c_str(), addNC<bit>, x, x, y);
+ name = "sub" + bitS; CYBOZU_BENCH(name.c_str(), subNC<bit>, x, x, y);
}
-CYBOZU_TEST_AUTO(addNC64) { benchAdd<64>(); }
-CYBOZU_TEST_AUTO(addNC128) { benchAdd<128>(); }
-CYBOZU_TEST_AUTO(addNC192) { benchAdd<192>(); }
-CYBOZU_TEST_AUTO(addNC256) { benchAdd<256>(); }
-CYBOZU_TEST_AUTO(addNC320) { benchAdd<320>(); }
-CYBOZU_TEST_AUTO(addNC384) { benchAdd<384>(); }
-CYBOZU_TEST_AUTO(addNC448) { benchAdd<448>(); }
-CYBOZU_TEST_AUTO(addNC512) { benchAdd<512>(); }
-//CYBOZU_TEST_AUTO(addNC96) { benchAdd<96>(); }
-//CYBOZU_TEST_AUTO(addNC160) { benchAdd<160>(); }
-//CYBOZU_TEST_AUTO(addNC224) { benchAdd<224>(); }
+CYBOZU_TEST_AUTO(addNC64) { bench<64>(); }
+CYBOZU_TEST_AUTO(addNC128) { bench<128>(); }
+CYBOZU_TEST_AUTO(addNC192) { bench<192>(); }
+CYBOZU_TEST_AUTO(addNC256) { bench<256>(); }
+CYBOZU_TEST_AUTO(addNC320) { bench<320>(); }
+CYBOZU_TEST_AUTO(addNC384) { bench<384>(); }
+CYBOZU_TEST_AUTO(addNC448) { bench<448>(); }
+CYBOZU_TEST_AUTO(addNC512) { bench<512>(); }
+//CYBOZU_TEST_AUTO(addNC96) { bench<96>(); }
+//CYBOZU_TEST_AUTO(addNC160) { bench<160>(); }
+//CYBOZU_TEST_AUTO(addNC224) { bench<224>(); }
#if 0
CYBOZU_TEST_AUTO(addNC)
{