diff options
author | MITSUNARI Shigeo <herumi@nifty.com> | 2016-09-21 17:52:47 +0800 |
---|---|---|
committer | MITSUNARI Shigeo <herumi@nifty.com> | 2016-09-21 17:52:47 +0800 |
commit | 398c2e54af0834c0834d2451cde86f57cdb1e8ab (patch) | |
tree | a372b21e6483cb73b8da9a79b77fe797da0c786d | |
parent | af95cd7145139399b7aa22ecf9bc174ae3cace2e (diff) | |
download | dexon-mcl-398c2e54af0834c0834d2451cde86f57cdb1e8ab.tar.gz dexon-mcl-398c2e54af0834c0834d2451cde86f57cdb1e8ab.tar.zst dexon-mcl-398c2e54af0834c0834d2451cde86f57cdb1e8ab.zip |
add subNC x64 asm
-rw-r--r-- | src/asm/low_x86-64.asm | 65 |
-rw-r--r-- | test/low_test.cpp | 70 |
2 files changed, 101 insertions, 34 deletions
diff --git a/src/asm/low_x86-64.asm b/src/asm/low_x86-64.asm index 72faa69..cb6947e 100644 --- a/src/asm/low_x86-64.asm +++ b/src/asm/low_x86-64.asm @@ -37,6 +37,22 @@ segment .text ret %endmacro +%imacro subNC 1 + mov rax, [p2org] + sub rax, [p3org] + mov [p1org], rax +%assign i 1 +%rep %1 + mov rax, [p2org + i * 8] + sbb rax, [p3org + i * 8] + mov [p1org + i * 8], rax +%assign i (i+1) +%endrep + setc al + movzx eax, al + ret +%endmacro + proc mcl_fp_addNC64 addNC 0 proc mcl_fp_addNC128 @@ -86,3 +102,52 @@ proc mcl_fp_addNC1472 proc mcl_fp_addNC1536 addNC 23 +proc mcl_fp_subNC64 + subNC 0 +proc mcl_fp_subNC128 + subNC 1 +proc mcl_fp_subNC192 + subNC 2 +proc mcl_fp_subNC256 + subNC 3 +proc mcl_fp_subNC320 + subNC 4 +proc mcl_fp_subNC384 + subNC 5 +proc mcl_fp_subNC448 + subNC 6 +proc mcl_fp_subNC512 + subNC 7 +proc mcl_fp_subNC576 + subNC 8 +proc mcl_fp_subNC640 + subNC 9 +proc mcl_fp_subNC704 + subNC 10 +proc mcl_fp_subNC768 + subNC 11 +proc mcl_fp_subNC832 + subNC 12 +proc mcl_fp_subNC896 + subNC 13 +proc mcl_fp_subNC960 + subNC 14 +proc mcl_fp_subNC1024 + subNC 15 +proc mcl_fp_subNC1088 + subNC 16 +proc mcl_fp_subNC1152 + subNC 17 +proc mcl_fp_subNC1216 + subNC 18 +proc mcl_fp_subNC1280 + subNC 19 +proc mcl_fp_subNC1344 + subNC 20 +proc mcl_fp_subNC1408 + subNC 21 +proc mcl_fp_subNC1472 + subNC 22 +proc mcl_fp_subNC1536 + subNC 23 + diff --git a/test/low_test.cpp b/test/low_test.cpp index c3e413d..1b5de8d 100644 --- a/test/low_test.cpp +++ b/test/low_test.cpp @@ -10,36 +10,32 @@ cybozu::XorShift rg; -extern "C" void mcl_fp_addNC64(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y); -extern "C" void mcl_fp_addNC96(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y); -extern "C" void mcl_fp_addNC128(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y); -extern "C" void mcl_fp_addNC160(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y); -extern "C" void mcl_fp_addNC192(mcl::fp::Unit *z, const mcl::fp::Unit 
*x, const mcl::fp::Unit *y); -extern "C" void mcl_fp_addNC224(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y); -extern "C" void mcl_fp_addNC256(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y); extern "C" void add_test(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y); template<size_t N> void addNC(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y); -#define DEF_ADD(BIT) template<> void addNC<BIT>(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y) { mcl_fp_addNC ## BIT(z, x, y); } +template<size_t N> +void subNC(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y); -DEF_ADD(64) -DEF_ADD(128) -DEF_ADD(192) -DEF_ADD(256) -DEF_ADD(320) -DEF_ADD(384) -DEF_ADD(448) -DEF_ADD(512) -//DEF_ADD(96) -//DEF_ADD(160) -//DEF_ADD(224) +#define DEF_FUNC(BIT) \ + template<> void addNC<BIT>(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y) { mcl_fp_addNC ## BIT(z, x, y); } \ + template<> void subNC<BIT>(mcl::fp::Unit *z, const mcl::fp::Unit *x, const mcl::fp::Unit *y) { mcl_fp_subNC ## BIT(z, x, y); } -#define CAT(S, BIT) "S##BIT" +DEF_FUNC(64) +DEF_FUNC(128) +DEF_FUNC(192) +DEF_FUNC(256) +DEF_FUNC(320) +DEF_FUNC(384) +DEF_FUNC(448) +DEF_FUNC(512) +//DEF_FUNC(96) +//DEF_FUNC(160) +//DEF_FUNC(224) template<size_t bit> -void benchAdd() +void bench() { using namespace mcl::fp; const size_t N = bit / UnitBitSize; @@ -52,22 +48,28 @@ void benchAdd() low_add<N>(z, x, y); addNC<bit>(w, x, y); CYBOZU_TEST_EQUAL_ARRAY(z, w, N); + + low_sub<N>(z, x, y); + subNC<bit>(w, x, y); + CYBOZU_TEST_EQUAL_ARRAY(z, w, N); } - std::string name = "add" + cybozu::itoa(bit); - CYBOZU_BENCH(name.c_str(), addNC<bit>, x, x, y); + const std::string bitS = cybozu::itoa(bit); + std::string name; + name = "add" + bitS; CYBOZU_BENCH(name.c_str(), addNC<bit>, x, x, y); + name = "sub" + bitS; CYBOZU_BENCH(name.c_str(), subNC<bit>, x, x, y); } -CYBOZU_TEST_AUTO(addNC64) { benchAdd<64>(); } 
-CYBOZU_TEST_AUTO(addNC128) { benchAdd<128>(); } -CYBOZU_TEST_AUTO(addNC192) { benchAdd<192>(); } -CYBOZU_TEST_AUTO(addNC256) { benchAdd<256>(); } -CYBOZU_TEST_AUTO(addNC320) { benchAdd<320>(); } -CYBOZU_TEST_AUTO(addNC384) { benchAdd<384>(); } -CYBOZU_TEST_AUTO(addNC448) { benchAdd<448>(); } -CYBOZU_TEST_AUTO(addNC512) { benchAdd<512>(); } -//CYBOZU_TEST_AUTO(addNC96) { benchAdd<96>(); } -//CYBOZU_TEST_AUTO(addNC160) { benchAdd<160>(); } -//CYBOZU_TEST_AUTO(addNC224) { benchAdd<224>(); } +CYBOZU_TEST_AUTO(addNC64) { bench<64>(); } +CYBOZU_TEST_AUTO(addNC128) { bench<128>(); } +CYBOZU_TEST_AUTO(addNC192) { bench<192>(); } +CYBOZU_TEST_AUTO(addNC256) { bench<256>(); } +CYBOZU_TEST_AUTO(addNC320) { bench<320>(); } +CYBOZU_TEST_AUTO(addNC384) { bench<384>(); } +CYBOZU_TEST_AUTO(addNC448) { bench<448>(); } +CYBOZU_TEST_AUTO(addNC512) { bench<512>(); } +//CYBOZU_TEST_AUTO(addNC96) { bench<96>(); } +//CYBOZU_TEST_AUTO(addNC160) { bench<160>(); } +//CYBOZU_TEST_AUTO(addNC224) { bench<224>(); } #if 0 CYBOZU_TEST_AUTO(addNC) { |