diff options
author | MITSUNARI Shigeo <herumi@nifty.com> | 2016-06-24 15:09:58 +0800 |
---|---|---|
committer | MITSUNARI Shigeo <herumi@nifty.com> | 2016-06-24 15:09:58 +0800 |
commit | 94c14dccca6bc93ceed4613924699a263ca2522f (patch) | |
tree | 5627423a543c75843a9b9bbe79c1f934a82aba7c | |
parent | ec6ecdeb98d9408b35ab22f663a357eaa8b52e11 (diff) | |
download | dexon-mcl-94c14dccca6bc93ceed4613924699a263ca2522f.tar.gz dexon-mcl-94c14dccca6bc93ceed4613924699a263ca2522f.tar.zst dexon-mcl-94c14dccca6bc93ceed4613924699a263ca2522f.zip |
add mcl_fp_addNC128
-rw-r--r-- | src/low_armv7.s | 25 | ||||
-rw-r--r-- | test/low_test.cpp | 33 |
2 files changed, 46 insertions, 12 deletions
diff --git a/src/low_armv7.s b/src/low_armv7.s index b219e33..7d252e4 100644 --- a/src/low_armv7.s +++ b/src/low_armv7.s @@ -1,10 +1,7 @@ .arch armv7-a - .global mcl_fp_addNC64 - .global mcl_fp_addNC96 - .global mcl_fp_addNC96_2 - .align 2 + .global mcl_fp_addNC64 mcl_fp_addNC64: ldm r1, {r3, r12} ldm r2, {r1, r2} @@ -15,6 +12,7 @@ mcl_fp_addNC64: .align 2 + .global mcl_fp_addNC96 mcl_fp_addNC96: push {r4, lr} ldm r1, {r1, r3, r12} @@ -28,6 +26,7 @@ mcl_fp_addNC96: # slower .align 2 + .global mcl_fp_addNC96_2 mcl_fp_addNC96_2: ldr r3, [r1], #4 ldr r12, [r2], #4 @@ -40,3 +39,21 @@ mcl_fp_addNC96_2: adcs r3, r3, r12 stm r0, {r1, r3} bx lr + + .globl mcl_fp_addNC128 + .align 2 +mcl_fp_addNC128: + push {r4, lr} + ldm r1!, {r3, r4} + ldm r2!, {r12, lr} + adds r3, r3, r12 + adcs r4, r4, lr + stm r0!, {r3, r4} + ldm r1, {r3, r4} + ldm r2, {r12, lr} + adcs r3, r3, r12 + adcs r4, r4, lr + stm r0, {r3, r4} + pop {r4, lr} + bx lr + diff --git a/test/low_test.cpp b/test/low_test.cpp index c710737..80ee474 100644 --- a/test/low_test.cpp +++ b/test/low_test.cpp @@ -19,7 +19,7 @@ void addNC(uint32_t *z, const uint32_t *x, const uint32_t *y); DEF_ADD(64) DEF_ADD(96) -//DEF_ADD(128) +DEF_ADD(128) #define CAT(S, BIT) "S##BIT" @@ -42,12 +42,29 @@ void benchAdd() CYBOZU_BENCH(name.c_str(), addNC<bit>, x, x, y); } -CYBOZU_TEST_AUTO(addNC64) +CYBOZU_TEST_AUTO(addNC64) { benchAdd<64>(); } +CYBOZU_TEST_AUTO(addNC96) { benchAdd<96>(); } +CYBOZU_TEST_AUTO(addNC128) { benchAdd<128>(); } +#if 0 +CYBOZU_TEST_AUTO(addNC128) { - benchAdd<64>(); -} -CYBOZU_TEST_AUTO(addNC96) -{ - benchAdd<96>(); + using namespace mcl::fp; + const size_t bit = 128; + const size_t N = bit / UnitBitSize; + Unit x[N], y[N]; + for (int i = 0; i < 10; i++) { + Unit z[N]; + Unit w[N]; + rg.read(x, N); + rg.read(y, N); + low_add<N>(z, x, y); + addNC<bit>(w, x, y); + CYBOZU_TEST_EQUAL_ARRAY(z, w, N); + mcl_fp_addNC128_2(w, x, y); + CYBOZU_TEST_EQUAL_ARRAY(z, w, N); + } + std::string name = "name" + cybozu::itoa(bit); + CYBOZU_BENCH(name.c_str(), addNC<bit>, x, x, y); + CYBOZU_BENCH("ad128", mcl_fp_addNC128_2, x, x, y); } - +#endif |