diff options
author | MITSUNARI Shigeo <herumi@nifty.com> | 2016-06-24 15:43:36 +0800 |
---|---|---|
committer | MITSUNARI Shigeo <herumi@nifty.com> | 2016-06-24 15:43:36 +0800 |
commit | a499a1aa9354b076bba85eabb9de98969caed99c (patch) | |
tree | 0d3d9f229ba70c8bc819f21e324a6121b8de21a9 | |
parent | 94c14dccca6bc93ceed4613924699a263ca2522f (diff) | |
download | dexon-mcl-a499a1aa9354b076bba85eabb9de98969caed99c.tar.gz dexon-mcl-a499a1aa9354b076bba85eabb9de98969caed99c.tar.zst dexon-mcl-a499a1aa9354b076bba85eabb9de98969caed99c.zip |
mcl_fp_addNC256 for arm
-rw-r--r-- | src/low_armv7.s | 21 | ||||
-rw-r--r-- | test/low_test.cpp | 12 |
2 files changed, 29 insertions, 4 deletions
diff --git a/src/low_armv7.s b/src/low_armv7.s index 7d252e4..7c70f86 100644 --- a/src/low_armv7.s +++ b/src/low_armv7.s @@ -57,3 +57,24 @@ mcl_fp_addNC128: pop {r4, lr} bx lr + .globl mcl_fp_addNC256 + .align 2 +mcl_fp_addNC256: + push {r4, r5, r6, r7, r8, lr} + ldm r1!, {r3, r4, r5, r6} + ldm r2!, {r7, r8, r12, lr} + adds r3, r3, r7 + adcs r4, r4, r8 + adcs r5, r5, r12 + adcs r6, r6, lr + stm r0!, {r3, r4, r5, r6} + + ldm r1!, {r3, r4, r5, r6} + ldm r2!, {r7, r8, r12, lr} + adcs r3, r3, r7 + adcs r4, r4, r8 + adcs r5, r5, r12 + adcs r6, r6, lr + stm r0!, {r3, r4, r5, r6} + pop {r4, r5, r6, r7, r8, lr} + bx lr diff --git a/test/low_test.cpp b/test/low_test.cpp index 80ee474..478620b 100644 --- a/test/low_test.cpp +++ b/test/low_test.cpp @@ -11,6 +11,7 @@ cybozu::XorShift rg; extern "C" void mcl_fp_addNC64(uint32_t *z, const uint32_t *x, const uint32_t *y); extern "C" void mcl_fp_addNC96(uint32_t *z, const uint32_t *x, const uint32_t *y); extern "C" void mcl_fp_addNC128(uint32_t *z, const uint32_t *x, const uint32_t *y); +extern "C" void mcl_fp_addNC256(uint32_t *z, const uint32_t *x, const uint32_t *y); template<size_t N> void addNC(uint32_t *z, const uint32_t *x, const uint32_t *y); @@ -20,6 +21,7 @@ void addNC(uint32_t *z, const uint32_t *x, const uint32_t *y); DEF_ADD(64) DEF_ADD(96) DEF_ADD(128) +DEF_ADD(256) #define CAT(S, BIT) "S##BIT" @@ -45,11 +47,12 @@ void benchAdd() CYBOZU_TEST_AUTO(addNC64) { benchAdd<64>(); } CYBOZU_TEST_AUTO(addNC96) { benchAdd<96>(); } CYBOZU_TEST_AUTO(addNC128) { benchAdd<128>(); } +CYBOZU_TEST_AUTO(addNC256) { benchAdd<256>(); } #if 0 -CYBOZU_TEST_AUTO(addNC128) +CYBOZU_TEST_AUTO(addNC) { using namespace mcl::fp; - const size_t bit = 128; + const size_t bit = 256; const size_t N = bit / UnitBitSize; Unit x[N], y[N]; for (int i = 0; i < 10; i++) { @@ -60,11 +63,12 @@ CYBOZU_TEST_AUTO(addNC128) low_add<N>(z, x, y); addNC<bit>(w, x, y); CYBOZU_TEST_EQUAL_ARRAY(z, w, N); - mcl_fp_addNC128_2(w, x, y); + mcl_fp_addNC256_2(w, x, y); 
CYBOZU_TEST_EQUAL_ARRAY(z, w, N); } std::string name = "name" + cybozu::itoa(bit); CYBOZU_BENCH(name.c_str(), addNC<bit>, x, x, y); - CYBOZU_BENCH("ad128", mcl_fp_addNC128_2, x, x, y); + CYBOZU_BENCH("ad128", mcl_fp_addNC256_2, x, x, y); } #endif + |