aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: MITSUNARI Shigeo <herumi@nifty.com> 2016-06-24 15:43:36 +0800
committer: MITSUNARI Shigeo <herumi@nifty.com> 2016-06-24 15:43:36 +0800
commit: a499a1aa9354b076bba85eabb9de98969caed99c (patch)
tree: 0d3d9f229ba70c8bc819f21e324a6121b8de21a9
parent: 94c14dccca6bc93ceed4613924699a263ca2522f (diff)
download: dexon-mcl-a499a1aa9354b076bba85eabb9de98969caed99c.tar.gz
dexon-mcl-a499a1aa9354b076bba85eabb9de98969caed99c.tar.zst
dexon-mcl-a499a1aa9354b076bba85eabb9de98969caed99c.zip
mcl_fp_addNC256 for arm
-rw-r--r-- src/low_armv7.s 21
-rw-r--r-- test/low_test.cpp 12
2 files changed, 29 insertions, 4 deletions
diff --git a/src/low_armv7.s b/src/low_armv7.s
index 7d252e4..7c70f86 100644
--- a/src/low_armv7.s
+++ b/src/low_armv7.s
@@ -57,3 +57,24 @@ mcl_fp_addNC128:
pop {r4, lr}
bx lr
+ .globl mcl_fp_addNC256 @ exported; test/low_test.cpp declares it as void mcl_fp_addNC256(uint32_t *z, const uint32_t *x, const uint32_t *y)
+ .align 2
+mcl_fp_addNC256: @ z = x + y over eight 32-bit words, lowest-address word first; presumably r0 = z, r1 = x, r2 = y per the ARM calling convention -- confirm
+ push {r4, r5, r6, r7, r8, lr} @ save the registers overwritten below (lr is reused as a data register)
+ ldm r1!, {r3, r4, r5, r6} @ load words 0..3 through r1 and advance r1 past them
+ ldm r2!, {r7, r8, r12, lr} @ load words 0..3 through r2 and advance r2 past them
+ adds r3, r3, r7 @ word 0: plain add that starts the carry chain
+ adcs r4, r4, r8 @ words 1..3: add with the carry from the previous word
+ adcs r5, r5, r12
+ adcs r6, r6, lr
+ stm r0!, {r3, r4, r5, r6} @ store result words 0..3 through r0 and advance r0
+
+ ldm r1!, {r3, r4, r5, r6} @ load words 4..7; ldm/stm leave the flags alone, so the carry from word 3 is still live
+ ldm r2!, {r7, r8, r12, lr}
+ adcs r3, r3, r7 @ word 4 continues the chain with adcs (not adds) to consume the carry out of word 3
+ adcs r4, r4, r8
+ adcs r5, r5, r12
+ adcs r6, r6, lr @ carry out of word 7 is left in the flags only; it is not stored or returned
+ stm r0!, {r3, r4, r5, r6} @ store result words 4..7
+ pop {r4, r5, r6, r7, r8, lr} @ restore the saved registers, including the return address in lr
+ bx lr
diff --git a/test/low_test.cpp b/test/low_test.cpp
index 80ee474..478620b 100644
--- a/test/low_test.cpp
+++ b/test/low_test.cpp
@@ -11,6 +11,7 @@ cybozu::XorShift rg;
extern "C" void mcl_fp_addNC64(uint32_t *z, const uint32_t *x, const uint32_t *y);
extern "C" void mcl_fp_addNC96(uint32_t *z, const uint32_t *x, const uint32_t *y);
extern "C" void mcl_fp_addNC128(uint32_t *z, const uint32_t *x, const uint32_t *y);
+extern "C" void mcl_fp_addNC256(uint32_t *z, const uint32_t *x, const uint32_t *y); // implemented in src/low_armv7.s; adds eight 32-bit words of x and y into z
template<size_t N>
void addNC(uint32_t *z, const uint32_t *x, const uint32_t *y);
@@ -20,6 +21,7 @@ void addNC(uint32_t *z, const uint32_t *x, const uint32_t *y);
DEF_ADD(64)
DEF_ADD(96)
DEF_ADD(128)
+DEF_ADD(256)
#define CAT(S, BIT) "S##BIT"
@@ -45,11 +47,12 @@ void benchAdd()
CYBOZU_TEST_AUTO(addNC64) { benchAdd<64>(); }
CYBOZU_TEST_AUTO(addNC96) { benchAdd<96>(); }
CYBOZU_TEST_AUTO(addNC128) { benchAdd<128>(); }
+CYBOZU_TEST_AUTO(addNC256) { benchAdd<256>(); }
#if 0
-CYBOZU_TEST_AUTO(addNC128)
+CYBOZU_TEST_AUTO(addNC)
{
using namespace mcl::fp;
- const size_t bit = 128;
+ const size_t bit = 256;
const size_t N = bit / UnitBitSize;
Unit x[N], y[N];
for (int i = 0; i < 10; i++) {
@@ -60,11 +63,12 @@ CYBOZU_TEST_AUTO(addNC128)
low_add<N>(z, x, y);
addNC<bit>(w, x, y);
CYBOZU_TEST_EQUAL_ARRAY(z, w, N);
- mcl_fp_addNC128_2(w, x, y);
+ mcl_fp_addNC256_2(w, x, y);
CYBOZU_TEST_EQUAL_ARRAY(z, w, N);
}
std::string name = "name" + cybozu::itoa(bit);
CYBOZU_BENCH(name.c_str(), addNC<bit>, x, x, y);
- CYBOZU_BENCH("ad128", mcl_fp_addNC128_2, x, x, y);
+ CYBOZU_BENCH("ad128", mcl_fp_addNC256_2, x, x, y);
}
#endif
+