about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author MITSUNARI Shigeo <herumi@nifty.com> 2016-06-24 16:20:16 +0800
committer MITSUNARI Shigeo <herumi@nifty.com> 2016-06-24 16:20:16 +0800
commit 2737a8e97a1f2ca3df55185f1af2083c3b5e27da (patch)
tree 27b36da554d69f2406fcf1dac1e87dcb6a6076b8
parent bdc0ffd8fe33f876c390605fd5eea047c9f317af (diff)
download dexon-mcl-2737a8e97a1f2ca3df55185f1af2083c3b5e27da.tar.gz
dexon-mcl-2737a8e97a1f2ca3df55185f1af2083c3b5e27da.tar.zst
dexon-mcl-2737a8e97a1f2ca3df55185f1af2083c3b5e27da.zip
add mcl_fp_addNC{160,192,224}
-rw-r--r-- src/low_armv7.s 65
-rw-r--r-- test/low_test.cpp 11
2 files changed, 72 insertions, 4 deletions
diff --git a/src/low_armv7.s b/src/low_armv7.s
index 6df0dc3..a655520 100644
--- a/src/low_armv7.s
+++ b/src/low_armv7.s
@@ -72,6 +72,65 @@ cl_fp_addNC128_2:
pop {r4, r5, r6, lr}
bx lr
+ .globl mcl_fp_addNC160
+ .align 2
+mcl_fp_addNC160:
+ push {r4, lr}
+ ldm r1!, {r3, r4}
+ ldm r2!, {r12, lr}
+ adds r3, r3, r12
+ adcs r4, r4, lr
+ stm r0!, {r3, r4}
+ ldm r1, {r1, r3, r4}
+ ldm r2, {r2, r12, lr}
+ adcs r1, r1, r2
+ adcs r3, r3, r12
+ adcs r4, r4, lr
+ stm r0, {r1, r3, r4}
+ pop {r4, lr}
+ bx lr
+
+ .globl mcl_fp_addNC192
+ .align 2
+mcl_fp_addNC192:
+ push {r4, r5, r6, lr}
+ ldm r1!, {r3, r4, r5}
+ ldm r2!, {r6, r12, lr}
+ adds r3, r3, r6
+ adcs r4, r4, r12
+ adcs r5, r5, lr
+ stm r0!, {r3, r4, r5}
+
+ ldm r1, {r3, r4, r5}
+ ldm r2, {r6, r12, lr}
+ adcs r3, r3, r6
+ adcs r4, r4, r12
+ adcs r5, r5, lr
+ stm r0, {r3, r4, r5}
+ pop {r4, r5, r6, lr}
+ bx lr
+
+ .globl mcl_fp_addNC224
+ .align 2
+mcl_fp_addNC224:
+ push {r4, r5, r6, lr}
+ ldm r1!, {r3, r4, r5}
+ ldm r2!, {r6, r12, lr}
+ adds r3, r3, r6
+ adcs r4, r4, r12
+ adcs r5, r5, lr
+ stm r0!, {r3, r4, r5}
+
+ ldm r1, {r1, r3, r4, r5}
+ ldm r2, {r2, r6, r12, lr}
+ adcs r1, r1, r2
+ adcs r3, r3, r6
+ adcs r4, r4, r12
+ adcs r5, r5, lr
+ stm r0, {r1, r3, r4, r5}
+ pop {r4, r5, r6, lr}
+ bx lr
+
.globl mcl_fp_addNC256
.align 2
mcl_fp_addNC256:
@@ -84,12 +143,12 @@ mcl_fp_addNC256:
adcs r6, r6, lr
stm r0!, {r3, r4, r5, r6}
- ldm r1!, {r3, r4, r5, r6}
- ldm r2!, {r7, r8, r12, lr}
+ ldm r1, {r3, r4, r5, r6}
+ ldm r2, {r7, r8, r12, lr}
adcs r3, r3, r7
adcs r4, r4, r8
adcs r5, r5, r12
adcs r6, r6, lr
- stm r0!, {r3, r4, r5, r6}
+ stm r0, {r3, r4, r5, r6}
pop {r4, r5, r6, r7, r8, lr}
bx lr
diff --git a/test/low_test.cpp b/test/low_test.cpp
index dc1b48a..f73f14c 100644
--- a/test/low_test.cpp
+++ b/test/low_test.cpp
@@ -11,6 +11,9 @@ cybozu::XorShift rg;
extern "C" void mcl_fp_addNC64(uint32_t *z, const uint32_t *x, const uint32_t *y);
extern "C" void mcl_fp_addNC96(uint32_t *z, const uint32_t *x, const uint32_t *y);
extern "C" void mcl_fp_addNC128(uint32_t *z, const uint32_t *x, const uint32_t *y);
+extern "C" void mcl_fp_addNC160(uint32_t *z, const uint32_t *x, const uint32_t *y);
+extern "C" void mcl_fp_addNC192(uint32_t *z, const uint32_t *x, const uint32_t *y);
+extern "C" void mcl_fp_addNC224(uint32_t *z, const uint32_t *x, const uint32_t *y);
extern "C" void mcl_fp_addNC256(uint32_t *z, const uint32_t *x, const uint32_t *y);
extern "C" void add_test(uint32_t *z, const uint32_t *x, const uint32_t *y);
@@ -22,6 +25,9 @@ void addNC(uint32_t *z, const uint32_t *x, const uint32_t *y);
DEF_ADD(64)
DEF_ADD(96)
DEF_ADD(128)
+DEF_ADD(160)
+DEF_ADD(192)
+DEF_ADD(224)
DEF_ADD(256)
#define CAT(S, BIT) "S##BIT"
@@ -48,8 +54,11 @@ void benchAdd()
CYBOZU_TEST_AUTO(addNC64) { benchAdd<64>(); }
CYBOZU_TEST_AUTO(addNC96) { benchAdd<96>(); }
CYBOZU_TEST_AUTO(addNC128) { benchAdd<128>(); }
+CYBOZU_TEST_AUTO(addNC160) { benchAdd<160>(); }
+CYBOZU_TEST_AUTO(addNC192) { benchAdd<192>(); }
+CYBOZU_TEST_AUTO(addNC224) { benchAdd<224>(); }
CYBOZU_TEST_AUTO(addNC256) { benchAdd<256>(); }
-#if 1
+#if 0
CYBOZU_TEST_AUTO(addNC)
{
using namespace mcl::fp;