aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author MITSUNARI Shigeo <herumi@nifty.com> 2016-06-24 15:09:58 +0800
committer MITSUNARI Shigeo <herumi@nifty.com> 2016-06-24 15:09:58 +0800
commit 94c14dccca6bc93ceed4613924699a263ca2522f (patch)
tree 5627423a543c75843a9b9bbe79c1f934a82aba7c
parent ec6ecdeb98d9408b35ab22f663a357eaa8b52e11 (diff)
download dexon-mcl-94c14dccca6bc93ceed4613924699a263ca2522f.tar.gz
dexon-mcl-94c14dccca6bc93ceed4613924699a263ca2522f.tar.zst
dexon-mcl-94c14dccca6bc93ceed4613924699a263ca2522f.zip
add mcl_fp_addNC128
-rw-r--r-- src/low_armv7.s 25
-rw-r--r-- test/low_test.cpp 33
2 files changed, 46 insertions, 12 deletions
diff --git a/src/low_armv7.s b/src/low_armv7.s
index b219e33..7d252e4 100644
--- a/src/low_armv7.s
+++ b/src/low_armv7.s
@@ -1,10 +1,7 @@
.arch armv7-a
- .global mcl_fp_addNC64
- .global mcl_fp_addNC96
- .global mcl_fp_addNC96_2
-
.align 2
+ .global mcl_fp_addNC64
mcl_fp_addNC64:
ldm r1, {r3, r12}
ldm r2, {r1, r2}
@@ -15,6 +12,7 @@ mcl_fp_addNC64:
.align 2
+ .global mcl_fp_addNC96
mcl_fp_addNC96:
push {r4, lr}
ldm r1, {r1, r3, r12}
@@ -28,6 +26,7 @@ mcl_fp_addNC96:
# slower
.align 2
+ .global mcl_fp_addNC96_2
mcl_fp_addNC96_2:
ldr r3, [r1], #4
ldr r12, [r2], #4
@@ -40,3 +39,21 @@ mcl_fp_addNC96_2:
adcs r3, r3, r12
stm r0, {r1, r3}
bx lr
+
+ .globl mcl_fp_addNC128
+ .align 2
+mcl_fp_addNC128:
+ push {r4, lr}
+ ldm r1!, {r3, r4}
+ ldm r2!, {r12, lr}
+ adds r3, r3, r12
+ adcs r4, r4, lr
+ stm r0!, {r3, r4}
+ ldm r1, {r3, r4}
+ ldm r2, {r12, lr}
+ adcs r3, r3, r12
+ adcs r4, r4, lr
+ stm r0, {r3, r4}
+ pop {r4, lr}
+ bx lr
+
diff --git a/test/low_test.cpp b/test/low_test.cpp
index c710737..80ee474 100644
--- a/test/low_test.cpp
+++ b/test/low_test.cpp
@@ -19,7 +19,7 @@ void addNC(uint32_t *z, const uint32_t *x, const uint32_t *y);
DEF_ADD(64)
DEF_ADD(96)
-//DEF_ADD(128)
+DEF_ADD(128)
#define CAT(S, BIT) "S##BIT"
@@ -42,12 +42,29 @@ void benchAdd()
CYBOZU_BENCH(name.c_str(), addNC<bit>, x, x, y);
}
-CYBOZU_TEST_AUTO(addNC64)
+CYBOZU_TEST_AUTO(addNC64) { benchAdd<64>(); }
+CYBOZU_TEST_AUTO(addNC96) { benchAdd<96>(); }
+CYBOZU_TEST_AUTO(addNC128) { benchAdd<128>(); }
+#if 0
+CYBOZU_TEST_AUTO(addNC128)
{
- benchAdd<64>();
-}
-CYBOZU_TEST_AUTO(addNC96)
-{
- benchAdd<96>();
+ using namespace mcl::fp;
+ const size_t bit = 128;
+ const size_t N = bit / UnitBitSize;
+ Unit x[N], y[N];
+ for (int i = 0; i < 10; i++) {
+ Unit z[N];
+ Unit w[N];
+ rg.read(x, N);
+ rg.read(y, N);
+ low_add<N>(z, x, y);
+ addNC<bit>(w, x, y);
+ CYBOZU_TEST_EQUAL_ARRAY(z, w, N);
+ mcl_fp_addNC128_2(w, x, y);
+ CYBOZU_TEST_EQUAL_ARRAY(z, w, N);
+ }
+ std::string name = "name" + cybozu::itoa(bit);
+ CYBOZU_BENCH(name.c_str(), addNC<bit>, x, x, y);
+ CYBOZU_BENCH("ad128", mcl_fp_addNC128_2, x, x, y);
}
-
+#endif