diff options
author | MITSUNARI Shigeo <herumi@nifty.com> | 2016-12-26 14:05:01 +0800 |
---|---|---|
committer | MITSUNARI Shigeo <herumi@nifty.com> | 2016-12-26 14:05:01 +0800 |
commit | b308547c8b3b43d83ed7ae5eb9523720b40ccd8a (patch) | |
tree | 1d5779e6d1714648b6be3fa91e73a5c59ce94c14 | |
parent | 23ab1099ce1f8b02511e195b4c3b66cdca7a0f00 (diff) | |
download | dexon-mcl-b308547c8b3b43d83ed7ae5eb9523720b40ccd8a.tar.gz dexon-mcl-b308547c8b3b43d83ed7ae5eb9523720b40ccd8a.tar.zst dexon-mcl-b308547c8b3b43d83ed7ae5eb9523720b40ccd8a.zip |
Add LLVM version of mont for not-full-bit primes
-rw-r--r-- | src/gen.cpp | 50 | ||||
-rw-r--r-- | src/low_func_llvm.hpp | 2 |
2 files changed, 22 insertions, 30 deletions
diff --git a/src/gen.cpp b/src/gen.cpp index 401a582..97200fc 100644 --- a/src/gen.cpp +++ b/src/gen.cpp @@ -758,7 +758,7 @@ struct Code : public mcl::Generator { beginFunc(mcl_fp_montM[N]); Operand rp = load(getelementptr(pp, -1)); Operand z, s, a; - if (1 || isFullBit) { + if (isFullBit) { for (uint32_t i = 0; i < N; i++) { Operand y = load(getelementptr(py, i)); Operand xy = call(mulPvM[bit], px, y); @@ -785,35 +785,27 @@ struct Code : public mcl::Generator { z = trunc(z, bit); storeN(z, pz); } else { - for (uint32_t i = 0; i < N; i++) { - Operand y = load(getelementptr(py, i)); - Operand xy = call(mulPvM[bit], px, y); - Operand at; - if (i == 0) { - a = xy; - at = trunc(xy, unit); - Operand q = mul(at, rp); - Operand pq = call(mulPvM[bit], pp, q); - pq = zext(pq, bu2); - Operand t = add(a, pq); - s = lshr(t, unit); - } else { - xy = zext(xy, bu2); - a = add(s, xy); - at = trunc(a, unit); - Operand q = mul(at, rp); - Operand pq = call(mulPvM[bit], pp, q); - pq = zext(pq, bu2); - Operand t = add(a, pq); - s = lshr(t, unit); - } + Operand y = load(py); + Operand xy = call(mulPvM[bit], px, y); + Operand c0 = trunc(xy, unit); + Operand q = mul(c0, rp); + Operand pq = call(mulPvM[bit], pp, q); + Operand t = add(xy, pq); + t = lshr(t, unit); // bu-bit + for (uint32_t i = 1; i < N; i++) { + y = load(getelementptr(py, i)); + xy = call(mulPvM[bit], px, y); + t = add(t, xy); + c0 = trunc(t, unit); + q = mul(c0, rp); + pq = call(mulPvM[bit], pp, q); + t = add(t, pq); + t = lshr(t, unit); } - s = trunc(s, bu); - Operand p = zext(loadN(pp, N), bu); - Operand vc = sub(s, p); - Operand c = trunc(lshr(vc, bit), 1); - z = select(c, s, vc); - z = trunc(z, bit); + t = trunc(t, bit); + Operand vc = sub(t, loadN(pp, N)); + Operand c = trunc(lshr(vc, bit - 1), 1); + z = select(c, t, vc); storeN(z, pz); } ret(Void); diff --git a/src/low_func_llvm.hpp b/src/low_func_llvm.hpp index ea7a19c..98f4700 100644 --- a/src/low_func_llvm.hpp +++ b/src/low_func_llvm.hpp @@ -36,7 +36,7 @@ 
template<>const void4u Add<n, false, Ltag>::f = &mcl_fp_addNF ## n ## L; \ template<>const void4u Sub<n, true, Ltag>::f = &mcl_fp_sub ## n ## L; \ template<>const void4u Sub<n, false, Ltag>::f = &mcl_fp_subNF ## n ## L; \ template<>const void4u Mont<n, true, Ltag>::f = &mcl_fp_mont ## n ## L; \ -template<>const void4u Mont<n, false, Ltag>::f = &mcl_fp_mont ## n ## L; \ +template<>const void4u Mont<n, false, Ltag>::f = &mcl_fp_montNF ## n ## L; \ template<>const void3u MontRed<n, Ltag>::f = &mcl_fp_montRed ## n ## L; \ template<>const void4u DblAdd<n, Ltag>::f = &mcl_fpDbl_add ## n ## L; \ template<>const void4u DblSub<n, Ltag>::f = &mcl_fpDbl_sub ## n ## L; \ |