diff options
author | MITSUNARI Shigeo <herumi@nifty.com> | 2016-12-28 15:46:50 +0800 |
---|---|---|
committer | MITSUNARI Shigeo <herumi@nifty.com> | 2016-12-28 15:46:50 +0800 |
commit | 374acb1577a1c85f2d440354e481671bab1e1bff (patch) | |
tree | 33d3af189c55e3240caf60d13b591c3f75705d55 | |
parent | ce7dd1e830e4560deb99fa656524abb00447c346 (diff) | |
download | tangerine-mcl-374acb1577a1c85f2d440354e481671bab1e1bff.tar.gz tangerine-mcl-374acb1577a1c85f2d440354e481671bab1e1bff.tar.zst tangerine-mcl-374acb1577a1c85f2d440354e481671bab1e1bff.zip |
support builds with and without BMI2 (w/wo bmi2)
-rw-r--r-- | Makefile | 17 | ||||
-rw-r--r-- | src/fp.cpp | 2 | ||||
-rw-r--r-- | src/gen.cpp | 11 | ||||
-rw-r--r-- | src/low_func.hpp | 2 | ||||
-rw-r--r-- | src/low_func_llvm.hpp | 45 | ||||
-rw-r--r-- | src/proto.hpp | 4 |
6 files changed, 53 insertions, 28 deletions
@@ -50,13 +50,17 @@ LLVM_FLAGS+=-pre-RA-sched=list-ilp -max-sched-reorder=128 HAS_BMI2=$(shell cat "/proc/cpuinfo" | grep bmi2 >/dev/null && echo "1") ifeq ($(HAS_BMI2),1) - LLVM_FLAGS+=-mattr=bmi2 +# LLVM_FLAGS+=-mattr=bmi2 endif ifeq ($(USE_LOW_ASM),1) LOW_ASM_OBJ=$(LOW_ASM_SRC:.asm=.o) LIB_OBJ+=$(LOW_ASM_OBJ) endif +# special case for intel with bmi2 +ifeq ($(INTEL),1) + LIB_OBJ+=$(OBJ_DIR)/$(CPU).bmi2.o +endif $(MCL_LIB): $(LIB_OBJ) $(AR) $@ $(LIB_OBJ) @@ -70,6 +74,15 @@ $(ASM_SRC): $(LLVM_SRC) $(LLVM_SRC): $(GEN_EXE) $(FUNC_LIST) $(GEN_EXE) -f $(FUNC_LIST) > $@ +$(OBJ_DIR)/$(CPU).bmi2.o: src/$(CPU).bmi2.s + $(PRE)$(CXX) -c $< -o $@ $(CFLAGS) + +src/$(CPU).bmi2.s: src/base$(BIT).bmi2.ll + $(LLVM_OPT) -O3 -o - $< -march=$(CPU) | $(LLVM_LLC) -O3 -o $@ $(LLVM_FLAGS) -mattr=bmi2 + +src/base$(BIT).bmi2.ll: $(GEN_EXE) + $(GEN_EXE) -f $(FUNC_LIST) -s bmi2 > $@ + $(FUNC_LIST): $(LOW_ASM_SRC) ifeq ($(USE_LOW_ASM),1) $(shell awk '/global/ { print $$2}' $(LOW_ASM_SRC) > $(FUNC_LIST)) @@ -109,7 +122,7 @@ test: $(TEST_EXE) @grep -v "ng=0, exception=0" result.txt || echo "all unit tests are ok" clean: - $(RM) $(MCL_LIB) $(OBJ_DIR)/*.o $(OBJ_DIR)/*.d $(EXE_DIR)/*.exe $(GEN_EXE) $(ASM_SRC) $(ASM_OBJ) $(LIB_OBJ) $(LLVM_SRC) $(FUNC_LIST) + $(RM) $(MCL_LIB) $(OBJ_DIR)/*.o $(OBJ_DIR)/*.d $(EXE_DIR)/*.exe $(GEN_EXE) $(ASM_SRC) $(ASM_OBJ) $(LIB_OBJ) $(LLVM_SRC) $(FUNC_LIST) src/*.ll src/*.s ALL_SRC=$(SRC_SRC) $(TEST_SRC) $(SAMPLE_SRC) DEPEND_FILE=$(addprefix $(OBJ_DIR)/, $(ALL_SRC:.cpp=.d)) @@ -231,7 +231,7 @@ void setOp(Op& op, Mode mode) setOp2<N, Gtag, true>(op); #ifdef MCL_USE_LLVM if (mode != fp::FP_GMP && mode != fp::FP_GMP_MONT) { - setOp2<N, Ltag, (N * UnitBitSize <= 256)>(op); + setOp2<N, LBMI2tag, (N * UnitBitSize <= 256)>(op); } #else (void)mode; diff --git a/src/gen.cpp b/src/gen.cpp index 61edc15..189860f 100644 --- a/src/gen.cpp +++ b/src/gen.cpp @@ -149,7 +149,7 @@ struct Code : public mcl::Generator { Operand _0 = makeImm(64, 0); Operand _1 = makeImm(64, 1); Operand 
_2 = makeImm(64, 2); - makeNIST_P192 = Function("makeNIST_P192L", p); + makeNIST_P192 = Function("makeNIST_P192L" + suf, p); verifyAndSetPrivate(makeNIST_P192); beginFunc(makeNIST_P192); p0 = sub(_0, _1); @@ -188,7 +188,7 @@ struct Code : public mcl::Generator { resetGlobalIdx(); Operand out(IntPtr, unit); Operand px(IntPtr, unit); - mcl_fpDbl_mod_NIST_P192 = Function("mcl_fpDbl_mod_NIST_P192L", Void, out, px); + mcl_fpDbl_mod_NIST_P192 = Function("mcl_fpDbl_mod_NIST_P192L" + suf, Void, out, px); verifyAndSetPrivate(mcl_fpDbl_mod_NIST_P192); beginFunc(mcl_fpDbl_mod_NIST_P192); @@ -247,7 +247,7 @@ struct Code : public mcl::Generator { const size_t mask = -(1 << rem); const Operand py(IntPtr, unit); const Operand px(IntPtr, unit); - Function f("mcl_fpDbl_mod_NIST_P521L", Void, py, px); + Function f("mcl_fpDbl_mod_NIST_P521L" + suf, Void, py, px); verifyAndSetPrivate(f); beginFunc(f); Operand x = loadN(px, n * 2 + 1); @@ -286,7 +286,7 @@ struct Code : public mcl::Generator { resetGlobalIdx(); Operand py(IntPtr, unit); Operand px(IntPtr, unit); - mcl_fp_sqr_NIST_P192 = Function("mcl_fp_sqr_NIST_P192L", Void, py, px); + mcl_fp_sqr_NIST_P192 = Function("mcl_fp_sqr_NIST_P192L" + suf, Void, py, px); verifyAndSetPrivate(mcl_fp_sqr_NIST_P192); beginFunc(mcl_fp_sqr_NIST_P192); Operand buf = _alloca(unit, 192 * 2 / unit); @@ -303,7 +303,7 @@ struct Code : public mcl::Generator { Operand pz(IntPtr, unit); Operand px(IntPtr, unit); Operand py(IntPtr, unit); - Function f("mcl_fp_mulNIST_P192L", Void, pz, px, py); + Function f("mcl_fp_mulNIST_P192L" + suf, Void, pz, px, py); verifyAndSetPrivate(f); beginFunc(f); Operand buf = _alloca(unit, 192 * 2 / unit); @@ -603,6 +603,7 @@ struct Code : public mcl::Generator { Operand y(Int, unit); std::string name = "mulPv" + cybozu::itoa(bit) + "x" + cybozu::itoa(unit); mulPvM[bit] = Function(name, z, px, y); + mulPvM[bit].setPrivate(); verifyAndSetPrivate(mulPvM[bit]); beginFunc(mulPvM[bit]); OperandVec L(N), H(N); diff --git 
a/src/low_func.hpp b/src/low_func.hpp index 1cab20e..8684131 100644 --- a/src/low_func.hpp +++ b/src/low_func.hpp @@ -19,11 +19,13 @@ namespace mcl { namespace fp { struct Gtag; // GMP struct Ltag; // LLVM +struct LBMI2tag; // LLVM with Intel BMI2 instruction struct Atag; // asm template<class Tag> struct TagToStr { }; template<> struct TagToStr<Gtag> { static const char *f() { return "Gtag"; } }; template<> struct TagToStr<Ltag> { static const char *f() { return "Ltag"; } }; +template<> struct TagToStr<LBMI2tag> { static const char *f() { return "LBMI2tag"; } }; template<> struct TagToStr<Atag> { static const char *f() { return "Atag"; } }; template<size_t N> diff --git a/src/low_func_llvm.hpp b/src/low_func_llvm.hpp index 98f4700..a02001a 100644 --- a/src/low_func_llvm.hpp +++ b/src/low_func_llvm.hpp @@ -18,28 +18,37 @@ struct EnableKaratsuba<Ltag> { #endif #ifdef MCL_GMP_IS_FASTER_THAN_LLVM -#define MCL_DEF_MUL(n) +#define MCL_DEF_MUL(n, tag, suf) #else -#define MCL_DEF_MUL(n) \ -template<>const void3u MulPreCore<n, Ltag>::f = &mcl_fpDbl_mulPre ## n ## L; \ -template<>const void2u SqrPreCore<n, Ltag>::f = &mcl_fpDbl_sqrPre ## n ## L; +#define MCL_DEF_MUL(n, tag, suf) \ +template<>const void3u MulPreCore<n, tag>::f = &mcl_fpDbl_mulPre ## n ## suf; \ +template<>const void2u SqrPreCore<n, tag>::f = &mcl_fpDbl_sqrPre ## n ## suf; #endif +#define MCL_DEF_LLVM_FUNC2(n, tag, suf) \ +template<>const u3u AddPre<n, tag>::f = &mcl_fp_addPre ## n ## suf; \ +template<>const u3u SubPre<n, tag>::f = &mcl_fp_subPre ## n ## suf; \ +template<>const void2u Shr1<n, tag>::f = &mcl_fp_shr1_ ## n ## suf; \ +MCL_DEF_MUL(n, tag, suf) \ +template<>const void2uI MulUnitPre<n, tag>::f = &mcl_fp_mulUnitPre ## n ## suf; \ +template<>const void4u Add<n, true, tag>::f = &mcl_fp_add ## n ## suf; \ +template<>const void4u Add<n, false, tag>::f = &mcl_fp_addNF ## n ## suf; \ +template<>const void4u Sub<n, true, tag>::f = &mcl_fp_sub ## n ## suf; \ +template<>const void4u Sub<n, false, tag>::f = 
&mcl_fp_subNF ## n ## suf; \ +template<>const void4u Mont<n, true, tag>::f = &mcl_fp_mont ## n ## suf; \ +template<>const void4u Mont<n, false, tag>::f = &mcl_fp_montNF ## n ## suf; \ +template<>const void3u MontRed<n, tag>::f = &mcl_fp_montRed ## n ## suf; \ +template<>const void4u DblAdd<n, tag>::f = &mcl_fpDbl_add ## n ## suf; \ +template<>const void4u DblSub<n, tag>::f = &mcl_fpDbl_sub ## n ## suf; \ + +#if CYBOZU_HOST == CYBOZU_HOST_INTEL +#define MCL_DEF_LLVM_FUNC(n) \ + MCL_DEF_LLVM_FUNC2(n, Ltag, L) \ + MCL_DEF_LLVM_FUNC2(n, LBMI2tag, Lbmi2) +#else #define MCL_DEF_LLVM_FUNC(n) \ -template<>const u3u AddPre<n, Ltag>::f = &mcl_fp_addPre ## n ## L; \ -template<>const u3u SubPre<n, Ltag>::f = &mcl_fp_subPre ## n ## L; \ -template<>const void2u Shr1<n, Ltag>::f = &mcl_fp_shr1_ ## n ## L; \ -MCL_DEF_MUL(n) \ -template<>const void2uI MulUnitPre<n, Ltag>::f = &mcl_fp_mulUnitPre ## n ## L; \ -template<>const void4u Add<n, true, Ltag>::f = &mcl_fp_add ## n ## L; \ -template<>const void4u Add<n, false, Ltag>::f = &mcl_fp_addNF ## n ## L; \ -template<>const void4u Sub<n, true, Ltag>::f = &mcl_fp_sub ## n ## L; \ -template<>const void4u Sub<n, false, Ltag>::f = &mcl_fp_subNF ## n ## L; \ -template<>const void4u Mont<n, true, Ltag>::f = &mcl_fp_mont ## n ## L; \ -template<>const void4u Mont<n, false, Ltag>::f = &mcl_fp_montNF ## n ## L; \ -template<>const void3u MontRed<n, Ltag>::f = &mcl_fp_montRed ## n ## L; \ -template<>const void4u DblAdd<n, Ltag>::f = &mcl_fpDbl_add ## n ## L; \ -template<>const void4u DblSub<n, Ltag>::f = &mcl_fpDbl_sub ## n ## L; \ + MCL_DEF_LLVM_FUNC2(n, Ltag, L) +#endif MCL_DEF_LLVM_FUNC(1) MCL_DEF_LLVM_FUNC(2) diff --git a/src/proto.hpp b/src/proto.hpp index 18e8567..0085f23 100644 --- a/src/proto.hpp +++ b/src/proto.hpp @@ -27,7 +27,7 @@ void mcl_fpDbl_sub ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const m #define MCL_FP_DEF_FUNC(n) \ MCL_FP_DEF_FUNC_SUB(n, L) \ - MCL_FP_DEF_FUNC_SUB(n, A) + MCL_FP_DEF_FUNC_SUB(n, Lbmi2) #define 
MCL_FP_DEF_FUNC_SPECIAL(suf) \ void mcl_fpDbl_mod_NIST_P192 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* /* dummy */); \ @@ -60,7 +60,7 @@ MCL_FP_DEF_FUNC(17) #endif MCL_FP_DEF_FUNC_SPECIAL(L) -MCL_FP_DEF_FUNC_SPECIAL(A) +MCL_FP_DEF_FUNC_SPECIAL(Lbmi2) } |