aot_llvm_extra.cpp 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. /*
  2. * Copyright (C) 2019 Intel Corporation. All rights reserved.
  3. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. */
  5. #include <llvm/Passes/StandardInstrumentations.h>
  6. #include <llvm/Support/Error.h>
  7. #include <llvm/ADT/SmallVector.h>
  8. #include <llvm/ADT/Twine.h>
  9. #include <llvm/ADT/Triple.h>
  10. #include <llvm/Analysis/TargetTransformInfo.h>
  11. #include <llvm/CodeGen/TargetPassConfig.h>
  12. #include <llvm/ExecutionEngine/ExecutionEngine.h>
  13. #include <llvm/MC/MCSubtargetInfo.h>
  14. #include <llvm/Support/TargetSelect.h>
  15. #include <llvm/Target/TargetMachine.h>
  16. #include <llvm-c/Core.h>
  17. #include <llvm-c/ExecutionEngine.h>
  18. #include <llvm-c/Initialization.h>
  19. #include <llvm/ExecutionEngine/GenericValue.h>
  20. #include <llvm/ExecutionEngine/JITEventListener.h>
  21. #include <llvm/ExecutionEngine/RTDyldMemoryManager.h>
  22. #include <llvm/ExecutionEngine/Orc/LLJIT.h>
  23. #include <llvm/IR/DerivedTypes.h>
  24. #include <llvm/IR/Module.h>
  25. #include <llvm/IR/Instructions.h>
  26. #include <llvm/IR/IntrinsicInst.h>
  27. #include <llvm/IR/LegacyPassManager.h>
  28. #include <llvm/Support/CommandLine.h>
  29. #include <llvm/Support/ErrorHandling.h>
  30. #include <llvm/Target/CodeGenCWrappers.h>
  31. #include <llvm/Target/TargetMachine.h>
  32. #include <llvm/Target/TargetOptions.h>
  33. #include <llvm/Transforms/Utils/LowerMemIntrinsics.h>
  34. #include <llvm/Transforms/Vectorize/LoopVectorize.h>
  35. #include <llvm/Transforms/Vectorize/LoadStoreVectorizer.h>
  36. #include <llvm/Transforms/Vectorize/SLPVectorizer.h>
  37. #include <llvm/Transforms/Scalar/LoopRotation.h>
  38. #include <llvm/Transforms/Scalar/SimpleLoopUnswitch.h>
  39. #include <llvm/Transforms/Scalar/LICM.h>
  40. #include <llvm/Transforms/Scalar/GVN.h>
  41. #include <llvm/Passes/PassBuilder.h>
  42. #include <llvm/Analysis/TargetLibraryInfo.h>
  43. #if LLVM_VERSION_MAJOR >= 12
  44. #include <llvm/Analysis/AliasAnalysis.h>
  45. #endif
  46. #include <cstring>
  47. #include "../aot/aot_runtime.h"
  48. #include "aot_llvm.h"
  49. using namespace llvm;
  50. using namespace llvm::orc;
/* Forward declarations of the helpers implemented in this file. They are
   wrapped in LLVM's LLVM_C_EXTERN_C_BEGIN/END guard macros so that they are
   declared with C linkage. */
LLVM_C_EXTERN_C_BEGIN
/* Report whether the given arch/cpu pair satisfies the SIMD requirement
   (always true when WASM_ENABLE_SIMD is 0). */
bool
aot_check_simd_compatibility(const char *arch_c_str, const char *cpu_c_str);
/* Append an ExpandMemoryOpPass instance to a legacy pass manager. */
void
aot_add_expand_memory_op_pass(LLVMPassManagerRef pass);
/* Append LLVM's legacy simple-loop-unswitch pass to a legacy pass manager. */
void
aot_add_simple_loop_unswitch_pass(LLVMPassManagerRef pass);
/* Build and run a new-pass-manager pipeline over the given module. */
void
aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module);
LLVM_C_EXTERN_C_END
/* File-wide llvm::ExitOnError helper; used by
   aot_apply_llvm_new_pass_manager() to handle the llvm::Error returned by
   PassBuilder::parsePassPipeline(). */
ExitOnError ExitOnErr;
  62. class ExpandMemoryOpPass : public llvm::ModulePass
  63. {
  64. public:
  65. static char ID;
  66. ExpandMemoryOpPass()
  67. : ModulePass(ID)
  68. {}
  69. bool runOnModule(Module &M) override;
  70. bool expandMemIntrinsicUses(Function &F);
  71. StringRef getPassName() const override
  72. {
  73. return "Expand memory operation intrinsics";
  74. }
  75. void getAnalysisUsage(AnalysisUsage &AU) const override
  76. {
  77. AU.addRequired<TargetTransformInfoWrapperPass>();
  78. }
  79. };
  80. char ExpandMemoryOpPass::ID = 0;
  81. bool
  82. ExpandMemoryOpPass::expandMemIntrinsicUses(Function &F)
  83. {
  84. Intrinsic::ID ID = F.getIntrinsicID();
  85. bool Changed = false;
  86. for (auto I = F.user_begin(), E = F.user_end(); I != E;) {
  87. Instruction *Inst = cast<Instruction>(*I);
  88. ++I;
  89. switch (ID) {
  90. case Intrinsic::memcpy:
  91. {
  92. auto *Memcpy = cast<MemCpyInst>(Inst);
  93. Function *ParentFunc = Memcpy->getParent()->getParent();
  94. const TargetTransformInfo &TTI =
  95. getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
  96. *ParentFunc);
  97. expandMemCpyAsLoop(Memcpy, TTI);
  98. Changed = true;
  99. Memcpy->eraseFromParent();
  100. break;
  101. }
  102. case Intrinsic::memmove:
  103. {
  104. auto *Memmove = cast<MemMoveInst>(Inst);
  105. expandMemMoveAsLoop(Memmove);
  106. Changed = true;
  107. Memmove->eraseFromParent();
  108. break;
  109. }
  110. case Intrinsic::memset:
  111. {
  112. auto *Memset = cast<MemSetInst>(Inst);
  113. expandMemSetAsLoop(Memset);
  114. Changed = true;
  115. Memset->eraseFromParent();
  116. break;
  117. }
  118. default:
  119. break;
  120. }
  121. }
  122. return Changed;
  123. }
  124. bool
  125. ExpandMemoryOpPass::runOnModule(Module &M)
  126. {
  127. bool Changed = false;
  128. for (Function &F : M) {
  129. if (!F.isDeclaration())
  130. continue;
  131. switch (F.getIntrinsicID()) {
  132. case Intrinsic::memcpy:
  133. case Intrinsic::memmove:
  134. case Intrinsic::memset:
  135. if (expandMemIntrinsicUses(F))
  136. Changed = true;
  137. break;
  138. default:
  139. break;
  140. }
  141. }
  142. return Changed;
  143. }
  144. void
  145. aot_add_expand_memory_op_pass(LLVMPassManagerRef pass)
  146. {
  147. reinterpret_cast<legacy::PassManager *>(pass)->add(
  148. new ExpandMemoryOpPass());
  149. }
  150. void
  151. aot_add_simple_loop_unswitch_pass(LLVMPassManagerRef pass)
  152. {
  153. reinterpret_cast<legacy::PassManager *>(pass)->add(
  154. createSimpleLoopUnswitchLegacyPass());
  155. }
  156. bool
  157. aot_check_simd_compatibility(const char *arch_c_str, const char *cpu_c_str)
  158. {
  159. #if WASM_ENABLE_SIMD != 0
  160. if (!arch_c_str || !cpu_c_str) {
  161. return false;
  162. }
  163. llvm::SmallVector<std::string, 1> targetAttributes;
  164. llvm::Triple targetTriple(arch_c_str, "", "");
  165. auto targetMachine =
  166. std::unique_ptr<llvm::TargetMachine>(llvm::EngineBuilder().selectTarget(
  167. targetTriple, "", std::string(cpu_c_str), targetAttributes));
  168. if (!targetMachine) {
  169. return false;
  170. }
  171. const llvm::Triple::ArchType targetArch =
  172. targetMachine->getTargetTriple().getArch();
  173. const llvm::MCSubtargetInfo *subTargetInfo =
  174. targetMachine->getMCSubtargetInfo();
  175. if (subTargetInfo == nullptr) {
  176. return false;
  177. }
  178. if (targetArch == llvm::Triple::x86_64) {
  179. return subTargetInfo->checkFeatures("+sse4.1");
  180. }
  181. else if (targetArch == llvm::Triple::aarch64) {
  182. return subTargetInfo->checkFeatures("+neon");
  183. }
  184. else {
  185. return false;
  186. }
  187. #else
  188. (void)arch_c_str;
  189. (void)cpu_c_str;
  190. return true;
  191. #endif /* WASM_ENABLE_SIMD */
  192. }
  193. void
  194. aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
  195. {
  196. TargetMachine *TM =
  197. reinterpret_cast<TargetMachine *>(comp_ctx->target_machine);
  198. PipelineTuningOptions PTO;
  199. PTO.LoopVectorization = true;
  200. PTO.SLPVectorization = true;
  201. PTO.LoopUnrolling = true;
  202. #ifdef DEBUG_PASS
  203. PassInstrumentationCallbacks PIC;
  204. PassBuilder PB(TM, PTO, None, &PIC);
  205. #else
  206. #if LLVM_VERSION_MAJOR == 12
  207. PassBuilder PB(false, TM, PTO);
  208. #else
  209. PassBuilder PB(TM, PTO);
  210. #endif
  211. #endif
  212. /* Register all the basic analyses with the managers */
  213. LoopAnalysisManager LAM;
  214. FunctionAnalysisManager FAM;
  215. CGSCCAnalysisManager CGAM;
  216. ModuleAnalysisManager MAM;
  217. /* Register the target library analysis directly and give it a
  218. customized preset TLI */
  219. std::unique_ptr<TargetLibraryInfoImpl> TLII(
  220. new TargetLibraryInfoImpl(Triple(TM->getTargetTriple())));
  221. FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
  222. /* Register the AA manager first so that our version is the one used */
  223. AAManager AA = PB.buildDefaultAAPipeline();
  224. FAM.registerPass([&] { return std::move(AA); });
  225. #ifdef DEBUG_PASS
  226. StandardInstrumentations SI(true, false);
  227. SI.registerCallbacks(PIC, &FAM);
  228. #endif
  229. PB.registerFunctionAnalyses(FAM);
  230. PB.registerLoopAnalyses(LAM);
  231. PB.registerModuleAnalyses(MAM);
  232. PB.registerCGSCCAnalyses(CGAM);
  233. PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
  234. #if LLVM_VERSION_MAJOR <= 13
  235. PassBuilder::OptimizationLevel OL;
  236. switch (comp_ctx->opt_level) {
  237. case 0:
  238. OL = PassBuilder::OptimizationLevel::O0;
  239. break;
  240. case 1:
  241. OL = PassBuilder::OptimizationLevel::O1;
  242. break;
  243. case 2:
  244. OL = PassBuilder::OptimizationLevel::O2;
  245. break;
  246. case 3:
  247. default:
  248. OL = PassBuilder::OptimizationLevel::O3;
  249. break;
  250. }
  251. #else
  252. OptimizationLevel OL;
  253. switch (comp_ctx->opt_level) {
  254. case 0:
  255. OL = OptimizationLevel::O0;
  256. break;
  257. case 1:
  258. OL = OptimizationLevel::O1;
  259. break;
  260. case 2:
  261. OL = OptimizationLevel::O2;
  262. break;
  263. case 3:
  264. default:
  265. OL = OptimizationLevel::O3;
  266. break;
  267. }
  268. #endif /* end of LLVM_VERSION_MAJOR */
  269. bool disable_llvm_lto = comp_ctx->disable_llvm_lto;
  270. #if WASM_ENABLE_SPEC_TEST != 0
  271. disable_llvm_lto = true;
  272. #endif
  273. Module *M = reinterpret_cast<Module *>(module);
  274. if (disable_llvm_lto) {
  275. for (Function &F : *M) {
  276. F.addFnAttr("disable-tail-calls", "true");
  277. }
  278. }
  279. ModulePassManager MPM;
  280. if (comp_ctx->is_jit_mode) {
  281. const char *Passes =
  282. "mem2reg,instcombine,simplifycfg,jump-threading,indvars";
  283. ExitOnErr(PB.parsePassPipeline(MPM, Passes));
  284. }
  285. else {
  286. FunctionPassManager FPM;
  287. /* Apply Vectorize related passes for AOT mode */
  288. FPM.addPass(LoopVectorizePass());
  289. FPM.addPass(SLPVectorizerPass());
  290. FPM.addPass(LoadStoreVectorizerPass());
  291. /*
  292. FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
  293. FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass()));
  294. FPM.addPass(createFunctionToLoopPassAdaptor(SimpleLoopUnswitchPass()));
  295. */
  296. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
  297. if (!disable_llvm_lto) {
  298. /* Apply LTO for AOT mode */
  299. if (comp_ctx->comp_data->func_count >= 10)
  300. /* Adds the pre-link optimizations if the func count
  301. is large enough */
  302. MPM.addPass(PB.buildLTOPreLinkDefaultPipeline(OL));
  303. else
  304. MPM.addPass(PB.buildLTODefaultPipeline(OL, NULL));
  305. }
  306. else {
  307. MPM.addPass(PB.buildPerModuleDefaultPipeline(OL));
  308. }
  309. }
  310. MPM.run(*M, MAM);
  311. }