aot_llvm_extra.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527
  1. /*
  2. * Copyright (C) 2019 Intel Corporation. All rights reserved.
  3. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. */
  5. #include <llvm/ADT/SmallVector.h>
  6. #include <llvm/ADT/Twine.h>
  7. #include <llvm/ADT/Triple.h>
  8. #include <llvm/Analysis/TargetTransformInfo.h>
  9. #include <llvm/CodeGen/TargetPassConfig.h>
  10. #include <llvm/ExecutionEngine/ExecutionEngine.h>
  11. #include <llvm/MC/MCSubtargetInfo.h>
  12. #include <llvm/Support/TargetSelect.h>
  13. #include <llvm/Target/TargetMachine.h>
  14. #include <llvm-c/Core.h>
  15. #include <llvm-c/ExecutionEngine.h>
  16. #include <llvm-c/Initialization.h>
  17. #include <llvm/ExecutionEngine/GenericValue.h>
  18. #include <llvm/ExecutionEngine/JITEventListener.h>
  19. #include <llvm/ExecutionEngine/RTDyldMemoryManager.h>
  20. #include <llvm/ExecutionEngine/Orc/LLJIT.h>
  21. #include <llvm/IR/DerivedTypes.h>
  22. #include <llvm/IR/Module.h>
  23. #include <llvm/IR/Instructions.h>
  24. #include <llvm/IR/IntrinsicInst.h>
  25. #include <llvm/IR/LegacyPassManager.h>
  26. #include <llvm/Support/CommandLine.h>
  27. #include <llvm/Support/ErrorHandling.h>
  28. #include <llvm/Target/CodeGenCWrappers.h>
  29. #include <llvm/Target/TargetMachine.h>
  30. #include <llvm/Target/TargetOptions.h>
  31. #include <llvm/Transforms/Utils/LowerMemIntrinsics.h>
  32. #include <llvm/Transforms/Vectorize/LoopVectorize.h>
  33. #include <llvm/Transforms/Vectorize/LoadStoreVectorizer.h>
  34. #include <llvm/Transforms/Vectorize/SLPVectorizer.h>
  35. #include <llvm/Transforms/Scalar/LoopRotation.h>
  36. #include <llvm/Transforms/Scalar/SimpleLoopUnswitch.h>
  37. #include <llvm/Transforms/Scalar/LICM.h>
  38. #include <llvm/Transforms/Scalar/GVN.h>
  39. #include <llvm/Passes/PassBuilder.h>
  40. #include <llvm/Analysis/TargetLibraryInfo.h>
  41. #if LLVM_VERSION_MAJOR >= 12
  42. #include <llvm/Analysis/AliasAnalysis.h>
  43. #endif
  44. #include <cstring>
  45. #if WASM_ENABLE_LAZY_JIT != 0
  46. #include "../aot/aot_runtime.h"
  47. #endif
  48. #include "aot_llvm.h"
  49. using namespace llvm;
  50. using namespace llvm::orc;
  51. extern "C" {
  52. LLVMBool
  53. WAMRCreateMCJITCompilerForModule(LLVMExecutionEngineRef *OutJIT,
  54. LLVMModuleRef M,
  55. LLVMMCJITCompilerOptions *PassedOptions,
  56. size_t SizeOfPassedOptions, char **OutError);
  57. bool
  58. aot_check_simd_compatibility(const char *arch_c_str, const char *cpu_c_str);
  59. void
  60. aot_add_expand_memory_op_pass(LLVMPassManagerRef pass);
  61. void
  62. aot_add_simple_loop_unswitch_pass(LLVMPassManagerRef pass);
  63. void
  64. aot_func_disable_tce(LLVMValueRef func);
  65. void
  66. aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx);
  67. }
  68. static TargetMachine *
  69. unwrap(LLVMTargetMachineRef P)
  70. {
  71. return reinterpret_cast<TargetMachine *>(P);
  72. }
  73. LLVMBool
  74. WAMRCreateMCJITCompilerForModule(LLVMExecutionEngineRef *OutJIT,
  75. LLVMModuleRef M,
  76. LLVMMCJITCompilerOptions *PassedOptions,
  77. size_t SizeOfPassedOptions, char **OutError)
  78. {
  79. LLVMMCJITCompilerOptions options;
  80. // If the user passed a larger sized options struct, then they were compiled
  81. // against a newer LLVM. Tell them that something is wrong.
  82. if (SizeOfPassedOptions > sizeof(options)) {
  83. *OutError = strdup("Refusing to use options struct that is larger than "
  84. "my own; assuming LLVM library mismatch.");
  85. return 1;
  86. }
  87. // Defend against the user having an old version of the API by ensuring that
  88. // any fields they didn't see are cleared. We must defend against fields
  89. // being set to the bitwise equivalent of zero, and assume that this means
  90. // "do the default" as if that option hadn't been available.
  91. LLVMInitializeMCJITCompilerOptions(&options, sizeof(options));
  92. memcpy(&options, PassedOptions, SizeOfPassedOptions);
  93. TargetOptions targetOptions;
  94. targetOptions.EnableFastISel = options.EnableFastISel;
  95. std::unique_ptr<Module> Mod(unwrap(M));
  96. if (Mod) {
  97. // Set function attribute "frame-pointer" based on
  98. // NoFramePointerElim.
  99. for (auto &F : *Mod) {
  100. auto Attrs = F.getAttributes();
  101. StringRef Value = options.NoFramePointerElim ? "all" : "none";
  102. #if LLVM_VERSION_MAJOR <= 13
  103. Attrs =
  104. Attrs.addAttribute(F.getContext(), AttributeList::FunctionIndex,
  105. "frame-pointer", Value);
  106. #else
  107. Attrs = Attrs.addAttributeAtIndex(F.getContext(),
  108. AttributeList::FunctionIndex,
  109. "frame-pointer", Value);
  110. #endif
  111. F.setAttributes(Attrs);
  112. }
  113. }
  114. std::string Error;
  115. bool JIT;
  116. char *host_cpu = LLVMGetHostCPUName();
  117. if (!host_cpu) {
  118. *OutError = NULL;
  119. return false;
  120. }
  121. std::string mcpu(host_cpu);
  122. LLVMDisposeMessage(host_cpu);
  123. EngineBuilder builder(std::move(Mod));
  124. builder.setEngineKind(EngineKind::JIT)
  125. .setErrorStr(&Error)
  126. .setMCPU(mcpu)
  127. .setOptLevel((CodeGenOpt::Level)options.OptLevel)
  128. .setTargetOptions(targetOptions);
  129. if (Optional<CodeModel::Model> CM = unwrap(options.CodeModel, JIT))
  130. builder.setCodeModel(*CM);
  131. if (options.MCJMM)
  132. builder.setMCJITMemoryManager(
  133. std::unique_ptr<RTDyldMemoryManager>(unwrap(options.MCJMM)));
  134. if (ExecutionEngine *JIT = builder.create()) {
  135. *OutJIT = wrap(JIT);
  136. return 0;
  137. }
  138. *OutError = strdup(Error.c_str());
  139. return 1;
  140. }
  141. class ExpandMemoryOpPass : public llvm::ModulePass
  142. {
  143. public:
  144. static char ID;
  145. ExpandMemoryOpPass()
  146. : ModulePass(ID)
  147. {}
  148. bool runOnModule(Module &M) override;
  149. bool expandMemIntrinsicUses(Function &F);
  150. StringRef getPassName() const override
  151. {
  152. return "Expand memory operation intrinsics";
  153. }
  154. void getAnalysisUsage(AnalysisUsage &AU) const override
  155. {
  156. AU.addRequired<TargetTransformInfoWrapperPass>();
  157. }
  158. };
  159. char ExpandMemoryOpPass::ID = 0;
  160. bool
  161. ExpandMemoryOpPass::expandMemIntrinsicUses(Function &F)
  162. {
  163. Intrinsic::ID ID = F.getIntrinsicID();
  164. bool Changed = false;
  165. for (auto I = F.user_begin(), E = F.user_end(); I != E;) {
  166. Instruction *Inst = cast<Instruction>(*I);
  167. ++I;
  168. switch (ID) {
  169. case Intrinsic::memcpy:
  170. {
  171. auto *Memcpy = cast<MemCpyInst>(Inst);
  172. Function *ParentFunc = Memcpy->getParent()->getParent();
  173. const TargetTransformInfo &TTI =
  174. getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
  175. *ParentFunc);
  176. expandMemCpyAsLoop(Memcpy, TTI);
  177. Changed = true;
  178. Memcpy->eraseFromParent();
  179. break;
  180. }
  181. case Intrinsic::memmove:
  182. {
  183. auto *Memmove = cast<MemMoveInst>(Inst);
  184. expandMemMoveAsLoop(Memmove);
  185. Changed = true;
  186. Memmove->eraseFromParent();
  187. break;
  188. }
  189. case Intrinsic::memset:
  190. {
  191. auto *Memset = cast<MemSetInst>(Inst);
  192. expandMemSetAsLoop(Memset);
  193. Changed = true;
  194. Memset->eraseFromParent();
  195. break;
  196. }
  197. default:
  198. break;
  199. }
  200. }
  201. return Changed;
  202. }
  203. bool
  204. ExpandMemoryOpPass::runOnModule(Module &M)
  205. {
  206. bool Changed = false;
  207. for (Function &F : M) {
  208. if (!F.isDeclaration())
  209. continue;
  210. switch (F.getIntrinsicID()) {
  211. case Intrinsic::memcpy:
  212. case Intrinsic::memmove:
  213. case Intrinsic::memset:
  214. if (expandMemIntrinsicUses(F))
  215. Changed = true;
  216. break;
  217. default:
  218. break;
  219. }
  220. }
  221. return Changed;
  222. }
  223. void
  224. aot_add_expand_memory_op_pass(LLVMPassManagerRef pass)
  225. {
  226. unwrap(pass)->add(new ExpandMemoryOpPass());
  227. }
  228. void
  229. aot_add_simple_loop_unswitch_pass(LLVMPassManagerRef pass)
  230. {
  231. unwrap(pass)->add(createSimpleLoopUnswitchLegacyPass());
  232. }
  /**
   * Check whether the given target can run the SIMD code that is generated.
   *
   * When SIMD support is compiled in, x86_64 needs the "sse4.1" feature and
   * aarch64 needs the "neon" feature; every other architecture, an unknown
   * target or a NULL argument is rejected. When SIMD support is compiled
   * out, the check always succeeds.
   *
   * @param arch_c_str architecture name used to build the target triple
   * @param cpu_c_str  cpu name passed to the target selection
   *
   * @return true if the target is compatible, false otherwise
   */
  bool
  aot_check_simd_compatibility(const char *arch_c_str, const char *cpu_c_str)
  {
  #if WASM_ENABLE_SIMD != 0
      if (arch_c_str == nullptr || cpu_c_str == nullptr)
          return false;

      llvm::SmallVector<std::string, 1> attributes;
      llvm::Triple triple(arch_c_str, "", "");
      std::unique_ptr<llvm::TargetMachine> machine(
          llvm::EngineBuilder().selectTarget(triple, "", std::string(cpu_c_str),
                                             attributes));
      if (!machine)
          return false;

      const llvm::Triple::ArchType arch = machine->getTargetTriple().getArch();
      const llvm::MCSubtargetInfo *subtarget = machine->getMCSubtargetInfo();
      if (!subtarget)
          return false;

      switch (arch) {
          case llvm::Triple::x86_64:
              return subtarget->checkFeatures("+sse4.1");
          case llvm::Triple::aarch64:
              return subtarget->checkFeatures("+neon");
          default:
              return false;
      }
  #else
      (void)arch_c_str;
      (void)cpu_c_str;
      return true;
  #endif /* WASM_ENABLE_SIMD */
  }
  270. #if WASM_ENABLE_LAZY_JIT != 0
  271. #if LLVM_VERSION_MAJOR < 12
  272. LLVMOrcJITTargetMachineBuilderRef
  273. LLVMOrcJITTargetMachineBuilderFromTargetMachine(LLVMTargetMachineRef TM);
  274. LLVMOrcJITTargetMachineBuilderRef
  275. LLVMOrcJITTargetMachineBuilderCreateFromTargetMachine(LLVMTargetMachineRef TM)
  276. {
  277. return LLVMOrcJITTargetMachineBuilderFromTargetMachine(TM);
  278. }
  279. #endif
  280. DEFINE_SIMPLE_CONVERSION_FUNCTIONS(LLJITBuilder, LLVMOrcLLJITBuilderRef)
  281. void
  282. LLVMOrcLLJITBuilderSetNumCompileThreads(LLVMOrcLLJITBuilderRef orcjit_builder,
  283. unsigned num_compile_threads)
  284. {
  285. unwrap(orcjit_builder)->setNumCompileThreads(num_compile_threads);
  286. }
  287. void *
  288. aot_lookup_orcjit_func(LLVMOrcLLJITRef orc_lazyjit, void *module_inst,
  289. uint32 func_idx)
  290. {
  291. char func_name[32], buf[128], *err_msg = NULL;
  292. LLVMErrorRef error;
  293. LLVMOrcJITTargetAddress func_addr = 0;
  294. AOTModuleInstance *aot_inst = (AOTModuleInstance *)module_inst;
  295. AOTModule *aot_module = (AOTModule *)aot_inst->aot_module.ptr;
  296. void **func_ptrs = (void **)aot_inst->func_ptrs.ptr;
  297. /**
  298. * No need to lock the func_ptr[func_idx] here as it is basic
  299. * data type, the load/store for it can be finished by one cpu
  300. * instruction, and there can be only one cpu instruction
  301. * loading/storing at the same time.
  302. */
  303. if (func_ptrs[func_idx])
  304. return func_ptrs[func_idx];
  305. snprintf(func_name, sizeof(func_name), "%s%d", AOT_FUNC_PREFIX,
  306. func_idx - aot_module->import_func_count);
  307. if ((error = LLVMOrcLLJITLookup(orc_lazyjit, &func_addr, func_name))) {
  308. err_msg = LLVMGetErrorMessage(error);
  309. snprintf(buf, sizeof(buf), "failed to lookup orcjit function: %s",
  310. err_msg);
  311. aot_set_exception(aot_inst, buf);
  312. LLVMDisposeErrorMessage(err_msg);
  313. return NULL;
  314. }
  315. func_ptrs[func_idx] = (void *)func_addr;
  316. return (void *)func_addr;
  317. }
  318. #endif /* end of WASM_ENABLE_LAZY_JIT != 0 */
  319. void
  320. aot_func_disable_tce(LLVMValueRef func)
  321. {
  322. Function *F = unwrap<Function>(func);
  323. auto Attrs = F->getAttributes();
  324. #if LLVM_VERSION_MAJOR <= 13
  325. Attrs = Attrs.addAttribute(F->getContext(), AttributeList::FunctionIndex,
  326. "disable-tail-calls", "true");
  327. #else
  328. Attrs =
  329. Attrs.addAttributeAtIndex(F->getContext(), AttributeList::FunctionIndex,
  330. "disable-tail-calls", "true");
  331. #endif
  332. F->setAttributes(Attrs);
  333. }
  334. #if LLVM_VERSION_MAJOR >= 12
  335. void
  336. aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx)
  337. {
  338. Module *M;
  339. TargetMachine *TM = unwrap(comp_ctx->target_machine);
  340. bool disable_llvm_lto = false;
  341. LoopAnalysisManager LAM;
  342. FunctionAnalysisManager FAM;
  343. CGSCCAnalysisManager CGAM;
  344. ModuleAnalysisManager MAM;
  345. PipelineTuningOptions PTO;
  346. PTO.LoopVectorization = true;
  347. PTO.SLPVectorization = true;
  348. PTO.LoopUnrolling = true;
  349. #if LLVM_VERSION_MAJOR == 12
  350. PassBuilder PB(false, TM, PTO);
  351. #else
  352. PassBuilder PB(TM, PTO);
  353. #endif
  354. // Register the target library analysis directly and give it a
  355. // customized preset TLI.
  356. std::unique_ptr<TargetLibraryInfoImpl> TLII(
  357. new TargetLibraryInfoImpl(Triple(TM->getTargetTriple())));
  358. FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
  359. // Register the AA manager first so that our version is the one used.
  360. AAManager AA = PB.buildDefaultAAPipeline();
  361. FAM.registerPass([&] { return std::move(AA); });
  362. // Register all the basic analyses with the managers.
  363. PB.registerModuleAnalyses(MAM);
  364. PB.registerCGSCCAnalyses(CGAM);
  365. PB.registerFunctionAnalyses(FAM);
  366. PB.registerLoopAnalyses(LAM);
  367. PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
  368. ModulePassManager MPM;
  369. #if LLVM_VERSION_MAJOR <= 13
  370. PassBuilder::OptimizationLevel OL;
  371. switch (comp_ctx->opt_level) {
  372. case 0:
  373. OL = PassBuilder::OptimizationLevel::O0;
  374. break;
  375. case 1:
  376. OL = PassBuilder::OptimizationLevel::O1;
  377. break;
  378. case 2:
  379. OL = PassBuilder::OptimizationLevel::O2;
  380. break;
  381. case 3:
  382. default:
  383. OL = PassBuilder::OptimizationLevel::O3;
  384. break;
  385. }
  386. #else
  387. OptimizationLevel OL;
  388. switch (comp_ctx->opt_level) {
  389. case 0:
  390. OL = OptimizationLevel::O0;
  391. break;
  392. case 1:
  393. OL = OptimizationLevel::O1;
  394. break;
  395. case 2:
  396. OL = OptimizationLevel::O2;
  397. break;
  398. case 3:
  399. default:
  400. OL = OptimizationLevel::O3;
  401. break;
  402. }
  403. #endif /* end of LLVM_VERSION_MAJOR */
  404. if (comp_ctx->disable_llvm_lto) {
  405. disable_llvm_lto = true;
  406. }
  407. #if WASM_ENABLE_SPEC_TEST != 0
  408. disable_llvm_lto = true;
  409. #endif
  410. if (disable_llvm_lto) {
  411. uint32 i;
  412. for (i = 0; i < comp_ctx->func_ctx_count; i++) {
  413. aot_func_disable_tce(comp_ctx->func_ctxes[i]->func);
  414. }
  415. }
  416. if (comp_ctx->is_jit_mode) {
  417. /* Apply normal pipeline for JIT mode, without
  418. Vectorize related passes, without LTO */
  419. MPM.addPass(PB.buildPerModuleDefaultPipeline(OL));
  420. }
  421. else {
  422. FunctionPassManager FPM;
  423. /* Apply Vectorize related passes for AOT mode */
  424. FPM.addPass(LoopVectorizePass());
  425. FPM.addPass(SLPVectorizerPass());
  426. FPM.addPass(LoadStoreVectorizerPass());
  427. /*
  428. FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
  429. FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass()));
  430. FPM.addPass(createFunctionToLoopPassAdaptor(SimpleLoopUnswitchPass()));
  431. */
  432. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
  433. if (!disable_llvm_lto) {
  434. /* Apply LTO for AOT mode */
  435. #if LLVM_VERSION_MAJOR < 14
  436. MPM.addPass(PB.buildLTODefaultPipeline(OL, NULL));
  437. #else
  438. MPM.addPass(PB.buildLTOPreLinkDefaultPipeline(OL));
  439. #endif
  440. }
  441. else {
  442. MPM.addPass(PB.buildPerModuleDefaultPipeline(OL));
  443. }
  444. }
  445. #if WASM_ENABLE_LAZY_JIT == 0
  446. M = unwrap(comp_ctx->module);
  447. MPM.run(*M, MAM);
  448. #else
  449. uint32 i;
  450. for (i = 0; i < comp_ctx->func_ctx_count; i++) {
  451. M = unwrap(comp_ctx->modules[i]);
  452. MPM.run(*M, MAM);
  453. }
  454. #endif
  455. }
  456. #endif /* end of LLVM_VERSION_MAJOR >= 12 */