aot_llvm_extra.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413
  1. /*
  2. * Copyright (C) 2019 Intel Corporation. All rights reserved.
  3. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. */
  5. #include <llvm/Passes/StandardInstrumentations.h>
  6. #include <llvm/Support/Error.h>
  7. #include <llvm/ADT/SmallVector.h>
  8. #include <llvm/ADT/Twine.h>
  9. #include <llvm/ADT/Triple.h>
  10. #include <llvm/Analysis/TargetTransformInfo.h>
  11. #include <llvm/CodeGen/TargetPassConfig.h>
  12. #include <llvm/ExecutionEngine/ExecutionEngine.h>
  13. #include <llvm/MC/MCSubtargetInfo.h>
  14. #include <llvm/Support/TargetSelect.h>
  15. #include <llvm/Target/TargetMachine.h>
  16. #include <llvm-c/Core.h>
  17. #include <llvm-c/ExecutionEngine.h>
  18. #include <llvm-c/Initialization.h>
  19. #include <llvm/ExecutionEngine/GenericValue.h>
  20. #include <llvm/ExecutionEngine/JITEventListener.h>
  21. #include <llvm/ExecutionEngine/RTDyldMemoryManager.h>
  22. #include <llvm/ExecutionEngine/Orc/LLJIT.h>
  23. #include <llvm/IR/DerivedTypes.h>
  24. #include <llvm/IR/Module.h>
  25. #include <llvm/IR/Instructions.h>
  26. #include <llvm/IR/IntrinsicInst.h>
  27. #include <llvm/IR/LegacyPassManager.h>
  28. #include <llvm/Support/CommandLine.h>
  29. #include <llvm/Support/ErrorHandling.h>
  30. #include <llvm/Target/CodeGenCWrappers.h>
  31. #include <llvm/Target/TargetMachine.h>
  32. #include <llvm/Target/TargetOptions.h>
  33. #include <llvm/Transforms/Utils/LowerMemIntrinsics.h>
  34. #include <llvm/Transforms/Vectorize/LoopVectorize.h>
  35. #include <llvm/Transforms/Vectorize/LoadStoreVectorizer.h>
  36. #include <llvm/Transforms/Vectorize/SLPVectorizer.h>
  37. #include <llvm/Transforms/Scalar/LoopRotation.h>
  38. #include <llvm/Transforms/Scalar/SimpleLoopUnswitch.h>
  39. #include <llvm/Transforms/Scalar/LICM.h>
  40. #include <llvm/Transforms/Scalar/GVN.h>
  41. #include <llvm/Passes/PassBuilder.h>
  42. #include <llvm/Analysis/TargetLibraryInfo.h>
  43. #if LLVM_VERSION_MAJOR >= 12
  44. #include <llvm/Analysis/AliasAnalysis.h>
  45. #endif
  46. #include <llvm/ProfileData/InstrProf.h>
  47. #include <cstring>
  48. #include "../aot/aot_runtime.h"
  49. #include "aot_llvm.h"
  50. using namespace llvm;
  51. using namespace llvm::orc;
  52. LLVM_C_EXTERN_C_BEGIN
  53. bool
  54. aot_check_simd_compatibility(const char *arch_c_str, const char *cpu_c_str);
  55. void
  56. aot_add_expand_memory_op_pass(LLVMPassManagerRef pass);
  57. void
  58. aot_add_simple_loop_unswitch_pass(LLVMPassManagerRef pass);
  59. void
  60. aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module);
  61. LLVM_C_EXTERN_C_END
  62. ExitOnError ExitOnErr;
  63. class ExpandMemoryOpPass : public llvm::ModulePass
  64. {
  65. public:
  66. static char ID;
  67. ExpandMemoryOpPass()
  68. : ModulePass(ID)
  69. {}
  70. bool runOnModule(Module &M) override;
  71. bool expandMemIntrinsicUses(Function &F);
  72. StringRef getPassName() const override
  73. {
  74. return "Expand memory operation intrinsics";
  75. }
  76. void getAnalysisUsage(AnalysisUsage &AU) const override
  77. {
  78. AU.addRequired<TargetTransformInfoWrapperPass>();
  79. }
  80. };
  81. char ExpandMemoryOpPass::ID = 0;
  82. bool
  83. ExpandMemoryOpPass::expandMemIntrinsicUses(Function &F)
  84. {
  85. Intrinsic::ID ID = F.getIntrinsicID();
  86. bool Changed = false;
  87. for (auto I = F.user_begin(), E = F.user_end(); I != E;) {
  88. Instruction *Inst = cast<Instruction>(*I);
  89. ++I;
  90. switch (ID) {
  91. case Intrinsic::memcpy:
  92. {
  93. auto *Memcpy = cast<MemCpyInst>(Inst);
  94. Function *ParentFunc = Memcpy->getParent()->getParent();
  95. const TargetTransformInfo &TTI =
  96. getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
  97. *ParentFunc);
  98. expandMemCpyAsLoop(Memcpy, TTI);
  99. Changed = true;
  100. Memcpy->eraseFromParent();
  101. break;
  102. }
  103. case Intrinsic::memmove:
  104. {
  105. auto *Memmove = cast<MemMoveInst>(Inst);
  106. expandMemMoveAsLoop(Memmove);
  107. Changed = true;
  108. Memmove->eraseFromParent();
  109. break;
  110. }
  111. case Intrinsic::memset:
  112. {
  113. auto *Memset = cast<MemSetInst>(Inst);
  114. expandMemSetAsLoop(Memset);
  115. Changed = true;
  116. Memset->eraseFromParent();
  117. break;
  118. }
  119. default:
  120. break;
  121. }
  122. }
  123. return Changed;
  124. }
  125. bool
  126. ExpandMemoryOpPass::runOnModule(Module &M)
  127. {
  128. bool Changed = false;
  129. for (Function &F : M) {
  130. if (!F.isDeclaration())
  131. continue;
  132. switch (F.getIntrinsicID()) {
  133. case Intrinsic::memcpy:
  134. case Intrinsic::memmove:
  135. case Intrinsic::memset:
  136. if (expandMemIntrinsicUses(F))
  137. Changed = true;
  138. break;
  139. default:
  140. break;
  141. }
  142. }
  143. return Changed;
  144. }
  145. void
  146. aot_add_expand_memory_op_pass(LLVMPassManagerRef pass)
  147. {
  148. reinterpret_cast<legacy::PassManager *>(pass)->add(
  149. new ExpandMemoryOpPass());
  150. }
  151. void
  152. aot_add_simple_loop_unswitch_pass(LLVMPassManagerRef pass)
  153. {
  154. reinterpret_cast<legacy::PassManager *>(pass)->add(
  155. createSimpleLoopUnswitchLegacyPass());
  156. }
  /**
   * Check whether the given target can run SIMD code.
   *
   * With WASM_ENABLE_SIMD enabled, a target machine is selected for the
   * arch/cpu pair and its subtarget features are queried: "+sse4.1" on
   * x86_64 and "+neon" on aarch64. Every other architecture, a NULL
   * argument, or a failure to select the target yields false.
   * With WASM_ENABLE_SIMD disabled, both arguments are ignored and the
   * result is always true.
   *
   * @param arch_c_str architecture name used to build the target triple
   * @param cpu_c_str CPU name passed to the target selection
   *
   * @return true if the check passes, false otherwise
   */
  bool
  aot_check_simd_compatibility(const char *arch_c_str, const char *cpu_c_str)
  {
  #if WASM_ENABLE_SIMD != 0
      if (arch_c_str == nullptr || cpu_c_str == nullptr)
          return false;

      const llvm::Triple triple(arch_c_str, "", "");
      llvm::SmallVector<std::string, 1> no_attributes;

      /* selectTarget() hands back a raw pointer; own it for the
         duration of the query */
      std::unique_ptr<llvm::TargetMachine> machine(
          llvm::EngineBuilder().selectTarget(triple, "",
                                             std::string(cpu_c_str),
                                             no_attributes));
      if (!machine)
          return false;

      const llvm::MCSubtargetInfo *subtarget = machine->getMCSubtargetInfo();
      if (!subtarget)
          return false;

      switch (machine->getTargetTriple().getArch()) {
          case llvm::Triple::x86_64:
              return subtarget->checkFeatures("+sse4.1");
          case llvm::Triple::aarch64:
              return subtarget->checkFeatures("+neon");
          default:
              return false;
      }
  #else
      (void)arch_c_str;
      (void)cpu_c_str;
      return true;
  #endif /* WASM_ENABLE_SIMD */
  }
  194. void
  195. aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
  196. {
  197. TargetMachine *TM =
  198. reinterpret_cast<TargetMachine *>(comp_ctx->target_machine);
  199. PipelineTuningOptions PTO;
  200. PTO.LoopVectorization = true;
  201. PTO.SLPVectorization = true;
  202. PTO.LoopUnrolling = true;
  203. Optional<PGOOptions> PGO = None;
  204. if (comp_ctx->enable_llvm_pgo) {
  205. /* Disable static counter allocation for value profiler,
  206. it will be allocated by runtime */
  207. const char *argv[] = { "", "-vp-static-alloc=false" };
  208. cl::ParseCommandLineOptions(2, argv);
  209. PGO = PGOOptions("", "", "", PGOOptions::IRInstr);
  210. }
  211. else if (comp_ctx->use_prof_file) {
  212. PGO = PGOOptions(comp_ctx->use_prof_file, "", "", PGOOptions::IRUse);
  213. }
  214. #ifdef DEBUG_PASS
  215. PassInstrumentationCallbacks PIC;
  216. PassBuilder PB(TM, PTO, PGO, &PIC);
  217. #else
  218. #if LLVM_VERSION_MAJOR == 12
  219. PassBuilder PB(false, TM, PTO, PGO);
  220. #else
  221. PassBuilder PB(TM, PTO, PGO);
  222. #endif
  223. #endif
  224. /* Register all the basic analyses with the managers */
  225. LoopAnalysisManager LAM;
  226. FunctionAnalysisManager FAM;
  227. CGSCCAnalysisManager CGAM;
  228. ModuleAnalysisManager MAM;
  229. /* Register the target library analysis directly and give it a
  230. customized preset TLI */
  231. std::unique_ptr<TargetLibraryInfoImpl> TLII(
  232. new TargetLibraryInfoImpl(Triple(TM->getTargetTriple())));
  233. FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
  234. /* Register the AA manager first so that our version is the one used */
  235. AAManager AA = PB.buildDefaultAAPipeline();
  236. FAM.registerPass([&] { return std::move(AA); });
  237. #ifdef DEBUG_PASS
  238. StandardInstrumentations SI(true, false);
  239. SI.registerCallbacks(PIC, &FAM);
  240. #endif
  241. PB.registerFunctionAnalyses(FAM);
  242. PB.registerLoopAnalyses(LAM);
  243. PB.registerModuleAnalyses(MAM);
  244. PB.registerCGSCCAnalyses(CGAM);
  245. PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
  246. #if LLVM_VERSION_MAJOR <= 13
  247. PassBuilder::OptimizationLevel OL;
  248. switch (comp_ctx->opt_level) {
  249. case 0:
  250. OL = PassBuilder::OptimizationLevel::O0;
  251. break;
  252. case 1:
  253. OL = PassBuilder::OptimizationLevel::O1;
  254. break;
  255. case 2:
  256. OL = PassBuilder::OptimizationLevel::O2;
  257. break;
  258. case 3:
  259. default:
  260. OL = PassBuilder::OptimizationLevel::O3;
  261. break;
  262. }
  263. #else
  264. OptimizationLevel OL;
  265. switch (comp_ctx->opt_level) {
  266. case 0:
  267. OL = OptimizationLevel::O0;
  268. break;
  269. case 1:
  270. OL = OptimizationLevel::O1;
  271. break;
  272. case 2:
  273. OL = OptimizationLevel::O2;
  274. break;
  275. case 3:
  276. default:
  277. OL = OptimizationLevel::O3;
  278. break;
  279. }
  280. #endif /* end of LLVM_VERSION_MAJOR */
  281. bool disable_llvm_lto = comp_ctx->disable_llvm_lto;
  282. #if WASM_ENABLE_SPEC_TEST != 0
  283. disable_llvm_lto = true;
  284. #endif
  285. Module *M = reinterpret_cast<Module *>(module);
  286. if (disable_llvm_lto) {
  287. for (Function &F : *M) {
  288. F.addFnAttr("disable-tail-calls", "true");
  289. }
  290. }
  291. ModulePassManager MPM;
  292. if (comp_ctx->is_jit_mode) {
  293. const char *Passes =
  294. "mem2reg,instcombine,simplifycfg,jump-threading,indvars";
  295. ExitOnErr(PB.parsePassPipeline(MPM, Passes));
  296. }
  297. else {
  298. FunctionPassManager FPM;
  299. /* Apply Vectorize related passes for AOT mode */
  300. FPM.addPass(LoopVectorizePass());
  301. FPM.addPass(SLPVectorizerPass());
  302. FPM.addPass(LoadStoreVectorizerPass());
  303. if (comp_ctx->enable_llvm_pgo || comp_ctx->use_prof_file) {
  304. LICMOptions licm_opt;
  305. /* LICM pass: loop invariant code motion, attempting to remove
  306. as much code from the body of a loop as possible. Experiments
  307. show it is good to enable it when pgo is enabled. */
  308. FPM.addPass(
  309. createFunctionToLoopPassAdaptor(LICMPass(licm_opt), true));
  310. }
  311. /*
  312. FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass()));
  313. FPM.addPass(createFunctionToLoopPassAdaptor(SimpleLoopUnswitchPass()));
  314. */
  315. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
  316. if (!disable_llvm_lto) {
  317. /* Apply LTO for AOT mode */
  318. if (comp_ctx->comp_data->func_count >= 10
  319. || comp_ctx->enable_llvm_pgo || comp_ctx->use_prof_file)
  320. /* Add the pre-link optimizations if the func count
  321. is large enough or PGO is enabled */
  322. MPM.addPass(PB.buildLTOPreLinkDefaultPipeline(OL));
  323. else
  324. MPM.addPass(PB.buildLTODefaultPipeline(OL, NULL));
  325. }
  326. else {
  327. MPM.addPass(PB.buildPerModuleDefaultPipeline(OL));
  328. }
  329. }
  330. MPM.run(*M, MAM);
  331. }
  332. char *
  333. aot_compress_aot_func_names(AOTCompContext *comp_ctx, uint32 *p_size)
  334. {
  335. std::vector<std::string> NameStrs;
  336. std::string Result;
  337. char buf[32], *compressed_str;
  338. uint32 compressed_str_len, i;
  339. for (i = 0; i < comp_ctx->func_ctx_count; i++) {
  340. snprintf(buf, sizeof(buf), "%s%d", AOT_FUNC_PREFIX, i);
  341. std::string str(buf);
  342. NameStrs.push_back(str);
  343. }
  344. if (collectPGOFuncNameStrings(NameStrs, true, Result)) {
  345. aot_set_last_error("collect pgo func name strings failed");
  346. return NULL;
  347. }
  348. compressed_str_len = Result.size();
  349. if (!(compressed_str = (char *)wasm_runtime_malloc(compressed_str_len))) {
  350. aot_set_last_error("allocate memory failed");
  351. return NULL;
  352. }
  353. bh_memcpy_s(compressed_str, compressed_str_len, Result.c_str(),
  354. compressed_str_len);
  355. *p_size = compressed_str_len;
  356. return compressed_str;
  357. }