aot_llvm_extra.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402
  1. /*
  2. * Copyright (C) 2019 Intel Corporation. All rights reserved.
  3. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. */
  5. #include <llvm/Passes/StandardInstrumentations.h>
  6. #include <llvm/Support/Error.h>
  7. #if LLVM_VERSION_MAJOR < 17
  8. #include <llvm/ADT/None.h>
  9. #include <llvm/ADT/Optional.h>
  10. #include <llvm/ADT/Triple.h>
  11. #endif
  12. #include <llvm/ADT/SmallVector.h>
  13. #include <llvm/ADT/Twine.h>
  14. #include <llvm/Analysis/TargetTransformInfo.h>
  15. #include <llvm/CodeGen/TargetPassConfig.h>
  16. #include <llvm/ExecutionEngine/ExecutionEngine.h>
  17. #include <llvm/MC/MCSubtargetInfo.h>
  18. #include <llvm/Support/TargetSelect.h>
  19. #include <llvm/Target/TargetMachine.h>
  20. #include <llvm-c/Core.h>
  21. #include <llvm-c/ExecutionEngine.h>
  22. #if LLVM_VERSION_MAJOR < 17
  23. #include <llvm-c/Initialization.h>
  24. #endif
  25. #include <llvm/ExecutionEngine/GenericValue.h>
  26. #include <llvm/ExecutionEngine/JITEventListener.h>
  27. #include <llvm/ExecutionEngine/RTDyldMemoryManager.h>
  28. #include <llvm/ExecutionEngine/Orc/LLJIT.h>
  29. #include <llvm/IR/DerivedTypes.h>
  30. #include <llvm/IR/Module.h>
  31. #include <llvm/IR/Instructions.h>
  32. #include <llvm/IR/IntrinsicInst.h>
  33. #include <llvm/IR/PassManager.h>
  34. #include <llvm/Support/CommandLine.h>
  35. #include <llvm/Support/ErrorHandling.h>
  36. #if LLVM_VERSION_MAJOR >= 17
  37. #include <llvm/Support/PGOOptions.h>
  #include <llvm/Support/VirtualFileSystem.h>
  38. #endif
  39. #include <llvm/Target/CodeGenCWrappers.h>
  40. #include <llvm/Target/TargetMachine.h>
  41. #include <llvm/Target/TargetOptions.h>
  42. #include <llvm/Transforms/Utils/LowerMemIntrinsics.h>
  43. #include <llvm/Transforms/Vectorize/LoopVectorize.h>
  44. #include <llvm/Transforms/Vectorize/LoadStoreVectorizer.h>
  45. #include <llvm/Transforms/Vectorize/SLPVectorizer.h>
  46. #include <llvm/Transforms/Scalar/LoopRotation.h>
  47. #include <llvm/Transforms/Scalar/SimpleLoopUnswitch.h>
  48. #include <llvm/Transforms/Scalar/LICM.h>
  49. #include <llvm/Transforms/Scalar/GVN.h>
  50. #include <llvm/Passes/PassBuilder.h>
  51. #include <llvm/Analysis/TargetLibraryInfo.h>
  52. #if LLVM_VERSION_MAJOR >= 12
  53. #include <llvm/Analysis/AliasAnalysis.h>
  54. #endif
  55. #include <llvm/ProfileData/InstrProf.h>
  56. #include <cstring>
  57. #include "../aot/aot_runtime.h"
  58. #include "aot_llvm.h"
  59. using namespace llvm;
  60. using namespace llvm::orc;
  /* llvm/ADT/Optional.h is only included above for LLVM < 17. For newer
     versions, provide llvm::Optional as an alias of std::optional so that
     the Optional<PGOOptions> declarations further down compile unchanged. */
  61. #if LLVM_VERSION_MAJOR >= 17
  62. namespace llvm {
  63. template<typename T>
  64. using Optional = std::optional<T>;
  65. }
  66. #endif
  /* Prototypes wrapped in LLVM_C_EXTERN_C_BEGIN/END (presumably so they get
     C linkage and can be called from the C part of the compiler).
     NOTE(review): aot_add_expand_memory_op_pass and
     aot_add_simple_loop_unswitch_pass are declared here but have no
     definition in this file - confirm whether they are still needed. */
  67. LLVM_C_EXTERN_C_BEGIN
  68. bool
  69. aot_check_simd_compatibility(const char *arch_c_str, const char *cpu_c_str);
  70. void
  71. aot_add_expand_memory_op_pass(LLVMPassManagerRef pass);
  72. void
  73. aot_add_simple_loop_unswitch_pass(LLVMPassManagerRef pass);
  74. void
  75. aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module);
  76. LLVM_C_EXTERN_C_END
  /* Error handler used below to wrap the results of
     PassBuilder::parsePassPipeline(); per LLVM's ExitOnError helper it
     terminates the process when the wrapped llvm::Error is a failure. */
  77. ExitOnError ExitOnErr;
  78. class ExpandMemoryOpPass : public PassInfoMixin<ExpandMemoryOpPass>
  79. {
  80. public:
  81. PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
  82. };
  83. PreservedAnalyses
  84. ExpandMemoryOpPass::run(Function &F, FunctionAnalysisManager &AM)
  85. {
  86. SmallVector<MemIntrinsic *, 16> MemCalls;
  87. /* Iterate over all instructions in the function, looking for memcpy,
  88. * memmove, and memset. When we find one, expand it into a loop. */
  89. for (auto &BB : F) {
  90. for (auto &Inst : BB) {
  91. if (auto *Memcpy = dyn_cast_or_null<MemCpyInst>(&Inst)) {
  92. MemCalls.push_back(Memcpy);
  93. }
  94. else if (auto *Memmove = dyn_cast_or_null<MemMoveInst>(&Inst)) {
  95. MemCalls.push_back(Memmove);
  96. }
  97. else if (auto *Memset = dyn_cast_or_null<MemSetInst>(&Inst)) {
  98. MemCalls.push_back(Memset);
  99. }
  100. }
  101. }
  102. for (MemIntrinsic *MemCall : MemCalls) {
  103. if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
  104. Function *ParentFunc = Memcpy->getParent()->getParent();
  105. const TargetTransformInfo &TTI =
  106. AM.getResult<TargetIRAnalysis>(*ParentFunc);
  107. expandMemCpyAsLoop(Memcpy, TTI);
  108. Memcpy->eraseFromParent();
  109. }
  110. else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
  111. #if LLVM_VERSION_MAJOR >= 17
  112. Function *ParentFunc = Memmove->getParent()->getParent();
  113. const TargetTransformInfo &TTI =
  114. AM.getResult<TargetIRAnalysis>(*ParentFunc);
  115. expandMemMoveAsLoop(Memmove, TTI);
  116. #else
  117. expandMemMoveAsLoop(Memmove);
  118. #endif
  119. Memmove->eraseFromParent();
  120. }
  121. else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
  122. expandMemSetAsLoop(Memset);
  123. Memset->eraseFromParent();
  124. }
  125. }
  126. PreservedAnalyses PA;
  127. PA.preserveSet<CFGAnalyses>();
  128. return PA;
  129. }
  /**
   * Check whether the given target can execute the SIMD code emitted by the
   * compiler.
   *
   * When WASM_ENABLE_SIMD is 0 the check is skipped and true is returned.
   * Otherwise a target machine is selected for arch/cpu and its subtarget
   * features are queried: "+sse4.1" on x86_64, "+neon" on aarch64. Any other
   * architecture, a NULL argument, or a target that cannot be selected
   * yields false.
   *
   * @param arch_c_str architecture name used to build the target triple
   * @param cpu_c_str  CPU name passed to the target selection
   *
   * @return true if the target is considered SIMD compatible
   */
  bool
  aot_check_simd_compatibility(const char *arch_c_str, const char *cpu_c_str)
  {
  #if WASM_ENABLE_SIMD == 0
      /* SIMD is compiled out, there is nothing to verify */
      (void)arch_c_str;
      (void)cpu_c_str;
      return true;
  #else
      if (arch_c_str == nullptr || cpu_c_str == nullptr)
          return false;

      llvm::Triple triple(arch_c_str, "", "");
      llvm::SmallVector<std::string, 1> noAttributes;
      std::unique_ptr<llvm::TargetMachine> machine(
          llvm::EngineBuilder().selectTarget(triple, "", std::string(cpu_c_str),
                                             noAttributes));
      if (!machine)
          return false;

      const llvm::MCSubtargetInfo *subtarget = machine->getMCSubtargetInfo();
      if (!subtarget)
          return false;

      /* Minimum feature required per supported architecture */
      switch (machine->getTargetTriple().getArch()) {
          case llvm::Triple::x86_64:
              return subtarget->checkFeatures("+sse4.1");
          case llvm::Triple::aarch64:
              return subtarget->checkFeatures("+neon");
          default:
              return false;
      }
  #endif /* WASM_ENABLE_SIMD */
  }
  167. void
  168. aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
  169. {
  170. TargetMachine *TM =
  171. reinterpret_cast<TargetMachine *>(comp_ctx->target_machine);
  172. PipelineTuningOptions PTO;
  173. PTO.LoopVectorization = true;
  174. PTO.SLPVectorization = true;
  175. PTO.LoopUnrolling = true;
  176. #if LLVM_VERSION_MAJOR >= 16
  177. Optional<PGOOptions> PGO = std::nullopt;
  178. #else
  179. Optional<PGOOptions> PGO = llvm::None;
  180. #endif
  181. // TODO
  182. #if LLVM_VERSION_MAJOR < 17
  183. if (comp_ctx->enable_llvm_pgo) {
  184. /* Disable static counter allocation for value profiler,
  185. it will be allocated by runtime */
  186. const char *argv[] = { "", "-vp-static-alloc=false" };
  187. cl::ParseCommandLineOptions(2, argv);
  188. PGO = PGOOptions("", "", "", PGOOptions::IRInstr);
  189. }
  190. else if (comp_ctx->use_prof_file) {
  191. PGO = PGOOptions(comp_ctx->use_prof_file, "", "", PGOOptions::IRUse);
  192. }
  193. #endif
  194. #ifdef DEBUG_PASS
  195. PassInstrumentationCallbacks PIC;
  196. PassBuilder PB(TM, PTO, PGO, &PIC);
  197. #else
  198. #if LLVM_VERSION_MAJOR == 12
  199. PassBuilder PB(false, TM, PTO, PGO);
  200. #else
  201. PassBuilder PB(TM, PTO, PGO);
  202. #endif
  203. #endif
  204. /* Register all the basic analyses with the managers */
  205. LoopAnalysisManager LAM;
  206. FunctionAnalysisManager FAM;
  207. CGSCCAnalysisManager CGAM;
  208. ModuleAnalysisManager MAM;
  209. /* Register the target library analysis directly and give it a
  210. customized preset TLI */
  211. std::unique_ptr<TargetLibraryInfoImpl> TLII(
  212. new TargetLibraryInfoImpl(Triple(TM->getTargetTriple())));
  213. FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
  214. /* Register the AA manager first so that our version is the one used */
  215. AAManager AA = PB.buildDefaultAAPipeline();
  216. FAM.registerPass([&] { return std::move(AA); });
  217. #ifdef DEBUG_PASS
  218. StandardInstrumentations SI(true, false);
  219. SI.registerCallbacks(PIC, &FAM);
  220. #endif
  221. PB.registerFunctionAnalyses(FAM);
  222. PB.registerLoopAnalyses(LAM);
  223. PB.registerModuleAnalyses(MAM);
  224. PB.registerCGSCCAnalyses(CGAM);
  225. PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
  226. #if LLVM_VERSION_MAJOR <= 13
  227. PassBuilder::OptimizationLevel OL;
  228. switch (comp_ctx->opt_level) {
  229. case 0:
  230. OL = PassBuilder::OptimizationLevel::O0;
  231. break;
  232. case 1:
  233. OL = PassBuilder::OptimizationLevel::O1;
  234. break;
  235. case 2:
  236. OL = PassBuilder::OptimizationLevel::O2;
  237. break;
  238. case 3:
  239. default:
  240. OL = PassBuilder::OptimizationLevel::O3;
  241. break;
  242. }
  243. #else
  244. OptimizationLevel OL;
  245. switch (comp_ctx->opt_level) {
  246. case 0:
  247. OL = OptimizationLevel::O0;
  248. break;
  249. case 1:
  250. OL = OptimizationLevel::O1;
  251. break;
  252. case 2:
  253. OL = OptimizationLevel::O2;
  254. break;
  255. case 3:
  256. default:
  257. OL = OptimizationLevel::O3;
  258. break;
  259. }
  260. #endif /* end of LLVM_VERSION_MAJOR */
  261. bool disable_llvm_lto = comp_ctx->disable_llvm_lto;
  262. #if WASM_ENABLE_SPEC_TEST != 0
  263. disable_llvm_lto = true;
  264. #endif
  265. Module *M = reinterpret_cast<Module *>(module);
  266. if (disable_llvm_lto) {
  267. for (Function &F : *M) {
  268. F.addFnAttr("disable-tail-calls", "true");
  269. }
  270. }
  271. ModulePassManager MPM;
  272. if (comp_ctx->is_jit_mode) {
  273. const char *Passes =
  274. "mem2reg,instcombine,simplifycfg,jump-threading,indvars";
  275. ExitOnErr(PB.parsePassPipeline(MPM, Passes));
  276. }
  277. else {
  278. FunctionPassManager FPM;
  279. /* Apply Vectorize related passes for AOT mode */
  280. FPM.addPass(LoopVectorizePass());
  281. FPM.addPass(SLPVectorizerPass());
  282. FPM.addPass(LoadStoreVectorizerPass());
  283. if (comp_ctx->enable_llvm_pgo || comp_ctx->use_prof_file) {
  284. /* LICM pass: loop invariant code motion, attempting to remove
  285. as much code from the body of a loop as possible. Experiments
  286. show it is good to enable it when pgo is enabled. */
  287. #if LLVM_VERSION_MAJOR >= 15
  288. LICMOptions licm_opt;
  289. FPM.addPass(
  290. createFunctionToLoopPassAdaptor(LICMPass(licm_opt), true));
  291. #else
  292. FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), true));
  293. #endif
  294. }
  295. /*
  296. FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass()));
  297. FPM.addPass(createFunctionToLoopPassAdaptor(SimpleLoopUnswitchPass()));
  298. */
  299. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
  300. if (comp_ctx->llvm_passes) {
  301. ExitOnErr(PB.parsePassPipeline(MPM, comp_ctx->llvm_passes));
  302. }
  303. if (!disable_llvm_lto) {
  304. /* Apply LTO for AOT mode */
  305. if (comp_ctx->comp_data->func_count >= 10
  306. || comp_ctx->enable_llvm_pgo || comp_ctx->use_prof_file)
  307. /* Add the pre-link optimizations if the func count
  308. is large enough or PGO is enabled */
  309. MPM.addPass(PB.buildLTOPreLinkDefaultPipeline(OL));
  310. else
  311. MPM.addPass(PB.buildLTODefaultPipeline(OL, NULL));
  312. }
  313. else {
  314. MPM.addPass(PB.buildPerModuleDefaultPipeline(OL));
  315. }
  316. /* Run specific passes for AOT indirect mode in last since general
  317. optimization may create some intrinsic function calls like
  318. llvm.memset, so let's remove these function calls here. */
  319. if (comp_ctx->is_indirect_mode) {
  320. FunctionPassManager FPM1;
  321. FPM1.addPass(ExpandMemoryOpPass());
  322. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM1)));
  323. }
  324. }
  325. MPM.run(*M, MAM);
  326. }
  327. char *
  328. aot_compress_aot_func_names(AOTCompContext *comp_ctx, uint32 *p_size)
  329. {
  330. std::vector<std::string> NameStrs;
  331. std::string Result;
  332. char buf[32], *compressed_str;
  333. uint32 compressed_str_len, i;
  334. for (i = 0; i < comp_ctx->func_ctx_count; i++) {
  335. snprintf(buf, sizeof(buf), "%s%d", AOT_FUNC_PREFIX, i);
  336. std::string str(buf);
  337. NameStrs.push_back(str);
  338. }
  339. if (collectPGOFuncNameStrings(NameStrs, true, Result)) {
  340. aot_set_last_error("collect pgo func name strings failed");
  341. return NULL;
  342. }
  343. compressed_str_len = Result.size();
  344. if (!(compressed_str = (char *)wasm_runtime_malloc(compressed_str_len))) {
  345. aot_set_last_error("allocate memory failed");
  346. return NULL;
  347. }
  348. bh_memcpy_s(compressed_str, compressed_str_len, Result.c_str(),
  349. compressed_str_len);
  350. *p_size = compressed_str_len;
  351. return compressed_str;
  352. }