aot_llvm_extra.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422
  1. /*
  2. * Copyright (C) 2019 Intel Corporation. All rights reserved.
  3. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. */
  5. #include <llvm/Passes/StandardInstrumentations.h>
  6. #include <llvm/Support/Error.h>
  7. #if LLVM_VERSION_MAJOR < 17
  8. #include <llvm/ADT/None.h>
  9. #include <llvm/ADT/Optional.h>
  10. #include <llvm/ADT/Triple.h>
  11. #endif
  12. #include <llvm/ADT/SmallVector.h>
  13. #include <llvm/ADT/Twine.h>
  14. #include <llvm/Analysis/TargetTransformInfo.h>
  15. #include <llvm/CodeGen/TargetPassConfig.h>
  16. #include <llvm/ExecutionEngine/ExecutionEngine.h>
  17. #include <llvm/MC/MCSubtargetInfo.h>
  18. #include <llvm/Support/TargetSelect.h>
  19. #include <llvm/Target/TargetMachine.h>
  20. #include <llvm-c/Core.h>
  21. #include <llvm-c/ExecutionEngine.h>
  22. #if LLVM_VERSION_MAJOR < 17
  23. #include <llvm-c/Initialization.h>
  24. #endif
  25. #include <llvm/ExecutionEngine/GenericValue.h>
  26. #include <llvm/ExecutionEngine/JITEventListener.h>
  27. #include <llvm/ExecutionEngine/RTDyldMemoryManager.h>
  28. #include <llvm/ExecutionEngine/Orc/LLJIT.h>
  29. #include <llvm/IR/DerivedTypes.h>
  30. #include <llvm/IR/Module.h>
  31. #include <llvm/IR/Instructions.h>
  32. #include <llvm/IR/IntrinsicInst.h>
  33. #include <llvm/IR/PassManager.h>
  34. #include <llvm/Support/CommandLine.h>
  35. #include <llvm/Support/ErrorHandling.h>
  36. #if LLVM_VERSION_MAJOR >= 17
  37. #include <llvm/Support/PGOOptions.h>
  38. #include <llvm/Support/VirtualFileSystem.h>
  39. #endif
  40. #include <llvm/Target/CodeGenCWrappers.h>
  41. #include <llvm/Target/TargetMachine.h>
  42. #include <llvm/Target/TargetOptions.h>
  43. #include <llvm/Transforms/Utils/LowerMemIntrinsics.h>
  44. #include <llvm/Transforms/Vectorize/LoopVectorize.h>
  45. #include <llvm/Transforms/Vectorize/LoadStoreVectorizer.h>
  46. #include <llvm/Transforms/Vectorize/SLPVectorizer.h>
  47. #include <llvm/Transforms/Scalar/LoopRotation.h>
  48. #include <llvm/Transforms/Scalar/SimpleLoopUnswitch.h>
  49. #include <llvm/Transforms/Scalar/LICM.h>
  50. #include <llvm/Transforms/Scalar/GVN.h>
  51. #include <llvm/Passes/PassBuilder.h>
  52. #include <llvm/Analysis/TargetLibraryInfo.h>
  53. #if LLVM_VERSION_MAJOR >= 12
  54. #include <llvm/Analysis/AliasAnalysis.h>
  55. #endif
  56. #include <llvm/ProfileData/InstrProf.h>
  57. #include <cstring>
  58. #include "../aot/aot_runtime.h"
  59. #include "aot_llvm.h"
  60. using namespace llvm;
  61. using namespace llvm::orc;
  62. #if LLVM_VERSION_MAJOR >= 17
  63. namespace llvm {
  64. template<typename T>
  65. using Optional = std::optional<T>;
  66. }
  67. #endif
  68. LLVM_C_EXTERN_C_BEGIN
  69. bool
  70. aot_check_simd_compatibility(const char *arch_c_str, const char *cpu_c_str);
  71. void
  72. aot_add_expand_memory_op_pass(LLVMPassManagerRef pass);
  73. void
  74. aot_add_simple_loop_unswitch_pass(LLVMPassManagerRef pass);
  75. void
  76. aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module);
  77. LLVM_C_EXTERN_C_END
  78. ExitOnError ExitOnErr;
  79. class ExpandMemoryOpPass : public PassInfoMixin<ExpandMemoryOpPass>
  80. {
  81. public:
  82. PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
  83. };
  84. PreservedAnalyses
  85. ExpandMemoryOpPass::run(Function &F, FunctionAnalysisManager &AM)
  86. {
  87. SmallVector<MemIntrinsic *, 16> MemCalls;
  88. /* Iterate over all instructions in the function, looking for memcpy,
  89. * memmove, and memset. When we find one, expand it into a loop. */
  90. for (auto &BB : F) {
  91. for (auto &Inst : BB) {
  92. if (auto *Memcpy = dyn_cast_or_null<MemCpyInst>(&Inst)) {
  93. MemCalls.push_back(Memcpy);
  94. }
  95. else if (auto *Memmove = dyn_cast_or_null<MemMoveInst>(&Inst)) {
  96. MemCalls.push_back(Memmove);
  97. }
  98. else if (auto *Memset = dyn_cast_or_null<MemSetInst>(&Inst)) {
  99. MemCalls.push_back(Memset);
  100. }
  101. }
  102. }
  103. for (MemIntrinsic *MemCall : MemCalls) {
  104. if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
  105. Function *ParentFunc = Memcpy->getParent()->getParent();
  106. const TargetTransformInfo &TTI =
  107. AM.getResult<TargetIRAnalysis>(*ParentFunc);
  108. expandMemCpyAsLoop(Memcpy, TTI);
  109. Memcpy->eraseFromParent();
  110. }
  111. else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
  112. #if LLVM_VERSION_MAJOR >= 17
  113. Function *ParentFunc = Memmove->getParent()->getParent();
  114. const TargetTransformInfo &TTI =
  115. AM.getResult<TargetIRAnalysis>(*ParentFunc);
  116. expandMemMoveAsLoop(Memmove, TTI);
  117. #else
  118. expandMemMoveAsLoop(Memmove);
  119. #endif
  120. Memmove->eraseFromParent();
  121. }
  122. else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
  123. expandMemSetAsLoop(Memset);
  124. Memset->eraseFromParent();
  125. }
  126. }
  127. PreservedAnalyses PA;
  128. PA.preserveSet<CFGAnalyses>();
  129. return PA;
  130. }
  131. bool
  132. aot_check_simd_compatibility(const char *arch_c_str, const char *cpu_c_str)
  133. {
  134. #if WASM_ENABLE_SIMD != 0
  135. if (!arch_c_str || !cpu_c_str) {
  136. return false;
  137. }
  138. llvm::SmallVector<std::string, 1> targetAttributes;
  139. llvm::Triple targetTriple(arch_c_str, "", "");
  140. auto targetMachine =
  141. std::unique_ptr<llvm::TargetMachine>(llvm::EngineBuilder().selectTarget(
  142. targetTriple, "", std::string(cpu_c_str), targetAttributes));
  143. if (!targetMachine) {
  144. return false;
  145. }
  146. const llvm::Triple::ArchType targetArch =
  147. targetMachine->getTargetTriple().getArch();
  148. const llvm::MCSubtargetInfo *subTargetInfo =
  149. targetMachine->getMCSubtargetInfo();
  150. if (subTargetInfo == nullptr) {
  151. return false;
  152. }
  153. if (targetArch == llvm::Triple::x86_64) {
  154. return subTargetInfo->checkFeatures("+sse4.1");
  155. }
  156. else if (targetArch == llvm::Triple::aarch64) {
  157. return subTargetInfo->checkFeatures("+neon");
  158. }
  159. else {
  160. return false;
  161. }
  162. #else
  163. (void)arch_c_str;
  164. (void)cpu_c_str;
  165. return true;
  166. #endif /* WASM_ENABLE_SIMD */
  167. }
  168. void
  169. aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
  170. {
  171. TargetMachine *TM =
  172. reinterpret_cast<TargetMachine *>(comp_ctx->target_machine);
  173. PipelineTuningOptions PTO;
  174. PTO.LoopVectorization = true;
  175. PTO.SLPVectorization = true;
  176. PTO.LoopUnrolling = true;
  177. #if LLVM_VERSION_MAJOR >= 16
  178. Optional<PGOOptions> PGO = std::nullopt;
  179. #else
  180. Optional<PGOOptions> PGO = llvm::None;
  181. #endif
  182. if (comp_ctx->enable_llvm_pgo) {
  183. /* Disable static counter allocation for value profiler,
  184. it will be allocated by runtime */
  185. const char *argv[] = { "", "-vp-static-alloc=false" };
  186. cl::ParseCommandLineOptions(2, argv);
  187. #if LLVM_VERSION_MAJOR < 17
  188. PGO = PGOOptions("", "", "", PGOOptions::IRInstr);
  189. #else
  190. auto FS = vfs::getRealFileSystem();
  191. PGO = PGOOptions("", "", "", "", FS, PGOOptions::IRInstr);
  192. #endif
  193. }
  194. else if (comp_ctx->use_prof_file) {
  195. #if LLVM_VERSION_MAJOR < 17
  196. PGO = PGOOptions(comp_ctx->use_prof_file, "", "", PGOOptions::IRUse);
  197. #else
  198. auto FS = vfs::getRealFileSystem();
  199. PGO = PGOOptions(comp_ctx->use_prof_file, "", "", "", FS,
  200. PGOOptions::IRUse);
  201. #endif
  202. }
  203. #ifdef DEBUG_PASS
  204. PassInstrumentationCallbacks PIC;
  205. PassBuilder PB(TM, PTO, PGO, &PIC);
  206. #else
  207. #if LLVM_VERSION_MAJOR == 12
  208. PassBuilder PB(false, TM, PTO, PGO);
  209. #else
  210. PassBuilder PB(TM, PTO, PGO);
  211. #endif
  212. #endif
  213. /* Register all the basic analyses with the managers */
  214. LoopAnalysisManager LAM;
  215. FunctionAnalysisManager FAM;
  216. CGSCCAnalysisManager CGAM;
  217. ModuleAnalysisManager MAM;
  218. /* Register the target library analysis directly and give it a
  219. customized preset TLI */
  220. std::unique_ptr<TargetLibraryInfoImpl> TLII(
  221. new TargetLibraryInfoImpl(Triple(TM->getTargetTriple())));
  222. FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
  223. /* Register the AA manager first so that our version is the one used */
  224. AAManager AA = PB.buildDefaultAAPipeline();
  225. FAM.registerPass([&] { return std::move(AA); });
  226. #ifdef DEBUG_PASS
  227. StandardInstrumentations SI(true, false);
  228. SI.registerCallbacks(PIC, &FAM);
  229. #endif
  230. PB.registerFunctionAnalyses(FAM);
  231. PB.registerLoopAnalyses(LAM);
  232. PB.registerModuleAnalyses(MAM);
  233. PB.registerCGSCCAnalyses(CGAM);
  234. PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
  235. #if LLVM_VERSION_MAJOR <= 13
  236. PassBuilder::OptimizationLevel OL;
  237. switch (comp_ctx->opt_level) {
  238. case 0:
  239. OL = PassBuilder::OptimizationLevel::O0;
  240. break;
  241. case 1:
  242. OL = PassBuilder::OptimizationLevel::O1;
  243. break;
  244. case 2:
  245. OL = PassBuilder::OptimizationLevel::O2;
  246. break;
  247. case 3:
  248. default:
  249. OL = PassBuilder::OptimizationLevel::O3;
  250. break;
  251. }
  252. #else
  253. OptimizationLevel OL;
  254. switch (comp_ctx->opt_level) {
  255. case 0:
  256. OL = OptimizationLevel::O0;
  257. break;
  258. case 1:
  259. OL = OptimizationLevel::O1;
  260. break;
  261. case 2:
  262. OL = OptimizationLevel::O2;
  263. break;
  264. case 3:
  265. default:
  266. OL = OptimizationLevel::O3;
  267. break;
  268. }
  269. #endif /* end of LLVM_VERSION_MAJOR */
  270. bool disable_llvm_lto = comp_ctx->disable_llvm_lto;
  271. #if WASM_ENABLE_SPEC_TEST != 0
  272. disable_llvm_lto = true;
  273. #endif
  274. Module *M = reinterpret_cast<Module *>(module);
  275. if (disable_llvm_lto) {
  276. for (Function &F : *M) {
  277. F.addFnAttr("disable-tail-calls", "true");
  278. }
  279. }
  280. ModulePassManager MPM;
  281. if (comp_ctx->is_jit_mode) {
  282. const char *Passes =
  283. "mem2reg,instcombine,simplifycfg,jump-threading,indvars";
  284. ExitOnErr(PB.parsePassPipeline(MPM, Passes));
  285. }
  286. else {
  287. FunctionPassManager FPM;
  288. /* Apply Vectorize related passes for AOT mode */
  289. FPM.addPass(LoopVectorizePass());
  290. FPM.addPass(SLPVectorizerPass());
  291. FPM.addPass(LoadStoreVectorizerPass());
  292. if (comp_ctx->enable_llvm_pgo || comp_ctx->use_prof_file) {
  293. /* LICM pass: loop invariant code motion, attempting to remove
  294. as much code from the body of a loop as possible. Experiments
  295. show it is good to enable it when pgo is enabled. */
  296. #if LLVM_VERSION_MAJOR >= 15
  297. LICMOptions licm_opt;
  298. FPM.addPass(
  299. createFunctionToLoopPassAdaptor(LICMPass(licm_opt), true));
  300. #else
  301. FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), true));
  302. #endif
  303. }
  304. /*
  305. FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass()));
  306. FPM.addPass(createFunctionToLoopPassAdaptor(SimpleLoopUnswitchPass()));
  307. */
  308. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
  309. if (comp_ctx->llvm_passes) {
  310. ExitOnErr(PB.parsePassPipeline(MPM, comp_ctx->llvm_passes));
  311. }
  312. if (
  313. #if LLVM_VERSION_MAJOR <= 13
  314. PassBuilder::OptimizationLevel::O0 == OL
  315. #else
  316. OptimizationLevel::O0 == OL
  317. #endif
  318. ) {
  319. MPM.addPass(PB.buildO0DefaultPipeline(OL));
  320. }
  321. else {
  322. if (!disable_llvm_lto) {
  323. /* Apply LTO for AOT mode */
  324. if (comp_ctx->comp_data->func_count >= 10
  325. || comp_ctx->enable_llvm_pgo || comp_ctx->use_prof_file)
  326. /* Add the pre-link optimizations if the func count
  327. is large enough or PGO is enabled */
  328. MPM.addPass(PB.buildLTOPreLinkDefaultPipeline(OL));
  329. else
  330. MPM.addPass(PB.buildLTODefaultPipeline(OL, NULL));
  331. }
  332. else {
  333. MPM.addPass(PB.buildPerModuleDefaultPipeline(OL));
  334. }
  335. }
  336. /* Run specific passes for AOT indirect mode in last since general
  337. optimization may create some intrinsic function calls like
  338. llvm.memset, so let's remove these function calls here. */
  339. if (comp_ctx->is_indirect_mode) {
  340. FunctionPassManager FPM1;
  341. FPM1.addPass(ExpandMemoryOpPass());
  342. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM1)));
  343. }
  344. }
  345. MPM.run(*M, MAM);
  346. }
  347. char *
  348. aot_compress_aot_func_names(AOTCompContext *comp_ctx, uint32 *p_size)
  349. {
  350. std::vector<std::string> NameStrs;
  351. std::string Result;
  352. char buf[32], *compressed_str;
  353. uint32 compressed_str_len, i;
  354. for (i = 0; i < comp_ctx->func_ctx_count; i++) {
  355. snprintf(buf, sizeof(buf), "%s%d", AOT_FUNC_PREFIX, i);
  356. std::string str(buf);
  357. NameStrs.push_back(str);
  358. }
  359. if (collectPGOFuncNameStrings(NameStrs, true, Result)) {
  360. aot_set_last_error("collect pgo func name strings failed");
  361. return NULL;
  362. }
  363. compressed_str_len = Result.size();
  364. if (!(compressed_str = (char *)wasm_runtime_malloc(compressed_str_len))) {
  365. aot_set_last_error("allocate memory failed");
  366. return NULL;
  367. }
  368. bh_memcpy_s(compressed_str, compressed_str_len, Result.c_str(),
  369. compressed_str_len);
  370. *p_size = compressed_str_len;
  371. return compressed_str;
  372. }