aot_llvm_extra.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. /*
  2. * Copyright (C) 2019 Intel Corporation. All rights reserved.
  3. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. */
  5. #include <llvm/Passes/StandardInstrumentations.h>
  6. #include <llvm/Support/Error.h>
  7. #if LLVM_VERSION_MAJOR < 17
  8. #include <llvm/ADT/None.h>
  9. #include <llvm/ADT/Optional.h>
  10. #include <llvm/ADT/Triple.h>
  11. #endif
  12. #include <llvm/ADT/SmallVector.h>
  13. #include <llvm/ADT/Twine.h>
  14. #include <llvm/Analysis/TargetTransformInfo.h>
  15. #include <llvm/CodeGen/TargetPassConfig.h>
  16. #include <llvm/ExecutionEngine/ExecutionEngine.h>
  17. #include <llvm/MC/MCSubtargetInfo.h>
  18. #include <llvm/Support/TargetSelect.h>
  19. #include <llvm/Target/TargetMachine.h>
  20. #include <llvm-c/Core.h>
  21. #include <llvm-c/ExecutionEngine.h>
  22. #if LLVM_VERSION_MAJOR < 17
  23. #include <llvm-c/Initialization.h>
  24. #endif
  25. #include <llvm/ExecutionEngine/GenericValue.h>
  26. #include <llvm/ExecutionEngine/JITEventListener.h>
  27. #include <llvm/ExecutionEngine/RTDyldMemoryManager.h>
  28. #include <llvm/ExecutionEngine/Orc/LLJIT.h>
  29. #include <llvm/IR/DerivedTypes.h>
  30. #include <llvm/IR/Module.h>
  31. #include <llvm/IR/Instructions.h>
  32. #include <llvm/IR/IntrinsicInst.h>
  33. #include <llvm/IR/PassManager.h>
  34. #include <llvm/Support/CommandLine.h>
  35. #include <llvm/Support/ErrorHandling.h>
  36. #if LLVM_VERSION_MAJOR >= 17
  37. #include <llvm/Support/PGOOptions.h>
  38. #include <llvm/Support/VirtualFileSystem.h>
  39. #endif
  40. #include <llvm/Target/CodeGenCWrappers.h>
  41. #include <llvm/Target/TargetMachine.h>
  42. #include <llvm/Target/TargetOptions.h>
  43. #include <llvm/Transforms/Utils/LowerMemIntrinsics.h>
  44. #include <llvm/Transforms/Vectorize/LoopVectorize.h>
  45. #include <llvm/Transforms/Vectorize/LoadStoreVectorizer.h>
  46. #include <llvm/Transforms/Vectorize/SLPVectorizer.h>
  47. #include <llvm/Transforms/Vectorize/VectorCombine.h>
  48. #include <llvm/Transforms/Scalar/LoopRotation.h>
  49. #include <llvm/Transforms/Scalar/SimpleLoopUnswitch.h>
  50. #include <llvm/Transforms/Scalar/LICM.h>
  51. #include <llvm/Transforms/Scalar/GVN.h>
  52. #include <llvm/Passes/PassBuilder.h>
  53. #include <llvm/Analysis/TargetLibraryInfo.h>
  54. #if LLVM_VERSION_MAJOR >= 12
  55. #include <llvm/Analysis/AliasAnalysis.h>
  56. #endif
  57. #include <llvm/ProfileData/InstrProf.h>
  58. #include <cstring>
  59. #include "../aot/aot_runtime.h"
  60. #include "aot_llvm.h"
  61. using namespace llvm;
  62. using namespace llvm::orc;
  63. #if LLVM_VERSION_MAJOR >= 17
  64. namespace llvm {
  65. template<typename T>
  66. using Optional = std::optional<T>;
  67. }
  68. #endif
  69. LLVM_C_EXTERN_C_BEGIN
  70. bool
  71. aot_check_simd_compatibility(const char *arch_c_str, const char *cpu_c_str);
  72. void
  73. aot_add_expand_memory_op_pass(LLVMPassManagerRef pass);
  74. void
  75. aot_add_simple_loop_unswitch_pass(LLVMPassManagerRef pass);
  76. void
  77. aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module);
  78. LLVM_C_EXTERN_C_END
  79. ExitOnError ExitOnErr;
  80. class ExpandMemoryOpPass : public PassInfoMixin<ExpandMemoryOpPass>
  81. {
  82. public:
  83. PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
  84. };
  85. PreservedAnalyses
  86. ExpandMemoryOpPass::run(Function &F, FunctionAnalysisManager &AM)
  87. {
  88. SmallVector<MemIntrinsic *, 16> MemCalls;
  89. /* Iterate over all instructions in the function, looking for memcpy,
  90. * memmove, and memset. When we find one, expand it into a loop. */
  91. for (auto &BB : F) {
  92. for (auto &Inst : BB) {
  93. if (auto *Memcpy = dyn_cast_or_null<MemCpyInst>(&Inst)) {
  94. MemCalls.push_back(Memcpy);
  95. }
  96. else if (auto *Memmove = dyn_cast_or_null<MemMoveInst>(&Inst)) {
  97. MemCalls.push_back(Memmove);
  98. }
  99. else if (auto *Memset = dyn_cast_or_null<MemSetInst>(&Inst)) {
  100. MemCalls.push_back(Memset);
  101. }
  102. }
  103. }
  104. for (MemIntrinsic *MemCall : MemCalls) {
  105. if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
  106. Function *ParentFunc = Memcpy->getParent()->getParent();
  107. const TargetTransformInfo &TTI =
  108. AM.getResult<TargetIRAnalysis>(*ParentFunc);
  109. expandMemCpyAsLoop(Memcpy, TTI);
  110. Memcpy->eraseFromParent();
  111. }
  112. else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
  113. #if LLVM_VERSION_MAJOR >= 17
  114. Function *ParentFunc = Memmove->getParent()->getParent();
  115. const TargetTransformInfo &TTI =
  116. AM.getResult<TargetIRAnalysis>(*ParentFunc);
  117. expandMemMoveAsLoop(Memmove, TTI);
  118. #else
  119. expandMemMoveAsLoop(Memmove);
  120. #endif
  121. Memmove->eraseFromParent();
  122. }
  123. else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
  124. expandMemSetAsLoop(Memset);
  125. Memset->eraseFromParent();
  126. }
  127. }
  128. PreservedAnalyses PA;
  129. PA.preserveSet<CFGAnalyses>();
  130. return PA;
  131. }
  132. bool
  133. aot_check_simd_compatibility(const char *arch_c_str, const char *cpu_c_str)
  134. {
  135. #if WASM_ENABLE_SIMD != 0
  136. if (!arch_c_str || !cpu_c_str) {
  137. return false;
  138. }
  139. llvm::SmallVector<std::string, 1> targetAttributes;
  140. llvm::Triple targetTriple(arch_c_str, "", "");
  141. auto targetMachine =
  142. std::unique_ptr<llvm::TargetMachine>(llvm::EngineBuilder().selectTarget(
  143. targetTriple, "", std::string(cpu_c_str), targetAttributes));
  144. if (!targetMachine) {
  145. return false;
  146. }
  147. const llvm::Triple::ArchType targetArch =
  148. targetMachine->getTargetTriple().getArch();
  149. const llvm::MCSubtargetInfo *subTargetInfo =
  150. targetMachine->getMCSubtargetInfo();
  151. if (subTargetInfo == nullptr) {
  152. return false;
  153. }
  154. if (targetArch == llvm::Triple::x86_64) {
  155. return subTargetInfo->checkFeatures("+sse4.1");
  156. }
  157. else if (targetArch == llvm::Triple::aarch64) {
  158. return subTargetInfo->checkFeatures("+neon");
  159. }
  160. else {
  161. return false;
  162. }
  163. #else
  164. (void)arch_c_str;
  165. (void)cpu_c_str;
  166. return true;
  167. #endif /* WASM_ENABLE_SIMD */
  168. }
  169. void
  170. aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
  171. {
  172. TargetMachine *TM =
  173. reinterpret_cast<TargetMachine *>(comp_ctx->target_machine);
  174. PipelineTuningOptions PTO;
  175. PTO.LoopVectorization = true;
  176. PTO.SLPVectorization = true;
  177. PTO.LoopUnrolling = true;
  178. #if LLVM_VERSION_MAJOR >= 16
  179. Optional<PGOOptions> PGO = std::nullopt;
  180. #else
  181. Optional<PGOOptions> PGO = llvm::None;
  182. #endif
  183. if (comp_ctx->enable_llvm_pgo) {
  184. /* Disable static counter allocation for value profiler,
  185. it will be allocated by runtime */
  186. const char *argv[] = { "", "-vp-static-alloc=false" };
  187. cl::ParseCommandLineOptions(2, argv);
  188. #if LLVM_VERSION_MAJOR < 17
  189. PGO = PGOOptions("", "", "", PGOOptions::IRInstr);
  190. #else
  191. auto FS = vfs::getRealFileSystem();
  192. PGO = PGOOptions("", "", "", "", FS, PGOOptions::IRInstr);
  193. #endif
  194. }
  195. else if (comp_ctx->use_prof_file) {
  196. #if LLVM_VERSION_MAJOR < 17
  197. PGO = PGOOptions(comp_ctx->use_prof_file, "", "", PGOOptions::IRUse);
  198. #else
  199. auto FS = vfs::getRealFileSystem();
  200. PGO = PGOOptions(comp_ctx->use_prof_file, "", "", "", FS,
  201. PGOOptions::IRUse);
  202. #endif
  203. }
  204. #ifdef DEBUG_PASS
  205. PassInstrumentationCallbacks PIC;
  206. PassBuilder PB(TM, PTO, PGO, &PIC);
  207. #else
  208. #if LLVM_VERSION_MAJOR == 12
  209. PassBuilder PB(false, TM, PTO, PGO);
  210. #else
  211. PassBuilder PB(TM, PTO, PGO);
  212. #endif
  213. #endif
  214. /* Register all the basic analyses with the managers */
  215. LoopAnalysisManager LAM;
  216. FunctionAnalysisManager FAM;
  217. CGSCCAnalysisManager CGAM;
  218. ModuleAnalysisManager MAM;
  219. /* Register the target library analysis directly and give it a
  220. customized preset TLI */
  221. std::unique_ptr<TargetLibraryInfoImpl> TLII(
  222. new TargetLibraryInfoImpl(Triple(TM->getTargetTriple())));
  223. FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
  224. /* Register the AA manager first so that our version is the one used */
  225. AAManager AA = PB.buildDefaultAAPipeline();
  226. FAM.registerPass([&] { return std::move(AA); });
  227. #ifdef DEBUG_PASS
  228. StandardInstrumentations SI(true, false);
  229. SI.registerCallbacks(PIC, &FAM);
  230. #endif
  231. PB.registerFunctionAnalyses(FAM);
  232. PB.registerLoopAnalyses(LAM);
  233. PB.registerModuleAnalyses(MAM);
  234. PB.registerCGSCCAnalyses(CGAM);
  235. PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
  236. #if LLVM_VERSION_MAJOR <= 13
  237. PassBuilder::OptimizationLevel OL;
  238. switch (comp_ctx->opt_level) {
  239. case 0:
  240. OL = PassBuilder::OptimizationLevel::O0;
  241. break;
  242. case 1:
  243. OL = PassBuilder::OptimizationLevel::O1;
  244. break;
  245. case 2:
  246. OL = PassBuilder::OptimizationLevel::O2;
  247. break;
  248. case 3:
  249. default:
  250. OL = PassBuilder::OptimizationLevel::O3;
  251. break;
  252. }
  253. #else
  254. OptimizationLevel OL;
  255. switch (comp_ctx->opt_level) {
  256. case 0:
  257. OL = OptimizationLevel::O0;
  258. break;
  259. case 1:
  260. OL = OptimizationLevel::O1;
  261. break;
  262. case 2:
  263. OL = OptimizationLevel::O2;
  264. break;
  265. case 3:
  266. default:
  267. OL = OptimizationLevel::O3;
  268. break;
  269. }
  270. #endif /* end of LLVM_VERSION_MAJOR */
  271. bool disable_llvm_lto = comp_ctx->disable_llvm_lto;
  272. #if WASM_ENABLE_SPEC_TEST != 0
  273. disable_llvm_lto = true;
  274. #endif
  275. Module *M = reinterpret_cast<Module *>(module);
  276. if (disable_llvm_lto) {
  277. for (Function &F : *M) {
  278. F.addFnAttr("disable-tail-calls", "true");
  279. }
  280. }
  281. ModulePassManager MPM;
  282. if (comp_ctx->is_jit_mode) {
  283. const char *Passes =
  284. "loop-vectorize,slp-vectorizer,"
  285. "load-store-vectorizer,vector-combine,"
  286. "mem2reg,instcombine,simplifycfg,jump-threading,indvars";
  287. ExitOnErr(PB.parsePassPipeline(MPM, Passes));
  288. }
  289. else {
  290. FunctionPassManager FPM;
  291. /* Apply Vectorize related passes for AOT mode */
  292. FPM.addPass(LoopVectorizePass());
  293. FPM.addPass(SLPVectorizerPass());
  294. FPM.addPass(LoadStoreVectorizerPass());
  295. FPM.addPass(VectorCombinePass());
  296. if (comp_ctx->enable_llvm_pgo || comp_ctx->use_prof_file) {
  297. /* LICM pass: loop invariant code motion, attempting to remove
  298. as much code from the body of a loop as possible. Experiments
  299. show it is good to enable it when pgo is enabled. */
  300. #if LLVM_VERSION_MAJOR >= 15
  301. LICMOptions licm_opt;
  302. FPM.addPass(
  303. createFunctionToLoopPassAdaptor(LICMPass(licm_opt), true));
  304. #else
  305. FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), true));
  306. #endif
  307. }
  308. /*
  309. FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass()));
  310. FPM.addPass(createFunctionToLoopPassAdaptor(SimpleLoopUnswitchPass()));
  311. */
  312. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
  313. if (comp_ctx->llvm_passes) {
  314. ExitOnErr(PB.parsePassPipeline(MPM, comp_ctx->llvm_passes));
  315. }
  316. if (
  317. #if LLVM_VERSION_MAJOR <= 13
  318. PassBuilder::OptimizationLevel::O0 == OL
  319. #else
  320. OptimizationLevel::O0 == OL
  321. #endif
  322. ) {
  323. MPM.addPass(PB.buildO0DefaultPipeline(OL));
  324. }
  325. else {
  326. if (!disable_llvm_lto) {
  327. /* Apply LTO for AOT mode */
  328. if (comp_ctx->comp_data->func_count >= 10
  329. || comp_ctx->enable_llvm_pgo || comp_ctx->use_prof_file)
  330. /* Add the pre-link optimizations if the func count
  331. is large enough or PGO is enabled */
  332. MPM.addPass(PB.buildLTOPreLinkDefaultPipeline(OL));
  333. else
  334. MPM.addPass(PB.buildLTODefaultPipeline(OL, NULL));
  335. }
  336. else {
  337. MPM.addPass(PB.buildPerModuleDefaultPipeline(OL));
  338. }
  339. }
  340. /* Run specific passes for AOT indirect mode in last since general
  341. optimization may create some intrinsic function calls like
  342. llvm.memset, so let's remove these function calls here. */
  343. if (comp_ctx->is_indirect_mode) {
  344. FunctionPassManager FPM1;
  345. FPM1.addPass(ExpandMemoryOpPass());
  346. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM1)));
  347. }
  348. }
  349. MPM.run(*M, MAM);
  350. }
  351. char *
  352. aot_compress_aot_func_names(AOTCompContext *comp_ctx, uint32 *p_size)
  353. {
  354. std::vector<std::string> NameStrs;
  355. std::string Result;
  356. char buf[32], *compressed_str;
  357. uint32 compressed_str_len, i;
  358. for (i = 0; i < comp_ctx->func_ctx_count; i++) {
  359. snprintf(buf, sizeof(buf), "%s%d", AOT_FUNC_PREFIX, i);
  360. std::string str(buf);
  361. NameStrs.push_back(str);
  362. }
  363. #if LLVM_VERSION_MAJOR < 18
  364. #define collectGlobalObjectNameStrings collectPGOFuncNameStrings
  365. #endif
  366. if (collectGlobalObjectNameStrings(NameStrs, true, Result)) {
  367. aot_set_last_error("collect pgo func name strings failed");
  368. return NULL;
  369. }
  370. compressed_str_len = Result.size();
  371. if (!(compressed_str = (char *)wasm_runtime_malloc(compressed_str_len))) {
  372. aot_set_last_error("allocate memory failed");
  373. return NULL;
  374. }
  375. bh_memcpy_s(compressed_str, compressed_str_len, Result.c_str(),
  376. compressed_str_len);
  377. *p_size = compressed_str_len;
  378. return compressed_str;
  379. }