aot_llvm_extra.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  1. /*
  2. * Copyright (C) 2019 Intel Corporation. All rights reserved.
  3. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. */
  5. #include <llvm/Passes/StandardInstrumentations.h>
  6. #include <llvm/Support/Error.h>
  7. #if LLVM_VERSION_MAJOR < 17
  8. #include <llvm/ADT/None.h>
  9. #include <llvm/ADT/Optional.h>
  10. #include <llvm/ADT/Triple.h>
  11. #endif
  12. #include <llvm/ADT/SmallVector.h>
  13. #include <llvm/ADT/Twine.h>
  14. #include <llvm/Analysis/TargetTransformInfo.h>
  15. #include <llvm/CodeGen/TargetPassConfig.h>
  16. #include <llvm/ExecutionEngine/ExecutionEngine.h>
  17. #include <llvm/MC/MCSubtargetInfo.h>
  18. #include <llvm/Support/TargetSelect.h>
  19. #include <llvm/Target/TargetMachine.h>
  20. #include <llvm-c/Core.h>
  21. #include <llvm-c/ExecutionEngine.h>
  22. #if LLVM_VERSION_MAJOR < 17
  23. #include <llvm-c/Initialization.h>
  24. #endif
  25. #include <llvm/ExecutionEngine/GenericValue.h>
  26. #include <llvm/ExecutionEngine/JITEventListener.h>
  27. #include <llvm/ExecutionEngine/RTDyldMemoryManager.h>
  28. #include <llvm/ExecutionEngine/Orc/LLJIT.h>
  29. #include <llvm/IR/DerivedTypes.h>
  30. #include <llvm/IR/Module.h>
  31. #include <llvm/IR/Instructions.h>
  32. #include <llvm/IR/IntrinsicInst.h>
  33. #include <llvm/IR/PassManager.h>
  34. #include <llvm/Support/CommandLine.h>
  35. #include <llvm/Support/ErrorHandling.h>
  36. #if LLVM_VERSION_MAJOR >= 17
  37. #include <llvm/Support/PGOOptions.h>
  38. #include <llvm/Support/VirtualFileSystem.h>
  39. #endif
  40. #include <llvm/Target/CodeGenCWrappers.h>
  41. #include <llvm/Target/TargetMachine.h>
  42. #include <llvm/Target/TargetOptions.h>
  43. #include <llvm/Transforms/Utils/LowerMemIntrinsics.h>
  44. #include <llvm/Transforms/Vectorize/LoopVectorize.h>
  45. #include <llvm/Transforms/Vectorize/LoadStoreVectorizer.h>
  46. #include <llvm/Transforms/Vectorize/SLPVectorizer.h>
  47. #include <llvm/Transforms/Vectorize/VectorCombine.h>
  48. #include <llvm/Transforms/Scalar/LoopRotation.h>
  49. #include <llvm/Transforms/Scalar/SimpleLoopUnswitch.h>
  50. #include <llvm/Transforms/Scalar/LICM.h>
  51. #include <llvm/Transforms/Scalar/GVN.h>
  52. #include <llvm/Passes/PassBuilder.h>
  53. #include <llvm/Analysis/TargetLibraryInfo.h>
  54. #if LLVM_VERSION_MAJOR >= 12
  55. #include <llvm/Analysis/AliasAnalysis.h>
  56. #endif
  57. #include <llvm/ProfileData/InstrProf.h>
  58. #include <cstring>
  59. #include "../aot/aot_runtime.h"
  60. #include "aot_llvm.h"
/* Bring the LLVM core and ORC namespaces into scope for the rest of the
   file, so LLVM types are mostly referenced without the llvm:: prefix. */
using namespace llvm;
using namespace llvm::orc;
#if LLVM_VERSION_MAJOR >= 17
/* This file only includes <llvm/ADT/Optional.h> for LLVM < 17. For newer
   versions define llvm::Optional as an alias of std::optional so that the
   code below can keep spelling the type as Optional<T>. */
namespace llvm {
template<typename T>
using Optional = std::optional<T>;
}
#endif
/* Prototypes of the entry points defined further down, wrapped in LLVM's
   extern "C" begin/end macros (presumably so that the C part of the
   compiler can call them -- confirm against aot_llvm.h). */
LLVM_C_EXTERN_C_BEGIN
bool
aot_check_simd_compatibility(const char *arch_c_str, const char *cpu_c_str);
void
aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module);
LLVM_C_EXTERN_C_END
/* File-wide helper used below to unwrap the llvm::Error returned by
   PassBuilder::parsePassPipeline(). */
ExitOnError ExitOnErr;
  76. class ExpandMemoryOpPass : public PassInfoMixin<ExpandMemoryOpPass>
  77. {
  78. public:
  79. PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
  80. };
  81. PreservedAnalyses
  82. ExpandMemoryOpPass::run(Function &F, FunctionAnalysisManager &AM)
  83. {
  84. SmallVector<MemIntrinsic *, 16> MemCalls;
  85. /* Iterate over all instructions in the function, looking for memcpy,
  86. * memmove, and memset. When we find one, expand it into a loop. */
  87. for (auto &BB : F) {
  88. for (auto &Inst : BB) {
  89. if (auto *Memcpy = dyn_cast_or_null<MemCpyInst>(&Inst)) {
  90. MemCalls.push_back(Memcpy);
  91. }
  92. else if (auto *Memmove = dyn_cast_or_null<MemMoveInst>(&Inst)) {
  93. MemCalls.push_back(Memmove);
  94. }
  95. else if (auto *Memset = dyn_cast_or_null<MemSetInst>(&Inst)) {
  96. MemCalls.push_back(Memset);
  97. }
  98. }
  99. }
  100. for (MemIntrinsic *MemCall : MemCalls) {
  101. if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
  102. Function *ParentFunc = Memcpy->getParent()->getParent();
  103. const TargetTransformInfo &TTI =
  104. AM.getResult<TargetIRAnalysis>(*ParentFunc);
  105. expandMemCpyAsLoop(Memcpy, TTI);
  106. Memcpy->eraseFromParent();
  107. }
  108. else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
  109. #if LLVM_VERSION_MAJOR >= 17
  110. Function *ParentFunc = Memmove->getParent()->getParent();
  111. const TargetTransformInfo &TTI =
  112. AM.getResult<TargetIRAnalysis>(*ParentFunc);
  113. expandMemMoveAsLoop(Memmove, TTI);
  114. #else
  115. expandMemMoveAsLoop(Memmove);
  116. #endif
  117. Memmove->eraseFromParent();
  118. }
  119. else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
  120. expandMemSetAsLoop(Memset);
  121. Memset->eraseFromParent();
  122. }
  123. }
  124. PreservedAnalyses PA;
  125. PA.preserveSet<CFGAnalyses>();
  126. return PA;
  127. }
  128. bool
  129. aot_check_simd_compatibility(const char *arch_c_str, const char *cpu_c_str)
  130. {
  131. #if WASM_ENABLE_SIMD != 0
  132. if (!arch_c_str || !cpu_c_str) {
  133. return false;
  134. }
  135. llvm::SmallVector<std::string, 1> targetAttributes;
  136. llvm::Triple targetTriple(arch_c_str, "", "");
  137. auto targetMachine =
  138. std::unique_ptr<llvm::TargetMachine>(llvm::EngineBuilder().selectTarget(
  139. targetTriple, "", std::string(cpu_c_str), targetAttributes));
  140. if (!targetMachine) {
  141. return false;
  142. }
  143. const llvm::Triple::ArchType targetArch =
  144. targetMachine->getTargetTriple().getArch();
  145. const llvm::MCSubtargetInfo *subTargetInfo =
  146. targetMachine->getMCSubtargetInfo();
  147. if (subTargetInfo == nullptr) {
  148. return false;
  149. }
  150. if (targetArch == llvm::Triple::x86_64) {
  151. return subTargetInfo->checkFeatures("+sse4.1");
  152. }
  153. else if (targetArch == llvm::Triple::aarch64) {
  154. return subTargetInfo->checkFeatures("+neon");
  155. }
  156. else {
  157. return false;
  158. }
  159. #else
  160. (void)arch_c_str;
  161. (void)cpu_c_str;
  162. return true;
  163. #endif /* WASM_ENABLE_SIMD */
  164. }
/**
 * Build an optimization pipeline with LLVM's new pass manager and run it
 * on the given module.
 *
 * The pipeline depends on these fields of the compilation context:
 * target_machine, enable_llvm_pgo, use_prof_file, opt_level,
 * disable_llvm_lto, is_jit_mode, llvm_passes, comp_data->func_count and
 * is_indirect_mode.
 *
 * @param comp_ctx the compilation context holding the options above
 * @param module the LLVM module to optimize, it is modified in place
 */
void
aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
{
    TargetMachine *TM =
        reinterpret_cast<TargetMachine *>(comp_ctx->target_machine);
    /* Turn on the vectorizers and loop unrolling for the default
       pipelines built from this PassBuilder */
    PipelineTuningOptions PTO;
    PTO.LoopVectorization = true;
    PTO.SLPVectorization = true;
    PTO.LoopUnrolling = true;
    /* No PGO unless one of the two options below is set */
#if LLVM_VERSION_MAJOR >= 16
    Optional<PGOOptions> PGO = std::nullopt;
#else
    Optional<PGOOptions> PGO = llvm::None;
#endif
    if (comp_ctx->enable_llvm_pgo) {
        /* Disable static counter allocation for value profiler,
           it will be allocated by runtime */
        const char *argv[] = { "", "-vp-static-alloc=false" };
        cl::ParseCommandLineOptions(2, argv);
        /* Request the IRInstr action with empty file names; the
           PGOOptions constructor takes two more arguments (one of them
           the file system) since LLVM 17 */
#if LLVM_VERSION_MAJOR < 17
        PGO = PGOOptions("", "", "", PGOOptions::IRInstr);
#else
        auto FS = vfs::getRealFileSystem();
        PGO = PGOOptions("", "", "", "", FS, PGOOptions::IRInstr);
#endif
    }
    else if (comp_ctx->use_prof_file) {
        /* Request the IRUse action with the given profile file; only
           reached when enable_llvm_pgo is not set */
#if LLVM_VERSION_MAJOR < 17
        PGO = PGOOptions(comp_ctx->use_prof_file, "", "", PGOOptions::IRUse);
#else
        auto FS = vfs::getRealFileSystem();
        PGO = PGOOptions(comp_ctx->use_prof_file, "", "", "", FS,
                         PGOOptions::IRUse);
#endif
    }
#ifdef DEBUG_PASS
    /* Debug build: hook pass instrumentation callbacks into the builder */
    PassInstrumentationCallbacks PIC;
    PassBuilder PB(TM, PTO, PGO, &PIC);
#else
    /* The LLVM 12 constructor takes an extra leading boolean argument */
#if LLVM_VERSION_MAJOR == 12
    PassBuilder PB(false, TM, PTO, PGO);
#else
    PassBuilder PB(TM, PTO, PGO);
#endif
#endif
    /* Register all the basic analyses with the managers */
    LoopAnalysisManager LAM;
    FunctionAnalysisManager FAM;
    CGSCCAnalysisManager CGAM;
    ModuleAnalysisManager MAM;
    /* Register the target library analysis directly and give it a
       customized preset TLI */
    std::unique_ptr<TargetLibraryInfoImpl> TLII(
        new TargetLibraryInfoImpl(Triple(TM->getTargetTriple())));
    FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
    /* Register the AA manager first so that our version is the one used */
    AAManager AA = PB.buildDefaultAAPipeline();
    FAM.registerPass([&] { return std::move(AA); });
#ifdef DEBUG_PASS
    StandardInstrumentations SI(true, false);
    SI.registerCallbacks(PIC, &FAM);
#endif
    /* Register the remaining default analyses and connect the four
       managers with each other */
    PB.registerFunctionAnalyses(FAM);
    PB.registerLoopAnalyses(LAM);
    PB.registerModuleAnalyses(MAM);
    PB.registerCGSCCAnalyses(CGAM);
    PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
    /* Map comp_ctx->opt_level to an LLVM optimization level: 0, 1 and 2
       map to O0, O1 and O2, every other value maps to O3. The type is
       nested in PassBuilder up to LLVM 13 and stand-alone afterwards. */
#if LLVM_VERSION_MAJOR <= 13
    PassBuilder::OptimizationLevel OL;
    switch (comp_ctx->opt_level) {
        case 0:
            OL = PassBuilder::OptimizationLevel::O0;
            break;
        case 1:
            OL = PassBuilder::OptimizationLevel::O1;
            break;
        case 2:
            OL = PassBuilder::OptimizationLevel::O2;
            break;
        case 3:
        default:
            OL = PassBuilder::OptimizationLevel::O3;
            break;
    }
#else
    OptimizationLevel OL;
    switch (comp_ctx->opt_level) {
        case 0:
            OL = OptimizationLevel::O0;
            break;
        case 1:
            OL = OptimizationLevel::O1;
            break;
        case 2:
            OL = OptimizationLevel::O2;
            break;
        case 3:
        default:
            OL = OptimizationLevel::O3;
            break;
    }
#endif /* end of LLVM_VERSION_MAJOR */
    bool disable_llvm_lto = comp_ctx->disable_llvm_lto;
    /* Spec test builds always behave as if LTO was disabled */
#if WASM_ENABLE_SPEC_TEST != 0
    disable_llvm_lto = true;
#endif
    Module *M = reinterpret_cast<Module *>(module);
    if (disable_llvm_lto) {
        /* Without LTO, tag every function of the module with the
           "disable-tail-calls" attribute */
        for (Function &F : *M) {
            F.addFnAttr("disable-tail-calls", "true");
        }
    }
    ModulePassManager MPM;
    if (comp_ctx->is_jit_mode) {
        /* JIT mode: a fixed pipeline given in textual form, the default
           O0-O3/LTO pipelines below are not used */
        const char *Passes =
            "loop-vectorize,slp-vectorizer,"
            "load-store-vectorizer,vector-combine,"
            "mem2reg,instcombine,simplifycfg,jump-threading,indvars";
        ExitOnErr(PB.parsePassPipeline(MPM, Passes));
    }
    else {
        FunctionPassManager FPM;
        /* Apply Vectorize related passes for AOT mode */
        FPM.addPass(LoopVectorizePass());
        FPM.addPass(SLPVectorizerPass());
        FPM.addPass(LoadStoreVectorizerPass());
        FPM.addPass(VectorCombinePass());
        if (comp_ctx->enable_llvm_pgo || comp_ctx->use_prof_file) {
            /* LICM pass: loop invariant code motion, attempting to remove
               as much code from the body of a loop as possible. Experiments
               show it is good to enable it when pgo is enabled. */
            /* LICMPass needs an explicit options object since LLVM 15 */
#if LLVM_VERSION_MAJOR >= 15
            LICMOptions licm_opt;
            FPM.addPass(
                createFunctionToLoopPassAdaptor(LICMPass(licm_opt), true));
#else
            FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), true));
#endif
        }
        /* Currently disabled:
        FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass()));
        FPM.addPass(createFunctionToLoopPassAdaptor(SimpleLoopUnswitchPass()));
        */
        MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
        /* Append the textual pipeline stored in comp_ctx->llvm_passes,
           if there is one */
        if (comp_ctx->llvm_passes) {
            ExitOnErr(PB.parsePassPipeline(MPM, comp_ctx->llvm_passes));
        }
        /* Append one of LLVM's default pipelines, selected by the
           optimization level and by whether LTO is disabled */
        if (
#if LLVM_VERSION_MAJOR <= 13
            PassBuilder::OptimizationLevel::O0 == OL
#else
            OptimizationLevel::O0 == OL
#endif
        ) {
            MPM.addPass(PB.buildO0DefaultPipeline(OL));
        }
        else {
            if (!disable_llvm_lto) {
                /* Apply LTO for AOT mode */
                if (comp_ctx->comp_data->func_count >= 10
                    || comp_ctx->enable_llvm_pgo || comp_ctx->use_prof_file)
                    /* Add the pre-link optimizations if the func count
                       is large enough or PGO is enabled */
                    MPM.addPass(PB.buildLTOPreLinkDefaultPipeline(OL));
                else
                    MPM.addPass(PB.buildLTODefaultPipeline(OL, NULL));
            }
            else {
                MPM.addPass(PB.buildPerModuleDefaultPipeline(OL));
            }
        }
        /* Run specific passes for AOT indirect mode in last since general
           optimization may create some intrinsic function calls like
           llvm.memset, so let's remove these function calls here. */
        if (comp_ctx->is_indirect_mode) {
            FunctionPassManager FPM1;
            FPM1.addPass(ExpandMemoryOpPass());
            MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM1)));
        }
    }
    /* Finally run the assembled pipeline over the module */
    MPM.run(*M, MAM);
}
  347. char *
  348. aot_compress_aot_func_names(AOTCompContext *comp_ctx, uint32 *p_size)
  349. {
  350. std::vector<std::string> NameStrs;
  351. std::string Result;
  352. char buf[32], *compressed_str;
  353. uint32 compressed_str_len, i;
  354. for (i = 0; i < comp_ctx->func_ctx_count; i++) {
  355. snprintf(buf, sizeof(buf), "%s%d", AOT_FUNC_PREFIX, i);
  356. std::string str(buf);
  357. NameStrs.push_back(str);
  358. }
  359. #if LLVM_VERSION_MAJOR < 18
  360. #define collectGlobalObjectNameStrings collectPGOFuncNameStrings
  361. #endif
  362. if (collectGlobalObjectNameStrings(NameStrs, true, Result)) {
  363. aot_set_last_error("collect pgo func name strings failed");
  364. return NULL;
  365. }
  366. compressed_str_len = (uint32)Result.size();
  367. if (!(compressed_str = (char *)wasm_runtime_malloc(compressed_str_len))) {
  368. aot_set_last_error("allocate memory failed");
  369. return NULL;
  370. }
  371. bh_memcpy_s(compressed_str, compressed_str_len, Result.c_str(),
  372. compressed_str_len);
  373. *p_size = compressed_str_len;
  374. return compressed_str;
  375. }