aot_llvm_extra.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. /*
  2. * Copyright (C) 2019 Intel Corporation. All rights reserved.
  3. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. */
  5. #include <llvm/Passes/StandardInstrumentations.h>
  6. #include <llvm/Support/Error.h>
  7. #if LLVM_VERSION_MAJOR < 17
  8. #include <llvm/ADT/None.h>
  9. #include <llvm/ADT/Optional.h>
  10. #include <llvm/ADT/Triple.h>
  11. #endif
  12. #include <llvm/ADT/SmallVector.h>
  13. #include <llvm/ADT/Twine.h>
  14. #include <llvm/Analysis/TargetTransformInfo.h>
  15. #include <llvm/CodeGen/TargetPassConfig.h>
  16. #include <llvm/ExecutionEngine/ExecutionEngine.h>
  17. #include <llvm/MC/MCSubtargetInfo.h>
  18. #include <llvm/Support/TargetSelect.h>
  19. #include <llvm/Target/TargetMachine.h>
  20. #include <llvm-c/Core.h>
  21. #include <llvm-c/ExecutionEngine.h>
  22. #if LLVM_VERSION_MAJOR < 17
  23. #include <llvm-c/Initialization.h>
  24. #endif
  25. #include <llvm/ExecutionEngine/GenericValue.h>
  26. #include <llvm/ExecutionEngine/JITEventListener.h>
  27. #include <llvm/ExecutionEngine/RTDyldMemoryManager.h>
  28. #include <llvm/ExecutionEngine/Orc/LLJIT.h>
  29. #include <llvm/IR/DerivedTypes.h>
  30. #include <llvm/IR/Module.h>
  31. #include <llvm/IR/Instructions.h>
  32. #include <llvm/IR/IntrinsicInst.h>
  33. #include <llvm/IR/PassManager.h>
  34. #include <llvm/Support/CommandLine.h>
  35. #include <llvm/Support/ErrorHandling.h>
  36. #if LLVM_VERSION_MAJOR >= 17
  37. #include <llvm/Support/PGOOptions.h>
  38. #include <llvm/Support/VirtualFileSystem.h>
  39. #endif
  40. #include <llvm/Target/CodeGenCWrappers.h>
  41. #include <llvm/Target/TargetMachine.h>
  42. #include <llvm/Target/TargetOptions.h>
  43. #if LLVM_VERSION_MAJOR >= 17
  44. #include <llvm/TargetParser/Triple.h>
  45. #endif
  46. #include <llvm/Transforms/Utils/LowerMemIntrinsics.h>
  47. #include <llvm/Transforms/Vectorize/LoopVectorize.h>
  48. #include <llvm/Transforms/Vectorize/LoadStoreVectorizer.h>
  49. #include <llvm/Transforms/Vectorize/SLPVectorizer.h>
  50. #include <llvm/Transforms/Vectorize/VectorCombine.h>
  51. #include <llvm/Transforms/Scalar/LoopRotation.h>
  52. #include <llvm/Transforms/Scalar/SimpleLoopUnswitch.h>
  53. #include <llvm/Transforms/Scalar/LICM.h>
  54. #include <llvm/Transforms/Scalar/GVN.h>
  55. #include <llvm/Passes/PassBuilder.h>
  56. #include <llvm/Analysis/TargetLibraryInfo.h>
  57. #if LLVM_VERSION_MAJOR >= 12
  58. #include <llvm/Analysis/AliasAnalysis.h>
  59. #endif
  60. #include <llvm/ProfileData/InstrProf.h>
  61. #include <cstring>
  62. #include "../aot/aot_runtime.h"
  63. #include "aot_llvm.h"
  64. using namespace llvm;
  65. using namespace llvm::orc;
  66. #if LLVM_VERSION_MAJOR >= 17
  67. namespace llvm {
  68. template<typename T>
  69. using Optional = std::optional<T>;
  70. }
  71. #endif
  72. LLVM_C_EXTERN_C_BEGIN
  73. bool
  74. aot_check_simd_compatibility(const char *arch_c_str, const char *cpu_c_str);
  75. void
  76. aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module);
  77. LLVM_C_EXTERN_C_END
  78. ExitOnError ExitOnErr;
  79. class ExpandMemoryOpPass : public PassInfoMixin<ExpandMemoryOpPass>
  80. {
  81. public:
  82. PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
  83. };
  84. PreservedAnalyses
  85. ExpandMemoryOpPass::run(Function &F, FunctionAnalysisManager &AM)
  86. {
  87. SmallVector<MemIntrinsic *, 16> MemCalls;
  88. /* Iterate over all instructions in the function, looking for memcpy,
  89. * memmove, and memset. When we find one, expand it into a loop. */
  90. for (auto &BB : F) {
  91. for (auto &Inst : BB) {
  92. if (auto *Memcpy = dyn_cast_or_null<MemCpyInst>(&Inst)) {
  93. MemCalls.push_back(Memcpy);
  94. }
  95. else if (auto *Memmove = dyn_cast_or_null<MemMoveInst>(&Inst)) {
  96. MemCalls.push_back(Memmove);
  97. }
  98. else if (auto *Memset = dyn_cast_or_null<MemSetInst>(&Inst)) {
  99. MemCalls.push_back(Memset);
  100. }
  101. }
  102. }
  103. for (MemIntrinsic *MemCall : MemCalls) {
  104. if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
  105. Function *ParentFunc = Memcpy->getParent()->getParent();
  106. const TargetTransformInfo &TTI =
  107. AM.getResult<TargetIRAnalysis>(*ParentFunc);
  108. expandMemCpyAsLoop(Memcpy, TTI);
  109. Memcpy->eraseFromParent();
  110. }
  111. else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
  112. #if LLVM_VERSION_MAJOR >= 17
  113. Function *ParentFunc = Memmove->getParent()->getParent();
  114. const TargetTransformInfo &TTI =
  115. AM.getResult<TargetIRAnalysis>(*ParentFunc);
  116. expandMemMoveAsLoop(Memmove, TTI);
  117. #else
  118. expandMemMoveAsLoop(Memmove);
  119. #endif
  120. Memmove->eraseFromParent();
  121. }
  122. else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
  123. expandMemSetAsLoop(Memset);
  124. Memset->eraseFromParent();
  125. }
  126. }
  127. PreservedAnalyses PA;
  128. PA.preserveSet<CFGAnalyses>();
  129. return PA;
  130. }
  /*
   * Check whether the given target can be used with SIMD enabled.
   *
   * @param arch_c_str architecture name used to build the target triple
   * @param cpu_c_str  cpu name handed to the target selection
   *
   * @return When WASM_ENABLE_SIMD is 0 the arguments are ignored and true
   *         is returned. Otherwise false is returned if an argument is
   *         NULL, if no target machine or subtarget info can be obtained,
   *         or if the architecture is not one of the handled ones; for
   *         x86_64 the result is whether "+sse4.1" is available, for
   *         aarch64 whether "+neon" is available, and for arc it is true.
   */
  bool
  aot_check_simd_compatibility(const char *arch_c_str, const char *cpu_c_str)
  {
  #if WASM_ENABLE_SIMD != 0
      if (arch_c_str == nullptr || cpu_c_str == nullptr)
          return false;

      /* Only the arch component of the triple is filled in */
      const llvm::Triple triple(arch_c_str, "", "");
      llvm::SmallVector<std::string, 1> attrs;

      std::unique_ptr<llvm::TargetMachine> machine(
          llvm::EngineBuilder().selectTarget(triple, "",
                                             std::string(cpu_c_str), attrs));
      if (machine == nullptr)
          return false;

      const llvm::MCSubtargetInfo *subtarget = machine->getMCSubtargetInfo();
      if (!subtarget)
          return false;

      switch (machine->getTargetTriple().getArch()) {
          case llvm::Triple::x86_64:
              return subtarget->checkFeatures("+sse4.1");
          case llvm::Triple::aarch64:
              return subtarget->checkFeatures("+neon");
          case llvm::Triple::arc:
              return true;
          default:
              return false;
      }
  #else
      /* SIMD support is compiled out: nothing to verify */
      (void)arch_c_str;
      (void)cpu_c_str;
      return true;
  #endif /* WASM_ENABLE_SIMD */
  }
  171. void
  172. aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
  173. {
  174. TargetMachine *TM =
  175. reinterpret_cast<TargetMachine *>(comp_ctx->target_machine);
  176. PipelineTuningOptions PTO;
  177. PTO.LoopVectorization = true;
  178. PTO.SLPVectorization = true;
  179. PTO.LoopUnrolling = true;
  180. #if LLVM_VERSION_MAJOR >= 16
  181. Optional<PGOOptions> PGO = std::nullopt;
  182. #else
  183. Optional<PGOOptions> PGO = llvm::None;
  184. #endif
  185. if (comp_ctx->enable_llvm_pgo) {
  186. /* Disable static counter allocation for value profiler,
  187. it will be allocated by runtime */
  188. const char *argv[] = { "", "-vp-static-alloc=false" };
  189. cl::ParseCommandLineOptions(2, argv);
  190. #if LLVM_VERSION_MAJOR < 17
  191. PGO = PGOOptions("", "", "", PGOOptions::IRInstr);
  192. #else
  193. auto FS = vfs::getRealFileSystem();
  194. PGO = PGOOptions("", "", "", "", FS, PGOOptions::IRInstr);
  195. #endif
  196. }
  197. else if (comp_ctx->use_prof_file) {
  198. #if LLVM_VERSION_MAJOR < 17
  199. PGO = PGOOptions(comp_ctx->use_prof_file, "", "", PGOOptions::IRUse);
  200. #else
  201. auto FS = vfs::getRealFileSystem();
  202. PGO = PGOOptions(comp_ctx->use_prof_file, "", "", "", FS,
  203. PGOOptions::IRUse);
  204. #endif
  205. }
  206. #ifdef DEBUG_PASS
  207. PassInstrumentationCallbacks PIC;
  208. PassBuilder PB(TM, PTO, PGO, &PIC);
  209. #else
  210. #if LLVM_VERSION_MAJOR == 12
  211. PassBuilder PB(false, TM, PTO, PGO);
  212. #else
  213. PassBuilder PB(TM, PTO, PGO);
  214. #endif
  215. #endif
  216. /* Register all the basic analyses with the managers */
  217. LoopAnalysisManager LAM;
  218. FunctionAnalysisManager FAM;
  219. CGSCCAnalysisManager CGAM;
  220. ModuleAnalysisManager MAM;
  221. /* Register the target library analysis directly and give it a
  222. customized preset TLI */
  223. std::unique_ptr<TargetLibraryInfoImpl> TLII(
  224. new TargetLibraryInfoImpl(Triple(TM->getTargetTriple())));
  225. FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
  226. /* Register the AA manager first so that our version is the one used */
  227. AAManager AA = PB.buildDefaultAAPipeline();
  228. FAM.registerPass([&] { return std::move(AA); });
  229. #ifdef DEBUG_PASS
  230. StandardInstrumentations SI(true, false);
  231. SI.registerCallbacks(PIC, &FAM);
  232. #endif
  233. PB.registerFunctionAnalyses(FAM);
  234. PB.registerLoopAnalyses(LAM);
  235. PB.registerModuleAnalyses(MAM);
  236. PB.registerCGSCCAnalyses(CGAM);
  237. PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
  238. #if LLVM_VERSION_MAJOR <= 13
  239. PassBuilder::OptimizationLevel OL;
  240. switch (comp_ctx->opt_level) {
  241. case 0:
  242. OL = PassBuilder::OptimizationLevel::O0;
  243. break;
  244. case 1:
  245. OL = PassBuilder::OptimizationLevel::O1;
  246. break;
  247. case 2:
  248. OL = PassBuilder::OptimizationLevel::O2;
  249. break;
  250. case 3:
  251. default:
  252. OL = PassBuilder::OptimizationLevel::O3;
  253. break;
  254. }
  255. #else
  256. OptimizationLevel OL;
  257. switch (comp_ctx->opt_level) {
  258. case 0:
  259. OL = OptimizationLevel::O0;
  260. break;
  261. case 1:
  262. OL = OptimizationLevel::O1;
  263. break;
  264. case 2:
  265. OL = OptimizationLevel::O2;
  266. break;
  267. case 3:
  268. default:
  269. OL = OptimizationLevel::O3;
  270. break;
  271. }
  272. #endif /* end of LLVM_VERSION_MAJOR */
  273. bool disable_llvm_lto = comp_ctx->disable_llvm_lto;
  274. #if WASM_ENABLE_SPEC_TEST != 0
  275. disable_llvm_lto = true;
  276. #endif
  277. Module *M = reinterpret_cast<Module *>(module);
  278. if (disable_llvm_lto) {
  279. for (Function &F : *M) {
  280. F.addFnAttr("disable-tail-calls", "true");
  281. }
  282. }
  283. ModulePassManager MPM;
  284. if (comp_ctx->is_jit_mode) {
  285. const char *Passes =
  286. "loop-vectorize,slp-vectorizer,"
  287. "load-store-vectorizer,vector-combine,"
  288. "mem2reg,instcombine,simplifycfg,jump-threading,indvars";
  289. ExitOnErr(PB.parsePassPipeline(MPM, Passes));
  290. }
  291. else {
  292. FunctionPassManager FPM;
  293. /* Apply Vectorize related passes for AOT mode */
  294. FPM.addPass(LoopVectorizePass());
  295. FPM.addPass(SLPVectorizerPass());
  296. FPM.addPass(LoadStoreVectorizerPass());
  297. FPM.addPass(VectorCombinePass());
  298. if (comp_ctx->enable_llvm_pgo || comp_ctx->use_prof_file) {
  299. /* LICM pass: loop invariant code motion, attempting to remove
  300. as much code from the body of a loop as possible. Experiments
  301. show it is good to enable it when pgo is enabled. */
  302. #if LLVM_VERSION_MAJOR >= 15
  303. LICMOptions licm_opt;
  304. FPM.addPass(
  305. createFunctionToLoopPassAdaptor(LICMPass(licm_opt), true));
  306. #else
  307. FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), true));
  308. #endif
  309. }
  310. /*
  311. FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass()));
  312. FPM.addPass(createFunctionToLoopPassAdaptor(SimpleLoopUnswitchPass()));
  313. */
  314. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
  315. if (comp_ctx->llvm_passes) {
  316. ExitOnErr(PB.parsePassPipeline(MPM, comp_ctx->llvm_passes));
  317. }
  318. if (
  319. #if LLVM_VERSION_MAJOR <= 13
  320. PassBuilder::OptimizationLevel::O0 == OL
  321. #else
  322. OptimizationLevel::O0 == OL
  323. #endif
  324. ) {
  325. MPM.addPass(PB.buildO0DefaultPipeline(OL));
  326. }
  327. else {
  328. if (!disable_llvm_lto) {
  329. /* Apply LTO for AOT mode */
  330. if (comp_ctx->comp_data->func_count >= 10
  331. || comp_ctx->enable_llvm_pgo || comp_ctx->use_prof_file)
  332. /* Add the pre-link optimizations if the func count
  333. is large enough or PGO is enabled */
  334. MPM.addPass(PB.buildLTOPreLinkDefaultPipeline(OL));
  335. else
  336. MPM.addPass(PB.buildLTODefaultPipeline(OL, NULL));
  337. }
  338. else {
  339. MPM.addPass(PB.buildPerModuleDefaultPipeline(OL));
  340. }
  341. }
  342. /* Run specific passes for AOT indirect mode in last since general
  343. optimization may create some intrinsic function calls like
  344. llvm.memset, so let's remove these function calls here. */
  345. if (comp_ctx->is_indirect_mode) {
  346. FunctionPassManager FPM1;
  347. FPM1.addPass(ExpandMemoryOpPass());
  348. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM1)));
  349. }
  350. }
  351. MPM.run(*M, MAM);
  352. }
  353. char *
  354. aot_compress_aot_func_names(AOTCompContext *comp_ctx, uint32 *p_size)
  355. {
  356. std::vector<std::string> NameStrs;
  357. std::string Result;
  358. char buf[32], *compressed_str;
  359. uint32 compressed_str_len, i;
  360. for (i = 0; i < comp_ctx->func_ctx_count; i++) {
  361. snprintf(buf, sizeof(buf), "%s%d", AOT_FUNC_PREFIX, i);
  362. std::string str(buf);
  363. NameStrs.push_back(str);
  364. }
  365. #if LLVM_VERSION_MAJOR < 18
  366. #define collectGlobalObjectNameStrings collectPGOFuncNameStrings
  367. #endif
  368. if (collectGlobalObjectNameStrings(NameStrs, true, Result)) {
  369. aot_set_last_error("collect pgo func name strings failed");
  370. return NULL;
  371. }
  372. compressed_str_len = (uint32)Result.size();
  373. if (!(compressed_str = (char *)wasm_runtime_malloc(compressed_str_len))) {
  374. aot_set_last_error("allocate memory failed");
  375. return NULL;
  376. }
  377. bh_memcpy_s(compressed_str, compressed_str_len, Result.c_str(),
  378. compressed_str_len);
  379. *p_size = compressed_str_len;
  380. return compressed_str;
  381. }