simd_conversions.c

/*
 * Copyright (C) 2019 Intel Corporation. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */

#include "simd_conversions.h"
#include "simd_common.h"
#include "../aot_emit_exception.h"
#include "../aot_emit_numberic.h"
#include "../../aot/aot_runtime.h"

static bool
simd_integer_narrow_x86(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                        LLVMTypeRef in_vector_type, LLVMTypeRef out_vector_type,
                        const char *intrinsic)
{
    LLVMValueRef vector1, vector2, result;
    LLVMTypeRef param_types[2] = { in_vector_type, in_vector_type };

    if (!(vector2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                              in_vector_type, "vec2"))
        || !(vector1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                                 in_vector_type, "vec1"))) {
        return false;
    }

    if (!(result = aot_call_llvm_intrinsic(comp_ctx, func_ctx, intrinsic,
                                           out_vector_type, param_types, 2,
                                           vector1, vector2))) {
        HANDLE_FAILURE("LLVMBuildCall");
        return false;
    }

    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
}
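
/* Lane shapes handled by simd_saturate() below: the first three match the
 * v128 interpretations i16x8 / i32x4 / i64x2, while e_sat_i32x8 is the
 * widened 8 x i32 shape used as an intermediate by i16x8.q15mulr_sat. */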
enum integer_sat_type {
    e_sat_i16x8 = 0,
    e_sat_i32x4,
    e_sat_i64x2,
    e_sat_i32x8,
};
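
/* Clamp each lane of `vector` into [min, max] with llvm.{s,u}min and
 * llvm.{s,u}max, i.e. result[i] = max(min(vector[i], max[i]), min[i]).
 * For example, clamping an i16 lane to [-128, 127] leaves 100 unchanged,
 * turns 300 into 127 and -300 into -128. */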
static LLVMValueRef
simd_saturate(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
              enum integer_sat_type itype, LLVMValueRef vector,
              LLVMValueRef min, LLVMValueRef max, bool is_signed)
{
    LLVMValueRef result;
    LLVMTypeRef vector_type;
    LLVMTypeRef param_types[][2] = {
        { V128_i16x8_TYPE, V128_i16x8_TYPE },
        { V128_i32x4_TYPE, V128_i32x4_TYPE },
        { V128_i64x2_TYPE, V128_i64x2_TYPE },
        { 0 },
    };

    const char *smin_intrinsic[] = {
        "llvm.smin.v8i16",
        "llvm.smin.v4i32",
        "llvm.smin.v2i64",
        "llvm.smin.v8i32",
    };
    const char *umin_intrinsic[] = {
        "llvm.umin.v8i16",
        "llvm.umin.v4i32",
        "llvm.umin.v2i64",
        "llvm.umin.v8i32",
    };
    const char *smax_intrinsic[] = {
        "llvm.smax.v8i16",
        "llvm.smax.v4i32",
        "llvm.smax.v2i64",
        "llvm.smax.v8i32",
    };
    const char *umax_intrinsic[] = {
        "llvm.umax.v8i16",
        "llvm.umax.v4i32",
        "llvm.umax.v2i64",
        "llvm.umax.v8i32",
    };

    if (e_sat_i32x8 == itype) {
        if (!(vector_type = LLVMVectorType(I32_TYPE, 8))) {
            HANDLE_FAILURE("LLVMVectorType");
            return NULL;
        }
        param_types[itype][0] = vector_type;
        param_types[itype][1] = vector_type;
    }

    if (!(result = aot_call_llvm_intrinsic(
              comp_ctx, func_ctx,
              is_signed ? smin_intrinsic[itype] : umin_intrinsic[itype],
              param_types[itype][0], param_types[itype], 2, vector, max))
        || !(result = aot_call_llvm_intrinsic(
                 comp_ctx, func_ctx,
                 is_signed ? smax_intrinsic[itype] : umax_intrinsic[itype],
                 param_types[itype][0], param_types[itype], 2, result, min))) {
        return NULL;
    }

    return result;
}

static bool
simd_integer_narrow_common(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                           enum integer_sat_type itype, bool is_signed)
{
    LLVMValueRef vec1, vec2, min, max, mask, result;
    LLVMTypeRef in_vector_type[] = { V128_i16x8_TYPE, V128_i32x4_TYPE,
                                     V128_i64x2_TYPE };
    LLVMTypeRef min_max_type[] = { INT16_TYPE, I32_TYPE, I64_TYPE };
    LLVMTypeRef trunc_type[3] = { 0 };
    uint8 length[] = { 8, 4, 2 };

    int64 smin[] = { 0xff80, 0xffff8000, 0xffffffff80000000 };
    int64 umin[] = { 0x0, 0x0, 0x0 };
    int64 smax[] = { 0x007f, 0x00007fff, 0x000000007fffffff };
    int64 umax[] = { 0x00ff, 0x0000ffff, 0x00000000ffffffff };

    LLVMValueRef mask_element[] = {
        LLVM_CONST(i32_zero),     LLVM_CONST(i32_one),
        LLVM_CONST(i32_two),      LLVM_CONST(i32_three),
        LLVM_CONST(i32_four),     LLVM_CONST(i32_five),
        LLVM_CONST(i32_six),      LLVM_CONST(i32_seven),
        LLVM_CONST(i32_eight),    LLVM_CONST(i32_nine),
        LLVM_CONST(i32_ten),      LLVM_CONST(i32_eleven),
        LLVM_CONST(i32_twelve),   LLVM_CONST(i32_thirteen),
        LLVM_CONST(i32_fourteen), LLVM_CONST(i32_fifteen),
    };

    if (!(trunc_type[0] = LLVMVectorType(INT8_TYPE, 8))
        || !(trunc_type[1] = LLVMVectorType(INT16_TYPE, 4))
        || !(trunc_type[2] = LLVMVectorType(I32_TYPE, 2))) {
        HANDLE_FAILURE("LLVMVectorType");
        return false;
    }

    if (!(vec2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                           in_vector_type[itype], "vec2"))
        || !(vec1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                              in_vector_type[itype], "vec1"))) {
        return false;
    }

    if (!(max = simd_build_splat_const_integer_vector(
              comp_ctx, min_max_type[itype],
              is_signed ? smax[itype] : umax[itype], length[itype]))
        || !(min = simd_build_splat_const_integer_vector(
                 comp_ctx, min_max_type[itype],
                 is_signed ? smin[itype] : umin[itype], length[itype]))) {
        return false;
    }

    /* Refer to:
     * https://github.com/WebAssembly/spec/blob/main/proposals/simd/SIMD.md#integer-to-integer-narrowing
     * Regardless of whether the operation is signed or unsigned, the input
     * lanes are interpreted as signed integers.
     */
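    /* Worked example for i8x16.narrow_i16x8: an i16 lane of 0x0100 (256)
     * saturates to 0x7f (127) for the signed narrow and to 0xff (255) for
     * the unsigned narrow; a lane of 0xff80 (-128) stays -128 when signed
     * and clamps to 0 when unsigned, since the input is read as signed. */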
    if (!(vec1 = simd_saturate(comp_ctx, func_ctx, itype, vec1, min, max,
                               true))
        || !(vec2 = simd_saturate(comp_ctx, func_ctx, itype, vec2, min,
                                  max, true))) {
        return false;
    }

    /* trunc */
    if (!(vec1 = LLVMBuildTrunc(comp_ctx->builder, vec1, trunc_type[itype],
                                "vec1_trunc"))
        || !(vec2 = LLVMBuildTrunc(comp_ctx->builder, vec2, trunc_type[itype],
                                   "vec2_trunc"))) {
        HANDLE_FAILURE("LLVMBuildTrunc");
        return false;
    }

    /* combine */
    if (!(mask = LLVMConstVector(mask_element, (length[itype] << 1)))) {
        HANDLE_FAILURE("LLVMConstVector");
        return false;
    }

    if (!(result = LLVMBuildShuffleVector(comp_ctx->builder, vec1, vec2, mask,
                                          "vec_shuffle"))) {
        HANDLE_FAILURE("LLVMBuildShuffleVector");
        return false;
    }

    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
}

bool
aot_compile_simd_i8x16_narrow_i16x8(AOTCompContext *comp_ctx,
                                    AOTFuncContext *func_ctx, bool is_signed)
{
    if (is_target_x86(comp_ctx)) {
        return simd_integer_narrow_x86(
            comp_ctx, func_ctx, V128_i16x8_TYPE, V128_i8x16_TYPE,
            is_signed ? "llvm.x86.sse2.packsswb.128"
                      : "llvm.x86.sse2.packuswb.128");
    }
    else {
        return simd_integer_narrow_common(comp_ctx, func_ctx, e_sat_i16x8,
                                          is_signed);
    }
}

bool
aot_compile_simd_i16x8_narrow_i32x4(AOTCompContext *comp_ctx,
                                    AOTFuncContext *func_ctx, bool is_signed)
{
    if (is_target_x86(comp_ctx)) {
        return simd_integer_narrow_x86(comp_ctx, func_ctx, V128_i32x4_TYPE,
                                       V128_i16x8_TYPE,
                                       is_signed ? "llvm.x86.sse2.packssdw.128"
                                                 : "llvm.x86.sse41.packusdw");
    }
    else {
        return simd_integer_narrow_common(comp_ctx, func_ctx, e_sat_i32x4,
                                          is_signed);
    }
}

bool
aot_compile_simd_i32x4_narrow_i64x2(AOTCompContext *comp_ctx,
                                    AOTFuncContext *func_ctx, bool is_signed)
{
    /* TODO: x86 intrinsics */
    return simd_integer_narrow_common(comp_ctx, func_ctx, e_sat_i64x2,
                                      is_signed);
}

enum integer_extend_type {
    e_ext_i8x16,
    e_ext_i16x8,
    e_ext_i32x4,
};
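
/* Extend one half of `vector` to the next wider lane type: `lower_half`
 * selects lanes 0..N-1, otherwise lanes N..2N-1 (N = sub_vector_length),
 * and the selected half is then sign- or zero-extended,
 * e.g. <16 x i8> -> <8 x i8> -> <8 x i16>. */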
static LLVMValueRef
simd_integer_extension(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                       enum integer_extend_type itype, LLVMValueRef vector,
                       bool lower_half, bool is_signed)
{
    LLVMValueRef mask, sub_vector, result;
    LLVMValueRef bits[] = {
        LLVM_CONST(i32_zero),     LLVM_CONST(i32_one),
        LLVM_CONST(i32_two),      LLVM_CONST(i32_three),
        LLVM_CONST(i32_four),     LLVM_CONST(i32_five),
        LLVM_CONST(i32_six),      LLVM_CONST(i32_seven),
        LLVM_CONST(i32_eight),    LLVM_CONST(i32_nine),
        LLVM_CONST(i32_ten),      LLVM_CONST(i32_eleven),
        LLVM_CONST(i32_twelve),   LLVM_CONST(i32_thirteen),
        LLVM_CONST(i32_fourteen), LLVM_CONST(i32_fifteen),
    };
    LLVMTypeRef out_vector_type[] = { V128_i16x8_TYPE, V128_i32x4_TYPE,
                                      V128_i64x2_TYPE };
    LLVMValueRef undef[] = { LLVM_CONST(i8x16_undef), LLVM_CONST(i16x8_undef),
                             LLVM_CONST(i32x4_undef) };
    uint32 sub_vector_length[] = { 8, 4, 2 };

    if (!(mask = lower_half ? LLVMConstVector(bits, sub_vector_length[itype])
                            : LLVMConstVector(bits + sub_vector_length[itype],
                                              sub_vector_length[itype]))) {
        HANDLE_FAILURE("LLVMConstVector");
        return NULL;
    }

    /* retrieve the low or high half */
    if (!(sub_vector = LLVMBuildShuffleVector(comp_ctx->builder, vector,
                                              undef[itype], mask, "half"))) {
        HANDLE_FAILURE("LLVMBuildShuffleVector");
        return NULL;
    }

    if (is_signed) {
        if (!(result = LLVMBuildSExt(comp_ctx->builder, sub_vector,
                                     out_vector_type[itype], "sext"))) {
            HANDLE_FAILURE("LLVMBuildSExt");
            return NULL;
        }
    }
    else {
        if (!(result = LLVMBuildZExt(comp_ctx->builder, sub_vector,
                                     out_vector_type[itype], "zext"))) {
            HANDLE_FAILURE("LLVMBuildZExt");
            return NULL;
        }
    }

    return result;
}

static bool
simd_integer_extension_wrapper(AOTCompContext *comp_ctx,
                               AOTFuncContext *func_ctx,
                               enum integer_extend_type itype, bool lower_half,
                               bool is_signed)
{
    LLVMValueRef vector, result;
    LLVMTypeRef in_vector_type[] = { V128_i8x16_TYPE, V128_i16x8_TYPE,
                                     V128_i32x4_TYPE };

    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                             in_vector_type[itype], "vec"))) {
        return false;
    }

    if (!(result = simd_integer_extension(comp_ctx, func_ctx, itype, vector,
                                          lower_half, is_signed))) {
        return false;
    }

    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
}

bool
aot_compile_simd_i16x8_extend_i8x16(AOTCompContext *comp_ctx,
                                    AOTFuncContext *func_ctx, bool lower_half,
                                    bool is_signed)
{
    return simd_integer_extension_wrapper(comp_ctx, func_ctx, e_ext_i8x16,
                                          lower_half, is_signed);
}

bool
aot_compile_simd_i32x4_extend_i16x8(AOTCompContext *comp_ctx,
                                    AOTFuncContext *func_ctx, bool lower_half,
                                    bool is_signed)
{
    return simd_integer_extension_wrapper(comp_ctx, func_ctx, e_ext_i16x8,
                                          lower_half, is_signed);
}

bool
aot_compile_simd_i64x2_extend_i32x4(AOTCompContext *comp_ctx,
                                    AOTFuncContext *func_ctx, bool lower_half,
                                    bool is_signed)
{
    return simd_integer_extension_wrapper(comp_ctx, func_ctx, e_ext_i32x4,
                                          lower_half, is_signed);
}

static LLVMValueRef
simd_trunc_sat(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
               const char *intrinsics, LLVMTypeRef in_vector_type,
               LLVMTypeRef out_vector_type)
{
    LLVMValueRef vector, result;
    LLVMTypeRef param_types[] = { in_vector_type };

    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, in_vector_type,
                                             "vector"))) {
        return NULL;
    }

    if (!(result = aot_call_llvm_intrinsic(comp_ctx, func_ctx, intrinsics,
                                           out_vector_type, param_types, 1,
                                           vector))) {
        return NULL;
    }

    return result;
}
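
/* The llvm.fptosi.sat / llvm.fptoui.sat intrinsics clamp out-of-range
 * values to the destination integer range and map NaN to 0, which matches
 * the trunc_sat behaviour required by the Wasm SIMD instructions below. */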
bool
aot_compile_simd_i32x4_trunc_sat_f32x4(AOTCompContext *comp_ctx,
                                       AOTFuncContext *func_ctx, bool is_signed)
{
    LLVMValueRef result;

    if (!(result = simd_trunc_sat(comp_ctx, func_ctx,
                                  is_signed ? "llvm.fptosi.sat.v4i32.v4f32"
                                            : "llvm.fptoui.sat.v4i32.v4f32",
                                  V128_f32x4_TYPE, V128_i32x4_TYPE))) {
        return false;
    }

    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
}

bool
aot_compile_simd_i32x4_trunc_sat_f64x2(AOTCompContext *comp_ctx,
                                       AOTFuncContext *func_ctx, bool is_signed)
{
    LLVMValueRef result, zero, mask;
    LLVMTypeRef out_vector_type;
    LLVMValueRef lanes[] = {
        LLVM_CONST(i32_zero),
        LLVM_CONST(i32_one),
        LLVM_CONST(i32_two),
        LLVM_CONST(i32_three),
    };

    if (!(out_vector_type = LLVMVectorType(I32_TYPE, 2))) {
        HANDLE_FAILURE("LLVMVectorType");
        return false;
    }

    if (!(result = simd_trunc_sat(comp_ctx, func_ctx,
                                  is_signed ? "llvm.fptosi.sat.v2i32.v2f64"
                                            : "llvm.fptoui.sat.v2i32.v2f64",
                                  V128_f64x2_TYPE, out_vector_type))) {
        return false;
    }

    if (!(zero = LLVMConstNull(out_vector_type))) {
        HANDLE_FAILURE("LLVMConstNull");
        return false;
    }

    /* v2i32 -> v4i32, the two high lanes are taken from the zero vector */
    if (!(mask = LLVMConstVector(lanes, 4))) {
        HANDLE_FAILURE("LLVMConstVector");
        return false;
    }

    if (!(result = LLVMBuildShuffleVector(comp_ctx->builder, result, zero, mask,
                                          "extend"))) {
        HANDLE_FAILURE("LLVMBuildShuffleVector");
        return false;
    }

    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
}

static LLVMValueRef
simd_integer_convert(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                     bool is_signed, LLVMValueRef vector,
                     LLVMTypeRef out_vector_type)
{
    LLVMValueRef result;

    result = is_signed ? LLVMBuildSIToFP(comp_ctx->builder, vector,
                                         out_vector_type, "converted")
                       : LLVMBuildUIToFP(comp_ctx->builder, vector,
                                         out_vector_type, "converted");
    if (!result) {
        HANDLE_FAILURE("LLVMBuildSIToFP/LLVMBuildUIToFP");
    }

    return result;
}

bool
aot_compile_simd_f32x4_convert_i32x4(AOTCompContext *comp_ctx,
                                     AOTFuncContext *func_ctx, bool is_signed)
{
    LLVMValueRef vector, result;

    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                             V128_i32x4_TYPE, "vec"))) {
        return false;
    }

    if (!(result = simd_integer_convert(comp_ctx, func_ctx, is_signed, vector,
                                        V128_f32x4_TYPE))) {
        return false;
    }

    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
}

bool
aot_compile_simd_f64x2_convert_i32x4(AOTCompContext *comp_ctx,
                                     AOTFuncContext *func_ctx, bool is_signed)
{
    LLVMValueRef vector, mask, result;
    LLVMValueRef lanes[] = {
        LLVM_CONST(i32_zero),
        LLVM_CONST(i32_one),
    };
    LLVMTypeRef out_vector_type;

    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                             V128_i32x4_TYPE, "vec"))) {
        return false;
    }

    if (!(out_vector_type = LLVMVectorType(F64_TYPE, 4))) {
        HANDLE_FAILURE("LLVMVectorType");
        return false;
    }

    if (!(result = simd_integer_convert(comp_ctx, func_ctx, is_signed, vector,
                                        out_vector_type))) {
        return false;
    }

    /* v4f64 -> v2f64, keep only the two low (converted) lanes */
    if (!(mask = LLVMConstVector(lanes, 2))) {
        HANDLE_FAILURE("LLVMConstVector");
        return false;
    }

    if (!(result = LLVMBuildShuffleVector(comp_ctx->builder, result, result,
                                          mask, "trunc"))) {
        HANDLE_FAILURE("LLVMBuildShuffleVector");
        return false;
    }

    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
}
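
/* extadd_pairwise adds adjacent lane pairs after widening them:
 * result[i] = extend(v[2 * i]) + extend(v[2 * i + 1]).
 * Implemented here by shuffling the even and odd lanes into two half-width
 * vectors, widening both, and adding them lane-wise. */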
static bool
simd_extadd_pairwise(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                     LLVMTypeRef in_vector_type, LLVMTypeRef out_vector_type,
                     bool is_signed)
{
    LLVMValueRef vector, even_mask, odd_mask, sub_vector_even, sub_vector_odd,
        result;

    LLVMValueRef even_element[] = {
        LLVM_CONST(i32_zero),   LLVM_CONST(i32_two),   LLVM_CONST(i32_four),
        LLVM_CONST(i32_six),    LLVM_CONST(i32_eight), LLVM_CONST(i32_ten),
        LLVM_CONST(i32_twelve), LLVM_CONST(i32_fourteen),
    };
    LLVMValueRef odd_element[] = {
        LLVM_CONST(i32_one),      LLVM_CONST(i32_three),
        LLVM_CONST(i32_five),     LLVM_CONST(i32_seven),
        LLVM_CONST(i32_nine),     LLVM_CONST(i32_eleven),
        LLVM_CONST(i32_thirteen), LLVM_CONST(i32_fifteen),
    };

    /* assumes the only cases are i16x8 from i8x16 and i32x4 from i16x8 */
    uint8 mask_length = V128_i16x8_TYPE == out_vector_type ? 8 : 4;

    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, in_vector_type,
                                             "vector"))) {
        return false;
    }

    if (!(even_mask = LLVMConstVector(even_element, mask_length))
        || !(odd_mask = LLVMConstVector(odd_element, mask_length))) {
        HANDLE_FAILURE("LLVMConstVector");
        return false;
    }

    /* split the vector into its even and odd lanes,
     * e.g. a <16 x i8> vector into two <8 x i8> vectors */
    if (!(sub_vector_even = LLVMBuildShuffleVector(
              comp_ctx->builder, vector, vector, even_mask, "pick_even"))
        || !(sub_vector_odd = LLVMBuildShuffleVector(
                 comp_ctx->builder, vector, vector, odd_mask, "pick_odd"))) {
        HANDLE_FAILURE("LLVMBuildShuffleVector");
        return false;
    }

    /* sext/zext each half to the wider lane type, e.g. <8 x i8> to <8 x i16> */
    if (is_signed) {
        if (!(sub_vector_even =
                  LLVMBuildSExt(comp_ctx->builder, sub_vector_even,
                                out_vector_type, "even_sext"))
            || !(sub_vector_odd =
                     LLVMBuildSExt(comp_ctx->builder, sub_vector_odd,
                                   out_vector_type, "odd_sext"))) {
            HANDLE_FAILURE("LLVMBuildSExt");
            return false;
        }
    }
    else {
        if (!(sub_vector_even =
                  LLVMBuildZExt(comp_ctx->builder, sub_vector_even,
                                out_vector_type, "even_zext"))
            || !(sub_vector_odd =
                     LLVMBuildZExt(comp_ctx->builder, sub_vector_odd,
                                   out_vector_type, "odd_zext"))) {
            HANDLE_FAILURE("LLVMBuildZExt");
            return false;
        }
    }

    if (!(result = LLVMBuildAdd(comp_ctx->builder, sub_vector_even,
                                sub_vector_odd, "sum"))) {
        HANDLE_FAILURE("LLVMBuildAdd");
        return false;
    }

    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
}

bool
aot_compile_simd_i16x8_extadd_pairwise_i8x16(AOTCompContext *comp_ctx,
                                             AOTFuncContext *func_ctx,
                                             bool is_signed)
{
    return simd_extadd_pairwise(comp_ctx, func_ctx, V128_i8x16_TYPE,
                                V128_i16x8_TYPE, is_signed);
}

bool
aot_compile_simd_i32x4_extadd_pairwise_i16x8(AOTCompContext *comp_ctx,
                                             AOTFuncContext *func_ctx,
                                             bool is_signed)
{
    return simd_extadd_pairwise(comp_ctx, func_ctx, V128_i16x8_TYPE,
                                V128_i32x4_TYPE, is_signed);
}
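
/* i16x8.q15mulr_sat_s computes sat16((x * y + 0x4000) >> 15) per lane.
 * Worked example: x = y = -32768 (0x8000) gives
 * (0x40000000 + 0x4000) >> 15 = 32768, which saturates to 32767 (0x7fff);
 * this is the only input pair whose exact result overflows i16. */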
bool
aot_compile_simd_i16x8_q15mulr_sat(AOTCompContext *comp_ctx,
                                   AOTFuncContext *func_ctx)
{
    LLVMValueRef lhs, rhs, pad, offset, min, max, result;
    LLVMTypeRef vector_ext_type;

    if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i16x8_TYPE,
                                          "rhs"))
        || !(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                             V128_i16x8_TYPE, "lhs"))) {
        return false;
    }

    if (!(vector_ext_type = LLVMVectorType(I32_TYPE, 8))) {
        HANDLE_FAILURE("LLVMVectorType");
        return false;
    }

    if (!(lhs = LLVMBuildSExt(comp_ctx->builder, lhs, vector_ext_type,
                              "lhs_v8i32"))
        || !(rhs = LLVMBuildSExt(comp_ctx->builder, rhs, vector_ext_type,
                                 "rhs_v8i32"))) {
        HANDLE_FAILURE("LLVMBuildSExt");
        return false;
    }

    /* the rounding constant 0x4000 and the shift amount 15 */
    if (!(pad = simd_build_splat_const_integer_vector(comp_ctx, I32_TYPE,
                                                      0x4000, 8))
        || !(offset = simd_build_splat_const_integer_vector(comp_ctx, I32_TYPE,
                                                            15, 8))) {
        return false;
    }

    /* TODO: look for an x86 intrinsic for an integer "fused multiply-add" */
    /* S.SignedSaturate((x * y + 0x4000) >> 15) */
    if (!(result = LLVMBuildMul(comp_ctx->builder, lhs, rhs, "mul"))) {
        HANDLE_FAILURE("LLVMBuildMul");
        return false;
    }

    if (!(result = LLVMBuildAdd(comp_ctx->builder, result, pad, "add"))) {
        HANDLE_FAILURE("LLVMBuildAdd");
        return false;
    }

    if (!(result = LLVMBuildAShr(comp_ctx->builder, result, offset, "ashr"))) {
        HANDLE_FAILURE("LLVMBuildAShr");
        return false;
    }

    if (!(min = simd_build_splat_const_integer_vector(comp_ctx, I32_TYPE,
                                                      0xffff8000, 8))
        || !(max = simd_build_splat_const_integer_vector(comp_ctx, I32_TYPE,
                                                         0x00007fff, 8))) {
        return false;
    }

    /* clamp in i32 first, then truncate; the explicit clamp + trunc pattern
     * can be optimized into a saturating narrow by the backend */
    if (!(result = simd_saturate(comp_ctx, func_ctx, e_sat_i32x8, result, min,
                                 max, true))) {
        return false;
    }

    if (!(result = LLVMBuildTrunc(comp_ctx->builder, result, V128_i16x8_TYPE,
                                  "down_to_v8i16"))) {
        HANDLE_FAILURE("LLVMBuildTrunc");
        return false;
    }

    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
}

enum integer_extmul_type {
    e_i16x8_extmul_i8x16,
    e_i32x4_extmul_i16x8,
    e_i64x2_extmul_i32x4,
};
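
/* extmul widens the selected half (low or high) of both operands with
 * simd_integer_extension() and multiplies them in the wider lane type,
 * so the full double-width product of each lane pair is kept. */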
static bool
simd_integer_extmul(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                    bool lower_half, bool is_signed,
                    enum integer_extmul_type itype)
{
    LLVMValueRef vec1, vec2, result;
    enum integer_extend_type ext_type[] = {
        e_ext_i8x16,
        e_ext_i16x8,
        e_ext_i32x4,
    };
    LLVMTypeRef in_vector_type[] = {
        V128_i8x16_TYPE,
        V128_i16x8_TYPE,
        V128_i32x4_TYPE,
    };

    if (!(vec1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                           in_vector_type[itype], "vec1"))
        || !(vec2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                              in_vector_type[itype], "vec2"))) {
        return false;
    }

    if (!(vec1 = simd_integer_extension(comp_ctx, func_ctx, ext_type[itype],
                                        vec1, lower_half, is_signed))
        || !(vec2 = simd_integer_extension(comp_ctx, func_ctx, ext_type[itype],
                                           vec2, lower_half, is_signed))) {
        return false;
    }

    if (!(result = LLVMBuildMul(comp_ctx->builder, vec1, vec2, "product"))) {
        HANDLE_FAILURE("LLVMBuildMul");
        return false;
    }

    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
}

bool
aot_compile_simd_i16x8_extmul_i8x16(AOTCompContext *comp_ctx,
                                    AOTFuncContext *func_ctx, bool lower_half,
                                    bool is_signed)
{
    return simd_integer_extmul(comp_ctx, func_ctx, lower_half, is_signed,
                               e_i16x8_extmul_i8x16);
}

bool
aot_compile_simd_i32x4_extmul_i16x8(AOTCompContext *comp_ctx,
                                    AOTFuncContext *func_ctx, bool lower_half,
                                    bool is_signed)
{
    return simd_integer_extmul(comp_ctx, func_ctx, lower_half, is_signed,
                               e_i32x4_extmul_i16x8);
}

bool
aot_compile_simd_i64x2_extmul_i32x4(AOTCompContext *comp_ctx,
                                    AOTFuncContext *func_ctx, bool lower_half,
                                    bool is_signed)
{
    return simd_integer_extmul(comp_ctx, func_ctx, lower_half, is_signed,
                               e_i64x2_extmul_i32x4);
}