simd_conversions.c

/*
 * Copyright (C) 2019 Intel Corporation. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */

#include "simd_conversions.h"
#include "simd_common.h"
#include "../aot_emit_exception.h"
#include "../aot_emit_numberic.h"
#include "../../aot/aot_runtime.h"

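/* On x86 targets, lower the narrow operations directly to the SSE pack
 * intrinsics: pop two input vectors, call the given packing intrinsic, and
 * push the packed result back as a v128. */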
static bool
simd_integer_narrow_x86(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                        LLVMTypeRef in_vector_type, LLVMTypeRef out_vector_type,
                        const char *intrinsic)
{
    LLVMValueRef vector1, vector2, result;
    LLVMTypeRef param_types[2] = { in_vector_type, in_vector_type };

    if (!(vector2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                              in_vector_type, "vec2"))
        || !(vector1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                                 in_vector_type, "vec1"))) {
        return false;
    }

    if (!(result = aot_call_llvm_intrinsic(comp_ctx, func_ctx, intrinsic,
                                           out_vector_type, param_types, 2,
                                           vector1, vector2))) {
        HANDLE_FAILURE("LLVMBuildCall");
        return false;
    }

    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
}

enum integer_sat_type {
    e_sat_i16x8 = 0,
    e_sat_i32x4,
    e_sat_i64x2,
    e_sat_i32x8,
};

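/* Clamp every lane of `vector` to [min, max] by chaining the
 * llvm.{s,u}min / llvm.{s,u}max vector intrinsics. e_sat_i32x8 is the
 * widened 8 x i32 shape used by q15mulr_sat before truncation. */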
static LLVMValueRef
simd_saturate(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
              enum integer_sat_type itype, LLVMValueRef vector,
              LLVMValueRef min, LLVMValueRef max, bool is_signed)
{
    LLVMValueRef result;
    LLVMTypeRef vector_type;

    LLVMTypeRef param_types[][2] = {
        { V128_i16x8_TYPE, V128_i16x8_TYPE },
        { V128_i32x4_TYPE, V128_i32x4_TYPE },
        { V128_i64x2_TYPE, V128_i64x2_TYPE },
        { 0 },
    };

    const char *smin_intrinsic[] = {
        "llvm.smin.v8i16",
        "llvm.smin.v4i32",
        "llvm.smin.v2i64",
        "llvm.smin.v8i32",
    };

    const char *umin_intrinsic[] = {
        "llvm.umin.v8i16",
        "llvm.umin.v4i32",
        "llvm.umin.v2i64",
        "llvm.umin.v8i32",
    };

    const char *smax_intrinsic[] = {
        "llvm.smax.v8i16",
        "llvm.smax.v4i32",
        "llvm.smax.v2i64",
        "llvm.smax.v8i32",
    };

    const char *umax_intrinsic[] = {
        "llvm.umax.v8i16",
        "llvm.umax.v4i32",
        "llvm.umax.v2i64",
        "llvm.umax.v8i32",
    };

    if (e_sat_i32x8 == itype) {
        if (!(vector_type = LLVMVectorType(I32_TYPE, 8))) {
            HANDLE_FAILURE("LLVMVectorType");
            return NULL;
        }

        param_types[itype][0] = vector_type;
        param_types[itype][1] = vector_type;
    }

    if (!(result = aot_call_llvm_intrinsic(
              comp_ctx, func_ctx,
              is_signed ? smin_intrinsic[itype] : umin_intrinsic[itype],
              param_types[itype][0], param_types[itype], 2, vector, max))
        || !(result = aot_call_llvm_intrinsic(
                 comp_ctx, func_ctx,
                 is_signed ? smax_intrinsic[itype] : umax_intrinsic[itype],
                 param_types[itype][0], param_types[itype], 2, result, min))) {
        return NULL;
    }

    return result;
}

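/* Generic (non-x86) lowering of the narrow operations: clamp both input
 * vectors to the destination lane range, truncate each lane, then interleave
 * the two halves into one v128 with a shuffle. */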
static bool
simd_integer_narrow_common(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                           enum integer_sat_type itype, bool is_signed)
{
    LLVMValueRef vec1, vec2, min, max, mask, result;
    LLVMTypeRef in_vector_type[] = { V128_i16x8_TYPE, V128_i32x4_TYPE,
                                     V128_i64x2_TYPE };
    LLVMTypeRef min_max_type[] = { INT16_TYPE, I32_TYPE, I64_TYPE };
    LLVMTypeRef trunc_type[3] = { 0 };
    uint8 length[] = { 8, 4, 2 };

    int64 smin[] = { 0xff80, 0xffFF8000, 0xffFFffFF80000000 };
    int64 umin[] = { 0x0, 0x0, 0x0 };
    int64 smax[] = { 0x007f, 0x00007fff, 0x000000007fFFffFF };
    int64 umax[] = { 0x00ff, 0x0000ffff, 0x00000000ffFFffFF };

    LLVMValueRef mask_element[] = {
        LLVM_CONST(i32_zero),     LLVM_CONST(i32_one),
        LLVM_CONST(i32_two),      LLVM_CONST(i32_three),
        LLVM_CONST(i32_four),     LLVM_CONST(i32_five),
        LLVM_CONST(i32_six),      LLVM_CONST(i32_seven),
        LLVM_CONST(i32_eight),    LLVM_CONST(i32_nine),
        LLVM_CONST(i32_ten),      LLVM_CONST(i32_eleven),
        LLVM_CONST(i32_twelve),   LLVM_CONST(i32_thirteen),
        LLVM_CONST(i32_fourteen), LLVM_CONST(i32_fifteen),
    };

    if (!(trunc_type[0] = LLVMVectorType(INT8_TYPE, 8))
        || !(trunc_type[1] = LLVMVectorType(INT16_TYPE, 4))
        || !(trunc_type[2] = LLVMVectorType(I32_TYPE, 2))) {
        HANDLE_FAILURE("LLVMVectorType");
        return false;
    }

    if (!(vec2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                           in_vector_type[itype], "vec2"))
        || !(vec1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                              in_vector_type[itype], "vec1"))) {
        return false;
    }

    if (!(max = simd_build_splat_const_integer_vector(
              comp_ctx, min_max_type[itype],
              is_signed ? smax[itype] : umax[itype], length[itype]))
        || !(min = simd_build_splat_const_integer_vector(
                 comp_ctx, min_max_type[itype],
                 is_signed ? smin[itype] : umin[itype], length[itype]))) {
        return false;
    }

    /* sat */
    if (!(vec1 = simd_saturate(comp_ctx, func_ctx, itype, vec1, min, max,
                               is_signed))
        || !(vec2 = simd_saturate(comp_ctx, func_ctx, itype, vec2, min, max,
                                  is_signed))) {
        return false;
    }

    /* trunc */
    if (!(vec1 = LLVMBuildTrunc(comp_ctx->builder, vec1, trunc_type[itype],
                                "vec1_trunc"))
        || !(vec2 = LLVMBuildTrunc(comp_ctx->builder, vec2, trunc_type[itype],
                                   "vec2_trunc"))) {
        HANDLE_FAILURE("LLVMBuildTrunc");
        return false;
    }

    /* combine */
    if (!(mask = LLVMConstVector(mask_element, (length[itype] << 1)))) {
        HANDLE_FAILURE("LLVMConstVector");
        return false;
    }

    if (!(result = LLVMBuildShuffleVector(comp_ctx->builder, vec1, vec2, mask,
                                          "vec_shuffle"))) {
        HANDLE_FAILURE("LLVMBuildShuffleVector");
        return false;
    }

    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
}

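/* i8x16.narrow_i16x8_s/u and i16x8.narrow_i32x4_s/u: on x86 targets map
 * straight to the SSE2/SSE4.1 pack intrinsics, otherwise fall back to the
 * generic saturate-truncate-shuffle sequence above. */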
bool
aot_compile_simd_i8x16_narrow_i16x8(AOTCompContext *comp_ctx,
                                    AOTFuncContext *func_ctx, bool is_signed)
{
    if (is_target_x86(comp_ctx)) {
        return simd_integer_narrow_x86(
            comp_ctx, func_ctx, V128_i16x8_TYPE, V128_i8x16_TYPE,
            is_signed ? "llvm.x86.sse2.packsswb.128"
                      : "llvm.x86.sse2.packuswb.128");
    }
    else {
        return simd_integer_narrow_common(comp_ctx, func_ctx, e_sat_i16x8,
                                          is_signed);
    }
}

bool
aot_compile_simd_i16x8_narrow_i32x4(AOTCompContext *comp_ctx,
                                    AOTFuncContext *func_ctx, bool is_signed)
{
    if (is_target_x86(comp_ctx)) {
        return simd_integer_narrow_x86(comp_ctx, func_ctx, V128_i32x4_TYPE,
                                       V128_i16x8_TYPE,
                                       is_signed ? "llvm.x86.sse2.packssdw.128"
                                                 : "llvm.x86.sse41.packusdw");
    }
    else {
        return simd_integer_narrow_common(comp_ctx, func_ctx, e_sat_i32x4,
                                          is_signed);
    }
}

bool
aot_compile_simd_i32x4_narrow_i64x2(AOTCompContext *comp_ctx,
                                    AOTFuncContext *func_ctx, bool is_signed)
{
    /* TODO: x86 intrinsics */
    return simd_integer_narrow_common(comp_ctx, func_ctx, e_sat_i64x2,
                                      is_signed);
}

enum integer_extend_type {
    e_ext_i8x16,
    e_ext_i16x8,
    e_ext_i32x4,
};

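/* Shared helper for the extend/extmul family: shuffle out the low or high
 * half of `vector` and sign- or zero-extend its lanes to the next wider
 * lane type. */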
static LLVMValueRef
simd_integer_extension(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                       enum integer_extend_type itype, LLVMValueRef vector,
                       bool lower_half, bool is_signed)
{
    LLVMValueRef mask, sub_vector, result;
    LLVMValueRef bits[] = {
        LLVM_CONST(i32_zero),     LLVM_CONST(i32_one),
        LLVM_CONST(i32_two),      LLVM_CONST(i32_three),
        LLVM_CONST(i32_four),     LLVM_CONST(i32_five),
        LLVM_CONST(i32_six),      LLVM_CONST(i32_seven),
        LLVM_CONST(i32_eight),    LLVM_CONST(i32_nine),
        LLVM_CONST(i32_ten),      LLVM_CONST(i32_eleven),
        LLVM_CONST(i32_twelve),   LLVM_CONST(i32_thirteen),
        LLVM_CONST(i32_fourteen), LLVM_CONST(i32_fifteen),
    };
    LLVMTypeRef out_vector_type[] = { V128_i16x8_TYPE, V128_i32x4_TYPE,
                                      V128_i64x2_TYPE };
    LLVMValueRef undef[] = { LLVM_CONST(i8x16_undef), LLVM_CONST(i16x8_undef),
                             LLVM_CONST(i32x4_undef) };
    uint32 sub_vector_length[] = { 8, 4, 2 };

    if (!(mask = lower_half ? LLVMConstVector(bits, sub_vector_length[itype])
                            : LLVMConstVector(bits + sub_vector_length[itype],
                                              sub_vector_length[itype]))) {
        HANDLE_FAILURE("LLVMConstVector");
        return NULL;
    }

    /* retrieve the low or high half */
    if (!(sub_vector = LLVMBuildShuffleVector(comp_ctx->builder, vector,
                                              undef[itype], mask, "half"))) {
        HANDLE_FAILURE("LLVMBuildShuffleVector");
        return NULL;
    }

    if (is_signed) {
        if (!(result = LLVMBuildSExt(comp_ctx->builder, sub_vector,
                                     out_vector_type[itype], "sext"))) {
            HANDLE_FAILURE("LLVMBuildSExt");
            return NULL;
        }
    }
    else {
        if (!(result = LLVMBuildZExt(comp_ctx->builder, sub_vector,
                                     out_vector_type[itype], "zext"))) {
            HANDLE_FAILURE("LLVMBuildZExt");
            return NULL;
        }
    }

    return result;
}

static bool
simd_integer_extension_wrapper(AOTCompContext *comp_ctx,
                               AOTFuncContext *func_ctx,
                               enum integer_extend_type itype, bool lower_half,
                               bool is_signed)
{
    LLVMValueRef vector, result;
    LLVMTypeRef in_vector_type[] = { V128_i8x16_TYPE, V128_i16x8_TYPE,
                                     V128_i32x4_TYPE };

    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                             in_vector_type[itype], "vec"))) {
        return false;
    }

    if (!(result = simd_integer_extension(comp_ctx, func_ctx, itype, vector,
                                          lower_half, is_signed))) {
        return false;
    }

    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
}

bool
aot_compile_simd_i16x8_extend_i8x16(AOTCompContext *comp_ctx,
                                    AOTFuncContext *func_ctx, bool lower_half,
                                    bool is_signed)
{
    return simd_integer_extension_wrapper(comp_ctx, func_ctx, e_ext_i8x16,
                                          lower_half, is_signed);
}

bool
aot_compile_simd_i32x4_extend_i16x8(AOTCompContext *comp_ctx,
                                    AOTFuncContext *func_ctx, bool lower_half,
                                    bool is_signed)
{
    return simd_integer_extension_wrapper(comp_ctx, func_ctx, e_ext_i16x8,
                                          lower_half, is_signed);
}

bool
aot_compile_simd_i64x2_extend_i32x4(AOTCompContext *comp_ctx,
                                    AOTFuncContext *func_ctx, bool lower_half,
                                    bool is_signed)
{
    return simd_integer_extension_wrapper(comp_ctx, func_ctx, e_ext_i32x4,
                                          lower_half, is_signed);
}

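/* Saturating float-to-int truncation via the llvm.fpto{s,u}i.sat.* vector
 * intrinsics: out-of-range lanes clamp to the integer min/max and NaN lanes
 * become zero, matching the WASM trunc_sat semantics. */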
static LLVMValueRef
simd_trunc_sat(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
               const char *intrinsics, LLVMTypeRef in_vector_type,
               LLVMTypeRef out_vector_type)
{
    LLVMValueRef vector, result;
    LLVMTypeRef param_types[] = { in_vector_type };

    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, in_vector_type,
                                             "vector"))) {
        return NULL;
    }

    if (!(result = aot_call_llvm_intrinsic(comp_ctx, func_ctx, intrinsics,
                                           out_vector_type, param_types, 1,
                                           vector))) {
        return NULL;
    }

    return result;
}

bool
aot_compile_simd_i32x4_trunc_sat_f32x4(AOTCompContext *comp_ctx,
                                       AOTFuncContext *func_ctx, bool is_signed)
{
    LLVMValueRef result;

    if (!(result = simd_trunc_sat(comp_ctx, func_ctx,
                                  is_signed ? "llvm.fptosi.sat.v4i32.v4f32"
                                            : "llvm.fptoui.sat.v4i32.v4f32",
                                  V128_f32x4_TYPE, V128_i32x4_TYPE))) {
        return false;
    }

    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
}

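/* i32x4.trunc_sat_f64x2_s/u_zero: the two f64 lanes are truncated to a
 * <2 x i32> vector, then widened back to <4 x i32> by shuffling with a zero
 * vector so the upper two result lanes are zero. */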
bool
aot_compile_simd_i32x4_trunc_sat_f64x2(AOTCompContext *comp_ctx,
                                       AOTFuncContext *func_ctx, bool is_signed)
{
    LLVMValueRef result, zero, mask;
    LLVMTypeRef out_vector_type;
    LLVMValueRef lanes[] = {
        LLVM_CONST(i32_zero),
        LLVM_CONST(i32_one),
        LLVM_CONST(i32_two),
        LLVM_CONST(i32_three),
    };

    if (!(out_vector_type = LLVMVectorType(I32_TYPE, 2))) {
        HANDLE_FAILURE("LLVMVectorType");
        return false;
    }

    if (!(result = simd_trunc_sat(comp_ctx, func_ctx,
                                  is_signed ? "llvm.fptosi.sat.v2i32.v2f64"
                                            : "llvm.fptoui.sat.v2i32.v2f64",
                                  V128_f64x2_TYPE, out_vector_type))) {
        return false;
    }

    if (!(zero = LLVMConstNull(out_vector_type))) {
        HANDLE_FAILURE("LLVMConstNull");
        return false;
    }

    /* v2i32 -> v4i32 */
    if (!(mask = LLVMConstVector(lanes, 4))) {
        HANDLE_FAILURE("LLVMConstVector");
        return false;
    }

    if (!(result = LLVMBuildShuffleVector(comp_ctx->builder, result, zero, mask,
                                          "extend"))) {
        HANDLE_FAILURE("LLVMBuildShuffleVector");
        return false;
    }

    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
}

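/* Lane-wise integer-to-float conversion: emit sitofp or uitofp depending on
 * the signedness of the source lanes. */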
static LLVMValueRef
simd_integer_convert(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                     bool is_signed, LLVMValueRef vector,
                     LLVMTypeRef out_vector_type)
{
    LLVMValueRef result;

    result = is_signed ? LLVMBuildSIToFP(comp_ctx->builder, vector,
                                         out_vector_type, "converted")
                       : LLVMBuildUIToFP(comp_ctx->builder, vector,
                                         out_vector_type, "converted");
    if (!result) {
        HANDLE_FAILURE("LLVMBuildSIToFP/LLVMBuildUIToFP");
    }

    return result;
}

bool
aot_compile_simd_f32x4_convert_i32x4(AOTCompContext *comp_ctx,
                                     AOTFuncContext *func_ctx, bool is_signed)
{
    LLVMValueRef vector, result;

    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                             V128_i32x4_TYPE, "vec"))) {
        return false;
    }

    if (!(result = simd_integer_convert(comp_ctx, func_ctx, is_signed, vector,
                                        V128_f32x4_TYPE))) {
        return false;
    }

    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
}

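/* f64x2.convert_low_i32x4_s/u: convert all four i32 lanes to f64 (a
 * <4 x f64> intermediate), then shuffle out the low two lanes to form the
 * f64x2 result. */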
bool
aot_compile_simd_f64x2_convert_i32x4(AOTCompContext *comp_ctx,
                                     AOTFuncContext *func_ctx, bool is_signed)
{
    LLVMValueRef vector, mask, result;
    LLVMValueRef lanes[] = {
        LLVM_CONST(i32_zero),
        LLVM_CONST(i32_one),
    };
    LLVMTypeRef out_vector_type;

    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                             V128_i32x4_TYPE, "vec"))) {
        return false;
    }

    if (!(out_vector_type = LLVMVectorType(F64_TYPE, 4))) {
        HANDLE_FAILURE("LLVMVectorType");
        return false;
    }

    if (!(result = simd_integer_convert(comp_ctx, func_ctx, is_signed, vector,
                                        out_vector_type))) {
        return false;
    }

    /* v4f64 -> v2f64 */
    if (!(mask = LLVMConstVector(lanes, 2))) {
        HANDLE_FAILURE("LLVMConstVector");
        return false;
    }

    if (!(result = LLVMBuildShuffleVector(comp_ctx->builder, result, result,
                                          mask, "trunc"))) {
        HANDLE_FAILURE("LLVMBuildShuffleVector");
        return false;
    }

    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
}

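/* extadd_pairwise: split the input into its even- and odd-indexed lanes with
 * two shuffles, extend both halves to the wider lane type, and add them, so
 * each output lane is the sum of one adjacent pair of input lanes. */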
static bool
simd_extadd_pairwise(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                     LLVMTypeRef in_vector_type, LLVMTypeRef out_vector_type,
                     bool is_signed)
{
    LLVMValueRef vector, even_mask, odd_mask, sub_vector_even, sub_vector_odd,
        result;

    LLVMValueRef even_element[] = {
        LLVM_CONST(i32_zero),   LLVM_CONST(i32_two),   LLVM_CONST(i32_four),
        LLVM_CONST(i32_six),    LLVM_CONST(i32_eight), LLVM_CONST(i32_ten),
        LLVM_CONST(i32_twelve), LLVM_CONST(i32_fourteen),
    };

    LLVMValueRef odd_element[] = {
        LLVM_CONST(i32_one),      LLVM_CONST(i32_three),
        LLVM_CONST(i32_five),     LLVM_CONST(i32_seven),
        LLVM_CONST(i32_nine),     LLVM_CONST(i32_eleven),
        LLVM_CONST(i32_thirteen), LLVM_CONST(i32_fifteen),
    };

    /* assumption about i16x8 from i8x16 and i32x4 from i16x8 */
    uint8 mask_length = V128_i16x8_TYPE == out_vector_type ? 8 : 4;

    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, in_vector_type,
                                             "vector"))) {
        return false;
    }

    if (!(even_mask = LLVMConstVector(even_element, mask_length))
        || !(odd_mask = LLVMConstVector(odd_element, mask_length))) {
        HANDLE_FAILURE("LLVMConstVector");
        return false;
    }

    /* shuffle a <16xi8> vector to two <8xi8> vectors */
    if (!(sub_vector_even = LLVMBuildShuffleVector(
              comp_ctx->builder, vector, vector, even_mask, "pick_even"))
        || !(sub_vector_odd = LLVMBuildShuffleVector(
                 comp_ctx->builder, vector, vector, odd_mask, "pick_odd"))) {
        HANDLE_FAILURE("LLVMBuildShuffleVector");
        return false;
    }

    /* sext/zext <8xi8> to <8xi16> */
    if (is_signed) {
        if (!(sub_vector_even =
                  LLVMBuildSExt(comp_ctx->builder, sub_vector_even,
                                out_vector_type, "even_sext"))
            || !(sub_vector_odd =
                     LLVMBuildSExt(comp_ctx->builder, sub_vector_odd,
                                   out_vector_type, "odd_sext"))) {
            HANDLE_FAILURE("LLVMBuildSExt");
            return false;
        }
    }
    else {
        if (!(sub_vector_even =
                  LLVMBuildZExt(comp_ctx->builder, sub_vector_even,
                                out_vector_type, "even_zext"))
            || !(sub_vector_odd =
                     LLVMBuildZExt(comp_ctx->builder, sub_vector_odd,
                                   out_vector_type, "odd_zext"))) {
            HANDLE_FAILURE("LLVMBuildZExt");
            return false;
        }
    }

    if (!(result = LLVMBuildAdd(comp_ctx->builder, sub_vector_even,
                                sub_vector_odd, "sum"))) {
        HANDLE_FAILURE("LLVMBuildAdd");
        return false;
    }

    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
}

bool
aot_compile_simd_i16x8_extadd_pairwise_i8x16(AOTCompContext *comp_ctx,
                                             AOTFuncContext *func_ctx,
                                             bool is_signed)
{
    return simd_extadd_pairwise(comp_ctx, func_ctx, V128_i8x16_TYPE,
                                V128_i16x8_TYPE, is_signed);
}

bool
aot_compile_simd_i32x4_extadd_pairwise_i16x8(AOTCompContext *comp_ctx,
                                             AOTFuncContext *func_ctx,
                                             bool is_signed)
{
    return simd_extadd_pairwise(comp_ctx, func_ctx, V128_i16x8_TYPE,
                                V128_i32x4_TYPE, is_signed);
}

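/* i16x8.q15mulr_sat_s computes, per lane,
 *   SignedSaturate((x * y + 0x4000) >> 15)
 * The operands are widened to <8 x i32> so the multiply and rounding add
 * cannot overflow; the result is clamped to the i16 range and truncated
 * back to <8 x i16>. */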
bool
aot_compile_simd_i16x8_q15mulr_sat(AOTCompContext *comp_ctx,
                                   AOTFuncContext *func_ctx)
{
    LLVMValueRef lhs, rhs, pad, offset, min, max, result;
    LLVMTypeRef vector_ext_type;

    if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i16x8_TYPE,
                                          "rhs"))
        || !(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                             V128_i16x8_TYPE, "lhs"))) {
        return false;
    }

    if (!(vector_ext_type = LLVMVectorType(I32_TYPE, 8))) {
        HANDLE_FAILURE("LLVMVectorType");
        return false;
    }

    if (!(lhs = LLVMBuildSExt(comp_ctx->builder, lhs, vector_ext_type,
                              "lhs_v8i32"))
        || !(rhs = LLVMBuildSExt(comp_ctx->builder, rhs, vector_ext_type,
                                 "rhs_v8i32"))) {
        HANDLE_FAILURE("LLVMBuildSExt");
        return false;
    }

    /* 0x4000 and 15 */
    if (!(pad = simd_build_splat_const_integer_vector(comp_ctx, I32_TYPE,
                                                      0x4000, 8))
        || !(offset = simd_build_splat_const_integer_vector(comp_ctx, I32_TYPE,
                                                            15, 8))) {
        return false;
    }

    /* TODO: look for x86 intrinsics for an integer "fused multiply-and-add" */
    /* S.SignedSaturate((x * y + 0x4000) >> 15) */
    if (!(result = LLVMBuildMul(comp_ctx->builder, lhs, rhs, "mul"))) {
        HANDLE_FAILURE("LLVMBuildMul");
        return false;
    }

    if (!(result = LLVMBuildAdd(comp_ctx->builder, result, pad, "add"))) {
        HANDLE_FAILURE("LLVMBuildAdd");
        return false;
    }

    if (!(result = LLVMBuildAShr(comp_ctx->builder, result, offset, "ashr"))) {
        HANDLE_FAILURE("LLVMBuildAShr");
        return false;
    }

    if (!(min = simd_build_splat_const_integer_vector(comp_ctx, I32_TYPE,
                                                      0xffff8000, 8))
        || !(max = simd_build_splat_const_integer_vector(comp_ctx, I32_TYPE,
                                                         0x00007fff, 8))) {
        return false;
    }

    /* saturate, then truncate, so the *sat* part can be optimized */
    if (!(result = simd_saturate(comp_ctx, func_ctx, e_sat_i32x8, result, min,
                                 max, true))) {
        return false;
    }

    if (!(result = LLVMBuildTrunc(comp_ctx->builder, result, V128_i16x8_TYPE,
                                  "down_to_v8i16"))) {
        HANDLE_FAILURE("LLVMBuildTrunc");
        return false;
    }

    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
}

enum integer_extmul_type {
    e_i16x8_extmul_i8x16,
    e_i32x4_extmul_i16x8,
    e_i64x2_extmul_i32x4,
};

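/* extmul: extend the selected (low or high) half of both operands to the
 * wider lane type, then multiply, so the products never wrap. */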
static bool
simd_integer_extmul(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                    bool lower_half, bool is_signed,
                    enum integer_extmul_type itype)
{
    LLVMValueRef vec1, vec2, result;
    enum integer_extend_type ext_type[] = {
        e_ext_i8x16,
        e_ext_i16x8,
        e_ext_i32x4,
    };
    LLVMTypeRef in_vector_type[] = {
        V128_i8x16_TYPE,
        V128_i16x8_TYPE,
        V128_i32x4_TYPE,
    };

    if (!(vec1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                           in_vector_type[itype], "vec1"))
        || !(vec2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                              in_vector_type[itype], "vec2"))) {
        return false;
    }

    if (!(vec1 = simd_integer_extension(comp_ctx, func_ctx, ext_type[itype],
                                        vec1, lower_half, is_signed))
        || !(vec2 = simd_integer_extension(comp_ctx, func_ctx, ext_type[itype],
                                           vec2, lower_half, is_signed))) {
        return false;
    }

    if (!(result = LLVMBuildMul(comp_ctx->builder, vec1, vec2, "product"))) {
        HANDLE_FAILURE("LLVMBuildMul");
        return false;
    }

    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
}

bool
aot_compile_simd_i16x8_extmul_i8x16(AOTCompContext *comp_ctx,
                                    AOTFuncContext *func_ctx, bool lower_half,
                                    bool is_signed)
{
    return simd_integer_extmul(comp_ctx, func_ctx, lower_half, is_signed,
                               e_i16x8_extmul_i8x16);
}

bool
aot_compile_simd_i32x4_extmul_i16x8(AOTCompContext *comp_ctx,
                                    AOTFuncContext *func_ctx, bool lower_half,
                                    bool is_signed)
{
    return simd_integer_extmul(comp_ctx, func_ctx, lower_half, is_signed,
                               e_i32x4_extmul_i16x8);
}

bool
aot_compile_simd_i64x2_extmul_i32x4(AOTCompContext *comp_ctx,
                                    AOTFuncContext *func_ctx, bool lower_half,
                                    bool is_signed)
{
    return simd_integer_extmul(comp_ctx, func_ctx, lower_half, is_signed,
                               e_i64x2_extmul_i32x4);
}