simd_access_lanes.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  1. /*
  2. * Copyright (C) 2019 Intel Corporation. All rights reserved.
  3. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. */
  5. #include "simd_access_lanes.h"
  6. #include "simd_common.h"
  7. #include "../aot_emit_exception.h"
  8. #include "../../aot/aot_runtime.h"
  9. bool
  10. aot_compile_simd_shuffle(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
  11. const uint8 *frame_ip)
  12. {
  13. LLVMValueRef vec1, vec2, mask, result;
  14. uint8 imm[16] = { 0 };
  15. int values[16];
  16. unsigned i;
  17. wasm_runtime_read_v128(frame_ip, (uint64 *)imm, (uint64 *)(imm + 8));
  18. for (i = 0; i < 16; i++) {
  19. values[i] = imm[i];
  20. }
  21. if (!(vec2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i8x16_TYPE,
  22. "vec2"))) {
  23. goto fail;
  24. }
  25. if (!(vec1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i8x16_TYPE,
  26. "vec1"))) {
  27. goto fail;
  28. }
  29. /* build a vector <16 x i32> */
  30. if (!(mask = simd_build_const_integer_vector(comp_ctx, I32_TYPE, values,
  31. 16))) {
  32. goto fail;
  33. }
  34. if (!(result = LLVMBuildShuffleVector(comp_ctx->builder, vec1, vec2, mask,
  35. "new_vector"))) {
  36. HANDLE_FAILURE("LLVMBuildShuffleVector");
  37. goto fail;
  38. }
  39. return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
  40. fail:
  41. return false;
  42. }
/* TODO: consider the llvm.experimental.vector.* intrinsics */
/* shufflevector is not an option here, since it requires the mask to be a
   compile-time constant */
/*
 * i8x16.swizzle lowered to the x86 SSSE3 pshufb intrinsic.
 *
 * pshufb selects a source byte per lane and yields 0 for any lane whose
 * mask byte has the high bit (0x80) set.  Wasm requires 0 for any lane
 * index >= 16, so every out-of-range index is rewritten to 0x80 before
 * the intrinsic call.
 */
bool
aot_compile_simd_swizzle_x86(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
{
    LLVMValueRef vector, mask, max_lanes, condition, mask_lanes, result;
    LLVMTypeRef param_types[2];

    /* Stack order: the lane-index (mask) operand is on top. */
    if (!(mask = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i8x16_TYPE,
                                           "mask"))) {
        goto fail;
    }
    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                             V128_i8x16_TYPE, "vec"))) {
        goto fail;
    }
    /* icmp uge <16 x i8> mask, <16, 16, 16, 16, ...> */
    if (!(max_lanes = simd_build_splat_const_integer_vector(comp_ctx, INT8_TYPE,
                                                            16, 16))) {
        goto fail;
    }
    /* if the highest bit of every i8 of mask is 1, means doesn't pick up
       from vector */
    /* select <16 x i1> %condition, <16 x i8> <0x80, 0x80, ...>,
       <16 x i8> %mask */
    if (!(mask_lanes = simd_build_splat_const_integer_vector(
              comp_ctx, INT8_TYPE, 0x80, 16))) {
        goto fail;
    }
    /* condition[i] is true exactly when mask[i] is out of range (>= 16) */
    if (!(condition = LLVMBuildICmp(comp_ctx->builder, LLVMIntUGE, mask,
                                    max_lanes, "compare_with_16"))) {
        HANDLE_FAILURE("LLVMBuildICmp");
        goto fail;
    }
    /* Rewrite out-of-range indices to 0x80 so pshufb zeroes those lanes. */
    if (!(mask = LLVMBuildSelect(comp_ctx->builder, condition, mask_lanes, mask,
                                 "mask"))) {
        HANDLE_FAILURE("LLVMBuildSelect");
        goto fail;
    }
    param_types[0] = V128_i8x16_TYPE;
    param_types[1] = V128_i8x16_TYPE;
    if (!(result = aot_call_llvm_intrinsic(
              comp_ctx, func_ctx, "llvm.x86.ssse3.pshuf.b.128", V128_i8x16_TYPE,
              param_types, 2, vector, mask))) {
        HANDLE_FAILURE("LLVMBuildCall");
        goto fail;
    }
    /* The value stack keeps all v128 values in <2 x i64> form. */
    if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE,
                                    "ret"))) {
        HANDLE_FAILURE("LLVMBuildBitCast");
        goto fail;
    }
    PUSH_V128(result);
    return true;
fail:
    return false;
}
  99. static bool
  100. aot_compile_simd_swizzle_common(AOTCompContext *comp_ctx,
  101. AOTFuncContext *func_ctx)
  102. {
  103. LLVMValueRef vector, mask, default_lane_value, condition, max_lane_id,
  104. result, idx, id, replace_with_zero, elem, elem_or_zero, undef;
  105. uint8 i;
  106. if (!(mask = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i8x16_TYPE,
  107. "mask"))) {
  108. goto fail;
  109. }
  110. if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
  111. V128_i8x16_TYPE, "vec"))) {
  112. goto fail;
  113. }
  114. if (!(undef = LLVMGetUndef(V128_i8x16_TYPE))) {
  115. HANDLE_FAILURE("LLVMGetUndef");
  116. goto fail;
  117. }
  118. /* icmp uge <16 x i8> mask, <16, 16, 16, 16, ...> */
  119. if (!(max_lane_id = simd_build_splat_const_integer_vector(
  120. comp_ctx, INT8_TYPE, 16, 16))) {
  121. goto fail;
  122. }
  123. if (!(condition = LLVMBuildICmp(comp_ctx->builder, LLVMIntUGE, mask,
  124. max_lane_id, "out_of_range"))) {
  125. HANDLE_FAILURE("LLVMBuldICmp");
  126. goto fail;
  127. }
  128. /* if the id is out of range (>=16), set the id as 0 */
  129. if (!(default_lane_value = simd_build_splat_const_integer_vector(
  130. comp_ctx, INT8_TYPE, 0, 16))) {
  131. goto fail;
  132. }
  133. if (!(idx = LLVMBuildSelect(comp_ctx->builder, condition,
  134. default_lane_value, mask, "mask"))) {
  135. HANDLE_FAILURE("LLVMBuildSelect");
  136. goto fail;
  137. }
  138. for (i = 0; i < 16; i++) {
  139. if (!(id = LLVMBuildExtractElement(comp_ctx->builder, idx, I8_CONST(i),
  140. "id"))) {
  141. HANDLE_FAILURE("LLVMBuildExtractElement");
  142. goto fail;
  143. }
  144. if (!(replace_with_zero =
  145. LLVMBuildExtractElement(comp_ctx->builder, condition,
  146. I8_CONST(i), "replace_with_zero"))) {
  147. HANDLE_FAILURE("LLVMBuildExtractElement");
  148. goto fail;
  149. }
  150. if (!(elem = LLVMBuildExtractElement(comp_ctx->builder, vector, id,
  151. "vector[mask[i]]"))) {
  152. HANDLE_FAILURE("LLVMBuildExtractElement");
  153. goto fail;
  154. }
  155. if (!(elem_or_zero =
  156. LLVMBuildSelect(comp_ctx->builder, replace_with_zero,
  157. I8_CONST(0), elem, "elem_or_zero"))) {
  158. HANDLE_FAILURE("LLVMBuildSelect");
  159. goto fail;
  160. }
  161. if (!(undef =
  162. LLVMBuildInsertElement(comp_ctx->builder, undef, elem_or_zero,
  163. I8_CONST(i), "new_vector"))) {
  164. HANDLE_FAILURE("LLVMBuildInsertElement");
  165. goto fail;
  166. }
  167. }
  168. if (!(result = LLVMBuildBitCast(comp_ctx->builder, undef, V128_i64x2_TYPE,
  169. "ret"))) {
  170. HANDLE_FAILURE("LLVMBuildBitCast");
  171. goto fail;
  172. }
  173. PUSH_V128(result);
  174. return true;
  175. fail:
  176. return false;
  177. }
  178. bool
  179. aot_compile_simd_swizzle(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
  180. {
  181. if (is_target_x86(comp_ctx)) {
  182. return aot_compile_simd_swizzle_x86(comp_ctx, func_ctx);
  183. }
  184. else {
  185. return aot_compile_simd_swizzle_common(comp_ctx, func_ctx);
  186. }
  187. }
/*
 * Shared lowering for all extract_lane opcodes.
 *
 * Pops the v128 on top of the stack, bitcasts it to vector_type, extracts
 * lane lane_id, optionally sign/zero-extends the element to result_type
 * (for i8/i16 lanes, which are narrower than the i32 result), and pushes
 * the scalar with AOT value type aot_value_type.
 */
static bool
aot_compile_simd_extract(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                         uint8 lane_id, bool need_extend, bool is_signed,
                         LLVMTypeRef vector_type, LLVMTypeRef result_type,
                         unsigned aot_value_type)
{
    LLVMValueRef vector, lane, result;
    if (!(lane = simd_lane_id_to_llvm_value(comp_ctx, lane_id))) {
        HANDLE_FAILURE("LLVMConstInt");
        goto fail;
    }
    /* bitcast <2 x i64> %0 to <vector_type> */
    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
                                             "vec"))) {
        goto fail;
    }
    /* extractelement <vector_type> %vector, i8 lane_id*/
    if (!(result = LLVMBuildExtractElement(comp_ctx->builder, vector, lane,
                                           "element"))) {
        HANDLE_FAILURE("LLVMBuildExtractElement");
        goto fail;
    }
    if (need_extend) {
        if (is_signed) {
            /* sext <element_type> %element to <result_type> */
            if (!(result = LLVMBuildSExt(comp_ctx->builder, result, result_type,
                                         "ret"))) {
                HANDLE_FAILURE("LLVMBuildSExt");
                goto fail;
            }
        }
        else {
            /* zext <element_type> %element to <result_type> */
            if (!(result = LLVMBuildZExt(comp_ctx->builder, result, result_type,
                                         "ret"))) {
                HANDLE_FAILURE("LLVMBuildZExt");
                goto fail;
            }
        }
    }
    PUSH(result, aot_value_type);
    return true;
fail:
    return false;
}
  233. bool
  234. aot_compile_simd_extract_i8x16(AOTCompContext *comp_ctx,
  235. AOTFuncContext *func_ctx, uint8 lane_id,
  236. bool is_signed)
  237. {
  238. return aot_compile_simd_extract(comp_ctx, func_ctx, lane_id, true,
  239. is_signed, V128_i8x16_TYPE, I32_TYPE,
  240. VALUE_TYPE_I32);
  241. }
  242. bool
  243. aot_compile_simd_extract_i16x8(AOTCompContext *comp_ctx,
  244. AOTFuncContext *func_ctx, uint8 lane_id,
  245. bool is_signed)
  246. {
  247. return aot_compile_simd_extract(comp_ctx, func_ctx, lane_id, true,
  248. is_signed, V128_i16x8_TYPE, I32_TYPE,
  249. VALUE_TYPE_I32);
  250. }
  251. bool
  252. aot_compile_simd_extract_i32x4(AOTCompContext *comp_ctx,
  253. AOTFuncContext *func_ctx, uint8 lane_id)
  254. {
  255. return aot_compile_simd_extract(comp_ctx, func_ctx, lane_id, false, false,
  256. V128_i32x4_TYPE, I32_TYPE, VALUE_TYPE_I32);
  257. }
  258. bool
  259. aot_compile_simd_extract_i64x2(AOTCompContext *comp_ctx,
  260. AOTFuncContext *func_ctx, uint8 lane_id)
  261. {
  262. return aot_compile_simd_extract(comp_ctx, func_ctx, lane_id, false, false,
  263. V128_i64x2_TYPE, I64_TYPE, VALUE_TYPE_I64);
  264. }
  265. bool
  266. aot_compile_simd_extract_f32x4(AOTCompContext *comp_ctx,
  267. AOTFuncContext *func_ctx, uint8 lane_id)
  268. {
  269. return aot_compile_simd_extract(comp_ctx, func_ctx, lane_id, false, false,
  270. V128_f32x4_TYPE, F32_TYPE, VALUE_TYPE_F32);
  271. }
  272. bool
  273. aot_compile_simd_extract_f64x2(AOTCompContext *comp_ctx,
  274. AOTFuncContext *func_ctx, uint8 lane_id)
  275. {
  276. return aot_compile_simd_extract(comp_ctx, func_ctx, lane_id, false, false,
  277. V128_f64x2_TYPE, F64_TYPE, VALUE_TYPE_F64);
  278. }
  279. static bool
  280. aot_compile_simd_replace(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
  281. uint8 lane_id, unsigned new_value_type,
  282. LLVMTypeRef vector_type, bool need_reduce,
  283. LLVMTypeRef element_type)
  284. {
  285. LLVMValueRef vector, new_value, lane, result;
  286. POP(new_value, new_value_type);
  287. if (!(lane = simd_lane_id_to_llvm_value(comp_ctx, lane_id))) {
  288. goto fail;
  289. }
  290. if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
  291. "vec"))) {
  292. goto fail;
  293. }
  294. /* trunc <new_value_type> to <element_type> */
  295. if (need_reduce) {
  296. if (!(new_value = LLVMBuildTrunc(comp_ctx->builder, new_value,
  297. element_type, "element"))) {
  298. HANDLE_FAILURE("LLVMBuildTrunc");
  299. goto fail;
  300. }
  301. }
  302. /* insertelement <vector_type> %vector, <element_type> %element,
  303. i32 lane */
  304. if (!(result = LLVMBuildInsertElement(comp_ctx->builder, vector, new_value,
  305. lane, "new_vector"))) {
  306. HANDLE_FAILURE("LLVMBuildInsertElement");
  307. goto fail;
  308. }
  309. return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
  310. fail:
  311. return false;
  312. }
  313. bool
  314. aot_compile_simd_replace_i8x16(AOTCompContext *comp_ctx,
  315. AOTFuncContext *func_ctx, uint8 lane_id)
  316. {
  317. return aot_compile_simd_replace(comp_ctx, func_ctx, lane_id, VALUE_TYPE_I32,
  318. V128_i8x16_TYPE, true, INT8_TYPE);
  319. }
  320. bool
  321. aot_compile_simd_replace_i16x8(AOTCompContext *comp_ctx,
  322. AOTFuncContext *func_ctx, uint8 lane_id)
  323. {
  324. return aot_compile_simd_replace(comp_ctx, func_ctx, lane_id, VALUE_TYPE_I32,
  325. V128_i16x8_TYPE, true, INT16_TYPE);
  326. }
  327. bool
  328. aot_compile_simd_replace_i32x4(AOTCompContext *comp_ctx,
  329. AOTFuncContext *func_ctx, uint8 lane_id)
  330. {
  331. return aot_compile_simd_replace(comp_ctx, func_ctx, lane_id, VALUE_TYPE_I32,
  332. V128_i32x4_TYPE, false, I32_TYPE);
  333. }
  334. bool
  335. aot_compile_simd_replace_i64x2(AOTCompContext *comp_ctx,
  336. AOTFuncContext *func_ctx, uint8 lane_id)
  337. {
  338. return aot_compile_simd_replace(comp_ctx, func_ctx, lane_id, VALUE_TYPE_I64,
  339. V128_i64x2_TYPE, false, I64_TYPE);
  340. }
  341. bool
  342. aot_compile_simd_replace_f32x4(AOTCompContext *comp_ctx,
  343. AOTFuncContext *func_ctx, uint8 lane_id)
  344. {
  345. return aot_compile_simd_replace(comp_ctx, func_ctx, lane_id, VALUE_TYPE_F32,
  346. V128_f32x4_TYPE, false, F32_TYPE);
  347. }
  348. bool
  349. aot_compile_simd_replace_f64x2(AOTCompContext *comp_ctx,
  350. AOTFuncContext *func_ctx, uint8 lane_id)
  351. {
  352. return aot_compile_simd_replace(comp_ctx, func_ctx, lane_id, VALUE_TYPE_F64,
  353. V128_f64x2_TYPE, false, F64_TYPE);
  354. }