Explorar el Código

Enable SIMD for AARCH64 Platform (#11) (#610)

Signed-off-by: Wu Zhongmin <zhongmin.wzm@antgroup.com>
Signed-off-by: Xiaokang Qin <xiaokang.qxk@antgroup.com>

Co-authored-by: Wu Zhongmin <zhongmin.wzm@antgroup.com>

Co-authored-by: Wu Zhongmin <zhongmin.wzm@antgroup.com>
Xiaokang Qin hace 4 años
padre
commit
46db353017

+ 1 - 1
core/iwasm/aot/aot_loader.c

@@ -285,7 +285,7 @@ check_machine_info(AOTTargetInfo *target_info,
                              error_buf, error_buf_size))
         return false;
 
-    if (strcmp(target_expected, target_got)) {
+    if (strncmp(target_expected, target_got, strlen(target_expected))) {
         set_error_buf_v(error_buf, error_buf_size,
                         "invalid target type, expected %s but got %s",
                         target_expected, target_got);

+ 79 - 0
core/iwasm/common/arch/invokeNative_aarch64_simd.s

@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2020 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+        .text
+        .align  2
+#ifndef BH_PLATFORM_DARWIN
+        .globl invokeNative
+        .type  invokeNative, function
+invokeNative:
+#else
+        .globl _invokeNative
+_invokeNative:
+#endif /* end of BH_PLATFORM_DARWIN */
+
+/*
+ * Arguments passed in:
+ *
+ * x0 function ptr
+ * x1 argv
+ * x2 nstacks
+ */
+
+        sub     sp, sp, #0x30
+        stp     x19, x20, [sp, #0x20] /* save the registers */
+        stp     x21, x22, [sp, #0x10]
+        stp     x23, x24, [sp, #0x0]
+
+        mov     x19, x0          /* x19 = function ptr */
+        mov     x20, x1          /* x20 = argv */
+        mov     x21, x2          /* x21 = nstacks */
+        mov     x22, sp          /* save the sp before call function */
+
+        /* Fill in floating-point registers */
+        ld1    {v0.2D, v1.2D, v2.2D, v3.2D}, [x20], #64 /* v0 = argv[0], v1 = argv[1], v2 = argv[2], v3 = argv[3]*/
+        ld1    {v4.2D, v5.2D, v6.2D, v7.2D}, [x20], #64 /* v4 = argv[4], v5 = argv[5], v6 = argv[6], v7 = argv[7]*/
+
+        /* Fill integer registers */
+        ldp     x0, x1, [x20], #16 /* x0 = argv[8] = exec_env, x1 = argv[9] */
+        ldp     x2, x3, [x20], #16 /* x2 = argv[10], x3 = argv[11] */
+        ldp     x4, x5, [x20], #16 /* x4 = argv[12], x5 = argv[13] */
+        ldp     x6, x7, [x20], #16 /* x6 = argv[14], x7 = argv[15] */
+
+        /* Now x20 points to stack args */
+
+        /* Directly call the function if no args in stack */
+        cmp     x21, #0
+        beq     call_func
+
+        /* Fill all stack args: reserve stack space and fill one by one */
+        mov     x23, sp
+        bic     sp,  x23, #15    /* Ensure stack is 16 bytes aligned */
+        lsl     x23, x21, #3     /* x23 = nstacks * 8 */
+        add     x23, x23, #15    /* x23 = (x23 + 15) & ~15 */
+        bic     x23, x23, #15
+        sub     sp, sp, x23      /* reserved stack space for stack arguments */
+        mov     x23, sp
+
+loop_stack_args:                 /* copy stack arguments to stack */
+        cmp     x21, #0
+        beq     call_func
+        ldr     x24, [x20], #8
+        str     x24, [x23], #8
+        sub     x21, x21, #1
+        b       loop_stack_args
+
+call_func:
+        mov     x20, x30         /* save x30(lr) */
+        blr     x19
+        mov     sp, x22          /* restore sp which is saved before calling function */
+
+return:
+        mov     x30,  x20              /* restore x30(lr) */
+        ldp     x19, x20, [sp, #0x20]  /* restore the registers in stack */
+        ldp     x21, x22, [sp, #0x10]
+        ldp     x23, x24, [sp, #0x0]
+        add     sp, sp, #0x30          /* restore sp */
+        ret
+

+ 5 - 1
core/iwasm/common/iwasm_common.cmake

@@ -43,7 +43,11 @@ elseif (WAMR_BUILD_TARGET MATCHES "THUMB.*")
     set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_thumb.s)
   endif ()
 elseif (WAMR_BUILD_TARGET MATCHES "AARCH64.*")
-  set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_aarch64.s)
+  if (NOT WAMR_BUILD_SIMD EQUAL 1)
+    set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_aarch64.s)
+  else()
+    set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_aarch64_simd.s)
+  endif()
 elseif (WAMR_BUILD_TARGET STREQUAL "MIPS")
   set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_mips.s)
 elseif (WAMR_BUILD_TARGET STREQUAL "XTENSA")

+ 6 - 2
core/iwasm/common/wasm_runtime_common.c

@@ -3414,10 +3414,14 @@ typedef union __declspec(intrin_type) __declspec(align(8)) v128 {
     unsigned __int32 m128i_u32[4];
     unsigned __int64 m128i_u64[2];
 } v128;
-#else
+#elif defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
 typedef long long v128 __attribute__ ((__vector_size__ (16),
                                        __may_alias__, __aligned__ (1)));
-#endif /* end of defined(_WIN32) || defined(_WIN32_) */
+#elif defined(BUILD_TARGET_AARCH64)
+#include <arm_neon.h>
+typedef uint32x4_t __m128i;
+#define v128 __m128i
+#endif
 
 #endif /* end of WASM_ENABLE_SIMD != 0 */
 

+ 2 - 1
core/iwasm/compilation/aot_llvm.c

@@ -1496,7 +1496,8 @@ aot_create_comp_context(AOTCompData *comp_data,
     }
 
     if (option->enable_simd
-        && strcmp(comp_ctx->target_arch, "x86_64") != 0) {
+        && strcmp(comp_ctx->target_arch, "x86_64") != 0
+        && strncmp(comp_ctx->target_arch, "aarch64", 7) != 0) {
         /* Disable simd if it isn't supported by target arch */
         option->enable_simd = false;
     }

+ 111 - 1
core/iwasm/compilation/simd/simd_access_lanes.c

@@ -8,6 +8,13 @@
 #include "../aot_emit_exception.h"
 #include "../../aot/aot_runtime.h"
 
+static bool
+is_target_x86(AOTCompContext *comp_ctx)
+{
+    return !strncmp(comp_ctx->target_arch, "x86_64", 6) ||
+           !strncmp(comp_ctx->target_arch, "i386", 4);
+}
+
 static LLVMValueRef
 build_intx16_vector(const AOTCompContext *comp_ctx,
                     const LLVMTypeRef element_type,
@@ -86,7 +93,7 @@ fail:
 /* TODO: instructions for other CPUs */
 /* shufflevector is not an option, since it requires *mask as a const */
 bool
-aot_compile_simd_swizzle(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+aot_compile_simd_swizzle_x86(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
 {
     LLVMValueRef vector, mask, max_lanes, condition, mask_lanes, result;
     LLVMTypeRef param_types[2];
@@ -151,6 +158,109 @@ fail:
     return false;
 }
 
+bool
+aot_compile_simd_swizzle(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+{
+    LLVMValueRef vector, mask, default_lane_value, condition, max_lane_id,
+      result, idx, id, replace_with_zero, elem, elem_or_zero, undef;
+    uint8 i;
+
+    if (is_target_x86(comp_ctx)) {
+        return aot_compile_simd_swizzle_x86(comp_ctx, func_ctx);
+    }
+
+    int const_lane_ids[16] = { 16, 16, 16, 16, 16, 16, 16, 16,
+                               16, 16, 16, 16, 16, 16, 16, 16 },
+        const_zeors[16] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+                            0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
+
+    if (!(mask = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i8x16_TYPE,
+                                           "mask"))) {
+        goto fail;
+    }
+
+    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                             V128_i8x16_TYPE, "vec"))) {
+        goto fail;
+    }
+
+    if (!(undef = LLVMGetUndef(V128_i8x16_TYPE))) {
+        HANDLE_FAILURE("LLVMGetUndef");
+        goto fail;
+    }
+
+    /* icmp uge <16 x i8> mask, <16, 16, 16, 16, ...> */
+    if (!(max_lane_id =
+            build_intx16_vector(comp_ctx, INT8_TYPE, const_lane_ids))) {
+        goto fail;
+    }
+
+    if (!(condition = LLVMBuildICmp(comp_ctx->builder, LLVMIntUGE, mask,
+                                    max_lane_id, "out_of_range"))) {
+        HANDLE_FAILURE("LLVMBuldICmp");
+        goto fail;
+    }
+
+    /*  if the id is out of range (>=16), set the id as 0 */
+    if (!(default_lane_value =
+            build_intx16_vector(comp_ctx, INT8_TYPE, const_zeors))) {
+        goto fail;
+    }
+
+    if (!(idx = LLVMBuildSelect(comp_ctx->builder, condition,
+                                default_lane_value, mask, "mask"))) {
+        HANDLE_FAILURE("LLVMBuildSelect");
+        goto fail;
+    }
+
+    for (i = 0; i < 16; i++) {
+        if (!(id = LLVMBuildExtractElement(comp_ctx->builder, idx, I8_CONST(i),
+                                           "id"))) {
+            HANDLE_FAILURE("LLVMBuildExtractElement");
+            goto fail;
+        }
+
+        if (!(replace_with_zero =
+                LLVMBuildExtractElement(comp_ctx->builder, condition,
+                                        I8_CONST(i), "replace_with_zero"))) {
+            HANDLE_FAILURE("LLVMBuildExtractElement");
+            goto fail;
+        }
+
+        if (!(elem = LLVMBuildExtractElement(comp_ctx->builder, vector, id,
+                                             "vector[mask[i]]"))) {
+            HANDLE_FAILURE("LLVMBuildExtractElement");
+            goto fail;
+        }
+
+        if (!(elem_or_zero =
+                LLVMBuildSelect(comp_ctx->builder, replace_with_zero,
+                                I8_CONST(0), elem, "elem_or_zero"))) {
+            HANDLE_FAILURE("LLVMBuildSelect");
+            goto fail;
+        }
+
+        if (!(undef =
+                LLVMBuildInsertElement(comp_ctx->builder, undef, elem_or_zero,
+                                       I8_CONST(i), "new_vector"))) {
+            HANDLE_FAILURE("LLVMBuildInsertElement");
+            goto fail;
+        }
+    }
+
+    if (!(result = LLVMBuildBitCast(comp_ctx->builder, undef, V128_i64x2_TYPE,
+                                    "ret"))) {
+        HANDLE_FAILURE("LLVMBuildBitCast");
+        goto fail;
+    }
+
+    PUSH_V128(result);
+
+    return true;
+fail:
+    return false;
+}
+
 static bool
 aot_compile_simd_extract(AOTCompContext *comp_ctx,
                          AOTFuncContext *func_ctx,

+ 353 - 2
core/iwasm/compilation/simd/simd_conversions.c

@@ -9,6 +9,13 @@
 #include "../aot_emit_numberic.h"
 #include "../../aot/aot_runtime.h"
 
+static bool
+is_target_x86(AOTCompContext *comp_ctx)
+{
+    return !strncmp(comp_ctx->target_arch, "x86_64", 6) ||
+           !strncmp(comp_ctx->target_arch, "i386", 4);
+}
+
 static bool
 simd_integer_narrow(AOTCompContext *comp_ctx,
                     AOTFuncContext *func_ctx,
@@ -49,8 +56,85 @@ fail:
     return false;
 }
 
+static LLVMValueRef
+build_intx4_vector(const AOTCompContext *comp_ctx,
+                    const LLVMTypeRef element_type,
+                    const int *element_value)
+{
+    LLVMValueRef vector, elements[4];
+    unsigned i;
+
+    for (i = 0; i < 4; i++) {
+        if (!(elements[i] =
+                LLVMConstInt(element_type, element_value[i], true))) {
+            HANDLE_FAILURE("LLVMConstInst");
+            goto fail;
+        }
+    }
+
+    if (!(vector = LLVMConstVector(elements, 4))) {
+        HANDLE_FAILURE("LLVMConstVector");
+        goto fail;
+    }
+    return vector;
+fail:
+    return NULL;
+}
+
+static LLVMValueRef
+build_intx8_vector(const AOTCompContext *comp_ctx,
+                    const LLVMTypeRef element_type,
+                    const int *element_value)
+{
+    LLVMValueRef vector, elements[8];
+    unsigned i;
+
+    for (i = 0; i < 8; i++) {
+        if (!(elements[i] =
+                LLVMConstInt(element_type, element_value[i], true))) {
+            HANDLE_FAILURE("LLVMConstInst");
+            goto fail;
+        }
+    }
+
+    if (!(vector = LLVMConstVector(elements, 8))) {
+        HANDLE_FAILURE("LLVMConstVector");
+        goto fail;
+    }
+
+    return vector;
+fail:
+    return NULL;
+}
+
+static LLVMValueRef
+build_intx16_vector(const AOTCompContext *comp_ctx,
+                    const LLVMTypeRef element_type,
+                    const int *element_value)
+{
+    LLVMValueRef vector, elements[16];
+    unsigned i;
+
+    for (i = 0; i < 16; i++) {
+        if (!(elements[i] =
+                LLVMConstInt(element_type, element_value[i], true))) {
+            HANDLE_FAILURE("LLVMConstInst");
+            goto fail;
+        }
+    }
+
+    if (!(vector = LLVMConstVector(elements, 16))) {
+        HANDLE_FAILURE("LLVMConstVector");
+        goto fail;
+    }
+
+    return vector;
+fail:
+    return NULL;
+}
+
 bool
-aot_compile_simd_i8x16_narrow_i16x8(AOTCompContext *comp_ctx,
+aot_compile_simd_i8x16_narrow_i16x8_x86(AOTCompContext *comp_ctx,
                                     AOTFuncContext *func_ctx,
                                     bool is_signed)
 {
@@ -60,7 +144,7 @@ aot_compile_simd_i8x16_narrow_i16x8(AOTCompContext *comp_ctx,
 }
 
 bool
-aot_compile_simd_i16x8_narrow_i32x4(AOTCompContext *comp_ctx,
+aot_compile_simd_i16x8_narrow_i32x4_x86(AOTCompContext *comp_ctx,
                                     AOTFuncContext *func_ctx,
                                     bool is_signed)
 {
@@ -69,6 +153,273 @@ aot_compile_simd_i16x8_narrow_i32x4(AOTCompContext *comp_ctx,
       is_signed ? "llvm.x86.sse2.packssdw.128" : "llvm.x86.sse41.packusdw");
 }
 
+bool
+aot_compile_simd_i8x16_narrow_i16x8(AOTCompContext *comp_ctx,
+                                    AOTFuncContext *func_ctx,
+                                    bool is_signed)
+{
+    LLVMValueRef vector1, vector2, result, vector_min, vector_max, shuffle,
+      vector1_clamped, vector2_clamped, vector1_trunced, vector2_trunced,
+      shuffle_vector;
+    LLVMValueRef v1_gt_max, v1_lt_min, v2_gt_max, v2_lt_min;
+
+    if (is_target_x86(comp_ctx)) {
+        return aot_compile_simd_i8x16_narrow_i16x8_x86(comp_ctx, func_ctx,
+                                                       is_signed);
+    }
+
+    int min_s_array[8] = { 0xff80, 0xff80, 0xff80, 0xff80,
+                           0xff80, 0xff80, 0xff80, 0xff80 };
+    int max_s_array[8] = { 0x007f, 0x007f, 0x007f, 0x007f,
+                           0x007f, 0x007f, 0x007f, 0x007f };
+
+    int min_u_array[8] = { 0x0000, 0x0000, 0x0000, 0x0000,
+                           0x0000, 0x0000, 0x0000, 0x0000 };
+    int max_u_array[8] = { 0x00ff, 0x00ff, 0x00ff, 0x00ff,
+                           0x00ff, 0x00ff, 0x00ff, 0x00ff };
+
+    int shuffle_array[16] = { 0, 1, 2,  3,  4,  5,  6,  7,
+                              8, 9, 10, 11, 12, 13, 14, 15 };
+
+    if (!(vector2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                              V128_i16x8_TYPE, "vec2"))) {
+        goto fail;
+    }
+
+    if (!(vector1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                              V128_i16x8_TYPE, "vec1"))) {
+        goto fail;
+    }
+
+    if (!(vector_min = build_intx8_vector(
+            comp_ctx, INT16_TYPE, is_signed ? min_s_array : min_u_array))) {
+        goto fail;
+    }
+    if (!(vector_max = build_intx8_vector(
+            comp_ctx, INT16_TYPE, is_signed ? max_s_array : max_u_array))) {
+        goto fail;
+    }
+    if (!(shuffle = build_intx16_vector(comp_ctx, I32_TYPE, shuffle_array))) {
+        goto fail;
+    }
+
+    if (!(v1_gt_max = LLVMBuildICmp(comp_ctx->builder, LLVMIntSGT, vector1,
+                                    vector_max, "v1_great_than_max"))) {
+        HANDLE_FAILURE("LLVMBuldICmp");
+        goto fail;
+    }
+
+    if (!(v2_gt_max = LLVMBuildICmp(comp_ctx->builder, LLVMIntSGT, vector2,
+                                    vector_max, "v2_great_than_max"))) {
+        HANDLE_FAILURE("LLVMBuldICmp");
+        goto fail;
+    }
+
+    if (!(v1_lt_min = LLVMBuildICmp(comp_ctx->builder, LLVMIntSLT, vector1,
+                                    vector_min, "v1_less_than_min"))) {
+        HANDLE_FAILURE("LLVMBuldICmp");
+        goto fail;
+    }
+
+    if (!(v2_lt_min = LLVMBuildICmp(comp_ctx->builder, LLVMIntSLT, vector2,
+                                    vector_min, "v2_less_than_min"))) {
+        HANDLE_FAILURE("LLVMBuldICmp");
+        goto fail;
+    }
+
+    if (!(vector1_clamped =
+            LLVMBuildSelect(comp_ctx->builder, v1_gt_max, vector_max, vector1,
+                            "vector1_clamped_max"))) {
+        HANDLE_FAILURE("LLVMBuildSelect");
+        goto fail;
+    }
+
+    if (!(vector1_clamped =
+            LLVMBuildSelect(comp_ctx->builder, v1_lt_min, vector_min,
+                            vector1_clamped, "vector1_clamped_min"))) {
+        HANDLE_FAILURE("LLVMBuildSelect");
+        goto fail;
+    }
+
+    if (!(vector2_clamped =
+            LLVMBuildSelect(comp_ctx->builder, v2_gt_max, vector_max, vector2,
+                            "vector2_clamped_max"))) {
+        HANDLE_FAILURE("LLVMBuildSelect");
+        goto fail;
+    }
+
+    if (!(vector2_clamped =
+            LLVMBuildSelect(comp_ctx->builder, v2_lt_min, vector_min,
+                            vector2_clamped, "vector2_clamped_min"))) {
+        HANDLE_FAILURE("LLVMBuildSelect");
+        goto fail;
+    }
+
+    if (!(vector1_trunced =
+            LLVMBuildTrunc(comp_ctx->builder, vector1_clamped,
+                           LLVMVectorType(INT8_TYPE, 8), "vector1_trunced"))) {
+        HANDLE_FAILURE("LLVMBuildTrunc");
+        goto fail;
+    }
+
+    if (!(vector2_trunced =
+            LLVMBuildTrunc(comp_ctx->builder, vector2_clamped,
+                           LLVMVectorType(INT8_TYPE, 8), "vector2_trunced"))) {
+        HANDLE_FAILURE("LLVMBuildTrunc");
+        goto fail;
+    }
+
+    if (!(shuffle_vector = LLVMBuildShuffleVector(
+            comp_ctx->builder, vector1_trunced, vector2_trunced, shuffle,
+            "shuffle_vector"))) {
+        HANDLE_FAILURE("LLVMBuildShuffleVector");
+        goto fail;
+    }
+
+    if (!(result = LLVMBuildBitCast(comp_ctx->builder, shuffle_vector,
+                                    V128_i64x2_TYPE, "ret"))) {
+        HANDLE_FAILURE("LLVMBuildBitCast");
+        goto fail;
+    }
+
+    PUSH_V128(result);
+    return true;
+
+fail:
+    return false;
+}
+
+bool
+aot_compile_simd_i16x8_narrow_i32x4(AOTCompContext *comp_ctx,
+                                    AOTFuncContext *func_ctx,
+                                    bool is_signed)
+{
+    LLVMValueRef vector1, vector2, result, vector_min, vector_max, shuffle,
+      vector1_clamped, vector2_clamped, vector1_trunced, vector2_trunced,
+      shuffle_vector;
+    LLVMValueRef v1_gt_max, v1_lt_min, v2_gt_max, v2_lt_min;
+
+    if (is_target_x86(comp_ctx)) {
+        return aot_compile_simd_i16x8_narrow_i32x4_x86(comp_ctx, func_ctx,
+                                                       is_signed);
+    }
+
+    int min_s_array[4] = { 0xffff8000, 0xffff8000, 0xffff8000, 0xffff8000 };
+    int32 max_s_array[4] = { 0x00007fff, 0x00007fff, 0x00007fff, 0x00007fff };
+
+    int min_u_array[4] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000 };
+    int max_u_array[4] = { 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff };
+
+    int shuffle_array[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+    if (!(vector2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                              V128_i32x4_TYPE, "vec2"))) {
+        goto fail;
+    }
+
+    if (!(vector1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                              V128_i32x4_TYPE, "vec1"))) {
+        goto fail;
+    }
+
+    if (!(vector_min = build_intx4_vector(
+            comp_ctx, I32_TYPE, is_signed ? min_s_array : min_u_array))) {
+        goto fail;
+    }
+    if (!(vector_max = build_intx4_vector(
+            comp_ctx, I32_TYPE, is_signed ? max_s_array : max_u_array))) {
+        goto fail;
+    }
+    if (!(shuffle = build_intx8_vector(comp_ctx, I32_TYPE, shuffle_array))) {
+        goto fail;
+    }
+
+    if (!(v1_gt_max = LLVMBuildICmp(comp_ctx->builder, LLVMIntSGT, vector1,
+                                    vector_max, "v1_great_than_max"))) {
+        HANDLE_FAILURE("LLVMBuldICmp");
+        goto fail;
+    }
+
+    if (!(v2_gt_max = LLVMBuildICmp(comp_ctx->builder, LLVMIntSGT, vector2,
+                                    vector_max, "v2_great_than_max"))) {
+        HANDLE_FAILURE("LLVMBuldICmp");
+        goto fail;
+    }
+
+    if (!(v1_lt_min = LLVMBuildICmp(comp_ctx->builder, LLVMIntSLT, vector1,
+                                    vector_min, "v1_less_than_min"))) {
+        HANDLE_FAILURE("LLVMBuldICmp");
+        goto fail;
+    }
+
+    if (!(v2_lt_min = LLVMBuildICmp(comp_ctx->builder, LLVMIntSLT, vector2,
+                                    vector_min, "v2_less_than_min"))) {
+        HANDLE_FAILURE("LLVMBuldICmp");
+        goto fail;
+    }
+
+    if (!(vector1_clamped =
+            LLVMBuildSelect(comp_ctx->builder, v1_gt_max, vector_max, vector1,
+                            "vector1_clamped_max"))) {
+        HANDLE_FAILURE("LLVMBuildSelect");
+        goto fail;
+    }
+
+    if (!(vector1_clamped =
+            LLVMBuildSelect(comp_ctx->builder, v1_lt_min, vector_min,
+                            vector1_clamped, "vector1_clamped_min"))) {
+        HANDLE_FAILURE("LLVMBuildSelect");
+        goto fail;
+    }
+
+    if (!(vector2_clamped =
+            LLVMBuildSelect(comp_ctx->builder, v2_gt_max, vector_max, vector2,
+                            "vector2_clamped_max"))) {
+        HANDLE_FAILURE("LLVMBuildSelect");
+        goto fail;
+    }
+
+    if (!(vector2_clamped =
+            LLVMBuildSelect(comp_ctx->builder, v2_lt_min, vector_min,
+                            vector2_clamped, "vector2_clamped_min"))) {
+        HANDLE_FAILURE("LLVMBuildSelect");
+        goto fail;
+    }
+
+    if (!(vector1_trunced = LLVMBuildTrunc(comp_ctx->builder, vector1_clamped,
+                                           LLVMVectorType(INT16_TYPE, 4),
+                                           "vector1_trunced"))) {
+        HANDLE_FAILURE("LLVMBuildTrunc");
+        goto fail;
+    }
+
+    if (!(vector2_trunced = LLVMBuildTrunc(comp_ctx->builder, vector2_clamped,
+                                           LLVMVectorType(INT16_TYPE, 4),
+                                           "vector2_trunced"))) {
+        HANDLE_FAILURE("LLVMBuildTrunc");
+        goto fail;
+    }
+
+    if (!(shuffle_vector = LLVMBuildShuffleVector(
+            comp_ctx->builder, vector1_trunced, vector2_trunced, shuffle,
+            "shuffle_vector"))) {
+        HANDLE_FAILURE("LLVMBuildShuffleVector");
+        goto fail;
+    }
+
+    if (!(result = LLVMBuildBitCast(comp_ctx->builder, shuffle_vector,
+                                    V128_i64x2_TYPE, "ret"))) {
+        HANDLE_FAILURE("LLVMBuildBitCast");
+        goto fail;
+    }
+
+    PUSH_V128(result);
+    return true;
+
+fail:
+    return false;
+}
+
 bool
 aot_compile_simd_i16x8_widen_i8x16(AOTCompContext *comp_ctx,
                                    AOTFuncContext *func_ctx,