Răsfoiți Sursa

Implement SIMD float ceil/floor/trunc/nearest opcodes (#472)

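Implement the SIMD f32x4/f64x2 ceil/floor/trunc/nearest opcodes in the AOT compiler. The former abs and sqrt helpers are merged into a single simd_v128_float_intrinsic helper that the new opcodes share, and the new opcodes are added to the loader and the opcode table.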

Signed-off-by: Wenyong Huang <wenyong.huang@intel.com>
Wenyong Huang 5 ani în urmă
părinte
comite
ad35c3c21b

+ 49 - 0
core/iwasm/compilation/aot_compiler.c

@@ -1741,6 +1741,55 @@ build_atomic_rmw:
             break;
           }
 
+          case SIMD_f32x4_ceil:
+          {
+            if (!aot_compile_simd_f32x4_ceil(comp_ctx, func_ctx))
+              return false;
+            break;
+          }
+          case SIMD_f64x2_ceil:
+          {
+            if (!aot_compile_simd_f64x2_ceil(comp_ctx, func_ctx))
+              return false;
+            break;
+          }
+          case SIMD_f32x4_floor:
+          {
+            if (!aot_compile_simd_f32x4_floor(comp_ctx, func_ctx))
+              return false;
+            break;
+          }
+          case SIMD_f64x2_floor:
+          {
+            if (!aot_compile_simd_f64x2_floor(comp_ctx, func_ctx))
+              return false;
+            break;
+          }
+          case SIMD_f32x4_trunc:
+          {
+            if (!aot_compile_simd_f32x4_trunc(comp_ctx, func_ctx))
+              return false;
+            break;
+          }
+          case SIMD_f64x2_trunc:
+          {
+            if (!aot_compile_simd_f64x2_trunc(comp_ctx, func_ctx))
+              return false;
+            break;
+          }
+          case SIMD_f32x4_nearest:
+          {
+            if (!aot_compile_simd_f32x4_nearest(comp_ctx, func_ctx))
+              return false;
+            break;
+          }
+          case SIMD_f64x2_nearest:
+          {
+            if (!aot_compile_simd_f64x2_nearest(comp_ctx, func_ctx))
+              return false;
+            break;
+          }
+
           default:
             break;
         }

+ 67 - 44
core/iwasm/compilation/simd/simd_floating_point.c

@@ -178,21 +178,21 @@ aot_compile_simd_f64x2_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
 }
 
 static bool
-simd_v128_float_abs(AOTCompContext *comp_ctx,
-                    AOTFuncContext *func_ctx,
-                    LLVMTypeRef vector_type,
-                    const char *intrinsic)
+simd_v128_float_intrinsic(AOTCompContext *comp_ctx,
+                          AOTFuncContext *func_ctx,
+                          LLVMTypeRef vector_type,
+                          const char *intrinsic)
 {
-    LLVMValueRef vector, result;
+    LLVMValueRef number, result;
     LLVMTypeRef param_types[1] = { vector_type };
 
-    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
-                                             "vec"))) {
+    if (!(number = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
+                                             "number"))) {
         goto fail;
     }
 
     if (!(result = aot_call_llvm_intrinsic(comp_ctx, intrinsic, vector_type,
-                                           param_types, 1, vector))) {
+                                           param_types, 1, number))) {
         HANDLE_FAILURE("LLVMBuildCall");
         goto fail;
     }
@@ -205,6 +205,7 @@ simd_v128_float_abs(AOTCompContext *comp_ctx,
 
     /* push result into the stack */
     PUSH_V128(result);
+
     return true;
 fail:
     return false;
@@ -213,61 +214,83 @@ fail:
 bool
 aot_compile_simd_f32x4_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
 {
-    return simd_v128_float_abs(comp_ctx, func_ctx, V128_f32x4_TYPE,
-                               "llvm.fabs.v4f32");
+    return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE,
+                                     "llvm.fabs.v4f32");
 }
 
 bool
 aot_compile_simd_f64x2_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
 {
-    return simd_v128_float_abs(comp_ctx, func_ctx, V128_f64x2_TYPE,
-                               "llvm.fabs.v2f64");
+    return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE,
+                                     "llvm.fabs.v2f64");
 }
 
-static bool
-simd_v128_float_sqrt(AOTCompContext *comp_ctx,
-                     AOTFuncContext *func_ctx,
-                     LLVMTypeRef vector_type,
-                     const char *intrinsic)
+bool
+aot_compile_simd_f32x4_sqrt(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
 {
-    LLVMValueRef number, result;
-    LLVMTypeRef param_types[1] = { vector_type };
+    return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE,
+                                     "llvm.sqrt.v4f32");
+}
 
-    if (!(number = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
-                                             "number"))) {
-        goto fail;
-    }
+bool
+aot_compile_simd_f64x2_sqrt(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+{
+    return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE,
+                                     "llvm.sqrt.v2f64");
+}
 
-    if (!(result = aot_call_llvm_intrinsic(comp_ctx, intrinsic, vector_type,
-                                           param_types, 1, number))) {
-        HANDLE_FAILURE("LLVMBuildCall");
-        goto fail;
-    }
+bool
+aot_compile_simd_f32x4_ceil(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+{
+    return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE,
+                                     "llvm.ceil.v4f32");
+}
 
-    if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE,
-                                    "ret"))) {
-        HANDLE_FAILURE("LLVMBuildBitCast");
-        goto fail;
-    }
+bool
+aot_compile_simd_f64x2_ceil(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+{
+    return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE,
+                                     "llvm.ceil.v2f64");
+}
 
-    /* push result into the stack */
-    PUSH_V128(result);
+bool
+aot_compile_simd_f32x4_floor(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+{
+    return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE,
+                                     "llvm.floor.v4f32");
+}
 
-    return true;
-fail:
-    return false;
+bool
+aot_compile_simd_f64x2_floor(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+{
+    return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE,
+                                     "llvm.floor.v2f64");
 }
 
 bool
-aot_compile_simd_f32x4_sqrt(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+aot_compile_simd_f32x4_trunc(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
 {
-    return simd_v128_float_sqrt(comp_ctx, func_ctx, V128_f32x4_TYPE,
-                                "llvm.sqrt.v4f32");
+    return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE,
+                                     "llvm.trunc.v4f32");
 }
 
 bool
-aot_compile_simd_f64x2_sqrt(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+aot_compile_simd_f64x2_trunc(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+{
+    return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE,
+                                     "llvm.trunc.v2f64");
+}
+
+bool
+aot_compile_simd_f32x4_nearest(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+{
+    return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE,
+                                     "llvm.rint.v4f32");
+}
+
+bool
+aot_compile_simd_f64x2_nearest(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
 {
-    return simd_v128_float_sqrt(comp_ctx, func_ctx, V128_f64x2_TYPE,
-                                "llvm.sqrt.v2f64");
+    return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE,
+                                     "llvm.rint.v2f64");
 }

+ 26 - 4
core/iwasm/compilation/simd/simd_floating_point.h

@@ -35,12 +35,34 @@ bool
 aot_compile_simd_f64x2_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
 
 bool
-aot_compile_simd_f32x4_sqrt(AOTCompContext *comp_ctx,
-                            AOTFuncContext *func_ctx);
+aot_compile_simd_f32x4_sqrt(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
 
 bool
-aot_compile_simd_f64x2_sqrt(AOTCompContext *comp_ctx,
-                            AOTFuncContext *func_ctx);
+aot_compile_simd_f64x2_sqrt(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+
+bool
+aot_compile_simd_f32x4_ceil(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+
+bool
+aot_compile_simd_f64x2_ceil(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+
+bool
+aot_compile_simd_f32x4_floor(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+
+bool
+aot_compile_simd_f64x2_floor(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+
+bool
+aot_compile_simd_f32x4_trunc(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+
+bool
+aot_compile_simd_f64x2_trunc(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+
+bool
+aot_compile_simd_f32x4_nearest(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+
+bool
+aot_compile_simd_f64x2_nearest(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
 
 #ifdef __cplusplus
 } /* end of extern "C" */

+ 8 - 0
core/iwasm/interpreter/wasm_loader.c

@@ -7481,6 +7481,14 @@ fail_data_cnt_sec_require:
                         POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
                         break;
 
+                    case SIMD_f32x4_ceil:
+                    case SIMD_f32x4_floor:
+                    case SIMD_f32x4_trunc:
+                    case SIMD_f32x4_nearest:
+                    case SIMD_f64x2_ceil:
+                    case SIMD_f64x2_floor:
+                    case SIMD_f64x2_trunc:
+                    case SIMD_f64x2_nearest:
                     case SIMD_v128_not:
                     case SIMD_i8x16_abs:
                     case SIMD_i8x16_neg:

+ 10 - 0
core/iwasm/interpreter/wasm_opcode.h

@@ -472,6 +472,16 @@ typedef enum WASMSimdEXTOpcode {
     SIMD_i64x2_sub    = 0xd1,
     SIMD_i64x2_mul    = 0xd5,
 
+    /* float ceil/floor/trunc/nearest */
+    SIMD_f32x4_ceil   = 0xd8,
+    SIMD_f32x4_floor  = 0xd9,
+    SIMD_f32x4_trunc  = 0xda,
+    SIMD_f32x4_nearest = 0xdb,
+    SIMD_f64x2_ceil   = 0xdc,
+    SIMD_f64x2_floor  = 0xdd,
+    SIMD_f64x2_trunc  = 0xde,
+    SIMD_f64x2_nearest = 0xdf,
+
     /* f32x4 operation */
     SIMD_f32x4_abs    = 0xe0,
     SIMD_f32x4_neg    = 0xe1,