Просмотр исходного кода

Fix splat opcodes, add V128 handling in preserve_referenced_local and reserve_block_ret

James Marsh 1 год назад
Родитель
Коммит
b2804c004f
2 изменённых файла: 136 добавлений и 36 удалений
  1. 32 13
      core/iwasm/interpreter/wasm_interp_fast.c
  2. 104 23
      core/iwasm/interpreter/wasm_loader.c

+ 32 - 13
core/iwasm/interpreter/wasm_interp_fast.c

@@ -47,7 +47,7 @@ typedef float64 CellType_F64;
      && (app_addr) <= shared_heap_end_off - bytes + 1)
 
 #define shared_heap_addr_app_to_native(app_addr, native_addr) \
-    native_addr = shared_heap_base_addr + ((app_addr)-shared_heap_start_off)
+    native_addr = shared_heap_base_addr + ((app_addr) - shared_heap_start_off)
 
 #define CHECK_SHARED_HEAP_OVERFLOW(app_addr, bytes, native_addr) \
     if (app_addr_in_shared_heap(app_addr, bytes))                \
@@ -1793,7 +1793,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                 else
                     cur_func_type = cur_func->u.func->func_type;
 
-                    /* clang-format off */
+                /* clang-format off */
 #if WASM_ENABLE_GC == 0
                 if (cur_type != cur_func_type) {
                     wasm_set_exception(module, "indirect call type mismatch");
@@ -5923,12 +5923,11 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         uint32 offset, addr;
                         offset = read_uint32(frame_ip);
                         V128 data = POP_V128();
-                        addr = POP_I32();
+                        int32 base = POP_I32();
+                        offset += base;
+                        addr = GET_OPERAND(uint32, I32, 0);
 
-                        V128 data;
-                        data = POP_V128();
-
-                        CHECK_MEMORY_OVERFLOW(16);
+                        CHECK_MEMORY_OVERFLOW(32);
                         STORE_V128(maddr, data);
                         break;
                     }
@@ -5948,14 +5947,14 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                     case SIMD_v8x16_shuffle:
                     {
                         V128 indices;
-                        V128 v2 = POP_V128();
-                        V128 v1 = POP_V128();
-                        addr_ret = GET_OFFSET();
-
                         bh_memcpy_s(&indices, sizeof(V128), frame_ip,
                                     sizeof(V128));
                         frame_ip += sizeof(V128);
 
+                        V128 v2 = POP_V128();
+                        V128 v1 = POP_V128();
+                        addr_ret = GET_OFFSET();
+
                         V128 result;
                         for (int i = 0; i < 16; i++) {
                             uint8_t index = indices.i8x16[i];
@@ -5983,6 +5982,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         SIMDE_V128_TO_SIMD_V128(simde_result, result);
 
                         PUT_V128_TO_ADDR(frame_lp + addr_ret, result);
+                        break;
                     }
 
                     /* Splat */
@@ -6008,7 +6008,15 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
 
                     case SIMD_i8x16_splat:
                     {
-                        SIMD_SPLAT_OP_I32(simde_wasm_i8x16_splat);
+                        uint32 val = POP_I32();
+                        addr_ret = GET_OFFSET();
+
+                        simde_v128_t simde_result = simde_wasm_i8x16_splat(val);
+
+                        V128 result;
+                        SIMDE_V128_TO_SIMD_V128(simde_result, result);
+
+                        PUT_V128_TO_ADDR(frame_lp + addr_ret, result);
                         break;
                     }
                     case SIMD_i16x8_splat:
@@ -6140,7 +6148,18 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                     /* i8x16 comparison operations */
                     case SIMD_i8x16_eq:
                     {
-                        SIMD_DOUBLE_OP(simde_wasm_i8x16_eq);
+                        V128 v2 = POP_V128();
+                        V128 v1 = POP_V128();
+                        addr_ret = GET_OFFSET();
+
+                        simde_v128_t simde_result =
+                            simde_wasm_i8x16_eq(SIMD_V128_TO_SIMDE_V128(v1),
+                                                SIMD_V128_TO_SIMDE_V128(v2));
+
+                        V128 result;
+                        SIMDE_V128_TO_SIMD_V128(simde_result, result);
+
+                        PUT_V128_TO_ADDR(frame_lp + addr_ret, result);
                         break;
                     }
                     case SIMD_i8x16_ne:

+ 104 - 23
core/iwasm/interpreter/wasm_loader.c

@@ -9125,6 +9125,9 @@ preserve_referenced_local(WASMLoaderContext *loader_ctx, uint8 opcode,
 
         if (is_32bit_type(cur_type))
             i++;
+        else if (cur_type == VALUE_TYPE_V128) {
+            i += 4;
+        }
         else
             i += 2;
     }
@@ -9155,7 +9158,10 @@ preserve_local_for_block(WASMLoaderContext *loader_ctx, uint8 opcode,
                 return false;
         }
 
-        if (is_32bit_type(cur_type)) {
+        if (cur_type == VALUE_TYPE_V128) {
+            i += 4;
+        }
+        else if (is_32bit_type(cur_type)) {
             i++;
         }
         else {
@@ -9498,6 +9504,8 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value,
                 || (type == VALUE_TYPE_EXTERNREF
                     && *(int32 *)value == c->value.i32)
 #endif
+                || (type == VALUE_TYPE_V128
+                    && (0 == memcmp(value, &(c->value.v128), sizeof(V128))))
                 || (type == VALUE_TYPE_F64
                     && (0 == memcmp(value, &(c->value.f64), sizeof(float64))))
                 || (type == VALUE_TYPE_F32
@@ -9508,6 +9516,9 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value,
         }
         if (is_32bit_type(c->value_type))
             operand_offset += 1;
+        else if (c->value_type == VALUE_TYPE_V128) {
+            operand_offset += 4;
+        }
         else
             operand_offset += 2;
     }
@@ -9559,6 +9570,10 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value,
                 c->value.i32 = *(int32 *)value;
                 ctx->const_cell_num++;
                 break;
+            case VALUE_TYPE_V128:
+                bh_memcpy_s(&(c->value.v128), sizeof(WASMValue), value,
+                            sizeof(V128));
+                ctx->const_cell_num++;
 #if WASM_ENABLE_REF_TYPES != 0 && WASM_ENABLE_GC == 0
             case VALUE_TYPE_EXTERNREF:
             case VALUE_TYPE_FUNCREF:
@@ -9760,17 +9775,22 @@ reserve_block_ret(WASMLoaderContext *loader_ctx, uint8 opcode,
         block_type, &return_types, &reftype_maps, &reftype_map_count);
 #endif
 
-    /* If there is only one return value, use EXT_OP_COPY_STACK_TOP/_I64 instead
-     * of EXT_OP_COPY_STACK_VALUES for interpreter performance. */
+    /* If there is only one return value, use EXT_OP_COPY_STACK_TOP/_I64/_V128
+     * instead of EXT_OP_COPY_STACK_VALUES for interpreter performance. */
     if (return_count == 1) {
         uint8 cell = (uint8)wasm_value_type_cell_num(return_types[0]);
-        if (cell <= 2 /* V128 isn't supported whose cell num is 4 */
-            && block->dynamic_offset != *(loader_ctx->frame_offset - cell)) {
+        if (block->dynamic_offset != *(loader_ctx->frame_offset - cell)) {
             /* insert op_copy before else opcode */
             if (opcode == WASM_OP_ELSE)
                 skip_label();
-            emit_label(cell == 1 ? EXT_OP_COPY_STACK_TOP
-                                 : EXT_OP_COPY_STACK_TOP_I64);
+
+            if (cell == 4) {
+                emit_label(EXT_OP_COPY_STACK_TOP_V128);
+            }
+            else {
+                emit_label(cell == 1 ? EXT_OP_COPY_STACK_TOP
+                                     : EXT_OP_COPY_STACK_TOP_I64);
+            }
             emit_operand(loader_ctx, *(loader_ctx->frame_offset - cell));
             emit_operand(loader_ctx, block->dynamic_offset);
 
@@ -9805,11 +9825,37 @@ reserve_block_ret(WASMLoaderContext *loader_ctx, uint8 opcode,
     for (i = (int32)return_count - 1; i >= 0; i--) {
         uint8 cells = (uint8)wasm_value_type_cell_num(return_types[i]);
 
-        frame_offset -= cells;
-        dynamic_offset -= cells;
-        if (dynamic_offset != *frame_offset) {
-            value_count++;
-            total_cel_num += cells;
+        if (frame_offset - cells < loader_ctx->frame_offset_bottom) {
+            set_error_buf(error_buf, error_buf_size, "frame offset underflow");
+            goto fail;
+        }
+
+        if (cells == 4) {
+            bool needs_copy = false;
+            int16 v128_dynamic = dynamic_offset - cells;
+
+            for (int j = 0; j < 4; j++) {
+                if (*(frame_offset - j - 1) != (v128_dynamic + j)) {
+                    needs_copy = true;
+                    break;
+                }
+            }
+
+            if (needs_copy) {
+                value_count++;
+                total_cel_num += cells;
+            }
+
+            frame_offset -= cells;
+            dynamic_offset = v128_dynamic;
+        }
+        else {
+            frame_offset -= cells;
+            dynamic_offset -= cells;
+            if (dynamic_offset != *frame_offset) {
+                value_count++;
+                total_cel_num += cells;
+            }
         }
     }
 
@@ -9845,19 +9891,50 @@ reserve_block_ret(WASMLoaderContext *loader_ctx, uint8 opcode,
         dynamic_offset = dynamic_offset_org;
         for (i = (int32)return_count - 1, j = 0; i >= 0; i--) {
             uint8 cell = (uint8)wasm_value_type_cell_num(return_types[i]);
-            frame_offset -= cell;
-            dynamic_offset -= cell;
-            if (dynamic_offset != *frame_offset) {
-                /* cell num */
-                cells[j] = cell;
-                /* src offset */
-                src_offsets[j] = *frame_offset;
-                /* dst offset */
-                dst_offsets[j] = dynamic_offset;
-                j++;
+
+            if (cell == 4) {
+                bool needs_copy = false;
+                int16 v128_dynamic = dynamic_offset - cell;
+
+                for (int k = 0; k < 4; k++) {
+                    if (*(frame_offset - k - 1) != (v128_dynamic + k)) {
+                        needs_copy = true;
+                        break;
+                    }
+                }
+
+                if (needs_copy) {
+                    cells[j] = cell;
+                    src_offsets[j] = *(frame_offset - cell);
+                    dst_offsets[j] = v128_dynamic;
+                    j++;
+                }
+
+                frame_offset -= cell;
+                dynamic_offset = v128_dynamic;
+            }
+            else {
+                frame_offset -= cell;
+                dynamic_offset -= cell;
+                if (dynamic_offset != *frame_offset) {
+                    cells[j] = cell;
+                    /* src offset */
+                    src_offsets[j] = *frame_offset;
+                    /* dst offset */
+                    dst_offsets[j] = dynamic_offset;
+                    j++;
+                }
             }
+
             if (opcode == WASM_OP_ELSE) {
-                *frame_offset = dynamic_offset;
+                if (cell == 4) {
+                    for (int k = 0; k < cell; k++) {
+                        *(frame_offset + k) = dynamic_offset + k;
+                    }
+                }
+                else {
+                    *frame_offset = dynamic_offset;
+                }
             }
             else {
                 loader_ctx->frame_offset = frame_offset;
@@ -13031,6 +13108,10 @@ re_scan:
                         emit_label(EXT_OP_TEE_LOCAL_FAST);
                         emit_byte(loader_ctx, (uint8)local_offset);
                     }
+                    else if (local_type == VALUE_TYPE_V128) {
+                        emit_label(EXT_OP_TEE_LOCAL_FAST_V128);
+                        emit_byte(loader_ctx, (uint8)local_offset);
+                    }
                     else {
                         emit_label(EXT_OP_TEE_LOCAL_FAST_I64);
                         emit_byte(loader_ctx, (uint8)local_offset);