Explorar el Código

release/1.3.x: Cumulative fixes for release/1.3.x (#3342)

Apply patches which were fixed in branch main to branch `release/1.3.x`:
- aot debug: Fix a few NULL dereferences on errors (#3273)
- aot debug: Fix a NULL dereference (#3274)
- aot debug: Process lldb_function_to_function_dbi only for C (#3278)
- Revert "lldb_function_to_function_dbi: A hack to avoid crashing on C++ methods (#3190)" (#3281)
- Fix warnings/issues reported in Windows and by CodeQL/Coverity (#3275)
- thread mgr: Free aux stack only when it was allocated (#3282)
- interp: Restore context from prev_frame after tail calling a native function (#3283)
- Sync simd opcode definitions spec (#3290)
- Add more checks in wasm loader (#3300)
- Fix posix_fadvise error handling (#3323)
- Fix readdir for posix (#3339)
- wasm loader: Fix checks for opcode ref.func and opcode else (#3340)
- Enhance wasm loader checks for opcode br_table (#3352)
- CI: Use macos-13 instead of macos-latest (#3366)
Wenyong Huang hace 1 año
padre
commit
190571feae
Se han modificado 34 ficheros con 327 adiciones y 274 borrados
  1. 7 7
      .github/workflows/compilation_on_macos.yml
  2. 5 5
      .github/workflows/release_process.yml
  3. 2 2
      core/config.h
  4. 3 0
      core/iwasm/common/wasm_exec_env.h
  5. 7 6
      core/iwasm/common/wasm_runtime_common.c
  6. 2 2
      core/iwasm/common/wasm_runtime_common.h
  7. 3 52
      core/iwasm/compilation/aot_compiler.c
  8. 8 2
      core/iwasm/compilation/aot_emit_control.c
  9. 45 27
      core/iwasm/compilation/debug/dwarf_extractor.cpp
  10. 0 9
      core/iwasm/compilation/simd/simd_conversions.c
  11. 0 4
      core/iwasm/compilation/simd/simd_conversions.h
  12. 0 14
      core/iwasm/compilation/simd/simd_floating_point.c
  13. 0 8
      core/iwasm/compilation/simd/simd_floating_point.h
  14. 1 10
      core/iwasm/compilation/simd/simd_int_arith.c
  15. 0 4
      core/iwasm/compilation/simd/simd_int_arith.h
  16. 0 15
      core/iwasm/compilation/simd/simd_sat_int_arith.c
  17. 0 4
      core/iwasm/compilation/simd/simd_sat_int_arith.h
  18. 6 6
      core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp
  19. 39 7
      core/iwasm/interpreter/wasm_interp_classic.c
  20. 24 3
      core/iwasm/interpreter/wasm_interp_fast.c
  21. 67 39
      core/iwasm/interpreter/wasm_loader.c
  22. 58 23
      core/iwasm/interpreter/wasm_mini_loader.c
  23. 10 10
      core/iwasm/interpreter/wasm_opcode.h
  24. 2 2
      core/iwasm/interpreter/wasm_runtime.c
  25. 3 1
      core/iwasm/libraries/debug-engine/handler.c
  26. 5 0
      core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/blocking_op.h
  27. 9 2
      core/iwasm/libraries/thread-mgr/thread_manager.c
  28. 6 1
      core/shared/platform/common/posix/posix_file.c
  29. 5 4
      core/shared/platform/linux-sgx/sgx_platform.c
  30. 1 1
      core/shared/utils/runtime_timer.c
  31. 3 1
      doc/perf_tune.md
  32. 2 1
      product-mini/platforms/posix/main.c
  33. 3 1
      product-mini/platforms/windows/main.c
  34. 1 1
      wamr-compiler/main.c

+ 7 - 7
.github/workflows/compilation_on_macos.yml

@@ -1,7 +1,7 @@
 # Copyright (C) 2019 Intel Corporation.  All rights reserved.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-name: compilation on macos-latest
+name: compilation on macos
 
 on:
   # will be triggered on PR events
@@ -58,7 +58,7 @@ jobs:
   build_llvm_libraries:
     uses: ./.github/workflows/build_llvm_libraries.yml
     with:
-      os: "macos-latest"
+      os: "macos-13"
       arch: "X86"
 
   build_wamrc:
@@ -67,7 +67,7 @@ jobs:
     strategy:
       matrix:
         include:
-          - os: macos-latest
+          - os: macos-13
             llvm_cache_key: ${{ needs.build_llvm_libraries.outputs.cache_key }}
     steps:
       - name: checkout
@@ -128,7 +128,7 @@ jobs:
             "-DWAMR_BUILD_TAIL_CALL=1",
             "-DWAMR_DISABLE_HW_BOUND_CHECK=1",
           ]
-        os: [macos-latest]
+        os: [macos-13]
         platform: [darwin]
         exclude:
           # uncompatiable feature and platform
@@ -170,7 +170,7 @@ jobs:
           - make_options_run_mode: $LLVM_EAGER_JIT_BUILD_OPTIONS
             make_options_feature: "-DWAMR_BUILD_MINI_LOADER=1"
         include:
-          - os: macos-latest
+          - os: macos-13
             llvm_cache_key: ${{ needs.build_llvm_libraries.outputs.cache_key }}
     steps:
       - name: checkout
@@ -215,7 +215,7 @@ jobs:
             #$LLVM_EAGER_JIT_BUILD_OPTIONS,
             #$AOT_BUILD_OPTIONS,
           ]
-        os: [macos-latest]
+        os: [macos-13]
         wasi_sdk_release:
           [
             "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-20/wasi-sdk-20.0-macos.tar.gz",
@@ -247,7 +247,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [macos-latest]
+        os: [macos-13]
         wasi_sdk_release:
           [
             "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-20/wasi-sdk-20.0-macos.tar.gz",

+ 5 - 5
.github/workflows/release_process.yml

@@ -69,7 +69,7 @@ jobs:
     needs: [create_tag, create_release]
     uses: ./.github/workflows/build_llvm_libraries.yml
     with:
-      os: "macos-latest"
+      os: "macos-13"
       arch: "AArch64 ARM Mips RISCV X86"
 
   #
@@ -100,7 +100,7 @@ jobs:
     with:
       llvm_cache_key: ${{ needs.build_llvm_libraries_on_macos.outputs.cache_key }}
       release: true
-      runner: macos-latest
+      runner: macos-13
       upload_url: ${{ needs.create_release.outputs.upload_url }}
       ver_num: ${{ needs.create_tag.outputs.new_ver }}
 
@@ -132,7 +132,7 @@ jobs:
     with:
       cwd: product-mini/platforms/darwin
       llvm_cache_key: ${{ needs.build_llvm_libraries_on_macos.outputs.cache_key }}
-      runner: macos-latest
+      runner: macos-13
       upload_url: ${{ needs.create_release.outputs.upload_url }}
       ver_num: ${{ needs.create_tag.outputs.new_ver}}
 
@@ -163,7 +163,7 @@ jobs:
     uses: ./.github/workflows/build_wamr_sdk.yml
     with:
       config_file: wamr_config_macos_release.cmake
-      runner: macos-latest
+      runner: macos-13
       upload_url: ${{ needs.create_release.outputs.upload_url }}
       ver_num: ${{ needs.create_tag.outputs.new_ver}}
       wasi_sdk_url: https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-19/wasi-sdk-19.0-macos.tar.gz
@@ -209,7 +209,7 @@ jobs:
     needs: [create_tag, create_release]
     uses: ./.github/workflows/build_wamr_lldb.yml
     with:
-      runner: macos-latest
+      runner: macos-13
       arch: universal
       upload_url: ${{ needs.create_release.outputs.upload_url }}
       ver_num: ${{ needs.create_tag.outputs.new_ver}}

+ 2 - 2
core/config.h

@@ -390,7 +390,7 @@
 #else
 #define DEFAULT_WASM_STACK_SIZE (12 * 1024)
 #endif
-/* Min auxilliary stack size of each wasm thread */
+/* Min auxiliary stack size of each wasm thread */
 #define WASM_THREAD_AUX_STACK_SIZE_MIN (256)
 
 /* Default/min native stack size of each app thread */
@@ -515,7 +515,7 @@
 #endif
 
 /* Support registering quick AOT/JIT function entries of some func types
-   to speedup the calling process of invoking the AOT/JIT functions of
+   to speed up the calling process of invoking the AOT/JIT functions of
    these types from the host embedder */
 #ifndef WASM_ENABLE_QUICK_AOT_ENTRY
 #define WASM_ENABLE_QUICK_AOT_ENTRY 1

+ 3 - 0
core/iwasm/common/wasm_exec_env.h

@@ -114,6 +114,9 @@ typedef struct WASMExecEnv {
 
     /* whether current thread is detached */
     bool thread_is_detached;
+
+    /* whether the aux stack is allocated */
+    bool is_aux_stack_allocated;
 #endif
 
 #if WASM_ENABLE_DEBUG_INTERP != 0

+ 7 - 6
core/iwasm/common/wasm_runtime_common.c

@@ -269,10 +269,10 @@ decode_insn(uint8 *insn)
                                         runtime_address);
 
         /* Print current instruction */
-        /*
+#if 0
         os_printf("%012" PRIX64 "  ", runtime_address);
         puts(buffer);
-        */
+#endif
 
         return instruction.length;
     }
@@ -946,7 +946,7 @@ wasm_runtime_register_module_internal(const char *module_name,
     /* module hasn't been registered */
     node = runtime_malloc(sizeof(WASMRegisteredModule), NULL, NULL, 0);
     if (!node) {
-        LOG_DEBUG("malloc WASMRegisteredModule failed. SZ=%d",
+        LOG_DEBUG("malloc WASMRegisteredModule failed. SZ=%zu",
                   sizeof(WASMRegisteredModule));
         return false;
     }
@@ -5033,6 +5033,7 @@ wasm_externref_set_cleanup(WASMModuleInstanceCommon *module_inst,
     if (lookup_user_data.found) {
         void *key = (void *)(uintptr_t)lookup_user_data.externref_idx;
         ExternRefMapNode *node = bh_hash_map_find(externref_map, key);
+        bh_assert(node);
         node->cleanup = extern_obj_cleanup;
         ok = true;
     }
@@ -6061,12 +6062,12 @@ wasm_runtime_load_depended_module(const WASMModuleCommon *parent_module,
     if (!ret) {
         LOG_DEBUG("read the file of %s failed", sub_module_name);
         set_error_buf_v(parent_module, error_buf, error_buf_size,
-                        "unknown import", sub_module_name);
+                        "unknown import %s", sub_module_name);
         goto delete_loading_module;
     }
     if (get_package_type(buffer, buffer_size) != parent_module->module_type) {
         LOG_DEBUG("moudle %s type error", sub_module_name);
-        goto delete_loading_module;
+        goto destroy_file_buffer;
     }
     if (get_package_type(buffer, buffer_size) == Wasm_Module_Bytecode) {
 #if WASM_ENABLE_INTERP != 0
@@ -6171,7 +6172,7 @@ wasm_runtime_sub_module_instantiate(WASMModuleCommon *module,
         sub_module_inst_list_node = loader_malloc(sizeof(WASMSubModInstNode),
                                                   error_buf, error_buf_size);
         if (!sub_module_inst_list_node) {
-            LOG_DEBUG("Malloc WASMSubModInstNode failed, SZ:%d",
+            LOG_DEBUG("Malloc WASMSubModInstNode failed, SZ: %zu",
                       sizeof(WASMSubModInstNode));
             if (sub_module_inst)
                 wasm_runtime_deinstantiate_internal(sub_module_inst, false);

+ 2 - 2
core/iwasm/common/wasm_runtime_common.h

@@ -336,7 +336,7 @@ typedef struct WASMModuleCommon {
 
     /* The following uint8[1] member is a dummy just to indicate
        some module_type dependent members follow.
-       Typically it should be accessed by casting to the corresponding
+       Typically, it should be accessed by casting to the corresponding
        actual module_type dependent structure, not via this member. */
     uint8 module_data[1];
 } WASMModuleCommon;
@@ -352,7 +352,7 @@ typedef struct WASMModuleInstanceCommon {
 
     /* The following uint8[1] member is a dummy just to indicate
        some module_type dependent members follow.
-       Typically it should be accessed by casting to the corresponding
+       Typically, it should be accessed by casting to the corresponding
        actual module_type dependent structure, not via this member. */
     uint8 module_inst_data[1];
 } WASMModuleInstanceCommon;

+ 3 - 52
core/iwasm/compilation/aot_compiler.c

@@ -196,7 +196,9 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index)
         location = dwarf_gen_location(
             comp_ctx, func_ctx,
             (frame_ip - 1) - comp_ctx->comp_data->wasm_module->buf_code);
-        LLVMSetCurrentDebugLocation2(comp_ctx->builder, location);
+        if (location != NULL) {
+            LLVMSetCurrentDebugLocation2(comp_ctx->builder, location);
+        }
 #endif
 
         switch (opcode) {
@@ -2134,16 +2136,6 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index)
                         break;
                     }
 
-                    case SIMD_i32x4_narrow_i64x2_s:
-                    case SIMD_i32x4_narrow_i64x2_u:
-                    {
-                        if (!aot_compile_simd_i32x4_narrow_i64x2(
-                                comp_ctx, func_ctx,
-                                SIMD_i32x4_narrow_i64x2_s == opcode))
-                            return false;
-                        break;
-                    }
-
                     case SIMD_i32x4_extend_low_i16x8_s:
                     case SIMD_i32x4_extend_high_i16x8_s:
                     {
@@ -2183,16 +2175,6 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index)
                         break;
                     }
 
-                    case SIMD_i32x4_add_sat_s:
-                    case SIMD_i32x4_add_sat_u:
-                    {
-                        if (!aot_compile_simd_i32x4_saturate(
-                                comp_ctx, func_ctx, V128_ADD,
-                                opcode == SIMD_i32x4_add_sat_s))
-                            return false;
-                        break;
-                    }
-
                     case SIMD_i32x4_sub:
                     {
                         if (!aot_compile_simd_i32x4_arith(comp_ctx, func_ctx,
@@ -2201,16 +2183,6 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index)
                         break;
                     }
 
-                    case SIMD_i32x4_sub_sat_s:
-                    case SIMD_i32x4_sub_sat_u:
-                    {
-                        if (!aot_compile_simd_i32x4_saturate(
-                                comp_ctx, func_ctx, V128_SUB,
-                                opcode == SIMD_i32x4_add_sat_s))
-                            return false;
-                        break;
-                    }
-
                     case SIMD_i32x4_mul:
                     {
                         if (!aot_compile_simd_i32x4_arith(comp_ctx, func_ctx,
@@ -2247,13 +2219,6 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index)
                         break;
                     }
 
-                    case SIMD_i32x4_avgr_u:
-                    {
-                        if (!aot_compile_simd_i32x4_avgr_u(comp_ctx, func_ctx))
-                            return false;
-                        break;
-                    }
-
                     case SIMD_i32x4_extmul_low_i16x8_s:
                     case SIMD_i32x4_extmul_high_i16x8_s:
                     {
@@ -2410,13 +2375,6 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index)
                         break;
                     }
 
-                    case SIMD_f32x4_round:
-                    {
-                        if (!aot_compile_simd_f32x4_round(comp_ctx, func_ctx))
-                            return false;
-                        break;
-                    }
-
                     case SIMD_f32x4_sqrt:
                     {
                         if (!aot_compile_simd_f32x4_sqrt(comp_ctx, func_ctx))
@@ -2470,13 +2428,6 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index)
                         break;
                     }
 
-                    case SIMD_f64x2_round:
-                    {
-                        if (!aot_compile_simd_f64x2_round(comp_ctx, func_ctx))
-                            return false;
-                        break;
-                    }
-
                     case SIMD_f64x2_sqrt:
                     {
                         if (!aot_compile_simd_f64x2_sqrt(comp_ctx, func_ctx))

+ 8 - 2
core/iwasm/compilation/aot_emit_control.c

@@ -272,7 +272,9 @@ handle_next_reachable_block(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                 goto fail;
             }
 #if WASM_ENABLE_DEBUG_AOT != 0
-            LLVMInstructionSetDebugLoc(ret, return_location);
+            if (return_location != NULL) {
+                LLVMInstructionSetDebugLoc(ret, return_location);
+            }
 #endif
         }
         else {
@@ -281,7 +283,9 @@ handle_next_reachable_block(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                 goto fail;
             }
 #if WASM_ENABLE_DEBUG_AOT != 0
-            LLVMInstructionSetDebugLoc(ret, return_location);
+            if (return_location != NULL) {
+                LLVMInstructionSetDebugLoc(ret, return_location);
+            }
 #endif
         }
     }
@@ -1048,6 +1052,7 @@ aot_compile_op_br_table(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                         PUSH(values[j], target_block->result_types[j]);
                     }
                     wasm_runtime_free(values);
+                    values = NULL;
                 }
                 target_block->is_reachable = true;
                 if (i == br_count)
@@ -1073,6 +1078,7 @@ aot_compile_op_br_table(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                         PUSH(values[j], target_block->param_types[j]);
                     }
                     wasm_runtime_free(values);
+                    values = NULL;
                 }
                 if (i == br_count)
                     default_llvm_block = target_block->llvm_entry_block;

+ 45 - 27
core/iwasm/compilation/debug/dwarf_extractor.cpp

@@ -295,6 +295,28 @@ lldb_function_to_function_dbi(const AOTCompContext *comp_ctx,
     const size_t num_function_args = function_args.GetSize();
     dwarf_extractor *extractor;
 
+    /*
+     * Process only known languages.
+     * We have a few assumptions which might not be true for non-C functions.
+     *
+     * At least it's known broken for C++ and Rust:
+     * https://github.com/bytecodealliance/wasm-micro-runtime/issues/3187
+     * https://github.com/bytecodealliance/wasm-micro-runtime/issues/3163
+     */
+    LanguageType language_type = function.GetLanguage();
+    switch (language_type) {
+        case eLanguageTypeC89:
+        case eLanguageTypeC:
+        case eLanguageTypeC99:
+        case eLanguageTypeC11:
+        case eLanguageTypeC17:
+            break;
+        default:
+            LOG_WARNING("func %s has unsuppoted language_type 0x%x",
+                        function_name, (int)language_type);
+            return NULL;
+    }
+
     if (!(extractor = TO_EXTACTOR(comp_ctx->comp_data->extractor)))
         return NULL;
 
@@ -313,6 +335,17 @@ lldb_function_to_function_dbi(const AOTCompContext *comp_ctx,
         if (function_arg_type.IsValid()) {
             ParamTypes[function_arg_idx + 1] =
                 lldb_type_to_type_dbi(comp_ctx, function_arg_type);
+            if (ParamTypes[function_arg_idx + 1] == NULL) {
+                LOG_WARNING(
+                    "func %s arg %" PRIu32
+                    " has a type not implemented by lldb_type_to_type_dbi",
+                    function_name, function_arg_idx);
+            }
+        }
+        else {
+            LOG_WARNING("func %s arg %" PRIu32 ": GetTypeAtIndex failed",
+                        function_name, function_arg_idx);
+            ParamTypes[function_arg_idx + 1] = NULL;
         }
     }
 
@@ -354,27 +387,10 @@ lldb_function_to_function_dbi(const AOTCompContext *comp_ctx,
         LLVMDIBuilderCreateExpression(DIB, NULL, 0);
     auto variable_list =
         function.GetBlock().GetVariables(extractor->target, true, false, false);
-    unsigned int variable_offset = 0;
     if (num_function_args != variable_list.GetSize()) {
-        // A hack to detect C++ "this" pointer.
-        //
-        // REVISIT: is there a more reliable way?
-        // At the DWARF level, we can probably look at DW_AT_object_pointer
-        // and DW_AT_artificial. I'm not sure how it can be done via the
-        // LLDB API though.
-        if (num_function_args + 1 == variable_list.GetSize()) {
-            SBValue variable(variable_list.GetValueAtIndex(0));
-            const char *varname = variable.GetName();
-            if (varname != NULL && !strcmp(varname, "this")) {
-                variable_offset = 1;
-            }
-        }
-        if (!variable_offset) {
-            LOG_ERROR("function args number dismatch!:function %s %s value "
-                      "number=%d, function args=%d",
-                      function_name, function.GetMangledName(),
-                      variable_list.GetSize(), num_function_args);
-        }
+        LOG_ERROR(
+            "function args number dismatch!:value number=%d, function args=%d",
+            variable_list.GetSize(), num_function_args);
     }
 
     LLVMMetadataRef ParamLocation = LLVMDIBuilderCreateDebugLocation(
@@ -395,11 +411,10 @@ lldb_function_to_function_dbi(const AOTCompContext *comp_ctx,
     LLVMDIBuilderInsertDbgValueAtEnd(DIB, Param, ParamVar, ParamExpression,
                                      ParamLocation, block_curr);
 
-    for (uint32_t function_arg_idx = 0; function_arg_idx < num_function_args;
-         ++function_arg_idx) {
-        uint32_t variable_idx = variable_offset + function_arg_idx;
-        SBValue variable(variable_list.GetValueAtIndex(variable_idx));
-        if (variable.IsValid()) {
+    for (uint32_t function_arg_idx = 0;
+         function_arg_idx < variable_list.GetSize(); ++function_arg_idx) {
+        SBValue variable(variable_list.GetValueAtIndex(function_arg_idx));
+        if (variable.IsValid() && ParamTypes[function_arg_idx + 1] != NULL) {
             SBDeclaration dec(variable.GetDeclaration());
             auto valtype = variable.GetType();
             LLVMMetadataRef ParamLocation = LLVMDIBuilderCreateDebugLocation(
@@ -408,11 +423,12 @@ lldb_function_to_function_dbi(const AOTCompContext *comp_ctx,
             const char *varname = variable.GetName();
             LLVMMetadataRef ParamVar = LLVMDIBuilderCreateParameterVariable(
                 DIB, FunctionMetadata, varname, varname ? strlen(varname) : 0,
-                variable_idx + 1 + 1,
+                function_arg_idx + 1 + 1,
                 File, // starts form 1, and 1 is exenv,
                 dec.GetLine(), ParamTypes[function_arg_idx + 1], true,
                 LLVMDIFlagZero);
-            LLVMValueRef Param = LLVMGetParam(func_ctx->func, variable_idx + 1);
+            LLVMValueRef Param =
+                LLVMGetParam(func_ctx->func, function_arg_idx + 1);
             LLVMDIBuilderInsertDbgValueAtEnd(DIB, Param, ParamVar,
                                              ParamExpression, ParamLocation,
                                              block_curr);
@@ -491,6 +507,8 @@ dwarf_gen_location(const AOTCompContext *comp_ctx,
     dwarf_extractor *extractor;
     AOTFunc *func = func_ctx->aot_func;
 
+    if (func_ctx->debug_func == NULL)
+        return NULL;
     if (!(extractor = TO_EXTACTOR(comp_ctx->comp_data->extractor)))
         return NULL;
 

+ 0 - 9
core/iwasm/compilation/simd/simd_conversions.c

@@ -226,15 +226,6 @@ aot_compile_simd_i16x8_narrow_i32x4(AOTCompContext *comp_ctx,
     }
 }
 
-bool
-aot_compile_simd_i32x4_narrow_i64x2(AOTCompContext *comp_ctx,
-                                    AOTFuncContext *func_ctx, bool is_signed)
-{
-    /* TODO: x86 intrinsics */
-    return simd_integer_narrow_common(comp_ctx, func_ctx, e_sat_i64x2,
-                                      is_signed);
-}
-
 enum integer_extend_type {
     e_ext_i8x16,
     e_ext_i16x8,

+ 0 - 4
core/iwasm/compilation/simd/simd_conversions.h

@@ -20,10 +20,6 @@ bool
 aot_compile_simd_i16x8_narrow_i32x4(AOTCompContext *comp_ctx,
                                     AOTFuncContext *func_ctx, bool is_signed);
 
-bool
-aot_compile_simd_i32x4_narrow_i64x2(AOTCompContext *comp_ctx,
-                                    AOTFuncContext *func_ctx, bool is_signed);
-
 bool
 aot_compile_simd_i16x8_extend_i8x16(AOTCompContext *comp_ctx,
                                     AOTFuncContext *func_ctx, bool is_low,

+ 0 - 14
core/iwasm/compilation/simd/simd_floating_point.c

@@ -129,20 +129,6 @@ aot_compile_simd_f64x2_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
                                 "llvm.fabs.v2f64");
 }
 
-bool
-aot_compile_simd_f32x4_round(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
-{
-    return simd_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE,
-                                "llvm.round.v4f32");
-}
-
-bool
-aot_compile_simd_f64x2_round(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
-{
-    return simd_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE,
-                                "llvm.round.v2f64");
-}
-
 bool
 aot_compile_simd_f32x4_sqrt(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
 {

+ 0 - 8
core/iwasm/compilation/simd/simd_floating_point.h

@@ -32,14 +32,6 @@ aot_compile_simd_f32x4_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
 bool
 aot_compile_simd_f64x2_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
 
-bool
-aot_compile_simd_f32x4_round(AOTCompContext *comp_ctx,
-                             AOTFuncContext *func_ctx);
-
-bool
-aot_compile_simd_f64x2_round(AOTCompContext *comp_ctx,
-                             AOTFuncContext *func_ctx);
-
 bool
 aot_compile_simd_f32x4_sqrt(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
 

+ 1 - 10
core/iwasm/compilation/simd/simd_int_arith.c

@@ -243,7 +243,6 @@ aot_compile_simd_i64x2_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
 enum integer_avgr_u {
     e_avgr_u_i8x16,
     e_avgr_u_i16x8,
-    e_avgr_u_i32x4,
 };
 
 /* TODO: try int_x86_mmx_pavg_b and int_x86_mmx_pavg_w */
@@ -257,9 +256,8 @@ simd_v128_avg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     LLVMTypeRef vector_type[] = {
         V128_i8x16_TYPE,
         V128_i16x8_TYPE,
-        V128_i32x4_TYPE,
     };
-    unsigned lanes[] = { 16, 8, 4 };
+    unsigned lanes[] = { 16, 8 };
 
     if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
                                           vector_type[itype], "rhs"))
@@ -325,13 +323,6 @@ aot_compile_simd_i16x8_avgr_u(AOTCompContext *comp_ctx,
     return simd_v128_avg(comp_ctx, func_ctx, e_avgr_u_i16x8);
 }
 
-bool
-aot_compile_simd_i32x4_avgr_u(AOTCompContext *comp_ctx,
-                              AOTFuncContext *func_ctx)
-{
-    return simd_v128_avg(comp_ctx, func_ctx, e_avgr_u_i32x4);
-}
-
 bool
 aot_compile_simd_i32x4_dot_i16x8(AOTCompContext *comp_ctx,
                                  AOTFuncContext *func_ctx)

+ 0 - 4
core/iwasm/compilation/simd/simd_int_arith.h

@@ -76,10 +76,6 @@ bool
 aot_compile_simd_i16x8_avgr_u(AOTCompContext *comp_ctx,
                               AOTFuncContext *func_ctx);
 
-bool
-aot_compile_simd_i32x4_avgr_u(AOTCompContext *comp_ctx,
-                              AOTFuncContext *func_ctx);
-
 bool
 aot_compile_simd_i32x4_dot_i16x8(AOTCompContext *comp_ctx,
                                  AOTFuncContext *func_ctx);

+ 0 - 15
core/iwasm/compilation/simd/simd_sat_int_arith.c

@@ -64,18 +64,3 @@ aot_compile_simd_i16x8_saturate(AOTCompContext *comp_ctx,
                               is_signed ? intrinsics[arith_op][0]
                                         : intrinsics[arith_op][1]);
 }
-
-bool
-aot_compile_simd_i32x4_saturate(AOTCompContext *comp_ctx,
-                                AOTFuncContext *func_ctx,
-                                V128Arithmetic arith_op, bool is_signed)
-{
-    char *intrinsics[][2] = {
-        { "llvm.sadd.sat.v4i32", "llvm.uadd.sat.v4i32" },
-        { "llvm.ssub.sat.v4i32", "llvm.usub.sat.v4i32" },
-    };
-
-    return simd_sat_int_arith(comp_ctx, func_ctx, V128_i16x8_TYPE,
-                              is_signed ? intrinsics[arith_op][0]
-                                        : intrinsics[arith_op][1]);
-}

+ 0 - 4
core/iwasm/compilation/simd/simd_sat_int_arith.h

@@ -22,10 +22,6 @@ aot_compile_simd_i16x8_saturate(AOTCompContext *comp_ctx,
                                 AOTFuncContext *func_ctx,
                                 V128Arithmetic arith_op, bool is_signed);
 
-bool
-aot_compile_simd_i32x4_saturate(AOTCompContext *comp_ctx,
-                                AOTFuncContext *func_ctx,
-                                V128Arithmetic arith_op, bool is_signed);
 #ifdef __cplusplus
 } /* end of extern "C" */
 #endif

+ 6 - 6
core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp

@@ -7511,7 +7511,7 @@ at_rmw_xor_r_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
             CHECK_KIND(r3, JIT_REG_KIND_I64);                                  \
         }                                                                      \
         /* r0: read/return value r2: memory base addr can't be const */        \
-        /* already check it's not const in LOAD_4ARGS(); */                    \
+        /* already check it's not const in LOAD_4ARGS() */                     \
         reg_no_dst = jit_reg_no(r0);                                           \
         CHECK_REG_NO(reg_no_dst, jit_reg_kind(r0));                            \
         /* mem_data base address has to be non-const */                        \
@@ -9293,8 +9293,8 @@ jit_codegen_init()
         imm.setValue(INT32_MAX);
         a.jne(imm);
 
-        char *stream = (char *)a.code()->sectionById(0)->buffer().data()
-                       + a.code()->sectionById(0)->buffer().size();
+        char *stream_old = (char *)a.code()->sectionById(0)->buffer().data()
+                           + a.code()->sectionById(0)->buffer().size();
 
         /* If yes, call jit_set_exception_with_id to throw exception,
            and then set eax to JIT_INTERP_ACTION_THROWN, and jump to
@@ -9319,7 +9319,7 @@ jit_codegen_init()
         /* Patch the offset of jne instruction */
         char *stream_new = (char *)a.code()->sectionById(0)->buffer().data()
                            + a.code()->sectionById(0)->buffer().size();
-        *(int32 *)(stream - 4) = (int32)(stream_new - stream);
+        *(int32 *)(stream_old - 4) = (int32)(stream_new - stream_old);
     }
 
     /* Load compiled func ptr and call it */
@@ -9419,7 +9419,7 @@ static uint8 hreg_info_F64[3][16] = {
       1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_jitted */
 };
 
-static const JitHardRegInfo hreg_info = {
+static const JitHardRegInfo g_hreg_info = {
     {
         { 0, NULL, NULL, NULL }, /* VOID */
 
@@ -9459,7 +9459,7 @@ static const JitHardRegInfo hreg_info = {
 const JitHardRegInfo *
 jit_codegen_get_hreg_info()
 {
-    return &hreg_info;
+    return &g_hreg_info;
 }
 
 static const char *reg_names_i32[] = {

+ 39 - 7
core/iwasm/interpreter/wasm_interp_classic.c

@@ -1222,6 +1222,9 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
     bool disable_bounds_checks = false;
 #endif
 #endif
+#if WASM_ENABLE_TAIL_CALL != 0
+    bool is_return_call = false;
+#endif
 
 #if WASM_ENABLE_DEBUG_INTERP != 0
     uint8 *frame_ip_orig = NULL;
@@ -4205,6 +4208,9 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                 frame_ip = frame->ip;
                 frame_sp = frame->sp;
                 frame_csp = frame->csp;
+#if WASM_ENABLE_TAIL_CALL != 0
+                is_return_call = false;
+#endif
                 goto call_func_from_entry;
             }
 
@@ -4291,6 +4297,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
         }
         FREE_FRAME(exec_env, frame);
         wasm_exec_env_set_cur_frame(exec_env, prev_frame);
+        is_return_call = true;
         goto call_func_from_entry;
     }
 #endif
@@ -4304,6 +4311,9 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
             word_copy(outs_area->lp, frame_sp, cur_func->param_cell_num);
         }
         prev_frame = frame;
+#if WASM_ENABLE_TAIL_CALL != 0
+        is_return_call = false;
+#endif
     }
 
     call_func_from_entry:
@@ -4313,15 +4323,27 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
             if (cur_func->import_func_inst) {
                 wasm_interp_call_func_import(module, exec_env, cur_func,
                                              prev_frame);
+#if WASM_ENABLE_TAIL_CALL != 0
+                if (is_return_call) {
+                    /* the frame was freed before tail calling and
+                       the prev_frame was set as exec_env's cur_frame,
+                       so here we recover context from prev_frame */
+                    RECOVER_CONTEXT(prev_frame);
+                }
+                else
+#endif
+                {
+                    prev_frame = frame->prev_frame;
+                    cur_func = frame->function;
+                    UPDATE_ALL_FROM_FRAME();
+                }
+
 #if WASM_ENABLE_EXCE_HANDLING != 0
                 char uncaught_exception[128] = { 0 };
                 bool has_exception =
                     wasm_copy_exception(module, uncaught_exception);
                 if (has_exception
                     && strstr(uncaught_exception, "uncaught wasm exception")) {
-                    /* fix framesp */
-                    UPDATE_ALL_FROM_FRAME();
-
                     uint32 import_exception;
                     /* initialize imported exception index to be invalid */
                     SET_INVALID_TAGINDEX(import_exception);
@@ -4363,12 +4385,22 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
             {
                 wasm_interp_call_func_native(module, exec_env, cur_func,
                                              prev_frame);
+#if WASM_ENABLE_TAIL_CALL != 0
+                if (is_return_call) {
+                    /* the frame was freed before tail calling and
+                       the prev_frame was set as exec_env's cur_frame,
+                       so here we recover context from prev_frame */
+                    RECOVER_CONTEXT(prev_frame);
+                }
+                else
+#endif
+                {
+                    prev_frame = frame->prev_frame;
+                    cur_func = frame->function;
+                    UPDATE_ALL_FROM_FRAME();
+                }
             }
 
-            prev_frame = frame->prev_frame;
-            cur_func = frame->function;
-            UPDATE_ALL_FROM_FRAME();
-
             /* update memory size, no need to update memory ptr as
                it isn't changed in wasm_enlarge_memory */
 #if !defined(OS_ENABLE_HW_BOUND_CHECK)              \

+ 24 - 3
core/iwasm/interpreter/wasm_interp_fast.c

@@ -1224,6 +1224,9 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
     bool disable_bounds_checks = false;
 #endif
 #endif
+#if WASM_ENABLE_TAIL_CALL != 0
+    bool is_return_call = false;
+#endif
 
 #if WASM_ENABLE_LABELS_AS_VALUES != 0
 #define HANDLE_OPCODE(op) &&HANDLE_##op
@@ -3639,6 +3642,9 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
             {
                 frame = prev_frame;
                 frame_ip = frame->ip;
+#if WASM_ENABLE_TAIL_CALL != 0
+                is_return_call = false;
+#endif
                 goto call_func_from_entry;
             }
 
@@ -3779,6 +3785,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
         FREE_FRAME(exec_env, frame);
         frame_ip += cur_func->param_count * sizeof(int16);
         wasm_exec_env_set_cur_frame(exec_env, (WASMRuntimeFrame *)prev_frame);
+        is_return_call = true;
         goto call_func_from_entry;
     }
 #endif /* WASM_ENABLE_TAIL_CALL */
@@ -3839,6 +3846,9 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
         }
         SYNC_ALL_TO_FRAME();
         prev_frame = frame;
+#if WASM_ENABLE_TAIL_CALL != 0
+        is_return_call = false;
+#endif
     }
 
     call_func_from_entry:
@@ -3856,9 +3866,20 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                                              prev_frame);
             }
 
-            prev_frame = frame->prev_frame;
-            cur_func = frame->function;
-            UPDATE_ALL_FROM_FRAME();
+#if WASM_ENABLE_TAIL_CALL != 0
+            if (is_return_call) {
+                /* the frame was freed before tail calling and
+                   the prev_frame was set as exec_env's cur_frame,
+                   so here we recover context from prev_frame */
+                RECOVER_CONTEXT(prev_frame);
+            }
+            else
+#endif
+            {
+                prev_frame = frame->prev_frame;
+                cur_func = frame->function;
+                UPDATE_ALL_FROM_FRAME();
+            }
 
             /* update memory size, no need to update memory ptr as
                it isn't changed in wasm_enlarge_memory */

+ 67 - 39
core/iwasm/interpreter/wasm_loader.c

@@ -131,7 +131,7 @@ read_leb(uint8 **p_buf, const uint8 *buf_end, uint32 maxbits, bool sign,
     }
     else if (sign && maxbits == 32) {
         if (shift < maxbits) {
-            /* Sign extend, second highest bit is the sign bit */
+            /* Sign extend, second-highest bit is the sign bit */
             if ((uint8)byte & 0x40)
                 result |= (~((uint64)0)) << shift;
         }
@@ -146,7 +146,7 @@ read_leb(uint8 **p_buf, const uint8 *buf_end, uint32 maxbits, bool sign,
     }
     else if (sign && maxbits == 64) {
         if (shift < maxbits) {
-            /* Sign extend, second highest bit is the sign bit */
+            /* Sign extend, second-highest bit is the sign bit */
             if ((uint8)byte & 0x40)
                 result |= (~((uint64)0)) << shift;
         }
@@ -219,7 +219,10 @@ type2str(uint8 type)
 static bool
 is_32bit_type(uint8 type)
 {
-    if (type == VALUE_TYPE_I32 || type == VALUE_TYPE_F32
+    if (type == VALUE_TYPE_I32
+        || type == VALUE_TYPE_F32
+        /* the operand stack is in polymorphic state */
+        || type == VALUE_TYPE_ANY
 #if WASM_ENABLE_REF_TYPES != 0
         || type == VALUE_TYPE_FUNCREF || type == VALUE_TYPE_EXTERNREF
 #endif
@@ -5631,7 +5634,7 @@ wasm_loader_push_frame_ref(WASMLoaderContext *ctx, uint8 type, char *error_buf,
 
     *ctx->frame_ref++ = type;
     ctx->stack_cell_num++;
-    if (is_32bit_type(type) || type == VALUE_TYPE_ANY)
+    if (is_32bit_type(type))
         goto check_stack_and_return;
 
     if (!check_stack_push(ctx, error_buf, error_buf_size))
@@ -7266,7 +7269,7 @@ check_branch_block_for_delegate(WASMLoaderContext *loader_ctx, uint8 **p_buf,
     }
     frame_csp_tmp = loader_ctx->frame_csp - depth - 2;
 #if WASM_ENABLE_FAST_INTERP != 0
-    emit_br_info(frame_csp_tmp);
+    emit_br_info(frame_csp_tmp, false);
 #endif
 
     *p_buf = p;
@@ -8095,8 +8098,10 @@ re_scan:
                 BlockType block_type;
 
                 if (loader_ctx->csp_num < 2
-                    || (loader_ctx->frame_csp - 1)->label_type
-                           != LABEL_TYPE_IF) {
+                    /* the matched if isn't found */
+                    || (loader_ctx->frame_csp - 1)->label_type != LABEL_TYPE_IF
+                    /* duplicated else is found */
+                    || (loader_ctx->frame_csp - 1)->else_addr) {
                     set_error_buf(
                         error_buf, error_buf_size,
                         "opcode else found without matched opcode if");
@@ -8137,8 +8142,8 @@ re_scan:
                     bh_memcpy_s(loader_ctx->frame_offset, size,
                                 block->param_frame_offsets, size);
                     loader_ctx->frame_offset += (size / sizeof(int16));
-                    loader_ctx->dynamic_offset = block->start_dynamic_offset;
                 }
+                loader_ctx->dynamic_offset = block->start_dynamic_offset;
 #endif
 
                 break;
@@ -8247,13 +8252,11 @@ re_scan:
             case WASM_OP_BR_TABLE:
             {
                 uint8 *ret_types = NULL;
-                uint32 ret_count = 0;
+                uint32 ret_count = 0, depth = 0;
 #if WASM_ENABLE_FAST_INTERP == 0
-                uint8 *p_depth_begin, *p_depth;
-                uint32 depth = 0, j;
                 BrTableCache *br_table_cache = NULL;
-
-                p_org = p - 1;
+                uint8 *p_depth_begin, *p_depth, *p_opcode = p - 1;
+                uint32 j;
 #endif
 
                 read_leb_uint32(p, p_end, count);
@@ -8262,6 +8265,20 @@ re_scan:
 #endif
                 POP_I32();
 
+                /* Get each depth and check it */
+                p_org = p;
+                for (i = 0; i <= count; i++) {
+                    read_leb_uint32(p, p_end, depth);
+                    bh_assert(loader_ctx->csp_num > 0);
+                    if (loader_ctx->csp_num - 1 < depth) {
+                        set_error_buf(error_buf, error_buf_size,
+                                      "unknown label, "
+                                      "unexpected end of section or function");
+                        goto fail;
+                    }
+                }
+                p = p_org;
+
 #if WASM_ENABLE_FAST_INTERP == 0
                 p_depth_begin = p_depth = p;
 #endif
@@ -8315,7 +8332,7 @@ re_scan:
                             /* The depth cannot be stored in one byte,
                                create br_table cache to store each depth */
 #if WASM_ENABLE_DEBUG_INTERP != 0
-                            if (!record_fast_op(module, p_org, *p_org,
+                            if (!record_fast_op(module, p_opcode, *p_opcode,
                                                 error_buf, error_buf_size)) {
                                 goto fail;
                             }
@@ -8327,8 +8344,8 @@ re_scan:
                                       error_buf, error_buf_size))) {
                                 goto fail;
                             }
-                            *p_org = EXT_OP_BR_TABLE_CACHE;
-                            br_table_cache->br_table_op_addr = p_org;
+                            *p_opcode = EXT_OP_BR_TABLE_CACHE;
+                            br_table_cache->br_table_op_addr = p_opcode;
                             br_table_cache->br_count = count;
                             /* Copy previous depths which are one byte */
                             for (j = 0; j < i; j++) {
@@ -8583,8 +8600,7 @@ re_scan:
                 }
 
                 if (available_stack_cell > 0) {
-                    if (is_32bit_type(*(loader_ctx->frame_ref - 1))
-                        || *(loader_ctx->frame_ref - 1) == VALUE_TYPE_ANY) {
+                    if (is_32bit_type(*(loader_ctx->frame_ref - 1))) {
                         loader_ctx->frame_ref--;
                         loader_ctx->stack_cell_num--;
 #if WASM_ENABLE_FAST_INTERP != 0
@@ -8924,25 +8940,44 @@ re_scan:
                     goto fail;
                 }
 
-                /* Refer to a forward-declared function */
-                if (func_idx >= cur_func_idx + module->import_function_count) {
+                /* Refer to a forward-declared function:
+                   the function must be an import, exported, or present in
+                   a table elem segment or global initializer to be used as
+                   the operand to ref.func */
+                if (func_idx >= module->import_function_count) {
                     WASMTableSeg *table_seg = module->table_segments;
                     bool func_declared = false;
                     uint32 j;
 
-                    /* Check whether the function is declared in table segs,
-                       note that it doesn't matter whether the table seg's mode
-                       is passive, active or declarative. */
-                    for (i = 0; i < module->table_seg_count; i++, table_seg++) {
-                        if (table_seg->elem_type == VALUE_TYPE_FUNCREF) {
-                            for (j = 0; j < table_seg->function_count; j++) {
-                                if (table_seg->func_indexes[j] == func_idx) {
-                                    func_declared = true;
-                                    break;
+                    for (i = 0; i < module->global_count; i++) {
+                        if (module->globals[i].type == VALUE_TYPE_FUNCREF
+                            && module->globals[i].init_expr.init_expr_type
+                                   == INIT_EXPR_TYPE_FUNCREF_CONST
+                            && module->globals[i].init_expr.u.u32 == func_idx) {
+                            func_declared = true;
+                            break;
+                        }
+                    }
+
+                    if (!func_declared) {
+                        /* Check whether the function is declared in table segs,
+                           note that it doesn't matter whether the table seg's
+                           mode is passive, active or declarative. */
+                        for (i = 0; i < module->table_seg_count;
+                             i++, table_seg++) {
+                            if (table_seg->elem_type == VALUE_TYPE_FUNCREF) {
+                                for (j = 0; j < table_seg->function_count;
+                                     j++) {
+                                    if (table_seg->func_indexes[j]
+                                        == func_idx) {
+                                        func_declared = true;
+                                        break;
+                                    }
                                 }
                             }
                         }
                     }
+
                     if (!func_declared) {
                         /* Check whether the function is exported */
                         for (i = 0; i < module->export_count; i++) {
@@ -9424,6 +9459,7 @@ re_scan:
                 break;
 
             case WASM_OP_F32_CONST:
+                CHECK_BUF(p, p_end, sizeof(float32));
                 p += sizeof(float32);
 #if WASM_ENABLE_FAST_INTERP != 0
                 skip_label();
@@ -9442,6 +9478,7 @@ re_scan:
                 break;
 
             case WASM_OP_F64_CONST:
+                CHECK_BUF(p, p_end, sizeof(float64));
                 p += sizeof(float64);
 #if WASM_ENABLE_FAST_INTERP != 0
                 skip_label();
@@ -9763,6 +9800,7 @@ re_scan:
                     }
                     case WASM_OP_MEMORY_COPY:
                     {
+                        CHECK_BUF(p, p_end, sizeof(int16));
                         /* both src and dst memory index should be 0 */
                         if (*(int16 *)p != 0x0000)
                             goto fail_zero_byte_expected;
@@ -10448,13 +10486,6 @@ re_scan:
                         break;
                     }
 
-                    case SIMD_i32x4_narrow_i64x2_s:
-                    case SIMD_i32x4_narrow_i64x2_u:
-                    {
-                        POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
-                        break;
-                    }
-
                     case SIMD_i32x4_extend_low_i16x8_s:
                     case SIMD_i32x4_extend_high_i16x8_s:
                     case SIMD_i32x4_extend_low_i16x8_u:
@@ -10481,7 +10512,6 @@ re_scan:
                     case SIMD_i32x4_max_s:
                     case SIMD_i32x4_max_u:
                     case SIMD_i32x4_dot_i16x8_s:
-                    case SIMD_i32x4_avgr_u:
                     case SIMD_i32x4_extmul_low_i16x8_s:
                     case SIMD_i32x4_extmul_high_i16x8_s:
                     case SIMD_i32x4_extmul_low_i16x8_u:
@@ -10545,7 +10575,6 @@ re_scan:
                     /* f32x4 operation */
                     case SIMD_f32x4_abs:
                     case SIMD_f32x4_neg:
-                    case SIMD_f32x4_round:
                     case SIMD_f32x4_sqrt:
                     {
                         POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
@@ -10568,7 +10597,6 @@ re_scan:
                     /* f64x2 operation */
                     case SIMD_f64x2_abs:
                     case SIMD_f64x2_neg:
-                    case SIMD_f64x2_round:
                     case SIMD_f64x2_sqrt:
                     {
                         POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);

+ 58 - 23
core/iwasm/interpreter/wasm_mini_loader.c

@@ -51,7 +51,10 @@ set_error_buf(char *error_buf, uint32 error_buf_size, const char *string)
 static bool
 is_32bit_type(uint8 type)
 {
-    if (type == VALUE_TYPE_I32 || type == VALUE_TYPE_F32
+    if (type == VALUE_TYPE_I32
+        || type == VALUE_TYPE_F32
+        /* the operand stack is in polymorphic state */
+        || type == VALUE_TYPE_ANY
 #if WASM_ENABLE_REF_TYPES != 0
         || type == VALUE_TYPE_FUNCREF || type == VALUE_TYPE_EXTERNREF
 #endif
@@ -5898,8 +5901,11 @@ re_scan:
                 BranchBlock *block = NULL;
                 BlockType block_type = (loader_ctx->frame_csp - 1)->block_type;
                 bh_assert(loader_ctx->csp_num >= 2
+                          /* the matched if is found */
                           && (loader_ctx->frame_csp - 1)->label_type
-                                 == LABEL_TYPE_IF);
+                                 == LABEL_TYPE_IF
+                          /* duplicated else isn't found */
+                          && !(loader_ctx->frame_csp - 1)->else_addr);
                 block = loader_ctx->frame_csp - 1;
 
                 /* check whether if branch's stack matches its result type */
@@ -5934,8 +5940,8 @@ re_scan:
                     bh_memcpy_s(loader_ctx->frame_offset, size,
                                 block->param_frame_offsets, size);
                     loader_ctx->frame_offset += (size / sizeof(int16));
-                    loader_ctx->dynamic_offset = block->start_dynamic_offset;
                 }
+                loader_ctx->dynamic_offset = block->start_dynamic_offset;
 #endif
 
                 break;
@@ -6039,13 +6045,11 @@ re_scan:
             case WASM_OP_BR_TABLE:
             {
                 uint8 *ret_types = NULL;
-                uint32 ret_count = 0;
+                uint32 ret_count = 0, depth = 0;
 #if WASM_ENABLE_FAST_INTERP == 0
-                uint8 *p_depth_begin, *p_depth;
-                uint32 depth = 0, j;
                 BrTableCache *br_table_cache = NULL;
-
-                p_org = p - 1;
+                uint8 *p_depth_begin, *p_depth, *p_opcode = p - 1;
+                uint32 j;
 #endif
 
                 read_leb_uint32(p, p_end, count);
@@ -6054,6 +6058,16 @@ re_scan:
 #endif
                 POP_I32();
 
+                /* Get each depth and check it */
+                p_org = p;
+                for (i = 0; i <= count; i++) {
+                    read_leb_uint32(p, p_end, depth);
+                    bh_assert(loader_ctx->csp_num > 0);
+                    bh_assert(loader_ctx->csp_num - 1 >= depth);
+                    (void)depth;
+                }
+                p = p_org;
+
 #if WASM_ENABLE_FAST_INTERP == 0
                 p_depth_begin = p_depth = p;
 #endif
@@ -6079,8 +6093,8 @@ re_scan:
                                       error_buf, error_buf_size))) {
                                 goto fail;
                             }
-                            *p_org = EXT_OP_BR_TABLE_CACHE;
-                            br_table_cache->br_table_op_addr = p_org;
+                            *p_opcode = EXT_OP_BR_TABLE_CACHE;
+                            br_table_cache->br_table_op_addr = p_opcode;
                             br_table_cache->br_count = count;
                             /* Copy previous depths which are one byte */
                             for (j = 0; j < i; j++) {
@@ -6311,8 +6325,7 @@ re_scan:
                             && !cur_block->is_stack_polymorphic));
 
                 if (available_stack_cell > 0) {
-                    if (is_32bit_type(*(loader_ctx->frame_ref - 1))
-                        || *(loader_ctx->frame_ref - 1) == VALUE_TYPE_ANY) {
+                    if (is_32bit_type(*(loader_ctx->frame_ref - 1))) {
                         loader_ctx->frame_ref--;
                         loader_ctx->stack_cell_num--;
 #if WASM_ENABLE_FAST_INTERP != 0
@@ -6618,25 +6631,44 @@ re_scan:
                     goto fail;
                 }
 
-                /* Refer to a forward-declared function */
-                if (func_idx >= cur_func_idx + module->import_function_count) {
+                /* Refer to a forward-declared function:
+                   the function must be an import, exported, or present in
+                   a table elem segment or global initializer to be used as
+                   the operand to ref.func */
+                if (func_idx >= module->import_function_count) {
                     WASMTableSeg *table_seg = module->table_segments;
                     bool func_declared = false;
                     uint32 j;
 
-                    /* Check whether the function is declared in table segs,
-                       note that it doesn't matter whether the table seg's mode
-                       is passive, active or declarative. */
-                    for (i = 0; i < module->table_seg_count; i++, table_seg++) {
-                        if (table_seg->elem_type == VALUE_TYPE_FUNCREF) {
-                            for (j = 0; j < table_seg->function_count; j++) {
-                                if (table_seg->func_indexes[j] == func_idx) {
-                                    func_declared = true;
-                                    break;
+                    for (i = 0; i < module->global_count; i++) {
+                        if (module->globals[i].type == VALUE_TYPE_FUNCREF
+                            && module->globals[i].init_expr.init_expr_type
+                                   == INIT_EXPR_TYPE_FUNCREF_CONST
+                            && module->globals[i].init_expr.u.u32 == func_idx) {
+                            func_declared = true;
+                            break;
+                        }
+                    }
+
+                    if (!func_declared) {
+                        /* Check whether the function is declared in table segs,
+                           note that it doesn't matter whether the table seg's
+                           mode is passive, active or declarative. */
+                        for (i = 0; i < module->table_seg_count;
+                             i++, table_seg++) {
+                            if (table_seg->elem_type == VALUE_TYPE_FUNCREF) {
+                                for (j = 0; j < table_seg->function_count;
+                                     j++) {
+                                    if (table_seg->func_indexes[j]
+                                        == func_idx) {
+                                        func_declared = true;
+                                        break;
+                                    }
                                 }
                             }
                         }
                     }
+
                     if (!func_declared) {
                         /* Check whether the function is exported */
                         for (i = 0; i < module->export_count; i++) {
@@ -7065,6 +7097,7 @@ re_scan:
                 break;
 
             case WASM_OP_F32_CONST:
+                CHECK_BUF(p, p_end, sizeof(float32));
                 p += sizeof(float32);
 #if WASM_ENABLE_FAST_INTERP != 0
                 skip_label();
@@ -7083,6 +7116,7 @@ re_scan:
                 break;
 
             case WASM_OP_F64_CONST:
+                CHECK_BUF(p, p_end, sizeof(float64));
                 p += sizeof(float64);
 #if WASM_ENABLE_FAST_INTERP != 0
                 skip_label();
@@ -7390,6 +7424,7 @@ re_scan:
                     }
                     case WASM_OP_MEMORY_COPY:
                     {
+                        CHECK_BUF(p, p_end, sizeof(int16));
                         /* both src and dst memory index should be 0 */
                         bh_assert(*(int16 *)p == 0x0000);
                         p += 2;

+ 10 - 10
core/iwasm/interpreter/wasm_opcode.h

@@ -496,8 +496,8 @@ typedef enum WASMSimdEXTOpcode {
     /* placeholder            = 0xa2 */
     SIMD_i32x4_all_true = 0xa3,
     SIMD_i32x4_bitmask = 0xa4,
-    SIMD_i32x4_narrow_i64x2_s = 0xa5,
-    SIMD_i32x4_narrow_i64x2_u = 0xa6,
+    /* placeholder     = 0xa5 */
+    /* placeholder     = 0xa6 */
     SIMD_i32x4_extend_low_i16x8_s = 0xa7,
     SIMD_i32x4_extend_high_i16x8_s = 0xa8,
     SIMD_i32x4_extend_low_i16x8_u = 0xa9,
@@ -506,19 +506,19 @@ typedef enum WASMSimdEXTOpcode {
     SIMD_i32x4_shr_s = 0xac,
     SIMD_i32x4_shr_u = 0xad,
     SIMD_i32x4_add = 0xae,
-    SIMD_i32x4_add_sat_s = 0xaf,
-    SIMD_i32x4_add_sat_u = 0xb0,
+    /* placeholder = 0xaf */
+    /* placeholder = 0xb0 */
     SIMD_i32x4_sub = 0xb1,
-    SIMD_i32x4_sub_sat_s = 0xb2,
-    SIMD_i32x4_sub_sat_u = 0xb3,
-    /* placeholder            = 0xb4 */
+    /* placeholder = 0xb2 */
+    /* placeholder = 0xb3 */
+    /* placeholder = 0xb4 */
     SIMD_i32x4_mul = 0xb5,
     SIMD_i32x4_min_s = 0xb6,
     SIMD_i32x4_min_u = 0xb7,
     SIMD_i32x4_max_s = 0xb8,
     SIMD_i32x4_max_u = 0xb9,
     SIMD_i32x4_dot_i16x8_s = 0xba,
-    SIMD_i32x4_avgr_u = 0xbb,
+    /* placeholder         = 0xbb */
     SIMD_i32x4_extmul_low_i16x8_s = 0xbc,
     SIMD_i32x4_extmul_high_i16x8_s = 0xbd,
     SIMD_i32x4_extmul_low_i16x8_u = 0xbe,
@@ -561,7 +561,7 @@ typedef enum WASMSimdEXTOpcode {
     /* f32x4 operation */
     SIMD_f32x4_abs = 0xe0,
     SIMD_f32x4_neg = 0xe1,
-    SIMD_f32x4_round = 0xe2,
+    /* placeholder = 0xe2 */
     SIMD_f32x4_sqrt = 0xe3,
     SIMD_f32x4_add = 0xe4,
     SIMD_f32x4_sub = 0xe5,
@@ -575,7 +575,7 @@ typedef enum WASMSimdEXTOpcode {
     /* f64x2 operation */
     SIMD_f64x2_abs = 0xec,
     SIMD_f64x2_neg = 0xed,
-    SIMD_f64x2_round = 0xee,
+    /* placeholder = 0xee */
     SIMD_f64x2_sqrt = 0xef,
     SIMD_f64x2_add = 0xf0,
     SIMD_f64x2_sub = 0xf1,

+ 2 - 2
core/iwasm/interpreter/wasm_runtime.c

@@ -2017,7 +2017,7 @@ wasm_instantiate(WASMModule *module, WASMModuleInstance *parent,
 
         /* check offset */
         if (base_offset > memory_size) {
-            LOG_DEBUG("base_offset(%d) > memory_size(%d)", base_offset,
+            LOG_DEBUG("base_offset(%u) > memory_size(%" PRIu64 ")", base_offset,
                       memory_size);
 #if WASM_ENABLE_REF_TYPES != 0
             set_error_buf(error_buf, error_buf_size,
@@ -2032,7 +2032,7 @@ wasm_instantiate(WASMModule *module, WASMModuleInstance *parent,
         /* check offset + length(could be zero) */
         length = data_seg->data_length;
         if ((uint64)base_offset + length > memory_size) {
-            LOG_DEBUG("base_offset(%d) + length(%d) > memory_size(%d)",
+            LOG_DEBUG("base_offset(%u) + length(%u) > memory_size(%" PRIu64 ")",
                       base_offset, length, memory_size);
 #if WASM_ENABLE_REF_TYPES != 0
             set_error_buf(error_buf, error_buf_size,

+ 3 - 1
core/iwasm/libraries/debug-engine/handler.c

@@ -309,9 +309,11 @@ handle_general_query(WASMGDBServer *server, char *payload)
     }
 
     if (!strcmp(name, "WasmData")) {
+        write_packet(server, "");
     }
 
     if (!strcmp(name, "WasmMem")) {
+        write_packet(server, "");
     }
 
     if (!strcmp(name, "Symbol")) {
@@ -447,7 +449,7 @@ send_thread_stop_status(WASMGDBServer *server, uint32 status, korp_tid tid)
                             "thread-pcs:%" PRIx64 ";00:%s;reason:%s;", pc,
                             pc_string, "trace");
         }
-        else if (status > 0) {
+        else { /* status > 0 (== 0 is checked at the function beginning) */
             len += snprintf(tmpbuf + len, MAX_PACKET_SIZE - len,
                             "thread-pcs:%" PRIx64 ";00:%s;reason:%s;", pc,
                             pc_string, "signal");

+ 5 - 0
core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/blocking_op.h

@@ -3,6 +3,9 @@
  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  */
 
+#ifndef _BLOCKING_OP_H_
+#define _BLOCKING_OP_H_
+
 #include "bh_platform.h"
 #include "wasm_export.h"
 
@@ -57,3 +60,5 @@ __wasi_errno_t
 blocking_op_poll(wasm_exec_env_t exec_env, struct pollfd *pfds, nfds_t nfds,
                  int timeout, int *retp);
 #endif
+
+#endif /* end of _BLOCKING_OP_H_ */

+ 9 - 2
core/iwasm/libraries/thread-mgr/thread_manager.c

@@ -556,6 +556,7 @@ wasm_cluster_spawn_exec_env(WASMExecEnv *exec_env)
                                      aux_stack_size)) {
         goto fail3;
     }
+    new_exec_env->is_aux_stack_allocated = true;
 
     /* Inherit suspend_flags of parent thread */
     new_exec_env->suspend_flags.flags =
@@ -601,7 +602,9 @@ wasm_cluster_destroy_spawned_exec_env(WASMExecEnv *exec_env)
         exec_env_tls = exec_env;
     }
 
-    /* Free aux stack space */
+    /* Free aux stack space which was allocated in
+       wasm_cluster_spawn_exec_env */
+    bh_assert(exec_env_tls->is_aux_stack_allocated);
     wasm_cluster_free_aux_stack(exec_env_tls,
                                 exec_env->aux_stack_bottom.bottom);
 
@@ -653,7 +656,9 @@ thread_manager_start_routine(void *arg)
 #endif
 
     /* Free aux stack space */
-    wasm_cluster_free_aux_stack(exec_env, exec_env->aux_stack_bottom.bottom);
+    if (exec_env->is_aux_stack_allocated)
+        wasm_cluster_free_aux_stack(exec_env,
+                                    exec_env->aux_stack_bottom.bottom);
 
     os_mutex_lock(&cluster_list_lock);
 
@@ -721,11 +726,13 @@ wasm_cluster_create_thread(WASMExecEnv *exec_env,
                                          aux_stack_size)) {
             goto fail2;
         }
+        new_exec_env->is_aux_stack_allocated = true;
     }
     else {
         /* Disable aux stack */
         new_exec_env->aux_stack_boundary.boundary = 0;
         new_exec_env->aux_stack_bottom.bottom = UINT32_MAX;
+        new_exec_env->is_aux_stack_allocated = false;
     }
 
     /* Inherit suspend_flags of parent thread */

+ 6 - 1
core/shared/platform/common/posix/posix_file.c

@@ -920,7 +920,12 @@ os_readdir(os_dir_stream dir_stream, __wasi_dirent_t *entry,
 
     if (dent == NULL) {
         *d_name = NULL;
-        return convert_errno(errno);
+        if (errno != 0) {
+            return convert_errno(errno);
+        }
+        else {
+            return 0;
+        }
     }
 
     long offset = (__wasi_dircookie_t)telldir(dir_stream);

+ 5 - 4
core/shared/platform/linux-sgx/sgx_platform.c

@@ -154,8 +154,8 @@ os_mmap(void *hint, size_t size, int prot, int flags, os_file_handle file)
 
     ret = sgx_alloc_rsrv_mem(aligned_size);
     if (ret == NULL) {
-        os_printf("os_mmap(size=%u, aligned size=%lu, prot=0x%x) failed.", size,
-                  aligned_size, prot);
+        os_printf("os_mmap(size=%u, aligned size=%lu, prot=0x%x) failed.\n",
+                  size, aligned_size, prot);
         return NULL;
     }
 
@@ -168,7 +168,7 @@ os_mmap(void *hint, size_t size, int prot, int flags, os_file_handle file)
 
     st = sgx_tprotect_rsrv_mem(ret, aligned_size, mprot);
     if (st != SGX_SUCCESS) {
-        os_printf("os_mmap(size=%u, prot=0x%x) failed to set protect.", size,
+        os_printf("os_mmap(size=%u, prot=0x%x) failed to set protect.\n", size,
                   prot);
         sgx_free_rsrv_mem(ret, aligned_size);
         return NULL;
@@ -205,7 +205,8 @@ os_mprotect(void *addr, size_t size, int prot)
         mprot |= SGX_PROT_EXEC;
     st = sgx_tprotect_rsrv_mem(addr, aligned_size, mprot);
     if (st != SGX_SUCCESS)
-        os_printf("os_mprotect(addr=0x%" PRIx64 ", size=%u, prot=0x%x) failed.",
+        os_printf("os_mprotect(addr=0x%" PRIx64
+                  ", size=%u, prot=0x%x) failed.\n",
                   (uintptr_t)addr, size, prot);
 
     return (st == SGX_SUCCESS ? 0 : -1);

+ 1 - 1
core/shared/utils/runtime_timer.c

@@ -394,7 +394,7 @@ handle_expired_timers(timer_ctx_t ctx, app_timer_t *expired)
            operation may change expired->next */
         expired = expired->next;
         if (t->is_periodic) {
-            /* if it is repeating, then reschedule it; */
+            /* if it is repeating, then reschedule it */
             reschedule_timer(ctx, t);
         }
         else {

+ 3 - 1
doc/perf_tune.md

@@ -28,7 +28,7 @@ emcc -msimd128 -O3 -o <wasm_file> <c/c++ source files>
 - Reduce the footprint of JIT/AOT, the JIT/AOT code generated is smaller
 - Reduce the compilation time of JIT/AOT
 
-Currently it is supported on linux x86-64, developer can use `--enable-segue=[<flags>]` for wamrc:
+Currently it is only supported on linux x86-64, developer can use `--enable-segue=[<flags>]` for wamrc:
 
 ```bash
 wamrc --enable-segue -o aot_file wasm_file
@@ -50,6 +50,8 @@ iwasm --enable-segue wasm_file      (iwasm is built with llvm-jit enabled)
 iwasm --enable-segue=[<flags>] wasm_file
 ```
 
+> Note: Currently it is only supported on linux x86-64.
+
 ## 5. Use the AOT static PGO method
 
 LLVM PGO (Profile-Guided Optimization) allows the compiler to better optimize code for how it actually runs. WAMR supports AOT static PGO, currently it is tested on Linux x86-64 and x86-32. The basic steps are:

+ 2 - 1
product-mini/platforms/posix/main.c

@@ -833,7 +833,8 @@ main(int argc, char *argv[])
 #if WASM_ENABLE_DEBUG_INTERP != 0
     init_args.instance_port = instance_port;
     if (ip_addr)
-        strcpy(init_args.ip_addr, ip_addr);
+        /* ensure that init_args.ip_addr is null terminated */
+        strncpy(init_args.ip_addr, ip_addr, sizeof(init_args.ip_addr) - 1);
 #endif
 
     /* initialize runtime environment */

+ 3 - 1
product-mini/platforms/windows/main.c

@@ -464,7 +464,9 @@ main(int argc, char *argv[])
 #if WASM_ENABLE_DEBUG_INTERP != 0
     init_args.instance_port = instance_port;
     if (ip_addr)
-        strcpy(init_args.ip_addr, ip_addr);
+        /* ensure that init_args.ip_addr is null terminated */
+        strncpy_s(init_args.ip_addr, sizeof(init_args.ip_addr) - 1, ip_addr,
+                  strlen(ip_addr));
 #endif
 
     /* initialize runtime environment */

+ 1 - 1
wamr-compiler/main.c

@@ -174,7 +174,7 @@ print_help()
     printf("                            Enable the specified LLVM passes, using comma to separate\n");
     printf("  --use-prof-file=<file>    Use profile file collected by LLVM PGO (Profile-Guided Optimization)\n");
     printf("  --enable-segue[=<flags>]  Enable using segment register GS as the base address of linear memory,\n");
-    printf("                            only available on linux/linux-sgx x86-64, which may improve performance,\n");
+    printf("                            only available on linux x86-64, which may improve performance,\n");
     printf("                            flags can be: i32.load, i64.load, f32.load, f64.load, v128.load,\n");
     printf("                                          i32.store, i64.store, f32.store, f64.store, v128.store\n");
     printf("                            Use comma to separate, e.g. --enable-segue=i32.load,i64.store\n");