Просмотр исходного кода

Merge pull request #3004 from bytecodealliance/main

Merge branch main into gitbook
Wenyong Huang 2 года назад
Родитель
Коммит
0fcda32d36
70 измененных файлов с 3293 добавлено и 781 удалено
  1. 70 0
      RELEASE_NOTES.md
  2. 29 1
      build-scripts/config_common.cmake
  3. 12 0
      core/config.h
  4. 112 1
      core/iwasm/aot/aot_loader.c
  5. 1 0
      core/iwasm/aot/aot_reloc.h
  6. 45 36
      core/iwasm/aot/aot_runtime.c
  7. 0 1
      core/iwasm/aot/debug/jit_debug.c
  8. 94 94
      core/iwasm/common/wasm_c_api.c
  9. 5 0
      core/iwasm/common/wasm_c_api_internal.h
  10. 966 113
      core/iwasm/common/wasm_native.c
  11. 5 0
      core/iwasm/common/wasm_native.h
  12. 100 13
      core/iwasm/common/wasm_runtime_common.c
  13. 21 3
      core/iwasm/common/wasm_runtime_common.h
  14. 44 26
      core/iwasm/compilation/aot_emit_aot_file.c
  15. 246 12
      core/iwasm/compilation/aot_emit_function.c
  16. 12 0
      core/iwasm/compilation/aot_emit_table.c
  17. 4 1
      core/iwasm/compilation/aot_emit_table.h
  18. 12 5
      core/iwasm/compilation/aot_llvm.c
  19. 7 3
      core/iwasm/compilation/aot_llvm.h
  20. 9 1
      core/iwasm/compilation/aot_llvm_extra.cpp
  21. 19 0
      core/iwasm/compilation/aot_llvm_extra2.cpp
  22. 27 0
      core/iwasm/compilation/aot_orc_extra.cpp
  23. 3 3
      core/iwasm/compilation/debug/dwarf_extractor.cpp
  24. 8 4
      core/iwasm/compilation/simd/simd_conversions.c
  25. 11 6
      core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp
  26. 5 1
      core/iwasm/fast-jit/fe/jit_emit_numberic.c
  27. 0 1
      core/iwasm/fast-jit/jit_compiler.h
  28. 1 1
      core/iwasm/include/aot_export.h
  29. 15 2
      core/iwasm/include/wasm_c_api.h
  30. 19 5
      core/iwasm/include/wasm_export.h
  31. 4 0
      core/iwasm/interpreter/wasm.h
  32. 91 48
      core/iwasm/interpreter/wasm_interp_classic.c
  33. 0 4
      core/iwasm/interpreter/wasm_interp_fast.c
  34. 157 53
      core/iwasm/interpreter/wasm_loader.c
  35. 149 52
      core/iwasm/interpreter/wasm_mini_loader.c
  36. 23 12
      core/iwasm/interpreter/wasm_runtime.c
  37. 1 1
      core/iwasm/interpreter/wasm_runtime.h
  38. 18 3
      core/iwasm/libraries/lib-pthread/lib_pthread_wrapper.c
  39. 1 1
      core/iwasm/libraries/lib-wasi-threads/lib_wasi_threads_wrapper.c
  40. 1 1
      core/iwasm/libraries/libc-wasi/libc_wasi_wrapper.c
  41. 3 2
      core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/ssp_config.h
  42. 63 17
      core/iwasm/libraries/thread-mgr/thread_manager.c
  43. 17 1
      core/iwasm/libraries/thread-mgr/thread_manager.h
  44. 46 1
      core/shared/platform/common/freertos/freertos_thread.c
  45. 18 5
      core/shared/platform/common/posix/posix_memmap.c
  46. 2 1
      core/shared/platform/common/posix/posix_thread.c
  47. 3 0
      core/shared/platform/esp-idf/shared_platform.cmake
  48. 12 0
      core/shared/platform/linux-sgx/sgx_platform.c
  49. 2 1
      core/shared/platform/linux-sgx/sgx_socket.c
  50. 2 0
      core/shared/utils/bh_log.c
  51. 1 1
      core/version.h
  52. 3 3
      doc/build_wamr.md
  53. 14 0
      doc/build_wasm_app.md
  54. 14 1
      doc/perf_tune.md
  55. 2 206
      doc/source_debugging.md
  56. 100 0
      doc/source_debugging_aot.md
  57. 115 0
      doc/source_debugging_interpreter.md
  58. 12 0
      product-mini/platforms/linux-sgx/CMakeLists.txt
  59. 2 2
      product-mini/platforms/linux-sgx/enclave-sample/Enclave/Enclave.cpp
  60. 11 2
      product-mini/platforms/linux-sgx/enclave-sample/Makefile
  61. 1 1
      product-mini/platforms/linux-sgx/enclave-sample/Makefile_minimal
  62. 6 0
      product-mini/platforms/nuttx/wamr.mk
  63. 15 7
      product-mini/platforms/posix/main.c
  64. 1 1
      product-mini/platforms/windows/main.c
  65. 1 0
      samples/sgx-ra/CMakeLists.txt
  66. 1 1
      samples/wasm-c-api-imports/wasm/CMakeLists.txt
  67. 161 0
      test-tools/append-aot-to-wasm/append_aot_to_wasm.py
  68. 210 0
      test-tools/trans-jitted-func-name/trans_wasm_func_name.py
  69. 85 16
      wamr-compiler/CMakeLists.txt
  70. 23 4
      wamr-compiler/main.c

+ 70 - 0
RELEASE_NOTES.md

@@ -1,3 +1,73 @@
+## WAMR-1.3.1
+
+### Breaking Changes
+- In multi-threading, when an exception was thrown in wasm_func_call(),
+  the trap returned contains the stack frames of the thread where the
+  exception occurs, but not the stack frames of the main thread.
+- Disable emitting custom name section to AOT file with
+  `wamrc --enable-dump-call-stack` option, instead, use
+  `wamrc --emit-custom-sections=name` to emit it and make it clear.
+
+### New Features
+- Enable AOT linux perf support (#2930)
+
+### Bug Fixes
+- Corrects Zephyr include files for current versions of Zephyr (#2881)
+- Fix possible deadlock in wasm_cluster_spawn_exec_env (#2882)
+- Handle ambiguous fstflags on fd_filestat_set_times (#2892)
+- Fix memory size not updating after growing in interpreter (#2898)
+- fixed(freertos): Fix crash when wasm app call pthread_exit(NULL) (#2970)
+- fast-jit: Fix const shift and const i64 compare issues (#2969)
+- Fix ref.is_null processing in fast-interp loader (#2971)
+- simd-128: The input lanes of integer-to-integer narrowing ops should be interpreted as signed (#2850)
+- Fix ref.func function declared check in wasm loader (#2972)
+- Fix fast-interp polymorphic stack processing (#2974)
+- Fix potential recursive lock in pthread_create_wrapper (#2980)
+- Fix build failure on esp-idf platform (#2991)
+- Return stack frames of crashed thread when using wasm-c-api (#2908)
+- Fix compilation error on iOS due to macOS-specific API (#2995)
+- Fix a bug when emitting the custom name section to aot file (#2987)
+- Fix linux-sgx build error when libc-wasi is disabled (#2997)
+
+### Enhancements
+- fix command-reactor: Look for _initialize only if _start not found (#2891)
+- Refactor reloc symbols for riscv (#2894)
+- Avoid memory import failure when wasi-threads is enabled (#2893)
+- interpreter: Simplify memory.grow a bit (#2899)
+- Avoid reporting timestamp if custom logger is used (#2905)
+- Expose API to set log level in embedder (#2907)
+- Add a script to translate jitted function names in flamegraph (#2906)
+- Refine wasm-c-api wasm_func_call (#2922)
+- Add VectorCombine pass for JIT and AOT (#2923)
+- Enable wasm_runtime_terminate for single-threading (#2924)
+- nuttx: Add CONFIG_INTERPRETERS_WAMR_DEBUG_AOT (#2929)
+- Allow to control built-in libraries for wamrc from command line options (#2928)
+- Fix a bug that appends '_precheck' to aot_func (#2936)
+- freertos: Add os_cond_broadcast for pthread wrapper (#2937)
+- Append .aot to .wasm as a custom section named "aot" (#2933)
+- fix(sgx-ra): Fix building when enclave is built without librats ahead (#2968)
+- Refine LLVM JIT function call process (#2925)
+- Refine AOT function call process (#2940)
+- Allow to set segue flags for wasm-c-api JIT (#2926)
+- freertos: Minor changes for freertos libc_wasi build adaption (#2973)
+- freertos: Change ssp_config.h due to clock_nanosleep() not supported in freertos (#2979)
+- aot compiler: Some updates for LLVM 18 (#2981)
+- Enable MAP_32BIT for macOS (#2992)
+- Register quick call entries to speedup the aot/jit func call process (#2978)
+- Refine AOT/JIT code call wasm-c-api import process (#2982)
+
+### Others
+- compilation_on_nuttx.yml: Use docker image to simplify env setup (#2878)
+- samples/spawn-thread: Disable libc and pthread (#2883)
+- Add arm64 to nuttx compilation test (#2886)
+- samples/spawn-thread: Tweak to expose a bug (#2888)
+- Fix typo in CI config and suppress STORE_U8 in TSAN (#2802)
+- Using docker image for nuttx spectest (#2887)
+- doc: Separate source_debugging.md into two files (#2932)
+- doc/build_wasm_app.md: Add a note about aot abi compatibility (#2993)
+
+---
+
 ## WAMR-1.3.0
 
 ### Breaking Changes

+ 29 - 1
build-scripts/config_common.cmake

@@ -147,13 +147,20 @@ elseif (WAMR_BUILD_SANITIZER STREQUAL "asan")
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0 -fno-omit-frame-pointer -fsanitize=address -fno-sanitize-recover=all" )
     set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address")
   endif()
-elseif (WAMR_BUILD_SANITIZER STREQUAL "tsan") 
+elseif (WAMR_BUILD_SANITIZER STREQUAL "tsan")
   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0 -fno-omit-frame-pointer -fsanitize=thread -fno-sanitize-recover=all" )
   set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=thread")
 elseif (NOT (WAMR_BUILD_SANITIZER STREQUAL "") )
   message(SEND_ERROR "Unsupported sanitizer: ${WAMR_BUILD_SANITIZER}")
 endif()
 
+if (WAMR_BUILD_LINUX_PERF EQUAL 1)
+  if (NOT WAMR_BUILD_JIT AND NOT WAMR_BUILD_AOT)
+    message(WARNING "only support perf in aot and llvm-jit")
+    set(WAMR_BUILD_LINUX_PERF 0)
+  endif ()
+endif ()
+
 ########################################
 
 message ("-- Build Configurations:")
@@ -440,3 +447,24 @@ if (WAMR_CONFIGUABLE_BOUNDS_CHECKS EQUAL 1)
   add_definitions (-DWASM_CONFIGURABLE_BOUNDS_CHECKS=1)
   message ("     Configurable bounds checks enabled")
 endif ()
+if (WAMR_BUILD_LINUX_PERF EQUAL 1)
+  add_definitions (-DWASM_ENABLE_LINUX_PERF=1)
+  message ("     Linux perf support enabled")
+endif ()
+if (NOT DEFINED WAMR_BUILD_QUICK_AOT_ENTRY)
+  # Enable quick aot/jit entries by default
+  set (WAMR_BUILD_QUICK_AOT_ENTRY 1)
+endif ()
+if (WAMR_BUILD_QUICK_AOT_ENTRY EQUAL 1)
+  add_definitions (-DWASM_ENABLE_QUICK_AOT_ENTRY=1)
+  message ("     Quick AOT/JIT entries enabled")
+else ()
+  add_definitions (-DWASM_ENABLE_QUICK_AOT_ENTRY=0)
+  message ("     Quick AOT/JIT entries disabled")
+endif ()
+
+if (APPLE)
+  # On recent macOS versions, by default, the size of page zero is 4GB.
+  # Shrink it so that MAP_32BIT mmap can work.
+  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-pagezero_size,0x4000")
+endif ()

+ 12 - 0
core/config.h

@@ -490,4 +490,16 @@
 #define WASM_MAX_INSTANCE_CONTEXTS 8
 #endif
 
+/* linux perf support */
+#ifndef WASM_ENABLE_LINUX_PERF
+#define WASM_ENABLE_LINUX_PERF 0
+#endif
+
+/* Support registering quick AOT/JIT function entries of some func types
+   to speedup the calling process of invoking the AOT/JIT functions of
+   these types from the host embedder */
+#ifndef WASM_ENABLE_QUICK_AOT_ENTRY
+#define WASM_ENABLE_QUICK_AOT_ENTRY 1
+#endif
+
 #endif /* end of _CONFIG_H_ */

+ 112 - 1
core/iwasm/aot/aot_loader.c

@@ -822,7 +822,9 @@ load_custom_section(const uint8 *buf, const uint8 *buf_end, AOTModule *module,
         case AOT_CUSTOM_SECTION_NAME:
             if (!load_name_section(buf, buf_end, module, is_load_from_file_buf,
                                    error_buf, error_buf_size))
-                goto fail;
+                LOG_VERBOSE("Load name section failed.");
+            else
+                LOG_VERBOSE("Load name section success.");
             break;
 #if WASM_ENABLE_LOAD_CUSTOM_SECTION != 0
         case AOT_CUSTOM_SECTION_RAW:
@@ -1202,6 +1204,11 @@ load_func_types(const uint8 **p_buf, const uint8 *buf_end, AOTModule *module,
 
         func_types[i]->param_cell_num = (uint16)param_cell_num;
         func_types[i]->ret_cell_num = (uint16)ret_cell_num;
+
+#if WASM_ENABLE_QUICK_AOT_ENTRY != 0
+        func_types[i]->quick_aot_entry =
+            wasm_native_lookup_quick_aot_entry(func_types[i]);
+#endif
     }
 
     *p_buf = buf;
@@ -2764,6 +2771,104 @@ fail:
     return ret;
 }
 
+#if WASM_ENABLE_LINUX_PERF != 0
+struct func_info {
+    uint32 idx;
+    void *ptr;
+};
+
+static uint32
+get_func_size(const AOTModule *module, struct func_info *sorted_func_ptrs,
+              uint32 idx)
+{
+    uint32 func_sz;
+
+    if (idx == module->func_count - 1)
+        func_sz = (uintptr_t)module->code + module->code_size
+                  - (uintptr_t)(sorted_func_ptrs[idx].ptr);
+    else
+        func_sz = (uintptr_t)(sorted_func_ptrs[idx + 1].ptr)
+                  - (uintptr_t)(sorted_func_ptrs[idx].ptr);
+
+    return func_sz;
+}
+
+static int
+compare_func_ptrs(const void *f1, const void *f2)
+{
+    return (intptr_t)((struct func_info *)f1)->ptr
+           - (intptr_t)((struct func_info *)f2)->ptr;
+}
+
+static struct func_info *
+sort_func_ptrs(const AOTModule *module, char *error_buf, uint32 error_buf_size)
+{
+    uint64 content_len;
+    struct func_info *sorted_func_ptrs;
+    unsigned i;
+
+    content_len = (uint64)sizeof(struct func_info) * module->func_count;
+    sorted_func_ptrs = loader_malloc(content_len, error_buf, error_buf_size);
+    if (!sorted_func_ptrs)
+        return NULL;
+
+    for (i = 0; i < module->func_count; i++) {
+        sorted_func_ptrs[i].idx = i;
+        sorted_func_ptrs[i].ptr = module->func_ptrs[i];
+    }
+
+    qsort(sorted_func_ptrs, module->func_count, sizeof(struct func_info),
+          compare_func_ptrs);
+
+    return sorted_func_ptrs;
+}
+
+static bool
+create_perf_map(const AOTModule *module, char *error_buf, uint32 error_buf_size)
+{
+    struct func_info *sorted_func_ptrs = NULL;
+    char perf_map_info[128] = { 0 };
+    FILE *perf_map = NULL;
+    uint32 i;
+    pid_t pid = getpid();
+    bool ret = false;
+
+    sorted_func_ptrs = sort_func_ptrs(module, error_buf, error_buf_size);
+    if (!sorted_func_ptrs)
+        goto quit;
+
+    snprintf(perf_map_info, 128, "/tmp/perf-%d.map", pid);
+    perf_map = fopen(perf_map_info, "w");
+    if (!perf_map) {
+        LOG_WARNING("warning: can't create /tmp/perf-%d.map, because %s", pid,
+                    strerror(errno));
+        goto quit;
+    }
+
+    for (i = 0; i < module->func_count; i++) {
+        memset(perf_map_info, 0, 128);
+        snprintf(perf_map_info, 128, "%lx  %x  aot_func#%u\n",
+                 (uintptr_t)sorted_func_ptrs[i].ptr,
+                 get_func_size(module, sorted_func_ptrs, i),
+                 sorted_func_ptrs[i].idx);
+
+        fwrite(perf_map_info, 1, strlen(perf_map_info), perf_map);
+    }
+
+    LOG_VERBOSE("generate /tmp/perf-%d.map", pid);
+    ret = true;
+
+quit:
+    if (sorted_func_ptrs)
+        free(sorted_func_ptrs);
+
+    if (perf_map)
+        fclose(perf_map);
+
+    return ret;
+}
+#endif /* WASM_ENABLE_LINUX_PERF != 0*/
+
 static bool
 load_from_sections(AOTModule *module, AOTSection *sections,
                    bool is_load_from_file_buf, char *error_buf,
@@ -3224,6 +3329,12 @@ load(const uint8 *buf, uint32 size, AOTModule *module, char *error_buf,
     }
 #endif
 
+#if WASM_ENABLE_LINUX_PERF != 0
+    if (wasm_runtime_get_linux_perf())
+        if (!create_perf_map(module, error_buf, error_buf_size))
+            goto fail;
+#endif
+
     return ret;
 fail:
     return false;

+ 1 - 0
core/iwasm/aot/aot_reloc.h

@@ -136,6 +136,7 @@ typedef struct {
     REG_SYM(aot_enlarge_memory),          \
     REG_SYM(aot_set_exception),           \
     REG_SYM(aot_check_app_addr_and_convert),\
+    REG_SYM(wasm_runtime_quick_invoke_c_api_native),\
     { "memset", (void*)aot_memset },      \
     { "memmove", (void*)aot_memmove },    \
     { "memcpy", (void*)aot_memmove },     \

+ 45 - 36
core/iwasm/aot/aot_runtime.c

@@ -47,6 +47,13 @@ bh_static_assert(sizeof(AOTMemoryInstance) == 104);
 bh_static_assert(offsetof(AOTTableInstance, elems) == 8);
 
 bh_static_assert(offsetof(AOTModuleInstanceExtra, stack_sizes) == 0);
+bh_static_assert(offsetof(AOTModuleInstanceExtra, common.c_api_func_imports)
+                 == sizeof(uint64));
+
+bh_static_assert(sizeof(CApiFuncImport) == sizeof(uintptr_t) * 3);
+
+bh_static_assert(sizeof(wasm_val_t) == 16);
+bh_static_assert(offsetof(wasm_val_t, of) == 8);
 
 static void
 set_error_buf(char *error_buf, uint32 error_buf_size, const char *string)
@@ -1374,7 +1381,6 @@ aot_lookup_function(const AOTModuleInstance *module_inst, const char *name,
 }
 
 #ifdef OS_ENABLE_HW_BOUND_CHECK
-
 static bool
 invoke_native_with_hw_bound_check(WASMExecEnv *exec_env, void *func_ptr,
                                   const WASMType *func_type,
@@ -1386,9 +1392,6 @@ invoke_native_with_hw_bound_check(WASMExecEnv *exec_env, void *func_ptr,
     WASMJmpBuf jmpbuf_node = { 0 }, *jmpbuf_node_pop;
     uint32 page_size = os_getpagesize();
     uint32 guard_page_count = STACK_OVERFLOW_CHECK_GUARD_PAGE_COUNT;
-    uint16 param_count = func_type->param_count;
-    uint16 result_count = func_type->result_count;
-    const uint8 *types = func_type->types;
 #ifdef BH_PLATFORM_WINDOWS
     int result;
     bool has_exception;
@@ -1406,42 +1409,43 @@ invoke_native_with_hw_bound_check(WASMExecEnv *exec_env, void *func_ptr,
         return false;
     }
 
-    if (exec_env_tls && (exec_env_tls != exec_env)) {
-        aot_set_exception(module_inst, "invalid exec env");
-        return false;
-    }
+    if (!exec_env_tls) {
+        if (!os_thread_signal_inited()) {
+            aot_set_exception(module_inst, "thread signal env not inited");
+            return false;
+        }
 
-    if (!os_thread_signal_inited()) {
-        aot_set_exception(module_inst, "thread signal env not inited");
-        return false;
+        /* Set thread handle and stack boundary if they haven't been set */
+        wasm_exec_env_set_thread_info(exec_env);
+
+        wasm_runtime_set_exec_env_tls(exec_env);
+    }
+    else {
+        if (exec_env_tls != exec_env) {
+            aot_set_exception(module_inst, "invalid exec env");
+            return false;
+        }
     }
 
     wasm_exec_env_push_jmpbuf(exec_env, &jmpbuf_node);
 
-    wasm_runtime_set_exec_env_tls(exec_env);
     if (os_setjmp(jmpbuf_node.jmpbuf) == 0) {
-        /* Quick call with func_ptr if the function signature is simple */
-        if (!signature && param_count == 1 && types[0] == VALUE_TYPE_I32) {
-            if (result_count == 0) {
-                void (*NativeFunc)(WASMExecEnv *, uint32) =
-                    (void (*)(WASMExecEnv *, uint32))func_ptr;
-                NativeFunc(exec_env, argv[0]);
-                ret = aot_copy_exception(module_inst, NULL) ? false : true;
-            }
-            else if (result_count == 1
-                     && types[param_count] == VALUE_TYPE_I32) {
-                uint32 (*NativeFunc)(WASMExecEnv *, uint32) =
-                    (uint32(*)(WASMExecEnv *, uint32))func_ptr;
-                argv_ret[0] = NativeFunc(exec_env, argv[0]);
-                ret = aot_copy_exception(module_inst, NULL) ? false : true;
-            }
-            else {
-                ret = wasm_runtime_invoke_native(exec_env, func_ptr, func_type,
-                                                 signature, attachment, argv,
-                                                 argc, argv_ret);
-            }
+#if WASM_ENABLE_QUICK_AOT_ENTRY != 0
+        /* Quick call if the quick aot entry is registered */
+        if (!signature && func_type->quick_aot_entry) {
+            void (*invoke_native)(
+                void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+                uint32 *argv_ret) = func_type->quick_aot_entry;
+            invoke_native(func_ptr,
+                          func_type->result_count > 0
+                              ? func_type->types[func_type->param_count]
+                              : VALUE_TYPE_VOID,
+                          exec_env, argv, argv_ret);
+            ret = !aot_copy_exception(module_inst, NULL);
         }
-        else {
+        else
+#endif
+        {
             ret = wasm_runtime_invoke_native(exec_env, func_ptr, func_type,
                                              signature, attachment, argv, argc,
                                              argv_ret);
@@ -1473,7 +1477,6 @@ invoke_native_with_hw_bound_check(WASMExecEnv *exec_env, void *func_ptr,
     (void)jmpbuf_node_pop;
     return ret;
 }
-
 #define invoke_native_internal invoke_native_with_hw_bound_check
 #else /* else of OS_ENABLE_HW_BOUND_CHECK */
 #define invoke_native_internal wasm_runtime_invoke_native
@@ -1543,10 +1546,16 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function,
 
     /* func pointer was looked up previously */
     bh_assert(func_ptr != NULL);
-    /* set thread handle and stack boundary */
+
+#ifndef OS_ENABLE_HW_BOUND_CHECK
+    /* Set thread handle and stack boundary */
     wasm_exec_env_set_thread_info(exec_env);
+#else
+    /* Set thread info in invoke_native_with_hw_bound_check when
+       hw bound check is enabled */
+#endif
 
-    /* set exec env so it can be later retrieved from instance */
+    /* Set exec env so it can be later retrieved from instance */
     ((AOTModuleInstanceExtra *)module_inst->e)->common.cur_exec_env = exec_env;
 
     if (ext_ret_count > 0) {

+ 0 - 1
core/iwasm/aot/debug/jit_debug.c

@@ -23,7 +23,6 @@
 
 #include <stdio.h>
 #include <assert.h>
-#include <fcntl.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <string.h>

+ 94 - 94
core/iwasm/common/wasm_c_api.c

@@ -299,6 +299,7 @@ wasm_config_new(void)
 
     memset(config, 0, sizeof(wasm_config_t));
     config->mem_alloc_type = Alloc_With_System_Allocator;
+
     return config;
 }
 
@@ -330,7 +331,17 @@ wasm_config_set_linux_perf_opt(wasm_config_t *config, bool enable)
     if (!config)
         return NULL;
 
-    config->linux_perf_support = enable;
+    config->enable_linux_perf = enable;
+    return config;
+}
+
+wasm_config_t *
+wasm_config_set_segue_flags(wasm_config_t *config, uint32 segue_flags)
+{
+    if (!config)
+        return NULL;
+
+    config->segue_flags = segue_flags;
     return config;
 }
 
@@ -367,6 +378,9 @@ wasm_engine_new_internal(wasm_config_t *config)
     wasm_engine_t *engine = NULL;
     /* init runtime */
     RuntimeInitArgs init_args = { 0 };
+#if WASM_ENABLE_JIT != 0
+    LLVMJITOptions *jit_options = wasm_runtime_get_llvm_jit_options();
+#endif
 
 #ifndef NDEBUG
     bh_log_set_verbose_level(BH_LOG_LEVEL_VERBOSE);
@@ -380,7 +394,12 @@ wasm_engine_new_internal(wasm_config_t *config)
     init_args.mem_alloc_type = config->mem_alloc_type;
     memcpy(&init_args.mem_alloc_option, &config->mem_alloc_option,
            sizeof(MemAllocOption));
-    init_args.linux_perf_support = config->linux_perf_support;
+    init_args.enable_linux_perf = config->enable_linux_perf;
+    init_args.segue_flags = config->segue_flags;
+
+#if WASM_ENABLE_JIT != 0
+    jit_options->quick_invoke_c_api_import = true;
+#endif
 
     if (!wasm_runtime_full_init(&init_args)) {
         LOG_DEBUG("wasm_runtime_full_init failed");
@@ -1902,10 +1921,26 @@ wasm_frame_func_offset(const wasm_frame_t *frame)
     return frame ? frame->func_offset : 0;
 }
 
+void
+wasm_frame_vec_clone_internal(Vector *src, Vector *out)
+{
+    bh_assert(src->num_elems != 0 && src->data);
+
+    bh_vector_destroy(out);
+    if (!bh_vector_init(out, src->num_elems, sizeof(WASMCApiFrame), false)) {
+        bh_vector_destroy(out);
+        return;
+    }
+
+    bh_memcpy_s(out->data, src->num_elems * sizeof(WASMCApiFrame), src->data,
+                src->num_elems * sizeof(WASMCApiFrame));
+    out->num_elems = src->num_elems;
+}
+
 static wasm_trap_t *
 wasm_trap_new_internal(wasm_store_t *store,
                        WASMModuleInstanceCommon *inst_comm_rt,
-                       const char *error_info)
+                       const char *error_info, Vector *cluster_frames)
 {
     wasm_trap_t *trap;
 #if WASM_ENABLE_DUMP_CALL_STACK != 0
@@ -1935,7 +1970,9 @@ wasm_trap_new_internal(wasm_store_t *store,
 
     /* fill in frames */
 #if WASM_ENABLE_DUMP_CALL_STACK != 0
-    trap->frames = ((WASMModuleInstance *)inst_comm_rt)->frames;
+    trap->frames = cluster_frames
+                       ? cluster_frames
+                       : ((WASMModuleInstance *)inst_comm_rt)->frames;
 
     if (trap->frames) {
         /* fill in instances */
@@ -2046,10 +2083,7 @@ wasm_trap_trace(const wasm_trap_t *trap, own wasm_frame_vec_t *out)
     }
 
     for (i = 0; i < trap->frames->num_elems; i++) {
-        wasm_frame_t *frame;
-
-        frame = ((wasm_frame_t *)trap->frames->data) + i;
-
+        wasm_frame_t *frame = ((wasm_frame_t *)trap->frames->data) + i;
         if (!(out->data[i] =
                   wasm_frame_new(frame->instance, frame->module_offset,
                                  frame->func_index, frame->func_offset))) {
@@ -2926,6 +2960,8 @@ wasm_func_new_basic(wasm_store_t *store, const wasm_functype_t *type,
     if (!(func->type = wasm_functype_copy(type))) {
         goto failed;
     }
+    func->param_count = func->type->params->num_elems;
+    func->result_count = func->type->results->num_elems;
 
     RETURN_OBJ(func, wasm_func_delete)
 }
@@ -2956,6 +2992,8 @@ wasm_func_new_with_env_basic(wasm_store_t *store, const wasm_functype_t *type,
     if (!(func->type = wasm_functype_copy(type))) {
         goto failed;
     }
+    func->param_count = func->type->params->num_elems;
+    func->result_count = func->type->results->num_elems;
 
     RETURN_OBJ(func, wasm_func_delete)
 }
@@ -3045,6 +3083,8 @@ wasm_func_new_internal(wasm_store_t *store, uint16 func_idx_rt,
     if (!func->type) {
         goto failed;
     }
+    func->param_count = func->type->params->num_elems;
+    func->result_count = func->type->results->num_elems;
 
     /* will add name information when processing "exports" */
     func->store = store;
@@ -3136,48 +3176,31 @@ params_to_argv(const wasm_val_vec_t *params,
                const wasm_valtype_vec_t *param_defs, uint32 *argv,
                uint32 *ptr_argc)
 {
-    size_t i = 0;
-
-    if (!param_defs->num_elems) {
-        return true;
-    }
+    uint32 *argv_org = argv;
+    const wasm_val_t *param, *param_end;
 
-    if (!params || !params->num_elems || !params->size || !params->data) {
-        LOG_ERROR("the parameter params is invalid");
-        return false;
-    }
+    bh_assert(params && params->num_elems && params->size && params->data);
 
-    *ptr_argc = 0;
-    for (i = 0; i < param_defs->num_elems; ++i) {
-        const wasm_val_t *param = params->data + i;
-        bh_assert((*(param_defs->data + i))->kind == param->kind);
+    param = params->data;
+    param_end = param + param_defs->num_elems;
 
+    for (; param < param_end; param++) {
         switch (param->kind) {
             case WASM_I32:
+            case WASM_F32:
                 *(int32 *)argv = param->of.i32;
                 argv += 1;
-                *ptr_argc += 1;
                 break;
             case WASM_I64:
+            case WASM_F64:
                 *(int64 *)argv = param->of.i64;
                 argv += 2;
-                *ptr_argc += 2;
                 break;
-            case WASM_F32:
-                *(float32 *)argv = param->of.f32;
-                argv += 1;
-                *ptr_argc += 1;
-                break;
-            case WASM_F64:
-                *(float64 *)argv = param->of.f64;
-                argv += 2;
-                *ptr_argc += 2;
                 break;
 #if WASM_ENABLE_REF_TYPES != 0
             case WASM_ANYREF:
                 *(uintptr_t *)argv = (uintptr_t)param->of.ref;
                 argv += sizeof(uintptr_t) / sizeof(uint32);
-                *ptr_argc += 1;
                 break;
 #endif
             default:
@@ -3186,6 +3209,7 @@ params_to_argv(const wasm_val_vec_t *params,
         }
     }
 
+    *ptr_argc = (uint32)(argv - argv_org);
     return true;
 }
 
@@ -3193,62 +3217,37 @@ static bool
 argv_to_results(const uint32 *argv, const wasm_valtype_vec_t *result_defs,
                 wasm_val_vec_t *results)
 {
-    size_t i = 0, argv_i = 0;
+    wasm_valtype_t **result_def, **result_def_end;
     wasm_val_t *result;
 
-    if (!result_defs->num_elems) {
-        return true;
-    }
+    bh_assert(results && results->size && results->data);
 
-    if (!results || !results->size || !results->data) {
-        LOG_ERROR("the parameter results is invalid");
-        return false;
-    }
+    result_def = result_defs->data;
+    result_def_end = result_def + result_defs->num_elems;
+    result = results->data;
 
-    for (i = 0, result = results->data, argv_i = 0; i < result_defs->num_elems;
-         i++, result++) {
-        switch (result_defs->data[i]->kind) {
+    for (; result_def < result_def_end; result_def++, result++) {
+        result->kind = result_def[0]->kind;
+        switch (result->kind) {
             case WASM_I32:
-            {
-                result->kind = WASM_I32;
-                result->of.i32 = *(int32 *)(argv + argv_i);
-                argv_i += 1;
-                break;
-            }
-            case WASM_I64:
-            {
-                result->kind = WASM_I64;
-                result->of.i64 = *(int64 *)(argv + argv_i);
-                argv_i += 2;
-                break;
-            }
             case WASM_F32:
-            {
-                result->kind = WASM_F32;
-                result->of.f32 = *(float32 *)(argv + argv_i);
-                argv_i += 1;
+                result->of.i32 = *(int32 *)argv;
+                argv += 1;
                 break;
-            }
+            case WASM_I64:
             case WASM_F64:
-            {
-                result->kind = WASM_F64;
-                result->of.f64 = *(float64 *)(argv + argv_i);
-                argv_i += 2;
+                result->of.i64 = *(int64 *)argv;
+                argv += 2;
                 break;
-            }
 #if WASM_ENABLE_REF_TYPES != 0
             case WASM_ANYREF:
-            {
-                result->kind = WASM_ANYREF;
-                result->of.ref =
-                    (struct wasm_ref_t *)(*(uintptr_t *)(argv + argv_i));
-                argv_i += sizeof(uintptr_t) / sizeof(uint32);
+                result->of.ref = (struct wasm_ref_t *)(*(uintptr_t *)argv);
+                argv += sizeof(uintptr_t) / sizeof(uint32);
                 break;
-            }
 #endif
             default:
                 LOG_WARNING("%s meets unsupported type: %d", __FUNCTION__,
-                            result_defs->data[i]->kind);
+                            result->kind);
                 return false;
         }
     }
@@ -3268,10 +3267,9 @@ wasm_func_call(const wasm_func_t *func, const wasm_val_vec_t *params,
     WASMFunctionInstanceCommon *func_comm_rt = NULL;
     WASMExecEnv *exec_env = NULL;
     size_t param_count, result_count, alloc_count;
+    Vector *cluster_frames = NULL;
 
-    if (!func) {
-        return NULL;
-    }
+    bh_assert(func && func->type);
 
     if (!func->inst_comm_rt) {
         wasm_name_t message = { 0 };
@@ -3285,17 +3283,14 @@ wasm_func_call(const wasm_func_t *func, const wasm_val_vec_t *params,
         return trap;
     }
 
-    bh_assert(func->type);
-
-#if WASM_ENABLE_INTERP != 0
     if (func->inst_comm_rt->module_type == Wasm_Module_Bytecode) {
+#if WASM_ENABLE_INTERP != 0
         func_comm_rt = ((WASMModuleInstance *)func->inst_comm_rt)->e->functions
                        + func->func_idx_rt;
-    }
 #endif
-
+    }
+    else if (func->inst_comm_rt->module_type == Wasm_Module_AoT) {
 #if WASM_ENABLE_AOT != 0
-    if (func->inst_comm_rt->module_type == Wasm_Module_AoT) {
         if (!(func_comm_rt = func->func_comm_rt)) {
             AOTModuleInstance *inst_aot =
                 (AOTModuleInstance *)func->inst_comm_rt;
@@ -3316,8 +3311,8 @@ wasm_func_call(const wasm_func_t *func, const wasm_val_vec_t *params,
                 }
             }
         }
-    }
 #endif
+    }
 
     /*
      * a wrong combination of module filetype and compilation flags
@@ -3385,27 +3380,32 @@ failed:
     if (argv != argv_buf)
         wasm_runtime_free(argv);
 
-    return wasm_trap_new_internal(
+#if WASM_ENABLE_DUMP_CALL_STACK != 0 && WASM_ENABLE_THREAD_MGR != 0
+    WASMCluster *cluster = wasm_exec_env_get_cluster(exec_env);
+    cluster_frames = &cluster->exception_frames;
+    wasm_cluster_traverse_lock(exec_env);
+#endif
+
+    wasm_trap_t *trap = wasm_trap_new_internal(
         func->store, func->inst_comm_rt,
-        wasm_runtime_get_exception(func->inst_comm_rt));
+        wasm_runtime_get_exception(func->inst_comm_rt), cluster_frames);
+
+#if WASM_ENABLE_DUMP_CALL_STACK != 0 && WASM_ENABLE_THREAD_MGR != 0
+    wasm_cluster_traverse_unlock(exec_env);
+#endif
+    return trap;
 }
 
 size_t
 wasm_func_param_arity(const wasm_func_t *func)
 {
-    if (!func || !func->type || !func->type->params) {
-        return 0;
-    }
-    return func->type->params->num_elems;
+    return func->param_count;
 }
 
 size_t
 wasm_func_result_arity(const wasm_func_t *func)
 {
-    if (!func || !func->type || !func->type->results) {
-        return 0;
-    }
-    return func->type->results->num_elems;
+    return func->result_count;
 }
 
 wasm_global_t *

+ 5 - 0
core/iwasm/common/wasm_c_api_internal.h

@@ -130,6 +130,8 @@ struct wasm_func_t {
 
     struct wasm_host_info host_info;
     wasm_functype_t *type;
+    uint16 param_count;
+    uint16 result_count;
 
     bool with_env;
     union {
@@ -238,4 +240,7 @@ wasm_memory_new_internal(wasm_store_t *store, uint16 memory_idx_rt,
 wasm_table_t *
 wasm_table_new_internal(wasm_store_t *store, uint16 table_idx_rt,
                         WASMModuleInstanceCommon *inst_comm_rt);
+
+void
+wasm_frame_vec_clone_internal(Vector *src, Vector *out);
 #endif /* _WASM_C_API_INTERNAL_H */

+ 966 - 113
core/iwasm/common/wasm_native.c

@@ -16,19 +16,6 @@
 #include "../libraries/thread-mgr/thread_manager.h"
 #endif
 
-#if !defined(BH_PLATFORM_ZEPHYR) && !defined(BH_PLATFORM_ALIOS_THINGS) \
-    && !defined(BH_PLATFORM_OPENRTOS) && !defined(BH_PLATFORM_ESP_IDF)
-#define ENABLE_QUICKSORT 1
-#else
-#define ENABLE_QUICKSORT 0
-#endif
-
-#define ENABLE_SORT_DEBUG 0
-
-#if ENABLE_SORT_DEBUG != 0
-#include <sys/time.h>
-#endif
-
 static NativeSymbolsList g_native_symbols_list = NULL;
 
 #if WASM_ENABLE_LIBC_WASI != 0
@@ -171,93 +158,26 @@ check_symbol_signature(const WASMType *type, const char *signature)
     return true;
 }
 
-#if ENABLE_QUICKSORT == 0
-static void
-sort_symbol_ptr(NativeSymbol *native_symbols, uint32 n_native_symbols)
-{
-    uint32 i, j;
-    NativeSymbol temp;
-
-    for (i = 0; i < n_native_symbols - 1; i++) {
-        for (j = i + 1; j < n_native_symbols; j++) {
-            if (strcmp(native_symbols[i].symbol, native_symbols[j].symbol)
-                > 0) {
-                temp = native_symbols[i];
-                native_symbols[i] = native_symbols[j];
-                native_symbols[j] = temp;
-            }
-        }
-    }
-}
-#else
-static void
-swap_symbol(NativeSymbol *left, NativeSymbol *right)
+static int
+native_symbol_cmp(const void *native_symbol1, const void *native_symbol2)
 {
-    NativeSymbol temp = *left;
-    *left = *right;
-    *right = temp;
+    return strcmp(((const NativeSymbol *)native_symbol1)->symbol,
+                  ((const NativeSymbol *)native_symbol2)->symbol);
 }
 
-static void
-quick_sort_symbols(NativeSymbol *native_symbols, int left, int right)
-{
-    NativeSymbol base_symbol;
-    int pin_left = left;
-    int pin_right = right;
-
-    if (left >= right) {
-        return;
-    }
-
-    base_symbol = native_symbols[left];
-    while (left < right) {
-        while (left < right
-               && strcmp(native_symbols[right].symbol, base_symbol.symbol)
-                      > 0) {
-            right--;
-        }
-
-        if (left < right) {
-            swap_symbol(&native_symbols[left], &native_symbols[right]);
-            left++;
-        }
-
-        while (left < right
-               && strcmp(native_symbols[left].symbol, base_symbol.symbol) < 0) {
-            left++;
-        }
-
-        if (left < right) {
-            swap_symbol(&native_symbols[left], &native_symbols[right]);
-            right--;
-        }
-    }
-    native_symbols[left] = base_symbol;
-
-    quick_sort_symbols(native_symbols, pin_left, left - 1);
-    quick_sort_symbols(native_symbols, left + 1, pin_right);
-}
-#endif /* end of ENABLE_QUICKSORT */
-
 static void *
 lookup_symbol(NativeSymbol *native_symbols, uint32 n_native_symbols,
               const char *symbol, const char **p_signature, void **p_attachment)
 {
-    int low = 0, mid, ret;
-    int high = (int32)n_native_symbols - 1;
+    NativeSymbol *native_symbol, key = { 0 };
 
-    while (low <= high) {
-        mid = (low + high) / 2;
-        ret = strcmp(symbol, native_symbols[mid].symbol);
-        if (ret == 0) {
-            *p_signature = native_symbols[mid].signature;
-            *p_attachment = native_symbols[mid].attachment;
-            return native_symbols[mid].func_ptr;
-        }
-        else if (ret < 0)
-            high = mid - 1;
-        else
-            low = mid + 1;
+    key.symbol = symbol;
+
+    if ((native_symbol = bsearch(&key, native_symbols, n_native_symbols,
+                                 sizeof(NativeSymbol), native_symbol_cmp))) {
+        *p_signature = native_symbol->signature;
+        *p_attachment = native_symbol->attachment;
+        return native_symbol->func_ptr;
     }
 
     return NULL;
@@ -328,11 +248,6 @@ register_natives(const char *module_name, NativeSymbol *native_symbols,
                  uint32 n_native_symbols, bool call_conv_raw)
 {
     NativeSymbolsNode *node;
-#if ENABLE_SORT_DEBUG != 0
-    struct timeval start;
-    struct timeval end;
-    unsigned long timer;
-#endif
 
     if (!(node = wasm_runtime_malloc(sizeof(NativeSymbolsNode))))
         return false;
@@ -349,23 +264,9 @@ register_natives(const char *module_name, NativeSymbol *native_symbols,
     node->next = g_native_symbols_list;
     g_native_symbols_list = node;
 
-#if ENABLE_SORT_DEBUG != 0
-    gettimeofday(&start, NULL);
-#endif
+    qsort(native_symbols, n_native_symbols, sizeof(NativeSymbol),
+          native_symbol_cmp);
 
-#if ENABLE_QUICKSORT == 0
-    sort_symbol_ptr(native_symbols, n_native_symbols);
-#else
-    quick_sort_symbols(native_symbols, 0, (int)(n_native_symbols - 1));
-#endif
-
-#if ENABLE_SORT_DEBUG != 0
-    gettimeofday(&end, NULL);
-    timer =
-        1000000 * (end.tv_sec - start.tv_sec) + (end.tv_usec - start.tv_usec);
-    LOG_ERROR("module_name: %s, nums: %d, sorted used: %ld us", module_name,
-              n_native_symbols, timer);
-#endif
     return true;
 }
 
@@ -555,6 +456,11 @@ wasi_context_dtor(WASMModuleInstanceCommon *inst, void *ctx)
 }
 #endif /* end of WASM_ENABLE_LIBC_WASI */
 
+#if WASM_ENABLE_QUICK_AOT_ENTRY != 0
+static bool
+quick_aot_entry_init();
+#endif
+
 bool
 wasm_native_init()
 {
@@ -655,6 +561,20 @@ wasm_native_init()
         goto fail;
 #endif
 
+#if WASM_ENABLE_QUICK_AOT_ENTRY != 0
+    if (!quick_aot_entry_init()) {
+#if WASM_ENABLE_SPEC_TEST != 0 || WASM_ENABLE_LIBC_BUILTIN != 0     \
+    || WASM_ENABLE_BASE_LIB != 0 || WASM_ENABLE_LIBC_EMCC != 0      \
+    || WASM_ENABLE_LIB_RATS != 0 || WASM_ENABLE_WASI_NN != 0        \
+    || WASM_ENABLE_APP_FRAMEWORK != 0 || WASM_ENABLE_LIBC_WASI != 0 \
+    || WASM_ENABLE_LIB_PTHREAD != 0 || WASM_ENABLE_LIB_WASI_THREADS != 0
+        goto fail;
+#else
+        return false;
+#endif
+    }
+#endif
+
     return true;
 #if WASM_ENABLE_SPEC_TEST != 0 || WASM_ENABLE_LIBC_BUILTIN != 0     \
     || WASM_ENABLE_BASE_LIB != 0 || WASM_ENABLE_LIBC_EMCC != 0      \
@@ -695,3 +615,936 @@ wasm_native_destroy()
 
     g_native_symbols_list = NULL;
 }
+
+#if WASM_ENABLE_QUICK_AOT_ENTRY != 0
+static void
+invoke_no_args_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+                 uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env);
+}
+static void
+invoke_no_args_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+                 uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env);
+}
+static void
+invoke_no_args_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+                 uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_i_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+           uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, argv[0]);
+}
+static void
+invoke_i_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+           uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, argv[0]);
+}
+static void
+invoke_i_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+           uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, argv[0]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_I_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+           uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, GET_I64_FROM_ADDR(argv));
+}
+static void
+invoke_I_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+           uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, GET_I64_FROM_ADDR(argv));
+}
+static void
+invoke_I_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+           uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, GET_I64_FROM_ADDR(argv));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_ii_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+            uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, argv[0], argv[1]);
+}
+static void
+invoke_ii_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+            uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, argv[0], argv[1]);
+}
+static void
+invoke_ii_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+            uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, argv[0], argv[1]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iI_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+            uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1));
+}
+static void
+invoke_iI_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+            uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1));
+}
+static void
+invoke_iI_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+            uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_Ii_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+            uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, GET_I64_FROM_ADDR(argv), argv[2]);
+}
+static void
+invoke_Ii_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+            uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, GET_I64_FROM_ADDR(argv), argv[2]);
+}
+static void
+invoke_Ii_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+            uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, GET_I64_FROM_ADDR(argv), argv[2]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_II_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+            uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, GET_I64_FROM_ADDR(argv), GET_I64_FROM_ADDR(argv + 2));
+}
+static void
+invoke_II_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+            uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, GET_I64_FROM_ADDR(argv),
+                              GET_I64_FROM_ADDR(argv + 2));
+}
+static void
+invoke_II_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+            uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, GET_I64_FROM_ADDR(argv),
+                             GET_I64_FROM_ADDR(argv + 2));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iii_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, argv[0], argv[1], argv[2]);
+}
+static void
+invoke_iii_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, argv[0], argv[1], argv[2]);
+}
+static void
+invoke_iii_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, argv[0], argv[1], argv[2]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iiI_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, argv[0], argv[1], GET_I64_FROM_ADDR(argv + 2));
+}
+static void
+invoke_iiI_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] =
+        native_code(exec_env, argv[0], argv[1], GET_I64_FROM_ADDR(argv + 2));
+}
+static void
+invoke_iiI_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret =
+        native_code(exec_env, argv[0], argv[1], GET_I64_FROM_ADDR(argv + 2));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iIi_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1), argv[3]);
+}
+static void
+invoke_iIi_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] =
+        native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1), argv[3]);
+}
+static void
+invoke_iIi_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret =
+        native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1), argv[3]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iII_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1),
+                GET_I64_FROM_ADDR(argv + 3));
+}
+static void
+invoke_iII_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1),
+                              GET_I64_FROM_ADDR(argv + 3));
+}
+static void
+invoke_iII_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1),
+                             GET_I64_FROM_ADDR(argv + 3));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_Iii_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, GET_I64_FROM_ADDR(argv), argv[2], argv[3]);
+}
+static void
+invoke_Iii_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] =
+        native_code(exec_env, GET_I64_FROM_ADDR(argv), argv[2], argv[3]);
+}
+static void
+invoke_Iii_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret =
+        native_code(exec_env, GET_I64_FROM_ADDR(argv), argv[2], argv[3]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_IiI_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, GET_I64_FROM_ADDR(argv), argv[2],
+                GET_I64_FROM_ADDR(argv + 3));
+}
+static void
+invoke_IiI_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, GET_I64_FROM_ADDR(argv), argv[2],
+                              GET_I64_FROM_ADDR(argv + 3));
+}
+static void
+invoke_IiI_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, GET_I64_FROM_ADDR(argv), argv[2],
+                             GET_I64_FROM_ADDR(argv + 3));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_IIi_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, GET_I64_FROM_ADDR(argv), GET_I64_FROM_ADDR(argv + 2),
+                argv[4]);
+}
+static void
+invoke_IIi_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, GET_I64_FROM_ADDR(argv),
+                              GET_I64_FROM_ADDR(argv + 2), argv[4]);
+}
+static void
+invoke_IIi_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, GET_I64_FROM_ADDR(argv),
+                             GET_I64_FROM_ADDR(argv + 2), argv[4]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_III_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, GET_I64_FROM_ADDR(argv), GET_I64_FROM_ADDR(argv + 2),
+                GET_I64_FROM_ADDR(argv + 4));
+}
+static void
+invoke_III_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] =
+        native_code(exec_env, GET_I64_FROM_ADDR(argv),
+                    GET_I64_FROM_ADDR(argv + 2), GET_I64_FROM_ADDR(argv + 4));
+}
+static void
+invoke_III_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret =
+        native_code(exec_env, GET_I64_FROM_ADDR(argv),
+                    GET_I64_FROM_ADDR(argv + 2), GET_I64_FROM_ADDR(argv + 4));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iiii_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, argv[0], argv[1], argv[2], argv[3]);
+}
+static void
+invoke_iiii_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, argv[0], argv[1], argv[2], argv[3]);
+}
+static void
+invoke_iiii_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, argv[0], argv[1], argv[2], argv[3]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iiiI_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, argv[0], argv[1], argv[2],
+                GET_I64_FROM_ADDR(argv + 3));
+}
+static void
+invoke_iiiI_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, argv[0], argv[1], argv[2],
+                              GET_I64_FROM_ADDR(argv + 3));
+}
+static void
+invoke_iiiI_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, argv[0], argv[1], argv[2],
+                             GET_I64_FROM_ADDR(argv + 3));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iiIi_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, argv[0], argv[1], GET_I64_FROM_ADDR(argv + 2),
+                argv[4]);
+}
+static void
+invoke_iiIi_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, argv[0], argv[1],
+                              GET_I64_FROM_ADDR(argv + 2), argv[4]);
+}
+static void
+invoke_iiIi_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, argv[0], argv[1],
+                             GET_I64_FROM_ADDR(argv + 2), argv[4]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iiII_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, argv[0], argv[1], GET_I64_FROM_ADDR(argv + 2),
+                GET_I64_FROM_ADDR(argv + 4));
+}
+static void
+invoke_iiII_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] =
+        native_code(exec_env, argv[0], argv[1], GET_I64_FROM_ADDR(argv + 2),
+                    GET_I64_FROM_ADDR(argv + 4));
+}
+static void
+invoke_iiII_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret =
+        native_code(exec_env, argv[0], argv[1], GET_I64_FROM_ADDR(argv + 2),
+                    GET_I64_FROM_ADDR(argv + 4));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iIii_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1), argv[3],
+                argv[4]);
+}
+static void
+invoke_iIii_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1),
+                              argv[3], argv[4]);
+}
+static void
+invoke_iIii_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1),
+                             argv[3], argv[4]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iIiI_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1), argv[3],
+                GET_I64_FROM_ADDR(argv + 4));
+}
+static void
+invoke_iIiI_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1),
+                              argv[3], GET_I64_FROM_ADDR(argv + 4));
+}
+static void
+invoke_iIiI_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1),
+                             argv[3], GET_I64_FROM_ADDR(argv + 4));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iIIi_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1),
+                GET_I64_FROM_ADDR(argv + 3), argv[5]);
+}
+static void
+invoke_iIIi_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1),
+                              GET_I64_FROM_ADDR(argv + 3), argv[5]);
+}
+static void
+invoke_iIIi_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1),
+                             GET_I64_FROM_ADDR(argv + 3), argv[5]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iIII_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1),
+                GET_I64_FROM_ADDR(argv + 3), GET_I64_FROM_ADDR(argv + 5));
+}
+static void
+invoke_iIII_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] =
+        native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1),
+                    GET_I64_FROM_ADDR(argv + 3), GET_I64_FROM_ADDR(argv + 5));
+}
+static void
+invoke_iIII_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret =
+        native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1),
+                    GET_I64_FROM_ADDR(argv + 3), GET_I64_FROM_ADDR(argv + 5));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_Iiii_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, GET_I64_FROM_ADDR(argv), argv[2], argv[3], argv[4]);
+}
+static void
+invoke_Iiii_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, GET_I64_FROM_ADDR(argv), argv[2],
+                              argv[3], argv[4]);
+}
+static void
+invoke_Iiii_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, GET_I64_FROM_ADDR(argv), argv[2],
+                             argv[3], argv[4]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_IiiI_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, GET_I64_FROM_ADDR(argv), argv[2], argv[3],
+                GET_I64_FROM_ADDR(argv + 4));
+}
+
+static void
+invoke_IiiI_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, GET_I64_FROM_ADDR(argv), argv[2],
+                              argv[3], GET_I64_FROM_ADDR(argv + 4));
+}
+
+static void
+invoke_IiiI_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, GET_I64_FROM_ADDR(argv), argv[2],
+                             argv[3], GET_I64_FROM_ADDR(argv + 4));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_IiIi_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, GET_I64_FROM_ADDR(argv), argv[2],
+                GET_I64_FROM_ADDR(argv + 3), argv[5]);
+}
+static void
+invoke_IiIi_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, GET_I64_FROM_ADDR(argv), argv[2],
+                              GET_I64_FROM_ADDR(argv + 3), argv[5]);
+}
+static void
+invoke_IiIi_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, GET_I64_FROM_ADDR(argv), argv[2],
+                             GET_I64_FROM_ADDR(argv + 3), argv[5]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_IiII_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, GET_I64_FROM_ADDR(argv), argv[2],
+                GET_I64_FROM_ADDR(argv + 3), GET_I64_FROM_ADDR(argv + 5));
+}
+static void
+invoke_IiII_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] =
+        native_code(exec_env, GET_I64_FROM_ADDR(argv), argv[2],
+                    GET_I64_FROM_ADDR(argv + 3), GET_I64_FROM_ADDR(argv + 5));
+}
+static void
+invoke_IiII_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret =
+        native_code(exec_env, GET_I64_FROM_ADDR(argv), argv[2],
+                    GET_I64_FROM_ADDR(argv + 3), GET_I64_FROM_ADDR(argv + 5));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_IIii_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, GET_I64_FROM_ADDR(argv), GET_I64_FROM_ADDR(argv + 2),
+                argv[4], argv[5]);
+}
+static void
+invoke_IIii_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, GET_I64_FROM_ADDR(argv),
+                              GET_I64_FROM_ADDR(argv + 2), argv[4], argv[5]);
+}
+static void
+invoke_IIii_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, GET_I64_FROM_ADDR(argv),
+                             GET_I64_FROM_ADDR(argv + 2), argv[4], argv[5]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+/*
+ * Quick AOT entry trampolines for the parameter layout (I, I, i, I), one
+ * per result kind: _v returns nothing, _i stores a uint32 in argv_ret[0],
+ * _I stores a uint64 through PUT_I64_TO_ADDR(argv_ret, ...).
+ * The 64-bit arguments are fetched with GET_I64_FROM_ADDR at cells 0, 2
+ * and 5 of argv, the 32-bit argument is cell 4. exec_env is passed as the
+ * leading argument; ret_type is not read by these trampolines.
+ */
+static void
+invoke_IIiI_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, GET_I64_FROM_ADDR(argv), GET_I64_FROM_ADDR(argv + 2),
+                argv[4], GET_I64_FROM_ADDR(argv + 5));
+}
+static void
+invoke_IIiI_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, GET_I64_FROM_ADDR(argv),
+                              GET_I64_FROM_ADDR(argv + 2), argv[4],
+                              GET_I64_FROM_ADDR(argv + 5));
+}
+static void
+invoke_IIiI_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, GET_I64_FROM_ADDR(argv),
+                             GET_I64_FROM_ADDR(argv + 2), argv[4],
+                             GET_I64_FROM_ADDR(argv + 5));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+/*
+ * Quick AOT entry trampolines for the parameter layout (I, I, I, i), one
+ * per result kind: _v returns nothing, _i stores a uint32 in argv_ret[0],
+ * _I stores a uint64 through PUT_I64_TO_ADDR(argv_ret, ...).
+ * The 64-bit arguments are fetched with GET_I64_FROM_ADDR at cells 0, 2
+ * and 4 of argv, the 32-bit argument is cell 6. exec_env is passed as the
+ * leading argument; ret_type is not read by these trampolines.
+ */
+static void
+invoke_IIIi_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, GET_I64_FROM_ADDR(argv), GET_I64_FROM_ADDR(argv + 2),
+                GET_I64_FROM_ADDR(argv + 4), argv[6]);
+}
+static void
+invoke_IIIi_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, GET_I64_FROM_ADDR(argv),
+                              GET_I64_FROM_ADDR(argv + 2),
+                              GET_I64_FROM_ADDR(argv + 4), argv[6]);
+}
+static void
+invoke_IIIi_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, GET_I64_FROM_ADDR(argv),
+                             GET_I64_FROM_ADDR(argv + 2),
+                             GET_I64_FROM_ADDR(argv + 4), argv[6]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+/*
+ * Quick AOT entry trampolines for the parameter layout (I, I, I, I), one
+ * per result kind: _v returns nothing, _i stores a uint32 in argv_ret[0],
+ * _I stores a uint64 through PUT_I64_TO_ADDR(argv_ret, ...).
+ * All four arguments are 64-bit and are fetched with GET_I64_FROM_ADDR at
+ * cells 0, 2, 4 and 6 of argv. exec_env is passed as the leading
+ * argument; ret_type is not read by these trampolines.
+ */
+static void
+invoke_IIII_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, GET_I64_FROM_ADDR(argv), GET_I64_FROM_ADDR(argv + 2),
+                GET_I64_FROM_ADDR(argv + 4), GET_I64_FROM_ADDR(argv + 6));
+}
+static void
+invoke_IIII_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(
+        exec_env, GET_I64_FROM_ADDR(argv), GET_I64_FROM_ADDR(argv + 2),
+        GET_I64_FROM_ADDR(argv + 4), GET_I64_FROM_ADDR(argv + 6));
+}
+static void
+invoke_IIII_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(
+        exec_env, GET_I64_FROM_ADDR(argv), GET_I64_FROM_ADDR(argv + 2),
+        GET_I64_FROM_ADDR(argv + 4), GET_I64_FROM_ADDR(argv + 6));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+/*
+ * Quick AOT entry trampolines for five 32-bit parameters (i, i, i, i, i),
+ * one per result kind: _v returns nothing, _i stores a uint32 in
+ * argv_ret[0], _I stores a uint64 through PUT_I64_TO_ADDR(argv_ret, ...).
+ * The arguments are argv[0] .. argv[4], passed after exec_env; ret_type
+ * is not read by these trampolines.
+ */
+static void
+invoke_iiiii_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+               uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, argv[0], argv[1], argv[2], argv[3], argv[4]);
+}
+static void
+invoke_iiiii_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+               uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] =
+        native_code(exec_env, argv[0], argv[1], argv[2], argv[3], argv[4]);
+}
+static void
+invoke_iiiii_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+               uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret =
+        native_code(exec_env, argv[0], argv[1], argv[2], argv[3], argv[4]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+/*
+ * One row of the quick AOT entry table: a signature string paired with
+ * the trampoline stored for that signature.
+ */
+typedef struct QuickAOTEntry {
+    const char *signature; /* e.g. "(iI)i"; the sort and search key */
+    void *func_ptr;        /* trampoline returned by the lookup */
+} QuickAOTEntry;
+/*
+ * Signature -> trampoline table. Signatures are spelled
+ * "(<params>)<result>" with 'i' / 'I' parameter letters and an 'i' / 'I' /
+ * 'v' result letter, the same form that
+ * wasm_native_lookup_quick_aot_entry() builds for its search key.
+ * The array is not const: quick_aot_entry_init() sorts it in place with
+ * qsort(), and lookups use bsearch() with the same comparator, so the
+ * order written here does not have to be sorted.
+ */
+/* clang-format off */
+static QuickAOTEntry quick_aot_entries[] = {
+    { "()v", invoke_no_args_v },
+    { "()i", invoke_no_args_i },
+    { "()I", invoke_no_args_I },
+
+    { "(i)v", invoke_i_v }, { "(i)i", invoke_i_i }, { "(i)I", invoke_i_I },
+    { "(I)v", invoke_I_v }, { "(I)i", invoke_I_i }, { "(I)I", invoke_I_I },
+
+    { "(ii)v", invoke_ii_v }, { "(ii)i", invoke_ii_i }, { "(ii)I", invoke_ii_I },
+    { "(iI)v", invoke_iI_v }, { "(iI)i", invoke_iI_i }, { "(iI)I", invoke_iI_I },
+    { "(Ii)v", invoke_Ii_v }, { "(Ii)i", invoke_Ii_i }, { "(Ii)I", invoke_Ii_I },
+    { "(II)v", invoke_II_v }, { "(II)i", invoke_II_i }, { "(II)I", invoke_II_I },
+
+    { "(iii)v", invoke_iii_v }, { "(iii)i", invoke_iii_i }, { "(iii)I", invoke_iii_I },
+    { "(iiI)v", invoke_iiI_v }, { "(iiI)i", invoke_iiI_i }, { "(iiI)I", invoke_iiI_I },
+    { "(iIi)v", invoke_iIi_v }, { "(iIi)i", invoke_iIi_i }, { "(iIi)I", invoke_iIi_I },
+    { "(iII)v", invoke_iII_v }, { "(iII)i", invoke_iII_i }, { "(iII)I", invoke_iII_I },
+    { "(Iii)v", invoke_Iii_v }, { "(Iii)i", invoke_Iii_i }, { "(Iii)I", invoke_Iii_I },
+    { "(IiI)v", invoke_IiI_v }, { "(IiI)i", invoke_IiI_i }, { "(IiI)I", invoke_IiI_I },
+    { "(IIi)v", invoke_IIi_v }, { "(IIi)i", invoke_IIi_i }, { "(IIi)I", invoke_IIi_I },
+    { "(III)v", invoke_III_v }, { "(III)i", invoke_III_i }, { "(III)I", invoke_III_I },
+
+    { "(iiii)v", invoke_iiii_v }, { "(iiii)i", invoke_iiii_i }, { "(iiii)I", invoke_iiii_I },
+    { "(iiiI)v", invoke_iiiI_v }, { "(iiiI)i", invoke_iiiI_i }, { "(iiiI)I", invoke_iiiI_I },
+    { "(iiIi)v", invoke_iiIi_v }, { "(iiIi)i", invoke_iiIi_i }, { "(iiIi)I", invoke_iiIi_I },
+    { "(iiII)v", invoke_iiII_v }, { "(iiII)i", invoke_iiII_i }, { "(iiII)I", invoke_iiII_I },
+    { "(iIii)v", invoke_iIii_v }, { "(iIii)i", invoke_iIii_i }, { "(iIii)I", invoke_iIii_I },
+    { "(iIiI)v", invoke_iIiI_v }, { "(iIiI)i", invoke_iIiI_i }, { "(iIiI)I", invoke_iIiI_I },
+    { "(iIIi)v", invoke_iIIi_v }, { "(iIIi)i", invoke_iIIi_i }, { "(iIIi)I", invoke_iIIi_I },
+    { "(iIII)v", invoke_iIII_v }, { "(iIII)i", invoke_iIII_i }, { "(iIII)I", invoke_iIII_I },
+    { "(Iiii)v", invoke_Iiii_v }, { "(Iiii)i", invoke_Iiii_i }, { "(Iiii)I", invoke_Iiii_I },
+    { "(IiiI)v", invoke_IiiI_v }, { "(IiiI)i", invoke_IiiI_i }, { "(IiiI)I", invoke_IiiI_I },
+    { "(IiIi)v", invoke_IiIi_v }, { "(IiIi)i", invoke_IiIi_i }, { "(IiIi)I", invoke_IiIi_I },
+    { "(IiII)v", invoke_IiII_v }, { "(IiII)i", invoke_IiII_i }, { "(IiII)I", invoke_IiII_I },
+    { "(IIii)v", invoke_IIii_v }, { "(IIii)i", invoke_IIii_i }, { "(IIii)I", invoke_IIii_I },
+    { "(IIiI)v", invoke_IIiI_v }, { "(IIiI)i", invoke_IIiI_i }, { "(IIiI)I", invoke_IIiI_I },
+    { "(IIIi)v", invoke_IIIi_v }, { "(IIIi)i", invoke_IIIi_i }, { "(IIIi)I", invoke_IIIi_I },
+    { "(IIII)v", invoke_IIII_v }, { "(IIII)i", invoke_IIII_i }, { "(IIII)I", invoke_IIII_I },
+
+    { "(iiiii)v", invoke_iiiii_v }, { "(iiiii)i", invoke_iiiii_i }, { "(iiiii)I", invoke_iiiii_I },
+};
+/* clang-format on */
+
+/* Comparator shared by qsort() and bsearch(): orders two QuickAOTEntry
+   items by the strcmp() result of their signature strings. */
+static int
+quick_aot_entry_cmp(const void *quick_aot_entry1, const void *quick_aot_entry2)
+{
+    const QuickAOTEntry *lhs = quick_aot_entry1;
+    const QuickAOTEntry *rhs = quick_aot_entry2;
+
+    return strcmp(lhs->signature, rhs->signature);
+}
+
+/* Sort quick_aot_entries in place by signature so that the table can be
+   searched with bsearch(). Always returns true. */
+static bool
+quick_aot_entry_init()
+{
+    const size_t entry_count =
+        sizeof(quick_aot_entries) / sizeof(quick_aot_entries[0]);
+
+    qsort(quick_aot_entries, entry_count, sizeof(quick_aot_entries[0]),
+          quick_aot_entry_cmp);
+
+    return true;
+}
+
+/*
+ * Find the quick AOT entry trampoline matching a function type.
+ *
+ * The type is rendered as a signature string "(<params>)<result>" where an
+ * i32 becomes 'i', an i64 becomes 'I' and a missing result becomes 'v';
+ * that string is then binary-searched in quick_aot_entries, which must
+ * already have been sorted by quick_aot_entry_init().
+ *
+ * Returns the stored func_ptr, or NULL when the type has more than five
+ * parameters, more than one result, a parameter or result that is neither
+ * i32 nor i64, or no table entry for the signature.
+ */
+void *
+wasm_native_lookup_quick_aot_entry(const WASMType *func_type)
+{
+    const uint32 param_count = func_type->param_count;
+    const uint32 result_count = func_type->result_count;
+    const uint8 *types = func_type->types;
+    /* longest signature is "(iiiii)I": 8 characters plus the terminator
+       supplied by the zero initialisation */
+    char signature[16] = { 0 };
+    QuickAOTEntry key = { 0 }, *found;
+    uint32 idx, pos = 0;
+
+    if (param_count > 5 || result_count > 1)
+        return NULL;
+
+    signature[pos++] = '(';
+
+    for (idx = 0; idx < param_count; idx++) {
+        switch (types[idx]) {
+            case VALUE_TYPE_I32:
+                signature[pos++] = 'i';
+                break;
+            case VALUE_TYPE_I64:
+                signature[pos++] = 'I';
+                break;
+            default:
+                return NULL;
+        }
+    }
+
+    signature[pos++] = ')';
+
+    if (result_count == 0) {
+        signature[pos++] = 'v';
+    }
+    else {
+        /* the result type is stored right after the parameter types */
+        switch (types[param_count]) {
+            case VALUE_TYPE_I32:
+                signature[pos++] = 'i';
+                break;
+            case VALUE_TYPE_I64:
+                signature[pos++] = 'I';
+                break;
+            default:
+                return NULL;
+        }
+    }
+
+    key.signature = signature;
+    found = bsearch(&key, quick_aot_entries,
+                    sizeof(quick_aot_entries) / sizeof(QuickAOTEntry),
+                    sizeof(QuickAOTEntry), quick_aot_entry_cmp);
+
+    return found ? found->func_ptr : NULL;
+}
+#endif /* end of WASM_ENABLE_QUICK_AOT_ENTRY != 0 */

+ 5 - 0
core/iwasm/common/wasm_native.h

@@ -104,6 +104,11 @@ wasm_native_init();
 void
 wasm_native_destroy();
 
+#if WASM_ENABLE_QUICK_AOT_ENTRY != 0
+void * /* quick AOT entry found for func_type's signature, or NULL */
+wasm_native_lookup_quick_aot_entry(const WASMType *func_type);
+#endif
+
 #ifdef __cplusplus
 }
 #endif

+ 100 - 13
core/iwasm/common/wasm_runtime_common.c

@@ -158,6 +158,8 @@ static JitCompOptions jit_options = { 0 };
 #endif
 
 #if WASM_ENABLE_JIT != 0
+/* opt_level: 3, size_level: 3, segue-flags: 0,
+   quick_invoke_c_api_import: false */
 static LLVMJITOptions llvm_jit_options = { 3, 3, 0, false };
 #endif
 
@@ -638,10 +640,10 @@ wasm_runtime_get_default_running_mode(void)
 }
 
 #if WASM_ENABLE_JIT != 0
-LLVMJITOptions
+LLVMJITOptions *
 wasm_runtime_get_llvm_jit_options(void)
 {
-    return llvm_jit_options;
+    return &llvm_jit_options;
 }
 #endif
 
@@ -662,14 +664,17 @@ wasm_runtime_full_init(RuntimeInitArgs *init_args)
 #endif
 
 #if WASM_ENABLE_JIT != 0
-    LOG_DEBUG("Start LLVM_JIT, opt_sz=%u, opt_lvl=%u, segue=%s, linux_perf=%s",
-              init_args->llvm_jit_size_level, init_args->llvm_jit_opt_level,
-              init_args->segue_flags ? "Yes" : "No",
-              init_args->linux_perf_support ? "Yes" : "No");
     llvm_jit_options.size_level = init_args->llvm_jit_size_level;
     llvm_jit_options.opt_level = init_args->llvm_jit_opt_level;
     llvm_jit_options.segue_flags = init_args->segue_flags;
-    llvm_jit_options.linux_perf_support = init_args->linux_perf_support;
+#endif
+
+#if WASM_ENABLE_LINUX_PERF != 0
+    wasm_runtime_set_linux_perf(init_args->enable_linux_perf);
+#else
+    if (init_args->enable_linux_perf)
+        LOG_WARNING("warning: to enable linux perf support, please recompile "
+                    "with -DWAMR_BUILD_LINUX_PERF=1");
 #endif
 
     if (!wasm_runtime_env_init()) {
@@ -701,6 +706,12 @@ wasm_runtime_full_init(RuntimeInitArgs *init_args)
     return true;
 }
 
+void
+wasm_runtime_set_log_level(log_level_t level)
+{ /* thin wrapper: level is handed unchanged to bh_log_set_verbose_level() */
+    bh_log_set_verbose_level(level);
+}
+
 bool
 wasm_runtime_is_running_mode_supported(RunningMode running_mode)
 {
@@ -2542,7 +2553,6 @@ wasm_runtime_clear_exception(WASMModuleInstanceCommon *module_inst_comm)
     wasm_runtime_set_exception(module_inst_comm, NULL);
 }
 
-#if WASM_ENABLE_THREAD_MGR != 0
 void
 wasm_runtime_terminate(WASMModuleInstanceCommon *module_inst_comm)
 {
@@ -2552,7 +2562,6 @@ wasm_runtime_terminate(WASMModuleInstanceCommon *module_inst_comm)
               || module_inst_comm->module_type == Wasm_Module_AoT);
     wasm_set_exception(module_inst, "terminated by user");
 }
-#endif
 
 void
 wasm_runtime_set_custom_data_internal(
@@ -5668,7 +5677,7 @@ wasm_runtime_invoke_c_api_native(WASMModuleInstanceCommon *module_inst,
     wasm_val_t *params = params_buf, *results = results_buf;
     wasm_trap_t *trap = NULL;
     bool ret = false;
-    wasm_val_vec_t params_vec, results_vec;
+    wasm_val_vec_t params_vec = { 0 }, results_vec = { 0 };
 
     if (func_type->param_count > 16) {
         if (!(params =
@@ -5696,12 +5705,10 @@ wasm_runtime_invoke_c_api_native(WASMModuleInstanceCommon *module_inst,
     params_vec.data = params;
     params_vec.num_elems = func_type->param_count;
     params_vec.size = func_type->param_count;
-    params_vec.size_of_elem = sizeof(wasm_val_t);
 
     results_vec.data = results;
     results_vec.num_elems = 0;
     results_vec.size = func_type->result_count;
-    results_vec.size_of_elem = sizeof(wasm_val_t);
 
     if (!with_env) {
         wasm_func_callback_t callback = (wasm_func_callback_t)func_ptr;
@@ -5737,7 +5744,6 @@ wasm_runtime_invoke_c_api_native(WASMModuleInstanceCommon *module_inst,
         wasm_runtime_set_exception(module_inst, "unsupported result type");
         goto fail;
     }
-    results_vec.num_elems = func_type->result_count;
     ret = true;
 
 fail:
@@ -5748,6 +5754,71 @@ fail:
     return ret;
 }
 
+/*
+ * Quick path for calling a wasm-c-api import function.
+ *
+ * The caller supplies ready-made wasm_val_t arrays: `params` holds
+ * `param_count` arguments and `results` has room for `result_count`
+ * results. They are wrapped in wasm_val_vec_t views and passed to the
+ * linked callback, together with c_api_import->env_arg when
+ * c_api_import->with_env_arg is set.
+ *
+ * Returns true when the callback returned no trap. Returns false, with an
+ * exception set on the module instance, when the import is not linked or
+ * when the callback returned a trap; the trap is deleted here. The trap
+ * message, if present, is copied (truncated to 107 bytes) into the
+ * exception text.
+ */
+bool
+wasm_runtime_quick_invoke_c_api_native(WASMModuleInstanceCommon *inst_comm,
+                                       CApiFuncImport *c_api_import,
+                                       wasm_val_t *params, uint32 param_count,
+                                       wasm_val_t *results, uint32 result_count)
+{
+    WASMModuleInstance *module_inst = (WASMModuleInstance *)inst_comm;
+    void *func_ptr = c_api_import->func_ptr_linked;
+    bool with_env_arg = c_api_import->with_env_arg, ret = true;
+    wasm_val_vec_t params_vec = { 0 }, results_vec = { 0 };
+    wasm_trap_t *trap = NULL;
+
+    params_vec.data = params;
+    params_vec.num_elems = param_count;
+    params_vec.size = param_count;
+
+    results_vec.data = results;
+    results_vec.num_elems = 0;
+    results_vec.size = result_count;
+
+    if (!func_ptr) {
+        wasm_set_exception_with_id(module_inst, EXCE_CALL_UNLINKED_IMPORT_FUNC);
+        ret = false;
+        goto fail;
+    }
+
+    if (!with_env_arg) {
+        wasm_func_callback_t callback = (wasm_func_callback_t)func_ptr;
+        trap = callback(&params_vec, &results_vec);
+    }
+    else {
+        void *wasm_c_api_env = c_api_import->env_arg;
+        wasm_func_callback_with_env_t callback =
+            (wasm_func_callback_with_env_t)func_ptr;
+        trap = callback(wasm_c_api_env, &params_vec, &results_vec);
+    }
+
+    if (trap) {
+        /* A trap may have been created without any message, so check the
+           message vector itself before looking at its data */
+        if (trap->message && trap->message->data) {
+            /* since trap->message->data does not end with '\0' */
+            char trap_message[108] = { 0 };
+            uint32 max_size_to_copy = (uint32)sizeof(trap_message) - 1;
+            uint32 size_to_copy = (trap->message->size < max_size_to_copy)
+                                      ? (uint32)trap->message->size
+                                      : max_size_to_copy;
+            bh_memcpy_s(trap_message, (uint32)sizeof(trap_message),
+                        trap->message->data, size_to_copy);
+            wasm_set_exception(module_inst, trap_message);
+        }
+        else {
+            wasm_set_exception(module_inst,
+                               "native function throw unknown exception");
+        }
+        wasm_trap_delete(trap);
+        ret = false;
+    }
+
+fail:
+#ifdef OS_ENABLE_HW_BOUND_CHECK
+    if (!ret)
+        wasm_runtime_access_exce_check_guard_page();
+#endif
+    return ret;
+}
+
 void
 wasm_runtime_show_app_heap_corrupted_prompt()
 {
@@ -6142,3 +6213,19 @@ wasm_runtime_get_context(WASMModuleInstanceCommon *inst, void *key)
     return wasm_native_get_context(inst, key);
 }
 #endif /* WASM_ENABLE_MODULE_INST_CONTEXT != 0 */
+/*
+ * Linux perf switch: a file-scope flag, false until changed, with a getter
+ * and a setter. Only compiled when WASM_ENABLE_LINUX_PERF != 0.
+ */
+#if WASM_ENABLE_LINUX_PERF != 0
+static bool enable_linux_perf = false;
+
+bool
+wasm_runtime_get_linux_perf(void)
+{
+    return enable_linux_perf;
+}
+
+void
+wasm_runtime_set_linux_perf(bool flag)
+{
+    enable_linux_perf = flag;
+}
+#endif

+ 21 - 3
core/iwasm/common/wasm_runtime_common.h

@@ -438,12 +438,12 @@ typedef struct wasm_frame_t {
     const char *func_name_wp;
 } WASMCApiFrame;
 
-#ifdef WASM_ENABLE_JIT
+#if WASM_ENABLE_JIT != 0
 typedef struct LLVMJITOptions {
     uint32 opt_level;
     uint32 size_level;
     uint32 segue_flags;
-    bool linux_perf_support;
+    bool quick_invoke_c_api_import;
 } LLVMJITOptions;
 #endif
 
@@ -477,7 +477,7 @@ wasm_runtime_get_default_running_mode(void);
 
 #if WASM_ENABLE_JIT != 0
 /* Internal API */
-LLVMJITOptions
+LLVMJITOptions *
 wasm_runtime_get_llvm_jit_options(void);
 #endif
 
@@ -1080,6 +1080,16 @@ wasm_runtime_invoke_c_api_native(WASMModuleInstanceCommon *module_inst,
                                  uint32 argc, uint32 *argv, bool with_env,
                                  void *wasm_c_api_env);
 
+struct CApiFuncImport;
+/* A quick version of wasm_runtime_invoke_c_api_native to directly invoke
+   wasm-c-api import function from jitted code to improve performance.
+   params and results are wasm_val_t arrays with param_count and
+   result_count elements. Returns false, with an exception set on
+   module_inst, when the import is not linked or the callback returns a
+   trap; returns true otherwise */
+bool
+wasm_runtime_quick_invoke_c_api_native(WASMModuleInstanceCommon *module_inst,
+                                       struct CApiFuncImport *c_api_import,
+                                       wasm_val_t *params, uint32 param_count,
+                                       wasm_val_t *results,
+                                       uint32 result_count);
+
 void
 wasm_runtime_show_app_heap_corrupted_prompt();
 
@@ -1105,6 +1115,14 @@ wasm_runtime_end_blocking_op(WASMExecEnv *exec_env);
 void
 wasm_runtime_interrupt_blocking_op(WASMExecEnv *exec_env);
 
+#if WASM_ENABLE_LINUX_PERF != 0
+bool
+wasm_runtime_get_linux_perf(void); /* read the linux perf flag */
+
+void
+wasm_runtime_set_linux_perf(bool flag); /* overwrite the linux perf flag */
+#endif
+
 #ifdef __cplusplus
 }
 #endif

+ 44 - 26
core/iwasm/compilation/aot_emit_aot_file.c

@@ -912,9 +912,6 @@ get_native_symbol_list_size(AOTCompContext *comp_ctx)
     return len;
 }
 
-static uint32
-get_name_section_size(AOTCompData *comp_data);
-
 static uint32
 get_custom_sections_size(AOTCompContext *comp_ctx, AOTCompData *comp_data);
 
@@ -972,15 +969,6 @@ get_aot_file_size(AOTCompContext *comp_ctx, AOTCompData *comp_data,
         size += get_native_symbol_list_size(comp_ctx);
     }
 
-    if (comp_ctx->enable_aux_stack_frame) {
-        /* custom name section */
-        size = align_uint(size, 4);
-        /* section id + section size + sub section id */
-        size += (uint32)sizeof(uint32) * 3;
-        size += (comp_data->aot_name_section_size =
-                     get_name_section_size(comp_data));
-    }
-
     size_custom_section = get_custom_sections_size(comp_ctx, comp_data);
     if (size_custom_section > 0) {
         size = align_uint(size, 4);
@@ -1333,6 +1321,21 @@ get_custom_sections_size(AOTCompContext *comp_ctx, AOTCompData *comp_data)
         const uint8 *content = NULL;
         uint32 length = 0;
 
+        if (strcmp(section_name, "name") == 0) {
+            /* custom name section */
+            comp_data->aot_name_section_size = get_name_section_size(comp_data);
+            if (comp_data->aot_name_section_size == 0) {
+                LOG_WARNING("Can't find custom section [name], ignore it");
+                continue;
+            }
+
+            size = align_uint(size, 4);
+            /* section id + section size + sub section id */
+            size += (uint32)sizeof(uint32) * 3;
+            size += comp_data->aot_name_section_size;
+            continue;
+        }
+
         content = wasm_loader_get_custom_section(comp_data->wasm_module,
                                                  section_name, &length);
         if (!content) {
@@ -2066,23 +2069,25 @@ static bool
 aot_emit_name_section(uint8 *buf, uint8 *buf_end, uint32 *p_offset,
                       AOTCompData *comp_data, AOTCompContext *comp_ctx)
 {
-    if (comp_ctx->enable_aux_stack_frame) {
-        uint32 offset = *p_offset;
+    uint32 offset = *p_offset;
 
-        *p_offset = offset = align_uint(offset, 4);
+    if (comp_data->aot_name_section_size == 0)
+        return true;
 
-        EMIT_U32(AOT_SECTION_TYPE_CUSTOM);
-        /* sub section id + name section size */
-        EMIT_U32(sizeof(uint32) * 1 + comp_data->aot_name_section_size);
-        EMIT_U32(AOT_CUSTOM_SECTION_NAME);
-        bh_memcpy_s((uint8 *)(buf + offset), (uint32)(buf_end - buf),
-                    comp_data->aot_name_section_buf,
-                    (uint32)comp_data->aot_name_section_size);
-        offset += comp_data->aot_name_section_size;
+    offset = align_uint(offset, 4);
 
-        *p_offset = offset;
-    }
+    EMIT_U32(AOT_SECTION_TYPE_CUSTOM);
+    /* sub section id + name section size */
+    EMIT_U32(sizeof(uint32) * 1 + comp_data->aot_name_section_size);
+    EMIT_U32(AOT_CUSTOM_SECTION_NAME);
+    bh_memcpy_s((uint8 *)(buf + offset), (uint32)(buf_end - buf),
+                comp_data->aot_name_section_buf,
+                (uint32)comp_data->aot_name_section_size);
+    offset += comp_data->aot_name_section_size;
 
+    *p_offset = offset;
+
+    LOG_DEBUG("emit name section");
     return true;
 }
 
@@ -2098,6 +2103,16 @@ aot_emit_custom_sections(uint8 *buf, uint8 *buf_end, uint32 *p_offset,
         const uint8 *content = NULL;
         uint32 length = 0;
 
+        if (strcmp(section_name, "name") == 0) {
+            *p_offset = offset;
+            if (!aot_emit_name_section(buf, buf_end, p_offset, comp_data,
+                                       comp_ctx))
+                return false;
+
+            offset = *p_offset;
+            continue;
+        }
+
         content = wasm_loader_get_custom_section(comp_data->wasm_module,
                                                  section_name, &length);
         if (!content) {
@@ -3589,6 +3604,10 @@ aot_emit_aot_file_buf(AOTCompContext *comp_ctx, AOTCompData *comp_data,
         return NULL;
 
     aot_file_size = get_aot_file_size(comp_ctx, comp_data, obj_data);
+    if (aot_file_size == 0) {
+        aot_set_last_error("get aot file size failed");
+        goto fail1;
+    }
 
     if (!(buf = aot_file_buf = wasm_runtime_malloc(aot_file_size))) {
         aot_set_last_error("allocate memory failed.");
@@ -3610,7 +3629,6 @@ aot_emit_aot_file_buf(AOTCompContext *comp_ctx, AOTCompData *comp_data,
         || !aot_emit_relocation_section(buf, buf_end, &offset, comp_ctx,
                                         comp_data, obj_data)
         || !aot_emit_native_symbol(buf, buf_end, &offset, comp_ctx)
-        || !aot_emit_name_section(buf, buf_end, &offset, comp_data, comp_ctx)
         || !aot_emit_custom_sections(buf, buf_end, &offset, comp_data,
                                      comp_ctx))
         goto fail2;

+ 246 - 12
core/iwasm/compilation/aot_emit_function.c

@@ -288,6 +288,213 @@ call_aot_invoke_native_func(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     return true;
 }
 
+/*
+ * Emit LLVM IR that calls wasm_runtime_quick_invoke_c_api_native() for the
+ * wasm-c-api import function `import_func_idx`.
+ *
+ * The generated code
+ *   - loads the c_api_func_imports array pointer from the module instance
+ *     and addresses the import's CApiFuncImport element,
+ *   - writes every parameter into func_ctx->argv_buf as a wasm_val_t
+ *     (kind byte at offset 0, value at offsetof(wasm_val_t, of)),
+ *   - places the results area directly after the parameters in the same
+ *     buffer,
+ *   - calls the runtime helper and, when bound checks are enabled, emits
+ *     the check of its return value,
+ *   - loads each result value and pushes it onto the value stack.
+ *
+ * Only i32/i64/f32/f64 parameter and result types are handled; any other
+ * type hits bh_assert(0). The caller is expected to have verified that
+ * params plus results fit into argv_buf.
+ *
+ * Returns true on success and false on failure.
+ */
+static bool
+call_aot_invoke_c_api_native(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
+                             uint32 import_func_idx, AOTFuncType *aot_func_type,
+                             LLVMValueRef *params)
+{
+    LLVMTypeRef int8_ptr_type, param_types[6], ret_type;
+    LLVMTypeRef value_ptr_type = NULL, value_type = NULL;
+    LLVMTypeRef func_type, func_ptr_type;
+    LLVMValueRef param_values[6], res, func, value = NULL, offset;
+    LLVMValueRef c_api_func_imports, c_api_func_import;
+    LLVMValueRef c_api_params, c_api_results, value_ret;
+    LLVMValueRef c_api_param_kind, c_api_param_value;
+    LLVMValueRef c_api_result_value;
+    uint32 offset_c_api_func_imports, i;
+    uint32 offset_param_kind, offset_param_value;
+    char buf[16];
+
+    /* `int8 **` type */
+    int8_ptr_type = LLVMPointerType(INT8_PTR_TYPE, 0);
+    if (!int8_ptr_type) {
+        aot_set_last_error("create llvm pointer type failed");
+        return false;
+    }
+
+    param_types[0] = INT8_PTR_TYPE; /* module_inst */
+    param_types[1] = INT8_PTR_TYPE; /* CApiFuncImport *c_api_import */
+    param_types[2] = INT8_PTR_TYPE; /* wasm_val_t *params */
+    param_types[3] = I32_TYPE;      /* uint32 param_count */
+    param_types[4] = INT8_PTR_TYPE; /* wasm_val_t *results */
+    param_types[5] = I32_TYPE;      /* uint32 result_count */
+
+    ret_type = INT8_TYPE;
+
+    GET_AOT_FUNCTION(wasm_runtime_quick_invoke_c_api_native, 6);
+
+    param_values[0] = func_ctx->aot_inst;
+
+    /* Get module_inst->e->common.c_api_func_imports */
+    offset_c_api_func_imports =
+        get_module_inst_extra_offset(comp_ctx)
+        + (comp_ctx->is_jit_mode
+               ? offsetof(WASMModuleInstanceExtra, common.c_api_func_imports)
+               /* offsetof(AOTModuleInstanceExtra, common.c_api_func_imports) */
+               : sizeof(uint64));
+    offset = I32_CONST(offset_c_api_func_imports);
+    CHECK_LLVM_CONST(offset);
+    c_api_func_imports =
+        LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, func_ctx->aot_inst,
+                              &offset, 1, "c_api_func_imports_addr");
+    c_api_func_imports =
+        LLVMBuildBitCast(comp_ctx->builder, c_api_func_imports, int8_ptr_type,
+                         "c_api_func_imports_ptr");
+    c_api_func_imports =
+        LLVMBuildLoad2(comp_ctx->builder, INT8_PTR_TYPE, c_api_func_imports,
+                       "c_api_func_imports");
+
+    /* Get &c_api_func_imports[func_idx], note size of CApiFuncImport
+       is pointer_size * 3 */
+    offset = I32_CONST((comp_ctx->pointer_size * 3) * import_func_idx);
+    CHECK_LLVM_CONST(offset);
+    c_api_func_import =
+        LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, c_api_func_imports,
+                              &offset, 1, "c_api_func_import");
+
+    param_values[1] = c_api_func_import;
+    param_values[2] = c_api_params = func_ctx->argv_buf;
+    param_values[3] = I32_CONST(aot_func_type->param_count);
+    CHECK_LLVM_CONST(param_values[3]);
+
+    /* Ensure sizeof(wasm_val_t) is 16 bytes */
+    offset = I32_CONST(sizeof(wasm_val_t) * aot_func_type->param_count);
+    /* Validate the constant like every other offset in this function
+       before it is used as a GEP index */
+    CHECK_LLVM_CONST(offset);
+    c_api_results =
+        LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, func_ctx->argv_buf,
+                              &offset, 1, "results");
+    param_values[4] = c_api_results;
+
+    param_values[5] = I32_CONST(aot_func_type->result_count);
+    CHECK_LLVM_CONST(param_values[5]);
+
+    /* Set each c api param */
+    for (i = 0; i < aot_func_type->param_count; i++) {
+        /* Ensure sizeof(wasm_val_t) is 16 bytes */
+        offset_param_kind = sizeof(wasm_val_t) * i;
+        offset = I32_CONST(offset_param_kind);
+        CHECK_LLVM_CONST(offset);
+        c_api_param_kind =
+            LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, c_api_params,
+                                  &offset, 1, "c_api_param_kind_addr");
+        c_api_param_kind =
+            LLVMBuildBitCast(comp_ctx->builder, c_api_param_kind, INT8_PTR_TYPE,
+                             "c_api_param_kind_ptr");
+
+        /* Pick the wasm_val_t kind tag for this parameter */
+        switch (aot_func_type->types[i]) {
+            case VALUE_TYPE_I32:
+                value = I8_CONST(WASM_I32);
+                break;
+            case VALUE_TYPE_F32:
+                value = I8_CONST(WASM_F32);
+                break;
+            case VALUE_TYPE_I64:
+                value = I8_CONST(WASM_I64);
+                break;
+            case VALUE_TYPE_F64:
+                value = I8_CONST(WASM_F64);
+                break;
+            default:
+                bh_assert(0);
+                break;
+        }
+        CHECK_LLVM_CONST(value);
+
+        LLVMBuildStore(comp_ctx->builder, value, c_api_param_kind);
+
+        /* Ensure offsetof(wasm_val_t, of) is 8 bytes */
+        offset_param_value = offset_param_kind + offsetof(wasm_val_t, of);
+        offset = I32_CONST(offset_param_value);
+        CHECK_LLVM_CONST(offset);
+        c_api_param_value =
+            LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, c_api_params,
+                                  &offset, 1, "c_api_param_value_addr");
+
+        /* Pick the pointer type used to store the parameter value */
+        switch (aot_func_type->types[i]) {
+            case VALUE_TYPE_I32:
+                value_ptr_type = INT32_PTR_TYPE;
+                break;
+            case VALUE_TYPE_F32:
+                value_ptr_type = F32_PTR_TYPE;
+                break;
+            case VALUE_TYPE_I64:
+                value_ptr_type = INT64_PTR_TYPE;
+                break;
+            case VALUE_TYPE_F64:
+                value_ptr_type = F64_PTR_TYPE;
+                break;
+            default:
+                bh_assert(0);
+                break;
+        }
+
+        c_api_param_value =
+            LLVMBuildBitCast(comp_ctx->builder, c_api_param_value,
+                             value_ptr_type, "c_api_param_value_ptr");
+        LLVMBuildStore(comp_ctx->builder, params[i], c_api_param_value);
+    }
+
+    /* Call the function */
+    if (!(res = LLVMBuildCall2(comp_ctx->builder, func_type, func, param_values,
+                               6, "call"))) {
+        aot_set_last_error("LLVM build call failed.");
+        goto fail;
+    }
+
+    /* Check whether exception was thrown when executing the function */
+    if (comp_ctx->enable_bound_check
+        && !check_call_return(comp_ctx, func_ctx, res)) {
+        goto fail;
+    }
+
+    for (i = 0; i < aot_func_type->result_count; i++) {
+        /* Ensure sizeof(wasm_val_t) is 16 bytes and
+           offsetof(wasm_val_t, of) is 8 bytes */
+        uint32 offset_result_value =
+            sizeof(wasm_val_t) * i + offsetof(wasm_val_t, of);
+
+        offset = I32_CONST(offset_result_value);
+        CHECK_LLVM_CONST(offset);
+        c_api_result_value =
+            LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, c_api_results,
+                                  &offset, 1, "c_api_result_value_addr");
+
+        /* Result types follow the parameter types in types[] */
+        switch (aot_func_type->types[aot_func_type->param_count + i]) {
+            case VALUE_TYPE_I32:
+                value_type = I32_TYPE;
+                value_ptr_type = INT32_PTR_TYPE;
+                break;
+            case VALUE_TYPE_F32:
+                value_type = F32_TYPE;
+                value_ptr_type = F32_PTR_TYPE;
+                break;
+            case VALUE_TYPE_I64:
+                value_type = I64_TYPE;
+                value_ptr_type = INT64_PTR_TYPE;
+                break;
+            case VALUE_TYPE_F64:
+                value_type = F64_TYPE;
+                value_ptr_type = F64_PTR_TYPE;
+                break;
+            default:
+                bh_assert(0);
+                break;
+        }
+
+        c_api_result_value =
+            LLVMBuildBitCast(comp_ctx->builder, c_api_result_value,
+                             value_ptr_type, "c_api_result_value_ptr");
+        snprintf(buf, sizeof(buf), "%s%u", "ret", i);
+        value_ret = LLVMBuildLoad2(comp_ctx->builder, value_type,
+                                   c_api_result_value, buf);
+
+        PUSH(value_ret, aot_func_type->types[aot_func_type->param_count + i]);
+    }
+
+    return true;
+fail:
+    return false;
+}
+
 #if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0)
 static bool
 call_aot_alloc_frame_func(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
@@ -533,6 +740,7 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     const char *signature = NULL;
     bool ret = false;
     char buf[32];
+    bool quick_invoke_c_api_import = false;
 
 #if WASM_ENABLE_THREAD_MGR != 0
     /* Insert suspend check point */
@@ -702,17 +910,43 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
         }
 
         if (!signature) {
-            /* call aot_invoke_native() */
-            if (!call_aot_invoke_native_func(
-                    comp_ctx, func_ctx, import_func_idx, func_type,
-                    param_types + 1, param_values + 1, param_count,
-                    param_cell_num, ret_type, wasm_ret_type, &value_ret, &res))
-                goto fail;
-            /* Check whether there was exception thrown when executing
-               the function */
-            if ((comp_ctx->enable_bound_check || is_win_platform(comp_ctx))
-                && !check_call_return(comp_ctx, func_ctx, res))
-                goto fail;
+            if (comp_ctx->quick_invoke_c_api_import) {
+                uint32 buf_size_needed =
+                    sizeof(wasm_val_t) * (param_count + result_count);
+
+                /* length of exec_env->argv_buf is 64 */
+                if (buf_size_needed < sizeof(uint32) * 64) {
+                    for (i = 0; i < param_count + result_count; i++) {
+                        /* Only support i32/i64/f32/f64 now */
+                        if (!(func_type->types[i] == VALUE_TYPE_I32
+                              || func_type->types[i] == VALUE_TYPE_I64
+                              || func_type->types[i] == VALUE_TYPE_F32
+                              || func_type->types[i] == VALUE_TYPE_F64))
+                            break;
+                    }
+                    if (i == param_count + result_count)
+                        quick_invoke_c_api_import = true;
+                }
+            }
+            if (quick_invoke_c_api_import) {
+                if (!call_aot_invoke_c_api_native(comp_ctx, func_ctx, func_idx,
+                                                  func_type, param_values + 1))
+                    goto fail;
+            }
+            else {
+                /* call aot_invoke_native() */
+                if (!call_aot_invoke_native_func(
+                        comp_ctx, func_ctx, import_func_idx, func_type,
+                        param_types + 1, param_values + 1, param_count,
+                        param_cell_num, ret_type, wasm_ret_type, &value_ret,
+                        &res))
+                    goto fail;
+                /* Check whether there was exception thrown when executing
+                   the function */
+                if ((comp_ctx->enable_bound_check || is_win_platform(comp_ctx))
+                    && !check_call_return(comp_ctx, func_ctx, res))
+                    goto fail;
+            }
         }
         else { /* call native func directly */
             LLVMTypeRef native_func_type, func_ptr_type;
@@ -869,7 +1103,7 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
             goto fail;
     }
 
-    if (func_type->result_count > 0) {
+    if (func_type->result_count > 0 && !quick_invoke_c_api_import) {
         /* Push the first result to stack */
         PUSH(value_ret, func_type->types[func_type->param_count]);
         /* Load extra result from its address and push to stack */

+ 12 - 0
core/iwasm/compilation/aot_emit_table.c

@@ -46,6 +46,18 @@ get_tbl_inst_offset(const AOTCompContext *comp_ctx,
     return offset;
 }
 
+/* Return the offset that get_tbl_inst_offset() yields for the table index
+   (import_table_count + table_count), passed through align_uint(..., 8).
+   The unaligned offset is asserted to fit in 32 bits. */
+uint32
+get_module_inst_extra_offset(AOTCompContext *comp_ctx)
+{
+    const AOTCompData *data = comp_ctx->comp_data;
+    uint32 total_tables = data->import_table_count + data->table_count;
+    uint64 tables_end = get_tbl_inst_offset(comp_ctx, NULL, total_tables);
+
+    bh_assert(tables_end <= UINT32_MAX);
+    return align_uint((uint32)tables_end, 8);
+}
+
 #if WASM_ENABLE_REF_TYPES != 0
 
 LLVMValueRef

+ 4 - 1
core/iwasm/compilation/aot_emit_table.h

@@ -49,6 +49,9 @@ uint64
 get_tbl_inst_offset(const AOTCompContext *comp_ctx,
                     const AOTFuncContext *func_ctx, uint32 tbl_idx);
 
+uint32
+get_module_inst_extra_offset(AOTCompContext *comp_ctx);
+
 LLVMValueRef
 aot_compile_get_tbl_inst(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                          uint32 tbl_idx);
@@ -56,4 +59,4 @@ aot_compile_get_tbl_inst(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 #ifdef __cplusplus
 } /* end of extern "C" */
 #endif
-#endif
+#endif

+ 12 - 5
core/iwasm/compilation/aot_llvm.c

@@ -2174,7 +2174,7 @@ jit_stack_size_callback(void *user_data, const char *name, size_t namelen,
 }
 
 static bool
-orc_jit_create(AOTCompContext *comp_ctx, bool linux_perf_support)
+orc_jit_create(AOTCompContext *comp_ctx)
 {
     LLVMErrorRef err;
     LLVMOrcLLLazyJITRef orc_jit = NULL;
@@ -2214,13 +2214,15 @@ orc_jit_create(AOTCompContext *comp_ctx, bool linux_perf_support)
     /* Ownership transfer: LLVMOrcLLJITBuilderRef -> LLVMOrcLLJITRef */
     builder = NULL;
 
-    if (linux_perf_support) {
-        LOG_DEBUG("Enable linux perf support");
+#if WASM_ENABLE_LINUX_PERF != 0
+    if (wasm_runtime_get_linux_perf()) {
+        LOG_DEBUG("Enable linux perf support in JIT");
         LLVMOrcObjectLayerRef obj_linking_layer =
             (LLVMOrcObjectLayerRef)LLVMOrcLLLazyJITGetObjLinkingLayer(orc_jit);
         LLVMOrcRTDyldObjectLinkingLayerRegisterJITEventListener(
             obj_linking_layer, LLVMCreatePerfJITEventListener());
     }
+#endif
 
     /* Ownership transfer: local -> AOTCompContext */
     comp_ctx->orc_jit = orc_jit;
@@ -2320,7 +2322,8 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
         goto fail;
     }
 
-    if (option->linux_perf_support) {
+#if WASM_ENABLE_LINUX_PERF != 0
+    if (wasm_runtime_get_linux_perf()) {
         /* FramePointerKind.All */
         LLVMMetadataRef val =
             LLVMValueAsMetadata(LLVMConstInt(LLVMInt32Type(), 2, false));
@@ -2330,6 +2333,7 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
 
         comp_ctx->emit_frame_pointer = true;
     }
+#endif
 
     if (BH_LIST_ERROR == bh_list_init(&comp_ctx->native_symbols)) {
         goto fail;
@@ -2394,6 +2398,9 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
     if (option->enable_stack_estimation)
         comp_ctx->enable_stack_estimation = true;
 
+    if (option->quick_invoke_c_api_import)
+        comp_ctx->quick_invoke_c_api_import = true;
+
     if (option->llvm_passes)
         comp_ctx->llvm_passes = option->llvm_passes;
 
@@ -2434,7 +2441,7 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
             goto fail;
 
         /* Create LLJIT Instance */
-        if (!orc_jit_create(comp_ctx, option->linux_perf_support))
+        if (!orc_jit_create(comp_ctx))
             goto fail;
     }
     else {

+ 7 - 3
core/iwasm/compilation/aot_llvm.h

@@ -321,10 +321,10 @@ typedef struct AOTCompContext {
     /* Bulk memory feature */
     bool enable_bulk_memory;
 
-    /* Bounday Check */
+    /* Boundary Check */
     bool enable_bound_check;
 
-    /* Native stack bounday Check */
+    /* Native stack boundary Check */
     bool enable_stack_bound_check;
 
     /* Native stack usage estimation */
@@ -357,6 +357,10 @@ typedef struct AOTCompContext {
     /* Enable LLVM PGO (Profile-Guided Optimization) */
     bool enable_llvm_pgo;
 
+    /* Treat unknown import function as wasm-c-api import function
+       and allow it to be invoked directly from AOT/JIT code */
+    bool quick_invoke_c_api_import;
+
     /* Use profile file collected by LLVM PGO */
     char *use_prof_file;
 
@@ -454,6 +458,7 @@ typedef struct AOTCompOption {
     bool disable_llvm_lto;
     bool enable_llvm_pgo;
     bool enable_stack_estimation;
+    bool quick_invoke_c_api_import;
     char *use_prof_file;
     uint32 opt_level;
     uint32 size_level;
@@ -461,7 +466,6 @@ typedef struct AOTCompOption {
     uint32 bounds_checks;
     uint32 stack_bounds_checks;
     uint32 segue_flags;
-    bool linux_perf_support;
     char **custom_sections;
     uint32 custom_sections_count;
     const char *stack_usage_file;

+ 9 - 1
core/iwasm/compilation/aot_llvm_extra.cpp

@@ -45,6 +45,7 @@
 #include <llvm/Transforms/Vectorize/LoopVectorize.h>
 #include <llvm/Transforms/Vectorize/LoadStoreVectorizer.h>
 #include <llvm/Transforms/Vectorize/SLPVectorizer.h>
+#include <llvm/Transforms/Vectorize/VectorCombine.h>
 #include <llvm/Transforms/Scalar/LoopRotation.h>
 #include <llvm/Transforms/Scalar/SimpleLoopUnswitch.h>
 #include <llvm/Transforms/Scalar/LICM.h>
@@ -315,8 +316,11 @@ aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
     }
 
     ModulePassManager MPM;
+
     if (comp_ctx->is_jit_mode) {
         const char *Passes =
+            "loop-vectorize,slp-vectorizer,"
+            "load-store-vectorizer,vector-combine,"
             "mem2reg,instcombine,simplifycfg,jump-threading,indvars";
         ExitOnErr(PB.parsePassPipeline(MPM, Passes));
     }
@@ -327,6 +331,7 @@ aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
         FPM.addPass(LoopVectorizePass());
         FPM.addPass(SLPVectorizerPass());
         FPM.addPass(LoadStoreVectorizerPass());
+        FPM.addPass(VectorCombinePass());
 
         if (comp_ctx->enable_llvm_pgo || comp_ctx->use_prof_file) {
             /* LICM pass: loop invariant code motion, attempting to remove
@@ -404,7 +409,10 @@ aot_compress_aot_func_names(AOTCompContext *comp_ctx, uint32 *p_size)
         NameStrs.push_back(str);
     }
 
-    if (collectPGOFuncNameStrings(NameStrs, true, Result)) {
+#if LLVM_VERSION_MAJOR < 18
+#define collectGlobalObjectNameStrings collectPGOFuncNameStrings
+#endif
+    if (collectGlobalObjectNameStrings(NameStrs, true, Result)) {
         aot_set_last_error("collect pgo func name strings failed");
         return NULL;
     }

+ 19 - 0
core/iwasm/compilation/aot_llvm_extra2.cpp

@@ -58,6 +58,7 @@ convert(LLVMRelocMode reloc_mode)
 #endif
 }
 
+#if LLVM_VERSION_MAJOR < 18
 static llvm::CodeGenOpt::Level
 convert(LLVMCodeGenOptLevel opt_level)
 {
@@ -74,6 +75,24 @@ convert(LLVMCodeGenOptLevel opt_level)
     bh_assert(0);
     return llvm::CodeGenOpt::None;
 }
+#else
+/* Translate an LLVM-C LLVMCodeGenOptLevel value into the matching
+   llvm::CodeGenOptLevel enumerator. A value that matches none of the four
+   known levels trips bh_assert and falls back to None. */
+static llvm::CodeGenOptLevel
+convert(LLVMCodeGenOptLevel opt_level)
+{
+    if (opt_level == LLVMCodeGenLevelNone)
+        return llvm::CodeGenOptLevel::None;
+    if (opt_level == LLVMCodeGenLevelLess)
+        return llvm::CodeGenOptLevel::Less;
+    if (opt_level == LLVMCodeGenLevelDefault)
+        return llvm::CodeGenOptLevel::Default;
+    if (opt_level == LLVMCodeGenLevelAggressive)
+        return llvm::CodeGenOptLevel::Aggressive;
+
+    /* Not one of the known levels */
+    bh_assert(0);
+    return llvm::CodeGenOptLevel::None;
+}
+#endif
 
 static llvm::Optional<llvm::CodeModel::Model>
 convert(LLVMCodeModel code_model, bool *jit)

+ 27 - 0
core/iwasm/compilation/aot_orc_extra.cpp

@@ -3,6 +3,10 @@
  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  */
 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
 #include "llvm-c/LLJIT.h"
 #include "llvm-c/Orc.h"
 #include "llvm-c/OrcEE.h"
@@ -44,6 +48,7 @@ class InProgressLookupState;
 class OrcV2CAPIHelper
 {
   public:
+#if LLVM_VERSION_MAJOR < 18
     using PoolEntry = SymbolStringPtr::PoolEntry;
     using PoolEntryPtr = SymbolStringPtr::PoolEntryPtr;
 
@@ -86,6 +91,7 @@ class OrcV2CAPIHelper
         S.S = P;
     }
 
+#endif
     static InProgressLookupState *extractLookupState(LookupState &LS)
     {
         return LS.IPLS.release();
@@ -101,6 +107,20 @@ class OrcV2CAPIHelper
 } // namespace llvm
 
 // ORC.h
+#if LLVM_VERSION_MAJOR >= 18
+/* Expose an unsafe pool-entry handle as the opaque C API reference by
+   reinterpreting the raw pointer it carries. */
+inline LLVMOrcSymbolStringPoolEntryRef
+wrap(SymbolStringPoolEntryUnsafe E)
+{
+    auto *RawEntry = E.rawPtr();
+    return reinterpret_cast<LLVMOrcSymbolStringPoolEntryRef>(RawEntry);
+}
+
+/* Rebuild an unsafe pool-entry handle from the opaque C API reference by
+   reinterpreting it as a pool-entry pointer. */
+inline SymbolStringPoolEntryUnsafe
+unwrap(LLVMOrcSymbolStringPoolEntryRef E)
+{
+    using PoolEntry = SymbolStringPoolEntryUnsafe::PoolEntry;
+    return SymbolStringPoolEntryUnsafe(reinterpret_cast<PoolEntry *>(E));
+}
+#endif
+
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ExecutionSession, LLVMOrcExecutionSessionRef)
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(IRTransformLayer, LLVMOrcIRTransformLayerRef)
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(JITDylib, LLVMOrcJITDylibRef)
@@ -108,8 +128,10 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(JITTargetMachineBuilder,
                                    LLVMOrcJITTargetMachineBuilderRef)
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ObjectTransformLayer,
                                    LLVMOrcObjectTransformLayerRef)
+#if LLVM_VERSION_MAJOR < 18
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(OrcV2CAPIHelper::PoolEntry,
                                    LLVMOrcSymbolStringPoolEntryRef)
+#endif
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ObjectLayer, LLVMOrcObjectLayerRef)
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(SymbolStringPool, LLVMOrcSymbolStringPoolRef)
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ThreadSafeModule, LLVMOrcThreadSafeModuleRef)
@@ -292,8 +314,13 @@ LLVMOrcSymbolStringPoolEntryRef
 LLVMOrcLLLazyJITMangleAndIntern(LLVMOrcLLLazyJITRef J,
                                 const char *UnmangledName)
 {
+#if LLVM_VERSION_MAJOR < 18
     return wrap(OrcV2CAPIHelper::moveFromSymbolStringPtr(
         unwrap(J)->mangleAndIntern(UnmangledName)));
+#else
+    return wrap(SymbolStringPoolEntryUnsafe::take(
+        unwrap(J)->mangleAndIntern(UnmangledName)));
+#endif
 }
 
 LLVMOrcJITDylibRef

+ 3 - 3
core/iwasm/compilation/debug/dwarf_extractor.cpp

@@ -152,7 +152,7 @@ dwarf_gen_mock_vm_info(AOTCompContext *comp_ctx)
 
     comp_unit = LLVMDIBuilderCreateCompileUnit(
       comp_ctx->debug_builder, LLVMDWARFSourceLanguageC, file_info,
-      "ant compiler", 12, 0, NULL, 0, 1, NULL, 0, LLVMDWARFEmissionFull, 0, 0,
+      "WAMR AoT compiler", 12, 0, NULL, 0, 1, NULL, 0, LLVMDWARFEmissionFull, 0, 0,
       0, "/", 1, "", 0);
 
     LLVMTypeRef ParamTys[] = {
@@ -208,8 +208,8 @@ dwarf_gen_comp_unit_info(const AOTCompContext *comp_ctx)
 
         comp_unit = LLVMDIBuilderCreateCompileUnit(
             comp_ctx->debug_builder, LLDB_TO_LLVM_LANG_TYPE(lang_type),
-            comp_ctx->debug_file, "ant compiler", 12, 0, NULL, 0, 1, NULL, 0,
-            LLVMDWARFEmissionFull, 0, 0, 0, "/", 1, "", 0);
+            comp_ctx->debug_file, "WAMR AoT compiler", 12, 0, NULL, 0, 1, NULL,
+            0, LLVMDWARFEmissionFull, 0, 0, 0, "/", 1, "", 0);
     }
     return comp_unit;
 }

+ 8 - 4
core/iwasm/compilation/simd/simd_conversions.c

@@ -158,11 +158,15 @@ simd_integer_narrow_common(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
         return false;
     }
 
-    /* sat */
+    /* Refer to:
+     * https://github.com/WebAssembly/spec/blob/main/proposals/simd/SIMD.md#integer-to-integer-narrowing
+     * Regardless of whether the operation is signed or unsigned, the input
+     * lanes are interpreted as signed integers.
+     */
     if (!(vec1 = simd_saturate(comp_ctx, func_ctx, e_sat_i16x8, vec1, min, max,
-                               is_signed))
+                               true))
         || !(vec2 = simd_saturate(comp_ctx, func_ctx, e_sat_i16x8, vec2, min,
-                                  max, is_signed))) {
+                                  max, true))) {
         return false;
     }
 
@@ -740,4 +744,4 @@ aot_compile_simd_i64x2_extmul_i32x4(AOTCompContext *comp_ctx,
 {
     return simd_integer_extmul(comp_ctx, func_ctx, lower_half, is_signed,
                                e_i64x2_extmul_i32x4);
-}
+}

+ 11 - 6
core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp

@@ -4379,13 +4379,18 @@ cmp_r_r_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src,
  * @return true if success, false otherwise
  */
 static bool
-cmp_imm_imm_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 data1_src,
-                     int32 data2_src)
+cmp_imm_imm_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int64 data1_src,
+                     int64 data2_src)
 {
-    Imm imm(data1_src);
-    a.mov(regs_i64[REG_I64_FREE_IDX], imm);
-    imm.setValue(data2_src);
-    a.cmp(regs_i64[REG_I64_FREE_IDX], imm);
+    /* imm -> m64: store data2_src as an 8-byte value into the jit_cache
+       field of WASMExecEnv, addressed through the exec_env hard register.
+       NOTE(review): presumably done because cmp cannot encode a full
+       64-bit immediate operand - confirm */
+    const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info();
+    x86::Mem mem = x86::qword_ptr(regs_i64[hreg_info->exec_env_hreg_index],
+                                  offsetof(WASMExecEnv, jit_cache));
+    Imm imm(data2_src);
+    mov_imm_to_m(a, mem, imm, 8);
+
+    /* Load data1_src into the free i64 register, then compare it with the
+       memory operand that now holds data2_src */
+    a.mov(regs_i64[REG_I64_FREE_IDX], data1_src);
+    a.cmp(regs_i64[REG_I64_FREE_IDX], mem);
+    /* reg_no_dst is not used by this function */
     (void)reg_no_dst;
     return true;
 }

+ 5 - 1
core/iwasm/fast-jit/fe/jit_emit_numberic.c

@@ -1066,13 +1066,15 @@ DEF_UNI_INT_CONST_OPS(shru)
 static int32
 do_i32_const_shl(int32 lhs, int32 rhs)
 {
+    /* Keep only the low 5 bits of the count so the shift amount is always
+       in 0..31, below the 32-bit operand width.
+       NOTE(review): presumably mirrors WebAssembly's modulo-width shift
+       count semantics - confirm */
+    rhs &= 31;
     return (int32)((uint32)lhs << (uint32)rhs);
 }
 
 static int64
 do_i64_const_shl(int64 lhs, int64 rhs)
 {
-    return (int32)((uint64)lhs << (uint64)rhs);
+    /* Keep only the low 6 bits of the count so the shift amount is always
+       in 0..63, below the 64-bit operand width. The shifted value is
+       returned at full 64-bit width (the removed line cast it to int32). */
+    rhs &= 63LL;
+    return (uint64)lhs << (uint64)rhs;
 }
 
 DEF_BI_INT_CONST_OPS(shrs, >>)
@@ -1080,12 +1082,14 @@ DEF_BI_INT_CONST_OPS(shrs, >>)
 static int32
 do_i32_const_shru(int32 lhs, int32 rhs)
 {
+    /* Keep only the low 5 bits of the count so the shift amount is always
+       in 0..31; lhs is shifted as an unsigned 32-bit value */
+    rhs &= 31;
     return (uint32)lhs >> rhs;
 }
 
 static int64
 do_i64_const_shru(int64 lhs, int64 rhs)
 {
+    /* Keep only the low 6 bits of the count so the shift amount is always
+       in 0..63; lhs is shifted as an unsigned 64-bit value */
+    rhs &= 63LL;
     return (uint64)lhs >> rhs;
 }
 

+ 0 - 1
core/iwasm/fast-jit/jit_compiler.h

@@ -70,7 +70,6 @@ typedef struct JitInterpSwitchInfo {
 typedef struct JitCompOptions {
     uint32 code_cache_size;
     uint32 opt_level;
-    bool linux_perf_support;
 } JitCompOptions;
 
 bool

+ 1 - 1
core/iwasm/include/aot_export.h

@@ -58,6 +58,7 @@ typedef struct AOTCompOption {
     bool disable_llvm_lto;
     bool enable_llvm_pgo;
     bool enable_stack_estimation;
+    bool quick_invoke_c_api_import;
     char *use_prof_file;
     uint32_t opt_level;
     uint32_t size_level;
@@ -65,7 +66,6 @@ typedef struct AOTCompOption {
     uint32_t bounds_checks;
     uint32_t stack_bounds_checks;
     uint32_t segue_flags;
-    bool linux_perf_support;
     char **custom_sections;
     uint32_t custom_sections_count;
     const char *stack_usage_file;

+ 15 - 2
core/iwasm/include/wasm_c_api.h

@@ -181,7 +181,8 @@ typedef union MemAllocOption {
 struct wasm_config_t {
     mem_alloc_type_t mem_alloc_type;
     MemAllocOption mem_alloc_option;
-    bool linux_perf_support;
+    uint32_t segue_flags;
+    bool enable_linux_perf;
     /*TODO: wasi args*/
 };
 
@@ -189,7 +190,7 @@ struct wasm_config_t {
  * by default:
  * - mem_alloc_type is Alloc_With_System_Allocator
  * - mem_alloc_option is all 0
- * - linux_perf_support is false
+ * - enable_linux_perf is false
  */
 WASM_API_EXTERN own wasm_config_t* wasm_config_new(void);
 
@@ -200,6 +201,17 @@ wasm_config_set_mem_alloc_opt(wasm_config_t *, mem_alloc_type_t, MemAllocOption
 WASM_API_EXTERN own wasm_config_t*
 wasm_config_set_linux_perf_opt(wasm_config_t *, bool);
 
+/**
+ * Enable using GS register as the base address of linear memory in linux x86_64,
+ * which may speed up linear memory access for LLVM AOT/JIT:
+ *   bit0 to bit4 denote i32.load, i64.load, f32.load, f64.load, v128.load
+ *   bit8 to bit12 denote i32.store, i64.store, f32.store, f64.store, v128.store
+ * For example, 0x01 enables i32.load, 0x0100 enables i32.store.
+ * To enable all load/store operations, use 0x1F1F
+ */
+WASM_API_EXTERN wasm_config_t*
+wasm_config_set_segue_flags(wasm_config_t *config, uint32_t segue_flags);
+
 // Engine
 
 WASM_DECLARE_OWN(engine)
@@ -405,6 +417,7 @@ struct wasm_ref_t;
 
 typedef struct wasm_val_t {
   wasm_valkind_t kind;
+  uint8_t __paddings[7];
   union {
     int32_t i32;
     int64_t i64;

+ 19 - 5
core/iwasm/include/wasm_export.h

@@ -172,12 +172,12 @@ typedef struct RuntimeInitArgs {
     /**
      * If enabled
      * - llvm-jit will output a jitdump file for `perf inject`
-     * - aot. TBD
+     * - aot will output a perf-${pid}.map for `perf record`
      * - fast-jit. TBD
      * - multi-tier-jit. TBD
      * - interpreter. TBD
      */
-    bool linux_perf_support;
+    bool enable_linux_perf;
 } RuntimeInitArgs;
 
 #ifndef WASM_VALKIND_T_DEFINED
@@ -199,6 +199,7 @@ struct wasm_ref_t;
 
 typedef struct wasm_val_t {
     wasm_valkind_t kind;
+    uint8_t __paddings[7];
     union {
         /* also represent a function index */
         int32_t i32;
@@ -212,6 +213,14 @@ typedef struct wasm_val_t {
 } wasm_val_t;
 #endif
 
+/**
+ * Log levels accepted by wasm_runtime_set_log_level().
+ * The numeric values run from 0 (FATAL) to 4 (VERBOSE).
+ */
+typedef enum {
+    WASM_LOG_LEVEL_FATAL = 0,
+    WASM_LOG_LEVEL_ERROR = 1,
+    WASM_LOG_LEVEL_WARNING = 2,
+    WASM_LOG_LEVEL_DEBUG = 3,
+    WASM_LOG_LEVEL_VERBOSE = 4
+} log_level_t;
+
 /**
  * Initialize the WASM runtime environment, and also initialize
  * the memory allocator with system allocator, which calls os_malloc
@@ -234,6 +243,14 @@ wasm_runtime_init(void);
 WASM_RUNTIME_API_EXTERN bool
 wasm_runtime_full_init(RuntimeInitArgs *init_args);
 
+/**
+ * Set the log level. To be called after the runtime is initialized.
+ *
+ * @param level the log level to set
+ */
+WASM_RUNTIME_API_EXTERN void
+wasm_runtime_set_log_level(log_level_t level);
+
 /**
  * Query whether a certain running mode is supported for the runtime
  *
@@ -924,9 +941,6 @@ wasm_runtime_clear_exception(wasm_module_inst_t module_inst);
  *  - Another thread has a copy of `wasm_module_inst_t` of
  *    the module instance and wants to terminate it asynchronously.
  *
- * This function is provided only when WAMR is built with threading enabled.
- * (`WASM_ENABLE_THREAD_MGR=1`)
- *
  * @param module_inst the WASM module instance
  */
 WASM_RUNTIME_API_EXTERN void

+ 4 - 0
core/iwasm/interpreter/wasm.h

@@ -129,6 +129,10 @@ typedef struct WASMType {
     /* Code block to call llvm jit functions of this
        kind of function type from fast jit jitted code */
     void *call_to_llvm_jit_from_fast_jit;
+#endif
+#if WASM_ENABLE_QUICK_AOT_ENTRY != 0
+    /* Quick AOT/JIT entry of this func type */
+    void *quick_aot_entry;
 #endif
     /* types of params and results */
     uint8 types[1];

+ 91 - 48
core/iwasm/interpreter/wasm_interp_classic.c

@@ -2149,11 +2149,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
 #if !defined(OS_ENABLE_HW_BOUND_CHECK)              \
     || WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS == 0 \
     || WASM_ENABLE_BULK_MEMORY != 0
-#if WASM_ENABLE_THREAD_MGR == 0
-                    linear_mem_size = memory->memory_data_size;
-#else
                     linear_mem_size = GET_LINEAR_MEMORY_SIZE(memory);
-#endif
 #endif
                 }
 
@@ -4113,7 +4109,7 @@ llvm_jit_call_func_bytecode(WASMModuleInstance *module_inst,
     uint32 result_count = func_type->result_count;
     uint32 ext_ret_count = result_count > 1 ? result_count - 1 : 0;
     uint32 func_idx = (uint32)(function - module_inst->e->functions);
-    bool ret;
+    bool ret = false;
 
 #if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0)
     if (!llvm_jit_alloc_frame(exec_env, function - module_inst->e->functions)) {
@@ -4141,7 +4137,8 @@ llvm_jit_call_func_bytecode(WASMModuleInstance *module_inst,
             if (size > UINT32_MAX
                 || !(argv1 = wasm_runtime_malloc((uint32)size))) {
                 wasm_set_exception(module_inst, "allocate memory failed");
-                return false;
+                ret = false;
+                goto fail;
             }
         }
 
@@ -4165,7 +4162,7 @@ llvm_jit_call_func_bytecode(WASMModuleInstance *module_inst,
         if (!ret) {
             if (argv1 != argv1_buf)
                 wasm_runtime_free(argv1);
-            return ret;
+            goto fail;
         }
 
         /* Get extra result values */
@@ -4199,15 +4196,41 @@ llvm_jit_call_func_bytecode(WASMModuleInstance *module_inst,
 
         if (argv1 != argv1_buf)
             wasm_runtime_free(argv1);
-        return true;
+        ret = true;
     }
     else {
-        ret = wasm_runtime_invoke_native(
-            exec_env, module_inst->func_ptrs[func_idx], func_type, NULL, NULL,
-            argv, argc, argv);
+#if WASM_ENABLE_QUICK_AOT_ENTRY != 0
+        /* Quick call if the quick jit entry is registered */
+        if (func_type->quick_aot_entry) {
+            void (*invoke_native)(
+                void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+                uint32 *argv_ret) = func_type->quick_aot_entry;
+            invoke_native(module_inst->func_ptrs[func_idx],
+                          func_type->result_count > 0
+                              ? func_type->types[func_type->param_count]
+                              : VALUE_TYPE_VOID,
+                          exec_env, argv, argv);
+            ret = !wasm_copy_exception(module_inst, NULL);
+        }
+        else
+#endif
+        {
+            ret = wasm_runtime_invoke_native(
+                exec_env, module_inst->func_ptrs[func_idx], func_type, NULL,
+                NULL, argv, argc, argv);
 
-        return ret && !wasm_copy_exception(module_inst, NULL) ? true : false;
+            if (ret)
+                ret = !wasm_copy_exception(module_inst, NULL);
+        }
     }
+
+fail:
+
+#if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0)
+    llvm_jit_free_frame(exec_env);
+#endif
+
+    return ret;
 }
 #endif /* end of WASM_ENABLE_JIT != 0 */
 
@@ -4216,16 +4239,11 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env,
                       WASMFunctionInstance *function, uint32 argc,
                       uint32 argv[])
 {
-    WASMRuntimeFrame *prev_frame = wasm_exec_env_get_cur_frame(exec_env);
-    WASMInterpFrame *frame, *outs_area;
+    WASMRuntimeFrame *frame = NULL, *prev_frame, *outs_area;
+    RunningMode running_mode =
+        wasm_runtime_get_running_mode((WASMModuleInstanceCommon *)module_inst);
     /* Allocate sufficient cells for all kinds of return values.  */
-    unsigned all_cell_num =
-        function->ret_cell_num > 2 ? function->ret_cell_num : 2;
-    /* This frame won't be used by JITed code, so only allocate interp
-       frame here.  */
-    unsigned frame_size = wasm_interp_interp_frame_size(all_cell_num);
-    unsigned i;
-    bool copy_argv_from_frame = true;
+    bool alloc_frame = true;
 
     if (argc < function->param_cell_num) {
         char buf[128];
@@ -4248,25 +4266,56 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env,
     }
 #endif
 
-    if (!(frame = ALLOC_FRAME(exec_env, frame_size, prev_frame)))
-        return;
+    if (!function->is_import_func) {
+        /* No need to alloc frame when calling LLVM JIT function */
+#if WASM_ENABLE_JIT != 0
+        if (running_mode == Mode_LLVM_JIT) {
+            alloc_frame = false;
+        }
+#if WASM_ENABLE_LAZY_JIT != 0 && WASM_ENABLE_FAST_JIT != 0
+        else if (running_mode == Mode_Multi_Tier_JIT) {
+            /* Tier-up from Fast JIT to LLVM JIT, call llvm jit function
+               if it is compiled, else call fast jit function */
+            uint32 func_idx = (uint32)(function - module_inst->e->functions);
+            if (module_inst->module->func_ptrs_compiled
+                    [func_idx - module_inst->module->import_function_count]) {
+                alloc_frame = false;
+            }
+        }
+#endif
+#endif
+    }
 
-    outs_area = wasm_exec_env_wasm_stack_top(exec_env);
-    frame->function = NULL;
-    frame->ip = NULL;
-    /* There is no local variable. */
-    frame->sp = frame->lp + 0;
+    if (alloc_frame) {
+        unsigned all_cell_num =
+            function->ret_cell_num > 2 ? function->ret_cell_num : 2;
+        unsigned frame_size;
 
-    if ((uint8 *)(outs_area->lp + function->param_cell_num)
-        > exec_env->wasm_stack.s.top_boundary) {
-        wasm_set_exception(module_inst, "wasm operand stack overflow");
-        return;
-    }
+        prev_frame = wasm_exec_env_get_cur_frame(exec_env);
+        /* This frame won't be used by JITed code, so only allocate interp
+           frame here.  */
+        frame_size = wasm_interp_interp_frame_size(all_cell_num);
 
-    if (argc > 0)
-        word_copy(outs_area->lp, argv, argc);
+        if (!(frame = ALLOC_FRAME(exec_env, frame_size, prev_frame)))
+            return;
 
-    wasm_exec_env_set_cur_frame(exec_env, frame);
+        outs_area = wasm_exec_env_wasm_stack_top(exec_env);
+        frame->function = NULL;
+        frame->ip = NULL;
+        /* There is no local variable. */
+        frame->sp = frame->lp + 0;
+
+        if ((uint8 *)(outs_area->lp + function->param_cell_num)
+            > exec_env->wasm_stack.s.top_boundary) {
+            wasm_set_exception(module_inst, "wasm operand stack overflow");
+            return;
+        }
+
+        if (argc > 0)
+            word_copy(outs_area->lp, argv, argc);
+
+        wasm_exec_env_set_cur_frame(exec_env, frame);
+    }
 
 #if defined(os_writegsbase)
     {
@@ -4292,9 +4341,6 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env,
         }
     }
     else {
-        RunningMode running_mode =
-            wasm_runtime_get_running_mode((wasm_module_inst_t)module_inst);
-
         if (running_mode == Mode_Interp) {
             wasm_interp_call_func_bytecode(module_inst, exec_env, function,
                                            frame);
@@ -4308,9 +4354,6 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env,
         else if (running_mode == Mode_LLVM_JIT) {
             llvm_jit_call_func_bytecode(module_inst, exec_env, function, argc,
                                         argv);
-            /* For llvm jit, the results have been stored in argv,
-               no need to copy them from stack frame again */
-            copy_argv_from_frame = false;
         }
 #endif
 #if WASM_ENABLE_LAZY_JIT != 0 && WASM_ENABLE_FAST_JIT != 0 \
@@ -4323,9 +4366,6 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env,
                     [func_idx - module_inst->module->import_function_count]) {
                 llvm_jit_call_func_bytecode(module_inst, exec_env, function,
                                             argc, argv);
-                /* For llvm jit, the results have been stored in argv,
-                   no need to copy them from stack frame again */
-                copy_argv_from_frame = false;
             }
             else {
                 fast_jit_call_func_bytecode(module_inst, exec_env, function,
@@ -4346,7 +4386,8 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env,
 
     /* Output the return value to the caller */
     if (!wasm_copy_exception(module_inst, NULL)) {
-        if (copy_argv_from_frame) {
+        if (alloc_frame) {
+            uint32 i;
             for (i = 0; i < function->ret_cell_num; i++) {
                 argv[i] = *(frame->sp + i - function->ret_cell_num);
             }
@@ -4360,6 +4401,8 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env,
 #endif
     }
 
-    wasm_exec_env_set_cur_frame(exec_env, prev_frame);
-    FREE_FRAME(exec_env, frame);
+    if (alloc_frame) {
+        wasm_exec_env_set_cur_frame(exec_env, prev_frame);
+        FREE_FRAME(exec_env, frame);
+    }
 }

+ 0 - 4
core/iwasm/interpreter/wasm_interp_fast.c

@@ -1917,11 +1917,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
 #if !defined(OS_ENABLE_HW_BOUND_CHECK)              \
     || WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS == 0 \
     || WASM_ENABLE_BULK_MEMORY != 0
-#if WASM_ENABLE_THREAD_MGR == 0
-                    linear_mem_size = memory->memory_data_size;
-#else
                     linear_mem_size = GET_LINEAR_MEMORY_SIZE(memory);
-#endif
 #endif
                 }
 

+ 157 - 53
core/iwasm/interpreter/wasm_loader.c

@@ -641,6 +641,10 @@ load_type_section(const uint8 *buf, const uint8 *buf_end, WASMModule *module,
             type->param_cell_num = (uint16)param_cell_num;
             type->ret_cell_num = (uint16)ret_cell_num;
 
+#if WASM_ENABLE_QUICK_AOT_ENTRY != 0
+            type->quick_aot_entry = wasm_native_lookup_quick_aot_entry(type);
+#endif
+
             /* If there is already a same type created, use it instead */
             for (j = 0; j < i; j++) {
                 if (wasm_type_equal(type, module->types[j])) {
@@ -2675,8 +2679,12 @@ handle_name_section(const uint8 *buf, const uint8 *buf_end, WASMModule *module,
                             if (!(module->functions[func_index]->field_name =
                                       const_str_list_insert(
                                           p, func_name_len, module,
-                                          is_load_from_file_buf, error_buf,
-                                          error_buf_size))) {
+#if WASM_ENABLE_WAMR_COMPILER != 0
+                                          false,
+#else
+                                          is_load_from_file_buf,
+#endif
+                                          error_buf, error_buf_size))) {
                                 return false;
                             }
                         }
@@ -2848,7 +2856,7 @@ static bool
 init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
                                uint32 error_buf_size)
 {
-    LLVMJITOptions llvm_jit_options = wasm_runtime_get_llvm_jit_options();
+    LLVMJITOptions *llvm_jit_options = wasm_runtime_get_llvm_jit_options();
     AOTCompOption option = { 0 };
     char *aot_last_error;
     uint64 size;
@@ -2888,11 +2896,11 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
 
     option.is_jit_mode = true;
 
-    llvm_jit_options = wasm_runtime_get_llvm_jit_options();
-    option.opt_level = llvm_jit_options.opt_level;
-    option.size_level = llvm_jit_options.size_level;
-    option.segue_flags = llvm_jit_options.segue_flags;
-    option.linux_perf_support = llvm_jit_options.linux_perf_support;
+    option.opt_level = llvm_jit_options->opt_level;
+    option.size_level = llvm_jit_options->size_level;
+    option.segue_flags = llvm_jit_options->segue_flags;
+    option.quick_invoke_c_api_import =
+        llvm_jit_options->quick_invoke_c_api_import;
 
 #if WASM_ENABLE_BULK_MEMORY != 0
     option.enable_bulk_memory = true;
@@ -4941,6 +4949,9 @@ typedef struct BranchBlock {
     BranchBlockPatch *patch_list;
     /* This is used to save params frame_offset of if block */
     int16 *param_frame_offsets;
+    /* This is used to store available param num for if/else branch, so the else
+     * opcode can know how many parameters should be copied to the stack */
+    uint32 available_param_num;
 #endif
 
     /* Indicate the operand stack is in polymorphic state.
@@ -6858,15 +6869,18 @@ fail:
  * 1) POP original parameter out;
  * 2) Push and copy original values to dynamic space.
  * The copy instruction format:
- *   Part a: param count
+ *   Part a: available param count
  *   Part b: all param total cell num
  *   Part c: each param's cell_num
  *   Part d: each param's src offset
  *   Part e: each param's dst offset
+ * Note: if the stack is in polymorphic state, the actual copied parameters may
+ * be fewer than the defined number in block type
  */
 static bool
 copy_params_to_dynamic_space(WASMLoaderContext *loader_ctx, bool is_if_block,
-                             char *error_buf, uint32 error_buf_size)
+                             uint32 *p_available_param_count, char *error_buf,
+                             uint32 error_buf_size)
 {
     bool ret = false;
     int16 *frame_offset = NULL;
@@ -6878,35 +6892,47 @@ copy_params_to_dynamic_space(WASMLoaderContext *loader_ctx, bool is_if_block,
     BlockType *block_type = &block->block_type;
     WASMType *wasm_type = block_type->u.type;
     uint32 param_count = block_type->u.type->param_count;
+    uint32 available_param_count = 0;
     int16 condition_offset = 0;
     bool disable_emit = false;
     int16 operand_offset = 0;
+    uint64 size;
 
-    uint64 size = (uint64)param_count * (sizeof(*cells) + sizeof(*src_offsets));
+    if (is_if_block)
+        condition_offset = *loader_ctx->frame_offset;
+
+    /* POP original parameter out */
+    for (i = 0; i < param_count; i++) {
+        int32 available_stack_cell =
+            (int32)(loader_ctx->stack_cell_num - block->stack_cell_num);
+
+        if (available_stack_cell <= 0 && block->is_stack_polymorphic)
+            break;
+
+        POP_OFFSET_TYPE(wasm_type->types[param_count - i - 1]);
+        wasm_loader_emit_backspace(loader_ctx, sizeof(int16));
+    }
+    available_param_count = i;
+
+    size =
+        (uint64)available_param_count * (sizeof(*cells) + sizeof(*src_offsets));
 
     /* For if block, we also need copy the condition operand offset. */
     if (is_if_block)
         size += sizeof(*cells) + sizeof(*src_offsets);
 
     /* Allocate memory for the emit data */
-    if (!(emit_data = loader_malloc(size, error_buf, error_buf_size)))
+    if ((size > 0)
+        && !(emit_data = loader_malloc(size, error_buf, error_buf_size)))
         return false;
 
     cells = emit_data;
     src_offsets = (int16 *)(cells + param_count);
 
-    if (is_if_block)
-        condition_offset = *loader_ctx->frame_offset;
-
-    /* POP original parameter out */
-    for (i = 0; i < param_count; i++) {
-        POP_OFFSET_TYPE(wasm_type->types[param_count - i - 1]);
-        wasm_loader_emit_backspace(loader_ctx, sizeof(int16));
-    }
     frame_offset = loader_ctx->frame_offset;
 
     /* Get each param's cell num and src offset */
-    for (i = 0; i < param_count; i++) {
+    for (i = 0; i < available_param_count; i++) {
         cell = (uint8)wasm_value_type_cell_num(wasm_type->types[i]);
         cells[i] = cell;
         src_offsets[i] = *frame_offset;
@@ -6916,34 +6942,41 @@ copy_params_to_dynamic_space(WASMLoaderContext *loader_ctx, bool is_if_block,
     /* emit copy instruction */
     emit_label(EXT_OP_COPY_STACK_VALUES);
     /* Part a) */
-    emit_uint32(loader_ctx, is_if_block ? param_count + 1 : param_count);
+    emit_uint32(loader_ctx, is_if_block ? available_param_count + 1
+                                        : available_param_count);
     /* Part b) */
     emit_uint32(loader_ctx, is_if_block ? wasm_type->param_cell_num + 1
                                         : wasm_type->param_cell_num);
     /* Part c) */
-    for (i = 0; i < param_count; i++)
+    for (i = 0; i < available_param_count; i++)
         emit_byte(loader_ctx, cells[i]);
     if (is_if_block)
         emit_byte(loader_ctx, 1);
 
     /* Part d) */
-    for (i = 0; i < param_count; i++)
+    for (i = 0; i < available_param_count; i++)
         emit_operand(loader_ctx, src_offsets[i]);
     if (is_if_block)
         emit_operand(loader_ctx, condition_offset);
 
     /* Part e) */
     /* Push to dynamic space. The push will emit the dst offset. */
-    for (i = 0; i < param_count; i++)
+    for (i = 0; i < available_param_count; i++)
         PUSH_OFFSET_TYPE(wasm_type->types[i]);
     if (is_if_block)
         PUSH_OFFSET_TYPE(VALUE_TYPE_I32);
 
+    if (p_available_param_count) {
+        *p_available_param_count = available_param_count;
+    }
+
     ret = true;
 
 fail:
     /* Free the emit data */
-    wasm_runtime_free(emit_data);
+    if (emit_data) {
+        wasm_runtime_free(emit_data);
+    }
 
     return ret;
 }
@@ -7071,7 +7104,7 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func,
     uint8 *func_const_end, *func_const = NULL;
     int16 operand_offset = 0;
     uint8 last_op = 0;
-    bool disable_emit, preserve_local = false;
+    bool disable_emit, preserve_local = false, if_condition_available = true;
     float32 f32_const;
     float64 f64_const;
 
@@ -7141,11 +7174,24 @@ re_scan:
                 break;
 
             case WASM_OP_IF:
+            {
 #if WASM_ENABLE_FAST_INTERP != 0
+                BranchBlock *parent_block = loader_ctx->frame_csp - 1;
+                int32 available_stack_cell =
+                    (int32)(loader_ctx->stack_cell_num
+                            - parent_block->stack_cell_num);
+
+                if (available_stack_cell <= 0
+                    && parent_block->is_stack_polymorphic)
+                    if_condition_available = false;
+                else
+                    if_condition_available = true;
+
                 PRESERVE_LOCAL_FOR_BLOCK();
 #endif
                 POP_I32();
                 goto handle_op_block_and_loop;
+            }
             case WASM_OP_BLOCK:
             case WASM_OP_LOOP:
 #if WASM_ENABLE_FAST_INTERP != 0
@@ -7155,6 +7201,9 @@ re_scan:
             {
                 uint8 value_type;
                 BlockType block_type;
+#if WASM_ENABLE_FAST_INTERP != 0
+                uint32 available_params = 0;
+#endif
 
                 p_org = p - 1;
                 CHECK_BUF(p, p_end, 1);
@@ -7196,9 +7245,27 @@ re_scan:
                 /* Pop block parameters from stack */
                 if (BLOCK_HAS_PARAM(block_type)) {
                     WASMType *wasm_type = block_type.u.type;
-                    for (i = 0; i < block_type.u.type->param_count; i++)
+
+                    BranchBlock *cur_block = loader_ctx->frame_csp - 1;
+#if WASM_ENABLE_FAST_INTERP != 0
+                    available_params = block_type.u.type->param_count;
+#endif
+                    for (i = 0; i < block_type.u.type->param_count; i++) {
+
+                        int32 available_stack_cell =
+                            (int32)(loader_ctx->stack_cell_num
+                                    - cur_block->stack_cell_num);
+                        if (available_stack_cell <= 0
+                            && cur_block->is_stack_polymorphic) {
+#if WASM_ENABLE_FAST_INTERP != 0
+                            available_params = i;
+#endif
+                            break;
+                        }
+
                         POP_TYPE(
                             wasm_type->types[wasm_type->param_count - i - 1]);
+                    }
                 }
 
                 PUSH_CSP(LABEL_TYPE_BLOCK + (opcode - WASM_OP_BLOCK),
@@ -7206,25 +7273,35 @@ re_scan:
 
                 /* Pass parameters to block */
                 if (BLOCK_HAS_PARAM(block_type)) {
-                    for (i = 0; i < block_type.u.type->param_count; i++)
+                    for (i = 0; i < block_type.u.type->param_count; i++) {
                         PUSH_TYPE(block_type.u.type->types[i]);
+#if WASM_ENABLE_FAST_INTERP != 0
+                        if (i >= available_params) {
+                            PUSH_OFFSET_TYPE(block_type.u.type->types[i]);
+                        }
+#endif
+                    }
                 }
 
 #if WASM_ENABLE_FAST_INTERP != 0
                 if (opcode == WASM_OP_BLOCK || opcode == WASM_OP_LOOP) {
                     skip_label();
+
                     if (BLOCK_HAS_PARAM(block_type)) {
                         /* Make sure params are in dynamic space */
-                        if (!copy_params_to_dynamic_space(
-                                loader_ctx, false, error_buf, error_buf_size))
+                        if (!copy_params_to_dynamic_space(loader_ctx, false,
+                                                          NULL, error_buf,
+                                                          error_buf_size))
                             goto fail;
                     }
+
                     if (opcode == WASM_OP_LOOP) {
                         (loader_ctx->frame_csp - 1)->code_compiled =
                             loader_ctx->p_code_compiled;
                     }
                 }
                 else if (opcode == WASM_OP_IF) {
+                    BranchBlock *block = loader_ctx->frame_csp - 1;
                     /* If block has parameters, we should make sure they are in
                      * dynamic space. Otherwise, when else branch is missing,
                      * the later opcode may consume incorrect operand offset.
@@ -7242,8 +7319,7 @@ re_scan:
                      * recover them before entering else branch.
                      *
                      */
-                    if (BLOCK_HAS_PARAM(block_type)) {
-                        BranchBlock *block = loader_ctx->frame_csp - 1;
+                    if (if_condition_available && BLOCK_HAS_PARAM(block_type)) {
                         uint64 size;
 
                         /* skip the if condition operand offset */
@@ -7252,7 +7328,8 @@ re_scan:
                         skip_label();
                         /* Emit a copy instruction */
                         if (!copy_params_to_dynamic_space(
-                                loader_ctx, true, error_buf, error_buf_size))
+                                loader_ctx, true, &block->available_param_num,
+                                error_buf, error_buf_size))
                             goto fail;
 
                         /* Emit the if instruction */
@@ -7273,6 +7350,9 @@ re_scan:
                                         - size / sizeof(int16),
                                     (uint32)size);
                     }
+                    else {
+                        block->available_param_num = 0;
+                    }
 
                     emit_empty_label_addr_and_frame_ip(PATCH_ELSE);
                     emit_empty_label_addr_and_frame_ip(PATCH_END);
@@ -7283,7 +7363,8 @@ re_scan:
 
             case WASM_OP_ELSE:
             {
-                BlockType block_type = (loader_ctx->frame_csp - 1)->block_type;
+                BranchBlock *block = NULL;
+                BlockType block_type;
 
                 if (loader_ctx->csp_num < 2
                     || (loader_ctx->frame_csp - 1)->label_type
@@ -7293,13 +7374,15 @@ re_scan:
                         "opcode else found without matched opcode if");
                     goto fail;
                 }
+                block = loader_ctx->frame_csp - 1;
 
                 /* check whether if branch's stack matches its result type */
-                if (!check_block_stack(loader_ctx, loader_ctx->frame_csp - 1,
-                                       error_buf, error_buf_size))
+                if (!check_block_stack(loader_ctx, block, error_buf,
+                                       error_buf_size))
                     goto fail;
 
-                (loader_ctx->frame_csp - 1)->else_addr = p - 1;
+                block->else_addr = p - 1;
+                block_type = block->block_type;
 
 #if WASM_ENABLE_FAST_INTERP != 0
                 /* if the result of if branch is in local or const area, add a
@@ -7320,10 +7403,9 @@ re_scan:
 
 #if WASM_ENABLE_FAST_INTERP != 0
                 /* Recover top param_count values of frame_offset stack */
-                if (BLOCK_HAS_PARAM((block_type))) {
+                if (block->available_param_num) {
                     uint32 size;
-                    BranchBlock *block = loader_ctx->frame_csp - 1;
-                    size = sizeof(int16) * block_type.u.type->param_cell_num;
+                    size = sizeof(int16) * block->available_param_num;
                     bh_memcpy_s(loader_ctx->frame_offset, size,
                                 block->param_frame_offsets, size);
                     loader_ctx->frame_offset += (size / sizeof(int16));
@@ -8062,13 +8144,33 @@ re_scan:
             case WASM_OP_REF_IS_NULL:
             {
 #if WASM_ENABLE_FAST_INTERP != 0
-                if (!wasm_loader_pop_frame_ref_offset(loader_ctx,
-                                                      VALUE_TYPE_FUNCREF,
-                                                      error_buf, error_buf_size)
-                    && !wasm_loader_pop_frame_ref_offset(
-                        loader_ctx, VALUE_TYPE_EXTERNREF, error_buf,
-                        error_buf_size)) {
-                    goto fail;
+                BranchBlock *cur_block = loader_ctx->frame_csp - 1;
+                int32 block_stack_cell_num =
+                    (int32)(loader_ctx->stack_cell_num
+                            - cur_block->stack_cell_num);
+                if (block_stack_cell_num <= 0) {
+                    if (!cur_block->is_stack_polymorphic) {
+                        set_error_buf(
+                            error_buf, error_buf_size,
+                            "type mismatch: expect data but stack was empty");
+                        goto fail;
+                    }
+                }
+                else {
+                    if (*(loader_ctx->frame_ref - 1) == VALUE_TYPE_FUNCREF
+                        || *(loader_ctx->frame_ref - 1) == VALUE_TYPE_EXTERNREF
+                        || *(loader_ctx->frame_ref - 1) == VALUE_TYPE_ANY) {
+                        if (!wasm_loader_pop_frame_ref_offset(
+                                loader_ctx, *(loader_ctx->frame_ref - 1),
+                                error_buf, error_buf_size)) {
+                            goto fail;
+                        }
+                    }
+                    else {
+                        set_error_buf(error_buf, error_buf_size,
+                                      "type mismatch");
+                        goto fail;
+                    }
                 }
 #else
                 if (!wasm_loader_pop_frame_ref(loader_ctx, VALUE_TYPE_FUNCREF,
@@ -8097,10 +8199,11 @@ re_scan:
                     bool func_declared = false;
                     uint32 j;
 
-                    /* Check whether the function is declared in table segs */
+                    /* Check whether the function is declared in table segs,
+                       note that it doesn't matter whether the table seg's mode
+                       is passive, active or declarative. */
                     for (i = 0; i < module->table_seg_count; i++, table_seg++) {
-                        if (table_seg->elem_type == VALUE_TYPE_FUNCREF
-                            && wasm_elem_is_declarative(table_seg->mode)) {
+                        if (table_seg->elem_type == VALUE_TYPE_FUNCREF) {
                             for (j = 0; j < table_seg->function_count; j++) {
                                 if (table_seg->func_indexes[j] == func_idx) {
                                     func_declared = true;
@@ -8365,8 +8468,6 @@ re_scan:
                                         - module->import_global_count]
                               .type;
 
-                POP_TYPE(global_type);
-
 #if WASM_ENABLE_FAST_INTERP == 0
                 if (global_type == VALUE_TYPE_I64
                     || global_type == VALUE_TYPE_F64) {
@@ -8405,6 +8506,9 @@ re_scan:
                 emit_uint32(loader_ctx, global_idx);
                 POP_OFFSET_TYPE(global_type);
 #endif /* end of WASM_ENABLE_FAST_INTERP */
+
+                POP_TYPE(global_type);
+
                 break;
             }
 

+ 149 - 52
core/iwasm/interpreter/wasm_mini_loader.c

@@ -418,6 +418,10 @@ load_type_section(const uint8 *buf, const uint8 *buf_end, WASMModule *module,
             type->param_cell_num = (uint16)param_cell_num;
             type->ret_cell_num = (uint16)ret_cell_num;
 
+#if WASM_ENABLE_QUICK_AOT_ENTRY != 0
+            type->quick_aot_entry = wasm_native_lookup_quick_aot_entry(type);
+#endif
+
             /* If there is already a same type created, use it instead */
             for (j = 0; j < i; ++j) {
                 if (wasm_type_equal(type, module->types[j])) {
@@ -1874,10 +1878,11 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
     }
 
     option.is_jit_mode = true;
-    option.opt_level = llvm_jit_options.opt_level;
-    option.size_level = llvm_jit_options.size_level;
-    option.segue_flags = llvm_jit_options.segue_flags;
-    option.linux_perf_support = llvm_jit_options.linux_perf_support;
+    option.opt_level = llvm_jit_options->opt_level;
+    option.size_level = llvm_jit_options->size_level;
+    option.segue_flags = llvm_jit_options->segue_flags;
+    option.quick_invoke_c_api_import =
+        llvm_jit_options->quick_invoke_c_api_import;
 
 #if WASM_ENABLE_BULK_MEMORY != 0
     option.enable_bulk_memory = true;
@@ -3595,6 +3600,9 @@ typedef struct BranchBlock {
     BranchBlockPatch *patch_list;
     /* This is used to save params frame_offset of if block */
     int16 *param_frame_offsets;
+    /* This is used to store available param num for if/else branch, so the else
+     * opcode can know how many parameters should be copied to the stack */
+    uint32 available_param_num;
 #endif
 
     /* Indicate the operand stack is in polymorphic state.
@@ -5344,16 +5352,20 @@ fail:
  * 1) POP original parameter out;
  * 2) Push and copy original values to dynamic space.
  * The copy instruction format:
- *   Part a: param count
+ *   Part a: available param count
  *   Part b: all param total cell num
  *   Part c: each param's cell_num
  *   Part d: each param's src offset
  *   Part e: each param's dst offset
+ * Note: if the stack is in polymorphic state, the actual copied parameters may
+ * be fewer than the defined number in block type
  */
 static bool
 copy_params_to_dynamic_space(WASMLoaderContext *loader_ctx, bool is_if_block,
-                             char *error_buf, uint32 error_buf_size)
+                             uint32 *p_available_param_count, char *error_buf,
+                             uint32 error_buf_size)
 {
+    bool ret = false;
     int16 *frame_offset = NULL;
     uint8 *cells = NULL, cell;
     int16 *src_offsets = NULL;
@@ -5363,35 +5375,47 @@ copy_params_to_dynamic_space(WASMLoaderContext *loader_ctx, bool is_if_block,
     BlockType *block_type = &block->block_type;
     WASMType *wasm_type = block_type->u.type;
     uint32 param_count = block_type->u.type->param_count;
+    uint32 available_param_count = 0;
     int16 condition_offset = 0;
     bool disable_emit = false;
     int16 operand_offset = 0;
+    uint64 size;
+
+    if (is_if_block)
+        condition_offset = *loader_ctx->frame_offset;
+
+    /* POP original parameter out */
+    for (i = 0; i < param_count; i++) {
+        int32 available_stack_cell =
+            (int32)(loader_ctx->stack_cell_num - block->stack_cell_num);
+
+        if (available_stack_cell <= 0 && block->is_stack_polymorphic)
+            break;
 
-    uint64 size = (uint64)param_count * (sizeof(*cells) + sizeof(*src_offsets));
+        POP_OFFSET_TYPE(wasm_type->types[param_count - i - 1]);
+        wasm_loader_emit_backspace(loader_ctx, sizeof(int16));
+    }
+    available_param_count = i;
+
+    size =
+        (uint64)available_param_count * (sizeof(*cells) + sizeof(*src_offsets));
 
     /* For if block, we also need copy the condition operand offset. */
     if (is_if_block)
         size += sizeof(*cells) + sizeof(*src_offsets);
 
     /* Allocate memory for the emit data */
-    if (!(emit_data = loader_malloc(size, error_buf, error_buf_size)))
+    if ((size > 0)
+        && !(emit_data = loader_malloc(size, error_buf, error_buf_size)))
         return false;
 
     cells = emit_data;
     src_offsets = (int16 *)(cells + param_count);
 
-    if (is_if_block)
-        condition_offset = *loader_ctx->frame_offset;
-
-    /* POP original parameter out */
-    for (i = 0; i < param_count; i++) {
-        POP_OFFSET_TYPE(wasm_type->types[param_count - i - 1]);
-        wasm_loader_emit_backspace(loader_ctx, sizeof(int16));
-    }
     frame_offset = loader_ctx->frame_offset;
 
     /* Get each param's cell num and src offset */
-    for (i = 0; i < param_count; i++) {
+    for (i = 0; i < available_param_count; i++) {
         cell = (uint8)wasm_value_type_cell_num(wasm_type->types[i]);
         cells[i] = cell;
         src_offsets[i] = *frame_offset;
@@ -5401,37 +5425,43 @@ copy_params_to_dynamic_space(WASMLoaderContext *loader_ctx, bool is_if_block,
     /* emit copy instruction */
     emit_label(EXT_OP_COPY_STACK_VALUES);
     /* Part a) */
-    emit_uint32(loader_ctx, is_if_block ? param_count + 1 : param_count);
+    emit_uint32(loader_ctx, is_if_block ? available_param_count + 1
+                                        : available_param_count);
     /* Part b) */
     emit_uint32(loader_ctx, is_if_block ? wasm_type->param_cell_num + 1
                                         : wasm_type->param_cell_num);
     /* Part c) */
-    for (i = 0; i < param_count; i++)
+    for (i = 0; i < available_param_count; i++)
         emit_byte(loader_ctx, cells[i]);
     if (is_if_block)
         emit_byte(loader_ctx, 1);
 
     /* Part d) */
-    for (i = 0; i < param_count; i++)
+    for (i = 0; i < available_param_count; i++)
         emit_operand(loader_ctx, src_offsets[i]);
     if (is_if_block)
         emit_operand(loader_ctx, condition_offset);
 
     /* Part e) */
     /* Push to dynamic space. The push will emit the dst offset. */
-    for (i = 0; i < param_count; i++)
+    for (i = 0; i < available_param_count; i++)
         PUSH_OFFSET_TYPE(wasm_type->types[i]);
     if (is_if_block)
         PUSH_OFFSET_TYPE(VALUE_TYPE_I32);
 
-    /* Free the emit data */
-    wasm_runtime_free(emit_data);
-    return true;
+    if (p_available_param_count) {
+        *p_available_param_count = available_param_count;
+    }
+
+    ret = true;
 
 fail:
     /* Free the emit data */
-    wasm_runtime_free(emit_data);
-    return false;
+    if (emit_data) {
+        wasm_runtime_free(emit_data);
+    }
+
+    return ret;
 }
 #endif
 
@@ -5498,7 +5528,8 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func,
     uint8 *func_const_end, *func_const = NULL;
     int16 operand_offset = 0;
     uint8 last_op = 0;
-    bool disable_emit, preserve_local = false;
+    bool disable_emit, preserve_local = false, if_condition_available = true;
+    ;
     float32 f32_const;
     float64 f64_const;
 
@@ -5568,11 +5599,23 @@ re_scan:
                 break;
 
             case WASM_OP_IF:
+            {
 #if WASM_ENABLE_FAST_INTERP != 0
+                BranchBlock *parent_block = loader_ctx->frame_csp - 1;
+                int32 available_stack_cell =
+                    (int32)(loader_ctx->stack_cell_num
+                            - parent_block->stack_cell_num);
+
+                if (available_stack_cell <= 0
+                    && parent_block->is_stack_polymorphic)
+                    if_condition_available = false;
+                else
+                    if_condition_available = true;
                 PRESERVE_LOCAL_FOR_BLOCK();
 #endif
                 POP_I32();
                 goto handle_op_block_and_loop;
+            }
             case WASM_OP_BLOCK:
             case WASM_OP_LOOP:
 #if WASM_ENABLE_FAST_INTERP != 0
@@ -5582,6 +5625,9 @@ re_scan:
             {
                 uint8 value_type;
                 BlockType block_type;
+#if WASM_ENABLE_FAST_INTERP != 0
+                uint32 available_params = 0;
+#endif
 
                 p_org = p - 1;
                 value_type = read_uint8(p);
@@ -5612,9 +5658,27 @@ re_scan:
                 /* Pop block parameters from stack */
                 if (BLOCK_HAS_PARAM(block_type)) {
                     WASMType *wasm_type = block_type.u.type;
-                    for (i = 0; i < block_type.u.type->param_count; i++)
+
+                    BranchBlock *cur_block = loader_ctx->frame_csp - 1;
+#if WASM_ENABLE_FAST_INTERP != 0
+                    available_params = block_type.u.type->param_count;
+#endif
+                    for (i = 0; i < block_type.u.type->param_count; i++) {
+
+                        int32 available_stack_cell =
+                            (int32)(loader_ctx->stack_cell_num
+                                    - cur_block->stack_cell_num);
+                        if (available_stack_cell <= 0
+                            && cur_block->is_stack_polymorphic) {
+#if WASM_ENABLE_FAST_INTERP != 0
+                            available_params = i;
+#endif
+                            break;
+                        }
+
                         POP_TYPE(
                             wasm_type->types[wasm_type->param_count - i - 1]);
+                    }
                 }
 
                 PUSH_CSP(LABEL_TYPE_BLOCK + (opcode - WASM_OP_BLOCK),
@@ -5622,8 +5686,14 @@ re_scan:
 
                 /* Pass parameters to block */
                 if (BLOCK_HAS_PARAM(block_type)) {
-                    for (i = 0; i < block_type.u.type->param_count; i++)
+                    for (i = 0; i < block_type.u.type->param_count; i++) {
                         PUSH_TYPE(block_type.u.type->types[i]);
+#if WASM_ENABLE_FAST_INTERP != 0
+                        if (i >= available_params) {
+                            PUSH_OFFSET_TYPE(block_type.u.type->types[i]);
+                        }
+#endif
+                    }
                 }
 
 #if WASM_ENABLE_FAST_INTERP != 0
@@ -5631,8 +5701,9 @@ re_scan:
                     skip_label();
                     if (BLOCK_HAS_PARAM(block_type)) {
                         /* Make sure params are in dynamic space */
-                        if (!copy_params_to_dynamic_space(
-                                loader_ctx, false, error_buf, error_buf_size))
+                        if (!copy_params_to_dynamic_space(loader_ctx, false,
+                                                          NULL, error_buf,
+                                                          error_buf_size))
                             goto fail;
                     }
                     if (opcode == WASM_OP_LOOP) {
@@ -5641,6 +5712,7 @@ re_scan:
                     }
                 }
                 else if (opcode == WASM_OP_IF) {
+                    BranchBlock *block = loader_ctx->frame_csp - 1;
                     /* If block has parameters, we should make sure they are in
                      * dynamic space. Otherwise, when else branch is missing,
                      * the later opcode may consume incorrect operand offset.
@@ -5658,8 +5730,7 @@ re_scan:
                      * recover them before entering else branch.
                      *
                      */
-                    if (BLOCK_HAS_PARAM(block_type)) {
-                        BranchBlock *block = loader_ctx->frame_csp - 1;
+                    if (if_condition_available && BLOCK_HAS_PARAM(block_type)) {
                         uint64 size;
 
                         /* skip the if condition operand offset */
@@ -5668,7 +5739,8 @@ re_scan:
                         skip_label();
                         /* Emit a copy instruction */
                         if (!copy_params_to_dynamic_space(
-                                loader_ctx, true, error_buf, error_buf_size))
+                                loader_ctx, true, &block->available_param_num,
+                                error_buf, error_buf_size))
                             goto fail;
 
                         /* Emit the if instruction */
@@ -5689,6 +5761,9 @@ re_scan:
                                         - size / sizeof(int16),
                                     (uint32)size);
                     }
+                    else {
+                        block->available_param_num = 0;
+                    }
 
                     emit_empty_label_addr_and_frame_ip(PATCH_ELSE);
                     emit_empty_label_addr_and_frame_ip(PATCH_END);
@@ -5699,17 +5774,19 @@ re_scan:
 
             case WASM_OP_ELSE:
             {
+                BranchBlock *block = NULL;
                 BlockType block_type = (loader_ctx->frame_csp - 1)->block_type;
                 bh_assert(loader_ctx->csp_num >= 2
                           && (loader_ctx->frame_csp - 1)->label_type
                                  == LABEL_TYPE_IF);
+                block = loader_ctx->frame_csp - 1;
 
                 /* check whether if branch's stack matches its result type */
-                if (!check_block_stack(loader_ctx, loader_ctx->frame_csp - 1,
-                                       error_buf, error_buf_size))
+                if (!check_block_stack(loader_ctx, block, error_buf,
+                                       error_buf_size))
                     goto fail;
 
-                (loader_ctx->frame_csp - 1)->else_addr = p - 1;
+                block->else_addr = p - 1;
 
 #if WASM_ENABLE_FAST_INTERP != 0
                 /* if the result of if branch is in local or const area, add a
@@ -5730,10 +5807,9 @@ re_scan:
 
 #if WASM_ENABLE_FAST_INTERP != 0
                 /* Recover top param_count values of frame_offset stack */
-                if (BLOCK_HAS_PARAM((block_type))) {
+                if (block->available_param_num) {
                     uint32 size;
-                    BranchBlock *block = loader_ctx->frame_csp - 1;
-                    size = sizeof(int16) * block_type.u.type->param_cell_num;
+                    size = sizeof(int16) * block->available_param_num;
                     bh_memcpy_s(loader_ctx->frame_offset, size,
                                 block->param_frame_offsets, size);
                     loader_ctx->frame_offset += (size / sizeof(int16));
@@ -6356,13 +6432,33 @@ re_scan:
             case WASM_OP_REF_IS_NULL:
             {
 #if WASM_ENABLE_FAST_INTERP != 0
-                if (!wasm_loader_pop_frame_ref_offset(loader_ctx,
-                                                      VALUE_TYPE_FUNCREF,
-                                                      error_buf, error_buf_size)
-                    && !wasm_loader_pop_frame_ref_offset(
-                        loader_ctx, VALUE_TYPE_EXTERNREF, error_buf,
-                        error_buf_size)) {
-                    goto fail;
+                BranchBlock *cur_block = loader_ctx->frame_csp - 1;
+                int32 block_stack_cell_num =
+                    (int32)(loader_ctx->stack_cell_num
+                            - cur_block->stack_cell_num);
+                if (block_stack_cell_num <= 0) {
+                    if (!cur_block->is_stack_polymorphic) {
+                        set_error_buf(
+                            error_buf, error_buf_size,
+                            "type mismatch: expect data but stack was empty");
+                        goto fail;
+                    }
+                }
+                else {
+                    if (*(loader_ctx->frame_ref - 1) == VALUE_TYPE_FUNCREF
+                        || *(loader_ctx->frame_ref - 1) == VALUE_TYPE_EXTERNREF
+                        || *(loader_ctx->frame_ref - 1) == VALUE_TYPE_ANY) {
+                        if (!wasm_loader_pop_frame_ref_offset(
+                                loader_ctx, *(loader_ctx->frame_ref - 1),
+                                error_buf, error_buf_size)) {
+                            goto fail;
+                        }
+                    }
+                    else {
+                        set_error_buf(error_buf, error_buf_size,
+                                      "type mismatch");
+                        goto fail;
+                    }
                 }
 #else
                 if (!wasm_loader_pop_frame_ref(loader_ctx, VALUE_TYPE_FUNCREF,
@@ -6392,10 +6488,11 @@ re_scan:
                     bool func_declared = false;
                     uint32 j;
 
-                    /* Check whether the function is declared in table segs */
+                    /* Check whether the function is declared in table segs,
+                       note that it doesn't matter whether the table seg's mode
+                       is passive, active or declarative. */
                     for (i = 0; i < module->table_seg_count; i++, table_seg++) {
-                        if (table_seg->elem_type == VALUE_TYPE_FUNCREF
-                            && wasm_elem_is_declarative(table_seg->mode)) {
+                        if (table_seg->elem_type == VALUE_TYPE_FUNCREF) {
                             for (j = 0; j < table_seg->function_count; j++) {
                                 if (table_seg->func_indexes[j] == func_idx) {
                                     func_declared = true;
@@ -6630,8 +6727,6 @@ re_scan:
                                         - module->import_global_count]
                               .type;
 
-                POP_TYPE(global_type);
-
 #if WASM_ENABLE_FAST_INTERP == 0
                 if (is_64bit_type(global_type)) {
                     *p_org = WASM_OP_SET_GLOBAL_64;
@@ -6657,6 +6752,8 @@ re_scan:
                 POP_OFFSET_TYPE(global_type);
 #endif /* end of WASM_ENABLE_FAST_INTERP */
 
+                POP_TYPE(global_type);
+
                 (void)is_mutable;
                 break;
             }

+ 23 - 12
core/iwasm/interpreter/wasm_runtime.c

@@ -2271,7 +2271,6 @@ wasm_lookup_table(const WASMModuleInstance *module_inst, const char *name)
 #endif
 
 #ifdef OS_ENABLE_HW_BOUND_CHECK
-
 static void
 call_wasm_with_hw_bound_check(WASMModuleInstance *module_inst,
                               WASMExecEnv *exec_env,
@@ -2301,19 +2300,26 @@ call_wasm_with_hw_bound_check(WASMModuleInstance *module_inst,
         return;
     }
 
-    if (exec_env_tls && (exec_env_tls != exec_env)) {
-        wasm_set_exception(module_inst, "invalid exec env");
-        return;
-    }
+    if (!exec_env_tls) {
+        if (!os_thread_signal_inited()) {
+            wasm_set_exception(module_inst, "thread signal env not inited");
+            return;
+        }
 
-    if (!os_thread_signal_inited()) {
-        wasm_set_exception(module_inst, "thread signal env not inited");
-        return;
+        /* Set thread handle and stack boundary if they haven't been set */
+        wasm_exec_env_set_thread_info(exec_env);
+
+        wasm_runtime_set_exec_env_tls(exec_env);
+    }
+    else {
+        if (exec_env_tls != exec_env) {
+            wasm_set_exception(module_inst, "invalid exec env");
+            return;
+        }
     }
 
     wasm_exec_env_push_jmpbuf(exec_env, &jmpbuf_node);
 
-    wasm_runtime_set_exec_env_tls(exec_env);
     if (os_setjmp(jmpbuf_node.jmpbuf) == 0) {
 #ifndef BH_PLATFORM_WINDOWS
         wasm_interp_call_wasm(module_inst, exec_env, function, argc, argv);
@@ -2323,7 +2329,7 @@ call_wasm_with_hw_bound_check(WASMModuleInstance *module_inst,
         } __except (wasm_copy_exception(module_inst, NULL)
                         ? EXCEPTION_EXECUTE_HANDLER
                         : EXCEPTION_CONTINUE_SEARCH) {
-            /* exception was thrown in wasm_exception_handler */
+            /* Exception was thrown in wasm_exception_handler */
             ret = false;
         }
         has_exception = wasm_copy_exception(module_inst, exception);
@@ -2377,10 +2383,15 @@ wasm_call_function(WASMExecEnv *exec_env, WASMFunctionInstance *function,
     WASMModuleInstance *module_inst =
         (WASMModuleInstance *)exec_env->module_inst;
 
-    /* set thread handle and stack boundary */
+#ifndef OS_ENABLE_HW_BOUND_CHECK
+    /* Set thread handle and stack boundary */
     wasm_exec_env_set_thread_info(exec_env);
+#else
+    /* Set thread info in call_wasm_with_hw_bound_check when
+       hw bound check is enabled */
+#endif
 
-    /* set exec env so it can be later retrieved from instance */
+    /* Set exec env so it can be later retrieved from instance */
     module_inst->e->common.cur_exec_env = exec_env;
 
     interp_call_wasm(module_inst, exec_env, function, argc, argv);

+ 1 - 1
core/iwasm/interpreter/wasm_runtime.h

@@ -221,8 +221,8 @@ typedef struct CApiFuncImport {
 
 /* The common part of WASMModuleInstanceExtra and AOTModuleInstanceExtra */
 typedef struct WASMModuleInstanceExtraCommon {
-    void *contexts[WASM_MAX_INSTANCE_CONTEXTS];
     CApiFuncImport *c_api_func_imports;
+    void *contexts[WASM_MAX_INSTANCE_CONTEXTS];
     /* pointer to the exec env currently used */
     WASMExecEnv *cur_exec_env;
 #if WASM_CONFIGURABLE_BOUNDS_CHECKS != 0

+ 18 - 3
core/iwasm/libraries/lib-pthread/lib_pthread_wrapper.c

@@ -558,6 +558,7 @@ pthread_create_wrapper(wasm_exec_env_t exec_env,
     ThreadRoutineArgs *routine_args = NULL;
     uint32 thread_handle;
     uint32 stack_size = 8192;
+    uint32 aux_stack_start = 0, aux_stack_size;
     int32 ret = -1;
 
     bh_assert(module);
@@ -609,10 +610,22 @@ pthread_create_wrapper(wasm_exec_env_t exec_env,
     routine_args->info_node = info_node;
     routine_args->module_inst = new_module_inst;
 
+    /* Allocate the aux stack beforehand since exec_env->wait_lock is
+       acquired below: if the stack were allocated in
+       wasm_cluster_create_thread, the runtime may call the exported malloc
+       function to allocate it, which acquires exec_env->wait_lock again in
+       wasm_exec_env_set_thread_info, and a recursive lock (or hang) occurs */
+    if (!wasm_cluster_allocate_aux_stack(exec_env, &aux_stack_start,
+                                         &aux_stack_size)) {
+        LOG_ERROR("thread manager error: "
+                  "failed to allocate aux stack space for new thread");
+        goto fail;
+    }
+
     os_mutex_lock(&exec_env->wait_lock);
-    ret =
-        wasm_cluster_create_thread(exec_env, new_module_inst, true,
-                                   pthread_start_routine, (void *)routine_args);
+    ret = wasm_cluster_create_thread(
+        exec_env, new_module_inst, true, aux_stack_start, aux_stack_size,
+        pthread_start_routine, (void *)routine_args);
     if (ret != 0) {
         os_mutex_unlock(&exec_env->wait_lock);
         goto fail;
@@ -636,6 +649,8 @@ fail:
         wasm_runtime_free(info_node);
     if (routine_args)
         wasm_runtime_free(routine_args);
+    if (aux_stack_start)
+        wasm_cluster_free_aux_stack(exec_env, aux_stack_start);
     return ret;
 }
 

+ 1 - 1
core/iwasm/libraries/lib-wasi-threads/lib_wasi_threads_wrapper.c

@@ -119,7 +119,7 @@ thread_spawn_wrapper(wasm_exec_env_t exec_env, uint32 start_arg)
     thread_start_arg->arg = start_arg;
     thread_start_arg->start_func = start_func;
 
-    ret = wasm_cluster_create_thread(exec_env, new_module_inst, false,
+    ret = wasm_cluster_create_thread(exec_env, new_module_inst, false, 0, 0,
                                      thread_start, thread_start_arg);
     if (ret != 0) {
         LOG_ERROR("Failed to spawn a new thread");

+ 1 - 1
core/iwasm/libraries/libc-wasi/libc_wasi_wrapper.c

@@ -1047,7 +1047,7 @@ execute_interruptible_poll_oneoff(
 
         if (wasm_cluster_is_thread_terminated(exec_env)) {
             wasm_runtime_free(in_copy);
-            return EINTR;
+            return __WASI_EINTR;
         }
         else if (*nevents > 0) {
             all_outs_are_type_clock = true;

+ 3 - 2
core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/ssp_config.h

@@ -41,7 +41,8 @@
 #endif
 
 #if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__EMSCRIPTEN__) \
-    && !defined(ESP_PLATFORM) && !defined(DISABLE_CLOCK_NANOSLEEP)
+    && !defined(ESP_PLATFORM) && !defined(DISABLE_CLOCK_NANOSLEEP)           \
+    && !defined(BH_PLATFORM_FREERTOS)
 #define CONFIG_HAS_CLOCK_NANOSLEEP 1
 #else
 #define CONFIG_HAS_CLOCK_NANOSLEEP 0
@@ -54,7 +55,7 @@
 #endif
 
 #if !defined(__APPLE__) && !defined(BH_PLATFORM_LINUX_SGX) && !defined(_WIN32) \
-    && !defined(__COSMOPOLITAN__)
+    && !defined(__COSMOPOLITAN__) && !defined(BH_PLATFORM_FREERTOS)
 #define CONFIG_HAS_PTHREAD_CONDATTR_SETCLOCK 1
 #else
 #define CONFIG_HAS_PTHREAD_CONDATTR_SETCLOCK 0

+ 63 - 17
core/iwasm/libraries/thread-mgr/thread_manager.c

@@ -4,6 +4,7 @@
  */
 
 #include "thread_manager.h"
+#include "../common/wasm_c_api_internal.h"
 
 #if WASM_ENABLE_INTERP != 0
 #include "../interpreter/wasm_runtime.h"
@@ -208,6 +209,33 @@ free_aux_stack(WASMExecEnv *exec_env, uint32 start)
 #endif
 }
 
+bool
+wasm_cluster_allocate_aux_stack(WASMExecEnv *exec_env, uint32 *p_start,
+                                uint32 *p_size)
+{
+    WASMCluster *cluster = wasm_exec_env_get_cluster(exec_env);
+    bool ret;
+
+    os_mutex_lock(&cluster->lock);
+    ret = allocate_aux_stack(exec_env, p_start, p_size);
+    os_mutex_unlock(&cluster->lock);
+
+    return ret;
+}
+
+bool
+wasm_cluster_free_aux_stack(WASMExecEnv *exec_env, uint32 start)
+{
+    WASMCluster *cluster = wasm_exec_env_get_cluster(exec_env);
+    bool ret;
+
+    os_mutex_lock(&cluster->lock);
+    ret = free_aux_stack(exec_env, start);
+    os_mutex_unlock(&cluster->lock);
+
+    return ret;
+}
+
 WASMCluster *
 wasm_cluster_create(WASMExecEnv *exec_env)
 {
@@ -343,6 +371,10 @@ wasm_cluster_destroy(WASMCluster *cluster)
     wasm_debug_instance_destroy(cluster);
 #endif
 
+#if WASM_ENABLE_DUMP_CALL_STACK != 0
+    bh_vector_destroy(&cluster->exception_frames);
+#endif
+
     wasm_runtime_free(cluster);
 }
 
@@ -654,12 +686,13 @@ thread_manager_start_routine(void *arg)
 
 int32
 wasm_cluster_create_thread(WASMExecEnv *exec_env,
-                           wasm_module_inst_t module_inst, bool alloc_aux_stack,
+                           wasm_module_inst_t module_inst,
+                           bool is_aux_stack_allocated, uint32 aux_stack_start,
+                           uint32 aux_stack_size,
                            void *(*thread_routine)(void *), void *arg)
 {
     WASMCluster *cluster;
     WASMExecEnv *new_exec_env;
-    uint32 aux_stack_start = 0, aux_stack_size;
     korp_tid tid;
 
     cluster = wasm_exec_env_get_cluster(exec_env);
@@ -676,17 +709,11 @@ wasm_cluster_create_thread(WASMExecEnv *exec_env,
     if (!new_exec_env)
         goto fail1;
 
-    if (alloc_aux_stack) {
-        if (!allocate_aux_stack(exec_env, &aux_stack_start, &aux_stack_size)) {
-            LOG_ERROR("thread manager error: "
-                      "failed to allocate aux stack space for new thread");
-            goto fail2;
-        }
-
+    if (is_aux_stack_allocated) {
         /* Set aux stack for current thread */
         if (!wasm_exec_env_set_aux_stack(new_exec_env, aux_stack_start,
                                          aux_stack_size)) {
-            goto fail3;
+            goto fail2;
         }
     }
     else {
@@ -699,7 +726,7 @@ wasm_cluster_create_thread(WASMExecEnv *exec_env,
     new_exec_env->suspend_flags.flags = exec_env->suspend_flags.flags;
 
     if (!wasm_cluster_add_exec_env(cluster, new_exec_env))
-        goto fail3;
+        goto fail2;
 
     new_exec_env->thread_start_routine = thread_routine;
     new_exec_env->thread_arg = arg;
@@ -711,7 +738,7 @@ wasm_cluster_create_thread(WASMExecEnv *exec_env,
                             (void *)new_exec_env,
                             APP_THREAD_STACK_SIZE_DEFAULT)) {
         os_mutex_unlock(&new_exec_env->wait_lock);
-        goto fail4;
+        goto fail3;
     }
 
     /* Wait until the new_exec_env->handle is set to avoid it is
@@ -723,12 +750,8 @@ wasm_cluster_create_thread(WASMExecEnv *exec_env,
 
     return 0;
 
-fail4:
-    wasm_cluster_del_exec_env_internal(cluster, new_exec_env, false);
 fail3:
-    /* free the allocated aux stack space */
-    if (alloc_aux_stack)
-        free_aux_stack(exec_env, aux_stack_start);
+    wasm_cluster_del_exec_env_internal(cluster, new_exec_env, false);
 fail2:
     wasm_exec_env_destroy_internal(new_exec_env);
 fail1:
@@ -1303,6 +1326,29 @@ wasm_cluster_set_exception(WASMExecEnv *exec_env, const char *exception)
     data.exception = exception;
 
     os_mutex_lock(&cluster->lock);
+#if WASM_ENABLE_DUMP_CALL_STACK != 0
+    if (has_exception) {
+        /* Save the stack frames of the crashed thread into the cluster */
+        WASMModuleInstance *module_inst =
+            (WASMModuleInstance *)get_module_inst(exec_env);
+
+#if WASM_ENABLE_INTERP != 0
+        if (module_inst->module_type == Wasm_Module_Bytecode
+            && wasm_interp_create_call_stack(exec_env)) {
+            wasm_frame_vec_clone_internal(module_inst->frames,
+                                          &cluster->exception_frames);
+        }
+#endif
+
+#if WASM_ENABLE_AOT != 0
+        if (module_inst->module_type == Wasm_Module_AoT
+            && aot_create_call_stack(exec_env)) {
+            wasm_frame_vec_clone_internal(module_inst->frames,
+                                          &cluster->exception_frames);
+        }
+#endif
+    }
+#endif /* WASM_ENABLE_DUMP_CALL_STACK != 0 */
     cluster->has_exception = has_exception;
     traverse_list(&cluster->exec_env_list, set_exception_visitor, &data);
     os_mutex_unlock(&cluster->lock);

+ 17 - 1
core/iwasm/libraries/thread-mgr/thread_manager.h

@@ -51,6 +51,13 @@ struct WASMCluster {
 #if WASM_ENABLE_DEBUG_INTERP != 0
     WASMDebugInstance *debug_inst;
 #endif
+
+#if WASM_ENABLE_DUMP_CALL_STACK != 0
+    /* When an exception occurs in a thread, the stack frames of that thread are
+     * saved into the cluster
+     */
+    Vector exception_frames;
+#endif
 };
 
 void
@@ -81,7 +88,9 @@ wasm_cluster_dup_c_api_imports(WASMModuleInstanceCommon *module_inst_dst,
 
 int32
 wasm_cluster_create_thread(WASMExecEnv *exec_env,
-                           wasm_module_inst_t module_inst, bool alloc_aux_stack,
+                           wasm_module_inst_t module_inst,
+                           bool is_aux_stack_allocated, uint32 aux_stack_start,
+                           uint32 aux_stack_size,
                            void *(*thread_routine)(void *), void *arg);
 
 int32
@@ -221,6 +230,13 @@ wasm_cluster_traverse_lock(WASMExecEnv *exec_env);
 void
 wasm_cluster_traverse_unlock(WASMExecEnv *exec_env);
 
+bool
+wasm_cluster_allocate_aux_stack(WASMExecEnv *exec_env, uint32 *p_start,
+                                uint32 *p_size);
+
+bool
+wasm_cluster_free_aux_stack(WASMExecEnv *exec_env, uint32 start);
+
 #ifdef __cplusplus
 }
 #endif

+ 46 - 1
core/shared/platform/common/freertos/freertos_thread.c

@@ -205,7 +205,6 @@ os_thread_wrapper(void *arg)
 
     thread_data->start_routine(thread_data->arg);
     os_thread_cleanup();
-    vTaskDelete(NULL);
 }
 
 int
@@ -301,6 +300,22 @@ os_thread_join(korp_tid thread, void **value_ptr)
     return BHT_OK;
 }
 
+int
+os_thread_detach(korp_tid thread)
+{
+    /* Do nothing */
+    (void)thread;
+    return BHT_OK;
+}
+
+void
+os_thread_exit(void *retval)
+{
+    (void)retval;
+    os_thread_cleanup();
+    vTaskDelete(NULL);
+}
+
 int
 os_mutex_init(korp_mutex *mutex)
 {
@@ -452,3 +467,33 @@ os_cond_signal(korp_cond *cond)
 
     return BHT_OK;
 }
+
+int
+os_cond_broadcast(korp_cond *cond)
+{
+    /* Signal all of the wait nodes in the wait list */
+    xSemaphoreTake(cond->wait_list_lock, portMAX_DELAY);
+    if (cond->thread_wait_list) {
+        os_thread_wait_node *p = cond->thread_wait_list;
+        while (p) {
+            xSemaphoreGive(p->sem);
+            p = p->next;
+        }
+    }
+    xSemaphoreGive(cond->wait_list_lock);
+
+    return BHT_OK;
+}
+
+uint8 *
+os_thread_get_stack_boundary()
+{
+    /* TODO: get freertos stack boundary */
+    return NULL;
+}
+
+void
+os_thread_jit_write_protect_np(bool enabled)
+{
+    (void)enabled;
+}

+ 18 - 5
core/shared/platform/common/posix/posix_memmap.c

@@ -5,7 +5,7 @@
 
 #include "platform_api_vmcore.h"
 
-#if (defined(__APPLE__) || defined(__MACH__)) && defined(__arm64__)
+#if defined(__APPLE__) || defined(__MACH__)
 #include <libkern/OSCacheControl.h>
 #endif
 
@@ -40,7 +40,8 @@ void *
 os_mmap(void *hint, size_t size, int prot, int flags, os_file_handle file)
 {
     int map_prot = PROT_NONE;
-#if (defined(__APPLE__) || defined(__MACH__)) && defined(__arm64__)
+#if (defined(__APPLE__) || defined(__MACH__)) && defined(__arm64__) \
+    && defined(TARGET_OS_OSX) && TARGET_OS_OSX != 0
     int map_flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_JIT;
 #else
     int map_flags = MAP_ANONYMOUS | MAP_PRIVATE;
@@ -77,15 +78,19 @@ os_mmap(void *hint, size_t size, int prot, int flags, os_file_handle file)
         map_prot |= PROT_EXEC;
 
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
-#ifndef __APPLE__
     if (flags & MMAP_MAP_32BIT)
         map_flags |= MAP_32BIT;
-#endif
 #endif
 
     if (flags & MMAP_MAP_FIXED)
         map_flags |= MAP_FIXED;
 
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+#if defined(__APPLE__)
+retry_without_map_32bit:
+#endif
+#endif
+
 #if defined(BUILD_TARGET_RISCV64_LP64D) || defined(BUILD_TARGET_RISCV64_LP64)
     /* As AOT relocation in RISCV64 may require that the code/data mapped
      * is in range 0 to 2GB, we try to map the memory with hint address
@@ -143,6 +148,14 @@ os_mmap(void *hint, size_t size, int prot, int flags, os_file_handle file)
     }
 
     if (addr == MAP_FAILED) {
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+#if defined(__APPLE__)
+        if ((map_flags & MAP_32BIT) != 0) {
+            map_flags &= ~MAP_32BIT;
+            goto retry_without_map_32bit;
+        }
+#endif
+#endif
 #if BH_ENABLE_TRACE_MMAP != 0
         os_printf("mmap failed\n");
 #endif
@@ -263,7 +276,7 @@ os_dcache_flush(void)
 void
 os_icache_flush(void *start, size_t len)
 {
-#if (defined(__APPLE__) || defined(__MACH__)) && defined(__arm64__)
+#if defined(__APPLE__) || defined(__MACH__)
     sys_icache_invalidate(start, len);
 #endif
 }

+ 2 - 1
core/shared/platform/common/posix/posix_thread.c

@@ -476,7 +476,8 @@ os_thread_get_stack_boundary()
 void
 os_thread_jit_write_protect_np(bool enabled)
 {
-#if (defined(__APPLE__) || defined(__MACH__)) && defined(__arm64__)
+#if (defined(__APPLE__) || defined(__MACH__)) && defined(__arm64__) \
+    && defined(TARGET_OS_OSX) && TARGET_OS_OSX != 0
     pthread_jit_write_protect_np(enabled);
 #endif
 }

+ 3 - 0
core/shared/platform/esp-idf/shared_platform.cmake

@@ -10,6 +10,9 @@ include_directories(${PLATFORM_SHARED_DIR}/../include)
 
 file (GLOB_RECURSE source_all ${PLATFORM_SHARED_DIR}/*.c)
 
+include (${CMAKE_CURRENT_LIST_DIR}/../common/libc-util/platform_common_libc_util.cmake)
+set (source_all ${source_all} ${PLATFORM_COMMON_LIBC_UTIL_SOURCE})
+
 set (PLATFORM_SHARED_SOURCE ${source_all} ${PLATFORM_COMMON_MATH_SOURCE})
 
 # If enable PSRAM of ESP32-S3, it had better to put AOT into PSRAM, so that

+ 12 - 0
core/shared/platform/linux-sgx/sgx_platform.c

@@ -119,6 +119,18 @@ strcpy(char *dest, const char *src)
     return dest;
 }
 
+#if WASM_ENABLE_LIBC_WASI == 0
+bool
+os_is_handle_valid(os_file_handle *handle)
+{
+    assert(handle != NULL);
+
+    return *handle > -1;
+}
+#else
+/* implemented in posix_file.c */
+#endif
+
 void *
 os_mmap(void *hint, size_t size, int prot, int flags, os_file_handle file)
 {

+ 2 - 1
core/shared/platform/linux-sgx/sgx_socket.c

@@ -5,10 +5,11 @@
 
 #include "platform_api_vmcore.h"
 #include "platform_api_extension.h"
-#include "libc_errno.h"
 
 #ifndef SGX_DISABLE_WASI
 
+#include "libc_errno.h"
+
 #define TRACE_OCALL_FAIL() os_printf("ocall %s failed!\n", __FUNCTION__)
 
 /** OCALLs prototypes **/

+ 2 - 0
core/shared/utils/bh_log.c

@@ -43,7 +43,9 @@ bh_log(LogLevel log_level, const char *file, int line, const char *fmt, ...)
              "%02" PRIu32 ":%02" PRIu32 ":%02" PRIu32 ":%03" PRIu32, h, m, s,
              mills);
 
+#ifndef BH_VPRINTF
     os_printf("[%s - %" PRIXPTR "]: ", buf, (uintptr_t)self);
+#endif
 
     if (file)
         os_printf("%s, line %d, ", file, line);

+ 1 - 1
core/version.h

@@ -7,5 +7,5 @@
 #define _WAMR_VERSION_H_
 #define WAMR_VERSION_MAJOR 1
 #define WAMR_VERSION_MINOR 3
-#define WAMR_VERSION_PATCH 0
+#define WAMR_VERSION_PATCH 1
 #endif

+ 3 - 3
doc/build_wamr.md

@@ -1,7 +1,7 @@
 
 # Build WAMR vmcore
 
-WAMR vmcore is a set of runtime libraries for loading and running Wasm modules. This document introduces how to build the WAMR vmcore.  
+WAMR vmcore is a set of runtime libraries for loading and running Wasm modules. This document introduces how to build the WAMR vmcore.
 
 References:
 - [how to build iwasm](../product-mini/README.md): building different target platforms such as Linux, Windows, Mac etc
@@ -130,7 +130,7 @@ cmake -DWAMR_BUILD_PLATFORM=linux -DWAMR_BUILD_TARGET=ARM
 > Note: if it is enabled, the call stack will be dumped when exception occurs.
 
 > - For interpreter mode, the function names are firstly extracted from *custom name section*, if this section doesn't exist or the feature is not enabled, then the name will be extracted from the import/export sections
-> - For AOT/JIT mode, the function names are extracted from import/export section, please export as many functions as possible (for `wasi-sdk` you can use `-Wl,--export-all`) when compiling wasm module, and add `--enable-dump-call-stack` option to wamrc during compiling AOT module.
+> - For AOT/JIT mode, the function names are extracted from import/export section, please export as many functions as possible (for `wasi-sdk` you can use `-Wl,--export-all`) when compiling wasm module, and add the `--enable-dump-call-stack --emit-custom-sections=name` options to wamrc when compiling the AOT module.
 
 #### **Enable memory profiling (Experiment)**
 - **WAMR_BUILD_MEMORY_PROFILING**=1/0, default to disable if not set
@@ -201,7 +201,7 @@ Currently we only profile the memory consumption of module, module_instance and
 
 > Note: If `WAMR_BUILD_CUSTOM_NAME_SECTION` is enabled, then the `custom name section` will be treated as a special section and consumed by the runtime, not available to the embedder.
 
-> For AoT file, must use `--emit-custom-sections` to specify which sections need to be emitted into AoT file, otherwise all custom sections (except custom name section) will be ignored.
+> For AoT file, must use `--emit-custom-sections` to specify which sections need to be emitted into AoT file, otherwise all custom sections will be ignored.
 
 ### **Stack guard size**
 - **WAMR_BUILD_STACK_GUARD_SIZE**=n, default to N/A if not set.

+ 14 - 0
doc/build_wasm_app.md

@@ -372,6 +372,20 @@ Examples: wamrc -o test.aot test.wasm
           wamrc --target=i386 --format=object -o test.o test.wasm
 ```
 
+## AoT-compiled module compatibility among WAMR versions
+
+When making major ABI changes for AoT-compiled modules, we bump the
+`AOT_CURRENT_VERSION` constant in the `core/config.h` header.
+The runtime refuses to load a module that was AoT-compiled by a wamrc with
+a different `AOT_CURRENT_VERSION`.
+
+We try our best to keep the runtime ABI for AoT-compiled modules
+compatible among WAMR versions with the same `AOT_CURRENT_VERSION`,
+so that combinations of an older wamrc and a newer runtime usually work.
+However, there might be minor incompatibilities from time to time.
+For production use, we recommend using exactly the same version of
+wamrc and the runtime.
+
 ## AoT compilation with 3rd-party toolchains
 
 `wamrc` uses LLVM to compile wasm bytecode to AoT file, this works for most of the architectures, but there may be circumstances where you want to use 3rd-party toolchains to take over some steps of the compilation pipeline, e.g.

+ 14 - 1
doc/perf_tune.md

@@ -134,7 +134,7 @@ $ perf report --input=perf.data
 >
 > For example, with EMCC, you can add `-g2`.
 >
-> If not able to get the context of the custom name section, WAMR will use `aot_func#N` to represent the function name. `N` is from 0. `aot_func#0` represents the first *not imported wasm function*.
+> If not able to get the context of the custom name section, WAMR will use `aot_func#N` to represent the function name. `N` is from 0. `aot_func#0` represents the first _not imported wasm function_.
 
 ### 7.1 Flamegraph
 
@@ -177,3 +177,16 @@ $ ./FlameGraph/flamegraph.pl out.folded > perf.foo.wasm.svg
 > # only jitted functions
 > $ grep "wasm_runtime_invoke_native" out.folded | ./FlameGraph/flamegraph.pl > perf.foo.wasm.only.svg
 > ```
+
+> [!TIP]
+> Use [trans_wasm_func_name.py](../test-tools/trans-jitted-func-name/trans_wasm_func_name.py) to translate jitted function
+> names to their original wasm function names. It requires _wasm-objdump_ in _wabt_ and a _name section_ in the .wasm file.
+>
+> The input file is the output of `./FlameGraph/stackcollapse-perf.pl`.
+>
+> ```bash
+> python trans_wasm_func_name.py --wabt_home <wabt-installation> --folded out.folded <.wasm>
+> ```
+>
+> Then you will see a new file named _out.folded.translated_ which contains the translated folded stacks.
+> All wasm functions are translated to their original names with a prefix like "[Wasm]".

+ 2 - 206
doc/source_debugging.md

@@ -1,13 +1,5 @@
 # WAMR source debugging
 
-References:
-- [Blog: WAMR source debugging basic](https://bytecodealliance.github.io/wamr.dev/blog/wamr-source-debugging-basic/)  
-- [Blog: Debugging wasm with VSCode](https://bytecodealliance.github.io/wamr.dev/blog/debugging-wasm-with-vscode/)
-
-WAMR supports source level debugging based on DWARF (normally used in C/C++/Rust), source map (normally used in AssemblyScript) is not supported.
-
-**The lldb's ability to debug wasm application is based on the patch [Add class WasmProcess for WebAssembly debugging](https://reviews.llvm.org/D78801). Thanks very much to the author @paolosev for such a great work!**
-
 ## Build wasm application with debug information
 To debug your application, you need to compile them with debug information. You can use `-g` option when compiling the source code if you are using wasi-sdk (also work for emcc and rustc):
 ``` bash
@@ -20,205 +12,9 @@ llvm-dwarfdump-12 test.wasm
 ```
 
 ## Debugging with interpreter
-1. Install dependent libraries
-``` bash
-apt update && apt install cmake make g++ libxml2-dev -y
-```
-
-2. Build iwasm with source debugging feature
-``` bash
-cd ${WAMR_ROOT}/product-mini/platforms/linux
-mkdir build && cd build
-cmake .. -DWAMR_BUILD_DEBUG_INTERP=1
-make
-```
-> Note: On MacOS M1 environment, pass the additional `-DWAMR_DISABLE_HW_BOUND_CHECK=1` cmake configuration.
-
-3. Execute iwasm with debug engine enabled
-``` bash
-iwasm -g=127.0.0.1:1234 test.wasm
-# Use port = 0 to allow a random assigned debug port
-```
-
-4. Build customized lldb
-``` bash
-git clone --branch release/13.x --depth=1 https://github.com/llvm/llvm-project
-cd llvm-project
-git apply ${WAMR_ROOT}/build-scripts/lldb_wasm.patch
-mkdir build-lldb
-cmake -S ./llvm -B build-lldb \
-    -G Ninja \
-    -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_PROJECTS="clang;lldb" \
-    -DLLVM_TARGETS_TO_BUILD:STRING="X86;WebAssembly" \
-    -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DLLVM_BUILD_BENCHMARKS:BOOL=OFF \
-    -DLLVM_BUILD_DOCS:BOOL=OFF  -DLLVM_BUILD_EXAMPLES:BOOL=OFF  \
-    -DLLVM_BUILD_LLVM_DYLIB:BOOL=OFF  -DLLVM_BUILD_TESTS:BOOL=OFF  \
-    -DLLVM_ENABLE_BINDINGS:BOOL=OFF  -DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF  \
-    -DLLVM_INCLUDE_DOCS:BOOL=OFF  -DLLVM_INCLUDE_EXAMPLES:BOOL=OFF  \
-    -DLLVM_INCLUDE_TESTS:BOOL=OFF -DLLVM_ENABLE_LIBXML2:BOOL=ON
-cmake --build build-lldb --target lldb --parallel $(nproc)
-# The lldb is generated under build-lldb/bin/lldb
-```
-> Note: If using `CommandLineTools` on MacOS, make sure only one SDK is present in `/Library/Developer/CommandLineTools/SDKs`.
-
-> You can download pre-built `wamr-lldb` binaries from [here](https://github.com/bytecodealliance/wasm-micro-runtime/releases).
 
-5. Launch customized lldb and connect to iwasm
-``` bash
-lldb
-(lldb) process connect -p wasm connect://127.0.0.1:1234
-```
-Then you can use lldb commands to debug your applications. Please refer to [lldb document](https://lldb.llvm.org/use/tutorial.html) for command usage.
+See [Debugging with interpreter](source_debugging_interpreter.md).
 
 ## Debugging with AOT
 
-> Note: AOT debugging is experimental and only a few debugging capabilities are supported.
-
-1. Build lldb (assume you have already built llvm)
-``` bash
-cd ${WAMR_ROOT}/core/deps/llvm/build
-cmake ../llvm -DLLVM_ENABLE_PROJECTS="clang;lldb" -DLLDB_INCLUDE_TESTS=OFF
-make -j $(nproc)
-```
-
-2. Build wamrc with debugging feature
-``` bash
-cd ${WAMR_ROOT}/wamr-compiler
-mkdir build && cd build
-cmake .. -DWAMR_BUILD_DEBUG_AOT=1
-make -j $(nproc)
-```
-
-3. Build iwasm with debugging feature
-``` bash
-cd ${WAMR_ROOT}/product-mini/platforms/linux
-mkdir build && cd build
-cmake .. -DWAMR_BUILD_DEBUG_AOT=1
-make
-```
-
-4. Compile wasm module to AOT module
-``` bash
-wamrc -o test.aot test.wasm
-```
-
-5. Execute iwasm using lldb
-
-   Then you can use lldb commands to debug both wamr runtime and your wasm application in ***current terminal***.
-
-   ``` bash
-   % lldb iwasm -- test.aot
-   (lldb) target create "iwasm"
-   Current executable set to 'iwasm' (x86_64).
-   (lldb) settings set -- target.run-args  "test.aot"
-   (lldb) settings set plugin.jit-loader.gdb.enable on
-   (lldb) b main
-   Breakpoint 1: where = iwasm`main + 48 at main.c:294:11, address = 0x0000000100001020
-   (lldb) run
-   Process 27954 launched: '/tmp/bin/iwasm' (x86_64)
-   Process 27954 stopped
-   * thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1
-       frame #0: 0x0000000100001020 iwasm`main(argc=2, argv=0x00007ff7bfeff678) at main.c:294:11
-      291  int
-      292  main(int argc, char *argv[])
-      293  {
-   -> 294      int32 ret = -1;
-      295      char *wasm_file = NULL;
-      296      const char *func_name = NULL;
-      297      uint8 *wasm_file_buf = NULL;
-   Target 0: (iwasm) stopped.
-   (lldb) c
-   Process 27954 resuming
-   1 location added to breakpoint 1
-   error: need to add support for DW_TAG_base_type 'void' encoded with DW_ATE = 0x0, bit_size = 0
-   Process 27954 stopped
-   * thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 1.2
-       frame #0: 0x00000001002980a0 JIT(0x100298004)`main(exenv=0x0000000301808200) at hello.c:6:9
-      3    int
-      4    main(void)
-      5    {
-   -> 6            printf("hello\n");
-      7
-      8            return 0;
-      9    }
-   Target 0: (iwasm) stopped.
-   (lldb) br l
-   Current breakpoints:
-   1: name = 'main', locations = 2, resolved = 2, hit count = 2
-     1.1: where = iwasm`main + 48 at main.c:294:11, address = 0x0000000100001020, resolved, hit count = 1
-     1.2: where = JIT(0x100298004)`main + 12 at hello.c:6:9, address = 0x00000001002980a0, resolved, hit count = 1
-
-   (lldb)
-   ```
-
-   * In the above example,
-
-     * The first `main` function, which is in `main.c`, is the main
-       function of the iwasm command.
-
-     * The second `main` function, which is in `hello.c`, is the main
-       function of the AOT-compiled wasm module.
-
-   * WAMR AOT debugging uses the GDB JIT loader mechanism to load
-     the debug info of the debugee module.
-     On some platforms including macOS, you need to enable it explicitly.
-     (`settings set plugin.jit-loader.gdb.enable on`)
-
-     References:
-
-     * https://github.com/llvm/llvm-project/blob/main/llvm/docs/DebuggingJITedCode.rst
-     * https://sourceware.org/gdb/current/onlinedocs/gdb/JIT-Interface.html
-
-## Enable debugging in embedders (for interpreter)
-
-There are three steps to enable debugging in embedders
-
-1. Set the debug parameters when initializing the runtime environment:
-    ``` c
-    RuntimeInitArgs init_args;
-    memset(&init_args, 0, sizeof(RuntimeInitArgs));
-
-    /* ... */
-    strcpy(init_args.ip_addr, "127.0.0.1");
-    init_args.instance_port = 1234;
-    /*
-    * Or set port to 0 to use a port assigned by os
-    * init_args.instance_port = 0;
-    */
-
-    if (!wasm_runtime_full_init(&init_args)) {
-        return false;
-    }
-    ```
-
-2. Use `wasm_runtime_start_debug_instance` to create the debug instance:
-    ``` c
-    /*
-        initialization, loading and instantiating
-        ...
-    */
-    exec_env = wasm_runtime_create_exec_env(module_inst, stack_size);
-    uint32_t debug_port = wasm_runtime_start_debug_instance(exec_env);
-    ```
-
-3. Enable source debugging features during building
-
-    You can use `-DWAMR_BUILD_DEBUG_INTERP=1` during cmake configuration
-
-    Or you can set it directly in `cmake` files:
-    ``` cmake
-    set (WAMR_BUILD_DEBUG_INTERP 1)
-    ```
-
-### Attentions
-- Debugging `multi-thread wasm module` is not supported, if your wasm module use pthread APIs (see [pthread_library.md](./pthread_library.md)), or the embedder use `wasm_runtime_spawn_thread` to create new wasm threads, then there may be **unexpected behaviour** during debugging.
-
-    > Note: This attention is about "wasm thread" rather than native threads. Executing wasm functions in several different native threads will **not** affect the normal behaviour of debugging feature.
-
-- When using source debugging features, **don't** create multiple `wasm_instance` from the same `wasm_module`, because the debugger may change the bytecode (set/unset breakpoints) of the `wasm_module`. If you do need several instance from the same bytecode, you need to copy the bytecode to a new butter, then load a new `wasm_module`, and then instantiate the new wasm module to get the new instance.
-
-- If you are running `lldb` on non-linux platforms, please use `platform select remote-linux` command in lldb before connecting to the runtime:
-    ```
-    (lldb) platform select remote-linux
-    (lldb) process connect -p wasm connect://127.0.0.1:1234
-    ```
+See [Debugging with AOT](source_debugging_aot.md).

+ 100 - 0
doc/source_debugging_aot.md

@@ -0,0 +1,100 @@
+# WAMR source debugging (AOT)
+
+## Debugging with AOT
+
+> Note: AOT debugging is experimental and only a few debugging capabilities are supported.
+
+1. Build lldb (assume you have already built llvm)
+``` bash
+cd ${WAMR_ROOT}/core/deps/llvm/build
+cmake ../llvm -DLLVM_ENABLE_PROJECTS="clang;lldb" -DLLDB_INCLUDE_TESTS=OFF
+make -j $(nproc)
+```
+
+2. Build wamrc with debugging feature
+``` bash
+cd ${WAMR_ROOT}/wamr-compiler
+mkdir build && cd build
+cmake .. -DWAMR_BUILD_DEBUG_AOT=1
+make -j $(nproc)
+```
+
+3. Build iwasm with debugging feature
+``` bash
+cd ${WAMR_ROOT}/product-mini/platforms/linux
+mkdir build && cd build
+cmake .. -DWAMR_BUILD_DEBUG_AOT=1
+make
+```
+
+4. Compile wasm module to AOT module
+``` bash
+wamrc -o test.aot test.wasm
+```
+
+5. Execute iwasm using lldb
+
+   Then you can use lldb commands to debug both wamr runtime and your wasm application in ***current terminal***.
+
+   ``` bash
+   % lldb iwasm -- test.aot
+   (lldb) target create "iwasm"
+   Current executable set to 'iwasm' (x86_64).
+   (lldb) settings set -- target.run-args  "test.aot"
+   (lldb) settings set plugin.jit-loader.gdb.enable on
+   (lldb) b main
+   Breakpoint 1: where = iwasm`main + 48 at main.c:294:11, address = 0x0000000100001020
+   (lldb) run
+   Process 27954 launched: '/tmp/bin/iwasm' (x86_64)
+   Process 27954 stopped
+   * thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1
+       frame #0: 0x0000000100001020 iwasm`main(argc=2, argv=0x00007ff7bfeff678) at main.c:294:11
+      291  int
+      292  main(int argc, char *argv[])
+      293  {
+   -> 294      int32 ret = -1;
+      295      char *wasm_file = NULL;
+      296      const char *func_name = NULL;
+      297      uint8 *wasm_file_buf = NULL;
+   Target 0: (iwasm) stopped.
+   (lldb) c
+   Process 27954 resuming
+   1 location added to breakpoint 1
+   error: need to add support for DW_TAG_base_type 'void' encoded with DW_ATE = 0x0, bit_size = 0
+   Process 27954 stopped
+   * thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 1.2
+       frame #0: 0x00000001002980a0 JIT(0x100298004)`main(exenv=0x0000000301808200) at hello.c:6:9
+      3    int
+      4    main(void)
+      5    {
+   -> 6            printf("hello\n");
+      7
+      8            return 0;
+      9    }
+   Target 0: (iwasm) stopped.
+   (lldb) br l
+   Current breakpoints:
+   1: name = 'main', locations = 2, resolved = 2, hit count = 2
+     1.1: where = iwasm`main + 48 at main.c:294:11, address = 0x0000000100001020, resolved, hit count = 1
+     1.2: where = JIT(0x100298004)`main + 12 at hello.c:6:9, address = 0x00000001002980a0, resolved, hit count = 1
+
+   (lldb)
+   ```
+
+   * In the above example,
+
+     * The first `main` function, which is in `main.c`, is the main
+       function of the iwasm command.
+
+     * The second `main` function, which is in `hello.c`, is the main
+       function of the AOT-compiled wasm module.
+
+   * WAMR AOT debugging uses the GDB JIT loader mechanism to load
+     the debug info of the debuggee module.
+     On some platforms including macOS, you need to enable it explicitly.
+     (`settings set plugin.jit-loader.gdb.enable on`)
+
+     References:
+
+     * https://github.com/llvm/llvm-project/blob/main/llvm/docs/DebuggingJITedCode.rst
+     * https://sourceware.org/gdb/current/onlinedocs/gdb/JIT-Interface.html

+ 115 - 0
doc/source_debugging_interpreter.md

@@ -0,0 +1,115 @@
+# WAMR source debugging (interpreter)
+
+References:
+- [Blog: WAMR source debugging basic](https://bytecodealliance.github.io/wamr.dev/blog/wamr-source-debugging-basic/)  
+- [Blog: Debugging wasm with VSCode](https://bytecodealliance.github.io/wamr.dev/blog/debugging-wasm-with-vscode/)
+
+WAMR supports source-level debugging based on DWARF (normally used in C/C++/Rust); source maps (normally used in AssemblyScript) are not supported.
+
+**lldb's ability to debug wasm applications is based on the patch [Add class WasmProcess for WebAssembly debugging](https://reviews.llvm.org/D78801). Many thanks to the author @paolosev for such great work!**
+
+## Debugging with interpreter
+
+1. Install dependent libraries
+``` bash
+apt update && apt install cmake make g++ libxml2-dev -y
+```
+
+2. Build iwasm with source debugging feature
+``` bash
+cd ${WAMR_ROOT}/product-mini/platforms/linux
+mkdir build && cd build
+cmake .. -DWAMR_BUILD_DEBUG_INTERP=1
+make
+```
+> Note: On MacOS M1 environment, pass the additional `-DWAMR_DISABLE_HW_BOUND_CHECK=1` cmake configuration.
+
+3. Execute iwasm with debug engine enabled
+``` bash
+iwasm -g=127.0.0.1:1234 test.wasm
+# Use port = 0 to allow a random assigned debug port
+```
+
+4. Build customized lldb
+``` bash
+git clone --branch release/13.x --depth=1 https://github.com/llvm/llvm-project
+cd llvm-project
+git apply ${WAMR_ROOT}/build-scripts/lldb_wasm.patch
+mkdir build-lldb
+cmake -S ./llvm -B build-lldb \
+    -G Ninja \
+    -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_PROJECTS="clang;lldb" \
+    -DLLVM_TARGETS_TO_BUILD:STRING="X86;WebAssembly" \
+    -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DLLVM_BUILD_BENCHMARKS:BOOL=OFF \
+    -DLLVM_BUILD_DOCS:BOOL=OFF  -DLLVM_BUILD_EXAMPLES:BOOL=OFF  \
+    -DLLVM_BUILD_LLVM_DYLIB:BOOL=OFF  -DLLVM_BUILD_TESTS:BOOL=OFF  \
+    -DLLVM_ENABLE_BINDINGS:BOOL=OFF  -DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF  \
+    -DLLVM_INCLUDE_DOCS:BOOL=OFF  -DLLVM_INCLUDE_EXAMPLES:BOOL=OFF  \
+    -DLLVM_INCLUDE_TESTS:BOOL=OFF -DLLVM_ENABLE_LIBXML2:BOOL=ON
+cmake --build build-lldb --target lldb --parallel $(nproc)
+# The lldb is generated under build-lldb/bin/lldb
+```
+> Note: If using `CommandLineTools` on MacOS, make sure only one SDK is present in `/Library/Developer/CommandLineTools/SDKs`.
+
+> You can download pre-built `wamr-lldb` binaries from [here](https://github.com/bytecodealliance/wasm-micro-runtime/releases).
+
+5. Launch customized lldb and connect to iwasm
+``` bash
+lldb
+(lldb) process connect -p wasm connect://127.0.0.1:1234
+```
+Then you can use lldb commands to debug your applications. Please refer to [lldb document](https://lldb.llvm.org/use/tutorial.html) for command usage.
+
+## Enable debugging in embedders (for interpreter)
+
+There are three steps to enable debugging in embedders
+
+1. Set the debug parameters when initializing the runtime environment:
+    ``` c
+    RuntimeInitArgs init_args;
+    memset(&init_args, 0, sizeof(RuntimeInitArgs));
+
+    /* ... */
+    strcpy(init_args.ip_addr, "127.0.0.1");
+    init_args.instance_port = 1234;
+    /*
+    * Or set port to 0 to use a port assigned by os
+    * init_args.instance_port = 0;
+    */
+
+    if (!wasm_runtime_full_init(&init_args)) {
+        return false;
+    }
+    ```
+
+2. Use `wasm_runtime_start_debug_instance` to create the debug instance:
+    ``` c
+    /*
+        initialization, loading and instantiating
+        ...
+    */
+    exec_env = wasm_runtime_create_exec_env(module_inst, stack_size);
+    uint32_t debug_port = wasm_runtime_start_debug_instance(exec_env);
+    ```
+
+3. Enable source debugging features during building
+
+    You can use `-DWAMR_BUILD_DEBUG_INTERP=1` during cmake configuration
+
+    Or you can set it directly in `cmake` files:
+    ``` cmake
+    set (WAMR_BUILD_DEBUG_INTERP 1)
+    ```
+
+### Attentions
+- Debugging a `multi-thread wasm module` is not supported. If your wasm module uses pthread APIs (see [pthread_library.md](./pthread_library.md)), or the embedder uses `wasm_runtime_spawn_thread` to create new wasm threads, there may be **unexpected behaviour** during debugging.
+
+    > Note: This attention is about "wasm thread" rather than native threads. Executing wasm functions in several different native threads will **not** affect the normal behaviour of debugging feature.
+
+- When using source debugging features, **don't** create multiple `wasm_instance` from the same `wasm_module`, because the debugger may change the bytecode (set/unset breakpoints) of the `wasm_module`. If you do need several instances from the same bytecode, you need to copy the bytecode to a new buffer, then load a new `wasm_module`, and then instantiate the new wasm module to get the new instance.
+
+- If you are running `lldb` on non-linux platforms, please use `platform select remote-linux` command in lldb before connecting to the runtime:
+    ```
+    (lldb) platform select remote-linux
+    (lldb) process connect -p wasm connect://127.0.0.1:1234
+    ```

+ 12 - 0
product-mini/platforms/linux-sgx/CMakeLists.txt

@@ -164,3 +164,15 @@ else()
         OUTPUT_VARIABLE cmdOutput
     )
 endif()
+
+if (WAMR_BUILD_LIBC_WASI EQUAL 1)
+    execute_process(
+        COMMAND bash -c "sed -i -E 's/^WAMR_BUILD_LIBC_WASI = 0/WAMR_BUILD_LIBC_WASI = 1/g' ${CMAKE_CURRENT_SOURCE_DIR}/enclave-sample/Makefile"
+        OUTPUT_VARIABLE cmdOutput
+    )
+else()
+    execute_process(
+        COMMAND bash -c "sed -i -E 's/^WAMR_BUILD_LIBC_WASI = 1/WAMR_BUILD_LIBC_WASI = 0/g' ${CMAKE_CURRENT_SOURCE_DIR}/enclave-sample/Makefile"
+        OUTPUT_VARIABLE cmdOutput
+    )
+endif()

+ 2 - 2
product-mini/platforms/linux-sgx/enclave-sample/Enclave/Enclave.cpp

@@ -510,7 +510,7 @@ handle_cmd_set_log_level(uint64 *args, uint32 argc)
 #endif
 }
 
-#ifndef SGX_DISABLE_WASI
+#if WASM_ENABLE_LIBC_WASI != 0
 static void
 handle_cmd_set_wasi_args(uint64 *args, int32 argc)
 {
@@ -637,7 +637,7 @@ handle_cmd_set_wasi_args(uint64 *args, int32 argc)
 {
     *args = true;
 }
-#endif /* end of SGX_DISABLE_WASI */
+#endif /* end of WASM_ENABLE_LIBC_WASI != 0 */
 
 static void
 handle_cmd_get_version(uint64 *args, uint32 argc)

+ 11 - 2
product-mini/platforms/linux-sgx/enclave-sample/Makefile

@@ -16,6 +16,7 @@ WAMR_BUILD_LIB_RATS = 0
 WAMR_BUILD_GLOBAL_HEAP_POOL = 0
 WAMR_BUILD_GLOBAL_HEAP_SIZE = 10485760
 WAMR_BUILD_STATIC_PGO = 0
+WAMR_BUILD_LIBC_WASI = 1
 
 VMLIB_BUILD_DIR ?= $(CURDIR)/../build
 LIB_RATS_SRC ?= $(VMLIB_BUILD_DIR)/_deps/librats-build
@@ -66,7 +67,9 @@ ifeq ($(WAMR_BUILD_LIB_RATS), 1)
 	App_Include_Paths += -I$(LIB_RATS_INCLUDE_DIR)
 endif
 
-App_C_Flags := $(SGX_COMMON_CFLAGS) -fPIC -Wno-attributes $(App_Include_Paths) -DWASM_ENABLE_STATIC_PGO=$(WAMR_BUILD_STATIC_PGO)
+App_C_Flags := $(SGX_COMMON_CFLAGS) -fPIC -Wno-attributes $(App_Include_Paths) \
+			   -DWASM_ENABLE_STATIC_PGO=$(WAMR_BUILD_STATIC_PGO) \
+			   -DWASM_ENABLE_LIBC_WASI=$(WAMR_BUILD_LIBC_WASI)
 
 # Three configuration modes - Debug, prerelease, release
 #   Debug - Macro DEBUG enabled.
@@ -135,7 +138,13 @@ ifeq ($(WAMR_BUILD_LIB_RATS), 1)
 	Enclave_Include_Paths += -I$(LIB_RATS_INCLUDE_DIR) -I$(SGX_SSL)/include
 endif
 
-Enclave_C_Flags := $(SGX_COMMON_CFLAGS) -nostdinc -fvisibility=hidden -fpie -fstack-protector $(Enclave_Include_Paths) -DWASM_GLOBAL_HEAP_SIZE=$(WAMR_BUILD_GLOBAL_HEAP_SIZE) -DWASM_ENABLE_GLOBAL_HEAP_POOL=$(WAMR_BUILD_GLOBAL_HEAP_POOL) -DWASM_ENABLE_LIB_RATS=$(WAMR_BUILD_LIB_RATS) -DWASM_ENABLE_STATIC_PGO=$(WAMR_BUILD_STATIC_PGO)
+Enclave_C_Flags := $(SGX_COMMON_CFLAGS) -nostdinc -fvisibility=hidden \
+				   -fpie -fstack-protector $(Enclave_Include_Paths) \
+				   -DWASM_GLOBAL_HEAP_SIZE=$(WAMR_BUILD_GLOBAL_HEAP_SIZE) \
+				   -DWASM_ENABLE_GLOBAL_HEAP_POOL=$(WAMR_BUILD_GLOBAL_HEAP_POOL) \
+				   -DWASM_ENABLE_LIB_RATS=$(WAMR_BUILD_LIB_RATS) \
+				   -DWASM_ENABLE_STATIC_PGO=$(WAMR_BUILD_STATIC_PGO) \
+				   -DWASM_ENABLE_LIBC_WASI=$(WAMR_BUILD_LIBC_WASI)
 ifeq ($(SPEC_TEST), 1)
 	Enclave_C_Flags += -DWASM_ENABLE_SPEC_TEST=1
 else

+ 1 - 1
product-mini/platforms/linux-sgx/enclave-sample/Makefile_minimal

@@ -102,7 +102,7 @@ Enclave_Include_Paths := -IEnclave -I$(WAMR_ROOT)/core/iwasm/include \
 Enclave_C_Flags := $(SGX_COMMON_CFLAGS) -nostdinc -fvisibility=hidden -fpie -fstack-protector $(Enclave_Include_Paths)
 
 # disable wasi
-Enclave_C_Flags += -DSGX_DISABLE_WASI
+Enclave_C_Flags += -DWASM_ENABLE_LIBC_WASI=0
 
 ifeq ($(SPEC_TEST), 1)
 	Enclave_C_Flags += -DWASM_ENABLE_SPEC_TEST=1

+ 6 - 0
product-mini/platforms/nuttx/wamr.mk

@@ -133,6 +133,11 @@ CSRCS += aot_loader.c \
          $(AOT_RELOC) \
          aot_intrinsic.c \
          aot_runtime.c
+ifeq ($(CONFIG_INTERPRETERS_WAMR_DEBUG_AOT),y)
+CFLAGS += -DWASM_ENABLE_DEBUG_AOT=1
+CSRCS += elf_parser.c \
+         jit_debug.c
+endif
 else
 CFLAGS += -DWASM_ENABLE_AOT=0
 endif
@@ -412,3 +417,4 @@ VPATH += $(IWASM_ROOT)/libraries/lib-pthread
 VPATH += $(IWASM_ROOT)/common/arch
 VPATH += $(IWASM_ROOT)/aot
 VPATH += $(IWASM_ROOT)/aot/arch
+VPATH += $(IWASM_ROOT)/aot/debug

+ 15 - 7
product-mini/platforms/posix/main.c

@@ -58,7 +58,6 @@ print_help()
 #if WASM_ENABLE_JIT != 0
     printf("  --llvm-jit-size-level=n  Set LLVM JIT size level, default is 3\n");
     printf("  --llvm-jit-opt-level=n   Set LLVM JIT optimization level, default is 3\n");
-    printf("  --perf-profile           Enable linux perf support. For now, it only works in llvm-jit.\n");
 #if defined(os_writegsbase)
     printf("  --enable-segue[=<flags>] Enable using segment register GS as the base address of\n");
     printf("                           linear memory, which may improve performance, flags can be:\n");
@@ -67,6 +66,9 @@ print_help()
     printf("                           Use comma to separate, e.g. --enable-segue=i32.load,i64.store\n");
     printf("                           and --enable-segue means all flags are added.\n");
 #endif
+#endif /* WASM_ENABLE_JIT != 0*/
+#if WASM_ENABLE_LINUX_PERF != 0
+    printf("  --enable-linux-perf      Enable linux perf support. It works in aot and llvm-jit.\n");
 #endif
     printf("  --repl                   Start a very simple REPL (read-eval-print-loop) mode\n"
            "                           that runs commands in the form of \"FUNC ARG...\"\n");
@@ -561,7 +563,9 @@ main(int argc, char *argv[])
     uint32 llvm_jit_size_level = 3;
     uint32 llvm_jit_opt_level = 3;
     uint32 segue_flags = 0;
-    bool enable_linux_perf_support = false;
+#endif
+#if WASM_ENABLE_LINUX_PERF != 0
+    bool enable_linux_perf = false;
 #endif
     wasm_module_t wasm_module = NULL;
     wasm_module_inst_t wasm_module_inst = NULL;
@@ -702,9 +706,6 @@ main(int argc, char *argv[])
             if (segue_flags == (uint32)-1)
                 return print_help();
         }
-        else if (!strncmp(argv[0], "--perf-profile", 14)) {
-            enable_linux_perf_support = true;
-        }
 #endif /* end of WASM_ENABLE_JIT != 0 */
 #if BH_HAS_DLFCN
         else if (!strncmp(argv[0], "--native-lib=", 13)) {
@@ -718,6 +719,11 @@ main(int argc, char *argv[])
             native_lib_list[native_lib_count++] = argv[0] + 13;
         }
 #endif
+#if WASM_ENABLE_LINUX_PERF != 0
+        else if (!strncmp(argv[0], "--enable-linux-perf", 19)) {
+            enable_linux_perf = true;
+        }
+#endif
 #if WASM_ENABLE_MULTI_MODULE != 0
         else if (!strncmp(argv[0],
                           "--module-path=", strlen("--module-path="))) {
@@ -761,7 +767,7 @@ main(int argc, char *argv[])
             gen_prof_file = argv[0] + 16;
         }
 #endif
-        else if (!strncmp(argv[0], "--version", 9)) {
+        else if (!strcmp(argv[0], "--version")) {
             uint32 major, minor, patch;
             wasm_runtime_get_version(&major, &minor, &patch);
             printf("iwasm %" PRIu32 ".%" PRIu32 ".%" PRIu32 "\n", major, minor,
@@ -819,7 +825,9 @@ main(int argc, char *argv[])
     init_args.llvm_jit_size_level = llvm_jit_size_level;
     init_args.llvm_jit_opt_level = llvm_jit_opt_level;
     init_args.segue_flags = segue_flags;
-    init_args.linux_perf_support = enable_linux_perf_support;
+#endif
+#if WASM_ENABLE_LINUX_PERF != 0
+    init_args.enable_linux_perf = enable_linux_perf;
 #endif
 
 #if WASM_ENABLE_DEBUG_INTERP != 0

+ 1 - 1
product-mini/platforms/windows/main.c

@@ -406,7 +406,7 @@ main(int argc, char *argv[])
             ip_addr = argv[0] + 3;
         }
 #endif
-        else if (!strncmp(argv[0], "--version", 9)) {
+        else if (!strcmp(argv[0], "--version")) {
             uint32 major, minor, patch;
             wasm_runtime_get_version(&major, &minor, &patch);
             printf("iwasm %" PRIu32 ".%" PRIu32 ".%" PRIu32 "\n", major, minor,

+ 1 - 0
samples/sgx-ra/CMakeLists.txt

@@ -69,6 +69,7 @@ execute_process (
 add_custom_target (
               iwasm ALL
               DEPENDS vmlib_untrusted vmlib_untrusted vmlib
+              COMMAND make -C ${SGX_PLATFORM_DIR}/enclave-sample clean
               COMMAND make -C  ${SGX_PLATFORM_DIR}/enclave-sample SGX_MODE=HW SGX_DEBUG=1 VMLIB_BUILD_DIR=${CMAKE_BINARY_DIR}
               COMMAND ${CMAKE_COMMAND} -E copy ${SGX_PLATFORM_DIR}/enclave-sample/enclave.signed.so ${CMAKE_BINARY_DIR}
               COMMAND ${CMAKE_COMMAND} -E copy ${SGX_PLATFORM_DIR}/enclave-sample/iwasm ${CMAKE_BINARY_DIR}

+ 1 - 1
samples/wasm-c-api-imports/wasm/CMakeLists.txt

@@ -31,7 +31,7 @@ target_link_options(send_recv PRIVATE
 if(WASM_TO_AOT)
   # wasm -> aot
   add_custom_target(send_recv_aot ALL
-    COMMAND pwd && ${WAMRC_PATH} --enable-multi-thread -o ./send_recv.aot ./send_recv.wasm
+    COMMAND pwd && ${WAMRC_PATH} --invoke-c-api-import --enable-multi-thread -o ./send_recv.aot ./send_recv.wasm
     DEPENDS send_recv
     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
   )

+ 161 - 0
test-tools/append-aot-to-wasm/append_aot_to_wasm.py

@@ -0,0 +1,161 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2019 Intel Corporation.  All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+"""
+It is used to append a .aot to a .wasm as a custom section.
+The custom section name is "aot".
+
+e.g.
+$ python3 append_aot_to_wasm.py --wasm quicksort.wasm --aot quicksort.aot --output quicksort.aot.wasm
+"""
+
+import argparse
+from pathlib import Path
+
+
+def leb128_encode_uint(value: int) -> bytes:
+    """
+    Encode a non-negative integer as unsigned LEB128.
+
+    The value is emitted 7 bits at a time, least-significant group first;
+    every byte except the last one has its high bit (0x80) set.
+    Zero is encoded as a single zero byte.
+
+    Raises ValueError if value is negative (right-shifting a negative
+    integer never reaches zero, so it cannot be encoded here).
+    """
+    if value < 0:
+        raise ValueError(f"cannot encode negative value {value} as unsigned LEB128")
+
+    binary = []
+    # always emit at least one byte so that 0 is encoded as one zero byte
+    while True:
+        lower_7_bits = value & 0x7F
+        value >>= 7
+
+        if value != 0:
+            # more groups follow: set the continuation bit
+            binary.append(0x80 | lower_7_bits)
+        else:
+            binary.append(lower_7_bits)
+            break
+
+    return bytes(binary)
+
+
+def leb128_decode_uint(binary: bytes) -> (int, int):
+    """
+    Decode one unsigned LEB128 integer from the start of binary.
+
+    Returns the tuple (consumed, value): the number of bytes the encoding
+    occupies and the decoded integer. Bytes after the terminating byte are
+    ignored.
+
+    Raises ValueError if binary is empty or ends before a byte without the
+    continuation bit (0x80) is seen.
+    """
+    result = 0
+    shift = 0
+    for i, b in enumerate(binary):
+        result |= (b & 0x7F) << shift
+
+        # a clear high bit marks the last byte of the encoding
+        if not b & 0x80:
+            return i + 1, result
+
+        shift += 7
+
+    raise ValueError("empty or truncated LEB128 input")
+
+
+def is_aligned(n: int, alignment: int):
+    """
+    Return True when no bit of n below alignment is set.
+
+    This equals "n is a multiple of alignment" when alignment is a power
+    of two, which the bit-mask test relies on.
+    """
+    low_bits_mask = alignment - 1
+    return not n & low_bits_mask
+
+
+def align_up(n: int, alignment: int):
+    """
+    Round n up to the next multiple of alignment (n itself if already aligned).
+
+    The bit-mask arithmetic relies on alignment being a power of two.
+    """
+    low_bits_mask = alignment - 1
+    return (n + low_bits_mask) & ~low_bits_mask
+
+
+def present_as_vector(content: bytes) -> bytes:
+    """
+    Prefix content with its byte length, giving "length + payload".
+
+    The length is encoded with leb128_encode_uint; an empty payload gets an
+    explicit single zero byte as its length.
+    """
+    if not content:
+        return b"\x00" + content
+
+    return leb128_encode_uint(len(content)) + content
+
+
+def calc_padding(
+    alignment: int, name_bin_len: int, content_len: int, start_pos: int
+) -> bytes:
+    """
+    Pick the padding vector that lets the section content start on an
+    alignment boundary.
+
+    The content offset is start_pos + 1 (the section id byte) + the size of
+    the LEB128 section length + name_bin_len + the size of the padding
+    vector. The section length includes the padding vector, so its encoded
+    size can change with the padding; candidates are therefore tried one by
+    one, from 0 up to alignment * 2 - 1 zero bytes.
+
+    Returns the encoded padding vector (length prefix followed by the zero
+    bytes). Raises RuntimeError if no candidate gives an aligned offset,
+    instead of silently returning None.
+    """
+    for padding in range(alignment * 2):
+        padding_bin = present_as_vector(b"\x00" * padding)
+        section_length = name_bin_len + len(padding_bin) + content_len
+        section_length_bin = leb128_encode_uint(section_length)
+
+        pos = start_pos + 1 + len(section_length_bin) + name_bin_len + len(padding_bin)
+        if is_aligned(pos, alignment):
+            return padding_bin
+
+    raise RuntimeError(
+        f"no padding found to align content to {alignment} from offset {start_pos}"
+    )
+
+
+def build_content(content: bytes, pos: int, adding: bytes) -> (int, bytes):
+    """
+    Append adding to content and advance pos by the number of bytes appended.
+
+    Returns the tuple (new position, extended content).
+    """
+    advanced_pos = pos + len(adding)
+    extended = content + adding
+    return advanced_pos, extended
+
+
+def create_custom_section_aligned(
+    start_pos: int, name: str, content: bytes, alignment: int = 4
+) -> bytes:
+    """
+    Build a custom section whose content begins at an aligned offset.
+
+    Layout of the returned bytes, where start_pos is the offset at which
+    the first byte is going to be placed:
+
+        | id (1 byte, 0x00) | length | name vec | padding vec | content |
+        ^                                                      ^
+        start_pos                                   multiple of alignment
+    """
+    name_bin = present_as_vector(name.encode("ascii"))
+    padding_bin = calc_padding(alignment, len(name_bin), len(content), start_pos)
+
+    # the section length covers everything that follows the length field
+    payload_length = len(name_bin) + len(padding_bin) + len(content)
+
+    # custom section id 0, then section length, name and padding
+    header = b"\x00" + leb128_encode_uint(payload_length) + name_bin + padding_bin
+
+    pos = start_pos + len(header)
+    assert is_aligned(pos, alignment), f"{pos} is not aligned to {alignment}"
+
+    print(f"append .aot @ offset {pos}(0x{pos:X})")
+    return header + content
+
+
+def main(wasm_file: str, aot_file: str, output: str) -> None:
+    """
+    Copy wasm_file to output and append aot_file to it as a custom section
+    named "aot", with the .aot content starting at a 4-byte aligned offset.
+
+    All three paths are resolved against the current working directory.
+
+    Raises FileNotFoundError if an input file does not exist and ValueError
+    if output is the same file as one of the inputs.
+    """
+    cwd = Path.cwd()
+    wasm_file = cwd.joinpath(wasm_file).resolve()
+    aot_file = cwd.joinpath(aot_file).resolve()
+    output = cwd.joinpath(output).resolve()
+
+    # explicit errors rather than assert: asserts are removed by "python -O"
+    for input_file in (wasm_file, aot_file):
+        if not input_file.exists():
+            raise FileNotFoundError(f"{input_file} does not exist")
+
+    # the output is removed below; that must never hit an input file
+    if output in (wasm_file, aot_file):
+        raise ValueError(f"output {output} must differ from the input files")
+
+    output.unlink(missing_ok=True)
+
+    # read aot content
+    with open(aot_file, "rb") as f:
+        aot_content = f.read()
+
+    # append to .wasm
+    with open(wasm_file, "rb") as f_in, open(output, "wb") as f_out:
+        wasm_content = f_in.read(1024)
+        while wasm_content:
+            f_out.write(wasm_content)
+            wasm_content = f_in.read(1024)
+
+        # f_out.tell() is the offset at which the new section starts
+        f_out.write(create_custom_section_aligned(f_out.tell(), "aot", aot_content, 4))
+
+    print(f"{wasm_file.name} + {aot_file.name} ==> {output}")
+
+
+if __name__ == "__main__":
+    # named "parser" so that the argparse module itself is not shadowed
+    parser = argparse.ArgumentParser()
+    # all three paths are mandatory: main() cannot work without any of them
+    parser.add_argument("--wasm", required=True, help="a .wasm")
+    parser.add_argument("--aot", required=True, help="a .aot")
+    parser.add_argument(
+        "-o", "--output", required=True, help="the output, still be a .wasm"
+    )
+
+    args = parser.parse_args()
+    main(args.wasm, args.aot, args.output)

+ 210 - 0
test-tools/trans-jitted-func-name/trans_wasm_func_name.py

@@ -0,0 +1,210 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2019 Intel Corporation.  All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+"""
+It is used to translate jitted functions' names (in out.folded) to the corresponding names in the name section of a .wasm
+
+Usage:
+
+After
+```
+$ perf script -i perf.data > out.perf
+
+# fold call stacks
+$ ./FlameGraph/stackcollapse-perf.pl out.perf > out.folded
+```
+
+Add a step:
+```
+# translate jitted functions' names
+$ python trans_wasm_func_name.py --wabt_home <wabt-installation> --folded out.folded <.wasm>
+# out.folded -> out.folded.translated
+$ ls out.folded.translated
+```
+
+Then
+```
+# generate flamegraph
+$ ./FlameGraph/flamegraph.pl out.folded.translated > perf.wasm.svg
+```
+
+"""
+
+import argparse
+import os
+from pathlib import Path
+import re
+import shlex
+import subprocess
+
+
+def preflight_check(wabt_home: Path) -> Path:
+    """
+    look for wasm-objdump in <wabt_home>/bin
+
+    return the path of the binary, raise RuntimeError if it is not there
+    """
+    candidate = wabt_home / "bin" / "wasm-objdump"
+    if candidate.exists():
+        return candidate
+
+    raise RuntimeError(f"wasm-objdump not found in {wabt_home}")
+
+
+def collect_import_section_content(wasm_objdump_bin: Path, wasm_file: Path) -> dict:
+    """
+    execute "wasm_objdump_bin -j Import -x <wasm_file>" and return a dict like {function: X, global: Y, memory: Z, table: N}
+
+    only imported functions are counted for now, so the result is either {} or {"function": X}
+
+    {} is also returned when wasm-objdump writes anything to stderr
+    """
+    assert wasm_objdump_bin.exists()
+    assert wasm_file.exists()
+
+    # pass argv as a list, so paths containing blanks or quotes stay intact
+    p = subprocess.run(
+        [str(wasm_objdump_bin), "-j", "Import", "-x", str(wasm_file)],
+        capture_output=True,
+        check=False,
+        text=True,
+    )
+
+    if p.stderr:
+        return {}
+
+    import_section = {}
+    # text mode has already translated line endings, splitlines() handles all of them
+    for line in p.stdout.splitlines():
+        line = line.strip()
+
+        # strip() removed the indentation, so the entry starts with "- func"
+        if line.startswith("- func"):
+            import_section["function"] = import_section.get("function", 0) + 1
+
+    return import_section
+
+
+def collect_name_section_content(wasm_objdump_bin: Path, wasm_file: Path) -> dict:
+    """
+    execute "wasm_objdump_bin -j name -x wasm_file" and return a dict like {function index: function name}
+
+    both index and name are kept as str, exactly as they appear in the output
+
+    raise RuntimeError when wasm-objdump writes anything to stderr
+    """
+    assert wasm_objdump_bin.exists()
+    assert wasm_file.exists()
+
+    # pass argv as a list, so paths containing blanks or quotes stay intact
+    p = subprocess.run(
+        [str(wasm_objdump_bin), "-j", "name", "-x", str(wasm_file)],
+        capture_output=True,
+        check=False,
+        text=True,
+    )
+
+    if p.stderr:
+        raise RuntimeError(f"not found name section in {wasm_file}")
+
+    name_section = {}
+    # text mode has already translated line endings, splitlines() handles all of them
+    for line in p.stdout.splitlines():
+        line = line.strip()
+
+        # - func[0] <__imported_wasi_snapshot_preview1_fd_close>
+        if not line.startswith("- func"):
+            continue
+
+        m = re.match(r"- func\[(\d+)\] <(.+)>", line)
+        assert m
+
+        func_index, func_name = m.groups()
+        name_section[func_index] = func_name
+
+    assert name_section
+    return name_section
+
+
+def replace_function_name(
+    import_section: dict, name_section: dict, folded_in: str, folded_out: str
+) -> None:
+    """
+    read content in <folded_in>. each line will be like:
+
+    wasm_runtime_invoke_native;push_args_end;aot_func_internal#3302;aot_func_internal#3308;asm_sysvec_apic_timer_interrupt;rb_next 1110899
+
+    symbol names are separated by ";" and followed by the sample count
+
+    if there is a symbol named like "aot_func#XXX" or "aot_func_internal#XXX", it will be replaced with the function name in name section by index
+
+    an index missing in name section becomes "function[XXX + imported function count]"
+
+    the result is written to <folded_out>. lines without a trailing sample count are copied as they are
+    """
+    folded_in = Path(folded_in)
+    assert folded_in.exists()
+    folded_out = Path(folded_out)
+
+    import_function_count = import_section.get("function", 0)
+    with folded_in.open("rt", encoding="utf-8") as f_in, folded_out.open(
+        "wt", encoding="utf-8"
+    ) as f_out:
+        # first pass: any "aot_func_internal" symbol means that the plain
+        # "aot_func#XXX" symbols are the precheck wrappers
+        precheck_mode = any("aot_func_internal" in line for line in f_in)
+
+        f_in.seek(0)
+        for line in f_in:
+            line = line.strip()
+
+            folded = re.match(r"(.*) (\d+)", line)
+            if not folded:
+                # blank or malformed line, nothing to translate
+                f_out.write(line + "\n")
+                continue
+
+            syms, samples = folded.groups()
+            new_line = []
+            for sym in syms.split(";"):
+                m = re.match(r"aot_func(_internal)?#(\d+)", sym)
+                if not m:
+                    new_line.append(sym)
+                    continue
+
+                is_internal = m.group(1) is not None
+                func_idx = m.group(2)
+                # NOTE(review): the lookup uses the raw index while the
+                # fallback adds the import count, confirm which index space
+                # "aot_func#XXX" is in
+                if func_idx in name_section:
+                    wasm_func_name = f"[Wasm] {name_section[func_idx]}"
+                else:
+                    # func_idx is a str taken from the regex, convert before adding
+                    wasm_func_name = (
+                        f"[Wasm] function[{int(func_idx) + import_function_count}]"
+                    )
+
+                if precheck_mode and not is_internal:
+                    # aot_func_internal -> xxx
+                    # aot_func --> xxx_precheck
+                    wasm_func_name += "_precheck"
+
+                new_line.append(wasm_func_name)
+
+            # the file is opened in text mode, "\n" is translated on write
+            f_out.write(";".join(new_line) + f" {samples}" + "\n")
+
+    print(f"⚙️ {folded_in} -> {folded_out}")
+
+
+def main(wabt_home: str, wasm_file: str, folded: str) -> None:
+    """
+    translate the aot function symbols of <folded> with the import and name sections of <wasm_file>
+
+    the result is written to <folded>.translated
+    """
+    wabt_dir = Path(wabt_home)
+    wasm_path = Path(wasm_file)
+
+    # wasm-objdump is needed to dump both sections
+    objdump = preflight_check(wabt_dir)
+    imports = collect_import_section_content(objdump, wasm_path)
+    names = collect_name_section_content(objdump, wasm_path)
+
+    translated = folded + ".translated"
+    replace_function_name(imports, names, folded, translated)
+
+
+if __name__ == "__main__":
+    # keep the parser under its own name, so the argparse module is not rebound
+    parser = argparse.ArgumentParser()
+    # --folded and --wabt_home are mandatory: main() cannot build a path from None
+    parser.add_argument(
+        "--folded",
+        required=True,
+        help="stackcollapse-perf.pl generated, like out.folded",
+    )
+    parser.add_argument("wasm_file", help="wasm file")
+    parser.add_argument(
+        "--wabt_home", required=True, help="wabt home, like /opt/wabt-1.0.33"
+    )
+
+    args = parser.parse_args()
+    main(args.wabt_home, args.wasm_file, args.folded)

+ 85 - 16
wamr-compiler/CMakeLists.txt

@@ -44,12 +44,15 @@ add_definitions(-DWASM_ENABLE_CUSTOM_NAME_SECTION=1)
 add_definitions(-DWASM_ENABLE_DUMP_CALL_STACK=1)
 add_definitions(-DWASM_ENABLE_PERF_PROFILING=1)
 add_definitions(-DWASM_ENABLE_LOAD_CUSTOM_SECTION=1)
-add_definitions(-DWASM_ENABLE_LIB_WASI_THREADS=1)
 add_definitions(-DWASM_ENABLE_MODULE_INST_CONTEXT=1)
 
 if (WAMR_BUILD_LLVM_LEGACY_PM EQUAL 1)
   add_definitions(-DWASM_ENABLE_LLVM_LEGACY_PM=1)
-endif()
+endif ()
+
+if (LINUX)
+  add_definitions(-DWASM_ENABLE_LINUX_PERF=1)
+endif ()
 
 if (DEFINED WAMR_BUILD_AOT_FUNC_PREFIX)
   add_definitions(-DAOT_FUNC_PREFIX="${WAMR_BUILD_AOT_FUNC_PREFIX}")
@@ -199,31 +202,97 @@ include_directories (${SHARED_DIR}/include
 enable_language (ASM)
 
 if (NOT MINGW AND NOT MSVC)
-    set(WAMR_BUILD_LIBC_WASI 1)
-else()
-    set(WAMR_BUILD_LIBC_UVWASI 1)
-endif()
+  if ((NOT DEFINED WAMR_BUILD_LIBC_WASI) AND (NOT DEFINED WAMR_BUILD_LIBC_UVWASI))
+    set (WAMR_BUILD_LIBC_WASI 1)
+  endif ()
+
+  if ((WAMR_BUILD_LIBC_WASI EQUAL 1) AND (WAMR_BUILD_LIBC_UVWASI EQUAL 1))
+    # both WASI implementations were requested: keep uvwasi, drop libc-wasi
+    message (WARNING "-- pick WAMR_BUILD_LIBC_UVWASI when both are enabled")
+    set (WAMR_BUILD_LIBC_WASI 0)
+  endif ()
+else ()
+  if (NOT DEFINED WAMR_BUILD_LIBC_UVWASI)
+    set (WAMR_BUILD_LIBC_UVWASI 1)
+  endif ()
+
+  if (WAMR_BUILD_LIBC_WASI EQUAL 1)
+    message (WARNING "-- don't accept WAMR_BUILD_LIBC_WASI=1 on MINGW or MSVC")
+    set (WAMR_BUILD_LIBC_WASI 0)
+  endif ()
+endif ()
+
+if (NOT DEFINED WAMR_BUILD_LIBC_BUILTIN)
+  set (WAMR_BUILD_LIBC_BUILTIN 1)
+endif ()
+
+if (NOT DEFINED WAMR_BUILD_LIB_PTHREAD)
+  set (WAMR_BUILD_LIB_PTHREAD 1)
+endif ()
+
+if (NOT DEFINED WAMR_BUILD_LIB_WASI_THREADS)
+  set (WAMR_BUILD_LIB_WASI_THREADS 1)
+endif ()
+
+if (WAMR_BUILD_LIBC_UVWASI EQUAL 1)
+  message ("-- Libc WASI enabled with uvwasi implementation")
+endif ()
+
+if (WAMR_BUILD_LIBC_WASI EQUAL 1)
+  message ("-- Libc WASI enabled")
+endif ()
+
+if ((NOT WAMR_BUILD_LIBC_WASI) AND (NOT WAMR_BUILD_LIBC_UVWASI))
+  message ("-- Libc WASI disabled")
+endif ()
+
+if (WAMR_BUILD_LIBC_BUILTIN EQUAL 1)
+  message ("-- Libc builtin enabled")
+else ()
+  message ("-- Libc builtin disabled")
+endif ()
+
+if (WAMR_BUILD_LIB_PTHREAD EQUAL 1)
+  message ("-- Lib pthread enabled")
+else ()
+  message ("-- Lib pthread disabled")
+endif ()
+
+if (WAMR_BUILD_LIB_WASI_THREADS EQUAL 1)
+  message ("-- Lib wasi-threads enabled")
+else ()
+  message ("-- Lib wasi-threads disabled")
+endif ()
 
 include (${SHARED_DIR}/platform/${WAMR_BUILD_PLATFORM}/shared_platform.cmake)
 include (${SHARED_DIR}/mem-alloc/mem_alloc.cmake)
 include (${SHARED_DIR}/utils/shared_utils.cmake)
 include (${SHARED_DIR}/utils/uncommon/shared_uncommon.cmake)
 include (${IWASM_DIR}/libraries/thread-mgr/thread_mgr.cmake)
-include (${IWASM_DIR}/libraries/libc-builtin/libc_builtin.cmake)
-if (NOT MINGW)
-  if (NOT MSVC)
-    include (${IWASM_DIR}/libraries/libc-wasi/libc_wasi.cmake)
-  else()
-    include (${IWASM_DIR}/libraries/libc-uvwasi/libc_uvwasi.cmake)
-  endif()
-endif()
-include (${IWASM_DIR}/libraries/lib-pthread/lib_pthread.cmake)
-include (${IWASM_DIR}/libraries/lib-wasi-threads/lib_wasi_threads.cmake)
 include (${IWASM_DIR}/common/iwasm_common.cmake)
 include (${IWASM_DIR}/interpreter/iwasm_interp.cmake)
 include (${IWASM_DIR}/aot/iwasm_aot.cmake)
 include (${IWASM_DIR}/compilation/iwasm_compl.cmake)
 
+if (WAMR_BUILD_LIBC_BUILTIN EQUAL 1)
+  include (${IWASM_DIR}/libraries/libc-builtin/libc_builtin.cmake)
+endif ()
+
+if (WAMR_BUILD_LIBC_UVWASI EQUAL 1)
+  include (${IWASM_DIR}/libraries/libc-uvwasi/libc_uvwasi.cmake)
+endif ()
+
+if (WAMR_BUILD_LIBC_WASI EQUAL 1)
+  include (${IWASM_DIR}/libraries/libc-wasi/libc_wasi.cmake)
+endif ()
+
+if (WAMR_BUILD_LIB_PTHREAD EQUAL 1)
+  include (${IWASM_DIR}/libraries/lib-pthread/lib_pthread.cmake)
+endif ()
+
+if (WAMR_BUILD_LIB_WASI_THREADS EQUAL 1)
+  include (${IWASM_DIR}/libraries/lib-wasi-threads/lib_wasi_threads.cmake)
+endif ()
+
 # set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wconversion -Wsign-conversion")
 if (WAMR_BUILD_TARGET MATCHES "X86_.*" OR WAMR_BUILD_TARGET STREQUAL "AMD_64")
   if (NOT (CMAKE_C_COMPILER MATCHES ".*clang.*" OR CMAKE_C_COMPILER_ID MATCHES ".*Clang" OR MSVC))

+ 23 - 4
wamr-compiler/main.c

@@ -184,9 +184,14 @@ print_help()
     printf("                            multiple names, e.g.\n");
     printf("                                --emit-custom-sections=section1,section2,sectionN\n");
 #if BH_HAS_DLFCN
-    printf("  --native-lib=<lib>       Register native libraries to the WASM module, which\n");
-    printf("                           are shared object (.so) files, for example:\n");
-    printf("                             --native-lib=test1.so --native-lib=test2.so\n");
+    printf("  --native-lib=<lib>        Register native libraries to the WASM module, which\n");
+    printf("                            are shared object (.so) files, for example:\n");
+    printf("                              --native-lib=test1.so --native-lib=test2.so\n");
+#endif
+    printf("  --invoke-c-api-import     Treat unknown import function as wasm-c-api import function and\n");
+    printf("                            quick call it from AOT code\n");
+#if WASM_ENABLE_LINUX_PERF != 0
+    printf("  --enable-linux-perf       Enable linux perf support\n");
 #endif
     printf("  -v=n                      Set log verbose level (0 to 5, default is 2), larger with more log\n");
     printf("  --version                 Show version information\n");
@@ -325,6 +330,9 @@ main(int argc, char *argv[])
     void *native_handle_list[8] = { NULL };
     uint32 native_handle_count = 0;
 #endif
+#if WASM_ENABLE_LINUX_PERF != 0
+    bool enable_linux_perf = false;
+#endif
 
     option.opt_level = 3;
     option.size_level = 3;
@@ -526,7 +534,15 @@ main(int argc, char *argv[])
             native_lib_list[native_lib_count++] = argv[0] + 13;
         }
 #endif
-        else if (!strncmp(argv[0], "--version", 9)) {
+        else if (!strcmp(argv[0], "--invoke-c-api-import")) {
+            option.quick_invoke_c_api_import = true;
+        }
+#if WASM_ENABLE_LINUX_PERF != 0
+        else if (!strcmp(argv[0], "--enable-linux-perf")) {
+            enable_linux_perf = true;
+        }
+#endif
+        else if (!strcmp(argv[0], "--version")) {
             uint32 major, minor, patch;
             wasm_runtime_get_version(&major, &minor, &patch);
             printf("wamrc %u.%u.%u\n", major, minor, patch);
@@ -579,6 +595,9 @@ main(int argc, char *argv[])
     init_args.mem_alloc_option.allocator.malloc_func = malloc;
     init_args.mem_alloc_option.allocator.realloc_func = realloc;
     init_args.mem_alloc_option.allocator.free_func = free;
+#if WASM_ENABLE_LINUX_PERF != 0
+    init_args.enable_linux_perf = enable_linux_perf;
+#endif
 
     /* initialize runtime environment */
     if (!wasm_runtime_full_init(&init_args)) {