Explorar el Código

Merge branch main into dev/gc_refactor

Wenyong Huang hace 2 años
padre
commit
644044522c
Se han modificado 51 ficheros con 2048 adiciones y 336 borrados
  1. 70 0
      RELEASE_NOTES.md
  2. 18 1
      build-scripts/config_common.cmake
  3. 7 0
      core/config.h
  4. 8 1
      core/iwasm/aot/aot_loader.c
  5. 1 0
      core/iwasm/aot/aot_reloc.h
  6. 22 24
      core/iwasm/aot/aot_runtime.c
  7. 0 1
      core/iwasm/aot/debug/jit_debug.c
  8. 42 8
      core/iwasm/common/wasm_c_api.c
  9. 3 0
      core/iwasm/common/wasm_c_api_internal.h
  10. 966 113
      core/iwasm/common/wasm_native.c
  11. 5 0
      core/iwasm/common/wasm_native.h
  12. 71 7
      core/iwasm/common/wasm_runtime_common.c
  13. 12 1
      core/iwasm/common/wasm_runtime_common.h
  14. 44 26
      core/iwasm/compilation/aot_emit_aot_file.c
  15. 246 12
      core/iwasm/compilation/aot_emit_function.c
  16. 3 0
      core/iwasm/compilation/aot_llvm.c
  17. 4 0
      core/iwasm/compilation/aot_llvm.h
  18. 4 1
      core/iwasm/compilation/aot_llvm_extra.cpp
  19. 19 0
      core/iwasm/compilation/aot_llvm_extra2.cpp
  20. 27 0
      core/iwasm/compilation/aot_orc_extra.cpp
  21. 3 3
      core/iwasm/compilation/debug/dwarf_extractor.cpp
  22. 2 1
      core/iwasm/include/aot_comp_option.h
  23. 1 0
      core/iwasm/include/wasm_c_api.h
  24. 1 0
      core/iwasm/include/wasm_export.h
  25. 5 0
      core/iwasm/interpreter/wasm.h
  26. 22 5
      core/iwasm/interpreter/wasm_interp_classic.c
  27. 129 44
      core/iwasm/interpreter/wasm_loader.c
  28. 118 41
      core/iwasm/interpreter/wasm_mini_loader.c
  29. 1 1
      core/iwasm/interpreter/wasm_runtime.h
  30. 18 3
      core/iwasm/libraries/lib-pthread/lib_pthread_wrapper.c
  31. 1 1
      core/iwasm/libraries/lib-wasi-threads/lib_wasi_threads_wrapper.c
  32. 1 1
      core/iwasm/libraries/libc-wasi/libc_wasi_wrapper.c
  33. 3 2
      core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/ssp_config.h
  34. 63 17
      core/iwasm/libraries/thread-mgr/thread_manager.c
  35. 17 1
      core/iwasm/libraries/thread-mgr/thread_manager.h
  36. 18 5
      core/shared/platform/common/posix/posix_memmap.c
  37. 2 1
      core/shared/platform/common/posix/posix_thread.c
  38. 3 0
      core/shared/platform/esp-idf/shared_platform.cmake
  39. 12 0
      core/shared/platform/linux-sgx/sgx_platform.c
  40. 2 1
      core/shared/platform/linux-sgx/sgx_socket.c
  41. 1 1
      core/version.h
  42. 3 3
      doc/build_wamr.md
  43. 14 0
      doc/build_wasm_app.md
  44. 12 0
      product-mini/platforms/linux-sgx/CMakeLists.txt
  45. 2 2
      product-mini/platforms/linux-sgx/enclave-sample/Enclave/Enclave.cpp
  46. 11 2
      product-mini/platforms/linux-sgx/enclave-sample/Makefile
  47. 1 1
      product-mini/platforms/linux-sgx/enclave-sample/Makefile_minimal
  48. 1 1
      product-mini/platforms/posix/main.c
  49. 1 1
      product-mini/platforms/windows/main.c
  50. 1 1
      samples/wasm-c-api-imports/wasm/CMakeLists.txt
  51. 7 2
      wamr-compiler/main.c

+ 70 - 0
RELEASE_NOTES.md

@@ -1,3 +1,73 @@
+## WAMR-1.3.1
+
+### Breaking Changes
+- In multi-threading mode, when an exception is thrown in wasm_func_call(),
+  the returned trap contains the stack frames of the thread where the
+  exception occurred, rather than the stack frames of the main thread.
+- The custom name section is no longer emitted to the AOT file by the
+  `wamrc --enable-dump-call-stack` option; use
+  `wamrc --emit-custom-sections=name` to emit it explicitly.
+
+### New Features
+- Enable AOT linux perf support (#2930)
+
+### Bug Fixes
+- Corrects Zephyr include files for current versions of Zephyr (#2881)
+- Fix possible deadlock in wasm_cluster_spawn_exec_env (#2882)
+- Handle ambiguous fstflags on fd_filestat_set_times (#2892)
+- Fix memory size not updating after growing in interpreter (#2898)
+- fixed(freertos): Fix crash when wasm app call pthread_exit(NULL) (#2970)
+- fast-jit: Fix const shift and const i64 compare issues (#2969)
+- Fix ref.is_null processing in fast-interp loader (#2971)
+- simd-128: The input lanes of integer-to-integer narrowing ops should be interpreted as signed (#2850)
+- Fix ref.func function declared check in wasm loader (#2972)
+- Fix fast-interp polymorphic stack processing (#2974)
+- Fix potential recursive lock in pthread_create_wrapper (#2980)
+- Fix build failure on esp-idf platform (#2991)
+- Return stack frames of crashed thread when using wasm-c-api (#2908)
+- Fix compilation error on iOS due to macOS-specific API (#2995)
+- Fix a bug when emitting the custom name section to the AOT file (#2987)
+- Fix linux-sgx build error when libc-wasi is disabled (#2997)
+
+### Enhancements
+- fix command-reactor: Look for _initialize only if _start not found (#2891)
+- Refactor reloc symbols for riscv (#2894)
+- Avoid memory import failure when wasi-threads is enabled (#2893)
+- interpreter: Simplify memory.grow a bit (#2899)
+- Avoid reporting timestamp if custom logger is used (#2905)
+- Expose API to set log level in embedder (#2907)
+- Add a script to translate jitted function names in flamegraph (#2906)
+- Refine wasm-c-api wasm_func_call (#2922)
+- Add VectorCombine pass for JIT and AOT (#2923)
+- Enable wasm_runtime_terminate for single-threading (#2924)
+- nuttx: Add CONFIG_INTERPRETERS_WAMR_DEBUG_AOT (#2929)
+- Allow to control built-in libraries for wamrc from command line options (#2928)
+- Fix a bug that appends '_precheck' to aot_func (#2936)
+- freertos: Add os_cond_broadcast for pthread wrapper (#2937)
+- Append .aot to .wasm as a custom section named "aot" (#2933)
+- fix(sgx-ra): Fix building when enclave is built without librats ahead (#2968)
+- Refine LLVM JIT function call process (#2925)
+- Refine AOT function call process (#2940)
+- Allow to set segue flags for wasm-c-api JIT (#2926)
+- freertos: Minor changes for freertos libc_wasi build adaption (#2973)
+- freertos: Change ssp_config.h due to clock_nanosleep() not supported in freertos (#2979)
+- aot compiler: Some updates for LLVM 18 (#2981)
+- Enable MAP_32BIT for macOS (#2992)
+- Register quick call entries to speed up the aot/jit func call process (#2978)
+- Refine AOT/JIT code call wasm-c-api import process (#2982)
+
+### Others
+- compilation_on_nuttx.yml: Use docker image to simplify env setup (#2878)
+- samples/spawn-thread: Disable libc and pthread (#2883)
+- Add arm64 to nuttx compilation test (#2886)
+- samples/spawn-thread: Tweak to expose a bug (#2888)
+- Fix typo in CI config and suppress STORE_U8 in TSAN (#2802)
+- Using docker image for nuttx spectest (#2887)
+- doc: Separate source_debugging.md into two files (#2932)
+- doc/build_wasm_app.md: Add a note about aot abi compatibility (#2993)
+
+---
+
 ## WAMR-1.3.0
 
 ### Breaking Changes

+ 18 - 1
build-scripts/config_common.cmake

@@ -489,5 +489,22 @@ if (WAMR_CONFIGUABLE_BOUNDS_CHECKS EQUAL 1)
 endif ()
 if (WAMR_BUILD_LINUX_PERF EQUAL 1)
   add_definitions (-DWASM_ENABLE_LINUX_PERF=1)
-  message ("     Enable linux perf support")
+  message ("     Linux perf support enabled")
+endif ()
+if (NOT DEFINED WAMR_BUILD_QUICK_AOT_ENTRY)
+  # Enable quick aot/jit entries by default
+  set (WAMR_BUILD_QUICK_AOT_ENTRY 1)
+endif ()
+if (WAMR_BUILD_QUICK_AOT_ENTRY EQUAL 1)
+  add_definitions (-DWASM_ENABLE_QUICK_AOT_ENTRY=1)
+  message ("     Quick AOT/JIT entries enabled")
+else ()
+  add_definitions (-DWASM_ENABLE_QUICK_AOT_ENTRY=0)
+  message ("     Quick AOT/JIT entries disabled")
+endif ()
+
+if (APPLE)
+  # On recent macOS versions, by default, the size of page zero is 4GB.
+  # Shrink it so that MAP_32BIT mmap can work.
+  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-pagezero_size,0x4000")
 endif ()

+ 7 - 0
core/config.h

@@ -541,6 +541,13 @@
 #define WASM_ENABLE_LINUX_PERF 0
 #endif
 
+/* Support registering quick AOT/JIT function entries of some func types
+   to speed up the process of invoking the AOT/JIT functions of
+   these types from the host embedder */
+#ifndef WASM_ENABLE_QUICK_AOT_ENTRY
+#define WASM_ENABLE_QUICK_AOT_ENTRY 1
+#endif
+
 #ifndef WASM_TABLE_MAX_SIZE
 #define WASM_TABLE_MAX_SIZE 1024
 #endif

+ 8 - 1
core/iwasm/aot/aot_loader.c

@@ -945,7 +945,9 @@ load_custom_section(const uint8 *buf, const uint8 *buf_end, AOTModule *module,
         case AOT_CUSTOM_SECTION_NAME:
             if (!load_name_section(buf, buf_end, module, is_load_from_file_buf,
                                    error_buf, error_buf_size))
-                goto fail;
+                LOG_VERBOSE("Load name section failed.");
+            else
+                LOG_VERBOSE("Load name section success.");
             break;
 #if WASM_ENABLE_STRINGREF != 0
         case AOT_CUSTOM_SECTION_STRING_LITERAL:
@@ -1969,6 +1971,11 @@ load_types(const uint8 **p_buf, const uint8 *buf_end, AOTModule *module,
 
         func_types[i]->param_cell_num = (uint16)param_cell_num;
         func_types[i]->ret_cell_num = (uint16)ret_cell_num;
+
+#if WASM_ENABLE_QUICK_AOT_ENTRY != 0
+        func_types[i]->quick_aot_entry =
+            wasm_native_lookup_quick_aot_entry(func_types[i]);
+#endif
     }
 
     *p_buf = buf;

+ 1 - 0
core/iwasm/aot/aot_reloc.h

@@ -183,6 +183,7 @@ typedef struct {
     REG_SYM(aot_enlarge_memory),          \
     REG_SYM(aot_set_exception),           \
     REG_SYM(aot_check_app_addr_and_convert),\
+    REG_SYM(wasm_runtime_quick_invoke_c_api_native),\
     { "memset", (void*)aot_memset },      \
     { "memmove", (void*)aot_memmove },    \
     { "memcpy", (void*)aot_memmove },     \

+ 22 - 24
core/iwasm/aot/aot_runtime.c

@@ -54,6 +54,13 @@ bh_static_assert(sizeof(AOTMemoryInstance) == 104);
 bh_static_assert(offsetof(AOTTableInstance, elems) == 24);
 
 bh_static_assert(offsetof(AOTModuleInstanceExtra, stack_sizes) == 0);
+bh_static_assert(offsetof(AOTModuleInstanceExtra, common.c_api_func_imports)
+                 == sizeof(uint64));
+
+bh_static_assert(sizeof(CApiFuncImport) == sizeof(uintptr_t) * 3);
+
+bh_static_assert(sizeof(wasm_val_t) == 16);
+bh_static_assert(offsetof(wasm_val_t, of) == 8);
 
 bh_static_assert(offsetof(AOTFrame, prev_frame) == sizeof(uintptr_t) * 0);
 bh_static_assert(offsetof(AOTFrame, func_index) == sizeof(uintptr_t) * 1);
@@ -2001,9 +2008,6 @@ invoke_native_with_hw_bound_check(WASMExecEnv *exec_env, void *func_ptr,
     WASMJmpBuf jmpbuf_node = { 0 }, *jmpbuf_node_pop;
     uint32 page_size = os_getpagesize();
     uint32 guard_page_count = STACK_OVERFLOW_CHECK_GUARD_PAGE_COUNT;
-    uint16 param_count = func_type->param_count;
-    uint16 result_count = func_type->result_count;
-    const uint8 *types = func_type->types;
 #ifdef BH_PLATFORM_WINDOWS
     int result;
     bool has_exception;
@@ -2042,28 +2046,22 @@ invoke_native_with_hw_bound_check(WASMExecEnv *exec_env, void *func_ptr,
     wasm_exec_env_push_jmpbuf(exec_env, &jmpbuf_node);
 
     if (os_setjmp(jmpbuf_node.jmpbuf) == 0) {
-        /* Quick call with func_ptr if the function signature is simple */
-        if (!signature && param_count == 1 && types[0] == VALUE_TYPE_I32) {
-            if (result_count == 0) {
-                void (*NativeFunc)(WASMExecEnv *, uint32) =
-                    (void (*)(WASMExecEnv *, uint32))func_ptr;
-                NativeFunc(exec_env, argv[0]);
-                ret = aot_copy_exception(module_inst, NULL) ? false : true;
-            }
-            else if (result_count == 1
-                     && types[param_count] == VALUE_TYPE_I32) {
-                uint32 (*NativeFunc)(WASMExecEnv *, uint32) =
-                    (uint32(*)(WASMExecEnv *, uint32))func_ptr;
-                argv_ret[0] = NativeFunc(exec_env, argv[0]);
-                ret = aot_copy_exception(module_inst, NULL) ? false : true;
-            }
-            else {
-                ret = wasm_runtime_invoke_native(exec_env, func_ptr, func_type,
-                                                 signature, attachment, argv,
-                                                 argc, argv_ret);
-            }
+#if WASM_ENABLE_QUICK_AOT_ENTRY != 0
+        /* Quick call if the quick aot entry is registered */
+        if (!signature && func_type->quick_aot_entry) {
+            void (*invoke_native)(
+                void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+                uint32 *argv_ret) = func_type->quick_aot_entry;
+            invoke_native(func_ptr,
+                          func_type->result_count > 0
+                              ? func_type->types[func_type->param_count]
+                              : VALUE_TYPE_VOID,
+                          exec_env, argv, argv_ret);
+            ret = !aot_copy_exception(module_inst, NULL);
         }
-        else {
+        else
+#endif
+        {
             ret = wasm_runtime_invoke_native(exec_env, func_ptr, func_type,
                                              signature, attachment, argv, argc,
                                              argv_ret);

+ 0 - 1
core/iwasm/aot/debug/jit_debug.c

@@ -23,7 +23,6 @@
 
 #include <stdio.h>
 #include <assert.h>
-#include <fcntl.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <string.h>

+ 42 - 8
core/iwasm/common/wasm_c_api.c

@@ -378,6 +378,9 @@ wasm_engine_new_internal(wasm_config_t *config)
     wasm_engine_t *engine = NULL;
     /* init runtime */
     RuntimeInitArgs init_args = { 0 };
+#if WASM_ENABLE_JIT != 0
+    LLVMJITOptions *jit_options = wasm_runtime_get_llvm_jit_options();
+#endif
 
 #ifndef NDEBUG
     bh_log_set_verbose_level(BH_LOG_LEVEL_VERBOSE);
@@ -394,6 +397,10 @@ wasm_engine_new_internal(wasm_config_t *config)
     init_args.enable_linux_perf = config->enable_linux_perf;
     init_args.segue_flags = config->segue_flags;
 
+#if WASM_ENABLE_JIT != 0
+    jit_options->quick_invoke_c_api_import = true;
+#endif
+
     if (!wasm_runtime_full_init(&init_args)) {
         LOG_DEBUG("wasm_runtime_full_init failed");
         goto failed;
@@ -1914,10 +1921,26 @@ wasm_frame_func_offset(const wasm_frame_t *frame)
     return frame ? frame->func_offset : 0;
 }
 
+void
+wasm_frame_vec_clone_internal(Vector *src, Vector *out)
+{
+    bh_assert(src->num_elems != 0 && src->data);
+
+    bh_vector_destroy(out);
+    if (!bh_vector_init(out, src->num_elems, sizeof(WASMCApiFrame), false)) {
+        bh_vector_destroy(out);
+        return;
+    }
+
+    bh_memcpy_s(out->data, src->num_elems * sizeof(WASMCApiFrame), src->data,
+                src->num_elems * sizeof(WASMCApiFrame));
+    out->num_elems = src->num_elems;
+}
+
 static wasm_trap_t *
 wasm_trap_new_internal(wasm_store_t *store,
                        WASMModuleInstanceCommon *inst_comm_rt,
-                       const char *error_info)
+                       const char *error_info, Vector *cluster_frames)
 {
     wasm_trap_t *trap;
 #if WASM_ENABLE_DUMP_CALL_STACK != 0
@@ -1947,7 +1970,9 @@ wasm_trap_new_internal(wasm_store_t *store,
 
     /* fill in frames */
 #if WASM_ENABLE_DUMP_CALL_STACK != 0
-    trap->frames = ((WASMModuleInstance *)inst_comm_rt)->frames;
+    trap->frames = cluster_frames
+                       ? cluster_frames
+                       : ((WASMModuleInstance *)inst_comm_rt)->frames;
 
     if (trap->frames) {
         /* fill in instances */
@@ -2058,10 +2083,7 @@ wasm_trap_trace(const wasm_trap_t *trap, own wasm_frame_vec_t *out)
     }
 
     for (i = 0; i < trap->frames->num_elems; i++) {
-        wasm_frame_t *frame;
-
-        frame = ((wasm_frame_t *)trap->frames->data) + i;
-
+        wasm_frame_t *frame = ((wasm_frame_t *)trap->frames->data) + i;
         if (!(out->data[i] =
                   wasm_frame_new(frame->instance, frame->module_offset,
                                  frame->func_index, frame->func_offset))) {
@@ -3254,6 +3276,7 @@ wasm_func_call(const wasm_func_t *func, const wasm_val_vec_t *params,
     WASMFunctionInstanceCommon *func_comm_rt = NULL;
     WASMExecEnv *exec_env = NULL;
     size_t param_count, result_count, alloc_count;
+    Vector *cluster_frames = NULL;
 
     bh_assert(func && func->type);
 
@@ -3366,9 +3389,20 @@ failed:
     if (argv != argv_buf)
         wasm_runtime_free(argv);
 
-    return wasm_trap_new_internal(
+#if WASM_ENABLE_DUMP_CALL_STACK != 0 && WASM_ENABLE_THREAD_MGR != 0
+    WASMCluster *cluster = wasm_exec_env_get_cluster(exec_env);
+    cluster_frames = &cluster->exception_frames;
+    wasm_cluster_traverse_lock(exec_env);
+#endif
+
+    wasm_trap_t *trap = wasm_trap_new_internal(
         func->store, func->inst_comm_rt,
-        wasm_runtime_get_exception(func->inst_comm_rt));
+        wasm_runtime_get_exception(func->inst_comm_rt), cluster_frames);
+
+#if WASM_ENABLE_DUMP_CALL_STACK != 0 && WASM_ENABLE_THREAD_MGR != 0
+    wasm_cluster_traverse_unlock(exec_env);
+#endif
+    return trap;
 }
 
 size_t

+ 3 - 0
core/iwasm/common/wasm_c_api_internal.h

@@ -240,4 +240,7 @@ wasm_memory_new_internal(wasm_store_t *store, uint16 memory_idx_rt,
 wasm_table_t *
 wasm_table_new_internal(wasm_store_t *store, uint16 table_idx_rt,
                         WASMModuleInstanceCommon *inst_comm_rt);
+
+void
+wasm_frame_vec_clone_internal(Vector *src, Vector *out);
 #endif /* _WASM_C_API_INTERNAL_H */

+ 966 - 113
core/iwasm/common/wasm_native.c

@@ -16,19 +16,6 @@
 #include "../libraries/thread-mgr/thread_manager.h"
 #endif
 
-#if !defined(BH_PLATFORM_ZEPHYR) && !defined(BH_PLATFORM_ALIOS_THINGS) \
-    && !defined(BH_PLATFORM_OPENRTOS) && !defined(BH_PLATFORM_ESP_IDF)
-#define ENABLE_QUICKSORT 1
-#else
-#define ENABLE_QUICKSORT 0
-#endif
-
-#define ENABLE_SORT_DEBUG 0
-
-#if ENABLE_SORT_DEBUG != 0
-#include <sys/time.h>
-#endif
-
 static NativeSymbolsList g_native_symbols_list = NULL;
 
 #if WASM_ENABLE_LIBC_WASI != 0
@@ -181,93 +168,26 @@ check_symbol_signature(const WASMFuncType *type, const char *signature)
     return true;
 }
 
-#if ENABLE_QUICKSORT == 0
-static void
-sort_symbol_ptr(NativeSymbol *native_symbols, uint32 n_native_symbols)
-{
-    uint32 i, j;
-    NativeSymbol temp;
-
-    for (i = 0; i < n_native_symbols - 1; i++) {
-        for (j = i + 1; j < n_native_symbols; j++) {
-            if (strcmp(native_symbols[i].symbol, native_symbols[j].symbol)
-                > 0) {
-                temp = native_symbols[i];
-                native_symbols[i] = native_symbols[j];
-                native_symbols[j] = temp;
-            }
-        }
-    }
-}
-#else
-static void
-swap_symbol(NativeSymbol *left, NativeSymbol *right)
+static int
+native_symbol_cmp(const void *native_symbol1, const void *native_symbol2)
 {
-    NativeSymbol temp = *left;
-    *left = *right;
-    *right = temp;
+    return strcmp(((const NativeSymbol *)native_symbol1)->symbol,
+                  ((const NativeSymbol *)native_symbol2)->symbol);
 }
 
-static void
-quick_sort_symbols(NativeSymbol *native_symbols, int left, int right)
-{
-    NativeSymbol base_symbol;
-    int pin_left = left;
-    int pin_right = right;
-
-    if (left >= right) {
-        return;
-    }
-
-    base_symbol = native_symbols[left];
-    while (left < right) {
-        while (left < right
-               && strcmp(native_symbols[right].symbol, base_symbol.symbol)
-                      > 0) {
-            right--;
-        }
-
-        if (left < right) {
-            swap_symbol(&native_symbols[left], &native_symbols[right]);
-            left++;
-        }
-
-        while (left < right
-               && strcmp(native_symbols[left].symbol, base_symbol.symbol) < 0) {
-            left++;
-        }
-
-        if (left < right) {
-            swap_symbol(&native_symbols[left], &native_symbols[right]);
-            right--;
-        }
-    }
-    native_symbols[left] = base_symbol;
-
-    quick_sort_symbols(native_symbols, pin_left, left - 1);
-    quick_sort_symbols(native_symbols, left + 1, pin_right);
-}
-#endif /* end of ENABLE_QUICKSORT */
-
 static void *
 lookup_symbol(NativeSymbol *native_symbols, uint32 n_native_symbols,
               const char *symbol, const char **p_signature, void **p_attachment)
 {
-    int low = 0, mid, ret;
-    int high = (int32)n_native_symbols - 1;
+    NativeSymbol *native_symbol, key = { 0 };
 
-    while (low <= high) {
-        mid = (low + high) / 2;
-        ret = strcmp(symbol, native_symbols[mid].symbol);
-        if (ret == 0) {
-            *p_signature = native_symbols[mid].signature;
-            *p_attachment = native_symbols[mid].attachment;
-            return native_symbols[mid].func_ptr;
-        }
-        else if (ret < 0)
-            high = mid - 1;
-        else
-            low = mid + 1;
+    key.symbol = symbol;
+
+    if ((native_symbol = bsearch(&key, native_symbols, n_native_symbols,
+                                 sizeof(NativeSymbol), native_symbol_cmp))) {
+        *p_signature = native_symbol->signature;
+        *p_attachment = native_symbol->attachment;
+        return native_symbol->func_ptr;
     }
 
     return NULL;
@@ -339,11 +259,6 @@ register_natives(const char *module_name, NativeSymbol *native_symbols,
                  uint32 n_native_symbols, bool call_conv_raw)
 {
     NativeSymbolsNode *node;
-#if ENABLE_SORT_DEBUG != 0
-    struct timeval start;
-    struct timeval end;
-    unsigned long timer;
-#endif
 
     if (!(node = wasm_runtime_malloc(sizeof(NativeSymbolsNode))))
         return false;
@@ -360,23 +275,9 @@ register_natives(const char *module_name, NativeSymbol *native_symbols,
     node->next = g_native_symbols_list;
     g_native_symbols_list = node;
 
-#if ENABLE_SORT_DEBUG != 0
-    gettimeofday(&start, NULL);
-#endif
+    qsort(native_symbols, n_native_symbols, sizeof(NativeSymbol),
+          native_symbol_cmp);
 
-#if ENABLE_QUICKSORT == 0
-    sort_symbol_ptr(native_symbols, n_native_symbols);
-#else
-    quick_sort_symbols(native_symbols, 0, (int)(n_native_symbols - 1));
-#endif
-
-#if ENABLE_SORT_DEBUG != 0
-    gettimeofday(&end, NULL);
-    timer =
-        1000000 * (end.tv_sec - start.tv_sec) + (end.tv_usec - start.tv_usec);
-    LOG_ERROR("module_name: %s, nums: %d, sorted used: %ld us", module_name,
-              n_native_symbols, timer);
-#endif
     return true;
 }
 
@@ -566,6 +467,11 @@ wasi_context_dtor(WASMModuleInstanceCommon *inst, void *ctx)
 }
 #endif /* end of WASM_ENABLE_LIBC_WASI */
 
+#if WASM_ENABLE_QUICK_AOT_ENTRY != 0
+static bool
+quick_aot_entry_init();
+#endif
+
 bool
 wasm_native_init()
 {
@@ -666,6 +572,20 @@ wasm_native_init()
         goto fail;
 #endif
 
+#if WASM_ENABLE_QUICK_AOT_ENTRY != 0
+    if (!quick_aot_entry_init()) {
+#if WASM_ENABLE_SPEC_TEST != 0 || WASM_ENABLE_LIBC_BUILTIN != 0     \
+    || WASM_ENABLE_BASE_LIB != 0 || WASM_ENABLE_LIBC_EMCC != 0      \
+    || WASM_ENABLE_LIB_RATS != 0 || WASM_ENABLE_WASI_NN != 0        \
+    || WASM_ENABLE_APP_FRAMEWORK != 0 || WASM_ENABLE_LIBC_WASI != 0 \
+    || WASM_ENABLE_LIB_PTHREAD != 0 || WASM_ENABLE_LIB_WASI_THREADS != 0
+        goto fail;
+#else
+        return false;
+#endif
+    }
+#endif
+
     return true;
 #if WASM_ENABLE_SPEC_TEST != 0 || WASM_ENABLE_LIBC_BUILTIN != 0     \
     || WASM_ENABLE_BASE_LIB != 0 || WASM_ENABLE_LIBC_EMCC != 0      \
@@ -706,3 +626,936 @@ wasm_native_destroy()
 
     g_native_symbols_list = NULL;
 }
+
+#if WASM_ENABLE_QUICK_AOT_ENTRY != 0
+static void
+invoke_no_args_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+                 uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env);
+}
+static void
+invoke_no_args_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+                 uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env);
+}
+static void
+invoke_no_args_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+                 uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_i_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+           uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, argv[0]);
+}
+static void
+invoke_i_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+           uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, argv[0]);
+}
+static void
+invoke_i_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+           uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, argv[0]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_I_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+           uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, GET_I64_FROM_ADDR(argv));
+}
+static void
+invoke_I_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+           uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, GET_I64_FROM_ADDR(argv));
+}
+static void
+invoke_I_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+           uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, GET_I64_FROM_ADDR(argv));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_ii_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+            uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, argv[0], argv[1]);
+}
+static void
+invoke_ii_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+            uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, argv[0], argv[1]);
+}
+static void
+invoke_ii_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+            uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, argv[0], argv[1]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iI_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+            uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1));
+}
+static void
+invoke_iI_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+            uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1));
+}
+static void
+invoke_iI_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+            uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_Ii_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+            uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, GET_I64_FROM_ADDR(argv), argv[2]);
+}
+static void
+invoke_Ii_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+            uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, GET_I64_FROM_ADDR(argv), argv[2]);
+}
+static void
+invoke_Ii_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+            uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, GET_I64_FROM_ADDR(argv), argv[2]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_II_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+            uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, GET_I64_FROM_ADDR(argv), GET_I64_FROM_ADDR(argv + 2));
+}
+static void
+invoke_II_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+            uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, GET_I64_FROM_ADDR(argv),
+                              GET_I64_FROM_ADDR(argv + 2));
+}
+static void
+invoke_II_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+            uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, GET_I64_FROM_ADDR(argv),
+                             GET_I64_FROM_ADDR(argv + 2));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iii_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, argv[0], argv[1], argv[2]);
+}
+static void
+invoke_iii_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, argv[0], argv[1], argv[2]);
+}
+static void
+invoke_iii_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, argv[0], argv[1], argv[2]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iiI_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, argv[0], argv[1], GET_I64_FROM_ADDR(argv + 2));
+}
+static void
+invoke_iiI_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] =
+        native_code(exec_env, argv[0], argv[1], GET_I64_FROM_ADDR(argv + 2));
+}
+static void
+invoke_iiI_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret =
+        native_code(exec_env, argv[0], argv[1], GET_I64_FROM_ADDR(argv + 2));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iIi_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1), argv[3]);
+}
+static void
+invoke_iIi_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] =
+        native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1), argv[3]);
+}
+static void
+invoke_iIi_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret =
+        native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1), argv[3]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iII_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1),
+                GET_I64_FROM_ADDR(argv + 3));
+}
+static void
+invoke_iII_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    uint32 (*native_code)() = func_ptr;
+    argv_ret[0] = native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1),
+                              GET_I64_FROM_ADDR(argv + 3));
+}
+static void
+invoke_iII_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    uint64 (*native_code)() = func_ptr;
+    uint64 ret = native_code(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1),
+                             GET_I64_FROM_ADDR(argv + 3));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_Iii_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    void (*native_code)() = func_ptr;
+    native_code(exec_env, GET_I64_FROM_ADDR(argv), argv[2], argv[3]);
+}
+static void
+invoke_Iii_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    /* Quick entry for signature "(Iii)i": unpack the arguments from argv,
+       call the native function and store its uint32 result in argv_ret[0] */
+    uint32 (*callee)() = func_ptr;
+    uint32 ret;
+
+    ret = callee(exec_env, GET_I64_FROM_ADDR(argv), argv[2], argv[3]);
+    argv_ret[0] = ret;
+}
+static void
+invoke_Iii_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    /* Quick entry for signature "(Iii)I": unpack the arguments from argv,
+       call the native function and write its uint64 result to argv_ret */
+    uint64 (*callee)() = func_ptr;
+    uint64 ret;
+
+    ret = callee(exec_env, GET_I64_FROM_ADDR(argv), argv[2], argv[3]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_IiI_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IiI)v": unpack the arguments from argv
+       and call the native function; no result is stored */
+    void (*callee)() = func_ptr;
+
+    callee(exec_env, GET_I64_FROM_ADDR(argv), argv[2],
+           GET_I64_FROM_ADDR(argv + 3));
+}
+static void
+invoke_IiI_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IiI)i": unpack the arguments from argv,
+       call the native function and store its uint32 result in argv_ret[0] */
+    uint32 (*callee)() = func_ptr;
+    uint32 ret;
+
+    ret = callee(exec_env, GET_I64_FROM_ADDR(argv), argv[2],
+                 GET_I64_FROM_ADDR(argv + 3));
+    argv_ret[0] = ret;
+}
+static void
+invoke_IiI_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IiI)I": unpack the arguments from argv,
+       call the native function and write its uint64 result to argv_ret */
+    uint64 (*callee)() = func_ptr;
+    uint64 ret;
+
+    ret = callee(exec_env, GET_I64_FROM_ADDR(argv), argv[2],
+                 GET_I64_FROM_ADDR(argv + 3));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_IIi_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IIi)v": unpack the arguments from argv
+       and call the native function; no result is stored */
+    void (*callee)() = func_ptr;
+
+    callee(exec_env, GET_I64_FROM_ADDR(argv), GET_I64_FROM_ADDR(argv + 2),
+           argv[4]);
+}
+static void
+invoke_IIi_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IIi)i": unpack the arguments from argv,
+       call the native function and store its uint32 result in argv_ret[0] */
+    uint32 (*callee)() = func_ptr;
+    uint32 ret;
+
+    ret = callee(exec_env, GET_I64_FROM_ADDR(argv),
+                 GET_I64_FROM_ADDR(argv + 2), argv[4]);
+    argv_ret[0] = ret;
+}
+static void
+invoke_IIi_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IIi)I": unpack the arguments from argv,
+       call the native function and write its uint64 result to argv_ret */
+    uint64 (*callee)() = func_ptr;
+    uint64 ret;
+
+    ret = callee(exec_env, GET_I64_FROM_ADDR(argv),
+                 GET_I64_FROM_ADDR(argv + 2), argv[4]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_III_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    /* Quick entry for signature "(III)v": unpack the arguments from argv
+       and call the native function; no result is stored */
+    void (*callee)() = func_ptr;
+
+    callee(exec_env, GET_I64_FROM_ADDR(argv), GET_I64_FROM_ADDR(argv + 2),
+           GET_I64_FROM_ADDR(argv + 4));
+}
+static void
+invoke_III_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    /* Quick entry for signature "(III)i": unpack the arguments from argv,
+       call the native function and store its uint32 result in argv_ret[0] */
+    uint32 (*callee)() = func_ptr;
+    uint32 ret;
+
+    ret = callee(exec_env, GET_I64_FROM_ADDR(argv),
+                 GET_I64_FROM_ADDR(argv + 2), GET_I64_FROM_ADDR(argv + 4));
+    argv_ret[0] = ret;
+}
+static void
+invoke_III_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+             uint32 *argv_ret)
+{
+    /* Quick entry for signature "(III)I": unpack the arguments from argv,
+       call the native function and write its uint64 result to argv_ret */
+    uint64 (*callee)() = func_ptr;
+    uint64 ret;
+
+    ret = callee(exec_env, GET_I64_FROM_ADDR(argv),
+                 GET_I64_FROM_ADDR(argv + 2), GET_I64_FROM_ADDR(argv + 4));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iiii_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iiii)v": unpack the arguments from argv
+       and call the native function; no result is stored */
+    void (*callee)() = func_ptr;
+
+    callee(exec_env, argv[0], argv[1], argv[2], argv[3]);
+}
+static void
+invoke_iiii_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iiii)i": unpack the arguments from argv,
+       call the native function and store its uint32 result in argv_ret[0] */
+    uint32 (*callee)() = func_ptr;
+    uint32 ret;
+
+    ret = callee(exec_env, argv[0], argv[1], argv[2], argv[3]);
+    argv_ret[0] = ret;
+}
+static void
+invoke_iiii_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iiii)I": unpack the arguments from argv,
+       call the native function and write its uint64 result to argv_ret */
+    uint64 (*callee)() = func_ptr;
+    uint64 ret;
+
+    ret = callee(exec_env, argv[0], argv[1], argv[2], argv[3]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iiiI_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iiiI)v": unpack the arguments from argv
+       and call the native function; no result is stored */
+    void (*callee)() = func_ptr;
+
+    callee(exec_env, argv[0], argv[1], argv[2],
+           GET_I64_FROM_ADDR(argv + 3));
+}
+static void
+invoke_iiiI_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iiiI)i": unpack the arguments from argv,
+       call the native function and store its uint32 result in argv_ret[0] */
+    uint32 (*callee)() = func_ptr;
+    uint32 ret;
+
+    ret = callee(exec_env, argv[0], argv[1], argv[2],
+                 GET_I64_FROM_ADDR(argv + 3));
+    argv_ret[0] = ret;
+}
+static void
+invoke_iiiI_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iiiI)I": unpack the arguments from argv,
+       call the native function and write its uint64 result to argv_ret */
+    uint64 (*callee)() = func_ptr;
+    uint64 ret;
+
+    ret = callee(exec_env, argv[0], argv[1], argv[2],
+                 GET_I64_FROM_ADDR(argv + 3));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iiIi_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iiIi)v": unpack the arguments from argv
+       and call the native function; no result is stored */
+    void (*callee)() = func_ptr;
+
+    callee(exec_env, argv[0], argv[1], GET_I64_FROM_ADDR(argv + 2),
+           argv[4]);
+}
+static void
+invoke_iiIi_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iiIi)i": unpack the arguments from argv,
+       call the native function and store its uint32 result in argv_ret[0] */
+    uint32 (*callee)() = func_ptr;
+    uint32 ret;
+
+    ret = callee(exec_env, argv[0], argv[1], GET_I64_FROM_ADDR(argv + 2),
+                 argv[4]);
+    argv_ret[0] = ret;
+}
+static void
+invoke_iiIi_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iiIi)I": unpack the arguments from argv,
+       call the native function and write its uint64 result to argv_ret */
+    uint64 (*callee)() = func_ptr;
+    uint64 ret;
+
+    ret = callee(exec_env, argv[0], argv[1], GET_I64_FROM_ADDR(argv + 2),
+                 argv[4]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iiII_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iiII)v": unpack the arguments from argv
+       and call the native function; no result is stored */
+    void (*callee)() = func_ptr;
+
+    callee(exec_env, argv[0], argv[1], GET_I64_FROM_ADDR(argv + 2),
+           GET_I64_FROM_ADDR(argv + 4));
+}
+static void
+invoke_iiII_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iiII)i": unpack the arguments from argv,
+       call the native function and store its uint32 result in argv_ret[0] */
+    uint32 (*callee)() = func_ptr;
+    uint32 ret;
+
+    ret = callee(exec_env, argv[0], argv[1], GET_I64_FROM_ADDR(argv + 2),
+                 GET_I64_FROM_ADDR(argv + 4));
+    argv_ret[0] = ret;
+}
+static void
+invoke_iiII_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iiII)I": unpack the arguments from argv,
+       call the native function and write its uint64 result to argv_ret */
+    uint64 (*callee)() = func_ptr;
+    uint64 ret;
+
+    ret = callee(exec_env, argv[0], argv[1], GET_I64_FROM_ADDR(argv + 2),
+                 GET_I64_FROM_ADDR(argv + 4));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iIii_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iIii)v": unpack the arguments from argv
+       and call the native function; no result is stored */
+    void (*callee)() = func_ptr;
+
+    callee(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1), argv[3],
+           argv[4]);
+}
+static void
+invoke_iIii_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iIii)i": unpack the arguments from argv,
+       call the native function and store its uint32 result in argv_ret[0] */
+    uint32 (*callee)() = func_ptr;
+    uint32 ret;
+
+    ret = callee(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1), argv[3],
+                 argv[4]);
+    argv_ret[0] = ret;
+}
+static void
+invoke_iIii_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iIii)I": unpack the arguments from argv,
+       call the native function and write its uint64 result to argv_ret */
+    uint64 (*callee)() = func_ptr;
+    uint64 ret;
+
+    ret = callee(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1), argv[3],
+                 argv[4]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iIiI_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iIiI)v": unpack the arguments from argv
+       and call the native function; no result is stored */
+    void (*callee)() = func_ptr;
+
+    callee(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1), argv[3],
+           GET_I64_FROM_ADDR(argv + 4));
+}
+static void
+invoke_iIiI_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iIiI)i": unpack the arguments from argv,
+       call the native function and store its uint32 result in argv_ret[0] */
+    uint32 (*callee)() = func_ptr;
+    uint32 ret;
+
+    ret = callee(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1), argv[3],
+                 GET_I64_FROM_ADDR(argv + 4));
+    argv_ret[0] = ret;
+}
+static void
+invoke_iIiI_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iIiI)I": unpack the arguments from argv,
+       call the native function and write its uint64 result to argv_ret */
+    uint64 (*callee)() = func_ptr;
+    uint64 ret;
+
+    ret = callee(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1), argv[3],
+                 GET_I64_FROM_ADDR(argv + 4));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iIIi_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iIIi)v": unpack the arguments from argv
+       and call the native function; no result is stored */
+    void (*callee)() = func_ptr;
+
+    callee(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1),
+           GET_I64_FROM_ADDR(argv + 3), argv[5]);
+}
+static void
+invoke_iIIi_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iIIi)i": unpack the arguments from argv,
+       call the native function and store its uint32 result in argv_ret[0] */
+    uint32 (*callee)() = func_ptr;
+    uint32 ret;
+
+    ret = callee(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1),
+                 GET_I64_FROM_ADDR(argv + 3), argv[5]);
+    argv_ret[0] = ret;
+}
+static void
+invoke_iIIi_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iIIi)I": unpack the arguments from argv,
+       call the native function and write its uint64 result to argv_ret */
+    uint64 (*callee)() = func_ptr;
+    uint64 ret;
+
+    ret = callee(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1),
+                 GET_I64_FROM_ADDR(argv + 3), argv[5]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iIII_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iIII)v": unpack the arguments from argv
+       and call the native function; no result is stored */
+    void (*callee)() = func_ptr;
+
+    callee(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1),
+           GET_I64_FROM_ADDR(argv + 3), GET_I64_FROM_ADDR(argv + 5));
+}
+static void
+invoke_iIII_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iIII)i": unpack the arguments from argv,
+       call the native function and store its uint32 result in argv_ret[0] */
+    uint32 (*callee)() = func_ptr;
+    uint32 ret;
+
+    ret = callee(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1),
+                 GET_I64_FROM_ADDR(argv + 3), GET_I64_FROM_ADDR(argv + 5));
+    argv_ret[0] = ret;
+}
+static void
+invoke_iIII_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iIII)I": unpack the arguments from argv,
+       call the native function and write its uint64 result to argv_ret */
+    uint64 (*callee)() = func_ptr;
+    uint64 ret;
+
+    ret = callee(exec_env, argv[0], GET_I64_FROM_ADDR(argv + 1),
+                 GET_I64_FROM_ADDR(argv + 3), GET_I64_FROM_ADDR(argv + 5));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_Iiii_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(Iiii)v": unpack the arguments from argv
+       and call the native function; no result is stored */
+    void (*callee)() = func_ptr;
+
+    callee(exec_env, GET_I64_FROM_ADDR(argv), argv[2], argv[3], argv[4]);
+}
+static void
+invoke_Iiii_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(Iiii)i": unpack the arguments from argv,
+       call the native function and store its uint32 result in argv_ret[0] */
+    uint32 (*callee)() = func_ptr;
+    uint32 ret;
+
+    ret = callee(exec_env, GET_I64_FROM_ADDR(argv), argv[2], argv[3],
+                 argv[4]);
+    argv_ret[0] = ret;
+}
+static void
+invoke_Iiii_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(Iiii)I": unpack the arguments from argv,
+       call the native function and write its uint64 result to argv_ret */
+    uint64 (*callee)() = func_ptr;
+    uint64 ret;
+
+    ret = callee(exec_env, GET_I64_FROM_ADDR(argv), argv[2], argv[3],
+                 argv[4]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_IiiI_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IiiI)v": unpack the arguments from argv
+       and call the native function; no result is stored */
+    void (*callee)() = func_ptr;
+
+    callee(exec_env, GET_I64_FROM_ADDR(argv), argv[2], argv[3],
+           GET_I64_FROM_ADDR(argv + 4));
+}
+
+static void
+invoke_IiiI_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IiiI)i": unpack the arguments from argv,
+       call the native function and store its uint32 result in argv_ret[0] */
+    uint32 (*callee)() = func_ptr;
+    uint32 ret;
+
+    ret = callee(exec_env, GET_I64_FROM_ADDR(argv), argv[2], argv[3],
+                 GET_I64_FROM_ADDR(argv + 4));
+    argv_ret[0] = ret;
+}
+
+static void
+invoke_IiiI_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IiiI)I": unpack the arguments from argv,
+       call the native function and write its uint64 result to argv_ret */
+    uint64 (*callee)() = func_ptr;
+    uint64 ret;
+
+    ret = callee(exec_env, GET_I64_FROM_ADDR(argv), argv[2], argv[3],
+                 GET_I64_FROM_ADDR(argv + 4));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_IiIi_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IiIi)v": unpack the arguments from argv
+       and call the native function; no result is stored */
+    void (*callee)() = func_ptr;
+
+    callee(exec_env, GET_I64_FROM_ADDR(argv), argv[2],
+           GET_I64_FROM_ADDR(argv + 3), argv[5]);
+}
+static void
+invoke_IiIi_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IiIi)i": unpack the arguments from argv,
+       call the native function and store its uint32 result in argv_ret[0] */
+    uint32 (*callee)() = func_ptr;
+    uint32 ret;
+
+    ret = callee(exec_env, GET_I64_FROM_ADDR(argv), argv[2],
+                 GET_I64_FROM_ADDR(argv + 3), argv[5]);
+    argv_ret[0] = ret;
+}
+static void
+invoke_IiIi_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IiIi)I": unpack the arguments from argv,
+       call the native function and write its uint64 result to argv_ret */
+    uint64 (*callee)() = func_ptr;
+    uint64 ret;
+
+    ret = callee(exec_env, GET_I64_FROM_ADDR(argv), argv[2],
+                 GET_I64_FROM_ADDR(argv + 3), argv[5]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_IiII_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IiII)v": unpack the arguments from argv
+       and call the native function; no result is stored */
+    void (*callee)() = func_ptr;
+
+    callee(exec_env, GET_I64_FROM_ADDR(argv), argv[2],
+           GET_I64_FROM_ADDR(argv + 3), GET_I64_FROM_ADDR(argv + 5));
+}
+static void
+invoke_IiII_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IiII)i": unpack the arguments from argv,
+       call the native function and store its uint32 result in argv_ret[0] */
+    uint32 (*callee)() = func_ptr;
+    uint32 ret;
+
+    ret = callee(exec_env, GET_I64_FROM_ADDR(argv), argv[2],
+                 GET_I64_FROM_ADDR(argv + 3), GET_I64_FROM_ADDR(argv + 5));
+    argv_ret[0] = ret;
+}
+static void
+invoke_IiII_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IiII)I": unpack the arguments from argv,
+       call the native function and write its uint64 result to argv_ret */
+    uint64 (*callee)() = func_ptr;
+    uint64 ret;
+
+    ret = callee(exec_env, GET_I64_FROM_ADDR(argv), argv[2],
+                 GET_I64_FROM_ADDR(argv + 3), GET_I64_FROM_ADDR(argv + 5));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_IIii_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IIii)v": unpack the arguments from argv
+       and call the native function; no result is stored */
+    void (*callee)() = func_ptr;
+
+    callee(exec_env, GET_I64_FROM_ADDR(argv), GET_I64_FROM_ADDR(argv + 2),
+           argv[4], argv[5]);
+}
+static void
+invoke_IIii_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IIii)i": unpack the arguments from argv,
+       call the native function and store its uint32 result in argv_ret[0] */
+    uint32 (*callee)() = func_ptr;
+    uint32 ret;
+
+    ret = callee(exec_env, GET_I64_FROM_ADDR(argv),
+                 GET_I64_FROM_ADDR(argv + 2), argv[4], argv[5]);
+    argv_ret[0] = ret;
+}
+static void
+invoke_IIii_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IIii)I": unpack the arguments from argv,
+       call the native function and write its uint64 result to argv_ret */
+    uint64 (*callee)() = func_ptr;
+    uint64 ret;
+
+    ret = callee(exec_env, GET_I64_FROM_ADDR(argv),
+                 GET_I64_FROM_ADDR(argv + 2), argv[4], argv[5]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_IIiI_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IIiI)v": unpack the arguments from argv
+       and call the native function; no result is stored */
+    void (*callee)() = func_ptr;
+
+    callee(exec_env, GET_I64_FROM_ADDR(argv), GET_I64_FROM_ADDR(argv + 2),
+           argv[4], GET_I64_FROM_ADDR(argv + 5));
+}
+static void
+invoke_IIiI_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IIiI)i": unpack the arguments from argv,
+       call the native function and store its uint32 result in argv_ret[0] */
+    uint32 (*callee)() = func_ptr;
+    uint32 ret;
+
+    ret = callee(exec_env, GET_I64_FROM_ADDR(argv),
+                 GET_I64_FROM_ADDR(argv + 2), argv[4],
+                 GET_I64_FROM_ADDR(argv + 5));
+    argv_ret[0] = ret;
+}
+static void
+invoke_IIiI_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IIiI)I": unpack the arguments from argv,
+       call the native function and write its uint64 result to argv_ret */
+    uint64 (*callee)() = func_ptr;
+    uint64 ret;
+
+    ret = callee(exec_env, GET_I64_FROM_ADDR(argv),
+                 GET_I64_FROM_ADDR(argv + 2), argv[4],
+                 GET_I64_FROM_ADDR(argv + 5));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_IIIi_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IIIi)v": unpack the arguments from argv
+       and call the native function; no result is stored */
+    void (*callee)() = func_ptr;
+
+    callee(exec_env, GET_I64_FROM_ADDR(argv), GET_I64_FROM_ADDR(argv + 2),
+           GET_I64_FROM_ADDR(argv + 4), argv[6]);
+}
+static void
+invoke_IIIi_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IIIi)i": unpack the arguments from argv,
+       call the native function and store its uint32 result in argv_ret[0] */
+    uint32 (*callee)() = func_ptr;
+    uint32 ret;
+
+    ret = callee(exec_env, GET_I64_FROM_ADDR(argv),
+                 GET_I64_FROM_ADDR(argv + 2), GET_I64_FROM_ADDR(argv + 4),
+                 argv[6]);
+    argv_ret[0] = ret;
+}
+static void
+invoke_IIIi_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IIIi)I": unpack the arguments from argv,
+       call the native function and write its uint64 result to argv_ret */
+    uint64 (*callee)() = func_ptr;
+    uint64 ret;
+
+    ret = callee(exec_env, GET_I64_FROM_ADDR(argv),
+                 GET_I64_FROM_ADDR(argv + 2), GET_I64_FROM_ADDR(argv + 4),
+                 argv[6]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_IIII_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IIII)v": unpack the arguments from argv
+       and call the native function; no result is stored */
+    void (*callee)() = func_ptr;
+
+    callee(exec_env, GET_I64_FROM_ADDR(argv), GET_I64_FROM_ADDR(argv + 2),
+           GET_I64_FROM_ADDR(argv + 4), GET_I64_FROM_ADDR(argv + 6));
+}
+static void
+invoke_IIII_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IIII)i": unpack the arguments from argv,
+       call the native function and store its uint32 result in argv_ret[0] */
+    uint32 (*callee)() = func_ptr;
+    uint32 ret;
+
+    ret = callee(exec_env, GET_I64_FROM_ADDR(argv),
+                 GET_I64_FROM_ADDR(argv + 2), GET_I64_FROM_ADDR(argv + 4),
+                 GET_I64_FROM_ADDR(argv + 6));
+    argv_ret[0] = ret;
+}
+static void
+invoke_IIII_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+              uint32 *argv_ret)
+{
+    /* Quick entry for signature "(IIII)I": unpack the arguments from argv,
+       call the native function and write its uint64 result to argv_ret */
+    uint64 (*callee)() = func_ptr;
+    uint64 ret;
+
+    ret = callee(exec_env, GET_I64_FROM_ADDR(argv),
+                 GET_I64_FROM_ADDR(argv + 2), GET_I64_FROM_ADDR(argv + 4),
+                 GET_I64_FROM_ADDR(argv + 6));
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+static void
+invoke_iiiii_v(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+               uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iiiii)v": unpack the arguments from argv
+       and call the native function; no result is stored */
+    void (*callee)() = func_ptr;
+
+    callee(exec_env, argv[0], argv[1], argv[2], argv[3], argv[4]);
+}
+static void
+invoke_iiiii_i(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+               uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iiiii)i": unpack the arguments from argv,
+       call the native function and store its uint32 result in argv_ret[0] */
+    uint32 (*callee)() = func_ptr;
+    uint32 ret;
+
+    ret = callee(exec_env, argv[0], argv[1], argv[2], argv[3], argv[4]);
+    argv_ret[0] = ret;
+}
+static void
+invoke_iiiii_I(void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+               uint32 *argv_ret)
+{
+    /* Quick entry for signature "(iiiii)I": unpack the arguments from argv,
+       call the native function and write its uint64 result to argv_ret */
+    uint64 (*callee)() = func_ptr;
+    uint64 ret;
+
+    ret = callee(exec_env, argv[0], argv[1], argv[2], argv[3], argv[4]);
+    PUT_I64_TO_ADDR(argv_ret, ret);
+}
+
+/* One row of the quick AOT entry table: maps a signature string to the
+   invoke thunk that handles it */
+typedef struct QuickAOTEntry {
+    /* signature string such as "(iI)i"; used as the sort/search key */
+    const char *signature;
+    /* the invoke_xxx thunk registered for this signature */
+    void *func_ptr;
+} QuickAOTEntry;
+
+/* clang-format off */
+/* Signature string -> invoke thunk. In a signature, 'i' stands for an i32
+   value, 'I' for an i64 value and 'v' for "no result". The rows are listed
+   in a readable order here; quick_aot_entry_init() sorts the array by
+   signature so that wasm_native_lookup_quick_aot_entry() can bsearch it. */
+static QuickAOTEntry quick_aot_entries[] = {
+    { "()v", invoke_no_args_v },
+    { "()i", invoke_no_args_i },
+    { "()I", invoke_no_args_I },
+
+    { "(i)v", invoke_i_v }, { "(i)i", invoke_i_i }, { "(i)I", invoke_i_I },
+    { "(I)v", invoke_I_v }, { "(I)i", invoke_I_i }, { "(I)I", invoke_I_I },
+
+    { "(ii)v", invoke_ii_v }, { "(ii)i", invoke_ii_i }, { "(ii)I", invoke_ii_I },
+    { "(iI)v", invoke_iI_v }, { "(iI)i", invoke_iI_i }, { "(iI)I", invoke_iI_I },
+    { "(Ii)v", invoke_Ii_v }, { "(Ii)i", invoke_Ii_i }, { "(Ii)I", invoke_Ii_I },
+    { "(II)v", invoke_II_v }, { "(II)i", invoke_II_i }, { "(II)I", invoke_II_I },
+
+    { "(iii)v", invoke_iii_v }, { "(iii)i", invoke_iii_i }, { "(iii)I", invoke_iii_I },
+    { "(iiI)v", invoke_iiI_v }, { "(iiI)i", invoke_iiI_i }, { "(iiI)I", invoke_iiI_I },
+    { "(iIi)v", invoke_iIi_v }, { "(iIi)i", invoke_iIi_i }, { "(iIi)I", invoke_iIi_I },
+    { "(iII)v", invoke_iII_v }, { "(iII)i", invoke_iII_i }, { "(iII)I", invoke_iII_I },
+    { "(Iii)v", invoke_Iii_v }, { "(Iii)i", invoke_Iii_i }, { "(Iii)I", invoke_Iii_I },
+    { "(IiI)v", invoke_IiI_v }, { "(IiI)i", invoke_IiI_i }, { "(IiI)I", invoke_IiI_I },
+    { "(IIi)v", invoke_IIi_v }, { "(IIi)i", invoke_IIi_i }, { "(IIi)I", invoke_IIi_I },
+    { "(III)v", invoke_III_v }, { "(III)i", invoke_III_i }, { "(III)I", invoke_III_I },
+
+    { "(iiii)v", invoke_iiii_v }, { "(iiii)i", invoke_iiii_i }, { "(iiii)I", invoke_iiii_I },
+    { "(iiiI)v", invoke_iiiI_v }, { "(iiiI)i", invoke_iiiI_i }, { "(iiiI)I", invoke_iiiI_I },
+    { "(iiIi)v", invoke_iiIi_v }, { "(iiIi)i", invoke_iiIi_i }, { "(iiIi)I", invoke_iiIi_I },
+    { "(iiII)v", invoke_iiII_v }, { "(iiII)i", invoke_iiII_i }, { "(iiII)I", invoke_iiII_I },
+    { "(iIii)v", invoke_iIii_v }, { "(iIii)i", invoke_iIii_i }, { "(iIii)I", invoke_iIii_I },
+    { "(iIiI)v", invoke_iIiI_v }, { "(iIiI)i", invoke_iIiI_i }, { "(iIiI)I", invoke_iIiI_I },
+    { "(iIIi)v", invoke_iIIi_v }, { "(iIIi)i", invoke_iIIi_i }, { "(iIIi)I", invoke_iIIi_I },
+    { "(iIII)v", invoke_iIII_v }, { "(iIII)i", invoke_iIII_i }, { "(iIII)I", invoke_iIII_I },
+    { "(Iiii)v", invoke_Iiii_v }, { "(Iiii)i", invoke_Iiii_i }, { "(Iiii)I", invoke_Iiii_I },
+    { "(IiiI)v", invoke_IiiI_v }, { "(IiiI)i", invoke_IiiI_i }, { "(IiiI)I", invoke_IiiI_I },
+    { "(IiIi)v", invoke_IiIi_v }, { "(IiIi)i", invoke_IiIi_i }, { "(IiIi)I", invoke_IiIi_I },
+    { "(IiII)v", invoke_IiII_v }, { "(IiII)i", invoke_IiII_i }, { "(IiII)I", invoke_IiII_I },
+    { "(IIii)v", invoke_IIii_v }, { "(IIii)i", invoke_IIii_i }, { "(IIii)I", invoke_IIii_I },
+    { "(IIiI)v", invoke_IIiI_v }, { "(IIiI)i", invoke_IIiI_i }, { "(IIiI)I", invoke_IIiI_I },
+    { "(IIIi)v", invoke_IIIi_v }, { "(IIIi)i", invoke_IIIi_i }, { "(IIIi)I", invoke_IIIi_I },
+    { "(IIII)v", invoke_IIII_v }, { "(IIII)i", invoke_IIII_i }, { "(IIII)I", invoke_IIII_I },
+
+    { "(iiiii)v", invoke_iiiii_v }, { "(iiiii)i", invoke_iiiii_i }, { "(iiiii)I", invoke_iiiii_I },
+};
+/* clang-format on */
+
+static int
+quick_aot_entry_cmp(const void *quick_aot_entry1, const void *quick_aot_entry2)
+{
+    /* Comparator shared by qsort() and bsearch(): orders two QuickAOTEntry
+       items by strcmp() of their signature strings */
+    const QuickAOTEntry *lhs = quick_aot_entry1;
+    const QuickAOTEntry *rhs = quick_aot_entry2;
+
+    return strcmp(lhs->signature, rhs->signature);
+}
+
+static bool
+quick_aot_entry_init()
+{
+    /* Sort the table in place by signature so that it can be searched with
+       bsearch(); this function always returns true */
+    size_t entry_count =
+        sizeof(quick_aot_entries) / sizeof(quick_aot_entries[0]);
+
+    qsort(quick_aot_entries, entry_count, sizeof(quick_aot_entries[0]),
+          quick_aot_entry_cmp);
+    return true;
+}
+
+/**
+ * Find the quick AOT entry thunk matching a function type.
+ *
+ * Builds a signature string of the form "(<params>)<result>" from the
+ * function type ('i' for VALUE_TYPE_I32, 'I' for VALUE_TYPE_I64, 'v' when
+ * there is no result) and binary-searches quick_aot_entries for it.
+ *
+ * @param func_type the function type to look up
+ *
+ * @return the func_ptr of the matching entry, or NULL when the type has
+ *         more than 5 params, more than 1 result, a param/result type other
+ *         than i32/i64, or no entry with that signature exists
+ */
+void *
+wasm_native_lookup_quick_aot_entry(const WASMFuncType *func_type)
+{
+    /* "(" + at most 5 params + ")" + result + '\0' fits well within 16 */
+    char signature[16] = { 0 };
+    QuickAOTEntry key = { 0 }, *found;
+    const uint8 *types = func_type->types;
+    uint32 param_count = func_type->param_count;
+    uint32 result_count = func_type->result_count;
+    uint32 type_idx, len = 0;
+
+    if (result_count > 1 || param_count > 5)
+        return NULL;
+
+    signature[len++] = '(';
+
+    for (type_idx = 0; type_idx < param_count; type_idx++) {
+        uint8 type = types[type_idx];
+
+        if (type != VALUE_TYPE_I32 && type != VALUE_TYPE_I64)
+            return NULL;
+        signature[len++] = (type == VALUE_TYPE_I32) ? 'i' : 'I';
+    }
+
+    signature[len++] = ')';
+
+    if (result_count != 0) {
+        /* the result type is read from the slot right after the params */
+        uint8 type = types[param_count];
+
+        if (type != VALUE_TYPE_I32 && type != VALUE_TYPE_I64)
+            return NULL;
+        signature[len++] = (type == VALUE_TYPE_I32) ? 'i' : 'I';
+    }
+    else {
+        signature[len++] = 'v';
+    }
+
+    key.signature = signature;
+    found = bsearch(&key, quick_aot_entries,
+                    sizeof(quick_aot_entries) / sizeof(QuickAOTEntry),
+                    sizeof(QuickAOTEntry), quick_aot_entry_cmp);
+
+    return found ? found->func_ptr : NULL;
+}
+#endif /* end of WASM_ENABLE_QUICK_AOT_ENTRY != 0 */

+ 5 - 0
core/iwasm/common/wasm_native.h

@@ -105,6 +105,11 @@ wasm_native_init();
 void
 wasm_native_destroy();
 
+#if WASM_ENABLE_QUICK_AOT_ENTRY != 0
+/* Look up the quick AOT entry whose signature matches the param and result
+   types of func_type; returns NULL when no matching entry exists */
+void *
+wasm_native_lookup_quick_aot_entry(const WASMFuncType *func_type);
+#endif
+
 #ifdef __cplusplus
 }
 #endif

+ 71 - 7
core/iwasm/common/wasm_runtime_common.c

@@ -161,7 +161,9 @@ static JitCompOptions jit_options = { 0 };
 #endif
 
 #if WASM_ENABLE_JIT != 0
-static LLVMJITOptions llvm_jit_options = { 3, 3, 0 };
+/* opt_level: 3, size_level: 3, segue-flags: 0,
+   quick_invoke_c_api_import: false */
+static LLVMJITOptions llvm_jit_options = { 3, 3, 0, false };
 #endif
 
 #if WASM_ENABLE_GC != 0
@@ -645,10 +647,10 @@ wasm_runtime_get_default_running_mode(void)
 }
 
 #if WASM_ENABLE_JIT != 0
-LLVMJITOptions
+LLVMJITOptions *
 wasm_runtime_get_llvm_jit_options(void)
 {
-    return llvm_jit_options;
+    return &llvm_jit_options;
 }
 #endif
 
@@ -5945,7 +5947,7 @@ wasm_runtime_invoke_c_api_native(WASMModuleInstanceCommon *module_inst,
     wasm_val_t *params = params_buf, *results = results_buf;
     wasm_trap_t *trap = NULL;
     bool ret = false;
-    wasm_val_vec_t params_vec, results_vec;
+    wasm_val_vec_t params_vec = { 0 }, results_vec = { 0 };
 
     if (func_type->param_count > 16) {
         if (!(params =
@@ -5973,12 +5975,10 @@ wasm_runtime_invoke_c_api_native(WASMModuleInstanceCommon *module_inst,
     params_vec.data = params;
     params_vec.num_elems = func_type->param_count;
     params_vec.size = func_type->param_count;
-    params_vec.size_of_elem = sizeof(wasm_val_t);
 
     results_vec.data = results;
     results_vec.num_elems = 0;
     results_vec.size = func_type->result_count;
-    results_vec.size_of_elem = sizeof(wasm_val_t);
 
     if (!with_env) {
         wasm_func_callback_t callback = (wasm_func_callback_t)func_ptr;
@@ -6014,7 +6014,6 @@ wasm_runtime_invoke_c_api_native(WASMModuleInstanceCommon *module_inst,
         wasm_runtime_set_exception(module_inst, "unsupported result type");
         goto fail;
     }
-    results_vec.num_elems = func_type->result_count;
     ret = true;
 
 fail:
@@ -6025,6 +6024,71 @@ fail:
     return ret;
 }
 
+/**
+ * Invoke a wasm-c-api import callback with already-built value arrays.
+ *
+ * Wraps params/results into wasm_val_vec_t views, calls the linked callback
+ * (with or without its env argument) and converts a returned trap into an
+ * exception set on the module instance.
+ *
+ * @param inst_comm the module instance, used to record exceptions
+ * @param c_api_import the import holding the linked callback, its
+ *        with_env_arg flag and env_arg
+ * @param params the argument values passed to the callback
+ * @param param_count the number of elements in params
+ * @param results the buffer the callback writes its results to
+ * @param result_count the capacity of results
+ *
+ * @return true if the callback ran without returning a trap; false if the
+ *         import is unlinked or the callback returned a trap (an exception
+ *         is set on the module instance in both cases)
+ */
+bool
+wasm_runtime_quick_invoke_c_api_native(WASMModuleInstanceCommon *inst_comm,
+                                       CApiFuncImport *c_api_import,
+                                       wasm_val_t *params, uint32 param_count,
+                                       wasm_val_t *results, uint32 result_count)
+{
+    WASMModuleInstance *module_inst = (WASMModuleInstance *)inst_comm;
+    void *func_ptr = c_api_import->func_ptr_linked;
+    bool with_env_arg = c_api_import->with_env_arg, ret = true;
+    wasm_val_vec_t params_vec = { 0 }, results_vec = { 0 };
+    wasm_trap_t *trap = NULL;
+
+    params_vec.data = params;
+    params_vec.num_elems = param_count;
+    params_vec.size = param_count;
+
+    /* num_elems starts at 0: no result has been produced yet */
+    results_vec.data = results;
+    results_vec.num_elems = 0;
+    results_vec.size = result_count;
+
+    if (!func_ptr) {
+        wasm_set_exception_with_id(module_inst, EXCE_CALL_UNLINKED_IMPORT_FUNC);
+        ret = false;
+        goto fail;
+    }
+
+    if (!with_env_arg) {
+        wasm_func_callback_t callback = (wasm_func_callback_t)func_ptr;
+        trap = callback(&params_vec, &results_vec);
+    }
+    else {
+        void *wasm_c_api_env = c_api_import->env_arg;
+        wasm_func_callback_with_env_t callback =
+            (wasm_func_callback_with_env_t)func_ptr;
+        trap = callback(wasm_c_api_env, &params_vec, &results_vec);
+    }
+
+    if (trap) {
+        /* check the message pointer itself before dereferencing it, so that
+           a trap carrying no message is reported as an unknown exception
+           instead of causing a NULL pointer access */
+        if (trap->message && trap->message->data) {
+            /* since trap->message->data does not end with '\0' */
+            char trap_message[108] = { 0 };
+            uint32 max_size_to_copy = (uint32)sizeof(trap_message) - 1;
+            uint32 size_to_copy = (trap->message->size < max_size_to_copy)
+                                      ? (uint32)trap->message->size
+                                      : max_size_to_copy;
+            bh_memcpy_s(trap_message, (uint32)sizeof(trap_message),
+                        trap->message->data, size_to_copy);
+            wasm_set_exception(module_inst, trap_message);
+        }
+        else {
+            wasm_set_exception(module_inst,
+                               "native function throw unknown exception");
+        }
+        wasm_trap_delete(trap);
+        ret = false;
+    }
+
+fail:
+#ifdef OS_ENABLE_HW_BOUND_CHECK
+    if (!ret)
+        wasm_runtime_access_exce_check_guard_page();
+#endif
+    return ret;
+}
+
 void
 wasm_runtime_show_app_heap_corrupted_prompt()
 {

+ 12 - 1
core/iwasm/common/wasm_runtime_common.h

@@ -489,6 +489,7 @@ typedef struct LLVMJITOptions {
     uint32 opt_level;
     uint32 size_level;
     uint32 segue_flags;
+    bool quick_invoke_c_api_import;
 } LLVMJITOptions;
 #endif
 
@@ -522,7 +523,7 @@ wasm_runtime_get_default_running_mode(void);
 
 #if WASM_ENABLE_JIT != 0
 /* Internal API */
-LLVMJITOptions
+LLVMJITOptions *
 wasm_runtime_get_llvm_jit_options(void);
 #endif
 
@@ -1151,6 +1152,16 @@ wasm_runtime_invoke_c_api_native(WASMModuleInstanceCommon *module_inst,
                                  uint32 argc, uint32 *argv, bool with_env,
                                  void *wasm_c_api_env);
 
+struct CApiFuncImport;
+/* A quick version of wasm_runtime_invoke_c_api_native to directly invoke
+   wasm-c-api import function from jitted code to improve performance */
+bool
+wasm_runtime_quick_invoke_c_api_native(WASMModuleInstanceCommon *module_inst,
+                                       struct CApiFuncImport *c_api_import,
+                                       wasm_val_t *params, uint32 param_count,
+                                       wasm_val_t *results,
+                                       uint32 result_count);
+
 void
 wasm_runtime_show_app_heap_corrupted_prompt();
 

+ 44 - 26
core/iwasm/compilation/aot_emit_aot_file.c

@@ -1229,9 +1229,6 @@ get_native_symbol_list_size(AOTCompContext *comp_ctx)
     return len;
 }
 
-static uint32
-get_name_section_size(AOTCompData *comp_data);
-
 #if WASM_ENABLE_STRINGREF != 0
 static uint32
 get_string_literal_section_size(AOTCompContext *comp_ctx,
@@ -1298,15 +1295,6 @@ get_aot_file_size(AOTCompContext *comp_ctx, AOTCompData *comp_data,
         size += get_native_symbol_list_size(comp_ctx);
     }
 
-    if (comp_ctx->enable_aux_stack_frame) {
-        /* custom name section */
-        size = align_uint(size, 4);
-        /* section id + section size + sub section id */
-        size += (uint32)sizeof(uint32) * 3;
-        size += (comp_data->aot_name_section_size =
-                     get_name_section_size(comp_data));
-    }
-
     size_custom_section = get_custom_sections_size(comp_ctx, comp_data);
     if (size_custom_section > 0) {
         size = align_uint(size, 4);
@@ -1694,6 +1682,21 @@ get_custom_sections_size(AOTCompContext *comp_ctx, AOTCompData *comp_data)
         const uint8 *content = NULL;
         uint32 length = 0;
 
+        if (strcmp(section_name, "name") == 0) {
+            /* custom name section */
+            comp_data->aot_name_section_size = get_name_section_size(comp_data);
+            if (comp_data->aot_name_section_size == 0) {
+                LOG_WARNING("Can't find custom section [name], ignore it");
+                continue;
+            }
+
+            size = align_uint(size, 4);
+            /* section id + section size + sub section id */
+            size += (uint32)sizeof(uint32) * 3;
+            size += comp_data->aot_name_section_size;
+            continue;
+        }
+
         content = wasm_loader_get_custom_section(comp_data->wasm_module,
                                                  section_name, &length);
         if (!content) {
@@ -2827,23 +2830,25 @@ static bool
 aot_emit_name_section(uint8 *buf, uint8 *buf_end, uint32 *p_offset,
                       AOTCompData *comp_data, AOTCompContext *comp_ctx)
 {
-    if (comp_ctx->enable_aux_stack_frame) {
-        uint32 offset = *p_offset;
+    uint32 offset = *p_offset;
 
-        *p_offset = offset = align_uint(offset, 4);
+    if (comp_data->aot_name_section_size == 0)
+        return true;
 
-        EMIT_U32(AOT_SECTION_TYPE_CUSTOM);
-        /* sub section id + name section size */
-        EMIT_U32(sizeof(uint32) * 1 + comp_data->aot_name_section_size);
-        EMIT_U32(AOT_CUSTOM_SECTION_NAME);
-        bh_memcpy_s((uint8 *)(buf + offset), (uint32)(buf_end - buf),
-                    comp_data->aot_name_section_buf,
-                    (uint32)comp_data->aot_name_section_size);
-        offset += comp_data->aot_name_section_size;
+    offset = align_uint(offset, 4);
 
-        *p_offset = offset;
-    }
+    EMIT_U32(AOT_SECTION_TYPE_CUSTOM);
+    /* sub section id + name section size */
+    EMIT_U32(sizeof(uint32) * 1 + comp_data->aot_name_section_size);
+    EMIT_U32(AOT_CUSTOM_SECTION_NAME);
+    bh_memcpy_s((uint8 *)(buf + offset), (uint32)(buf_end - buf),
+                comp_data->aot_name_section_buf,
+                (uint32)comp_data->aot_name_section_size);
+    offset += comp_data->aot_name_section_size;
+
+    *p_offset = offset;
 
+    LOG_DEBUG("emit name section");
     return true;
 }
 
@@ -2903,6 +2908,16 @@ aot_emit_custom_sections(uint8 *buf, uint8 *buf_end, uint32 *p_offset,
         const uint8 *content = NULL;
         uint32 length = 0;
 
+        if (strcmp(section_name, "name") == 0) {
+            *p_offset = offset;
+            if (!aot_emit_name_section(buf, buf_end, p_offset, comp_data,
+                                       comp_ctx))
+                return false;
+
+            offset = *p_offset;
+            continue;
+        }
+
         content = wasm_loader_get_custom_section(comp_data->wasm_module,
                                                  section_name, &length);
         if (!content) {
@@ -4415,6 +4430,10 @@ aot_emit_aot_file_buf(AOTCompContext *comp_ctx, AOTCompData *comp_data,
         return NULL;
 
     aot_file_size = get_aot_file_size(comp_ctx, comp_data, obj_data);
+    if (aot_file_size == 0) {
+        aot_set_last_error("get aot file size failed");
+        goto fail1;
+    }
 
     if (!(buf = aot_file_buf = wasm_runtime_malloc(aot_file_size))) {
         aot_set_last_error("allocate memory failed.");
@@ -4437,7 +4456,6 @@ aot_emit_aot_file_buf(AOTCompContext *comp_ctx, AOTCompData *comp_data,
         || !aot_emit_relocation_section(buf, buf_end, &offset, comp_ctx,
                                         comp_data, obj_data)
         || !aot_emit_native_symbol(buf, buf_end, &offset, comp_ctx)
-        || !aot_emit_name_section(buf, buf_end, &offset, comp_data, comp_ctx)
         || !aot_emit_custom_sections(buf, buf_end, &offset, comp_data, comp_ctx)
 #if WASM_ENABLE_STRINGREF != 0
         || !aot_emit_string_literal_section(buf, buf_end, &offset, comp_data,

+ 246 - 12
core/iwasm/compilation/aot_emit_function.c

@@ -292,6 +292,213 @@ call_aot_invoke_native_func(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     return true;
 }
 
+/**
+ * Emit LLVM IR that calls wasm_runtime_quick_invoke_c_api_native() for an
+ * import function, passing arguments and receiving results as an array of
+ * wasm_val_t stored in func_ctx->argv_buf.
+ *
+ * Layout written into argv_buf: one wasm_val_t per parameter (kind byte at
+ * offset 0, value at offsetof(wasm_val_t, of)), immediately followed by one
+ * wasm_val_t per result. After the call, every result value is loaded and
+ * pushed onto the compilation value stack.
+ *
+ * @param comp_ctx the compilation context
+ * @param func_ctx the context of the function being compiled
+ * @param import_func_idx index used to address the CApiFuncImport entry
+ * @param aot_func_type type of the callee; only i32/i64/f32/f64 params and
+ *        results are handled, any other type hits bh_assert(0)
+ * @param params LLVM values of the call arguments, one per parameter
+ *
+ * @return true if the IR was emitted, false otherwise
+ */
+static bool
+call_aot_invoke_c_api_native(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
+                             uint32 import_func_idx, AOTFuncType *aot_func_type,
+                             LLVMValueRef *params)
+{
+    LLVMTypeRef int8_ptr_type, param_types[6], ret_type;
+    LLVMTypeRef value_ptr_type = NULL, value_type = NULL;
+    LLVMTypeRef func_type, func_ptr_type;
+    LLVMValueRef param_values[6], res, func, value = NULL, offset;
+    LLVMValueRef c_api_func_imports, c_api_func_import;
+    LLVMValueRef c_api_params, c_api_results, value_ret;
+    LLVMValueRef c_api_param_kind, c_api_param_value;
+    LLVMValueRef c_api_result_value;
+    uint32 offset_c_api_func_imports, i;
+    uint32 offset_param_kind, offset_param_value;
+    char buf[16];
+
+    /* `int8 **` type */
+    int8_ptr_type = LLVMPointerType(INT8_PTR_TYPE, 0);
+    if (!int8_ptr_type) {
+        aot_set_last_error("create llvm pointer type failed");
+        return false;
+    }
+
+    param_types[0] = INT8_PTR_TYPE; /* module_inst */
+    param_types[1] = INT8_PTR_TYPE; /* CApiFuncImport *c_api_import */
+    param_types[2] = INT8_PTR_TYPE; /* wasm_val_t *params */
+    param_types[3] = I32_TYPE;      /* uint32 param_count */
+    param_types[4] = INT8_PTR_TYPE; /* wasm_val_t *results */
+    param_types[5] = I32_TYPE;      /* uint32 result_count */
+
+    ret_type = INT8_TYPE;
+
+    GET_AOT_FUNCTION(wasm_runtime_quick_invoke_c_api_native, 6);
+
+    param_values[0] = func_ctx->aot_inst;
+
+    /* Get module_inst->e->common.c_api_func_imports */
+    offset_c_api_func_imports =
+        get_module_inst_extra_offset(comp_ctx)
+        + (comp_ctx->is_jit_mode
+               ? offsetof(WASMModuleInstanceExtra, common.c_api_func_imports)
+               /* offsetof(AOTModuleInstanceExtra, common.c_api_func_imports) */
+               : sizeof(uint64));
+    offset = I32_CONST(offset_c_api_func_imports);
+    CHECK_LLVM_CONST(offset);
+    c_api_func_imports =
+        LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, func_ctx->aot_inst,
+                              &offset, 1, "c_api_func_imports_addr");
+    c_api_func_imports =
+        LLVMBuildBitCast(comp_ctx->builder, c_api_func_imports, int8_ptr_type,
+                         "c_api_func_imports_ptr");
+    c_api_func_imports =
+        LLVMBuildLoad2(comp_ctx->builder, INT8_PTR_TYPE, c_api_func_imports,
+                       "c_api_func_imports");
+
+    /* Get &c_api_func_imports[func_idx], note size of CApiFuncImport
+       is pointer_size * 3 */
+    offset = I32_CONST((comp_ctx->pointer_size * 3) * import_func_idx);
+    CHECK_LLVM_CONST(offset);
+    c_api_func_import =
+        LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, c_api_func_imports,
+                              &offset, 1, "c_api_func_import");
+
+    param_values[1] = c_api_func_import;
+    param_values[2] = c_api_params = func_ctx->argv_buf;
+    param_values[3] = I32_CONST(aot_func_type->param_count);
+    CHECK_LLVM_CONST(param_values[3]);
+
+    /* The results area starts right after the last param's wasm_val_t.
+       Ensure sizeof(wasm_val_t) is 16 bytes */
+    offset = I32_CONST(sizeof(wasm_val_t) * aot_func_type->param_count);
+    /* Validate the constant like every other offset in this function
+       before it is handed to the GEP builder */
+    CHECK_LLVM_CONST(offset);
+    c_api_results =
+        LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, func_ctx->argv_buf,
+                              &offset, 1, "results");
+    param_values[4] = c_api_results;
+
+    param_values[5] = I32_CONST(aot_func_type->result_count);
+    CHECK_LLVM_CONST(param_values[5]);
+
+    /* Set each c api param */
+    for (i = 0; i < aot_func_type->param_count; i++) {
+        /* Ensure sizeof(wasm_val_t) is 16 bytes */
+        offset_param_kind = sizeof(wasm_val_t) * i;
+        offset = I32_CONST(offset_param_kind);
+        CHECK_LLVM_CONST(offset);
+        c_api_param_kind =
+            LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, c_api_params,
+                                  &offset, 1, "c_api_param_kind_addr");
+        c_api_param_kind =
+            LLVMBuildBitCast(comp_ctx->builder, c_api_param_kind, INT8_PTR_TYPE,
+                             "c_api_param_kind_ptr");
+
+        /* Select the wasm_valkind_t constant stored in the kind byte */
+        switch (aot_func_type->types[i]) {
+            case VALUE_TYPE_I32:
+                value = I8_CONST(WASM_I32);
+                break;
+            case VALUE_TYPE_F32:
+                value = I8_CONST(WASM_F32);
+                break;
+            case VALUE_TYPE_I64:
+                value = I8_CONST(WASM_I64);
+                break;
+            case VALUE_TYPE_F64:
+                value = I8_CONST(WASM_F64);
+                break;
+            default:
+                bh_assert(0);
+                break;
+        }
+        CHECK_LLVM_CONST(value);
+
+        LLVMBuildStore(comp_ctx->builder, value, c_api_param_kind);
+
+        /* Ensure offsetof(wasm_val_t, of) is 8 bytes */
+        offset_param_value = offset_param_kind + offsetof(wasm_val_t, of);
+        offset = I32_CONST(offset_param_value);
+        CHECK_LLVM_CONST(offset);
+        c_api_param_value =
+            LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, c_api_params,
+                                  &offset, 1, "c_api_param_value_addr");
+
+        /* Select the pointer type used to store the param value */
+        switch (aot_func_type->types[i]) {
+            case VALUE_TYPE_I32:
+                value_ptr_type = INT32_PTR_TYPE;
+                break;
+            case VALUE_TYPE_F32:
+                value_ptr_type = F32_PTR_TYPE;
+                break;
+            case VALUE_TYPE_I64:
+                value_ptr_type = INT64_PTR_TYPE;
+                break;
+            case VALUE_TYPE_F64:
+                value_ptr_type = F64_PTR_TYPE;
+                break;
+            default:
+                bh_assert(0);
+                break;
+        }
+
+        c_api_param_value =
+            LLVMBuildBitCast(comp_ctx->builder, c_api_param_value,
+                             value_ptr_type, "c_api_param_value_ptr");
+        LLVMBuildStore(comp_ctx->builder, params[i], c_api_param_value);
+    }
+
+    /* Call the function */
+    if (!(res = LLVMBuildCall2(comp_ctx->builder, func_type, func, param_values,
+                               6, "call"))) {
+        aot_set_last_error("LLVM build call failed.");
+        goto fail;
+    }
+
+    /* Check whether exception was thrown when executing the function */
+    if (comp_ctx->enable_bound_check
+        && !check_call_return(comp_ctx, func_ctx, res)) {
+        goto fail;
+    }
+
+    /* Load each result from the results area and push it to the stack */
+    for (i = 0; i < aot_func_type->result_count; i++) {
+        /* Ensure sizeof(wasm_val_t) is 16 bytes and
+           offsetof(wasm_val_t, of) is 8 bytes */
+        uint32 offset_result_value =
+            sizeof(wasm_val_t) * i + offsetof(wasm_val_t, of);
+
+        offset = I32_CONST(offset_result_value);
+        CHECK_LLVM_CONST(offset);
+        c_api_result_value =
+            LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, c_api_results,
+                                  &offset, 1, "c_api_result_value_addr");
+
+        /* Result types follow the param types in aot_func_type->types */
+        switch (aot_func_type->types[aot_func_type->param_count + i]) {
+            case VALUE_TYPE_I32:
+                value_type = I32_TYPE;
+                value_ptr_type = INT32_PTR_TYPE;
+                break;
+            case VALUE_TYPE_F32:
+                value_type = F32_TYPE;
+                value_ptr_type = F32_PTR_TYPE;
+                break;
+            case VALUE_TYPE_I64:
+                value_type = I64_TYPE;
+                value_ptr_type = INT64_PTR_TYPE;
+                break;
+            case VALUE_TYPE_F64:
+                value_type = F64_TYPE;
+                value_ptr_type = F64_PTR_TYPE;
+                break;
+            default:
+                bh_assert(0);
+                break;
+        }
+
+        c_api_result_value =
+            LLVMBuildBitCast(comp_ctx->builder, c_api_result_value,
+                             value_ptr_type, "c_api_result_value_ptr");
+        snprintf(buf, sizeof(buf), "%s%u", "ret", i);
+        value_ret = LLVMBuildLoad2(comp_ctx->builder, value_type,
+                                   c_api_result_value, buf);
+
+        PUSH(value_ret, aot_func_type->types[aot_func_type->param_count + i]);
+    }
+
+    return true;
+fail:
+    return false;
+}
+
 #if WASM_ENABLE_AOT_STACK_FRAME != 0 || WASM_ENABLE_JIT_STACK_FRAME != 0
 static bool
 call_aot_alloc_frame_func(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
@@ -1199,6 +1406,7 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     const char *signature = NULL;
     bool ret = false;
     char buf[32];
+    bool quick_invoke_c_api_import = false;
 
     /* Check function index */
     if (func_idx >= import_func_count + func_count) {
@@ -1378,17 +1586,43 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
         }
 
         if (!signature) {
-            /* call aot_invoke_native() */
-            if (!call_aot_invoke_native_func(
-                    comp_ctx, func_ctx, import_func_idx, func_type,
-                    param_types + 1, param_values + 1, param_count,
-                    param_cell_num, ret_type, wasm_ret_type, &value_ret, &res))
-                goto fail;
-            /* Check whether there was exception thrown when executing
-               the function */
-            if ((comp_ctx->enable_bound_check || is_win_platform(comp_ctx))
-                && !check_call_return(comp_ctx, func_ctx, res))
-                goto fail;
+            if (comp_ctx->quick_invoke_c_api_import) {
+                uint32 buf_size_needed =
+                    sizeof(wasm_val_t) * (param_count + result_count);
+
+                /* length of exec_env->argv_buf is 64 */
+                if (buf_size_needed < sizeof(uint32) * 64) {
+                    for (i = 0; i < param_count + result_count; i++) {
+                        /* Only support i32/i64/f32/f64 now */
+                        if (!(func_type->types[i] == VALUE_TYPE_I32
+                              || func_type->types[i] == VALUE_TYPE_I64
+                              || func_type->types[i] == VALUE_TYPE_F32
+                              || func_type->types[i] == VALUE_TYPE_F64))
+                            break;
+                    }
+                    if (i == param_count + result_count)
+                        quick_invoke_c_api_import = true;
+                }
+            }
+            if (quick_invoke_c_api_import) {
+                if (!call_aot_invoke_c_api_native(comp_ctx, func_ctx, func_idx,
+                                                  func_type, param_values + 1))
+                    goto fail;
+            }
+            else {
+                /* call aot_invoke_native() */
+                if (!call_aot_invoke_native_func(
+                        comp_ctx, func_ctx, import_func_idx, func_type,
+                        param_types + 1, param_values + 1, param_count,
+                        param_cell_num, ret_type, wasm_ret_type, &value_ret,
+                        &res))
+                    goto fail;
+                /* Check whether there was exception thrown when executing
+                   the function */
+                if ((comp_ctx->enable_bound_check || is_win_platform(comp_ctx))
+                    && !check_call_return(comp_ctx, func_ctx, res))
+                    goto fail;
+            }
         }
         else { /* call native func directly */
             LLVMTypeRef native_func_type, func_ptr_type;
@@ -1545,7 +1779,7 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
             goto fail;
     }
 
-    if (func_type->result_count > 0) {
+    if (func_type->result_count > 0 && !quick_invoke_c_api_import) {
         /* Push the first result to stack */
         PUSH(value_ret, func_type->types[func_type->param_count]);
         /* Load extra result from its address and push to stack */

+ 3 - 0
core/iwasm/compilation/aot_llvm.c

@@ -2576,6 +2576,9 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
     if (option->enable_stack_estimation)
         comp_ctx->enable_stack_estimation = true;
 
+    if (option->quick_invoke_c_api_import)
+        comp_ctx->quick_invoke_c_api_import = true;
+
     if (option->llvm_passes)
         comp_ctx->llvm_passes = option->llvm_passes;
 

+ 4 - 0
core/iwasm/compilation/aot_llvm.h

@@ -433,6 +433,10 @@ typedef struct AOTCompContext {
     /* Enable LLVM PGO (Profile-Guided Optimization) */
     bool enable_llvm_pgo;
 
+    /* Treat unknown import function as wasm-c-api import function
+       and allow to directly invoke it from AOT/JIT code */
+    bool quick_invoke_c_api_import;
+
     /* Use profile file collected by LLVM PGO */
     char *use_prof_file;
 

+ 4 - 1
core/iwasm/compilation/aot_llvm_extra.cpp

@@ -409,7 +409,10 @@ aot_compress_aot_func_names(AOTCompContext *comp_ctx, uint32 *p_size)
         NameStrs.push_back(str);
     }
 
-    if (collectPGOFuncNameStrings(NameStrs, true, Result)) {
+#if LLVM_VERSION_MAJOR < 18
+#define collectGlobalObjectNameStrings collectPGOFuncNameStrings
+#endif
+    if (collectGlobalObjectNameStrings(NameStrs, true, Result)) {
         aot_set_last_error("collect pgo func name strings failed");
         return NULL;
     }

+ 19 - 0
core/iwasm/compilation/aot_llvm_extra2.cpp

@@ -58,6 +58,7 @@ convert(LLVMRelocMode reloc_mode)
 #endif
 }
 
+#if LLVM_VERSION_MAJOR < 18
 static llvm::CodeGenOpt::Level
 convert(LLVMCodeGenOptLevel opt_level)
 {
@@ -74,6 +75,24 @@ convert(LLVMCodeGenOptLevel opt_level)
     bh_assert(0);
     return llvm::CodeGenOpt::None;
 }
+#else
+/* Map the LLVM-C optimization level onto llvm::CodeGenOptLevel (the enum
+   used when LLVM_VERSION_MAJOR >= 18). An unrecognized level triggers
+   bh_assert(0) and falls back to CodeGenOptLevel::None. */
+static llvm::CodeGenOptLevel
+convert(LLVMCodeGenOptLevel opt_level)
+{
+    if (opt_level == LLVMCodeGenLevelNone)
+        return llvm::CodeGenOptLevel::None;
+
+    if (opt_level == LLVMCodeGenLevelLess)
+        return llvm::CodeGenOptLevel::Less;
+
+    if (opt_level == LLVMCodeGenLevelDefault)
+        return llvm::CodeGenOptLevel::Default;
+
+    if (opt_level == LLVMCodeGenLevelAggressive)
+        return llvm::CodeGenOptLevel::Aggressive;
+
+    /* Not one of the known levels */
+    bh_assert(0);
+    return llvm::CodeGenOptLevel::None;
+}
+#endif
 
 static llvm::Optional<llvm::CodeModel::Model>
 convert(LLVMCodeModel code_model, bool *jit)

+ 27 - 0
core/iwasm/compilation/aot_orc_extra.cpp

@@ -3,6 +3,10 @@
  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  */
 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
 #include "llvm-c/LLJIT.h"
 #include "llvm-c/Orc.h"
 #include "llvm-c/OrcEE.h"
@@ -44,6 +48,7 @@ class InProgressLookupState;
 class OrcV2CAPIHelper
 {
   public:
+#if LLVM_VERSION_MAJOR < 18
     using PoolEntry = SymbolStringPtr::PoolEntry;
     using PoolEntryPtr = SymbolStringPtr::PoolEntryPtr;
 
@@ -86,6 +91,7 @@ class OrcV2CAPIHelper
         S.S = P;
     }
 
+#endif
     static InProgressLookupState *extractLookupState(LookupState &LS)
     {
         return LS.IPLS.release();
@@ -101,6 +107,20 @@ class OrcV2CAPIHelper
 } // namespace llvm
 
 // ORC.h
+#if LLVM_VERSION_MAJOR >= 18
+/* Convert a SymbolStringPoolEntryUnsafe into the opaque C handle type by
+   reinterpreting the raw pointer returned by E.rawPtr(). */
+inline LLVMOrcSymbolStringPoolEntryRef
+wrap(SymbolStringPoolEntryUnsafe E)
+{
+    return reinterpret_cast<LLVMOrcSymbolStringPoolEntryRef>(E.rawPtr());
+}
+
+/* Convert the opaque C handle back by reinterpreting it as a
+   SymbolStringPoolEntryUnsafe::PoolEntry pointer. The return statement
+   relies on that pointer converting to SymbolStringPoolEntryUnsafe
+   (presumably through its constructor; confirm against the LLVM headers). */
+inline SymbolStringPoolEntryUnsafe
+unwrap(LLVMOrcSymbolStringPoolEntryRef E)
+{
+    return reinterpret_cast<SymbolStringPoolEntryUnsafe::PoolEntry *>(E);
+}
+#endif
+
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ExecutionSession, LLVMOrcExecutionSessionRef)
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(IRTransformLayer, LLVMOrcIRTransformLayerRef)
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(JITDylib, LLVMOrcJITDylibRef)
@@ -108,8 +128,10 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(JITTargetMachineBuilder,
                                    LLVMOrcJITTargetMachineBuilderRef)
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ObjectTransformLayer,
                                    LLVMOrcObjectTransformLayerRef)
+#if LLVM_VERSION_MAJOR < 18
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(OrcV2CAPIHelper::PoolEntry,
                                    LLVMOrcSymbolStringPoolEntryRef)
+#endif
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ObjectLayer, LLVMOrcObjectLayerRef)
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(SymbolStringPool, LLVMOrcSymbolStringPoolRef)
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ThreadSafeModule, LLVMOrcThreadSafeModuleRef)
@@ -292,8 +314,13 @@ LLVMOrcSymbolStringPoolEntryRef
 LLVMOrcLLLazyJITMangleAndIntern(LLVMOrcLLLazyJITRef J,
                                 const char *UnmangledName)
 {
+#if LLVM_VERSION_MAJOR < 18
     return wrap(OrcV2CAPIHelper::moveFromSymbolStringPtr(
         unwrap(J)->mangleAndIntern(UnmangledName)));
+#else
+    return wrap(SymbolStringPoolEntryUnsafe::take(
+        unwrap(J)->mangleAndIntern(UnmangledName)));
+#endif
 }
 
 LLVMOrcJITDylibRef

+ 3 - 3
core/iwasm/compilation/debug/dwarf_extractor.cpp

@@ -152,7 +152,7 @@ dwarf_gen_mock_vm_info(AOTCompContext *comp_ctx)
 
     comp_unit = LLVMDIBuilderCreateCompileUnit(
       comp_ctx->debug_builder, LLVMDWARFSourceLanguageC, file_info,
-      "ant compiler", 12, 0, NULL, 0, 1, NULL, 0, LLVMDWARFEmissionFull, 0, 0,
+      /* producer string followed by its length in bytes (17) */
+      "WAMR AoT compiler", 17, 0, NULL, 0, 1, NULL, 0, LLVMDWARFEmissionFull, 0, 0,
       0, "/", 1, "", 0);
 
     LLVMTypeRef ParamTys[] = {
@@ -208,8 +208,8 @@ dwarf_gen_comp_unit_info(const AOTCompContext *comp_ctx)
 
         comp_unit = LLVMDIBuilderCreateCompileUnit(
             comp_ctx->debug_builder, LLDB_TO_LLVM_LANG_TYPE(lang_type),
-            comp_ctx->debug_file, "ant compiler", 12, 0, NULL, 0, 1, NULL, 0,
-            LLVMDWARFEmissionFull, 0, 0, 0, "/", 1, "", 0);
+            /* producer string is followed by its length in bytes (17) */
+            comp_ctx->debug_file, "WAMR AoT compiler", 17, 0, NULL, 0, 1, NULL,
+            0, LLVMDWARFEmissionFull, 0, 0, 0, "/", 1, "", 0);
     }
     return comp_unit;
 }

+ 2 - 1
core/iwasm/include/aot_comp_option.h

@@ -19,6 +19,7 @@ typedef struct AOTCompOption {
     bool enable_tail_call;
     bool enable_simd;
     bool enable_ref_types;
+    bool enable_gc;
     bool enable_aux_stack_check;
     bool enable_aux_stack_frame;
     bool enable_perf_profiling;
@@ -27,7 +28,7 @@ typedef struct AOTCompOption {
     bool disable_llvm_lto;
     bool enable_llvm_pgo;
     bool enable_stack_estimation;
-    bool enable_gc;
+    bool quick_invoke_c_api_import;
     char *use_prof_file;
     uint32_t opt_level;
     uint32_t size_level;

+ 1 - 0
core/iwasm/include/wasm_c_api.h

@@ -417,6 +417,7 @@ struct wasm_ref_t;
 
 typedef struct wasm_val_t {
   wasm_valkind_t kind;
+  uint8_t __paddings[7];
   union {
     int32_t i32;
     int64_t i64;

+ 1 - 0
core/iwasm/include/wasm_export.h

@@ -202,6 +202,7 @@ struct wasm_ref_t;
 
 typedef struct wasm_val_t {
     wasm_valkind_t kind;
+    uint8_t __paddings[7];
     union {
         /* also represent a function index */
         int32_t i32;

+ 5 - 0
core/iwasm/interpreter/wasm.h

@@ -351,6 +351,11 @@ typedef struct WASMFuncType {
     uint16 ref_count;
 #endif
 
+#if WASM_ENABLE_QUICK_AOT_ENTRY != 0
+    /* Quick AOT/JIT entry of this func type */
+    void *quick_aot_entry;
+#endif
+
     /* types of params and results, only store the first byte
      * of the type, if it cannot be described with one byte,
      * then the full type info is stored in ref_type_maps */

+ 22 - 5
core/iwasm/interpreter/wasm_interp_classic.c

@@ -6358,12 +6358,29 @@ llvm_jit_call_func_bytecode(WASMModuleInstance *module_inst,
         ret = true;
     }
     else {
-        ret = wasm_runtime_invoke_native(
-            exec_env, module_inst->func_ptrs[func_idx], func_type, NULL, NULL,
-            argv, argc, argv);
-
-        if (ret)
+#if WASM_ENABLE_QUICK_AOT_ENTRY != 0
+        /* Quick call if the quick jit entry is registered */
+        if (func_type->quick_aot_entry) {
+            void (*invoke_native)(
+                void *func_ptr, uint8 ret_type, void *exec_env, uint32 *argv,
+                uint32 *argv_ret) = func_type->quick_aot_entry;
+            invoke_native(module_inst->func_ptrs[func_idx],
+                          func_type->result_count > 0
+                              ? func_type->types[func_type->param_count]
+                              : VALUE_TYPE_VOID,
+                          exec_env, argv, argv);
             ret = !wasm_copy_exception(module_inst, NULL);
+        }
+        else
+#endif
+        {
+            ret = wasm_runtime_invoke_native(
+                exec_env, module_inst->func_ptrs[func_idx], func_type, NULL,
+                NULL, argv, argc, argv);
+
+            if (ret)
+                ret = !wasm_copy_exception(module_inst, NULL);
+        }
     }
 
 fail:

+ 129 - 44
core/iwasm/interpreter/wasm_loader.c

@@ -1986,6 +1986,10 @@ load_type_section(const uint8 *buf, const uint8 *buf_end, WASMModule *module,
             type->param_cell_num = (uint16)param_cell_num;
             type->ret_cell_num = (uint16)ret_cell_num;
 
+#if WASM_ENABLE_QUICK_AOT_ENTRY != 0
+            type->quick_aot_entry = wasm_native_lookup_quick_aot_entry(type);
+#endif
+
             /* If there is already a same type created, use it instead */
             for (j = 0; j < i; j++) {
                 if (wasm_type_equal(type, module->types[j], module->types, i)) {
@@ -4645,8 +4649,12 @@ handle_name_section(const uint8 *buf, const uint8 *buf_end, WASMModule *module,
                             if (!(module->functions[func_index]->field_name =
                                       const_str_list_insert(
                                           p, func_name_len, module,
-                                          is_load_from_file_buf, error_buf,
-                                          error_buf_size))) {
+#if WASM_ENABLE_WAMR_COMPILER != 0
+                                          false,
+#else
+                                          is_load_from_file_buf,
+#endif
+                                          error_buf, error_buf_size))) {
                                 return false;
                             }
                         }
@@ -4818,7 +4826,7 @@ static bool
 init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
                                uint32 error_buf_size)
 {
-    LLVMJITOptions llvm_jit_options = wasm_runtime_get_llvm_jit_options();
+    LLVMJITOptions *llvm_jit_options = wasm_runtime_get_llvm_jit_options();
     AOTCompOption option = { 0 };
     char *aot_last_error;
     uint64 size;
@@ -4863,10 +4871,11 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
 
     option.is_jit_mode = true;
 
-    llvm_jit_options = wasm_runtime_get_llvm_jit_options();
-    option.opt_level = llvm_jit_options.opt_level;
-    option.size_level = llvm_jit_options.size_level;
-    option.segue_flags = llvm_jit_options.segue_flags;
+    option.opt_level = llvm_jit_options->opt_level;
+    option.size_level = llvm_jit_options->size_level;
+    option.segue_flags = llvm_jit_options->segue_flags;
+    option.quick_invoke_c_api_import =
+        llvm_jit_options->quick_invoke_c_api_import;
 
 #if WASM_ENABLE_BULK_MEMORY != 0
     option.enable_bulk_memory = true;
@@ -7192,6 +7201,9 @@ typedef struct BranchBlock {
     BranchBlockPatch *patch_list;
     /* This is used to save params frame_offset of of if block */
     int16 *param_frame_offsets;
+    /* This is used to store available param num for if/else branch, so the else
+     * opcode can know how many parameters should be copied to the stack */
+    uint32 available_param_num;
 #endif
 
     /* Indicate the operand stack is in polymorphic state.
@@ -9771,15 +9783,18 @@ fail:
  * 1) POP original parameter out;
  * 2) Push and copy original values to dynamic space.
  * The copy instruction format:
- *   Part a: param count
+ *   Part a: available param count
  *   Part b: all param total cell num
  *   Part c: each param's cell_num, src offset and dst offset
  *   Part d: each param's src offset
  *   Part e: each param's dst offset
+ * Note: if the stack is in polymorphic state, the actual copied parameters may
+ * be fewer than the defined number in block type
  */
 static bool
 copy_params_to_dynamic_space(WASMLoaderContext *loader_ctx, bool is_if_block,
-                             char *error_buf, uint32 error_buf_size)
+                             uint32 *p_available_param_count, char *error_buf,
+                             uint32 error_buf_size)
 {
     bool ret = false;
     int16 *frame_offset = NULL;
@@ -9791,35 +9806,47 @@ copy_params_to_dynamic_space(WASMLoaderContext *loader_ctx, bool is_if_block,
     BlockType *block_type = &block->block_type;
     WASMFuncType *wasm_type = block_type->u.type;
     uint32 param_count = block_type->u.type->param_count;
+    uint32 available_param_count = 0;
     int16 condition_offset = 0;
     bool disable_emit = false;
     int16 operand_offset = 0;
+    uint64 size;
 
-    uint64 size = (uint64)param_count * (sizeof(*cells) + sizeof(*src_offsets));
+    if (is_if_block)
+        condition_offset = *loader_ctx->frame_offset;
+
+    /* POP original parameter out */
+    for (i = 0; i < param_count; i++) {
+        int32 available_stack_cell =
+            (int32)(loader_ctx->stack_cell_num - block->stack_cell_num);
+
+        if (available_stack_cell <= 0 && block->is_stack_polymorphic)
+            break;
+
+        POP_OFFSET_TYPE(wasm_type->types[param_count - i - 1]);
+        wasm_loader_emit_backspace(loader_ctx, sizeof(int16));
+    }
+    available_param_count = i;
+
+    size =
+        (uint64)available_param_count * (sizeof(*cells) + sizeof(*src_offsets));
 
     /* For if block, we also need copy the condition operand offset. */
     if (is_if_block)
         size += sizeof(*cells) + sizeof(*src_offsets);
 
     /* Allocate memory for the emit data */
-    if (!(emit_data = loader_malloc(size, error_buf, error_buf_size)))
+    if ((size > 0)
+        && !(emit_data = loader_malloc(size, error_buf, error_buf_size)))
         return false;
 
     cells = emit_data;
     src_offsets = (int16 *)(cells + param_count);
 
-    if (is_if_block)
-        condition_offset = *loader_ctx->frame_offset;
-
-    /* POP original parameter out */
-    for (i = 0; i < param_count; i++) {
-        POP_OFFSET_TYPE(wasm_type->types[param_count - i - 1]);
-        wasm_loader_emit_backspace(loader_ctx, sizeof(int16));
-    }
     frame_offset = loader_ctx->frame_offset;
 
     /* Get each param's cell num and src offset */
-    for (i = 0; i < param_count; i++) {
+    for (i = 0; i < available_param_count; i++) {
         cell = (uint8)wasm_value_type_cell_num(wasm_type->types[i]);
         cells[i] = cell;
         src_offsets[i] = *frame_offset;
@@ -9829,34 +9856,41 @@ copy_params_to_dynamic_space(WASMLoaderContext *loader_ctx, bool is_if_block,
     /* emit copy instruction */
     emit_label(EXT_OP_COPY_STACK_VALUES);
     /* Part a) */
-    emit_uint32(loader_ctx, is_if_block ? param_count + 1 : param_count);
+    emit_uint32(loader_ctx, is_if_block ? available_param_count + 1
+                                        : available_param_count);
     /* Part b) */
     emit_uint32(loader_ctx, is_if_block ? wasm_type->param_cell_num + 1
                                         : wasm_type->param_cell_num);
     /* Part c) */
-    for (i = 0; i < param_count; i++)
+    for (i = 0; i < available_param_count; i++)
         emit_byte(loader_ctx, cells[i]);
     if (is_if_block)
         emit_byte(loader_ctx, 1);
 
     /* Part d) */
-    for (i = 0; i < param_count; i++)
+    for (i = 0; i < available_param_count; i++)
         emit_operand(loader_ctx, src_offsets[i]);
     if (is_if_block)
         emit_operand(loader_ctx, condition_offset);
 
     /* Part e) */
     /* Push to dynamic space. The push will emit the dst offset. */
-    for (i = 0; i < param_count; i++)
+    for (i = 0; i < available_param_count; i++)
         PUSH_OFFSET_TYPE(wasm_type->types[i]);
     if (is_if_block)
         PUSH_OFFSET_TYPE(VALUE_TYPE_I32);
 
+    if (p_available_param_count) {
+        *p_available_param_count = available_param_count;
+    }
+
     ret = true;
 
 fail:
     /* Free the emit data */
-    wasm_runtime_free(emit_data);
+    if (emit_data) {
+        wasm_runtime_free(emit_data);
+    }
 
     return ret;
 }
@@ -10024,7 +10058,7 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func,
     uint8 *func_const_end, *func_const = NULL;
     int16 operand_offset = 0;
     uint8 last_op = 0;
-    bool disable_emit, preserve_local = false;
+    bool disable_emit, preserve_local = false, if_condition_available = true;
     float32 f32_const;
     float64 f64_const;
 
@@ -10105,13 +10139,26 @@ re_scan:
                 break;
 
             case WASM_OP_IF:
+            {
 #if WASM_ENABLE_FAST_INTERP != 0
+                BranchBlock *parent_block = loader_ctx->frame_csp - 1;
+                int32 available_stack_cell =
+                    (int32)(loader_ctx->stack_cell_num
+                            - parent_block->stack_cell_num);
+
+                if (available_stack_cell <= 0
+                    && parent_block->is_stack_polymorphic)
+                    if_condition_available = false;
+                else
+                    if_condition_available = true;
+
                 PRESERVE_LOCAL_FOR_BLOCK();
 #endif
 #if WASM_ENABLE_GC == 0
                 POP_I32();
 #endif
                 goto handle_op_block_and_loop;
+            }
             case WASM_OP_BLOCK:
             case WASM_OP_LOOP:
 #if WASM_ENABLE_FAST_INTERP != 0
@@ -10121,6 +10168,9 @@ re_scan:
             {
                 uint8 value_type;
                 BlockType block_type;
+#if WASM_ENABLE_FAST_INTERP != 0
+                uint32 available_params = 0;
+#endif
 
                 CHECK_BUF(p, p_end, 1);
                 value_type = read_uint8(p);
@@ -10204,10 +10254,28 @@ re_scan:
 
                 /* Pop block parameters from stack */
                 if (BLOCK_HAS_PARAM(block_type)) {
-                    WASMFuncType *func_type = block_type.u.type;
-                    for (i = 0; i < block_type.u.type->param_count; i++)
+                    WASMFuncType *wasm_type = block_type.u.type;
+
+                    BranchBlock *cur_block = loader_ctx->frame_csp - 1;
+#if WASM_ENABLE_FAST_INTERP != 0
+                    available_params = block_type.u.type->param_count;
+#endif
+                    for (i = 0; i < block_type.u.type->param_count; i++) {
+
+                        int32 available_stack_cell =
+                            (int32)(loader_ctx->stack_cell_num
+                                    - cur_block->stack_cell_num);
+                        if (available_stack_cell <= 0
+                            && cur_block->is_stack_polymorphic) {
+#if WASM_ENABLE_FAST_INTERP != 0
+                            available_params = i;
+#endif
+                            break;
+                        }
+
                         POP_TYPE(
-                            func_type->types[func_type->param_count - i - 1]);
+                            wasm_type->types[wasm_type->param_count - i - 1]);
+                    }
                 }
 
                 PUSH_CSP(LABEL_TYPE_BLOCK + (opcode - WASM_OP_BLOCK),
@@ -10215,25 +10283,35 @@ re_scan:
 
                 /* Pass parameters to block */
                 if (BLOCK_HAS_PARAM(block_type)) {
-                    for (i = 0; i < block_type.u.type->param_count; i++)
+                    for (i = 0; i < block_type.u.type->param_count; i++) {
                         PUSH_TYPE(block_type.u.type->types[i]);
+#if WASM_ENABLE_FAST_INTERP != 0
+                        if (i >= available_params) {
+                            PUSH_OFFSET_TYPE(block_type.u.type->types[i]);
+                        }
+#endif
+                    }
                 }
 
 #if WASM_ENABLE_FAST_INTERP != 0
                 if (opcode == WASM_OP_BLOCK || opcode == WASM_OP_LOOP) {
                     skip_label();
+
                     if (BLOCK_HAS_PARAM(block_type)) {
                         /* Make sure params are in dynamic space */
-                        if (!copy_params_to_dynamic_space(
-                                loader_ctx, false, error_buf, error_buf_size))
+                        if (!copy_params_to_dynamic_space(loader_ctx, false,
+                                                          NULL, error_buf,
+                                                          error_buf_size))
                             goto fail;
                     }
+
                     if (opcode == WASM_OP_LOOP) {
                         (loader_ctx->frame_csp - 1)->code_compiled =
                             loader_ctx->p_code_compiled;
                     }
                 }
                 else if (opcode == WASM_OP_IF) {
+                    BranchBlock *block = loader_ctx->frame_csp - 1;
                     /* If block has parameters, we should make sure they are in
                      * dynamic space. Otherwise, when else branch is missing,
                      * the later opcode may consume incorrect operand offset.
@@ -10251,8 +10329,7 @@ re_scan:
                      * recover them before entering else branch.
                      *
                      */
-                    if (BLOCK_HAS_PARAM(block_type)) {
-                        BranchBlock *block = loader_ctx->frame_csp - 1;
+                    if (if_condition_available && BLOCK_HAS_PARAM(block_type)) {
                         uint64 size;
 
                         /* skip the if condition operand offset */
@@ -10261,7 +10338,8 @@ re_scan:
                         skip_label();
                         /* Emit a copy instruction */
                         if (!copy_params_to_dynamic_space(
-                                loader_ctx, true, error_buf, error_buf_size))
+                                loader_ctx, true, &block->available_param_num,
+                                error_buf, error_buf_size))
                             goto fail;
 
                         /* Emit the if instruction */
@@ -10282,6 +10360,9 @@ re_scan:
                                         - size / sizeof(int16),
                                     (uint32)size);
                     }
+                    else {
+                        block->available_param_num = 0;
+                    }
 
                     emit_empty_label_addr_and_frame_ip(PATCH_ELSE);
                     emit_empty_label_addr_and_frame_ip(PATCH_END);
@@ -10292,7 +10373,8 @@ re_scan:
 
             case WASM_OP_ELSE:
             {
-                BlockType block_type = (loader_ctx->frame_csp - 1)->block_type;
+                BranchBlock *block = NULL;
+                BlockType block_type;
 
                 if (loader_ctx->csp_num < 2
                     || (loader_ctx->frame_csp - 1)->label_type
@@ -10302,13 +10384,16 @@ re_scan:
                         "opcode else found without matched opcode if");
                     goto fail;
                 }
+                block = loader_ctx->frame_csp - 1;
 
                 /* check whether if branch's stack matches its result type */
-                if (!check_block_stack(loader_ctx, loader_ctx->frame_csp - 1,
-                                       error_buf, error_buf_size))
+                if (!check_block_stack(loader_ctx, block, error_buf,
+                                       error_buf_size))
                     goto fail;
 
-                (loader_ctx->frame_csp - 1)->else_addr = p - 1;
+                block->else_addr = p - 1;
+                block_type = block->block_type;
+
 #if WASM_ENABLE_GC != 0
                 if (!wasm_loader_init_local_use_masks(
                         loader_ctx, local_count, error_buf, error_buf_size)) {
@@ -10335,10 +10420,9 @@ re_scan:
 
 #if WASM_ENABLE_FAST_INTERP != 0
                 /* Recover top param_count values of frame_offset stack */
-                if (BLOCK_HAS_PARAM((block_type))) {
+                if (block->available_param_num) {
                     uint32 size;
-                    BranchBlock *block = loader_ctx->frame_csp - 1;
-                    size = sizeof(int16) * block_type.u.type->param_cell_num;
+                    size = sizeof(int16) * block->available_param_num;
                     bh_memcpy_s(loader_ctx->frame_offset, size,
                                 block->param_frame_offsets, size);
                     loader_ctx->frame_offset += (size / sizeof(int16));
@@ -11796,8 +11880,6 @@ re_scan:
                 }
 #endif
 
-                POP_TYPE(global_type);
-
 #if WASM_ENABLE_FAST_INTERP == 0
                 if (global_type == VALUE_TYPE_I64
                     || global_type == VALUE_TYPE_F64) {
@@ -11836,6 +11918,9 @@ re_scan:
                 emit_uint32(loader_ctx, global_idx);
                 POP_OFFSET_TYPE(global_type);
 #endif /* end of WASM_ENABLE_FAST_INTERP */
+
+                POP_TYPE(global_type);
+
                 break;
             }
 

+ 118 - 41
core/iwasm/interpreter/wasm_mini_loader.c

@@ -617,6 +617,10 @@ load_type_section(const uint8 *buf, const uint8 *buf_end, WASMModule *module,
             type->param_cell_num = (uint16)param_cell_num;
             type->ret_cell_num = (uint16)ret_cell_num;
 
+#if WASM_ENABLE_QUICK_AOT_ENTRY != 0
+            type->quick_aot_entry = wasm_native_lookup_quick_aot_entry(type);
+#endif
+
             /* If there is already a same type created, use it instead */
             for (j = 0; j < i; ++j) {
                 if (wasm_type_equal(type, module->types[j], module->types, i)) {
@@ -2116,9 +2120,11 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
     }
 
     option.is_jit_mode = true;
-    option.opt_level = llvm_jit_options.opt_level;
-    option.size_level = llvm_jit_options.size_level;
-    option.segue_flags = llvm_jit_options.segue_flags;
+    option.opt_level = llvm_jit_options->opt_level;
+    option.size_level = llvm_jit_options->size_level;
+    option.segue_flags = llvm_jit_options->segue_flags;
+    option.quick_invoke_c_api_import =
+        llvm_jit_options->quick_invoke_c_api_import;
 
 #if WASM_ENABLE_BULK_MEMORY != 0
     option.enable_bulk_memory = true;
@@ -3842,6 +3848,9 @@ typedef struct BranchBlock {
     BranchBlockPatch *patch_list;
     /* This is used to save params frame_offset of of if block */
     int16 *param_frame_offsets;
+    /* This is used to store available param num for if/else branch, so the else
+     * opcode can know how many parameters should be copied to the stack */
+    uint32 available_param_num;
 #endif
 
     /* Indicate the operand stack is in polymorphic state.
@@ -5568,16 +5577,20 @@ fail:
  * 1) POP original parameter out;
  * 2) Push and copy original values to dynamic space.
  * The copy instruction format:
- *   Part a: param count
+ *   Part a: available param count
  *   Part b: all param total cell num
  *   Part c: each param's cell_num, src offset and dst offset
  *   Part d: each param's src offset
  *   Part e: each param's dst offset
+ * Note: if the stack is in polymorphic state, the number of parameters
+ * actually copied may be fewer than the number declared by the block type
  */
 static bool
 copy_params_to_dynamic_space(WASMLoaderContext *loader_ctx, bool is_if_block,
-                             char *error_buf, uint32 error_buf_size)
+                             uint32 *p_available_param_count, char *error_buf,
+                             uint32 error_buf_size)
 {
+    bool ret = false;
     int16 *frame_offset = NULL;
     uint8 *cells = NULL, cell;
     int16 *src_offsets = NULL;
@@ -5587,35 +5600,47 @@ copy_params_to_dynamic_space(WASMLoaderContext *loader_ctx, bool is_if_block,
     BlockType *block_type = &block->block_type;
     WASMFuncType *wasm_type = block_type->u.type;
     uint32 param_count = block_type->u.type->param_count;
+    uint32 available_param_count = 0;
     int16 condition_offset = 0;
     bool disable_emit = false;
     int16 operand_offset = 0;
+    uint64 size;
+
+    if (is_if_block)
+        condition_offset = *loader_ctx->frame_offset;
+
+    /* POP original parameter out */
+    for (i = 0; i < param_count; i++) {
+        int32 available_stack_cell =
+            (int32)(loader_ctx->stack_cell_num - block->stack_cell_num);
+
+        if (available_stack_cell <= 0 && block->is_stack_polymorphic)
+            break;
+
+        POP_OFFSET_TYPE(wasm_type->types[param_count - i - 1]);
+        wasm_loader_emit_backspace(loader_ctx, sizeof(int16));
+    }
+    available_param_count = i;
 
-    uint64 size = (uint64)param_count * (sizeof(*cells) + sizeof(*src_offsets));
+    size =
+        (uint64)available_param_count * (sizeof(*cells) + sizeof(*src_offsets));
 
     /* For if block, we also need copy the condition operand offset. */
     if (is_if_block)
         size += sizeof(*cells) + sizeof(*src_offsets);
 
     /* Allocate memory for the emit data */
-    if (!(emit_data = loader_malloc(size, error_buf, error_buf_size)))
+    if ((size > 0)
+        && !(emit_data = loader_malloc(size, error_buf, error_buf_size)))
         return false;
 
     cells = emit_data;
     src_offsets = (int16 *)(cells + param_count);
 
-    if (is_if_block)
-        condition_offset = *loader_ctx->frame_offset;
-
-    /* POP original parameter out */
-    for (i = 0; i < param_count; i++) {
-        POP_OFFSET_TYPE(wasm_type->types[param_count - i - 1]);
-        wasm_loader_emit_backspace(loader_ctx, sizeof(int16));
-    }
     frame_offset = loader_ctx->frame_offset;
 
     /* Get each param's cell num and src offset */
-    for (i = 0; i < param_count; i++) {
+    for (i = 0; i < available_param_count; i++) {
         cell = (uint8)wasm_value_type_cell_num(wasm_type->types[i]);
         cells[i] = cell;
         src_offsets[i] = *frame_offset;
@@ -5625,37 +5650,43 @@ copy_params_to_dynamic_space(WASMLoaderContext *loader_ctx, bool is_if_block,
     /* emit copy instruction */
     emit_label(EXT_OP_COPY_STACK_VALUES);
     /* Part a) */
-    emit_uint32(loader_ctx, is_if_block ? param_count + 1 : param_count);
+    emit_uint32(loader_ctx, is_if_block ? available_param_count + 1
+                                        : available_param_count);
     /* Part b) */
     emit_uint32(loader_ctx, is_if_block ? wasm_type->param_cell_num + 1
                                         : wasm_type->param_cell_num);
     /* Part c) */
-    for (i = 0; i < param_count; i++)
+    for (i = 0; i < available_param_count; i++)
         emit_byte(loader_ctx, cells[i]);
     if (is_if_block)
         emit_byte(loader_ctx, 1);
 
     /* Part d) */
-    for (i = 0; i < param_count; i++)
+    for (i = 0; i < available_param_count; i++)
         emit_operand(loader_ctx, src_offsets[i]);
     if (is_if_block)
         emit_operand(loader_ctx, condition_offset);
 
     /* Part e) */
     /* Push to dynamic space. The push will emit the dst offset. */
-    for (i = 0; i < param_count; i++)
+    for (i = 0; i < available_param_count; i++)
         PUSH_OFFSET_TYPE(wasm_type->types[i]);
     if (is_if_block)
         PUSH_OFFSET_TYPE(VALUE_TYPE_I32);
 
-    /* Free the emit data */
-    wasm_runtime_free(emit_data);
-    return true;
+    if (p_available_param_count) {
+        *p_available_param_count = available_param_count;
+    }
+
+    ret = true;
 
 fail:
     /* Free the emit data */
-    wasm_runtime_free(emit_data);
-    return false;
+    if (emit_data) {
+        wasm_runtime_free(emit_data);
+    }
+
+    return ret;
 }
 #endif
 
@@ -5722,7 +5753,8 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func,
     uint8 *func_const_end, *func_const = NULL;
     int16 operand_offset = 0;
     uint8 last_op = 0;
-    bool disable_emit, preserve_local = false;
+    bool disable_emit, preserve_local = false, if_condition_available = true;
+    ;
     float32 f32_const;
     float64 f64_const;
 
@@ -5792,11 +5824,23 @@ re_scan:
                 break;
 
             case WASM_OP_IF:
+            {
 #if WASM_ENABLE_FAST_INTERP != 0
+                BranchBlock *parent_block = loader_ctx->frame_csp - 1;
+                int32 available_stack_cell =
+                    (int32)(loader_ctx->stack_cell_num
+                            - parent_block->stack_cell_num);
+
+                if (available_stack_cell <= 0
+                    && parent_block->is_stack_polymorphic)
+                    if_condition_available = false;
+                else
+                    if_condition_available = true;
                 PRESERVE_LOCAL_FOR_BLOCK();
 #endif
                 POP_I32();
                 goto handle_op_block_and_loop;
+            }
             case WASM_OP_BLOCK:
             case WASM_OP_LOOP:
 #if WASM_ENABLE_FAST_INTERP != 0
@@ -5806,6 +5850,9 @@ re_scan:
             {
                 uint8 value_type;
                 BlockType block_type;
+#if WASM_ENABLE_FAST_INTERP != 0
+                uint32 available_params = 0;
+#endif
 
                 p_org = p - 1;
                 value_type = read_uint8(p);
@@ -5836,9 +5883,27 @@ re_scan:
                 /* Pop block parameters from stack */
                 if (BLOCK_HAS_PARAM(block_type)) {
                     WASMFuncType *wasm_type = block_type.u.type;
-                    for (i = 0; i < block_type.u.type->param_count; i++)
+
+                    BranchBlock *cur_block = loader_ctx->frame_csp - 1;
+#if WASM_ENABLE_FAST_INTERP != 0
+                    available_params = block_type.u.type->param_count;
+#endif
+                    for (i = 0; i < block_type.u.type->param_count; i++) {
+
+                        int32 available_stack_cell =
+                            (int32)(loader_ctx->stack_cell_num
+                                    - cur_block->stack_cell_num);
+                        if (available_stack_cell <= 0
+                            && cur_block->is_stack_polymorphic) {
+#if WASM_ENABLE_FAST_INTERP != 0
+                            available_params = i;
+#endif
+                            break;
+                        }
+
                         POP_TYPE(
                             wasm_type->types[wasm_type->param_count - i - 1]);
+                    }
                 }
 
                 PUSH_CSP(LABEL_TYPE_BLOCK + (opcode - WASM_OP_BLOCK),
@@ -5846,8 +5911,14 @@ re_scan:
 
                 /* Pass parameters to block */
                 if (BLOCK_HAS_PARAM(block_type)) {
-                    for (i = 0; i < block_type.u.type->param_count; i++)
+                    for (i = 0; i < block_type.u.type->param_count; i++) {
                         PUSH_TYPE(block_type.u.type->types[i]);
+#if WASM_ENABLE_FAST_INTERP != 0
+                        if (i >= available_params) {
+                            PUSH_OFFSET_TYPE(block_type.u.type->types[i]);
+                        }
+#endif
+                    }
                 }
 
 #if WASM_ENABLE_FAST_INTERP != 0
@@ -5855,8 +5926,9 @@ re_scan:
                     skip_label();
                     if (BLOCK_HAS_PARAM(block_type)) {
                         /* Make sure params are in dynamic space */
-                        if (!copy_params_to_dynamic_space(
-                                loader_ctx, false, error_buf, error_buf_size))
+                        if (!copy_params_to_dynamic_space(loader_ctx, false,
+                                                          NULL, error_buf,
+                                                          error_buf_size))
                             goto fail;
                     }
                     if (opcode == WASM_OP_LOOP) {
@@ -5865,6 +5937,7 @@ re_scan:
                     }
                 }
                 else if (opcode == WASM_OP_IF) {
+                    BranchBlock *block = loader_ctx->frame_csp - 1;
                     /* If block has parameters, we should make sure they are in
                      * dynamic space. Otherwise, when else branch is missing,
                      * the later opcode may consume incorrect operand offset.
@@ -5882,8 +5955,7 @@ re_scan:
                      * recover them before entering else branch.
                      *
                      */
-                    if (BLOCK_HAS_PARAM(block_type)) {
-                        BranchBlock *block = loader_ctx->frame_csp - 1;
+                    if (if_condition_available && BLOCK_HAS_PARAM(block_type)) {
                         uint64 size;
 
                         /* skip the if condition operand offset */
@@ -5892,7 +5964,8 @@ re_scan:
                         skip_label();
                         /* Emit a copy instruction */
                         if (!copy_params_to_dynamic_space(
-                                loader_ctx, true, error_buf, error_buf_size))
+                                loader_ctx, true, &block->available_param_num,
+                                error_buf, error_buf_size))
                             goto fail;
 
                         /* Emit the if instruction */
@@ -5913,6 +5986,9 @@ re_scan:
                                         - size / sizeof(int16),
                                     (uint32)size);
                     }
+                    else {
+                        block->available_param_num = 0;
+                    }
 
                     emit_empty_label_addr_and_frame_ip(PATCH_ELSE);
                     emit_empty_label_addr_and_frame_ip(PATCH_END);
@@ -5923,17 +5999,19 @@ re_scan:
 
             case WASM_OP_ELSE:
             {
+                BranchBlock *block = NULL;
                 BlockType block_type = (loader_ctx->frame_csp - 1)->block_type;
                 bh_assert(loader_ctx->csp_num >= 2
                           && (loader_ctx->frame_csp - 1)->label_type
                                  == LABEL_TYPE_IF);
+                block = loader_ctx->frame_csp - 1;
 
                 /* check whether if branch's stack matches its result type */
-                if (!check_block_stack(loader_ctx, loader_ctx->frame_csp - 1,
-                                       error_buf, error_buf_size))
+                if (!check_block_stack(loader_ctx, block, error_buf,
+                                       error_buf_size))
                     goto fail;
 
-                (loader_ctx->frame_csp - 1)->else_addr = p - 1;
+                block->else_addr = p - 1;
 
 #if WASM_ENABLE_FAST_INTERP != 0
                 /* if the result of if branch is in local or const area, add a
@@ -5954,10 +6032,9 @@ re_scan:
 
 #if WASM_ENABLE_FAST_INTERP != 0
                 /* Recover top param_count values of frame_offset stack */
-                if (BLOCK_HAS_PARAM((block_type))) {
+                if (block->available_param_num) {
                     uint32 size;
-                    BranchBlock *block = loader_ctx->frame_csp - 1;
-                    size = sizeof(int16) * block_type.u.type->param_cell_num;
+                    size = sizeof(int16) * block->available_param_num;
                     bh_memcpy_s(loader_ctx->frame_offset, size,
                                 block->param_frame_offsets, size);
                     loader_ctx->frame_offset += (size / sizeof(int16));
@@ -6869,8 +6946,6 @@ re_scan:
                                         - module->import_global_count]
                               .type;
 
-                POP_TYPE(global_type);
-
 #if WASM_ENABLE_FAST_INTERP == 0
                 if (is_64bit_type(global_type)) {
                     *p_org = WASM_OP_SET_GLOBAL_64;
@@ -6896,6 +6971,8 @@ re_scan:
                 POP_OFFSET_TYPE(global_type);
 #endif /* end of WASM_ENABLE_FAST_INTERP */
 
+                POP_TYPE(global_type);
+
                 (void)is_mutable;
                 break;
             }

+ 1 - 1
core/iwasm/interpreter/wasm_runtime.h

@@ -256,8 +256,8 @@ typedef struct CApiFuncImport {
 
 /* The common part of WASMModuleInstanceExtra and AOTModuleInstanceExtra */
 typedef struct WASMModuleInstanceExtraCommon {
-    void *contexts[WASM_MAX_INSTANCE_CONTEXTS];
     CApiFuncImport *c_api_func_imports;
+    void *contexts[WASM_MAX_INSTANCE_CONTEXTS];
     /* pointer to the exec env currently used */
     WASMExecEnv *cur_exec_env;
 #if WASM_CONFIGURABLE_BOUNDS_CHECKS != 0

+ 18 - 3
core/iwasm/libraries/lib-pthread/lib_pthread_wrapper.c

@@ -558,6 +558,7 @@ pthread_create_wrapper(wasm_exec_env_t exec_env,
     ThreadRoutineArgs *routine_args = NULL;
     uint32 thread_handle;
     uint32 stack_size = 8192;
+    uint32 aux_stack_start = 0, aux_stack_size;
     int32 ret = -1;
 
     bh_assert(module);
@@ -609,10 +610,22 @@ pthread_create_wrapper(wasm_exec_env_t exec_env,
     routine_args->info_node = info_node;
     routine_args->module_inst = new_module_inst;
 
+    /* Allocate the aux stack in advance, since exec_env->wait_lock is
+       acquired below: if the stack were allocated inside
+       wasm_cluster_create_thread, the runtime might call the exported malloc
+       function, which acquires exec_env->wait_lock again in
+       wasm_exec_env_set_thread_info, causing a recursive lock (or hang) */
+    if (!wasm_cluster_allocate_aux_stack(exec_env, &aux_stack_start,
+                                         &aux_stack_size)) {
+        LOG_ERROR("thread manager error: "
+                  "failed to allocate aux stack space for new thread");
+        goto fail;
+    }
+
     os_mutex_lock(&exec_env->wait_lock);
-    ret =
-        wasm_cluster_create_thread(exec_env, new_module_inst, true,
-                                   pthread_start_routine, (void *)routine_args);
+    ret = wasm_cluster_create_thread(
+        exec_env, new_module_inst, true, aux_stack_start, aux_stack_size,
+        pthread_start_routine, (void *)routine_args);
     if (ret != 0) {
         os_mutex_unlock(&exec_env->wait_lock);
         goto fail;
@@ -636,6 +649,8 @@ fail:
         wasm_runtime_free(info_node);
     if (routine_args)
         wasm_runtime_free(routine_args);
+    if (aux_stack_start)
+        wasm_cluster_free_aux_stack(exec_env, aux_stack_start);
     return ret;
 }
 

+ 1 - 1
core/iwasm/libraries/lib-wasi-threads/lib_wasi_threads_wrapper.c

@@ -119,7 +119,7 @@ thread_spawn_wrapper(wasm_exec_env_t exec_env, uint32 start_arg)
     thread_start_arg->arg = start_arg;
     thread_start_arg->start_func = start_func;
 
-    ret = wasm_cluster_create_thread(exec_env, new_module_inst, false,
+    ret = wasm_cluster_create_thread(exec_env, new_module_inst, false, 0, 0,
                                      thread_start, thread_start_arg);
     if (ret != 0) {
         LOG_ERROR("Failed to spawn a new thread");

+ 1 - 1
core/iwasm/libraries/libc-wasi/libc_wasi_wrapper.c

@@ -1047,7 +1047,7 @@ execute_interruptible_poll_oneoff(
 
         if (wasm_cluster_is_thread_terminated(exec_env)) {
             wasm_runtime_free(in_copy);
-            return EINTR;
+            return __WASI_EINTR;
         }
         else if (*nevents > 0) {
             all_outs_are_type_clock = true;

+ 3 - 2
core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/ssp_config.h

@@ -41,7 +41,8 @@
 #endif
 
 #if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__EMSCRIPTEN__) \
-    && !defined(ESP_PLATFORM) && !defined(DISABLE_CLOCK_NANOSLEEP)
+    && !defined(ESP_PLATFORM) && !defined(DISABLE_CLOCK_NANOSLEEP)           \
+    && !defined(BH_PLATFORM_FREERTOS)
 #define CONFIG_HAS_CLOCK_NANOSLEEP 1
 #else
 #define CONFIG_HAS_CLOCK_NANOSLEEP 0
@@ -54,7 +55,7 @@
 #endif
 
 #if !defined(__APPLE__) && !defined(BH_PLATFORM_LINUX_SGX) && !defined(_WIN32) \
-    && !defined(__COSMOPOLITAN__)
+    && !defined(__COSMOPOLITAN__) && !defined(BH_PLATFORM_FREERTOS)
 #define CONFIG_HAS_PTHREAD_CONDATTR_SETCLOCK 1
 #else
 #define CONFIG_HAS_PTHREAD_CONDATTR_SETCLOCK 0

+ 63 - 17
core/iwasm/libraries/thread-mgr/thread_manager.c

@@ -4,6 +4,7 @@
  */
 
 #include "thread_manager.h"
+#include "../common/wasm_c_api_internal.h"
 
 #if WASM_ENABLE_INTERP != 0
 #include "../interpreter/wasm_runtime.h"
@@ -208,6 +209,33 @@ free_aux_stack(WASMExecEnv *exec_env, uint32 start)
 #endif
 }
 
+bool
+wasm_cluster_allocate_aux_stack(WASMExecEnv *exec_env, uint32 *p_start,
+                                uint32 *p_size)
+{
+    WASMCluster *cluster = wasm_exec_env_get_cluster(exec_env);
+    bool ret;
+
+    os_mutex_lock(&cluster->lock);
+    ret = allocate_aux_stack(exec_env, p_start, p_size);
+    os_mutex_unlock(&cluster->lock);
+
+    return ret;
+}
+
+bool
+wasm_cluster_free_aux_stack(WASMExecEnv *exec_env, uint32 start)
+{
+    WASMCluster *cluster = wasm_exec_env_get_cluster(exec_env);
+    bool ret;
+
+    os_mutex_lock(&cluster->lock);
+    ret = free_aux_stack(exec_env, start);
+    os_mutex_unlock(&cluster->lock);
+
+    return ret;
+}
+
 WASMCluster *
 wasm_cluster_create(WASMExecEnv *exec_env)
 {
@@ -343,6 +371,10 @@ wasm_cluster_destroy(WASMCluster *cluster)
     wasm_debug_instance_destroy(cluster);
 #endif
 
+#if WASM_ENABLE_DUMP_CALL_STACK != 0
+    bh_vector_destroy(&cluster->exception_frames);
+#endif
+
     wasm_runtime_free(cluster);
 }
 
@@ -654,12 +686,13 @@ thread_manager_start_routine(void *arg)
 
 int32
 wasm_cluster_create_thread(WASMExecEnv *exec_env,
-                           wasm_module_inst_t module_inst, bool alloc_aux_stack,
+                           wasm_module_inst_t module_inst,
+                           bool is_aux_stack_allocated, uint32 aux_stack_start,
+                           uint32 aux_stack_size,
                            void *(*thread_routine)(void *), void *arg)
 {
     WASMCluster *cluster;
     WASMExecEnv *new_exec_env;
-    uint32 aux_stack_start = 0, aux_stack_size;
     korp_tid tid;
 
     cluster = wasm_exec_env_get_cluster(exec_env);
@@ -676,17 +709,11 @@ wasm_cluster_create_thread(WASMExecEnv *exec_env,
     if (!new_exec_env)
         goto fail1;
 
-    if (alloc_aux_stack) {
-        if (!allocate_aux_stack(exec_env, &aux_stack_start, &aux_stack_size)) {
-            LOG_ERROR("thread manager error: "
-                      "failed to allocate aux stack space for new thread");
-            goto fail2;
-        }
-
+    if (is_aux_stack_allocated) {
         /* Set aux stack for current thread */
         if (!wasm_exec_env_set_aux_stack(new_exec_env, aux_stack_start,
                                          aux_stack_size)) {
-            goto fail3;
+            goto fail2;
         }
     }
     else {
@@ -699,7 +726,7 @@ wasm_cluster_create_thread(WASMExecEnv *exec_env,
     new_exec_env->suspend_flags.flags = exec_env->suspend_flags.flags;
 
     if (!wasm_cluster_add_exec_env(cluster, new_exec_env))
-        goto fail3;
+        goto fail2;
 
     new_exec_env->thread_start_routine = thread_routine;
     new_exec_env->thread_arg = arg;
@@ -711,7 +738,7 @@ wasm_cluster_create_thread(WASMExecEnv *exec_env,
                             (void *)new_exec_env,
                             APP_THREAD_STACK_SIZE_DEFAULT)) {
         os_mutex_unlock(&new_exec_env->wait_lock);
-        goto fail4;
+        goto fail3;
     }
 
     /* Wait until the new_exec_env->handle is set to avoid it is
@@ -723,12 +750,8 @@ wasm_cluster_create_thread(WASMExecEnv *exec_env,
 
     return 0;
 
-fail4:
-    wasm_cluster_del_exec_env_internal(cluster, new_exec_env, false);
 fail3:
-    /* free the allocated aux stack space */
-    if (alloc_aux_stack)
-        free_aux_stack(exec_env, aux_stack_start);
+    wasm_cluster_del_exec_env_internal(cluster, new_exec_env, false);
 fail2:
     wasm_exec_env_destroy_internal(new_exec_env);
 fail1:
@@ -1303,6 +1326,29 @@ wasm_cluster_set_exception(WASMExecEnv *exec_env, const char *exception)
     data.exception = exception;
 
     os_mutex_lock(&cluster->lock);
+#if WASM_ENABLE_DUMP_CALL_STACK != 0
+    if (has_exception) {
+        /* Save the stack frames of the crashed thread into the cluster */
+        WASMModuleInstance *module_inst =
+            (WASMModuleInstance *)get_module_inst(exec_env);
+
+#if WASM_ENABLE_INTERP != 0
+        if (module_inst->module_type == Wasm_Module_Bytecode
+            && wasm_interp_create_call_stack(exec_env)) {
+            wasm_frame_vec_clone_internal(module_inst->frames,
+                                          &cluster->exception_frames);
+        }
+#endif
+
+#if WASM_ENABLE_AOT != 0
+        if (module_inst->module_type == Wasm_Module_AoT
+            && aot_create_call_stack(exec_env)) {
+            wasm_frame_vec_clone_internal(module_inst->frames,
+                                          &cluster->exception_frames);
+        }
+#endif
+    }
+#endif /* WASM_ENABLE_DUMP_CALL_STACK != 0 */
     cluster->has_exception = has_exception;
     traverse_list(&cluster->exec_env_list, set_exception_visitor, &data);
     os_mutex_unlock(&cluster->lock);

+ 17 - 1
core/iwasm/libraries/thread-mgr/thread_manager.h

@@ -51,6 +51,13 @@ struct WASMCluster {
 #if WASM_ENABLE_DEBUG_INTERP != 0
     WASMDebugInstance *debug_inst;
 #endif
+
+#if WASM_ENABLE_DUMP_CALL_STACK != 0
+    /* When an exception occurs in a thread, the stack frames of that thread are
+     * saved into the cluster
+     */
+    Vector exception_frames;
+#endif
 };
 
 void
@@ -81,7 +88,9 @@ wasm_cluster_dup_c_api_imports(WASMModuleInstanceCommon *module_inst_dst,
 
 int32
 wasm_cluster_create_thread(WASMExecEnv *exec_env,
-                           wasm_module_inst_t module_inst, bool alloc_aux_stack,
+                           wasm_module_inst_t module_inst,
+                           bool is_aux_stack_allocated, uint32 aux_stack_start,
+                           uint32 aux_stack_size,
                            void *(*thread_routine)(void *), void *arg);
 
 int32
@@ -221,6 +230,13 @@ wasm_cluster_traverse_lock(WASMExecEnv *exec_env);
 void
 wasm_cluster_traverse_unlock(WASMExecEnv *exec_env);
 
+bool
+wasm_cluster_allocate_aux_stack(WASMExecEnv *exec_env, uint32 *p_start,
+                                uint32 *p_size);
+
+bool
+wasm_cluster_free_aux_stack(WASMExecEnv *exec_env, uint32 start);
+
 #ifdef __cplusplus
 }
 #endif

+ 18 - 5
core/shared/platform/common/posix/posix_memmap.c

@@ -5,7 +5,7 @@
 
 #include "platform_api_vmcore.h"
 
-#if (defined(__APPLE__) || defined(__MACH__)) && defined(__arm64__)
+#if defined(__APPLE__) || defined(__MACH__)
 #include <libkern/OSCacheControl.h>
 #endif
 
@@ -40,7 +40,8 @@ void *
 os_mmap(void *hint, size_t size, int prot, int flags, os_file_handle file)
 {
     int map_prot = PROT_NONE;
-#if (defined(__APPLE__) || defined(__MACH__)) && defined(__arm64__)
+#if (defined(__APPLE__) || defined(__MACH__)) && defined(__arm64__) \
+    && defined(TARGET_OS_OSX) && TARGET_OS_OSX != 0
     int map_flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_JIT;
 #else
     int map_flags = MAP_ANONYMOUS | MAP_PRIVATE;
@@ -77,15 +78,19 @@ os_mmap(void *hint, size_t size, int prot, int flags, os_file_handle file)
         map_prot |= PROT_EXEC;
 
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
-#ifndef __APPLE__
     if (flags & MMAP_MAP_32BIT)
         map_flags |= MAP_32BIT;
-#endif
 #endif
 
     if (flags & MMAP_MAP_FIXED)
         map_flags |= MAP_FIXED;
 
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+#if defined(__APPLE__)
+retry_without_map_32bit:
+#endif
+#endif
+
 #if defined(BUILD_TARGET_RISCV64_LP64D) || defined(BUILD_TARGET_RISCV64_LP64)
     /* As AOT relocation in RISCV64 may require that the code/data mapped
      * is in range 0 to 2GB, we try to map the memory with hint address
@@ -143,6 +148,14 @@ os_mmap(void *hint, size_t size, int prot, int flags, os_file_handle file)
     }
 
     if (addr == MAP_FAILED) {
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+#if defined(__APPLE__)
+        if ((map_flags & MAP_32BIT) != 0) {
+            map_flags &= ~MAP_32BIT;
+            goto retry_without_map_32bit;
+        }
+#endif
+#endif
 #if BH_ENABLE_TRACE_MMAP != 0
         os_printf("mmap failed\n");
 #endif
@@ -263,7 +276,7 @@ os_dcache_flush(void)
 void
 os_icache_flush(void *start, size_t len)
 {
-#if (defined(__APPLE__) || defined(__MACH__)) && defined(__arm64__)
+#if defined(__APPLE__) || defined(__MACH__)
     sys_icache_invalidate(start, len);
 #endif
 }

+ 2 - 1
core/shared/platform/common/posix/posix_thread.c

@@ -476,7 +476,8 @@ os_thread_get_stack_boundary()
 void
 os_thread_jit_write_protect_np(bool enabled)
 {
-#if (defined(__APPLE__) || defined(__MACH__)) && defined(__arm64__)
+#if (defined(__APPLE__) || defined(__MACH__)) && defined(__arm64__) \
+    && defined(TARGET_OS_OSX) && TARGET_OS_OSX != 0
     pthread_jit_write_protect_np(enabled);
 #endif
 }

+ 3 - 0
core/shared/platform/esp-idf/shared_platform.cmake

@@ -10,6 +10,9 @@ include_directories(${PLATFORM_SHARED_DIR}/../include)
 
 file (GLOB_RECURSE source_all ${PLATFORM_SHARED_DIR}/*.c)
 
+include (${CMAKE_CURRENT_LIST_DIR}/../common/libc-util/platform_common_libc_util.cmake)
+set (source_all ${source_all} ${PLATFORM_COMMON_LIBC_UTIL_SOURCE})
+
 set (PLATFORM_SHARED_SOURCE ${source_all} ${PLATFORM_COMMON_MATH_SOURCE})
 
 # If enable PSRAM of ESP32-S3, it had better to put AOT into PSRAM, so that

+ 12 - 0
core/shared/platform/linux-sgx/sgx_platform.c

@@ -119,6 +119,18 @@ strcpy(char *dest, const char *src)
     return dest;
 }
 
+#if WASM_ENABLE_LIBC_WASI == 0
+bool
+os_is_handle_valid(os_file_handle *handle)
+{
+    assert(handle != NULL);
+
+    return *handle > -1;
+}
+#else
+/* implemented in posix_file.c */
+#endif
+
 void *
 os_mmap(void *hint, size_t size, int prot, int flags, os_file_handle file)
 {

+ 2 - 1
core/shared/platform/linux-sgx/sgx_socket.c

@@ -5,10 +5,11 @@
 
 #include "platform_api_vmcore.h"
 #include "platform_api_extension.h"
-#include "libc_errno.h"
 
 #ifndef SGX_DISABLE_WASI
 
+#include "libc_errno.h"
+
 #define TRACE_OCALL_FAIL() os_printf("ocall %s failed!\n", __FUNCTION__)
 
 /** OCALLs prototypes **/

+ 1 - 1
core/version.h

@@ -7,5 +7,5 @@
 #define _WAMR_VERSION_H_
 #define WAMR_VERSION_MAJOR 1
 #define WAMR_VERSION_MINOR 3
-#define WAMR_VERSION_PATCH 0
+#define WAMR_VERSION_PATCH 1
 #endif

+ 3 - 3
doc/build_wamr.md

@@ -1,7 +1,7 @@
 
 # Build WAMR vmcore
 
-WAMR vmcore is a set of runtime libraries for loading and running Wasm modules. This document introduces how to build the WAMR vmcore.  
+WAMR vmcore is a set of runtime libraries for loading and running Wasm modules. This document introduces how to build the WAMR vmcore.
 
 References:
 - [how to build iwasm](../product-mini/README.md): building different target platforms such as Linux, Windows, Mac etc
@@ -138,7 +138,7 @@ cmake -DWAMR_BUILD_PLATFORM=linux -DWAMR_BUILD_TARGET=ARM
 > Note: if it is enabled, the call stack will be dumped when exception occurs.
 
 > - For interpreter mode, the function names are firstly extracted from *custom name section*, if this section doesn't exist or the feature is not enabled, then the name will be extracted from the import/export sections
-> - For AOT/JIT mode, the function names are extracted from import/export section, please export as many functions as possible (for `wasi-sdk` you can use `-Wl,--export-all`) when compiling wasm module, and add `--enable-dump-call-stack` option to wamrc during compiling AOT module.
+> - For AOT/JIT mode, the function names are extracted from the import/export sections; please export as many functions as possible (for `wasi-sdk` you can use `-Wl,--export-all`) when compiling the wasm module, and add the `--enable-dump-call-stack --emit-custom-sections=name` options to wamrc when compiling the AOT module.
 
 #### **Enable memory profiling (Experiment)**
 - **WAMR_BUILD_MEMORY_PROFILING**=1/0, default to disable if not set
@@ -210,7 +210,7 @@ Currently we only profile the memory consumption of module, module_instance and
 
 > Note: If `WAMR_BUILD_CUSTOM_NAME_SECTION` is enabled, then the `custom name section` will be treated as a special section and consumed by the runtime, not available to the embedder.
 
-> For AoT file, must use `--emit-custom-sections` to specify which sections need to be emitted into AoT file, otherwise all custom sections (except custom name section) will be ignored.
+> For an AoT file, you must use `--emit-custom-sections` to specify which sections need to be emitted into the AoT file; otherwise all custom sections will be ignored.
 
 #### **Stack guard size**
 - **WAMR_BUILD_STACK_GUARD_SIZE**=n, default to N/A if not set.

+ 14 - 0
doc/build_wasm_app.md

@@ -372,6 +372,20 @@ Examples: wamrc -o test.aot test.wasm
           wamrc --target=i386 --format=object -o test.o test.wasm
 ```
 
+## AoT-compiled module compatibility among WAMR versions
+
+When making major ABI changes for AoT-compiled modules, we bump the
+`AOT_CURRENT_VERSION` constant in the `core/config.h` header.
+The runtime refuses to load a module that was AoT-compiled by a wamrc
+with a different `AOT_CURRENT_VERSION`.
+
+We try our best to keep the runtime ABI for AoT-compiled modules
+compatible among WAMR versions with the same `AOT_CURRENT_VERSION`,
+so that combinations of an older wamrc and a newer runtime usually work.
+However, there might be minor incompatibilities from time to time.
+For production use, we recommend using exactly the same version of
+wamrc and the runtime.
+
 ## AoT compilation with 3rd-party toolchains
 
 `wamrc` uses LLVM to compile wasm bytecode to AoT file, this works for most of the architectures, but there may be circumstances where you want to use 3rd-party toolchains to take over some steps of the compilation pipeline, e.g.

+ 12 - 0
product-mini/platforms/linux-sgx/CMakeLists.txt

@@ -168,3 +168,15 @@ else()
         OUTPUT_VARIABLE cmdOutput
     )
 endif()
+
+if (WAMR_BUILD_LIBC_WASI EQUAL 1)
+    execute_process(
+        COMMAND bash -c "sed -i -E 's/^WAMR_BUILD_LIBC_WASI = 0/WAMR_BUILD_LIBC_WASI = 1/g' ${CMAKE_CURRENT_SOURCE_DIR}/enclave-sample/Makefile"
+        OUTPUT_VARIABLE cmdOutput
+    )
+else()
+    execute_process(
+        COMMAND bash -c "sed -i -E 's/^WAMR_BUILD_LIBC_WASI = 1/WAMR_BUILD_LIBC_WASI = 0/g' ${CMAKE_CURRENT_SOURCE_DIR}/enclave-sample/Makefile"
+        OUTPUT_VARIABLE cmdOutput
+    )
+endif()

+ 2 - 2
product-mini/platforms/linux-sgx/enclave-sample/Enclave/Enclave.cpp

@@ -510,7 +510,7 @@ handle_cmd_set_log_level(uint64 *args, uint32 argc)
 #endif
 }
 
-#ifndef SGX_DISABLE_WASI
+#if WASM_ENABLE_LIBC_WASI != 0
 static void
 handle_cmd_set_wasi_args(uint64 *args, int32 argc)
 {
@@ -637,7 +637,7 @@ handle_cmd_set_wasi_args(uint64 *args, int32 argc)
 {
     *args = true;
 }
-#endif /* end of SGX_DISABLE_WASI */
+#endif /* end of WASM_ENABLE_LIBC_WASI != 0 */
 
 static void
 handle_cmd_get_version(uint64 *args, uint32 argc)

+ 11 - 2
product-mini/platforms/linux-sgx/enclave-sample/Makefile

@@ -16,6 +16,7 @@ WAMR_BUILD_LIB_RATS = 0
 WAMR_BUILD_GLOBAL_HEAP_POOL = 0
 WAMR_BUILD_GLOBAL_HEAP_SIZE = 10485760
 WAMR_BUILD_STATIC_PGO = 0
+WAMR_BUILD_LIBC_WASI = 1
 
 VMLIB_BUILD_DIR ?= $(CURDIR)/../build
 LIB_RATS_SRC ?= $(VMLIB_BUILD_DIR)/_deps/librats-build
@@ -66,7 +67,9 @@ ifeq ($(WAMR_BUILD_LIB_RATS), 1)
 	App_Include_Paths += -I$(LIB_RATS_INCLUDE_DIR)
 endif
 
-App_C_Flags := $(SGX_COMMON_CFLAGS) -fPIC -Wno-attributes $(App_Include_Paths) -DWASM_ENABLE_STATIC_PGO=$(WAMR_BUILD_STATIC_PGO)
+App_C_Flags := $(SGX_COMMON_CFLAGS) -fPIC -Wno-attributes $(App_Include_Paths) \
+			   -DWASM_ENABLE_STATIC_PGO=$(WAMR_BUILD_STATIC_PGO) \
+			   -DWASM_ENABLE_LIBC_WASI=$(WAMR_BUILD_LIBC_WASI)
 
 # Three configuration modes - Debug, prerelease, release
 #   Debug - Macro DEBUG enabled.
@@ -135,7 +138,13 @@ ifeq ($(WAMR_BUILD_LIB_RATS), 1)
 	Enclave_Include_Paths += -I$(LIB_RATS_INCLUDE_DIR) -I$(SGX_SSL)/include
 endif
 
-Enclave_C_Flags := $(SGX_COMMON_CFLAGS) -nostdinc -fvisibility=hidden -fpie -fstack-protector $(Enclave_Include_Paths) -DWASM_GLOBAL_HEAP_SIZE=$(WAMR_BUILD_GLOBAL_HEAP_SIZE) -DWASM_ENABLE_GLOBAL_HEAP_POOL=$(WAMR_BUILD_GLOBAL_HEAP_POOL) -DWASM_ENABLE_LIB_RATS=$(WAMR_BUILD_LIB_RATS) -DWASM_ENABLE_STATIC_PGO=$(WAMR_BUILD_STATIC_PGO)
+Enclave_C_Flags := $(SGX_COMMON_CFLAGS) -nostdinc -fvisibility=hidden \
+				   -fpie -fstack-protector $(Enclave_Include_Paths) \
+				   -DWASM_GLOBAL_HEAP_SIZE=$(WAMR_BUILD_GLOBAL_HEAP_SIZE) \
+				   -DWASM_ENABLE_GLOBAL_HEAP_POOL=$(WAMR_BUILD_GLOBAL_HEAP_POOL) \
+				   -DWASM_ENABLE_LIB_RATS=$(WAMR_BUILD_LIB_RATS) \
+				   -DWASM_ENABLE_STATIC_PGO=$(WAMR_BUILD_STATIC_PGO) \
+				   -DWASM_ENABLE_LIBC_WASI=$(WAMR_BUILD_LIBC_WASI)
 ifeq ($(SPEC_TEST), 1)
 	Enclave_C_Flags += -DWASM_ENABLE_SPEC_TEST=1
 else

+ 1 - 1
product-mini/platforms/linux-sgx/enclave-sample/Makefile_minimal

@@ -102,7 +102,7 @@ Enclave_Include_Paths := -IEnclave -I$(WAMR_ROOT)/core/iwasm/include \
 Enclave_C_Flags := $(SGX_COMMON_CFLAGS) -nostdinc -fvisibility=hidden -fpie -fstack-protector $(Enclave_Include_Paths)
 
 # disable wasi
-Enclave_C_Flags += -DSGX_DISABLE_WASI
+Enclave_C_Flags += -DWASM_ENABLE_LIBC_WASI=0
 
 ifeq ($(SPEC_TEST), 1)
 	Enclave_C_Flags += -DWASM_ENABLE_SPEC_TEST=1

+ 1 - 1
product-mini/platforms/posix/main.c

@@ -781,7 +781,7 @@ main(int argc, char *argv[])
             gen_prof_file = argv[0] + 16;
         }
 #endif
-        else if (!strncmp(argv[0], "--version", 9)) {
+        else if (!strcmp(argv[0], "--version")) {
             uint32 major, minor, patch;
             wasm_runtime_get_version(&major, &minor, &patch);
             printf("iwasm %" PRIu32 ".%" PRIu32 ".%" PRIu32 "\n", major, minor,

+ 1 - 1
product-mini/platforms/windows/main.c

@@ -406,7 +406,7 @@ main(int argc, char *argv[])
             ip_addr = argv[0] + 3;
         }
 #endif
-        else if (!strncmp(argv[0], "--version", 9)) {
+        else if (!strcmp(argv[0], "--version")) {
             uint32 major, minor, patch;
             wasm_runtime_get_version(&major, &minor, &patch);
             printf("iwasm %" PRIu32 ".%" PRIu32 ".%" PRIu32 "\n", major, minor,

+ 1 - 1
samples/wasm-c-api-imports/wasm/CMakeLists.txt

@@ -31,7 +31,7 @@ target_link_options(send_recv PRIVATE
 if(WASM_TO_AOT)
   # wasm -> aot
   add_custom_target(send_recv_aot ALL
-    COMMAND pwd && ${WAMRC_PATH} --enable-multi-thread -o ./send_recv.aot ./send_recv.wasm
+    COMMAND pwd && ${WAMRC_PATH} --invoke-c-api-import --enable-multi-thread -o ./send_recv.aot ./send_recv.wasm
     DEPENDS send_recv
     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
   )

+ 7 - 2
wamr-compiler/main.c

@@ -190,6 +190,8 @@ print_help()
     printf("                            are shared object (.so) files, for example:\n");
     printf("                              --native-lib=test1.so --native-lib=test2.so\n");
 #endif
+    printf("  --invoke-c-api-import     Treat unknown import function as wasm-c-api import function and\n");
+    printf("                            quick call it from AOT code\n");
 #if WASM_ENABLE_LINUX_PERF != 0
     printf("  --enable-linux-perf       Enable linux perf support\n");
 #endif
@@ -540,12 +542,15 @@ main(int argc, char *argv[])
             native_lib_list[native_lib_count++] = argv[0] + 13;
         }
 #endif
+        else if (!strcmp(argv[0], "--invoke-c-api-import")) {
+            option.quick_invoke_c_api_import = true;
+        }
 #if WASM_ENABLE_LINUX_PERF != 0
-        else if (!strncmp(argv[0], "--enable-linux-perf", 19)) {
+        else if (!strcmp(argv[0], "--enable-linux-perf")) {
             enable_linux_perf = true;
         }
 #endif
-        else if (!strncmp(argv[0], "--version", 9)) {
+        else if (!strcmp(argv[0], "--version")) {
             uint32 major, minor, patch;
             wasm_runtime_get_version(&major, &minor, &patch);
             printf("wamrc %u.%u.%u\n", major, minor, patch);