Browse Source

Merge pull request #3782 from bytecodealliance/main

Merge branch main into dev/dynamic_linking
Wenyong Huang 1 year ago
parent
commit
4f96a766e3
62 changed files with 2137 additions and 704 deletions
  1. 2 1
      .github/workflows/compilation_on_nuttx.yml
  2. 5 18
      .github/workflows/spec_test_on_nuttx.yml
  3. 26 5
      CMakeLists.txt
  4. 7 1
      build-scripts/config_common.cmake
  5. 13 1
      core/iwasm/aot/aot_loader.c
  6. 1 1
      core/iwasm/aot/aot_reloc.h
  7. 164 67
      core/iwasm/aot/aot_runtime.c
  8. 13 2
      core/iwasm/aot/aot_runtime.h
  9. 3 10
      core/iwasm/aot/arch/aot_reloc_aarch64.c
  10. 97 97
      core/iwasm/aot/arch/aot_reloc_arm.c
  11. 1 1
      core/iwasm/aot/arch/aot_reloc_mips.c
  12. 53 53
      core/iwasm/aot/arch/aot_reloc_riscv.c
  13. 97 97
      core/iwasm/aot/arch/aot_reloc_thumb.c
  14. 1 1
      core/iwasm/aot/arch/aot_reloc_x86_64.c
  15. 37 37
      core/iwasm/aot/arch/aot_reloc_xtensa.c
  16. 5 5
      core/iwasm/aot/debug/jit_debug.c
  17. 2 2
      core/iwasm/aot/debug/jit_debug.h
  18. 2 2
      core/iwasm/common/wasm_memory.c
  19. 2 2
      core/iwasm/common/wasm_memory.h
  20. 2 2
      core/iwasm/common/wasm_native.c
  21. 2 2
      core/iwasm/common/wasm_native.h
  22. 9 9
      core/iwasm/common/wasm_runtime_common.c
  23. 4 4
      core/iwasm/common/wasm_runtime_common.h
  24. 2 2
      core/iwasm/common/wasm_shared_memory.h
  25. 1 1
      core/iwasm/compilation/aot.h
  26. 91 31
      core/iwasm/compilation/aot_compiler.c
  27. 18 0
      core/iwasm/compilation/aot_compiler.h
  28. 6 0
      core/iwasm/compilation/aot_emit_aot_file.c
  29. 43 7
      core/iwasm/compilation/aot_emit_control.c
  30. 5 44
      core/iwasm/compilation/aot_emit_exception.c
  31. 87 30
      core/iwasm/compilation/aot_emit_function.c
  32. 22 13
      core/iwasm/compilation/aot_llvm.c
  33. 4 1
      core/iwasm/compilation/aot_llvm.h
  34. 27 0
      core/iwasm/compilation/aot_stack_frame.h
  35. 148 0
      core/iwasm/compilation/aot_stack_frame_comp.c
  36. 33 0
      core/iwasm/compilation/aot_stack_frame_comp.h
  37. 35 1
      core/iwasm/include/aot_comp_option.h
  38. 1 0
      core/iwasm/interpreter/wasm_interp_classic.c
  39. 1 1
      core/iwasm/interpreter/wasm_interp_fast.c
  40. 5 3
      core/iwasm/interpreter/wasm_loader.c
  41. 3 2
      core/iwasm/interpreter/wasm_mini_loader.c
  42. 1 1
      core/iwasm/libraries/lib-wasi-threads/lib_wasi_threads_wrapper.c
  43. 4 4
      core/iwasm/libraries/thread-mgr/thread_manager.h
  44. 22 18
      core/iwasm/libraries/wasi-nn/README.md
  45. 17 0
      core/iwasm/libraries/wasi-nn/cmake/Findcjson.cmake
  46. 18 0
      core/iwasm/libraries/wasi-nn/cmake/Findllamacpp.cmake
  47. 18 40
      core/iwasm/libraries/wasi-nn/cmake/Findtensorflow_lite.cmake
  48. 58 22
      core/iwasm/libraries/wasi-nn/cmake/wasi_nn.cmake
  49. 9 0
      core/iwasm/libraries/wasi-nn/include/wasi_nn_types.h
  50. 108 25
      core/iwasm/libraries/wasi-nn/src/wasi_nn.c
  51. 601 0
      core/iwasm/libraries/wasi-nn/src/wasi_nn_llamacpp.c
  52. 29 14
      core/iwasm/libraries/wasi-nn/test/Dockerfile.wasi-nn-smoke
  53. 60 0
      core/iwasm/libraries/wasi-nn/test/run_smoke_test.py
  54. 12 5
      core/shared/platform/common/posix/posix_memmap.c
  55. 5 5
      core/shared/platform/include/platform_api_extension.h
  56. 1 1
      core/shared/platform/nuttx/platform_internal.h
  57. 1 1
      product-mini/platforms/common/libc_wasi.c
  58. 1 1
      product-mini/platforms/nuttx/CMakeLists.txt
  59. 1 1
      product-mini/platforms/nuttx/wamr.mk
  60. 1 1
      product-mini/platforms/posix/main.c
  61. 1 1
      tests/wamr-test-suites/test_wamr.sh
  62. 89 8
      wamr-compiler/main.c

+ 2 - 1
.github/workflows/compilation_on_nuttx.yml

@@ -119,11 +119,12 @@ jobs:
         run: make -j$(nproc) EXTRAFLAGS=-Werror
         run: make -j$(nproc) EXTRAFLAGS=-Werror
 
 
       - name: Checkout Bloaty
       - name: Checkout Bloaty
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
         with:
           repository: google/bloaty
           repository: google/bloaty
           submodules: recursive
           submodules: recursive
           path: bloaty
           path: bloaty
+          ref: 34f4a66559ad4938c1e629e9b5f54630b2b4d7b0
 
 
       - name: Build Bloaty
       - name: Build Bloaty
         run: |
         run: |

+ 5 - 18
.github/workflows/spec_test_on_nuttx.yml

@@ -74,11 +74,11 @@ jobs:
             target: "riscv32",
             target: "riscv32",
             fpu_type: "none"
             fpu_type: "none"
           },
           },
-          {
-            config: "boards/risc-v/qemu-rv/rv-virt/configs/nsh",
-            target: "riscv32_ilp32f",
-            fpu_type: "fp"
-          },
+          #{
+          #  config: "boards/risc-v/qemu-rv/rv-virt/configs/nsh",
+          #  target: "riscv32_ilp32f",
+          #  fpu_type: "fp"
+          #},
           # {
           # {
           #   config: "boards/risc-v/qemu-rv/rv-virt/configs/nsh",
           #   config: "boards/risc-v/qemu-rv/rv-virt/configs/nsh",
           #   target: "riscv32_ilp32d",
           #   target: "riscv32_ilp32d",
@@ -327,19 +327,6 @@ jobs:
         working-directory: apps/interpreters/wamr/wamr
         working-directory: apps/interpreters/wamr/wamr
 
 
       - name: Test
       - name: Test
-        if: matrix.target_config.target != 'xtensa'
-        run: |
-          cd apps/interpreters/wamr/wamr/tests/wamr-test-suites
-          ./test_wamr.sh -s spec ${{ matrix.wamr_test_option.mode }} -m ${{ matrix.target_config.target }} -b -Q -P -F ${{ steps.build_firmware_path.outputs.firmware }} ${{ matrix.wamr_feature_option.mode}}
-
-      # for xtensa, for some reasons, when running the tests
-      # with test_wamr.sh -P, nuttx occasionally hangs after
-      # "total segments stored 6" on the CI.
-      # i (yamamoto) couldn't reproduce it locally (macOS) even
-      # with the identical flash image.
-      # for now, run the tests without -P.
-      - name: Test
-        if: matrix.target_config.target == 'xtensa'
         run: |
         run: |
           cd apps/interpreters/wamr/wamr/tests/wamr-test-suites
           cd apps/interpreters/wamr/wamr/tests/wamr-test-suites
           ./test_wamr.sh -s spec ${{ matrix.wamr_test_option.mode }} -m ${{ matrix.target_config.target }} -b -Q -F ${{ steps.build_firmware_path.outputs.firmware }} ${{ matrix.wamr_feature_option.mode}}
           ./test_wamr.sh -s spec ${{ matrix.wamr_test_option.mode }} -m ${{ matrix.target_config.target }} -b -Q -F ${{ steps.build_firmware_path.outputs.firmware }} ${{ matrix.wamr_feature_option.mode}}

+ 26 - 5
CMakeLists.txt

@@ -121,10 +121,14 @@ set (WAMR_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 
 
 include (${WAMR_ROOT_DIR}/build-scripts/runtime_lib.cmake)
 include (${WAMR_ROOT_DIR}/build-scripts/runtime_lib.cmake)
 
 
-set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wformat -Wformat-security -Wshadow -Wno-unused-parameter -fvisibility=hidden")
-# set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wconversion -Wsign-conversion")
-
-set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wformat -Wformat-security -Wno-unused")
+if (NOT WIN32)
+  set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wformat -Wformat-security \
+                                       -ffunction-sections -fdata-sections \
+                                       -Wno-unused-parameter -Wno-pedantic")
+  # Collapse runs of spaces so the flags are easier to read in the make log
+  string (REGEX REPLACE "  *" " " CMAKE_C_FLAGS ${CMAKE_C_FLAGS})
+  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wformat -Wformat-security -Wno-unused")
+endif()
 
 
 if (WAMR_BUILD_TARGET MATCHES "X86_.*" OR WAMR_BUILD_TARGET STREQUAL "AMD_64")
 if (WAMR_BUILD_TARGET MATCHES "X86_.*" OR WAMR_BUILD_TARGET STREQUAL "AMD_64")
   if (NOT (CMAKE_C_COMPILER MATCHES ".*clang.*" OR CMAKE_C_COMPILER_ID MATCHES ".*Clang"))
   if (NOT (CMAKE_C_COMPILER MATCHES ".*clang.*" OR CMAKE_C_COMPILER_ID MATCHES ".*Clang"))
@@ -145,6 +149,10 @@ include (${SHARED_DIR}/utils/uncommon/shared_uncommon.cmake)
 set (THREADS_PREFER_PTHREAD_FLAG ON)
 set (THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)
 find_package(Threads REQUIRED)
 
 
+if (MSVC)
+  add_definitions(-DCOMPILING_WASM_RUNTIME_API=1)
+endif ()
+
 # STATIC LIBRARY
 # STATIC LIBRARY
 if (WAMR_BUILD_STATIC)
 if (WAMR_BUILD_STATIC)
     add_library(iwasm_static STATIC ${WAMR_RUNTIME_LIB_SOURCE})
     add_library(iwasm_static STATIC ${WAMR_RUNTIME_LIB_SOURCE})
@@ -155,6 +163,14 @@ if (WAMR_BUILD_STATIC)
       target_link_libraries(iwasm_static INTERFACE boringssl_crypto)
       target_link_libraries(iwasm_static INTERFACE boringssl_crypto)
     endif ()
     endif ()
 
 
+    if (MINGW)
+      target_link_libraries (iwasm_static PRIVATE ws2_32)
+    endif ()
+
+    if (WIN32)
+      target_link_libraries(iwasm_static PRIVATE ntdll)  
+    endif()
+
     install (TARGETS iwasm_static ARCHIVE DESTINATION lib)
     install (TARGETS iwasm_static ARCHIVE DESTINATION lib)
 endif ()
 endif ()
 
 
@@ -169,9 +185,14 @@ if (WAMR_BUILD_SHARED)
     endif ()
     endif ()
 
 
     if (MINGW)
     if (MINGW)
-      target_link_libraries (iwasm_shared INTERFACE -lWs2_32 -lwsock32)
+      target_link_libraries(iwasm_shared INTERFACE -lWs2_32 -lwsock32)
+      target_link_libraries(iwasm_shared PRIVATE ws2_32)
     endif ()
     endif ()
 
 
+    if (WIN32)
+      target_link_libraries(iwasm_shared PRIVATE ntdll)  
+    endif()
+
     install (TARGETS iwasm_shared LIBRARY DESTINATION lib)
     install (TARGETS iwasm_shared LIBRARY DESTINATION lib)
 endif ()
 endif ()
 
 

+ 7 - 1
build-scripts/config_common.cmake

@@ -442,7 +442,9 @@ if (WAMR_BUILD_WASI_NN EQUAL 1)
   message ("     WASI-NN enabled")
   message ("     WASI-NN enabled")
   add_definitions (-DWASM_ENABLE_WASI_NN=1)
   add_definitions (-DWASM_ENABLE_WASI_NN=1)
   # Variant backends
   # Variant backends
-  if (NOT WAMR_BUILD_WASI_NN_TFLITE EQUAL 1 AND NOT WAMR_BUILD_WASI_NN_OPENVINO EQUAL 1)
+  if (NOT WAMR_BUILD_WASI_NN_TFLITE EQUAL 1 AND
+      NOT WAMR_BUILD_WASI_NN_OPENVINO EQUAL 1 AND
+      NOT WAMR_BUILD_WASI_NN_LLAMACPP EQUAL 1)
     message (FATAL_ERROR "   Need to select a backend for WASI-NN")
     message (FATAL_ERROR "   Need to select a backend for WASI-NN")
   endif ()
   endif ()
 
 
@@ -454,6 +456,10 @@ if (WAMR_BUILD_WASI_NN EQUAL 1)
     message ("     WASI-NN: backend openvino enabled")
     message ("     WASI-NN: backend openvino enabled")
     add_definitions (-DWASM_ENABLE_WASI_NN_OPENVINO)
     add_definitions (-DWASM_ENABLE_WASI_NN_OPENVINO)
   endif ()
   endif ()
+  if (WAMR_BUILD_WASI_NN_LLAMACPP EQUAL 1)
+    message ("     WASI-NN: backend llamacpp enabled")
+    add_definitions (-DWASM_ENABLE_WASI_NN_LLAMACPP)
+  endif ()
   # Variant devices
   # Variant devices
   if (WAMR_BUILD_WASI_NN_ENABLE_GPU EQUAL 1)
   if (WAMR_BUILD_WASI_NN_ENABLE_GPU EQUAL 1)
       message ("     WASI-NN: GPU enabled")
       message ("     WASI-NN: GPU enabled")

+ 13 - 1
core/iwasm/aot/aot_loader.c

@@ -302,7 +302,10 @@ loader_mmap(uint32 size, bool prot_exec, char *error_buf, uint32 error_buf_size)
     int map_flags;
     int map_flags;
     void *mem;
     void *mem;
 
 
-#if UINTPTR_MAX == UINT64_MAX
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) \
+    || defined(BUILD_TARGET_RISCV64_LP64D)                       \
+    || defined(BUILD_TARGET_RISCV64_LP64)
+#ifndef __APPLE__
     /* The mmapped AOT data and code in 64-bit targets had better be in
     /* The mmapped AOT data and code in 64-bit targets had better be in
        range 0 to 2G, or aot loader may fail to apply some relocations,
        range 0 to 2G, or aot loader may fail to apply some relocations,
        e.g., R_X86_64_32/R_X86_64_32S/R_X86_64_PC32/R_RISCV_32.
        e.g., R_X86_64_32/R_X86_64_32S/R_X86_64_PC32/R_RISCV_32.
@@ -316,6 +319,7 @@ loader_mmap(uint32 size, bool prot_exec, char *error_buf, uint32 error_buf_size)
         bh_assert((uintptr_t)mem < INT32_MAX);
         bh_assert((uintptr_t)mem < INT32_MAX);
         return mem;
         return mem;
     }
     }
+#endif
 #endif
 #endif
 
 
     map_flags = MMAP_MAP_NONE;
     map_flags = MMAP_MAP_NONE;
@@ -579,6 +583,10 @@ load_target_info_section(const uint8 *buf, const uint8 *buf_end,
         return false;
         return false;
     }
     }
 
 
+    /* for backwards compatibility with previous wamrc aot files */
+    if (!strcmp(target_info.arch, "arm64"))
+        bh_strcpy_s(target_info.arch, sizeof(target_info.arch), "aarch64v8");
+
     /* Check machine info */
     /* Check machine info */
     if (!check_machine_info(&target_info, error_buf, error_buf_size)) {
     if (!check_machine_info(&target_info, error_buf, error_buf_size)) {
         return false;
         return false;
@@ -589,6 +597,10 @@ load_target_info_section(const uint8 *buf, const uint8 *buf_end,
         return false;
         return false;
     }
     }
 
 
+#if WASM_ENABLE_DUMP_CALL_STACK != 0
+    module->feature_flags = target_info.feature_flags;
+#endif
+
     /* Finally, check feature flags */
     /* Finally, check feature flags */
     return check_feature_flags(error_buf, error_buf_size,
     return check_feature_flags(error_buf, error_buf_size,
                                target_info.feature_flags);
                                target_info.feature_flags);

+ 1 - 1
core/iwasm/aot/aot_reloc.h

@@ -226,7 +226,7 @@ SymbolMap *
 get_target_symbol_map(uint32 *sym_num);
 get_target_symbol_map(uint32 *sym_num);
 
 
 uint32
 uint32
-get_plt_table_size();
+get_plt_table_size(void);
 
 
 void
 void
 init_plt_table(uint8 *plt);
 init_plt_table(uint8 *plt);

+ 164 - 67
core/iwasm/aot/aot_runtime.c

@@ -4,6 +4,7 @@
  */
  */
 
 
 #include "aot_runtime.h"
 #include "aot_runtime.h"
+#include "../compilation/aot_stack_frame.h"
 #include "bh_log.h"
 #include "bh_log.h"
 #include "mem_alloc.h"
 #include "mem_alloc.h"
 #include "../common/wasm_runtime_common.h"
 #include "../common/wasm_runtime_common.h"
@@ -72,6 +73,10 @@ bh_static_assert(offsetof(AOTFrame, sp) == sizeof(uintptr_t) * 5);
 bh_static_assert(offsetof(AOTFrame, frame_ref) == sizeof(uintptr_t) * 6);
 bh_static_assert(offsetof(AOTFrame, frame_ref) == sizeof(uintptr_t) * 6);
 bh_static_assert(offsetof(AOTFrame, lp) == sizeof(uintptr_t) * 7);
 bh_static_assert(offsetof(AOTFrame, lp) == sizeof(uintptr_t) * 7);
 
 
+bh_static_assert(offsetof(AOTTinyFrame, func_index) == sizeof(uint32) * 0);
+bh_static_assert(offsetof(AOTTinyFrame, ip_offset) == sizeof(uint32) * 1);
+bh_static_assert(sizeof(AOTTinyFrame) == sizeof(uint32) * 2);
+
 static void
 static void
 set_error_buf(char *error_buf, uint32 error_buf_size, const char *string)
 set_error_buf(char *error_buf, uint32 error_buf_size, const char *string)
 {
 {
@@ -110,6 +115,55 @@ runtime_malloc(uint64 size, char *error_buf, uint32 error_buf_size)
     return mem;
     return mem;
 }
 }
 
 
+#if WASM_ENABLE_AOT_STACK_FRAME != 0
+static bool
+is_tiny_frame(WASMExecEnv *exec_env)
+{
+    AOTModule *module =
+        (AOTModule *)((AOTModuleInstance *)exec_env->module_inst)->module;
+
+    return module->feature_flags & WASM_FEATURE_TINY_STACK_FRAME;
+}
+
+static bool
+is_frame_per_function(WASMExecEnv *exec_env)
+{
+    AOTModule *module =
+        (AOTModule *)((AOTModuleInstance *)exec_env->module_inst)->module;
+
+    return module->feature_flags & WASM_FEATURE_FRAME_PER_FUNCTION;
+}
+
+static void *
+get_top_frame(WASMExecEnv *exec_env)
+{
+    if (is_tiny_frame(exec_env)) {
+        return exec_env->wasm_stack.top > exec_env->wasm_stack.bottom
+                   ? exec_env->wasm_stack.top - sizeof(AOTTinyFrame)
+                   : NULL;
+    }
+    else {
+        return exec_env->cur_frame;
+    }
+}
+
+static void *
+get_prev_frame(WASMExecEnv *exec_env, void *cur_frame)
+{
+    bh_assert(cur_frame);
+
+    if (is_tiny_frame(exec_env)) {
+        if ((uint8 *)cur_frame == exec_env->wasm_stack.bottom) {
+            return NULL;
+        }
+        return ((AOTTinyFrame *)cur_frame) - 1;
+    }
+    else {
+        return ((AOTFrame *)cur_frame)->prev_frame;
+    }
+}
+#endif
+
 static bool
 static bool
 check_global_init_expr(const AOTModule *module, uint32 global_index,
 check_global_init_expr(const AOTModule *module, uint32 global_index,
                        char *error_buf, uint32 error_buf_size)
                        char *error_buf, uint32 error_buf_size)
@@ -2265,7 +2319,7 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function,
         uint32 ext_ret_cell = wasm_get_cell_num(ext_ret_types, ext_ret_count);
         uint32 ext_ret_cell = wasm_get_cell_num(ext_ret_types, ext_ret_count);
         uint64 size;
         uint64 size;
 #if WASM_ENABLE_AOT_STACK_FRAME != 0
 #if WASM_ENABLE_AOT_STACK_FRAME != 0
-        struct WASMInterpFrame *prev_frame = exec_env->cur_frame;
+        void *prev_frame = get_top_frame(exec_env);
 #endif
 #endif
 
 
         /* Allocate memory for all arguments */
         /* Allocate memory for all arguments */
@@ -2296,7 +2350,8 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function,
         }
         }
 
 
 #if WASM_ENABLE_AOT_STACK_FRAME != 0
 #if WASM_ENABLE_AOT_STACK_FRAME != 0
-        if (!aot_alloc_frame(exec_env, function->func_index)) {
+        if (!is_frame_per_function(exec_env)
+            && !aot_alloc_frame(exec_env, function->func_index)) {
             if (argv1 != argv1_buf)
             if (argv1 != argv1_buf)
                 wasm_runtime_free(argv1);
                 wasm_runtime_free(argv1);
             return false;
             return false;
@@ -2324,7 +2379,7 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function,
         /* Free all frames allocated, note that some frames
         /* Free all frames allocated, note that some frames
            may be allocated in AOT code and haven't been
            may be allocated in AOT code and haven't been
            freed if exception occurred */
            freed if exception occurred */
-        while (exec_env->cur_frame != prev_frame)
+        while (get_top_frame(exec_env) != prev_frame)
             aot_free_frame(exec_env);
             aot_free_frame(exec_env);
 #endif
 #endif
         if (!ret) {
         if (!ret) {
@@ -2367,9 +2422,12 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function,
     }
     }
     else {
     else {
 #if WASM_ENABLE_AOT_STACK_FRAME != 0
 #if WASM_ENABLE_AOT_STACK_FRAME != 0
-        struct WASMInterpFrame *prev_frame = exec_env->cur_frame;
-
-        if (!aot_alloc_frame(exec_env, function->func_index)) {
+        void *prev_frame = get_top_frame(exec_env);
+        /* Only allocate frame for frame-per-call mode; in the
+           frame-per-function mode the frame is allocated at the
+           beginning of the function. */
+        if (!is_frame_per_function(exec_env)
+            && !aot_alloc_frame(exec_env, function->func_index)) {
             return false;
             return false;
         }
         }
 #endif
 #endif
@@ -2394,7 +2452,7 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function,
         /* Free all frames allocated, note that some frames
         /* Free all frames allocated, note that some frames
            may be allocated in AOT code and haven't been
            may be allocated in AOT code and haven't been
            freed if exception occurred */
            freed if exception occurred */
-        while (exec_env->cur_frame != prev_frame)
+        while (get_top_frame(exec_env) != prev_frame)
             aot_free_frame(exec_env);
             aot_free_frame(exec_env);
 #endif
 #endif
 
 
@@ -2880,7 +2938,7 @@ aot_invoke_native(WASMExecEnv *exec_env, uint32 func_idx, uint32 argc,
             goto fail;
             goto fail;
         }
         }
 #if WASM_ENABLE_AOT_STACK_FRAME != 0
 #if WASM_ENABLE_AOT_STACK_FRAME != 0
-        struct WASMInterpFrame *prev_frame = exec_env->cur_frame;
+        void *prev_frame = get_top_frame(exec_env);
 
 
         if (!aot_alloc_frame(exec_env, func_idx)) {
         if (!aot_alloc_frame(exec_env, func_idx)) {
             goto fail;
             goto fail;
@@ -2894,7 +2952,7 @@ aot_invoke_native(WASMExecEnv *exec_env, uint32 func_idx, uint32 argc,
         /* Free all frames allocated, note that some frames
         /* Free all frames allocated, note that some frames
            may be allocated in AOT code and haven't been
            may be allocated in AOT code and haven't been
            freed if exception occurred */
            freed if exception occurred */
-        while (exec_env->cur_frame != prev_frame)
+        while (get_top_frame(exec_env) != prev_frame)
             aot_free_frame(exec_env);
             aot_free_frame(exec_env);
 #endif
 #endif
     }
     }
@@ -3622,8 +3680,8 @@ get_func_name_from_index(const AOTModuleInstance *module_inst,
           WASM_ENABLE_PERF_PROFILING != 0 */
           WASM_ENABLE_PERF_PROFILING != 0 */
 
 
 #if WASM_ENABLE_GC == 0
 #if WASM_ENABLE_GC == 0
-bool
-aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
+static bool
+aot_alloc_standard_frame(WASMExecEnv *exec_env, uint32 func_index)
 {
 {
     AOTModuleInstance *module_inst = (AOTModuleInstance *)exec_env->module_inst;
     AOTModuleInstance *module_inst = (AOTModuleInstance *)exec_env->module_inst;
 #if WASM_ENABLE_PERF_PROFILING != 0
 #if WASM_ENABLE_PERF_PROFILING != 0
@@ -3668,37 +3726,10 @@ aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
     return true;
     return true;
 }
 }
 
 
-static inline void
-aot_free_frame_internal(WASMExecEnv *exec_env)
-{
-    AOTFrame *cur_frame = (AOTFrame *)exec_env->cur_frame;
-    AOTFrame *prev_frame = cur_frame->prev_frame;
-
-#if WASM_ENABLE_PERF_PROFILING != 0
-    uint64 time_elapsed =
-        (uintptr_t)os_time_thread_cputime_us() - cur_frame->time_started;
-
-    cur_frame->func_perf_prof_info->total_exec_time += time_elapsed;
-    cur_frame->func_perf_prof_info->total_exec_cnt++;
-
-    /* parent function */
-    if (prev_frame)
-        prev_frame->func_perf_prof_info->children_exec_time += time_elapsed;
-#endif
-
-    exec_env->cur_frame = (struct WASMInterpFrame *)prev_frame;
-}
-
-void
-aot_free_frame(WASMExecEnv *exec_env)
-{
-    aot_free_frame_internal(exec_env);
-}
-
 #else /* else of WASM_ENABLE_GC == 0 */
 #else /* else of WASM_ENABLE_GC == 0 */
 
 
-bool
-aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
+static bool
+aot_alloc_standard_frame(WASMExecEnv *exec_env, uint32 func_index)
 {
 {
     AOTModuleInstance *module_inst = (AOTModuleInstance *)exec_env->module_inst;
     AOTModuleInstance *module_inst = (AOTModuleInstance *)exec_env->module_inst;
     AOTModule *module = (AOTModule *)module_inst->module;
     AOTModule *module = (AOTModule *)module_inst->module;
@@ -3752,12 +3783,50 @@ aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
     frame->func_index = func_index;
     frame->func_index = func_index;
     return true;
     return true;
 }
 }
+#endif /* end of WASM_ENABLE_GC == 0 */
+
+static bool
+aot_alloc_tiny_frame(WASMExecEnv *exec_env, uint32 func_index)
+{
+    AOTTinyFrame *new_frame = (AOTTinyFrame *)exec_env->wasm_stack.top;
+
+    if ((uint8 *)new_frame > exec_env->wasm_stack.top_boundary) {
+        aot_set_exception((WASMModuleInstance *)exec_env->module_inst,
+                          "wasm operand stack overflow");
+        return false;
+    }
+
+    new_frame->func_index = func_index;
+    exec_env->wasm_stack.top += sizeof(AOTTinyFrame);
+    return true;
+}
+
+bool
+aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
+{
+    AOTModule *module =
+        (AOTModule *)((AOTModuleInstance *)exec_env->module_inst)->module;
+
+    if (is_frame_per_function(exec_env)
+        && func_index >= module->import_func_count) {
+        /* In frame-per-function mode the frame is allocated at
+        the beginning of each function, so here we only need to
+        allocate the frame for imported functions */
+        return true;
+    }
+    if (is_tiny_frame(exec_env)) {
+        return aot_alloc_tiny_frame(exec_env, func_index);
+    }
+    else {
+        return aot_alloc_standard_frame(exec_env, func_index);
+    }
+}
 
 
 static inline void
 static inline void
-aot_free_frame_internal(WASMExecEnv *exec_env)
+aot_free_standard_frame(WASMExecEnv *exec_env)
 {
 {
     AOTFrame *cur_frame = (AOTFrame *)exec_env->cur_frame;
     AOTFrame *cur_frame = (AOTFrame *)exec_env->cur_frame;
-    AOTFrame *prev_frame = cur_frame->prev_frame;
+    AOTFrame *prev_frame = (AOTFrame *)cur_frame->prev_frame;
 
 
 #if WASM_ENABLE_PERF_PROFILING != 0
 #if WASM_ENABLE_PERF_PROFILING != 0
     uint64 time_elapsed =
     uint64 time_elapsed =
@@ -3771,18 +3840,30 @@ aot_free_frame_internal(WASMExecEnv *exec_env)
         prev_frame->func_perf_prof_info->children_exec_time += time_elapsed;
         prev_frame->func_perf_prof_info->children_exec_time += time_elapsed;
 #endif
 #endif
 
 
+#if WASM_ENABLE_GC != 0
     wasm_exec_env_free_wasm_frame(exec_env, cur_frame);
     wasm_exec_env_free_wasm_frame(exec_env, cur_frame);
+#endif
     exec_env->cur_frame = (struct WASMInterpFrame *)prev_frame;
     exec_env->cur_frame = (struct WASMInterpFrame *)prev_frame;
 }
 }
 
 
+static inline void
+aot_free_tiny_frame(WASMExecEnv *exec_env)
+{
+    exec_env->wasm_stack.top =
+        get_prev_frame(exec_env, exec_env->wasm_stack.top);
+}
+
 void
 void
 aot_free_frame(WASMExecEnv *exec_env)
 aot_free_frame(WASMExecEnv *exec_env)
 {
 {
-    aot_free_frame_internal(exec_env);
+    if (is_tiny_frame(exec_env)) {
+        aot_free_tiny_frame(exec_env);
+    }
+    else {
+        aot_free_standard_frame(exec_env);
+    }
 }
 }
 
 
-#endif /* end of WASM_ENABLE_GC == 0 */
-
 void
 void
 aot_frame_update_profile_info(WASMExecEnv *exec_env, bool alloc_frame)
 aot_frame_update_profile_info(WASMExecEnv *exec_env, bool alloc_frame)
 {
 {
@@ -3831,14 +3912,13 @@ aot_frame_update_profile_info(WASMExecEnv *exec_env, bool alloc_frame)
 bool
 bool
 aot_create_call_stack(struct WASMExecEnv *exec_env)
 aot_create_call_stack(struct WASMExecEnv *exec_env)
 {
 {
-    AOTFrame *cur_frame = (AOTFrame *)exec_env->cur_frame,
-             *first_frame = cur_frame;
     AOTModuleInstance *module_inst = (AOTModuleInstance *)exec_env->module_inst;
     AOTModuleInstance *module_inst = (AOTModuleInstance *)exec_env->module_inst;
     AOTModule *module = (AOTModule *)module_inst->module;
     AOTModule *module = (AOTModule *)module_inst->module;
     uint32 n = 0;
     uint32 n = 0;
 
 
-    while (cur_frame) {
-        cur_frame = cur_frame->prev_frame;
+    void *top_frame = get_top_frame(exec_env);
+    while (top_frame) {
+        top_frame = get_prev_frame(exec_env, top_frame);
         n++;
         n++;
     }
     }
 
 
@@ -3848,28 +3928,46 @@ aot_create_call_stack(struct WASMExecEnv *exec_env)
         return false;
         return false;
     }
     }
 
 
-    cur_frame = first_frame;
-    while (cur_frame) {
+    top_frame = get_top_frame(exec_env);
+    while (n-- > 0) {
+        uint32 func_index, ip_offset;
+        uint32 *lp = NULL;
+#if WASM_ENABLE_GC != 0
+        uint32 *sp = NULL;
+        uint8 *frame_ref = NULL;
+#endif
+        if (is_tiny_frame(exec_env)) {
+            AOTTinyFrame *frame = (AOTTinyFrame *)top_frame;
+            func_index = (uint32)frame->func_index;
+            ip_offset = (uint32)frame->ip_offset;
+        }
+        else {
+            AOTFrame *frame = (AOTFrame *)top_frame;
+            func_index = (uint32)frame->func_index;
+            ip_offset = (uint32)frame->ip_offset;
+            lp = frame->lp;
+#if WASM_ENABLE_GC != 0
+            sp = frame->sp;
+            frame_ref = frame->frame_ref;
+#endif
+        }
         WASMCApiFrame frame = { 0 };
         WASMCApiFrame frame = { 0 };
         uint32 max_local_cell_num, max_stack_cell_num;
         uint32 max_local_cell_num, max_stack_cell_num;
         uint32 all_cell_num, lp_size;
         uint32 all_cell_num, lp_size;
 
 
         frame.instance = module_inst;
         frame.instance = module_inst;
         frame.module_offset = 0;
         frame.module_offset = 0;
-        frame.func_index = (uint32)cur_frame->func_index;
-        frame.func_offset = (uint32)cur_frame->ip_offset;
-        frame.func_name_wp = get_func_name_from_index(
-            module_inst, (uint32)cur_frame->func_index);
-
-        if (cur_frame->func_index >= module->import_func_count) {
-            uint32 aot_func_idx =
-                (uint32)(cur_frame->func_index - module->import_func_count);
+        frame.func_index = func_index;
+        frame.func_offset = ip_offset;
+        frame.func_name_wp = get_func_name_from_index(module_inst, func_index);
+
+        if (func_index >= module->import_func_count) {
+            uint32 aot_func_idx = func_index - module->import_func_count;
             max_local_cell_num = module->max_local_cell_nums[aot_func_idx];
             max_local_cell_num = module->max_local_cell_nums[aot_func_idx];
             max_stack_cell_num = module->max_stack_cell_nums[aot_func_idx];
             max_stack_cell_num = module->max_stack_cell_nums[aot_func_idx];
         }
         }
         else {
         else {
-            AOTFuncType *func_type =
-                module->import_funcs[cur_frame->func_index].func_type;
+            AOTFuncType *func_type = module->import_funcs[func_index].func_type;
             max_local_cell_num =
             max_local_cell_num =
                 func_type->param_cell_num > 2 ? func_type->param_cell_num : 2;
                 func_type->param_cell_num > 2 ? func_type->param_cell_num : 2;
             max_stack_cell_num = 0;
             max_stack_cell_num = 0;
@@ -3881,12 +3979,12 @@ aot_create_call_stack(struct WASMExecEnv *exec_env)
 #else
 #else
         lp_size = align_uint(all_cell_num * 5, 4);
         lp_size = align_uint(all_cell_num * 5, 4);
 #endif
 #endif
-        if (lp_size > 0) {
+        if (lp_size > 0 && !is_tiny_frame(exec_env)) {
             if (!(frame.lp = wasm_runtime_malloc(lp_size))) {
             if (!(frame.lp = wasm_runtime_malloc(lp_size))) {
                 destroy_c_api_frames(module_inst->frames);
                 destroy_c_api_frames(module_inst->frames);
                 return false;
                 return false;
             }
             }
-            bh_memcpy_s(frame.lp, lp_size, cur_frame->lp, lp_size);
+            bh_memcpy_s(frame.lp, lp_size, lp, lp_size);
 
 
 #if WASM_ENABLE_GC != 0
 #if WASM_ENABLE_GC != 0
             uint32 local_ref_flags_cell_num =
             uint32 local_ref_flags_cell_num =
@@ -3894,9 +3992,8 @@ aot_create_call_stack(struct WASMExecEnv *exec_env)
                     .local_ref_flag_cell_num;
                     .local_ref_flag_cell_num;
             uint8 *local_ref_flags =
             uint8 *local_ref_flags =
                 module->func_local_ref_flags[frame.func_index].local_ref_flags;
                 module->func_local_ref_flags[frame.func_index].local_ref_flags;
-            frame.sp = frame.lp + (cur_frame->sp - cur_frame->lp);
-            frame.frame_ref = (uint8 *)frame.lp
-                              + (cur_frame->frame_ref - (uint8 *)cur_frame->lp);
+            frame.sp = frame.lp + (sp - lp);
+            frame.frame_ref = (uint8 *)frame.lp + (frame_ref - (uint8 *)lp);
             /* copy local ref flags from AOT module */
             /* copy local ref flags from AOT module */
             bh_memcpy_s(frame.frame_ref, local_ref_flags_cell_num,
             bh_memcpy_s(frame.frame_ref, local_ref_flags_cell_num,
                         local_ref_flags, lp_size);
                         local_ref_flags, lp_size);
@@ -3910,7 +4007,7 @@ aot_create_call_stack(struct WASMExecEnv *exec_env)
             return false;
             return false;
         }
         }
 
 
-        cur_frame = cur_frame->prev_frame;
+        top_frame = get_prev_frame(exec_env, top_frame);
     }
     }
 
 
     return true;
     return true;

+ 13 - 2
core/iwasm/aot/aot_runtime.h

@@ -25,12 +25,15 @@ extern "C" {
 #define WASM_FEATURE_REF_TYPES (1 << 3)
 #define WASM_FEATURE_REF_TYPES (1 << 3)
 #define WASM_FEATURE_GARBAGE_COLLECTION (1 << 4)
 #define WASM_FEATURE_GARBAGE_COLLECTION (1 << 4)
 #define WASM_FEATURE_EXCEPTION_HANDLING (1 << 5)
 #define WASM_FEATURE_EXCEPTION_HANDLING (1 << 5)
-#define WASM_FEATURE_MEMORY64 (1 << 6)
+#define WASM_FEATURE_TINY_STACK_FRAME (1 << 6)
 #define WASM_FEATURE_MULTI_MEMORY (1 << 7)
 #define WASM_FEATURE_MULTI_MEMORY (1 << 7)
 #define WASM_FEATURE_DYNAMIC_LINKING (1 << 8)
 #define WASM_FEATURE_DYNAMIC_LINKING (1 << 8)
 #define WASM_FEATURE_COMPONENT_MODEL (1 << 9)
 #define WASM_FEATURE_COMPONENT_MODEL (1 << 9)
 #define WASM_FEATURE_RELAXED_SIMD (1 << 10)
 #define WASM_FEATURE_RELAXED_SIMD (1 << 10)
 #define WASM_FEATURE_FLEXIBLE_VECTORS (1 << 11)
 #define WASM_FEATURE_FLEXIBLE_VECTORS (1 << 11)
+/* Stack frame is created at the beginning of the function,
+ * and not at the beginning of each function call */
+#define WASM_FEATURE_FRAME_PER_FUNCTION (1 << 12)
 
 
 typedef enum AOTSectionType {
 typedef enum AOTSectionType {
     AOT_SECTION_TYPE_TARGET_INFO = 0,
     AOT_SECTION_TYPE_TARGET_INFO = 0,
@@ -39,6 +42,10 @@ typedef enum AOTSectionType {
     AOT_SECTION_TYPE_FUNCTION = 3,
     AOT_SECTION_TYPE_FUNCTION = 3,
     AOT_SECTION_TYPE_EXPORT = 4,
     AOT_SECTION_TYPE_EXPORT = 4,
     AOT_SECTION_TYPE_RELOCATION = 5,
     AOT_SECTION_TYPE_RELOCATION = 5,
+    /*
+     * Note: We haven't had anything to use AOT_SECTION_TYPE_SIGNATURE.
+     * It's just reserved for possible module signing features.
+     */
     AOT_SECTION_TYPE_SIGNATURE = 6,
     AOT_SECTION_TYPE_SIGNATURE = 6,
     AOT_SECTION_TYPE_CUSTOM = 100,
     AOT_SECTION_TYPE_CUSTOM = 100,
 } AOTSectionType;
 } AOTSectionType;
@@ -322,6 +329,10 @@ typedef struct AOTModule {
     /* `.data` and `.text` sections merged into one large mmaped section */
     /* `.data` and `.text` sections merged into one large mmaped section */
     uint8 *merged_data_text_sections;
     uint8 *merged_data_text_sections;
     uint32 merged_data_text_sections_size;
     uint32 merged_data_text_sections_size;
+
+#if WASM_ENABLE_AOT_STACK_FRAME != 0
+    uint32 feature_flags;
+#endif
 } AOTModule;
 } AOTModule;
 
 
 #define AOTMemoryInstance WASMMemoryInstance
 #define AOTMemoryInstance WASMMemoryInstance
@@ -637,7 +648,7 @@ aot_check_app_addr_and_convert(AOTModuleInstance *module_inst, bool is_str,
                                void **p_native_addr);
                                void **p_native_addr);
 
 
 uint32
 uint32
-aot_get_plt_table_size();
+aot_get_plt_table_size(void);
 
 
 void *
 void *
 aot_memmove(void *dest, const void *src, size_t n);
 aot_memmove(void *dest, const void *src, size_t n);

+ 3 - 10
core/iwasm/aot/arch/aot_reloc_aarch64.c

@@ -53,12 +53,6 @@ get_target_symbol_map(uint32 *sym_num)
     return target_sym_map;
     return target_sym_map;
 }
 }
 
 
-#if (defined(__APPLE__) || defined(__MACH__)) && defined(__arm64__)
-#define BUILD_TARGET_AARCH64_DEFAULT "arm64"
-#else
-#define BUILD_TARGET_AARCH64_DEFAULT "aarch64v8"
-#endif
-
 void
 void
 get_current_target(char *target_buf, uint32 target_buf_size)
 get_current_target(char *target_buf, uint32 target_buf_size)
 {
 {
@@ -68,8 +62,8 @@ get_current_target(char *target_buf, uint32 target_buf_size)
 
 
     /* Set to "aarch64v8" by default if sub version isn't specified */
     /* Set to "aarch64v8" by default if sub version isn't specified */
     if (strcmp(s, "AARCH64") == 0) {
     if (strcmp(s, "AARCH64") == 0) {
-        s = BUILD_TARGET_AARCH64_DEFAULT;
-        s_size = sizeof(BUILD_TARGET_AARCH64_DEFAULT);
+        s = "aarch64v8";
+        s_size = 9; /* strlen("aarch64v8"); */
     }
     }
     if (target_buf_size < s_size) {
     if (target_buf_size < s_size) {
         s_size = target_buf_size;
         s_size = target_buf_size;
@@ -83,10 +77,9 @@ get_current_target(char *target_buf, uint32 target_buf_size)
     /* Ensure the string is null byte ('\0') terminated */
     /* Ensure the string is null byte ('\0') terminated */
     *d = '\0';
     *d = '\0';
 }
 }
-#undef BUILD_TARGET_AARCH64_DEFAULT
 
 
 static uint32
 static uint32
-get_plt_item_size()
+get_plt_item_size(void)
 {
 {
     /* 6*4 bytes instructions and 8 bytes symbol address */
     /* 6*4 bytes instructions and 8 bytes symbol address */
     return 32;
     return 32;

+ 97 - 97
core/iwasm/aot/arch/aot_reloc_arm.c

@@ -12,102 +12,102 @@
 #define R_ARM_MOVT_ABS 44
 #define R_ARM_MOVT_ABS 44
 
 
 /* clang-format off */
 /* clang-format off */
-void __adddf3();
-void __addsf3();
-void __aeabi_d2f();
-void __aeabi_d2iz();
-void __aeabi_d2lz();
-void __aeabi_d2uiz();
-void __aeabi_d2ulz();
-void __aeabi_dadd();
-void __aeabi_dcmpeq();
-void __aeabi_dcmpge();
-void __aeabi_dcmpgt();
-void __aeabi_dcmple();
-void __aeabi_dcmplt();
-void __aeabi_dcmpun();
-void __aeabi_ddiv();
-void __aeabi_dmul();
-void __aeabi_dsub();
-void __aeabi_f2d();
-void __aeabi_f2iz();
-void __aeabi_f2lz();
-void __aeabi_f2ulz();
-void __aeabi_fadd();
-void __aeabi_fcmpeq();
-void __aeabi_fcmpge();
-void __aeabi_fcmpgt();
-void __aeabi_fcmple();
-void __aeabi_fcmplt();
-void __aeabi_fcmpun();
-void __aeabi_fdiv();
-void __aeabi_fmul();
-void __aeabi_fsub();
-void __aeabi_i2d();
-void __aeabi_i2f();
-void __aeabi_idiv();
-void __aeabi_idivmod();
-void __aeabi_l2d();
-void __aeabi_l2f();
-void __aeabi_ldivmod();
-void __aeabi_memclr();
-void __aeabi_memcpy();
-void __aeabi_memmove();
-void __aeabi_memset();
-void __aeabi_ui2d();
-void __aeabi_ui2f();
-void __aeabi_uidiv();
-void __aeabi_uidivmod();
-void __aeabi_ul2d();
-void __aeabi_ul2f();
-void __aeabi_uldivmod();
-void __clzsi2();
-void __divdf3();
-void __divdi3();
-void __divsf3();
-void __divsi3();
-void __eqdf2();
-void __eqsf2();
-void __extendsfdf2();
-void __fixdfdi();
-void __fixdfsi();
-void __fixsfdi();
-void __fixsfsi();
-void __fixunsdfdi();
-void __fixunsdfsi();
-void __fixunssfdi();
-void __floatdidf();
-void __floatdisf();
-void __floatsidf();
-void __floatsisf();
-void __floatundidf();
-void __floatundisf();
-void __floatunsidf();
-void __floatunsisf();
-void __gedf2();
-void __gesf2();
-void __gtdf2();
-void __gtsf2();
-void __ledf2();
-void __lesf2();
-void __ltdf2();
-void __ltsf2();
-void __moddi3();
-void __modsi3();
-void __muldf3();
-void __mulsf3();
-void __nedf2();
-void __nesf2();
-void __subdf3();
-void __subsf3();
-void __truncdfsf2();
-void __udivdi3();
-void __udivmoddi4();
-void __udivsi3();
-void __umoddi3();
-void __umodsi3();
-void __unorddf2();
-void __unordsf2();
+void __adddf3(void);
+void __addsf3(void);
+void __aeabi_d2f(void);
+void __aeabi_d2iz(void);
+void __aeabi_d2lz(void);
+void __aeabi_d2uiz(void);
+void __aeabi_d2ulz(void);
+void __aeabi_dadd(void);
+void __aeabi_dcmpeq(void);
+void __aeabi_dcmpge(void);
+void __aeabi_dcmpgt(void);
+void __aeabi_dcmple(void);
+void __aeabi_dcmplt(void);
+void __aeabi_dcmpun(void);
+void __aeabi_ddiv(void);
+void __aeabi_dmul(void);
+void __aeabi_dsub(void);
+void __aeabi_f2d(void);
+void __aeabi_f2iz(void);
+void __aeabi_f2lz(void);
+void __aeabi_f2ulz(void);
+void __aeabi_fadd(void);
+void __aeabi_fcmpeq(void);
+void __aeabi_fcmpge(void);
+void __aeabi_fcmpgt(void);
+void __aeabi_fcmple(void);
+void __aeabi_fcmplt(void);
+void __aeabi_fcmpun(void);
+void __aeabi_fdiv(void);
+void __aeabi_fmul(void);
+void __aeabi_fsub(void);
+void __aeabi_i2d(void);
+void __aeabi_i2f(void);
+void __aeabi_idiv(void);
+void __aeabi_idivmod(void);
+void __aeabi_l2d(void);
+void __aeabi_l2f(void);
+void __aeabi_ldivmod(void);
+void __aeabi_memclr(void);
+void __aeabi_memcpy(void);
+void __aeabi_memmove(void);
+void __aeabi_memset(void);
+void __aeabi_ui2d(void);
+void __aeabi_ui2f(void);
+void __aeabi_uidiv(void);
+void __aeabi_uidivmod(void);
+void __aeabi_ul2d(void);
+void __aeabi_ul2f(void);
+void __aeabi_uldivmod(void);
+void __clzsi2(void);
+void __divdf3(void);
+void __divdi3(void);
+void __divsf3(void);
+void __divsi3(void);
+void __eqdf2(void);
+void __eqsf2(void);
+void __extendsfdf2(void);
+void __fixdfdi(void);
+void __fixdfsi(void);
+void __fixsfdi(void);
+void __fixsfsi(void);
+void __fixunsdfdi(void);
+void __fixunsdfsi(void);
+void __fixunssfdi(void);
+void __floatdidf(void);
+void __floatdisf(void);
+void __floatsidf(void);
+void __floatsisf(void);
+void __floatundidf(void);
+void __floatundisf(void);
+void __floatunsidf(void);
+void __floatunsisf(void);
+void __gedf2(void);
+void __gesf2(void);
+void __gtdf2(void);
+void __gtsf2(void);
+void __ledf2(void);
+void __lesf2(void);
+void __ltdf2(void);
+void __ltsf2(void);
+void __moddi3(void);
+void __modsi3(void);
+void __muldf3(void);
+void __mulsf3(void);
+void __nedf2(void);
+void __nesf2(void);
+void __subdf3(void);
+void __subsf3(void);
+void __truncdfsf2(void);
+void __udivdi3(void);
+void __udivmoddi4(void);
+void __udivsi3(void);
+void __umoddi3(void);
+void __umodsi3(void);
+void __unorddf2(void);
+void __unordsf2(void);
 /* clang-format on */
 /* clang-format on */
 
 
 static SymbolMap target_sym_map[] = {
 static SymbolMap target_sym_map[] = {
@@ -255,7 +255,7 @@ get_current_target(char *target_buf, uint32 target_buf_size)
 #undef BUILD_TARGET_ARM_DEFAULT
 #undef BUILD_TARGET_ARM_DEFAULT
 
 
 uint32
 uint32
-get_plt_item_size()
+get_plt_item_size(void)
 {
 {
     /* 8 bytes instructions and 4 bytes symbol address */
     /* 8 bytes instructions and 4 bytes symbol address */
     return 12;
     return 12;

+ 1 - 1
core/iwasm/aot/arch/aot_reloc_mips.c

@@ -28,7 +28,7 @@ get_current_target(char *target_buf, uint32 target_buf_size)
 }
 }
 
 
 static uint32
 static uint32
-get_plt_item_size()
+get_plt_item_size(void)
 {
 {
     return 0;
     return 0;
 }
 }

+ 53 - 53
core/iwasm/aot/arch/aot_reloc_riscv.c

@@ -49,58 +49,58 @@
 #endif
 #endif
 
 
 /* clang-format off */
 /* clang-format off */
-void __adddf3();
-void __addsf3();
-void __divdf3();
-void __divdi3();
-void __divsf3();
-void __divsi3();
-void __eqdf2();
-void __eqsf2();
-void __extendsfdf2();
-void __fixdfdi();
-void __fixdfsi();
-void __fixsfdi();
-void __fixsfsi();
-void __fixunsdfdi();
-void __fixunsdfsi();
-void __fixunssfdi();
-void __fixunssfsi();
-void __floatdidf();
-void __floatdisf();
-void __floatsidf();
-void __floatsisf();
-void __floatundidf();
-void __floatundisf();
-void __floatunsidf();
-void __floatunsisf();
-void __gedf2();
-void __gesf2();
-void __gtdf2();
-void __gtsf2();
-void __ledf2();
-void __lesf2();
-void __ltdf2();
-void __ltsf2();
-void __moddi3();
-void __modsi3();
-void __muldf3();
-void __muldi3();
-void __mulsf3();
-void __mulsi3();
-void __nedf2();
-void __negdf2();
-void __negsf2();
-void __nesf2();
-void __subdf3();
-void __subsf3();
-void __truncdfsf2();
-void __udivdi3();
-void __udivsi3();
-void __umoddi3();
-void __umodsi3();
-void __unorddf2();
-void __unordsf2();
+void __adddf3(void);
+void __addsf3(void);
+void __divdf3(void);
+void __divdi3(void);
+void __divsf3(void);
+void __divsi3(void);
+void __eqdf2(void);
+void __eqsf2(void);
+void __extendsfdf2(void);
+void __fixdfdi(void);
+void __fixdfsi(void);
+void __fixsfdi(void);
+void __fixsfsi(void);
+void __fixunsdfdi(void);
+void __fixunsdfsi(void);
+void __fixunssfdi(void);
+void __fixunssfsi(void);
+void __floatdidf(void);
+void __floatdisf(void);
+void __floatsidf(void);
+void __floatsisf(void);
+void __floatundidf(void);
+void __floatundisf(void);
+void __floatunsidf(void);
+void __floatunsisf(void);
+void __gedf2(void);
+void __gesf2(void);
+void __gtdf2(void);
+void __gtsf2(void);
+void __ledf2(void);
+void __lesf2(void);
+void __ltdf2(void);
+void __ltsf2(void);
+void __moddi3(void);
+void __modsi3(void);
+void __muldf3(void);
+void __muldi3(void);
+void __mulsf3(void);
+void __mulsi3(void);
+void __nedf2(void);
+void __negdf2(void);
+void __negsf2(void);
+void __nesf2(void);
+void __subdf3(void);
+void __subsf3(void);
+void __truncdfsf2(void);
+void __udivdi3(void);
+void __udivsi3(void);
+void __umoddi3(void);
+void __umodsi3(void);
+void __unorddf2(void);
+void __unordsf2(void);
 /* clang-format on */
 /* clang-format on */
 
 
 static SymbolMap target_sym_map[] = {
 static SymbolMap target_sym_map[] = {
@@ -193,7 +193,7 @@ get_current_target(char *target_buf, uint32 target_buf_size)
 }
 }
 
 
 uint32
 uint32
-get_plt_item_size()
+get_plt_item_size(void)
 {
 {
 #if __riscv_xlen == 64
 #if __riscv_xlen == 64
     /* auipc + ld + jalr + nop + addr */
     /* auipc + ld + jalr + nop + addr */

+ 97 - 97
core/iwasm/aot/arch/aot_reloc_thumb.c

@@ -14,102 +14,102 @@
 #define R_ARM_THM_MOVT_PREL 50
 #define R_ARM_THM_MOVT_PREL 50
 
 
 /* clang-format off */
 /* clang-format off */
-void __adddf3();
-void __addsf3();
-void __aeabi_d2f();
-void __aeabi_d2iz();
-void __aeabi_d2lz();
-void __aeabi_d2uiz();
-void __aeabi_d2ulz();
-void __aeabi_dadd();
-void __aeabi_dcmpeq();
-void __aeabi_dcmpge();
-void __aeabi_dcmpgt();
-void __aeabi_dcmple();
-void __aeabi_dcmplt();
-void __aeabi_dcmpun();
-void __aeabi_ddiv();
-void __aeabi_dmul();
-void __aeabi_dsub();
-void __aeabi_f2d();
-void __aeabi_f2iz();
-void __aeabi_f2lz();
-void __aeabi_f2ulz();
-void __aeabi_fadd();
-void __aeabi_fcmpeq();
-void __aeabi_fcmpge();
-void __aeabi_fcmpgt();
-void __aeabi_fcmple();
-void __aeabi_fcmplt();
-void __aeabi_fcmpun();
-void __aeabi_fdiv();
-void __aeabi_fmul();
-void __aeabi_fsub();
-void __aeabi_i2d();
-void __aeabi_i2f();
-void __aeabi_idiv();
-void __aeabi_idivmod();
-void __aeabi_l2d();
-void __aeabi_l2f();
-void __aeabi_ldivmod();
-void __aeabi_llsl();
-void __aeabi_llsr();
-void __aeabi_lmul();
-void __aeabi_ui2d();
-void __aeabi_ui2f();
-void __aeabi_uidiv();
-void __aeabi_uidivmod();
-void __aeabi_ul2d();
-void __aeabi_ul2f();
-void __aeabi_uldivmod();
-void __ashldi3();
-void __clzsi2();
-void __divdf3();
-void __divdi3();
-void __divsi3();
-void __eqdf2();
-void __eqsf2();
-void __extendsfdf2();
-void __fixdfdi();
-void __fixdfsi();
-void __fixsfdi();
-void __fixunsdfdi();
-void __fixunsdfsi();
-void __fixunssfdi();
-void __floatdidf();
-void __floatdisf();
-void __floatsidf();
-void __floatsisf();
-void __floatundidf();
-void __floatundisf();
-void __floatunsidf();
-void __floatunsisf();
-void __gedf2();
-void __gesf2();
-void __gtdf2();
-void __gtsf2();
-void __ledf2();
-void __lesf2();
-void __lshrdi3();
-void __ltdf2();
-void __ltsf2();
-void __moddi3();
-void __modsi3();
-void __muldf3();
-void __muldi3();
-void __mulsf3();
-void __nedf2();
-void __nesf2();
-void __subdf3();
-void __subsf3();
-void __truncdfsf2();
-void __udivdi3();
-void __udivmoddi4();
-void __udivsi3();
-void __umoddi3();
-void __umodsi3();
-void __unorddf2();
-void __unordsf2();
+void __adddf3(void);
+void __addsf3(void);
+void __aeabi_d2f(void);
+void __aeabi_d2iz(void);
+void __aeabi_d2lz(void);
+void __aeabi_d2uiz(void);
+void __aeabi_d2ulz(void);
+void __aeabi_dadd(void);
+void __aeabi_dcmpeq(void);
+void __aeabi_dcmpge(void);
+void __aeabi_dcmpgt(void);
+void __aeabi_dcmple(void);
+void __aeabi_dcmplt(void);
+void __aeabi_dcmpun(void);
+void __aeabi_ddiv(void);
+void __aeabi_dmul(void);
+void __aeabi_dsub(void);
+void __aeabi_f2d(void);
+void __aeabi_f2iz(void);
+void __aeabi_f2lz(void);
+void __aeabi_f2ulz(void);
+void __aeabi_fadd(void);
+void __aeabi_fcmpeq(void);
+void __aeabi_fcmpge(void);
+void __aeabi_fcmpgt(void);
+void __aeabi_fcmple(void);
+void __aeabi_fcmplt(void);
+void __aeabi_fcmpun(void);
+void __aeabi_fdiv(void);
+void __aeabi_fmul(void);
+void __aeabi_fsub(void);
+void __aeabi_i2d(void);
+void __aeabi_i2f(void);
+void __aeabi_idiv(void);
+void __aeabi_idivmod(void);
+void __aeabi_l2d(void);
+void __aeabi_l2f(void);
+void __aeabi_ldivmod(void);
+void __aeabi_llsl(void);
+void __aeabi_llsr(void);
+void __aeabi_lmul(void);
+void __aeabi_ui2d(void);
+void __aeabi_ui2f(void);
+void __aeabi_uidiv(void);
+void __aeabi_uidivmod(void);
+void __aeabi_ul2d(void);
+void __aeabi_ul2f(void);
+void __aeabi_uldivmod(void);
+void __ashldi3(void);
+void __clzsi2(void);
+void __divdf3(void);
+void __divdi3(void);
+void __divsi3(void);
+void __eqdf2(void);
+void __eqsf2(void);
+void __extendsfdf2(void);
+void __fixdfdi(void);
+void __fixdfsi(void);
+void __fixsfdi(void);
+void __fixunsdfdi(void);
+void __fixunsdfsi(void);
+void __fixunssfdi(void);
+void __floatdidf(void);
+void __floatdisf(void);
+void __floatsidf(void);
+void __floatsisf(void);
+void __floatundidf(void);
+void __floatundisf(void);
+void __floatunsidf(void);
+void __floatunsisf(void);
+void __gedf2(void);
+void __gesf2(void);
+void __gtdf2(void);
+void __gtsf2(void);
+void __ledf2(void);
+void __lesf2(void);
+void __lshrdi3(void);
+void __ltdf2(void);
+void __ltsf2(void);
+void __moddi3(void);
+void __modsi3(void);
+void __muldf3(void);
+void __muldi3(void);
+void __mulsf3(void);
+void __nedf2(void);
+void __nesf2(void);
+void __subdf3(void);
+void __subsf3(void);
+void __truncdfsf2(void);
+void __udivdi3(void);
+void __udivmoddi4(void);
+void __udivsi3(void);
+void __umoddi3(void);
+void __umodsi3(void);
+void __unorddf2(void);
+void __unordsf2(void);
 /* clang-format on */
 /* clang-format on */
 
 
 static SymbolMap target_sym_map[] = {
 static SymbolMap target_sym_map[] = {
@@ -259,7 +259,7 @@ get_current_target(char *target_buf, uint32 target_buf_size)
 #undef BUILD_TARGET_THUMB_V4T
 #undef BUILD_TARGET_THUMB_V4T
 
 
 uint32
 uint32
-get_plt_item_size()
+get_plt_item_size(void)
 {
 {
     /* 16 bytes instructions and 4 bytes symbol address */
     /* 16 bytes instructions and 4 bytes symbol address */
     return 20;
     return 20;

+ 1 - 1
core/iwasm/aot/arch/aot_reloc_x86_64.c

@@ -58,7 +58,7 @@ get_current_target(char *target_buf, uint32 target_buf_size)
 }
 }
 
 
 static uint32
 static uint32
-get_plt_item_size()
+get_plt_item_size(void)
 {
 {
     /* size of mov instruction and jmp instruction */
     /* size of mov instruction and jmp instruction */
     return 12;
     return 12;

+ 37 - 37
core/iwasm/aot/arch/aot_reloc_xtensa.c

@@ -10,44 +10,44 @@
 
 
 /* clang-format off */
 /* clang-format off */
 /* for soft-float */
 /* for soft-float */
-void __floatsidf();
-void __divdf3();
-void __ltdf2();
+void __floatsidf(void);
+void __divdf3(void);
+void __ltdf2(void);
 
 
 /* for mul32 */
 /* for mul32 */
-void __mulsi3();
-void __muldi3();
-
-void __modsi3();
-
-void __divdi3();
-
-void __udivdi3();
-void __unorddf2();
-void __adddf3();
-void __eqdf2();
-void __muldf3();
-void __gedf2();
-void __ledf2();
-void __fixunsdfsi();
-void __floatunsidf();
-void __subdf3();
-void __nedf2();
-void __fixdfsi();
-void __moddi3();
-void __extendsfdf2();
-void __truncdfsf2();
-void __gtdf2();
-void __umoddi3();
-void __floatdidf();
-void __divsf3();
-void __fixdfdi();
-void __floatundidf();
-void __fixsfdi();
-void __fixunssfdi();
-void __fixunsdfdi();
-void __floatdisf();
-void __floatundisf();
+void __mulsi3(void);
+void __muldi3(void);
+
+void __modsi3(void);
+
+void __divdi3(void);
+
+void __udivdi3(void);
+void __unorddf2(void);
+void __adddf3(void);
+void __eqdf2(void);
+void __muldf3(void);
+void __gedf2(void);
+void __ledf2(void);
+void __fixunsdfsi(void);
+void __floatunsidf(void);
+void __subdf3(void);
+void __nedf2(void);
+void __fixdfsi(void);
+void __moddi3(void);
+void __extendsfdf2(void);
+void __truncdfsf2(void);
+void __gtdf2(void);
+void __umoddi3(void);
+void __floatdidf(void);
+void __divsf3(void);
+void __fixdfdi(void);
+void __floatundidf(void);
+void __fixsfdi(void);
+void __fixunssfdi(void);
+void __fixunsdfdi(void);
+void __floatdisf(void);
+void __floatundisf(void);
 
 
 
 
 static SymbolMap target_sym_map[] = {
 static SymbolMap target_sym_map[] = {
@@ -119,7 +119,7 @@ get_current_target(char *target_buf, uint32 target_buf_size)
 }
 }
 
 
 static uint32
 static uint32
-get_plt_item_size()
+get_plt_item_size(void)
 {
 {
     return 0;
     return 0;
 }
 }

+ 5 - 5
core/iwasm/aot/debug/jit_debug.c

@@ -69,10 +69,10 @@ typedef struct JITDescriptor {
  * and inline assembler statement inside.
  * and inline assembler statement inside.
  */
  */
 void attribute_noinline
 void attribute_noinline
-__jit_debug_register_code();
+__jit_debug_register_code(void);
 
 
 void attribute_noinline
 void attribute_noinline
-__jit_debug_register_code()
+__jit_debug_register_code(void)
 {
 {
     int x;
     int x;
     *(char *)&x = '\0';
     *(char *)&x = '\0';
@@ -96,7 +96,7 @@ extern JITDescriptor __jit_debug_descriptor;
  * This gives the debugger an easy way to inject custom code to
  * This gives the debugger an easy way to inject custom code to
  * handle the events.
  * handle the events.
  */
  */
-void (*__jit_debug_register_code_ptr)() = __jit_debug_register_code;
+void (*__jit_debug_register_code_ptr)(void) = __jit_debug_register_code;
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
@@ -171,7 +171,7 @@ DestroyJITCodeEntryInternal(JITCodeEntry *entry)
 }
 }
 
 
 bool
 bool
-jit_debug_engine_init()
+jit_debug_engine_init(void)
 {
 {
     if (jit_debug_engine) {
     if (jit_debug_engine) {
         return true;
         return true;
@@ -194,7 +194,7 @@ jit_debug_engine_init()
 }
 }
 
 
 void
 void
-jit_debug_engine_destroy()
+jit_debug_engine_destroy(void)
 {
 {
     if (jit_debug_engine) {
     if (jit_debug_engine) {
         WASMJITEntryNode *node, *node_next;
         WASMJITEntryNode *node, *node_next;

+ 2 - 2
core/iwasm/aot/debug/jit_debug.h

@@ -11,10 +11,10 @@ extern "C" {
 #endif
 #endif
 
 
 bool
 bool
-jit_debug_engine_init();
+jit_debug_engine_init(void);
 
 
 void
 void
-jit_debug_engine_destroy();
+jit_debug_engine_destroy(void);
 
 
 bool
 bool
 jit_code_entry_create(const uint8 *symfile_addr, uint64 symfile_size);
 jit_code_entry_create(const uint8 *symfile_addr, uint64 symfile_size);

+ 2 - 2
core/iwasm/common/wasm_memory.c

@@ -159,7 +159,7 @@ wasm_runtime_memory_init(mem_alloc_type_t mem_alloc_type,
 }
 }
 
 
 void
 void
-wasm_runtime_memory_destroy()
+wasm_runtime_memory_destroy(void)
 {
 {
     if (memory_mode == MEMORY_MODE_POOL) {
     if (memory_mode == MEMORY_MODE_POOL) {
 #if BH_ENABLE_GC_VERIFY == 0
 #if BH_ENABLE_GC_VERIFY == 0
@@ -176,7 +176,7 @@ wasm_runtime_memory_destroy()
 }
 }
 
 
 unsigned
 unsigned
-wasm_runtime_memory_pool_size()
+wasm_runtime_memory_pool_size(void)
 {
 {
     if (memory_mode == MEMORY_MODE_POOL)
     if (memory_mode == MEMORY_MODE_POOL)
         return global_pool_size;
         return global_pool_size;

+ 2 - 2
core/iwasm/common/wasm_memory.h

@@ -46,10 +46,10 @@ wasm_runtime_memory_init(mem_alloc_type_t mem_alloc_type,
                          const MemAllocOption *alloc_option);
                          const MemAllocOption *alloc_option);
 
 
 void
 void
-wasm_runtime_memory_destroy();
+wasm_runtime_memory_destroy(void);
 
 
 unsigned
 unsigned
-wasm_runtime_memory_pool_size();
+wasm_runtime_memory_pool_size(void);
 
 
 void
 void
 wasm_runtime_set_mem_bound_check_bytes(WASMMemoryInstance *memory,
 wasm_runtime_set_mem_bound_check_bytes(WASMMemoryInstance *memory,

+ 2 - 2
core/iwasm/common/wasm_native.c

@@ -469,7 +469,7 @@ wasi_context_dtor(WASMModuleInstanceCommon *inst, void *ctx)
 
 
 #if WASM_ENABLE_QUICK_AOT_ENTRY != 0
 #if WASM_ENABLE_QUICK_AOT_ENTRY != 0
 static bool
 static bool
-quick_aot_entry_init();
+quick_aot_entry_init(void);
 #endif
 #endif
 
 
 bool
 bool
@@ -1461,7 +1461,7 @@ quick_aot_entry_cmp(const void *quick_aot_entry1, const void *quick_aot_entry2)
 }
 }
 
 
 static bool
 static bool
-quick_aot_entry_init()
+quick_aot_entry_init(void)
 {
 {
     qsort(quick_aot_entries, sizeof(quick_aot_entries) / sizeof(QuickAOTEntry),
     qsort(quick_aot_entries, sizeof(quick_aot_entries) / sizeof(QuickAOTEntry),
           sizeof(QuickAOTEntry), quick_aot_entry_cmp);
           sizeof(QuickAOTEntry), quick_aot_entry_cmp);

+ 2 - 2
core/iwasm/common/wasm_native.h

@@ -100,10 +100,10 @@ wasm_native_inherit_contexts(struct WASMModuleInstanceCommon *child,
 #endif /* WASM_ENABLE_MODULE_INST_CONTEXT */
 #endif /* WASM_ENABLE_MODULE_INST_CONTEXT */
 
 
 bool
 bool
-wasm_native_init();
+wasm_native_init(void);
 
 
 void
 void
-wasm_native_destroy();
+wasm_native_destroy(void);
 
 
 #if WASM_ENABLE_QUICK_AOT_ENTRY != 0
 #if WASM_ENABLE_QUICK_AOT_ENTRY != 0
 void *
 void *

+ 9 - 9
core/iwasm/common/wasm_runtime_common.c

@@ -86,7 +86,7 @@ static bh_list registered_module_list_head;
 static bh_list *const registered_module_list = &registered_module_list_head;
 static bh_list *const registered_module_list = &registered_module_list_head;
 static korp_mutex registered_module_list_lock;
 static korp_mutex registered_module_list_lock;
 static void
 static void
-wasm_runtime_destroy_registered_module_list();
+wasm_runtime_destroy_registered_module_list(void);
 #endif /* WASM_ENABLE_MULTI_MODULE */
 #endif /* WASM_ENABLE_MULTI_MODULE */
 
 
 #define E_TYPE_XIP 4
 #define E_TYPE_XIP 4
@@ -97,11 +97,11 @@ val_type_to_val_kind(uint8 value_type);
 #if WASM_ENABLE_GC == 0 && WASM_ENABLE_REF_TYPES != 0
 #if WASM_ENABLE_GC == 0 && WASM_ENABLE_REF_TYPES != 0
 /* Initialize externref hashmap */
 /* Initialize externref hashmap */
 static bool
 static bool
-wasm_externref_map_init();
+wasm_externref_map_init(void);
 
 
 /* Destroy externref hashmap */
 /* Destroy externref hashmap */
 static void
 static void
-wasm_externref_map_destroy();
+wasm_externref_map_destroy(void);
 #endif /* end of WASM_ENABLE_GC == 0 && WASM_ENABLE_REF_TYPES != 0 */
 #endif /* end of WASM_ENABLE_GC == 0 && WASM_ENABLE_REF_TYPES != 0 */
 
 
 static void
 static void
@@ -438,7 +438,7 @@ wasm_runtime_get_exec_env_tls()
 #endif /* end of OS_ENABLE_HW_BOUND_CHECK */
 #endif /* end of OS_ENABLE_HW_BOUND_CHECK */
 
 
 static bool
 static bool
-wasm_runtime_env_init()
+wasm_runtime_env_init(void)
 {
 {
     if (bh_platform_init() != 0)
     if (bh_platform_init() != 0)
         return false;
         return false;
@@ -584,7 +584,7 @@ static korp_mutex runtime_lock = OS_THREAD_MUTEX_INITIALIZER;
 static int32 runtime_ref_count = 0;
 static int32 runtime_ref_count = 0;
 
 
 static bool
 static bool
-wasm_runtime_init_internal()
+wasm_runtime_init_internal(void)
 {
 {
     if (!wasm_runtime_memory_init(Alloc_With_System_Allocator, NULL))
     if (!wasm_runtime_memory_init(Alloc_With_System_Allocator, NULL))
         return false;
         return false;
@@ -622,7 +622,7 @@ wasm_runtime_init()
 }
 }
 
 
 static void
 static void
-wasm_runtime_destroy_internal()
+wasm_runtime_destroy_internal(void)
 {
 {
 #if WASM_ENABLE_GC == 0 && WASM_ENABLE_REF_TYPES != 0
 #if WASM_ENABLE_GC == 0 && WASM_ENABLE_REF_TYPES != 0
     wasm_externref_map_destroy();
     wasm_externref_map_destroy();
@@ -4747,7 +4747,7 @@ fail:
     || defined(BUILD_TARGET_RISCV32_ILP32D)                          \
     || defined(BUILD_TARGET_RISCV32_ILP32D)                          \
     || defined(BUILD_TARGET_RISCV32_ILP32F)                          \
     || defined(BUILD_TARGET_RISCV32_ILP32F)                          \
     || defined(BUILD_TARGET_RISCV32_ILP32) || defined(BUILD_TARGET_ARC)
     || defined(BUILD_TARGET_RISCV32_ILP32) || defined(BUILD_TARGET_ARC)
-typedef void (*GenericFunctionPointer)();
+typedef void (*GenericFunctionPointer)(void);
 void
 void
 invokeNative(GenericFunctionPointer f, uint32 *args, uint32 n_stacks);
 invokeNative(GenericFunctionPointer f, uint32 *args, uint32 n_stacks);
 
 
@@ -5312,7 +5312,7 @@ fail:
 #if defined(BUILD_TARGET_X86_32) || defined(BUILD_TARGET_ARM)    \
 #if defined(BUILD_TARGET_X86_32) || defined(BUILD_TARGET_ARM)    \
     || defined(BUILD_TARGET_THUMB) || defined(BUILD_TARGET_MIPS) \
     || defined(BUILD_TARGET_THUMB) || defined(BUILD_TARGET_MIPS) \
     || defined(BUILD_TARGET_XTENSA)
     || defined(BUILD_TARGET_XTENSA)
-typedef void (*GenericFunctionPointer)();
+typedef void (*GenericFunctionPointer)(void);
 void
 void
 invokeNative(GenericFunctionPointer f, uint32 *args, uint32 sz);
 invokeNative(GenericFunctionPointer f, uint32 *args, uint32 sz);
 
 
@@ -5597,7 +5597,7 @@ typedef uint32x4_t __m128i;
 
 
 #endif /* end of WASM_ENABLE_SIMD != 0 */
 #endif /* end of WASM_ENABLE_SIMD != 0 */
 
 
-typedef void (*GenericFunctionPointer)();
+typedef void (*GenericFunctionPointer)(void);
 void
 void
 invokeNative(GenericFunctionPointer f, uint64 *args, uint64 n_stacks);
 invokeNative(GenericFunctionPointer f, uint64 *args, uint64 n_stacks);
 
 

+ 4 - 4
core/iwasm/common/wasm_runtime_common.h

@@ -852,10 +852,10 @@ wasm_runtime_set_module_reader(const module_reader reader,
                                const module_destroyer destroyer);
                                const module_destroyer destroyer);
 
 
 module_reader
 module_reader
-wasm_runtime_get_module_reader();
+wasm_runtime_get_module_reader(void);
 
 
 module_destroyer
 module_destroyer
-wasm_runtime_get_module_destroyer();
+wasm_runtime_get_module_destroyer(void);
 
 
 bool
 bool
 wasm_runtime_register_module_internal(const char *module_name,
 wasm_runtime_register_module_internal(const char *module_name,
@@ -881,7 +881,7 @@ bool
 wasm_runtime_is_loading_module(const char *module_name);
 wasm_runtime_is_loading_module(const char *module_name);
 
 
 void
 void
-wasm_runtime_destroy_loading_module_list();
+wasm_runtime_destroy_loading_module_list(void);
 
 
 WASMModuleCommon *
 WASMModuleCommon *
 wasm_runtime_search_sub_module(const WASMModuleCommon *parent_module,
 wasm_runtime_search_sub_module(const WASMModuleCommon *parent_module,
@@ -1168,7 +1168,7 @@ wasm_runtime_quick_invoke_c_api_native(WASMModuleInstanceCommon *module_inst,
                                        uint32 result_count);
                                        uint32 result_count);
 
 
 void
 void
-wasm_runtime_show_app_heap_corrupted_prompt();
+wasm_runtime_show_app_heap_corrupted_prompt(void);
 
 
 #if WASM_ENABLE_LOAD_CUSTOM_SECTION != 0
 #if WASM_ENABLE_LOAD_CUSTOM_SECTION != 0
 void
 void

+ 2 - 2
core/iwasm/common/wasm_shared_memory.h

@@ -17,10 +17,10 @@ extern "C" {
 extern korp_mutex g_shared_memory_lock;
 extern korp_mutex g_shared_memory_lock;
 
 
 bool
 bool
-wasm_shared_memory_init();
+wasm_shared_memory_init(void);
 
 
 void
 void
-wasm_shared_memory_destroy();
+wasm_shared_memory_destroy(void);
 
 
 uint16
 uint16
 shared_memory_inc_reference(WASMMemoryInstance *memory);
 shared_memory_inc_reference(WASMMemoryInstance *memory);

+ 1 - 1
core/iwasm/compilation/aot.h

@@ -312,7 +312,7 @@ void
 aot_destroy_comp_data(AOTCompData *comp_data);
 aot_destroy_comp_data(AOTCompData *comp_data);
 
 
 char *
 char *
-aot_get_last_error();
+aot_get_last_error(void);
 
 
 void
 void
 aot_set_last_error(const char *error);
 aot_set_last_error(const char *error);

+ 91 - 31
core/iwasm/compilation/aot_compiler.c

@@ -16,6 +16,7 @@
 #include "aot_emit_parametric.h"
 #include "aot_emit_parametric.h"
 #include "aot_emit_table.h"
 #include "aot_emit_table.h"
 #include "aot_emit_gc.h"
 #include "aot_emit_gc.h"
+#include "aot_stack_frame_comp.h"
 #include "simd/simd_access_lanes.h"
 #include "simd/simd_access_lanes.h"
 #include "simd/simd_bitmask_extracts.h"
 #include "simd/simd_bitmask_extracts.h"
 #include "simd/simd_bit_shifts.h"
 #include "simd/simd_bit_shifts.h"
@@ -253,6 +254,13 @@ store_value(AOTCompContext *comp_ctx, LLVMValueRef value, uint8 value_type,
     return true;
     return true;
 }
 }
 
 
+void
+aot_call_stack_features_init_default(AOTCallStackFeatures *features)
+{
+    memset(features, 1, sizeof(AOTCallStackFeatures));
+    features->frame_per_function = false;
+}
+
 bool
 bool
 aot_frame_store_value(AOTCompContext *comp_ctx, LLVMValueRef value,
 aot_frame_store_value(AOTCompContext *comp_ctx, LLVMValueRef value,
                       uint8 value_type, LLVMValueRef cur_frame, uint32 offset)
                       uint8 value_type, LLVMValueRef cur_frame, uint32 offset)
@@ -337,6 +345,10 @@ aot_gen_commit_values(AOTCompFrame *frame)
     LLVMValueRef value;
     LLVMValueRef value;
     uint32 n;
     uint32 n;
 
 
+    if (!frame->comp_ctx->call_stack_features.values) {
+        return true;
+    }
+
     /* First, commit reference flags
     /* First, commit reference flags
      * For LLVM JIT, iterate all local and stack ref flags
      * For LLVM JIT, iterate all local and stack ref flags
      * For AOT, ignore local(params + locals) ref flags */
      * For AOT, ignore local(params + locals) ref flags */
@@ -569,6 +581,64 @@ aot_gen_commit_values(AOTCompFrame *frame)
     return true;
     return true;
 }
 }
 
 
+static bool
+aot_standard_frame_gen_commit_ip(AOTCompContext *comp_ctx,
+                                 AOTFuncContext *func_ctx,
+                                 LLVMValueRef ip_value, bool is_64bit)
+{ /* Emit LLVM IR that stores ip_value into the ip field of
+   * func_ctx->cur_frame. Returns true on success; on the first failing
+   * builder call it records an error message and returns false. */
+    LLVMValueRef cur_frame = func_ctx->cur_frame;
+    LLVMValueRef value_offset, value_addr, value_ptr;
+    uint32 offset_ip;
+    /* Offset of the ip field: four pointer sizes into the frame when not
+     * in JIT mode, otherwise the ip member offset of WASMInterpFrame. */
+    if (!comp_ctx->is_jit_mode)
+        offset_ip = comp_ctx->pointer_size * 4;
+    else
+        offset_ip = offsetof(WASMInterpFrame, ip);
+
+    if (!(value_offset = I32_CONST(offset_ip))) {
+        aot_set_last_error("llvm build const failed");
+        return false;
+    }
+    /* Address of the field: cur_frame advanced by value_offset elements
+     * of INT8_TYPE. */
+    if (!(value_addr =
+              LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, cur_frame,
+                                    &value_offset, 1, "ip_addr"))) {
+        aot_set_last_error("llvm build in bounds gep failed");
+        return false;
+    }
+    /* Retype the address to match the width of the stored value:
+     * INT64_PTR_TYPE when is_64bit is set, INT32_PTR_TYPE otherwise. */
+    if (!(value_ptr = LLVMBuildBitCast(
+              comp_ctx->builder, value_addr,
+              is_64bit ? INT64_PTR_TYPE : INT32_PTR_TYPE, "ip_ptr"))) {
+        aot_set_last_error("llvm build bit cast failed");
+        return false;
+    }
+
+    if (!LLVMBuildStore(comp_ctx->builder, ip_value, value_ptr)) {
+        aot_set_last_error("llvm build store failed");
+        return false;
+    }
+
+    return true;
+}
+
+bool
+aot_gen_commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
+                  LLVMValueRef ip_value, bool is_64bit)
+{ /* Dispatch on comp_ctx->aux_stack_frame_type to the helper that
+   * commits ip_value for that frame layout; the helper's result is
+   * returned unchanged. */
+    switch (comp_ctx->aux_stack_frame_type) {
+        case AOT_STACK_FRAME_TYPE_STANDARD:
+            return aot_standard_frame_gen_commit_ip(comp_ctx, func_ctx,
+                                                    ip_value, is_64bit);
+        case AOT_STACK_FRAME_TYPE_TINY: /* is_64bit is not forwarded here */
+            return aot_tiny_frame_gen_commit_ip(comp_ctx, func_ctx, ip_value);
+        default: /* any other frame type: record an error and fail */
+            aot_set_last_error(
+                "unsupported mode when generating commit_ip code");
+            return false;
+    }
+}
+
 bool
 bool
 aot_gen_commit_sp_ip(AOTCompFrame *frame, bool commit_sp, bool commit_ip)
 aot_gen_commit_sp_ip(AOTCompFrame *frame, bool commit_sp, bool commit_ip)
 {
 {
@@ -577,40 +647,19 @@ aot_gen_commit_sp_ip(AOTCompFrame *frame, bool commit_sp, bool commit_ip)
     LLVMValueRef cur_frame = func_ctx->cur_frame;
     LLVMValueRef cur_frame = func_ctx->cur_frame;
     LLVMValueRef value_offset, value_addr, value_ptr, value;
     LLVMValueRef value_offset, value_addr, value_ptr, value;
     LLVMTypeRef int8_ptr_ptr_type;
     LLVMTypeRef int8_ptr_ptr_type;
-    uint32 offset_ip, offset_sp, n;
+    uint32 offset_sp, n;
     bool is_64bit = (comp_ctx->pointer_size == sizeof(uint64)) ? true : false;
     bool is_64bit = (comp_ctx->pointer_size == sizeof(uint64)) ? true : false;
     const AOTValueSlot *sp = frame->sp;
     const AOTValueSlot *sp = frame->sp;
     const uint8 *ip = frame->frame_ip;
     const uint8 *ip = frame->frame_ip;
 
 
     if (!comp_ctx->is_jit_mode) {
     if (!comp_ctx->is_jit_mode) {
-        offset_ip = frame->comp_ctx->pointer_size * 4;
         offset_sp = frame->comp_ctx->pointer_size * 5;
         offset_sp = frame->comp_ctx->pointer_size * 5;
     }
     }
     else {
     else {
-        offset_ip = offsetof(WASMInterpFrame, ip);
         offset_sp = offsetof(WASMInterpFrame, sp);
         offset_sp = offsetof(WASMInterpFrame, sp);
     }
     }
 
 
-    if (commit_ip) {
-        if (!(value_offset = I32_CONST(offset_ip))) {
-            aot_set_last_error("llvm build const failed");
-            return false;
-        }
-
-        if (!(value_addr =
-                  LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, cur_frame,
-                                        &value_offset, 1, "ip_addr"))) {
-            aot_set_last_error("llvm build in bounds gep failed");
-            return false;
-        }
-
-        if (!(value_ptr = LLVMBuildBitCast(
-                  comp_ctx->builder, value_addr,
-                  is_64bit ? INT64_PTR_TYPE : INT32_PTR_TYPE, "ip_ptr"))) {
-            aot_set_last_error("llvm build bit cast failed");
-            return false;
-        }
-
+    if (commit_ip && comp_ctx->call_stack_features.ip) {
         if (!comp_ctx->is_jit_mode) {
         if (!comp_ctx->is_jit_mode) {
             WASMModule *module = comp_ctx->comp_data->wasm_module;
             WASMModule *module = comp_ctx->comp_data->wasm_module;
             if (is_64bit)
             if (is_64bit)
@@ -630,13 +679,12 @@ aot_gen_commit_sp_ip(AOTCompFrame *frame, bool commit_sp, bool commit_ip)
             return false;
             return false;
         }
         }
 
 
-        if (!LLVMBuildStore(comp_ctx->builder, value, value_ptr)) {
-            aot_set_last_error("llvm build store failed");
+        if (!aot_gen_commit_ip(comp_ctx, func_ctx, value, is_64bit)) {
             return false;
             return false;
         }
         }
     }
     }
 
 
-    if (commit_sp) {
+    if (commit_sp && comp_ctx->call_stack_features.values) {
         n = (uint32)(sp - frame->lp);
         n = (uint32)(sp - frame->lp);
         value = I32_CONST(offset_of_local(comp_ctx, n));
         value = I32_CONST(offset_of_local(comp_ctx, n));
         if (!value) {
         if (!value) {
@@ -940,6 +988,7 @@ static bool
 aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index)
 aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index)
 {
 {
     AOTFuncContext *func_ctx = comp_ctx->func_ctxes[func_index];
     AOTFuncContext *func_ctx = comp_ctx->func_ctxes[func_index];
+    LLVMValueRef func_index_ref;
     uint8 *frame_ip = func_ctx->aot_func->code, opcode, *p_f32, *p_f64;
     uint8 *frame_ip = func_ctx->aot_func->code, opcode, *p_f32, *p_f64;
     uint8 *frame_ip_end = frame_ip + func_ctx->aot_func->code_size;
     uint8 *frame_ip_end = frame_ip + func_ctx->aot_func->code_size;
     uint8 *param_types = NULL;
     uint8 *param_types = NULL;
@@ -962,16 +1011,27 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index)
     LLVMMetadataRef location;
     LLVMMetadataRef location;
 #endif
 #endif
 
 
-    if (comp_ctx->enable_aux_stack_frame) {
+    /* Start to translate the opcodes */
+    LLVMPositionBuilderAtEnd(
+        comp_ctx->builder,
+        func_ctx->block_stack.block_list_head->llvm_entry_block);
+
+    if (comp_ctx->aux_stack_frame_type
+        && comp_ctx->call_stack_features.frame_per_function) {
+        INT_CONST(func_index_ref,
+                  func_index + comp_ctx->comp_data->import_func_count, I32_TYPE,
+                  true);
+        if (!aot_alloc_frame_per_function_frame_for_aot_func(comp_ctx, func_ctx,
+                                                             func_index_ref)) {
+            return false;
+        }
+    }
+    if (comp_ctx->aux_stack_frame_type) {
         if (!init_comp_frame(comp_ctx, func_ctx, func_index)) {
         if (!init_comp_frame(comp_ctx, func_ctx, func_index)) {
             return false;
             return false;
         }
         }
     }
     }
 
 
-    /* Start to translate the opcodes */
-    LLVMPositionBuilderAtEnd(
-        comp_ctx->builder,
-        func_ctx->block_stack.block_list_head->llvm_entry_block);
     while (frame_ip < frame_ip_end) {
     while (frame_ip < frame_ip_end) {
         opcode = *frame_ip++;
         opcode = *frame_ip++;
 
 

+ 18 - 0
core/iwasm/compilation/aot_compiler.h

@@ -195,6 +195,15 @@ aot_gen_commit_values(AOTCompFrame *frame);
 bool
 bool
 aot_gen_commit_sp_ip(AOTCompFrame *frame, bool commit_sp, bool commit_ip);
 aot_gen_commit_sp_ip(AOTCompFrame *frame, bool commit_sp, bool commit_ip);
 
 
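+/**
+ * Generate instructions to commit the IP value to the current frame,
+ * using the helper that matches comp_ctx->aux_stack_frame_type.
+ *
+ * @param comp_ctx the compilation context
+ * @param func_ctx the function context whose current frame is written
+ * @param ip_value the LLVM value holding the IP to store
+ * @param is_64bit true to store through a 64-bit pointer type, false for
+ *        32-bit (only used by the standard frame layout)
+ *
+ * @return true if success, false otherwise (the last error is set)
+ */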
+bool
+aot_gen_commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
+                  LLVMValueRef ip_value, bool is_64bit);
+
 bool
 bool
 aot_frame_store_value(AOTCompContext *comp_ctx, LLVMValueRef value,
 aot_frame_store_value(AOTCompContext *comp_ctx, LLVMValueRef value,
                       uint8 value_type, LLVMValueRef cur_frame, uint32 offset);
                       uint8 value_type, LLVMValueRef cur_frame, uint32 offset);
@@ -652,6 +661,15 @@ set_local_gc_ref(AOTCompFrame *frame, int n, LLVMValueRef value, uint8 ref_type)
 #define F64_CONST(v) LLVMConstReal(F64_TYPE, v)
 #define F64_CONST(v) LLVMConstReal(F64_TYPE, v)
 #define I8_CONST(v) LLVMConstInt(INT8_TYPE, v, true)
 #define I8_CONST(v) LLVMConstInt(INT8_TYPE, v, true)
 
 
+#define INT_CONST(variable, value, type, is_signed)        \
+    do { /* NB: contains `return false` on failure */      \
+        variable = LLVMConstInt(type, value, is_signed);   \
+        if (!variable) {                                   \
+            aot_set_last_error("llvm build const failed"); \
+            return false;                                  \
+        }                                                  \
+    } while (0)
+
 #define LLVM_CONST(name) (comp_ctx->llvm_consts.name)
 #define LLVM_CONST(name) (comp_ctx->llvm_consts.name)
 #define I1_ZERO LLVM_CONST(i1_zero)
 #define I1_ZERO LLVM_CONST(i1_zero)
 #define I1_ONE LLVM_CONST(i1_one)
 #define I1_ONE LLVM_CONST(i1_one)

+ 6 - 0
core/iwasm/compilation/aot_emit_aot_file.c

@@ -4433,6 +4433,12 @@ aot_obj_data_create(AOTCompContext *comp_ctx)
     if (comp_ctx->enable_gc) {
     if (comp_ctx->enable_gc) {
         obj_data->target_info.feature_flags |= WASM_FEATURE_GARBAGE_COLLECTION;
         obj_data->target_info.feature_flags |= WASM_FEATURE_GARBAGE_COLLECTION;
     }
     }
+    if (comp_ctx->aux_stack_frame_type == AOT_STACK_FRAME_TYPE_TINY) {
+        obj_data->target_info.feature_flags |= WASM_FEATURE_TINY_STACK_FRAME;
+    }
+    if (comp_ctx->call_stack_features.frame_per_function) {
+        obj_data->target_info.feature_flags |= WASM_FEATURE_FRAME_PER_FUNCTION;
+    }
 
 
     bh_print_time("Begin to resolve object file info");
     bh_print_time("Begin to resolve object file info");
 
 

+ 43 - 7
core/iwasm/compilation/aot_emit_control.c

@@ -6,6 +6,7 @@
 #include "aot_emit_control.h"
 #include "aot_emit_control.h"
 #include "aot_compiler.h"
 #include "aot_compiler.h"
 #include "aot_emit_exception.h"
 #include "aot_emit_exception.h"
+#include "aot_stack_frame_comp.h"
 #if WASM_ENABLE_GC != 0
 #if WASM_ENABLE_GC != 0
 #include "aot_emit_gc.h"
 #include "aot_emit_gc.h"
 #endif
 #endif
@@ -38,13 +39,24 @@ format_block_name(char *name, uint32 name_size, uint32 block_index,
         snprintf(name, name_size, "%s", "func_end");
         snprintf(name, name_size, "%s", "func_end");
 }
 }
 
 
-#define CREATE_BLOCK(new_llvm_block, name)                      \
-    do {                                                        \
-        if (!(new_llvm_block = LLVMAppendBasicBlockInContext(   \
-                  comp_ctx->context, func_ctx->func, name))) {  \
-            aot_set_last_error("add LLVM basic block failed."); \
-            goto fail;                                          \
-        }                                                       \
+#define CREATE_BLOCK(new_llvm_block, name)                                   \
+    do {                                                                     \
+        if (!(new_llvm_block = LLVMAppendBasicBlockInContext(                \
+                  comp_ctx->context, func_ctx->func, name))) {               \
+            aot_set_last_error("add LLVM basic block failed.");              \
+            goto fail;                                                       \
+        }                                                                    \
+        if (!strcmp(name, "func_end") && comp_ctx->aux_stack_frame_type      \
+            && comp_ctx->call_stack_features.frame_per_function) {           \
+            LLVMBasicBlockRef cur_block =                                    \
+                LLVMGetInsertBlock(comp_ctx->builder);                       \
+            SET_BUILDER_POS(new_llvm_block);                                 \
+            if (!aot_free_frame_per_function_frame_for_aot_func(comp_ctx,    \
+                                                                func_ctx)) { \
+                goto fail;                                                   \
+            }                                                                \
+            SET_BUILDER_POS(cur_block);                                      \
+        }                                                                    \
     } while (0)
     } while (0)
 
 
 #define CURR_BLOCK() LLVMGetInsertBlock(comp_ctx->builder)
 #define CURR_BLOCK() LLVMGetInsertBlock(comp_ctx->builder)
@@ -93,6 +105,11 @@ format_block_name(char *name, uint32 name_size, uint32 block_index,
                 goto fail;                                                  \
                 goto fail;                                                  \
             }                                                               \
             }                                                               \
             SET_BUILDER_POS(block->llvm_end_block);                         \
             SET_BUILDER_POS(block->llvm_end_block);                         \
+            LLVMValueRef first_instr =                                      \
+                get_first_non_phi(block->llvm_end_block);                   \
+            if (first_instr) {                                              \
+                LLVMPositionBuilderBefore(comp_ctx->builder, first_instr);  \
+            }                                                               \
             for (_i = 0; _i < block->result_count; _i++) {                  \
             for (_i = 0; _i < block->result_count; _i++) {                  \
                 if (!(block->result_phis[_i] = LLVMBuildPhi(                \
                 if (!(block->result_phis[_i] = LLVMBuildPhi(                \
                           comp_ctx->builder,                                \
                           comp_ctx->builder,                                \
@@ -158,6 +175,18 @@ get_target_block(AOTFuncContext *func_ctx, uint32 br_depth)
     return block;
     return block;
 }
 }
 
 
+LLVMValueRef
+get_first_non_phi(LLVMBasicBlockRef block)
+{ /* Return the first instruction of block that is not a PHI node. The
+   * result is a null value when the walk ends without finding one. */
+    LLVMValueRef instr = LLVMGetFirstInstruction(block);
+    /* Step over the leading PHI nodes, stopping if instr becomes null. */
+    while (instr && LLVMIsAPHINode(instr)) {
+        instr = LLVMGetNextInstruction(instr);
+    }
+
+    return instr;
+}
+
 static void
 static void
 clear_frame_locals(AOTCompFrame *aot_frame)
 clear_frame_locals(AOTCompFrame *aot_frame)
 {
 {
@@ -1361,6 +1390,13 @@ aot_compile_op_return(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
         (*p_frame_ip - 1) - comp_ctx->comp_data->wasm_module->buf_code);
         (*p_frame_ip - 1) - comp_ctx->comp_data->wasm_module->buf_code);
 #endif
 #endif
 
 
+    if (comp_ctx->aux_stack_frame_type
+        && comp_ctx->call_stack_features.frame_per_function
+        && !aot_free_frame_per_function_frame_for_aot_func(comp_ctx,
+                                                           func_ctx)) {
+        return false;
+    }
+
     if (block_func->result_count) {
     if (block_func->result_count) {
         /* Store extra result values to function parameters */
         /* Store extra result values to function parameters */
         for (i = 0; i < block_func->result_count - 1; i++) {
         for (i = 0; i < block_func->result_count - 1; i++) {

+ 5 - 44
core/iwasm/compilation/aot_emit_exception.c

@@ -4,49 +4,10 @@
  */
  */
 
 
 #include "aot_emit_exception.h"
 #include "aot_emit_exception.h"
+#include "aot_compiler.h"
 #include "../interpreter/wasm_runtime.h"
 #include "../interpreter/wasm_runtime.h"
 #include "../aot/aot_runtime.h"
 #include "../aot/aot_runtime.h"
 
 
-static bool
-commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
-          LLVMValueRef exce_ip, bool is_64bit)
-{
-    LLVMValueRef cur_frame = func_ctx->cur_frame;
-    LLVMValueRef value_offset, value_addr, value_ptr;
-    uint32 offset_ip;
-
-    if (!comp_ctx->is_jit_mode)
-        offset_ip = comp_ctx->pointer_size * 4;
-    else
-        offset_ip = offsetof(WASMInterpFrame, ip);
-
-    if (!(value_offset = I32_CONST(offset_ip))) {
-        aot_set_last_error("llvm build const failed");
-        return false;
-    }
-
-    if (!(value_addr =
-              LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, cur_frame,
-                                    &value_offset, 1, "ip_addr"))) {
-        aot_set_last_error("llvm build in bounds gep failed");
-        return false;
-    }
-
-    if (!(value_ptr = LLVMBuildBitCast(
-              comp_ctx->builder, value_addr,
-              is_64bit ? INT64_PTR_TYPE : INT32_PTR_TYPE, "ip_ptr"))) {
-        aot_set_last_error("llvm build bit cast failed");
-        return false;
-    }
-
-    if (!LLVMBuildStore(comp_ctx->builder, exce_ip, value_ptr)) {
-        aot_set_last_error("llvm build store failed");
-        return false;
-    }
-
-    return true;
-}
-
 bool
 bool
 aot_emit_exception(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 aot_emit_exception(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                    int32 exception_id, bool is_cond_br, LLVMValueRef cond_br_if,
                    int32 exception_id, bool is_cond_br, LLVMValueRef cond_br_if,
@@ -80,7 +41,7 @@ aot_emit_exception(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
             return false;
             return false;
         }
         }
 
 
-        if (comp_ctx->aot_frame) {
+        if (comp_ctx->aot_frame && comp_ctx->call_stack_features.trap_ip) {
             /* Create exception ip phi */
             /* Create exception ip phi */
             if (!(func_ctx->exception_ip_phi = LLVMBuildPhi(
             if (!(func_ctx->exception_ip_phi = LLVMBuildPhi(
                       comp_ctx->builder, is_64bit ? I64_TYPE : I32_TYPE,
                       comp_ctx->builder, is_64bit ? I64_TYPE : I32_TYPE,
@@ -90,8 +51,8 @@ aot_emit_exception(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
             }
             }
 
 
             /* Commit ip to current frame */
             /* Commit ip to current frame */
-            if (!commit_ip(comp_ctx, func_ctx, func_ctx->exception_ip_phi,
-                           is_64bit)) {
+            if (!aot_gen_commit_ip(comp_ctx, func_ctx,
+                                   func_ctx->exception_ip_phi, is_64bit)) {
                 return false;
                 return false;
             }
             }
         }
         }
@@ -173,7 +134,7 @@ aot_emit_exception(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     /* Add phi incoming value to got_exception block */
     /* Add phi incoming value to got_exception block */
     LLVMAddIncoming(func_ctx->exception_id_phi, &exce_id, &block_curr, 1);
     LLVMAddIncoming(func_ctx->exception_id_phi, &exce_id, &block_curr, 1);
 
 
-    if (comp_ctx->aot_frame) {
+    if (comp_ctx->aot_frame && comp_ctx->call_stack_features.trap_ip) {
         const uint8 *ip = comp_ctx->aot_frame->frame_ip;
         const uint8 *ip = comp_ctx->aot_frame->frame_ip;
         LLVMValueRef exce_ip = NULL;
         LLVMValueRef exce_ip = NULL;
 
 

+ 87 - 30
core/iwasm/compilation/aot_emit_function.c

@@ -7,6 +7,7 @@
 #include "aot_emit_exception.h"
 #include "aot_emit_exception.h"
 #include "aot_emit_control.h"
 #include "aot_emit_control.h"
 #include "aot_emit_table.h"
 #include "aot_emit_table.h"
+#include "aot_stack_frame_comp.h"
 #include "../aot/aot_runtime.h"
 #include "../aot/aot_runtime.h"
 #if WASM_ENABLE_GC != 0
 #if WASM_ENABLE_GC != 0
 #include "aot_emit_gc.h"
 #include "aot_emit_gc.h"
@@ -682,24 +683,29 @@ alloc_frame_for_aot_func(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 
 
     new_frame = wasm_stack_top;
     new_frame = wasm_stack_top;
 
 
-    if (!(check_wasm_stack_succ = LLVMAppendBasicBlockInContext(
-              comp_ctx->context, func_ctx->func, "check_wasm_stack_succ"))) {
-        aot_set_last_error("llvm add basic block failed.");
-        return false;
-    }
+    if (comp_ctx->call_stack_features.bounds_checks) {
+        if (!(check_wasm_stack_succ = LLVMAppendBasicBlockInContext(
+                  comp_ctx->context, func_ctx->func,
+                  "check_wasm_stack_succ"))) {
+            aot_set_last_error("llvm add basic block failed.");
+            return false;
+        }
 
 
-    LLVMMoveBasicBlockAfter(check_wasm_stack_succ,
-                            LLVMGetInsertBlock(comp_ctx->builder));
+        LLVMMoveBasicBlockAfter(check_wasm_stack_succ,
+                                LLVMGetInsertBlock(comp_ctx->builder));
 
 
-    if (!(cmp = LLVMBuildICmp(comp_ctx->builder, LLVMIntUGT, wasm_stack_top_max,
-                              wasm_stack_top_bound, "cmp"))) {
-        aot_set_last_error("llvm build icmp failed");
-        return false;
-    }
+        if (!(cmp = LLVMBuildICmp(comp_ctx->builder, LLVMIntUGT,
+                                  wasm_stack_top_max, wasm_stack_top_bound,
+                                  "cmp"))) {
+            aot_set_last_error("llvm build icmp failed");
+            return false;
+        }
 
 
-    if (!(aot_emit_exception(comp_ctx, func_ctx, EXCE_OPERAND_STACK_OVERFLOW,
-                             true, cmp, check_wasm_stack_succ))) {
-        return false;
+        if (!(aot_emit_exception(comp_ctx, func_ctx,
+                                 EXCE_OPERAND_STACK_OVERFLOW, true, cmp,
+                                 check_wasm_stack_succ))) {
+            return false;
+        }
     }
     }
 
 
 #if WASM_ENABLE_GC != 0
 #if WASM_ENABLE_GC != 0
@@ -1285,6 +1291,10 @@ commit_params_to_frame_of_import_func(AOTCompContext *comp_ctx,
 {
 {
     uint32 i, n;
     uint32 i, n;
 
 
+    if (!comp_ctx->call_stack_features.values) {
+        return true;
+    }
+
     for (i = 0, n = 0; i < func_type->param_count; i++, n++) {
     for (i = 0, n = 0; i < func_type->param_count; i++, n++) {
         switch (func_type->types[i]) {
         switch (func_type->types[i]) {
             case VALUE_TYPE_I32:
             case VALUE_TYPE_I32:
@@ -1394,6 +1404,7 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     LLVMValueRef *param_values = NULL, value_ret = NULL, func;
     LLVMValueRef *param_values = NULL, value_ret = NULL, func;
     LLVMValueRef import_func_idx, res;
     LLVMValueRef import_func_idx, res;
     LLVMValueRef ext_ret, ext_ret_ptr, ext_ret_idx;
     LLVMValueRef ext_ret, ext_ret_ptr, ext_ret_idx;
+    LLVMValueRef func_idx_ref;
     int32 i, j = 0, param_count, result_count, ext_ret_count;
     int32 i, j = 0, param_count, result_count, ext_ret_count;
     uint64 total_size;
     uint64 total_size;
     uint8 wasm_ret_type;
     uint8 wasm_ret_type;
@@ -1438,12 +1449,28 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
             return false;
             return false;
     }
     }
 
 
-    if (comp_ctx->enable_aux_stack_frame) {
 #if WASM_ENABLE_AOT_STACK_FRAME != 0
 #if WASM_ENABLE_AOT_STACK_FRAME != 0
-        if (!alloc_frame_for_aot_func(comp_ctx, func_ctx, func_idx))
-            return false;
-#endif
+    if (comp_ctx->aux_stack_frame_type) {
+        if (func_idx < import_func_count
+            && comp_ctx->call_stack_features.frame_per_function) {
+            INT_CONST(func_idx_ref, func_idx, I32_TYPE, true);
+            if (!aot_alloc_frame_per_function_frame_for_aot_func(
+                    comp_ctx, func_ctx, func_idx_ref)) {
+                return false;
+            }
+        }
+        else if (!comp_ctx->call_stack_features.frame_per_function) {
+            if (comp_ctx->aux_stack_frame_type
+                != AOT_STACK_FRAME_TYPE_STANDARD) {
+                aot_set_last_error("unsupported mode");
+                return false;
+            }
+            if (!alloc_frame_for_aot_func(comp_ctx, func_ctx, func_idx)) {
+                return false;
+            }
+        }
     }
     }
+#endif
 
 
     /* Get param cell number */
     /* Get param cell number */
     param_cell_num = func_type->param_cell_num;
     param_cell_num = func_type->param_cell_num;
@@ -1513,7 +1540,7 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     }
     }
 
 
     if (func_idx < import_func_count) {
     if (func_idx < import_func_count) {
-        if (comp_ctx->enable_aux_stack_frame
+        if (comp_ctx->aux_stack_frame_type == AOT_STACK_FRAME_TYPE_STANDARD
             && !commit_params_to_frame_of_import_func(
             && !commit_params_to_frame_of_import_func(
                 comp_ctx, func_ctx, func_type, param_values + 1)) {
                 comp_ctx, func_ctx, func_type, param_values + 1)) {
             goto fail;
             goto fail;
@@ -1804,12 +1831,26 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
         }
         }
     }
     }
 
 
-    if (comp_ctx->enable_aux_stack_frame) {
 #if WASM_ENABLE_AOT_STACK_FRAME != 0
 #if WASM_ENABLE_AOT_STACK_FRAME != 0
-        if (!free_frame_for_aot_func(comp_ctx, func_ctx))
-            goto fail;
-#endif
+    if (comp_ctx->aux_stack_frame_type) {
+        if (func_idx < import_func_count
+            && comp_ctx->call_stack_features.frame_per_function) {
+            if (!aot_free_frame_per_function_frame_for_aot_func(comp_ctx,
+                                                                func_ctx)) {
+                goto fail;
+            }
+        }
+        else if (!comp_ctx->call_stack_features.frame_per_function) {
+            if (comp_ctx->aux_stack_frame_type
+                != AOT_STACK_FRAME_TYPE_STANDARD) {
+                aot_set_last_error("unsupported mode");
+            }
+            if (!free_frame_for_aot_func(comp_ctx, func_ctx)) {
+                goto fail;
+            }
+        }
     }
     }
+#endif
 
 
     /* Insert suspend check point */
     /* Insert suspend check point */
     if (comp_ctx->enable_thread_mgr) {
     if (comp_ctx->enable_thread_mgr) {
@@ -2430,7 +2471,8 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
         goto fail;
         goto fail;
     }
     }
 
 
-    if (comp_ctx->enable_aux_stack_frame) {
+    if (comp_ctx->aux_stack_frame_type
+        && !comp_ctx->call_stack_features.frame_per_function) {
 #if WASM_ENABLE_AOT_STACK_FRAME != 0
 #if WASM_ENABLE_AOT_STACK_FRAME != 0
         /*  TODO: use current frame instead of allocating new frame
         /*  TODO: use current frame instead of allocating new frame
                   for WASM_OP_RETURN_CALL_INDIRECT */
                   for WASM_OP_RETURN_CALL_INDIRECT */
@@ -2499,7 +2541,13 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     /* Translate call import block */
     /* Translate call import block */
     LLVMPositionBuilderAtEnd(comp_ctx->builder, block_call_import);
     LLVMPositionBuilderAtEnd(comp_ctx->builder, block_call_import);
 
 
-    if (comp_ctx->enable_aux_stack_frame
+    if (comp_ctx->aot_frame && comp_ctx->call_stack_features.frame_per_function
+        && !aot_alloc_frame_per_function_frame_for_aot_func(comp_ctx, func_ctx,
+                                                            func_idx)) {
+        goto fail;
+    }
+
+    if (comp_ctx->aux_stack_frame_type == AOT_STACK_FRAME_TYPE_STANDARD
         && !commit_params_to_frame_of_import_func(comp_ctx, func_ctx, func_type,
         && !commit_params_to_frame_of_import_func(comp_ctx, func_ctx, func_type,
                                                   param_values + 1)) {
                                                   param_values + 1)) {
         goto fail;
         goto fail;
@@ -2536,6 +2584,12 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
         && !check_call_return(comp_ctx, func_ctx, res))
         && !check_call_return(comp_ctx, func_ctx, res))
         goto fail;
         goto fail;
 
 
+    if (comp_ctx->aot_frame && comp_ctx->call_stack_features.frame_per_function
+        && !aot_free_frame_per_function_frame_for_aot_func(comp_ctx,
+                                                           func_ctx)) {
+        goto fail;
+    }
+
     block_curr = LLVMGetInsertBlock(comp_ctx->builder);
     block_curr = LLVMGetInsertBlock(comp_ctx->builder);
     for (i = 0; i < func_result_count; i++) {
     for (i = 0; i < func_result_count; i++) {
         LLVMAddIncoming(result_phis[i], &value_rets[i], &block_curr, 1);
         LLVMAddIncoming(result_phis[i], &value_rets[i], &block_curr, 1);
@@ -2620,7 +2674,8 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
         PUSH(result_phis[i], func_type->types[func_param_count + i]);
         PUSH(result_phis[i], func_type->types[func_param_count + i]);
     }
     }
 
 
-    if (comp_ctx->enable_aux_stack_frame) {
+    if (comp_ctx->aux_stack_frame_type
+        && !comp_ctx->call_stack_features.frame_per_function) {
 #if WASM_ENABLE_AOT_STACK_FRAME != 0
 #if WASM_ENABLE_AOT_STACK_FRAME != 0
         if (!free_frame_for_aot_func(comp_ctx, func_ctx))
         if (!free_frame_for_aot_func(comp_ctx, func_ctx))
             goto fail;
             goto fail;
@@ -2927,7 +2982,8 @@ aot_compile_op_call_ref(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
         goto fail;
         goto fail;
     }
     }
 
 
-    if (comp_ctx->enable_aux_stack_frame) {
+    if (comp_ctx->aux_stack_frame_type
+        && !comp_ctx->call_stack_features.frame_per_function) {
 #if WASM_ENABLE_AOT_STACK_FRAME != 0
 #if WASM_ENABLE_AOT_STACK_FRAME != 0
         /*  TODO: use current frame instead of allocating new frame
         /*  TODO: use current frame instead of allocating new frame
                   for WASM_OP_RETURN_CALL_REF */
                   for WASM_OP_RETURN_CALL_REF */
@@ -2996,7 +3052,7 @@ aot_compile_op_call_ref(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     /* Translate call import block */
     /* Translate call import block */
     LLVMPositionBuilderAtEnd(comp_ctx->builder, block_call_import);
     LLVMPositionBuilderAtEnd(comp_ctx->builder, block_call_import);
 
 
-    if (comp_ctx->enable_aux_stack_frame
+    if (comp_ctx->aux_stack_frame_type == AOT_STACK_FRAME_TYPE_STANDARD
         && !commit_params_to_frame_of_import_func(comp_ctx, func_ctx, func_type,
         && !commit_params_to_frame_of_import_func(comp_ctx, func_ctx, func_type,
                                                   param_values + 1)) {
                                                   param_values + 1)) {
         goto fail;
         goto fail;
@@ -3124,7 +3180,8 @@ aot_compile_op_call_ref(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
         PUSH(result_phis[i], func_type->types[func_param_count + i]);
         PUSH(result_phis[i], func_type->types[func_param_count + i]);
     }
     }
 
 
-    if (comp_ctx->enable_aux_stack_frame) {
+    if (comp_ctx->aux_stack_frame_type
+        && !comp_ctx->call_stack_features.frame_per_function) {
 #if WASM_ENABLE_AOT_STACK_FRAME != 0
 #if WASM_ENABLE_AOT_STACK_FRAME != 0
         if (!free_frame_for_aot_func(comp_ctx, func_ctx))
         if (!free_frame_for_aot_func(comp_ctx, func_ctx))
             goto fail;
             goto fail;

+ 22 - 13
core/iwasm/compilation/aot_llvm.c

@@ -1771,7 +1771,7 @@ aot_create_func_context(const AOTCompData *comp_data, AOTCompContext *comp_ctx,
         goto fail;
         goto fail;
     }
     }
 
 
-    if (comp_ctx->enable_aux_stack_frame
+    if (comp_ctx->aux_stack_frame_type
         && !create_aux_stack_frame(comp_ctx, func_ctx)) {
         && !create_aux_stack_frame(comp_ctx, func_ctx)) {
         goto fail;
         goto fail;
     }
     }
@@ -2577,8 +2577,8 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
     if (option->enable_ref_types)
     if (option->enable_ref_types)
         comp_ctx->enable_ref_types = true;
         comp_ctx->enable_ref_types = true;
 
 
-    if (option->enable_aux_stack_frame)
-        comp_ctx->enable_aux_stack_frame = true;
+    comp_ctx->aux_stack_frame_type = option->aux_stack_frame_type;
+    comp_ctx->call_stack_features = option->call_stack_features;
 
 
     if (option->enable_perf_profiling)
     if (option->enable_perf_profiling)
         comp_ctx->enable_perf_profiling = true;
         comp_ctx->enable_perf_profiling = true;
@@ -2790,6 +2790,15 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
                 bh_assert(vendor_sys);
                 bh_assert(vendor_sys);
                 bh_memcpy_s(default_arch, sizeof(default_arch), default_triple,
                 bh_memcpy_s(default_arch, sizeof(default_arch), default_triple,
                             (uint32)(vendor_sys - default_triple));
                             (uint32)(vendor_sys - default_triple));
+                /**
+                 * On Mac M[1-9]+ LLVM will report arm64 as the
+                 * architecture, for the purposes of wamr this is the
+                 * same as aarch64v8 so we'll normalize it here.
+                 */
+                if (!strcmp(default_arch, "arm64")) {
+                    bh_strcpy_s(default_arch, sizeof(default_arch),
+                                "aarch64v8");
+                }
                 arch1 = default_arch;
                 arch1 = default_arch;
 
 
                 LLVMDisposeMessage(default_triple);
                 LLVMDisposeMessage(default_triple);
@@ -2960,12 +2969,12 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
                                     sizeof(comp_ctx->target_arch));
                                     sizeof(comp_ctx->target_arch));
 
 
         if (option->bounds_checks == 1 || option->bounds_checks == 0) {
         if (option->bounds_checks == 1 || option->bounds_checks == 0) {
-            /* Set by user */
+            /* Set by the user */
             comp_ctx->enable_bound_check =
             comp_ctx->enable_bound_check =
                 (option->bounds_checks == 1) ? true : false;
                 (option->bounds_checks == 1) ? true : false;
         }
         }
         else {
         else {
-            /* Unset by user, use default value */
+            /* Unset by the user, use the default value */
             if (strstr(comp_ctx->target_arch, "64")
             if (strstr(comp_ctx->target_arch, "64")
                 && !option->is_sgx_platform) {
                 && !option->is_sgx_platform) {
                 comp_ctx->enable_bound_check = false;
                 comp_ctx->enable_bound_check = false;
@@ -2975,17 +2984,17 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
             }
             }
         }
         }
 
 
-        if (comp_ctx->enable_bound_check) {
-            /* Always enable stack boundary check if `bounds-checks`
-               is enabled */
-            comp_ctx->enable_stack_bound_check = true;
-        }
-        else {
-            /* When `bounds-checks` is disabled, we set stack boundary
-               check status according to the input option */
+        if (option->stack_bounds_checks == 1
+            || option->stack_bounds_checks == 0) {
+            /* Set by the user */
             comp_ctx->enable_stack_bound_check =
             comp_ctx->enable_stack_bound_check =
                 (option->stack_bounds_checks == 1) ? true : false;
                 (option->stack_bounds_checks == 1) ? true : false;
         }
         }
+        else {
+            /* Unset by the user: default to the same value as the bounds
+             * check */
+            comp_ctx->enable_stack_bound_check = comp_ctx->enable_bound_check;
+        }
 
 
         if ((comp_ctx->enable_stack_bound_check
         if ((comp_ctx->enable_stack_bound_check
              || comp_ctx->enable_stack_estimation)
              || comp_ctx->enable_stack_estimation)

+ 4 - 1
core/iwasm/compilation/aot_llvm.h

@@ -410,7 +410,10 @@ typedef struct AOTCompContext {
     bool enable_aux_stack_check;
     bool enable_aux_stack_check;
 
 
     /* Generate auxiliary stack frame */
     /* Generate auxiliary stack frame */
-    bool enable_aux_stack_frame;
+    AOTStackFrameType aux_stack_frame_type;
+
+    /* Auxiliary call stack features */
+    AOTCallStackFeatures call_stack_features;
 
 
     /* Function performance profiling */
     /* Function performance profiling */
     bool enable_perf_profiling;
     bool enable_perf_profiling;

+ 27 - 0
core/iwasm/compilation/aot_stack_frame.h

@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2024 Amazon Inc.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _AOT_STACK_FRAME_H_
+#define _AOT_STACK_FRAME_H_
+
+#include "platform_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+    /* Index of the current function, counted among the module's
+     * non-imported functions */
+    uint32 func_index;
+
+    /* Instruction pointer, stored as an offset into the bytecode array */
+    uint32 ip_offset;
+} AOTTinyFrame;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif

+ 148 - 0
core/iwasm/compilation/aot_stack_frame_comp.c

@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2024 Amazon Inc.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+#include "aot_stack_frame_comp.h"
+#include "aot_emit_exception.h"
+/* Build an in-bounds GEP into `variable` (its name is also used as the LLVM
+ * value name). On failure, record an error and return false from the
+ * enclosing function. Requires `comp_ctx` to be in scope. */
+#define ADD_IN_BOUNDS_GEP(variable, type, pointer, indices, num_indices)     \
+    do {                                                                     \
+        if (!(variable =                                                     \
+                  LLVMBuildInBoundsGEP2(comp_ctx->builder, type, pointer,    \
+                                        indices, num_indices, #variable))) { \
+            aot_set_last_error("llvm build in bounds gep failed");           \
+            return false;                                                    \
+        }                                                                    \
+    } while (0)
+/* Build a store of `value` to `pointer`. On failure, record an error and
+ * return false from the enclosing function. Requires `comp_ctx` in scope. */
+#define ADD_STORE(value, pointer)                                 \
+    do {                                                          \
+        if (!LLVMBuildStore(comp_ctx->builder, value, pointer)) { \
+            aot_set_last_error("llvm build store failed");        \
+            return false;                                         \
+        }                                                         \
+    } while (0)
+/* Build a load of `type` from `pointer` into `value` (its name is also used
+ * as the LLVM value name). On failure, record an error and return false from
+ * the enclosing function. Requires `comp_ctx` in scope. */
+#define ADD_LOAD(value, type, pointer)                                         \
+    do {                                                                       \
+        if (!(value =                                                          \
+                  LLVMBuildLoad2(comp_ctx->builder, type, pointer, #value))) { \
+            aot_set_last_error("llvm build load failed");                      \
+            return false;                                                      \
+        }                                                                      \
+    } while (0)
+/**
+ * Emit code that pushes an AOTTinyFrame for the function being compiled:
+ * load the wasm stack top, optionally check it against the stack bound,
+ * store func_index at the old top and advance the saved top by
+ * sizeof(AOTTinyFrame).
+ *
+ * @param comp_ctx the compilation context
+ * @param func_ctx the context of the function being compiled
+ * @param func_index the LLVM value written at the start of the new frame
+ *
+ * @return true if all instructions were emitted, false otherwise (the
+ *         ADD_* macros record the failure with aot_set_last_error)
+ */
+static bool
+aot_alloc_tiny_frame_for_aot_func(AOTCompContext *comp_ctx,
+                                  AOTFuncContext *func_ctx,
+                                  LLVMValueRef func_index)
+{
+    LLVMValueRef wasm_stack_top_ptr = func_ctx->wasm_stack_top_ptr,
+                 wasm_stack_top_bound = func_ctx->wasm_stack_top_bound,
+                 wasm_stack_top, cmp;
+    LLVMBasicBlockRef check_wasm_stack_succ;
+    LLVMValueRef offset;
+
+    ADD_LOAD(wasm_stack_top, INT8_PTR_TYPE, wasm_stack_top_ptr);
+
+    if (comp_ctx->call_stack_features.bounds_checks) {
+        if (!(check_wasm_stack_succ = LLVMAppendBasicBlockInContext(
+                  comp_ctx->context, func_ctx->func,
+                  "check_wasm_stack_succ"))) {
+            aot_set_last_error("llvm add basic block failed.");
+            return false;
+        }
+
+        LLVMMoveBasicBlockAfter(check_wasm_stack_succ,
+                                LLVMGetInsertBlock(comp_ctx->builder));
+
+        if (!(cmp = LLVMBuildICmp(comp_ctx->builder, LLVMIntUGE, wasm_stack_top,
+                                  wasm_stack_top_bound, "cmp"))) {
+            aot_set_last_error("llvm build icmp failed");
+            return false;
+        }
+
+        if (!(aot_emit_exception(comp_ctx, func_ctx,
+                                 EXCE_OPERAND_STACK_OVERFLOW, true, cmp,
+                                 check_wasm_stack_succ))) {
+            return false;
+        }
+    }
+
+    /* Save the func_idx at the current stack top, which becomes the start
+     * of the new frame */
+    ADD_STORE(func_index, wasm_stack_top);
+
+    /* Advance the stack top past the new frame and write it back.
+     * NOTE(review): the offset is an i32 constant here, while the free and
+     * commit-ip paths choose I64/I32 from pointer_size; confirm that the
+     * difference is intentional */
+    INT_CONST(offset, sizeof(AOTTinyFrame), I32_TYPE, true);
+    ADD_IN_BOUNDS_GEP(wasm_stack_top, INT8_TYPE, wasm_stack_top, &offset, 1);
+    ADD_STORE(wasm_stack_top, wasm_stack_top_ptr);
+
+    return true;
+}
+/**
+ * Emit code that pops the most recent AOTTinyFrame: load the wasm stack
+ * top, move it back by sizeof(AOTTinyFrame) and store the result.
+ *
+ * NOTE(review): -sizeof(AOTTinyFrame) is evaluated as an unsigned size_t;
+ * confirm that INT_CONST turns it into the intended negative constant for
+ * both I32_TYPE and I64_TYPE, including on hosts whose size_t is narrower
+ * than the constant type.
+ *
+ * @param comp_ctx the compilation context
+ * @param func_ctx the context of the function being compiled
+ *
+ * @return true if all instructions were emitted, false otherwise
+ */
+static bool
+aot_free_tiny_frame_for_aot_func(AOTCompContext *comp_ctx,
+                                 AOTFuncContext *func_ctx)
+{
+    LLVMValueRef wasm_stack_top_ptr = func_ctx->wasm_stack_top_ptr,
+                 wasm_stack_top;
+    LLVMValueRef offset;
+
+    ADD_LOAD(wasm_stack_top, INT8_PTR_TYPE, wasm_stack_top_ptr);
+
+    INT_CONST(offset, -sizeof(AOTTinyFrame),
+              comp_ctx->pointer_size == 8 ? I64_TYPE : I32_TYPE, true);
+    ADD_IN_BOUNDS_GEP(wasm_stack_top, INT8_TYPE, wasm_stack_top, &offset, 1);
+    ADD_STORE(wasm_stack_top, wasm_stack_top_ptr);
+
+    return true;
+}
+/**
+ * Emit code that stores ip_value 4 bytes below the current wasm stack top.
+ *
+ * NOTE(review): the hard-coded -4 presumably addresses the ip_offset field
+ * of the most recently pushed AOTTinyFrame (the last 4 bytes of the
+ * frame); confirm against the struct layout if AOTTinyFrame ever changes.
+ *
+ * @param comp_ctx the compilation context
+ * @param func_ctx the context of the function being compiled
+ * @param ip_value the LLVM value to store, must not be NULL
+ *
+ * @return true if all instructions were emitted, false otherwise
+ */
+bool
+aot_tiny_frame_gen_commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
+                             LLVMValueRef ip_value)
+{
+    LLVMValueRef wasm_stack_top_ptr = func_ctx->wasm_stack_top_ptr,
+                 wasm_stack_top;
+    LLVMValueRef offset, ip_addr;
+
+    bh_assert(ip_value);
+
+    ADD_LOAD(wasm_stack_top, INT8_PTR_TYPE, wasm_stack_top_ptr);
+
+    INT_CONST(offset, -4, comp_ctx->pointer_size == 8 ? I64_TYPE : I32_TYPE,
+              true);
+    ADD_IN_BOUNDS_GEP(ip_addr, INT8_TYPE, wasm_stack_top, &offset, 1);
+
+    ADD_STORE(ip_value, ip_addr);
+
+    return true;
+}
+/**
+ * Emit the frame allocation for frame-per-function mode, dispatching on
+ * comp_ctx->aux_stack_frame_type. Only AOT_STACK_FRAME_TYPE_TINY is
+ * handled; any other type sets the error "unsupported mode".
+ *
+ * @param comp_ctx the compilation context
+ * @param func_ctx the context of the function being compiled
+ * @param func_index the LLVM value passed on to the tiny-frame allocator
+ *
+ * @return true on success, false on failure or unsupported frame type
+ */
+bool
+aot_alloc_frame_per_function_frame_for_aot_func(AOTCompContext *comp_ctx,
+                                                AOTFuncContext *func_ctx,
+                                                LLVMValueRef func_index)
+{
+    switch (comp_ctx->aux_stack_frame_type) {
+        case AOT_STACK_FRAME_TYPE_TINY:
+            return aot_alloc_tiny_frame_for_aot_func(comp_ctx, func_ctx,
+                                                     func_index);
+        default:
+            aot_set_last_error("unsupported mode");
+            return false;
+    }
+}
+/**
+ * Emit the frame release for frame-per-function mode, dispatching on
+ * comp_ctx->aux_stack_frame_type. Only AOT_STACK_FRAME_TYPE_TINY is
+ * handled; any other type sets the error "unsupported mode".
+ *
+ * @param comp_ctx the compilation context
+ * @param func_ctx the context of the function being compiled
+ *
+ * @return true on success, false on failure or unsupported frame type
+ */
+bool
+aot_free_frame_per_function_frame_for_aot_func(AOTCompContext *comp_ctx,
+                                               AOTFuncContext *func_ctx)
+{
+    switch (comp_ctx->aux_stack_frame_type) {
+        case AOT_STACK_FRAME_TYPE_TINY:
+            return aot_free_tiny_frame_for_aot_func(comp_ctx, func_ctx);
+        default:
+            aot_set_last_error("unsupported mode");
+            return false;
+    }
+}

+ 33 - 0
core/iwasm/compilation/aot_stack_frame_comp.h

@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2024 Amazon Inc.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _AOT_STACK_FRAME_COMP_H_
+#define _AOT_STACK_FRAME_COMP_H_
+
+#include "aot_stack_frame.h"
+#include "aot_compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+bool
+aot_alloc_frame_per_function_frame_for_aot_func(AOTCompContext *comp_ctx,
+                                                AOTFuncContext *func_ctx,
+                                                LLVMValueRef func_index);
+
+bool
+aot_free_frame_per_function_frame_for_aot_func(AOTCompContext *comp_ctx,
+                                               AOTFuncContext *func_ctx);
+
+bool
+aot_tiny_frame_gen_commit_ip(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
+                             LLVMValueRef ip_value);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif

+ 35 - 1
core/iwasm/include/aot_comp_option.h

@@ -6,6 +6,39 @@
 #ifndef __AOT_COMP_OPTION_H__
 #ifndef __AOT_COMP_OPTION_H__
 #define __AOT_COMP_OPTION_H__
 #define __AOT_COMP_OPTION_H__
 
 
+typedef struct {
+    /* Enables or disables bounds checks for stack frames. When enabled, the
+     * AOT compiler generates code to check that the stack pointer is within
+     * the stack bounds, and to trap if it is not. */
+    bool bounds_checks;
+
+    /* Enables or disables instruction pointer (IP) tracking. */
+    bool ip;
+
+    /* Enables or disables tracking the instruction pointer of a trap. Only
+     * takes effect when `ip` is enabled. */
+    bool trap_ip;
+
+    /* Enables or disables tracking of parameters, locals and stack
+     * operands. */
+    bool values;
+
+    /* If enabled, a stack frame is generated at the beginning of each
+     * function (frame-per-function mode). Otherwise, a stack frame is
+     * generated before each call of a function (frame-per-call mode). */
+    bool frame_per_function;
+} AOTCallStackFeatures;
+
+void
+aot_call_stack_features_init_default(AOTCallStackFeatures *features);
+
+typedef enum {
+    AOT_STACK_FRAME_OFF = 0, /* No auxiliary stack frame; kept at 0 so the
+                              * type can be tested like a boolean flag */
+    /* Use a small stack frame data structure (AOTTinyFrame) */
+    AOT_STACK_FRAME_TYPE_TINY,
+    /* Use a regular stack frame data structure (AOTFrame) */
+    AOT_STACK_FRAME_TYPE_STANDARD,
+} AOTStackFrameType;
+
 typedef struct AOTCompOption {
 typedef struct AOTCompOption {
     bool is_jit_mode;
     bool is_jit_mode;
     bool is_indirect_mode;
     bool is_indirect_mode;
@@ -21,7 +54,8 @@ typedef struct AOTCompOption {
     bool enable_ref_types;
     bool enable_ref_types;
     bool enable_gc;
     bool enable_gc;
     bool enable_aux_stack_check;
     bool enable_aux_stack_check;
-    bool enable_aux_stack_frame;
+    AOTStackFrameType aux_stack_frame_type;
+    AOTCallStackFeatures call_stack_features;
     bool enable_perf_profiling;
     bool enable_perf_profiling;
     bool enable_memory_profiling;
     bool enable_memory_profiling;
     bool disable_llvm_intrinsics;
     bool disable_llvm_intrinsics;

+ 1 - 0
core/iwasm/interpreter/wasm_interp_classic.c

@@ -5739,6 +5739,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         /* use memmove when memory64 is enabled since len
                         /* use memmove when memory64 is enabled since len
                            may be larger than UINT32_MAX */
                            may be larger than UINT32_MAX */
                         memmove(mdst, msrc, len);
                         memmove(mdst, msrc, len);
+                        (void)dlen;
 #endif
 #endif
                         break;
                         break;
                     }
                     }

+ 1 - 1
core/iwasm/interpreter/wasm_interp_fast.c

@@ -6030,7 +6030,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
 
 
 #if WASM_ENABLE_LABELS_AS_VALUES != 0
 #if WASM_ENABLE_LABELS_AS_VALUES != 0
 void **
 void **
-wasm_interp_get_handle_table()
+wasm_interp_get_handle_table(void)
 {
 {
     WASMModuleInstance module;
     WASMModuleInstance module;
     memset(&module, 0, sizeof(WASMModuleInstance));
     memset(&module, 0, sizeof(WASMModuleInstance));

+ 5 - 3
core/iwasm/interpreter/wasm_loader.c

@@ -2474,7 +2474,8 @@ wasm_loader_resolve_tag(const char *module_name, const char *tag_name,
     }
     }
 
 
     /* check function type */
     /* check function type */
-    if (!wasm_type_equal(expected_tag_type, tag->tag_type)) {
+    if (!wasm_type_equal(expected_tag_type, tag->tag_type, module->types,
+                         module->type_count)) {
         LOG_DEBUG("%s.%s failed the type check", module_name, tag_name);
         LOG_DEBUG("%s.%s failed the type check", module_name, tag_name);
         set_error_buf(error_buf, error_buf_size, "incompatible import type");
         set_error_buf(error_buf, error_buf_size, "incompatible import type");
         return NULL;
         return NULL;
@@ -5406,7 +5407,8 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
     option.enable_aux_stack_check = true;
     option.enable_aux_stack_check = true;
 #if WASM_ENABLE_PERF_PROFILING != 0 || WASM_ENABLE_DUMP_CALL_STACK != 0 \
 #if WASM_ENABLE_PERF_PROFILING != 0 || WASM_ENABLE_DUMP_CALL_STACK != 0 \
     || WASM_ENABLE_AOT_STACK_FRAME != 0
     || WASM_ENABLE_AOT_STACK_FRAME != 0
-    option.enable_aux_stack_frame = true;
+    option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_STANDARD;
+    aot_call_stack_features_init_default(&option.call_stack_features);
 #endif
 #endif
 #if WASM_ENABLE_PERF_PROFILING != 0
 #if WASM_ENABLE_PERF_PROFILING != 0
     option.enable_perf_profiling = true;
     option.enable_perf_profiling = true;
@@ -5750,7 +5752,7 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func,
 
 
 #if WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_LABELS_AS_VALUES != 0
 #if WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_LABELS_AS_VALUES != 0
 void **
 void **
-wasm_interp_get_handle_table();
+wasm_interp_get_handle_table(void);
 
 
 static void **handle_table;
 static void **handle_table;
 #endif
 #endif

+ 3 - 2
core/iwasm/interpreter/wasm_mini_loader.c

@@ -2148,7 +2148,8 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
     option.enable_aux_stack_check = true;
     option.enable_aux_stack_check = true;
 #if WASM_ENABLE_PERF_PROFILING != 0 || WASM_ENABLE_DUMP_CALL_STACK != 0 \
 #if WASM_ENABLE_PERF_PROFILING != 0 || WASM_ENABLE_DUMP_CALL_STACK != 0 \
     || WASM_ENABLE_AOT_STACK_FRAME != 0
     || WASM_ENABLE_AOT_STACK_FRAME != 0
-    option.enable_aux_stack_frame = true;
+    option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_STANDARD;
+    aot_call_stack_features_init_default(&option.call_stack_features);
 #endif
 #endif
 #if WASM_ENABLE_PERF_PROFILING != 0
 #if WASM_ENABLE_PERF_PROFILING != 0
     option.enable_perf_profiling = true;
     option.enable_perf_profiling = true;
@@ -2531,7 +2532,7 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func,
 
 
 #if WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_LABELS_AS_VALUES != 0
 #if WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_LABELS_AS_VALUES != 0
 void **
 void **
-wasm_interp_get_handle_table();
+wasm_interp_get_handle_table(void);
 
 
 static void **handle_table;
 static void **handle_table;
 #endif
 #endif

+ 1 - 1
core/iwasm/libraries/lib-wasi-threads/lib_wasi_threads_wrapper.c

@@ -29,7 +29,7 @@ typedef struct {
 } ThreadStartArg;
 } ThreadStartArg;
 
 
 static int32
 static int32
-allocate_thread_id()
+allocate_thread_id(void)
 {
 {
     os_mutex_lock(&thread_id_lock);
     os_mutex_lock(&thread_id_lock);
     int32 id = tid_allocator_get_tid(&tid_allocator);
     int32 id = tid_allocator_get_tid(&tid_allocator);

+ 4 - 4
core/iwasm/libraries/thread-mgr/thread_manager.h

@@ -64,10 +64,10 @@ void
 wasm_cluster_set_max_thread_num(uint32 num);
 wasm_cluster_set_max_thread_num(uint32 num);
 
 
 bool
 bool
-thread_manager_init();
+thread_manager_init(void);
 
 
 void
 void
-thread_manager_destroy();
+thread_manager_destroy(void);
 
 
 /* Create cluster */
 /* Create cluster */
 WASMCluster *
 WASMCluster *
@@ -109,7 +109,7 @@ bool
 wasm_cluster_register_destroy_callback(void (*callback)(WASMCluster *));
 wasm_cluster_register_destroy_callback(void (*callback)(WASMCluster *));
 
 
 void
 void
-wasm_cluster_cancel_all_callbacks();
+wasm_cluster_cancel_all_callbacks(void);
 
 
 void
 void
 wasm_cluster_suspend_all(WASMCluster *cluster);
 wasm_cluster_suspend_all(WASMCluster *cluster);
@@ -190,7 +190,7 @@ struct WASMCurrentEnvStatus {
 };
 };
 
 
 WASMCurrentEnvStatus *
 WASMCurrentEnvStatus *
-wasm_cluster_create_exenv_status();
+wasm_cluster_create_exenv_status(void);
 
 
 void
 void
 wasm_cluster_destroy_exenv_status(WASMCurrentEnvStatus *status);
 wasm_cluster_destroy_exenv_status(WASMCurrentEnvStatus *status);

+ 22 - 18
core/iwasm/libraries/wasi-nn/README.md

@@ -4,7 +4,7 @@
 
 
 ### Host
 ### Host
 
 
-Enable WASI-NN in the WAMR by spefiying it in the cmake building configuration as follows,
+Enable WASI-NN in the WAMR by specifying it in the cmake building configuration as follows,
 
 
 ```cmake
 ```cmake
 set (WAMR_BUILD_WASI_NN  1)
 set (WAMR_BUILD_WASI_NN  1)
@@ -17,14 +17,15 @@ $ cmake -DWAMR_BUILD_WASI_NN=1 <other options> ...
 ```
 ```
 
 
 > ![Caution]
 > ![Caution]
-> If enable `WAMR_BUID_WASI_NN`, iwasm will link a shared WAMR library instead of a static one. Wasi-nn backends will be loaded dynamically at runtime. Users shall specify the path of the backend library and register it to the iwasm runtime with `--native-lib=<path of backend library>`. All shared libraries should be placed in the `LD_LIBRARY_PATH`.
+> Enabling `WAMR_BUILD_WASI_NN` causes iwasm to link against a shared WAMR library instead of a static one. The WASI-NN backends are then loaded dynamically at runtime, so you must ensure that all shared libraries are included in the `LD_LIBRARY_PATH`.
 
 
 #### Compilation options
 #### Compilation options
 
 
-- `WAMR_BUILD_WASI_NN`. enable wasi-nn support. can't work alone. need to identify a backend. Match legacy wasi-nn spec naming convention. use `wasi_nn` as import module names.
-- `WAMR_BUILD_WASI_EPHEMERAL_NN`. Match latest wasi-nn spec naming convention. use `wasi_ephemeral_nn` as import module names.
-- `WAMR_BUILD_WASI_NN_TFLITE`. identify the backend as TensorFlow Lite.
-- `WAMR_BUILD_WASI_NN_OPENVINO`. identify the backend as OpenVINO.
+- `WAMR_BUILD_WASI_NN`. This option enables support for WASI-NN. It cannot function independently and requires specifying a backend. It follows the original WASI-NN specification for naming conventions and uses `wasi_nn` for import module names.
+- `WAMR_BUILD_WASI_EPHEMERAL_NN`. This option adheres to the most recent WASI-NN specification for naming conventions and uses `wasi_ephemeral_nn` for import module names.
+- `WAMR_BUILD_WASI_NN_TFLITE`. This option designates TensorFlow Lite as the backend.
+- `WAMR_BUILD_WASI_NN_OPENVINO`. This option designates OpenVINO as the backend.
+- `WAMR_BUILD_WASI_NN_LLAMACPP`. This option designates Llama.cpp as the backend.
 
 
 ### Wasm
 ### Wasm
 
 
@@ -44,7 +45,7 @@ typedef enum { fp16 = 0, fp32, up8, ip32 } tensor_type;
 
 
 It is required to recompile the Wasm application if you want to switch between the two sets of functions.
 It is required to recompile the Wasm application if you want to switch between the two sets of functions.
 
 
-#### Openvino
+#### Openvino installation
 
 
 If you're planning to use OpenVINO backends, the first step is to install OpenVINO on your computer. To do this correctly, please follow the official installation guide which you can find at this link: https://docs.openvino.ai/2024/get-started/install-openvino/install-openvino-archive-linux.html.
 If you're planning to use OpenVINO backends, the first step is to install OpenVINO on your computer. To do this correctly, please follow the official installation guide which you can find at this link: https://docs.openvino.ai/2024/get-started/install-openvino/install-openvino-archive-linux.html.
 
 
@@ -162,17 +163,9 @@ Supported:
 
 
 ### Testing with WasmEdge-WASINN Examples
 ### Testing with WasmEdge-WASINN Examples
 
 
-To ensure everything is set up correctly, use the examples from [WasmEdge-WASINN-examples](https://github.com/second-state/WasmEdge-WASINN-examples/tree/master). These examples help verify that WASI-NN support in WAMR is functioning as expected.
+To make sure everything is configured properly, refer to the examples provided at [WasmEdge-WASINN-examples](https://github.com/second-state/WasmEdge-WASINN-examples/tree/master). These examples are useful for confirming that the WASI-NN support in WAMR is working correctly.
 
 
-> Note: The repository contains two types of examples. Some use the [standard wasi-nn](https://github.com/WebAssembly/wasi-nn), while others use [WasmEdge's version of wasi-nn](https://github.com/second-state/wasmedge-wasi-nn), which is enhanced to meet specific customer needs.
-
-The examples test the following machine learning backends:
-
-- OpenVINO
-- PyTorch
-- TensorFlow Lite
-
-Due to the different requirements of each backend, we'll use a Docker container for a hassle-free testing environment.
+Because each backend has its own set of requirements, we recommend using a Docker container to create a straightforward testing environment without complications.
 
 
 #### Prepare the execution environment
 #### Prepare the execution environment
 
 
@@ -186,9 +179,20 @@ $ docker build -t wasi-nn-smoke:v1.0 -f ./core/iwasm/libraries/wasi-nn/test/Dock
 #### Execute
 #### Execute
 
 
 ```bash
 ```bash
+$ pwd
+/workspaces/wasm-micro-runtime/
 $ docker run --rm wasi-nn-smoke:v1.0
 $ docker run --rm wasi-nn-smoke:v1.0
 ```
 ```
 
 
-### Testing with bytecodealliance wasi-nn
+Note that the qwen example is used as the default example for the Llama.cpp backend because it uses a small model and is easy to run.
+
+```bash
+- openvino_mobile_image. PASS
+- openvino_mobile_raw. PASS
+- openvino_road_segmentation_adas. PASS
+- wasmedge_ggml_qwen. PASS
+```
+
+### Testing with bytecodealliance WASI-NN
 
 
 For another example, check out [classification-example](https://github.com/bytecodealliance/wasi-nn/tree/main/rust/examples/classification-example), which focuses on OpenVINO. You can run it using the same Docker container mentioned above.
 For another example, check out [classification-example](https://github.com/bytecodealliance/wasi-nn/tree/main/rust/examples/classification-example), which focuses on OpenVINO. You can run it using the same Docker container mentioned above.

+ 17 - 0
core/iwasm/libraries/wasi-nn/cmake/Findcjson.cmake

@@ -0,0 +1,17 @@
+# Copyright (C) 2019 Intel Corporation. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+include(FetchContent)
+# Check cJSON out under core/deps inside the WAMR source tree
+set(CJSON_SOURCE_DIR "${WAMR_ROOT_DIR}/core/deps/cjson")
+
+FetchContent_Declare(
+  cjson
+  GIT_REPOSITORY https://github.com/DaveGamble/cJSON.git
+  GIT_TAG        v1.7.18
+  SOURCE_DIR     ${CJSON_SOURCE_DIR}
+)
+# Turn off cJSON's tests and its uninstall target before the project is added
+set(ENABLE_CJSON_TEST OFF CACHE INTERNAL "Turn off tests")
+set(ENABLE_CJSON_UNINSTALL OFF CACHE INTERNAL "Turn off uninstall to avoid targets conflict")
+FetchContent_MakeAvailable(cjson)

+ 18 - 0
core/iwasm/libraries/wasi-nn/cmake/Findllamacpp.cmake

@@ -0,0 +1,18 @@
+# Copyright (C) 2019 Intel Corporation. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+include(FetchContent)
+# Check llama.cpp out under core/deps inside the WAMR source tree
+set(LLAMA_SOURCE_DIR "${WAMR_ROOT_DIR}/core/deps/llama.cpp")
+
+FetchContent_Declare(
+  llamacpp
+  GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git
+  GIT_TAG        b3573
+  SOURCE_DIR     ${LLAMA_SOURCE_DIR}
+)
+# Build only the libraries: no tests, examples or server.
+# NOTE(review): these are plain (non-cache) variables; whether llama.cpp's
+# option() calls honor them depends on policy CMP0077 - confirm.
+set(LLAMA_BUILD_TESTS OFF)
+set(LLAMA_BUILD_EXAMPLES OFF)
+set(LLAMA_BUILD_SERVER OFF)
+FetchContent_MakeAvailable(llamacpp)

+ 18 - 40
core/iwasm/libraries/wasi-nn/cmake/Findtensorflow_lite.cmake

@@ -1,47 +1,25 @@
 # Copyright (C) 2019 Intel Corporation. All rights reserved.
 # Copyright (C) 2019 Intel Corporation. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 
-find_library(TENSORFLOW_LITE
-  NAMES tensorflow-lite
-  HINTS ${CMAKE_CURRENT_BINARY_DIR}/tensorflow-lite
-  NO_DEFAULT_PATHS
+include(FetchContent)
+
+set(TFLITE_SOURCE_DIR "${WAMR_ROOT_DIR}/core/deps/tensorflow-src")
+
+FetchContent_Declare(
+  tensorflow_lite
+  GIT_REPOSITORY https://github.com/tensorflow/tensorflow.git 
+  GIT_TAG        v2.12.0 
+  GIT_SHALLOW    ON
+  GIT_PROGRESS   ON
+  SOURCE_DIR     ${TFLITE_SOURCE_DIR}
+  SOURCE_SUBDIR  tensorflow/lite
 )
 )
 
 
-if(NOT TENSORFLOW_LITE)
-  if(NOT EXISTS "${WAMR_ROOT_DIR}/core/deps/tensorflow-src")
-    execute_process(
-      COMMAND "${WAMR_ROOT_DIR}/core/deps/install_tensorflow.sh"
-      RESULT_VARIABLE TENSORFLOW_RESULT
-    )
-  else()
-    message("Tensorflow is already downloaded.")
-  endif()
-
-  set(TENSORFLOW_SOURCE_DIR "${WAMR_ROOT_DIR}/core/deps/tensorflow-src")
-
-  if(WAMR_BUILD_WASI_NN_ENABLE_GPU EQUAL 1)
-    # Tensorflow specific:
-    # * https://www.tensorflow.org/lite/guide/build_cmake#available_options_to_build_tensorflow_lite
-    set (TFLITE_ENABLE_GPU ON)
-  endif()
-
-  if (CMAKE_SIZEOF_VOID_P EQUAL 4)
-    set (TFLITE_ENABLE_XNNPACK OFF)
-  endif()
-
-  add_subdirectory(
-    "${TENSORFLOW_SOURCE_DIR}/tensorflow/lite"
-    "${CMAKE_CURRENT_BINARY_DIR}/tensorflow-lite"
-    EXCLUDE_FROM_ALL
-  )
-else ()
-  message(STATUS "TensorFlow Lite library found: ${TENSORFLOW_LITE}")
-  set(TENSORFLOW_SOURCE_DIR "${WAMR_ROOT_DIR}/core/deps/tensorflow-src")
+if(WAMR_BUILD_WASI_NN_ENABLE_GPU EQUAL 1)
+  set(TFLITE_ENABLE_GPU ON)
+endif()
+if (CMAKE_SIZEOF_VOID_P EQUAL 4)
+  set(TFLITE_ENABLE_XNNPACK OFF)
 endif()
 endif()
 
 
-set(TENSORFLOW_LITE_INCLUDE_DIR "${TENSORFLOW_SOURCE_DIR}/tensorflow/lite")
-set(FLATBUFFER_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/flatbuffers/include")
-
-include_directories(${TENSORFLOW_SOURCE_DIR})
-include_directories(${FLATBUFFER_INCLUDE_DIR})
-link_directories(${CMAKE_CURRENT_BINARY_DIR}/tensorflow-lite)
+FetchContent_MakeAvailable(tensorflow_lite)

+ 58 - 22
core/iwasm/libraries/wasi-nn/cmake/wasi_nn.cmake

@@ -3,27 +3,6 @@
 
 
 list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR})
 list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR})
 
 
-if(WAMR_BUILD_WASI_NN_TFLITE EQUAL 1)
-  # Find tensorflow-lite
-  find_package(tensorflow_lite REQUIRED)
-endif()
-
-if(WAMR_BUILD_WASI_NN_OPENVINO EQUAL 1)
-  if(NOT DEFINED ENV{OpenVINO_DIR})
-    message(FATAL_ERROR
-        "OpenVINO_DIR is not defined. "
-        "Please follow https://docs.openvino.ai/2024/get-started/install-openvino.html,"
-        "install openvino, and set environment variable OpenVINO_DIR."
-        "Like OpenVINO_DIR=/usr/lib/openvino-2023.2/ cmake ..."
-        "Or OpenVINO_DIR=/opt/intel/openvino/ cmake ..."
-    )
-  endif()
-
-  list(APPEND CMAKE_MODULE_PATH $ENV{OpenVINO_DIR})
-  # Find OpenVINO
-  find_package(OpenVINO REQUIRED COMPONENTS Runtime)
-endif()
-
 #
 #
 # wasi-nn general
 # wasi-nn general
 set(WASI_NN_ROOT ${CMAKE_CURRENT_LIST_DIR}/..)
 set(WASI_NN_ROOT ${CMAKE_CURRENT_LIST_DIR}/..)
@@ -42,22 +21,46 @@ add_compile_definitions(
 #
 #
 # - tflite
 # - tflite
 if(WAMR_BUILD_WASI_NN_TFLITE EQUAL 1)
 if(WAMR_BUILD_WASI_NN_TFLITE EQUAL 1)
+  find_package(tensorflow_lite REQUIRED)
+
   add_library(
   add_library(
     wasi_nn_tflite
     wasi_nn_tflite
     SHARED
     SHARED
       ${WASI_NN_ROOT}/src/wasi_nn_tensorflowlite.cpp
       ${WASI_NN_ROOT}/src/wasi_nn_tensorflowlite.cpp
   )
   )
 
 
+  target_include_directories(
+    wasi_nn_tflite
+    PUBLIC
+      ${tensorflow_lite_SOURCE_DIR}
+  )
+
   target_link_libraries(
   target_link_libraries(
     wasi_nn_tflite
     wasi_nn_tflite
     PUBLIC
     PUBLIC
       libiwasm
       libiwasm
       tensorflow-lite
       tensorflow-lite
   )
   )
+
+  install(TARGETS wasi_nn_tflite DESTINATION lib)
 endif()
 endif()
 
 
 # - openvino
 # - openvino
 if(WAMR_BUILD_WASI_NN_OPENVINO EQUAL 1)
 if(WAMR_BUILD_WASI_NN_OPENVINO EQUAL 1)
+  if(NOT DEFINED ENV{OpenVINO_DIR})
+    message(FATAL_ERROR
+        "OpenVINO_DIR is not defined. "
+        "Please follow https://docs.openvino.ai/2024/get-started/install-openvino.html,"
+        "install openvino, and set environment variable OpenVINO_DIR."
+        "Like OpenVINO_DIR=/usr/lib/openvino-2023.2/ cmake ..."
+        "Or OpenVINO_DIR=/opt/intel/openvino/ cmake ..."
+    )
+  endif()
+
+  list(APPEND CMAKE_MODULE_PATH $ENV{OpenVINO_DIR})
+  # Find OpenVINO
+  find_package(OpenVINO REQUIRED COMPONENTS Runtime)
+
   add_library(
   add_library(
     wasi_nn_openvino
     wasi_nn_openvino
     SHARED
     SHARED
@@ -71,4 +74,37 @@ if(WAMR_BUILD_WASI_NN_OPENVINO EQUAL 1)
       openvino::runtime
       openvino::runtime
       openvino::runtime::c
       openvino::runtime::c
   )
   )
-endif()
+
+  install(TARGETS wasi_nn_openvino DESTINATION lib)
+endif()
+
+# - llamacpp
+
+if(WAMR_BUILD_WASI_NN_LLAMACPP EQUAL 1)
+  find_package(cjson REQUIRED)
+  find_package(llamacpp REQUIRED)
+
+  add_library(
+    wasi_nn_llamacpp
+    SHARED
+      ${WASI_NN_ROOT}/src/wasi_nn_llamacpp.c
+  )
+
+  target_include_directories(
+    wasi_nn_llamacpp
+    PUBLIC
+      ${cjson_SOURCE_DIR}
+  )
+
+  target_link_libraries(
+    wasi_nn_llamacpp
+    PUBLIC
+      libiwasm
+      cjson
+      common
+      ggml
+      llama
+  )
+
+  install(TARGETS wasi_nn_llamacpp DESTINATION lib)
+endif()

+ 9 - 0
core/iwasm/libraries/wasi-nn/include/wasi_nn_types.h

@@ -43,6 +43,11 @@ typedef enum {
     security,
     security,
     // The operation failed for an unspecified reason.
     // The operation failed for an unspecified reason.
     unknown,
     unknown,
+    // for WasmEdge-wasi-nn
+    end_of_sequence = 100,  // End of Sequence Found.
+    context_full = 101,     // Context Full.
+    prompt_tool_long = 102, // Prompt Too Long.
+    model_not_found = 103,  // Model Not Found.
 } wasi_nn_error;
 } wasi_nn_error;
 
 
 /**
 /**
@@ -140,6 +145,9 @@ typedef uint32_t graph_execution_context;
 typedef wasi_nn_error (*LOAD)(void *, graph_builder_array *, graph_encoding,
 typedef wasi_nn_error (*LOAD)(void *, graph_builder_array *, graph_encoding,
                               execution_target, graph *);
                               execution_target, graph *);
 typedef wasi_nn_error (*LOAD_BY_NAME)(void *, const char *, uint32_t, graph *);
 typedef wasi_nn_error (*LOAD_BY_NAME)(void *, const char *, uint32_t, graph *);
+typedef wasi_nn_error (*LOAD_BY_NAME_WITH_CONFIG)(void *, const char *,
+                                                  uint32_t, void *, uint32_t,
+                                                  graph *);
 typedef wasi_nn_error (*INIT_EXECUTION_CONTEXT)(void *, graph,
 typedef wasi_nn_error (*INIT_EXECUTION_CONTEXT)(void *, graph,
                                                 graph_execution_context *);
                                                 graph_execution_context *);
 typedef wasi_nn_error (*SET_INPUT)(void *, graph_execution_context, uint32_t,
 typedef wasi_nn_error (*SET_INPUT)(void *, graph_execution_context, uint32_t,
@@ -154,6 +162,7 @@ typedef wasi_nn_error (*BACKEND_DEINITIALIZE)(void *);
 typedef struct {
 typedef struct {
     LOAD load;
     LOAD load;
     LOAD_BY_NAME load_by_name;
     LOAD_BY_NAME load_by_name;
+    LOAD_BY_NAME_WITH_CONFIG load_by_name_with_config;
     INIT_EXECUTION_CONTEXT init_execution_context;
     INIT_EXECUTION_CONTEXT init_execution_context;
     SET_INPUT set_input;
     SET_INPUT set_input;
     COMPUTE compute;
     COMPUTE compute;

+ 108 - 25
core/iwasm/libraries/wasi-nn/src/wasi_nn.c

@@ -29,7 +29,7 @@
 struct backends_api_functions {
 struct backends_api_functions {
     void *backend_handle;
     void *backend_handle;
     api_function functions;
     api_function functions;
-} lookup[autodetect] = { 0 };
+} lookup[autodetect + 1] = { 0 };
 
 
 #define call_wasi_nn_func(backend_encoding, func, wasi_error, ...)         \
 #define call_wasi_nn_func(backend_encoding, func, wasi_error, ...)         \
     do {                                                                   \
     do {                                                                   \
@@ -168,14 +168,7 @@ wasi_nn_destroy()
             lookup[i].backend_handle = NULL;
             lookup[i].backend_handle = NULL;
         }
         }
 
 
-        lookup[i].functions.init = NULL;
-        lookup[i].functions.deinit = NULL;
-        lookup[i].functions.load = NULL;
-        lookup[i].functions.load_by_name = NULL;
-        lookup[i].functions.init_execution_context = NULL;
-        lookup[i].functions.set_input = NULL;
-        lookup[i].functions.compute = NULL;
-        lookup[i].functions.get_output = NULL;
+        memset(&lookup[i].functions, 0, sizeof(api_function));
     }
     }
 }
 }
 
 
@@ -208,6 +201,10 @@ choose_a_backend()
         return ggml;
         return ggml;
     }
     }
 
 
+#ifndef NDEBUG
+    NN_WARN_PRINTF("%s", dlerror());
+#endif
+
     handle = dlopen(OPENVINO_BACKEND_LIB, RTLD_LAZY);
     handle = dlopen(OPENVINO_BACKEND_LIB, RTLD_LAZY);
     if (handle) {
     if (handle) {
         NN_INFO_PRINTF("Using openvino backend");
         NN_INFO_PRINTF("Using openvino backend");
@@ -215,6 +212,10 @@ choose_a_backend()
         return openvino;
         return openvino;
     }
     }
 
 
+#ifndef NDEBUG
+    NN_WARN_PRINTF("%s", dlerror());
+#endif
+
     handle = dlopen(TFLITE_BACKEND_LIB, RTLD_LAZY);
     handle = dlopen(TFLITE_BACKEND_LIB, RTLD_LAZY);
     if (handle) {
     if (handle) {
         NN_INFO_PRINTF("Using tflite backend");
         NN_INFO_PRINTF("Using tflite backend");
@@ -222,6 +223,11 @@ choose_a_backend()
         return tensorflowlite;
         return tensorflowlite;
     }
     }
 
 
+#ifndef NDEBUG
+    NN_WARN_PRINTF("%s", dlerror());
+#endif
+
+    NN_WARN_PRINTF("No backend found");
     return unknown_backend;
     return unknown_backend;
 }
 }
 
 
@@ -257,6 +263,14 @@ register_backend(void *handle, api_function *functions)
     }
     }
     functions->load_by_name = load_by_name;
     functions->load_by_name = load_by_name;
 
 
+    LOAD_BY_NAME_WITH_CONFIG load_by_name_with_config =
+        (LOAD_BY_NAME_WITH_CONFIG)dlsym(handle, "load_by_name_with_config");
+    if (!load_by_name_with_config) {
+        NN_WARN_PRINTF("load_by_name_with_config() not found");
+        // not an error: only the llama.cpp backend needs to provide this function
+    }
+    functions->load_by_name_with_config = load_by_name_with_config;
+
     INIT_EXECUTION_CONTEXT init_execution_context =
     INIT_EXECUTION_CONTEXT init_execution_context =
         (INIT_EXECUTION_CONTEXT)dlsym(handle, "init_execution_context");
         (INIT_EXECUTION_CONTEXT)dlsym(handle, "init_execution_context");
     if (!init_execution_context) {
     if (!init_execution_context) {
@@ -329,21 +343,23 @@ graph_encoding_to_backend_lib_name(graph_encoding encoding)
 static bool
 static bool
 detect_and_load_backend(graph_encoding backend_hint,
 detect_and_load_backend(graph_encoding backend_hint,
                         struct backends_api_functions *backends,
                         struct backends_api_functions *backends,
-                        graph_encoding *loaded_backed)
+                        graph_encoding *loaded_backend)
 {
 {
-    if (backend_hint >= autodetect)
+    if (backend_hint > autodetect)
         return false;
         return false;
 
 
     if (backend_hint == autodetect)
     if (backend_hint == autodetect)
         backend_hint = choose_a_backend();
         backend_hint = choose_a_backend();
 
 
+    if (backend_hint == unknown_backend)
+        return false;
+
+    *loaded_backend = backend_hint;
+
     /* if already loaded */
     /* if already loaded */
-    if (lookup[backend_hint].backend_handle) {
-        *loaded_backed = backend_hint;
+    if (lookup[backend_hint].backend_handle)
         return true;
         return true;
-    }
 
 
-    *loaded_backed = backend_hint;
     const char *backend_lib_name =
     const char *backend_lib_name =
         graph_encoding_to_backend_lib_name(backend_hint);
         graph_encoding_to_backend_lib_name(backend_hint);
     if (!backend_lib_name)
     if (!backend_lib_name)
@@ -353,6 +369,7 @@ detect_and_load_backend(graph_encoding backend_hint,
 }
 }
 
 
 /* WASI-NN implementation */
 /* WASI-NN implementation */
+
 #if WASM_ENABLE_WASI_EPHEMERAL_NN != 0
 #if WASM_ENABLE_WASI_EPHEMERAL_NN != 0
 wasi_nn_error
 wasi_nn_error
 wasi_nn_load(wasm_exec_env_t exec_env, graph_builder_wasm *builder,
 wasi_nn_load(wasm_exec_env_t exec_env, graph_builder_wasm *builder,
@@ -392,15 +409,15 @@ wasi_nn_load(wasm_exec_env_t exec_env, graph_builder_array_wasm *builder,
         goto fail;
         goto fail;
     }
     }
 
 
-    graph_encoding loaded_backed = autodetect;
-    if (!detect_and_load_backend(encoding, lookup, &loaded_backed)) {
+    graph_encoding loaded_backend = autodetect;
+    if (!detect_and_load_backend(encoding, lookup, &loaded_backend)) {
         res = invalid_encoding;
         res = invalid_encoding;
         NN_ERR_PRINTF("load backend failed");
         NN_ERR_PRINTF("load backend failed");
         goto fail;
         goto fail;
     }
     }
 
 
     WASINNContext *wasi_nn_ctx = wasm_runtime_get_wasi_nn_ctx(instance);
     WASINNContext *wasi_nn_ctx = wasm_runtime_get_wasi_nn_ctx(instance);
-    wasi_nn_ctx->backend = loaded_backed;
+    wasi_nn_ctx->backend = loaded_backend;
 
 
     /* init() the backend */
     /* init() the backend */
     call_wasi_nn_func(wasi_nn_ctx->backend, init, res,
     call_wasi_nn_func(wasi_nn_ctx->backend, init, res,
@@ -413,7 +430,6 @@ wasi_nn_load(wasm_exec_env_t exec_env, graph_builder_array_wasm *builder,
     if (res != success)
     if (res != success)
         goto fail;
         goto fail;
 
 
-    wasi_nn_ctx->backend = loaded_backed;
     wasi_nn_ctx->is_model_loaded = true;
     wasi_nn_ctx->is_model_loaded = true;
 
 
 fail:
 fail:
@@ -428,8 +444,6 @@ wasi_nn_error
 wasi_nn_load_by_name(wasm_exec_env_t exec_env, char *name, uint32_t name_len,
 wasi_nn_load_by_name(wasm_exec_env_t exec_env, char *name, uint32_t name_len,
                      graph *g)
                      graph *g)
 {
 {
-    NN_DBG_PRINTF("[WASI NN] LOAD_BY_NAME %s...", name);
-
     wasm_module_inst_t instance = wasm_runtime_get_module_inst(exec_env);
     wasm_module_inst_t instance = wasm_runtime_get_module_inst(exec_env);
     if (!instance) {
     if (!instance) {
         return runtime_error;
         return runtime_error;
@@ -446,15 +460,23 @@ wasi_nn_load_by_name(wasm_exec_env_t exec_env, char *name, uint32_t name_len,
         return invalid_argument;
         return invalid_argument;
     }
     }
 
 
-    graph_encoding loaded_backed = autodetect;
-    if (detect_and_load_backend(autodetect, lookup, &loaded_backed)) {
+    if (name_len == 0 || name[name_len] != '\0') {
+        NN_ERR_PRINTF("Invalid filename");
+        return invalid_argument;
+    }
+
+    NN_DBG_PRINTF("[WASI NN] LOAD_BY_NAME %s...", name);
+
+    graph_encoding loaded_backend = autodetect;
+    if (!detect_and_load_backend(autodetect, lookup, &loaded_backend)) {
         NN_ERR_PRINTF("load backend failed");
         NN_ERR_PRINTF("load backend failed");
         return invalid_encoding;
         return invalid_encoding;
     }
     }
 
 
     WASINNContext *wasi_nn_ctx = wasm_runtime_get_wasi_nn_ctx(instance);
     WASINNContext *wasi_nn_ctx = wasm_runtime_get_wasi_nn_ctx(instance);
-    wasi_nn_error res;
+    wasi_nn_ctx->backend = loaded_backend;
 
 
+    wasi_nn_error res;
     /* init() the backend */
     /* init() the backend */
     call_wasi_nn_func(wasi_nn_ctx->backend, init, res,
     call_wasi_nn_func(wasi_nn_ctx->backend, init, res,
                       &wasi_nn_ctx->backend_ctx);
                       &wasi_nn_ctx->backend_ctx);
@@ -466,7 +488,67 @@ wasi_nn_load_by_name(wasm_exec_env_t exec_env, char *name, uint32_t name_len,
     if (res != success)
     if (res != success)
         return res;
         return res;
 
 
-    wasi_nn_ctx->backend = loaded_backed;
+    wasi_nn_ctx->backend = loaded_backend;
+    wasi_nn_ctx->is_model_loaded = true;
+    return success;
+}
+
+wasi_nn_error
+wasi_nn_load_by_name_with_config(wasm_exec_env_t exec_env, char *name,
+                                 int32_t name_len, char *config,
+                                 int32_t config_len, graph *g)
+{
+    wasm_module_inst_t instance = wasm_runtime_get_module_inst(exec_env);
+    if (!instance) {
+        return runtime_error;
+    }
+
+    if (!wasm_runtime_validate_native_addr(instance, name, name_len)) {
+        NN_ERR_PRINTF("name is invalid");
+        return invalid_argument;
+    }
+
+    if (!wasm_runtime_validate_native_addr(instance, g,
+                                           (uint64)sizeof(graph))) {
+        NN_ERR_PRINTF("graph is invalid");
+        return invalid_argument;
+    }
+
+    if (name_len == 0 || name[name_len] != '\0') {
+        NN_ERR_PRINTF("Invalid filename");
+        return invalid_argument;
+    }
+
+    if (!config || config_len == 0 || config[config_len] != '\0') {
+        NN_ERR_PRINTF("Invalid config");
+        return invalid_argument;
+    }
+
+    NN_DBG_PRINTF("[WASI NN] LOAD_BY_NAME_WITH_CONFIG %s %s...", name, config);
+
+    graph_encoding loaded_backend = autodetect;
+    if (!detect_and_load_backend(autodetect, lookup, &loaded_backend)) {
+        NN_ERR_PRINTF("load backend failed");
+        return invalid_encoding;
+    }
+
+    WASINNContext *wasi_nn_ctx = wasm_runtime_get_wasi_nn_ctx(instance);
+    wasi_nn_ctx->backend = loaded_backend;
+
+    wasi_nn_error res;
+    /* init() the backend */
+    call_wasi_nn_func(wasi_nn_ctx->backend, init, res,
+                      &wasi_nn_ctx->backend_ctx);
+    if (res != success)
+        return res;
+
+    call_wasi_nn_func(wasi_nn_ctx->backend, load_by_name_with_config, res,
+                      wasi_nn_ctx->backend_ctx, name, name_len, config,
+                      config_len, g);
+    if (res != success)
+        return res;
+
+    wasi_nn_ctx->backend = loaded_backend;
     wasi_nn_ctx->is_model_loaded = true;
     wasi_nn_ctx->is_model_loaded = true;
     return success;
     return success;
 }
 }
@@ -608,6 +690,7 @@ static NativeSymbol native_symbols_wasi_nn[] = {
 #if WASM_ENABLE_WASI_EPHEMERAL_NN != 0
 #if WASM_ENABLE_WASI_EPHEMERAL_NN != 0
     REG_NATIVE_FUNC(load, "(*iii*)i"),
     REG_NATIVE_FUNC(load, "(*iii*)i"),
     REG_NATIVE_FUNC(load_by_name, "(*i*)i"),
     REG_NATIVE_FUNC(load_by_name, "(*i*)i"),
+    REG_NATIVE_FUNC(load_by_name_with_config, "(*i*i*)i"),
     REG_NATIVE_FUNC(init_execution_context, "(i*)i"),
     REG_NATIVE_FUNC(init_execution_context, "(i*)i"),
     REG_NATIVE_FUNC(set_input, "(ii*)i"),
     REG_NATIVE_FUNC(set_input, "(ii*)i"),
     REG_NATIVE_FUNC(compute, "(i)i"),
     REG_NATIVE_FUNC(compute, "(i)i"),

+ 601 - 0
core/iwasm/libraries/wasi-nn/src/wasi_nn_llamacpp.c

@@ -0,0 +1,601 @@
+/*
+ * Copyright (C) 2019 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+#include "wasi_nn_types.h"
+#include "utils/logger.h"
+#include "llama.h"
+#include "ggml.h"
+#include "cJSON.h"
+
+// build info
+extern int LLAMA_BUILD_NUMBER;
+extern char const *LLAMA_COMMIT;
+extern char const *LLAMA_COMPILER;
+extern char const *LLAMA_BUILD_TARGET;
+
+// compatible with WasmEdge
+// https://github.com/second-state/WasmEdge-WASINN-examples/blob/master/wasmedge-ggml/README.md#parameters
+// https://github.com/WasmEdge/WasmEdge/blob/master/plugins/wasi_nn/ggml.cpp
+struct wasi_nn_llama_config {
+    // Backend (called "plugin" in WasmEdge) parameters:
+    bool enable_log;
+    bool enable_debug_log;
+    bool stream_stdout;
+    // embedding mode
+    bool embedding;
+    // TODO: can it be -1?
+    // can't be bigger than ctx_size
+    int32_t n_predict;
+    char *reverse_prompt;
+
+    // Used by LLaVA
+    // multimodal projector file
+    char *mmproj;
+    char *image;
+
+    // Model parameters (the model needs to be reloaded if these are updated):
+    // aligned to the definition of struct llama_model_params
+    int32_t n_gpu_layers;
+    int32_t main_gpu;
+    // limited size: llama_max_devices()
+    float *tensor_split;
+    bool use_mmap;
+
+    // Context parameters (used by the llama context):
+    uint32_t ctx_size;
+    uint32_t batch_size;
+    uint32_t ubatch_size;
+    uint32_t threads;
+
+    // Sampling parameters (used by the llama sampling context).
+    float temp;
+    float topP;
+    float repeat_penalty;
+    float presence_penalty;
+    float frequency_penalty;
+};
+
+struct LlamaContext {
+    struct llama_context *ctx;
+    struct llama_model *model;
+    llama_token *prompt;
+    size_t prompt_len;
+    llama_token *generation;
+    size_t generation_len;
+    struct wasi_nn_llama_config config;
+};
+
+static void
+wasm_edge_llama_default_configuration(struct wasi_nn_llama_config *output)
+{
+    output->enable_log = false;
+    output->enable_debug_log = false;
+    output->stream_stdout = false;
+    output->embedding = false;
+    output->n_predict = 512;
+    output->reverse_prompt = NULL;
+
+    output->mmproj = NULL;
+    output->image = NULL;
+
+    output->main_gpu = 0;
+    output->n_gpu_layers = 0;
+    output->tensor_split = NULL;
+    output->use_mmap = true;
+
+    // 0 = from model
+    output->ctx_size = 0;
+    output->batch_size = 512;
+    output->ubatch_size = output->batch_size;
+    output->threads = 1;
+
+    output->temp = 0.80;
+    output->topP = 0.95;
+    output->repeat_penalty = 1.10;
+    output->presence_penalty = 0.0;
+    output->frequency_penalty = 0.0;
+}
+
+static void
+wasm_edge_llama_apply_configuration(const char *config_json,
+                                    struct wasi_nn_llama_config *output)
+{
+    cJSON *root = cJSON_Parse(config_json);
+    if (root == NULL) {
+        const char *error_ptr = cJSON_GetErrorPtr();
+        if (error_ptr != NULL) {
+            NN_WARN_PRINTF("Error before: %s\n", error_ptr);
+        }
+        else {
+            NN_WARN_PRINTF("Failed to parse JSON");
+        }
+        return;
+    }
+
+    cJSON *item = NULL;
+
+    item = cJSON_GetObjectItem(root, "enable-log");
+    if (item != NULL) {
+        output->enable_log = cJSON_IsTrue(item);
+        NN_DBG_PRINTF("apply enable-log %d", output->enable_log);
+    }
+
+    item = cJSON_GetObjectItem(root, "enable-debug-log");
+    if (item != NULL) {
+        output->enable_debug_log = cJSON_IsTrue(item);
+        NN_DBG_PRINTF("apply enable-debug-log %d", output->enable_debug_log);
+    }
+
+    item = cJSON_GetObjectItem(root, "stream-stdout");
+    if (item != NULL) {
+        output->stream_stdout = cJSON_IsTrue(item);
+        NN_DBG_PRINTF("apply stream-stdout %d", output->stream_stdout);
+    }
+
+    item = cJSON_GetObjectItem(root, "embedding");
+    if (item != NULL) {
+        output->embedding = cJSON_IsTrue(item);
+        NN_DBG_PRINTF("apply embedding %d", output->embedding);
+    }
+
+    item = cJSON_GetObjectItem(root, "n-predict");
+    if (item != NULL) {
+        output->n_predict = (int32_t)cJSON_GetNumberValue(item);
+        NN_DBG_PRINTF("apply n-predict %d", output->n_predict);
+    }
+
+    item = cJSON_GetObjectItem(root, "n-gpu-layers");
+    if (item != NULL) {
+        output->n_gpu_layers = (int32_t)cJSON_GetNumberValue(item);
+        NN_DBG_PRINTF("apply n_gpu_layers %d", output->n_gpu_layers);
+    }
+
+    item = cJSON_GetObjectItem(root, "ctx-size");
+    if (item != NULL) {
+        output->ctx_size = (uint32_t)cJSON_GetNumberValue(item);
+        NN_DBG_PRINTF("apply ctx-size %d", output->ctx_size);
+    }
+
+    // more ...
+
+    cJSON_Delete(root);
+}
+
+static struct llama_model_params
+llama_model_params_from_wasi_nn_llama_config(
+    struct wasi_nn_llama_config *config)
+{
+    struct llama_model_params result = llama_model_default_params();
+
+    // TODO: support more
+    result.main_gpu = config->main_gpu;
+    result.n_gpu_layers = config->n_gpu_layers;
+    result.use_mmap = config->use_mmap;
+
+    return result;
+}
+
+static struct llama_context_params
+llama_context_params_from_wasi_nn_llama_config(
+    struct wasi_nn_llama_config *config)
+{
+    struct llama_context_params result = llama_context_default_params();
+
+    // TODO: support more
+    result.n_ctx = config->ctx_size;
+    // result.embeddings = config->embedding;
+
+    return result;
+}
+
+static void
+llama_batch_clear(struct llama_batch *batch)
+{
+    batch->n_tokens = 0;
+}
+
+static void
+llama_batch_add(struct llama_batch *batch, llama_token id, llama_pos pos,
+                llama_seq_id *seq_ids, size_t seq_ids_len, bool logits)
+{
+    batch->token[batch->n_tokens] = id;
+    batch->pos[batch->n_tokens] = pos;
+    batch->n_seq_id[batch->n_tokens] = seq_ids_len;
+    for (size_t i = 0; i < seq_ids_len; ++i) {
+        batch->seq_id[batch->n_tokens][i] = seq_ids[i];
+    }
+    batch->logits[batch->n_tokens] = logits;
+
+    batch->n_tokens++;
+}
+
+// always output ERROR and WARN
+// INFO needs enable_log
+// DEBUG needs enable_debug_log
+static void
+llama_log_callback_local(enum ggml_log_level level, const char *text,
+                         void *user_data)
+{
+    struct LlamaContext *backend_ctx = (struct LlamaContext *)user_data;
+
+    if (level == GGML_LOG_LEVEL_DEBUG && !backend_ctx->config.enable_debug_log)
+        return;
+
+    if (level == GGML_LOG_LEVEL_INFO && !backend_ctx->config.enable_log)
+        return;
+
+    printf("%s", text);
+}
+
+// Write a JSON object with the prompt/generation token counts and the
+// llama.cpp build number and commit into output_buf. At most
+// output_buf_size bytes are written; snprintf truncates longer output.
+static void
+llama_build_output_metadata(const struct LlamaContext *backend_ctx,
+                            char *output_buf, size_t output_buf_size)
+{
+    // prompt_len and generation_len are size_t, so %zu is the matching
+    // conversion (%ld is a mismatch wherever long and size_t differ)
+    snprintf(output_buf, output_buf_size,
+             "{\"input_tokens\":%zu, \"output_tokens\":%zu, "
+             "\"llama_build_number\":%d,"
+             "\"llama_commit\":\"%s\"}",
+             backend_ctx->prompt_len, backend_ctx->generation_len,
+             LLAMA_BUILD_NUMBER, LLAMA_COMMIT);
+}
+
+// Allocate a zeroed LlamaContext, initialize the llama backend and install
+// the log callback with the new context as its user data.
+// On success *ctx receives the context; returns runtime_error when the
+// allocation fails.
+__attribute__((visibility("default"))) wasi_nn_error
+init_backend(void **ctx)
+{
+    struct LlamaContext *backend_ctx = calloc(1, sizeof(struct LlamaContext));
+    if (!backend_ctx) {
+        NN_ERR_PRINTF("Allocate for LlamaContext failed");
+        return runtime_error;
+    }
+
+    llama_backend_init();
+    // llama_numa_init();
+    // the callback reads enable_log/enable_debug_log from this context
+    llama_log_set(llama_log_callback_local, backend_ctx);
+
+#ifndef NDEBUG
+    NN_INFO_PRINTF("llama_build_number: %d, llama_commit: %s, llama_compiler: "
+                   "%s, llama_build_target: %s",
+                   LLAMA_BUILD_NUMBER, LLAMA_COMMIT, LLAMA_COMPILER,
+                   LLAMA_BUILD_TARGET);
+#endif
+
+    *ctx = (void *)backend_ctx;
+    return success;
+}
+
+// Release the token buffers, llama context, model and the context structure
+// itself. Returns invalid_argument when ctx is NULL, success otherwise.
+__attribute__((visibility("default"))) wasi_nn_error
+deinit_backend(void *ctx)
+{
+    struct LlamaContext *backend_ctx = (struct LlamaContext *)ctx;
+
+    if (!backend_ctx)
+        return invalid_argument;
+
+    // free(NULL) is a no-op, so the buffers need no guards
+    free(backend_ctx->generation);
+    free(backend_ctx->prompt);
+
+    // the llama context was created from the model, so it goes first
+    if (backend_ctx->ctx)
+        llama_free(backend_ctx->ctx);
+
+    if (backend_ctx->model)
+        llama_free_model(backend_ctx->model);
+
+    llama_backend_free();
+
+    // backend_ctx is still registered as the log callback's user data;
+    // fall back to the default logger before the memory is released.
+    // NOTE(review): the log callback is process-wide - confirm this is
+    // acceptable when several contexts are alive at once.
+    llama_log_set(NULL, NULL);
+
+    // the context comes from calloc() in init_backend(), so pair it with
+    // free() rather than os_free()
+    free(backend_ctx);
+    return success;
+}
+
+__attribute__((visibility("default"))) wasi_nn_error
+load(void *ctx, graph_builder_array *builder, graph_encoding encoding,
+     execution_target target, graph *g)
+{
+    return unsupported_operation;
+}
+
+static wasi_nn_error
+__load_by_name_with_configuration(void *ctx, const char *filename, graph *g)
+{
+    struct LlamaContext *backend_ctx = (struct LlamaContext *)ctx;
+
+    // make sure backend_ctx->config is initialized
+
+    struct llama_model_params model_params =
+        llama_model_params_from_wasi_nn_llama_config(&backend_ctx->config);
+    struct llama_model *model =
+        llama_load_model_from_file(filename, model_params);
+    if (model == NULL) {
+        NN_ERR_PRINTF("Failed to load model from file %s", filename);
+        return runtime_error;
+    }
+
+#ifndef NDEBUG
+    char buf[128] = { 0 };
+    llama_model_desc(model, buf, 127);
+    NN_INFO_PRINTF("Model desc %s", buf);
+#endif
+
+    backend_ctx->model = model;
+
+    return success;
+}
+
+__attribute__((visibility("default"))) wasi_nn_error
+load_by_name(void *ctx, const char *filename, uint32_t filename_len, graph *g)
+{
+    struct LlamaContext *backend_ctx = (struct LlamaContext *)ctx;
+
+    // use default params
+    wasm_edge_llama_default_configuration(&backend_ctx->config);
+    return __load_by_name_with_configuration(ctx, filename, g);
+}
+
+__attribute__((visibility("default"))) wasi_nn_error
+load_by_name_with_config(void *ctx, const char *filename, uint32_t filename_len,
+                         const char *config, uint32_t config_len, graph *g)
+{
+    struct LlamaContext *backend_ctx = (struct LlamaContext *)ctx;
+
+    wasm_edge_llama_default_configuration(&backend_ctx->config);
+
+    if (config != NULL) {
+        // parse wasmedge config
+        wasm_edge_llama_apply_configuration(config, &backend_ctx->config);
+    }
+    else {
+        NN_INFO_PRINTF("No configuration provided, use default");
+    }
+
+    return __load_by_name_with_configuration(ctx, filename, g);
+}
+
+// It is assumed that model params shouldn't be changed in Config stage.
+// We only load the model once in the Load stage.
+__attribute__((visibility("default"))) wasi_nn_error
+init_execution_context(void *ctx, graph g, graph_execution_context *exec_ctx)
+{
+    struct LlamaContext *backend_ctx = (struct LlamaContext *)ctx;
+
+    struct llama_context_params ctx_params =
+        llama_context_params_from_wasi_nn_llama_config(&backend_ctx->config);
+    struct llama_context *llama_ctx =
+        llama_new_context_with_model(backend_ctx->model, ctx_params);
+    if (llama_ctx == NULL) {
+        NN_ERR_PRINTF("Failed to create context for model");
+        return runtime_error;
+    }
+
+    backend_ctx->ctx = llama_ctx;
+
+    NN_INFO_PRINTF("n_predict = %d, n_ctx = %d", backend_ctx->config.n_predict,
+                   llama_n_ctx(backend_ctx->ctx));
+    return success;
+}
+
+__attribute__((visibility("default"))) wasi_nn_error
+set_input(void *ctx, graph_execution_context exec_ctx, uint32_t index,
+          tensor *wasi_nn_tensor)
+{
+    struct LlamaContext *backend_ctx = (struct LlamaContext *)ctx;
+    // tensor->data is the prompt string. ends with \0
+    char *prompt_text = (char *)wasi_nn_tensor->data;
+
+#ifndef NDEBUG
+    NN_DBG_PRINTF("--------------------------------------------------");
+    NN_DBG_PRINTF("prompt_text: %s", prompt_text);
+    NN_DBG_PRINTF("--------------------------------------------------");
+#endif
+
+    // tokenize the prompt
+    uint32_t n_token_max = llama_n_ctx(backend_ctx->ctx);
+    uint32_t prompt_text_len = strlen(prompt_text);
+
+    if (backend_ctx->prompt == NULL) {
+        backend_ctx->prompt = calloc(n_token_max, sizeof(llama_token));
+        if (backend_ctx->prompt == NULL) {
+            NN_ERR_PRINTF("Failed to allocate tokens_list");
+            return runtime_error;
+        }
+    }
+
+    int32_t n_tokens =
+        llama_tokenize(backend_ctx->model, prompt_text, prompt_text_len,
+                       backend_ctx->prompt, n_token_max, true, false);
+    if (n_tokens < 0) {
+        NN_ERR_PRINTF("Failed to tokenize prompt text");
+        return runtime_error;
+    }
+
+    backend_ctx->prompt_len = n_tokens;
+
+    // make sure the KV cache is big enough to hold all the prompt and generated
+    // tokens
+    int n_kv_req = n_tokens + (backend_ctx->config.n_predict - n_tokens);
+    if (n_kv_req < 0 || (uint32_t)n_kv_req > n_token_max) {
+        NN_ERR_PRINTF("the required KV cache size is not big enough, either "
+                      "reduce n_predict or increase n_ctx");
+        return runtime_error;
+    }
+
+    return success;
+}
+
+// Evaluate the prompt stored in the backend context and greedily sample
+// tokens into backend_ctx->generation until an end-of-generation token is
+// produced or the sequence reaches config.n_predict tokens.
+//
+// Returns success, context_full, prompt_tool_long, invalid_argument (no
+// prompt tokens available) or runtime_error (allocation/decode failure).
+// The llama batch and the candidates array are released on every path once
+// they have been created.
+__attribute__((visibility("default"))) wasi_nn_error
+compute(void *ctx, graph_execution_context exec_ctx)
+{
+    struct LlamaContext *backend_ctx = (struct LlamaContext *)ctx;
+    wasi_nn_error ret = runtime_error;
+    llama_token_data *candidates = NULL;
+
+    // reset the generation buffer
+    if (backend_ctx->generation == NULL) {
+        backend_ctx->generation =
+            calloc(backend_ctx->config.n_predict, sizeof(llama_token));
+        if (backend_ctx->generation == NULL) {
+            NN_ERR_PRINTF("Failed to allocate generation");
+            return runtime_error;
+        }
+    }
+
+    backend_ctx->generation_len = 0;
+
+    // check KV cache
+    uint32_t n_ctx = llama_n_ctx(backend_ctx->ctx);
+    if (n_ctx <= backend_ctx->generation_len) {
+        NN_ERR_PRINTF(
+            "ctx_size(%u) is not big enough(<%zu), please increase it", n_ctx,
+            backend_ctx->generation_len);
+        return context_full;
+    }
+
+    // an empty prompt would make batch.logits[batch.n_tokens - 1] below
+    // index element -1
+    if (backend_ctx->prompt == NULL || backend_ctx->prompt_len == 0) {
+        NN_ERR_PRINTF("Prompt is empty, set the input first");
+        return invalid_argument;
+    }
+
+    // the batch is sized by batch_size; adding more tokens than that would
+    // write past the end of its arrays
+    if (backend_ctx->prompt_len > backend_ctx->config.batch_size) {
+        NN_ERR_PRINTF("prompt(%zu tokens) does not fit in batch_size(%u)",
+                      backend_ctx->prompt_len,
+                      backend_ctx->config.batch_size);
+        return prompt_tool_long;
+    }
+
+    // prepare the batch; from here on every exit goes through fail: so the
+    // batch is always freed
+    struct llama_batch batch =
+        llama_batch_init(backend_ctx->config.batch_size, 0, 1);
+
+    // evaluate the initial prompt
+    llama_seq_id seq_ids[1] = { 0 };
+    for (size_t i = 0; i < backend_ctx->prompt_len; i++) {
+        llama_batch_add(&batch, backend_ctx->prompt[i], i, seq_ids,
+                        sizeof(seq_ids) / sizeof(seq_ids[0]), false);
+    }
+
+    // only the logits of the last prompt token are needed for sampling
+    batch.logits[batch.n_tokens - 1] = true;
+
+    if (batch.n_tokens > backend_ctx->config.n_predict) {
+        NN_DBG_PRINTF("n_predict(%d) is not big enough(%d), please increase it",
+                      backend_ctx->config.n_predict, batch.n_tokens);
+        ret = prompt_tool_long;
+        goto fail;
+    }
+
+    if (llama_decode(backend_ctx->ctx, batch) != 0) {
+        NN_ERR_PRINTF("First decode failed");
+        goto fail;
+    }
+
+    // main loop
+    int32_t n_cur = batch.n_tokens;
+    int32_t n_vocab = llama_n_vocab(backend_ctx->model);
+
+    candidates = calloc(n_vocab, sizeof(llama_token_data));
+    if (candidates == NULL) {
+        NN_ERR_PRINTF("Failed to allocate candidates");
+        goto fail;
+    }
+
+    while (n_cur <= backend_ctx->config.n_predict) {
+        // sample the next token
+        float *logits =
+            llama_get_logits_ith(backend_ctx->ctx, batch.n_tokens - 1);
+
+        memset(candidates, 0, sizeof(llama_token_data) * n_vocab);
+        for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
+            candidates[token_id].id = token_id;
+            candidates[token_id].logit = logits[token_id];
+            candidates[token_id].p = 0.0f;
+        }
+
+        llama_token_data_array candidates_p = { candidates, n_vocab, false };
+
+        // sample the most likely token
+        llama_token new_token_id =
+            llama_sample_token_greedy(backend_ctx->ctx, &candidates_p);
+
+        backend_ctx->generation[backend_ctx->generation_len++] = new_token_id;
+
+#ifndef NDEBUG
+        {
+            char buf[128] = { 0 };
+            llama_token_to_piece(backend_ctx->model, new_token_id, buf, 120, 0,
+                                 true);
+            printf("%d(%s),", new_token_id, buf);
+        }
+#endif
+
+        // is it an end of generation?
+        if (llama_token_is_eog(backend_ctx->model, new_token_id)) {
+            printf("\n");
+            NN_INFO_PRINTF("reach the end of generation");
+            break;
+        }
+
+        // prepare the next batch
+        llama_batch_clear(&batch);
+        // push this new token for next evaluation
+        llama_batch_add(&batch, new_token_id, n_cur, seq_ids,
+                        sizeof(seq_ids) / sizeof(seq_ids[0]), true);
+        n_cur++;
+
+        if (llama_decode(backend_ctx->ctx, batch) != 0) {
+            NN_ERR_PRINTF("Secondary decode failed");
+            goto fail;
+        }
+    }
+
+    printf("\n");
+    ret = success;
+fail:
+    llama_batch_free(batch);
+    // candidates is NULL when the failure happened before its allocation
+    free(candidates);
+    return ret;
+}
+
+__attribute__((visibility("default"))) wasi_nn_error
+get_output(void *ctx, graph_execution_context exec_ctx, uint32_t index,
+           tensor_data output_tensor, uint32_t *output_tensor_size)
+{
+    struct LlamaContext *backend_ctx = (struct LlamaContext *)ctx;
+
+    // Compatibility with WasmEdge
+    if (index > 1) {
+        NN_ERR_PRINTF("Invalid output index %d", index);
+        return invalid_argument;
+    }
+
+    // Index 1 is for the metadata of the outputs.
+    if (index == 1) {
+        char output_metadata[128] = { 0 };
+        llama_build_output_metadata(backend_ctx, output_metadata, 127);
+
+        if (backend_ctx->config.stream_stdout) {
+            printf("%s\n", output_metadata);
+        }
+
+        memcpy(output_tensor, output_metadata, strlen(output_metadata));
+        *output_tensor_size = strlen(output_metadata);
+        return success;
+    }
+
+    // token -> piece -> output_tensor
+    if (backend_ctx->config.stream_stdout) {
+        printf("\n");
+    }
+
+    size_t end_pos = 0;
+    for (size_t i = 0; i < backend_ctx->generation_len; i++) {
+        char buf[128] = { 0 };
+        llama_token_to_piece(backend_ctx->model, backend_ctx->generation[i],
+                             buf, 120, 0, true);
+
+        if (backend_ctx->config.stream_stdout) {
+            printf("%s", buf);
+        }
+
+        memcpy(output_tensor + end_pos, buf, strlen(buf));
+        end_pos += strlen(buf);
+    }
+
+    if (backend_ctx->config.stream_stdout) {
+        printf("\n");
+    }
+
+    *output_tensor_size = end_pos;
+    return success;
+}

+ 29 - 14
core/iwasm/libraries/wasi-nn/test/Dockerfile.wasi-nn-smoke

@@ -63,21 +63,35 @@ WORKDIR /workspaces/wasmedge-wasinn-examples
 RUN git clone --depth 1 https://github.com/second-state/WasmEdge-WASINN-examples.git .
 RUN git clone --depth 1 https://github.com/second-state/WasmEdge-WASINN-examples.git .
 COPY core/iwasm/libraries/wasi-nn/test/bump_wasi_nn_to_0_6_0.patch .
 COPY core/iwasm/libraries/wasi-nn/test/bump_wasi_nn_to_0_6_0.patch .
 RUN git apply ./bump_wasi_nn_to_0_6_0.patch
 RUN git apply ./bump_wasi_nn_to_0_6_0.patch
-# recompile with wasi-nn 0.6.0
-RUN cd openvino-mobilenet-image/rust && cargo build --target=wasm32-wasi
-RUN cd openvino-mobilenet-raw/rust && cargo build --target=wasm32-wasi
-RUN cd openvino-road-segmentation-adas/openvino-road-seg-adas && cargo build --target=wasm32-wasi
-RUN cd tflite-birds_v1-image/rust && cargo build --target=wasm32-wasi
 
 
-# preparation
-RUN cd openvino-mobilenet-image \
+# recompile with wasi-nn 0.6.0
+WORKDIR /workspaces/wasmedge-wasinn-examples/openvino-mobilenet-image/
+RUN pushd rust \
+  && cargo build --target=wasm32-wasi \
+  && popd \
   && ./download_mobilenet.sh . \
   && ./download_mobilenet.sh . \
   && ls -l mobilenet.xml mobilenet.bin
   && ls -l mobilenet.xml mobilenet.bin
 
 
-RUN cd openvino-mobilenet-raw \
+WORKDIR /workspaces/wasmedge-wasinn-examples/openvino-mobilenet-raw/
+RUN pushd rust \
+  && cargo build --target=wasm32-wasi \
+  && popd \
   && ./download_mobilenet.sh . \
   && ./download_mobilenet.sh . \
   && ls -l mobilenet.xml mobilenet.bin tensor-1x224x224x3-f32.bgr
   && ls -l mobilenet.xml mobilenet.bin tensor-1x224x224x3-f32.bgr
 
 
+WORKDIR /workspaces/wasmedge-wasinn-examples/openvino-road-segmentation-adas/
+RUN pushd openvino-road-seg-adas \
+  && cargo build --target=wasm32-wasi
+
+WORKDIR /workspaces/wasmedge-wasinn-examples/tflite-birds_v1-image/
+RUN pushd rust \
+  && cargo build --target=wasm32-wasi
+
+# mount models when running
+WORKDIR /workspaces/wasmedge-wasinn-examples/wasmedge-ggml/qwen
+RUN wget --progress=dot:giga https://www.modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat-GGUF/resolve/master/qwen1_5-0_5b-chat-q2_k.gguf
+RUN cargo build --target=wasm32-wasi
+
 #
 #
 # iwasm. build from source
 # iwasm. build from source
 WORKDIR /workspaces/wamr
 WORKDIR /workspaces/wamr
@@ -88,15 +102,16 @@ WORKDIR /workspaces/wamr/product-mini/platforms/linux
 RUN OpenVINO_DIR=/usr/lib/openvino-2023.2.0 \
 RUN OpenVINO_DIR=/usr/lib/openvino-2023.2.0 \
     cmake -S . -B build \
     cmake -S . -B build \
     -DWAMR_BUILD_WASI_NN=1 -DWAMR_BUILD_WASI_EPHEMERAL_NN=1 \
     -DWAMR_BUILD_WASI_NN=1 -DWAMR_BUILD_WASI_EPHEMERAL_NN=1 \
-    -DWAMR_BUILD_WASI_NN_OPENVINO=1 -DWAMR_BUILD_WASI_NN_TFLITE=1 \
-  && cmake --build build
-
-ENV PATH=/workspaces/wamr/product-mini/platforms/linux/build:${PATH}
-ENV LD_LIBRARY_PATH=/workspaces/wamr/product-mini/platforms/linux/build
+    -DWAMR_BUILD_WASI_NN_OPENVINO=1 \
+    -DWAMR_BUILD_WASI_NN_TFLITE=1 \
+    -DWAMR_BUILD_WASI_NN_LLAMACPP=1 \
+  && cmake --build build \
+  && cmake --install build
+ 
+ENV LD_LIBRARY_PATH=/usr/local/lib
 
 
 # add smoke test script
 # add smoke test script
 COPY core/iwasm/libraries/wasi-nn/test/run_smoke_test.py /
 COPY core/iwasm/libraries/wasi-nn/test/run_smoke_test.py /
 
 
-#
 WORKDIR /workspaces/wasmedge-wasinn-examples
 WORKDIR /workspaces/wasmedge-wasinn-examples
 CMD ["python3", "/run_smoke_test.py"]
 CMD ["python3", "/run_smoke_test.py"]

+ 60 - 0
core/iwasm/libraries/wasi-nn/test/run_smoke_test.py

@@ -260,6 +260,63 @@ def execute_openvino_road_segmentation_adas(
     print("------------------------------------------------------------")
     print("------------------------------------------------------------")
 
 
 
 
+def execute_wasmedge_ggml_qwen(iwasm_bin: str, wasmedge_bin: str, cwd: Path):
+    iwasm_args = ["--dir=."]
+    wasm_file = ["./target/wasm32-wasi/debug/wasmedge-ggml-qwen.wasm"]
+    wasm_args = ["./qwen1_5-0_5b-chat-q2_k.gguf"]
+
+    cmd = [iwasm_bin]
+    cmd.extend(iwasm_args)
+    cmd.extend(wasm_file)
+    cmd.extend(wasm_args)
+
+    # print(f'Execute: {" ".join(cmd)}')
+
+    prompt = "what is the capital of Pakistan"
+
+    with subprocess.Popen(
+        cmd,
+        stdin=subprocess.PIPE,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        cwd=cwd,
+    ) as p:
+        # USER
+        p.stdout.readline()
+
+        p.stdin.write(b"hi\n")
+        p.stdin.flush()
+        # ASSITANT
+        p.stdout.readline()
+        # xxx
+        p.stdout.readline()
+        # USER
+        p.stdout.readline()
+
+        p.stdin.write(prompt.encode())
+        p.stdin.write(b"\n")
+        p.stdin.flush()
+        # ASSITANT
+        p.stdout.readline()
+        # xxx
+        answer = p.stdout.readline().decode("utf-8")
+        # USER
+        p.stdout.readline()
+
+        p.terminate()
+
+    if "Karachi" in answer:
+        print(f"- wasmedge_ggml_qwen. PASS")
+        return
+
+    print(f"- wasmedge_ggml_qwen. FAILED")
+    print("------------------------------------------------------------")
+    pprint(answer)
+    print("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")
+    pprint("Karachi")
+    print("------------------------------------------------------------")
+
+
 def execute_wasmedge_wasinn_examples(iwasm_bin: str, wasmedge_bin: str):
 def execute_wasmedge_wasinn_examples(iwasm_bin: str, wasmedge_bin: str):
     assert Path.cwd().name == "wasmedge-wasinn-examples"
     assert Path.cwd().name == "wasmedge-wasinn-examples"
     assert shutil.which(iwasm_bin)
     assert shutil.which(iwasm_bin)
@@ -282,6 +339,9 @@ def execute_wasmedge_wasinn_examples(iwasm_bin: str, wasmedge_bin: str):
         iwasm_bin, wasmedge_bin, openvino_road_segmentation_adas_dir
         iwasm_bin, wasmedge_bin, openvino_road_segmentation_adas_dir
     )
     )
 
 
+    wasmedge_ggml_qwem_dir = Path.cwd().joinpath("./wasmedge-ggml/qwen")
+    execute_wasmedge_ggml_qwen(iwasm_bin, wasmedge_bin, wasmedge_ggml_qwem_dir)
+
 
 
 if __name__ == "__main__":
 if __name__ == "__main__":
     execute_wasmedge_wasinn_examples("iwasm", "wasmedge")
     execute_wasmedge_wasinn_examples("iwasm", "wasmedge")

+ 12 - 5
core/shared/platform/common/posix/posix_memmap.c

@@ -138,18 +138,25 @@ os_mmap(void *hint, size_t size, int prot, int flags, os_file_handle file)
 
 
     /* memory hasn't been mapped or was mapped failed previously */
     /* memory hasn't been mapped or was mapped failed previously */
     if (addr == MAP_FAILED) {
     if (addr == MAP_FAILED) {
-        /* try 5 times */
-        for (i = 0; i < 5; i++) {
+        /* try 5 times on EAGAIN or ENOMEM, and keep retrying on EINTR */
+        i = 0;
+        while (i < 5) {
             addr = mmap(hint, request_size, map_prot, map_flags, file, 0);
             addr = mmap(hint, request_size, map_prot, map_flags, file, 0);
             if (addr != MAP_FAILED)
             if (addr != MAP_FAILED)
                 break;
                 break;
+            if (errno == EINTR)
+                continue;
+            if (errno != EAGAIN && errno != ENOMEM) {
+                break;
+            }
+            i++;
         }
         }
     }
     }
 
 
     if (addr == MAP_FAILED) {
     if (addr == MAP_FAILED) {
-#if BH_ENABLE_TRACE_MMAP != 0
-        os_printf("mmap failed\n");
-#endif
+        os_printf("mmap failed with errno: %d, hint: %p, size: %" PRIu64
+                  ", prot: %d, flags: %d",
+                  errno, hint, request_size, map_prot, map_flags);
         return NULL;
         return NULL;
     }
     }
 
 

+ 5 - 5
core/shared/platform/include/platform_api_extension.h

@@ -379,19 +379,19 @@ os_sem_unlink(const char *name);
  * Initialize process-global state for os_wakeup_blocking_op.
  * Initialize process-global state for os_wakeup_blocking_op.
  */
  */
 int
 int
-os_blocking_op_init();
+os_blocking_op_init(void);
 
 
 /**
 /**
  * Start accepting os_wakeup_blocking_op requests for the calling thread.
  * Start accepting os_wakeup_blocking_op requests for the calling thread.
  */
  */
 void
 void
-os_begin_blocking_op();
+os_begin_blocking_op(void);
 
 
 /**
 /**
  * Stop accepting os_wakeup_blocking_op requests for the calling thread.
  * Stop accepting os_wakeup_blocking_op requests for the calling thread.
  */
  */
 void
 void
-os_end_blocking_op();
+os_end_blocking_op(void);
 
 
 /**
 /**
  * Wake up the specified thread.
  * Wake up the specified thread.
@@ -1586,7 +1586,7 @@ os_closedir(os_dir_stream dir_stream);
  * @return the invalid directory stream
  * @return the invalid directory stream
  */
  */
 os_dir_stream
 os_dir_stream
-os_get_invalid_dir_stream();
+os_get_invalid_dir_stream(void);
 
 
 /**
 /**
  * Checks whether the given directory stream is valid. An invalid directory
  * Checks whether the given directory stream is valid. An invalid directory
@@ -1605,7 +1605,7 @@ os_is_dir_stream_valid(os_dir_stream *dir_stream);
  * @return the invalid handle
  * @return the invalid handle
  */
  */
 os_file_handle
 os_file_handle
-os_get_invalid_handle();
+os_get_invalid_handle(void);
 
 
 /**
 /**
  * Checks whether the given file handle is valid. An invalid handle is
  * Checks whether the given file handle is valid. An invalid handle is

+ 1 - 1
core/shared/platform/nuttx/platform_internal.h

@@ -137,7 +137,7 @@ typedef DIR *os_dir_stream;
 typedef int os_raw_file_handle;
 typedef int os_raw_file_handle;
 
 
 static inline os_file_handle
 static inline os_file_handle
-os_get_invalid_handle()
+os_get_invalid_handle(void)
 {
 {
     return -1;
     return -1;
 }
 }

+ 1 - 1
product-mini/platforms/common/libc_wasi.c

@@ -28,7 +28,7 @@ typedef enum {
 } libc_wasi_parse_result_t;
 } libc_wasi_parse_result_t;
 
 
 static void
 static void
-libc_wasi_print_help()
+libc_wasi_print_help(void)
 {
 {
     printf("  --env=<env>              Pass wasi environment variables with "
     printf("  --env=<env>              Pass wasi environment variables with "
            "\"key=value\"\n");
            "\"key=value\"\n");

+ 1 - 1
product-mini/platforms/nuttx/CMakeLists.txt

@@ -197,7 +197,7 @@ include(${WAMR_ROOT_DIR}/build-scripts/runtime_lib.cmake)
 # NuttX wamr lib compile required: `WAMR_SOURCES` `WAMR_CFLAGS` `WAMR_INCDIRS`
 # NuttX wamr lib compile required: `WAMR_SOURCES` `WAMR_CFLAGS` `WAMR_INCDIRS`
 # `WAMR_DEFINITIONS`
 # `WAMR_DEFINITIONS`
 set(WAMR_SOURCES ${WAMR_RUNTIME_LIB_SOURCE})
 set(WAMR_SOURCES ${WAMR_RUNTIME_LIB_SOURCE})
-set(WAMR_CFLAGS -Wno-strict-prototypes -Wno-shadow -Wno-unused-variable
+set(WAMR_CFLAGS -Wno-shadow -Wno-unused-variable
                 -Wno-int-conversion -Wno-implicit-function-declaration)
                 -Wno-int-conversion -Wno-implicit-function-declaration)
 get_directory_property(WAMR_INCDIRS INCLUDE_DIRECTORIES)
 get_directory_property(WAMR_INCDIRS INCLUDE_DIRECTORIES)
 get_directory_property(WAMR_DEFINITIONS COMPILE_DEFINITIONS)
 get_directory_property(WAMR_DEFINITIONS COMPILE_DEFINITIONS)

+ 1 - 1
product-mini/platforms/nuttx/wamr.mk

@@ -400,7 +400,7 @@ CFLAGS += -DWASM_ENABLE_EXCE_HANDLING=0
 CFLAGS += -DWASM_ENABLE_TAGS=0
 CFLAGS += -DWASM_ENABLE_TAGS=0
 endif
 endif
 
 
-CFLAGS += -Wno-strict-prototypes -Wno-shadow -Wno-unused-variable
+CFLAGS += -Wno-shadow -Wno-unused-variable
 CFLAGS += -Wno-int-conversion -Wno-implicit-function-declaration
 CFLAGS += -Wno-int-conversion -Wno-implicit-function-declaration
 
 
 CFLAGS += -I${CORE_ROOT} \
 CFLAGS += -I${CORE_ROOT} \

+ 1 - 1
product-mini/platforms/posix/main.c

@@ -27,7 +27,7 @@ static char **app_argv;
 
 
 /* clang-format off */
 /* clang-format off */
 static int
 static int
-print_help()
+print_help(void)
 {
 {
     printf("Usage: iwasm [-options] wasm_file [args...]\n");
     printf("Usage: iwasm [-options] wasm_file [args...]\n");
     printf("options:\n");
     printf("options:\n");

+ 1 - 1
tests/wamr-test-suites/test_wamr.sh

@@ -511,7 +511,7 @@ function spec_test()
         pushd spec
         pushd spec
 
 
         # Reset to commit: "Merge pull request #48 from backes/specify-memcpy-immediate-order"
         # Reset to commit: "Merge pull request #48 from backes/specify-memcpy-immediate-order"
-        git reset --hard 48e69f394869c55b7bbe14ac963c09f4605490b6
+        git reset --hard fbc99efd7a788db300aec3dd62a14577ec404f1b
         git checkout 044d0d2e77bdcbe891f7e0b9dd2ac01d56435f0b -- test/core/elem.wast
         git checkout 044d0d2e77bdcbe891f7e0b9dd2ac01d56435f0b -- test/core/elem.wast
         git apply ../../spec-test-script/multi_memory_ignore_cases.patch || exit 1
         git apply ../../spec-test-script/multi_memory_ignore_cases.patch || exit 1
         if [[ ${RUNNING_MODE} == "aot" ]]; then
         if [[ ${RUNNING_MODE} == "aot" ]]; then

+ 89 - 8
wamr-compiler/main.c

@@ -142,9 +142,7 @@ print_help()
     printf("                              with a runtime without the hardware bounds checks.\n");
     printf("                              with a runtime without the hardware bounds checks.\n");
     printf("  --stack-bounds-checks=1/0 Enable or disable the bounds checks for native stack:\n");
     printf("  --stack-bounds-checks=1/0 Enable or disable the bounds checks for native stack:\n");
     printf("                              if the option isn't set, the status is same as `--bounds-check`,\n");
     printf("                              if the option isn't set, the status is same as `--bounds-check`,\n");
-    printf("                              if the option is set:\n");
-    printf("                                (1) it is always enabled when `--bounds-checks` is enabled,\n");
-    printf("                                (2) else it is enabled/disabled according to the option value\n");
+    printf("                              if the option is set, the status is same as the option value\n");
     printf("  --stack-usage=<file>      Generate a stack-usage file.\n");
     printf("  --stack-usage=<file>      Generate a stack-usage file.\n");
     printf("                              Similarly to `clang -fstack-usage`.\n");
     printf("                              Similarly to `clang -fstack-usage`.\n");
     printf("  --format=<format>         Specifies the format of the output file\n");
     printf("  --format=<format>         Specifies the format of the output file\n");
@@ -164,6 +162,12 @@ print_help()
     printf("                              GC is enabled\n");
     printf("                              GC is enabled\n");
     printf("  --disable-aux-stack-check Disable auxiliary stack overflow/underflow check\n");
     printf("  --disable-aux-stack-check Disable auxiliary stack overflow/underflow check\n");
     printf("  --enable-dump-call-stack  Enable stack trace feature\n");
     printf("  --enable-dump-call-stack  Enable stack trace feature\n");
+    printf("  --call-stack-features=<features>\n");
+    printf("                            A comma-separated list of features when generating call stacks.\n");
+    printf("                            By default, all features are enabled. To disable all features,\n");
+    printf("                            provide an empty list (i.e. --call-stack-features=). This flag\n");
+    printf("                            only only takes effect when --enable-dump-call-stack is set.\n");
+    printf("                            Available features: bounds-checks, ip, trap-ip, values.\n");
     printf("  --enable-perf-profiling   Enable function performance profiling\n");
     printf("  --enable-perf-profiling   Enable function performance profiling\n");
     printf("  --enable-memory-profiling Enable memory usage profiling\n");
     printf("  --enable-memory-profiling Enable memory usage profiling\n");
     printf("  --xip                     A shorthand of --enable-indirect-mode --disable-llvm-intrinsics\n");
     printf("  --xip                     A shorthand of --enable-indirect-mode --disable-llvm-intrinsics\n");
@@ -261,6 +265,55 @@ split_string(char *str, int *count, const char *delimer)
     return res;
     return res;
 }
 }
 
 
+/**
+ * Enable every call stack feature named in a comma-separated list.
+ *
+ * Recognized names are "bounds-checks", "ip", "trap-ip" and "values". The
+ * matching flag in out_features is set to true; flags are never cleared
+ * here. Names are processed from the last one to the first one.
+ *
+ * @param features_str the list to parse, must not be NULL
+ * @param out_features the flags to update, must not be NULL
+ *
+ * @return true if every name was recognized, false if the list couldn't be
+ *         split or an unknown name was met (the name is printed)
+ */
+static bool
+parse_call_stack_features(char *features_str,
+                          AOTCallStackFeatures *out_features)
+{
+    char **names;
+    int count = 0;
+    int idx;
+    bool ok = true;
+
+    bh_assert(features_str);
+    bh_assert(out_features);
+
+    /* non-empty feature list */
+    names = split_string(features_str, &count, ",");
+    if (!names)
+        return false;
+
+    for (idx = count - 1; idx >= 0; idx--) {
+        const char *name = names[idx];
+
+        if (strcmp(name, "bounds-checks") == 0) {
+            out_features->bounds_checks = true;
+        }
+        else if (strcmp(name, "ip") == 0) {
+            out_features->ip = true;
+        }
+        else if (strcmp(name, "trap-ip") == 0) {
+            out_features->trap_ip = true;
+        }
+        else if (strcmp(name, "values") == 0) {
+            out_features->values = true;
+        }
+        else {
+            /* stop at the first unknown name */
+            printf("Unsupported feature %s\n", name);
+            ok = false;
+            break;
+        }
+    }
+
+    /* only the array returned by split_string is released here */
+    free(names);
+    return ok;
+}
+
+/**
+ * Tell whether the given options allow the tiny frame mode.
+ *
+ * @param opt the compilation options to inspect
+ *
+ * @return true only if none of call_stack_features.values, enable_gc and
+ *         enable_perf_profiling is set in opt
+ */
+static bool
+can_enable_tiny_frame(const AOTCompOption *opt)
+{
+    if (opt->call_stack_features.values)
+        return false;
+
+    if (opt->enable_gc)
+        return false;
+
+    return !opt->enable_perf_profiling;
+}
+
 static uint32
 static uint32
 resolve_segue_flags(char *str_flags)
 resolve_segue_flags(char *str_flags)
 {
 {
@@ -357,6 +410,7 @@ main(int argc, char *argv[])
     option.enable_bulk_memory = true;
     option.enable_bulk_memory = true;
     option.enable_ref_types = true;
     option.enable_ref_types = true;
     option.enable_gc = false;
     option.enable_gc = false;
+    aot_call_stack_features_init_default(&option.call_stack_features);
 
 
     /* Process options */
     /* Process options */
     for (argc--, argv++; argc > 0 && argv[0][0] == '-'; argc--, argv++) {
     for (argc--, argv++; argc > 0 && argv[0][0] == '-'; argc--, argv++) {
@@ -470,10 +524,23 @@ main(int argc, char *argv[])
             option.enable_aux_stack_check = false;
             option.enable_aux_stack_check = false;
         }
         }
         else if (!strcmp(argv[0], "--enable-dump-call-stack")) {
         else if (!strcmp(argv[0], "--enable-dump-call-stack")) {
-            option.enable_aux_stack_frame = true;
+            option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_STANDARD;
+        }
+        else if (!strncmp(argv[0], "--call-stack-features=", 22)) {
+            /* Reset all the features, only enable the user-defined ones */
+            memset(&option.call_stack_features, 0,
+                   sizeof(AOTCallStackFeatures));
+
+            if (argv[0][22] != '\0') {
+                if (!parse_call_stack_features(argv[0] + 22,
+                                               &option.call_stack_features)) {
+                    printf("Failed to parse call-stack-features\n");
+                    PRINT_HELP_AND_EXIT();
+                }
+            }
         }
         }
         else if (!strcmp(argv[0], "--enable-perf-profiling")) {
         else if (!strcmp(argv[0], "--enable-perf-profiling")) {
-            option.enable_aux_stack_frame = true;
+            option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_STANDARD;
             option.enable_perf_profiling = true;
             option.enable_perf_profiling = true;
         }
         }
         else if (!strcmp(argv[0], "--enable-memory-profiling")) {
         else if (!strcmp(argv[0], "--enable-memory-profiling")) {
@@ -488,7 +555,7 @@ main(int argc, char *argv[])
             option.is_indirect_mode = true;
             option.is_indirect_mode = true;
         }
         }
         else if (!strcmp(argv[0], "--enable-gc")) {
         else if (!strcmp(argv[0], "--enable-gc")) {
-            option.enable_aux_stack_frame = true;
+            option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_STANDARD;
             option.enable_gc = true;
             option.enable_gc = true;
         }
         }
         else if (!strcmp(argv[0], "--disable-llvm-intrinsics")) {
         else if (!strcmp(argv[0], "--disable-llvm-intrinsics")) {
@@ -590,6 +657,14 @@ main(int argc, char *argv[])
     if (!use_dummy_wasm && (argc == 0 || !out_file_name))
     if (!use_dummy_wasm && (argc == 0 || !out_file_name))
         PRINT_HELP_AND_EXIT();
         PRINT_HELP_AND_EXIT();
 
 
+    if (option.aux_stack_frame_type == AOT_STACK_FRAME_TYPE_STANDARD
+        && can_enable_tiny_frame(&option)) {
+        LOG_VERBOSE("Use tiny frame mode for stack frames");
+        option.aux_stack_frame_type = AOT_STACK_FRAME_TYPE_TINY;
+        /* for now we only enable frame per function for a TINY frame mode */
+        option.call_stack_features.frame_per_function = true;
+    }
+
     if (!size_level_set) {
     if (!size_level_set) {
         /**
         /**
          * Set opt level to 1 by default for Windows and MacOS as
          * Set opt level to 1 by default for Windows and MacOS as
@@ -601,8 +676,8 @@ main(int argc, char *argv[])
             LOG_VERBOSE("Set size level to 1 for Windows AOT file");
             LOG_VERBOSE("Set size level to 1 for Windows AOT file");
             option.size_level = 1;
             option.size_level = 1;
         }
         }
-#if defined(_WIN32) || defined(_WIN32_) || defined(__APPLE__) \
-    || defined(__MACH__)
+#if defined(_WIN32) || defined(_WIN32_) \
+    || ((defined(__APPLE__) || defined(__MACH__)) && !defined(__arm64__))
         if (!option.target_arch && !option.target_abi) {
         if (!option.target_arch && !option.target_abi) {
             LOG_VERBOSE("Set size level to 1 for Windows or MacOS AOT file");
             LOG_VERBOSE("Set size level to 1 for Windows or MacOS AOT file");
             option.size_level = 1;
             option.size_level = 1;
@@ -610,6 +685,12 @@ main(int argc, char *argv[])
 #endif
 #endif
     }
     }
 
 
+    if (option.enable_gc && !option.call_stack_features.values) {
+        LOG_WARNING("Call stack feature 'values' must be enabled for GC. The "
+                    "feature will be enabled automatically.");
+        option.call_stack_features.values = true;
+    }
+
     if (sgx_mode) {
     if (sgx_mode) {
         option.size_level = 1;
         option.size_level = 1;
         option.is_sgx_platform = true;
         option.is_sgx_platform = true;