Browse Source

Merge branch dev/aot_stack_frame into dev/gc_refactor

Wenyong Huang 2 years ago
parent
commit
ae508e25fa
36 changed files with 1565 additions and 270 deletions
  1. 20 0
      build-scripts/config_common.cmake
  2. 10 0
      core/config.h
  3. 35 0
      core/iwasm/aot/aot_loader.c
  4. 1 1
      core/iwasm/aot/aot_reloc.h
  5. 145 25
      core/iwasm/aot/aot_runtime.c
  6. 33 4
      core/iwasm/aot/aot_runtime.h
  7. 2 5
      core/iwasm/common/wasm_exec_env.h
  8. 4 0
      core/iwasm/common/wasm_runtime_common.h
  9. 1 0
      core/iwasm/compilation/aot.c
  10. 1 0
      core/iwasm/compilation/aot.h
  11. 412 7
      core/iwasm/compilation/aot_compiler.c
  12. 263 19
      core/iwasm/compilation/aot_compiler.h
  13. 17 1
      core/iwasm/compilation/aot_emit_aot_file.c
  14. 166 61
      core/iwasm/compilation/aot_emit_control.c
  15. 2 1
      core/iwasm/compilation/aot_emit_control.h
  16. 5 0
      core/iwasm/compilation/aot_emit_exception.c
  17. 45 19
      core/iwasm/compilation/aot_emit_function.c
  18. 4 3
      core/iwasm/compilation/aot_emit_function.h
  19. 1 1
      core/iwasm/compilation/aot_emit_memory.c
  20. 2 2
      core/iwasm/compilation/aot_emit_parametric.c
  21. 1 1
      core/iwasm/compilation/aot_emit_table.c
  22. 37 1
      core/iwasm/compilation/aot_emit_variable.c
  23. 109 14
      core/iwasm/compilation/aot_llvm.c
  24. 64 6
      core/iwasm/compilation/aot_llvm.h
  25. 0 74
      core/iwasm/fast-jit/jit_frontend.c
  26. 3 0
      core/iwasm/interpreter/wasm_interp.h
  27. 24 5
      core/iwasm/interpreter/wasm_interp_classic.c
  28. 6 0
      core/iwasm/interpreter/wasm_interp_fast.c
  29. 2 1
      core/iwasm/interpreter/wasm_loader.c
  30. 2 1
      core/iwasm/interpreter/wasm_mini_loader.c
  31. 107 10
      core/iwasm/interpreter/wasm_runtime.c
  32. 2 1
      core/iwasm/interpreter/wasm_runtime.h
  33. 30 6
      doc/build_wamr.md
  34. 0 1
      product-mini/platforms/linux/CMakeLists.txt
  35. 8 0
      product-mini/platforms/nuttx/wamr.mk
  36. 1 0
      wamr-compiler/CMakeLists.txt

+ 20 - 0
build-scripts/config_common.cmake

@@ -288,6 +288,14 @@ if (WAMR_BUILD_SIMD EQUAL 1)
     message ("     SIMD disabled due to not supported on target RISCV64")
   endif ()
 endif ()
+if (WAMR_BUILD_AOT_STACK_FRAME EQUAL 1)
+  add_definitions (-DWASM_ENABLE_AOT_STACK_FRAME=1)
+  message ("     AOT stack frame enabled")
+endif ()
+if (WAMR_BUILD_JIT_STACK_FRAME EQUAL 1)
+  add_definitions (-DWASM_ENABLE_JIT_STACK_FRAME=1)
+  message ("     JIT stack frame enabled")
+endif ()
 if (WAMR_BUILD_MEMORY_PROFILING EQUAL 1)
   add_definitions (-DWASM_ENABLE_MEMORY_PROFILING=1)
   message ("     Memory profiling enabled")
@@ -327,6 +335,18 @@ if (WAMR_BUILD_GC_BINARYEN EQUAL 1)
   add_definitions (-DWASM_ENABLE_GC_BINARYEN=1)
   message ("     GC binaryen compatible mode on")
 endif ()
+if (WAMR_BUILD_PERF_PROFILING EQUAL 1 OR
+    WAMR_BUILD_DUMP_CALL_STACK EQUAL 1 OR
+    WAMR_BUILD_GC EQUAL 1 OR WAMR_BUILD_GC_BINARYEN EQUAL 1)
+  # Enable AOT/JIT stack frame when perf-profiling, dump-call-stack,
+  # gc or gc-binaryen is enabled
+  if (WAMR_BUILD_AOT EQUAL 1)
+    add_definitions (-DWASM_ENABLE_AOT_STACK_FRAME=1)
+  endif ()
+  if (WAMR_BUILD_JIT EQUAL 1)
+    add_definitions (-DWASM_ENABLE_JIT_STACK_FRAME=1)
+  endif ()
+endif ()
 if (DEFINED WAMR_BH_VPRINTF)
   add_definitions (-DBH_VPRINTF=${WAMR_BH_VPRINTF})
 endif ()

+ 10 - 0
core/config.h

@@ -310,6 +310,16 @@
 #define WASM_ENABLE_DUMP_CALL_STACK 0
 #endif
 
+/* AOT stack frame */
+#ifndef WASM_ENABLE_AOT_STACK_FRAME
+#define WASM_ENABLE_AOT_STACK_FRAME 0
+#endif
+
+/* JIT stack frame */
+#ifndef WASM_ENABLE_JIT_STACK_FRAME
+#define WASM_ENABLE_JIT_STACK_FRAME 0
+#endif
+
 /* Heap verification */
 #ifndef BH_ENABLE_GC_VERIFY
 #define BH_ENABLE_GC_VERIFY 0

+ 35 - 0
core/iwasm/aot/aot_loader.c

@@ -2092,6 +2092,34 @@ load_function_section(const uint8 *buf, const uint8 *buf_end, AOTModule *module,
         }
     }
 
+    size = sizeof(uint32) * (uint64)module->func_count;
+
+    if (size > 0) {
+#if WASM_ENABLE_AOT_STACK_FRAME != 0
+        if (!(module->max_local_cell_nums =
+                  loader_malloc(size, error_buf, error_buf_size))) {
+            return false;
+        }
+
+        for (i = 0; i < module->func_count; i++) {
+            read_uint32(p, p_end, module->max_local_cell_nums[i]);
+        }
+
+        if (!(module->max_stack_cell_nums =
+                  loader_malloc(size, error_buf, error_buf_size))) {
+            return false;
+        }
+
+        for (i = 0; i < module->func_count; i++) {
+            read_uint32(p, p_end, module->max_stack_cell_nums[i]);
+        }
+#else
+        /* Ignore max_local_cell_num and max_stack_cell_num of each function */
+        CHECK_BUF(p, p_end, ((uint32)size * 2));
+        p += (uint32)size * 2;
+#endif
+    }
+
     if (p != buf_end) {
         set_error_buf(error_buf, error_buf_size,
                       "invalid function section size");
@@ -3618,6 +3646,13 @@ aot_unload(AOTModule *module)
     if (module->func_type_indexes)
         wasm_runtime_free(module->func_type_indexes);
 
+#if WASM_ENABLE_AOT_STACK_FRAME != 0
+    if (module->max_local_cell_nums)
+        wasm_runtime_free(module->max_local_cell_nums);
+    if (module->max_stack_cell_nums)
+        wasm_runtime_free(module->max_stack_cell_nums);
+#endif
+
     if (module->func_ptrs)
         wasm_runtime_free(module->func_ptrs);
 

+ 1 - 1
core/iwasm/aot/aot_reloc.h

@@ -49,7 +49,7 @@ typedef struct {
 #define REG_REF_TYPES_SYM()
 #endif
 
-#if (WASM_ENABLE_PERF_PROFILING != 0) || (WASM_ENABLE_DUMP_CALL_STACK != 0)
+#if WASM_ENABLE_AOT_STACK_FRAME != 0
 #define REG_AOT_TRACE_SYM()               \
     REG_SYM(aot_alloc_frame),             \
     REG_SYM(aot_free_frame),

+ 145 - 25
core/iwasm/aot/aot_runtime.c

@@ -20,6 +20,7 @@
  * AoT compilation code: aot_create_func_context, check_suspend_flags.
  */
 
+bh_static_assert(offsetof(WASMExecEnv, cur_frame) == 1 * sizeof(uintptr_t));
 bh_static_assert(offsetof(WASMExecEnv, module_inst) == 2 * sizeof(uintptr_t));
 bh_static_assert(offsetof(WASMExecEnv, argv_buf) == 3 * sizeof(uintptr_t));
 bh_static_assert(offsetof(WASMExecEnv, native_stack_boundary)
@@ -47,6 +48,11 @@ bh_static_assert(offsetof(AOTTableInstance, elems) == 24);
 
 bh_static_assert(offsetof(AOTModuleInstanceExtra, stack_sizes) == 0);
 
+bh_static_assert(offsetof(AOTFrame, ip_offset) == sizeof(uintptr_t) * 4);
+bh_static_assert(offsetof(AOTFrame, sp) == sizeof(uintptr_t) * 5);
+bh_static_assert(offsetof(AOTFrame, frame_ref) == sizeof(uintptr_t) * 6);
+bh_static_assert(offsetof(AOTFrame, lp) == sizeof(uintptr_t) * 7);
+
 static void
 set_error_buf(char *error_buf, uint32 error_buf_size, const char *string)
 {
@@ -1409,6 +1415,29 @@ fail:
     return NULL;
 }
 
+#if WASM_ENABLE_DUMP_CALL_STACK != 0
+/**
+ * Release a vector of WASMCApiFrame records captured for call stack dumping.
+ *
+ * Frees the heap copy of the frame data (lp) owned by each record, then
+ * destroys the vector's storage. The Vector object itself is not freed
+ * here; callers either free it or re-initialize it afterwards.
+ *
+ * @param frames the vector of WASMCApiFrame records to release
+ */
+static void
+destroy_c_api_frames(Vector *frames)
+{
+    uint32 total_frames = (uint32)bh_vector_size(frames);
+    uint32 i, ret;
+
+    for (i = 0; i < total_frames; i++) {
+        /* Use a zeroed record for every iteration so that a failed lookup
+           can never free the previous entry's lp a second time when
+           bh_assert is compiled out */
+        WASMCApiFrame frame = { 0 };
+
+        ret = bh_vector_get(frames, i, &frame);
+        bh_assert(ret);
+
+        if (ret && frame.lp)
+            wasm_runtime_free(frame.lp);
+    }
+
+    ret = bh_vector_destroy(frames);
+    bh_assert(ret);
+    /* ret is otherwise unused when bh_assert expands to nothing */
+    (void)ret;
+}
+#endif
+
 void
 aot_deinstantiate(AOTModuleInstance *module_inst, bool is_sub_inst)
 {
@@ -1428,7 +1457,7 @@ aot_deinstantiate(AOTModuleInstance *module_inst, bool is_sub_inst)
 
 #if WASM_ENABLE_DUMP_CALL_STACK != 0
     if (module_inst->frames) {
-        bh_vector_destroy(module_inst->frames);
+        destroy_c_api_frames(module_inst->frames);
         wasm_runtime_free(module_inst->frames);
         module_inst->frames = NULL;
     }
@@ -1600,6 +1629,18 @@ invoke_native_with_hw_bound_check(WASMExecEnv *exec_env, void *func_ptr,
 #define invoke_native_internal wasm_runtime_invoke_native
 #endif /* end of OS_ENABLE_HW_BOUND_CHECK */
 
+#ifdef AOT_STACK_FRAME_DEBUG
+typedef void (*stack_frame_callback_t)(struct WASMExecEnv *exec_env);
+static stack_frame_callback_t aot_stack_frame_callback;
+
+/* Register the callback that aot_call_function invokes with the exec_env
+   when the called function fails or leaves an exception behind; only
+   available when AOT_STACK_FRAME_DEBUG is defined, for debug purpose */
+void
+aot_set_stack_frame_callback(stack_frame_callback_t callback)
+{
+    aot_stack_frame_callback = callback;
+}
+#endif
+
 bool
 aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function,
                   unsigned argc, uint32 argv[])
@@ -1705,7 +1746,7 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function,
             cell_num += wasm_value_type_cell_num(ext_ret_types[i]);
         }
 
-#if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0)
+#if WASM_ENABLE_AOT_STACK_FRAME != 0
         if (!aot_alloc_frame(exec_env, function->func_index)) {
             if (argv1 != argv1_buf)
                 wasm_runtime_free(argv1);
@@ -1716,15 +1757,20 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function,
         ret = invoke_native_internal(exec_env, function->u.func.func_ptr,
                                      func_type, NULL, NULL, argv1, argc, argv);
 
-#if WASM_ENABLE_DUMP_CALL_STACK != 0
         if (!ret) {
+#ifdef AOT_STACK_FRAME_DEBUG
+            if (aot_stack_frame_callback) {
+                aot_stack_frame_callback(exec_env);
+            }
+#endif
+#if WASM_ENABLE_DUMP_CALL_STACK != 0
             if (aot_create_call_stack(exec_env)) {
                 aot_dump_call_stack(exec_env, true, NULL, 0);
             }
-        }
 #endif
+        }
 
-#if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0)
+#if WASM_ENABLE_AOT_STACK_FRAME != 0
         aot_free_frame(exec_env);
 #endif
         if (!ret) {
@@ -1766,7 +1812,7 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function,
         return true;
     }
     else {
-#if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0)
+#if WASM_ENABLE_AOT_STACK_FRAME != 0
         if (!aot_alloc_frame(exec_env, function->func_index)) {
             return false;
         }
@@ -1775,15 +1821,20 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function,
         ret = invoke_native_internal(exec_env, func_ptr, func_type, NULL, NULL,
                                      argv, argc, argv);
 
-#if WASM_ENABLE_DUMP_CALL_STACK != 0
         if (aot_copy_exception(module_inst, NULL)) {
+#ifdef AOT_STACK_FRAME_DEBUG
+            if (aot_stack_frame_callback) {
+                aot_stack_frame_callback(exec_env);
+            }
+#endif
+#if WASM_ENABLE_DUMP_CALL_STACK != 0
             if (aot_create_call_stack(exec_env)) {
                 aot_dump_call_stack(exec_env, true, NULL, 0);
             }
-        }
 #endif
+        }
 
-#if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0)
+#if WASM_ENABLE_AOT_STACK_FRAME != 0
         aot_free_frame(exec_env);
 #endif
 
@@ -2863,7 +2914,8 @@ aot_table_grow(AOTModuleInstance *module_inst, uint32 tbl_idx,
 }
 #endif /* WASM_ENABLE_REF_TYPES != 0 || WASM_ENABLE_GC != 0 */
 
-#if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0)
+#if WASM_ENABLE_AOT_STACK_FRAME != 0
+#if WASM_ENABLE_DUMP_CALL_STACK != 0 || WASM_ENABLE_PERF_PROFILING != 0
 #if WASM_ENABLE_CUSTOM_NAME_SECTION != 0
 static const char *
 lookup_func_name(const char **func_names, uint32 *func_indexes,
@@ -2922,28 +2974,57 @@ get_func_name_from_index(const AOTModuleInstance *module_inst,
 
     return func_name;
 }
+#endif /* end of WASM_ENABLE_DUMP_CALL_STACK != 0 || \
+          WASM_ENABLE_PERF_PROFILING != 0 */
 
 bool
 aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
 {
-    AOTFrame *frame =
-        wasm_exec_env_alloc_wasm_frame(exec_env, sizeof(AOTFrame));
-#if WASM_ENABLE_PERF_PROFILING != 0
     AOTModuleInstance *module_inst = (AOTModuleInstance *)exec_env->module_inst;
+    AOTModule *module = (AOTModule *)module_inst->module;
+#if WASM_ENABLE_PERF_PROFILING != 0
     AOTFuncPerfProfInfo *func_perf_prof =
         module_inst->func_perf_profilings + func_index;
 #endif
+    AOTFrame *frame;
+    uint32 max_local_cell_num, max_stack_cell_num, all_cell_num;
+    uint32 aot_func_idx, frame_size;
+
+    if (func_index >= module->import_func_count) {
+        aot_func_idx = func_index - module->import_func_count;
+        max_local_cell_num = module->max_local_cell_nums[aot_func_idx];
+        max_stack_cell_num = module->max_stack_cell_nums[aot_func_idx];
+    }
+    else {
+        AOTFuncType *func_type = module->import_funcs[func_index].func_type;
+        max_local_cell_num =
+            func_type->param_cell_num > 2 ? func_type->param_cell_num : 2;
+        max_stack_cell_num = 0;
+    }
+
+    all_cell_num = max_local_cell_num + max_stack_cell_num;
+#if WASM_ENABLE_GC == 0
+    frame_size = (uint32)offsetof(AOTFrame, lp) + all_cell_num * 4;
+#else
+    frame_size =
+        (uint32)offsetof(AOTFrame, lp) + align_uint(all_cell_num * 5, 4);
+#endif
+    frame = wasm_exec_env_alloc_wasm_frame(exec_env, frame_size);
 
     if (!frame) {
         aot_set_exception((AOTModuleInstance *)exec_env->module_inst,
-                          "auxiliary call stack overflow");
+                          "wasm operand stack overflow");
         return false;
     }
 
 #if WASM_ENABLE_PERF_PROFILING != 0
-    frame->time_started = os_time_get_boot_microsecond();
+    frame->time_started = (uintptr_t)os_time_get_boot_microsecond();
     frame->func_perf_prof_info = func_perf_prof;
 #endif
+    frame->sp = frame->lp + max_local_cell_num;
+#if WASM_ENABLE_GC != 0
+    frame->frame_ref = (uint8 *)(frame->sp + max_stack_cell_num);
+#endif
 
     frame->prev_frame = (AOTFrame *)exec_env->cur_frame;
     exec_env->cur_frame = (struct WASMInterpFrame *)frame;
@@ -2960,15 +3041,14 @@ aot_free_frame(WASMExecEnv *exec_env)
 
 #if WASM_ENABLE_PERF_PROFILING != 0
     cur_frame->func_perf_prof_info->total_exec_time +=
-        os_time_get_boot_microsecond() - cur_frame->time_started;
+        (uintptr_t)os_time_get_boot_microsecond() - cur_frame->time_started;
     cur_frame->func_perf_prof_info->total_exec_cnt++;
 #endif
 
     wasm_exec_env_free_wasm_frame(exec_env, cur_frame);
     exec_env->cur_frame = (struct WASMInterpFrame *)prev_frame;
 }
-#endif /* end of (WASM_ENABLE_DUMP_CALL_STACK != 0) \
-                 || (WASM_ENABLE_PERF_PROFILING != 0) */
+#endif /* end of WASM_ENABLE_AOT_STACK_FRAME != 0 */
 
 #if WASM_ENABLE_DUMP_CALL_STACK != 0
 bool
@@ -2977,6 +3057,7 @@ aot_create_call_stack(struct WASMExecEnv *exec_env)
     AOTFrame *cur_frame = (AOTFrame *)exec_env->cur_frame,
              *first_frame = cur_frame;
     AOTModuleInstance *module_inst = (AOTModuleInstance *)exec_env->module_inst;
+    AOTModule *module = (AOTModule *)module_inst->module;
     uint32 n = 0;
 
     while (cur_frame) {
@@ -2985,24 +3066,63 @@ aot_create_call_stack(struct WASMExecEnv *exec_env)
     }
 
     /* release previous stack frames and create new ones */
-    if (!bh_vector_destroy(module_inst->frames)
-        || !bh_vector_init(module_inst->frames, n, sizeof(WASMCApiFrame),
-                           false)) {
+    destroy_c_api_frames(module_inst->frames);
+    if (!bh_vector_init(module_inst->frames, n, sizeof(WASMCApiFrame), false)) {
         return false;
     }
 
     cur_frame = first_frame;
     while (cur_frame) {
         WASMCApiFrame frame = { 0 };
+        uint32 max_local_cell_num, max_stack_cell_num;
+        uint32 all_cell_num, lp_size;
+
         frame.instance = module_inst;
         frame.module_offset = 0;
         frame.func_index = cur_frame->func_index;
-        frame.func_offset = 0;
+        frame.func_offset = cur_frame->ip_offset;
         frame.func_name_wp =
             get_func_name_from_index(module_inst, cur_frame->func_index);
 
+        if (cur_frame->func_index >= module->import_func_count) {
+            uint32 aot_func_idx =
+                cur_frame->func_index - module->import_func_count;
+            max_local_cell_num = module->max_local_cell_nums[aot_func_idx];
+            max_stack_cell_num = module->max_stack_cell_nums[aot_func_idx];
+        }
+        else {
+            AOTFuncType *func_type =
+                module->import_funcs[cur_frame->func_index].func_type;
+            max_local_cell_num =
+                func_type->param_cell_num > 2 ? func_type->param_cell_num : 2;
+            max_stack_cell_num = 0;
+        }
+
+        all_cell_num = max_local_cell_num + max_stack_cell_num;
+#if WASM_ENABLE_GC == 0
+        lp_size = all_cell_num * 4;
+#else
+        lp_size = align_uint(all_cell_num * 5, 4);
+#endif
+        if (lp_size > 0) {
+            if (!(frame.lp = wasm_runtime_malloc(lp_size))) {
+                destroy_c_api_frames(module_inst->frames);
+                return false;
+            }
+            bh_memcpy_s(frame.lp, lp_size, cur_frame->lp, lp_size);
+
+            /* Only save frame sp when fast-interpr isn't enabled */
+            frame.sp = frame.lp + (cur_frame->sp - cur_frame->lp);
+#if WASM_ENABLE_GC != 0
+            frame.frame_ref = (uint8 *)frame.lp
+                              + (cur_frame->frame_ref - (uint8 *)cur_frame->lp);
+#endif
+        }
+
         if (!bh_vector_append(module_inst->frames, &frame)) {
-            bh_vector_destroy(module_inst->frames);
+            if (frame.lp)
+                wasm_runtime_free(frame.lp);
+            destroy_c_api_frames(module_inst->frames);
             return false;
         }
 
@@ -3084,7 +3204,7 @@ aot_dump_call_stack(WASMExecEnv *exec_env, bool print, char *buf, uint32 len)
 
     return total_len + 1;
 }
-#endif /* end of WASM_ENABLE_DUMP_CALL_STACK */
+#endif /* end of WASM_ENABLE_DUMP_CALL_STACK != 0 */
 
 #if WASM_ENABLE_PERF_PROFILING != 0
 void
@@ -3112,7 +3232,7 @@ aot_dump_perf_profiling(const AOTModuleInstance *module_inst)
                       perf_prof->total_exec_cnt);
     }
 }
-#endif /* end of WASM_ENABLE_PERF_PROFILING */
+#endif /* end of WASM_ENABLE_PERF_PROFILING != 0 */
 
 #if WASM_ENABLE_STATIC_PGO != 0
 

+ 33 - 4
core/iwasm/aot/aot_runtime.h

@@ -175,6 +175,12 @@ typedef struct AOTModule {
     void **func_ptrs;
     /* func type indexes of AOTed (un-imported) functions */
     uint32 *func_type_indexes;
+#if WASM_ENABLE_AOT_STACK_FRAME != 0
+    /* max local cell nums of AOTed (un-imported) functions */
+    uint32 *max_local_cell_nums;
+    /* max stack cell nums of AOTed (un-imported) functions */
+    uint32 *max_stack_cell_nums;
+#endif
 
     /* export info */
     uint32 export_count;
@@ -317,12 +323,35 @@ typedef struct AOTFuncPerfProfInfo {
 
 /* AOT auxiliary call stack */
 typedef struct AOTFrame {
+    /* The frame of the caller which is calling current function */
     struct AOTFrame *prev_frame;
-    uint32 func_index;
-#if WASM_ENABLE_PERF_PROFILING != 0
-    uint64 time_started;
+
+    /* The non-imported function index of current function */
+    uintptr_t func_index;
+
+    /* Used when performance profiling is enabled */
+    uintptr_t time_started;
+
+    /* Used when performance profiling is enabled */
     AOTFuncPerfProfInfo *func_perf_prof_info;
-#endif
+
+    /* Instruction pointer: offset to the bytecode array */
+    uintptr_t ip_offset;
+
+    /* Operand stack top pointer of the current frame */
+    uint32 *sp;
+
+    /* Frame ref flags (GC only) */
+    uint8 *frame_ref;
+
+    /**
+     * Frame data, the layout is:
+     *  local area: parameters and local variables
+     *  stack area: wasm operand stack
+     *  frame ref flags (GC only):
+     *      whether each cell in local and stack area is a GC obj
+     */
+    uint32 lp[1];
 } AOTFrame;
 
 #if WASM_ENABLE_STATIC_PGO != 0

+ 2 - 5
core/iwasm/common/wasm_exec_env.h

@@ -38,8 +38,8 @@ typedef struct WASMExecEnv {
     /* Next thread's exec env of a WASM module instance. */
     struct WASMExecEnv *next;
 
-    /* Previous thread's exec env of a WASM module instance. */
-    struct WASMExecEnv *prev;
+    /* Current interpreter/AOT frame of current thread */
+    struct WASMInterpFrame *cur_frame;
 
     /* Note: field module_inst, argv_buf, native_stack_boundary,
        suspend_flags, aux_stack_boundary, aux_stack_bottom, and
@@ -130,9 +130,6 @@ typedef struct WASMExecEnv {
 
     void *user_data;
 
-    /* Current interpreter frame of current thread */
-    struct WASMInterpFrame *cur_frame;
-
     /* The native thread handle of current thread */
     korp_tid handle;
 

+ 4 - 0
core/iwasm/common/wasm_runtime_common.h

@@ -456,6 +456,10 @@ typedef struct wasm_frame_t {
     uint32 func_index;
     uint32 func_offset;
     const char *func_name_wp;
+
+    uint32 *sp;
+    uint8 *frame_ref;
+    uint32 *lp;
 } WASMCApiFrame;
 
 #ifdef WASM_ENABLE_JIT

+ 1 - 0
core/iwasm/compilation/aot.c

@@ -360,6 +360,7 @@ aot_create_funcs(const WASMModule *module)
         funcs[i]->local_types = func->local_types;
         funcs[i]->param_cell_num = func->param_cell_num;
         funcs[i]->local_cell_num = func->local_cell_num;
+        funcs[i]->max_stack_cell_num = func->max_stack_cell_num;
         funcs[i]->code = func->code;
         funcs[i]->code_size = func->code_size;
     }

+ 1 - 0
core/iwasm/compilation/aot.h

@@ -224,6 +224,7 @@ typedef struct AOTFunc {
     uint8 *local_types;
     uint16 param_cell_num;
     uint16 local_cell_num;
+    uint32 max_stack_cell_num;
     uint32 code_size;
     uint8 *code;
 } AOTFunc;

+ 412 - 7
core/iwasm/compilation/aot_compiler.c

@@ -161,6 +161,385 @@ aot_validate_wasm(AOTCompContext *comp_ctx)
         OP_ATOMIC_##OP : bin_op = LLVMAtomicRMWBinOp##OP; \
         goto build_atomic_rmw;
 
+/**
+ * Emit LLVM IR that stores a value into the stack frame at a byte offset.
+ *
+ * @param comp_ctx the compilation context supplying the builder and types
+ * @param value the LLVM value to store
+ * @param value_type the wasm value type selecting the pointer type used
+ *        for the store: VALUE_TYPE_I32, I64, F32, F64 or V128
+ * @param cur_frame LLVM value of the frame base address
+ * @param offset byte offset of the destination from cur_frame
+ *
+ * @return true if the IR was built, false otherwise (last error is set)
+ */
+static bool
+store_value(AOTCompContext *comp_ctx, LLVMValueRef value, uint8 value_type,
+            LLVMValueRef cur_frame, uint32 offset)
+{
+    LLVMValueRef value_offset, value_addr, value_ptr = NULL, res;
+    LLVMTypeRef value_ptr_type;
+
+    if (!(value_offset = I32_CONST(offset))) {
+        aot_set_last_error("llvm build const failed");
+        return false;
+    }
+
+    /* Address the destination as cur_frame + offset, counted in bytes */
+    if (!(value_addr =
+              LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, cur_frame,
+                                    &value_offset, 1, "value_addr"))) {
+        aot_set_last_error("llvm build in bounds gep failed");
+        return false;
+    }
+
+    switch (value_type) {
+        case VALUE_TYPE_I32:
+            value_ptr_type = INT32_PTR_TYPE;
+            break;
+        case VALUE_TYPE_I64:
+            value_ptr_type = INT64_PTR_TYPE;
+            break;
+        case VALUE_TYPE_F32:
+            value_ptr_type = F32_PTR_TYPE;
+            break;
+        case VALUE_TYPE_F64:
+            value_ptr_type = F64_PTR_TYPE;
+            break;
+        case VALUE_TYPE_V128:
+            value_ptr_type = V128_PTR_TYPE;
+            break;
+        default:
+            /* Fail explicitly: continuing would pass an uninitialized
+               value_ptr_type to LLVMBuildBitCast when bh_assert is
+               compiled out */
+            bh_assert(0);
+            aot_set_last_error("unsupported value type");
+            return false;
+    }
+
+    if (!(value_ptr = LLVMBuildBitCast(comp_ctx->builder, value_addr,
+                                       value_ptr_type, "value_ptr"))) {
+        aot_set_last_error("llvm build bit cast failed");
+        return false;
+    }
+
+    if (!(res = LLVMBuildStore(comp_ctx->builder, value, value_ptr))) {
+        aot_set_last_error("llvm build store failed");
+        return false;
+    }
+
+    /* Mark the store as 1-byte aligned */
+    LLVMSetAlignment(res, 1);
+
+    return true;
+}
+
+/**
+ * Emit LLVM IR that writes every dirty value slot between frame->lp and
+ * frame->sp into the runtime stack frame, clearing the dirty flags.
+ *
+ * @param frame the compile-time frame whose slots are committed
+ *
+ * @return true on success, false otherwise (last error is set)
+ */
+bool
+aot_gen_commit_values(AOTCompFrame *frame)
+{
+    AOTCompContext *comp_ctx = frame->comp_ctx;
+    AOTFuncContext *func_ctx = frame->func_ctx;
+    AOTValueSlot *p;
+    LLVMValueRef value;
+    uint32 n;
+
+    for (p = frame->lp; p < frame->sp; p++) {
+        if (!p->dirty)
+            continue;
+
+        p->dirty = 0;
+        /* Cell index of the value's first cell; taken before p is advanced
+           over the remaining cells of a multi-cell value */
+        n = (uint32)(p - frame->lp);
+
+        /* For multi-cell types p is advanced to the value's last cell, so
+           the loop increment moves on to the next value. Note p->value is
+           then read from that last cell - presumably every cell of a value
+           holds the same LLVM value, TODO confirm in set_local_xxx */
+        switch (p->type) {
+            case VALUE_TYPE_I32:
+            case VALUE_TYPE_FUNCREF:
+            case VALUE_TYPE_EXTERNREF:
+                if (!store_value(comp_ctx, p->value, VALUE_TYPE_I32,
+                                 func_ctx->cur_frame,
+                                 offset_of_local(comp_ctx, n)))
+                    return false;
+                break;
+            case VALUE_TYPE_I64:
+                (++p)->dirty = 0;
+                if (!store_value(comp_ctx, p->value, VALUE_TYPE_I64,
+                                 func_ctx->cur_frame,
+                                 offset_of_local(comp_ctx, n)))
+                    return false;
+                break;
+            case VALUE_TYPE_F32:
+                if (!store_value(comp_ctx, p->value, VALUE_TYPE_F32,
+                                 func_ctx->cur_frame,
+                                 offset_of_local(comp_ctx, n)))
+                    return false;
+                break;
+            case VALUE_TYPE_F64:
+                (++p)->dirty = 0;
+                if (!store_value(comp_ctx, p->value, VALUE_TYPE_F64,
+                                 func_ctx->cur_frame,
+                                 offset_of_local(comp_ctx, n)))
+                    return false;
+                break;
+            case VALUE_TYPE_V128:
+                (++p)->dirty = 0;
+                (++p)->dirty = 0;
+                (++p)->dirty = 0;
+                if (!store_value(comp_ctx, p->value, VALUE_TYPE_V128,
+                                 func_ctx->cur_frame,
+                                 offset_of_local(comp_ctx, n)))
+                    return false;
+                break;
+            case VALUE_TYPE_I1:
+                /* Widen the i1 to i32 so that it is stored as one cell */
+                if (!(value = LLVMBuildZExt(comp_ctx->builder, p->value,
+                                            I32_TYPE, "i32_val"))) {
+                    aot_set_last_error("llvm build zext failed");
+                    return false;
+                }
+                if (!store_value(comp_ctx, value, VALUE_TYPE_I32,
+                                 func_ctx->cur_frame,
+                                 offset_of_local(comp_ctx, n)))
+                    return false;
+                break;
+            default:
+                bh_assert(0);
+                break;
+        }
+    }
+
+    return true;
+}
+
+/**
+ * Emit LLVM IR that records the current instruction position and the
+ * operand stack top into the runtime stack frame.
+ *
+ * @param frame the compile-time frame supplying comp_ctx, func_ctx and lp
+ * @param sp the compile-time operand stack top; committed as the address
+ *        cur_frame + offset_of_local(sp - frame->lp)
+ * @param ip the bytecode position to commit; in AOT mode it is stored as
+ *        the offset from the start of the function's code, in JIT mode as
+ *        the raw pointer value
+ *
+ * @return true on success, false otherwise (last error is set)
+ */
+bool
+aot_gen_commit_sp_ip(AOTCompFrame *frame, const AOTValueSlot *sp,
+                     const uint8 *ip)
+{
+    AOTCompContext *comp_ctx = frame->comp_ctx;
+    AOTFuncContext *func_ctx = frame->func_ctx;
+    LLVMValueRef cur_frame = func_ctx->cur_frame;
+    LLVMValueRef value_offset, value_addr, value_ptr, value;
+    LLVMTypeRef int8_ptr_ptr_type;
+    uint32 offset_ip, offset_sp, n;
+    bool is_64bit = (comp_ctx->pointer_size == sizeof(uint64)) ? true : false;
+
+    if (!comp_ctx->is_jit_mode) {
+        /* ip_offset and sp are the 5th and 6th pointer-sized fields of
+           AOTFrame. Use the pointer size of the compilation target rather
+           than the host's sizeof(uintptr_t), so that the offsets agree
+           with the store width chosen by is_64bit when cross compiling */
+        offset_ip = (uint32)comp_ctx->pointer_size * 4;
+        offset_sp = (uint32)comp_ctx->pointer_size * 5;
+    }
+    else {
+        offset_ip = (uint32)offsetof(WASMInterpFrame, ip);
+        offset_sp = (uint32)offsetof(WASMInterpFrame, sp);
+    }
+
+    /* commit ip */
+
+    if (!(value_offset = I32_CONST(offset_ip))) {
+        aot_set_last_error("llvm build const failed");
+        return false;
+    }
+
+    if (!(value_addr =
+              LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, cur_frame,
+                                    &value_offset, 1, "ip_addr"))) {
+        aot_set_last_error("llvm build in bounds gep failed");
+        return false;
+    }
+
+    if (!(value_ptr = LLVMBuildBitCast(
+              comp_ctx->builder, value_addr,
+              is_64bit ? INT64_PTR_TYPE : INT32_PTR_TYPE, "ip_ptr"))) {
+        aot_set_last_error("llvm build bit cast failed");
+        return false;
+    }
+
+    if (!comp_ctx->is_jit_mode) {
+        if (is_64bit)
+            value =
+                I64_CONST((uint64)(uintptr_t)(ip - func_ctx->aot_func->code));
+        else
+            value =
+                I32_CONST((uint32)(uintptr_t)(ip - func_ctx->aot_func->code));
+    }
+    else {
+        if (is_64bit)
+            value = I64_CONST((uint64)(uintptr_t)ip);
+        else
+            value = I32_CONST((uint32)(uintptr_t)ip);
+    }
+
+    if (!value) {
+        aot_set_last_error("llvm build const failed");
+        return false;
+    }
+
+    if (!LLVMBuildStore(comp_ctx->builder, value, value_ptr)) {
+        aot_set_last_error("llvm build store failed");
+        return false;
+    }
+
+    /* commit sp */
+
+    /* The stored sp is the address of the cell at index (sp - lp) */
+    n = (uint32)(sp - frame->lp);
+    value = I32_CONST(offset_of_local(comp_ctx, n));
+    if (!value) {
+        aot_set_last_error("llvm build const failed");
+        return false;
+    }
+
+    if (!(value = LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, cur_frame,
+                                        &value, 1, "sp"))) {
+        aot_set_last_error("llvm build in bounds gep failed");
+        return false;
+    }
+
+    if (!(value_offset = I32_CONST(offset_sp))) {
+        aot_set_last_error("llvm build const failed");
+        return false;
+    }
+
+    if (!(value_addr =
+              LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, cur_frame,
+                                    &value_offset, 1, "sp_addr"))) {
+        aot_set_last_error("llvm build in bounds gep failed");
+        return false;
+    }
+
+    if (!(int8_ptr_ptr_type = LLVMPointerType(INT8_PTR_TYPE, 0))) {
+        aot_set_last_error("llvm build pointer type failed");
+        return false;
+    }
+
+    if (!(value_ptr = LLVMBuildBitCast(comp_ctx->builder, value_addr,
+                                       int8_ptr_ptr_type, "sp_ptr"))) {
+        aot_set_last_error("llvm build bit cast failed");
+        return false;
+    }
+
+    if (!LLVMBuildStore(comp_ctx->builder, value, value_ptr)) {
+        aot_set_last_error("llvm build store failed");
+        return false;
+    }
+
+    return true;
+}
+
+/**
+ * Bind one parameter or local of the given wasm type to an LLVM value in
+ * the compile-time frame, starting at cell index n.
+ *
+ * @return the number of cells occupied by the type, or 0 if the type is
+ *         unexpected (nothing is bound in that case)
+ */
+static uint32
+bind_frame_local(AOTCompFrame *frame, uint32 n, uint8 type,
+                 LLVMValueRef value)
+{
+    switch (type) {
+        case VALUE_TYPE_I32:
+            set_local_i32(frame, n, value);
+            return 1;
+        case VALUE_TYPE_F32:
+            set_local_f32(frame, n, value);
+            return 1;
+        case VALUE_TYPE_FUNCREF:
+        case VALUE_TYPE_EXTERNREF:
+            set_local_ref(frame, n, value, type);
+            return 1;
+        case VALUE_TYPE_I64:
+            set_local_i64(frame, n, value);
+            return 2;
+        case VALUE_TYPE_F64:
+            set_local_f64(frame, n, value);
+            return 2;
+        case VALUE_TYPE_V128:
+            set_local_v128(frame, n, value);
+            return 4;
+        default:
+            bh_assert(0);
+            return 0;
+    }
+}
+
+/**
+ * Create the compile-time frame used while translating one function and
+ * bind each parameter and local variable to its initial LLVM value.
+ *
+ * Any frame left in comp_ctx->aot_frame by a previous function is freed
+ * first. The new frame holds one value slot per local cell and operand
+ * stack cell, plus two spare cells.
+ *
+ * @param comp_ctx the compilation context; receives the new frame
+ * @param func_ctx the context of the function being compiled
+ * @param func_idx index of the function among the module's non-imported
+ *        functions
+ *
+ * @return true on success, false if the frame cannot be allocated
+ */
+static bool
+init_comp_frame(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
+                uint32 func_idx)
+{
+    WASMModule *wasm_module = comp_ctx->comp_data->wasm_module;
+    AOTFunc *aot_func = func_ctx->aot_func;
+    AOTFuncType *func_type = aot_func->func_type;
+    AOTCompFrame *frame = NULL;
+    LLVMValueRef init_value;
+    uint32 local_cells = aot_func->param_cell_num + aot_func->local_cell_num;
+    uint32 stack_cells = aot_func->max_stack_cell_num;
+    uint32 slot_count = local_cells + stack_cells;
+    uint32 idx, cell = 0;
+    uint64 frame_bytes;
+    uint8 type;
+
+    /* Drop the frame that was allocated for the previously compiled
+       function, if there is one */
+    if (comp_ctx->aot_frame) {
+        wasm_runtime_free(comp_ctx->aot_frame);
+        comp_ctx->aot_frame = NULL;
+    }
+
+    /* Reserve 2 more cells: some operations may push more operands than
+       the number calculated in wasm loader, such as
+       PUSH_F64(F64_CONST(1.0)) in aot_compile_op_f64_promote_f32 */
+    slot_count += 2;
+    frame_bytes = offsetof(AOTCompFrame, lp)
+                  + (uint64)sizeof(AOTValueSlot) * slot_count;
+
+    if (frame_bytes <= UINT32_MAX)
+        frame = comp_ctx->aot_frame =
+            wasm_runtime_malloc((uint32)frame_bytes);
+    if (!frame) {
+        aot_set_last_error("allocate memory failed.");
+        return false;
+    }
+    memset(frame, 0, (uint32)frame_bytes);
+
+    frame->comp_ctx = comp_ctx;
+    frame->func_ctx = func_ctx;
+    frame->cur_wasm_module = wasm_module;
+    frame->cur_wasm_func = wasm_module->functions[func_idx];
+    frame->cur_wasm_func_idx = func_idx + wasm_module->import_function_count;
+
+    frame->max_local_cell_num = local_cells;
+    frame->max_stack_cell_num = stack_cells;
+
+    /* The operand stack begins right after the local area */
+    frame->sp = frame->lp + local_cells;
+
+    /* Init the frame_sp_begin of the function block */
+    func_ctx->block_stack.block_list_end->frame_sp_begin = frame->sp;
+
+    /* Bind the parameters to the LLVM function's parameters, which are
+       taken from index 1 on. They have llvm values but haven't been
+       committed to the AOT/JIT stack frame yet */
+    for (idx = 0; idx < func_type->param_count; idx++) {
+        type = func_type->types[idx];
+        init_value = LLVMGetParam(func_ctx->func, idx + 1);
+        cell += bind_frame_local(frame, cell, type, init_value);
+    }
+
+    /* Bind the local variables to zero constants of their types; they
+       haven't been committed to the AOT/JIT stack frame yet either */
+    for (idx = 0; idx < aot_func->local_count; idx++) {
+        type = aot_func->local_types[idx];
+
+        switch (type) {
+            case VALUE_TYPE_I32:
+            case VALUE_TYPE_FUNCREF:
+            case VALUE_TYPE_EXTERNREF:
+                init_value = I32_ZERO;
+                break;
+            case VALUE_TYPE_I64:
+                init_value = I64_ZERO;
+                break;
+            case VALUE_TYPE_F32:
+                init_value = F32_ZERO;
+                break;
+            case VALUE_TYPE_F64:
+                init_value = F64_ZERO;
+                break;
+            case VALUE_TYPE_V128:
+                init_value = V128_f64x2_ZERO;
+                break;
+            default:
+                /* Unexpected type: bind_frame_local asserts and skips it */
+                init_value = NULL;
+                break;
+        }
+
+        cell += bind_frame_local(frame, cell, type, init_value);
+    }
+
+    return true;
+}
+
 static bool
 aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index)
 {
@@ -186,6 +565,12 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index)
     LLVMMetadataRef location;
 #endif
 
+    if (comp_ctx->enable_aux_stack_frame) {
+        if (!init_comp_frame(comp_ctx, func_ctx, func_index)) {
+            return false;
+        }
+    }
+
     /* Start to translate the opcodes */
     LLVMPositionBuilderAtEnd(
         comp_ctx->builder,
@@ -351,13 +736,19 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index)
                 break;
 
             case WASM_OP_CALL:
+            {
+                uint8 *frame_ip_org = frame_ip;
+
                 read_leb_uint32(frame_ip, frame_ip_end, func_idx);
-                if (!aot_compile_op_call(comp_ctx, func_ctx, func_idx, false))
+                if (!aot_compile_op_call(comp_ctx, func_ctx, func_idx, false,
+                                         frame_ip_org))
                     return false;
                 break;
+            }
 
             case WASM_OP_CALL_INDIRECT:
             {
+                uint8 *frame_ip_org = frame_ip;
                 uint32 tbl_idx;
 
                 read_leb_uint32(frame_ip, frame_ip_end, type_idx);
@@ -371,26 +762,32 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index)
                 }
 
                 if (!aot_compile_op_call_indirect(comp_ctx, func_ctx, type_idx,
-                                                  tbl_idx))
+                                                  tbl_idx, frame_ip_org))
                     return false;
                 break;
             }
 
 #if WASM_ENABLE_TAIL_CALL != 0
             case WASM_OP_RETURN_CALL:
+            {
+                uint8 *frame_ip_org = frame_ip;
+
                 if (!comp_ctx->enable_tail_call) {
                     aot_set_last_error("unsupported opcode");
                     return false;
                 }
                 read_leb_uint32(frame_ip, frame_ip_end, func_idx);
-                if (!aot_compile_op_call(comp_ctx, func_ctx, func_idx, true))
+                if (!aot_compile_op_call(comp_ctx, func_ctx, func_idx, true,
+                                         frame_ip_org))
                     return false;
                 if (!aot_compile_op_return(comp_ctx, func_ctx, &frame_ip))
                     return false;
                 break;
+            }
 
             case WASM_OP_RETURN_CALL_INDIRECT:
             {
+                uint8 *frame_ip_org = frame_ip;
                 uint32 tbl_idx;
 
                 if (!comp_ctx->enable_tail_call) {
@@ -408,7 +805,7 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index)
                 }
 
                 if (!aot_compile_op_call_indirect(comp_ctx, func_ctx, type_idx,
-                                                  tbl_idx))
+                                                  tbl_idx, frame_ip_org))
                     return false;
                 if (!aot_compile_op_return(comp_ctx, func_ctx, &frame_ip))
                     return false;
@@ -530,28 +927,36 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index)
 
 #if WASM_ENABLE_GC != 0
             case WASM_OP_CALL_REF:
+            {
+                uint8 *frame_ip_org = frame_ip;
+
                 if (!comp_ctx->enable_gc) {
                     goto unsupport_gc;
                 }
 
                 read_leb_uint32(frame_ip, frame_ip_end, type_idx);
                 if (!aot_compile_op_call_ref(comp_ctx, func_ctx, type_idx,
-                                             false))
+                                             false, frame_ip_org))
                     return false;
                 break;
+            }
 
             case WASM_OP_RETURN_CALL_REF:
+            {
+                uint8 *frame_ip_org = frame_ip;
+
                 if (!comp_ctx->enable_gc) {
                     goto unsupport_gc;
                 }
 
                 read_leb_uint32(frame_ip, frame_ip_end, type_idx);
-                if (!aot_compile_op_call_ref(comp_ctx, func_ctx, type_idx,
-                                             true))
+                if (!aot_compile_op_call_ref(comp_ctx, func_ctx, type_idx, true,
+                                             frame_ip_org))
                     return false;
                 if (!aot_compile_op_return(comp_ctx, func_ctx, &frame_ip))
                     return false;
                 break;
+            }
 
             case WASM_OP_REF_EQ:
                 if (!comp_ctx->enable_gc) {

+ 263 - 19
core/iwasm/compilation/aot_compiler.h

@@ -8,6 +8,7 @@
 
 #include "aot.h"
 #include "aot_llvm.h"
+#include "../interpreter/wasm_interp.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -106,6 +107,248 @@ check_type_compatible(uint8 src_type, uint8 dst_type)
     return false;
 }
 
+/**
+ * Operations for AOTCompFrame
+ */
+
+/**
+ * Get the offset from frame pointer to the n-th local variable slot.
+ *
+ * Each slot is sizeof(uint32) bytes wide. In JIT mode the slots start at
+ * offsetof(WASMInterpFrame, lp); otherwise they start after a header of
+ * 7 pointer-sized (comp_ctx->pointer_size) fields.
+ * NOTE(review): the 7 presumably matches the number of pointer-sized
+ * fields preceding the locals in the AOT frame layout - confirm against
+ * the runtime's frame struct.
+ *
+ * @param comp_ctx the compilation context (supplies is_jit_mode and
+ *        pointer_size)
+ * @param n the index to the local variable array
+ *
+ * @return the offset from frame pointer to the local variable slot
+ */
+static inline uint32
+offset_of_local(AOTCompContext *comp_ctx, unsigned n)
+{
+    if (!comp_ctx->is_jit_mode)
+        return comp_ctx->pointer_size * 7 + sizeof(uint32) * n;
+    else
+        return offsetof(WASMInterpFrame, lp) + sizeof(uint32) * n;
+}
+
+/**
+ * Get the offset from frame pointer to the n-th local variable's
+ * reference flag slot.
+ *
+ * The reference flags start right after the last cell, i.e. at
+ * offset_of_local(max_local_cell_num + max_stack_cell_num) of the current
+ * comp_ctx->aot_frame, and n is added to that position as a byte offset.
+ *
+ * @param comp_ctx the compilation context (its aot_frame is dereferenced,
+ *        so it must be set)
+ * @param n the index to the local variable array
+ *
+ * @return the offset from frame pointer to the reference flag slot
+ */
+static inline unsigned
+offset_of_ref(AOTCompContext *comp_ctx, unsigned n)
+{
+    AOTCompFrame *frame = comp_ctx->aot_frame;
+    uint32 all_cell_num = frame->max_local_cell_num + frame->max_stack_cell_num;
+    return offset_of_local(comp_ctx, all_cell_num) + n;
+}
+
+/**
+ * Generate instructions to commit computation result to the frame.
+ * The general principle is to only commit values that will be used
+ * through the frame.
+ *
+ * @param frame the frame information
+ */
+bool
+aot_gen_commit_values(AOTCompFrame *frame);
+
+/**
+ * Generate instructions to commit SP and IP pointers to the frame.
+ *
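+ * @param frame the frame information
+ * @param sp the stack pointer (value slot) to commit
+ * @param ip the instruction pointer to commit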
+ */
+bool
+aot_gen_commit_sp_ip(AOTCompFrame *frame, const AOTValueSlot *sp,
+                     const uint8 *ip);
+
+static inline void
+push_32bit(AOTCompFrame *frame, AOTValue *aot_value)
+{ /* Push one cell: copy aot_value's value and type into the slot at
+     frame->sp, mark that slot dirty, then advance sp by one slot. */
+    frame->sp->value = aot_value->value;
+    frame->sp->type = aot_value->type;
+    frame->sp->dirty = 1;
+    frame->sp++;
+}
+
+static inline void
+push_64bit(AOTCompFrame *frame, AOTValue *aot_value)
+{ /* Push two cells; both slots receive the same value and type. */
+    push_32bit(frame, aot_value);
+    push_32bit(frame, aot_value);
+}
+
+static inline void
+push_i32(AOTCompFrame *frame, AOTValue *aot_value)
+{ /* Push a one-cell value; asserts its type is VALUE_TYPE_I32 or
+     VALUE_TYPE_I1. */
+    bh_assert(aot_value->type == VALUE_TYPE_I32
+              || aot_value->type == VALUE_TYPE_I1);
+    push_32bit(frame, aot_value);
+}
+
+static inline void
+push_i64(AOTCompFrame *frame, AOTValue *aot_value)
+{ /* Push a two-cell value; asserts its type is VALUE_TYPE_I64. */
+    bh_assert(aot_value->type == VALUE_TYPE_I64);
+    push_64bit(frame, aot_value);
+}
+
+static inline void
+push_f32(AOTCompFrame *frame, AOTValue *aot_value)
+{ /* Push a one-cell value; asserts its type is VALUE_TYPE_F32. */
+    bh_assert(aot_value->type == VALUE_TYPE_F32);
+    push_32bit(frame, aot_value);
+}
+
+static inline void
+push_f64(AOTCompFrame *frame, AOTValue *aot_value)
+{ /* Push a two-cell value; asserts its type is VALUE_TYPE_F64. */
+    bh_assert(aot_value->type == VALUE_TYPE_F64);
+    push_64bit(frame, aot_value);
+}
+
+static inline void
+push_v128(AOTCompFrame *frame, AOTValue *aot_value)
+{ /* Push a four-cell value (two 64-bit pushes); asserts its type is
+     VALUE_TYPE_V128. */
+    bh_assert(aot_value->type == VALUE_TYPE_V128);
+    push_64bit(frame, aot_value);
+    push_64bit(frame, aot_value);
+}
+
+static inline void
+push_ref(AOTCompFrame *frame, AOTValue *aot_value)
+{ /* Push a one-cell reference value. Only asserts that ref types are
+     enabled on the owning comp_ctx; the value's type is not checked. */
+    bh_assert(frame->comp_ctx->enable_ref_types);
+    push_32bit(frame, aot_value);
+}
+
+static inline void
+pop_i32(AOTCompFrame *frame)
+{ /* Pop one cell: asserts the top slot is I32 or I1, moves sp back by one
+     and zeroes the vacated slot.
+     NOTE(review): the depth assert is measured from frame->lp, which the
+     set_local_xxx helpers index as the locals array, so it presumably
+     does not catch popping into the locals area - confirm. */
+    bh_assert(frame->sp - frame->lp >= 1);
+    bh_assert((frame->sp - 1)->type == VALUE_TYPE_I32
+              || (frame->sp - 1)->type == VALUE_TYPE_I1);
+    frame->sp--;
+    memset(frame->sp, 0, sizeof(*frame->sp));
+}
+
+static inline void
+pop_i64(AOTCompFrame *frame)
+{ /* Pop two cells: asserts both top slots are I64, moves sp back by two
+     and zeroes the two vacated slots. */
+    bh_assert(frame->sp - frame->lp >= 2);
+    bh_assert((frame->sp - 1)->type == VALUE_TYPE_I64
+              && (frame->sp - 2)->type == VALUE_TYPE_I64);
+    frame->sp -= 2;
+    memset(frame->sp, 0, sizeof(*frame->sp) * 2);
+}
+
+static inline void
+pop_f32(AOTCompFrame *frame)
+{ /* Pop one cell: asserts the top slot is F32, moves sp back by one and
+     zeroes the vacated slot. */
+    bh_assert(frame->sp - frame->lp >= 1);
+    bh_assert((frame->sp - 1)->type == VALUE_TYPE_F32);
+    frame->sp--;
+    memset(frame->sp, 0, sizeof(*frame->sp));
+}
+
+static inline void
+pop_f64(AOTCompFrame *frame)
+{ /* Pop two cells: asserts both top slots are F64, moves sp back by two
+     and zeroes the two vacated slots. */
+    bh_assert(frame->sp - frame->lp >= 2);
+    bh_assert((frame->sp - 1)->type == VALUE_TYPE_F64
+              && (frame->sp - 2)->type == VALUE_TYPE_F64);
+    frame->sp -= 2;
+    memset(frame->sp, 0, sizeof(*frame->sp) * 2);
+}
+
+static inline void
+pop_v128(AOTCompFrame *frame)
+{ /* Pop four cells: asserts all four top slots are V128, moves sp back by
+     four and zeroes the four vacated slots. */
+    bh_assert(frame->sp - frame->lp >= 4);
+    bh_assert((frame->sp - 1)->type == VALUE_TYPE_V128
+              && (frame->sp - 2)->type == VALUE_TYPE_V128
+              && (frame->sp - 3)->type == VALUE_TYPE_V128
+              && (frame->sp - 4)->type == VALUE_TYPE_V128);
+    frame->sp -= 4;
+    memset(frame->sp, 0, sizeof(*frame->sp) * 4);
+}
+
+static inline void
+pop_ref(AOTCompFrame *frame)
+{ /* Pop one cell: asserts the top slot is FUNCREF or EXTERNREF, moves sp
+     back by one and zeroes the vacated slot. */
+    bh_assert(frame->sp - frame->lp >= 1);
+    bh_assert((frame->sp - 1)->type == VALUE_TYPE_FUNCREF
+              || (frame->sp - 1)->type == VALUE_TYPE_EXTERNREF);
+    frame->sp -= 1;
+    memset(frame->sp, 0, sizeof(*frame->sp) * 1);
+}
+
+static inline void
+pop(AOTCompFrame *frame, uint32 n)
+{ /* Pop n cells without any type check: moves sp back by n and zeroes the
+     n vacated slots.
+     NOTE(review): the assert compares a signed pointer difference with an
+     unsigned n; where ptrdiff_t is 32 bits a negative difference would
+     convert to a large unsigned value and pass - confirm this is
+     acceptable. */
+    bh_assert(frame->sp - frame->lp >= n);
+    frame->sp -= n;
+    memset(frame->sp, 0, sizeof(*frame->sp) * n);
+}
+
+static inline void
+set_local_i32(AOTCompFrame *frame, int n, LLVMValueRef value)
+{ /* Record value as the I32 local in cell n and mark the cell dirty. */
+    frame->lp[n].value = value;
+    frame->lp[n].type = VALUE_TYPE_I32;
+    frame->lp[n].dirty = 1;
+}
+
+static inline void
+set_local_i64(AOTCompFrame *frame, int n, LLVMValueRef value)
+{ /* Record value as the I64 local occupying cells n and n + 1; both cells
+     get the same value and are marked dirty. */
+    frame->lp[n].value = value;
+    frame->lp[n].type = VALUE_TYPE_I64;
+    frame->lp[n].dirty = 1;
+    frame->lp[n + 1].value = value;
+    frame->lp[n + 1].type = VALUE_TYPE_I64;
+    frame->lp[n + 1].dirty = 1;
+}
+
+static inline void
+set_local_f32(AOTCompFrame *frame, int n, LLVMValueRef value)
+{ /* Record value as the F32 local in cell n and mark the cell dirty. */
+    frame->lp[n].value = value;
+    frame->lp[n].type = VALUE_TYPE_F32;
+    frame->lp[n].dirty = 1;
+}
+
+static inline void
+set_local_f64(AOTCompFrame *frame, int n, LLVMValueRef value)
+{ /* Record value as the F64 local occupying cells n and n + 1; both cells
+     get the same value and are marked dirty. */
+    frame->lp[n].value = value;
+    frame->lp[n].type = VALUE_TYPE_F64;
+    frame->lp[n].dirty = 1;
+    frame->lp[n + 1].value = value;
+    frame->lp[n + 1].type = VALUE_TYPE_F64;
+    frame->lp[n + 1].dirty = 1;
+}
+
+static inline void
+set_local_v128(AOTCompFrame *frame, int n, LLVMValueRef value)
+{ /* Record value as the V128 local occupying cells n .. n + 3; all four
+     cells get the same value and are marked dirty. */
+    uint32 i;
+    for (i = 0; i < 4; i++) {
+        frame->lp[n + i].value = value;
+        frame->lp[n + i].type = VALUE_TYPE_V128;
+        frame->lp[n + i].dirty = 1;
+    }
+}
+
+static inline void
+set_local_ref(AOTCompFrame *frame, int n, LLVMValueRef value, uint8 ref_type)
+{ /* Record value as a reference local in cell n, typed with the caller's
+     ref_type, and mark the cell dirty. Asserts that ref types are enabled
+     on the owning comp_ctx; ref_type itself is not validated here. */
+    bh_assert(frame->comp_ctx->enable_ref_types);
+    frame->lp[n].value = value;
+    frame->lp[n].type = ref_type;
+    frame->lp[n].dirty = 1;
+}
+
 #define CHECK_STACK()                                          \
     do {                                                       \
         if (!func_ctx->block_stack.block_list_end) {           \
@@ -141,7 +384,7 @@ check_type_compatible(uint8 src_type, uint8 dst_type)
         AOTValue *aot_value;                                                 \
         CHECK_STACK();                                                       \
         aot_value = aot_value_stack_pop(                                     \
-            &func_ctx->block_stack.block_list_end->value_stack);             \
+            comp_ctx, &func_ctx->block_stack.block_list_end->value_stack);   \
         if (!check_type_compatible(aot_value->type, value_type)) {           \
             aot_set_last_error("invalid WASM stack data type.");             \
             wasm_runtime_free(aot_value);                                    \
@@ -194,7 +437,7 @@ check_type_compatible(uint8 src_type, uint8 dst_type)
         AOTValue *aot_value;                                                   \
         CHECK_STACK();                                                         \
         aot_value = aot_value_stack_pop(                                       \
-            &func_ctx->block_stack.block_list_end->value_stack);               \
+            comp_ctx, &func_ctx->block_stack.block_list_end->value_stack);     \
         if (aot_value->type != VALUE_TYPE_I1                                   \
             && aot_value->type != VALUE_TYPE_I32) {                            \
             aot_set_last_error("invalid WASM stack data type.");               \
@@ -215,23 +458,24 @@ check_type_compatible(uint8 src_type, uint8 dst_type)
         wasm_runtime_free(aot_value);                                          \
     } while (0)
 
-#define PUSH(llvm_value, value_type)                                        \
-    do {                                                                    \
-        AOTValue *aot_value;                                                \
-        if (!func_ctx->block_stack.block_list_end) {                        \
-            aot_set_last_error("WASM block stack underflow.");              \
-            goto fail;                                                      \
-        }                                                                   \
-        aot_value = wasm_runtime_malloc(sizeof(AOTValue));                  \
-        if (!aot_value) {                                                   \
-            aot_set_last_error("allocate memory failed.");                  \
-            goto fail;                                                      \
-        }                                                                   \
-        memset(aot_value, 0, sizeof(AOTValue));                             \
-        aot_value->type = value_type;                                       \
-        aot_value->value = llvm_value;                                      \
-        aot_value_stack_push(                                               \
-            &func_ctx->block_stack.block_list_end->value_stack, aot_value); \
+#define PUSH(llvm_value, value_type)                                      \
+    do {                                                                  \
+        AOTValue *aot_value;                                              \
+        if (!func_ctx->block_stack.block_list_end) {                      \
+            aot_set_last_error("WASM block stack underflow.");            \
+            goto fail;                                                    \
+        }                                                                 \
+        aot_value = wasm_runtime_malloc(sizeof(AOTValue));                \
+        if (!aot_value) {                                                 \
+            aot_set_last_error("allocate memory failed.");                \
+            goto fail;                                                    \
+        }                                                                 \
+        memset(aot_value, 0, sizeof(AOTValue));                           \
+        aot_value->type = value_type;                                     \
+        aot_value->value = llvm_value;                                    \
+        aot_value_stack_push(                                             \
+            comp_ctx, &func_ctx->block_stack.block_list_end->value_stack, \
+            aot_value);                                                   \
     } while (0)
 
 #define PUSH_I32(v) PUSH(v, VALUE_TYPE_I32)

+ 17 - 1
core/iwasm/compilation/aot_emit_aot_file.c

@@ -618,14 +618,21 @@ get_text_section_size(AOTObjectData *obj_data)
 static uint32
 get_func_section_size(AOTCompData *comp_data, AOTObjectData *obj_data)
 {
-    /* text offsets + function type indexs */
     uint32 size = 0;
 
+    /* text offsets */
     if (is_32bit_binary(obj_data))
         size = (uint32)sizeof(uint32) * comp_data->func_count;
     else
         size = (uint32)sizeof(uint64) * comp_data->func_count;
 
+    /* function type indexes */
+    size += (uint32)sizeof(uint32) * comp_data->func_count;
+
+    /* max_local_cell_nums */
+    size += (uint32)sizeof(uint32) * comp_data->func_count;
+
+    /* max_stack_cell_nums */
     size += (uint32)sizeof(uint32) * comp_data->func_count;
     return size;
 }
@@ -2063,6 +2070,15 @@ aot_emit_func_section(uint8 *buf, uint8 *buf_end, uint32 *p_offset,
     for (i = 0; i < comp_data->func_count; i++)
         EMIT_U32(funcs[i]->func_type_index);
 
+    for (i = 0; i < comp_data->func_count; i++) {
+        uint32 max_local_cell_num =
+            funcs[i]->param_cell_num + funcs[i]->local_cell_num;
+        EMIT_U32(max_local_cell_num);
+    }
+
+    for (i = 0; i < comp_data->func_count; i++)
+        EMIT_U32(funcs[i]->max_stack_cell_num);
+
     if (offset - *p_offset != section_size + sizeof(uint32) * 2) {
         aot_set_last_error("emit function section failed.");
         return false;

+ 166 - 61
core/iwasm/compilation/aot_emit_control.c

@@ -158,12 +158,38 @@ get_target_block(AOTFuncContext *func_ctx, uint32 br_depth)
     return block;
 }
 
+static void
+clear_frame_locals(AOTCompFrame *aot_frame)
+{ /* Reset the dirty flag and the cached LLVM value of every local cell
+     (indices 0 .. max_local_cell_num - 1); the type field is left as is. */
+    uint32 i;
+
+    for (i = 0; i < aot_frame->max_local_cell_num; i++) {
+        aot_frame->lp[i].dirty = 0;
+        aot_frame->lp[i].value = NULL;
+    }
+}
+
+static void
+restore_frame_sp(AOTBlock *block, AOTCompFrame *aot_frame)
+{ /* Rewind the frame's sp to the position recorded in block->frame_sp_begin,
+     zeroing every slot between that position and the current sp. Asserts
+     that sp is not already below the recorded position. */
+    uint32 stack_cell_num;
+
+    bh_assert(aot_frame->sp >= block->frame_sp_begin);
+
+    stack_cell_num = aot_frame->sp - block->frame_sp_begin;
+    if (stack_cell_num > 0) {
+        memset(block->frame_sp_begin, 0, sizeof(AOTValueSlot) * stack_cell_num);
+    }
+    aot_frame->sp = block->frame_sp_begin;
+}
+
 static bool
 handle_next_reachable_block(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                             uint8 **p_frame_ip)
 {
     AOTBlock *block = func_ctx->block_stack.block_list_end;
     AOTBlock *block_prev;
+    AOTCompFrame *aot_frame = comp_ctx->aot_frame;
     uint8 *frame_ip = NULL;
     uint32 i;
     AOTFuncType *func_type;
@@ -180,10 +206,22 @@ handle_next_reachable_block(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
         comp_ctx, func_ctx,
         (*p_frame_ip - 1) - comp_ctx->comp_data->wasm_module->buf_code);
 #endif
+
+    if (aot_frame) {
+        /* Clear frame local variables since they have been committed */
+        clear_frame_locals(aot_frame);
+    }
+
     if (block->label_type == LABEL_TYPE_IF && block->llvm_else_block
         && *p_frame_ip <= block->wasm_code_else) {
         /* Clear value stack and start to translate else branch */
-        aot_value_stack_destroy(&block->value_stack);
+        aot_value_stack_destroy(comp_ctx, &block->value_stack);
+
+        if (aot_frame) {
+            /* Restore the frame sp */
+            restore_frame_sp(block, aot_frame);
+        }
+
         /* Recover parameters of else branch */
         for (i = 0; i < block->param_count; i++)
             PUSH(block->else_param_phis[i], block->param_types[i]);
@@ -196,11 +234,16 @@ handle_next_reachable_block(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
         block_prev = block->prev;
         block = aot_block_stack_pop(&func_ctx->block_stack);
 
+        if (aot_frame) {
+            /* Restore the frame sp */
+            restore_frame_sp(block, aot_frame);
+        }
+
         if (block->label_type == LABEL_TYPE_IF) {
             if (block->llvm_else_block && !block->skip_wasm_code_else
                 && *p_frame_ip <= block->wasm_code_else) {
                 /* Clear value stack and start to translate else branch */
-                aot_value_stack_destroy(&block->value_stack);
+                aot_value_stack_destroy(comp_ctx, &block->value_stack);
                 SET_BUILDER_POS(block->llvm_else_block);
                 *p_frame_ip = block->wasm_code_else + 1;
                 /* Push back the block */
@@ -215,7 +258,7 @@ handle_next_reachable_block(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
         }
 
         frame_ip = block->wasm_code_end;
-        aot_block_destroy(block);
+        aot_block_destroy(comp_ctx, block);
         block = block_prev;
     }
 
@@ -229,6 +272,12 @@ handle_next_reachable_block(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 
     /* Pop block, push its return value, and destroy the block */
     block = aot_block_stack_pop(&func_ctx->block_stack);
+
+    if (aot_frame) {
+        /* Restore the frame sp */
+        restore_frame_sp(block, aot_frame);
+    }
+
     func_type = func_ctx->aot_func->func_type;
     for (i = 0; i < block->result_count; i++) {
         bh_assert(block->result_phis[i]);
@@ -272,7 +321,7 @@ handle_next_reachable_block(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 #endif
         }
     }
-    aot_block_destroy(block);
+    aot_block_destroy(comp_ctx, block);
     return true;
 fail:
     return false;
@@ -366,6 +415,9 @@ push_aot_block_to_stack_and_pass_params(AOTCompContext *comp_ctx,
 
     /* Push the new block to block stack */
     aot_block_stack_push(&func_ctx->block_stack, block);
+    if (comp_ctx->aot_frame) {
+        block->frame_sp_begin = comp_ctx->aot_frame->sp;
+    }
 
     /* Push param phis to the new block */
     for (i = 0; i < block->param_count; i++) {
@@ -448,6 +500,13 @@ aot_compile_op_block(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     block->block_index = func_ctx->block_stack.block_index[label_type];
     func_ctx->block_stack.block_index[label_type]++;
 
+    if (comp_ctx->aot_frame) {
+        if (label_type != LABEL_TYPE_BLOCK
+            && !aot_gen_commit_values(comp_ctx->aot_frame)) {
+            goto fail;
+        }
+    }
+
     if (label_type == LABEL_TYPE_BLOCK || label_type == LABEL_TYPE_LOOP) {
         /* Create block */
         format_block_name(name, sizeof(name), block->block_index, label_type,
@@ -475,7 +534,7 @@ aot_compile_op_block(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                                      false, NULL, NULL))) {
                 goto fail;
             }
-            aot_block_destroy(block);
+            aot_block_destroy(comp_ctx, block);
             return aot_handle_next_reachable_block(comp_ctx, func_ctx,
                                                    p_frame_ip);
         }
@@ -555,7 +614,7 @@ aot_compile_op_block(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                 }
                 else {
                     /* skip the block */
-                    aot_block_destroy(block);
+                    aot_block_destroy(comp_ctx, block);
                     *p_frame_ip = end_addr + 1;
                 }
             }
@@ -568,7 +627,7 @@ aot_compile_op_block(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 
     return true;
 fail:
-    aot_block_destroy(block);
+    aot_block_destroy(comp_ctx, block);
     return false;
 }
 
@@ -578,6 +637,7 @@ aot_compile_op_else(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 {
     AOTBlock *block = func_ctx->block_stack.block_list_end;
     LLVMValueRef value;
+    AOTCompFrame *aot_frame = comp_ctx->aot_frame;
     char name[32];
     uint32 i, result_index;
 
@@ -613,14 +673,29 @@ aot_compile_op_else(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
         ADD_TO_RESULT_PHIS(block, value, result_index);
     }
 
+    if (aot_frame) {
+        bh_assert(block->frame_sp_begin == aot_frame->sp);
+        if (!aot_gen_commit_values(aot_frame)) {
+            goto fail;
+        }
+    }
+
     /* Jump to end block */
     BUILD_BR(block->llvm_end_block);
 
     if (!block->skip_wasm_code_else && block->llvm_else_block) {
         /* Clear value stack, recover param values
-         * and start to translate else branch.
-         */
-        aot_value_stack_destroy(&block->value_stack);
+           and start to translate else branch. */
+        aot_value_stack_destroy(comp_ctx, &block->value_stack);
+
+        if (comp_ctx->aot_frame) {
+            comp_ctx->aot_frame->sp = block->frame_sp_begin;
+            for (i = 0; i < comp_ctx->aot_frame->max_local_cell_num; i++) {
+                comp_ctx->aot_frame->lp[i].dirty = 0;
+                comp_ctx->aot_frame->lp[i].value = NULL;
+            }
+        }
+
         for (i = 0; i < block->param_count; i++)
             PUSH(block->else_param_phis[i], block->param_types[i]);
         SET_BUILDER_POS(block->llvm_else_block);
@@ -660,6 +735,13 @@ aot_compile_op_end(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
             MOVE_BLOCK_BEFORE(block->llvm_end_block, next_llvm_end_block);
     }
 
+    if (comp_ctx->aot_frame) {
+        if (block->label_type != LABEL_TYPE_FUNCTION
+            && !aot_gen_commit_values(comp_ctx->aot_frame)) {
+            return false;
+        }
+    }
+
     /* Handle block result values */
     CREATE_RESULT_VALUE_PHIS(block);
     for (i = 0; i < block->result_count; i++) {
@@ -670,6 +752,10 @@ aot_compile_op_end(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
         ADD_TO_RESULT_PHIS(block, value, result_index);
     }
 
+    if (comp_ctx->aot_frame) {
+        restore_frame_sp(block, comp_ctx->aot_frame);
+    }
+
     /* Jump to the end block */
     BUILD_BR(block->llvm_end_block);
 
@@ -681,7 +767,8 @@ fail:
 
 #if WASM_ENABLE_THREAD_MGR != 0
 bool
-check_suspend_flags(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+check_suspend_flags(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
+                    bool check_terminate_and_suspend)
 {
     LLVMValueRef terminate_addr, terminate_flags, flag, offset, res;
     LLVMBasicBlockRef terminate_block, non_terminate_block;
@@ -761,17 +848,28 @@ aot_compile_op_br(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     char name[32];
     uint32 i, param_index, result_index;
 
-#if WASM_ENABLE_THREAD_MGR != 0
-    /* Insert suspend check point */
-    if (comp_ctx->enable_thread_mgr) {
-        if (!check_suspend_flags(comp_ctx, func_ctx))
+    if (!(block_dst = get_target_block(func_ctx, br_depth))) {
+        return false;
+    }
+
+    if (comp_ctx->aot_frame) {
+        if (!aot_gen_commit_values(comp_ctx->aot_frame))
             return false;
+        if (block_dst->label_type == LABEL_TYPE_LOOP) {
+            if (!aot_gen_commit_sp_ip(comp_ctx->aot_frame,
+                                      comp_ctx->aot_frame->sp, *p_frame_ip))
+                return false;
+        }
     }
-#endif
 
-    if (!(block_dst = get_target_block(func_ctx, br_depth))) {
-        return false;
+#if WASM_ENABLE_THREAD_MGR != 0
+    /* Terminate or suspend current thread only when this is a backward jump */
+    if (comp_ctx->enable_thread_mgr
+        && block_dst->label_type == LABEL_TYPE_LOOP) {
+        if (!check_suspend_flags(comp_ctx, func_ctx, true))
+            return false;
     }
+#endif
 
     if (block_dst->label_type == LABEL_TYPE_LOOP) {
         /* Dest block is Loop block */
@@ -825,6 +923,30 @@ aot_compile_conditional_br(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     uint32 i, param_index, result_index;
     uint64 size;
 
+    if (!(block_dst = get_target_block(func_ctx, br_depth))) {
+        return false;
+    }
+
+    if (comp_ctx->aot_frame) {
+        if (!aot_gen_commit_values(comp_ctx->aot_frame))
+            return false;
+        if (block_dst->label_type == LABEL_TYPE_LOOP) {
+            if (!aot_gen_commit_sp_ip(comp_ctx->aot_frame,
+                                      comp_ctx->aot_frame->sp, *p_frame_ip))
+                return false;
+        }
+    }
+
+#if WASM_ENABLE_THREAD_MGR != 0
+    /* Terminate or suspend current thread only when this is
+       a backward jump */
+    if (comp_ctx->enable_thread_mgr
+        && block_dst->label_type == LABEL_TYPE_LOOP) {
+        if (!check_suspend_flags(comp_ctx, func_ctx, true))
+            return false;
+    }
+#endif
+
     if (LLVMIsUndef(value_cmp)
 #if LLVM_VERSION_NUMBER >= 12
         || LLVMIsPoison(value_cmp)
@@ -839,9 +961,6 @@ aot_compile_conditional_br(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 
     if (!LLVMIsEfficientConstInt(value_cmp)) {
         /* Compare value is not constant, create condition br IR */
-        if (!(block_dst = get_target_block(func_ctx, br_depth))) {
-            return false;
-        }
 
         /* Create llvm else block */
         CREATE_BLOCK(llvm_else_block, "br_if_else");
@@ -944,14 +1063,6 @@ aot_compile_op_br_if(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 {
     LLVMValueRef value_cmp;
 
-#if WASM_ENABLE_THREAD_MGR != 0
-    /* Insert suspend check point */
-    if (comp_ctx->enable_thread_mgr) {
-        if (!check_suspend_flags(comp_ctx, func_ctx))
-            return false;
-    }
-#endif
-
     POP_COND(value_cmp);
 
     return aot_compile_conditional_br(comp_ctx, func_ctx, br_depth, p_frame_ip,
@@ -974,14 +1085,6 @@ aot_compile_op_br_table(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     uint64 size;
     char name[32];
 
-#if WASM_ENABLE_THREAD_MGR != 0
-    /* Insert suspend check point */
-    if (comp_ctx->enable_thread_mgr) {
-        if (!check_suspend_flags(comp_ctx, func_ctx))
-            return false;
-    }
-#endif
-
     POP_I32(value_cmp);
 
     if (LLVMIsUndef(value_cmp)
@@ -997,6 +1100,31 @@ aot_compile_op_br_table(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     }
 
     if (!LLVMIsEfficientConstInt(value_cmp)) {
+        if (comp_ctx->aot_frame) {
+            if (!aot_gen_commit_values(comp_ctx->aot_frame))
+                return false;
+            if (!aot_gen_commit_sp_ip(comp_ctx->aot_frame,
+                                      comp_ctx->aot_frame->sp, *p_frame_ip))
+                return false;
+        }
+
+#if WASM_ENABLE_THREAD_MGR != 0
+        if (comp_ctx->enable_thread_mgr) {
+            for (i = 0; i <= br_count; i++) {
+                target_block = get_target_block(func_ctx, br_depths[i]);
+                if (!target_block)
+                    return false;
+                /* Terminate or suspend current thread only when this is a
+                   backward jump */
+                if (target_block->label_type == LABEL_TYPE_LOOP) {
+                    if (!check_suspend_flags(comp_ctx, func_ctx, true))
+                        return false;
+                    break;
+                }
+            }
+        }
+#endif
+
         /* Compare value is not constant, create switch IR */
         for (i = 0; i <= br_count; i++) {
             target_block = get_target_block(func_ctx, br_depths[i]);
@@ -1125,6 +1253,7 @@ aot_compile_op_return(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
         comp_ctx, func_ctx,
         (*p_frame_ip - 1) - comp_ctx->comp_data->wasm_module->buf_code);
 #endif
+
     if (block_func->result_count) {
         /* Store extra result values to function parameters */
         for (i = 0; i < block_func->result_count - 1; i++) {
@@ -1190,14 +1319,6 @@ aot_compile_op_br_on_null(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 {
     LLVMValueRef gc_obj, value_cmp;
 
-#if WASM_ENABLE_THREAD_MGR != 0
-    /* Insert suspend check point */
-    if (comp_ctx->enable_thread_mgr) {
-        if (!check_suspend_flags(comp_ctx, func_ctx))
-            return false;
-    }
-#endif
-
     GET_REF_FROM_STACK(gc_obj);
 
     if (!(value_cmp =
@@ -1219,14 +1340,6 @@ aot_compile_op_br_on_non_null(AOTCompContext *comp_ctx,
 {
     LLVMValueRef gc_obj, value_cmp;
 
-#if WASM_ENABLE_THREAD_MGR != 0
-    /* Insert suspend check point */
-    if (comp_ctx->enable_thread_mgr) {
-        if (!check_suspend_flags(comp_ctx, func_ctx))
-            return false;
-    }
-#endif
-
     GET_REF_FROM_STACK(gc_obj);
 
     if (!(value_cmp =
@@ -1250,14 +1363,6 @@ aot_compile_op_br_on_cast(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     LLVMBasicBlockRef gc_obj_null, gc_obj_non_null, conditional_branching,
         phi_blocks[2];
 
-#if WASM_ENABLE_THREAD_MGR != 0
-    /* Insert suspend check point */
-    if (comp_ctx->enable_thread_mgr) {
-        if (!check_suspend_flags(comp_ctx, func_ctx))
-            return false;
-    }
-#endif
-
     GET_REF_FROM_STACK(gc_obj);
 
     /* Create if block */

+ 2 - 1
core/iwasm/compilation/aot_emit_control.h

@@ -52,7 +52,8 @@ aot_handle_next_reachable_block(AOTCompContext *comp_ctx,
 
 #if WASM_ENABLE_THREAD_MGR != 0
 bool
-check_suspend_flags(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+check_suspend_flags(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
+                    bool check_terminate_and_suspend);
 #endif
 
 #if WASM_ENABLE_GC != 0

+ 5 - 0
core/iwasm/compilation/aot_emit_exception.c

@@ -21,6 +21,11 @@ aot_emit_exception(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 
     CHECK_LLVM_CONST(exce_id);
 
+    if (comp_ctx->aot_frame) {
+        if (!aot_gen_commit_values(comp_ctx->aot_frame))
+            goto fail;
+    }
+
     /* Create got_exception block if needed */
     if (!func_ctx->got_exception_block) {
         if (!(func_ctx->got_exception_block = LLVMAppendBasicBlockInContext(

+ 45 - 19
core/iwasm/compilation/aot_emit_function.c

@@ -513,7 +513,7 @@ aot_estimate_and_record_stack_usage_for_function_call(
 
 bool
 aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
-                    uint32 func_idx, bool tail_call)
+                    uint32 func_idx, bool tail_call, const uint8 *frame_ip)
 {
     uint32 import_func_count = comp_ctx->comp_data->import_func_count;
     AOTImportFunc *import_funcs = comp_ctx->comp_data->import_funcs;
@@ -534,14 +534,6 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     bool ret = false;
     char buf[32];
 
-#if WASM_ENABLE_THREAD_MGR != 0
-    /* Insert suspend check point */
-    if (comp_ctx->enable_thread_mgr) {
-        if (!check_suspend_flags(comp_ctx, func_ctx))
-            return false;
-    }
-#endif
-
     /* Check function index */
     if (func_idx >= import_func_count + func_count) {
         aot_set_last_error("Function index out of range.");
@@ -563,6 +555,23 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     /* Get param cell number */
     param_cell_num = func_type->param_cell_num;
 
+    if (comp_ctx->aot_frame) {
+        if (!aot_gen_commit_values(comp_ctx->aot_frame))
+            return false;
+        if (!aot_gen_commit_sp_ip(comp_ctx->aot_frame,
+                                  comp_ctx->aot_frame->sp - param_cell_num,
+                                  frame_ip))
+            return false;
+    }
+
+#if WASM_ENABLE_THREAD_MGR != 0
+    /* Insert suspend check point */
+    if (comp_ctx->enable_thread_mgr) {
+        if (!check_suspend_flags(comp_ctx, func_ctx, true))
+            return false;
+    }
+#endif
+
 #if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0)
     if (comp_ctx->enable_aux_stack_frame) {
         LLVMValueRef func_idx_const;
@@ -895,7 +904,7 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 #if WASM_ENABLE_THREAD_MGR != 0
     /* Insert suspend check point */
     if (comp_ctx->enable_thread_mgr) {
-        if (!check_suspend_flags(comp_ctx, func_ctx))
+        if (!check_suspend_flags(comp_ctx, func_ctx, false))
             goto fail;
     }
 #endif
@@ -1067,7 +1076,8 @@ call_aot_call_indirect_func(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 
 bool
 aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
-                             uint32 type_idx, uint32 tbl_idx)
+                             uint32 type_idx, uint32 tbl_idx,
+                             const uint8 *frame_ip)
 {
     AOTFuncType *func_type;
     LLVMValueRef tbl_idx_value, elem_idx, table_elem, func_idx;
@@ -1116,6 +1126,15 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     func_param_count = func_type->param_count;
     func_result_count = func_type->result_count;
 
+    if (comp_ctx->aot_frame) {
+        if (!aot_gen_commit_values(comp_ctx->aot_frame))
+            return false;
+        if (!aot_gen_commit_sp_ip(
+                comp_ctx->aot_frame,
+                comp_ctx->aot_frame->sp - func_type->param_cell_num, frame_ip))
+            return false;
+    }
+
     POP_I32(elem_idx);
 
     /* get the cur size of the table instance */
@@ -1430,7 +1449,7 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 #if WASM_ENABLE_THREAD_MGR != 0
     /* Insert suspend check point */
     if (comp_ctx->enable_thread_mgr) {
-        if (!check_suspend_flags(comp_ctx, func_ctx))
+        if (!check_suspend_flags(comp_ctx, func_ctx, true))
             goto fail;
     }
 #endif
@@ -1627,7 +1646,7 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 #if WASM_ENABLE_THREAD_MGR != 0
     /* Insert suspend check point */
     if (comp_ctx->enable_thread_mgr) {
-        if (!check_suspend_flags(comp_ctx, func_ctx))
+        if (!check_suspend_flags(comp_ctx, func_ctx, false))
             goto fail;
     }
 #endif
@@ -1725,10 +1744,9 @@ fail:
 }
 
 #if WASM_ENABLE_GC != 0
-
 bool
 aot_compile_op_call_ref(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
-                        uint32 type_idx, bool tail_call)
+                        uint32 type_idx, bool tail_call, const uint8 *frame_ip)
 {
     AOTFuncType *func_type;
     LLVMValueRef func_obj, func_idx;
@@ -1758,6 +1776,7 @@ aot_compile_op_call_ref(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                                                           func_type);
     func_param_count = func_type->param_count;
     func_result_count = func_type->result_count;
+    param_cell_num = func_type->param_cell_num;
 
     POP_REF(func_obj);
 
@@ -1898,10 +1917,19 @@ aot_compile_op_call_ref(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
         goto fail;
     }
 
+    if (comp_ctx->aot_frame) {
+        if (!aot_gen_commit_values(comp_ctx->aot_frame))
+            goto fail;
+        if (!aot_gen_commit_sp_ip(comp_ctx->aot_frame,
+                                  comp_ctx->aot_frame->sp - param_cell_num,
+                                  frame_ip))
+            goto fail;
+    }
+
 #if WASM_ENABLE_THREAD_MGR != 0
     /* Insert suspend check point */
     if (comp_ctx->enable_thread_mgr) {
-        if (!check_suspend_flags(comp_ctx, func_ctx))
+        if (!check_suspend_flags(comp_ctx, func_ctx, true))
             goto fail;
     }
 #endif
@@ -1973,8 +2001,6 @@ aot_compile_op_call_ref(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     /* Translate call import block */
     LLVMPositionBuilderAtEnd(comp_ctx->builder, block_call_import);
 
-    param_cell_num = func_type->param_cell_num;
-
     /* Similar to opcode call_indirect, but for opcode ref.func needs to call
      * aot_invoke_native_func instead */
     if (!call_aot_invoke_native_func(comp_ctx, func_ctx, func_idx, func_type,
@@ -2107,7 +2133,7 @@ aot_compile_op_call_ref(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 #if WASM_ENABLE_THREAD_MGR != 0
     /* Insert suspend check point */
     if (comp_ctx->enable_thread_mgr) {
-        if (!check_suspend_flags(comp_ctx, func_ctx))
+        if (!check_suspend_flags(comp_ctx, func_ctx, false))
             goto fail;
     }
 #endif

+ 4 - 3
core/iwasm/compilation/aot_emit_function.h

@@ -14,11 +14,12 @@ extern "C" {
 
 bool
 aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
-                    uint32 func_idx, bool tail_call);
+                    uint32 func_idx, bool tail_call, const uint8 *frame_ip);
 
 bool
 aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
-                             uint32 type_idx, uint32 tbl_idx);
+                             uint32 type_idx, uint32 tbl_idx,
+                             const uint8 *frame_ip);
 
 bool
 aot_compile_op_ref_null(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
@@ -33,7 +34,7 @@ aot_compile_op_ref_func(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 #if WASM_ENABLE_GC != 0
 bool
 aot_compile_op_call_ref(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
-                        uint32 type_idx, bool tail_call);
+                        uint32 type_idx, bool tail_call, const uint8 *frame_ip);
 #endif
 
 #ifdef __cplusplus

+ 1 - 1
core/iwasm/compilation/aot_emit_memory.c

@@ -1501,7 +1501,7 @@ aot_compile_op_atomic_wait(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 #if WASM_ENABLE_THREAD_MGR != 0
     /* Insert suspend check point */
     if (comp_ctx->enable_thread_mgr) {
-        if (!check_suspend_flags(comp_ctx, func_ctx))
+        if (!check_suspend_flags(comp_ctx, func_ctx, false))
             return false;
     }
 #endif

+ 2 - 2
core/iwasm/compilation/aot_emit_parametric.c

@@ -21,8 +21,8 @@ pop_value_from_wasm_stack(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
         return false;
     }
 
-    aot_value =
-        aot_value_stack_pop(&func_ctx->block_stack.block_list_end->value_stack);
+    aot_value = aot_value_stack_pop(
+        comp_ctx, &func_ctx->block_stack.block_list_end->value_stack);
     type = aot_value->type;
 
     if (aot_value->type == VALUE_TYPE_I1) {

+ 1 - 1
core/iwasm/compilation/aot_emit_table.c

@@ -245,7 +245,7 @@ bool
 aot_compile_op_table_set(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                          uint32 tbl_idx)
 {
-    LLVMValueRef val, elem_idx, offset, table_elem;
+    LLVMValueRef val = NULL, elem_idx, offset, table_elem;
 
     if (comp_ctx->enable_gc)
         POP_REF(val);

+ 37 - 1
core/iwasm/compilation/aot_emit_variable.c

@@ -73,10 +73,46 @@ aot_compile_op_set_local(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                          uint32 local_idx)
 {
     LLVMValueRef value;
+    uint8 local_type;
+    uint32 n;
 
     CHECK_LOCAL(local_idx);
 
-    POP(value, get_local_type(comp_ctx, func_ctx, local_idx));
+    local_type = get_local_type(comp_ctx, func_ctx, local_idx);
+
+    POP(value, local_type);
+
+    if (comp_ctx->aot_frame) {
+        /* Get the slot index */
+        n = comp_ctx->aot_frame->cur_wasm_func->local_offsets[local_idx];
+        bh_assert(comp_ctx->aot_frame->lp[n].type == local_type);
+
+        switch (local_type) {
+            case VALUE_TYPE_I32:
+                set_local_i32(comp_ctx->aot_frame, n, value);
+                break;
+            case VALUE_TYPE_I64:
+                set_local_i64(comp_ctx->aot_frame, n, value);
+                break;
+            case VALUE_TYPE_F32:
+                set_local_f32(comp_ctx->aot_frame, n, value);
+                break;
+            case VALUE_TYPE_F64:
+                set_local_f64(comp_ctx->aot_frame, n, value);
+                break;
+            case VALUE_TYPE_V128:
+                set_local_v128(comp_ctx->aot_frame, n, value);
+                break;
+            case VALUE_TYPE_FUNCREF:
+            case VALUE_TYPE_EXTERNREF:
+                set_local_ref(comp_ctx->aot_frame, n, value, local_type);
+                break;
+            /* TODO: handle GC ref types */
+            default:
+                bh_assert(0);
+                break;
+        }
+    }
 
     if (!LLVMBuildStore(comp_ctx->builder, value,
                         func_ctx->locals[local_idx])) {

+ 109 - 14
core/iwasm/compilation/aot_llvm.c

@@ -910,6 +910,27 @@ create_aux_stack_info(const AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
     return true;
 }
 
+static bool /* true on success; false after aot_set_last_error() */
+create_aux_stack_frame(const AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+{ /* Emit IR that loads pointer slot 1 of exec_env into func_ctx->cur_frame */
+    LLVMValueRef offset = I32_ONE, cur_frame_addr; /* index in pointer units */
+
+    if (!(cur_frame_addr = LLVMBuildInBoundsGEP2(
+              comp_ctx->builder, OPQ_PTR_TYPE, func_ctx->exec_env, &offset, 1,
+              "cur_frame_addr"))) { /* presumably &exec_env->cur_frame, confirm */
+        aot_set_last_error("llvm build in bounds gep failed");
+        return false;
+    }
+
+    if (!(func_ctx->cur_frame = LLVMBuildLoad2(comp_ctx->builder, OPQ_PTR_TYPE,
+                                               cur_frame_addr, "cur_frame"))) {
+        aot_set_last_error("llvm build load failed");
+        return false;
+    }
+
+    return true; /* func_ctx->cur_frame now holds the loaded pointer value */
+}
+
 static bool
 create_native_symbol(const AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
 {
@@ -1588,6 +1609,11 @@ aot_create_func_context(const AOTCompData *comp_data, AOTCompContext *comp_ctx,
         goto fail;
     }
 
+    if (comp_ctx->enable_aux_stack_frame
+        && !create_aux_stack_frame(comp_ctx, func_ctx)) {
+        goto fail;
+    }
+
     /* Create local variables */
     if (!create_local_variables(comp_data, comp_ctx, func_ctx, func)) {
         goto fail;
@@ -1625,13 +1651,14 @@ aot_create_func_context(const AOTCompData *comp_data, AOTCompContext *comp_ctx,
 fail:
     if (func_ctx->mem_info)
         wasm_runtime_free(func_ctx->mem_info);
-    aot_block_stack_destroy(&func_ctx->block_stack);
+    aot_block_stack_destroy(comp_ctx, &func_ctx->block_stack);
     wasm_runtime_free(func_ctx);
     return NULL;
 }
 
 static void
-aot_destroy_func_contexts(AOTFuncContext **func_ctxes, uint32 count)
+aot_destroy_func_contexts(AOTCompContext *comp_ctx, AOTFuncContext **func_ctxes,
+                          uint32 count)
 {
     uint32 i;
 
@@ -1639,7 +1666,7 @@ aot_destroy_func_contexts(AOTFuncContext **func_ctxes, uint32 count)
         if (func_ctxes[i]) {
             if (func_ctxes[i]->mem_info)
                 wasm_runtime_free(func_ctxes[i]->mem_info);
-            aot_block_stack_destroy(&func_ctxes[i]->block_stack);
+            aot_block_stack_destroy(comp_ctx, &func_ctxes[i]->block_stack);
             aot_checked_addr_list_destroy(func_ctxes[i]);
             wasm_runtime_free(func_ctxes[i]);
         }
@@ -1676,7 +1703,8 @@ aot_create_func_contexts(const AOTCompData *comp_data, AOTCompContext *comp_ctx)
         AOTFunc *func = comp_data->funcs[i];
         if (!(func_ctxes[i] =
                   aot_create_func_context(comp_data, comp_ctx, func, i))) {
-            aot_destroy_func_contexts(func_ctxes, comp_data->func_count);
+            aot_destroy_func_contexts(comp_ctx, func_ctxes,
+                                      comp_data->func_count);
             return NULL;
         }
     }
@@ -2353,8 +2381,10 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
     if (option->builtin_intrinsics)
         comp_ctx->builtin_intrinsics = option->builtin_intrinsics;
 
-    if (option->enable_gc)
+    if (option->enable_gc) {
         comp_ctx->enable_gc = true;
+        comp_ctx->enable_aux_stack_frame = true;
+    }
 
     comp_ctx->opt_level = option->opt_level;
     comp_ctx->size_level = option->size_level;
@@ -2961,7 +2991,7 @@ aot_destroy_comp_context(AOTCompContext *comp_ctx)
         LLVMOrcDisposeLLLazyJIT(comp_ctx->orc_jit);
 
     if (comp_ctx->func_ctxes)
-        aot_destroy_func_contexts(comp_ctx->func_ctxes,
+        aot_destroy_func_contexts(comp_ctx, comp_ctx->func_ctxes,
                                   comp_ctx->func_ctx_count);
 
     if (bh_list_length(&comp_ctx->native_symbols) > 0) {
@@ -2978,6 +3008,10 @@ aot_destroy_comp_context(AOTCompContext *comp_ctx)
         wasm_runtime_free(comp_ctx->target_cpu);
     }
 
+    if (comp_ctx->aot_frame) {
+        wasm_runtime_free(comp_ctx->aot_frame);
+    }
+
     wasm_runtime_free(comp_ctx);
 }
 
@@ -3056,7 +3090,8 @@ aot_get_native_symbol_index(AOTCompContext *comp_ctx, const char *symbol)
 }
 
 void
-aot_value_stack_push(AOTValueStack *stack, AOTValue *value)
+aot_value_stack_push(const AOTCompContext *comp_ctx, AOTValueStack *stack,
+                     AOTValue *value)
 {
     if (!stack->value_list_head)
         stack->value_list_head = stack->value_list_end = value;
@@ -3065,10 +3100,38 @@ aot_value_stack_push(AOTValueStack *stack, AOTValue *value)
         value->prev = stack->value_list_end;
         stack->value_list_end = value;
     }
+
+    if (comp_ctx->aot_frame) {
+        switch (value->type) {
+            case VALUE_TYPE_I32:
+            case VALUE_TYPE_I1:
+                push_i32(comp_ctx->aot_frame, value);
+                break;
+            case VALUE_TYPE_I64:
+                push_i64(comp_ctx->aot_frame, value);
+                break;
+            case VALUE_TYPE_F32:
+                push_f32(comp_ctx->aot_frame, value);
+                break;
+            case VALUE_TYPE_F64:
+                push_f64(comp_ctx->aot_frame, value);
+                break;
+            case VALUE_TYPE_V128:
+                push_v128(comp_ctx->aot_frame, value);
+                break;
+            case VALUE_TYPE_FUNCREF:
+            case VALUE_TYPE_EXTERNREF:
+                push_ref(comp_ctx->aot_frame, value);
+                break;
+            default:
+                bh_assert(0);
+                break;
+        }
+    }
 }
 
 AOTValue *
-aot_value_stack_pop(AOTValueStack *stack)
+aot_value_stack_pop(const AOTCompContext *comp_ctx, AOTValueStack *stack)
 {
     AOTValue *value = stack->value_list_end;
 
@@ -3082,11 +3145,43 @@ aot_value_stack_pop(AOTValueStack *stack)
         value->prev = NULL;
     }
 
+    if (comp_ctx->aot_frame) {
+        bh_assert(value);
+        bh_assert(value->value == (comp_ctx->aot_frame->sp - 1)->value);
+        bh_assert(value->type == (comp_ctx->aot_frame->sp - 1)->type);
+
+        switch (value->type) {
+            case VALUE_TYPE_I32:
+            case VALUE_TYPE_I1:
+                pop_i32(comp_ctx->aot_frame);
+                break;
+            case VALUE_TYPE_I64:
+                pop_i64(comp_ctx->aot_frame);
+                break;
+            case VALUE_TYPE_F32:
+                pop_f32(comp_ctx->aot_frame);
+                break;
+            case VALUE_TYPE_F64:
+                pop_f64(comp_ctx->aot_frame);
+                break;
+            case VALUE_TYPE_V128:
+                pop_v128(comp_ctx->aot_frame);
+                break;
+            case VALUE_TYPE_FUNCREF:
+            case VALUE_TYPE_EXTERNREF:
+                pop_ref(comp_ctx->aot_frame);
+                break;
+            default:
+                bh_assert(0);
+                break;
+        }
+    }
+
     return value;
 }
 
 void
-aot_value_stack_destroy(AOTValueStack *stack)
+aot_value_stack_destroy(AOTCompContext *comp_ctx, AOTValueStack *stack)
 {
     AOTValue *value = stack->value_list_head, *p;
 
@@ -3131,14 +3226,14 @@ aot_block_stack_pop(AOTBlockStack *stack)
 }
 
 void
-aot_block_stack_destroy(AOTBlockStack *stack)
+aot_block_stack_destroy(AOTCompContext *comp_ctx, AOTBlockStack *stack)
 {
     AOTBlock *block = stack->block_list_head, *p;
 
     while (block) {
         p = block->next;
-        aot_value_stack_destroy(&block->value_stack);
-        aot_block_destroy(block);
+        aot_value_stack_destroy(comp_ctx, &block->value_stack);
+        aot_block_destroy(comp_ctx, block);
         block = p;
     }
 
@@ -3147,9 +3242,9 @@ aot_block_stack_destroy(AOTBlockStack *stack)
 }
 
 void
-aot_block_destroy(AOTBlock *block)
+aot_block_destroy(AOTCompContext *comp_ctx, AOTBlock *block)
 {
-    aot_value_stack_destroy(&block->value_stack);
+    aot_value_stack_destroy(comp_ctx, &block->value_stack);
     if (block->param_types)
         wasm_runtime_free(block->param_types);
     if (block->param_phis)

+ 64 - 6
core/iwasm/compilation/aot_llvm.h

@@ -63,6 +63,8 @@ extern "C" {
 #undef DUMP_MODULE
 #endif
 
+struct AOTValueSlot;
+
 /**
  * Value in the WASM operation stack, each stack element
  * is an LLVM value
@@ -85,6 +87,54 @@ typedef struct AOTValueStack {
     AOTValue *value_list_end;
 } AOTValueStack;
 
+/* Record information of a value slot of local variable or stack
+   during translation; AOTCompFrame::lp is an array of these slots */
+typedef struct AOTValueSlot {
+    /* The LLVM value of this slot */
+    LLVMValueRef value;
+
+    /* The value type of this slot (a VALUE_TYPE_* code) */
+    uint8 type;
+
+    /* The dirty bit of the value slot. It's set if the value in
+       register is newer than the value in memory. */
+    uint32 dirty : 1;
+
+    /* Whether the new value in register is a reference, which is valid
+       only when the dirty bit is set. */
+    uint32 ref : 1;
+
+    /* Committed reference flag (two bits wide to hold three states):
+         0: unknown, 1: not-reference, 2: reference */
+    uint32 committed_ref : 2;
+} AOTValueSlot;
+
+/* Frame information for translation; the current one is reachable
+   through AOTCompContext::aot_frame */
+typedef struct AOTCompFrame {
+    /* The current wasm module */
+    WASMModule *cur_wasm_module;
+    /* The current wasm function */
+    WASMFunction *cur_wasm_func;
+    /* The current wasm function index */
+    uint32 cur_wasm_func_idx;
+    /* The current compilation context */
+    struct AOTCompContext *comp_ctx;
+    /* The current function context */
+    struct AOTFuncContext *func_ctx;
+
+    /* Max local slot number */
+    uint32 max_local_cell_num;
+
+    /* Max operand stack slot number */
+    uint32 max_stack_cell_num;
+
+    /* Stack top pointer; (sp - 1) is the most recently pushed slot */
+    AOTValueSlot *sp;
+
+    /* Local variables + stack operands.
+       NOTE(review): declared with a single element; presumably the frame
+       is allocated with room for all slots -- confirm at the allocation
+       site. */
+    AOTValueSlot lp[1];
+} AOTCompFrame;
+
 typedef struct AOTBlock {
     struct AOTBlock *next;
     struct AOTBlock *prev;
@@ -123,6 +173,9 @@ typedef struct AOTBlock {
     uint32 result_count;
     uint8 *result_types;
     LLVMValueRef *result_phis;
+
+    /* The begin frame stack pointer of this block */
+    AOTValueSlot *frame_sp_begin;
 } AOTBlock;
 
 /**
@@ -175,6 +228,8 @@ typedef struct AOTFuncContext {
 
     LLVMValueRef cur_exception;
 
+    LLVMValueRef cur_frame;
+
     bool mem_space_unchanged;
     AOTCheckedAddrList checked_addr_list;
 
@@ -407,7 +462,6 @@ typedef struct AOTCompContext {
     AOTLLVMConsts llvm_consts;
 
     /* Function contexts */
-    /* TODO: */
     AOTFuncContext **func_ctxes;
     uint32 func_ctx_count;
     char **custom_sections_wp;
@@ -431,6 +485,9 @@ typedef struct AOTCompContext {
     char stack_usage_temp_file[64];
     const char *llvm_passes;
     const char *builtin_intrinsics;
+
+    /* Current frame information for translation */
+    AOTCompFrame *aot_frame;
 } AOTCompContext;
 
 enum {
@@ -499,13 +556,14 @@ void
 aot_destroy_elf_file(uint8 *elf_file);
 
 void
-aot_value_stack_push(AOTValueStack *stack, AOTValue *value);
+aot_value_stack_push(const AOTCompContext *comp_ctx, AOTValueStack *stack,
+                     AOTValue *value);
 
 AOTValue *
-aot_value_stack_pop(AOTValueStack *stack);
+aot_value_stack_pop(const AOTCompContext *comp_ctx, AOTValueStack *stack);
 
 void
-aot_value_stack_destroy(AOTValueStack *stack);
+aot_value_stack_destroy(AOTCompContext *comp_ctx, AOTValueStack *stack);
 
 void
 aot_block_stack_push(AOTBlockStack *stack, AOTBlock *block);
@@ -514,10 +572,10 @@ AOTBlock *
 aot_block_stack_pop(AOTBlockStack *stack);
 
 void
-aot_block_stack_destroy(AOTBlockStack *stack);
+aot_block_stack_destroy(AOTCompContext *comp_ctx, AOTBlockStack *stack);
 
 void
-aot_block_destroy(AOTBlock *block);
+aot_block_destroy(AOTCompContext *comp_ctx, AOTBlock *block);
 
 LLVMTypeRef
 wasm_type_to_llvm_type(const AOTLLVMTypes *llvm_types, uint8 wasm_type);

+ 0 - 74
core/iwasm/fast-jit/jit_frontend.c

@@ -2311,77 +2311,3 @@ jit_frontend_get_jitted_return_addr_offset()
 {
     return (uint32)offsetof(WASMInterpFrame, jitted_return_addr);
 }
-
-#if 0
-#if WASM_ENABLE_THREAD_MGR != 0
-bool
-check_suspend_flags(JitCompContext *cc, JITFuncContext *func_ctx)
-{
-    LLVMValueRef terminate_addr, terminate_flags, flag, offset, res;
-    JitBasicBlock *terminate_check_block, non_terminate_block;
-    JITFuncType *jit_func_type = func_ctx->jit_func->func_type;
-    JitBasicBlock *terminate_block;
-
-    /* Offset of suspend_flags */
-    offset = I32_FIVE;
-
-    if (!(terminate_addr = LLVMBuildInBoundsGEP(
-              cc->builder, func_ctx->exec_env, &offset, 1, "terminate_addr"))) {
-        jit_set_last_error("llvm build in bounds gep failed");
-        return false;
-    }
-    if (!(terminate_addr =
-              LLVMBuildBitCast(cc->builder, terminate_addr, INT32_PTR_TYPE,
-                               "terminate_addr_ptr"))) {
-        jit_set_last_error("llvm build bit cast failed");
-        return false;
-    }
-
-    if (!(terminate_flags =
-              LLVMBuildLoad(cc->builder, terminate_addr, "terminate_flags"))) {
-        jit_set_last_error("llvm build bit cast failed");
-        return false;
-    }
-    /* Set terminate_flags memory accecc to volatile, so that the value
-        will always be loaded from memory rather than register */
-    LLVMSetVolatile(terminate_flags, true);
-
-    CREATE_BASIC_BLOCK(terminate_check_block, "terminate_check");
-    MOVE_BASIC_BLOCK_AFTER_CURR(terminate_check_block);
-
-    CREATE_BASIC_BLOCK(non_terminate_block, "non_terminate");
-    MOVE_BASIC_BLOCK_AFTER_CURR(non_terminate_block);
-
-    BUILD_ICMP(LLVMIntSGT, terminate_flags, I32_ZERO, res, "need_terminate");
-    BUILD_COND_BR(res, terminate_check_block, non_terminate_block);
-
-    /* Move builder to terminate check block */
-    SET_BUILDER_POS(terminate_check_block);
-
-    CREATE_BASIC_BLOCK(terminate_block, "terminate");
-    MOVE_BASIC_BLOCK_AFTER_CURR(terminate_block);
-
-    if (!(flag = LLVMBuildAnd(cc->builder, terminate_flags, I32_ONE,
-                              "termination_flag"))) {
-        jit_set_last_error("llvm build AND failed");
-        return false;
-    }
-
-    BUILD_ICMP(LLVMIntSGT, flag, I32_ZERO, res, "need_terminate");
-    BUILD_COND_BR(res, terminate_block, non_terminate_block);
-
-    /* Move builder to terminate block */
-    SET_BUILDER_POS(terminate_block);
-    if (!jit_build_zero_function_ret(cc, func_ctx, jit_func_type)) {
-        goto fail;
-    }
-
-    /* Move builder to terminate block */
-    SET_BUILDER_POS(non_terminate_block);
-    return true;
-
-fail:
-    return false;
-}
-#endif /* End of WASM_ENABLE_THREAD_MGR */
-#endif

+ 3 - 0
core/iwasm/interpreter/wasm_interp.h

@@ -95,6 +95,9 @@ wasm_interp_call_wasm(struct WASMModuleInstance *module_inst,
 #if WASM_ENABLE_GC != 0
 bool
 wasm_interp_traverse_gc_rootset(struct WASMExecEnv *exec_env, void *heap);
+
+uint8 *
+wasm_interp_get_frame_ref(WASMInterpFrame *frame);
 #endif
 
 #ifdef __cplusplus

+ 24 - 5
core/iwasm/interpreter/wasm_interp_classic.c

@@ -381,6 +381,12 @@ init_frame_refs(uint8 *frame_ref, uint32 cell_num, WASMFunctionInstance *func)
     }
 }
 
+uint8 * /* accessor declared in wasm_interp.h under WASM_ENABLE_GC */
+wasm_interp_get_frame_ref(WASMInterpFrame *frame)
+{
+    return get_frame_ref(frame); /* forwards to get_frame_ref() unchanged */
+}
+
 /* Return the corresponding ref slot of the given address of local
    variable or stack pointer. */
 
@@ -5243,7 +5249,8 @@ llvm_jit_call_func_bytecode(WASMModuleInstance *module_inst,
     uint32 func_idx = (uint32)(function - module_inst->e->functions);
     bool ret;
 
-#if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0)
+#if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0) \
+    || (WASM_ENABLE_JIT_STACK_FRAME != 0)
     if (!llvm_jit_alloc_frame(exec_env, function - module_inst->e->functions)) {
         /* wasm operand stack overflow has been thrown,
            no need to throw again */
@@ -5269,7 +5276,8 @@ llvm_jit_call_func_bytecode(WASMModuleInstance *module_inst,
             if (size > UINT32_MAX
                 || !(argv1 = wasm_runtime_malloc((uint32)size))) {
                 wasm_set_exception(module_inst, "allocate memory failed");
-                return false;
+                ret = false;
+                goto fail;
             }
         }
 
@@ -5293,7 +5301,7 @@ llvm_jit_call_func_bytecode(WASMModuleInstance *module_inst,
         if (!ret) {
             if (argv1 != argv1_buf)
                 wasm_runtime_free(argv1);
-            return ret;
+            goto fail;
         }
 
         /* Get extra result values */
@@ -5327,15 +5335,26 @@ llvm_jit_call_func_bytecode(WASMModuleInstance *module_inst,
 
         if (argv1 != argv1_buf)
             wasm_runtime_free(argv1);
-        return true;
+
+        ret = true;
     }
     else {
         ret = wasm_runtime_invoke_native(
             exec_env, module_inst->func_ptrs[func_idx], func_type, NULL, NULL,
             argv, argc, argv);
 
-        return ret && !wasm_copy_exception(module_inst, NULL) ? true : false;
+        if (ret)
+            ret = !wasm_copy_exception(module_inst, NULL);
     }
+
+fail:
+
+#if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0) \
+    || (WASM_ENABLE_JIT_STACK_FRAME != 0)
+    llvm_jit_free_frame(exec_env);
+#endif
+
+    return ret;
 }
 #endif /* end of WASM_ENABLE_JIT != 0 */
 

+ 6 - 0
core/iwasm/interpreter/wasm_interp_fast.c

@@ -344,6 +344,12 @@ init_frame_refs(uint8 *frame_ref, uint32 cell_num, WASMFunctionInstance *func)
     }
 }
 
+uint8 * /* accessor declared in wasm_interp.h under WASM_ENABLE_GC */
+wasm_interp_get_frame_ref(WASMInterpFrame *frame)
+{
+    return frame->frame_ref; /* here the pointer is read from the frame */
+}
+
 /* Return the corresponding ref slot of the given slot of local
    variable or stack pointer. */
 

+ 2 - 1
core/iwasm/interpreter/wasm_loader.c

@@ -4009,7 +4009,8 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
     option.enable_ref_types = true;
 #endif
     option.enable_aux_stack_check = true;
-#if (WASM_ENABLE_PERF_PROFILING != 0) || (WASM_ENABLE_DUMP_CALL_STACK != 0)
+#if (WASM_ENABLE_PERF_PROFILING != 0) || (WASM_ENABLE_DUMP_CALL_STACK != 0) \
+    || (WASM_ENABLE_JIT_STACK_FRAME != 0)
     option.enable_aux_stack_frame = true;
 #endif
 #if WASM_ENABLE_MEMORY_PROFILING != 0

+ 2 - 1
core/iwasm/interpreter/wasm_mini_loader.c

@@ -1901,7 +1901,8 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
     option.enable_ref_types = true;
 #endif
     option.enable_aux_stack_check = true;
-#if (WASM_ENABLE_PERF_PROFILING != 0) || (WASM_ENABLE_DUMP_CALL_STACK != 0)
+#if (WASM_ENABLE_PERF_PROFILING != 0) || (WASM_ENABLE_DUMP_CALL_STACK != 0) \
+    || (WASM_ENABLE_JIT_STACK_FRAME != 0)
     option.enable_aux_stack_frame = true;
 #endif
 #if WASM_ENABLE_MEMORY_PROFILING != 0

+ 107 - 10
core/iwasm/interpreter/wasm_runtime.c

@@ -2348,6 +2348,29 @@ fail:
     return NULL;
 }
 
+#if WASM_ENABLE_DUMP_CALL_STACK != 0
+static void /* frees every element's lp buffer, then destroys the vector */
+destroy_c_api_frames(Vector *frames)
+{
+    WASMCApiFrame frame = { 0 }; /* scratch element filled by bh_vector_get */
+    uint32 i, total_frames, ret;
+
+    total_frames = (uint32)bh_vector_size(frames);
+
+    for (i = 0; i < total_frames; i++) {
+        ret = bh_vector_get(frames, i, &frame);
+        bh_assert(ret);
+
+        if (frame.lp) /* only elements with a non-NULL lp own a buffer */
+            wasm_runtime_free(frame.lp);
+    }
+
+    ret = bh_vector_destroy(frames); /* callers still free or re-init frames */
+    bh_assert(ret);
+    (void)ret; /* presumably silences unused-value warnings -- confirm */
+}
+#endif
+
 void
 wasm_deinstantiate(WASMModuleInstance *module_inst, bool is_sub_inst)
 {
@@ -2447,7 +2470,7 @@ wasm_deinstantiate(WASMModuleInstance *module_inst, bool is_sub_inst)
 
 #if WASM_ENABLE_DUMP_CALL_STACK != 0
     if (module_inst->frames) {
-        bh_vector_destroy(module_inst->frames);
+        destroy_c_api_frames(module_inst->frames);
         wasm_runtime_free(module_inst->frames);
         module_inst->frames = NULL;
     }
@@ -3125,6 +3148,7 @@ wasm_interp_create_call_stack(struct WASMExecEnv *exec_env)
 {
     WASMModuleInstance *module_inst =
         (WASMModuleInstance *)wasm_exec_env_get_module_inst(exec_env);
+    WASMModule *module = module_inst->module;
     WASMInterpFrame *first_frame,
         *cur_frame = wasm_exec_env_get_cur_frame(exec_env);
     uint32 n = 0;
@@ -3139,9 +3163,8 @@ wasm_interp_create_call_stack(struct WASMExecEnv *exec_env)
     }
 
     /* release previous stack frames and create new ones */
-    if (!bh_vector_destroy(module_inst->frames)
-        || !bh_vector_init(module_inst->frames, n, sizeof(WASMCApiFrame),
-                           false)) {
+    destroy_c_api_frames(module_inst->frames);
+    if (!bh_vector_init(module_inst->frames, n, sizeof(WASMCApiFrame), false)) {
         return false;
     }
 
@@ -3153,6 +3176,8 @@ wasm_interp_create_call_stack(struct WASMExecEnv *exec_env)
         WASMFunctionInstance *func_inst = cur_frame->function;
         const char *func_name = NULL;
         const uint8 *func_code_base = NULL;
+        uint32 max_local_cell_num, max_stack_cell_num;
+        uint32 all_cell_num, lp_size;
 
         if (!func_inst) {
             cur_frame = cur_frame->prev_frame;
@@ -3197,8 +3222,57 @@ wasm_interp_create_call_stack(struct WASMExecEnv *exec_env)
 
         frame.func_name_wp = func_name;
 
+        if (frame.func_index >= module->import_function_count) {
+            uint32 wasm_func_idx =
+                frame.func_index - module->import_function_count;
+            max_local_cell_num =
+                module->functions[wasm_func_idx]->param_cell_num
+                + module->functions[wasm_func_idx]->local_cell_num;
+            max_stack_cell_num =
+                module->functions[wasm_func_idx]->max_stack_cell_num;
+            all_cell_num = max_local_cell_num + max_stack_cell_num;
+#if WASM_ENABLE_FAST_INTERP != 0
+            all_cell_num += module->functions[wasm_func_idx]->const_cell_num;
+#endif
+        }
+        else {
+            WASMFuncType *func_type =
+                module->import_functions[frame.func_index].u.function.func_type;
+            max_local_cell_num =
+                func_type->param_cell_num > 2 ? func_type->param_cell_num : 2;
+            max_stack_cell_num = 0;
+            all_cell_num = max_local_cell_num + max_stack_cell_num;
+        }
+
+#if WASM_ENABLE_GC == 0
+        lp_size = all_cell_num * 4;
+#else
+        lp_size = align_uint(all_cell_num * 5, 4);
+#endif
+        if (lp_size > 0) {
+            if (!(frame.lp = wasm_runtime_malloc(lp_size))) {
+                destroy_c_api_frames(module_inst->frames);
+                return false;
+            }
+            bh_memcpy_s(frame.lp, lp_size, cur_frame->lp, lp_size);
+
+#if WASM_ENABLE_FAST_INTERP == 0
+            frame.sp = frame.lp + (cur_frame->sp - cur_frame->lp);
+#else
+            /* for fast-interp, let frame sp point to the end of the frame */
+            frame.sp = frame.lp + all_cell_num;
+#endif
+#if WASM_ENABLE_GC != 0
+            frame.frame_ref = (uint8 *)frame.lp
+                              + (wasm_interp_get_frame_ref(cur_frame)
+                                 - (uint8 *)cur_frame->lp);
+#endif
+        }
+
         if (!bh_vector_append(module_inst->frames, &frame)) {
-            bh_vector_destroy(module_inst->frames);
+            if (frame.lp)
+                wasm_runtime_free(frame.lp);
+            destroy_c_api_frames(module_inst->frames);
             return false;
         }
 
@@ -3615,18 +3689,40 @@ llvm_jit_table_grow(WASMModuleInstance *module_inst, uint32 tbl_idx,
 }
 #endif /* end of WASM_ENABLE_REF_TYPES != 0 || WASM_ENABLE_GC != 0 */
 
-#if WASM_ENABLE_DUMP_CALL_STACK != 0 || WASM_ENABLE_PERF_PROFILING != 0
+#if WASM_ENABLE_DUMP_CALL_STACK != 0 || WASM_ENABLE_PERF_PROFILING != 0 \
+    || WASM_ENABLE_JIT_STACK_FRAME != 0
 bool
 llvm_jit_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
 {
     WASMModuleInstance *module_inst;
+    WASMModule *module;
     WASMInterpFrame *frame;
-    uint32 size;
+    uint32 size, max_local_cell_num, max_stack_cell_num;
 
     bh_assert(exec_env->module_inst->module_type == Wasm_Module_Bytecode);
 
     module_inst = (WASMModuleInstance *)exec_env->module_inst;
-    size = wasm_interp_interp_frame_size(0);
+    module = module_inst->module;
+
+    if (func_index >= module->import_function_count) {
+        WASMFunction *func =
+            module->functions[func_index - module->import_function_count];
+
+        max_local_cell_num = func->param_cell_num + func->local_cell_num;
+        max_stack_cell_num = func->max_stack_cell_num;
+    }
+    else {
+        WASMFunctionImport *func =
+            &((module->import_functions + func_index)->u.function);
+
+        max_local_cell_num = func->func_type->param_cell_num > 2
+                                 ? func->func_type->param_cell_num
+                                 : 2;
+        max_stack_cell_num = 0;
+    }
+
+    size =
+        wasm_interp_interp_frame_size(max_local_cell_num + max_stack_cell_num);
 
     frame = wasm_exec_env_alloc_wasm_frame(exec_env, size);
     if (!frame) {
@@ -3636,7 +3732,7 @@ llvm_jit_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
 
     frame->function = module_inst->e->functions + func_index;
     frame->ip = NULL;
-    frame->sp = frame->lp;
+    frame->sp = frame->lp + max_local_cell_num;
 #if WASM_ENABLE_PERF_PROFILING != 0
     frame->time_started = os_time_get_boot_microsecond();
 #endif
@@ -3668,7 +3764,8 @@ llvm_jit_free_frame(WASMExecEnv *exec_env)
     wasm_exec_env_set_cur_frame(exec_env, prev_frame);
 }
 #endif /* end of WASM_ENABLE_DUMP_CALL_STACK != 0 \
-          || WASM_ENABLE_PERF_PROFILING != 0 */
+          || WASM_ENABLE_PERF_PROFILING != 0      \
+          || WASM_ENABLE_JIT_STACK_FRAME != 0 */
 
 #if WASM_ENABLE_GC != 0
 void *

+ 2 - 1
core/iwasm/interpreter/wasm_runtime.h

@@ -698,7 +698,8 @@ llvm_jit_table_grow(WASMModuleInstance *module_inst, uint32 tbl_idx,
                     uint32 inc_entries, uintptr_t init_val);
 #endif
 
-#if WASM_ENABLE_DUMP_CALL_STACK != 0 || WASM_ENABLE_PERF_PROFILING != 0
+#if WASM_ENABLE_DUMP_CALL_STACK != 0 || WASM_ENABLE_PERF_PROFILING != 0 \
+    || WASM_ENABLE_JIT_STACK_FRAME != 0
 bool
 llvm_jit_alloc_frame(WASMExecEnv *exec_env, uint32 func_index);
 

+ 30 - 6
doc/build_wamr.md

@@ -27,8 +27,8 @@ The script `runtime_lib.cmake` defines a number of variables for configuring the
 - **WAMR_BUILD_TARGET**: set the target CPU architecture. Current supported targets are:  X86_64, X86_32, AARCH64, ARM, THUMB, XTENSA, ARC, RISCV32, RISCV64 and MIPS.
   - For ARM and THUMB, the format is \<arch>\[\<sub-arch>]\[_VFP], where \<sub-arch> is the ARM sub-architecture and the "_VFP" suffix means using VFP coprocessor registers s0-s15 (d0-d7) for passing arguments or returning results in standard procedure-call. Both \<sub-arch> and "_VFP" are optional, e.g. ARMV7, ARMV7_VFP, THUMBV7, THUMBV7_VFP and so on.
   - For AARCH64, the format is \<arch>[\<sub-arch>], VFP is enabled by default. \<sub-arch> is optional, e.g. AARCH64, AARCH64V8, AARCH64V8.1 and so on.
-  - For RISCV64, the format is \<arch\>[_abi], where "_abi" is optional, currently the supported formats are RISCV64, RISCV64_LP64D and RISCV64_LP64: RISCV64 and RISCV64_LP64D are identical, using [LP64D](https://github.com/riscv/riscv-elf-psabi-doc/blob/master/riscv-elf.md#-named-abis) as abi (LP64 with hardware floating-point calling convention for FLEN=64). And RISCV64_LP64 uses [LP64](https://github.com/riscv/riscv-elf-psabi-doc/blob/master/riscv-elf.md#-named-abis) as abi (Integer calling-convention only, and hardware floating-point calling convention is not used).
-  - For RISCV32, the format is \<arch\>[_abi], where "_abi" is optional, currently the supported formats are RISCV32, RISCV32_ILP32D and RISCV32_ILP32: RISCV32 and RISCV32_ILP32D are identical, using [ILP32D](https://github.com/riscv/riscv-elf-psabi-doc/blob/master/riscv-elf.md#-named-abis) as abi (ILP32 with hardware floating-point calling convention for FLEN=64). And RISCV32_ILP32 uses [ILP32](https://github.com/riscv/riscv-elf-psabi-doc/blob/master/riscv-elf.md#-named-abis) as abi (Integer calling-convention only, and hardware floating-point calling convention is not used).
+  - For RISCV64, the format is \<arch\>[_abi], where "_abi" is optional, currently the supported formats are RISCV64, RISCV64_LP64D and RISCV64_LP64: RISCV64 and RISCV64_LP64D are identical, using [LP64D](https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#named-abis) as abi (LP64 with hardware floating-point calling convention for FLEN=64). And RISCV64_LP64 uses [LP64](https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#named-abis) as abi (Integer calling-convention only, and hardware floating-point calling convention is not used).
+  - For RISCV32, the format is \<arch\>[_abi], where "_abi" is optional, currently the supported formats are RISCV32, RISCV32_ILP32D and RISCV32_ILP32: RISCV32 and RISCV32_ILP32D are identical, using [ILP32D](https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#named-abis) as abi (ILP32 with hardware floating-point calling convention for FLEN=64). And RISCV32_ILP32 uses [ILP32](https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#named-abis) as abi (Integer calling-convention only, and hardware floating-point calling convention is not used).
 
 ```bash
 cmake -DWAMR_BUILD_PLATFORM=linux -DWAMR_BUILD_TARGET=ARM
@@ -124,6 +124,14 @@ cmake -DWAMR_BUILD_PLATFORM=linux -DWAMR_BUILD_TARGET=ARM
 
 - **WAMR_BUILD_CUSTOM_NAME_SECTION**=1/0, load the function name from custom name section, default to disable if not set
 
+#### **Enable AOT stack frame feature**
+- **WAMR_BUILD_AOT_STACK_FRAME**=1/0, default to disable if not set
+> Note: if it is enabled, the AOT stack frames (like the stack frames of the classic interpreter, but only the necessary data is committed) will be created for AOT mode in function calls. Please also add the `--enable-dump-call-stack` option to wamrc when compiling the AOT module.
+
+#### **Enable JIT stack frame feature**
+- **WAMR_BUILD_JIT_STACK_FRAME**=1/0, default to disable if not set
+> Note: if it is enabled, the JIT stack frames (like stack frame of classic interpreter but only necessary data is committed) will be created for JIT mode in function calls.
+
 #### **Enable dump call stack feature**
 - **WAMR_BUILD_DUMP_CALL_STACK**=1/0, default to disable if not set
 
@@ -160,7 +168,8 @@ Currently we only profile the memory consumption of module, module_instance and
 - **WAMR_APP_THREAD_STACK_SIZE_MAX**=n, default to 8 MB (8388608) if not set
 > Note: the AOT boundary check with hardware trap mechanism might consume large stack since the OS may lazily grow the stack mapping as a guard page is hit, we may use this configuration to reduce the total stack usage, e.g. -DWAMR_APP_THREAD_STACK_SIZE_MAX=131072 (128 KB).
 
-#### **WAMR_BH_VPRINTF**=<vprintf_callback>, default to disable if not set
+#### **Set vprintf callback**
+- **WAMR_BH_VPRINTF**=<vprintf_callback>, default to disable if not set
 > Note: if the vprintf_callback function is provided by developer, the os_printf() and os_vprintf() in Linux, Darwin, Windows and VxWorks platforms, besides WASI Libc output will call the callback function instead of libc vprintf() function to redirect the stdout output. For example, developer can define the callback function like below outside runtime lib:
 >
 > ```C
@@ -203,17 +212,32 @@ Currently we only profile the memory consumption of module, module_instance and
 
 > For AoT file, must use `--emit-custom-sections` to specify which sections need to be emitted into AoT file, otherwise all custom sections (except custom name section) will be ignored.
 
-### **Stack guard size**
+#### **Stack guard size**
 - **WAMR_BUILD_STACK_GUARD_SIZE**=n, default to N/A if not set.
 > Note: By default, the stack guard size is 1K (1024) or 24K (if uvwasi enabled).
 
-### **Disable the writing linear memory base address to x86 GS segment register
+#### **Disable writing the linear memory base address to x86 GS segment register**
 - **WAMR_DISABLE_WRITE_GS_BASE**=1/0, default to enable if not set and supported by platform
 > Note: by default only platform [linux x86-64](https://github.com/bytecodealliance/wasm-micro-runtime/blob/5fb5119239220b0803e7045ca49b0a29fe65e70e/core/shared/platform/linux/platform_internal.h#L67) will enable this feature, for 32-bit platforms it's automatically disabled even when the flag is set to 0. In linux x86-64, writing the linear memory base address to x86 GS segment register may be used to speedup the linear memory access for LLVM AOT/JIT, when `--enable-segue=[<flags>]` option is added for `wamrc` or `iwasm`.
 
-### **Enable running PGO(Profile-Guided Optimization) instrumented AOT file**
+#### **Enable running PGO(Profile-Guided Optimization) instrumented AOT file**
 - **WAMR_BUILD_STATIC_PGO**=1/0, default to disable if not set
 
+#### **Configurable memory access boundary check**
+- **WAMR_CONFIGUABLE_BOUNDS_CHECKS**=1/0, default to disable if not set
+> Note: If it is enabled, `iwasm --disable-bounds-checks` can be run to disable the memory access boundary checks for interpreter mode.
+
+#### **Module instance context APIs**
+- **WAMR_BUILD_MODULE_INST_CONTEXT**=1/0, default to disable if not set
+> Note: If it is enabled, the embedder can set one or more contexts that it created for a module instance; the following APIs are provided:
+```C
+    wasm_runtime_create_context_key
+    wasm_runtime_destroy_context_key
+    wasm_runtime_set_context
+    wasm_runtime_set_context_spread
+    wasm_runtime_get_context
+```
+
 **Combination of configurations:**
 
 We can combine the configurations. For example, if we want to disable interpreter, enable AOT and WASI, we can run command:

+ 0 - 1
product-mini/platforms/linux/CMakeLists.txt

@@ -97,7 +97,6 @@ if (NOT DEFINED WAMR_BUILD_LIB_WASI_THREADS)
   set (WAMR_BUILD_LIB_WASI_THREADS 0)
 endif()
 
-
 if (NOT DEFINED WAMR_BUILD_MINI_LOADER)
   # Disable wasm mini loader by default
   set (WAMR_BUILD_MINI_LOADER 0)

+ 8 - 0
product-mini/platforms/nuttx/wamr.mk

@@ -204,8 +204,15 @@ else
 CFLAGS += -DWASM_ENABLE_BULK_MEMORY=0
 endif
 
+ifeq ($(CONFIG_INTERPRETERS_WAMR_AOT_STACK_FRAME), y)
+CFLAGS += -DWASM_ENABLE_AOT_STACK_FRAME=1
+else
+CFLAGS += -DWASM_ENABLE_AOT_STACK_FRAME=0
+endif
+
 ifeq ($(CONFIG_INTERPRETERS_WAMR_PERF_PROFILING),y)
 CFLAGS += -DWASM_ENABLE_PERF_PROFILING=1
+CFLAGS += -DWASM_ENABLE_AOT_STACK_FRAME=1
 else
 CFLAGS += -DWASM_ENABLE_PERF_PROFILING=0
 endif
@@ -224,6 +231,7 @@ endif
 
 ifeq ($(CONFIG_INTERPRETERS_WAMR_DUMP_CALL_STACK),y)
 CFLAGS += -DWASM_ENABLE_DUMP_CALL_STACK=1
+CFLAGS += -DWASM_ENABLE_AOT_STACK_FRAME=1
 else
 CFLAGS += -DWASM_ENABLE_DUMP_CALL_STACK=0
 endif

+ 1 - 0
wamr-compiler/CMakeLists.txt

@@ -41,6 +41,7 @@ add_definitions(-DWASM_ENABLE_TAIL_CALL=1)
 add_definitions(-DWASM_ENABLE_SIMD=1)
 add_definitions(-DWASM_ENABLE_REF_TYPES=1)
 add_definitions(-DWASM_ENABLE_CUSTOM_NAME_SECTION=1)
+add_definitions(-DWASM_ENABLE_AOT_STACK_FRAME=1)
 add_definitions(-DWASM_ENABLE_DUMP_CALL_STACK=1)
 add_definitions(-DWASM_ENABLE_PERF_PROFILING=1)
 add_definitions(-DWASM_ENABLE_LOAD_CUSTOM_SECTION=1)