Răsfoiți Sursa

Make memory profiling show native stack usage (#1917)

YAMAMOTO Takashi 3 ani în urmă
părinte
comite
7d3b2a8773

+ 4 - 0
core/iwasm/aot/aot_runtime.c

@@ -30,6 +30,8 @@ bh_static_assert(offsetof(WASMExecEnv, aux_stack_boundary)
 bh_static_assert(offsetof(WASMExecEnv, aux_stack_bottom)
                  == 7 * sizeof(uintptr_t));
 bh_static_assert(offsetof(WASMExecEnv, native_symbol) == 8 * sizeof(uintptr_t));
+bh_static_assert(offsetof(WASMExecEnv, native_stack_top_min)
+                 == 9 * sizeof(uintptr_t));
 
 bh_static_assert(offsetof(AOTModuleInstance, memories) == 1 * sizeof(uint64));
 bh_static_assert(offsetof(AOTModuleInstance, func_ptrs) == 5 * sizeof(uint64));
@@ -1257,6 +1259,7 @@ invoke_native_with_hw_bound_check(WASMExecEnv *exec_env, void *func_ptr,
     /* Check for native stack overflow first to ensure we have enough
        native stack to run the following code before actually calling
        the aot function in invokeNative function. */
+    RECORD_STACK_USAGE(exec_env, (uint8 *)&module_inst);
     if ((uint8 *)&module_inst < exec_env->native_stack_boundary
                                     + page_size * (guard_page_count + 1)) {
         aot_set_exception_with_id(module_inst, EXCE_NATIVE_STACK_OVERFLOW);
@@ -1856,6 +1859,7 @@ aot_call_indirect(WASMExecEnv *exec_env, uint32 tbl_idx, uint32 table_elem_idx,
        exec_env->native_stack_boundary must have been set, we don't set
        it again */
 
+    RECORD_STACK_USAGE(exec_env, (uint8 *)&module_inst);
     if ((uint8 *)&module_inst < exec_env->native_stack_boundary) {
         aot_set_exception_with_id(module_inst, EXCE_NATIVE_STACK_OVERFLOW);
         goto fail;

+ 1 - 0
core/iwasm/common/wasm_exec_env.c

@@ -211,6 +211,7 @@ wasm_exec_env_set_thread_info(WASMExecEnv *exec_env)
     exec_env->handle = os_self_thread();
     exec_env->native_stack_boundary =
         stack_boundary ? stack_boundary + WASM_STACK_GUARD_SIZE : NULL;
+    exec_env->native_stack_top_min = (void *)UINTPTR_MAX;
 }
 
 #if WASM_ENABLE_THREAD_MGR != 0

+ 17 - 0
core/iwasm/common/wasm_exec_env.h

@@ -84,6 +84,12 @@ typedef struct WASMExecEnv {
     void **native_symbol;
 #endif
 
+    /*
+     * The lowest stack pointer value observed.
+     * Assumption: native stack grows to the lower address.
+     */
+    uint8 *native_stack_top_min;
+
 #if WASM_ENABLE_FAST_JIT != 0
     /**
      * Cache for
@@ -165,6 +171,17 @@ typedef struct WASMExecEnv {
     } wasm_stack;
 } WASMExecEnv;
 
+#if WASM_ENABLE_MEMORY_PROFILING != 0
+/*
+ * Record a native stack position for memory profiling.
+ *
+ * e: pointer to the WASMExecEnv whose native_stack_top_min is updated.
+ * p: stack address (uint8 *) observed at the call site.
+ *
+ * native_stack_top_min is lowered to p when p is below the value stored
+ * so far; otherwise nothing is written.
+ *
+ * Both arguments are captured in locals so that each one is evaluated
+ * exactly once, even if the caller passes an expression with side effects.
+ */
+#define RECORD_STACK_USAGE(e, p)                                   \
+    do {                                                           \
+        WASMExecEnv *record_env_ = (e);                            \
+        uint8 *record_pos_ = (p);                                  \
+        if (record_env_->native_stack_top_min > record_pos_) {     \
+            record_env_->native_stack_top_min = record_pos_;       \
+        }                                                          \
+    } while (0)
+#else
+/* Memory profiling is disabled: expand to a no-op statement. */
+#define RECORD_STACK_USAGE(e, p) (void)0
+#endif
+
 WASMExecEnv *
 wasm_exec_env_create_internal(struct WASMModuleInstanceCommon *module_inst,
                               uint32 stack_size);

+ 16 - 0
core/iwasm/common/wasm_runtime_common.c

@@ -1399,6 +1399,22 @@ wasm_runtime_dump_mem_consumption(WASMExecEnv *exec_env)
     else
         os_printf("Total aux stack used: no enough info to profile\n");
 
+    /*
+     * Report the native stack usage estimation.
+     *
+     * Unlike the aux stack above, we report the amount unused
+     * because we don't know the stack "bottom".
+     *
+     * Note that this is just about what the runtime itself observed.
+     * It doesn't cover host func implementations, signal handlers, etc.
+     */
+    if (exec_env->native_stack_top_min != (void *)UINTPTR_MAX)
+        os_printf("Native stack left: %zd\n",
+                  exec_env->native_stack_top_min
+                      - exec_env->native_stack_boundary);
+    else
+        os_printf("Native stack left: no enough info to profile\n");
+
     os_printf("Total app heap used: %u\n", app_heap_peak_size);
 }
 #endif /* end of (WASM_ENABLE_MEMORY_PROFILING != 0) \

+ 1 - 0
core/iwasm/compilation/aot_compiler.h

@@ -259,6 +259,7 @@ check_type_compatible(uint8 src_type, uint8 dst_type)
 #define I32_SIX LLVM_CONST(i32_six)
 #define I32_SEVEN LLVM_CONST(i32_seven)
 #define I32_EIGHT LLVM_CONST(i32_eight)
+#define I32_NINE LLVM_CONST(i32_nine)
 #define I32_NEG_ONE LLVM_CONST(i32_neg_one)
 #define I64_NEG_ONE LLVM_CONST(i64_neg_one)
 #define I32_MIN LLVM_CONST(i32_min)

+ 100 - 8
core/iwasm/compilation/aot_emit_function.c

@@ -366,6 +366,87 @@ fail:
 #endif /* end of (WASM_ENABLE_DUMP_CALL_STACK != 0) \
                  || (WASM_ENABLE_PERF_PROFILING != 0) */
 
+/**
+ * Emit IR that updates the "lowest native stack pointer observed" slot
+ * before a call is made.
+ *
+ * The emitted code estimates the callee's stack position as
+ * func_ctx->last_alloca minus (callee_cell_num * 4) bytes, loads the
+ * current value through func_ctx->native_stack_top_min_addr and stores
+ * the estimate back when it is lower (unsigned comparison).
+ *
+ * On success the builder is left positioned at the end of a new
+ * "block_after_update" basic block, so the caller keeps emitting there.
+ *
+ * @param comp_ctx the compilation context (builder, LLVM context,
+ *        target pointer size)
+ * @param func_ctx the context of the function being compiled
+ * @param callee_cell_num number of cells attributed to the callee; each
+ *        cell is counted as 4 bytes here
+ *
+ * @return true on success, false if an LLVM build step failed (the error
+ *         is reported through aot_set_last_error)
+ */
+static bool
+record_stack_usage(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
+                   uint32 callee_cell_num)
+{
+    LLVMBasicBlockRef block_curr = LLVMGetInsertBlock(comp_ctx->builder);
+    LLVMBasicBlockRef block_update;
+    LLVMBasicBlockRef block_after_update;
+    LLVMValueRef callee_local_size, new_sp, cmp;
+    LLVMValueRef native_stack_top_min;
+    LLVMTypeRef ptrdiff_type;
+    /* Use an offset integer type as wide as the target's pointers */
+    if (comp_ctx->pointer_size == sizeof(uint64_t)) {
+        ptrdiff_type = I64_TYPE;
+    }
+    else {
+        ptrdiff_type = I32_TYPE;
+    }
+
+    /*
+     * new_sp = last_alloca - callee_local_size;
+     * if (*native_stack_top_min_addr > new_sp) {
+     *    *native_stack_top_min_addr = new_sp;
+     * }
+     */
+
+    /* The constant is already negated (and created as signed), so the
+       GEP below moves downwards from last_alloca */
+    if (!(callee_local_size = LLVMConstInt(
+              ptrdiff_type, -(int64_t)callee_cell_num * 4, true))) {
+        aot_set_last_error("llvm build const failed.");
+        return false;
+    }
+    /* Index over INT8_TYPE so that the offset is expressed in bytes */
+    if (!(new_sp = LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE,
+                                         func_ctx->last_alloca,
+                                         &callee_local_size, 1, "new_sp"))) {
+        aot_set_last_error("llvm build gep failed");
+        return false;
+    }
+    /* Load the lowest stack pointer value recorded so far */
+    if (!(native_stack_top_min = LLVMBuildLoad2(
+              comp_ctx->builder, OPQ_PTR_TYPE,
+              func_ctx->native_stack_top_min_addr, "native_stack_top_min"))) {
+        aot_set_last_error("llvm build load failed");
+        return false;
+    }
+    /* cmp is true when the estimate is below the recorded minimum */
+    if (!(cmp = LLVMBuildICmp(comp_ctx->builder, LLVMIntULT, new_sp,
+                              native_stack_top_min, "cmp"))) {
+        aot_set_last_error("llvm build icmp failed.");
+        return false;
+    }
+
+    if (!(block_update = LLVMAppendBasicBlockInContext(
+              comp_ctx->context, func_ctx->func, "block_update"))) {
+        aot_set_last_error("llvm add basic block failed.");
+        return false;
+    }
+    if (!(block_after_update = LLVMAppendBasicBlockInContext(
+              comp_ctx->context, func_ctx->func, "block_after_update"))) {
+        aot_set_last_error("llvm add basic block failed.");
+        return false;
+    }
+    /* Keep the layout: block_curr -> block_update -> block_after_update */
+    LLVMMoveBasicBlockAfter(block_update, block_curr);
+    LLVMMoveBasicBlockAfter(block_after_update, block_update);
+
+    /* Terminate the current block: store only when a new minimum is seen */
+    if (!LLVMBuildCondBr(comp_ctx->builder, cmp, block_update,
+                         block_after_update)) {
+        aot_set_last_error("llvm build cond br failed.");
+        return false;
+    }
+
+    /* block_update: write the new minimum, then fall into the join block */
+    LLVMPositionBuilderAtEnd(comp_ctx->builder, block_update);
+    if (!LLVMBuildStore(comp_ctx->builder, new_sp,
+                        func_ctx->native_stack_top_min_addr)) {
+        aot_set_last_error("llvm build store failed");
+        return false;
+    }
+    if (!LLVMBuildBr(comp_ctx->builder, block_after_update)) {
+        aot_set_last_error("llvm build br failed.");
+        return false;
+    }
+
+    /* Subsequent instructions are emitted into the join block */
+    LLVMPositionBuilderAtEnd(comp_ctx->builder, block_after_update);
+    return true;
+}
+
 static bool
 check_stack_boundary(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                      uint32 callee_cell_num)
@@ -409,6 +490,19 @@ check_stack_boundary(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     return true;
 }
 
+/**
+ * Emit the native stack instrumentation that precedes a call.
+ *
+ * Stack usage recording is emitted first when
+ * comp_ctx->enable_stack_estimation is set, followed by the stack
+ * boundary check when comp_ctx->enable_stack_bound_check is set.
+ *
+ * @param comp_ctx the compilation context holding the two option flags
+ * @param func_ctx the context of the function being compiled
+ * @param callee_cell_num cell count forwarded unchanged to both emitters
+ *
+ * @return true if every enabled emitter succeeded, false otherwise
+ */
+static bool
+check_stack(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
+            uint32 callee_cell_num)
+{
+    if (comp_ctx->enable_stack_estimation) {
+        if (!record_stack_usage(comp_ctx, func_ctx, callee_cell_num)) {
+            return false;
+        }
+    }
+
+    if (comp_ctx->enable_stack_bound_check) {
+        if (!check_stack_boundary(comp_ctx, func_ctx, callee_cell_num)) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
 /**
  * Check whether the app address and its buffer are inside the linear memory,
  * if no, throw exception
@@ -852,8 +946,7 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
         callee_cell_num =
             aot_func->param_cell_num + aot_func->local_cell_num + 1;
 
-        if (comp_ctx->enable_stack_bound_check
-            && !check_stack_boundary(comp_ctx, func_ctx, callee_cell_num))
+        if (!check_stack(comp_ctx, func_ctx, callee_cell_num))
             goto fail;
 
 #if LLVM_VERSION_MAJOR >= 14
@@ -1467,12 +1560,11 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     /* Translate call non-import block */
     LLVMPositionBuilderAtEnd(comp_ctx->builder, block_call_non_import);
 
-    if (comp_ctx->enable_stack_bound_check
-        && !check_stack_boundary(comp_ctx, func_ctx,
-                                 param_cell_num + ext_cell_num
-                                     + 1
-                                     /* Reserve some local variables */
-                                     + 16))
+    if (!check_stack(comp_ctx, func_ctx,
+                     param_cell_num + ext_cell_num
+                         + 1
+                         /* Reserve some local variables */
+                         + 16))
         goto fail;
 
     /* Load function pointer */

+ 24 - 1
core/iwasm/compilation/aot_llvm.c

@@ -286,6 +286,21 @@ create_native_stack_bound(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
     return true;
 }
 
+/**
+ * Compute the address of the native_stack_top_min slot of the exec env
+ * and cache it in func_ctx->native_stack_top_min_addr.
+ *
+ * The slot is addressed as element 9 of func_ctx->exec_env viewed as an
+ * array of OPQ_PTR_TYPE values.
+ *
+ * @param comp_ctx the compilation context providing the IR builder
+ * @param func_ctx the function context that receives the slot address
+ *
+ * @return true on success, false if the GEP could not be built (the
+ *         error is reported through aot_set_last_error)
+ */
+static bool
+create_native_stack_top_min(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+{
+    LLVMValueRef slot_index = I32_NINE;
+    LLVMValueRef slot_addr;
+
+    slot_addr = LLVMBuildInBoundsGEP2(comp_ctx->builder, OPQ_PTR_TYPE,
+                                      func_ctx->exec_env, &slot_index, 1,
+                                      "native_stack_top_min_addr");
+    /* Stored unconditionally, so the field is NULL after a failed build */
+    func_ctx->native_stack_top_min_addr = slot_addr;
+
+    if (!slot_addr) {
+        aot_set_last_error("llvm build in bounds gep failed");
+        return false;
+    }
+
+    return true;
+}
+
 static bool
 create_aux_stack_info(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
 {
@@ -434,7 +449,8 @@ create_local_variables(AOTCompData *comp_data, AOTCompContext *comp_ctx,
         }
     }
 
-    if (comp_ctx->enable_stack_bound_check) {
+    if (comp_ctx->enable_stack_bound_check
+        || comp_ctx->enable_stack_estimation) {
         if (aot_func_type->param_count + func->local_count > 0) {
             func_ctx->last_alloca = func_ctx->locals[aot_func_type->param_count
                                                      + func->local_count - 1];
@@ -963,6 +979,10 @@ aot_create_func_context(AOTCompData *comp_data, AOTCompContext *comp_ctx,
         && !create_native_stack_bound(comp_ctx, func_ctx)) {
         goto fail;
     }
+    if (comp_ctx->enable_stack_estimation
+        && !create_native_stack_top_min(comp_ctx, func_ctx)) {
+        goto fail;
+    }
 
     /* Get auxiliary stack info */
     if (wasm_func->has_op_set_global_aux_stack
@@ -1622,6 +1642,9 @@ aot_create_comp_context(AOTCompData *comp_data, aot_comp_option_t option)
     if (option->disable_llvm_lto)
         comp_ctx->disable_llvm_lto = true;
 
+    if (option->enable_stack_estimation)
+        comp_ctx->enable_stack_estimation = true;
+
     comp_ctx->opt_level = option->opt_level;
     comp_ctx->size_level = option->size_level;
 

+ 5 - 0
core/iwasm/compilation/aot_llvm.h

@@ -163,6 +163,7 @@ typedef struct AOTFuncContext {
     LLVMValueRef aot_inst;
     LLVMValueRef argv_buf;
     LLVMValueRef native_stack_bound;
+    LLVMValueRef native_stack_top_min_addr;
     LLVMValueRef aux_stack_bound;
     LLVMValueRef aux_stack_bottom;
     LLVMValueRef native_symbol;
@@ -313,6 +314,9 @@ typedef struct AOTCompContext {
     /* Native stack boundary check */
     bool enable_stack_bound_check;
 
+    /* Native stack usage estimation */
+    bool enable_stack_estimation;
+
     /* 128-bit SIMD */
     bool enable_simd;
 
@@ -403,6 +407,7 @@ typedef struct AOTCompOption {
     bool enable_aux_stack_frame;
     bool disable_llvm_intrinsics;
     bool disable_llvm_lto;
+    bool enable_stack_estimation;
     uint32 opt_level;
     uint32 size_level;
     uint32 output_format;

+ 1 - 0
core/iwasm/include/aot_export.h

@@ -55,6 +55,7 @@ typedef struct AOTCompOption {
     bool enable_aux_stack_frame;
     bool disable_llvm_intrinsics;
     bool disable_llvm_lto;
+    bool enable_stack_estimation;
     uint32_t opt_level;
     uint32_t size_level;
     uint32_t output_format;

+ 1 - 0
core/iwasm/interpreter/wasm_interp_classic.c

@@ -4150,6 +4150,7 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env,
     }
     argc = function->param_cell_num;
 
+    RECORD_STACK_USAGE(exec_env, (uint8 *)&prev_frame);
 #if !(defined(OS_ENABLE_HW_BOUND_CHECK) \
       && WASM_DISABLE_STACK_HW_BOUND_CHECK == 0)
     if ((uint8 *)&prev_frame < exec_env->native_stack_boundary) {

+ 1 - 0
core/iwasm/interpreter/wasm_interp_fast.c

@@ -3901,6 +3901,7 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env,
     }
     argc = function->param_cell_num;
 
+    RECORD_STACK_USAGE(exec_env, (uint8 *)&prev_frame);
 #if !(defined(OS_ENABLE_HW_BOUND_CHECK) \
       && WASM_DISABLE_STACK_HW_BOUND_CHECK == 0)
     if ((uint8 *)&prev_frame < exec_env->native_stack_boundary) {

+ 3 - 0
core/iwasm/interpreter/wasm_loader.c

@@ -3048,6 +3048,9 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
 #if (WASM_ENABLE_PERF_PROFILING != 0) || (WASM_ENABLE_DUMP_CALL_STACK != 0)
     option.enable_aux_stack_frame = true;
 #endif
+#if WASM_ENABLE_MEMORY_PROFILING != 0
+    option.enable_stack_estimation = true;
+#endif
 
     module->comp_ctx = aot_create_comp_context(module->comp_data, &option);
     if (!module->comp_ctx) {

+ 3 - 0
core/iwasm/interpreter/wasm_mini_loader.c

@@ -1894,6 +1894,9 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
 #if (WASM_ENABLE_PERF_PROFILING != 0) || (WASM_ENABLE_DUMP_CALL_STACK != 0)
     option.enable_aux_stack_frame = true;
 #endif
+#if WASM_ENABLE_MEMORY_PROFILING != 0
+    option.enable_stack_estimation = true;
+#endif
 
     module->comp_ctx = aot_create_comp_context(module->comp_data, &option);
     if (!module->comp_ctx) {

+ 1 - 0
core/iwasm/interpreter/wasm_runtime.c

@@ -2074,6 +2074,7 @@ call_wasm_with_hw_bound_check(WASMModuleInstance *module_inst,
     /* Check for native stack overflow first to ensure we have enough
        native stack to run the following code before actually calling
        the aot function in invokeNative function. */
+    RECORD_STACK_USAGE(exec_env, (uint8 *)&exec_env_tls);
     if ((uint8 *)&exec_env_tls < exec_env->native_stack_boundary
                                      + page_size * (guard_page_count + 1)) {
         wasm_set_exception(module_inst, "native stack overflow");

+ 4 - 0
wamr-compiler/main.c

@@ -59,6 +59,7 @@ print_help()
     printf("  --disable-aux-stack-check Disable auxiliary stack overflow/underflow check\n");
     printf("  --enable-dump-call-stack  Enable stack trace feature\n");
     printf("  --enable-perf-profiling   Enable function performance profiling\n");
+    printf("  --enable-memory-profiling Enable memory usage profiling\n");
     printf("  --enable-indirect-mode    Enalbe call function through symbol table but not direct call\n");
     printf("  --disable-llvm-intrinsics Disable the LLVM built-in intrinsics\n");
     printf("  --disable-llvm-lto        Disable the LLVM link time optimization\n");
@@ -254,6 +255,9 @@ main(int argc, char *argv[])
         else if (!strcmp(argv[0], "--enable-perf-profiling")) {
             option.enable_aux_stack_frame = true;
         }
+        else if (!strcmp(argv[0], "--enable-memory-profiling")) {
+            option.enable_stack_estimation = true;
+        }
         else if (!strcmp(argv[0], "--enable-indirect-mode")) {
             option.is_indirect_mode = true;
         }