Parcourir la source

introduce WAMR memory profiling tool (experimental) (#390)

Xu Jun il y a 5 ans
Parent
commit
0226dbbb3d

+ 4 - 0
build-scripts/config_common.cmake

@@ -162,6 +162,10 @@ if (WAMR_DISABLE_HW_BOUND_CHECK EQUAL 1)
   add_definitions (-DWASM_DISABLE_HW_BOUND_CHECK=1)
   message ("     Hardware boundary check disabled")
 endif ()
+if (WAMR_BUILD_MEMORY_PROFILING EQUAL 1)  # opt-in: taken only when the variable compares EQUAL to 1
+  add_definitions (-DWASM_ENABLE_MEMORY_PROFILING=1)  # replaces the 0 fallback that core/config.h sets under #ifndef
+  message ("     Memory profiling enabled")
+endif ()
 if (DEFINED WAMR_APP_THREAD_STACK_SIZE_MAX)
   add_definitions (-DAPP_THREAD_STACK_SIZE_MAX=${WAMR_APP_THREAD_STACK_SIZE_MAX})
 endif ()

+ 14 - 6
core/config.h

@@ -164,11 +164,20 @@ enum {
 #define WASM_DISABLE_HW_BOUND_CHECK 0
 #endif
 
-/* Heap and stack profiling */
-#define BH_ENABLE_MEMORY_PROFILING 0
+/* Memory profiling */
+#ifndef WASM_ENABLE_MEMORY_PROFILING
+#define WASM_ENABLE_MEMORY_PROFILING 0
+#endif
+
+/* Memory tracing */
+#ifndef WASM_ENABLE_MEMORY_TRACING
+#define WASM_ENABLE_MEMORY_TRACING 0
+#endif
 
 /* Heap verification */
+#ifndef BH_ENABLE_GC_VERIFY
 #define BH_ENABLE_GC_VERIFY 0
+#endif
 
 /* Max app number of all modules */
 #define MAX_APP_INSTALLATIONS 3
@@ -197,12 +206,9 @@ enum {
 /* The max percentage of global heap that app memory space can grow */
 #define APP_MEMORY_MAX_GLOBAL_HEAP_PERCENT 1 / 3
 
-/* Default base offset of app heap space */
-#define DEFAULT_APP_HEAP_BASE_OFFSET (1 * BH_GB)
-
 /* Default min/max heap size of each app */
 #define APP_HEAP_SIZE_DEFAULT (8 * 1024)
-#define APP_HEAP_SIZE_MIN (2 * 1024)
+#define APP_HEAP_SIZE_MIN (512)
 #define APP_HEAP_SIZE_MAX (512 * 1024 * 1024)
 
 /* Default wasm stack size of each app */
@@ -229,7 +235,9 @@ enum {
 #define RESERVED_BYTES_TO_NATIVE_STACK_BOUNDARY (512)
 
 /* Default wasm block address cache size and conflict list size */
+#ifndef BLOCK_ADDR_CACHE_SIZE
 #define BLOCK_ADDR_CACHE_SIZE 64
+#endif
 #define BLOCK_ADDR_CONFLICT_SIZE 2
 
 #ifndef WASM_ENABLE_SPEC_TEST

+ 4 - 10
core/iwasm/aot/aot_loader.c

@@ -1701,18 +1701,12 @@ load_from_sections(AOTModule *module, AOTSection *sections,
      * otherwise unpredictable behavior can occur. */
     os_dcache_flush();
 
+#if WASM_ENABLE_MEMORY_TRACING != 0
+    wasm_runtime_dump_module_mem_consumption((WASMModuleCommon*)module);
+#endif
     return true;
 }
 
-#if BH_ENABLE_MEMORY_PROFILING != 0
-static void aot_free(void *ptr)
-{
-    wasm_runtime_free(ptr);
-}
-#else
-#define aot_free wasm_runtime_free
-#endif
-
 static AOTModule*
 create_module(char *error_buf, uint32 error_buf_size)
 {
@@ -1730,7 +1724,7 @@ create_module(char *error_buf, uint32 error_buf_size)
                                    (HashFunc)wasm_string_hash,
                                    (KeyEqualFunc)wasm_string_equal,
                                    NULL,
-                                   aot_free))) {
+                                   wasm_runtime_free))) {
         set_error_buf(error_buf, error_buf_size,
                       "create const string set failed");
         wasm_runtime_free(module);

+ 139 - 0
core/iwasm/aot/aot_runtime.c

@@ -801,6 +801,11 @@ aot_instantiate(AOTModule *module, bool is_sub_inst,
 #endif
 #endif
 
+#if WASM_ENABLE_MEMORY_TRACING != 0
+    wasm_runtime_dump_module_inst_mem_consumption
+                    ((WASMModuleInstanceCommon *)module_inst);
+#endif
+
     return module_inst;
 
 fail:
@@ -1932,3 +1937,137 @@ aot_get_aux_stack(WASMExecEnv *exec_env,
 }
 
 #endif
+
+#if (WASM_ENABLE_MEMORY_PROFILING != 0) || (WASM_ENABLE_MEMORY_TRACING != 0)
+static uint32 const_string_size;
+
+/*
+ * Traversal callback passed to bh_hash_map_traverse() by
+ * aot_get_module_mem_consumption(): adds the footprint of one const-string
+ * entry to the file-level counter const_string_size. The footprint is the
+ * per-element size reported by bh_hash_map_get_elem_struct_size() plus the
+ * string in `value` including its terminating NUL.
+ *
+ * The helper only touches file-static state, so it is given internal
+ * linkage to keep its generic name out of the global namespace.
+ */
+static void
+const_string_node_size_cb(void *key, void *value)
+{
+    /* The key is not needed to size the entry */
+    (void)key;
+
+    const_string_size += bh_hash_map_get_elem_struct_size();
+    /* strlen returns size_t; narrow explicitly to the uint32 counter */
+    const_string_size += (uint32)strlen((const char *)value) + 1;
+}
+
+/*
+ * Estimate how many bytes a loaded AOT module occupies.
+ *
+ * Clears *mem_conspn, stores one byte count per category (module struct,
+ * types, imports, functions, tables, memories, globals, exports, table and
+ * data segments, const strings, AOT code) and finally stores the sum of all
+ * categories in mem_conspn->total_size.
+ *
+ * Not reentrant: the const-string pass accumulates into the file-level
+ * counter const_string_size.
+ */
+void
+aot_get_module_mem_consumption(const AOTModule *module,
+                               WASMModuleMemConsumption *mem_conspn)
+{
+    uint32 idx, entry_size;
+
+    memset(mem_conspn, 0, sizeof(*mem_conspn));
+
+    mem_conspn->module_struct_size = sizeof(AOTModule);
+
+    /* Function types: the pointer array plus each variable-length entry */
+    mem_conspn->types_size = sizeof(AOTFuncType *) * module->func_type_count;
+    for (idx = 0; idx < module->func_type_count; idx++) {
+        AOTFuncType *func_type = module->func_types[idx];
+
+        entry_size = offsetof(AOTFuncType, types)
+                     + sizeof(uint8) * (func_type->param_count
+                                        + func_type->result_count);
+        mem_conspn->types_size += entry_size;
+    }
+
+    /* Imports: one fixed-size record per imported func/global/table/memory */
+    mem_conspn->imports_size =
+        sizeof(AOTImportFunc) * module->import_func_count
+        + sizeof(AOTImportGlobal) * module->import_global_count
+        + sizeof(AOTImportTable) * module->import_table_count
+        + sizeof(AOTImportMemory) * module->import_memory_count;
+
+    /* Per function: one slot in func_ptrs and one in func_type_indexes */
+    mem_conspn->functions_size =
+        module->func_count * (sizeof(void *) + sizeof(uint32));
+
+    mem_conspn->tables_size = module->table_count * sizeof(AOTTable);
+    mem_conspn->memories_size = module->memory_count * sizeof(AOTMemory);
+    mem_conspn->globals_size = module->global_count * sizeof(AOTGlobal);
+    mem_conspn->exports_size = module->export_count * sizeof(AOTExport);
+
+    /* Table segments: the pointer array plus each variable-length entry */
+    mem_conspn->table_segs_size =
+        sizeof(AOTTableInitData *) * module->table_init_data_count;
+    for (idx = 0; idx < module->table_init_data_count; idx++) {
+        AOTTableInitData *table_seg = module->table_init_data_list[idx];
+
+        entry_size = offsetof(AOTTableInitData, func_indexes)
+                     + sizeof(uint32) * table_seg->func_index_count;
+        mem_conspn->table_segs_size += entry_size;
+    }
+
+    /* Data segments: one pointer plus one AOTMemInitData header per entry */
+    mem_conspn->data_segs_size =
+        (sizeof(AOTMemInitData *) + sizeof(AOTMemInitData))
+        * module->mem_init_data_count;
+
+    /* Const strings: the map itself, then every entry via the callback */
+    mem_conspn->const_strs_size =
+        bh_hash_map_get_struct_size(module->const_str_set);
+    const_string_size = 0;
+    if (module->const_str_set) {
+        bh_hash_map_traverse(module->const_str_set,
+                             const_string_node_size_cb);
+    }
+    mem_conspn->const_strs_size += const_string_size;
+
+    /* code size + literal size + object data section headers and payloads */
+    mem_conspn->aot_code_size =
+        module->code_size + module->literal_size
+        + sizeof(AOTObjectDataSection) * module->data_section_count;
+    for (idx = 0; idx < module->data_section_count; idx++) {
+        const AOTObjectDataSection *section = &module->data_sections[idx];
+
+        mem_conspn->aot_code_size += sizeof(uint8) * section->size;
+    }
+
+    /* total_size was cleared by the memset above, so a plain sum suffices */
+    mem_conspn->total_size = mem_conspn->module_struct_size
+                             + mem_conspn->types_size
+                             + mem_conspn->imports_size
+                             + mem_conspn->functions_size
+                             + mem_conspn->tables_size
+                             + mem_conspn->memories_size
+                             + mem_conspn->globals_size
+                             + mem_conspn->exports_size
+                             + mem_conspn->table_segs_size
+                             + mem_conspn->data_segs_size
+                             + mem_conspn->const_strs_size
+                             + mem_conspn->aot_code_size;
+}
+
+/*
+ * Estimate how many bytes an AOT module instance occupies.
+ *
+ * Clears *mem_conspn, stores one byte count per category and stores in
+ * mem_conspn->total_size the sum of the struct, memories, functions,
+ * tables, globals and exports categories. app_heap_size is reported on its
+ * own and is not added to total_size a second time.
+ */
+void
+aot_get_module_inst_mem_consumption(const AOTModuleInstance *module_inst,
+                                    WASMModuleInstMemConsumption *mem_conspn)
+{
+    uint32 i;
+
+    memset(mem_conspn, 0, sizeof(*mem_conspn));
+
+    mem_conspn->module_inst_struct_size = sizeof(AOTModuleInstance);
+
+    /* One AOTPointer slot and one AOTMemoryInstance per memory ... */
+    mem_conspn->memories_size =
+        sizeof(AOTPointer) * module_inst->memory_count
+        + sizeof(AOTMemoryInstance) * module_inst->memory_count;
+    for (i = 0; i < module_inst->memory_count; i++) {
+        AOTMemoryInstance *mem_inst =
+            ((AOTMemoryInstance **)module_inst->memories.ptr)[i];
+        /* ... plus the pages currently held by that memory */
+        mem_conspn->memories_size +=
+            mem_inst->num_bytes_per_page * mem_inst->cur_page_count;
+        /* Accumulate like memories_size does, so an instance with several
+           memories reports all of their heaps rather than only the last
+           one. Byte pointers keep the subtraction well-defined whatever
+           object-pointer type the .ptr members are declared with. */
+        mem_conspn->app_heap_size +=
+            (uint32)((uint8 *)mem_inst->heap_data_end.ptr
+                     - (uint8 *)mem_inst->heap_data.ptr);
+    }
+
+    mem_conspn->tables_size = sizeof(uint32) * module_inst->table_size;
+
+    /* func_ptrs and func_type_indexes */
+    mem_conspn->functions_size =  (sizeof(void *) + sizeof(uint32)) *
+        (((AOTModule *)module_inst->aot_module.ptr)->import_func_count
+         + ((AOTModule *)module_inst->aot_module.ptr)->func_count);
+
+    mem_conspn->globals_size = module_inst->global_data_size;
+
+    mem_conspn->exports_size =
+        sizeof(AOTFunctionInstance) * (uint64)module_inst->export_func_count;
+
+    mem_conspn->total_size += mem_conspn->module_inst_struct_size;
+    mem_conspn->total_size += mem_conspn->memories_size;
+    mem_conspn->total_size += mem_conspn->functions_size;
+    mem_conspn->total_size += mem_conspn->tables_size;
+    mem_conspn->total_size += mem_conspn->globals_size;
+    mem_conspn->total_size += mem_conspn->exports_size;
+}
+#endif /* end of (WASM_ENABLE_MEMORY_PROFILING != 0)
+                 || (WASM_ENABLE_MEMORY_TRACING != 0) */

+ 8 - 0
core/iwasm/aot/aot_runtime.h

@@ -553,6 +553,14 @@ void
 aot_signal_destroy();
 #endif
 
+void
+aot_get_module_mem_consumption(const AOTModule *module,
+                               WASMModuleMemConsumption *mem_conspn);
+
+void
+aot_get_module_inst_mem_consumption(const AOTModuleInstance *module_inst,
+                                    WASMModuleInstMemConsumption *mem_conspn);
+
 #ifdef __cplusplus
 } /* end of extern "C" */
 #endif

+ 4 - 0
core/iwasm/common/wasm_exec_env.c

@@ -43,6 +43,10 @@ wasm_exec_env_create_internal(struct WASMModuleInstanceCommon *module_inst,
     exec_env->wasm_stack.s.top_boundary =
         exec_env->wasm_stack.s.bottom + stack_size;
     exec_env->wasm_stack.s.top = exec_env->wasm_stack.s.bottom;
+
+#if WASM_ENABLE_MEMORY_TRACING != 0
+    wasm_runtime_dump_exec_env_mem_consumption(exec_env);
+#endif
     return exec_env;
 
 #if WASM_ENABLE_THREAD_MGR != 0

+ 13 - 3
core/iwasm/common/wasm_exec_env.h

@@ -92,7 +92,7 @@ typedef struct WASMExecEnv {
     /* The native thread handle of current thread */
     korp_tid handle;
 
-#if WASM_ENABLE_INTERP != 0
+#if WASM_ENABLE_INTERP != 0 && WASM_ENABLE_FAST_INTERP == 0
     BlockAddr block_addr_cache[BLOCK_ADDR_CACHE_SIZE][BLOCK_ADDR_CONFLICT_SIZE];
 #endif
 
@@ -100,6 +100,10 @@ typedef struct WASMExecEnv {
     WASMJmpBuf *jmpbuf_stack_top;
 #endif
 
+#if WASM_ENABLE_MEMORY_PROFILING != 0
+    uint32 max_wasm_stack_used;
+#endif
+
     /* The WASM stack size */
     uint32 wasm_stack_size;
 
@@ -154,13 +158,19 @@ wasm_exec_env_alloc_wasm_frame(WASMExecEnv *exec_env, unsigned size)
        multiplying by 2 is enough. */
     if (addr + size * 2 > exec_env->wasm_stack.s.top_boundary) {
         /* WASM stack overflow. */
-        /* When throwing SOE, the preserved space must be enough. */
-        /* bh_assert(!exec_env->throwing_soe);*/
         return NULL;
     }
 
     exec_env->wasm_stack.s.top += size;
 
+#if WASM_ENABLE_MEMORY_PROFILING != 0
+    {
+        uint32 wasm_stack_used = exec_env->wasm_stack.s.top
+                                 - exec_env->wasm_stack.s.bottom;
+        if (wasm_stack_used > exec_env->max_wasm_stack_used)
+            exec_env->max_wasm_stack_used = wasm_stack_used;
+    }
+#endif
     return addr;
 }
 

+ 94 - 83
core/iwasm/common/wasm_memory.c

@@ -32,8 +32,6 @@ static memory_profile_t *memory_profiles_list = NULL;
 static korp_mutex profile_lock;
 #endif /* end of BH_ENABLE_MEMORY_PROFILING */
 
-#ifndef MALLOC_MEMORY_FROM_SYSTEM
-
 typedef enum Memory_Mode {
     MEMORY_MODE_UNKNOWN = 0,
     MEMORY_MODE_POOL,
@@ -125,28 +123,32 @@ wasm_runtime_memory_pool_size()
         return 1 * BH_GB;
 }
 
-void *
-wasm_runtime_malloc(unsigned int size)
+static inline void *
+wasm_runtime_malloc_internal(unsigned int size)
 {
     if (memory_mode == MEMORY_MODE_UNKNOWN) {
         LOG_WARNING("wasm_runtime_malloc failed: memory hasn't been initialize.\n");
         return NULL;
-    } else if (memory_mode == MEMORY_MODE_POOL) {
+    }
+    else if (memory_mode == MEMORY_MODE_POOL) {
         return mem_allocator_malloc(pool_allocator, size);
-    } else {
+    }
+    else {
         return malloc_func(size);
     }
 }
 
-void *
-wasm_runtime_realloc(void *ptr, unsigned int size)
+static inline void *
+wasm_runtime_realloc_internal(void *ptr, unsigned int size)
 {
     if (memory_mode == MEMORY_MODE_UNKNOWN) {
         LOG_WARNING("wasm_runtime_realloc failed: memory hasn't been initialize.\n");
         return NULL;
-    } else if (memory_mode == MEMORY_MODE_POOL) {
+    }
+    else if (memory_mode == MEMORY_MODE_POOL) {
         return mem_allocator_realloc(pool_allocator, ptr, size);
-    } else {
+    }
+    else {
         if (realloc_func)
             return realloc_func(ptr, size);
         else
@@ -154,20 +156,97 @@ wasm_runtime_realloc(void *ptr, unsigned int size)
     }
 }
 
-void
-wasm_runtime_free(void *ptr)
+static inline void
+wasm_runtime_free_internal(void *ptr)
 {
     if (memory_mode == MEMORY_MODE_UNKNOWN) {
         LOG_WARNING("wasm_runtime_free failed: memory hasn't been initialize.\n");
-    } else if (memory_mode == MEMORY_MODE_POOL) {
+    }
+    else if (memory_mode == MEMORY_MODE_POOL) {
         mem_allocator_free(pool_allocator, ptr);
-    } else {
+    }
+    else {
         free_func(ptr);
     }
 }
 
-#if BH_ENABLE_MEMORY_PROFILING != 0
+void *
+wasm_runtime_malloc(unsigned int size)
+{
+    return wasm_runtime_malloc_internal(size);
+}
+
+void *
+wasm_runtime_realloc(void *ptr, unsigned int size)
+{
+    return wasm_runtime_realloc_internal(ptr, size);
+}
+
+void
+wasm_runtime_free(void *ptr)
+{
+    wasm_runtime_free_internal(ptr);
+}
+
+#if 0
+static uint64 total_malloc = 0;
+static uint64 total_free = 0;
+
+void *
+wasm_runtime_malloc(unsigned int size)
+{
+    void *ret = wasm_runtime_malloc_internal(size + 8);
+
+    if (ret) {
+        total_malloc += size;
+        *(uint32 *)ret = size;
+        return (uint8 *)ret + 8;
+    }
+    else
+        return NULL;
+}
+
+void *
+wasm_runtime_realloc(void *ptr, unsigned int size)
+{
+    if (!ptr)
+        return wasm_runtime_malloc(size);
+    else {
+        uint8 *ptr_old = (uint8 *)ptr - 8;
+        uint32 size_old = *(uint32 *)ptr_old;
+
+        ptr = wasm_runtime_realloc_internal(ptr_old, size + 8);
+        if (ptr) {
+            total_free += size_old;
+            total_malloc += size;
+            *(uint32 *)ptr = size;
+            return (uint8 *)ptr + 8;
+        }
+        return NULL;
+    }
+}
+
+void
+wasm_runtime_free(void *ptr)
+{
+    if (ptr) {
+        uint8 *ptr_old = (uint8 *)ptr - 8;
+        uint32 size_old = *(uint32 *)ptr_old;
 
+        total_free += size_old;
+        wasm_runtime_free_internal(ptr_old);
+    }
+}
+
+void dump_memory_usage()
+{
+    os_printf("Memory usage:\n");
+    os_printf("    total malloc: %"PRIu64"\n", total_malloc);
+    os_printf("    total free: %"PRIu64"\n", total_free);
+}
+#endif
+
+#if BH_ENABLE_MEMORY_PROFILING != 0
 void
 memory_profile_print(const char *file, int line,
                      const char *func, int alloc)
@@ -300,73 +379,5 @@ void memory_usage_summarize()
 
     os_mutex_unlock(&profile_lock);
 }
-
-#endif /* end of BH_ENABLE_MEMORY_PROFILING */
-
-#else /* else of MALLOC_MEMORY_FROM_SYSTEM */
-
-
-void *
-wasm_runtime_malloc(unsigned int size)
-{
-    return malloc(size);
-}
-
-void *
-wasm_runtime_realloc(void *ptr, unsigned int size)
-{
-    return realloc(ptr, size);
-}
-
-void
-wasm_runtime_free(void *ptr)
-{
-    if (ptr)
-        free(ptr);
-}
-
-#if BH_ENABLE_MEMORY_PROFILING != 0
-void *
-wasm_runtime_malloc_profile(const char *file, int line,
-                            const char *func, unsigned int size)
-{
-    (void)file;
-    (void)line;
-    (void)func;
-
-    (void)memory_profiles_list;
-    (void)profile_lock;
-    (void)memory_in_use;
-
-    return malloc(size);
-}
-
-void *
-wasm_runtime_realloc_profile(const char *file, int line,
-                             const char *func, void *ptr, unsigned int size)
-{
-    (void)file;
-    (void)line;
-    (void)func;
-
-    (void)memory_profiles_list;
-    (void)profile_lock;
-    (void)memory_in_use;
-
-    return realloc(ptr, size);
-}
-
-void
-wasm_runtime_free_profile(const char *file, int line,
-                          const char *func, void *ptr)
-{
-    (void)file;
-    (void)line;
-    (void)func;
-
-    if (ptr)
-        free(ptr);
-}
 #endif /* end of BH_ENABLE_MEMORY_PROFILING */
-#endif /* end of MALLOC_MEMORY_FROM_SYSTEM*/
 

+ 3 - 0
core/iwasm/common/wasm_native.c

@@ -275,6 +275,9 @@ register_natives(const char *module_name,
 
     if (!(node = wasm_runtime_malloc(sizeof(NativeSymbolsNode))))
         return false;
+#if WASM_ENABLE_MEMORY_TRACING != 0
+    /* sizeof yields size_t; cast so the argument matches the %u conversion */
+    os_printf("Register native, size: %u\n",
+              (uint32)sizeof(NativeSymbolsNode));
+#endif
 
     node->module_name = module_name;
     node->native_symbols = native_symbols;

+ 169 - 3
core/iwasm/common/wasm_runtime_common.c

@@ -791,6 +791,168 @@ wasm_runtime_destroy_exec_env(WASMExecEnv *exec_env)
     wasm_exec_env_destroy(exec_env);
 }
 
+/*
+ * Print the memory footprint of a loaded module through os_printf.
+ *
+ * The figures come from wasm_get_module_mem_consumption() or
+ * aot_get_module_mem_consumption(), selected by module->module_type among
+ * the loaders compiled in. If no compiled-in type matches, every value is
+ * printed as zero.
+ */
+void
+wasm_runtime_dump_module_mem_consumption(const WASMModuleCommon *module)
+{
+    WASMModuleMemConsumption usage = { 0 };
+
+    switch (module->module_type) {
+#if WASM_ENABLE_INTERP != 0
+        case Wasm_Module_Bytecode:
+            wasm_get_module_mem_consumption((WASMModule*)module, &usage);
+            break;
+#endif
+#if WASM_ENABLE_AOT != 0
+        case Wasm_Module_AoT:
+            aot_get_module_mem_consumption((AOTModule*)module, &usage);
+            break;
+#endif
+        default:
+            break;
+    }
+
+    os_printf("WASM module memory consumption, total size: %u\n",
+              usage.total_size);
+    os_printf("    module struct size: %u\n", usage.module_struct_size);
+    os_printf("    types size: %u\n", usage.types_size);
+    os_printf("    imports size: %u\n", usage.imports_size);
+    os_printf("    funcs size: %u\n", usage.functions_size);
+    os_printf("    tables size: %u\n", usage.tables_size);
+    os_printf("    memories size: %u\n", usage.memories_size);
+    os_printf("    globals size: %u\n", usage.globals_size);
+    os_printf("    exports size: %u\n", usage.exports_size);
+    os_printf("    table segs size: %u\n", usage.table_segs_size);
+    os_printf("    data segs size: %u\n", usage.data_segs_size);
+    os_printf("    const strings size: %u\n", usage.const_strs_size);
+#if WASM_ENABLE_AOT != 0
+    os_printf("    aot code size: %u\n", usage.aot_code_size);
+#endif
+}
+
+/*
+ * Print the memory footprint of a module instance through os_printf.
+ *
+ * The figures come from wasm_get_module_inst_mem_consumption() or
+ * aot_get_module_inst_mem_consumption(), selected by
+ * module_inst->module_type among the runtimes compiled in. If no
+ * compiled-in type matches, every value is printed as zero.
+ */
+void
+wasm_runtime_dump_module_inst_mem_consumption(const WASMModuleInstanceCommon
+                                              *module_inst)
+{
+    WASMModuleInstMemConsumption usage = { 0 };
+
+    switch (module_inst->module_type) {
+#if WASM_ENABLE_INTERP != 0
+        case Wasm_Module_Bytecode:
+            wasm_get_module_inst_mem_consumption(
+                (WASMModuleInstance*)module_inst, &usage);
+            break;
+#endif
+#if WASM_ENABLE_AOT != 0
+        case Wasm_Module_AoT:
+            aot_get_module_inst_mem_consumption(
+                (AOTModuleInstance*)module_inst, &usage);
+            break;
+#endif
+        default:
+            break;
+    }
+
+    os_printf("WASM module inst memory consumption, total size: %u\n",
+              usage.total_size);
+    os_printf("    module inst struct size: %u\n",
+              usage.module_inst_struct_size);
+    os_printf("    memories size: %u\n", usage.memories_size);
+    os_printf("        app heap size: %u\n", usage.app_heap_size);
+    os_printf("    tables size: %u\n", usage.tables_size);
+    os_printf("    functions size: %u\n", usage.functions_size);
+    os_printf("    globals size: %u\n", usage.globals_size);
+    os_printf("    exports size: %u\n", usage.exports_size);
+}
+
+/*
+ * Print the memory footprint of an exec env through os_printf: the struct
+ * part (everything before wasm_stack.s.bottom), the block address cache
+ * when the classic interpreter is built, and the wasm stack size.
+ *
+ * offsetof and sizeof yield size_t, which does not match the %u
+ * conversion on targets where size_t is wider than unsigned int, so the
+ * values are narrowed to uint32 before being printed.
+ */
+void
+wasm_runtime_dump_exec_env_mem_consumption(const WASMExecEnv *exec_env)
+{
+    uint32 exec_env_struct_size =
+        (uint32)offsetof(WASMExecEnv, wasm_stack.s.bottom);
+    uint32 total_size = exec_env_struct_size + exec_env->wasm_stack_size;
+
+    os_printf("Exec env memory consumption, total size: %u\n", total_size);
+    os_printf("    exec env struct size: %u\n", exec_env_struct_size);
+#if WASM_ENABLE_INTERP != 0 && WASM_ENABLE_FAST_INTERP == 0
+    os_printf("        block addr cache size: %u\n",
+              (uint32)sizeof(exec_env->block_addr_cache));
+#endif
+    os_printf("    stack size: %u\n", exec_env->wasm_stack_size);
+}
+
+#if WASM_ENABLE_MEMORY_PROFILING != 0
+uint32
+gc_get_heap_highmark_size(void *heap);
+
+/*
+ * Print a full memory consumption report for the module, module instance
+ * and exec env behind `exec_env`, followed by the usage figures recorded
+ * while profiling: max wasm stack used, max auxiliary stack used (only
+ * tracked on the interpreter path) and the value returned by
+ * gc_get_heap_highmark_size() for the first memory's heap.
+ *
+ * @param exec_env the execution environment to report on
+ */
+void
+wasm_runtime_dump_mem_consumption(WASMExecEnv *exec_env)
+{
+    /* Zero-initialized so that no indeterminate value can be summed if
+       none of the compiled-in module types matches below */
+    WASMModuleInstMemConsumption module_inst_mem_consps = { 0 };
+    WASMModuleMemConsumption module_mem_consps = { 0 };
+    WASMModuleInstanceCommon *module_inst_common;
+    WASMModuleCommon *module_common = NULL;
+    void *heap_handle = NULL;
+    uint32 total_size = 0, app_heap_peak_size = 0;
+    /* (uint32)-1 marks "auxiliary stack usage not available" */
+    uint32 max_aux_stack_used = (uint32)-1;
+
+    module_inst_common = exec_env->module_inst;
+#if WASM_ENABLE_INTERP != 0
+    if (module_inst_common->module_type == Wasm_Module_Bytecode) {
+        WASMModuleInstance *wasm_module_inst =
+                    (WASMModuleInstance*)module_inst_common;
+        WASMModule *wasm_module = wasm_module_inst->module;
+        module_common = (WASMModuleCommon*)wasm_module;
+        if (wasm_module_inst->memories) {
+            heap_handle = wasm_module_inst->memories[0]->heap_handle;
+        }
+        wasm_get_module_inst_mem_consumption
+                    (wasm_module_inst, &module_inst_mem_consps);
+        wasm_get_module_mem_consumption
+                    (wasm_module, &module_mem_consps);
+        /* Only meaningful when the loader located the aux stack global */
+        if (wasm_module_inst->module->aux_stack_top_global_index != (uint32)-1)
+            max_aux_stack_used = wasm_module_inst->max_aux_stack_used;
+    }
+#endif
+#if WASM_ENABLE_AOT != 0
+    if (module_inst_common->module_type == Wasm_Module_AoT) {
+        AOTModuleInstance *aot_module_inst =
+                    (AOTModuleInstance*)module_inst_common;
+        AOTModule *aot_module =
+                    (AOTModule*)aot_module_inst->aot_module.ptr;
+        module_common = (WASMModuleCommon*)aot_module;
+        if (aot_module_inst->memories.ptr) {
+            AOTMemoryInstance **memories =
+               (AOTMemoryInstance **)aot_module_inst->memories.ptr;
+            heap_handle = memories[0]->heap_handle.ptr;
+        }
+        aot_get_module_inst_mem_consumption
+                    (aot_module_inst, &module_inst_mem_consps);
+        aot_get_module_mem_consumption
+                    (aot_module, &module_mem_consps);
+    }
+#endif
+
+    bh_assert(module_common != NULL);
+    if (!module_common) {
+        /* Unknown module type: nothing to report, and the dump helper
+           below would dereference a NULL module. Covers builds in which
+           bh_assert does not stop execution. */
+        return;
+    }
+
+    if (heap_handle) {
+        app_heap_peak_size = gc_get_heap_highmark_size(heap_handle);
+    }
+
+    total_size = offsetof(WASMExecEnv, wasm_stack.s.bottom)
+                 + exec_env->wasm_stack_size
+                 + module_mem_consps.total_size
+                 + module_inst_mem_consps.total_size;
+
+    os_printf("\nMemory consumption summary (bytes):\n");
+    wasm_runtime_dump_module_mem_consumption(module_common);
+    wasm_runtime_dump_module_inst_mem_consumption(module_inst_common);
+    wasm_runtime_dump_exec_env_mem_consumption(exec_env);
+    os_printf("\nTotal memory consumption of module, module inst and "
+              "exec env: %u\n", total_size);
+    os_printf("Total interpreter stack used: %u\n",
+              exec_env->max_wasm_stack_used);
+
+    if (max_aux_stack_used != (uint32)-1)
+        os_printf("Total auxiliary stack used: %u\n", max_aux_stack_used);
+    else
+        os_printf("Total aux stack used: no enough info to profile\n");
+
+    os_printf("Total app heap used: %u\n", app_heap_peak_size);
+}
+#endif
+
 WASMModuleInstanceCommon *
 wasm_runtime_get_module_inst(WASMExecEnv *exec_env)
 {
@@ -1879,10 +2041,11 @@ wasm_application_execute_main(WASMModuleInstanceCommon *module_inst,
     uint32 argc1 = 0, argv1[2] = { 0 };
     uint32 total_argv_size = 0;
     uint64 total_size;
-    uint32 argv_buf_offset;
+    uint32 argv_buf_offset = 0;
     int32 i;
     char *argv_buf, *p, *p_end;
     uint32 *argv_offsets;
+    bool ret;
 
 #if WASM_ENABLE_LIBC_WASI != 0
     if (wasm_runtime_is_wasi_mode(module_inst)) {
@@ -1961,8 +2124,11 @@ wasm_application_execute_main(WASMModuleInstanceCommon *module_inst,
         argv1[1] = (uint32)wasm_runtime_addr_native_to_app(module_inst, argv_offsets);
     }
 
-    return wasm_runtime_create_exec_env_and_call_wasm(module_inst, func,
-                                                      argc1, argv1);
+    ret = wasm_runtime_create_exec_env_and_call_wasm(module_inst, func,
+                                                     argc1, argv1);
+    if (argv_buf_offset)
+        wasm_runtime_module_free(module_inst, argv_buf_offset);
+    return ret;
 }
 
 

+ 39 - 0
core/iwasm/common/wasm_runtime_common.h

@@ -43,6 +43,35 @@ typedef struct WASMModuleInstanceCommon {
     uint8 module_inst_data[1];
 } WASMModuleInstanceCommon;
 
+typedef struct WASMModuleMemConsumption { /* byte counts describing a loaded module */
+    uint32 total_size; /* the AOT filler stores the sum of all fields below */
+    uint32 module_struct_size;
+    uint32 types_size;
+    uint32 imports_size;
+    uint32 functions_size;
+    uint32 tables_size;
+    uint32 memories_size;
+    uint32 globals_size;
+    uint32 exports_size;
+    uint32 table_segs_size;
+    uint32 data_segs_size;
+    uint32 const_strs_size; /* AOT filler: hash map struct plus every entry */
+#if WASM_ENABLE_AOT != 0
+    uint32 aot_code_size; /* AOT filler: code + literal + object data sections */
+#endif
+} WASMModuleMemConsumption;
+
+typedef struct WASMModuleInstMemConsumption { /* byte counts describing a module instance */
+    uint32 total_size; /* AOT filler: sum of every field below except app_heap_size */
+    uint32 module_inst_struct_size;
+    uint32 memories_size;
+    uint32 app_heap_size; /* AOT filler: heap_data_end - heap_data; printed nested under memories */
+    uint32 tables_size;
+    uint32 globals_size;
+    uint32 functions_size;
+    uint32 exports_size;
+} WASMModuleInstMemConsumption;
+
 #if WASM_ENABLE_LIBC_WASI != 0
 typedef struct WASIContext {
     /* Use offset but not native address, since these fields are
@@ -432,6 +461,16 @@ wasm_runtime_invoke_native_raw(WASMExecEnv *exec_env, void *func_ptr,
                                void *attachment,
                                uint32 *argv, uint32 argc, uint32 *ret);
 
+void
+wasm_runtime_dump_module_mem_consumption(const WASMModuleCommon *module);
+
+void
+wasm_runtime_dump_module_inst_mem_consumption(const WASMModuleInstanceCommon
+                                              *module_inst);
+
+void
+wasm_runtime_dump_exec_env_mem_consumption(const WASMExecEnv *exec_env);
+
 #ifdef __cplusplus
 }
 #endif

+ 12 - 0
core/iwasm/include/wasm_export.h

@@ -775,6 +775,18 @@ wasm_runtime_set_user_data(wasm_exec_env_t exec_env,
 WASM_RUNTIME_API_EXTERN void *
 wasm_runtime_get_user_data(wasm_exec_env_t exec_env);
 
+/**
+ * Dump runtime memory consumption, including:
+ *     Exec env memory consumption
+ *     WASM module memory consumption
+ *     WASM module instance memory consumption
+ *     stack and app heap used info
+ *
+ * NOTE(review): the definition in wasm_runtime_common.c is wrapped in
+ * "#if WASM_ENABLE_MEMORY_PROFILING != 0", while this declaration is
+ * unconditional. Callers should build the runtime with memory profiling
+ * enabled (cmake: WAMR_BUILD_MEMORY_PROFILING=1); otherwise there appears
+ * to be no definition to link against - confirm.
+ *
+ * @param exec_env the execution environment
+ */
+WASM_RUNTIME_API_EXTERN void
+wasm_runtime_dump_mem_consumption(wasm_exec_env_t exec_env);
+
 #if WASM_ENABLE_THREAD_MGR != 0
 /* wasm thread callback function type */
 typedef void* (*wasm_thread_callback_t)(wasm_exec_env_t, void *);

+ 1 - 1
core/iwasm/interpreter/wasm.h

@@ -361,7 +361,7 @@ typedef struct WASMModule {
 #endif
 
 #if WASM_ENABLE_MULTI_MODULE != 0
-    // TODO: mutex ? mutli-threads ?
+    /* TODO: add mutex for multi-thread? */
     bh_list import_module_list_head;
     bh_list *import_module_list;
 #endif

+ 8 - 0
core/iwasm/interpreter/wasm_interp_classic.c

@@ -1552,6 +1552,14 @@ label_pop_csp_n:
           if (*(uint32*)(frame_sp - 1) < exec_env->aux_stack_boundary)
             goto out_of_bounds;
           *(int32*)global_addr = POP_I32();
+#if WASM_ENABLE_MEMORY_PROFILING != 0
+          if (module->module->aux_stack_top_global_index != (uint32)-1) {
+              uint32 aux_stack_used =
+                  module->module->aux_stack_bottom - *(uint32*)global_addr;
+              if (aux_stack_used > module->max_aux_stack_used)
+                  module->max_aux_stack_used = aux_stack_used;
+          }
+#endif
           HANDLE_OP_END ();
         }
 

+ 8 - 0
core/iwasm/interpreter/wasm_interp_fast.c

@@ -1510,6 +1510,14 @@ recover_br_info:
           if (frame_lp[addr1] < exec_env->aux_stack_boundary)
               goto out_of_bounds;
           *(int32*)global_addr = frame_lp[addr1];
+#if WASM_ENABLE_MEMORY_PROFILING != 0
+          if (module->module->aux_stack_top_global_index != (uint32)-1) {
+              uint32 aux_stack_used =
+                  module->module->aux_stack_bottom - *(uint32*)global_addr;
+              if (aux_stack_used > module->max_aux_stack_used)
+                  module->max_aux_stack_used = aux_stack_used;
+          }
+#endif
           HANDLE_OP_END ();
         }
 

+ 25 - 32
core/iwasm/interpreter/wasm_loader.c

@@ -2382,7 +2382,6 @@ fail:
     return false;
 }
 
-
 static bool
 wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func,
                              BlockAddr *block_addr_cache,
@@ -2509,28 +2508,6 @@ load_from_sections(WASMModule *module, WASMSection *sections,
         section = section->next;
     }
 
-#if WASM_ENABLE_FAST_INTERP != 0
-    handle_table = wasm_interp_get_handle_table();
-#endif
-
-    total_size = sizeof(BlockAddr) * (uint64)BLOCK_ADDR_CACHE_SIZE
-                                   * BLOCK_ADDR_CONFLICT_SIZE;
-    if (!(block_addr_cache = loader_malloc
-                (total_size, error_buf, error_buf_size))) {
-        return false;
-    }
-
-    for (i = 0; i < module->function_count; i++) {
-        WASMFunction *func = module->functions[i];
-        memset(block_addr_cache, 0, (uint32)total_size);
-        if (!wasm_loader_prepare_bytecode(module, func, block_addr_cache,
-                                          error_buf, error_buf_size)) {
-            wasm_runtime_free(block_addr_cache);
-            return false;
-        }
-    }
-    wasm_runtime_free(block_addr_cache);
-
     module->aux_data_end_global_index = (uint32)-1;
     module->aux_heap_base_global_index = (uint32)-1;
     module->aux_stack_top_global_index = (uint32)-1;
@@ -2658,6 +2635,28 @@ load_from_sections(WASMModule *module, WASMSection *sections,
         }
     }
 
+#if WASM_ENABLE_FAST_INTERP != 0
+    handle_table = wasm_interp_get_handle_table();
+#endif
+
+    total_size = sizeof(BlockAddr) * (uint64)BLOCK_ADDR_CACHE_SIZE
+                                   * BLOCK_ADDR_CONFLICT_SIZE;
+    if (!(block_addr_cache = loader_malloc
+                (total_size, error_buf, error_buf_size))) {
+        return false;
+    }
+
+    for (i = 0; i < module->function_count; i++) {
+        WASMFunction *func = module->functions[i];
+        memset(block_addr_cache, 0, (uint32)total_size);
+        if (!wasm_loader_prepare_bytecode(module, func, block_addr_cache,
+                                          error_buf, error_buf_size)) {
+            wasm_runtime_free(block_addr_cache);
+            return false;
+        }
+    }
+    wasm_runtime_free(block_addr_cache);
+
     if (!module->possible_memory_grow) {
         WASMMemoryImport *memory_import;
         WASMMemory *memory;
@@ -2711,18 +2710,12 @@ load_from_sections(WASMModule *module, WASMSection *sections,
 #endif
     }
 
+#if WASM_ENABLE_MEMORY_TRACING != 0
+    wasm_runtime_dump_module_mem_consumption((WASMModuleCommon*)module);
+#endif
     return true;
 }
 
-#if BH_ENABLE_MEMORY_PROFILING != 0
-static void wasm_loader_free(void *ptr)
-{
-    wasm_runtime_free(ptr);
-}
-#else
-#define wasm_loader_free wasm_free
-#endif
-
 static WASMModule*
 create_module(char *error_buf, uint32 error_buf_size)
 {

+ 24 - 31
core/iwasm/interpreter/wasm_mini_loader.c

@@ -1425,7 +1425,6 @@ load_user_section(const uint8 *buf, const uint8 *buf_end, WASMModule *module,
     return true;
 }
 
-
 static bool
 wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func,
                              BlockAddr *block_addr_cache,
@@ -1541,27 +1540,6 @@ load_from_sections(WASMModule *module, WASMSection *sections,
         section = section->next;
     }
 
-#if WASM_ENABLE_FAST_INTERP != 0
-    handle_table = wasm_interp_get_handle_table();
-#endif
-
-    total_size = sizeof(BlockAddr) * (uint64)BLOCK_ADDR_CACHE_SIZE * BLOCK_ADDR_CONFLICT_SIZE;
-    if (!(block_addr_cache = loader_malloc
-                (total_size, error_buf, error_buf_size))) {
-        return false;
-    }
-
-    for (i = 0; i < module->function_count; i++) {
-        WASMFunction *func = module->functions[i];
-        memset(block_addr_cache, 0, (uint32)total_size);
-        if (!wasm_loader_prepare_bytecode(module, func, block_addr_cache,
-                                          error_buf, error_buf_size)) {
-            wasm_runtime_free(block_addr_cache);
-            return false;
-        }
-    }
-    wasm_runtime_free(block_addr_cache);
-
     module->aux_data_end_global_index = (uint32)-1;
     module->aux_heap_base_global_index = (uint32)-1;
     module->aux_stack_top_global_index = (uint32)-1;
@@ -1689,6 +1667,27 @@ load_from_sections(WASMModule *module, WASMSection *sections,
         }
     }
 
+#if WASM_ENABLE_FAST_INTERP != 0
+    handle_table = wasm_interp_get_handle_table();
+#endif
+
+    total_size = sizeof(BlockAddr) * (uint64)BLOCK_ADDR_CACHE_SIZE * BLOCK_ADDR_CONFLICT_SIZE;
+    if (!(block_addr_cache = loader_malloc
+                (total_size, error_buf, error_buf_size))) {
+        return false;
+    }
+
+    for (i = 0; i < module->function_count; i++) {
+        WASMFunction *func = module->functions[i];
+        memset(block_addr_cache, 0, (uint32)total_size);
+        if (!wasm_loader_prepare_bytecode(module, func, block_addr_cache,
+                                          error_buf, error_buf_size)) {
+            wasm_runtime_free(block_addr_cache);
+            return false;
+        }
+    }
+    wasm_runtime_free(block_addr_cache);
+
     if (!module->possible_memory_grow) {
         WASMMemoryImport *memory_import;
         WASMMemory *memory;
@@ -1742,18 +1741,12 @@ load_from_sections(WASMModule *module, WASMSection *sections,
 #endif
     }
 
+#if WASM_ENABLE_MEMORY_TRACING != 0
+    wasm_runtime_dump_module_mem_consumption(module);
+#endif
     return true;
 }
 
-#if BH_ENABLE_MEMORY_PROFILING != 0
-static void wasm_loader_free(void *ptr)
-{
-    wasm_runtime_free(ptr);
-}
-#else
-#define wasm_loader_free wasm_free
-#endif
-
 static WASMModule*
 create_module(char *error_buf, uint32 error_buf_size)
 {

+ 145 - 5
core/iwasm/interpreter/wasm_runtime.c

@@ -257,7 +257,8 @@ memory_instantiate(WASMModuleInstance *module_inst,
     memory->heap_data_end = memory->heap_data + heap_size;
     memory->memory_data_end = memory->memory_data + (uint32)memory_data_size;
 
-    bh_assert(memory->memory_data_end - (uint8*)memory == (uint32)total_size);
+    bh_assert((uint32)(memory->memory_data_end - (uint8*)memory)
+              == (uint32)total_size);
 
     /* Initialize heap */
     if (heap_size > 0
@@ -1112,10 +1113,10 @@ wasm_instantiate(WASMModule *module, bool is_sub_inst,
 
     /* Instantiate global firstly to get the mutable data size */
     global_count = module->import_global_count + module->global_count;
-    if (global_count && !(globals = globals_instantiate(
-                            module,
-                            module_inst,
-                            &global_data_size, error_buf, error_buf_size))) {
+    if (global_count
+        && !(globals = globals_instantiate(module, module_inst,
+                                           &global_data_size,
+                                           error_buf, error_buf_size))) {
         wasm_deinstantiate(module_inst, false);
         return NULL;
     }
@@ -1419,6 +1420,10 @@ wasm_instantiate(WASMModule *module, bool is_sub_inst,
 #endif
 #endif
 
+#if WASM_ENABLE_MEMORY_TRACING != 0
+    wasm_runtime_dump_module_inst_mem_consumption
+                    ((WASMModuleInstanceCommon *)module_inst);
+#endif
     (void)global_data_end;
     return module_inst;
 }
@@ -1928,3 +1933,138 @@ wasm_get_aux_stack(WASMExecEnv *exec_env,
     return false;
 }
 #endif
+
+#if (WASM_ENABLE_MEMORY_PROFILING != 0) || (WASM_ENABLE_MEMORY_TRACING != 0)
+/**
+ * Estimate the memory consumed by a loaded WASM module.
+ *
+ * The output structure is zeroed, every per-component field is filled in
+ * from the module's counts and element sizes, and total_size is set to the
+ * sum of the component fields.
+ *
+ * @param module the module to measure
+ * @param mem_conspn receives the per-component sizes and their total
+ */
+void
+wasm_get_module_mem_consumption(const WASMModule *module,
+                                WASMModuleMemConsumption *mem_conspn)
+{
+    uint32 i, size;
+    StringNode *node;
+
+    memset(mem_conspn, 0, sizeof(*mem_conspn));
+
+    mem_conspn->module_struct_size = sizeof(WASMModule);
+
+    /* Type pointer array, plus each type's header and one uint8 per
+       param/result */
+    mem_conspn->types_size = sizeof(WASMType *) * module->type_count;
+    for (i = 0; i < module->type_count; i++) {
+        WASMType *type = module->types[i];
+        size = offsetof(WASMType, types) +
+               sizeof(uint8) * (type->param_count + type->result_count);
+        mem_conspn->types_size += size;
+    }
+
+    mem_conspn->imports_size = sizeof(WASMImport) * module->import_count;
+
+    /* Function pointer array, plus for each function: the struct itself,
+       one byte per local and one uint16 per parameter and local */
+    mem_conspn->functions_size = sizeof(WASMFunction *)
+                                 * module->function_count;
+    for (i = 0; i < module->function_count; i++) {
+        WASMFunction *func = module->functions[i];
+        WASMType *type = func->func_type;
+        size = sizeof(WASMFunction) + func->local_count
+               + sizeof(uint16) * (type->param_count + func->local_count);
+#if WASM_ENABLE_FAST_INTERP != 0
+        /* Fast interpreter: add code_compiled_size and one uint32 per
+           const cell */
+        size += func->code_compiled_size
+                + sizeof(uint32) * func->const_cell_num;
+#endif
+        mem_conspn->functions_size += size;
+    }
+
+    mem_conspn->tables_size = sizeof(WASMTable) * module->table_count;
+    mem_conspn->memories_size = sizeof(WASMMemory) * module->memory_count;
+    mem_conspn->globals_size = sizeof(WASMGlobal) * module->global_count;
+    mem_conspn->exports_size = sizeof(WASMExport) * module->export_count;
+
+    /* Table segment array plus one uint32 per function index of each
+       segment; these bytes are accounted to table_segs_size so that
+       tables_size only covers the table definitions */
+    mem_conspn->table_segs_size = sizeof(WASMTableSeg)
+                                  * module->table_seg_count;
+    for (i = 0; i < module->table_seg_count; i++) {
+        WASMTableSeg *table_seg = &module->table_segments[i];
+        mem_conspn->table_segs_size += sizeof(uint32)
+                                       * table_seg->function_count;
+    }
+
+    /* One pointer plus one WASMDataSeg struct per data segment */
+    mem_conspn->data_segs_size = (sizeof(WASMDataSeg *) + sizeof(WASMDataSeg))
+                                 * module->data_seg_count;
+
+    /* Each const string costs its list node plus the NUL-terminated text */
+    for (node = module->const_str_list; node; node = node->next) {
+        mem_conspn->const_strs_size += sizeof(StringNode)
+                                       + strlen(node->str) + 1;
+    }
+
+    mem_conspn->total_size += mem_conspn->module_struct_size;
+    mem_conspn->total_size += mem_conspn->types_size;
+    mem_conspn->total_size += mem_conspn->imports_size;
+    mem_conspn->total_size += mem_conspn->functions_size;
+    mem_conspn->total_size += mem_conspn->tables_size;
+    mem_conspn->total_size += mem_conspn->memories_size;
+    mem_conspn->total_size += mem_conspn->globals_size;
+    mem_conspn->total_size += mem_conspn->exports_size;
+    mem_conspn->total_size += mem_conspn->table_segs_size;
+    mem_conspn->total_size += mem_conspn->data_segs_size;
+    mem_conspn->total_size += mem_conspn->const_strs_size;
+#if WASM_ENABLE_AOT != 0
+    /* aot_code_size is not assigned in this function, so it is still zero
+       from the memset above */
+    mem_conspn->total_size += mem_conspn->aot_code_size;
+#endif
+}
+
+/**
+ * Estimate the memory consumed by a WASM module instance.
+ *
+ * The output structure is zeroed, the per-component fields are computed
+ * from the instance's counts and current sizes, and total_size is set to
+ * the sum of the struct, memories, functions, tables, globals and exports
+ * fields. app_heap_size is reported separately and is not added to the
+ * total.
+ *
+ * @param module_inst the module instance to measure
+ * @param mem_conspn receives the per-component sizes and their total
+ */
+void
+wasm_get_module_inst_mem_consumption(const WASMModuleInstance *module_inst,
+                                     WASMModuleInstMemConsumption *mem_conspn)
+{
+    uint32 idx, item_size;
+
+    memset(mem_conspn, 0, sizeof(*mem_conspn));
+
+    mem_conspn->module_inst_struct_size = sizeof(WASMModuleInstance);
+
+    /* Arrays whose size depends only on the element count */
+    mem_conspn->functions_size = sizeof(WASMFunctionInstance)
+                                 * module_inst->function_count;
+    mem_conspn->exports_size = sizeof(WASMExportFuncInstance)
+                               * module_inst->export_func_count;
+
+    /* Memory pointer array, then each memory's header and current pages */
+    mem_conspn->memories_size = sizeof(WASMMemoryInstance *)
+                                * module_inst->memory_count;
+    for (idx = 0; idx < module_inst->memory_count; idx++) {
+        WASMMemoryInstance *mem = module_inst->memories[idx];
+
+        item_size = offsetof(WASMMemoryInstance, memory_data)
+                    + mem->num_bytes_per_page * mem->cur_page_count;
+        mem_conspn->memories_size += item_size;
+        mem_conspn->app_heap_size += mem->heap_data_end - mem->heap_data;
+    }
+
+    /* Table pointer array, then each table's header and one uint32 per
+       current element */
+    mem_conspn->tables_size = sizeof(WASMTableInstance *)
+                              * module_inst->table_count;
+    for (idx = 0; idx < module_inst->table_count; idx++) {
+        WASMTableInstance *tbl = module_inst->tables[idx];
+
+        item_size = offsetof(WASMTableInstance, base_addr)
+                    + sizeof(uint32) * tbl->cur_size;
+        mem_conspn->tables_size += item_size;
+    }
+
+    /* Global instance array, plus the end offset of the last global
+       (its data_offset plus the size of its value type) */
+    mem_conspn->globals_size = sizeof(WASMGlobalInstance)
+                               * module_inst->global_count;
+    if (module_inst->global_count > 0) {
+        WASMGlobalInstance *last_global =
+            &module_inst->globals[module_inst->global_count - 1];
+
+        mem_conspn->globals_size += last_global->data_offset
+                                    + wasm_value_type_size(last_global->type);
+    }
+
+    mem_conspn->total_size += mem_conspn->module_inst_struct_size;
+    mem_conspn->total_size += mem_conspn->memories_size;
+    mem_conspn->total_size += mem_conspn->functions_size;
+    mem_conspn->total_size += mem_conspn->tables_size;
+    mem_conspn->total_size += mem_conspn->globals_size;
+    mem_conspn->total_size += mem_conspn->exports_size;
+}
+#endif /* end of (WASM_ENABLE_MEMORY_PROFILING != 0)
+                 || (WASM_ENABLE_MEMORY_TRACING != 0) */

+ 12 - 4
core/iwasm/interpreter/wasm_runtime.h

@@ -204,14 +204,15 @@ typedef struct WASMModuleInstance {
      * wasm_set_custom_data/wasm_get_custom_data */
     void *custom_data;
 
-    /* Main exec env */
-    WASMExecEnv *main_exec_env;
-
 #if WASM_ENABLE_MULTI_MODULE != 0
-    // TODO: mutex ? mutli-threads ?
+    /* TODO: add mutex for multi-threads? */
     bh_list sub_module_inst_list_head;
     bh_list *sub_module_inst_list;
 #endif
+
+#if WASM_ENABLE_MEMORY_PROFILING != 0
+    uint32 max_aux_stack_used;
+#endif
 } WASMModuleInstance;
 
 struct WASMInterpFrame;
@@ -374,6 +375,13 @@ wasm_get_aux_stack(WASMExecEnv *exec_env,
                    uint32 *start_offset, uint32 *size);
 #endif
 
+void
+wasm_get_module_mem_consumption(const WASMModule *module,
+                                WASMModuleMemConsumption *mem_conspn);
+
+void
+wasm_get_module_inst_mem_consumption(const WASMModuleInstance *module,
+                                     WASMModuleInstMemConsumption *mem_conspn);
 #ifdef __cplusplus
 }
 #endif

+ 6 - 11
core/shared/mem-alloc/ems/ems_alloc.c

@@ -385,10 +385,6 @@ gc_alloc_vo_internal(void *vheap, gc_size_t size,
         /* clear buffer appended by GC_ALIGN_8() */
         memset((uint8*)ret + size, 0, tot_size - tot_size_unaligned);
 
-#if BH_ENABLE_MEMORY_PROFILING != 0
-    os_printf("HEAP.ALLOC: heap: %p, size: %u\n", heap, size);
-#endif
-
 finish:
     os_mutex_unlock(&heap->lock);
     return ret;
@@ -469,10 +465,6 @@ gc_realloc_vo_internal(void *vheap, void *ptr, gc_size_t size,
 
     ret = hmu_to_obj(hmu);
 
-#if BH_ENABLE_MEMORY_PROFILING != 0
-    os_printf("HEAP.ALLOC: heap: %p, size: %u\n", heap, size);
-#endif
-
 finish:
     os_mutex_unlock(&heap->lock);
 
@@ -548,9 +540,6 @@ gc_free_vo_internal(void *vheap, gc_object_t obj,
             g_total_free += size;
 
             heap->total_free_size += size;
-#if BH_ENABLE_MEMORY_PROFILING != 0
-            os_printf("HEAP.FREE, heap: %p, size: %u\n", heap, size);
-#endif
 
             if (!hmu_get_pinuse(hmu)) {
                 prev = (hmu_t*) ((char*) hmu - *((int*) hmu - 1));
@@ -600,6 +589,12 @@ gc_dump_heap_stats(gc_heap_t *heap)
               g_total_malloc, g_total_free, g_total_malloc - g_total_free);
 }
 
+/* Return the value of the highmark_size field of the given heap. */
+uint32
+gc_get_heap_highmark_size(gc_heap_t *heap)
+{
+    uint32 highmark = heap->highmark_size;
+
+    return highmark;
+}
+
 void
 gci_dump(gc_heap_t *heap)
 {

+ 2 - 0
core/shared/mem-alloc/ems/ems_gc_internal.h

@@ -145,7 +145,9 @@ hmu_verify(hmu_t *hmu);
  * HMU free chunk management
  */
 
+#ifndef HMU_NORMAL_NODE_CNT
 #define HMU_NORMAL_NODE_CNT 32
+#endif
 #define HMU_FC_NORMAL_MAX_SIZE ((HMU_NORMAL_NODE_CNT - 1) << 3)
 #define HMU_IS_FC_NORMAL(size) ((size) < HMU_FC_NORMAL_MAX_SIZE)
 #if HMU_FC_NORMAL_MAX_SIZE >= GC_MAX_HEAP_SIZE

+ 12 - 9
core/shared/mem-alloc/ems/ems_kfc.c

@@ -9,16 +9,17 @@ gc_handle_t
 gc_init_with_pool(char *buf, gc_size_t buf_size)
 {
     char *buf_end = buf + buf_size;
-    char *buf_aligned = (char*) (((uintptr_t) buf + 7) & (uintptr_t)~7);
+    char *buf_aligned = (char*)(((uintptr_t) buf + 7) & (uintptr_t)~7);
     char *base_addr = buf_aligned + sizeof(gc_heap_t);
-    gc_heap_t *heap = (gc_heap_t*) buf_aligned;
+    gc_heap_t *heap = (gc_heap_t*)buf_aligned;
     gc_size_t heap_max_size;
     hmu_normal_node_t *p = NULL;
     hmu_tree_node_t *root = NULL, *q = NULL;
     int i = 0, ret;
 
-    if (buf_size < 1024) {
-        os_printf("[GC_ERROR]heap_init_size(%d) < 1024\n", buf_size);
+    if (buf_size < APP_HEAP_SIZE_MIN) {
+        os_printf("[GC_ERROR]heap init buf size (%u) < %u\n",
+                  buf_size, APP_HEAP_SIZE_MIN);
         return NULL;
     }
 
@@ -66,12 +67,14 @@ gc_init_with_pool(char *buf, gc_size_t buf_size)
     q->parent = root;
     q->size = heap->current_size;
 
-    bh_assert(root->size <= HMU_FC_NORMAL_MAX_SIZE
-              && HMU_FC_NORMAL_MAX_SIZE < q->size);
+    bh_assert(root->size <= HMU_FC_NORMAL_MAX_SIZE);
 
-#if BH_ENABLE_MEMORY_PROFILING != 0
-    os_printf("heap is successfully initialized with max_size=%u.\n",
-              heap_max_size);
+#if WASM_ENABLE_MEMORY_TRACING != 0
+    os_printf("Heap created, total size: %u\n", buf_size);
+    os_printf("   heap struct size: %u\n", sizeof(gc_heap_t));
+    os_printf("   actual heap size: %u\n", heap_max_size);
+    os_printf("   padding bytes: %u\n",
+              buf_size - sizeof(gc_heap_t) - heap_max_size);
 #endif
     return heap;
 }

+ 54 - 4
core/shared/utils/bh_hashmap.c

@@ -47,7 +47,7 @@ bh_hash_map_create(uint32 size, bool use_lock,
     }
 
     total_size = offsetof(HashMap, elements) +
-                 sizeof(HashMapElem) * (uint64)size +
+                 sizeof(HashMapElem *) * (uint64)size +
                  (use_lock ? sizeof(korp_mutex) : 0);
 
     if (total_size >= UINT32_MAX
@@ -61,7 +61,7 @@ bh_hash_map_create(uint32 size, bool use_lock,
     if (use_lock) {
         map->lock = (korp_mutex*)
                     ((uint8*)map + offsetof(HashMap, elements)
-                     + sizeof(HashMapElem) * size);
+                     + sizeof(HashMapElem *) * size);
         if (os_mutex_init(map->lock)) {
             LOG_ERROR("HashMap create failed: init map lock failed.\n");
             BH_FREE(map);
@@ -188,8 +188,8 @@ bh_hash_map_update(HashMap *map, void *key, void *value,
                 os_mutex_unlock(map->lock);
             }
             return true;
-    }
-    elem = elem->next;
+        }
+        elem = elem->next;
     }
 
     if (map->lock) {
@@ -286,3 +286,53 @@ bh_hash_map_destroy(HashMap *map)
     BH_FREE(map);
     return true;
 }
+
+/* Size in bytes of the HashMap header up to its elements array, plus one
+   HashMapElem pointer per bucket, plus sizeof(korp_mutex) when the map
+   has a lock. */
+uint32
+bh_hash_map_get_struct_size(HashMap *hashmap)
+{
+    uint32 lock_size = hashmap->lock ? (uint32)sizeof(korp_mutex) : 0;
+    uint32 base_size = offsetof(HashMap, elements)
+                       + sizeof(HashMapElem *) * hashmap->size;
+
+    return base_size + lock_size;
+}
+
+/* Return the size in bytes of one HashMapElem structure.
+   Declared with (void) so the definition is a real prototype in C. */
+uint32
+bh_hash_map_get_elem_struct_size(void)
+{
+    return sizeof(HashMapElem);
+}
+
+/* Invoke callback(key, value) on every element of the map, bucket by
+   bucket. The map's lock, if it has one, is held for the whole walk.
+   Returns false only when map or callback is NULL. */
+bool
+bh_hash_map_traverse(HashMap *map, TraverseCallbackFunc callback)
+{
+    uint32 bucket;
+    HashMapElem *cur, *following;
+
+    if (!(map && callback)) {
+        LOG_ERROR("HashMap traverse failed: map or callback is NULL.\n");
+        return false;
+    }
+
+    if (map->lock) {
+        os_mutex_lock(map->lock);
+    }
+
+    for (bucket = 0; bucket < map->size; bucket++) {
+        for (cur = map->elements[bucket]; cur; cur = following) {
+            /* Read the link before handing the element to the callback */
+            following = cur->next;
+            callback(cur->key, cur->value);
+        }
+    }
+
+    if (map->lock) {
+        os_mutex_unlock(map->lock);
+    }
+
+    return true;
+}

+ 35 - 0
core/shared/utils/bh_hashmap.h

@@ -32,6 +32,10 @@ typedef void (*KeyDestroyFunc)(void *key);
    when an hash element is removed. */
 typedef void (*ValueDestroyFunc)(void *key);
 
+/* Traverse callback function:
+   called automatically for every hash element during traversal */
+typedef void (*TraverseCallbackFunc)(void *key, void *value);
+
 /**
  * Create a hash map.
  *
@@ -124,6 +128,37 @@ bh_hash_map_remove(HashMap *map, void *key,
 bool
 bh_hash_map_destroy(HashMap *map);
 
+/**
+ * Get the structure size of HashMap
+ *
+ * @param hashmap the hash map to calculate
+ *
+ * @return the memory space occupied by HashMap structure
+ */
+uint32
+bh_hash_map_get_struct_size(HashMap *hashmap);
+
+/**
+ * Get the structure size of HashMap Element
+ *
+ * @return the memory space occupied by HashMapElem structure
+ */
+uint32
+bh_hash_map_get_elem_struct_size(void);
+
+/**
+ * Traverse the hash map and call the callback function
+ *
+ * @param map the hash map to traverse
+ * @param callback the function to be called for every element
+ *
+ * @return true if success, false otherwise
+ * Note: if the hash map has lock, the map will be locked during traverse,
+ *       keep the callback function as simple as possible.
+ */
+bool
+bh_hash_map_traverse(HashMap *map, TraverseCallbackFunc callback);
+
 #ifdef __cplusplus
 }
 #endif

+ 6 - 0
core/shared/utils/uncommon/bh_read_file.c

@@ -41,6 +41,9 @@ bh_read_file_to_buffer(const char *filename, uint32 *ret_size)
         _close(file);
         return NULL;
     }
+#if WASM_ENABLE_MEMORY_TRACING != 0
+    printf("Read file, total size: %u\n", file_size);
+#endif
 
     read_size = _read(file, buffer, file_size);
     _close(file);
@@ -88,6 +91,9 @@ bh_read_file_to_buffer(const char *filename, uint32 *ret_size)
         close(file);
         return NULL;
     }
+#if WASM_ENABLE_MEMORY_TRACING != 0
+    printf("Read file, total size: %u\n", file_size);
+#endif
 
     read_size = (uint32)read(file, buffer, file_size);
     close(file);

+ 5 - 0
doc/build_wamr.md

@@ -68,6 +68,11 @@ cmake -DWAMR_BUILD_PLATFORM=linux -DWAMR_BUILD_TARGET=ARM
 - **WAMR_DISABLE_HW_BOUND_CHECK**=1/0, default to enable if not set and supported by platform
 > Note: by default only platform linux/darwin/android/vxworks 64-bit will enable boundary check with hardware trap in AOT or JIT mode, and the wamrc tool will generate AOT code without boundary check instructions in all 64-bit targets except SGX to improve performance.
 
+#### **Enable memory profiling (Experimental)**
+- **WAMR_BUILD_MEMORY_PROFILING**=1/0, default to disable if not set
+> Note: if it is enabled, developers can use the API `void wasm_runtime_dump_mem_consumption(wasm_exec_env_t exec_env)` to dump the memory consumption info.
+Currently we only profile the memory consumption of module, module_instance and exec_env; the memory consumed by other components such as `wasi-ctx`, `multi-module` and `thread-manager` is not included.
+
 #### **Set maximum app thread stack size**
 - **WAMR_APP_THREAD_STACK_SIZE_MAX**=n, default to 8 MB (8388608) if not set
 > Note: the AOT boundary check with hardware trap mechanism might consume large stack since the OS may lazily grow the stack mapping as a guard page is hit, we may use this configuration to reduce the total stack usage, e.g. -DWAMR_APP_THREAD_STACK_SIZE_MAX=131072 (128 KB).

+ 2 - 3
test-tools/host-tool/CMakeLists.txt

@@ -39,7 +39,7 @@ if (CMAKE_SIZEOF_VOID_P EQUAL 8)
   SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -m32")
 endif ()
 
-add_definitions(-Wall -Wno-pointer-sign -DMALLOC_MEMORY_FROM_SYSTEM)
+add_definitions(-Wall -Wno-pointer-sign)
 
 include_directories(
     ${CMAKE_CURRENT_LIST_DIR}/src
@@ -48,7 +48,6 @@ include_directories(
 
 file (GLOB_RECURSE HOST_TOOL_SRC src/*.c)
 
-
 SET(SOURCES
     ${HOST_TOOL_SRC}
     ${PLATFORM_SHARED_SOURCE}
@@ -57,6 +56,6 @@ SET(SOURCES
     ${CJSON_SOURCE}
     ${LIB_HOST_AGENT_SOURCE}
     )
-    
+
 add_executable(host_tool ${SOURCES})
 target_link_libraries(host_tool pthread)