Эх сурвалжийг харах

Implement memory profiler, optimize memory usage, modify code indent (#35)

wenyongh 6 жил өмнө
parent
commit
ff7cbdd2fb

+ 12 - 11
core/iwasm/lib/native/base/timer_wrapper.c

@@ -55,8 +55,8 @@ void * thread_modulers_timer_check(void * arg)
     while (1) {
         ms_to_expiry = -1;
         vm_mutex_lock(&g_timer_ctx_list_mutex);
-        timer_ctx_node_t* elem = (timer_ctx_node_t*) bh_list_first_elem(
-                &g_timer_ctx_list);
+        timer_ctx_node_t* elem = (timer_ctx_node_t*)
+                                 bh_list_first_elem(&g_timer_ctx_list);
         while (elem) {
             int next = check_app_timers(elem->timer_ctx);
             if (next != -1) {
@@ -72,7 +72,7 @@ void * thread_modulers_timer_check(void * arg)
             ms_to_expiry = 60 * 1000;
         vm_mutex_lock(&g_timer_ctx_list_mutex);
         vm_cond_reltimedwait(&g_timer_ctx_list_cond, &g_timer_ctx_list_mutex,
-                ms_to_expiry);
+                             ms_to_expiry);
         vm_mutex_unlock(&g_timer_ctx_list_mutex);
     }
 }
@@ -94,20 +94,21 @@ void init_wasm_timer()
     vm_recursive_mutex_init(&g_timer_ctx_list_mutex);
 
     vm_thread_create(&tm_tid, thread_modulers_timer_check,
-    NULL,
-    BH_APPLET_PRESERVED_STACK_SIZE);
+                     NULL, BH_APPLET_PRESERVED_STACK_SIZE);
 }
 
 timer_ctx_t create_wasm_timer_ctx(unsigned int module_id, int prealloc_num)
 {
     timer_ctx_t ctx = create_timer_ctx(wasm_timer_callback,
-            wakeup_modules_timer_thread, prealloc_num, module_id);
+                                       wakeup_modules_timer_thread,
+                                       prealloc_num,
+                                       module_id);
 
     if (ctx == NULL)
         return NULL;
 
-    timer_ctx_node_t * node = (timer_ctx_node_t*) bh_malloc(
-            sizeof(timer_ctx_node_t));
+    timer_ctx_node_t * node = (timer_ctx_node_t*)
+                              bh_malloc(sizeof(timer_ctx_node_t));
     if (node == NULL) {
         destroy_timer_ctx(ctx);
         return NULL;
@@ -125,8 +126,8 @@ timer_ctx_t create_wasm_timer_ctx(unsigned int module_id, int prealloc_num)
 void destory_module_timer_ctx(unsigned int module_id)
 {
     vm_mutex_lock(&g_timer_ctx_list_mutex);
-    timer_ctx_node_t* elem = (timer_ctx_node_t*) bh_list_first_elem(
-            &g_timer_ctx_list);
+    timer_ctx_node_t* elem = (timer_ctx_node_t*)
+                             bh_list_first_elem(&g_timer_ctx_list);
     while (elem) {
         if (timer_ctx_get_owner(elem->timer_ctx) == module_id) {
             bh_list_remove(&g_timer_ctx_list, elem);
@@ -151,7 +152,7 @@ timer_ctx_t get_wasm_timer_ctx()
 timer_id_t wasm_create_timer(int interval, bool is_period, bool auto_start)
 {
     return sys_create_timer(get_wasm_timer_ctx(), interval, is_period,
-            auto_start);
+                            auto_start);
 }
 
 void wasm_timer_destory(timer_id_t timer_id)

+ 10 - 12
core/iwasm/lib/native/extension/sensor/sensor_mgr_ref.c

@@ -40,13 +40,13 @@ void app_mgr_sensor_event_callback(module_data *m_data, bh_message_t msg)
     wasm_data *wasm_app_data = (wasm_data*) m_data->internal_data;
     wasm_module_inst_t inst = wasm_app_data->wasm_module_inst;
 
-    sensor_event_data_t *payload = (sensor_event_data_t*) bh_message_payload(
-            msg);
+    sensor_event_data_t *payload = (sensor_event_data_t*)
+                                   bh_message_payload(msg);
     if (payload == NULL)
         return;
 
     func_onSensorEvent = wasm_runtime_lookup_function(inst, "_on_sensor_event",
-            "(i32i32i32)");
+                                                      "(i32i32i32)");
     if (!func_onSensorEvent) {
         printf("Cannot find function onRequest\n");
     } else {
@@ -54,18 +54,17 @@ void app_mgr_sensor_event_callback(module_data *m_data, bh_message_t msg)
         uint32 sensor_data_len;
 
         if (payload->data_fmt == FMT_ATTR_CONTAINER) {
-            sensor_data_len = attr_container_get_serialize_length(
-                    payload->data);
+            sensor_data_len = attr_container_get_serialize_length(payload->data);
         } else {
             printf("Unsupported sensor data format: %d\n", payload->data_fmt);
             return;
         }
 
         sensor_data_offset = wasm_runtime_module_dup_data(inst, payload->data,
-                sensor_data_len);
+                                                          sensor_data_len);
         if (sensor_data_offset == 0) {
             printf("Got exception running wasm code: %s\n",
-                    wasm_runtime_get_exception(inst));
+                   wasm_runtime_get_exception(inst));
             wasm_runtime_clear_exception(inst);
             return;
         }
@@ -76,7 +75,7 @@ void app_mgr_sensor_event_callback(module_data *m_data, bh_message_t msg)
 
         if (!wasm_runtime_call_wasm(inst, NULL, func_onSensorEvent, 3, argv)) {
             printf(":Got exception running wasm code: %s\n",
-                    wasm_runtime_get_exception(inst));
+                   wasm_runtime_get_exception(inst));
             wasm_runtime_clear_exception(inst);
             wasm_runtime_module_free(inst, sensor_data_offset);
             return;
@@ -130,17 +129,16 @@ void init_sensor_framework()
 
     // add the sys sensor objects
     add_sys_sensor("sensor_test", "This is a sensor for test", 0, 1000,
-            read_test_sensor, config_test_sensor);
+                   read_test_sensor, config_test_sensor);
 
     set_sensor_reshceduler(cb_wakeup_thread);
 
     wasm_register_msg_callback(SENSOR_EVENT_WASM,
-            app_mgr_sensor_event_callback);
+                               app_mgr_sensor_event_callback);
 
     wasm_register_cleanup_callback(sensor_cleanup_callback);
 
     vm_thread_create(&tid, (void *)thread_sensor_check, NULL,
-    BH_APPLET_PRESERVED_STACK_SIZE);
-
+                     BH_APPLET_PRESERVED_STACK_SIZE);
 }
 

+ 13 - 0
core/iwasm/runtime/vmcore-wasm/wasm.h

@@ -219,6 +219,15 @@ typedef struct WASMDataSeg {
     uint8 *data;
 } WASMDataSeg;
 
+typedef struct BlockAddr {
+    const uint8 *start_addr;
+    uint8 *else_addr;
+    uint8 *end_addr;
+} BlockAddr;
+
+#define BLOCK_ADDR_CACHE_SIZE 64
+#define BLOCK_ADDR_CONFLICT_SIZE 4
+
 typedef struct WASMModule {
     uint32 type_count;
     uint32 import_count;
@@ -252,7 +261,11 @@ typedef struct WASMModule {
     uint32 start_function;
 
     HashMap *const_str_set;
+#if WASM_ENABLE_HASH_BLOCK_ADDR != 0
     HashMap *branch_set;
+#else
+    BlockAddr block_addr_cache[BLOCK_ADDR_CACHE_SIZE][BLOCK_ADDR_CONFLICT_SIZE];
+#endif
 } WASMModule;
 
 typedef struct WASMBranchBlock {

+ 12 - 9
core/iwasm/runtime/vmcore-wasm/wasm_interp.c

@@ -760,11 +760,12 @@ wasm_interp_call_func_bytecode(WASMThread *self,
       HANDLE_OP (WASM_OP_BLOCK):
         read_leb_uint32(frame_ip, frame_ip_end, block_ret_type);
 
-        if (!wasm_loader_find_block_addr(module->branch_set, frame_ip,
-                                         frame_ip_end, BLOCK_TYPE_BLOCK,
+        if (!wasm_loader_find_block_addr(module->module,
+                                         frame_ip, frame_ip_end,
+                                         BLOCK_TYPE_BLOCK,
                                          &else_addr, &end_addr,
                                          NULL, 0)) {
-          wasm_runtime_set_exception(module, "wasm loader find block addr failed");
+          wasm_runtime_set_exception(module, "find block addr failed");
           goto got_exception;
         }
 
@@ -774,11 +775,12 @@ wasm_interp_call_func_bytecode(WASMThread *self,
       HANDLE_OP (WASM_OP_LOOP):
         read_leb_uint32(frame_ip, frame_ip_end, block_ret_type);
 
-        if (!wasm_loader_find_block_addr(module->branch_set, frame_ip,
-                                         frame_ip_end, BLOCK_TYPE_LOOP,
+        if (!wasm_loader_find_block_addr(module->module,
+                                         frame_ip, frame_ip_end,
+                                         BLOCK_TYPE_LOOP,
                                          &else_addr, &end_addr,
                                          NULL, 0)) {
-          wasm_runtime_set_exception(module, "wasm loader find block addr failed");
+          wasm_runtime_set_exception(module, "find block addr failed");
           goto got_exception;
         }
 
@@ -788,11 +790,12 @@ wasm_interp_call_func_bytecode(WASMThread *self,
       HANDLE_OP (WASM_OP_IF):
         read_leb_uint32(frame_ip, frame_ip_end, block_ret_type);
 
-        if (!wasm_loader_find_block_addr(module->branch_set, frame_ip,
-                                         frame_ip_end, BLOCK_TYPE_IF,
+        if (!wasm_loader_find_block_addr(module->module,
+                                         frame_ip, frame_ip_end,
+                                         BLOCK_TYPE_IF,
                                          &else_addr, &end_addr,
                                          NULL, 0)) {
-          wasm_runtime_set_exception(module, "wasm loader find block addr failed");
+          wasm_runtime_set_exception(module, "find block addr failed");
           goto got_exception;
         }
 

+ 112 - 31
core/iwasm/runtime/vmcore-wasm/wasm_loader.c

@@ -1180,10 +1180,11 @@ load_from_sections(WASMModule *module, WASMSection *sections,
     return true;
 }
 
+#if WASM_ENABLE_HASH_BLOCK_ADDR != 0
 static uint32
 branch_set_hash(const void *key)
 {
-    return ((uintptr_t)key >> 4) ^ ((uintptr_t)key >> 14);
+    return ((uintptr_t)key) ^ ((uintptr_t)key >> 16);
 }
 
 static bool
@@ -1197,6 +1198,16 @@ branch_set_value_destroy(void *value)
 {
     wasm_free(value);
 }
+#endif
+
+#if BEIHAI_ENABLE_MEMORY_PROFILING != 0
+static void wasm_loader_free(void *ptr)
+{
+    wasm_free(ptr);
+}
+#else
+#define wasm_loader_free wasm_free
+#endif
 
 static WASMModule*
 create_module(char *error_buf, uint32 error_buf_size)
@@ -1218,15 +1229,17 @@ create_module(char *error_buf, uint32 error_buf_size)
                     (HashFunc)wasm_string_hash,
                     (KeyEqualFunc)wasm_string_equal,
                     NULL,
-                    wasm_free)))
+                    wasm_loader_free)))
         goto fail;
 
+#if WASM_ENABLE_HASH_BLOCK_ADDR != 0
     if (!(module->branch_set = wasm_hash_map_create(64, true,
                     branch_set_hash,
                     branch_set_key_equal,
                     NULL,
                     branch_set_value_destroy)))
         goto fail;
+#endif
 
     return module;
 
@@ -1361,15 +1374,17 @@ wasm_loader_load(const uint8 *buf, uint32 size, char *error_buf, uint32 error_bu
                                         (HashFunc)wasm_string_hash,
                                         (KeyEqualFunc)wasm_string_equal,
                                         NULL,
-                                        wasm_free)))
+                                        wasm_loader_free)))
         goto fail;
 
+#if WASM_ENABLE_HASH_BLOCK_ADDR != 0
     if (!(module->branch_set = wasm_hash_map_create(64, true,
                                         branch_set_hash,
                                         branch_set_key_equal,
                                         NULL,
                                         branch_set_value_destroy)))
         goto fail;
+#endif
 
     if (!load(buf, size, module, error_buf, error_buf_size))
         goto fail;
@@ -1440,20 +1455,24 @@ wasm_loader_unload(WASMModule *module)
     if (module->const_str_set)
         wasm_hash_map_destroy(module->const_str_set);
 
+#if WASM_ENABLE_HASH_BLOCK_ADDR != 0
     if (module->branch_set)
         wasm_hash_map_destroy(module->branch_set);
+#endif
 
     wasm_free(module);
 }
 
+#if WASM_ENABLE_HASH_BLOCK_ADDR != 0
 typedef struct block_addr {
     uint8 block_type;
-    uint8 *else_addr;
     uint8 *end_addr;
+    uint8 *else_addr;
 } block_addr;
+#endif
 
 bool
-wasm_loader_find_block_addr(HashMap *branch_set,
+wasm_loader_find_block_addr(WASMModule *module,
                             const uint8 *start_addr,
                             const uint8 *code_end_addr,
                             uint8 block_type,
@@ -1466,8 +1485,10 @@ wasm_loader_find_block_addr(HashMap *branch_set,
     uint8 *else_addr = NULL;
     uint32 block_nested_depth = 1, count, i, u32, u64;
     uint8 opcode, u8;
-    block_addr *block;
 
+#if WASM_ENABLE_HASH_BLOCK_ADDR != 0
+    HashMap *branch_set = module->branch_set;
+    block_addr *block;
     if ((block = wasm_hash_map_find(branch_set, (void*)start_addr))) {
         if (block->block_type != block_type)
             return false;
@@ -1476,6 +1497,25 @@ wasm_loader_find_block_addr(HashMap *branch_set,
         *p_end_addr = block->end_addr;
         return true;
     }
+#else
+    BlockAddr block_stack[16] = { 0 }, *block;
+    uint32 j, t;
+
+    i = ((uintptr_t)start_addr) ^ ((uintptr_t)start_addr >> 16);
+    i = i % BLOCK_ADDR_CACHE_SIZE;
+    block = module->block_addr_cache[i];
+    for (j = 0; j < BLOCK_ADDR_CONFLICT_SIZE; j++) {
+        if (block[j].start_addr == start_addr) {
+            /* Cache hit */
+            *p_else_addr = block[j].else_addr;
+            *p_end_addr = block[j].end_addr;
+            return true;
+        }
+    }
+
+    /* Cache miss: fall through and scan the bytecode for the block's end */
+    block_stack[0].start_addr = start_addr;
+#endif
 
     while (p < code_end_addr) {
         opcode = *p++;
@@ -1489,12 +1529,22 @@ wasm_loader_find_block_addr(HashMap *branch_set,
             case WASM_OP_LOOP:
             case WASM_OP_IF:
                 read_leb_uint32(p, p_end, u32); /* blocktype */
+#if WASM_ENABLE_HASH_BLOCK_ADDR == 0
+                if (block_nested_depth < sizeof(block_stack)/sizeof(BlockAddr)) {
+                    block_stack[block_nested_depth].start_addr = p;
+                    block_stack[block_nested_depth].else_addr = NULL;
+                }
+#endif
                 block_nested_depth++;
                 break;
 
             case WASM_OP_ELSE:
                 if (block_type == BLOCK_TYPE_IF && block_nested_depth == 1)
                     else_addr = (uint8*)(p - 1);
+#if WASM_ENABLE_HASH_BLOCK_ADDR == 0
+                if (block_nested_depth - 1 < sizeof(block_stack)/sizeof(BlockAddr))
+                    block_stack[block_nested_depth - 1].else_addr = (uint8*)(p - 1);
+#endif
                 break;
 
             case WASM_OP_END:
@@ -1503,7 +1553,13 @@ wasm_loader_find_block_addr(HashMap *branch_set,
                         *p_else_addr = else_addr;
                     *p_end_addr = (uint8*)(p - 1);
 
-                    if ((block = wasm_malloc(sizeof(block_addr)))) {
+#if WASM_ENABLE_HASH_BLOCK_ADDR != 0
+                    if (block_type == BLOCK_TYPE_IF)
+                        block = wasm_malloc(sizeof(block_addr));
+                    else
+                        block = wasm_malloc(offsetof(block_addr, else_addr));
+
+                    if (block) {
                         block->block_type = block_type;
                         if (block_type == BLOCK_TYPE_IF)
                             block->else_addr = else_addr;
@@ -1512,11 +1568,41 @@ wasm_loader_find_block_addr(HashMap *branch_set,
                         if (!wasm_hash_map_insert(branch_set, (void*)start_addr, block))
                             wasm_free(block);
                     }
-
+#else
+                    block_stack[0].end_addr = (uint8*)(p - 1);
+                    for (t = 0; t < sizeof(block_stack)/sizeof(BlockAddr); t++) {
+                        start_addr = block_stack[t].start_addr;
+                        if (start_addr) {
+                            i = ((uintptr_t)start_addr) ^ ((uintptr_t)start_addr >> 16);
+                            i = i % BLOCK_ADDR_CACHE_SIZE;
+                            block = module->block_addr_cache[i];
+                            for (j = 0; j < BLOCK_ADDR_CONFLICT_SIZE; j++)
+                                if (!block[j].start_addr)
+                                    break;
+
+                            if (j == BLOCK_ADDR_CONFLICT_SIZE) {
+                                memmove(block + 1, block, (BLOCK_ADDR_CONFLICT_SIZE - 1) *
+                                                          sizeof(BlockAddr));
+                                j = 0;
+
+                            }
+                            block[j].start_addr = block_stack[t].start_addr;
+                            block[j].else_addr = block_stack[t].else_addr;
+                            block[j].end_addr = block_stack[t].end_addr;
+                        }
+                        else
+                            break;
+                    }
+#endif
                     return true;
                 }
-                else
+                else {
                     block_nested_depth--;
+#if WASM_ENABLE_HASH_BLOCK_ADDR == 0
+                    if (block_nested_depth < sizeof(block_stack)/sizeof(BlockAddr))
+                        block_stack[block_nested_depth].end_addr = (uint8*)(p - 1);
+#endif
+                }
                 break;
 
             case WASM_OP_BR:
@@ -2079,14 +2165,13 @@ static bool
 wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func,
                              char *error_buf, uint32 error_buf_size)
 {
-    HashMap *branch_set = module->branch_set;
+#if WASM_ENABLE_HASH_BLOCK_ADDR != 0
     block_addr *block;
+#endif
     uint8 *p = func->code, *p_end = func->code + func->code_size;
-    uint8 *frame_lp_ref_bottom = NULL;
     uint8 *frame_ref_bottom = NULL, *frame_ref_boundary, *frame_ref;
     BranchBlock *frame_csp_bottom = NULL, *frame_csp_boundary, *frame_csp;
     uint32 param_count, local_count, global_count;
-    uint32 param_cell_num, local_cell_num;
     uint32 max_stack_cell_num = 0, max_csp_num = 0;
     uint32 stack_cell_num = 0, csp_num = 0;
     uint32 frame_ref_size, frame_csp_size;
@@ -2107,16 +2192,6 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func,
     local_count = func->local_count;
     local_types = func->local_types;
 
-    param_cell_num = wasm_get_cell_num(param_types, param_count);
-    local_cell_num = wasm_get_cell_num(local_types, local_count);
-
-    if (!(frame_lp_ref_bottom = wasm_malloc(param_cell_num + local_cell_num))) {
-        set_error_buf(error_buf, error_buf_size,
-                      "WASM loader prepare bytecode failed: alloc memory failed");
-        goto fail;
-    }
-    memset(frame_lp_ref_bottom, 0, param_cell_num + local_cell_num);
-
     frame_ref_size = 32;
     if (!(frame_ref_bottom = frame_ref = wasm_malloc(frame_ref_size))) {
         set_error_buf(error_buf, error_buf_size,
@@ -2167,7 +2242,7 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func,
                     (frame_csp - 1)->jumped_by_br = true;
                 else {
                     if (!i32_const) {
-                        if(!wasm_loader_find_block_addr(branch_set,
+                        if(!wasm_loader_find_block_addr(module,
                                                         (frame_csp - 1)->start_addr,
                                                         p_end,
                                                         (frame_csp - 1)->block_type,
@@ -2210,10 +2285,16 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func,
                 if (csp_num > 0) {
                     frame_csp->end_addr = p - 1;
 
-                    if (wasm_hash_map_find(branch_set, (void*)frame_csp->start_addr))
+#if WASM_ENABLE_HASH_BLOCK_ADDR != 0
+                    if (wasm_hash_map_find(module->branch_set, (void*)frame_csp->start_addr))
                         break;
 
-                    if (!(block = wasm_malloc(sizeof(block_addr)))) {
+                    if (frame_csp->block_type == BLOCK_TYPE_IF)
+                        block = wasm_malloc(sizeof(block_addr));
+                    else
+                        block = wasm_malloc(offsetof(block_addr, else_addr));
+
+                    if (!block) {
                         set_error_buf(error_buf, error_buf_size,
                                       "WASM loader prepare bytecode failed: "
                                       "alloc memory failed");
@@ -2221,10 +2302,11 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func,
                     }
 
                     block->block_type = frame_csp->block_type;
-                    block->else_addr = (void*)frame_csp->else_addr;
+                    if (frame_csp->block_type == BLOCK_TYPE_IF)
+                        block->else_addr = (void*)frame_csp->else_addr;
                     block->end_addr = (void*)frame_csp->end_addr;
 
-                    if (!wasm_hash_map_insert(branch_set, (void*)frame_csp->start_addr,
+                    if (!wasm_hash_map_insert(module->branch_set, (void*)frame_csp->start_addr,
                                               block)) {
                         set_error_buf(error_buf, error_buf_size,
                                       "WASM loader prepare bytecode failed: "
@@ -2232,6 +2314,7 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func,
                         wasm_free(block);
                         goto fail;
                     }
+#endif
                 }
                 break;
             }
@@ -2248,7 +2331,7 @@ handle_op_br:
 
                 block_return_type = (frame_csp - i)->return_type;
 
-                if(!wasm_loader_find_block_addr(branch_set,
+                if(!wasm_loader_find_block_addr(module,
                                                 (frame_csp - i)->start_addr,
                                                 p_end,
                                                 (frame_csp - i)->block_type,
@@ -2304,7 +2387,7 @@ handle_op_br:
                 POP_TYPE(ret_type);
                 PUSH_TYPE(ret_type);
 
-                if(!wasm_loader_find_block_addr(branch_set,
+                if(!wasm_loader_find_block_addr(module,
                                                 (frame_csp - 1)->start_addr,
                                                 p_end,
                                                 (frame_csp - 1)->block_type,
@@ -2862,8 +2945,6 @@ handle_op_br:
     return_value = true;
 
 fail:
-    if (frame_lp_ref_bottom)
-        wasm_free(frame_lp_ref_bottom);
     if (frame_ref_bottom)
         wasm_free(frame_ref_bottom);
     if (frame_csp_bottom)

+ 2 - 5
core/iwasm/runtime/vmcore-wasm/wasm_loader.h

@@ -61,10 +61,7 @@ wasm_loader_unload(WASMModule *module);
  * Find address of related else opcode and end opcode of opcode block/loop/if
  * according to the start address of opcode.
  *
- * @param branch_set the hashtable to store the else/end adress info of
- * block/loop/if opcode. The function will lookup the hashtable firstly,
- * if not found, it will then search the code from start_addr, and if success,
- * stores the result to the hashtable.
+ * @param module the module to find
  * @param start_addr the next address of opcode block/loop/if
  * @param code_end_addr the end address of function code block
  * @param block_type the type of block, 0/1/2 denotes block/loop/if
@@ -76,7 +73,7 @@ wasm_loader_unload(WASMModule *module);
  * @return true if success, false otherwise
  */
 bool
-wasm_loader_find_block_addr(HashMap *map,
+wasm_loader_find_block_addr(WASMModule *module,
                             const uint8 *start_addr,
                             const uint8 *code_end_addr,
                             uint8 block_type,

+ 6 - 5
core/iwasm/runtime/vmcore-wasm/wasm_runtime.c

@@ -706,7 +706,7 @@ execute_start_function(WASMModuleInstance *module_inst)
  * Instantiate module
  */
 WASMModuleInstance*
-wasm_runtime_instantiate(const WASMModule *module,
+wasm_runtime_instantiate(WASMModule *module,
                          uint32 stack_size, uint32 heap_size,
                          char *error_buf, uint32 error_buf_size)
 {
@@ -726,9 +726,11 @@ wasm_runtime_instantiate(const WASMModule *module,
     /* Check heap size */
     heap_size = align_uint(heap_size, 8);
     if (heap_size == 0)
-        heap_size = DEFAULT_WASM_HEAP_SIZE;
-    if (heap_size < MIN_WASM_HEAP_SIZE)
-        heap_size = MIN_WASM_HEAP_SIZE;
+        heap_size = APP_HEAP_SIZE_DEFAULT;
+    if (heap_size < APP_HEAP_SIZE_MIN)
+        heap_size = APP_HEAP_SIZE_MIN;
+    if (heap_size > APP_HEAP_SIZE_MAX)
+        heap_size = APP_HEAP_SIZE_MAX;
 
     /* Instantiate global firstly to get the mutable data size */
     global_count = module->import_global_count + module->global_count;
@@ -909,7 +911,6 @@ wasm_runtime_instantiate(const WASMModule *module,
             &module_inst->functions[module->start_function];
     }
 
-    module_inst->branch_set = module->branch_set;
     module_inst->module = module;
 
     /* module instance type */

+ 1 - 7
core/iwasm/runtime/vmcore-wasm/wasm_runtime.h

@@ -25,11 +25,6 @@
 extern "C" {
 #endif
 
-
-#define DEFAULT_WASM_STACK_SIZE (8 * 1024)
-#define DEFAULT_WASM_HEAP_SIZE  (8 * 1024)
-#define MIN_WASM_HEAP_SIZE      (1 * 1024)
-
 typedef struct WASMMemoryInstance {
     /* Current page count */
     uint32 cur_page_count;
@@ -153,8 +148,7 @@ typedef struct WASMModuleInstance {
 
     WASMFunctionInstance *start_function;
 
-    HashMap *branch_set;
-    const WASMModule *module;
+    WASMModule *module;
 
     uint32 DYNAMICTOP_PTR_offset;
     uint32 temp_ret;

+ 28 - 0
core/shared-lib/include/bh_memory.h

@@ -52,6 +52,8 @@ int bh_memory_init_with_allocator(void *malloc_func, void *free_func);
  */
 void bh_memory_destroy();
 
+#if BEIHAI_ENABLE_MEMORY_PROFILING == 0
+
 /**
  * This function allocates a memory chunk from system
  *
@@ -68,6 +70,32 @@ void* bh_malloc(unsigned int size);
  */
 void bh_free(void *ptr);
 
+#else
+
+void* bh_malloc_profile(const char *file, int line, const char *func, unsigned int size);
+void bh_free_profile(const char *file, int line, const char *func, void *ptr);
+
+#define bh_malloc(size) bh_malloc_profile(__FILE__, __LINE__, __func__, size)
+#define bh_free(ptr) bh_free_profile(__FILE__, __LINE__, __func__, ptr)
+
+/**
+ * Print current memory profiling data
+ *
+ * @param file file name of the caller
+ * @param line line of the file of the caller
+ * @param func function name of the caller
+ */
+void memory_profile_print(const char *file, int line, const char *func, int alloc);
+
+/**
+ * Summarize memory usage and print it out
+ * The output can be analyzed with awk, for example:
+ * awk -F: '{print $2,$4,$6,$8,$9}' OFS="\t" ./out.txt | sort -n -r -k 1
+ */
+void memory_usage_summarize();
+
+#endif
+
 #ifdef __cplusplus
 }
 #endif

+ 14 - 3
core/shared-lib/include/config.h

@@ -48,6 +48,9 @@
 /* WASM Interpreter labels-as-values feature */
 #define WASM_ENABLE_LABELS_AS_VALUES 1
 
+/* Cache branch block addresses in a hash map (1) or a fixed-size array (0) */
+#define WASM_ENABLE_HASH_BLOCK_ADDR 0
+
 /* Heap and stack profiling */
 #define BEIHAI_ENABLE_MEMORY_PROFILING 0
 
@@ -77,14 +80,22 @@
 #define WORKING_FLOW_HEAP_SIZE 0
 */
 
-/* Default/min/max heap size of each app */
-#define APP_HEAP_SIZE_DEFAULT (48 * 1024)
+/* Default/min/max heap size of each app */
+#define APP_HEAP_SIZE_DEFAULT (8 * 1024)
 #define APP_HEAP_SIZE_MIN (2 * 1024)
 #define APP_HEAP_SIZE_MAX (1024 * 1024)
 
+/* Default wasm stack size of each app */
+#define DEFAULT_WASM_STACK_SIZE (8 * 1024)
+
 /* Default/min/max stack size of each app thread */
+#ifndef __ZEPHYR__
 #define APP_THREAD_STACK_SIZE_DEFAULT (20 * 1024)
 #define APP_THREAD_STACK_SIZE_MIN (16 * 1024)
 #define APP_THREAD_STACK_SIZE_MAX (256 * 1024)
-
+#else
+#define APP_THREAD_STACK_SIZE_DEFAULT (4 * 1024)
+#define APP_THREAD_STACK_SIZE_MIN (2 * 1024)
+#define APP_THREAD_STACK_SIZE_MAX (256 * 1024)
+#endif
 #endif

+ 216 - 3
core/shared-lib/mem-alloc/bh_memory.c

@@ -14,11 +14,38 @@
  * limitations under the License.
  */
 
+#include "bh_config.h"
 #include "bh_memory.h"
 #include "mem_alloc.h"
 #include <stdio.h>
 #include <stdlib.h>
 
+#if BEIHAI_ENABLE_MEMORY_PROFILING != 0
+#include "bh_thread.h"
+
+/* Memory profile data of a function */
+typedef struct memory_profile {
+  struct memory_profile *next;
+  const char *function_name;
+  const char *file_name;
+  int line_in_file;
+  int malloc_num;
+  int free_num;
+  int total_malloc;
+  int total_free;
+} memory_profile_t;
+
+/* Memory currently in use: grows on each bh_malloc call
+ * and shrinks on each bh_free call */
+static unsigned int memory_in_use = 0;
+
+/* Memory profile data list */
+static memory_profile_t *memory_profiles_list = NULL;
+
+/* Lock of the memory profile list */
+static korp_mutex profile_lock;
+#endif
+
 #ifndef MALLOC_MEMORY_FROM_SYSTEM
 
 typedef enum Memory_Mode {
@@ -39,6 +66,9 @@ int bh_memory_init_with_pool(void *mem, unsigned int bytes)
     if (_allocator) {
         memory_mode = MEMORY_MODE_POOL;
         pool_allocator = _allocator;
+#if BEIHAI_ENABLE_MEMORY_PROFILING != 0
+        vm_mutex_init(&profile_lock);
+#endif
         return 0;
     }
     printf("Init memory with pool (%p, %u) failed.\n", mem, bytes);
@@ -51,6 +81,9 @@ int bh_memory_init_with_allocator(void *_malloc_func, void *_free_func)
         memory_mode = MEMORY_MODE_ALLOCATOR;
         malloc_func = _malloc_func;
         free_func = _free_func;
+#if BEIHAI_ENABLE_MEMORY_PROFILING != 0
+        vm_mutex_init(&profile_lock);
+#endif
         return 0;
     }
     printf("Init memory with allocator (%p, %p) failed.\n", _malloc_func,
@@ -60,12 +93,15 @@ int bh_memory_init_with_allocator(void *_malloc_func, void *_free_func)
 
 void bh_memory_destroy()
 {
+#if BEIHAI_ENABLE_MEMORY_PROFILING != 0
+    vm_mutex_destroy(&profile_lock);
+#endif
     if (memory_mode == MEMORY_MODE_POOL)
         mem_allocator_destroy(pool_allocator);
     memory_mode = MEMORY_MODE_UNKNOWN;
 }
 
-void* bh_malloc(unsigned int size)
+void* bh_malloc_internal(unsigned int size)
 {
     if (memory_mode == MEMORY_MODE_UNKNOWN) {
         printf("bh_malloc failed: memory hasn't been initialize.\n");
@@ -77,7 +113,7 @@ void* bh_malloc(unsigned int size)
     }
 }
 
-void bh_free(void *ptr)
+void bh_free_internal(void *ptr)
 {
     if (memory_mode == MEMORY_MODE_UNKNOWN) {
         printf("bh_free failed: memory hasn't been initialize.\n");
@@ -88,8 +124,157 @@ void bh_free(void *ptr)
     }
 }
 
+#if BEIHAI_ENABLE_MEMORY_PROFILING != 0
+/**
+ * Allocate memory and record the allocation in the memory profile list.
+ *
+ * The block obtained from bh_malloc_internal() is 8 bytes larger than
+ * requested; the requested size is stored at its start and the caller
+ * receives the address just past those 8 bytes.
+ *
+ * @param file  source file name of the call site
+ * @param line  line number of the call site
+ * @param func  function name of the call site
+ * @param size  number of bytes requested
+ *
+ * @return pointer to the usable memory, or NULL if the allocation failed
+ *         or if size + 8 does not fit in an unsigned int
+ */
+void* bh_malloc_profile(const char *file,
+                        int line,
+                        const char *func,
+                        unsigned int size)
+{
+    memory_profile_t *profile;
+    void *p;
+
+    /* Adding the 8-byte header must not wrap around, otherwise an
+       undersized block would be handed to the caller */
+    if (size + 8 < size)
+        return NULL;
+
+    p = bh_malloc_internal(size + 8);
+    if (!p)
+        return NULL;
+
+    /* Store the requested size in the header */
+    memcpy(p, &size, sizeof(size));
+
+    vm_mutex_lock(&profile_lock);
+
+    /* Counted under the lock and for every successful allocation, even
+       when no profile record can be created below */
+    memory_in_use += size;
+
+    /* Records are matched by function name and file name */
+    profile = memory_profiles_list;
+    while (profile) {
+        if (strcmp(profile->function_name, func) == 0
+            && strcmp(profile->file_name, file) == 0) {
+            break;
+        }
+        profile = profile->next;
+    }
+
+    if (profile) {
+        profile->total_malloc += size;/* TODO: overflow check */
+        profile->malloc_num++;
+    } else {
+        /* First allocation seen from this call site: push a new record
+           at the head of the list. If the record itself cannot be
+           allocated, the user allocation still succeeds unrecorded. */
+        profile = bh_malloc_internal(sizeof(memory_profile_t));
+        if (profile) {
+            memset(profile, 0, sizeof(memory_profile_t));
+            profile->file_name = file;
+            profile->line_in_file = line;
+            profile->function_name = func;
+            profile->malloc_num = 1;
+            profile->total_malloc = size;
+            profile->next = memory_profiles_list;
+            memory_profiles_list = profile;
+        }
+    }
+
+    vm_mutex_unlock(&profile_lock);
+
+    memory_profile_print(file, line, func, size);
+
+    return (char *)p + 8;
+}
+
+/**
+ * Free memory and record the release in the memory profile list.
+ *
+ * The size of the block is read from the 8 bytes in front of ptr, and
+ * the memory starting at ptr - 8 is released with bh_free_internal().
+ *
+ * @param file  source file name of the call site
+ * @param line  line number of the call site
+ * @param func  function name of the call site
+ * @param ptr   pointer to free; NULL is ignored
+ */
+void bh_free_profile(const char *file, int line, const char *func, void *ptr)
+{
+    unsigned int size;
+    memory_profile_t *profile;
+
+    /* Freeing NULL is a no-op; reading the header of a NULL pointer
+       would access an invalid address */
+    if (!ptr)
+        return;
+
+    /* Read the size header before the block is released */
+    memcpy(&size, (char *)ptr - 8, sizeof(size));
+
+    bh_free_internal((char *)ptr - 8);
+
+    vm_mutex_lock(&profile_lock);
+
+    /* Updated under the lock; never allowed to go below zero */
+    if (memory_in_use >= size)
+        memory_in_use -= size;
+
+    /* Records are matched by function name and file name */
+    profile = memory_profiles_list;
+    while (profile) {
+        if (strcmp(profile->function_name, func) == 0
+            && strcmp(profile->file_name, file) == 0) {
+            break;
+        }
+        profile = profile->next;
+    }
+
+    if (profile) {
+        profile->total_free += size;/* TODO: overflow check */
+        profile->free_num++;
+    } else {
+        /* First free seen from this call site: push a new record at the
+           head of the list; if it cannot be allocated the free simply
+           goes unrecorded */
+        profile = bh_malloc_internal(sizeof(memory_profile_t));
+        if (profile) {
+            memset(profile, 0, sizeof(memory_profile_t));
+            profile->file_name = file;
+            profile->line_in_file = line;
+            profile->function_name = func;
+            profile->free_num = 1;
+            profile->total_free = size;
+            profile->next = memory_profiles_list;
+            memory_profiles_list = profile;
+        }
+    }
+
+    vm_mutex_unlock(&profile_lock);
+}
+
+/**
+ * Print one summary line for every memory profile record.
+ * The fields are separated by colons, so the output can be analyzed
+ * with awk, for example:
+ * awk -F: '{print $2,$4,$6,$8,$9}' OFS="\t" ./out.txt | sort -n -r -k 1
+ */
+void memory_usage_summarize()
+{
+    memory_profile_t *node;
+
+    /* The list lock is held for the whole walk */
+    vm_mutex_lock(&profile_lock);
+
+    for (node = memory_profiles_list; node != NULL; node = node->next) {
+        printf("malloc:%d:malloc_num:%d:free:%d:free_num:%d:%s\n",
+               node->total_malloc, node->malloc_num,
+               node->total_free, node->free_num,
+               node->function_name);
+    }
+
+    vm_mutex_unlock(&profile_lock);
+}
+
+/**
+ * Print the current memory usage together with the call site and size
+ * of one allocation.
+ *
+ * @param file   source file name of the call site (not printed)
+ * @param line   line number of the call site
+ * @param func   function name of the call site
+ * @param alloc  number of bytes contributed by this allocation
+ */
+void memory_profile_print(const char *file,
+                          int line,
+                          const char *func,
+                          int alloc)
+{
+    (void)file;
+
+    /* memory_in_use is an unsigned int, so it is printed with %u */
+    printf("location:%s@%d:used:%u:contribution:%d\n",
+           func, line, memory_in_use, alloc);
+}
+
+#else
+
+/* Allocation entry point when memory profiling is disabled:
+   forwards the request to bh_malloc_internal() */
+void* bh_malloc(unsigned int size)
+{
+    void *mem = bh_malloc_internal(size);
+
+    return mem;
+}
+
+/* Free entry point when memory profiling is disabled:
+   forwards the pointer to bh_free_internal() */
+void bh_free(void *ptr)
+{
+    bh_free_internal(ptr);
+    return;
+}
+#endif
+
 #else /* else of MALLOC_MEMORY_FROM_SYSTEM */
 
+#if BEIHAI_ENABLE_MEMORY_PROFILING == 0
+
 void* bh_malloc(unsigned int size)
 {
     return malloc(size);
@@ -98,8 +283,36 @@ void* bh_malloc(unsigned int size)
 void bh_free(void *ptr)
 {
     if (ptr)
-    free(ptr);
+        free(ptr);
+}
+
+#else /* else of BEIHAI_ENABLE_MEMORY_PROFILING */
+
+/* Profiling allocation entry point when memory comes from the system
+   allocator: the call-site information is ignored and the request is
+   served by malloc() */
+void* bh_malloc_profile(const char *file,
+                        int line,
+                        const char *func,
+                        unsigned int size)
+{
+    void *mem;
+
+    /* Profiling globals are not used in this configuration; they are
+       referenced here presumably to avoid unused-variable warnings */
+    (void)memory_in_use;
+    (void)profile_lock;
+    (void)memory_profiles_list;
+
+    /* Call-site information is not recorded */
+    (void)func;
+    (void)line;
+    (void)file;
+
+    mem = malloc(size);
+    return mem;
+}
 
+/* Profiling free entry point when memory comes from the system
+   allocator: the call-site information is ignored and a non-NULL
+   pointer is released with free() */
+void bh_free_profile(const char *file, int line, const char *func, void *ptr)
+{
+    /* Call-site information is not recorded */
+    (void)func;
+    (void)line;
+    (void)file;
+
+    if (ptr == NULL)
+        return;
+
+    free(ptr);
+}
+#endif /* end of BEIHAI_ENABLE_MEMORY_PROFILING */
 #endif /* end of MALLOC_MEMORY_FROM_SYSTEM*/
 

+ 1 - 1
core/shared-lib/platform/linux/bh_thread.c

@@ -128,7 +128,7 @@ int _vm_thread_create(korp_tid *tid, thread_start_routine_t start, void *arg,
         unsigned int stack_size)
 {
     return _vm_thread_create_with_prio(tid, start, arg, stack_size,
-    BH_THREAD_DEFAULT_PRIORITY);
+                                       BH_THREAD_DEFAULT_PRIORITY);
 }
 
 korp_tid _vm_self_thread()

+ 13 - 0
doc/memory_usage.txt

@@ -0,0 +1,13 @@
+Current memory usage, taking samples/littlevgl on Zephyr as an example:
+(1) WASM app binary:                        142K for littlevgl ui_app.wasm
+(2) WASM app memory space:                  64K for littlevgl ui_app.wasm
+(3) WASM app heap space:                    8K by default
+(4) WASM app thread native stack:           4K by default
+(5) WASM interpreter stack:                 8K by default
+(6) WASM block address hash cache:          3K
+(7) timer thread stack:                     4K
+(8) sensor thread stack:                    4K
+(9) touch screen thread stack:              4K
+(10) others: vm, app mgr, queue, native lib: ~22K
+
+Total memory usage: ~263K

+ 1 - 1
samples/littlevgl/README.md

@@ -75,7 +75,7 @@ https://docs.zephyrproject.org/latest/getting_started/index.html</br>
     However, nucleo_f767zi is almost the same as nucleo_f746zg, except FLASH and SRAM size.
     So we changed the DTS setting of nucleo_f746zg boards for a workaround.</br>
 
-    `Modify zephyr/dts/arm/st/f7/stm32f746xg.dtsi, change DT_SIZE_K(320) to DT_SIZE_K(512)`</br>
+    `Modify zephyr/dts/arm/st/f7/stm32f746Xg.dtsi, change DT_SIZE_K(320) to DT_SIZE_K(512)`</br>
     `mkdir build && cd build`</br>
     `source ../../../../zephyr-env.sh`</br>
     `cmake -GNinja -DBOARD=nucleo_f746zg ..`</br>

+ 1 - 1
samples/littlevgl/vgl-wasm-runtime/src/platform/linux/iwasm_main.c

@@ -342,7 +342,7 @@ static host_interface interface = { .send = uart_send, .destroy = uart_destroy }
 
 #endif
 
-static char global_heap_buf[1024 * 1024] = { 0 };
+static char global_heap_buf[270 * 1024] = { 0 };
 
 static void showUsage()
 {

+ 8 - 7
samples/littlevgl/vgl-wasm-runtime/src/platform/zephyr/XPT2046.c

@@ -152,7 +152,7 @@ void xpt2046_init(void)
         return;
     }
     gpio_pin_configure(xpt2046_cs_ctrl.gpio_dev, XPT2046_CS_GPIO_PIN,
-            GPIO_DIR_OUT);
+                       GPIO_DIR_OUT);
     gpio_pin_write(xpt2046_cs_ctrl.gpio_dev, XPT2046_CS_GPIO_PIN, 1);
     xpt2046_cs_ctrl.gpio_pin = XPT2046_CS_GPIO_PIN;
     xpt2046_cs_ctrl.delay = 0;
@@ -169,14 +169,15 @@ void xpt2046_init(void)
     }
     /* Setup GPIO input */
     ret = gpio_pin_configure(xpt2046_pen_gpio_dev, XPT2046_PEN_GPIO_PIN,
-            (GPIO_DIR_IN | GPIO_INT | GPIO_INT_EDGE | GPIO_INT_ACTIVE_LOW
-                    | GPIO_INT_DEBOUNCE));
+                             (GPIO_DIR_IN | GPIO_INT | GPIO_INT_EDGE
+                              | GPIO_INT_ACTIVE_LOW | GPIO_INT_DEBOUNCE)
+                            );
     if (ret) {
         printk("Error configuring pin %d!\n", XPT2046_PEN_GPIO_PIN);
     }
 
     gpio_init_callback(&gpio_cb, xpt2046_pen_gpio_callback,
-            BIT(XPT2046_PEN_GPIO_PIN));
+                       BIT(XPT2046_PEN_GPIO_PIN));
 
     ret = gpio_add_callback(xpt2046_pen_gpio_dev, &gpio_cb);
     if (ret) {
@@ -191,10 +192,10 @@ void xpt2046_init(void)
     k_sem_init(&sem_touch_read, 0, 1);
 
     k_thread_create(&touch_thread_data, touch_read_thread_stack,
-    TOUCH_READ_THREAD_STACK_SIZE, touch_screen_read_thread, NULL, NULL, NULL, 5,
-            0, K_NO_WAIT);
+                    TOUCH_READ_THREAD_STACK_SIZE, touch_screen_read_thread,
+                    NULL, NULL, NULL, 5,
+                    0, K_NO_WAIT);
     printf("xpt2046_init ok \n");
-
 }
 
 /**

+ 12 - 9
samples/littlevgl/vgl-wasm-runtime/src/platform/zephyr/iwasm_main.c

@@ -43,7 +43,6 @@ static void uart_irq_callback(struct device *dev)
     int size = 0;
 
     while (uart_poll_in(dev, &ch) == 0) {
-
         uart_char_cnt++;
         aee_host_msg_callback(&ch, 1);
     }
@@ -66,24 +65,27 @@ static bool host_init()
 int host_send(void * ctx, const char *buf, int size)
 {
     for (int i = 0; i < size; i++)
-    uart_poll_out(uart_dev, buf[i]);
+        uart_poll_out(uart_dev, buf[i]);
 
     return size;
 }
 
 void host_destroy()
 {
-
 }
 
+host_interface interface = {
+    .init = host_init,
+    .send = host_send,
+    .destroy = host_destroy
+};
 
-#define DEFAULT_THREAD_STACKSIZE (8 * 1024)
-
-host_interface interface = { .init = host_init, .send =
-        host_send, .destroy = host_destroy };
 timer_ctx_t timer_ctx;
-static char global_heap_buf[ 498*1024] = { 0 };
+
+static char global_heap_buf[270 * 1024] = { 0 };
+
 extern void display_init(void);
+
 int iwasm_main()
 {
     korp_thread tid, tm_tid;
@@ -108,6 +110,7 @@ int iwasm_main()
     // TODO:
     app_manager_startup(&interface);
 
-    fail1: bh_memory_destroy();
+fail1:
+    bh_memory_destroy();
     return -1;
 }

+ 1 - 1
samples/littlevgl/wasm-apps/Makefile_wasm_app

@@ -50,6 +50,6 @@ SRCS += ../../../core/iwasm/lib/app-libs/base/timer.c
 all: 
 	@$(CC) $(CFLAGS) $(SRCS) \
     -s WASM=1 -s SIDE_MODULE=1 -s ASSERTIONS=1 -s STACK_OVERFLOW_CHECK=2 \
-    -s TOTAL_MEMORY=131072 -s TOTAL_STACK=8096 \
+    -s TOTAL_MEMORY=65536 -s TOTAL_STACK=2048\
     -s "EXPORTED_FUNCTIONS=['_on_init', '_on_request', '_on_sensor_event', '_on_timer_callback']" \
     -o ui_app.wasm