瀏覽代碼

Refine interpreter to improve performance, refine memory usage (#161)

wenyongh 6 年之前
父節點
當前提交
256ecdfdf9

+ 14 - 1
core/iwasm/aot/aot_loader.c

@@ -289,7 +289,8 @@ load_target_info_section(const uint8 *buf, const uint8 *buf_end,
     const uint8 *p = buf, *p_end = buf_end;
     bool is_target_little_endian, is_target_64_bit;
 
-    read_uint32(p, p_end, target_info.bin_type);
+    read_uint16(p, p_end, target_info.bin_type);
+    read_uint16(p, p_end, target_info.abi_type);
     read_uint16(p, p_end, target_info.e_type);
     read_uint16(p, p_end, target_info.e_machine);
     read_uint32(p, p_end, target_info.e_version);
@@ -424,6 +425,7 @@ load_memory_info(const uint8 **p_buf, const uint8 *buf_end,
 {
     const uint8 *buf = *p_buf;
 
+    read_uint32(buf, buf_end, module->num_bytes_per_page);
     read_uint32(buf, buf_end, module->mem_init_page_count);
     read_uint32(buf, buf_end, module->mem_max_page_count);
     read_uint32(buf, buf_end, module->mem_init_data_count);
@@ -1001,6 +1003,11 @@ load_init_data_section(const uint8 *buf, const uint8 *buf_end,
         return false;
     }
 
+    read_uint32(p, p_end, module->llvm_aux_data_end);
+    read_uint32(p, p_end, module->llvm_aux_stack_bottom);
+    read_uint32(p, p_end, module->llvm_aux_stack_size);
+    read_uint32(p, p_end, module->llvm_aux_stack_global_index);
+
     if (!load_object_data_sections_info(&p, p_end, module,
                                         error_buf, error_buf_size))
         return false;
@@ -2297,6 +2304,7 @@ aot_load_from_comp_data(AOTCompData *comp_data, AOTCompContext *comp_ctx,
     memset(module, 0, sizeof(AOTModule));
 
     module->module_type = Wasm_Module_AoT;
+    module->num_bytes_per_page = comp_data->num_bytes_per_page;
     module->mem_init_page_count = comp_data->mem_init_page_count;
     module->mem_max_page_count = comp_data->mem_max_page_count;
 
@@ -2382,6 +2390,11 @@ aot_load_from_comp_data(AOTCompData *comp_data, AOTCompContext *comp_ctx,
         module->start_function = NULL;
     }
 
+    module->llvm_aux_data_end = comp_data->llvm_aux_data_end;
+    module->llvm_aux_stack_bottom = comp_data->llvm_aux_stack_bottom;
+    module->llvm_aux_stack_size = comp_data->llvm_aux_stack_size;
+    module->llvm_aux_stack_global_index = comp_data->llvm_aux_stack_global_index;
+
     module->code = NULL;
     module->code_size = 0;
 

+ 5 - 3
core/iwasm/aot/aot_runtime.c

@@ -111,7 +111,7 @@ memory_instantiate(AOTModuleInstance *module_inst, AOTModule *module,
 {
     uint32 i, global_index, global_data_offset, base_offset, length;
     AOTMemInitData *data_seg;
-    uint64 total_size = (uint64)NumBytesPerPage * module->mem_init_page_count;
+    uint64 total_size = (uint64)module->num_bytes_per_page * module->mem_init_page_count;
 
     /* Allocate memory */
     if (total_size >= UINT32_MAX
@@ -802,11 +802,13 @@ bool
 aot_enlarge_memory(AOTModuleInstance *module_inst, uint32 inc_page_count)
 {
     uint8 *mem_data_old = module_inst->memory_data.ptr, *mem_data_new;
+    uint32 num_bytes_per_page =
+        ((AOTModule*)module_inst->aot_module.ptr)->num_bytes_per_page;
     uint32 cur_page_count = module_inst->mem_cur_page_count;
     uint32 max_page_count = module_inst->mem_max_page_count;
     uint32 total_page_count = cur_page_count + inc_page_count;
-    uint32 old_size = NumBytesPerPage * cur_page_count;
-    uint64 total_size = (uint64)NumBytesPerPage * total_page_count;
+    uint32 old_size = num_bytes_per_page * cur_page_count;
+    uint64 total_size = (uint64)num_bytes_per_page * total_page_count;
 
     if (inc_page_count <= 0)
         /* No need to enlarge memory */

+ 9 - 1
core/iwasm/aot/aot_runtime.h

@@ -75,6 +75,7 @@ typedef struct AOTModule {
     uint32 module_type;
 
     /* memory info */
+    uint32 num_bytes_per_page;
     uint32 mem_init_page_count;
     uint32 mem_max_page_count;
     uint32 mem_init_data_count;
@@ -132,6 +133,11 @@ typedef struct AOTModule {
     /* constant string set */
     HashMap *const_str_set;
 
+    uint32 llvm_aux_data_end;
+    uint32 llvm_aux_stack_bottom;
+    uint32 llvm_aux_stack_size;
+    uint32 llvm_aux_stack_global_index;
+
     /* is jit mode or not */
     bool is_jit_mode;
 
@@ -210,7 +216,9 @@ typedef AOTExportFunc AOTFunctionInstance;
 /* Target info, read from ELF header of object file */
 typedef struct AOTTargetInfo {
     /* Binary type, elf32l/elf32b/elf64l/elf64b */
-    uint32 bin_type;
+    uint16 bin_type;
+    /* ABI type */
+    uint16 abi_type;
     /* Object file type */
     uint16 e_type;
     /* Architecture */

+ 4 - 0
core/iwasm/compilation/aot.c

@@ -384,12 +384,16 @@ aot_create_comp_data(WASMModule *module)
 
   /* Set memory page count */
   if (module->import_memory_count) {
+    comp_data->num_bytes_per_page =
+      module->import_memories[0].u.memory.num_bytes_per_page;
     comp_data->mem_init_page_count =
       module->import_memories[0].u.memory.init_page_count;
     comp_data->mem_max_page_count =
       module->import_memories[0].u.memory.max_page_count;
   }
   else if (module->memory_count) {
+    comp_data->num_bytes_per_page =
+      module->memories[0].num_bytes_per_page;
     comp_data->mem_init_page_count =
       module->memories[0].init_page_count;
     comp_data->mem_max_page_count =

+ 6 - 0
core/iwasm/compilation/aot.h

@@ -110,6 +110,7 @@ typedef struct AOTExportFunc {
 
 typedef struct AOTCompData {
   /* Memory and memory init data info */
+  uint32 num_bytes_per_page;
   uint32 mem_init_page_count;
   uint32 mem_max_page_count;
   uint32 mem_init_data_count;
@@ -142,6 +143,11 @@ typedef struct AOTCompData {
   uint32 addr_data_size;
   uint32 global_data_size;
 
+  uint32 llvm_aux_data_end;
+  uint32 llvm_aux_stack_bottom;
+  uint32 llvm_aux_stack_size;
+  uint32 llvm_aux_stack_global_index;
+
   WASMModule *wasm_module;
 } AOTCompData;
 

+ 15 - 3
core/iwasm/compilation/aot_emit_aot_file.c

@@ -141,8 +141,9 @@ get_mem_init_data_list_size(AOTMemInitData **mem_init_data_list,
 static uint32
 get_mem_info_size(AOTCompData *comp_data)
 {
-    /* init page count + max page count + init data count + init data list */
-    return (uint32)sizeof(uint32) * 3
+    /* num bytes per page + init page count + max page count
+       + init data count + init data list */
+    return (uint32)sizeof(uint32) * 4
            + get_mem_init_data_list_size(comp_data->mem_init_data_list,
                                          comp_data->mem_init_data_count);
 }
@@ -367,6 +368,10 @@ get_init_data_section_size(AOTCompData *comp_data, AOTObjectData *obj_data)
     size = align_uint(size, 4);
     size += (uint32)sizeof(uint32) * 2;
 
+    /* llvm aux data end + llvm aux stack bottom
+       + llvm aux stack size + llvm stack global index */
+    size += sizeof(uint32) * 4;
+
     size += get_object_data_section_info_size(obj_data);
     return size;
 }
@@ -837,7 +842,8 @@ aot_emit_target_info_section(uint8 *buf, uint8 *buf_end, uint32 *p_offset,
     EMIT_U32(AOT_SECTION_TYPE_TARGET_INFO);
     EMIT_U32(section_size);
 
-    EMIT_U32(target_info->bin_type);
+    EMIT_U16(target_info->bin_type);
+    EMIT_U16(target_info->abi_type);
     EMIT_U16(target_info->e_type);
     EMIT_U16(target_info->e_machine);
     EMIT_U32(target_info->e_version);
@@ -864,6 +870,7 @@ aot_emit_mem_info(uint8 *buf, uint8 *buf_end, uint32 *p_offset,
 
     *p_offset = offset = align_uint(offset, 4);
 
+    EMIT_U32(comp_data->num_bytes_per_page);
     EMIT_U32(comp_data->mem_init_page_count);
     EMIT_U32(comp_data->mem_max_page_count);
     EMIT_U32(comp_data->mem_init_data_count);
@@ -1087,6 +1094,11 @@ aot_emit_init_data_section(uint8 *buf, uint8 *buf_end, uint32 *p_offset,
     EMIT_U32(comp_data->func_count);
     EMIT_U32(comp_data->start_func_index);
 
+    EMIT_U32(comp_data->llvm_aux_data_end);
+    EMIT_U32(comp_data->llvm_aux_stack_bottom);
+    EMIT_U32(comp_data->llvm_aux_stack_size);
+    EMIT_U32(comp_data->llvm_aux_stack_global_index);
+
     if (!aot_emit_object_data_section_info(buf, buf_end, &offset, obj_data))
         return false;
 

+ 2 - 1
core/iwasm/compilation/aot_emit_memory.c

@@ -71,7 +71,8 @@ check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
         uint32 memory_offset = (uint32)LLVMConstIntGetZExtValue(moffset);
         uint32 init_page_count = comp_ctx->comp_data->mem_init_page_count;
         if (init_page_count > 0
-            && memory_offset <= NumBytesPerPage * init_page_count - bytes) {
+            && memory_offset <= comp_ctx->comp_data->num_bytes_per_page
+                                * init_page_count - bytes) {
             /* inside memory space */
             if (!func_ctx->mem_space_unchanged) {
                 if (!(mem_base_addr = LLVMBuildLoad(comp_ctx->builder,

+ 13 - 7
core/iwasm/interpreter/wasm.h

@@ -26,11 +26,7 @@ extern "C" {
 /* Table Element Type */
 #define TABLE_ELEM_TYPE_ANY_FUNC 0x70
 
-#define MaxMemoryPages 65536
-#define MaxTableElems UINT32_MAX
-#define NumBytesPerPage 65536
-#define NumBytesPerPageLog2 16
-#define MaxReturnValues 16
+#define DEFAULT_NUM_BYTES_PER_PAGE 65536
 
 #define INIT_EXPR_TYPE_I32_CONST 0x41
 #define INIT_EXPR_TYPE_I64_CONST 0x42
@@ -110,7 +106,7 @@ typedef struct WASMTable {
 
 typedef struct WASMMemory {
     uint32 flags;
-    /* 64 kbytes one page by default */
+    uint32 num_bytes_per_page; /* page size in bytes; load_memory() sets it to DEFAULT_NUM_BYTES_PER_PAGE (65536) */
     uint32 init_page_count;
     uint32 max_page_count;
 } WASMMemory;
@@ -129,7 +125,7 @@ typedef struct WASMMemoryImport {
     char *module_name;
     char *field_name;
     uint32 flags;
-    /* 64 kbytes one page by default */
+    uint32 num_bytes_per_page;
     uint32 init_page_count;
     uint32 max_page_count;
 } WASMMemoryImport;
@@ -269,6 +265,16 @@ typedef struct WASMModule {
     WASMDataSeg **data_segments;
     uint32 start_function;
 
+    /* __data_end global exported by llvm */
+    uint32 llvm_aux_data_end;
+    /* auxiliary stack bottom, or __heap_base global exported by llvm */
+    uint32 llvm_aux_stack_bottom;
+    /* auxiliary stack size */
+    uint32 llvm_aux_stack_size;
+    /* the index of a global exported by llvm, which is
+       auxiliary stack top pointer */
+    uint32 llvm_aux_stack_global_index;
+
     /* Whether there is possible memory grow, e.g.
        memory.grow opcode or call enlargeMemory */
     bool possible_memory_grow;

+ 223 - 125
core/iwasm/interpreter/wasm_interp.c

@@ -476,36 +476,6 @@ read_leb(const uint8 *buf, uint32 *p_offset, uint32 maxbits, bool sign)
     }                                                           \
   } while (0)
 
-#define local_off(n) (frame_lp + cur_func->local_offsets[n])
-
-#define LOCAL_I32(n) (*(int32*)(local_off(n)))
-
-#define SET_LOCAL_I32(N, val) do {              \
-    uint32 n = (N);                             \
-    *(int32*)(local_off(n)) = (int32)(val);     \
-  } while (0)
-
-#define LOCAL_F32(n) (*(float32*)(local_off(n)))
-
-#define SET_LOCAL_F32(N, val) do {              \
-    uint32 n = (N);                             \
-    *(float32*)(local_off(n)) = (float32)(val); \
-  } while (0)
-
-#define LOCAL_I64(n) (GET_I64_FROM_ADDR(local_off(n)))
-
-#define SET_LOCAL_I64(N, val) do {              \
-    uint32 n = (N);                             \
-    PUT_I64_TO_ADDR(local_off(n), val);         \
-  } while (0)
-
-#define LOCAL_F64(n) (GET_F64_FROM_ADDR(local_off(n)))
-
-#define SET_LOCAL_F64(N, val) do {              \
-    uint32 n = (N);                             \
-    PUT_F64_TO_ADDR(local_off(n), val);         \
-  } while (0)
-
 /* Pop the given number of elements from the given frame's stack.  */
 #define POP(N) do {                             \
     int n = (N);                                \
@@ -525,18 +495,42 @@ read_leb(const uint8 *buf, uint32 *p_offset, uint32 maxbits, bool sign)
   } while (0)
 
 #define read_leb_int64(p, p_end, res) do {      \
+  uint8 _val = *p; /* peek at first byte */     \
+  if (!(_val & 0x80)) { /* 1-byte encoding */   \
+    res = (int64)_val;                          \
+    if (_val & 0x40)                            \
+      /* bit 6 set: sign extend to 64 bits */   \
+      res |= 0xFFFFFFFFFFFFFF80LL;              \
+    p++; /* consume that single byte */         \
+    break; /* skip the general decoder below */ \
+  }                                             \
   uint32 _off = 0;                              \
   res = (int64)read_leb(p, &_off, 64, true);    \
   p += _off;                                    \
 } while (0)
 
 #define read_leb_uint32(p, p_end, res) do {     \
+  uint8 _val = *p; /* peek at first byte */     \
+  if (!(_val & 0x80)) { /* 1-byte encoding */   \
+    res = _val;                                 \
+    p++; /* consume that single byte */         \
+    break; /* skip the general decoder below */ \
+  }                                             \
   uint32 _off = 0;                              \
   res = (uint32)read_leb(p, &_off, 32, false);  \
   p += _off;                                    \
 } while (0)
 
 #define read_leb_int32(p, p_end, res) do {      \
+  uint8 _val = *p;                              \
+  if (!(_val & 0x80)) {                         \
+    res = (int32)_val;                          \
+    if (_val & 0x40)                            \
+      /* sign extend */                         \
+      res |= 0xFFFFFF80;                        \
+    p++;                                        \
+    break;                                      \
+  }                                             \
   uint32 _off = 0;                              \
   res = (int32)read_leb(p, &_off, 32, true);    \
   p += _off;                                    \
@@ -559,30 +553,6 @@ read_leb(const uint8 *buf, uint32 *p_offset, uint32 maxbits, bool sign)
 #define GET_OPCODE() (void)0
 #endif
 
-#define DEF_OP_LOAD(operation) do {                                 \
-    uint32 offset, flags, addr;                                     \
-    GET_OPCODE();                                                   \
-    read_leb_uint32(frame_ip, frame_ip_end, flags);                 \
-    read_leb_uint32(frame_ip, frame_ip_end, offset);                \
-    addr = POP_I32();                                               \
-    CHECK_MEMORY_OVERFLOW();                                        \
-    operation;                                                      \
-    (void)flags;                                                    \
-  } while (0)
-
-#define DEF_OP_STORE(sval_type, sval_op_type, operation) do {       \
-    uint32 offset, flags, addr;                                     \
-    sval_type sval;                                                 \
-    GET_OPCODE();                                                   \
-    read_leb_uint32(frame_ip, frame_ip_end, flags);                 \
-    read_leb_uint32(frame_ip, frame_ip_end, offset);                \
-    sval = POP_##sval_op_type();                                    \
-    addr = POP_I32();                                               \
-    CHECK_MEMORY_OVERFLOW();                                        \
-    operation;                                                      \
-    (void)flags;                                                    \
-  } while (0)
-
 #define DEF_OP_I_CONST(ctype, src_op_type) do {                     \
     ctype cval;                                                     \
     read_leb_##ctype(frame_ip, frame_ip_end, cval);                 \
@@ -673,10 +643,11 @@ read_leb(const uint8 *buf, uint32 *p_offset, uint32 maxbits, bool sign)
     PUSH_##dst_op_type(value);                                      \
   } while (0)
 
-#define GET_LOCAL_INDEX_AND_TYPE() do {                             \
+#define GET_LOCAL_INDEX_TYPE_AND_OFFSET() do {                      \
     uint32 param_count = cur_func->param_count;                     \
     read_leb_uint32(frame_ip, frame_ip_end, local_idx);             \
     bh_assert(local_idx < param_count + cur_func->local_count);     \
+    local_offset = cur_func->local_offsets[local_idx];              \
     if (local_idx < param_count)                                    \
       local_type = cur_func->param_types[local_idx];                \
     else                                                            \
@@ -818,6 +789,12 @@ wasm_interp_call_func_native(WASMModuleInstance *module_inst,
 
 #endif  /* end of WASM_ENABLE_LABELS_AS_VALUES */
 
+typedef struct BlockAddrCache { /* one slot of the interpreter's block-address cache */
+  uint8 *frame_ip;  /* key: bytecode address just past the block-type byte of a BLOCK/LOOP/IF */
+  uint8 *else_addr; /* cached else address; in the visible handlers only IF stores and reads it */
+  uint8 *end_addr;  /* cached end address as returned by wasm_loader_find_block_addr() */
+} BlockAddrCache;
+
 static void
 wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                                WASMExecEnv *exec_env,
@@ -826,7 +803,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
 {
   WASMMemoryInstance *memory = module->default_memory;
   uint32 memory_data_size = memory
-                            ? NumBytesPerPage * memory->cur_page_count : 0;
+                            ? memory->num_bytes_per_page * memory->cur_page_count : 0;
   uint32 heap_base_offset = memory ? (uint32)memory->heap_base_offset : 0;
   uint32 heap_data_size = memory
                           ? (uint32)(memory->heap_data_end - memory->heap_data) : 0;
@@ -841,7 +818,8 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
   register uint32 *frame_lp = NULL;  /* cache of frame->lp */
   register uint32 *frame_sp = NULL;  /* cache of frame->sp */
   WASMBranchBlock *frame_csp = NULL;
-  uint8 *frame_ip_end = frame_ip + 1;
+  WASMGlobalInstance *global;
+  uint8 *frame_ip_end = frame_ip + 1, *frame_ip_org;
   uint8 opcode, block_ret_type;
   uint32 *depths = NULL;
   uint32 depth_buf[BR_TABLE_TMP_BUF_LEN];
@@ -849,6 +827,10 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
   uint64 all_cell_num = 0;
   int32 didx, val;
   uint8 *else_addr, *end_addr, *maddr = NULL;
+  uint32 local_idx, local_offset, global_idx, global_data_offset;
+  uint8 local_type, *global_addr;
+  BlockAddrCache block_addr_cache[32] = { 0 };
+  uint32 cache_index, block_addr_cache_size = 32;
 
 #if WASM_ENABLE_LABELS_AS_VALUES != 0
   #define HANDLE_OPCODE(op) &&HANDLE_##op
@@ -880,13 +862,21 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
       HANDLE_OP (WASM_OP_BLOCK):
         block_ret_type = *frame_ip++;
 
-        if (!wasm_loader_find_block_addr(module->module,
-                                         frame_ip, frame_ip_end,
-                                         BLOCK_TYPE_BLOCK,
-                                         &else_addr, &end_addr,
-                                         NULL, 0)) {
-          wasm_set_exception(module, "find block address failed");
-          goto got_exception;
+        cache_index = ((uintptr_t)frame_ip) % block_addr_cache_size;
+        if (block_addr_cache[cache_index].frame_ip == frame_ip) {
+          end_addr = block_addr_cache[cache_index].end_addr;
+        }
+        else {
+          if (!wasm_loader_find_block_addr(module->module,
+                                           frame_ip, frame_ip_end,
+                                           BLOCK_TYPE_BLOCK,
+                                           &else_addr, &end_addr,
+                                           NULL, 0)) {
+            wasm_set_exception(module, "find block address failed");
+            goto got_exception;
+          }
+          block_addr_cache[cache_index].frame_ip = frame_ip;
+          block_addr_cache[cache_index].end_addr = end_addr;
         }
 
         PUSH_CSP(BLOCK_TYPE_BLOCK, block_ret_type, frame_ip, NULL, end_addr);
@@ -895,13 +885,21 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
       HANDLE_OP (WASM_OP_LOOP):
         block_ret_type = *frame_ip++;
 
-        if (!wasm_loader_find_block_addr(module->module,
-                                         frame_ip, frame_ip_end,
-                                         BLOCK_TYPE_LOOP,
-                                         &else_addr, &end_addr,
-                                         NULL, 0)) {
-          wasm_set_exception(module, "find block address failed");
-          goto got_exception;
+        cache_index = ((uintptr_t)frame_ip) % block_addr_cache_size;
+        if (block_addr_cache[cache_index].frame_ip == frame_ip) {
+          end_addr = block_addr_cache[cache_index].end_addr;
+        }
+        else {
+          if (!wasm_loader_find_block_addr(module->module,
+                                           frame_ip, frame_ip_end,
+                                           BLOCK_TYPE_LOOP,
+                                           &else_addr, &end_addr,
+                                           NULL, 0)) {
+            wasm_set_exception(module, "find block address failed");
+            goto got_exception;
+          }
+          block_addr_cache[cache_index].frame_ip = frame_ip;
+          block_addr_cache[cache_index].end_addr = end_addr;
         }
 
         PUSH_CSP(BLOCK_TYPE_LOOP, block_ret_type, frame_ip, NULL, end_addr);
@@ -910,13 +908,24 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
       HANDLE_OP (WASM_OP_IF):
         block_ret_type = *frame_ip++;
 
-        if (!wasm_loader_find_block_addr(module->module,
-                                         frame_ip, frame_ip_end,
-                                         BLOCK_TYPE_IF,
-                                         &else_addr, &end_addr,
-                                         NULL, 0)) {
-          wasm_set_exception(module, "find block address failed");
-          goto got_exception;
+        cache_index = ((uintptr_t)frame_ip) % block_addr_cache_size;
+        if (block_addr_cache[cache_index].frame_ip == frame_ip) {
+            else_addr = block_addr_cache[cache_index].else_addr;
+            end_addr = block_addr_cache[cache_index].end_addr;
+        }
+        else {
+          if (!wasm_loader_find_block_addr(module->module,
+                                           frame_ip, frame_ip_end,
+                                           BLOCK_TYPE_IF,
+                                           &else_addr, &end_addr,
+                                           NULL, 0)) {
+            wasm_set_exception(module, "find block address failed");
+            goto got_exception;
+          }
+
+          block_addr_cache[cache_index].frame_ip = frame_ip;
+          block_addr_cache[cache_index].else_addr = else_addr;
+          block_addr_cache[cache_index].end_addr = end_addr;
         }
 
         cond = (uint32)POP_I32();
@@ -1094,87 +1103,138 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
       /* variable instructions */
       HANDLE_OP (WASM_OP_GET_LOCAL):
         {
-          uint32 local_idx;
-          uint8 local_type;
-
-          GET_LOCAL_INDEX_AND_TYPE();
+          frame_ip_org = frame_ip - 1;
+          GET_LOCAL_INDEX_TYPE_AND_OFFSET();
 
           switch (local_type) {
             case VALUE_TYPE_I32:
             case VALUE_TYPE_F32:
-              PUSH_I32(LOCAL_I32(local_idx));
+              PUSH_I32(*(int32*)(frame_lp + local_offset));
               break;
             case VALUE_TYPE_I64:
             case VALUE_TYPE_F64:
-              PUSH_I64(LOCAL_I64(local_idx));
+              PUSH_I64(GET_I64_FROM_ADDR(frame_lp + local_offset));
               break;
             default:
               wasm_set_exception(module, "invalid local type");
               goto got_exception;
           }
+
+          if (local_offset < 0x80) {
+            *frame_ip_org++ = WASM_OP_GET_LOCAL_FAST;
+            if (local_type == VALUE_TYPE_I32
+                || local_type == VALUE_TYPE_F32)
+              *frame_ip_org++ = (uint8)local_offset;
+            else
+              *frame_ip_org++ = (uint8)(local_offset | 0x80);
+            while (frame_ip_org < frame_ip)
+              *frame_ip_org++ = WASM_OP_NOP;
+          }
+
           HANDLE_OP_END ();
         }
 
-      HANDLE_OP (WASM_OP_SET_LOCAL):
+      HANDLE_OP (WASM_OP_GET_LOCAL_FAST):
         {
-          uint32 local_idx;
-          uint8 local_type;
+          local_offset = *frame_ip++;
+          if (local_offset & 0x80)
+            PUSH_I64(GET_I64_FROM_ADDR(frame_lp + (local_offset & 0x7F)));
+          else
+            PUSH_I32(*(int32*)(frame_lp + local_offset));
+          HANDLE_OP_END ();
+        }
 
-          GET_LOCAL_INDEX_AND_TYPE();
+      HANDLE_OP (WASM_OP_SET_LOCAL):
+        {
+          frame_ip_org = frame_ip - 1;
+          GET_LOCAL_INDEX_TYPE_AND_OFFSET();
 
           switch (local_type) {
             case VALUE_TYPE_I32:
-              SET_LOCAL_I32(local_idx, POP_I32());
-              break;
             case VALUE_TYPE_F32:
-              SET_LOCAL_F32(local_idx, POP_F32());
+              *(int32*)(frame_lp + local_offset) = POP_I32();
               break;
             case VALUE_TYPE_I64:
-              SET_LOCAL_I64(local_idx, POP_I64());
-              break;
             case VALUE_TYPE_F64:
-              SET_LOCAL_F64(local_idx, POP_F64());
+              PUT_I64_TO_ADDR((uint32*)(frame_lp + local_offset), POP_I64());
               break;
             default:
               wasm_set_exception(module, "invalid local type");
               goto got_exception;
           }
+
+          if (local_offset < 0x80) {
+            *frame_ip_org++ = WASM_OP_SET_LOCAL_FAST;
+            if (local_type == VALUE_TYPE_I32
+                || local_type == VALUE_TYPE_F32)
+              *frame_ip_org++ = (uint8)local_offset;
+            else
+              *frame_ip_org++ = (uint8)(local_offset | 0x80);
+            while (frame_ip_org < frame_ip)
+              *frame_ip_org++ = WASM_OP_NOP;
+          }
+
           HANDLE_OP_END ();
         }
 
-      HANDLE_OP (WASM_OP_TEE_LOCAL):
+      HANDLE_OP (WASM_OP_SET_LOCAL_FAST):
         {
-          uint32 local_idx;
-          uint8 local_type;
+          local_offset = *frame_ip++;
+          if (local_offset & 0x80)
+            PUT_I64_TO_ADDR((uint32*)(frame_lp + (local_offset & 0x7F)), POP_I64());
+          else
+            *(int32*)(frame_lp + local_offset) = POP_I32();
+          HANDLE_OP_END ();
+        }
 
-          GET_LOCAL_INDEX_AND_TYPE();
+      HANDLE_OP (WASM_OP_TEE_LOCAL):
+        {
+          frame_ip_org = frame_ip - 1;
+          GET_LOCAL_INDEX_TYPE_AND_OFFSET();
 
           switch (local_type) {
             case VALUE_TYPE_I32:
-              SET_LOCAL_I32(local_idx, *(frame_sp - 1));
-              break;
             case VALUE_TYPE_F32:
-              SET_LOCAL_F32(local_idx, *(float32*)(frame_sp - 1));
+              *(int32*)(frame_lp + local_offset) = *(int32*)(frame_sp - 1);
               break;
             case VALUE_TYPE_I64:
-              SET_LOCAL_I64(local_idx, GET_I64_FROM_ADDR(frame_sp - 2));
-              break;
             case VALUE_TYPE_F64:
-              SET_LOCAL_F64(local_idx, GET_F64_FROM_ADDR(frame_sp - 2));
+              PUT_I64_TO_ADDR((uint32*)(frame_lp + local_offset),
+                              GET_I64_FROM_ADDR(frame_sp - 2));
               break;
             default:
               wasm_set_exception(module, "invalid local type");
               goto got_exception;
           }
+
+          if (local_offset < 0x80) {
+            *frame_ip_org++ = WASM_OP_TEE_LOCAL_FAST;
+            if (local_type == VALUE_TYPE_I32
+                || local_type == VALUE_TYPE_F32)
+              *frame_ip_org++ = (uint8)local_offset;
+            else
+              *frame_ip_org++ = (uint8)(local_offset | 0x80);
+            while (frame_ip_org < frame_ip)
+              *frame_ip_org++ = WASM_OP_NOP;
+          }
+
           HANDLE_OP_END ();
         }
 
-      HANDLE_OP (WASM_OP_GET_GLOBAL):
+      HANDLE_OP (WASM_OP_TEE_LOCAL_FAST):
         {
-          WASMGlobalInstance *global;
-          uint32 global_idx;
-          uint8 *global_addr;
+          local_offset = *frame_ip++;
+          if (local_offset & 0x80)
+            PUT_I64_TO_ADDR((uint32*)(frame_lp + (local_offset & 0x7F)),
+                            GET_I64_FROM_ADDR(frame_sp - 2));
+          else
+            *(int32*)(frame_lp + local_offset) = *(int32*)(frame_sp - 1);
+          HANDLE_OP_END ();
+        }
 
+      HANDLE_OP (WASM_OP_GET_GLOBAL):
+        {
+          frame_ip_org = frame_ip - 1;
           read_leb_uint32(frame_ip, frame_ip_end, global_idx);
 
           bh_assert(global_idx < module->global_count);
@@ -1183,30 +1243,47 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
 
           switch (global->type) {
             case VALUE_TYPE_I32:
-              PUSH_I32(*(uint32*)global_addr);
-              break;
             case VALUE_TYPE_F32:
-              PUSH_F32(*(float32*)global_addr);
+              PUSH_I32(*(uint32*)global_addr);
               break;
             case VALUE_TYPE_I64:
-              PUSH_I64(GET_I64_FROM_ADDR((uint32*)global_addr));
-              break;
             case VALUE_TYPE_F64:
-              PUSH_F64(GET_F64_FROM_ADDR((uint32*)global_addr));
+              PUSH_I64(GET_I64_FROM_ADDR((uint32*)global_addr));
               break;
             default:
               wasm_set_exception(module, "invalid global type");
               goto got_exception;
           }
+
+          if (global->data_offset < 0x80) {
+            *frame_ip_org++ = WASM_OP_GET_GLOBAL_FAST;
+            if (global->type == VALUE_TYPE_I32
+                || global->type == VALUE_TYPE_F32)
+              *frame_ip_org++ = (uint8)global->data_offset;
+            else
+              *frame_ip_org++ = (uint8)(global->data_offset | 0x80);
+            while (frame_ip_org < frame_ip)
+                *frame_ip_org++ = WASM_OP_NOP;
+          }
+
           HANDLE_OP_END ();
         }
 
-      HANDLE_OP (WASM_OP_SET_GLOBAL):
+      HANDLE_OP (WASM_OP_GET_GLOBAL_FAST):
         {
-          WASMGlobalInstance *global;
-          uint32 global_idx;
-          uint8 *global_addr;
+          global_data_offset = *frame_ip++;
+
+          if (global_data_offset & 0x80)
+            PUSH_I64(GET_I64_FROM_ADDR((uint32*)(global_data + (global_data_offset & 0x7F))));
+          else
+            PUSH_I32(*(uint32*)(global_data + global_data_offset));
+
+          HANDLE_OP_END ();
+        }
 
+      HANDLE_OP (WASM_OP_SET_GLOBAL):
+        {
+          frame_ip_org = frame_ip - 1;
           read_leb_uint32(frame_ip, frame_ip_end, global_idx);
 
           bh_assert(global_idx < module->global_count);
@@ -1215,21 +1292,42 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
 
           switch (global->type) {
             case VALUE_TYPE_I32:
-              *(int32*)global_addr = POP_I32();
-              break;
             case VALUE_TYPE_F32:
-              *(float32*)global_addr = POP_F32();
+              *(int32*)global_addr = POP_I32();
               break;
             case VALUE_TYPE_I64:
-              PUT_I64_TO_ADDR((uint32*)global_addr, POP_I64());
-              break;
             case VALUE_TYPE_F64:
-              PUT_F64_TO_ADDR((uint32*)global_addr, POP_F64());
+              PUT_I64_TO_ADDR((uint32*)global_addr, POP_I64());
               break;
             default:
               wasm_set_exception(module, "invalid global type");
               goto got_exception;
           }
+
+          if (global->data_offset < 0x80) {
+            *frame_ip_org++ = WASM_OP_SET_GLOBAL_FAST;
+            if (global->type == VALUE_TYPE_I32
+                || global->type == VALUE_TYPE_F32)
+              *frame_ip_org++ = (uint8)global->data_offset;
+            else
+              *frame_ip_org++ = (uint8)(global->data_offset | 0x80);
+            while (frame_ip_org < frame_ip)
+                *frame_ip_org++ = WASM_OP_NOP;
+          }
+
+          HANDLE_OP_END ();
+        }
+
+      HANDLE_OP (WASM_OP_SET_GLOBAL_FAST):
+        {
+          global_data_offset = *frame_ip++;
+
+          if (global_data_offset & 0x80)
+            PUT_I64_TO_ADDR((uint32*)(global_data + (global_data_offset & 0x7F)),
+                            POP_I64());
+          else
+            *(uint32*)(global_data + global_data_offset) = POP_I32();
+
           HANDLE_OP_END ();
         }
 
@@ -1444,7 +1542,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
           PUSH_I32(prev_page_count);
           /* update the memory instance ptr */
           memory = module->default_memory;
-          memory_data_size = NumBytesPerPage * memory->cur_page_count;
+          memory_data_size = memory->num_bytes_per_page * memory->cur_page_count;
           global_data = memory->global_data;
         }
 

+ 135 - 4
core/iwasm/interpreter/wasm_loader.c

@@ -407,7 +407,7 @@ load_memory_import(const uint8 **p_buf, const uint8 *buf_end,
     const uint8 *p = *p_buf, *p_end = buf_end;
     uint32 pool_size = bh_memory_pool_size();
     uint32 max_page_count = pool_size * APP_MEMORY_MAX_GLOBAL_HEAP_PERCENT
-                            / NumBytesPerPage;
+                            / DEFAULT_NUM_BYTES_PER_PAGE;
 
     read_leb_uint32(p, p_end, memory->flags);
     read_leb_uint32(p, p_end, memory->init_page_count);
@@ -420,6 +420,8 @@ load_memory_import(const uint8 **p_buf, const uint8 *buf_end,
         /* Limit the maximum memory size to max_page_count */
         memory->max_page_count = max_page_count;
 
+    memory->num_bytes_per_page = DEFAULT_NUM_BYTES_PER_PAGE;
+
     *p_buf = p;
     return true;
 }
@@ -452,7 +454,7 @@ load_memory(const uint8 **p_buf, const uint8 *buf_end, WASMMemory *memory,
     const uint8 *p = *p_buf, *p_end = buf_end;
     uint32 pool_size = bh_memory_pool_size();
     uint32 max_page_count = pool_size * APP_MEMORY_MAX_GLOBAL_HEAP_PERCENT
-                            / NumBytesPerPage;
+                            / DEFAULT_NUM_BYTES_PER_PAGE;
 
     read_leb_uint32(p, p_end, memory->flags);
     read_leb_uint32(p, p_end, memory->init_page_count);
@@ -465,6 +467,8 @@ load_memory(const uint8 **p_buf, const uint8 *buf_end, WASMMemory *memory,
         /* Limit the maximum memory size to max_page_count */
         memory->max_page_count = max_page_count;
 
+    memory->num_bytes_per_page = DEFAULT_NUM_BYTES_PER_PAGE;
+
     *p_buf = p;
     return true;
 }
@@ -675,7 +679,7 @@ load_import_section(const uint8 *buf, const uint8 *buf_end, WASMModule *module,
                     if (!load_memory_import(&p, p_end, &import->u.memory,
                                 error_buf, error_buf_size))
                         return false;
-                    if (module->import_table_count > 1) {
+                    if (module->import_memory_count > 1) {
                         set_error_buf(error_buf, error_buf_size,
                                       "Load import section failed: multiple memories");
                         return false;
@@ -1355,10 +1359,17 @@ static bool
 load_from_sections(WASMModule *module, WASMSection *sections,
                    char *error_buf, uint32 error_buf_size)
 {
+    WASMExport *export;
     WASMSection *section = sections;
     const uint8 *buf, *buf_end, *buf_code = NULL, *buf_code_end = NULL,
                 *buf_func = NULL, *buf_func_end = NULL;
-    uint32 i;
+    WASMGlobal *llvm_data_end_global = NULL, *llvm_heap_base_global = NULL;
+    WASMGlobal *llvm_stack_top_global = NULL, *global;
+    uint32 llvm_data_end = UINT32_MAX, llvm_heap_base = UINT32_MAX;
+    uint32 llvm_stack_top = UINT32_MAX, global_index, i;
+    uint32 data_end_global_index = UINT32_MAX;
+    uint32 heap_base_global_index = UINT32_MAX;
+    uint32 stack_top_global_index = UINT32_MAX;
 
     /* Find the code and function sections, if present */
     while (section) {
@@ -1444,6 +1455,115 @@ load_from_sections(WASMModule *module, WASMSection *sections,
             return false;
     }
 
+    /* Resolve llvm auxiliary data/stack/heap info and reset memory info */
+    if (!module->possible_memory_grow) {
+        export = module->exports;
+        for (i = 0; i < module->export_count; i++, export++) {
+            if (export->kind == EXPORT_KIND_GLOBAL) {
+                if (!strcmp(export->name, "__heap_base")) {
+                    global_index = export->index - module->import_global_count;
+                    global = module->globals + global_index;
+                    if (global->type == VALUE_TYPE_I32
+                        && !global->is_mutable
+                        && global->init_expr.init_expr_type ==
+                                INIT_EXPR_TYPE_I32_CONST) {
+                        heap_base_global_index = global_index;
+                        llvm_heap_base_global = global;
+                        llvm_heap_base = global->init_expr.u.i32;
+                        LOG_VERBOSE("found llvm __heap_base global, value: %d\n",
+                                    llvm_heap_base);
+                    }
+                }
+                else if (!strcmp(export->name, "__data_end")) {
+                    global_index = export->index - module->import_global_count;
+                    global = module->globals + global_index;
+                    if (global->type == VALUE_TYPE_I32
+                        && !global->is_mutable
+                        && global->init_expr.init_expr_type ==
+                                INIT_EXPR_TYPE_I32_CONST) {
+                        data_end_global_index = global_index;
+                        llvm_data_end_global = global;
+                        llvm_data_end = global->init_expr.u.i32;
+                        LOG_VERBOSE("found llvm __data_end global, value: %d\n",
+                                    llvm_data_end);
+
+                        llvm_data_end = align_uint(llvm_data_end, 16);
+                    }
+                }
+
+                if (llvm_data_end_global && llvm_heap_base_global) {
+                    if ((data_end_global_index == heap_base_global_index + 1
+                         && data_end_global_index > 0)
+                        || (heap_base_global_index == data_end_global_index + 1
+                            && heap_base_global_index > 0)) {
+                        global_index =
+                            data_end_global_index < heap_base_global_index
+                            ? data_end_global_index - 1 : heap_base_global_index - 1;
+                        global = module->globals + global_index;
+                        if (global->type == VALUE_TYPE_I32
+                            && global->is_mutable
+                            && global->init_expr.init_expr_type ==
+                                        INIT_EXPR_TYPE_I32_CONST) {
+                            llvm_stack_top_global = global;
+                            llvm_stack_top = global->init_expr.u.i32;
+                            stack_top_global_index = global_index;
+                            LOG_VERBOSE("found llvm stack top global, "
+                                        "value: %d, global index: %d\n",
+                                        llvm_stack_top, global_index);
+                        }
+                    }
+                    break;
+                }
+            }
+        }
+
+        if (llvm_data_end_global
+            && llvm_heap_base_global
+            && llvm_stack_top_global
+            && llvm_stack_top <= llvm_heap_base) {
+            WASMMemoryImport *memory_import;
+            WASMMemory *memory;
+            uint64 init_memory_size;
+            uint32 shrunk_memory_size = llvm_heap_base > llvm_data_end
+                                        ? llvm_heap_base : llvm_data_end;
+            if (module->import_memory_count) {
+                memory_import = &module->import_memories[0].u.memory;
+                init_memory_size = (uint64)memory_import->num_bytes_per_page *
+                                   memory_import->init_page_count;
+                if (llvm_heap_base <= init_memory_size
+                    && llvm_data_end <= init_memory_size) {
+                    /* Reset memory info to decrease memory usage */
+                    memory_import->num_bytes_per_page = shrunk_memory_size;
+                    memory_import->init_page_count = 1;
+                    LOG_VERBOSE("reset import memory size to %d\n",
+                                shrunk_memory_size);
+                }
+            }
+            if (module->memory_count) {
+                memory = &module->memories[0];
+                init_memory_size = (uint64)memory->num_bytes_per_page *
+                             memory->init_page_count;
+                if (llvm_heap_base <= init_memory_size
+                    && llvm_data_end <= init_memory_size) {
+                    /* Reset memory info to decrease memory usage */
+                    memory->num_bytes_per_page = shrunk_memory_size;
+                    memory->init_page_count = 1;
+                    LOG_VERBOSE("reset memory size to %d\n", shrunk_memory_size);
+                }
+            }
+
+            module->llvm_aux_data_end = llvm_data_end;
+            module->llvm_aux_stack_bottom = llvm_stack_top;
+            module->llvm_aux_stack_size = llvm_stack_top > llvm_data_end
+                                          ? llvm_stack_top - llvm_data_end
+                                          : llvm_stack_top;
+            module->llvm_aux_stack_global_index = stack_top_global_index;
+            LOG_VERBOSE("aux stack bottom: %d, size: %d\n",
+                        module->llvm_aux_stack_bottom,
+                        module->llvm_aux_stack_size);
+        }
+    }
+
     return true;
 }
 
@@ -1646,9 +1766,11 @@ load(const uint8 *buf, uint32 size, WASMModule *module,
     return true;
 }
 
+const uint8* wasm_file;
 WASMModule*
 wasm_loader_load(const uint8 *buf, uint32 size, char *error_buf, uint32 error_buf_size)
 {
+    wasm_file = buf;
     WASMModule *module = wasm_malloc(sizeof(WASMModule));
 
     if (!module) {
@@ -1885,6 +2007,15 @@ wasm_loader_find_block_addr(WASMModule *module,
                 read_leb_uint32(p, p_end, u32); /* localidx */
                 break;
 
+            case WASM_OP_GET_LOCAL_FAST:
+            case WASM_OP_SET_LOCAL_FAST:
+            case WASM_OP_TEE_LOCAL_FAST:
+            case WASM_OP_GET_GLOBAL_FAST:
+            case WASM_OP_SET_GLOBAL_FAST:
+                CHECK_BUF(p, p_end, 1);
+                p++;
+                break;
+
             case WASM_OP_I32_LOAD:
             case WASM_OP_I64_LOAD:
             case WASM_OP_F32_LOAD:

+ 15 - 5
core/iwasm/interpreter/wasm_opcode.h

@@ -242,9 +242,14 @@ typedef enum WASMOpcode {
     WASM_OP_DROP_64   = 0xc1,
     WASM_OP_SELECT_32 = 0xc2,
     WASM_OP_SELECT_64 = 0xc3,
-
-    WASM_OP_IMPDEP1       = WASM_OP_SELECT_64 + 1,
-    WASM_OP_IMPDEP2       = WASM_OP_IMPDEP1 + 1
+    WASM_OP_GET_LOCAL_FAST = 0xc4,
+    WASM_OP_SET_LOCAL_FAST = 0xc5,
+    WASM_OP_TEE_LOCAL_FAST = 0xc6,
+    WASM_OP_GET_GLOBAL_FAST = 0xc7,
+    WASM_OP_SET_GLOBAL_FAST = 0xc8,
+
+    WASM_OP_IMPDEP1       = 0xc9,
+    WASM_OP_IMPDEP2       = 0xca
 } WASMOpcode;
 
 #ifdef __cplusplus
@@ -454,8 +459,13 @@ static const void *_name[WASM_INSTRUCTION_NUM] = {           \
   HANDLE_OPCODE (WASM_OP_DROP_64),       /* 0xc1 */          \
   HANDLE_OPCODE (WASM_OP_SELECT_32),     /* 0xc2 */          \
   HANDLE_OPCODE (WASM_OP_SELECT_64),     /* 0xc3 */          \
-  HANDLE_OPCODE (WASM_OP_IMPDEP1),       /* 0xc4 */          \
-  HANDLE_OPCODE (WASM_OP_IMPDEP2),       /* 0xc5 */          \
+  HANDLE_OPCODE (WASM_OP_GET_LOCAL_FAST),/* 0xc4 */          \
+  HANDLE_OPCODE (WASM_OP_SET_LOCAL_FAST),/* 0xc5 */          \
+  HANDLE_OPCODE (WASM_OP_TEE_LOCAL_FAST),/* 0xc6 */          \
+  HANDLE_OPCODE (WASM_OP_GET_GLOBAL_FAST),/* 0xc7 */         \
+  HANDLE_OPCODE (WASM_OP_SET_GLOBAL_FAST),/* 0xc8 */         \
+  HANDLE_OPCODE (WASM_OP_IMPDEP1),       /* 0xc9 */          \
+  HANDLE_OPCODE (WASM_OP_IMPDEP2),       /* 0xca */          \
 }
 
 #endif /* end of _WASM_OPCODE_H */

+ 22 - 15
core/iwasm/interpreter/wasm_runtime.c

@@ -59,14 +59,15 @@ memories_deinstantiate(WASMMemoryInstance **memories, uint32 count)
 }
 
 static WASMMemoryInstance*
-memory_instantiate(uint32 init_page_count, uint32 max_page_count,
+memory_instantiate(uint32 num_bytes_per_page,
+                   uint32 init_page_count, uint32 max_page_count,
                    uint32 global_data_size,
                    uint32 heap_size,
                    char *error_buf, uint32 error_buf_size)
 {
     WASMMemoryInstance *memory;
     uint64 total_size = offsetof(WASMMemoryInstance, base_addr) +
-                        NumBytesPerPage * (uint64)init_page_count +
+                        num_bytes_per_page * (uint64)init_page_count +
                         global_data_size;
 
     /* Allocate memory space, addr data and global data */
@@ -78,13 +79,14 @@ memory_instantiate(uint32 init_page_count, uint32 max_page_count,
     }
 
     memset(memory, 0, (uint32)total_size);
+    memory->num_bytes_per_page = num_bytes_per_page;
     memory->cur_page_count = init_page_count;
     memory->max_page_count = max_page_count;
 
     memory->memory_data = memory->base_addr;
 
     memory->global_data = memory->memory_data +
-                          NumBytesPerPage * memory->cur_page_count;;
+                          num_bytes_per_page * memory->cur_page_count;
     memory->global_data_size = global_data_size;
 
     memory->end_addr = memory->global_data + global_data_size;
@@ -152,7 +154,8 @@ memories_instantiate(const WASMModule *module,
     import = module->import_memories;
     for (i = 0; i < module->import_memory_count; i++, import++) {
         if (!(memory = memories[mem_index++] =
-                    memory_instantiate(import->u.memory.init_page_count,
+                    memory_instantiate(import->u.memory.num_bytes_per_page,
+                                       import->u.memory.init_page_count,
                                        import->u.memory. max_page_count,
                                        global_data_size,
                                        heap_size, error_buf, error_buf_size))) {
@@ -167,7 +170,8 @@ memories_instantiate(const WASMModule *module,
     /* instantiate memories from memory section */
     for (i = 0; i < module->memory_count; i++) {
         if (!(memory = memories[mem_index++] =
-                    memory_instantiate(module->memories[i].init_page_count,
+                    memory_instantiate(module->memories[i].num_bytes_per_page,
+                                       module->memories[i].init_page_count,
                                        module->memories[i].max_page_count,
                                        global_data_size,
                                        heap_size, error_buf, error_buf_size))) {
@@ -182,7 +186,7 @@ memories_instantiate(const WASMModule *module,
     if (mem_index == 0) {
         /* no import memory and define memory, but has global variables */
         if (!(memory = memories[mem_index++] =
-                    memory_instantiate(0, 0, global_data_size,
+                    memory_instantiate(0, 0, 0, global_data_size,
                                        heap_size, error_buf, error_buf_size))) {
             set_error_buf(error_buf, error_buf_size,
                           "Instantiate memory failed: "
@@ -486,8 +490,9 @@ globals_instantiate_fix(WASMGlobalInstance *globals,
                 global->initial_value.addr = 0;
             }
             else if (!strcmp(import->u.names.field_name, "DYNAMICTOP_PTR")) {
-                global->initial_value.i32 =
-                    (int32)(NumBytesPerPage * module_inst->default_memory->cur_page_count);
+                global->initial_value.i32 = (int32)
+                    (module_inst->default_memory->num_bytes_per_page
+                     * module_inst->default_memory->cur_page_count);
                 module_inst->DYNAMICTOP_PTR_offset = global->data_offset;
             }
             else if (!strcmp(import->u.names.field_name, "STACKTOP")) {
@@ -748,7 +753,8 @@ wasm_instantiate(WASMModule *module,
 
                 base_offset = (uint32)data_seg->base_offset.u.i32;
                 length = data_seg->data_length;
-                memory_size = NumBytesPerPage * module_inst->default_memory->cur_page_count;
+                memory_size = module_inst->default_memory->num_bytes_per_page
+                              * module_inst->default_memory->cur_page_count;
 
                 if (length > 0
                     && (base_offset >= memory_size
@@ -1140,7 +1146,7 @@ wasm_get_app_addr_range(WASMModuleInstance *module_inst,
 
     if (0 <= app_offset && app_offset < memory->heap_base_offset) {
         app_start_offset = 0;
-        app_end_offset = (int32)(NumBytesPerPage * memory->cur_page_count);
+        app_end_offset = (int32)(memory->num_bytes_per_page * memory->cur_page_count);
     }
     else if (memory->heap_base_offset < app_offset
              && app_offset < memory->heap_base_offset
@@ -1172,7 +1178,7 @@ wasm_get_native_addr_range(WASMModuleInstance *module_inst,
         && (uint8*)native_ptr < memory->end_addr) {
         native_start_addr = memory->memory_data;
         native_end_addr = memory->memory_data
-                          + NumBytesPerPage * memory->cur_page_count;
+                          + memory->num_bytes_per_page * memory->cur_page_count;
     }
     else if (memory->heap_data <= (uint8*)native_ptr
              && (uint8*)native_ptr < memory->heap_data_end) {
@@ -1197,7 +1203,7 @@ wasm_enlarge_memory(WASMModuleInstance *module, uint32 inc_page_count)
     WASMMemoryInstance *new_memory;
     uint32 total_page_count = inc_page_count + memory->cur_page_count;
     uint64 total_size = offsetof(WASMMemoryInstance, base_addr) +
-                        NumBytesPerPage * (uint64)total_page_count +
+                        memory->num_bytes_per_page * (uint64)total_page_count +
                         memory->global_data_size;
 
     if (inc_page_count <= 0)
@@ -1216,13 +1222,14 @@ wasm_enlarge_memory(WASMModuleInstance *module, uint32 inc_page_count)
         return false;
     }
 
+    new_memory->num_bytes_per_page = memory->num_bytes_per_page;
     new_memory->cur_page_count = total_page_count;
     new_memory->max_page_count = memory->max_page_count;
 
     new_memory->memory_data = new_memory->base_addr;
 
     new_memory->global_data = new_memory->memory_data +
-                              NumBytesPerPage * total_page_count;
+                              memory->num_bytes_per_page * total_page_count;
     new_memory->global_data_size = memory->global_data_size;
 
     new_memory->end_addr = new_memory->global_data + memory->global_data_size;
@@ -1236,8 +1243,8 @@ wasm_enlarge_memory(WASMModuleInstance *module, uint32 inc_page_count)
     bh_memcpy_s(new_memory->global_data, new_memory->global_data_size,
                 memory->global_data, memory->global_data_size);
     /* Init free space of new memory */
-    memset(new_memory->memory_data + NumBytesPerPage * memory->cur_page_count,
-           0, NumBytesPerPage * (total_page_count - memory->cur_page_count));
+    memset(new_memory->memory_data + memory->num_bytes_per_page * memory->cur_page_count,
+           0, memory->num_bytes_per_page * (total_page_count - memory->cur_page_count));
 
     new_memory->heap_data = memory->heap_data;
     new_memory->heap_data_end = memory->heap_data_end;

+ 3 - 1
core/iwasm/interpreter/wasm_runtime.h

@@ -16,6 +16,8 @@ extern "C" {
 #endif
 
 typedef struct WASMMemoryInstance {
+    /* Number of bytes per page */
+    uint32 num_bytes_per_page;
     /* Current page count */
     uint32 cur_page_count;
     /* Maximum page count */
@@ -42,7 +44,7 @@ typedef struct WASMMemoryInstance {
     /* Base address, the layout is:
        thunk_argv data + thunk arg offsets +
        memory data + global data
-       memory data init size is: NumBytesPerPage * cur_page_count
+       memory data init size is: num_bytes_per_page * cur_page_count
        global data size is calculated in module instantiating
        Note: when memory is re-allocated, the thunk argv data, thunk
              argv offsets and memory data must be copied to new memory also.

+ 0 - 15
product-mini/app-samples/smart-light/build.sh

@@ -1,15 +0,0 @@
-# Copyright (C) 2019 Intel Corporation.  All rights reserved.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-WAMR_DIR=${PWD}/../../..
-
-/opt/wasi-sdk/bin/clang     \
-        --target=wasm32 -O3 \
-        -z stack-size=4096 -Wl,--initial-memory=65536 \
-        --sysroot=${WAMR_DIR}/wamr-sdk/app/libc-builtin-sysroot    \
-        -Wl,--allow-undefined-file=${WAMR_DIR}/wamr-sdk/app/libc-builtin-sysroot/share/defined-symbols.txt \
-        -Wl,--allow-undefined, \
-        -Wl,--export=main, \
-        -Wl,--no-threads,--strip-all,--no-entry \
-        -nostdlib -o test.wasm *.c
-#./jeffdump -o ../test_wasm.h -n wasm_test_file test.wasm

+ 0 - 56
product-mini/app-samples/smart-light/main.c

@@ -1,56 +0,0 @@
-/*
- * Copyright (C) 2019 Intel Corporation.  All rights reserved.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-
-//User LED
-#define LED_PORT "GPIOA"
-#define LED 5
-
-//User KEY
-#define KEY_PORT "GPIOC"
-#define KEY 13
-
-/** GPIO pin to be input. */
-#define GPIO_DIR_IN (0 << 0)
-
-/** GPIO pin to be output. */
-#define GPIO_DIR_OUT (1 << 0)
-
-void *device_get_binding(const char *);
-int gpio_pin_configure(void *, unsigned int, int);
-int gpio_pin_read(void *, unsigned int, unsigned int *);
-int gpio_pin_write(void *, unsigned int, unsigned int);
-
-int main(int argc, char **argv)
-{
-    unsigned int gpio_value;
-    unsigned char flag = 0;
-    struct device *dev, *key_dev;
-
-    dev = device_get_binding(LED_PORT);
-    /* Set LED pin as output */
-    gpio_pin_configure(dev, LED, GPIO_DIR_OUT);
-
-    key_dev = device_get_binding(KEY_PORT);
-    /* Set KEY pin as input */
-    gpio_pin_configure(key_dev, KEY, GPIO_DIR_IN);
-
-    while (1) {
-        gpio_pin_read(key_dev, KEY, &gpio_value);
-        if (!gpio_value) {
-            gpio_pin_write(dev, LED, 1);
-            if (!flag) {
-                printf("object detected\n");
-                flag = 1;
-            }
-        } else {
-            gpio_pin_write(dev, LED, 0);
-            flag = 0;
-        }
-    }
-    return 0;
-}

+ 3 - 0
samples/gui/README.md

@@ -109,3 +109,6 @@ https://docs.zephyrproject.org/latest/getting_started/index.html</br>
 First, connect the PC and the STM32 via UART. Then install the application using host_tool.</br>
 `./host_tool -D /dev/ttyUSBXXX -i ui_app -f ui_app.wasm`
 
+- Install the AOT version of the WASM application
+`wamrc --target=thumbv7 --target-abi=eabi --cpu=cortex-m7 -o ui_app.aot ui_app.wasm`
+`./host_tool -D /dev/ttyUSBXXX -i ui_app -f ui_app.aot`

+ 1 - 0
samples/gui/wasm-apps/lvgl-compatible/Makefile

@@ -25,4 +25,5 @@ all:
     -Wl,--no-threads,--strip-all,--no-entry -nostdlib \
     -Wl,--export=on_init -Wl,--export=on_timer_callback \
     -Wl,--export=on_widget_event \
+    -Wl,--export=__heap_base,--export=__data_end \
     -o ui_app_lvgl_compatible.wasm

+ 2 - 2
samples/gui/wasm-apps/wgl/CMakeLists.txt

@@ -10,11 +10,11 @@ include_directories(
 )
 
 set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS},-L${WAMR_ROOT_DIR}/wamr-sdk/out/gui/app-sdk/wamr-app-framework/lib")
-set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS},--export=on_init,--export=on_timer_callback,--export=on_widget_event")
+set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS},--export=on_init,--export=on_timer_callback,--export=on_widget_event,--export=__heap_base,--export=__data_end")
 set (CMAKE_C_FLAGS          "${CMAKE_C_FLAGS} -O3 -Wno-unused-command-line-argument")
 
 add_executable(ui_app.wasm
     ${CMAKE_CURRENT_LIST_DIR}/src/main.c
 )
 
-target_link_libraries(ui_app.wasm  app_framework)
+target_link_libraries(ui_app.wasm  app_framework)

+ 1 - 0
samples/gui/wasm-apps/wgl/Makefile

@@ -30,4 +30,5 @@ all:
     -Wl,--no-threads,--strip-all,--no-entry -nostdlib \
     -Wl,--export=on_init -Wl,--export=on_timer_callback \
     -Wl,--export=on_widget_event \
+    -Wl,--export=__heap_base,--export=__data_end \
     -o ui_app.wasm

+ 2 - 3
samples/gui/wasm-runtime-wgl/zephyr-build/CMakeLists.txt

@@ -14,7 +14,7 @@ enable_language (ASM)
 # change to "ARM[sub]", "THUMB[sub]", "X86_32", "MIPS" or "XTENSA"
 # if we want to support arm_32, x86, mips or xtensa
 if (NOT DEFINED WAMR_BUILD_TARGET)
-  set (WAMR_BUILD_TARGET "THUMB")
+  set (WAMR_BUILD_TARGET "THUMBV7")
 endif ()
 
 
@@ -24,8 +24,7 @@ if (NOT DEFINED WAMR_BUILD_INTERP)
 endif ()
 
 if (NOT DEFINED WAMR_BUILD_AOT)
-  # Disable AOT by default.
-  set (WAMR_BUILD_AOT 0)
+  set (WAMR_BUILD_AOT 1)
 endif ()
 
 if (NOT DEFINED WAMR_BUILD_JIT)

+ 1 - 0
samples/gui/wasm-runtime-wgl/zephyr-build/prj.conf

@@ -7,3 +7,4 @@ CONFIG_LOG=y
 CONFIG_UART_INTERRUPT_DRIVEN=y
 CONFIG_STACK_SENTINEL=y
 CONFIG_MAIN_STACK_SIZE=2048
+CONFIG_ARM_MPU=n

+ 5 - 2
samples/littlevgl/README.md

@@ -84,12 +84,12 @@ https://docs.zephyrproject.org/latest/getting_started/index.html</br>
    ` ln -s <wamr_root> wamr`</br>
  d. build source code</br>
     Since ui_app incorporates the LittlevGL source code, it needs more RAM on the device to install the application.
-    It is recommended that RAM SIZE not less than 320KB.
+    It is recommended that the RAM size be no less than 420KB.
     In our test we used nucleo_f767zi, which is not supported by Zephyr.
     However, nucleo_f767zi is almost the same as nucleo_f746zg, except for the FLASH and SRAM sizes.
     So we changed the DTS settings of the nucleo_f746zg board as a workaround.</br>
 
-    `Modify zephyr/dts/arm/st/f7/stm32f746.dtsi, change DT_SIZE_K(256) to DT_SIZE_K(320) in 'sram0' definition.`</br>
+    `Modify zephyr/dts/arm/st/f7/stm32f746.dtsi, change DT_SIZE_K(256) to DT_SIZE_K(512) in 'sram0' definition.`</br>
     `mkdir build && cd build`</br>
     `source ../../../../zephyr-env.sh`</br>
     `cmake -GNinja -DBOARD=nucleo_f746zg ..`</br>
@@ -129,3 +129,6 @@ First, connect PC and STM32 with UART. Then install to use host_tool.</br>
 `./host_tool -D /dev/ttyUSBXXX -i ui_app -f ui_app_no_wasi.wasm`
 **Note**: WASI is currently unavailable on Zephyr, so you have to use ui_app_no_wasi.wasm, which doesn't depend on WASI.
 
+- Install the AOT version of the WASM application
+`wamrc --target=thumbv7 --target-abi=eabi --cpu=cortex-m7 -o ui_app_no_wasi.aot ui_app_no_wasi.wasm`
+`./host_tool -D /dev/ttyUSBXXX -i ui_app -f ui_app_no_wasi.aot`

+ 1 - 1
samples/littlevgl/vgl-wasm-runtime/src/platform/zephyr/iwasm_main.c

@@ -74,7 +74,7 @@ host_interface interface = {
 
 timer_ctx_t timer_ctx;
 
-static char global_heap_buf[270 * 1024] = { 0 };
+static char global_heap_buf[370 * 1024] = { 0 };
 
 extern void display_init(void);
 

+ 2 - 3
samples/littlevgl/vgl-wasm-runtime/zephyr-build/CMakeLists.txt

@@ -14,7 +14,7 @@ enable_language (ASM)
 # change to "ARM[sub]", "THUMB[sub]", "X86_32", "MIPS_32" or "XTENSA_32"
 # if we want to support arm_32, x86, mips or xtensa
 if (NOT DEFINED WAMR_BUILD_TARGET)
-  set (WAMR_BUILD_TARGET "THUMB")
+  set (WAMR_BUILD_TARGET "THUMBV7")
 endif ()
 
 if (NOT DEFINED WAMR_BUILD_INTERP)
@@ -23,8 +23,7 @@ if (NOT DEFINED WAMR_BUILD_INTERP)
 endif ()
 
 if (NOT DEFINED WAMR_BUILD_AOT)
-  # Disable AOT by default.
-  set (WAMR_BUILD_AOT 0)
+  set (WAMR_BUILD_AOT 1)
 endif ()
 
 if (NOT DEFINED WAMR_BUILD_JIT)

+ 1 - 0
samples/littlevgl/vgl-wasm-runtime/zephyr-build/prj.conf

@@ -7,3 +7,4 @@ CONFIG_LOG=y
 CONFIG_UART_INTERRUPT_DRIVEN=y
 CONFIG_STACK_SENTINEL=y
 CONFIG_MAIN_STACK_SIZE=2048
+CONFIG_ARM_MPU=n

+ 1 - 0
samples/littlevgl/wasm-apps/Makefile_wasm_app

@@ -52,4 +52,5 @@ all:
     -Wl,--allow-undefined                           \
     -Wl,--no-threads,--strip-all,--no-entry          \
     -Wl,--export=on_init -Wl,--export=on_timer_callback \
+    -Wl,--export=__heap_base,--export=__data_end \
     -o ui_app.wasm