Browse Source

Implement Windows thread/mutex/cond APIs to support multi-thread (#627)

Implement Windows thread/mutex/cond related APIs to support Windows multi-thread feature
Change the Windows HW boundary check implementation for multi-thread: switch from SEH (structured exception handling) to VEH (vectored exception handling)
Fix wasm-c-api issue of getting AOTFunctionInstance by index, fix wasm-c-api compile warnings
Allow building invokeNative_general.c by setting the cmake variable WAMR_BUILD_INVOKE_NATIVE_GENERAL
Fix several issues in lib-pthread
Disable two LLVM passes (GVN and LICM) in multi-thread mode to preserve volatile semantics
Update the Docker script and documentation for building iwasm with a Docker image

Signed-off-by: Wenyong Huang <wenyong.huang@intel.com>
Wenyong Huang 4 years ago
parent
commit
64b5459066

+ 4 - 1
.gitignore

@@ -1,3 +1,4 @@
+
 .vs
 .vscode
 **/*build/
@@ -8,4 +9,6 @@ core/app-framework/wgl
 wamr-sdk/out/
 wamr-sdk/runtime/build_runtime_sdk/
 test-tools/host-tool/bin/
-product-mini/app-samples/hello-world/test.wasm
+product-mini/app-samples/hello-world/test.wasm
+
+build_out

+ 0 - 21
Dockerfile

@@ -1,21 +0,0 @@
-# Currently supports clang-8 compiler
-# Using the "test.c" app from the README.md:
-# clang-8 --target=wasm32 -O3 -Wl,--initial-memory=131072,--allow-undefined,--export=main,--no-threads,--strip-all,--no-entry -nostdlib -o test.wasm test.c
-# Pay attention to spacing above! ^
-# iwasm test.wasm
-
-FROM ubuntu:latest
-
-RUN apt-get update && \
-  apt-get -y upgrade && \
-  apt-get install -y build-essential clang-8 cmake g++-multilib git lib32gcc-5-dev llvm-8 lld-8 nano
-
-WORKDIR /root
-
-RUN git clone https://github.com/intel/wasm-micro-runtime
-
-RUN cd wasm-micro-runtime/product-mini/platforms/linux/ && mkdir build && \
-  cd build && cmake .. && make
-
-RUN cd /usr/bin && ln -s wasm-ld-8 wasm-ld
-RUN cd /usr/bin && ln -s ~/wasm-micro-runtime/product-mini/platforms/linux/build/iwasm iwasm

+ 18 - 0
ci/Dockerfile

@@ -0,0 +1,18 @@
+# Copyright (C) 2019 Intel Corporation.  All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+FROM ubuntu:18.04
+
+RUN apt update \
+  && apt install -y apt-transport-https ca-certificates gnupg \
+       software-properties-common wget lsb-release curl build-essential
+
+#
+# CMAKE (https://apt.kitware.com/)
+RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null \
+  && apt purge --auto-remove cmake \
+  && apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main' \
+  && apt update \
+  && apt-get install -y kitware-archive-keyring \
+  && rm /etc/apt/trusted.gpg.d/kitware.gpg \
+  && apt-get install -y cmake

+ 24 - 0
ci/build_wamr.sh

@@ -0,0 +1,24 @@
+#!/bin/bash
+
+# Copyright (C) 2019 Intel Corporation.  All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+docker build -t wamr_dev:0.1 -f Dockerfile . \
+  && docker run --rm -it \
+       --name wamr_building \
+       --mount type=bind,src=$(realpath .)/..,dst=/source \
+       --workdir /source \
+       wamr_dev:0.1 \
+       /bin/bash -c "\
+         pushd product-mini/platforms/linux \
+           && mkdir -p build  \
+           && pushd build \
+           && rm -rf * \
+           && cmake .. \
+           && make \
+           && popd \
+           && popd \
+           && echo 'Copying binary for image build' \
+           && mkdir -p build_out \
+           && rm build_out/* \
+           && cp -f product-mini/platforms/linux/build/iwasm build_out/iwasm"

+ 2 - 2
core/iwasm/aot/aot_loader.c

@@ -1191,7 +1191,7 @@ load_function_section(const uint8 *buf, const uint8 *buf_end,
     unwind_info= (AOTUnwindInfo *)((uint8*)module->code + module->code_size
                                    - sizeof(AOTUnwindInfo));
     unwind_info->Version = 1;
-    unwind_info->Flags = UNW_FLAG_EHANDLER;
+    unwind_info->Flags = UNW_FLAG_NHANDLER;
     *(uint32*)&unwind_info->UnwindCode[0] = unwind_code_offset;
 
     size = sizeof(RUNTIME_FUNCTION) * (uint64)module->func_count;
@@ -1231,7 +1231,7 @@ load_function_section(const uint8 *buf, const uint8 *buf_end,
 #if defined(OS_ENABLE_HW_BOUND_CHECK) && defined(BH_PLATFORM_WINDOWS)
         rtl_func_table[i].BeginAddress = (DWORD)text_offset;
         if (i > 0) {
-            rtl_func_table[i].EndAddress = rtl_func_table[i - 1].BeginAddress;
+            rtl_func_table[i - 1].EndAddress = rtl_func_table[i].BeginAddress;
         }
         rtl_func_table[i].UnwindInfoAddress = (DWORD)unwind_info_offset;
 #endif

+ 45 - 41
core/iwasm/aot/aot_runtime.c

@@ -1152,13 +1152,6 @@ static os_thread_local_attribute WASMExecEnv *aot_exec_env = NULL;
 #ifndef BH_PLATFORM_WINDOWS
 static void
 aot_signal_handler(void *sig_addr)
-#else
-EXCEPTION_DISPOSITION
-aot_exception_handler(PEXCEPTION_RECORD ExceptionRecord,
-                      ULONG64 EstablisherFrame,
-                      PCONTEXT ContextRecord,
-                      PDISPATCHER_CONTEXT DispatcherContext)
-#endif
 {
     AOTModuleInstance *module_inst;
     AOTMemoryInstance *memory_inst;
@@ -1168,18 +1161,11 @@ aot_exception_handler(PEXCEPTION_RECORD ExceptionRecord,
     uint8 *stack_min_addr;
     uint32 page_size;
     uint32 guard_page_count = STACK_OVERFLOW_CHECK_GUARD_PAGE_COUNT;
-#ifdef BH_PLATFORM_WINDOWS
-    uint8 *sig_addr = (uint8*)ExceptionRecord->ExceptionInformation[1];
-#endif
 
     /* Check whether current thread is running aot function */
     if (aot_exec_env
         && aot_exec_env->handle == os_self_thread()
-        && (jmpbuf_node = aot_exec_env->jmpbuf_stack_top)
-#ifdef BH_PLATFORM_WINDOWS
-        && ExceptionRecord->ExceptionCode == EXCEPTION_ACCESS_VIOLATION
-#endif
-        ) {
+        && (jmpbuf_node = aot_exec_env->jmpbuf_stack_top)) {
         /* Get mapped mem info of current instance */
         module_inst = (AOTModuleInstance *)aot_exec_env->module_inst;
         /* Get the default memory instance */
@@ -1211,40 +1197,58 @@ aot_exception_handler(PEXCEPTION_RECORD ExceptionRecord,
             os_longjmp(jmpbuf_node->jmpbuf, 1);
         }
     }
-
-#ifdef BH_PLATFORM_WINDOWS
-    ContextRecord->Rip += 3;
-    return EXCEPTION_CONTINUE_SEARCH;
-    (void)EstablisherFrame;
-    (void)ContextRecord;
-    (void)DispatcherContext;
-#endif
 }
-
-#ifdef BH_PLATFORM_WINDOWS
+#else /* else of BH_PLATFORM_WINDOWS */
 static LONG
-stack_overflow_handler(EXCEPTION_POINTERS *exce_info)
+aot_exception_handler(EXCEPTION_POINTERS *exce_info)
 {
-    AOTModuleInstance* module_inst;
-    WASMJmpBuf* jmpbuf_node;
+    PEXCEPTION_RECORD ExceptionRecord = exce_info->ExceptionRecord;
+    uint8 *sig_addr = (uint8*)ExceptionRecord->ExceptionInformation[1];
+    AOTModuleInstance *module_inst;
+    AOTMemoryInstance *memory_inst;
+    WASMJmpBuf *jmpbuf_node;
+    uint8 *mapped_mem_start_addr = NULL;
+    uint8 *mapped_mem_end_addr = NULL;
+    uint32 page_size = os_getpagesize();
 
-    /* Check whether it is stack overflow exception and
-       current thread is running aot function */
-    if (exce_info->ExceptionRecord->ExceptionCode == EXCEPTION_STACK_OVERFLOW
-        && aot_exec_env
+    if (aot_exec_env
         && aot_exec_env->handle == os_self_thread()
         && (jmpbuf_node = aot_exec_env->jmpbuf_stack_top)) {
-        /* Set stack overflow exception and let the aot func continue
-           to run, when the aot func returns, the caller will check
-           whether the exception is thrown and return to runtime, and
-           the damaged stack will be recovered by _resetstkoflw(). */
         module_inst = (AOTModuleInstance*)aot_exec_env->module_inst;
-        aot_set_exception_with_id(module_inst, EXCE_NATIVE_STACK_OVERFLOW);
-        return EXCEPTION_CONTINUE_EXECUTION;
+        if (ExceptionRecord->ExceptionCode == EXCEPTION_ACCESS_VIOLATION) {
+            /* Get the default memory instance */
+            memory_inst = aot_get_default_memory(module_inst);
+            if (memory_inst) {
+                mapped_mem_start_addr = (uint8*)memory_inst->memory_data.ptr;
+                mapped_mem_end_addr = (uint8*)memory_inst->memory_data.ptr
+                                      + 8 * (uint64)BH_GB;
+                if (mapped_mem_start_addr <= (uint8*)sig_addr
+                    && (uint8*)sig_addr < mapped_mem_end_addr) {
+                    /* The address which causes segmentation fault is inside
+                       aot instance's guard regions.
+                       Set exception and let the aot func continue to run, when
+                       the aot func returns, the caller will check whether the
+                       exception is thrown and return to runtime. */
+                    aot_set_exception_with_id(module_inst,
+                                              EXCE_OUT_OF_BOUNDS_MEMORY_ACCESS);
+                    /* Skip current instruction */
+                    exce_info->ContextRecord->Rip++;
+                    return EXCEPTION_CONTINUE_EXECUTION;
+                }
+            }
+        }
+        else if (ExceptionRecord->ExceptionCode == EXCEPTION_STACK_OVERFLOW) {
+            /* Set stack overflow exception and let the aot func continue
+               to run, when the aot func returns, the caller will check
+               whether the exception is thrown and return to runtime, and
+               the damaged stack will be recovered by _resetstkoflw(). */
+            aot_set_exception_with_id(module_inst, EXCE_NATIVE_STACK_OVERFLOW);
+            return EXCEPTION_CONTINUE_EXECUTION;
+        }
     }
     return EXCEPTION_CONTINUE_SEARCH;
 }
-#endif
+#endif /* end of BH_PLATFORM_WINDOWS */
 
 bool
 aot_signal_init()
@@ -1252,7 +1256,7 @@ aot_signal_init()
 #ifndef BH_PLATFORM_WINDOWS
     return os_signal_init(aot_signal_handler) == 0 ? true : false;
 #else
-    return AddVectoredExceptionHandler(1, stack_overflow_handler)
+    return AddVectoredExceptionHandler(1, aot_exception_handler)
            ? true : false;
 #endif
 }
@@ -1263,7 +1267,7 @@ aot_signal_destroy()
 #ifndef BH_PLATFORM_WINDOWS
     os_signal_destroy();
 #else
-    RemoveVectoredExceptionHandler(stack_overflow_handler);
+    RemoveVectoredExceptionHandler(aot_exception_handler);
 #endif
 }
 

+ 0 - 8
core/iwasm/aot/aot_runtime.h

@@ -659,14 +659,6 @@ aot_signal_init();
 
 void
 aot_signal_destroy();
-
-#ifdef BH_PLATFORM_WINDOWS
-EXCEPTION_DISPOSITION
-aot_exception_handler(PEXCEPTION_RECORD ExceptionRecord,
-                      ULONG64 EstablisherFrame,
-                      PCONTEXT ContextRecord,
-                      PDISPATCHER_CONTEXT DispatcherContext);
-#endif
 #endif
 
 void

+ 1 - 1
core/iwasm/aot/arch/aot_reloc_x86_64.c

@@ -93,7 +93,7 @@ init_plt_table(uint8 *plt)
     /* mov exception_handler, rax */
     *p++ = 0x48;
 	*p++ = 0xB8;
-    *(uint64*)p = (uint64)(uintptr_t)aot_exception_handler;
+    *(uint64*)p = 0;/*(uint64)(uintptr_t)aot_exception_handler;*/
     p += sizeof(uint64);
     /* jmp rax */
 	*p++ = 0xFF;

+ 11 - 9
core/iwasm/common/iwasm_common.cmake

@@ -10,7 +10,17 @@ add_definitions(-DBH_FREE=wasm_runtime_free)
 
 file (GLOB c_source_all ${IWASM_COMMON_DIR}/*.c)
 
-if (WAMR_BUILD_TARGET STREQUAL "X86_64" OR WAMR_BUILD_TARGET STREQUAL "AMD_64")
+if (WAMR_BUILD_INVOKE_NATIVE_GENERAL EQUAL 1)
+  # Use invokeNative C version instead of asm code version
+  # if WAMR_BUILD_INVOKE_NATIVE_GENERAL is explicitly set.
+  # Note:
+  #   the maximum number of native arguments is limited to 20,
+  #   and there are possible issues when passing arguments to
+  #   native function for some cpus, e.g. int64 and double arguments
+  #   in arm and mips need to be 8-bytes aligned, and some arguments
+  #   of x86_64 are passed by registers but not stack
+  set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_general.c)
+elseif (WAMR_BUILD_TARGET STREQUAL "X86_64" OR WAMR_BUILD_TARGET STREQUAL "AMD_64")
   if (NOT WAMR_BUILD_SIMD EQUAL 1)
     if (WAMR_BUILD_PLATFORM STREQUAL "windows")
       set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_em64.asm)
@@ -60,14 +70,6 @@ elseif (WAMR_BUILD_TARGET STREQUAL "RISCV32" OR WAMR_BUILD_TARGET STREQUAL "RISC
   set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_riscv32_ilp32d.s)
 elseif (WAMR_BUILD_TARGET STREQUAL "RISCV32_ILP32")
   set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_riscv32_ilp32.s)
-elseif (WAMR_BUILD_TARGET STREQUAL "GENERAL")
-  # Use invokeNative_general.c instead of assembly code,
-  # but the maximum number of native arguments is limited to 20,
-  # and there are possible issues when passing arguments to
-  # native function for some cpus, e.g. int64 and double arguments
-  # in arm and mips need to be 8-bytes aligned, and some arguments
-  # of x86_64 are passed by registers but not stack
-  set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_general.c)
 else ()
   message (FATAL_ERROR "Build target isn't set")
 endif ()

File diff suppressed because it is too large
+ 274 - 136
core/iwasm/common/wasm_c_api.c


+ 0 - 9
core/iwasm/common/wasm_exec_env.c

@@ -71,15 +71,6 @@ fail1:
 void
 wasm_exec_env_destroy_internal(WASMExecEnv *exec_env)
 {
-#ifdef OS_ENABLE_HW_BOUND_CHECK
-    WASMJmpBuf *jmpbuf = exec_env->jmpbuf_stack_top;
-    WASMJmpBuf *jmpbuf_prev;
-    while (jmpbuf) {
-        jmpbuf_prev = jmpbuf->prev;
-        wasm_runtime_free(jmpbuf);
-        jmpbuf = jmpbuf_prev;
-    }
-#endif
 #if WASM_ENABLE_THREAD_MGR != 0
     os_mutex_destroy(&exec_env->wait_lock);
     os_cond_destroy(&exec_env->wait_cond);

+ 0 - 230
core/iwasm/common/wasm_memory.c

@@ -7,33 +7,6 @@
 #include "bh_platform.h"
 #include "mem_alloc.h"
 
-#define BH_ENABLE_MEMORY_PROFILING 0
-
-#if BH_ENABLE_MEMORY_PROFILING != 0
-
-/* Memory profile data of a function */
-typedef struct memory_profile {
-    struct memory_profile *next;
-    const char *function_name;
-    const char *file_name;
-    int line_in_file;
-    int malloc_num;
-    int free_num;
-    int total_malloc;
-    int total_free;
-} memory_profile_t;
-
-/* Memory in use which grows when BH_MALLOC was called
- * and decreases when bh_free was called */
-static unsigned int memory_in_use = 0;
-
-/* Memory profile data list */
-static memory_profile_t *memory_profiles_list = NULL;
-
-/* Lock of the memory profile list */
-static korp_mutex profile_lock;
-#endif /* end of BH_ENABLE_MEMORY_PROFILING */
-
 typedef enum Memory_Mode {
     MEMORY_MODE_UNKNOWN = 0,
     MEMORY_MODE_POOL,
@@ -58,9 +31,6 @@ wasm_memory_init_with_pool(void *mem, unsigned int bytes)
     if (_allocator) {
         memory_mode = MEMORY_MODE_POOL;
         pool_allocator = _allocator;
-#if BH_ENABLE_MEMORY_PROFILING != 0
-        os_mutex_init(&profile_lock);
-#endif
         global_pool_size = bytes;
         return true;
     }
@@ -78,9 +48,6 @@ wasm_memory_init_with_allocator(void *_malloc_func,
         malloc_func = _malloc_func;
         realloc_func = _realloc_func;
         free_func = _free_func;
-#if BH_ENABLE_MEMORY_PROFILING != 0
-        os_mutex_init(&profile_lock);
-#endif
         return true;
     }
     LOG_ERROR("Init memory with allocator (%p, %p, %p) failed.\n",
@@ -108,9 +75,6 @@ wasm_runtime_memory_init(mem_alloc_type_t mem_alloc_type,
 void
 wasm_runtime_memory_destroy()
 {
-#if BH_ENABLE_MEMORY_PROFILING != 0
-    os_mutex_destroy(&profile_lock);
-#endif
     if (memory_mode == MEMORY_MODE_POOL)
         mem_allocator_destroy(pool_allocator);
     memory_mode = MEMORY_MODE_UNKNOWN;
@@ -201,197 +165,3 @@ wasm_runtime_free(void *ptr)
 {
     wasm_runtime_free_internal(ptr);
 }
-
-#if 0
-static uint64 total_malloc = 0;
-static uint64 total_free = 0;
-
-void *
-wasm_runtime_malloc(unsigned int size)
-{
-    void *ret = wasm_runtime_malloc_internal(size + 8);
-
-    if (ret) {
-        total_malloc += size;
-        *(uint32 *)ret = size;
-        return (uint8 *)ret + 8;
-    }
-    else
-        return NULL;
-}
-
-void *
-wasm_runtime_realloc(void *ptr, unsigned int size)
-{
-    if (!ptr)
-        return wasm_runtime_malloc(size);
-    else {
-        uint8 *ptr_old = (uint8 *)ptr - 8;
-        uint32 size_old = *(uint32 *)ptr_old;
-
-        ptr = wasm_runtime_realloc_internal(ptr_old, size + 8);
-        if (ptr) {
-            total_free += size_old;
-            total_malloc += size;
-            *(uint32 *)ptr = size;
-            return (uint8 *)ptr + 8;
-        }
-        return NULL;
-    }
-}
-
-void
-wasm_runtime_free(void *ptr)
-{
-    if (ptr) {
-        uint8 *ptr_old = (uint8 *)ptr - 8;
-        uint32 size_old = *(uint32 *)ptr_old;
-
-        total_free += size_old;
-        wasm_runtime_free_internal(ptr_old);
-    }
-}
-
-void dump_memory_usage()
-{
-    os_printf("Memory usage:\n");
-    os_printf("    total malloc: %"PRIu64"\n", total_malloc);
-    os_printf("    total free: %"PRIu64"\n", total_free);
-}
-#endif
-
-#if BH_ENABLE_MEMORY_PROFILING != 0
-void
-memory_profile_print(const char *file, int line,
-                     const char *func, int alloc)
-{
-    os_printf("location:%s@%d:used:%d:contribution:%d\n",
-              func, line, memory_in_use, alloc);
-}
-
-void *
-wasm_runtime_malloc_profile(const char *file, int line,
-                            const char *func, unsigned int size)
-{
-    void *p = wasm_runtime_malloc(size + 8);
-
-    if (p) {
-        memory_profile_t *profile;
-
-        os_mutex_lock(&profile_lock);
-
-        profile = memory_profiles_list;
-        while (profile) {
-            if (strcmp(profile->function_name, func) == 0
-                && strcmp(profile->file_name, file) == 0) {
-                break;
-            }
-            profile = profile->next;
-        }
-
-        if (profile) {
-            profile->total_malloc += size;/* TODO: overflow check */
-            profile->malloc_num++;
-        } else {
-            profile = wasm_runtime_malloc(sizeof(memory_profile_t));
-            if (!profile) {
-              os_mutex_unlock(&profile_lock);
-              bh_memcpy_s(p, size + 8, &size, sizeof(size));
-              return (char *)p + 8;
-            }
-
-            memset(profile, 0, sizeof(memory_profile_t));
-            profile->file_name = file;
-            profile->line_in_file = line;
-            profile->function_name = func;
-            profile->malloc_num = 1;
-            profile->total_malloc = size;
-            profile->next = memory_profiles_list;
-            memory_profiles_list = profile;
-        }
-
-        os_mutex_unlock(&profile_lock);
-
-        bh_memcpy_s(p, size + 8, &size, sizeof(size));
-        memory_in_use += size;
-
-        memory_profile_print(file, line, func, size);
-
-        return (char *)p + 8;
-    }
-
-    return NULL;
-}
-
-void
-wasm_runtime_free_profile(const char *file, int line,
-                          const char *func, void *ptr)
-{
-    unsigned int size = *(unsigned int *)((char *)ptr - 8);
-    memory_profile_t *profile;
-
-    wasm_runtime_free((char *)ptr - 8);
-
-    if (memory_in_use >= size)
-        memory_in_use -= size;
-
-    os_mutex_lock(&profile_lock);
-
-    profile = memory_profiles_list;
-    while (profile) {
-        if (strcmp(profile->function_name, func) == 0
-            && strcmp(profile->file_name, file) == 0) {
-            break;
-        }
-        profile = profile->next;
-    }
-
-    if (profile) {
-        profile->total_free += size;/* TODO: overflow check */
-        profile->free_num++;
-    } else {
-        profile = wasm_runtime_malloc(sizeof(memory_profile_t));
-        if (!profile) {
-            os_mutex_unlock(&profile_lock);
-            return;
-        }
-
-        memset(profile, 0, sizeof(memory_profile_t));
-        profile->file_name = file;
-        profile->line_in_file = line;
-        profile->function_name = func;
-        profile->free_num = 1;
-        profile->total_free = size;
-        profile->next = memory_profiles_list;
-        memory_profiles_list = profile;
-    }
-
-    os_mutex_unlock(&profile_lock);
-}
-
-/**
- * Summarize memory usage and print it out
- * Can use awk to analyze the output like below:
- * awk -F: '{print $2,$4,$6,$8,$9}' OFS="\t" ./out.txt | sort -n -r -k 1
- */
-void memory_usage_summarize()
-{
-    memory_profile_t *profile;
-
-    os_mutex_lock(&profile_lock);
-
-    profile = memory_profiles_list;
-    while (profile) {
-        os_printf("malloc:%d:malloc_num:%d:free:%d:free_num:%d:%s\n",
-                  profile->total_malloc,
-                  profile->malloc_num,
-                  profile->total_free,
-                  profile->free_num,
-                  profile->function_name);
-        profile = profile->next;
-    }
-
-    os_mutex_unlock(&profile_lock);
-}
-#endif /* end of BH_ENABLE_MEMORY_PROFILING */
-

+ 6 - 2
core/iwasm/compilation/aot_llvm.c

@@ -1542,8 +1542,12 @@ aot_create_comp_context(AOTCompData *comp_data,
         LLVMAddLoopUnswitchPass(comp_ctx->pass_mgr);
         LLVMAddInstructionCombiningPass(comp_ctx->pass_mgr);
         LLVMAddCFGSimplificationPass(comp_ctx->pass_mgr);
-        LLVMAddGVNPass(comp_ctx->pass_mgr);
-        LLVMAddLICMPass(comp_ctx->pass_mgr);
+        if (!option->enable_thread_mgr) {
+            /* These two passes may destroy the volatile semantics,
+                disable them when building as multi-thread mode */
+            LLVMAddGVNPass(comp_ctx->pass_mgr);
+            LLVMAddLICMPass(comp_ctx->pass_mgr);
+        }
         LLVMAddLoopVectorizePass(comp_ctx->pass_mgr);
         LLVMAddSLPVectorizePass(comp_ctx->pass_mgr);
         LLVMAddInstructionCombiningPass(comp_ctx->pass_mgr);

+ 26 - 20
core/iwasm/libraries/lib-pthread/lib_pthread_wrapper.c

@@ -117,7 +117,7 @@ typedef struct {
 } ThreadRoutineArgs;
 
 static bh_list cluster_info_list;
-static korp_mutex pthread_global_lock;
+static korp_mutex thread_global_lock;
 static uint32 handle_id = 1;
 
 static void
@@ -140,7 +140,7 @@ thread_info_destroy(void *node)
 {
     ThreadInfoNode *info_node = (ThreadInfoNode *)node;
 
-    pthread_mutex_lock(&pthread_global_lock);
+    os_mutex_lock(&thread_global_lock);
     if (info_node->type == T_MUTEX) {
         if (info_node->status != MUTEX_DESTROYED)
             os_mutex_destroy(info_node->u.mutex);
@@ -152,18 +152,18 @@ thread_info_destroy(void *node)
         wasm_runtime_free(info_node->u.cond);
     }
     wasm_runtime_free(info_node);
-    pthread_mutex_unlock(&pthread_global_lock);
+    os_mutex_unlock(&thread_global_lock);
 }
 
 bool
 lib_pthread_init()
 {
-    if (0 != os_mutex_init(&pthread_global_lock))
+    if (0 != os_mutex_init(&thread_global_lock))
         return false;
     bh_list_init(&cluster_info_list);
     if (!wasm_cluster_register_destroy_callback(
             lib_pthread_destroy_callback)) {
-        os_mutex_destroy(&pthread_global_lock);
+        os_mutex_destroy(&thread_global_lock);
         return false;
     }
     return true;
@@ -172,7 +172,7 @@ lib_pthread_init()
 void
 lib_pthread_destroy()
 {
-    os_mutex_destroy(&pthread_global_lock);
+    os_mutex_destroy(&thread_global_lock);
 }
 
 static ClusterInfoNode*
@@ -180,17 +180,17 @@ get_cluster_info(WASMCluster *cluster)
 {
     ClusterInfoNode *node;
 
-    os_mutex_lock(&pthread_global_lock);
+    os_mutex_lock(&thread_global_lock);
     node = bh_list_first_elem(&cluster_info_list);
 
     while (node) {
         if (cluster == node->cluster) {
-            os_mutex_unlock(&pthread_global_lock);
+            os_mutex_unlock(&thread_global_lock);
             return node;
         }
         node = bh_list_elem_next(node);
     }
-    os_mutex_unlock(&pthread_global_lock);
+    os_mutex_unlock(&thread_global_lock);
 
     return NULL;
 }
@@ -356,10 +356,10 @@ create_cluster_info(WASMCluster *cluster)
         wasm_runtime_free(node);
         return NULL;
     }
-    os_mutex_lock(&pthread_global_lock);
+    os_mutex_lock(&thread_global_lock);
     ret = bh_list_insert(&cluster_info_list, node);
     bh_assert(ret == BH_LIST_SUCCESS);
-    os_mutex_unlock(&pthread_global_lock);
+    os_mutex_unlock(&thread_global_lock);
 
     (void)ret;
     return node;
@@ -375,10 +375,10 @@ destroy_cluster_info(WASMCluster *cluster)
         os_mutex_destroy(&node->key_data_list_lock);
 
         /* Remove from the cluster info list */
-        os_mutex_lock(&pthread_global_lock);
+        os_mutex_lock(&thread_global_lock);
         bh_list_remove(&cluster_info_list, node);
         wasm_runtime_free(node);
-        os_mutex_unlock(&pthread_global_lock);
+        os_mutex_unlock(&thread_global_lock);
         return true;
     }
     return false;
@@ -447,9 +447,9 @@ static uint32
 allocate_handle()
 {
     uint32 id;
-    os_mutex_lock(&pthread_global_lock);
+    os_mutex_lock(&thread_global_lock);
     id = handle_id++;
-    os_mutex_unlock(&pthread_global_lock);
+    os_mutex_unlock(&thread_global_lock);
     return id;
 }
 
@@ -504,8 +504,6 @@ pthread_start_routine(void *arg)
     /* routine exit, destroy instance */
     wasm_runtime_deinstantiate_internal(module_inst, true);
 
-    info_node->status = THREAD_EXIT;
-
     wasm_runtime_free(routine_args);
 
     /* if the thread is joinable, store the result in its info node,
@@ -516,6 +514,14 @@ pthread_start_routine(void *arg)
     }
     else {
         info_node->u.ret = (void *)(uintptr_t)argv[0];
+#ifdef OS_ENABLE_HW_BOUND_CHECK
+        if (exec_env->suspend_flags.flags & 0x08)
+            /* argv[0] isn't set after longjmp(1) to
+               invoke_native_with_hw_bound_check */
+            info_node->u.ret = exec_env->thread_ret_value;
+#endif
+        /* Update node status after ret value was set */
+        info_node->status = THREAD_EXIT;
     }
 
     return (void *)(uintptr_t)argv[0];
@@ -711,7 +717,7 @@ pthread_exit_wrapper(wasm_exec_env_t exec_env, int32 retval_offset)
     if (!args)
         return;
 
-#ifdef OS_ENABLE_HW_BOUND_CHECK
+#if defined(OS_ENABLE_HW_BOUND_CHECK) && !defined(BH_PLATFORM_WINDOWS)
     /* If hardware bound check enabled, don't deinstantiate module inst
         and thread info node here for AoT module, as they will be freed
         in pthread_start_routine */
@@ -726,13 +732,13 @@ pthread_exit_wrapper(wasm_exec_env_t exec_env, int32 retval_offset)
     /* routine exit, destroy instance */
     wasm_runtime_deinstantiate_internal(module_inst, true);
 
-    args->info_node->status = THREAD_EXIT;
-
     if (!args->info_node->joinable) {
         delete_thread_info_node(args->info_node);
     }
     else {
         args->info_node->u.ret = (void *)(uintptr_t)retval_offset;
+        /* Update node status after ret value was set */
+        args->info_node->status = THREAD_EXIT;
     }
 
     wasm_runtime_free(args);

+ 5 - 7
core/iwasm/libraries/thread-mgr/thread_manager.c

@@ -495,20 +495,18 @@ wasm_cluster_exit_thread(WASMExecEnv *exec_env, void *retval)
 
 #ifdef OS_ENABLE_HW_BOUND_CHECK
     if (exec_env->jmpbuf_stack_top) {
-        WASMJmpBuf *jmpbuf_node;
-
         /* Store the return value in exec_env */
         exec_env->thread_ret_value = retval;
         exec_env->suspend_flags.flags |= 0x08;
 
-        /* Free all jmpbuf_node except the last one */
+#ifndef BH_PLATFORM_WINDOWS
+        /* Pop all jmpbuf_node except the last one */
         while (exec_env->jmpbuf_stack_top->prev) {
-            jmpbuf_node = wasm_exec_env_pop_jmpbuf(exec_env);
-            wasm_runtime_free(jmpbuf_node);
+            wasm_exec_env_pop_jmpbuf(exec_env);
         }
-        jmpbuf_node = exec_env->jmpbuf_stack_top;
-        os_longjmp(jmpbuf_node->jmpbuf, 1);
+        os_longjmp(exec_env->jmpbuf_stack_top->jmpbuf, 1);
         return;
+#endif
     }
 #endif
 

+ 30 - 8
core/shared/platform/common/posix/posix_thread.c

@@ -11,22 +11,30 @@
 
 typedef struct {
     thread_start_routine_t start;
-    void* stack;
-    uint32 stack_size;
     void* arg;
 } thread_wrapper_arg;
 
+#ifdef OS_ENABLE_HW_BOUND_CHECK
+static int os_thread_signal_init();
+static void os_thread_signal_destroy();
+#endif
+
 static void *os_thread_wrapper(void *arg)
 {
-    thread_wrapper_arg * targ = arg;
+    thread_wrapper_arg *targ = arg;
     thread_start_routine_t start_func = targ->start;
     void *thread_arg = targ->arg;
-    os_printf("THREAD CREATED %p\n", &targ);
-    targ->stack = (void *)((uintptr_t)(&arg) & (uintptr_t)~0xfff);
+
+    os_printf("THREAD CREATED %p\n", pthread_self());
     BH_FREE(targ);
+#ifdef OS_ENABLE_HW_BOUND_CHECK
+    if (os_thread_signal_init() != 0)
+        return NULL;
+#endif
     start_func(thread_arg);
 #ifdef OS_ENABLE_HW_BOUND_CHECK
     os_thread_destroy_stack_guard_pages();
+    os_thread_signal_destroy();
 #endif
     return NULL;
 }
@@ -58,7 +66,6 @@ int os_thread_create_with_prio(korp_tid *tid, thread_start_routine_t start,
 
     targ->start = start;
     targ->arg = arg;
-    targ->stack_size = stack_size;
 
     if (pthread_create(tid, &tattr, os_thread_wrapper, targ) != 0) {
         pthread_attr_destroy(&tattr);
@@ -244,6 +251,7 @@ void os_thread_exit(void *retval)
 {
 #ifdef OS_ENABLE_HW_BOUND_CHECK
     os_thread_destroy_stack_guard_pages();
+    os_thread_signal_destroy();
 #endif
     return pthread_exit(retval);
 }
@@ -309,7 +317,7 @@ uint8 *os_thread_get_stack_boundary()
 static os_thread_local_attribute bool stack_guard_pages_inited = false;
 
 /* The signal alternate stack base addr */
-static uint8 *sigalt_stack_base_addr;
+static os_thread_local_attribute uint8 *sigalt_stack_base_addr;
 
 /* The signal handler passed to os_signal_init() */
 static os_signal_handler signal_handler;
@@ -419,7 +427,7 @@ os_signal_init(os_signal_handler handler)
     uint32 map_size = SIG_ALT_STACK_SIZE;
     uint8 *map_addr;
 
-    /* Initialize memory for signal alternate stack */
+    /* Initialize memory for signal alternate stack of current thread */
     if (!(map_addr = os_mmap(NULL, map_size,
                              MMAP_PROT_READ | MMAP_PROT_WRITE,
                              MMAP_MAP_NONE))) {
@@ -473,6 +481,20 @@ os_signal_destroy()
     os_munmap(sigalt_stack_base_addr, SIG_ALT_STACK_SIZE);
 }
 
+static int
+os_thread_signal_init()
+{
+    assert(signal_handler);
+    /* Use the global signal handler registered previously */
+    return os_signal_init(signal_handler);
+}
+
+static void
+os_thread_signal_destroy()
+{
+    os_signal_destroy();
+}
+
 void
 os_signal_unmask()
 {

+ 8 - 1
core/shared/platform/windows/platform_init.c

@@ -5,15 +5,22 @@
 
 #include "platform_api_vmcore.h"
 
+int
+os_thread_sys_init();
+
+void
+os_thread_sys_destroy();
+
 int
 bh_platform_init()
 {
-    return 0;
+    return os_thread_sys_init();
 }
 
 void
 bh_platform_destroy()
 {
+    os_thread_sys_destroy();
 }
 
 int

+ 7 - 4
core/shared/platform/windows/platform_internal.h

@@ -24,6 +24,7 @@
 #include <sys/stat.h>
 #include <stdint.h>
 #include <malloc.h>
+#include <process.h>
 #include <Windows.h>
 
 #ifdef __cplusplus
@@ -40,14 +41,16 @@ extern "C" {
 /* Default thread priority */
 #define BH_THREAD_DEFAULT_PRIORITY 0
 
+typedef void *korp_thread;
 typedef void *korp_tid;
 typedef void *korp_mutex;
 typedef void *korp_sem;
-typedef void *korp_thread;
 
-typedef struct {
-    korp_sem s;
-    unsigned int waiting_count;
+struct os_thread_wait_node;
+typedef struct os_thread_wait_node *os_thread_wait_list;
+typedef struct korp_cond {
+    korp_mutex wait_list_lock;
+    os_thread_wait_list thread_wait_list;
 } korp_cond;
 
 unsigned os_getpagesize();

+ 428 - 73
core/shared/platform/windows/win_thread.c

@@ -3,155 +3,499 @@
  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  */
 
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE
-#endif
 #include "platform_api_vmcore.h"
 #include "platform_api_extension.h"
 
-typedef struct {
-    thread_start_routine_t start;
-    void* stack;
-    uint32 stack_size;
-    void* arg;
-} thread_wrapper_arg;
-
-static void *os_thread_wrapper(void *arg)
+#define bh_assert(v) assert(v)
+
+#define BH_SEM_COUNT_MAX 0xFFFF
+
+struct os_thread_data;
+
+typedef struct os_thread_wait_node {
+    korp_sem sem;
+    void *retval;
+    os_thread_wait_list next;
+} os_thread_wait_node;
+
+typedef struct os_thread_data {
+    /* Next thread data */
+    struct os_thread_data *next;
+    /* Thread data of parent thread */
+    struct os_thread_data *parent;
+    /* Thread Id */
+    DWORD thread_id;
+    /* Thread start routine */
+    thread_start_routine_t start_routine;
+    /* Thread start routine argument */
+    void *arg;
+    /* Wait node of current thread */
+    os_thread_wait_node wait_node;
+    /* Wait cond */
+    korp_cond wait_cond;
+    /* Wait lock */
+    korp_mutex wait_lock;
+    /* Waiting list of other threads who are joining this thread */
+    os_thread_wait_list thread_wait_list;
+} os_thread_data;
+
+static bool is_thread_sys_inited = false;
+
+/* Thread data of supervisor thread */
+static os_thread_data supervisor_thread_data;
+
+/* Thread data key */
+static DWORD thread_data_key;
+
+int os_sem_init(korp_sem* sem);
+int os_sem_destroy(korp_sem* sem);
+int os_sem_wait(korp_sem* sem);
+int os_sem_reltimed_wait(korp_sem* sem, uint64 useconds);
+int os_sem_signal(korp_sem* sem);
+
+int
+os_thread_sys_init()
 {
-    thread_wrapper_arg * targ = arg;
-    thread_start_routine_t start_func = targ->start;
-    void *thread_arg = targ->arg;
-    os_printf("THREAD CREATED %p\n", &targ);
-    targ->stack = (void *)((uintptr_t)(&arg) & (uintptr_t)~0xfff);
-    BH_FREE(targ);
-    start_func(thread_arg);
-    return NULL;
+    if (is_thread_sys_inited)
+        return BHT_OK;
+
+    if ((thread_data_key = TlsAlloc()) == TLS_OUT_OF_INDEXES)
+        return BHT_ERROR;
+
+    /* Initialize supervisor thread data */
+    memset(&supervisor_thread_data, 0, sizeof(os_thread_data));
+
+    supervisor_thread_data.thread_id = GetCurrentThreadId();
+
+    if (os_sem_init(&supervisor_thread_data.wait_node.sem) != BHT_OK)
+        goto fail1;
+
+    if (os_mutex_init(&supervisor_thread_data.wait_lock) != BHT_OK)
+        goto fail2;
+
+    if (os_cond_init(&supervisor_thread_data.wait_cond) != BHT_OK)
+        goto fail3;
+
+    if (!TlsSetValue(thread_data_key, &supervisor_thread_data))
+        goto fail4;
+
+    is_thread_sys_inited = true;
+    return BHT_OK;
+
+fail4:
+    os_cond_destroy(&supervisor_thread_data.wait_cond);
+fail3:
+    os_mutex_destroy(&supervisor_thread_data.wait_lock);
+fail2:
+    os_sem_destroy(&supervisor_thread_data.wait_node.sem);
+fail1:
+    TlsFree(thread_data_key);
+    return BHT_ERROR;
 }
 
-int os_thread_create_with_prio(korp_tid *tid, thread_start_routine_t start,
-                               void *arg, unsigned int stack_size, int prio)
+void
+os_thread_sys_destroy()
 {
-    return BHT_ERROR;
+    if (is_thread_sys_inited) {
+        os_cond_destroy(&supervisor_thread_data.wait_cond);
+        os_mutex_destroy(&supervisor_thread_data.wait_lock);
+        os_sem_destroy(&supervisor_thread_data.wait_node.sem);
+        memset(&supervisor_thread_data, 0, sizeof(os_thread_data));
+        TlsFree(thread_data_key);
+        thread_data_key = 0;
+        is_thread_sys_inited = false;
+    }
 }
 
-int os_thread_create(korp_tid *tid, thread_start_routine_t start, void *arg,
-                     unsigned int stack_size)
+static os_thread_data *
+thread_data_current()
 {
-    return os_thread_create_with_prio(tid, start, arg, stack_size,
-                                      BH_THREAD_DEFAULT_PRIORITY);
+    return (os_thread_data *)TlsGetValue(thread_data_key);
 }
 
-korp_tid os_self_thread()
+static void
+os_thread_cleanup(void *retval)
 {
-    return NULL;
+    os_thread_data *thread_data = thread_data_current();
+
+    bh_assert(thread_data != NULL);
+
+    os_mutex_lock(&thread_data->wait_lock);
+    if (thread_data->thread_wait_list) {
+        /* Signal each joining thread */
+        os_thread_wait_list head = thread_data->thread_wait_list;
+        while (head) {
+            os_thread_wait_list next = head->next;
+            head->retval = retval;
+            os_sem_signal(&head->sem);
+            head = next;
+        }
+        thread_data->thread_wait_list = NULL;
+    }
+    os_mutex_unlock(&thread_data->wait_lock);
+
+    /* Destroy resources */
+    os_cond_destroy(&thread_data->wait_cond);
+    os_sem_destroy(&thread_data->wait_node.sem);
+    os_mutex_destroy(&thread_data->wait_lock);
+    BH_FREE(thread_data);
 }
 
-int os_mutex_init(korp_mutex *mutex)
+static unsigned
+os_thread_wrapper(void *arg)
 {
-    return BHT_OK;
+    os_thread_data *thread_data = arg;
+    os_thread_data *parent = thread_data->parent;
+    void *retval;
+    bool result;
+
+    os_printf("THREAD CREATED %p\n", thread_data);
+
+    os_mutex_lock(&parent->wait_lock);
+    thread_data->thread_id = GetCurrentThreadId();
+    result = TlsSetValue(thread_data_key, thread_data);
+    /* Notify parent thread */
+    os_cond_signal(&parent->wait_cond);
+    os_mutex_unlock(&parent->wait_lock);
+
+    if (!result)
+        return -1;
+
+    retval = thread_data->start_routine(thread_data->arg);
+
+    os_thread_cleanup(retval);
+    return 0;
 }
 
-int os_recursive_mutex_init(korp_mutex *mutex)
+int
+os_thread_create_with_prio(korp_tid *p_tid, thread_start_routine_t start,
+                           void *arg, unsigned int stack_size, int prio)
 {
+    os_thread_data *parent = thread_data_current();
+    os_thread_data *thread_data;
+
+    if (!p_tid || !start)
+        return BHT_ERROR;
+
+    if (stack_size < BH_APPLET_PRESERVED_STACK_SIZE)
+        stack_size = BH_APPLET_PRESERVED_STACK_SIZE;
+
+    if (!(thread_data = BH_MALLOC(sizeof(os_thread_data))))
+        return BHT_ERROR;
+
+    memset(thread_data, 0, sizeof(os_thread_data));
+    thread_data->parent = parent;
+    thread_data->start_routine = start;
+    thread_data->arg = arg;
+
+    if (os_sem_init(&thread_data->wait_node.sem) != BHT_OK)
+        goto fail1;
+
+    if (os_mutex_init(&thread_data->wait_lock) != BHT_OK)
+        goto fail2;
+
+    if (os_cond_init(&thread_data->wait_cond) != BHT_OK)
+        goto fail3;
+
+    os_mutex_lock(&parent->wait_lock);
+    if (!_beginthreadex(NULL, stack_size,
+                        os_thread_wrapper, thread_data,
+                        0, NULL)) {
+        os_mutex_unlock(&parent->wait_lock);
+        goto fail4;
+    }
+    /* Wait for the new thread to record its thread id and store
+       thread_data in its TLS slot, after which it signals us */
+    os_cond_wait(&parent->wait_cond, &parent->wait_lock);
+    os_mutex_unlock(&parent->wait_lock);
+
+    *p_tid = (korp_tid)thread_data;
     return BHT_OK;
+
+fail4:
+    os_cond_destroy(&thread_data->wait_cond);
+fail3:
+    os_mutex_destroy(&thread_data->wait_lock);
+fail2:
+    os_sem_destroy(&thread_data->wait_node.sem);
+fail1:
+    BH_FREE(thread_data);
+    return BHT_ERROR;
 }
 
-int os_mutex_destroy(korp_mutex *mutex)
+int
+os_thread_create(korp_tid *tid, thread_start_routine_t start, void *arg,
+                 unsigned int stack_size)
 {
-    return BHT_OK;
+    return os_thread_create_with_prio(tid, start, arg, stack_size,
+                                      BH_THREAD_DEFAULT_PRIORITY);
 }
 
-int os_mutex_lock(korp_mutex *mutex)
+korp_tid
+os_self_thread()
 {
-    return BHT_ERROR;
+    return (korp_tid)TlsGetValue(thread_data_key);
 }
 
-int os_mutex_unlock(korp_mutex *mutex)
+int
+os_thread_join(korp_tid thread, void **p_retval)
 {
+    os_thread_data *thread_data, *curr_thread_data;
+
+    /* Get thread data of current thread */
+    curr_thread_data = thread_data_current();
+    curr_thread_data->wait_node.next = NULL;
+
+    /* Get thread data of thread to join */
+    thread_data = (os_thread_data *)thread;
+    bh_assert(thread_data);
+
+    os_mutex_lock(&thread_data->wait_lock);
+    if (!thread_data->thread_wait_list)
+        thread_data->thread_wait_list = &curr_thread_data->wait_node;
+    else {
+        /* Add to end of waiting list */
+        os_thread_wait_node *p = thread_data->thread_wait_list;
+        while (p->next)
+            p = p->next;
+        p->next = &curr_thread_data->wait_node;
+    }
+    os_mutex_unlock(&thread_data->wait_lock);
+
+    /* Wait on the sem */
+    os_sem_wait(&curr_thread_data->wait_node.sem);
+    if (p_retval)
+        *p_retval = curr_thread_data->wait_node.retval;
     return BHT_OK;
 }
 
-int os_cond_init(korp_cond *cond)
+int
+os_thread_detach(korp_tid thread)
 {
+    /* Do nothing */
     return BHT_OK;
+    (void)thread;
 }
 
-int os_cond_destroy(korp_cond *cond)
+void
+os_thread_exit(void *retval)
 {
-    return BHT_OK;
+    os_thread_cleanup(retval);
+    _endthreadex(0);
 }
 
-int os_cond_wait(korp_cond *cond, korp_mutex *mutex)
+int
+os_sem_init(korp_sem *sem)
 {
+    bh_assert(sem);
+    *sem = CreateSemaphore(NULL, 0, BH_SEM_COUNT_MAX, NULL);
+    return (*sem != NULL) ? BHT_OK : BHT_ERROR;
+}
+
+int
+os_sem_destroy(korp_sem *sem)
+{
+    bh_assert(sem);
+    CloseHandle(*sem);
     return BHT_OK;
 }
 
+int
+os_sem_wait(korp_sem *sem)
+{
+    DWORD ret;
+
+    bh_assert(sem);
 
-int gettimeofday(struct timeval * tp, struct timezone * tzp)
+    ret = WaitForSingleObject(*sem, INFINITE);
+
+    if (ret == WAIT_OBJECT_0)
+        return BHT_OK;
+    else if(ret == WAIT_TIMEOUT)
+        return (int)WAIT_TIMEOUT;
+    else /* WAIT_FAILED or others */
+        return BHT_ERROR;
+}
+
+int
+os_sem_reltimed_wait(korp_sem *sem, uint64 useconds)
 {
-    /* Note: some broken versions only have 8 trailing zero's,
-        the correct epoch has 9 trailing zero's
-       This magic number is the number of 100 nanosecond intervals
-        since January 1, 1601 (UTC) until 00:00:00 January 1, 1970 */
-    static const uint64_t EPOCH = ((uint64_t) 116444736000000000ULL);
+    uint64 mseconds_64;
+    DWORD ret, mseconds;
+
+    bh_assert(sem);
+
+    if (useconds == BHT_WAIT_FOREVER)
+        mseconds = INFINITE;
+    else {
+        mseconds_64 = useconds / 1000;
+
+        if (mseconds_64 < (uint64)(UINT32_MAX - 1)) {
+            mseconds = (uint32)mseconds_64;
+        }
+        else {
+            mseconds = UINT32_MAX - 1;
+            os_printf("Warning: os_sem_reltimed_wait exceeds limit, "
+                      "set to max timeout instead\n");
+        }
+    }
 
-    SYSTEMTIME  system_time;
-    FILETIME    file_time;
-    uint64_t    time;
+    ret = WaitForSingleObject(*sem, mseconds);
 
-    GetSystemTime(&system_time);
-    SystemTimeToFileTime(&system_time, &file_time);
-    time = ((uint64_t)file_time.dwLowDateTime);
-    time += ((uint64_t)file_time.dwHighDateTime) << 32;
+    if (ret == WAIT_OBJECT_0)
+        return BHT_OK;
+    else if(ret == WAIT_TIMEOUT)
+        return (int)WAIT_TIMEOUT;
+    else /* WAIT_FAILED or others */
+        return BHT_ERROR;
+}
 
-    tp->tv_sec = (long)((time - EPOCH) / 10000000L);
-    tp->tv_usec = (long)(system_time.wMilliseconds * 1000);
+int
+os_sem_signal(korp_sem *sem)
+{
+    bh_assert(sem);
+    return ReleaseSemaphore(*sem, 1, NULL) != FALSE
+           ? BHT_OK: BHT_ERROR;
+}
 
-    return 0;
+int
+os_mutex_init(korp_mutex *mutex)
+{
+    bh_assert(mutex);
+    *mutex = CreateMutex(NULL, FALSE, NULL);
+    return (*mutex != NULL) ? BHT_OK : BHT_ERROR;
 }
 
-static void msec_nsec_to_abstime(struct timespec *ts, int usec)
+int
+os_recursive_mutex_init(korp_mutex *mutex)
 {
-    struct timeval tv;
+    bh_assert(mutex);
+    *mutex = CreateMutex(NULL, FALSE, NULL);
+    return (*mutex != NULL) ? BHT_OK : BHT_ERROR;
+}
 
-    gettimeofday(&tv, NULL);
+int
+os_mutex_destroy(korp_mutex *mutex)
+{
+    assert(mutex);
+    return CloseHandle(*mutex) ? BHT_OK : BHT_ERROR;
+}
 
-    ts->tv_sec = (long int)(tv.tv_sec + usec / 1000000);
-    ts->tv_nsec = (long int)(tv.tv_usec * 1000 + (usec % 1000000) * 1000);
+int
+os_mutex_lock(korp_mutex *mutex)
+{
+    int ret;
 
-    if (ts->tv_nsec >= 1000000000L) {
-        ts->tv_sec++;
-        ts->tv_nsec -= 1000000000L;
-    }
+    assert(mutex);
+    ret = WaitForSingleObject(*mutex, INFINITE);
+    return ret != WAIT_FAILED ? BHT_OK : BHT_ERROR;
 }
 
-int os_cond_reltimedwait(korp_cond *cond, korp_mutex *mutex, uint64 useconds)
+int
+os_mutex_unlock(korp_mutex *mutex)
 {
-    return BHT_OK;
+    bh_assert(mutex);
+    return ReleaseMutex(*mutex) ? BHT_OK : BHT_ERROR;
 }
 
-int os_cond_signal(korp_cond *cond)
+int
+os_cond_init(korp_cond *cond)
 {
+    bh_assert(cond);
+    if (os_mutex_init(&cond->wait_list_lock) != BHT_OK)
+        return BHT_ERROR;
+
+    cond->thread_wait_list = NULL;
     return BHT_OK;
 }
 
-int os_thread_join(korp_tid thread, void **value_ptr)
+int
+os_cond_destroy(korp_cond *cond)
 {
+    bh_assert(cond);
+    os_mutex_destroy(&cond->wait_list_lock);
     return BHT_OK;
 }
 
-int os_thread_detach(korp_tid thread)
+static int
+os_cond_wait_internal(korp_cond *cond, korp_mutex *mutex,
+                      bool timed, uint64 useconds)
 {
+    os_thread_wait_node *node = &thread_data_current()->wait_node;
+
+    node->next = NULL;
+
+    bh_assert(cond);
+    bh_assert(mutex);
+    os_mutex_lock(&cond->wait_list_lock);
+    if (!cond->thread_wait_list)
+        cond->thread_wait_list = node;
+    else {
+        /* Add to end of wait list */
+        os_thread_wait_node *p = cond->thread_wait_list;
+        while (p->next)
+            p = p->next;
+        p->next = node;
+    }
+    os_mutex_unlock(&cond->wait_list_lock);
+
+    /* Unlock mutex, wait sem and lock mutex again */
+    os_mutex_unlock(mutex);
+    if (timed)
+        os_sem_reltimed_wait(&node->sem, useconds);
+    else
+        os_sem_wait(&node->sem);
+    os_mutex_lock(mutex);
+
+    /* Remove wait node from wait list */
+    os_mutex_lock(&cond->wait_list_lock);
+    if (cond->thread_wait_list == node)
+        cond->thread_wait_list = node->next;
+    else {
+        /* Remove from the wait list */
+        os_thread_wait_node *p = cond->thread_wait_list;
+        while (p->next != node)
+            p = p->next;
+        p->next = node->next;
+    }
+    os_mutex_unlock(&cond->wait_list_lock);
+
     return BHT_OK;
 }
 
-void os_thread_exit(void *retval)
+int
+os_cond_wait(korp_cond *cond, korp_mutex *mutex)
+{
+    return os_cond_wait_internal(cond, mutex, false, 0);
+}
+
+int
+os_cond_reltimedwait(korp_cond *cond, korp_mutex *mutex, uint64 useconds)
 {
+    if (useconds == BHT_WAIT_FOREVER) {
+        return os_cond_wait_internal(cond, mutex, false, 0);
+    }
+    else {
+        return os_cond_wait_internal(cond, mutex, true, useconds);
+    }
+}
+
+int
+os_cond_signal(korp_cond *cond)
+{
+    /* Signal the head wait node of wait list */
+    os_mutex_lock(&cond->wait_list_lock);
+    if (cond->thread_wait_list)
+        os_sem_signal(&cond->thread_wait_list->sem);
+    os_mutex_unlock(&cond->wait_list_lock);
+
+    return BHT_OK;
 }
 
 static os_thread_local_attribute uint8 *thread_stack_boundary = NULL;
 
-uint8 *os_thread_get_stack_boundary()
+uint8 *
+os_thread_get_stack_boundary()
 {
     ULONG_PTR low_limit = 0, high_limit = 0;
     uint32 page_size;
@@ -167,10 +511,21 @@ uint8 *os_thread_get_stack_boundary()
     return thread_stack_boundary;
 }
 
+static os_thread_local_attribute bool stack_guard_pages_inited = false;
+
 bool
 os_thread_init_stack_guard_pages()
 {
-    return true;
+    ULONG StackSizeInBytes = 16 * 1024;
+    bool ret;
+
+    if (stack_guard_pages_inited)
+        return true;
+
+    ret = SetThreadStackGuarantee(&StackSizeInBytes);
+    if (ret)
+        stack_guard_pages_inited = true;
+    return ret;
 }
 
 void

+ 8 - 7
doc/build_wamr.md

@@ -448,14 +448,15 @@ Docker
 
 Make sure you have Docker installed on your machine: [macOS](https://docs.docker.com/docker-for-mac/install/), [Windows](https://docs.docker.com/docker-for-windows/install/) or [Linux](https://docs.docker.com/install/linux/docker-ce/ubuntu/).
 
-Build the Docker image:
+Build *iwasm* with the Docker image:
 
 ``` Bash
-docker build --rm -f "Dockerfile" -t wamr:latest .
+$ cd ci
+$ ./build_wamr.sh
+$ ls ../build_out/
 ```
-Run the image in interactive mode:
-``` Bash
-docker run --rm -it wamr:latest
-```
-You'll now enter the container at `/root`.
 
+*build_wamr.sh* generates Linux-compatible libraries (*libiwasm.so* and
+*libvmlib.a*) and an executable binary (*iwasm*), then copies *iwasm* to
+*build_out*. All of the originally generated files remain under
+*product-mini/platforms/linux/build*.

+ 2 - 2
product-mini/platforms/windows/main.c

@@ -79,12 +79,12 @@ static char **
 split_string(char *str, int *count)
 {
     char **res = NULL;
-    char *p;
+    char *p, *next_token;
     int idx = 0;
 
     /* split string and append tokens to 'res' */
     do {
-        p = strtok(str, " ");
+        p = strtok_s(str, " ", &next_token);
         str = NULL;
         res = (char **)realloc(res, sizeof(char *) * (uint32)(idx + 1));
         if (res == NULL) {

Some files were not shown because too many files changed in this diff