
Fix multi-threading issues (#2013)

- Implement atomic.fence to ensure a proper memory synchronization order
- Destroy exec_env_singleton first in wasm/aot deinstantiation
- Change wasm_exec_env_destroy to wait for other threads instead of
  terminating them
- Fix detach thread in thread_manager_start_routine
- Fix duplicated locking of cluster->lock in wasm_cluster_cancel_thread
- Add lib-pthread and lib-wasi-threads compilation to Windows CI
Wenyong Huang, 2 years ago
commit f279ba84ee
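For context, the interpreter and AOT changes below map Wasm's atomic.fence to a host-level memory fence. A minimal C11 sketch (illustrative only, not code from this commit; all names are hypothetical) of the publication pattern such a fence enables:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    static int payload;            /* plain, non-atomic data */
    static atomic_int ready;       /* publication flag */

    static void *producer(void *arg)
    {
        (void)arg;
        payload = 42;                                   /* write the data */
        atomic_thread_fence(memory_order_release);      /* order it before the flag */
        atomic_store_explicit(&ready, 1, memory_order_relaxed);
        return NULL;
    }

    static void *consumer(void *arg)
    {
        (void)arg;
        while (!atomic_load_explicit(&ready, memory_order_relaxed))
            ;                                           /* spin until the flag is set */
        atomic_thread_fence(memory_order_acquire);      /* pairs with the release fence */
        printf("payload = %d\n", payload);              /* guaranteed to print 42 */
        return NULL;
    }

    int main(void)
    {
        pthread_t p, c;
        pthread_create(&c, NULL, consumer, NULL);
        pthread_create(&p, NULL, producer, NULL);
        pthread_join(p, NULL);
        pthread_join(c, NULL);
        return 0;
    }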

+ 14 - 0
.github/workflows/compilation_on_windows.yml

@@ -118,3 +118,17 @@ jobs:
           cmake .. -DWAMR_BUILD_DEBUG_INTERP=1
           cmake --build . --config Release --parallel 4
           cd .. && rm -force -r build
+      - name: Build iwasm [lib pthread]
+        run: |
+          cd product-mini/platforms/windows
+          mkdir build && cd build
+          cmake .. -DWAMR_BUILD_LIB_PTHREAD=1
+          cmake --build . --config Release --parallel 4
+          cd .. && rm -force -r build
+      - name: Build iwasm [lib wasi-thread]
+        run: |
+          cd product-mini/platforms/windows
+          mkdir build && cd build
+          cmake .. -DWAMR_BUILD_LIB_WASI_THREADS=1
+          cmake --build . --config Release --parallel 4
+          cd .. && rm -force -r build

+ 9 - 3
core/iwasm/aot/aot_runtime.c

@@ -1226,6 +1226,15 @@ fail:
 void
 aot_deinstantiate(AOTModuleInstance *module_inst, bool is_sub_inst)
 {
+    if (module_inst->exec_env_singleton) {
+        /* wasm_exec_env_destroy will call
+           wasm_cluster_wait_for_all_except_self to wait for other
+           threads, so as to destroy their exec_envs and module
+           instances first, and avoid accessing the shared resources
+           of current module instance after it is deinstantiated. */
+        wasm_exec_env_destroy((WASMExecEnv *)module_inst->exec_env_singleton);
+    }
+
 #if WASM_ENABLE_LIBC_WASI != 0
     /* Destroy wasi resource before freeing app heap, since some fields of
        wasi contex are allocated from app heap, and if app heap is freed,
@@ -1264,9 +1273,6 @@ aot_deinstantiate(AOTModuleInstance *module_inst, bool is_sub_inst)
     if (module_inst->func_type_indexes)
         wasm_runtime_free(module_inst->func_type_indexes);
 
-    if (module_inst->exec_env_singleton)
-        wasm_exec_env_destroy((WASMExecEnv *)module_inst->exec_env_singleton);
-
     if (((AOTModuleInstanceExtra *)module_inst->e)->c_api_func_imports)
         wasm_runtime_free(
             ((AOTModuleInstanceExtra *)module_inst->e)->c_api_func_imports);
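Note: the block is moved to the top of aot_deinstantiate so that all sub-threads are joined before the instance's shared resources (WASI context, memories, function type indexes, etc.) are freed. A rough pthread analogy (hypothetical code, not the WAMR API) of the hazard this ordering avoids:

    #include <pthread.h>
    #include <stdlib.h>

    struct shared {
        int counter;
    };

    static void *worker(void *arg)
    {
        struct shared *s = arg;
        s->counter++;              /* still touching the shared instance */
        return NULL;
    }

    int main(void)
    {
        struct shared *s = malloc(sizeof(*s));
        pthread_t t;
        s->counter = 0;
        pthread_create(&t, NULL, worker, s);
        pthread_join(t, NULL);     /* wait for the worker first ... */
        free(s);                   /* ... then free; swapping these two lines is a use-after-free */
        return 0;
    }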

+ 2 - 22
core/iwasm/common/wasm_application.c

@@ -203,22 +203,12 @@ wasm_application_execute_main(WASMModuleInstanceCommon *module_inst, int32 argc,
                               char *argv[])
 {
     bool ret;
-#if WASM_ENABLE_THREAD_MGR != 0
-    WASMCluster *cluster;
-#endif
-#if WASM_ENABLE_THREAD_MGR != 0 || WASM_ENABLE_MEMORY_PROFILING != 0
+#if WASM_ENABLE_MEMORY_PROFILING != 0
     WASMExecEnv *exec_env;
 #endif
 
     ret = execute_main(module_inst, argc, argv);
 
-#if WASM_ENABLE_THREAD_MGR != 0
-    exec_env = wasm_runtime_get_exec_env_singleton(module_inst);
-    if (exec_env && (cluster = wasm_exec_env_get_cluster(exec_env))) {
-        wasm_cluster_wait_for_all_except_self(cluster, exec_env);
-    }
-#endif
-
 #if WASM_ENABLE_MEMORY_PROFILING != 0
     exec_env = wasm_runtime_get_exec_env_singleton(module_inst);
     if (exec_env) {
@@ -622,22 +612,12 @@ wasm_application_execute_func(WASMModuleInstanceCommon *module_inst,
                               const char *name, int32 argc, char *argv[])
 {
     bool ret;
-#if WASM_ENABLE_THREAD_MGR != 0
-    WASMCluster *cluster;
-#endif
-#if WASM_ENABLE_THREAD_MGR != 0 || WASM_ENABLE_MEMORY_PROFILING != 0
+#if WASM_ENABLE_MEMORY_PROFILING != 0
     WASMExecEnv *exec_env;
 #endif
 
     ret = execute_func(module_inst, name, argc, argv);
 
-#if WASM_ENABLE_THREAD_MGR != 0
-    exec_env = wasm_runtime_get_exec_env_singleton(module_inst);
-    if (exec_env && (cluster = wasm_exec_env_get_cluster(exec_env))) {
-        wasm_cluster_wait_for_all_except_self(cluster, exec_env);
-    }
-#endif
-
 #if WASM_ENABLE_MEMORY_PROFILING != 0
     exec_env = wasm_runtime_get_exec_env_singleton(module_inst);
     if (exec_env) {

+ 3 - 3
core/iwasm/common/wasm_exec_env.c

@@ -172,16 +172,16 @@ void
 wasm_exec_env_destroy(WASMExecEnv *exec_env)
 {
 #if WASM_ENABLE_THREAD_MGR != 0
-    /* Terminate all sub-threads */
+    /* Wait for all sub-threads */
     WASMCluster *cluster = wasm_exec_env_get_cluster(exec_env);
     if (cluster) {
-        wasm_cluster_terminate_all_except_self(cluster, exec_env);
+        wasm_cluster_wait_for_all_except_self(cluster, exec_env);
 #if WASM_ENABLE_DEBUG_INTERP != 0
         /* Must fire exit event after other threads exits, otherwise
            the stopped thread will be overrided by other threads */
         wasm_cluster_thread_exited(exec_env);
 #endif
-        /* We have terminated other threads, this is the only alive thread, so
+        /* We have waited for other threads, this is the only alive thread, so
          * we don't acquire cluster->lock because the cluster will be destroyed
          * inside this function */
         wasm_cluster_del_exec_env(cluster, exec_env);

+ 2 - 0
core/iwasm/compilation/aot_compiler.c

@@ -1240,6 +1240,8 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index)
                     case WASM_OP_ATOMIC_FENCE:
                         /* Skip memory index */
                         frame_ip++;
+                        if (!aot_compiler_op_atomic_fence(comp_ctx, func_ctx))
+                            return false;
                         break;
                     case WASM_OP_ATOMIC_I32_LOAD:
                         bytes = 4;

+ 9 - 0
core/iwasm/compilation/aot_emit_memory.c

@@ -1423,4 +1423,13 @@ fail:
     return false;
 }
 
+bool
+aot_compiler_op_atomic_fence(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+{
+    return LLVMBuildFence(comp_ctx->builder,
+                          LLVMAtomicOrderingSequentiallyConsistent, false, "")
+               ? true
+               : false;
+}
+
 #endif /* end of WASM_ENABLE_SHARED_MEMORY */
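For reference, LLVMBuildFence emits a single fence instruction into the current basic block. A standalone sketch (assuming an LLVM-C development setup with llvm-c/Core.h available; this is a demo, not WAMR build code) that reproduces the same call and prints the resulting IR:

    #include <stdio.h>
    #include <llvm-c/Core.h>

    int main(void)
    {
        LLVMContextRef ctx = LLVMContextCreate();
        LLVMModuleRef mod = LLVMModuleCreateWithNameInContext("fence_demo", ctx);
        LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);

        LLVMTypeRef fn_type = LLVMFunctionType(LLVMVoidTypeInContext(ctx), NULL, 0, 0);
        LLVMValueRef fn = LLVMAddFunction(mod, "fence_only", fn_type);
        LLVMBasicBlockRef entry = LLVMAppendBasicBlockInContext(ctx, fn, "entry");
        LLVMPositionBuilderAtEnd(builder, entry);

        /* the same call as aot_compiler_op_atomic_fence above:
           a sequentially consistent, cross-thread, unnamed fence */
        LLVMBuildFence(builder, LLVMAtomicOrderingSequentiallyConsistent, 0, "");
        LLVMBuildRetVoid(builder);

        char *ir = LLVMPrintModuleToString(mod);  /* function body contains "fence seq_cst" */
        printf("%s", ir);
        LLVMDisposeMessage(ir);

        LLVMDisposeBuilder(builder);
        LLVMDisposeModule(mod);
        LLVMContextDispose(ctx);
        return 0;
    }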

+ 4 - 0
core/iwasm/compilation/aot_emit_memory.h

@@ -97,6 +97,10 @@ bool
 aot_compiler_op_atomic_notify(AOTCompContext *comp_ctx,
                               AOTFuncContext *func_ctx, uint32 align,
                               uint32 offset, uint32 bytes);
+
+bool
+aot_compiler_op_atomic_fence(AOTCompContext *comp_ctx,
+                             AOTFuncContext *func_ctx);
 #endif
 
 #ifdef __cplusplus

+ 3 - 1
core/iwasm/interpreter/wasm_interp_classic.c

@@ -3467,6 +3467,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                     {
                         /* Skip the memory index */
                         frame_ip++;
+                        os_atomic_thread_fence(os_memory_order_release);
                         break;
                     }
 
@@ -4006,7 +4007,8 @@ fast_jit_call_func_bytecode(WASMModuleInstance *module_inst,
         module_inst->fast_jit_func_ptrs[func_idx_non_import]);
     bh_assert(action == JIT_INTERP_ACTION_NORMAL
               || (action == JIT_INTERP_ACTION_THROWN
-                  && wasm_runtime_copy_exception(exec_env->module_inst, NULL)));
+                  && wasm_copy_exception(
+                      (WASMModuleInstance *)exec_env->module_inst, NULL)));
 
     /* Get the return values form info.out.ret */
     if (func_type->result_count) {

+ 5 - 0
core/iwasm/interpreter/wasm_interp_fast.c

@@ -3305,6 +3305,11 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         PUSH_I32(ret);
                         break;
                     }
+                    case WASM_OP_ATOMIC_FENCE:
+                    {
+                        os_atomic_thread_fence(os_memory_order_release);
+                        break;
+                    }
 
                     case WASM_OP_ATOMIC_I32_LOAD:
                     case WASM_OP_ATOMIC_I32_LOAD8_U:

+ 9 - 4
core/iwasm/interpreter/wasm_runtime.c

@@ -2073,6 +2073,15 @@ wasm_deinstantiate(WASMModuleInstance *module_inst, bool is_sub_inst)
     if (!module_inst)
         return;
 
+    if (module_inst->exec_env_singleton) {
+        /* wasm_exec_env_destroy will call
+           wasm_cluster_wait_for_all_except_self to wait for other
+           threads, so as to destroy their exec_envs and module
+           instances first, and avoid accessing the shared resources
+           of current module instance after it is deinstantiated. */
+        wasm_exec_env_destroy(module_inst->exec_env_singleton);
+    }
+
 #if WASM_ENABLE_DEBUG_INTERP != 0                         \
     || (WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT != 0 \
         && WASM_ENABLE_LAZY_JIT != 0)
@@ -2155,9 +2164,6 @@ wasm_deinstantiate(WASMModuleInstance *module_inst, bool is_sub_inst)
     wasm_externref_cleanup((WASMModuleInstanceCommon *)module_inst);
 #endif
 
-    if (module_inst->exec_env_singleton)
-        wasm_exec_env_destroy(module_inst->exec_env_singleton);
-
 #if WASM_ENABLE_DUMP_CALL_STACK != 0
     if (module_inst->frames) {
         bh_vector_destroy(module_inst->frames);
@@ -2241,7 +2247,6 @@ call_wasm_with_hw_bound_check(WASMModuleInstance *module_inst,
     WASMRuntimeFrame *prev_frame = wasm_exec_env_get_cur_frame(exec_env);
     uint8 *prev_top = exec_env->wasm_stack.s.top;
 #ifdef BH_PLATFORM_WINDOWS
-    const char *exce;
     int result;
     bool has_exception;
     char exception[EXCEPTION_BUF_LEN];

+ 31 - 8
core/iwasm/libraries/thread-mgr/thread_manager.c

@@ -100,7 +100,8 @@ safe_traverse_exec_env_list(WASMCluster *cluster, list_visitor visitor,
     while (node) {
         bool already_processed = false;
         void *proc_node;
-        for (size_t i = 0; i < bh_vector_size(&proc_nodes); i++) {
+        uint32 i;
+        for (i = 0; i < (uint32)bh_vector_size(&proc_nodes); i++) {
             if (!bh_vector_get(&proc_nodes, i, &proc_node)) {
                 ret = false;
                 goto final;
@@ -595,14 +596,24 @@ thread_manager_start_routine(void *arg)
 
     /* Routine exit */
 
-    /* Detach the native thread here to ensure the resources are freed */
-    wasm_cluster_detach_thread(exec_env);
 #if WASM_ENABLE_DEBUG_INTERP != 0
     wasm_cluster_thread_exited(exec_env);
 #endif
 
+    os_mutex_lock(&cluster_list_lock);
+
     os_mutex_lock(&cluster->lock);
 
+    /* Detach the native thread here to ensure the resources are freed */
+    if (exec_env->wait_count == 0 && !exec_env->thread_is_detached) {
+        /* Only detach current thread when there is no other thread
+           joining it, otherwise let the system resources for the
+           thread be released after joining */
+        os_thread_detach(exec_env->handle);
+        /* No need to set exec_env->thread_is_detached to true here
+           since we will exit soon */
+    }
+
     /* Free aux stack space */
     free_aux_stack(exec_env, exec_env->aux_stack_bottom.bottom);
     /* Remove exec_env */
@@ -614,6 +625,8 @@ thread_manager_start_routine(void *arg)
 
     os_mutex_unlock(&cluster->lock);
 
+    os_mutex_unlock(&cluster_list_lock);
+
     os_thread_exit(ret);
     return ret;
 }
@@ -936,12 +949,23 @@ wasm_cluster_exit_thread(WASMExecEnv *exec_env, void *retval)
     wasm_cluster_clear_thread_signal(exec_env);
     wasm_cluster_thread_exited(exec_env);
 #endif
+
     /* App exit the thread, free the resources before exit native thread */
-    /* Detach the native thread here to ensure the resources are freed */
-    wasm_cluster_detach_thread(exec_env);
+
+    os_mutex_lock(&cluster_list_lock);
 
     os_mutex_lock(&cluster->lock);
 
+    /* Detach the native thread here to ensure the resources are freed */
+    if (exec_env->wait_count == 0 && !exec_env->thread_is_detached) {
+        /* Only detach current thread when there is no other thread
+           joining it, otherwise let the system resources for the
+           thread be released after joining */
+        os_thread_detach(exec_env->handle);
+        /* No need to set exec_env->thread_is_detached to true here
+           since we will exit soon */
+    }
+
     module_inst = exec_env->module_inst;
 
     /* Free aux stack space */
@@ -955,6 +979,8 @@ wasm_cluster_exit_thread(WASMExecEnv *exec_env, void *retval)
 
     os_mutex_unlock(&cluster->lock);
 
+    os_mutex_unlock(&cluster_list_lock);
+
     os_thread_exit(retval);
 }
 
@@ -981,8 +1007,6 @@ wasm_cluster_cancel_thread(WASMExecEnv *exec_env)
         return 0;
     }
 
-    os_mutex_lock(&exec_env->cluster->lock);
-
     if (!clusters_have_exec_env(exec_env)) {
         /* Invalid thread or the thread has exited */
         goto final;
@@ -991,7 +1015,6 @@ wasm_cluster_cancel_thread(WASMExecEnv *exec_env)
     set_thread_cancel_flags(exec_env);
 
 final:
-    os_mutex_unlock(&exec_env->cluster->lock);
     os_mutex_unlock(&cluster_list_lock);
 
     return 0;
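The detach logic above replaces an unconditional wasm_cluster_detach_thread: the exiting thread detaches itself only when no other thread intends to join it, otherwise the joiner reclaims the thread resources. A self-contained pthread sketch of the same idea (hypothetical names, not the WAMR API):

    #include <pthread.h>
    #include <stdio.h>

    struct thread_state {
        pthread_mutex_t lock;
        int wait_count;            /* how many threads intend to join us */
    };

    static struct thread_state st = { PTHREAD_MUTEX_INITIALIZER, 1 };

    static void *worker(void *arg)
    {
        (void)arg;
        pthread_mutex_lock(&st.lock);
        if (st.wait_count == 0) {
            /* nobody will join: detach so the OS frees the thread resources itself */
            pthread_detach(pthread_self());
        }
        /* else: stay joinable so pthread_join in the waiter can reclaim us;
           detaching here as well would make that join undefined behavior */
        pthread_mutex_unlock(&st.lock);
        return NULL;
    }

    int main(void)
    {
        pthread_t t;
        pthread_create(&t, NULL, worker, NULL);
        pthread_join(t, NULL);     /* wait_count == 1, so the worker stayed joinable */
        puts("joined");
        return 0;
    }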

+ 35 - 0
core/shared/platform/include/platform_api_extension.h

@@ -92,6 +92,41 @@ int os_thread_detach(korp_tid);
 void
 os_thread_exit(void *retval);
 
+/* Try to define os_atomic_thread_fence if it isn't defined in
+   platform's platform_internal.h */
+#ifndef os_atomic_thread_fence
+
+#if !defined(__GNUC_PREREQ) && (defined(__GNUC__) || defined(__GNUG__)) \
+    && !defined(__clang__) && defined(__GNUC_MINOR__)
+#define __GNUC_PREREQ(maj, min) \
+    ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
+#endif
+
+/* Clang's __GNUC_PREREQ macro has a different meaning than GCC one,
+   so we have to handle this case specially */
+#if defined(__clang__)
+/* Clang provides stdatomic.h since 3.6.0
+   See https://releases.llvm.org/3.6.0/tools/clang/docs/ReleaseNotes.html */
+#if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 6)
+#define BH_HAS_STD_ATOMIC
+#endif
+#elif defined(__GNUC_PREREQ)
+/* Even though older versions of GCC support C11, atomics were
+   not implemented until 4.9. See
+   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58016 */
+#if __GNUC_PREREQ(4, 9)
+#define BH_HAS_STD_ATOMIC
+#endif /* end of __GNUC_PREREQ(4, 9) */
+#endif /* end of defined(__GNUC_PREREQ) */
+
+#if defined(BH_HAS_STD_ATOMIC) && !defined(__cplusplus)
+#include <stdatomic.h>
+#define os_memory_order_release memory_order_release
+#define os_atomic_thread_fence atomic_thread_fence
+#endif
+
+#endif /* end of os_atomic_thread_fence */
+
 /**
  * Initialize current thread environment if current thread
  * is created by developer but not runtime
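A small standalone program (hypothetical; HAS_STD_ATOMIC stands in for BH_HAS_STD_ATOMIC) that reproduces the detection logic above and reports whether the stdatomic.h fallback would be used:

    #include <stdio.h>

    #if defined(__clang__)
    /* Clang has shipped stdatomic.h since 3.6 */
    #if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 6)
    #define HAS_STD_ATOMIC 1
    #endif
    #elif defined(__GNUC__) && defined(__GNUC_MINOR__)
    /* GCC implements C11 atomics since 4.9; e.g. for GCC 4.8:
       (4 << 16) + 8 = 262152 < (4 << 16) + 9 = 262153, so the check fails */
    #if (__GNUC__ << 16) + __GNUC_MINOR__ >= (4 << 16) + 9
    #define HAS_STD_ATOMIC 1
    #endif
    #endif

    int main(void)
    {
    #ifdef HAS_STD_ATOMIC
        puts("stdatomic.h available: os_atomic_thread_fence maps to atomic_thread_fence");
    #else
        puts("platform_internal.h must define os_atomic_thread_fence itself");
    #endif
        return 0;
    }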

+ 3 - 0
core/shared/platform/linux-sgx/platform_internal.h

@@ -63,6 +63,9 @@ os_set_print_function(os_print_function_t pf);
 char *
 strcpy(char *dest, const char *src);
 
+#define os_memory_order_release __ATOMIC_RELEASE
+#define os_atomic_thread_fence __atomic_thread_fence
+
 #ifdef __cplusplus
 }
 #endif

+ 14 - 0
core/shared/platform/windows/platform_internal.h

@@ -116,6 +116,20 @@ os_thread_signal_inited();
 #endif /* end of BUILD_TARGET_X86_64/AMD_64 */
 #endif /* end of WASM_DISABLE_HW_BOUND_CHECK */
 
+typedef enum os_memory_order {
+    os_memory_order_relaxed,
+    os_memory_order_consume,
+    os_memory_order_acquire,
+    os_memory_order_release,
+    os_memory_order_acq_rel,
+    os_memory_order_seq_cst,
+} os_memory_order;
+
+void
+bh_atomic_thread_fence(int mem_order);
+
+#define os_atomic_thread_fence bh_atomic_thread_fence
+
 #ifdef __cplusplus
 }
 #endif

+ 2 - 1
core/shared/platform/windows/shared_platform.cmake

@@ -10,7 +10,8 @@ add_definitions(-DHAVE_STRUCT_TIMESPEC)
 include_directories(${PLATFORM_SHARED_DIR})
 include_directories(${PLATFORM_SHARED_DIR}/../include)
 
-file (GLOB_RECURSE source_all ${PLATFORM_SHARED_DIR}/*.c)
+file (GLOB_RECURSE source_all ${PLATFORM_SHARED_DIR}/*.c
+			      ${PLATFORM_SHARED_DIR}/*.cpp)
 
 set (PLATFORM_SHARED_SOURCE ${source_all})
 

+ 22 - 0
core/shared/platform/windows/win_atomic.cpp

@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2023 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "platform_api_vmcore.h"
+#include "platform_api_extension.h"
+
+#if WASM_ENABLE_SHARED_MEMORY != 0
+
+#include <atomic>
+
+void
+bh_atomic_thread_fence(int mem_order)
+{
+    std::memory_order order =
+        (std::memory_order)(std::memory_order::memory_order_relaxed + mem_order
+                            - os_memory_order_relaxed);
+    std::atomic_thread_fence(order);
+}
+
+#endif
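The conversion relies on os_memory_order (added to platform_internal.h above) and std::memory_order declaring the six orderings in the same sequence, so offsetting from the relaxed value maps one enum onto the other. A short sketch of how a portable call site is expected to resolve on Windows (all names taken from this commit):

    os_atomic_thread_fence(os_memory_order_release);
    /* -> bh_atomic_thread_fence(os_memory_order_release)          (macro in platform_internal.h)
       -> std::atomic_thread_fence(std::memory_order_release)      (conversion in win_atomic.cpp) */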