Browse Source

Merge branch main into dev/interrupt_block_insn

Wenyong Huang 2 years ago
parent
commit
38ffef8c11
100 changed files with 3577 additions and 952 deletions
  1. 1 1
      .github/workflows/build_iwasm_release.yml
  2. 106 9
      .github/workflows/compilation_on_android_ubuntu.yml
  3. 24 0
      .github/workflows/compilation_on_macos.yml
  4. 9 0
      .github/workflows/compilation_on_nuttx.yml
  5. 24 0
      .github/workflows/compilation_on_sgx.yml
  6. 14 0
      .github/workflows/compilation_on_windows.yml
  7. 7 0
      .github/workflows/spec_test_on_nuttx.yml
  8. 4 0
      .gitignore
  9. 17 11
      CMakeLists.txt
  10. 22 5
      build-scripts/build_llvm.py
  11. 15 0
      build-scripts/config_common.cmake
  12. 16 0
      build-scripts/runtime_lib.cmake
  13. 8 0
      core/app-mgr/app-manager/module_wasm_app.c
  14. 11 0
      core/config.h
  15. 132 62
      core/iwasm/aot/aot_runtime.c
  16. 2 22
      core/iwasm/common/wasm_application.c
  17. 217 207
      core/iwasm/common/wasm_c_api.c
  18. 10 3
      core/iwasm/common/wasm_exec_env.c
  19. 7 0
      core/iwasm/common/wasm_exec_env.h
  20. 62 2
      core/iwasm/common/wasm_memory.c
  21. 11 0
      core/iwasm/common/wasm_memory.h
  22. 37 4
      core/iwasm/common/wasm_native.c
  23. 90 14
      core/iwasm/common/wasm_runtime_common.c
  24. 13 21
      core/iwasm/common/wasm_runtime_common.h
  25. 85 39
      core/iwasm/common/wasm_shared_memory.c
  26. 2 0
      core/iwasm/common/wasm_shared_memory.h
  27. 2 0
      core/iwasm/compilation/aot_compiler.c
  28. 16 0
      core/iwasm/compilation/aot_emit_function.c
  29. 19 1
      core/iwasm/compilation/aot_emit_memory.c
  30. 4 0
      core/iwasm/compilation/aot_emit_memory.h
  31. 1 1
      core/iwasm/fast-jit/jit_frontend.c
  32. 2 0
      core/iwasm/fast-jit/jit_ir.c
  33. 4 0
      core/iwasm/fast-jit/jit_ir.h
  34. 11 3
      core/iwasm/fast-jit/jit_regalloc.c
  35. 4 0
      core/iwasm/include/wasm_c_api.h
  36. 41 0
      core/iwasm/include/wasm_export.h
  37. 2 2
      core/iwasm/interpreter/wasm.h
  38. 100 73
      core/iwasm/interpreter/wasm_interp_classic.c
  39. 93 62
      core/iwasm/interpreter/wasm_interp_fast.c
  40. 28 10
      core/iwasm/interpreter/wasm_loader.c
  41. 8 4
      core/iwasm/interpreter/wasm_mini_loader.c
  42. 9 11
      core/iwasm/interpreter/wasm_opcode.h
  43. 144 124
      core/iwasm/interpreter/wasm_runtime.c
  44. 14 9
      core/iwasm/interpreter/wasm_runtime.h
  45. 44 11
      core/iwasm/libraries/lib-pthread/lib_pthread_wrapper.c
  46. 27 0
      core/iwasm/libraries/lib-socket/test/build.sh
  47. 49 0
      core/iwasm/libraries/lib-socket/test/nslookup.c
  48. 193 0
      core/iwasm/libraries/lib-socket/test/tcp_udp.c
  49. 12 0
      core/iwasm/libraries/lib-wasi-threads/lib_wasi_threads.cmake
  50. 181 0
      core/iwasm/libraries/lib-wasi-threads/lib_wasi_threads_wrapper.c
  51. 30 0
      core/iwasm/libraries/lib-wasi-threads/test/build.sh
  52. 122 0
      core/iwasm/libraries/lib-wasi-threads/test/common.h
  53. 128 0
      core/iwasm/libraries/lib-wasi-threads/test/create_threads_until_limit.c
  54. 70 0
      core/iwasm/libraries/lib-wasi-threads/test/global_atomic.c
  55. 78 0
      core/iwasm/libraries/lib-wasi-threads/test/global_lock.c
  56. 16 0
      core/iwasm/libraries/lib-wasi-threads/test/main_proc_exit_busy.c
  57. 3 0
      core/iwasm/libraries/lib-wasi-threads/test/main_proc_exit_busy.json
  58. 16 0
      core/iwasm/libraries/lib-wasi-threads/test/main_proc_exit_sleep.c
  59. 3 0
      core/iwasm/libraries/lib-wasi-threads/test/main_proc_exit_sleep.json
  60. 16 0
      core/iwasm/libraries/lib-wasi-threads/test/main_proc_exit_wait.c
  61. 3 0
      core/iwasm/libraries/lib-wasi-threads/test/main_proc_exit_wait.json
  62. 16 0
      core/iwasm/libraries/lib-wasi-threads/test/main_trap_busy.c
  63. 3 0
      core/iwasm/libraries/lib-wasi-threads/test/main_trap_busy.json
  64. 16 0
      core/iwasm/libraries/lib-wasi-threads/test/main_trap_sleep.c
  65. 3 0
      core/iwasm/libraries/lib-wasi-threads/test/main_trap_sleep.json
  66. 16 0
      core/iwasm/libraries/lib-wasi-threads/test/main_trap_wait.c
  67. 3 0
      core/iwasm/libraries/lib-wasi-threads/test/main_trap_wait.json
  68. 16 0
      core/iwasm/libraries/lib-wasi-threads/test/nonmain_proc_exit_busy.c
  69. 3 0
      core/iwasm/libraries/lib-wasi-threads/test/nonmain_proc_exit_busy.json
  70. 16 0
      core/iwasm/libraries/lib-wasi-threads/test/nonmain_proc_exit_sleep.c
  71. 3 0
      core/iwasm/libraries/lib-wasi-threads/test/nonmain_proc_exit_sleep.json
  72. 16 0
      core/iwasm/libraries/lib-wasi-threads/test/nonmain_proc_exit_wait.c
  73. 3 0
      core/iwasm/libraries/lib-wasi-threads/test/nonmain_proc_exit_wait.json
  74. 16 0
      core/iwasm/libraries/lib-wasi-threads/test/nonmain_trap_busy.c
  75. 3 0
      core/iwasm/libraries/lib-wasi-threads/test/nonmain_trap_busy.json
  76. 16 0
      core/iwasm/libraries/lib-wasi-threads/test/nonmain_trap_sleep.c
  77. 3 0
      core/iwasm/libraries/lib-wasi-threads/test/nonmain_trap_sleep.json
  78. 16 0
      core/iwasm/libraries/lib-wasi-threads/test/nonmain_trap_wait.c
  79. 3 0
      core/iwasm/libraries/lib-wasi-threads/test/nonmain_trap_wait.json
  80. 72 0
      core/iwasm/libraries/lib-wasi-threads/test/spawn_multiple_times.c
  81. 86 0
      core/iwasm/libraries/lib-wasi-threads/test/update_shared_data_and_alloc_heap.c
  82. 80 0
      core/iwasm/libraries/lib-wasi-threads/tid_allocator.c
  83. 36 0
      core/iwasm/libraries/lib-wasi-threads/tid_allocator.h
  84. 0 4
      core/iwasm/libraries/libc-emcc/libc_emcc_wrapper.c
  85. 120 20
      core/iwasm/libraries/libc-wasi/libc_wasi_wrapper.c
  86. 15 2
      core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/posix.c
  87. 11 1
      core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/ssp_config.h
  88. 210 47
      core/iwasm/libraries/thread-mgr/thread_manager.c
  89. 8 3
      core/iwasm/libraries/thread-mgr/thread_manager.h
  90. 0 9
      core/iwasm/libraries/wasi-nn/README.md
  91. 36 30
      core/iwasm/libraries/wasi-nn/src/utils/logger.h
  92. 21 17
      core/iwasm/libraries/wasi-nn/src/wasi_nn.c
  93. 1 0
      core/iwasm/libraries/wasi-nn/src/wasi_nn_private.h
  94. 213 67
      core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp
  95. 13 9
      core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.hpp
  96. 0 5
      core/iwasm/libraries/wasi-nn/test/CMakeLists.txt
  97. 0 22
      core/iwasm/libraries/wasi-nn/test/Dockerfile.base
  98. 21 2
      core/iwasm/libraries/wasi-nn/test/Dockerfile.cpu
  99. 21 1
      core/iwasm/libraries/wasi-nn/test/Dockerfile.nvidia-gpu
  100. 3 2
      core/iwasm/libraries/wasi-nn/test/build.sh

+ 1 - 1
.github/workflows/build_iwasm_release.yml

@@ -36,7 +36,7 @@ jobs:
       - name: generate iwasm binary release
         run: |
           cmake -S . -B build \
-            -DWAMR_BUILD_AOT=1 -DWAMR_BUILD_INTERP=1 -DWAMR_BUILD_JIT=0 \
+            -DWAMR_BUILD_AOT=1 -DWAMR_BUILD_INTERP=1 -DWAMR_BUILD_FAST_JIT=1 -DWAMR_BUILD_JIT=1 \
             -DWAMR_BUILD_CUSTOM_NAME_SECTION=0 \
             -DWAMR_BUILD_DEBUG_INTERP=0 \
             -DWAMR_BUILD_DEBUG_AOT=0 \

+ 106 - 9
.github/workflows/compilation_on_android_ubuntu.yml

@@ -53,13 +53,14 @@ env:
   FAST_JIT_BUILD_OPTIONS: "      -DWAMR_BUILD_AOT=1 -DWAMR_BUILD_FAST_INTERP=0 -DWAMR_BUILD_INTERP=0 -DWAMR_BUILD_FAST_JIT=1 -DWAMR_BUILD_JIT=0 -DWAMR_BUILD_LAZY_JIT=0"
   LLVM_LAZY_JIT_BUILD_OPTIONS: " -DWAMR_BUILD_AOT=1 -DWAMR_BUILD_FAST_INTERP=0 -DWAMR_BUILD_INTERP=0 -DWAMR_BUILD_FAST_JIT=0 -DWAMR_BUILD_JIT=1 -DWAMR_BUILD_LAZY_JIT=1"
   LLVM_EAGER_JIT_BUILD_OPTIONS: "-DWAMR_BUILD_AOT=1 -DWAMR_BUILD_FAST_INTERP=0 -DWAMR_BUILD_INTERP=0 -DWAMR_BUILD_FAST_JIT=0 -DWAMR_BUILD_JIT=1 -DWAMR_BUILD_LAZY_JIT=0"
+  MULTI_TIER_JIT_BUILD_OPTIONS: "-DWAMR_BUILD_AOT=1 -DWAMR_BUILD_FAST_INTERP=0 -DWAMR_BUILD_INTERP=1 -DWAMR_BUILD_FAST_JIT=1 -DWAMR_BUILD_JIT=1 -DWAMR_BUILD_LAZY_JIT=1"
   # For Spec Test
   DEFAULT_TEST_OPTIONS: "-s spec -b -P"
   MULTI_MODULES_TEST_OPTIONS: "-s spec -b -M -P"
   SIMD_TEST_OPTIONS: "-s spec -b -S -P"
   THREADS_TEST_OPTIONS: "-s spec -b -p -P"
   X86_32_TARGET_TEST_OPTIONS: "-m x86_32 -P"
-  WASI_TEST_OPTIONS: "-s wasi_certification"
+  WASI_TEST_OPTIONS: "-s wasi_certification -w"
 
 jobs:
   build_llvm_libraries_on_ubuntu_2004:
@@ -128,6 +129,7 @@ jobs:
             $FAST_JIT_BUILD_OPTIONS,
             $LLVM_LAZY_JIT_BUILD_OPTIONS,
             $LLVM_EAGER_JIT_BUILD_OPTIONS,
+            $MULTI_TIER_JIT_BUILD_OPTIONS,
           ]
         make_options_feature: [
             # Features
@@ -160,6 +162,8 @@ jobs:
             make_options_feature: "-DWAMR_BUILD_MULTI_MODULE=1"
           - make_options_run_mode: $LLVM_EAGER_JIT_BUILD_OPTIONS
             make_options_feature: "-DWAMR_BUILD_MULTI_MODULE=1"
+          - make_options_run_mode: $MULTI_TIER_JIT_BUILD_OPTIONS
+            make_options_feature: "-DWAMR_BUILD_MULTI_MODULE=1"
           # SIMD only on JIT/AOT mode
           - make_options_run_mode: $CLASSIC_INTERP_BUILD_OPTIONS
             make_options_feature: "-DWAMR_BUILD_SIMD=1"
@@ -176,6 +180,8 @@ jobs:
             make_options_feature: "-DWAMR_BUILD_DEBUG_INTERP=1"
           - make_options_run_mode: $LLVM_EAGER_JIT_BUILD_OPTIONS
             make_options_feature: "-DWAMR_BUILD_DEBUG_INTERP=1"
+          - make_options_run_mode: $MULTI_TIER_JIT_BUILD_OPTIONS
+            make_options_feature: "-DWAMR_BUILD_DEBUG_INTERP=1"
           # DEBUG_AOT only on JIT/AOT mode
           - make_options_run_mode: $CLASSIC_INTERP_BUILD_OPTIONS
             make_options_feature: "-DWAMR_BUILD_DEBUG_AOT=1"
@@ -188,6 +194,8 @@ jobs:
             make_options_feature: "-DWAMR_BUILD_DEBUG_AOT=1"
           - make_options_run_mode: $LLVM_EAGER_JIT_BUILD_OPTIONS
             make_options_feature: "-DWAMR_BUILD_DEBUG_AOT=1"
+          - make_options_run_mode: $MULTI_TIER_JIT_BUILD_OPTIONS
+            make_options_feature: "-DWAMR_BUILD_DEBUG_AOT=1"
           # MINI_LOADER only on INTERP mode
           - make_options_run_mode: $AOT_BUILD_OPTIONS
             make_options_feature: "-DWAMR_BUILD_MINI_LOADER=1"
@@ -197,9 +205,13 @@ jobs:
             make_options_feature: "-DWAMR_BUILD_MINI_LOADER=1"
           - make_options_run_mode: $LLVM_EAGER_JIT_BUILD_OPTIONS
             make_options_feature: "-DWAMR_BUILD_MINI_LOADER=1"
-          # Fast-JIT mode doesn't support android(X86-32)
+          - make_options_run_mode: $MULTI_TIER_JIT_BUILD_OPTIONS
+            make_options_feature: "-DWAMR_BUILD_MINI_LOADER=1"
+          # Fast-JIT and Multi-Tier-JIT mode don't support android(X86-32)
           - make_options_run_mode: $FAST_JIT_BUILD_OPTIONS
             platform: android
+          - make_options_run_mode: $MULTI_TIER_JIT_BUILD_OPTIONS
+            platform: android
           # only test android on ubuntu latest
           - os: ubuntu-20.04
             platform: android
@@ -256,6 +268,7 @@ jobs:
             $FAST_JIT_BUILD_OPTIONS,
             $LLVM_LAZY_JIT_BUILD_OPTIONS,
             $LLVM_EAGER_JIT_BUILD_OPTIONS,
+            $MULTI_TIER_JIT_BUILD_OPTIONS,
           ]
         os: [ubuntu-20.04, ubuntu-22.04]
         wasi_sdk_release:
@@ -345,6 +358,22 @@ jobs:
           sudo tar -xzf wabt-1.0.31-*.tar.gz
           sudo mv wabt-1.0.31 wabt
 
+      - name: build wasi-libc (needed for wasi-threads)
+        run: |
+          mkdir wasi-libc
+          cd wasi-libc
+          git init
+          # "Rename thread_spawn import" commit on main branch
+          git fetch https://github.com/WebAssembly/wasi-libc \
+            8f5275796a82f8ecfd0833a4f3f444fa37ed4546
+          git checkout FETCH_HEAD
+          make -j \
+            AR=/opt/wasi-sdk/bin/llvm-ar \
+            NM=/opt/wasi-sdk/bin/llvm-nm \
+            CC=/opt/wasi-sdk/bin/clang \
+            THREAD_MODEL=posix
+        working-directory: core/deps
+
       - name: Build Sample [basic]
         run: |
           cd samples/basic
@@ -398,13 +427,28 @@ jobs:
           exit $?
         working-directory: ./samples/simple
 
+      - name: Build Sample [wasi-threads]
+        run: |
+          cd samples/wasi-threads
+          mkdir build && cd build
+          cmake -DWASI_SYSROOT=`pwd`/../../../core/deps/wasi-libc/sysroot ..
+          cmake --build . --config Release --parallel 4
+          ./iwasm wasm-apps/no_pthread.wasm
+
   test:
     needs: [build_iwasm, build_llvm_libraries_on_ubuntu_2004, build_wamrc]
     runs-on: ubuntu-20.04
     strategy:
       matrix:
         running_mode:
-          ["classic-interp", "fast-interp", "jit", "aot", "fast-jit"]
+          [
+            "classic-interp",
+            "fast-interp",
+            "jit",
+            "aot",
+            "fast-jit",
+            "multi-tier-jit",
+          ]
         test_option:
           [
             $DEFAULT_TEST_OPTIONS,
@@ -413,6 +457,10 @@ jobs:
             $THREADS_TEST_OPTIONS,
             $WASI_TEST_OPTIONS,
           ]
+        wasi_sdk_release:
+          [
+            "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-19/wasi-sdk-19.0-linux.tar.gz",
+          ]
         llvm_cache_key:
           ["${{ needs.build_llvm_libraries_on_ubuntu_2004.outputs.cache_key }}"]
         exclude:
@@ -425,31 +473,64 @@ jobs:
           # aot and jit don't support multi module
           - running_mode: "aot"
             test_option: $MULTI_MODULES_TEST_OPTIONS
-          # aot is WAMR-specific while wasi-testsuite is generic
-          - running_mode: "aot"
-            test_option: $WASI_TEST_OPTIONS
           - running_mode: "jit"
             test_option: $MULTI_MODULES_TEST_OPTIONS
-          # fast-jit is only tested on default mode, exclude other three
+          # fast-jit doesn't support multi module, simd, and threads
           - running_mode: "fast-jit"
             test_option: $MULTI_MODULES_TEST_OPTIONS
           - running_mode: "fast-jit"
             test_option: $SIMD_TEST_OPTIONS
           - running_mode: "fast-jit"
             test_option: $THREADS_TEST_OPTIONS
+          - running_mode: "fast-jit"
+            test_option: $WASI_TEST_OPTIONS
+          # multi-tier-jit doesn't support multi module, simd, and threads
+          - running_mode: "multi-tier-jit"
+            test_option: $MULTI_MODULES_TEST_OPTIONS
+          - running_mode: "multi-tier-jit"
+            test_option: $SIMD_TEST_OPTIONS
+          - running_mode: "multi-tier-jit"
+            test_option: $THREADS_TEST_OPTIONS
+          - running_mode: "multi-tier-jit"
+            test_option: $WASI_TEST_OPTIONS
     steps:
       - name: checkout
         uses: actions/checkout@v3
 
+      - name: download and install wasi-sdk
+        if: matrix.test_option == '$WASI_TEST_OPTIONS'
+        run: |
+          cd /opt
+          sudo wget ${{ matrix.wasi_sdk_release }}
+          sudo tar -xzf wasi-sdk-*.tar.gz
+          sudo mv wasi-sdk-19.0 wasi-sdk
+
+      - name: build wasi-libc (needed for wasi-threads)
+        if: matrix.test_option == '$WASI_TEST_OPTIONS'
+        run: |
+          mkdir wasi-libc
+          cd wasi-libc
+          git init
+          # "Rename thread_spawn import" commit on main branch
+          git fetch https://github.com/WebAssembly/wasi-libc \
+            8f5275796a82f8ecfd0833a4f3f444fa37ed4546
+          git checkout FETCH_HEAD
+          make \
+            AR=/opt/wasi-sdk/bin/llvm-ar \
+            NM=/opt/wasi-sdk/bin/llvm-nm \
+            CC=/opt/wasi-sdk/bin/clang \
+            THREAD_MODEL=posix
+        working-directory: core/deps
+
       - name: set env variable(if llvm are used)
-        if: matrix.running_mode == 'aot' || matrix.running_mode == 'jit'
+        if: matrix.running_mode == 'aot' || matrix.running_mode == 'jit' || matrix.running_mode == 'multi-tier-jit'
         run: echo "USE_LLVM=true" >> $GITHUB_ENV
 
       - name: set env variable(if x86_32 test needed)
         if: >
           (matrix.test_option == '$DEFAULT_TEST_OPTIONS' || matrix.test_option == '$THREADS_TEST_OPTIONS'
            || matrix.test_option == '$WASI_TEST_OPTIONS')
-          && matrix.running_mode != 'fast-jit' && matrix.running_mode != 'jit'
+          && matrix.running_mode != 'fast-jit' && matrix.running_mode != 'jit' && matrix.running_mode != 'multi-tier-jit'
         run: echo "TEST_ON_X86_32=true" >> $GITHUB_ENV
 
       #only download llvm libraries in jit and aot mode
@@ -470,7 +551,22 @@ jobs:
         if: env.USE_LLVM == 'true' && steps.retrieve_llvm_libs.outputs.cache-hit != 'true'
         run: echo "::error::can not get prebuilt llvm libraries" && exit 1
 
+      - name: install jq JSON processor
+        if: matrix.running_mode == 'aot' && matrix.test_option == '$WASI_TEST_OPTIONS'
+        run: sudo apt-get update && sudo apt install -y jq
+
+      - name: Build WASI thread tests
+        if: matrix.test_option == '$WASI_TEST_OPTIONS'
+        run: WASI_SYSROOT=../../../../../core/deps/wasi-libc/sysroot bash build.sh
+        working-directory: ./core/iwasm/libraries/lib-wasi-threads/test/
+
+      - name: build socket api tests
+        if: matrix.test_option == '$WASI_TEST_OPTIONS'
+        run: WASI_SYSROOT=../../../../../core/deps/wasi-libc/sysroot bash build.sh
+        working-directory: ./core/iwasm/libraries/lib-socket/test/
+
       - name: run tests
+        timeout-minutes: 10
         run: ./test_wamr.sh ${{ matrix.test_option }} -t ${{ matrix.running_mode }}
         working-directory: ./tests/wamr-test-suites
 
@@ -486,6 +582,7 @@ jobs:
           sudo apt install -y g++-multilib lib32gcc-9-dev
 
       - name: run tests x86_32
+        timeout-minutes: 10
         if: env.TEST_ON_X86_32 == 'true'
         run: ./test_wamr.sh ${{ env.X86_32_TARGET_TEST_OPTIONS }} ${{ matrix.test_option }} -t ${{ matrix.running_mode }}
         working-directory: ./tests/wamr-test-suites

+ 24 - 0
.github/workflows/compilation_on_macos.yml

@@ -273,6 +273,22 @@ jobs:
           sudo tar -xzf wabt-1.0.31-*.tar.gz
           sudo mv wabt-1.0.31 wabt
 
+      - name: build wasi-libc (needed for wasi-threads)
+        run: |
+          mkdir wasi-libc
+          cd wasi-libc
+          git init
+          # "Rename thread_spawn import" commit on main branch
+          git fetch https://github.com/WebAssembly/wasi-libc \
+            8f5275796a82f8ecfd0833a4f3f444fa37ed4546
+          git checkout FETCH_HEAD
+          make \
+            AR=/opt/wasi-sdk/bin/llvm-ar \
+            NM=/opt/wasi-sdk/bin/llvm-nm \
+            CC=/opt/wasi-sdk/bin/clang \
+            THREAD_MODEL=posix
+        working-directory: core/deps
+
       - name: Build Sample [basic]
         run: |
           cd samples/basic
@@ -318,3 +334,11 @@ jobs:
           cmake ..
           cmake --build . --config Release --parallel 4
           ./hello
+
+      - name: Build Sample [wasi-threads]
+        run: |
+          cd samples/wasi-threads
+          mkdir build && cd build
+          cmake -DWASI_SYSROOT=`pwd`/../../../core/deps/wasi-libc/sysroot ..
+          cmake --build . --config Release --parallel 4
+          ./iwasm wasm-apps/no_pthread.wasm

+ 9 - 0
.github/workflows/compilation_on_nuttx.yml

@@ -45,6 +45,9 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: true
 
+env:
+  WASI_SDK_PATH: "/opt/wasi-sdk"
+
 jobs:
   build_iwasm_on_nuttx:
     runs-on: ubuntu-22.04
@@ -95,6 +98,12 @@ jobs:
           tar xvf riscv.tar.gz
           echo "$PWD/riscv64-unknown-elf-toolchain-10.2.0-2020.12.8-x86_64-linux-ubuntu14/bin" >> $GITHUB_PATH
 
+      - name: Install WASI-SDK
+        run: |
+          curl -L https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-19/wasi-sdk-19.0-linux.tar.gz > wasi-sdk.tar.gz
+          tar xvf wasi-sdk.tar.gz
+          sudo mv wasi-sdk-* /opt/wasi-sdk
+
       - name: Checkout NuttX
         uses: actions/checkout@v3
         with:

+ 24 - 0
.github/workflows/compilation_on_sgx.yml

@@ -260,6 +260,22 @@ jobs:
           sudo tar -xzf wabt-1.0.31-*.tar.gz
           sudo mv wabt-1.0.31 wabt
 
+      - name: build wasi-libc (needed for wasi-threads)
+        run: |
+          mkdir wasi-libc
+          cd wasi-libc
+          git init
+          # "Rename thread_spawn import" commit on main branch
+          git fetch https://github.com/WebAssembly/wasi-libc \
+            8f5275796a82f8ecfd0833a4f3f444fa37ed4546
+          git checkout FETCH_HEAD
+          make \
+            AR=/opt/wasi-sdk/bin/llvm-ar \
+            NM=/opt/wasi-sdk/bin/llvm-nm \
+            CC=/opt/wasi-sdk/bin/clang \
+            THREAD_MODEL=posix
+        working-directory: core/deps
+
       - name: install SGX SDK and necessary libraries
         run: |
           mkdir -p /opt/intel
@@ -319,6 +335,14 @@ jobs:
           cmake --build . --config Release --parallel 4
           ./hello
 
+      - name: Build Sample [wasi-threads]
+        run: |
+          cd samples/wasi-threads
+          mkdir build && cd build
+          cmake -DWASI_SYSROOT=`pwd`/../../../core/deps/wasi-libc/sysroot ..
+          cmake --build . --config Release --parallel 4
+          ./iwasm wasm-apps/no_pthread.wasm
+
   spec_test_default:
     needs: [build_iwasm, build_llvm_libraries, build_wamrc]
     runs-on: ubuntu-20.04

+ 14 - 0
.github/workflows/compilation_on_windows.yml

@@ -118,3 +118,17 @@ jobs:
           cmake .. -DWAMR_BUILD_DEBUG_INTERP=1
           cmake --build . --config Release --parallel 4
           cd .. && rm -force -r build
+      - name: Build iwasm [lib pthread]
+        run: |
+          cd product-mini/platforms/windows
+          mkdir build && cd build
+          cmake .. -DWAMR_BUILD_LIB_PTHREAD=1
+          cmake --build . --config Release --parallel 4
+          cd .. && rm -force -r build
+      - name: Build iwasm [lib wasi-thread]
+        run: |
+          cd product-mini/platforms/windows
+          mkdir build && cd build
+          cmake .. -DWAMR_BUILD_LIB_WASI_THREADS=1
+          cmake --build . --config Release --parallel 4
+          cd .. && rm -force -r build

+ 7 - 0
.github/workflows/spec_test_on_nuttx.yml

@@ -11,6 +11,7 @@ on:
 
 env:
   LLVM_CACHE_SUFFIX: "build-llvm_libraries_ex"
+  WASI_SDK_PATH: "/opt/wasi-sdk"
 
 jobs:
   build_llvm_libraries:
@@ -55,6 +56,12 @@ jobs:
           tar xvf riscv.tar.gz
           echo "$PWD/riscv64-unknown-elf-toolchain-10.2.0-2020.12.8-x86_64-linux-ubuntu14/bin" >> $GITHUB_PATH
 
+      - name: Install WASI-SDK
+        run: |
+          curl -L https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-19/wasi-sdk-19.0-linux.tar.gz > wasi-sdk.tar.gz
+          tar xvf wasi-sdk.tar.gz
+          sudo mv wasi-sdk-* /opt/wasi-sdk
+
       - name: Checkout NuttX
         uses: actions/checkout@v3
         with:

+ 4 - 0
.gitignore

@@ -13,6 +13,8 @@
 core/deps/**
 core/shared/mem-alloc/tlsf
 core/app-framework/wgl
+core/iwasm/libraries/lib-wasi-threads/test/*.wasm
+core/iwasm/libraries/lib-socket/test/*.wasm
 
 wamr-sdk/out/
 wamr-sdk/runtime/build_runtime_sdk/
@@ -35,3 +37,5 @@ tests/benchmarks/coremark/coremark*
 
 samples/workload/include/**
 !samples/workload/include/.gitkeep
+
+# core/iwasm/libraries/wasi-threads

+ 17 - 11
CMakeLists.txt

@@ -7,7 +7,9 @@ project (iwasm)
 
 set (CMAKE_VERBOSE_MAKEFILE OFF)
 
-string (TOLOWER ${CMAKE_HOST_SYSTEM_NAME} WAMR_BUILD_PLATFORM)
+if (NOT DEFINED WAMR_BUILD_PLATFORM)
+  string (TOLOWER ${CMAKE_HOST_SYSTEM_NAME} WAMR_BUILD_PLATFORM)
+endif ()
 
 # Reset default linker flags
 set (CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "")
@@ -83,6 +85,11 @@ if (NOT DEFINED WAMR_BUILD_LIB_PTHREAD)
   set (WAMR_BUILD_LIB_PTHREAD 0)
 endif ()
 
+if (NOT DEFINED WAMR_BUILD_LIB_WASI_THREADS)
+  # Disable wasi threads library by default
+  set (WAMR_BUILD_LIB_WASI_THREADS 0)
+endif ()
+
 if (NOT DEFINED WAMR_BUILD_MINI_LOADER)
   # Disable wasm mini loader by default
   set (WAMR_BUILD_MINI_LOADER 0)
@@ -98,11 +105,6 @@ if (NOT DEFINED WAMR_BUILD_REF_TYPES)
   set (WAMR_BUILD_REF_TYPES 0)
 endif ()
 
-if (COLLECT_CODE_COVERAGE EQUAL 1)
-  set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fprofile-arcs -ftest-coverage")
-  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage")
-endif ()
-
 set (WAMR_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 
 include (${WAMR_ROOT_DIR}/build-scripts/runtime_lib.cmake)
@@ -130,23 +132,26 @@ include (${SHARED_DIR}/utils/uncommon/shared_uncommon.cmake)
 
 # STATIC LIBRARY
 add_library(iwasm_static STATIC ${WAMR_RUNTIME_LIB_SOURCE})
+set_target_properties (iwasm_static PROPERTIES OUTPUT_NAME vmlib)
+target_include_directories(iwasm_static INTERFACE ${WAMR_ROOT_DIR}/core/iwasm/include)
+target_link_libraries (iwasm_static INTERFACE ${LLVM_AVAILABLE_LIBS} ${UV_A_LIBS} -lm -ldl -lpthread)
 if (WAMR_BUILD_WASM_CACHE EQUAL 1)
-  target_link_libraries(iwasm_static PUBLIC boringssl_crypto)
+  target_link_libraries(iwasm_static INTERFACE boringssl_crypto)
 endif ()
-set_target_properties (iwasm_static PROPERTIES OUTPUT_NAME vmlib)
 
 install (TARGETS iwasm_static ARCHIVE DESTINATION lib)
 
 # SHARED LIBRARY
 add_library (iwasm_shared SHARED ${WAMR_RUNTIME_LIB_SOURCE})
 set_target_properties (iwasm_shared PROPERTIES OUTPUT_NAME iwasm)
-target_link_libraries (iwasm_shared ${LLVM_AVAILABLE_LIBS} ${UV_A_LIBS} -lm -ldl -lpthread)
+target_include_directories(iwasm_shared INTERFACE ${WAMR_ROOT_DIR}/core/iwasm/include)
+target_link_libraries (iwasm_shared INTERFACE ${LLVM_AVAILABLE_LIBS} ${UV_A_LIBS} -lm -ldl -lpthread)
 if (WAMR_BUILD_WASM_CACHE EQUAL 1)
-  target_link_libraries(iwasm_shared boringssl_crypto)
+  target_link_libraries(iwasm_shared INTERFACE boringssl_crypto)
 endif ()
 
 if (MINGW)
-target_link_libraries (iwasm_shared -lWs2_32)
+  target_link_libraries (iwasm_shared -lWs2_32)
 endif ()
 
 install (TARGETS iwasm_shared LIBRARY DESTINATION lib)
@@ -155,4 +160,5 @@ install (TARGETS iwasm_shared LIBRARY DESTINATION lib)
 install (FILES
     ${WAMR_ROOT_DIR}/core/iwasm/include/wasm_c_api.h
     ${WAMR_ROOT_DIR}/core/iwasm/include/wasm_export.h
+    ${WAMR_ROOT_DIR}/core/iwasm/include/lib_export.h
     DESTINATION include)

+ 22 - 5
build-scripts/build_llvm.py

@@ -23,6 +23,7 @@ def clone_llvm(dst_dir, llvm_repo, llvm_branch):
 
     if not llvm_dir.exists():
         GIT_CLONE_CMD = f"git clone --depth 1 --branch {llvm_branch} {llvm_repo} llvm"
+        print(GIT_CLONE_CMD)
         subprocess.check_output(shlex.split(GIT_CLONE_CMD), cwd=dst_dir)
 
     return llvm_dir
@@ -47,7 +48,7 @@ def query_llvm_version(llvm_info):
     return response['sha']
 
 
-def build_llvm(llvm_dir, platform, backends, projects, use_clang=False):
+def build_llvm(llvm_dir, platform, backends, projects, use_clang=False, extra_flags=''):
     LLVM_COMPILE_OPTIONS = [
         '-DCMAKE_BUILD_TYPE:STRING="Release"',
         "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON",
@@ -143,7 +144,7 @@ def build_llvm(llvm_dir, platform, backends, projects, use_clang=False):
         + LLVM_INCLUDE_TOOLS_OPTION
     )
 
-    CONFIG_CMD = f"cmake {compile_options} ../llvm"
+    CONFIG_CMD = f"cmake {compile_options} {extra_flags} ../llvm"
     if "windows" == platform:
         if "mingw" in sysconfig.get_platform().lower():
             CONFIG_CMD += " -G'Unix Makefiles'"
@@ -227,6 +228,12 @@ def main():
         action="store_true",
         help="use clang instead of gcc",
     )
+    parser.add_argument(
+        "--extra-cmake-flags",
+        type=str,
+        default="",
+        help="custom extra cmake flags",
+    )
     options = parser.parse_args()
 
     # if the "platform" is not identified in the command line option,
@@ -244,14 +251,17 @@ def main():
     llvm_repo_and_branch = {
         "arc": {
             "repo": "https://github.com/llvm/llvm-project.git",
+            "repo_ssh": "git@github.com:llvm/llvm-project.git",
             "branch": "release/15.x",
         },
         "xtensa": {
-            "repo": "https://github.com/espressif/llvm-project.git",
+            "repo": "https://github.com/espressif/llvm-project.git", 
+            "repo_ssh": "git@github.com:espressif/llvm-project.git",
             "branch": "xtensa_release_15.x",
         },
         "default": {
             "repo": "https://github.com/llvm/llvm-project.git",
+            "repo_ssh": "git@github.com:llvm/llvm-project.git",
             "branch": "release/15.x",
         },
     }
@@ -272,10 +282,17 @@ def main():
             print(commit_hash)
             return commit_hash is not None
         
-        llvm_dir = clone_llvm(deps_dir, llvm_info["repo"], llvm_info["branch"])
+        repo_addr = llvm_info["repo"]
+        if os.environ.get('USE_GIT_SSH') == "true":
+            repo_addr = llvm_info["repo_ssh"]
+        else:
+            print("To use ssh for git clone, run: export USE_GIT_SSH=true")
+        
+        llvm_dir = clone_llvm(deps_dir, repo_addr, llvm_info["branch"])
         if (
             build_llvm(
-                llvm_dir, platform, options.arch, options.project, options.use_clang
+                llvm_dir, platform, options.arch, options.project, options.use_clang,
+                options.extra_cmake_flags
             )
             is not None
         ):

+ 15 - 0
build-scripts/config_common.cmake

@@ -337,6 +337,11 @@ if (WAMR_BUILD_SGX_IPFS EQUAL 1)
 endif ()
 if (WAMR_BUILD_WASI_NN EQUAL 1)
   message ("     WASI-NN enabled")
+  add_definitions (-DWASM_ENABLE_WASI_NN=1)
+  if (WASI_NN_ENABLE_GPU EQUAL 1)
+      message ("     WASI-NN: GPU enabled")
+      add_definitions (-DWASI_NN_ENABLE_GPU=1)
+  endif ()
 endif ()
 if (WAMR_BUILD_ALLOC_WITH_USER_DATA EQUAL 1)
   add_definitions(-DWASM_MEM_ALLOC_WITH_USER_DATA=1)
@@ -345,3 +350,13 @@ if (WAMR_BUILD_WASM_CACHE EQUAL 1)
   add_definitions (-DWASM_ENABLE_WASM_CACHE=1)
   message ("     Wasm files cache enabled")
 endif ()
+if (WAMR_BUILD_GC_HEAP_VERIFY EQUAL 1)
+  add_definitions (-DWASM_ENABLE_GC_VERIFY=1)
+  message ("     GC heap verification enabled")
+endif ()
+if ("$ENV{COLLECT_CODE_COVERAGE}" STREQUAL "1" OR COLLECT_CODE_COVERAGE EQUAL 1)
+  set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fprofile-arcs -ftest-coverage")
+  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage")
+  add_definitions (-DCOLLECT_CODE_COVERAGE)
+  message ("     Collect code coverage enabled")
+endif ()

+ 16 - 0
build-scripts/runtime_lib.cmake

@@ -109,6 +109,13 @@ if (WAMR_BUILD_WASI_NN EQUAL 1)
         message("Tensorflow is already downloaded.")
     endif()
     set(TENSORFLOW_SOURCE_DIR "${WAMR_ROOT_DIR}/core/deps/tensorflow-src")
+
+    if (WASI_NN_ENABLE_GPU EQUAL 1)
+        # Tensorflow specific:
+        # * https://www.tensorflow.org/lite/guide/build_cmake#available_options_to_build_tensorflow_lite
+        set (TFLITE_ENABLE_GPU ON)
+    endif ()
+
     include_directories (${CMAKE_CURRENT_BINARY_DIR}/flatbuffers/include)
     include_directories (${TENSORFLOW_SOURCE_DIR})
     add_subdirectory(
@@ -125,6 +132,14 @@ if (WAMR_BUILD_LIB_PTHREAD EQUAL 1)
     set (WAMR_BUILD_SHARED_MEMORY 1)
 endif ()
 
+if (WAMR_BUILD_LIB_WASI_THREADS EQUAL 1)
+    include (${IWASM_DIR}/libraries/lib-wasi-threads/lib_wasi_threads.cmake)
+    # Enable the dependent feature if lib wasi threads is enabled
+    set (WAMR_BUILD_THREAD_MGR 1)
+    set (WAMR_BUILD_BULK_MEMORY 1)
+    set (WAMR_BUILD_SHARED_MEMORY 1)
+endif ()
+
 if (WAMR_BUILD_DEBUG_INTERP EQUAL 1)
     set (WAMR_BUILD_THREAD_MGR 1)
     include (${IWASM_DIR}/libraries/debug-engine/debug_engine.cmake)
@@ -191,6 +206,7 @@ set (source_all
     ${WASM_APP_LIB_SOURCE_ALL}
     ${NATIVE_INTERFACE_SOURCE}
     ${APP_MGR_SOURCE}
+    ${LIB_WASI_THREADS_SOURCE}
     ${LIB_PTHREAD_SOURCE}
     ${THREAD_MGR_SOURCE}
     ${LIBC_EMCC_SOURCE}

+ 8 - 0
core/app-mgr/app-manager/module_wasm_app.c

@@ -993,6 +993,14 @@ wasm_app_module_uninstall(request_t *msg)
 
     app_manager_printf("Uninstall WASM app successful!\n");
 
+#ifdef COLLECT_CODE_COVERAGE
+    /* Exit app manager so as to collect code coverage data */
+    if (!strcmp(m_name, "__exit_app_manager__")) {
+        app_manager_printf("Exit app manager\n");
+        bh_queue_exit_loop_run(get_app_manager_queue());
+    }
+#endif
+
 #if VALGRIND_CHECK != 0
     bh_queue_exit_loop_run(get_app_manager_queue());
 #endif

+ 11 - 0
core/config.h

@@ -161,6 +161,17 @@
 #define WASM_ENABLE_LIB_PTHREAD_SEMAPHORE 0
 #endif
 
+#ifndef WASM_ENABLE_LIB_WASI_THREADS
+#define WASM_ENABLE_LIB_WASI_THREADS 0
+#endif
+
+#ifndef WASM_ENABLE_HEAP_AUX_STACK_ALLOCATION
+#define WASM_ENABLE_HEAP_AUX_STACK_ALLOCATION WASM_ENABLE_LIB_WASI_THREADS
+#elif WASM_ENABLE_HEAP_AUX_STACK_ALLOCATION == 0 \
+    && WASM_ENABLE_LIB_WASI_THREADS == 1
+#error "Heap aux stack allocation must be enabled for WASI threads"
+#endif
+
 #ifndef WASM_ENABLE_BASE_LIB
 #define WASM_ENABLE_BASE_LIB 0
 #endif

+ 132 - 62
core/iwasm/aot/aot_runtime.c

@@ -901,64 +901,149 @@ create_exports(AOTModuleInstance *module_inst, AOTModule *module,
     return create_export_funcs(module_inst, module, error_buf, error_buf_size);
 }
 
-static bool
-execute_post_inst_function(AOTModuleInstance *module_inst)
+static AOTFunctionInstance *
+lookup_post_instantiate_func(AOTModuleInstance *module_inst,
+                             const char *func_name)
 {
-    AOTFunctionInstance *post_inst_func =
-        aot_lookup_function(module_inst, "__post_instantiate", "()");
+    AOTFunctionInstance *func;
+    AOTFuncType *func_type;
 
-    if (!post_inst_func)
+    if (!(func = aot_lookup_function(module_inst, func_name, NULL)))
         /* Not found */
-        return true;
+        return NULL;
 
-    return aot_create_exec_env_and_call_function(module_inst, post_inst_func, 0,
-                                                 NULL);
+    func_type = func->u.func.func_type;
+    if (!(func_type->param_count == 0 && func_type->result_count == 0))
+        /* Not a valid function type, ignore it */
+        return NULL;
+
+    return func;
 }
 
 static bool
-execute_start_function(AOTModuleInstance *module_inst)
+execute_post_instantiate_functions(AOTModuleInstance *module_inst,
+                                   bool is_sub_inst)
 {
     AOTModule *module = (AOTModule *)module_inst->module;
-    WASMExecEnv *exec_env;
-    typedef void (*F)(WASMExecEnv *);
-    union {
-        F f;
-        void *v;
-    } u;
-
-    if (!module->start_function)
+    AOTFunctionInstance *initialize_func = NULL;
+    AOTFunctionInstance *post_inst_func = NULL;
+    AOTFunctionInstance *call_ctors_func = NULL;
+#ifdef OS_ENABLE_HW_BOUND_CHECK
+    WASMModuleInstanceCommon *module_inst_main = NULL;
+    WASMExecEnv *exec_env_tls = NULL;
+#endif
+    WASMExecEnv *exec_env = NULL;
+    bool ret = false;
+
+#if WASM_ENABLE_LIBC_WASI != 0
+    /*
+     * WASI reactor instances may assume that _initialize will be called by
+     * the environment at most once, and that none of their other exports
+     * are accessed before that call.
+     */
+    if (!is_sub_inst && module->import_wasi_api) {
+        initialize_func =
+            lookup_post_instantiate_func(module_inst, "_initialize");
+    }
+#endif
+
+    /* Execute possible "__post_instantiate" function if wasm app is
+       compiled by emsdk's early version */
+    if (!is_sub_inst) {
+        post_inst_func =
+            lookup_post_instantiate_func(module_inst, "__post_instantiate");
+    }
+
+#if WASM_ENABLE_BULK_MEMORY != 0
+    /* Only execute the memory init function for main instance since
+       the data segments will be dropped once initialized */
+    if (!is_sub_inst
+#if WASM_ENABLE_LIBC_WASI != 0
+        && !module->import_wasi_api
+#endif
+    ) {
+        call_ctors_func =
+            lookup_post_instantiate_func(module_inst, "__wasm_call_ctors");
+    }
+#endif
+
+    if (!module->start_function && !initialize_func && !post_inst_func
+        && !call_ctors_func) {
+        /* No post instantiation functions to call */
         return true;
+    }
 
-    if (!(exec_env =
-              wasm_exec_env_create((WASMModuleInstanceCommon *)module_inst,
-                                   module_inst->default_wasm_stack_size))) {
+#ifdef OS_ENABLE_HW_BOUND_CHECK
+    if (is_sub_inst) {
+        exec_env = exec_env_tls = wasm_runtime_get_exec_env_tls();
+        if (exec_env_tls) {
+            /* Temporarily replace exec_env_tls's module inst to current
+               module inst to avoid checking failure when calling the
+               wasm functions, and ensure that the exec_env's module inst
+               is the correct one. */
+            module_inst_main = exec_env_tls->module_inst;
+            exec_env_tls->module_inst = (WASMModuleInstanceCommon *)module_inst;
+        }
+    }
+#endif
+    if (!exec_env
+        && !(exec_env =
+                 wasm_exec_env_create((WASMModuleInstanceCommon *)module_inst,
+                                      module_inst->default_wasm_stack_size))) {
         aot_set_exception(module_inst, "allocate memory failed");
         return false;
     }
 
-    u.v = module->start_function;
-    u.f(exec_env);
+    /* Execute start function for both main instance and sub instance */
+    if (module->start_function) {
+        AOTFunctionInstance start_func = { 0 };
+        uint32 func_type_idx;
 
-    wasm_exec_env_destroy(exec_env);
-    return !aot_get_exception(module_inst);
-}
+        start_func.func_name = "";
+        start_func.func_index = module->start_func_index;
+        start_func.is_import_func = false;
+        func_type_idx = module->func_type_indexes[module->start_func_index
+                                                  - module->import_func_count];
+        start_func.u.func.func_type = module->func_types[func_type_idx];
+        start_func.u.func.func_ptr = module->start_function;
+        if (!aot_call_function(exec_env, &start_func, 0, NULL)) {
+            goto fail;
+        }
+    }
 
-#if WASM_ENABLE_BULK_MEMORY != 0
-static bool
-execute_memory_init_function(AOTModuleInstance *module_inst)
-{
-    AOTFunctionInstance *memory_init_func =
-        aot_lookup_function(module_inst, "__wasm_call_ctors", "()");
+    if (initialize_func
+        && !aot_call_function(exec_env, initialize_func, 0, NULL)) {
+        goto fail;
+    }
 
-    if (!memory_init_func)
-        /* Not found */
-        return true;
+    if (post_inst_func
+        && !aot_call_function(exec_env, post_inst_func, 0, NULL)) {
+        goto fail;
+    }
 
-    return aot_create_exec_env_and_call_function(module_inst, memory_init_func,
-                                                 0, NULL);
-}
+    if (call_ctors_func
+        && !aot_call_function(exec_env, call_ctors_func, 0, NULL)) {
+        goto fail;
+    }
+
+    ret = true;
+
+fail:
+#ifdef OS_ENABLE_HW_BOUND_CHECK
+    if (is_sub_inst && exec_env_tls) {
+        bh_assert(exec_env == exec_env_tls);
+        /* Restore the exec_env_tls's module inst */
+        exec_env_tls->module_inst = module_inst_main;
+    }
+    else
+        wasm_exec_env_destroy(exec_env);
+#else
+    wasm_exec_env_destroy(exec_env);
 #endif
 
+    return ret;
+}
+
 static bool
 check_linked_symbol(AOTModule *module, char *error_buf, uint32 error_buf_size)
 {
@@ -1121,32 +1206,11 @@ aot_instantiate(AOTModule *module, bool is_sub_inst, uint32 stack_size,
     }
 #endif
 
-    /* Execute __post_instantiate function and start function*/
-    if (!execute_post_inst_function(module_inst)
-        || !execute_start_function(module_inst)) {
+    if (!execute_post_instantiate_functions(module_inst, is_sub_inst)) {
         set_error_buf(error_buf, error_buf_size, module_inst->cur_exception);
         goto fail;
     }
 
-#if WASM_ENABLE_BULK_MEMORY != 0
-#if WASM_ENABLE_LIBC_WASI != 0
-    if (!module->import_wasi_api) {
-#endif
-        /* Only execute the memory init function for main instance because
-            the data segments will be dropped once initialized.
-        */
-        if (!is_sub_inst) {
-            if (!execute_memory_init_function(module_inst)) {
-                set_error_buf(error_buf, error_buf_size,
-                              module_inst->cur_exception);
-                goto fail;
-            }
-        }
-#if WASM_ENABLE_LIBC_WASI != 0
-    }
-#endif
-#endif
-
 #if WASM_ENABLE_MEMORY_TRACING != 0
     wasm_runtime_dump_module_inst_mem_consumption(
         (WASMModuleInstanceCommon *)module_inst);
@@ -1162,6 +1226,15 @@ fail:
 void
 aot_deinstantiate(AOTModuleInstance *module_inst, bool is_sub_inst)
 {
+    if (module_inst->exec_env_singleton) {
+        /* wasm_exec_env_destroy will call
+           wasm_cluster_wait_for_all_except_self to wait for other
+           threads, so as to destroy their exec_envs and module
+           instances first, and avoid accessing the shared resources
+           of current module instance after it is deinstantiated. */
+        wasm_exec_env_destroy((WASMExecEnv *)module_inst->exec_env_singleton);
+    }
+
 #if WASM_ENABLE_LIBC_WASI != 0
     /* Destroy wasi resource before freeing app heap, since some fields of
        wasi contex are allocated from app heap, and if app heap is freed,
@@ -1200,9 +1273,6 @@ aot_deinstantiate(AOTModuleInstance *module_inst, bool is_sub_inst)
     if (module_inst->func_type_indexes)
         wasm_runtime_free(module_inst->func_type_indexes);
 
-    if (module_inst->exec_env_singleton)
-        wasm_exec_env_destroy((WASMExecEnv *)module_inst->exec_env_singleton);
-
     if (((AOTModuleInstanceExtra *)module_inst->e)->c_api_func_imports)
         wasm_runtime_free(
             ((AOTModuleInstanceExtra *)module_inst->e)->c_api_func_imports);

+ 2 - 22
core/iwasm/common/wasm_application.c

@@ -203,22 +203,12 @@ wasm_application_execute_main(WASMModuleInstanceCommon *module_inst, int32 argc,
                               char *argv[])
 {
     bool ret;
-#if WASM_ENABLE_THREAD_MGR != 0
-    WASMCluster *cluster;
-#endif
-#if WASM_ENABLE_THREAD_MGR != 0 || WASM_ENABLE_MEMORY_PROFILING != 0
+#if WASM_ENABLE_MEMORY_PROFILING != 0
     WASMExecEnv *exec_env;
 #endif
 
     ret = execute_main(module_inst, argc, argv);
 
-#if WASM_ENABLE_THREAD_MGR != 0
-    exec_env = wasm_runtime_get_exec_env_singleton(module_inst);
-    if (exec_env && (cluster = wasm_exec_env_get_cluster(exec_env))) {
-        wasm_cluster_wait_for_all_except_self(cluster, exec_env);
-    }
-#endif
-
 #if WASM_ENABLE_MEMORY_PROFILING != 0
     exec_env = wasm_runtime_get_exec_env_singleton(module_inst);
     if (exec_env) {
@@ -622,22 +612,12 @@ wasm_application_execute_func(WASMModuleInstanceCommon *module_inst,
                               const char *name, int32 argc, char *argv[])
 {
     bool ret;
-#if WASM_ENABLE_THREAD_MGR != 0
-    WASMCluster *cluster;
-#endif
-#if WASM_ENABLE_THREAD_MGR != 0 || WASM_ENABLE_MEMORY_PROFILING != 0
+#if WASM_ENABLE_MEMORY_PROFILING != 0
     WASMExecEnv *exec_env;
 #endif
 
     ret = execute_func(module_inst, name, argc, argv);
 
-#if WASM_ENABLE_THREAD_MGR != 0
-    exec_env = wasm_runtime_get_exec_env_singleton(module_inst);
-    if (exec_env && (cluster = wasm_exec_env_get_cluster(exec_env))) {
-        wasm_cluster_wait_for_all_except_self(cluster, exec_env);
-    }
-#endif
-
 #if WASM_ENABLE_MEMORY_PROFILING != 0
     exec_env = wasm_runtime_get_exec_env_singleton(module_inst);
     if (exec_env) {

+ 217 - 207
core/iwasm/common/wasm_c_api.c

@@ -398,7 +398,7 @@ wasm_engine_new_internal(mem_alloc_type_t type, const MemAllocOption *opts)
 }
 
 /* global engine instance */
-static wasm_engine_t *singleton_engine = NULL;
+static wasm_engine_t *singleton_engine;
 #ifdef os_thread_local_attribute
 /* categorize wasm_store_t as threads*/
 static os_thread_local_attribute unsigned thread_local_stores_num = 0;
@@ -1458,6 +1458,30 @@ wasm_importtype_type(const wasm_importtype_t *import_type)
     return import_type->extern_type;
 }
 
+bool
+wasm_importtype_is_linked(const wasm_importtype_t *import_type)
+{ /* Reports whether an import is linked, by forwarding its module and field names to the runtime lookup matching its extern kind. */
+    if (!import_type) /* a NULL import type is reported as not linked */
+        return false;
+
+    const wasm_name_t *module_name = wasm_importtype_module(import_type);
+    const wasm_name_t *field_name = wasm_importtype_name(import_type);
+
+    switch (wasm_externtype_kind(wasm_importtype_type(import_type))) {
+        case WASM_EXTERN_FUNC: /* function imports: ask wasm_runtime_is_import_func_linked */
+            return wasm_runtime_is_import_func_linked(module_name->data,
+                                                      field_name->data);
+        case WASM_EXTERN_GLOBAL: /* global imports: ask wasm_runtime_is_import_global_linked */
+            return wasm_runtime_is_import_global_linked(module_name->data,
+                                                        field_name->data);
+        case WASM_EXTERN_MEMORY:
+        case WASM_EXTERN_TABLE:
+        default: /* memory, table and any other kind fall through to the final `return false` */
+            break;
+    }
+    return false;
+}
+
 own wasm_exporttype_t *
 wasm_exporttype_new(own wasm_byte_vec_t *name,
                     own wasm_externtype_t *extern_type)
@@ -1633,8 +1657,6 @@ wasm_val_to_rt_val(WASMModuleInstanceCommon *inst_comm_rt, uint8 val_type_rt,
             ret =
                 wasm_externref_obj2ref(inst_comm_rt, v->of.ref, (uint32 *)data);
             break;
-#else
-            (void)inst_comm_rt;
 #endif
         default:
             LOG_WARNING("unexpected value type %d", val_type_rt);
@@ -1642,6 +1664,7 @@ wasm_val_to_rt_val(WASMModuleInstanceCommon *inst_comm_rt, uint8 val_type_rt,
             break;
     }
 
+    (void)inst_comm_rt;
     return ret;
 }
 
@@ -2537,12 +2560,12 @@ wasm_module_imports(const wasm_module_t *module, own wasm_importtype_vec_t *out)
 
         bh_assert(extern_type);
 
-        wasm_name_new_from_string(&module_name, module_name_rt);
+        wasm_name_new_from_string_nt(&module_name, module_name_rt);
         if (strlen(module_name_rt) && !module_name.data) {
             goto failed;
         }
 
-        wasm_name_new_from_string(&name, field_name_rt);
+        wasm_name_new_from_string_nt(&name, field_name_rt);
         if (strlen(field_name_rt) && !name.data) {
             goto failed;
         }
@@ -2622,7 +2645,7 @@ wasm_module_exports(const wasm_module_t *module, wasm_exporttype_vec_t *out)
         }
 
         /* byte* -> wasm_byte_vec_t */
-        wasm_name_new_from_string(&name, export->name);
+        wasm_name_new_from_string_nt(&name, export->name);
         if (strlen(export->name) && !name.data) {
             goto failed;
         }
@@ -3008,6 +3031,20 @@ failed:
     return NULL;
 }
 
+static wasm_func_t *
+wasm_func_new_empty(wasm_store_t *store)
+{ /* Allocates a wasm_func_t and fills in only its store and kind; no other field is assigned here. */
+    wasm_func_t *func = NULL;
+
+    if (!(func = malloc_internal(sizeof(wasm_func_t)))) /* allocation failure jumps to `failed` */
+        goto failed;
+
+    func->store = store;
+    func->kind = WASM_EXTERN_FUNC;
+
+    RETURN_OBJ(func, wasm_func_delete) /* NOTE(review): the `failed` label is not visible in this function; presumably RETURN_OBJ supplies it and the cleanup via wasm_func_delete - confirm */
+}
+
 void
 wasm_func_delete(wasm_func_t *func)
 {
@@ -3211,7 +3248,8 @@ wasm_func_call(const wasm_func_t *func, const wasm_val_vec_t *params,
         wasm_name_t message = { 0 };
         wasm_trap_t *trap;
 
-        wasm_name_new_from_string(&message, "failed to call unlinked function");
+        wasm_name_new_from_string_nt(&message,
+                                     "failed to call unlinked function");
         trap = wasm_trap_new(func->store, &message);
         wasm_byte_vec_delete(&message);
 
@@ -3371,6 +3409,25 @@ failed:
     return NULL;
 }
 
+static wasm_global_t *
+wasm_global_new_empty(wasm_store_t *store)
+{ /* Allocates a wasm_global_t and fills in only its store and kind; returns NULL if the allocation fails. */
+    wasm_global_t *global = NULL;
+
+    global = malloc_internal(sizeof(wasm_global_t));
+    if (!global)
+        goto failed;
+
+    global->store = store;
+    global->kind = WASM_EXTERN_GLOBAL; /* no other field is assigned here; NOTE(review): presumably malloc_internal zero-fills the rest - confirm */
+
+    return global;
+failed: /* only reached from the allocation check above, so `global` is NULL here */
+    LOG_DEBUG("%s failed", __FUNCTION__);
+    wasm_global_delete(global); /* receives NULL on this path */
+    return NULL;
+}
+
 /* almost same with wasm_global_new */
 wasm_global_t *
 wasm_global_copy(const wasm_global_t *src)
@@ -4205,7 +4262,8 @@ wasm_memory_data_size(const wasm_memory_t *memory)
             (WASMModuleInstance *)module_inst_comm;
         WASMMemoryInstance *memory_inst =
             module_inst->memories[memory->memory_idx_rt];
-        return memory_inst->cur_page_count * memory_inst->num_bytes_per_page;
+        return (size_t)memory_inst->cur_page_count
+               * memory_inst->num_bytes_per_page;
     }
 #endif
 
@@ -4215,7 +4273,8 @@ wasm_memory_data_size(const wasm_memory_t *memory)
         AOTMemoryInstance *memory_inst =
             ((AOTMemoryInstance **)
                  module_inst->memories)[memory->memory_idx_rt];
-        return memory_inst->cur_page_count * memory_inst->num_bytes_per_page;
+        return (size_t)memory_inst->cur_page_count
+               * memory_inst->num_bytes_per_page;
     }
 #endif
 
@@ -4286,6 +4345,11 @@ interp_link_func(const wasm_instance_t *inst, const WASMModule *module_interp,
 
     imported_func_interp = module_interp->import_functions + func_idx_rt;
     bh_assert(imported_func_interp);
+    bh_assert(imported_func_interp->kind == IMPORT_KIND_FUNC);
+
+    /* it is a placeholder and let's skip it*/
+    if (!import->type)
+        return true;
 
     /* type comparison */
     if (!wasm_functype_same_internal(
@@ -4300,6 +4364,8 @@ interp_link_func(const wasm_instance_t *inst, const WASMModule *module_interp,
         imported_func_interp->u.function.func_ptr_linked = import->u.cb_env.cb;
     else
         imported_func_interp->u.function.func_ptr_linked = import->u.cb;
+    bh_assert(imported_func_interp->u.function.func_ptr_linked);
+
     import->func_idx_rt = func_idx_rt;
 
     (void)inst;
@@ -4318,12 +4384,19 @@ interp_link_global(const WASMModule *module_interp, uint16 global_idx_rt,
 
     imported_global_interp = module_interp->import_globals + global_idx_rt;
     bh_assert(imported_global_interp);
+    bh_assert(imported_global_interp->kind == IMPORT_KIND_GLOBAL);
+
+    /* it is a placeholder and let's skip it*/
+    if (!import->type)
+        return true;
 
+    /* type comparison */
     if (!cmp_val_kind_with_val_type(wasm_valtype_kind(import->type->val_type),
                                     imported_global_interp->u.global.type))
         return false;
 
     /* set init value */
+    bh_assert(import->init);
     switch (wasm_valtype_kind(import->type->val_type)) {
         case WASM_I32:
             imported_global_interp->u.global.global_data_linked.i32 =
@@ -4350,58 +4423,6 @@ interp_link_global(const WASMModule *module_interp, uint16 global_idx_rt,
     return true;
 }
 
-static bool
-interp_link(const wasm_instance_t *inst, const WASMModule *module_interp,
-            wasm_extern_t *imports[])
-{
-    uint32 i = 0;
-    uint32 import_func_i = 0;
-    uint32 import_global_i = 0;
-
-    bh_assert(inst && module_interp && imports);
-
-    for (i = 0; i < module_interp->import_count; ++i) {
-        wasm_extern_t *import = imports[i];
-        WASMImport *import_rt = module_interp->imports + i;
-
-        switch (import_rt->kind) {
-            case IMPORT_KIND_FUNC:
-            {
-                if (!interp_link_func(inst, module_interp, import_func_i,
-                                      wasm_extern_as_func(import))) {
-                    LOG_WARNING("link #%d function failed", import_func_i);
-                    goto failed;
-                }
-                import_func_i++;
-                break;
-            }
-            case IMPORT_KIND_GLOBAL:
-            {
-                if (!interp_link_global(module_interp, import_global_i,
-                                        wasm_extern_as_global(import))) {
-                    LOG_WARNING("link #%d global failed", import_global_i);
-                    goto failed;
-                }
-                import_global_i++;
-                break;
-            }
-            case IMPORT_KIND_MEMORY:
-            case IMPORT_KIND_TABLE:
-            default:
-                ASSERT_NOT_IMPLEMENTED();
-                LOG_WARNING("%s meets unsupported kind: %d", __FUNCTION__,
-                            import_rt->kind);
-                goto failed;
-        }
-    }
-
-    return true;
-
-failed:
-    LOG_DEBUG("%s failed", __FUNCTION__);
-    return false;
-}
-
 static bool
 interp_process_export(wasm_store_t *store,
                       const WASMModuleInstance *inst_interp,
@@ -4501,6 +4522,10 @@ aot_link_func(const wasm_instance_t *inst, const AOTModule *module_aot,
     import_aot_func = module_aot->import_funcs + import_func_idx_rt;
     bh_assert(import_aot_func);
 
+    /* it is a placeholder and let's skip it*/
+    if (!import->type)
+        return true;
+
     /* type comparison */
     if (!wasm_functype_same_internal(import->type, import_aot_func->func_type))
         return false;
@@ -4513,6 +4538,8 @@ aot_link_func(const wasm_instance_t *inst, const AOTModule *module_aot,
         import_aot_func->func_ptr_linked = import->u.cb_env.cb;
     else
         import_aot_func->func_ptr_linked = import->u.cb;
+    bh_assert(import_aot_func->func_ptr_linked);
+
     import->func_idx_rt = import_func_idx_rt;
 
     return true;
@@ -4530,6 +4557,10 @@ aot_link_global(const AOTModule *module_aot, uint16 global_idx_rt,
     import_aot_global = module_aot->import_globals + global_idx_rt;
     bh_assert(import_aot_global);
 
+    /* it is a placeholder and let's skip it*/
+    if (!import->type)
+        return true;
+
     val_type = wasm_globaltype_content(import->type);
     bh_assert(val_type);
 
@@ -4537,6 +4568,7 @@ aot_link_global(const AOTModule *module_aot, uint16 global_idx_rt,
                                     import_aot_global->type))
         return false;
 
+    bh_assert(import->init);
     switch (wasm_valtype_kind(val_type)) {
         case WASM_I32:
             import_aot_global->global_data_linked.i32 = import->init->of.i32;
@@ -4557,62 +4589,6 @@ aot_link_global(const AOTModule *module_aot, uint16 global_idx_rt,
     import->global_idx_rt = global_idx_rt;
     import_aot_global->is_linked = true;
     return true;
-
-failed:
-    LOG_DEBUG("%s failed", __FUNCTION__);
-    return false;
-}
-
-static bool
-aot_link(const wasm_instance_t *inst, const AOTModule *module_aot,
-         wasm_extern_t *imports[])
-{
-    uint32 i = 0;
-    uint32 import_func_i = 0;
-    uint32 import_global_i = 0;
-    wasm_extern_t *import = NULL;
-    wasm_func_t *func = NULL;
-    wasm_global_t *global = NULL;
-
-    bh_assert(inst && module_aot && imports);
-
-    while (import_func_i < module_aot->import_func_count
-           || import_global_i < module_aot->import_global_count) {
-        import = imports[i++];
-
-        bh_assert(import);
-
-        switch (wasm_extern_kind(import)) {
-            case WASM_EXTERN_FUNC:
-                bh_assert(import_func_i < module_aot->import_func_count);
-                func = wasm_extern_as_func((wasm_extern_t *)import);
-                if (!aot_link_func(inst, module_aot, import_func_i, func)) {
-                    LOG_WARNING("link #%d function failed", import_func_i);
-                    goto failed;
-                }
-                import_func_i++;
-
-                break;
-            case WASM_EXTERN_GLOBAL:
-                bh_assert(import_global_i < module_aot->import_global_count);
-                global = wasm_extern_as_global((wasm_extern_t *)import);
-                if (!aot_link_global(module_aot, import_global_i, global)) {
-                    LOG_WARNING("link #%d global failed", import_global_i);
-                    goto failed;
-                }
-                import_global_i++;
-
-                break;
-            case WASM_EXTERN_MEMORY:
-            case WASM_EXTERN_TABLE:
-            default:
-                ASSERT_NOT_IMPLEMENTED();
-                goto failed;
-        }
-    }
-
-    return true;
-
 failed:
     LOG_DEBUG("%s failed", __FUNCTION__);
     return false;
@@ -4693,7 +4669,7 @@ aot_process_export(wasm_store_t *store, const AOTModuleInstance *inst_aot,
             goto failed;
         }
 
-        wasm_name_new_from_string(external->name, export->name);
+        wasm_name_new_from_string_nt(external->name, export->name);
         if (strlen(export->name) && !external->name->data) {
             goto failed;
         }
@@ -4711,65 +4687,103 @@ failed:
 }
 #endif /* WASM_ENABLE_AOT */
 
-wasm_instance_t *
-wasm_instance_new(wasm_store_t *store, const wasm_module_t *module,
-                  const wasm_extern_vec_t *imports, own wasm_trap_t **trap)
-{
-    return wasm_instance_new_with_args(store, module, imports, trap,
-                                       KILOBYTE(32), KILOBYTE(32));
-}
-
 static bool
-compare_imports(const wasm_module_t *module, const wasm_extern_vec_t *imports)
+do_link(const wasm_instance_t *inst, const wasm_module_t *module,
+        const wasm_extern_vec_t *imports)
 {
-    unsigned import_func_count = 0;
-    unsigned import_global_count = 0;
-    unsigned import_memory_count = 0;
-    unsigned import_table_count = 0;
-    unsigned i = 0;
+    uint32 i, import_func_i, import_global_i;
 
-    for (i = 0; imports && i < imports->num_elems; i++) {
+    bh_assert(inst && module);
+
+    /* we have run a module_type check before. */
+
+    for (i = 0, import_func_i = 0, import_global_i = 0; i < imports->num_elems;
+         i++) {
         wasm_extern_t *import = imports->data[i];
+
+        if (!import) {
+            LOG_ERROR("imports[%d] is NULL and it is fatal\n", i);
+            goto failed;
+        }
+
         switch (wasm_extern_kind(import)) {
             case WASM_EXTERN_FUNC:
-                import_func_count++;
+            {
+                bool ret = false;
+#if WASM_ENABLE_INTERP != 0
+                if ((*module)->module_type == Wasm_Module_Bytecode) {
+                    ret = interp_link_func(inst, MODULE_INTERP(module),
+                                           import_func_i,
+                                           wasm_extern_as_func(import));
+                }
+#endif
+#if WASM_ENABLE_AOT != 0
+                if ((*module)->module_type == Wasm_Module_AoT) {
+                    ret = aot_link_func(inst, MODULE_AOT(module), import_func_i,
+                                        wasm_extern_as_func(import));
+                }
+#endif
+                if (!ret) {
+                    LOG_WARNING("link function  #%d failed", import_func_i);
+                    goto failed;
+                }
+
+                import_func_i++;
                 break;
+            }
             case WASM_EXTERN_GLOBAL:
-                import_global_count++;
+            {
+                bool ret = false;
+#if WASM_ENABLE_INTERP != 0
+                if ((*module)->module_type == Wasm_Module_Bytecode) {
+                    ret = interp_link_global(MODULE_INTERP(module),
+                                             import_global_i,
+                                             wasm_extern_as_global(import));
+                }
+#endif
+#if WASM_ENABLE_AOT != 0
+                if ((*module)->module_type == Wasm_Module_AoT) {
+                    ret = aot_link_global(MODULE_AOT(module), import_global_i,
+                                          wasm_extern_as_global(import));
+                }
+#endif
+                if (!ret) {
+                    LOG_WARNING("link global #%d failed", import_global_i);
+                    goto failed;
+                }
+
+                import_global_i++;
                 break;
+            }
             case WASM_EXTERN_MEMORY:
-                import_memory_count++;
-                break;
             case WASM_EXTERN_TABLE:
-                import_table_count++;
+            {
+                LOG_WARNING("doesn't support import memories and tables for "
+                            "now, ignore them");
                 break;
+            }
             default:
+            {
                 UNREACHABLE();
-                return false;
+                break;
+            }
         }
     }
 
-#if WASM_ENABLE_INTERP != 0
-    if ((*module)->module_type == Wasm_Module_Bytecode)
-        return import_func_count == MODULE_INTERP(module)->import_function_count
-               && import_global_count
-                      == MODULE_INTERP(module)->import_global_count
-               && import_memory_count
-                      == MODULE_INTERP(module)->import_memory_count
-               && import_table_count
-                      == MODULE_INTERP(module)->import_table_count;
-#endif
-#if WASM_ENABLE_AOT != 0
-    if ((*module)->module_type == Wasm_Module_AoT)
-        return import_func_count == MODULE_AOT(module)->import_func_count
-               && import_global_count == MODULE_AOT(module)->import_global_count
-               && import_memory_count == MODULE_AOT(module)->import_memory_count
-               && import_table_count == MODULE_AOT(module)->import_table_count;
-#endif
-
+    return true;
+failed:
+    LOG_DEBUG("%s failed", __FUNCTION__);
     return false;
 }
 
+wasm_instance_t *
+wasm_instance_new(wasm_store_t *store, const wasm_module_t *module,
+                  const wasm_extern_vec_t *imports, own wasm_trap_t **trap)
+{
+    return wasm_instance_new_with_args(store, module, imports, trap,
+                                       KILOBYTE(32), KILOBYTE(32));
+}
+
 wasm_instance_t *
 wasm_instance_new_with_args(wasm_store_t *store, const wasm_module_t *module,
                             const wasm_extern_vec_t *imports,
@@ -4779,7 +4793,6 @@ wasm_instance_new_with_args(wasm_store_t *store, const wasm_module_t *module,
     char sub_error_buf[128] = { 0 };
     char error_buf[256] = { 0 };
     wasm_instance_t *instance = NULL;
-    WASMModuleInstance *inst_rt;
     CApiFuncImport *func_import = NULL, **p_func_imports = NULL;
     uint32 i = 0, import_func_count = 0;
     uint64 total_size;
@@ -4790,11 +4803,9 @@ wasm_instance_new_with_args(wasm_store_t *store, const wasm_module_t *module,
     if (!module)
         return NULL;
 
-    if (!compare_imports(module, imports)) {
-        snprintf(sub_error_buf, sizeof(sub_error_buf),
-                 "Failed to match imports");
-        goto failed;
-    }
+    /*
+     * will do the check at the end of wasm_runtime_instantiate
+     */
 
     WASM_C_DUMP_PROC_MEM();
 
@@ -4805,43 +4816,17 @@ wasm_instance_new_with_args(wasm_store_t *store, const wasm_module_t *module,
         goto failed;
     }
 
-    /* link module and imports */
-    if (imports && imports->num_elems) {
-        bool link = false;
-#if WASM_ENABLE_INTERP != 0
-        if ((*module)->module_type == Wasm_Module_Bytecode) {
-            if (!interp_link(instance, MODULE_INTERP(module),
-                             (wasm_extern_t **)imports->data)) {
-                snprintf(sub_error_buf, sizeof(sub_error_buf),
-                         "Failed to validate imports");
-                goto failed;
-            }
-            link = true;
-        }
-#endif
-
-#if WASM_ENABLE_AOT != 0
-        if ((*module)->module_type == Wasm_Module_AoT) {
-            if (!aot_link(instance, MODULE_AOT(module),
-                          (wasm_extern_t **)imports->data)) {
-                snprintf(sub_error_buf, sizeof(sub_error_buf),
-                         "Failed to validate imports");
-                goto failed;
-            }
-            link = true;
-        }
-#endif
-
-        /*
-         * a wrong combination of module filetype and compilation flags
-         * also leads to below branch
-         */
-        if (!link) {
+    /* executes the instantiate-time linking if provided */
+    if (imports) {
+        if (!do_link(instance, module, imports)) {
             snprintf(sub_error_buf, sizeof(sub_error_buf),
-                     "Failed to verify import count");
+                     "Failed to validate imports");
             goto failed;
         }
     }
+    /*
+     * will do the linking result check at the end of wasm_runtime_instantiate
+     */
 
     instance->inst_comm_rt = wasm_runtime_instantiate(
         *module, stack_size, heap_size, sub_error_buf, sizeof(sub_error_buf));
@@ -4856,18 +4841,22 @@ wasm_instance_new_with_args(wasm_store_t *store, const wasm_module_t *module,
     }
 
     /* create the c-api func import list */
-    inst_rt = (WASMModuleInstance *)instance->inst_comm_rt;
 #if WASM_ENABLE_INTERP != 0
     if (instance->inst_comm_rt->module_type == Wasm_Module_Bytecode) {
-        p_func_imports = &inst_rt->e->c_api_func_imports;
-        import_func_count = inst_rt->module->import_function_count;
+        WASMModuleInstanceExtra *e =
+            ((WASMModuleInstance *)instance->inst_comm_rt)->e;
+        p_func_imports = &(e->c_api_func_imports);
+        import_func_count = MODULE_INTERP(module)->import_function_count;
     }
 #endif
 #if WASM_ENABLE_AOT != 0
     if (instance->inst_comm_rt->module_type == Wasm_Module_AoT) {
-        p_func_imports =
-            &((AOTModuleInstanceExtra *)inst_rt->e)->c_api_func_imports;
-        import_func_count = ((AOTModule *)inst_rt->module)->import_func_count;
+        AOTModuleInstanceExtra *e =
+            (AOTModuleInstanceExtra *)((AOTModuleInstance *)
+                                           instance->inst_comm_rt)
+                ->e;
+        p_func_imports = &(e->c_api_func_imports);
+        import_func_count = MODULE_AOT(module)->import_func_count;
     }
 #endif
     bh_assert(p_func_imports);
@@ -4880,16 +4869,21 @@ wasm_instance_new_with_args(wasm_store_t *store, const wasm_module_t *module,
         goto failed;
     }
 
-    /* fill in c-api func import list */
+    /* fill in module_inst->e->c_api_func_imports */
     for (i = 0; imports && i < imports->num_elems; i++) {
-        wasm_func_t *func_host;
-        wasm_extern_t *in;
+        wasm_func_t *func_host = NULL;
+        wasm_extern_t *in = imports->data[i];
+        bh_assert(in);
 
-        in = imports->data[i];
         if (wasm_extern_kind(in) != WASM_EXTERN_FUNC)
             continue;
 
         func_host = wasm_extern_as_func(in);
+        /* it is a placeholder (no type attached), so skip it */
+        if (!func_host->type) {
+            func_import++;
+            continue;
+        }
 
         func_import->with_env_arg = func_host->with_env;
         if (func_host->with_env) {
@@ -4900,6 +4894,7 @@ wasm_instance_new_with_args(wasm_store_t *store, const wasm_module_t *module,
             func_import->func_ptr_linked = func_host->u.cb;
             func_import->env_arg = NULL;
         }
+        bh_assert(func_import->func_ptr_linked);
 
         func_import++;
     }
@@ -4907,6 +4902,8 @@ wasm_instance_new_with_args(wasm_store_t *store, const wasm_module_t *module,
     /* fill with inst */
     for (i = 0; imports && imports->data && i < imports->num_elems; ++i) {
         wasm_extern_t *import = imports->data[i];
+        bh_assert(import);
+
         switch (import->kind) {
             case WASM_EXTERN_FUNC:
                 wasm_extern_as_func(import)->inst_comm_rt =
@@ -5002,7 +4999,7 @@ failed:
              sub_error_buf);
     if (trap != NULL) {
         wasm_message_t message = { 0 };
-        wasm_name_new_from_string(&message, error_buf);
+        wasm_name_new_from_string_nt(&message, error_buf);
         *trap = wasm_trap_new(store, &message);
         wasm_byte_vec_delete(&message);
     }
@@ -5202,3 +5199,16 @@ BASIC_FOUR_LIST(WASM_EXTERN_AS_OTHER_CONST)
 
 BASIC_FOUR_LIST(WASM_OTHER_AS_EXTERN_CONST)
 #undef WASM_OTHER_AS_EXTERN_CONST
+
+/**
+ * Create an empty extern of the requested kind.
+ *
+ * Only WASM_EXTERN_FUNC and WASM_EXTERN_GLOBAL are handled; any other
+ * kind is reported through LOG_ERROR.
+ *
+ * @param store the store handed to wasm_func_new_empty() or
+ *   wasm_global_new_empty()
+ * @param extern_kind the kind of extern to create
+ *
+ * @return the new empty func/global viewed as an extern, or NULL when
+ *   extern_kind is neither WASM_EXTERN_FUNC nor WASM_EXTERN_GLOBAL
+ */
+wasm_extern_t *
+wasm_extern_new_empty(wasm_store_t *store, wasm_externkind_t extern_kind)
+{
+    switch (extern_kind) {
+        case WASM_EXTERN_FUNC:
+            return wasm_func_as_extern(wasm_func_new_empty(store));
+        case WASM_EXTERN_GLOBAL:
+            return wasm_global_as_extern(wasm_global_new_empty(store));
+        default:
+            break;
+    }
+
+    LOG_ERROR("Don't support linking table and memory for now");
+    return NULL;
+}

+ 10 - 3
core/iwasm/common/wasm_exec_env.c

@@ -172,16 +172,16 @@ void
 wasm_exec_env_destroy(WASMExecEnv *exec_env)
 {
 #if WASM_ENABLE_THREAD_MGR != 0
-    /* Terminate all sub-threads */
+    /* Wait for all sub-threads */
     WASMCluster *cluster = wasm_exec_env_get_cluster(exec_env);
     if (cluster) {
-        wasm_cluster_terminate_all_except_self(cluster, exec_env);
+        wasm_cluster_wait_for_all_except_self(cluster, exec_env);
 #if WASM_ENABLE_DEBUG_INTERP != 0
         /* Must fire exit event after other threads exits, otherwise
            the stopped thread will be overrided by other threads */
         wasm_cluster_thread_exited(exec_env);
 #endif
-        /* We have terminated other threads, this is the only alive thread, so
+        /* We have waited for other threads, this is the only alive thread, so
          * we don't acquire cluster->lock because the cluster will be destroyed
          * inside this function */
         wasm_cluster_del_exec_env(cluster, exec_env);
@@ -208,10 +208,17 @@ void
 wasm_exec_env_set_thread_info(WASMExecEnv *exec_env)
 {
     uint8 *stack_boundary = os_thread_get_stack_boundary();
+
+#if WASM_ENABLE_THREAD_MGR != 0
+    os_mutex_lock(&exec_env->wait_lock);
+#endif
     exec_env->handle = os_self_thread();
     exec_env->native_stack_boundary =
         stack_boundary ? stack_boundary + WASM_STACK_GUARD_SIZE : NULL;
     exec_env->native_stack_top_min = (void *)UINTPTR_MAX;
+#if WASM_ENABLE_THREAD_MGR != 0
+    os_mutex_unlock(&exec_env->wait_lock);
+#endif
 }
 
 #if WASM_ENABLE_THREAD_MGR != 0

+ 7 - 0
core/iwasm/common/wasm_exec_env.h

@@ -204,6 +204,13 @@ wasm_exec_env_create(struct WASMModuleInstanceCommon *module_inst,
 void
 wasm_exec_env_destroy(WASMExecEnv *exec_env);
 
+/**
+ * Tell whether the exec env has aux stack information recorded.
+ *
+ * @param exec_env the execution environment to inspect
+ *
+ * @return true if either aux_stack_boundary.boundary or
+ *   aux_stack_bottom.bottom is non-zero, false if both are zero
+ */
+static inline bool
+wasm_exec_env_is_aux_stack_managed_by_runtime(WASMExecEnv *exec_env)
+{
+    if (exec_env->aux_stack_boundary.boundary != 0)
+        return true;
+
+    return exec_env->aux_stack_bottom.bottom != 0;
+}
+
 /**
  * Allocate a WASM frame from the WASM stack.
  *

+ 62 - 2
core/iwasm/common/wasm_memory.c

@@ -8,6 +8,10 @@
 #include "bh_platform.h"
 #include "mem_alloc.h"
 
+#if WASM_ENABLE_SHARED_MEMORY != 0
+#include "../common/wasm_shared_memory.h"
+#endif
+
 typedef enum Memory_Mode {
     MEMORY_MODE_UNKNOWN = 0,
     MEMORY_MODE_POOL,
@@ -506,7 +510,7 @@ wasm_get_default_memory(WASMModuleInstance *module_inst)
 
 #ifndef OS_ENABLE_HW_BOUND_CHECK
 bool
-wasm_enlarge_memory(WASMModuleInstance *module, uint32 inc_page_count)
+wasm_enlarge_memory_internal(WASMModuleInstance *module, uint32 inc_page_count)
 {
     WASMMemoryInstance *memory = wasm_get_default_memory(module);
     uint8 *memory_data_old, *memory_data_new, *heap_data_old;
@@ -624,7 +628,7 @@ wasm_enlarge_memory(WASMModuleInstance *module, uint32 inc_page_count)
 }
 #else
 bool
-wasm_enlarge_memory(WASMModuleInstance *module, uint32 inc_page_count)
+wasm_enlarge_memory_internal(WASMModuleInstance *module, uint32 inc_page_count)
 {
     WASMMemoryInstance *memory = wasm_get_default_memory(module);
     uint32 num_bytes_per_page, total_size_old;
@@ -697,3 +701,59 @@ wasm_enlarge_memory(WASMModuleInstance *module, uint32 inc_page_count)
     return true;
 }
 #endif /* end of OS_ENABLE_HW_BOUND_CHECK */
+
+/**
+ * Enlarge the module instance's memory by inc_page_count pages.
+ *
+ * Thin wrapper around wasm_enlarge_memory_internal(). When shared memory
+ * support is compiled in and the module has a shared memory node, the
+ * internal call is made while holding that node's shared_mem_lock.
+ *
+ * @param module the module instance whose memory is enlarged
+ * @param inc_page_count the number of pages to add
+ *
+ * @return the result of wasm_enlarge_memory_internal()
+ */
+bool
+wasm_enlarge_memory(WASMModuleInstance *module, uint32 inc_page_count)
+{
+#if WASM_ENABLE_SHARED_MEMORY != 0
+    bool enlarged;
+    WASMSharedMemNode *shared_node =
+        wasm_module_get_shared_memory((WASMModuleCommon *)module->module);
+
+    /* No shared memory node: nothing to lock */
+    if (!shared_node)
+        return wasm_enlarge_memory_internal(module, inc_page_count);
+
+    os_mutex_lock(&shared_node->shared_mem_lock);
+    enlarged = wasm_enlarge_memory_internal(module, inc_page_count);
+    os_mutex_unlock(&shared_node->shared_mem_lock);
+
+    return enlarged;
+#else
+    return wasm_enlarge_memory_internal(module, inc_page_count);
+#endif
+}
+
+#if !defined(OS_ENABLE_HW_BOUND_CHECK)              \
+    || WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS == 0 \
+    || WASM_ENABLE_BULK_MEMORY != 0
+/**
+ * Read memory->num_bytes_per_page.
+ *
+ * @param memory the memory instance to read from
+ * @param node when shared memory support is compiled in and this is
+ *   non-NULL, it is treated as a WASMSharedMemNode and its
+ *   shared_mem_lock is held around the read; otherwise it is not used
+ *
+ * @return the value of memory->num_bytes_per_page
+ */
+uint32
+wasm_get_num_bytes_per_page(WASMMemoryInstance *memory, void *node)
+{
+    uint32 page_size;
+#if WASM_ENABLE_SHARED_MEMORY != 0
+    WASMSharedMemNode *shared_node = (WASMSharedMemNode *)node;
+
+    if (shared_node)
+        os_mutex_lock(&shared_node->shared_mem_lock);
+#endif
+
+    page_size = memory->num_bytes_per_page;
+
+#if WASM_ENABLE_SHARED_MEMORY != 0
+    if (shared_node)
+        os_mutex_unlock(&shared_node->shared_mem_lock);
+#endif
+    return page_size;
+}
+
+/**
+ * Compute the linear memory size as
+ * memory->num_bytes_per_page * memory->cur_page_count.
+ *
+ * @param memory the memory instance to read from
+ * @param node when shared memory support is compiled in and this is
+ *   non-NULL, it is treated as a WASMSharedMemNode and its
+ *   shared_mem_lock is held while both fields are read; otherwise it is
+ *   not used
+ *
+ * @return the product, as a uint32
+ */
+uint32
+wasm_get_linear_memory_size(WASMMemoryInstance *memory, void *node)
+{
+    uint32 total_bytes;
+#if WASM_ENABLE_SHARED_MEMORY != 0
+    WASMSharedMemNode *shared_node = (WASMSharedMemNode *)node;
+
+    if (shared_node)
+        os_mutex_lock(&shared_node->shared_mem_lock);
+#endif
+
+    total_bytes = memory->num_bytes_per_page * memory->cur_page_count;
+
+#if WASM_ENABLE_SHARED_MEMORY != 0
+    if (shared_node)
+        os_mutex_unlock(&shared_node->shared_mem_lock);
+#endif
+    return total_bytes;
+}
+#endif

+ 11 - 0
core/iwasm/common/wasm_memory.h

@@ -8,6 +8,7 @@
 
 #include "bh_common.h"
 #include "../include/wasm_export.h"
+#include "../interpreter/wasm_runtime.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -23,6 +24,16 @@ wasm_runtime_memory_destroy();
 unsigned
 wasm_runtime_memory_pool_size();
 
+#if !defined(OS_ENABLE_HW_BOUND_CHECK)              \
+    || WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS == 0 \
+    || WASM_ENABLE_BULK_MEMORY != 0
+uint32
+wasm_get_num_bytes_per_page(WASMMemoryInstance *memory, void *node);
+
+uint32
+wasm_get_linear_memory_size(WASMMemoryInstance *memory, void *node);
+#endif
+
 #ifdef __cplusplus
 }
 #endif

+ 37 - 4
core/iwasm/common/wasm_native.c

@@ -53,6 +53,17 @@ uint32
 get_lib_pthread_export_apis(NativeSymbol **p_lib_pthread_apis);
 #endif
 
+#if WASM_ENABLE_LIB_WASI_THREADS != 0
+bool
+lib_wasi_threads_init(void);
+
+void
+lib_wasi_threads_destroy(void);
+
+uint32
+get_lib_wasi_threads_export_apis(NativeSymbol **p_lib_wasi_threads_apis);
+#endif
+
 uint32
 get_libc_emcc_export_apis(NativeSymbol **p_libc_emcc_apis);
 
@@ -239,6 +250,10 @@ lookup_symbol(NativeSymbol *native_symbols, uint32 n_native_symbols,
     return NULL;
 }
 
+/**
+ * allow func_type and all outputs, like p_signature, p_attachment and
+ * p_call_conv_raw to be NULL
+ */
 void *
 wasm_native_resolve_symbol(const char *module_name, const char *field_name,
                            const WASMType *func_type, const char **p_signature,
@@ -264,10 +279,13 @@ wasm_native_resolve_symbol(const char *module_name, const char *field_name,
         node = node_next;
     }
 
+    if (!p_signature || !p_attachment || !p_call_conv_raw)
+        return func_ptr;
+
     if (func_ptr) {
         if (signature && signature[0] != '\0') {
             /* signature is not empty, check its format */
-            if (!check_symbol_signature(func_type, signature)) {
+            if (!func_type || !check_symbol_signature(func_type, signature)) {
 #if WASM_ENABLE_WAMR_COMPILER == 0
                 /* Output warning except running aot compiler */
                 LOG_WARNING("failed to check signature '%s' and resolve "
@@ -383,7 +401,7 @@ wasm_native_init()
     || WASM_ENABLE_BASE_LIB != 0 || WASM_ENABLE_LIBC_EMCC != 0      \
     || WASM_ENABLE_LIB_RATS != 0 || WASM_ENABLE_WASI_NN != 0        \
     || WASM_ENABLE_APP_FRAMEWORK != 0 || WASM_ENABLE_LIBC_WASI != 0 \
-    || WASM_ENABLE_LIB_PTHREAD != 0
+    || WASM_ENABLE_LIB_PTHREAD != 0 || WASM_ENABLE_LIB_WASI_THREADS != 0
     NativeSymbol *native_symbols;
     uint32 n_native_symbols;
 #endif
@@ -438,6 +456,17 @@ wasm_native_init()
         goto fail;
 #endif
 
+#if WASM_ENABLE_LIB_WASI_THREADS != 0
+    if (!lib_wasi_threads_init())
+        goto fail;
+
+    n_native_symbols = get_lib_wasi_threads_export_apis(&native_symbols);
+    if (n_native_symbols > 0
+        && !wasm_native_register_natives("wasi", native_symbols,
+                                         n_native_symbols))
+        goto fail;
+#endif
+
 #if WASM_ENABLE_LIBC_EMCC != 0
     n_native_symbols = get_libc_emcc_export_apis(&native_symbols);
     if (n_native_symbols > 0
@@ -458,7 +487,7 @@ wasm_native_init()
     n_native_symbols = get_wasi_nn_export_apis(&native_symbols);
     if (!wasm_native_register_natives("wasi_nn", native_symbols,
                                       n_native_symbols))
-        return false;
+        goto fail;
 #endif
 
     return true;
@@ -466,7 +495,7 @@ wasm_native_init()
     || WASM_ENABLE_BASE_LIB != 0 || WASM_ENABLE_LIBC_EMCC != 0      \
     || WASM_ENABLE_LIB_RATS != 0 || WASM_ENABLE_WASI_NN != 0        \
     || WASM_ENABLE_APP_FRAMEWORK != 0 || WASM_ENABLE_LIBC_WASI != 0 \
-    || WASM_ENABLE_LIB_PTHREAD != 0
+    || WASM_ENABLE_LIB_PTHREAD != 0 || WASM_ENABLE_LIB_WASI_THREADS != 0
 fail:
     wasm_native_destroy();
     return false;
@@ -482,6 +511,10 @@ wasm_native_destroy()
     lib_pthread_destroy();
 #endif
 
+#if WASM_ENABLE_LIB_WASI_THREADS != 0
+    lib_wasi_threads_destroy();
+#endif
+
     node = g_native_symbols_list;
     while (node) {
         node_next = node->next;

+ 90 - 14
core/iwasm/common/wasm_runtime_common.c

@@ -7,6 +7,7 @@
 #include "bh_common.h"
 #include "bh_assert.h"
 #include "bh_log.h"
+#include "wasm_native.h"
 #include "wasm_runtime_common.h"
 #include "wasm_memory.h"
 #if WASM_ENABLE_INTERP != 0
@@ -195,7 +196,7 @@ hw_bound_check_sig_handler(void *sig_addr)
         else if (exec_env_tls->exce_check_guard_page <= (uint8 *)sig_addr
                  && (uint8 *)sig_addr
                         < exec_env_tls->exce_check_guard_page + page_size) {
-            bh_assert(wasm_get_exception(module_inst));
+            bh_assert(wasm_copy_exception(module_inst, NULL));
             os_longjmp(jmpbuf_node->jmpbuf, 1);
         }
     }
@@ -282,7 +283,7 @@ runtime_exception_handler(EXCEPTION_POINTERS *exce_info)
             else if (exec_env_tls->exce_check_guard_page <= (uint8 *)sig_addr
                      && (uint8 *)sig_addr
                             < exec_env_tls->exce_check_guard_page + page_size) {
-                bh_assert(wasm_get_exception(module_inst));
+                bh_assert(wasm_copy_exception(module_inst, NULL));
                 if (module_inst->module_type == Wasm_Module_Bytecode) {
                     return EXCEPTION_CONTINUE_SEARCH;
                 }
@@ -1901,18 +1902,21 @@ static bool
 clear_wasi_proc_exit_exception(WASMModuleInstanceCommon *module_inst_comm)
 {
 #if WASM_ENABLE_LIBC_WASI != 0
-    const char *exception;
+    bool has_exception;
+    char exception[EXCEPTION_BUF_LEN];
     WASMModuleInstance *module_inst = (WASMModuleInstance *)module_inst_comm;
 
     bh_assert(module_inst_comm->module_type == Wasm_Module_Bytecode
               || module_inst_comm->module_type == Wasm_Module_AoT);
 
-    exception = wasm_get_exception(module_inst);
-    if (exception && !strcmp(exception, "Exception: wasi proc exit")) {
+    has_exception = wasm_copy_exception(module_inst, exception);
+    if (has_exception && !strcmp(exception, "Exception: wasi proc exit")) {
         /* The "wasi proc exit" exception is thrown by native lib to
            let wasm app exit, which is a normal behavior, we clear
-           the exception here. */
-        wasm_set_exception(module_inst, NULL);
+           the exception here. And just clear the exception of current
+           thread, don't call `wasm_set_exception(module_inst, NULL)`
+           which will clear the exception of all threads. */
+        module_inst->cur_exception[0] = '\0';
         return true;
     }
     return false;
@@ -2335,6 +2339,12 @@ wasm_set_exception(WASMModuleInstance *module_inst, const char *exception)
 {
     WASMExecEnv *exec_env = NULL;
 
+#if WASM_ENABLE_SHARED_MEMORY != 0
+    WASMSharedMemNode *node =
+        wasm_module_get_shared_memory((WASMModuleCommon *)module_inst->module);
+    if (node)
+        os_mutex_lock(&node->shared_mem_lock);
+#endif
     if (exception) {
         snprintf(module_inst->cur_exception, sizeof(module_inst->cur_exception),
                  "Exception: %s", exception);
@@ -2342,6 +2352,10 @@ wasm_set_exception(WASMModuleInstance *module_inst, const char *exception)
     else {
         module_inst->cur_exception[0] = '\0';
     }
+#if WASM_ENABLE_SHARED_MEMORY != 0
+    if (node)
+        os_mutex_unlock(&node->shared_mem_lock);
+#endif
 
 #if WASM_ENABLE_THREAD_MGR != 0
     exec_env =
@@ -2409,6 +2423,36 @@ wasm_get_exception(WASMModuleInstance *module_inst)
         return module_inst->cur_exception;
 }
 
+/**
+ * Report whether the module instance has an exception set and optionally
+ * copy it out.
+ *
+ * When shared memory support is compiled in and the module has a shared
+ * memory node, the check and the copy are done while holding that node's
+ * shared_mem_lock.
+ *
+ * @param module_inst the module instance to query
+ * @param exception_buf destination for the exception text, or NULL if
+ *   the caller only wants to know whether an exception is set. When
+ *   non-NULL, sizeof(module_inst->cur_exception) bytes are copied into
+ *   it, so it must be at least that large.
+ *
+ * @return true if cur_exception is non-empty, false otherwise
+ */
+bool
+wasm_copy_exception(WASMModuleInstance *module_inst, char *exception_buf)
+{
+    bool raised;
+#if WASM_ENABLE_SHARED_MEMORY != 0
+    WASMSharedMemNode *shared_node =
+        wasm_module_get_shared_memory((WASMModuleCommon *)module_inst->module);
+
+    if (shared_node)
+        os_mutex_lock(&shared_node->shared_mem_lock);
+#endif
+
+    raised = module_inst->cur_exception[0] != '\0';
+
+    /* Copy only when there is something to copy and the caller asked
+     * for the content */
+    if (raised && exception_buf != NULL) {
+        bh_memcpy_s(exception_buf, sizeof(module_inst->cur_exception),
+                    module_inst->cur_exception,
+                    sizeof(module_inst->cur_exception));
+    }
+
+#if WASM_ENABLE_SHARED_MEMORY != 0
+    if (shared_node)
+        os_mutex_unlock(&shared_node->shared_mem_lock);
+#endif
+
+    return raised;
+}
+
 void
 wasm_runtime_set_exception(WASMModuleInstanceCommon *module_inst_comm,
                            const char *exception)
@@ -2430,6 +2474,17 @@ wasm_runtime_get_exception(WASMModuleInstanceCommon *module_inst_comm)
     return wasm_get_exception(module_inst);
 }
 
+/**
+ * Common-instance entry point for wasm_copy_exception().
+ *
+ * Asserts that the instance is a bytecode or AoT module instance, then
+ * forwards to wasm_copy_exception().
+ *
+ * @param module_inst_comm the module instance to query
+ * @param exception_buf passed through unchanged; may be NULL
+ *
+ * @return the result of wasm_copy_exception()
+ */
+bool
+wasm_runtime_copy_exception(WASMModuleInstanceCommon *module_inst_comm,
+                            char *exception_buf)
+{
+    bh_assert(module_inst_comm->module_type == Wasm_Module_Bytecode
+              || module_inst_comm->module_type == Wasm_Module_AoT);
+
+    return wasm_copy_exception((WASMModuleInstance *)module_inst_comm,
+                               exception_buf);
+}
+
 void
 wasm_runtime_clear_exception(WASMModuleInstanceCommon *module_inst_comm)
 {
@@ -3353,7 +3408,7 @@ wasm_runtime_invoke_native_raw(WASMExecEnv *exec_env, void *func_ptr,
         }
     }
 
-    ret = !wasm_runtime_get_exception(module) ? true : false;
+    ret = !wasm_runtime_copy_exception(module, NULL);
 
 fail:
     if (argv1 != argv_buf)
@@ -3828,7 +3883,7 @@ wasm_runtime_invoke_native(WASMExecEnv *exec_env, void *func_ptr,
     }
     exec_env->attachment = NULL;
 
-    ret = !wasm_runtime_get_exception(module) ? true : false;
+    ret = !wasm_runtime_copy_exception(module, NULL);
 
 fail:
     if (argv1 != argv_buf)
@@ -4042,7 +4097,7 @@ wasm_runtime_invoke_native(WASMExecEnv *exec_env, void *func_ptr,
     }
     exec_env->attachment = NULL;
 
-    ret = !wasm_runtime_get_exception(module) ? true : false;
+    ret = !wasm_runtime_copy_exception(module, NULL);
 
 fail:
     if (argv1 != argv_buf)
@@ -4369,7 +4424,7 @@ wasm_runtime_invoke_native(WASMExecEnv *exec_env, void *func_ptr,
     }
     exec_env->attachment = NULL;
 
-    ret = !wasm_runtime_get_exception(module) ? true : false;
+    ret = !wasm_runtime_copy_exception(module, NULL);
 fail:
     if (argv1 != argv_buf)
         wasm_runtime_free(argv1);
@@ -4384,7 +4439,7 @@ fail:
                  || defined(BUILD_TARGET_RISCV64_LP64) */
 
 bool
-wasm_runtime_call_indirect(WASMExecEnv *exec_env, uint32 element_indices,
+wasm_runtime_call_indirect(WASMExecEnv *exec_env, uint32 element_index,
                            uint32 argc, uint32 argv[])
 {
     bool ret = false;
@@ -4400,11 +4455,11 @@ wasm_runtime_call_indirect(WASMExecEnv *exec_env, uint32 element_indices,
 
 #if WASM_ENABLE_INTERP != 0
     if (exec_env->module_inst->module_type == Wasm_Module_Bytecode)
-        ret = wasm_call_indirect(exec_env, 0, element_indices, argc, argv);
+        ret = wasm_call_indirect(exec_env, 0, element_index, argc, argv);
 #endif
 #if WASM_ENABLE_AOT != 0
     if (exec_env->module_inst->module_type == Wasm_Module_AoT)
-        ret = aot_call_indirect(exec_env, 0, element_indices, argc, argv);
+        ret = aot_call_indirect(exec_env, 0, element_index, argc, argv);
 #endif
 
     if (!ret && clear_wasi_proc_exit_exception(exec_env->module_inst)) {
@@ -5370,3 +5425,24 @@ wasm_runtime_get_version(uint32_t *major, uint32_t *minor, uint32_t *patch)
     *minor = WAMR_VERSION_MINOR;
     *patch = WAMR_VERSION_PATCH;
 }
+
+/**
+ * Check whether a native function is registered under the given
+ * module/function name.
+ *
+ * The lookup goes through wasm_native_resolve_symbol() with a NULL
+ * function type and NULL output pointers, so only the presence of the
+ * symbol is reported.
+ *
+ * @param module_name the import's module name
+ * @param func_name the import's function name
+ *
+ * @return true if wasm_native_resolve_symbol() returns a non-NULL
+ *   function pointer, false otherwise
+ */
+bool
+wasm_runtime_is_import_func_linked(const char *module_name,
+                                   const char *func_name)
+{
+    void *func_ptr = wasm_native_resolve_symbol(module_name, func_name, NULL,
+                                                NULL, NULL, NULL);
+
+    /* Compare explicitly rather than returning the pointer as a bool:
+     * the implicit conversion is only well-defined when bool is _Bool */
+    return func_ptr != NULL;
+}
+
+/**
+ * Check whether a global import can be found among the libc-builtin
+ * globals.
+ *
+ * @param module_name the import's module name
+ * @param global_name the import's global name
+ *
+ * @return the result of wasm_native_lookup_libc_builtin_global() when
+ *   WASM_ENABLE_LIBC_BUILTIN is enabled; always false otherwise
+ */
+bool
+wasm_runtime_is_import_global_linked(const char *module_name,
+                                     const char *global_name)
+{
+#if WASM_ENABLE_LIBC_BUILTIN == 0
+    return false;
+#else
+    /* The lookup needs somewhere to write its result; it is not used */
+    WASMGlobalImport lookup_result = { 0 };
+
+    return wasm_native_lookup_libc_builtin_global(module_name, global_name,
+                                                  &lookup_result);
+#endif
+}

+ 13 - 21
core/iwasm/common/wasm_runtime_common.h

@@ -617,6 +617,11 @@ wasm_runtime_call_wasm_v(WASMExecEnv *exec_env,
                          uint32 num_results, wasm_val_t *results,
                          uint32 num_args, ...);
 
+/* See wasm_export.h for description */
+WASM_RUNTIME_API_EXTERN bool
+wasm_runtime_call_indirect(WASMExecEnv *exec_env, uint32 element_index,
+                           uint32 argc, uint32 argv[]);
+
 #if WASM_ENABLE_DEBUG_INTERP != 0
 /* See wasm_export.h for description */
 WASM_RUNTIME_API_EXTERN uint32
@@ -628,27 +633,6 @@ WASM_RUNTIME_API_EXTERN uint32
 wasm_runtime_start_debug_instance(WASMExecEnv *exec_env);
 #endif
 
-/**
- * Call a function reference of a given WASM runtime instance with
- * arguments.
- *
- * @param exec_env the execution environment to call the function
- *   which must be created from wasm_create_exec_env()
- * @param element_indices the function ference indicies, usually
- *   prvovided by the caller of a registed native function
- * @param argc the number of arguments
- * @param argv the arguments.  If the function method has return value,
- *   the first (or first two in case 64-bit return value) element of
- *   argv stores the return value of the called WASM function after this
- *   function returns.
- *
- * @return true if success, false otherwise and exception will be thrown,
- *   the caller can call wasm_runtime_get_exception to get exception info.
- */
-bool
-wasm_runtime_call_indirect(WASMExecEnv *exec_env, uint32 element_indices,
-                           uint32 argc, uint32 argv[]);
-
 bool
 wasm_runtime_create_exec_env_singleton(WASMModuleInstanceCommon *module_inst);
 
@@ -995,6 +979,14 @@ void
 wasm_runtime_destroy_custom_sections(WASMCustomSection *section_list);
 #endif
 
+WASM_RUNTIME_API_EXTERN bool
+wasm_runtime_is_import_func_linked(const char *module_name,
+                                   const char *func_name);
+
+WASM_RUNTIME_API_EXTERN bool
+wasm_runtime_is_import_global_linked(const char *module_name,
+                                     const char *global_name);
+
 #ifdef __cplusplus
 }
 #endif

+ 85 - 39
core/iwasm/common/wasm_shared_memory.c

@@ -37,6 +37,7 @@ typedef struct AtomicWaitAddressArgs {
 
 /* Atomic wait map */
 static HashMap *wait_map;
+static korp_mutex wait_map_lock;
 
 static uint32
 wait_address_hash(void *address);
@@ -52,11 +53,18 @@ wasm_shared_memory_init()
 {
     if (os_mutex_init(&shared_memory_list_lock) != 0)
         return false;
+
+    if (os_mutex_init(&wait_map_lock) != 0) {
+        os_mutex_destroy(&shared_memory_list_lock);
+        return false;
+    }
+
     /* wait map not exists, create new map */
     if (!(wait_map = bh_hash_map_create(32, true, (HashFunc)wait_address_hash,
                                         (KeyEqualFunc)wait_address_equal, NULL,
                                         destroy_wait_info))) {
         os_mutex_destroy(&shared_memory_list_lock);
+        os_mutex_destroy(&wait_map_lock);
         return false;
     }
 
@@ -67,6 +75,7 @@ void
 wasm_shared_memory_destroy()
 {
     os_mutex_destroy(&shared_memory_list_lock);
+    os_mutex_destroy(&wait_map_lock);
     if (wait_map) {
         bh_hash_map_destroy(wait_map);
     }
@@ -111,24 +120,32 @@ notify_stale_threads_on_exception(WASMModuleInstanceCommon *module_inst)
 {
     AtomicWaitAddressArgs args = { 0 };
     uint32 i = 0, total_elem_count = 0;
+    uint64 total_elem_count_size = 0;
 
-    os_mutex_lock(&shared_memory_list_lock);
+    os_mutex_lock(&wait_map_lock); /* Make the two traversals atomic */
 
     /* count number of addresses in wait_map */
     bh_hash_map_traverse(wait_map, wait_map_address_count_callback,
                          (void *)&total_elem_count);
 
+    if (!total_elem_count) {
+        os_mutex_unlock(&wait_map_lock);
+        return;
+    }
+
     /* allocate memory */
-    if (!(args.addr = wasm_runtime_malloc(sizeof(void *) * total_elem_count))) {
+    total_elem_count_size = (uint64)sizeof(void *) * total_elem_count;
+    if (total_elem_count_size >= UINT32_MAX
+        || !(args.addr = wasm_runtime_malloc((uint32)total_elem_count_size))) {
         LOG_ERROR(
             "failed to allocate memory for list of atomic wait addresses");
-        os_mutex_unlock(&shared_memory_list_lock);
+        os_mutex_unlock(&wait_map_lock);
         return;
     }
 
     /* set values in list of addresses */
     bh_hash_map_traverse(wait_map, create_list_of_waiter_addresses, &args);
-    os_mutex_unlock(&shared_memory_list_lock);
+    os_mutex_unlock(&wait_map_lock);
 
     /* notify */
     for (i = 0; i < args.index; i++) {
@@ -149,13 +166,13 @@ int32
 shared_memory_inc_reference(WASMModuleCommon *module)
 {
     WASMSharedMemNode *node = search_module(module);
+    uint32 ref_count = -1;
     if (node) {
         os_mutex_lock(&node->lock);
-        node->ref_count++;
+        ref_count = ++node->ref_count;
         os_mutex_unlock(&node->lock);
-        return node->ref_count;
     }
-    return -1;
+    return ref_count;
 }
 
 int32
@@ -172,6 +189,7 @@ shared_memory_dec_reference(WASMModuleCommon *module)
             bh_list_remove(shared_memory_list, node);
             os_mutex_unlock(&shared_memory_list_lock);
 
+            os_mutex_destroy(&node->shared_mem_lock);
             os_mutex_destroy(&node->lock);
             wasm_runtime_free(node);
         }
@@ -200,7 +218,14 @@ shared_memory_set_memory_inst(WASMModuleCommon *module,
     node->module = module;
     node->memory_inst = memory;
     node->ref_count = 1;
+
+    if (os_mutex_init(&node->shared_mem_lock) != 0) {
+        wasm_runtime_free(node);
+        return NULL;
+    }
+
     if (os_mutex_init(&node->lock) != 0) {
+        os_mutex_destroy(&node->shared_mem_lock);
         wasm_runtime_free(node);
         return NULL;
     }
@@ -260,9 +285,11 @@ notify_wait_list(bh_list *wait_list, uint32 count)
         bh_assert(node);
         next = bh_list_elem_next(node);
 
+        os_mutex_lock(&node->wait_lock);
         node->status = S_NOTIFIED;
         /* wakeup */
         os_cond_signal(&node->wait_cond);
+        os_mutex_unlock(&node->wait_lock);
 
         node = next;
     }
@@ -276,13 +303,13 @@ acquire_wait_info(void *address, bool create)
     AtomicWaitInfo *wait_info = NULL;
     bh_list_status ret;
 
-    os_mutex_lock(&shared_memory_list_lock);
+    os_mutex_lock(&wait_map_lock); /* Make find + insert atomic */
 
     if (address)
         wait_info = (AtomicWaitInfo *)bh_hash_map_find(wait_map, address);
 
     if (!create) {
-        os_mutex_unlock(&shared_memory_list_lock);
+        os_mutex_unlock(&wait_map_lock);
         return wait_info;
     }
 
@@ -309,7 +336,7 @@ acquire_wait_info(void *address, bool create)
         }
     }
 
-    os_mutex_unlock(&shared_memory_list_lock);
+    os_mutex_unlock(&wait_map_lock);
 
     bh_assert(wait_info);
     (void)ret;
@@ -322,7 +349,7 @@ fail2:
     wasm_runtime_free(wait_info);
 
 fail1:
-    os_mutex_unlock(&shared_memory_list_lock);
+    os_mutex_unlock(&wait_map_lock);
 
     return NULL;
 }
@@ -349,17 +376,16 @@ destroy_wait_info(void *wait_info)
     }
 }
 
-static void
-release_wait_info(HashMap *wait_map_, AtomicWaitInfo *wait_info, void *address)
+static bool
+map_remove_wait_info(HashMap *wait_map_, AtomicWaitInfo *wait_info,
+                     void *address)
 {
-    os_mutex_lock(&shared_memory_list_lock);
-
-    if (wait_info->wait_list->len == 0) {
-        bh_hash_map_remove(wait_map_, address, NULL, NULL);
-        destroy_wait_info(wait_info);
+    if (wait_info->wait_list->len > 0) {
+        return false;
     }
 
-    os_mutex_unlock(&shared_memory_list_lock);
+    bh_hash_map_remove(wait_map_, address, NULL, NULL);
+    return true;
 }
 
 uint32
@@ -369,12 +395,13 @@ wasm_runtime_atomic_wait(WASMModuleInstanceCommon *module, void *address,
     WASMModuleInstance *module_inst = (WASMModuleInstance *)module;
     AtomicWaitInfo *wait_info;
     AtomicWaitNode *wait_node;
-    bool check_ret, is_timeout;
+    WASMSharedMemNode *node;
+    bool check_ret, is_timeout, no_wait, removed_from_map;
 
     bh_assert(module->module_type == Wasm_Module_Bytecode
               || module->module_type == Wasm_Module_AoT);
 
-    if (wasm_get_exception(module_inst)) {
+    if (wasm_copy_exception(module_inst, NULL)) {
         return -1;
     }
 
@@ -399,11 +426,13 @@ wasm_runtime_atomic_wait(WASMModuleInstanceCommon *module, void *address,
         return -1;
     }
 
-    os_mutex_lock(&wait_info->wait_list_lock);
+    node = search_module((WASMModuleCommon *)module_inst->module);
+    os_mutex_lock(&node->shared_mem_lock);
+    no_wait = (!wait64 && *(uint32 *)address != (uint32)expect)
+              || (wait64 && *(uint64 *)address != expect);
+    os_mutex_unlock(&node->shared_mem_lock);
 
-    if ((!wait64 && *(uint32 *)address != (uint32)expect)
-        || (wait64 && *(uint64 *)address != expect)) {
-        os_mutex_unlock(&wait_info->wait_list_lock);
+    if (no_wait) {
         return 1;
     }
     else {
@@ -411,33 +440,29 @@ wasm_runtime_atomic_wait(WASMModuleInstanceCommon *module, void *address,
 
         if (!(wait_node = wasm_runtime_malloc(sizeof(AtomicWaitNode)))) {
             wasm_runtime_set_exception(module, "failed to create wait node");
-            os_mutex_unlock(&wait_info->wait_list_lock);
             return -1;
         }
         memset(wait_node, 0, sizeof(AtomicWaitNode));
 
         if (0 != os_mutex_init(&wait_node->wait_lock)) {
             wasm_runtime_free(wait_node);
-            os_mutex_unlock(&wait_info->wait_list_lock);
             return -1;
         }
 
         if (0 != os_cond_init(&wait_node->wait_cond)) {
             os_mutex_destroy(&wait_node->wait_lock);
             wasm_runtime_free(wait_node);
-            os_mutex_unlock(&wait_info->wait_list_lock);
             return -1;
         }
 
         wait_node->status = S_WAITING;
-
+        os_mutex_lock(&wait_info->wait_list_lock);
         ret = bh_list_insert(wait_info->wait_list, wait_node);
+        os_mutex_unlock(&wait_info->wait_list_lock);
         bh_assert(ret == BH_LIST_SUCCESS);
         (void)ret;
     }
 
-    os_mutex_unlock(&wait_info->wait_list_lock);
-
     /* condition wait start */
     os_mutex_lock(&wait_node->wait_lock);
 
@@ -445,22 +470,27 @@ wasm_runtime_atomic_wait(WASMModuleInstanceCommon *module, void *address,
                          timeout < 0 ? BHT_WAIT_FOREVER
                                      : (uint64)timeout / 1000);
 
+    is_timeout = wait_node->status == S_WAITING ? true : false;
     os_mutex_unlock(&wait_node->wait_lock);
 
-    /* Check the wait node status */
+    os_mutex_lock(&node->shared_mem_lock);
     os_mutex_lock(&wait_info->wait_list_lock);
+
     check_ret = is_wait_node_exists(wait_info->wait_list, wait_node);
     bh_assert(check_ret);
 
-    is_timeout = wait_node->status == S_WAITING ? true : false;
-
+    /* Remove wait node */
     bh_list_remove(wait_info->wait_list, wait_node);
     os_mutex_destroy(&wait_node->wait_lock);
     os_cond_destroy(&wait_node->wait_cond);
     wasm_runtime_free(wait_node);
-    os_mutex_unlock(&wait_info->wait_list_lock);
 
-    release_wait_info(wait_map, wait_info, address);
+    /* Release wait info if no wait nodes attached */
+    removed_from_map = map_remove_wait_info(wait_map, wait_info, address);
+    os_mutex_unlock(&wait_info->wait_list_lock);
+    if (removed_from_map)
+        destroy_wait_info(wait_info);
+    os_mutex_unlock(&node->shared_mem_lock);
 
     (void)check_ret;
     return is_timeout ? 2 : 0;
@@ -473,12 +503,22 @@ wasm_runtime_atomic_notify(WASMModuleInstanceCommon *module, void *address,
     WASMModuleInstance *module_inst = (WASMModuleInstance *)module;
     uint32 notify_result;
     AtomicWaitInfo *wait_info;
+    WASMSharedMemNode *node;
+    bool out_of_bounds;
 
     bh_assert(module->module_type == Wasm_Module_Bytecode
               || module->module_type == Wasm_Module_AoT);
 
-    if ((uint8 *)address < module_inst->memories[0]->memory_data
-        || (uint8 *)address + 4 > module_inst->memories[0]->memory_data_end) {
+    node = search_module((WASMModuleCommon *)module_inst->module);
+    if (node)
+        os_mutex_lock(&node->shared_mem_lock);
+    out_of_bounds =
+        ((uint8 *)address < module_inst->memories[0]->memory_data
+         || (uint8 *)address + 4 > module_inst->memories[0]->memory_data_end);
+
+    if (out_of_bounds) {
+        if (node)
+            os_mutex_unlock(&node->shared_mem_lock);
         wasm_runtime_set_exception(module, "out of bounds memory access");
         return -1;
     }
@@ -486,12 +526,18 @@ wasm_runtime_atomic_notify(WASMModuleInstanceCommon *module, void *address,
     wait_info = acquire_wait_info(address, false);
 
     /* Nobody wait on this address */
-    if (!wait_info)
+    if (!wait_info) {
+        if (node)
+            os_mutex_unlock(&node->shared_mem_lock);
         return 0;
+    }
 
     os_mutex_lock(&wait_info->wait_list_lock);
     notify_result = notify_wait_list(wait_info->wait_list, count);
     os_mutex_unlock(&wait_info->wait_list_lock);
 
+    if (node)
+        os_mutex_unlock(&node->shared_mem_lock);
+
     return notify_result;
 }

+ 2 - 0
core/iwasm/common/wasm_shared_memory.h

@@ -26,6 +26,8 @@ typedef struct WASMSharedMemNode {
     WASMModuleCommon *module;
     /* The memory information */
     WASMMemoryInstanceCommon *memory_inst;
+    /* Lock used for atomic operations */
+    korp_mutex shared_mem_lock;
 
     /* reference count */
     uint32 ref_count;

+ 2 - 0
core/iwasm/compilation/aot_compiler.c

@@ -1240,6 +1240,8 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index)
                     case WASM_OP_ATOMIC_FENCE:
                         /* Skip memory index */
                         frame_ip++;
+                        if (!aot_compiler_op_atomic_fence(comp_ctx, func_ctx))
+                            return false;
                         break;
                     case WASM_OP_ATOMIC_I32_LOAD:
                         bytes = 4;

+ 16 - 0
core/iwasm/compilation/aot_emit_function.c

@@ -999,6 +999,14 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     }
 #endif
 
+#if WASM_ENABLE_THREAD_MGR != 0
+    /* Insert suspend check point */
+    if (comp_ctx->enable_thread_mgr) {
+        if (!check_suspend_flags(comp_ctx, func_ctx))
+            goto fail;
+    }
+#endif
+
     ret = true;
 fail:
     if (param_types)
@@ -1645,6 +1653,14 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     }
 #endif
 
+#if WASM_ENABLE_THREAD_MGR != 0
+    /* Insert suspend check point */
+    if (comp_ctx->enable_thread_mgr) {
+        if (!check_suspend_flags(comp_ctx, func_ctx))
+            goto fail;
+    }
+#endif
+
     ret = true;
 
 fail:

+ 19 - 1
core/iwasm/compilation/aot_emit_memory.c

@@ -7,6 +7,7 @@
 #include "aot_emit_exception.h"
 #include "../aot/aot_runtime.h"
 #include "aot_intrinsic.h"
+#include "aot_emit_control.h"
 
 #define BUILD_ICMP(op, left, right, res, name)                                \
     do {                                                                      \
@@ -1344,7 +1345,7 @@ aot_compile_op_atomic_wait(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
         return false;
     }
 
-    BUILD_ICMP(LLVMIntSGT, ret_value, I32_ZERO, cmp, "atomic_wait_ret");
+    BUILD_ICMP(LLVMIntNE, ret_value, I32_NEG_ONE, cmp, "atomic_wait_ret");
 
     ADD_BASIC_BLOCK(wait_fail, "atomic_wait_fail");
     ADD_BASIC_BLOCK(wait_success, "wait_success");
@@ -1368,6 +1369,14 @@ aot_compile_op_atomic_wait(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 
     PUSH_I32(ret_value);
 
+#if WASM_ENABLE_THREAD_MGR != 0
+    /* Insert suspend check point */
+    if (comp_ctx->enable_thread_mgr) {
+        if (!check_suspend_flags(comp_ctx, func_ctx))
+            return false;
+    }
+#endif
+
     return true;
 fail:
     return false;
@@ -1414,4 +1423,13 @@ fail:
     return false;
 }
 
+bool
+aot_compiler_op_atomic_fence(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+{
+    return LLVMBuildFence(comp_ctx->builder,
+                          LLVMAtomicOrderingSequentiallyConsistent, false, "")
+               ? true
+               : false;
+}
+
 #endif /* end of WASM_ENABLE_SHARED_MEMORY */

+ 4 - 0
core/iwasm/compilation/aot_emit_memory.h

@@ -97,6 +97,10 @@ bool
 aot_compiler_op_atomic_notify(AOTCompContext *comp_ctx,
                               AOTFuncContext *func_ctx, uint32 align,
                               uint32 offset, uint32 bytes);
+
+bool
+aot_compiler_op_atomic_fence(AOTCompContext *comp_ctx,
+                             AOTFuncContext *func_ctx);
 #endif
 
 #ifdef __cplusplus

+ 1 - 1
core/iwasm/fast-jit/jit_frontend.c

@@ -841,7 +841,7 @@ init_func_translation(JitCompContext *cc)
     cc->spill_cache_offset = wasm_interp_interp_frame_size(total_cell_num);
     /* Set spill cache size according to max local cell num, max stack cell
        num and virtual fixed register num */
-    cc->spill_cache_size = (max_locals + max_stacks) * 4 + sizeof(void *) * 5;
+    cc->spill_cache_size = (max_locals + max_stacks) * 4 + sizeof(void *) * 16;
     cc->total_frame_size = cc->spill_cache_offset + cc->spill_cache_size;
     cc->jitted_return_address_offset =
         offsetof(WASMInterpFrame, jitted_return_addr);

+ 2 - 0
core/iwasm/fast-jit/jit_ir.c

@@ -560,6 +560,7 @@ address_of_const(JitCompContext *cc, JitReg reg, unsigned size)
     unsigned no = jit_reg_no(reg);
     unsigned idx = no & ~_JIT_REG_CONST_IDX_FLAG;
 
+    bh_assert(kind < JIT_REG_KIND_L32);
     bh_assert(jit_reg_is_const_idx(reg) && idx < cc->_const_val._num[kind]);
 
     return cc->_const_val._value[kind] + size * idx;
@@ -572,6 +573,7 @@ next_of_const(JitCompContext *cc, JitReg reg)
     unsigned no = jit_reg_no(reg);
     unsigned idx = no & ~_JIT_REG_CONST_IDX_FLAG;
 
+    bh_assert(kind < JIT_REG_KIND_L32);
     bh_assert(jit_reg_is_const_idx(reg) && idx < cc->_const_val._num[kind]);
 
     return cc->_const_val._next[kind][idx];

+ 4 - 0
core/iwasm/fast-jit/jit_ir.h

@@ -1688,6 +1688,7 @@ jit_cc_is_hreg(JitCompContext *cc, JitReg reg)
     unsigned kind = jit_reg_kind(reg);
     unsigned no = jit_reg_no(reg);
     bh_assert(jit_reg_is_variable(reg));
+    bh_assert(kind < JIT_REG_KIND_L32);
     return no < cc->hreg_info->info[kind].num;
 }
 
@@ -1705,6 +1706,7 @@ jit_cc_is_hreg_fixed(JitCompContext *cc, JitReg reg)
     unsigned kind = jit_reg_kind(reg);
     unsigned no = jit_reg_no(reg);
     bh_assert(jit_cc_is_hreg(cc, reg));
+    bh_assert(kind < JIT_REG_KIND_L32);
     return !!cc->hreg_info->info[kind].fixed[no];
 }
 
@@ -1722,6 +1724,7 @@ jit_cc_is_hreg_caller_saved_native(JitCompContext *cc, JitReg reg)
     unsigned kind = jit_reg_kind(reg);
     unsigned no = jit_reg_no(reg);
     bh_assert(jit_cc_is_hreg(cc, reg));
+    bh_assert(kind < JIT_REG_KIND_L32);
     return !!cc->hreg_info->info[kind].caller_saved_native[no];
 }
 
@@ -1739,6 +1742,7 @@ jit_cc_is_hreg_caller_saved_jitted(JitCompContext *cc, JitReg reg)
     unsigned kind = jit_reg_kind(reg);
     unsigned no = jit_reg_no(reg);
     bh_assert(jit_cc_is_hreg(cc, reg));
+    bh_assert(kind < JIT_REG_KIND_L32);
     return !!cc->hreg_info->info[kind].caller_saved_jitted[no];
 }
 

+ 11 - 3
core/iwasm/fast-jit/jit_regalloc.c

@@ -156,6 +156,7 @@ rc_get_vr(RegallocContext *rc, JitReg vreg)
     unsigned no = jit_reg_no(vreg);
 
     bh_assert(jit_reg_is_variable(vreg));
+    bh_assert(kind < JIT_REG_KIND_L32);
 
     return &rc->vregs[kind][no];
 }
@@ -175,6 +176,7 @@ rc_get_hr(RegallocContext *rc, JitReg hreg)
     unsigned no = jit_reg_no(hreg);
 
     bh_assert(jit_reg_is_variable(hreg) && jit_cc_is_hreg(rc->cc, hreg));
+    bh_assert(kind < JIT_REG_KIND_L32);
 
     return &rc->hregs[kind][no];
 }
@@ -208,7 +210,9 @@ static unsigned
 get_reg_stride(JitReg reg)
 {
     static const uint8 strides[] = { 0, 1, 2, 1, 2, 2, 4, 8, 0 };
-    return strides[jit_reg_kind(reg)];
+    uint32 kind = jit_reg_kind(reg);
+    bh_assert(kind <= JIT_REG_KIND_L32);
+    return strides[kind];
 }
 
 /**
@@ -582,13 +586,17 @@ static JitReg
 allocate_hreg(RegallocContext *rc, JitReg vreg, JitInsn *insn, int distance)
 {
     const int kind = jit_reg_kind(vreg);
-    const HardReg *hregs = rc->hregs[kind];
-    const unsigned hreg_num = jit_cc_hreg_num(rc->cc, kind);
+    const HardReg *hregs;
+    unsigned hreg_num;
     JitReg hreg, vreg_to_reload = 0;
     int min_distance = distance, vr_distance;
     VirtualReg *vr = rc_get_vr(rc, vreg);
     unsigned i;
 
+    bh_assert(kind < JIT_REG_KIND_L32);
+    hregs = rc->hregs[kind];
+    hreg_num = jit_cc_hreg_num(rc->cc, kind);
+
     if (hreg_num == 0)
     /* Unsupported hard register kind.  */
     {

+ 4 - 0
core/iwasm/include/wasm_c_api.h

@@ -354,6 +354,7 @@ WASM_API_EXTERN own wasm_importtype_t* wasm_importtype_new(
 WASM_API_EXTERN const wasm_name_t* wasm_importtype_module(const wasm_importtype_t*);
 WASM_API_EXTERN const wasm_name_t* wasm_importtype_name(const wasm_importtype_t*);
 WASM_API_EXTERN const wasm_externtype_t* wasm_importtype_type(const wasm_importtype_t*);
+WASM_API_EXTERN bool wasm_importtype_is_linked(const wasm_importtype_t*);
 
 
 // Export Types
@@ -797,6 +798,9 @@ static inline void* wasm_val_ptr(const wasm_val_t* val) {
 
 #define KILOBYTE(n) ((n) * 1024)
 
+// Create placeholders filled in `wasm_externvec_t* imports` for `wasm_instance_new()`
+WASM_API_EXTERN wasm_extern_t *wasm_extern_new_empty(wasm_store_t *,  wasm_externkind_t);
+
 ///////////////////////////////////////////////////////////////////////////////
 
 #undef own

+ 41 - 0
core/iwasm/include/wasm_export.h

@@ -800,6 +800,31 @@ wasm_runtime_call_wasm_v(wasm_exec_env_t exec_env,
                          uint32_t num_results, wasm_val_t results[],
                          uint32_t num_args, ...);
 
+/**
+ * Call a function reference of a given WASM runtime instance with
+ * arguments.
+ *
+ * Note: this can be used to call a function which is not exported
+ * by the module explicitly. You might consider it as an abstraction
+ * violation.
+ *
+ * @param exec_env the execution environment to call the function
+ *   which must be created from wasm_create_exec_env()
+ * @param element_index the function reference index, usually
+ *   provided by the caller of a registered native function
+ * @param argc the number of arguments
+ * @param argv the arguments.  If the function method has return value,
+ *   the first (or first two in case 64-bit return value) element of
+ *   argv stores the return value of the called WASM function after this
+ *   function returns.
+ *
+ * @return true if success, false otherwise and exception will be thrown,
+ *   the caller can call wasm_runtime_get_exception to get exception info.
+ */
+WASM_RUNTIME_API_EXTERN bool
+wasm_runtime_call_indirect(wasm_exec_env_t exec_env, uint32_t element_index,
+                           uint32_t argc, uint32_t argv[]);
+
 /**
  * Find the unique main function from a WASM module instance
  * and execute that function.
@@ -1324,6 +1349,22 @@ wasm_runtime_get_custom_section(wasm_module_t const module_comm,
  */
 WASM_RUNTIME_API_EXTERN void
 wasm_runtime_get_version(uint32_t *major, uint32_t *minor, uint32_t *patch);
+
+/**
+ * Check whether an import func `(import <module_name> <func_name> (func ...))` is linked or not
+ * with runtime registered native functions
+ */
+WASM_RUNTIME_API_EXTERN bool
+wasm_runtime_is_import_func_linked(const char *module_name,
+                                   const char *func_name);
+
+/**
+ * Check whether an import global `(import <module_name> <global_name> (global ...))` is linked or not
+ * with runtime registered native globals
+ */
+WASM_RUNTIME_API_EXTERN bool
+wasm_runtime_is_import_global_linked(const char *module_name,
+                                     const char *global_name);
 /* clang-format on */
 
 #ifdef __cplusplus

+ 2 - 2
core/iwasm/interpreter/wasm.h

@@ -510,8 +510,8 @@ struct WASMModule {
     uint64 load_size;
 #endif
 
-#if WASM_ENABLE_DEBUG_INTERP != 0                    \
-    || (WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT \
+#if WASM_ENABLE_DEBUG_INTERP != 0                         \
+    || (WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT != 0 \
         && WASM_ENABLE_LAZY_JIT != 0)
     /**
      * List of instances referred to this module. When source debugging

+ 100 - 73
core/iwasm/interpreter/wasm_interp_classic.c

@@ -8,6 +8,7 @@
 #include "wasm_runtime.h"
 #include "wasm_opcode.h"
 #include "wasm_loader.h"
+#include "wasm_memory.h"
 #include "../common/wasm_exec_env.h"
 #if WASM_ENABLE_SHARED_MEMORY != 0
 #include "../common/wasm_shared_memory.h"
@@ -696,28 +697,28 @@ trunc_f64_to_int(WASMModuleInstance *module, uint32 *frame_sp, float64 src_min,
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS();                            \
                                                                      \
-            os_mutex_lock(&module->e->mem_lock);                     \
+            os_mutex_lock(&node->shared_mem_lock);                   \
             readv = (uint32)(*(uint8 *)maddr);                       \
             *(uint8 *)maddr = (uint8)(readv op sval);                \
-            os_mutex_unlock(&module->e->mem_lock);                   \
+            os_mutex_unlock(&node->shared_mem_lock);                 \
         }                                                            \
         else if (opcode == WASM_OP_ATOMIC_RMW_I32_##OP_NAME##16_U) { \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS();                            \
                                                                      \
-            os_mutex_lock(&module->e->mem_lock);                     \
+            os_mutex_lock(&node->shared_mem_lock);                   \
             readv = (uint32)LOAD_U16(maddr);                         \
             STORE_U16(maddr, (uint16)(readv op sval));               \
-            os_mutex_unlock(&module->e->mem_lock);                   \
+            os_mutex_unlock(&node->shared_mem_lock);                 \
         }                                                            \
         else {                                                       \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS();                            \
                                                                      \
-            os_mutex_lock(&module->e->mem_lock);                     \
+            os_mutex_lock(&node->shared_mem_lock);                   \
             readv = LOAD_I32(maddr);                                 \
             STORE_U32(maddr, readv op sval);                         \
-            os_mutex_unlock(&module->e->mem_lock);                   \
+            os_mutex_unlock(&node->shared_mem_lock);                 \
         }                                                            \
         PUSH_I32(readv);                                             \
         break;                                                       \
@@ -736,39 +737,39 @@ trunc_f64_to_int(WASMModuleInstance *module, uint32 *frame_sp, float64 src_min,
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS();                            \
                                                                      \
-            os_mutex_lock(&module->e->mem_lock);                     \
+            os_mutex_lock(&node->shared_mem_lock);                   \
             readv = (uint64)(*(uint8 *)maddr);                       \
             *(uint8 *)maddr = (uint8)(readv op sval);                \
-            os_mutex_unlock(&module->e->mem_lock);                   \
+            os_mutex_unlock(&node->shared_mem_lock);                 \
         }                                                            \
         else if (opcode == WASM_OP_ATOMIC_RMW_I64_##OP_NAME##16_U) { \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS();                            \
                                                                      \
-            os_mutex_lock(&module->e->mem_lock);                     \
+            os_mutex_lock(&node->shared_mem_lock);                   \
             readv = (uint64)LOAD_U16(maddr);                         \
             STORE_U16(maddr, (uint16)(readv op sval));               \
-            os_mutex_unlock(&module->e->mem_lock);                   \
+            os_mutex_unlock(&node->shared_mem_lock);                 \
         }                                                            \
         else if (opcode == WASM_OP_ATOMIC_RMW_I64_##OP_NAME##32_U) { \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS();                            \
                                                                      \
-            os_mutex_lock(&module->e->mem_lock);                     \
+            os_mutex_lock(&node->shared_mem_lock);                   \
             readv = (uint64)LOAD_U32(maddr);                         \
             STORE_U32(maddr, (uint32)(readv op sval));               \
-            os_mutex_unlock(&module->e->mem_lock);                   \
+            os_mutex_unlock(&node->shared_mem_lock);                 \
         }                                                            \
         else {                                                       \
             uint64 op_result;                                        \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 8, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS();                            \
                                                                      \
-            os_mutex_lock(&module->e->mem_lock);                     \
+            os_mutex_lock(&node->shared_mem_lock);                   \
             readv = (uint64)LOAD_I64(maddr);                         \
             op_result = readv op sval;                               \
             STORE_I64(maddr, op_result);                             \
-            os_mutex_unlock(&module->e->mem_lock);                   \
+            os_mutex_unlock(&node->shared_mem_lock);                 \
         }                                                            \
         PUSH_I64(readv);                                             \
         break;                                                       \
@@ -954,7 +955,7 @@ fast_jit_invoke_native(WASMExecEnv *exec_env, uint32 func_idx,
     WASMFunctionInstance *cur_func = module_inst->e->functions + func_idx;
 
     wasm_interp_call_func_native(module_inst, exec_env, cur_func, prev_frame);
-    return wasm_get_exception(module_inst) ? false : true;
+    return wasm_copy_exception(module_inst, NULL) ? false : true;
 }
 #endif
 
@@ -1023,7 +1024,7 @@ wasm_interp_call_func_import(WASMModuleInstance *module_inst,
     exec_env->module_inst = (WASMModuleInstanceCommon *)module_inst;
 
     /* transfer exception if it is thrown */
-    if (wasm_get_exception(sub_module_inst)) {
+    if (wasm_copy_exception(sub_module_inst, NULL)) {
         bh_memcpy_s(module_inst->cur_exception,
                     sizeof(module_inst->cur_exception),
                     sub_module_inst->cur_exception,
@@ -1036,20 +1037,25 @@ wasm_interp_call_func_import(WASMModuleInstance *module_inst,
 #if WASM_ENABLE_DEBUG_INTERP != 0
 #define CHECK_SUSPEND_FLAGS()                                          \
     do {                                                               \
+        os_mutex_lock(&exec_env->wait_lock);                           \
         if (IS_WAMR_TERM_SIG(exec_env->current_status->signal_flag)) { \
+            os_mutex_unlock(&exec_env->wait_lock);                     \
             return;                                                    \
         }                                                              \
         if (IS_WAMR_STOP_SIG(exec_env->current_status->signal_flag)) { \
             SYNC_ALL_TO_FRAME();                                       \
             wasm_cluster_thread_waiting_run(exec_env);                 \
         }                                                              \
+        os_mutex_unlock(&exec_env->wait_lock);                         \
     } while (0)
 #else
 #define CHECK_SUSPEND_FLAGS()                                             \
     do {                                                                  \
+        os_mutex_lock(&exec_env->wait_lock);                              \
         if (exec_env->suspend_flags.flags != 0) {                         \
             if (exec_env->suspend_flags.flags & 0x01) {                   \
                 /* terminate current thread */                            \
+                os_mutex_unlock(&exec_env->wait_lock);                    \
                 return;                                                   \
             }                                                             \
             while (exec_env->suspend_flags.flags & 0x02) {                \
@@ -1057,6 +1063,7 @@ wasm_interp_call_func_import(WASMModuleInstance *module_inst,
                 os_cond_wait(&exec_env->wait_cond, &exec_env->wait_lock); \
             }                                                             \
         }                                                                 \
+        os_mutex_unlock(&exec_env->wait_lock);                            \
     } while (0)
 #endif /* WASM_ENABLE_DEBUG_INTERP */
 #endif /* WASM_ENABLE_THREAD_MGR */
@@ -1120,14 +1127,22 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                                WASMFunctionInstance *cur_func,
                                WASMInterpFrame *prev_frame)
 {
+#if WASM_ENABLE_SHARED_MEMORY != 0
+    WASMSharedMemNode *node =
+        wasm_module_get_shared_memory((WASMModuleCommon *)module->module);
+#else
+    void *node = NULL;
+#endif
+
     WASMMemoryInstance *memory = wasm_get_default_memory(module);
     uint8 *global_data = module->global_data;
 #if !defined(OS_ENABLE_HW_BOUND_CHECK)              \
     || WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS == 0 \
     || WASM_ENABLE_BULK_MEMORY != 0
-    uint32 num_bytes_per_page = memory ? memory->num_bytes_per_page : 0;
+    uint32 num_bytes_per_page =
+        memory ? wasm_get_num_bytes_per_page(memory, node) : 0;
     uint32 linear_mem_size =
-        memory ? num_bytes_per_page * memory->cur_page_count : 0;
+        memory ? wasm_get_linear_memory_size(memory, node) : 0;
 #endif
     WASMType **wasm_types = module->module->types;
     WASMGlobalInstance *globals = module->e->globals, *global;
@@ -1362,7 +1377,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
 
             HANDLE_OP(EXT_OP_BR_TABLE_CACHE)
             {
-                BrTableCache *node =
+                BrTableCache *node_cache =
                     bh_list_first_elem(module->module->br_table_cache_list);
                 BrTableCache *node_next;
 
@@ -1371,13 +1386,13 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
 #endif
                 lidx = POP_I32();
 
-                while (node) {
-                    node_next = bh_list_elem_next(node);
-                    if (node->br_table_op_addr == frame_ip - 1) {
-                        depth = node->br_depths[lidx];
+                while (node_cache) {
+                    node_next = bh_list_elem_next(node_cache);
+                    if (node_cache->br_table_op_addr == frame_ip - 1) {
+                        depth = node_cache->br_depths[lidx];
                         goto label_pop_csp_n;
                     }
-                    node = node_next;
+                    node_cache = node_next;
                 }
                 bh_assert(0);
                 HANDLE_OP_END();
@@ -3417,6 +3432,10 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (ret == (uint32)-1)
                             goto got_exception;
 
+#if WASM_ENABLE_THREAD_MGR != 0
+                        CHECK_SUSPEND_FLAGS();
+#endif
+
                         PUSH_I32(ret);
                         break;
                     }
@@ -3437,6 +3456,10 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (ret == (uint32)-1)
                             goto got_exception;
 
+#if WASM_ENABLE_THREAD_MGR != 0
+                        CHECK_SUSPEND_FLAGS();
+#endif
+
                         PUSH_I32(ret);
                         break;
                     }
@@ -3444,6 +3467,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                     {
                         /* Skip the memory index */
                         frame_ip++;
+                        os_atomic_thread_fence(os_memory_order_release);
                         break;
                     }
 
@@ -3458,23 +3482,23 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (opcode == WASM_OP_ATOMIC_I32_LOAD8_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = (uint32)(*(uint8 *)maddr);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I32_LOAD16_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = (uint32)LOAD_U16(maddr);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = LOAD_I32(maddr);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
 
                         PUSH_I32(readv);
@@ -3493,30 +3517,30 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (opcode == WASM_OP_ATOMIC_I64_LOAD8_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = (uint64)(*(uint8 *)maddr);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I64_LOAD16_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = (uint64)LOAD_U16(maddr);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I64_LOAD32_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = (uint64)LOAD_U32(maddr);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 8, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = LOAD_I64(maddr);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
 
                         PUSH_I64(readv);
@@ -3535,23 +3559,23 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (opcode == WASM_OP_ATOMIC_I32_STORE8) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             *(uint8 *)maddr = (uint8)sval;
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I32_STORE16) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             STORE_U16(maddr, (uint16)sval);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             STORE_U32(maddr, frame_sp[1]);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         break;
                     }
@@ -3569,31 +3593,31 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (opcode == WASM_OP_ATOMIC_I64_STORE8) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             *(uint8 *)maddr = (uint8)sval;
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I64_STORE16) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             STORE_U16(maddr, (uint16)sval);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I64_STORE32) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             STORE_U32(maddr, (uint32)sval);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 8, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             PUT_I64_TO_ADDR((uint32 *)maddr,
                                             GET_I64_FROM_ADDR(frame_sp + 1));
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         break;
                     }
@@ -3613,32 +3637,32 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                             CHECK_ATOMIC_MEMORY_ACCESS();
 
                             expect = (uint8)expect;
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = (uint32)(*(uint8 *)maddr);
                             if (readv == expect)
                                 *(uint8 *)maddr = (uint8)(sval);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else if (opcode == WASM_OP_ATOMIC_RMW_I32_CMPXCHG16_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
 
                             expect = (uint16)expect;
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = (uint32)LOAD_U16(maddr);
                             if (readv == expect)
                                 STORE_U16(maddr, (uint16)(sval));
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
 
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = LOAD_I32(maddr);
                             if (readv == expect)
                                 STORE_U32(maddr, sval);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         PUSH_I32(readv);
                         break;
@@ -3659,44 +3683,44 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                             CHECK_ATOMIC_MEMORY_ACCESS();
 
                             expect = (uint8)expect;
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = (uint64)(*(uint8 *)maddr);
                             if (readv == expect)
                                 *(uint8 *)maddr = (uint8)(sval);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else if (opcode == WASM_OP_ATOMIC_RMW_I64_CMPXCHG16_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
 
                             expect = (uint16)expect;
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = (uint64)LOAD_U16(maddr);
                             if (readv == expect)
                                 STORE_U16(maddr, (uint16)(sval));
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else if (opcode == WASM_OP_ATOMIC_RMW_I64_CMPXCHG32_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
 
                             expect = (uint32)expect;
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = (uint64)LOAD_U32(maddr);
                             if (readv == expect)
                                 STORE_U32(maddr, (uint32)(sval));
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 8, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
 
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = (uint64)LOAD_I64(maddr);
                             if (readv == expect) {
                                 STORE_I64(maddr, sval);
                             }
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         PUSH_I64(readv);
                         break;
@@ -3840,7 +3864,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
             if (memory)
                 linear_mem_size = num_bytes_per_page * memory->cur_page_count;
 #endif
-            if (wasm_get_exception(module))
+            if (wasm_copy_exception(module, NULL))
                 goto got_exception;
         }
         else {
@@ -3889,10 +3913,10 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
             PUSH_CSP(LABEL_TYPE_FUNCTION, 0, cell_num, frame_ip_end - 1);
 
             wasm_exec_env_set_cur_frame(exec_env, frame);
+        }
 #if WASM_ENABLE_THREAD_MGR != 0
-            CHECK_SUSPEND_FLAGS();
+        CHECK_SUSPEND_FLAGS();
 #endif
-        }
         HANDLE_OP_END();
     }
 
@@ -3983,7 +4007,8 @@ fast_jit_call_func_bytecode(WASMModuleInstance *module_inst,
         module_inst->fast_jit_func_ptrs[func_idx_non_import]);
     bh_assert(action == JIT_INTERP_ACTION_NORMAL
               || (action == JIT_INTERP_ACTION_THROWN
-                  && wasm_runtime_get_exception(exec_env->module_inst)));
+                  && wasm_copy_exception(
+                      (WASMModuleInstance *)exec_env->module_inst, NULL)));
 
     /* Get the return values from info.out.ret */
     if (func_type->result_count) {
@@ -4118,7 +4143,7 @@ llvm_jit_call_func_bytecode(WASMModuleInstance *module_inst,
             exec_env, module_inst->func_ptrs[func_idx], func_type, NULL, NULL,
             argv, argc, argv);
 
-        return ret && !wasm_get_exception(module_inst) ? true : false;
+        return ret && !wasm_copy_exception(module_inst, NULL) ? true : false;
     }
 }
 #endif /* end of WASM_ENABLE_JIT != 0 */
@@ -4138,6 +4163,7 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env,
     unsigned frame_size = wasm_interp_interp_frame_size(all_cell_num);
     unsigned i;
     bool copy_argv_from_frame = true;
+    char exception[EXCEPTION_BUF_LEN];
 
     if (argc < function->param_cell_num) {
         char buf[128];
@@ -4248,7 +4274,7 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env,
     }
 
     /* Output the return value to the caller */
-    if (!wasm_get_exception(module_inst)) {
+    if (!wasm_copy_exception(module_inst, NULL)) {
         if (copy_argv_from_frame) {
             for (i = 0; i < function->ret_cell_num; i++) {
                 argv[i] = *(frame->sp + i - function->ret_cell_num);
@@ -4261,7 +4287,8 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env,
             wasm_interp_dump_call_stack(exec_env, true, NULL, 0);
         }
 #endif
-        LOG_DEBUG("meet an exception %s", wasm_get_exception(module_inst));
+        wasm_copy_exception(module_inst, exception);
+        LOG_DEBUG("meet an exception %s", exception);
     }
 
     wasm_exec_env_set_cur_frame(exec_env, prev_frame);

+ 93 - 62
core/iwasm/interpreter/wasm_interp_fast.c

@@ -8,6 +8,7 @@
 #include "wasm_runtime.h"
 #include "wasm_opcode.h"
 #include "wasm_loader.h"
+#include "wasm_memory.h"
 #include "../common/wasm_exec_env.h"
 #if WASM_ENABLE_SHARED_MEMORY != 0
 #include "../common/wasm_shared_memory.h"
@@ -469,28 +470,28 @@ LOAD_PTR(void *addr)
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS(1);                           \
                                                                      \
-            os_mutex_lock(&module->e->mem_lock);                     \
+            os_mutex_lock(&node->shared_mem_lock);                   \
             readv = (uint32)(*(uint8 *)maddr);                       \
             *(uint8 *)maddr = (uint8)(readv op sval);                \
-            os_mutex_unlock(&module->e->mem_lock);                   \
+            os_mutex_unlock(&node->shared_mem_lock);                 \
         }                                                            \
         else if (opcode == WASM_OP_ATOMIC_RMW_I32_##OP_NAME##16_U) { \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS(2);                           \
                                                                      \
-            os_mutex_lock(&module->e->mem_lock);                     \
+            os_mutex_lock(&node->shared_mem_lock);                   \
             readv = (uint32)LOAD_U16(maddr);                         \
             STORE_U16(maddr, (uint16)(readv op sval));               \
-            os_mutex_unlock(&module->e->mem_lock);                   \
+            os_mutex_unlock(&node->shared_mem_lock);                 \
         }                                                            \
         else {                                                       \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS(4);                           \
                                                                      \
-            os_mutex_lock(&module->e->mem_lock);                     \
+            os_mutex_lock(&node->shared_mem_lock);                   \
             readv = LOAD_I32(maddr);                                 \
             STORE_U32(maddr, readv op sval);                         \
-            os_mutex_unlock(&module->e->mem_lock);                   \
+            os_mutex_unlock(&node->shared_mem_lock);                 \
         }                                                            \
         PUSH_I32(readv);                                             \
         break;                                                       \
@@ -509,39 +510,39 @@ LOAD_PTR(void *addr)
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS(1);                           \
                                                                      \
-            os_mutex_lock(&module->e->mem_lock);                     \
+            os_mutex_lock(&node->shared_mem_lock);                   \
             readv = (uint64)(*(uint8 *)maddr);                       \
             *(uint8 *)maddr = (uint8)(readv op sval);                \
-            os_mutex_unlock(&module->e->mem_lock);                   \
+            os_mutex_unlock(&node->shared_mem_lock);                 \
         }                                                            \
         else if (opcode == WASM_OP_ATOMIC_RMW_I64_##OP_NAME##16_U) { \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS(2);                           \
                                                                      \
-            os_mutex_lock(&module->e->mem_lock);                     \
+            os_mutex_lock(&node->shared_mem_lock);                   \
             readv = (uint64)LOAD_U16(maddr);                         \
             STORE_U16(maddr, (uint16)(readv op sval));               \
-            os_mutex_unlock(&module->e->mem_lock);                   \
+            os_mutex_unlock(&node->shared_mem_lock);                 \
         }                                                            \
         else if (opcode == WASM_OP_ATOMIC_RMW_I64_##OP_NAME##32_U) { \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS(4);                           \
                                                                      \
-            os_mutex_lock(&module->e->mem_lock);                     \
+            os_mutex_lock(&node->shared_mem_lock);                   \
             readv = (uint64)LOAD_U32(maddr);                         \
             STORE_U32(maddr, (uint32)(readv op sval));               \
-            os_mutex_unlock(&module->e->mem_lock);                   \
+            os_mutex_unlock(&node->shared_mem_lock);                 \
         }                                                            \
         else {                                                       \
             uint64 op_result;                                        \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 8, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS(8);                           \
                                                                      \
-            os_mutex_lock(&module->e->mem_lock);                     \
+            os_mutex_lock(&node->shared_mem_lock);                   \
             readv = (uint64)LOAD_I64(maddr);                         \
             op_result = readv op sval;                               \
             STORE_I64(maddr, op_result);                             \
-            os_mutex_unlock(&module->e->mem_lock);                   \
+            os_mutex_unlock(&node->shared_mem_lock);                 \
         }                                                            \
         PUSH_I64(readv);                                             \
         break;                                                       \
@@ -1042,7 +1043,7 @@ wasm_interp_call_func_import(WASMModuleInstance *module_inst,
     exec_env->module_inst = (WASMModuleInstanceCommon *)module_inst;
 
     /* transfer exception if it is thrown */
-    if (wasm_get_exception(sub_module_inst)) {
+    if (wasm_copy_exception(sub_module_inst, NULL)) {
         bh_memcpy_s(module_inst->cur_exception,
                     sizeof(module_inst->cur_exception),
                     sub_module_inst->cur_exception,
@@ -1054,13 +1055,16 @@ wasm_interp_call_func_import(WASMModuleInstance *module_inst,
 #if WASM_ENABLE_THREAD_MGR != 0
 #define CHECK_SUSPEND_FLAGS()                           \
     do {                                                \
+        os_mutex_lock(&exec_env->wait_lock);            \
         if (exec_env->suspend_flags.flags != 0) {       \
             if (exec_env->suspend_flags.flags & 0x01) { \
                 /* terminate current thread */          \
+                os_mutex_unlock(&exec_env->wait_lock);  \
                 return;                                 \
             }                                           \
             /* TODO: support suspend and breakpoint */  \
         }                                               \
+        os_mutex_unlock(&exec_env->wait_lock);          \
     } while (0)
 #endif
 
@@ -1151,13 +1155,22 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                                WASMFunctionInstance *cur_func,
                                WASMInterpFrame *prev_frame)
 {
+#if WASM_ENABLE_SHARED_MEMORY != 0
+    WASMSharedMemNode *node =
+        wasm_module_get_shared_memory((WASMModuleCommon *)module->module);
+#else
+    void *node = NULL;
+#endif
+
     WASMMemoryInstance *memory = wasm_get_default_memory(module);
+
 #if !defined(OS_ENABLE_HW_BOUND_CHECK)              \
     || WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS == 0 \
     || WASM_ENABLE_BULK_MEMORY != 0
-    uint32 num_bytes_per_page = memory ? memory->num_bytes_per_page : 0;
+    uint32 num_bytes_per_page =
+        memory ? wasm_get_num_bytes_per_page(memory, node) : 0;
     uint32 linear_mem_size =
-        memory ? num_bytes_per_page * memory->cur_page_count : 0;
+        memory ? wasm_get_linear_memory_size(memory, node) : 0;
 #endif
     uint8 *global_data = module->global_data;
     WASMGlobalInstance *globals = module->e ? module->e->globals : NULL;
@@ -3261,6 +3274,10 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (ret == (uint32)-1)
                             goto got_exception;
 
+#if WASM_ENABLE_THREAD_MGR != 0
+                        CHECK_SUSPEND_FLAGS();
+#endif
+
                         PUSH_I32(ret);
                         break;
                     }
@@ -3281,9 +3298,18 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (ret == (uint32)-1)
                             goto got_exception;
 
+#if WASM_ENABLE_THREAD_MGR != 0
+                        CHECK_SUSPEND_FLAGS();
+#endif
+
                         PUSH_I32(ret);
                         break;
                     }
+                    case WASM_OP_ATOMIC_FENCE:
+                    {
+                        os_atomic_thread_fence(os_memory_order_release);
+                        break;
+                    }
 
                     case WASM_OP_ATOMIC_I32_LOAD:
                     case WASM_OP_ATOMIC_I32_LOAD8_U:
@@ -3296,23 +3322,23 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (opcode == WASM_OP_ATOMIC_I32_LOAD8_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(1);
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = (uint32)(*(uint8 *)maddr);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I32_LOAD16_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(2);
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = (uint32)LOAD_U16(maddr);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(4);
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = LOAD_I32(maddr);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
 
                         PUSH_I32(readv);
@@ -3331,30 +3357,30 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (opcode == WASM_OP_ATOMIC_I64_LOAD8_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(1);
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = (uint64)(*(uint8 *)maddr);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I64_LOAD16_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(2);
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = (uint64)LOAD_U16(maddr);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I64_LOAD32_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(4);
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = (uint64)LOAD_U32(maddr);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 8, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(8);
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = LOAD_I64(maddr);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
 
                         PUSH_I64(readv);
@@ -3372,23 +3398,23 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (opcode == WASM_OP_ATOMIC_I32_STORE8) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(1);
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             *(uint8 *)maddr = (uint8)sval;
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I32_STORE16) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(2);
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             STORE_U16(maddr, (uint16)sval);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(4);
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             STORE_U32(maddr, sval);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         break;
                     }
@@ -3406,30 +3432,30 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (opcode == WASM_OP_ATOMIC_I64_STORE8) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(1);
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             *(uint8 *)maddr = (uint8)sval;
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I64_STORE16) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(2);
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             STORE_U16(maddr, (uint16)sval);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I64_STORE32) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(4);
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             STORE_U32(maddr, (uint32)sval);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 8, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(8);
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             STORE_I64(maddr, sval);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         break;
                     }
@@ -3449,32 +3475,32 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                             CHECK_ATOMIC_MEMORY_ACCESS(1);
 
                             expect = (uint8)expect;
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = (uint32)(*(uint8 *)maddr);
                             if (readv == expect)
                                 *(uint8 *)maddr = (uint8)(sval);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else if (opcode == WASM_OP_ATOMIC_RMW_I32_CMPXCHG16_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(2);
 
                             expect = (uint16)expect;
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = (uint32)LOAD_U16(maddr);
                             if (readv == expect)
                                 STORE_U16(maddr, (uint16)(sval));
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(4);
 
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = LOAD_I32(maddr);
                             if (readv == expect)
                                 STORE_U32(maddr, sval);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         PUSH_I32(readv);
                         break;
@@ -3495,44 +3521,44 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                             CHECK_ATOMIC_MEMORY_ACCESS(1);
 
                             expect = (uint8)expect;
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = (uint64)(*(uint8 *)maddr);
                             if (readv == expect)
                                 *(uint8 *)maddr = (uint8)(sval);
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else if (opcode == WASM_OP_ATOMIC_RMW_I64_CMPXCHG16_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(2);
 
                             expect = (uint16)expect;
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = (uint64)LOAD_U16(maddr);
                             if (readv == expect)
                                 STORE_U16(maddr, (uint16)(sval));
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else if (opcode == WASM_OP_ATOMIC_RMW_I64_CMPXCHG32_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(4);
 
                             expect = (uint32)expect;
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = (uint64)LOAD_U32(maddr);
                             if (readv == expect)
                                 STORE_U32(maddr, (uint32)(sval));
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 8, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(8);
 
-                            os_mutex_lock(&module->e->mem_lock);
+                            os_mutex_lock(&node->shared_mem_lock);
                             readv = (uint64)LOAD_I64(maddr);
                             if (readv == expect) {
                                 STORE_I64(maddr, sval);
                             }
-                            os_mutex_unlock(&module->e->mem_lock);
+                            os_mutex_unlock(&node->shared_mem_lock);
                         }
                         PUSH_I64(readv);
                         break;
@@ -3781,7 +3807,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
             if (memory)
                 linear_mem_size = num_bytes_per_page * memory->cur_page_count;
 #endif
-            if (wasm_get_exception(module))
+            if (wasm_copy_exception(module, NULL))
                 goto got_exception;
         }
         else {
@@ -3821,6 +3847,9 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
 
             wasm_exec_env_set_cur_frame(exec_env, (WASMRuntimeFrame *)frame);
         }
+#if WASM_ENABLE_THREAD_MGR != 0
+        CHECK_SUSPEND_FLAGS();
+#endif
         HANDLE_OP_END();
     }
 
@@ -3889,6 +3918,7 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env,
     /* This frame won't be used by JITed code, so only allocate interp
        frame here.  */
     unsigned frame_size = wasm_interp_interp_frame_size(all_cell_num);
+    char exception[EXCEPTION_BUF_LEN];
 
     if (argc < function->param_cell_num) {
         char buf[128];
@@ -3954,7 +3984,7 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env,
     }
 
     /* Output the return value to the caller */
-    if (!wasm_get_exception(module_inst)) {
+    if (!wasm_copy_exception(module_inst, NULL)) {
         for (i = 0; i < function->ret_cell_num; i++)
             argv[i] = *(frame->lp + i);
     }
@@ -3964,7 +3994,8 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env,
             wasm_interp_dump_call_stack(exec_env, true, NULL, 0);
         }
 #endif
-        LOG_DEBUG("meet an exception %s", wasm_get_exception(module_inst));
+        wasm_copy_exception(module_inst, exception);
+        LOG_DEBUG("meet an exception %s", exception);
     }
 
     wasm_exec_env_set_cur_frame(exec_env, prev_frame);

+ 28 - 10
core/iwasm/interpreter/wasm_loader.c

@@ -1399,6 +1399,7 @@ load_global_import(const uint8 **p_buf, const uint8 *buf_end,
     WASMModule *sub_module = NULL;
     WASMGlobal *linked_global = NULL;
 #endif
+    bool ret = false;
 
     CHECK_BUF(p, p_end, 2);
     declare_type = read_uint8(p);
@@ -1411,15 +1412,16 @@ load_global_import(const uint8 **p_buf, const uint8 *buf_end,
     }
 
 #if WASM_ENABLE_LIBC_BUILTIN != 0
-    global->is_linked = wasm_native_lookup_libc_builtin_global(
-        sub_module_name, global_name, global);
-    if (global->is_linked) {
+    ret = wasm_native_lookup_libc_builtin_global(sub_module_name, global_name,
+                                                 global);
+    if (ret) {
         if (global->type != declare_type
             || global->is_mutable != declare_mutable) {
             set_error_buf(error_buf, error_buf_size,
                           "incompatible import type");
             return false;
         }
+        global->is_linked = true;
     }
 #endif
 #if WASM_ENABLE_MULTI_MODULE != 0
@@ -1449,6 +1451,7 @@ load_global_import(const uint8 **p_buf, const uint8 *buf_end,
     global->is_mutable = (declare_mutable == 1);
 
     (void)parent_module;
+    (void)ret;
     return true;
 fail:
     return false;
@@ -3861,8 +3864,8 @@ create_module(char *error_buf, uint32 error_buf_size)
     bh_assert(ret == BH_LIST_SUCCESS);
 #endif
 
-#if WASM_ENABLE_DEBUG_INTERP != 0                    \
-    || (WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT \
+#if WASM_ENABLE_DEBUG_INTERP != 0                         \
+    || (WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT != 0 \
         && WASM_ENABLE_LAZY_JIT != 0)
     if (os_mutex_init(&module->instance_list_lock) != 0) {
         set_error_buf(error_buf, error_buf_size,
@@ -4192,7 +4195,20 @@ check_wasi_abi_compatibility(const WASMModule *module,
 
     memory = wasm_loader_find_export(module, "", "memory", EXPORT_KIND_MEMORY,
                                      error_buf, error_buf_size);
-    if (!memory) {
+    if (!memory
+#if WASM_ENABLE_LIB_WASI_THREADS != 0
+        /*
+         * with wasi-threads, it's still an open question if a memory
+         * should be exported.
+         *
+         * https://github.com/WebAssembly/wasi-threads/issues/22
+         * https://github.com/WebAssembly/WASI/issues/502
+         *
+         * Note: this code assumes the number of memories is at most 1.
+         */
+        && module->import_memory_count == 0
+#endif
+    ) {
         set_error_buf(error_buf, error_buf_size,
                       "a module with WASI apis must export memory by default");
         return false;
@@ -4250,7 +4266,8 @@ wasm_loader_unload(WASMModule *module)
     if (!module)
         return;
 
-#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT && WASM_ENABLE_LAZY_JIT != 0
+#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT != 0 \
+    && WASM_ENABLE_LAZY_JIT != 0
     module->orcjit_stop_compiling = true;
     if (module->llvm_jit_init_thread)
         os_thread_join(module->llvm_jit_init_thread, NULL);
@@ -4271,7 +4288,8 @@ wasm_loader_unload(WASMModule *module)
         aot_destroy_comp_data(module->comp_data);
 #endif
 
-#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT && WASM_ENABLE_LAZY_JIT != 0
+#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT != 0 \
+    && WASM_ENABLE_LAZY_JIT != 0
     if (module->tierup_wait_lock_inited) {
         os_mutex_destroy(&module->tierup_wait_lock);
         os_cond_destroy(&module->tierup_wait_cond);
@@ -4400,8 +4418,8 @@ wasm_loader_unload(WASMModule *module)
     }
 #endif
 
-#if WASM_ENABLE_DEBUG_INTERP != 0                    \
-    || (WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT \
+#if WASM_ENABLE_DEBUG_INTERP != 0                         \
+    || (WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT != 0 \
         && WASM_ENABLE_LAZY_JIT != 0)
     os_mutex_destroy(&module->instance_list_lock);
 #endif

+ 8 - 4
core/iwasm/interpreter/wasm_mini_loader.c

@@ -2725,7 +2725,8 @@ create_module(char *error_buf, uint32 error_buf_size)
     bh_assert(ret == BH_LIST_SUCCESS);
 #endif
 
-#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT && WASM_ENABLE_LAZY_JIT != 0
+#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT != 0 \
+    && WASM_ENABLE_LAZY_JIT != 0
     if (os_mutex_init(&module->instance_list_lock) != 0) {
         set_error_buf(error_buf, error_buf_size,
                       "init instance list lock failed");
@@ -2946,7 +2947,8 @@ wasm_loader_unload(WASMModule *module)
     if (!module)
         return;
 
-#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT && WASM_ENABLE_LAZY_JIT != 0
+#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT != 0 \
+    && WASM_ENABLE_LAZY_JIT != 0
     module->orcjit_stop_compiling = true;
     if (module->llvm_jit_init_thread)
         os_thread_join(module->llvm_jit_init_thread, NULL);
@@ -2967,7 +2969,8 @@ wasm_loader_unload(WASMModule *module)
         aot_destroy_comp_data(module->comp_data);
 #endif
 
-#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT && WASM_ENABLE_LAZY_JIT != 0
+#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT != 0 \
+    && WASM_ENABLE_LAZY_JIT != 0
     if (module->tierup_wait_lock_inited) {
         os_mutex_destroy(&module->tierup_wait_lock);
         os_cond_destroy(&module->tierup_wait_cond);
@@ -3063,7 +3066,8 @@ wasm_loader_unload(WASMModule *module)
     }
 #endif
 
-#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT && WASM_ENABLE_LAZY_JIT != 0
+#if WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT != 0 \
+    && WASM_ENABLE_LAZY_JIT != 0
     os_mutex_destroy(&module->instance_list_lock);
 #endif
 

+ 9 - 11
core/iwasm/interpreter/wasm_opcode.h

@@ -675,12 +675,14 @@ typedef enum WASMAtomicEXTOpcode {
 } WASMAtomicEXTOpcode;
 
 #if WASM_ENABLE_DEBUG_INTERP != 0
-#define DEF_DEBUG_BREAK_HANDLE(_name) \
-    _name[DEBUG_OP_BREAK] = HANDLE_OPCODE(DEBUG_OP_BREAK); /* 0xd7 */
+#define DEF_DEBUG_BREAK_HANDLE() \
+    [DEBUG_OP_BREAK] = HANDLE_OPCODE(DEBUG_OP_BREAK), /* 0xd7 */
 #else
-#define DEF_DEBUG_BREAK_HANDLE(_name)
+#define DEF_DEBUG_BREAK_HANDLE()
 #endif
 
+#define SET_GOTO_TABLE_ELEM(opcode) [opcode] = HANDLE_OPCODE(opcode)
+
 /*
  * Macro used to generate computed goto tables for the C interpreter.
  */
@@ -903,14 +905,10 @@ typedef enum WASMAtomicEXTOpcode {
         HANDLE_OPCODE(EXT_OP_LOOP),                  /* 0xd4 */ \
         HANDLE_OPCODE(EXT_OP_IF),                    /* 0xd5 */ \
         HANDLE_OPCODE(EXT_OP_BR_TABLE_CACHE),        /* 0xd6 */ \
-    };                                                          \
-    do {                                                        \
-        _name[WASM_OP_MISC_PREFIX] =                            \
-            HANDLE_OPCODE(WASM_OP_MISC_PREFIX); /* 0xfc */      \
-        _name[WASM_OP_ATOMIC_PREFIX] =                          \
-            HANDLE_OPCODE(WASM_OP_ATOMIC_PREFIX); /* 0xfe */    \
-        DEF_DEBUG_BREAK_HANDLE(_name)                           \
-    } while (0)
+        SET_GOTO_TABLE_ELEM(WASM_OP_MISC_PREFIX),    /* 0xfc */ \
+        SET_GOTO_TABLE_ELEM(WASM_OP_ATOMIC_PREFIX),  /* 0xfe */ \
+        DEF_DEBUG_BREAK_HANDLE()                                \
+    };
 
 #ifdef __cplusplus
 }

+ 144 - 124
core/iwasm/interpreter/wasm_runtime.c

@@ -982,88 +982,138 @@ export_globals_instantiate(const WASMModule *module,
 }
 #endif
 
-#if WASM_ENABLE_LIBC_WASI != 0
-static bool
-execute_initialize_function(WASMModuleInstance *module_inst)
-{
-    WASMFunctionInstance *initialize =
-        wasm_lookup_function(module_inst, "_initialize", NULL);
-    return !initialize
-           || wasm_create_exec_env_and_call_function(module_inst, initialize, 0,
-                                                     NULL);
-}
-#endif
-
-static bool
-execute_post_inst_function(WASMModuleInstance *module_inst)
+static WASMFunctionInstance *
+lookup_post_instantiate_func(WASMModuleInstance *module_inst,
+                             const char *func_name)
 {
-    WASMFunctionInstance *post_inst_func = NULL;
-    WASMType *post_inst_func_type;
-    uint32 i;
-
-    for (i = 0; i < module_inst->export_func_count; i++)
-        if (!strcmp(module_inst->export_functions[i].name,
-                    "__post_instantiate")) {
-            post_inst_func = module_inst->export_functions[i].function;
-            break;
-        }
+    WASMFunctionInstance *func;
+    WASMType *func_type;
 
-    if (!post_inst_func)
+    if (!(func = wasm_lookup_function(module_inst, func_name, NULL)))
         /* Not found */
-        return true;
+        return NULL;
 
-    post_inst_func_type = post_inst_func->u.func->func_type;
-    if (post_inst_func_type->param_count != 0
-        || post_inst_func_type->result_count != 0)
+    func_type = func->u.func->func_type;
+    if (!(func_type->param_count == 0 && func_type->result_count == 0))
         /* Not a valid function type, ignore it */
-        return true;
+        return NULL;
 
-    return wasm_create_exec_env_and_call_function(module_inst, post_inst_func,
-                                                  0, NULL);
+    return func;
 }
 
-#if WASM_ENABLE_BULK_MEMORY != 0
 static bool
-execute_memory_init_function(WASMModuleInstance *module_inst)
+execute_post_instantiate_functions(WASMModuleInstance *module_inst,
+                                   bool is_sub_inst)
 {
-    WASMFunctionInstance *memory_init_func = NULL;
-    WASMType *memory_init_func_type;
-    uint32 i;
+    WASMFunctionInstance *start_func = module_inst->e->start_function;
+    WASMFunctionInstance *initialize_func = NULL;
+    WASMFunctionInstance *post_inst_func = NULL;
+    WASMFunctionInstance *call_ctors_func = NULL;
+#if WASM_ENABLE_LIBC_WASI != 0
+    WASMModule *module = module_inst->module;
+#endif
+#ifdef OS_ENABLE_HW_BOUND_CHECK
+    WASMModuleInstanceCommon *module_inst_main = NULL;
+    WASMExecEnv *exec_env_tls = NULL;
+#endif
+    WASMExecEnv *exec_env = NULL;
+    bool ret = false;
 
-    for (i = 0; i < module_inst->export_func_count; i++)
-        if (!strcmp(module_inst->export_functions[i].name,
-                    "__wasm_call_ctors")) {
-            memory_init_func = module_inst->export_functions[i].function;
-            break;
-        }
+#if WASM_ENABLE_LIBC_WASI != 0
+    /*
+     * WASI reactor instances may assume that _initialize will be called by
+     * the environment at most once, and that none of their other exports
+     * are accessed before that call.
+     */
+    if (!is_sub_inst && module->import_wasi_api) {
+        initialize_func =
+            lookup_post_instantiate_func(module_inst, "_initialize");
+    }
+#endif
 
-    if (!memory_init_func)
-        /* Not found */
-        return true;
+    /* Execute possible "__post_instantiate" function if wasm app is
+       compiled by emsdk's early version */
+    if (!is_sub_inst) {
+        post_inst_func =
+            lookup_post_instantiate_func(module_inst, "__post_instantiate");
+    }
 
-    memory_init_func_type = memory_init_func->u.func->func_type;
-    if (memory_init_func_type->param_count != 0
-        || memory_init_func_type->result_count != 0)
-        /* Not a valid function type, ignore it */
+#if WASM_ENABLE_BULK_MEMORY != 0
+    /* Only execute the memory init function for main instance since
+       the data segments will be dropped once initialized */
+    if (!is_sub_inst
+#if WASM_ENABLE_LIBC_WASI != 0
+        && !module->import_wasi_api
+#endif
+    ) {
+        call_ctors_func =
+            lookup_post_instantiate_func(module_inst, "__wasm_call_ctors");
+    }
+#endif
+
+    if (!start_func && !initialize_func && !post_inst_func
+        && !call_ctors_func) {
+        /* No post instantiation functions to call */
         return true;
+    }
 
-    return wasm_create_exec_env_and_call_function(module_inst, memory_init_func,
-                                                  0, NULL);
-}
-#endif
+#ifdef OS_ENABLE_HW_BOUND_CHECK
+    if (is_sub_inst) {
+        exec_env = exec_env_tls = wasm_runtime_get_exec_env_tls();
+        if (exec_env_tls) {
+            /* Temporarily replace exec_env_tls's module inst to current
+               module inst to avoid checking failure when calling the
+               wasm functions, and ensure that the exec_env's module inst
+               is the correct one. */
+            module_inst_main = exec_env_tls->module_inst;
+            exec_env_tls->module_inst = (WASMModuleInstanceCommon *)module_inst;
+        }
+    }
+#endif
+    if (!exec_env
+        && !(exec_env =
+                 wasm_exec_env_create((WASMModuleInstanceCommon *)module_inst,
+                                      module_inst->default_wasm_stack_size))) {
+        wasm_set_exception(module_inst, "allocate memory failed");
+        return false;
+    }
 
-static bool
-execute_start_function(WASMModuleInstance *module_inst)
-{
-    WASMFunctionInstance *func = module_inst->e->start_function;
+    /* Execute start function for both main instance and sub instance */
+    if (start_func && !wasm_call_function(exec_env, start_func, 0, NULL)) {
+        goto fail;
+    }
 
-    if (!func)
-        return true;
+    if (initialize_func
+        && !wasm_call_function(exec_env, initialize_func, 0, NULL)) {
+        goto fail;
+    }
+
+    if (post_inst_func
+        && !wasm_call_function(exec_env, post_inst_func, 0, NULL)) {
+        goto fail;
+    }
+
+    if (call_ctors_func
+        && !wasm_call_function(exec_env, call_ctors_func, 0, NULL)) {
+        goto fail;
+    }
+
+    ret = true;
 
-    bh_assert(!func->is_import_func && func->param_cell_num == 0
-              && func->ret_cell_num == 0);
+fail:
+#ifdef OS_ENABLE_HW_BOUND_CHECK
+    if (is_sub_inst && exec_env_tls) {
+        bh_assert(exec_env == exec_env_tls);
+        /* Restore the exec_env_tls's module inst */
+        exec_env_tls->module_inst = module_inst_main;
+    }
+    else
+        wasm_exec_env_destroy(exec_env);
+#else
+    wasm_exec_env_destroy(exec_env);
+#endif
 
-    return wasm_create_exec_env_and_call_function(module_inst, func, 0, NULL);
+    return ret;
 }
 
 static bool
@@ -1587,15 +1637,6 @@ wasm_instantiate(WASMModule *module, bool is_sub_inst, uint32 stack_size,
     module_inst->e =
         (WASMModuleInstanceExtra *)((uint8 *)module_inst + extra_info_offset);
 
-#if WASM_ENABLE_SHARED_MEMORY != 0
-    if (os_mutex_init(&module_inst->e->mem_lock) != 0) {
-        set_error_buf(error_buf, error_buf_size,
-                      "create shared memory lock failed");
-        goto fail;
-    }
-    module_inst->e->mem_lock_inited = true;
-#endif
-
 #if WASM_ENABLE_MULTI_MODULE != 0
     module_inst->e->sub_module_inst_list =
         &module_inst->e->sub_module_inst_list_head;
@@ -2008,45 +2049,11 @@ wasm_instantiate(WASMModule *module, bool is_sub_inst, uint32 stack_size,
                 &module_inst->e->functions[module->start_function];
     }
 
-    if (
-#if WASM_ENABLE_LIBC_WASI != 0
-        /*
-         * reactor instances may assume that _initialize will be called by
-         * the environment at most once, and that none of their other
-         * exports are accessed before that call.
-         *
-         * let the loader decide how to act if there is no _initialize
-         * in a reactor
-         */
-        !execute_initialize_function(module_inst) ||
-#endif
-        /* Execute __post_instantiate function */
-        !execute_post_inst_function(module_inst)
-        /* Execute the function in "start" section */
-        || !execute_start_function(module_inst)) {
+    if (!execute_post_instantiate_functions(module_inst, is_sub_inst)) {
         set_error_buf(error_buf, error_buf_size, module_inst->cur_exception);
         goto fail;
     }
 
-#if WASM_ENABLE_BULK_MEMORY != 0
-#if WASM_ENABLE_LIBC_WASI != 0
-    if (!module->import_wasi_api) {
-#endif
-        /* Only execute the memory init function for main instance because
-            the data segments will be dropped once initialized.
-        */
-        if (!is_sub_inst) {
-            if (!execute_memory_init_function(module_inst)) {
-                set_error_buf(error_buf, error_buf_size,
-                              module_inst->cur_exception);
-                goto fail;
-            }
-        }
-#if WASM_ENABLE_LIBC_WASI != 0
-    }
-#endif
-#endif
-
 #if WASM_ENABLE_MEMORY_TRACING != 0
     wasm_runtime_dump_module_inst_mem_consumption(
         (WASMModuleInstanceCommon *)module_inst);
@@ -2066,6 +2073,15 @@ wasm_deinstantiate(WASMModuleInstance *module_inst, bool is_sub_inst)
     if (!module_inst)
         return;
 
+    if (module_inst->exec_env_singleton) {
+        /* wasm_exec_env_destroy will call
+           wasm_cluster_wait_for_all_except_self to wait for other
+           threads, so as to destroy their exec_envs and module
+           instances first, and avoid accessing the shared resources
+           of current module instance after it is deinstantiated. */
+        wasm_exec_env_destroy(module_inst->exec_env_singleton);
+    }
+
 #if WASM_ENABLE_DEBUG_INTERP != 0                         \
     || (WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT != 0 \
         && WASM_ENABLE_LAZY_JIT != 0)
@@ -2148,9 +2164,6 @@ wasm_deinstantiate(WASMModuleInstance *module_inst, bool is_sub_inst)
     wasm_externref_cleanup((WASMModuleInstanceCommon *)module_inst);
 #endif
 
-    if (module_inst->exec_env_singleton)
-        wasm_exec_env_destroy(module_inst->exec_env_singleton);
-
 #if WASM_ENABLE_DUMP_CALL_STACK != 0
     if (module_inst->frames) {
         bh_vector_destroy(module_inst->frames);
@@ -2159,11 +2172,6 @@ wasm_deinstantiate(WASMModuleInstance *module_inst, bool is_sub_inst)
     }
 #endif
 
-#if WASM_ENABLE_SHARED_MEMORY != 0
-    if (module_inst->e->mem_lock_inited)
-        os_mutex_destroy(&module_inst->e->mem_lock);
-#endif
-
     if (module_inst->e->c_api_func_imports)
         wasm_runtime_free(module_inst->e->c_api_func_imports);
 
@@ -2239,8 +2247,9 @@ call_wasm_with_hw_bound_check(WASMModuleInstance *module_inst,
     WASMRuntimeFrame *prev_frame = wasm_exec_env_get_cur_frame(exec_env);
     uint8 *prev_top = exec_env->wasm_stack.s.top;
 #ifdef BH_PLATFORM_WINDOWS
-    const char *exce;
     int result;
+    bool has_exception;
+    char exception[EXCEPTION_BUF_LEN];
 #endif
     bool ret = true;
 
@@ -2279,14 +2288,14 @@ call_wasm_with_hw_bound_check(WASMModuleInstance *module_inst,
 #else
         __try {
             wasm_interp_call_wasm(module_inst, exec_env, function, argc, argv);
-        } __except (wasm_get_exception(module_inst)
+        } __except (wasm_copy_exception(module_inst, NULL)
                         ? EXCEPTION_EXECUTE_HANDLER
                         : EXCEPTION_CONTINUE_SEARCH) {
             /* exception was thrown in wasm_exception_handler */
             ret = false;
         }
-        if ((exce = wasm_get_exception(module_inst))
-            && strstr(exce, "native stack overflow")) {
+        has_exception = wasm_copy_exception(module_inst, exception);
+        if (has_exception && strstr(exception, "native stack overflow")) {
             /* After a stack overflow, the stack was left
                in a damaged state, let the CRT repair it */
             result = _resetstkoflw();
@@ -2398,8 +2407,7 @@ wasm_call_function(WASMExecEnv *exec_env, WASMFunctionInstance *function,
     wasm_exec_env_set_thread_info(exec_env);
 
     interp_call_wasm(module_inst, exec_env, function, argc, argv);
-
-    return !wasm_get_exception(module_inst) ? true : false;
+    return !wasm_copy_exception(module_inst, NULL);
 }
 
 bool
@@ -2572,6 +2580,12 @@ wasm_module_free(WASMModuleInstance *module_inst, uint32 ptr)
             return;
         }
 
+#if WASM_ENABLE_SHARED_MEMORY != 0
+        WASMSharedMemNode *node = wasm_module_get_shared_memory(
+            (WASMModuleCommon *)module_inst->module);
+        if (node)
+            os_mutex_lock(&node->shared_mem_lock);
+#endif
         addr = memory->memory_data + ptr;
 
         if (memory->heap_handle && memory->heap_data <= addr
@@ -2584,6 +2598,10 @@ wasm_module_free(WASMModuleInstance *module_inst, uint32 ptr)
             execute_free_function(module_inst, module_inst->e->free_function,
                                   ptr);
         }
+#if WASM_ENABLE_SHARED_MEMORY != 0
+        if (node)
+            os_mutex_unlock(&node->shared_mem_lock);
+#endif
     }
 }
 
@@ -2696,7 +2714,7 @@ call_indirect(WASMExecEnv *exec_env, uint32 tbl_idx, uint32 elem_idx,
 
     interp_call_wasm(module_inst, exec_env, func_inst, argc, argv);
 
-    return !wasm_get_exception(module_inst) ? true : false;
+    return !wasm_copy_exception(module_inst, NULL);
 
 got_exception:
     return false;
@@ -2716,14 +2734,16 @@ wasm_set_aux_stack(WASMExecEnv *exec_env, uint32 start_offset, uint32 size)
     WASMModuleInstance *module_inst =
         (WASMModuleInstance *)exec_env->module_inst;
     uint32 stack_top_idx = module_inst->module->aux_stack_top_global_index;
+
+#if WASM_ENABLE_HEAP_AUX_STACK_ALLOCATION == 0
+    /* Check the aux stack space */
     uint32 data_end = module_inst->module->aux_data_end;
     uint32 stack_bottom = module_inst->module->aux_stack_bottom;
     bool is_stack_before_data = stack_bottom < data_end ? true : false;
-
-    /* Check the aux stack space, currently we don't allocate space in heap */
     if ((is_stack_before_data && (size > start_offset))
         || ((!is_stack_before_data) && (start_offset - data_end < size)))
         return false;
+#endif
 
     if (stack_top_idx != (uint32)-1) {
         /* The aux stack top is a wasm global,

+ 14 - 9
core/iwasm/interpreter/wasm_runtime.h

@@ -19,6 +19,8 @@
 extern "C" {
 #endif
 
+#define EXCEPTION_BUF_LEN 128
+
 typedef struct WASMModuleInstance WASMModuleInstance;
 typedef struct WASMFunctionInstance WASMFunctionInstance;
 typedef struct WASMMemoryInstance WASMMemoryInstance;
@@ -223,12 +225,6 @@ typedef struct WASMModuleInstanceExtra {
     CApiFuncImport *c_api_func_imports;
     RunningMode running_mode;
 
-#if WASM_ENABLE_SHARED_MEMORY != 0
-    /* lock for shared memory atomic operations */
-    korp_mutex mem_lock;
-    bool mem_lock_inited;
-#endif
-
 #if WASM_ENABLE_MULTI_MODULE != 0
     bh_list sub_module_inst_list_head;
     bh_list *sub_module_inst_list;
@@ -240,8 +236,8 @@ typedef struct WASMModuleInstanceExtra {
     uint32 max_aux_stack_used;
 #endif
 
-#if WASM_ENABLE_DEBUG_INTERP != 0                    \
-    || (WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT \
+#if WASM_ENABLE_DEBUG_INTERP != 0                         \
+    || (WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT != 0 \
         && WASM_ENABLE_LAZY_JIT != 0)
     WASMModuleInstance *next;
 #endif
@@ -288,7 +284,7 @@ struct WASMModuleInstance {
     DefPointer(WASMExportTabInstance *, export_tables);
 
     /* The exception buffer of wasm interpreter for current thread. */
-    char cur_exception[128];
+    char cur_exception[EXCEPTION_BUF_LEN];
 
     /* The WASM module or AOT module, for AOTModuleInstance,
        it denotes `AOTModule *` */
@@ -450,6 +446,15 @@ wasm_set_exception_with_id(WASMModuleInstance *module_inst, uint32 id);
 const char *
 wasm_get_exception(WASMModuleInstance *module);
 
+/**
+ * @brief Copy the exception into the buffer passed as a parameter. Thread-safe
+ * version of `wasm_get_exception()`.
+ * @note Buffer size must be no smaller than EXCEPTION_BUF_LEN
+ * @return true if exception found
+ */
+bool
+wasm_copy_exception(WASMModuleInstance *module_inst, char *exception_buf);
+
 uint32
 wasm_module_malloc(WASMModuleInstance *module_inst, uint32 size,
                    void **p_native_addr);

+ 44 - 11
core/iwasm/libraries/lib-pthread/lib_pthread_wrapper.c

@@ -46,10 +46,6 @@
     wasm_runtime_addr_native_to_app(module_inst, ptr)
 /* clang-format on */
 
-extern bool
-wasm_runtime_call_indirect(wasm_exec_env_t exec_env, uint32 element_indices,
-                           uint32 argc, uint32 argv[]);
-
 enum {
     T_THREAD,
     T_MUTEX,
@@ -494,7 +490,6 @@ pthread_start_routine(void *arg)
 {
     wasm_exec_env_t exec_env = (wasm_exec_env_t)arg;
     wasm_exec_env_t parent_exec_env;
-    wasm_module_inst_t module_inst = get_module_inst(exec_env);
     ThreadRoutineArgs *routine_args = exec_env->thread_arg;
     ThreadInfoNode *info_node = routine_args->info_node;
     uint32 argv[1];
@@ -508,7 +503,6 @@ pthread_start_routine(void *arg)
     info_node->exec_env = exec_env;
     info_node->u.thread = exec_env->handle;
     if (!append_thread_info_node(info_node)) {
-        wasm_runtime_deinstantiate_internal(module_inst, true);
         delete_thread_info_node(info_node);
         os_cond_signal(&parent_exec_env->wait_cond);
         os_mutex_unlock(&parent_exec_env->wait_lock);
@@ -536,9 +530,6 @@ pthread_start_routine(void *arg)
     /* destroy pthread key values */
     call_key_destructor(exec_env);
 
-    /* routine exit, destroy instance */
-    wasm_runtime_deinstantiate_internal(module_inst, true);
-
     wasm_runtime_free(routine_args);
 
     /* if the thread is joinable, store the result in its info node,
@@ -580,6 +571,7 @@ pthread_create_wrapper(wasm_exec_env_t exec_env,
 #if WASM_ENABLE_LIBC_WASI != 0
     WASIContext *wasi_ctx;
 #endif
+    CApiFuncImport **new_c_api_func_imports = NULL;
 
     bh_assert(module);
     bh_assert(module_inst);
@@ -612,6 +604,46 @@ pthread_create_wrapper(wasm_exec_env_t exec_env,
         wasm_runtime_set_wasi_ctx(new_module_inst, wasi_ctx);
 #endif
 
+    /* workaround for passing instantiate-linking information */
+    {
+        CApiFuncImport *c_api_func_imports;
+        uint32 import_func_count = 0;
+        uint32 size_in_bytes = 0;
+
+#if WASM_ENABLE_INTERP != 0
+        if (module_inst->module_type == Wasm_Module_Bytecode) {
+            new_c_api_func_imports = &(
+                ((WASMModuleInstance *)new_module_inst)->e->c_api_func_imports);
+            c_api_func_imports =
+                ((WASMModuleInstance *)module_inst)->e->c_api_func_imports;
+            import_func_count = ((WASMModule *)module)->import_function_count;
+        }
+#endif
+#if WASM_ENABLE_AOT != 0
+        if (module_inst->module_type == Wasm_Module_AoT) {
+            AOTModuleInstanceExtra *e =
+                (AOTModuleInstanceExtra *)((AOTModuleInstance *)new_module_inst)
+                    ->e;
+            new_c_api_func_imports = &(e->c_api_func_imports);
+
+            e = (AOTModuleInstanceExtra *)((AOTModuleInstance *)module_inst)->e;
+            c_api_func_imports = e->c_api_func_imports;
+
+            import_func_count = ((AOTModule *)module)->import_func_count;
+        }
+#endif
+
+        if (import_func_count != 0 && c_api_func_imports) {
+            size_in_bytes = sizeof(CApiFuncImport *) * import_func_count;
+            *new_c_api_func_imports = wasm_runtime_malloc(size_in_bytes);
+            if (!(*new_c_api_func_imports))
+                goto fail;
+
+            bh_memcpy_s(*new_c_api_func_imports, size_in_bytes,
+                        c_api_func_imports, size_in_bytes);
+        }
+    }
+
     if (!(info_node = wasm_runtime_malloc(sizeof(ThreadInfoNode))))
         goto fail;
 
@@ -632,8 +664,9 @@ pthread_create_wrapper(wasm_exec_env_t exec_env,
     routine_args->module_inst = new_module_inst;
 
     os_mutex_lock(&exec_env->wait_lock);
-    ret = wasm_cluster_create_thread(
-        exec_env, new_module_inst, pthread_start_routine, (void *)routine_args);
+    ret =
+        wasm_cluster_create_thread(exec_env, new_module_inst, true,
+                                   pthread_start_routine, (void *)routine_args);
     if (ret != 0) {
         os_mutex_unlock(&exec_env->wait_lock);
         goto fail;

+ 27 - 0
core/iwasm/libraries/lib-socket/test/build.sh

@@ -0,0 +1,27 @@
+#!/bin/bash
+
+# Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Builds every lib-socket test program into a wasm32-wasi-threads module.
+# CC and WASI_SYSROOT can be overridden from the environment.
+set -ueo pipefail
+CC="${CC:=/opt/wasi-sdk/bin/clang}"
+files=("tcp_udp.c" "nslookup.c")
+WASI_SYSROOT=${WASI_SYSROOT:=~/dev/wasi-libc/sysroot}
+
+for file in "${files[@]}"
+do
+    echo "$file"
+    # $CC stays unquoted so a value carrying extra words (e.g. a launcher
+    # prefix) still expands into separate arguments; paths are quoted so
+    # that directories containing spaces are not word-split.
+    $CC \
+        --target=wasm32-wasi-threads \
+        -I../inc \
+        --sysroot "$WASI_SYSROOT" \
+        ../src/wasi/wasi_socket_ext.c -pthread -ftls-model=local-exec \
+        -Wl,--allow-undefined \
+        -Wl,--strip-all,--no-entry \
+        -Wl,--export=__heap_base \
+        -Wl,--export=__data_end \
+        -Wl,--shared-memory,--max-memory=10485760 \
+        -Wl,--export=malloc \
+        -Wl,--export=free \
+        -o "${file%.*}.wasm" "$file"
+done

+ 49 - 0
core/iwasm/libraries/lib-socket/test/nslookup.c

@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include <assert.h>
+#include <string.h>
+#ifdef __wasi__
+#include <wasi/api.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <wasi_socket_ext.h>
+#else
+#include <netdb.h>
+#endif
+
+/*
+ * Resolve a fixed public host name for the address family `af` and check
+ * that at least one result is returned and that every result carries the
+ * requested family and the SOCK_STREAM socket type.
+ */
+void
+test_nslookup(int af)
+{
+    /* Start from NULL so a failed lookup never leaves a garbage list */
+    struct addrinfo *res = NULL;
+    struct addrinfo *address;
+    struct addrinfo hints;
+    const char *url = "google-public-dns-a.google.com";
+    int count = 0;
+    int ret;
+
+    memset(&hints, 0, sizeof(hints));
+    hints.ai_family = af;
+    hints.ai_socktype = SOCK_STREAM;
+    ret = getaddrinfo(url, 0, &hints, &res);
+    assert(ret == 0);
+    (void)ret; /* keeps builds with asserts disabled warning-free */
+
+    for (address = res; address; address = address->ai_next) {
+        assert(address->ai_family == af);
+        assert(address->ai_socktype == SOCK_STREAM);
+        count++;
+    }
+
+    assert(count > 0);
+    /* Only hand a real list back to the resolver */
+    if (res)
+        freeaddrinfo(res);
+}
+
+/* Run the name lookup check once per supported address family. */
+int
+main()
+{
+    /* ipv4 first, then ipv6 */
+    test_nslookup(AF_INET);
+    test_nslookup(AF_INET6);
+
+    return 0;
+}

+ 193 - 0
core/iwasm/libraries/lib-socket/test/tcp_udp.c

@@ -0,0 +1,193 @@
+/*
+ * Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+#include <unistd.h>
+#include <string.h>
+#include <assert.h>
+#ifdef __wasi__
+#include <wasi/api.h>
+#include <sys/socket.h>
+#include <wasi_socket_ext.h>
+#endif
+#include <arpa/inet.h>
+#include <pthread.h>
+#define SERVER_MSG "Message from server."
+#define PORT 8989
+pthread_mutex_t mut;
+pthread_cond_t cond;
+int server_init_complete = 0;
+char buffer[sizeof(SERVER_MSG) + 1];
+
+struct socket_info {
+    union {
+        struct sockaddr_in addr_ipv4;
+        struct sockaddr_in6 addr_ipv6;
+    } addr;
+    int sock;
+};
+
+struct thread_args {
+    int family;
+    int protocol;
+};
+
+/*
+ * Open a socket for `family`/`protocol` and pair it with the loopback
+ * address on PORT for that family. For a family other than AF_INET or
+ * AF_INET6 only the `sock` member of the result is filled in.
+ */
+struct socket_info
+init_socket_addr(int family, int protocol)
+{
+    int sock = socket(family, protocol, 0);
+    assert(sock != -1);
+
+    struct socket_info info;
+
+    switch (family) {
+        case AF_INET:
+        {
+            struct sockaddr_in *v4 = &info.addr.addr_ipv4;
+
+            memset(v4, 0, sizeof(*v4));
+            v4->sin_family = AF_INET;
+            v4->sin_port = htons(PORT);
+            v4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+            break;
+        }
+        case AF_INET6:
+        {
+            struct sockaddr_in6 *v6 = &info.addr.addr_ipv6;
+
+            memset(v6, 0, sizeof(*v6));
+            v6->sin6_family = AF_INET6;
+            v6->sin6_port = htons(PORT);
+            v6->sin6_addr = in6addr_loopback;
+            break;
+        }
+        default:
+            break;
+    }
+
+    info.sock = sock;
+    return info;
+}
+
+/*
+ * Sanity-check the arguments handed to a test thread: the family must be
+ * IPv4 or IPv6 and the protocol must be stream or datagram.
+ */
+void
+assert_thread_args(struct thread_args *args)
+{
+    /* address family */
+    assert(args->family == AF_INET || args->family == AF_INET6);
+    /* socket type */
+    assert(args->protocol == SOCK_STREAM || args->protocol == SOCK_DGRAM);
+}
+
+/*
+ * Server thread entry point.
+ *
+ * Binds a loopback socket for the family/protocol given in `arg` (a
+ * struct thread_args), signals the client through `cond` once the socket
+ * is ready, then either accepts one TCP connection and sends SERVER_MSG,
+ * or receives one UDP datagram and echoes the buffer back to its sender.
+ * Every socket opened here is closed before returning. Always returns NULL.
+ */
+void *
+server(void *arg)
+{
+    struct thread_args *args = (struct thread_args *)arg;
+    assert_thread_args(args);
+
+    /* Reset the ready flag under the same lock the client reads it with */
+    pthread_mutex_lock(&mut);
+    server_init_complete = 0;
+    pthread_mutex_unlock(&mut);
+
+    struct socket_info init_server_sock =
+        init_socket_addr(args->family, args->protocol);
+
+    int server_sock = init_server_sock.sock;
+    socklen_t addr_size;
+    struct sockaddr_storage client_addr;
+    ssize_t nbytes;
+    int ret;
+
+    strcpy(buffer, SERVER_MSG);
+
+    struct sockaddr *server_addr = (struct sockaddr *)&init_server_sock.addr;
+    ret = bind(server_sock, server_addr,
+               args->family == AF_INET ? sizeof(struct sockaddr_in)
+                                       : sizeof(struct sockaddr_in6));
+    assert(ret == 0);
+
+    /* Only stream sockets listen; a failure here must not go unnoticed */
+    if (args->protocol == SOCK_STREAM) {
+        ret = listen(server_sock, 1);
+        assert(ret == 0);
+    }
+
+    pthread_mutex_lock(&mut);
+    server_init_complete = 1;
+    pthread_mutex_unlock(&mut);
+    pthread_cond_signal(&cond);
+
+    addr_size = sizeof(client_addr);
+    if (args->protocol == SOCK_STREAM) {
+        int client_sock =
+            accept(server_sock, (struct sockaddr *)&client_addr, &addr_size);
+        assert(client_sock >= 0);
+
+        nbytes = sendto(client_sock, buffer, strlen(buffer), 0,
+                        (struct sockaddr *)&client_addr, addr_size);
+        assert(nbytes == (ssize_t)strlen(buffer));
+
+        /* close() is kept outside assert() so it runs in every build */
+        ret = close(client_sock);
+        assert(ret == 0);
+    }
+    else {
+        /* Leave room for the terminator appended below */
+        nbytes = recvfrom(server_sock, buffer, sizeof(buffer) - 1, 0,
+                          (struct sockaddr *)&client_addr, &addr_size);
+        assert(nbytes >= 0);
+        if (nbytes >= 0)
+            buffer[nbytes] = '\0';
+
+        nbytes = sendto(server_sock, buffer, strlen(buffer), 0,
+                        (struct sockaddr *)&client_addr, addr_size);
+        assert(nbytes == (ssize_t)strlen(buffer));
+    }
+
+    /* The bound socket is released in both modes (it leaked for TCP) */
+    ret = close(server_sock);
+    assert(ret == 0);
+
+    (void)ret;
+    (void)nbytes;
+    return NULL;
+}
+
+/*
+ * Client thread entry point.
+ *
+ * Waits until the server reports that its socket is ready, then connects
+ * (TCP) or sends SERVER_MSG as a datagram (UDP) to the loopback address,
+ * and finally checks that the reply equals SERVER_MSG. The reply is read
+ * into a private zeroed buffer: the global `buffer` already holds
+ * SERVER_MSG (the server copies it there), so comparing against it would
+ * succeed even if nothing was received. Always returns NULL.
+ */
+void *
+client(void *arg)
+{
+    struct thread_args *args = (struct thread_args *)arg;
+    char reply[sizeof(SERVER_MSG)] = { 0 };
+    const size_t msg_len = strlen(SERVER_MSG);
+    size_t received = 0;
+    struct sockaddr *addr;
+    socklen_t addr_len;
+    ssize_t nbytes;
+    int ret;
+
+    assert_thread_args(args);
+
+    pthread_mutex_lock(&mut);
+
+    while (server_init_complete == 0) {
+        pthread_cond_wait(&cond, &mut);
+    }
+
+    struct socket_info init_client_sock =
+        init_socket_addr(args->family, args->protocol);
+    int sock = init_client_sock.sock;
+    pthread_mutex_unlock(&mut);
+
+    /* One code path for both families: only the address length differs */
+    addr = (struct sockaddr *)&init_client_sock.addr;
+    addr_len = args->family == AF_INET ? sizeof(struct sockaddr_in)
+                                       : sizeof(struct sockaddr_in6);
+
+    if (args->protocol == SOCK_STREAM) {
+        ret = connect(sock, addr, addr_len);
+        assert(ret != -1);
+    }
+    else {
+        nbytes = sendto(sock, SERVER_MSG, msg_len, 0, addr, addr_len);
+        assert(nbytes != -1);
+    }
+
+    /* A stream may deliver the message in pieces; a datagram arrives whole */
+    while (received < msg_len) {
+        nbytes = recv(sock, reply + received, msg_len - received, 0);
+        if (nbytes <= 0)
+            break;
+        received += (size_t)nbytes;
+    }
+    assert(received == msg_len);
+    assert(strcmp(reply, SERVER_MSG) == 0);
+
+    /* close() is kept outside assert() so it runs in every build */
+    ret = close(sock);
+    assert(ret == 0);
+
+    (void)ret;
+    (void)nbytes;
+    return NULL;
+}
+
+/*
+ * Run one server/client exchange for the given address family and socket
+ * type: initialise the shared condition variable and mutex, start both
+ * threads, wait for them to finish and destroy the primitives again.
+ */
+void
+test_protocol(int family, int protocol)
+{
+    pthread_t server_thread, client_thread;
+    struct thread_args args = { family, protocol };
+    int ret;
+
+    /* Calls are kept outside assert() so they run in every build */
+    ret = pthread_cond_init(&cond, NULL);
+    assert(ret == 0);
+    ret = pthread_mutex_init(&mut, NULL);
+    assert(ret == 0);
+
+    /*
+     * Clear the ready flag before either thread exists. It is still 1
+     * after a previous run, and a client scheduled ahead of the server
+     * would otherwise skip its wait and contact a socket that has not
+     * been bound yet.
+     */
+    server_init_complete = 0;
+
+    ret = pthread_create(&server_thread, NULL, server, (void *)&args);
+    assert(ret == 0);
+    ret = pthread_create(&client_thread, NULL, client, (void *)&args);
+    assert(ret == 0);
+    ret = pthread_join(server_thread, NULL);
+    assert(ret == 0);
+    ret = pthread_join(client_thread, NULL);
+    assert(ret == 0);
+
+    ret = pthread_mutex_destroy(&mut);
+    assert(ret == 0);
+    ret = pthread_cond_destroy(&cond);
+    assert(ret == 0);
+    (void)ret;
+}
+
+/* Exercise TCP and then UDP, each over IPv4 and IPv6. */
+int
+main(int argc, char **argv)
+{
+    /* tcp: ipv4, then ipv6 */
+    test_protocol(AF_INET, SOCK_STREAM);
+    test_protocol(AF_INET6, SOCK_STREAM);
+
+    /* udp: ipv4, then ipv6 */
+    test_protocol(AF_INET, SOCK_DGRAM);
+    test_protocol(AF_INET6, SOCK_DGRAM);
+
+    return 0;
+}

+ 12 - 0
core/iwasm/libraries/lib-wasi-threads/lib_wasi_threads.cmake

@@ -0,0 +1,12 @@
+# Copyright (C) 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Directory holding this cmake fragment and the library sources
+set (LIB_WASI_THREADS_DIR ${CMAKE_CURRENT_LIST_DIR})
+
+# Compile-time switches defined whenever this library is included
+add_definitions (-DWASM_ENABLE_LIB_WASI_THREADS=1)
+add_definitions (-DWASM_ENABLE_HEAP_AUX_STACK_ALLOCATION=1)
+
+include_directories (${LIB_WASI_THREADS_DIR})
+
+# Sources to be added to the runtime library
+set (LIB_WASI_THREADS_SOURCE
+    ${LIB_WASI_THREADS_DIR}/lib_wasi_threads_wrapper.c
+    ${LIB_WASI_THREADS_DIR}/tid_allocator.c
+)

+ 181 - 0
core/iwasm/libraries/lib-wasi-threads/lib_wasi_threads_wrapper.c

@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "bh_log.h"
+#include "thread_manager.h"
+#include "tid_allocator.h"
+
+#if WASM_ENABLE_INTERP != 0
+#include "wasm_runtime.h"
+#endif
+
+#if WASM_ENABLE_AOT != 0
+#include "aot_runtime.h"
+#endif
+
+static const char *THREAD_START_FUNCTION = "wasi_thread_start";
+static korp_mutex thread_id_lock;
+static TidAllocator tid_allocator;
+
+typedef struct {
+    /* app's entry function */
+    wasm_function_inst_t start_func;
+    /* arg of the app's entry function */
+    uint32 arg;
+    /* thread id passed to the app */
+    int32 thread_id;
+} ThreadStartArg;
+
+/* Fetch a thread id from the shared allocator while holding its lock. */
+static int32
+allocate_thread_id()
+{
+    int32 tid;
+
+    os_mutex_lock(&thread_id_lock);
+    tid = tid_allocator_get_tid(&tid_allocator);
+    os_mutex_unlock(&thread_id_lock);
+
+    return tid;
+}
+
+/* Hand `thread_id` back to the shared allocator while holding its lock. */
+void
+deallocate_thread_id(int32 thread_id)
+{
+    TidAllocator *allocator = &tid_allocator;
+
+    os_mutex_lock(&thread_id_lock);
+    tid_allocator_release_tid(allocator, thread_id);
+    os_mutex_unlock(&thread_id_lock);
+}
+
+/*
+ * Entry routine of a spawned thread. `arg` is the thread's exec env, whose
+ * `thread_arg` points to a ThreadStartArg. Calls the app's start function
+ * with (thread id, start argument), then releases the thread id and frees
+ * the ThreadStartArg. Always returns NULL.
+ */
+static void *
+thread_start(void *arg)
+{
+    wasm_exec_env_t env = (wasm_exec_env_t)arg;
+    ThreadStartArg *start = env->thread_arg;
+    uint32 call_args[2];
+
+    wasm_exec_env_set_thread_info(env);
+    call_args[0] = start->thread_id;
+    call_args[1] = start->arg;
+
+    /* A failed call needs no handling here: the exception has already
+       been spread while it was thrown */
+    (void)wasm_runtime_call_wasm(env, start->start_func, 2, call_args);
+
+    /* Routine exit: give the id back and drop the argument block */
+    deallocate_thread_id(start->thread_id);
+    wasm_runtime_free(start);
+    env->thread_arg = NULL;
+
+    return NULL;
+}
+
+/*
+ * Native implementation of "thread-spawn".
+ *
+ * Instantiates the caller's module a second time, copies its custom data
+ * (and WASI context when libc-wasi is enabled) to the new instance, looks
+ * up the exported start function, allocates a thread id and asks the
+ * thread cluster to run thread_start() on the new instance.
+ *
+ * Returns the new thread id on success and -1 on any failure; on failure
+ * everything acquired so far is released again.
+ */
+static int32
+thread_spawn_wrapper(wasm_exec_env_t exec_env, uint32 start_arg)
+{
+    wasm_module_t module = wasm_exec_env_get_module(exec_env);
+    wasm_module_inst_t module_inst = get_module_inst(exec_env);
+    wasm_module_inst_t child_inst = NULL;
+    ThreadStartArg *spawn_arg = NULL;
+    wasm_function_inst_t entry;
+    uint32 stack_size;
+    int32 tid;
+#if WASM_ENABLE_LIBC_WASI != 0
+    WASIContext *wasi_ctx;
+#endif
+
+    bh_assert(module);
+    bh_assert(module_inst);
+
+    /* The new instance uses the parent's default wasm stack size */
+    stack_size = ((WASMModuleInstance *)module_inst)->default_wasm_stack_size;
+
+    child_inst = wasm_runtime_instantiate_internal(module, true, stack_size,
+                                                   0, NULL, 0);
+    if (!child_inst)
+        return -1;
+
+    wasm_runtime_set_custom_data_internal(
+        child_inst, wasm_runtime_get_custom_data(module_inst));
+
+#if WASM_ENABLE_LIBC_WASI != 0
+    wasi_ctx = wasm_runtime_get_wasi_ctx(module_inst);
+    if (wasi_ctx)
+        wasm_runtime_set_wasi_ctx(child_inst, wasi_ctx);
+#endif
+
+    entry = wasm_runtime_lookup_function(child_inst, THREAD_START_FUNCTION,
+                                         NULL);
+    if (!entry) {
+        LOG_ERROR("Failed to find thread start function %s",
+                  THREAD_START_FUNCTION);
+        goto thread_preparation_fail;
+    }
+
+    spawn_arg = wasm_runtime_malloc(sizeof(ThreadStartArg));
+    if (!spawn_arg) {
+        LOG_ERROR("Runtime args allocation failed");
+        goto thread_preparation_fail;
+    }
+
+    tid = allocate_thread_id();
+    spawn_arg->thread_id = tid;
+    if (tid < 0) {
+        LOG_ERROR("Failed to get thread identifier");
+        goto thread_preparation_fail;
+    }
+    spawn_arg->arg = start_arg;
+    spawn_arg->start_func = entry;
+
+    if (wasm_cluster_create_thread(exec_env, child_inst, false, thread_start,
+                                   spawn_arg)
+        != 0) {
+        LOG_ERROR("Failed to spawn a new thread");
+        goto thread_spawn_fail;
+    }
+
+    return tid;
+
+thread_spawn_fail:
+    deallocate_thread_id(tid);
+
+thread_preparation_fail:
+    if (child_inst)
+        wasm_runtime_deinstantiate_internal(child_inst, true);
+    if (spawn_arg)
+        wasm_runtime_free(spawn_arg);
+
+    return -1;
+}
+
+/* clang-format off */
+#define REG_NATIVE_FUNC(name, func_name, signature) \
+    { name, func_name##_wrapper, signature, NULL }
+/* clang-format on */
+
+static NativeSymbol native_symbols_lib_wasi_threads[] = { REG_NATIVE_FUNC(
+    "thread-spawn", thread_spawn, "(i)i") };
+
+/*
+ * Expose the native symbol table of this library: stores the table in
+ * *p_lib_wasi_threads_apis and returns its number of entries.
+ */
+uint32
+get_lib_wasi_threads_export_apis(NativeSymbol **p_lib_wasi_threads_apis)
+{
+    uint32 symbol_count = sizeof(native_symbols_lib_wasi_threads)
+                          / sizeof(native_symbols_lib_wasi_threads[0]);
+
+    *p_lib_wasi_threads_apis = native_symbols_lib_wasi_threads;
+    return symbol_count;
+}
+
+/*
+ * Set up the thread id lock and the tid allocator. Returns true when both
+ * are ready; if the allocator cannot be initialised the lock is destroyed
+ * again and false is returned.
+ */
+bool
+lib_wasi_threads_init(void)
+{
+    if (os_mutex_init(&thread_id_lock) != 0)
+        return false;
+
+    if (tid_allocator_init(&tid_allocator))
+        return true;
+
+    /* Allocator setup failed: undo the lock initialisation */
+    os_mutex_destroy(&thread_id_lock);
+    return false;
+}
+
+/* Tear down what lib_wasi_threads_init() set up: allocator first, then lock. */
+void
+lib_wasi_threads_destroy(void)
+{
+    /* release the tid allocator */
+    tid_allocator_deinit(&tid_allocator);
+    /* then the lock that guarded it */
+    os_mutex_destroy(&thread_id_lock);
+}

+ 30 - 0
core/iwasm/libraries/lib-wasi-threads/test/build.sh

@@ -0,0 +1,30 @@
+#!/bin/bash
+
+#
+# Copyright (C) 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+
+# Compiles every C file in this directory into a wasm32-wasi-threads test
+# module. CC and WASI_SYSROOT can be overridden from the environment.
+
+# Abort on the first failing command so a broken test cannot go unnoticed
+set -ueo pipefail
+# Without this an empty directory would pass the literal "*.c" to the compiler
+shopt -s nullglob
+
+CC=${CC:=/opt/wasi-sdk/bin/clang}
+WASI_SYSROOT=${WASI_SYSROOT:=~/dev/wasi-libc/sysroot}
+WAMR_DIR=../../../../..
+
+for test_c in *.c; do
+    test_wasm="$(basename "$test_c" .c).wasm"
+
+    echo "Compiling $test_c to $test_wasm"
+    # $CC stays unquoted so a value carrying extra words still expands into
+    # separate arguments; paths are quoted against word splitting.
+    $CC \
+        --sysroot "$WASI_SYSROOT" \
+        -target wasm32-wasi-threads \
+        -pthread -ftls-model=local-exec \
+        -z stack-size=32768 \
+        -Wl,--export=__heap_base \
+        -Wl,--export=__data_end \
+        -Wl,--shared-memory,--max-memory=1966080 \
+        -Wl,--export=wasi_thread_start \
+        -Wl,--export=malloc \
+        -Wl,--export=free \
+        -I "$WAMR_DIR/samples/wasi-threads/wasm-apps" \
+        "$WAMR_DIR/samples/wasi-threads/wasm-apps/wasi_thread_start.S" \
+        "$test_c" -o "$test_wasm"
+done

+ 122 - 0
core/iwasm/libraries/lib-wasi-threads/test/common.h

@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2022 Amazon.com Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <unistd.h>
+
+#include "wasi_thread_start.h"
+
+typedef enum {
+    BLOCKING_TASK_BUSY_WAIT,
+    BLOCKING_TASK_ATOMIC_WAIT,
+    BLOCKING_TASK_POLL_ONEOFF
+} blocking_task_type_t;
+
+/* Parameter to change test behavior */
+static bool termination_by_trap;
+static bool termination_in_main_thread;
+static blocking_task_type_t blocking_task_type;
+
+#define TIMEOUT_SECONDS 10ll
+#define NUM_THREADS 3
+static pthread_barrier_t barrier;
+
+typedef struct {
+    start_args_t base;
+    bool throw_exception;
+} shared_t;
+
+/*
+ * Block the calling thread for about TIMEOUT_SECONDS, using the mechanism
+ * selected by `blocking_task_type`: repeated one-second sleeps, an atomic
+ * wait with a timeout, or a single long sleep.
+ */
+void
+run_long_task()
+{
+    switch (blocking_task_type) {
+        case BLOCKING_TASK_BUSY_WAIT:
+        {
+            long long remaining = TIMEOUT_SECONDS;
+
+            while (remaining-- > 0)
+                sleep(1);
+            break;
+        }
+        case BLOCKING_TASK_ATOMIC_WAIT:
+            __builtin_wasm_memory_atomic_wait32(
+                0, 0, TIMEOUT_SECONDS * 1000 * 1000 * 1000);
+            break;
+        default:
+            sleep(TIMEOUT_SECONDS);
+            break;
+    }
+}
+
+/*
+ * Body of a thread that is expected to be interrupted: meet the other
+ * threads at the barrier, start the blocking task, and fail the test if
+ * that task ever completes.
+ */
+void
+start_job()
+{
+    /* All threads, the main one included, rendezvous here first */
+    pthread_barrier_wait(&barrier);
+
+    /* This is the task that should get interrupted */
+    run_long_task();
+
+    assert(false && "Thread termination test failed");
+}
+
+/*
+ * Body of the thread that ends the process: meet the other threads at the
+ * barrier, then either trap or call proc_exit with code 33, depending on
+ * `termination_by_trap`.
+ */
+void
+terminate_process()
+{
+    /* All threads, the main one included, rendezvous here first */
+    pthread_barrier_wait(&barrier);
+
+    if (!termination_by_trap)
+        __wasi_proc_exit(33);
+    else
+        __builtin_trap();
+}
+
+/*
+ * Start routine of every spawned thread. `start_arg` points to the
+ * thread's shared_t; its `throw_exception` flag decides whether this
+ * thread terminates the process or runs the job to be interrupted.
+ */
+void
+__wasi_thread_start_C(int thread_id, int *start_arg)
+{
+    shared_t *data = (shared_t *)start_arg;
+
+    if (!data->throw_exception) {
+        start_job();
+    }
+    else {
+        terminate_process();
+    }
+}
+
+/*
+ * Drive one termination scenario.
+ *
+ * trap:      terminate through a trap (true) or through proc_exit (false)
+ * main:      terminate from the main thread (true) or from the first
+ *            spawned thread (false)
+ * task_type: how the remaining threads block while waiting to be stopped
+ *
+ * Spawns NUM_THREADS threads, then the main thread either terminates the
+ * process or runs the blocking job itself.
+ */
+void
+test_termination(bool trap, bool main, blocking_task_type_t task_type)
+{
+    int thread_id = -1, i;
+    shared_t data[NUM_THREADS] = { 0 };
+
+    /* Publish the scenario for the thread routines */
+    termination_by_trap = trap;
+    termination_in_main_thread = main;
+    blocking_task_type = task_type;
+
+    assert(pthread_barrier_init(&barrier, NULL, NUM_THREADS + 1) == 0
+           && "Failed to init barrier");
+
+    for (i = 0; i < NUM_THREADS; i++) {
+        /* No graceful memory free to simplify the test */
+        assert(start_args_init(&data[i].base)
+               && "Failed to allocate thread's stack");
+    }
+
+    /*
+     * The first thread forces termination (trap or `proc_exit`) unless
+     * the main thread does so; the others only test exception propagation
+     */
+    for (i = 0; i < NUM_THREADS; i++) {
+        data[i].throw_exception = (i == 0) ? !termination_in_main_thread : false;
+        thread_id = __wasi_thread_spawn(&data[i]);
+        assert(thread_id > 0 && "Failed to create thread");
+    }
+
+    if (!termination_in_main_thread) {
+        start_job();
+    }
+    else {
+        terminate_process();
+    }
+}

+ 128 - 0
core/iwasm/libraries/lib-wasi-threads/test/create_threads_until_limit.c

@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef __wasi__
+#error This example only compiles to WASM/WASI target
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <stdbool.h>
+
+#include "wasi_thread_start.h"
+
+enum CONSTANTS {
+    MAX_NUM_THREADS = 4, /* Should be the same as "--max-threads" */
+    NUM_RETRY = 5,
+    SECOND = 1000 * 1000 * 1000, /* 1 second */
+    TIMEOUT = 10LL * SECOND
+};
+
+int g_count = 0;
+
+typedef struct {
+    start_args_t base;
+    int th_ready;
+    int th_continue;
+    int th_done;
+    bool no_ops;
+} shared_t;
+
+/*
+ * Start routine of a spawned thread. A `no_ops` thread only waits for two
+ * seconds. Any other thread reports that it is ready, waits for the go
+ * signal, bumps the global counter and reports that it is done.
+ */
+void
+__wasi_thread_start_C(int thread_id, int *start_arg)
+{
+    shared_t *data = (shared_t *)start_arg;
+    int wait_result;
+
+    if (data->no_ops) {
+        __builtin_wasm_memory_atomic_wait32(NULL, 0, 2 * SECOND);
+        return;
+    }
+
+    /* tell the main thread this one is up */
+    __atomic_store_n(&data->th_ready, 1, __ATOMIC_SEQ_CST);
+    __builtin_wasm_memory_atomic_notify(&data->th_ready, 1);
+
+    /* a result of 2 is treated as a timeout */
+    wait_result =
+        __builtin_wasm_memory_atomic_wait32(&data->th_continue, 0, TIMEOUT);
+    if (wait_result == 2) {
+        assert(false && "Wait should not time out");
+    }
+
+    __atomic_fetch_add(&g_count, 1, __ATOMIC_SEQ_CST);
+
+    /* tell the main thread this one has finished */
+    __atomic_store_n(&data->th_done, 1, __ATOMIC_SEQ_CST);
+    __builtin_wasm_memory_atomic_notify(&data->th_done, 1);
+}
+
+/*
+ * Spawn threads up to MAX_NUM_THREADS, check that one more spawn is
+ * refused, release the threads and verify the shared counter; then spawn
+ * a second batch that is not waited for, retrying each spawn a few times.
+ */
+int
+main(int argc, char **argv)
+{
+    shared_t data[MAX_NUM_THREADS] = { 0 };
+    int thread_ids[MAX_NUM_THREADS];
+    int wait_result;
+
+    /* Phase 1: occupy every thread slot, waiting for each to become ready */
+    for (int i = 0; i < MAX_NUM_THREADS; i++) {
+        assert(start_args_init(&data[i].base));
+        thread_ids[i] = __wasi_thread_spawn(&data[i]);
+        printf("Thread created with id=%d\n", thread_ids[i]);
+        assert(thread_ids[i] > 0 && "Thread creation failed");
+
+        for (int j = 0; j < i; j++) {
+            assert(thread_ids[i] != thread_ids[j] && "Duplicated TIDs");
+        }
+
+        wait_result = __builtin_wasm_memory_atomic_wait32(&data[i].th_ready,
+                                                          0, TIMEOUT);
+        if (wait_result == 2) {
+            assert(false && "Wait should not time out");
+        }
+    }
+
+    /* Phase 2: one spawn beyond the limit has to be rejected */
+    printf("Attempt to create thread when not possible\n");
+    shared_t data_fail = { 0 };
+    assert(start_args_init(&data_fail.base));
+    int thread_id = __wasi_thread_spawn(&data_fail);
+    start_args_deinit(&data_fail.base);
+    assert(thread_id < 0 && "Thread creation should fail");
+
+    /* Phase 3: let the waiting threads continue */
+    printf("Unlock created threads\n");
+    for (int i = 0; i < MAX_NUM_THREADS; i++) {
+        __atomic_store_n(&data[i].th_continue, 1, __ATOMIC_SEQ_CST);
+        __builtin_wasm_memory_atomic_notify(&data[i].th_continue, 1);
+    }
+
+    /* Phase 4: collect the threads and release their start arguments */
+    printf("Wait for threads to finish\n");
+    for (int i = 0; i < MAX_NUM_THREADS; i++) {
+        wait_result = __builtin_wasm_memory_atomic_wait32(&data[i].th_done, 0,
+                                                          TIMEOUT);
+        if (wait_result == 2) {
+            assert(false && "Wait should not time out");
+        }
+
+        start_args_deinit(&data[i].base);
+    }
+
+    printf("Value of count after update: %d\n", g_count);
+    assert(g_count == (MAX_NUM_THREADS)
+           && "Global count not updated correctly");
+
+    /* --------------------------------------------------- */
+
+    /* Phase 5: a second batch of no-op threads that nobody waits for */
+    printf("Create new threads without waiting from them to finish\n");
+    shared_t data_no_join[MAX_NUM_THREADS] = { 0 };
+    for (int i = 0; i < MAX_NUM_THREADS; i++) {
+        /* No graceful memory free to simplify the test */
+        assert(start_args_init(&data_no_join[i].base));
+        data_no_join[i].no_ops = true;
+
+        int thread_id = -1;
+        int attempts = 0;
+
+        /* Retry with a one-second pause between failed attempts */
+        while (attempts < NUM_RETRY && thread_id < 0) {
+            thread_id = __wasi_thread_spawn(&data_no_join[i]);
+            if (thread_id < 0)
+                __builtin_wasm_memory_atomic_wait32(NULL, 0, SECOND);
+            attempts++;
+        }
+
+        printf("Thread created with id=%d\n", thread_id);
+        assert(thread_id > 0 && "Thread creation should succeed");
+    }
+
+    return EXIT_SUCCESS;
+}

+ 70 - 0
core/iwasm/libraries/lib-wasi-threads/test/global_atomic.c

@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef __wasi__
+#error This example only compiles to WASM/WASI target
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <stdbool.h>
+
+#include "wasi_thread_start.h"
+
+enum CONSTANTS {
+    NUM_THREADS = 4,
+    NUM_ITER = 1000,
+    SECOND = 1000 * 1000 * 1000, /* 1 second */
+    TIMEOUT = 10LL * SECOND
+};
+
+int g_count = 0;
+
+typedef struct {
+    start_args_t base;
+    int th_done;
+} shared_t;
+
+/*
+ * Start routine of a spawned thread: atomically increment the global
+ * counter NUM_ITER times, then flag completion and notify the waiter.
+ */
+void
+__wasi_thread_start_C(int thread_id, int *start_arg)
+{
+    shared_t *data = (shared_t *)start_arg;
+    int remaining = NUM_ITER;
+
+    while (remaining-- > 0)
+        __atomic_fetch_add(&g_count, 1, __ATOMIC_SEQ_CST);
+
+    /* publish completion */
+    __atomic_store_n(&data->th_done, 1, __ATOMIC_SEQ_CST);
+    __builtin_wasm_memory_atomic_notify(&data->th_done, 1);
+}
+
+/*
+ * Spawn NUM_THREADS threads that update the global counter atomically,
+ * wait for all of them and check the final count.
+ */
+int
+main(int argc, char **argv)
+{
+    shared_t data[NUM_THREADS] = { 0 };
+    int thread_ids[NUM_THREADS];
+    int wait_result;
+
+    /* start all workers */
+    for (int i = 0; i < NUM_THREADS; i++) {
+        assert(start_args_init(&data[i].base));
+        thread_ids[i] = __wasi_thread_spawn(&data[i]);
+        assert(thread_ids[i] > 0 && "Thread creation failed");
+    }
+
+    /* collect them and release their start arguments */
+    printf("Wait for threads to finish\n");
+    for (int i = 0; i < NUM_THREADS; i++) {
+        wait_result = __builtin_wasm_memory_atomic_wait32(&data[i].th_done, 0,
+                                                          TIMEOUT);
+        if (wait_result == 2) {
+            assert(false && "Wait should not time out");
+        }
+
+        start_args_deinit(&data[i].base);
+    }
+
+    printf("Value of count after update: %d\n", g_count);
+    assert(g_count == (NUM_THREADS * NUM_ITER)
+           && "Global count not updated correctly");
+
+    return EXIT_SUCCESS;
+}

+ 78 - 0
core/iwasm/libraries/lib-wasi-threads/test/global_lock.c

@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef __wasi__
+#error This example only compiles to WASM/WASI target
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <stdbool.h>
+#include <pthread.h>
+
+#include "wasi_thread_start.h"
+
+enum CONSTANTS {
+    NUM_THREADS = 4,
+    NUM_ITER = 200,
+    SECOND = 1000 * 1000 * 1000, /* 1 second */
+    TIMEOUT = 10LL * SECOND
+};
+
+pthread_mutex_t mutex;
+int g_count = 0;
+
+typedef struct {
+    start_args_t base;
+    int th_done;
+} shared_t;
+
+/*
+ * Start routine of a spawned thread: increment the global counter
+ * NUM_ITER times under the mutex, then flag completion and notify the
+ * waiter.
+ */
+void
+__wasi_thread_start_C(int thread_id, int *start_arg)
+{
+    shared_t *data = (shared_t *)start_arg;
+    int remaining = NUM_ITER;
+
+    while (remaining-- > 0) {
+        pthread_mutex_lock(&mutex);
+        g_count++;
+        pthread_mutex_unlock(&mutex);
+    }
+
+    /* publish completion */
+    __atomic_store_n(&data->th_done, 1, __ATOMIC_SEQ_CST);
+    __builtin_wasm_memory_atomic_notify(&data->th_done, 1);
+}
+
+/*
+ * Spawn NUM_THREADS threads that update the global counter under a mutex,
+ * wait for all of them and check the final count.
+ */
+int
+main(int argc, char **argv)
+{
+    shared_t data[NUM_THREADS] = { 0 };
+    int thread_ids[NUM_THREADS];
+    int wait_result;
+
+    assert(pthread_mutex_init(&mutex, NULL) == 0 && "Failed to init mutex");
+
+    /* start all workers */
+    for (int i = 0; i < NUM_THREADS; i++) {
+        assert(start_args_init(&data[i].base));
+        thread_ids[i] = __wasi_thread_spawn(&data[i]);
+        assert(thread_ids[i] > 0 && "Thread creation failed");
+    }
+
+    /* collect them and release their start arguments */
+    printf("Wait for threads to finish\n");
+    for (int i = 0; i < NUM_THREADS; i++) {
+        wait_result = __builtin_wasm_memory_atomic_wait32(&data[i].th_done, 0,
+                                                          TIMEOUT);
+        if (wait_result == 2) {
+            assert(false && "Wait should not time out");
+        }
+
+        start_args_deinit(&data[i].base);
+    }
+
+    printf("Value of count after update: %d\n", g_count);
+    assert(g_count == (NUM_THREADS * NUM_ITER)
+           && "Global count not updated correctly");
+
+    assert(pthread_mutex_destroy(&mutex) == 0 && "Failed to destroy mutex");
+    return EXIT_SUCCESS;
+}

+ 16 - 0
core/iwasm/libraries/lib-wasi-threads/test/main_proc_exit_busy.c

@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef __wasi__
+#error This example only compiles to WASM/WASI target
+#endif
+
+#include "common.h"
+
+/* Scenario: the main thread calls proc_exit; spawned threads run the
+   busy-wait task. */
+int
+main(int argc, char **argv)
+{
+    /* command line arguments are not used */
+    (void)argc;
+    (void)argv;
+
+    test_termination(false, true, BLOCKING_TASK_BUSY_WAIT);
+    return 0;
+}

+ 3 - 0
core/iwasm/libraries/lib-wasi-threads/test/main_proc_exit_busy.json

@@ -0,0 +1,3 @@
+{
+    "exit_code": 33
+}

+ 16 - 0
core/iwasm/libraries/lib-wasi-threads/test/main_proc_exit_sleep.c

@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef __wasi__
+#error This example only compiles to WASM/WASI target
+#endif
+
+#include "common.h"
+
+/* Scenario: the main thread calls proc_exit; spawned threads run the
+   sleep (poll_oneoff) task. */
+int
+main(int argc, char **argv)
+{
+    /* command line arguments are not used */
+    (void)argc;
+    (void)argv;
+
+    test_termination(false, true, BLOCKING_TASK_POLL_ONEOFF);
+    return 0;
+}

+ 3 - 0
core/iwasm/libraries/lib-wasi-threads/test/main_proc_exit_sleep.json

@@ -0,0 +1,3 @@
+{
+    "exit_code": 33
+}

+ 16 - 0
core/iwasm/libraries/lib-wasi-threads/test/main_proc_exit_wait.c

@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef __wasi__
+#error This example only compiles to WASM/WASI target
+#endif
+
+#include "common.h"
+
+/* Scenario: the main thread calls proc_exit; spawned threads run the
+   atomic-wait task. */
+int
+main(int argc, char **argv)
+{
+    /* command line arguments are not used */
+    (void)argc;
+    (void)argv;
+
+    test_termination(false, true, BLOCKING_TASK_ATOMIC_WAIT);
+    return 0;
+}

+ 3 - 0
core/iwasm/libraries/lib-wasi-threads/test/main_proc_exit_wait.json

@@ -0,0 +1,3 @@
+{
+    "exit_code": 33
+}

+ 16 - 0
core/iwasm/libraries/lib-wasi-threads/test/main_trap_busy.c

@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef __wasi__
+#error This example only compiles to WASM/WASI target
+#endif
+
+#include "common.h"
+
+/* Scenario: the main thread traps; spawned threads run the busy-wait
+   task. */
+int
+main(int argc, char **argv)
+{
+    /* command line arguments are not used */
+    (void)argc;
+    (void)argv;
+
+    test_termination(true, true, BLOCKING_TASK_BUSY_WAIT);
+    return 0;
+}

+ 3 - 0
core/iwasm/libraries/lib-wasi-threads/test/main_trap_busy.json

@@ -0,0 +1,3 @@
+{
+    "exit_code": 1
+}

+ 16 - 0
core/iwasm/libraries/lib-wasi-threads/test/main_trap_sleep.c

@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef __wasi__
+#error This example only compiles to WASM/WASI target
+#endif
+
+#include "common.h"
+
+/* Scenario: the main thread traps; spawned threads run the sleep
+   (poll_oneoff) task. */
+int
+main(int argc, char **argv)
+{
+    /* command line arguments are not used */
+    (void)argc;
+    (void)argv;
+
+    test_termination(true, true, BLOCKING_TASK_POLL_ONEOFF);
+    return 0;
+}

+ 3 - 0
core/iwasm/libraries/lib-wasi-threads/test/main_trap_sleep.json

@@ -0,0 +1,3 @@
+{
+    "exit_code": 1
+}

+ 16 - 0
core/iwasm/libraries/lib-wasi-threads/test/main_trap_wait.c

@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef __wasi__
+#error This example only compiles to WASM/WASI target
+#endif
+
+#include "common.h"
+
+/* Scenario: the main thread traps; spawned threads run the atomic-wait
+   task. */
+int
+main(int argc, char **argv)
+{
+    /* command line arguments are not used */
+    (void)argc;
+    (void)argv;
+
+    test_termination(true, true, BLOCKING_TASK_ATOMIC_WAIT);
+    return 0;
+}

+ 3 - 0
core/iwasm/libraries/lib-wasi-threads/test/main_trap_wait.json

@@ -0,0 +1,3 @@
+{
+    "exit_code": 1
+}

+ 16 - 0
core/iwasm/libraries/lib-wasi-threads/test/nonmain_proc_exit_busy.c

@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef __wasi__
+#error This example only compiles to WASM/WASI target
+#endif
+
+#include "common.h"
+
+/* Scenario: the first spawned thread calls proc_exit; the other threads,
+   main included, run the busy-wait task. */
+int
+main(int argc, char **argv)
+{
+    /* command line arguments are not used */
+    (void)argc;
+    (void)argv;
+
+    test_termination(false, false, BLOCKING_TASK_BUSY_WAIT);
+    return 0;
+}

+ 3 - 0
core/iwasm/libraries/lib-wasi-threads/test/nonmain_proc_exit_busy.json

@@ -0,0 +1,3 @@
+{
+    "exit_code": 33
+}

+ 16 - 0
core/iwasm/libraries/lib-wasi-threads/test/nonmain_proc_exit_sleep.c

@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef __wasi__
+#error This example only compiles to WASM/WASI target
+#endif
+
+#include "common.h"
+
+/* Scenario: the first spawned thread calls proc_exit; the other threads,
+   main included, run the sleep (poll_oneoff) task. */
+int
+main(int argc, char **argv)
+{
+    /* command line arguments are not used */
+    (void)argc;
+    (void)argv;
+
+    test_termination(false, false, BLOCKING_TASK_POLL_ONEOFF);
+    return 0;
+}

+ 3 - 0
core/iwasm/libraries/lib-wasi-threads/test/nonmain_proc_exit_sleep.json

@@ -0,0 +1,3 @@
+{
+    "exit_code": 33
+}

+ 16 - 0
core/iwasm/libraries/lib-wasi-threads/test/nonmain_proc_exit_wait.c

@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef __wasi__
+#error This example only compiles to WASM/WASI target
+#endif
+
+#include "common.h"
+
+/* Scenario: the first spawned thread calls proc_exit; the other threads,
+   main included, run the atomic-wait task. */
+int
+main(int argc, char **argv)
+{
+    /* command line arguments are not used */
+    (void)argc;
+    (void)argv;
+
+    test_termination(false, false, BLOCKING_TASK_ATOMIC_WAIT);
+    return 0;
+}

+ 3 - 0
core/iwasm/libraries/lib-wasi-threads/test/nonmain_proc_exit_wait.json

@@ -0,0 +1,3 @@
+{
+    "exit_code": 33
+}

+ 16 - 0
core/iwasm/libraries/lib-wasi-threads/test/nonmain_trap_busy.c

@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef __wasi__
+#error This example only compiles to WASM/WASI target
+#endif
+
+#include "common.h"
+
+/* Scenario: the first spawned thread traps; the other threads, main
+   included, run the busy-wait task. */
+int
+main(int argc, char **argv)
+{
+    /* command line arguments are not used */
+    (void)argc;
+    (void)argv;
+
+    test_termination(true, false, BLOCKING_TASK_BUSY_WAIT);
+    return 0;
+}

+ 3 - 0
core/iwasm/libraries/lib-wasi-threads/test/nonmain_trap_busy.json

@@ -0,0 +1,3 @@
+{
+    "exit_code": 1
+}

+ 16 - 0
core/iwasm/libraries/lib-wasi-threads/test/nonmain_trap_sleep.c

@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef __wasi__
+#error This example only compiles to WASM/WASI target
+#endif
+
+#include "common.h"
+
+/* Test entry point: delegates to test_termination() from common.h with the
+ * poll_oneoff (sleep) blocking task and the first flag set. The matching
+ * nonmain_trap_sleep.json expects exit code 1.
+ * NOTE(review): the file name suggests the first flag selects "trap" instead
+ * of proc_exit; the flags are defined in common.h, not visible here. */
+int
+main(int argc, char **argv)
+{
+    test_termination(true, false, BLOCKING_TASK_POLL_ONEOFF);
+}

+ 3 - 0
core/iwasm/libraries/lib-wasi-threads/test/nonmain_trap_sleep.json

@@ -0,0 +1,3 @@
+{
+    "exit_code": 1
+}

+ 16 - 0
core/iwasm/libraries/lib-wasi-threads/test/nonmain_trap_wait.c

@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef __wasi__
+#error This example only compiles to WASM/WASI target
+#endif
+
+#include "common.h"
+
+/* Test entry point: delegates to test_termination() from common.h with the
+ * atomic-wait blocking task and the first flag set. The matching
+ * nonmain_trap_wait.json expects exit code 1.
+ * NOTE(review): the file name suggests the first flag selects "trap" instead
+ * of proc_exit; the flags are defined in common.h, not visible here. */
+int
+main(int argc, char **argv)
+{
+    test_termination(true, false, BLOCKING_TASK_ATOMIC_WAIT);
+}

+ 3 - 0
core/iwasm/libraries/lib-wasi-threads/test/nonmain_trap_wait.json

@@ -0,0 +1,3 @@
+{
+    "exit_code": 1
+}

+ 72 - 0
core/iwasm/libraries/lib-wasi-threads/test/spawn_multiple_times.c

@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef __wasi__
+#error This example only compiles to WASM/WASI target
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <stdbool.h>
+
+#include "wasi_thread_start.h"
+
+/* Test parameters.
+ * NUM_ITER  - number of spawn/wait rounds performed by main.
+ * NUM_RETRY - maximum spawn attempts per round.
+ * SECOND    - 10^9; passed as the timeout of the atomic wait used as a
+ *             back-off between spawn attempts.
+ * TIMEOUT   - 5 * 10^9; upper bound for waiting on a spawned thread.
+ * NOTE(review): TIMEOUT does not fit in a 32-bit int, so this enumerator
+ * relies on the compiler accepting enumerator values wider than int. */
+enum CONSTANTS {
+    NUM_ITER = 50,
+    NUM_RETRY = 5,
+    SECOND = 1000 * 1000 * 1000, /* 1 second */
+    TIMEOUT = 5LL * SECOND
+};
+
+/* Argument block handed to the spawned thread. `base` is initialised by
+ * start_args_init(); `th_done` is set to 1 by the thread and is the word
+ * main waits on. */
+typedef struct {
+    start_args_t base;
+    int th_done;
+} shared_t;
+
+/* Incremented once by every spawned thread; checked against NUM_ITER. */
+int g_count = 0;
+
+/* Body of each spawned thread: bump the global counter, then publish
+ * completion through th_done and wake one waiter on that word. */
+void
+__wasi_thread_start_C(int thread_id, int *start_arg)
+{
+    shared_t *shared = (shared_t *)start_arg;
+    int *done_flag = &shared->th_done;
+
+    g_count++;
+
+    /* Store first, notify second, so a woken waiter observes the flag. */
+    __atomic_store_n(done_flag, 1, __ATOMIC_SEQ_CST);
+    __builtin_wasm_memory_atomic_notify(done_flag, 1);
+}
+
+/* Spawn NUM_ITER threads one after another, reusing the same argument block
+ * and stack, and check that each one ran exactly once.
+ *
+ * Returns EXIT_SUCCESS when every thread was created and finished in time;
+ * any failure trips an assert. */
+int
+main(int argc, char **argv)
+{
+    shared_t data = { 0 };
+
+    /* Keep the call outside assert(): with NDEBUG the assert expression is
+       not evaluated and the stack would never be allocated. */
+    bool stack_ok = start_args_init(&data.base);
+    assert(stack_ok && "Stack allocation for thread failed");
+    (void)stack_ok;
+
+    for (int i = 0; i < NUM_ITER; i++) {
+        data.th_done = 0;
+
+        printf("Creating thread\n");
+        int thread_id = -1;
+        for (int j = 0; j < NUM_RETRY && thread_id < 0; j++) {
+            thread_id = __wasi_thread_spawn(&data);
+            if (thread_id < 0)
+                /* Back off for up to one second before retrying */
+                __builtin_wasm_memory_atomic_wait32(NULL, 0, SECOND);
+        }
+        assert(thread_id > 0 && "Thread creation should succeed");
+
+        printf("Waiting for thread to finish\n");
+        /* memory.atomic.wait32 yields 2 when the wait timed out */
+        if (__builtin_wasm_memory_atomic_wait32(&data.th_done, 0, TIMEOUT)
+            == 2) {
+            assert(false && "Wait should not time out");
+        }
+        printf("Thread has finished\n");
+    }
+
+    assert(g_count == NUM_ITER && "Count has not been updated correctly");
+
+    start_args_deinit(&data.base);
+    return EXIT_SUCCESS;
+}

+ 86 - 0
core/iwasm/libraries/lib-wasi-threads/test/update_shared_data_and_alloc_heap.c

@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef __wasi__
+#error This example only compiles to WASM/WASI target
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <stdbool.h>
+
+#include "wasi_thread_start.h"
+
+/* Test parameters.
+ * NUM_THREADS - number of threads spawned by main.
+ * NUM_ITER    - atomic increments performed by each thread.
+ * SECOND      - 10^9.
+ * TIMEOUT     - 10 * 10^9; upper bound for waiting on each thread.
+ * NOTE(review): TIMEOUT does not fit in a 32-bit int, so this enumerator
+ * relies on the compiler accepting enumerator values wider than int. */
+enum CONSTANTS {
+    NUM_THREADS = 4,
+    NUM_ITER = 30,
+    SECOND = 1000 * 1000 * 1000, /* 1 second */
+    TIMEOUT = 10LL * SECOND
+};
+
+/* Per-thread argument block.
+ * base      - initialised by start_args_init().
+ * th_done   - set to 1 by the thread; main waits on this word.
+ * count     - counter shared by all threads, incremented atomically.
+ * iteration - index of the thread, also used as its slot in vals[].
+ * pval      - not referenced by the code in this file. */
+typedef struct {
+    start_args_t base;
+    int th_done;
+    int *count;
+    int iteration;
+    int *pval;
+} shared_t;
+
+/* One heap cell per thread, allocated by the thread and freed by main. */
+int *vals[NUM_THREADS];
+
+/* Body of each spawned thread: add NUM_ITER to the shared counter, allocate
+ * one int on the heap holding the thread's index, then signal completion
+ * through th_done. */
+void
+__wasi_thread_start_C(int thread_id, int *start_arg)
+{
+    shared_t *data = (shared_t *)start_arg;
+
+    for (int i = 0; i < NUM_ITER; i++)
+        __atomic_fetch_add(data->count, 1, __ATOMIC_SEQ_CST);
+
+    /* Fail loudly instead of writing through a NULL pointer when the heap
+       allocation does not succeed. */
+    int *slot = malloc(sizeof(int));
+    assert(slot != NULL && "Heap allocation in thread failed");
+    *slot = data->iteration;
+    vals[data->iteration] = slot;
+
+    __atomic_store_n(&data->th_done, 1, __ATOMIC_SEQ_CST);
+    __builtin_wasm_memory_atomic_notify(&data->th_done, 1);
+}
+
+/* Spawn NUM_THREADS threads that share one counter and each allocate a heap
+ * cell, wait for all of them, then verify the counter and every cell.
+ *
+ * Returns EXIT_SUCCESS when all checks hold; any failure trips an assert. */
+int
+main(int argc, char **argv)
+{
+    shared_t data[NUM_THREADS] = { 0 };
+    int thread_ids[NUM_THREADS];
+    int *count = calloc(1, sizeof(int));
+
+    assert(count != NULL && "Counter allocation failed");
+
+    for (int i = 0; i < NUM_THREADS; i++) {
+        /* Keep the call outside assert(): with NDEBUG the assert expression
+           is not evaluated and the stack would never be allocated. */
+        bool stack_ok = start_args_init(&data[i].base);
+        assert(stack_ok && "Stack allocation for thread failed");
+        (void)stack_ok;
+
+        __atomic_store_n(&data[i].count, count, __ATOMIC_SEQ_CST);
+        data[i].iteration = i;
+
+        thread_ids[i] = __wasi_thread_spawn(&data[i]);
+        assert(thread_ids[i] > 0 && "Thread creation failed");
+    }
+
+    printf("Wait for threads to finish\n");
+    for (int i = 0; i < NUM_THREADS; i++) {
+        /* memory.atomic.wait32 yields 2 when the wait timed out */
+        if (__builtin_wasm_memory_atomic_wait32(&data[i].th_done, 0, TIMEOUT)
+            == 2) {
+            assert(false && "Wait should not time out");
+        }
+
+        start_args_deinit(&data[i].base);
+    }
+
+    assert(*count == (NUM_THREADS * NUM_ITER) && "Count not updated correctly");
+
+    for (int i = 0; i < NUM_THREADS; i++) {
+        printf("val=%d\n", *vals[i]);
+        assert(*vals[i] == i && "Value not updated correctly");
+        free(vals[i]);
+    }
+
+    free(count);
+    return EXIT_SUCCESS;
+}

+ 80 - 0
core/iwasm/libraries/lib-wasi-threads/tid_allocator.c

@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "tid_allocator.h"
+#include "wasm_export.h"
+#include "bh_log.h"
+
+bh_static_assert(TID_MIN <= TID_MAX);
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+/* Initialise the allocator with its first batch of thread identifiers.
+ *
+ * The initial capacity is TID_ALLOCATOR_INIT_SIZE, capped at the number of
+ * identifiers in [TID_MIN, TID_MAX]. The stack is filled so that the
+ * smallest identifier is on top and is handed out first.
+ *
+ * Returns true on success. On allocation failure returns false and leaves
+ * the allocator empty (ids == NULL, size == 0, pos == 0) so that no later
+ * call can index through a NULL array. */
+bool
+tid_allocator_init(TidAllocator *tid_allocator)
+{
+    uint32 capacity = MIN(TID_ALLOCATOR_INIT_SIZE, TID_MAX - TID_MIN + 1);
+    int32 *ids = wasm_runtime_malloc(capacity * sizeof(int32));
+
+    if (ids == NULL) {
+        tid_allocator->ids = NULL;
+        tid_allocator->size = 0;
+        tid_allocator->pos = 0;
+        return false;
+    }
+
+    // ids[capacity - 1] (the stack top) holds TID_MIN
+    for (uint32 i = 0; i < capacity; i++)
+        ids[i] = TID_MIN + (capacity - 1 - i);
+
+    tid_allocator->ids = ids;
+    tid_allocator->size = capacity;
+    tid_allocator->pos = capacity;
+
+    return true;
+}
+
+/* Release the identifier array owned by the allocator. The fields of
+ * *tid_allocator are not reset, so it must not be used again without a new
+ * call to tid_allocator_init(). */
+void
+tid_allocator_deinit(TidAllocator *tid_allocator)
+{
+    wasm_runtime_free(tid_allocator->ids);
+}
+
+/* Hand out an unused thread identifier.
+ *
+ * When the stack of free identifiers is empty, the backing array is doubled
+ * (capped at the size of the [TID_MIN, TID_MAX] range) and refilled with the
+ * identifiers that have never been issued.
+ *
+ * Returns the identifier, or -1 when the range is exhausted or the array
+ * could not be grown. */
+int32
+tid_allocator_get_tid(TidAllocator *tid_allocator)
+{
+    // Fast path: pop an available identifier from the stack
+    if (tid_allocator->pos != 0)
+        return tid_allocator->ids[--tid_allocator->pos];
+
+    // Slow path: every identifier issued so far is in use, grow the stack
+    const uint32 max_capacity = TID_MAX - TID_MIN + 1;
+    const uint32 prev_capacity = tid_allocator->size;
+
+    if (prev_capacity == max_capacity) {
+        LOG_ERROR("Maximum thread identifier reached");
+        return -1;
+    }
+
+    uint32 grown = prev_capacity * 2;
+    if (grown > max_capacity)
+        grown = max_capacity;
+
+    if (grown != max_capacity && grown / 2 != prev_capacity) {
+        LOG_ERROR("Overflow detected during new size calculation");
+        return -1;
+    }
+
+    size_t bytes = grown * sizeof(int32);
+    if (bytes / sizeof(int32) != grown) {
+        LOG_ERROR("Overflow detected during realloc");
+        return -1;
+    }
+
+    int32 *resized = wasm_runtime_realloc(tid_allocator->ids, bytes);
+    if (resized == NULL) {
+        LOG_ERROR("Thread ID allocator realloc failed");
+        return -1;
+    }
+
+    const uint32 fresh = grown - prev_capacity;
+
+    tid_allocator->ids = resized;
+    tid_allocator->size = grown;
+    tid_allocator->pos = fresh;
+
+    // Slots [0, fresh) receive the identifiers that were never issued,
+    // with the smallest of them ending up on top of the stack
+    for (uint32 slot = 0; slot < fresh; slot++)
+        resized[slot] = TID_MIN + (grown - 1 - slot);
+
+    return tid_allocator->ids[--tid_allocator->pos];
+}
+
+/* Return a thread identifier to the allocator so it can be issued again.
+ * The identifier is pushed on top of the stack, so it is the next one handed
+ * out. The value of thread_id is not validated here; only the stack bound is
+ * asserted. */
+void
+tid_allocator_release_tid(TidAllocator *tid_allocator, int32 thread_id)
+{
+    // Release thread identifier by pushing it into the stack
+    bh_assert(tid_allocator->pos < tid_allocator->size);
+    tid_allocator->ids[tid_allocator->pos++] = thread_id;
+}

+ 36 - 0
core/iwasm/libraries/lib-wasi-threads/tid_allocator.h

@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _TID_ALLOCATOR_H
+#define _TID_ALLOCATOR_H
+
+#include "platform_common.h"
+
+#define TID_ALLOCATOR_INIT_SIZE CLUSTER_MAX_THREAD_NUM
+/* Inclusive bounds of the thread identifiers the allocator hands out.
+ * NOTE(review): the trailing comment below says "Reserved TIDs"; presumably
+ * the values outside this range are the reserved ones - confirm against the
+ * wasi-threads specification. */
+enum {
+    TID_MIN = 1,
+    TID_MAX = 0x1FFFFFFF
+}; // Reserved TIDs (WASI specification)
+
+/* Stack data structure to track available thread identifiers.
+ * Identifiers are popped by tid_allocator_get_tid() and pushed back by
+ * tid_allocator_release_tid(); pos == 0 means no free identifier is left
+ * and the next request has to grow the array. */
+typedef struct {
+    int32 *ids;  // Array used to store the stack
+    uint32 size; // Stack capacity
+    uint32 pos;  // Index of the element after the stack top
+} TidAllocator;
+
+bool
+tid_allocator_init(TidAllocator *tid_allocator);
+
+void
+tid_allocator_deinit(TidAllocator *tid_allocator);
+
+int32
+tid_allocator_get_tid(TidAllocator *tid_allocator);
+
+void
+tid_allocator_release_tid(TidAllocator *tid_allocator, int32 thread_id);
+
+#endif /* _TID_ALLOCATOR_H */

+ 0 - 4
core/iwasm/libraries/libc-emcc/libc_emcc_wrapper.c

@@ -37,10 +37,6 @@
     wasm_runtime_module_free(module_inst, offset)
 /* clang-format on */
 
-extern bool
-wasm_runtime_call_indirect(wasm_exec_env_t exec_env, uint32 element_idx,
-                           uint32 argc, uint32 argv[]);
-
 static void
 invoke_viiii_wrapper(wasm_exec_env_t exec_env, uint32 elem_idx, int arg0,
                      int arg1, int arg2, int arg3)

+ 120 - 20
core/iwasm/libraries/libc-wasi/libc_wasi_wrapper.c

@@ -6,6 +6,11 @@
 #include "libc_wasi_wrapper.h"
 #include "bh_platform.h"
 #include "wasm_export.h"
+#include "wasm_runtime_common.h"
+
+#if WASM_ENABLE_THREAD_MGR != 0
+#include "../../../thread-mgr/thread_manager.h"
+#endif
 
 void
 wasm_runtime_set_exception(wasm_module_inst_t module, const char *exception);
@@ -46,23 +51,17 @@ typedef struct iovec_app {
     uint32 buf_len;
 } iovec_app_t;
 
-typedef struct WASIContext {
-    struct fd_table *curfds;
-    struct fd_prestats *prestats;
-    struct argv_environ_values *argv_environ;
-    struct addr_pool *addr_pool;
-    char *ns_lookup_buf;
-    char **ns_lookup_list;
-    char *argv_buf;
-    char **argv_list;
-    char *env_buf;
-    char **env_list;
-    uint32_t exit_code;
-} * wasi_ctx_t;
+typedef struct WASIContext *wasi_ctx_t;
 
 wasi_ctx_t
 wasm_runtime_get_wasi_ctx(wasm_module_inst_t module_inst);
 
+/* Return the smaller of two size_t values. */
+static inline size_t
+min(size_t a, size_t b)
+{
+    return a < b ? a : b;
+}
+
 static inline struct fd_table *
 wasi_ctx_get_curfds(wasm_module_inst_t module_inst, wasi_ctx_t wasi_ctx)
 {
@@ -951,6 +950,108 @@ wasi_path_remove_directory(wasm_exec_env_t exec_env, wasi_fd_t fd,
     return wasmtime_ssp_path_remove_directory(curfds, fd, path, path_len);
 }
 
+#if WASM_ENABLE_THREAD_MGR != 0
+/* Find the shortest relative clock timeout among the subscriptions.
+ *
+ * Only clock subscriptions without __WASI_SUBSCRIPTION_CLOCK_ABSTIME are
+ * considered. Returns (__wasi_timestamp_t)-1 when there is none.
+ *
+ * The comparison is done directly on __wasi_timestamp_t instead of going
+ * through min(), whose size_t parameters would truncate the values (and the
+ * -1 sentinel) on hosts where size_t is narrower than the timestamp type. */
+static __wasi_timestamp_t
+get_timeout_for_poll_oneoff(const wasi_subscription_t *in,
+                            uint32 nsubscriptions)
+{
+    __wasi_timestamp_t timeout = (__wasi_timestamp_t)-1;
+    uint32 i = 0;
+
+    for (i = 0; i < nsubscriptions; ++i) {
+        const __wasi_subscription_t *s = &in[i];
+        if (s->u.type == __WASI_EVENTTYPE_CLOCK
+            && (s->u.u.clock.flags & __WASI_SUBSCRIPTION_CLOCK_ABSTIME) == 0
+            && s->u.u.clock.timeout < timeout) {
+            timeout = s->u.u.clock.timeout;
+        }
+    }
+    return timeout;
+}
+
+/* Set the timeout of every clock subscription in the array to new_timeout;
+ * subscriptions of any other type are left untouched.
+ * NOTE(review): subscriptions flagged __WASI_SUBSCRIPTION_CLOCK_ABSTIME are
+ * overwritten as well, although get_timeout_for_poll_oneoff() ignores them -
+ * confirm this is intended. */
+static void
+update_clock_subscription_data(wasi_subscription_t *in, uint32 nsubscriptions,
+                               const wasi_timestamp_t new_timeout)
+{
+    uint32 idx;
+
+    for (idx = 0; idx < nsubscriptions; idx++) {
+        __wasi_subscription_t *sub = &in[idx];
+
+        if (sub->u.type != __WASI_EVENTTYPE_CLOCK)
+            continue;
+
+        sub->u.u.clock.timeout = new_timeout;
+    }
+}
+
+/* Run poll_oneoff in slices of at most time_quant (10^9) so that a
+ * termination request for the calling thread is noticed between slices.
+ *
+ * The subscriptions are copied, and before each slice the clock timeouts in
+ * the copy are set to the smaller of time_quant and the time remaining until
+ * the shortest relative timeout. Polling stops when
+ *   - the underlying poll reports an error (that error is returned),
+ *   - the thread has been terminated (__WASI_EINTR is returned),
+ *   - an event that is not a clock event was delivered, or
+ *   - the accumulated slices exceed the shortest relative timeout.
+ *
+ * Returns __WASI_ESUCCESS with *nevents / out describing the last poll,
+ * __WASI_ENOMEM when the copy cannot be allocated, or one of the errors
+ * above.
+ *
+ * NOTE(review): elapsed advances by a full time_quant per slice regardless
+ * of how long the poll actually blocked. */
+static wasi_errno_t
+execute_interruptible_poll_oneoff(
+#if !defined(WASMTIME_SSP_STATIC_CURFDS)
+    struct fd_table *curfds,
+#endif
+    const __wasi_subscription_t *in, __wasi_event_t *out, size_t nsubscriptions,
+    size_t *nevents, wasm_exec_env_t exec_env)
+{
+    if (nsubscriptions == 0) {
+        *nevents = 0;
+        return __WASI_ESUCCESS;
+    }
+
+    wasi_errno_t err = __WASI_ESUCCESS;
+    __wasi_timestamp_t elapsed = 0, remaining;
+    bool all_outs_are_type_clock;
+    uint32 i;
+
+    const __wasi_timestamp_t timeout = get_timeout_for_poll_oneoff(
+                                 in, nsubscriptions),
+                             time_quant = 1e9;
+    const uint64 size_to_copy =
+        nsubscriptions * (uint64)sizeof(wasi_subscription_t);
+    __wasi_subscription_t *in_copy = NULL;
+
+    if (size_to_copy >= UINT32_MAX
+        || !(in_copy = (__wasi_subscription_t *)wasm_runtime_malloc(
+                 (uint32)size_to_copy))) {
+        return __WASI_ENOMEM;
+    }
+
+    bh_memcpy_s(in_copy, size_to_copy, in, size_to_copy);
+
+    while (timeout == (__wasi_timestamp_t)-1 || elapsed <= timeout) {
+        /* update timeout for clock subscription events; compare as
+           timestamps, min() takes size_t and would truncate on hosts
+           where size_t is narrower than the timestamp type */
+        remaining = timeout - elapsed;
+        update_clock_subscription_data(
+            in_copy, nsubscriptions,
+            remaining < time_quant ? remaining : time_quant);
+        err = wasmtime_ssp_poll_oneoff(curfds, in_copy, out, nsubscriptions,
+                                       nevents);
+        elapsed += time_quant;
+
+        if (err)
+            break;
+
+        if (wasm_cluster_is_thread_terminated(exec_env)) {
+            /* report the WASI error code, not the host errno value */
+            err = __WASI_EINTR;
+            break;
+        }
+
+        if (*nevents > 0) {
+            all_outs_are_type_clock = true;
+            for (i = 0; i < *nevents; i++) {
+                if (out[i].type != __WASI_EVENTTYPE_CLOCK) {
+                    all_outs_are_type_clock = false;
+                    break;
+                }
+            }
+
+            /* a non-clock event is a real wake-up: stop polling */
+            if (!all_outs_are_type_clock)
+                break;
+        }
+    }
+
+    wasm_runtime_free(in_copy);
+    return err;
+}
+#endif
+
 static wasi_errno_t
 wasi_poll_oneoff(wasm_exec_env_t exec_env, const wasi_subscription_t *in,
                  wasi_event_t *out, uint32 nsubscriptions, uint32 *nevents_app)
@@ -958,7 +1059,7 @@ wasi_poll_oneoff(wasm_exec_env_t exec_env, const wasi_subscription_t *in,
     wasm_module_inst_t module_inst = get_module_inst(exec_env);
     wasi_ctx_t wasi_ctx = get_wasi_ctx(module_inst);
     struct fd_table *curfds = wasi_ctx_get_curfds(module_inst, wasi_ctx);
-    size_t nevents;
+    size_t nevents = 0;
     wasi_errno_t err;
 
     if (!wasi_ctx)
@@ -969,7 +1070,12 @@ wasi_poll_oneoff(wasm_exec_env_t exec_env, const wasi_subscription_t *in,
         || !validate_native_addr(nevents_app, sizeof(uint32)))
         return (wasi_errno_t)-1;
 
+#if WASM_ENABLE_THREAD_MGR == 0
     err = wasmtime_ssp_poll_oneoff(curfds, in, out, nsubscriptions, &nevents);
+#else
+    err = execute_interruptible_poll_oneoff(curfds, in, out, nsubscriptions,
+                                            &nevents, exec_env);
+#endif
     if (err)
         return err;
 
@@ -1861,12 +1967,6 @@ allocate_iovec_app_buffer(wasm_module_inst_t module_inst,
     return __WASI_ESUCCESS;
 }
 
-static inline size_t
-min(size_t a, size_t b)
-{
-    return a > b ? b : a;
-}
-
 static wasi_errno_t
 copy_buffer_to_iovec_app(wasm_module_inst_t module_inst, uint8 *buf_begin,
                          uint32 buf_size, iovec_app_t *data, uint32 data_len,

+ 15 - 2
core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/posix.c

@@ -685,9 +685,20 @@ fd_table_insert_existing(struct fd_table *ft, __wasi_fd_t in, int out)
     struct fd_object *fo;
     __wasi_errno_t error;
 
-    if (fd_determine_type_rights(out, &type, &rights_base, &rights_inheriting)
-        != 0)
+    error =
+        fd_determine_type_rights(out, &type, &rights_base, &rights_inheriting);
+    if (error != 0) {
+#ifdef BH_PLATFORM_EGO
+        /**
+         * since it is an already opened file and we can assume the opened file
+         * has all necessary rights no matter how to get
+         */
+        if (error != __WASI_ENOTSUP)
+            return false;
+#else
         return false;
+#endif
+    }
 
     error = fd_object_new(type, &fo);
     if (error != 0)
@@ -2602,6 +2613,8 @@ wasmtime_ssp_poll_oneoff(
         }
 #endif
         *nevents = 1;
+        if (out[0].error != 0)
+            return convert_errno(out[0].error);
         return 0;
     }
 

+ 11 - 1
core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/ssp_config.h

@@ -108,7 +108,17 @@
 #endif
 
 #if !defined(BH_PLATFORM_LINUX_SGX)
-#if defined(__GNUC_PREREQ)
+/* Clang's __GNUC_PREREQ macro has a different meaning than GCC's,
+so we have to handle this case specially */
+#if defined(__clang__)
+/* Clang provides stdatomic.h since 3.6.0
+See https://releases.llvm.org/3.6.0/tools/clang/docs/ReleaseNotes.html */
+#if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 6)
+#define CONFIG_HAS_STD_ATOMIC 1
+#else
+#define CONFIG_HAS_STD_ATOMIC 0
+#endif
+#elif defined(__GNUC_PREREQ)
 /* Even though older versions of GCC support C11, atomics were
 not implemented until 4.9. See
 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58016 */

+ 210 - 47
core/iwasm/libraries/thread-mgr/thread_manager.c

@@ -16,6 +16,10 @@
 #include "debug_engine.h"
 #endif
 
+#if WASM_ENABLE_SHARED_MEMORY != 0
+#include "wasm_shared_memory.h"
+#endif
+
 typedef struct {
     bh_list_link l;
     void (*destroy_cb)(WASMCluster *);
@@ -96,7 +100,8 @@ safe_traverse_exec_env_list(WASMCluster *cluster, list_visitor visitor,
     while (node) {
         bool already_processed = false;
         void *proc_node;
-        for (size_t i = 0; i < bh_vector_size(&proc_nodes); i++) {
+        uint32 i;
+        for (i = 0; i < (uint32)bh_vector_size(&proc_nodes); i++) {
             if (!bh_vector_get(&proc_nodes, i, &proc_node)) {
                 ret = false;
                 goto final;
@@ -130,8 +135,21 @@ final:
 
 /* The caller must lock cluster->lock */
 static bool
-allocate_aux_stack(WASMCluster *cluster, uint32 *start, uint32 *size)
+allocate_aux_stack(WASMExecEnv *exec_env, uint32 *start, uint32 *size)
 {
+    WASMCluster *cluster = wasm_exec_env_get_cluster(exec_env);
+#if WASM_ENABLE_HEAP_AUX_STACK_ALLOCATION != 0
+    WASMModuleInstanceCommon *module_inst =
+        wasm_exec_env_get_module_inst(exec_env);
+    uint32 stack_end;
+
+    stack_end =
+        wasm_runtime_module_malloc(module_inst, cluster->stack_size, NULL);
+    *start = stack_end + cluster->stack_size;
+    *size = cluster->stack_size;
+
+    return stack_end != 0;
+#else
     uint32 i;
 
     /* If the module doesn't have aux stack info,
@@ -151,12 +169,29 @@ allocate_aux_stack(WASMCluster *cluster, uint32 *start, uint32 *size)
     }
 
     return false;
+#endif
 }
 
 /* The caller must lock cluster->lock */
 static bool
-free_aux_stack(WASMCluster *cluster, uint32 start)
+free_aux_stack(WASMExecEnv *exec_env, uint32 start)
 {
+    WASMCluster *cluster = wasm_exec_env_get_cluster(exec_env);
+
+#if WASM_ENABLE_HEAP_AUX_STACK_ALLOCATION != 0
+    WASMModuleInstanceCommon *module_inst =
+        wasm_exec_env_get_module_inst(exec_env);
+
+    if (!wasm_exec_env_is_aux_stack_managed_by_runtime(exec_env)) {
+        return true;
+    }
+
+    bh_assert(start >= cluster->stack_size);
+
+    wasm_runtime_module_free(module_inst, start - cluster->stack_size);
+
+    return true;
+#else
     uint32 i;
 
     for (i = 0; i < cluster_max_thread_num; i++) {
@@ -166,14 +201,14 @@ free_aux_stack(WASMCluster *cluster, uint32 start)
         }
     }
     return false;
+#endif
 }
 
 WASMCluster *
 wasm_cluster_create(WASMExecEnv *exec_env)
 {
     WASMCluster *cluster;
-    uint64 total_size;
-    uint32 aux_stack_start, aux_stack_size, i;
+    uint32 aux_stack_start, aux_stack_size;
 
     bh_assert(exec_env->cluster == NULL);
     if (!(cluster = wasm_runtime_malloc(sizeof(WASMCluster)))) {
@@ -195,7 +230,9 @@ wasm_cluster_create(WASMExecEnv *exec_env)
     /* Prepare the aux stack top and size for every thread */
     if (!wasm_exec_env_get_aux_stack(exec_env, &aux_stack_start,
                                      &aux_stack_size)) {
+#if WASM_ENABLE_LIB_WASI_THREADS == 0
         LOG_VERBOSE("No aux stack info for this module, can't create thread");
+#endif
 
         /* If the module don't have aux stack info, don't throw error here,
             but remain stack_tops and stack_segment_occupied as NULL */
@@ -209,12 +246,16 @@ wasm_cluster_create(WASMExecEnv *exec_env)
         return cluster;
     }
 
+#if WASM_ENABLE_HEAP_AUX_STACK_ALLOCATION != 0
+    cluster->stack_size = aux_stack_size;
+#else
     cluster->stack_size = aux_stack_size / (cluster_max_thread_num + 1);
     if (cluster->stack_size < WASM_THREAD_AUX_STACK_SIZE_MIN) {
         goto fail;
     }
     /* Make stack size 16-byte aligned */
     cluster->stack_size = cluster->stack_size & (~15);
+#endif
 
     /* Set initial aux stack top to the instance and
         aux stack boundary to the main exec_env */
@@ -222,8 +263,10 @@ wasm_cluster_create(WASMExecEnv *exec_env)
                                      cluster->stack_size))
         goto fail;
 
+#if WASM_ENABLE_HEAP_AUX_STACK_ALLOCATION == 0
     if (cluster_max_thread_num != 0) {
-        total_size = cluster_max_thread_num * sizeof(uint32);
+        uint64 total_size = cluster_max_thread_num * sizeof(uint32);
+        uint32 i;
         if (total_size >= UINT32_MAX
             || !(cluster->stack_tops =
                      wasm_runtime_malloc((uint32)total_size))) {
@@ -245,6 +288,7 @@ wasm_cluster_create(WASMExecEnv *exec_env)
             cluster->stack_tops[i] = aux_stack_start - cluster->stack_size * i;
         }
     }
+#endif
 
     os_mutex_lock(&cluster_list_lock);
     if (bh_list_insert(cluster_list, cluster) != 0) {
@@ -284,10 +328,12 @@ wasm_cluster_destroy(WASMCluster *cluster)
 
     os_mutex_destroy(&cluster->lock);
 
+#if WASM_ENABLE_HEAP_AUX_STACK_ALLOCATION == 0
     if (cluster->stack_tops)
         wasm_runtime_free(cluster->stack_tops);
     if (cluster->stack_segment_occupied)
         wasm_runtime_free(cluster->stack_segment_occupied);
+#endif
 
 #if WASM_ENABLE_DEBUG_INTERP != 0
     wasm_debug_instance_destroy(cluster);
@@ -323,7 +369,13 @@ wasm_cluster_add_exec_env(WASMCluster *cluster, WASMExecEnv *exec_env)
 
     exec_env->cluster = cluster;
 
-    if (bh_list_insert(&cluster->exec_env_list, exec_env) != 0)
+    if (cluster->exec_env_list.len == cluster_max_thread_num + 1) {
+        LOG_ERROR("thread manager error: "
+                  "maximum number of threads exceeded");
+        ret = false;
+    }
+
+    if (ret && bh_list_insert(&cluster->exec_env_list, exec_env) != 0)
         ret = false;
 
     return ret;
@@ -461,7 +513,7 @@ wasm_cluster_spawn_exec_env(WASMExecEnv *exec_env)
     if (!new_exec_env)
         goto fail2;
 
-    if (!allocate_aux_stack(cluster, &aux_stack_start, &aux_stack_size)) {
+    if (!allocate_aux_stack(exec_env, &aux_stack_start, &aux_stack_size)) {
         LOG_ERROR("thread manager error: "
                   "failed to allocate aux stack space for new thread");
         goto fail3;
@@ -482,9 +534,9 @@ wasm_cluster_spawn_exec_env(WASMExecEnv *exec_env)
 
 fail4:
     /* free the allocated aux stack space */
-    free_aux_stack(cluster, aux_stack_start);
+    free_aux_stack(exec_env, aux_stack_start);
 fail3:
-    wasm_exec_env_destroy(new_exec_env);
+    wasm_exec_env_destroy_internal(new_exec_env);
 fail2:
     wasm_runtime_deinstantiate_internal(new_module_inst, true);
 fail1:
@@ -500,14 +552,18 @@ wasm_cluster_destroy_spawned_exec_env(WASMExecEnv *exec_env)
     wasm_module_inst_t module_inst = wasm_runtime_get_module_inst(exec_env);
     bh_assert(cluster != NULL);
 
-    /* Free aux stack space */
     os_mutex_lock(&cluster->lock);
-    free_aux_stack(cluster, exec_env->aux_stack_bottom.bottom);
+
+    /* Free aux stack space */
+    free_aux_stack(exec_env, exec_env->aux_stack_bottom.bottom);
+    /* Remove exec_env */
     wasm_cluster_del_exec_env(cluster, exec_env);
-    os_mutex_unlock(&cluster->lock);
+    /* Destroy exec_env */
     wasm_exec_env_destroy_internal(exec_env);
-
+    /* Routine exit, destroy instance */
     wasm_runtime_deinstantiate_internal(module_inst, true);
+
+    os_mutex_unlock(&cluster->lock);
 }
 
 /* start routine of thread manager */
@@ -517,30 +573,59 @@ thread_manager_start_routine(void *arg)
     void *ret;
     WASMExecEnv *exec_env = (WASMExecEnv *)arg;
     WASMCluster *cluster = wasm_exec_env_get_cluster(exec_env);
+    WASMModuleInstanceCommon *module_inst =
+        wasm_exec_env_get_module_inst(exec_env);
+
     bh_assert(cluster != NULL);
+    bh_assert(module_inst != NULL);
 
+    os_mutex_lock(&exec_env->wait_lock);
     exec_env->handle = os_self_thread();
+    /* Notify the parent thread to continue running */
+    os_cond_signal(&exec_env->wait_cond);
+    os_mutex_unlock(&exec_env->wait_lock);
+
     ret = exec_env->thread_start_routine(exec_env);
 
 #if defined(OS_ENABLE_HW_BOUND_CHECK) || defined(OS_ENABLE_INTERRUPT_BLOCK_INSN)
+    os_mutex_lock(&exec_env->wait_lock);
     if (exec_env->suspend_flags.flags & 0x08)
         ret = exec_env->thread_ret_value;
+    os_mutex_unlock(&exec_env->wait_lock);
 #endif
 
     /* Routine exit */
-    /* Detach the native thread here to ensure the resources are freed */
-    wasm_cluster_detach_thread(exec_env);
+
 #if WASM_ENABLE_DEBUG_INTERP != 0
     wasm_cluster_thread_exited(exec_env);
 #endif
+
+    os_mutex_lock(&cluster_list_lock);
+
     os_mutex_lock(&cluster->lock);
+
+    /* Detach the native thread here to ensure the resources are freed */
+    if (exec_env->wait_count == 0 && !exec_env->thread_is_detached) {
+        /* Only detach current thread when there is no other thread
+           joining it, otherwise let the system resources for the
+           thread be released after joining */
+        os_thread_detach(exec_env->handle);
+        /* No need to set exec_env->thread_is_detached to true here
+           since we will exit soon */
+    }
+
     /* Free aux stack space */
-    free_aux_stack(cluster, exec_env->aux_stack_bottom.bottom);
-    /* Remove and exec_env */
+    free_aux_stack(exec_env, exec_env->aux_stack_bottom.bottom);
+    /* Remove exec_env */
     wasm_cluster_del_exec_env(cluster, exec_env);
-    os_mutex_unlock(&cluster->lock);
-    /* destroy exec_env */
+    /* Destroy exec_env */
     wasm_exec_env_destroy_internal(exec_env);
+    /* Routine exit, destroy instance */
+    wasm_runtime_deinstantiate_internal(module_inst, true);
+
+    os_mutex_unlock(&cluster->lock);
+
+    os_mutex_unlock(&cluster_list_lock);
 
     os_thread_exit(ret);
     return ret;
@@ -548,12 +633,12 @@ thread_manager_start_routine(void *arg)
 
 int32
 wasm_cluster_create_thread(WASMExecEnv *exec_env,
-                           wasm_module_inst_t module_inst,
+                           wasm_module_inst_t module_inst, bool alloc_aux_stack,
                            void *(*thread_routine)(void *), void *arg)
 {
     WASMCluster *cluster;
     WASMExecEnv *new_exec_env;
-    uint32 aux_stack_start, aux_stack_size;
+    uint32 aux_stack_start = 0, aux_stack_size;
     korp_tid tid;
 
     cluster = wasm_exec_env_get_cluster(exec_env);
@@ -570,16 +655,23 @@ wasm_cluster_create_thread(WASMExecEnv *exec_env,
     if (!new_exec_env)
         goto fail1;
 
-    if (!allocate_aux_stack(cluster, &aux_stack_start, &aux_stack_size)) {
-        LOG_ERROR("thread manager error: "
-                  "failed to allocate aux stack space for new thread");
-        goto fail2;
-    }
+    if (alloc_aux_stack) {
+        if (!allocate_aux_stack(exec_env, &aux_stack_start, &aux_stack_size)) {
+            LOG_ERROR("thread manager error: "
+                      "failed to allocate aux stack space for new thread");
+            goto fail2;
+        }
 
-    /* Set aux stack for current thread */
-    if (!wasm_exec_env_set_aux_stack(new_exec_env, aux_stack_start,
-                                     aux_stack_size)) {
-        goto fail3;
+        /* Set aux stack for current thread */
+        if (!wasm_exec_env_set_aux_stack(new_exec_env, aux_stack_start,
+                                         aux_stack_size)) {
+            goto fail3;
+        }
+    }
+    else {
+        /* Disable aux stack */
+        new_exec_env->aux_stack_boundary.boundary = 0;
+        new_exec_env->aux_stack_bottom.bottom = UINT32_MAX;
     }
 
     if (!wasm_cluster_add_exec_env(cluster, new_exec_env))
@@ -588,13 +680,21 @@ wasm_cluster_create_thread(WASMExecEnv *exec_env,
     new_exec_env->thread_start_routine = thread_routine;
     new_exec_env->thread_arg = arg;
 
+    os_mutex_lock(&new_exec_env->wait_lock);
+
     if (0
         != os_thread_create(&tid, thread_manager_start_routine,
                             (void *)new_exec_env,
                             APP_THREAD_STACK_SIZE_DEFAULT)) {
+        os_mutex_unlock(&new_exec_env->wait_lock);
         goto fail4;
     }
 
+    /* Wait until the new_exec_env->handle is set to avoid it is
+       illegally accessed after unlocking cluster->lock */
+    os_cond_wait(&new_exec_env->wait_cond, &new_exec_env->wait_lock);
+    os_mutex_unlock(&new_exec_env->wait_lock);
+
     os_mutex_unlock(&cluster->lock);
 
     return 0;
@@ -603,9 +703,10 @@ fail4:
     wasm_cluster_del_exec_env(cluster, new_exec_env);
 fail3:
     /* free the allocated aux stack space */
-    free_aux_stack(cluster, aux_stack_start);
+    if (alloc_aux_stack)
+        free_aux_stack(exec_env, aux_stack_start);
 fail2:
-    wasm_exec_env_destroy(new_exec_env);
+    wasm_exec_env_destroy_internal(new_exec_env);
 fail1:
     os_mutex_unlock(&cluster->lock);
 
@@ -753,15 +854,18 @@ clusters_have_exec_env(WASMExecEnv *exec_env)
     WASMExecEnv *node;
 
     while (cluster) {
+        os_mutex_lock(&cluster->lock);
         node = bh_list_first_elem(&cluster->exec_env_list);
 
         while (node) {
             if (node == exec_env) {
                 bh_assert(exec_env->cluster == cluster);
+                os_mutex_unlock(&cluster->lock);
                 return true;
             }
             node = bh_list_elem_next(node);
         }
+        os_mutex_unlock(&cluster->lock);
 
         cluster = bh_list_elem_next(cluster);
     }
@@ -775,20 +879,20 @@ wasm_cluster_join_thread(WASMExecEnv *exec_env, void **ret_val)
     korp_tid handle;
 
     os_mutex_lock(&cluster_list_lock);
-    os_mutex_lock(&exec_env->cluster->lock);
 
     if (!clusters_have_exec_env(exec_env) || exec_env->thread_is_detached) {
         /* Invalid thread, thread has exited or thread has been detached */
         if (ret_val)
             *ret_val = NULL;
-        os_mutex_unlock(&exec_env->cluster->lock);
         os_mutex_unlock(&cluster_list_lock);
         return 0;
     }
+
+    os_mutex_lock(&exec_env->wait_lock);
     exec_env->wait_count++;
     handle = exec_env->handle;
+    os_mutex_unlock(&exec_env->wait_lock);
 
-    os_mutex_unlock(&exec_env->cluster->lock);
     os_mutex_unlock(&cluster_list_lock);
 
     return os_thread_join(handle, ret_val);
@@ -820,6 +924,7 @@ void
 wasm_cluster_exit_thread(WASMExecEnv *exec_env, void *retval)
 {
     WASMCluster *cluster;
+    WASMModuleInstanceCommon *module_inst;
 
 #if defined(OS_ENABLE_HW_BOUND_CHECK) || defined(OS_ENABLE_INTERRUPT_BLOCK_INSN)
     if (exec_env->jmpbuf_stack_top) {
@@ -844,16 +949,37 @@ wasm_cluster_exit_thread(WASMExecEnv *exec_env, void *retval)
     wasm_cluster_clear_thread_signal(exec_env);
     wasm_cluster_thread_exited(exec_env);
 #endif
+
     /* App exit the thread, free the resources before exit native thread */
-    /* Detach the native thread here to ensure the resources are freed */
-    wasm_cluster_detach_thread(exec_env);
+
+    os_mutex_lock(&cluster_list_lock);
+
     os_mutex_lock(&cluster->lock);
+
+    /* Detach the native thread here to ensure the resources are freed */
+    if (exec_env->wait_count == 0 && !exec_env->thread_is_detached) {
+        /* Only detach current thread when there is no other thread
+           joining it, otherwise let the system resources for the
+           thread be released after joining */
+        os_thread_detach(exec_env->handle);
+        /* No need to set exec_env->thread_is_detached to true here
+           since we will exit soon */
+    }
+
+    module_inst = exec_env->module_inst;
+
     /* Free aux stack space */
-    free_aux_stack(cluster, exec_env->aux_stack_bottom.bottom);
-    /* Remove and destroy exec_env */
+    free_aux_stack(exec_env, exec_env->aux_stack_bottom.bottom);
+    /* Remove exec_env */
     wasm_cluster_del_exec_env(cluster, exec_env);
-    os_mutex_unlock(&cluster->lock);
+    /* Destroy exec_env */
     wasm_exec_env_destroy_internal(exec_env);
+    /* Routine exit, destroy instance */
+    wasm_runtime_deinstantiate_internal(module_inst, true);
+
+    os_mutex_unlock(&cluster->lock);
+
+    os_mutex_unlock(&cluster_list_lock);
 
     os_thread_exit(retval);
 }
@@ -861,23 +987,26 @@ wasm_cluster_exit_thread(WASMExecEnv *exec_env, void *retval)
 static void
 set_thread_cancel_flags(WASMExecEnv *exec_env)
 {
+    os_mutex_lock(&exec_env->wait_lock);
     /* Set the termination flag */
 #if WASM_ENABLE_DEBUG_INTERP != 0
     wasm_cluster_thread_send_signal(exec_env, WAMR_SIG_TERM);
 #else
     exec_env->suspend_flags.flags |= 0x01;
 #endif
+    os_mutex_unlock(&exec_env->wait_lock);
 }
 
 int32
 wasm_cluster_cancel_thread(WASMExecEnv *exec_env)
 {
     os_mutex_lock(&cluster_list_lock);
-    os_mutex_lock(&exec_env->cluster->lock);
 
     if (!exec_env->cluster) {
-        goto final;
+        os_mutex_unlock(&cluster_list_lock);
+        return 0;
     }
+
     if (!clusters_have_exec_env(exec_env)) {
         /* Invalid thread or the thread has exited */
         goto final;
@@ -886,7 +1015,6 @@ wasm_cluster_cancel_thread(WASMExecEnv *exec_env)
     set_thread_cancel_flags(exec_env);
 
 final:
-    os_mutex_unlock(&exec_env->cluster->lock);
     os_mutex_unlock(&cluster_list_lock);
 
     return 0;
@@ -1079,9 +1207,23 @@ set_exception_visitor(void *node, void *user_data)
         WASMModuleInstance *curr_wasm_inst =
             (WASMModuleInstance *)get_module_inst(curr_exec_env);
 
-        bh_memcpy_s(curr_wasm_inst->cur_exception,
-                    sizeof(curr_wasm_inst->cur_exception),
-                    wasm_inst->cur_exception, sizeof(wasm_inst->cur_exception));
+        /* Only spread exceptions other than "wasi proc exit" */
+#if WASM_ENABLE_SHARED_MEMORY != 0
+        WASMSharedMemNode *shared_mem_node = wasm_module_get_shared_memory(
+            (WASMModuleCommon *)curr_wasm_inst->module);
+        if (shared_mem_node)
+            os_mutex_lock(&shared_mem_node->shared_mem_lock);
+#endif
+        if (!strstr(wasm_inst->cur_exception, "wasi proc exit")) {
+            bh_memcpy_s(curr_wasm_inst->cur_exception,
+                        sizeof(curr_wasm_inst->cur_exception),
+                        wasm_inst->cur_exception,
+                        sizeof(wasm_inst->cur_exception));
+        }
+#if WASM_ENABLE_SHARED_MEMORY != 0
+        if (shared_mem_node)
+            os_mutex_unlock(&shared_mem_node->shared_mem_lock);
+#endif
 
         /* Terminate the thread so it can exit from dead loops */
         set_thread_cancel_flags(curr_exec_env);
@@ -1098,7 +1240,17 @@ clear_exception_visitor(void *node, void *user_data)
         WASMModuleInstance *curr_wasm_inst =
             (WASMModuleInstance *)get_module_inst(curr_exec_env);
 
+#if WASM_ENABLE_SHARED_MEMORY != 0
+        WASMSharedMemNode *shared_mem_node = wasm_module_get_shared_memory(
+            (WASMModuleCommon *)curr_wasm_inst->module);
+        if (shared_mem_node)
+            os_mutex_lock(&shared_mem_node->shared_mem_lock);
+#endif
         curr_wasm_inst->cur_exception[0] = '\0';
+#if WASM_ENABLE_SHARED_MEMORY != 0
+        if (shared_mem_node)
+            os_mutex_unlock(&shared_mem_node->shared_mem_lock);
+#endif
     }
 }
 
@@ -1147,3 +1299,14 @@ wasm_cluster_spread_custom_data(WASMModuleInstanceCommon *module_inst,
         os_mutex_unlock(&cluster->lock);
     }
 }
+
+bool
+wasm_cluster_is_thread_terminated(WASMExecEnv *exec_env)
+{
+    os_mutex_lock(&exec_env->wait_lock);
+    bool is_thread_terminated =
+        (exec_env->suspend_flags.flags & 0x01) ? true : false;
+    os_mutex_unlock(&exec_env->wait_lock);
+
+    return is_thread_terminated;
+}

+ 8 - 3
core/iwasm/libraries/thread-mgr/thread_manager.h

@@ -26,14 +26,16 @@ struct WASMCluster {
     korp_mutex lock;
     bh_list exec_env_list;
 
+#if WASM_ENABLE_HEAP_AUX_STACK_ALLOCATION == 0
     /* The aux stack of a module with shared memory will be
         divided into several segments. This array store the
         stack top of different segments */
     uint32 *stack_tops;
-    /* Size of every stack segment */
-    uint32 stack_size;
     /* Record which segments are occupied */
     bool *stack_segment_occupied;
+#endif
+    /* Size of every stack segment */
+    uint32 stack_size;
     /* When has_exception == true, this cluster should refuse any spawn thread
      * requests, this flag can be cleared by calling
      * wasm_runtime_clear_exception on instances of any threads of this cluster
@@ -74,7 +76,7 @@ wasm_exec_env_get_cluster(WASMExecEnv *exec_env);
 
 int32
 wasm_cluster_create_thread(WASMExecEnv *exec_env,
-                           wasm_module_inst_t module_inst,
+                           wasm_module_inst_t module_inst, bool alloc_aux_stack,
                            void *(*thread_routine)(void *), void *arg);
 
 int32
@@ -149,6 +151,9 @@ void
 wasm_cluster_spread_custom_data(WASMModuleInstanceCommon *module_inst,
                                 void *custom_data);
 
+bool
+wasm_cluster_is_thread_terminated(WASMExecEnv *exec_env);
+
 #if WASM_ENABLE_DEBUG_INTERP != 0
 #define WAMR_SIG_TRAP (5)
 #define WAMR_SIG_STOP (19)

+ 0 - 9
core/iwasm/libraries/wasi-nn/README.md

@@ -19,12 +19,6 @@ To run the tests we assume that the current directory is the root of the reposit
 
 ### Build the runtime
 
-Build the runtime base image,
-
-```
-docker build -t wasi-nn-base -f core/iwasm/libraries/wasi-nn/test/Dockerfile.base .
-```
-
 Build the runtime image for your execution target type.
 
 `EXECUTION_TYPE` can be:
@@ -84,9 +78,6 @@ Requirements:
 
 Supported:
 
-* Only 1 WASM app at a time.
-* Only 1 model at a time.
-    * `graph` and `graph-execution-context` are ignored.
 * Graph encoding: `tensorflowlite`.
 * Execution target: `cpu` and `gpu`.
 * Tensor type: `fp32`.

+ 36 - 30
core/iwasm/libraries/wasi-nn/src/utils/logger.h

@@ -13,51 +13,57 @@
     (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
 
 /* Disable a level by removing the define */
-#define ENABLE_ERR_LOG
-#define ENABLE_WARN_LOG
-#define ENABLE_DBG_LOG
-#define ENABLE_INFO_LOG
+#ifndef NN_LOG_LEVEL
+/*
+    0 -> debug, info, warn, err
+    1 -> info, warn, err
+    2 -> warn, err
+    3 -> err
+    4 -> NO LOGS
+*/
+#define NN_LOG_LEVEL 0
+#endif
 
 // Definition of the levels
-#ifdef ENABLE_ERR_LOG
-#define NN_ERR_PRINTF(fmt, ...)                                        \
-    do {                                                               \
-        printf("[%s:%d] " fmt, __FILENAME__, __LINE__, ##__VA_ARGS__); \
-        printf("\n");                                                  \
-        fflush(stdout);                                                \
+#if NN_LOG_LEVEL <= 3
+#define NN_ERR_PRINTF(fmt, ...)                                              \
+    do {                                                                     \
+        printf("[%s:%d ERROR] " fmt, __FILENAME__, __LINE__, ##__VA_ARGS__); \
+        printf("\n");                                                        \
+        fflush(stdout);                                                      \
     } while (0)
 #else
 #define NN_ERR_PRINTF(fmt, ...)
 #endif
-#ifdef ENABLE_WARN_LOG
-#define NN_WARN_PRINTF(fmt, ...)                                       \
-    do {                                                               \
-        printf("[%s:%d] " fmt, __FILENAME__, __LINE__, ##__VA_ARGS__); \
-        printf("\n");                                                  \
-        fflush(stdout);                                                \
+#if NN_LOG_LEVEL <= 2
+#define NN_WARN_PRINTF(fmt, ...)                                               \
+    do {                                                                       \
+        printf("[%s:%d WARNING] " fmt, __FILENAME__, __LINE__, ##__VA_ARGS__); \
+        printf("\n");                                                          \
+        fflush(stdout);                                                        \
     } while (0)
 #else
 #define NN_WARN_PRINTF(fmt, ...)
 #endif
-#ifdef ENABLE_DBG_LOG
-#define NN_DBG_PRINTF(fmt, ...)                                        \
-    do {                                                               \
-        printf("[%s:%d] " fmt, __FILENAME__, __LINE__, ##__VA_ARGS__); \
-        printf("\n");                                                  \
-        fflush(stdout);                                                \
+#if NN_LOG_LEVEL <= 1
+#define NN_INFO_PRINTF(fmt, ...)                                            \
+    do {                                                                    \
+        printf("[%s:%d INFO] " fmt, __FILENAME__, __LINE__, ##__VA_ARGS__); \
+        printf("\n");                                                       \
+        fflush(stdout);                                                     \
     } while (0)
 #else
-#define NN_DBG_PRINTF(fmt, ...)
+#define NN_INFO_PRINTF(fmt, ...)
 #endif
-#ifdef ENABLE_INFO_LOG
-#define NN_INFO_PRINTF(fmt, ...)                                       \
-    do {                                                               \
-        printf("[%s:%d] " fmt, __FILENAME__, __LINE__, ##__VA_ARGS__); \
-        printf("\n");                                                  \
-        fflush(stdout);                                                \
+#if NN_LOG_LEVEL <= 0
+#define NN_DBG_PRINTF(fmt, ...)                                              \
+    do {                                                                     \
+        printf("[%s:%d DEBUG] " fmt, __FILENAME__, __LINE__, ##__VA_ARGS__); \
+        printf("\n");                                                        \
+        fflush(stdout);                                                      \
     } while (0)
 #else
-#define NN_INFO_PRINTF(fmt, ...)
+#define NN_DBG_PRINTF(fmt, ...)
 #endif
 
 #endif

+ 21 - 17
core/iwasm/libraries/wasi-nn/src/wasi_nn.c

@@ -22,13 +22,14 @@
 
 /* Definition of 'wasi_nn.h' structs in WASM app format (using offset) */
 
-typedef error (*LOAD)(graph_builder_array *, graph_encoding, execution_target,
-                      graph *);
-typedef error (*INIT_EXECUTION_CONTEXT)(graph, graph_execution_context *);
-typedef error (*SET_INPUT)(graph_execution_context, uint32_t, tensor *);
-typedef error (*COMPUTE)(graph_execution_context);
-typedef error (*GET_OUTPUT)(graph_execution_context, uint32_t, tensor_data,
-                            uint32_t *);
+typedef error (*LOAD)(void *, graph_builder_array *, graph_encoding,
+                      execution_target, graph *);
+typedef error (*INIT_EXECUTION_CONTEXT)(void *, graph,
+                                        graph_execution_context *);
+typedef error (*SET_INPUT)(void *, graph_execution_context, uint32_t, tensor *);
+typedef error (*COMPUTE)(void *, graph_execution_context);
+typedef error (*GET_OUTPUT)(void *, graph_execution_context, uint32_t,
+                            tensor_data, uint32_t *);
 
 typedef struct {
     LOAD load;
@@ -123,12 +124,12 @@ wasi_nn_load(wasm_exec_env_t exec_env, graph_builder_array_wasm *builder,
         goto fail;
     }
 
-    res = lookup[encoding].load(&builder_native, encoding, target, g);
+    WASINNContext *wasi_nn_ctx = wasm_runtime_get_wasi_nn_ctx(instance);
+    res = lookup[encoding].load(wasi_nn_ctx->tflite_ctx, &builder_native,
+                                encoding, target, g);
 
     NN_DBG_PRINTF("wasi_nn_load finished with status %d [graph=%d]", res, *g);
 
-    WASINNContext *wasi_nn_ctx = wasm_runtime_get_wasi_nn_ctx(instance);
-
     wasi_nn_ctx->current_encoding = encoding;
     wasi_nn_ctx->is_initialized = true;
 
@@ -160,8 +161,9 @@ wasi_nn_init_execution_context(wasm_exec_env_t exec_env, graph g,
         return invalid_argument;
     }
 
-    res = lookup[wasi_nn_ctx->current_encoding].init_execution_context(g, ctx);
-    *ctx = g;
+    res = lookup[wasi_nn_ctx->current_encoding].init_execution_context(
+        wasi_nn_ctx->tflite_ctx, g, ctx);
+
     NN_DBG_PRINTF(
         "wasi_nn_init_execution_context finished with status %d [ctx=%d]", res,
         *ctx);
@@ -189,8 +191,8 @@ wasi_nn_set_input(wasm_exec_env_t exec_env, graph_execution_context ctx,
                                     &input_tensor_native)))
         return res;
 
-    res = lookup[wasi_nn_ctx->current_encoding].set_input(ctx, index,
-                                                          &input_tensor_native);
+    res = lookup[wasi_nn_ctx->current_encoding].set_input(
+        wasi_nn_ctx->tflite_ctx, ctx, index, &input_tensor_native);
 
     // XXX: Free intermediate structure pointers
     if (input_tensor_native.dimensions)
@@ -213,7 +215,8 @@ wasi_nn_compute(wasm_exec_env_t exec_env, graph_execution_context ctx)
     if (success != (res = is_model_initialized(wasi_nn_ctx)))
         return res;
 
-    res = lookup[wasi_nn_ctx->current_encoding].compute(ctx);
+    res = lookup[wasi_nn_ctx->current_encoding].compute(wasi_nn_ctx->tflite_ctx,
+                                                        ctx);
     NN_DBG_PRINTF("wasi_nn_compute finished with status %d", res);
     return res;
 }
@@ -241,7 +244,7 @@ wasi_nn_get_output(wasm_exec_env_t exec_env, graph_execution_context ctx,
     }
 
     res = lookup[wasi_nn_ctx->current_encoding].get_output(
-        ctx, index, output_tensor, output_tensor_size);
+        wasi_nn_ctx->tflite_ctx, ctx, index, output_tensor, output_tensor_size);
     NN_DBG_PRINTF("wasi_nn_get_output finished with status %d [data_size=%d]",
                   res, *output_tensor_size);
     return res;
@@ -261,6 +264,7 @@ wasi_nn_initialize()
     }
     wasi_nn_ctx->is_initialized = true;
     wasi_nn_ctx->current_encoding = 3;
+    tensorflowlite_initialize(&wasi_nn_ctx->tflite_ctx);
     return wasi_nn_ctx;
 }
 
@@ -275,7 +279,7 @@ wasi_nn_destroy(WASINNContext *wasi_nn_ctx)
     NN_DBG_PRINTF("Freeing wasi-nn");
     NN_DBG_PRINTF("-> is_initialized: %d", wasi_nn_ctx->is_initialized);
     NN_DBG_PRINTF("-> current_encoding: %d", wasi_nn_ctx->current_encoding);
-    tensorflowlite_destroy();
+    tensorflowlite_destroy(wasi_nn_ctx->tflite_ctx);
     wasm_runtime_free(wasi_nn_ctx);
 }
 

+ 1 - 0
core/iwasm/libraries/wasi-nn/src/wasi_nn_private.h

@@ -11,6 +11,7 @@
 typedef struct {
     bool is_initialized;
     graph_encoding current_encoding;
+    void *tflite_ctx;
 } WASINNContext;
 
 /**

+ 213 - 67
core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp

@@ -16,25 +16,105 @@
 #include <tensorflow/lite/model.h>
 #include <tensorflow/lite/optional_debug_tools.h>
 #include <tensorflow/lite/error_reporter.h>
-#include <tensorflow/lite/delegates/gpu/delegate.h>
 
-/* Global variables */
+#if defined(WASI_NN_ENABLE_GPU)
+#include <tensorflow/lite/delegates/gpu/delegate.h>
+#endif
+
+/* Maximum number of graphs per WASM instance */
+#define MAX_GRAPHS_PER_INST 10
+/* Maximum number of graph execution contexts per WASM instance */
+#define MAX_GRAPH_EXEC_CONTEXTS_PER_INST 10
+
+typedef struct {
+    std::unique_ptr<tflite::Interpreter> interpreter;
+} Interpreter;
+
+typedef struct {
+    char *model_pointer;
+    std::unique_ptr<tflite::FlatBufferModel> model;
+    execution_target target;
+} Model;
+
+typedef struct {
+    uint32_t current_models;
+    Model models[MAX_GRAPHS_PER_INST];
+    uint32_t current_interpreters;
+    Interpreter interpreters[MAX_GRAPH_EXEC_CONTEXTS_PER_INST];
+    korp_mutex g_lock;
+} TFLiteContext;
+
+/* Utils */
+
+static error
+initialize_g(TFLiteContext *tfl_ctx, graph *g)
+{
+    os_mutex_lock(&tfl_ctx->g_lock);
+    if (tfl_ctx->current_models == MAX_GRAPHS_PER_INST) {
+        os_mutex_unlock(&tfl_ctx->g_lock);
+        NN_ERR_PRINTF("Excedded max graphs per WASM instance");
+        return runtime_error;
+    }
+    *g = tfl_ctx->current_models++;
+    os_mutex_unlock(&tfl_ctx->g_lock);
+    return success;
+}
+static error
+initialize_graph_ctx(TFLiteContext *tfl_ctx, graph g,
+                     graph_execution_context *ctx)
+{
+    os_mutex_lock(&tfl_ctx->g_lock);
+    if (tfl_ctx->current_interpreters == MAX_GRAPH_EXEC_CONTEXTS_PER_INST) {
+        os_mutex_unlock(&tfl_ctx->g_lock);
+        NN_ERR_PRINTF("Excedded max graph execution context per WASM instance");
+        return runtime_error;
+    }
+    *ctx = tfl_ctx->current_interpreters++;
+    os_mutex_unlock(&tfl_ctx->g_lock);
+    return success;
+}
 
-static std::unique_ptr<tflite::Interpreter> interpreter;
-static std::unique_ptr<tflite::FlatBufferModel> model;
+static error
+is_valid_graph(TFLiteContext *tfl_ctx, graph g)
+{
+    if (g >= MAX_GRAPHS_PER_INST) {
+        NN_ERR_PRINTF("Invalid graph: %d >= %d.", g, MAX_GRAPHS_PER_INST);
+        return runtime_error;
+    }
+    if (tfl_ctx->models[g].model_pointer == NULL) {
+        NN_ERR_PRINTF("Context (model) non-initialized.");
+        return runtime_error;
+    }
+    if (tfl_ctx->models[g].model == NULL) {
+        NN_ERR_PRINTF("Context (tflite model) non-initialized.");
+        return runtime_error;
+    }
+    return success;
+}
 
-static char *model_pointer = NULL;
+static error
+is_valid_graph_execution_context(TFLiteContext *tfl_ctx,
+                                 graph_execution_context ctx)
+{
+    if (ctx >= MAX_GRAPH_EXEC_CONTEXTS_PER_INST) {
+        NN_ERR_PRINTF("Invalid graph execution context: %d >= %d", ctx,
+                      MAX_GRAPH_EXEC_CONTEXTS_PER_INST);
+        return runtime_error;
+    }
+    if (tfl_ctx->interpreters[ctx].interpreter == NULL) {
+        NN_ERR_PRINTF("Context (interpreter) non-initialized.");
+        return runtime_error;
+    }
+    return success;
+}
 
 /* WASI-NN (tensorflow) implementation */
 
 error
-tensorflowlite_load(graph_builder_array *builder, graph_encoding encoding,
-                    execution_target target, graph *g)
+tensorflowlite_load(void *tflite_ctx, graph_builder_array *builder,
+                    graph_encoding encoding, execution_target target, graph *g)
 {
-    if (model_pointer != NULL) {
-        wasm_runtime_free(model_pointer);
-        model_pointer = NULL;
-    }
+    TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
 
     if (builder->size != 1) {
         NN_ERR_PRINTF("Unexpected builder format.");
@@ -51,39 +131,68 @@ tensorflowlite_load(graph_builder_array *builder, graph_encoding encoding,
         return invalid_argument;
     }
 
+    error res;
+    if (success != (res = initialize_g(tfl_ctx, g)))
+        return res;
+
     uint32_t size = builder->buf[0].size;
 
-    model_pointer = (char *)wasm_runtime_malloc(size);
-    if (model_pointer == NULL) {
+    // Save model
+    tfl_ctx->models[*g].model_pointer = (char *)wasm_runtime_malloc(size);
+    if (tfl_ctx->models[*g].model_pointer == NULL) {
         NN_ERR_PRINTF("Error when allocating memory for model.");
         return missing_memory;
     }
 
-    bh_memcpy_s(model_pointer, size, builder->buf[0].buf, size);
+    bh_memcpy_s(tfl_ctx->models[*g].model_pointer, size, builder->buf[0].buf,
+                size);
 
-    model = tflite::FlatBufferModel::BuildFromBuffer(model_pointer, size, NULL);
-    if (model == NULL) {
+    // Save model flatbuffer
+    tfl_ctx->models[*g].model =
+        std::move(tflite::FlatBufferModel::BuildFromBuffer(
+            tfl_ctx->models[*g].model_pointer, size, NULL));
+
+    if (tfl_ctx->models[*g].model == NULL) {
         NN_ERR_PRINTF("Loading model error.");
-        wasm_runtime_free(model_pointer);
-        model_pointer = NULL;
+        wasm_runtime_free(tfl_ctx->models[*g].model_pointer);
+        tfl_ctx->models[*g].model_pointer = NULL;
         return missing_memory;
     }
 
+    // Save target
+    tfl_ctx->models[*g].target = target;
+    return success;
+}
+
+error
+tensorflowlite_init_execution_context(void *tflite_ctx, graph g,
+                                      graph_execution_context *ctx)
+{
+    TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
+
+    error res;
+    if (success != (res = is_valid_graph(tfl_ctx, g)))
+        return res;
+
+    if (success != (res = initialize_graph_ctx(tfl_ctx, g, ctx)))
+        return res;
+
     // Build the interpreter with the InterpreterBuilder.
     tflite::ops::builtin::BuiltinOpResolver resolver;
-    tflite::InterpreterBuilder tflite_builder(*model, resolver);
-    tflite_builder(&interpreter);
-    if (interpreter == NULL) {
+    tflite::InterpreterBuilder tflite_builder(*tfl_ctx->models[g].model,
+                                              resolver);
+    tflite_builder(&tfl_ctx->interpreters[*ctx].interpreter);
+    if (tfl_ctx->interpreters[*ctx].interpreter == NULL) {
         NN_ERR_PRINTF("Error when generating the interpreter.");
-        wasm_runtime_free(model_pointer);
-        model_pointer = NULL;
         return missing_memory;
     }
 
     bool use_default = false;
-    switch (target) {
+    switch (tfl_ctx->models[g].target) {
         case gpu:
         {
+#if defined(WASI_NN_ENABLE_GPU)
+            NN_WARN_PRINTF("GPU enabled.");
             // https://www.tensorflow.org/lite/performance/gpu
             auto options = TfLiteGpuDelegateOptionsV2Default();
             options.inference_preference =
@@ -91,10 +200,16 @@ tensorflowlite_load(graph_builder_array *builder, graph_encoding encoding,
             options.inference_priority1 =
                 TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY;
             auto *delegate = TfLiteGpuDelegateV2Create(&options);
-            if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
+            if (tfl_ctx->interpreters[*ctx]
+                    .interpreter->ModifyGraphWithDelegate(delegate)
+                != kTfLiteOk) {
                 NN_ERR_PRINTF("Error when enabling GPU delegate.");
                 use_default = true;
             }
+#else
+            NN_WARN_PRINTF("GPU not enabled.");
+            use_default = true;
+#endif
             break;
         }
         default:
@@ -103,36 +218,28 @@ tensorflowlite_load(graph_builder_array *builder, graph_encoding encoding,
     if (use_default)
         NN_WARN_PRINTF("Default encoding is CPU.");
 
+    tfl_ctx->interpreters[*ctx].interpreter->AllocateTensors();
     return success;
 }
 
 error
-tensorflowlite_init_execution_context(graph g, graph_execution_context *ctx)
+tensorflowlite_set_input(void *tflite_ctx, graph_execution_context ctx,
+                         uint32_t index, tensor *input_tensor)
 {
-    if (interpreter == NULL) {
-        NN_ERR_PRINTF("Non-initialized interpreter.");
-        return runtime_error;
-    }
-    interpreter->AllocateTensors();
-    return success;
-}
+    TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
 
-error
-tensorflowlite_set_input(graph_execution_context ctx, uint32_t index,
-                         tensor *input_tensor)
-{
-    if (interpreter == NULL) {
-        NN_ERR_PRINTF("Non-initialized interpreter.");
-        return runtime_error;
-    }
+    error res;
+    if (success != (res = is_valid_graph_execution_context(tfl_ctx, ctx)))
+        return res;
 
-    uint32_t num_tensors = interpreter->inputs().size();
+    uint32_t num_tensors =
+        tfl_ctx->interpreters[ctx].interpreter->inputs().size();
     NN_DBG_PRINTF("Number of tensors (%d)", num_tensors);
     if (index + 1 > num_tensors) {
         return runtime_error;
     }
 
-    auto tensor = interpreter->input_tensor(index);
+    auto tensor = tfl_ctx->interpreters[ctx].interpreter->input_tensor(index);
     if (tensor == NULL) {
         NN_ERR_PRINTF("Missing memory");
         return missing_memory;
@@ -152,7 +259,9 @@ tensorflowlite_set_input(graph_execution_context ctx, uint32_t index,
         return invalid_argument;
     }
 
-    auto *input = interpreter->typed_input_tensor<float>(index);
+    auto *input =
+        tfl_ctx->interpreters[ctx].interpreter->typed_input_tensor<float>(
+            index);
     if (input == NULL)
         return missing_memory;
 
@@ -162,34 +271,38 @@ tensorflowlite_set_input(graph_execution_context ctx, uint32_t index,
 }
 
 error
-tensorflowlite_compute(graph_execution_context ctx)
+tensorflowlite_compute(void *tflite_ctx, graph_execution_context ctx)
 {
-    if (interpreter == NULL) {
-        NN_ERR_PRINTF("Non-initialized interpreter.");
-        return runtime_error;
-    }
-    interpreter->Invoke();
+    TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
+
+    error res;
+    if (success != (res = is_valid_graph_execution_context(tfl_ctx, ctx)))
+        return res;
+
+    tfl_ctx->interpreters[ctx].interpreter->Invoke();
     return success;
 }
 
 error
-tensorflowlite_get_output(graph_execution_context ctx, uint32_t index,
-                          tensor_data output_tensor,
+tensorflowlite_get_output(void *tflite_ctx, graph_execution_context ctx,
+                          uint32_t index, tensor_data output_tensor,
                           uint32_t *output_tensor_size)
 {
-    if (interpreter == NULL) {
-        NN_ERR_PRINTF("Non-initialized interpreter.");
-        return runtime_error;
-    }
+    TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
 
-    uint32_t num_output_tensors = interpreter->outputs().size();
+    error res;
+    if (success != (res = is_valid_graph_execution_context(tfl_ctx, ctx)))
+        return res;
+
+    uint32_t num_output_tensors =
+        tfl_ctx->interpreters[ctx].interpreter->outputs().size();
     NN_DBG_PRINTF("Number of tensors (%d)", num_output_tensors);
 
     if (index + 1 > num_output_tensors) {
         return runtime_error;
     }
 
-    auto tensor = interpreter->output_tensor(index);
+    auto tensor = tfl_ctx->interpreters[ctx].interpreter->output_tensor(index);
     if (tensor == NULL) {
         NN_ERR_PRINTF("Missing memory");
         return missing_memory;
@@ -204,7 +317,9 @@ tensorflowlite_get_output(graph_execution_context ctx, uint32_t index,
         return missing_memory;
     }
 
-    float *tensor_f = interpreter->typed_output_tensor<float>(index);
+    float *tensor_f =
+        tfl_ctx->interpreters[ctx].interpreter->typed_output_tensor<float>(
+            index);
     for (uint32_t i = 0; i < model_tensor_size; ++i)
         NN_DBG_PRINTF("output: %f", tensor_f[i]);
 
@@ -215,20 +330,51 @@ tensorflowlite_get_output(graph_execution_context ctx, uint32_t index,
 }
 
 void
-tensorflowlite_destroy()
+tensorflowlite_initialize(void **tflite_ctx)
+{
+    TFLiteContext *tfl_ctx = new TFLiteContext();
+    if (tfl_ctx == NULL) {
+        NN_ERR_PRINTF("Error when allocating memory for tensorflowlite.");
+        return;
+    }
+
+    NN_DBG_PRINTF("Initializing models.");
+    tfl_ctx->current_models = 0;
+    for (int i = 0; i < MAX_GRAPHS_PER_INST; ++i) {
+        tfl_ctx->models[i].model_pointer = NULL;
+    }
+    NN_DBG_PRINTF("Initializing interpreters.");
+    tfl_ctx->current_interpreters = 0;
+
+    if (os_mutex_init(&tfl_ctx->g_lock) != 0) {
+        NN_ERR_PRINTF("Error while initializing the lock");
+    }
+
+    *tflite_ctx = (void *)tfl_ctx;
+}
+
+void
+tensorflowlite_destroy(void *tflite_ctx)
 {
     /*
-        TensorFlow Lite memory is man
+        TensorFlow Lite memory is internally managed by tensorflow
 
         Related issues:
         * https://github.com/tensorflow/tensorflow/issues/15880
     */
+    TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
+
     NN_DBG_PRINTF("Freeing memory.");
-    model.reset(nullptr);
-    model = NULL;
-    interpreter.reset(nullptr);
-    interpreter = NULL;
-    wasm_runtime_free(model_pointer);
-    model_pointer = NULL;
+    for (int i = 0; i < MAX_GRAPHS_PER_INST; ++i) {
+        tfl_ctx->models[i].model.reset();
+        if (tfl_ctx->models[i].model_pointer)
+            wasm_runtime_free(tfl_ctx->models[i].model_pointer);
+        tfl_ctx->models[i].model_pointer = NULL;
+    }
+    for (int i = 0; i < MAX_GRAPH_EXEC_CONTEXTS_PER_INST; ++i) {
+        tfl_ctx->interpreters[i].interpreter.reset();
+    }
+    os_mutex_destroy(&tfl_ctx->g_lock);
+    delete tfl_ctx;
     NN_DBG_PRINTF("Memory free'd.");
 }

+ 13 - 9
core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.hpp

@@ -13,26 +13,30 @@ extern "C" {
 #endif
 
 error
-tensorflowlite_load(graph_builder_array *builder, graph_encoding encoding,
-                    execution_target target, graph *g);
+tensorflowlite_load(void *tflite_ctx, graph_builder_array *builder,
+                    graph_encoding encoding, execution_target target, graph *g);
 
 error
-tensorflowlite_init_execution_context(graph g, graph_execution_context *ctx);
+tensorflowlite_init_execution_context(void *tflite_ctx, graph g,
+                                      graph_execution_context *ctx);
 
 error
-tensorflowlite_set_input(graph_execution_context ctx, uint32_t index,
-                         tensor *input_tensor);
+tensorflowlite_set_input(void *tflite_ctx, graph_execution_context ctx,
+                         uint32_t index, tensor *input_tensor);
 
 error
-tensorflowlite_compute(graph_execution_context ctx);
+tensorflowlite_compute(void *tflite_ctx, graph_execution_context ctx);
 
 error
-tensorflowlite_get_output(graph_execution_context ctx, uint32_t index,
-                          tensor_data output_tensor,
+tensorflowlite_get_output(void *tflite_ctx, graph_execution_context ctx,
+                          uint32_t index, tensor_data output_tensor,
                           uint32_t *output_tensor_size);
 
 void
-tensorflowlite_destroy();
+tensorflowlite_initialize(void **tflite_ctx);
+
+void
+tensorflowlite_destroy(void *tflite_ctx);
 
 #ifdef __cplusplus
 }

+ 0 - 5
core/iwasm/libraries/wasi-nn/test/CMakeLists.txt

@@ -110,11 +110,6 @@ if (WAMR_BUILD_DEBUG_INTERP EQUAL 1)
   set (WAMR_BUILD_SIMD 0)
 endif ()
 
-if (COLLECT_CODE_COVERAGE EQUAL 1)
-  set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fprofile-arcs -ftest-coverage")
-  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage")
-endif ()
-
 set (WAMR_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../../..)
 
 include (${WAMR_ROOT_DIR}/build-scripts/runtime_lib.cmake)

+ 0 - 22
core/iwasm/libraries/wasi-nn/test/Dockerfile.base

@@ -1,22 +0,0 @@
-# Copyright (C) 2019 Intel Corporation.  All rights reserved.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-FROM ubuntu:20.04 AS base
-
-ENV DEBIAN_FRONTEND=noninteractive
-
-RUN apt-get update && apt-get install -y \
-    cmake build-essential git
-
-WORKDIR /home/wamr
-
-COPY . .
-
-WORKDIR /home/wamr/core/iwasm/libraries/wasi-nn/test/build
-
-RUN cmake \
-  -DWAMR_BUILD_WASI_NN=1 \
-  -DTFLITE_ENABLE_GPU=ON \
-  ..
-
-RUN make -j $(grep -c ^processor /proc/cpuinfo)

+ 21 - 2
core/iwasm/libraries/wasi-nn/test/Dockerfile.cpu

@@ -1,8 +1,27 @@
 # Copyright (C) 2019 Intel Corporation.  All rights reserved.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-FROM ubuntu:20.04
+FROM ubuntu:20.04 AS base
 
-COPY --from=wasi-nn-base /home/wamr/core/iwasm/libraries/wasi-nn/test/build/iwasm /run/iwasm
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && apt-get install -y \
+    cmake build-essential git
+
+WORKDIR /home/wamr
+
+COPY . .
+
+WORKDIR /home/wamr/core/iwasm/libraries/wasi-nn/test/build
+
+RUN cmake \
+  -DWAMR_BUILD_WASI_NN=1 \
+  ..
+
+RUN make -j $(grep -c ^processor /proc/cpuinfo)
+
+FROM ubuntu:22.04
+
+COPY --from=base /home/wamr/core/iwasm/libraries/wasi-nn/test/build/iwasm /run/iwasm
 
 ENTRYPOINT [ "/run/iwasm" ]

+ 21 - 1
core/iwasm/libraries/wasi-nn/test/Dockerfile.nvidia-gpu

@@ -1,6 +1,26 @@
 # Copyright (C) 2019 Intel Corporation.  All rights reserved.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+FROM ubuntu:20.04 AS base
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && apt-get install -y \
+    cmake build-essential git
+
+WORKDIR /home/wamr
+
+COPY . .
+
+WORKDIR /home/wamr/core/iwasm/libraries/wasi-nn/test/build
+
+RUN cmake \
+  -DWAMR_BUILD_WASI_NN=1 \
+  -DWASI_NN_ENABLE_GPU=1 \
+  ..
+
+RUN make -j $(grep -c ^processor /proc/cpuinfo)
+
 FROM nvidia/cuda:11.3.0-runtime-ubuntu20.04
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
@@ -15,6 +35,6 @@ RUN mkdir -p /etc/OpenCL/vendors && \
 ENV NVIDIA_VISIBLE_DEVICES=all
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 
-COPY --from=wasi-nn-base /home/wamr/core/iwasm/libraries/wasi-nn/test/build/iwasm /run/iwasm
+COPY --from=base /home/wamr/core/iwasm/libraries/wasi-nn/test/build/iwasm /run/iwasm
 
 ENTRYPOINT [ "/run/iwasm" ]

+ 3 - 2
core/iwasm/libraries/wasi-nn/test/build.sh

@@ -7,8 +7,9 @@
     -Wl,--allow-undefined \
     -Wl,--strip-all,--no-entry \
     --sysroot=/opt/wasi-sdk/share/wasi-sysroot \
-    -I.. \
-    -o test_tensorflow.wasm test_tensorflow.c
+    -I.. -I../src/utils \
+    -o test_tensorflow.wasm \
+    test_tensorflow.c utils.c
 
 # TFLite models to use in the tests
 

Some files were not shown because too many files changed in this diff