Bläddra i källkod

Merge pull request #2500 from bytecodealliance/main

Merge branch main into gitbook
Wenyong Huang 2 år sedan
förälder
incheckning
7441660893
100 ändrade filer med 3643 tillägg och 1483 borttagningar
  1. 47 31
      .devcontainer/Dockerfile
  2. 13 10
      .devcontainer/devcontainer.json
  3. 31 6
      .github/workflows/build_wamr_lldb.yml
  4. 15 5
      .github/workflows/compilation_on_android_ubuntu.yml
  5. 47 0
      .github/workflows/hadolint_dockerfiles.yml
  6. 13 3
      .github/workflows/nightly_run.yml
  7. 2 2
      ATTRIBUTIONS.md
  8. 23 0
      CMakeLists.txt
  9. 136 0
      RELEASE_NOTES.md
  10. 34 6
      build-scripts/config_common.cmake
  11. 97 58
      build-scripts/lldb_wasm.patch
  12. 32 0
      ci/pre_commit_hook_sample
  13. 15 0
      ci/setup.sh
  14. 128 0
      ci/validate_lldb.py
  15. 25 1
      core/config.h
  16. 129 0
      core/iwasm/aot/aot_intrinsic.c
  17. 19 53
      core/iwasm/aot/aot_loader.c
  18. 51 69
      core/iwasm/aot/aot_runtime.c
  19. 10 37
      core/iwasm/aot/aot_runtime.h
  20. 14 18
      core/iwasm/aot/arch/aot_reloc_riscv.c
  21. 3 0
      core/iwasm/aot/arch/aot_reloc_x86_32.c
  22. 1 16
      core/iwasm/aot/arch/aot_reloc_x86_64.c
  23. 19 1
      core/iwasm/aot/arch/aot_reloc_xtensa.c
  24. 45 1
      core/iwasm/aot/iwasm_aot.cmake
  25. 18 6
      core/iwasm/common/wasm_c_api.c
  26. 3 9
      core/iwasm/common/wasm_exec_env.h
  27. 63 45
      core/iwasm/common/wasm_memory.c
  28. 0 10
      core/iwasm/common/wasm_memory.h
  29. 240 50
      core/iwasm/common/wasm_runtime_common.c
  30. 13 1
      core/iwasm/common/wasm_runtime_common.h
  31. 86 115
      core/iwasm/common/wasm_shared_memory.c
  32. 12 32
      core/iwasm/common/wasm_shared_memory.h
  33. 48 0
      core/iwasm/common/wasm_suspend_flags.h
  34. 2 2
      core/iwasm/compilation/aot.h
  35. 28 70
      core/iwasm/compilation/aot_compiler.c
  36. 176 21
      core/iwasm/compilation/aot_emit_aot_file.c
  37. 17 5
      core/iwasm/compilation/aot_emit_function.c
  38. 77 5
      core/iwasm/compilation/aot_llvm.c
  39. 4 0
      core/iwasm/compilation/aot_llvm.h
  40. 49 95
      core/iwasm/compilation/aot_llvm_extra.cpp
  41. 16 16
      core/iwasm/compilation/debug/dwarf_extractor.cpp
  42. 2 2
      core/iwasm/compilation/debug/dwarf_extractor.h
  43. 21 20
      core/iwasm/fast-jit/fe/jit_emit_table.c
  44. 4 2
      core/iwasm/include/aot_export.h
  45. 47 0
      core/iwasm/include/wasm_export.h
  46. 18 1
      core/iwasm/interpreter/wasm.h
  47. 135 114
      core/iwasm/interpreter/wasm_interp_classic.c
  48. 108 96
      core/iwasm/interpreter/wasm_interp_fast.c
  49. 10 7
      core/iwasm/interpreter/wasm_loader.c
  50. 5 3
      core/iwasm/interpreter/wasm_mini_loader.c
  51. 69 134
      core/iwasm/interpreter/wasm_runtime.c
  52. 14 3
      core/iwasm/interpreter/wasm_runtime.h
  53. 13 6
      core/iwasm/libraries/lib-pthread/lib_pthread_wrapper.c
  54. 10 0
      core/iwasm/libraries/lib-rats/lib_rats.cmake
  55. 3 1
      core/iwasm/libraries/lib-socket/inc/wasi_socket_ext.h
  56. 5 0
      core/iwasm/libraries/lib-socket/src/wasi/wasi_socket_ext.c
  57. 21 2
      core/iwasm/libraries/lib-socket/test/nslookup.c
  58. 112 113
      core/iwasm/libraries/lib-socket/test/tcp_udp.c
  59. 1 1
      core/iwasm/libraries/lib-wasi-threads/lib_wasi_threads_wrapper.c
  60. 38 1
      core/iwasm/libraries/lib-wasi-threads/test/build.sh
  61. 3 0
      core/iwasm/libraries/lib-wasi-threads/test/manifest.json
  62. 6 0
      core/iwasm/libraries/lib-wasi-threads/test/skip.json
  63. 117 0
      core/iwasm/libraries/lib-wasi-threads/test/spawn_stress_test.c
  64. 93 0
      core/iwasm/libraries/lib-wasi-threads/test/stress_test_threads_creation.c
  65. 7 4
      core/iwasm/libraries/lib-wasi-threads/tid_allocator.c
  66. 1 1
      core/iwasm/libraries/libc-uvwasi/libc_uvwasi.cmake
  67. 3 1
      core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/include/wasmtime_ssp.h
  68. 6 3
      core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/posix.c
  69. 29 25
      core/iwasm/libraries/thread-mgr/thread_manager.c
  70. 2 0
      core/iwasm/libraries/wasi-nn/.gitignore
  71. 22 8
      core/iwasm/libraries/wasi-nn/README.md
  72. 5 1
      core/iwasm/libraries/wasi-nn/cmake/Findtensorflow_lite.cmake
  73. 102 31
      core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp
  74. 0 1
      core/iwasm/libraries/wasi-nn/test/Dockerfile.cpu
  75. 1 2
      core/iwasm/libraries/wasi-nn/test/Dockerfile.nvidia-gpu
  76. 37 0
      core/iwasm/libraries/wasi-nn/test/Dockerfile.tpu
  77. 21 1
      core/iwasm/libraries/wasi-nn/test/build.sh
  78. 30 0
      core/iwasm/libraries/wasi-nn/test/models/quantized.py
  79. 63 0
      core/iwasm/libraries/wasi-nn/test/test_tensorflow_quantized.c
  80. 2 2
      core/iwasm/libraries/wasi-nn/test/utils.c
  81. 1 1
      core/iwasm/libraries/wasi-nn/test/utils.h
  82. 3 0
      core/shared/platform/common/posix/posix_socket.c
  83. 68 5
      core/shared/platform/esp-idf/espidf_memmap.c
  84. 6 0
      core/shared/platform/esp-idf/shared_platform.cmake
  85. 5 0
      core/shared/platform/include/platform_api_vmcore.h
  86. 79 2
      core/shared/platform/nuttx/nuttx_platform.c
  87. 123 0
      core/shared/utils/bh_atomic.h
  88. 4 0
      core/shared/utils/bh_common.c
  89. 15 15
      core/shared/utils/bh_common.h
  90. 0 0
      core/shared/utils/gnuc.h
  91. 1 1
      core/version.h
  92. 6 1
      doc/build_wamr.md
  93. 44 43
      doc/embed_wamr.md
  94. 38 0
      doc/embed_wamr_spawn_api.md
  95. 15 0
      doc/perf_tune.md
  96. 1 1
      doc/source_debugging.md
  97. 117 0
      doc/xip.md
  98. 121 22
      language-bindings/python/src/wamr/wamrapi/wamr.py
  99. 6 1
      language-bindings/python/utils/create_lib.sh
  100. 3 6
      language-bindings/python/wamr-api/README.md

+ 47 - 31
.devcontainer/Dockerfile

@@ -1,20 +1,21 @@
 # Copyright (C) 2019 Intel Corporation.  All rights reserved.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.195.0/containers/cpp/.devcontainer/base.Dockerfile
-# [Choice] Debian / Ubuntu version (use Debian 11/9, Ubuntu 18.04/21.04 on local arm64/Apple Silicon): debian-11, debian-10, debian-9, ubuntu-21.04, ubuntu-20.04, ubuntu-18.04
-ARG VARIANT=ubuntu-20.04
-FROM mcr.microsoft.com/vscode/devcontainers/cpp:0-${VARIANT}
+# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.245.2/containers/cpp/.devcontainer/base.Dockerfile
+# [Choice] Debian / Ubuntu version (use Debian 12/11/9, Ubuntu 18.04/21.04 on local arm64/Apple Silicon): debian-12, debian-11, debian-10, debian-9, ubuntu-21.04, ubuntu-20.04, ubuntu-18.04
+ARG VARIANT=debian-12
+FROM mcr.microsoft.com/vscode/devcontainers/cpp:${VARIANT}
 
 ARG DEBIAN_FRONTEND=noninteractive
 ENV TZ=Asian/Shanghai
 
 # hadolint ignore=DL3008
 RUN apt-get update \
+  && apt-get upgrade -y \
   && apt-get install -y apt-transport-https apt-utils build-essential \
-  ca-certificates ccache curl g++-multilib git gnupg \
-  libgcc-9-dev lib32gcc-9-dev lsb-release \
-  ninja-build ocaml ocamlbuild python2.7 \
+  ca-certificates ccache cmake curl g++-multilib git gnupg \
+  libgcc-12-dev lib32gcc-12-dev lsb-release \
+  ninja-build ocaml ocamlbuild \
   software-properties-common tree tzdata \
   unzip valgrind vim wget zip --no-install-recommends \
   && apt-get clean -y \
@@ -22,32 +23,32 @@ RUN apt-get update \
 
 #
 # binaryen
-ARG BINARYEN_VER=111
+ARG BINARYEN_VER=114
 WORKDIR /opt
 RUN wget -c --progress=dot:giga https://github.com/WebAssembly/binaryen/releases/download/version_${BINARYEN_VER}/binaryen-version_${BINARYEN_VER}-x86_64-linux.tar.gz \
   && tar xf binaryen-version_${BINARYEN_VER}-x86_64-linux.tar.gz \
-  && ln -sf /opt/binaryen-version_111 /opt/binaryen \
+  && ln -sf /opt/binaryen-version_${BINARYEN_VER} /opt/binaryen \
   && rm binaryen-version_${BINARYEN_VER}-x86_64-linux.tar.gz
 
 #
 # CMAKE (https://apt.kitware.com/)
 SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 # hadolint ignore=DL3008
-RUN wget --progress=dot:giga -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg > /dev/null \
-  && echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ bionic main' | tee /etc/apt/sources.list.d/kitware.list >/dev/null \
-  && apt-get update \
-  && rm /usr/share/keyrings/kitware-archive-keyring.gpg \
-  && apt-get install -y kitware-archive-keyring --no-install-recommends \
-  && apt-get install -y cmake --no-install-recommends \
-  && apt-get clean -y \
-  && rm -rf /var/lib/apt/lists/*
+ARG CMAKE_VER=3.27.0
+RUN wget https://github.com/Kitware/CMake/releases/download/v${CMAKE_VER}/cmake-${CMAKE_VER}-Linux-x86_64.sh \
+      -q -O /tmp/cmake-install.sh \
+      && chmod u+x /tmp/cmake-install.sh \
+      && mkdir /opt/cmake-${CMAKE_VER} \
+      && /tmp/cmake-install.sh --skip-license --prefix=/opt/cmake-${CMAKE_VER} \
+      && rm /tmp/cmake-install.sh \
+      && ln -s /opt/cmake-${CMAKE_VER}/bin/* /usr/local/bin
 
 #
 # install emsdk
 WORKDIR /opt
 RUN git clone https://github.com/emscripten-core/emsdk.git
 
-ARG EMSDK_VER=3.0.0
+ARG EMSDK_VER=3.1.43
 WORKDIR /opt/emsdk
 RUN  git pull \
   && ./emsdk install ${EMSDK_VER} \
@@ -56,7 +57,7 @@ RUN  git pull \
 
 #
 # install wasi-sdk
-ARG WASI_SDK_VER=19
+ARG WASI_SDK_VER=20
 RUN wget -c --progress=dot:giga https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-${WASI_SDK_VER}/wasi-sdk-${WASI_SDK_VER}.0-linux.tar.gz -P /opt \
   && tar xf /opt/wasi-sdk-${WASI_SDK_VER}.0-linux.tar.gz -C /opt \
   && ln -sf /opt/wasi-sdk-${WASI_SDK_VER}.0 /opt/wasi-sdk \
@@ -64,7 +65,7 @@ RUN wget -c --progress=dot:giga https://github.com/WebAssembly/wasi-sdk/releases
 
 #
 #install wabt
-ARG WABT_VER=1.0.29
+ARG WABT_VER=1.0.33
 RUN wget -c --progress=dot:giga https://github.com/WebAssembly/wabt/releases/download/${WABT_VER}/wabt-${WABT_VER}-ubuntu.tar.gz -P /opt \
   && tar xf /opt/wabt-${WABT_VER}-ubuntu.tar.gz -C /opt \
   && ln -sf /opt/wabt-${WABT_VER} /opt/wabt \
@@ -72,7 +73,7 @@ RUN wget -c --progress=dot:giga https://github.com/WebAssembly/wabt/releases/dow
 
 #
 # install bazelisk
-ARG BAZELISK_VER=1.12.0
+ARG BAZELISK_VER=1.17.0
 RUN mkdir /opt/bazelisk \
   && wget -c --progress=dot:giga https://github.com/bazelbuild/bazelisk/releases/download/v${BAZELISK_VER}/bazelisk-linux-amd64 -P /opt/bazelisk \
   && chmod a+x /opt/bazelisk/bazelisk-linux-amd64 \
@@ -80,16 +81,30 @@ RUN mkdir /opt/bazelisk \
 
 #
 # install clang+llvm
-ARG LLVM_VER=14
-RUN apt-get purge -y clang-10 llvm-10 && apt-get autoremove -y
+ARG LLVM_VER=16
+RUN apt-get purge -y clang-14 llvm-14 && apt-get autoremove -y
 WORKDIR /etc/apt/apt.conf.d
 RUN touch 99verfiy-peer.conf \
   && echo "Acquire { https::Verify-Peer false }" > 99verfiy-peer.conf
 
 WORKDIR /tmp
-RUN wget --progress=dot:giga https://apt.llvm.org/llvm.sh \
-  && chmod a+x ./llvm.sh \
-  && ./llvm.sh ${LLVM_VER} all
+#RUN wget --progress=dot:giga https://apt.llvm.org/llvm.sh \
+#  && chmod a+x ./llvm.sh \
+#  && ./llvm.sh ${LLVM_VER} all
+
+# Workaround due to https://github.com/llvm/llvm-project/issues/62475
+# hadolint ignore=DL3008
+RUN set -ex \
+    && echo "deb http://apt.llvm.org/bookworm/ llvm-toolchain-bookworm-${LLVM_VER} main" > /etc/apt/sources.list.d/apt.llvm.org.list \
+    && wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key | tee /etc/apt/trusted.gpg.d/apt.llvm.org.asc \
+    && apt-get update \
+    && apt-get install -y \
+    clang-${LLVM_VER} lldb-${LLVM_VER} lld-${LLVM_VER} clangd-${LLVM_VER} clang-tidy-${LLVM_VER} clang-format-${LLVM_VER} clang-tools-${LLVM_VER} \
+    llvm-${LLVM_VER}-dev lld-${LLVM_VER} lldb-${LLVM_VER} llvm-${LLVM_VER}-tools libomp-${LLVM_VER}-dev libc++-${LLVM_VER}-dev libc++abi-${LLVM_VER}-dev \
+    libclang-common-${LLVM_VER}-dev libclang-${LLVM_VER}-dev libclang-cpp${LLVM_VER}-dev libunwind-${LLVM_VER}-dev \
+    libclang-rt-${LLVM_VER}-dev libpolly-${LLVM_VER}-dev --no-install-recommends \
+    && apt-get clean -y \
+    && rm -rf /var/lib/apt/lists/*
 
 #
 # [Optional]
@@ -105,18 +120,19 @@ RUN apt-get update \
 #
 # Install required python packages
 # hadolint ignore=DL3013
-RUN python3 -m pip install --no-cache-dir --upgrade pip \
-  && pip3 install --no-cache-dir black nose pycparser pylint
+RUN python3 -m pip install --no-cache-dir --break-system-packages --upgrade pip \
+  && pip3 install --no-cache-dir --break-system-packages black nose pycparser pylint
 
 #
 # Install github-cli. It doesn't work as a feature of devcontainer.json
+ARG GH_CLI_VER=2.32.0
 WORKDIR /tmp
-RUN wget -q https://github.com/cli/cli/releases/download/v2.20.2/gh_2.20.2_linux_amd64.deb \
-  && dpkg -i gh_2.20.2_linux_amd64.deb
+RUN wget -q https://github.com/cli/cli/releases/download/v${GH_CLI_VER}/gh_${GH_CLI_VER}_linux_amd64.deb \
+  && dpkg -i gh_${GH_CLI_VER}_linux_amd64.deb
 
 #
 # Install NodeJS
-RUN wget -qO- https://deb.nodesource.com/setup_19.x | bash -
+RUN wget -qO- https://deb.nodesource.com/setup_20.x | bash -
 # hadolint ignore=DL3008
 RUN apt-get install -y nodejs --no-install-recommends
 

+ 13 - 10
.devcontainer/devcontainer.json

@@ -1,20 +1,23 @@
 // Copyright (C) 2019 Intel Corporation.  All rights reserved.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 // For format details, see https://aka.ms/vscode-remote/devcontainer.json or this file's README at:
-// https://github.com/microsoft/vscode-dev-containers/tree/v0.195.0/containers/cpp
+// https://github.com/microsoft/vscode-dev-containers/tree/v0.245.2/containers/cpp
 {
   "name": "WAMR-Dev",
   "build": {
     "dockerfile": "Dockerfile",
-    // Update 'VARIANT' to pick an Debian / Ubuntu OS version: debian-11, debian-10, debian-9, ubuntu-21.04, ubuntu-20.04, ubuntu-18.04
-    // Use Debian 11, Debian 9, Ubuntu 18.04 or Ubuntu 21.04 on local arm64/Apple Silicon
+    // Update 'VARIANT' to pick a Debian / Ubuntu OS version: debian-12, debian-11, debian-10, debian-9, ubuntu-21.04, ubuntu-20.04, ubuntu-18.04
+    // Use Debian 12, Debian 11, Debian 9, Ubuntu 18.04 or Ubuntu 21.04 on local arm64/Apple Silicon
     "args": {
-      "BINARYEN_VER": "111",
-      "EMSDK_VER": "3.0.0",
-      "LLVM_VER": "15",
-      "VARIANT": "ubuntu-20.04",
-      "WASI_SDK_VER": "19",
-      "WABT_VER": "1.0.31"
+      "BINARYEN_VER": "114",
+      "BAZELISK_VER": "1.17.0",
+      "CMAKE_VER": "3.27.0",
+      "EMSDK_VER": "3.1.43",
+      "GH_CLI_VER": "2.32.0",
+      "LLVM_VER": "16",
+      "VARIANT": "debian-12",
+      "WASI_SDK_VER": "20",
+      "WABT_VER": "1.0.33"
     }
   },
   "runArgs": [
@@ -34,7 +37,7 @@
         "llvm-vs-code-extensions.vscode-clangd",
         "ms-python.python",
         "ms-python.vscode-pylance",
-        "ms-vscode.cmake-tools",
+        "ms-vscode.cmake-tools"
       ]
     }
   },

+ 31 - 6
.github/workflows/build_wamr_lldb.yml

@@ -22,6 +22,11 @@ on:
         description: a semantic version number
         type: string
         required: true
+      wasi_sdk_url:
+        description: download WASI_SDK from this URL
+        type: string
+        required: false
+        default: "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-20/wasi-sdk-20.0-linux.tar.gz"
 
 jobs:
   try_reuse:
@@ -29,7 +34,7 @@ jobs:
     with:
       binary_name_stem: "wamr-lldb-${{ inputs.ver_num }}-${{ inputs.arch }}-${{ inputs.runner }}"
       last_commit: "ea63ba4bd010c2285623ad4acc0262a4d63bcfea"
-      the_path: "./build-scripts/lldb-wasm.patch"
+      the_path: "./build-scripts/lldb_wasm.patch"
       upload_url: ${{ inputs.upload_url }}
 
   build:
@@ -44,6 +49,15 @@ jobs:
     steps:
       - uses: actions/checkout@v3
 
+      - name: download and install wasi-sdk
+        run: |
+          cd /opt
+          basename=$(basename ${{ inputs.wasi_sdk_url }})
+          sudo wget --progress=dot:giga ${{ inputs.wasi_sdk_url }}
+          sudo tar -xzf ${basename}
+          sudo rm ${basename}
+          sudo mv wasi-sdk-* wasi-sdk
+
       - name: Cache build
         id: lldb_build_cache
         uses: actions/cache@v3
@@ -69,8 +83,8 @@ jobs:
         if: steps.lldb_build_cache.outputs.cache-hit != 'true' && contains(inputs.runner, 'macos')
         run: |
           brew remove swig
-          brew install swig@3 cmake ninja libedit
-          brew link --overwrite swig@3
+          brew install swig@4.1 cmake ninja libedit
+          brew link --overwrite swig@4.1
           sudo rm -rf /Library/Developer/CommandLineTools
 
       - name: install utils ubuntu
@@ -92,7 +106,7 @@ jobs:
           git init
           git config user.email "action@github.com"
           git config user.name "github action"
-          git apply ../../../build-scripts/lldb-wasm.patch
+          git apply ../../../build-scripts/lldb_wasm.patch
         working-directory: core/deps/llvm-project
 
       - name: get stand-alone python ubuntu
@@ -141,6 +155,17 @@ jobs:
           cmake --build build --target lldb install --parallel $(nproc)
         working-directory: core/deps/llvm-project
 
+      - name: validate lldb ubuntu
+        if: steps.lldb_build_cache.outputs.cache-hit != 'true' && contains(inputs.runner, 'ubuntu')
+        run: |
+          echo "start to validate lldb..."
+          mkdir -p wamr-debug
+          cmake -S product-mini/platforms/linux -B wamr-debug -DWAMR_BUILD_DEBUG_INTERP=1
+          cmake --build wamr-debug --parallel $(nproc)
+          export LD_LIBRARY_PATH=$(pwd)/core/deps/python/lib:${LD_LIBRARY_PATH}
+          python3 ci/validate_lldb.py --port 1239 --lldb core/deps/wamr-lldb/bin/lldb --wamr wamr-debug/iwasm --verbose
+        working-directory: .
+
       - name: build lldb macos
         if: steps.lldb_build_cache.outputs.cache-hit != 'true' && contains(inputs.runner, 'macos')
         run: |
@@ -180,7 +205,7 @@ jobs:
           mkdir -p wamr-lldb/lib
           cp build/bin/lldb* wamr-lldb/bin
           cp lldb/tools/lldb-vscode/package.json wamr-lldb
-          cp -r lldb/tools/lldb-vscode/syntaxes/ wamr-lldb 
+          cp -r lldb/tools/lldb-vscode/syntaxes/ wamr-lldb
         working-directory: core/deps/llvm-project
 
       - name: pack ubuntu specific libraries
@@ -201,7 +226,7 @@ jobs:
           cp -R ../python/lib/python* wamr-lldb/lib
           cp ../python/lib/libpython*.dylib wamr-lldb/lib
           install_name_tool -change /install/lib/libpython${{ env.PYTHON_VERSION }}.dylib @rpath/libpython${{ env.PYTHON_VERSION }}.dylib wamr-lldb/lib/liblldb.*.dylib
-        # Patch path of python library -> https://github.com/indygreg/python-build-standalone/blob/85923ca3911784e6978b85d56e06e9ae75cb2dc4/docs/quirks.rst?plain=1#L412-L446  
+        # Patch path of python library -> https://github.com/indygreg/python-build-standalone/blob/85923ca3911784e6978b85d56e06e9ae75cb2dc4/docs/quirks.rst?plain=1#L412-L446
         working-directory: core/deps/llvm-project
 
       - name: compress the binary

+ 15 - 5
.github/workflows/compilation_on_android_ubuntu.yml

@@ -483,6 +483,16 @@ jobs:
           sudo tar -xzf wasi-sdk-*.tar.gz
           sudo mv wasi-sdk-20.0 wasi-sdk
 
+      # This is a temporary solution until a new wasi-sdk release that includes the bug fixes is available
+      - name: build wasi-libc from source
+        if: matrix.test_option == '$WASI_TEST_OPTIONS'
+        run: |
+          git clone https://github.com/WebAssembly/wasi-libc
+          cd wasi-libc
+          make -j AR=/opt/wasi-sdk/bin/llvm-ar NM=/opt/wasi-sdk/bin/llvm-nm CC=/opt/wasi-sdk/bin/clang THREAD_MODEL=posix
+          echo "SYSROOT_PATH=$PWD/sysroot" >> $GITHUB_ENV
+
+
       - name: set env variable(if llvm are used)
         if: matrix.running_mode == 'aot' || matrix.running_mode == 'jit' || matrix.running_mode == 'multi-tier-jit'
         run: echo "USE_LLVM=true" >> $GITHUB_ENV
@@ -518,7 +528,7 @@ jobs:
 
       - name: Build WASI thread tests
         if: matrix.test_option == '$WASI_TEST_OPTIONS'
-        run: bash build.sh
+        run: bash build.sh --sysroot "$SYSROOT_PATH"
         working-directory: ./core/iwasm/libraries/lib-wasi-threads/test/
 
       - name: build socket api tests
@@ -527,7 +537,7 @@ jobs:
         working-directory: ./core/iwasm/libraries/lib-socket/test/
 
       - name: run tests
-        timeout-minutes: 10
+        timeout-minutes: 30
         run: ./test_wamr.sh ${{ matrix.test_option }} -t ${{ matrix.running_mode }}
         working-directory: ./tests/wamr-test-suites
 
@@ -543,7 +553,7 @@ jobs:
           sudo apt install -y g++-multilib lib32gcc-9-dev
 
       - name: run tests x86_32
-        timeout-minutes: 10
+        timeout-minutes: 30
         if: env.TEST_ON_X86_32 == 'true'
         run: ./test_wamr.sh ${{ env.X86_32_TARGET_TEST_OPTIONS }} ${{ matrix.test_option }} -t ${{ matrix.running_mode }}
         working-directory: ./tests/wamr-test-suites
@@ -584,7 +594,7 @@ jobs:
           cache-name: cache-lldb-vscode
         with:
           path: test-tools/wamr-ide/VSCode-Extension/resource/debug/linux
-          key: ${{ env.cache-name }}-${{ hashFiles('build-scripts/lldb-wasm.patch') }}-${{ env.PYTHON_UBUNTU_STANDALONE_BUILD }}
+          key: ${{ env.cache-name }}-${{ hashFiles('build-scripts/lldb_wasm.patch') }}-${{ env.PYTHON_UBUNTU_STANDALONE_BUILD }}
 
       - if: ${{ steps.cache-lldb.outputs.cache-hit != 'true' }}
         name: get stand-alone python ubuntu
@@ -607,7 +617,7 @@ jobs:
           git init
           git config user.email "action@github.com"
           git config user.name "github action"
-          git apply ../../../build-scripts/lldb-wasm.patch
+          git apply ../../../build-scripts/lldb_wasm.patch
         working-directory: core/deps/llvm-project
 
       - if: ${{ steps.cache-lldb.outputs.cache-hit != 'true' }}

+ 47 - 0
.github/workflows/hadolint_dockerfiles.yml

@@ -0,0 +1,47 @@
+# Copyright (C) 2019 Intel Corporation.  All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+name: hadolint dockerfiles
+
+on:
+  # will be triggered on PR events
+  pull_request:
+    types:
+      - opened
+      - synchronize
+    paths:
+      - "**/Dockerfile*"
+      - ".github/workflows/hadolint_dockerfiles.yml"
+  push:
+    branches:
+      - main
+      - "dev/**"
+    paths:
+      - "**/Dockerfile*"
+      - ".github/workflows/hadolint_dockerfiles.yml"
+  # allow to be triggered manually
+  workflow_dispatch:
+
+# Cancel any in-flight jobs for the same PR/branch so there's only one active
+# at a time
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  run-hadolint-on-dockerfiles:
+    runs-on: ubuntu-22.04
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      # by default, hadolint will fail on warnings and errors
+      - name: Run hadolint on dockerfiles
+        run: |
+          docker pull hadolint/hadolint:latest-debian
+          find . -name "*Dockerfile*" | while read dockerfile; do
+            echo "run hadolint on $dockerfile:"
+            docker run --rm -i hadolint/hadolint:latest-debian hadolint - <"$dockerfile"
+            echo "successful"
+          done

+ 13 - 3
.github/workflows/nightly_run.yml

@@ -548,6 +548,16 @@ jobs:
           sudo wget ${{ matrix.wasi_sdk_release }}
           sudo tar -xzf wasi-sdk-*.tar.gz
           sudo mv wasi-sdk-20.0 wasi-sdk
+
+      # This is a temporary solution until a new wasi-sdk release that includes the bug fixes is available
+      - name: build wasi-libc from source
+        if: matrix.test_option == '$WASI_TEST_OPTIONS'
+        run: |
+          git clone https://github.com/WebAssembly/wasi-libc
+          cd wasi-libc
+          make -j AR=/opt/wasi-sdk/bin/llvm-ar NM=/opt/wasi-sdk/bin/llvm-nm CC=/opt/wasi-sdk/bin/clang THREAD_MODEL=posix
+          echo "SYSROOT_PATH=$PWD/sysroot" >> $GITHUB_ENV
+
       - name: set env variable(if llvm are used)
         if: matrix.running_mode == 'aot' || matrix.running_mode == 'jit' || matrix.running_mode == 'multi-tier-jit'
         run: echo "USE_LLVM=true" >> $GITHUB_ENV
@@ -586,7 +596,7 @@ jobs:
 
       - name: Build WASI thread tests
         if: matrix.test_option == '$WASI_TEST_OPTIONS'
-        run: bash build.sh
+        run: bash build.sh --sysroot "$SYSROOT_PATH"
         working-directory: ./core/iwasm/libraries/lib-wasi-threads/test/
 
       - name: build socket api tests
@@ -595,7 +605,7 @@ jobs:
         working-directory: ./core/iwasm/libraries/lib-socket/test/
 
       - name: run tests
-        timeout-minutes: 10
+        timeout-minutes: 40
         run: ./test_wamr.sh ${{ matrix.test_option }} -t ${{ matrix.running_mode }}
         working-directory: ./tests/wamr-test-suites
 
@@ -611,7 +621,7 @@ jobs:
           sudo apt install -y g++-multilib lib32gcc-9-dev
 
       - name: run tests x86_32
-        timeout-minutes: 10
+        timeout-minutes: 40
         if: env.TEST_ON_X86_32 == 'true'
         run: ./test_wamr.sh ${{ env.X86_32_TARGET_TEST_OPTIONS }} ${{ matrix.test_option }} -t ${{ matrix.running_mode }}
         working-directory: ./tests/wamr-test-suites

+ 2 - 2
ATTRIBUTIONS.md

@@ -22,7 +22,7 @@ The WAMR fast interpreter is a clean room development. We would acknowledge the
 
 |  third party components | version number | latest release | vendor pages | CVE details |
 | --- | --- | --- | --- | --- |
-| cjson | 1.7.10 | 1.7.14 | https://github.com/DaveGamble/cJSON | https://www.cvedetails.com/vendor/19164/Cjson-Project.html |
+| cjson | 1.7.16 | 1.7.16 | https://github.com/DaveGamble/cJSON | https://www.cvedetails.com/vendor/19164/Cjson-Project.html |
 | contiki-ng (er-coap) | unspecified | 3.0 | https://github.com/contiki-os/contiki | https://www.cvedetails.com/vendor/16528/Contiki-os.html |
 | freebsd libm | unspecified | 13.0 | https://www.freebsd.org/ | https://www.cvedetails.com/vendor/6/Freebsd.html |
 | LVGL | 6.0.1 | 7.11.0 | https://lvgl.io/ | |
@@ -31,7 +31,7 @@ The WAMR fast interpreter is a clean room development. We would acknowledge the
 | wasmtime | unspecified | v0.26.0 | https://github.com/bytecodealliance/wasmtime | |
 | zephyr | unspecified | v2.5.0 | https://www.zephyrproject.org/ | https://www.cvedetails.com/vendor/19255/Zephyrproject.html |
 | WebAssembly debugging patch for LLDB | unspecified | unspecified | https://reviews.llvm.org/D78801 | |
-| libuv | v1.42.0 | v1.44.1 | https://github.com/libuv/libuv | https://www.cvedetails.com/vendor/15402/Libuv-Project.html |
+| libuv | v1.46.0 | v1.46.0 | https://github.com/libuv/libuv | https://www.cvedetails.com/vendor/15402/Libuv-Project.html |
 | uvwasi | unspecified | v0.0.12 | https://github.com/nodejs/uvwasi | |
 | asmjit | unspecified | unspecified | https://github.com/asmjit/asmjit | |
 | zydis | unspecified | e14a07895136182a5b53e181eec3b1c6e0b434de | https://github.com/zyantific/zydis | |

+ 23 - 0
CMakeLists.txt

@@ -107,6 +107,11 @@ endif ()
 
 set (WAMR_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 
+# Set the strip flags when a strip tool is available (GNU or Clang)
+if (CMAKE_STRIP)
+    set (CMAKE_STRIP_FLAGS "--strip-all")
+endif ()
+
 include (${WAMR_ROOT_DIR}/build-scripts/runtime_lib.cmake)
 
 set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wformat -Wformat-security -Wshadow -Wno-unused-parameter")
@@ -141,6 +146,15 @@ endif ()
 
 install (TARGETS iwasm_static ARCHIVE DESTINATION lib)
 
+# If it's a Release build, strip the static library
+if (CMAKE_STRIP AND CMAKE_BUILD_TYPE STREQUAL "Release")
+  # Strip static library
+  message (STATUS "Stripping static library after build!")
+  add_custom_command (TARGET iwasm_static POST_BUILD
+      COMMAND ${CMAKE_STRIP} ${CMAKE_STRIP_FLAGS} $<TARGET_FILE:iwasm_static>
+  )
+endif ()
+
 # SHARED LIBRARY
 add_library (iwasm_shared SHARED ${WAMR_RUNTIME_LIB_SOURCE})
 set_target_properties (iwasm_shared PROPERTIES OUTPUT_NAME iwasm)
@@ -162,3 +176,12 @@ install (FILES
     ${WAMR_ROOT_DIR}/core/iwasm/include/wasm_export.h
     ${WAMR_ROOT_DIR}/core/iwasm/include/lib_export.h
     DESTINATION include)
+
+# If it's a Release build, strip the shared library
+if (CMAKE_STRIP AND CMAKE_BUILD_TYPE STREQUAL "Release")
+  # Strip shared library
+  message (STATUS "Stripping shared library after build!")
+  add_custom_command (TARGET iwasm_shared POST_BUILD
+      COMMAND ${CMAKE_STRIP} ${CMAKE_STRIP_FLAGS} $<TARGET_FILE:iwasm_shared>
+  )
+endif ()

+ 136 - 0
RELEASE_NOTES.md

@@ -1,3 +1,139 @@
+## WAMR-1.2.3
+
+### Breaking Changes
+- Increase default native stack size (#2332)
+
+### New Features
+- Implement the segue optimization for LLVM AOT/JIT (#2230)
+- Implement AOT static PGO (#2243)
+- Enable static PGO for Linux SGX (#2270)
+- Add Rust Formatters to Debugger (Vector, Map etc.) (#2219)
+
+### Bug Fixes
+- The Python language-binding needs python>=3.9 (#2228)
+- aot_compile_op_call: Remove a wrong optimization (#2233)
+- Fix typo in samples/ref-types (#2236)
+- Update thread proposal ignore cases (#2246)
+- Disable writing GS register on linux-sgx platform (#2255)
+- Fix compile error of wamrc with llvm-13/llvm-14 (#2261)
+- aot/jit: Set module layout (#2260)
+- Fix build error with LLVM 16 (#2259)
+- spec-test-script: Disable conversions.wast on i386 (#2269)
+- Fix a heap corruption bug in ems realloc (#2279)
+- Fix fast-interp issue of LAST_OP_OUTPUT_I32/64 check (#2295)
+- Fix wamrc build issues with LLVM 13 and LLVM 16 (#2313)
+- aot: Move stack_sizes table to a dedicated section (#2317)
+- product-mini/platforms/linux: Mark vmlib POSITION_INDEPENDENT_CODE (#2323)
+- aot: Avoid possible relocations around "stack_sizes" for XIP mode (#2322)
+- Avoid switch lowering to lookup tables for XIP (#2339)
+- Fix typo in zephyr's Dockerfile.old (#2354)
+- Fix typo (dwarf) in the codebase (#2367)
+- Implement suspend flags as atomic variable (#2361)
+- Fix LLVM JIT failing to look up the aot_stack_sizes symbol (#2384)
+- Fix some check issues on table operations (#2392)
+- Fix ExpandMemoryOpPass not working properly (#2399)
+- Fix non-builtin BH_ATOMIC_32_FETCH_OR and BH_ATOMIC_32_FETCH_AND (#2400)
+- Fix wasi-sockets tests (#2389)
+- Fix result arity check on select_t opcode (#2406)
+- Re-organize intrinsics in aot_reloc_riscv.c to fix some FPU issues (#2414)
+- Fix lib-pthread issues (#2410)
+- Fix typo in test_wamr.sh (#2421)
+- Fix memory sharing (#2415)
+- wasm_export.h: Fix struct wasm_val_t (#2435)
+- Fix typos in wamrc print_help() (#2442)
+- iwasm: Fix native lib cleanup after error occurs (#2443)
+- Correct --heap-size option in messages (#2458)
+- wasm_instantiate: Fix a potential integer overflow issue (#2459)
+- Fix windows link error and clear windows warnings (#2463)
+- aot: Disable musttail for mips (#2457)
+- Fix opcode overwrite issue in fast interp (#2476)
+- wamrc: Fix windows relocation to `aot_func_internal#n` (#2474)
+- Fix windows AOT hw bound check (#2475)
+- Fix typo in aot_emit_aot_file.c (#2478)
+
+### Enhancements
+- A few changes related to WAMRC_LLC_COMPILER (#2218)
+- Enhance linux-sgx CI (#2102)
+- Add asan and ubsan to WAMR CI (#2161)
+- Update doc on WAMR_DISABLE_HW_BOUND_CHECK 32-bit (#2262)
+- wamrc: Add an incompatibility note in the help message (#2276)
+- Add cmake variable to disable writing gs register (#2284)
+- Make hmu_tree_node 4 byte aligned to reduce compiler warning (#2268)
+- Appease unused warning on min_uint64 (#2277)
+- Fix format warning by PRIu32 in [wasm|aot] dump call stack (#2251)
+- Fix a compile warning due to missing include (#2293)
+- Fix dockerfile linter warnings (#2291)
+- Enable windows x86-32 AOT relocations (#2285)
+- wamr-ide: Add vscode extension tests (#2292)
+- AOT/JIT native stack bound check improvement (#2244)
+- Add retries to flaky step in nightly run CI (#2306)
+- Use system libuv if available (#1861)
+- wasi-nn: Simplify cmake and headers' location (#2308)
+- wasi-nn: Improve tests paths for local dev (#2309)
+- aot: Implement a few more relocation types for riscv (#2318)
+- wasi-nn: Add support of wasi-nn as shared lib (#2310)
+- Add a few more assertions on structures to which aot abi is sensitive (#2326)
+- Fix sanitizer errors in posix socket (#2331)
+- Add "--xip" option for wamrc (#2336)
+- Add "--enable-llvm-passes=<passes>" option to wamrc (#2335)
+- Make memory access boundary check behavior configurable (#2289)
+- Migrate ExpandMemoryOpPass to llvm new pass manager (#2334)
+- Allow defining hints without exact socket type or address family (#2337)
+- wamrc: Warn on text relocations for XIP (#2340)
+- Add scripts to validate lldb source debugger (#2150)
+- Add docker file to fix Zephyr ESP32 linking issue (#2314)
+- Add "--native-lib=<lib>" option to wamrc (#2342)
+- Fix unused warnings on disable_bounds_checks (#2347)
+- Add "--enable-builtin-intrinsics=<flags>" option to wamrc (#2341)
+- nuttx: Add a kconfig for wasi-threads (#2343)
+- iwasm: Disable app heap by default if wasi is enabled (#2346)
+- Fix some static scan issues (#2362)
+- Bring up WAMR on esp32-s3 device (#2348)
+- ESP-IDF platform supports to load AOT to PSRAM and run it (#2385)
+- Add hadolint CI for Dockerfile linting (#2387)
+- Move generic parts of wasm_suspend_flags.h to bh_atomic.h (#2393)
+- bh_atomic.h: Add comments (#2398)
+- bh_atomic.h: Add BH_ATOMIC_32_FETCH_ADD/BH_ATOMIC_32_FETCH_SUB (#2408)
+- Update libuv version to v1.46.0 (#2405)
+- Remove a few unused functions (#2409)
+- Add initial stress test (#2364)
+- Move wasm_runtime_destroy_wasi and wasi_nn_destroy calls together (#2418)
+- embed_wamr.md: Improvements about threads (#2420)
+- Add runtime inited checks in Enclave command handlings to improve security (#2416)
+- Add some relocation symbols for xtensa target (#2422)
+- Remove unnecessary and extra zero length check in mem functions' macro (#2428)
+- Introduce WASMModuleInstanceExtraCommon (#2429)
+- Strip static and shared libraries of iwasm to reduce the binary size (#2431)
+- Auto-check wrgsbase in cmake script (#2437)
+- iwasm: call native lib init/deinit if exists (#2439)
+- wasi-nn: Support uint8 quantized networks (#2433)
+- Implement `wasm_externref_objdel` and `wasm_externref_set_cleanup` (#2455)
+- wasi-nn: Improve TPU support (#2447)
+- wamr-python: Enable debugging WASM and grant dir access (#2449)
+- Build wasi-libc from source in WAMR CI (#2465)
+- wamrc: More friendly to print help info (#2451)
+- Add another wamr test (#2411)
+- Fix issues reported by Coverity and clear windows warnings (#2467)
+- Clone the input binary during wasm_module_validate (#2483)
+
+### Others
+- Nuttx CI: Ignore the expired certificate for riscv gcc toolchain (#2222)
+- core/iwasm/compilation: constify a bit (#2223)
+- Bump requests from 2.28.2 to 2.31.0 in /build-scripts (#2229)
+- dwarf_extractor: Constify a bit (#2278)
+- AOTFuncContext: Remove a stale comment (#2283)
+- Add performance tuning document (#2286)
+- Reduce CI jobs number (#2296)
+- CI: Update used node version to 16 (#2303)
+- Update Docker image for latest version of external libraries & tools (#2374)
+- Upgrade cJSON version to v1.7.16 (#2404)
+- Upgrade XNNPACK workload (#2394)
+- Build more benchmarks in workload XNNPACK (#2417)
+- Upgrade SGX-RA integration for 0.1.2 and Ubuntu 20.04 (#2454)
+- Add sample pre-commit hook (#2470)
+
+---
+
 ## WAMR-1.2.2
 
 ### Breaking Changes

+ 34 - 6
build-scripts/config_common.cmake

@@ -359,16 +359,16 @@ endif ()
 if (WAMR_BUILD_WASI_NN EQUAL 1)
   message ("     WASI-NN enabled")
   add_definitions (-DWASM_ENABLE_WASI_NN=1)
-  if (WASI_NN_ENABLE_GPU EQUAL 1)
+  if (WAMR_BUILD_WASI_NN_ENABLE_GPU EQUAL 1)
       message ("     WASI-NN: GPU enabled")
-      add_definitions (-DWASI_NN_ENABLE_GPU=1)
+      add_definitions (-DWASM_ENABLE_WASI_NN_GPU=1)
   endif ()
-  if (WAMR_BUILD_WASI_NN_ENABLE_EXT EQUAL 1)
+  if (WAMR_BUILD_WASI_NN_ENABLE_EXTERNAL_DELEGATE EQUAL 1)
       message ("     WASI-NN: External Delegation enabled")
-      add_definitions (-DWASI_NN_ENABLE_EXTERNAL_DELEGATE=1)
+      add_definitions (-DWASM_ENABLE_WASI_NN_EXTERNAL_DELEGATE=1)
   endif ()
-  if (DEFINED WASI_NN_EXT_DELEGATE_PATH)
-      add_definitions (-DWASI_NN_EXT_DELEGATE_PATH="${WASI_NN_EXT_DELEGATE_PATH}")
+  if (DEFINED WAMR_BUILD_WASI_NN_EXTERNAL_DELEGATE_PATH)
+      add_definitions (-DWASM_WASI_NN_EXTERNAL_DELEGATE_PATH="${WAMR_BUILD_WASI_NN_EXTERNAL_DELEGATE_PATH}")
   endif ()
 endif ()
 if (WAMR_BUILD_ALLOC_WITH_USER_DATA EQUAL 1)
@@ -395,4 +395,32 @@ endif ()
 if (WAMR_DISABLE_WRITE_GS_BASE EQUAL 1)
   add_definitions (-DWASM_DISABLE_WRITE_GS_BASE=1)
   message ("     Write linear memory base addr to x86 GS register disabled")
+elseif (WAMR_BUILD_TARGET STREQUAL "X86_64"
+        AND WAMR_BUILD_PLATFORM STREQUAL "linux")
+  set (TEST_WRGSBASE_SOURCE "${CMAKE_BINARY_DIR}/test_wrgsbase.c")
+  file (WRITE "${TEST_WRGSBASE_SOURCE}" "
+  #include <stdio.h>
+  #include <stdint.h>
+  int main() {
+      uint64_t value;
+      asm volatile (\"wrgsbase %0\" : : \"r\"(value));
+      printf(\"WRGSBASE instruction is available.\\n\");
+      return 0;
+  }")
+  # Try to compile and run the test program
+  try_run (TEST_WRGSBASE_RESULT
+    TEST_WRGSBASE_COMPILED
+    ${CMAKE_BINARY_DIR}/test_wrgsbase
+    SOURCES ${TEST_WRGSBASE_SOURCE}
+    CMAKE_FLAGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
+  )
+  #message("${TEST_WRGSBASE_COMPILED}, ${TEST_WRGSBASE_RESULT}")
+  if (NOT TEST_WRGSBASE_RESULT EQUAL 0)
+    add_definitions (-DWASM_DISABLE_WRITE_GS_BASE=1)
+    message ("     Write linear memory base addr to x86 GS register disabled")
+  endif ()
+endif ()
+if (WAMR_CONFIGUABLE_BOUNDS_CHECKS EQUAL 1)
+  add_definitions (-DWASM_CONFIGURABLE_BOUNDS_CHECKS=1)
+  message ("     Configurable bounds checks enabled")
 endif ()

+ 97 - 58
build-scripts/lldb-wasm.patch → build-scripts/lldb_wasm.patch

@@ -1,5 +1,44 @@
+diff --git a/lldb/bindings/CMakeLists.txt b/lldb/bindings/CMakeLists.txt
+index 9759b069fdc4..25b427f8bcf2 100644
+--- a/lldb/bindings/CMakeLists.txt
++++ b/lldb/bindings/CMakeLists.txt
+@@ -26,8 +26,6 @@ set(SWIG_COMMON_FLAGS
+   -features autodoc
+   -I${LLDB_SOURCE_DIR}/include
+   -I${CMAKE_CURRENT_SOURCE_DIR}
+-  -D__STDC_LIMIT_MACROS
+-  -D__STDC_CONSTANT_MACROS
+   ${DARWIN_EXTRAS}
+ )
+ 
+diff --git a/lldb/bindings/interfaces.swig b/lldb/bindings/interfaces.swig
+index c9a6d0f06056..021c7683d170 100644
+--- a/lldb/bindings/interfaces.swig
++++ b/lldb/bindings/interfaces.swig
+@@ -1,8 +1,5 @@
+ /* Various liblldb typedefs that SWIG needs to know about.  */
+ #define __extension__ /* Undefine GCC keyword to make Swig happy when processing glibc's stdint.h. */
+-/* The ISO C99 standard specifies that in C++ implementations limit macros such
+-   as INT32_MAX should only be defined if __STDC_LIMIT_MACROS is. */
+-#define __STDC_LIMIT_MACROS
+ %include "stdint.i"
+ 
+ %include "lldb/lldb-defines.h"
+diff --git a/lldb/bindings/python/python-typemaps.swig b/lldb/bindings/python/python-typemaps.swig
+index b1ace4ff3b1e..5f8f4aa678c4 100644
+--- a/lldb/bindings/python/python-typemaps.swig
++++ b/lldb/bindings/python/python-typemaps.swig
+@@ -439,7 +439,7 @@ bool SetNumberFromPyObject<double>(double &number, PyObject *obj) {
+ 
+ %typemap(out) lldb::FileSP {
+   $result = nullptr;
+-  lldb::FileSP &sp = $1;
++  const lldb::FileSP &sp = $1;
+   if (sp) {
+     PythonFile pyfile = unwrapOrSetPythonException(PythonFile::FromFile(*sp));
+     if (!pyfile.IsValid())
 diff --git a/lldb/include/lldb/Breakpoint/Breakpoint.h b/lldb/include/lldb/Breakpoint/Breakpoint.h
-index f2e2a0d22..426d1129b 100644
+index f2e2a0d22784..426d1129bd10 100644
 --- a/lldb/include/lldb/Breakpoint/Breakpoint.h
 +++ b/lldb/include/lldb/Breakpoint/Breakpoint.h
 @@ -9,6 +9,7 @@
@@ -11,7 +50,7 @@ index f2e2a0d22..426d1129b 100644
  #include <string>
  #include <unordered_set>
 diff --git a/lldb/include/lldb/Core/Module.h b/lldb/include/lldb/Core/Module.h
-index dd7100c46..97d70daad 100644
+index dd7100c4616c..97d70daadbdc 100644
 --- a/lldb/include/lldb/Core/Module.h
 +++ b/lldb/include/lldb/Core/Module.h
 @@ -41,6 +41,7 @@
@@ -41,7 +80,7 @@ index dd7100c46..97d70daad 100644
    ///
    /// Tries to resolve \a vm_addr as a file address (if \a
 diff --git a/lldb/include/lldb/Core/PluginManager.h b/lldb/include/lldb/Core/PluginManager.h
-index be91929c6..8d876fc1f 100644
+index be91929c62e1..8d876fc1fa2f 100644
 --- a/lldb/include/lldb/Core/PluginManager.h
 +++ b/lldb/include/lldb/Core/PluginManager.h
 @@ -508,6 +508,17 @@ public:
@@ -64,7 +103,7 @@ index be91929c6..8d876fc1f 100644
  } // namespace lldb_private
 diff --git a/lldb/include/lldb/Expression/DWARFEvaluator.h b/lldb/include/lldb/Expression/DWARFEvaluator.h
 new file mode 100644
-index 000000000..6811cbeae
+index 000000000000..6811cbeae3d3
 --- /dev/null
 +++ b/lldb/include/lldb/Expression/DWARFEvaluator.h
 @@ -0,0 +1,110 @@
@@ -180,7 +219,7 @@ index 000000000..6811cbeae
 +#endif // LLDB_EXPRESSION_DWARFEVALUATOR_H
 diff --git a/lldb/include/lldb/Expression/DWARFEvaluatorFactory.h b/lldb/include/lldb/Expression/DWARFEvaluatorFactory.h
 new file mode 100644
-index 000000000..f3b496c58
+index 000000000000..f3b496c580e4
 --- /dev/null
 +++ b/lldb/include/lldb/Expression/DWARFEvaluatorFactory.h
 @@ -0,0 +1,56 @@
@@ -241,7 +280,7 @@ index 000000000..f3b496c58
 +
 +#endif // LLDB_EXPRESSION_DWARFEVALUATORFACTORY_H
 diff --git a/lldb/include/lldb/Expression/DWARFExpression.h b/lldb/include/lldb/Expression/DWARFExpression.h
-index 1490ac2d6..35c741d4e 100644
+index 1490ac2d614a..35c741d4e6ba 100644
 --- a/lldb/include/lldb/Expression/DWARFExpression.h
 +++ b/lldb/include/lldb/Expression/DWARFExpression.h
 @@ -120,6 +120,10 @@ public:
@@ -275,7 +314,7 @@ index 1490ac2d6..35c741d4e 100644
    GetLocationExpression(lldb::addr_t load_function_start,
                          lldb::addr_t addr) const;
 diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h
-index aaa2470d2..c15f2db52 100644
+index aaa2470d2931..c15f2db52fbc 100644
 --- a/lldb/include/lldb/Target/Process.h
 +++ b/lldb/include/lldb/Target/Process.h
 @@ -1434,7 +1434,7 @@ public:
@@ -288,7 +327,7 @@ index aaa2470d2..c15f2db52 100644
    /// Read of memory from a process.
    ///
 diff --git a/lldb/include/lldb/Target/ProcessTrace.h b/lldb/include/lldb/Target/ProcessTrace.h
-index 7b9d6b13d..9525fc975 100644
+index 7b9d6b13dd6f..9525fc9750fd 100644
 --- a/lldb/include/lldb/Target/ProcessTrace.h
 +++ b/lldb/include/lldb/Target/ProcessTrace.h
 @@ -59,7 +59,7 @@ public:
@@ -301,7 +340,7 @@ index 7b9d6b13d..9525fc975 100644
    size_t DoReadMemory(lldb::addr_t addr, void *buf, size_t size,
                        Status &error) override;
 diff --git a/lldb/include/lldb/lldb-forward.h b/lldb/include/lldb/lldb-forward.h
-index ad5298151..5a3c0b27a 100644
+index ad5298151e4a..5a3c0b27a738 100644
 --- a/lldb/include/lldb/lldb-forward.h
 +++ b/lldb/include/lldb/lldb-forward.h
 @@ -74,6 +74,7 @@ class Disassembler;
@@ -313,7 +352,7 @@ index ad5298151..5a3c0b27a 100644
  class EmulateInstruction;
  class Environment;
 diff --git a/lldb/include/lldb/lldb-private-interfaces.h b/lldb/include/lldb/lldb-private-interfaces.h
-index 2ed083ec8..f4d500d19 100644
+index 2ed083ec8ae9..f4d500d198e8 100644
 --- a/lldb/include/lldb/lldb-private-interfaces.h
 +++ b/lldb/include/lldb/lldb-private-interfaces.h
 @@ -113,6 +113,8 @@ typedef lldb::REPLSP (*REPLCreateInstance)(Status &error,
@@ -326,7 +365,7 @@ index 2ed083ec8..f4d500d19 100644
  /// \{
  typedef llvm::Expected<lldb::TraceSP> (*TraceCreateInstanceForSessionFile)(
 diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp
-index 19c97be15..1647f93ec 100644
+index 19c97be15066..1647f93ec4f3 100644
 --- a/lldb/source/Core/Module.cpp
 +++ b/lldb/source/Core/Module.cpp
 @@ -16,6 +16,7 @@
@@ -348,7 +387,7 @@ index 19c97be15..1647f93ec 100644
 +  return m_dwarf_evaluator_factory.get();
 +}
 diff --git a/lldb/source/Core/PluginManager.cpp b/lldb/source/Core/PluginManager.cpp
-index fcaa868b0..59a404d4a 100644
+index fcaa868b083e..59a404d4a7e1 100644
 --- a/lldb/source/Core/PluginManager.cpp
 +++ b/lldb/source/Core/PluginManager.cpp
 @@ -1597,3 +1597,32 @@ bool PluginManager::CreateSettingForStructuredDataPlugin(
@@ -385,7 +424,7 @@ index fcaa868b0..59a404d4a 100644
 +  return GetDWARFEvaluatorFactoryInstances().GetCallbackAtIndex(idx);
 +}
 diff --git a/lldb/source/Core/Value.cpp b/lldb/source/Core/Value.cpp
-index fb57c0fed..f92d6a54d 100644
+index fb57c0fedf04..f92d6a54de94 100644
 --- a/lldb/source/Core/Value.cpp
 +++ b/lldb/source/Core/Value.cpp
 @@ -538,7 +538,7 @@ Status Value::GetValueAsData(ExecutionContext *exe_ctx, DataExtractor &data,
@@ -398,7 +437,7 @@ index fb57c0fed..f92d6a54d 100644
              error.SetErrorStringWithFormat(
                  "read memory from 0x%" PRIx64 " failed (%u of %u bytes read)",
 diff --git a/lldb/source/Core/ValueObject.cpp b/lldb/source/Core/ValueObject.cpp
-index 9c1ba99da..b15b214b2 100644
+index 9c1ba99da1d0..b15b214b2a2f 100644
 --- a/lldb/source/Core/ValueObject.cpp
 +++ b/lldb/source/Core/ValueObject.cpp
 @@ -735,7 +735,7 @@ size_t ValueObject::GetPointeeData(DataExtractor &data, uint32_t item_idx,
@@ -411,7 +450,7 @@ index 9c1ba99da..b15b214b2 100644
            data.SetData(data_sp);
            return bytes_read;
 diff --git a/lldb/source/Expression/CMakeLists.txt b/lldb/source/Expression/CMakeLists.txt
-index bf94361dd..4e76d547a 100644
+index bf94361dd6c1..4e76d547aeaf 100644
 --- a/lldb/source/Expression/CMakeLists.txt
 +++ b/lldb/source/Expression/CMakeLists.txt
 @@ -1,5 +1,7 @@
@@ -424,7 +463,7 @@ index bf94361dd..4e76d547a 100644
    ExpressionVariable.cpp
 diff --git a/lldb/source/Expression/DWARFEvaluator.cpp b/lldb/source/Expression/DWARFEvaluator.cpp
 new file mode 100644
-index 000000000..06107e136
+index 000000000000..06107e136197
 --- /dev/null
 +++ b/lldb/source/Expression/DWARFEvaluator.cpp
 @@ -0,0 +1,1952 @@
@@ -2382,7 +2421,7 @@ index 000000000..06107e136
 +}
 diff --git a/lldb/source/Expression/DWARFEvaluatorFactory.cpp b/lldb/source/Expression/DWARFEvaluatorFactory.cpp
 new file mode 100644
-index 000000000..c06126412
+index 000000000000..c0612641204a
 --- /dev/null
 +++ b/lldb/source/Expression/DWARFEvaluatorFactory.cpp
 @@ -0,0 +1,57 @@
@@ -2444,7 +2483,7 @@ index 000000000..c06126412
 +                                          object_address_ptr);
 +}
 diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp
-index a10546c1d..4d13e4642 100644
+index a10546c1deae..4d13e4642af3 100644
 --- a/lldb/source/Expression/DWARFExpression.cpp
 +++ b/lldb/source/Expression/DWARFExpression.cpp
 @@ -15,6 +15,8 @@
@@ -4261,7 +4300,7 @@ index a10546c1d..4d13e4642 100644
  
  static DataExtractor ToDataExtractor(const llvm::DWARFLocationExpression &loc,
 diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp
-index 00e9ccb76..2137a1ac8 100644
+index 00e9ccb762c3..2137a1ac8324 100644
 --- a/lldb/source/Interpreter/CommandInterpreter.cpp
 +++ b/lldb/source/Interpreter/CommandInterpreter.cpp
 @@ -759,6 +759,24 @@ void CommandInterpreter::LoadCommandDictionary() {
@@ -4290,7 +4329,7 @@ index 00e9ccb76..2137a1ac8 100644
        new CommandObjectRegexCommand(
            *this, "kdp-remote",
 diff --git a/lldb/source/Plugins/CMakeLists.txt b/lldb/source/Plugins/CMakeLists.txt
-index 9181a4e47..2be6ec365 100644
+index 9181a4e47675..2be6ec3657c0 100644
 --- a/lldb/source/Plugins/CMakeLists.txt
 +++ b/lldb/source/Plugins/CMakeLists.txt
 @@ -2,6 +2,7 @@ add_subdirectory(ABI)
@@ -4320,14 +4359,14 @@ index 9181a4e47..2be6ec365 100644
    endif()
 diff --git a/lldb/source/Plugins/DWARFEvaluator/CMakeLists.txt b/lldb/source/Plugins/DWARFEvaluator/CMakeLists.txt
 new file mode 100644
-index 000000000..73fad41e1
+index 000000000000..73fad41e1a72
 --- /dev/null
 +++ b/lldb/source/Plugins/DWARFEvaluator/CMakeLists.txt
 @@ -0,0 +1 @@
 +add_subdirectory(wasm)
 diff --git a/lldb/source/Plugins/DWARFEvaluator/wasm/CMakeLists.txt b/lldb/source/Plugins/DWARFEvaluator/wasm/CMakeLists.txt
 new file mode 100644
-index 000000000..e50b1bef7
+index 000000000000..e50b1bef7e69
 --- /dev/null
 +++ b/lldb/source/Plugins/DWARFEvaluator/wasm/CMakeLists.txt
 @@ -0,0 +1,10 @@
@@ -4343,7 +4382,7 @@ index 000000000..e50b1bef7
 +  )
 diff --git a/lldb/source/Plugins/DWARFEvaluator/wasm/WasmDWARFEvaluator.cpp b/lldb/source/Plugins/DWARFEvaluator/wasm/WasmDWARFEvaluator.cpp
 new file mode 100644
-index 000000000..fdda1991d
+index 000000000000..fdda1991d19f
 --- /dev/null
 +++ b/lldb/source/Plugins/DWARFEvaluator/wasm/WasmDWARFEvaluator.cpp
 @@ -0,0 +1,126 @@
@@ -4475,7 +4514,7 @@ index 000000000..fdda1991d
 +}
 diff --git a/lldb/source/Plugins/DWARFEvaluator/wasm/WasmDWARFEvaluator.h b/lldb/source/Plugins/DWARFEvaluator/wasm/WasmDWARFEvaluator.h
 new file mode 100644
-index 000000000..a01159064
+index 000000000000..a01159064a39
 --- /dev/null
 +++ b/lldb/source/Plugins/DWARFEvaluator/wasm/WasmDWARFEvaluator.h
 @@ -0,0 +1,47 @@
@@ -4528,7 +4567,7 @@ index 000000000..a01159064
 +#endif // LLDB_SOURCE_PLUGINS_DWARFEVALUATOR_WASM_WASMDWARFEVALUATOR_H
 diff --git a/lldb/source/Plugins/DWARFEvaluator/wasm/WasmDWARFEvaluatorFactory.cpp b/lldb/source/Plugins/DWARFEvaluator/wasm/WasmDWARFEvaluatorFactory.cpp
 new file mode 100644
-index 000000000..d43e96a34
+index 000000000000..d43e96a34d37
 --- /dev/null
 +++ b/lldb/source/Plugins/DWARFEvaluator/wasm/WasmDWARFEvaluatorFactory.cpp
 @@ -0,0 +1,64 @@
@@ -4598,7 +4637,7 @@ index 000000000..d43e96a34
 +}
 diff --git a/lldb/source/Plugins/DWARFEvaluator/wasm/WasmDWARFEvaluatorFactory.h b/lldb/source/Plugins/DWARFEvaluator/wasm/WasmDWARFEvaluatorFactory.h
 new file mode 100644
-index 000000000..8a946592a
+index 000000000000..8a946592a09a
 --- /dev/null
 +++ b/lldb/source/Plugins/DWARFEvaluator/wasm/WasmDWARFEvaluatorFactory.h
 @@ -0,0 +1,55 @@
@@ -4658,7 +4697,7 @@ index 000000000..8a946592a
 +
 +#endif // LLDB_SOURCE_PLUGINS_DWARFEVALUATOR_WASM_WASMDWARFEVALUATORFACTORY_H
 diff --git a/lldb/source/Plugins/DynamicLoader/wasm-DYLD/DynamicLoaderWasmDYLD.cpp b/lldb/source/Plugins/DynamicLoader/wasm-DYLD/DynamicLoaderWasmDYLD.cpp
-index ae7e011ea..24ea75d19 100644
+index ae7e011eaa52..24ea75d1971c 100644
 --- a/lldb/source/Plugins/DynamicLoader/wasm-DYLD/DynamicLoaderWasmDYLD.cpp
 +++ b/lldb/source/Plugins/DynamicLoader/wasm-DYLD/DynamicLoaderWasmDYLD.cpp
 @@ -62,6 +62,15 @@ void DynamicLoaderWasmDYLD::DidAttach() {
@@ -4678,7 +4717,7 @@ index ae7e011ea..24ea75d19 100644
  
  ThreadPlanSP DynamicLoaderWasmDYLD::GetStepThroughTrampolinePlan(Thread &thread,
 diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
-index 5272da9ab..abc5523bf 100644
+index 5272da9ab33a..abc5523bfd70 100644
 --- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
 +++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
 @@ -23,6 +23,7 @@
@@ -4718,7 +4757,7 @@ index 5272da9ab..abc5523bf 100644
      }
    }
 diff --git a/lldb/source/Plugins/Platform/CMakeLists.txt b/lldb/source/Plugins/Platform/CMakeLists.txt
-index 5f284e517..6084cbc93 100644
+index 5f284e517dca..6084cbc9378d 100644
 --- a/lldb/source/Plugins/Platform/CMakeLists.txt
 +++ b/lldb/source/Plugins/Platform/CMakeLists.txt
 @@ -15,3 +15,4 @@
@@ -4728,7 +4767,7 @@ index 5f284e517..6084cbc93 100644
 +add_subdirectory(wasm-remote)
 diff --git a/lldb/source/Plugins/Platform/wasm-remote/CMakeLists.txt b/lldb/source/Plugins/Platform/wasm-remote/CMakeLists.txt
 new file mode 100644
-index 000000000..4a65765a5
+index 000000000000..4a65765a5659
 --- /dev/null
 +++ b/lldb/source/Plugins/Platform/wasm-remote/CMakeLists.txt
 @@ -0,0 +1,10 @@
@@ -4744,7 +4783,7 @@ index 000000000..4a65765a5
 +  )
 diff --git a/lldb/source/Plugins/Platform/wasm-remote/PlatformRemoteWasmServer.cpp b/lldb/source/Plugins/Platform/wasm-remote/PlatformRemoteWasmServer.cpp
 new file mode 100644
-index 000000000..f26d11f00
+index 000000000000..f26d11f00e5c
 --- /dev/null
 +++ b/lldb/source/Plugins/Platform/wasm-remote/PlatformRemoteWasmServer.cpp
 @@ -0,0 +1,139 @@
@@ -4890,7 +4929,7 @@ index 000000000..f26d11f00
 \ No newline at end of file
 diff --git a/lldb/source/Plugins/Platform/wasm-remote/PlatformRemoteWasmServer.h b/lldb/source/Plugins/Platform/wasm-remote/PlatformRemoteWasmServer.h
 new file mode 100644
-index 000000000..f306a79d3
+index 000000000000..f306a79d3f4f
 --- /dev/null
 +++ b/lldb/source/Plugins/Platform/wasm-remote/PlatformRemoteWasmServer.h
 @@ -0,0 +1,37 @@
@@ -4933,7 +4972,7 @@ index 000000000..f306a79d3
 +#endif
 \ No newline at end of file
 diff --git a/lldb/source/Plugins/Plugins.def.in b/lldb/source/Plugins/Plugins.def.in
-index bf54598fb..b0bd7b996 100644
+index bf54598fb2f3..b0bd7b9965fe 100644
 --- a/lldb/source/Plugins/Plugins.def.in
 +++ b/lldb/source/Plugins/Plugins.def.in
 @@ -31,6 +31,7 @@
@@ -4945,7 +4984,7 @@ index bf54598fb..b0bd7b996 100644
  
  #undef LLDB_PLUGIN
 diff --git a/lldb/source/Plugins/Process/CMakeLists.txt b/lldb/source/Plugins/Process/CMakeLists.txt
-index bea5bac9e..7a0855e02 100644
+index bea5bac9eb21..7a0855e02ca2 100644
 --- a/lldb/source/Plugins/Process/CMakeLists.txt
 +++ b/lldb/source/Plugins/Process/CMakeLists.txt
 @@ -18,3 +18,4 @@ add_subdirectory(Utility)
@@ -4954,7 +4993,7 @@ index bea5bac9e..7a0855e02 100644
  add_subdirectory(minidump)
 +add_subdirectory(wasm)
 diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp
-index 12bc7390c..707ab85e5 100644
+index 12bc7390c729..707ab85e5615 100644
 --- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp
 +++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp
 @@ -285,7 +285,7 @@ bool ProcessElfCore::IsAlive() { return true; }
@@ -4967,7 +5006,7 @@ index 12bc7390c..707ab85e5 100644
    // in core files we have it all cached our our core file anyway.
    return DoReadMemory(addr, buf, size, error);
 diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h
-index d8e3cc9ae..f0bf9c4d3 100644
+index d8e3cc9ae3e1..f0bf9c4d3b00 100644
 --- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h
 +++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h
 @@ -84,7 +84,8 @@ public:
@@ -4981,7 +5020,7 @@ index d8e3cc9ae..f0bf9c4d3 100644
    size_t DoReadMemory(lldb::addr_t addr, void *buf, size_t size,
                        lldb_private::Status &error) override;
 diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
-index 6914b3734..bb8a05604 100644
+index 6914b37348ea..bb8a056049f3 100644
 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
 +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
 @@ -334,6 +334,11 @@ ConstString ProcessGDBRemote::GetPluginName() { return GetPluginNameStatic(); }
@@ -5015,7 +5054,7 @@ index 6914b3734..bb8a05604 100644
        }
      }
 diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h
-index fe04cdddd..e4a14c645 100644
+index fe04cdddd0f5..e4a14c64579a 100644
 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h
 +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h
 @@ -237,6 +237,8 @@ protected:
@@ -5028,7 +5067,7 @@ index fe04cdddd..e4a14c645 100644
    enum {
      eBroadcastBitAsyncContinue = (1 << 0),
 diff --git a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp
-index 84548edb5..0ae6f7e4a 100644
+index 84548edb5caa..0ae6f7e4a177 100644
 --- a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp
 +++ b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp
 @@ -596,7 +596,7 @@ bool ProcessMachCore::WarnBeforeDetach() const { return false; }
@@ -5041,7 +5080,7 @@ index 84548edb5..0ae6f7e4a 100644
    // in core files we have it all cached our our core file anyway.
    return DoReadMemory(addr, buf, size, error);
 diff --git a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.h b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.h
-index db77e96f1..1c930896c 100644
+index db77e96f1072..1c930896c743 100644
 --- a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.h
 +++ b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.h
 @@ -65,7 +65,8 @@ public:
@@ -5055,7 +5094,7 @@ index db77e96f1..1c930896c 100644
    size_t DoReadMemory(lldb::addr_t addr, void *buf, size_t size,
                        lldb_private::Status &error) override;
 diff --git a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
-index 385557422..d8bb21581 100644
+index 385557422758..d8bb21581086 100644
 --- a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
 +++ b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
 @@ -374,7 +374,7 @@ bool ProcessMinidump::IsAlive() { return true; }
@@ -5068,7 +5107,7 @@ index 385557422..d8bb21581 100644
    // we have it all cached in our dump file anyway.
    return DoReadMemory(addr, buf, size, error);
 diff --git a/lldb/source/Plugins/Process/minidump/ProcessMinidump.h b/lldb/source/Plugins/Process/minidump/ProcessMinidump.h
-index 27b0da004..e94ecab43 100644
+index 27b0da0047a5..e94ecab430c1 100644
 --- a/lldb/source/Plugins/Process/minidump/ProcessMinidump.h
 +++ b/lldb/source/Plugins/Process/minidump/ProcessMinidump.h
 @@ -69,8 +69,8 @@ public:
@@ -5084,7 +5123,7 @@ index 27b0da004..e94ecab43 100644
                        Status &error) override;
 diff --git a/lldb/source/Plugins/Process/wasm/CMakeLists.txt b/lldb/source/Plugins/Process/wasm/CMakeLists.txt
 new file mode 100644
-index 000000000..61efb933f
+index 000000000000..61efb933fa62
 --- /dev/null
 +++ b/lldb/source/Plugins/Process/wasm/CMakeLists.txt
 @@ -0,0 +1,12 @@
@@ -5102,7 +5141,7 @@ index 000000000..61efb933f
 +  )
 diff --git a/lldb/source/Plugins/Process/wasm/ProcessWasm.cpp b/lldb/source/Plugins/Process/wasm/ProcessWasm.cpp
 new file mode 100644
-index 000000000..9c0fc7b7f
+index 000000000000..9c0fc7b7f270
 --- /dev/null
 +++ b/lldb/source/Plugins/Process/wasm/ProcessWasm.cpp
 @@ -0,0 +1,261 @@
@@ -5369,7 +5408,7 @@ index 000000000..9c0fc7b7f
 +}
 diff --git a/lldb/source/Plugins/Process/wasm/ProcessWasm.h b/lldb/source/Plugins/Process/wasm/ProcessWasm.h
 new file mode 100644
-index 000000000..d3aece7a6
+index 000000000000..d3aece7a6554
 --- /dev/null
 +++ b/lldb/source/Plugins/Process/wasm/ProcessWasm.h
 @@ -0,0 +1,128 @@
@@ -5503,7 +5542,7 @@ index 000000000..d3aece7a6
 +#endif // LLDB_SOURCE_PLUGINS_PROCESS_WASM_PROCESSWASM_H
 diff --git a/lldb/source/Plugins/Process/wasm/ThreadWasm.cpp b/lldb/source/Plugins/Process/wasm/ThreadWasm.cpp
 new file mode 100644
-index 000000000..fa02073e7
+index 000000000000..fa02073e7a52
 --- /dev/null
 +++ b/lldb/source/Plugins/Process/wasm/ThreadWasm.cpp
 @@ -0,0 +1,35 @@
@@ -5544,7 +5583,7 @@ index 000000000..fa02073e7
 +}
 diff --git a/lldb/source/Plugins/Process/wasm/ThreadWasm.h b/lldb/source/Plugins/Process/wasm/ThreadWasm.h
 new file mode 100644
-index 000000000..0a33c07de
+index 000000000000..0a33c07de994
 --- /dev/null
 +++ b/lldb/source/Plugins/Process/wasm/ThreadWasm.h
 @@ -0,0 +1,41 @@
@@ -5591,7 +5630,7 @@ index 000000000..0a33c07de
 +#endif // LLDB_SOURCE_PLUGINS_PROCESS_WASM_THREADWASM_H
 diff --git a/lldb/source/Plugins/Process/wasm/UnwindWasm.cpp b/lldb/source/Plugins/Process/wasm/UnwindWasm.cpp
 new file mode 100644
-index 000000000..1a195cb93
+index 000000000000..1a195cb9361a
 --- /dev/null
 +++ b/lldb/source/Plugins/Process/wasm/UnwindWasm.cpp
 @@ -0,0 +1,74 @@
@@ -5672,7 +5711,7 @@ index 000000000..1a195cb93
 \ No newline at end of file
 diff --git a/lldb/source/Plugins/Process/wasm/UnwindWasm.h b/lldb/source/Plugins/Process/wasm/UnwindWasm.h
 new file mode 100644
-index 000000000..9bd1dac9a
+index 000000000000..9bd1dac9a98a
 --- /dev/null
 +++ b/lldb/source/Plugins/Process/wasm/UnwindWasm.h
 @@ -0,0 +1,55 @@
@@ -5732,7 +5771,7 @@ index 000000000..9bd1dac9a
 +
 +#endif // lldb_UnwindWasm_h_
 diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
-index ccaf31317..c3ef5aebd 100644
+index ccaf31317d75..c3ef5aebd46d 100644
 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
 +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
 @@ -3212,8 +3212,13 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc,
@@ -5751,7 +5790,7 @@ index ccaf31317..c3ef5aebd 100644
      // DWARF doesn't specify if a DW_TAG_variable is a local, global
      // or static variable, so we have to do a little digging:
 diff --git a/lldb/source/Target/PathMappingList.cpp b/lldb/source/Target/PathMappingList.cpp
-index b660c310e..cd76421ce 100644
+index b660c310ef31..cd76421cec18 100644
 --- a/lldb/source/Target/PathMappingList.cpp
 +++ b/lldb/source/Target/PathMappingList.cpp
 @@ -218,7 +218,12 @@ bool PathMappingList::ReverseRemapPath(const FileSpec &file, FileSpec &fixed) co
@@ -5769,7 +5808,7 @@ index b660c310e..cd76421ce 100644
  
    return {};
 diff --git a/lldb/source/Target/Platform.cpp b/lldb/source/Target/Platform.cpp
-index a77ecddfb..e257f9350 100644
+index a77ecddfbab6..e257f93508f6 100644
 --- a/lldb/source/Target/Platform.cpp
 +++ b/lldb/source/Target/Platform.cpp
 @@ -1970,6 +1970,12 @@ size_t Platform::GetSoftwareBreakpointTrapOpcode(Target &target,
@@ -5786,7 +5825,7 @@ index a77ecddfb..e257f9350 100644
      return 0;
    }
 diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp
-index 8ecc66b59..f14898791 100644
+index 8ecc66b592ea..f148987915de 100644
 --- a/lldb/source/Target/Process.cpp
 +++ b/lldb/source/Target/Process.cpp
 @@ -1892,7 +1892,8 @@ Status Process::DisableSoftwareBreakpoint(BreakpointSite *bp_site) {
@@ -5800,7 +5839,7 @@ index 8ecc66b59..f14898791 100644
    if (!GetDisableMemoryCache()) {
  #if defined(VERIFY_MEMORY_READS)
 diff --git a/lldb/source/Target/ProcessTrace.cpp b/lldb/source/Target/ProcessTrace.cpp
-index c878a2ac4..ad5945b0a 100644
+index c878a2ac4eb9..ad5945b0ad1f 100644
 --- a/lldb/source/Target/ProcessTrace.cpp
 +++ b/lldb/source/Target/ProcessTrace.cpp
 @@ -88,7 +88,7 @@ void ProcessTrace::RefreshStateAfterStop() {}
@@ -5813,7 +5852,7 @@ index c878a2ac4..ad5945b0a 100644
    // we have it all cached in the trace files.
    return DoReadMemory(addr, buf, size, error);
 diff --git a/lldb/source/Target/ThreadPlanStepRange.cpp b/lldb/source/Target/ThreadPlanStepRange.cpp
-index 896e647bb..f76307016 100644
+index 896e647bbb52..f76307016102 100644
 --- a/lldb/source/Target/ThreadPlanStepRange.cpp
 +++ b/lldb/source/Target/ThreadPlanStepRange.cpp
 @@ -334,7 +334,10 @@ bool ThreadPlanStepRange::SetNextBranchBreakpoint() {
@@ -5829,7 +5868,7 @@ index 896e647bb..f76307016 100644
              instructions->GetInstructionAtIndex(last_index);
          size_t last_inst_size = last_inst->GetOpcode().GetByteSize();
 diff --git a/lldb/source/Target/UnixSignals.cpp b/lldb/source/Target/UnixSignals.cpp
-index 4ec2e25c7..24c88fe9a 100644
+index 4ec2e25c7e3b..24c88fe9ae4f 100644
 --- a/lldb/source/Target/UnixSignals.cpp
 +++ b/lldb/source/Target/UnixSignals.cpp
 @@ -46,6 +46,8 @@ lldb::UnixSignalsSP UnixSignals::Create(const ArchSpec &arch) {
@@ -5842,7 +5881,7 @@ index 4ec2e25c7..24c88fe9a 100644
      return std::make_shared<UnixSignals>();
    }
 diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h
-index 4310ba9ce..297b33879 100644
+index 4310ba9ce9e0..297b3387999d 100644
 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h
 +++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h
 @@ -13,6 +13,7 @@
@@ -5854,7 +5893,7 @@ index 4310ba9ce..297b33879 100644
  #include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h"
  #include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h"
 diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h
-index 753b1998c..27370c62d 100644
+index 753b1998c40c..27370c62dd6e 100644
 --- a/llvm/include/llvm/Support/MathExtras.h
 +++ b/llvm/include/llvm/Support/MathExtras.h
 @@ -16,6 +16,7 @@

+ 32 - 0
ci/pre_commit_hook_sample

@@ -0,0 +1,32 @@
+#!/bin/bash
+
+# Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# This is a sample of pre-commit hook that can be used to make your code fit the WAMR CI code style requirements.
+# You need to have clang-format-12 installed to use this hook.
+# To add this pre-commit hook, copy it to <path_to_wamr>/.git/hooks/pre-commit
+# (you don't need any extensions here)
+
+# Check whether a file name ends in a C or C++ extension (.h, .c or .cpp).
+# $1: file path to test. Returns 0 on a match and 1 otherwise.
+is_c_or_cpp_file() {
+    file="$1"
+    if [[ "$file" =~ \.(h|c|cpp)$ ]]; then
+        return 0
+    else
+        return 1
+    fi
+}
+
+# Format staged C/C++ files with clang-format-12 and re-stage them; ACMR filter skips staged deletions
+for staged_file in $(git diff --cached --name-only --diff-filter=ACMR); do
+    if is_c_or_cpp_file "$staged_file"; then
+        clang-format-12 -Werror --style file --dry-run "$staged_file" 2>/dev/null
+        if [ $? -ne 0 ]; then
+            echo "Issues are found in $staged_file. Applying the fix"
+            clang-format-12 --style file -i "$staged_file"
+        fi
+        git add "$staged_file"  # Add the modified file back to staging
+    fi
+done

+ 15 - 0
ci/setup.sh

@@ -0,0 +1,15 @@
+#!/bin/bash
+
+# Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# This script executes some commands to make your onboarding with WAMR easier.
+# For example, setting pre-commit hook that will make your code compliant with the
+# code style requirements checked in WAMR CI. Paths are resolved from this script's location.
+
+echo "Copy the pre-commit hook to your hooks folder"
+cp "$(dirname "$0")/pre_commit_hook_sample" "$(dirname "$0")/../.git/hooks/pre-commit" || exit 1
+
+# Feel free to propose your commands to this script to make developing WAMR easier
+
+echo "Setup is done"

+ 128 - 0
ci/validate_lldb.py

@@ -0,0 +1,128 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2023 Intel Corporation.  All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+
+import argparse
+import time
+from pathlib import Path
+import subprocess, shlex
+
+SCRIPT_DIR = Path(__file__).parent.resolve()
+REPO_ROOT_DIR = SCRIPT_DIR.parent
+SAMPLE_CODE_FILE = REPO_ROOT_DIR / 'product-mini/app-samples/hello-world/main.c'
+WASM_OUT_FILE = SCRIPT_DIR / 'out.wasm'
+
+parser = argparse.ArgumentParser(
+    description="Validate the customized lldb with sample code"
+)
+parser.add_argument(
+    "-l", "--lldb", dest='lldb', default='lldb', help="path to lldb executable"
+)
+parser.add_argument(
+    "-w", "--wamr", dest='wamr', default='iwasm', help="path to iwasm executable"
+)
+parser.add_argument(
+    "-p", "--port", dest='port', default='1234', help="debug server listen port"
+)
+parser.add_argument(
+    "-v", "--verbose", dest='verbose', action='store_true', default=False, help="display lldb stdout"
+)
+
+options = parser.parse_args()
+
+lldb_command_epilogue = '-o q'
+
+test_cases = {
+    'run_to_exit': '-o c',
+    'func_breakpoint': '-o "b main" -o c -o c',
+    'line_breakpoint': '-o "b main.c:12" -o c -o c',
+    'break_on_unknown_func': '-o "b not_a_func" -o c',
+    'watch_point': '-o "b main" -o c -o "watchpoint set variable buf" -o c -o "fr v buf" -o c',
+}
+
+# Step1: Build wasm module with debug information
+build_cmd = f'/opt/wasi-sdk/bin/clang -g -O0 -o {WASM_OUT_FILE} {SAMPLE_CODE_FILE}'
+try:
+    print(f'building wasm module ...', end='', flush=True)
+    subprocess.check_call(shlex.split(build_cmd))
+    print(f'\t OK')
+except subprocess.CalledProcessError:
+    print("Failed to build wasm module with debug information")
+    exit(1)
+
+def print_process_output(p):
+    try:
+        outs, errs = p.communicate(timeout=2)
+        print("stdout:")
+        print(outs)
+        print("stderr:")
+        print(errs)
+    except subprocess.TimeoutExpired:
+        print("Failed to get process output")
+
+# Step2: Launch WAMR in debug mode and validate lldb commands
+
+iteration = 0
+for case, cmd in test_cases.items():
+    lldb_command_prologue = f'{options.lldb} -o "process connect -p wasm connect://127.0.0.1:{int(options.port) + iteration}"'
+    wamr_cmd = f'{options.wamr} -g=127.0.0.1:{int(options.port) + iteration} {WASM_OUT_FILE}'
+    iteration += 1
+
+    has_error = False
+    print(f'validating case [{case}] ...', end='', flush=True)
+    lldb_cmd = f'{lldb_command_prologue} {cmd} {lldb_command_epilogue}'
+
+    wamr_process = subprocess.Popen(shlex.split(
+        wamr_cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
+
+    time.sleep(0.1)
+    if (wamr_process.poll() != None):
+        print("\nWAMR doesn't wait for lldb connection")
+        print_process_output(wamr_process)
+        exit(1)
+
+    lldb_process = subprocess.Popen(shlex.split(
+        lldb_cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
+
+    if (options.verbose):
+        while (lldb_process.poll() is None):
+            print(lldb_process.stdout.read(), end='', flush=True)
+
+    try:
+        if (lldb_process.wait(5) != 0):
+            print(f"\nFailed to validate case [{case}]")
+            print_process_output(lldb_process)
+            has_error = True
+
+        if wamr_process.wait(2) != 0:
+            print("\nWAMR process doesn't exit normally")
+            print_process_output(wamr_process)
+            has_error = True
+
+    except subprocess.TimeoutExpired:
+        print(f"\nFailed to validate case [{case}]")
+        print("wamr output:")
+        print_process_output(wamr_process)
+        print("lldb output:")
+        print_process_output(lldb_process)
+        has_error = True
+    finally:
+        if (lldb_process.poll() == None):
+            print(f'\nterminating lldb process [{lldb_process.pid}]')
+            lldb_process.kill()
+        if (wamr_process.poll() == None):
+            print(f'terminating wamr process [{wamr_process.pid}]')
+            wamr_process.kill()
+
+        if (has_error):
+            exit(1)
+
+    print(f'\t OK')
+
+    # wait 100ms to ensure the socket is closed
+    time.sleep(0.1)
+
+print('Validate lldb success')
+exit(0)

+ 25 - 1
core/config.h

@@ -144,6 +144,14 @@
 #define WASM_ENABLE_WASI_NN 0
 #endif
 
+#ifndef WASM_ENABLE_WASI_NN_GPU
+#define WASM_ENABLE_WASI_NN_GPU 0
+#endif
+
+#ifndef WASM_ENABLE_WASI_NN_EXTERNAL_DELEGATE
+#define WASM_ENABLE_WASI_NN_EXTERNAL_DELEGATE 0
+#endif
+
 /* Default disable libc emcc */
 #ifndef WASM_ENABLE_LIBC_EMCC
 #define WASM_ENABLE_LIBC_EMCC 0
@@ -384,7 +392,7 @@
 #define APP_THREAD_STACK_SIZE_DEFAULT (64 * 1024)
 #define APP_THREAD_STACK_SIZE_MIN (48 * 1024)
 #else
-#define APP_THREAD_STACK_SIZE_DEFAULT (32 * 1024)
+#define APP_THREAD_STACK_SIZE_DEFAULT (64 * 1024)
 #define APP_THREAD_STACK_SIZE_MIN (24 * 1024)
 #endif
 #endif /* end of !(defined(APP_THREAD_STACK_SIZE_DEFAULT) \
@@ -456,4 +464,20 @@
 #define WASM_DISABLE_WRITE_GS_BASE 0
 #endif
 
+/* Configurable bounds checks */
+#ifndef WASM_CONFIGURABLE_BOUNDS_CHECKS
+#define WASM_CONFIGURABLE_BOUNDS_CHECKS 0
+#endif
+
+/* Some chips cannot support external RAM with rwx attributes at the same
+   time; it has to be mapped into 2 spaces, ibus and dbus: code can be
+   read/written in dbus and read/executed in ibus. So there are 2 steps to
+   execute the code: first, copy it and do relocation in dbus space, and
+   second, execute it in ibus space. Since the contents of the 2 spaces
+   are the same, we call it bus mirror.
+ */
+#ifndef WASM_MEM_DUAL_BUS_MIRROR
+#define WASM_MEM_DUAL_BUS_MIRROR 0
+#endif
+
 #endif /* end of _CONFIG_H_ */

+ 129 - 0
core/iwasm/aot/aot_intrinsic.c

@@ -648,6 +648,42 @@ add_f64_common_intrinsics(AOTCompContext *comp_ctx)
     add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F64_CMP);
 }
 
+static void
+add_f32xi32_intrinsics(AOTCompContext *comp_ctx)
+{
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_TO_I32);
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_TO_U32);
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_I32_TO_F32);
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_U32_TO_F32);
+}
+
+static void
+add_f64xi32_intrinsics(AOTCompContext *comp_ctx)
+{
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F64_TO_I32);
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F64_TO_U32);
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_I32_TO_F64);
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_U32_TO_F64);
+}
+
+static void
+add_f32xi64_intrinsics(AOTCompContext *comp_ctx)
+{
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_TO_I64);
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_TO_U64);
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_I64_TO_F32);
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_U64_TO_F32);
+}
+
+static void
+add_f64xi64_intrinsics(AOTCompContext *comp_ctx)
+{
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F64_TO_I64);
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F64_TO_U64);
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_I64_TO_F64);
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_U64_TO_F64);
+}
+
 static void
 add_common_float_integer_convertion(AOTCompContext *comp_ctx)
 {
@@ -705,8 +741,101 @@ aot_intrinsic_check_capability(const AOTCompContext *comp_ctx,
 void
 aot_intrinsic_fill_capability_flags(AOTCompContext *comp_ctx)
 {
+    uint32 i;
+
     memset(comp_ctx->flags, 0, sizeof(comp_ctx->flags));
 
+    /* Intrinsics from command line have highest priority */
+
+    if (comp_ctx->builtin_intrinsics) {
+
+        /* Handle 'all' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "all")) {
+            for (i = 0; i < g_intrinsic_count; i++) {
+                add_intrinsic_capability(comp_ctx, g_intrinsic_mapping[i].flag);
+            }
+            return;
+        }
+
+        /* Handle 'i32.common' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "i32.common")) {
+            add_i32_common_intrinsics(comp_ctx);
+        }
+
+        /* Handle 'i64.common' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "i64.common")) {
+            add_i64_common_intrinsics(comp_ctx);
+        }
+
+        /* Handle 'fp.common' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "fp.common")) {
+            add_f32_common_intrinsics(comp_ctx);
+            add_f64_common_intrinsics(comp_ctx);
+        }
+
+        /* Handle 'f32.common' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "f32.common")) {
+            add_f32_common_intrinsics(comp_ctx);
+        }
+
+        /* Handle 'f64.common' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "f64.common")) {
+            add_f64_common_intrinsics(comp_ctx);
+        }
+
+        /* Handle 'f32xi32' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "f32xi32")) {
+            add_f32xi32_intrinsics(comp_ctx);
+        }
+
+        /* Handle 'f64xi32' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "f64xi32")) {
+            add_f64xi32_intrinsics(comp_ctx);
+        }
+
+        /* Handle 'f32xi64' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "f32xi64")) {
+            add_f32xi64_intrinsics(comp_ctx);
+        }
+
+        /* Handle 'f64xi64' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "f64xi64")) {
+            add_f64xi64_intrinsics(comp_ctx);
+        }
+
+        /* Handle 'fpxint' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "fpxint")) {
+            add_f32xi32_intrinsics(comp_ctx);
+            add_f64xi32_intrinsics(comp_ctx);
+            add_f32xi64_intrinsics(comp_ctx);
+            add_f64xi64_intrinsics(comp_ctx);
+        }
+
+        /* Handle 'constop' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "constop")) {
+            add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_I32_CONST);
+            add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_I64_CONST);
+            add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_CONST);
+            add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F64_CONST);
+        }
+
+        /* Handle 'fp.common' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "fp.common")) {
+            add_f32_common_intrinsics(comp_ctx);
+            add_f64_common_intrinsics(comp_ctx);
+        }
+
+        /* Handle other single items */
+        for (i = 0; i < g_intrinsic_count; i++) {
+            if (strstr(comp_ctx->builtin_intrinsics,
+                       g_intrinsic_mapping[i].llvm_intrinsic)) {
+                add_intrinsic_capability(comp_ctx, g_intrinsic_mapping[i].flag);
+            }
+        }
+
+        return;
+    }
+
     if (!comp_ctx->target_cpu)
         return;
 

+ 19 - 53
core/iwasm/aot/aot_loader.c

@@ -1644,27 +1644,6 @@ load_function_section(const uint8 *buf, const uint8 *buf_end, AOTModule *module,
     const uint8 *p = buf, *p_end = buf_end;
     uint32 i;
     uint64 size, text_offset;
-#if defined(OS_ENABLE_HW_BOUND_CHECK) && defined(BH_PLATFORM_WINDOWS)
-    RUNTIME_FUNCTION *rtl_func_table;
-    AOTUnwindInfo *unwind_info;
-    uint32 unwind_info_offset = module->code_size - sizeof(AOTUnwindInfo);
-    uint32 unwind_code_offset = unwind_info_offset - PLT_ITEM_SIZE;
-#endif
-
-#if defined(OS_ENABLE_HW_BOUND_CHECK) && defined(BH_PLATFORM_WINDOWS)
-    unwind_info = (AOTUnwindInfo *)((uint8 *)module->code + module->code_size
-                                    - sizeof(AOTUnwindInfo));
-    unwind_info->Version = 1;
-    unwind_info->Flags = UNW_FLAG_NHANDLER;
-    *(uint32 *)&unwind_info->UnwindCode[0] = unwind_code_offset;
-
-    size = sizeof(RUNTIME_FUNCTION) * (uint64)module->func_count;
-    if (size > 0
-        && !(rtl_func_table = module->rtl_func_table =
-                 loader_malloc(size, error_buf, error_buf_size))) {
-        return false;
-    }
-#endif
 
     size = sizeof(void *) * (uint64)module->func_count;
     if (size > 0
@@ -1692,31 +1671,7 @@ load_function_section(const uint8 *buf, const uint8 *buf_end, AOTModule *module,
         /* bits[0] of thumb function address must be 1 */
         module->func_ptrs[i] = (void *)((uintptr_t)module->func_ptrs[i] | 1);
 #endif
-#if defined(OS_ENABLE_HW_BOUND_CHECK) && defined(BH_PLATFORM_WINDOWS)
-        rtl_func_table[i].BeginAddress = (DWORD)text_offset;
-        if (i > 0) {
-            rtl_func_table[i - 1].EndAddress = rtl_func_table[i].BeginAddress;
-        }
-        rtl_func_table[i].UnwindInfoAddress = (DWORD)unwind_info_offset;
-#endif
-    }
-
-#if defined(OS_ENABLE_HW_BOUND_CHECK) && defined(BH_PLATFORM_WINDOWS)
-    if (module->func_count > 0) {
-        uint32 plt_table_size =
-            module->is_indirect_mode ? 0 : get_plt_table_size();
-        rtl_func_table[module->func_count - 1].EndAddress =
-            (DWORD)(module->code_size - plt_table_size);
-
-        if (!RtlAddFunctionTable(rtl_func_table, module->func_count,
-                                 (DWORD64)(uintptr_t)module->code)) {
-            set_error_buf(error_buf, error_buf_size,
-                          "add dynamic function table failed");
-            return false;
-        }
-        module->rtl_func_table_registered = true;
     }
-#endif
 
     /* Set start function when function pointers are resolved */
     if (module->start_func_index != (uint32)-1) {
@@ -1843,6 +1798,13 @@ get_data_section_addr(AOTModule *module, const char *section_name,
     return NULL;
 }
 
+const void *
+aot_get_data_section_addr(AOTModule *module, const char *section_name,
+                          uint32 *p_data_size)
+{
+    return get_data_section_addr(module, section_name, p_data_size);
+}
+
 static void *
 resolve_target_sym(const char *symbol, int32 *p_index)
 {
@@ -3006,6 +2968,9 @@ create_sections(AOTModule *module, const uint8 *buf, uint32 size,
     uint32 section_size;
     uint64 total_size;
     uint8 *aot_text;
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+    uint8 *mirrored_text;
+#endif
 
     if (!resolve_execute_mode(buf, size, &is_indirect_mode, error_buf,
                               error_buf_size)) {
@@ -3064,8 +3029,17 @@ create_sections(AOTModule *module, const uint8 *buf, uint32 size,
                     bh_assert((uintptr_t)aot_text < INT32_MAX);
 #endif
 #endif
+
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+                    mirrored_text = os_get_dbus_mirror(aot_text);
+                    bh_assert(mirrored_text != NULL);
+                    bh_memcpy_s(mirrored_text, (uint32)total_size,
+                                section->section_body, (uint32)section_size);
+                    os_dcache_flush();
+#else
                     bh_memcpy_s(aot_text, (uint32)total_size,
                                 section->section_body, (uint32)section_size);
+#endif
                     section->section_body = aot_text;
                     destroy_aot_text = true;
 
@@ -3242,14 +3216,6 @@ aot_unload(AOTModule *module)
     }
 #endif
 
-#if defined(OS_ENABLE_HW_BOUND_CHECK) && defined(BH_PLATFORM_WINDOWS)
-    if (module->rtl_func_table) {
-        if (module->rtl_func_table_registered)
-            RtlDeleteFunctionTable(module->rtl_func_table);
-        wasm_runtime_free(module->rtl_func_table);
-    }
-#endif
-
 #if (defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)) \
     && !defined(BH_PLATFORM_WINDOWS)
     {

+ 51 - 69
core/iwasm/aot/aot_runtime.c

@@ -42,6 +42,11 @@ bh_static_assert(offsetof(AOTModuleInstance, cur_exception)
 bh_static_assert(offsetof(AOTModuleInstance, global_table_data)
                  == 13 * sizeof(uint64) + 128 + 11 * sizeof(uint64));
 
+bh_static_assert(sizeof(AOTMemoryInstance) == 104);
+bh_static_assert(offsetof(AOTTableInstance, elems) == 8);
+
+bh_static_assert(offsetof(AOTModuleInstanceExtra, stack_sizes) == 0);
+
 static void
 set_error_buf(char *error_buf, uint32 error_buf_size, const char *string)
 {
@@ -334,11 +339,8 @@ memories_deinstantiate(AOTModuleInstance *module_inst)
         memory_inst = module_inst->memories[i];
         if (memory_inst) {
 #if WASM_ENABLE_SHARED_MEMORY != 0
-            if (memory_inst->is_shared) {
-                int32 ref_count = shared_memory_dec_reference(
-                    (WASMModuleCommon *)module_inst->module);
-                bh_assert(ref_count >= 0);
-
+            if (shared_memory_is_shared(memory_inst)) {
+                uint32 ref_count = shared_memory_dec_reference(memory_inst);
                 /* if the reference count is not zero,
                     don't free the memory */
                 if (ref_count > 0)
@@ -368,9 +370,10 @@ memories_deinstantiate(AOTModuleInstance *module_inst)
 }
 
 static AOTMemoryInstance *
-memory_instantiate(AOTModuleInstance *module_inst, AOTModule *module,
-                   AOTMemoryInstance *memory_inst, AOTMemory *memory,
-                   uint32 heap_size, char *error_buf, uint32 error_buf_size)
+memory_instantiate(AOTModuleInstance *module_inst, AOTModuleInstance *parent,
+                   AOTModule *module, AOTMemoryInstance *memory_inst,
+                   AOTMemory *memory, uint32 memory_idx, uint32 heap_size,
+                   char *error_buf, uint32 error_buf_size)
 {
     void *heap_handle;
     uint32 num_bytes_per_page = memory->num_bytes_per_page;
@@ -391,23 +394,13 @@ memory_instantiate(AOTModuleInstance *module_inst, AOTModule *module,
     bool is_shared_memory = memory->memory_flags & 0x02 ? true : false;
 
     /* Shared memory */
-    if (is_shared_memory) {
+    if (is_shared_memory && parent != NULL) {
         AOTMemoryInstance *shared_memory_instance;
-        WASMSharedMemNode *node =
-            wasm_module_get_shared_memory((WASMModuleCommon *)module);
-        /* If the memory of this module has been instantiated,
-            return the memory instance directly */
-        if (node) {
-            uint32 ref_count;
-            ref_count = shared_memory_inc_reference((WASMModuleCommon *)module);
-            bh_assert(ref_count > 0);
-            shared_memory_instance =
-                (AOTMemoryInstance *)shared_memory_get_memory_inst(node);
-            bh_assert(shared_memory_instance);
-
-            (void)ref_count;
-            return shared_memory_instance;
-        }
+        bh_assert(memory_idx == 0);
+        bh_assert(parent->memory_count > memory_idx);
+        shared_memory_instance = parent->memories[memory_idx];
+        shared_memory_inc_reference(shared_memory_instance);
+        return shared_memory_instance;
     }
 #endif
 
@@ -427,7 +420,7 @@ memory_instantiate(AOTModuleInstance *module_inst, AOTModule *module,
         if (num_bytes_per_page < heap_size) {
             set_error_buf(error_buf, error_buf_size,
                           "failed to insert app heap into linear memory, "
-                          "try using `--heap_size=0` option");
+                          "try using `--heap-size=0` option");
             return NULL;
         }
     }
@@ -485,7 +478,7 @@ memory_instantiate(AOTModuleInstance *module_inst, AOTModule *module,
         if (init_page_count > DEFAULT_MAX_PAGES) {
             set_error_buf(error_buf, error_buf_size,
                           "failed to insert app heap into linear memory, "
-                          "try using `--heap_size=0` option");
+                          "try using `--heap-size=0` option");
             return NULL;
         }
         else if (init_page_count == DEFAULT_MAX_PAGES) {
@@ -604,23 +597,12 @@ memory_instantiate(AOTModuleInstance *module_inst, AOTModule *module,
 
 #if WASM_ENABLE_SHARED_MEMORY != 0
     if (is_shared_memory) {
-        memory_inst->is_shared = true;
-        if (!shared_memory_set_memory_inst(
-                (WASMModuleCommon *)module,
-                (WASMMemoryInstanceCommon *)memory_inst)) {
-            set_error_buf(error_buf, error_buf_size, "allocate memory failed");
-            goto fail3;
-        }
+        memory_inst->ref_count = 1;
     }
 #endif
 
     return memory_inst;
 
-#if WASM_ENABLE_SHARED_MEMORY != 0
-fail3:
-    if (heap_size > 0)
-        mem_allocator_destroy(memory_inst->heap_handle);
-#endif
 fail2:
     if (heap_size > 0)
         wasm_runtime_free(memory_inst->heap_handle);
@@ -649,8 +631,9 @@ aot_get_default_memory(AOTModuleInstance *module_inst)
 }
 
 static bool
-memories_instantiate(AOTModuleInstance *module_inst, AOTModule *module,
-                     uint32 heap_size, char *error_buf, uint32 error_buf_size)
+memories_instantiate(AOTModuleInstance *module_inst, AOTModuleInstance *parent,
+                     AOTModule *module, uint32 heap_size, char *error_buf,
+                     uint32 error_buf_size)
 {
     uint32 global_index, global_data_offset, base_offset, length;
     uint32 i, memory_count = module->memory_count;
@@ -667,8 +650,8 @@ memories_instantiate(AOTModuleInstance *module_inst, AOTModule *module,
 
     memories = module_inst->global_table_data.memory_instances;
     for (i = 0; i < memory_count; i++, memories++) {
-        memory_inst = memory_instantiate(module_inst, module, memories,
-                                         &module->memories[i], heap_size,
+        memory_inst = memory_instantiate(module_inst, parent, module, memories,
+                                         &module->memories[i], i, heap_size,
                                          error_buf, error_buf_size);
         if (!memory_inst) {
             return false;
@@ -1095,9 +1078,9 @@ check_linked_symbol(AOTModule *module, char *error_buf, uint32 error_buf_size)
 }
 
 AOTModuleInstance *
-aot_instantiate(AOTModule *module, bool is_sub_inst, WASMExecEnv *exec_env_main,
-                uint32 stack_size, uint32 heap_size, char *error_buf,
-                uint32 error_buf_size)
+aot_instantiate(AOTModule *module, AOTModuleInstance *parent,
+                WASMExecEnv *exec_env_main, uint32 stack_size, uint32 heap_size,
+                char *error_buf, uint32 error_buf_size)
 {
     AOTModuleInstance *module_inst;
     const uint32 module_inst_struct_size =
@@ -1107,6 +1090,7 @@ aot_instantiate(AOTModule *module, bool is_sub_inst, WASMExecEnv *exec_env_main,
     uint64 total_size, table_size = 0;
     uint8 *p;
     uint32 i, extra_info_offset;
+    const bool is_sub_inst = parent != NULL;
 
     /* Check heap size */
     heap_size = align_uint(heap_size, 8);
@@ -1166,7 +1150,7 @@ aot_instantiate(AOTModule *module, bool is_sub_inst, WASMExecEnv *exec_env_main,
         goto fail;
 
     /* Initialize memory space */
-    if (!memories_instantiate(module_inst, module, heap_size, error_buf,
+    if (!memories_instantiate(module_inst, parent, module, heap_size, error_buf,
                               error_buf_size))
         goto fail;
 
@@ -1210,6 +1194,9 @@ aot_instantiate(AOTModule *module, bool is_sub_inst, WASMExecEnv *exec_env_main,
 #endif
     module_inst->default_wasm_stack_size = stack_size;
 
+    ((AOTModuleInstanceExtra *)module_inst->e)->stack_sizes =
+        aot_get_data_section_addr(module, AOT_STACK_SIZES_SECTION_NAME, NULL);
+
 #if WASM_ENABLE_PERF_PROFILING != 0
     total_size = (uint64)sizeof(AOTFuncPerfProfInfo)
                  * (module->import_func_count + module->func_count);
@@ -1256,16 +1243,6 @@ aot_deinstantiate(AOTModuleInstance *module_inst, bool is_sub_inst)
         wasm_exec_env_destroy((WASMExecEnv *)module_inst->exec_env_singleton);
     }
 
-#if WASM_ENABLE_LIBC_WASI != 0
-    /* Destroy wasi resource before freeing app heap, since some fields of
-       wasi contex are allocated from app heap, and if app heap is freed,
-       these fields will be set to NULL, we cannot free their internal data
-       which may allocated from global heap. */
-    /* Only destroy wasi ctx in the main module instance */
-    if (!is_sub_inst)
-        wasm_runtime_destroy_wasi((WASMModuleInstanceCommon *)module_inst);
-#endif
-
 #if WASM_ENABLE_PERF_PROFILING != 0
     if (module_inst->func_perf_profilings)
         wasm_runtime_free(module_inst->func_perf_profilings);
@@ -1294,14 +1271,18 @@ aot_deinstantiate(AOTModuleInstance *module_inst, bool is_sub_inst)
     if (module_inst->func_type_indexes)
         wasm_runtime_free(module_inst->func_type_indexes);
 
-    if (((AOTModuleInstanceExtra *)module_inst->e)->c_api_func_imports)
-        wasm_runtime_free(
-            ((AOTModuleInstanceExtra *)module_inst->e)->c_api_func_imports);
+    if (((AOTModuleInstanceExtra *)module_inst->e)->common.c_api_func_imports)
+        wasm_runtime_free(((AOTModuleInstanceExtra *)module_inst->e)
+                              ->common.c_api_func_imports);
 
+    if (!is_sub_inst) {
+#if WASM_ENABLE_LIBC_WASI != 0
+        wasm_runtime_destroy_wasi((WASMModuleInstanceCommon *)module_inst);
+#endif
 #if WASM_ENABLE_WASI_NN != 0
-    if (!is_sub_inst)
         wasi_nn_destroy(module_inst);
 #endif
+    }
 
     wasm_runtime_free(module_inst);
 }
@@ -1944,8 +1925,8 @@ aot_invoke_native(WASMExecEnv *exec_env, uint32 func_idx, uint32 argc,
     AOTModuleInstanceExtra *module_inst_extra =
         (AOTModuleInstanceExtra *)module_inst->e;
     CApiFuncImport *c_api_func_import =
-        module_inst_extra->c_api_func_imports
-            ? module_inst_extra->c_api_func_imports + func_idx
+        module_inst_extra->common.c_api_func_imports
+            ? module_inst_extra->common.c_api_func_imports + func_idx
             : NULL;
     uint32 *func_type_indexes = module_inst->func_type_indexes;
     uint32 func_type_idx = func_type_indexes[func_idx];
@@ -2481,13 +2462,13 @@ aot_table_init(AOTModuleInstance *module_inst, uint32 tbl_idx,
     tbl_seg = module->table_init_data_list[tbl_seg_idx];
     bh_assert(tbl_seg);
 
-    if (!length) {
+    if (offset_len_out_of_bounds(src_offset, length, tbl_seg->func_index_count)
+        || offset_len_out_of_bounds(dst_offset, length, tbl_inst->cur_size)) {
+        aot_set_exception_with_id(module_inst, EXCE_OUT_OF_BOUNDS_TABLE_ACCESS);
         return;
     }
 
-    if (length + src_offset > tbl_seg->func_index_count
-        || dst_offset + length > tbl_inst->cur_size) {
-        aot_set_exception_with_id(module_inst, EXCE_OUT_OF_BOUNDS_TABLE_ACCESS);
+    if (!length) {
         return;
     }
 
@@ -2520,8 +2501,9 @@ aot_table_copy(AOTModuleInstance *module_inst, uint32 src_tbl_idx,
     dst_tbl_inst = module_inst->tables[dst_tbl_idx];
     bh_assert(dst_tbl_inst);
 
-    if ((uint64)dst_offset + length > dst_tbl_inst->cur_size
-        || (uint64)src_offset + length > src_tbl_inst->cur_size) {
+    if (offset_len_out_of_bounds(dst_offset, length, dst_tbl_inst->cur_size)
+        || offset_len_out_of_bounds(src_offset, length,
+                                    src_tbl_inst->cur_size)) {
         aot_set_exception_with_id(module_inst, EXCE_OUT_OF_BOUNDS_TABLE_ACCESS);
         return;
     }
@@ -2546,7 +2528,7 @@ aot_table_fill(AOTModuleInstance *module_inst, uint32 tbl_idx, uint32 length,
     tbl_inst = module_inst->tables[tbl_idx];
     bh_assert(tbl_inst);
 
-    if (data_offset + length > tbl_inst->cur_size) {
+    if (offset_len_out_of_bounds(data_offset, length, tbl_inst->cur_size)) {
         aot_set_exception_with_id(module_inst, EXCE_OUT_OF_BOUNDS_TABLE_ACCESS);
         return;
     }

+ 10 - 37
core/iwasm/aot/aot_runtime.h

@@ -88,33 +88,10 @@ typedef struct AOTFunctionInstance {
 } AOTFunctionInstance;
 
 typedef struct AOTModuleInstanceExtra {
-    CApiFuncImport *c_api_func_imports;
+    DefPointer(const uint32 *, stack_sizes);
+    WASMModuleInstanceExtraCommon common;
 } AOTModuleInstanceExtra;
 
-#if defined(OS_ENABLE_HW_BOUND_CHECK) && defined(BH_PLATFORM_WINDOWS)
-/* clang-format off */
-typedef struct AOTUnwindInfo {
-    uint8 Version       : 3;
-    uint8 Flags         : 5;
-    uint8 SizeOfProlog;
-    uint8 CountOfCodes;
-    uint8 FrameRegister : 4;
-    uint8 FrameOffset   : 4;
-    struct {
-        struct {
-            uint8 CodeOffset;
-            uint8 UnwindOp : 4;
-            uint8 OpInfo   : 4;
-        };
-        uint16 FrameOffset;
-    } UnwindCode[1];
-} AOTUnwindInfo;
-/* clang-format on */
-
-/* size of mov instruction and jmp instruction */
-#define PLT_ITEM_SIZE 12
-#endif
-
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
 typedef struct GOTItem {
     uint32 func_idx;
@@ -210,14 +187,6 @@ typedef struct AOTModule {
     uint32 float_plt_count;
 #endif
 
-#if defined(OS_ENABLE_HW_BOUND_CHECK) && defined(BH_PLATFORM_WINDOWS)
-    /* dynamic function table to be added by RtlAddFunctionTable(),
-       used to unwind the call stack and register exception handler
-       for AOT functions */
-    RUNTIME_FUNCTION *rtl_func_table;
-    bool rtl_func_table_registered;
-#endif
-
 #if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
     uint32 got_item_count;
     GOTItemList got_item_list;
@@ -401,7 +370,7 @@ aot_unload(AOTModule *module);
  * Instantiate a AOT module.
  *
  * @param module the AOT module to instantiate
- * @param is_sub_inst the flag of sub instance
+ * @param parent the parent module instance, NULL if not a sub instance
  * @param heap_size the default heap size of the module instance, a heap will
  *        be created besides the app memory space. Both wasm app and native
  *        function can allocate memory from the heap. If heap_size is 0, the
@@ -412,9 +381,9 @@ aot_unload(AOTModule *module);
  * @return return the instantiated AOT module instance, NULL if failed
  */
 AOTModuleInstance *
-aot_instantiate(AOTModule *module, bool is_sub_inst, WASMExecEnv *exec_env_main,
-                uint32 stack_size, uint32 heap_size, char *error_buf,
-                uint32 error_buf_size);
+aot_instantiate(AOTModule *module, AOTModuleInstance *parent,
+                WASMExecEnv *exec_env_main, uint32 stack_size, uint32 heap_size,
+                char *error_buf, uint32 error_buf_size);
 
 /**
  * Deinstantiate a AOT module instance, destroy the resources.
@@ -633,6 +602,10 @@ aot_dump_perf_profiling(const AOTModuleInstance *module_inst);
 const uint8 *
 aot_get_custom_section(const AOTModule *module, const char *name, uint32 *len);
 
+const void *
+aot_get_data_section_addr(AOTModule *module, const char *section_name,
+                          uint32 *p_data_size);
+
 #if WASM_ENABLE_STATIC_PGO != 0
 void
 llvm_profile_instrument_target(uint64 target_value, void *data,

+ 14 - 18
core/iwasm/aot/arch/aot_reloc_riscv.c

@@ -78,6 +78,13 @@ static SymbolMap target_sym_map[] = {
     REG_SYM(__addsf3),
     REG_SYM(__divdf3),
     REG_SYM(__divsf3),
+    REG_SYM(__eqdf2),
+    REG_SYM(__eqsf2),
+    REG_SYM(__extendsfdf2),
+    REG_SYM(__fixunsdfdi),
+    REG_SYM(__fixunsdfsi),
+    REG_SYM(__fixunssfdi),
+    REG_SYM(__fixunssfsi),
     REG_SYM(__gedf2),
     REG_SYM(__gesf2),
     REG_SYM(__gtdf2),
@@ -89,44 +96,33 @@ static SymbolMap target_sym_map[] = {
     REG_SYM(__muldf3),
     REG_SYM(__nedf2),
     REG_SYM(__nesf2),
-    REG_SYM(__eqsf2),
-    REG_SYM(__eqdf2),
-    REG_SYM(__extendsfdf2),
-    REG_SYM(__fixunsdfdi),
-    REG_SYM(__fixunsdfsi),
-    REG_SYM(__fixunssfsi),
     REG_SYM(__subdf3),
     REG_SYM(__subsf3),
     REG_SYM(__truncdfsf2),
     REG_SYM(__unorddf2),
     REG_SYM(__unordsf2),
-#endif
-    REG_SYM(__divdi3),
-    REG_SYM(__divsi3),
 #if __riscv_xlen == 32
     REG_SYM(__fixdfdi),
     REG_SYM(__fixdfsi),
     REG_SYM(__fixsfdi),
     REG_SYM(__fixsfsi),
-#endif
-    REG_SYM(__fixunssfdi),
-#if __riscv_xlen == 32
     REG_SYM(__floatdidf),
     REG_SYM(__floatdisf),
-    REG_SYM(__floatsisf),
     REG_SYM(__floatsidf),
+    REG_SYM(__floatsisf),
     REG_SYM(__floatundidf),
     REG_SYM(__floatundisf),
-    REG_SYM(__floatunsisf),
     REG_SYM(__floatunsidf),
+    REG_SYM(__floatunsisf),
+    REG_SYM(__mulsf3),
+    REG_SYM(__mulsi3),
+#endif
 #endif
+    REG_SYM(__divdi3),
+    REG_SYM(__divsi3),
     REG_SYM(__moddi3),
     REG_SYM(__modsi3),
     REG_SYM(__muldi3),
-#if __riscv_xlen == 32
-    REG_SYM(__mulsf3),
-    REG_SYM(__mulsi3),
-#endif
     REG_SYM(__udivdi3),
     REG_SYM(__udivsi3),
     REG_SYM(__umoddi3),

+ 3 - 0
core/iwasm/aot/arch/aot_reloc_x86_32.c

@@ -164,6 +164,9 @@ apply_relocation(AOTModule *module, uint8 *target_section_addr,
                 (uint32)((uintptr_t)symbol_addr + (intptr_t)reloc_addend
                          - (uintptr_t)(target_section_addr
                                        + (uint32)reloc_offset)
+#if defined(BH_PLATFORM_WINDOWS)
+                         - sizeof(int32)
+#endif
                          + value); /* S + A - P */
             break;
         }

+ 1 - 16
core/iwasm/aot/arch/aot_reloc_x86_64.c

@@ -69,9 +69,6 @@ get_plt_table_size()
 {
     uint32 size =
         get_plt_item_size() * (sizeof(target_sym_map) / sizeof(SymbolMap));
-#if defined(OS_ENABLE_HW_BOUND_CHECK) && defined(BH_PLATFORM_WINDOWS)
-    size += get_plt_item_size() + sizeof(AOTUnwindInfo);
-#endif
     return size;
 }
 
@@ -93,18 +90,6 @@ init_plt_table(uint8 *plt)
         *p++ = 0xE0;
         plt += get_plt_item_size();
     }
-
-#if defined(OS_ENABLE_HW_BOUND_CHECK) && defined(BH_PLATFORM_WINDOWS)
-    p = plt;
-    /* mov exception_handler, rax */
-    *p++ = 0x48;
-    *p++ = 0xB8;
-    *(uint64 *)p = 0; /*(uint64)(uintptr_t)aot_exception_handler;*/
-    p += sizeof(uint64);
-    /* jmp rax */
-    *p++ = 0xFF;
-    *p++ = 0xE0;
-#endif
 }
 
 static bool
@@ -242,7 +227,7 @@ apply_relocation(AOTModule *module, uint8 *target_section_addr,
                      - (uintptr_t)(target_section_addr + reloc_offset));
             }
             else {
-                target_addr = (intptr_t) /* L + A - P */
+                target_addr = (intptr_t) /* S + A - P */
                     ((uintptr_t)symbol_addr + reloc_addend
                      - (uintptr_t)(target_section_addr + reloc_offset));
             }

+ 19 - 1
core/iwasm/aot/arch/aot_reloc_xtensa.c

@@ -43,6 +43,11 @@ void __floatdidf();
 void __divsf3();
 void __fixdfdi();
 void __floatundidf();
+void __fixsfdi();
+void __fixunssfdi();
+void __fixunsdfdi();
+void __floatdisf();
+void __floatundisf();
 
 
 static SymbolMap target_sym_map[] = {
@@ -85,6 +90,11 @@ static SymbolMap target_sym_map[] = {
     REG_SYM(__divsf3),
     REG_SYM(__fixdfdi),
     REG_SYM(__floatundidf),
+    REG_SYM(__fixsfdi),
+    REG_SYM(__fixunssfdi),
+    REG_SYM(__fixunsdfdi),
+    REG_SYM(__floatdisf),
+    REG_SYM(__floatundisf),
 };
 /* clang-format on */
 
@@ -207,6 +217,10 @@ apply_relocation(AOTModule *module, uint8 *target_section_addr,
         case R_XTENSA_32:
         {
             uint8 *insn_addr = target_section_addr + reloc_offset;
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+            insn_addr = os_get_dbus_mirror((void *)insn_addr);
+            bh_assert(insn_addr != NULL);
+#endif
             int32 initial_addend;
             /* (S + A) */
             if ((intptr_t)insn_addr & 3) {
@@ -265,6 +279,11 @@ apply_relocation(AOTModule *module, uint8 *target_section_addr,
                 return false;
             }
 
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+            insn_addr = os_get_dbus_mirror((void *)insn_addr);
+            bh_assert(insn_addr != NULL);
+            l32r_insn = (l32r_insn_t *)insn_addr;
+#endif
             imm16 = (int16)(relative_offset >> 2);
 
             /* write back the imm16 to the l32r instruction */
@@ -285,7 +304,6 @@ apply_relocation(AOTModule *module, uint8 *target_section_addr,
 #if __GNUC__ >= 9
 #pragma GCC diagnostic pop
 #endif
-
             break;
         }
 

+ 45 - 1
core/iwasm/aot/iwasm_aot.cmake

@@ -36,5 +36,49 @@ if (WAMR_BUILD_DEBUG_AOT EQUAL 1)
   file(GLOB debug_source ${IWASM_AOT_DIR}/debug/*.c)
 endif()
 
-set (IWASM_AOT_SOURCE ${c_source_all} ${arch_source} ${debug_source})
+if ((WAMR_BUILD_TARGET STREQUAL "X86_64" OR WAMR_BUILD_TARGET STREQUAL "AMD_64")
+    AND (WAMR_BUILD_PLATFORM STREQUAL "windows")
+    AND (NOT WAMR_DISABLE_HW_BOUND_CHECK EQUAL 1))
+  include(FetchContent)
 
+  FetchContent_Declare(
+    zycore
+    GIT_REPOSITORY https://github.com/zyantific/zycore-c.git
+  )
+  FetchContent_GetProperties(zycore)
+  if (NOT zycore_POPULATED)
+    message ("-- Fetching zycore ..")
+    FetchContent_Populate(zycore)
+    include_directories("${zycore_SOURCE_DIR}/include")
+    include_directories("${zycore_BINARY_DIR}")
+    add_definitions(-DZYCORE_STATIC_BUILD=1)
+    add_subdirectory(${zycore_SOURCE_DIR} ${zycore_BINARY_DIR} EXCLUDE_FROM_ALL)
+    file (GLOB_RECURSE c_source_zycore ${zycore_SOURCE_DIR}/src/*.c)
+  endif ()
+
+  FetchContent_Declare(
+    zydis
+    GIT_REPOSITORY https://github.com/zyantific/zydis.git
+    GIT_TAG e14a07895136182a5b53e181eec3b1c6e0b434de
+  )
+  FetchContent_GetProperties(zydis)
+  if (NOT zydis_POPULATED)
+    message ("-- Fetching zydis ..")
+    FetchContent_Populate(zydis)
+    option(ZYDIS_FEATURE_ENCODER "" OFF)
+    option(ZYDIS_BUILD_TOOLS "" OFF)
+    option(ZYDIS_BUILD_EXAMPLES "" OFF)
+    option(ZYDIS_BUILD_MAN "" OFF)
+    option(ZYDIS_BUILD_DOXYGEN "" OFF)
+    include_directories("${zydis_BINARY_DIR}")
+    include_directories("${zydis_SOURCE_DIR}/include")
+    include_directories("${zydis_SOURCE_DIR}/src")
+    add_definitions(-DZYDIS_STATIC_BUILD=1)
+    add_subdirectory(${zydis_SOURCE_DIR} ${zydis_BINARY_DIR} EXCLUDE_FROM_ALL)
+    file (GLOB_RECURSE c_source_zydis ${zydis_SOURCE_DIR}/src/*.c)
+  endif ()
+endif ()
+
+
+set (IWASM_AOT_SOURCE ${c_source_all} ${arch_source} ${debug_source}
+	              ${c_source_zycore} ${c_source_zydis})

+ 18 - 6
core/iwasm/common/wasm_c_api.c

@@ -2290,8 +2290,10 @@ quit:
 bool
 wasm_module_validate(wasm_store_t *store, const wasm_byte_vec_t *binary)
 {
+    wasm_byte_vec_t local_binary = { 0 };
     struct WASMModuleCommon *module_rt;
     char error_buf[128] = { 0 };
+    bool ret;
 
     bh_assert(singleton_engine);
 
@@ -2300,15 +2302,25 @@ wasm_module_validate(wasm_store_t *store, const wasm_byte_vec_t *binary)
         return false;
     }
 
-    if ((module_rt = wasm_runtime_load((uint8 *)binary->data,
-                                       (uint32)binary->size, error_buf, 128))) {
+    /* make a copy of binary */
+    wasm_byte_vec_copy(&local_binary, binary);
+
+    if (binary->size && !local_binary.data)
+        return false;
+
+    module_rt = wasm_runtime_load((uint8 *)local_binary.data,
+                                  (uint32)local_binary.size, error_buf, 128);
+    wasm_byte_vec_delete(&local_binary);
+    if (module_rt) {
         wasm_runtime_unload(module_rt);
-        return true;
+        ret = true;
     }
     else {
+        ret = false;
         LOG_VERBOSE(error_buf);
-        return false;
     }
+
+    return ret;
 }
 
 static void
@@ -4858,7 +4870,7 @@ wasm_instance_new_with_args(wasm_store_t *store, const wasm_module_t *module,
     if (instance->inst_comm_rt->module_type == Wasm_Module_Bytecode) {
         WASMModuleInstanceExtra *e =
             ((WASMModuleInstance *)instance->inst_comm_rt)->e;
-        p_func_imports = &(e->c_api_func_imports);
+        p_func_imports = &(e->common.c_api_func_imports);
         import_func_count = MODULE_INTERP(module)->import_function_count;
     }
 #endif
@@ -4868,7 +4880,7 @@ wasm_instance_new_with_args(wasm_store_t *store, const wasm_module_t *module,
             (AOTModuleInstanceExtra *)((AOTModuleInstance *)
                                            instance->inst_comm_rt)
                 ->e;
-        p_func_imports = &(e->c_api_func_imports);
+        p_func_imports = &(e->common.c_api_func_imports);
         import_func_count = MODULE_AOT(module)->import_func_count;
     }
 #endif

+ 3 - 9
core/iwasm/common/wasm_exec_env.h

@@ -7,6 +7,7 @@
 #define _WASM_EXEC_ENV_H
 
 #include "bh_assert.h"
+#include "wasm_suspend_flags.h"
 #if WASM_ENABLE_INTERP != 0
 #include "../interpreter/wasm.h"
 #endif
@@ -57,15 +58,8 @@ typedef struct WASMExecEnv {
        exception. */
     uint8 *native_stack_boundary;
 
-    /* Used to terminate or suspend current thread
-        bit 0: need to terminate
-        bit 1: need to suspend
-        bit 2: need to go into breakpoint
-        bit 3: return from pthread_exit */
-    union {
-        uint32 flags;
-        uintptr_t __padding__;
-    } suspend_flags;
+    /* Used to terminate or suspend current thread */
+    WASMSuspendFlags suspend_flags;
 
     /* Auxiliary stack boundary */
     union {

+ 63 - 45
core/iwasm/common/wasm_memory.c

@@ -5,6 +5,7 @@
 
 #include "wasm_runtime_common.h"
 #include "../interpreter/wasm_runtime.h"
+#include "../aot/aot_runtime.h"
 #include "bh_platform.h"
 #include "mem_alloc.h"
 
@@ -87,6 +88,16 @@ wasm_memory_init_with_allocator(void *_malloc_func, void *_realloc_func,
 }
 #endif
 
+static inline bool
+is_bounds_checks_enabled(WASMModuleInstanceCommon *module_inst)
+{
+#if WASM_CONFIGUABLE_BOUNDS_CHECKS != 0
+    return wasm_runtime_is_bounds_checks_enabled(module_inst);
+#else
+    return true;
+#endif
+}
+
 bool
 wasm_runtime_memory_init(mem_alloc_type_t mem_alloc_type,
                          const MemAllocOption *alloc_option)
@@ -269,6 +280,10 @@ wasm_runtime_validate_app_addr(WASMModuleInstanceCommon *module_inst_comm,
     bh_assert(module_inst_comm->module_type == Wasm_Module_Bytecode
               || module_inst_comm->module_type == Wasm_Module_AoT);
 
+    if (!is_bounds_checks_enabled(module_inst_comm)) {
+        return true;
+    }
+
     memory_inst = wasm_get_default_memory(module_inst);
     if (!memory_inst) {
         goto fail;
@@ -299,6 +314,10 @@ wasm_runtime_validate_app_str_addr(WASMModuleInstanceCommon *module_inst_comm,
     bh_assert(module_inst_comm->module_type == Wasm_Module_Bytecode
               || module_inst_comm->module_type == Wasm_Module_AoT);
 
+    if (!is_bounds_checks_enabled(module_inst_comm)) {
+        return true;
+    }
+
     if (!wasm_runtime_get_app_addr_range(module_inst_comm, app_str_offset, NULL,
                                          &app_end_offset))
         goto fail;
@@ -327,6 +346,10 @@ wasm_runtime_validate_native_addr(WASMModuleInstanceCommon *module_inst_comm,
     bh_assert(module_inst_comm->module_type == Wasm_Module_Bytecode
               || module_inst_comm->module_type == Wasm_Module_AoT);
 
+    if (!is_bounds_checks_enabled(module_inst_comm)) {
+        return true;
+    }
+
     memory_inst = wasm_get_default_memory(module_inst);
     if (!memory_inst) {
         goto fail;
@@ -354,10 +377,13 @@ wasm_runtime_addr_app_to_native(WASMModuleInstanceCommon *module_inst_comm,
     WASMModuleInstance *module_inst = (WASMModuleInstance *)module_inst_comm;
     WASMMemoryInstance *memory_inst;
     uint8 *addr;
+    bool bounds_checks;
 
     bh_assert(module_inst_comm->module_type == Wasm_Module_Bytecode
               || module_inst_comm->module_type == Wasm_Module_AoT);
 
+    bounds_checks = is_bounds_checks_enabled(module_inst_comm);
+
     memory_inst = wasm_get_default_memory(module_inst);
     if (!memory_inst) {
         return NULL;
@@ -365,8 +391,17 @@ wasm_runtime_addr_app_to_native(WASMModuleInstanceCommon *module_inst_comm,
 
     addr = memory_inst->memory_data + app_offset;
 
-    if (memory_inst->memory_data <= addr && addr < memory_inst->memory_data_end)
+    if (bounds_checks) {
+        if (memory_inst->memory_data <= addr
+            && addr < memory_inst->memory_data_end) {
+
+            return addr;
+        }
+    }
+    /* If bounds checks are disabled, return the address directly */
+    else if (app_offset != 0) {
         return addr;
+    }
 
     return NULL;
 }
@@ -378,17 +413,27 @@ wasm_runtime_addr_native_to_app(WASMModuleInstanceCommon *module_inst_comm,
     WASMModuleInstance *module_inst = (WASMModuleInstance *)module_inst_comm;
     WASMMemoryInstance *memory_inst;
     uint8 *addr = (uint8 *)native_ptr;
+    bool bounds_checks;
 
     bh_assert(module_inst_comm->module_type == Wasm_Module_Bytecode
               || module_inst_comm->module_type == Wasm_Module_AoT);
 
+    bounds_checks = is_bounds_checks_enabled(module_inst_comm);
+
     memory_inst = wasm_get_default_memory(module_inst);
     if (!memory_inst) {
         return 0;
     }
 
-    if (memory_inst->memory_data <= addr && addr < memory_inst->memory_data_end)
+    if (bounds_checks) {
+        if (memory_inst->memory_data <= addr
+            && addr < memory_inst->memory_data_end)
+            return (uint32)(addr - memory_inst->memory_data);
+    }
+    /* If bounds checks are disabled, return the offset directly */
+    else if (addr != NULL) {
         return (uint32)(addr - memory_inst->memory_data);
+    }
 
     return 0;
 }
@@ -460,6 +505,7 @@ wasm_check_app_addr_and_convert(WASMModuleInstance *module_inst, bool is_str,
 {
     WASMMemoryInstance *memory_inst = wasm_get_default_memory(module_inst);
     uint8 *native_addr;
+    bool bounds_checks;
 
     if (!memory_inst) {
         goto fail;
@@ -467,6 +513,15 @@ wasm_check_app_addr_and_convert(WASMModuleInstance *module_inst, bool is_str,
 
     native_addr = memory_inst->memory_data + app_buf_addr;
 
+    bounds_checks = is_bounds_checks_enabled((wasm_module_inst_t)module_inst);
+
+    if (!bounds_checks) {
+        if (app_buf_addr == 0) {
+            native_addr = NULL;
+        }
+        goto success;
+    }
+
     /* No need to check the app_offset and buf_size if memory access
        boundary check with hardware trap is enabled */
 #ifndef OS_ENABLE_HW_BOUND_CHECK
@@ -492,6 +547,7 @@ wasm_check_app_addr_and_convert(WASMModuleInstance *module_inst, bool is_str,
     }
 #endif
 
+success:
     *p_native_addr = (void *)native_addr;
     return true;
 fail:
@@ -552,7 +608,7 @@ wasm_enlarge_memory_internal(WASMModuleInstance *module, uint32 inc_page_count)
     }
 
 #if WASM_ENABLE_SHARED_MEMORY != 0
-    if (memory->is_shared) {
+    if (shared_memory_is_shared(memory)) {
         memory->num_bytes_per_page = num_bytes_per_page;
         memory->cur_page_count = total_page_count;
         memory->max_page_count = max_page_count;
@@ -713,52 +769,14 @@ wasm_enlarge_memory(WASMModuleInstance *module, uint32 inc_page_count)
     bool ret = false;
 
 #if WASM_ENABLE_SHARED_MEMORY != 0
-    WASMSharedMemNode *node =
-        wasm_module_get_shared_memory((WASMModuleCommon *)module->module);
-    if (node)
-        os_mutex_lock(&node->shared_mem_lock);
+    if (module->memory_count > 0)
+        shared_memory_lock(module->memories[0]);
 #endif
     ret = wasm_enlarge_memory_internal(module, inc_page_count);
 #if WASM_ENABLE_SHARED_MEMORY != 0
-    if (node)
-        os_mutex_unlock(&node->shared_mem_lock);
+    if (module->memory_count > 0)
+        shared_memory_unlock(module->memories[0]);
 #endif
 
     return ret;
 }
-
-#if !defined(OS_ENABLE_HW_BOUND_CHECK)              \
-    || WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS == 0 \
-    || WASM_ENABLE_BULK_MEMORY != 0
-uint32
-wasm_get_num_bytes_per_page(WASMMemoryInstance *memory, void *node)
-{
-    uint32 num_bytes_per_page;
-#if WASM_ENABLE_SHARED_MEMORY != 0
-    if (node)
-        os_mutex_lock(&((WASMSharedMemNode *)node)->shared_mem_lock);
-#endif
-    num_bytes_per_page = memory->num_bytes_per_page;
-#if WASM_ENABLE_SHARED_MEMORY != 0
-    if (node)
-        os_mutex_unlock(&((WASMSharedMemNode *)node)->shared_mem_lock);
-#endif
-    return num_bytes_per_page;
-}
-
-uint32
-wasm_get_linear_memory_size(WASMMemoryInstance *memory, void *node)
-{
-    uint32 linear_mem_size;
-#if WASM_ENABLE_SHARED_MEMORY != 0
-    if (node)
-        os_mutex_lock(&((WASMSharedMemNode *)node)->shared_mem_lock);
-#endif
-    linear_mem_size = memory->num_bytes_per_page * memory->cur_page_count;
-#if WASM_ENABLE_SHARED_MEMORY != 0
-    if (node)
-        os_mutex_unlock(&((WASMSharedMemNode *)node)->shared_mem_lock);
-#endif
-    return linear_mem_size;
-}
-#endif

+ 0 - 10
core/iwasm/common/wasm_memory.h

@@ -24,16 +24,6 @@ wasm_runtime_memory_destroy();
 unsigned
 wasm_runtime_memory_pool_size();
 
-#if !defined(OS_ENABLE_HW_BOUND_CHECK)              \
-    || WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS == 0 \
-    || WASM_ENABLE_BULK_MEMORY != 0
-uint32
-wasm_get_num_bytes_per_page(WASMMemoryInstance *memory, void *node);
-
-uint32
-wasm_get_linear_memory_size(WASMMemoryInstance *memory, void *node);
-#endif
-
 #ifdef __cplusplus
 }
 #endif

+ 240 - 50
core/iwasm/common/wasm_runtime_common.c

@@ -199,7 +199,90 @@ runtime_signal_handler(void *sig_addr)
         }
     }
 }
-#else
+#else /* else of BH_PLATFORM_WINDOWS */
+
+#if WASM_ENABLE_AOT != 0
+#include <Zydis/Zydis.h>
+
+static uint32
+decode_insn(uint8 *insn)
+{
+    uint8 *data = (uint8 *)insn;
+    uint32 length = 32; /* reserve enough size */
+
+    /* Initialize decoder context */
+    ZydisDecoder decoder;
+    ZydisDecoderInit(&decoder, ZYDIS_MACHINE_MODE_LONG_64,
+                     ZYDIS_STACK_WIDTH_64);
+
+    /* Initialize formatter */
+    ZydisFormatter formatter;
+    ZydisFormatterInit(&formatter, ZYDIS_FORMATTER_STYLE_INTEL);
+
+    /* Decode only the first instruction in our buffer */
+    ZyanU64 runtime_address = (ZyanU64)(uintptr_t)data;
+    ZyanUSize offset = 0;
+    ZydisDecodedInstruction instruction;
+    ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT_VISIBLE];
+    char buffer[256];
+
+    if (ZYAN_SUCCESS(ZydisDecoderDecodeFull(
+            &decoder, data + offset, length - offset, &instruction, operands,
+            ZYDIS_MAX_OPERAND_COUNT_VISIBLE,
+            ZYDIS_DFLAG_VISIBLE_OPERANDS_ONLY))) {
+
+        /* Format & print the binary instruction structure to
+           human readable format */
+        ZydisFormatterFormatInstruction(&formatter, &instruction, operands,
+                                        instruction.operand_count_visible,
+                                        buffer, sizeof(buffer),
+                                        runtime_address);
+
+        /* Print current instruction */
+        /*
+        os_printf("%012" PRIX64 "  ", runtime_address);
+        puts(buffer);
+        */
+
+        return instruction.length;
+    }
+
+    /* Decode failed */
+    return 0;
+}
+#endif /* end of WASM_ENABLE_AOT != 0 */
+
+static LONG
+next_action(WASMModuleInstance *module_inst, EXCEPTION_POINTERS *exce_info)
+{
+#if WASM_ENABLE_AOT != 0
+    uint32 insn_size;
+#endif
+
+    if (module_inst->module_type == Wasm_Module_Bytecode
+        && module_inst->e->running_mode == Mode_Interp) {
+        /* Continue to search for the next exception handler in
+           interpreter mode, as the exception can be caught by the
+           `__try { .. } __except { .. }` statements in
+           wasm_runtime.c */
+        return EXCEPTION_CONTINUE_SEARCH;
+    }
+
+#if WASM_ENABLE_AOT != 0
+    /* Skip current instruction and continue to run for AOT/JIT mode.
+       TODO: implement unwind support for AOT/JIT code in Windows platform */
+    insn_size = decode_insn((uint8 *)exce_info->ContextRecord->Rip);
+    if (insn_size > 0) {
+        exce_info->ContextRecord->Rip += insn_size;
+        return EXCEPTION_CONTINUE_EXECUTION;
+    }
+#endif
+
+    /* Return a value other than EXCEPTION_CONTINUE_SEARCH (= 0) and
+       EXCEPTION_CONTINUE_EXECUTION (= -1) so the caller falls through */
+    return -2;
+}
+
 static LONG
 runtime_exception_handler(EXCEPTION_POINTERS *exce_info)
 {
@@ -211,6 +294,7 @@ runtime_exception_handler(EXCEPTION_POINTERS *exce_info)
     uint8 *mapped_mem_start_addr = NULL;
     uint8 *mapped_mem_end_addr = NULL;
     uint32 page_size = os_getpagesize();
+    LONG ret;
 
     if (exec_env_tls && exec_env_tls->handle == os_self_thread()
         && (jmpbuf_node = exec_env_tls->jmpbuf_stack_top)) {
@@ -232,32 +316,19 @@ runtime_exception_handler(EXCEPTION_POINTERS *exce_info)
                    the wasm func returns, the caller will check whether the
                    exception is thrown and return to runtime. */
                 wasm_set_exception(module_inst, "out of bounds memory access");
-                if (module_inst->module_type == Wasm_Module_Bytecode) {
-                    /* Continue to search next exception handler for
-                       interpreter mode as it can be caught by
-                       `__try { .. } __except { .. }` sentences in
-                       wasm_runtime.c */
-                    return EXCEPTION_CONTINUE_SEARCH;
-                }
-                else {
-                    /* Skip current instruction and continue to run for
-                       AOT mode. TODO: implement unwind support for AOT
-                       code in Windows platform */
-                    exce_info->ContextRecord->Rip++;
-                    return EXCEPTION_CONTINUE_EXECUTION;
-                }
+                ret = next_action(module_inst, exce_info);
+                if (ret == EXCEPTION_CONTINUE_SEARCH
+                    || ret == EXCEPTION_CONTINUE_EXECUTION)
+                    return ret;
             }
             else if (exec_env_tls->exce_check_guard_page <= (uint8 *)sig_addr
                      && (uint8 *)sig_addr
                             < exec_env_tls->exce_check_guard_page + page_size) {
                 bh_assert(wasm_copy_exception(module_inst, NULL));
-                if (module_inst->module_type == Wasm_Module_Bytecode) {
-                    return EXCEPTION_CONTINUE_SEARCH;
-                }
-                else {
-                    exce_info->ContextRecord->Rip++;
-                    return EXCEPTION_CONTINUE_EXECUTION;
-                }
+                ret = next_action(module_inst, exce_info);
+                if (ret == EXCEPTION_CONTINUE_SEARCH
+                    || ret == EXCEPTION_CONTINUE_EXECUTION)
+                    return ret;
             }
         }
 #if WASM_DISABLE_STACK_HW_BOUND_CHECK == 0
@@ -267,12 +338,10 @@ runtime_exception_handler(EXCEPTION_POINTERS *exce_info)
                whether the exception is thrown and return to runtime, and
                the damaged stack will be recovered by _resetstkoflw(). */
             wasm_set_exception(module_inst, "native stack overflow");
-            if (module_inst->module_type == Wasm_Module_Bytecode) {
-                return EXCEPTION_CONTINUE_SEARCH;
-            }
-            else {
-                return EXCEPTION_CONTINUE_EXECUTION;
-            }
+            ret = next_action(module_inst, exce_info);
+            if (ret == EXCEPTION_CONTINUE_SEARCH
+                || ret == EXCEPTION_CONTINUE_EXECUTION)
+                return ret;
         }
 #endif
     }
@@ -1196,7 +1265,8 @@ wasm_runtime_unload(WASMModuleCommon *module)
 }
 
 WASMModuleInstanceCommon *
-wasm_runtime_instantiate_internal(WASMModuleCommon *module, bool is_sub_inst,
+wasm_runtime_instantiate_internal(WASMModuleCommon *module,
+                                  WASMModuleInstanceCommon *parent,
                                   WASMExecEnv *exec_env_main, uint32 stack_size,
                                   uint32 heap_size, char *error_buf,
                                   uint32 error_buf_size)
@@ -1204,14 +1274,14 @@ wasm_runtime_instantiate_internal(WASMModuleCommon *module, bool is_sub_inst,
 #if WASM_ENABLE_INTERP != 0
     if (module->module_type == Wasm_Module_Bytecode)
         return (WASMModuleInstanceCommon *)wasm_instantiate(
-            (WASMModule *)module, is_sub_inst, exec_env_main, stack_size,
-            heap_size, error_buf, error_buf_size);
+            (WASMModule *)module, (WASMModuleInstance *)parent, exec_env_main,
+            stack_size, heap_size, error_buf, error_buf_size);
 #endif
 #if WASM_ENABLE_AOT != 0
     if (module->module_type == Wasm_Module_AoT)
         return (WASMModuleInstanceCommon *)aot_instantiate(
-            (AOTModule *)module, is_sub_inst, exec_env_main, stack_size,
-            heap_size, error_buf, error_buf_size);
+            (AOTModule *)module, (AOTModuleInstance *)parent, exec_env_main,
+            stack_size, heap_size, error_buf, error_buf_size);
 #endif
     set_error_buf(error_buf, error_buf_size,
                   "Instantiate module failed, invalid module type");
@@ -1224,7 +1294,7 @@ wasm_runtime_instantiate(WASMModuleCommon *module, uint32 stack_size,
                          uint32 error_buf_size)
 {
     return wasm_runtime_instantiate_internal(
-        module, false, NULL, stack_size, heap_size, error_buf, error_buf_size);
+        module, NULL, NULL, stack_size, heap_size, error_buf, error_buf_size);
 }
 
 void
@@ -2310,10 +2380,8 @@ wasm_set_exception(WASMModuleInstance *module_inst, const char *exception)
     WASMExecEnv *exec_env = NULL;
 
 #if WASM_ENABLE_SHARED_MEMORY != 0
-    WASMSharedMemNode *node =
-        wasm_module_get_shared_memory((WASMModuleCommon *)module_inst->module);
-    if (node)
-        os_mutex_lock(&node->shared_mem_lock);
+    if (module_inst->memory_count > 0)
+        shared_memory_lock(module_inst->memories[0]);
 #endif
     if (exception) {
         snprintf(module_inst->cur_exception, sizeof(module_inst->cur_exception),
@@ -2323,8 +2391,8 @@ wasm_set_exception(WASMModuleInstance *module_inst, const char *exception)
         module_inst->cur_exception[0] = '\0';
     }
 #if WASM_ENABLE_SHARED_MEMORY != 0
-    if (node)
-        os_mutex_unlock(&node->shared_mem_lock);
+    if (module_inst->memory_count > 0)
+        shared_memory_unlock(module_inst->memories[0]);
 #endif
 
 #if WASM_ENABLE_THREAD_MGR != 0
@@ -2386,10 +2454,8 @@ wasm_copy_exception(WASMModuleInstance *module_inst, char *exception_buf)
     bool has_exception = false;
 
 #if WASM_ENABLE_SHARED_MEMORY != 0
-    WASMSharedMemNode *node =
-        wasm_module_get_shared_memory((WASMModuleCommon *)module_inst->module);
-    if (node)
-        os_mutex_lock(&node->shared_mem_lock);
+    if (module_inst->memory_count > 0)
+        shared_memory_lock(module_inst->memories[0]);
 #endif
     if (module_inst->cur_exception[0] != '\0') {
         /* NULL is passed if the caller is not interested in getting the
@@ -2403,8 +2469,8 @@ wasm_copy_exception(WASMModuleInstance *module_inst, char *exception_buf)
         has_exception = true;
     }
 #if WASM_ENABLE_SHARED_MEMORY != 0
-    if (node)
-        os_mutex_unlock(&node->shared_mem_lock);
+    if (module_inst->memory_count > 0)
+        shared_memory_unlock(module_inst->memories[0]);
 #endif
 
     return has_exception;
@@ -2482,6 +2548,54 @@ wasm_runtime_get_custom_data(WASMModuleInstanceCommon *module_inst_comm)
     return module_inst->custom_data;
 }
 
+#if WASM_CONFIGUABLE_BOUNDS_CHECKS != 0
+/*
+ * Turn software bounds checks on or off for one module instance by
+ * storing the inverse of `enable` in the instance's
+ * common.disable_bounds_checks flag. Only the branch matching the
+ * instance's module_type (bytecode or AoT) writes the flag.
+ */
+void
+wasm_runtime_set_bounds_checks(WASMModuleInstanceCommon *module_inst,
+                               bool enable)
+{
+    bool disable;
+
+    /* Always disable bounds checks if hw bounds checks enabled */
+#ifdef OS_ENABLE_HW_BOUND_CHECK
+    enable = false;
+#endif
+    disable = !enable;
+
+#if WASM_ENABLE_INTERP != 0
+    if (module_inst->module_type == Wasm_Module_Bytecode) {
+        WASMModuleInstance *wasm_inst = (WASMModuleInstance *)module_inst;
+
+        ((WASMModuleInstanceExtra *)wasm_inst->e)
+            ->common.disable_bounds_checks = disable;
+    }
+#endif
+
+#if WASM_ENABLE_AOT != 0
+    if (module_inst->module_type == Wasm_Module_AoT) {
+        AOTModuleInstance *aot_inst = (AOTModuleInstance *)module_inst;
+
+        ((AOTModuleInstanceExtra *)aot_inst->e)
+            ->common.disable_bounds_checks = disable;
+    }
+#endif
+}
+
+/*
+ * Report whether software bounds checks are enabled for a module
+ * instance, i.e. the inverse of its common.disable_bounds_checks flag.
+ *
+ * Returns true when the instance's module_type matches neither the
+ * bytecode nor the AoT branch compiled into this build.
+ */
+bool
+wasm_runtime_is_bounds_checks_enabled(WASMModuleInstanceCommon *module_inst)
+{
+#if WASM_ENABLE_INTERP != 0
+    if (module_inst->module_type == Wasm_Module_Bytecode) {
+        WASMModuleInstance *wasm_inst = (WASMModuleInstance *)module_inst;
+
+        return !((WASMModuleInstanceExtra *)wasm_inst->e)
+                    ->common.disable_bounds_checks;
+    }
+#endif
+
+#if WASM_ENABLE_AOT != 0
+    if (module_inst->module_type == Wasm_Module_AoT) {
+        /* Use the AoT instance type here, matching
+           wasm_runtime_set_bounds_checks */
+        AOTModuleInstance *aot_inst = (AOTModuleInstance *)module_inst;
+
+        return !((AOTModuleInstanceExtra *)aot_inst->e)
+                    ->common.disable_bounds_checks;
+    }
+#endif
+
+    return true;
+}
+#endif
+
 uint32
 wasm_runtime_module_malloc_internal(WASMModuleInstanceCommon *module_inst,
                                     WASMExecEnv *exec_env, uint32 size,
@@ -4636,6 +4750,8 @@ typedef struct ExternRefMapNode {
     bool retained;
     /* Whether it is marked by runtime */
     bool marked;
+    /* cleanup function called when the externref is freed */
+    void (*cleanup)(void *);
 } ExternRefMapNode;
 
 static uint32
@@ -4698,6 +4814,81 @@ lookup_extobj_callback(void *key, void *value, void *user_data)
     }
 }
 
+/*
+ * Drop a single externref entry: unlink `key` from externref_map, run
+ * the node's cleanup callback (when one is set) on its extern object,
+ * and finally free the node.
+ */
+static void
+delete_externref(void *key, ExternRefMapNode *node)
+{
+    void (*cleanup_fn)(void *);
+
+    bh_hash_map_remove(externref_map, key, NULL, NULL);
+
+    cleanup_fn = node->cleanup;
+    if (cleanup_fn != NULL)
+        cleanup_fn(node->extern_obj);
+
+    wasm_runtime_free(node);
+}
+
+/*
+ * Hash map traversal callback: when the visited node has the same
+ * extern object and module instance as the ones carried in user_data
+ * (a LookupExtObj_UserData), mark the lookup as found and delete the
+ * entry.
+ */
+static void
+delete_extobj_callback(void *key, void *value, void *user_data)
+{
+    ExternRefMapNode *entry = (ExternRefMapNode *)value;
+    LookupExtObj_UserData *wanted = (LookupExtObj_UserData *)user_data;
+
+    /* Not the object we are looking for */
+    if (entry->extern_obj != wanted->node.extern_obj)
+        return;
+    /* Same object, but owned by another module instance */
+    if (entry->module_inst != wanted->node.module_inst)
+        return;
+
+    wanted->found = true;
+    delete_externref(key, entry);
+}
+
+/*
+ * Delete the externref entries that map `extern_obj` for `module_inst`.
+ * The whole map is traversed under externref_lock and every matching
+ * entry is removed through delete_extobj_callback.
+ *
+ * Returns true if at least one matching entry was found, false otherwise.
+ */
+bool
+wasm_externref_objdel(WASMModuleInstanceCommon *module_inst, void *extern_obj)
+{
+    LookupExtObj_UserData target = { 0 };
+    bool deleted;
+
+    /* in a wrapper, extern_obj could be any value */
+    target.node.extern_obj = extern_obj;
+    target.node.module_inst = module_inst;
+    target.found = false;
+
+    os_mutex_lock(&externref_lock);
+    /* Walk the hash map, deleting matching entries as they are visited */
+    bh_hash_map_traverse(externref_map, delete_extobj_callback,
+                         (void *)&target);
+    deleted = target.found ? true : false;
+    os_mutex_unlock(&externref_lock);
+
+    return deleted;
+}
+
+/*
+ * Attach a cleanup callback to the externref entry that maps
+ * `extern_obj` for `module_inst`. delete_externref() calls the callback
+ * with the extern object when the entry is deleted.
+ *
+ * Returns true if the entry was found and the callback stored, false
+ * otherwise.
+ */
+bool
+wasm_externref_set_cleanup(WASMModuleInstanceCommon *module_inst,
+                           void *extern_obj, void (*extern_obj_cleanup)(void *))
+{
+
+    LookupExtObj_UserData lookup_user_data = { 0 };
+    bool ok = false;
+
+    /* in a wrapper, extern_obj could be any value */
+    lookup_user_data.node.extern_obj = extern_obj;
+    lookup_user_data.node.module_inst = module_inst;
+    lookup_user_data.found = false;
+
+    os_mutex_lock(&externref_lock);
+    /* Look the object up in the hash map first */
+    bh_hash_map_traverse(externref_map, lookup_extobj_callback,
+                         (void *)&lookup_user_data);
+    if (lookup_user_data.found) {
+        void *key = (void *)(uintptr_t)lookup_user_data.externref_idx;
+        ExternRefMapNode *node = bh_hash_map_find(externref_map, key);
+
+        /* Defensive: do not dereference the node if the keyed lookup
+           fails, report failure to the caller instead */
+        if (node) {
+            node->cleanup = extern_obj_cleanup;
+            ok = true;
+        }
+    }
+    os_mutex_unlock(&externref_lock);
+
+    return ok;
+}
+
 bool
 wasm_externref_obj2ref(WASMModuleInstanceCommon *module_inst, void *extern_obj,
                        uint32 *p_externref_idx)
@@ -4747,6 +4938,7 @@ wasm_externref_obj2ref(WASMModuleInstanceCommon *module_inst, void *extern_obj,
     memset(node, 0, sizeof(ExternRefMapNode));
     node->extern_obj = extern_obj;
     node->module_inst = module_inst;
+    node->cleanup = NULL;
 
     externref_idx = externref_global_id;
 
@@ -4797,8 +4989,7 @@ reclaim_extobj_callback(void *key, void *value, void *user_data)
 
     if (node->module_inst == module_inst) {
         if (!node->marked && !node->retained) {
-            bh_hash_map_remove(externref_map, key, NULL, NULL);
-            wasm_runtime_free(value);
+            delete_externref(key, node);
         }
         else {
             node->marked = false;
@@ -4913,8 +5104,7 @@ cleanup_extobj_callback(void *key, void *value, void *user_data)
         (WASMModuleInstanceCommon *)user_data;
 
     if (node->module_inst == module_inst) {
-        bh_hash_map_remove(externref_map, key, NULL, NULL);
-        wasm_runtime_free(value);
+        delete_externref(key, node);
     }
 }
 

+ 13 - 1
core/iwasm/common/wasm_runtime_common.h

@@ -498,7 +498,8 @@ wasm_runtime_unload(WASMModuleCommon *module);
 
 /* Internal API */
 WASMModuleInstanceCommon *
-wasm_runtime_instantiate_internal(WASMModuleCommon *module, bool is_sub_inst,
+wasm_runtime_instantiate_internal(WASMModuleCommon *module,
+                                  WASMModuleInstanceCommon *parent,
                                   WASMExecEnv *exec_env_main, uint32 stack_size,
                                   uint32 heap_size, char *error_buf,
                                   uint32 error_buf_size);
@@ -593,6 +594,17 @@ wasm_runtime_set_user_data(WASMExecEnv *exec_env, void *user_data);
 WASM_RUNTIME_API_EXTERN void *
 wasm_runtime_get_user_data(WASMExecEnv *exec_env);
 
+#if WASM_CONFIGUABLE_BOUNDS_CHECKS != 0
+/* See wasm_export.h for description */
+WASM_RUNTIME_API_EXTERN void
+wasm_runtime_set_bounds_checks(WASMModuleInstanceCommon *module_inst,
+                               bool enable);
+
+/* See wasm_export.h for description */
+WASM_RUNTIME_API_EXTERN bool
+wasm_runtime_is_bounds_checks_enabled(WASMModuleInstanceCommon *module_inst);
+#endif
+
 #ifdef OS_ENABLE_HW_BOUND_CHECK
 /* Access exception check guard page to trigger the signal handler */
 void

+ 86 - 115
core/iwasm/common/wasm_shared_memory.c

@@ -9,9 +9,16 @@
 #include "../libraries/thread-mgr/thread_manager.h"
 #endif
 
-static bh_list shared_memory_list_head;
-static bh_list *const shared_memory_list = &shared_memory_list_head;
-static korp_mutex shared_memory_list_lock;
+/*
+ * Note: this lock can be per memory.
+ *
+ * For now, just use a global because:
+ * - it's a bit cumbersome to extend WASMMemoryInstance w/o breaking
+ *   the AOT ABI.
+ * - If you care about performance, it's better to make the interpreters
+ *   use atomic ops.
+ */
+static korp_mutex _shared_memory_lock;
 
 /* clang-format off */
 enum {
@@ -37,7 +44,7 @@ typedef struct AtomicWaitNode {
 static HashMap *wait_map;
 
 static uint32
-wait_address_hash(void *address);
+wait_address_hash(const void *address);
 
 static bool
 wait_address_equal(void *h1, void *h2);
@@ -48,17 +55,15 @@ destroy_wait_info(void *wait_info);
 bool
 wasm_shared_memory_init()
 {
-    if (os_mutex_init(&shared_memory_list_lock) != 0)
+    if (os_mutex_init(&_shared_memory_lock) != 0)
         return false;
-
     /* wait map not exists, create new map */
     if (!(wait_map = bh_hash_map_create(32, true, (HashFunc)wait_address_hash,
                                         (KeyEqualFunc)wait_address_equal, NULL,
                                         destroy_wait_info))) {
-        os_mutex_destroy(&shared_memory_list_lock);
+        os_mutex_destroy(&_shared_memory_lock);
         return false;
     }
-
     return true;
 }
 
@@ -66,115 +71,84 @@ void
 wasm_shared_memory_destroy()
 {
     bh_hash_map_destroy(wait_map);
-    os_mutex_destroy(&shared_memory_list_lock);
+    os_mutex_destroy(&_shared_memory_lock);
 }
 
-static WASMSharedMemNode *
-search_module(WASMModuleCommon *module)
+uint32
+shared_memory_inc_reference(WASMMemoryInstance *memory)
 {
-    WASMSharedMemNode *node;
-
-    os_mutex_lock(&shared_memory_list_lock);
-    node = bh_list_first_elem(shared_memory_list);
-
-    while (node) {
-        if (module == node->module) {
-            os_mutex_unlock(&shared_memory_list_lock);
-            return node;
-        }
-        node = bh_list_elem_next(node);
-    }
-
-    os_mutex_unlock(&shared_memory_list_lock);
-    return NULL;
+    bh_assert(shared_memory_is_shared(memory));
+    uint32 old;
+#if BH_ATOMIC_32_IS_ATOMIC == 0
+    os_mutex_lock(&_shared_memory_lock);
+#endif
+    old = BH_ATOMIC_32_FETCH_ADD(memory->ref_count, 1);
+#if BH_ATOMIC_32_IS_ATOMIC == 0
+    os_mutex_unlock(&_shared_memory_lock);
+#endif
+    bh_assert(old >= 1);
+    bh_assert(old < UINT32_MAX);
+    return old + 1;
 }
 
-WASMSharedMemNode *
-wasm_module_get_shared_memory(WASMModuleCommon *module)
+uint32
+shared_memory_dec_reference(WASMMemoryInstance *memory)
 {
-    return search_module(module);
+    bh_assert(shared_memory_is_shared(memory));
+    uint32 old;
+#if BH_ATOMIC_32_IS_ATOMIC == 0
+    os_mutex_lock(&_shared_memory_lock);
+#endif
+    old = BH_ATOMIC_32_FETCH_SUB(memory->ref_count, 1);
+#if BH_ATOMIC_32_IS_ATOMIC == 0
+    os_mutex_unlock(&_shared_memory_lock);
+#endif
+    bh_assert(old > 0);
+    return old - 1;
 }
 
-int32
-shared_memory_inc_reference(WASMModuleCommon *module)
+bool
+shared_memory_is_shared(WASMMemoryInstance *memory)
 {
-    WASMSharedMemNode *node = search_module(module);
-    uint32 ref_count = -1;
-    if (node) {
-        os_mutex_lock(&node->lock);
-        ref_count = ++node->ref_count;
-        os_mutex_unlock(&node->lock);
-    }
-    return ref_count;
+    uint32 old;
+#if BH_ATOMIC_32_IS_ATOMIC == 0
+    os_mutex_lock(&_shared_memory_lock);
+#endif
+    old = BH_ATOMIC_32_LOAD(memory->ref_count);
+#if BH_ATOMIC_32_IS_ATOMIC == 0
+    os_mutex_unlock(&_shared_memory_lock);
+#endif
+    return old > 0;
 }
 
-int32
-shared_memory_dec_reference(WASMModuleCommon *module)
+static korp_mutex *
+shared_memory_get_lock_pointer(WASMMemoryInstance *memory)
 {
-    WASMSharedMemNode *node = search_module(module);
-    uint32 ref_count = 0;
-    if (node) {
-        os_mutex_lock(&node->lock);
-        ref_count = --node->ref_count;
-        os_mutex_unlock(&node->lock);
-        if (ref_count == 0) {
-            os_mutex_lock(&shared_memory_list_lock);
-            bh_list_remove(shared_memory_list, node);
-            os_mutex_unlock(&shared_memory_list_lock);
-
-            os_mutex_destroy(&node->shared_mem_lock);
-            os_mutex_destroy(&node->lock);
-            wasm_runtime_free(node);
-        }
-        return ref_count;
-    }
-
-    return -1;
+    bh_assert(memory != NULL);
+    return &_shared_memory_lock;
 }
 
-WASMMemoryInstanceCommon *
-shared_memory_get_memory_inst(WASMSharedMemNode *node)
+void
+shared_memory_lock(WASMMemoryInstance *memory)
 {
-    return node->memory_inst;
+    /*
+     * Note: exception logic is currently abusing this lock.
+     * cf. https://github.com/bytecodealliance/wasm-micro-runtime/issues/2407
+     */
+    bh_assert(memory != NULL);
+    os_mutex_lock(&_shared_memory_lock);
 }
 
-WASMSharedMemNode *
-shared_memory_set_memory_inst(WASMModuleCommon *module,
-                              WASMMemoryInstanceCommon *memory)
+void
+shared_memory_unlock(WASMMemoryInstance *memory)
 {
-    WASMSharedMemNode *node;
-    bh_list_status ret;
-
-    if (!(node = wasm_runtime_malloc(sizeof(WASMSharedMemNode))))
-        return NULL;
-
-    node->module = module;
-    node->memory_inst = memory;
-    node->ref_count = 1;
-
-    if (os_mutex_init(&node->shared_mem_lock) != 0) {
-        wasm_runtime_free(node);
-        return NULL;
-    }
-
-    if (os_mutex_init(&node->lock) != 0) {
-        os_mutex_destroy(&node->shared_mem_lock);
-        wasm_runtime_free(node);
-        return NULL;
-    }
-
-    os_mutex_lock(&shared_memory_list_lock);
-    ret = bh_list_insert(shared_memory_list, node);
-    bh_assert(ret == BH_LIST_SUCCESS);
-    os_mutex_unlock(&shared_memory_list_lock);
-
-    (void)ret;
-    return node;
+    bh_assert(memory != NULL);
+    os_mutex_unlock(&_shared_memory_lock);
 }
 
 /* Atomics wait && notify APIs */
 static uint32
-wait_address_hash(void *address)
+wait_address_hash(const void *address)
 {
     return (uint32)(uintptr_t)address;
 }
@@ -307,7 +281,7 @@ wasm_runtime_atomic_wait(WASMModuleInstanceCommon *module, void *address,
     WASMModuleInstance *module_inst = (WASMModuleInstance *)module;
     AtomicWaitInfo *wait_info;
     AtomicWaitNode *wait_node;
-    WASMSharedMemNode *node;
+    korp_mutex *lock;
 #if WASM_ENABLE_THREAD_MGR != 0
     WASMExecEnv *exec_env;
 #endif
@@ -322,7 +296,7 @@ wasm_runtime_atomic_wait(WASMModuleInstanceCommon *module, void *address,
     }
 
     /* Currently we have only one memory instance */
-    if (!module_inst->memories[0]->is_shared) {
+    if (!shared_memory_is_shared(module_inst->memories[0])) {
         wasm_runtime_set_exception(module, "expected shared memory");
         return -1;
     }
@@ -340,30 +314,29 @@ wasm_runtime_atomic_wait(WASMModuleInstanceCommon *module, void *address,
     bh_assert(exec_env);
 #endif
 
-    node = search_module((WASMModuleCommon *)module_inst->module);
-    bh_assert(node);
+    lock = shared_memory_get_lock_pointer(module_inst->memories[0]);
 
     /* Lock the shared_mem_lock for the whole atomic wait process,
        and use it to os_cond_reltimedwait */
-    os_mutex_lock(&node->shared_mem_lock);
+    os_mutex_lock(lock);
 
     no_wait = (!wait64 && *(uint32 *)address != (uint32)expect)
               || (wait64 && *(uint64 *)address != expect);
 
     if (no_wait) {
-        os_mutex_unlock(&node->shared_mem_lock);
+        os_mutex_unlock(lock);
         return 1;
     }
 
     if (!(wait_node = wasm_runtime_malloc(sizeof(AtomicWaitNode)))) {
-        os_mutex_unlock(&node->shared_mem_lock);
+        os_mutex_unlock(lock);
         wasm_runtime_set_exception(module, "failed to create wait node");
         return -1;
     }
     memset(wait_node, 0, sizeof(AtomicWaitNode));
 
     if (0 != os_cond_init(&wait_node->wait_cond)) {
-        os_mutex_unlock(&node->shared_mem_lock);
+        os_mutex_unlock(lock);
         wasm_runtime_free(wait_node);
         wasm_runtime_set_exception(module, "failed to init wait cond");
         return -1;
@@ -375,7 +348,7 @@ wasm_runtime_atomic_wait(WASMModuleInstanceCommon *module, void *address,
     wait_info = acquire_wait_info(address, wait_node);
 
     if (!wait_info) {
-        os_mutex_unlock(&node->shared_mem_lock);
+        os_mutex_unlock(lock);
         os_cond_destroy(&wait_node->wait_cond);
         wasm_runtime_free(wait_node);
         wasm_runtime_set_exception(module, "failed to acquire wait_info");
@@ -390,7 +363,7 @@ wasm_runtime_atomic_wait(WASMModuleInstanceCommon *module, void *address,
         if (timeout < 0) {
             /* wait forever until it is notified or terminatied
                here we keep waiting and checking every second */
-            os_cond_reltimedwait(&wait_node->wait_cond, &node->shared_mem_lock,
+            os_cond_reltimedwait(&wait_node->wait_cond, lock,
                                  (uint64)timeout_1sec);
             if (wait_node->status == S_NOTIFIED /* notified by atomic.notify */
 #if WASM_ENABLE_THREAD_MGR != 0
@@ -404,8 +377,7 @@ wasm_runtime_atomic_wait(WASMModuleInstanceCommon *module, void *address,
         else {
             timeout_wait =
                 timeout_left < timeout_1sec ? timeout_left : timeout_1sec;
-            os_cond_reltimedwait(&wait_node->wait_cond, &node->shared_mem_lock,
-                                 timeout_wait);
+            os_cond_reltimedwait(&wait_node->wait_cond, lock, timeout_wait);
             if (wait_node->status == S_NOTIFIED /* notified by atomic.notify */
                 || timeout_left <= timeout_wait /* time out */
 #if WASM_ENABLE_THREAD_MGR != 0
@@ -433,7 +405,7 @@ wasm_runtime_atomic_wait(WASMModuleInstanceCommon *module, void *address,
     /* Release wait info if no wait nodes are attached */
     map_try_release_wait_info(wait_map, wait_info, address);
 
-    os_mutex_unlock(&node->shared_mem_lock);
+    os_mutex_unlock(lock);
 
     return is_timeout ? 2 : 0;
 }
@@ -445,7 +417,7 @@ wasm_runtime_atomic_notify(WASMModuleInstanceCommon *module, void *address,
     WASMModuleInstance *module_inst = (WASMModuleInstance *)module;
     uint32 notify_result;
     AtomicWaitInfo *wait_info;
-    WASMSharedMemNode *node;
+    korp_mutex *lock;
     bool out_of_bounds;
 
     bh_assert(module->module_type == Wasm_Module_Bytecode
@@ -461,31 +433,30 @@ wasm_runtime_atomic_notify(WASMModuleInstanceCommon *module, void *address,
     }
 
     /* Currently we have only one memory instance */
-    if (!module_inst->memories[0]->is_shared) {
+    if (!shared_memory_is_shared(module_inst->memories[0])) {
         /* Always return 0 for ushared linear memory since there is
            no way to create a waiter on it */
         return 0;
     }
 
-    node = search_module((WASMModuleCommon *)module_inst->module);
-    bh_assert(node);
+    lock = shared_memory_get_lock_pointer(module_inst->memories[0]);
 
     /* Lock the shared_mem_lock for the whole atomic notify process,
        and use it to os_cond_signal */
-    os_mutex_lock(&node->shared_mem_lock);
+    os_mutex_lock(lock);
 
     wait_info = acquire_wait_info(address, NULL);
 
     /* Nobody wait on this address */
     if (!wait_info) {
-        os_mutex_unlock(&node->shared_mem_lock);
+        os_mutex_unlock(lock);
         return 0;
     }
 
     /* Notify each wait node in the wait list */
     notify_result = notify_wait_list(wait_info->wait_list, count);
 
-    os_mutex_unlock(&node->shared_mem_lock);
+    os_mutex_unlock(lock);
 
     return notify_result;
 }

+ 12 - 32
core/iwasm/common/wasm_shared_memory.h

@@ -7,53 +7,33 @@
 #define _WASM_SHARED_MEMORY_H
 
 #include "bh_common.h"
-#if WASM_ENABLE_INTERP != 0
-#include "wasm_runtime.h"
-#endif
-#if WASM_ENABLE_AOT != 0
-#include "aot_runtime.h"
-#endif
+#include "../interpreter/wasm_runtime.h"
+#include "wasm_runtime_common.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-typedef struct WASMSharedMemNode {
-    bh_list_link l;
-    /* Lock */
-    korp_mutex lock;
-    /* The module reference */
-    WASMModuleCommon *module;
-    /* The memory information */
-    WASMMemoryInstanceCommon *memory_inst;
-    /* Lock used for atomic operations */
-    korp_mutex shared_mem_lock;
-
-    /* reference count */
-    uint32 ref_count;
-} WASMSharedMemNode;
-
 bool
 wasm_shared_memory_init();
 
 void
 wasm_shared_memory_destroy();
 
-WASMSharedMemNode *
-wasm_module_get_shared_memory(WASMModuleCommon *module);
+uint32
+shared_memory_inc_reference(WASMMemoryInstance *memory);
 
-int32
-shared_memory_inc_reference(WASMModuleCommon *module);
+uint32
+shared_memory_dec_reference(WASMMemoryInstance *memory);
 
-int32
-shared_memory_dec_reference(WASMModuleCommon *module);
+bool
+shared_memory_is_shared(WASMMemoryInstance *memory);
 
-WASMMemoryInstanceCommon *
-shared_memory_get_memory_inst(WASMSharedMemNode *node);
+void
+shared_memory_lock(WASMMemoryInstance *memory);
 
-WASMSharedMemNode *
-shared_memory_set_memory_inst(WASMModuleCommon *module,
-                              WASMMemoryInstanceCommon *memory);
+void
+shared_memory_unlock(WASMMemoryInstance *memory);
 
 uint32
 wasm_runtime_atomic_wait(WASMModuleInstanceCommon *module, void *address,

+ 48 - 0
core/iwasm/common/wasm_suspend_flags.h

@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2023 Amazon Inc.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _WASM_SUSPEND_FLAGS_H
+#define _WASM_SUSPEND_FLAGS_H
+
+#include "bh_atomic.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Bit values stored in WASMSuspendFlags.flags */
+/* Need to terminate */
+#define WASM_SUSPEND_FLAG_TERMINATE 0x1
+/* Need to suspend */
+#define WASM_SUSPEND_FLAG_SUSPEND 0x2
+/* Need to go into breakpoint */
+#define WASM_SUSPEND_FLAG_BREAKPOINT 0x4
+/* Return from pthread_exit */
+#define WASM_SUSPEND_FLAG_EXIT 0x8
+
+typedef union WASMSuspendFlags {
+    /* The flag bits, accessed through the BH_ATOMIC_32_* helpers below */
+    bh_atomic_32_t flags;
+    /* NOTE(review): presumably pads the union to pointer size - confirm */
+    uintptr_t __padding__;
+} WASMSuspendFlags;
+
+/* Accessors for a WASMSuspendFlags lvalue; arguments are parenthesized
+   so that any expression can be passed safely */
+#define WASM_SUSPEND_FLAGS_IS_ATOMIC BH_ATOMIC_32_IS_ATOMIC
+#define WASM_SUSPEND_FLAGS_GET(s_flags) BH_ATOMIC_32_LOAD((s_flags).flags)
+#define WASM_SUSPEND_FLAGS_FETCH_OR(s_flags, val) \
+    BH_ATOMIC_32_FETCH_OR((s_flags).flags, val)
+#define WASM_SUSPEND_FLAGS_FETCH_AND(s_flags, val) \
+    BH_ATOMIC_32_FETCH_AND((s_flags).flags, val)
+
+/* Lock helpers: no-ops when the 32-bit operations are atomic, otherwise
+   they take/release the given mutex. Both forms are expressions without
+   a trailing semicolon, so the caller supplies the ';' */
+#if WASM_SUSPEND_FLAGS_IS_ATOMIC != 0
+#define WASM_SUSPEND_FLAGS_LOCK(lock) (void)0
+#define WASM_SUSPEND_FLAGS_UNLOCK(lock) (void)0
+#else /* else of WASM_SUSPEND_FLAGS_IS_ATOMIC */
+#define WASM_SUSPEND_FLAGS_LOCK(lock) os_mutex_lock(&(lock))
+#define WASM_SUSPEND_FLAGS_UNLOCK(lock) os_mutex_unlock(&(lock))
+#endif /* WASM_SUSPEND_FLAGS_IS_ATOMIC */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* end of _WASM_SUSPEND_FLAGS_H */

+ 2 - 2
core/iwasm/compilation/aot.h

@@ -43,7 +43,7 @@ typedef WASMType AOTFuncType;
 typedef WASMExport AOTExport;
 
 #if WASM_ENABLE_DEBUG_AOT != 0
-typedef void *dwar_extractor_handle_t;
+typedef void *dwarf_extractor_handle_t;
 #endif
 
 typedef enum AOTIntCond {
@@ -285,7 +285,7 @@ typedef struct AOTCompData {
 
     WASMModule *wasm_module;
 #if WASM_ENABLE_DEBUG_AOT != 0
-    dwar_extractor_handle_t extractor;
+    dwarf_extractor_handle_t extractor;
 #endif
 } AOTCompData;
 

+ 28 - 70
core/iwasm/compilation/aot_compiler.c

@@ -2617,64 +2617,6 @@ verify_module(AOTCompContext *comp_ctx)
     return true;
 }
 
-/* Check whether the target supports hardware atomic instructions */
-static bool
-aot_require_lower_atomic_pass(AOTCompContext *comp_ctx)
-{
-    bool ret = false;
-    if (!strncmp(comp_ctx->target_arch, "riscv", 5)) {
-        char *feature =
-            LLVMGetTargetMachineFeatureString(comp_ctx->target_machine);
-
-        if (feature) {
-            if (!strstr(feature, "+a")) {
-                ret = true;
-            }
-            LLVMDisposeMessage(feature);
-        }
-    }
-    return ret;
-}
-
-/* Check whether the target needs to expand switch to if/else */
-static bool
-aot_require_lower_switch_pass(AOTCompContext *comp_ctx)
-{
-    bool ret = false;
-
-    /* IR switch/case will cause .rodata relocation on riscv/xtensa */
-    if (!strncmp(comp_ctx->target_arch, "riscv", 5)
-        || !strncmp(comp_ctx->target_arch, "xtensa", 6)) {
-        ret = true;
-    }
-
-    return ret;
-}
-
-static bool
-apply_passes_for_indirect_mode(AOTCompContext *comp_ctx)
-{
-    LLVMPassManagerRef common_pass_mgr;
-
-    if (!(common_pass_mgr = LLVMCreatePassManager())) {
-        aot_set_last_error("create pass manager failed");
-        return false;
-    }
-
-    aot_add_expand_memory_op_pass(common_pass_mgr);
-
-    if (aot_require_lower_atomic_pass(comp_ctx))
-        LLVMAddLowerAtomicPass(common_pass_mgr);
-
-    if (aot_require_lower_switch_pass(comp_ctx))
-        LLVMAddLowerSwitchPass(common_pass_mgr);
-
-    LLVMRunPassManager(common_pass_mgr, comp_ctx->module);
-
-    LLVMDisposePassManager(common_pass_mgr);
-    return true;
-}
-
 bool
 aot_compile_wasm(AOTCompContext *comp_ctx)
 {
@@ -2714,17 +2656,6 @@ aot_compile_wasm(AOTCompContext *comp_ctx)
            possible core dump. */
         bh_print_time("Begin to run llvm optimization passes");
         aot_apply_llvm_new_pass_manager(comp_ctx, comp_ctx->module);
-
-        /* Run specific passes for AOT indirect mode in last since general
-           optimization may create some intrinsic function calls like
-           llvm.memset, so let's remove these function calls here. */
-        if (!comp_ctx->is_jit_mode && comp_ctx->is_indirect_mode) {
-            bh_print_time("Begin to run optimization passes "
-                          "for indirect mode");
-            if (!apply_passes_for_indirect_mode(comp_ctx)) {
-                return false;
-            }
-        }
         bh_print_time("Finish llvm optimization passes");
     }
 
@@ -2765,7 +2696,7 @@ aot_compile_wasm(AOTCompContext *comp_ctx)
         if (comp_ctx->stack_sizes != NULL) {
             LLVMOrcJITTargetAddress addr;
             if ((err = LLVMOrcLLLazyJITLookup(comp_ctx->orc_jit, &addr,
-                                              aot_stack_sizes_name))) {
+                                              aot_stack_sizes_alias_name))) {
                 aot_handle_llvm_errmsg("failed to look up stack_sizes", err);
                 return false;
             }
@@ -2804,6 +2735,33 @@ aot_generate_tempfile_name(const char *prefix, const char *extension,
     snprintf(buffer + name_len, len - name_len, ".%s", extension);
     return buffer;
 }
+#else
+
+errno_t
+_mktemp_s(char *nameTemplate, size_t sizeInChars);
+
+/*
+ * Build a temporary file name of the form "<prefix>-XXXXXX.<extension>"
+ * in the caller-supplied buffer, passing the "<prefix>-XXXXXX" template
+ * to _mktemp_s() before the extension is appended.
+ *
+ * @param prefix    leading part of the file name
+ * @param extension file extension, without the leading '.'
+ * @param buffer    output buffer that receives the name
+ * @param len       size of buffer in bytes
+ *
+ * @return buffer on success; NULL if the name does not fit into buffer
+ *         or _mktemp_s() fails (the last error is set in both cases)
+ */
+char *
+aot_generate_tempfile_name(const char *prefix, const char *extension,
+                           char *buffer, uint32 len)
+{
+    int name_len;
+
+    name_len = snprintf(buffer, len, "%s-XXXXXX", prefix);
+
+    /* Check if buffer length is enough before handing the template to
+       _mktemp_s: a negative or truncated snprintf result must never be
+       used as the template size */
+    /* name_len + '.' + extension + '\0' */
+    if (name_len < 0
+        || (size_t)name_len + 1 + strlen(extension) + 1 > (size_t)len) {
+        aot_set_last_error("temp file name too long.");
+        return NULL;
+    }
+
+    if (_mktemp_s(buffer, (size_t)name_len + 1) != 0) {
+        aot_set_last_error("make temp file name failed.");
+        return NULL;
+    }
+
+    snprintf(buffer + name_len, len - name_len, ".%s", extension);
+    return buffer;
+}
 #endif /* end of !(defined(_WIN32) || defined(_WIN32_)) */
 
 bool

+ 176 - 21
core/iwasm/compilation/aot_emit_aot_file.c

@@ -93,7 +93,10 @@ check_utf8_str(const uint8 *str, uint32 len)
 /* Internal function in object file */
 typedef struct AOTObjectFunc {
     char *func_name;
+    /* text offset of aot_func#n */
     uint64 text_offset;
+    /* text offset of aot_func_internal#n */
+    uint64 text_offset_of_aot_func_internal;
 } AOTObjectFunc;
 
 /* Symbol table list node */
@@ -637,13 +640,33 @@ get_relocation_size(AOTRelocation *relocation, bool is_32bin)
 }
 
 static uint32
-get_relocations_size(AOTRelocation *relocations, uint32 relocation_count,
+get_relocations_size(AOTObjectData *obj_data,
+                     AOTRelocationGroup *relocation_group,
+                     AOTRelocation *relocations, uint32 relocation_count,
                      bool is_32bin)
 {
     AOTRelocation *relocation = relocations;
     uint32 size = 0, i;
 
     for (i = 0; i < relocation_count; i++, relocation++) {
+        /* ignore the relocations to aot_func_internal#n in text section
+           for windows platform since they will be applied in
+           aot_emit_text_section */
+        if (!strcmp(relocation_group->section_name, ".text")
+            && !strncmp(relocation->symbol_name, AOT_FUNC_INTERNAL_PREFIX,
+                        strlen(AOT_FUNC_INTERNAL_PREFIX))
+            && ((!strncmp(obj_data->comp_ctx->target_arch, "x86_64", 6)
+                 /* Windows AOT_COFF64_BIN_TYPE */
+                 && obj_data->target_info.bin_type == 6
+                 /* IMAGE_REL_AMD64_REL32 in windows x86_64 */
+                 && relocation->relocation_type == 4)
+                || (!strncmp(obj_data->comp_ctx->target_arch, "i386", 4)
+                    /* Windows AOT_COFF32_BIN_TYPE */
+                    && obj_data->target_info.bin_type == 4
+                    /* IMAGE_REL_I386_REL32 in windows x86_32 */
+                    && relocation->relocation_type == 20))) {
+            continue;
+        }
         size = align_uint(size, 4);
         size += get_relocation_size(relocation, is_32bin);
     }
@@ -651,19 +674,22 @@ get_relocations_size(AOTRelocation *relocations, uint32 relocation_count,
 }
 
 static uint32
-get_relocation_group_size(AOTRelocationGroup *relocation_group, bool is_32bin)
+get_relocation_group_size(AOTObjectData *obj_data,
+                          AOTRelocationGroup *relocation_group, bool is_32bin)
 {
     uint32 size = 0;
     /* section name index + relocation count + relocations */
     size += (uint32)sizeof(uint32);
     size += (uint32)sizeof(uint32);
-    size += get_relocations_size(relocation_group->relocations,
+    size += get_relocations_size(obj_data, relocation_group,
+                                 relocation_group->relocations,
                                  relocation_group->relocation_count, is_32bin);
     return size;
 }
 
 static uint32
-get_relocation_groups_size(AOTRelocationGroup *relocation_groups,
+get_relocation_groups_size(AOTObjectData *obj_data,
+                           AOTRelocationGroup *relocation_groups,
                            uint32 relocation_group_count, bool is_32bin)
 {
     AOTRelocationGroup *relocation_group = relocation_groups;
@@ -671,7 +697,7 @@ get_relocation_groups_size(AOTRelocationGroup *relocation_groups,
 
     for (i = 0; i < relocation_group_count; i++, relocation_group++) {
         size = align_uint(size, 4);
-        size += get_relocation_group_size(relocation_group, is_32bin);
+        size += get_relocation_group_size(obj_data, relocation_group, is_32bin);
     }
     return size;
 }
@@ -864,7 +890,7 @@ get_relocation_section_size(AOTCompContext *comp_ctx, AOTObjectData *obj_data)
 
     /* relocation group count + symbol_table + relocation groups */
     return (uint32)sizeof(uint32) + symbol_table_size
-           + get_relocation_groups_size(relocation_groups,
+           + get_relocation_groups_size(obj_data, relocation_groups,
                                         relocation_group_count,
                                         is_32bit_binary(obj_data));
 }
@@ -1734,6 +1760,10 @@ aot_emit_text_section(uint8 *buf, uint8 *buf_end, uint32 *p_offset,
     uint32 section_size = get_text_section_size(obj_data);
     uint32 offset = *p_offset;
     uint8 placeholder = 0;
+    AOTRelocationGroup *relocation_group;
+    AOTRelocation *relocation;
+    uint32 i, j, relocation_count;
+    uint8 *text;
 
     *p_offset = offset = align_uint(offset, 4);
 
@@ -1747,6 +1777,8 @@ aot_emit_text_section(uint8 *buf, uint8 *buf_end, uint32 *p_offset,
             EMIT_BUF(&placeholder, 1);
     }
 
+    text = buf + offset;
+
     if (obj_data->text_size > 0) {
         EMIT_BUF(obj_data->text, obj_data->text_size);
         while (offset & 3)
@@ -1768,6 +1800,67 @@ aot_emit_text_section(uint8 *buf, uint8 *buf_end, uint32 *p_offset,
         return false;
     }
 
+    /* apply relocations to aot_func_internal#n in text section for
+       windows platform */
+    if ((!strncmp(obj_data->comp_ctx->target_arch, "x86_64", 6)
+         /* Windows AOT_COFF64_BIN_TYPE */
+         && obj_data->target_info.bin_type == 6)
+        || (!strncmp(obj_data->comp_ctx->target_arch, "i386", 4)
+            /* Windows AOT_COFF32_BIN_TYPE */
+            && obj_data->target_info.bin_type == 4)) {
+        relocation_group = obj_data->relocation_groups;
+        for (i = 0; i < obj_data->relocation_group_count;
+             i++, relocation_group++) {
+            /* relocation in text section */
+            if (!strcmp(relocation_group->section_name, ".text")) {
+                relocation = relocation_group->relocations;
+                relocation_count = relocation_group->relocation_count;
+                for (j = 0; j < relocation_count; j++) {
+                    /* relocation to aot_func_internal#n */
+                    if (str_starts_with(relocation->symbol_name,
+                                        AOT_FUNC_INTERNAL_PREFIX)
+                        && ((obj_data->target_info.bin_type
+                                 == 6 /* AOT_COFF64_BIN_TYPE */
+                             && relocation->relocation_type
+                                    == 4 /* IMAGE_REL_AMD64_REL32 */)
+                            || (obj_data->target_info.bin_type
+                                    == 4 /* AOT_COFF32_BIN_TYPE */
+                                && relocation->relocation_type
+                                       == 20 /* IMAGE_REL_I386_REL32 */))) {
+                        uint32 func_idx =
+                            atoi(relocation->symbol_name
+                                 + strlen(AOT_FUNC_INTERNAL_PREFIX));
+                        uint64 text_offset, reloc_offset, reloc_addend;
+
+                        bh_assert(func_idx < obj_data->func_count);
+
+                        text_offset = obj_data->funcs[func_idx]
+                                          .text_offset_of_aot_func_internal;
+                        reloc_offset = relocation->relocation_offset;
+                        reloc_addend = relocation->relocation_addend;
+                        /* S + A - P */
+                        *(uint32 *)(text + reloc_offset) =
+                            (uint32)(text_offset + reloc_addend - reloc_offset
+                                     - 4);
+
+                        /* remove current relocation as it has been applied */
+                        if (j < relocation_count - 1) {
+                            uint32 move_size =
+                                (uint32)(sizeof(AOTRelocation)
+                                         * (relocation_count - 1 - j));
+                            bh_memmove_s(relocation, move_size, relocation + 1,
+                                         move_size);
+                        }
+                        relocation_group->relocation_count--;
+                    }
+                    else {
+                        relocation++;
+                    }
+                }
+            }
+        }
+    }
+
     *p_offset = offset;
 
     return true;
@@ -2403,7 +2496,7 @@ aot_resolve_object_data_sections(AOTObjectData *obj_data)
                     && !strcmp(name, "__llvm_prf_cnts")) {
                     snprintf(buf, sizeof(buf), "%s%u", name,
                              llvm_prf_cnts_idx++);
-                    size = strlen(buf) + 1;
+                    size = (uint32)(strlen(buf) + 1);
                     if (!(data_section->name = wasm_runtime_malloc(size))) {
                         aot_set_last_error(
                             "allocate memory for data section name failed.");
@@ -2416,7 +2509,7 @@ aot_resolve_object_data_sections(AOTObjectData *obj_data)
                          && !strcmp(name, "__llvm_prf_data")) {
                     snprintf(buf, sizeof(buf), "%s%u", name,
                              llvm_prf_data_idx++);
-                    size = strlen(buf) + 1;
+                    size = (uint32)(strlen(buf) + 1);
                     if (!(data_section->name = wasm_runtime_malloc(size))) {
                         aot_set_last_error(
                             "allocate memory for data section name failed.");
@@ -2520,15 +2613,15 @@ read_stack_usage_file(const AOTCompContext *comp_ctx, const char *filename,
         }
         if (prefix == aot_func_prefix) {
             if (sz < precheck_stack_size_min) {
-                precheck_stack_size_min = sz;
+                precheck_stack_size_min = (uint32)sz;
             }
             if (sz > precheck_stack_size_max) {
-                precheck_stack_size_max = sz;
+                precheck_stack_size_max = (uint32)sz;
             }
             precheck_found++;
             continue;
         }
-        sizes[func_idx] = sz;
+        sizes[func_idx] = (uint32)sz;
         found++;
     }
     fclose(fp);
@@ -2605,9 +2698,16 @@ aot_resolve_stack_sizes(AOTCompContext *comp_ctx, AOTObjectData *obj_data)
 
     while (!LLVMObjectFileIsSymbolIteratorAtEnd(obj_data->binary, sym_itr)) {
         if ((name = LLVMGetSymbolName(sym_itr))
-            && !strcmp(name, aot_stack_sizes_alias_name)) {
+            && (!strcmp(name, aot_stack_sizes_alias_name)
+                /* symbol of COFF32 starts with "_" */
+                || (obj_data->target_info.bin_type == AOT_COFF32_BIN_TYPE
+                    && !strncmp(name, "_", 1)
+                    && !strcmp(name + 1, aot_stack_sizes_alias_name)))) {
             uint64 sz = LLVMGetSymbolSize(sym_itr);
-            if (sz != sizeof(uint32) * obj_data->func_count) {
+            if (sz != sizeof(uint32) * obj_data->func_count
+                /* sz of COFF64/COFF32 is 0, ignore the check */
+                && obj_data->target_info.bin_type != AOT_COFF64_BIN_TYPE
+                && obj_data->target_info.bin_type != AOT_COFF32_BIN_TYPE) {
                 aot_set_last_error("stack_sizes had unexpected size.");
                 goto fail;
             }
@@ -2642,16 +2742,12 @@ aot_resolve_stack_sizes(AOTCompContext *comp_ctx, AOTObjectData *obj_data)
                     goto fail;
                 }
             }
-            if (addr > UINT32_MAX) {
-                aot_set_last_error("too large stack_sizes offset.");
-                goto fail;
-            }
             /*
              * Record section/offset and construct a copy of stack_sizes.
              * aot_emit_object_data_section_info will emit this copy.
              */
             obj_data->stack_sizes_section_name = sec_name;
-            obj_data->stack_sizes_offset = addr;
+            obj_data->stack_sizes_offset = (uint32)addr;
             obj_data->stack_sizes = wasm_runtime_malloc(
                 obj_data->func_count * sizeof(*obj_data->stack_sizes));
             if (obj_data->stack_sizes == NULL) {
@@ -2770,6 +2866,7 @@ aot_resolve_functions(AOTCompContext *comp_ctx, AOTObjectData *obj_data)
     while (!LLVMObjectFileIsSymbolIteratorAtEnd(obj_data->binary, sym_itr)) {
         if ((name = (char *)LLVMGetSymbolName(sym_itr))
             && str_starts_with(name, prefix)) {
+            /* symbol aot_func#n */
             func_index = (uint32)atoi(name + strlen(prefix));
             if (func_index < obj_data->func_count) {
                 LLVMSectionIteratorRef contain_section;
@@ -2804,6 +2901,44 @@ aot_resolve_functions(AOTCompContext *comp_ctx, AOTObjectData *obj_data)
                 }
             }
         }
+        else if ((name = (char *)LLVMGetSymbolName(sym_itr))
+                 && str_starts_with(name, AOT_FUNC_INTERNAL_PREFIX)) {
+            /* symbol aot_func_internal#n */
+            func_index = (uint32)atoi(name + strlen(AOT_FUNC_INTERNAL_PREFIX));
+            if (func_index < obj_data->func_count) {
+                LLVMSectionIteratorRef contain_section;
+                char *contain_section_name;
+
+                func = obj_data->funcs + func_index;
+
+                if (!(contain_section = LLVMObjectFileCopySectionIterator(
+                          obj_data->binary))) {
+                    aot_set_last_error("llvm get section iterator failed.");
+                    LLVMDisposeSymbolIterator(sym_itr);
+                    return false;
+                }
+                LLVMMoveToContainingSection(contain_section, sym_itr);
+                contain_section_name =
+                    (char *)LLVMGetSectionName(contain_section);
+                LLVMDisposeSectionIterator(contain_section);
+
+                if (!strcmp(contain_section_name, ".text.unlikely.")) {
+                    func->text_offset_of_aot_func_internal =
+                        align_uint(obj_data->text_size, 4)
+                        + LLVMGetSymbolAddress(sym_itr);
+                }
+                else if (!strcmp(contain_section_name, ".text.hot.")) {
+                    func->text_offset_of_aot_func_internal =
+                        align_uint(obj_data->text_size, 4)
+                        + align_uint(obj_data->text_unlikely_size, 4)
+                        + LLVMGetSymbolAddress(sym_itr);
+                }
+                else {
+                    func->text_offset_of_aot_func_internal =
+                        LLVMGetSymbolAddress(sym_itr);
+                }
+            }
+        }
         LLVMMoveToNextSymbol(sym_itr);
     }
     LLVMDisposeSymbolIterator(sym_itr);
@@ -2975,7 +3110,7 @@ aot_resolve_object_relocation_group(AOTObjectData *obj_data,
                 || !strcmp(group->section_name, ".rel.text")) {
                 snprintf(buf, sizeof(buf), "%s%u", relocation->symbol_name,
                          prof_section_idx);
-                size = strlen(buf) + 1;
+                size = (uint32)(strlen(buf) + 1);
                 if (!(relocation->symbol_name = wasm_runtime_malloc(size))) {
                     aot_set_last_error(
                         "allocate memory for relocation symbol name failed.");
@@ -2990,7 +3125,7 @@ aot_resolve_object_relocation_group(AOTObjectData *obj_data,
                                  19)) {
                 snprintf(buf, sizeof(buf), "%s%u", relocation->symbol_name,
                          prof_section_idx);
-                size = strlen(buf) + 1;
+                size = (uint32)(strlen(buf) + 1);
                 if (!(relocation->symbol_name = wasm_runtime_malloc(size))) {
                     aot_set_last_error(
                         "allocate memory for relocation symbol name failed.");
@@ -3087,6 +3222,13 @@ is_relocation_section(AOTObjectData *obj_data, LLVMSectionIteratorRef sec_itr)
     return false;
 }
 
+/*
+ * Tell whether a relocation group's section name is one of the names
+ * this file treats as read-only: ".rel.text", ".rela.text",
+ * ".rela.literal" or ".text". The comparison is exact (no prefix match).
+ *
+ * @param name NUL-terminated section name, must not be NULL
+ *
+ * @return true if name equals one of the names above, false otherwise
+ */
+static bool
+is_readonly_section(const char *name)
+{
+    static const char *const readonly_names[] = { ".rel.text", ".rela.text",
+                                                  ".rela.literal", ".text" };
+    size_t idx;
+
+    for (idx = 0; idx < sizeof(readonly_names) / sizeof(readonly_names[0]);
+         idx++) {
+        if (strcmp(name, readonly_names[idx]) == 0)
+            return true;
+    }
+    return false;
+}
+
 static bool
 get_relocation_groups_count(AOTObjectData *obj_data, uint32 *p_count)
 {
@@ -3149,7 +3291,7 @@ aot_resolve_object_relocation_groups(AOTObjectData *obj_data)
                     || !strcmp(name, ".rel__llvm_prf_data"))) {
                 char buf[32];
                 snprintf(buf, sizeof(buf), "%s%u", name, llvm_prf_data_idx);
-                size = strlen(buf) + 1;
+                size = (uint32)(strlen(buf) + 1);
                 if (!(relocation_group->section_name =
                           wasm_runtime_malloc(size))) {
                     aot_set_last_error(
@@ -3184,6 +3326,19 @@ aot_resolve_object_relocation_groups(AOTObjectData *obj_data)
                 relocation_group->section_name = ".rel.text";
             }
 
+            /*
+             * Relocations in read-only sections are problematic,
+             * especially for XIP on platforms which don't have
+             * copy-on-write mappings.
+             */
+            if (obj_data->comp_ctx->is_indirect_mode
+                && is_readonly_section(relocation_group->section_name)) {
+                LOG_WARNING("%" PRIu32
+                            " text relocations in %s section for indirect mode",
+                            relocation_group->relocation_count,
+                            relocation_group->section_name);
+            }
+
             relocation_group++;
         }
         LLVMMoveToNextSection(sec_itr);

+ 17 - 5
core/iwasm/compilation/aot_emit_function.c

@@ -18,6 +18,17 @@
         }                                                                     \
     } while (0)
 
+/*
+ * Check whether the compilation target is a Windows platform by looking
+ * for "win" in the target machine's triple (this also covers "win32"
+ * and "windows").
+ *
+ * The "win" that ends "darwin" is skipped so that Apple targets are not
+ * misdetected as Windows.
+ *
+ * @param comp_ctx the compilation context, its target_machine is queried
+ *
+ * @return true if the triple names a Windows target, false otherwise
+ *         (including when no triple could be obtained)
+ */
+static bool
+is_win_platform(AOTCompContext *comp_ctx)
+{
+    char *triple = LLVMGetTargetMachineTriple(comp_ctx->target_machine);
+    const char *match;
+    bool is_win = false;
+
+    bh_assert(triple);
+    if (!triple)
+        return false;
+
+    for (match = strstr(triple, "win"); match;
+         match = strstr(match + 1, "win")) {
+        /* ignore the "win" which is the tail of "darwin" */
+        if (match - triple >= 3 && !strncmp(match - 3, "dar", 3))
+            continue;
+        is_win = true;
+        break;
+    }
+
+    /* the triple string is owned by the caller and must be released */
+    LLVMDisposeMessage(triple);
+    return is_win;
+}
+
 static bool
 create_func_return_block(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
 {
@@ -458,7 +469,7 @@ check_app_addr_and_convert(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     }
 
     /* Check whether exception was thrown when executing the function */
-    if (comp_ctx->enable_bound_check
+    if ((comp_ctx->enable_bound_check || is_win_platform(comp_ctx))
         && !check_call_return(comp_ctx, func_ctx, res)) {
         return false;
     }
@@ -696,7 +707,7 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                 goto fail;
             /* Check whether there was exception thrown when executing
                the function */
-            if (comp_ctx->enable_bound_check
+            if ((comp_ctx->enable_bound_check || is_win_platform(comp_ctx))
                 && !check_call_return(comp_ctx, func_ctx, res))
                 goto fail;
         }
@@ -849,7 +860,8 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 
         /* Check whether there was exception thrown when executing
            the function */
-        if (!tail_call && comp_ctx->enable_bound_check
+        if (!tail_call
+            && (comp_ctx->enable_bound_check || is_win_platform(comp_ctx))
             && !check_exception_thrown(comp_ctx, func_ctx))
             goto fail;
     }
@@ -1431,7 +1443,7 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
         goto fail;
 
     /* Check whether exception was thrown when executing the function */
-    if (comp_ctx->enable_bound_check
+    if ((comp_ctx->enable_bound_check || is_win_platform(comp_ctx))
         && !check_call_return(comp_ctx, func_ctx, res))
         goto fail;
 
@@ -1483,7 +1495,7 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     }
 
     /* Check whether exception was thrown when executing the function */
-    if (comp_ctx->enable_bound_check
+    if ((comp_ctx->enable_bound_check || is_win_platform(comp_ctx))
         && !check_exception_thrown(comp_ctx, func_ctx))
         goto fail;
 

+ 77 - 5
core/iwasm/compilation/aot_llvm.c

@@ -7,6 +7,7 @@
 #include "aot_llvm_extra2.h"
 #include "aot_compiler.h"
 #include "aot_emit_exception.h"
+#include "aot_emit_table.h"
 #include "../aot/aot_runtime.h"
 #include "../aot/aot_intrinsic.h"
 
@@ -145,6 +146,13 @@ aot_target_precheck_can_use_musttail(const AOTCompContext *comp_ctx)
          */
         return false;
     }
+    if (!strcmp(comp_ctx->target_arch, "mips")) {
+        /*
+         * cf.
+         * https://github.com/bytecodealliance/wasm-micro-runtime/issues/2412
+         */
+        return false;
+    }
     /*
      * x86-64/i386: true
      *
@@ -230,6 +238,18 @@ aot_estimate_stack_usage_for_function_call(const AOTCompContext *comp_ctx,
     return size;
 }
 
+/*
+ * Compute a 32-bit offset from the table layout of the module being
+ * compiled: get_tbl_inst_offset() is queried with the total table count
+ * (imported + defined) as the index, and the result is passed through
+ * align_uint(..., 8).
+ *
+ * NOTE(review): judging by the name and by the caller adding
+ * offsetof(AOTModuleInstanceExtra, ...) to the result, this is presumably
+ * the byte offset of the AOTModuleInstanceExtra area inside the AOT module
+ * instance - confirm against the runtime's instance layout.
+ *
+ * @param comp_ctx the compilation context
+ *
+ * @return the aligned offset; asserts that the unaligned value fits in
+ *         32 bits
+ */
+static uint32
+get_inst_extra_offset(AOTCompContext *comp_ctx)
+{
+    const AOTCompData *data = comp_ctx->comp_data;
+    uint32 total_tables = data->import_table_count + data->table_count;
+    uint64 tables_end = get_tbl_inst_offset(comp_ctx, NULL, total_tables);
+
+    bh_assert(tables_end <= UINT32_MAX);
+    return align_uint((uint32)tables_end, 8);
+}
+
 /*
  * a "precheck" function performs a few things before calling wrapped_func.
  *
@@ -297,8 +317,8 @@ aot_add_precheck_function(AOTCompContext *comp_ctx, LLVMModuleRef module,
         goto fail;
     }
 
-    unsigned int param_count = LLVMCountParams(precheck_func);
-    uint64 sz = param_count * sizeof(LLVMValueRef);
+    uint32 param_count = LLVMCountParams(precheck_func);
+    uint32 sz = param_count * (uint32)sizeof(LLVMValueRef);
     params = wasm_runtime_malloc(sz);
     if (params == NULL) {
         goto fail;
@@ -327,9 +347,36 @@ aot_add_precheck_function(AOTCompContext *comp_ctx, LLVMModuleRef module,
     /*
      * load the value for this wrapped function from the stack_sizes array
      */
+    LLVMValueRef stack_sizes;
+    if (comp_ctx->is_indirect_mode) {
+        uint32 offset_u32;
+        LLVMValueRef offset;
+        LLVMValueRef stack_sizes_p;
+
+        offset_u32 = get_inst_extra_offset(comp_ctx);
+        offset_u32 += offsetof(AOTModuleInstanceExtra, stack_sizes);
+        offset = I32_CONST(offset_u32);
+        if (!offset) {
+            goto fail;
+        }
+        stack_sizes_p =
+            LLVMBuildInBoundsGEP2(b, INT8_TYPE, func_ctx->aot_inst, &offset, 1,
+                                  "aot_inst_stack_sizes_p");
+        if (!stack_sizes_p) {
+            goto fail;
+        }
+        stack_sizes =
+            LLVMBuildLoad2(b, INT32_PTR_TYPE, stack_sizes_p, "stack_sizes");
+        if (!stack_sizes) {
+            goto fail;
+        }
+    }
+    else {
+        stack_sizes = comp_ctx->stack_sizes;
+    }
     LLVMValueRef func_index_const = I32_CONST(func_index);
     LLVMValueRef sizes =
-        LLVMBuildBitCast(b, comp_ctx->stack_sizes, INT32_PTR_TYPE, "sizes");
+        LLVMBuildBitCast(b, stack_sizes, INT32_PTR_TYPE, "sizes");
     if (!sizes) {
         goto fail;
     }
@@ -584,6 +631,15 @@ aot_add_llvm_func(AOTCompContext *comp_ctx, LLVMModuleRef module,
                                     prefix)))
         goto fail;
 
+    if (comp_ctx->is_indirect_mode) {
+        /* avoid LUT relocations ("switch-table") */
+        LLVMAttributeRef attr_no_jump_tables = LLVMCreateStringAttribute(
+            comp_ctx->context, "no-jump-tables",
+            (uint32)strlen("no-jump-tables"), "true", (uint32)strlen("true"));
+        LLVMAddAttributeAtIndex(func, LLVMAttributeFunctionIndex,
+                                attr_no_jump_tables);
+    }
+
     if (need_precheck) {
         if (!comp_ctx->is_jit_mode)
             LLVMSetLinkage(func, LLVMInternalLinkage);
@@ -2033,7 +2089,7 @@ jit_stack_size_callback(void *user_data, const char *name, size_t namelen,
         return;
     }
     /* ensure NUL termination */
-    bh_memcpy_s(buf, sizeof(buf), name, namelen);
+    bh_memcpy_s(buf, (uint32)sizeof(buf), name, (uint32)namelen);
     buf[namelen] = 0;
 
     ret = sscanf(buf, AOT_FUNC_INTERNAL_PREFIX "%" SCNu32, &func_idx);
@@ -2054,7 +2110,7 @@ jit_stack_size_callback(void *user_data, const char *name, size_t namelen,
 
     /* Note: -1 == AOT_NEG_ONE from aot_create_stack_sizes */
     bh_assert(comp_ctx->jit_stack_sizes[func_idx] == (uint32)-1);
-    comp_ctx->jit_stack_sizes[func_idx] = stack_size + call_size;
+    comp_ctx->jit_stack_sizes[func_idx] = (uint32)stack_size + call_size;
 }
 
 static bool
@@ -2257,6 +2313,12 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
     if (option->enable_stack_estimation)
         comp_ctx->enable_stack_estimation = true;
 
+    if (option->llvm_passes)
+        comp_ctx->llvm_passes = option->llvm_passes;
+
+    if (option->builtin_intrinsics)
+        comp_ctx->builtin_intrinsics = option->builtin_intrinsics;
+
     comp_ctx->opt_level = option->opt_level;
     comp_ctx->size_level = option->size_level;
 
@@ -2690,6 +2752,16 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
             aot_set_last_error("create LLVM target machine failed.");
             goto fail;
         }
+
+        /* If only to create target machine for querying information, early stop
+         */
+        if ((arch && !strcmp(arch, "help")) || (abi && !strcmp(abi, "help"))
+            || (cpu && !strcmp(cpu, "help"))
+            || (features && !strcmp(features, "+help"))) {
+            LOG_DEBUG(
+                "create LLVM target machine only for printing help info.");
+            goto fail;
+        }
     }
 
     triple = LLVMGetTargetMachineTriple(comp_ctx->target_machine);

+ 4 - 0
core/iwasm/compilation/aot_llvm.h

@@ -417,6 +417,8 @@ typedef struct AOTCompContext {
 
     const char *stack_usage_file;
     char stack_usage_temp_file[64];
+    const char *llvm_passes;
+    const char *builtin_intrinsics;
 } AOTCompContext;
 
 enum {
@@ -455,6 +457,8 @@ typedef struct AOTCompOption {
     char **custom_sections;
     uint32 custom_sections_count;
     const char *stack_usage_file;
+    const char *llvm_passes;
+    const char *builtin_intrinsics;
 } AOTCompOption, *aot_comp_option_t;
 
 bool

+ 49 - 95
core/iwasm/compilation/aot_llvm_extra.cpp

@@ -27,7 +27,7 @@
 #include <llvm/IR/Module.h>
 #include <llvm/IR/Instructions.h>
 #include <llvm/IR/IntrinsicInst.h>
-#include <llvm/IR/LegacyPassManager.h>
+#include <llvm/IR/PassManager.h>
 #include <llvm/Support/CommandLine.h>
 #include <llvm/Support/ErrorHandling.h>
 #include <llvm/Target/CodeGenCWrappers.h>
@@ -73,115 +73,56 @@ LLVM_C_EXTERN_C_END
 
 ExitOnError ExitOnErr;
 
-class ExpandMemoryOpPass : public llvm::ModulePass
+class ExpandMemoryOpPass : public PassInfoMixin<ExpandMemoryOpPass>
 {
   public:
-    static char ID;
-
-    ExpandMemoryOpPass()
-      : ModulePass(ID)
-    {}
-
-    bool runOnModule(Module &M) override;
-
-    bool expandMemIntrinsicUses(Function &F);
-    StringRef getPassName() const override
-    {
-        return "Expand memory operation intrinsics";
-    }
-
-    void getAnalysisUsage(AnalysisUsage &AU) const override
-    {
-        AU.addRequired<TargetTransformInfoWrapperPass>();
-    }
+    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 };
 
-char ExpandMemoryOpPass::ID = 0;
-
-bool
-ExpandMemoryOpPass::expandMemIntrinsicUses(Function &F)
+/*
+ * Function pass (new pass manager) which replaces every memcpy, memmove
+ * and memset intrinsic call in F with an explicit loop and erases the
+ * original call.
+ *
+ * The calls are collected first and expanded afterwards, so the
+ * instruction lists are not modified while they are being iterated.
+ *
+ * @param F the function to transform
+ * @param AM the function analysis manager, used to obtain the
+ *        TargetTransformInfo needed by the memcpy expansion
+ *
+ * @return PreservedAnalyses::all() if F contained no such call, otherwise
+ *         PreservedAnalyses::none() since the expansion rewrites the CFG
+ */
+PreservedAnalyses
+ExpandMemoryOpPass::run(Function &F, FunctionAnalysisManager &AM)
 {
-    Intrinsic::ID ID = F.getIntrinsicID();
-    bool Changed = false;
-
-    for (auto I = F.user_begin(), E = F.user_end(); I != E;) {
-        Instruction *Inst = cast<Instruction>(*I);
-        ++I;
-
-        switch (ID) {
-            case Intrinsic::memcpy:
-            {
-                auto *Memcpy = cast<MemCpyInst>(Inst);
-                Function *ParentFunc = Memcpy->getParent()->getParent();
-                const TargetTransformInfo &TTI =
-                    getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
-                        *ParentFunc);
-                expandMemCpyAsLoop(Memcpy, TTI);
-                Changed = true;
-                Memcpy->eraseFromParent();
-                break;
+    SmallVector<MemIntrinsic *, 16> MemCalls;
+
+    /* Iterate over all instructions in the function, looking for memcpy,
+     * memmove, and memset.  When we find one, expand it into a loop. */
+
+    for (auto &BB : F) {
+        for (auto &Inst : BB) {
+            if (auto *Memcpy = dyn_cast_or_null<MemCpyInst>(&Inst)) {
+                MemCalls.push_back(Memcpy);
             }
-            case Intrinsic::memmove:
-            {
-                auto *Memmove = cast<MemMoveInst>(Inst);
-                expandMemMoveAsLoop(Memmove);
-                Changed = true;
-                Memmove->eraseFromParent();
-                break;
+            else if (auto *Memmove = dyn_cast_or_null<MemMoveInst>(&Inst)) {
+                MemCalls.push_back(Memmove);
             }
-            case Intrinsic::memset:
-            {
-                auto *Memset = cast<MemSetInst>(Inst);
-                expandMemSetAsLoop(Memset);
-                Changed = true;
-                Memset->eraseFromParent();
-                break;
+            else if (auto *Memset = dyn_cast_or_null<MemSetInst>(&Inst)) {
+                MemCalls.push_back(Memset);
             }
-            default:
-                break;
         }
     }
 
-    return Changed;
-}
-
-bool
-ExpandMemoryOpPass::runOnModule(Module &M)
-{
-    bool Changed = false;
-
-    for (Function &F : M) {
-        if (!F.isDeclaration())
-            continue;
-
-        switch (F.getIntrinsicID()) {
-            case Intrinsic::memcpy:
-            case Intrinsic::memmove:
-            case Intrinsic::memset:
-                if (expandMemIntrinsicUses(F))
-                    Changed = true;
-                break;
-
-            default:
-                break;
+    /* Nothing to expand: the IR is untouched, keep every analysis */
+    if (MemCalls.empty())
+        return PreservedAnalyses::all();
+
+    for (MemIntrinsic *MemCall : MemCalls) {
+        if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
+            Function *ParentFunc = Memcpy->getParent()->getParent();
+            const TargetTransformInfo &TTI =
+                AM.getResult<TargetIRAnalysis>(*ParentFunc);
+            expandMemCpyAsLoop(Memcpy, TTI);
+            Memcpy->eraseFromParent();
+        }
+        else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
+            expandMemMoveAsLoop(Memmove);
+            Memmove->eraseFromParent();
+        }
+        else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
+            expandMemSetAsLoop(Memset);
+            Memset->eraseFromParent();
         }
     }
 
-    return Changed;
-}
+    /* The expansion splits basic blocks and inserts loops, so the CFG
+     * changed and CFG-based analyses must not be reported as preserved */
 
-void
-aot_add_expand_memory_op_pass(LLVMPassManagerRef pass)
-{
-    reinterpret_cast<legacy::PassManager *>(pass)->add(
-        new ExpandMemoryOpPass());
-}
-
-void
-aot_add_simple_loop_unswitch_pass(LLVMPassManagerRef pass)
-{
-    reinterpret_cast<legacy::PassManager *>(pass)->add(
-        createSimpleLoopUnswitchLegacyPass());
+    return PreservedAnalyses::none();
 }
 
 bool
@@ -373,6 +314,10 @@ aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
 
         MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
 
+        if (comp_ctx->llvm_passes) {
+            ExitOnErr(PB.parsePassPipeline(MPM, comp_ctx->llvm_passes));
+        }
+
         if (!disable_llvm_lto) {
             /* Apply LTO for AOT mode */
             if (comp_ctx->comp_data->func_count >= 10
@@ -386,6 +331,15 @@ aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
         else {
             MPM.addPass(PB.buildPerModuleDefaultPipeline(OL));
         }
+
+        /* Run specific passes for AOT indirect mode in last since general
+            optimization may create some intrinsic function calls like
+            llvm.memset, so let's remove these function calls here. */
+        if (comp_ctx->is_indirect_mode) {
+            FunctionPassManager FPM1;
+            FPM1.addPass(ExpandMemoryOpPass());
+            MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM1)));
+        }
     }
 
     MPM.run(*M, MAM);

+ 16 - 16
core/iwasm/compilation/debug/dwarf_extractor.cpp

@@ -28,25 +28,25 @@
 
 using namespace lldb;
 
-typedef struct dwar_extractor {
+typedef struct dwarf_extractor {
     SBDebugger debugger;
     SBTarget target;
     SBModule module;
 
-} dwar_extractor;
+} dwarf_extractor;
 
-#define TO_HANDLE(extractor) (dwar_extractor_handle_t)(extractor)
+#define TO_HANDLE(extractor) (dwarf_extractor_handle_t)(extractor)
 
-#define TO_EXTACTOR(handle) (dwar_extractor *)(handle)
+#define TO_EXTACTOR(handle) (dwarf_extractor *)(handle)
 
 static bool is_debugger_initialized;
 
-dwar_extractor_handle_t
+dwarf_extractor_handle_t
 create_dwarf_extractor(AOTCompData *comp_data, char *file_name)
 {
     char *arch = NULL;
     char *platform = NULL;
-    dwar_extractor *extractor = NULL;
+    dwarf_extractor *extractor = NULL;
 
     //__attribute__((constructor)) may be better?
     if (!is_debugger_initialized) {
@@ -61,7 +61,7 @@ create_dwarf_extractor(AOTCompData *comp_data, char *file_name)
     SBError error;
     SBFileSpec exe_file_spec(file_name, true);
 
-    if (!(extractor = new dwar_extractor())) {
+    if (!(extractor = new dwarf_extractor())) {
         LOG_ERROR("Create Dwarf Extractor error: failed to allocate memory");
         goto fail3;
     }
@@ -101,9 +101,9 @@ fail3:
 }
 
 void
-destroy_dwarf_extractor(dwar_extractor_handle_t handle)
+destroy_dwarf_extractor(dwarf_extractor_handle_t handle)
 {
-    dwar_extractor *extractor = TO_EXTACTOR(handle);
+    dwarf_extractor *extractor = TO_EXTACTOR(handle);
     if (!extractor)
         return;
     extractor->debugger.DeleteTarget(extractor->target);
@@ -116,7 +116,7 @@ destroy_dwarf_extractor(dwar_extractor_handle_t handle)
 LLVMMetadataRef
 dwarf_gen_file_info(const AOTCompContext *comp_ctx)
 {
-    dwar_extractor *extractor;
+    dwarf_extractor *extractor;
     int units_number;
     LLVMMetadataRef file_info = NULL;
     const char *file_name;
@@ -193,7 +193,7 @@ dwarf_gen_mock_vm_info(AOTCompContext *comp_ctx)
 LLVMMetadataRef
 dwarf_gen_comp_unit_info(const AOTCompContext *comp_ctx)
 {
-    dwar_extractor *extractor;
+    dwarf_extractor *extractor;
     int units_number;
     LLVMMetadataRef comp_unit = NULL;
 
@@ -292,7 +292,7 @@ lldb_function_to_function_dbi(const AOTCompContext *comp_ctx,
     SBTypeList function_args = function.GetType().GetFunctionArgumentTypes();
     SBType return_type = function.GetType().GetFunctionReturnType();
     const size_t num_function_args = function_args.GetSize();
-    dwar_extractor *extractor;
+    dwarf_extractor *extractor;
 
     if (!(extractor = TO_EXTACTOR(comp_ctx->comp_data->extractor)))
         return NULL;
@@ -393,7 +393,7 @@ dwarf_gen_func_info(const AOTCompContext *comp_ctx,
                     const AOTFuncContext *func_ctx)
 {
     LLVMMetadataRef func_info = NULL;
-    dwar_extractor *extractor;
+    dwarf_extractor *extractor;
     uint64_t vm_offset;
     AOTFunc *func = func_ctx->aot_func;
 
@@ -423,7 +423,7 @@ dwarf_get_func_name(const AOTCompContext *comp_ctx,
                     const AOTFuncContext *func_ctx, char *name, int len)
 {
     LLVMMetadataRef func_info = NULL;
-    dwar_extractor *extractor;
+    dwarf_extractor *extractor;
     uint64_t vm_offset;
     AOTFunc *func = func_ctx->aot_func;
 
@@ -454,7 +454,7 @@ dwarf_gen_location(const AOTCompContext *comp_ctx,
                    const AOTFuncContext *func_ctx, uint64_t vm_offset)
 {
     LLVMMetadataRef location_info = NULL;
-    dwar_extractor *extractor;
+    dwarf_extractor *extractor;
     AOTFunc *func = func_ctx->aot_func;
 
     if (!(extractor = TO_EXTACTOR(comp_ctx->comp_data->extractor)))
@@ -493,7 +493,7 @@ dwarf_gen_func_ret_location(const AOTCompContext *comp_ctx,
                             const AOTFuncContext *func_ctx)
 {
     LLVMMetadataRef func_info = NULL;
-    dwar_extractor *extractor;
+    dwarf_extractor *extractor;
     uint64_t vm_offset;
     AOTFunc *func = func_ctx->aot_func;
     LLVMMetadataRef location_info = NULL;

+ 2 - 2
core/iwasm/compilation/debug/dwarf_extractor.h

@@ -18,7 +18,7 @@ typedef unsigned int LLDBLangType;
 
 struct AOTCompData;
 typedef struct AOTCompData *aot_comp_data_t;
-typedef void *dwar_extractor_handle_t;
+typedef void *dwarf_extractor_handle_t;
 
 struct AOTCompContext;
 typedef struct AOTCompContext AOTCompContext;
@@ -26,7 +26,7 @@ typedef struct AOTCompContext AOTCompContext;
 struct AOTFuncContext;
 
 typedef struct AOTFuncContext AOTFuncContext;
-dwar_extractor_handle_t
+dwarf_extractor_handle_t
 create_dwarf_extractor(aot_comp_data_t comp_data, char *file_name);
 
 LLVMMetadataRef

+ 21 - 20
core/iwasm/fast-jit/fe/jit_emit_table.c

@@ -88,27 +88,28 @@ fail:
 
 static int
 wasm_init_table(WASMModuleInstance *inst, uint32 tbl_idx, uint32 elem_idx,
-                uint32 dst, uint32 len, uint32 src)
+                uint32 dst_offset, uint32 len, uint32 src_offset)
 {
     WASMTableInstance *tbl;
     uint32 tbl_sz;
     WASMTableSeg *elem;
     uint32 elem_len;
 
-    tbl = inst->tables[tbl_idx];
-    tbl_sz = tbl->cur_size;
-    if (dst > tbl_sz || tbl_sz - dst < len)
-        goto out_of_bounds;
-
     elem = inst->module->table_segments + elem_idx;
     elem_len = elem->function_count;
-    if (src > elem_len || elem_len - src < len)
+    if (offset_len_out_of_bounds(src_offset, len, elem_len))
+        goto out_of_bounds;
+
+    tbl = inst->tables[tbl_idx];
+    tbl_sz = tbl->cur_size;
+    if (offset_len_out_of_bounds(dst_offset, len, tbl_sz))
         goto out_of_bounds;
 
     bh_memcpy_s((uint8 *)tbl + offsetof(WASMTableInstance, elems)
-                    + dst * sizeof(uint32),
-                (uint32)((tbl_sz - dst) * sizeof(uint32)),
-                elem->func_indexes + src, (uint32)(len * sizeof(uint32)));
+                    + dst_offset * sizeof(uint32),
+                (uint32)((tbl_sz - dst_offset) * sizeof(uint32)),
+                elem->func_indexes + src_offset,
+                (uint32)(len * sizeof(uint32)));
 
     return 0;
 out_of_bounds:
@@ -157,14 +158,14 @@ wasm_copy_table(WASMModuleInstance *inst, uint32 src_tbl_idx,
     WASMTableInstance *src_tbl, *dst_tbl;
     uint32 src_tbl_sz, dst_tbl_sz;
 
-    src_tbl = inst->tables[src_tbl_idx];
-    src_tbl_sz = src_tbl->cur_size;
-    if (src_offset > src_tbl_sz || src_tbl_sz - src_offset < len)
-        goto out_of_bounds;
-
     dst_tbl = inst->tables[dst_tbl_idx];
     dst_tbl_sz = dst_tbl->cur_size;
-    if (dst_offset > dst_tbl_sz || dst_tbl_sz - dst_offset < len)
+    if (offset_len_out_of_bounds(dst_offset, len, dst_tbl_sz))
+        goto out_of_bounds;
+
+    src_tbl = inst->tables[src_tbl_idx];
+    src_tbl_sz = src_tbl->cur_size;
+    if (offset_len_out_of_bounds(src_offset, len, src_tbl_sz))
         goto out_of_bounds;
 
     bh_memmove_s((uint8 *)dst_tbl + offsetof(WASMTableInstance, elems)
@@ -263,7 +264,7 @@ fail:
 }
 
 static int
-wasm_fill_table(WASMModuleInstance *inst, uint32 tbl_idx, uint32 dst,
+wasm_fill_table(WASMModuleInstance *inst, uint32 tbl_idx, uint32 dst_offset,
                 uint32 val, uint32 len)
 {
     WASMTableInstance *tbl;
@@ -272,11 +273,11 @@ wasm_fill_table(WASMModuleInstance *inst, uint32 tbl_idx, uint32 dst,
     tbl = inst->tables[tbl_idx];
     tbl_sz = tbl->cur_size;
 
-    if (dst > tbl_sz || tbl_sz - dst < len)
+    if (offset_len_out_of_bounds(dst_offset, len, tbl_sz))
         goto out_of_bounds;
 
-    for (; len != 0; dst++, len--) {
-        tbl->elems[dst] = val;
+    for (; len != 0; dst_offset++, len--) {
+        tbl->elems[dst_offset] = val;
     }
 
     return 0;

+ 4 - 2
core/iwasm/include/aot_export.h

@@ -26,8 +26,8 @@ void
 aot_destroy_comp_data(aot_comp_data_t comp_data);
 
 #if WASM_ENABLE_DEBUG_AOT != 0
-typedef void *dwar_extractor_handle_t;
-dwar_extractor_handle_t
+typedef void *dwarf_extractor_handle_t;
+dwarf_extractor_handle_t
 create_dwarf_extractor(aot_comp_data_t comp_data, char *file_name);
 #endif
 
@@ -67,6 +67,8 @@ typedef struct AOTCompOption {
     char **custom_sections;
     uint32_t custom_sections_count;
     const char *stack_usage_file;
+    const char *llvm_passes;
+    const char *builtin_intrinsics;
 } AOTCompOption, *aot_comp_option_t;
 
 bool

+ 47 - 0
core/iwasm/include/wasm_export.h

@@ -186,6 +186,7 @@ enum wasm_valkind_enum {
 
 #ifndef WASM_VAL_T_DEFINED
 #define WASM_VAL_T_DEFINED
+struct wasm_ref_t;
 
 typedef struct wasm_val_t {
     wasm_valkind_t kind;
@@ -197,6 +198,7 @@ typedef struct wasm_val_t {
         double f64;
         /* represent a foreign object, aka externref in .wat */
         uintptr_t foreign;
+        struct wasm_ref_t *ref;
     } of;
 } wasm_val_t;
 #endif
@@ -914,6 +916,25 @@ wasm_runtime_set_custom_data(wasm_module_inst_t module_inst,
 WASM_RUNTIME_API_EXTERN void *
 wasm_runtime_get_custom_data(wasm_module_inst_t module_inst);
 
+/**
+ * Set the memory bounds checks flag of a WASM module instance.
+ * 
+ * @param module_inst the WASM module instance
+ * @param enable the flag to enable/disable the memory bounds checks
+ */
+WASM_RUNTIME_API_EXTERN void
+wasm_runtime_set_bounds_checks(wasm_module_inst_t module_inst,
+                               bool enable);
+/**
+ * Check if the memory bounds checks flag is enabled for a WASM module instance.
+ * 
+ * @param module_inst the WASM module instance
+ *
+ * @return true if the memory bounds checks flag is enabled, false otherwise
+ */
+WASM_RUNTIME_API_EXTERN bool
+wasm_runtime_is_bounds_checks_enabled(
+    wasm_module_inst_t module_inst);
 /**
  * Allocate memory from the heap of WASM module instance
  *
@@ -1271,6 +1292,32 @@ WASM_RUNTIME_API_EXTERN bool
 wasm_externref_obj2ref(wasm_module_inst_t module_inst,
                        void *extern_obj, uint32_t *p_externref_idx);
 
+/**
+ * Delete external object registered by `wasm_externref_obj2ref`.
+ *
+ * @param module_inst the WASM module instance that the extern object
+ *        belongs to
+ * @param extern_obj the external object to be deleted
+ *
+ * @return true if success, false otherwise
+ */
+WASM_RUNTIME_API_EXTERN bool
+wasm_externref_objdel(wasm_module_inst_t module_inst, void *extern_obj);
+
+/**
+ * Set cleanup callback to release external object.
+ *
+ * @param module_inst the WASM module instance that the extern object
+ *        belongs to
+ * @param extern_obj the external object on which to set the cleanup
+ *        callback `extern_obj_cleanup`
+ * @param extern_obj_cleanup a callback to release `extern_obj`
+ *
+ * @return true if success, false otherwise
+ */
+WASM_RUNTIME_API_EXTERN bool
+wasm_externref_set_cleanup(wasm_module_inst_t module_inst, void *extern_obj,
+                           void (*extern_obj_cleanup)(void *));
+
 /**
  * Retrieve the external object from an internal externref index
  *

+ 18 - 1
core/iwasm/interpreter/wasm.h

@@ -627,7 +627,6 @@ typedef struct WASMBranchBlock {
     uint32 cell_num;
 } WASMBranchBlock;
 
-/* Execution environment, e.g. stack info */
 /**
  * Align an unsigned value on an alignment boundary.
  *
@@ -643,6 +642,24 @@ align_uint(unsigned v, unsigned b)
     return (v + m) & ~m;
 }
 
+/**
+ * Check whether the range [offset, offset + len) falls outside of
+ * [0, max_size].
+ *
+ * A zero-length range is treated as in bounds as long as
+ * offset <= max_size.
+ *
+ * @param offset the offset at which the data starts
+ * @param len the length of the data
+ * @param max_size the size of the range that the data must fit in
+ *
+ * @return true if offset + len overflows or exceeds max_size,
+ *         false otherwise
+ */
+inline static bool
+offset_len_out_of_bounds(uint32 offset, uint32 len, uint32 max_size)
+{
+    if (offset + len < offset /* the unsigned sum wrapped around */
+        || offset + len > max_size)
+        return true;
+    return false;
+}
+
 /**
  * Return the hash value of c string.
  */

+ 135 - 114
core/iwasm/interpreter/wasm_interp_classic.c

@@ -41,26 +41,28 @@ typedef float64 CellType_F64;
 
 #if !defined(OS_ENABLE_HW_BOUND_CHECK) \
     || WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS == 0
-#define CHECK_MEMORY_OVERFLOW(bytes)                            \
-    do {                                                        \
-        uint64 offset1 = (uint64)offset + (uint64)addr;         \
-        if (offset1 + bytes <= (uint64)get_linear_mem_size())   \
-            /* If offset1 is in valid range, maddr must also    \
-               be in valid range, no need to check it again. */ \
-            maddr = memory->memory_data + offset1;              \
-        else                                                    \
-            goto out_of_bounds;                                 \
+#define CHECK_MEMORY_OVERFLOW(bytes)                             \
+    do {                                                         \
+        uint64 offset1 = (uint64)offset + (uint64)addr;          \
+        if (disable_bounds_checks                                \
+            || offset1 + bytes <= (uint64)get_linear_mem_size()) \
+            /* If bounds checks are disabled, maddr is used      \
+               unchecked; otherwise offset1 is in valid range,   \
+               so maddr is also valid and needs no re-check. */  \
+            maddr = memory->memory_data + offset1;               \
+        else                                                     \
+            goto out_of_bounds;                                  \
+    } while (0)
 
-#define CHECK_BULK_MEMORY_OVERFLOW(start, bytes, maddr)       \
-    do {                                                      \
-        uint64 offset1 = (uint32)(start);                     \
-        if (offset1 + bytes <= (uint64)get_linear_mem_size()) \
-            /* App heap space is not valid space for          \
-             bulk memory operation */                         \
-            maddr = memory->memory_data + offset1;            \
-        else                                                  \
-            goto out_of_bounds;                               \
+#define CHECK_BULK_MEMORY_OVERFLOW(start, bytes, maddr)          \
+    do {                                                         \
+        uint64 offset1 = (uint32)(start);                        \
+        if (disable_bounds_checks                                \
+            || offset1 + bytes <= (uint64)get_linear_mem_size()) \
+            /* App heap space is not valid space for             \
+             bulk memory operation */                            \
+            maddr = memory->memory_data + offset1;               \
+        else                                                     \
+            goto out_of_bounds;                                  \
     } while (0)
 #else
 #define CHECK_MEMORY_OVERFLOW(bytes)                    \
@@ -708,28 +710,28 @@ trunc_f64_to_int(WASMModuleInstance *module, uint32 *frame_sp, float64 src_min,
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS();                            \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = (uint32)(*(uint8 *)maddr);                       \
             *(uint8 *)maddr = (uint8)(readv op sval);                \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         else if (opcode == WASM_OP_ATOMIC_RMW_I32_##OP_NAME##16_U) { \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS();                            \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = (uint32)LOAD_U16(maddr);                         \
             STORE_U16(maddr, (uint16)(readv op sval));               \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         else {                                                       \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS();                            \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = LOAD_I32(maddr);                                 \
             STORE_U32(maddr, readv op sval);                         \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         PUSH_I32(readv);                                             \
         break;                                                       \
@@ -748,39 +750,39 @@ trunc_f64_to_int(WASMModuleInstance *module, uint32 *frame_sp, float64 src_min,
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS();                            \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = (uint64)(*(uint8 *)maddr);                       \
             *(uint8 *)maddr = (uint8)(readv op sval);                \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         else if (opcode == WASM_OP_ATOMIC_RMW_I64_##OP_NAME##16_U) { \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS();                            \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = (uint64)LOAD_U16(maddr);                         \
             STORE_U16(maddr, (uint16)(readv op sval));               \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         else if (opcode == WASM_OP_ATOMIC_RMW_I64_##OP_NAME##32_U) { \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS();                            \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = (uint64)LOAD_U32(maddr);                         \
             STORE_U32(maddr, (uint32)(readv op sval));               \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         else {                                                       \
             uint64 op_result;                                        \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 8, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS();                            \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = (uint64)LOAD_I64(maddr);                         \
             op_result = readv op sval;                               \
             STORE_I64(maddr, op_result);                             \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         PUSH_I64(readv);                                             \
         break;                                                       \
@@ -903,8 +905,9 @@ wasm_interp_call_func_native(WASMModuleInstance *module_inst,
     if (!func_import->call_conv_wasm_c_api) {
         native_func_pointer = module_inst->import_func_ptrs[cur_func_index];
     }
-    else if (module_inst->e->c_api_func_imports) {
-        c_api_func_import = module_inst->e->c_api_func_imports + cur_func_index;
+    else if (module_inst->e->common.c_api_func_imports) {
+        c_api_func_import =
+            module_inst->e->common.c_api_func_imports + cur_func_index;
         native_func_pointer = c_api_func_import->func_ptr_linked;
     }
 
@@ -1060,21 +1063,33 @@ wasm_interp_call_func_import(WASMModuleInstance *module_inst,
         os_mutex_unlock(&exec_env->wait_lock);                         \
     } while (0)
 #else
-#define CHECK_SUSPEND_FLAGS()                                             \
-    do {                                                                  \
-        os_mutex_lock(&exec_env->wait_lock);                              \
-        if (exec_env->suspend_flags.flags != 0) {                         \
-            if (exec_env->suspend_flags.flags & 0x01) {                   \
-                /* terminate current thread */                            \
-                os_mutex_unlock(&exec_env->wait_lock);                    \
-                return;                                                   \
-            }                                                             \
-            while (exec_env->suspend_flags.flags & 0x02) {                \
-                /* suspend current thread */                              \
-                os_cond_wait(&exec_env->wait_cond, &exec_env->wait_lock); \
-            }                                                             \
-        }                                                                 \
-        os_mutex_unlock(&exec_env->wait_lock);                            \
+#if WASM_SUSPEND_FLAGS_IS_ATOMIC != 0
+/* The lock is only needed when the suspend_flags is atomic; otherwise
+   the lock is already taken at the time when SUSPENSION_LOCK() is called. */
+#define SUSPENSION_LOCK() os_mutex_lock(&exec_env->wait_lock);
+#define SUSPENSION_UNLOCK() os_mutex_unlock(&exec_env->wait_lock);
+#else
+#define SUSPENSION_LOCK()
+#define SUSPENSION_UNLOCK()
+#endif
+
+#define CHECK_SUSPEND_FLAGS()                                         \
+    do {                                                              \
+        WASM_SUSPEND_FLAGS_LOCK(exec_env->wait_lock);                 \
+        if (WASM_SUSPEND_FLAGS_GET(exec_env->suspend_flags)           \
+            & WASM_SUSPEND_FLAG_TERMINATE) {                          \
+            /* terminate current thread */                            \
+            WASM_SUSPEND_FLAGS_UNLOCK(exec_env->wait_lock);           \
+            return;                                                   \
+        }                                                             \
+        while (WASM_SUSPEND_FLAGS_GET(exec_env->suspend_flags)        \
+               & WASM_SUSPEND_FLAG_SUSPEND) {                         \
+            /* suspend current thread */                              \
+            SUSPENSION_LOCK()                                         \
+            os_cond_wait(&exec_env->wait_cond, &exec_env->wait_lock); \
+            SUSPENSION_UNLOCK()                                       \
+        }                                                             \
+        WASM_SUSPEND_FLAGS_UNLOCK(exec_env->wait_lock);               \
     } while (0)
 #endif /* WASM_ENABLE_DEBUG_INTERP */
 #endif /* WASM_ENABLE_THREAD_MGR */
@@ -1142,10 +1157,6 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                                WASMFunctionInstance *cur_func,
                                WASMInterpFrame *prev_frame)
 {
-#if WASM_ENABLE_SHARED_MEMORY != 0
-    WASMSharedMemNode *node =
-        wasm_module_get_shared_memory((WASMModuleCommon *)module->module);
-#endif
     WASMMemoryInstance *memory = wasm_get_default_memory(module);
 #if !defined(OS_ENABLE_HW_BOUND_CHECK)              \
     || WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS == 0 \
@@ -1174,6 +1185,15 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
     uint8 local_type, *global_addr;
     uint32 cache_index, type_index, param_cell_num, cell_num;
     uint8 value_type;
+#if !defined(OS_ENABLE_HW_BOUND_CHECK) \
+    || WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS == 0
+#if WASM_CONFIGUABLE_BOUNDS_CHECKS != 0
+    bool disable_bounds_checks = !wasm_runtime_is_bounds_checks_enabled(
+        (WASMModuleInstanceCommon *)module);
+#else
+    bool disable_bounds_checks = false;
+#endif
+#endif
 
 #if WASM_ENABLE_DEBUG_INTERP != 0
     uint8 *frame_ip_orig = NULL;
@@ -3224,7 +3244,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                     case WASM_OP_TABLE_INIT:
                     {
                         uint32 tbl_idx, elem_idx;
-                        uint64 n, s, d;
+                        uint32 n, s, d;
                         WASMTableInstance *tbl_inst;
 
                         read_leb_uint32(frame_ip, frame_ip_end, elem_idx);
@@ -3239,20 +3259,21 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         s = (uint32)POP_I32();
                         d = (uint32)POP_I32();
 
-                        /* TODO: what if the element is not passive? */
-
-                        if (!n) {
-                            break;
-                        }
-
-                        if (n + s > module->module->table_segments[elem_idx]
-                                        .function_count
-                            || d + n > tbl_inst->cur_size) {
+                        if (offset_len_out_of_bounds(
+                                s, n,
+                                module->module->table_segments[elem_idx]
+                                    .function_count)
+                            || offset_len_out_of_bounds(d, n,
+                                                        tbl_inst->cur_size)) {
                             wasm_set_exception(module,
                                                "out of bounds table access");
                             goto got_exception;
                         }
 
+                        if (!n) {
+                            break;
+                        }
+
                         if (module->module->table_segments[elem_idx]
                                 .is_dropped) {
                             wasm_set_exception(module,
@@ -3293,7 +3314,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                     case WASM_OP_TABLE_COPY:
                     {
                         uint32 src_tbl_idx, dst_tbl_idx;
-                        uint64 n, s, d;
+                        uint32 n, s, d;
                         WASMTableInstance *src_tbl_inst, *dst_tbl_inst;
 
                         read_leb_uint32(frame_ip, frame_ip_end, dst_tbl_idx);
@@ -3310,8 +3331,10 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         s = (uint32)POP_I32();
                         d = (uint32)POP_I32();
 
-                        if (d + n > dst_tbl_inst->cur_size
-                            || s + n > src_tbl_inst->cur_size) {
+                        if (offset_len_out_of_bounds(d, n,
+                                                     dst_tbl_inst->cur_size)
+                            || offset_len_out_of_bounds(
+                                s, n, src_tbl_inst->cur_size)) {
                             wasm_set_exception(module,
                                                "out of bounds table access");
                             goto got_exception;
@@ -3381,11 +3404,8 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         fill_val = POP_I32();
                         i = POP_I32();
 
-                        /* TODO: what if the element is not passive? */
-                        /* TODO: what if the element is dropped? */
-
-                        if (i + n > tbl_inst->cur_size) {
-                            /* TODO: verify warning content */
+                        if (offset_len_out_of_bounds(i, n,
+                                                     tbl_inst->cur_size)) {
                             wasm_set_exception(module,
                                                "out of bounds table access");
                             goto got_exception;
@@ -3503,23 +3523,23 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (opcode == WASM_OP_ATOMIC_I32_LOAD8_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint32)(*(uint8 *)maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I32_LOAD16_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint32)LOAD_U16(maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = LOAD_I32(maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
 
                         PUSH_I32(readv);
@@ -3538,30 +3558,30 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (opcode == WASM_OP_ATOMIC_I64_LOAD8_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)(*(uint8 *)maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I64_LOAD16_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)LOAD_U16(maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I64_LOAD32_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)LOAD_U32(maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 8, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = LOAD_I64(maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
 
                         PUSH_I64(readv);
@@ -3580,23 +3600,23 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (opcode == WASM_OP_ATOMIC_I32_STORE8) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             *(uint8 *)maddr = (uint8)sval;
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I32_STORE16) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             STORE_U16(maddr, (uint16)sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             STORE_U32(maddr, sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         break;
                     }
@@ -3614,30 +3634,30 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (opcode == WASM_OP_ATOMIC_I64_STORE8) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             *(uint8 *)maddr = (uint8)sval;
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I64_STORE16) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             STORE_U16(maddr, (uint16)sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I64_STORE32) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             STORE_U32(maddr, (uint32)sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 8, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             PUT_I64_TO_ADDR((uint32 *)maddr, sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         break;
                     }
@@ -3657,32 +3677,32 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                             CHECK_ATOMIC_MEMORY_ACCESS();
 
                             expect = (uint8)expect;
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint32)(*(uint8 *)maddr);
                             if (readv == expect)
                                 *(uint8 *)maddr = (uint8)(sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_RMW_I32_CMPXCHG16_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
 
                             expect = (uint16)expect;
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint32)LOAD_U16(maddr);
                             if (readv == expect)
                                 STORE_U16(maddr, (uint16)(sval));
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
 
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = LOAD_I32(maddr);
                             if (readv == expect)
                                 STORE_U32(maddr, sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         PUSH_I32(readv);
                         break;
@@ -3703,43 +3723,43 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                             CHECK_ATOMIC_MEMORY_ACCESS();
 
                             expect = (uint8)expect;
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)(*(uint8 *)maddr);
                             if (readv == expect)
                                 *(uint8 *)maddr = (uint8)(sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_RMW_I64_CMPXCHG16_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
 
                             expect = (uint16)expect;
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)LOAD_U16(maddr);
                             if (readv == expect)
                                 STORE_U16(maddr, (uint16)(sval));
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_RMW_I64_CMPXCHG32_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
 
                             expect = (uint32)expect;
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)LOAD_U32(maddr);
                             if (readv == expect)
                                 STORE_U32(maddr, (uint32)(sval));
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 8, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
 
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)LOAD_I64(maddr);
                             if (readv == expect)
                                 STORE_I64(maddr, sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         PUSH_I64(readv);
                         break;
@@ -3772,7 +3792,8 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
             HANDLE_OP(DEBUG_OP_BREAK)
             {
                 wasm_cluster_thread_send_signal(exec_env, WAMR_SIG_TRAP);
-                exec_env->suspend_flags.flags |= 2;
+                WASM_SUSPEND_FLAGS_FETCH_OR(exec_env->suspend_flags,
+                                            WASM_SUSPEND_FLAG_SUSPEND);
                 frame_ip--;
                 SYNC_ALL_TO_FRAME();
                 CHECK_SUSPEND_FLAGS();

+ 108 - 96
core/iwasm/interpreter/wasm_interp_fast.c

@@ -35,7 +35,8 @@ typedef float64 CellType_F64;
 #define CHECK_MEMORY_OVERFLOW(bytes)                             \
     do {                                                         \
         uint64 offset1 = (uint64)offset + (uint64)addr;          \
-        if (offset1 + bytes <= (uint64)get_linear_mem_size())    \
+        if (disable_bounds_checks                                \
+            || offset1 + bytes <= (uint64)get_linear_mem_size()) \
             /* If offset1 is in valid range, maddr must also     \
                 be in valid range, no need to check it again. */ \
             maddr = memory->memory_data + offset1;               \
@@ -43,15 +44,15 @@ typedef float64 CellType_F64;
             goto out_of_bounds;                                  \
     } while (0)
 
-#define CHECK_BULK_MEMORY_OVERFLOW(start, bytes, maddr) \
-    do {                                                \
-        uint64 offset1 = (uint32)(start);               \
-        if (offset1 + bytes <= get_linear_mem_size())   \
-            /* App heap space is not valid space for    \
-               bulk memory operation */                 \
-            maddr = memory->memory_data + offset1;      \
-        else                                            \
-            goto out_of_bounds;                         \
+#define CHECK_BULK_MEMORY_OVERFLOW(start, bytes, maddr)                        \
+    do {                                                                       \
+        uint64 offset1 = (uint32)(start);                                      \
+        if (disable_bounds_checks || offset1 + bytes <= get_linear_mem_size()) \
+            /* App heap space is not valid space for                           \
+               bulk memory operation */                                        \
+            maddr = memory->memory_data + offset1;                             \
+        else                                                                   \
+            goto out_of_bounds;                                                \
     } while (0)
 #else
 #define CHECK_MEMORY_OVERFLOW(bytes)                    \
@@ -481,28 +482,28 @@ LOAD_PTR(void *addr)
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS(1);                           \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = (uint32)(*(uint8 *)maddr);                       \
             *(uint8 *)maddr = (uint8)(readv op sval);                \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         else if (opcode == WASM_OP_ATOMIC_RMW_I32_##OP_NAME##16_U) { \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS(2);                           \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = (uint32)LOAD_U16(maddr);                         \
             STORE_U16(maddr, (uint16)(readv op sval));               \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         else {                                                       \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS(4);                           \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = LOAD_I32(maddr);                                 \
             STORE_U32(maddr, readv op sval);                         \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         PUSH_I32(readv);                                             \
         break;                                                       \
@@ -521,39 +522,39 @@ LOAD_PTR(void *addr)
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS(1);                           \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = (uint64)(*(uint8 *)maddr);                       \
             *(uint8 *)maddr = (uint8)(readv op sval);                \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         else if (opcode == WASM_OP_ATOMIC_RMW_I64_##OP_NAME##16_U) { \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS(2);                           \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = (uint64)LOAD_U16(maddr);                         \
             STORE_U16(maddr, (uint16)(readv op sval));               \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         else if (opcode == WASM_OP_ATOMIC_RMW_I64_##OP_NAME##32_U) { \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS(4);                           \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = (uint64)LOAD_U32(maddr);                         \
             STORE_U32(maddr, (uint32)(readv op sval));               \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         else {                                                       \
             uint64 op_result;                                        \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 8, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS(8);                           \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = (uint64)LOAD_I64(maddr);                         \
             op_result = readv op sval;                               \
             STORE_I64(maddr, op_result);                             \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         PUSH_I64(readv);                                             \
         break;                                                       \
@@ -937,8 +938,9 @@ wasm_interp_call_func_native(WASMModuleInstance *module_inst,
     if (!func_import->call_conv_wasm_c_api) {
         native_func_pointer = module_inst->import_func_ptrs[cur_func_index];
     }
-    else if (module_inst->e->c_api_func_imports) {
-        c_api_func_import = module_inst->e->c_api_func_imports + cur_func_index;
+    else if (module_inst->e->common.c_api_func_imports) {
+        c_api_func_import =
+            module_inst->e->common.c_api_func_imports + cur_func_index;
         native_func_pointer = c_api_func_import->func_ptr_linked;
     }
 
@@ -1064,18 +1066,17 @@ wasm_interp_call_func_import(WASMModuleInstance *module_inst,
 #endif
 
 #if WASM_ENABLE_THREAD_MGR != 0
-#define CHECK_SUSPEND_FLAGS()                           \
-    do {                                                \
-        os_mutex_lock(&exec_env->wait_lock);            \
-        if (exec_env->suspend_flags.flags != 0) {       \
-            if (exec_env->suspend_flags.flags & 0x01) { \
-                /* terminate current thread */          \
-                os_mutex_unlock(&exec_env->wait_lock);  \
-                return;                                 \
-            }                                           \
-            /* TODO: support suspend and breakpoint */  \
-        }                                               \
-        os_mutex_unlock(&exec_env->wait_lock);          \
+#define CHECK_SUSPEND_FLAGS()                               \
+    do {                                                    \
+        WASM_SUSPEND_FLAGS_LOCK(exec_env->wait_lock);       \
+        if (WASM_SUSPEND_FLAGS_GET(exec_env->suspend_flags) \
+            & WASM_SUSPEND_FLAG_TERMINATE) {                \
+            /* terminate current thread */                  \
+            WASM_SUSPEND_FLAGS_UNLOCK(exec_env->wait_lock); \
+            return;                                         \
+        }                                                   \
+        /* TODO: support suspend and breakpoint */          \
+        WASM_SUSPEND_FLAGS_UNLOCK(exec_env->wait_lock);     \
     } while (0)
 #endif
 
@@ -1166,10 +1167,6 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                                WASMFunctionInstance *cur_func,
                                WASMInterpFrame *prev_frame)
 {
-#if WASM_ENABLE_SHARED_MEMORY != 0
-    WASMSharedMemNode *node =
-        wasm_module_get_shared_memory((WASMModuleCommon *)module->module);
-#endif
     WASMMemoryInstance *memory = wasm_get_default_memory(module);
 #if !defined(OS_ENABLE_HW_BOUND_CHECK)              \
     || WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS == 0 \
@@ -1199,6 +1196,15 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
     uint8 *maddr = NULL;
     uint32 local_idx, local_offset, global_idx;
     uint8 opcode, local_type, *global_addr;
+#if !defined(OS_ENABLE_HW_BOUND_CHECK) \
+    || WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS == 0
+#if WASM_CONFIGUABLE_BOUNDS_CHECKS != 0
+    bool disable_bounds_checks = !wasm_runtime_is_bounds_checks_enabled(
+        (WASMModuleInstanceCommon *)module);
+#else
+    bool disable_bounds_checks = false;
+#endif
+#endif
 
 #if WASM_ENABLE_LABELS_AS_VALUES != 0
 #define HANDLE_OPCODE(op) &&HANDLE_##op
@@ -3069,7 +3075,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                     case WASM_OP_TABLE_INIT:
                     {
                         uint32 tbl_idx, elem_idx;
-                        uint64 n, s, d;
+                        uint32 n, s, d;
                         WASMTableInstance *tbl_inst;
 
                         elem_idx = read_uint32(frame_ip);
@@ -3084,18 +3090,21 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         s = (uint32)POP_I32();
                         d = (uint32)POP_I32();
 
-                        if (!n) {
-                            break;
-                        }
-
-                        if (n + s > module->module->table_segments[elem_idx]
-                                        .function_count
-                            || d + n > tbl_inst->cur_size) {
+                        if (offset_len_out_of_bounds(
+                                s, n,
+                                module->module->table_segments[elem_idx]
+                                    .function_count)
+                            || offset_len_out_of_bounds(d, n,
+                                                        tbl_inst->cur_size)) {
                             wasm_set_exception(module,
                                                "out of bounds table access");
                             goto got_exception;
                         }
 
+                        if (!n) {
+                            break;
+                        }
+
                         if (module->module->table_segments[elem_idx]
                                 .is_dropped) {
                             wasm_set_exception(module,
@@ -3134,7 +3143,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                     case WASM_OP_TABLE_COPY:
                     {
                         uint32 src_tbl_idx, dst_tbl_idx;
-                        uint64 n, s, d;
+                        uint32 n, s, d;
                         WASMTableInstance *src_tbl_inst, *dst_tbl_inst;
 
                         dst_tbl_idx = read_uint32(frame_ip);
@@ -3151,8 +3160,10 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         s = (uint32)POP_I32();
                         d = (uint32)POP_I32();
 
-                        if (d + n > dst_tbl_inst->cur_size
-                            || s + n > src_tbl_inst->cur_size) {
+                        if (offset_len_out_of_bounds(d, n,
+                                                     dst_tbl_inst->cur_size)
+                            || offset_len_out_of_bounds(
+                                s, n, src_tbl_inst->cur_size)) {
                             wasm_set_exception(module,
                                                "out of bounds table access");
                             goto got_exception;
@@ -3223,7 +3234,8 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         fill_val = POP_I32();
                         i = POP_I32();
 
-                        if (i + n > tbl_inst->cur_size) {
+                        if (offset_len_out_of_bounds(i, n,
+                                                     tbl_inst->cur_size)) {
                             wasm_set_exception(module,
                                                "out of bounds table access");
                             goto got_exception;
@@ -3338,23 +3350,23 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (opcode == WASM_OP_ATOMIC_I32_LOAD8_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(1);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint32)(*(uint8 *)maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I32_LOAD16_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(2);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint32)LOAD_U16(maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(4);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = LOAD_I32(maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
 
                         PUSH_I32(readv);
@@ -3373,30 +3385,30 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (opcode == WASM_OP_ATOMIC_I64_LOAD8_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(1);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)(*(uint8 *)maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I64_LOAD16_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(2);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)LOAD_U16(maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I64_LOAD32_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(4);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)LOAD_U32(maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 8, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(8);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = LOAD_I64(maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
 
                         PUSH_I64(readv);
@@ -3414,23 +3426,23 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (opcode == WASM_OP_ATOMIC_I32_STORE8) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(1);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             *(uint8 *)maddr = (uint8)sval;
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I32_STORE16) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(2);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             STORE_U16(maddr, (uint16)sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(4);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             STORE_U32(maddr, sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         break;
                     }
@@ -3448,30 +3460,30 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (opcode == WASM_OP_ATOMIC_I64_STORE8) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(1);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             *(uint8 *)maddr = (uint8)sval;
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I64_STORE16) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(2);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             STORE_U16(maddr, (uint16)sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I64_STORE32) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(4);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             STORE_U32(maddr, (uint32)sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 8, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(8);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             STORE_I64(maddr, sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         break;
                     }
@@ -3491,32 +3503,32 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                             CHECK_ATOMIC_MEMORY_ACCESS(1);
 
                             expect = (uint8)expect;
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint32)(*(uint8 *)maddr);
                             if (readv == expect)
                                 *(uint8 *)maddr = (uint8)(sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_RMW_I32_CMPXCHG16_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(2);
 
                             expect = (uint16)expect;
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint32)LOAD_U16(maddr);
                             if (readv == expect)
                                 STORE_U16(maddr, (uint16)(sval));
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(4);
 
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = LOAD_I32(maddr);
                             if (readv == expect)
                                 STORE_U32(maddr, sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         PUSH_I32(readv);
                         break;
@@ -3537,43 +3549,43 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                             CHECK_ATOMIC_MEMORY_ACCESS(1);
 
                             expect = (uint8)expect;
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)(*(uint8 *)maddr);
                             if (readv == expect)
                                 *(uint8 *)maddr = (uint8)(sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_RMW_I64_CMPXCHG16_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(2);
 
                             expect = (uint16)expect;
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)LOAD_U16(maddr);
                             if (readv == expect)
                                 STORE_U16(maddr, (uint16)(sval));
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_RMW_I64_CMPXCHG32_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(4);
 
                             expect = (uint32)expect;
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)LOAD_U32(maddr);
                             if (readv == expect)
                                 STORE_U32(maddr, (uint32)(sval));
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 8, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(8);
 
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)LOAD_I64(maddr);
                             if (readv == expect)
                                 STORE_I64(maddr, sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         PUSH_I64(readv);
                         break;

+ 10 - 7
core/iwasm/interpreter/wasm_loader.c

@@ -7014,6 +7014,7 @@ static bool
 copy_params_to_dynamic_space(WASMLoaderContext *loader_ctx, bool is_if_block,
                              char *error_buf, uint32 error_buf_size)
 {
+    bool ret = false;
     int16 *frame_offset = NULL;
     uint8 *cells = NULL, cell;
     int16 *src_offsets = NULL;
@@ -7084,13 +7085,13 @@ copy_params_to_dynamic_space(WASMLoaderContext *loader_ctx, bool is_if_block,
     if (is_if_block)
         PUSH_OFFSET_TYPE(VALUE_TYPE_I32);
 
+    ret = true;
+
+fail:
     /* Free the emit data */
     wasm_runtime_free(emit_data);
 
-    return true;
-
-fail:
-    return false;
+    return ret;
 }
 #endif
 
@@ -8063,9 +8064,13 @@ re_scan:
             case WASM_OP_SELECT_T:
             {
                 uint8 vec_len, ref_type;
+#if WASM_ENABLE_FAST_INTERP != 0
+                uint8 *p_code_compiled_tmp = loader_ctx->p_code_compiled;
+#endif
 
                 read_leb_uint32(p, p_end, vec_len);
-                if (!vec_len) {
+                if (vec_len != 1) {
+                    /* typed select must have exactly one result */
                     set_error_buf(error_buf, error_buf_size,
                                   "invalid result arity");
                     goto fail;
@@ -8084,8 +8089,6 @@ re_scan:
 #if WASM_ENABLE_FAST_INTERP != 0
                 if (loader_ctx->p_code_compiled) {
                     uint8 opcode_tmp = WASM_OP_SELECT;
-                    uint8 *p_code_compiled_tmp =
-                        loader_ctx->p_code_compiled - 2;
 
                     if (ref_type == VALUE_TYPE_V128) {
 #if (WASM_ENABLE_SIMD == 0) \

+ 5 - 3
core/iwasm/interpreter/wasm_mini_loader.c

@@ -6233,9 +6233,13 @@ re_scan:
             case WASM_OP_SELECT_T:
             {
                 uint8 vec_len, ref_type;
+#if WASM_ENABLE_FAST_INTERP != 0
+                uint8 *p_code_compiled_tmp = loader_ctx->p_code_compiled;
+#endif
 
                 read_leb_uint32(p, p_end, vec_len);
-                if (!vec_len) {
+                if (vec_len != 1) {
+                    /* typed select must have exactly one result */
                     set_error_buf(error_buf, error_buf_size,
                                   "invalid result arity");
                     goto fail;
@@ -6254,8 +6258,6 @@ re_scan:
 #if WASM_ENABLE_FAST_INTERP != 0
                 if (loader_ctx->p_code_compiled) {
                     uint8 opcode_tmp = WASM_OP_SELECT;
-                    uint8 *p_code_compiled_tmp =
-                        loader_ctx->p_code_compiled - 2;
 
                     if (ref_type == VALUE_TYPE_F64
                         || ref_type == VALUE_TYPE_I64)

+ 69 - 134
core/iwasm/interpreter/wasm_runtime.c

@@ -122,11 +122,8 @@ memories_deinstantiate(WASMModuleInstance *module_inst,
                 }
 #endif
 #if WASM_ENABLE_SHARED_MEMORY != 0
-                if (memories[i]->is_shared) {
-                    int32 ref_count = shared_memory_dec_reference(
-                        (WASMModuleCommon *)module_inst->module);
-                    bh_assert(ref_count >= 0);
-
+                if (shared_memory_is_shared(memories[i])) {
+                    uint32 ref_count = shared_memory_dec_reference(memories[i]);
                     /* if the reference count is not zero,
                         don't free the memory */
                     if (ref_count > 0)
@@ -159,7 +156,8 @@ memories_deinstantiate(WASMModuleInstance *module_inst,
 }
 
 static WASMMemoryInstance *
-memory_instantiate(WASMModuleInstance *module_inst, WASMMemoryInstance *memory,
+memory_instantiate(WASMModuleInstance *module_inst, WASMModuleInstance *parent,
+                   WASMMemoryInstance *memory, uint32 memory_idx,
                    uint32 num_bytes_per_page, uint32 init_page_count,
                    uint32 max_page_count, uint32 heap_size, uint32 flags,
                    char *error_buf, uint32 error_buf_size)
@@ -180,22 +178,11 @@ memory_instantiate(WASMModuleInstance *module_inst, WASMMemoryInstance *memory,
     bool is_shared_memory = flags & 0x02 ? true : false;
 
     /* shared memory */
-    if (is_shared_memory) {
-        WASMSharedMemNode *node = wasm_module_get_shared_memory(
-            (WASMModuleCommon *)module_inst->module);
-        /* If the memory of this module has been instantiated,
-            return the memory instance directly */
-        if (node) {
-            uint32 ref_count;
-            ref_count = shared_memory_inc_reference(
-                (WASMModuleCommon *)module_inst->module);
-            bh_assert(ref_count > 0);
-            memory = (WASMMemoryInstance *)shared_memory_get_memory_inst(node);
-            bh_assert(memory);
-
-            (void)ref_count;
-            return memory;
-        }
+    if (is_shared_memory && parent != NULL) {
+        bh_assert(parent->memory_count > memory_idx);
+        memory = parent->memories[memory_idx];
+        shared_memory_inc_reference(memory);
+        return memory;
     }
 #endif /* end of WASM_ENABLE_SHARED_MEMORY */
 
@@ -215,7 +202,7 @@ memory_instantiate(WASMModuleInstance *module_inst, WASMMemoryInstance *memory,
         if (num_bytes_per_page < heap_size) {
             set_error_buf(error_buf, error_buf_size,
                           "failed to insert app heap into linear memory, "
-                          "try using `--heap_size=0` option");
+                          "try using `--heap-size=0` option");
             return NULL;
         }
     }
@@ -274,7 +261,7 @@ memory_instantiate(WASMModuleInstance *module_inst, WASMMemoryInstance *memory,
         if (init_page_count > DEFAULT_MAX_PAGES) {
             set_error_buf(error_buf, error_buf_size,
                           "failed to insert app heap into linear memory, "
-                          "try using `--heap_size=0` option");
+                          "try using `--heap-size=0` option");
             return NULL;
         }
         else if (init_page_count == DEFAULT_MAX_PAGES) {
@@ -388,24 +375,13 @@ memory_instantiate(WASMModuleInstance *module_inst, WASMMemoryInstance *memory,
 
 #if WASM_ENABLE_SHARED_MEMORY != 0
     if (is_shared_memory) {
-        memory->is_shared = true;
-        if (!shared_memory_set_memory_inst(
-                (WASMModuleCommon *)module_inst->module,
-                (WASMMemoryInstanceCommon *)memory)) {
-            set_error_buf(error_buf, error_buf_size, "allocate memory failed");
-            goto fail4;
-        }
+        memory->ref_count = 1;
     }
 #endif
 
     LOG_VERBOSE("Memory instantiate success.");
     return memory;
 
-#if WASM_ENABLE_SHARED_MEMORY != 0
-fail4:
-    if (heap_size > 0)
-        mem_allocator_destroy(memory->heap_handle);
-#endif
 fail3:
     if (heap_size > 0)
         wasm_runtime_free(memory->heap_handle);
@@ -428,7 +404,8 @@ fail1:
  */
 static WASMMemoryInstance **
 memories_instantiate(const WASMModule *module, WASMModuleInstance *module_inst,
-                     uint32 heap_size, char *error_buf, uint32 error_buf_size)
+                     WASMModuleInstance *parent, uint32 heap_size,
+                     char *error_buf, uint32 error_buf_size)
 {
     WASMImport *import;
     uint32 mem_index = 0, i,
@@ -474,26 +451,29 @@ memories_instantiate(const WASMModule *module, WASMModuleInstance *module_inst,
         else
 #endif
         {
-            if (!(memories[mem_index++] = memory_instantiate(
-                      module_inst, memory, num_bytes_per_page, init_page_count,
-                      max_page_count, actual_heap_size, flags, error_buf,
-                      error_buf_size))) {
+            if (!(memories[mem_index] = memory_instantiate(
+                      module_inst, parent, memory, mem_index,
+                      num_bytes_per_page, init_page_count, max_page_count,
+                      actual_heap_size, flags, error_buf, error_buf_size))) {
                 memories_deinstantiate(module_inst, memories, memory_count);
                 return NULL;
             }
+            mem_index++;
         }
     }
 
     /* instantiate memories from memory section */
     for (i = 0; i < module->memory_count; i++, memory++) {
-        if (!(memories[mem_index++] = memory_instantiate(
-                  module_inst, memory, module->memories[i].num_bytes_per_page,
+        if (!(memories[mem_index] = memory_instantiate(
+                  module_inst, parent, memory, mem_index,
+                  module->memories[i].num_bytes_per_page,
                   module->memories[i].init_page_count,
                   module->memories[i].max_page_count, heap_size,
                   module->memories[i].flags, error_buf, error_buf_size))) {
             memories_deinstantiate(module_inst, memories, memory_count);
             return NULL;
         }
+        mem_index++;
     }
 
     bh_assert(mem_index == memory_count);
@@ -1104,10 +1084,14 @@ execute_post_instantiate_functions(WASMModuleInstance *module_inst,
         goto fail;
     }
 
+#if WASM_ENABLE_LIBC_WASI != 0
     if (initialize_func
         && !wasm_call_function(exec_env, initialize_func, 0, NULL)) {
         goto fail;
     }
+#else
+    (void)initialize_func;
+#endif
 
     if (post_inst_func
         && !wasm_call_function(exec_env, post_inst_func, 0, NULL)) {
@@ -1297,7 +1281,7 @@ sub_module_instantiate(WASMModule *module, WASMModuleInstance *module_inst,
         WASMModuleInstance *sub_module_inst = NULL;
 
         sub_module_inst =
-            wasm_instantiate(sub_module, false, NULL, stack_size, heap_size,
+            wasm_instantiate(sub_module, NULL, NULL, stack_size, heap_size,
                              error_buf, error_buf_size);
         if (!sub_module_inst) {
             LOG_DEBUG("instantiate %s failed",
@@ -1642,7 +1626,7 @@ wasm_set_running_mode(WASMModuleInstance *module_inst, RunningMode running_mode)
  * Instantiate module
  */
 WASMModuleInstance *
-wasm_instantiate(WASMModule *module, bool is_sub_inst,
+wasm_instantiate(WASMModule *module, WASMModuleInstance *parent,
                  WASMExecEnv *exec_env_main, uint32 stack_size,
                  uint32 heap_size, char *error_buf, uint32 error_buf_size)
 {
@@ -1659,6 +1643,7 @@ wasm_instantiate(WASMModule *module, bool is_sub_inst,
 #if WASM_ENABLE_MULTI_MODULE != 0
     bool ret = false;
 #endif
+    const bool is_sub_inst = parent != NULL;
 
     if (!module)
         return NULL;
@@ -1777,8 +1762,9 @@ wasm_instantiate(WASMModule *module, bool is_sub_inst,
 
     /* Instantiate memories/tables/functions */
     if ((module_inst->memory_count > 0
-         && !(module_inst->memories = memories_instantiate(
-                  module, module_inst, heap_size, error_buf, error_buf_size)))
+         && !(module_inst->memories =
+                  memories_instantiate(module, module_inst, parent, heap_size,
+                                       error_buf, error_buf_size)))
         || (module_inst->table_count > 0
             && !(module_inst->tables =
                      tables_instantiate(module, module_inst, first_table,
@@ -1853,7 +1839,7 @@ wasm_instantiate(WASMModule *module, bool is_sub_inst,
     for (i = 0; i < module->data_seg_count; i++) {
         WASMMemoryInstance *memory = NULL;
         uint8 *memory_data = NULL;
-        uint32 memory_size = 0;
+        uint64 memory_size = 0;
         WASMDataSeg *data_seg = module->data_segments[i];
 
 #if WASM_ENABLE_BULK_MEMORY != 0
@@ -1866,7 +1852,8 @@ wasm_instantiate(WASMModule *module, bool is_sub_inst,
         bh_assert(memory);
 
         memory_data = memory->memory_data;
-        memory_size = memory->num_bytes_per_page * memory->cur_page_count;
+        memory_size =
+            (uint64)memory->num_bytes_per_page * memory->cur_page_count;
         bh_assert(memory_data || memory_size == 0);
 
         bh_assert(data_seg->base_offset.init_expr_type
@@ -1912,7 +1899,7 @@ wasm_instantiate(WASMModule *module, bool is_sub_inst,
 
         /* check offset + length(could be zero) */
         length = data_seg->data_length;
-        if (base_offset + length > memory_size) {
+        if ((uint64)base_offset + length > memory_size) {
             LOG_DEBUG("base_offset(%d) + length(%d) > memory_size(%d)",
                       base_offset, length, memory_size);
 #if WASM_ENABLE_REF_TYPES != 0
@@ -1926,8 +1913,9 @@ wasm_instantiate(WASMModule *module, bool is_sub_inst,
         }
 
         if (memory_data) {
-            bh_memcpy_s(memory_data + base_offset, memory_size - base_offset,
-                        data_seg->data, length);
+            bh_memcpy_s(memory_data + base_offset,
+                        (uint32)memory_size - base_offset, data_seg->data,
+                        length);
         }
     }
 
@@ -2212,16 +2200,6 @@ wasm_deinstantiate(WASMModuleInstance *module_inst, bool is_sub_inst)
     sub_module_deinstantiate(module_inst);
 #endif
 
-#if WASM_ENABLE_LIBC_WASI != 0
-    /* Destroy wasi resource before freeing app heap, since some fields of
-       wasi contex are allocated from app heap, and if app heap is freed,
-       these fields will be set to NULL, we cannot free their internal data
-       which may allocated from global heap. */
-    /* Only destroy wasi ctx in the main module instance */
-    if (!is_sub_inst)
-        wasm_runtime_destroy_wasi((WASMModuleInstanceCommon *)module_inst);
-#endif
-
     if (module_inst->memory_count > 0)
         memories_deinstantiate(module_inst, module_inst->memories,
                                module_inst->memory_count);
@@ -2251,13 +2229,17 @@ wasm_deinstantiate(WASMModuleInstance *module_inst, bool is_sub_inst)
     }
 #endif
 
-    if (module_inst->e->c_api_func_imports)
-        wasm_runtime_free(module_inst->e->c_api_func_imports);
+    if (module_inst->e->common.c_api_func_imports)
+        wasm_runtime_free(module_inst->e->common.c_api_func_imports);
 
+    if (!is_sub_inst) {
+#if WASM_ENABLE_LIBC_WASI != 0
+        wasm_runtime_destroy_wasi((WASMModuleInstanceCommon *)module_inst);
+#endif
 #if WASM_ENABLE_WASI_NN != 0
-    if (!is_sub_inst)
         wasi_nn_destroy(module_inst);
 #endif
+    }
 
     wasm_runtime_free(module_inst);
 }
@@ -3113,11 +3095,7 @@ llvm_jit_call_indirect(WASMExecEnv *exec_env, uint32 tbl_idx, uint32 elem_idx,
 {
     bool ret;
 
-#if WASM_ENABLE_JIT != 0
-    if (Wasm_Module_AoT == exec_env->module_inst->module_type) {
-        return aot_call_indirect(exec_env, tbl_idx, elem_idx, argc, argv);
-    }
-#endif
+    bh_assert(exec_env->module_inst->module_type == Wasm_Module_Bytecode);
 
     ret = call_indirect(exec_env, tbl_idx, elem_idx, argc, argv, false, 0);
 #ifdef OS_ENABLE_HW_BOUND_CHECK
@@ -3144,11 +3122,7 @@ llvm_jit_invoke_native(WASMExecEnv *exec_env, uint32 func_idx, uint32 argc,
     char buf[96];
     bool ret = false;
 
-#if WASM_ENABLE_JIT != 0
-    if (Wasm_Module_AoT == exec_env->module_inst->module_type) {
-        return aot_invoke_native(exec_env, func_idx, argc, argv);
-    }
-#endif
+    bh_assert(exec_env->module_inst->module_type == Wasm_Module_Bytecode);
 
     module_inst = (WASMModuleInstance *)wasm_runtime_get_module_inst(exec_env);
     module = module_inst->module;
@@ -3161,8 +3135,9 @@ llvm_jit_invoke_native(WASMExecEnv *exec_env, uint32 func_idx, uint32 argc,
 
     import_func = &module->import_functions[func_idx].u.function;
     if (import_func->call_conv_wasm_c_api) {
-        if (module_inst->e->c_api_func_imports) {
-            c_api_func_import = module_inst->e->c_api_func_imports + func_idx;
+        if (module_inst->e->common.c_api_func_imports) {
+            c_api_func_import =
+                module_inst->e->common.c_api_func_imports + func_idx;
             func_ptr = c_api_func_import->func_ptr_linked;
         }
         else {
@@ -3217,11 +3192,7 @@ llvm_jit_memory_init(WASMModuleInstance *module_inst, uint32 seg_index,
     uint8 *maddr;
     uint64 seg_len = 0;
 
-#if WASM_ENABLE_JIT != 0
-    if (Wasm_Module_AoT == module_inst->module_type) {
-        return aot_memory_init(module_inst, seg_index, offset, len, dst);
-    }
-#endif
+    bh_assert(module_inst->module_type == Wasm_Module_Bytecode);
 
     memory_inst = wasm_get_default_memory(module_inst);
     module = module_inst->module;
@@ -3247,11 +3218,7 @@ llvm_jit_memory_init(WASMModuleInstance *module_inst, uint32 seg_index,
 bool
 llvm_jit_data_drop(WASMModuleInstance *module_inst, uint32 seg_index)
 {
-#if WASM_ENABLE_JIT != 0
-    if (Wasm_Module_AoT == module_inst->module_type) {
-        return aot_data_drop(module_inst, seg_index);
-    }
-#endif
+    bh_assert(module_inst->module_type == Wasm_Module_Bytecode);
 
     module_inst->module->data_segments[seg_index]->data_length = 0;
     /* Currently we can't free the dropped data segment
@@ -3266,11 +3233,7 @@ llvm_jit_drop_table_seg(WASMModuleInstance *module_inst, uint32 tbl_seg_idx)
 {
     WASMTableSeg *tbl_segs;
 
-#if WASM_ENABLE_JIT != 0
-    if (Wasm_Module_AoT == module_inst->module_type) {
-        return aot_drop_table_seg(module_inst, tbl_seg_idx);
-    }
-#endif
+    bh_assert(module_inst->module_type == Wasm_Module_Bytecode);
 
     tbl_segs = module_inst->module->table_segments;
     tbl_segs[tbl_seg_idx].is_dropped = true;
@@ -3284,12 +3247,7 @@ llvm_jit_table_init(WASMModuleInstance *module_inst, uint32 tbl_idx,
     WASMTableInstance *tbl_inst;
     WASMTableSeg *tbl_seg;
 
-#if WASM_ENABLE_JIT != 0
-    if (Wasm_Module_AoT == module_inst->module_type) {
-        return aot_table_init(module_inst, tbl_idx, tbl_seg_idx, length,
-                              src_offset, dst_offset);
-    }
-#endif
+    bh_assert(module_inst->module_type == Wasm_Module_Bytecode);
 
     tbl_inst = wasm_get_table_inst(module_inst, tbl_idx);
     tbl_seg = module_inst->module->table_segments + tbl_seg_idx;
@@ -3297,13 +3255,13 @@ llvm_jit_table_init(WASMModuleInstance *module_inst, uint32 tbl_idx,
     bh_assert(tbl_inst);
     bh_assert(tbl_seg);
 
-    if (!length) {
+    if (offset_len_out_of_bounds(src_offset, length, tbl_seg->function_count)
+        || offset_len_out_of_bounds(dst_offset, length, tbl_inst->cur_size)) {
+        jit_set_exception_with_id(module_inst, EXCE_OUT_OF_BOUNDS_TABLE_ACCESS);
         return;
     }
 
-    if (length + src_offset > tbl_seg->function_count
-        || dst_offset + length > tbl_inst->cur_size) {
-        jit_set_exception_with_id(module_inst, EXCE_OUT_OF_BOUNDS_TABLE_ACCESS);
+    if (!length) {
         return;
     }
 
@@ -3332,21 +3290,16 @@ llvm_jit_table_copy(WASMModuleInstance *module_inst, uint32 src_tbl_idx,
     WASMTableInstance *src_tbl_inst;
     WASMTableInstance *dst_tbl_inst;
 
-#if WASM_ENABLE_JIT != 0
-    if (Wasm_Module_AoT == module_inst->module_type) {
-        aot_table_copy(module_inst, src_tbl_idx, dst_tbl_idx, length,
-                       src_offset, dst_offset);
-        return;
-    }
-#endif
+    bh_assert(module_inst->module_type == Wasm_Module_Bytecode);
 
     src_tbl_inst = wasm_get_table_inst(module_inst, src_tbl_idx);
     dst_tbl_inst = wasm_get_table_inst(module_inst, dst_tbl_idx);
     bh_assert(src_tbl_inst);
     bh_assert(dst_tbl_inst);
 
-    if ((uint64)dst_offset + length > dst_tbl_inst->cur_size
-        || (uint64)src_offset + length > src_tbl_inst->cur_size) {
+    if (offset_len_out_of_bounds(dst_offset, length, dst_tbl_inst->cur_size)
+        || offset_len_out_of_bounds(src_offset, length,
+                                    src_tbl_inst->cur_size)) {
         jit_set_exception_with_id(module_inst, EXCE_OUT_OF_BOUNDS_TABLE_ACCESS);
         return;
     }
@@ -3368,17 +3321,12 @@ llvm_jit_table_fill(WASMModuleInstance *module_inst, uint32 tbl_idx,
 {
     WASMTableInstance *tbl_inst;
 
-#if WASM_ENABLE_JIT != 0
-    if (Wasm_Module_AoT == module_inst->module_type) {
-        aot_table_fill(module_inst, tbl_idx, length, val, data_offset);
-        return;
-    }
-#endif
+    bh_assert(module_inst->module_type == Wasm_Module_Bytecode);
 
     tbl_inst = wasm_get_table_inst(module_inst, tbl_idx);
     bh_assert(tbl_inst);
 
-    if (data_offset + length > tbl_inst->cur_size) {
+    if (offset_len_out_of_bounds(data_offset, length, tbl_inst->cur_size)) {
         jit_set_exception_with_id(module_inst, EXCE_OUT_OF_BOUNDS_TABLE_ACCESS);
         return;
     }
@@ -3395,11 +3343,7 @@ llvm_jit_table_grow(WASMModuleInstance *module_inst, uint32 tbl_idx,
     WASMTableInstance *tbl_inst;
     uint32 i, orig_size, total_size;
 
-#if WASM_ENABLE_JIT != 0
-    if (Wasm_Module_AoT == module_inst->module_type) {
-        return aot_table_grow(module_inst, tbl_idx, inc_size, init_val);
-    }
-#endif
+    bh_assert(module_inst->module_type == Wasm_Module_Bytecode);
 
     tbl_inst = wasm_get_table_inst(module_inst, tbl_idx);
     if (!tbl_inst) {
@@ -3439,11 +3383,7 @@ llvm_jit_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
     WASMInterpFrame *frame;
     uint32 size;
 
-#if WASM_ENABLE_JIT != 0
-    if (Wasm_Module_AoT == exec_env->module_inst->module_type) {
-        return aot_alloc_frame(exec_env, func_index);
-    }
-#endif
+    bh_assert(exec_env->module_inst->module_type == Wasm_Module_Bytecode);
 
     module_inst = (WASMModuleInstance *)exec_env->module_inst;
     size = wasm_interp_interp_frame_size(0);
@@ -3472,12 +3412,7 @@ llvm_jit_free_frame(WASMExecEnv *exec_env)
     WASMInterpFrame *frame;
     WASMInterpFrame *prev_frame;
 
-#if WASM_ENABLE_JIT != 0
-    if (Wasm_Module_AoT == exec_env->module_inst->module_type) {
-        aot_free_frame(exec_env);
-        return;
-    }
-#endif
+    bh_assert(exec_env->module_inst->module_type == Wasm_Module_Bytecode);
 
     frame = wasm_exec_env_get_cur_frame(exec_env);
     prev_frame = frame->prev_frame;

+ 14 - 3
core/iwasm/interpreter/wasm_runtime.h

@@ -7,6 +7,7 @@
 #define _WASM_RUNTIME_H
 
 #include "wasm.h"
+#include "bh_atomic.h"
 #include "bh_hashmap.h"
 #include "../common/wasm_runtime_common.h"
 #include "../common/wasm_exec_env.h"
@@ -79,7 +80,7 @@ struct WASMMemoryInstance {
     /* Module type */
     uint32 module_type;
     /* Shared memory flag */
-    bool is_shared;
+    bh_atomic_32_t ref_count; /* 0: non-shared, > 0: reference count */
 
     /* Number bytes per page */
     uint32 num_bytes_per_page;
@@ -209,8 +210,19 @@ typedef struct CApiFuncImport {
     void *env_arg;
 } CApiFuncImport;
 
+/* The common part of WASMModuleInstanceExtra and AOTModuleInstanceExtra */
+typedef struct WASMModuleInstanceExtraCommon {
+    CApiFuncImport *c_api_func_imports;
+#if WASM_CONFIGUABLE_BOUNDS_CHECKS != 0
+    /* Disable bounds checks or not */
+    bool disable_bounds_checks;
+#endif
+} WASMModuleInstanceExtraCommon;
+
 /* Extra info of WASM module instance for interpreter/jit mode */
 typedef struct WASMModuleInstanceExtra {
+    WASMModuleInstanceExtraCommon common;
+
     WASMGlobalInstance *globals;
     WASMFunctionInstance *functions;
 
@@ -222,7 +234,6 @@ typedef struct WASMModuleInstanceExtra {
     WASMFunctionInstance *free_function;
     WASMFunctionInstance *retain_function;
 
-    CApiFuncImport *c_api_func_imports;
     RunningMode running_mode;
 
 #if WASM_ENABLE_MULTI_MODULE != 0
@@ -396,7 +407,7 @@ void
 wasm_unload(WASMModule *module);
 
 WASMModuleInstance *
-wasm_instantiate(WASMModule *module, bool is_sub_inst,
+wasm_instantiate(WASMModule *module, WASMModuleInstance *parent,
                  WASMExecEnv *exec_env_main, uint32 stack_size,
                  uint32 heap_size, char *error_buf, uint32 error_buf_size);
 

+ 13 - 6
core/iwasm/libraries/lib-pthread/lib_pthread_wrapper.c

@@ -531,7 +531,8 @@ pthread_start_routine(void *arg)
     else {
         info_node->u.ret = (void *)(uintptr_t)argv[0];
 #ifdef OS_ENABLE_HW_BOUND_CHECK
-        if (exec_env->suspend_flags.flags & 0x08)
+        if (WASM_SUSPEND_FLAGS_GET(exec_env->suspend_flags)
+            & WASM_SUSPEND_FLAG_EXIT)
             /* argv[0] isn't set after longjmp(1) to
                invoke_native_with_hw_bound_check */
             info_node->u.ret = exec_env->thread_ret_value;
@@ -580,7 +581,7 @@ pthread_create_wrapper(wasm_exec_env_t exec_env,
 #endif
 
     if (!(new_module_inst = wasm_runtime_instantiate_internal(
-              module, true, exec_env, stack_size, 0, NULL, 0)))
+              module, module_inst, exec_env, stack_size, 0, NULL, 0)))
         return -1;
 
     /* Set custom_data to new module instance */
@@ -690,6 +691,14 @@ pthread_join_wrapper(wasm_exec_env_t exec_env, uint32 thread,
         bh_assert(node->joinable);
         join_ret = 0;
         ret = node->u.ret;
+
+        /* The target thread changes the node's status before calling
+           wasm_cluster_exit_thread to exit, so its resources may not
+           have been destroyed yet. Wait long enough here to ensure that
+           they are actually destroyed, to avoid unexpected behavior. */
+        os_mutex_lock(&exec_env->wait_lock);
+        os_cond_reltimedwait(&exec_env->wait_cond, &exec_env->wait_lock, 1000);
+        os_mutex_unlock(&exec_env->wait_lock);
     }
 
     if (retval_offset != 0)
@@ -757,7 +766,6 @@ __pthread_self_wrapper(wasm_exec_env_t exec_env)
 static void
 pthread_exit_wrapper(wasm_exec_env_t exec_env, int32 retval_offset)
 {
-    wasm_module_inst_t module_inst = get_module_inst(exec_env);
     ThreadRoutineArgs *args = get_thread_arg(exec_env);
     /* Currently exit main thread is not allowed */
     if (!args)
@@ -775,9 +783,6 @@ pthread_exit_wrapper(wasm_exec_env_t exec_env, int32 retval_offset)
     /* destroy pthread key values */
     call_key_destructor(exec_env);
 
-    /* routine exit, destroy instance */
-    wasm_runtime_deinstantiate_internal(module_inst, true);
-
     if (!args->info_node->joinable) {
         delete_thread_info_node(args->info_node);
     }
@@ -789,6 +794,8 @@ pthread_exit_wrapper(wasm_exec_env_t exec_env, int32 retval_offset)
 
     wasm_runtime_free(args);
 
+    /* Don't destroy exec_env->module_inst in this function since
+       it will be destroyed in wasm_cluster_exit_thread */
     wasm_cluster_exit_thread(exec_env, (void *)(uintptr_t)retval_offset);
 }
 

+ 10 - 0
core/iwasm/libraries/lib-rats/lib_rats.cmake

@@ -23,6 +23,7 @@ include(FetchContent)
 set(RATS_BUILD_MODE "sgx"
     CACHE INTERNAL "Select build mode for librats(host|occlum|sgx|wasm)")
 set(RATS_INSTALL_PATH  "${CMAKE_BINARY_DIR}/librats" CACHE INTERNAL "")
+set(BUILD_SAMPLES OFF)
 
 FetchContent_Declare(
     librats
@@ -34,8 +35,17 @@ if (NOT librats_POPULATED)
     message("-- Fetching librats ..")
     FetchContent_Populate(librats)
     include_directories("${librats_SOURCE_DIR}/include")
+    
+    # Prevent the propagation of the CMAKE_C_FLAGS of WAMR into librats
+    set(SAVED_CMAKE_C_FLAGS ${CMAKE_C_FLAGS})
+    set(CMAKE_C_FLAGS "")
+
+    # Import the building scripts of librats
     add_subdirectory(${librats_SOURCE_DIR} ${librats_BINARY_DIR} EXCLUDE_FROM_ALL)
 
+    # Restore the CMAKE_C_FLAGS of WAMR
+    set(CMAKE_C_FLAGS ${SAVED_CMAKE_C_FLAGS})
+
 endif()
 
 file (GLOB source_all ${LIB_RATS_DIR}/*.c)

+ 3 - 1
core/iwasm/libraries/lib-socket/inc/wasi_socket_ext.h

@@ -17,6 +17,8 @@ extern "C" {
 #endif
 
 typedef enum {
+    /* Used only for sock_addr_resolve hints */
+    SOCKET_ANY = -1,
     SOCKET_DGRAM = 0,
     SOCKET_STREAM,
 } __wasi_sock_type_t;
@@ -84,7 +86,7 @@ typedef struct __wasi_addr_t {
     } addr;
 } __wasi_addr_t;
 
-typedef enum { INET4 = 0, INET6 } __wasi_address_family_t;
+typedef enum { INET4 = 0, INET6, INET_UNSPEC } __wasi_address_family_t;
 
 typedef struct __wasi_addr_info_t {
     __wasi_addr_t addr;

+ 5 - 0
core/iwasm/libraries/lib-socket/src/wasi/wasi_socket_ext.c

@@ -430,6 +430,9 @@ addrinfo_hints_to_wasi_hints(const struct addrinfo *hints,
             case AF_INET6:
                 wasi_hints->family = INET6;
                 break;
+            case AF_UNSPEC:
+                wasi_hints->family = INET_UNSPEC;
+                break;
             default:
                 return __WASI_ERRNO_AFNOSUPPORT;
         }
@@ -440,6 +443,9 @@ addrinfo_hints_to_wasi_hints(const struct addrinfo *hints,
             case SOCK_DGRAM:
                 wasi_hints->type = SOCKET_DGRAM;
                 break;
+            case 0:
+                wasi_hints->type = SOCKET_ANY;
+                break;
             default:
                 return __WASI_ERRNO_NOTSUP;
         }

+ 21 - 2
core/iwasm/libraries/lib-socket/test/nslookup.c

@@ -5,6 +5,8 @@
 
 #include <assert.h>
 #include <string.h>
+#include <stdio.h>
+#include <pthread.h>
 #ifdef __wasi__
 #include <wasi/api.h>
 #include <sys/socket.h>
@@ -39,11 +41,28 @@ test_nslookup(int af)
     freeaddrinfo(res);
 }
 
+void *
+test_nslookup_mt(void *params)
+{
+    int *af = (int *)params;
+    test_nslookup(*af);
+    return NULL;
+}
+
 int
 main()
 {
-    test_nslookup(AF_INET);  /* for ipv4 */
-    test_nslookup(AF_INET6); /* for ipv6 */
+    int afs[] = { AF_INET, AF_INET6 };
+
+    for (int i = 0; i < sizeof(afs) / sizeof(afs[0]); i++) {
+        pthread_t th;
+
+        printf("Testing %d in main thread...\n", afs[i]);
+        test_nslookup(afs[i]);
+        printf("Testing %d in a new thread...\n", afs[i]);
+        pthread_create(&th, NULL, test_nslookup_mt, &afs[i]);
+        pthread_join(th, NULL);
+    }
 
     return 0;
 }

+ 112 - 113
core/iwasm/libraries/lib-socket/test/tcp_udp.c

@@ -5,6 +5,8 @@
 #include <unistd.h>
 #include <string.h>
 #include <assert.h>
+#include <errno.h>
+#include <time.h>
 #ifdef __wasi__
 #include <wasi/api.h>
 #include <sys/socket.h>
@@ -12,105 +14,123 @@
 #endif
 #include <arpa/inet.h>
 #include <pthread.h>
+#include <stdio.h>
+
 #define SERVER_MSG "Message from server."
 #define PORT 8989
-pthread_mutex_t mut;
-pthread_cond_t cond;
+
+pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+
 int server_init_complete = 0;
-char buffer[sizeof(SERVER_MSG) + 1];
 
-struct socket_info {
-    union {
-        struct sockaddr_in addr_ipv4;
-        struct sockaddr_in6 addr_ipv6;
-    } addr;
+typedef struct {
+    struct sockaddr_storage addr;
+    socklen_t addr_len;
     int sock;
-};
-
-struct thread_args {
-    int family;
     int protocol;
-};
+} socket_info_t;
+
+/*
+ * Block until the server thread has set server_init_complete, or until
+ * wait_time_seconds have elapsed (measured on CLOCK_REALTIME).
+ * Asserts that the wait finished without a timeout or any other error.
+ */
+void
+wait_for_server(int wait_time_seconds)
+{
+    int res = 0;
+    struct timespec ts;
+
+    /* pthread_cond_timedwait takes an absolute deadline */
+    clock_gettime(CLOCK_REALTIME, &ts);
+    ts.tv_sec += wait_time_seconds;
+
+    pthread_mutex_lock(&mut);
+    /* Re-check the predicate after every wake-up (spurious wake-ups) */
+    while (server_init_complete == 0) {
+        res = pthread_cond_timedwait(&cond, &mut, &ts);
+        /* Leave on timeout and on any other error as well; otherwise a
+           persistent error would make this loop spin forever while
+           holding the mutex. */
+        if (res != 0)
+            break;
+    }
+    pthread_mutex_unlock(&mut);
+
+    assert(res == 0);
+}
 
-struct socket_info
+/*
+ * Mark the server as initialised and wake a thread blocked in
+ * wait_for_server().
+ */
+void
+notify_server_started()
+{
+    pthread_mutex_lock(&mut);
+    /* The flag is written under the mutex that the waiter holds while
+       testing it */
+    server_init_complete = 1;
+    pthread_cond_signal(&cond);
+    pthread_mutex_unlock(&mut);
+}
+
+socket_info_t
 init_socket_addr(int family, int protocol)
 {
-    int sock = socket(family, protocol, 0);
-    assert(sock != -1);
+    socket_info_t info;
+
+    info.sock = socket(family, protocol, 0);
+    assert(info.sock != -1);
+    info.protocol = protocol;
+
+    memset(&info.addr, 0, sizeof(info.addr));
 
-    struct socket_info info;
     if (family == AF_INET) {
-        struct sockaddr_in addr;
-        memset(&addr, 0, sizeof(addr));
-        addr.sin_family = AF_INET;
-        addr.sin_port = htons(PORT);
-        addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
-        info.addr.addr_ipv4 = addr;
+        struct sockaddr_in *addr = (struct sockaddr_in *)&info.addr;
+        addr->sin_family = AF_INET;
+        addr->sin_port = htons(PORT);
+        addr->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+        info.addr_len = sizeof(struct sockaddr_in);
     }
     else if (family == AF_INET6) {
-        struct sockaddr_in6 addr;
-        memset(&addr, 0, sizeof(addr));
-        addr.sin6_family = AF_INET6;
-        addr.sin6_port = htons(PORT);
-        addr.sin6_addr = in6addr_loopback;
-        info.addr.addr_ipv6 = addr;
+        struct sockaddr_in6 *addr = (struct sockaddr_in6 *)&info.addr;
+        addr->sin6_family = AF_INET6;
+        addr->sin6_port = htons(PORT);
+        addr->sin6_addr = in6addr_loopback;
+        info.addr_len = sizeof(struct sockaddr_in6);
     }
-    info.sock = sock;
-    return info;
-}
 
-void
-assert_thread_args(struct thread_args *args)
-{
-    assert(args->family == AF_INET || args->family == AF_INET6);
-    assert(args->protocol == SOCK_STREAM || args->protocol == SOCK_DGRAM);
+    return info;
 }
 
 void *
 server(void *arg)
 {
-    server_init_complete = 0;
-    struct thread_args *args = (struct thread_args *)arg;
-    assert_thread_args(args);
-
-    struct socket_info init_server_sock =
-        init_socket_addr(args->family, args->protocol);
-
-    int server_sock = init_server_sock.sock;
-    socklen_t addr_size;
+    char buffer[sizeof(SERVER_MSG) + 1] = { 0 };
     struct sockaddr_storage client_addr;
-    strcpy(buffer, SERVER_MSG);
+    socket_info_t *info = (socket_info_t *)arg;
+    struct sockaddr *server_addr = (struct sockaddr *)&info->addr;
+    int server_sock = info->sock;
 
-    struct sockaddr *server_addr = (struct sockaddr *)&init_server_sock.addr;
-    int ret = bind(server_sock, server_addr,
-                   args->family == AF_INET ? sizeof(struct sockaddr_in)
-                                           : sizeof(struct sockaddr_in6));
-    assert(ret == 0);
+    int optval = 1;
+    assert(setsockopt(server_sock, SOL_SOCKET, SO_REUSEADDR, &optval,
+                      sizeof(optval))
+           == 0);
 
-    (args->protocol == SOCK_STREAM) && listen(server_sock, 1);
-    pthread_mutex_lock(&mut);
-    server_init_complete = 1;
-    pthread_mutex_unlock(&mut);
-    pthread_cond_signal(&cond);
+    assert(bind(server_sock, server_addr, info->addr_len) == 0);
+
+    if (info->protocol == SOCK_STREAM)
+        listen(server_sock, 1);
+    notify_server_started();
 
-    addr_size = sizeof(client_addr);
-    if (args->protocol == SOCK_STREAM) {
+    socklen_t addr_size = info->addr_len;
+    if (info->protocol == SOCK_STREAM) {
         int client_sock =
             accept(server_sock, (struct sockaddr *)&client_addr, &addr_size);
         assert(client_sock >= 0);
-        sendto(client_sock, buffer, strlen(buffer), 0,
-               (struct sockaddr *)&client_addr, addr_size);
-
-        assert(close(client_sock) == 0);
+        assert(recv(client_sock, buffer, sizeof(buffer), 0) > 0);
+        strcpy(buffer, SERVER_MSG);
+        assert(send(client_sock, buffer, sizeof(buffer), 0) > 0);
+        assert(recv(client_sock, buffer, sizeof(buffer), 0) > 0);
     }
     else {
-        recvfrom(server_sock, buffer, sizeof(buffer), 0,
-                 (struct sockaddr *)&client_addr, &addr_size);
-        sendto(server_sock, buffer, strlen(buffer), 0,
-               (struct sockaddr *)&client_addr, addr_size);
-
-        assert(close(server_sock) == 0);
+        assert(recvfrom(server_sock, buffer, sizeof(buffer), 0,
+                        (struct sockaddr *)&client_addr, &addr_size)
+               > 0);
+        strcpy(buffer, SERVER_MSG);
+        assert(sendto(server_sock, buffer, strlen(buffer), 0,
+                      (struct sockaddr *)&client_addr, addr_size)
+               > 0);
+        assert(recvfrom(server_sock, buffer, sizeof(buffer), 0,
+                        (struct sockaddr *)&client_addr, &addr_size)
+               > 0);
     }
+    assert(close(server_sock) == 0);
 
     return NULL;
 }
@@ -118,46 +138,23 @@ server(void *arg)
 void *
 client(void *arg)
 {
-    struct thread_args *args = (struct thread_args *)arg;
-    assert_thread_args(args);
-
-    pthread_mutex_lock(&mut);
+    char buffer[sizeof(SERVER_MSG) + 1];
+    socket_info_t *info = (socket_info_t *)arg;
+    int sock = info->sock;
+    struct sockaddr *addr = (struct sockaddr *)&info->addr;
 
-    while (server_init_complete == 0) {
-        pthread_cond_wait(&cond, &mut);
-    }
+    wait_for_server(1);
 
-    struct socket_info init_client_sock =
-        init_socket_addr(args->family, args->protocol);
-    int sock = init_client_sock.sock;
-    pthread_mutex_unlock(&mut);
-
-    if (args->family == AF_INET) {
-        struct sockaddr_in addr = init_client_sock.addr.addr_ipv4;
-        if (args->protocol == SOCK_STREAM) {
-            assert(connect(sock, (struct sockaddr *)&addr, sizeof(addr)) != -1);
-        }
-        else {
-            assert(sendto(sock, buffer, strlen(buffer), 0,
-                          (struct sockaddr *)&addr, sizeof(addr))
-                   != -1);
-        }
-    }
-    else {
-        struct sockaddr_in6 addr = init_client_sock.addr.addr_ipv6;
-        if (args->protocol == SOCK_STREAM) {
-            assert(connect(sock, (struct sockaddr *)&addr, sizeof(addr)) != -1);
-        }
-        else {
-            assert(sendto(sock, buffer, strlen(buffer), 0,
-                          (struct sockaddr *)&addr, sizeof(addr))
-                   != -1);
-        }
+    if (info->protocol == SOCK_STREAM) {
+        assert(connect(sock, addr, info->addr_len) != -1);
     }
 
-    recv(sock, buffer, sizeof(buffer), 0);
-    assert(strcmp(buffer, SERVER_MSG) == 0);
+    assert(sendto(sock, "open", strlen("open"), 0, addr, info->addr_len) > 0);
+    assert(recv(sock, buffer, sizeof(buffer), 0) > 0);
+    assert(strncmp(buffer, SERVER_MSG, strlen(SERVER_MSG)) == 0);
+    assert(sendto(sock, "close", sizeof("close"), 0, addr, info->addr_len) > 0);
     assert(close(sock) == 0);
+
     return NULL;
 }
 
@@ -165,17 +162,19 @@ void
 test_protocol(int family, int protocol)
 {
     pthread_t server_thread, client_thread;
-    assert(pthread_cond_init(&cond, NULL) == 0);
-    assert(pthread_mutex_init(&mut, NULL) == 0);
+    socket_info_t server_info = init_socket_addr(family, protocol);
+    socket_info_t client_info = init_socket_addr(family, protocol);
+
+    printf("Testing address family: %d protocol: %d\n", family, protocol);
+
+    server_init_complete = 0;
 
-    struct thread_args args = { family, protocol };
-    assert(pthread_create(&server_thread, NULL, server, (void *)&args) == 0);
-    assert(pthread_create(&client_thread, NULL, client, (void *)&args) == 0);
+    assert(pthread_create(&server_thread, NULL, server, (void *)&server_info)
+           == 0);
+    assert(pthread_create(&client_thread, NULL, client, (void *)&client_info)
+           == 0);
     assert(pthread_join(server_thread, NULL) == 0);
     assert(pthread_join(client_thread, NULL) == 0);
-
-    assert(pthread_mutex_destroy(&mut) == 0);
-    assert(pthread_cond_destroy(&cond) == 0);
 }
 
 int
@@ -190,4 +189,4 @@ main(int argc, char **argv)
     test_protocol(AF_INET6, SOCK_DGRAM);
 
     return 0;
-}
+}

+ 1 - 1
core/iwasm/libraries/lib-wasi-threads/lib_wasi_threads_wrapper.c

@@ -90,7 +90,7 @@ thread_spawn_wrapper(wasm_exec_env_t exec_env, uint32 start_arg)
     stack_size = ((WASMModuleInstance *)module_inst)->default_wasm_stack_size;
 
     if (!(new_module_inst = wasm_runtime_instantiate_internal(
-              module, true, exec_env, stack_size, 0, NULL, 0)))
+              module, module_inst, exec_env, stack_size, 0, NULL, 0)))
         return -1;
 
     wasm_runtime_set_custom_data_internal(

+ 38 - 1
core/iwasm/libraries/lib-wasi-threads/test/build.sh

@@ -9,18 +9,54 @@ set -eo pipefail
 CC=${CC:=/opt/wasi-sdk/bin/clang}
 WAMR_DIR=../../../../..
 
+# Print the command-line options accepted by this script.
+show_usage() {
+    printf '%s\n' \
+        "Usage: $0 [--sysroot PATH_TO_SYSROOT]" \
+        "--sysroot PATH_TO_SYSROOT specify to build with custom sysroot for wasi-libc"
+}
+
+while [[ $# -gt 0 ]]; do
+    key="$1"
+    case $key in
+        --sysroot)
+            sysroot_path="$2"
+            shift
+            shift
+            ;;
+        --help)
+            show_usage
+            exit
+            ;;
+        *)
+            echo "Unknown option: $1"
+            exit 1
+            ;;
+    esac
+done
+
+# Stress tests names
+thread_start_file_exclusions=("spawn_stress_test.wasm" "linear_memory_size_update.wasm" "stress_test_threads_creation.wasm")
+
 for test_c in *.c; do
     test_wasm="$(basename $test_c .c).wasm"
 
-    if [ $test_wasm = "linear_memory_size_update.wasm" ]; then
+    if [[ " ${thread_start_file_exclusions[@]} " =~ " ${test_wasm} " ]] ; then
         thread_start_file=""
     else
         thread_start_file=$WAMR_DIR/samples/wasi-threads/wasm-apps/wasi_thread_start.S
     fi
 
+    if [[ -n "$sysroot_path" ]]; then 
+        if [ ! -d "$sysroot_path" ]; then 
+            echo "Directory $sysroot_path  doesn't exist. Aborting"
+            exit 1
+        fi
+        sysroot_command="--sysroot $sysroot_path"
+    fi
+    
     echo "Compiling $test_c to $test_wasm"
     $CC \
         -target wasm32-wasi-threads \
+        -O2 \
         -pthread -ftls-model=local-exec \
         -z stack-size=32768 \
         -Wl,--export=__heap_base \
@@ -30,6 +66,7 @@ for test_c in *.c; do
         -Wl,--export=malloc \
         -Wl,--export=free \
         -I $WAMR_DIR/samples/wasi-threads/wasm-apps \
+        $sysroot_command \
         $thread_start_file \
         $test_c -o $test_wasm
 done

+ 3 - 0
core/iwasm/libraries/lib-wasi-threads/test/manifest.json

@@ -0,0 +1,3 @@
+{
+    "name": "lib-wasi-threads tests"
+}

+ 6 - 0
core/iwasm/libraries/lib-wasi-threads/test/skip.json

@@ -0,0 +1,6 @@
+{
+    "lib-wasi-threads tests": {
+        "spawn_stress_test": "Stress tests are incompatible with the other part and executed differently",
+        "stress_test_threads_creation": "Stress tests are incompatible with the other part and executed differently"
+    }
+}

+ 117 - 0
core/iwasm/libraries/lib-wasi-threads/test/spawn_stress_test.c

@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef __wasi__
+#error This example only compiles to WASM/WASI target
+#endif
+
+#include <assert.h>
+#include <errno.h>
+#include <math.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+/* Tuning knobs of the stress test */
+enum CONSTANTS {
+    /* Upper bound of the range of numbers checked for primality */
+    NUM_ITER = 100000,
+    /* Maximum number of pthread_create attempts per thread */
+    NUM_RETRY = 8,
+    /* Number of thread slots that main() cycles through */
+    MAX_NUM_THREADS = 8,
+    /* Initial sleep (microseconds) after an EAGAIN from pthread_create,
+       doubled on every retry; main() also sleeps this long before each
+       spawn */
+    RETRY_SLEEP_TIME_US = 2000,
+};
+
+unsigned prime_numbers_count = 0;
+
+/*
+ * Trial-division primality check.
+ * Returns false when num has a divisor in [2, floor(sqrt(num))] and true
+ * otherwise. Note that 0 and 1 are reported as prime because no divisor is
+ * tried for them; the callers in this file only pass values >= 2.
+ */
+bool
+is_prime(unsigned int num)
+{
+    const unsigned int limit = (unsigned int)(sqrt(num));
+    unsigned int divisor = 2;
+
+    while (divisor <= limit) {
+        if (num % divisor == 0)
+            return false;
+        ++divisor;
+    }
+
+    return true;
+}
+
+/*
+ * Thread body: sleeps for 10 ms, then atomically increments
+ * prime_numbers_count if the unsigned int pointed to by value is prime.
+ * Always returns NULL.
+ */
+void *
+check_if_prime(void *value)
+{
+    usleep(10000);
+
+    /* The argument is read only after the sleep */
+    if (!is_prime(*(unsigned int *)(value)))
+        return NULL;
+
+    __atomic_fetch_add(&prime_numbers_count, 1, __ATOMIC_SEQ_CST);
+    return NULL;
+}
+
+/*
+ * Single-threaded reference: returns how many numbers in [2, NUM_ITER]
+ * is_prime() reports as prime.
+ */
+unsigned int
+validate()
+{
+    unsigned int primes_found = 0;
+    unsigned int candidate = 2;
+
+    while (candidate <= NUM_ITER) {
+        if (is_prime(candidate))
+            primes_found += 1;
+        ++candidate;
+    }
+
+    return primes_found;
+}
+
+/*
+ * Start check_if_prime(arg) in a new thread, storing its handle in *thread.
+ * pthread_create is attempted up to NUM_RETRY times; after each EAGAIN the
+ * function sleeps, doubling the sleep time on every retry. Any other error,
+ * or running out of attempts, trips an assertion.
+ */
+void
+spawn_thread(pthread_t *thread, unsigned int *arg)
+{
+    int rc = -1;
+    int backoff_us = RETRY_SLEEP_TIME_US;
+    int attempt = 0;
+
+    while (rc != 0 && attempt < NUM_RETRY) {
+        rc = pthread_create(thread, NULL, &check_if_prime, arg);
+        assert(rc == 0 || rc == EAGAIN);
+        if (rc == EAGAIN) {
+            usleep(backoff_us);
+            backoff_us *= 2;
+        }
+        ++attempt;
+    }
+
+    assert(rc == 0 && "Thread creation should succeed");
+}
+
+/*
+ * Checks every number in [2, NUM_ITER) for primality, one number per
+ * thread, cycling through MAX_NUM_THREADS thread slots. Afterwards the
+ * number of primes counted by the threads is compared with the result of
+ * the single-threaded validate(). (validate() also covers NUM_ITER itself,
+ * which is not prime for the current value of 100000.)
+ */
+int
+main(int argc, char **argv)
+{
+    /* Zero-initialised: the "slot already in use" checks below compare
+       each entry with 0, so the array must not hold indeterminate values
+       before the first thread is stored in a slot. */
+    pthread_t threads[MAX_NUM_THREADS] = { 0 };
+    unsigned int args[MAX_NUM_THREADS];
+    double percentage = 0.1;
+
+    for (unsigned int factorised_number = 2; factorised_number < NUM_ITER;
+         ++factorised_number) {
+        /* Progress report every 10% */
+        if (factorised_number > NUM_ITER * percentage) {
+            fprintf(stderr, "Stress test is %d%% finished\n",
+                    (unsigned int)(percentage * 100));
+            percentage += 0.1;
+        }
+
+        /* Reuse the slot: wait for its previous thread (if any) so that
+           args[thread_num] is no longer being read */
+        unsigned int thread_num = factorised_number % MAX_NUM_THREADS;
+        if (threads[thread_num] != 0) {
+            assert(pthread_join(threads[thread_num], NULL) == 0);
+        }
+
+        args[thread_num] = factorised_number;
+
+        usleep(RETRY_SLEEP_TIME_US);
+        spawn_thread(&threads[thread_num], &args[thread_num]);
+        assert(threads[thread_num] != 0);
+    }
+
+    /* Join whatever is still running in each slot */
+    for (int i = 0; i < MAX_NUM_THREADS; ++i) {
+        assert(threads[i] == 0 || pthread_join(threads[i], NULL) == 0);
+    }
+
+    // Check the test results
+    assert(
+        prime_numbers_count == validate()
+        && "Answer mismatch between tested code and reference implementation");
+
+    fprintf(stderr, "Stress test finished successfully\n");
+    return 0;
+}

+ 93 - 0
core/iwasm/libraries/lib-wasi-threads/test/stress_test_threads_creation.c

@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+
+/* Tuning knobs of the thread-creation stress test */
+enum CONSTANTS {
+    /* Number of threads main() spawns (one per loop iteration) */
+    NUM_ITER = 200000,
+    /* Maximum number of pthread_create attempts per spawn */
+    NUM_RETRY = 8,
+    /* main() delays spawning while threads_in_use equals this value */
+    MAX_NUM_THREADS = 8,
+    /* Initial sleep (microseconds) after an EAGAIN from pthread_create,
+       doubled on every retry */
+    RETRY_SLEEP_TIME_US = 4000,
+    /* Timeout argument handed to __builtin_wasm_memory_atomic_wait32 in
+       main(); presumably nanoseconds, i.e. one second -- TODO confirm */
+    SECOND = 1000 * 1000 * 1000
+};
+
+int threads_executed = 0;
+unsigned int threads_creation_tried = 0;
+unsigned int threads_in_use = 0;
+
+/*
+ * Thread body: records that the thread ran and then gives back its slot.
+ * The argument is unused. Always returns NULL.
+ */
+void *
+thread_func(void *arg)
+{
+    (void)(arg);
+    /* Count the execution first ... */
+    __atomic_fetch_add(&threads_executed, 1, __ATOMIC_RELAXED);
+    /* ... then decrement the in-use counter that main() polls */
+    __atomic_fetch_sub(&threads_in_use, 1, __ATOMIC_SEQ_CST);
+    return NULL;
+}
+
+/*
+ * Start thread_func in a new thread, storing its handle in *thread.
+ * pthread_create is attempted up to NUM_RETRY times and every attempt is
+ * counted in threads_creation_tried; after each EAGAIN the function sleeps,
+ * doubling the sleep time on every retry. Any other error, or running out
+ * of attempts, trips an assertion.
+ */
+void
+spawn_thread(pthread_t *thread)
+{
+    int rc = -1;
+    int backoff_us = RETRY_SLEEP_TIME_US;
+    int attempt = 0;
+
+    while (rc != 0 && attempt < NUM_RETRY) {
+        rc = pthread_create(thread, NULL, &thread_func, NULL);
+        __atomic_fetch_add(&threads_creation_tried, 1, __ATOMIC_RELAXED);
+
+        assert(rc == 0 || rc == EAGAIN);
+        if (rc == EAGAIN) {
+            usleep(backoff_us);
+            backoff_us *= 2;
+        }
+        ++attempt;
+    }
+
+    assert(rc == 0 && "Thread creation should succeed");
+}
+
+/*
+ * Spawns NUM_ITER detached threads, never starting a new one while
+ * threads_in_use equals MAX_NUM_THREADS, waits for all of them to finish
+ * and then checks that thread creation needed fewer than 2.5 attempts per
+ * executed thread on average.
+ */
+int
+main(int argc, char **argv)
+{
+    double percentage = 0.1;
+
+    for (int iter = 0; iter < NUM_ITER; ++iter) {
+        /* Progress report every 10% */
+        if (iter > NUM_ITER * percentage) {
+            fprintf(stderr, "Spawning stress test is %d%% finished\n",
+                    (unsigned int)(percentage * 100));
+            percentage += 0.1;
+        }
+        /* Poll until a running thread has given back its slot */
+        while (__atomic_load_n(&threads_in_use, __ATOMIC_SEQ_CST)
+               == MAX_NUM_THREADS) {
+            usleep(100);
+        }
+
+        /* Claim the slot before spawning; thread_func releases it */
+        __atomic_fetch_add(&threads_in_use, 1, __ATOMIC_SEQ_CST);
+        pthread_t tmp;
+        spawn_thread(&tmp);
+        pthread_detach(tmp);
+    }
+
+    /* Wait for the remaining threads.
+       NOTE(review): the wait is issued with expected value 0 while the
+       counter is known to be non-zero, so it presumably returns at once
+       and this loop acts as polling -- confirm. */
+    while ((__atomic_load_n(&threads_in_use, __ATOMIC_SEQ_CST) != 0)) {
+        __builtin_wasm_memory_atomic_wait32(&threads_in_use, 0, SECOND);
+    }
+
+    assert(__atomic_load_n(&threads_in_use, __ATOMIC_SEQ_CST) == 0);
+
+    // Validation
+    assert(threads_creation_tried >= threads_executed
+           && "Test executed more threads than were created");
+    assert((1. * threads_creation_tried) / threads_executed < 2.5
+           && "Ensuring that we're retrying thread creation less than 2.5 "
+              "times on average ");
+
+    /* Report the number of threads that actually ran (the message says
+       "executed"), not the number of creation attempts */
+    fprintf(stderr,
+            "Spawning stress test finished successfully executed %d threads "
+            "with retry ratio %f\n",
+            threads_executed,
+            (1. * threads_creation_tried) / threads_executed);
+    return 0;
+}

+ 7 - 4
core/iwasm/libraries/lib-wasi-threads/tid_allocator.c

@@ -21,7 +21,8 @@ tid_allocator_init(TidAllocator *tid_allocator)
         return false;
 
     for (int64 i = tid_allocator->pos - 1; i >= 0; i--)
-        tid_allocator->ids[i] = TID_MIN + (tid_allocator->pos - 1 - i);
+        tid_allocator->ids[i] =
+            (uint32)(TID_MIN + (tid_allocator->pos - 1 - i));
 
     return true;
 }
@@ -54,7 +55,8 @@ tid_allocator_get_tid(TidAllocator *tid_allocator)
             LOG_ERROR("Overflow detected during realloc");
             return -1;
         }
-        int32 *tmp = wasm_runtime_realloc(tid_allocator->ids, realloc_size);
+        int32 *tmp =
+            wasm_runtime_realloc(tid_allocator->ids, (uint32)realloc_size);
         if (tmp == NULL) {
             LOG_ERROR("Thread ID allocator realloc failed");
             return -1;
@@ -64,7 +66,8 @@ tid_allocator_get_tid(TidAllocator *tid_allocator)
         tid_allocator->pos = new_size - old_size;
         tid_allocator->ids = tmp;
         for (int64 i = tid_allocator->pos - 1; i >= 0; i--)
-            tid_allocator->ids[i] = TID_MIN + (tid_allocator->size - 1 - i);
+            tid_allocator->ids[i] =
+                (uint32)(TID_MIN + (tid_allocator->size - 1 - i));
     }
 
     // Pop available thread identifier from the stack
@@ -77,4 +80,4 @@ tid_allocator_release_tid(TidAllocator *tid_allocator, int32 thread_id)
     // Release thread identifier by pushing it into the stack
     bh_assert(tid_allocator->pos < tid_allocator->size);
     tid_allocator->ids[tid_allocator->pos++] = thread_id;
-}
+}

+ 1 - 1
core/iwasm/libraries/libc-uvwasi/libc_uvwasi.cmake

@@ -3,7 +3,7 @@
 
 set (LIBC_WASI_DIR ${CMAKE_CURRENT_LIST_DIR})
 
-set (LIBUV_VERSION v1.44.2)
+set (LIBUV_VERSION v1.46.0)
 
 add_definitions (-DWASM_ENABLE_LIBC_WASI=1 -DWASM_ENABLE_UVWASI=1)
 

+ 3 - 1
core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/include/wasmtime_ssp.h

@@ -552,6 +552,8 @@ _Static_assert(offsetof(__wasi_subscription_t, u) == 8, "witx calculated offset"
 
 /* keep syncing with wasi_socket_ext.h */
 typedef enum {
+    /* Used only for sock_addr_resolve hints */
+    SOCKET_ANY = -1,
     SOCKET_DGRAM = 0,
     SOCKET_STREAM,
 } __wasi_sock_type_t;
@@ -605,7 +607,7 @@ typedef struct __wasi_addr_t {
     } addr;
 } __wasi_addr_t;
 
-typedef enum { INET4 = 0, INET6 } __wasi_address_family_t;
+typedef enum { INET4 = 0, INET6, INET_UNSPEC } __wasi_address_family_t;
 
 typedef struct __wasi_addr_info_t {
     __wasi_addr_t addr;

+ 6 - 3
core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/posix.c

@@ -3102,9 +3102,12 @@ wasi_ssp_sock_addr_resolve(
     }
 
     int ret = os_socket_addr_resolve(
-        host, service, hints->hints_enabled ? &hints_is_tcp : NULL,
-        hints->hints_enabled ? &hints_is_ipv4 : NULL, wamr_addr_info,
-        addr_info_size, &_max_info_size);
+        host, service,
+        hints->hints_enabled && hints->type != SOCKET_ANY ? &hints_is_tcp
+                                                          : NULL,
+        hints->hints_enabled && hints->family != INET_UNSPEC ? &hints_is_ipv4
+                                                             : NULL,
+        wamr_addr_info, addr_info_size, &_max_info_size);
 
     if (ret != BHT_OK) {
         wasm_runtime_free(wamr_addr_info);

+ 29 - 25
core/iwasm/libraries/thread-mgr/thread_manager.c

@@ -509,7 +509,7 @@ wasm_cluster_spawn_exec_env(WASMExecEnv *exec_env)
 #endif
 
     if (!(new_module_inst = wasm_runtime_instantiate_internal(
-              module, true, exec_env, stack_size, 0, NULL, 0))) {
+              module, module_inst, exec_env, stack_size, 0, NULL, 0))) {
         goto fail1;
     }
 
@@ -606,7 +606,8 @@ thread_manager_start_routine(void *arg)
 
 #ifdef OS_ENABLE_HW_BOUND_CHECK
     os_mutex_lock(&exec_env->wait_lock);
-    if (exec_env->suspend_flags.flags & 0x08)
+    if (WASM_SUSPEND_FLAGS_GET(exec_env->suspend_flags)
+        & WASM_SUSPEND_FLAG_EXIT)
         ret = exec_env->thread_ret_value;
     os_mutex_unlock(&exec_env->wait_lock);
 #endif
@@ -745,10 +746,10 @@ wasm_cluster_dup_c_api_imports(WASMModuleInstanceCommon *module_inst_dst,
 
 #if WASM_ENABLE_INTERP != 0
     if (module_inst_src->module_type == Wasm_Module_Bytecode) {
-        new_c_api_func_imports =
-            &(((WASMModuleInstance *)module_inst_dst)->e->c_api_func_imports);
+        new_c_api_func_imports = &(((WASMModuleInstance *)module_inst_dst)
+                                       ->e->common.c_api_func_imports);
         c_api_func_imports = ((const WASMModuleInstance *)module_inst_src)
-                                 ->e->c_api_func_imports;
+                                 ->e->common.c_api_func_imports;
         import_func_count =
             ((WASMModule *)(((const WASMModuleInstance *)module_inst_src)
                                 ->module))
@@ -759,10 +760,10 @@ wasm_cluster_dup_c_api_imports(WASMModuleInstanceCommon *module_inst_dst,
     if (module_inst_src->module_type == Wasm_Module_AoT) {
         AOTModuleInstanceExtra *e =
             (AOTModuleInstanceExtra *)((AOTModuleInstance *)module_inst_dst)->e;
-        new_c_api_func_imports = &(e->c_api_func_imports);
+        new_c_api_func_imports = &(e->common.c_api_func_imports);
 
         e = (AOTModuleInstanceExtra *)((AOTModuleInstance *)module_inst_src)->e;
-        c_api_func_imports = e->c_api_func_imports;
+        c_api_func_imports = e->common.c_api_func_imports;
 
         import_func_count =
             ((AOTModule *)(((AOTModuleInstance *)module_inst_src)->module))
@@ -993,7 +994,9 @@ wasm_cluster_exit_thread(WASMExecEnv *exec_env, void *retval)
     if (exec_env->jmpbuf_stack_top) {
         /* Store the return value in exec_env */
         exec_env->thread_ret_value = retval;
-        exec_env->suspend_flags.flags |= 0x08;
+
+        WASM_SUSPEND_FLAGS_FETCH_OR(exec_env->suspend_flags,
+                                    WASM_SUSPEND_FLAG_EXIT);
 
 #ifndef BH_PLATFORM_WINDOWS
         /* Pop all jmpbuf_node except the last one */
@@ -1055,7 +1058,8 @@ set_thread_cancel_flags(WASMExecEnv *exec_env)
 #if WASM_ENABLE_DEBUG_INTERP != 0
     wasm_cluster_thread_send_signal(exec_env, WAMR_SIG_TERM);
 #endif
-    exec_env->suspend_flags.flags |= 0x01;
+    WASM_SUSPEND_FLAGS_FETCH_OR(exec_env->suspend_flags,
+                                WASM_SUSPEND_FLAG_TERMINATE);
 
     os_mutex_unlock(&exec_env->wait_lock);
 }
@@ -1178,7 +1182,8 @@ void
 wasm_cluster_suspend_thread(WASMExecEnv *exec_env)
 {
     /* Set the suspend flag */
-    exec_env->suspend_flags.flags |= 0x02;
+    WASM_SUSPEND_FLAGS_FETCH_OR(exec_env->suspend_flags,
+                                WASM_SUSPEND_FLAG_SUSPEND);
 }
 
 static void
@@ -1214,7 +1219,8 @@ wasm_cluster_suspend_all_except_self(WASMCluster *cluster,
 void
 wasm_cluster_resume_thread(WASMExecEnv *exec_env)
 {
-    exec_env->suspend_flags.flags &= ~0x02;
+    WASM_SUSPEND_FLAGS_FETCH_AND(exec_env->suspend_flags,
+                                 ~WASM_SUSPEND_FLAG_SUSPEND);
     os_cond_signal(&exec_env->wait_cond);
 }
 
@@ -1248,10 +1254,8 @@ set_exception_visitor(void *node, void *user_data)
 
         /* Only spread non "wasi proc exit" exception */
 #if WASM_ENABLE_SHARED_MEMORY != 0
-        WASMSharedMemNode *shared_mem_node = wasm_module_get_shared_memory(
-            (WASMModuleCommon *)curr_wasm_inst->module);
-        if (shared_mem_node)
-            os_mutex_lock(&shared_mem_node->shared_mem_lock);
+        if (curr_wasm_inst->memory_count > 0)
+            shared_memory_lock(curr_wasm_inst->memories[0]);
 #endif
         if (!strstr(wasm_inst->cur_exception, "wasi proc exit")) {
             bh_memcpy_s(curr_wasm_inst->cur_exception,
@@ -1260,8 +1264,8 @@ set_exception_visitor(void *node, void *user_data)
                         sizeof(wasm_inst->cur_exception));
         }
 #if WASM_ENABLE_SHARED_MEMORY != 0
-        if (shared_mem_node)
-            os_mutex_unlock(&shared_mem_node->shared_mem_lock);
+        if (curr_wasm_inst->memory_count > 0)
+            shared_memory_unlock(curr_wasm_inst->memories[0]);
 #endif
 
         /* Terminate the thread so it can exit from dead loops */
@@ -1280,15 +1284,13 @@ clear_exception_visitor(void *node, void *user_data)
             (WASMModuleInstance *)get_module_inst(curr_exec_env);
 
 #if WASM_ENABLE_SHARED_MEMORY != 0
-        WASMSharedMemNode *shared_mem_node = wasm_module_get_shared_memory(
-            (WASMModuleCommon *)curr_wasm_inst->module);
-        if (shared_mem_node)
-            os_mutex_lock(&shared_mem_node->shared_mem_lock);
+        if (curr_wasm_inst->memory_count > 0)
+            shared_memory_lock(curr_wasm_inst->memories[0]);
 #endif
         curr_wasm_inst->cur_exception[0] = '\0';
 #if WASM_ENABLE_SHARED_MEMORY != 0
-        if (shared_mem_node)
-            os_mutex_unlock(&shared_mem_node->shared_mem_lock);
+        if (curr_wasm_inst->memory_count > 0)
+            shared_memory_unlock(curr_wasm_inst->memories[0]);
 #endif
     }
 }
@@ -1343,8 +1345,10 @@ bool
 wasm_cluster_is_thread_terminated(WASMExecEnv *exec_env)
 {
     os_mutex_lock(&exec_env->wait_lock);
-    bool is_thread_terminated =
-        (exec_env->suspend_flags.flags & 0x01) ? true : false;
+    bool is_thread_terminated = (WASM_SUSPEND_FLAGS_GET(exec_env->suspend_flags)
+                                 & WASM_SUSPEND_FLAG_TERMINATE)
+                                    ? true
+                                    : false;
     os_mutex_unlock(&exec_env->wait_lock);
 
     return is_thread_terminated;

+ 2 - 0
core/iwasm/libraries/wasi-nn/.gitignore

@@ -0,0 +1,2 @@
+**/*.wasm
+**/*.tflite

+ 22 - 8
core/iwasm/libraries/wasi-nn/README.md

@@ -25,6 +25,7 @@ Build the runtime image for your execution target type.
 * `cpu`
 * `nvidia-gpu`
 * `vx-delegate`
+* `tpu`
 
 ```
 EXECUTION_TYPE=cpu
@@ -64,6 +65,8 @@ docker run \
 ```
 
 * (NVIDIA) GPU
+    * Requirements:
+        * [NVIDIA docker](https://github.com/NVIDIA/nvidia-docker).
 
 ```
 docker run \
@@ -76,25 +79,36 @@ docker run \
     /assets/test_tensorflow.wasm
 ```
 
-* vx-delegate for NPU (x86 simulater)
+* vx-delegate for NPU (x86 simulator)
 
 ```
 docker run \
-    -v $PWD/core/iwasm/libraries/wasi-nn/test:/assets wasi-nn-vx-delegate \
-    --dir=/assets \
+    -v $PWD/core/iwasm/libraries/wasi-nn/test:/assets \
+    wasi-nn-vx-delegate \
+    --dir=/ \
     --env="TARGET=gpu" \
-    /assets/test_tensorflow.wasm
+    /assets/test_tensorflow_quantized.wasm
 ```
 
+* (Coral) TPU
+    * Requirements:
+        * [Coral USB](https://coral.ai/products/accelerator/).
 
-
-Requirements:
-* [NVIDIA docker](https://github.com/NVIDIA/nvidia-docker).
+```
+docker run \
+    --privileged \
+    --device=/dev/bus/usb:/dev/bus/usb \
+    -v $PWD/core/iwasm/libraries/wasi-nn/test:/assets \
+    wasi-nn-tpu \
+    --dir=/ \
+    --env="TARGET=tpu" \
+    /assets/test_tensorflow_quantized.wasm
+```
 
 ## What is missing
 
 Supported:
 
 * Graph encoding: `tensorflowlite`.
-* Execution target: `cpu` and `gpu`.
+* Execution target: `cpu`, `gpu` and `tpu`.
 * Tensor type: `fp32`.

+ 5 - 1
core/iwasm/libraries/wasi-nn/cmake/Findtensorflow_lite.cmake

@@ -18,12 +18,16 @@ if(NOT EXISTS ${TENSORFLOW_LITE})
 
   set(TENSORFLOW_SOURCE_DIR "${WAMR_ROOT_DIR}/core/deps/tensorflow-src")
 
-  if(WASI_NN_ENABLE_GPU EQUAL 1)
+  if(WAMR_BUILD_WASI_NN_ENABLE_GPU EQUAL 1)
     # Tensorflow specific:
     # * https://www.tensorflow.org/lite/guide/build_cmake#available_options_to_build_tensorflow_lite
     set (TFLITE_ENABLE_GPU ON)
   endif()
 
+  if (CMAKE_SIZEOF_VOID_P EQUAL 4)
+    set (TFLITE_ENABLE_XNNPACK OFF)
+  endif()
+
   add_subdirectory(
     "${TENSORFLOW_SOURCE_DIR}/tensorflow/lite"
     "${CMAKE_CURRENT_BINARY_DIR}/tensorflow-lite"

+ 102 - 31
core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp

@@ -16,11 +16,11 @@
 #include <tensorflow/lite/optional_debug_tools.h>
 #include <tensorflow/lite/error_reporter.h>
 
-#if defined(WASI_NN_ENABLE_GPU)
+#if WASM_ENABLE_WASI_NN_GPU != 0
 #include <tensorflow/lite/delegates/gpu/delegate.h>
 #endif
 
-#if defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
+#if WASM_ENABLE_WASI_NN_EXTERNAL_DELEGATE != 0
 #include <tensorflow/lite/delegates/external/external_delegate.h>
 #endif
 
@@ -130,8 +130,8 @@ tensorflowlite_load(void *tflite_ctx, graph_builder_array *builder,
         return invalid_argument;
     }
 
-    if (target != cpu && target != gpu) {
-        NN_ERR_PRINTF("Only CPU and GPU target is supported.");
+    if (target != cpu && target != gpu && target != tpu) {
+        NN_ERR_PRINTF("Only CPU, GPU and TPU target is supported.");
         return invalid_argument;
     }
 
@@ -195,7 +195,7 @@ tensorflowlite_init_execution_context(void *tflite_ctx, graph g,
     switch (tfl_ctx->models[g].target) {
         case gpu:
         {
-#if defined(WASI_NN_ENABLE_GPU)
+#if WASM_ENABLE_WASI_NN_GPU != 0
             NN_WARN_PRINTF("GPU enabled.");
             // https://www.tensorflow.org/lite/performance/gpu
             TfLiteGpuDelegateOptionsV2 options =
@@ -216,10 +216,19 @@ tensorflowlite_init_execution_context(void *tflite_ctx, graph g,
                 NN_ERR_PRINTF("Error when enabling GPU delegate.");
                 use_default = true;
             }
-#elif defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
+#else
+            NN_WARN_PRINTF("GPU not enabled.");
+            use_default = true;
+#endif
+            break;
+        }
+        case tpu:
+        {
+#if WASM_ENABLE_WASI_NN_EXTERNAL_DELEGATE != 0
             NN_WARN_PRINTF("external delegation enabled.");
             TfLiteExternalDelegateOptions options =
-                TfLiteExternalDelegateOptionsDefault(WASI_NN_EXT_DELEGATE_PATH);
+                TfLiteExternalDelegateOptionsDefault(
+                    WASM_WASI_NN_EXTERNAL_DELEGATE_PATH);
             tfl_ctx->delegate = TfLiteExternalDelegateCreate(&options);
             if (tfl_ctx->delegate == NULL) {
                 NN_ERR_PRINTF("Error when generating External delegate.");
@@ -233,7 +242,7 @@ tensorflowlite_init_execution_context(void *tflite_ctx, graph g,
                 use_default = true;
             }
 #else
-            NN_WARN_PRINTF("GPU not enabled.");
+            NN_WARN_PRINTF("External delegate not enabled.");
             use_default = true;
 #endif
             break;
@@ -285,14 +294,37 @@ tensorflowlite_set_input(void *tflite_ctx, graph_execution_context ctx,
         return invalid_argument;
     }
 
-    auto *input =
-        tfl_ctx->interpreters[ctx].interpreter->typed_input_tensor<float>(
-            index);
-    if (input == NULL)
-        return missing_memory;
+    if (tensor->quantization.type == kTfLiteNoQuantization) {
+        NN_DBG_PRINTF("No quantization information. Using float as default");
+        float *it =
+            tfl_ctx->interpreters[ctx].interpreter->typed_input_tensor<float>(
+                index);
+
+        int size = model_tensor_size * sizeof(float);
+        bh_memcpy_s(it, size, input_tensor->data, size);
+    }
+    else { // TODO: Assumming uint8 quantized networks.
+        TfLiteAffineQuantization *quant_info =
+            (TfLiteAffineQuantization *)tensor->quantization.params;
+        if (quant_info->scale->size != 1 || quant_info->zero_point->size != 1) {
+            NN_ERR_PRINTF("Quantization per channel is not supported");
+            return runtime_error;
+        }
+        uint8_t *it =
+            tfl_ctx->interpreters[ctx].interpreter->typed_input_tensor<uint8_t>(
+                index);
+
+        float scale = quant_info->scale->data[0];
+        float zero_point = (float)quant_info->zero_point->data[0];
+        NN_DBG_PRINTF("input tensor: (scale, offset) = (%f, %f)", scale,
+                      zero_point);
+
+        float *input_tensor_f = (float *)input_tensor->data;
+        for (uint32_t i = 0; i < model_tensor_size; ++i) {
+            it[i] = (uint8_t)(input_tensor_f[i] / scale + zero_point);
+        }
+    }
 
-    bh_memcpy_s(input, model_tensor_size * sizeof(float), input_tensor->data,
-                model_tensor_size * sizeof(float));
     return success;
 }
 
@@ -325,6 +357,7 @@ tensorflowlite_get_output(void *tflite_ctx, graph_execution_context ctx,
     NN_DBG_PRINTF("Number of tensors (%d)", num_output_tensors);
 
     if (index + 1 > num_output_tensors) {
+        NN_ERR_PRINTF("Index %d is invalid.", index);
         return runtime_error;
     }
 
@@ -343,15 +376,37 @@ tensorflowlite_get_output(void *tflite_ctx, graph_execution_context ctx,
         return missing_memory;
     }
 
-    float *tensor_f =
-        tfl_ctx->interpreters[ctx].interpreter->typed_output_tensor<float>(
-            index);
-    for (uint32_t i = 0; i < model_tensor_size; ++i)
-        NN_DBG_PRINTF("output: %f", tensor_f[i]);
+    if (tensor->quantization.type == kTfLiteNoQuantization) {
+        NN_DBG_PRINTF("No quantization information");
+        float *ot =
+            tfl_ctx->interpreters[ctx].interpreter->typed_output_tensor<float>(
+                index);
+
+        int size = model_tensor_size * sizeof(float);
+        bh_memcpy_s(output_tensor, size, ot, size);
+    }
+    else { // TODO: Assumming uint8 quantized networks.
+        TfLiteAffineQuantization *quant_info =
+            (TfLiteAffineQuantization *)tensor->quantization.params;
+        if (quant_info->scale->size != 1 || quant_info->zero_point->size != 1) {
+            NN_ERR_PRINTF("Quantization per channel is not supported");
+            return runtime_error;
+        }
+        uint8_t *ot = tfl_ctx->interpreters[ctx]
+                          .interpreter->typed_output_tensor<uint8_t>(index);
+
+        float scale = quant_info->scale->data[0];
+        float zero_point = (float)quant_info->zero_point->data[0];
+        NN_DBG_PRINTF("output tensor: (scale, offset) = (%f, %f)", scale,
+                      zero_point);
+
+        float *output_tensor_f = (float *)output_tensor;
+        for (uint32_t i = 0; i < model_tensor_size; ++i) {
+            output_tensor_f[i] = (ot[i] - zero_point) * scale;
+        }
+    }
 
     *output_tensor_size = model_tensor_size;
-    bh_memcpy_s(output_tensor, model_tensor_size * sizeof(float), tensor_f,
-                model_tensor_size * sizeof(float));
     return success;
 }
 
@@ -392,19 +447,35 @@ tensorflowlite_destroy(void *tflite_ctx)
     */
     TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
 
-    if (tfl_ctx->delegate != NULL) {
-#if defined(WASI_NN_ENABLE_GPU)
-        TfLiteGpuDelegateV2Delete(tfl_ctx->delegate);
-#elif defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
-        TfLiteExternalDelegateDelete(tfl_ctx->delegate);
-#endif
-    }
-
     NN_DBG_PRINTF("Freeing memory.");
     for (int i = 0; i < MAX_GRAPHS_PER_INST; ++i) {
         tfl_ctx->models[i].model.reset();
-        if (tfl_ctx->models[i].model_pointer)
+        if (tfl_ctx->models[i].model_pointer) {
+            if (tfl_ctx->delegate) {
+                switch (tfl_ctx->models[i].target) {
+                    case gpu:
+                    {
+#if WASM_ENABLE_WASI_NN_GPU != 0
+                        TfLiteGpuDelegateV2Delete(tfl_ctx->delegate);
+#else
+                        NN_ERR_PRINTF("GPU delegate delete but not enabled.");
+#endif
+                        break;
+                    }
+                    case tpu:
+                    {
+#if WASM_ENABLE_WASI_NN_EXTERNAL_DELEGATE != 0
+                        TfLiteExternalDelegateDelete(tfl_ctx->delegate);
+#else
+                        NN_ERR_PRINTF(
+                            "External delegate delete but not enabled.");
+#endif
+                        break;
+                    }
+                }
+            }
             wasm_runtime_free(tfl_ctx->models[i].model_pointer);
+        }
         tfl_ctx->models[i].model_pointer = NULL;
     }
     for (int i = 0; i < MAX_GRAPH_EXEC_CONTEXTS_PER_INST; ++i) {

+ 0 - 1
core/iwasm/libraries/wasi-nn/test/Dockerfile.cpu

@@ -30,7 +30,6 @@ RUN make -j "$(grep -c ^processor /proc/cpuinfo)"
 
 FROM ubuntu:22.04
 
-COPY --from=base /home/wamr/product-mini/platforms/linux/build/libvmlib.so /libvmlib.so
 COPY --from=base /home/wamr/product-mini/platforms/linux/build/iwasm /iwasm
 
 ENTRYPOINT [ "/iwasm" ]

+ 1 - 2
core/iwasm/libraries/wasi-nn/test/Dockerfile.nvidia-gpu

@@ -24,7 +24,7 @@ RUN apt-get install -y wget ca-certificates --no-install-recommends \
 
 RUN cmake \
     -DWAMR_BUILD_WASI_NN=1 \
-    -DWASI_NN_ENABLE_GPU=1 \
+    -DWAMR_BUILD_WASI_NN_ENABLE_GPU=1 \
     ..
 
 RUN make -j "$(grep -c ^processor /proc/cpuinfo)"
@@ -44,7 +44,6 @@ RUN mkdir -p /etc/OpenCL/vendors && \
 ENV NVIDIA_VISIBLE_DEVICES=all
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 
-COPY --from=base /home/wamr/product-mini/platforms/linux/build/libvmlib.so /libvmlib.so
 COPY --from=base /home/wamr/product-mini/platforms/linux/build/iwasm /iwasm
 
 ENTRYPOINT [ "/iwasm" ]

+ 37 - 0
core/iwasm/libraries/wasi-nn/test/Dockerfile.tpu

@@ -0,0 +1,37 @@
+# Copyright (C) 2019 Intel Corporation.  All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+FROM ubuntu:20.04 AS base
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# hadolint ignore=DL3008
+RUN apt-get update && apt-get install -y \
+    cmake build-essential git curl gnupg --no-install-recommends && \
+    rm -rf /var/lib/apt/lists/*
+
+# hadolint ignore=DL3008,DL4006
+RUN echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | tee /etc/apt/sources.list.d/coral-edgetpu.list && \
+    curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \
+    apt-get update && apt-get install -y libedgetpu1-std --no-install-recommends && \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /home/wamr
+
+COPY . .
+
+WORKDIR /home/wamr/product-mini/platforms/linux/build
+
+RUN cmake \
+  -DWAMR_BUILD_WASI_NN=1 \
+  -DWAMR_BUILD_WASI_NN_ENABLE_EXTERNAL_DELEGATE=1 \
+  -DWAMR_BUILD_WASI_NN_EXTERNAL_DELEGATE_PATH="libedgetpu.so.1.0" \
+  -DWAMR_BUILD_WASI_NN_ENABLE_GPU=1 \
+  ..
+
+RUN make -j "$(grep -c ^processor /proc/cpuinfo)" && \
+    cp /home/wamr/product-mini/platforms/linux/build/iwasm /iwasm
+
+WORKDIR /assets
+
+ENTRYPOINT [ "/iwasm" ]

+ 21 - 1
core/iwasm/libraries/wasi-nn/test/build.sh

@@ -1,6 +1,10 @@
+#!/bin/sh
+
 # Copyright (C) 2019 Intel Corporation.  All rights reserved.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+CURR_PATH=$(cd $(dirname $0) && pwd -P)
+
 # WASM application that uses WASI-NN
 
 /opt/wasi-sdk/bin/clang \
@@ -13,9 +17,25 @@
 
 # TFLite models to use in the tests
 
-cd models
+cd ${CURR_PATH}/models
 python3 average.py
 python3 max.py
 python3 mult_dimension.py
 python3 mult_outputs.py
 python3 sum.py
+
+# Specific tests for TPU
+
+cd ${CURR_PATH}
+/opt/wasi-sdk/bin/clang \
+    -Wl,--allow-undefined \
+    -Wl,--strip-all,--no-entry \
+    --sysroot=/opt/wasi-sdk/share/wasi-sysroot \
+    -I../include -I../src/utils \
+    -o test_tensorflow_quantized.wasm \
+    test_tensorflow_quantized.c utils.c
+
+cd ${CURR_PATH}/models
+python3 quantized.py
+
+cd ${CURR_PATH}

+ 30 - 0
core/iwasm/libraries/wasi-nn/test/models/quantized.py

@@ -0,0 +1,30 @@
+# Copyright (C) 2019 Intel Corporation.  All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+import tensorflow as tf
+import numpy as np
+import pathlib
+
+model = tf.keras.Sequential([
+    tf.keras.layers.InputLayer(input_shape=[5, 5, 1]),
+    tf.keras.layers.AveragePooling2D(
+        pool_size=(5, 5), strides=None, padding="valid", data_format=None)
+
+])
+
+def representative_dataset():
+    for _ in range(1000):
+      data = np.random.randint(0, 25, (1, 5, 5, 1))
+      yield [data.astype(np.float32)]
+
+converter = tf.lite.TFLiteConverter.from_keras_model(model)
+converter.optimizations = [tf.lite.Optimize.DEFAULT]
+converter.representative_dataset = representative_dataset
+converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
+converter.inference_input_type = tf.uint8  # or tf.int8
+converter.inference_output_type = tf.uint8  # or tf.int8
+tflite_model = converter.convert()
+
+tflite_models_dir = pathlib.Path("./")
+tflite_model_file = tflite_models_dir / "quantized_model.tflite"
+tflite_model_file.write_bytes(tflite_model)

+ 63 - 0
core/iwasm/libraries/wasi-nn/test/test_tensorflow_quantized.c

@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2019 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include <math.h>
+
+#include "utils.h"
+#include "logger.h"
+
+#undef EPSILON
+#define EPSILON 1e-2
+
+void
+test_average_quantized(execution_target target)
+{
+    int dims[] = { 1, 5, 5, 1 };
+    input_info input = create_input(dims);
+
+    uint32_t output_size = 0;
+    float *output =
+        run_inference(target, input.input_tensor, input.dim, &output_size,
+                      "./models/quantized_model.tflite", 1);
+
+    NN_INFO_PRINTF("Output size: %d", output_size);
+    NN_INFO_PRINTF("Result: average is %f", output[0]);
+    // NOTE: 11.95 instead of 12 because of errors due to quantization
+    assert(fabs(output[0] - 11.95) < EPSILON);
+
+    free(input.dim);
+    free(input.input_tensor);
+    free(output);
+}
+
+int
+main()
+{
+    char *env = getenv("TARGET");
+    if (env == NULL) {
+        NN_INFO_PRINTF("Usage:\n--env=\"TARGET=[cpu|gpu|tpu]\"");
+        return 1;
+    }
+    execution_target target;
+    if (strcmp(env, "cpu") == 0)
+        target = cpu;
+    else if (strcmp(env, "gpu") == 0)
+        target = gpu;
+    else if (strcmp(env, "tpu") == 0)
+        target = tpu;
+    else {
+        NN_ERR_PRINTF("Wrong target!");
+        return 1;
+    }
+    NN_INFO_PRINTF("################### Testing quantized model...");
+    test_average_quantized(target);
+
+    NN_INFO_PRINTF("Tests: passed!");
+    return 0;
+}

+ 2 - 2
core/iwasm/libraries/wasi-nn/test/utils.c

@@ -132,8 +132,8 @@ run_inference(execution_target target, float *input, uint32_t *input_size,
         *output_size = MAX_OUTPUT_TENSOR_SIZE - *output_size;
         if (wasm_get_output(ctx, i, &out_tensor[offset], output_size)
             != success) {
-            NN_ERR_PRINTF("Error when getting output.");
-            exit(1);
+            NN_ERR_PRINTF("Error when getting index %d.", i);
+            break;
         }
 
         offset += *output_size;

+ 1 - 1
core/iwasm/libraries/wasi-nn/test/utils.h

@@ -11,7 +11,7 @@
 #include "wasi_nn.h"
 
 #define MAX_MODEL_SIZE 85000000
-#define MAX_OUTPUT_TENSOR_SIZE 200
+#define MAX_OUTPUT_TENSOR_SIZE 1000000
 #define INPUT_TENSOR_DIMS 4
 #define EPSILON 1e-8
 

+ 3 - 0
core/shared/platform/common/posix/posix_socket.c

@@ -275,6 +275,9 @@ os_socket_recv_from(bh_socket_t socket, void *buf, unsigned int len, int flags,
             return -1;
         }
     }
+    else if (src_addr) {
+        memset(src_addr, 0, sizeof(*src_addr));
+    }
 
     return ret;
 }

+ 68 - 5
core/shared/platform/esp-idf/espidf_memmap.c

@@ -5,16 +5,34 @@
 
 #include "platform_api_vmcore.h"
 #include "platform_api_extension.h"
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+#include "soc/mmu.h"
+#include "rom/cache.h"
+
+#define MEM_DUAL_BUS_OFFSET (IRAM0_CACHE_ADDRESS_LOW - DRAM0_CACHE_ADDRESS_LOW)
+
+#define in_ibus_ext(addr)                      \
+    (((uint32)addr >= IRAM0_CACHE_ADDRESS_LOW) \
+     && ((uint32)addr < IRAM0_CACHE_ADDRESS_HIGH))
+
+static portMUX_TYPE s_spinlock = portMUX_INITIALIZER_UNLOCKED;
+#endif
 
 void *
 os_mmap(void *hint, size_t size, int prot, int flags)
 {
     if (prot & MMAP_PROT_EXEC) {
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+        uint32_t mem_caps = MALLOC_CAP_SPIRAM;
+#else
+        uint32_t mem_caps = MALLOC_CAP_EXEC;
+#endif
+
         // Memory allocation with MALLOC_CAP_EXEC will return 4-byte aligned
         // Reserve extra 4 byte to fixup alignment and size for the pointer to
         // the originally allocated address
         void *buf_origin =
-            heap_caps_malloc(size + 4 + sizeof(uintptr_t), MALLOC_CAP_EXEC);
+            heap_caps_malloc(size + 4 + sizeof(uintptr_t), mem_caps);
         if (!buf_origin) {
             return NULL;
         }
@@ -25,19 +43,35 @@ os_mmap(void *hint, size_t size, int prot, int flags)
 
         uintptr_t *addr_field = buf_fixed - sizeof(uintptr_t);
         *addr_field = (uintptr_t)buf_origin;
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+        return buf_fixed + MEM_DUAL_BUS_OFFSET;
+#else
         return buf_fixed;
+#endif
     }
     else {
-        return os_malloc(size);
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+        uint32_t mem_caps = MALLOC_CAP_SPIRAM;
+#else
+        uint32_t mem_caps = MALLOC_CAP_8BIT;
+#endif
+        return heap_caps_malloc(size, mem_caps);
     }
 }
 
 void
 os_munmap(void *addr, size_t size)
 {
+    char *ptr = (char *)addr;
+
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+    if (in_ibus_ext(ptr)) {
+        ptr -= MEM_DUAL_BUS_OFFSET;
+    }
+#endif
     // We don't need special handling of the executable allocations
     // here, free() of esp-idf handles it properly
-    return os_free(addr);
+    return os_free(ptr);
 }
 
 int
@@ -47,5 +81,34 @@ os_mprotect(void *addr, size_t size, int prot)
 }
 
 void
-os_dcache_flush()
-{}
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+    IRAM_ATTR
+#endif
+    os_dcache_flush()
+{
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+    uint32_t preload;
+    extern void Cache_WriteBack_All(void);
+
+    portENTER_CRITICAL(&s_spinlock);
+
+    Cache_WriteBack_All();
+    preload = Cache_Disable_ICache();
+    Cache_Enable_ICache(preload);
+
+    portEXIT_CRITICAL(&s_spinlock);
+#endif
+}
+
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+void *
+os_get_dbus_mirror(void *ibus)
+{
+    if (in_ibus_ext(ibus)) {
+        return (void *)((char *)ibus - MEM_DUAL_BUS_OFFSET);
+    }
+    else {
+        return ibus;
+    }
+}
+#endif

+ 6 - 0
core/shared/platform/esp-idf/shared_platform.cmake

@@ -11,3 +11,9 @@ include_directories(${PLATFORM_SHARED_DIR}/../include)
 file (GLOB_RECURSE source_all ${PLATFORM_SHARED_DIR}/*.c)
 
 set (PLATFORM_SHARED_SOURCE ${source_all} ${PLATFORM_COMMON_MATH_SOURCE})
+
+# If PSRAM is enabled on ESP32-S3, it is better to put AOT into PSRAM, so that
+# users can use SRAM for Wi-Fi/BLE and peripheral drivers.
+if(CONFIG_ESP32S3_SPIRAM_SUPPORT)
+    add_definitions(-DWASM_MEM_DUAL_BUS_MIRROR=1)
+endif()

+ 5 - 0
core/shared/platform/include/platform_api_vmcore.h

@@ -129,6 +129,11 @@ os_munmap(void *addr, size_t size);
 int
 os_mprotect(void *addr, size_t size, int prot);
 
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+void *
+os_get_dbus_mirror(void *ibus);
+#endif
+
 /**
  * Flush cpu data cache, in some CPUs, after applying relocation to the
  * AOT code, the code may haven't been written back to the cpu data cache,

+ 79 - 2
core/shared/platform/nuttx/nuttx_platform.c

@@ -10,6 +10,46 @@
 #include <nuttx/arch.h>
 #endif
 
+#if defined(CONFIG_ARCH_CHIP_ESP32S3)
+/*
+ * TODO: Move these methods below the operating system level
+ */
+#define MEM_DUAL_BUS_OFFSET (0x42000000 - 0x3C000000)
+#define IRAM0_CACHE_ADDRESS_LOW 0x42000000
+#define IRAM0_CACHE_ADDRESS_HIGH 0x44000000
+#define IRAM_ATTR locate_data(".iram1")
+
+#define in_ibus_ext(addr)                      \
+    (((uint32)addr >= IRAM0_CACHE_ADDRESS_LOW) \
+     && ((uint32)addr < IRAM0_CACHE_ADDRESS_HIGH))
+void IRAM_ATTR
+bus_sync(void)
+{
+    extern void cache_writeback_all(void);
+    extern uint32_t Cache_Disable_ICache(void);
+    extern void Cache_Enable_ICache(uint32_t autoload);
+
+    irqstate_t flags;
+    uint32_t preload;
+
+    flags = enter_critical_section();
+
+    cache_writeback_all();
+    preload = Cache_Disable_ICache();
+    Cache_Enable_ICache(preload);
+
+    leave_critical_section(flags);
+}
+#else
+#define MEM_DUAL_BUS_OFFSET (0)
+#define IRAM0_CACHE_ADDRESS_LOW (0)
+#define IRAM0_CACHE_ADDRESS_HIGH (0)
+#define in_ibus_ext(addr) (0)
+static void
+bus_sync(void)
+{}
+#endif
+
 int
 bh_platform_init()
 {
@@ -47,6 +87,10 @@ os_dumps_proc_mem_info(char *out, unsigned int size)
 void *
 os_mmap(void *hint, size_t size, int prot, int flags)
 {
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+    void *i_addr, *d_addr;
+#endif
+
 #if defined(CONFIG_ARCH_USE_TEXT_HEAP)
     if ((prot & MMAP_PROT_EXEC) != 0) {
         return up_textheap_memalign(sizeof(void *), size);
@@ -55,6 +99,17 @@ os_mmap(void *hint, size_t size, int prot, int flags)
 
     if ((uint64)size >= UINT32_MAX)
         return NULL;
+
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+    if ((prot & MMAP_PROT_EXEC) != 0) {
+        d_addr = malloc((uint32)size);
+        if (d_addr == NULL) {
+            return NULL;
+        }
+        i_addr = (void *)((uint8 *)d_addr + MEM_DUAL_BUS_OFFSET);
+        return in_ibus_ext(i_addr) ? i_addr : d_addr;
+    }
+#endif
     return malloc((uint32)size);
 }
 
@@ -67,7 +122,14 @@ os_munmap(void *addr, size_t size)
         return;
     }
 #endif
-    return free(addr);
+
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+    if (in_ibus_ext(addr)) {
+        free((void *)((uint8 *)addr - MEM_DUAL_BUS_OFFSET));
+        return;
+    }
+#endif
+    free(addr);
 }
 
 int
@@ -78,7 +140,22 @@ os_mprotect(void *addr, size_t size, int prot)
 
 void
 os_dcache_flush()
-{}
+{
+    bus_sync();
+}
+
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+void *
+os_get_dbus_mirror(void *ibus)
+{
+    if (in_ibus_ext(ibus)) {
+        return (void *)((uint8 *)ibus - MEM_DUAL_BUS_OFFSET);
+    }
+    else {
+        return ibus;
+    }
+}
+#endif
 
 /* If AT_FDCWD is provided, maybe we have openat family */
 #if !defined(AT_FDCWD)

+ 123 - 0
core/shared/utils/bh_atomic.h

@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2023 Amazon Inc.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _BH_ATOMIC_H
+#define _BH_ATOMIC_H
+
+#include "gnuc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Why don't we use C11 stdatomics here?
+ *
+ * Unlike C11 stdatomics,
+ *
+ * - bh_atomic_xxx_t is guaranteed to have the same size as the base type.
+ *   Thus more friendly to our AOT conventions.
+ *
+ * - It's available for C++.
+ *   Although C++23 will have C-compatible stdatomics.h, it isn't widely
+ *   available yet.
+ */
+
+/*
+ * Note about BH_ATOMIC_32_IS_ATOMIC
+ *
+ * If BH_ATOMIC_32_IS_ATOMIC == 0, BH_ATOMIC_xxx operations defined below
+ * are not really atomic and require an external lock.
+ *
+ * Expected usage is:
+ *
+ *     bh_atomic_32_t var = 0;
+ *     uint32 old;
+ * #if BH_ATOMIC_32_IS_ATOMIC == 0
+ *     lock(&some_lock);
+ * #endif
+ *     old = BH_ATOMIC_32_FETCH_AND(var, 1);
+ * #if BH_ATOMIC_32_IS_ATOMIC == 0
+ *     unlock(&some_lock);
+ * #endif
+ */
+
+typedef uint32 bh_atomic_32_t;
+
+#if defined(__GNUC_PREREQ)
+#if __GNUC_PREREQ(4, 7)
+#define CLANG_GCC_HAS_ATOMIC_BUILTIN
+#endif
+#elif defined(__clang__)
+#if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 0)
+#define CLANG_GCC_HAS_ATOMIC_BUILTIN
+#endif
+#endif
+
+#if defined(CLANG_GCC_HAS_ATOMIC_BUILTIN)
+#define BH_ATOMIC_32_IS_ATOMIC 1
+#define BH_ATOMIC_32_LOAD(v) __atomic_load_n(&(v), __ATOMIC_SEQ_CST)
+#define BH_ATOMIC_32_FETCH_OR(v, val) \
+    __atomic_fetch_or(&(v), (val), __ATOMIC_SEQ_CST)
+#define BH_ATOMIC_32_FETCH_AND(v, val) \
+    __atomic_fetch_and(&(v), (val), __ATOMIC_SEQ_CST)
+#define BH_ATOMIC_32_FETCH_ADD(v, val) \
+    __atomic_fetch_add(&(v), (val), __ATOMIC_SEQ_CST)
+#define BH_ATOMIC_32_FETCH_SUB(v, val) \
+    __atomic_fetch_sub(&(v), (val), __ATOMIC_SEQ_CST)
+#else /* else of defined(CLANG_GCC_HAS_ATOMIC_BUILTIN) */
+#define BH_ATOMIC_32_LOAD(v) (v)
+#define BH_ATOMIC_32_FETCH_OR(v, val) nonatomic_32_fetch_or(&(v), val)
+#define BH_ATOMIC_32_FETCH_AND(v, val) nonatomic_32_fetch_and(&(v), val)
+#define BH_ATOMIC_32_FETCH_ADD(v, val) nonatomic_32_fetch_add(&(v), val)
+#define BH_ATOMIC_32_FETCH_SUB(v, val) nonatomic_32_fetch_sub(&(v), val)
+
+static inline uint32
+nonatomic_32_fetch_or(bh_atomic_32_t *p, uint32 val)
+{
+    uint32 old = *p;
+    *p |= val;
+    return old;
+}
+
+static inline uint32
+nonatomic_32_fetch_and(bh_atomic_32_t *p, uint32 val)
+{
+    uint32 old = *p;
+    *p &= val;
+    return old;
+}
+
+static inline uint32
+nonatomic_32_fetch_add(bh_atomic_32_t *p, uint32 val)
+{
+    uint32 old = *p;
+    *p += val;
+    return old;
+}
+
+static inline uint32
+nonatomic_32_fetch_sub(bh_atomic_32_t *p, uint32 val)
+{
+    uint32 old = *p;
+    *p -= val;
+    return old;
+}
+
+/* The flag can be defined by the user if the platform
+   supports atomic access to uint32 aligned memory. */
+#ifdef WASM_UINT32_IS_ATOMIC
+#define BH_ATOMIC_32_IS_ATOMIC 1
+#else /* else of WASM_UINT32_IS_ATOMIC */
+#define BH_ATOMIC_32_IS_ATOMIC 0
+#endif /* WASM_UINT32_IS_ATOMIC */
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* end of _BH_ATOMIC_H */

+ 4 - 0
core/shared/utils/bh_common.c

@@ -31,6 +31,10 @@ b_memcpy_wa(void *s1, unsigned int s1max, const void *s2, unsigned int n)
     unsigned int *p;
     char *ps;
 
+    if (n == 0) {
+        return 0;
+    }
+
     if (pa > src) {
         pa -= 4;
     }

+ 15 - 15
core/shared/utils/bh_common.h

@@ -12,25 +12,25 @@
 extern "C" {
 #endif
 
-#define bh_memcpy_s(dest, dlen, src, slen)                            \
-    do {                                                              \
-        int _ret = slen == 0 ? 0 : b_memcpy_s(dest, dlen, src, slen); \
-        (void)_ret;                                                   \
-        bh_assert(_ret == 0);                                         \
+#define bh_memcpy_s(dest, dlen, src, slen)            \
+    do {                                              \
+        int _ret = b_memcpy_s(dest, dlen, src, slen); \
+        (void)_ret;                                   \
+        bh_assert(_ret == 0);                         \
     } while (0)
 
-#define bh_memcpy_wa(dest, dlen, src, slen)                            \
-    do {                                                               \
-        int _ret = slen == 0 ? 0 : b_memcpy_wa(dest, dlen, src, slen); \
-        (void)_ret;                                                    \
-        bh_assert(_ret == 0);                                          \
+#define bh_memcpy_wa(dest, dlen, src, slen)            \
+    do {                                               \
+        int _ret = b_memcpy_wa(dest, dlen, src, slen); \
+        (void)_ret;                                    \
+        bh_assert(_ret == 0);                          \
     } while (0)
 
-#define bh_memmove_s(dest, dlen, src, slen)                            \
-    do {                                                               \
-        int _ret = slen == 0 ? 0 : b_memmove_s(dest, dlen, src, slen); \
-        (void)_ret;                                                    \
-        bh_assert(_ret == 0);                                          \
+#define bh_memmove_s(dest, dlen, src, slen)            \
+    do {                                               \
+        int _ret = b_memmove_s(dest, dlen, src, slen); \
+        (void)_ret;                                    \
+        bh_assert(_ret == 0);                          \
     } while (0)
 
 #define bh_strcat_s(dest, dlen, src)            \

+ 0 - 0
core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/gnuc.h → core/shared/utils/gnuc.h


+ 1 - 1
core/version.h

@@ -7,5 +7,5 @@
 #define _WAMR_VERSION_H_
 #define WAMR_VERSION_MAJOR 1
 #define WAMR_VERSION_MINOR 2
-#define WAMR_VERSION_PATCH 2
+#define WAMR_VERSION_PATCH 3
 #endif

+ 6 - 1
doc/build_wamr.md

@@ -94,7 +94,12 @@ cmake -DWAMR_BUILD_PLATFORM=linux -DWAMR_BUILD_TARGET=ARM
 - **WAMR_BUILD_WASI_NN**=1/0, default to disable if not set
 
 #### **Enable lib wasi-nn GPU mode**
-- **WASI_NN_ENABLE_GPU**=1/0, default to disable if not set
+- **WAMR_BUILD_WASI_NN_ENABLE_GPU**=1/0, default to disable if not set
+
+#### **Enable lib wasi-nn external delegate mode**
+- **WAMR_BUILD_WASI_NN_ENABLE_EXTERNAL_DELEGATE**=1/0, default to disable if not set
+
+- **WAMR_BUILD_WASI_NN_EXTERNAL_DELEGATE_PATH**=Path to the external delegate shared library (e.g. `libedgetpu.so.1.0` for Coral USB)
 
 #### **Disable boundary check with hardware trap**
 - **WAMR_DISABLE_HW_BOUND_CHECK**=1/0, default to enable if not set and supported by platform

+ 44 - 43
doc/embed_wamr.md

@@ -258,68 +258,69 @@ We can't pass structure data or class objects through the pointer since the memo
 
 ## Execute wasm functions in multiple threads
 
-The `exec_env` is not thread safety, it will cause unexpected behavior if the same `exec_env` is used in multiple threads. However, we've provided two ways to execute wasm functions concurrently:
+It isn't safe to use an `exec_env` object in multiple threads concurrently.
+To run a multi-threaded application, you basically need a separate `exec_env`
+for each thread.
 
-- You can use `pthread` APIs in your wasm application, see [pthread library](./pthread_library.md) for more details.
+### Approaches to manage `exec_env` objects and threads
 
-- The `spawn exec_env` and `spawn thread` APIs are available, you can use these APIs to manage the threads in native:
+WAMR supports two approaches to manage `exec_env` and threads as described
+below.  While they are not exclusive, you usually only need to use one of
+them.
 
-  *spawn exec_env:*
+#### Make your WASM application manage threads
 
-  `spawn exec_env` API spawns a `new_exec_env` base on the original `exec_env`, use can use it in other threads:
+  You can make your WASM application spawn threads by itself,
+  typically using `pthread` APIs like `pthread_create`.
+  See [pthread library](./pthread_library.md) and
+  [pthread implementations](./pthread_impls.md) for more details.
+  In this case, WAMR manages `exec_env` for the spawned threads.
 
-  ```C
-  new_exec_env = wasm_runtime_spawn_exec_env(exec_env);
+#### Make your embedder manage threads
+
+  The `spawn exec_env` and `spawn thread` APIs are available for the embedder.
+  You can use these APIs to manage the threads.
+  See [Thread related embedder API](./embed_wamr_spawn_api.md) for details.
+
+### Other notes about threads
 
-    /* Then you can use new_exec_env in your new thread */
-    module_inst = wasm_runtime_get_module_inst(new_exec_env);
-    func_inst = wasm_runtime_lookup_function(module_inst, ...);
-    wasm_runtime_call_wasm(new_exec_env, func_inst, ...);
+* You can manage the maximum number of threads
 
-  /* you need to use this API to manually destroy the spawned exec_env */
-  wasm_runtime_destroy_spawned_exec_env(new_exec_env);
+  ```C
+  init_args.max_thread_num = THREAD_NUM;
+  /* If this init argument is not set, the default maximum thread number is 4 */
   ```
 
-  *spawn thread:*
+* To share memory among threads, you need to build your WASM application with shared memory
 
-  You can also use `spawn thread` API to avoid manually manage the spawned exec_env:
+  For example, it can be done with `--shared-memory` and `-pthread`.
 
-  ```C
-  wasm_thread_t wasm_tid;
-  void *wamr_thread_cb(wasm_exec_env_t exec_env, void *arg)
-  {
-    module_inst = wasm_runtime_get_module_inst(exec_env);
-    func_inst = wasm_runtime_lookup_function(module_inst, ...);
-    wasm_runtime_call_wasm(exec_env, func_inst, ...);
-  }
-  wasm_runtime_spawn_thread(exec_env, &wasm_tid, wamr_thread_cb, NULL);
-  /* Use wasm_runtime_join_thread to join the spawned thread */
-  wasm_runtime_join_thread(wasm_tid, NULL);
+  ```bash
+    /opt/wasi-sdk/bin/clang -o test.wasm test.c -nostdlib -pthread    \
+      -Wl,--shared-memory,--max-memory=131072                         \
+      -Wl,--no-entry,--export=__heap_base,--export=__data_end         \
+      -Wl,--export=__wasm_call_ctors,--export=${your_func_name}
   ```
 
-**Note1: You can manage the maximum number of threads can be created:**
+* The corresponding threading feature should be enabled while building the runtime
 
-```C
-init_args.max_thread_num = THREAD_NUM;
-/* If this init argument is not set, the default maximum thread number is 4 */
-```
+  - WAMR lib-pthread (legacy)
 
-**Note2: The wasm application should be built with `--shared-memory` and `-pthread` enabled:**
+    ```bash
+    cmake .. -DWAMR_BUILD_LIB_PTHREAD=1
+    ```
 
-```bash
-  /opt/wasi-sdk/bin/clang -o test.wasm test.c -nostdlib -pthread    \
-    -Wl,--shared-memory,--max-memory=131072                         \
-    -Wl,--no-entry,--export=__heap_base,--export=__data_end         \
-    -Wl,--export=__wasm_call_ctors,--export=${your_func_name}
-```
+  - wasi-threads
 
-  **Note3: The pthread library feature should be enabled while building the runtime:**
+    ```bash
+    cmake .. -DWAMR_BUILD_LIB_WASI_THREADS=1
+    ```
 
-  ```bash
-  cmake .. -DWAMR_BUILD_LIB_PTHREAD=1
-  ```
+  - `wasm_runtime_spawn_exec_env` and `wasm_runtime_spawn_thread`
 
-[Here](../samples/spawn-thread) is a sample to show how to use these APIs.
+    ```bash
+    cmake .. -DWAMR_BUILD_THREAD_MGR=1 -DWAMR_BUILD_SHARED_MEMORY=1
+    ```
 
 ## The deinitialization procedure
 

+ 38 - 0
doc/embed_wamr_spawn_api.md

@@ -0,0 +1,38 @@
+# Thread related embedder API
+
+This document explains `wasm_runtime_spawn_exec_env` and
+`wasm_runtime_spawn_thread`.
+[Here](../samples/spawn-thread) is a sample to show how to use these APIs.
+
+  * spawn exec_env
+
+    `spawn exec_env` API creates a new `exec_env` based on the original `exec_env`. You can use it in other threads. It's up to the embedder how to manage host threads to run the new `exec_env`.
+
+    ```C
+    new_exec_env = wasm_runtime_spawn_exec_env(exec_env);
+
+      /* Then you can use new_exec_env in your new thread */
+      module_inst = wasm_runtime_get_module_inst(new_exec_env);
+      func_inst = wasm_runtime_lookup_function(module_inst, ...);
+      wasm_runtime_call_wasm(new_exec_env, func_inst, ...);
+
+    /* you need to use this API to manually destroy the spawned exec_env */
+    wasm_runtime_destroy_spawned_exec_env(new_exec_env);
+    ```
+
+  * spawn thread
+
+    Alternatively, you can use `spawn thread` API to avoid managing the extra exec_env and the corresponding host thread manually:
+
+    ```C
+    wasm_thread_t wasm_tid;
+    void *wamr_thread_cb(wasm_exec_env_t exec_env, void *arg)
+    {
+      module_inst = wasm_runtime_get_module_inst(exec_env);
+      func_inst = wasm_runtime_lookup_function(module_inst, ...);
+      wasm_runtime_call_wasm(exec_env, func_inst, ...);
+    }
+    wasm_runtime_spawn_thread(exec_env, &wasm_tid, wamr_thread_cb, NULL);
+    /* Use wasm_runtime_join_thread to join the spawned thread */
+    wasm_runtime_join_thread(wasm_tid, NULL);
+    ```

+ 15 - 0
doc/perf_tune.md

@@ -72,3 +72,18 @@ wasm_runtime_dump_pgo_prof_data_to_buf(wasm_module_inst_t module_inst, char *buf
 6. Run the optimized aot_file: `iwasm <aot_file>`.
 
 Developer can refer to the `test_pgo.sh` files under each benchmark folder for more details, e.g. [test_pgo.sh](../tests/benchmarks/coremark/test_pgo.sh) of CoreMark benchmark.
+
+## 6. Disable the memory boundary check
+
+Please note that this method is not a general solution since it may lead to security issues. It only boosts the performance in AOT mode, and only on platforms that don't support hardware traps for the memory boundary check.
+
+1. Build WAMR with `-DWAMR_CONFIGUABLE_BOUNDS_CHECKS=1` option.
+
+2. Compile AOT module by wamrc with `--bounds-check=0` option.
+
+3. Run the AOT module by iwasm with `--disable-bounds-checks` option.
+
+> Note: The size of the AOT file will be much smaller than the default, and some tricks become possible, such as letting the wasm application access the memory of the host OS directly.
+Please note that if this option is enabled, the wasm spec tests will fail since they require the memory boundary check. For example, the runtime will crash when accessing memory out of bounds in some cases instead of throwing an exception as the spec requires.
+
+You should only use this method for well tested wasm applications and make sure the memory access is safe.

+ 1 - 1
doc/source_debugging.md

@@ -44,7 +44,7 @@ iwasm -g=127.0.0.1:1234 test.wasm
 ``` bash
 git clone --branch release/13.x --depth=1 https://github.com/llvm/llvm-project
 cd llvm-project
-git apply ${WAMR_ROOT}/build-scripts/lldb-wasm.patch
+git apply ${WAMR_ROOT}/build-scripts/lldb_wasm.patch
 mkdir build-lldb
 cmake -S ./llvm -B build-lldb \
     -G Ninja \

+ 117 - 0
doc/xip.md

@@ -7,8 +7,125 @@ Some IoT devices may require to run the AOT file from flash or ROM which is read
 The XIP file is an AOT file without (or with few) relocations to patch the AOT code (or text section). Developer can use the option `--enable-indirect-mode --disable-llvm-intrinsics` for wamrc to generate the AOT file, e.g.:
 ```bash
 wamrc --enable-indirect-mode --disable-llvm-intrinsics -o <aot_file> <wasm_file>
+or
+wamrc --xip -o <aot_file> <wasm_file>
 ```
 
+Note: `--xip` is a shorthand for `--enable-indirect-mode --disable-llvm-intrinsics`.
+
 ## Known issues
 
 There may be some relocations to the ".rodata" like sections which require to patch the AOT code. More work will be done to resolve it in the future.
+
+## Tuning the XIP intrinsic functions
+
+WAMR provides a default mapping table for some targets, but it may not be the best one for your target. And it doesn't cover all the supported targets.
+
+So, wamrc provides the option `--enable-builtin-intrinsics=<intr1,intr2,...>` to make it possible to tune the intrinsic functions for your target.
+
+Firstly, you should understand why we don't use the LLVM intrinsic functions directly. The reason is that the LLVM intrinsic functions can't always be mapped to native instructions directly. For example, the wasm operation `i32.div_s` can't be mapped to a native instruction if the target doesn't support a division instruction; it will instead be translated into a call to a runtime function from libgcc/compiler-rt. This will cause the AOT code to have relocations to libgcc/compiler-rt, which is not acceptable for the XIP feature.
+
+So, we need to replace the LLVM intrinsic functions with functions implemented by the runtime itself, which can be called through the function pointer table (--enable-indirect-mode) and don't have relocations to libgcc/compiler-rt (--disable-llvm-intrinsics).
+
+Available intrinsic functions for tuning:
+
+| LLVM intrinsic function | Explanation |
+| --- | --- |
+| llvm.experimental.constrained.fadd.f32 | float32 add |
+| llvm.experimental.constrained.fadd.f64 | float64 add |
+| llvm.experimental.constrained.fsub.f32 | float32 sub |
+| llvm.experimental.constrained.fsub.f64 | float64 sub |
+| llvm.experimental.constrained.fmul.f32 | float32 mul |
+| llvm.experimental.constrained.fmul.f64 | float64 mul |
+| llvm.experimental.constrained.fdiv.f32 | float32 div |
+| llvm.experimental.constrained.fdiv.f64 | float64 div |
+| llvm.fabs.f32 | float32 abs |
+| llvm.fabs.f64 | float64 abs |
+| llvm.ceil.f32 | float32 ceil |
+| llvm.ceil.f64 | float64 ceil |
+| llvm.floor.f32 | float32 floor |
+| llvm.floor.f64 | float64 floor |
+| llvm.trunc.f32 | float32 trunc |
+| llvm.trunc.f64 | float64 trunc |
+| llvm.rint.f32 | float32 rint |
+| llvm.rint.f64 | float64 rint |
+| llvm.sqrt.f32 | float32 sqrt |
+| llvm.sqrt.f64 | float64 sqrt |
+| llvm.copysign.f32 | float32 copysign |
+| llvm.copysign.f64 | float64 copysign |
+| llvm.minnum.f32 | float32 minnum |
+| llvm.minnum.f64 | float64 minnum |
+| llvm.maxnum.f32 | float32 maxnum |
+| llvm.maxnum.f64 | float64 maxnum |
+| llvm.ctlz.i32 | int32 count leading zeros |
+| llvm.ctlz.i64 | int64 count leading zeros |
+| llvm.cttz.i32 | int32 count trailing zeros |
+| llvm.cttz.i64 | int64 count trailing zeros |
+| llvm.ctpop.i32 | int32 count population |
+| llvm.ctpop.i64 | int64 count population |
+| f64_convert_i32_s | int32 to float64 |
+| f64_convert_i32_u | uint32 to float64 |
+| f32_convert_i32_s | int32 to float32 |
+| f32_convert_i32_u | uint32 to float32 |
+| f64_convert_i64_s | int64 to float64 |
+| f64_convert_i64_u | uint64 to float64 |
+| f32_convert_i64_s | int64 to float32 |
+| f32_convert_i64_u | uint64 to float32 |
+| i32_trunc_f32_s | float32 to int32 |
+| i32_trunc_f32_u | float32 to uint32 |
+| i32_trunc_f64_s | float64 to int32 |
+| i32_trunc_f64_u | float64 to uint32 |
+| i64_trunc_f64_s | float64 to int64 |
+| i64_trunc_f64_u | float64 to uint64 |
+| i64_trunc_f32_s | float32 to int64 |
+| i64_trunc_f32_u | float32 to uint64 |
+| f32_demote_f64 | float64 to float32 |
+| f64_promote_f32 | float32 to float64 |
+| f32_cmp | float32 compare |
+| f64_cmp | float64 compare |
+| i64.div_s | int64 div |
+| i64.div_u | uint64 div |
+| i32.div_s | int32 div |
+| i32.div_u | uint32 div |
+| i64.rem_s | int64 rem |
+| i64.rem_u | uint64 rem |
+| i32.rem_s | int32 rem |
+| i32.rem_u | uint32 rem |
+| i64.or | int64 or |
+| i64.and | int64 and |
+| i32.const | emit i32 const into constant table |
+| i64.const | emit i64 const into constant table |
+| f32.const | emit f32 const into constant table |
+| f64.const | emit f64 const into constant table |
+
+Combined intrinsic groups are also provided to simplify the tuning:
+
+* all: all the above intrinsic functions
+* i32.common: i32.div_s, i32.div_u, i32.rem_s, i32.rem_u
+* i64.common: i64.div_s, i64.div_u, i64.rem_s, i64.rem_u, i64.or, i64.and
+* f32.common: f32_cmp, llvm.experimental.constrained.fadd.f32, llvm.experimental.constrained.fsub.f32, llvm.experimental.constrained.fmul.f32, llvm.experimental.constrained.fdiv.f32, llvm.fabs.f32, llvm.ceil.f32, llvm.floor.f32, llvm.trunc.f32, llvm.rint.f32, llvm.sqrt.f32, llvm.copysign.f32, llvm.minnum.f32, llvm.maxnum.f32
+* f64.common: f32_demote_f64, f64_promote_f32, f64_cmp, llvm.experimental.constrained.fadd.f64, llvm.experimental.constrained.fsub.f64, llvm.experimental.constrained.fmul.f64, llvm.experimental.constrained.fdiv.f64, llvm.fabs.f64, llvm.ceil.f64, llvm.floor.f64, llvm.trunc.f64, llvm.rint.f64, llvm.sqrt.f64, llvm.copysign.f64, llvm.minnum.f64, llvm.maxnum.f64
+* f32xi32: i32_trunc_f32_s, i32_trunc_f32_u, f32_convert_i32_s, f32_convert_i32_u
+* f64xi32: i32_trunc_f64_s, i32_trunc_f64_u, f64_convert_i32_s, f64_convert_i32_u
+* f32xi64: i64_trunc_f32_s, i64_trunc_f32_u, f32_convert_i64_s, f32_convert_i64_u
+* f64xi64: i64_trunc_f64_s, i64_trunc_f64_u, f64_convert_i64_s, f64_convert_i64_u
+* constop: i32.const, i64.const, f32.const, f64.const
+* fpxint: f32xi32, f64xi32, f32xi64, f64xi64
+* fp.common: f32.common, f64.common
+
+
+### Example
+
+The ARM Cortex-M55 has a double precision floating point unit, so it can support f32/f64 operations natively. But as a 32-bit MCU, it can only support 32-bit integer operations. So we can use the following command to generate the XIP binary:
+
+```
+wamrc --target=thumbv8m.main --cpu=cortex-m55 --xip --enable-builtin-intrinsics=i64.common -o hello.aot hello.wasm
+``` 
+
+The ARM Cortex-M3 has no floating point unit and can only support 32-bit integer operations. So we can use the following command to generate the XIP binary:
+
+```
+wamrc --target=thumbv7m --cpu=cortex-m3 --xip --enable-builtin-intrinsics=i64.common,fp.common,fpxint -o hello.aot hello.wasm
+```
+
+Other platforms can be tuned in the same way. Which intrinsics should be enabled depends on the target platform's hardware capabilities.

+ 121 - 22
language-bindings/python/src/wamr/wamrapi/wamr.py

@@ -2,6 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 from ctypes import Array
+from ctypes import addressof
 from ctypes import c_char
 from ctypes import c_uint
 from ctypes import c_uint8
@@ -10,6 +11,8 @@ from ctypes import cast
 from ctypes import create_string_buffer
 from ctypes import POINTER
 from ctypes import pointer
+from typing import List
+from typing import Tuple
 from wamr.wamrapi.iwasm import String
 from wamr.wamrapi.iwasm import Alloc_With_Pool
 from wamr.wamrapi.iwasm import RuntimeInitArgs
@@ -31,6 +34,14 @@ from wamr.wamrapi.iwasm import wasm_runtime_module_malloc
 from wamr.wamrapi.iwasm import wasm_runtime_module_free
 from wamr.wamrapi.iwasm import wasm_runtime_register_natives
 from wamr.wamrapi.iwasm import NativeSymbol
+from wamr.wamrapi.iwasm import wasm_runtime_start_debug_instance
+from wamr.wamrapi.iwasm import wasm_runtime_call_indirect
+from wamr.wamrapi.iwasm import wasm_runtime_get_module_inst
+from wamr.wamrapi.iwasm import wasm_runtime_addr_app_to_native
+from wamr.wamrapi.iwasm import wasm_runtime_addr_native_to_app
+from wamr.wamrapi.iwasm import wasm_runtime_set_wasi_args
+
+ID_TO_EXEC_ENV_MAPPING = {}
 
 
 class Engine:
@@ -43,16 +54,26 @@ class Engine:
         print("deleting Engine")
         wasm_runtime_destroy()
 
-    def _get_init_args(self, heap_size: int = 1024 * 512) -> RuntimeInitArgs:
+    def _get_init_args(
+        self,
+        heap_size: int = 1024 * 1024 * 2,
+        ip_addr: str = "127.0.0.1",
+        instance_port: int = 1234,
+    ) -> RuntimeInitArgs:
         init_args = RuntimeInitArgs()
         init_args.mem_alloc_type = Alloc_With_Pool
         init_args.mem_alloc_option.pool.heap_buf = cast(
             (c_char * heap_size)(), c_void_p
         )
         init_args.mem_alloc_option.pool.heap_size = heap_size
+        # Debug port setting
+        init_args.ip_addr = bytes(ip_addr, "utf-8")
+        init_args.instance_port = instance_port
         return init_args
 
-    def register_natives(self, module_name: str, native_symbols: list[NativeSymbol]) -> None:
+    def register_natives(
+        self, module_name: str, native_symbols: List[NativeSymbol]
+    ) -> None:
         module_name = String.from_param(module_name)
         # WAMR does not copy the symbols. We must store them.
         for native in native_symbols:
@@ -62,12 +83,13 @@ class Engine:
             module_name,
             cast(
                 (NativeSymbol * len(native_symbols))(*native_symbols),
-                POINTER(NativeSymbol)
+                POINTER(NativeSymbol),
             ),
-            len(native_symbols)
+            len(native_symbols),
         ):
             raise Exception("Error while registering symbols")
 
+
 class Module:
     __create_key = object()
 
@@ -86,7 +108,7 @@ class Module:
         print("deleting Module")
         wasm_runtime_unload(self.module)
 
-    def _create_module(self, fp: str) -> tuple[wasm_module_t, Array[c_uint]]:
+    def _create_module(self, fp: str) -> Tuple[wasm_module_t, "Array[c_uint]"]:
         with open(fp, "rb") as f:
             data = f.read()
             data = (c_uint8 * len(data))(*data)
@@ -99,14 +121,52 @@ class Module:
 
 
 class Instance:
-    def __init__(self, module: Module, stack_size: int = 65536, heap_size: int = 16384):
+    def __init__(
+        self,
+        module: Module,
+        stack_size: int = 65536,
+        heap_size: int = 16384,
+        dir_list: List[str] | None = None,
+        preinitialized_module_inst: wasm_module_inst_t | None = None,
+    ):
+        # Store module ensures GC does not remove it
         self.module = module
-        self.module_inst = self._create_module_inst(module, stack_size, heap_size)
+        if dir_list:
+            self._set_wasi_args(module, dir_list)
+        if preinitialized_module_inst is None:
+            self.module_inst = self._create_module_inst(module, stack_size, heap_size)
+        else:
+            self.module_inst = preinitialized_module_inst
 
     def __del__(self):
         print("deleting Instance")
         wasm_runtime_deinstantiate(self.module_inst)
 
+    def _set_wasi_args(self, module: Module, dir_list: List[str]) -> None:
+        LP_c_char = POINTER(c_char)
+        LP_LP_c_char = POINTER(LP_c_char)
+
+        p = (LP_c_char * len(dir_list))()
+        for i, dir in enumerate(dir_list):
+            enc_dir = dir.encode("utf-8")
+            p[i] = create_string_buffer(enc_dir)
+
+        na = cast(p, LP_LP_c_char)
+        wasm_runtime_set_wasi_args(
+            module.module, na, len(dir_list), None, 0, None, 0, None, 0
+        )
+
+    def _create_module_inst(
+        self, module: Module, stack_size: int, heap_size: int
+    ) -> wasm_module_inst_t:
+        error_buf = create_string_buffer(128)
+        module_inst = wasm_runtime_instantiate(
+            module.module, stack_size, heap_size, error_buf, len(error_buf)
+        )
+        if not module_inst:
+            raise Exception("Error while creating module instance")
+        return module_inst
+
     def malloc(self, nbytes: int, native_handler) -> c_uint:
         return wasm_runtime_module_malloc(self.module_inst, nbytes, native_handler)
 
@@ -119,31 +179,70 @@ class Instance:
             raise Exception("Error while looking-up function")
         return func
 
-    def _create_module_inst(self, module: Module, stack_size: int, heap_size: int) -> wasm_module_inst_t:
-        error_buf = create_string_buffer(128)
-        module_inst = wasm_runtime_instantiate(
-            module.module, stack_size, heap_size, error_buf, len(error_buf)
-        )
-        if not module_inst:
-            raise Exception("Error while creating module instance")
-        return module_inst
+    def native_addr_to_app_addr(self, native_addr) -> c_void_p:
+        return wasm_runtime_addr_native_to_app(self.module_inst, native_addr)
+
+    def app_addr_to_native_addr(self, app_addr) -> c_void_p:
+        return wasm_runtime_addr_app_to_native(self.module_inst, app_addr)
 
 
 class ExecEnv:
     def __init__(self, module_inst: Instance, stack_size: int = 65536):
         self.module_inst = module_inst
         self.exec_env = self._create_exec_env(module_inst, stack_size)
+        self.env = addressof(self.exec_env.contents)
+        self.own_c = True
+
+        ID_TO_EXEC_ENV_MAPPING[str(self.env)] = self
 
     def __del__(self):
-        print("deleting ExecEnv")
-        wasm_runtime_destroy_exec_env(self.exec_env)
+        if self.own_c:
+            print("deleting ExecEnv")
+            wasm_runtime_destroy_exec_env(self.exec_env)
+            del ID_TO_EXEC_ENV_MAPPING[str(self.env)]
+
+    def _create_exec_env(
+        self, module_inst: Instance, stack_size: int
+    ) -> wasm_exec_env_t:
+        exec_env = wasm_runtime_create_exec_env(module_inst.module_inst, stack_size)
+        if not exec_env:
+            raise Exception("Error while creating execution environment")
+        return exec_env
 
     def call(self, func: wasm_function_inst_t, argc: int, argv: "POINTER[c_uint]"):
         if not wasm_runtime_call_wasm(self.exec_env, func, argc, argv):
             raise Exception("Error while calling function")
 
-    def _create_exec_env(self, module_inst: Instance, stack_size: int) -> wasm_exec_env_t:
-        exec_env = wasm_runtime_create_exec_env(module_inst.module_inst, stack_size)
-        if not exec_env:
-            raise Exception("Error while creating execution environment")
-        return exec_env
+    def get_module_inst(self) -> Instance:
+        return self.module_inst
+
+    def start_debugging(self) -> int:
+        return wasm_runtime_start_debug_instance(self.exec_env)
+
+    def call_indirect(self, element_index: int, argc: int, argv: "POINTER[c_uint]"):
+        if not wasm_runtime_call_indirect(self.exec_env, element_index, argc, argv):
+            raise Exception("Error while calling function")
+
+    @staticmethod
+    def wrap(env: int) -> "ExecEnv":
+        if str(env) in ID_TO_EXEC_ENV_MAPPING:
+            return ID_TO_EXEC_ENV_MAPPING[str(env)]
+        return InternalExecEnv(env)
+
+
+class InternalExecEnv(ExecEnv):
+    """
+    Generate Python ExecEnv-like object from a `wasm_exec_env_t` index.
+    """
+
+    def __init__(self, env: int):
+        self.env = env
+        self.exec_env = cast(env, wasm_exec_env_t)
+        self.module_inst = Instance(
+            module=object(),
+            preinitialized_module_inst=wasm_runtime_get_module_inst(self.exec_env),
+        )
+        ID_TO_EXEC_ENV_MAPPING[str(env)] = self
+
+    def __del__(self):
+        del ID_TO_EXEC_ENV_MAPPING[str(self.env)]

+ 6 - 1
language-bindings/python/utils/create_lib.sh

@@ -12,7 +12,12 @@ WAMR_BUILD_PLATFORM=${WAMR_BUILD_PLATFORM:-${UNAME}}
 cd ${ROOT_DIR}/product-mini/platforms/${WAMR_BUILD_PLATFORM}
 
 mkdir -p build && cd build
-cmake ..
+cmake \
+    -DWAMR_BUILD_DEBUG_INTERP=1 \
+    -DWAMR_BUILD_LIB_PTHREAD=1 \
+    -DWAMR_BUILD_LIB_WASI_THREADS=1 \
+    -DWAMR_BUILD_LIB_WASI=1 \
+    ..
 make -j
 
 case ${UNAME} in

+ 3 - 6
language-bindings/python/wamr-api/README.md

@@ -22,10 +22,7 @@ bash language-bindings/python/utils/create_lib.sh
 
 This will build and copy libiwasm into the package.
 
-## Examples
+## Samples
 
-There is a [simple example](./samples/main.py) to show how to use bindings.
-
-```
-python samples/main.py
-```
+- **[basic](./samples/basic)**: Demonstrates how to use the basic Python bindings.
+- **[native-symbol](./samples/native-symbol)**: Demonstrates how to call WASM from Python and how to export Python functions into WASM.

Vissa filer visades inte eftersom för många filer har ändrats