Ver Fonte

Merge branch main into dev/gc_refactor

Wenyong Huang há 2 anos atrás
pai
commit
803597dc55
95 ficheiros alterados com 3669 adições e 1709 exclusões
  1. 47 31
      .devcontainer/Dockerfile
  2. 13 10
      .devcontainer/devcontainer.json
  3. 25 0
      .github/workflows/build_wamr_lldb.yml
  4. 2 2
      .github/workflows/compilation_on_android_ubuntu.yml
  5. 47 0
      .github/workflows/hadolint_dockerfiles.yml
  6. 2 2
      .github/workflows/nightly_run.yml
  7. 2 2
      ATTRIBUTIONS.md
  8. 4 0
      build-scripts/config_common.cmake
  9. 124 0
      ci/validate_lldb.py
  10. 17 1
      core/config.h
  11. 129 0
      core/iwasm/aot/aot_intrinsic.c
  12. 19 0
      core/iwasm/aot/aot_loader.c
  13. 45 78
      core/iwasm/aot/aot_runtime.c
  14. 12 6
      core/iwasm/aot/aot_runtime.h
  15. 14 18
      core/iwasm/aot/arch/aot_reloc_riscv.c
  16. 19 1
      core/iwasm/aot/arch/aot_reloc_xtensa.c
  17. 3 9
      core/iwasm/common/wasm_exec_env.h
  18. 63 45
      core/iwasm/common/wasm_memory.c
  19. 0 10
      core/iwasm/common/wasm_memory.h
  20. 63 18
      core/iwasm/common/wasm_runtime_common.c
  21. 13 1
      core/iwasm/common/wasm_runtime_common.h
  22. 86 115
      core/iwasm/common/wasm_shared_memory.c
  23. 12 32
      core/iwasm/common/wasm_shared_memory.h
  24. 48 0
      core/iwasm/common/wasm_suspend_flags.h
  25. 2 2
      core/iwasm/compilation/aot.h
  26. 1 70
      core/iwasm/compilation/aot_compiler.c
  27. 20 0
      core/iwasm/compilation/aot_emit_aot_file.c
  28. 55 1
      core/iwasm/compilation/aot_llvm.c
  29. 4 0
      core/iwasm/compilation/aot_llvm.h
  30. 49 95
      core/iwasm/compilation/aot_llvm_extra.cpp
  31. 16 16
      core/iwasm/compilation/debug/dwarf_extractor.cpp
  32. 2 2
      core/iwasm/compilation/debug/dwarf_extractor.h
  33. 20 20
      core/iwasm/fast-jit/fe/jit_emit_table.c
  34. 4 2
      core/iwasm/include/aot_export.h
  35. 19 0
      core/iwasm/include/wasm_export.h
  36. 18 1
      core/iwasm/interpreter/wasm.h
  37. 132 111
      core/iwasm/interpreter/wasm_interp_classic.c
  38. 105 94
      core/iwasm/interpreter/wasm_interp_fast.c
  39. 7 5
      core/iwasm/interpreter/wasm_loader.c
  40. 2 1
      core/iwasm/interpreter/wasm_mini_loader.c
  41. 48 80
      core/iwasm/interpreter/wasm_runtime.c
  42. 6 5
      core/iwasm/interpreter/wasm_runtime.h
  43. 13 6
      core/iwasm/libraries/lib-pthread/lib_pthread_wrapper.c
  44. 3 1
      core/iwasm/libraries/lib-socket/inc/wasi_socket_ext.h
  45. 5 0
      core/iwasm/libraries/lib-socket/src/wasi/wasi_socket_ext.c
  46. 21 2
      core/iwasm/libraries/lib-socket/test/nslookup.c
  47. 112 113
      core/iwasm/libraries/lib-socket/test/tcp_udp.c
  48. 1 1
      core/iwasm/libraries/lib-wasi-threads/lib_wasi_threads_wrapper.c
  49. 4 1
      core/iwasm/libraries/lib-wasi-threads/test/build.sh
  50. 3 0
      core/iwasm/libraries/lib-wasi-threads/test/manifest.json
  51. 5 0
      core/iwasm/libraries/lib-wasi-threads/test/skip.json
  52. 114 0
      core/iwasm/libraries/lib-wasi-threads/test/spawn_stress_test.c
  53. 1 1
      core/iwasm/libraries/libc-uvwasi/libc_uvwasi.cmake
  54. 3 1
      core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/include/wasmtime_ssp.h
  55. 6 3
      core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/posix.c
  56. 24 20
      core/iwasm/libraries/thread-mgr/thread_manager.c
  57. 58 0
      core/iwasm/libraries/wasi-nn/external/CMakeLists.txt
  58. 13 0
      core/iwasm/libraries/wasi-nn/external/README.md
  59. 133 63
      core/iwasm/libraries/wasi-nn/src/wasi_nn.c
  60. 3 8
      core/iwasm/libraries/wasi-nn/src/wasi_nn_private.h
  61. 1 2
      core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp
  62. 3 0
      core/shared/platform/common/posix/posix_socket.c
  63. 68 5
      core/shared/platform/esp-idf/espidf_memmap.c
  64. 6 0
      core/shared/platform/esp-idf/shared_platform.cmake
  65. 5 0
      core/shared/platform/include/platform_api_vmcore.h
  66. 79 2
      core/shared/platform/nuttx/nuttx_platform.c
  67. 123 0
      core/shared/utils/bh_atomic.h
  68. 0 0
      core/shared/utils/gnuc.h
  69. 44 43
      doc/embed_wamr.md
  70. 38 0
      doc/embed_wamr_spawn_api.md
  71. 15 0
      doc/perf_tune.md
  72. 117 0
      doc/xip.md
  73. 1 11
      product-mini/README.md
  74. 3 1
      product-mini/platforms/esp-idf/build_and_run.sh
  75. 7 1
      product-mini/platforms/esp-idf/main/main.c
  76. 64 0
      product-mini/platforms/linux-sgx/enclave-sample/Enclave/Enclave.cpp
  77. 3 1
      product-mini/platforms/linux/CMakeLists.txt
  78. 21 0
      product-mini/platforms/nuttx/wamr.mk
  79. 23 1
      product-mini/platforms/posix/main.c
  80. 6 1
      product-mini/platforms/windows/main.c
  81. 60 0
      product-mini/platforms/zephyr/simple/Dockerfile
  82. 58 0
      product-mini/platforms/zephyr/simple/Dockerfile.old
  83. 106 0
      product-mini/platforms/zephyr/simple/README.md
  84. 0 25
      product-mini/platforms/zephyr/simple/README_docker.md
  85. 20 0
      samples/bh_atomic/CMakeLists.txt
  86. 42 0
      samples/bh_atomic/main.c
  87. 183 132
      samples/workload/XNNPACK/CMakeLists.txt
  88. 21 20
      samples/workload/XNNPACK/README.md
  89. 95 98
      samples/workload/XNNPACK/xnnpack.patch
  90. 334 160
      test-tools/host-tool/external/cJSON/cJSON.c
  91. 94 65
      test-tools/host-tool/external/cJSON/cJSON.h
  92. 0 20
      tests/wamr-test-suites/spec-test-script/ignore_cases.patch
  93. 1 1
      tests/wamr-test-suites/test_wamr.sh
  94. 38 14
      tests/wamr-test-suites/wasi-test-script/run_wasi_tests.sh
  95. 149 0
      wamr-compiler/main.c

+ 47 - 31
.devcontainer/Dockerfile

@@ -1,20 +1,21 @@
 # Copyright (C) 2019 Intel Corporation.  All rights reserved.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.195.0/containers/cpp/.devcontainer/base.Dockerfile
-# [Choice] Debian / Ubuntu version (use Debian 11/9, Ubuntu 18.04/21.04 on local arm64/Apple Silicon): debian-11, debian-10, debian-9, ubuntu-21.04, ubuntu-20.04, ubuntu-18.04
-ARG VARIANT=ubuntu-20.04
-FROM mcr.microsoft.com/vscode/devcontainers/cpp:0-${VARIANT}
+# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.245.2/containers/cpp/.devcontainer/base.Dockerfile
+# [Choice] Debian / Ubuntu version (use Debian 12/11/9, Ubuntu 18.04/21.04 on local arm64/Apple Silicon): debian-12, debian-11, debian-10, debian-9, ubuntu-21.04, ubuntu-20.04, ubuntu-18.04
+ARG VARIANT=debian-12
+FROM mcr.microsoft.com/vscode/devcontainers/cpp:${VARIANT}
 
 ARG DEBIAN_FRONTEND=noninteractive
 # TZ must be a valid IANA time zone name; the region is "Asia", not "Asian".
 ENV TZ=Asia/Shanghai
 
 # hadolint ignore=DL3008
 RUN apt-get update \
+  && apt-get upgrade -y \
   && apt-get install -y apt-transport-https apt-utils build-essential \
-  ca-certificates ccache curl g++-multilib git gnupg \
-  libgcc-9-dev lib32gcc-9-dev lsb-release \
-  ninja-build ocaml ocamlbuild python2.7 \
+  ca-certificates ccache cmake curl g++-multilib git gnupg \
+  libgcc-12-dev lib32gcc-12-dev lsb-release \
+  ninja-build ocaml ocamlbuild \
   software-properties-common tree tzdata \
   unzip valgrind vim wget zip --no-install-recommends \
   && apt-get clean -y \
@@ -22,32 +23,32 @@ RUN apt-get update \
 
 #
 # binaryen
-ARG BINARYEN_VER=111
+ARG BINARYEN_VER=114
 WORKDIR /opt
 RUN wget -c --progress=dot:giga https://github.com/WebAssembly/binaryen/releases/download/version_${BINARYEN_VER}/binaryen-version_${BINARYEN_VER}-x86_64-linux.tar.gz \
   && tar xf binaryen-version_${BINARYEN_VER}-x86_64-linux.tar.gz \
-  && ln -sf /opt/binaryen-version_111 /opt/binaryen \
+  && ln -sf /opt/binaryen-version_${BINARYEN_VER} /opt/binaryen \
   && rm binaryen-version_${BINARYEN_VER}-x86_64-linux.tar.gz
 
 #
 # CMAKE (https://apt.kitware.com/)
 SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 # hadolint ignore=DL3008
-RUN wget --progress=dot:giga -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg > /dev/null \
-  && echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ bionic main' | tee /etc/apt/sources.list.d/kitware.list >/dev/null \
-  && apt-get update \
-  && rm /usr/share/keyrings/kitware-archive-keyring.gpg \
-  && apt-get install -y kitware-archive-keyring --no-install-recommends \
-  && apt-get install -y cmake --no-install-recommends \
-  && apt-get clean -y \
-  && rm -rf /var/lib/apt/lists/*
+ARG CMAKE_VER=3.27.0
+RUN wget https://github.com/Kitware/CMake/releases/download/v${CMAKE_VER}/cmake-${CMAKE_VER}-Linux-x86_64.sh \
+      -q -O /tmp/cmake-install.sh \
+      && chmod u+x /tmp/cmake-install.sh \
+      && mkdir /opt/cmake-${CMAKE_VER} \
+      && /tmp/cmake-install.sh --skip-license --prefix=/opt/cmake-${CMAKE_VER} \
+      && rm /tmp/cmake-install.sh \
+      && ln -s /opt/cmake-${CMAKE_VER}/bin/* /usr/local/bin
 
 #
 # install emsdk
 WORKDIR /opt
 RUN git clone https://github.com/emscripten-core/emsdk.git
 
-ARG EMSDK_VER=3.0.0
+ARG EMSDK_VER=3.1.43
 WORKDIR /opt/emsdk
 RUN  git pull \
   && ./emsdk install ${EMSDK_VER} \
@@ -56,7 +57,7 @@ RUN  git pull \
 
 #
 # install wasi-sdk
-ARG WASI_SDK_VER=19
+ARG WASI_SDK_VER=20
 RUN wget -c --progress=dot:giga https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-${WASI_SDK_VER}/wasi-sdk-${WASI_SDK_VER}.0-linux.tar.gz -P /opt \
   && tar xf /opt/wasi-sdk-${WASI_SDK_VER}.0-linux.tar.gz -C /opt \
   && ln -sf /opt/wasi-sdk-${WASI_SDK_VER}.0 /opt/wasi-sdk \
@@ -64,7 +65,7 @@ RUN wget -c --progress=dot:giga https://github.com/WebAssembly/wasi-sdk/releases
 
 #
 #install wabt
-ARG WABT_VER=1.0.29
+ARG WABT_VER=1.0.33
 RUN wget -c --progress=dot:giga https://github.com/WebAssembly/wabt/releases/download/${WABT_VER}/wabt-${WABT_VER}-ubuntu.tar.gz -P /opt \
   && tar xf /opt/wabt-${WABT_VER}-ubuntu.tar.gz -C /opt \
   && ln -sf /opt/wabt-${WABT_VER} /opt/wabt \
@@ -72,7 +73,7 @@ RUN wget -c --progress=dot:giga https://github.com/WebAssembly/wabt/releases/dow
 
 #
 # install bazelisk
-ARG BAZELISK_VER=1.12.0
+ARG BAZELISK_VER=1.17.0
 RUN mkdir /opt/bazelisk \
   && wget -c --progress=dot:giga https://github.com/bazelbuild/bazelisk/releases/download/v${BAZELISK_VER}/bazelisk-linux-amd64 -P /opt/bazelisk \
   && chmod a+x /opt/bazelisk/bazelisk-linux-amd64 \
@@ -80,16 +81,30 @@ RUN mkdir /opt/bazelisk \
 
 #
 # install clang+llvm
-ARG LLVM_VER=14
-RUN apt-get purge -y clang-10 llvm-10 && apt-get autoremove -y
+ARG LLVM_VER=16
+RUN apt-get purge -y clang-14 llvm-14 && apt-get autoremove -y
 WORKDIR /etc/apt/apt.conf.d
 RUN touch 99verfiy-peer.conf \
   && echo "Acquire { https::Verify-Peer false }" > 99verfiy-peer.conf
 
 WORKDIR /tmp
-RUN wget --progress=dot:giga https://apt.llvm.org/llvm.sh \
-  && chmod a+x ./llvm.sh \
-  && ./llvm.sh ${LLVM_VER} all
+#RUN wget --progress=dot:giga https://apt.llvm.org/llvm.sh \
+#  && chmod a+x ./llvm.sh \
+#  && ./llvm.sh ${LLVM_VER} all
+
+# Workaround due to https://github.com/llvm/llvm-project/issues/62475
+# hadolint ignore=DL3008
+RUN set -ex \
+    && echo "deb http://apt.llvm.org/bookworm/ llvm-toolchain-bookworm-${LLVM_VER} main" > /etc/apt/sources.list.d/apt.llvm.org.list \
+    && wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key | tee /etc/apt/trusted.gpg.d/apt.llvm.org.asc \
+    && apt-get update \
+    && apt-get install -y \
+    clang-${LLVM_VER} lldb-${LLVM_VER} lld-${LLVM_VER} clangd-${LLVM_VER} clang-tidy-${LLVM_VER} clang-format-${LLVM_VER} clang-tools-${LLVM_VER} \
+    llvm-${LLVM_VER}-dev lld-${LLVM_VER} lldb-${LLVM_VER} llvm-${LLVM_VER}-tools libomp-${LLVM_VER}-dev libc++-${LLVM_VER}-dev libc++abi-${LLVM_VER}-dev \
+    libclang-common-${LLVM_VER}-dev libclang-${LLVM_VER}-dev libclang-cpp${LLVM_VER}-dev libunwind-${LLVM_VER}-dev \
+    libclang-rt-${LLVM_VER}-dev libpolly-${LLVM_VER}-dev --no-install-recommends \
+    && apt-get clean -y \
+    && rm -rf /var/lib/apt/lists/*
 
 #
 # [Optional]
@@ -105,18 +120,19 @@ RUN apt-get update \
 #
 # Install required python packages
 # hadolint ignore=DL3013
-RUN python3 -m pip install --no-cache-dir --upgrade pip \
-  && pip3 install --no-cache-dir black nose pycparser pylint
+RUN python3 -m pip install --no-cache-dir --break-system-packages --upgrade pip \
+  && pip3 install --no-cache-dir --break-system-packages black nose pycparser pylint
 
 #
 # Install github-cli. It doesn't work as a feature of devcontainer.json
+ARG GH_CLI_VER=2.32.0
 WORKDIR /tmp
-RUN wget -q https://github.com/cli/cli/releases/download/v2.20.2/gh_2.20.2_linux_amd64.deb \
-  && dpkg -i gh_2.20.2_linux_amd64.deb
+RUN wget -q https://github.com/cli/cli/releases/download/v${GH_CLI_VER}/gh_${GH_CLI_VER}_linux_amd64.deb \
+  && dpkg -i gh_${GH_CLI_VER}_linux_amd64.deb
 
 #
 # Install NodeJS
-RUN wget -qO- https://deb.nodesource.com/setup_19.x | bash -
+RUN wget -qO- https://deb.nodesource.com/setup_20.x | bash -
 # hadolint ignore=DL3008
 RUN apt-get install -y nodejs --no-install-recommends
 

+ 13 - 10
.devcontainer/devcontainer.json

@@ -1,20 +1,23 @@
 // Copyright (C) 2019 Intel Corporation.  All rights reserved.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 // For format details, see https://aka.ms/vscode-remote/devcontainer.json or this file's README at:
-// https://github.com/microsoft/vscode-dev-containers/tree/v0.195.0/containers/cpp
+// https://github.com/microsoft/vscode-dev-containers/tree/v0.245.2/containers/cpp
 {
   "name": "WAMR-Dev",
   "build": {
     "dockerfile": "Dockerfile",
-    // Update 'VARIANT' to pick an Debian / Ubuntu OS version: debian-11, debian-10, debian-9, ubuntu-21.04, ubuntu-20.04, ubuntu-18.04
-    // Use Debian 11, Debian 9, Ubuntu 18.04 or Ubuntu 21.04 on local arm64/Apple Silicon
+    // Update 'VARIANT' to pick a Debian / Ubuntu OS version: debian-12, debian-11, debian-10, debian-9, ubuntu-21.04, ubuntu-20.04, ubuntu-18.04
+    // Use Debian 12, Debian 11, Debian 9, Ubuntu 18.04 or Ubuntu 21.04 on local arm64/Apple Silicon
     "args": {
-      "BINARYEN_VER": "111",
-      "EMSDK_VER": "3.0.0",
-      "LLVM_VER": "15",
-      "VARIANT": "ubuntu-20.04",
-      "WASI_SDK_VER": "19",
-      "WABT_VER": "1.0.31"
+      "BINARYEN_VER": "114",
+      "BAZELISK_VER": "1.17.0",
+      "CMAKE_VER": "3.27.0",
+      "EMSDK_VER": "3.1.43",
+      "GH_CLI_VER": "2.32.0",
+      "LLVM_VER": "16",
+      "VARIANT": "debian-12",
+      "WASI_SDK_VER": "20",
+      "WABT_VER": "1.0.33"
     }
   },
   "runArgs": [
@@ -34,7 +37,7 @@
         "llvm-vs-code-extensions.vscode-clangd",
         "ms-python.python",
         "ms-python.vscode-pylance",
-        "ms-vscode.cmake-tools",
+        "ms-vscode.cmake-tools"
       ]
     }
   },

+ 25 - 0
.github/workflows/build_wamr_lldb.yml

@@ -22,6 +22,12 @@ on:
         description: a semantic version number
         type: string
         required: true
+      wasi_sdk_url:
+        description: download WASI_SDK from this URL
+        type: string
+        required: false
+        default: "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-20/wasi-sdk-20.0-linux.tar.gz"
+      
 
 jobs:
   try_reuse:
@@ -43,6 +49,15 @@ jobs:
       PYTHON_MACOS_STANDALONE_BUILD: https://github.com/indygreg/python-build-standalone/releases/download/20230507/cpython-3.10.11+20230507-x86_64-apple-darwin-install_only.tar.gz
     steps:
       - uses: actions/checkout@v3
+      
+      - name: download and install wasi-sdk
+        run: |
+          cd /opt
+          basename=$(basename ${{ inputs.wasi_sdk_url }})
+          sudo wget --progress=dot:giga ${{ inputs.wasi_sdk_url }}
+          sudo tar -xzf ${basename}
+          sudo rm ${basename}
+          sudo mv wasi-sdk-* wasi-sdk
 
       - name: Cache build
         id: lldb_build_cache
@@ -141,6 +156,16 @@ jobs:
           cmake --build build --target lldb install --parallel $(nproc)
         working-directory: core/deps/llvm-project
 
+      - name: validate lldb ubuntu
+        if: steps.lldb_build_cache.outputs.cache-hit != 'true' && contains(inputs.runner, 'ubuntu')
+        run: |
+          echo "start to validate lldb..."
+          mkdir -p wamr-debug
+          cmake -S product-mini/platforms/linux -B wamr-debug -DWAMR_BUILD_DEBUG_INTERP=1
+          cmake --build wamr-debug --parallel $(nproc)
+          python3 ci/validate_lldb.py --port 1239 --lldb core/deps/wamr-lldb/bin/lldb --wamr wamr-debug/iwasm --verbose
+        working-directory: .
+
       - name: build lldb macos
         if: steps.lldb_build_cache.outputs.cache-hit != 'true' && contains(inputs.runner, 'macos')
         run: |

+ 2 - 2
.github/workflows/compilation_on_android_ubuntu.yml

@@ -527,7 +527,7 @@ jobs:
         working-directory: ./core/iwasm/libraries/lib-socket/test/
 
       - name: run tests
-        timeout-minutes: 10
+        timeout-minutes: 20
         run: ./test_wamr.sh ${{ matrix.test_option }} -t ${{ matrix.running_mode }}
         working-directory: ./tests/wamr-test-suites
 
@@ -543,7 +543,7 @@ jobs:
           sudo apt install -y g++-multilib lib32gcc-9-dev
 
       - name: run tests x86_32
-        timeout-minutes: 10
+        timeout-minutes: 20
         if: env.TEST_ON_X86_32 == 'true'
         run: ./test_wamr.sh ${{ env.X86_32_TARGET_TEST_OPTIONS }} ${{ matrix.test_option }} -t ${{ matrix.running_mode }}
         working-directory: ./tests/wamr-test-suites

+ 47 - 0
.github/workflows/hadolint_dockerfiles.yml

@@ -0,0 +1,47 @@
+# Copyright (C) 2019 Intel Corporation.  All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+name: hadolint dockerfiles
+
+on:
+  # will be triggered on PR events
+  pull_request:
+    types:
+      - opened
+      - synchronize
+    paths:
+      - "**/Dockerfile*"
+      - ".github/workflows/hadolint_dockerfiles.yml"
+  push:
+    branches:
+      - main
+      - "dev/**"
+    paths:
+      - "**/Dockerfile*"
+      - ".github/workflows/hadolint_dockerfiles.yml"
+  # allow to be triggered manually
+  workflow_dispatch:
+
+# Cancel any in-flight jobs for the same PR/branch so there's only one active
+# at a time
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  run-hadolint-on-dockerfiles:
+    runs-on: ubuntu-22.04
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      # by default, hadolint will fail on warnings and errors
+      - name: Run hadolint on dockerfiles
+        run: |
+          docker pull hadolint/hadolint:latest-debian
+          find . -name "*Dockerfile*" | while read dockerfile; do
+            echo "run hadolint on $dockerfile:"
+            docker run --rm -i hadolint/hadolint:latest-debian hadolint - <"$dockerfile"
+            echo "successful"
+          done

+ 2 - 2
.github/workflows/nightly_run.yml

@@ -595,7 +595,7 @@ jobs:
         working-directory: ./core/iwasm/libraries/lib-socket/test/
 
       - name: run tests
-        timeout-minutes: 10
+        timeout-minutes: 20
         run: ./test_wamr.sh ${{ matrix.test_option }} -t ${{ matrix.running_mode }}
         working-directory: ./tests/wamr-test-suites
 
@@ -611,7 +611,7 @@ jobs:
           sudo apt install -y g++-multilib lib32gcc-9-dev
 
       - name: run tests x86_32
-        timeout-minutes: 10
+        timeout-minutes: 20
         if: env.TEST_ON_X86_32 == 'true'
         run: ./test_wamr.sh ${{ env.X86_32_TARGET_TEST_OPTIONS }} ${{ matrix.test_option }} -t ${{ matrix.running_mode }}
         working-directory: ./tests/wamr-test-suites

+ 2 - 2
ATTRIBUTIONS.md

@@ -22,7 +22,7 @@ The WAMR fast interpreter is a clean room development. We would acknowledge the
 
 |  third party components | version number | latest release | vendor pages | CVE details |
 | --- | --- | --- | --- | --- |
-| cjson | 1.7.10 | 1.7.14 | https://github.com/DaveGamble/cJSON | https://www.cvedetails.com/vendor/19164/Cjson-Project.html |
+| cjson | 1.7.16 | 1.7.16 | https://github.com/DaveGamble/cJSON | https://www.cvedetails.com/vendor/19164/Cjson-Project.html |
 | contiki-ng (er-coap) | unspecified | 3.0 | https://github.com/contiki-os/contiki | https://www.cvedetails.com/vendor/16528/Contiki-os.html |
 | freebsd libm | unspecified | 13.0 | https://www.freebsd.org/ | https://www.cvedetails.com/vendor/6/Freebsd.html |
 | LVGL | 6.0.1 | 7.11.0 | https://lvgl.io/ | |
@@ -31,7 +31,7 @@ The WAMR fast interpreter is a clean room development. We would acknowledge the
 | wasmtime | unspecified | v0.26.0 | https://github.com/bytecodealliance/wasmtime | |
 | zephyr | unspecified | v2.5.0 | https://www.zephyrproject.org/ | https://www.cvedetails.com/vendor/19255/Zephyrproject.html |
 | WebAssembly debugging patch for LLDB | unspecified | unspecified | https://reviews.llvm.org/D78801 | |
-| libuv | v1.42.0 | v1.44.1 | https://github.com/libuv/libuv | https://www.cvedetails.com/vendor/15402/Libuv-Project.html |
+| libuv | v1.46.0 | v1.46.0 | https://github.com/libuv/libuv | https://www.cvedetails.com/vendor/15402/Libuv-Project.html |
 | uvwasi | unspecified | v0.0.12 | https://github.com/nodejs/uvwasi | |
 | asmjit | unspecified | unspecified | https://github.com/asmjit/asmjit | |
 | zydis | unspecified | e14a07895136182a5b53e181eec3b1c6e0b434de | https://github.com/zyantific/zydis | |

+ 4 - 0
build-scripts/config_common.cmake

@@ -406,3 +406,7 @@ if (WAMR_DISABLE_WRITE_GS_BASE EQUAL 1)
   add_definitions (-DWASM_DISABLE_WRITE_GS_BASE=1)
   message ("     Write linear memory base addr to x86 GS register disabled")
 endif ()
+if (WAMR_CONFIGUABLE_BOUNDS_CHECKS EQUAL 1)
+  add_definitions (-DWASM_CONFIGURABLE_BOUNDS_CHECKS=1)
+  message ("     Configurable bounds checks enabled")
+endif ()

+ 124 - 0
ci/validate_lldb.py

@@ -0,0 +1,124 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2023 Intel Corporation.  All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+
+import argparse
+import time
+from pathlib import Path
+import subprocess, shlex
+
+SCRIPT_DIR = Path(__file__).parent.resolve()
+REPO_ROOT_DIR = SCRIPT_DIR.parent
+SAMPLE_CODE_FILE = REPO_ROOT_DIR / 'product-mini/app-samples/hello-world/main.c'
+WASM_OUT_FILE = SCRIPT_DIR / 'out.wasm'
+
+parser = argparse.ArgumentParser(
+    description="Validate the customized lldb with sample code"
+)
+parser.add_argument(
+    "-l", "--lldb", dest='lldb', default='lldb', help="path to lldb executable"
+)
+parser.add_argument(
+    "-w", "--wamr", dest='wamr', default='iwasm', help="path to iwasm executable"
+)
+parser.add_argument(
+    "-p", "--port", dest='port', default='1234', help="debug server listen port"
+)
+parser.add_argument(
+    "-v", "--verbose", dest='verbose', action='store_true', default=False, help="display lldb stdout"
+)
+
+options = parser.parse_args()
+
+lldb_command_prologue = f'{options.lldb} -o "process connect -p wasm connect://127.0.0.1:{options.port}"'
+lldb_command_epilogue = '-o q'
+
+test_cases = {
+    'run_to_exit': '-o c',
+    'func_breakpoint': '-o "b main" -o c -o c',
+    'line_breakpoint': '-o "b main.c:12" -o c -o c',
+    'break_on_unknown_func': '-o "b not_a_func" -o c',
+    'watch_point': '-o "b main" -o c -o "watchpoint set variable buf" -o c -o "fr v buf" -o c',
+}
+
+# Step1: Build wasm module with debug information
+build_cmd = f'/opt/wasi-sdk/bin/clang -g -O0 -o {WASM_OUT_FILE} {SAMPLE_CODE_FILE}'
+try:
+    print(f'building wasm module ...', end='', flush=True)
+    subprocess.check_call(shlex.split(build_cmd))
+    print(f'\t OK')
+except subprocess.CalledProcessError:
+    print("Failed to build wasm module with debug information")
+    exit(1)
+
+def print_process_output(p):  # print the stdout and stderr collected from process `p`
+    try:
+        outs, errs = p.communicate(timeout=2)  # wait at most 2 seconds for the output
+        print("stdout:")
+        print(outs)
+        print("stderr:")
+        print(errs)
+    except subprocess.TimeoutExpired:  # output was not available within the timeout
+        print("Failed to get process output")
+
+# Step2: Launch WAMR in debug mode and validate lldb commands
+wamr_cmd = f'{options.wamr} -g=127.0.0.1:{options.port} {WASM_OUT_FILE}'
+for case, cmd in test_cases.items():
+    has_error = False
+    print(f'validating case [{case}] ...', end='', flush=True)
+    lldb_cmd = f'{lldb_command_prologue} {cmd} {lldb_command_epilogue}'
+
+    wamr_process = subprocess.Popen(shlex.split(
+        wamr_cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
+
+    time.sleep(0.1)
+    if (wamr_process.poll() != None):
+        print("\nWAMR doesn't wait for lldb connection")
+        print_process_output(wamr_process)
+        exit(1)
+
+    lldb_process = subprocess.Popen(shlex.split(
+        lldb_cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
+
+    if (options.verbose):
+        while (lldb_process.poll() is None):
+            print(lldb_process.stdout.read(), end='', flush=True)
+
+    try:
+        if (lldb_process.wait(5) != 0):
+            print(f"\nFailed to validate case [{case}]")
+            print_process_output(lldb_process)
+            has_error = True
+
+        if wamr_process.wait(2) != 0:
+            print("\nWAMR process doesn't exit normally")
+            print_process_output(wamr_process)
+            has_error = True
+
+    except subprocess.TimeoutExpired:
+        print(f"\nFailed to validate case [{case}]")
+        print("wamr output:")
+        print_process_output(wamr_process)
+        print("lldb output:")
+        print_process_output(lldb_process)
+        has_error = True
+    finally:
+        if (lldb_process.poll() == None):
+            print(f'\nterminating lldb process [{lldb_process.pid}]')
+            lldb_process.kill()
+        if (wamr_process.poll() == None):
+            print(f'terminating wamr process [{wamr_process.pid}]')
+            wamr_process.kill()
+
+        if (has_error):
+            exit(1)
+
+    print(f'\t OK')
+
+    # wait 100ms to ensure the socket is closed
+    time.sleep(0.1)
+
+print('Validate lldb success')
+exit(0)

+ 17 - 1
core/config.h

@@ -391,7 +391,7 @@
 #define APP_THREAD_STACK_SIZE_DEFAULT (64 * 1024)
 #define APP_THREAD_STACK_SIZE_MIN (48 * 1024)
 #else
-#define APP_THREAD_STACK_SIZE_DEFAULT (32 * 1024)
+#define APP_THREAD_STACK_SIZE_DEFAULT (64 * 1024)
 #define APP_THREAD_STACK_SIZE_MIN (24 * 1024)
 #endif
 #endif /* end of !(defined(APP_THREAD_STACK_SIZE_DEFAULT) \
@@ -480,4 +480,20 @@
 #define WASM_DISABLE_WRITE_GS_BASE 0
 #endif
 
+/* Configurable bounds checks */
+#ifndef WASM_CONFIGURABLE_BOUNDS_CHECKS
+#define WASM_CONFIGURABLE_BOUNDS_CHECKS 0
+#endif
+
+/* Some chips cannot map external RAM with read, write and execute
+   attributes at the same time. Such RAM has to be mapped into two
+   address spaces, ibus and dbus: code can be read/written through dbus
+   and read/executed through ibus. Executing the code therefore takes
+   two steps: first copy it and do the relocation in the dbus space,
+   then execute it in the ibus space. Since the contents of the two
+   spaces are the same, we call this a bus mirror.
+ */
+#ifndef WASM_MEM_DUAL_BUS_MIRROR
+#define WASM_MEM_DUAL_BUS_MIRROR 0
+#endif
+
 #endif /* end of _CONFIG_H_ */

+ 129 - 0
core/iwasm/aot/aot_intrinsic.c

@@ -648,6 +648,42 @@ add_f64_common_intrinsics(AOTCompContext *comp_ctx)
     add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F64_CMP);
 }
 
+static void
+add_f32xi32_intrinsics(AOTCompContext *comp_ctx)
+{ /* Add the four f32 <-> 32-bit integer conversion flags (F32_TO_I32,
+     F32_TO_U32, I32_TO_F32, U32_TO_F32) to comp_ctx through
+     add_intrinsic_capability(). Called for the 'f32xi32' and 'fpxint'
+     groups of comp_ctx->builtin_intrinsics. */
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_TO_I32);
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_TO_U32);
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_I32_TO_F32);
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_U32_TO_F32);
+}
+
+static void
+add_f64xi32_intrinsics(AOTCompContext *comp_ctx)
+{ /* Add the four f64 <-> 32-bit integer conversion flags (F64_TO_I32,
+     F64_TO_U32, I32_TO_F64, U32_TO_F64) to comp_ctx through
+     add_intrinsic_capability(). Called for the 'f64xi32' and 'fpxint'
+     groups of comp_ctx->builtin_intrinsics. */
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F64_TO_I32);
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F64_TO_U32);
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_I32_TO_F64);
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_U32_TO_F64);
+}
+
+static void
+add_f32xi64_intrinsics(AOTCompContext *comp_ctx)
+{ /* Add the four f32 <-> 64-bit integer conversion flags (F32_TO_I64,
+     F32_TO_U64, I64_TO_F32, U64_TO_F32) to comp_ctx through
+     add_intrinsic_capability(). Called for the 'f32xi64' and 'fpxint'
+     groups of comp_ctx->builtin_intrinsics. */
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_TO_I64);
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_TO_U64);
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_I64_TO_F32);
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_U64_TO_F32);
+}
+
+static void
+add_f64xi64_intrinsics(AOTCompContext *comp_ctx)
+{ /* Add the four f64 <-> 64-bit integer conversion flags (F64_TO_I64,
+     F64_TO_U64, I64_TO_F64, U64_TO_F64) to comp_ctx through
+     add_intrinsic_capability(). Called for the 'f64xi64' and 'fpxint'
+     groups of comp_ctx->builtin_intrinsics. */
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F64_TO_I64);
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F64_TO_U64);
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_I64_TO_F64);
+    add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_U64_TO_F64);
+}
+
 static void
 add_common_float_integer_convertion(AOTCompContext *comp_ctx)
 {
@@ -705,8 +741,101 @@ aot_intrinsic_check_capability(const AOTCompContext *comp_ctx,
 void
 aot_intrinsic_fill_capability_flags(AOTCompContext *comp_ctx)
 {
+    uint32 i;
+
     memset(comp_ctx->flags, 0, sizeof(comp_ctx->flags));
 
+    /* Intrinsics from command line have highest priority */
+
+    if (comp_ctx->builtin_intrinsics) {
+
+        /* Handle 'all' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "all")) {
+            for (i = 0; i < g_intrinsic_count; i++) {
+                add_intrinsic_capability(comp_ctx, g_intrinsic_mapping[i].flag);
+            }
+            return;
+        }
+
+        /* Handle 'i32.common' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "i32.common")) {
+            add_i32_common_intrinsics(comp_ctx);
+        }
+
+        /* Handle 'i64.common' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "i64.common")) {
+            add_i64_common_intrinsics(comp_ctx);
+        }
+
+        /* Handle 'fp.common' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "fp.common")) {
+            add_f32_common_intrinsics(comp_ctx);
+            add_f64_common_intrinsics(comp_ctx);
+        }
+
+        /* Handle 'f32.common' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "f32.common")) {
+            add_f32_common_intrinsics(comp_ctx);
+        }
+
+        /* Handle 'f64.common' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "f64.common")) {
+            add_f64_common_intrinsics(comp_ctx);
+        }
+
+        /* Handle 'f32xi32' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "f32xi32")) {
+            add_f32xi32_intrinsics(comp_ctx);
+        }
+
+        /* Handle 'f64xi32' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "f64xi32")) {
+            add_f64xi32_intrinsics(comp_ctx);
+        }
+
+        /* Handle 'f32xi64' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "f32xi64")) {
+            add_f32xi64_intrinsics(comp_ctx);
+        }
+
+        /* Handle 'f64xi64' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "f64xi64")) {
+            add_f64xi64_intrinsics(comp_ctx);
+        }
+
+        /* Handle 'fpxint' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "fpxint")) {
+            add_f32xi32_intrinsics(comp_ctx);
+            add_f64xi32_intrinsics(comp_ctx);
+            add_f32xi64_intrinsics(comp_ctx);
+            add_f64xi64_intrinsics(comp_ctx);
+        }
+
+        /* Handle 'constop' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "constop")) {
+            add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_I32_CONST);
+            add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_I64_CONST);
+            add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_CONST);
+            add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F64_CONST);
+        }
+
+        /* Handle 'fp.common' group */
+        if (strstr(comp_ctx->builtin_intrinsics, "fp.common")) {
+            add_f32_common_intrinsics(comp_ctx);
+            add_f64_common_intrinsics(comp_ctx);
+        }
+
+        /* Handle other single items */
+        for (i = 0; i < g_intrinsic_count; i++) {
+            if (strstr(comp_ctx->builtin_intrinsics,
+                       g_intrinsic_mapping[i].llvm_intrinsic)) {
+                add_intrinsic_capability(comp_ctx, g_intrinsic_mapping[i].flag);
+            }
+        }
+
+        return;
+    }
+
     if (!comp_ctx->target_cpu)
         return;
 

+ 19 - 0
core/iwasm/aot/aot_loader.c

@@ -1897,6 +1897,13 @@ get_data_section_addr(AOTModule *module, const char *section_name,
     return NULL;
 }
 
+/*
+ * Non-static wrapper around get_data_section_addr(): forwards module,
+ * section_name and p_data_size unchanged and returns the result as a
+ * read-only pointer.
+ * NOTE(review): presumably p_data_size may be NULL when the caller does
+ * not need the section size - confirm against get_data_section_addr().
+ */
+const void *
+aot_get_data_section_addr(AOTModule *module, const char *section_name,
+                          uint32 *p_data_size)
+{
+    return get_data_section_addr(module, section_name, p_data_size);
+}
+
 static void *
 resolve_target_sym(const char *symbol, int32 *p_index)
 {
@@ -3061,6 +3068,9 @@ create_sections(AOTModule *module, const uint8 *buf, uint32 size,
     uint32 section_size;
     uint64 total_size;
     uint8 *aot_text;
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+    uint8 *mirrored_text;
+#endif
 
     if (!resolve_execute_mode(buf, size, &is_indirect_mode, error_buf,
                               error_buf_size)) {
@@ -3119,8 +3129,17 @@ create_sections(AOTModule *module, const uint8 *buf, uint32 size,
                     bh_assert((uintptr_t)aot_text < INT32_MAX);
 #endif
 #endif
+
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+                    mirrored_text = os_get_dbus_mirror(aot_text);
+                    bh_assert(mirrored_text != NULL);
+                    bh_memcpy_s(mirrored_text, (uint32)total_size,
+                                section->section_body, (uint32)section_size);
+                    os_dcache_flush();
+#else
                     bh_memcpy_s(aot_text, (uint32)total_size,
                                 section->section_body, (uint32)section_size);
+#endif
                     section->section_body = aot_text;
                     destroy_aot_text = true;
 

+ 45 - 78
core/iwasm/aot/aot_runtime.c

@@ -42,6 +42,11 @@ bh_static_assert(offsetof(AOTModuleInstance, cur_exception)
 bh_static_assert(offsetof(AOTModuleInstance, global_table_data)
                  == 13 * sizeof(uint64) + 128 + 11 * sizeof(uint64));
 
+bh_static_assert(sizeof(AOTMemoryInstance) == 104);
+bh_static_assert(offsetof(AOTTableInstance, elems) == 24);
+
+bh_static_assert(offsetof(AOTModuleInstanceExtra, stack_sizes) == 0);
+
 static void
 set_error_buf(char *error_buf, uint32 error_buf_size, const char *string)
 {
@@ -337,11 +342,8 @@ memories_deinstantiate(AOTModuleInstance *module_inst)
         memory_inst = module_inst->memories[i];
         if (memory_inst) {
 #if WASM_ENABLE_SHARED_MEMORY != 0
-            if (memory_inst->is_shared) {
-                int32 ref_count = shared_memory_dec_reference(
-                    (WASMModuleCommon *)module_inst->module);
-                bh_assert(ref_count >= 0);
-
+            if (shared_memory_is_shared(memory_inst)) {
+                uint32 ref_count = shared_memory_dec_reference(memory_inst);
                 /* if the reference count is not zero,
                     don't free the memory */
                 if (ref_count > 0)
@@ -371,9 +373,10 @@ memories_deinstantiate(AOTModuleInstance *module_inst)
 }
 
 static AOTMemoryInstance *
-memory_instantiate(AOTModuleInstance *module_inst, AOTModule *module,
-                   AOTMemoryInstance *memory_inst, AOTMemory *memory,
-                   uint32 heap_size, char *error_buf, uint32 error_buf_size)
+memory_instantiate(AOTModuleInstance *module_inst, AOTModuleInstance *parent,
+                   AOTModule *module, AOTMemoryInstance *memory_inst,
+                   AOTMemory *memory, uint32 memory_idx, uint32 heap_size,
+                   char *error_buf, uint32 error_buf_size)
 {
     void *heap_handle;
     uint32 num_bytes_per_page = memory->num_bytes_per_page;
@@ -394,23 +397,13 @@ memory_instantiate(AOTModuleInstance *module_inst, AOTModule *module,
     bool is_shared_memory = memory->memory_flags & 0x02 ? true : false;
 
     /* Shared memory */
-    if (is_shared_memory) {
+    if (is_shared_memory && parent != NULL) {
         AOTMemoryInstance *shared_memory_instance;
-        WASMSharedMemNode *node =
-            wasm_module_get_shared_memory((WASMModuleCommon *)module);
-        /* If the memory of this module has been instantiated,
-            return the memory instance directly */
-        if (node) {
-            uint32 ref_count;
-            ref_count = shared_memory_inc_reference((WASMModuleCommon *)module);
-            bh_assert(ref_count > 0);
-            shared_memory_instance =
-                (AOTMemoryInstance *)shared_memory_get_memory_inst(node);
-            bh_assert(shared_memory_instance);
-
-            (void)ref_count;
-            return shared_memory_instance;
-        }
+        bh_assert(memory_idx == 0);
+        bh_assert(parent->memory_count > memory_idx);
+        shared_memory_instance = parent->memories[memory_idx];
+        shared_memory_inc_reference(shared_memory_instance);
+        return shared_memory_instance;
     }
 #endif
 
@@ -607,23 +600,12 @@ memory_instantiate(AOTModuleInstance *module_inst, AOTModule *module,
 
 #if WASM_ENABLE_SHARED_MEMORY != 0
     if (is_shared_memory) {
-        memory_inst->is_shared = true;
-        if (!shared_memory_set_memory_inst(
-                (WASMModuleCommon *)module,
-                (WASMMemoryInstanceCommon *)memory_inst)) {
-            set_error_buf(error_buf, error_buf_size, "allocate memory failed");
-            goto fail3;
-        }
+        memory_inst->ref_count = 1;
     }
 #endif
 
     return memory_inst;
 
-#if WASM_ENABLE_SHARED_MEMORY != 0
-fail3:
-    if (heap_size > 0)
-        mem_allocator_destroy(memory_inst->heap_handle);
-#endif
 fail2:
     if (heap_size > 0)
         wasm_runtime_free(memory_inst->heap_handle);
@@ -652,8 +634,9 @@ aot_get_default_memory(AOTModuleInstance *module_inst)
 }
 
 static bool
-memories_instantiate(AOTModuleInstance *module_inst, AOTModule *module,
-                     uint32 heap_size, char *error_buf, uint32 error_buf_size)
+memories_instantiate(AOTModuleInstance *module_inst, AOTModuleInstance *parent,
+                     AOTModule *module, uint32 heap_size, char *error_buf,
+                     uint32 error_buf_size)
 {
     uint32 global_index, global_data_offset, base_offset, length;
     uint32 i, memory_count = module->memory_count;
@@ -670,8 +653,8 @@ memories_instantiate(AOTModuleInstance *module_inst, AOTModule *module,
 
     memories = module_inst->global_table_data.memory_instances;
     for (i = 0; i < memory_count; i++, memories++) {
-        memory_inst = memory_instantiate(module_inst, module, memories,
-                                         &module->memories[i], heap_size,
+        memory_inst = memory_instantiate(module_inst, parent, module, memories,
+                                         &module->memories[i], i, heap_size,
                                          error_buf, error_buf_size);
         if (!memory_inst) {
             return false;
@@ -1099,9 +1082,9 @@ check_linked_symbol(AOTModule *module, char *error_buf, uint32 error_buf_size)
 }
 
 AOTModuleInstance *
-aot_instantiate(AOTModule *module, bool is_sub_inst, WASMExecEnv *exec_env_main,
-                uint32 stack_size, uint32 heap_size, char *error_buf,
-                uint32 error_buf_size)
+aot_instantiate(AOTModule *module, AOTModuleInstance *parent,
+                WASMExecEnv *exec_env_main, uint32 stack_size, uint32 heap_size,
+                char *error_buf, uint32 error_buf_size)
 {
     AOTModuleInstance *module_inst;
     const uint32 module_inst_struct_size =
@@ -1111,6 +1094,7 @@ aot_instantiate(AOTModule *module, bool is_sub_inst, WASMExecEnv *exec_env_main,
     uint64 total_size, table_size = 0;
     uint8 *p;
     uint32 i, extra_info_offset;
+    const bool is_sub_inst = parent != NULL;
 
     /* Check heap size */
     heap_size = align_uint(heap_size, 8);
@@ -1170,7 +1154,7 @@ aot_instantiate(AOTModule *module, bool is_sub_inst, WASMExecEnv *exec_env_main,
         goto fail;
 
     /* Initialize memory space */
-    if (!memories_instantiate(module_inst, module, heap_size, error_buf,
+    if (!memories_instantiate(module_inst, parent, module, heap_size, error_buf,
                               error_buf_size))
         goto fail;
 
@@ -1205,17 +1189,6 @@ aot_instantiate(AOTModule *module, bool is_sub_inst, WASMExecEnv *exec_env_main,
     }
 #endif
 
-#if WASM_ENABLE_WASI_NN != 0
-    if (!is_sub_inst) {
-        if (!(((AOTModuleInstanceExtra *)module_inst->e)->wasi_nn_ctx =
-                  wasi_nn_initialize())) {
-            set_error_buf(error_buf, error_buf_size,
-                          "wasi nn initialization failed");
-            goto fail;
-        }
-    }
-#endif
-
     /* Initialize the thread related data */
     if (stack_size == 0)
         stack_size = DEFAULT_WASM_STACK_SIZE;
@@ -1225,6 +1198,9 @@ aot_instantiate(AOTModule *module, bool is_sub_inst, WASMExecEnv *exec_env_main,
 #endif
     module_inst->default_wasm_stack_size = stack_size;
 
+    ((AOTModuleInstanceExtra *)module_inst->e)->stack_sizes =
+        aot_get_data_section_addr(module, AOT_STACK_SIZES_SECTION_NAME, NULL);
+
 #if WASM_ENABLE_PERF_PROFILING != 0
     total_size = (uint64)sizeof(AOTFuncPerfProfInfo)
                  * (module->import_func_count + module->func_count);
@@ -1271,16 +1247,6 @@ aot_deinstantiate(AOTModuleInstance *module_inst, bool is_sub_inst)
         wasm_exec_env_destroy((WASMExecEnv *)module_inst->exec_env_singleton);
     }
 
-#if WASM_ENABLE_LIBC_WASI != 0
-    /* Destroy wasi resource before freeing app heap, since some fields of
-       wasi contex are allocated from app heap, and if app heap is freed,
-       these fields will be set to NULL, we cannot free their internal data
-       which may allocated from global heap. */
-    /* Only destroy wasi ctx in the main module instance */
-    if (!is_sub_inst)
-        wasm_runtime_destroy_wasi((WASMModuleInstanceCommon *)module_inst);
-#endif
-
 #if WASM_ENABLE_PERF_PROFILING != 0
     if (module_inst->func_perf_profilings)
         wasm_runtime_free(module_inst->func_perf_profilings);
@@ -1313,14 +1279,14 @@ aot_deinstantiate(AOTModuleInstance *module_inst, bool is_sub_inst)
         wasm_runtime_free(
             ((AOTModuleInstanceExtra *)module_inst->e)->c_api_func_imports);
 
-#if WASM_ENABLE_WASI_NN != 0
     if (!is_sub_inst) {
-        WASINNContext *wasi_nn_ctx =
-            ((AOTModuleInstanceExtra *)module_inst->e)->wasi_nn_ctx;
-        if (wasi_nn_ctx)
-            wasi_nn_destroy(wasi_nn_ctx);
-    }
+#if WASM_ENABLE_LIBC_WASI != 0
+        wasm_runtime_destroy_wasi((WASMModuleInstanceCommon *)module_inst);
+#endif
+#if WASM_ENABLE_WASI_NN != 0
+        wasi_nn_destroy(module_inst);
 #endif
+    }
 
     wasm_runtime_free(module_inst);
 }
@@ -2508,13 +2474,13 @@ aot_table_init(AOTModuleInstance *module_inst, uint32 tbl_idx,
     tbl_seg = module->table_init_data_list[tbl_seg_idx];
     bh_assert(tbl_seg);
 
-    if (!length) {
+    if (offset_len_out_of_bounds(src_offset, length, tbl_seg->func_index_count)
+        || offset_len_out_of_bounds(dst_offset, length, tbl_inst->cur_size)) {
+        aot_set_exception_with_id(module_inst, EXCE_OUT_OF_BOUNDS_TABLE_ACCESS);
         return;
     }
 
-    if (length + src_offset > tbl_seg->func_index_count
-        || dst_offset + length > tbl_inst->cur_size) {
-        aot_set_exception_with_id(module_inst, EXCE_OUT_OF_BOUNDS_TABLE_ACCESS);
+    if (!length) {
         return;
     }
 
@@ -2548,8 +2514,9 @@ aot_table_copy(AOTModuleInstance *module_inst, uint32 src_tbl_idx,
     dst_tbl_inst = module_inst->tables[dst_tbl_idx];
     bh_assert(dst_tbl_inst);
 
-    if ((uint64)dst_offset + length > dst_tbl_inst->cur_size
-        || (uint64)src_offset + length > src_tbl_inst->cur_size) {
+    if (offset_len_out_of_bounds(dst_offset, length, dst_tbl_inst->cur_size)
+        || offset_len_out_of_bounds(src_offset, length,
+                                    src_tbl_inst->cur_size)) {
         aot_set_exception_with_id(module_inst, EXCE_OUT_OF_BOUNDS_TABLE_ACCESS);
         return;
     }
@@ -2575,7 +2542,7 @@ aot_table_fill(AOTModuleInstance *module_inst, uint32 tbl_idx, uint32 length,
     tbl_inst = module_inst->tables[tbl_idx];
     bh_assert(tbl_inst);
 
-    if (data_offset + length > tbl_inst->cur_size) {
+    if (offset_len_out_of_bounds(data_offset, length, tbl_inst->cur_size)) {
         aot_set_exception_with_id(module_inst, EXCE_OUT_OF_BOUNDS_TABLE_ACCESS);
         return;
     }

+ 12 - 6
core/iwasm/aot/aot_runtime.h

@@ -105,9 +105,11 @@ typedef struct AOTFunctionInstance {
 } AOTFunctionInstance;
 
 typedef struct AOTModuleInstanceExtra {
+    DefPointer(const uint32 *, stack_sizes);
     CApiFuncImport *c_api_func_imports;
-#if WASM_ENABLE_WASI_NN != 0
-    WASINNContext *wasi_nn_ctx;
+#if WASM_CONFIGUABLE_BOUNDS_CHECKS != 0
+    /* Disable bounds checks or not */
+    bool disable_bounds_checks;
 #endif
 } AOTModuleInstanceExtra;
 
@@ -423,7 +425,7 @@ aot_unload(AOTModule *module);
  * Instantiate a AOT module.
  *
  * @param module the AOT module to instantiate
- * @param is_sub_inst the flag of sub instance
+ * @param parent the parent module instance
  * @param heap_size the default heap size of the module instance, a heap will
  *        be created besides the app memory space. Both wasm app and native
  *        function can allocate memory from the heap. If heap_size is 0, the
@@ -434,9 +436,9 @@ aot_unload(AOTModule *module);
  * @return return the instantiated AOT module instance, NULL if failed
  */
 AOTModuleInstance *
-aot_instantiate(AOTModule *module, bool is_sub_inst, WASMExecEnv *exec_env_main,
-                uint32 stack_size, uint32 heap_size, char *error_buf,
-                uint32 error_buf_size);
+aot_instantiate(AOTModule *module, AOTModuleInstance *parent,
+                WASMExecEnv *exec_env_main, uint32 stack_size, uint32 heap_size,
+                char *error_buf, uint32 error_buf_size);
 
 /**
  * Deinstantiate a AOT module instance, destroy the resources.
@@ -655,6 +657,10 @@ aot_dump_perf_profiling(const AOTModuleInstance *module_inst);
 const uint8 *
 aot_get_custom_section(const AOTModule *module, const char *name, uint32 *len);
 
+const void *
+aot_get_data_section_addr(AOTModule *module, const char *section_name,
+                          uint32 *p_data_size);
+
 #if WASM_ENABLE_STATIC_PGO != 0
 void
 llvm_profile_instrument_target(uint64 target_value, void *data,

+ 14 - 18
core/iwasm/aot/arch/aot_reloc_riscv.c

@@ -78,6 +78,13 @@ static SymbolMap target_sym_map[] = {
     REG_SYM(__addsf3),
     REG_SYM(__divdf3),
     REG_SYM(__divsf3),
+    REG_SYM(__eqdf2),
+    REG_SYM(__eqsf2),
+    REG_SYM(__extendsfdf2),
+    REG_SYM(__fixunsdfdi),
+    REG_SYM(__fixunsdfsi),
+    REG_SYM(__fixunssfdi),
+    REG_SYM(__fixunssfsi),
     REG_SYM(__gedf2),
     REG_SYM(__gesf2),
     REG_SYM(__gtdf2),
@@ -89,44 +96,33 @@ static SymbolMap target_sym_map[] = {
     REG_SYM(__muldf3),
     REG_SYM(__nedf2),
     REG_SYM(__nesf2),
-    REG_SYM(__eqsf2),
-    REG_SYM(__eqdf2),
-    REG_SYM(__extendsfdf2),
-    REG_SYM(__fixunsdfdi),
-    REG_SYM(__fixunsdfsi),
-    REG_SYM(__fixunssfsi),
     REG_SYM(__subdf3),
     REG_SYM(__subsf3),
     REG_SYM(__truncdfsf2),
     REG_SYM(__unorddf2),
     REG_SYM(__unordsf2),
-#endif
-    REG_SYM(__divdi3),
-    REG_SYM(__divsi3),
 #if __riscv_xlen == 32
     REG_SYM(__fixdfdi),
     REG_SYM(__fixdfsi),
     REG_SYM(__fixsfdi),
     REG_SYM(__fixsfsi),
-#endif
-    REG_SYM(__fixunssfdi),
-#if __riscv_xlen == 32
     REG_SYM(__floatdidf),
     REG_SYM(__floatdisf),
-    REG_SYM(__floatsisf),
     REG_SYM(__floatsidf),
+    REG_SYM(__floatsisf),
     REG_SYM(__floatundidf),
     REG_SYM(__floatundisf),
-    REG_SYM(__floatunsisf),
     REG_SYM(__floatunsidf),
+    REG_SYM(__floatunsisf),
+    REG_SYM(__mulsf3),
+    REG_SYM(__mulsi3),
+#endif
 #endif
+    REG_SYM(__divdi3),
+    REG_SYM(__divsi3),
     REG_SYM(__moddi3),
     REG_SYM(__modsi3),
     REG_SYM(__muldi3),
-#if __riscv_xlen == 32
-    REG_SYM(__mulsf3),
-    REG_SYM(__mulsi3),
-#endif
     REG_SYM(__udivdi3),
     REG_SYM(__udivsi3),
     REG_SYM(__umoddi3),

+ 19 - 1
core/iwasm/aot/arch/aot_reloc_xtensa.c

@@ -43,6 +43,11 @@ void __floatdidf();
 void __divsf3();
 void __fixdfdi();
 void __floatundidf();
+void __fixsfdi();
+void __fixunssfdi();
+void __fixunsdfdi();
+void __floatdisf();
+void __floatundisf();
 
 
 static SymbolMap target_sym_map[] = {
@@ -85,6 +90,11 @@ static SymbolMap target_sym_map[] = {
     REG_SYM(__divsf3),
     REG_SYM(__fixdfdi),
     REG_SYM(__floatundidf),
+    REG_SYM(__fixsfdi),
+    REG_SYM(__fixunssfdi),
+    REG_SYM(__fixunsdfdi),
+    REG_SYM(__floatdisf),
+    REG_SYM(__floatundisf),
 };
 /* clang-format on */
 
@@ -207,6 +217,10 @@ apply_relocation(AOTModule *module, uint8 *target_section_addr,
         case R_XTENSA_32:
         {
             uint8 *insn_addr = target_section_addr + reloc_offset;
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+            insn_addr = os_get_dbus_mirror((void *)insn_addr);
+            bh_assert(insn_addr != NULL);
+#endif
             int32 initial_addend;
             /* (S + A) */
             if ((intptr_t)insn_addr & 3) {
@@ -265,6 +279,11 @@ apply_relocation(AOTModule *module, uint8 *target_section_addr,
                 return false;
             }
 
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+            insn_addr = os_get_dbus_mirror((void *)insn_addr);
+            bh_assert(insn_addr != NULL);
+            l32r_insn = (l32r_insn_t *)insn_addr;
+#endif
             imm16 = (int16)(relative_offset >> 2);
 
             /* write back the imm16 to the l32r instruction */
@@ -285,7 +304,6 @@ apply_relocation(AOTModule *module, uint8 *target_section_addr,
 #if __GNUC__ >= 9
 #pragma GCC diagnostic pop
 #endif
-
             break;
         }
 

+ 3 - 9
core/iwasm/common/wasm_exec_env.h

@@ -7,6 +7,7 @@
 #define _WASM_EXEC_ENV_H
 
 #include "bh_assert.h"
+#include "wasm_suspend_flags.h"
 #if WASM_ENABLE_INTERP != 0
 #include "../interpreter/wasm.h"
 #endif
@@ -57,15 +58,8 @@ typedef struct WASMExecEnv {
        exception. */
     uint8 *native_stack_boundary;
 
-    /* Used to terminate or suspend current thread
-        bit 0: need to terminate
-        bit 1: need to suspend
-        bit 2: need to go into breakpoint
-        bit 3: return from pthread_exit */
-    union {
-        uint32 flags;
-        uintptr_t __padding__;
-    } suspend_flags;
+    /* Used to terminate or suspend current thread */
+    WASMSuspendFlags suspend_flags;
 
     /* Auxiliary stack boundary */
     union {

+ 63 - 45
core/iwasm/common/wasm_memory.c

@@ -5,6 +5,7 @@
 
 #include "wasm_runtime_common.h"
 #include "../interpreter/wasm_runtime.h"
+#include "../aot/aot_runtime.h"
 #include "bh_platform.h"
 #include "mem_alloc.h"
 
@@ -87,6 +88,16 @@ wasm_memory_init_with_allocator(void *_malloc_func, void *_realloc_func,
 }
 #endif
 
+/*
+ * Tell whether bounds checks apply to module_inst.
+ * When WASM_CONFIGUABLE_BOUNDS_CHECKS is non-zero the answer comes from
+ * wasm_runtime_is_bounds_checks_enabled(); otherwise it is always true.
+ */
+static inline bool
+is_bounds_checks_enabled(WASMModuleInstanceCommon *module_inst)
+{
+#if WASM_CONFIGUABLE_BOUNDS_CHECKS != 0
+    return wasm_runtime_is_bounds_checks_enabled(module_inst);
+#else
+    return true;
+#endif
+}
+
 bool
 wasm_runtime_memory_init(mem_alloc_type_t mem_alloc_type,
                          const MemAllocOption *alloc_option)
@@ -269,6 +280,10 @@ wasm_runtime_validate_app_addr(WASMModuleInstanceCommon *module_inst_comm,
     bh_assert(module_inst_comm->module_type == Wasm_Module_Bytecode
               || module_inst_comm->module_type == Wasm_Module_AoT);
 
+    if (!is_bounds_checks_enabled(module_inst_comm)) {
+        return true;
+    }
+
     memory_inst = wasm_get_default_memory(module_inst);
     if (!memory_inst) {
         goto fail;
@@ -299,6 +314,10 @@ wasm_runtime_validate_app_str_addr(WASMModuleInstanceCommon *module_inst_comm,
     bh_assert(module_inst_comm->module_type == Wasm_Module_Bytecode
               || module_inst_comm->module_type == Wasm_Module_AoT);
 
+    if (!is_bounds_checks_enabled(module_inst_comm)) {
+        return true;
+    }
+
     if (!wasm_runtime_get_app_addr_range(module_inst_comm, app_str_offset, NULL,
                                          &app_end_offset))
         goto fail;
@@ -327,6 +346,10 @@ wasm_runtime_validate_native_addr(WASMModuleInstanceCommon *module_inst_comm,
     bh_assert(module_inst_comm->module_type == Wasm_Module_Bytecode
               || module_inst_comm->module_type == Wasm_Module_AoT);
 
+    if (!is_bounds_checks_enabled(module_inst_comm)) {
+        return true;
+    }
+
     memory_inst = wasm_get_default_memory(module_inst);
     if (!memory_inst) {
         goto fail;
@@ -354,10 +377,13 @@ wasm_runtime_addr_app_to_native(WASMModuleInstanceCommon *module_inst_comm,
     WASMModuleInstance *module_inst = (WASMModuleInstance *)module_inst_comm;
     WASMMemoryInstance *memory_inst;
     uint8 *addr;
+    bool bounds_checks;
 
     bh_assert(module_inst_comm->module_type == Wasm_Module_Bytecode
               || module_inst_comm->module_type == Wasm_Module_AoT);
 
+    bounds_checks = is_bounds_checks_enabled(module_inst_comm);
+
     memory_inst = wasm_get_default_memory(module_inst);
     if (!memory_inst) {
         return NULL;
@@ -365,8 +391,17 @@ wasm_runtime_addr_app_to_native(WASMModuleInstanceCommon *module_inst_comm,
 
     addr = memory_inst->memory_data + app_offset;
 
-    if (memory_inst->memory_data <= addr && addr < memory_inst->memory_data_end)
+    if (bounds_checks) {
+        if (memory_inst->memory_data <= addr
+            && addr < memory_inst->memory_data_end) {
+
+            return addr;
+        }
+    }
+    /* If bounds checks is disabled, return the address directly */
+    else if (app_offset != 0) {
         return addr;
+    }
 
     return NULL;
 }
@@ -378,17 +413,27 @@ wasm_runtime_addr_native_to_app(WASMModuleInstanceCommon *module_inst_comm,
     WASMModuleInstance *module_inst = (WASMModuleInstance *)module_inst_comm;
     WASMMemoryInstance *memory_inst;
     uint8 *addr = (uint8 *)native_ptr;
+    bool bounds_checks;
 
     bh_assert(module_inst_comm->module_type == Wasm_Module_Bytecode
               || module_inst_comm->module_type == Wasm_Module_AoT);
 
+    bounds_checks = is_bounds_checks_enabled(module_inst_comm);
+
     memory_inst = wasm_get_default_memory(module_inst);
     if (!memory_inst) {
         return 0;
     }
 
-    if (memory_inst->memory_data <= addr && addr < memory_inst->memory_data_end)
+    if (bounds_checks) {
+        if (memory_inst->memory_data <= addr
+            && addr < memory_inst->memory_data_end)
+            return (uint32)(addr - memory_inst->memory_data);
+    }
+    /* If bounds checks is disabled, return the offset directly */
+    else if (addr != NULL) {
         return (uint32)(addr - memory_inst->memory_data);
+    }
 
     return 0;
 }
@@ -460,6 +505,7 @@ wasm_check_app_addr_and_convert(WASMModuleInstance *module_inst, bool is_str,
 {
     WASMMemoryInstance *memory_inst = wasm_get_default_memory(module_inst);
     uint8 *native_addr;
+    bool bounds_checks;
 
     if (!memory_inst) {
         goto fail;
@@ -467,6 +513,15 @@ wasm_check_app_addr_and_convert(WASMModuleInstance *module_inst, bool is_str,
 
     native_addr = memory_inst->memory_data + app_buf_addr;
 
+    bounds_checks = is_bounds_checks_enabled((wasm_module_inst_t)module_inst);
+
+    if (!bounds_checks) {
+        if (app_buf_addr == 0) {
+            native_addr = NULL;
+        }
+        goto success;
+    }
+
     /* No need to check the app_offset and buf_size if memory access
        boundary check with hardware trap is enabled */
 #ifndef OS_ENABLE_HW_BOUND_CHECK
@@ -492,6 +547,7 @@ wasm_check_app_addr_and_convert(WASMModuleInstance *module_inst, bool is_str,
     }
 #endif
 
+success:
     *p_native_addr = (void *)native_addr;
     return true;
 fail:
@@ -552,7 +608,7 @@ wasm_enlarge_memory_internal(WASMModuleInstance *module, uint32 inc_page_count)
     }
 
 #if WASM_ENABLE_SHARED_MEMORY != 0
-    if (memory->is_shared) {
+    if (shared_memory_is_shared(memory)) {
         memory->num_bytes_per_page = num_bytes_per_page;
         memory->cur_page_count = total_page_count;
         memory->max_page_count = max_page_count;
@@ -713,52 +769,14 @@ wasm_enlarge_memory(WASMModuleInstance *module, uint32 inc_page_count)
     bool ret = false;
 
 #if WASM_ENABLE_SHARED_MEMORY != 0
-    WASMSharedMemNode *node =
-        wasm_module_get_shared_memory((WASMModuleCommon *)module->module);
-    if (node)
-        os_mutex_lock(&node->shared_mem_lock);
+    if (module->memory_count > 0)
+        shared_memory_lock(module->memories[0]);
 #endif
     ret = wasm_enlarge_memory_internal(module, inc_page_count);
 #if WASM_ENABLE_SHARED_MEMORY != 0
-    if (node)
-        os_mutex_unlock(&node->shared_mem_lock);
+    if (module->memory_count > 0)
+        shared_memory_unlock(module->memories[0]);
 #endif
 
     return ret;
 }
-
-#if !defined(OS_ENABLE_HW_BOUND_CHECK)              \
-    || WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS == 0 \
-    || WASM_ENABLE_BULK_MEMORY != 0
-uint32
-wasm_get_num_bytes_per_page(WASMMemoryInstance *memory, void *node)
-{
-    uint32 num_bytes_per_page;
-#if WASM_ENABLE_SHARED_MEMORY != 0
-    if (node)
-        os_mutex_lock(&((WASMSharedMemNode *)node)->shared_mem_lock);
-#endif
-    num_bytes_per_page = memory->num_bytes_per_page;
-#if WASM_ENABLE_SHARED_MEMORY != 0
-    if (node)
-        os_mutex_unlock(&((WASMSharedMemNode *)node)->shared_mem_lock);
-#endif
-    return num_bytes_per_page;
-}
-
-uint32
-wasm_get_linear_memory_size(WASMMemoryInstance *memory, void *node)
-{
-    uint32 linear_mem_size;
-#if WASM_ENABLE_SHARED_MEMORY != 0
-    if (node)
-        os_mutex_lock(&((WASMSharedMemNode *)node)->shared_mem_lock);
-#endif
-    linear_mem_size = memory->num_bytes_per_page * memory->cur_page_count;
-#if WASM_ENABLE_SHARED_MEMORY != 0
-    if (node)
-        os_mutex_unlock(&((WASMSharedMemNode *)node)->shared_mem_lock);
-#endif
-    return linear_mem_size;
-}
-#endif

+ 0 - 10
core/iwasm/common/wasm_memory.h

@@ -24,16 +24,6 @@ wasm_runtime_memory_destroy();
 unsigned
 wasm_runtime_memory_pool_size();
 
-#if !defined(OS_ENABLE_HW_BOUND_CHECK)              \
-    || WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS == 0 \
-    || WASM_ENABLE_BULK_MEMORY != 0
-uint32
-wasm_get_num_bytes_per_page(WASMMemoryInstance *memory, void *node);
-
-uint32
-wasm_get_linear_memory_size(WASMMemoryInstance *memory, void *node);
-#endif
-
 #ifdef __cplusplus
 }
 #endif

+ 63 - 18
core/iwasm/common/wasm_runtime_common.c

@@ -1215,7 +1215,8 @@ wasm_runtime_unload(WASMModuleCommon *module)
 }
 
 WASMModuleInstanceCommon *
-wasm_runtime_instantiate_internal(WASMModuleCommon *module, bool is_sub_inst,
+wasm_runtime_instantiate_internal(WASMModuleCommon *module,
+                                  WASMModuleInstanceCommon *parent,
                                   WASMExecEnv *exec_env_main, uint32 stack_size,
                                   uint32 heap_size, char *error_buf,
                                   uint32 error_buf_size)
@@ -1223,14 +1224,14 @@ wasm_runtime_instantiate_internal(WASMModuleCommon *module, bool is_sub_inst,
 #if WASM_ENABLE_INTERP != 0
     if (module->module_type == Wasm_Module_Bytecode)
         return (WASMModuleInstanceCommon *)wasm_instantiate(
-            (WASMModule *)module, is_sub_inst, exec_env_main, stack_size,
-            heap_size, error_buf, error_buf_size);
+            (WASMModule *)module, (WASMModuleInstance *)parent, exec_env_main,
+            stack_size, heap_size, error_buf, error_buf_size);
 #endif
 #if WASM_ENABLE_AOT != 0
     if (module->module_type == Wasm_Module_AoT)
         return (WASMModuleInstanceCommon *)aot_instantiate(
-            (AOTModule *)module, is_sub_inst, exec_env_main, stack_size,
-            heap_size, error_buf, error_buf_size);
+            (AOTModule *)module, (AOTModuleInstance *)parent, exec_env_main,
+            stack_size, heap_size, error_buf, error_buf_size);
 #endif
     set_error_buf(error_buf, error_buf_size,
                   "Instantiate module failed, invalid module type");
@@ -1243,7 +1244,7 @@ wasm_runtime_instantiate(WASMModuleCommon *module, uint32 stack_size,
                          uint32 error_buf_size)
 {
     return wasm_runtime_instantiate_internal(
-        module, false, NULL, stack_size, heap_size, error_buf, error_buf_size);
+        module, NULL, NULL, stack_size, heap_size, error_buf, error_buf_size);
 }
 
 void
@@ -2330,10 +2331,8 @@ wasm_set_exception(WASMModuleInstance *module_inst, const char *exception)
     WASMExecEnv *exec_env = NULL;
 
 #if WASM_ENABLE_SHARED_MEMORY != 0
-    WASMSharedMemNode *node =
-        wasm_module_get_shared_memory((WASMModuleCommon *)module_inst->module);
-    if (node)
-        os_mutex_lock(&node->shared_mem_lock);
+    if (module_inst->memory_count > 0)
+        shared_memory_lock(module_inst->memories[0]);
 #endif
     if (exception) {
         snprintf(module_inst->cur_exception, sizeof(module_inst->cur_exception),
@@ -2343,8 +2342,8 @@ wasm_set_exception(WASMModuleInstance *module_inst, const char *exception)
         module_inst->cur_exception[0] = '\0';
     }
 #if WASM_ENABLE_SHARED_MEMORY != 0
-    if (node)
-        os_mutex_unlock(&node->shared_mem_lock);
+    if (module_inst->memory_count > 0)
+        shared_memory_unlock(module_inst->memories[0]);
 #endif
 
 #if WASM_ENABLE_THREAD_MGR != 0
@@ -2406,10 +2405,8 @@ wasm_copy_exception(WASMModuleInstance *module_inst, char *exception_buf)
     bool has_exception = false;
 
 #if WASM_ENABLE_SHARED_MEMORY != 0
-    WASMSharedMemNode *node =
-        wasm_module_get_shared_memory((WASMModuleCommon *)module_inst->module);
-    if (node)
-        os_mutex_lock(&node->shared_mem_lock);
+    if (module_inst->memory_count > 0)
+        shared_memory_lock(module_inst->memories[0]);
 #endif
     if (module_inst->cur_exception[0] != '\0') {
         /* NULL is passed if the caller is not interested in getting the
@@ -2423,8 +2420,8 @@ wasm_copy_exception(WASMModuleInstance *module_inst, char *exception_buf)
         has_exception = true;
     }
 #if WASM_ENABLE_SHARED_MEMORY != 0
-    if (node)
-        os_mutex_unlock(&node->shared_mem_lock);
+    if (module_inst->memory_count > 0)
+        shared_memory_unlock(module_inst->memories[0]);
 #endif
 
     return has_exception;
@@ -2502,6 +2499,54 @@ wasm_runtime_get_custom_data(WASMModuleInstanceCommon *module_inst_comm)
     return module_inst->custom_data;
 }
 
+#if WASM_CONFIGUABLE_BOUNDS_CHECKS != 0
+void
+wasm_runtime_set_bounds_checks(WASMModuleInstanceCommon *module_inst,
+                               bool enable)
+{
+    /* Record in the instance's extra data (`e`) whether bounds checks are
+     * disabled: the flag stored is the inverse of `enable`.
+     * Always disable bounds checks if hw bounds checks are enabled, i.e.
+     * with OS_ENABLE_HW_BOUND_CHECK defined the caller's `enable` value
+     * is overridden to false. */
+#ifdef OS_ENABLE_HW_BOUND_CHECK
+    enable = false;
+#endif
+#if WASM_ENABLE_INTERP != 0
+    if (module_inst->module_type == Wasm_Module_Bytecode) {
+        ((WASMModuleInstanceExtra *)((WASMModuleInstance *)module_inst)->e)
+            ->disable_bounds_checks = enable ? false : true;
+    }
+#endif
+
+#if WASM_ENABLE_AOT != 0
+    if (module_inst->module_type == Wasm_Module_AoT) {
+        ((AOTModuleInstanceExtra *)((AOTModuleInstance *)module_inst)->e)
+            ->disable_bounds_checks = enable ? false : true;
+    }
+#endif
+}
+
+bool
+wasm_runtime_is_bounds_checks_enabled(WASMModuleInstanceCommon *module_inst)
+{
+    /* Report the inverse of the disable_bounds_checks flag kept in the
+     * instance's extra data (`e`). If neither the bytecode nor the AoT
+     * branch below applies (module type does not match, or the branch is
+     * compiled out), the function reports true. */
+#if WASM_ENABLE_INTERP != 0
+    if (module_inst->module_type == Wasm_Module_Bytecode) {
+        return !((WASMModuleInstanceExtra *)((WASMModuleInstance *)module_inst)
+                     ->e)
+                    ->disable_bounds_checks;
+    }
+#endif
+
+#if WASM_ENABLE_AOT != 0
+    if (module_inst->module_type == Wasm_Module_AoT) {
+        return !((AOTModuleInstanceExtra *)((WASMModuleInstance *)module_inst)
+                     ->e)
+                    ->disable_bounds_checks;
+    }
+#endif
+
+    return true;
+}
+#endif
+
 uint32
 wasm_runtime_module_malloc_internal(WASMModuleInstanceCommon *module_inst,
                                     WASMExecEnv *exec_env, uint32 size,

+ 13 - 1
core/iwasm/common/wasm_runtime_common.h

@@ -546,7 +546,8 @@ wasm_runtime_unload(WASMModuleCommon *module);
 
 /* Internal API */
 WASMModuleInstanceCommon *
-wasm_runtime_instantiate_internal(WASMModuleCommon *module, bool is_sub_inst,
+wasm_runtime_instantiate_internal(WASMModuleCommon *module,
+                                  WASMModuleInstanceCommon *parent,
                                   WASMExecEnv *exec_env_main, uint32 stack_size,
                                   uint32 heap_size, char *error_buf,
                                   uint32 error_buf_size);
@@ -641,6 +642,17 @@ wasm_runtime_set_user_data(WASMExecEnv *exec_env, void *user_data);
 WASM_RUNTIME_API_EXTERN void *
 wasm_runtime_get_user_data(WASMExecEnv *exec_env);
 
+#if WASM_CONFIGUABLE_BOUNDS_CHECKS != 0
+/* See wasm_export.h for description */
+WASM_RUNTIME_API_EXTERN void
+wasm_runtime_set_bounds_checks(WASMModuleInstanceCommon *module_inst,
+                               bool enable);
+
+/* See wasm_export.h for description */
+WASM_RUNTIME_API_EXTERN bool
+wasm_runtime_is_bounds_checks_enabled(WASMModuleInstanceCommon *module_inst);
+#endif
+
 #ifdef OS_ENABLE_HW_BOUND_CHECK
 /* Access exception check guard page to trigger the signal handler */
 void

+ 86 - 115
core/iwasm/common/wasm_shared_memory.c

@@ -9,9 +9,16 @@
 #include "../libraries/thread-mgr/thread_manager.h"
 #endif
 
-static bh_list shared_memory_list_head;
-static bh_list *const shared_memory_list = &shared_memory_list_head;
-static korp_mutex shared_memory_list_lock;
+/*
+ * Note: this lock can be per memory.
+ *
+ * For now, just use a global because:
+ * - it's a bit cumbersome to extend WASMMemoryInstance w/o breaking
+ *   the AOT ABI.
+ * - If you care about performance, it's better to make the interpreters
+ *   use atomic ops.
+ */
+static korp_mutex _shared_memory_lock;
 
 /* clang-format off */
 enum {
@@ -37,7 +44,7 @@ typedef struct AtomicWaitNode {
 static HashMap *wait_map;
 
 static uint32
-wait_address_hash(void *address);
+wait_address_hash(const void *address);
 
 static bool
 wait_address_equal(void *h1, void *h2);
@@ -48,17 +55,15 @@ destroy_wait_info(void *wait_info);
 bool
 wasm_shared_memory_init()
 {
-    if (os_mutex_init(&shared_memory_list_lock) != 0)
+    if (os_mutex_init(&_shared_memory_lock) != 0)
         return false;
-
     /* the wait map does not exist yet, create a new map */
     if (!(wait_map = bh_hash_map_create(32, true, (HashFunc)wait_address_hash,
                                         (KeyEqualFunc)wait_address_equal, NULL,
                                         destroy_wait_info))) {
-        os_mutex_destroy(&shared_memory_list_lock);
+        os_mutex_destroy(&_shared_memory_lock);
         return false;
     }
-
     return true;
 }
 
@@ -66,115 +71,84 @@ void
 wasm_shared_memory_destroy()
 {
     bh_hash_map_destroy(wait_map);
-    os_mutex_destroy(&shared_memory_list_lock);
+    os_mutex_destroy(&_shared_memory_lock);
 }
 
-static WASMSharedMemNode *
-search_module(WASMModuleCommon *module)
+uint32
+shared_memory_inc_reference(WASMMemoryInstance *memory)
 {
-    WASMSharedMemNode *node;
-
-    os_mutex_lock(&shared_memory_list_lock);
-    node = bh_list_first_elem(shared_memory_list);
-
-    while (node) {
-        if (module == node->module) {
-            os_mutex_unlock(&shared_memory_list_lock);
-            return node;
-        }
-        node = bh_list_elem_next(node);
-    }
-
-    os_mutex_unlock(&shared_memory_list_lock);
-    return NULL;
+    bh_assert(shared_memory_is_shared(memory));
+    uint32 old;
+#if BH_ATOMIC_32_IS_ATOMIC == 0
+    os_mutex_lock(&_shared_memory_lock);
+#endif
+    old = BH_ATOMIC_32_FETCH_ADD(memory->ref_count, 1);
+#if BH_ATOMIC_32_IS_ATOMIC == 0
+    os_mutex_unlock(&_shared_memory_lock);
+#endif
+    bh_assert(old >= 1);
+    bh_assert(old < UINT32_MAX);
+    return old + 1;
 }
 
-WASMSharedMemNode *
-wasm_module_get_shared_memory(WASMModuleCommon *module)
+uint32
+shared_memory_dec_reference(WASMMemoryInstance *memory)
 {
-    return search_module(module);
+    bh_assert(shared_memory_is_shared(memory));
+    uint32 old;
+#if BH_ATOMIC_32_IS_ATOMIC == 0
+    os_mutex_lock(&_shared_memory_lock);
+#endif
+    old = BH_ATOMIC_32_FETCH_SUB(memory->ref_count, 1);
+#if BH_ATOMIC_32_IS_ATOMIC == 0
+    os_mutex_unlock(&_shared_memory_lock);
+#endif
+    bh_assert(old > 0);
+    return old - 1;
 }
 
-int32
-shared_memory_inc_reference(WASMModuleCommon *module)
+bool
+shared_memory_is_shared(WASMMemoryInstance *memory)
 {
-    WASMSharedMemNode *node = search_module(module);
-    uint32 ref_count = -1;
-    if (node) {
-        os_mutex_lock(&node->lock);
-        ref_count = ++node->ref_count;
-        os_mutex_unlock(&node->lock);
-    }
-    return ref_count;
+    uint32 old;
+#if BH_ATOMIC_32_IS_ATOMIC == 0
+    os_mutex_lock(&_shared_memory_lock);
+#endif
+    old = BH_ATOMIC_32_LOAD(memory->ref_count);
+#if BH_ATOMIC_32_IS_ATOMIC == 0
+    os_mutex_unlock(&_shared_memory_lock);
+#endif
+    return old > 0;
 }
 
-int32
-shared_memory_dec_reference(WASMModuleCommon *module)
+static korp_mutex *
+shared_memory_get_lock_pointer(WASMMemoryInstance *memory)
 {
-    WASMSharedMemNode *node = search_module(module);
-    uint32 ref_count = 0;
-    if (node) {
-        os_mutex_lock(&node->lock);
-        ref_count = --node->ref_count;
-        os_mutex_unlock(&node->lock);
-        if (ref_count == 0) {
-            os_mutex_lock(&shared_memory_list_lock);
-            bh_list_remove(shared_memory_list, node);
-            os_mutex_unlock(&shared_memory_list_lock);
-
-            os_mutex_destroy(&node->shared_mem_lock);
-            os_mutex_destroy(&node->lock);
-            wasm_runtime_free(node);
-        }
-        return ref_count;
-    }
-
-    return -1;
+    bh_assert(memory != NULL);
+    return &_shared_memory_lock;
 }
 
-WASMMemoryInstanceCommon *
-shared_memory_get_memory_inst(WASMSharedMemNode *node)
+void
+shared_memory_lock(WASMMemoryInstance *memory)
 {
-    return node->memory_inst;
+    /*
+     * Note: exception logic is currently abusing this lock.
+     * cf. https://github.com/bytecodealliance/wasm-micro-runtime/issues/2407
+     */
+    bh_assert(memory != NULL);
+    os_mutex_lock(&_shared_memory_lock);
 }
 
-WASMSharedMemNode *
-shared_memory_set_memory_inst(WASMModuleCommon *module,
-                              WASMMemoryInstanceCommon *memory)
+void
+shared_memory_unlock(WASMMemoryInstance *memory)
 {
-    WASMSharedMemNode *node;
-    bh_list_status ret;
-
-    if (!(node = wasm_runtime_malloc(sizeof(WASMSharedMemNode))))
-        return NULL;
-
-    node->module = module;
-    node->memory_inst = memory;
-    node->ref_count = 1;
-
-    if (os_mutex_init(&node->shared_mem_lock) != 0) {
-        wasm_runtime_free(node);
-        return NULL;
-    }
-
-    if (os_mutex_init(&node->lock) != 0) {
-        os_mutex_destroy(&node->shared_mem_lock);
-        wasm_runtime_free(node);
-        return NULL;
-    }
-
-    os_mutex_lock(&shared_memory_list_lock);
-    ret = bh_list_insert(shared_memory_list, node);
-    bh_assert(ret == BH_LIST_SUCCESS);
-    os_mutex_unlock(&shared_memory_list_lock);
-
-    (void)ret;
-    return node;
+    bh_assert(memory != NULL);
+    os_mutex_unlock(&_shared_memory_lock);
 }
 
 /* Atomics wait && notify APIs */
 static uint32
-wait_address_hash(void *address)
+wait_address_hash(const void *address)
 {
     return (uint32)(uintptr_t)address;
 }
@@ -307,7 +281,7 @@ wasm_runtime_atomic_wait(WASMModuleInstanceCommon *module, void *address,
     WASMModuleInstance *module_inst = (WASMModuleInstance *)module;
     AtomicWaitInfo *wait_info;
     AtomicWaitNode *wait_node;
-    WASMSharedMemNode *node;
+    korp_mutex *lock;
 #if WASM_ENABLE_THREAD_MGR != 0
     WASMExecEnv *exec_env;
 #endif
@@ -322,7 +296,7 @@ wasm_runtime_atomic_wait(WASMModuleInstanceCommon *module, void *address,
     }
 
     /* Currently we have only one memory instance */
-    if (!module_inst->memories[0]->is_shared) {
+    if (!shared_memory_is_shared(module_inst->memories[0])) {
         wasm_runtime_set_exception(module, "expected shared memory");
         return -1;
     }
@@ -340,30 +314,29 @@ wasm_runtime_atomic_wait(WASMModuleInstanceCommon *module, void *address,
     bh_assert(exec_env);
 #endif
 
-    node = search_module((WASMModuleCommon *)module_inst->module);
-    bh_assert(node);
+    lock = shared_memory_get_lock_pointer(module_inst->memories[0]);
 
     /* Lock the shared_mem_lock for the whole atomic wait process,
        and use it to os_cond_reltimedwait */
-    os_mutex_lock(&node->shared_mem_lock);
+    os_mutex_lock(lock);
 
     no_wait = (!wait64 && *(uint32 *)address != (uint32)expect)
               || (wait64 && *(uint64 *)address != expect);
 
     if (no_wait) {
-        os_mutex_unlock(&node->shared_mem_lock);
+        os_mutex_unlock(lock);
         return 1;
     }
 
     if (!(wait_node = wasm_runtime_malloc(sizeof(AtomicWaitNode)))) {
-        os_mutex_unlock(&node->shared_mem_lock);
+        os_mutex_unlock(lock);
         wasm_runtime_set_exception(module, "failed to create wait node");
         return -1;
     }
     memset(wait_node, 0, sizeof(AtomicWaitNode));
 
     if (0 != os_cond_init(&wait_node->wait_cond)) {
-        os_mutex_unlock(&node->shared_mem_lock);
+        os_mutex_unlock(lock);
         wasm_runtime_free(wait_node);
         wasm_runtime_set_exception(module, "failed to init wait cond");
         return -1;
@@ -375,7 +348,7 @@ wasm_runtime_atomic_wait(WASMModuleInstanceCommon *module, void *address,
     wait_info = acquire_wait_info(address, wait_node);
 
     if (!wait_info) {
-        os_mutex_unlock(&node->shared_mem_lock);
+        os_mutex_unlock(lock);
         os_cond_destroy(&wait_node->wait_cond);
         wasm_runtime_free(wait_node);
         wasm_runtime_set_exception(module, "failed to acquire wait_info");
@@ -390,7 +363,7 @@ wasm_runtime_atomic_wait(WASMModuleInstanceCommon *module, void *address,
         if (timeout < 0) {
             /* wait forever until it is notified or terminated;
                here we keep waiting and checking every second */
-            os_cond_reltimedwait(&wait_node->wait_cond, &node->shared_mem_lock,
+            os_cond_reltimedwait(&wait_node->wait_cond, lock,
                                  (uint64)timeout_1sec);
             if (wait_node->status == S_NOTIFIED /* notified by atomic.notify */
 #if WASM_ENABLE_THREAD_MGR != 0
@@ -404,8 +377,7 @@ wasm_runtime_atomic_wait(WASMModuleInstanceCommon *module, void *address,
         else {
             timeout_wait =
                 timeout_left < timeout_1sec ? timeout_left : timeout_1sec;
-            os_cond_reltimedwait(&wait_node->wait_cond, &node->shared_mem_lock,
-                                 timeout_wait);
+            os_cond_reltimedwait(&wait_node->wait_cond, lock, timeout_wait);
             if (wait_node->status == S_NOTIFIED /* notified by atomic.notify */
                 || timeout_left <= timeout_wait /* time out */
 #if WASM_ENABLE_THREAD_MGR != 0
@@ -433,7 +405,7 @@ wasm_runtime_atomic_wait(WASMModuleInstanceCommon *module, void *address,
     /* Release wait info if no wait nodes are attached */
     map_try_release_wait_info(wait_map, wait_info, address);
 
-    os_mutex_unlock(&node->shared_mem_lock);
+    os_mutex_unlock(lock);
 
     return is_timeout ? 2 : 0;
 }
@@ -445,7 +417,7 @@ wasm_runtime_atomic_notify(WASMModuleInstanceCommon *module, void *address,
     WASMModuleInstance *module_inst = (WASMModuleInstance *)module;
     uint32 notify_result;
     AtomicWaitInfo *wait_info;
-    WASMSharedMemNode *node;
+    korp_mutex *lock;
     bool out_of_bounds;
 
     bh_assert(module->module_type == Wasm_Module_Bytecode
@@ -461,31 +433,30 @@ wasm_runtime_atomic_notify(WASMModuleInstanceCommon *module, void *address,
     }
 
     /* Currently we have only one memory instance */
-    if (!module_inst->memories[0]->is_shared) {
+    if (!shared_memory_is_shared(module_inst->memories[0])) {
         /* Always return 0 for unshared linear memory since there is
            no way to create a waiter on it */
         return 0;
     }
 
-    node = search_module((WASMModuleCommon *)module_inst->module);
-    bh_assert(node);
+    lock = shared_memory_get_lock_pointer(module_inst->memories[0]);
 
     /* Lock the shared_mem_lock for the whole atomic notify process,
        and use it to os_cond_signal */
-    os_mutex_lock(&node->shared_mem_lock);
+    os_mutex_lock(lock);
 
     wait_info = acquire_wait_info(address, NULL);
 
     /* Nobody wait on this address */
     if (!wait_info) {
-        os_mutex_unlock(&node->shared_mem_lock);
+        os_mutex_unlock(lock);
         return 0;
     }
 
     /* Notify each wait node in the wait list */
     notify_result = notify_wait_list(wait_info->wait_list, count);
 
-    os_mutex_unlock(&node->shared_mem_lock);
+    os_mutex_unlock(lock);
 
     return notify_result;
 }

+ 12 - 32
core/iwasm/common/wasm_shared_memory.h

@@ -7,53 +7,33 @@
 #define _WASM_SHARED_MEMORY_H
 
 #include "bh_common.h"
-#if WASM_ENABLE_INTERP != 0
-#include "wasm_runtime.h"
-#endif
-#if WASM_ENABLE_AOT != 0
-#include "aot_runtime.h"
-#endif
+#include "../interpreter/wasm_runtime.h"
+#include "wasm_runtime_common.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-typedef struct WASMSharedMemNode {
-    bh_list_link l;
-    /* Lock */
-    korp_mutex lock;
-    /* The module reference */
-    WASMModuleCommon *module;
-    /* The memory information */
-    WASMMemoryInstanceCommon *memory_inst;
-    /* Lock used for atomic operations */
-    korp_mutex shared_mem_lock;
-
-    /* reference count */
-    uint32 ref_count;
-} WASMSharedMemNode;
-
 bool
 wasm_shared_memory_init();
 
 void
 wasm_shared_memory_destroy();
 
-WASMSharedMemNode *
-wasm_module_get_shared_memory(WASMModuleCommon *module);
+uint32
+shared_memory_inc_reference(WASMMemoryInstance *memory);
 
-int32
-shared_memory_inc_reference(WASMModuleCommon *module);
+uint32
+shared_memory_dec_reference(WASMMemoryInstance *memory);
 
-int32
-shared_memory_dec_reference(WASMModuleCommon *module);
+bool
+shared_memory_is_shared(WASMMemoryInstance *memory);
 
-WASMMemoryInstanceCommon *
-shared_memory_get_memory_inst(WASMSharedMemNode *node);
+void
+shared_memory_lock(WASMMemoryInstance *memory);
 
-WASMSharedMemNode *
-shared_memory_set_memory_inst(WASMModuleCommon *module,
-                              WASMMemoryInstanceCommon *memory);
+void
+shared_memory_unlock(WASMMemoryInstance *memory);
 
 uint32
 wasm_runtime_atomic_wait(WASMModuleInstanceCommon *module, void *address,

+ 48 - 0
core/iwasm/common/wasm_suspend_flags.h

@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2023 Amazon Inc.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _WASM_SUSPEND_FLAGS_H
+#define _WASM_SUSPEND_FLAGS_H
+
+#include "bh_atomic.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Need to terminate */
+#define WASM_SUSPEND_FLAG_TERMINATE 0x1
+/* Need to suspend */
+#define WASM_SUSPEND_FLAG_SUSPEND 0x2
+/* Need to go into breakpoint */
+#define WASM_SUSPEND_FLAG_BREAKPOINT 0x4
+/* Return from pthread_exit */
+#define WASM_SUSPEND_FLAG_EXIT 0x8
+
+/* Holder for the suspend flag word that the WASM_SUSPEND_FLAGS_* macros
+   in this header load and modify. */
+typedef union WASMSuspendFlags {
+    /* Flag word accessed through the BH_ATOMIC_32_* operations;
+       presumably a bitwise OR of the WASM_SUSPEND_FLAG_* values
+       defined above -- TODO confirm */
+    bh_atomic_32_t flags;
+    /* NOTE(review): not referenced anywhere in this header; looks like it
+       only widens the union to pointer size -- confirm intent */
+    uintptr_t __padding__;
+} WASMSuspendFlags;
+
+/*
+ * Accessors for WASMSuspendFlags.  They forward to the BH_ATOMIC_32_*
+ * operations on the `flags` member; macro arguments are parenthesized so
+ * that arbitrary lvalue expressions can be passed.
+ */
+#define WASM_SUSPEND_FLAGS_IS_ATOMIC BH_ATOMIC_32_IS_ATOMIC
+#define WASM_SUSPEND_FLAGS_GET(s_flags) BH_ATOMIC_32_LOAD((s_flags).flags)
+#define WASM_SUSPEND_FLAGS_FETCH_OR(s_flags, val) \
+    BH_ATOMIC_32_FETCH_OR((s_flags).flags, val)
+#define WASM_SUSPEND_FLAGS_FETCH_AND(s_flags, val) \
+    BH_ATOMIC_32_FETCH_AND((s_flags).flags, val)
+
+/*
+ * LOCK/UNLOCK expand to a no-op when the 32-bit operations are atomic,
+ * and to os_mutex_lock/os_mutex_unlock on the given mutex otherwise.
+ * Every variant is a single expression without a trailing semicolon, so
+ * the caller supplies the `;` and the macros stay safe inside
+ * un-braced if/else statements.
+ */
+#if WASM_SUSPEND_FLAGS_IS_ATOMIC != 0
+#define WASM_SUSPEND_FLAGS_LOCK(lock) (void)0
+#define WASM_SUSPEND_FLAGS_UNLOCK(lock) (void)0
+#else /* else of WASM_SUSPEND_FLAGS_IS_ATOMIC */
+#define WASM_SUSPEND_FLAGS_LOCK(lock) os_mutex_lock(&(lock))
+#define WASM_SUSPEND_FLAGS_UNLOCK(lock) os_mutex_unlock(&(lock))
+#endif /* WASM_SUSPEND_FLAGS_IS_ATOMIC */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* end of _WASM_SUSPEND_FLAGS_H */

+ 2 - 2
core/iwasm/compilation/aot.h

@@ -44,7 +44,7 @@ typedef WASMFuncType AOTFuncType;
 typedef WASMExport AOTExport;
 
 #if WASM_ENABLE_DEBUG_AOT != 0
-typedef void *dwar_extractor_handle_t;
+typedef void *dwarf_extractor_handle_t;
 #endif
 
 typedef enum AOTIntCond {
@@ -286,7 +286,7 @@ typedef struct AOTCompData {
 
     WASMModule *wasm_module;
 #if WASM_ENABLE_DEBUG_AOT != 0
-    dwar_extractor_handle_t extractor;
+    dwarf_extractor_handle_t extractor;
 #endif
 } AOTCompData;
 

+ 1 - 70
core/iwasm/compilation/aot_compiler.c

@@ -2619,64 +2619,6 @@ verify_module(AOTCompContext *comp_ctx)
     return true;
 }
 
-/* Check whether the target supports hardware atomic instructions */
-static bool
-aot_require_lower_atomic_pass(AOTCompContext *comp_ctx)
-{
-    bool ret = false;
-    if (!strncmp(comp_ctx->target_arch, "riscv", 5)) {
-        char *feature =
-            LLVMGetTargetMachineFeatureString(comp_ctx->target_machine);
-
-        if (feature) {
-            if (!strstr(feature, "+a")) {
-                ret = true;
-            }
-            LLVMDisposeMessage(feature);
-        }
-    }
-    return ret;
-}
-
-/* Check whether the target needs to expand switch to if/else */
-static bool
-aot_require_lower_switch_pass(AOTCompContext *comp_ctx)
-{
-    bool ret = false;
-
-    /* IR switch/case will cause .rodata relocation on riscv/xtensa */
-    if (!strncmp(comp_ctx->target_arch, "riscv", 5)
-        || !strncmp(comp_ctx->target_arch, "xtensa", 6)) {
-        ret = true;
-    }
-
-    return ret;
-}
-
-static bool
-apply_passes_for_indirect_mode(AOTCompContext *comp_ctx)
-{
-    LLVMPassManagerRef common_pass_mgr;
-
-    if (!(common_pass_mgr = LLVMCreatePassManager())) {
-        aot_set_last_error("create pass manager failed");
-        return false;
-    }
-
-    aot_add_expand_memory_op_pass(common_pass_mgr);
-
-    if (aot_require_lower_atomic_pass(comp_ctx))
-        LLVMAddLowerAtomicPass(common_pass_mgr);
-
-    if (aot_require_lower_switch_pass(comp_ctx))
-        LLVMAddLowerSwitchPass(common_pass_mgr);
-
-    LLVMRunPassManager(common_pass_mgr, comp_ctx->module);
-
-    LLVMDisposePassManager(common_pass_mgr);
-    return true;
-}
-
 bool
 aot_compile_wasm(AOTCompContext *comp_ctx)
 {
@@ -2716,17 +2658,6 @@ aot_compile_wasm(AOTCompContext *comp_ctx)
            possible core dump. */
         bh_print_time("Begin to run llvm optimization passes");
         aot_apply_llvm_new_pass_manager(comp_ctx, comp_ctx->module);
-
-        /* Run specific passes for AOT indirect mode in last since general
-           optimization may create some intrinsic function calls like
-           llvm.memset, so let's remove these function calls here. */
-        if (!comp_ctx->is_jit_mode && comp_ctx->is_indirect_mode) {
-            bh_print_time("Begin to run optimization passes "
-                          "for indirect mode");
-            if (!apply_passes_for_indirect_mode(comp_ctx)) {
-                return false;
-            }
-        }
         bh_print_time("Finish llvm optimization passes");
     }
 
@@ -2767,7 +2698,7 @@ aot_compile_wasm(AOTCompContext *comp_ctx)
         if (comp_ctx->stack_sizes != NULL) {
             LLVMOrcJITTargetAddress addr;
             if ((err = LLVMOrcLLLazyJITLookup(comp_ctx->orc_jit, &addr,
-                                              aot_stack_sizes_name))) {
+                                              aot_stack_sizes_alias_name))) {
                 aot_handle_llvm_errmsg("failed to look up stack_sizes", err);
                 return false;
             }

+ 20 - 0
core/iwasm/compilation/aot_emit_aot_file.c

@@ -3095,6 +3095,13 @@ is_relocation_section(AOTObjectData *obj_data, LLVMSectionIteratorRef sec_itr)
     return false;
 }
 
+/*
+ * Tell whether a section name is one of those this file treats as
+ * read-only: ".rel.text", ".rela.text", ".rela.literal" or ".text".
+ * The comparison is an exact, case-sensitive string match.
+ */
+static bool
+is_readonly_section(const char *name)
+{
+    static const char *const readonly_names[] = {
+        ".rel.text",
+        ".rela.text",
+        ".rela.literal",
+        ".text",
+    };
+    size_t idx;
+
+    for (idx = 0; idx < sizeof(readonly_names) / sizeof(readonly_names[0]);
+         idx++) {
+        if (strcmp(name, readonly_names[idx]) == 0)
+            return true;
+    }
+
+    return false;
+}
+
 static bool
 get_relocation_groups_count(AOTObjectData *obj_data, uint32 *p_count)
 {
@@ -3192,6 +3199,19 @@ aot_resolve_object_relocation_groups(AOTObjectData *obj_data)
                 relocation_group->section_name = ".rel.text";
             }
 
+            /*
+             * Relocations in read-only sections are problematic,
+             * especially for XIP on platforms which don't have
+             * copy-on-write mappings.
+             */
+            if (obj_data->comp_ctx->is_indirect_mode
+                && is_readonly_section(relocation_group->section_name)) {
+                LOG_WARNING("%" PRIu32
+                            " text relocations in %s section for indirect mode",
+                            relocation_group->relocation_count,
+                            relocation_group->section_name);
+            }
+
             relocation_group++;
         }
         LLVMMoveToNextSection(sec_itr);

+ 55 - 1
core/iwasm/compilation/aot_llvm.c

@@ -7,6 +7,7 @@
 #include "aot_llvm_extra2.h"
 #include "aot_compiler.h"
 #include "aot_emit_exception.h"
+#include "aot_emit_table.h"
 #include "../aot/aot_runtime.h"
 #include "../aot/aot_intrinsic.h"
 
@@ -230,6 +231,17 @@ aot_estimate_stack_usage_for_function_call(const AOTCompContext *comp_ctx,
     return size;
 }
 
+/*
+ * Return get_tbl_inst_offset() evaluated at the index one past the last
+ * table (imported tables plus defined tables), rounded with
+ * align_uint(..., 8).
+ * NOTE(review): presumably this is where the instance "extra" data starts
+ * inside the AOT module instance -- confirm against the runtime layout.
+ */
+static uint32
+get_inst_extra_offset(AOTCompContext *comp_ctx)
+{
+    const AOTCompData *data = comp_ctx->comp_data;
+    uint32 total_tables = data->import_table_count + data->table_count;
+    uint64 raw_offset = get_tbl_inst_offset(comp_ctx, NULL, total_tables);
+
+    /* The offset must fit in 32 bits before it is aligned and narrowed */
+    bh_assert(raw_offset <= UINT_MAX);
+
+    return (uint32)align_uint(raw_offset, 8);
+}
+
 /*
  * a "precheck" function performs a few things before calling wrapped_func.
  *
@@ -327,9 +339,36 @@ aot_add_precheck_function(AOTCompContext *comp_ctx, LLVMModuleRef module,
     /*
      * load the value for this wrapped function from the stack_sizes array
      */
+    LLVMValueRef stack_sizes;
+    if (comp_ctx->is_indirect_mode) {
+        uint32 offset_u32;
+        LLVMValueRef offset;
+        LLVMValueRef stack_sizes_p;
+
+        offset_u32 = get_inst_extra_offset(comp_ctx);
+        offset_u32 += offsetof(AOTModuleInstanceExtra, stack_sizes);
+        offset = I32_CONST(offset_u32);
+        if (!offset) {
+            goto fail;
+        }
+        stack_sizes_p =
+            LLVMBuildInBoundsGEP2(b, INT8_TYPE, func_ctx->aot_inst, &offset, 1,
+                                  "aot_inst_stack_sizes_p");
+        if (!stack_sizes_p) {
+            goto fail;
+        }
+        stack_sizes =
+            LLVMBuildLoad2(b, INT32_PTR_TYPE, stack_sizes_p, "stack_sizes");
+        if (!stack_sizes) {
+            goto fail;
+        }
+    }
+    else {
+        stack_sizes = comp_ctx->stack_sizes;
+    }
     LLVMValueRef func_index_const = I32_CONST(func_index);
     LLVMValueRef sizes =
-        LLVMBuildBitCast(b, comp_ctx->stack_sizes, INT32_PTR_TYPE, "sizes");
+        LLVMBuildBitCast(b, stack_sizes, INT32_PTR_TYPE, "sizes");
     if (!sizes) {
         goto fail;
     }
@@ -584,6 +623,15 @@ aot_add_llvm_func(AOTCompContext *comp_ctx, LLVMModuleRef module,
                                     prefix)))
         goto fail;
 
+    if (comp_ctx->is_indirect_mode) {
+        /* avoid LUT relocations ("switch-table") */
+        LLVMAttributeRef attr_no_jump_tables = LLVMCreateStringAttribute(
+            comp_ctx->context, "no-jump-tables", strlen("no-jump-tables"),
+            "true", strlen("true"));
+        LLVMAddAttributeAtIndex(func, LLVMAttributeFunctionIndex,
+                                attr_no_jump_tables);
+    }
+
     if (need_precheck) {
         if (!comp_ctx->is_jit_mode)
             LLVMSetLinkage(func, LLVMInternalLinkage);
@@ -2270,6 +2318,12 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option)
     if (option->enable_stack_estimation)
         comp_ctx->enable_stack_estimation = true;
 
+    if (option->llvm_passes)
+        comp_ctx->llvm_passes = option->llvm_passes;
+
+    if (option->builtin_intrinsics)
+        comp_ctx->builtin_intrinsics = option->builtin_intrinsics;
+
     if (option->enable_gc)
         comp_ctx->enable_gc = true;
 

+ 4 - 0
core/iwasm/compilation/aot_llvm.h

@@ -422,6 +422,8 @@ typedef struct AOTCompContext {
 
     const char *stack_usage_file;
     char stack_usage_temp_file[64];
+    const char *llvm_passes;
+    const char *builtin_intrinsics;
 } AOTCompContext;
 
 enum {
@@ -461,6 +463,8 @@ typedef struct AOTCompOption {
     char **custom_sections;
     uint32 custom_sections_count;
     const char *stack_usage_file;
+    const char *llvm_passes;
+    const char *builtin_intrinsics;
 } AOTCompOption, *aot_comp_option_t;
 
 bool

+ 49 - 95
core/iwasm/compilation/aot_llvm_extra.cpp

@@ -27,7 +27,7 @@
 #include <llvm/IR/Module.h>
 #include <llvm/IR/Instructions.h>
 #include <llvm/IR/IntrinsicInst.h>
-#include <llvm/IR/LegacyPassManager.h>
+#include <llvm/IR/PassManager.h>
 #include <llvm/Support/CommandLine.h>
 #include <llvm/Support/ErrorHandling.h>
 #include <llvm/Target/CodeGenCWrappers.h>
@@ -73,115 +73,56 @@ LLVM_C_EXTERN_C_END
 
 ExitOnError ExitOnErr;
 
-class ExpandMemoryOpPass : public llvm::ModulePass
+class ExpandMemoryOpPass : public PassInfoMixin<ExpandMemoryOpPass>
 {
   public:
-    static char ID;
-
-    ExpandMemoryOpPass()
-      : ModulePass(ID)
-    {}
-
-    bool runOnModule(Module &M) override;
-
-    bool expandMemIntrinsicUses(Function &F);
-    StringRef getPassName() const override
-    {
-        return "Expand memory operation intrinsics";
-    }
-
-    void getAnalysisUsage(AnalysisUsage &AU) const override
-    {
-        AU.addRequired<TargetTransformInfoWrapperPass>();
-    }
+    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 };
 
-char ExpandMemoryOpPass::ID = 0;
-
-bool
-ExpandMemoryOpPass::expandMemIntrinsicUses(Function &F)
+/*
+ * Function pass: replace every memcpy/memmove/memset intrinsic call in F
+ * with an explicit loop (expandMemCpyAsLoop / expandMemMoveAsLoop /
+ * expandMemSetAsLoop) and erase the original call.
+ *
+ * Returns PreservedAnalyses::all() when F contained no such call and
+ * PreservedAnalyses::none() otherwise.
+ */
+PreservedAnalyses
+ExpandMemoryOpPass::run(Function &F, FunctionAnalysisManager &AM)
 {
-    Intrinsic::ID ID = F.getIntrinsicID();
-    bool Changed = false;
-
-    for (auto I = F.user_begin(), E = F.user_end(); I != E;) {
-        Instruction *Inst = cast<Instruction>(*I);
-        ++I;
-
-        switch (ID) {
-            case Intrinsic::memcpy:
-            {
-                auto *Memcpy = cast<MemCpyInst>(Inst);
-                Function *ParentFunc = Memcpy->getParent()->getParent();
-                const TargetTransformInfo &TTI =
-                    getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
-                        *ParentFunc);
-                expandMemCpyAsLoop(Memcpy, TTI);
-                Changed = true;
-                Memcpy->eraseFromParent();
-                break;
+    SmallVector<MemIntrinsic *, 16> MemCalls;
+
+    /* Iterate over all instructions in the function, looking for memcpy,
+     * memmove, and memset.  The calls are only collected here; they are
+     * expanded in a second loop so that the instruction lists are not
+     * modified while being iterated. */
+
+    for (auto &BB : F) {
+        for (auto &Inst : BB) {
+            if (auto *Memcpy = dyn_cast_or_null<MemCpyInst>(&Inst)) {
+                MemCalls.push_back(Memcpy);
             }
-            case Intrinsic::memmove:
-            {
-                auto *Memmove = cast<MemMoveInst>(Inst);
-                expandMemMoveAsLoop(Memmove);
-                Changed = true;
-                Memmove->eraseFromParent();
-                break;
+            else if (auto *Memmove = dyn_cast_or_null<MemMoveInst>(&Inst)) {
+                MemCalls.push_back(Memmove);
             }
-            case Intrinsic::memset:
-            {
-                auto *Memset = cast<MemSetInst>(Inst);
-                expandMemSetAsLoop(Memset);
-                Changed = true;
-                Memset->eraseFromParent();
-                break;
+            else if (auto *Memset = dyn_cast_or_null<MemSetInst>(&Inst)) {
+                MemCalls.push_back(Memset);
             }
-            default:
-                break;
         }
     }
 
-    return Changed;
-}
-
-bool
-ExpandMemoryOpPass::runOnModule(Module &M)
-{
-    bool Changed = false;
-
-    for (Function &F : M) {
-        if (!F.isDeclaration())
-            continue;
-
-        switch (F.getIntrinsicID()) {
-            case Intrinsic::memcpy:
-            case Intrinsic::memmove:
-            case Intrinsic::memset:
-                if (expandMemIntrinsicUses(F))
-                    Changed = true;
-                break;
-
-            default:
-                break;
+    for (MemIntrinsic *MemCall : MemCalls) {
+        if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
+            Function *ParentFunc = Memcpy->getParent()->getParent();
+            const TargetTransformInfo &TTI =
+                AM.getResult<TargetIRAnalysis>(*ParentFunc);
+            expandMemCpyAsLoop(Memcpy, TTI);
+            Memcpy->eraseFromParent();
+        }
+        else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
+            expandMemMoveAsLoop(Memmove);
+            Memmove->eraseFromParent();
+        }
+        else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
+            expandMemSetAsLoop(Memset);
+            Memset->eraseFromParent();
         }
     }
 
-    return Changed;
-}
+    /* Nothing was rewritten: every analysis is still valid */
+    if (MemCalls.empty())
+        return PreservedAnalyses::all();
 
-void
-aot_add_expand_memory_op_pass(LLVMPassManagerRef pass)
-{
-    reinterpret_cast<legacy::PassManager *>(pass)->add(
-        new ExpandMemoryOpPass());
-}
-
-void
-aot_add_simple_loop_unswitch_pass(LLVMPassManagerRef pass)
-{
-    reinterpret_cast<legacy::PassManager *>(pass)->add(
-        createSimpleLoopUnswitchLegacyPass());
+    /* The loop expansions insert new basic blocks and branches, so the
+       CFG-based analyses cannot be reported as preserved */
+    return PreservedAnalyses::none();
 }
 
 bool
@@ -373,6 +314,10 @@ aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
 
         MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
 
+        if (comp_ctx->llvm_passes) {
+            ExitOnErr(PB.parsePassPipeline(MPM, comp_ctx->llvm_passes));
+        }
+
         if (!disable_llvm_lto) {
             /* Apply LTO for AOT mode */
             if (comp_ctx->comp_data->func_count >= 10
@@ -386,6 +331,15 @@ aot_apply_llvm_new_pass_manager(AOTCompContext *comp_ctx, LLVMModuleRef module)
         else {
             MPM.addPass(PB.buildPerModuleDefaultPipeline(OL));
         }
+
+        /* Run the passes specific to AOT indirect mode last: general
+            optimization may create intrinsic function calls such as
+            llvm.memset, and these calls are removed here. */
+        if (comp_ctx->is_indirect_mode) {
+            FunctionPassManager FPM1;
+            FPM1.addPass(ExpandMemoryOpPass());
+            MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM1)));
+        }
     }
 
     MPM.run(*M, MAM);

+ 16 - 16
core/iwasm/compilation/debug/dwarf_extractor.cpp

@@ -28,25 +28,25 @@
 
 using namespace lldb;
 
-typedef struct dwar_extractor {
+typedef struct dwarf_extractor {
     SBDebugger debugger;
     SBTarget target;
     SBModule module;
 
-} dwar_extractor;
+} dwarf_extractor;
 
-#define TO_HANDLE(extractor) (dwar_extractor_handle_t)(extractor)
+#define TO_HANDLE(extractor) (dwarf_extractor_handle_t)(extractor)
 
-#define TO_EXTACTOR(handle) (dwar_extractor *)(handle)
+#define TO_EXTACTOR(handle) (dwarf_extractor *)(handle)
 
 static bool is_debugger_initialized;
 
-dwar_extractor_handle_t
+dwarf_extractor_handle_t
 create_dwarf_extractor(AOTCompData *comp_data, char *file_name)
 {
     char *arch = NULL;
     char *platform = NULL;
-    dwar_extractor *extractor = NULL;
+    dwarf_extractor *extractor = NULL;
 
     //__attribute__((constructor)) may be better?
     if (!is_debugger_initialized) {
@@ -61,7 +61,7 @@ create_dwarf_extractor(AOTCompData *comp_data, char *file_name)
     SBError error;
     SBFileSpec exe_file_spec(file_name, true);
 
-    if (!(extractor = new dwar_extractor())) {
+    if (!(extractor = new dwarf_extractor())) {
         LOG_ERROR("Create Dwarf Extractor error: failed to allocate memory");
         goto fail3;
     }
@@ -101,9 +101,9 @@ fail3:
 }
 
 void
-destroy_dwarf_extractor(dwar_extractor_handle_t handle)
+destroy_dwarf_extractor(dwarf_extractor_handle_t handle)
 {
-    dwar_extractor *extractor = TO_EXTACTOR(handle);
+    dwarf_extractor *extractor = TO_EXTACTOR(handle);
     if (!extractor)
         return;
     extractor->debugger.DeleteTarget(extractor->target);
@@ -116,7 +116,7 @@ destroy_dwarf_extractor(dwar_extractor_handle_t handle)
 LLVMMetadataRef
 dwarf_gen_file_info(const AOTCompContext *comp_ctx)
 {
-    dwar_extractor *extractor;
+    dwarf_extractor *extractor;
     int units_number;
     LLVMMetadataRef file_info = NULL;
     const char *file_name;
@@ -193,7 +193,7 @@ dwarf_gen_mock_vm_info(AOTCompContext *comp_ctx)
 LLVMMetadataRef
 dwarf_gen_comp_unit_info(const AOTCompContext *comp_ctx)
 {
-    dwar_extractor *extractor;
+    dwarf_extractor *extractor;
     int units_number;
     LLVMMetadataRef comp_unit = NULL;
 
@@ -292,7 +292,7 @@ lldb_function_to_function_dbi(const AOTCompContext *comp_ctx,
     SBTypeList function_args = function.GetType().GetFunctionArgumentTypes();
     SBType return_type = function.GetType().GetFunctionReturnType();
     const size_t num_function_args = function_args.GetSize();
-    dwar_extractor *extractor;
+    dwarf_extractor *extractor;
 
     if (!(extractor = TO_EXTACTOR(comp_ctx->comp_data->extractor)))
         return NULL;
@@ -393,7 +393,7 @@ dwarf_gen_func_info(const AOTCompContext *comp_ctx,
                     const AOTFuncContext *func_ctx)
 {
     LLVMMetadataRef func_info = NULL;
-    dwar_extractor *extractor;
+    dwarf_extractor *extractor;
     uint64_t vm_offset;
     AOTFunc *func = func_ctx->aot_func;
 
@@ -423,7 +423,7 @@ dwarf_get_func_name(const AOTCompContext *comp_ctx,
                     const AOTFuncContext *func_ctx, char *name, int len)
 {
     LLVMMetadataRef func_info = NULL;
-    dwar_extractor *extractor;
+    dwarf_extractor *extractor;
     uint64_t vm_offset;
     AOTFunc *func = func_ctx->aot_func;
 
@@ -454,7 +454,7 @@ dwarf_gen_location(const AOTCompContext *comp_ctx,
                    const AOTFuncContext *func_ctx, uint64_t vm_offset)
 {
     LLVMMetadataRef location_info = NULL;
-    dwar_extractor *extractor;
+    dwarf_extractor *extractor;
     AOTFunc *func = func_ctx->aot_func;
 
     if (!(extractor = TO_EXTACTOR(comp_ctx->comp_data->extractor)))
@@ -493,7 +493,7 @@ dwarf_gen_func_ret_location(const AOTCompContext *comp_ctx,
                             const AOTFuncContext *func_ctx)
 {
     LLVMMetadataRef func_info = NULL;
-    dwar_extractor *extractor;
+    dwarf_extractor *extractor;
     uint64_t vm_offset;
     AOTFunc *func = func_ctx->aot_func;
     LLVMMetadataRef location_info = NULL;

+ 2 - 2
core/iwasm/compilation/debug/dwarf_extractor.h

@@ -18,7 +18,7 @@ typedef unsigned int LLDBLangType;
 
 struct AOTCompData;
 typedef struct AOTCompData *aot_comp_data_t;
-typedef void *dwar_extractor_handle_t;
+typedef void *dwarf_extractor_handle_t;
 
 struct AOTCompContext;
 typedef struct AOTCompContext AOTCompContext;
@@ -26,7 +26,7 @@ typedef struct AOTCompContext AOTCompContext;
 struct AOTFuncContext;
 
 typedef struct AOTFuncContext AOTFuncContext;
-dwar_extractor_handle_t
+dwarf_extractor_handle_t
 create_dwarf_extractor(aot_comp_data_t comp_data, char *file_name);
 
 LLVMMetadataRef

+ 20 - 20
core/iwasm/fast-jit/fe/jit_emit_table.c

@@ -90,27 +90,27 @@ fail:
 
 static int
 wasm_init_table(WASMModuleInstance *inst, uint32 tbl_idx, uint32 elem_idx,
-                uint32 dst, uint32 len, uint32 src)
+                uint32 dst_offset, uint32 len, uint32 src_offset)
 {
     WASMTableInstance *tbl;
     uint32 tbl_sz;
     WASMTableSeg *elem;
     uint32 elem_len;
 
-    tbl = inst->tables[tbl_idx];
-    tbl_sz = tbl->cur_size;
-    if (dst > tbl_sz || tbl_sz - dst < len)
-        goto out_of_bounds;
-
     elem = inst->module->table_segments + elem_idx;
     elem_len = elem->function_count;
-    if (src > elem_len || elem_len - src < len)
+    if (offset_len_out_of_bounds(src_offset, len, elem_len))
+        goto out_of_bounds;
+
+    tbl = inst->tables[tbl_idx];
+    tbl_sz = tbl->cur_size;
+    if (offset_len_out_of_bounds(dst_offset, len, tbl_sz))
         goto out_of_bounds;
 
     bh_memcpy_s((uint8 *)tbl + offsetof(WASMTableInstance, elems)
-                    + dst * sizeof(table_elem_type_t),
-                (uint32)((tbl_sz - dst) * sizeof(table_elem_type_t)),
-                elem->func_indexes + src,
+                    + dst_offset * sizeof(table_elem_type_t),
+                (uint32)((tbl_sz - dst_offset) * sizeof(table_elem_type_t)),
+                elem->func_indexes + src_offset,
                 (uint32)(len * sizeof(table_elem_type_t)));
 
     return 0;
@@ -160,14 +160,14 @@ wasm_copy_table(WASMModuleInstance *inst, uint32 src_tbl_idx,
     WASMTableInstance *src_tbl, *dst_tbl;
     uint32 src_tbl_sz, dst_tbl_sz;
 
-    src_tbl = inst->tables[src_tbl_idx];
-    src_tbl_sz = src_tbl->cur_size;
-    if (src_offset > src_tbl_sz || src_tbl_sz - src_offset < len)
-        goto out_of_bounds;
-
     dst_tbl = inst->tables[dst_tbl_idx];
     dst_tbl_sz = dst_tbl->cur_size;
-    if (dst_offset > dst_tbl_sz || dst_tbl_sz - dst_offset < len)
+    if (offset_len_out_of_bounds(dst_offset, len, dst_tbl_sz))
+        goto out_of_bounds;
+
+    src_tbl = inst->tables[src_tbl_idx];
+    src_tbl_sz = src_tbl->cur_size;
+    if (offset_len_out_of_bounds(src_offset, len, src_tbl_sz))
         goto out_of_bounds;
 
     bh_memmove_s(
@@ -267,7 +267,7 @@ fail:
 }
 
 static int
-wasm_fill_table(WASMModuleInstance *inst, uint32 tbl_idx, uint32 dst,
+wasm_fill_table(WASMModuleInstance *inst, uint32 tbl_idx, uint32 dst_offset,
                 uintptr_t val, uint32 len)
 {
     WASMTableInstance *tbl;
@@ -276,11 +276,11 @@ wasm_fill_table(WASMModuleInstance *inst, uint32 tbl_idx, uint32 dst,
     tbl = inst->tables[tbl_idx];
     tbl_sz = tbl->cur_size;
 
-    if (dst > tbl_sz || tbl_sz - dst < len)
+    if (offset_len_out_of_bounds(dst_offset, len, tbl_sz))
         goto out_of_bounds;
 
-    for (; len != 0; dst++, len--) {
-        tbl->elems[dst] = val;
+    for (; len != 0; dst_offset++, len--) {
+        tbl->elems[dst_offset] = val;
     }
 
     return 0;

+ 4 - 2
core/iwasm/include/aot_export.h

@@ -26,8 +26,8 @@ void
 aot_destroy_comp_data(aot_comp_data_t comp_data);
 
 #if WASM_ENABLE_DEBUG_AOT != 0
-typedef void *dwar_extractor_handle_t;
-dwar_extractor_handle_t
+typedef void *dwarf_extractor_handle_t;
+dwarf_extractor_handle_t
 create_dwarf_extractor(aot_comp_data_t comp_data, char *file_name);
 #endif
 
@@ -68,6 +68,8 @@ typedef struct AOTCompOption {
     char **custom_sections;
     uint32_t custom_sections_count;
     const char *stack_usage_file;
+    const char *llvm_passes;
+    const char *builtin_intrinsics;
 } AOTCompOption, *aot_comp_option_t;
 
 bool

+ 19 - 0
core/iwasm/include/wasm_export.h

@@ -917,6 +917,25 @@ wasm_runtime_set_custom_data(wasm_module_inst_t module_inst,
 WASM_RUNTIME_API_EXTERN void *
 wasm_runtime_get_custom_data(wasm_module_inst_t module_inst);
 
+/**
+ * Set the memory bounds checks flag of a WASM module instance.
+ * 
+ * @param module_inst the WASM module instance
+ * @param enable the flag to enable/disable the memory bounds checks
+ */
+WASM_RUNTIME_API_EXTERN void
+wasm_runtime_set_bounds_checks(wasm_module_inst_t module_inst,
+                               bool enable);
+/**
+ * Check if the memory bounds checks flag is enabled for a WASM module instance.
+ * 
+ * @param module_inst the WASM module instance
+ *
+ * @return true if the memory bounds checks flag is enabled, false otherwise
+ */
+WASM_RUNTIME_API_EXTERN bool
+wasm_runtime_is_bounds_checks_enabled(
+    wasm_module_inst_t module_inst);
 /**
  * Allocate memory from the heap of WASM module instance
  *

+ 18 - 1
core/iwasm/interpreter/wasm.h

@@ -891,7 +891,6 @@ typedef struct WASMBranchBlock {
     uint32 cell_num;
 } WASMBranchBlock;
 
-/* Execution environment, e.g. stack info */
 /**
  * Align an unsigned value on an alignment boundary.
  *
@@ -907,6 +906,24 @@ align_uint(unsigned v, unsigned b)
     return (v + m) & ~m;
 }
 
+/**
+ * Check whether a piece of data is out of range
+ *
+ * @param offset the offset that the data starts
+ * @param len the length of the data
+ * @param max_size the maximum size of the data range
+ *
+ * @return true if out of range, false otherwise
+ */
+inline static bool
+offset_len_out_of_bounds(uint32 offset, uint32 len, uint32 max_size)
+{
+    if (offset + len < offset /* integer overflow */
+        || offset + len > max_size)
+        return true;
+    return false;
+}
+
 /**
  * Return the hash value of c string.
  */

+ 132 - 111
core/iwasm/interpreter/wasm_interp_classic.c

@@ -45,26 +45,28 @@ typedef float64 CellType_F64;
 
 #if !defined(OS_ENABLE_HW_BOUND_CHECK) \
     || WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS == 0
-#define CHECK_MEMORY_OVERFLOW(bytes)                            \
-    do {                                                        \
-        uint64 offset1 = (uint64)offset + (uint64)addr;         \
-        if (offset1 + bytes <= (uint64)get_linear_mem_size())   \
-            /* If offset1 is in valid range, maddr must also    \
-               be in valid range, no need to check it again. */ \
-            maddr = memory->memory_data + offset1;              \
-        else                                                    \
-            goto out_of_bounds;                                 \
+#define CHECK_MEMORY_OVERFLOW(bytes)                             \
+    do {                                                         \
+        uint64 offset1 = (uint64)offset + (uint64)addr;          \
+        if (disable_bounds_checks                                \
+            || offset1 + bytes <= (uint64)get_linear_mem_size()) \
+            /* If offset1 is in valid range, maddr must also     \
+               be in valid range, no need to check it again. */  \
+            maddr = memory->memory_data + offset1;               \
+        else                                                     \
+            goto out_of_bounds;                                  \
     } while (0)
 
-#define CHECK_BULK_MEMORY_OVERFLOW(start, bytes, maddr)       \
-    do {                                                      \
-        uint64 offset1 = (uint32)(start);                     \
-        if (offset1 + bytes <= (uint64)get_linear_mem_size()) \
-            /* App heap space is not valid space for          \
-             bulk memory operation */                         \
-            maddr = memory->memory_data + offset1;            \
-        else                                                  \
-            goto out_of_bounds;                               \
+#define CHECK_BULK_MEMORY_OVERFLOW(start, bytes, maddr)          \
+    do {                                                         \
+        uint64 offset1 = (uint32)(start);                        \
+        if (disable_bounds_checks                                \
+            || offset1 + bytes <= (uint64)get_linear_mem_size()) \
+            /* App heap space is not valid space for             \
+             bulk memory operation */                            \
+            maddr = memory->memory_data + offset1;               \
+        else                                                     \
+            goto out_of_bounds;                                  \
     } while (0)
 #else
 #define CHECK_MEMORY_OVERFLOW(bytes)                    \
@@ -844,28 +846,28 @@ trunc_f64_to_int(WASMModuleInstance *module, uint32 *frame_sp, float64 src_min,
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS();                            \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = (uint32)(*(uint8 *)maddr);                       \
             *(uint8 *)maddr = (uint8)(readv op sval);                \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         else if (opcode == WASM_OP_ATOMIC_RMW_I32_##OP_NAME##16_U) { \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS();                            \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = (uint32)LOAD_U16(maddr);                         \
             STORE_U16(maddr, (uint16)(readv op sval));               \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         else {                                                       \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS();                            \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = LOAD_I32(maddr);                                 \
             STORE_U32(maddr, readv op sval);                         \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         PUSH_I32(readv);                                             \
         break;                                                       \
@@ -884,39 +886,39 @@ trunc_f64_to_int(WASMModuleInstance *module, uint32 *frame_sp, float64 src_min,
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS();                            \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = (uint64)(*(uint8 *)maddr);                       \
             *(uint8 *)maddr = (uint8)(readv op sval);                \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         else if (opcode == WASM_OP_ATOMIC_RMW_I64_##OP_NAME##16_U) { \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS();                            \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = (uint64)LOAD_U16(maddr);                         \
             STORE_U16(maddr, (uint16)(readv op sval));               \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         else if (opcode == WASM_OP_ATOMIC_RMW_I64_##OP_NAME##32_U) { \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS();                            \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = (uint64)LOAD_U32(maddr);                         \
             STORE_U32(maddr, (uint32)(readv op sval));               \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         else {                                                       \
             uint64 op_result;                                        \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 8, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS();                            \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = (uint64)LOAD_I64(maddr);                         \
             op_result = readv op sval;                               \
             STORE_I64(maddr, op_result);                             \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         PUSH_I64(readv);                                             \
         break;                                                       \
@@ -1240,21 +1242,33 @@ wasm_interp_call_func_import(WASMModuleInstance *module_inst,
         os_mutex_unlock(&exec_env->wait_lock);                         \
     } while (0)
 #else
-#define CHECK_SUSPEND_FLAGS()                                             \
-    do {                                                                  \
-        os_mutex_lock(&exec_env->wait_lock);                              \
-        if (exec_env->suspend_flags.flags != 0) {                         \
-            if (exec_env->suspend_flags.flags & 0x01) {                   \
-                /* terminate current thread */                            \
-                os_mutex_unlock(&exec_env->wait_lock);                    \
-                return;                                                   \
-            }                                                             \
-            while (exec_env->suspend_flags.flags & 0x02) {                \
-                /* suspend current thread */                              \
-                os_cond_wait(&exec_env->wait_cond, &exec_env->wait_lock); \
-            }                                                             \
-        }                                                                 \
-        os_mutex_unlock(&exec_env->wait_lock);                            \
+#if WASM_SUSPEND_FLAGS_IS_ATOMIC != 0
+/* The lock is only needed when the suspend_flags is atomic; otherwise
+   the lock is already taken at the time when SUSPENSION_LOCK() is called. */
+#define SUSPENSION_LOCK() os_mutex_lock(&exec_env->wait_lock);
+#define SUSPENSION_UNLOCK() os_mutex_unlock(&exec_env->wait_lock);
+#else
+#define SUSPENSION_LOCK()
+#define SUSPENSION_UNLOCK()
+#endif
+
+#define CHECK_SUSPEND_FLAGS()                                         \
+    do {                                                              \
+        WASM_SUSPEND_FLAGS_LOCK(exec_env->wait_lock);                 \
+        if (WASM_SUSPEND_FLAGS_GET(exec_env->suspend_flags)           \
+            & WASM_SUSPEND_FLAG_TERMINATE) {                          \
+            /* terminate current thread */                            \
+            WASM_SUSPEND_FLAGS_UNLOCK(exec_env->wait_lock);           \
+            return;                                                   \
+        }                                                             \
+        while (WASM_SUSPEND_FLAGS_GET(exec_env->suspend_flags)        \
+               & WASM_SUSPEND_FLAG_SUSPEND) {                         \
+            /* suspend current thread */                              \
+            SUSPENSION_LOCK()                                         \
+            os_cond_wait(&exec_env->wait_cond, &exec_env->wait_lock); \
+            SUSPENSION_UNLOCK()                                       \
+        }                                                             \
+        WASM_SUSPEND_FLAGS_UNLOCK(exec_env->wait_lock);               \
     } while (0)
 #endif /* WASM_ENABLE_DEBUG_INTERP */
 #endif /* WASM_ENABLE_THREAD_MGR */
@@ -1322,10 +1336,6 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                                WASMFunctionInstance *cur_func,
                                WASMInterpFrame *prev_frame)
 {
-#if WASM_ENABLE_SHARED_MEMORY != 0
-    WASMSharedMemNode *node =
-        wasm_module_get_shared_memory((WASMModuleCommon *)module->module);
-#endif
     WASMMemoryInstance *memory = wasm_get_default_memory(module);
 #if !defined(OS_ENABLE_HW_BOUND_CHECK)              \
     || WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS == 0 \
@@ -1358,6 +1368,15 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
     uint8 local_type, *global_addr;
     uint32 cache_index, type_index, param_cell_num, cell_num;
     uint8 value_type;
+#if !defined(OS_ENABLE_HW_BOUND_CHECK) \
+    || WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS == 0
+#if WASM_CONFIGUABLE_BOUNDS_CHECKS != 0
+    bool disable_bounds_checks = !wasm_runtime_is_bounds_checks_enabled(
+        (WASMModuleInstanceCommon *)module);
+#else
+    bool disable_bounds_checks = false;
+#endif
+#endif
 #if WASM_ENABLE_GC != 0
     WASMObjectRef gc_obj;
     WASMStructObjectRef struct_obj;
@@ -4283,7 +4302,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                     case WASM_OP_TABLE_INIT:
                     {
                         uint32 tbl_idx, elem_idx;
-                        uint64 n, s, d;
+                        uint32 n, s, d;
                         WASMTableInstance *tbl_inst;
 #if WASM_ENABLE_GC != 0
                         void **table_elems;
@@ -4302,20 +4321,21 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         s = (uint32)POP_I32();
                         d = (uint32)POP_I32();
 
-                        /* TODO: what if the element is not passive? */
-
-                        if (!n) {
-                            break;
-                        }
-
-                        if (n + s > module->module->table_segments[elem_idx]
-                                        .function_count
-                            || d + n > tbl_inst->cur_size) {
+                        if (offset_len_out_of_bounds(
+                                s, n,
+                                module->module->table_segments[elem_idx]
+                                    .function_count)
+                            || offset_len_out_of_bounds(d, n,
+                                                        tbl_inst->cur_size)) {
                             wasm_set_exception(module,
                                                "out of bounds table access");
                             goto got_exception;
                         }
 
+                        if (!n) {
+                            break;
+                        }
+
                         if (module->module->table_segments[elem_idx]
                                 .is_dropped) {
                             wasm_set_exception(module,
@@ -4377,7 +4397,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                     case WASM_OP_TABLE_COPY:
                     {
                         uint32 src_tbl_idx, dst_tbl_idx;
-                        uint64 n, s, d;
+                        uint32 n, s, d;
                         WASMTableInstance *src_tbl_inst, *dst_tbl_inst;
 
                         read_leb_uint32(frame_ip, frame_ip_end, dst_tbl_idx);
@@ -4394,8 +4414,10 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         s = (uint32)POP_I32();
                         d = (uint32)POP_I32();
 
-                        if (d + n > dst_tbl_inst->cur_size
-                            || s + n > src_tbl_inst->cur_size) {
+                        if (offset_len_out_of_bounds(d, n,
+                                                     dst_tbl_inst->cur_size)
+                            || offset_len_out_of_bounds(
+                                s, n, src_tbl_inst->cur_size)) {
                             wasm_set_exception(module,
                                                "out of bounds table access");
                             goto got_exception;
@@ -4475,10 +4497,8 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
 #endif
                         i = POP_I32();
 
-                        /* TODO: what if the element is not passive? */
-                        /* TODO: what if the element is dropped? */
-
-                        if (i + n > tbl_inst->cur_size) {
+                        if (offset_len_out_of_bounds(i, n,
+                                                     tbl_inst->cur_size)) {
                             wasm_set_exception(module,
                                                "out of bounds table access");
                             goto got_exception;
@@ -4595,23 +4615,23 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (opcode == WASM_OP_ATOMIC_I32_LOAD8_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint32)(*(uint8 *)maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I32_LOAD16_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint32)LOAD_U16(maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = LOAD_I32(maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
 
                         PUSH_I32(readv);
@@ -4630,30 +4650,30 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (opcode == WASM_OP_ATOMIC_I64_LOAD8_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)(*(uint8 *)maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I64_LOAD16_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)LOAD_U16(maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I64_LOAD32_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)LOAD_U32(maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 8, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = LOAD_I64(maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
 
                         PUSH_I64(readv);
@@ -4672,23 +4692,23 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (opcode == WASM_OP_ATOMIC_I32_STORE8) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             *(uint8 *)maddr = (uint8)sval;
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I32_STORE16) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             STORE_U16(maddr, (uint16)sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             STORE_U32(maddr, sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         break;
                     }
@@ -4706,30 +4726,30 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (opcode == WASM_OP_ATOMIC_I64_STORE8) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             *(uint8 *)maddr = (uint8)sval;
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I64_STORE16) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             STORE_U16(maddr, (uint16)sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I64_STORE32) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             STORE_U32(maddr, (uint32)sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 8, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             PUT_I64_TO_ADDR((uint32 *)maddr, sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         break;
                     }
@@ -4749,32 +4769,32 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                             CHECK_ATOMIC_MEMORY_ACCESS();
 
                             expect = (uint8)expect;
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint32)(*(uint8 *)maddr);
                             if (readv == expect)
                                 *(uint8 *)maddr = (uint8)(sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_RMW_I32_CMPXCHG16_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
 
                             expect = (uint16)expect;
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint32)LOAD_U16(maddr);
                             if (readv == expect)
                                 STORE_U16(maddr, (uint16)(sval));
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
 
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = LOAD_I32(maddr);
                             if (readv == expect)
                                 STORE_U32(maddr, sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         PUSH_I32(readv);
                         break;
@@ -4795,43 +4815,43 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                             CHECK_ATOMIC_MEMORY_ACCESS();
 
                             expect = (uint8)expect;
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)(*(uint8 *)maddr);
                             if (readv == expect)
                                 *(uint8 *)maddr = (uint8)(sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_RMW_I64_CMPXCHG16_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
 
                             expect = (uint16)expect;
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)LOAD_U16(maddr);
                             if (readv == expect)
                                 STORE_U16(maddr, (uint16)(sval));
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_RMW_I64_CMPXCHG32_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
 
                             expect = (uint32)expect;
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)LOAD_U32(maddr);
                             if (readv == expect)
                                 STORE_U32(maddr, (uint32)(sval));
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 8, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS();
 
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)LOAD_I64(maddr);
                             if (readv == expect)
                                 STORE_I64(maddr, sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         PUSH_I64(readv);
                         break;
@@ -4864,7 +4884,8 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
             HANDLE_OP(DEBUG_OP_BREAK)
             {
                 wasm_cluster_thread_send_signal(exec_env, WAMR_SIG_TRAP);
-                exec_env->suspend_flags.flags |= 2;
+                WASM_SUSPEND_FLAGS_FETCH_OR(exec_env->suspend_flags,
+                                            WASM_SUSPEND_FLAG_SUSPEND);
                 frame_ip--;
                 SYNC_ALL_TO_FRAME();
                 CHECK_SUSPEND_FLAGS();

+ 105 - 94
core/iwasm/interpreter/wasm_interp_fast.c

@@ -39,7 +39,8 @@ typedef float64 CellType_F64;
 #define CHECK_MEMORY_OVERFLOW(bytes)                             \
     do {                                                         \
         uint64 offset1 = (uint64)offset + (uint64)addr;          \
-        if (offset1 + bytes <= (uint64)get_linear_mem_size())    \
+        if (disable_bounds_checks                                \
+            || offset1 + bytes <= (uint64)get_linear_mem_size()) \
             /* If offset1 is in valid range, maddr must also     \
                 be in valid range, no need to check it again. */ \
             maddr = memory->memory_data + offset1;               \
@@ -47,15 +48,15 @@ typedef float64 CellType_F64;
             goto out_of_bounds;                                  \
     } while (0)
 
-#define CHECK_BULK_MEMORY_OVERFLOW(start, bytes, maddr) \
-    do {                                                \
-        uint64 offset1 = (uint32)(start);               \
-        if (offset1 + bytes <= get_linear_mem_size())   \
-            /* App heap space is not valid space for    \
-               bulk memory operation */                 \
-            maddr = memory->memory_data + offset1;      \
-        else                                            \
-            goto out_of_bounds;                         \
+#define CHECK_BULK_MEMORY_OVERFLOW(start, bytes, maddr)                        \
+    do {                                                                       \
+        uint64 offset1 = (uint32)(start);                                      \
+        if (disable_bounds_checks || offset1 + bytes <= get_linear_mem_size()) \
+            /* App heap space is not valid space for                           \
+               bulk memory operation */                                        \
+            maddr = memory->memory_data + offset1;                             \
+        else                                                                   \
+            goto out_of_bounds;                                                \
     } while (0)
 #else
 #define CHECK_MEMORY_OVERFLOW(bytes)                    \
@@ -603,28 +604,28 @@ init_frame_refs(uint8 *frame_ref, uint32 cell_num, WASMFunctionInstance *func)
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS(1);                           \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = (uint32)(*(uint8 *)maddr);                       \
             *(uint8 *)maddr = (uint8)(readv op sval);                \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         else if (opcode == WASM_OP_ATOMIC_RMW_I32_##OP_NAME##16_U) { \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS(2);                           \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = (uint32)LOAD_U16(maddr);                         \
             STORE_U16(maddr, (uint16)(readv op sval));               \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         else {                                                       \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS(4);                           \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = LOAD_I32(maddr);                                 \
             STORE_U32(maddr, readv op sval);                         \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         PUSH_I32(readv);                                             \
         break;                                                       \
@@ -643,39 +644,39 @@ init_frame_refs(uint8 *frame_ref, uint32 cell_num, WASMFunctionInstance *func)
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS(1);                           \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = (uint64)(*(uint8 *)maddr);                       \
             *(uint8 *)maddr = (uint8)(readv op sval);                \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         else if (opcode == WASM_OP_ATOMIC_RMW_I64_##OP_NAME##16_U) { \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS(2);                           \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = (uint64)LOAD_U16(maddr);                         \
             STORE_U16(maddr, (uint16)(readv op sval));               \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         else if (opcode == WASM_OP_ATOMIC_RMW_I64_##OP_NAME##32_U) { \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS(4);                           \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = (uint64)LOAD_U32(maddr);                         \
             STORE_U32(maddr, (uint32)(readv op sval));               \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         else {                                                       \
             uint64 op_result;                                        \
             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 8, maddr);     \
             CHECK_ATOMIC_MEMORY_ACCESS(8);                           \
                                                                      \
-            os_mutex_lock(&node->shared_mem_lock);                   \
+            shared_memory_lock(memory);                              \
             readv = (uint64)LOAD_I64(maddr);                         \
             op_result = readv op sval;                               \
             STORE_I64(maddr, op_result);                             \
-            os_mutex_unlock(&node->shared_mem_lock);                 \
+            shared_memory_unlock(memory);                            \
         }                                                            \
         PUSH_I64(readv);                                             \
         break;                                                       \
@@ -1317,18 +1318,17 @@ wasm_interp_call_func_import(WASMModuleInstance *module_inst,
 #endif
 
 #if WASM_ENABLE_THREAD_MGR != 0
-#define CHECK_SUSPEND_FLAGS()                           \
-    do {                                                \
-        os_mutex_lock(&exec_env->wait_lock);            \
-        if (exec_env->suspend_flags.flags != 0) {       \
-            if (exec_env->suspend_flags.flags & 0x01) { \
-                /* terminate current thread */          \
-                os_mutex_unlock(&exec_env->wait_lock);  \
-                return;                                 \
-            }                                           \
-            /* TODO: support suspend and breakpoint */  \
-        }                                               \
-        os_mutex_unlock(&exec_env->wait_lock);          \
+#define CHECK_SUSPEND_FLAGS()                               \
+    do {                                                    \
+        WASM_SUSPEND_FLAGS_LOCK(exec_env->wait_lock);       \
+        if (WASM_SUSPEND_FLAGS_GET(exec_env->suspend_flags) \
+            & WASM_SUSPEND_FLAG_TERMINATE) {                \
+            /* terminate current thread */                  \
+            WASM_SUSPEND_FLAGS_UNLOCK(exec_env->wait_lock); \
+            return;                                         \
+        }                                                   \
+        /* TODO: support suspend and breakpoint */          \
+        WASM_SUSPEND_FLAGS_UNLOCK(exec_env->wait_lock);     \
     } while (0)
 #endif
 
@@ -1419,10 +1419,6 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                                WASMFunctionInstance *cur_func,
                                WASMInterpFrame *prev_frame)
 {
-#if WASM_ENABLE_SHARED_MEMORY != 0
-    WASMSharedMemNode *node =
-        wasm_module_get_shared_memory((WASMModuleCommon *)module->module);
-#endif
     WASMMemoryInstance *memory = wasm_get_default_memory(module);
 #if !defined(OS_ENABLE_HW_BOUND_CHECK)              \
     || WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS == 0 \
@@ -1457,6 +1453,15 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
     uint8 *maddr = NULL;
     uint32 local_idx, local_offset, global_idx;
     uint8 opcode = 0, local_type, *global_addr;
+#if !defined(OS_ENABLE_HW_BOUND_CHECK) \
+    || WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS == 0
+#if WASM_CONFIGUABLE_BOUNDS_CHECKS != 0
+    bool disable_bounds_checks = !wasm_runtime_is_bounds_checks_enabled(
+        (WASMModuleInstanceCommon *)module);
+#else
+    bool disable_bounds_checks = false;
+#endif
+#endif
 #if WASM_ENABLE_GC != 0
     WASMObjectRef gc_obj;
     WASMStructObjectRef struct_obj;
@@ -4228,7 +4233,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                     case WASM_OP_TABLE_INIT:
                     {
                         uint32 tbl_idx, elem_idx;
-                        uint64 n, s, d;
+                        uint32 n, s, d;
                         WASMTableInstance *tbl_inst;
 #if WASM_ENABLE_GC != 0
                         void **table_elems;
@@ -4248,18 +4253,21 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         s = (uint32)POP_I32();
                         d = (uint32)POP_I32();
 
-                        if (!n) {
-                            break;
-                        }
-
-                        if (n + s > module->module->table_segments[elem_idx]
-                                        .function_count
-                            || d + n > tbl_inst->cur_size) {
+                        if (offset_len_out_of_bounds(
+                                s, n,
+                                module->module->table_segments[elem_idx]
+                                    .function_count)
+                            || offset_len_out_of_bounds(d, n,
+                                                        tbl_inst->cur_size)) {
                             wasm_set_exception(module,
                                                "out of bounds table access");
                             goto got_exception;
                         }
 
+                        if (!n) {
+                            break;
+                        }
+
                         if (module->module->table_segments[elem_idx]
                                 .is_dropped) {
                             wasm_set_exception(module,
@@ -4320,7 +4328,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                     case WASM_OP_TABLE_COPY:
                     {
                         uint32 src_tbl_idx, dst_tbl_idx;
-                        uint64 n, s, d;
+                        uint32 n, s, d;
                         WASMTableInstance *src_tbl_inst, *dst_tbl_inst;
 
                         dst_tbl_idx = read_uint32(frame_ip);
@@ -4337,8 +4345,10 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         s = (uint32)POP_I32();
                         d = (uint32)POP_I32();
 
-                        if (d + n > dst_tbl_inst->cur_size
-                            || s + n > src_tbl_inst->cur_size) {
+                        if (offset_len_out_of_bounds(d, n,
+                                                     dst_tbl_inst->cur_size)
+                            || offset_len_out_of_bounds(
+                                s, n, src_tbl_inst->cur_size)) {
                             wasm_set_exception(module,
                                                "out of bounds table access");
                             goto got_exception;
@@ -4419,7 +4429,8 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
 #endif
                         i = POP_I32();
 
-                        if (i + n > tbl_inst->cur_size) {
+                        if (offset_len_out_of_bounds(i, n,
+                                                     tbl_inst->cur_size)) {
                             wasm_set_exception(module,
                                                "out of bounds table access");
                             goto got_exception;
@@ -4534,23 +4545,23 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (opcode == WASM_OP_ATOMIC_I32_LOAD8_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(1);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint32)(*(uint8 *)maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I32_LOAD16_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(2);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint32)LOAD_U16(maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(4);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = LOAD_I32(maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
 
                         PUSH_I32(readv);
@@ -4569,30 +4580,30 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (opcode == WASM_OP_ATOMIC_I64_LOAD8_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(1);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)(*(uint8 *)maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I64_LOAD16_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(2);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)LOAD_U16(maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I64_LOAD32_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(4);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)LOAD_U32(maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 8, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(8);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = LOAD_I64(maddr);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
 
                         PUSH_I64(readv);
@@ -4610,23 +4621,23 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (opcode == WASM_OP_ATOMIC_I32_STORE8) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(1);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             *(uint8 *)maddr = (uint8)sval;
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I32_STORE16) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(2);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             STORE_U16(maddr, (uint16)sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(4);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             STORE_U32(maddr, sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         break;
                     }
@@ -4644,30 +4655,30 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         if (opcode == WASM_OP_ATOMIC_I64_STORE8) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 1, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(1);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             *(uint8 *)maddr = (uint8)sval;
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I64_STORE16) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(2);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             STORE_U16(maddr, (uint16)sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_I64_STORE32) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(4);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             STORE_U32(maddr, (uint32)sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 8, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(8);
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             STORE_I64(maddr, sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         break;
                     }
@@ -4687,32 +4698,32 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                             CHECK_ATOMIC_MEMORY_ACCESS(1);
 
                             expect = (uint8)expect;
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint32)(*(uint8 *)maddr);
                             if (readv == expect)
                                 *(uint8 *)maddr = (uint8)(sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_RMW_I32_CMPXCHG16_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(2);
 
                             expect = (uint16)expect;
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint32)LOAD_U16(maddr);
                             if (readv == expect)
                                 STORE_U16(maddr, (uint16)(sval));
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(4);
 
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = LOAD_I32(maddr);
                             if (readv == expect)
                                 STORE_U32(maddr, sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         PUSH_I32(readv);
                         break;
@@ -4733,43 +4744,43 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                             CHECK_ATOMIC_MEMORY_ACCESS(1);
 
                             expect = (uint8)expect;
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)(*(uint8 *)maddr);
                             if (readv == expect)
                                 *(uint8 *)maddr = (uint8)(sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_RMW_I64_CMPXCHG16_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 2, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(2);
 
                             expect = (uint16)expect;
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)LOAD_U16(maddr);
                             if (readv == expect)
                                 STORE_U16(maddr, (uint16)(sval));
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else if (opcode == WASM_OP_ATOMIC_RMW_I64_CMPXCHG32_U) {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 4, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(4);
 
                             expect = (uint32)expect;
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)LOAD_U32(maddr);
                             if (readv == expect)
                                 STORE_U32(maddr, (uint32)(sval));
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         else {
                             CHECK_BULK_MEMORY_OVERFLOW(addr + offset, 8, maddr);
                             CHECK_ATOMIC_MEMORY_ACCESS(8);
 
-                            os_mutex_lock(&node->shared_mem_lock);
+                            shared_memory_lock(memory);
                             readv = (uint64)LOAD_I64(maddr);
                             if (readv == expect)
                                 STORE_I64(maddr, sval);
-                            os_mutex_unlock(&node->shared_mem_lock);
+                            shared_memory_unlock(memory);
                         }
                         PUSH_I64(readv);
                         break;

+ 7 - 5
core/iwasm/interpreter/wasm_loader.c

@@ -8854,6 +8854,7 @@ static bool
 copy_params_to_dynamic_space(WASMLoaderContext *loader_ctx, bool is_if_block,
                              char *error_buf, uint32 error_buf_size)
 {
+    bool ret = false;
     int16 *frame_offset = NULL;
     uint8 *cells = NULL, cell;
     int16 *src_offsets = NULL;
@@ -8924,13 +8925,13 @@ copy_params_to_dynamic_space(WASMLoaderContext *loader_ctx, bool is_if_block,
     if (is_if_block)
         PUSH_OFFSET_TYPE(VALUE_TYPE_I32);
 
+    ret = true;
+
+fail:
     /* Free the emit data */
     wasm_runtime_free(emit_data);
 
-    return true;
-
-fail:
-    return false;
+    return ret;
 }
 #endif
 
@@ -10127,7 +10128,8 @@ re_scan:
 #endif
 
                 read_leb_uint32(p, p_end, vec_len);
-                if (!vec_len) {
+                if (vec_len != 1) {
+                    /* typed select must have exactly one result */
                     set_error_buf(error_buf, error_buf_size,
                                   "invalid result arity");
                     goto fail;

+ 2 - 1
core/iwasm/interpreter/wasm_mini_loader.c

@@ -6251,7 +6251,8 @@ re_scan:
                 uint8 vec_len, ref_type;
 
                 read_leb_uint32(p, p_end, vec_len);
-                if (!vec_len) {
+                if (vec_len != 1) {
+                    /* typed select must have exactly one result */
                     set_error_buf(error_buf, error_buf_size,
                                   "invalid result arity");
                     goto fail;

+ 48 - 80
core/iwasm/interpreter/wasm_runtime.c

@@ -125,11 +125,8 @@ memories_deinstantiate(WASMModuleInstance *module_inst,
                 }
 #endif
 #if WASM_ENABLE_SHARED_MEMORY != 0
-                if (memories[i]->is_shared) {
-                    int32 ref_count = shared_memory_dec_reference(
-                        (WASMModuleCommon *)module_inst->module);
-                    bh_assert(ref_count >= 0);
-
+                if (shared_memory_is_shared(memories[i])) {
+                    uint32 ref_count = shared_memory_dec_reference(memories[i]);
                     /* if the reference count is not zero,
                         don't free the memory */
                     if (ref_count > 0)
@@ -162,7 +159,8 @@ memories_deinstantiate(WASMModuleInstance *module_inst,
 }
 
 static WASMMemoryInstance *
-memory_instantiate(WASMModuleInstance *module_inst, WASMMemoryInstance *memory,
+memory_instantiate(WASMModuleInstance *module_inst, WASMModuleInstance *parent,
+                   WASMMemoryInstance *memory, uint32 memory_idx,
                    uint32 num_bytes_per_page, uint32 init_page_count,
                    uint32 max_page_count, uint32 heap_size, uint32 flags,
                    char *error_buf, uint32 error_buf_size)
@@ -183,22 +181,11 @@ memory_instantiate(WASMModuleInstance *module_inst, WASMMemoryInstance *memory,
     bool is_shared_memory = flags & 0x02 ? true : false;
 
     /* shared memory */
-    if (is_shared_memory) {
-        WASMSharedMemNode *node = wasm_module_get_shared_memory(
-            (WASMModuleCommon *)module_inst->module);
-        /* If the memory of this module has been instantiated,
-            return the memory instance directly */
-        if (node) {
-            uint32 ref_count;
-            ref_count = shared_memory_inc_reference(
-                (WASMModuleCommon *)module_inst->module);
-            bh_assert(ref_count > 0);
-            memory = (WASMMemoryInstance *)shared_memory_get_memory_inst(node);
-            bh_assert(memory);
-
-            (void)ref_count;
-            return memory;
-        }
+    if (is_shared_memory && parent != NULL) {
+        bh_assert(parent->memory_count > memory_idx);
+        memory = parent->memories[memory_idx];
+        shared_memory_inc_reference(memory);
+        return memory;
     }
 #endif /* end of WASM_ENABLE_SHARED_MEMORY */
 
@@ -391,24 +378,13 @@ memory_instantiate(WASMModuleInstance *module_inst, WASMMemoryInstance *memory,
 
 #if WASM_ENABLE_SHARED_MEMORY != 0
     if (is_shared_memory) {
-        memory->is_shared = true;
-        if (!shared_memory_set_memory_inst(
-                (WASMModuleCommon *)module_inst->module,
-                (WASMMemoryInstanceCommon *)memory)) {
-            set_error_buf(error_buf, error_buf_size, "allocate memory failed");
-            goto fail4;
-        }
+        memory->ref_count = 1;
     }
 #endif
 
     LOG_VERBOSE("Memory instantiate success.");
     return memory;
 
-#if WASM_ENABLE_SHARED_MEMORY != 0
-fail4:
-    if (heap_size > 0)
-        mem_allocator_destroy(memory->heap_handle);
-#endif
 fail3:
     if (heap_size > 0)
         wasm_runtime_free(memory->heap_handle);
@@ -431,7 +407,8 @@ fail1:
  */
 static WASMMemoryInstance **
 memories_instantiate(const WASMModule *module, WASMModuleInstance *module_inst,
-                     uint32 heap_size, char *error_buf, uint32 error_buf_size)
+                     WASMModuleInstance *parent, uint32 heap_size,
+                     char *error_buf, uint32 error_buf_size)
 {
     WASMImport *import;
     uint32 mem_index = 0, i,
@@ -477,26 +454,29 @@ memories_instantiate(const WASMModule *module, WASMModuleInstance *module_inst,
         else
 #endif
         {
-            if (!(memories[mem_index++] = memory_instantiate(
-                      module_inst, memory, num_bytes_per_page, init_page_count,
-                      max_page_count, actual_heap_size, flags, error_buf,
-                      error_buf_size))) {
+            if (!(memories[mem_index] = memory_instantiate(
+                      module_inst, parent, memory, mem_index,
+                      num_bytes_per_page, init_page_count, max_page_count,
+                      actual_heap_size, flags, error_buf, error_buf_size))) {
                 memories_deinstantiate(module_inst, memories, memory_count);
                 return NULL;
             }
+            mem_index++;
         }
     }
 
     /* instantiate memories from memory section */
     for (i = 0; i < module->memory_count; i++, memory++) {
-        if (!(memories[mem_index++] = memory_instantiate(
-                  module_inst, memory, module->memories[i].num_bytes_per_page,
+        if (!(memories[mem_index] = memory_instantiate(
+                  module_inst, parent, memory, mem_index,
+                  module->memories[i].num_bytes_per_page,
                   module->memories[i].init_page_count,
                   module->memories[i].max_page_count, heap_size,
                   module->memories[i].flags, error_buf, error_buf_size))) {
             memories_deinstantiate(module_inst, memories, memory_count);
             return NULL;
         }
+        mem_index++;
     }
 
     bh_assert(mem_index == memory_count);
@@ -1151,10 +1131,14 @@ execute_post_instantiate_functions(WASMModuleInstance *module_inst,
         goto fail;
     }
 
+#if WASM_ENABLE_LIBC_WASI != 0
     if (initialize_func
         && !wasm_call_function(exec_env, initialize_func, 0, NULL)) {
         goto fail;
     }
+#else
+    (void)initialize_func;
+#endif
 
     if (post_inst_func
         && !wasm_call_function(exec_env, post_inst_func, 0, NULL)) {
@@ -1344,7 +1328,7 @@ sub_module_instantiate(WASMModule *module, WASMModuleInstance *module_inst,
         WASMModuleInstance *sub_module_inst = NULL;
 
         sub_module_inst =
-            wasm_instantiate(sub_module, false, NULL, stack_size, heap_size,
+            wasm_instantiate(sub_module, NULL, NULL, stack_size, heap_size,
                              error_buf, error_buf_size);
         if (!sub_module_inst) {
             LOG_DEBUG("instantiate %s failed",
@@ -1817,7 +1801,7 @@ wasm_set_running_mode(WASMModuleInstance *module_inst, RunningMode running_mode)
  * Instantiate module
  */
 WASMModuleInstance *
-wasm_instantiate(WASMModule *module, bool is_sub_inst,
+wasm_instantiate(WASMModule *module, WASMModuleInstance *parent,
                  WASMExecEnv *exec_env_main, uint32 stack_size,
                  uint32 heap_size, char *error_buf, uint32 error_buf_size)
 {
@@ -1834,6 +1818,7 @@ wasm_instantiate(WASMModule *module, bool is_sub_inst,
 #if WASM_ENABLE_MULTI_MODULE != 0
     bool ret = false;
 #endif
+    const bool is_sub_inst = parent != NULL;
 
     if (!module)
         return NULL;
@@ -1974,8 +1959,9 @@ wasm_instantiate(WASMModule *module, bool is_sub_inst,
 
     /* Instantiate memories/tables/functions */
     if ((module_inst->memory_count > 0
-         && !(module_inst->memories = memories_instantiate(
-                  module, module_inst, heap_size, error_buf, error_buf_size)))
+         && !(module_inst->memories =
+                  memories_instantiate(module, module_inst, parent, heap_size,
+                                       error_buf, error_buf_size)))
         || (module_inst->table_count > 0
             && !(module_inst->tables =
                      tables_instantiate(module, module_inst, first_table,
@@ -2378,16 +2364,6 @@ wasm_instantiate(WASMModule *module, bool is_sub_inst,
     }
 #endif
 
-#if WASM_ENABLE_WASI_NN != 0
-    if (!is_sub_inst) {
-        if (!(module_inst->e->wasi_nn_ctx = wasi_nn_initialize())) {
-            set_error_buf(error_buf, error_buf_size,
-                          "wasi nn initialization failed");
-            goto fail;
-        }
-    }
-#endif
-
 #if WASM_ENABLE_DEBUG_INTERP != 0
     if (!is_sub_inst) {
         /* Add module instance into module's instance list */
@@ -2503,16 +2479,6 @@ wasm_deinstantiate(WASMModuleInstance *module_inst, bool is_sub_inst)
     sub_module_deinstantiate(module_inst);
 #endif
 
-#if WASM_ENABLE_LIBC_WASI != 0
-    /* Destroy wasi resource before freeing app heap, since some fields of
-       wasi contex are allocated from app heap, and if app heap is freed,
-       these fields will be set to NULL, we cannot free their internal data
-       which may allocated from global heap. */
-    /* Only destroy wasi ctx in the main module instance */
-    if (!is_sub_inst)
-        wasm_runtime_destroy_wasi((WASMModuleInstanceCommon *)module_inst);
-#endif
-
     if (module_inst->memory_count > 0)
         memories_deinstantiate(module_inst, module_inst->memories,
                                module_inst->memory_count);
@@ -2554,13 +2520,14 @@ wasm_deinstantiate(WASMModuleInstance *module_inst, bool is_sub_inst)
     if (module_inst->e->c_api_func_imports)
         wasm_runtime_free(module_inst->e->c_api_func_imports);
 
-#if WASM_ENABLE_WASI_NN != 0
     if (!is_sub_inst) {
-        WASINNContext *wasi_nn_ctx = module_inst->e->wasi_nn_ctx;
-        if (wasi_nn_ctx)
-            wasi_nn_destroy(wasi_nn_ctx);
-    }
+#if WASM_ENABLE_LIBC_WASI != 0
+        wasm_runtime_destroy_wasi((WASMModuleInstanceCommon *)module_inst);
+#endif
+#if WASM_ENABLE_WASI_NN != 0
+        wasi_nn_destroy(module_inst);
 #endif
+    }
 
     wasm_runtime_free(module_inst);
 }
@@ -3466,7 +3433,7 @@ llvm_jit_invoke_native(WASMExecEnv *exec_env, uint32 func_idx, uint32 argc,
     module = module_inst->module;
     func_type_indexes = module_inst->func_type_indexes;
     func_type_idx = func_type_indexes[func_idx];
-    func_type = (AOTFuncType *)module->types[func_type_idx];
+    func_type = (WASMFuncType *)module->types[func_type_idx];
     func_ptr = module_inst->func_ptrs[func_idx];
 
     bh_assert(func_idx < module->import_function_count);
@@ -3609,13 +3576,13 @@ llvm_jit_table_init(WASMModuleInstance *module_inst, uint32 tbl_idx,
     bh_assert(tbl_inst);
     bh_assert(tbl_seg);
 
-    if (!length) {
+    if (offset_len_out_of_bounds(src_offset, length, tbl_seg->function_count)
+        || offset_len_out_of_bounds(dst_offset, length, tbl_inst->cur_size)) {
+        jit_set_exception_with_id(module_inst, EXCE_OUT_OF_BOUNDS_TABLE_ACCESS);
         return;
     }
 
-    if (length + src_offset > tbl_seg->function_count
-        || dst_offset + length > tbl_inst->cur_size) {
-        jit_set_exception_with_id(module_inst, EXCE_OUT_OF_BOUNDS_TABLE_ACCESS);
+    if (!length) {
         return;
     }
 
@@ -3658,8 +3625,9 @@ llvm_jit_table_copy(WASMModuleInstance *module_inst, uint32 src_tbl_idx,
     bh_assert(src_tbl_inst);
     bh_assert(dst_tbl_inst);
 
-    if ((uint64)dst_offset + length > dst_tbl_inst->cur_size
-        || (uint64)src_offset + length > src_tbl_inst->cur_size) {
+    if (offset_len_out_of_bounds(dst_offset, length, dst_tbl_inst->cur_size)
+        || offset_len_out_of_bounds(src_offset, length,
+                                    src_tbl_inst->cur_size)) {
         jit_set_exception_with_id(module_inst, EXCE_OUT_OF_BOUNDS_TABLE_ACCESS);
         return;
     }
@@ -3692,13 +3660,13 @@ llvm_jit_table_fill(WASMModuleInstance *module_inst, uint32 tbl_idx,
     tbl_inst = wasm_get_table_inst(module_inst, tbl_idx);
     bh_assert(tbl_inst);
 
-    if (data_offset + length > tbl_inst->cur_size) {
+    if (offset_len_out_of_bounds(data_offset, length, tbl_inst->cur_size)) {
         jit_set_exception_with_id(module_inst, EXCE_OUT_OF_BOUNDS_TABLE_ACCESS);
         return;
     }
 
     for (; length != 0; data_offset++, length--) {
-        tbl_inst->elems[data_offset] = (table_elem_type_t *)val;
+        tbl_inst->elems[data_offset] = (table_elem_type_t)val;
     }
 }
 
@@ -3737,7 +3705,7 @@ llvm_jit_table_grow(WASMModuleInstance *module_inst, uint32 tbl_idx,
 
     /* fill in */
     for (i = 0; i < inc_size; ++i) {
-        tbl_inst->elems[tbl_inst->cur_size + i] = (table_elem_type_t *)init_val;
+        tbl_inst->elems[tbl_inst->cur_size + i] = (table_elem_type_t)init_val;
     }
 
     tbl_inst->cur_size = total_size;

+ 6 - 5
core/iwasm/interpreter/wasm_runtime.h

@@ -7,6 +7,7 @@
 #define _WASM_RUNTIME_H
 
 #include "wasm.h"
+#include "bh_atomic.h"
 #include "bh_hashmap.h"
 #include "../common/wasm_runtime_common.h"
 #include "../common/wasm_exec_env.h"
@@ -79,7 +80,7 @@ struct WASMMemoryInstance {
     /* Module type */
     uint32 module_type;
     /* Shared memory flag */
-    bool is_shared;
+    bh_atomic_32_t ref_count; /* 0: non-shared, > 0: reference count */
 
     /* Number bytes per page */
     uint32 num_bytes_per_page;
@@ -269,9 +270,9 @@ typedef struct WASMModuleInstanceExtra {
         && WASM_ENABLE_LAZY_JIT != 0)
     WASMModuleInstance *next;
 #endif
-
-#if WASM_ENABLE_WASI_NN != 0
-    WASINNContext *wasi_nn_ctx;
+#if WASM_CONFIGUABLE_BOUNDS_CHECKS != 0
+    /* Disable bounds checks or not */
+    bool disable_bounds_checks;
 #endif
 } WASMModuleInstanceExtra;
 
@@ -428,7 +429,7 @@ void
 wasm_unload(WASMModule *module);
 
 WASMModuleInstance *
-wasm_instantiate(WASMModule *module, bool is_sub_inst,
+wasm_instantiate(WASMModule *module, WASMModuleInstance *parent,
                  WASMExecEnv *exec_env_main, uint32 stack_size,
                  uint32 heap_size, char *error_buf, uint32 error_buf_size);
 

+ 13 - 6
core/iwasm/libraries/lib-pthread/lib_pthread_wrapper.c

@@ -531,7 +531,8 @@ pthread_start_routine(void *arg)
     else {
         info_node->u.ret = (void *)(uintptr_t)argv[0];
 #ifdef OS_ENABLE_HW_BOUND_CHECK
-        if (exec_env->suspend_flags.flags & 0x08)
+        if (WASM_SUSPEND_FLAGS_GET(exec_env->suspend_flags)
+            & WASM_SUSPEND_FLAG_EXIT)
             /* argv[0] isn't set after longjmp(1) to
                invoke_native_with_hw_bound_check */
             info_node->u.ret = exec_env->thread_ret_value;
@@ -580,7 +581,7 @@ pthread_create_wrapper(wasm_exec_env_t exec_env,
 #endif
 
     if (!(new_module_inst = wasm_runtime_instantiate_internal(
-              module, true, exec_env, stack_size, 0, NULL, 0)))
+              module, module_inst, exec_env, stack_size, 0, NULL, 0)))
         return -1;
 
     /* Set custom_data to new module instance */
@@ -690,6 +691,14 @@ pthread_join_wrapper(wasm_exec_env_t exec_env, uint32 thread,
         bh_assert(node->joinable);
         join_ret = 0;
         ret = node->u.ret;
+
+        /* The target thread changes the node's status before calling
+           wasm_cluster_exit_thread to exit, so its resources may not
+           have been destroyed yet at this point. Wait long enough for
+           them to actually be destroyed, to avoid unexpected behavior. */
+        os_mutex_lock(&exec_env->wait_lock);
+        os_cond_reltimedwait(&exec_env->wait_cond, &exec_env->wait_lock, 1000);
+        os_mutex_unlock(&exec_env->wait_lock);
     }
 
     if (retval_offset != 0)
@@ -757,7 +766,6 @@ __pthread_self_wrapper(wasm_exec_env_t exec_env)
 static void
 pthread_exit_wrapper(wasm_exec_env_t exec_env, int32 retval_offset)
 {
-    wasm_module_inst_t module_inst = get_module_inst(exec_env);
     ThreadRoutineArgs *args = get_thread_arg(exec_env);
     /* Currently exit main thread is not allowed */
     if (!args)
@@ -775,9 +783,6 @@ pthread_exit_wrapper(wasm_exec_env_t exec_env, int32 retval_offset)
     /* destroy pthread key values */
     call_key_destructor(exec_env);
 
-    /* routine exit, destroy instance */
-    wasm_runtime_deinstantiate_internal(module_inst, true);
-
     if (!args->info_node->joinable) {
         delete_thread_info_node(args->info_node);
     }
@@ -789,6 +794,8 @@ pthread_exit_wrapper(wasm_exec_env_t exec_env, int32 retval_offset)
 
     wasm_runtime_free(args);
 
+    /* Don't destroy exec_env->module_inst in this function since
+       it will be destroyed in wasm_cluster_exit_thread */
     wasm_cluster_exit_thread(exec_env, (void *)(uintptr_t)retval_offset);
 }
 

+ 3 - 1
core/iwasm/libraries/lib-socket/inc/wasi_socket_ext.h

@@ -17,6 +17,8 @@ extern "C" {
 #endif
 
 typedef enum {
+    /* Used only for sock_addr_resolve hints */
+    SOCKET_ANY = -1,
     SOCKET_DGRAM = 0,
     SOCKET_STREAM,
 } __wasi_sock_type_t;
@@ -84,7 +86,7 @@ typedef struct __wasi_addr_t {
     } addr;
 } __wasi_addr_t;
 
-typedef enum { INET4 = 0, INET6 } __wasi_address_family_t;
+typedef enum { INET4 = 0, INET6, INET_UNSPEC } __wasi_address_family_t;
 
 typedef struct __wasi_addr_info_t {
     __wasi_addr_t addr;

+ 5 - 0
core/iwasm/libraries/lib-socket/src/wasi/wasi_socket_ext.c

@@ -430,6 +430,9 @@ addrinfo_hints_to_wasi_hints(const struct addrinfo *hints,
             case AF_INET6:
                 wasi_hints->family = INET6;
                 break;
+            case AF_UNSPEC:
+                wasi_hints->family = INET_UNSPEC;
+                break;
             default:
                 return __WASI_ERRNO_AFNOSUPPORT;
         }
@@ -440,6 +443,8 @@ addrinfo_hints_to_wasi_hints(const struct addrinfo *hints,
             case SOCK_DGRAM:
                 wasi_hints->type = SOCKET_DGRAM;
                 break;
+            case 0:
+                /* A socket type of 0 means "any type". Break here so
+                   that we don't fall through into the unsupported-type
+                   error below. */
+                wasi_hints->type = SOCKET_ANY;
+                break;
             default:
                 return __WASI_ERRNO_NOTSUP;
         }

+ 21 - 2
core/iwasm/libraries/lib-socket/test/nslookup.c

@@ -5,6 +5,8 @@
 
 #include <assert.h>
 #include <string.h>
+#include <stdio.h>
+#include <pthread.h>
 #ifdef __wasi__
 #include <wasi/api.h>
 #include <sys/socket.h>
@@ -39,11 +41,28 @@ test_nslookup(int af)
     freeaddrinfo(res);
 }
 
+/* Thread entry point: params points to the int address family to
+   look up; runs the same lookup test that main() runs directly. */
+void *
+test_nslookup_mt(void *params)
+{
+    const int family = *(const int *)params;
+
+    test_nslookup(family);
+    return NULL;
+}
+
 int
 main()
 {
-    test_nslookup(AF_INET);  /* for ipv4 */
-    test_nslookup(AF_INET6); /* for ipv6 */
+    int afs[] = { AF_INET, AF_INET6 };
+
+    /* Run every lookup twice: once on the main thread and once on a
+       freshly created thread */
+    for (size_t i = 0; i < sizeof(afs) / sizeof(afs[0]); i++) {
+        pthread_t th;
+        int ret;
+
+        printf("Testing %d in main thread...\n", afs[i]);
+        test_nslookup(afs[i]);
+        printf("Testing %d in a new thread...\n", afs[i]);
+        /* th is only valid if creation succeeded, so check before
+           joining */
+        ret = pthread_create(&th, NULL, test_nslookup_mt, &afs[i]);
+        assert(ret == 0);
+        ret = pthread_join(th, NULL);
+        assert(ret == 0);
+        (void)ret;
+    }
 
     return 0;
 }

+ 112 - 113
core/iwasm/libraries/lib-socket/test/tcp_udp.c

@@ -5,6 +5,8 @@
 #include <unistd.h>
 #include <string.h>
 #include <assert.h>
+#include <errno.h>
+#include <time.h>
 #ifdef __wasi__
 #include <wasi/api.h>
 #include <sys/socket.h>
@@ -12,105 +14,123 @@
 #endif
 #include <arpa/inet.h>
 #include <pthread.h>
+#include <stdio.h>
+
 #define SERVER_MSG "Message from server."
 #define PORT 8989
-pthread_mutex_t mut;
-pthread_cond_t cond;
+
+pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+
 int server_init_complete = 0;
-char buffer[sizeof(SERVER_MSG) + 1];
 
-struct socket_info {
-    union {
-        struct sockaddr_in addr_ipv4;
-        struct sockaddr_in6 addr_ipv6;
-    } addr;
+typedef struct { /* socket plus the loopback address it binds/connects to */
+    struct sockaddr_storage addr; /* filled in as sockaddr_in or sockaddr_in6 */
+    socklen_t addr_len; /* sizeof the concrete sockaddr stored in addr */
     int sock;
-};
-
-struct thread_args {
-    int family;
     int protocol;
-};
+} socket_info_t;
+
+/*
+ * Block until the server thread reports that it is ready (see
+ * notify_server_started) or until wait_time_seconds have elapsed.
+ * Asserts that the wait finished without a timeout or any other error.
+ */
+void
+wait_for_server(int wait_time_seconds)
+{
+    struct timespec ts;
+    int res = clock_gettime(CLOCK_REALTIME, &ts);
+
+    assert(res == 0);
+    /* pthread_cond_timedwait takes an absolute deadline */
+    ts.tv_sec += wait_time_seconds;
+
+    pthread_mutex_lock(&mut);
+    while (server_init_complete == 0) {
+        res = pthread_cond_timedwait(&cond, &mut, &ts);
+        /* Stop on any failure, not only ETIMEDOUT: another error code
+           would otherwise keep this loop spinning forever */
+        if (res != 0)
+            break;
+    }
+    pthread_mutex_unlock(&mut);
+
+    assert(res == 0);
+}
 
-struct socket_info
+void
+notify_server_started()
+{ /* Publish "server ready" to a thread blocked in wait_for_server */
+    pthread_mutex_lock(&mut);
+    server_init_complete = 1; /* predicate checked by the waiter's loop */
+    pthread_cond_signal(&cond);
+    pthread_mutex_unlock(&mut);
+}
+
+/*
+ * Create a socket for the given family/protocol and prepare the
+ * loopback address (port PORT) that the server binds to and the client
+ * sends to. Asserts that the socket could be created. For a family
+ * other than AF_INET/AF_INET6 the address stays zeroed and addr_len
+ * is 0.
+ */
+socket_info_t
 init_socket_addr(int family, int protocol)
 {
-    int sock = socket(family, protocol, 0);
-    assert(sock != -1);
+    socket_info_t info;
+
+    info.sock = socket(family, protocol, 0);
+    assert(info.sock != -1);
+    info.protocol = protocol;
+
+    memset(&info.addr, 0, sizeof(info.addr));
+    /* Never return an indeterminate length for an unsupported family */
+    info.addr_len = 0;
 
-    struct socket_info info;
     if (family == AF_INET) {
-        struct sockaddr_in addr;
-        memset(&addr, 0, sizeof(addr));
-        addr.sin_family = AF_INET;
-        addr.sin_port = htons(PORT);
-        addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
-        info.addr.addr_ipv4 = addr;
+        struct sockaddr_in *addr = (struct sockaddr_in *)&info.addr;
+        addr->sin_family = AF_INET;
+        addr->sin_port = htons(PORT);
+        addr->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+        info.addr_len = sizeof(struct sockaddr_in);
     }
     else if (family == AF_INET6) {
-        struct sockaddr_in6 addr;
-        memset(&addr, 0, sizeof(addr));
-        addr.sin6_family = AF_INET6;
-        addr.sin6_port = htons(PORT);
-        addr.sin6_addr = in6addr_loopback;
-        info.addr.addr_ipv6 = addr;
+        struct sockaddr_in6 *addr = (struct sockaddr_in6 *)&info.addr;
+        addr->sin6_family = AF_INET6;
+        addr->sin6_port = htons(PORT);
+        addr->sin6_addr = in6addr_loopback;
+        info.addr_len = sizeof(struct sockaddr_in6);
     }
-    info.sock = sock;
-    return info;
-}
 
-void
-assert_thread_args(struct thread_args *args)
-{
-    assert(args->family == AF_INET || args->family == AF_INET6);
-    assert(args->protocol == SOCK_STREAM || args->protocol == SOCK_DGRAM);
+    return info;
 }
 
 void *
 server(void *arg)
 {
-    server_init_complete = 0;
-    struct thread_args *args = (struct thread_args *)arg;
-    assert_thread_args(args);
-
-    struct socket_info init_server_sock =
-        init_socket_addr(args->family, args->protocol);
-
-    int server_sock = init_server_sock.sock;
-    socklen_t addr_size;
+    char buffer[sizeof(SERVER_MSG) + 1] = { 0 };
     struct sockaddr_storage client_addr;
-    strcpy(buffer, SERVER_MSG);
+    socket_info_t *info = (socket_info_t *)arg;
+    struct sockaddr *server_addr = (struct sockaddr *)&info->addr;
+    int server_sock = info->sock;
 
-    struct sockaddr *server_addr = (struct sockaddr *)&init_server_sock.addr;
-    int ret = bind(server_sock, server_addr,
-                   args->family == AF_INET ? sizeof(struct sockaddr_in)
-                                           : sizeof(struct sockaddr_in6));
-    assert(ret == 0);
+    int optval = 1;
+    assert(setsockopt(server_sock, SOL_SOCKET, SO_REUSEADDR, &optval,
+                      sizeof(optval))
+           == 0);
 
-    (args->protocol == SOCK_STREAM) && listen(server_sock, 1);
-    pthread_mutex_lock(&mut);
-    server_init_complete = 1;
-    pthread_mutex_unlock(&mut);
-    pthread_cond_signal(&cond);
+    assert(bind(server_sock, server_addr, info->addr_len) == 0);
+
+    if (info->protocol == SOCK_STREAM)
+        listen(server_sock, 1);
+    notify_server_started();
 
-    addr_size = sizeof(client_addr);
-    if (args->protocol == SOCK_STREAM) {
+    socklen_t addr_size = info->addr_len;
+    if (info->protocol == SOCK_STREAM) {
         int client_sock =
             accept(server_sock, (struct sockaddr *)&client_addr, &addr_size);
         assert(client_sock >= 0);
-        sendto(client_sock, buffer, strlen(buffer), 0,
-               (struct sockaddr *)&client_addr, addr_size);
-
-        assert(close(client_sock) == 0);
+        assert(recv(client_sock, buffer, sizeof(buffer), 0) > 0);
+        strcpy(buffer, SERVER_MSG);
+        assert(send(client_sock, buffer, sizeof(buffer), 0) > 0);
+        assert(recv(client_sock, buffer, sizeof(buffer), 0) > 0);
     }
     else {
-        recvfrom(server_sock, buffer, sizeof(buffer), 0,
-                 (struct sockaddr *)&client_addr, &addr_size);
-        sendto(server_sock, buffer, strlen(buffer), 0,
-               (struct sockaddr *)&client_addr, addr_size);
-
-        assert(close(server_sock) == 0);
+        assert(recvfrom(server_sock, buffer, sizeof(buffer), 0,
+                        (struct sockaddr *)&client_addr, &addr_size)
+               > 0);
+        strcpy(buffer, SERVER_MSG);
+        assert(sendto(server_sock, buffer, strlen(buffer), 0,
+                      (struct sockaddr *)&client_addr, addr_size)
+               > 0);
+        assert(recvfrom(server_sock, buffer, sizeof(buffer), 0,
+                        (struct sockaddr *)&client_addr, &addr_size)
+               > 0);
     }
+    assert(close(server_sock) == 0);
 
     return NULL;
 }
@@ -118,46 +138,23 @@ server(void *arg)
 void *
 client(void *arg)
 {
-    struct thread_args *args = (struct thread_args *)arg;
-    assert_thread_args(args);
-
-    pthread_mutex_lock(&mut);
+    char buffer[sizeof(SERVER_MSG) + 1];
+    socket_info_t *info = (socket_info_t *)arg;
+    int sock = info->sock;
+    struct sockaddr *addr = (struct sockaddr *)&info->addr;
 
-    while (server_init_complete == 0) {
-        pthread_cond_wait(&cond, &mut);
-    }
+    wait_for_server(1);
 
-    struct socket_info init_client_sock =
-        init_socket_addr(args->family, args->protocol);
-    int sock = init_client_sock.sock;
-    pthread_mutex_unlock(&mut);
-
-    if (args->family == AF_INET) {
-        struct sockaddr_in addr = init_client_sock.addr.addr_ipv4;
-        if (args->protocol == SOCK_STREAM) {
-            assert(connect(sock, (struct sockaddr *)&addr, sizeof(addr)) != -1);
-        }
-        else {
-            assert(sendto(sock, buffer, strlen(buffer), 0,
-                          (struct sockaddr *)&addr, sizeof(addr))
-                   != -1);
-        }
-    }
-    else {
-        struct sockaddr_in6 addr = init_client_sock.addr.addr_ipv6;
-        if (args->protocol == SOCK_STREAM) {
-            assert(connect(sock, (struct sockaddr *)&addr, sizeof(addr)) != -1);
-        }
-        else {
-            assert(sendto(sock, buffer, strlen(buffer), 0,
-                          (struct sockaddr *)&addr, sizeof(addr))
-                   != -1);
-        }
+    if (info->protocol == SOCK_STREAM) {
+        assert(connect(sock, addr, info->addr_len) != -1);
     }
 
-    recv(sock, buffer, sizeof(buffer), 0);
-    assert(strcmp(buffer, SERVER_MSG) == 0);
+    assert(sendto(sock, "open", strlen("open"), 0, addr, info->addr_len) > 0);
+    assert(recv(sock, buffer, sizeof(buffer), 0) > 0);
+    assert(strncmp(buffer, SERVER_MSG, strlen(SERVER_MSG)) == 0);
+    assert(sendto(sock, "close", sizeof("close"), 0, addr, info->addr_len) > 0);
     assert(close(sock) == 0);
+
     return NULL;
 }
 
@@ -165,17 +162,19 @@ void
 test_protocol(int family, int protocol)
 {
     pthread_t server_thread, client_thread;
-    assert(pthread_cond_init(&cond, NULL) == 0);
-    assert(pthread_mutex_init(&mut, NULL) == 0);
+    socket_info_t server_info = init_socket_addr(family, protocol);
+    socket_info_t client_info = init_socket_addr(family, protocol);
+
+    printf("Testing address family: %d protocol: %d\n", family, protocol);
+
+    server_init_complete = 0;
 
-    struct thread_args args = { family, protocol };
-    assert(pthread_create(&server_thread, NULL, server, (void *)&args) == 0);
-    assert(pthread_create(&client_thread, NULL, client, (void *)&args) == 0);
+    assert(pthread_create(&server_thread, NULL, server, (void *)&server_info)
+           == 0);
+    assert(pthread_create(&client_thread, NULL, client, (void *)&client_info)
+           == 0);
     assert(pthread_join(server_thread, NULL) == 0);
     assert(pthread_join(client_thread, NULL) == 0);
-
-    assert(pthread_mutex_destroy(&mut) == 0);
-    assert(pthread_cond_destroy(&cond) == 0);
 }
 
 int
@@ -190,4 +189,4 @@ main(int argc, char **argv)
     test_protocol(AF_INET6, SOCK_DGRAM);
 
     return 0;
-}
+}

+ 1 - 1
core/iwasm/libraries/lib-wasi-threads/lib_wasi_threads_wrapper.c

@@ -90,7 +90,7 @@ thread_spawn_wrapper(wasm_exec_env_t exec_env, uint32 start_arg)
     stack_size = ((WASMModuleInstance *)module_inst)->default_wasm_stack_size;
 
     if (!(new_module_inst = wasm_runtime_instantiate_internal(
-              module, true, exec_env, stack_size, 0, NULL, 0)))
+              module, module_inst, exec_env, stack_size, 0, NULL, 0)))
         return -1;
 
     wasm_runtime_set_custom_data_internal(

+ 4 - 1
core/iwasm/libraries/lib-wasi-threads/test/build.sh

@@ -9,10 +9,13 @@ set -eo pipefail
 CC=${CC:=/opt/wasi-sdk/bin/clang}
 WAMR_DIR=../../../../..
 
+# Stress tests names
+thread_start_file_exclusions=("spawn_stress_test.wasm" "linear_memory_size_update.wasm")
+
 for test_c in *.c; do
     test_wasm="$(basename $test_c .c).wasm"
 
-    if [ $test_wasm = "linear_memory_size_update.wasm" ]; then
+    if [[ " ${thread_start_file_exclusions[@]} " =~ " ${test_wasm} " ]] ; then
         thread_start_file=""
     else
         thread_start_file=$WAMR_DIR/samples/wasi-threads/wasm-apps/wasi_thread_start.S

+ 3 - 0
core/iwasm/libraries/lib-wasi-threads/test/manifest.json

@@ -0,0 +1,3 @@
+{
+    "name": "lib-wasi-threads tests"
+}

+ 5 - 0
core/iwasm/libraries/lib-wasi-threads/test/skip.json

@@ -0,0 +1,5 @@
+{
+    "lib-wasi-threads tests": {
+        "spawn_stress_test": "Stress tests are incompatible with the other part and executed differently"
+    }
+}

+ 114 - 0
core/iwasm/libraries/lib-wasi-threads/test/spawn_stress_test.c

@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2023 Amazon.com Inc. or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef __wasi__
+#error This example only compiles to WASM/WASI target
+#endif
+
+#include <assert.h>
+#include <errno.h>
+#include <math.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+enum CONSTANTS {
+    NUM_ITER = 100000,
+    NUM_RETRY = 5,
+    MAX_NUM_THREADS = 8,
+};
+
+unsigned prime_numbers_count = 0;
+
+bool
+is_prime(unsigned int num)
+{
+    for (unsigned int i = 2; i <= (unsigned int)(sqrt(num)); ++i) {
+        if (num % i == 0) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+void *
+check_if_prime(void *value)
+{
+    unsigned int *num = (unsigned int *)(value);
+    usleep(10000);
+    if (is_prime(*num)) {
+        __atomic_fetch_add(&prime_numbers_count, 1, __ATOMIC_SEQ_CST);
+    }
+    return NULL;
+}
+
+unsigned int
+validate()
+{
+    unsigned int counter = 0;
+    for (unsigned int i = 2; i <= NUM_ITER; ++i) {
+        counter += is_prime(i);
+    }
+
+    return counter;
+}
+
+void
+spawn_thread(pthread_t *thread, unsigned int *arg)
+{
+    int status_code = -1;
+    for (int tries = 0; status_code != 0 && tries < NUM_RETRY; ++tries) {
+        status_code = pthread_create(thread, NULL, &check_if_prime, arg);
+        assert(status_code == 0 || status_code == EAGAIN);
+        if (status_code == EAGAIN) {
+            usleep(2000);
+        }
+    }
+
+    assert(status_code == 0 && "Thread creation should succeed");
+}
+
+/*
+ * Test entry point.
+ *
+ * For every number in [2, NUM_ITER) a worker thread running
+ * check_if_prime() is spawned, reusing MAX_NUM_THREADS slots in
+ * round-robin order (a slot is joined before it is reused). Once all
+ * workers are joined, the concurrently accumulated prime_numbers_count
+ * is compared with the single-threaded reference from validate().
+ */
+int
+main(int argc, char **argv)
+{
+    /* Zero-initialised so that "threads[i] != 0" reliably means "slot i
+     * holds a thread that still has to be joined". Without this, the first
+     * pass over the slots would read indeterminate values and could call
+     * pthread_join() on a garbage handle. */
+    pthread_t threads[MAX_NUM_THREADS] = { 0 };
+    unsigned int args[MAX_NUM_THREADS];
+    double percentage = 0.1;
+
+    for (unsigned int factorised_number = 2; factorised_number < NUM_ITER;
+         ++factorised_number) {
+        /* Progress report, printed each time another 10% is passed */
+        if (factorised_number > NUM_ITER * percentage) {
+            fprintf(stderr, "Stress test is %d%% finished\n",
+                    (unsigned int)(percentage * 100));
+            percentage += 0.1;
+        }
+
+        /* Wait for the previous occupant of this slot before reusing it;
+         * this also guarantees args[thread_num] is no longer being read */
+        unsigned int thread_num = factorised_number % MAX_NUM_THREADS;
+        if (threads[thread_num] != 0) {
+            assert(pthread_join(threads[thread_num], NULL) == 0);
+        }
+
+        args[thread_num] = factorised_number;
+
+        usleep(2000);
+        spawn_thread(&threads[thread_num], &args[thread_num]);
+        assert(threads[thread_num] != 0);
+    }
+
+    /* Join whatever is still running in each slot */
+    for (int i = 0; i < MAX_NUM_THREADS; ++i) {
+        assert(threads[i] == 0 || pthread_join(threads[i], NULL) == 0);
+    }
+
+    // Check the test results
+    assert(
+        prime_numbers_count == validate()
+        && "Answer mismatch between tested code and reference implementation");
+
+    fprintf(stderr, "Stress test finished successfully\n");
+    return 0;
+}

+ 1 - 1
core/iwasm/libraries/libc-uvwasi/libc_uvwasi.cmake

@@ -3,7 +3,7 @@
 
 set (LIBC_WASI_DIR ${CMAKE_CURRENT_LIST_DIR})
 
-set (LIBUV_VERSION v1.44.2)
+set (LIBUV_VERSION v1.46.0)
 
 add_definitions (-DWASM_ENABLE_LIBC_WASI=1 -DWASM_ENABLE_UVWASI=1)
 

+ 3 - 1
core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/include/wasmtime_ssp.h

@@ -552,6 +552,8 @@ _Static_assert(offsetof(__wasi_subscription_t, u) == 8, "witx calculated offset"
 
 /* keep syncing with wasi_socket_ext.h */
 typedef enum {
+    /* Used only for sock_addr_resolve hints */
+    SOCKET_ANY = -1,
     SOCKET_DGRAM = 0,
     SOCKET_STREAM,
 } __wasi_sock_type_t;
@@ -605,7 +607,7 @@ typedef struct __wasi_addr_t {
     } addr;
 } __wasi_addr_t;
 
-typedef enum { INET4 = 0, INET6 } __wasi_address_family_t;
+typedef enum { INET4 = 0, INET6, INET_UNSPEC } __wasi_address_family_t;
 
 typedef struct __wasi_addr_info_t {
     __wasi_addr_t addr;

+ 6 - 3
core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/posix.c

@@ -3102,9 +3102,12 @@ wasi_ssp_sock_addr_resolve(
     }
 
     int ret = os_socket_addr_resolve(
-        host, service, hints->hints_enabled ? &hints_is_tcp : NULL,
-        hints->hints_enabled ? &hints_is_ipv4 : NULL, wamr_addr_info,
-        addr_info_size, &_max_info_size);
+        host, service,
+        hints->hints_enabled && hints->type != SOCKET_ANY ? &hints_is_tcp
+                                                          : NULL,
+        hints->hints_enabled && hints->family != INET_UNSPEC ? &hints_is_ipv4
+                                                             : NULL,
+        wamr_addr_info, addr_info_size, &_max_info_size);
 
     if (ret != BHT_OK) {
         wasm_runtime_free(wamr_addr_info);

+ 24 - 20
core/iwasm/libraries/thread-mgr/thread_manager.c

@@ -509,7 +509,7 @@ wasm_cluster_spawn_exec_env(WASMExecEnv *exec_env)
 #endif
 
     if (!(new_module_inst = wasm_runtime_instantiate_internal(
-              module, true, exec_env, stack_size, 0, NULL, 0))) {
+              module, module_inst, exec_env, stack_size, 0, NULL, 0))) {
         goto fail1;
     }
 
@@ -606,7 +606,8 @@ thread_manager_start_routine(void *arg)
 
 #ifdef OS_ENABLE_HW_BOUND_CHECK
     os_mutex_lock(&exec_env->wait_lock);
-    if (exec_env->suspend_flags.flags & 0x08)
+    if (WASM_SUSPEND_FLAGS_GET(exec_env->suspend_flags)
+        & WASM_SUSPEND_FLAG_EXIT)
         ret = exec_env->thread_ret_value;
     os_mutex_unlock(&exec_env->wait_lock);
 #endif
@@ -993,7 +994,9 @@ wasm_cluster_exit_thread(WASMExecEnv *exec_env, void *retval)
     if (exec_env->jmpbuf_stack_top) {
         /* Store the return value in exec_env */
         exec_env->thread_ret_value = retval;
-        exec_env->suspend_flags.flags |= 0x08;
+
+        WASM_SUSPEND_FLAGS_FETCH_OR(exec_env->suspend_flags,
+                                    WASM_SUSPEND_FLAG_EXIT);
 
 #ifndef BH_PLATFORM_WINDOWS
         /* Pop all jmpbuf_node except the last one */
@@ -1055,7 +1058,8 @@ set_thread_cancel_flags(WASMExecEnv *exec_env)
 #if WASM_ENABLE_DEBUG_INTERP != 0
     wasm_cluster_thread_send_signal(exec_env, WAMR_SIG_TERM);
 #endif
-    exec_env->suspend_flags.flags |= 0x01;
+    WASM_SUSPEND_FLAGS_FETCH_OR(exec_env->suspend_flags,
+                                WASM_SUSPEND_FLAG_TERMINATE);
 
     os_mutex_unlock(&exec_env->wait_lock);
 }
@@ -1178,7 +1182,8 @@ void
 wasm_cluster_suspend_thread(WASMExecEnv *exec_env)
 {
     /* Set the suspend flag */
-    exec_env->suspend_flags.flags |= 0x02;
+    WASM_SUSPEND_FLAGS_FETCH_OR(exec_env->suspend_flags,
+                                WASM_SUSPEND_FLAG_SUSPEND);
 }
 
 static void
@@ -1214,7 +1219,8 @@ wasm_cluster_suspend_all_except_self(WASMCluster *cluster,
 void
 wasm_cluster_resume_thread(WASMExecEnv *exec_env)
 {
-    exec_env->suspend_flags.flags &= ~0x02;
+    WASM_SUSPEND_FLAGS_FETCH_AND(exec_env->suspend_flags,
+                                 ~WASM_SUSPEND_FLAG_SUSPEND);
     os_cond_signal(&exec_env->wait_cond);
 }
 
@@ -1248,10 +1254,8 @@ set_exception_visitor(void *node, void *user_data)
 
         /* Only spread non "wasi proc exit" exception */
 #if WASM_ENABLE_SHARED_MEMORY != 0
-        WASMSharedMemNode *shared_mem_node = wasm_module_get_shared_memory(
-            (WASMModuleCommon *)curr_wasm_inst->module);
-        if (shared_mem_node)
-            os_mutex_lock(&shared_mem_node->shared_mem_lock);
+        if (curr_wasm_inst->memory_count > 0)
+            shared_memory_lock(curr_wasm_inst->memories[0]);
 #endif
         if (!strstr(wasm_inst->cur_exception, "wasi proc exit")) {
             bh_memcpy_s(curr_wasm_inst->cur_exception,
@@ -1260,8 +1264,8 @@ set_exception_visitor(void *node, void *user_data)
                         sizeof(wasm_inst->cur_exception));
         }
 #if WASM_ENABLE_SHARED_MEMORY != 0
-        if (shared_mem_node)
-            os_mutex_unlock(&shared_mem_node->shared_mem_lock);
+        if (curr_wasm_inst->memory_count > 0)
+            shared_memory_unlock(curr_wasm_inst->memories[0]);
 #endif
 
         /* Terminate the thread so it can exit from dead loops */
@@ -1280,15 +1284,13 @@ clear_exception_visitor(void *node, void *user_data)
             (WASMModuleInstance *)get_module_inst(curr_exec_env);
 
 #if WASM_ENABLE_SHARED_MEMORY != 0
-        WASMSharedMemNode *shared_mem_node = wasm_module_get_shared_memory(
-            (WASMModuleCommon *)curr_wasm_inst->module);
-        if (shared_mem_node)
-            os_mutex_lock(&shared_mem_node->shared_mem_lock);
+        if (curr_wasm_inst->memory_count > 0)
+            shared_memory_lock(curr_wasm_inst->memories[0]);
 #endif
         curr_wasm_inst->cur_exception[0] = '\0';
 #if WASM_ENABLE_SHARED_MEMORY != 0
-        if (shared_mem_node)
-            os_mutex_unlock(&shared_mem_node->shared_mem_lock);
+        if (curr_wasm_inst->memory_count > 0)
+            shared_memory_unlock(curr_wasm_inst->memories[0]);
 #endif
     }
 }
@@ -1343,8 +1345,10 @@ bool
 wasm_cluster_is_thread_terminated(WASMExecEnv *exec_env)
 {
     os_mutex_lock(&exec_env->wait_lock);
-    bool is_thread_terminated =
-        (exec_env->suspend_flags.flags & 0x01) ? true : false;
+    bool is_thread_terminated = (WASM_SUSPEND_FLAGS_GET(exec_env->suspend_flags)
+                                 & WASM_SUSPEND_FLAG_TERMINATE)
+                                    ? true
+                                    : false;
     os_mutex_unlock(&exec_env->wait_lock);
 
     return is_thread_terminated;

+ 58 - 0
core/iwasm/libraries/wasi-nn/external/CMakeLists.txt

@@ -0,0 +1,58 @@
+# Copyright (C) 2019 Intel Corporation.  All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+cmake_minimum_required(VERSION 3.16)
+project(wasi-nn C CXX)
+
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+
+set(WAMR_ROOT_DIR ${CMAKE_CURRENT_LIST_DIR}/../../../../..)
+set(WASI_NN_ROOT_DIR ${CMAKE_CURRENT_LIST_DIR}/..)
+
+if(NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE Debug)
+endif()
+
+#### libvmlib ####
+# NOTE: we build vmlib as a shared library here so that it can be
+# shared between iwasm and native libraries.
+include(${WASI_NN_ROOT_DIR}/cmake/iwasm_helper.cmake)
+include(${WAMR_ROOT_DIR}/build-scripts/runtime_lib.cmake)
+
+add_library(vmlib SHARED ${WAMR_RUNTIME_LIB_SOURCE})
+
+# iwasm
+include(${SHARED_DIR}/utils/uncommon/shared_uncommon.cmake)
+set(RUNTIME_SOURCE_ALL
+  ${WAMR_ROOT_DIR}/product-mini/platforms/${WAMR_BUILD_PLATFORM}/main.c
+  ${UNCOMMON_SHARED_SOURCE}
+)
+
+add_executable(iwasm ${RUNTIME_SOURCE_ALL})
+target_link_libraries(iwasm vmlib -lpthread -lm -ldl)
+
+#### TensorFlow ####
+
+include(${WASI_NN_ROOT_DIR}/cmake/wasi_nn.cmake)
+
+#### WASI-NN ####
+
+include_directories(
+  ${WAMR_ROOT_DIR}/core/iwasm/include
+  ${WAMR_ROOT_DIR}/core/shared/utils
+  ${WAMR_ROOT_DIR}/core/shared/platform/linux
+)
+
+add_library(wasi-nn SHARED
+  ${WASI_NN_SOURCES}
+)
+
+# Add `get_native_lib` symbol
+target_compile_definitions(wasi-nn PUBLIC
+  WASI_NN_SHARED
+)
+
+target_link_libraries(wasi-nn
+  ${WASI_NN_LIBS}
+  vmlib
+)

+ 13 - 0
core/iwasm/libraries/wasi-nn/external/README.md

@@ -0,0 +1,13 @@
+# wasi-nn as shared library
+
+Example on how to create libwasi-nn (external library) instead of embedding wasi-nn inside iwasm
+
+From folder `core/iwasm/libraries/wasi-nn/test`, build the test and run
+
+```sh
+../external/build/iwasm \
+    --dir=. \
+    --env="TARGET=cpu" \
+    --native-lib=../external/build/libwasi-nn.so \
+    test_tensorflow.wasm 
+```

+ 133 - 63
core/iwasm/libraries/wasi-nn/src/wasi_nn.c

@@ -9,16 +9,18 @@
 #include <assert.h>
 #include <errno.h>
 #include <string.h>
+#include <stdint.h>
 
 #include "wasi_nn.h"
+#include "wasi_nn_private.h"
 #include "wasi_nn_app_native.h"
-#include "logger.h"
 #include "wasi_nn_tensorflowlite.hpp"
+#include "logger.h"
 
 #include "bh_platform.h"
 #include "wasm_export.h"
-#include "wasm_runtime.h"
-#include "aot_runtime.h"
+
+#define HASHMAP_INITIAL_SIZE 20
 
 /* Definition of 'wasi_nn.h' structs in WASM app format (using offset) */
 
@@ -51,6 +53,119 @@ static api_function lookup[] = {
       tensorflowlite_get_output }
 };
 
+static HashMap *hashmap;
+
+static void
+wasi_nn_ctx_destroy(WASINNContext *wasi_nn_ctx);
+
+/* Get wasi-nn context from module instance */
+
+/*
+ * Hash callback for the module-instance -> WASINNContext map (FNV-1a).
+ *
+ * Keys are module instance pointers which key_equal_func compares by
+ * identity, so the hash is computed over the bytes of the pointer value
+ * itself rather than over the memory the pointer refers to. Hashing the
+ * pointed-to bytes would make the hash depend on the instance's contents.
+ *
+ * @param key the module instance pointer used as map key
+ * @return 32-bit FNV-1a hash of the pointer value
+ */
+static uint32
+hash_func(const void *key)
+{
+    // fnv1a_hash
+    const uint32 FNV_PRIME = 16777619;
+    const uint32 FNV_OFFSET_BASIS = 2166136261U;
+
+    const uintptr_t key_value = (uintptr_t)key;
+    const unsigned char *bytes = (const unsigned char *)&key_value;
+    uint32 hash = FNV_OFFSET_BASIS;
+
+    for (size_t i = 0; i < sizeof(key_value); ++i) {
+        hash ^= bytes[i];
+        hash *= FNV_PRIME;
+    }
+
+    return hash;
+}
+
+static bool
+key_equal_func(void *key1, void *key2)
+{
+    return key1 == key2;
+}
+
+static void
+key_destroy_func(void *key1)
+{}
+
+static void
+value_destroy_func(void *value)
+{
+    wasi_nn_ctx_destroy((WASINNContext *)value);
+}
+
+static WASINNContext *
+wasi_nn_initialize_context()
+{
+    NN_DBG_PRINTF("Initializing wasi-nn context");
+    WASINNContext *wasi_nn_ctx =
+        (WASINNContext *)wasm_runtime_malloc(sizeof(WASINNContext));
+    if (wasi_nn_ctx == NULL) {
+        NN_ERR_PRINTF("Error when allocating memory for WASI-NN context");
+        return NULL;
+    }
+    wasi_nn_ctx->is_model_loaded = false;
+    tensorflowlite_initialize(&wasi_nn_ctx->tflite_ctx);
+    return wasi_nn_ctx;
+}
+
+static bool
+wasi_nn_initialize()
+{
+    NN_DBG_PRINTF("Initializing wasi-nn");
+    hashmap = bh_hash_map_create(HASHMAP_INITIAL_SIZE, true, hash_func,
+                                 key_equal_func, key_destroy_func,
+                                 value_destroy_func);
+    if (hashmap == NULL) {
+        NN_ERR_PRINTF("Error while initializing hashmap");
+        return false;
+    }
+    return true;
+}
+
+static WASINNContext *
+wasm_runtime_get_wasi_nn_ctx(wasm_module_inst_t instance)
+{
+    WASINNContext *wasi_nn_ctx =
+        (WASINNContext *)bh_hash_map_find(hashmap, (void *)instance);
+    if (wasi_nn_ctx == NULL) {
+        wasi_nn_ctx = wasi_nn_initialize_context();
+        if (wasi_nn_ctx == NULL)
+            return NULL;
+        bool ok =
+            bh_hash_map_insert(hashmap, (void *)instance, (void *)wasi_nn_ctx);
+        if (!ok) {
+            NN_ERR_PRINTF("Error while storing context");
+            wasi_nn_ctx_destroy(wasi_nn_ctx);
+            return NULL;
+        }
+    }
+    NN_DBG_PRINTF("Returning ctx");
+    return wasi_nn_ctx;
+}
+
+static void
+wasi_nn_ctx_destroy(WASINNContext *wasi_nn_ctx)
+{
+    if (wasi_nn_ctx == NULL) {
+        NN_ERR_PRINTF(
+            "Error when deallocating memory. WASI-NN context is NULL");
+        return;
+    }
+    NN_DBG_PRINTF("Freeing wasi-nn");
+    NN_DBG_PRINTF("-> is_model_loaded: %d", wasi_nn_ctx->is_model_loaded);
+    NN_DBG_PRINTF("-> current_encoding: %d", wasi_nn_ctx->current_encoding);
+    tensorflowlite_destroy(wasi_nn_ctx->tflite_ctx);
+    wasm_runtime_free(wasi_nn_ctx);
+}
+
+/*
+ * Release the wasi-nn context associated with a module instance, if one
+ * was ever created for it.
+ *
+ * The entry is taken out of the hashmap before the context is freed so
+ * the map never keeps a pointer to a destroyed context. An instance that
+ * never used wasi-nn has no entry, so nothing is allocated or freed for it.
+ *
+ * @param instance the module instance whose wasi-nn context is released
+ */
+void
+wasi_nn_destroy(wasm_module_inst_t instance)
+{
+    void *value = NULL;
+
+    /* bh_hash_map_remove() hands the stored value back to the caller
+     * instead of invoking value_destroy_func, so destroy it here. */
+    if (!bh_hash_map_remove(hashmap, (void *)instance, NULL, &value))
+        return;
+
+    wasi_nn_ctx_destroy((WASINNContext *)value);
+}
+
 /* Utils */
 
 static bool
@@ -64,36 +179,13 @@ is_encoding_implemented(graph_encoding encoding)
 static error
 is_model_initialized(WASINNContext *wasi_nn_ctx)
 {
-    if (!wasi_nn_ctx->is_initialized) {
+    if (!wasi_nn_ctx->is_model_loaded) {
         NN_ERR_PRINTF("Model not initialized.");
         return runtime_error;
     }
     return success;
 }
 
-WASINNContext *
-wasm_runtime_get_wasi_nn_ctx(wasm_module_inst_t instance)
-{
-    WASINNContext *wasi_nn_ctx = NULL;
-#if WASM_ENABLE_INTERP != 0
-    if (instance->module_type == Wasm_Module_Bytecode) {
-        NN_DBG_PRINTF("Getting ctx from WASM");
-        WASMModuleInstance *module_inst = (WASMModuleInstance *)instance;
-        wasi_nn_ctx = ((WASMModuleInstanceExtra *)module_inst->e)->wasi_nn_ctx;
-    }
-#endif
-#if WASM_ENABLE_AOT != 0
-    if (instance->module_type == Wasm_Module_AoT) {
-        NN_DBG_PRINTF("Getting ctx from AOT");
-        AOTModuleInstance *module_inst = (AOTModuleInstance *)instance;
-        wasi_nn_ctx = ((AOTModuleInstanceExtra *)module_inst->e)->wasi_nn_ctx;
-    }
-#endif
-    bh_assert(wasi_nn_ctx != NULL);
-    NN_DBG_PRINTF("Returning ctx");
-    return wasi_nn_ctx;
-}
-
 /* WASI-NN implementation */
 
 error
@@ -131,7 +223,7 @@ wasi_nn_load(wasm_exec_env_t exec_env, graph_builder_array_wasm *builder,
     NN_DBG_PRINTF("wasi_nn_load finished with status %d [graph=%d]", res, *g);
 
     wasi_nn_ctx->current_encoding = encoding;
-    wasi_nn_ctx->is_initialized = true;
+    wasi_nn_ctx->is_model_loaded = true;
 
 fail:
     // XXX: Free intermediate structure pointers
@@ -250,39 +342,6 @@ wasi_nn_get_output(wasm_exec_env_t exec_env, graph_execution_context ctx,
     return res;
 }
 
-/* Non-exposed public functions */
-
-WASINNContext *
-wasi_nn_initialize()
-{
-    NN_DBG_PRINTF("Initializing wasi-nn");
-    WASINNContext *wasi_nn_ctx =
-        (WASINNContext *)wasm_runtime_malloc(sizeof(WASINNContext));
-    if (wasi_nn_ctx == NULL) {
-        NN_ERR_PRINTF("Error when allocating memory for WASI-NN context");
-        return NULL;
-    }
-    wasi_nn_ctx->is_initialized = true;
-    wasi_nn_ctx->current_encoding = 3;
-    tensorflowlite_initialize(&wasi_nn_ctx->tflite_ctx);
-    return wasi_nn_ctx;
-}
-
-void
-wasi_nn_destroy(WASINNContext *wasi_nn_ctx)
-{
-    if (wasi_nn_ctx == NULL) {
-        NN_ERR_PRINTF(
-            "Error when deallocating memory. WASI-NN context is NULL");
-        return;
-    }
-    NN_DBG_PRINTF("Freeing wasi-nn");
-    NN_DBG_PRINTF("-> is_initialized: %d", wasi_nn_ctx->is_initialized);
-    NN_DBG_PRINTF("-> current_encoding: %d", wasi_nn_ctx->current_encoding);
-    tensorflowlite_destroy(wasi_nn_ctx->tflite_ctx);
-    wasm_runtime_free(wasi_nn_ctx);
-}
-
 /* Register WASI-NN in WAMR */
 
 /* clang-format off */
@@ -299,8 +358,19 @@ static NativeSymbol native_symbols_wasi_nn[] = {
 };
 
 uint32_t
-get_wasi_nn_export_apis(NativeSymbol **p_libc_wasi_apis)
+get_wasi_nn_export_apis(NativeSymbol **p_native_symbols)
 {
-    *p_libc_wasi_apis = native_symbols_wasi_nn;
+    if (!wasi_nn_initialize())
+        return 0;
+    *p_native_symbols = native_symbols_wasi_nn;
     return sizeof(native_symbols_wasi_nn) / sizeof(NativeSymbol);
 }
+
+#if defined(WASI_NN_SHARED)
+uint32_t
+get_native_lib(char **p_module_name, NativeSymbol **p_native_symbols)
+{
+    *p_module_name = "wasi_nn";
+    return get_wasi_nn_export_apis(p_native_symbols);
+}
+#endif

+ 3 - 8
core/iwasm/libraries/wasi-nn/src/wasi_nn_private.h

@@ -7,25 +7,20 @@
 #define WASI_NN_PRIVATE_H
 
 #include "wasi_nn_types.h"
+#include "wasm_export.h"
 
 typedef struct {
-    bool is_initialized;
+    bool is_model_loaded;
     graph_encoding current_encoding;
     void *tflite_ctx;
 } WASINNContext;
 
-/**
- * @brief Initialize wasi-nn
- *
- */
-WASINNContext *
-wasi_nn_initialize();
 /**
  * @brief Destroy wasi-nn on app exists
  *
  */
 
 void
-wasi_nn_destroy(WASINNContext *wasi_nn_ctx);
+wasi_nn_destroy(wasm_module_inst_t instance);
 
 #endif

+ 1 - 2
core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp

@@ -7,9 +7,8 @@
 #include "wasi_nn_tensorflowlite.hpp"
 #include "logger.h"
 
-#include "bh_common.h"
 #include "bh_platform.h"
-#include "platform_common.h"
+#include "wasm_export.h"
 
 #include <tensorflow/lite/interpreter.h>
 #include <tensorflow/lite/kernels/register.h>

+ 3 - 0
core/shared/platform/common/posix/posix_socket.c

@@ -275,6 +275,9 @@ os_socket_recv_from(bh_socket_t socket, void *buf, unsigned int len, int flags,
             return -1;
         }
     }
+    else {
+        memset(src_addr, 0, sizeof(*src_addr));
+    }
 
     return ret;
 }

+ 68 - 5
core/shared/platform/esp-idf/espidf_memmap.c

@@ -5,16 +5,34 @@
 
 #include "platform_api_vmcore.h"
 #include "platform_api_extension.h"
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+#include "soc/mmu.h"
+#include "rom/cache.h"
+
+#define MEM_DUAL_BUS_OFFSET (IRAM0_CACHE_ADDRESS_LOW - DRAM0_CACHE_ADDRESS_LOW)
+
+#define in_ibus_ext(addr)                      \
+    (((uint32)addr >= IRAM0_CACHE_ADDRESS_LOW) \
+     && ((uint32)addr < IRAM0_CACHE_ADDRESS_HIGH))
+
+static portMUX_TYPE s_spinlock = portMUX_INITIALIZER_UNLOCKED;
+#endif
 
 void *
 os_mmap(void *hint, size_t size, int prot, int flags)
 {
     if (prot & MMAP_PROT_EXEC) {
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+        uint32_t mem_caps = MALLOC_CAP_SPIRAM;
+#else
+        uint32_t mem_caps = MALLOC_CAP_EXEC;
+#endif
+
         // Memory allocation with MALLOC_CAP_EXEC will return 4-byte aligned
         // Reserve extra 4 byte to fixup alignment and size for the pointer to
         // the originally allocated address
         void *buf_origin =
-            heap_caps_malloc(size + 4 + sizeof(uintptr_t), MALLOC_CAP_EXEC);
+            heap_caps_malloc(size + 4 + sizeof(uintptr_t), mem_caps);
         if (!buf_origin) {
             return NULL;
         }
@@ -25,19 +43,35 @@ os_mmap(void *hint, size_t size, int prot, int flags)
 
         uintptr_t *addr_field = buf_fixed - sizeof(uintptr_t);
         *addr_field = (uintptr_t)buf_origin;
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+        return buf_fixed + MEM_DUAL_BUS_OFFSET;
+#else
         return buf_fixed;
+#endif
     }
     else {
-        return os_malloc(size);
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+        uint32_t mem_caps = MALLOC_CAP_SPIRAM;
+#else
+        uint32_t mem_caps = MALLOC_CAP_8BIT;
+#endif
+        return heap_caps_malloc(size, mem_caps);
     }
 }
 
 void
 os_munmap(void *addr, size_t size)
 {
+    char *ptr = (char *)addr;
+
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+    if (in_ibus_ext(ptr)) {
+        ptr -= MEM_DUAL_BUS_OFFSET;
+    }
+#endif
     // We don't need special handling of the executable allocations
     // here, free() of esp-idf handles it properly
-    return os_free(addr);
+    return os_free(ptr);
 }
 
 int
@@ -47,5 +81,34 @@ os_mprotect(void *addr, size_t size, int prot)
 }
 
 void
-os_dcache_flush()
-{}
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+    IRAM_ATTR
+#endif
+    os_dcache_flush()
+{
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+    uint32_t preload;
+    extern void Cache_WriteBack_All(void);
+
+    portENTER_CRITICAL(&s_spinlock);
+
+    Cache_WriteBack_All();
+    preload = Cache_Disable_ICache();
+    Cache_Enable_ICache(preload);
+
+    portEXIT_CRITICAL(&s_spinlock);
+#endif
+}
+
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+void *
+os_get_dbus_mirror(void *ibus)
+{
+    if (in_ibus_ext(ibus)) {
+        return (void *)((char *)ibus - MEM_DUAL_BUS_OFFSET);
+    }
+    else {
+        return ibus;
+    }
+}
+#endif

+ 6 - 0
core/shared/platform/esp-idf/shared_platform.cmake

@@ -11,3 +11,9 @@ include_directories(${PLATFORM_SHARED_DIR}/../include)
 file (GLOB_RECURSE source_all ${PLATFORM_SHARED_DIR}/*.c)
 
 set (PLATFORM_SHARED_SOURCE ${source_all} ${PLATFORM_COMMON_MATH_SOURCE})
+
+# If PSRAM is enabled on ESP32-S3, it is better to put AOT code into PSRAM so
+# that users can keep SRAM for Wi-Fi/BLE and peripheral drivers.
+if(CONFIG_ESP32S3_SPIRAM_SUPPORT)
+    add_definitions(-DWASM_MEM_DUAL_BUS_MIRROR=1)
+endif()

+ 5 - 0
core/shared/platform/include/platform_api_vmcore.h

@@ -129,6 +129,11 @@ os_munmap(void *addr, size_t size);
 int
 os_mprotect(void *addr, size_t size, int prot);
 
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+void *
+os_get_dbus_mirror(void *ibus);
+#endif
+
 /**
  * Flush cpu data cache, in some CPUs, after applying relocation to the
  * AOT code, the code may not have been written back to the cpu data cache,

+ 79 - 2
core/shared/platform/nuttx/nuttx_platform.c

@@ -10,6 +10,46 @@
 #include <nuttx/arch.h>
 #endif
 
+#if defined(CONFIG_ARCH_CHIP_ESP32S3)
+/*
+ * TODO: Move these methods below the operating system level
+ */
+#define MEM_DUAL_BUS_OFFSET (0x42000000 - 0x3C000000)
+#define IRAM0_CACHE_ADDRESS_LOW 0x42000000
+#define IRAM0_CACHE_ADDRESS_HIGH 0x44000000
+#define IRAM_ATTR locate_data(".iram1")
+
+#define in_ibus_ext(addr)                      \
+    (((uint32)addr >= IRAM0_CACHE_ADDRESS_LOW) \
+     && ((uint32)addr < IRAM0_CACHE_ADDRESS_HIGH))
+void IRAM_ATTR
+bus_sync(void)
+{
+    extern void cache_writeback_all(void);
+    extern uint32_t Cache_Disable_ICache(void);
+    extern void Cache_Enable_ICache(uint32_t autoload);
+
+    irqstate_t flags;
+    uint32_t preload;
+
+    flags = enter_critical_section();
+
+    cache_writeback_all();
+    preload = Cache_Disable_ICache();
+    Cache_Enable_ICache(preload);
+
+    leave_critical_section(flags);
+}
+#else
+#define MEM_DUAL_BUS_OFFSET (0)
+#define IRAM0_CACHE_ADDRESS_LOW (0)
+#define IRAM0_CACHE_ADDRESS_HIGH (0)
+#define in_ibus_ext(addr) (0)
+static void
+bus_sync(void)
+{}
+#endif
+
 int
 bh_platform_init()
 {
@@ -47,6 +87,10 @@ os_dumps_proc_mem_info(char *out, unsigned int size)
 void *
 os_mmap(void *hint, size_t size, int prot, int flags)
 {
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+    void *i_addr, *d_addr;
+#endif
+
 #if defined(CONFIG_ARCH_USE_TEXT_HEAP)
     if ((prot & MMAP_PROT_EXEC) != 0) {
         return up_textheap_memalign(sizeof(void *), size);
@@ -55,6 +99,17 @@ os_mmap(void *hint, size_t size, int prot, int flags)
 
     if ((uint64)size >= UINT32_MAX)
         return NULL;
+
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+    if ((prot & MMAP_PROT_EXEC) != 0) {
+        d_addr = malloc((uint32)size);
+        if (d_addr == NULL) {
+            return NULL;
+        }
+        i_addr = (void *)((uint8 *)d_addr + MEM_DUAL_BUS_OFFSET);
+        return in_ibus_ext(i_addr) ? i_addr : d_addr;
+    }
+#endif
     return malloc((uint32)size);
 }
 
@@ -67,7 +122,14 @@ os_munmap(void *addr, size_t size)
         return;
     }
 #endif
-    return free(addr);
+
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+    if (in_ibus_ext(addr)) {
+        free((void *)((uint8 *)addr - MEM_DUAL_BUS_OFFSET));
+        return;
+    }
+#endif
+    free(addr);
 }
 
 int
@@ -78,7 +140,22 @@ os_mprotect(void *addr, size_t size, int prot)
 
 void
 os_dcache_flush()
-{}
+{
+    bus_sync();
+}
+
+#if (WASM_MEM_DUAL_BUS_MIRROR != 0)
+void *
+os_get_dbus_mirror(void *ibus)
+{
+    if (in_ibus_ext(ibus)) {
+        return (void *)((uint8 *)ibus - MEM_DUAL_BUS_OFFSET);
+    }
+    else {
+        return ibus;
+    }
+}
+#endif
 
 /* If AT_FDCWD is provided, maybe we have openat family */
 #if !defined(AT_FDCWD)

+ 123 - 0
core/shared/utils/bh_atomic.h

@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2023 Amazon Inc.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _BH_ATOMIC_H
+#define _BH_ATOMIC_H
+
+#include "gnuc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Why don't we use C11 stdatomics here?
+ *
+ * Unlike C11 stdatomics,
+ *
+ * - bh_atomic_xxx_t is guaranteed to have the same size as the base type.
+ *   Thus more friendly to our AOT conventions.
+ *
+ * - It's available for C++.
+ *   Although C++23 will have C-compatible stdatomic.h, it isn't widely
+ *   available yet.
+ */
+
+/*
+ * Note about BH_ATOMIC_32_IS_ATOMIC
+ *
+ * If BH_ATOMIC_32_IS_ATOMIC == 0, BH_ATOMIC_xxx operations defined below
+ * are not really atomic and require an external lock.
+ *
+ * Expected usage is:
+ *
+ *     bh_atomic_32_t var = 0;
+ *     uint32 old;
+ * #if BH_ATOMIC_32_IS_ATOMIC == 0
+ *     lock(&some_lock);
+ * #endif
+ *     old = BH_ATOMIC_32_FETCH_AND(var, 1);
+ * #if BH_ATOMIC_32_IS_ATOMIC == 0
+ *     unlock(&some_lock);
+ * #endif
+ */
+
+typedef uint32 bh_atomic_32_t;
+
+#if defined(__GNUC_PREREQ)
+#if __GNUC_PREREQ(4, 7)
+#define CLANG_GCC_HAS_ATOMIC_BUILTIN
+#endif
+#elif defined(__clang__)
+#if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 0)
+#define CLANG_GCC_HAS_ATOMIC_BUILTIN
+#endif
+#endif
+
+#if defined(CLANG_GCC_HAS_ATOMIC_BUILTIN)
+#define BH_ATOMIC_32_IS_ATOMIC 1
+#define BH_ATOMIC_32_LOAD(v) __atomic_load_n(&(v), __ATOMIC_SEQ_CST)
+#define BH_ATOMIC_32_FETCH_OR(v, val) \
+    __atomic_fetch_or(&(v), (val), __ATOMIC_SEQ_CST)
+#define BH_ATOMIC_32_FETCH_AND(v, val) \
+    __atomic_fetch_and(&(v), (val), __ATOMIC_SEQ_CST)
+#define BH_ATOMIC_32_FETCH_ADD(v, val) \
+    __atomic_fetch_add(&(v), (val), __ATOMIC_SEQ_CST)
+#define BH_ATOMIC_32_FETCH_SUB(v, val) \
+    __atomic_fetch_sub(&(v), (val), __ATOMIC_SEQ_CST)
+#else /* else of defined(CLANG_GCC_HAS_ATOMIC_BUILTIN) */
+#define BH_ATOMIC_32_LOAD(v) (v)
+#define BH_ATOMIC_32_FETCH_OR(v, val) nonatomic_32_fetch_or(&(v), val)
+#define BH_ATOMIC_32_FETCH_AND(v, val) nonatomic_32_fetch_and(&(v), val)
+#define BH_ATOMIC_32_FETCH_ADD(v, val) nonatomic_32_fetch_add(&(v), val)
+#define BH_ATOMIC_32_FETCH_SUB(v, val) nonatomic_32_fetch_sub(&(v), val)
+
+static inline uint32
+nonatomic_32_fetch_or(bh_atomic_32_t *p, uint32 val)
+{
+    uint32 old = *p;
+    *p |= val;
+    return old;
+}
+
+static inline uint32
+nonatomic_32_fetch_and(bh_atomic_32_t *p, uint32 val)
+{
+    uint32 old = *p;
+    *p &= val;
+    return old;
+}
+
+static inline uint32
+nonatomic_32_fetch_add(bh_atomic_32_t *p, uint32 val)
+{
+    uint32 old = *p;
+    *p += val;
+    return old;
+}
+
+static inline uint32
+nonatomic_32_fetch_sub(bh_atomic_32_t *p, uint32 val)
+{
+    uint32 old = *p;
+    *p -= val;
+    return old;
+}
+
+/* The flag can be defined by the user if the platform
+   supports atomic access to uint32 aligned memory. */
+#ifdef WASM_UINT32_IS_ATOMIC
+#define BH_ATOMIC_32_IS_ATOMIC 1
+#else /* else of WASM_UINT32_IS_ATOMIC */
+#define BH_ATOMIC_32_IS_ATOMIC 0
+#endif /* WASM_UINT32_IS_ATOMIC */
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* end of _BH_ATOMIC_H */

+ 0 - 0
core/iwasm/libraries/libc-wasi/sandboxed-system-primitives/src/gnuc.h → core/shared/utils/gnuc.h


+ 44 - 43
doc/embed_wamr.md

@@ -258,68 +258,69 @@ We can't pass structure data or class objects through the pointer since the memo
 
 ## Execute wasm functions in multiple threads
 
-The `exec_env` is not thread safety, it will cause unexpected behavior if the same `exec_env` is used in multiple threads. However, we've provided two ways to execute wasm functions concurrently:
+It isn't safe to use an `exec_env` object in multiple threads concurrently.
+To run a multi-threaded application, you basically need a separate `exec_env`
+for each thread.
 
-- You can use `pthread` APIs in your wasm application, see [pthread library](./pthread_library.md) for more details.
+### Approaches to manage `exec_env` objects and threads
 
-- The `spawn exec_env` and `spawn thread` APIs are available, you can use these APIs to manage the threads in native:
+WAMR supports two approaches to manage `exec_env` and threads as described
+below.  While they are not exclusive, you usually only need to use one of
+them.
 
-  *spawn exec_env:*
+#### Make your WASM application manage threads
 
-  `spawn exec_env` API spawns a `new_exec_env` base on the original `exec_env`, use can use it in other threads:
+  You can make your WASM application spawn threads by itself,
+  typically using `pthread` APIs like `pthread_create`.
+  See [pthread library](./pthread_library.md) and
+  [pthread implementations](./pthread_impls.md) for more details.
+  In this case, WAMR manages `exec_env` for the spawned threads.
 
-  ```C
-  new_exec_env = wasm_runtime_spawn_exec_env(exec_env);
+#### Make your embedder manage threads
+
+  The `spawn exec_env` and `spawn thread` APIs are available for the embedder.
+  You can use these APIs to manage the threads.
+  See [Thread related embedder API](./embed_wamr_spawn_api.md) for details.
+
+### Other notes about threads
 
-    /* Then you can use new_exec_env in your new thread */
-    module_inst = wasm_runtime_get_module_inst(new_exec_env);
-    func_inst = wasm_runtime_lookup_function(module_inst, ...);
-    wasm_runtime_call_wasm(new_exec_env, func_inst, ...);
+* You can manage the maximum number of threads
 
-  /* you need to use this API to manually destroy the spawned exec_env */
-  wasm_runtime_destroy_spawned_exec_env(new_exec_env);
+  ```C
+  init_args.max_thread_num = THREAD_NUM;
+  /* If this init argument is not set, the default maximum thread number is 4 */
   ```
 
-  *spawn thread:*
+* To share memory among threads, you need to build your WASM application with shared memory
 
-  You can also use `spawn thread` API to avoid manually manage the spawned exec_env:
+  For example, it can be done with `--shared-memory` and `-pthread`.
 
-  ```C
-  wasm_thread_t wasm_tid;
-  void *wamr_thread_cb(wasm_exec_env_t exec_env, void *arg)
-  {
-    module_inst = wasm_runtime_get_module_inst(exec_env);
-    func_inst = wasm_runtime_lookup_function(module_inst, ...);
-    wasm_runtime_call_wasm(exec_env, func_inst, ...);
-  }
-  wasm_runtime_spawn_thread(exec_env, &wasm_tid, wamr_thread_cb, NULL);
-  /* Use wasm_runtime_join_thread to join the spawned thread */
-  wasm_runtime_join_thread(wasm_tid, NULL);
+  ```bash
+    /opt/wasi-sdk/bin/clang -o test.wasm test.c -nostdlib -pthread    \
+      -Wl,--shared-memory,--max-memory=131072                         \
+      -Wl,--no-entry,--export=__heap_base,--export=__data_end         \
+      -Wl,--export=__wasm_call_ctors,--export=${your_func_name}
   ```
 
-**Note1: You can manage the maximum number of threads can be created:**
+* The corresponding threading feature should be enabled while building the runtime
 
-```C
-init_args.max_thread_num = THREAD_NUM;
-/* If this init argument is not set, the default maximum thread number is 4 */
-```
+  - WAMR lib-pthread (legacy)
 
-**Note2: The wasm application should be built with `--shared-memory` and `-pthread` enabled:**
+    ```bash
+    cmake .. -DWAMR_BUILD_LIB_PTHREAD=1
+    ```
 
-```bash
-  /opt/wasi-sdk/bin/clang -o test.wasm test.c -nostdlib -pthread    \
-    -Wl,--shared-memory,--max-memory=131072                         \
-    -Wl,--no-entry,--export=__heap_base,--export=__data_end         \
-    -Wl,--export=__wasm_call_ctors,--export=${your_func_name}
-```
+  - wasi-threads
 
-  **Note3: The pthread library feature should be enabled while building the runtime:**
+    ```bash
+    cmake .. -DWAMR_BUILD_LIB_WASI_THREADS=1
+    ```
 
-  ```bash
-  cmake .. -DWAMR_BUILD_LIB_PTHREAD=1
-  ```
+  - `wasm_runtime_spawn_exec_env` and `wasm_runtime_spawn_thread`
 
-[Here](../samples/spawn-thread) is a sample to show how to use these APIs.
+    ```bash
+    cmake .. -DWAMR_BUILD_THREAD_MGR=1 -DWAMR_BUILD_SHARED_MEMORY=1
+    ```
 
 ## The deinitialization procedure
 

+ 38 - 0
doc/embed_wamr_spawn_api.md

@@ -0,0 +1,38 @@
+# Thread related embedder API
+
+This document explains `wasm_runtime_spawn_exec_env` and
+`wasm_runtime_spawn_thread`.
+[Here](../samples/spawn-thread) is a sample to show how to use these APIs.
+
+  * spawn exec_env
+
+    `spawn exec_env` API creates a new `exec_env` based on the original `exec_env`. You can use it in other threads. It's up to the embedder how to manage host threads to run the new `exec_env`.
+
+    ```C
+    new_exec_env = wasm_runtime_spawn_exec_env(exec_env);
+
+      /* Then you can use new_exec_env in your new thread */
+      module_inst = wasm_runtime_get_module_inst(new_exec_env);
+      func_inst = wasm_runtime_lookup_function(module_inst, ...);
+      wasm_runtime_call_wasm(new_exec_env, func_inst, ...);
+
+    /* you need to use this API to manually destroy the spawned exec_env */
+    wasm_runtime_destroy_spawned_exec_env(new_exec_env);
+    ```
+
+  * spawn thread
+
+    Alternatively, you can use `spawn thread` API to avoid managing the extra exec_env and the corresponding host thread manually:
+
+    ```C
+    wasm_thread_t wasm_tid;
+    void *wamr_thread_cb(wasm_exec_env_t exec_env, void *arg)
+    {
+      module_inst = wasm_runtime_get_module_inst(exec_env);
+      func_inst = wasm_runtime_lookup_function(module_inst, ...);
+      wasm_runtime_call_wasm(exec_env, func_inst, ...);
+    }
+    wasm_runtime_spawn_thread(exec_env, &wasm_tid, wamr_thread_cb, NULL);
+    /* Use wasm_runtime_join_thread to join the spawned thread */
+    wasm_runtime_join_thread(wasm_tid, NULL);
+    ```

+ 15 - 0
doc/perf_tune.md

@@ -72,3 +72,18 @@ wasm_runtime_dump_pgo_prof_data_to_buf(wasm_module_inst_t module_inst, char *buf
 6. Run the optimized aot_file: `iwasm <aot_file>`.
 
 Developer can refer to the `test_pgo.sh` files under each benchmark folder for more details, e.g. [test_pgo.sh](../tests/benchmarks/coremark/test_pgo.sh) of CoreMark benchmark.
+
+## 6. Disable the memory boundary check
+
+Please note that this method is not a general solution since it may lead to security issues. It only boosts the performance on some platforms, namely those running in AOT mode that don't support hardware traps for memory boundary checks.
+
+1. Build WAMR with `-DWAMR_CONFIGUABLE_BOUNDS_CHECKS=1` option.
+
+2. Compile AOT module by wamrc with `--bounds-check=0` option.
+
+3. Run the AOT module by iwasm with `--disable-bounds-checks` option.
+
+> Note: The size of the AOT file will be much smaller than the default, and some tricks become possible, such as letting the wasm application access the memory of the host OS directly.
+Please note that if this option is enabled, the wasm spec test will fail since it requires the memory boundary check. For example, the runtime will crash when accessing the memory out of the boundary in some cases instead of throwing an exception as the spec requires.
+
+You should only use this method for well tested wasm applications and make sure the memory access is safe.

+ 117 - 0
doc/xip.md

@@ -7,8 +7,125 @@ Some IoT devices may require to run the AOT file from flash or ROM which is read
 The XIP file is an AOT file without (or with few) relocations to patch the AOT code (or text section). Developer can use the option `--enable-indirect-mode --disable-llvm-intrinsics` for wamrc to generate the AOT file, e.g.:
 ```bash
 wamrc --enable-indirect-mode --disable-llvm-intrinsics -o <aot_file> <wasm_file>
+# or
+wamrc --xip -o <aot_file> <wasm_file>
 ```
 
+Note: `--xip` is a shorthand for `--enable-indirect-mode --disable-llvm-intrinsics`.
+
 ## Known issues
 
 There may be some relocations to the ".rodata" like sections which require to patch the AOT code. More work will be done to resolve it in the future.
+
+## Tuning the XIP intrinsic functions
+
+WAMR provides a default mapping table for some targets, but it may not be the best one for your target. And it doesn't cover all the supported targets.
+
+So, wamrc provides the option `--enable-builtin-intrinsics=<intr1,intr2,...>` to make it possible to tune the intrinsic functions for your target.
+
+Firstly, you should understand why we don't use the LLVM intrinsic functions directly. The reason is that the LLVM intrinsic functions can't always be mapped to native instructions directly. For example, the LLVM intrinsic function `i32.div_s` can't be mapped to a native instruction if the target doesn't support the division instruction; instead, it will be translated to a call to a runtime function from libgcc/compiler-rt. This causes the AOT code to have relocations to libgcc/compiler-rt, which is not acceptable for the XIP feature.
+
+So, we need to replace the LLVM intrinsic functions with functions implemented by the runtime itself, which can be called through the function pointer table (`--enable-indirect-mode`) and don't have relocations to libgcc/compiler-rt (`--disable-llvm-intrinsics`).
+
+Available intrinsic functions for tuning:
+
+| LLVM intrinsic function | Explanation |
+| --- | --- |
+| llvm.experimental.constrained.fadd.f32 | float32 add |
+| llvm.experimental.constrained.fadd.f64 | float64 add |
+| llvm.experimental.constrained.fsub.f32 | float32 sub |
+| llvm.experimental.constrained.fsub.f64 | float64 sub |
+| llvm.experimental.constrained.fmul.f32 | float32 mul |
+| llvm.experimental.constrained.fmul.f64 | float64 mul |
+| llvm.experimental.constrained.fdiv.f32 | float32 div |
+| llvm.experimental.constrained.fdiv.f64 | float64 div |
+| llvm.fabs.f32 | float32 abs |
+| llvm.fabs.f64 | float64 abs |
+| llvm.ceil.f32 | float32 ceil |
+| llvm.ceil.f64 | float64 ceil |
+| llvm.floor.f32 | float32 floor |
+| llvm.floor.f64 | float64 floor |
+| llvm.trunc.f32 | float32 trunc |
+| llvm.trunc.f64 | float64 trunc |
+| llvm.rint.f32 | float32 rint |
+| llvm.rint.f64 | float64 rint |
+| llvm.sqrt.f32 | float32 sqrt |
+| llvm.sqrt.f64 | float64 sqrt |
+| llvm.copysign.f32 | float32 copysign |
+| llvm.copysign.f64 | float64 copysign |
+| llvm.minnum.f32 | float32 minnum |
+| llvm.minnum.f64 | float64 minnum |
+| llvm.maxnum.f32 | float32 maxnum |
+| llvm.maxnum.f64 | float64 maxnum |
+| llvm.ctlz.i32 | int32 count leading zeros |
+| llvm.ctlz.i64 | int64 count leading zeros |
+| llvm.cttz.i32 | int32 count trailing zeros |
+| llvm.cttz.i64 | int64 count trailing zeros |
+| llvm.ctpop.i32 | int32 count population |
+| llvm.ctpop.i64 | int64 count population |
+| f64_convert_i32_s | int32 to float64 |
+| f64_convert_i32_u | uint32 to float64 |
+| f32_convert_i32_s | int32 to float32 |
+| f32_convert_i32_u | uint32 to float32 |
+| f64_convert_i64_s | int64 to float64 |
+| f64_convert_i64_u | uint64 to float64 |
+| f32_convert_i64_s | int64 to float32 |
+| f32_convert_i64_u | uint64 to float32 |
+| i32_trunc_f32_s | float32 to int32 |
+| i32_trunc_f32_u | float32 to uint32 |
+| i32_trunc_f64_s | float64 to int32 |
+| i32_trunc_f64_u | float64 to uint32 |
+| i64_trunc_f64_s | float64 to int64 |
+| i64_trunc_f64_u | float64 to uint64 |
+| i64_trunc_f32_s | float32 to int64 |
+| i64_trunc_f32_u | float32 to uint64 |
+| f32_demote_f64 | float64 to float32 |
+| f64_promote_f32 | float32 to float64 |
+| f32_cmp | float32 compare |
+| f64_cmp | float64 compare |
+| i64.div_s | int64 div |
+| i64.div_u | uint64 div |
+| i32.div_s | int32 div |
+| i32.div_u | uint32 div |
+| i64.rem_s | int64 rem |
+| i64.rem_u | uint64 rem |
+| i32.rem_s | int32 rem |
+| i32.rem_u | uint32 rem |
+| i64.or | int64 or |
+| i64.and | int64 and |
+| i32.const | emit i32 const into constant table |
+| i64.const | emit i64 const into constant table |
+| f32.const | emit f32 const into constant table |
+| f64.const | emit f64 const into constant table |
+
+wamrc also provides combined intrinsic functions to simplify the tuning:
+
+* all: all the above intrinsic functions
+* i32.common: i32.div_s, i32.div_u, i32.rem_s, i32.rem_u
+* i64.common: i64.div_s, i64.div_u, i64.rem_s, i64.rem_u, i64.or, i64.and
+* f32.common: f32_cmp, llvm.experimental.constrained.fadd.f32, llvm.experimental.constrained.fsub.f32, llvm.experimental.constrained.fmul.f32, llvm.experimental.constrained.fdiv.f32, llvm.fabs.f32, llvm.ceil.f32, llvm.floor.f32, llvm.trunc.f32, llvm.rint.f32, llvm.sqrt.f32, llvm.copysign.f32, llvm.minnum.f32, llvm.maxnum.f32
+* f64.common: f32_demote_f64, f64_promote_f32, f64_cmp, llvm.experimental.constrained.fadd.f64, llvm.experimental.constrained.fsub.f64, llvm.experimental.constrained.fmul.f64, llvm.experimental.constrained.fdiv.f64, llvm.fabs.f64, llvm.ceil.f64, llvm.floor.f64, llvm.trunc.f64, llvm.rint.f64, llvm.sqrt.f64, llvm.copysign.f64, llvm.minnum.f64, llvm.maxnum.f64
+* f32xi32: i32_trunc_f32_s, i32_trunc_f32_u, f32_convert_i32_s, f32_convert_i32_u
+* f64xi32: i32_trunc_f64_s, i32_trunc_f64_u, f64_convert_i32_s, f64_convert_i32_u
+* f32xi64: i64_trunc_f32_s, i64_trunc_f32_u, f32_convert_i64_s, f32_convert_i64_u
+* f64xi64: i64_trunc_f64_s, i64_trunc_f64_u, f64_convert_i64_s, f64_convert_i64_u
+* constop: i32.const, i64.const, f32.const, f64.const
+* fpxint: f32xi32, f64xi32, f32xi64, f64xi64
+* fp.common: f32.common, f64.common
+
+
+### Example
+
+For ARM Cortex-M55, since it has a double-precision floating-point unit, it can support f32/f64 operations. But as a 32-bit MCU, it can only support 32-bit integer operations. So we can use the following command to generate the XIP binary:
+
+```
+wamrc --target=thumbv8m.main --cpu=cortex-m55 --xip --enable-builtin-intrinsics=i64.common -o hello.aot hello.wasm
+``` 
+
+For ARM Cortex-M3, since it has no floating-point unit and can only support 32-bit integer operations, we can use the following command to generate the XIP binary:
+
+```
+wamrc --target=thumbv7m --cpu=cortex-m3 --xip --enable-builtin-intrinsics=i64.common,fp.common,fpxint -o hello.aot hello.wasm
+```
+
+Other platforms can be tuned in the same way; which intrinsics should be enabled depends on the target platform's hardware capability.

+ 1 - 11
product-mini/README.md

@@ -251,17 +251,7 @@ WAMR provides some features which can be easily configured by passing options to
 
 ## Zephyr
 
-You need to prepare Zephyr first as described [here](https://docs.zephyrproject.org/latest/getting_started/index.html#get-zephyr-and-install-python-dependencies).
-
-After that you need to point the `ZEPHYR_BASE` variable to e.g. `~/zephyrproject/zephyr`. Also, it is important that you have `west` available for subsequent actions.
-
-``` Bash
-cd <wamr_root_dir>/product-mini/platforms/zephyr/simple
-# Execute the ./build_and_run.sh script with board name as parameter. Here take x86 as example:
-./build_and_run.sh x86
-```
-
-The [Zephyr SDK](https://github.com/zephyrproject-rtos/sdk-ng) provides toolchains for all supported targets. Follow the instructions in the [documentation](https://docs.zephyrproject.org/latest/develop/getting_started/index.html#install-zephyr-sdk) to ensure it is installed and configured correctly.
+Please refer to this [README](./platforms/zephyr/simple/README.md) under the Zephyr sample directory for details.
 
 Note:
 WAMR provides some features which can be easily configured by passing options to cmake, please see [WAMR vmcore cmake building configurations](../doc/build_wamr.md#wamr-vmcore-cmake-building-configurations) for details. Currently in Zephyr, interpreter, AOT and builtin libc are enabled by default.

+ 3 - 1
product-mini/platforms/esp-idf/build_and_run.sh

@@ -5,14 +5,16 @@
 
 ESP32_TARGET="esp32"
 ESP32C3_TARGET="esp32c3"
+ESP32S3_TARGET="esp32s3"
 
 usage ()
 {
         echo "USAGE:"
-        echo "$0 $ESP32_TARGET|$ESP32C3_TARGET"
+        echo "$0 $ESP32_TARGET|$ESP32C3_TARGET|$ESP32S3_TARGET"
         echo "Example:"
         echo "        $0 $ESP32_TARGET"
         echo "        $0 $ESP32C3_TARGET"
+        echo "        $0 $ESP32S3_TARGET"
         exit 1
 }
 

+ 7 - 1
product-mini/platforms/esp-idf/main/main.c

@@ -12,6 +12,12 @@
 
 #include "esp_log.h"
 
+#ifdef CONFIG_IDF_TARGET_ESP32S3
+#define IWASM_MAIN_STACK_SIZE 5120
+#else
+#define IWASM_MAIN_STACK_SIZE 4096
+#endif
+
 #define LOG_TAG "wamr"
 
 static void *
@@ -146,7 +152,7 @@ app_main(void)
     pthread_attr_t tattr;
     pthread_attr_init(&tattr);
     pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
-    pthread_attr_setstacksize(&tattr, 4096);
+    pthread_attr_setstacksize(&tattr, IWASM_MAIN_STACK_SIZE);
 
     res = pthread_create(&t, &tattr, iwasm_main, (void *)NULL);
     assert(res == 0);

+ 64 - 0
product-mini/platforms/linux-sgx/enclave-sample/Enclave/Enclave.cpp

@@ -92,6 +92,8 @@ set_error_buf(char *error_buf, uint32 error_buf_size, const char *string)
         snprintf(error_buf, error_buf_size, "%s", string);
 }
 
+static bool runtime_inited = false;
+
 static void
 handle_cmd_init_runtime(uint64 *args, uint32 argc)
 {
@@ -100,6 +102,12 @@ handle_cmd_init_runtime(uint64 *args, uint32 argc)
 
     bh_assert(argc == 1);
 
+    /* avoid duplicated init */
+    if (runtime_inited) {
+        args[0] = false;
+        return;
+    }
+
     os_set_print_function(enclave_print);
 
     max_thread_num = (uint32)args[0];
@@ -122,6 +130,7 @@ handle_cmd_init_runtime(uint64 *args, uint32 argc)
         return;
     }
 
+    runtime_inited = true;
     args[0] = true;
 
     LOG_VERBOSE("Init runtime environment success.\n");
@@ -130,7 +139,11 @@ handle_cmd_init_runtime(uint64 *args, uint32 argc)
 static void
 handle_cmd_destroy_runtime()
 {
+    if (!runtime_inited)
+        return;
+
     wasm_runtime_destroy();
+    runtime_inited = false;
 
     LOG_VERBOSE("Destroy runtime success.\n");
 }
@@ -214,6 +227,11 @@ handle_cmd_load_module(uint64 *args, uint32 argc)
 
     bh_assert(argc == 4);
 
+    if (!runtime_inited) {
+        *(void **)args_org = NULL;
+        return;
+    }
+
     if (!is_xip_file((uint8 *)wasm_file, wasm_file_size)) {
         if (total_size >= UINT32_MAX
             || !(enclave_module = (EnclaveModule *)wasm_runtime_malloc(
@@ -284,6 +302,10 @@ handle_cmd_unload_module(uint64 *args, uint32 argc)
 
     bh_assert(argc == 1);
 
+    if (!runtime_inited) {
+        return;
+    }
+
 #if WASM_ENABLE_LIB_RATS != 0
     /* Remove enclave module from enclave module list */
     os_mutex_lock(&enclave_module_list_lock);
@@ -354,6 +376,11 @@ handle_cmd_instantiate_module(uint64 *args, uint32 argc)
 
     bh_assert(argc == 5);
 
+    if (!runtime_inited) {
+        *(void **)args_org = NULL;
+        return;
+    }
+
     if (!(module_inst =
               wasm_runtime_instantiate(enclave_module->module, stack_size,
                                        heap_size, error_buf, error_buf_size))) {
@@ -373,6 +400,10 @@ handle_cmd_deinstantiate_module(uint64 *args, uint32 argc)
 
     bh_assert(argc == 1);
 
+    if (!runtime_inited) {
+        return;
+    }
+
     wasm_runtime_deinstantiate(module_inst);
 
     LOG_VERBOSE("Deinstantiate module success.\n");
@@ -389,6 +420,11 @@ handle_cmd_get_exception(uint64 *args, uint32 argc)
 
     bh_assert(argc == 3);
 
+    if (!runtime_inited) {
+        args_org[0] = false;
+        return;
+    }
+
     if ((exception1 = wasm_runtime_get_exception(module_inst))) {
         snprintf(exception, exception_size, "%s", exception1);
         args_org[0] = true;
@@ -410,6 +446,10 @@ handle_cmd_exec_app_main(uint64 *args, int32 argc)
     bh_assert(argc >= 3);
     bh_assert(app_argc >= 1);
 
+    if (!runtime_inited) {
+        return;
+    }
+
     total_size = sizeof(char *) * (app_argc > 2 ? (uint64)app_argc : 2);
 
     if (total_size >= UINT32_MAX
@@ -439,6 +479,10 @@ handle_cmd_exec_app_func(uint64 *args, int32 argc)
 
     bh_assert(argc == app_argc + 3);
 
+    if (!runtime_inited) {
+        return;
+    }
+
     total_size = sizeof(char *) * (app_argc > 2 ? (uint64)app_argc : 2);
 
     if (total_size >= UINT32_MAX
@@ -488,6 +532,11 @@ handle_cmd_set_wasi_args(uint64 *args, int32 argc)
 
     bh_assert(argc == 10);
 
+    if (!runtime_inited) {
+        *args_org = false;
+        return;
+    }
+
     total_size += sizeof(char *) * (uint64)dir_list_size
                   + sizeof(char *) * (uint64)env_list_size
                   + sizeof(char *) * (uint64)addr_pool_list_size
@@ -610,6 +659,11 @@ handle_cmd_get_pgo_prof_buf_size(uint64 *args, int32 argc)
 
     bh_assert(argc == 1);
 
+    if (!runtime_inited) {
+        args[0] = 0;
+        return;
+    }
+
     buf_len = wasm_runtime_get_pgo_prof_data_size(module_inst);
     args[0] = buf_len;
 }
@@ -625,6 +679,11 @@ handle_cmd_get_pro_prof_buf_data(uint64 *args, int32 argc)
 
     bh_assert(argc == 3);
 
+    if (!runtime_inited) {
+        args_org[0] = 0;
+        return;
+    }
+
     bytes_dumped =
         wasm_runtime_dump_pgo_prof_data_to_buf(module_inst, buf, len);
     args_org[0] = bytes_dumped;
@@ -704,6 +763,11 @@ ecall_iwasm_main(uint8_t *wasm_file_buf, uint32_t wasm_file_size)
     char error_buf[128];
     const char *exception;
 
+    /* avoid duplicated init */
+    if (runtime_inited) {
+        return;
+    }
+
     os_set_print_function(enclave_print);
 
     memset(&init_args, 0, sizeof(RuntimeInitArgs));

+ 3 - 1
product-mini/platforms/linux/CMakeLists.txt

@@ -121,7 +121,10 @@ endif ()
 set (WAMR_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../..)
 
 include (${WAMR_ROOT_DIR}/build-scripts/runtime_lib.cmake)
+
+check_pie_supported()
 add_library(vmlib ${WAMR_RUNTIME_LIB_SOURCE})
+set_target_properties (vmlib PROPERTIES POSITION_INDEPENDENT_CODE ON)
 
 set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections")
 
@@ -150,7 +153,6 @@ include (${SHARED_DIR}/utils/uncommon/shared_uncommon.cmake)
 
 add_executable (iwasm main.c ${UNCOMMON_SHARED_SOURCE})
 
-check_pie_supported()
 set_target_properties (iwasm PROPERTIES POSITION_INDEPENDENT_CODE ON)
 
 install (TARGETS iwasm DESTINATION bin)

+ 21 - 0
product-mini/platforms/nuttx/wamr.mk

@@ -141,6 +141,12 @@ else
 CFLAGS += -DWASM_ENABLE_WORD_ALIGN_READ=0
 endif
 
+ifeq ($(CONFIG_INTERPRETERS_WAMR_MEM_DUAL_BUS_MIRROR),y)
+CFLAGS += -DWASM_MEM_DUAL_BUS_MIRROR=1
+else
+CFLAGS += -DWASM_MEM_DUAL_BUS_MIRROR=0
+endif
+
 ifeq ($(CONFIG_INTERPRETERS_WAMR_FAST), y)
 CFLAGS += -DWASM_ENABLE_FAST_INTERP=1
 CFLAGS += -DWASM_ENABLE_INTERP=1
@@ -230,6 +236,12 @@ else
 CFLAGS += -DWASM_ENABLE_LIBC_BUILTIN=0
 endif
 
+ifeq ($(CONFIG_INTERPRETERS_WAMR_CONFIGUABLE_BOUNDS_CHECKS),y)
+CFLAGS += -DWASM_CONFIGUABLE_BOUNDS_CHECKS=1
+else
+CFLAGS += -DWASM_CONFIGUABLE_BOUNDS_CHECKS=0
+endif
+
 ifeq ($(CONFIG_INTERPRETERS_WAMR_LIBC_WASI),y)
 CFLAGS += -DWASM_ENABLE_LIBC_WASI=1
 CFLAGS += -I$(IWASM_ROOT)/libraries/libc-wasi/sandboxed-system-primitives/src
@@ -259,6 +271,15 @@ else
 CFLAGS += -DWASM_ENABLE_THREAD_MGR=0
 endif
 
+ifeq ($(CONFIG_INTERPRETERS_WAMR_LIB_WASI_THREADS),y)
+CFLAGS += -DWASM_ENABLE_LIB_WASI_THREADS=1
+CSRCS += lib_wasi_threads_wrapper.c
+CSRCS += tid_allocator.c
+VPATH += $(IWASM_ROOT)/libraries/lib-wasi-threads
+else
+CFLAGS += -DWASM_ENABLE_LIB_WASI_THREADS=0
+endif
+
 ifeq ($(CONFIG_INTERPRETERS_WAMR_GC),y)
 CFLAGS += -DWASM_ENABLE_GC=1
 CSRCS += gc_type.c gc_object.c

+ 23 - 1
product-mini/platforms/posix/main.c

@@ -69,6 +69,9 @@ print_help()
 #endif
     printf("  --repl                   Start a very simple REPL (read-eval-print-loop) mode\n"
            "                           that runs commands in the form of \"FUNC ARG...\"\n");
+#if WASM_CONFIGUABLE_BOUNDS_CHECKS != 0
+    printf("  --disable-bounds-checks  Disable bounds checks for memory accesses\n");
+#endif
 #if WASM_ENABLE_LIBC_WASI != 0
     printf("  --env=<env>              Pass wasi environment variables with \"key=value\"\n");
     printf("                           to the program, for example:\n");
@@ -466,7 +469,12 @@ main(int argc, char *argv[])
     const char *func_name = NULL;
     uint8 *wasm_file_buf = NULL;
     uint32 wasm_file_size;
-    uint32 stack_size = 64 * 1024, heap_size = 16 * 1024;
+    uint32 stack_size = 64 * 1024;
+#if WASM_ENABLE_LIBC_WASI != 0
+    uint32 heap_size = 0;
+#else
+    uint32 heap_size = 16 * 1024;
+#endif
 #if WASM_ENABLE_FAST_JIT != 0
     uint32 jit_code_cache_size = FAST_JIT_DEFAULT_CODE_CACHE_SIZE;
 #endif
@@ -488,6 +496,9 @@ main(int argc, char *argv[])
 #endif
     bool is_repl_mode = false;
     bool is_xip_file = false;
+#if WASM_CONFIGUABLE_BOUNDS_CHECKS != 0
+    bool disable_bounds_checks = false;
+#endif
 #if WASM_ENABLE_LIBC_WASI != 0
     const char *dir_list[8] = { NULL };
     uint32 dir_list_size = 0;
@@ -552,6 +563,11 @@ main(int argc, char *argv[])
         else if (!strcmp(argv[0], "--repl")) {
             is_repl_mode = true;
         }
+#if WASM_CONFIGUABLE_BOUNDS_CHECKS != 0
+        else if (!strcmp(argv[0], "--disable-bounds-checks")) {
+            disable_bounds_checks = true;
+        }
+#endif
         else if (!strncmp(argv[0], "--stack-size=", 13)) {
             if (argv[0][13] == '\0')
                 return print_help();
@@ -850,6 +866,12 @@ main(int argc, char *argv[])
         goto fail3;
     }
 
+#if WASM_CONFIGUABLE_BOUNDS_CHECKS != 0
+    if (disable_bounds_checks) {
+        wasm_runtime_set_bounds_checks(wasm_module_inst, false);
+    }
+#endif
+
 #if WASM_ENABLE_DEBUG_INTERP != 0
     if (ip_addr != NULL) {
         wasm_exec_env_t exec_env =

+ 6 - 1
product-mini/platforms/windows/main.c

@@ -243,7 +243,12 @@ main(int argc, char *argv[])
     const char *func_name = NULL;
     uint8 *wasm_file_buf = NULL;
     uint32 wasm_file_size;
-    uint32 stack_size = 64 * 1024, heap_size = 16 * 1024;
+    uint32 stack_size = 64 * 1024;
+#if WASM_ENABLE_LIBC_WASI != 0
+    uint32 heap_size = 0;
+#else
+    uint32 heap_size = 16 * 1024;
+#endif
 #if WASM_ENABLE_JIT != 0
     uint32 llvm_jit_size_level = 3;
     uint32 llvm_jit_opt_level = 3;

+ 60 - 0
product-mini/platforms/zephyr/simple/Dockerfile

@@ -0,0 +1,60 @@
+# Copyright (C) 2019 Intel Corporation.  All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+FROM ubuntu:20.04
+
+# Keep package installation non-interactive during the image build.
+# TZ must be a valid IANA time zone name (Area/Location), e.g. Asia/Shanghai.
+ARG DEBIAN_FRONTEND=noninteractive
+ENV TZ=Asia/Shanghai
+
+# Install dependencies for Zephyr and ESPRESSIF
+# hadolint ignore=DL3008
+RUN apt-get update && apt-get install -y git wget flex bison gperf python3 python3-pip python3-venv\
+    python3-dev python3-setuptools python3-tk python3-wheel xz-utils file libpython3.8-dev \
+    ninja-build ccache libffi-dev libssl-dev dfu-util libusb-1.0-0 device-tree-compiler \
+    make gcc gcc-multilib g++-multilib libsdl2-dev libmagic1 qemu udev --no-install-recommends \
+    && apt-get clean -y && rm -rf /var/lib/apt/lists/*
+
+# Install recent CMake version
+WORKDIR /tmp
+RUN mkdir /opt/cmake \
+    && wget --progress=dot:giga https://github.com/Kitware/CMake/releases/download/v3.22.1/cmake-3.22.1-linux-x86_64.sh \
+    && sh cmake-3.22.1-linux-x86_64.sh --skip-license --prefix=/opt/cmake && rm cmake-3.22.1-linux-x86_64.sh
+ENV PATH="/opt/cmake/bin:$PATH"
+
+# Install the Zephyr Software Development Kit (SDK)
+WORKDIR /opt
+# hadolint ignore=DL4006
+RUN wget --progress=dot:giga https://github.com/zephyrproject-rtos/sdk-ng/releases/download/v0.16.1/zephyr-sdk-0.16.1_linux-x86_64.tar.xz \
+    && wget --progress=dot:giga -O - https://github.com/zephyrproject-rtos/sdk-ng/releases/download/v0.16.1/sha256.sum | shasum --check --ignore-missing \
+    && tar xvf zephyr-sdk-0.16.1_linux-x86_64.tar.xz && rm zephyr-sdk-0.16.1_linux-x86_64.tar.xz
+
+WORKDIR /opt/zephyr-sdk-0.16.1
+# hadolint ignore=DL4006
+RUN yes | ./setup.sh
+
+# Get ESP-IDF
+RUN ln -s /usr/bin/python3 /usr/bin/python && mkdir -p ~/esp
+WORKDIR /root/esp
+RUN git clone https://github.com/espressif/esp-idf.git 
+WORKDIR /root/esp/esp-idf
+RUN git checkout 03d4fa28694ee15ccfd5a97447575de2d1655026 \
+    && git submodule update --init --recursive
+# Set up the esp-idf tools
+RUN ./install.sh esp32 esp32c3
+
+# Get Zephyr 
+# hadolint ignore=DL3013
+RUN pip3 install --no-cache-dir west && west init -m https://github.com/zephyrproject-rtos/zephyr --mr v3.4.0 /root/zephyrproject
+
+WORKDIR /root/zephyrproject
+RUN west update 
+
+WORKDIR /root/zephyrproject/zephyr
+RUN west zephyr-export && pip install --no-cache-dir -r ~/zephyrproject/zephyr/scripts/requirements.txt
+
+# Git clone wamr
+WORKDIR /root
+RUN git clone https://github.com/bytecodealliance/wasm-micro-runtime.git
+
+WORKDIR /root/wasm-micro-runtime/product-mini/platforms/zephyr/simple
+
+ENV ZEPHYR_BASE="/root/zephyrproject/zephyr"

+ 58 - 0
product-mini/platforms/zephyr/simple/Dockerfile.old

@@ -0,0 +1,58 @@
+# Copyright (C) 2019 Intel Corporation.  All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+FROM ubuntu:20.04
+
+# Keep package installation non-interactive during the image build.
+# TZ must be a valid IANA time zone name (Area/Location), e.g. Asia/Shanghai.
+ARG DEBIAN_FRONTEND=noninteractive
+ENV TZ=Asia/Shanghai
+
+# Install dependencies for Zephyr and ESPRESSIF
+# hadolint ignore=DL3008
+RUN apt-get update && apt-get install -y git wget flex bison gperf python3 python3-pip python3-venv\
+    python3-dev python3-setuptools python3-tk python3-wheel xz-utils file libpython3.8-dev \
+    ninja-build ccache libffi-dev libssl-dev dfu-util libusb-1.0-0 device-tree-compiler \
+    make gcc gcc-multilib g++-multilib libsdl2-dev libmagic1 qemu udev --no-install-recommends \
+    && apt-get clean -y && rm -rf /var/lib/apt/lists/*
+
+# Install recent CMake version
+WORKDIR /tmp
+RUN mkdir /opt/cmake \
+    && wget --progress=dot:giga https://github.com/Kitware/CMake/releases/download/v3.22.1/cmake-3.22.1-linux-x86_64.sh \
+    && sh cmake-3.22.1-linux-x86_64.sh --skip-license --prefix=/opt/cmake && rm cmake-3.22.1-linux-x86_64.sh
+ENV PATH="/opt/cmake/bin:$PATH"
+
+# Install the Zephyr Software Development Kit (SDK)
+WORKDIR /opt
+# hadolint ignore=DL4006
+RUN wget --progress=dot:giga https://github.com/zephyrproject-rtos/sdk-ng/releases/download/v0.11.3/zephyr-sdk-0.11.3-setup.run \
+    && chmod +x ./zephyr-sdk-0.11.3-setup.run \
+    && ./zephyr-sdk-0.11.3-setup.run -- -d /opt/zephyr-sdk-0.11.3
+ENV ZEPHYR_TOOLCHAIN_VARIANT=zephyr
+ENV ZEPHYR_SDK_INSTALL_DIR=/opt/zephyr-sdk-0.11.3
+
+# Get ESP-IDF
+RUN ln -s /usr/bin/python3 /usr/bin/python && mkdir -p ~/esp
+WORKDIR /root/esp
+RUN git clone https://github.com/espressif/esp-idf.git 
+WORKDIR /root/esp/esp-idf
+RUN git checkout v4.0 \
+    && pip install --no-cache-dir virtualenv==16.7.12 \
+    && git submodule update --init --recursive \
+    && ./install.sh esp32 esp32c3
+
+# Get Zephyr 
+# hadolint ignore=DL3013
+RUN pip3 install --no-cache-dir west && west init -m https://github.com/zephyrproject-rtos/zephyr --mr v2.3.0 /root/zephyrproject
+
+WORKDIR /root/zephyrproject
+RUN west update 
+
+WORKDIR /root/zephyrproject/zephyr
+RUN west zephyr-export && pip install --no-cache-dir -r ~/zephyrproject/zephyr/scripts/requirements.txt
+
+# Git clone wamr
+WORKDIR /root
+RUN git clone https://github.com/bytecodealliance/wasm-micro-runtime.git
+
+WORKDIR /root/wasm-micro-runtime/product-mini/platforms/zephyr/simple
+
+ENV ZEPHYR_BASE="/root/zephyrproject/zephyr"

+ 106 - 0
product-mini/platforms/zephyr/simple/README.md

@@ -0,0 +1,106 @@
+# How to use WAMR with Zephyr
+
+## Build with Docker (recommended approach)
+
+To have a quicker start, a Docker container of the Zephyr setup can be generated. The current Docker image is considerably large (~15GB), so building it takes some time and requires enough disk space to store it.
+
+### Build Docker images
+
+```shell
+docker build -t wamr-zephyr .
+```
+
+> PS: currently, the esp32 custom linker script only works with a lower version of Zephyr. If you want to use an esp32 board, you can build the Dockerfile with a lower version of Zephyr, Zephyr SDK, and ESP-IDF. The old version of the Docker image can also build other targets, but it is probably a better choice to use the new Dockerfile for other boards.
+
+```shell
+# If you want to build on esp32 platform
+docker build -f Dockerfile.old -t wamr-zephyr .
+```
+
+### Run Docker images
+
+Adapt the `--device` option to your setup, or remove it if not needed.
+
+```shell
+docker run -ti --device=/dev/ttyUSB0 wamr-zephyr
+```
+
+And then inside the docker container:
+
+```shell
+# copy the corresponding board conf file to current directory
+cp boards/qemu_x86_nommu.conf prj.conf
+# then build
+./build_and_run.sh x86
+```
+
+> PS: for esp32 boards, you need to configure some environment variables first
+
+```shell
+# configure zephyr with espressif
+export ZEPHYR_TOOLCHAIN_VARIANT="espressif"
+export ESPRESSIF_TOOLCHAIN_PATH="/root/.espressif/tools/xtensa-esp32-elf/esp-2019r2-8.2.0/xtensa-esp32-elf/"
+export ESP_IDF_PATH="/root/esp/esp-idf"
+# copy the corresponding board conf file to current directory
+cp boards/esp32.conf prj.conf
+# then build
+./build_and_run.sh esp32
+```
+
+## Build on local environment
+
+### Dependencies installation
+
+Follow the official Zephyr and Espressif documentation:
+
+1. Zephyr installation:
+
+   <https://docs.zephyrproject.org/latest/develop/getting_started/index.html>
+
+2. ESP32 installation:
+
+   <https://docs.espressif.com/projects/esp-idf/en/latest/esp32/get-started/linux-macos-setup.html>
+
+And set up Zephyr for esp32:
+
+<https://wiki.amarulasolutions.com/zephyr/esp32/esp32-setup.html>
+
+Then install QEMU, for example, on Linux:
+
+```shell
+sudo apt-get install qemu
+```
+
+### Run the build script
+
+Make sure you have the environment variables ready; you can use the command `env` to check:
+
+```shell
+env
+```
+
+```shell
+# export ZEPHYR_BASE if it's not present
+export ZEPHYR_BASE=~/zephyrproject/zephyr
+# and if you install zephyr in virtual environment rather than global
+source ~/zephyrproject/.venv/bin/activate
+```
+
+For esp32 boards, you need to configure some extra environment variables first. Check whether the following variables are in the env list; if not, add them like this:
+
+> Note: The esp32 custom linker script doesn't work with recent versions of Zephyr. If you want to use it in the local environment, please install Zephyr 2.3.0 with the corresponding SDK, and ESP-IDF 4.0.
+
+```shell
+export ZEPHYR_TOOLCHAIN_VARIANT="espressif"
+export ESPRESSIF_TOOLCHAIN_PATH="$HOME/.espressif/tools/xtensa-esp32-elf/esp-{the version you installed}/xtensa-esp32-elf/"  # $HOME rather than ~: the shell does not expand a tilde inside double quotes
+export ESP_IDF_PATH="$HOME/esp/esp-idf"
+```
+
+Then you can run the build script:
+
+```shell
+# copy the corresponding board conf file to current directory
+cp boards/qemu_x86_nommu.conf prj.conf
+# then build
+./build_and_run.sh x86
+```

+ 0 - 25
product-mini/platforms/zephyr/simple/README_docker.md

@@ -1,25 +0,0 @@
-# Build with Docker
-
-To have a quicker start, a Docker container of the Zephyr setup can be generated.
-
-## Build Docker container
-
-``` Bash
-docker build --build-arg DOCKER_UID=$(id -u) . -t wamr-zephyr
-```
-
-## Run Docker container to build images
-
-Enter the docker container (maps the toplevel wasm-micro-runtime repo as volume):
-
-``` Bash
-docker run -ti -v $PWD/../../../..:/home/wamr/source --device=/dev/ttyUSB0 wamr-zephyr
-```
-
-Adopt the device or remove if not needed.
-
-And then in the docker container:
-
-``` Bash
-./build_and_run.sh esp32c3
-```

+ 20 - 0
samples/bh_atomic/CMakeLists.txt

@@ -0,0 +1,20 @@
+# Copyright (C) 2023 Midokura Japan KK.  All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+cmake_minimum_required(VERSION 3.0)
+project(bh_atomic)
+
+string (TOLOWER ${CMAKE_HOST_SYSTEM_NAME} WAMR_BUILD_PLATFORM)
+if(APPLE)
+  add_definitions(-DBH_PLATFORM_DARWIN)
+endif()
+
+set(WAMR_BUILD_INTERP 1)
+set(WAMR_BUILD_LIBC_BUILTIN 0)
+
+set(WAMR_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../..)
+include(${WAMR_ROOT_DIR}/build-scripts/runtime_lib.cmake)
+
+add_executable(bh_atomic main.c)
+
+target_link_libraries(bh_atomic)

+ 42 - 0
samples/bh_atomic/main.c

@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2023 Midokura Japan KK.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include <assert.h>
+
+#include "bh_platform.h"
+#include "bh_atomic.h"
+
+int
+main(int argc, char **argv) /* argc and argv are not used by this program */
+{
+    bh_atomic_32_t v; /* atomic cell each macro below is applied to */
+    uint32 o; /* result produced by the macro under test */
+
+    v = 0x00ff00ff;
+    o = BH_ATOMIC_32_LOAD(v); /* a load is expected to yield the stored value */
+    assert(o == 0x00ff00ff);
+
+    v = 0x00ff00ff; /* v is reset to the same pattern before every case */
+    o = BH_ATOMIC_32_FETCH_OR(v, 0xffff0000);
+    assert(o == 0x00ff00ff); /* o is expected to be the value before the OR */
+    assert(v == 0xffff00ff); /* v is expected to hold old | operand */
+
+    v = 0x00ff00ff;
+    o = BH_ATOMIC_32_FETCH_AND(v, 0xffff0000);
+    assert(o == 0x00ff00ff); /* o is expected to be the value before the AND */
+    assert(v == 0x00ff0000); /* v is expected to hold old & operand */
+
+    v = 0x00ff00ff;
+    o = BH_ATOMIC_32_FETCH_ADD(v, 0x10101);
+    assert(o == 0x00ff00ff); /* o is expected to be the value before the add */
+    assert(v == 0x00ff00ff + 0x10101); /* v is expected to hold old + operand */
+
+    v = 0x00ff00ff;
+    o = BH_ATOMIC_32_FETCH_SUB(v, 0x10101);
+    assert(o == 0x00ff00ff); /* o is expected to be the value before the subtract */
+    assert(v == 0x00ff00ff - 0x10101); /* v is expected to hold old - operand */
+
+    return 0; /* reached only when no assert above aborted */
+}

+ 183 - 132
samples/workload/XNNPACK/CMakeLists.txt

@@ -1,147 +1,198 @@
 # Copyright (C) 2019 Intel Corporation. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-cmake_minimum_required (VERSION 3.0)
+cmake_minimum_required (VERSION 3.14)
 
 project(xnnpack_wasm)
 
 ################  EMCC ################
 include(ExternalProject)
 
-ExternalProject_Add(xnnpack
+# grep xnnpack_benchmark -A 1 BUILD.bazel \
+#   | grep "name =" \
+#   | awk '{print $3}' \
+#   | sed -e 's/\"//g; s/,//g; s/^/\"/g; s/$/\"/g'
+list(APPEND NATIVE_BENCHMARKS
+  "qs8_dwconv_bench"
+  "qs8_f32_vcvt_bench"
+  "qs8_gemm_bench"
+  "qs8_requantization_bench"
+  "qs8_vadd_bench"
+  "qs8_vaddc_bench"
+  "qs8_vcvt_bench"
+  "qs16_qs8_vcvt_bench"
+  "qs8_vlrelu_bench"
+  "qs8_vmul_bench"
+  "qs8_vmulc_bench"
+  "qu8_f32_vcvt_bench"
+  "qu8_gemm_bench"
+  "qu8_requantization_bench"
+  "qu8_vadd_bench"
+  "qu8_vaddc_bench"
+  "qu8_vcvt_bench"
+  "qu8_vlrelu_bench"
+  "qu8_vmul_bench"
+  "qu8_vmulc_bench"
+  "bf16_gemm_bench"
+  "f16_f32acc_igemm_bench"
+  "f16_igemm_bench"
+  "f16_f32acc_gemm_bench"
+  "f16_gemm_bench"
+  "f16_raddstoreexpminusmax_bench"
+  "f16_spmm_bench"
+  "f16_vsigmoid_bench"
+  "f16_vtanh_bench"
+  "f16_f32_vcvt_bench"
+  "f32_igemm_bench"
+  "f32_conv_hwc_bench"
+  "f16_conv_hwc2chw_bench"
+  "f16_gavgpool_cw_bench"
+  "f32_gavgpool_cw_bench"
+  "f32_conv_hwc2chw_bench"
+  "f16_dwconv_bench"
+  "f32_dwconv_bench"
+  "f32_dwconv2d_chw_bench"
+  "f16_dwconv2d_chw_bench"
+  "f32_f16_vcvt_bench"
+  "xx_transpose_bench"
+  "x8_transpose_bench"
+  "x16_transpose_bench"
+  "x24_transpose_bench"
+  "x32_transpose_bench"
+  "x64_transpose_bench"
+  "f32_bgemm_bench"
+  "f32_gemm_bench"
+  "f32_qs8_vcvt_bench"
+  "f32_qu8_vcvt_bench"
+  "f32_raddexpminusmax_bench"
+  "f32_raddextexp_bench"
+  "f32_raddstoreexpminusmax_bench"
+  "f32_rmax_bench"
+  "f32_spmm_bench"
+  "f32_softmax_bench"
+  "f16_velu_bench"
+  "f32_velu_bench"
+  "f32_vhswish_bench"
+  "f32_vlrelu_bench"
+  "f32_vrelu_bench"
+  "f32_vscaleexpminusmax_bench"
+  "f32_vscaleextexp_bench"
+  "f32_vsigmoid_bench"
+  "f16_vsqrt_bench"
+  "f32_vsqrt_bench"
+  "f32_vtanh_bench"
+  "f32_im2col_gemm_bench"
+  "rounding_bench"
+  "s16_rmaxabs_bench"
+  "s16_window_bench"
+  "u32_filterbank_accumulate_bench"
+  "u32_filterbank_subtract_bench"
+  "u32_vlog_bench"
+  "u64_u32_vsqrtshift_bench"
+  "i16_vlshift_bench"
+  "cs16_vsquareabs_bench"
+  "cs16_bfly4_bench"
+  "cs16_fftr_bench"
+  "x8_lut_bench"
+  "x32_packw_bench"
+  "x16_packw_bench"
+  "abs_bench"
+  "average_pooling_bench"
+  "bankers_rounding_bench"
+  "ceiling_bench"
+  "channel_shuffle_bench"
+  "convert_bench"
+  "convolution_bench"
+  "deconvolution_bench"
+  "elu_bench"
+  "floor_bench"
+  "global_average_pooling_bench"
+  "hardswish_bench"
+  "leaky_relu_bench"
+  "max_pooling_bench"
+  "negate_bench"
+  "prelu_bench"
+  "sigmoid_bench"
+  "softmax_bench"
+  "square_bench"
+  "square_root_bench"
+  "tanh_bench"
+  "truncation_bench"
+  "f16_dwconv_e2e_bench"
+  "f16_gemm_e2e_bench"
+  "f32_dwconv_e2e_bench"
+  "f32_gemm_e2e_bench"
+  "qs8_dwconv_e2e_bench"
+  "qs8_gemm_e2e_bench"
+  "qu8_gemm_e2e_bench"
+  "qu8_dwconv_e2e_bench"
+  "end2end_bench"
+  "f16_exp_ulp_eval"
+  "f16_expminus_ulp_eval"
+  "f16_expm1minus_ulp_eval"
+  "f16_sigmoid_ulp_eval"
+  "f16_sqrt_ulp_eval"
+  "f16_tanh_ulp_eval"
+  "f32_exp_ulp_eval"
+  "f32_expminus_ulp_eval"
+  "f32_expm1minus_ulp_eval"
+  "f32_extexp_ulp_eval"
+  "f32_sigmoid_ulp_eval"
+  "f32_sqrt_ulp_eval"
+  "f32_tanh_ulp_eval"
+)
+
+# Only Download
+ExternalProject_Add(xnnpack-download
     PREFIX xnnpack
     GIT_REPOSITORY https://github.com/google/XNNPACK.git
-    GIT_TAG        4570a7151aa4f3e57eca14a575eeff6bb13e26be
+    GIT_TAG        b9d4073a6913891ce9cbd8965c8d506075d2a45a
     GIT_PROGRESS   ON
     SOURCE_DIR     ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack
-    UPDATE_COMMAND git restore .
-                   && cmake -E copy ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack/google3/third_party/XNNPACK/microkernels.bzl
-                      ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack/
-                   && git apply ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack.patch
+    UPDATE_COMMAND ""
+    PATCH_COMMAND git apply ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack.patch
     CONFIGURE_COMMAND ""
-    # grep xnnpack_benchmark -A 1 BUILD.bazel \
-    #   | grep "name =" \
-    #   | awk '{print $3}' \
-    #   | sed -e 's/\"//g' -e 's/,//g' -e 's/^/\/\/:/g'
-    BUILD_COMMAND  cd ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack
-                   && bazel --output_user_root=build-user-output build -c opt --config=wasm
-                       //:qs8_dwconv_bench.wasm
-                       //:qs8_f32_vcvt_bench.wasm
-                       //:qs8_gemm_bench.wasm
-                       //:qs8_requantization_bench.wasm
-                       //:qs8_vadd_bench.wasm
-                       //:qs8_vaddc_bench.wasm
-                       //:qs8_vcvt_bench.wasm
-                       //:qs8_vlrelu_bench.wasm
-                       //:qs8_vmul_bench.wasm
-                       //:qs8_vmulc_bench.wasm
-                       //:qu8_f32_vcvt_bench.wasm
-                       //:qu8_gemm_bench.wasm
-                       //:qu8_requantization_bench.wasm
-                       //:qu8_vadd_bench.wasm
-                       //:qu8_vaddc_bench.wasm
-                       //:qu8_vcvt_bench.wasm
-                       //:qu8_vlrelu_bench.wasm
-                       //:qu8_vmul_bench.wasm
-                       //:qu8_vmulc_bench.wasm
-                       //:bf16_gemm_bench.wasm
-                       //:f16_igemm_bench.wasm
-                       //:f16_gemm_bench.wasm
-                       //:f16_raddstoreexpminusmax_bench.wasm
-                       //:f16_spmm_bench.wasm
-                       //:f16_vsigmoid_bench.wasm
-                       //:f16_f32_vcvt_bench.wasm
-                       //:f32_igemm_bench.wasm
-                       //:f32_conv_hwc_bench.wasm
-                       //:f16_conv_hwc2chw_bench.wasm
-                       //:f16_gavgpool_cw_bench.wasm
-                       //:f32_gavgpool_cw_bench.wasm
-                       //:f32_conv_hwc2chw_bench.wasm
-                       //:f16_dwconv_bench.wasm
-                       //:f32_dwconv_bench.wasm
-                       //:f32_dwconv2d_chw_bench.wasm
-                       //:f16_dwconv2d_chw_bench.wasm
-                       //:f32_f16_vcvt_bench.wasm
-                       //:xx_transpose_bench.wasm
-                       //:x8_transpose_bench.wasm
-                       //:x16_transpose_bench.wasm
-                       //:x24_transpose_bench.wasm
-                       //:x32_transpose_bench.wasm
-                       //:x64_transpose_bench.wasm
-                       //:f32_gemm_bench.wasm
-                       //:f32_qs8_vcvt_bench.wasm
-                       //:f32_qu8_vcvt_bench.wasm
-                       //:f32_raddexpminusmax_bench.wasm
-                       //:f32_raddextexp_bench.wasm
-                       //:f32_raddstoreexpminusmax_bench.wasm
-                       //:f32_rmax_bench.wasm
-                       //:f32_spmm_bench.wasm
-                       //:f32_softmax_bench.wasm
-                       //:f16_velu_bench.wasm
-                       //:f32_velu_bench.wasm
-                       //:f32_vhswish_bench.wasm
-                       //:f32_vlrelu_bench.wasm
-                       //:f32_vrelu_bench.wasm
-                       //:f32_vscaleexpminusmax_bench.wasm
-                       //:f32_vscaleextexp_bench.wasm
-                       //:f32_vsigmoid_bench.wasm
-                       //:f16_vsqrt_bench.wasm
-                       //:f32_vsqrt_bench.wasm
-                       //:f32_im2col_gemm_bench.wasm
-                       //:rounding_bench.wasm
-                       //:s16_rmaxabs_bench.wasm
-                       //:s16_window_bench.wasm
-                       //:u32_filterbank_accumulate_bench.wasm
-                       //:u32_filterbank_subtract_bench.wasm
-                       //:u32_vlog_bench.wasm
-                       //:u64_u32_vsqrtshift_bench.wasm
-                       //:i16_vlshift_bench.wasm
-                       //:cs16_vsquareabs_bench.wasm
-                       //:cs16_bfly4_bench.wasm
-                       //:cs16_fftr_bench.wasm
-                       //:x8_lut_bench.wasm
-                       //:abs_bench.wasm
-                       //:average_pooling_bench.wasm
-                       //:bankers_rounding_bench.wasm
-                       //:ceiling_bench.wasm
-                       //:channel_shuffle_bench.wasm
-                       //:convert_bench.wasm
-                       //:convolution_bench.wasm
-                       //:deconvolution_bench.wasm
-                       //:elu_bench.wasm
-                       //:floor_bench.wasm
-                       //:global_average_pooling_bench.wasm
-                       //:hardswish_bench.wasm
-                       //:leaky_relu_bench.wasm
-                       //:max_pooling_bench.wasm
-                       //:negate_bench.wasm
-                       //:sigmoid_bench.wasm
-                       //:prelu_bench.wasm
-                       //:softmax_bench.wasm
-                       //:square_bench.wasm
-                       //:square_root_bench.wasm
-                       //:truncation_bench.wasm
-                       //:f16_gemm_e2e_bench.wasm
-                       //:f32_dwconv_e2e_bench.wasm
-                       //:f32_gemm_e2e_bench.wasm
-                       //:qs8_dwconv_e2e_bench.wasm
-                       //:qs8_gemm_e2e_bench.wasm
-                       //:qu8_gemm_e2e_bench.wasm
-                       //:qu8_dwconv_e2e_bench.wasm
-                       //:end2end_bench.wasm
-                       //:f16_exp_ulp_eval.wasm
-                       //:f16_expminus_ulp_eval.wasm
-                       //:f16_expm1minus_ulp_eval.wasm
-                       //:f16_sigmoid_ulp_eval.wasm
-                       //:f16_sqrt_ulp_eval.wasm
-                       //:f32_exp_ulp_eval.wasm
-                       //:f32_expminus_ulp_eval.wasm
-                       //:f32_expm1minus_ulp_eval.wasm
-                       //:f32_extexp_ulp_eval.wasm
-                       //:f32_sigmoid_ulp_eval.wasm
-                       //:f32_sqrt_ulp_eval.wasm
-                       //:f32_tanh_ulp_eval.wasm
-    INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory
-                      ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack/bazel-out/wasm-opt/bin/
-                      ${CMAKE_BINARY_DIR}/wasm-opt
+    BUILD_COMMAND ""
+    INSTALL_COMMAND ""
+    TEST_COMMAND ""
 )
+
+set(WAMRC "${CMAKE_CURRENT_SOURCE_DIR}/../../../wamr-compiler/build/wamrc")
+if(EXISTS ${WAMRC})
+  message("-- Will generate .aot")
+else()
+  message("Will generate .wasm")
+endif()
+
+foreach(BENCHMARK IN LISTS NATIVE_BENCHMARKS)
+  string(CONCAT WASM_BENCHMARK "//:" ${BENCHMARK} "-wasm")
+  string(CONCAT WASM_OUTPUT ${BENCHMARK} ".wasm")
+
+  add_custom_command(
+    OUTPUT ${WASM_OUTPUT}
+    COMMAND bazel --output_user_root=build-user-output build -c opt --config=wasm ${WASM_BENCHMARK}
+              && ${CMAKE_COMMAND} -E copy_if_different ./bazel-bin/${WASM_OUTPUT} ${CMAKE_CURRENT_BINARY_DIR}/${WASM_OUTPUT}
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack
+    DEPENDS xnnpack-download
+    COMMENT "Generating ${WASM_OUTPUT} ..."
+  )
+
+  set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_CLEAN_FILES ${CMAKE_CURRENT_BINARY_DIR}/${WASM_OUTPUT})
+
+  if(EXISTS ${WAMRC})
+    string(CONCAT AOT_OUTPUT ${BENCHMARK} ".aot")
+
+    add_custom_command(
+      OUTPUT ${AOT_OUTPUT}
+      COMMAND ${WAMRC} -o ${AOT_OUTPUT} ${WASM_OUTPUT}
+      WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+      DEPENDS ${WASM_OUTPUT}
+      COMMENT "Generating ${AOT_OUTPUT} ..."
+    )
+
+    add_custom_target(${BENCHMARK} ALL DEPENDS ${AOT_OUTPUT})
+  else()
+    add_custom_target(${BENCHMARK} ALL DEPENDS ${WASM_OUTPUT})
+  endif()
+endforeach()
+

+ 21 - 20
samples/workload/XNNPACK/README.md

@@ -9,17 +9,29 @@ please refer to [installation instructions](../README.md).
 
 ## Build XNNPACK
 
-```bash
-cd <wamr-dir>/samples/workload/XNNPACK
-mkdir build
-cd build
+Please build wamrc first:
+
+``` bash
+cd <wamr-dir>/wamr-compiler
+./build_llvm.sh
+mkdir build && cd build
 cmake ..
+make
+```
+
+And then build the XNNPACK standalone wasm files:
+
+```bash
+$ cd <wamr-dir>/samples/workload/XNNPACK
+$ cmake -S . -B build
+$ cmake --build build
 ```
-The wasm files are generated under folder samples/workload/XNNPACK/xnnpack/bazel-bin.
+
+The generated .wasm (and .aot) files are under *samples/workload/XNNPACK/build*.
 
 ## Run benchmarks
 
-Firstly please build iwasm with simd, libc-emcc and lib-pthread support:
+First, please build iwasm with simd, libc-emcc and lib-pthread support:
 
 ``` bash
 $ cd <wamr-dir>/product-mini/platforms/linux/
@@ -28,21 +40,10 @@ $ cmake .. -DWAMR_BUILD_LIBC_EMCC=1 -DWAMR_BUILD_LIB_PTHREAD=1
 $ make
 ```
 
-And please build wamrc:
-
-``` bash
-cd <wamr-dir>/wamr-compiler
-./build_llvm.sh
-mkdir build && cd build
-cmake ..
-make
-```
-
-Then compile wasm file to aot file and run:
+Then run:
 
 ``` shell
-$ cd <wamr-dir>/samples/workload/XNNPACK/xnnpack/bazel-bin
-$ wamrc -o average_pooling_bench.aot average_pooling_bench.wasm  (or other wasm files)
-$ iwasm average_pooling_bench.aot
+$ cd <wamr-dir>/samples/workload/XNNPACK/build
+$ iwasm average_pooling_bench.aot # (or other aot files)
 ```
 

+ 95 - 98
samples/workload/XNNPACK/xnnpack.patch

@@ -1,141 +1,138 @@
 diff --git a/.bazelrc b/.bazelrc
-index 688279da1..376996885 100644
+index fcaff1063..e61d53337 100644
 --- a/.bazelrc
 +++ b/.bazelrc
-@@ -53,4 +53,9 @@ build:ios_fat --watchos_cpus=armv7k
- build:macos --apple_platform_type=macos
+@@ -1,6 +1,7 @@
+ # Basic build settings
+ build --jobs 128
+ build --cxxopt='-std=gnu++14'
++build --incompatible_enable_cc_toolchain_resolution
+ 
+ # Sets the default Apple platform to macOS.
+ build --apple_platform_type=macos
+@@ -55,3 +56,10 @@ build:macos --apple_platform_type=macos
  
  build:macos_arm64 --config=macos
--build:macos_arm64 --cpu=darwin_arm64
-\ No newline at end of file
-+build:macos_arm64 --cpu=darwin_arm64
+ build:macos_arm64 --cpu=darwin_arm64
 +
++# Emscripten configs
++build:wasm --copt="-Wno-unused"
++build:wasm --copt="-Wno-unused-function"
++build:wasm --copt="-Wno-unused-but-set-variable"
 +build:wasm --cpu=wasm
 +build:wasm --features=wasm_simd
-+build:wasm --crosstool_top=@emsdk//emscripten_toolchain:everything
-+build:wasm --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
 diff --git a/WORKSPACE b/WORKSPACE
-index cd8960ffa..787e03ca8 100644
+index 2e568088b..3961371ca 100644
 --- a/WORKSPACE
 +++ b/WORKSPACE
-@@ -29,8 +29,9 @@ http_archive(
- # Google Benchmark library, used in micro-benchmarks.
- http_archive(
-     name = "com_google_benchmark",
--    strip_prefix = "benchmark-main",
--    urls = ["https://github.com/google/benchmark/archive/main.zip"],
-+    sha256 = "1ba14374fddcd9623f126b1a60945e4deac4cdc4fb25a5f25e7f779e36f2db52",
-+    strip_prefix = "benchmark-d2a8a4ee41b923876c034afb939c4fc03598e622",
-+    urls = ["https://github.com/google/benchmark/archive/d2a8a4ee41b923876c034afb939c4fc03598e622.zip"],
+@@ -83,7 +83,23 @@ http_archive(
  )
  
- # FP16 library, used for half-precision conversions
-@@ -92,8 +93,25 @@ http_archive(
-     ],
- )
+ # Android NDK location and version is auto-detected from $ANDROID_NDK_HOME environment variable
+-android_ndk_repository(name = "androidndk")
++# android_ndk_repository(name = "androidndk")
  
-+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+ # Android SDK location and API is auto-detected from $ANDROID_HOME environment variable
+-android_sdk_repository(name = "androidsdk")
++# android_sdk_repository(name = "androidsdk")
++
 +http_archive(
 +    name = "emsdk",
-+    # Use emsdk-3.0.0 since the larger version may:
-+    #   - compress the wasm file into a tar file but not directly generate wasm file
-+    #   - generate incomplete implementation of libc API, e.g. throw exception in getentropy
-+    strip_prefix = "emsdk-3.0.0/bazel",
-+    url = "https://github.com/emscripten-core/emsdk/archive/refs/tags/3.0.0.tar.gz",
-+    sha256 = "a41dccfd15be9e85f923efaa0ac21943cbab77ec8d39e52f25eca1ec61a9ac9e"
++    sha256 = "5fa6f5eb45a4d50264610c4c9e1c155535359b63bfaad69b4e5101d16c1e7e32",
++    strip_prefix = "emsdk-a896e3d066448b3530dbcaa48869fafefd738f57/bazel",
++    url = "https://github.com/emscripten-core/emsdk/archive/a896e3d066448b3530dbcaa48869fafefd738f57.tar.gz",
 +)
 +
 +load("@emsdk//:deps.bzl", emsdk_deps = "deps")
 +emsdk_deps()
 +
 +load("@emsdk//:emscripten_deps.bzl", emsdk_emscripten_deps = "emscripten_deps")
-+emsdk_emscripten_deps()
++emsdk_emscripten_deps(emscripten_version = "3.1.44")
 +
- # Android NDK location and version is auto-detected from $ANDROID_NDK_HOME environment variable
--android_ndk_repository(name = "androidndk")
-+#android_ndk_repository(name = "androidndk")
++load("@emsdk//:toolchains.bzl", "register_emscripten_toolchains")
++register_emscripten_toolchains()
+diff --git a/bench/utils.cc b/bench/utils.cc
+index 3b32503a7..656845336 100644
+--- a/bench/utils.cc
++++ b/bench/utils.cc
+@@ -456,3 +456,13 @@ CodeMemoryHelper::~CodeMemoryHelper() {
  
- # Android SDK location and API is auto-detected from $ANDROID_HOME environment variable
--android_sdk_repository(name = "androidsdk")
-+#android_sdk_repository(name = "androidsdk")
+ }  // namespace utils
+ }  // namespace benchmark
++
++
++extern "C"
++__attribute__((import_module("env"), import_name("getentropy"))) int import_getentropy(void* buffer, size_t length);
++
++extern "C"
++int getentropy(void* buffer, size_t length)
++{
++  return import_getentropy(buffer, length);
++}
 diff --git a/build_defs.bzl b/build_defs.bzl
-index b8217a18d..6f2d1675e 100644
+index 01b436eb7..2738fd50a 100644
 --- a/build_defs.bzl
 +++ b/build_defs.bzl
-@@ -380,7 +380,7 @@ def xnnpack_benchmark(name, srcs, copts = [], deps = [], tags = []):
-             explicitly specified.
-     """
-     native.cc_binary(
--        name = name,
-+        name = name + ".wasm",
-         srcs = srcs,
-         copts = xnnpack_std_cxxopts() + [
-             "-Iinclude",
-@@ -405,5 +405,5 @@ def xnnpack_benchmark(name, srcs, copts = [], deps = [], tags = []):
+@@ -1,6 +1,7 @@
+ """Build definitions and rules for XNNPACK."""
+ 
+-load(":emscripten.bzl", "xnnpack_emscripten_benchmark_linkopts", "xnnpack_emscripten_deps", "xnnpack_emscripten_minimal_linkopts", "xnnpack_emscripten_test_linkopts")
++load(":emscripten.bzl", "xnnpack_emscripten_benchmark_linkopts", "xnnpack_emscripten_deps", "xnnpack_emscripten_minimal_linkopts", "xnnpack_emscripten_test_linkopts", "xnnpack_emscripten_standalone_benchmark_linkopts")
++load("@emsdk//emscripten_toolchain:wasm_rules.bzl", "wasm_cc_binary")
+ 
+ def xnnpack_visibility():
+     """Visibility of :XNNPACK target.
+@@ -393,7 +394,8 @@ def xnnpack_benchmark(name, srcs, copts = [], deps = [], tags = []):
+             "//conditions:default": ["-Wno-unused-function"],
+         }) + copts,
+         linkopts = select({
+-            ":emscripten": xnnpack_emscripten_benchmark_linkopts(),
++            ":emscripten": xnnpack_emscripten_standalone_benchmark_linkopts(),
++            ":emscripten_wasmsimd": xnnpack_emscripten_standalone_benchmark_linkopts(),
+             ":windows_x86_64_mingw": ["-lshlwapi"],
+             ":windows_x86_64_msys": ["-lshlwapi"],
+             "//conditions:default": [],
+@@ -405,5 +407,16 @@ def xnnpack_benchmark(name, srcs, copts = [], deps = [], tags = []):
              ":emscripten": xnnpack_emscripten_deps(),
              "//conditions:default": [],
          }),
 -	tags = tags,
-+	    tags = tags,
++        tags = tags,
++    )
++
++    wasm_cc_binary(
++        name = name + "-wasm",
++        cc_target = ":" + name,
++        threads = "off",
++        simd = True,
++        standalone= True,
++        outputs = [
++            name + ".wasm",
++        ]
      )
 diff --git a/emscripten.bzl b/emscripten.bzl
-index f1557a7b1..7f964a094 100644
+index f1557a7b1..a3c4f93b9 100644
 --- a/emscripten.bzl
 +++ b/emscripten.bzl
-@@ -25,12 +25,19 @@ def xnnpack_emscripten_benchmark_linkopts():
-     """Emscripten-specific linkopts for benchmarks."""
-     return [
-         "-s ASSERTIONS=1",
--        "-s ENVIRONMENT=node,shell,web",
--        "-s ERROR_ON_UNDEFINED_SYMBOLS=1",
--        "-s EXIT_RUNTIME=1",
+@@ -33,6 +33,21 @@ def xnnpack_emscripten_benchmark_linkopts():
+         "--pre-js $(location :preamble.js.lds)",
+     ]
+ 
++def xnnpack_emscripten_standalone_benchmark_linkopts():
++    return [
++        "-s ASSERTIONS=1",
 +        "-s ERROR_ON_UNDEFINED_SYMBOLS=0",
-         "-s ALLOW_MEMORY_GROWTH=1",
-         "-s TOTAL_MEMORY=536870912",  # 512M
--        "--pre-js $(location :preamble.js.lds)",
++        "-s ALLOW_MEMORY_GROWTH=1",
++        "-s TOTAL_MEMORY=536870912",  # 512M
 +        "-s USE_PTHREADS=0",
 +        "-s STANDALONE_WASM=1",
-+        "-Wno-unused",
-+        "-Wno-unused-variable",
-+        "-Wno-unused-command-line-argument",
 +        "-Wl,--export=__heap_base",
 +        "-Wl,--export=__data_end",
 +        "-Wl,--export=malloc",
 +        "-Wl,--export=free",
-+        "--oformat=wasm",
-     ]
- 
++    ]
++
++
  def xnnpack_emscripten_deps():
-diff --git a/src/log.c b/src/log.c
-index 5715f2f85..4b3e4261b 100644
---- a/src/log.c
-+++ b/src/log.c
-@@ -55,7 +55,7 @@
- #endif
- 
- #if XNN_LOG_TO_STDIO
--static void xnn_vlog(int output_handle, const char* prefix, size_t prefix_length, const char* format, va_list args) {
-+void xnn_vlog(int output_handle, const char* prefix, size_t prefix_length, const char* format, va_list args) {
-   char stack_buffer[XNN_LOG_STACK_BUFFER_SIZE];
-   char* heap_buffer = NULL;
-   char* out_buffer = &stack_buffer[0];
-diff --git a/third_party/cpuinfo.BUILD b/third_party/cpuinfo.BUILD
-index 1997f4e3a..5e03c43af 100644
---- a/third_party/cpuinfo.BUILD
-+++ b/third_party/cpuinfo.BUILD
-@@ -150,7 +150,7 @@ cc_library(
-         "src/arm/midr.h",
-     ],
-     deps = [
--        "@clog",
-+        "//deps/clog"
-     ],
- )
- 
-@@ -352,5 +352,5 @@ config_setting(
- 
- config_setting(
-     name = "emscripten",
--    values = {"crosstool_top": "//toolchain:emscripten"},
-+    values = {"crosstool_top": "@emsdk//emscripten_toolchain:everything"},
- )
+     """Emscripten-specific dependencies for unit tests and benchmarks."""
+     return [

+ 334 - 160
test-tools/host-tool/external/cJSON/cJSON.c

@@ -1,24 +1,24 @@
 /*
- Copyright (c) 2009-2017 Dave Gamble and cJSON contributors
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
+  Copyright (c) 2009-2017 Dave Gamble and cJSON contributors
+
+  Permission is hereby granted, free of charge, to any person obtaining a copy
+  of this software and associated documentation files (the "Software"), to deal
+  in the Software without restriction, including without limitation the rights
+  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+  copies of the Software, and to permit persons to whom the Software is
+  furnished to do so, subject to the following conditions:
+
+  The above copyright notice and this permission notice shall be included in
+  all copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+  THE SOFTWARE.
+*/
 
 /* cJSON */
 /* JSON parser in C. */
@@ -43,6 +43,7 @@
 #include <stdlib.h>
 #include <limits.h>
 #include <ctype.h>
+#include <float.h>
 
 #ifdef ENABLE_LOCALES
 #include <locale.h>
@@ -58,9 +59,33 @@
 #include "cJSON.h"
 
 /* define our own boolean type */
+#ifdef true
+#undef true
+#endif
 #define true ((cJSON_bool)1)
+
+#ifdef false
+#undef false
+#endif
 #define false ((cJSON_bool)0)
 
+/* define isnan and isinf for ANSI C; in C99 or above, isnan and isinf have
+ * been defined in math.h */
+#ifndef isinf
+#define isinf(d) (isnan((d - d)) && !isnan(d))
+#endif
+#ifndef isnan
+#define isnan(d) (d != d)
+#endif
+
+#ifndef NAN
+#ifdef _WIN32
+#define NAN sqrt(-1.0)
+#else
+#define NAN 0.0 / 0.0
+#endif
+#endif
+
 typedef struct {
     const unsigned char *json;
     size_t position;
@@ -72,7 +97,7 @@ CJSON_PUBLIC(const char *) cJSON_GetErrorPtr(void)
     return (const char *)(global_error.json + global_error.position);
 }
 
-CJSON_PUBLIC(char *) cJSON_GetStringValue(cJSON *item)
+CJSON_PUBLIC(char *) cJSON_GetStringValue(const cJSON *const item)
 {
     if (!cJSON_IsString(item)) {
         return NULL;
@@ -81,18 +106,27 @@ CJSON_PUBLIC(char *) cJSON_GetStringValue(cJSON *item)
     return item->valuestring;
 }
 
+/* Return item->valuedouble when cJSON_IsNumber accepts item, NaN otherwise. */
+CJSON_PUBLIC(double) cJSON_GetNumberValue(const cJSON *const item)
+{
+    if (cJSON_IsNumber(item)) {
+        return item->valuedouble;
+    }
+    return (double)NAN;
+}
+
 /* This is a safeguard to prevent copy-pasters from using incompatible C and
  * header files */
 #if (CJSON_VERSION_MAJOR != 1) || (CJSON_VERSION_MINOR != 7) \
-    || (CJSON_VERSION_PATCH != 10)
+    || (CJSON_VERSION_PATCH != 16)
 #error cJSON.h and cJSON.c have different versions. Make sure that both have the same.
 #endif
 
 CJSON_PUBLIC(const char *) cJSON_Version(void)
 {
     static char version[15];
-    snprintf(version, sizeof(version), "%i.%i.%i", CJSON_VERSION_MAJOR,
-             CJSON_VERSION_MINOR, CJSON_VERSION_PATCH);
+    sprintf(version, "%i.%i.%i", CJSON_VERSION_MAJOR, CJSON_VERSION_MINOR,
+            CJSON_VERSION_PATCH);
 
     return version;
 }
@@ -127,8 +161,8 @@ typedef struct internal_hooks {
 } internal_hooks;
 
 #if defined(_MSC_VER)
-/* work around MSVC error C2322: '...' address of dillimport '...'
-   is not static */
+/* work around MSVC error C2322: '...' address of dllimport '...' is not static
+ */
 static void *CJSON_CDECL
 internal_malloc(size_t size)
 {
@@ -150,13 +184,11 @@ internal_realloc(void *pointer, size_t size)
 #define internal_realloc realloc
 #endif
 
-/* clang-format off */
-static internal_hooks global_hooks = {
-    internal_malloc,
-    internal_free,
-    internal_realloc
-};
-/* clang-format on */
+/* strlen of character literals resolved at compile time */
+#define static_strlen(string_literal) (sizeof(string_literal) - sizeof(""))
+
+static internal_hooks global_hooks = { internal_malloc, internal_free,
+                                       internal_realloc };
 
 static unsigned char *
 cJSON_strdup(const unsigned char *string, const internal_hooks *const hooks)
@@ -271,8 +303,8 @@ typedef struct {
 /* get a pointer to the buffer at the position */
 #define buffer_at_offset(buffer) ((buffer)->content + (buffer)->offset)
 
-/* Parse the input text to generate a number, and populate the result
-   into item. */
+/* Parse the input text to generate a number, and populate the result into item.
+ */
 static cJSON_bool
 parse_number(cJSON *const item, parse_buffer *const input_buffer)
 {
@@ -287,9 +319,8 @@ parse_number(cJSON *const item, parse_buffer *const input_buffer)
     }
 
     /* copy the number into a temporary buffer and replace '.' with the decimal
-     * point of the current locale (for strtod)
-     * This also takes care of '\0' not necessarily being available for marking
-     * the end of the input */
+     * point of the current locale (for strtod). This also takes care of '\0'
+     * not necessarily being available for marking the end of the input */
     for (i = 0; (i < (sizeof(number_c_string) - 1))
                 && can_access_at_index(input_buffer, i);
          i++) {
@@ -363,6 +394,32 @@ CJSON_PUBLIC(double) cJSON_SetNumberHelper(cJSON *object, double number)
     return object->valuedouble = number;
 }
 
+/* Replace the text held by a string item.
+ *
+ * object:      item to modify; must be a cJSON_String that is not a reference.
+ * valuestring: null-terminated replacement text.
+ *
+ * Returns the item's valuestring after the update, or NULL when either
+ * argument is NULL, the item is not a modifiable string, the item has no
+ * existing valuestring, or allocating the copy fails. On a NULL return the
+ * item is left unchanged. */
+CJSON_PUBLIC(char *)
+cJSON_SetValuestring(cJSON *object, const char *valuestring)
+{
+    char *copy = NULL;
+    size_t new_length = 0;
+
+    /* nothing can be stored without both an item and a replacement string */
+    if ((object == NULL) || (valuestring == NULL)) {
+        return NULL;
+    }
+    /* if object's type is not cJSON_String or is cJSON_IsReference, it should
+     * not set valuestring */
+    if (!(object->type & cJSON_String) || (object->type & cJSON_IsReference)) {
+        return NULL;
+    }
+    /* a string item without a buffer is corrupted; strlen(NULL) would crash */
+    if (object->valuestring == NULL) {
+        return NULL;
+    }
+
+    new_length = strlen(valuestring);
+    if (new_length <= strlen(object->valuestring)) {
+        /* reuse the existing buffer; memmove because valuestring may point
+         * into that same buffer, where strcpy would be undefined */
+        memmove(object->valuestring, valuestring, new_length + sizeof(""));
+        return object->valuestring;
+    }
+    copy =
+        (char *)cJSON_strdup((const unsigned char *)valuestring, &global_hooks);
+    if (copy == NULL) {
+        return NULL;
+    }
+    cJSON_free(object->valuestring);
+    object->valuestring = copy;
+
+    return copy;
+}
+
 typedef struct {
     unsigned char *buffer;
     size_t length;
@@ -438,9 +495,8 @@ ensure(printbuffer *const p, size_t needed)
 
             return NULL;
         }
-        if (newbuffer) {
-            memcpy(newbuffer, p->buffer, p->offset + 1);
-        }
+
+        memcpy(newbuffer, p->buffer, p->offset + 1);
         p->hooks.deallocate(p->buffer);
     }
     p->length = newsize;
@@ -463,6 +519,14 @@ update_offset(printbuffer *const buffer)
     buffer->offset += strlen((const char *)buffer_pointer);
 }
 
+/* Compare two doubles for equality, accepting a difference of at most
+ * DBL_EPSILON scaled by the larger of the two magnitudes. */
+static cJSON_bool
+compare_double(double a, double b)
+{
+    const double abs_a = fabs(a);
+    const double abs_b = fabs(b);
+    const double larger = (abs_a > abs_b) ? abs_a : abs_b;
+
+    return (fabs(a - b) <= larger * DBL_EPSILON);
+}
+
 /* Render the number nicely from the given item into a string. */
 static cJSON_bool
 print_number(const cJSON *const item, printbuffer *const output_buffer)
@@ -471,35 +535,37 @@ print_number(const cJSON *const item, printbuffer *const output_buffer)
     double d = item->valuedouble;
     int length = 0;
     size_t i = 0;
-    unsigned char
-        number_buffer[26]; /* temporary buffer to print the number into */
+    unsigned char number_buffer[26] = {
+        0
+    }; /* temporary buffer to print the number into */
     unsigned char decimal_point = get_decimal_point();
-    double test;
+    double test = 0.0;
 
     if (output_buffer == NULL) {
         return false;
     }
 
     /* This checks for NaN and Infinity */
-    if ((d * 0) != 0) {
-        length = snprintf((char *)number_buffer, sizeof(number_buffer), "null");
+    if (isnan(d) || isinf(d)) {
+        length = sprintf((char *)number_buffer, "null");
+    }
+    else if (d == (double)item->valueint) {
+        length = sprintf((char *)number_buffer, "%d", item->valueint);
     }
     else {
         /* Try 15 decimal places of precision to avoid nonsignificant nonzero
          * digits */
-        length =
-            snprintf((char *)number_buffer, sizeof(number_buffer), "%1.15g", d);
+        length = sprintf((char *)number_buffer, "%1.15g", d);
 
         /* Check whether the original double can be recovered */
         if ((sscanf((char *)number_buffer, "%lg", &test) != 1)
-            || ((double)test != d)) {
+            || !compare_double((double)test, d)) {
             /* If not, print with 17 decimal places of precision */
-            length = snprintf((char *)number_buffer, sizeof(number_buffer),
-                              "%1.17g", d);
+            length = sprintf((char *)number_buffer, "%1.17g", d);
         }
     }
 
-    /* snprintf failed or buffer overrun occured */
+    /* sprintf failed or buffer overrun occurred */
     if ((length < 0) || (length > (int)(sizeof(number_buffer) - 1))) {
         return false;
     }
@@ -709,8 +775,7 @@ parse_string(cJSON *const item, parse_buffer *const input_buffer)
         if (((size_t)(input_end - input_buffer->content)
              >= input_buffer->length)
             || (*input_end != '\"')) {
-            goto fail;
-            /* string ended unexpectedly */
+            goto fail; /* string ended unexpectedly */
         }
 
         /* This is at most how much we need for the output */
@@ -719,8 +784,7 @@ parse_string(cJSON *const item, parse_buffer *const input_buffer)
         output = (unsigned char *)input_buffer->hooks.allocate(allocation_length
                                                                + sizeof(""));
         if (output == NULL) {
-            goto fail;
-            /* allocation failure */
+            goto fail; /* allocation failure */
         }
     }
 
@@ -759,7 +823,7 @@ parse_string(cJSON *const item, parse_buffer *const input_buffer)
                     *output_pointer++ = input_pointer[1];
                     break;
 
-                    /* UTF-16 literal */
+                /* UTF-16 literal */
                 case 'u':
                     sequence_length = utf16_literal_to_utf8(
                         input_pointer, input_end, &output_pointer);
@@ -805,7 +869,7 @@ print_string_ptr(const unsigned char *const input,
                  printbuffer *const output_buffer)
 {
     const unsigned char *input_pointer = NULL;
-    unsigned char *output = NULL, *output_end;
+    unsigned char *output = NULL;
     unsigned char *output_pointer = NULL;
     size_t output_length = 0;
     /* numbers of additional characters needed for escaping */
@@ -853,7 +917,6 @@ print_string_ptr(const unsigned char *const input,
     if (output == NULL) {
         return false;
     }
-    output_end = output + output_length + sizeof("\"\"");
 
     /* no characters have to be escaped */
     if (escape_characters == 0) {
@@ -902,9 +965,7 @@ print_string_ptr(const unsigned char *const input,
                     break;
                 default:
                     /* escape and print as unicode codepoint */
-                    snprintf((char *)output_pointer,
-                             output_end - output_pointer, "u%04x",
-                             *input_pointer);
+                    sprintf((char *)output_pointer, "u%04x", *input_pointer);
                     output_pointer += 4;
                     break;
             }
@@ -945,6 +1006,10 @@ buffer_skip_whitespace(parse_buffer *const buffer)
         return NULL;
     }
 
+    if (cannot_access_at_index(buffer, 0)) {
+        return buffer;
+    }
+
     while (can_access_at_index(buffer, 0)
            && (buffer_at_offset(buffer)[0] <= 32)) {
         buffer->offset++;
@@ -975,10 +1040,28 @@ skip_utf8_bom(parse_buffer *const buffer)
     return buffer;
 }
 
-/* Parse an object - create a new root, and populate. */
 CJSON_PUBLIC(cJSON *)
 cJSON_ParseWithOpts(const char *value, const char **return_parse_end,
                     cJSON_bool require_null_terminated)
+{
+    size_t buffer_length;
+
+    if (NULL == value) {
+        return NULL;
+    }
+
+    /* Adding null character size due to require_null_terminated. */
+    buffer_length = strlen(value) + sizeof("");
+
+    return cJSON_ParseWithLengthOpts(value, buffer_length, return_parse_end,
+                                     require_null_terminated);
+}
+
+/* Parse an object - create a new root, and populate. */
+CJSON_PUBLIC(cJSON *)
+cJSON_ParseWithLengthOpts(const char *value, size_t buffer_length,
+                          const char **return_parse_end,
+                          cJSON_bool require_null_terminated)
 {
     parse_buffer buffer = { 0, 0, 0, 0, { 0, 0, 0 } };
     cJSON *item = NULL;
@@ -987,12 +1070,12 @@ cJSON_ParseWithOpts(const char *value, const char **return_parse_end,
     global_error.json = NULL;
     global_error.position = 0;
 
-    if (value == NULL) {
+    if (value == NULL || 0 == buffer_length) {
         goto fail;
     }
 
     buffer.content = (const unsigned char *)value;
-    buffer.length = strlen((const char *)value) + sizeof("");
+    buffer.length = buffer_length;
     buffer.offset = 0;
     buffer.hooks = global_hooks;
 
@@ -1056,7 +1139,13 @@ CJSON_PUBLIC(cJSON *) cJSON_Parse(const char *value)
     return cJSON_ParseWithOpts(value, 0, 0);
 }
 
-#define cjson_min(a, b) ((a < b) ? a : b)
+/* Parse buffer_length bytes of JSON text starting at value. Forwards to
+ * cJSON_ParseWithLengthOpts with return_parse_end and
+ * require_null_terminated both passed as 0. */
+CJSON_PUBLIC(cJSON *)
+cJSON_ParseWithLength(const char *value, size_t buffer_length)
+{
+    return cJSON_ParseWithLengthOpts(value, buffer_length, 0, 0);
+}
+
+#define cjson_min(a, b) (((a) < (b)) ? (a) : (b))
 
 static unsigned char *
 print(const cJSON *const item, cJSON_bool format,
@@ -1113,6 +1202,10 @@ fail:
         hooks->deallocate(buffer->buffer);
     }
 
+    if (printed != NULL) {
+        hooks->deallocate(printed);
+    }
+
     return NULL;
 }
 
@@ -1156,20 +1249,20 @@ cJSON_PrintBuffered(const cJSON *item, int prebuffer, cJSON_bool fmt)
 }
 
 CJSON_PUBLIC(cJSON_bool)
-cJSON_PrintPreallocated(cJSON *item, char *buf, const int len,
-                        const cJSON_bool fmt)
+cJSON_PrintPreallocated(cJSON *item, char *buffer, const int length,
+                        const cJSON_bool format)
 {
     printbuffer p = { 0, 0, 0, 0, 0, 0, { 0, 0, 0 } };
 
-    if ((len < 0) || (buf == NULL)) {
+    if ((length < 0) || (buffer == NULL)) {
         return false;
     }
 
-    p.buffer = (unsigned char *)buf;
-    p.length = (size_t)len;
+    p.buffer = (unsigned char *)buffer;
+    p.length = (size_t)length;
     p.offset = 0;
     p.noalloc = true;
-    p.format = fmt;
+    p.format = format;
     p.hooks = global_hooks;
 
     return print_value(item, &p);
@@ -1341,8 +1434,7 @@ parse_array(cJSON *const item, parse_buffer *const input_buffer)
         /* allocate next item */
         cJSON *new_item = cJSON_New_Item(&(input_buffer->hooks));
         if (new_item == NULL) {
-            goto fail;
-            /* allocation failure */
+            goto fail; /* allocation failure */
         }
 
         /* attach next item to list */
@@ -1361,8 +1453,7 @@ parse_array(cJSON *const item, parse_buffer *const input_buffer)
         input_buffer->offset++;
         buffer_skip_whitespace(input_buffer);
         if (!parse_value(current_item, input_buffer)) {
-            goto fail;
-            /* failed to parse value */
+            goto fail; /* failed to parse value */
         }
         buffer_skip_whitespace(input_buffer);
     } while (can_access_at_index(input_buffer, 0)
@@ -1370,13 +1461,16 @@ parse_array(cJSON *const item, parse_buffer *const input_buffer)
 
     if (cannot_access_at_index(input_buffer, 0)
         || buffer_at_offset(input_buffer)[0] != ']') {
-        goto fail;
-        /* expected end of array */
+        goto fail; /* expected end of array */
     }
 
 success:
     input_buffer->depth--;
 
+    if (head != NULL) {
+        head->prev = current_item;
+    }
+
     item->type = cJSON_Array;
     item->child = head;
 
@@ -1461,16 +1555,14 @@ parse_object(cJSON *const item, parse_buffer *const input_buffer)
 
     if (cannot_access_at_index(input_buffer, 0)
         || (buffer_at_offset(input_buffer)[0] != '{')) {
-        goto fail;
-        /* not an object */
+        goto fail; /* not an object */
     }
 
     input_buffer->offset++;
     buffer_skip_whitespace(input_buffer);
     if (can_access_at_index(input_buffer, 0)
         && (buffer_at_offset(input_buffer)[0] == '}')) {
-        goto success;
-        /* empty object */
+        goto success; /* empty object */
     }
 
     /* check if we skipped to the end of the buffer */
@@ -1486,8 +1578,7 @@ parse_object(cJSON *const item, parse_buffer *const input_buffer)
         /* allocate next item */
         cJSON *new_item = cJSON_New_Item(&(input_buffer->hooks));
         if (new_item == NULL) {
-            goto fail;
-            /* allocation failure */
+            goto fail; /* allocation failure */
         }
 
         /* attach next item to list */
@@ -1506,8 +1597,7 @@ parse_object(cJSON *const item, parse_buffer *const input_buffer)
         input_buffer->offset++;
         buffer_skip_whitespace(input_buffer);
         if (!parse_string(current_item, input_buffer)) {
-            goto fail;
-            /* faile to parse name */
+            goto fail; /* failed to parse name */
         }
         buffer_skip_whitespace(input_buffer);
 
@@ -1517,16 +1607,14 @@ parse_object(cJSON *const item, parse_buffer *const input_buffer)
 
         if (cannot_access_at_index(input_buffer, 0)
             || (buffer_at_offset(input_buffer)[0] != ':')) {
-            goto fail;
-            /* invalid object */
+            goto fail; /* invalid object */
         }
 
         /* parse the value */
         input_buffer->offset++;
         buffer_skip_whitespace(input_buffer);
         if (!parse_value(current_item, input_buffer)) {
-            goto fail;
-            /* failed to parse value */
+            goto fail; /* failed to parse value */
         }
         buffer_skip_whitespace(input_buffer);
     } while (can_access_at_index(input_buffer, 0)
@@ -1534,13 +1622,16 @@ parse_object(cJSON *const item, parse_buffer *const input_buffer)
 
     if (cannot_access_at_index(input_buffer, 0)
         || (buffer_at_offset(input_buffer)[0] != '}')) {
-        goto fail;
-        /* expected end of object */
+        goto fail; /* expected end of object */
     }
 
 success:
     input_buffer->depth--;
 
+    if (head != NULL) {
+        head->prev = current_item;
+    }
+
     item->type = cJSON_Object;
     item->child = head;
 
@@ -1792,22 +1883,26 @@ add_item_to_array(cJSON *array, cJSON *item)
 {
     cJSON *child = NULL;
 
-    if ((item == NULL) || (array == NULL)) {
+    if ((item == NULL) || (array == NULL) || (array == item)) {
         return false;
     }
 
     child = array->child;
-
+    /*
+     * To find the last item in array quickly, we use prev in array
+     */
     if (child == NULL) {
         /* list is empty, start new one */
         array->child = item;
+        item->prev = item;
+        item->next = NULL;
     }
     else {
         /* append to the end */
-        while (child->next) {
-            child = child->next;
+        if (child->prev) {
+            suffix_object(child->prev, item);
+            array->child->prev = item;
         }
-        suffix_object(child, item);
     }
 
     return true;
@@ -1847,7 +1942,8 @@ add_item_to_object(cJSON *const object, const char *const string,
     char *new_key = NULL;
     int new_type = cJSON_Invalid;
 
-    if ((object == NULL) || (string == NULL) || (item == NULL)) {
+    if ((object == NULL) || (string == NULL) || (item == NULL)
+        || (object == item)) {
         return false;
     }
 
@@ -2028,7 +2124,7 @@ cJSON_DetachItemViaPointer(cJSON *parent, cJSON *const item)
         return NULL;
     }
 
-    if (item->prev != NULL) {
+    if (item != parent->child) {
         /* not the first element */
         item->prev->next = item->next;
     }
@@ -2041,6 +2137,11 @@ cJSON_DetachItemViaPointer(cJSON *parent, cJSON *const item)
         /* first element */
         parent->child = item->next;
     }
+    else if (item->next == NULL) {
+        /* last element */
+        parent->child->prev = item->prev;
+    }
+
     /* make sure the detached item doesn't point anywhere anymore */
     item->prev = NULL;
     item->next = NULL;
@@ -2121,7 +2222,8 @@ CJSON_PUBLIC(cJSON_bool)
 cJSON_ReplaceItemViaPointer(cJSON *const parent, cJSON *const item,
                             cJSON *replacement)
 {
-    if ((parent == NULL) || (replacement == NULL) || (item == NULL)) {
+    if ((parent == NULL) || (parent->child == NULL) || (replacement == NULL)
+        || (item == NULL)) {
         return false;
     }
 
@@ -2135,12 +2237,24 @@ cJSON_ReplaceItemViaPointer(cJSON *const parent, cJSON *const item,
     if (replacement->next != NULL) {
         replacement->next->prev = replacement;
     }
-    if (replacement->prev != NULL) {
-        replacement->prev->next = replacement;
-    }
     if (parent->child == item) {
+        if (parent->child->prev == parent->child) {
+            replacement->prev = replacement;
+        }
         parent->child = replacement;
     }
+    else { /*
+            * To find the last item in array quickly, we use prev in array.
+            * We can't modify the last item's next pointer where this item was
+            * the parent's child
+            */
+        if (replacement->prev != NULL) {
+            replacement->prev->next = replacement;
+        }
+        if (replacement->next == NULL) {
+            parent->child->prev = replacement;
+        }
+    }
 
     item->next = NULL;
     item->prev = NULL;
@@ -2149,15 +2263,15 @@ cJSON_ReplaceItemViaPointer(cJSON *const parent, cJSON *const item,
     return true;
 }
 
-CJSON_PUBLIC(void)
+CJSON_PUBLIC(cJSON_bool)
 cJSON_ReplaceItemInArray(cJSON *array, int which, cJSON *newitem)
 {
     if (which < 0) {
-        return;
+        return false;
     }
 
-    cJSON_ReplaceItemViaPointer(array, get_array_item(array, (size_t)which),
-                                newitem);
+    return cJSON_ReplaceItemViaPointer(
+        array, get_array_item(array, (size_t)which), newitem);
 }
 
 static cJSON_bool
@@ -2175,25 +2289,27 @@ replace_item_in_object(cJSON *object, const char *string, cJSON *replacement,
     }
     replacement->string =
         (char *)cJSON_strdup((const unsigned char *)string, &global_hooks);
+    if (replacement->string == NULL) {
+        return false;
+    }
+
     replacement->type &= ~cJSON_StringIsConst;
 
-    cJSON_ReplaceItemViaPointer(
+    return cJSON_ReplaceItemViaPointer(
         object, get_object_item(object, string, case_sensitive), replacement);
-
-    return true;
 }
 
-CJSON_PUBLIC(void)
+CJSON_PUBLIC(cJSON_bool)
 cJSON_ReplaceItemInObject(cJSON *object, const char *string, cJSON *newitem)
 {
-    replace_item_in_object(object, string, newitem, false);
+    return replace_item_in_object(object, string, newitem, false);
 }
 
-CJSON_PUBLIC(void)
+CJSON_PUBLIC(cJSON_bool)
 cJSON_ReplaceItemInObjectCaseSensitive(cJSON *object, const char *string,
                                        cJSON *newitem)
 {
-    replace_item_in_object(object, string, newitem, true);
+    return replace_item_in_object(object, string, newitem, true);
 }
 
 /* Create basic types: */
@@ -2227,11 +2343,11 @@ CJSON_PUBLIC(cJSON *) cJSON_CreateFalse(void)
     return item;
 }
 
-CJSON_PUBLIC(cJSON *) cJSON_CreateBool(cJSON_bool b)
+CJSON_PUBLIC(cJSON *) cJSON_CreateBool(cJSON_bool boolean)
 {
     cJSON *item = cJSON_New_Item(&global_hooks);
     if (item) {
-        item->type = b ? cJSON_True : cJSON_False;
+        item->type = boolean ? cJSON_True : cJSON_False;
     }
 
     return item;
@@ -2357,6 +2473,7 @@ CJSON_PUBLIC(cJSON *) cJSON_CreateIntArray(const int *numbers, int count)
     }
 
     a = cJSON_CreateArray();
+
     for (i = 0; a && (i < (size_t)count); i++) {
         n = cJSON_CreateNumber(numbers[i]);
         if (!n) {
@@ -2372,6 +2489,10 @@ CJSON_PUBLIC(cJSON *) cJSON_CreateIntArray(const int *numbers, int count)
         p = n;
     }
 
+    if (a && a->child) {
+        a->child->prev = n;
+    }
+
     return a;
 }
 
@@ -2403,6 +2524,10 @@ CJSON_PUBLIC(cJSON *) cJSON_CreateFloatArray(const float *numbers, int count)
         p = n;
     }
 
+    if (a && a->child) {
+        a->child->prev = n;
+    }
+
     return a;
 }
 
@@ -2434,10 +2559,15 @@ CJSON_PUBLIC(cJSON *) cJSON_CreateDoubleArray(const double *numbers, int count)
         p = n;
     }
 
+    if (a && a->child) {
+        a->child->prev = n;
+    }
+
     return a;
 }
 
-CJSON_PUBLIC(cJSON *) cJSON_CreateStringArray(const char **strings, int count)
+CJSON_PUBLIC(cJSON *)
+cJSON_CreateStringArray(const char *const *strings, int count)
 {
     size_t i = 0;
     cJSON *n = NULL;
@@ -2465,6 +2595,10 @@ CJSON_PUBLIC(cJSON *) cJSON_CreateStringArray(const char **strings, int count)
         p = n;
     }
 
+    if (a && a->child) {
+        a->child->prev = n;
+    }
+
     return a;
 }
 
@@ -2532,6 +2666,9 @@ CJSON_PUBLIC(cJSON *) cJSON_Duplicate(const cJSON *item, cJSON_bool recurse)
         }
         child = child->next;
     }
+    if (newitem && newitem->child) {
+        newitem->child->prev = newchild;
+    }
 
     return newitem;
 
@@ -2543,55 +2680,93 @@ fail:
     return NULL;
 }
 
-CJSON_PUBLIC(void) cJSON_Minify(char *json)
+static void
+skip_oneline_comment(char **input)
 {
-    unsigned char *into = (unsigned char *)json;
+    *input += static_strlen("//");
 
-    if (json == NULL) {
-        return;
+    for (; (*input)[0] != '\0'; ++(*input)) {
+        if ((*input)[0] == '\n') {
+            *input += static_strlen("\n");
+            return;
+        }
     }
+}
 
-    while (*json) {
-        if (*json == ' ') {
-            json++;
-        }
-        else if (*json == '\t') {
-            /* Whitespace characters. */
-            json++;
-        }
-        else if (*json == '\r') {
-            json++;
+static void
+skip_multiline_comment(char **input)
+{
+    *input += static_strlen("/*");
+
+    for (; (*input)[0] != '\0'; ++(*input)) {
+        if (((*input)[0] == '*') && ((*input)[1] == '/')) {
+            *input += static_strlen("*/");
+            return;
         }
-        else if (*json == '\n') {
-            json++;
+    }
+}
+
+/* Copy one JSON string literal from *input to *output without altering it.
+ * On entry *input points at the opening quote. Both cursors are advanced past
+ * the closing quote, or *input stops at the terminating '\0' when the literal
+ * is never closed. */
+static void
+minify_string(char **input, char **output)
+{
+    (*output)[0] = (*input)[0];
+    *input += static_strlen("\"");
+    *output += static_strlen("\"");
+
+    for (; (*input)[0] != '\0'; (void)++(*input), ++(*output)) {
+        (*output)[0] = (*input)[0];
+
+        if ((*input)[0] == '\"') {
+            (*output)[0] = '\"';
+            *input += static_strlen("\"");
+            *output += static_strlen("\"");
+            return;
         }
-        else if ((*json == '/') && (json[1] == '/')) {
-            /* double-slash comments, to end of line. */
-            while (*json && (*json != '\n')) {
-                json++;
-            }
+        else if (((*input)[0] == '\\') && ((*input)[1] != '\0')) {
+            /* A backslash escapes whatever character follows it, so copy the
+             * pair as a unit. Checking only for \" would make the second
+             * backslash of an escaped backslash (\\) swallow the closing
+             * quote. */
+            (*output)[1] = (*input)[1];
+            *input += static_strlen("\"");
+            *output += static_strlen("\"");
         }
-        else if ((*json == '/') && (json[1] == '*')) {
-            /* multiline comments. */
-            while (*json && !((*json == '*') && (json[1] == '/'))) {
+    }
+}
+
+CJSON_PUBLIC(void) cJSON_Minify(char *json)
+{
+    char *into = json;
+
+    if (json == NULL) {
+        return;
+    }
+
+    while (json[0] != '\0') {
+        switch (json[0]) {
+            case ' ':
+            case '\t':
+            case '\r':
+            case '\n':
                 json++;
-            }
-            json += 2;
-        }
-        else if (*json == '\"') {
-            /* string literals, which are \" sensitive. */
-            *into++ = (unsigned char)*json++;
-            while (*json && (*json != '\"')) {
-                if (*json == '\\') {
-                    *into++ = (unsigned char)*json++;
+                break;
+
+            case '/':
+                if (json[1] == '/') {
+                    skip_oneline_comment(&json);
                 }
-                *into++ = (unsigned char)*json++;
-            }
-            *into++ = (unsigned char)*json++;
-        }
-        else {
-            /* All other characters. */
-            *into++ = (unsigned char)*json++;
+                else if (json[1] == '*') {
+                    skip_multiline_comment(&json);
+                }
+                else {
+                    json++;
+                }
+                break;
+
+            case '\"':
+                minify_string(&json, (char **)&into);
+                break;
+
+            default:
+                into[0] = json[0];
+                json++;
+                into++;
         }
     }
 
@@ -2692,8 +2867,7 @@ CJSON_PUBLIC(cJSON_bool)
 cJSON_Compare(const cJSON *const a, const cJSON *const b,
               const cJSON_bool case_sensitive)
 {
-    if ((a == NULL) || (b == NULL) || ((a->type & 0xFF) != (b->type & 0xFF))
-        || cJSON_IsInvalid(a)) {
+    if ((a == NULL) || (b == NULL) || ((a->type & 0xFF) != (b->type & 0xFF))) {
         return false;
     }
 
@@ -2726,7 +2900,7 @@ cJSON_Compare(const cJSON *const a, const cJSON *const b,
             return true;
 
         case cJSON_Number:
-            if (a->valuedouble == b->valuedouble) {
+            if (compare_double(a->valuedouble, b->valuedouble)) {
                 return true;
             }
             return false;

+ 94 - 65
test-tools/host-tool/external/cJSON/cJSON.h

@@ -1,24 +1,24 @@
 /*
- Copyright (c) 2009-2017 Dave Gamble and cJSON contributors
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
+  Copyright (c) 2009-2017 Dave Gamble and cJSON contributors
+
+  Permission is hereby granted, free of charge, to any person obtaining a copy
+  of this software and associated documentation files (the "Software"), to deal
+  in the Software without restriction, including without limitation the rights
+  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+  copies of the Software, and to permit persons to whom the Software is
+  furnished to do so, subject to the following conditions:
+
+  The above copyright notice and this permission notice shall be included in
+  all copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+  THE SOFTWARE.
+*/
 
 #ifndef cJSON__h
 #define cJSON__h
@@ -35,30 +35,34 @@ extern "C" {
 
 #ifdef __WINDOWS__
 
-/**
- * When compiling for windows, we specify a specific calling convention to avoid
- * issues where we are being called from a project with a different default
- * calling convention. For windows you have 3 define options:
- *   CJSON_HIDE_SYMBOLS - Define this in the case where you don't want to ever
- *                        dllexport symbols
- *   CJSON_EXPORT_SYMBOLS - Define this on library build when you want to
- *                          dllexport symbols (default)
- *   CJSON_IMPORT_SYMBOLS - Define this if you  want to dllimport symbol
- *
- * For *nix builds that support visibility attribute, you can define similar
- * behavior by setting default visibility to hidden by adding
- *   -fvisibility=hidden (for gcc)
- * or
- *   -xldscope=hidden (for sun cc)
- * to CFLAGS, then using the CJSON_API_VISIBILITY flag to "export" the same
- * symbols the way CJSON_EXPORT_SYMBOLS does
- */
+/* When compiling for windows, we specify a specific calling convention to avoid
+issues where we are being called from a project with a different default calling
+convention.  For windows you have 3 define options:
+
+CJSON_HIDE_SYMBOLS - Define this if you don't ever want to dllexport symbols
+CJSON_EXPORT_SYMBOLS - Define this on library build when you want to dllexport
+                       symbols (default)
+CJSON_IMPORT_SYMBOLS - Define this if you want to dllimport symbols
+
+For *nix builds that support visibility attribute, you can define similar
+behavior by
+
+setting default visibility to hidden by adding
+-fvisibility=hidden (for gcc)
+or
+-xldscope=hidden (for sun cc)
+to CFLAGS
+
+then using the CJSON_API_VISIBILITY flag to "export" the same symbols the way
+CJSON_EXPORT_SYMBOLS does
+
+*/
 
 #define CJSON_CDECL __cdecl
 #define CJSON_STDCALL __stdcall
 
 /* export symbols by default, this is necessary for copy pasting the C and
-   header file */
+ * header file */
 #if !defined(CJSON_HIDE_SYMBOLS) && !defined(CJSON_IMPORT_SYMBOLS) \
     && !defined(CJSON_EXPORT_SYMBOLS)
 #define CJSON_EXPORT_SYMBOLS
@@ -86,7 +90,7 @@ extern "C" {
 /* project version */
 #define CJSON_VERSION_MAJOR 1
 #define CJSON_VERSION_MINOR 7
-#define CJSON_VERSION_PATCH 10
+#define CJSON_VERSION_PATCH 16
 
 #include <stddef.h>
 
@@ -107,11 +111,11 @@ extern "C" {
 /* The cJSON structure: */
 typedef struct cJSON {
     /* next/prev allow you to walk array/object chains. Alternatively, use
-       GetArraySize/GetArrayItem/GetObjectItem */
+     * GetArraySize/GetArrayItem/GetObjectItem */
     struct cJSON *next;
     struct cJSON *prev;
     /* An array or object item will have a child pointer pointing to a chain of
-       the items in the array/object. */
+     * the items in the array/object. */
     struct cJSON *child;
 
     /* The type of the item, as above. */
@@ -125,7 +129,7 @@ typedef struct cJSON {
     double valuedouble;
 
     /* The item's name string, if this item is the child of, or is in the list
-       of subitems of an object. */
+     * of subitems of an object. */
     char *string;
 } cJSON;
 
@@ -140,7 +144,7 @@ typedef struct cJSON_Hooks {
 typedef int cJSON_bool;
 
 /* Limits how deeply nested arrays/objects can be before cJSON rejects to parse
-   them. This is to prevent stack overflows. */
+ * them. This is to prevent stack overflows. */
 #ifndef CJSON_NESTING_LIMIT
 #define CJSON_NESTING_LIMIT 1000
 #endif
@@ -159,6 +163,8 @@ CJSON_PUBLIC(void) cJSON_InitHooks(cJSON_Hooks *hooks);
 /* Supply a block of JSON, and this returns a cJSON object you can interrogate.
  */
 CJSON_PUBLIC(cJSON *) cJSON_Parse(const char *value);
+CJSON_PUBLIC(cJSON *)
+cJSON_ParseWithLength(const char *value, size_t buffer_length);
 /* ParseWithOpts allows you to require (and check) that the JSON is null
  * terminated, and to retrieve the pointer to the final byte parsed. */
 /* If you supply a ptr in return_parse_end and parsing fails, then
@@ -167,6 +173,10 @@ CJSON_PUBLIC(cJSON *) cJSON_Parse(const char *value);
 CJSON_PUBLIC(cJSON *)
 cJSON_ParseWithOpts(const char *value, const char **return_parse_end,
                     cJSON_bool require_null_terminated);
+CJSON_PUBLIC(cJSON *)
+cJSON_ParseWithLengthOpts(const char *value, size_t buffer_length,
+                          const char **return_parse_end,
+                          cJSON_bool require_null_terminated);
 
 /* Render a cJSON entity to text for transfer/storage. */
 CJSON_PUBLIC(char *) cJSON_Print(const cJSON *item);
@@ -185,7 +195,7 @@ CJSON_PUBLIC(cJSON_bool)
 cJSON_PrintPreallocated(cJSON *item, char *buffer, const int length,
                         const cJSON_bool format);
 /* Delete a cJSON entity and all subentities. */
-CJSON_PUBLIC(void) cJSON_Delete(cJSON *c);
+CJSON_PUBLIC(void) cJSON_Delete(cJSON *item);
 
 /* Returns the number of items in an array (or object). */
 CJSON_PUBLIC(int) cJSON_GetArraySize(const cJSON *array);
@@ -205,8 +215,9 @@ cJSON_HasObjectItem(const cJSON *object, const char *string);
  * when cJSON_Parse() returns 0. 0 when cJSON_Parse() succeeds. */
 CJSON_PUBLIC(const char *) cJSON_GetErrorPtr(void);
 
-/* Check if the item is a string and return its valuestring */
-CJSON_PUBLIC(char *) cJSON_GetStringValue(cJSON *item);
+/* Check item type and return its value */
+CJSON_PUBLIC(char *) cJSON_GetStringValue(const cJSON *const item);
+CJSON_PUBLIC(double) cJSON_GetNumberValue(const cJSON *const item);
 
 /* These functions check the type of an item */
 CJSON_PUBLIC(cJSON_bool) cJSON_IsInvalid(const cJSON *const item);
@@ -233,18 +244,21 @@ CJSON_PUBLIC(cJSON *) cJSON_CreateArray(void);
 CJSON_PUBLIC(cJSON *) cJSON_CreateObject(void);
 
 /* Create a string where valuestring references a string so
-   it will not be freed by cJSON_Delete */
+ * it will not be freed by cJSON_Delete */
 CJSON_PUBLIC(cJSON *) cJSON_CreateStringReference(const char *string);
-/* Create an object/arrray that only references it's elements so
-   they will not be freed by cJSON_Delete */
+/* Create an object/array that only references its elements so
+ * they will not be freed by cJSON_Delete */
 CJSON_PUBLIC(cJSON *) cJSON_CreateObjectReference(const cJSON *child);
 CJSON_PUBLIC(cJSON *) cJSON_CreateArrayReference(const cJSON *child);
 
-/* These utilities create an Array of count items. */
+/* These utilities create an Array of count items.
+ * The parameter count cannot be greater than the number of elements in the
+ * numbers array, otherwise array access will be out of bounds. */
 CJSON_PUBLIC(cJSON *) cJSON_CreateIntArray(const int *numbers, int count);
 CJSON_PUBLIC(cJSON *) cJSON_CreateFloatArray(const float *numbers, int count);
 CJSON_PUBLIC(cJSON *) cJSON_CreateDoubleArray(const double *numbers, int count);
-CJSON_PUBLIC(cJSON *) cJSON_CreateStringArray(const char **strings, int count);
+CJSON_PUBLIC(cJSON *)
+cJSON_CreateStringArray(const char *const *strings, int count);
 
 /* Append item to the specified array/object. */
 CJSON_PUBLIC(cJSON_bool) cJSON_AddItemToArray(cJSON *array, cJSON *item);
@@ -264,7 +278,7 @@ cJSON_AddItemReferenceToArray(cJSON *array, cJSON *item);
 CJSON_PUBLIC(cJSON_bool)
 cJSON_AddItemReferenceToObject(cJSON *object, const char *string, cJSON *item);
 
-/* Remove/Detatch items from Arrays/Objects. */
+/* Remove/Detach items from Arrays/Objects. */
 CJSON_PUBLIC(cJSON *)
 cJSON_DetachItemViaPointer(cJSON *parent, cJSON *const item);
 CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromArray(cJSON *array, int which);
@@ -286,32 +300,35 @@ cJSON_InsertItemInArray(
 CJSON_PUBLIC(cJSON_bool)
 cJSON_ReplaceItemViaPointer(cJSON *const parent, cJSON *const item,
                             cJSON *replacement);
-CJSON_PUBLIC(void)
+CJSON_PUBLIC(cJSON_bool)
 cJSON_ReplaceItemInArray(cJSON *array, int which, cJSON *newitem);
-CJSON_PUBLIC(void)
+CJSON_PUBLIC(cJSON_bool)
 cJSON_ReplaceItemInObject(cJSON *object, const char *string, cJSON *newitem);
-CJSON_PUBLIC(void)
+CJSON_PUBLIC(cJSON_bool)
 cJSON_ReplaceItemInObjectCaseSensitive(cJSON *object, const char *string,
                                        cJSON *newitem);
 
 /* Duplicate a cJSON item */
 CJSON_PUBLIC(cJSON *) cJSON_Duplicate(const cJSON *item, cJSON_bool recurse);
 /* Duplicate will create a new, identical cJSON item to the one you pass, in new
- memory that will need to be released. With recurse!=0, it will duplicate any
- children connected to the item. The item->next and ->prev pointers are always
- zero on return from Duplicate. */
+ * memory that will need to be released. With recurse!=0, it will duplicate any
+ * children connected to the item. The item->next and ->prev pointers are always
+ * zero on return from Duplicate. */
 /* Recursively compare two cJSON items for equality. If either a or b is NULL or
- * invalid, they will be considered unequal.
- * case_sensitive determines if object keys are treated case sensitive (1) or
- * case insensitive (0) */
+ * invalid, they will be considered unequal. case_sensitive determines if object
+ * keys are treated case sensitive (1) or case insensitive (0) */
 CJSON_PUBLIC(cJSON_bool)
 cJSON_Compare(const cJSON *const a, const cJSON *const b,
               const cJSON_bool case_sensitive);
 
+/* Minify a string: remove blank characters (such as ' ', '\t', '\r', '\n')
+ * from it. The input pointer json cannot point to a read-only address area,
+ * such as a string constant; it must point to a readable and writable
+ * address area. */
 CJSON_PUBLIC(void) cJSON_Minify(char *json);
 
 /* Helper functions for creating and adding items to an object at the same time.
-   They return the added item or NULL on failure. */
+ * They return the added item or NULL on failure. */
 CJSON_PUBLIC(cJSON *)
 cJSON_AddNullToObject(cJSON *const object, const char *const name);
 CJSON_PUBLIC(cJSON *)
@@ -336,7 +353,7 @@ CJSON_PUBLIC(cJSON *)
 cJSON_AddArrayToObject(cJSON *const object, const char *const name);
 
 /* When assigning an integer value, it needs to be propagated to valuedouble
-   too. */
+ * too. */
 #define cJSON_SetIntValue(object, number)                             \
     ((object) ? (object)->valueint = (object)->valuedouble = (number) \
               : (number))
@@ -345,6 +362,18 @@ CJSON_PUBLIC(double) cJSON_SetNumberHelper(cJSON *object, double number);
 #define cJSON_SetNumberValue(object, number)                          \
     ((object != NULL) ? cJSON_SetNumberHelper(object, (double)number) \
                       : (number))
+/* Change the valuestring of a cJSON_String object, only takes effect when type
+ * of object is cJSON_String */
+CJSON_PUBLIC(char *)
+cJSON_SetValuestring(cJSON *object, const char *valuestring);
+
+/* If the object is not a boolean type, this does nothing and returns
+ * cJSON_Invalid; otherwise it returns the new type. */
+#define cJSON_SetBoolValue(object, boolValue)                                \
+    ((object != NULL && ((object)->type & (cJSON_False | cJSON_True)))       \
+         ? (object)->type = ((object)->type & (~(cJSON_False | cJSON_True))) \
+                            | ((boolValue) ? cJSON_True : cJSON_False)       \
+         : cJSON_Invalid)
 
 /* Macro for iterating over an array or object */
 #define cJSON_ArrayForEach(element, array)                                   \
@@ -352,7 +381,7 @@ CJSON_PUBLIC(double) cJSON_SetNumberHelper(cJSON *object, double number);
          element = element->next)
 
 /* malloc/free objects using the malloc/free functions that have been set with
-   cJSON_InitHooks */
+ * cJSON_InitHooks */
 CJSON_PUBLIC(void *) cJSON_malloc(size_t size);
 CJSON_PUBLIC(void) cJSON_free(void *object);
 

+ 0 - 20
tests/wamr-test-suites/spec-test-script/ignore_cases.patch

@@ -343,26 +343,6 @@ index adb5cb7..590f626 100644
    (func $g (param $x i32) (result i32)
      (i32.add (local.get $x) (i32.const 1))
    )
-diff --git a/test/core/select.wast b/test/core/select.wast
-index 046e6fe..b677023 100644
---- a/test/core/select.wast
-+++ b/test/core/select.wast
-@@ -324,6 +324,7 @@
-   (module (func $arity-0 (select (result) (nop) (nop) (i32.const 1))))
-   "invalid result arity"
- )
-+(;
- (assert_invalid
-   (module (func $arity-2 (result i32 i32)
-     (select (result i32 i32)
-@@ -334,6 +335,7 @@
-   ))
-   "invalid result arity"
- )
-+;)
- 
- 
- (assert_invalid
 diff --git a/test/core/table_copy.wast b/test/core/table_copy.wast
 index 380e84e..f37e745 100644
 --- a/test/core/table_copy.wast

+ 1 - 1
tests/wamr-test-suites/test_wamr.sh

@@ -824,7 +824,7 @@ function trigger()
                 collect_coverage llvm-jit
 
                 echo "work in orc jit lazy compilation mode"
-                BUILD_FLAGS="$ORC_EAGER_JIT_COMPILE_FLAGS $EXTRA_COMPILE_FLAGS"
+                BUILD_FLAGS="$ORC_LAZY_JIT_COMPILE_FLAGS $EXTRA_COMPILE_FLAGS"
                 build_iwasm_with_cfg $BUILD_FLAGS
                 for suite in "${TEST_CASE_ARR[@]}"; do
                     $suite"_test" jit

+ 38 - 14
tests/wamr-test-suites/wasi-test-script/run_wasi_tests.sh

@@ -11,7 +11,10 @@ readonly TARGET=$2
 readonly WORK_DIR=$PWD
 readonly PLATFORM=$(uname -s | tr A-Z a-z)
 readonly WAMR_DIR="${WORK_DIR}/../../../.."
-readonly IWASM_CMD="${WORK_DIR}/../../../../product-mini/platforms/${PLATFORM}/build/iwasm"
+readonly IWASM_CMD="${WORK_DIR}/../../../../product-mini/platforms/${PLATFORM}/build/iwasm \
+    --allow-resolve=google-public-dns-a.google.com \
+    --addr-pool=::1/128,127.0.0.1/32"
+readonly IWASM_CMD_STRESS="${IWASM_CMD} --max-threads=8"
 readonly WAMRC_CMD="${WORK_DIR}/../../../../wamr-compiler/build/wamrc"
 readonly C_TESTS="tests/c/testsuite/"
 readonly ASSEMBLYSCRIPT_TESTS="tests/assemblyscript/testsuite/"
@@ -22,6 +25,11 @@ readonly LIB_SOCKET_TESTS="${WAMR_DIR}/core/iwasm/libraries/lib-socket/test/"
 run_aot_tests () {
     local tests=("$@")
     for test_wasm in ${tests[@]}; do
+        local extra_stress_flags=""
+        if [[ "$test_wasm" =~ "stress" ]]; then
+            extra_stress_flags="--max-threads=8"
+        fi
+
         test_aot="${test_wasm%.wasm}.aot"
         test_json="${test_wasm%.wasm}.json"
 
@@ -35,12 +43,12 @@ run_aot_tests () {
 
         echo "Running $test_aot"
         expected=0
-        if [ -f ${test_json} ]; then 
+        if [ -f ${test_json} ]; then
             expected=$(jq .exit_code ${test_json})
         fi
-        
-        ${IWASM_CMD} $test_aot
-        
+
+        ${IWASM_CMD} $extra_stress_flags $test_aot
+
         ret=${PIPESTATUS[0]}
 
         echo "expected=$expected, actual=$ret"
@@ -48,20 +56,36 @@ run_aot_tests () {
             exit_code=1
         fi
     done
-} 
+}
 
 if [[ $MODE != "aot" ]];then
     python3 -m venv wasi-env && source wasi-env/bin/activate
     python3 -m pip install -r test-runner/requirements.txt
+
+    # Stress test requires max-threads=8 so it's run separately
+    if [[ -e "${THREAD_INTERNAL_TESTS}spawn_stress_test.wasm" ]]; then 
+        ${IWASM_CMD_STRESS} ${THREAD_INTERNAL_TESTS}spawn_stress_test.wasm
+        ret=${PIPESTATUS[0]}
+        if [ "${ret}" -ne 0 ]; then
+            echo "Stress test spawn_stress_test FAILED with code " ${ret}
+            exit_code=${ret}
+        fi
+    fi
+
     TEST_RUNTIME_EXE="${IWASM_CMD}" python3 test-runner/wasi_test_runner.py \
-                -r adapters/wasm-micro-runtime.py \
-                -t \
-                    ${C_TESTS} \
-                    ${ASSEMBLYSCRIPT_TESTS} \
-                    ${THREAD_PROPOSAL_TESTS} \
-                    ${THREAD_INTERNAL_TESTS} \
-                    ${LIB_SOCKET_TESTS} \
-    exit_code=${PIPESTATUS[0]}
+            -r adapters/wasm-micro-runtime.py \
+            -t \
+                ${C_TESTS} \
+                ${ASSEMBLYSCRIPT_TESTS} \
+                ${THREAD_PROPOSAL_TESTS} \
+                ${THREAD_INTERNAL_TESTS} \
+                ${LIB_SOCKET_TESTS} \
+            --exclude-filter "${THREAD_INTERNAL_TESTS}skip.json"
+
+    ret=${PIPESTATUS[0]}
+    if [ "${ret}" -ne 0 ]; then
+        exit_code=${ret}
+    fi
     deactivate
 else
     target_option=""

+ 149 - 0
wamr-compiler/main.c

@@ -9,6 +9,100 @@
 #include "wasm_export.h"
 #include "aot_export.h"
 
+#if BH_HAS_DLFCN
+#include <dlfcn.h>
+
+typedef uint32 (*get_native_lib_func)(char **p_module_name,
+                                      NativeSymbol **p_native_symbols);
+
+static uint32
+load_and_register_native_libs(const char **native_lib_list,
+                              uint32 native_lib_count,
+                              void **native_handle_list)
+{
+    uint32 i, native_handle_count = 0, n_native_symbols;
+    NativeSymbol *native_symbols;
+    char *module_name;
+    void *handle;
+
+    for (i = 0; i < native_lib_count; i++) {
+        /* open the native library */
+        if (!(handle = dlopen(native_lib_list[i], RTLD_NOW | RTLD_GLOBAL))
+            && !(handle = dlopen(native_lib_list[i], RTLD_LAZY))) {
+            LOG_WARNING("warning: failed to load native library %s",
+                        native_lib_list[i]);
+            continue;
+        }
+
+        /* lookup get_native_lib func */
+        get_native_lib_func get_native_lib = dlsym(handle, "get_native_lib");
+        if (!get_native_lib) {
+            LOG_WARNING("warning: failed to lookup `get_native_lib` function "
+                        "from native lib %s",
+                        native_lib_list[i]);
+            dlclose(handle);
+            continue;
+        }
+
+        n_native_symbols = get_native_lib(&module_name, &native_symbols);
+
+        /* register native symbols */
+        if (!(n_native_symbols > 0 && module_name && native_symbols
+              && wasm_runtime_register_natives(module_name, native_symbols,
+                                               n_native_symbols))) {
+            LOG_WARNING("warning: failed to register native lib %s",
+                        native_lib_list[i]);
+            dlclose(handle);
+            continue;
+        }
+
+        native_handle_list[native_handle_count++] = handle;
+    }
+
+    return native_handle_count;
+}
+
+static void
+unregister_and_unload_native_libs(uint32 native_lib_count,
+                                  void **native_handle_list)
+{
+    uint32 i, n_native_symbols;
+    NativeSymbol *native_symbols;
+    char *module_name;
+    void *handle;
+
+    for (i = 0; i < native_lib_count; i++) {
+        handle = native_handle_list[i];
+
+        /* lookup get_native_lib func */
+        get_native_lib_func get_native_lib = dlsym(handle, "get_native_lib");
+        if (!get_native_lib) {
+            LOG_WARNING("warning: failed to lookup `get_native_lib` function "
+                        "from native lib %p",
+                        handle);
+            continue;
+        }
+
+        n_native_symbols = get_native_lib(&module_name, &native_symbols);
+        if (n_native_symbols == 0 || module_name == NULL
+            || native_symbols == NULL) {
+            LOG_WARNING("warning: get_native_lib returned different values for "
+                        "native lib %p",
+                        handle);
+            continue;
+        }
+
+        /* unregister native symbols */
+        if (!wasm_runtime_unregister_natives(module_name, native_symbols)) {
+            LOG_WARNING("warning: failed to unregister native lib %p", handle);
+            continue;
+        }
+
+        dlclose(handle);
+    }
+}
+#endif
+
 /* clang-format off */
 static void
 print_help()
@@ -66,11 +160,20 @@ print_help()
     printf("  --enable-dump-call-stack  Enable stack trace feature\n");
     printf("  --enable-perf-profiling   Enable function performance profiling\n");
     printf("  --enable-memory-profiling Enable memory usage profiling\n");
+    printf("  --xip                     A shorthand of --enalbe-indirect-mode --disable-llvm-intrinsics\n");
     printf("  --enable-indirect-mode    Enalbe call function through symbol table but not direct call\n");
     printf("  --enable-gc               Enalbe GC (Garbage Collection) feature\n");
     printf("  --disable-llvm-intrinsics Disable the LLVM built-in intrinsics\n");
+    printf("  --enable-builtin-intrinsics=<flags>\n");
+    printf("                            Enable the specified built-in intrinsics, it will override the default\n");
+    printf("                              settings. It only takes effect when --disable-llvm-intrinsics is set.\n");
+    printf("                            Available flags: all, i32.common, i64.common, f32.common, f64.common,\n");
+    printf("                              i32.clz, i32.ctz, etc, refer to doc/xip.md for full list\n");
+    printf("                            Use comma to separate, please refer to doc/xip.md for full list.\n");
     printf("  --disable-llvm-lto        Disable the LLVM link time optimization\n");
     printf("  --enable-llvm-pgo         Enable LLVM PGO (Profile-Guided Optimization)\n");
+    printf("  --enable-llvm-passes=<passes>\n");
+    printf("                            Enable the specified LLVM passes, using comma to separate\n");
     printf("  --use-prof-file=<file>    Use profile file collected by LLVM PGO (Profile-Guided Optimization)\n");
     printf("  --enable-segue[=<flags>]  Enable using segment register GS as the base address of linear memory,\n");
     printf("                            only available on linux/linux-sgx x86-64, which may improve performance,\n");
@@ -82,6 +185,11 @@ print_help()
     printf("                            Emit the specified custom sections to AoT file, using comma to separate\n");
     printf("                            multiple names, e.g.\n");
     printf("                                --emit-custom-sections=section1,section2,sectionN\n");
+#if BH_HAS_DLFCN
+    printf("  --native-lib=<lib>       Register native libraries to the WASM module, which\n");
+    printf("                           are shared object (.so) files, for example:\n");
+    printf("                             --native-lib=test1.so --native-lib=test2.so\n");
+#endif
     printf("  -v=n                      Set log verbose level (0 to 5, default is 2), larger with more log\n");
     printf("  --version                 Show version information\n");
     printf("Examples: wamrc -o test.aot test.wasm\n");
@@ -205,6 +313,12 @@ main(int argc, char *argv[])
     int log_verbose_level = 2;
     bool sgx_mode = false, size_level_set = false;
     int exit_status = EXIT_FAILURE;
+#if BH_HAS_DLFCN
+    const char *native_lib_list[8] = { NULL };
+    uint32 native_lib_count = 0;
+    void *native_handle_list[8] = { NULL };
+    uint32 native_handle_count = 0;
+#endif
 
     option.opt_level = 3;
     option.size_level = 3;
@@ -328,6 +442,10 @@ main(int argc, char *argv[])
         else if (!strcmp(argv[0], "--enable-memory-profiling")) {
             option.enable_stack_estimation = true;
         }
+        else if (!strcmp(argv[0], "--xip")) {
+            option.is_indirect_mode = true;
+            option.disable_llvm_intrinsics = true;
+        }
         else if (!strcmp(argv[0], "--enable-indirect-mode")) {
             option.is_indirect_mode = true;
         }
@@ -337,12 +455,22 @@ main(int argc, char *argv[])
         else if (!strcmp(argv[0], "--disable-llvm-intrinsics")) {
             option.disable_llvm_intrinsics = true;
         }
+        else if (!strncmp(argv[0], "--enable-builtin-intrinsics=", 28)) {
+            if (argv[0][28] == '\0')
+                PRINT_HELP_AND_EXIT();
+            option.builtin_intrinsics = argv[0] + 28;
+        }
         else if (!strcmp(argv[0], "--disable-llvm-lto")) {
             option.disable_llvm_lto = true;
         }
         else if (!strcmp(argv[0], "--enable-llvm-pgo")) {
             option.enable_llvm_pgo = true;
         }
+        else if (!strncmp(argv[0], "--enable-llvm-passes=", 21)) {
+            if (argv[0][21] == '\0')
+                PRINT_HELP_AND_EXIT();
+            option.llvm_passes = argv[0] + 21;
+        }
         else if (!strncmp(argv[0], "--use-prof-file=", 16)) {
             if (argv[0][16] == '\0')
                 PRINT_HELP_AND_EXIT();
@@ -372,6 +500,18 @@ main(int argc, char *argv[])
 
             option.custom_sections_count = len;
         }
+#if BH_HAS_DLFCN
+        else if (!strncmp(argv[0], "--native-lib=", 13)) {
+            if (argv[0][13] == '\0')
+                PRINT_HELP_AND_EXIT();
+            if (native_lib_count >= sizeof(native_lib_list) / sizeof(char *)) {
+                printf("Only allow max native lib number %d\n",
+                       (int)(sizeof(native_lib_list) / sizeof(char *)));
+                goto fail0;
+            }
+            native_lib_list[native_lib_count++] = argv[0] + 13;
+        }
+#endif
         else if (!strncmp(argv[0], "--version", 9)) {
             uint32 major, minor, patch;
             wasm_runtime_get_version(&major, &minor, &patch);
@@ -436,6 +576,12 @@ main(int argc, char *argv[])
 
     bh_log_set_verbose_level(log_verbose_level);
 
+#if BH_HAS_DLFCN
+    bh_print_time("Begin to load native libs");
+    native_handle_count = load_and_register_native_libs(
+        native_lib_list, native_lib_count, native_handle_list);
+#endif
+
     bh_print_time("Begin to load wasm file");
 
     /* load WASM byte buffer from WASM bin file */
@@ -526,6 +672,9 @@ fail2:
     wasm_runtime_free(wasm_file);
 
 fail1:
+#if BH_HAS_DLFCN
+    unregister_and_unload_native_libs(native_handle_count, native_handle_list);
+#endif
     /* Destroy runtime environment */
     wasm_runtime_destroy();