
Implement latest SIMD opcodes and update LLVM to 13.0 (#758)

Implement the latest SIMD opcodes and update LLVM to 13.0,
update the LLVM build scripts, update the sample workloads' build scripts,
and build a customized wasi-sdk to build some workloads.
Also refine the CI rules.

Signed-off-by: Wenyong Huang <wenyong.huang@intel.com>
Wenyong Huang, 4 years ago
parent commit 7be0d385a6
82 files changed, with 4769 additions and 4201 deletions
  1. .github/workflows/android.yml (+0 -112)
  2. .github/workflows/linux.yml (+131 -128)
  3. README.md (+0 -2)
  4. build-scripts/build_llvm.py (+177 -0)
  5. ci/Dockerfile (+73 -8)
  6. ci/build_wamr.sh (+29 -20)
  7. core/iwasm/compilation/aot_compiler.c (+694 -316)
  8. core/iwasm/compilation/aot_compiler.h (+31 -36)
  9. core/iwasm/compilation/aot_emit_control.c (+40 -0)
  10. core/iwasm/compilation/aot_emit_memory.c (+18 -2)
  11. core/iwasm/compilation/aot_emit_numberic.c (+22 -3)
  12. core/iwasm/compilation/aot_llvm.c (+95 -75)
  13. core/iwasm/compilation/aot_llvm.h (+29 -10)
  14. core/iwasm/compilation/simd/simd_access_lanes.c (+30 -85)
  15. core/iwasm/compilation/simd/simd_access_lanes.h (+20 -0)
  16. core/iwasm/compilation/simd/simd_bit_shifts.c (+66 -79)
  17. core/iwasm/compilation/simd/simd_bitmask_extracts.c (+65 -41)
  18. core/iwasm/compilation/simd/simd_bitmask_extracts.h (+10 -4)
  19. core/iwasm/compilation/simd/simd_bitwise_ops.c (+2 -2)
  20. core/iwasm/compilation/simd/simd_bool_reductions.c (+63 -107)
  21. core/iwasm/compilation/simd/simd_bool_reductions.h (+6 -10)
  22. core/iwasm/compilation/simd/simd_common.c (+116 -1)
  23. core/iwasm/compilation/simd/simd_common.h (+27 -0)
  24. core/iwasm/compilation/simd/simd_comparisons.c (+8 -0)
  25. core/iwasm/compilation/simd/simd_comparisons.h (+5 -0)
  26. core/iwasm/compilation/simd/simd_construct_values.c (+35 -85)
  27. core/iwasm/compilation/simd/simd_conversions.c (+583 -600)
  28. core/iwasm/compilation/simd/simd_conversions.h (+58 -8)
  29. core/iwasm/compilation/simd/simd_floating_point.c (+267 -153)
  30. core/iwasm/compilation/simd/simd_floating_point.h (+56 -10)
  31. core/iwasm/compilation/simd/simd_int_arith.c (+338 -117)
  32. core/iwasm/compilation/simd/simd_int_arith.h (+50 -0)
  33. core/iwasm/compilation/simd/simd_load_store.c (+231 -195)
  34. core/iwasm/compilation/simd/simd_load_store.h (+31 -8)
  35. core/iwasm/compilation/simd/simd_sat_int_arith.c (+42 -323)
  36. core/iwasm/compilation/simd/simd_sat_int_arith.h (+4 -34)
  37. core/iwasm/interpreter/wasm_loader.c (+522 -198)
  38. core/iwasm/interpreter/wasm_opcode.h (+129 -57)
  39. doc/build_wamr.md (+1 -0)
  40. doc/build_wasm_app.md (+5 -0)
  41. product-mini/platforms/android/build_llvm.sh (+2 -39)
  42. product-mini/platforms/darwin/build_jit.sh (+10 -0)
  43. product-mini/platforms/darwin/build_llvm.sh (+6 -0)
  44. product-mini/platforms/linux/build_llvm.sh (+1 -41)
  45. product-mini/platforms/windows/build_llvm.py (+9 -62)
  46. samples/workload/README.md (+4 -14)
  47. samples/workload/XNNPACK/CMakeLists.txt (+69 -67)
  48. samples/workload/XNNPACK/README.md (+1 -14)
  49. samples/workload/XNNPACK/benchmark.patch (+14 -0)
  50. samples/workload/XNNPACK/build_workload.sh (+1 -0)
  51. samples/workload/XNNPACK/docker_build.sh (+0 -1)
  52. samples/workload/XNNPACK/xnnpack.patch (+35 -600)
  53. samples/workload/bwa/CMakeLists.bwa_wasm.txt (+6 -14)
  54. samples/workload/bwa/CMakeLists.txt (+10 -40)
  55. samples/workload/bwa/build_workload.sh (+1 -0)
  56. samples/workload/bwa/docker_build.sh (+0 -1)
  57. samples/workload/cmake/preparation.cmake (+25 -52)
  58. samples/workload/docker/.gitignore (+0 -1)
  59. samples/workload/docker/Dockerfile (+0 -93)
  60. samples/workload/docker/build_workload.sh (+33 -0)
  61. samples/workload/docker/docker_build.sh (+0 -53)
  62. samples/workload/meshoptimizer/CMakeLists.txt (+6 -5)
  63. samples/workload/meshoptimizer/build_workload.sh (+1 -0)
  64. samples/workload/meshoptimizer/codecbench.patch (+73 -2)
  65. samples/workload/meshoptimizer/docker_build.sh (+0 -1)
  66. samples/workload/preparation.sh (+2 -15)
  67. samples/workload/tensorflow/README.md (+2 -2)
  68. samples/workload/tensorflow/build.sh (+1 -1)
  69. samples/workload/wasm-av1/CMakeLists.avx_wasm.txt (+3 -10)
  70. samples/workload/wasm-av1/CMakeLists.txt (+4 -25)
  71. samples/workload/wasm-av1/build.sh (+1 -1)
  72. samples/workload/wasm-av1/build_workload.sh (+1 -0)
  73. samples/workload/wasm-av1/docker_build.sh (+0 -1)
  74. test-tools/.gitignore (+1 -0)
  75. test-tools/build-wasi-sdk/build_wasi_sdk.py (+299 -0)
  76. test-tools/build-wasi-sdk/include/.gitkeep (+0 -0)
  77. test-tools/build-wasi-sdk/patches/wasi_libc.patch (+13 -0)
  78. test-tools/build-wasi-sdk/patches/wasi_sdk.patch (+15 -0)
  79. wamr-compiler/build_llvm.py (+7 -91)
  80. wamr-compiler/build_llvm.sh (+1 -41)
  81. wamr-compiler/build_llvm_arc.sh (+1 -42)
  82. wamr-compiler/build_llvm_xtensa.sh (+2 -43)

+ 0 - 112
.github/workflows/android.yml

@@ -1,112 +0,0 @@
-# This is a basic workflow to help you get started with Actions
-
-name: Android
-
-# Controls when the action will run. 
-on:
-  # Triggers the workflow on push or pull request events but only for the master branch
-  push:
-    branches: [ main ]
-  pull_request:
-    branches: [ main ]
-
-  # Allows you to run this workflow manually from the Actions tab
-  workflow_dispatch:
-
-# A workflow run is made up of one or more jobs that can run sequentially or in parallel
-jobs:
-  # This workflow contains a single job called "build"
-  build:
-    # The type of runner that the job will run on
-    runs-on: ubuntu-latest
-
-    # Steps represent a sequence of tasks that will be executed as part of the job
-    steps:
-      
-      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
-      - uses: actions/checkout@v2
-      
-      - name: Build iwasm [default]
-        run: |
-          cd product-mini/platforms/android
-          mkdir build && cd build
-          cmake ..
-          make -j $(nproc)
-          cd .. && rm -rf build
-      - name: Build iwasm [classic interp]
-        run: |
-          cd product-mini/platforms/android
-          mkdir build && cd build
-          cmake .. -DWAMR_BUILD_FAST_INTERP=0
-          make -j $(nproc)
-          cd .. && rm -rf build
-      - name: Build iwasm [multi module]
-        run: |
-          cd product-mini/platforms/android
-          mkdir build && cd build
-          cmake .. -DWAMR_BUILD_MULTI_MODULE=1
-          make -j $(nproc)
-          cd .. && rm -rf build
-      - name: Build iwasm [lib-pthread]
-        run: |
-          cd product-mini/platforms/android
-          mkdir build && cd build
-          cmake .. -DWAMR_BUILD_LIB_PTHREAD=1
-          make -j $(nproc)
-          cd .. && rm -rf build
-      - name: Build iwasm [aot only]
-        run: |
-          cd product-mini/platforms/android
-          mkdir build && cd build
-          cmake .. -DWAMR_BUILD_AOT=1 -DWAMR_BUILD_INTERP=0
-          make -j $(nproc)
-          cd .. && rm -rf build
-      - name: Build iwasm [interp only]
-        run: |
-          cd product-mini/platforms/android
-          mkdir build && cd build
-          cmake .. -DWAMR_BUILD_AOT=0
-          make -j $(nproc)
-          cd .. && rm -rf build
-      - name: Build iwasm [memory profiling]
-        run: |
-          cd product-mini/platforms/android
-          mkdir build && cd build
-          cmake .. -DWAMR_BUILD_MEMORY_PROFILING=1
-          make -j $(nproc)
-          cd .. && rm -rf build
-      - name: Build iwasm [tail call]
-        run: |
-          cd product-mini/platforms/android
-          mkdir build && cd build
-          cmake .. -DWAMR_BUILD_TAIL_CALL=1
-          make -j $(nproc)
-          cd .. && rm -rf build
-      - name: Build iwasm [custom name section]
-        run: |
-          cd product-mini/platforms/android
-          mkdir build && cd build
-          cmake .. -DWAMR_BUILD_CUSTOM_NAME_SECTION=1
-          make -j $(nproc)
-          cd .. && rm -rf build
-      - name: Build iwasm [disable hardware boundary check]
-        run: |
-          cd product-mini/platforms/android
-          mkdir build && cd build
-          cmake .. -DWAMR_DISABLE_HW_BOUND_CHECK=1
-          make -j $(nproc)
-          cd .. && rm -rf build
-      - name: Build iwasm [reference types]
-        run: |
-          cd product-mini/platforms/android
-          mkdir build && cd build
-          cmake .. -DWAMR_BUILD_REF_TYPES=1
-          make -j $(nproc)
-          cd .. && rm -rf build
-      - name: Build iwasm [128-bit SIMD]
-        run: |
-          cd product-mini/platforms/android
-          mkdir build && cd build
-          cmake .. -DWAMR_BUILD_SIMD=1
-          make -j $(nproc)
-          cd .. && rm -rf build

+ 131 - 128
.github/workflows/linux.yml

@@ -16,119 +16,154 @@ on:
     - 'doc/**'
 
 jobs:
-  build:
+  build_llvm_libraries:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
         os: [ubuntu-18.04, ubuntu-20.04]
     steps:
-    - uses: actions/checkout@v2
-
-    - name: Build iwasm [default]
-      run: |
-        cd product-mini/platforms/linux
-        mkdir build && cd build
-        cmake ..
-        make -j $(nproc)
-        cd .. && rm -rf build
-    - name: Build iwasm [classic interp]
-      run: |
-        cd product-mini/platforms/linux
-        mkdir build && cd build
-        cmake .. -DWAMR_BUILD_FAST_INTERP=0
-        make -j $(nproc)
-        cd .. && rm -rf build
-    - name: Build iwasm [multi module]
-      run: |
-        cd product-mini/platforms/linux
-        mkdir build && cd build
-        cmake .. -DWAMR_BUILD_MULTI_MODULE=1
-        make -j $(nproc)
-        cd .. && rm -rf build
-    - name: Build iwasm [lib-pthread]
-      run: |
-        cd product-mini/platforms/linux
-        mkdir build && cd build
-        cmake .. -DWAMR_BUILD_LIB_PTHREAD=1
-        make -j $(nproc)
-        cd .. && rm -rf build
-    - name: Build iwasm [aot only]
-      run: |
-        cd product-mini/platforms/linux
-        mkdir build && cd build
-        cmake .. -DWAMR_BUILD_AOT=1 -DWAMR_BUILD_INTERP=0
-        make -j $(nproc)
-        cd .. && rm -rf build
-    - name: Build iwasm [interp only]
-      run: |
-        cd product-mini/platforms/linux
-        mkdir build && cd build
-        cmake .. -DWAMR_BUILD_AOT=0
-        make -j $(nproc)
-        cd .. && rm -rf build
-    - name: Build iwasm [memory profiling]
-      run: |
-        cd product-mini/platforms/linux
-        mkdir build && cd build
-        cmake .. -DWAMR_BUILD_MEMORY_PROFILING=1
-        make -j $(nproc)
-        cd .. && rm -rf build
-    - name: Build iwasm [tail call]
-      run: |
-        cd product-mini/platforms/linux
-        mkdir build && cd build
-        cmake .. -DWAMR_BUILD_TAIL_CALL=1
-        make -j $(nproc)
-        cd .. && rm -rf build
-    - name: Build iwasm [custom name section]
-      run: |
-        cd product-mini/platforms/linux
-        mkdir build && cd build
-        cmake .. -DWAMR_BUILD_CUSTOM_NAME_SECTION=1
-        make -j $(nproc)
-        cd .. && rm -rf build
-    - name: Build iwasm [disable hardware boundary check]
-      run: |
-        cd product-mini/platforms/linux
-        mkdir build && cd build
-        cmake .. -DWAMR_DISABLE_HW_BOUND_CHECK=1
-        make -j $(nproc)
-        cd .. && rm -rf build
-    - name: Build iwasm [reference types]
-      run: |
-        cd product-mini/platforms/linux
-        mkdir build && cd build
-        cmake .. -DWAMR_BUILD_REF_TYPES=1
-        make -j $(nproc)
-        cd .. && rm -rf build
-    - name: Build iwasm [128-bit SIMD]
-      run: |
-        cd product-mini/platforms/linux
-        mkdir build && cd build
-        cmake .. -DWAMR_BUILD_SIMD=1
-        make -j $(nproc)
-        cd .. && rm -rf build
+    - name: checkout
+      uses: actions/checkout@v2
     - name: Cache LLVM libraries
-      uses: actions/cache@v2
       id: cache_llvm
+      uses: actions/cache@v2
       env:
         cache-name: llvm_libraries
       with:
-        path: ./core/deps/llvm
-        key: ${{ runner.os }}-build-${{env.cache-name}}
-        restore-keys: ${{ runner.os }}-build-${{env.cache-name}}
+        path: ./core/deps/llvm/build/LLVM-13.0.0-Linux.tar.gz
+        key: ${{ matrix.os }}-build-${{env.cache-name}}
+        restore-keys: ${{ matrix.os }}-build-${{env.cache-name}}
     - name: Build llvm and clang from source
+      id: build_llvm
       if: steps.cache_llvm.outputs.cache-hit != 'true'
       run: |
         cd wamr-compiler
         ./build_llvm.sh
+        cd ../core/deps/llvm/build/
+        make package
+
+  build_wamrc:
+    needs: build_llvm_libraries
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-18.04, ubuntu-20.04]
+    steps:
+    - name: checkout
+      uses: actions/checkout@v2
+    - name: Get LLVM libraries
+      id: cache_llvm
+      uses: actions/cache@v2
+      env:
+        cache-name: llvm_libraries
+      with:
+        path: ./core/deps/llvm/build/LLVM-13.0.0-Linux.tar.gz
+        key: ${{ matrix.os }}-build-${{env.cache-name}}
+        restore-keys: ${{ matrix.os }}-build-${{env.cache-name}}
+    - name: Quit if cache miss
+      if: steps.cache_llvm.outputs.cache-hit != 'true'
+      run: exit 1
+    - name: Extract the LLVM package
+      run: tar xf LLVM-13.0.0-Linux.tar.gz --strip-components=1
+      working-directory: ./core/deps/llvm/build
     - name: Build wamrc
       run: |
-        cd wamr-compiler
         mkdir build && cd build
-        cmake ..
-        make -j $(nproc)
-        cd ..
+        cmake .. && make -j $(nproc)
+      working-directory: wamr-compiler
+    - name: Upload Wamrc
+      uses: actions/upload-artifact@v2
+      with:
+        name: wamrc_bin-${{ matrix.os }}
+        path: ./wamr-compiler/build/wamrc
+        retention-days: 1
+
+  build_iwasm:
+    needs: build_llvm_libraries
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        make_options: [
+          # Running mode
+          "-DWAMR_BUILD_INERP=1 -DWAMR_BUILD_JIT=0 -DWAMR_BUILD_AOT=0 -DWAMR_BUILD_FAST_INTERP=1",
+          "-DWAMR_BUILD_INERP=1 -DWAMR_BUILD_JIT=0 -DWAMR_BUILD_AOT=0 -DWAMR_BUILD_FAST_INTERP=0",
+          "-DWAMR_BUILD_INERP=0 -DWAMR_BUILD_JIT=1 -DWAMR_BUILD_AOT=0",
+          "-DWAMR_BUILD_INERP=0 -DWAMR_BUILD_JIT=1 -DWAMR_BUILD_AOT=1",
+          # Features
+          "-DWAMR_BUILD_CUSTOM_NAME_SECTION=1",
+          "-DWAMR_BUILD_LIB_PTHREAD=1",
+          "-DWAMR_BUILD_MEMORY_PROFILING=1",
+          "-DWAMR_BUILD_MULTI_MODULE=1",
+          "-DWAMR_BUILD_REF_TYPES=1",
+          "-DWAMR_BUILD_SIMD=1",
+          "-DWAMR_BUILD_TAIL_CALL=1",
+          "-DWAMR_DISABLE_HW_BOUND_CHECK=1",
+        ]
+        os: [ubuntu-18.04, ubuntu-20.04]
+        platform: [linux, android]
+    steps:
+    - name: checkout
+      uses: actions/checkout@v2
+    - name: Get LLVM libraries
+      id: cache_llvm
+      uses: actions/cache@v2
+      env:
+        cache-name: llvm_libraries
+      with:
+        path: ./core/deps/llvm/build/LLVM-13.0.0-Linux.tar.gz
+        key: ${{ matrix.os }}-build-${{env.cache-name}}
+        restore-keys: ${{ matrix.os }}-build-${{env.cache-name}}
+    - name: Quit if cache miss
+      if: steps.cache_llvm.outputs.cache-hit != 'true'
+      run: exit 1
+    - name: Extract the LLVM package
+      run: tar xf LLVM-13.0.0-Linux.tar.gz --strip-components=1
+      working-directory: ./core/deps/llvm/build
+    - name: Build iwasm
+      run: |
+        mkdir build && cd build
+        cmake .. ${{ matrix.make_options }} && make -j $(nproc)
+        cd .. && rm -rf build
+      working-directory:  product-mini/platforms/${{ matrix.platform }}
+
+  build_samples:
+    needs: [build_llvm_libraries, build_wamrc]
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-18.04, ubuntu-20.04]
+        make_options: [
+          # Running mode
+          "-DWAMR_BUILD_INERP=1 -DWAMR_BUILD_JIT=0 -DWAMR_BUILD_AOT=0 -DWAMR_BUILD_FAST_INTERP=1",
+          "-DWAMR_BUILD_INERP=1 -DWAMR_BUILD_JIT=0 -DWAMR_BUILD_AOT=0 -DWAMR_BUILD_FAST_INTERP=0",
+          "-DWAMR_BUILD_INERP=0 -DWAMR_BUILD_JIT=1 -DWAMR_BUILD_AOT=0",
+          "-DWAMR_BUILD_INERP=0 -DWAMR_BUILD_JIT=1 -DWAMR_BUILD_AOT=1",
+        ]
+    steps:
+    - name: checkout
+      uses: actions/checkout@v2
+    - name: Get LLVM libraries
+      id: cache_llvm
+      uses: actions/cache@v2
+      env:
+        cache-name: llvm_libraries
+      with:
+        path: ./core/deps/llvm/build/LLVM-13.0.0-Linux.tar.gz
+        key: ${{ matrix.os }}-build-${{env.cache-name}}
+        restore-keys: ${{ matrix.os }}-build-${{env.cache-name}}
+    - name: Quit if cache miss
+      if: steps.cache_llvm.outputs.cache-hit != 'true'
+      run: exit 1
+    - name: Extract the LLVM package
+      run: tar xf LLVM-13.0.0-Linux.tar.gz --strip-components=1
+      working-directory: ./core/deps/llvm/build
+    - name: Download Wamrc
+      uses: actions/download-artifact@v2
+      with:
+        name: wamrc_bin-${{ matrix.os }}
+        path: ./wamr-compiler/build
+    - name: Give execution rights
+      run: chmod a+x ./wamr-compiler/build/wamrc
     - name: download and install wasi-sdk
       run: |
         cd /opt
@@ -147,39 +182,7 @@ jobs:
       run: |
         cd samples/wasm-c-api
         mkdir build && cd build
-        cmake ..
-        make -j $(nproc)
-        ./callback
-        ./callback_chain
-        ./global
-        ./hello
-        ./hostref
-        ./memory
-        ./reflect
-        ./table
-        ./trap
-        cd .. && rm -r build
-    - name: Build Sample [wasm-c-api] [Jit]
-      run: |
-        cd samples/wasm-c-api
-        mkdir build && cd build
-        cmake -DWAMR_BUILD_INTERP=0 -DWAMR_BUILD_JIT=1 -DWAMR_BUILD_AOT=1 ..
-        make -j $(nproc)
-        ./callback
-        ./callback_chain
-        ./global
-        ./hello
-        ./hostref
-        ./memory
-        ./reflect
-        ./table
-        ./trap
-        cd .. && rm -r build
-    - name: Build Sample [wasm-c-api] [Aot]
-      run: |
-        cd samples/wasm-c-api
-        mkdir build && cd build
-        cmake -DWAMR_BUILD_INTERP=0 -DWAMR_BUILD_JIT=0 -DWAMR_BUILD_AOT=1 ..
+        cmake .. ${{ matrix.make_options }}
         make -j $(nproc)
         ./callback
         ./callback_chain

+ 0 - 2
README.md

@@ -77,8 +77,6 @@ For **Windows**:
 ```shell
 cd wamr-compiler
 python build_llvm.py
-open LLVM.sln in wasm-micro-runtime\core\deps\llvm\win32build with Visual Studio
-build LLVM.sln Release
 mkdir build && cd build
 cmake ..
 cmake --build . --config Release

+ 177 - 0
build-scripts/build_llvm.py

@@ -0,0 +1,177 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2019 Intel Corporation.  All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+
+import argparse
+import os
+import pathlib
+import shlex
+import shutil
+import subprocess
+import sys
+
+
+def clone_llvm(dst_dir, llvm_repo, llvm_branch):
+    """
+    any error will raise CallProcessError
+    """
+    llvm_dir = dst_dir.joinpath("llvm").resolve()
+
+    if not llvm_dir.exists():
+        print(f"Clone llvm to {llvm_dir} ...")
+        GIT_CLONE_CMD = f"git clone --depth 1 --branch {llvm_branch} {llvm_repo} llvm"
+        subprocess.check_output(shlex.split(GIT_CLONE_CMD), cwd=dst_dir)
+    else:
+        print(f"There is an LLVM local repo in {llvm_dir}, keep using it")
+
+    return llvm_dir
+
+
+def build_llvm(llvm_dir, platform, backends):
+    LLVM_COMPILE_OPTIONS = [
+        '-DCMAKE_BUILD_TYPE:STRING="Release"',
+        "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON",
+        "-DLLVM_APPEND_VC_REV:BOOL=ON",
+        "-DLLVM_BUILD_BENCHMARKS:BOOL=OFF",
+        "-DLLVM_BUILD_DOCS:BOOL=OFF",
+        "-DLLVM_BUILD_EXAMPLES:BOOL=OFF",
+        "-DLLVM_BUILD_LLVM_DYLIB:BOOL=OFF",
+        "-DLLVM_BUILD_TESTS:BOOL=OFF",
+        "-DLLVM_CCACHE_BUILD:BOOL=OFF",
+        "-DLLVM_ENABLE_BINDINGS:BOOL=OFF",
+        "-DLLVM_ENABLE_IDE:BOOL=OFF",
+        "-DLLVM_ENABLE_LIBXML2:BOOL=OFF",
+        "-DLLVM_ENABLE_TERMINFO:BOOL=OFF",
+        "-DLLVM_ENABLE_ZLIB:BOOL=OFF",
+        "-DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF",
+        "-DLLVM_INCLUDE_DOCS:BOOL=OFF",
+        "-DLLVM_INCLUDE_EXAMPLES:BOOL=OFF",
+        "-DLLVM_INCLUDE_UTILS:BOOL=OFF",
+        "-DLLVM_INCLUDE_TESTS:BOOL=OFF",
+        "-DLLVM_INCLUDE_TOOLS:BOOL=OFF",
+        "-DLLVM_OPTIMIZED_TABLEGEN:BOOL=ON",
+    ]
+
+    LLVM_EXTRA_COMPILER_OPTIONS = {
+        "arc": [
+            '-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD:STRING="ARC"',
+            "-DLLVM_ENABLE_LIBICUUC:BOOL=OFF",
+            "-DLLVM_ENABLE_LIBICUDATA:BOOL=OFF",
+        ],
+        "xtensa": [
+            '-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD:STRING="Xtensa"',
+        ],
+        "windows": [
+            "-DCMAKE_INSTALL_PREFIX=LLVM-install",
+        ],
+        "default": [],
+    }
+
+    LLVM_TARGETS_TO_BUILD = [
+        "-DLLVM_TARGETS_TO_BUILD:STRING=" + ";".join(backends)
+        if backends
+        else '-DLLVM_TARGETS_TO_BUILD:STRING="AArch64;ARM;Mips;RISCV;X86"'
+    ]
+
+    if not llvm_dir.exists():
+        raise Exception(f"{llvm_dir} doesn't exist")
+
+    build_dir = llvm_dir.joinpath(
+        "win32build" if "windows" == platform else "build"
+    ).resolve()
+    build_dir.mkdir(exist_ok=True)
+
+    lib_llvm_core_library = build_dir.joinpath("lib/libLLVMCore.a").resolve()
+    if lib_llvm_core_library.exists():
+        print(f"Please remove {build_dir} manually and try again")
+        return
+
+    compile_options = " ".join(
+        LLVM_COMPILE_OPTIONS
+        + LLVM_EXTRA_COMPILER_OPTIONS.get(
+            platform, LLVM_EXTRA_COMPILER_OPTIONS["default"]
+        )
+        + LLVM_TARGETS_TO_BUILD
+    )
+
+    CONFIG_CMD = f"cmake {compile_options} ../llvm "
+    subprocess.check_call(shlex.split(CONFIG_CMD), cwd=build_dir)
+
+    BUILD_CMD = f"cmake --build . --parallel {os.cpu_count()}" + (
+        " --config Release" if "windows" == platform else ""
+    )
+
+    subprocess.check_call(shlex.split(BUILD_CMD), cwd=build_dir)
+
+    return build_dir
+
+
+def main():
+    parser = argparse.ArgumentParser(description="build necessary LLVM libraries")
+    parser.add_argument(
+        "--platform",
+        type=str,
+        choices=["android", "arc", "darwin", "linux", "windows", "xtensa"],
+        help="identify current platform",
+    )
+    parser.add_argument(
+        "--arch",
+        nargs="+",
+        type=str,
+        choices=["AArch64", "ARC", "ARM", "Mips", "RISCV", "X86", "Xtensa"],
+        help="identify LLVM supported backends, separate by space, like '--arch ARM Mips X86'",
+    )
+    options = parser.parse_args()
+
+    # if the "platform" is not identified in the command line option,
+    # detect it
+    if not options.platform:
+        if sys.platform.startswith("win32") or sys.platform.startswith("msys"):
+            platform = "windows"
+        elif sys.platform.startswith("darwin"):
+            platform = "darwin"
+        else:
+            platform = "linux"
+    else:
+        platform = options.platform
+
+    print(f"========== Build LLVM for {platform} ==========\n")
+
+    llvm_repo_and_branch = {
+        "arc": {
+            "repo": "https://github.com/llvm/llvm-project.git",
+            "branch": "release/13.x"
+        },
+        "xtensa": {
+            "repo": "https://github.com/espressif/llvm-project.git",
+            "branch": "xtensa_release_11.0.0",
+        },
+        "default": {
+            "repo": "https://github.com/llvm/llvm-project.git",
+            "branch": "release/13.x",
+        },
+    }
+
+    # retrieve the real file
+    current_file = pathlib.Path(__file__)
+    if current_file.is_symlink():
+        current_file = pathlib.Path(os.readlink(current_file))
+
+    current_dir = current_file.parent.resolve()
+    deps_dir = current_dir.joinpath("../core/deps").resolve()
+
+    print(f"==================== CLONE LLVM ====================")
+    llvm_info = llvm_repo_and_branch.get(platform, llvm_repo_and_branch["default"])
+    llvm_dir = clone_llvm(deps_dir, llvm_info["repo"], llvm_info["branch"])
+
+    print()
+    print(f"==================== BUILD LLVM ====================")
+    build_llvm(llvm_dir, platform, options.arch)
+
+    print()
+
+
+if __name__ == "__main__":
+    main()

+ 73 - 8
ci/Dockerfile

@@ -1,18 +1,83 @@
 # Copyright (C) 2019 Intel Corporation.  All rights reserved.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-FROM ubuntu:18.04
+ARG VARIANT=need_to_assign
+FROM ubuntu:${VARIANT}
+
+ARG DEBIAN_FRONTEND=noninteractive
+ENV TZ=Asian/Shanghai
 
 RUN apt update \
   && apt install -y apt-transport-https ca-certificates gnupg \
-       software-properties-common wget lsb-release curl build-essential
+      tzdata lsb-release software-properties-common build-essential \
+      apt-utils curl wget git tree unzip zip vim
 
 #
 # CMAKE (https://apt.kitware.com/)
-RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null \
-  && apt purge --auto-remove cmake \
-  && apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main' \
+RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg > /dev/null \
+  && echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ bionic main' | tee /etc/apt/sources.list.d/kitware.list >/dev/null \
   && apt update \
-  && apt-get install -y kitware-archive-keyring \
-  && rm /etc/apt/trusted.gpg.d/kitware.gpg \
-  && apt-get install -y cmake
+  && rm /usr/share/keyrings/kitware-archive-keyring.gpg \
+  && apt install -y kitware-archive-keyring \
+  && apt install -y cmake
+
+#
+# install emsdk (may not necessary ?)
+RUN cd /opt \
+  && git clone https://github.com/emscripten-core/emsdk.git
+RUN cd /opt/emsdk \
+  && git pull \
+  && ./emsdk install 2.0.26 \
+  && ./emsdk activate 2.0.26 \
+  && echo "source /opt/emsdk/emsdk_env.sh" >> /root/.bashrc
+
+#
+#install wabt
+ARG WABT_VER=1.0.23
+RUN wget -c https://github.com/WebAssembly/wabt/releases/download/${WABT_VER}/wabt-${WABT_VER}-ubuntu.tar.gz -P /opt
+RUN tar xf /opt/wabt-${WABT_VER}-ubuntu.tar.gz -C /opt \
+  && ln -fs /opt/wabt-${WABT_VER} /opt/wabt
+RUN rm /opt/wabt-${WABT_VER}-ubuntu.tar.gz
+
+#
+# install binaryen
+ARG BINARYEN_VER=version_101
+RUN wget -c https://github.com/WebAssembly/binaryen/releases/download/${BINARYEN_VER}/binaryen-${BINARYEN_VER}-x86_64-linux.tar.gz -P /opt
+RUN tar xf /opt/binaryen-${BINARYEN_VER}-x86_64-linux.tar.gz -C /opt \
+  && ln -fs /opt/binaryen-${BINARYEN_VER} /opt/binaryen
+RUN rm /opt/binaryen-${BINARYEN_VER}-x86_64-linux.tar.gz
+
+
+#
+# install bazelisk
+ARG BAZELISK_VER=1.10.1
+RUN mkdir /opt/bazelisk
+RUN wget -c https://github.com/bazelbuild/bazelisk/releases/download/v${BAZELISK_VER}/bazelisk-linux-amd64 -P /opt/bazelisk
+RUN chmod a+x /opt/bazelisk/bazelisk-linux-amd64 \
+  && ln -fs /opt/bazelisk/bazelisk-linux-amd64 /opt/bazelisk/bazel
+
+# set path
+RUN echo "PATH=/opt/clang_llvm/bin:/opt/wasi-sdk/bin:/opt/wabt/bin:/opt/binaryen/bin:/opt/bazelisk:${PATH}" >> /root/.bashrc
+
+#
+# install
+RUN apt update \
+  && apt install -y ninja-build python2.7 valgrind
+
+#
+# ocaml
+RUN apt update \
+  && apt install -y ocaml ocamlbuild
+
+#
+# PS
+RUN echo "PS1='\n[ \u@wamr-dev-docker \W ]\n$ '" >> /root/.bashrc
+
+# Clean up
+RUN apt-get autoremove -y \
+  && apt-get clean -y \
+  && rm -rf /var/lib/apt/lists/* \
+  && rm -rf /tmp/*
+
+VOLUME workspace
+WORKDIR workspace

+ 29 - 20
ci/build_wamr.sh

@@ -1,24 +1,33 @@
-#!/bin/bash
+#!/usr/bin/env bash
 
 # Copyright (C) 2019 Intel Corporation.  All rights reserved.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-docker build -t wamr_dev:0.1 -f Dockerfile . \
-  && docker run --rm -it \
-       --name wamr_building \
-       --mount type=bind,src=$(realpath .)/..,dst=/source \
-       --workdir /source \
-       wamr_dev:0.1 \
-       /bin/bash -c "\
-         pushd product-mini/platforms/linux \
-           && mkdir -p build  \
-           && pushd build \
-           && rm -rf * \
-           && cmake .. \
-           && make \
-           && popd \
-           && popd \
-           && echo 'Copying binary for image build' \
-           && mkdir -p build_out \
-           && rm build_out/* \
-           && cp -f product-mini/platforms/linux/build/iwasm build_out/iwasm"
+readonly CURRENT_PATH=$(dirname "$(realpath "$0")")
+readonly ROOT=$(realpath "${CURRENT_PATH}/..")
+readonly VARIANT=$(lsb_release -c | awk '{print $2}')
+
+docker build \
+  --build-arg VARIANT=${VARIANT} \
+  --memory=4G --cpu-quota=50000 \
+  -t wamr_dev_${VARIANT}:0.1 -f "${CURRENT_PATH}"/Dockerfile "${CURRENT_PATH}" &&
+  docker run --rm -it \
+    --cpus=".5" \
+    --memory=4G \
+    --name wamr_build_env \
+    --mount type=bind,src="${ROOT}",dst=/workspace \
+    wamr_dev_${VARIANT}:0.1 \
+    /bin/bash -c "\
+      pwd \
+      && pushd product-mini/platforms/linux \
+      && rm -rf build \
+      && mkdir build  \
+      && pushd build \
+      && cmake .. \
+      && make \
+      && popd \
+      && popd \
+      && echo 'Copying the binary ...' \
+      && rm -rf build_out \
+      && mkdir build_out \
+      && cp product-mini/platforms/linux/build/iwasm build_out/iwasm"

The file diff has been suppressed because it is too large
+ 694 - 316
core/iwasm/compilation/aot_compiler.c


+ 31 - 36
core/iwasm/compilation/aot_compiler.h

@@ -28,11 +28,7 @@ typedef enum IntArithmetic {
 
 typedef enum V128Arithmetic {
   V128_ADD = 0,
-  V128_ADD_SATURATE_S,
-  V128_ADD_SATURATE_U,
   V128_SUB,
-  V128_SUB_SATURATE_S,
-  V128_SUB_SATURATE_U,
   V128_MUL,
   V128_DIV,
   V128_NEG,
@@ -52,7 +48,7 @@ typedef enum V128Bitwise {
   V128_ANDNOT,
   V128_OR,
   V128_XOR,
-  V128_BITSELECT
+  V128_BITSELECT,
 } V128Bitwise;
 
 typedef enum IntShift {
@@ -79,7 +75,7 @@ typedef enum FloatArithmetic {
   FLOAT_MUL,
   FLOAT_DIV,
   FLOAT_MIN,
-  FLOAT_MAX
+  FLOAT_MAX,
 } FloatArithmetic;
 
 static inline bool
@@ -246,27 +242,29 @@ check_type_compatible(uint8 src_type, uint8 dst_type)
 #define F64_CONST(v) LLVMConstReal(F64_TYPE, v)
 #define I8_CONST(v) LLVMConstInt(INT8_TYPE, v, true)
 
-#define I8_ZERO     (comp_ctx->llvm_consts.i8_zero)
-#define I32_ZERO    (comp_ctx->llvm_consts.i32_zero)
-#define I64_ZERO    (comp_ctx->llvm_consts.i64_zero)
-#define F32_ZERO    (comp_ctx->llvm_consts.f32_zero)
-#define F64_ZERO    (comp_ctx->llvm_consts.f64_zero)
-#define I32_ONE     (comp_ctx->llvm_consts.i32_one)
-#define I32_TWO     (comp_ctx->llvm_consts.i32_two)
-#define I32_THREE   (comp_ctx->llvm_consts.i32_three)
-#define I32_FOUR    (comp_ctx->llvm_consts.i32_four)
-#define I32_FIVE    (comp_ctx->llvm_consts.i32_five)
-#define I32_SIX     (comp_ctx->llvm_consts.i32_six)
-#define I32_SEVEN   (comp_ctx->llvm_consts.i32_seven)
-#define I32_EIGHT   (comp_ctx->llvm_consts.i32_eight)
-#define I32_NEG_ONE (comp_ctx->llvm_consts.i32_neg_one)
-#define I64_NEG_ONE (comp_ctx->llvm_consts.i64_neg_one)
-#define I32_MIN     (comp_ctx->llvm_consts.i32_min)
-#define I64_MIN     (comp_ctx->llvm_consts.i64_min)
-#define I32_31     (comp_ctx->llvm_consts.i32_31)
-#define I32_32     (comp_ctx->llvm_consts.i32_32)
-#define I64_63     (comp_ctx->llvm_consts.i64_63)
-#define I64_64     (comp_ctx->llvm_consts.i64_64)
+#define LLVM_CONST(name) (comp_ctx->llvm_consts.name)
+#define I8_ZERO     LLVM_CONST(i8_zero)
+#define I32_ZERO    LLVM_CONST(i32_zero)
+#define I64_ZERO    LLVM_CONST(i64_zero)
+#define F32_ZERO    LLVM_CONST(f32_zero)
+#define F64_ZERO    LLVM_CONST(f64_zero)
+#define I32_ONE     LLVM_CONST(i32_one)
+#define I32_TWO     LLVM_CONST(i32_two)
+#define I32_THREE   LLVM_CONST(i32_three)
+#define I32_FOUR    LLVM_CONST(i32_four)
+#define I32_FIVE    LLVM_CONST(i32_five)
+#define I32_SIX     LLVM_CONST(i32_six)
+#define I32_SEVEN   LLVM_CONST(i32_seven)
+#define I32_EIGHT   LLVM_CONST(i32_eight)
+#define I32_NEG_ONE LLVM_CONST(i32_neg_one)
+#define I64_NEG_ONE LLVM_CONST(i64_neg_one)
+#define I32_MIN     LLVM_CONST(i32_min)
+#define I64_MIN     LLVM_CONST(i64_min)
+#define I32_31      LLVM_CONST(i32_31)
+#define I32_32      LLVM_CONST(i32_32)
+#define I64_63      LLVM_CONST(i64_63)
+#define I64_64      LLVM_CONST(i64_64)
+#define REF_NULL    I32_NEG_ONE
 
 #define V128_TYPE       comp_ctx->basic_types.v128_type
 #define V128_PTR_TYPE   comp_ctx->basic_types.v128_ptr_type
@@ -277,15 +275,12 @@ check_type_compatible(uint8 src_type, uint8 dst_type)
 #define V128_f32x4_TYPE comp_ctx->basic_types.f32x4_vec_type
 #define V128_f64x2_TYPE comp_ctx->basic_types.f64x2_vec_type
 
-#define V128_ZERO       (comp_ctx->llvm_consts.v128_zero)
-#define V128_i8x16_ZERO (comp_ctx->llvm_consts.i8x16_vec_zero)
-#define V128_i16x8_ZERO (comp_ctx->llvm_consts.i16x8_vec_zero)
-#define V128_i32x4_ZERO (comp_ctx->llvm_consts.i32x4_vec_zero)
-#define V128_i64x2_ZERO (comp_ctx->llvm_consts.i64x2_vec_zero)
-#define V128_f32x4_ZERO (comp_ctx->llvm_consts.f32x4_vec_zero)
-#define V128_f64x2_ZERO (comp_ctx->llvm_consts.f64x2_vec_zero)
-
-#define REF_NULL        (comp_ctx->llvm_consts.i32_neg_one)
+#define V128_i8x16_ZERO LLVM_CONST(i8x16_vec_zero)
+#define V128_i16x8_ZERO LLVM_CONST(i16x8_vec_zero)
+#define V128_i32x4_ZERO LLVM_CONST(i32x4_vec_zero)
+#define V128_i64x2_ZERO LLVM_CONST(i64x2_vec_zero)
+#define V128_f32x4_ZERO LLVM_CONST(f32x4_vec_zero)
+#define V128_f64x2_ZERO LLVM_CONST(f64x2_vec_zero)
 
 #define TO_V128_i8x16(v) LLVMBuildBitCast(comp_ctx->builder, v, \
                                           V128_i8x16_TYPE, "i8x16_val")

+ 40 - 0
core/iwasm/compilation/aot_emit_control.c

@@ -435,6 +435,20 @@ aot_compile_op_block(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     }
     else if (label_type == LABEL_TYPE_IF) {
         POP_COND(value);
+
+        if (LLVMIsUndef(value)
+#if LLVM_VERSION_NUMBER >= 12
+            || LLVMIsPoison(value)
+#endif
+        ) {
+            if (!(aot_emit_exception(comp_ctx, func_ctx, EXCE_INTEGER_OVERFLOW,
+                                     false, NULL, NULL))) {
+                goto fail;
+            }
+            return aot_handle_next_reachable_block(comp_ctx, func_ctx,
+                                                   p_frame_ip);
+        }
+
         if (!LLVMIsConstant(value)) {
             /* Compare value is not constant, create condition br IR */
             /* Create entry block */
@@ -791,6 +805,19 @@ aot_compile_op_br_if(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 #endif
 
     POP_COND(value_cmp);
+
+    if (LLVMIsUndef(value_cmp)
+#if LLVM_VERSION_NUMBER >= 12
+        || LLVMIsPoison(value_cmp)
+#endif
+    ) {
+        if (!(aot_emit_exception(comp_ctx, func_ctx, EXCE_INTEGER_OVERFLOW,
+                                 false, NULL, NULL))) {
+            goto fail;
+        }
+        return aot_handle_next_reachable_block(comp_ctx, func_ctx, p_frame_ip);
+    }
+
     if (!LLVMIsConstant(value_cmp)) {
         /* Compare value is not constant, create condition br IR */
         if (!(block_dst = get_target_block(func_ctx, br_depth))) {
@@ -917,6 +944,19 @@ aot_compile_op_br_table(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 #endif
 
     POP_I32(value_cmp);
+
+    if (LLVMIsUndef(value_cmp)
+#if LLVM_VERSION_NUMBER >= 12
+        || LLVMIsPoison(value_cmp)
+#endif
+    ) {
+        if (!(aot_emit_exception(comp_ctx, func_ctx, EXCE_INTEGER_OVERFLOW,
+                                 false, NULL, NULL))) {
+            goto fail;
+        }
+        return aot_handle_next_reachable_block(comp_ctx, func_ctx, p_frame_ip);
+    }
+
     if (!LLVMIsConstant(value_cmp)) {
         /* Compare value is not constant, create switch IR */
         for (i = 0; i <= br_count; i++) {
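
The three control-flow guards added above follow the same recipe: after POP_COND/POP_I32, a value that is undef (or poison, available since LLVM 12) must not be folded or branched on, so the compiler emits a runtime integer-overflow exception and continues with the next reachable block. Below is a minimal, self-contained sketch of that screening order using only the LLVM-C API; this is not WAMR code, and the WAMR helpers (aot_emit_exception, aot_handle_next_reachable_block) are stood in for by printfs.

```c
#include <stdio.h>
#include <llvm-c/Core.h>

int main(void)
{
    LLVMContextRef ctx = LLVMContextCreate();
    LLVMTypeRef i32 = LLVMInt32TypeInContext(ctx);

    /* Stand-in for a popped condition value; here deliberately undef. */
    LLVMValueRef cond = LLVMGetUndef(i32);

    /* LLVMIsPoison() requires LLVM >= 12; this commit moves to LLVM 13. */
    if (LLVMIsUndef(cond) || LLVMIsPoison(cond)) {
        /* WAMR emits EXCE_INTEGER_OVERFLOW here instead of building a
         * br/switch on an undef/poison condition. */
        printf("undef/poison condition: emit exception, skip the branch\n");
    }
    else if (LLVMIsConstant(cond)) {
        printf("constant condition: fold the branch at compile time\n");
    }
    else {
        printf("dynamic condition: build a conditional branch\n");
    }

    LLVMContextDispose(ctx);
    return 0;
}
```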

+ 18 - 2
core/iwasm/compilation/aot_emit_memory.c

@@ -126,8 +126,16 @@ aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 
     POP_I32(addr);
 
+    /*
+     * Note: not throw the integer-overflow-exception here since it must
+     * have been thrown when converting float to integer before
+     */
     /* return addres directly if constant offset and inside memory space */
-    if (LLVMIsConstant(addr)) {
+    if (LLVMIsConstant(addr) && !LLVMIsUndef(addr)
+#if LLVM_VERSION_NUMBER >= 12
+        && !LLVMIsPoison(addr)
+#endif
+    ) {
         uint64 mem_offset = (uint64)LLVMConstIntGetZExtValue(addr)
                              + (uint64)offset;
         uint32 num_bytes_per_page =
@@ -764,8 +772,16 @@ check_bulk_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
         }
     }
 
+    /*
+     * Note: not throw the integer-overflow-exception here since it must
+     * have been thrown when converting float to integer before
+     */
     /* return addres directly if constant offset and inside memory space */
-    if (LLVMIsConstant(offset) && LLVMIsConstant(bytes)) {
+    if (!LLVMIsUndef(offset) && !LLVMIsUndef(bytes)
+#if LLVM_VERSION_NUMBER >= 12
+        && !LLVMIsPoison(offset) && !LLVMIsPoison(bytes)
+#endif
+        && LLVMIsConstant(offset) && LLVMIsConstant(bytes)) {
         uint64 mem_offset = (uint64)LLVMConstIntGetZExtValue(offset);
         uint64 mem_len = (uint64)LLVMConstIntGetZExtValue(bytes);
         uint32 num_bytes_per_page =
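
The note added in both hunks explains why no exception is emitted here: if the address is undef/poison, the trap was already generated by the earlier float-to-int conversion. The extra guards matter because undef and poison values also satisfy LLVMIsConstant(), while LLVMConstIntGetZExtValue() is only meaningful for a genuine integer constant. A standalone sketch of the guarded constant-fold path follows (not WAMR code; the page size and access width are illustrative numbers):

```c
#include <stdio.h>
#include <stdint.h>
#include <llvm-c/Core.h>

int main(void)
{
    LLVMContextRef ctx = LLVMContextCreate();
    LLVMTypeRef i32 = LLVMInt32TypeInContext(ctx);

    LLVMValueRef addr = LLVMConstInt(i32, 1024, 0); /* compile-time address */
    const uint64_t offset = 16, bytes = 4;          /* e.g. i32.load offset=16 */
    const uint64_t mem_data_size = 65536;           /* one 64 KiB wasm page */

    /* Screen out undef/poison first: they count as constants too, but must
     * take the dynamic path (their trap was already emitted earlier). */
    if (LLVMIsConstant(addr) && !LLVMIsUndef(addr) && !LLVMIsPoison(addr)) {
        uint64_t mem_offset = LLVMConstIntGetZExtValue(addr) + offset;
        if (mem_offset + bytes <= mem_data_size)
            printf("statically in bounds: no runtime check emitted\n");
        else
            printf("statically out of bounds: emit the out-of-bounds trap\n");
    }
    else {
        printf("dynamic or undef/poison address: emit the runtime check\n");
    }

    LLVMContextDispose(ctx);
    return 0;
}
```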

+ 22 - 3
core/iwasm/compilation/aot_emit_numberic.c

@@ -36,10 +36,17 @@
     LLVMMoveBasicBlockAfter(block, LLVMGetInsertBlock(comp_ctx->builder)); \
 } while (0)
 
-#define IS_CONST_ZERO(val)                                                 \
-    (LLVMIsConstant(val)                                                   \
-     && ((is_i32 && (int32)LLVMConstIntGetZExtValue(val) == 0)             \
+#if LLVM_VERSION_NUMBER >= 12
+#define IS_CONST_ZERO(val)                                                    \
+    (!LLVMIsUndef(val) && !LLVMIsPoison(val) && LLVMIsConstant(val)           \
+     && ((is_i32 && (int32)LLVMConstIntGetZExtValue(val) == 0)                \
          || (!is_i32 && (int64)LLVMConstIntGetSExtValue(val) == 0)))
+#else
+#define IS_CONST_ZERO(val)                                                    \
+    (!LLVMIsUndef(val) && LLVMIsConstant(val)                                 \
+     && ((is_i32 && (int32)LLVMConstIntGetZExtValue(val) == 0)                \
+         || (!is_i32 && (int64)LLVMConstIntGetSExtValue(val) == 0)))
+#endif
 
 #define CHECK_INT_OVERFLOW(type) do {                                      \
     LLVMValueRef cmp_min_int, cmp_neg_one;                                 \
@@ -399,6 +406,18 @@ compile_int_div(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
     POP_INT(right);
     POP_INT(left);
 
+    if (LLVMIsUndef(right) || LLVMIsUndef(left)
+#if LLVM_VERSION_NUMBER >= 12
+        || LLVMIsPoison(right) || LLVMIsPoison(left)
+#endif
+    ) {
+        if (!(aot_emit_exception(comp_ctx, func_ctx, EXCE_INTEGER_OVERFLOW,
+                                 false, NULL, NULL))) {
+            goto fail;
+        }
+        return aot_handle_next_reachable_block(comp_ctx, func_ctx, p_frame_ip);
+    }
+
     if (LLVMIsConstant(right)) {
         int64 right_val = (int64)LLVMConstIntGetSExtValue(right);
         switch (right_val) {

+ 95 - 75
core/iwasm/compilation/aot_llvm.c

@@ -811,7 +811,7 @@ aot_create_func_context(AOTCompData *comp_data, AOTCompContext *comp_ctx,
                 local_value = F64_ZERO;
                 break;
             case VALUE_TYPE_V128:
-                local_value = V128_ZERO;
+                local_value = V128_i64x2_ZERO;
                 break;
             case VALUE_TYPE_FUNCREF:
             case VALUE_TYPE_EXTERNREF:
@@ -963,6 +963,8 @@ aot_set_llvm_basic_types(AOTLLVMTypes *basic_types, LLVMContextRef context)
     basic_types->v128_type = basic_types->i64x2_vec_type;
     basic_types->v128_ptr_type = LLVMPointerType(basic_types->v128_type, 0);
 
+    basic_types->i1x2_vec_type = LLVMVectorType(basic_types->int1_type, 2);
+
     basic_types->funcref_type = LLVMInt32TypeInContext(context);
     basic_types->externref_type = LLVMInt32TypeInContext(context);
 
@@ -979,6 +981,7 @@ aot_set_llvm_basic_types(AOTLLVMTypes *basic_types, LLVMContextRef context)
             && basic_types->i64x2_vec_type
             && basic_types->f32x4_vec_type
             && basic_types->f64x2_vec_type
+            && basic_types->i1x2_vec_type
             && basic_types->meta_data_type
             && basic_types->funcref_type
             && basic_types->externref_type) ? true : false;
@@ -987,73 +990,89 @@ aot_set_llvm_basic_types(AOTLLVMTypes *basic_types, LLVMContextRef context)
 static bool
 aot_create_llvm_consts(AOTLLVMConsts *consts, AOTCompContext *comp_ctx)
 {
-    LLVMValueRef i64_consts[2];
-
-    consts->i8_zero = I8_CONST(0);
-    consts->i32_zero = I32_CONST(0);
-    consts->i64_zero = I64_CONST(0);
-    consts->f32_zero = F32_CONST(0);
-    consts->f64_zero = F64_CONST(0);
-
-    if (consts->i64_zero) {
-        i64_consts[0] = i64_consts[1] = consts->i64_zero;
-        consts->v128_zero = consts->i64x2_vec_zero =
-                                    LLVMConstVector(i64_consts, 2);
-        if (consts->i64x2_vec_zero) {
-            consts->i8x16_vec_zero = TO_V128_i8x16(consts->i64x2_vec_zero);
-            consts->i16x8_vec_zero = TO_V128_i16x8(consts->i64x2_vec_zero);
-            consts->i32x4_vec_zero = TO_V128_i32x4(consts->i64x2_vec_zero);
-            consts->f32x4_vec_zero = TO_V128_f32x4(consts->i64x2_vec_zero);
-            consts->f64x2_vec_zero = TO_V128_f64x2(consts->i64x2_vec_zero);
-        }
-    }
-
-    consts->i32_one = I32_CONST(1);
-    consts->i32_two = I32_CONST(2);
-    consts->i32_three = I32_CONST(3);
-    consts->i32_four = I32_CONST(4);
-    consts->i32_five = I32_CONST(5);
-    consts->i32_six = I32_CONST(6);
-    consts->i32_seven = I32_CONST(7);
-    consts->i32_eight = I32_CONST(8);
-    consts->i32_neg_one = I32_CONST((uint32)-1);
-    consts->i64_neg_one = I64_CONST((uint64)-1);
-    consts->i32_min = I32_CONST((uint32)INT32_MIN);
-    consts->i64_min = I64_CONST((uint64)INT64_MIN);
-    consts->i32_31 = I32_CONST(31);
-    consts->i32_32 = I32_CONST(32);
-    consts->i64_63 = I64_CONST(63);
-    consts->i64_64 = I64_CONST(64);
-    consts->ref_null = I32_CONST(NULL_REF);
-
-    return (consts->i8_zero
-            && consts->i32_zero
-            && consts->i64_zero
-            && consts->f32_zero
-            && consts->f64_zero
-            && consts->i8x16_vec_zero
-            && consts->i16x8_vec_zero
-            && consts->i32x4_vec_zero
-            && consts->i64x2_vec_zero
-            && consts->f32x4_vec_zero
-            && consts->f64x2_vec_zero
-            && consts->i32_one
-            && consts->i32_two
-            && consts->i32_three
-            && consts->i32_four
-            && consts->i32_five
-            && consts->i32_six
-            && consts->i32_seven
-            && consts->i32_eight
-            && consts->i32_neg_one
-            && consts->i64_neg_one
-            && consts->i32_min
-            && consts->i64_min
-            && consts->i32_31
-            && consts->i32_32
-            && consts->i64_63
-            && consts->i64_64
-            && consts->ref_null) ? true : false;
+#define CREATE_I1_CONST(name, value)                                          \
+    if (!(consts->i1_##name =                                                 \
+            LLVMConstInt(comp_ctx->basic_types.int1_type, value, true)))      \
+        return false;
+
+    CREATE_I1_CONST(zero, 0)
+    CREATE_I1_CONST(one, 1)
+#undef CREATE_I1_CONST
+
+    if (!(consts->i8_zero = I8_CONST(0)))
+        return false;
+
+    if (!(consts->f32_zero = F32_CONST(0)))
+        return false;
+
+    if (!(consts->f64_zero = F64_CONST(0)))
+        return false;
+
+#define CREATE_I32_CONST(name, value)                                         \
+    if (!(consts->i32_##name = LLVMConstInt(I32_TYPE, value, true)))          \
+        return false;
+
+    CREATE_I32_CONST(min, (uint32)INT32_MIN)
+    CREATE_I32_CONST(neg_one, (uint32)-1)
+    CREATE_I32_CONST(zero, 0)
+    CREATE_I32_CONST(one, 1)
+    CREATE_I32_CONST(two, 2)
+    CREATE_I32_CONST(three, 3)
+    CREATE_I32_CONST(four, 4)
+    CREATE_I32_CONST(five, 5)
+    CREATE_I32_CONST(six, 6)
+    CREATE_I32_CONST(seven, 7)
+    CREATE_I32_CONST(eight, 8)
+    CREATE_I32_CONST(nine, 9)
+    CREATE_I32_CONST(ten, 10)
+    CREATE_I32_CONST(eleven, 11)
+    CREATE_I32_CONST(twelve, 12)
+    CREATE_I32_CONST(thirteen, 13)
+    CREATE_I32_CONST(fourteen, 14)
+    CREATE_I32_CONST(fifteen, 15)
+    CREATE_I32_CONST(31, 31)
+    CREATE_I32_CONST(32, 32)
+#undef CREATE_I32_CONST
+
+#define CREATE_I64_CONST(name, value)                                         \
+    if (!(consts->i64_##name = LLVMConstInt(I64_TYPE, value, true)))          \
+        return false;
+
+    CREATE_I64_CONST(min, (uint64)INT64_MIN)
+    CREATE_I64_CONST(neg_one, (uint64)-1)
+    CREATE_I64_CONST(zero, 0)
+    CREATE_I64_CONST(63, 63)
+    CREATE_I64_CONST(64, 64)
+#undef CREATE_I64_CONST
+
+#define CREATE_V128_CONST(name, type)                                         \
+    if (!(consts->name##_vec_zero = LLVMConstNull(type)))                     \
+        return false;                                                         \
+    if (!(consts->name##_undef = LLVMGetUndef(type)))                         \
+        return false;
+
+    CREATE_V128_CONST(i8x16, V128_i8x16_TYPE)
+    CREATE_V128_CONST(i16x8, V128_i16x8_TYPE)
+    CREATE_V128_CONST(i32x4, V128_i32x4_TYPE)
+    CREATE_V128_CONST(i64x2, V128_i64x2_TYPE)
+    CREATE_V128_CONST(f32x4, V128_f32x4_TYPE)
+    CREATE_V128_CONST(f64x2, V128_f64x2_TYPE)
+#undef CREATE_V128_CONST
+
+#define CREATE_VEC_ZERO_MASK(slot)                                            \
+    {                                                                         \
+        LLVMTypeRef type = LLVMVectorType(I32_TYPE, slot);                    \
+        if (!type || !(consts->i32x##slot##_zero = LLVMConstNull(type)))      \
+            return false;                                                     \
+    }
+
+    CREATE_VEC_ZERO_MASK(16)
+    CREATE_VEC_ZERO_MASK(8)
+    CREATE_VEC_ZERO_MASK(4)
+    CREATE_VEC_ZERO_MASK(2)
+#undef CREATE_VEC_ZERO_MASK
+
+    return true;
 }
 
 typedef struct ArchItem {
@@ -2213,7 +2232,8 @@ aot_build_zero_function_ret(AOTCompContext *comp_ctx,
                 ret = LLVMBuildRet(comp_ctx->builder, F64_ZERO);
                 break;
             case VALUE_TYPE_V128:
-                ret = LLVMBuildRet(comp_ctx->builder, V128_ZERO);
+                ret =
+                  LLVMBuildRet(comp_ctx->builder, LLVM_CONST(i64x2_vec_zero));
                 break;
             case VALUE_TYPE_FUNCREF:
             case VALUE_TYPE_EXTERNREF:
@@ -2315,7 +2335,7 @@ __call_llvm_intrinsic(const AOTCompContext *comp_ctx,
 LLVMValueRef
 aot_call_llvm_intrinsic(const AOTCompContext *comp_ctx,
                         const AOTFuncContext *func_ctx,
-                        const char *name,
+                        const char *intrinsic,
                         LLVMTypeRef ret_type,
                         LLVMTypeRef *param_types,
                         int param_count,
@@ -2340,8 +2360,8 @@ aot_call_llvm_intrinsic(const AOTCompContext *comp_ctx,
         param_values[i++] = va_arg(argptr, LLVMValueRef);
     va_end(argptr);
 
-    ret = __call_llvm_intrinsic(comp_ctx, func_ctx, name, ret_type, param_types,
-                                param_count, param_values);
+    ret = __call_llvm_intrinsic(comp_ctx, func_ctx, intrinsic, ret_type,
+                                param_types, param_count, param_values);
 
     wasm_runtime_free(param_values);
 
@@ -2351,7 +2371,7 @@ aot_call_llvm_intrinsic(const AOTCompContext *comp_ctx,
 LLVMValueRef
 aot_call_llvm_intrinsic_v(const AOTCompContext *comp_ctx,
                           const AOTFuncContext *func_ctx,
-                          const char *name,
+                          const char *intrinsic,
                           LLVMTypeRef ret_type,
                           LLVMTypeRef *param_types,
                           int param_count,
@@ -2373,8 +2393,8 @@ aot_call_llvm_intrinsic_v(const AOTCompContext *comp_ctx,
     while (i < param_count)
         param_values[i++] = va_arg(param_value_list, LLVMValueRef);
 
-    ret = __call_llvm_intrinsic(comp_ctx, func_ctx, name, ret_type, param_types,
-                                param_count, param_values);
+    ret = __call_llvm_intrinsic(comp_ctx, func_ctx, intrinsic, ret_type,
+                                param_types, param_count, param_values);
 
     wasm_runtime_free(param_values);
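
The constant-table rewrite above drops the old construction (a <2 x i64> zero built with LLVMConstVector and bitcast into every other v128 shape) in favour of LLVMConstNull() and LLVMGetUndef() applied directly to each vector type, with macro-generated error checks for every constant. A small standalone example of that simpler construction (not WAMR code):

```c
#include <stdio.h>
#include <llvm-c/Core.h>

int main(void)
{
    LLVMContextRef ctx = LLVMContextCreate();
    LLVMTypeRef i8x16 = LLVMVectorType(LLVMInt8TypeInContext(ctx), 16);

    LLVMValueRef zero  = LLVMConstNull(i8x16);  /* <16 x i8> zeroinitializer */
    LLVMValueRef undef = LLVMGetUndef(i8x16);   /* <16 x i8> undef */

    char *s = LLVMPrintValueToString(zero);
    printf("%s\n", s);
    LLVMDisposeMessage(s);

    s = LLVMPrintValueToString(undef);
    printf("%s\n", s);
    LLVMDisposeMessage(s);

    LLVMContextDispose(ctx);
    return 0;
}
```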
 

+ 29 - 10
core/iwasm/compilation/aot_llvm.h

@@ -178,6 +178,8 @@ typedef struct AOTLLVMTypes {
   LLVMTypeRef f32x4_vec_type;
   LLVMTypeRef f64x2_vec_type;
 
+  LLVMTypeRef i1x2_vec_type;
+
   LLVMTypeRef meta_data_type;
 
   LLVMTypeRef funcref_type;
@@ -185,18 +187,13 @@ typedef struct AOTLLVMTypes {
 } AOTLLVMTypes;
 
 typedef struct AOTLLVMConsts {
+    LLVMValueRef i1_zero;
+    LLVMValueRef i1_one;
     LLVMValueRef i8_zero;
     LLVMValueRef i32_zero;
     LLVMValueRef i64_zero;
     LLVMValueRef f32_zero;
     LLVMValueRef f64_zero;
-    LLVMValueRef v128_zero;
-    LLVMValueRef i8x16_vec_zero;
-    LLVMValueRef i16x8_vec_zero;
-    LLVMValueRef i32x4_vec_zero;
-    LLVMValueRef i64x2_vec_zero;
-    LLVMValueRef f32x4_vec_zero;
-    LLVMValueRef f64x2_vec_zero;
     LLVMValueRef i32_one;
     LLVMValueRef i32_two;
     LLVMValueRef i32_three;
@@ -205,6 +202,13 @@ typedef struct AOTLLVMConsts {
     LLVMValueRef i32_six;
     LLVMValueRef i32_seven;
     LLVMValueRef i32_eight;
+    LLVMValueRef i32_nine;
+    LLVMValueRef i32_ten;
+    LLVMValueRef i32_eleven;
+    LLVMValueRef i32_twelve;
+    LLVMValueRef i32_thirteen;
+    LLVMValueRef i32_fourteen;
+    LLVMValueRef i32_fifteen;
     LLVMValueRef i32_neg_one;
     LLVMValueRef i64_neg_one;
     LLVMValueRef i32_min;
@@ -213,7 +217,22 @@ typedef struct AOTLLVMConsts {
     LLVMValueRef i32_32;
     LLVMValueRef i64_63;
     LLVMValueRef i64_64;
-    LLVMValueRef ref_null;
+    LLVMValueRef i8x16_vec_zero;
+    LLVMValueRef i16x8_vec_zero;
+    LLVMValueRef i32x4_vec_zero;
+    LLVMValueRef i64x2_vec_zero;
+    LLVMValueRef f32x4_vec_zero;
+    LLVMValueRef f64x2_vec_zero;
+    LLVMValueRef i8x16_undef;
+    LLVMValueRef i16x8_undef;
+    LLVMValueRef i32x4_undef;
+    LLVMValueRef i64x2_undef;
+    LLVMValueRef f32x4_undef;
+    LLVMValueRef f64x2_undef;
+    LLVMValueRef i32x16_zero;
+    LLVMValueRef i32x8_zero;
+    LLVMValueRef i32x4_zero;
+    LLVMValueRef i32x2_zero;
 } AOTLLVMConsts;
 
 /**
@@ -393,7 +412,7 @@ aot_build_zero_function_ret(AOTCompContext *comp_ctx,
 LLVMValueRef
 aot_call_llvm_intrinsic(const AOTCompContext *comp_ctx,
                         const AOTFuncContext *func_ctx,
-                        const char *name,
+                        const char *intrinsic,
                         LLVMTypeRef ret_type,
                         LLVMTypeRef *param_types,
                         int param_count,
@@ -402,7 +421,7 @@ aot_call_llvm_intrinsic(const AOTCompContext *comp_ctx,
 LLVMValueRef
 aot_call_llvm_intrinsic_v(const AOTCompContext *comp_ctx,
                           const AOTFuncContext *func_ctx,
-                          const char *name,
+                          const char *intrinsic,
                           LLVMTypeRef ret_type,
                           LLVMTypeRef *param_types,
                           int param_count,

+ 30 - 85
core/iwasm/compilation/simd/simd_access_lanes.c

@@ -8,39 +8,6 @@
 #include "../aot_emit_exception.h"
 #include "../../aot/aot_runtime.h"
 
-static bool
-is_target_x86(AOTCompContext *comp_ctx)
-{
-    return !strncmp(comp_ctx->target_arch, "x86_64", 6) ||
-           !strncmp(comp_ctx->target_arch, "i386", 4);
-}
-
-static LLVMValueRef
-build_intx16_vector(const AOTCompContext *comp_ctx,
-                    const LLVMTypeRef element_type,
-                    const int *element_value)
-{
-    LLVMValueRef vector, elements[16];
-    unsigned i;
-
-    for (i = 0; i < 16; i++) {
-        if (!(elements[i] =
-                LLVMConstInt(element_type, element_value[i], true))) {
-            HANDLE_FAILURE("LLVMConstInst");
-            goto fail;
-        }
-    }
-
-    if (!(vector = LLVMConstVector(elements, 16))) {
-        HANDLE_FAILURE("LLVMConstVector");
-        goto fail;
-    }
-
-    return vector;
-fail:
-    return NULL;
-}
-
 bool
 aot_compile_simd_shuffle(AOTCompContext *comp_ctx,
                          AOTFuncContext *func_ctx,
@@ -67,7 +34,8 @@ aot_compile_simd_shuffle(AOTCompContext *comp_ctx,
     }
 
     /* build a vector <16 x i32> */
-    if (!(mask = build_intx16_vector(comp_ctx, I32_TYPE, values))) {
+    if (!(mask =
+            simd_build_const_integer_vector(comp_ctx, I32_TYPE, values, 16))) {
         goto fail;
     }
 
@@ -77,29 +45,20 @@ aot_compile_simd_shuffle(AOTCompContext *comp_ctx,
         goto fail;
     }
 
-    if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE,
-                                    "ret"))) {
-        HANDLE_FAILURE("LLVMBuildBitCast");
-        goto fail;
-    }
-
-    PUSH_V128(result);
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
 
-    return true;
 fail:
     return false;
 }
 
+/*TODO: llvm.experimental.vector.*/
 /* shufflevector is not an option, since it requires *mask as a const */
 bool
-aot_compile_simd_swizzle_x86(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+aot_compile_simd_swizzle_x86(AOTCompContext *comp_ctx,
+                             AOTFuncContext *func_ctx)
 {
     LLVMValueRef vector, mask, max_lanes, condition, mask_lanes, result;
     LLVMTypeRef param_types[2];
-    int max_lane_id[16] = { 16, 16, 16, 16, 16, 16, 16, 16,
-                            16, 16, 16, 16, 16, 16, 16, 16 },
-        mask_lane_id[16] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-                             0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
 
     if (!(mask = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i8x16_TYPE,
                                            "mask"))) {
@@ -112,20 +71,21 @@ aot_compile_simd_swizzle_x86(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
     }
 
     /* icmp uge <16 x i8> mask, <16, 16, 16, 16, ...> */
-    if (!(max_lanes = build_intx16_vector(comp_ctx, INT8_TYPE, max_lane_id))) {
+    if (!(max_lanes = simd_build_splat_const_integer_vector(
+            comp_ctx, INT8_TYPE, 16, 16))) {
         goto fail;
     }
 
-    if (!(condition = LLVMBuildICmp(comp_ctx->builder, LLVMIntUGE, mask,
-                                    max_lanes, "compare_with_16"))) {
-        HANDLE_FAILURE("LLVMBuldICmp");
+    /*  if the highest bit of every i8 of mask is 1, means doesn't pick up from vector */
+    /* select <16 x i1> %condition, <16 x i8> <0x80, 0x80, ...>, <16 x i8> %mask */
+    if (!(mask_lanes = simd_build_splat_const_integer_vector(
+            comp_ctx, INT8_TYPE, 0x80, 16))) {
         goto fail;
     }
 
-    /*  if the highest bit of every i8 of mask is 1, means doesn't pick up from vector */
-    /* select <16 x i1> %condition, <16 x i8> <0x80, 0x80, ...>, <16 x i8> %mask */
-    if (!(mask_lanes =
-            build_intx16_vector(comp_ctx, INT8_TYPE, mask_lane_id))) {
+    if (!(condition = LLVMBuildICmp(comp_ctx->builder, LLVMIntUGE, mask,
+                                    max_lanes, "compare_with_16"))) {
+        HANDLE_FAILURE("LLVMBuldICmp");
         goto fail;
     }
 
@@ -158,17 +118,13 @@ fail:
 }
 
 static bool
-aot_compile_simd_swizzle_common(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+aot_compile_simd_swizzle_common(AOTCompContext *comp_ctx,
+                                AOTFuncContext *func_ctx)
 {
     LLVMValueRef vector, mask, default_lane_value, condition, max_lane_id,
       result, idx, id, replace_with_zero, elem, elem_or_zero, undef;
     uint8 i;
 
-    int const_lane_ids[16] = { 16, 16, 16, 16, 16, 16, 16, 16,
-                               16, 16, 16, 16, 16, 16, 16, 16 },
-        const_zeors[16] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
-                            0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
-
     if (!(mask = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i8x16_TYPE,
                                            "mask"))) {
         goto fail;
@@ -185,8 +141,8 @@ aot_compile_simd_swizzle_common(AOTCompContext *comp_ctx, AOTFuncContext *func_c
     }
 
     /* icmp uge <16 x i8> mask, <16, 16, 16, 16, ...> */
-    if (!(max_lane_id =
-            build_intx16_vector(comp_ctx, INT8_TYPE, const_lane_ids))) {
+    if (!(max_lane_id = simd_build_splat_const_integer_vector(
+            comp_ctx, INT8_TYPE, 16, 16))) {
         goto fail;
     }
 
@@ -197,8 +153,8 @@ aot_compile_simd_swizzle_common(AOTCompContext *comp_ctx, AOTFuncContext *func_c
     }
 
     /*  if the id is out of range (>=16), set the id as 0 */
-    if (!(default_lane_value =
-            build_intx16_vector(comp_ctx, INT8_TYPE, const_zeors))) {
+    if (!(default_lane_value = simd_build_splat_const_integer_vector(
+            comp_ctx, INT8_TYPE, 0, 16))) {
         goto fail;
     }
 
@@ -277,9 +233,9 @@ aot_compile_simd_extract(AOTCompContext *comp_ctx,
                          LLVMTypeRef result_type,
                          unsigned aot_value_type)
 {
-    LLVMValueRef vector, idx, result;
+    LLVMValueRef vector, lane, result;
 
-    if (!(idx = I8_CONST(lane_id))) {
+    if (!(lane = simd_lane_id_to_llvm_value(comp_ctx, lane_id))) {
         HANDLE_FAILURE("LLVMConstInt");
         goto fail;
     }
@@ -291,7 +247,7 @@ aot_compile_simd_extract(AOTCompContext *comp_ctx,
     }
 
     /* extractelement <vector_type> %vector, i8 lane_id*/
-    if (!(result = LLVMBuildExtractElement(comp_ctx->builder, vector, idx,
+    if (!(result = LLVMBuildExtractElement(comp_ctx->builder, vector, lane,
                                            "element"))) {
         HANDLE_FAILURE("LLVMBuildExtractElement");
         goto fail;
@@ -390,23 +346,20 @@ aot_compile_simd_replace(AOTCompContext *comp_ctx,
                          bool need_reduce,
                          LLVMTypeRef element_type)
 {
-    LLVMValueRef vector, new_value, idx, result;
+    LLVMValueRef vector, new_value, lane, result;
 
     POP(new_value, new_value_type);
 
-    if (!(idx = I8_CONST(lane_id))) {
-        HANDLE_FAILURE("LLVMConstInt");
+    if (!(lane = simd_lane_id_to_llvm_value(comp_ctx, lane_id))) {
         goto fail;
     }
 
-    /* bitcast <2 x i64> %0 to <vector_type> */
-
     if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
                                              "vec"))) {
         goto fail;
     }
 
-    /* bitcast <new_value_type> to <element_type> */
+    /* trunc <new_value_type> to <element_type> */
     if (need_reduce) {
         if (!(new_value = LLVMBuildTrunc(comp_ctx->builder, new_value,
                                          element_type, "element"))) {
@@ -415,23 +368,15 @@ aot_compile_simd_replace(AOTCompContext *comp_ctx,
         }
     }
 
-    /* insertelement <vector_type> %vector, <element_type>  %element, i8 idx */
+    /* insertelement <vector_type> %vector, <element_type>  %element, i32 lane */
     if (!(result = LLVMBuildInsertElement(comp_ctx->builder, vector, new_value,
-                                          idx, "new_vector"))) {
+                                          lane, "new_vector"))) {
         HANDLE_FAILURE("LLVMBuildInsertElement");
         goto fail;
     }
 
-    /* bitcast <vector_type> %result to <2 x i64> */
-    if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE,
-                                    "ret"))) {
-        HANDLE_FAILURE("LLVMBuildBitCast");
-        goto fail;
-    }
-
-    PUSH_V128(result);
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "reesult");
 
-    return true;
 fail:
     return false;
 }
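
For reference, both swizzle paths above (the x86 pshufb route and the generic select-based route) implement the scalar semantics of i8x16.swizzle: any mask index greater than or equal to 16 selects 0. A minimal standalone sketch of those semantics, illustrative only and not part of the patch (the _ref name is made up):

    #include <stdint.h>
    #include <stdio.h>

    /* Scalar reference of i8x16.swizzle: lane i of the result is
       vec[mask[i]] when mask[i] < 16, and 0 otherwise. */
    static void
    i8x16_swizzle_ref(const uint8_t vec[16], const uint8_t mask[16],
                      uint8_t out[16])
    {
        for (int i = 0; i < 16; i++)
            out[i] = mask[i] < 16 ? vec[mask[i]] : 0;
    }

    int
    main(void)
    {
        uint8_t vec[16], mask[16], out[16];
        for (int i = 0; i < 16; i++) {
            vec[i] = (uint8_t)(i + 100);
            mask[i] = (uint8_t)(15 - i);
        }
        mask[0] = 0x80; /* out-of-range index, selects 0 */
        i8x16_swizzle_ref(vec, mask, out);
        for (int i = 0; i < 16; i++)
            printf("%u ", out[i]); /* 0 114 113 ... 100 */
        printf("\n");
        return 0;
    }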

+ 20 - 0
core/iwasm/compilation/simd/simd_access_lanes.h

@@ -82,6 +82,26 @@ aot_compile_simd_replace_f64x2(AOTCompContext *comp_ctx,
                                AOTFuncContext *func_ctx,
                                uint8 lane_id);
 
+bool
+aot_compile_simd_load8_lane(AOTCompContext *comp_ctx,
+                            AOTFuncContext *func_ctx,
+                            uint8 lane_id);
+
+bool
+aot_compile_simd_load16_lane(AOTCompContext *comp_ctx,
+                             AOTFuncContext *func_ctx,
+                             uint8 lane_id);
+
+bool
+aot_compile_simd_load32_lane(AOTCompContext *comp_ctx,
+                             AOTFuncContext *func_ctx,
+                             uint8 lane_id);
+
+bool
+aot_compile_simd_load64_lane(AOTCompContext *comp_ctx,
+                             AOTFuncContext *func_ctx,
+                             uint8 lane_id);
+
 #ifdef __cplusplus
 } /* end of extern "C" */
 #endif

+ 66 - 79
core/iwasm/compilation/simd/simd_bit_shifts.c

@@ -8,121 +8,112 @@
 #include "../aot_emit_exception.h"
 #include "../../aot/aot_runtime.h"
 
+enum integer_shift {
+    e_shift_i8x16,
+    e_shift_i16x8,
+    e_shift_i32x4,
+    e_shift_i64x2,
+};
+
 static bool
 simd_shift(AOTCompContext *comp_ctx,
            AOTFuncContext *func_ctx,
            IntShift shift_op,
-           LLVMTypeRef vector_type,
-           LLVMTypeRef element_type,
-           unsigned lane_width)
+           enum integer_shift itype)
 {
-    LLVMValueRef vector, offset, width, undef, zeros, result;
-    LLVMTypeRef zeros_type;
+    LLVMValueRef vector, offset, result = NULL;
+    LLVMTypeRef vector_type[] = { V128_i8x16_TYPE, V128_i16x8_TYPE,
+                                  V128_i32x4_TYPE, V128_i64x2_TYPE };
+    LLVMTypeRef element_type[] = { INT8_TYPE, INT16_TYPE, I32_TYPE, I64_TYPE };
+
+    LLVMValueRef undef[] = { LLVM_CONST(i8x16_undef), LLVM_CONST(i16x8_undef),
+                             LLVM_CONST(i32x4_undef),
+                             LLVM_CONST(i64x2_undef) };
+    LLVMValueRef mask[] = { LLVM_CONST(i8x16_vec_zero),
+                            LLVM_CONST(i16x8_vec_zero),
+                            LLVM_CONST(i32x4_vec_zero),
+                            LLVM_CONST(i64x2_vec_zero) };
+    LLVMValueRef lane_bits[] = {
+        LLVM_CONST(i32_eight),
+        LLVMConstInt(I32_TYPE, 16, true),
+        LLVMConstInt(I32_TYPE, 32, true),
+        LLVMConstInt(I32_TYPE, 64, true),
+    };
 
     POP_I32(offset);
 
-    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
-                                             "vec"))) {
-        goto fail;
-    }
-
-    if (!(width = LLVMConstInt(I32_TYPE, lane_width, true))) {
-        HANDLE_FAILURE("LLVMConstInt");
-        goto fail;
+    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                             vector_type[itype], "vec"))) {
+        return false;
     }
 
-    if (!(offset =
-            LLVMBuildURem(comp_ctx->builder, offset, width, "remainder"))) {
-        HANDLE_FAILURE("LLVMBuildURem");
-        goto fail;
-    }
-
-    if (I64_TYPE == element_type) {
-        if (!(offset = LLVMBuildZExt(comp_ctx->builder, offset, element_type,
-                                     "offset_scalar"))) {
-            HANDLE_FAILURE("LLVMBuildZExt");
-            goto fail;
-        }
-    }
-    else {
-        if (!(offset = LLVMBuildTruncOrBitCast(
-                comp_ctx->builder, offset, element_type, "offset_scalar"))) {
-            HANDLE_FAILURE("LLVMBuildTrunc");
-            goto fail;
-        }
+    /* offset mod LaneBits */
+    if (!lane_bits[itype]
+        || !(offset = LLVMBuildSRem(comp_ctx->builder, offset,
+                                    lane_bits[itype], "offset_fix"))) {
+        HANDLE_FAILURE("LLVMBuildSRem");
+        return false;
     }
 
-    /* create a vector with offset */
-    if (!(undef = LLVMGetUndef(vector_type))) {
-        HANDLE_FAILURE("LLVMGetUndef");
-        goto fail;
+    /* change type */
+    if (itype < e_shift_i32x4) {
+        offset = LLVMBuildTrunc(comp_ctx->builder, offset, element_type[itype],
+                                "offset_trunc");
     }
-
-    if (!(zeros_type = LLVMVectorType(I32_TYPE, 128 / lane_width))) {
-        HANDLE_FAILURE("LVMVectorType");
-        goto fail;
+    else if (itype == e_shift_i64x2) {
+        offset = LLVMBuildZExt(comp_ctx->builder, offset, element_type[itype],
+                               "offset_ext");
     }
 
-    if (!(zeros = LLVMConstNull(zeros_type))) {
-        HANDLE_FAILURE("LLVMConstNull");
-        goto fail;
+    if (!offset) {
+        HANDLE_FAILURE("LLVMBuildZext/LLVMBuildTrunc");
+        return false;
     }
 
-    if (!(offset = LLVMBuildInsertElement(comp_ctx->builder, undef, offset,
-                                          I32_ZERO, "base_vector"))) {
+    /* splat to a vector */
+    if (!(offset =
+            LLVMBuildInsertElement(comp_ctx->builder, undef[itype], offset,
+                                   I32_ZERO, "offset_vector_base"))) {
         HANDLE_FAILURE("LLVMBuildInsertElement");
-        goto fail;
+        return false;
     }
 
-    if (!(offset = LLVMBuildShuffleVector(comp_ctx->builder, offset, undef,
-                                          zeros, "offset_vector"))) {
+    if (!(offset =
+            LLVMBuildShuffleVector(comp_ctx->builder, offset, undef[itype],
+                                   mask[itype], "offset_vector"))) {
         HANDLE_FAILURE("LLVMBuildShuffleVector");
-        goto fail;
+        return false;
     }
 
     switch (shift_op) {
         case INT_SHL:
         {
-            if (!(result =
-                    LLVMBuildShl(comp_ctx->builder, vector, offset, "shl"))) {
-                HANDLE_FAILURE("LLVMBuildShl");
-                goto fail;
-            }
+            result = LLVMBuildShl(comp_ctx->builder, vector, offset, "shl");
             break;
         }
         case INT_SHR_S:
         {
-            if (!(result = LLVMBuildAShr(comp_ctx->builder, vector, offset,
-                                         "ashr"))) {
-                HANDLE_FAILURE("LLVMBuildAShr");
-                goto fail;
-            }
+            result = LLVMBuildAShr(comp_ctx->builder, vector, offset, "ashr");
             break;
         }
         case INT_SHR_U:
         {
-            if (!(result = LLVMBuildLShr(comp_ctx->builder, vector, offset,
-                                         "lshr"))) {
-                HANDLE_FAILURE("LLVMBuildLShr");
-                goto fail;
-            }
+            result = LLVMBuildLShr(comp_ctx->builder, vector, offset, "lshr");
             break;
         }
         default:
         {
-            bh_assert(0);
-            goto fail;
+            break;
         }
     }
 
-    if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE,
-                                    "result"))) {
-        HANDLE_FAILURE("LLVMBuildBitCast");
+    if (!result) {
+        HANDLE_FAILURE("LLVMBuildShl/LLVMBuildLShr/LLVMBuildAShr");
         goto fail;
     }
 
-    PUSH_V128(result);
-    return true;
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
+
 fail:
     return false;
 }
@@ -132,8 +123,7 @@ aot_compile_simd_i8x16_shift(AOTCompContext *comp_ctx,
                              AOTFuncContext *func_ctx,
                              IntShift shift_op)
 {
-    return simd_shift(comp_ctx, func_ctx, shift_op, V128_i8x16_TYPE, INT8_TYPE,
-                      8);
+    return simd_shift(comp_ctx, func_ctx, shift_op, e_shift_i8x16);
 }
 
 bool
@@ -141,8 +131,7 @@ aot_compile_simd_i16x8_shift(AOTCompContext *comp_ctx,
                              AOTFuncContext *func_ctx,
                              IntShift shift_op)
 {
-    return simd_shift(comp_ctx, func_ctx, shift_op, V128_i16x8_TYPE,
-                      INT16_TYPE, 16);
+    return simd_shift(comp_ctx, func_ctx, shift_op, e_shift_i16x8);
 }
 
 bool
@@ -150,8 +139,7 @@ aot_compile_simd_i32x4_shift(AOTCompContext *comp_ctx,
                              AOTFuncContext *func_ctx,
                              IntShift shift_op)
 {
-    return simd_shift(comp_ctx, func_ctx, shift_op, V128_i32x4_TYPE, I32_TYPE,
-                      32);
+    return simd_shift(comp_ctx, func_ctx, shift_op, e_shift_i32x4);
 }
 
 bool
@@ -159,6 +147,5 @@ aot_compile_simd_i64x2_shift(AOTCompContext *comp_ctx,
                              AOTFuncContext *func_ctx,
                              IntShift shift_op)
 {
-    return simd_shift(comp_ctx, func_ctx, shift_op, V128_i64x2_TYPE, I64_TYPE,
-                      64);
+    return simd_shift(comp_ctx, func_ctx, shift_op, e_shift_i64x2);
 }
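
The rewritten simd_shift first reduces the shift count modulo the lane width and then splats it across the lanes, which matches the Wasm SIMD rule that the shift count is taken mod LaneBits. A scalar sketch of the i32x4.shl case, illustrative only and not part of the patch (the _ref name is made up):

    #include <stdint.h>
    #include <stdio.h>

    /* Scalar reference of i32x4.shl: the shift count is taken modulo
       the lane width (32) and applied to every lane. */
    static void
    i32x4_shl_ref(const uint32_t vec[4], uint32_t count, uint32_t out[4])
    {
        uint32_t k = count % 32;
        for (int i = 0; i < 4; i++)
            out[i] = vec[i] << k;
    }

    int
    main(void)
    {
        uint32_t v[4] = { 1, 2, 3, 4 }, out[4];
        i32x4_shl_ref(v, 33, out); /* 33 mod 32 == 1, every lane doubles */
        for (int i = 0; i < 4; i++)
            printf("%u ", out[i]); /* 2 4 6 8 */
        printf("\n");
        return 0;
    }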

+ 65 - 41
core/iwasm/compilation/simd/simd_bitmask_extracts.c

@@ -8,70 +8,92 @@
 #include "../aot_emit_exception.h"
 #include "../../aot/aot_runtime.h"
 
+enum integer_bitmask_type {
+    e_bitmask_i8x16,
+    e_bitmask_i16x8,
+    e_bitmask_i32x4,
+    e_bitmask_i64x2,
+};
+
+/* TODO: should use a cleverer intrinsic */
 static bool
 simd_build_bitmask(const AOTCompContext *comp_ctx,
                    const AOTFuncContext *func_ctx,
-                   uint8 length,
-                   LLVMTypeRef vector_type,
-                   LLVMTypeRef element_type,
-                   const char *intrinsic)
+                   enum integer_bitmask_type itype)
 {
-    LLVMValueRef vector, zeros, mask, mask_elements[16], cond, result;
-    LLVMTypeRef param_types[1], vector_ext_type;
-    const uint32 numbers[16] = { 0x1,    0x2,    0x4,    0x8,   0x10,  0x20,
-                                 0x40,   0x80,   0x100,  0x200, 0x400, 0x800,
-                                 0x1000, 0x2000, 0x4000, 0x8000 };
+    LLVMValueRef vector, mask, result;
     uint8 i;
-
-    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
-                                             "vec"))) {
+    LLVMTypeRef vector_ext_type;
+
+    uint32 lanes[] = { 16, 8, 4, 2 };
+    uint32 lane_bits[] = { 8, 16, 32, 64 };
+    LLVMTypeRef element_type[] = { INT8_TYPE, INT16_TYPE, I32_TYPE, I64_TYPE };
+    LLVMTypeRef vector_type[] = { V128_i8x16_TYPE, V128_i16x8_TYPE,
+                                  V128_i32x4_TYPE, V128_i64x2_TYPE };
+    int32 mask_element[16] = { 0 };
+    const char *intrinsic[] = {
+        "llvm.vector.reduce.or.v16i64",
+        "llvm.vector.reduce.or.v8i64",
+        "llvm.vector.reduce.or.v4i64",
+        "llvm.vector.reduce.or.v2i64",
+    };
+
+    LLVMValueRef ashr_distance;
+
+    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                             vector_type[itype], "vec"))) {
         goto fail;
     }
 
-    if (!(vector_ext_type = LLVMVectorType(I32_TYPE, length))) {
-        HANDLE_FAILURE("LLVMVectorType");
+    /* fill every bit in a lane with its sign bit */
+    if (!(ashr_distance = simd_build_splat_const_integer_vector(
+            comp_ctx, element_type[itype], lane_bits[itype] - 1,
+            lanes[itype]))) {
         goto fail;
     }
 
-    if (!(vector = LLVMBuildSExt(comp_ctx->builder, vector, vector_ext_type,
-                                 "vec_ext"))) {
-        HANDLE_FAILURE("LLVMBuildSExt");
+    if (!(vector = LLVMBuildAShr(comp_ctx->builder, vector, ashr_distance,
+                                 "vec_ashr"))) {
+        HANDLE_FAILURE("LLVMBuildAShr");
         goto fail;
     }
 
-    if (!(zeros = LLVMConstNull(vector_ext_type))) {
-        HANDLE_FAILURE("LLVMConstNull");
+    if (!(vector_ext_type = LLVMVectorType(I64_TYPE, lanes[itype]))) {
+        HANDLE_FAILURE("LLVMVectorType");
         goto fail;
     }
 
-    for (i = 0; i < 16; i++) {
-        if (!(mask_elements[i] = LLVMConstInt(I32_TYPE, numbers[i], false))) {
-            HANDLE_FAILURE("LLVMConstInt");
+    if (e_bitmask_i64x2 != itype) {
+        if (!(vector = LLVMBuildSExt(comp_ctx->builder, vector,
+                                     vector_ext_type, "zext_to_i64"))) {
             goto fail;
         }
     }
 
-    if (!(mask = LLVMConstVector(mask_elements, length))) {
-        HANDLE_FAILURE("LLVMConstVector");
+    for (i = 0; i < 16; i++) {
+        mask_element[i] = 0x1 << i;
+    }
+
+    if (!(mask = simd_build_const_integer_vector(
+            comp_ctx, I64_TYPE, mask_element, lanes[itype]))) {
         goto fail;
     }
 
-    if (!(cond = LLVMBuildICmp(comp_ctx->builder, LLVMIntSLT, vector, zeros,
-                               "lt_zero"))) {
-        HANDLE_FAILURE("LLVMBuildICmp");
+    if (!(vector =
+            LLVMBuildAnd(comp_ctx->builder, vector, mask, "mask_bits"))) {
+        HANDLE_FAILURE("LLVMBuildAnd");
         goto fail;
     }
 
     if (!(result =
-            LLVMBuildSelect(comp_ctx->builder, cond, mask, zeros, "select"))) {
-        HANDLE_FAILURE("LLVMBuildSelect");
+            aot_call_llvm_intrinsic(comp_ctx, func_ctx, intrinsic[itype],
+                                    I64_TYPE, &vector_ext_type, 1, vector))) {
         goto fail;
     }
 
-    param_types[0] = vector_ext_type;
-    if (!(result = aot_call_llvm_intrinsic(comp_ctx, func_ctx, intrinsic, I32_TYPE,
-                                           param_types, 1, result))) {
-        HANDLE_FAILURE("LLVMBuildCall");
+    if (!(result =
+            LLVMBuildTrunc(comp_ctx->builder, result, I32_TYPE, "to_i32"))) {
+        HANDLE_FAILURE("LLVMBuildTrunc");
         goto fail;
     }
 
@@ -86,24 +108,26 @@ bool
 aot_compile_simd_i8x16_bitmask(AOTCompContext *comp_ctx,
                                AOTFuncContext *func_ctx)
 {
-    return simd_build_bitmask(comp_ctx, func_ctx, 16, V128_i8x16_TYPE,
-                              INT8_TYPE,
-                              "llvm.experimental.vector.reduce.or.v16i32");
+    return simd_build_bitmask(comp_ctx, func_ctx, e_bitmask_i8x16);
 }
 
 bool
 aot_compile_simd_i16x8_bitmask(AOTCompContext *comp_ctx,
                                AOTFuncContext *func_ctx)
 {
-    return simd_build_bitmask(comp_ctx, func_ctx, 8, V128_i16x8_TYPE,
-                              INT16_TYPE,
-                              "llvm.experimental.vector.reduce.or.v8i32");
+    return simd_build_bitmask(comp_ctx, func_ctx, e_bitmask_i16x8);
 }
 
 bool
 aot_compile_simd_i32x4_bitmask(AOTCompContext *comp_ctx,
                                AOTFuncContext *func_ctx)
 {
-    return simd_build_bitmask(comp_ctx, func_ctx, 4, V128_i32x4_TYPE, I32_TYPE,
-                              "llvm.experimental.vector.reduce.or.v4i32");
+    return simd_build_bitmask(comp_ctx, func_ctx, e_bitmask_i32x4);
+}
+
+bool
+aot_compile_simd_i64x2_bitmask(AOTCompContext *comp_ctx,
+                               AOTFuncContext *func_ctx)
+{
+    return simd_build_bitmask(comp_ctx, func_ctx, e_bitmask_i64x2);
 }
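
The new bitmask lowering arithmetic-shifts each lane right by lane_bits - 1 so every bit of a lane equals its sign bit, widens to i64, ANDs lane i with 1 << i, and OR-reduces the lanes before truncating to i32. A scalar reference of i8x16.bitmask that the generated IR should agree with, illustrative only and not part of the patch (the _ref name is made up):

    #include <stdint.h>
    #include <stdio.h>

    /* Scalar reference of i8x16.bitmask: bit i of the result is the
       most significant (sign) bit of lane i. */
    static uint32_t
    i8x16_bitmask_ref(const int8_t vec[16])
    {
        uint32_t result = 0;
        for (int i = 0; i < 16; i++)
            if (vec[i] < 0) /* sign bit set */
                result |= 1u << i;
        return result;
    }

    int
    main(void)
    {
        int8_t v[16] = { -1, 1, -2, 2, -3, 3, 0, 0,
                         127, -128, 5, -5, 6, -6, 7, -7 };
        printf("0x%x\n", i8x16_bitmask_ref(v)); /* prints 0xaa15 */
        return 0;
    }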

+ 10 - 4
core/iwasm/compilation/simd/simd_bitmask_extracts.h

@@ -13,17 +13,23 @@ extern "C" {
 #endif
 
 bool
-aot_compile_simd_i8x16_bitmask(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+aot_compile_simd_i8x16_bitmask(AOTCompContext *comp_ctx,
+                               AOTFuncContext *func_ctx);
 
 bool
-aot_compile_simd_i16x8_bitmask(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+aot_compile_simd_i16x8_bitmask(AOTCompContext *comp_ctx,
+                               AOTFuncContext *func_ctx);
 
 bool
-aot_compile_simd_i32x4_bitmask(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+aot_compile_simd_i32x4_bitmask(AOTCompContext *comp_ctx,
+                               AOTFuncContext *func_ctx);
+
+bool
+aot_compile_simd_i64x2_bitmask(AOTCompContext *comp_ctx,
+                               AOTFuncContext *func_ctx);
 
 #ifdef __cplusplus
 } /* end of extern "C" */
 #endif
 
 #endif /* end of _SIMD_BITMASK_EXTRACTS_H_ */
-

+ 2 - 2
core/iwasm/compilation/simd/simd_bitwise_ops.c

@@ -86,7 +86,7 @@ fail:
 
 /* v128.or(v128.and(v1, c), v128.and(v2, v128.not(c))) */
 static bool
-v128_bitwise_bit_select(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+v128_bitwise_bitselect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
 {
     LLVMValueRef vector1, vector2, vector3, result;
 
@@ -138,7 +138,7 @@ aot_compile_simd_v128_bitwise(AOTCompContext *comp_ctx,
         case V128_NOT:
             return v128_bitwise_not(comp_ctx, func_ctx);
         case V128_BITSELECT:
-            return v128_bitwise_bit_select(comp_ctx, func_ctx);
+            return v128_bitwise_bitselect(comp_ctx, func_ctx);
         default:
             bh_assert(0);
             return false;

+ 63 - 107
core/iwasm/compilation/simd/simd_bool_reductions.c

@@ -8,58 +8,62 @@
 #include "../aot_emit_exception.h"
 #include "../../aot/aot_runtime.h"
 
+enum integer_all_true {
+    e_int_all_true_v16i8,
+    e_int_all_true_v8i16,
+    e_int_all_true_v4i32,
+    e_int_all_true_v2i64,
+};
+
 static bool
-simd_any_true(AOTCompContext *comp_ctx,
+simd_all_true(AOTCompContext *comp_ctx,
               AOTFuncContext *func_ctx,
-              LLVMTypeRef vector_type,
-              LLVMTypeRef element_type,
-              const char *intrinsic)
+              enum integer_all_true itype)
 {
-    LLVMValueRef vector, zeros, non_zero, result;
+    LLVMValueRef vector, result;
+    LLVMTypeRef vector_i1_type;
+    LLVMTypeRef vector_type[] = { V128_i8x16_TYPE, V128_i16x8_TYPE,
+                                  V128_i32x4_TYPE, V128_i64x2_TYPE };
+    uint32 lanes[] = { 16, 8, 4, 2 };
+    const char *intrinsic[] = {
+        "llvm.vector.reduce.and.v16i1",
+        "llvm.vector.reduce.and.v8i1",
+        "llvm.vector.reduce.and.v4i1",
+        "llvm.vector.reduce.and.v2i1",
+    };
+    LLVMValueRef zero[] = {
+        LLVM_CONST(i8x16_vec_zero),
+        LLVM_CONST(i16x8_vec_zero),
+        LLVM_CONST(i32x4_vec_zero),
+        LLVM_CONST(i64x2_vec_zero),
+    };
 
-    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
-                                             "vec"))) {
+    if (!(vector_i1_type = LLVMVectorType(INT1_TYPE, lanes[itype]))) {
+        HANDLE_FAILURE("LLVMVectorType");
         goto fail;
     }
 
-    if (!(zeros = LLVMConstNull(vector_type))) {
-        HANDLE_FAILURE("LLVMConstNull");
+    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                             vector_type[itype], "vector"))) {
         goto fail;
     }
 
-    /* icmp eq <N x iX> %vector, zeroinitialize */
-    if (!(non_zero = LLVMBuildICmp(comp_ctx->builder, LLVMIntNE, vector, zeros,
-                                   "non_zero"))) {
+    /* compare with zero */
+    if (!(result = LLVMBuildICmp(comp_ctx->builder, LLVMIntNE, vector,
+                                 zero[itype], "ne_zero"))) {
         HANDLE_FAILURE("LLVMBuildICmp");
         goto fail;
     }
 
-    /* zext <N x i1> to <N x iX> */
-    if (!(non_zero = LLVMBuildZExt(comp_ctx->builder, non_zero, vector_type,
-                                   "non_zero_ex"))) {
-        HANDLE_FAILURE("LLVMBuildZExt");
-        goto fail;
-    }
-
-    if (!(result = aot_call_llvm_intrinsic(comp_ctx, func_ctx, intrinsic, element_type,
-                                           &vector_type, 1, non_zero))) {
-        HANDLE_FAILURE("LLVMBuildCall");
-        goto fail;
-    }
-
-    if (!(zeros = LLVMConstNull(element_type))) {
-        HANDLE_FAILURE("LLVMConstNull");
-        goto fail;
-    }
-
-    if (!(result = LLVMBuildICmp(comp_ctx->builder, LLVMIntNE, result, zeros,
-                                 "gt_zero"))) {
-        HANDLE_FAILURE("LLVMBuildICmp");
+    /* check zero */
+    if (!(result = aot_call_llvm_intrinsic(comp_ctx, func_ctx,
+                                           intrinsic[itype], INT1_TYPE,
+                                           &vector_i1_type, 1, result))) {
         goto fail;
     }
 
     if (!(result =
-            LLVMBuildZExt(comp_ctx->builder, result, I32_TYPE, "ret"))) {
+            LLVMBuildZExt(comp_ctx->builder, result, I32_TYPE, "to_i32"))) {
         HANDLE_FAILURE("LLVMBuildZExt");
         goto fail;
     }
@@ -72,81 +76,57 @@ fail:
 }
 
 bool
-aot_compile_simd_i8x16_any_true(AOTCompContext *comp_ctx,
+aot_compile_simd_i8x16_all_true(AOTCompContext *comp_ctx,
                                 AOTFuncContext *func_ctx)
 {
-    return simd_any_true(comp_ctx, func_ctx, V128_i8x16_TYPE, INT8_TYPE,
-                         "llvm.experimental.vector.reduce.add.v16i8");
+    return simd_all_true(comp_ctx, func_ctx, e_int_all_true_v16i8);
 }
 
 bool
-aot_compile_simd_i16x8_any_true(AOTCompContext *comp_ctx,
+aot_compile_simd_i16x8_all_true(AOTCompContext *comp_ctx,
                                 AOTFuncContext *func_ctx)
 {
-    return simd_any_true(comp_ctx, func_ctx, V128_i16x8_TYPE, INT16_TYPE,
-                         "llvm.experimental.vector.reduce.add.v8i16");
+    return simd_all_true(comp_ctx, func_ctx, e_int_all_true_v8i16);
 }
 
 bool
-aot_compile_simd_i32x4_any_true(AOTCompContext *comp_ctx,
+aot_compile_simd_i32x4_all_true(AOTCompContext *comp_ctx,
                                 AOTFuncContext *func_ctx)
 {
-    return simd_any_true(comp_ctx, func_ctx, V128_i32x4_TYPE, I32_TYPE,
-                         "llvm.experimental.vector.reduce.add.v4i32");
+    return simd_all_true(comp_ctx, func_ctx, e_int_all_true_v4i32);
 }
 
-static bool
-simd_all_true(AOTCompContext *comp_ctx,
-              AOTFuncContext *func_ctx,
-              LLVMTypeRef vector_type,
-              LLVMTypeRef element_type,
-              const char *intrinsic)
+bool
+aot_compile_simd_i64x2_all_true(AOTCompContext *comp_ctx,
+                                AOTFuncContext *func_ctx)
 {
-    LLVMValueRef vector, zeros, is_zero, result;
-
-    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
-                                             "vec"))) {
-        goto fail;
-    }
-
-    if (!(zeros = LLVMConstNull(vector_type))) {
-        HANDLE_FAILURE("LLVMConstNull");
-        goto fail;
-    }
-
-    /* icmp eq <N x iX> %vector, zeroinitialize */
-    if (!(is_zero = LLVMBuildICmp(comp_ctx->builder, LLVMIntEQ, vector, zeros,
-                                  "is_zero"))) {
-        HANDLE_FAILURE("LLVMBuildICmp");
-        goto fail;
-    }
+    return simd_all_true(comp_ctx, func_ctx, e_int_all_true_v2i64);
+}
 
-    /* zext <N x i1> to <N x iX> */
-    if (!(is_zero = LLVMBuildZExt(comp_ctx->builder, is_zero, vector_type,
-                                  "is_zero_ex"))) {
-        HANDLE_FAILURE("LLVMBuildZExt");
-        goto fail;
-    }
+bool
+aot_compile_simd_v128_any_true(AOTCompContext *comp_ctx,
+                               AOTFuncContext *func_ctx)
+{
+    LLVMTypeRef vector_type;
+    LLVMValueRef vector, result;
 
-    if (!(result = aot_call_llvm_intrinsic(comp_ctx, func_ctx, intrinsic, element_type,
-                                           &vector_type, 1, is_zero))) {
-        HANDLE_FAILURE("LLVMBuildCall");
-        goto fail;
+    if (!(vector_type = LLVMVectorType(INT1_TYPE, 128))) {
+        return false;
     }
 
-    if (!(zeros = LLVMConstNull(element_type))) {
-        HANDLE_FAILURE("LLVMConstNull");
+    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
+                                             "vector"))) {
         goto fail;
     }
 
-    if (!(result = LLVMBuildICmp(comp_ctx->builder, LLVMIntEQ, result, zeros,
-                                 "none"))) {
-        HANDLE_FAILURE("LLVMBuildICmp");
+    if (!(result = aot_call_llvm_intrinsic(
+            comp_ctx, func_ctx, "llvm.vector.reduce.or.v128i1", INT1_TYPE,
+            &vector_type, 1, vector))) {
         goto fail;
     }
 
     if (!(result =
-            LLVMBuildZExt(comp_ctx->builder, result, I32_TYPE, "ret"))) {
+            LLVMBuildZExt(comp_ctx->builder, result, I32_TYPE, "to_i32"))) {
         HANDLE_FAILURE("LLVMBuildZExt");
         goto fail;
     }
@@ -157,27 +137,3 @@ simd_all_true(AOTCompContext *comp_ctx,
 fail:
     return false;
 }
-
-bool
-aot_compile_simd_i8x16_all_true(AOTCompContext *comp_ctx,
-                                AOTFuncContext *func_ctx)
-{
-    return simd_all_true(comp_ctx, func_ctx, V128_i8x16_TYPE, INT8_TYPE,
-                         "llvm.experimental.vector.reduce.add.v16i8");
-}
-
-bool
-aot_compile_simd_i16x8_all_true(AOTCompContext *comp_ctx,
-                                AOTFuncContext *func_ctx)
-{
-    return simd_all_true(comp_ctx, func_ctx, V128_i16x8_TYPE, INT16_TYPE,
-                         "llvm.experimental.vector.reduce.add.v8i16");
-}
-
-bool
-aot_compile_simd_i32x4_all_true(AOTCompContext *comp_ctx,
-                                AOTFuncContext *func_ctx)
-{
-    return simd_all_true(comp_ctx, func_ctx, V128_i32x4_TYPE, I32_TYPE,
-                         "llvm.experimental.vector.reduce.add.v4i32");
-}
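
After this change, all_true compares each lane with zero and AND-reduces the resulting <N x i1> vector, while v128.any_true bitcasts the operand to <128 x i1> and OR-reduces it. The scalar semantics both lowerings target, illustrative only and not part of the patch (the _ref names are made up):

    #include <stdint.h>
    #include <stdio.h>

    /* Scalar references of i32x4.all_true and v128.any_true. */
    static int
    i32x4_all_true_ref(const uint32_t lanes[4])
    {
        for (int i = 0; i < 4; i++)
            if (lanes[i] == 0)
                return 0;
        return 1;
    }

    static int
    v128_any_true_ref(const uint8_t bytes[16])
    {
        for (int i = 0; i < 16; i++)
            if (bytes[i] != 0)
                return 1;
        return 0;
    }

    int
    main(void)
    {
        uint32_t v[4] = { 1, 2, 3, 4 };
        uint8_t zero[16] = { 0 };
        printf("%d %d\n", i32x4_all_true_ref(v),
               v128_any_true_ref(zero)); /* prints "1 0" */
        return 0;
    }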

+ 6 - 10
core/iwasm/compilation/simd/simd_bool_reductions.h

@@ -13,28 +13,24 @@ extern "C" {
 #endif
 
 bool
-aot_compile_simd_i8x16_any_true(AOTCompContext *comp_ctx,
-                                AOTFuncContext *func_ctx);
-
-bool
-aot_compile_simd_i16x8_any_true(AOTCompContext *comp_ctx,
+aot_compile_simd_i8x16_all_true(AOTCompContext *comp_ctx,
                                 AOTFuncContext *func_ctx);
 
 bool
-aot_compile_simd_i32x4_any_true(AOTCompContext *comp_ctx,
+aot_compile_simd_i16x8_all_true(AOTCompContext *comp_ctx,
                                 AOTFuncContext *func_ctx);
 
 bool
-aot_compile_simd_i8x16_all_true(AOTCompContext *comp_ctx,
+aot_compile_simd_i32x4_all_true(AOTCompContext *comp_ctx,
                                 AOTFuncContext *func_ctx);
 
 bool
-aot_compile_simd_i16x8_all_true(AOTCompContext *comp_ctx,
+aot_compile_simd_i64x2_all_true(AOTCompContext *comp_ctx,
                                 AOTFuncContext *func_ctx);
 
 bool
-aot_compile_simd_i32x4_all_true(AOTCompContext *comp_ctx,
-                                AOTFuncContext *func_ctx);
+aot_compile_simd_v128_any_true(AOTCompContext *comp_ctx,
+                               AOTFuncContext *func_ctx);
 
 #ifdef __cplusplus
 } /* end of extern "C" */

+ 116 - 1
core/iwasm/compilation/simd/simd_common.c

@@ -44,4 +44,119 @@ simd_bitcast_and_push_v128(const AOTCompContext *comp_ctx,
     return true;
 fail:
     return false;
-}
+}
+
+LLVMValueRef
+simd_lane_id_to_llvm_value(AOTCompContext *comp_ctx, uint8 lane_id)
+{
+    LLVMValueRef lane_indexes[] = {
+        LLVM_CONST(i32_zero),     LLVM_CONST(i32_one),
+        LLVM_CONST(i32_two),      LLVM_CONST(i32_three),
+        LLVM_CONST(i32_four),     LLVM_CONST(i32_five),
+        LLVM_CONST(i32_six),      LLVM_CONST(i32_seven),
+        LLVM_CONST(i32_eight),    LLVM_CONST(i32_nine),
+        LLVM_CONST(i32_ten),      LLVM_CONST(i32_eleven),
+        LLVM_CONST(i32_twelve),   LLVM_CONST(i32_thirteen),
+        LLVM_CONST(i32_fourteen), LLVM_CONST(i32_fifteen),
+    };
+
+    return lane_id < 16 ? lane_indexes[lane_id] : NULL;
+}
+
+LLVMValueRef
+simd_build_const_integer_vector(const AOTCompContext *comp_ctx,
+                                const LLVMTypeRef element_type,
+                                const int *element_value,
+                                uint32 length)
+{
+    LLVMValueRef vector = NULL;
+    LLVMValueRef *elements;
+    unsigned i;
+
+    if (!(elements = wasm_runtime_malloc(sizeof(LLVMValueRef) * length))) {
+        return NULL;
+    }
+
+    for (i = 0; i < length; i++) {
+        if (!(elements[i] =
+                LLVMConstInt(element_type, element_value[i], true))) {
+            HANDLE_FAILURE("LLVMConstInst");
+            goto fail;
+        }
+    }
+
+    if (!(vector = LLVMConstVector(elements, length))) {
+        HANDLE_FAILURE("LLVMConstVector");
+        goto fail;
+    }
+
+fail:
+    wasm_runtime_free(elements);
+    return vector;
+}
+
+LLVMValueRef
+simd_build_splat_const_integer_vector(const AOTCompContext *comp_ctx,
+                                      const LLVMTypeRef element_type,
+                                      const int64 element_value,
+                                      uint32 length)
+{
+    LLVMValueRef vector = NULL, element;
+    LLVMValueRef *elements;
+    unsigned i;
+
+    if (!(elements = wasm_runtime_malloc(sizeof(LLVMValueRef) * length))) {
+        return NULL;
+    }
+
+    if (!(element = LLVMConstInt(element_type, element_value, true))) {
+        HANDLE_FAILURE("LLVMConstInt");
+        goto fail;
+    }
+
+    for (i = 0; i < length; i++) {
+        elements[i] = element;
+    }
+
+    if (!(vector = LLVMConstVector(elements, length))) {
+        HANDLE_FAILURE("LLVMConstVector");
+        goto fail;
+    }
+
+fail:
+    wasm_runtime_free(elements);
+    return vector;
+}
+
+LLVMValueRef
+simd_build_splat_const_float_vector(const AOTCompContext *comp_ctx,
+                                    const LLVMTypeRef element_type,
+                                    const float element_value,
+                                    uint32 length)
+{
+    LLVMValueRef vector = NULL, element;
+    LLVMValueRef *elements;
+    unsigned i;
+
+    if (!(elements = wasm_runtime_malloc(sizeof(LLVMValueRef) * length))) {
+        return NULL;
+    }
+
+    if (!(element = LLVMConstReal(element_type, element_value))) {
+        HANDLE_FAILURE("LLVMConstReal");
+        goto fail;
+    }
+
+    for (i = 0; i < length; i++) {
+        elements[i] = element;
+    }
+
+    if (!(vector = LLVMConstVector(elements, length))) {
+        HANDLE_FAILURE("LLVMConstVector");
+        goto fail;
+    }
+
+fail:
+    wasm_runtime_free(elements);
+    return vector;
+}
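
The splat helper above centralizes what the per-file build_intx{4,8,16}_vector functions used to do at each call site: build one LLVM integer constant and repeat it into a constant vector. A standalone sketch against the plain LLVM-C API showing the kind of constant it produces (a <16 x i8> splat of 16, as used by the swizzle path); this is illustrative only, built outside WAMR with something like clang splat.c $(llvm-config --cflags --libs core):

    #include <stdio.h>
    #include <llvm-c/Core.h>

    int
    main(void)
    {
        LLVMValueRef elements[16];
        for (unsigned i = 0; i < 16; i++)
            elements[i] = LLVMConstInt(LLVMInt8Type(), 16, 1);

        /* constant <16 x i8> with every element equal to 16 */
        LLVMValueRef splat = LLVMConstVector(elements, 16);

        char *text = LLVMPrintValueToString(splat);
        printf("%s\n", text); /* <16 x i8> <i8 16, i8 16, ...> */
        LLVMDisposeMessage(text);
        return 0;
    }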

+ 27 - 0
core/iwasm/compilation/simd/simd_common.h

@@ -8,6 +8,13 @@
 
 #include "../aot_compiler.h"
 
+static inline bool
+is_target_x86(AOTCompContext *comp_ctx)
+{
+    return !strncmp(comp_ctx->target_arch, "x86_64", 6)
+           || !strncmp(comp_ctx->target_arch, "i386", 4);
+}
+
 LLVMValueRef
 simd_pop_v128_and_bitcast(const AOTCompContext *comp_ctx,
                           const AOTFuncContext *func_ctx,
@@ -20,4 +27,24 @@ simd_bitcast_and_push_v128(const AOTCompContext *comp_ctx,
                            LLVMValueRef vector,
                            const char *name);
 
+LLVMValueRef
+simd_lane_id_to_llvm_value(AOTCompContext *comp_ctx, uint8 lane_id);
+
+LLVMValueRef
+simd_build_const_integer_vector(const AOTCompContext *comp_ctx,
+                                const LLVMTypeRef element_type,
+                                const int *element_value,
+                                uint32 length);
+
+LLVMValueRef
+simd_build_splat_const_integer_vector(const AOTCompContext *comp_ctx,
+                                      const LLVMTypeRef element_type,
+                                      const int64 element_value,
+                                      uint32 length);
+
+LLVMValueRef
+simd_build_splat_const_float_vector(const AOTCompContext *comp_ctx,
+                                    const LLVMTypeRef element_type,
+                                    const float element_value,
+                                    uint32 length);
 #endif /* _SIMD_COMMON_H_ */

+ 8 - 0
core/iwasm/compilation/simd/simd_comparisons.c

@@ -160,6 +160,14 @@ aot_compile_simd_i32x4_compare(AOTCompContext *comp_ctx,
     return interger_vector_compare(comp_ctx, func_ctx, cond, V128_i32x4_TYPE);
 }
 
+bool
+aot_compile_simd_i64x2_compare(AOTCompContext *comp_ctx,
+                               AOTFuncContext *func_ctx,
+                               IntCond cond)
+{
+    return interger_vector_compare(comp_ctx, func_ctx, cond, V128_i64x2_TYPE);
+}
+
 static bool
 float_vector_compare(AOTCompContext *comp_ctx,
                      AOTFuncContext *func_ctx,

+ 5 - 0
core/iwasm/compilation/simd/simd_comparisons.h

@@ -27,6 +27,11 @@ aot_compile_simd_i32x4_compare(AOTCompContext *comp_ctx,
                                AOTFuncContext *func_ctx,
                                IntCond cond);
 
+bool
+aot_compile_simd_i64x2_compare(AOTCompContext *comp_ctx,
+                               AOTFuncContext *func_ctx,
+                               IntCond cond);
+
 bool
 aot_compile_simd_f32x4_compare(AOTCompContext *comp_ctx,
                                AOTFuncContext *func_ctx,

+ 35 - 85
core/iwasm/compilation/simd/simd_construct_values.c

@@ -3,6 +3,7 @@
  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  */
 
+#include "simd_common.h"
 #include "simd_construct_values.h"
 #include "../aot_emit_exception.h"
 #include "../interpreter/wasm_opcode.h"
@@ -14,23 +15,19 @@ aot_compile_simd_v128_const(AOTCompContext *comp_ctx,
                             const uint8 *imm_bytes)
 {
     uint64 imm1, imm2;
-    LLVMValueRef undef, first_long, agg1, second_long, agg2;
+    LLVMValueRef first_long, agg1, second_long, agg2;
 
     wasm_runtime_read_v128(imm_bytes, &imm1, &imm2);
 
-    if (!(undef = LLVMGetUndef(V128_i64x2_TYPE))) {
-        HANDLE_FAILURE("LLVMGetUndef");
-        goto fail;
-    }
-
     /* %agg1 = insertelement <2 x i64> undef, i16 0, i64 ${*imm} */
     if (!(first_long = I64_CONST(imm1))) {
         HANDLE_FAILURE("LLVMConstInt");
         goto fail;
     }
 
-    if (!(agg1 = LLVMBuildInsertElement(comp_ctx->builder, undef, first_long,
-                                        I32_ZERO, "agg1"))) {
+    if (!(agg1 =
+            LLVMBuildInsertElement(comp_ctx->builder, LLVM_CONST(i64x2_undef),
+                                   first_long, I32_ZERO, "agg1"))) {
         HANDLE_FAILURE("LLVMBuildInsertElement");
         goto fail;
     }
@@ -48,7 +45,6 @@ aot_compile_simd_v128_const(AOTCompContext *comp_ctx,
     }
 
     PUSH_V128(agg2);
-
     return true;
 fail:
     return false;
@@ -57,134 +53,88 @@ fail:
 bool
 aot_compile_simd_splat(AOTCompContext *comp_ctx,
                        AOTFuncContext *func_ctx,
-                       uint8 splat_opcode)
+                       uint8 opcode)
 {
-    LLVMValueRef value, undef, base, mask, new_vector, result;
-    LLVMTypeRef all_zero_ty;
-
-    switch (splat_opcode) {
+    uint32 opcode_index = opcode - SIMD_i8x16_splat;
+    LLVMValueRef value = NULL, base, new_vector;
+    LLVMValueRef undefs[] = {
+        LLVM_CONST(i8x16_undef), LLVM_CONST(i16x8_undef),
+        LLVM_CONST(i32x4_undef), LLVM_CONST(i64x2_undef),
+        LLVM_CONST(f32x4_undef), LLVM_CONST(f64x2_undef),
+    };
+    LLVMValueRef masks[] = {
+        LLVM_CONST(i32x16_zero), LLVM_CONST(i32x8_zero),
+        LLVM_CONST(i32x4_zero),  LLVM_CONST(i32x2_zero),
+        LLVM_CONST(i32x4_zero),  LLVM_CONST(i32x2_zero),
+    };
+
+    switch (opcode) {
         case SIMD_i8x16_splat:
         {
             LLVMValueRef input;
             POP_I32(input);
-
             /* trunc i32 %input to i8 */
-            if (!(value = LLVMBuildTrunc(comp_ctx->builder, input, INT8_TYPE,
-                                         "trunc"))) {
-                HANDLE_FAILURE("LLVMBuildTrunc");
-                goto fail;
-            }
-            undef = LLVMGetUndef(V128_i8x16_TYPE);
-            if (!(all_zero_ty = LLVMVectorType(I32_TYPE, 16))) {
-                HANDLE_FAILURE("LLVMVectorType");
-                goto fail;
-            }
+            value =
+              LLVMBuildTrunc(comp_ctx->builder, input, INT8_TYPE, "trunc");
             break;
         }
         case SIMD_i16x8_splat:
         {
             LLVMValueRef input;
             POP_I32(input);
-
             /* trunc i32 %input to i16 */
-            if (!(value = LLVMBuildTrunc(comp_ctx->builder, input, INT16_TYPE,
-                                         "trunc"))) {
-                HANDLE_FAILURE("LLVMBuildTrunc");
-                goto fail;
-            }
-            undef = LLVMGetUndef(V128_i16x8_TYPE);
-            if (!(all_zero_ty = LLVMVectorType(I32_TYPE, 8))) {
-                HANDLE_FAILURE("LLVMVectorType");
-                goto fail;
-            }
+            value =
+              LLVMBuildTrunc(comp_ctx->builder, input, INT16_TYPE, "trunc");
             break;
         }
         case SIMD_i32x4_splat:
         {
             POP_I32(value);
-            undef = LLVMGetUndef(V128_i32x4_TYPE);
-
-            if (!(all_zero_ty = LLVMVectorType(I32_TYPE, 4))) {
-                HANDLE_FAILURE("LLVMVectorType");
-                goto fail;
-            }
             break;
         }
         case SIMD_i64x2_splat:
         {
             POP(value, VALUE_TYPE_I64);
-            undef = LLVMGetUndef(V128_i64x2_TYPE);
-
-            if (!(all_zero_ty = LLVMVectorType(I32_TYPE, 2))) {
-                HANDLE_FAILURE("LLVMVectorType");
-                goto fail;
-            }
             break;
         }
         case SIMD_f32x4_splat:
         {
             POP(value, VALUE_TYPE_F32);
-            undef = LLVMGetUndef(V128_f32x4_TYPE);
-
-            if (!(all_zero_ty = LLVMVectorType(I32_TYPE, 4))) {
-                HANDLE_FAILURE("LLVMVectorType");
-                goto fail;
-            }
             break;
         }
         case SIMD_f64x2_splat:
         {
             POP(value, VALUE_TYPE_F64);
-            undef = LLVMGetUndef(V128_f64x2_TYPE);
-
-            if (!(all_zero_ty = LLVMVectorType(I32_TYPE, 2))) {
-                HANDLE_FAILURE("LLVMVectorType");
-                goto fail;
-            }
             break;
         }
         default:
         {
-            bh_assert(0);
-            goto fail;
+            break;
         }
     }
-    if (!undef) {
-        HANDLE_FAILURE("LVMGetUndef");
+
+    if (!value) {
         goto fail;
     }
 
     /* insertelement <n x ty> undef, ty %value, i32 0 */
-    if (!(base = LLVMBuildInsertElement(comp_ctx->builder, undef, value,
-                                        I32_ZERO, "base"))) {
+    if (!(base =
+            LLVMBuildInsertElement(comp_ctx->builder, undefs[opcode_index],
+                                   value, I32_ZERO, "base"))) {
         HANDLE_FAILURE("LLVMBuildInsertElement");
         goto fail;
     }
 
-    /* <n x i32> zeroinitializer */
-    if (!(mask = LLVMConstNull(all_zero_ty))) {
-        HANDLE_FAILURE("LLVMConstNull");
-        goto fail;
-    }
-
     /* shufflevector <ty1> %base, <ty2> undef, <n x i32> zeroinitializer */
-    if (!(new_vector = LLVMBuildShuffleVector(comp_ctx->builder, base, undef,
-                                              mask, "new_vector"))) {
+    if (!(new_vector = LLVMBuildShuffleVector(
+            comp_ctx->builder, base, undefs[opcode_index], masks[opcode_index],
+            "new_vector"))) {
         HANDLE_FAILURE("LLVMBuildShuffleVector");
         goto fail;
     }
 
-    /* bitcast <ty> <value> to <2 x i64> */
-    if (!(result = LLVMBuildBitCast(comp_ctx->builder, new_vector,
-                                    V128_i64x2_TYPE, "ret"))) {
-        HANDLE_FAILURE("LLVMBuidlCast");
-        goto fail;
-    }
-
-    /* push result into the stack */
-    PUSH_V128(result);
-
-    return true;
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, new_vector,
+                                      "result");
 fail:
     return false;
 }
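
The reworked splat path pops one scalar, inserts it into lane 0 of an undef vector, and shuffles with a zeroinitializer mask so every lane copies lane 0. The scalar semantics are simply "repeat the value in every lane", illustrative only and not part of the patch (the _ref name is made up):

    #include <stdint.h>
    #include <stdio.h>

    /* Scalar reference of i32x4.splat: every lane holds the same value. */
    static void
    i32x4_splat_ref(uint32_t value, uint32_t out[4])
    {
        for (int i = 0; i < 4; i++)
            out[i] = value;
    }

    int
    main(void)
    {
        uint32_t out[4];
        i32x4_splat_ref(7, out);
        for (int i = 0; i < 4; i++)
            printf("%u ", out[i]); /* 7 7 7 7 */
        printf("\n");
        return 0;
    }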

+ 583 - 600
core/iwasm/compilation/simd/simd_conversions.c

@@ -10,784 +10,767 @@
 #include "../../aot/aot_runtime.h"
 
 static bool
-is_target_x86(AOTCompContext *comp_ctx)
-{
-    return !strncmp(comp_ctx->target_arch, "x86_64", 6) ||
-           !strncmp(comp_ctx->target_arch, "i386", 4);
-}
-
-static bool
-simd_integer_narrow(AOTCompContext *comp_ctx,
-                    AOTFuncContext *func_ctx,
-                    bool is_signed,
-                    LLVMTypeRef in_vector_type,
-                    LLVMTypeRef out_vector_type,
-                    const char *instrinsic)
+simd_integer_narrow_x86(AOTCompContext *comp_ctx,
+                        AOTFuncContext *func_ctx,
+                        LLVMTypeRef in_vector_type,
+                        LLVMTypeRef out_vector_type,
+                        const char *intrinsic)
 {
     LLVMValueRef vector1, vector2, result;
     LLVMTypeRef param_types[2] = { in_vector_type, in_vector_type };
 
     if (!(vector2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
-                                              in_vector_type, "vec2"))) {
-        goto fail;
-    }
-
-    if (!(vector1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
-                                              in_vector_type, "vec1"))) {
-        goto fail;
+                                              in_vector_type, "vec2"))
+        || !(vector1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                                 in_vector_type, "vec1"))) {
+        return false;
     }
 
-    if (!(result =
-            aot_call_llvm_intrinsic(comp_ctx, func_ctx, instrinsic, out_vector_type,
-                                    param_types, 2, vector1, vector2))) {
+    if (!(result = aot_call_llvm_intrinsic(comp_ctx, func_ctx, intrinsic,
+                                           out_vector_type, param_types, 2,
+                                           vector1, vector2))) {
         HANDLE_FAILURE("LLVMBuildCall");
-        goto fail;
+        return false;
     }
 
-    if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE,
-                                    "ret"))) {
-        HANDLE_FAILURE("LLVMBuildBitCast");
-        goto fail;
-    }
-
-    PUSH_V128(result);
-    return true;
-fail:
-    return false;
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
 }
 
+enum integer_sat_type {
+    e_sat_i16x8 = 0,
+    e_sat_i32x4,
+    e_sat_i64x2,
+    e_sat_i32x8,
+};
+
 static LLVMValueRef
-build_intx4_vector(const AOTCompContext *comp_ctx,
-                    const LLVMTypeRef element_type,
-                    const int *element_value)
+simd_saturate(AOTCompContext *comp_ctx,
+              AOTFuncContext *func_ctx,
+              enum integer_sat_type itype,
+              LLVMValueRef vector,
+              LLVMValueRef min,
+              LLVMValueRef max,
+              bool is_signed)
 {
-    LLVMValueRef vector, elements[4];
-    unsigned i;
-
-    for (i = 0; i < 4; i++) {
-        if (!(elements[i] =
-                LLVMConstInt(element_type, element_value[i], true))) {
-            HANDLE_FAILURE("LLVMConstInst");
-            goto fail;
+    LLVMValueRef result;
+    LLVMTypeRef vector_type;
+
+    LLVMTypeRef param_types[][2] = {
+        { V128_i16x8_TYPE, V128_i16x8_TYPE },
+        { V128_i32x4_TYPE, V128_i32x4_TYPE },
+        { V128_i64x2_TYPE, V128_i64x2_TYPE },
+        { 0 },
+    };
+
+    const char *smin_intrinsic[] = {
+        "llvm.smin.v8i16",
+        "llvm.smin.v4i32",
+        "llvm.smin.v2i64",
+        "llvm.smin.v8i32",
+    };
+
+    const char *umin_intrinsic[] = {
+        "llvm.umin.v8i16",
+        "llvm.umin.v4i32",
+        "llvm.umin.v2i64",
+        "llvm.umin.v8i32",
+    };
+
+    const char *smax_intrinsic[] = {
+        "llvm.smax.v8i16",
+        "llvm.smax.v4i32",
+        "llvm.smax.v2i64",
+        "llvm.smax.v8i32",
+    };
+
+    const char *umax_intrinsic[] = {
+        "llvm.umax.v8i16",
+        "llvm.umax.v4i32",
+        "llvm.umax.v2i64",
+        "llvm.umax.v8i32",
+    };
+
+    if (e_sat_i32x8 == itype) {
+        if (!(vector_type = LLVMVectorType(I32_TYPE, 8))) {
+            HANDLE_FAILURE("LLVMVectorType");
+            return NULL;
         }
+
+        param_types[itype][0] = vector_type;
+        param_types[itype][1] = vector_type;
     }
 
-    if (!(vector = LLVMConstVector(elements, 4))) {
-        HANDLE_FAILURE("LLVMConstVector");
-        goto fail;
+    if (!(result = aot_call_llvm_intrinsic(
+            comp_ctx, func_ctx,
+            is_signed ? smin_intrinsic[itype] : umin_intrinsic[itype],
+            param_types[itype][0], param_types[itype], 2, vector, max))
+        || !(result = aot_call_llvm_intrinsic(
+               comp_ctx, func_ctx,
+               is_signed ? smax_intrinsic[itype] : umax_intrinsic[itype],
+               param_types[itype][0], param_types[itype], 2, result, min))) {
+        return NULL;
     }
-    return vector;
-fail:
-    return NULL;
+
+    return result;
 }
 
-static LLVMValueRef
-build_intx8_vector(const AOTCompContext *comp_ctx,
-                    const LLVMTypeRef element_type,
-                    const int *element_value)
+static bool
+simd_integer_narrow_common(AOTCompContext *comp_ctx,
+                           AOTFuncContext *func_ctx,
+                           enum integer_sat_type itype,
+                           bool is_signed)
 {
-    LLVMValueRef vector, elements[8];
-    unsigned i;
-
-    for (i = 0; i < 8; i++) {
-        if (!(elements[i] =
-                LLVMConstInt(element_type, element_value[i], true))) {
-            HANDLE_FAILURE("LLVMConstInst");
-            goto fail;
-        }
+    LLVMValueRef vec1, vec2, min, max, mask, result;
+    LLVMTypeRef in_vector_type[] = { V128_i16x8_TYPE, V128_i32x4_TYPE,
+                                     V128_i64x2_TYPE };
+    LLVMTypeRef min_max_type[] = { INT16_TYPE, I32_TYPE, I64_TYPE };
+    LLVMTypeRef trunc_type[3] = { 0 };
+    uint8 length[] = { 8, 4, 2 };
+
+    int64 smin[] = { 0xff80, 0xffFF8000, 0xffFFffFF80000000 };
+    int64 umin[] = { 0x0, 0x0, 0x0 };
+    int64 smax[] = { 0x007f, 0x00007fff, 0x000000007fFFffFF };
+    int64 umax[] = { 0x00ff, 0x0000ffff, 0x00000000ffFFffFF };
+
+    LLVMValueRef mask_element[] = {
+        LLVM_CONST(i32_zero),     LLVM_CONST(i32_one),
+        LLVM_CONST(i32_two),      LLVM_CONST(i32_three),
+        LLVM_CONST(i32_four),     LLVM_CONST(i32_five),
+        LLVM_CONST(i32_six),      LLVM_CONST(i32_seven),
+        LLVM_CONST(i32_eight),    LLVM_CONST(i32_nine),
+        LLVM_CONST(i32_ten),      LLVM_CONST(i32_eleven),
+        LLVM_CONST(i32_twelve),   LLVM_CONST(i32_thirteen),
+        LLVM_CONST(i32_fourteen), LLVM_CONST(i32_fifteen),
+    };
+
+    if (!(trunc_type[0] = LLVMVectorType(INT8_TYPE, 8))
+        || !(trunc_type[1] = LLVMVectorType(INT16_TYPE, 4))
+        || !(trunc_type[2] = LLVMVectorType(I32_TYPE, 2))) {
+        HANDLE_FAILURE("LLVMVectorType");
+        return false;
+    }
+
+    if (!(vec2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                           in_vector_type[itype], "vec2"))
+        || !(vec1 = simd_pop_v128_and_bitcast(
+               comp_ctx, func_ctx, in_vector_type[itype], "vec1"))) {
+        return false;
+    }
+
+    if (!(max = simd_build_splat_const_integer_vector(
+            comp_ctx, min_max_type[itype],
+            is_signed ? smax[itype] : umax[itype], length[itype]))
+        || !(min = simd_build_splat_const_integer_vector(
+               comp_ctx, min_max_type[itype],
+               is_signed ? smin[itype] : umin[itype], length[itype]))) {
+        return false;
+    }
+
+    /* sat */
+    if (!(vec1 = simd_saturate(comp_ctx, func_ctx, itype, vec1, min, max,
+                               is_signed))
+        || !(vec2 = simd_saturate(comp_ctx, func_ctx, itype, vec2, min,
+                                  max, is_signed))) {
+        return false;
+    }
+
+    /* trunc */
+    if (!(vec1 = LLVMBuildTrunc(comp_ctx->builder, vec1, trunc_type[itype],
+                                "vec1_trunc"))
+        || !(vec2 = LLVMBuildTrunc(comp_ctx->builder, vec2, trunc_type[itype],
+                                   "vec2_trunc"))) {
+        HANDLE_FAILURE("LLVMBuildTrunc");
+        return false;
     }
 
-    if (!(vector = LLVMConstVector(elements, 8))) {
-        HANDLE_FAILURE("LLVMConstVector");
-        goto fail;
+    /* combine */
+    if (!(mask = LLVMConstVector(mask_element, (length[itype] << 1)))) {
+        HANDLE_FAILURE("LLVMConstInt");
+        return false;
+    }
+
+    if (!(result = LLVMBuildShuffleVector(comp_ctx->builder, vec1, vec2, mask,
+                                          "vec_shuffle"))) {
+        HANDLE_FAILURE("LLVMBuildShuffleVector");
+        return false;
     }
 
-    return vector;
-fail:
-    return NULL;
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
 }
 
-static LLVMValueRef
-build_intx16_vector(const AOTCompContext *comp_ctx,
-                    const LLVMTypeRef element_type,
-                    const int *element_value)
+bool
+aot_compile_simd_i8x16_narrow_i16x8(AOTCompContext *comp_ctx,
+                                    AOTFuncContext *func_ctx,
+                                    bool is_signed)
 {
-    LLVMValueRef vector, elements[16];
-    unsigned i;
-
-    for (i = 0; i < 16; i++) {
-        if (!(elements[i] =
-                LLVMConstInt(element_type, element_value[i], true))) {
-            HANDLE_FAILURE("LLVMConstInst");
-            goto fail;
-        }
+    if (is_target_x86(comp_ctx)) {
+        return simd_integer_narrow_x86(
+          comp_ctx, func_ctx, V128_i16x8_TYPE, V128_i8x16_TYPE,
+          is_signed ? "llvm.x86.sse2.packsswb.128"
+                    : "llvm.x86.sse2.packuswb.128");
     }
-
-    if (!(vector = LLVMConstVector(elements, 16))) {
-        HANDLE_FAILURE("LLVMConstVector");
-        goto fail;
+    else {
+        return simd_integer_narrow_common(comp_ctx, func_ctx, e_sat_i16x8,
+                                          is_signed);
     }
-
-    return vector;
-fail:
-    return NULL;
 }
 
 bool
-aot_compile_simd_i8x16_narrow_i16x8_x86(AOTCompContext *comp_ctx,
+aot_compile_simd_i16x8_narrow_i32x4(AOTCompContext *comp_ctx,
                                     AOTFuncContext *func_ctx,
                                     bool is_signed)
 {
-    return simd_integer_narrow(
-      comp_ctx, func_ctx, is_signed, V128_i16x8_TYPE, V128_i8x16_TYPE,
-      is_signed ? "llvm.x86.sse2.packsswb.128" : "llvm.x86.sse2.packuswb.128");
+    if (is_target_x86(comp_ctx)) {
+        return simd_integer_narrow_x86(comp_ctx, func_ctx, V128_i32x4_TYPE,
+                                       V128_i16x8_TYPE,
+                                       is_signed ? "llvm.x86.sse2.packssdw.128"
+                                                 : "llvm.x86.sse41.packusdw");
+    }
+    else {
+        return simd_integer_narrow_common(comp_ctx, func_ctx, e_sat_i32x4,
+                                          is_signed);
+    }
 }
 
 bool
-aot_compile_simd_i16x8_narrow_i32x4_x86(AOTCompContext *comp_ctx,
+aot_compile_simd_i32x4_narrow_i64x2(AOTCompContext *comp_ctx,
                                     AOTFuncContext *func_ctx,
                                     bool is_signed)
 {
-    return simd_integer_narrow(
-      comp_ctx, func_ctx, is_signed, V128_i32x4_TYPE, V128_i16x8_TYPE,
-      is_signed ? "llvm.x86.sse2.packssdw.128" : "llvm.x86.sse41.packusdw");
+    /* TODO: x86 intrinsics */
+    return simd_integer_narrow_common(comp_ctx, func_ctx, e_sat_i64x2,
+                                      is_signed);
 }
 
-static bool
-aot_compile_simd_i8x16_narrow_i16x8_common(AOTCompContext *comp_ctx,
-                                           AOTFuncContext *func_ctx,
-                                           bool is_signed)
-{
-    LLVMValueRef vector1, vector2, result, vector_min, vector_max, shuffle,
-      vector1_clamped, vector2_clamped, vector1_trunced, vector2_trunced,
-      shuffle_vector;
-    LLVMValueRef v1_gt_max, v1_lt_min, v2_gt_max, v2_lt_min;
-
-    int min_s_array[8] = { 0xff80, 0xff80, 0xff80, 0xff80,
-                           0xff80, 0xff80, 0xff80, 0xff80 };
-    int max_s_array[8] = { 0x007f, 0x007f, 0x007f, 0x007f,
-                           0x007f, 0x007f, 0x007f, 0x007f };
+enum integer_extend_type {
+    e_ext_i8x16,
+    e_ext_i16x8,
+    e_ext_i32x4,
+};
 
-    int min_u_array[8] = { 0x0000, 0x0000, 0x0000, 0x0000,
-                           0x0000, 0x0000, 0x0000, 0x0000 };
-    int max_u_array[8] = { 0x00ff, 0x00ff, 0x00ff, 0x00ff,
-                           0x00ff, 0x00ff, 0x00ff, 0x00ff };
-
-    int shuffle_array[16] = { 0, 1, 2,  3,  4,  5,  6,  7,
-                              8, 9, 10, 11, 12, 13, 14, 15 };
-
-    if (!(vector2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
-                                              V128_i16x8_TYPE, "vec2"))) {
-        goto fail;
-    }
-
-    if (!(vector1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
-                                              V128_i16x8_TYPE, "vec1"))) {
-        goto fail;
-    }
-
-    if (!(vector_min = build_intx8_vector(
-            comp_ctx, INT16_TYPE, is_signed ? min_s_array : min_u_array))) {
-        goto fail;
-    }
-    if (!(vector_max = build_intx8_vector(
-            comp_ctx, INT16_TYPE, is_signed ? max_s_array : max_u_array))) {
-        goto fail;
-    }
-    if (!(shuffle = build_intx16_vector(comp_ctx, I32_TYPE, shuffle_array))) {
-        goto fail;
-    }
-
-    if (!(v1_gt_max = LLVMBuildICmp(comp_ctx->builder, LLVMIntSGT, vector1,
-                                    vector_max, "v1_great_than_max"))) {
-        HANDLE_FAILURE("LLVMBuldICmp");
-        goto fail;
-    }
-
-    if (!(v2_gt_max = LLVMBuildICmp(comp_ctx->builder, LLVMIntSGT, vector2,
-                                    vector_max, "v2_great_than_max"))) {
-        HANDLE_FAILURE("LLVMBuldICmp");
-        goto fail;
-    }
-
-    if (!(v1_lt_min = LLVMBuildICmp(comp_ctx->builder, LLVMIntSLT, vector1,
-                                    vector_min, "v1_less_than_min"))) {
-        HANDLE_FAILURE("LLVMBuldICmp");
-        goto fail;
-    }
-
-    if (!(v2_lt_min = LLVMBuildICmp(comp_ctx->builder, LLVMIntSLT, vector2,
-                                    vector_min, "v2_less_than_min"))) {
-        HANDLE_FAILURE("LLVMBuldICmp");
-        goto fail;
+static LLVMValueRef
+simd_integer_extension(AOTCompContext *comp_ctx,
+                       AOTFuncContext *func_ctx,
+                       enum integer_extend_type itype,
+                       LLVMValueRef vector,
+                       bool lower_half,
+                       bool is_signed)
+{
+    LLVMValueRef mask, sub_vector, result;
+    LLVMValueRef bits[] = {
+        LLVM_CONST(i32_zero),     LLVM_CONST(i32_one),
+        LLVM_CONST(i32_two),      LLVM_CONST(i32_three),
+        LLVM_CONST(i32_four),     LLVM_CONST(i32_five),
+        LLVM_CONST(i32_six),      LLVM_CONST(i32_seven),
+        LLVM_CONST(i32_eight),    LLVM_CONST(i32_nine),
+        LLVM_CONST(i32_ten),      LLVM_CONST(i32_eleven),
+        LLVM_CONST(i32_twelve),   LLVM_CONST(i32_thirteen),
+        LLVM_CONST(i32_fourteen), LLVM_CONST(i32_fifteen),
+    };
+    LLVMTypeRef out_vector_type[] = { V128_i16x8_TYPE, V128_i32x4_TYPE,
+                                      V128_i64x2_TYPE };
+    LLVMValueRef undef[] = { LLVM_CONST(i8x16_undef), LLVM_CONST(i16x8_undef),
+                             LLVM_CONST(i32x4_undef) };
+    uint32 sub_vector_length[] = { 8, 4, 2 };
+
+    if (!(mask = lower_half ? LLVMConstVector(bits, sub_vector_length[itype])
+                            : LLVMConstVector(bits + sub_vector_length[itype],
+                                              sub_vector_length[itype]))) {
+        HANDLE_FAILURE("LLVMConstVector");
+        return false;
     }
 
-    if (!(vector1_clamped =
-            LLVMBuildSelect(comp_ctx->builder, v1_gt_max, vector_max, vector1,
-                            "vector1_clamped_max"))) {
-        HANDLE_FAILURE("LLVMBuildSelect");
-        goto fail;
+    /* retrieve the low or high half */
+    if (!(sub_vector = LLVMBuildShuffleVector(comp_ctx->builder, vector,
+                                              undef[itype], mask, "half"))) {
+        HANDLE_FAILURE("LLVMBuildShuffleVector");
+        return false;
     }
 
-    if (!(vector1_clamped =
-            LLVMBuildSelect(comp_ctx->builder, v1_lt_min, vector_min,
-                            vector1_clamped, "vector1_clamped_min"))) {
-        HANDLE_FAILURE("LLVMBuildSelect");
-        goto fail;
+    if (is_signed) {
+        if (!(result = LLVMBuildSExt(comp_ctx->builder, sub_vector,
+                                     out_vector_type[itype], "sext"))) {
+            HANDLE_FAILURE("LLVMBuildSExt");
+            return false;
+        }
     }
-
-    if (!(vector2_clamped =
-            LLVMBuildSelect(comp_ctx->builder, v2_gt_max, vector_max, vector2,
-                            "vector2_clamped_max"))) {
-        HANDLE_FAILURE("LLVMBuildSelect");
-        goto fail;
+    else {
+        if (!(result = LLVMBuildZExt(comp_ctx->builder, sub_vector,
+                                     out_vector_type[itype], "zext"))) {
+            HANDLE_FAILURE("LLVMBuildZExt");
+            return false;
+        }
     }
 
-    if (!(vector2_clamped =
-            LLVMBuildSelect(comp_ctx->builder, v2_lt_min, vector_min,
-                            vector2_clamped, "vector2_clamped_min"))) {
-        HANDLE_FAILURE("LLVMBuildSelect");
-        goto fail;
-    }
+    return result;
+}
 
-    if (!(vector1_trunced =
-            LLVMBuildTrunc(comp_ctx->builder, vector1_clamped,
-                           LLVMVectorType(INT8_TYPE, 8), "vector1_trunced"))) {
-        HANDLE_FAILURE("LLVMBuildTrunc");
-        goto fail;
-    }
+static bool
+simd_integer_extension_wrapper(AOTCompContext *comp_ctx,
+                               AOTFuncContext *func_ctx,
+                               enum integer_extend_type itype,
+                               bool lower_half,
+                               bool is_signed)
+{
+    LLVMValueRef vector, result;
 
-    if (!(vector2_trunced =
-            LLVMBuildTrunc(comp_ctx->builder, vector2_clamped,
-                           LLVMVectorType(INT8_TYPE, 8), "vector2_trunced"))) {
-        HANDLE_FAILURE("LLVMBuildTrunc");
-        goto fail;
-    }
+    LLVMTypeRef in_vector_type[] = { V128_i8x16_TYPE, V128_i16x8_TYPE,
+                                     V128_i32x4_TYPE };
 
-    if (!(shuffle_vector = LLVMBuildShuffleVector(
-            comp_ctx->builder, vector1_trunced, vector2_trunced, shuffle,
-            "shuffle_vector"))) {
-        HANDLE_FAILURE("LLVMBuildShuffleVector");
-        goto fail;
+    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                             in_vector_type[itype], "vec"))) {
+        return false;
     }
 
-    if (!(result = LLVMBuildBitCast(comp_ctx->builder, shuffle_vector,
-                                    V128_i64x2_TYPE, "ret"))) {
-        HANDLE_FAILURE("LLVMBuildBitCast");
-        goto fail;
+    if (!(result = simd_integer_extension(comp_ctx, func_ctx, itype, vector,
+                                          lower_half, is_signed))) {
+        return false;
     }
 
-    PUSH_V128(result);
-    return true;
-
-fail:
-    return false;
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
 }
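
A minimal scalar sketch of the extend lane semantics built by simd_integer_extension above (hypothetical helper, signed i8x16 to i16x8 case only):

#include <stdint.h>

/* illustrative reference model: sign-extend the eight low lanes;
 * the high variant starts from in[8] instead of in[0] */
static void i16x8_extend_low_i8x16_s(const int8_t in[16], int16_t out[8])
{
    for (int i = 0; i < 8; i++)
        out[i] = (int16_t)in[i];
}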
 
 bool
-aot_compile_simd_i8x16_narrow_i16x8(AOTCompContext *comp_ctx,
+aot_compile_simd_i16x8_extend_i8x16(AOTCompContext *comp_ctx,
                                     AOTFuncContext *func_ctx,
+                                    bool lower_half,
                                     bool is_signed)
 {
-    if (is_target_x86(comp_ctx)) {
-        return aot_compile_simd_i8x16_narrow_i16x8_x86(comp_ctx, func_ctx,
-                                                       is_signed);
-    }
-    else {
-        return aot_compile_simd_i8x16_narrow_i16x8_common(comp_ctx, func_ctx,
-                                                          is_signed);
-    }
+    return simd_integer_extension_wrapper(comp_ctx, func_ctx, e_ext_i8x16,
+                                          lower_half, is_signed);
 }
 
-static bool
-aot_compile_simd_i16x8_narrow_i32x4_common(AOTCompContext *comp_ctx,
-                                           AOTFuncContext *func_ctx,
-                                           bool is_signed)
+bool
+aot_compile_simd_i32x4_extend_i16x8(AOTCompContext *comp_ctx,
+                                    AOTFuncContext *func_ctx,
+                                    bool lower_half,
+                                    bool is_signed)
 {
-    LLVMValueRef vector1, vector2, result, vector_min, vector_max, shuffle,
-      vector1_clamped, vector2_clamped, vector1_trunced, vector2_trunced,
-      shuffle_vector;
-    LLVMValueRef v1_gt_max, v1_lt_min, v2_gt_max, v2_lt_min;
-
-    int min_s_array[4] = { 0xffff8000, 0xffff8000, 0xffff8000, 0xffff8000 };
-    int32 max_s_array[4] = { 0x00007fff, 0x00007fff, 0x00007fff, 0x00007fff };
-
-    int min_u_array[4] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000 };
-    int max_u_array[4] = { 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff };
-
-    int shuffle_array[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
-
-    if (!(vector2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
-                                              V128_i32x4_TYPE, "vec2"))) {
-        goto fail;
-    }
-
-    if (!(vector1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
-                                              V128_i32x4_TYPE, "vec1"))) {
-        goto fail;
-    }
+    return simd_integer_extension_wrapper(comp_ctx, func_ctx, e_ext_i16x8,
+                                          lower_half, is_signed);
+}
 
-    if (!(vector_min = build_intx4_vector(
-            comp_ctx, I32_TYPE, is_signed ? min_s_array : min_u_array))) {
-        goto fail;
-    }
-    if (!(vector_max = build_intx4_vector(
-            comp_ctx, I32_TYPE, is_signed ? max_s_array : max_u_array))) {
-        goto fail;
-    }
-    if (!(shuffle = build_intx8_vector(comp_ctx, I32_TYPE, shuffle_array))) {
-        goto fail;
-    }
+bool
+aot_compile_simd_i64x2_extend_i32x4(AOTCompContext *comp_ctx,
+                                    AOTFuncContext *func_ctx,
+                                    bool lower_half,
+                                    bool is_signed)
+{
+    return simd_integer_extension_wrapper(comp_ctx, func_ctx, e_ext_i32x4,
+                                          lower_half, is_signed);
+}
 
-    if (!(v1_gt_max = LLVMBuildICmp(comp_ctx->builder, LLVMIntSGT, vector1,
-                                    vector_max, "v1_great_than_max"))) {
-        HANDLE_FAILURE("LLVMBuldICmp");
-        goto fail;
-    }
+static LLVMValueRef
+simd_trunc_sat(AOTCompContext *comp_ctx,
+               AOTFuncContext *func_ctx,
+               const char *intrinsics,
+               LLVMTypeRef in_vector_type,
+               LLVMTypeRef out_vector_type)
+{
+    LLVMValueRef vector, result;
+    LLVMTypeRef param_types[] = { in_vector_type };
 
-    if (!(v2_gt_max = LLVMBuildICmp(comp_ctx->builder, LLVMIntSGT, vector2,
-                                    vector_max, "v2_great_than_max"))) {
-        HANDLE_FAILURE("LLVMBuldICmp");
-        goto fail;
+    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                             in_vector_type, "vector"))) {
+        return false;
     }
 
-    if (!(v1_lt_min = LLVMBuildICmp(comp_ctx->builder, LLVMIntSLT, vector1,
-                                    vector_min, "v1_less_than_min"))) {
-        HANDLE_FAILURE("LLVMBuldICmp");
-        goto fail;
+    if (!(result = aot_call_llvm_intrinsic(comp_ctx, func_ctx, intrinsics,
+                                           out_vector_type, param_types, 1,
+                                           vector))) {
+        return false;
     }
 
-    if (!(v2_lt_min = LLVMBuildICmp(comp_ctx->builder, LLVMIntSLT, vector2,
-                                    vector_min, "v2_less_than_min"))) {
-        HANDLE_FAILURE("LLVMBuldICmp");
-        goto fail;
-    }
+    return result;
+}
 
-    if (!(vector1_clamped =
-            LLVMBuildSelect(comp_ctx->builder, v1_gt_max, vector_max, vector1,
-                            "vector1_clamped_max"))) {
-        HANDLE_FAILURE("LLVMBuildSelect");
-        goto fail;
+bool
+aot_compile_simd_i32x4_trunc_sat_f32x4(AOTCompContext *comp_ctx,
+                                       AOTFuncContext *func_ctx,
+                                       bool is_signed)
+{
+    LLVMValueRef result;
+    if (!(result = simd_trunc_sat(comp_ctx, func_ctx,
+                                  is_signed ? "llvm.fptosi.sat.v4i32.v4f32"
+                                            : "llvm.fptoui.sat.v4i32.v4f32",
+                                  V128_f32x4_TYPE, V128_i32x4_TYPE))) {
+        return false;
     }
 
-    if (!(vector1_clamped =
-            LLVMBuildSelect(comp_ctx->builder, v1_lt_min, vector_min,
-                            vector1_clamped, "vector1_clamped_min"))) {
-        HANDLE_FAILURE("LLVMBuildSelect");
-        goto fail;
-    }
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
+}
 
-    if (!(vector2_clamped =
-            LLVMBuildSelect(comp_ctx->builder, v2_gt_max, vector_max, vector2,
-                            "vector2_clamped_max"))) {
-        HANDLE_FAILURE("LLVMBuildSelect");
-        goto fail;
+bool
+aot_compile_simd_i32x4_trunc_sat_f64x2(AOTCompContext *comp_ctx,
+                                       AOTFuncContext *func_ctx,
+                                       bool is_signed)
+{
+    LLVMValueRef result, zero, mask;
+    LLVMTypeRef out_vector_type;
+    LLVMValueRef lanes[] = {
+        LLVM_CONST(i32_zero),
+        LLVM_CONST(i32_one),
+        LLVM_CONST(i32_two),
+        LLVM_CONST(i32_three),
+    };
+
+    if (!(out_vector_type = LLVMVectorType(I32_TYPE, 2))) {
+        HANDLE_FAILURE("LLVMVectorType");
+        return false;
+    }
+
+    if (!(result = simd_trunc_sat(comp_ctx, func_ctx,
+                                  is_signed ? "llvm.fptosi.sat.v2i32.v2f64"
+                                            : "llvm.fptoui.sat.v2i32.v2f64",
+                                  V128_f64x2_TYPE, out_vector_type))) {
+        return false;
+    }
+
+    if (!(zero = LLVMConstNull(out_vector_type))) {
+        HANDLE_FAILURE("LLVMConstNull");
+        return false;
     }
 
-    if (!(vector2_clamped =
-            LLVMBuildSelect(comp_ctx->builder, v2_lt_min, vector_min,
-                            vector2_clamped, "vector2_clamped_min"))) {
-        HANDLE_FAILURE("LLVMBuildSelect");
-        goto fail;
+    /* v2i32 -> v4i32 */
+    if (!(mask = LLVMConstVector(lanes, 4))) {
+        HANDLE_FAILURE("LLVMConstVector");
+        return false;
     }
 
-    if (!(vector1_trunced = LLVMBuildTrunc(comp_ctx->builder, vector1_clamped,
-                                           LLVMVectorType(INT16_TYPE, 4),
-                                           "vector1_trunced"))) {
-        HANDLE_FAILURE("LLVMBuildTrunc");
-        goto fail;
+    if (!(result = LLVMBuildShuffleVector(comp_ctx->builder, result, zero,
+                                          mask, "extend"))) {
+        HANDLE_FAILURE("LLVMBuildShuffleVector");
+        return false;
     }
 
-    if (!(vector2_trunced = LLVMBuildTrunc(comp_ctx->builder, vector2_clamped,
-                                           LLVMVectorType(INT16_TYPE, 4),
-                                           "vector2_trunced"))) {
-        HANDLE_FAILURE("LLVMBuildTrunc");
-        goto fail;
-    }
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
+}
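
As a reference for the code above, a scalar sketch of the saturating f64x2 to i32x4 truncation with zero-padded upper lanes (illustrative only; NaN maps to 0, out-of-range values saturate):

#include <stdint.h>

/* illustrative reference model, not from the patch */
static void i32x4_trunc_sat_f64x2_s_zero(const double in[2], int32_t out[4])
{
    for (int i = 0; i < 2; i++) {
        double v = in[i];
        if (v != v)
            out[i] = 0;                      /* NaN lane becomes 0 */
        else if (v <= (double)INT32_MIN)
            out[i] = INT32_MIN;              /* saturate low */
        else if (v >= (double)INT32_MAX)
            out[i] = INT32_MAX;              /* saturate high */
        else
            out[i] = (int32_t)v;             /* truncate toward zero */
    }
    out[2] = out[3] = 0;                     /* the shuffle with a zero vector */
}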
 
-    if (!(shuffle_vector = LLVMBuildShuffleVector(
-            comp_ctx->builder, vector1_trunced, vector2_trunced, shuffle,
-            "shuffle_vector"))) {
-        HANDLE_FAILURE("LLVMBuildShuffleVector");
-        goto fail;
-    }
+static LLVMValueRef
+simd_integer_convert(AOTCompContext *comp_ctx,
+                     AOTFuncContext *func_ctx,
+                     bool is_signed,
+                     LLVMValueRef vector,
+                     LLVMTypeRef out_vector_type)
 
-    if (!(result = LLVMBuildBitCast(comp_ctx->builder, shuffle_vector,
-                                    V128_i64x2_TYPE, "ret"))) {
-        HANDLE_FAILURE("LLVMBuildBitCast");
-        goto fail;
+{
+    LLVMValueRef result;
+    result = is_signed ? LLVMBuildSIToFP(comp_ctx->builder, vector,
+                                         out_vector_type, "converted")
+                       : LLVMBuildUIToFP(comp_ctx->builder, vector,
+                                         out_vector_type, "converted");
+    if (!result) {
+        HANDLE_FAILURE("LLVMBuildSIToFP/LLVMBuildUIToFP");
     }
 
-    PUSH_V128(result);
-    return true;
-
-fail:
-    return false;
+    return result;
 }
 
 bool
-aot_compile_simd_i16x8_narrow_i32x4(AOTCompContext *comp_ctx,
-                                    AOTFuncContext *func_ctx,
-                                    bool is_signed)
+aot_compile_simd_f32x4_convert_i32x4(AOTCompContext *comp_ctx,
+                                     AOTFuncContext *func_ctx,
+                                     bool is_signed)
 {
-    if (is_target_x86(comp_ctx)) {
-        return aot_compile_simd_i16x8_narrow_i32x4_x86(comp_ctx, func_ctx,
-                                                       is_signed);
+    LLVMValueRef vector, result;
+
+    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                             V128_i32x4_TYPE, "vec"))) {
+        return false;
     }
-    else {
-        return aot_compile_simd_i16x8_narrow_i32x4_common(comp_ctx, func_ctx,
-                                                          is_signed);
+
+    if (!(result = simd_integer_convert(comp_ctx, func_ctx, is_signed, vector,
+                                        V128_f32x4_TYPE))) {
+        return false;
     }
+
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
 }
 
 bool
-aot_compile_simd_i16x8_widen_i8x16(AOTCompContext *comp_ctx,
-                                   AOTFuncContext *func_ctx,
-                                   bool is_low_half,
-                                   bool is_signed)
+aot_compile_simd_f64x2_convert_i32x4(AOTCompContext *comp_ctx,
+                                     AOTFuncContext *func_ctx,
+                                     bool is_signed)
 {
-    LLVMValueRef vector, undef, mask_high[8], mask_low[8], mask, shuffled,
-      result;
-    uint8 mask_high_value[8] = { 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf },
-          mask_low_value[8] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }, i;
+    LLVMValueRef vector, mask, result;
+    LLVMValueRef lanes[] = {
+        LLVM_CONST(i32_zero),
+        LLVM_CONST(i32_one),
+    };
+    LLVMTypeRef out_vector_type;
 
     if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
-                                             V128_i8x16_TYPE, "vec"))) {
-        goto fail;
+                                             V128_i32x4_TYPE, "vec"))) {
+        return false;
     }
 
-    if (!(undef = LLVMGetUndef(V128_i8x16_TYPE))) {
-        HANDLE_FAILURE("LLVMGetUndef");
-        goto fail;
+    if (!(out_vector_type = LLVMVectorType(F64_TYPE, 4))) {
+        HANDLE_FAILURE("LLVMVectorType");
+        return false;
     }
 
-    /* create a mask */
-    for (i = 0; i < 8; i++) {
-        mask_high[i] = LLVMConstInt(I32_TYPE, mask_high_value[i], true);
-        mask_low[i] = LLVMConstInt(I32_TYPE, mask_low_value[i], true);
+    if (!(result = simd_integer_convert(comp_ctx, func_ctx, is_signed, vector,
+                                        out_vector_type))) {
+        return false;
     }
 
-    mask = is_low_half ? LLVMConstVector(mask_low, 8)
-                       : LLVMConstVector(mask_high, 8);
-    if (!mask) {
+    /* v4f64 -> v2f64 */
+    if (!(mask = LLVMConstVector(lanes, 2))) {
         HANDLE_FAILURE("LLVMConstVector");
-        goto fail;
+        return false;
     }
 
-    /* retrive the low or high half */
-    if (!(shuffled = LLVMBuildShuffleVector(comp_ctx->builder, vector, undef,
-                                            mask, "shuffled"))) {
+    if (!(result = LLVMBuildShuffleVector(comp_ctx->builder, result, result,
+                                          mask, "trunc"))) {
         HANDLE_FAILURE("LLVMBuildShuffleVector");
-        goto fail;
-    }
-
-    if (is_signed) {
-        if (!(result = LLVMBuildSExt(comp_ctx->builder, shuffled,
-                                     V128_i16x8_TYPE, "ext"))) {
-            HANDLE_FAILURE("LLVMBuildSExt");
-            goto fail;
-        }
-    }
-    else {
-        if (!(result = LLVMBuildZExt(comp_ctx->builder, shuffled,
-                                     V128_i16x8_TYPE, "ext"))) {
-            HANDLE_FAILURE("LLVMBuildZExt");
-            goto fail;
-        }
+        return false;
     }
 
-    if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE,
-                                    "ret"))) {
-        HANDLE_FAILURE("LLVMBuildBitCast");
-        goto fail;
-    }
-
-    PUSH_V128(result);
-    return true;
-fail:
-    return false;
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
 }
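
A scalar sketch of the conversion above; converting all four lanes and then shuffling down to two lanes is equivalent to converting only the low two integer lanes (hypothetical helper):

#include <stdint.h>

/* illustrative reference model, not from the patch */
static void f64x2_convert_low_i32x4_s(const int32_t in[4], double out[2])
{
    out[0] = (double)in[0];
    out[1] = (double)in[1];
}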
 
-bool
-aot_compile_simd_i32x4_widen_i16x8(AOTCompContext *comp_ctx,
-                                   AOTFuncContext *func_ctx,
-                                   bool is_low_half,
-                                   bool is_signed)
+static bool
+simd_extadd_pairwise(AOTCompContext *comp_ctx,
+                     AOTFuncContext *func_ctx,
+                     LLVMTypeRef in_vector_type,
+                     LLVMTypeRef out_vector_type,
+                     bool is_signed)
 {
-    LLVMValueRef vector, undef, mask_high[4], mask_low[4], mask, shuffled,
+    LLVMValueRef vector, even_mask, odd_mask, sub_vector_even, sub_vector_odd,
       result;
-    uint8 mask_high_value[4] = { 0x4, 0x5, 0x6, 0x7 },
-          mask_low_value[4] = { 0x0, 0x1, 0x2, 0x3 }, i;
 
-    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
-                                             V128_i16x8_TYPE, "vec"))) {
-        goto fail;
-    }
+    LLVMValueRef even_element[] = {
+        LLVM_CONST(i32_zero),   LLVM_CONST(i32_two),      LLVM_CONST(i32_four),
+        LLVM_CONST(i32_six),    LLVM_CONST(i32_eight),    LLVM_CONST(i32_ten),
+        LLVM_CONST(i32_twelve), LLVM_CONST(i32_fourteen),
+    };
 
-    if (!(undef = LLVMGetUndef(V128_i16x8_TYPE))) {
-        HANDLE_FAILURE("LLVMGetUndef");
-        goto fail;
-    }
+    LLVMValueRef odd_element[] = {
+        LLVM_CONST(i32_one),      LLVM_CONST(i32_three),
+        LLVM_CONST(i32_five),     LLVM_CONST(i32_seven),
+        LLVM_CONST(i32_nine),     LLVM_CONST(i32_eleven),
+        LLVM_CONST(i32_thirteen), LLVM_CONST(i32_fifteen),
+    };
 
-    /* create a mask */
-    for (i = 0; i < 4; i++) {
-        mask_high[i] = LLVMConstInt(I32_TYPE, mask_high_value[i], true);
-        mask_low[i] = LLVMConstInt(I32_TYPE, mask_low_value[i], true);
+    /* assumes output is i16x8 (from i8x16) or i32x4 (from i16x8) */
+    uint8 mask_length = V128_i16x8_TYPE == out_vector_type ? 8 : 4;
+
+    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                             in_vector_type, "vector"))) {
+        return false;
     }
 
-    mask = is_low_half ? LLVMConstVector(mask_low, 4)
-                       : LLVMConstVector(mask_high, 4);
-    if (!mask) {
+    if (!(even_mask = LLVMConstVector(even_element, mask_length))
+        || !(odd_mask = LLVMConstVector(odd_element, mask_length))) {
         HANDLE_FAILURE("LLVMConstVector");
-        goto fail;
+        return false;
     }
 
-    /* retrive the low or high half */
-    if (!(shuffled = LLVMBuildShuffleVector(comp_ctx->builder, vector, undef,
-                                            mask, "shuffled"))) {
+    /* shuffle a <16xi8> vector to two <8xi8> vectors */
+    if (!(sub_vector_even = LLVMBuildShuffleVector(
+            comp_ctx->builder, vector, vector, even_mask, "pick_even"))
+        || !(sub_vector_odd = LLVMBuildShuffleVector(
+               comp_ctx->builder, vector, vector, odd_mask, "pick_odd"))) {
         HANDLE_FAILURE("LLVMBuildShuffleVector");
-        goto fail;
+        return false;
     }
 
+    /* sext/zext <8xi8> to <8xi16> */
     if (is_signed) {
-        if (!(result = LLVMBuildSExt(comp_ctx->builder, shuffled,
-                                     V128_i32x4_TYPE, "ext"))) {
+        if (!(sub_vector_even =
+                LLVMBuildSExt(comp_ctx->builder, sub_vector_even,
+                              out_vector_type, "even_sext"))
+            || !(sub_vector_odd =
+                   LLVMBuildSExt(comp_ctx->builder, sub_vector_odd,
+                                 out_vector_type, "odd_sext"))) {
             HANDLE_FAILURE("LLVMBuildSExt");
-            goto fail;
+            return false;
         }
     }
     else {
-        if (!(result = LLVMBuildZExt(comp_ctx->builder, shuffled,
-                                     V128_i32x4_TYPE, "ext"))) {
+        if (!(sub_vector_even =
+                LLVMBuildZExt(comp_ctx->builder, sub_vector_even,
+                              out_vector_type, "even_zext"))
+            || !(sub_vector_odd =
+                   LLVMBuildZExt(comp_ctx->builder, sub_vector_odd,
+                                 out_vector_type, "odd_zext"))) {
             HANDLE_FAILURE("LLVMBuildZExt");
-            goto fail;
+            return false;
         }
     }
 
-    if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE,
-                                    "ret"))) {
-        HANDLE_FAILURE("LLVMBuildBitCast");
-        goto fail;
+    if (!(result = LLVMBuildAdd(comp_ctx->builder, sub_vector_even,
+                                sub_vector_odd, "sum"))) {
+        HANDLE_FAILURE("LLVMBuildAdd");
+        return false;
     }
 
-    PUSH_V128(result);
-    return true;
-fail:
-    return false;
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
 }
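
A scalar sketch of the pairwise extended-add semantics composed above from the even/odd shuffles (hypothetical helper, signed i8x16 to i16x8 case):

#include <stdint.h>

/* illustrative reference model: out[i] = (int16)in[2*i] + (int16)in[2*i + 1] */
static void i16x8_extadd_pairwise_i8x16_s(const int8_t in[16], int16_t out[8])
{
    for (int i = 0; i < 8; i++)
        out[i] = (int16_t)in[2 * i] + (int16_t)in[2 * i + 1];
}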
 
-static LLVMValueRef
-simd_build_const_f32x4(AOTCompContext *comp_ctx,
-                       AOTFuncContext *func_ctx,
-                       float f)
+bool
+aot_compile_simd_i16x8_extadd_pairwise_i8x16(AOTCompContext *comp_ctx,
+                                             AOTFuncContext *func_ctx,
+                                             bool is_signed)
 {
-    LLVMValueRef elements[4], vector;
-
-    if (!(elements[0] = LLVMConstReal(F32_TYPE, f))) {
-        HANDLE_FAILURE("LLVMConstInt");
-        goto fail;
-    }
-
-    elements[1] = elements[2] = elements[3] = elements[0];
-
-    if (!(vector = LLVMConstVector(elements, 4))) {
-        HANDLE_FAILURE("LLVMConstVector");
-        goto fail;
-    }
-
-    return vector;
-fail:
-    return NULL;
+    return simd_extadd_pairwise(comp_ctx, func_ctx, V128_i8x16_TYPE,
+                                V128_i16x8_TYPE, is_signed);
 }
 
-static LLVMValueRef
-simd_build_const_i32x4(AOTCompContext *comp_ctx,
-                       AOTFuncContext *func_ctx,
-                       uint64 integer,
-                       bool is_signed)
+bool
+aot_compile_simd_i32x4_extadd_pairwise_i16x8(AOTCompContext *comp_ctx,
+                                             AOTFuncContext *func_ctx,
+                                             bool is_signed)
 {
-    LLVMValueRef elements[4], vector;
-
-    if (!(elements[0] = LLVMConstInt(I32_TYPE, integer, is_signed))) {
-        HANDLE_FAILURE("LLVMConstInt");
-        goto fail;
-    }
-
-    elements[1] = elements[2] = elements[3] = elements[0];
-
-    if (!(vector = LLVMConstVector(elements, 4))) {
-        HANDLE_FAILURE("LLVMConstVector");
-        goto fail;
-    }
-
-    return vector;
-fail:
-    return NULL;
+    return simd_extadd_pairwise(comp_ctx, func_ctx, V128_i16x8_TYPE,
+                                V128_i32x4_TYPE, is_signed);
 }
 
 bool
-aot_compile_simd_i32x4_trunc_sat_f32x4(AOTCompContext *comp_ctx,
-                                       AOTFuncContext *func_ctx,
-                                       bool is_signed)
+aot_compile_simd_i16x8_q15mulr_sat(AOTCompContext *comp_ctx,
+                                   AOTFuncContext *func_ctx)
 {
-    LLVMValueRef vector, zeros, is_nan, max_float_v, min_float_v, is_ge_max,
-      is_le_min, result, max_int_v, min_int_v;
-    uint32 max_ui = 0xFFffFFff, min_ui = 0x0;
-    int32 max_si = 0x7FFFffff, min_si = 0x80000000;
-    float max_f_ui = 4294967296.0f, min_f_ui = 0.0f, max_f_si = 2147483647.0f,
-          min_f_si = -2147483648.0f;
+    LLVMValueRef lhs, rhs, pad, offset, min, max, result;
+    LLVMTypeRef vector_ext_type;
 
-    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
-                                             V128_f32x4_TYPE, "vec"))) {
-        goto fail;
+    if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i16x8_TYPE,
+                                          "rhs"))
+        || !(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                             V128_i16x8_TYPE, "lhs"))) {
+        return false;
     }
 
-    if (!(zeros = LLVMConstNull(V128_f32x4_TYPE))) {
-        HANDLE_FAILURE("LLVMConstNull");
-        goto fail;
+    if (!(vector_ext_type = LLVMVectorType(I32_TYPE, 8))) {
+        HANDLE_FAILURE("LLVMVectorType");
+        return false;
     }
 
-    if (is_signed) {
-        if (!(max_float_v =
-                simd_build_const_f32x4(comp_ctx, func_ctx, max_f_si))) {
-            goto fail;
-        }
-
-        if (!(min_float_v =
-                simd_build_const_f32x4(comp_ctx, func_ctx, min_f_si))) {
-            goto fail;
-        }
-
-        if (!(max_int_v =
-                simd_build_const_i32x4(comp_ctx, func_ctx, max_si, true))) {
-            goto fail;
-        }
-
-        if (!(min_int_v =
-                simd_build_const_i32x4(comp_ctx, func_ctx, min_si, true))) {
-            goto fail;
-        }
-    }
-    else {
-        if (!(max_float_v =
-                simd_build_const_f32x4(comp_ctx, func_ctx, max_f_ui))) {
-            goto fail;
-        }
-
-        if (!(min_float_v =
-                simd_build_const_f32x4(comp_ctx, func_ctx, min_f_ui))) {
-            goto fail;
-        }
-
-        if (!(max_int_v =
-                simd_build_const_i32x4(comp_ctx, func_ctx, max_ui, false))) {
-            goto fail;
-        }
-
-        if (!(min_int_v =
-                simd_build_const_i32x4(comp_ctx, func_ctx, min_ui, false))) {
-            goto fail;
-        }
-    }
-
-    if (!(is_nan = LLVMBuildFCmp(comp_ctx->builder, LLVMRealORD, vector, zeros,
-                                 "is_nan"))) {
-        HANDLE_FAILURE("LLVMBuildFCmp");
-        goto fail;
+    if (!(lhs = LLVMBuildSExt(comp_ctx->builder, lhs, vector_ext_type,
+                              "lhs_v8i32"))
+        || !(rhs = LLVMBuildSExt(comp_ctx->builder, rhs, vector_ext_type,
+                                 "rhs_v8i32"))) {
+        HANDLE_FAILURE("LLVMBuildSExt");
+        return false;
     }
 
-    if (!(is_le_min = LLVMBuildFCmp(comp_ctx->builder, LLVMRealOLE, vector,
-                                    min_float_v, "le_min"))) {
-        HANDLE_FAILURE("LLVMBuildFCmp");
-        goto fail;
+    /* rounding constant 0x4000 and shift amount 15 */
+    if (!(pad = simd_build_splat_const_integer_vector(comp_ctx, I32_TYPE,
+                                                      0x4000, 8))
+        || !(offset = simd_build_splat_const_integer_vector(comp_ctx, I32_TYPE,
+                                                            15, 8))) {
+        return false;
     }
 
-    if (!(is_ge_max = LLVMBuildFCmp(comp_ctx->builder, LLVMRealOGE, vector,
-                                    max_float_v, "ge_max"))) {
-        HANDLE_FAILURE("LLVMBuildFCmp");
-        goto fail;
+    /* TODO: look for x86 intrinsics for an integer "fused multiply-add" */
+    /* S.SignedSaturate((x * y + 0x4000) >> 15) */
+    if (!(result = LLVMBuildMul(comp_ctx->builder, lhs, rhs, "mul"))) {
+        HANDLE_FAILURE("LLVMBuildMul");
+        return false;
     }
 
-    if (is_signed) {
-        if (!(result = LLVMBuildFPToSI(comp_ctx->builder, vector,
-                                       V128_i32x4_TYPE, "truncated"))) {
-            HANDLE_FAILURE("LLVMBuildSIToFP");
-            goto fail;
-        }
-    }
-    else {
-        if (!(result = LLVMBuildFPToUI(comp_ctx->builder, vector,
-                                       V128_i32x4_TYPE, "truncated"))) {
-            HANDLE_FAILURE("LLVMBuildUIToFP");
-            goto fail;
-        }
+    if (!(result = LLVMBuildAdd(comp_ctx->builder, result, pad, "add"))) {
+        HANDLE_FAILURE("LLVMBuildAdd");
+        return false;
     }
 
-    if (!(result = LLVMBuildSelect(comp_ctx->builder, is_ge_max, max_int_v,
-                                   result, "sat_w_max"))) {
-        HANDLE_FAILURE("LLVMBuildSelect");
-        goto fail;
+    if (!(result = LLVMBuildAShr(comp_ctx->builder, result, offset, "ashr"))) {
+        HANDLE_FAILURE("LLVMBuildAShr");
+        return false;
     }
 
-    if (!(result = LLVMBuildSelect(comp_ctx->builder, is_le_min, min_int_v,
-                                   result, "sat_w_min"))) {
-        HANDLE_FAILURE("LLVMBuildSelect");
-        goto fail;
+    if (!(min = simd_build_splat_const_integer_vector(comp_ctx, I32_TYPE,
+                                                      0xffff8000, 8))
+        || !(max = simd_build_splat_const_integer_vector(comp_ctx, I32_TYPE,
+                                                         0x00007fff, 8))) {
+        return false;
     }
 
-    if (!(result = LLVMBuildSelect(comp_ctx->builder, is_nan, result,
-                                   V128_i32x4_ZERO, "sat_w_nan"))) {
-        HANDLE_FAILURE("LLVMBuildSelect");
-        goto fail;
+    /* saturate before the trunc so that the *sat* part can be optimized */
+    if (!(result = simd_saturate(comp_ctx, func_ctx, e_sat_i32x8, result, min,
+                                 max, true))) {
+        return false;
     }
 
-    if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE,
-                                    "ret"))) {
-        HANDLE_FAILURE("LLVMBuildBitCast");
-        goto fail;
+    if (!(result = LLVMBuildTrunc(comp_ctx->builder, result, V128_i16x8_TYPE,
+                                  "down_to_v8i16"))) {
+        HANDLE_FAILURE("LLVMBuidlTrunc");
+        return false;
     }
 
-    PUSH_V128(result);
-    return true;
-fail:
-    return false;
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
 }
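
For reference, a per-lane scalar model of the q15mulr_sat operation emulated above; the widening to 32 bits mirrors the sext to v8i32 in the IR (hypothetical helper):

#include <stdint.h>

/* illustrative reference model: S.SignedSaturate((x * y + 0x4000) >> 15) */
static int16_t i16x8_q15mulr_sat_s_lane(int16_t x, int16_t y)
{
    int32_t rounded = ((int32_t)x * (int32_t)y + 0x4000) >> 15;

    /* saturate back into the i16 range */
    if (rounded > INT16_MAX)
        return INT16_MAX;
    if (rounded < INT16_MIN)
        return INT16_MIN;
    return (int16_t)rounded;
}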
 
-bool
-aot_compile_simd_f32x4_convert_i32x4(AOTCompContext *comp_ctx,
-                                     AOTFuncContext *func_ctx,
-                                     bool is_signed)
+enum integer_extmul_type {
+    e_i16x8_extmul_i8x16,
+    e_i32x4_extmul_i16x8,
+    e_i64x2_extmul_i32x4,
+};
+
+static bool
+simd_integer_extmul(AOTCompContext *comp_ctx,
+                    AOTFuncContext *func_ctx,
+                    bool lower_half,
+                    bool is_signed,
+                    enum integer_extmul_type itype)
 {
-    LLVMValueRef vector, result;
+    LLVMValueRef vec1, vec2, result;
+    enum integer_extend_type ext_type[] = {
+        e_ext_i8x16,
+        e_ext_i16x8,
+        e_ext_i32x4,
+    };
+    LLVMTypeRef in_vector_type[] = {
+        V128_i8x16_TYPE,
+        V128_i16x8_TYPE,
+        V128_i32x4_TYPE,
+    };
 
-    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
-                                             V128_i32x4_TYPE, "vec"))) {
-        goto fail;
+    if (!(vec1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                           in_vector_type[itype], "vec1"))
+        || !(vec2 = simd_pop_v128_and_bitcast(
+               comp_ctx, func_ctx, in_vector_type[itype], "vec2"))) {
+        return false;
     }
 
-    if (is_signed) {
-        if (!(result = LLVMBuildSIToFP(comp_ctx->builder, vector,
-                                       V128_f32x4_TYPE, "converted"))) {
-            HANDLE_FAILURE("LLVMBuildSIToFP");
-            goto fail;
-        }
-    }
-    else {
-        if (!(result = LLVMBuildUIToFP(comp_ctx->builder, vector,
-                                       V128_f32x4_TYPE, "converted"))) {
-            HANDLE_FAILURE("LLVMBuildSIToFP");
-            goto fail;
-        }
+    if (!(vec1 = simd_integer_extension(comp_ctx, func_ctx, ext_type[itype],
+                                        vec1, lower_half, is_signed))
+        || !(vec2 = simd_integer_extension(comp_ctx, func_ctx, ext_type[itype],
+                                           vec2, lower_half, is_signed))) {
+        return false;
     }
 
-    if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE,
-                                    "ret"))) {
-        HANDLE_FAILURE("LLVMBuildBitCast");
-        goto fail;
+    if (!(result = LLVMBuildMul(comp_ctx->builder, vec1, vec2, "product"))) {
+        return false;
     }
 
-    PUSH_V128(result);
-    return true;
-fail:
-    return false;
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
 }
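
A scalar sketch of the extended-multiply semantics composed above: extend the chosen half of each operand, then multiply lane-wise (hypothetical helper, signed i16x8 to i32x4 case):

#include <stdint.h>

/* illustrative reference model; the high variant uses lanes 4..7 */
static void i32x4_extmul_low_i16x8_s(const int16_t a[8], const int16_t b[8],
                                     int32_t out[4])
{
    for (int i = 0; i < 4; i++)
        out[i] = (int32_t)a[i] * (int32_t)b[i];
}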
+
+bool
+aot_compile_simd_i16x8_extmul_i8x16(AOTCompContext *comp_ctx,
+                                    AOTFuncContext *func_ctx,
+                                    bool lower_half,
+                                    bool is_signed)
+{
+    return simd_integer_extmul(comp_ctx, func_ctx, lower_half, is_signed,
+                               e_i16x8_extmul_i8x16);
+}
+
+bool
+aot_compile_simd_i32x4_extmul_i16x8(AOTCompContext *comp_ctx,
+                                    AOTFuncContext *func_ctx,
+                                    bool lower_half,
+                                    bool is_signed)
+{
+    return simd_integer_extmul(comp_ctx, func_ctx, lower_half, is_signed,
+                               e_i32x4_extmul_i16x8);
+}
+
+bool
+aot_compile_simd_i64x2_extmul_i32x4(AOTCompContext *comp_ctx,
+                                    AOTFuncContext *func_ctx,
+                                    bool lower_half,
+                                    bool is_signed)
+{
+    return simd_integer_extmul(comp_ctx, func_ctx, lower_half, is_signed,
+                               e_i64x2_extmul_i32x4);
+}

+ 58 - 8
core/iwasm/compilation/simd/simd_conversions.h

@@ -23,27 +23,77 @@ aot_compile_simd_i16x8_narrow_i32x4(AOTCompContext *comp_ctx,
                                     bool is_signed);
 
 bool
-aot_compile_simd_i16x8_widen_i8x16(AOTCompContext *comp_ctx,
-                                   AOTFuncContext *func_ctx,
-                                   bool is_low,
-                                   bool is_signed);
+aot_compile_simd_i32x4_narrow_i64x2(AOTCompContext *comp_ctx,
+                                    AOTFuncContext *func_ctx,
+                                    bool is_signed);
+
+bool
+aot_compile_simd_i16x8_extend_i8x16(AOTCompContext *comp_ctx,
+                                    AOTFuncContext *func_ctx,
+                                    bool is_low,
+                                    bool is_signed);
+
+bool
+aot_compile_simd_i32x4_extend_i16x8(AOTCompContext *comp_ctx,
+                                    AOTFuncContext *func_ctx,
+                                    bool is_low,
+                                    bool is_signed);
 
 bool
-aot_compile_simd_i32x4_widen_i16x8(AOTCompContext *comp_ctx,
-                                   AOTFuncContext *func_ctx,
-                                   bool is_low,
-                                   bool is_signed);
+aot_compile_simd_i64x2_extend_i32x4(AOTCompContext *comp_ctx,
+                                    AOTFuncContext *func_ctx,
+                                    bool lower_half,
+                                    bool is_signed);
 
 bool
 aot_compile_simd_i32x4_trunc_sat_f32x4(AOTCompContext *comp_ctx,
                                        AOTFuncContext *func_ctx,
                                        bool is_signed);
 
+bool
+aot_compile_simd_i32x4_trunc_sat_f64x2(AOTCompContext *comp_ctx,
+                                       AOTFuncContext *func_ctx,
+                                       bool is_signed);
+
 bool
 aot_compile_simd_f32x4_convert_i32x4(AOTCompContext *comp_ctx,
                                      AOTFuncContext *func_ctx,
                                      bool is_signed);
 
+bool
+aot_compile_simd_f64x2_convert_i32x4(AOTCompContext *comp_ctx,
+                                     AOTFuncContext *func_ctx,
+                                     bool is_signed);
+bool
+aot_compile_simd_i16x8_extadd_pairwise_i8x16(AOTCompContext *comp_ctx,
+                                             AOTFuncContext *func_ctx,
+                                             bool is_signed);
+
+bool
+aot_compile_simd_i32x4_extadd_pairwise_i16x8(AOTCompContext *comp_ctx,
+                                             AOTFuncContext *func_ctx,
+                                             bool is_signed);
+bool
+aot_compile_simd_i16x8_q15mulr_sat(AOTCompContext *comp_ctx,
+                                   AOTFuncContext *func_ctx);
+
+bool
+aot_compile_simd_i16x8_extmul_i8x16(AOTCompContext *comp_ctx,
+                                    AOTFuncContext *func_ctx,
+                                    bool is_low,
+                                    bool is_signed);
+
+bool
+aot_compile_simd_i32x4_extmul_i16x8(AOTCompContext *comp_ctx,
+                                    AOTFuncContext *func_ctx,
+                                    bool is_low,
+                                    bool is_signed);
+
+bool
+aot_compile_simd_i64x2_extmul_i32x4(AOTCompContext *comp_ctx,
+                                    AOTFuncContext *func_ctx,
+                                    bool lower_half,
+                                    bool is_signed);
 #ifdef __cplusplus
 } /* end of extern "C" */
 #endif

+ 267 - 153
core/iwasm/compilation/simd/simd_floating_point.c

@@ -9,111 +9,45 @@
 #include "../aot_emit_numberic.h"
 #include "../../aot/aot_runtime.h"
 
-static LLVMValueRef
-simd_v128_float_cmp(AOTCompContext *comp_ctx,
-                    AOTFuncContext *func_ctx,
-                    FloatArithmetic arith_op,
-                    LLVMValueRef lhs,
-                    LLVMValueRef rhs)
-{
-    LLVMValueRef result;
-    LLVMRealPredicate op;
-
-    op = FLOAT_MIN == arith_op ? LLVMRealULT : LLVMRealUGT;
-
-    if (!(result = LLVMBuildFCmp(comp_ctx->builder, op, lhs, rhs, "cmp"))) {
-        HANDLE_FAILURE("LLVMBuildFCmp");
-        goto fail;
-    }
-
-    if (!(result =
-            LLVMBuildSelect(comp_ctx->builder, result, lhs, rhs, "select"))) {
-        HANDLE_FAILURE("LLVMBuildSelect");
-        goto fail;
-    }
-
-    return result;
-fail:
-    return NULL;
-}
-
 static bool
 simd_v128_float_arith(AOTCompContext *comp_ctx,
                       AOTFuncContext *func_ctx,
                       FloatArithmetic arith_op,
                       LLVMTypeRef vector_type)
 {
-    LLVMValueRef lhs, rhs, result;
-
-    if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
-                                          "rhs"))) {
-        goto fail;
-    }
+    LLVMValueRef lhs, rhs, result = NULL;
 
-    if (!(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
-                                          "lhs"))) {
-        goto fail;
+    if (!(rhs =
+            simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, "rhs"))
+        || !(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
+                                             "lhs"))) {
+        return false;
     }
 
     switch (arith_op) {
         case FLOAT_ADD:
-            if (!(result =
-                    LLVMBuildFAdd(comp_ctx->builder, lhs, rhs, "sum"))) {
-                HANDLE_FAILURE("LLVMBuildFAdd");
-                goto fail;
-            }
+            result = LLVMBuildFAdd(comp_ctx->builder, lhs, rhs, "sum");
             break;
         case FLOAT_SUB:
-            if (!(result = LLVMBuildFSub(comp_ctx->builder, lhs, rhs,
-                                         "difference"))) {
-                HANDLE_FAILURE("LLVMBuildFSub");
-                goto fail;
-            }
+            result = LLVMBuildFSub(comp_ctx->builder, lhs, rhs, "difference");
             break;
         case FLOAT_MUL:
-            if (!(result =
-                    LLVMBuildFMul(comp_ctx->builder, lhs, rhs, "product"))) {
-                HANDLE_FAILURE("LLVMBuildFMul");
-                goto fail;
-            }
+            result = LLVMBuildFMul(comp_ctx->builder, lhs, rhs, "product");
             break;
         case FLOAT_DIV:
-            if (!(result =
-                    LLVMBuildFDiv(comp_ctx->builder, lhs, rhs, "quotient"))) {
-                HANDLE_FAILURE("LLVMBuildFDiv");
-                goto fail;
-            }
-            break;
-        case FLOAT_MIN:
-            if (!(result = simd_v128_float_cmp(comp_ctx, func_ctx, FLOAT_MIN,
-                                               lhs, rhs))) {
-                goto fail;
-            }
-            break;
-        case FLOAT_MAX:
-            if (!(result = simd_v128_float_cmp(comp_ctx, func_ctx, FLOAT_MAX,
-                                               lhs, rhs))) {
-                goto fail;
-            }
+            result = LLVMBuildFDiv(comp_ctx->builder, lhs, rhs, "quotient");
             break;
         default:
-            result = NULL;
-            bh_assert(0);
-            break;
+            return false;
     }
 
-    if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE,
-                                    "ret"))) {
-        HANDLE_FAILURE("LLVMBuildBitCast");
-        goto fail;
+    if (!result) {
+        HANDLE_FAILURE(
+          "LLVMBuildFAdd/LLVMBuildFSub/LLVMBuildFMul/LLVMBuildFDiv");
+        return false;
     }
 
-    /* push result into the stack */
-    PUSH_V128(result);
-
-    return true;
-fail:
-    return false;
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
 }
 
 bool
@@ -139,30 +73,19 @@ simd_v128_float_neg(AOTCompContext *comp_ctx,
                     AOTFuncContext *func_ctx,
                     LLVMTypeRef vector_type)
 {
-    LLVMValueRef number, result;
+    LLVMValueRef vector, result;
 
-    if (!(number = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
-                                             "number"))) {
-        goto fail;
+    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
+                                             "vector"))) {
+        return false;
     }
 
-    if (!(result = LLVMBuildFNeg(comp_ctx->builder, number, "neg"))) {
+    if (!(result = LLVMBuildFNeg(comp_ctx->builder, vector, "neg"))) {
         HANDLE_FAILURE("LLVMBuildFNeg");
-        goto fail;
-    }
-
-    if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE,
-                                    "ret"))) {
-        HANDLE_FAILURE("LLVMBuildBitCast");
-        goto fail;
+        return false;
     }
 
-    /* push result into the stack */
-    PUSH_V128(result);
-
-    return true;
-fail:
-    return false;
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
 }
 
 bool
@@ -178,119 +101,310 @@ aot_compile_simd_f64x2_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
 }
 
 static bool
-simd_v128_float_intrinsic(AOTCompContext *comp_ctx,
-                          AOTFuncContext *func_ctx,
-                          LLVMTypeRef vector_type,
-                          const char *intrinsic)
+simd_float_intrinsic(AOTCompContext *comp_ctx,
+                     AOTFuncContext *func_ctx,
+                     LLVMTypeRef vector_type,
+                     const char *intrinsic)
 {
-    LLVMValueRef number, result;
+    LLVMValueRef vector, result;
     LLVMTypeRef param_types[1] = { vector_type };
 
-    if (!(number = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
-                                             "number"))) {
-        goto fail;
+    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
+                                             "vector"))) {
+        return false;
     }
 
-    if (!(result = aot_call_llvm_intrinsic(comp_ctx, func_ctx, intrinsic, vector_type,
-                                           param_types, 1, number))) {
+    if (!(result =
+            aot_call_llvm_intrinsic(comp_ctx, func_ctx, intrinsic, vector_type,
+                                    param_types, 1, vector))) {
         HANDLE_FAILURE("LLVMBuildCall");
-        goto fail;
-    }
-
-    if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE,
-                                    "ret"))) {
-        HANDLE_FAILURE("LLVMBuildBitCast");
-        goto fail;
+        return false;
     }
 
-    /* push result into the stack */
-    PUSH_V128(result);
-
-    return true;
-fail:
-    return false;
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
 }
 
 bool
 aot_compile_simd_f32x4_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
 {
-    return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE,
-                                     "llvm.fabs.v4f32");
+    return simd_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE,
+                                "llvm.fabs.v4f32");
 }
 
 bool
 aot_compile_simd_f64x2_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
 {
-    return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE,
-                                     "llvm.fabs.v2f64");
+    return simd_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE,
+                                "llvm.fabs.v2f64");
+}
+
+bool
+aot_compile_simd_f32x4_round(AOTCompContext *comp_ctx,
+                             AOTFuncContext *func_ctx)
+{
+    return simd_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE,
+                                "llvm.round.v4f32");
+}
+
+bool
+aot_compile_simd_f64x2_round(AOTCompContext *comp_ctx,
+                             AOTFuncContext *func_ctx)
+{
+    return simd_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE,
+                                "llvm.round.v2f64");
 }
 
 bool
 aot_compile_simd_f32x4_sqrt(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
 {
-    return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE,
-                                     "llvm.sqrt.v4f32");
+    return simd_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE,
+                                "llvm.sqrt.v4f32");
 }
 
 bool
 aot_compile_simd_f64x2_sqrt(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
 {
-    return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE,
-                                     "llvm.sqrt.v2f64");
+    return simd_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE,
+                                "llvm.sqrt.v2f64");
 }
 
 bool
 aot_compile_simd_f32x4_ceil(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
 {
-    return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE,
-                                     "llvm.ceil.v4f32");
+    return simd_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE,
+                                "llvm.ceil.v4f32");
 }
 
 bool
 aot_compile_simd_f64x2_ceil(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
 {
-    return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE,
-                                     "llvm.ceil.v2f64");
+    return simd_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE,
+                                "llvm.ceil.v2f64");
 }
 
 bool
-aot_compile_simd_f32x4_floor(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+aot_compile_simd_f32x4_floor(AOTCompContext *comp_ctx,
+                             AOTFuncContext *func_ctx)
 {
-    return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE,
-                                     "llvm.floor.v4f32");
+    return simd_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE,
+                                "llvm.floor.v4f32");
 }
 
 bool
-aot_compile_simd_f64x2_floor(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+aot_compile_simd_f64x2_floor(AOTCompContext *comp_ctx,
+                             AOTFuncContext *func_ctx)
 {
-    return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE,
-                                     "llvm.floor.v2f64");
+    return simd_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE,
+                                "llvm.floor.v2f64");
 }
 
 bool
-aot_compile_simd_f32x4_trunc(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+aot_compile_simd_f32x4_trunc(AOTCompContext *comp_ctx,
+                             AOTFuncContext *func_ctx)
 {
-    return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE,
-                                     "llvm.trunc.v4f32");
+    return simd_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE,
+                                "llvm.trunc.v4f32");
 }
 
 bool
-aot_compile_simd_f64x2_trunc(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+aot_compile_simd_f64x2_trunc(AOTCompContext *comp_ctx,
+                             AOTFuncContext *func_ctx)
 {
-    return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE,
-                                     "llvm.trunc.v2f64");
+    return simd_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE,
+                                "llvm.trunc.v2f64");
 }
 
 bool
-aot_compile_simd_f32x4_nearest(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+aot_compile_simd_f32x4_nearest(AOTCompContext *comp_ctx,
+                               AOTFuncContext *func_ctx)
 {
-    return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE,
-                                     "llvm.rint.v4f32");
+    return simd_float_intrinsic(comp_ctx, func_ctx, V128_f32x4_TYPE,
+                                "llvm.rint.v4f32");
 }
 
 bool
-aot_compile_simd_f64x2_nearest(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+aot_compile_simd_f64x2_nearest(AOTCompContext *comp_ctx,
+                               AOTFuncContext *func_ctx)
 {
-    return simd_v128_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE,
-                                     "llvm.rint.v2f64");
+    return simd_float_intrinsic(comp_ctx, func_ctx, V128_f64x2_TYPE,
+                                "llvm.rint.v2f64");
+}
+
+static bool
+simd_float_cmp(AOTCompContext *comp_ctx,
+               AOTFuncContext *func_ctx,
+               FloatArithmetic arith_op,
+               LLVMTypeRef vector_type)
+{
+    LLVMValueRef lhs, rhs, result;
+    LLVMRealPredicate op = FLOAT_MIN == arith_op ? LLVMRealULT : LLVMRealUGT;
+
+    if (!(rhs =
+            simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, "rhs"))
+        || !(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
+                                             "lhs"))) {
+        return false;
+    }
+
+    if (!(result = LLVMBuildFCmp(comp_ctx->builder, op, lhs, rhs, "cmp"))) {
+        HANDLE_FAILURE("LLVMBuildFCmp");
+        return false;
+    }
+
+    if (!(result =
+            LLVMBuildSelect(comp_ctx->builder, result, lhs, rhs, "select"))) {
+        HANDLE_FAILURE("LLVMBuildSelect");
+        return false;
+    }
+
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
+}
+
+/* TODO: suggest that non-IA platforms try "llvm.minimum.*" and
+ * "llvm.maximum.*" first */
+bool
+aot_compile_simd_f32x4_min_max(AOTCompContext *comp_ctx,
+                               AOTFuncContext *func_ctx,
+                               bool run_min)
+{
+    return simd_float_cmp(comp_ctx, func_ctx, run_min ? FLOAT_MIN : FLOAT_MAX,
+                          V128_f32x4_TYPE);
+}
+
+bool
+aot_compile_simd_f64x2_min_max(AOTCompContext *comp_ctx,
+                               AOTFuncContext *func_ctx,
+                               bool run_min)
+{
+    return simd_float_cmp(comp_ctx, func_ctx, run_min ? FLOAT_MIN : FLOAT_MAX,
+                          V128_f64x2_TYPE);
+}
+
+static bool
+simd_float_pmin_max(AOTCompContext *comp_ctx,
+                    AOTFuncContext *func_ctx,
+                    LLVMTypeRef vector_type,
+                    const char *intrinsic)
+{
+    LLVMValueRef lhs, rhs, result;
+    LLVMTypeRef param_types[2];
+
+    param_types[0] = vector_type;
+    param_types[1] = vector_type;
+
+    if (!(rhs =
+            simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, "rhs"))
+        || !(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
+                                             "lhs"))) {
+        return false;
+    }
+
+    if (!(result =
+            aot_call_llvm_intrinsic(comp_ctx, func_ctx, intrinsic, vector_type,
+                                    param_types, 2, lhs, rhs))) {
+        return false;
+    }
+
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
+}
+
+bool
+aot_compile_simd_f32x4_pmin_pmax(AOTCompContext *comp_ctx,
+                                 AOTFuncContext *func_ctx,
+                                 bool run_min)
+{
+    return simd_float_pmin_max(comp_ctx, func_ctx, V128_f32x4_TYPE,
+                               run_min ? "llvm.minnum.v4f32"
+                                       : "llvm.maxnum.v4f32");
+}
+
+bool
+aot_compile_simd_f64x2_pmin_pmax(AOTCompContext *comp_ctx,
+                                 AOTFuncContext *func_ctx,
+                                 bool run_min)
+{
+    return simd_float_pmin_max(comp_ctx, func_ctx, V128_f64x2_TYPE,
+                               run_min ? "llvm.minnum.v2f64"
+                                       : "llvm.maxnum.v2f64");
+}
+
+bool
+aot_compile_simd_f64x2_demote(AOTCompContext *comp_ctx,
+                              AOTFuncContext *func_ctx)
+{
+    LLVMValueRef vector, elem_0, elem_1, result;
+
+    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                             V128_f64x2_TYPE, "vector"))) {
+        return false;
+    }
+
+    if (!(elem_0 = LLVMBuildExtractElement(comp_ctx->builder, vector,
+                                           LLVM_CONST(i32_zero), "elem_0"))
+        || !(elem_1 = LLVMBuildExtractElement(
+               comp_ctx->builder, vector, LLVM_CONST(i32_one), "elem_1"))) {
+        HANDLE_FAILURE("LLVMBuildExtractElement");
+        return false;
+    }
+
+    /* fptrunc <f64> elem to <f32> */
+    if (!(elem_0 = LLVMBuildFPTrunc(comp_ctx->builder, elem_0, F32_TYPE,
+                                    "elem_0_trunc"))
+        || !(elem_1 = LLVMBuildFPTrunc(comp_ctx->builder, elem_1, F32_TYPE,
+                                       "elem_1_trunc"))) {
+        HANDLE_FAILURE("LLVMBuildFPTrunc");
+        return false;
+    }
+
+    if (!(result = LLVMBuildInsertElement(
+            comp_ctx->builder, LLVM_CONST(f32x4_vec_zero), elem_0,
+            LLVM_CONST(i32_zero), "new_vector_0"))
+        || !(result =
+               LLVMBuildInsertElement(comp_ctx->builder, result, elem_1,
+                                      LLVM_CONST(i32_one), "new_vector_1"))) {
+        HANDLE_FAILURE("LLVMBuildInsertElement");
+        return false;
+    }
+
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
+}
+
+bool
+aot_compile_simd_f32x4_promote(AOTCompContext *comp_ctx,
+                               AOTFuncContext *func_ctx)
+{
+    LLVMValueRef vector, elem_0, elem_1, result;
+
+    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                             V128_f32x4_TYPE, "vector"))) {
+        return false;
+    }
+
+    if (!(elem_0 = LLVMBuildExtractElement(comp_ctx->builder, vector,
+                                           LLVM_CONST(i32_zero), "elem_0"))
+        || !(elem_1 = LLVMBuildExtractElement(
+               comp_ctx->builder, vector, LLVM_CONST(i32_one), "elem_1"))) {
+        HANDLE_FAILURE("LLVMBuildExtractElement");
+        return false;
+    }
+
+    /* fpext <f32> elem to <f64> */
+    if (!(elem_0 =
+            LLVMBuildFPExt(comp_ctx->builder, elem_0, F64_TYPE, "elem_0_ext"))
+        || !(elem_1 = LLVMBuildFPExt(comp_ctx->builder, elem_1, F64_TYPE,
+                                     "elem_1_ext"))) {
+        HANDLE_FAILURE("LLVMBuildFPExt");
+        return false;
+    }
+
+    if (!(result = LLVMBuildInsertElement(
+            comp_ctx->builder, LLVM_CONST(f64x2_vec_zero), elem_0,
+            LLVM_CONST(i32_zero), "new_vector_0"))
+        || !(result =
+               LLVMBuildInsertElement(comp_ctx->builder, result, elem_1,
+                                      LLVM_CONST(i32_one), "new_vector_1"))) {
+        HANDLE_FAILURE("LLVMBuildInsertElement");
+        return false;
+    }
+
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
 }

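As a rough scalar sketch of what the demote lowering above computes per lane (the struct and function names below are illustrative, not from this patch): each f64 lane is truncated to f32 and the two upper result lanes stay zero, matching the insertions into f32x4_vec_zero; the promote path is the mirror image using an fpext.

#include <stdio.h>

typedef struct { double lane[2]; } v128_f64x2;   /* hypothetical helper types */
typedef struct { float lane[4]; } v128_f32x4;

static v128_f32x4
demote_zero(v128_f64x2 v)
{
    v128_f32x4 r = { { 0.0f, 0.0f, 0.0f, 0.0f } }; /* start from the zero vector */
    r.lane[0] = (float)v.lane[0];                  /* fptrunc lane 0 */
    r.lane[1] = (float)v.lane[1];                  /* fptrunc lane 1 */
    return r;                                      /* lanes 2..3 remain zero */
}

int
main(void)
{
    v128_f64x2 v = { { 1.5, 2.5 } };
    v128_f32x4 r = demote_zero(v);
    printf("%f %f %f %f\n", r.lane[0], r.lane[1], r.lane[2], r.lane[3]);
    return 0;
}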
+ 56 - 10
core/iwasm/compilation/simd/simd_floating_point.h

@@ -35,34 +35,80 @@ bool
 aot_compile_simd_f64x2_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
 
 bool
-aot_compile_simd_f32x4_sqrt(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+aot_compile_simd_f32x4_round(AOTCompContext *comp_ctx,
+                             AOTFuncContext *func_ctx);
 
 bool
-aot_compile_simd_f64x2_sqrt(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+aot_compile_simd_f64x2_round(AOTCompContext *comp_ctx,
+                             AOTFuncContext *func_ctx);
 
 bool
-aot_compile_simd_f32x4_ceil(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+aot_compile_simd_f32x4_sqrt(AOTCompContext *comp_ctx,
+                            AOTFuncContext *func_ctx);
 
 bool
-aot_compile_simd_f64x2_ceil(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+aot_compile_simd_f64x2_sqrt(AOTCompContext *comp_ctx,
+                            AOTFuncContext *func_ctx);
 
 bool
-aot_compile_simd_f32x4_floor(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+aot_compile_simd_f32x4_ceil(AOTCompContext *comp_ctx,
+                            AOTFuncContext *func_ctx);
 
 bool
-aot_compile_simd_f64x2_floor(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+aot_compile_simd_f64x2_ceil(AOTCompContext *comp_ctx,
+                            AOTFuncContext *func_ctx);
 
 bool
-aot_compile_simd_f32x4_trunc(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+aot_compile_simd_f32x4_floor(AOTCompContext *comp_ctx,
+                             AOTFuncContext *func_ctx);
 
 bool
-aot_compile_simd_f64x2_trunc(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+aot_compile_simd_f64x2_floor(AOTCompContext *comp_ctx,
+                             AOTFuncContext *func_ctx);
 
 bool
-aot_compile_simd_f32x4_nearest(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+aot_compile_simd_f32x4_trunc(AOTCompContext *comp_ctx,
+                             AOTFuncContext *func_ctx);
 
 bool
-aot_compile_simd_f64x2_nearest(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+aot_compile_simd_f64x2_trunc(AOTCompContext *comp_ctx,
+                             AOTFuncContext *func_ctx);
+
+bool
+aot_compile_simd_f32x4_nearest(AOTCompContext *comp_ctx,
+                               AOTFuncContext *func_ctx);
+
+bool
+aot_compile_simd_f64x2_nearest(AOTCompContext *comp_ctx,
+                               AOTFuncContext *func_ctx);
+
+bool
+aot_compile_simd_f32x4_min_max(AOTCompContext *comp_ctx,
+                               AOTFuncContext *func_ctx,
+                               bool run_min);
+
+bool
+aot_compile_simd_f64x2_min_max(AOTCompContext *comp_ctx,
+                               AOTFuncContext *func_ctx,
+                               bool run_min);
+
+bool
+aot_compile_simd_f32x4_pmin_pmax(AOTCompContext *comp_ctx,
+                                 AOTFuncContext *func_ctx,
+                                 bool run_min);
+
+bool
+aot_compile_simd_f64x2_pmin_pmax(AOTCompContext *comp_ctx,
+                                 AOTFuncContext *func_ctx,
+                                 bool run_min);
+
+bool
+aot_compile_simd_f64x2_demote(AOTCompContext *comp_ctx,
+                              AOTFuncContext *func_ctx);
+
+bool
+aot_compile_simd_f32x4_promote(AOTCompContext *comp_ctx,
+                               AOTFuncContext *func_ctx);
 
 #ifdef __cplusplus
 } /* end of extern "C" */

+ 338 - 117
core/iwasm/compilation/simd/simd_int_arith.c

@@ -9,59 +9,41 @@
 #include "../../aot/aot_runtime.h"
 
 static bool
-simd_v128_integer_arith(AOTCompContext *comp_ctx,
-                        AOTFuncContext *func_ctx,
-                        V128Arithmetic arith_op,
-                        LLVMValueRef lhs,
-                        LLVMValueRef rhs)
+simd_integer_arith(AOTCompContext *comp_ctx,
+                   AOTFuncContext *func_ctx,
+                   V128Arithmetic arith_op,
+                   LLVMTypeRef vector_type)
 {
-    LLVMValueRef result;
+    LLVMValueRef lhs, rhs, result = NULL;
+
+    if (!(rhs =
+            simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, "rhs"))
+        || !(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
+                                             "lhs"))) {
+        return false;
+    }
 
     switch (arith_op) {
         case V128_ADD:
-            if (!(result = LLVMBuildAdd(comp_ctx->builder, lhs, rhs, "sum"))) {
-                HANDLE_FAILURE("LLVMBuildAdd");
-                goto fail;
-            }
+            result = LLVMBuildAdd(comp_ctx->builder, lhs, rhs, "sum");
             break;
         case V128_SUB:
-            if (!(result =
-                    LLVMBuildSub(comp_ctx->builder, lhs, rhs, "difference"))) {
-                HANDLE_FAILURE("LLVMBuildSub");
-                goto fail;
-            }
+            result = LLVMBuildSub(comp_ctx->builder, lhs, rhs, "difference");
             break;
         case V128_MUL:
-            if (!(result =
-                    LLVMBuildMul(comp_ctx->builder, lhs, rhs, "product"))) {
-                HANDLE_FAILURE("LLVMBuildMul");
-                goto fail;
-            }
-            break;
-        case V128_NEG:
-            if (!(result = LLVMBuildNeg(comp_ctx->builder, lhs, "neg"))) {
-                HANDLE_FAILURE("LLVMBuildNeg");
-                goto fail;
-            }
+            result = LLVMBuildMul(comp_ctx->builder, lhs, rhs, "product");
             break;
         default:
-            result = NULL;
-            bh_assert(0);
+            HANDLE_FAILURE("Unsupported arith_op");
             break;
     }
 
-    if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE,
-                                    "ret"))) {
-        HANDLE_FAILURE("LLVMBuildBitCast");
-        goto fail;
+    if (!result) {
+        HANDLE_FAILURE("LLVMBuildAdd/LLVMBuildSub/LLVMBuildMul");
+        return false;
     }
 
-    /* push result into the stack */
-    PUSH_V128(result);
-
-    return true;
-fail:
-    return false;
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
 }
 
 bool
@@ -69,21 +51,7 @@ aot_compile_simd_i8x16_arith(AOTCompContext *comp_ctx,
                              AOTFuncContext *func_ctx,
                              V128Arithmetic arith_op)
 {
-    LLVMValueRef lhs, rhs;
-
-    if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i8x16_TYPE,
-                                          "rhs"))) {
-        goto fail;
-    }
-
-    if (!(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i8x16_TYPE,
-                                          "lhs"))) {
-        goto fail;
-    }
-
-    return simd_v128_integer_arith(comp_ctx, func_ctx, arith_op, lhs, rhs);
-fail:
-    return false;
+    return simd_integer_arith(comp_ctx, func_ctx, arith_op, V128_i8x16_TYPE);
 }
 
 bool
@@ -91,21 +59,7 @@ aot_compile_simd_i16x8_arith(AOTCompContext *comp_ctx,
                              AOTFuncContext *func_ctx,
                              V128Arithmetic arith_op)
 {
-    LLVMValueRef lhs, rhs;
-
-    if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i16x8_TYPE,
-                                          "rhs"))) {
-        goto fail;
-    }
-
-    if (!(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i16x8_TYPE,
-                                          "lhs"))) {
-        goto fail;
-    }
-
-    return simd_v128_integer_arith(comp_ctx, func_ctx, arith_op, lhs, rhs);
-fail:
-    return false;
+    return simd_integer_arith(comp_ctx, func_ctx, arith_op, V128_i16x8_TYPE);
 }
 
 bool
@@ -113,95 +67,362 @@ aot_compile_simd_i32x4_arith(AOTCompContext *comp_ctx,
                              AOTFuncContext *func_ctx,
                              V128Arithmetic arith_op)
 {
-    LLVMValueRef lhs, rhs;
+    return simd_integer_arith(comp_ctx, func_ctx, arith_op, V128_i32x4_TYPE);
+}
+
+bool
+aot_compile_simd_i64x2_arith(AOTCompContext *comp_ctx,
+                             AOTFuncContext *func_ctx,
+                             V128Arithmetic arith_op)
+{
+    return simd_integer_arith(comp_ctx, func_ctx, arith_op, V128_i64x2_TYPE);
+}
+
+static bool
+simd_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, LLVMTypeRef type)
+{
+    LLVMValueRef vector, result;
 
-    if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i32x4_TYPE,
-                                          "rhs"))) {
-        goto fail;
+    if (!(vector =
+            simd_pop_v128_and_bitcast(comp_ctx, func_ctx, type, "vector"))) {
+        return false;
     }
 
-    if (!(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i32x4_TYPE,
-                                          "lhs"))) {
-        goto fail;
+    if (!(result = LLVMBuildNeg(comp_ctx->builder, vector, "neg"))) {
+        HANDLE_FAILURE("LLVMBuildNeg");
+        return false;
     }
 
-    return simd_v128_integer_arith(comp_ctx, func_ctx, arith_op, lhs, rhs);
-fail:
-    return false;
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
 }
 
 bool
-aot_compile_simd_i64x2_arith(AOTCompContext *comp_ctx,
-                             AOTFuncContext *func_ctx,
-                             V128Arithmetic arith_op)
+aot_compile_simd_i8x16_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+{
+    return simd_neg(comp_ctx, func_ctx, V128_i8x16_TYPE);
+}
+
+bool
+aot_compile_simd_i16x8_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
 {
-    LLVMValueRef lhs, rhs;
+    return simd_neg(comp_ctx, func_ctx, V128_i16x8_TYPE);
+}
 
-    POP_V128(rhs);
-    POP_V128(lhs);
+bool
+aot_compile_simd_i32x4_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+{
+    return simd_neg(comp_ctx, func_ctx, V128_i32x4_TYPE);
+}
 
-    return simd_v128_integer_arith(comp_ctx, func_ctx, arith_op, lhs, rhs);
-fail:
-    return false;
+bool
+aot_compile_simd_i64x2_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+{
+    return simd_neg(comp_ctx, func_ctx, V128_i64x2_TYPE);
 }
 
 bool
-aot_compile_simd_i8x16_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+aot_compile_simd_i8x16_popcnt(AOTCompContext *comp_ctx,
+                              AOTFuncContext *func_ctx)
+{
+    LLVMValueRef vector, result;
+
+    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                             V128_i8x16_TYPE, "vector"))) {
+        return false;
+    }
+
+    if (!(result = aot_call_llvm_intrinsic(comp_ctx, func_ctx,
+                                           "llvm.ctpop.v16i8", V128_i8x16_TYPE,
+                                           &V128_i8x16_TYPE, 1, vector))) {
+        return false;
+    }
+
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
+}
+
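The popcnt case above simply delegates to llvm.ctpop.v16i8; as a standalone sketch of the per-lane behaviour it requests (names here are illustrative, not part of the patch):

#include <stdint.h>
#include <stdio.h>

static uint8_t
popcnt8(uint8_t x)
{
    uint8_t n = 0;
    while (x) {
        x &= (uint8_t)(x - 1); /* clear the lowest set bit */
        n++;
    }
    return n;
}

int
main(void)
{
    printf("%u\n", popcnt8(0xAF)); /* prints 6 */
    return 0;
}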
+static bool
+simd_v128_cmp(AOTCompContext *comp_ctx,
+              AOTFuncContext *func_ctx,
+              LLVMTypeRef vector_type,
+              V128Arithmetic arith_op,
+              bool is_signed)
 {
-    LLVMValueRef number;
+    LLVMValueRef lhs, rhs, result;
+    LLVMIntPredicate op;
+
+    if (!(rhs =
+            simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, "rhs"))
+        || !(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
+                                             "lhs"))) {
+        return false;
+    }
+
+    if (V128_MIN == arith_op) {
+        op = is_signed ? LLVMIntSLT : LLVMIntULT;
+    }
+    else {
+        op = is_signed ? LLVMIntSGT : LLVMIntUGT;
+    }
 
-    if (!(number = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
-                                             V128_i8x16_TYPE, "number"))) {
-        goto fail;
+    if (!(result = LLVMBuildICmp(comp_ctx->builder, op, lhs, rhs, "cmp"))) {
+        HANDLE_FAILURE("LLVMBuildICmp");
+        return false;
     }
 
-    return simd_v128_integer_arith(comp_ctx, func_ctx, V128_NEG, number, NULL);
+    if (!(result =
+            LLVMBuildSelect(comp_ctx->builder, result, lhs, rhs, "select"))) {
+        HANDLE_FAILURE("LLVMBuildSelect");
+        return false;
+    }
 
-fail:
-    return false;
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
 }
 
 bool
-aot_compile_simd_i16x8_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+aot_compile_simd_i8x16_cmp(AOTCompContext *comp_ctx,
+                           AOTFuncContext *func_ctx,
+                           V128Arithmetic arith_op,
+                           bool is_signed)
+{
+    return simd_v128_cmp(comp_ctx, func_ctx, V128_i8x16_TYPE, arith_op,
+                         is_signed);
+}
+
+bool
+aot_compile_simd_i16x8_cmp(AOTCompContext *comp_ctx,
+                           AOTFuncContext *func_ctx,
+                           V128Arithmetic arith_op,
+                           bool is_signed)
 {
-    LLVMValueRef number;
+    return simd_v128_cmp(comp_ctx, func_ctx, V128_i16x8_TYPE, arith_op,
+                         is_signed);
+}
 
-    if (!(number = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
-                                             V128_i16x8_TYPE, "number"))) {
-        goto fail;
+bool
+aot_compile_simd_i32x4_cmp(AOTCompContext *comp_ctx,
+                           AOTFuncContext *func_ctx,
+                           V128Arithmetic arith_op,
+                           bool is_signed)
+{
+    return simd_v128_cmp(comp_ctx, func_ctx, V128_i32x4_TYPE, arith_op,
+                         is_signed);
+}
+
+/* llvm.abs.* */
+static bool
+simd_v128_abs(AOTCompContext *comp_ctx,
+              AOTFuncContext *func_ctx,
+              char *intrinsic,
+              LLVMTypeRef vector_type)
+{
+    LLVMValueRef vector, result;
+    LLVMTypeRef param_types[] = { vector_type, INT1_TYPE };
+
+    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
+                                             "vec"))) {
+        return false;
     }
 
-    return simd_v128_integer_arith(comp_ctx, func_ctx, V128_NEG, number, NULL);
+    if (!(result = aot_call_llvm_intrinsic(comp_ctx, func_ctx, intrinsic,
+                                           vector_type, param_types, 2, vector,
+                                           /* is_int_min_poison */
+                                           LLVM_CONST(i1_zero)))) {
+        return false;
+    }
 
-fail:
-    return false;
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
 }
 
 bool
-aot_compile_simd_i32x4_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+aot_compile_simd_i8x16_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+{
+    return simd_v128_abs(comp_ctx, func_ctx, "llvm.abs.v16i8",
+                         V128_i8x16_TYPE);
+}
+
+bool
+aot_compile_simd_i16x8_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+{
+    return simd_v128_abs(comp_ctx, func_ctx, "llvm.abs.v8i16",
+                         V128_i16x8_TYPE);
+}
+
+bool
+aot_compile_simd_i32x4_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
 {
-    LLVMValueRef number;
+    return simd_v128_abs(comp_ctx, func_ctx, "llvm.abs.v4i32",
+                         V128_i32x4_TYPE);
+}
+
+bool
+aot_compile_simd_i64x2_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+{
+    return simd_v128_abs(comp_ctx, func_ctx, "llvm.abs.v2i64",
+                         V128_i64x2_TYPE);
+}
+
+enum integer_avgr_u {
+    e_avgr_u_i8x16,
+    e_avgr_u_i16x8,
+    e_avgr_u_i32x4,
+};
+
+/* TODO: try int_x86_mmx_pavg_b and int_x86_mmx_pavg_w */
+/* (v1 + v2 + 1) / 2 */
+static bool
+simd_v128_avg(AOTCompContext *comp_ctx,
+              AOTFuncContext *func_ctx,
+              enum integer_avgr_u itype)
+{
+    LLVMValueRef lhs, rhs, ones, result;
+    LLVMTypeRef vector_ext_type;
+    LLVMTypeRef vector_type[] = {
+        V128_i8x16_TYPE,
+        V128_i16x8_TYPE,
+        V128_i32x4_TYPE,
+    };
+    unsigned lanes[] = { 16, 8, 4 };
+
+    if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                          vector_type[itype], "rhs"))
+        || !(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                             vector_type[itype], "lhs"))) {
+        return false;
+    }
+
+    if (!(vector_ext_type = LLVMVectorType(I64_TYPE, lanes[itype]))) {
+        HANDLE_FAILURE("LLVMVectorType");
+        return false;
+    }
 
-    if (!(number = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
-                                             V128_i32x4_TYPE, "number"))) {
-        goto fail;
+    if (!(lhs = LLVMBuildZExt(comp_ctx->builder, lhs, vector_ext_type,
+                              "zext_to_i64"))
+        || !(rhs = LLVMBuildZExt(comp_ctx->builder, rhs, vector_ext_type,
+                                 "zext_to_i64"))) {
+        HANDLE_FAILURE("LLVMBuildZExt");
+        return false;
     }
 
-    return simd_v128_integer_arith(comp_ctx, func_ctx, V128_NEG, number, NULL);
+    /* by default, add wraps on overflow for both signed and unsigned */
+    if (!(result = LLVMBuildAdd(comp_ctx->builder, lhs, rhs, "l_add_r"))) {
+        HANDLE_FAILURE("LLVMBuildAdd");
+        return false;
+    }
+
+    if (!(ones = simd_build_splat_const_integer_vector(comp_ctx, I64_TYPE, 1,
+                                                       lanes[itype]))) {
+        return false;
+    }
 
-fail:
-    return false;
+    if (!(result = LLVMBuildAdd(comp_ctx->builder, result, ones, "plus_1"))) {
+        HANDLE_FAILURE("LLVMBuildAdd");
+        return false;
+    }
+
+    if (!(result = LLVMBuildLShr(comp_ctx->builder, result, ones, "avg"))) {
+        HANDLE_FAILURE("LLVMBuildLShr");
+        return false;
+    }
+
+    if (!(result = LLVMBuildTrunc(comp_ctx->builder, result,
+                                  vector_type[itype], "to_orig_type"))) {
+        HANDLE_FAILURE("LLVMBuildTrunc");
+        return false;
+    }
+
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
 }
 
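The widening to i64 lanes above exists because (v1 + v2 + 1) can overflow the original lane width; a minimal scalar illustration of the same identity for one unsigned 8-bit lane (a sketch, not the actual implementation):

#include <stdint.h>
#include <stdio.h>

static uint8_t
avgr_u8(uint8_t a, uint8_t b)
{
    /* widen first so a + b + 1 cannot wrap, then shift right by one */
    return (uint8_t)(((uint16_t)a + (uint16_t)b + 1) >> 1);
}

int
main(void)
{
    printf("%u\n", avgr_u8(255, 254)); /* prints 255, not a wrapped value */
    return 0;
}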
 bool
-aot_compile_simd_i64x2_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+aot_compile_simd_i8x16_avgr_u(AOTCompContext *comp_ctx,
+                              AOTFuncContext *func_ctx)
+{
+    return simd_v128_avg(comp_ctx, func_ctx, e_avgr_u_i8x16);
+}
+
+bool
+aot_compile_simd_i16x8_avgr_u(AOTCompContext *comp_ctx,
+                              AOTFuncContext *func_ctx)
 {
-    LLVMValueRef number;
+    return simd_v128_avg(comp_ctx, func_ctx, e_avgr_u_i16x8);
+}
 
-    POP_V128(number);
+bool
+aot_compile_simd_i32x4_avgr_u(AOTCompContext *comp_ctx,
+                              AOTFuncContext *func_ctx)
+{
+    return simd_v128_avg(comp_ctx, func_ctx, e_avgr_u_i32x4);
+}
 
-    return simd_v128_integer_arith(comp_ctx, func_ctx, V128_NEG, number, NULL);
+bool
+aot_compile_simd_i32x4_dot_i16x8(AOTCompContext *comp_ctx,
+                                 AOTFuncContext *func_ctx)
+{
+    LLVMValueRef vec1, vec2, even_mask, odd_mask, zero, result;
+    LLVMTypeRef vector_ext_type;
+    LLVMValueRef even_element[] = {
+        LLVM_CONST(i32_zero),
+        LLVM_CONST(i32_two),
+        LLVM_CONST(i32_four),
+        LLVM_CONST(i32_six),
+    };
+    LLVMValueRef odd_element[] = {
+        LLVM_CONST(i32_one),
+        LLVM_CONST(i32_three),
+        LLVM_CONST(i32_five),
+        LLVM_CONST(i32_seven),
+    };
+
+    if (!(vec1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i16x8_TYPE,
+                                           "vec1"))
+        || !(vec2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                              V128_i16x8_TYPE, "vec2"))) {
+        return false;
+    }
+
+    if (!(vector_ext_type = LLVMVectorType(I32_TYPE, 8))) {
+        HANDLE_FAILURE("LLVMVectorType");
+        return false;
+    }
+
+    /* sext <v8i16> to <v8i32> */
+    if (!(vec1 = LLVMBuildSExt(comp_ctx->builder, vec1, vector_ext_type,
+                               "vec1_v8i32"))
+        || !(vec2 = LLVMBuildSExt(comp_ctx->builder, vec2, vector_ext_type,
+                                  "vec2_v8i32"))) {
+        HANDLE_FAILURE("LLVMBuildSExt");
+        return false;
+    }
+
+    if (!(result = LLVMBuildMul(comp_ctx->builder, vec1, vec2, "product"))) {
+        HANDLE_FAILURE("LLVMBuildMul");
+        return false;
+    }
+
+    /* pick elements with even indexes and odd indexes */
+    if (!(even_mask = LLVMConstVector(even_element, 4))
+        || !(odd_mask = LLVMConstVector(odd_element, 4))) {
+        HANDLE_FAILURE("LLVMConstVector");
+        return false;
+    }
+
+    if (!(zero =
+            simd_build_splat_const_integer_vector(comp_ctx, I32_TYPE, 0, 8))) {
+        return false;
+    }
+
+    if (!(vec1 = LLVMBuildShuffleVector(comp_ctx->builder, result, zero,
+                                        even_mask, "even_result"))
+        || !(vec2 = LLVMBuildShuffleVector(comp_ctx->builder, result, zero,
+                                           odd_mask, "odd_result"))) {
+        HANDLE_FAILURE("LLVMBuildShuffleVector");
+        return false;
+    }
+
+    if (!(result = LLVMBuildAdd(comp_ctx->builder, vec1, vec2, "new_vec"))) {
+        HANDLE_FAILURE("LLVMBuildAdd");
+        return false;
+    }
 
-fail:
-    return false;
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
 }

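The dot-product lowering above sign-extends both i16x8 operands, multiplies, then shuffles out the even and odd i32 lanes and adds them; a scalar sketch of the same lane semantics (illustrative names, not from the patch):

#include <stdint.h>
#include <stdio.h>

static void
dot_i16x8_s(const int16_t a[8], const int16_t b[8], int32_t out[4])
{
    for (int i = 0; i < 4; i++) {
        /* widen each i16 lane to i32, multiply, then add adjacent pairs */
        out[i] = (int32_t)a[2 * i] * b[2 * i]
                 + (int32_t)a[2 * i + 1] * b[2 * i + 1];
    }
}

int
main(void)
{
    int16_t a[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
    int16_t b[8] = { 1, 1, 1, 1, 1, 1, 1, 1 };
    int32_t r[4];

    dot_i16x8_s(a, b, r);
    printf("%d %d %d %d\n", r[0], r[1], r[2], r[3]); /* 3 7 11 15 */
    return 0;
}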
+ 50 - 0
core/iwasm/compilation/simd/simd_int_arith.h

@@ -44,6 +44,56 @@ aot_compile_simd_i32x4_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
 bool
 aot_compile_simd_i64x2_neg(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
 
+bool
+aot_compile_simd_i8x16_popcnt(AOTCompContext *comp_ctx,
+                              AOTFuncContext *func_ctx);
+
+bool
+aot_compile_simd_i8x16_cmp(AOTCompContext *comp_ctx,
+                           AOTFuncContext *func_ctx,
+                           V128Arithmetic arith_op,
+                           bool is_signed);
+
+bool
+aot_compile_simd_i16x8_cmp(AOTCompContext *comp_ctx,
+                           AOTFuncContext *func_ctx,
+                           V128Arithmetic arith_op,
+                           bool is_signed);
+
+bool
+aot_compile_simd_i32x4_cmp(AOTCompContext *comp_ctx,
+                           AOTFuncContext *func_ctx,
+                           V128Arithmetic arith_op,
+                           bool is_signed);
+
+bool
+aot_compile_simd_i8x16_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+
+bool
+aot_compile_simd_i16x8_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+
+bool
+aot_compile_simd_i32x4_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+
+bool
+aot_compile_simd_i64x2_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
+
+bool
+aot_compile_simd_i8x16_avgr_u(AOTCompContext *comp_ctx,
+                              AOTFuncContext *func_ctx);
+
+bool
+aot_compile_simd_i16x8_avgr_u(AOTCompContext *comp_ctx,
+                              AOTFuncContext *func_ctx);
+
+bool
+aot_compile_simd_i32x4_avgr_u(AOTCompContext *comp_ctx,
+                              AOTFuncContext *func_ctx);
+
+bool
+aot_compile_simd_i32x4_dot_i16x8(AOTCompContext *comp_ctx,
+                                 AOTFuncContext *func_ctx);
+
 #ifdef __cplusplus
 } /* end of extern "C" */
 #endif

+ 231 - 195
core/iwasm/compilation/simd/simd_load_store.c

@@ -3,6 +3,7 @@
  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  */
 
+#include "simd_common.h"
 #include "simd_load_store.h"
 #include "../aot_emit_exception.h"
 #include "../aot_emit_memory.h"
@@ -23,68 +24,23 @@ simd_load(AOTCompContext *comp_ctx,
     if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset,
                                             data_length))) {
         HANDLE_FAILURE("aot_check_memory_overflow");
-        goto fail;
+        return NULL;
     }
 
     if (!(maddr = LLVMBuildBitCast(comp_ctx->builder, maddr, ptr_type,
                                    "data_ptr"))) {
         HANDLE_FAILURE("LLVMBuildBitCast");
-        goto fail;
+        return NULL;
     }
 
     if (!(data = LLVMBuildLoad(comp_ctx->builder, maddr, "data"))) {
         HANDLE_FAILURE("LLVMBuildLoad");
-        goto fail;
+        return NULL;
     }
 
     LLVMSetAlignment(data, 1);
 
     return data;
-fail:
-    return NULL;
-}
-
-/* data_length in bytes */
-static LLVMValueRef
-simd_splat(AOTCompContext *comp_ctx,
-           AOTFuncContext *func_ctx,
-           LLVMValueRef element,
-           LLVMTypeRef vectory_type,
-           unsigned lane_count)
-{
-    LLVMValueRef undef, zeros, vector;
-    LLVMTypeRef zeros_type;
-
-    if (!(undef = LLVMGetUndef(vectory_type))) {
-        HANDLE_FAILURE("LLVMGetUndef");
-        goto fail;
-    }
-
-    if (!(zeros_type = LLVMVectorType(I32_TYPE, lane_count))) {
-        HANDLE_FAILURE("LVMVectorType");
-        goto fail;
-    }
-
-    if (!(zeros = LLVMConstNull(zeros_type))) {
-        HANDLE_FAILURE("LLVMConstNull");
-        goto fail;
-    }
-
-    if (!(vector = LLVMBuildInsertElement(comp_ctx->builder, undef, element,
-                                          I32_ZERO, "base"))) {
-        HANDLE_FAILURE("LLVMBuildInsertElement");
-        goto fail;
-    }
-
-    if (!(vector = LLVMBuildShuffleVector(comp_ctx->builder, vector, undef,
-                                          zeros, "vector"))) {
-        HANDLE_FAILURE("LLVMBuildShuffleVector");
-        goto fail;
-    }
-
-    return vector;
-fail:
-    return NULL;
 }
 
 bool
@@ -97,40 +53,10 @@ aot_compile_simd_v128_load(AOTCompContext *comp_ctx,
 
     if (!(result =
             simd_load(comp_ctx, func_ctx, align, offset, 16, V128_PTR_TYPE))) {
-        goto fail;
-    }
-
-    PUSH_V128(result);
-    return true;
-fail:
-    return false;
-}
-
-bool
-aot_compile_simd_v128_store(AOTCompContext *comp_ctx,
-                            AOTFuncContext *func_ctx,
-                            uint32 align,
-                            uint32 offset)
-{
-    LLVMValueRef maddr, value, result;
-
-    POP_V128(value);
-
-    if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 16)))
         return false;
-
-    if (!(maddr = LLVMBuildBitCast(comp_ctx->builder, maddr, V128_PTR_TYPE,
-                                   "data_ptr"))) {
-        HANDLE_FAILURE("LLVMBuildBitCast");
-        goto fail;
-    }
-
-    if (!(result = LLVMBuildStore(comp_ctx->builder, value, maddr))) {
-        HANDLE_FAILURE("LLVMBuildStore");
-        goto fail;
     }
 
-    LLVMSetAlignment(result, 1);
+    PUSH_V128(result);
 
     return true;
 fail:
@@ -140,162 +66,272 @@ fail:
 bool
 aot_compile_simd_load_extend(AOTCompContext *comp_ctx,
                              AOTFuncContext *func_ctx,
-                             uint8 load_opcode,
+                             uint8 opcode,
                              uint32 align,
                              uint32 offset)
 {
     LLVMValueRef sub_vector, result;
-    LLVMTypeRef sub_vector_type, vector_type;
-    bool is_signed;
-    uint32 data_length;
-
-    switch (load_opcode) {
-        case SIMD_i16x8_load8x8_s:
-        case SIMD_i16x8_load8x8_u:
-        {
-            data_length = 8;
-            vector_type = V128_i16x8_TYPE;
-            is_signed = (load_opcode == SIMD_i16x8_load8x8_s);
-
-            if (!(sub_vector_type = LLVMVectorType(INT8_TYPE, 8))) {
-                HANDLE_FAILURE("LLVMVectorType");
-                goto fail;
-            }
-
-            break;
-        }
-        case SIMD_i32x4_load16x4_s:
-        case SIMD_i32x4_load16x4_u:
-        {
-            data_length = 8;
-            vector_type = V128_i32x4_TYPE;
-            is_signed = (load_opcode == SIMD_i32x4_load16x4_s);
-
-            if (!(sub_vector_type = LLVMVectorType(INT16_TYPE, 4))) {
-                HANDLE_FAILURE("LLVMVectorType");
-                goto fail;
-            }
-
-            break;
-        }
-        case SIMD_i64x2_load32x2_s:
-        case SIMD_i64x2_load32x2_u:
-        {
-            data_length = 8;
-            vector_type = V128_i64x2_TYPE;
-            is_signed = (load_opcode == SIMD_i64x2_load32x2_s);
-
-            if (!(sub_vector_type = LLVMVectorType(I32_TYPE, 2))) {
-                HANDLE_FAILURE("LLVMVectorType");
-                goto fail;
-            }
-
-            break;
-        }
-        default:
-        {
-            bh_assert(0);
-            goto fail;
-        }
-    }
+    uint32 opcode_index = opcode - SIMD_v128_load8x8_s;
+    bool signeds[] = { true, false, true, false, true, false };
+    LLVMTypeRef vector_types[] = {
+        V128_i16x8_TYPE, V128_i16x8_TYPE, V128_i32x4_TYPE,
+        V128_i32x4_TYPE, V128_i64x2_TYPE, V128_i64x2_TYPE,
+    };
+    LLVMTypeRef sub_vector_types[] = {
+        LLVMVectorType(INT8_TYPE, 8),  LLVMVectorType(INT8_TYPE, 8),
+        LLVMVectorType(INT16_TYPE, 4), LLVMVectorType(INT16_TYPE, 4),
+        LLVMVectorType(I32_TYPE, 2),   LLVMVectorType(I32_TYPE, 2),
+    };
+    LLVMTypeRef sub_vector_type = sub_vector_types[opcode_index];
 
     /* to vector ptr type */
-    if (!(sub_vector_type = LLVMPointerType(sub_vector_type, 0))) {
+    if (!sub_vector_type
+        || !(sub_vector_type = LLVMPointerType(sub_vector_type, 0))) {
         HANDLE_FAILURE("LLVMPointerType");
-        goto fail;
+        return false;
     }
 
-    if (!(sub_vector = simd_load(comp_ctx, func_ctx, align, offset,
-                                 data_length, sub_vector_type))) {
-        goto fail;
+    if (!(sub_vector = simd_load(comp_ctx, func_ctx, align, offset, 8,
+                                 sub_vector_type))) {
+        return false;
     }
 
-    if (is_signed) {
+    if (signeds[opcode_index]) {
         if (!(result = LLVMBuildSExt(comp_ctx->builder, sub_vector,
-                                     vector_type, "vector"))) {
+                                     vector_types[opcode_index], "vector"))) {
             HANDLE_FAILURE("LLVMBuildSExt");
-            goto fail;
+            return false;
         }
     }
     else {
         if (!(result = LLVMBuildZExt(comp_ctx->builder, sub_vector,
-                                     vector_type, "vector"))) {
+                                     vector_types[opcode_index], "vector"))) {
             HANDLE_FAILURE("LLVMBuildZExt");
-            goto fail;
+            return false;
         }
     }
 
-    if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE,
-                                    "result"))) {
-        HANDLE_FAILURE("LLVMBuildBitCast");
-        goto fail;
-    }
-
-    PUSH_V128(result);
-    return true;
-fail:
-    return false;
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
 }
 
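A scalar sketch of the v128.load8x8_s case handled by the table-driven SExt/ZExt path above: read 8 bytes from memory and sign-extend each one into a 16-bit lane (the helper below is illustrative only):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void
load8x8_s(const void *addr, int16_t out[8])
{
    int8_t bytes[8];

    memcpy(bytes, addr, sizeof(bytes)); /* the 8-byte memory read */
    for (int i = 0; i < 8; i++)
        out[i] = (int16_t)bytes[i];     /* per-lane sign extension */
}

int
main(void)
{
    int8_t mem[8] = { -1, 2, -3, 4, -5, 6, -7, 8 };
    int16_t v[8];

    load8x8_s(mem, v);
    for (int i = 0; i < 8; i++)
        printf("%d ", v[i]);            /* -1 2 -3 4 -5 6 -7 8 */
    printf("\n");
    return 0;
}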
 bool
 aot_compile_simd_load_splat(AOTCompContext *comp_ctx,
                             AOTFuncContext *func_ctx,
-                            uint8 load_opcode,
+                            uint8 opcode,
                             uint32 align,
                             uint32 offset)
 {
+    uint32 opcode_index = opcode - SIMD_v128_load8_splat;
     LLVMValueRef element, result;
-    LLVMTypeRef element_ptr_type, vector_type;
-    unsigned data_length, lane_count;
-
-    switch (load_opcode) {
-        case SIMD_v8x16_load_splat:
-            data_length = 1;
-            lane_count = 16;
-            element_ptr_type = INT8_PTR_TYPE;
-            vector_type = V128_i8x16_TYPE;
-            break;
-        case SIMD_v16x8_load_splat:
-            data_length = 2;
-            lane_count = 8;
-            element_ptr_type = INT16_PTR_TYPE;
-            vector_type = V128_i16x8_TYPE;
-            break;
-        case SIMD_v32x4_load_splat:
-            data_length = 4;
-            lane_count = 4;
-            element_ptr_type = INT32_PTR_TYPE;
-            vector_type = V128_i32x4_TYPE;
-            break;
-        case SIMD_v64x2_load_splat:
-            data_length = 8;
-            lane_count = 2;
-            element_ptr_type = INT64_PTR_TYPE;
-            vector_type = V128_i64x2_TYPE;
-            break;
-        default:
-            bh_assert(0);
-            goto fail;
+    LLVMTypeRef element_ptr_types[] = { INT8_PTR_TYPE, INT16_PTR_TYPE,
+                                        INT32_PTR_TYPE, INT64_PTR_TYPE };
+    uint32 data_lengths[] = { 1, 2, 4, 8 };
+    LLVMValueRef undefs[] = {
+        LLVM_CONST(i8x16_undef),
+        LLVM_CONST(i16x8_undef),
+        LLVM_CONST(i32x4_undef),
+        LLVM_CONST(i64x2_undef),
+    };
+    LLVMValueRef masks[] = {
+        LLVM_CONST(i32x16_zero),
+        LLVM_CONST(i32x8_zero),
+        LLVM_CONST(i32x4_zero),
+        LLVM_CONST(i32x2_zero),
+    };
+
+    if (!(element = simd_load(comp_ctx, func_ctx, align, offset,
+                              data_lengths[opcode_index],
+                              element_ptr_types[opcode_index]))) {
+        return false;
     }
 
-    if (!(element = simd_load(comp_ctx, func_ctx, align, offset, data_length,
-                              element_ptr_type))) {
-        goto fail;
+    if (!(result =
+            LLVMBuildInsertElement(comp_ctx->builder, undefs[opcode_index],
+                                   element, I32_ZERO, "base"))) {
+        HANDLE_FAILURE("LLVMBuildInsertElement");
+        return false;
     }
 
-    if (!(result = simd_splat(comp_ctx, func_ctx, element, vector_type,
-                              lane_count))) {
-        goto fail;
+    if (!(result = LLVMBuildShuffleVector(comp_ctx->builder, result,
+                                          undefs[opcode_index],
+                                          masks[opcode_index], "vector"))) {
+        HANDLE_FAILURE("LLVMBuildShuffleVector");
+        return false;
     }
 
-    if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE,
-                                    "result"))) {
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
+}
+
+bool
+aot_compile_simd_load_lane(AOTCompContext *comp_ctx,
+                           AOTFuncContext *func_ctx,
+                           uint8 opcode,
+                           uint32 align,
+                           uint32 offset,
+                           uint8 lane_id)
+{
+    LLVMValueRef element, vector;
+    uint32 opcode_index = opcode - SIMD_v128_load8_lane;
+    uint32 data_lengths[] = { 1, 2, 4, 8 };
+    LLVMTypeRef element_ptr_types[] = { INT8_PTR_TYPE, INT16_PTR_TYPE,
+                                        INT32_PTR_TYPE, INT64_PTR_TYPE };
+    LLVMTypeRef vector_types[] = { V128_i8x16_TYPE, V128_i16x8_TYPE,
+                                   V128_i32x4_TYPE, V128_i64x2_TYPE };
+    LLVMValueRef lane = simd_lane_id_to_llvm_value(comp_ctx, lane_id);
+
+    if (!(vector = simd_pop_v128_and_bitcast(
+            comp_ctx, func_ctx, vector_types[opcode_index], "src"))) {
+        return false;
+    }
+
+    if (!(element = simd_load(comp_ctx, func_ctx, align, offset,
+                              data_lengths[opcode_index],
+                              element_ptr_types[opcode_index]))) {
+        return false;
+    }
+
+    if (!(vector = LLVMBuildInsertElement(comp_ctx->builder, vector, element,
+                                          lane, "dst"))) {
+        HANDLE_FAILURE("LLVMBuildInsertElement");
+        return false;
+    }
+
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, vector, "result");
+}
+
+bool
+aot_compile_simd_load_zero(AOTCompContext *comp_ctx,
+                           AOTFuncContext *func_ctx,
+                           uint8 opcode,
+                           uint32 align,
+                           uint32 offset)
+{
+    LLVMValueRef element, result, mask;
+    uint32 opcode_index = opcode - SIMD_v128_load32_zero;
+    uint32 data_lengths[] = { 4, 8 };
+    LLVMTypeRef element_ptr_types[] = { INT32_PTR_TYPE, INT64_PTR_TYPE };
+    LLVMValueRef zero[] = {
+        LLVM_CONST(i32x4_vec_zero),
+        LLVM_CONST(i64x2_vec_zero),
+    };
+    LLVMValueRef undef[] = {
+        LLVM_CONST(i32x4_undef),
+        LLVM_CONST(i64x2_undef),
+    };
+    uint32 mask_length[] = { 4, 2 };
+    LLVMValueRef mask_element[][4] = {
+        { LLVM_CONST(i32_zero), LLVM_CONST(i32_four), LLVM_CONST(i32_five),
+          LLVM_CONST(i32_six) },
+        { LLVM_CONST(i32_zero), LLVM_CONST(i32_two) },
+    };
+
+    if (!(element = simd_load(comp_ctx, func_ctx, align, offset,
+                              data_lengths[opcode_index],
+                              element_ptr_types[opcode_index]))) {
+        return false;
+    }
+
+    if (!(result =
+            LLVMBuildInsertElement(comp_ctx->builder, undef[opcode_index],
+                                   element, I32_ZERO, "vector"))) {
+        HANDLE_FAILURE("LLVMBuildInsertElement");
+        return false;
+    }
+
+    /* fill in other lanes with zero */
+    if (!(mask = LLVMConstVector(mask_element[opcode_index],
+                                 mask_length[opcode_index]))) {
+        HANDLE_FAILURE("LLVMConstVector");
+        return false;
+    }
+
+    if (!(result = LLVMBuildShuffleVector(comp_ctx->builder, result,
+                                          zero[opcode_index], mask,
+                                          "fill_in_zero"))) {
+        HANDLE_FAILURE("LLVMBuildShuffleVector");
+        return false;
+    }
+
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
+}
+
+/* data_length in bytes */
+static bool
+simd_store(AOTCompContext *comp_ctx,
+           AOTFuncContext *func_ctx,
+           uint32 align,
+           uint32 offset,
+           uint32 data_length,
+           LLVMValueRef value,
+           LLVMTypeRef value_ptr_type)
+{
+    LLVMValueRef maddr, result;
+
+    if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset,
+                                            data_length)))
+        return false;
+
+    if (!(maddr = LLVMBuildBitCast(comp_ctx->builder, maddr, value_ptr_type,
+                                   "data_ptr"))) {
         HANDLE_FAILURE("LLVMBuildBitCast");
-        goto fail;
+        return false;
     }
 
-    PUSH_V128(result);
+    if (!(result = LLVMBuildStore(comp_ctx->builder, value, maddr))) {
+        HANDLE_FAILURE("LLVMBuildStore");
+        return false;
+    }
+
+    LLVMSetAlignment(result, 1);
+
     return true;
+}
+
+bool
+aot_compile_simd_v128_store(AOTCompContext *comp_ctx,
+                            AOTFuncContext *func_ctx,
+                            uint32 align,
+                            uint32 offset)
+{
+    LLVMValueRef value;
+
+    POP_V128(value);
+
+    return simd_store(comp_ctx, func_ctx, align, offset, 16, value,
+                      V128_PTR_TYPE);
 fail:
     return false;
 }
+
+bool
+aot_compile_simd_store_lane(AOTCompContext *comp_ctx,
+                            AOTFuncContext *func_ctx,
+                            uint8 opcode,
+                            uint32 align,
+                            uint32 offset,
+                            uint8 lane_id)
+{
+    LLVMValueRef element, vector;
+    uint32 data_lengths[] = { 1, 2, 4, 8 };
+    LLVMTypeRef element_ptr_types[] = { INT8_PTR_TYPE, INT16_PTR_TYPE,
+                                        INT32_PTR_TYPE, INT64_PTR_TYPE };
+    uint32 opcode_index = opcode - SIMD_v128_store8_lane;
+    LLVMTypeRef vector_types[] = { V128_i8x16_TYPE, V128_i16x8_TYPE,
+                                   V128_i32x4_TYPE, V128_i64x2_TYPE };
+    LLVMValueRef lane = simd_lane_id_to_llvm_value(comp_ctx, lane_id);
+
+    if (!(vector = simd_pop_v128_and_bitcast(
+            comp_ctx, func_ctx, vector_types[opcode_index], "src"))) {
+        return false;
+    }
+
+    if (!(element = LLVMBuildExtractElement(comp_ctx->builder, vector, lane,
+                                            "element"))) {
+        HANDLE_FAILURE("LLVMBuildExtractElement");
+        return false;
+    }
+
+    return simd_store(comp_ctx, func_ctx, align, offset,
+                      data_lengths[opcode_index], element,
+                      element_ptr_types[opcode_index]);
+}

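The load_zero path above inserts one loaded element into lane 0 and shuffles against a zero vector to clear the remaining lanes; a standalone scalar sketch of v128.load32_zero (illustrative helper, not the real code):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void
load32_zero(const void *addr, uint32_t out[4])
{
    uint32_t x;

    memcpy(&x, addr, sizeof(x)); /* unaligned-safe 32-bit load */
    out[0] = x;                  /* loaded value lands in lane 0 */
    out[1] = out[2] = out[3] = 0;
}

int
main(void)
{
    uint32_t mem = 0x11223344u, v[4];

    load32_zero(&mem, v);
    printf("%08x %08x %08x %08x\n", (unsigned)v[0], (unsigned)v[1],
           (unsigned)v[2], (unsigned)v[3]);
    return 0;
}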
+ 31 - 8
core/iwasm/compilation/simd/simd_load_store.h

@@ -18,26 +18,49 @@ aot_compile_simd_v128_load(AOTCompContext *comp_ctx,
                            uint32 align,
                            uint32 offset);
 
-bool
-aot_compile_simd_v128_store(AOTCompContext *comp_ctx,
-                            AOTFuncContext *func_ctx,
-                            uint32 align,
-                            uint32 offset);
-
 bool
 aot_compile_simd_load_extend(AOTCompContext *comp_ctx,
                              AOTFuncContext *func_ctx,
-                             uint8 load_opcode,
+                             uint8 opcode,
                              uint32 align,
                              uint32 offset);
 
 bool
 aot_compile_simd_load_splat(AOTCompContext *comp_ctx,
                             AOTFuncContext *func_ctx,
-                            uint8 load_opcode,
+                            uint8 opcode,
                             uint32 align,
                             uint32 offset);
 
+bool
+aot_compile_simd_load_lane(AOTCompContext *comp_ctx,
+                           AOTFuncContext *func_ctx,
+                           uint8 opcode,
+                           uint32 align,
+                           uint32 offset,
+                           uint8 lane_id);
+
+bool
+aot_compile_simd_load_zero(AOTCompContext *comp_ctx,
+                           AOTFuncContext *func_ctx,
+                           uint8 opcode,
+                           uint32 align,
+                           uint32 offset);
+
+bool
+aot_compile_simd_v128_store(AOTCompContext *comp_ctx,
+                            AOTFuncContext *func_ctx,
+                            uint32 align,
+                            uint32 offset);
+
+bool
+aot_compile_simd_store_lane(AOTCompContext *comp_ctx,
+                            AOTFuncContext *func_ctx,
+                            uint8 opcode,
+                            uint32 align,
+                            uint32 offset,
+                            uint8 lane_id);
+
 #ifdef __cplusplus
 } /* end of extern "C" */
 #endif

+ 42 - 323
core/iwasm/compilation/simd/simd_sat_int_arith.c

@@ -9,46 +9,32 @@
 #include "../../aot/aot_runtime.h"
 
 static bool
-simd_v128_integer_arith(AOTCompContext *comp_ctx,
-                        AOTFuncContext *func_ctx,
-                        LLVMTypeRef vector_type,
-                        char *intrinsics_s_u[2],
-                        bool is_signed)
+simd_sat_int_arith(AOTCompContext *comp_ctx,
+                   AOTFuncContext *func_ctx,
+                   LLVMTypeRef vector_type,
+                   const char *intrinsics)
 {
     LLVMValueRef lhs, rhs, result;
     LLVMTypeRef param_types[2];
 
-    if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
-                                          "rhs"))) {
-        goto fail;
-    }
-
-    if (!(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
-                                          "lhs"))) {
-        goto fail;
+    if (!(rhs =
+            simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type, "rhs"))
+        || !(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
+                                             "lhs"))) {
+        return false;
     }
 
     param_types[0] = vector_type;
     param_types[1] = vector_type;
 
-    if (!(result = aot_call_llvm_intrinsic(
-            comp_ctx, func_ctx, is_signed ? intrinsics_s_u[0] : intrinsics_s_u[1],
-            vector_type, param_types, 2, lhs, rhs))) {
+    if (!(result =
+            aot_call_llvm_intrinsic(comp_ctx, func_ctx, intrinsics,
+                                    vector_type, param_types, 2, lhs, rhs))) {
         HANDLE_FAILURE("LLVMBuildCall");
-        goto fail;
+        return false;
     }
 
-    if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE,
-                                    "ret"))) {
-        HANDLE_FAILURE("LLVMBuildBitCast");
-        goto fail;
-    }
-
-    /* push result into the stack */
-    PUSH_V128(result);
-    return true;
-fail:
-    return false;
+    return simd_bitcast_and_push_v128(comp_ctx, func_ctx, result, "result");
 }
 
 bool
@@ -57,27 +43,14 @@ aot_compile_simd_i8x16_saturate(AOTCompContext *comp_ctx,
                                 V128Arithmetic arith_op,
                                 bool is_signed)
 {
-    char *intrinsics[2] = { 0 };
-    bool result = false;
-    switch (arith_op) {
-        case V128_ADD:
-            intrinsics[0] = "llvm.sadd.sat.v16i8";
-            intrinsics[1] = "llvm.uadd.sat.v16i8";
-            result = simd_v128_integer_arith(
-              comp_ctx, func_ctx, V128_i8x16_TYPE, intrinsics, is_signed);
-            break;
-        case V128_SUB:
-            intrinsics[0] = "llvm.ssub.sat.v16i8";
-            intrinsics[1] = "llvm.usub.sat.v16i8";
-            result = simd_v128_integer_arith(
-              comp_ctx, func_ctx, V128_i8x16_TYPE, intrinsics, is_signed);
-            break;
-        default:
-            bh_assert(0);
-            break;
-    }
-
-    return result;
+    char *intrinsics[][2] = {
+        { "llvm.sadd.sat.v16i8", "llvm.uadd.sat.v16i8" },
+        { "llvm.ssub.sat.v16i8", "llvm.usub.sat.v16i8" },
+    };
+
+    return simd_sat_int_arith(comp_ctx, func_ctx, V128_i8x16_TYPE,
+                              is_signed ? intrinsics[arith_op][0]
+                                        : intrinsics[arith_op][1]);
 }
 
 bool
@@ -86,282 +59,28 @@ aot_compile_simd_i16x8_saturate(AOTCompContext *comp_ctx,
                                 V128Arithmetic arith_op,
                                 bool is_signed)
 {
-    char *intrinsics[2] = { 0 };
-    bool result = false;
-    switch (arith_op) {
-        case V128_ADD:
-            intrinsics[0] = "llvm.sadd.sat.v8i16";
-            intrinsics[1] = "llvm.uadd.sat.v8i16";
-            result = simd_v128_integer_arith(
-              comp_ctx, func_ctx, V128_i16x8_TYPE, intrinsics, is_signed);
-            break;
-        case V128_SUB:
-            intrinsics[0] = "llvm.ssub.sat.v8i16";
-            intrinsics[1] = "llvm.usub.sat.v8i16";
-            result = simd_v128_integer_arith(
-              comp_ctx, func_ctx, V128_i16x8_TYPE, intrinsics, is_signed);
-            break;
-        default:
-            bh_assert(0);
-            break;
-    }
-
-    return result;
-}
-
-static bool
-simd_v128_cmp(AOTCompContext *comp_ctx,
-              AOTFuncContext *func_ctx,
-              LLVMTypeRef vector_type,
-              V128Arithmetic arith_op,
-              bool is_signed)
-{
-    LLVMValueRef lhs, rhs, result;
-    LLVMIntPredicate op;
-
-    if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
-                                          "rhs"))) {
-        goto fail;
-    }
-
-    if (!(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
-                                          "lhs"))) {
-        goto fail;
-    }
-
-    if (V128_MIN == arith_op) {
-        op = is_signed ? LLVMIntSLT : LLVMIntULT;
-    }
-    else {
-        op = is_signed ? LLVMIntSGT : LLVMIntUGT;
-    }
-
-    if (!(result = LLVMBuildICmp(comp_ctx->builder, op, lhs, rhs, "cmp"))) {
-        HANDLE_FAILURE("LLVMBuildICmp");
-        goto fail;
-    }
-
-    if (!(result =
-            LLVMBuildSelect(comp_ctx->builder, result, lhs, rhs, "select"))) {
-        HANDLE_FAILURE("LLVMBuildSelect");
-        goto fail;
-    }
-
-    if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE,
-                                    "ret"))) {
-        HANDLE_FAILURE("LLVMBuildBitCast");
-        goto fail;
-    }
-
-    /* push result into the stack */
-    PUSH_V128(result);
-    return true;
-fail:
-    return false;
-}
-
-bool
-aot_compile_simd_i8x16_cmp(AOTCompContext *comp_ctx,
-                           AOTFuncContext *func_ctx,
-                           V128Arithmetic arith_op,
-                           bool is_signed)
-{
-    return simd_v128_cmp(comp_ctx, func_ctx, V128_i8x16_TYPE, arith_op,
-                         is_signed);
-}
-
-bool
-aot_compile_simd_i16x8_cmp(AOTCompContext *comp_ctx,
-                           AOTFuncContext *func_ctx,
-                           V128Arithmetic arith_op,
-                           bool is_signed)
-{
-    return simd_v128_cmp(comp_ctx, func_ctx, V128_i16x8_TYPE, arith_op,
-                         is_signed);
-}
-
-bool
-aot_compile_simd_i32x4_cmp(AOTCompContext *comp_ctx,
-                           AOTFuncContext *func_ctx,
-                           V128Arithmetic arith_op,
-                           bool is_signed)
-{
-    return simd_v128_cmp(comp_ctx, func_ctx, V128_i32x4_TYPE, arith_op,
-                         is_signed);
-}
-
-static bool
-simd_v128_abs(AOTCompContext *comp_ctx,
-              AOTFuncContext *func_ctx,
-              LLVMTypeRef vector_type)
-{
-    LLVMValueRef vector, negs, zeros, cond, result;
-
-    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
-                                             "vec"))) {
-        goto fail;
-    }
-
-    if (!(negs = LLVMBuildNeg(comp_ctx->builder, vector, "neg"))) {
-        HANDLE_FAILURE("LLVMBuildNeg");
-        goto fail;
-    }
-
-    if (!(zeros = LLVMConstNull(vector_type))) {
-        HANDLE_FAILURE("LLVMConstNull");
-        goto fail;
-    }
-
-    if (!(cond = LLVMBuildICmp(comp_ctx->builder, LLVMIntSGE, vector, zeros,
-                               "ge_zero"))) {
-        HANDLE_FAILURE("LLVMBuildICmp");
-        goto fail;
-    }
-
-    if (!(result = LLVMBuildSelect(comp_ctx->builder, cond, vector, negs,
-                                   "select"))) {
-        HANDLE_FAILURE("LLVMBuildSelect");
-        goto fail;
-    }
-
-    if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE,
-                                    "ret"))) {
-        HANDLE_FAILURE("LLVMBuildBitCast");
-        goto fail;
-    }
-
-    /* push result into the stack */
-    PUSH_V128(result);
-    return true;
-fail:
-    return false;
-}
-
-bool
-aot_compile_simd_i8x16_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
-{
-    return simd_v128_abs(comp_ctx, func_ctx, V128_i8x16_TYPE);
-}
-
-bool
-aot_compile_simd_i16x8_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
-{
-    return simd_v128_abs(comp_ctx, func_ctx, V128_i16x8_TYPE);
-}
-
-bool
-aot_compile_simd_i32x4_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
-{
-    return simd_v128_abs(comp_ctx, func_ctx, V128_i32x4_TYPE);
+    char *intrinsics[][2] = {
+        { "llvm.sadd.sat.v8i16", "llvm.uadd.sat.v8i16" },
+        { "llvm.ssub.sat.v8i16", "llvm.usub.sat.v8i16" },
+    };
+
+    return simd_sat_int_arith(comp_ctx, func_ctx, V128_i16x8_TYPE,
+                              is_signed ? intrinsics[arith_op][0]
+                                        : intrinsics[arith_op][1]);
 }
 
-/* (v1 + v2 + 1) / 2 */
-static bool
-simd_v128_avg(AOTCompContext *comp_ctx,
-              AOTFuncContext *func_ctx,
-              LLVMTypeRef vector_type,
-              LLVMTypeRef element_type,
-              unsigned lane_width)
-{
-    LLVMValueRef lhs, rhs, undef, zeros, ones, result;
-    LLVMTypeRef ext_type;
-
-    if (!(rhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
-                                          "rhs"))) {
-        goto fail;
-    }
-
-    if (!(lhs = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, vector_type,
-                                          "lhs"))) {
-        goto fail;
-    }
-
-    if (!(ext_type = LLVMVectorType(I32_TYPE, lane_width))) {
-        HANDLE_FAILURE("LLVMVectorType");
-        goto fail;
-    }
-
-    if (!(lhs = LLVMBuildZExt(comp_ctx->builder, lhs, ext_type, "left_ext"))) {
-        HANDLE_FAILURE("LLVMBuildZExt");
-        goto fail;
-    }
-
-    if (!(rhs =
-            LLVMBuildZExt(comp_ctx->builder, rhs, ext_type, "right_ext"))) {
-        HANDLE_FAILURE("LLVMBuildZExt");
-        goto fail;
-    }
-
-    if (!(undef = LLVMGetUndef(ext_type))) {
-        HANDLE_FAILURE("LLVMGetUndef");
-        goto fail;
-    }
-
-    if (!(zeros = LLVMConstNull(ext_type))) {
-        HANDLE_FAILURE("LLVMConstNull");
-        goto fail;
-    }
-
-    if (!(ones = LLVMConstInt(I32_TYPE, 1, true))) {
-        HANDLE_FAILURE("LLVMConstInt");
-        goto fail;
-    }
-
-    if (!(ones = LLVMBuildInsertElement(comp_ctx->builder, undef, ones,
-                                        I32_ZERO, "base_ones"))) {
-        HANDLE_FAILURE("LLVMBuildInsertElement");
-        goto fail;
-    }
-
-    if (!(ones = LLVMBuildShuffleVector(comp_ctx->builder, ones, undef, zeros,
-                                        "ones"))) {
-        HANDLE_FAILURE("LLVMBuildShuffleVector");
-        goto fail;
-    }
-
-    if (!(result = LLVMBuildAdd(comp_ctx->builder, lhs, rhs, "a_add_b"))) {
-        HANDLE_FAILURE("LLVMBuildAdd");
-        goto fail;
-    }
-
-    if (!(result = LLVMBuildAdd(comp_ctx->builder, result, ones, "plus_1"))) {
-        HANDLE_FAILURE("LLVMBuildAdd");
-        goto fail;
-    }
-
-    if (!(result = LLVMBuildLShr(comp_ctx->builder, result, ones, "avg"))) {
-        HANDLE_FAILURE("LLVMBuildLShr");
-        goto fail;
-    }
-
-    if (!(result = LLVMBuildTrunc(comp_ctx->builder, result, vector_type,
-                                  "avg_trunc"))) {
-        HANDLE_FAILURE("LLVMBuildTrunc");
-        goto fail;
-    }
-
-    if (!(result = LLVMBuildBitCast(comp_ctx->builder, result, V128_i64x2_TYPE,
-                                    "ret"))) {
-        HANDLE_FAILURE("LLVMBuildBitCast");
-        goto fail;
-    }
-
-    /* push result into the stack */
-    PUSH_V128(result);
-    return true;
-fail:
-    return false;
-}
 bool
-aot_compile_simd_i8x16_avgr_u(AOTCompContext *comp_ctx,
-                              AOTFuncContext *func_ctx)
+aot_compile_simd_i32x4_saturate(AOTCompContext *comp_ctx,
+                                AOTFuncContext *func_ctx,
+                                V128Arithmetic arith_op,
+                                bool is_signed)
 {
-    return simd_v128_avg(comp_ctx, func_ctx, V128_i8x16_TYPE, INT8_TYPE, 16);
+    char *intrinsics[][2] = {
+        { "llvm.sadd.sat.v4i32", "llvm.uadd.sat.v4i32" },
+        { "llvm.ssub.sat.v4i32", "llvm.usub.sat.v4i32" },
+    };
+
+    return simd_sat_int_arith(comp_ctx, func_ctx, V128_i32x4_TYPE,
+                              is_signed ? intrinsics[arith_op][0]
+                                        : intrinsics[arith_op][1]);
 }
-
-bool
-aot_compile_simd_i16x8_avgr_u(AOTCompContext *comp_ctx,
-                              AOTFuncContext *func_ctx)
-{
-    return simd_v128_avg(comp_ctx, func_ctx, V128_i16x8_TYPE, INT16_TYPE, 8);
-}
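
For reference, the llvm.sadd.sat/llvm.uadd.sat (and ssub/usub) intrinsics selected above implement per-lane saturating arithmetic: on overflow the result clamps to the type's bounds instead of wrapping. A minimal scalar sketch of what that means for one 32-bit lane (illustration only, not WAMR code):

```c
#include <stdint.h>
#include <stdio.h>

/* Signed saturating add: clamp to [INT32_MIN, INT32_MAX] on overflow. */
static int32_t sadd_sat_i32(int32_t a, int32_t b)
{
    int64_t r = (int64_t)a + (int64_t)b;
    if (r > INT32_MAX)
        return INT32_MAX;
    if (r < INT32_MIN)
        return INT32_MIN;
    return (int32_t)r;
}

/* Unsigned saturating add: clamp to UINT32_MAX on overflow. */
static uint32_t uadd_sat_u32(uint32_t a, uint32_t b)
{
    uint64_t r = (uint64_t)a + (uint64_t)b;
    return r > UINT32_MAX ? (uint32_t)UINT32_MAX : (uint32_t)r;
}

int main(void)
{
    printf("%d\n", sadd_sat_i32(INT32_MAX, 1));  /* prints 2147483647 */
    printf("%u\n", uadd_sat_u32(UINT32_MAX, 1)); /* prints 4294967295 */
    return 0;
}
```

The vector intrinsics apply the same rule independently to every lane of the 128-bit value.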

+ 4 - 34
core/iwasm/compilation/simd/simd_sat_int_arith.h

@@ -25,40 +25,10 @@ aot_compile_simd_i16x8_saturate(AOTCompContext *comp_ctx,
                                 bool is_signed);
 
 bool
-aot_compile_simd_i8x16_cmp(AOTCompContext *comp_ctx,
-                           AOTFuncContext *func_ctx,
-                           V128Arithmetic arith_op,
-                           bool is_signed);
-
-bool
-aot_compile_simd_i16x8_cmp(AOTCompContext *comp_ctx,
-                           AOTFuncContext *func_ctx,
-                           V128Arithmetic arith_op,
-                           bool is_signed);
-
-bool
-aot_compile_simd_i32x4_cmp(AOTCompContext *comp_ctx,
-                           AOTFuncContext *func_ctx,
-                           V128Arithmetic arith_op,
-                           bool is_signed);
-
-bool
-aot_compile_simd_i8x16_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
-
-bool
-aot_compile_simd_i16x8_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
-
-bool
-aot_compile_simd_i32x4_abs(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
-
-bool
-aot_compile_simd_i8x16_avgr_u(AOTCompContext *comp_ctx,
-                              AOTFuncContext *func_ctx);
-
-bool
-aot_compile_simd_i16x8_avgr_u(AOTCompContext *comp_ctx,
-                              AOTFuncContext *func_ctx);
-
+aot_compile_simd_i32x4_saturate(AOTCompContext *comp_ctx,
+                                AOTFuncContext *func_ctx,
+                                V128Arithmetic arith_op,
+                                bool is_signed);
 #ifdef __cplusplus
 } /* end of extern "C" */
 #endif

+ 522 - 198
core/iwasm/interpreter/wasm_loader.c

@@ -4053,44 +4053,35 @@ wasm_loader_find_block_addr(BlockAddr *block_addr_cache,
 #if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0)
             case WASM_OP_SIMD_PREFIX:
             {
+                /* TODO: shall we create a table to be friendlier to branch prediction? */
                 opcode = read_uint8(p);
-                if (SIMD_i8x16_eq <= opcode
-                    && opcode <= SIMD_f32x4_convert_i32x4_u) {
-                    break;
-                }
-
+                /* follow the order of enum WASMSimdEXTOpcode in wasm_opcode.h */
                 switch (opcode) {
                     case SIMD_v128_load:
-                    case SIMD_i16x8_load8x8_s:
-                    case SIMD_i16x8_load8x8_u:
-                    case SIMD_i32x4_load16x4_s:
-                    case SIMD_i32x4_load16x4_u:
-                    case SIMD_i64x2_load32x2_s:
-                    case SIMD_i64x2_load32x2_u:
-                    case SIMD_v8x16_load_splat:
-                    case SIMD_v16x8_load_splat:
-                    case SIMD_v32x4_load_splat:
-                    case SIMD_v64x2_load_splat:
+                    case SIMD_v128_load8x8_s:
+                    case SIMD_v128_load8x8_u:
+                    case SIMD_v128_load16x4_s:
+                    case SIMD_v128_load16x4_u:
+                    case SIMD_v128_load32x2_s:
+                    case SIMD_v128_load32x2_u:
+                    case SIMD_v128_load8_splat:
+                    case SIMD_v128_load16_splat:
+                    case SIMD_v128_load32_splat:
+                    case SIMD_v128_load64_splat:
                     case SIMD_v128_store:
-                        skip_leb_uint32(p, p_end); /* align */
-                        skip_leb_uint32(p, p_end); /* offset */
+                        /* memarg align */
+                        skip_leb_uint32(p, p_end);
+                        /* memarg offset */
+                        skip_leb_uint32(p, p_end);
                         break;
 
                     case SIMD_v128_const:
                     case SIMD_v8x16_shuffle:
+                        /* immByte[16] immLaneId[16] */
                         CHECK_BUF1(p, p_end, 16);
                         p += 16;
                         break;
 
-                    case SIMD_v8x16_swizzle:
-                    case SIMD_i8x16_splat:
-                    case SIMD_i16x8_splat:
-                    case SIMD_i32x4_splat:
-                    case SIMD_i64x2_splat:
-                    case SIMD_f32x4_splat:
-                    case SIMD_f64x2_splat:
-                        break;
-
                     case SIMD_i8x16_extract_lane_s:
                     case SIMD_i8x16_extract_lane_u:
                     case SIMD_i8x16_replace_lane:
@@ -4105,14 +4096,44 @@ wasm_loader_find_block_addr(BlockAddr *block_addr_cache,
                     case SIMD_f32x4_replace_lane:
                     case SIMD_f64x2_extract_lane:
                     case SIMD_f64x2_replace_lane:
+                        /* ImmLaneId */
                         CHECK_BUF(p, p_end, 1);
                         p++;
                         break;
 
+                    case SIMD_v128_load8_lane:
+                    case SIMD_v128_load16_lane:
+                    case SIMD_v128_load32_lane:
+                    case SIMD_v128_load64_lane:
+                    case SIMD_v128_store8_lane:
+                    case SIMD_v128_store16_lane:
+                    case SIMD_v128_store32_lane:
+                    case SIMD_v128_store64_lane:
+                        /* memarg align */
+                        skip_leb_uint32(p, p_end);
+                        /* memarg offset */
+                        skip_leb_uint32(p, p_end);
+                        /* ImmLaneId */
+                        CHECK_BUF(p, p_end, 1);
+                        p++;
+                        break;
+
+                    case SIMD_v128_load32_zero:
+                    case SIMD_v128_load64_zero:
+                        /* memarg align */
+                        skip_leb_uint32(p, p_end);
+                        /* memarg offset */
+                        skip_leb_uint32(p, p_end);
+                        break;
+
                     default:
-                        LOG_WARNING("WASM loader find block addr failed: "
-                                    "invalid opcode fd 0x%02x.", opcode);
-                        return false;
+                        /*
+                         * since the latest SIMD spec uses almost every value
+                         * from 0x00 to 0xff, the default branch covers all the
+                         * opcodes without an immediate
+                         * https://github.com/WebAssembly/simd/blob/main/proposals/simd/NewOpcodes.md
+                         */
+                        break;
                 }
                 break;
             }
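
Each memory-access opcode above carries a memarg immediate, i.e. two LEB128-encoded u32 values (an alignment exponent and an offset), and the *_lane variants add one more byte for the lane index; that is exactly what the skip_leb_uint32/CHECK_BUF sequence steps over. A stand-alone sketch of that skipping, using illustrative helper names rather than WAMR's:

```c
#include <stdbool.h>
#include <stdint.h>

/* Skip one unsigned LEB128-encoded u32: continuation bytes have the high
   bit set; the final byte has it clear. A u32 takes at most 5 bytes. */
static bool skip_leb_u32(const uint8_t **p, const uint8_t *end)
{
    for (int i = 0; i < 5 && *p < end; i++) {
        uint8_t byte = *(*p)++;
        if ((byte & 0x80) == 0)
            return true;
    }
    return false; /* truncated or over-long encoding */
}

/* Skip a memarg (align exponent + offset) and, for the *_lane opcodes,
   one extra byte holding the lane index. */
static bool skip_simd_mem_immediates(const uint8_t **p, const uint8_t *end,
                                     bool has_lane)
{
    if (!skip_leb_u32(p, end) || !skip_leb_u32(p, end))
        return false;
    if (has_lane) {
        if (*p >= end)
            return false;
        (*p)++;
    }
    return true;
}

int main(void)
{
    /* e.g. v128.load16_lane immediates: align=1, offset=128, lane=3 */
    const uint8_t imm[] = { 0x01, 0x80, 0x01, 0x03 };
    const uint8_t *p = imm;
    return skip_simd_mem_immediates(&p, imm + sizeof(imm), true) ? 0 : 1;
}
```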
@@ -5685,9 +5706,25 @@ check_simd_memory_access_align(uint8 opcode, uint32 align,
         4, /* store */
     };
 
-    bh_assert(opcode <= SIMD_v128_store);
+    uint8 mem_access_aligns_load_lane[] = {
+        0, 1, 2, 3, /* load lane */
+        0, 1, 2, 3, /* store lane */
+        2, 3 /* load zero */
+    };
 
-    if (align > mem_access_aligns[opcode - SIMD_v128_load]) {
+    if (!((opcode <= SIMD_v128_store)
+          || (SIMD_v128_load8_lane <= opcode
+              && opcode <= SIMD_v128_load64_zero))) {
+        set_error_buf(error_buf, error_buf_size,
+                      "the opcode doesn't include memarg");
+        return false;
+    }
+
+    if ((opcode <= SIMD_v128_store
+         && align > mem_access_aligns[opcode - SIMD_v128_load])
+        || (SIMD_v128_load8_lane <= opcode && opcode <= SIMD_v128_load64_zero
+            && align > mem_access_aligns_load_lane[opcode
+                                                   - SIMD_v128_load8_lane])) {
         set_error_buf(error_buf, error_buf_size,
                       "alignment must not be larger than natural");
         return false;
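
To make the "natural alignment" rule concrete: the encoded align field is an exponent, and it must not exceed log2 of the number of bytes the instruction actually touches, so v128.load32_lane (4 bytes) accepts align 0, 1 or 2 but rejects 3. A small sketch deriving the same rule from the access width (illustrative only, not the loader's code):

```c
#include <stdbool.h>
#include <stdint.h>

/* align: the exponent from the memarg; access_bytes: bytes the opcode
   reads or writes (e.g. 2 for load16_lane, 16 for v128.load). */
static bool simd_align_is_valid(uint32_t align, uint32_t access_bytes)
{
    uint32_t natural = 0;
    while ((1u << natural) < access_bytes)
        natural++;                  /* natural = log2(access_bytes) */
    return align <= natural;
}
```

For instance, simd_align_is_valid(3, 4) is false, which corresponds to the "alignment must not be larger than natural" error above.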
@@ -5731,6 +5768,24 @@ check_simd_access_lane(uint8 opcode, uint8 lane,
                 goto fail;
             }
             break;
+
+        case SIMD_v128_load8_lane:
+        case SIMD_v128_load16_lane:
+        case SIMD_v128_load32_lane:
+        case SIMD_v128_load64_lane:
+        case SIMD_v128_store8_lane:
+        case SIMD_v128_store16_lane:
+        case SIMD_v128_store32_lane:
+        case SIMD_v128_store64_lane:
+        case SIMD_v128_load32_zero:
+        case SIMD_v128_load64_zero:
+        {
+            uint8 max_lanes[] = { 16, 8, 4, 2, 16, 8, 4, 2, 4, 2 };
+            if (lane >= max_lanes[opcode - SIMD_v128_load8_lane]) {
+                goto fail;
+            }
+            break;
+        }
         default:
             goto fail;
     }
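
The max_lanes table above is simply 16 divided by the lane width in bytes for each opcode, since a v128 is 16 bytes; the lane immediate is valid only if it is below that count. A tiny sketch of the same bound (illustrative only):

```c
#include <stdbool.h>
#include <stdint.h>

/* lane_width_bytes: 1 for *8_lane, 2 for *16_lane, 4 for *32_lane/_zero,
   8 for *64_lane/_zero. A 128-bit vector holds 16 / lane_width_bytes lanes. */
static bool simd_lane_is_valid(uint8_t lane, uint32_t lane_width_bytes)
{
    return lane < 16u / lane_width_bytes;
}
```

For example, simd_lane_is_valid(7, 2) holds for v128.load16_lane, while lane 8 would be rejected.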
@@ -8038,21 +8093,21 @@ fail_data_cnt_sec_require:
 #if (WASM_ENABLE_WAMR_COMPILER != 0) || (WASM_ENABLE_JIT != 0)
             case WASM_OP_SIMD_PREFIX:
             {
-                uint8 lane;
-
                 opcode = read_uint8(p);
+                /* follow the order of enum WASMSimdEXTOpcode in wasm_opcode.h */
                 switch (opcode) {
+                    /* memory instruction */
                     case SIMD_v128_load:
-                    case SIMD_i16x8_load8x8_s:
-                    case SIMD_i16x8_load8x8_u:
-                    case SIMD_i32x4_load16x4_s:
-                    case SIMD_i32x4_load16x4_u:
-                    case SIMD_i64x2_load32x2_s:
-                    case SIMD_i64x2_load32x2_u:
-                    case SIMD_v8x16_load_splat:
-                    case SIMD_v16x8_load_splat:
-                    case SIMD_v32x4_load_splat:
-                    case SIMD_v64x2_load_splat:
+                    case SIMD_v128_load8x8_s:
+                    case SIMD_v128_load8x8_u:
+                    case SIMD_v128_load16x4_s:
+                    case SIMD_v128_load16x4_u:
+                    case SIMD_v128_load32x2_s:
+                    case SIMD_v128_load32x2_u:
+                    case SIMD_v128_load8_splat:
+                    case SIMD_v128_load16_splat:
+                    case SIMD_v128_load32_splat:
+                    case SIMD_v128_load64_splat:
                     {
                         CHECK_MEMORY();
 
@@ -8064,7 +8119,6 @@ fail_data_cnt_sec_require:
 
                         read_leb_uint32(p, p_end, mem_offset); /* offset */
 
-                        /* pop(i32 %i), push(v128 *result) */
                         POP_AND_PUSH(VALUE_TYPE_I32, VALUE_TYPE_V128);
                         break;
                     }
@@ -8081,18 +8135,19 @@ fail_data_cnt_sec_require:
 
                         read_leb_uint32(p, p_end, mem_offset); /* offset */
 
-                        /* pop(v128 %value) */
                         POP_V128();
-                        /* pop(i32 %i) */
                         POP_I32();
                         break;
                     }
 
+                    /* basic operation */
                     case SIMD_v128_const:
+                    {
                         CHECK_BUF1(p, p_end, 16);
                         p += 16;
                         PUSH_V128();
                         break;
+                    }
 
                     case SIMD_v8x16_shuffle:
                     {
@@ -8111,122 +8166,87 @@ fail_data_cnt_sec_require:
                     }
 
                     case SIMD_v8x16_swizzle:
+                    {
                         POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
                         break;
+                    }
 
+                    /* splat operation */
                     case SIMD_i8x16_splat:
                     case SIMD_i16x8_splat:
                     case SIMD_i32x4_splat:
-                        POP_AND_PUSH(VALUE_TYPE_I32, VALUE_TYPE_V128);
-                        break;
                     case SIMD_i64x2_splat:
-                        POP_AND_PUSH(VALUE_TYPE_I64, VALUE_TYPE_V128);
-                        break;
                     case SIMD_f32x4_splat:
-                        POP_AND_PUSH(VALUE_TYPE_F32, VALUE_TYPE_V128);
-                        break;
                     case SIMD_f64x2_splat:
-                        POP_AND_PUSH(VALUE_TYPE_F64, VALUE_TYPE_V128);
+                    {
+                        uint8 pop_type[] = { VALUE_TYPE_I32, VALUE_TYPE_I32,
+                                             VALUE_TYPE_I32, VALUE_TYPE_I64,
+                                             VALUE_TYPE_F32, VALUE_TYPE_F64 };
+                        POP_AND_PUSH(pop_type[opcode - SIMD_i8x16_splat],
+                                     VALUE_TYPE_V128);
                         break;
+                    }
 
+                    /* lane operation */
                     case SIMD_i8x16_extract_lane_s:
                     case SIMD_i8x16_extract_lane_u:
+                    case SIMD_i8x16_replace_lane:
                     case SIMD_i16x8_extract_lane_s:
                     case SIMD_i16x8_extract_lane_u:
+                    case SIMD_i16x8_replace_lane:
                     case SIMD_i32x4_extract_lane:
-                        CHECK_BUF(p, p_end, 1);
-                        lane = read_uint8(p);
-
-                        if (!check_simd_access_lane(opcode, lane, error_buf,
-                                                    error_buf_size)) {
-                            goto fail;
-                        }
-
-                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I32);
-                        break;
+                    case SIMD_i32x4_replace_lane:
                     case SIMD_i64x2_extract_lane:
-                        CHECK_BUF(p, p_end, 1);
-                        lane = read_uint8(p);
-
-                        if (!check_simd_access_lane(opcode, lane, error_buf,
-                                                    error_buf_size)) {
-                            goto fail;
-                        }
-
-                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I64);
-                        break;
+                    case SIMD_i64x2_replace_lane:
                     case SIMD_f32x4_extract_lane:
-                        CHECK_BUF(p, p_end, 1);
-                        lane = read_uint8(p);
-
-                        if (!check_simd_access_lane(opcode, lane, error_buf,
-                                                    error_buf_size)) {
-                            goto fail;
-                        }
-
-                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_F32);
-                        break;
+                    case SIMD_f32x4_replace_lane:
                     case SIMD_f64x2_extract_lane:
-                        CHECK_BUF(p, p_end, 1);
-                        lane = read_uint8(p);
-
-                        if (!check_simd_access_lane(opcode, lane, error_buf,
-                                                    error_buf_size)) {
-                            goto fail;
-                        }
-
-                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_F64);
-                        break;
-                    case SIMD_i8x16_replace_lane:
-                    case SIMD_i16x8_replace_lane:
-                    case SIMD_i32x4_replace_lane:
-                        CHECK_BUF(p, p_end, 1);
-                        lane = read_uint8(p);
-
-                        if (!check_simd_access_lane(opcode, lane, error_buf,
-                                                    error_buf_size)) {
-                            goto fail;
-                        }
+                    case SIMD_f64x2_replace_lane:
+                    {
+                        uint8 lane;
+                        /* clang-format off */
+                        uint8 replace[] = {
+                            /*i8x16*/ 0x0, 0x0, VALUE_TYPE_I32,
+                            /*i16x8*/ 0x0, 0x0, VALUE_TYPE_I32,
+                            /*i32x4*/ 0x0, VALUE_TYPE_I32,
+                            /*i64x2*/ 0x0, VALUE_TYPE_I64,
+                            /*f32x4*/ 0x0, VALUE_TYPE_F32,
+                            /*f64x2*/ 0x0, VALUE_TYPE_F64,
+                        };
+                        uint8 push_type[] = {
+                            /*i8x16*/ VALUE_TYPE_I32, VALUE_TYPE_I32,
+                                      VALUE_TYPE_V128,
+                            /*i16x8*/ VALUE_TYPE_I32, VALUE_TYPE_I32,
+                                      VALUE_TYPE_V128,
+                            /*i32x4*/ VALUE_TYPE_I32, VALUE_TYPE_V128,
+                            /*i64x2*/ VALUE_TYPE_I64, VALUE_TYPE_V128,
+                            /*f32x4*/ VALUE_TYPE_F32, VALUE_TYPE_V128,
+                            /*f64x2*/ VALUE_TYPE_F64, VALUE_TYPE_V128,
+                        };
+                        /* clang-format on */
 
-                        POP_I32();
-                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
-                        break;
-                    case SIMD_i64x2_replace_lane:
                         CHECK_BUF(p, p_end, 1);
                         lane = read_uint8(p);
-
                         if (!check_simd_access_lane(opcode, lane, error_buf,
                                                     error_buf_size)) {
                             goto fail;
                         }
 
-                        POP_I64();
-                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
-                        break;
-                    case SIMD_f32x4_replace_lane:
-                        CHECK_BUF(p, p_end, 1);
-                        lane = read_uint8(p);
-
-                        if (!check_simd_access_lane(opcode, lane, error_buf,
-                                                    error_buf_size)) {
-                            goto fail;
+                        if (replace[opcode - SIMD_i8x16_extract_lane_s]) {
+                            if (!(wasm_loader_pop_frame_ref(
+                                  loader_ctx,
+                                  replace[opcode - SIMD_i8x16_extract_lane_s],
+                                  error_buf, error_buf_size)))
+                                goto fail;
                         }
 
-                        POP_F32();
-                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        POP_AND_PUSH(
+                          VALUE_TYPE_V128,
+                          push_type[opcode - SIMD_i8x16_extract_lane_s]);
                         break;
-                    case SIMD_f64x2_replace_lane:
-                        CHECK_BUF(p, p_end, 1);
-                        lane = read_uint8(p);
-
-                        if (!check_simd_access_lane(opcode, lane, error_buf,
-                                                    error_buf_size)) {
-                            goto fail;
-                        }
+                    }
 
-                        POP_F64();
-                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
-                        break;
+                    /* i8x16 compare operation */
                     case SIMD_i8x16_eq:
                     case SIMD_i8x16_ne:
                     case SIMD_i8x16_lt_s:
@@ -8237,6 +8257,7 @@ fail_data_cnt_sec_require:
                     case SIMD_i8x16_le_u:
                     case SIMD_i8x16_ge_s:
                     case SIMD_i8x16_ge_u:
+                    /* i16x8 compare operation */
                     case SIMD_i16x8_eq:
                     case SIMD_i16x8_ne:
                     case SIMD_i16x8_lt_s:
@@ -8247,6 +8268,7 @@ fail_data_cnt_sec_require:
                     case SIMD_i16x8_le_u:
                     case SIMD_i16x8_ge_s:
                     case SIMD_i16x8_ge_u:
+                    /* i32x4 compare operation */
                     case SIMD_i32x4_eq:
                     case SIMD_i32x4_ne:
                     case SIMD_i32x4_lt_s:
@@ -8257,122 +8279,318 @@ fail_data_cnt_sec_require:
                     case SIMD_i32x4_le_u:
                     case SIMD_i32x4_ge_s:
                     case SIMD_i32x4_ge_u:
+                    /* f32x4 compare operation */
                     case SIMD_f32x4_eq:
                     case SIMD_f32x4_ne:
                     case SIMD_f32x4_lt:
                     case SIMD_f32x4_gt:
                     case SIMD_f32x4_le:
                     case SIMD_f32x4_ge:
+                    /* f64x2 compare operation */
                     case SIMD_f64x2_eq:
                     case SIMD_f64x2_ne:
                     case SIMD_f64x2_lt:
                     case SIMD_f64x2_gt:
                     case SIMD_f64x2_le:
                     case SIMD_f64x2_ge:
+                    {
                         POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
                         break;
+                    }
 
-                    case SIMD_f32x4_ceil:
-                    case SIMD_f32x4_floor:
-                    case SIMD_f32x4_trunc:
-                    case SIMD_f32x4_nearest:
-                    case SIMD_f64x2_ceil:
-                    case SIMD_f64x2_floor:
-                    case SIMD_f64x2_trunc:
-                    case SIMD_f64x2_nearest:
+                    /* v128 operation */
                     case SIMD_v128_not:
-                    case SIMD_i8x16_abs:
-                    case SIMD_i8x16_neg:
-                    case SIMD_i16x8_abs:
-                    case SIMD_i16x8_neg:
-                    case SIMD_i32x4_abs:
-                    case SIMD_i32x4_neg:
-                    case SIMD_i64x2_neg:
-                    case SIMD_f32x4_abs:
-                    case SIMD_f32x4_neg:
-                    case SIMD_f32x4_sqrt:
-                    case SIMD_f64x2_abs:
-                    case SIMD_f64x2_neg:
-                    case SIMD_f64x2_sqrt:
-                    case SIMD_i16x8_widen_low_i8x16_s:
-                    case SIMD_i16x8_widen_high_i8x16_s:
-                    case SIMD_i16x8_widen_low_i8x16_u:
-                    case SIMD_i16x8_widen_high_i8x16_u:
-                    case SIMD_i32x4_widen_low_i16x8_s:
-                    case SIMD_i32x4_widen_high_i16x8_s:
-                    case SIMD_i32x4_widen_low_i16x8_u:
-                    case SIMD_i32x4_widen_high_i16x8_u:
-                    case SIMD_i32x4_trunc_sat_f32x4_s:
-                    case SIMD_i32x4_trunc_sat_f32x4_u:
-                    case SIMD_f32x4_convert_i32x4_s:
-                    case SIMD_f32x4_convert_i32x4_u:
+                    {
                         POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
                         break;
+                    }
+
+                    case SIMD_v128_and:
+                    case SIMD_v128_andnot:
+                    case SIMD_v128_or:
+                    case SIMD_v128_xor:
+                    {
+                        POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
 
                     case SIMD_v128_bitselect:
+                    {
                         POP_V128();
                         POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
                         break;
+                    }
+
+                    case SIMD_v128_any_true:
+                    {
+                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I32);
+                        break;
+                    }
+
+                    /* Load Lane Operation */
+                    case SIMD_v128_load8_lane:
+                    case SIMD_v128_load16_lane:
+                    case SIMD_v128_load32_lane:
+                    case SIMD_v128_load64_lane:
+                    case SIMD_v128_store8_lane:
+                    case SIMD_v128_store16_lane:
+                    case SIMD_v128_store32_lane:
+                    case SIMD_v128_store64_lane:
+                    {
+                        uint8 lane;
+
+                        CHECK_MEMORY();
+
+                        read_leb_uint32(p, p_end, align); /* align */
+                        if (!check_simd_memory_access_align(
+                              opcode, align, error_buf, error_buf_size)) {
+                            goto fail;
+                        }
+
+                        read_leb_uint32(p, p_end, mem_offset); /* offset */
+
+                        CHECK_BUF(p, p_end, 1);
+                        lane = read_uint8(p);
+                        if (!check_simd_access_lane(opcode, lane, error_buf,
+                                                    error_buf_size)) {
+                            goto fail;
+                        }
+
+                        POP_V128();
+                        POP_I32();
+                        if (opcode < SIMD_v128_store8_lane) {
+                            PUSH_V128();
+                        }
+                        break;
+                    }
+
+                    case SIMD_v128_load32_zero:
+                    case SIMD_v128_load64_zero:
+                    {
+                        CHECK_MEMORY();
+
+                        read_leb_uint32(p, p_end, align); /* align */
+                        if (!check_simd_memory_access_align(
+                              opcode, align, error_buf, error_buf_size)) {
+                            goto fail;
+                        }
+
+                        read_leb_uint32(p, p_end, mem_offset); /* offset */
+
+                        POP_AND_PUSH(VALUE_TYPE_I32, VALUE_TYPE_V128);
+                        break;
+                    }
+
+                    /* Float conversion */
+                    case SIMD_f32x4_demote_f64x2_zero:
+                    case SIMD_f64x2_promote_low_f32x4_zero:
+                    {
+                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
+                    /* i8x16 Operation */
+                    case SIMD_i8x16_abs:
+                    case SIMD_i8x16_neg:
+                    case SIMD_i8x16_popcnt:
+                    {
+                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
 
-                    case SIMD_i8x16_any_true:
                     case SIMD_i8x16_all_true:
                     case SIMD_i8x16_bitmask:
-                    case SIMD_i16x8_any_true:
-                    case SIMD_i16x8_all_true:
-                    case SIMD_i16x8_bitmask:
-                    case SIMD_i32x4_any_true:
-                    case SIMD_i32x4_all_true:
-                    case SIMD_i32x4_bitmask:
+                    {
                         POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I32);
                         break;
+                    }
+
+                    case SIMD_i8x16_narrow_i16x8_s:
+                    case SIMD_i8x16_narrow_i16x8_u:
+                    {
+                        POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
+                    case SIMD_f32x4_ceil:
+                    case SIMD_f32x4_floor:
+                    case SIMD_f32x4_trunc:
+                    case SIMD_f32x4_nearest:
+                    {
+                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
 
                     case SIMD_i8x16_shl:
                     case SIMD_i8x16_shr_s:
                     case SIMD_i8x16_shr_u:
-                    case SIMD_i16x8_shl:
-                    case SIMD_i16x8_shr_s:
-                    case SIMD_i16x8_shr_u:
-                    case SIMD_i32x4_shl:
-                    case SIMD_i32x4_shr_s:
-                    case SIMD_i32x4_shr_u:
-                    case SIMD_i64x2_shl:
-                    case SIMD_i64x2_shr_s:
-                    case SIMD_i64x2_shr_u:
+                    {
                         POP_I32();
                         POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
                         break;
+                    }
 
-                    case SIMD_i8x16_narrow_i16x8_s:
-                    case SIMD_i8x16_narrow_i16x8_u:
-                    case SIMD_i16x8_narrow_i32x4_s:
-                    case SIMD_i16x8_narrow_i32x4_u:
-                    case SIMD_v128_and:
-                    case SIMD_v128_andnot:
-                    case SIMD_v128_or:
-                    case SIMD_v128_xor:
                     case SIMD_i8x16_add:
-                    case SIMD_i8x16_add_saturate_s:
-                    case SIMD_i8x16_add_saturate_u:
+                    case SIMD_i8x16_add_sat_s:
+                    case SIMD_i8x16_add_sat_u:
                     case SIMD_i8x16_sub:
-                    case SIMD_i8x16_sub_saturate_s:
-                    case SIMD_i8x16_sub_saturate_u:
+                    case SIMD_i8x16_sub_sat_s:
+                    case SIMD_i8x16_sub_sat_u:
+                    {
+                        POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
+                    case SIMD_f64x2_ceil:
+                    case SIMD_f64x2_floor:
+                    {
+                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
                     case SIMD_i8x16_min_s:
                     case SIMD_i8x16_min_u:
                     case SIMD_i8x16_max_s:
                     case SIMD_i8x16_max_u:
+                    {
+                        POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
+                    case SIMD_f64x2_trunc:
+                    {
+                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
                     case SIMD_i8x16_avgr_u:
+                    {
+                        POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
+                    case SIMD_i16x8_extadd_pairwise_i8x16_s:
+                    case SIMD_i16x8_extadd_pairwise_i8x16_u:
+                    case SIMD_i32x4_extadd_pairwise_i16x8_s:
+                    case SIMD_i32x4_extadd_pairwise_i16x8_u:
+                    /* i16x8 operation */
+                    case SIMD_i16x8_abs:
+                    case SIMD_i16x8_neg:
+                    {
+                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
+                    case SIMD_i16x8_q15mulr_sat_s:
+                    {
+                        POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
+                    case SIMD_i16x8_all_true:
+                    case SIMD_i16x8_bitmask:
+                    {
+                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I32);
+                        break;
+                    }
+
+                    case SIMD_i16x8_narrow_i32x4_s:
+                    case SIMD_i16x8_narrow_i32x4_u:
+                    {
+                        POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
+                    case SIMD_i16x8_extend_low_i8x16_s:
+                    case SIMD_i16x8_extend_high_i8x16_s:
+                    case SIMD_i16x8_extend_low_i8x16_u:
+                    case SIMD_i16x8_extend_high_i8x16_u:
+                    {
+                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
+                    case SIMD_i16x8_shl:
+                    case SIMD_i16x8_shr_s:
+                    case SIMD_i16x8_shr_u:
+                    {
+                        POP_I32();
+                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
                     case SIMD_i16x8_add:
-                    case SIMD_i16x8_add_saturate_s:
-                    case SIMD_i16x8_add_saturate_u:
+                    case SIMD_i16x8_add_sat_s:
+                    case SIMD_i16x8_add_sat_u:
                     case SIMD_i16x8_sub:
-                    case SIMD_i16x8_sub_saturate_s:
-                    case SIMD_i16x8_sub_saturate_u:
+                    case SIMD_i16x8_sub_sat_s:
+                    case SIMD_i16x8_sub_sat_u:
+                    {
+                        POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
+                    case SIMD_f64x2_nearest:
+                    {
+                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
                     case SIMD_i16x8_mul:
                     case SIMD_i16x8_min_s:
                     case SIMD_i16x8_min_u:
                     case SIMD_i16x8_max_s:
                     case SIMD_i16x8_max_u:
                     case SIMD_i16x8_avgr_u:
+                    case SIMD_i16x8_extmul_low_i8x16_s:
+                    case SIMD_i16x8_extmul_high_i8x16_s:
+                    case SIMD_i16x8_extmul_low_i8x16_u:
+                    case SIMD_i16x8_extmul_high_i8x16_u:
+                    {
+                        POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
+                    /* i32x4 operation */
+                    case SIMD_i32x4_abs:
+                    case SIMD_i32x4_neg:
+                    {
+                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
+                    case SIMD_i32x4_all_true:
+                    case SIMD_i32x4_bitmask:
+                    {
+                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I32);
+                        break;
+                    }
+
+                    case SIMD_i32x4_narrow_i64x2_s:
+                    case SIMD_i32x4_narrow_i64x2_u:
+                    {
+                        POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
+                    case SIMD_i32x4_extend_low_i16x8_s:
+                    case SIMD_i32x4_extend_high_i16x8_s:
+                    case SIMD_i32x4_extend_low_i16x8_u:
+                    case SIMD_i32x4_extend_high_i16x8_u:
+                    {
+                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
+                    case SIMD_i32x4_shl:
+                    case SIMD_i32x4_shr_s:
+                    case SIMD_i32x4_shr_u:
+                    {
+                        POP_I32();
+                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
                     case SIMD_i32x4_add:
                     case SIMD_i32x4_sub:
                     case SIMD_i32x4_mul:
@@ -8380,31 +8598,137 @@ fail_data_cnt_sec_require:
                     case SIMD_i32x4_min_u:
                     case SIMD_i32x4_max_s:
                     case SIMD_i32x4_max_u:
+                    case SIMD_i32x4_dot_i16x8_s:
+                    case SIMD_i32x4_avgr_u:
+                    case SIMD_i32x4_extmul_low_i16x8_s:
+                    case SIMD_i32x4_extmul_high_i16x8_s:
+                    case SIMD_i32x4_extmul_low_i16x8_u:
+                    case SIMD_i32x4_extmul_high_i16x8_u:
+                    {
+                        POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
+                    /* i64x2 operation */
+                    case SIMD_i64x2_abs:
+                    case SIMD_i64x2_neg:
+                    {
+                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
+                    case SIMD_i64x2_all_true:
+                    case SIMD_i64x2_bitmask:
+                    {
+                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_I32);
+                        break;
+                    }
+
+                    case SIMD_i64x2_extend_low_i32x4_s:
+                    case SIMD_i64x2_extend_high_i32x4_s:
+                    case SIMD_i64x2_extend_low_i32x4_u:
+                    case SIMD_i64x2_extend_high_i32x4_u:
+                    {
+                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
+                    case SIMD_i64x2_shl:
+                    case SIMD_i64x2_shr_s:
+                    case SIMD_i64x2_shr_u:
+                    {
+                        POP_I32();
+                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
                     case SIMD_i64x2_add:
                     case SIMD_i64x2_sub:
                     case SIMD_i64x2_mul:
+                    case SIMD_i64x2_eq:
+                    case SIMD_i64x2_ne:
+                    case SIMD_i64x2_lt_s:
+                    case SIMD_i64x2_gt_s:
+                    case SIMD_i64x2_le_s:
+                    case SIMD_i64x2_ge_s:
+                    case SIMD_i64x2_extmul_low_i32x4_s:
+                    case SIMD_i64x2_extmul_high_i32x4_s:
+                    case SIMD_i64x2_extmul_low_i32x4_u:
+                    case SIMD_i64x2_extmul_high_i32x4_u:
+                    {
+                        POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
+                    /* f32x4 operation */
+                    case SIMD_f32x4_abs:
+                    case SIMD_f32x4_neg:
+                    case SIMD_f32x4_round:
+                    case SIMD_f32x4_sqrt:
+                    {
+                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
                     case SIMD_f32x4_add:
                     case SIMD_f32x4_sub:
                     case SIMD_f32x4_mul:
                     case SIMD_f32x4_div:
                     case SIMD_f32x4_min:
                     case SIMD_f32x4_max:
+                    case SIMD_f32x4_pmin:
+                    case SIMD_f32x4_pmax:
+                    {
+                        POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
+                    /* f64x2 operation */
+                    case SIMD_f64x2_abs:
+                    case SIMD_f64x2_neg:
+                    case SIMD_f64x2_round:
+                    case SIMD_f64x2_sqrt:
+                    {
+                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
+
                     case SIMD_f64x2_add:
                     case SIMD_f64x2_sub:
                     case SIMD_f64x2_mul:
                     case SIMD_f64x2_div:
                     case SIMD_f64x2_min:
                     case SIMD_f64x2_max:
+                    case SIMD_f64x2_pmin:
+                    case SIMD_f64x2_pmax:
+                    {
                         POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
                         break;
+                    }
+
+                    case SIMD_i32x4_trunc_sat_f32x4_s:
+                    case SIMD_i32x4_trunc_sat_f32x4_u:
+                    case SIMD_f32x4_convert_i32x4_s:
+                    case SIMD_f32x4_convert_i32x4_u:
+                    case SIMD_i32x4_trunc_sat_f64x2_s_zero:
+                    case SIMD_i32x4_trunc_sat_f64x2_u_zero:
+                    case SIMD_f64x2_convert_low_i32x4_s:
+                    case SIMD_f64x2_convert_low_i32x4_u:
+                    {
+                        POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
+                        break;
+                    }
 
                     default:
+                    {
                         if (error_buf != NULL) {
                             snprintf(error_buf, error_buf_size,
-                                    "WASM module load failed: "
-                                    "invalid opcode 0xfd %02x.", opcode);
+                                     "WASM module load failed: "
+                                     "invalid opcode 0xfd %02x.",
+                                     opcode);
                         }
                         goto fail;
+                    }
                 }
                 break;
             }
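
A pattern worth noting in the rewritten validation above: instead of one case per opcode, the loader now indexes small arrays with opcode minus the first opcode of the group to pick the value types to pop and push. A minimal stand-alone sketch of that table-driven idea; the opcode and type values below are hypothetical, chosen only to show the indexing, not the real encodings:

```c
#include <stdint.h>
#include <stdio.h>

/* Hypothetical sub-opcode numbers and type tags, densely packed so that
   (opcode - first opcode of the group) indexes a table directly. */
enum { OP_SPLAT_I8 = 0x0f, OP_SPLAT_I16, OP_SPLAT_I32,
       OP_SPLAT_I64, OP_SPLAT_F32, OP_SPLAT_F64 };
enum { TYPE_I32, TYPE_I64, TYPE_F32, TYPE_F64, TYPE_V128 };

/* One entry per opcode, in opcode order: the scalar type the splat pops
   before pushing its v128 result. */
static const uint8_t splat_pop_type[] = {
    TYPE_I32, TYPE_I32, TYPE_I32, TYPE_I64, TYPE_F32, TYPE_F64
};

int main(void)
{
    uint8_t opcode = OP_SPLAT_F32;
    printf("pop type tag: %d\n", splat_pop_type[opcode - OP_SPLAT_I8]);
    return 0;
}
```

The tables only work if they stay in the same order as the opcode enum, which is why the loader comments stress following the order of WASMSimdEXTOpcode.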

+ 129 - 57
core/iwasm/interpreter/wasm_opcode.h

@@ -296,18 +296,18 @@ typedef enum WASMMiscEXTOpcode {
 
 typedef enum WASMSimdEXTOpcode {
     /* memory instruction */
-    SIMD_v128_load        = 0x00,
-    SIMD_i16x8_load8x8_s  = 0x01,
-    SIMD_i16x8_load8x8_u  = 0x02,
-    SIMD_i32x4_load16x4_s = 0x03,
-    SIMD_i32x4_load16x4_u = 0x04,
-    SIMD_i64x2_load32x2_s = 0x05,
-    SIMD_i64x2_load32x2_u = 0x06,
-    SIMD_v8x16_load_splat = 0x07,
-    SIMD_v16x8_load_splat = 0x08,
-    SIMD_v32x4_load_splat = 0x09,
-    SIMD_v64x2_load_splat = 0x0a,
-    SIMD_v128_store       = 0x0b,
+    SIMD_v128_load         = 0x00,
+    SIMD_v128_load8x8_s    = 0x01,
+    SIMD_v128_load8x8_u    = 0x02,
+    SIMD_v128_load16x4_s   = 0x03,
+    SIMD_v128_load16x4_u   = 0x04,
+    SIMD_v128_load32x2_s   = 0x05,
+    SIMD_v128_load32x2_u   = 0x06,
+    SIMD_v128_load8_splat  = 0x07,
+    SIMD_v128_load16_splat = 0x08,
+    SIMD_v128_load32_splat = 0x09,
+    SIMD_v128_load64_splat = 0x0a,
+    SIMD_v128_store        = 0x0b,
 
     /* basic operation */
     SIMD_v128_const       = 0x0c,
@@ -391,107 +391,170 @@ typedef enum WASMSimdEXTOpcode {
     SIMD_f64x2_ge    = 0x4c,
 
     /* v128 operation */
-    SIMD_v128_not    = 0x4d,
-    SIMD_v128_and    = 0x4e,
-    SIMD_v128_andnot = 0x4f,
-    SIMD_v128_or     = 0x50,
-    SIMD_v128_xor    = 0x51,
+    SIMD_v128_not       = 0x4d,
+    SIMD_v128_and       = 0x4e,
+    SIMD_v128_andnot    = 0x4f,
+    SIMD_v128_or        = 0x50,
+    SIMD_v128_xor       = 0x51,
     SIMD_v128_bitselect = 0x52,
+    SIMD_v128_any_true  = 0x53,
+
+    /* Load Lane Operation */
+    SIMD_v128_load8_lane   = 0x54,
+    SIMD_v128_load16_lane  = 0x55,
+    SIMD_v128_load32_lane  = 0x56,
+    SIMD_v128_load64_lane  = 0x57,
+    SIMD_v128_store8_lane  = 0x58,
+    SIMD_v128_store16_lane = 0x59,
+    SIMD_v128_store32_lane = 0x5a,
+    SIMD_v128_store64_lane = 0x5b,
+    SIMD_v128_load32_zero  = 0x5c,
+    SIMD_v128_load64_zero  = 0x5d,
+
+    /* Float conversion */
+    SIMD_f32x4_demote_f64x2_zero      = 0x5e,
+    SIMD_f64x2_promote_low_f32x4_zero = 0x5f,
 
     /* i8x16 Operation */
     SIMD_i8x16_abs            = 0x60,
     SIMD_i8x16_neg            = 0x61,
-    SIMD_i8x16_any_true       = 0x62,
+    SIMD_i8x16_popcnt         = 0x62,
     SIMD_i8x16_all_true       = 0x63,
     SIMD_i8x16_bitmask        = 0x64,
     SIMD_i8x16_narrow_i16x8_s = 0x65,
     SIMD_i8x16_narrow_i16x8_u = 0x66,
+    SIMD_f32x4_ceil           = 0x67,
+    SIMD_f32x4_floor          = 0x68,
+    SIMD_f32x4_trunc          = 0x69,
+    SIMD_f32x4_nearest        = 0x6a,
     SIMD_i8x16_shl            = 0x6b,
     SIMD_i8x16_shr_s          = 0x6c,
     SIMD_i8x16_shr_u          = 0x6d,
     SIMD_i8x16_add            = 0x6e,
-    SIMD_i8x16_add_saturate_s = 0x6f,
-    SIMD_i8x16_add_saturate_u = 0x70,
+    SIMD_i8x16_add_sat_s      = 0x6f,
+    SIMD_i8x16_add_sat_u      = 0x70,
     SIMD_i8x16_sub            = 0x71,
-    SIMD_i8x16_sub_saturate_s = 0x72,
-    SIMD_i8x16_sub_saturate_u = 0x73,
+    SIMD_i8x16_sub_sat_s      = 0x72,
+    SIMD_i8x16_sub_sat_u      = 0x73,
+    SIMD_f64x2_ceil           = 0x74,
+    SIMD_f64x2_floor          = 0x75,
     SIMD_i8x16_min_s          = 0x76,
     SIMD_i8x16_min_u          = 0x77,
     SIMD_i8x16_max_s          = 0x78,
     SIMD_i8x16_max_u          = 0x79,
+    SIMD_f64x2_trunc          = 0x7a,
     SIMD_i8x16_avgr_u         = 0x7b,
+    SIMD_i16x8_extadd_pairwise_i8x16_s = 0x7c,
+    SIMD_i16x8_extadd_pairwise_i8x16_u = 0x7d,
+    SIMD_i32x4_extadd_pairwise_i16x8_s = 0x7e,
+    SIMD_i32x4_extadd_pairwise_i16x8_u = 0x7f,
 
     /* i16x8 operation */
     SIMD_i16x8_abs            = 0x80,
     SIMD_i16x8_neg            = 0x81,
-    SIMD_i16x8_any_true       = 0x82,
+    SIMD_i16x8_q15mulr_sat_s  = 0x82,
     SIMD_i16x8_all_true       = 0x83,
     SIMD_i16x8_bitmask        = 0x84,
     SIMD_i16x8_narrow_i32x4_s = 0x85,
     SIMD_i16x8_narrow_i32x4_u = 0x86,
-    SIMD_i16x8_widen_low_i8x16_s  = 0x87,
-    SIMD_i16x8_widen_high_i8x16_s = 0x88,
-    SIMD_i16x8_widen_low_i8x16_u  = 0x89,
-    SIMD_i16x8_widen_high_i8x16_u = 0x8a,
+    SIMD_i16x8_extend_low_i8x16_s  = 0x87,
+    SIMD_i16x8_extend_high_i8x16_s = 0x88,
+    SIMD_i16x8_extend_low_i8x16_u  = 0x89,
+    SIMD_i16x8_extend_high_i8x16_u = 0x8a,
     SIMD_i16x8_shl            = 0x8b,
     SIMD_i16x8_shr_s          = 0x8c,
     SIMD_i16x8_shr_u          = 0x8d,
     SIMD_i16x8_add            = 0x8e,
-    SIMD_i16x8_add_saturate_s = 0x8f,
-    SIMD_i16x8_add_saturate_u = 0x90,
+    SIMD_i16x8_add_sat_s      = 0x8f,
+    SIMD_i16x8_add_sat_u      = 0x90,
     SIMD_i16x8_sub            = 0x91,
-    SIMD_i16x8_sub_saturate_s = 0x92,
-    SIMD_i16x8_sub_saturate_u = 0x93,
+    SIMD_i16x8_sub_sat_s      = 0x92,
+    SIMD_i16x8_sub_sat_u      = 0x93,
+    SIMD_f64x2_nearest        = 0x94,
     SIMD_i16x8_mul            = 0x95,
     SIMD_i16x8_min_s          = 0x96,
     SIMD_i16x8_min_u          = 0x97,
     SIMD_i16x8_max_s          = 0x98,
     SIMD_i16x8_max_u          = 0x99,
+    /* placeholder            = 0x9a */
     SIMD_i16x8_avgr_u         = 0x9b,
+    SIMD_i16x8_extmul_low_i8x16_s  = 0x9c,
+    SIMD_i16x8_extmul_high_i8x16_s = 0x9d,
+    SIMD_i16x8_extmul_low_i8x16_u  = 0x9e,
+    SIMD_i16x8_extmul_high_i8x16_u = 0x9f,
 
     /* i32x4 operation */
     SIMD_i32x4_abs            = 0xa0,
     SIMD_i32x4_neg            = 0xa1,
-    SIMD_i32x4_any_true       = 0xa2,
+    /* placeholder            = 0xa2 */
     SIMD_i32x4_all_true       = 0xa3,
     SIMD_i32x4_bitmask        = 0xa4,
-    SIMD_i32x4_widen_low_i16x8_s  = 0xa7,
-    SIMD_i32x4_widen_high_i16x8_s = 0xa8,
-    SIMD_i32x4_widen_low_i16x8_u  = 0xa9,
-    SIMD_i32x4_widen_high_i16x8_u = 0xaa,
+    SIMD_i32x4_narrow_i64x2_s = 0xa5,
+    SIMD_i32x4_narrow_i64x2_u = 0xa6,
+    SIMD_i32x4_extend_low_i16x8_s  = 0xa7,
+    SIMD_i32x4_extend_high_i16x8_s = 0xa8,
+    SIMD_i32x4_extend_low_i16x8_u  = 0xa9,
+    SIMD_i32x4_extend_high_i16x8_u = 0xaa,
     SIMD_i32x4_shl            = 0xab,
     SIMD_i32x4_shr_s          = 0xac,
     SIMD_i32x4_shr_u          = 0xad,
     SIMD_i32x4_add            = 0xae,
+    SIMD_i32x4_add_sat_s      = 0xaf,
+    SIMD_i32x4_add_sat_u      = 0xb0,
     SIMD_i32x4_sub            = 0xb1,
+    SIMD_i32x4_sub_sat_s      = 0xb2,
+    SIMD_i32x4_sub_sat_u      = 0xb3,
+    /* placeholder            = 0xb4 */
     SIMD_i32x4_mul            = 0xb5,
     SIMD_i32x4_min_s          = 0xb6,
     SIMD_i32x4_min_u          = 0xb7,
     SIMD_i32x4_max_s          = 0xb8,
     SIMD_i32x4_max_u          = 0xb9,
+    SIMD_i32x4_dot_i16x8_s    = 0xba,
+    SIMD_i32x4_avgr_u         = 0xbb,
+    SIMD_i32x4_extmul_low_i16x8_s  = 0xbc,
+    SIMD_i32x4_extmul_high_i16x8_s = 0xbd,
+    SIMD_i32x4_extmul_low_i16x8_u  = 0xbe,
+    SIMD_i32x4_extmul_high_i16x8_u = 0xbf,
 
     /* i64x2 operation */
-    SIMD_i64x2_neg    = 0xc1,
-    SIMD_i64x2_shl    = 0xcb,
-    SIMD_i64x2_shr_s  = 0xcc,
-    SIMD_i64x2_shr_u  = 0xcd,
-    SIMD_i64x2_add    = 0xce,
-    SIMD_i64x2_sub    = 0xd1,
-    SIMD_i64x2_mul    = 0xd5,
-
-    /* float ceil/floor/trunc/nearest */
-    SIMD_f32x4_ceil   = 0xd8,
-    SIMD_f32x4_floor  = 0xd9,
-    SIMD_f32x4_trunc  = 0xda,
-    SIMD_f32x4_nearest = 0xdb,
-    SIMD_f64x2_ceil   = 0xdc,
-    SIMD_f64x2_floor  = 0xdd,
-    SIMD_f64x2_trunc  = 0xde,
-    SIMD_f64x2_nearest = 0xdf,
+    SIMD_i64x2_abs       = 0xc0,
+    SIMD_i64x2_neg       = 0xc1,
+    /* placeholder       = 0xc2 */
+    SIMD_i64x2_all_true  = 0xc3,
+    SIMD_i64x2_bitmask   = 0xc4,
+    /* placeholder       = 0xc5 */
+    /* placeholder       = 0xc6 */
+    SIMD_i64x2_extend_low_i32x4_s  = 0xc7,
+    SIMD_i64x2_extend_high_i32x4_s = 0xc8,
+    SIMD_i64x2_extend_low_i32x4_u  = 0xc9,
+    SIMD_i64x2_extend_high_i32x4_u = 0xca,
+    SIMD_i64x2_shl       = 0xcb,
+    SIMD_i64x2_shr_s     = 0xcc,
+    SIMD_i64x2_shr_u     = 0xcd,
+    SIMD_i64x2_add       = 0xce,
+    /* placeholder       = 0xcf */
+    /* placeholder       = 0xd0 */
+    SIMD_i64x2_sub       = 0xd1,
+    /* placeholder       = 0xd2 */
+    /* placeholder       = 0xd3 */
+    /* placeholder       = 0xd4 */
+    SIMD_i64x2_mul       = 0xd5,
+    SIMD_i64x2_eq        = 0xd6,
+    SIMD_i64x2_ne        = 0xd7,
+    SIMD_i64x2_lt_s      = 0xd8,
+    SIMD_i64x2_gt_s      = 0xd9,
+    SIMD_i64x2_le_s      = 0xda,
+    SIMD_i64x2_ge_s      = 0xdb,
+    SIMD_i64x2_extmul_low_i32x4_s  = 0xdc,
+    SIMD_i64x2_extmul_high_i32x4_s = 0xdd,
+    SIMD_i64x2_extmul_low_i32x4_u  = 0xde,
+    SIMD_i64x2_extmul_high_i32x4_u = 0xdf,
 
     /* f32x4 operation */
     SIMD_f32x4_abs    = 0xe0,
     SIMD_f32x4_neg    = 0xe1,
+    SIMD_f32x4_round  = 0xe2,
     SIMD_f32x4_sqrt   = 0xe3,
     SIMD_f32x4_add    = 0xe4,
     SIMD_f32x4_sub    = 0xe5,
@@ -499,10 +562,13 @@ typedef enum WASMSimdEXTOpcode {
     SIMD_f32x4_div    = 0xe7,
     SIMD_f32x4_min    = 0xe8,
     SIMD_f32x4_max    = 0xe9,
+    SIMD_f32x4_pmin   = 0xea,
+    SIMD_f32x4_pmax   = 0xeb,
 
     /* f64x2 operation */
     SIMD_f64x2_abs    = 0xec,
     SIMD_f64x2_neg    = 0xed,
+    SIMD_f64x2_round  = 0xee,
     SIMD_f64x2_sqrt   = 0xef,
     SIMD_f64x2_add    = 0xf0,
     SIMD_f64x2_sub    = 0xf1,
@@ -510,12 +576,18 @@ typedef enum WASMSimdEXTOpcode {
     SIMD_f64x2_div    = 0xf3,
     SIMD_f64x2_min    = 0xf4,
     SIMD_f64x2_max    = 0xf5,
+    SIMD_f64x2_pmin   = 0xf6,
+    SIMD_f64x2_pmax   = 0xf7,
 
     /* conversion operation */
-    SIMD_i32x4_trunc_sat_f32x4_s = 0xf8,
-    SIMD_i32x4_trunc_sat_f32x4_u = 0xf9,
-    SIMD_f32x4_convert_i32x4_s   = 0xfa,
-    SIMD_f32x4_convert_i32x4_u   = 0xfb,
+    SIMD_i32x4_trunc_sat_f32x4_s      = 0xf8,
+    SIMD_i32x4_trunc_sat_f32x4_u      = 0xf9,
+    SIMD_f32x4_convert_i32x4_s        = 0xfa,
+    SIMD_f32x4_convert_i32x4_u        = 0xfb,
+    SIMD_i32x4_trunc_sat_f64x2_s_zero = 0xfc,
+    SIMD_i32x4_trunc_sat_f64x2_u_zero = 0xfd,
+    SIMD_f64x2_convert_low_i32x4_s    = 0xfe,
+    SIMD_f64x2_convert_low_i32x4_u    = 0xff,
 } WASMSimdEXTOpcode;
 
 typedef enum WASMAtomicEXTOpcode {
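
All of the values in WASMSimdEXTOpcode are sub-opcodes behind the 0xfd prefix: the loader first sees 0xfd and then reads the SIMD opcode before dispatching on it. A hedged decoding sketch; it assumes the single-byte read used by the loader in this patch (the proposal encodes the sub-opcode as a LEB128 u32, which coincides with one raw byte only for values below 0x80):

```c
#include <stdint.h>
#include <stdio.h>

#define WASM_OP_SIMD_PREFIX 0xfd

/* Return the number of bytes consumed, or 0 if this is not a SIMD opcode. */
static int decode_simd_opcode(const uint8_t *p, const uint8_t *end,
                              uint8_t *sub_opcode)
{
    if (end - p < 2 || p[0] != WASM_OP_SIMD_PREFIX)
        return 0;
    *sub_opcode = p[1];
    return 2;
}

int main(void)
{
    const uint8_t code[] = { 0xfd, 0x0c }; /* 0xfd 0x0c: SIMD_v128_const */
    uint8_t op;
    if (decode_simd_opcode(code, code + sizeof(code), &op))
        printf("simd sub-opcode: 0x%02x\n", op);
    return 0;
}
```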

+ 1 - 0
doc/build_wamr.md

@@ -258,6 +258,7 @@ Then build the source codes:
 ``` Bash
 cd core/deps/
 git clone https://github.com/nodejs/uvwasi.git
+
 cd product-mini/platforms/windows/
 mkdir build
 cd build

+ 5 - 0
doc/build_wasm_app.md

@@ -6,6 +6,11 @@ For C and C++, WASI-SDK version 12.0+ is the major tool supported by WAMR to bui
 
 To install WASI SDK, please download the [wasi-sdk release](https://github.com/CraneStation/wasi-sdk/releases) and extract the archive to default path `/opt/wasi-sdk`.
 
+The official *wasi-sdk release* doesn't fully support the *latest 128-bit SIMD spec* yet. WAMR provides a script in [build-wasi-sdk](../test-tools/build-wasi-sdk/) to build
+another wasi-sdk with *llvm-13* from source and install it at *../test-tools/wasi-sdk*. If you plan to build WASM applications with the *latest 128-bit SIMD*, please use it instead of the official release.
+
+The [sample workloads](../samples/workload) already use this self-compiled wasi-sdk.
+
 For [AssemblyScript](https://github.com/AssemblyScript/assemblyscript), please refer to [AssemblyScript quick start](https://www.assemblyscript.org/quick-start.html) and [AssemblyScript compiler](https://www.assemblyscript.org/compiler.html#command-line-options) for how to install `asc` compiler and build WASM applications.
 
 For Rust, please firstly ref to [Install Rust and Cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html) to install cargo, rustc and rustup, by default they are installed under ~/.cargo/bin, and then run `rustup target add wasm32-wasi` to install wasm32-wasi target for Rust toolchain. To build WASM applications, we can run `cargo build --target wasm32-wasi`, the output files are under `target/wasm32-wasi`.

+ 2 - 39
product-mini/platforms/android/build_llvm.sh

@@ -1,43 +1,6 @@
 #!/bin/sh
 
-# Copyright (C) 2019 Intel Corporation. All rights reserved.
+# Copyright (C) 2020 Intel Corporation. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-DEPS_DIR=${PWD}/../../../core/deps
-
-cd ${DEPS_DIR}
-if [ ! -d "llvm" ]; then
-  echo "Clone llvm to core/deps/ .."
-  git clone https://github.com/llvm-mirror/llvm.git
-fi
-
-cd llvm
-mkdir -p build
-cd build
-
-if [ ! -f bin/llvm-lto ]; then
-
-  CORE_NUM=$(nproc --all)
-  if [ -z "${CORE_NUM}" ]; then
-    CORE_NUM=1
-  fi
-
-  echo "Build llvm with" ${CORE_NUM} "cores"
-
-  cmake .. \
-          -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
-          -DCMAKE_BUILD_TYPE:STRING="Release" \
-          -DLLVM_BUILD_LLVM_DYLIB:BOOL=OFF \
-          -DLLVM_OPTIMIZED_TABLEGEN:BOOL=ON \
-          -DLLVM_INCLUDE_EXAMPLES:BOOL=OFF \
-          -DLLVM_INCLUDE_TESTS:BOOL=OFF \
-          -DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF \
-          -DLLVM_APPEND_VC_REV:BOOL=OFF
-  make -j ${CORE_NUM}
-
-else
-  echo "llvm has already been built"
-fi
-
-cd ${PWD}
-
+/usr/bin/env python3 ../../../build-scripts/build_llvm.py --platform android

+ 10 - 0
product-mini/platforms/darwin/build_jit.sh

@@ -0,0 +1,10 @@
+#!/bin/sh
+
+# Copyright (C) 2019 Intel Corporation.  All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+rm -fr build && mkdir build
+cd build
+cmake .. -DWAMR_BUILD_JIT=1
+make
+cd ..

+ 6 - 0
product-mini/platforms/darwin/build_llvm.sh

@@ -0,0 +1,6 @@
+#!/bin/sh
+
+# Copyright (C) 2020 Intel Corporation. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+/usr/bin/env python3 ../../../build-scripts/build_llvm.py --platform darwin

+ 1 - 41
product-mini/platforms/linux/build_llvm.sh

@@ -3,44 +3,4 @@
 # Copyright (C) 2020 Intel Corporation. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-DEPS_DIR=${PWD}/../../../core/deps
-
-cd ${DEPS_DIR}
-if [ ! -d "llvm" ]; then
-  echo "Clone llvm to core/deps/ .."
-  git clone --depth 1 --branch release/11.x https://github.com/llvm/llvm-project.git llvm
-fi
-
-cd llvm
-mkdir -p build
-cd build
-
-if [ ! -f bin/llvm-lto ]; then
-
-  CORE_NUM=$(nproc --all)
-  if [ -z "${CORE_NUM}" ]; then
-    CORE_NUM=1
-  fi
-
-  echo "Build llvm with" ${CORE_NUM} "cores"
-
-  cmake ../llvm \
-          -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
-          -DCMAKE_BUILD_TYPE:STRING="Release" \
-          -DLLVM_TARGETS_TO_BUILD:STRING="X86;ARM;AArch64;Mips" \
-          -DLLVM_BUILD_LLVM_DYLIB:BOOL=OFF \
-          -DLLVM_OPTIMIZED_TABLEGEN:BOOL=ON \
-          -DLLVM_ENABLE_ZLIB:BOOL=OFF \
-          -DLLVM_INCLUDE_DOCS:BOOL=OFF \
-          -DLLVM_INCLUDE_EXAMPLES:BOOL=OFF \
-          -DLLVM_INCLUDE_TESTS:BOOL=OFF \
-          -DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF \
-          -DLLVM_APPEND_VC_REV:BOOL=OFF
-  make -j ${CORE_NUM}
-
-else
-  echo "llvm has already been built"
-fi
-
-cd ${PWD}
-
+/usr/bin/env python3 ../../../build-scripts/build_llvm.py

+ 9 - 62
product-mini/platforms/windows/build_llvm.py

@@ -1,69 +1,16 @@
+#!/usr/bin/env python3
 #
 # Copyright (C) 2019 Intel Corporation.  All rights reserved.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 #
 
-#!/usr/bin/env python3
-import os
+import pathlib
+import subprocess
 import sys
-from pathlib import Path
-
-def clone_llvm():
-    llvm_dir = Path("llvm")
-    if(llvm_dir.exists() == False):
-        print("Clone llvm to core/deps/ ..")
-        for line in os.popen("git clone --branch release/11.x https://github.com/llvm/llvm-project.git llvm"):
-            print(line)
-    else:
-        print("llvm source codes already existed")
-    return llvm_dir
-
-def main():
-    current_os = sys.platform
-    print("current OS is ", current_os)
-
-    current_dir = Path.cwd()
-    deps_dir = current_dir.joinpath( "../../../core/deps")
-
-    os.chdir(deps_dir)
-    llvm_dir = clone_llvm()
-    os.chdir(llvm_dir)
-
-    build_dir_name = "win32build"
-    llvm_file = "LLVM.sln"
-
-    Path(build_dir_name).mkdir(exist_ok = True)
-    build_dir = Path(build_dir_name)
-    os.chdir(build_dir)
-
-    if ( not Path(llvm_file).exists()):
-        core_number = os.cpu_count()
-        print("Build llvm with", core_number, " cores")
-        cmd = 'cmake ../llvm \
-                -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
-                -DCMAKE_BUILD_TYPE:STRING="Release" \
-                -DLLVM_TARGETS_TO_BUILD:STRING="X86;ARM;AArch64;Mips" \
-                -DLLVM_INCLUDE_GO_TESTS=OFF \
-                -DLLVM_INCLUDE_TOOLS=OFF \
-                -DLLVM_INCLUDE_UTILS=OFF \
-                -DLLVM_ENABLE_TERMINFO=OFF \
-                -DLLVM_BUILD_LLVM_DYLIB:BOOL=OFF \
-                -DLLVM_OPTIMIZED_TABLEGEN:BOOL=ON \
-                -DLLVM_ENABLE_ZLIB:BOOL=OFF \
-                -DLLVM_INCLUDE_DOCS:BOOL=OFF \
-                -DLLVM_INCLUDE_EXAMPLES:BOOL=OFF \
-                -DLLVM_INCLUDE_TESTS:BOOL=OFF \
-                -DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF \
-                -DLLVM_APPEND_VC_REV:BOOL=OFF'
-        print(cmd)
-        for line in os.popen(cmd):
-            print(line)
-    else:
-        print("llvm has already been Cmaked")
-
-    print("Please open LLVM.sln in {} to build *Release* version".format(build_dir.absolute()))
-
-    os.chdir(current_dir)
 
-if __name__ == "__main__":
-    main()
+script = (
+    pathlib.Path(__file__)
+    .parent.joinpath("../../../build-scripts/build_llvm.py")
+    .resolve()
+)
+subprocess.check_call([sys.executable, script])

+ 4 - 14
samples/workload/README.md

@@ -1,5 +1,5 @@
 All workloads have similar software dependencies, including
-**wasi-sdk**, **emsdk**, **wabt** and **binaryen**
+**emsdk**, **wabt** and **binaryen**
 
 > There might be slight differences when using macOS or a Linux distro other than Ubuntu. This document only targets
 Ubuntu 18.04 as an example.
@@ -10,16 +10,6 @@ use [preparation.sh](./preparation.sh) to install all dependencies before compil
 
 For details, the script includes the steps below:
 
-- **wasi-sdk**. Install
-  [latest release](https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-12/wasi-sdk-12.0-linux.tar.gz)
-  to */opt/wasi-sdk*
-
-``` bash
-$ wget https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-${WASI_SDK_VER}/${WASI_SDK_FILE}
-$ tar zxf ${WASI_SDK_FILE} -C /opt
-$ ln -sf /opt/wasi-sdk-${WASI_SDK_VER}.0 /opt/wasi-sdk
-```
-
 - **wabt**. Install
   [latest release](https://github.com/WebAssembly/wabt/releases/download/1.0.23/wabt-1.0.23-ubuntu.tar.gz)
   to */opt/wabt*
@@ -32,15 +22,15 @@ $ ln -sf /opt/wabt-${WABT_VER} /opt/wabt
 
 - **emsdk**. Refer to [the guide](https://emscripten.org/docs/getting_started/downloads.html). Don't forget to activate
  emsdk and set up environment variables. Verify it with `echo ${EMSDK}`. Please be sure to install and activate the build
-  of 2.0.12
+  of 2.0.26
 
 ``` bash
 $ cd /opt
 $ git clone https://github.com/emscripten-core/emsdk.git
 $ cd emsdk
 $ git pull
-$ ./emsdk install 2.0.12
-$ ./emsdk activate 2.0.12
+$ ./emsdk install 2.0.26
+$ ./emsdk activate 2.0.26
 $ echo "source /opt/emsdk/emsdk_env.sh" >> "${HOME}"/.bashrc
 ```
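Since the workload builds are pinned to emsdk 2.0.26, it can be worth failing fast when a different toolchain is active. The check below is an optional helper, not part of this commit; it only assumes that `emsdk_env.sh` has exported `EMSDK` and put `emcc` on the `PATH`, as the instructions above arrange.

```python
# Optional pre-build check, assuming emsdk_env.sh has been sourced so that
# ${EMSDK} is set and emcc is on PATH (as the README above instructs).
import os
import subprocess
import sys

emsdk = os.environ.get("EMSDK")
if not emsdk:
    sys.exit("EMSDK is not set; run 'source /opt/emsdk/emsdk_env.sh' first")

# 'emcc --version' prints the active emscripten version on its first line.
banner = subprocess.check_output(["emcc", "--version"], text=True).splitlines()[0]
if "2.0.26" not in banner:
    sys.exit(f"expected emscripten 2.0.26, got: {banner}")
print("emsdk looks good:", emsdk)
```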
 

+ 69 - 67
samples/workload/XNNPACK/CMakeLists.txt

@@ -6,85 +6,87 @@ cmake_minimum_required (VERSION 3.0)
 project(xnnpack_wasm)
 
 ################  EMCC ################
-if(NOT DEFINED ENV{EMSDK})
-  message(SEND_ERROR
-    "can not find emsdk. "
-    "please refer to https://emscripten.org/docs/getting_started/downloads.html "
-    "and install it, "
-    "or active emsdk by 'source ./emsdk_env.sh'"
-  )
-endif()
-
 include(ExternalProject)
 
 ExternalProject_Add(xnnpack
     PREFIX xnnpack
     GIT_REPOSITORY https://github.com/google/XNNPACK.git
-    GIT_TAG        90f520b6482bb99ac1bbfb71be1382f6c9b83241
+    GIT_TAG        master
     GIT_PROGRESS   ON
     SOURCE_DIR     ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack
     UPDATE_COMMAND git checkout .
+                   && cmake -E copy ${CMAKE_CURRENT_SOURCE_DIR}/benchmark.patch ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack/third_party
                    && git apply ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack.patch
     CONFIGURE_COMMAND ""
+    # grep xnnpack_benchmark -A 1 BUILD.bazel \
+    #   | grep "name =" \
+    #   | awk '{print $3}' \
+    #   | sed -e 's/\"//g' -e 's/,//g' -e 's/^/\/\/:/g'
     BUILD_COMMAND  cd ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack
                    && bazel --output_user_root=build_user_output build -c opt --config=wasm
-                        //:qs8_gemm_bench.wasm
-                        //:qs8_requantization_bench.wasm
-                        //:qu8_gemm_bench.wasm
-                        //:qu8_requantization_bench.wasm
-                        //:f16_igemm_bench.wasm
-                        //:f16_gemm_bench.wasm
-                        //:f16_spmm_bench.wasm
-                        //:f32_igemm_bench.wasm
-                        //:f16_relu_bench.wasm
-                        //:f32_conv_hwc_bench.wasm
-                        //:f32_conv_hwc2chw_bench.wasm
-                        //:f16_dwconv_bench.wasm
-                        //:f32_dwconv_bench.wasm
-                        //:f32_dwconv2d_chw_bench.wasm
-                        //:f32_gemm_bench.wasm
-                        //:f32_hswish_bench.wasm
-                        //:f32_raddexpminusmax_bench.wasm
-                        //:f32_raddextexp_bench.wasm
-                        //:f32_raddstoreexpminusmax_bench.wasm
-                        //:f32_relu_bench.wasm
-                        //:f32_rmax_bench.wasm
-                        //:f32_sigmoid_bench.wasm
-                        //:f32_spmm_bench.wasm
-                        //:f32_softmax_bench.wasm
-                        //:f32_velu_bench.wasm
-                        //:f32_vscaleexpminusmax_bench.wasm
-                        //:f32_vscaleextexp_bench.wasm
-                        //:f32_vsqrt_bench.wasm
-                        //:f32_im2col_gemm_bench.wasm
-                        //:rounding_bench.wasm
-                        //:average_pooling_bench.wasm
-                        //:bankers_rounding_bench.wasm
-                        //:ceiling_bench.wasm
-                        //:channel_shuffle_bench.wasm
-                        //:convolution_bench.wasm
-                        //:deconvolution_bench.wasm
-                        //:elu_bench.wasm
-                        //:floor_bench.wasm
-                        //:global_average_pooling_bench.wasm
-                        //:hardswish_bench.wasm
-                        //:max_pooling_bench.wasm
-                        //:sigmoid_bench.wasm
-                        //:prelu_bench.wasm
-                        //:softmax_bench.wasm
-                        //:square_root_bench.wasm
-                        //:truncation_bench.wasm
-                        //:f32_dwconv_e2e_bench.wasm
-                        //:f32_gemm_e2e_bench.wasm
-                        //:qs8_gemm_e2e_bench.wasm
-                        //:end2end_bench.wasm
-                        //:f32_exp_ulp_eval.wasm
-                        //:f32_expminus_ulp_eval.wasm
-                        //:f32_expm1minus_ulp_eval.wasm
-                        //:f32_extexp_ulp_eval.wasm
-                        //:f32_sigmoid_ulp_eval.wasm
-                        //:f32_sqrt_ulp_eval.wasm
-                        #--sandbox_debug
+                       //:qs8_dwconv_bench.wasm
+                       //:qs8_gemm_bench.wasm
+                       //:qs8_requantization_bench.wasm
+                       //:qs8_vadd_bench.wasm
+                       //:qs8_vaddc_bench.wasm
+                       //:qu8_gemm_bench.wasm
+                       //:qu8_requantization_bench.wasm
+                       //:qu8_vadd_bench.wasm
+                       //:qu8_vaddc_bench.wasm
+                       //:f16_igemm_bench.wasm
+                       //:f16_gemm_bench.wasm
+                       //:f16_spmm_bench.wasm
+                       //:f16_vrelu_bench.wasm
+                       //:f32_igemm_bench.wasm
+                       //:f32_conv_hwc_bench.wasm
+                       //:f32_conv_hwc2chw_bench.wasm
+                       //:f16_dwconv_bench.wasm
+                       //:f32_dwconv_bench.wasm
+                       //:f32_dwconv2d_chw_bench.wasm
+                       //:f32_gemm_bench.wasm
+                       //:f32_raddexpminusmax_bench.wasm
+                       //:f32_raddextexp_bench.wasm
+                       //:f32_raddstoreexpminusmax_bench.wasm
+                       //:f32_rmax_bench.wasm
+                       //:f32_spmm_bench.wasm
+                       //:f32_softmax_bench.wasm
+                       //:f32_velu_bench.wasm
+                       //:f32_vhswish_bench.wasm
+                       //:f32_vrelu_bench.wasm
+                       //:f32_vscaleexpminusmax_bench.wasm
+                       //:f32_vscaleextexp_bench.wasm
+                       //:f32_vsigmoid_bench.wasm
+                       //:f32_vsqrt_bench.wasm
+                       //:f32_im2col_gemm_bench.wasm
+                       //:rounding_bench.wasm
+                       //:average_pooling_bench.wasm
+                       //:bankers_rounding_bench.wasm
+                       //:ceiling_bench.wasm
+                       //:channel_shuffle_bench.wasm
+                       //:convolution_bench.wasm
+                       //:deconvolution_bench.wasm
+                       //:elu_bench.wasm
+                       //:floor_bench.wasm
+                       //:global_average_pooling_bench.wasm
+                       //:hardswish_bench.wasm
+                       //:max_pooling_bench.wasm
+                       //:sigmoid_bench.wasm
+                       //:prelu_bench.wasm
+                       //:softmax_bench.wasm
+                       //:square_root_bench.wasm
+                       //:truncation_bench.wasm
+                       //:f32_dwconv_e2e_bench.wasm
+                       //:f32_gemm_e2e_bench.wasm
+                       //:qs8_dwconv_e2e_bench.wasm
+                       //:qs8_gemm_e2e_bench.wasm
+                       //:qu8_dwconv_e2e_bench.wasm
+                       //:end2end_bench.wasm
+                       //:f32_exp_ulp_eval.wasm
+                       //:f32_expminus_ulp_eval.wasm
+                       //:f32_expm1minus_ulp_eval.wasm
+                       //:f32_extexp_ulp_eval.wasm
+                       //:f32_sigmoid_ulp_eval.wasm
+                       //:f32_sqrt_ulp_eval.wasm
     INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory
                       ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack/bazel-out/wasm-opt/bin/
                       ${CMAKE_CURRENT_SOURCE_DIR}/build/wasm-opt
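The commented `grep ... | awk ... | sed` pipeline above documents how the long list of `//:*_bench.wasm` labels is regenerated when XNNPACK gains new benchmarks. A rough Python equivalent is sketched below; like the pipeline, it assumes the `name = "..."` attribute directly follows each `xnnpack_benchmark(` call in `BUILD.bazel`, and the `.wasm` suffix it appends comes from the `build_defs.bzl` change later in this commit, which renames every benchmark cc_binary to `<name>.wasm`.

```python
# Rough Python equivalent of the grep/awk/sed pipeline quoted in the comment
# above: collect xnnpack_benchmark target names from BUILD.bazel and turn
# them into the Bazel labels listed in the BUILD_COMMAND.
import re

def benchmark_labels(build_bazel="xnnpack/BUILD.bazel"):
    text = open(build_bazel, encoding="utf-8").read()
    names = re.findall(r'xnnpack_benchmark\(\s*name = "([^"]+)"', text)
    return ["//:" + name + ".wasm" for name in names]

if __name__ == "__main__":
    print("\n".join(benchmark_labels()))
```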

+ 1 - 14
samples/workload/XNNPACK/README.md

@@ -5,20 +5,7 @@ This sample demonstrates how to build [XNNPACK](https://github.com/google/XNNPAC
 
 ## Installation toolchains
 
-- **bazel**. Please install bazel from [latest release](https://github.com/bazelbuild/bazel/releases)
-
-- **emsdk**. Please install [emsdk](https://github.com/emscripten-core/emsdk) to /opt/emsdk:
-```bash
-cd /opt
-git clone https://github.com/emscripten-core/emsdk.git
-cd emsdk
-./emsdk install latest
-./emsdk activate latest
-```
-And set up ensdk environment:
-```bash
-source /opt/emsdk/emsdk_env.sh
-```
+Please refer to the [installation instructions](../README.md).
 
 ## Build XNNPACK
 

+ 14 - 0
samples/workload/XNNPACK/benchmark.patch

@@ -0,0 +1,14 @@
+diff --git include/benchmark/benchmark.h include/benchmark/benchmark.h
+index 9b54802..baa5938 100755
+--- include/benchmark/benchmark.h
++++ include/benchmark/benchmark.h
+@@ -364,7 +364,9 @@ template <class Tp>
+ inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
+   internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
+ }
++
+ // FIXME Add ClobberMemory() for non-gnu and non-msvc compilers
++inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { }
+ #endif
+ 
+ // This class is used for user-defined counters.

+ 1 - 0
samples/workload/XNNPACK/build_workload.sh

@@ -0,0 +1 @@
+../docker/build_workload.sh

+ 0 - 1
samples/workload/XNNPACK/docker_build.sh

@@ -1 +0,0 @@
-../docker/docker_build.sh

+ 35 - 600
samples/workload/XNNPACK/xnnpack.patch

@@ -1,8 +1,8 @@
 diff --git a/.bazelrc b/.bazelrc
-index ec740f38..2c193244 100644
+index ec740f38..29f9d56e 100644
 --- a/.bazelrc
 +++ b/.bazelrc
-@@ -49,4 +49,10 @@ build:ios_fat --watchos_cpus=armv7k
+@@ -49,4 +49,9 @@ build:ios_fat --watchos_cpus=armv7k
  build:macos --apple_platform_type=macos
  
  build:macos_arm64 --config=macos
@@ -10,558 +10,31 @@ index ec740f38..2c193244 100644
 \ No newline at end of file
 +build:macos_arm64 --cpu=darwin_arm64
 +
-+build:wasm --copt=-msimd128
 +build:wasm --cpu=wasm
++build:wasm --copt=-msimd128
 +build:wasm --crosstool_top=@emsdk//emscripten_toolchain:everything
 +build:wasm --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
-+
-diff --git a/BUILD.bazel b/BUILD.bazel
-index 1f2b15a8..e7abf838 100644
---- a/BUILD.bazel
-+++ b/BUILD.bazel
-@@ -4996,7 +4996,7 @@ xnnpack_cc_library(
- ######################### Benchmarks for micro-kernels #########################
- 
- xnnpack_benchmark(
--    name = "qs8_gemm_bench",
-+    name = "qs8_gemm_bench.wasm",
-     srcs = [
-         "bench/gemm.h",
-         "bench/qs8-gemm.cc",
-@@ -5007,7 +5007,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "qs8_requantization_bench",
-+    name = "qs8_requantization_bench.wasm",
-     srcs = [
-         "bench/qs8-requantization.cc",
-         "src/xnnpack/AlignedAllocator.h",
-@@ -5017,7 +5017,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "qu8_gemm_bench",
-+    name = "qu8_gemm_bench.wasm",
-     srcs = [
-         "bench/gemm.h",
-         "bench/qu8-gemm.cc",
-@@ -5028,7 +5028,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "qu8_requantization_bench",
-+    name = "qu8_requantization_bench.wasm",
-     srcs = [
-         "bench/qu8-requantization.cc",
-         "src/xnnpack/AlignedAllocator.h",
-@@ -5038,11 +5038,10 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f16_igemm_bench",
-+    name = "f16_igemm_bench.wasm",
-     srcs = [
-         "bench/f16-igemm.cc",
-         "bench/conv.h",
--        "bench/google/conv.h",
-         "src/xnnpack/AlignedAllocator.h",
-     ] + MICROKERNEL_BENCHMARK_HDRS,
-     deps = MICROKERNEL_BENCHMARK_DEPS + [
-@@ -5052,7 +5051,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f16_gemm_bench",
-+    name = "f16_gemm_bench.wasm",
-     srcs = [
-         "bench/f16-gemm.cc",
-         "bench/gemm.h",
-@@ -5064,7 +5063,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f16_spmm_bench",
-+    name = "f16_spmm_bench.wasm",
-     srcs = [
-         "bench/f16-spmm.cc",
-         "bench/spmm.h",
-@@ -5074,7 +5073,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_igemm_bench",
-+    name = "f32_igemm_bench.wasm",
-     srcs = [
-         "bench/f32-igemm.cc",
-         "bench/conv.h",
-@@ -5087,7 +5086,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f16_relu_bench",
-+    name = "f16_relu_bench.wasm",
-     srcs = [
-         "bench/f16-relu.cc",
-         "src/xnnpack/AlignedAllocator.h",
-@@ -5096,7 +5095,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_conv_hwc_bench",
-+    name = "f32_conv_hwc_bench.wasm",
-     srcs = [
-         "bench/f32-conv-hwc.cc",
-         "bench/dconv.h",
-@@ -5108,7 +5107,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_conv_hwc2chw_bench",
-+    name = "f32_conv_hwc2chw_bench.wasm",
-     srcs = [
-         "bench/f32-conv-hwc2chw.cc",
-         "bench/dconv.h",
-@@ -5120,11 +5119,10 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f16_dwconv_bench",
-+    name = "f16_dwconv_bench.wasm",
-     srcs = [
-         "bench/f16-dwconv.cc",
-         "bench/dwconv.h",
--        "bench/google/dwconv.h",
-         "src/xnnpack/AlignedAllocator.h",
-     ] + MICROKERNEL_BENCHMARK_HDRS,
-     deps = MICROKERNEL_BENCHMARK_DEPS + [
-@@ -5134,7 +5132,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_dwconv_bench",
-+    name = "f32_dwconv_bench.wasm",
-     srcs = [
-         "bench/f32-dwconv.cc",
-         "bench/dwconv.h",
-@@ -5147,7 +5145,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_dwconv2d_chw_bench",
-+    name = "f32_dwconv2d_chw_bench.wasm",
-     srcs = [
-         "bench/f32-dwconv2d-chw.cc",
-         "bench/dwconv.h",
-@@ -5160,7 +5158,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_gemm_bench",
-+    name = "f32_gemm_bench.wasm",
-     srcs = [
-         "bench/f32-gemm.cc",
-         "bench/gemm.h",
-@@ -5171,7 +5169,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_hswish_bench",
-+    name = "f32_hswish_bench.wasm",
-     srcs = [
-         "bench/f32-hswish.cc",
-         "src/xnnpack/AlignedAllocator.h",
-@@ -5180,7 +5178,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_raddexpminusmax_bench",
-+    name = "f32_raddexpminusmax_bench.wasm",
-     srcs = [
-         "bench/f32-raddexpminusmax.cc",
-         "src/xnnpack/AlignedAllocator.h",
-@@ -5189,7 +5187,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_raddextexp_bench",
-+    name = "f32_raddextexp_bench.wasm",
-     srcs = [
-         "bench/f32-raddextexp.cc",
-         "src/xnnpack/AlignedAllocator.h",
-@@ -5198,7 +5196,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_raddstoreexpminusmax_bench",
-+    name = "f32_raddstoreexpminusmax_bench.wasm",
-     srcs = [
-         "bench/f32-raddstoreexpminusmax.cc",
-         "src/xnnpack/AlignedAllocator.h",
-@@ -5207,7 +5205,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_relu_bench",
-+    name = "f32_relu_bench.wasm",
-     srcs = [
-         "bench/f32-relu.cc",
-         "src/xnnpack/AlignedAllocator.h",
-@@ -5216,7 +5214,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_rmax_bench",
-+    name = "f32_rmax_bench.wasm",
-     srcs = [
-         "bench/f32-rmax.cc",
-         "src/xnnpack/AlignedAllocator.h",
-@@ -5225,7 +5223,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_sigmoid_bench",
-+    name = "f32_sigmoid_bench.wasm",
-     srcs = [
-         "bench/f32-sigmoid.cc",
-         "src/xnnpack/AlignedAllocator.h",
-@@ -5234,7 +5232,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_spmm_bench",
-+    name = "f32_spmm_bench.wasm",
-     srcs = [
-         "bench/f32-spmm.cc",
-         "bench/spmm.h",
-@@ -5244,7 +5242,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_softmax_bench",
-+    name = "f32_softmax_bench.wasm",
-     srcs = [
-         "bench/f32-softmax.cc",
-     ] + MICROKERNEL_BENCHMARK_HDRS,
-@@ -5253,7 +5251,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_velu_bench",
-+    name = "f32_velu_bench.wasm",
-     srcs = [
-         "bench/f32-velu.cc",
-         "src/xnnpack/AlignedAllocator.h",
-@@ -5262,7 +5260,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_vscaleexpminusmax_bench",
-+    name = "f32_vscaleexpminusmax_bench.wasm",
-     srcs = [
-         "bench/f32-vscaleexpminusmax.cc",
-         "src/xnnpack/AlignedAllocator.h",
-@@ -5271,7 +5269,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_vscaleextexp_bench",
-+    name = "f32_vscaleextexp_bench.wasm",
-     srcs = [
-         "bench/f32-vscaleextexp.cc",
-         "src/xnnpack/AlignedAllocator.h",
-@@ -5280,7 +5278,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_vsqrt_bench",
-+    name = "f32_vsqrt_bench.wasm",
-     srcs = [
-         "bench/f32-vsqrt.cc",
-         "src/xnnpack/AlignedAllocator.h",
-@@ -5289,7 +5287,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_im2col_gemm_bench",
-+    name = "f32_im2col_gemm_bench.wasm",
-     srcs = [
-         "bench/f32-im2col-gemm.cc",
-         "bench/conv.h",
-@@ -5302,7 +5300,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "rounding_bench",
-+    name = "rounding_bench.wasm",
-     srcs = [
-         "bench/rounding.cc",
-         "src/xnnpack/AlignedAllocator.h",
-@@ -5314,7 +5312,7 @@ xnnpack_benchmark(
- ########################### Benchmarks for operators ###########################
- 
- xnnpack_benchmark(
--    name = "average_pooling_bench",
-+    name = "average_pooling_bench.wasm",
-     srcs = ["bench/average-pooling.cc"],
-     copts = xnnpack_optional_tflite_copts(),
-     tags = ["nowin32"],
-@@ -5322,7 +5320,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "bankers_rounding_bench",
-+    name = "bankers_rounding_bench.wasm",
-     srcs = ["bench/bankers-rounding.cc"],
-     copts = xnnpack_optional_tflite_copts(),
-     tags = ["nowin32"],
-@@ -5330,7 +5328,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "ceiling_bench",
-+    name = "ceiling_bench.wasm",
-     srcs = ["bench/ceiling.cc"],
-     copts = xnnpack_optional_tflite_copts(),
-     tags = ["nowin32"],
-@@ -5338,13 +5336,13 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "channel_shuffle_bench",
-+    name = "channel_shuffle_bench.wasm",
-     srcs = ["bench/channel-shuffle.cc"],
-     deps = OPERATOR_BENCHMARK_DEPS,
- )
- 
- xnnpack_benchmark(
--    name = "convolution_bench",
-+    name = "convolution_bench.wasm",
-     srcs = ["bench/convolution.cc"],
-     copts = xnnpack_optional_tflite_copts() + xnnpack_optional_armcl_copts(),
-     tags = ["nowin32"],
-@@ -5352,7 +5350,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "deconvolution_bench",
-+    name = "deconvolution_bench.wasm",
-     srcs = ["bench/deconvolution.cc"],
-     copts = xnnpack_optional_tflite_copts(),
-     tags = ["nowin32"],
-@@ -5360,7 +5358,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "elu_bench",
-+    name = "elu_bench.wasm",
-     srcs = ["bench/elu.cc"],
-     copts = xnnpack_optional_tflite_copts(),
-     tags = ["nowin32"],
-@@ -5368,7 +5366,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "floor_bench",
-+    name = "floor_bench.wasm",
-     srcs = ["bench/floor.cc"],
-     copts = xnnpack_optional_tflite_copts(),
-     tags = ["nowin32"],
-@@ -5376,13 +5374,13 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "global_average_pooling_bench",
-+    name = "global_average_pooling_bench.wasm",
-     srcs = ["bench/global-average-pooling.cc"],
-     deps = OPERATOR_BENCHMARK_DEPS,
- )
- 
- xnnpack_benchmark(
--    name = "hardswish_bench",
-+    name = "hardswish_bench.wasm",
-     srcs = ["bench/hardswish.cc"],
-     copts = xnnpack_optional_tflite_copts(),
-     tags = ["nowin32"],
-@@ -5390,13 +5388,13 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "max_pooling_bench",
-+    name = "max_pooling_bench.wasm",
-     srcs = ["bench/max-pooling.cc"],
-     deps = OPERATOR_BENCHMARK_DEPS,
- )
- 
- xnnpack_benchmark(
--    name = "sigmoid_bench",
-+    name = "sigmoid_bench.wasm",
-     srcs = ["bench/sigmoid.cc"],
-     copts = xnnpack_optional_tflite_copts(),
-     tags = ["nowin32"],
-@@ -5404,7 +5402,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "prelu_bench",
-+    name = "prelu_bench.wasm",
-     srcs = ["bench/prelu.cc"],
-     copts = xnnpack_optional_tflite_copts(),
-     tags = ["nowin32"],
-@@ -5412,7 +5410,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "softmax_bench",
-+    name = "softmax_bench.wasm",
-     srcs = ["bench/softmax.cc"],
-     copts = xnnpack_optional_tflite_copts(),
-     tags = ["nowin32"],
-@@ -5420,7 +5418,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "square_root_bench",
-+    name = "square_root_bench.wasm",
-     srcs = ["bench/square-root.cc"],
-     copts = xnnpack_optional_tflite_copts(),
-     tags = ["nowin32"],
-@@ -5428,7 +5426,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "truncation_bench",
-+    name = "truncation_bench.wasm",
-     srcs = ["bench/truncation.cc"],
-     deps = OPERATOR_BENCHMARK_DEPS,
- )
-@@ -5620,7 +5618,7 @@ cc_library(
- )
- 
- xnnpack_benchmark(
--    name = "f32_dwconv_e2e_bench",
-+    name = "f32_dwconv_e2e_bench.wasm",
-     srcs = [
-         "bench/f32-dwconv-e2e.cc",
-         "bench/end2end.h",
-@@ -5635,7 +5633,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_gemm_e2e_bench",
-+    name = "f32_gemm_e2e_bench.wasm",
-     srcs = [
-         "bench/f32-gemm-e2e.cc",
-         "bench/end2end.h",
-@@ -5650,7 +5648,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "qs8_gemm_e2e_bench",
-+    name = "qs8_gemm_e2e_bench.wasm",
-     srcs = [
-         "bench/qs8-gemm-e2e.cc",
-         "bench/end2end.h",
-@@ -5663,7 +5661,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "end2end_bench",
-+    name = "end2end_bench.wasm",
-     srcs = ["bench/end2end.cc"],
-     deps = [
-         ":XNNPACK",
-@@ -5690,7 +5688,7 @@ xnnpack_benchmark(
- #################### Accuracy evaluation for math functions ####################
- 
- xnnpack_benchmark(
--    name = "f32_exp_ulp_eval",
-+    name = "f32_exp_ulp_eval.wasm",
-     srcs = [
-         "eval/f32-exp-ulp.cc",
-         "src/xnnpack/AlignedAllocator.h",
-@@ -5702,7 +5700,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_expminus_ulp_eval",
-+    name = "f32_expminus_ulp_eval.wasm",
-     srcs = [
-         "eval/f32-expminus-ulp.cc",
-         "src/xnnpack/AlignedAllocator.h",
-@@ -5714,7 +5712,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_expm1minus_ulp_eval",
-+    name = "f32_expm1minus_ulp_eval.wasm",
-     srcs = [
-         "eval/f32-expm1minus-ulp.cc",
-         "src/xnnpack/AlignedAllocator.h",
-@@ -5726,7 +5724,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_extexp_ulp_eval",
-+    name = "f32_extexp_ulp_eval.wasm",
-     srcs = [
-         "eval/f32-extexp-ulp.cc",
-         "src/xnnpack/AlignedAllocator.h",
-@@ -5738,7 +5736,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_sigmoid_ulp_eval",
-+    name = "f32_sigmoid_ulp_eval.wasm",
-     srcs = [
-         "eval/f32-sigmoid-ulp.cc",
-         "src/xnnpack/AlignedAllocator.h",
-@@ -5750,7 +5748,7 @@ xnnpack_benchmark(
- )
- 
- xnnpack_benchmark(
--    name = "f32_sqrt_ulp_eval",
-+    name = "f32_sqrt_ulp_eval.wasm",
-     srcs = [
-         "eval/f32-sqrt-ulp.cc",
-         "src/xnnpack/AlignedAllocator.h",
 diff --git a/WORKSPACE b/WORKSPACE
-index 4fa1aa2f..6181aab2 100644
+index c58e76b6..30934678 100644
 --- a/WORKSPACE
 +++ b/WORKSPACE
-@@ -5,8 +5,8 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
- # Bazel rule definitions
- http_archive(
-     name = "rules_cc",
--    strip_prefix = "rules_cc-master",
--    urls = ["https://github.com/bazelbuild/rules_cc/archive/master.zip"],
-+    strip_prefix = "rules_cc-main",
-+    urls = ["https://github.com/bazelbuild/rules_cc/archive/main.zip"],
- )
- 
- # Google Test framework, used by most unit-tests.
-@@ -19,8 +19,8 @@ http_archive(
- # Google Benchmark library, used in micro-benchmarks.
- http_archive(
+@@ -21,6 +21,7 @@ http_archive(
      name = "com_google_benchmark",
--    strip_prefix = "benchmark-master",
--    urls = ["https://github.com/google/benchmark/archive/master.zip"],
-+    strip_prefix = "benchmark-1.5.3",
-+    urls = ["https://github.com/google/benchmark/archive/refs/tags/v1.5.3.zip"],
+     strip_prefix = "benchmark-master",
+     urls = ["https://github.com/google/benchmark/archive/master.zip"],
++    patches = ["@//third_party:benchmark.patch"],
  )
  
  # FP16 library, used for half-precision conversions
-@@ -89,3 +89,18 @@ android_ndk_repository(name = "androidndk")
+@@ -84,6 +85,19 @@ http_archive(
+    ],
+ )
  
- # Android SDK location and API is auto-detected from $ANDROID_HOME environment variable
- android_sdk_repository(name = "androidsdk")
-+
-+# emscripten library
 +http_archive(
-+    name = "emsdk",
-+    strip_prefix = "emsdk-c1589b55641787d55d53e883852035beea9aec3f/bazel",
-+    url = "https://github.com/emscripten-core/emsdk/archive/c1589b55641787d55d53e883852035beea9aec3f.tar.gz",
-+    sha256 = "7a58a9996b113d3e0675df30b5f17e28aa47de2e684a844f05394fe2f6f12e8e",
++  name = "emsdk",
++  strip_prefix = "emsdk-2.0.26/bazel",
++  url = "https://github.com/emscripten-core/emsdk/archive/refs/tags/2.0.26.tar.gz",
++  sha256 = "79e7166aa8eaae6e52cef1363b2d8db795d03684846066bc51f9dcf905dd58ad",
 +)
 +
 +load("@emsdk//:deps.bzl", emsdk_deps = "deps")
@@ -570,67 +43,42 @@ index 4fa1aa2f..6181aab2 100644
 +load("@emsdk//:emscripten_deps.bzl", emsdk_emscripten_deps = "emscripten_deps")
 +emsdk_emscripten_deps()
 +
+ # Android NDK location and version is auto-detected from $ANDROID_NDK_HOME environment variable
+ android_ndk_repository(name = "androidndk")
+ 
 diff --git a/build_defs.bzl b/build_defs.bzl
-index 10345032..0e926fca 100644
+index 2442bed1..b860dfef 100644
 --- a/build_defs.bzl
 +++ b/build_defs.bzl
-@@ -1,6 +1,6 @@
- """Build definitions and rules for XNNPACK."""
- 
--load(":emscripten.bzl", "xnnpack_emscripten_benchmark_linkopts", "xnnpack_emscripten_deps", "xnnpack_emscripten_minimal_linkopts", "xnnpack_emscripten_test_linkopts")
-+load(":emscripten.bzl", "xnnpack_emscripten_benchmark_linkopts", "xnnpack_emscripten_deps", "xnnpack_emscripten_minimal_linkopts", "xnnpack_emscripten_test_linkopts", "xnnpack_emscripten_benchmark_copts")
- 
- def xnnpack_visibility():
-     """Visibility of :XNNPACK target.
-@@ -424,10 +424,15 @@ def xnnpack_benchmark(name, srcs, copts = [], deps = [], tags = []):
-             ":windows_x86_64_mingw": ["-Wno-unused-function"],
-             ":windows_x86_64_msys": ["-Wno-unused-function"],
-             ":windows_x86_64": [],
-+            ":emscripten": xnnpack_emscripten_benchmark_copts(),
-+            ":emscripten_wasm": xnnpack_emscripten_benchmark_copts(),
-+            ":emscripten_wasmsimd": xnnpack_emscripten_benchmark_copts(),
-             "//conditions:default": ["-Wno-unused-function"],
-         }) + copts,
-         linkopts = select({
-             ":emscripten": xnnpack_emscripten_benchmark_linkopts(),
-+            ":emscripten_wasm": xnnpack_emscripten_benchmark_linkopts(),
-+            ":emscripten_wasmsimd": xnnpack_emscripten_benchmark_linkopts(),
-             ":windows_x86_64_mingw": ["-lshlwapi"],
-             ":windows_x86_64_msys": ["-lshlwapi"],
-             "//conditions:default": [],
+@@ -414,7 +414,7 @@ def xnnpack_benchmark(name, srcs, copts = [], deps = [], tags = []):
+             explicitly specified.
+     """
+     native.cc_binary(
+-        name = name,
++        name = name + ".wasm",
+         srcs = srcs,
+         copts = xnnpack_std_cxxopts() + [
+             "-Iinclude",
 diff --git a/emscripten.bzl b/emscripten.bzl
-index 0a0caedf..aafe3199 100644
+index 130d5f16..2696ad54 100644
 --- a/emscripten.bzl
 +++ b/emscripten.bzl
-@@ -6,6 +6,7 @@ def xnnpack_emscripten_minimal_linkopts():
-         "-s ASSERTIONS=0",
-         "-s ERROR_ON_UNDEFINED_SYMBOLS=1",
-         "-s EXIT_RUNTIME=1",
-+        "--oformat=wasm",
-     ]
- 
- def xnnpack_emscripten_test_linkopts():
-@@ -17,21 +18,36 @@ def xnnpack_emscripten_test_linkopts():
-         "-s EXIT_RUNTIME=1",
-         "-s ALLOW_MEMORY_GROWTH=1",
-         "--pre-js $(location :preamble.js.lds)",
-+        "--oformat=wasm",
-     ]
- 
- def xnnpack_emscripten_benchmark_linkopts():
+@@ -25,12 +25,19 @@ def xnnpack_emscripten_benchmark_linkopts():
      """Emscripten-specific linkopts for benchmarks."""
      return [
          "-s ASSERTIONS=1",
+-        "-s ENVIRONMENT=node,shell,web",
 -        "-s ERROR_ON_UNDEFINED_SYMBOLS=1",
 -        "-s EXIT_RUNTIME=1",
--        "-s ALLOW_MEMORY_GROWTH=1",
 +        "-s ERROR_ON_UNDEFINED_SYMBOLS=0",
-+        "-s ALLOW_MEMORY_GROWTH=0",
-         "-s TOTAL_MEMORY=436207616",  # 416M
+         "-s ALLOW_MEMORY_GROWTH=1",
+         "-s TOTAL_MEMORY=445644800",  # 425M
 -        "--pre-js $(location :preamble.js.lds)",
 +        "-s USE_PTHREADS=0",
 +        "-s STANDALONE_WASM=1",
 +        "-Wno-unused",
++        "-Wno-unused-variable",
++        "-Wno-unused-command-line-argument",
 +        "-Wl,--export=__heap_base",
 +        "-Wl,--export=__data_end",
 +        "-Wl,--export=malloc",
@@ -639,19 +87,6 @@ index 0a0caedf..aafe3199 100644
      ]
  
  def xnnpack_emscripten_deps():
-     """Emscripten-specific dependencies for unit tests and benchmarks."""
-+    return []
-+
-+def xnnpack_emscripten_benchmark_copts():
-     return [
--        ":preamble.js.lds",
-+        "-s ASSERTIONS=1",
-+        "-s ERROR_ON_UNDEFINED_SYMBOLS=0",
-+        "-s ALLOW_MEMORY_GROWTH=0",
-+        "-s USE_PTHREADS=0",
-+        "-s STANDALONE_WASM=1",
-+        "-Wno-unused",
-     ]
 diff --git a/third_party/cpuinfo.BUILD b/third_party/cpuinfo.BUILD
 index 128d683e..f6c287c4 100644
 --- a/third_party/cpuinfo.BUILD

+ 6 - 14
samples/workload/bwa/CMakeLists.bwa_wasm.txt

@@ -5,6 +5,8 @@ cmake_minimum_required (VERSION 3.0)
 
 project(bwa_wasm C)
 
+include(${CMAKE_CURRENT_SOURCE_DIR}/../../cmake/preparation.cmake)
+
 ################ LIBZ ################
 set(LIBZ_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../libz)
 add_library(z_wasm STATIC
@@ -86,16 +88,15 @@ set_target_properties(${PROJECT_NAME} PROPERTIES OUTPUT_NAME bwa.wasm)
 
 target_include_directories(${PROJECT_NAME}
   PRIVATE
-    ${CMAKE_CURRENT_SOURCE_DIR}/../include
-    ${CMAKE_CURRENT_SOURCE_DIR}/../include/SSE
-    ${CMAKE_CURRENT_SOURCE_DIR}/../include/pthread
+    ${WASI_SDK_HOME}/share/wasi-sysroot/include/libc/musl
+    ${WASI_SDK_HOME}/share/wasi-sysroot/include/sse
 )
 
 target_compile_definitions(${PROJECT_NAME}
   PRIVATE
     USE_MALLOC_WRAPPERS
     __SSE__ __SSE2__ __SSE4_1__
-    _WASI_EMULATED_MMAN _WASI_EMULATED_SIGNAL
+    _WASI_EMULATED_MMAN _WASI_EMULATED_SIGNAL _WASI_EMULATED_PROCESS_CLOCKS
 )
 
 target_compile_options(${PROJECT_NAME}
@@ -112,16 +113,7 @@ target_link_options(${PROJECT_NAME}
     LINKER:-z,stack-size=1048576
 )
 
-target_link_libraries(${PROJECT_NAME} z_wasm)
-
-find_program(WASM_OPT
-    NAMES wasm-opt
-    PATHS /opt/binaryen-version_97/bin /opt/binaryen/bin
-)
-
-if (NOT WASM_OPT)
-  message(WARNING "can not find wasm-opt and will not optimize any wasm module")
-endif()
+target_link_libraries(${PROJECT_NAME} z_wasm wasi-emulated-process-clocks)
 
 add_custom_target(bwa_wasm_opt ALL
   COMMAND

+ 10 - 40
samples/workload/bwa/CMakeLists.txt

@@ -10,67 +10,37 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/preparation.cmake)
 #######################################
 include(ExternalProject)
 
-################ HEADERS ################
-set(EMSDK_SYSTEM_HEADERS "$ENV{EMSDK}/upstream/emscripten/system/include")
-set(EMSDK_SSE_HEADERS "${EMSDK_SYSTEM_HEADERS}/SSE")
-set(EMSDK_LIBC_HEADERS "${EMSDK_SYSTEM_HEADERS}/libc")
-ExternalProject_Add(headers_from_emcc
-   PREFIX headers
-   SOURCE_DIR ${EMSDK_SYSTEM_HEADERS}
-   CONFIGURE_COMMAND ""
-   BUILD_COMMAND     ""
-   INSTALL_COMMAND mkdir -p ${CMAKE_CURRENT_SOURCE_DIR}/include/SSE
-                     && ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/sys
-                     && ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_SOURCE_DIR}/include/emscripten
-                     # copy emscripten SSE header files
-                     && ${CMAKE_COMMAND} -E copy ${EMSDK_SYSTEM_HEADERS}/wasm_simd128.h ${CMAKE_CURRENT_SOURCE_DIR}/include/SSE/
-                     && ${CMAKE_COMMAND} -E copy ${EMSDK_SSE_HEADERS}/immintrin.h ${CMAKE_CURRENT_SOURCE_DIR}/include/SSE/
-                     # SSE
-                     && ${CMAKE_COMMAND} -E copy ${EMSDK_SSE_HEADERS}/xmmintrin.h ${CMAKE_CURRENT_SOURCE_DIR}/include/SSE/
-                     # SSE2
-                     && ${CMAKE_COMMAND} -E copy ${EMSDK_SSE_HEADERS}/emmintrin.h ${CMAKE_CURRENT_SOURCE_DIR}/include/SSE/
-                     # SSE4.1
-                     && ${CMAKE_COMMAND} -E copy ${EMSDK_SSE_HEADERS}/smmintrin.h ${CMAKE_CURRENT_SOURCE_DIR}/include/SSE/
-                     # a fake empty header to aovid further depenency
-                     && ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_SOURCE_DIR}/include/emscripten/emscripten.h
-                     # copy emscripten pthread related header files
-                     && ${CMAKE_COMMAND} -E copy ${EMSDK_LIBC_HEADERS}/pthread.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/
-                     && ${CMAKE_COMMAND} -E copy ${EMSDK_LIBC_HEADERS}/signal.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/
-                     && ${CMAKE_COMMAND} -E copy ${EMSDK_LIBC_HEADERS}/netdb.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/
-                     && ${CMAKE_COMMAND} -E copy ${EMSDK_LIBC_HEADERS}/sys/wait.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/sys/
-                     && ${CMAKE_COMMAND} -E copy ${EMSDK_LIBC_HEADERS}/sys/socket.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/sys/
-)
-
 ################ libz ################
 ExternalProject_Add(libz_src
-  PREFIX libz
   GIT_REPOSITORY https://github.com/madler/zlib.git
   GIT_TAG        master
   GIT_PROGRESS   ON
   GIT_SHALLOW    ON
   SOURCE_DIR     ${CMAKE_CURRENT_SOURCE_DIR}/libz
+  UPDATE_COMMAND    ""
+  PATCH_COMMAND     ""
   CONFIGURE_COMMAND ""
-  BUILD_COMMAND  ""
-  INSTALL_COMMAND ""
+  BUILD_COMMAND     ""
+  INSTALL_COMMAND   ""
 )
 
 ################ bwa ################
 ExternalProject_Add(bwa
-  PREFIX bwa
   GIT_REPOSITORY https://github.com/lh3/bwa.git
   GIT_TAG        master
   GIT_PROGRESS   ON
   GIT_SHALLOW    ON
   SOURCE_DIR     ${CMAKE_CURRENT_SOURCE_DIR}/bwa
-  DEPENDS        libz_src headers_from_emcc
-  UPDATE_COMMAND git clean -fd && git checkout -- *
+  DEPENDS        libz_src
+  UPDATE_COMMAND git clean -ffdx && git checkout -- *
                   && ${CMAKE_COMMAND} -E echo "Copying pre-installed CMakeLists.txt"
                   && ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.bwa_wasm.txt CMakeLists.txt
                   && git apply ../bwa.patch
   CONFIGURE_COMMAND  ${CMAKE_COMMAND}
-                         -DWASI_SDK_PREFIX=${WASI_SDK_HOME}/wasi-sdk
-                         -DCMAKE_TOOLCHAIN_FILE=${WASI_SDK_HOME}/wasi-sdk/share/cmake/wasi-sdk.cmake
-                         ${CMAKE_CURRENT_SOURCE_DIR}/bwa
+                       -DWASI_SDK_PREFIX=${WASI_SDK_HOME}
+                       -DCMAKE_TOOLCHAIN_FILE=${WASI_SDK_HOME}/share/cmake/wasi-sdk.cmake
+                       -DCMAKE_SYSROOT=${WASI_SDK_HOME}/share/wasi-sysroot
+                       ${CMAKE_CURRENT_SOURCE_DIR}/bwa
   BUILD_COMMAND  make bwa_wasm_opt
   INSTALL_COMMAND ${CMAKE_COMMAND} -E copy ./bwa.opt.wasm ${CMAKE_CURRENT_SOURCE_DIR}/build/bwa.wasm
 )
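The configure step now points the bwa cross-build directly at the wasi-sdk root: `WASI_SDK_PREFIX`, the `wasi-sdk.cmake` toolchain file, and an explicit `CMAKE_SYSROOT`. For reference, the same configure invocation expressed outside CMake would look roughly like the sketch below, where `wasi_sdk_home` stands for the `WASI_SDK_HOME` that `preparation.cmake` resolves to `<WAMR root>/test-tools/wasi-sdk`; the helper itself is illustrative, not part of this commit.

```python
# Sketch of the wasi-sdk cross-compile configure step performed by the
# ExternalProject above; paths and the helper name are illustrative.
import subprocess

def configure_wasi_project(src_dir, build_dir, wasi_sdk_home):
    subprocess.check_call(
        [
            "cmake",
            f"-DWASI_SDK_PREFIX={wasi_sdk_home}",
            f"-DCMAKE_TOOLCHAIN_FILE={wasi_sdk_home}/share/cmake/wasi-sdk.cmake",
            f"-DCMAKE_SYSROOT={wasi_sdk_home}/share/wasi-sysroot",
            src_dir,
        ],
        cwd=build_dir,
    )
```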

+ 1 - 0
samples/workload/bwa/build_workload.sh

@@ -0,0 +1 @@
+../docker/build_workload.sh

+ 0 - 1
samples/workload/bwa/docker_build.sh

@@ -1 +0,0 @@
-../docker/docker_build.sh

+ 25 - 52
samples/workload/cmake/preparation.cmake

@@ -1,76 +1,49 @@
 # Copyright (C) 2019 Intel Corporation. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-################  WASI-SDK ################
-find_path(WASI_SDK_HOME
-  NAMES wasi-sdk
-  PATHS /opt/
-  REQUIRED
-)
-
-if (NOT WASI_SDK_HOME)
-  message(FATAL_ERROR
-    "can not find wasi-sdk. "
-    "please download it from "
-    "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-12/wasi-sdk-12.0-linux.tar.gz "
-    "and install it under /opt/wasi-sdk"
-  )
-else()
-  message(STATUS 
-    "Detecting wasi-sdk info: ${WASI_SDK_HOME}/wasi-sdk"
-  )
-endif()
+#######################################
+include(ExternalProject)
 
-#
-# check clang version
-execute_process(COMMAND
-  ${WASI_SDK_HOME}/wasi-sdk/bin/clang --version
-  OUTPUT_VARIABLE clang_full_version_string
+file(REAL_PATH ../../.. WAMR_ROOT
+  BASE_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
 )
-string(REGEX REPLACE ".*clang version ([0-9]+\\.[0-9]+).*" "\\1"
-  CLANG_VERSION_STRING ${clang_full_version_string}
+
+find_path(WASI_SDK_PARENT
+  name wasi-sdk
+  PATHS ${WAMR_ROOT}/test-tools/
+  NO_DEFAULT_PATH
+  NO_CMAKE_FIND_ROOT_PATH
 )
-message(STATUS "Detecting clang versoin: ${CLANG_VERSION_STRING}")
-if(CLANG_VERSION_STRING VERSION_LESS 11.0)
-  message(FATAL_ERROR
-    "please install latest wai-sdk to get a clang-11 at least"
-  )
-endif()
 
-################  EMCC ################
-if(NOT DEFINED ENV{EMSDK})
+if(NOT WASI_SDK_PARENT)
   message(FATAL_ERROR
-    "can not find emsdk. "
-    "please refer to https://emscripten.org/docs/getting_started/downloads.html "
-    "and install it, "
-    "or active emsdk by 'source ./emsdk_env.sh'"
+    "can not find 'wasi-sdk' under ${WAMR_ROOT}/test-tools, "
+    "please run ${WAMR_ROOT}/test-tools/build-wasi-sdk/build_wasi_sdk.py "
+    "to build wasi-sdk and try again"
   )
 endif()
 
-message(STATUS "Detecting EMSDK info: $ENV{EMSDK}")
-
-### check if the emsdk is 2.0.12
-### upstream/.emsdk_version should be releases-upstream-dcf819a7821f8db0c8f15ac336fea8960ec204f5-64bit
-file(STRINGS "$ENV{EMSDK}/upstream/.emsdk_version" EMSDK_VERSION)
-if(NOT (${EMSDK_VERSION} STREQUAL "releases-upstream-dcf819a7821f8db0c8f15ac336fea8960ec204f5-64bit"))
-    message(FATAL_ERROR "please install emsdk 2.0.12")
+set(WASI_SDK_HOME ${WASI_SDK_PARENT}/wasi-sdk)
+message(CHECK_START "Detecting WASI-SDK at ${WASI_SDK_HOME}")
+if(EXISTS "${WASI_SDK_HOME}/share/cmake/wasi-sdk.cmake")
+  message(CHECK_PASS "found")
+else()
+  message(CHECK_FAIL "not found")
 endif()
 
 ################  BINARYEN ################
 find_program(WASM_OPT
-    NAMES wasm-opt
-    PATHS /opt/binaryen-version_101/bin /opt/binaryen/bin
+  NAMES wasm-opt
+  PATHS /opt/binaryen-version_101/bin /opt/binaryen/bin
+  NO_DEFAULT_PATH
+  NO_CMAKE_FIND_ROOT_PATH
 )
 
-if (NOT WASM_OPT)
+if(NOT WASM_OPT)
   message(FATAL_ERROR
     "can not find wasm-opt. "
     "please download it from "
     "https://github.com/WebAssembly/binaryen/releases/download/version_101/binaryen-version_101-x86_64-linux.tar.gz "
     "and install it under /opt"
   )
-else()
-  message(STATUS 
-    "Detecting EMSDK info: $ENV{EMSDK}"
-  )
 endif()
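preparation.cmake now expects a locally built wasi-sdk under `test-tools/wasi-sdk` (produced by `test-tools/build-wasi-sdk/build_wasi_sdk.py`) and a `wasm-opt` from binaryen version_101 under `/opt`. The snippet below is an optional pre-flight check with the same logic, useful before starting a long workload build; the relative path to the repository root is an assumption and may need adjusting.

```python
# Optional pre-flight check mirroring preparation.cmake: a wasi-sdk built
# under test-tools/ and a wasm-opt from binaryen version_101 under /opt.
import os
import pathlib
import shutil
import sys

# Assumes this file lives two levels below the WAMR root; adjust as needed.
wamr_root = pathlib.Path(__file__).resolve().parents[2]
wasi_sdk = wamr_root / "test-tools" / "wasi-sdk"
if not (wasi_sdk / "share" / "cmake" / "wasi-sdk.cmake").exists():
    sys.exit("wasi-sdk not found; run test-tools/build-wasi-sdk/build_wasi_sdk.py first")

binaryen_paths = os.pathsep.join(["/opt/binaryen-version_101/bin", "/opt/binaryen/bin"])
wasm_opt = shutil.which("wasm-opt", path=binaryen_paths)
if not wasm_opt:
    sys.exit("wasm-opt not found; install binaryen version_101 under /opt")
print("wasi-sdk:", wasi_sdk)
print("wasm-opt:", wasm_opt)
```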

+ 0 - 1
samples/workload/docker/.gitignore

@@ -1 +0,0 @@
-build_scripts

+ 0 - 93
samples/workload/docker/Dockerfile

@@ -1,93 +0,0 @@
-FROM ubuntu:18.04 as builder
-
-RUN apt update \
-      && apt install -y lsb-release software-properties-common build-essential \
-          wget curl git tree zip unzip
-
-ARG WASI_SDK_VER=12
-ARG WABT_VER=1.0.23
-ARG CMAKE_VER=3.16.2
-ARG BINARYEN_VER=version_101
-
-#
-# install wasi-sdk
-ARG WASI_SDK_FILE="wasi-sdk-${WASI_SDK_VER}.0-linux.tar.gz"
-COPY ${WASI_SDK_FILE} /opt
-RUN cd /opt \
-    && tar zxf ${WASI_SDK_FILE} \
-    && rm ${WASI_SDK_FILE} \
-    && ln -sf /opt/wasi-sdk-${WASI_SDK_VER}.0 /opt/wasi-sdk
-
-#
-# install wabt
-ARG WABT_FILE="wabt-${WABT_VER}-ubuntu.tar.gz"
-COPY ${WABT_FILE} /opt
-RUN cd /opt \
-    && tar zxf ${WABT_FILE} \
-    && rm ${WABT_FILE} \
-    && ln -sf /opt/wabt-${WABT_VER} /opt/wabt
-
-#
-# install cmake
-ARG CMAKE_FILE="cmake-${CMAKE_VER}-Linux-x86_64.sh"
-COPY ${CMAKE_FILE} /tmp
-RUN cd /tmp \
-    && chmod a+x ${CMAKE_FILE} \
-    && mkdir /opt/cmake \
-    && ./${CMAKE_FILE} --prefix=/opt/cmake --skip-license  \
-    && ln -sf /opt/cmake/bin/cmake /usr/local/bin/cmake
-
-#
-# install emsdk
-RUN cd /opt \
-    && git clone https://github.com/emscripten-core/emsdk.git \
-    && cd emsdk \
-    && git pull \
-    && ./emsdk install 2.0.12 \
-    && ./emsdk activate 2.0.12 \
-    && echo "source /opt/emsdk/emsdk_env.sh" >> /root/.bashrc
-
-#
-# install binaryen
-ARG BINARYEN_FILE="binaryen-${BINARYEN_VER}-x86_64-linux.tar.gz"
-COPY ${BINARYEN_FILE} /opt
-RUN cd /opt \
-    && tar zxf ${BINARYEN_FILE} \
-    && rm ${BINARYEN_FILE}  \
-    && ln -sf /opt/binaryen-${BINARYEN_VER} /opt/binaryen
-
-#
-# install bazelisk
-ARG BAZEL_FILE="bazelisk-linux-amd64"
-COPY ${BAZEL_FILE} /opt/bazelisk/bin/bazelisk
-RUN cd /opt/bazelisk/bin/ \
-      && chmod a+x bazelisk \
-      && ln -sf /opt/bazelisk/bin/bazelisk /usr/local/bin/bazel
-
-RUN apt update \
-      && apt install -y python2.7-minimal
-
-#
-# Clean up
-RUN apt-get autoremove -y \
-    && apt-get clean -y \
-    && rm -rf /var/lib/apt/lists/* \
-    && rm -rf /tmp/*
-
-VOLUME /data
-
-#
-#
-RUN touch  /build.sh \
-    && echo "\
-#!/bin/bash \n\
-if [[ -d /data/project/build ]]; then \n\
-  rm -r /data/project/build \n\
-fi \n\
-mkdir /data/project/build \n\
-cd /data/project/build \n\
-source /opt/emsdk/emsdk_env.sh \n\
-cmake .. \n\
-make \n\
-cd - > /dev/null" > /build.sh \
-    && chmod a+x /build.sh

+ 33 - 0
samples/workload/docker/build_workload.sh

@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2019 Intel Corporation.  All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+
+readonly SCRIPT_PATH=$(dirname "$(realpath "$0")")
+readonly ROOT=$(realpath "${SCRIPT_PATH}"/../../../)
+readonly CURRENT_PATH=$(pwd)
+readonly CURRENT_RELATIVE_ROOT=$(realpath --relative-base ${ROOT} ${CURRENT_PATH})
+readonly VARIANT=$(lsb_release -c | awk '{print $2}')
+
+docker build \
+  --build-arg VARIANT=${VARIANT} \
+  --memory 4G --cpu-quota 50000 \
+  -t wamr_dev_${VARIANT}:0.1 -f "${ROOT}"/ci/Dockerfile "${ROOT}"/ci &&
+  docker run --rm -it \
+    --memory 4G \
+    --cpus ".5" \
+    --name workload_build_env \
+    --mount type=bind,source="${ROOT}",target=/workspace \
+    wamr_dev_${VARIANT}:0.1 \
+    /bin/bash -c "\
+      pwd \
+      && pushd ${CURRENT_RELATIVE_ROOT} \
+      && rm -rf build \
+      && mkdir build \
+      && pushd build \
+      && cmake .. \
+      && cmake --build . --config Release \
+      && popd \
+      && popd \
+      && echo 'Go and find out results under ${CURRENT_RELATIVE_ROOT}/build' "
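Each workload's `build_workload.sh` is a relative symlink to this shared script, so it is meant to be run from the workload directory itself: it builds an image from `ci/Dockerfile`, mounts the repository at `/workspace`, and runs the CMake build of the current directory, leaving results under `<workload>/build`. A minimal driver, for illustration only:

```python
# Illustrative only: run the shared Docker-based build for one workload,
# equivalent to executing ./build_workload.sh from that directory.
import pathlib
import subprocess

workload = pathlib.Path("samples/workload/XNNPACK")  # any workload directory
subprocess.check_call(["bash", "./build_workload.sh"], cwd=workload)
# Results end up under samples/workload/XNNPACK/build, as the script reports.
```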

+ 0 - 53
samples/workload/docker/docker_build.sh

@@ -1,53 +0,0 @@
-#!/usr/bin/env bash
-#
-# Copyright (C) 2019 Intel Corporation.  All rights reserved.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-#
-
-readonly BUILD_CONTENT="/tmp/build_content"
-if [[ ! -d ${BUILD_CONTENT} ]]; then
-  mkdir ${BUILD_CONTENT}
-fi
-
-readonly WASI_SDK_VER=12
-readonly WABT_VER=1.0.23
-readonly CMAKE_VER=3.16.2
-readonly BINARYEN_VER=version_101
-readonly BAZELISK_VER=1.7.5
-
-cd ${BUILD_CONTENT} || exit
-if [[ ! -f wasi-sdk-${WASI_SDK_VER}.0-linux.tar.gz ]]; then
-  wget https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-${WASI_SDK_VER}/wasi-sdk-${WASI_SDK_VER}.0-linux.tar.gz
-fi
-
-if [[ ! -f wabt-${WABT_VER}-ubuntu.tar.gz ]]; then
-  wget https://github.com/WebAssembly/wabt/releases/download/${WABT_VER}/wabt-${WABT_VER}-ubuntu.tar.gz
-fi
-
-if [[ ! -f cmake-${CMAKE_VER}-Linux-x86_64.sh ]]; then
-  wget https://github.com/Kitware/CMake/releases/download/v${CMAKE_VER}/cmake-${CMAKE_VER}-Linux-x86_64.sh
-fi
-
-if [[ ! -f binaryen-${BINARYEN_VER}-x86_64-linux.tar.gz ]]; then
-  wget https://github.com/WebAssembly/binaryen/releases/download/${BINARYEN_VER}/binaryen-${BINARYEN_VER}-x86_64-linux.tar.gz
-fi
-
-if [[ ! -f bazelisk-linux-amd64 ]]; then
-  wget https://github.com/bazelbuild/bazelisk/releases/download/v${BAZELISK_VER}/bazelisk-linux-amd64
-fi
-cd - > /dev/null || exit
-
-DOCKERFILE_PATH=$(dirname "$(realpath "$0")")
-
-docker build \
-    --build-arg WASI_SDK_VER=${WASI_SDK_VER} \
-    --build-arg WABT_VER=${WABT_VER} \
-    --build-arg CMAKE_VER=${CMAKE_VER} \
-    --build-arg BINARYEN_VER=${BINARYEN_VER} \
-    -t wamr_workload_env:0.1 -f "${DOCKERFILE_PATH}"/Dockerfile ${BUILD_CONTENT} \
-  && docker run --rm \
-      --name workload_w_clang \
-      --mount type=bind,source="$(pwd)",target=/data/project \
-      -w /data/project \
-      wamr_workload_env:0.1 \
-      /bin/bash -c /build.sh

+ 6 - 5
samples/workload/meshoptimizer/CMakeLists.txt

@@ -21,9 +21,10 @@ ExternalProject_Add(codecbench
                    && ${CMAKE_COMMAND} -E echo "Applying patch"
                    && git apply ${CMAKE_CURRENT_SOURCE_DIR}/codecbench.patch
   CONFIGURE_COMMAND ${CMAKE_COMMAND}
-                         -DWASI_SDK_PREFIX=${WASI_SDK_HOME}/wasi-sdk
-                         -DCMAKE_TOOLCHAIN_FILE=${WASI_SDK_HOME}/wasi-sdk/share/cmake/wasi-sdk.cmake
-                         ${CMAKE_CURRENT_SOURCE_DIR}/meshoptimizer
-  BUILD_COMMAND  make codecbench.opt
-  INSTALL_COMMAND ${CMAKE_COMMAND} -E copy ./codecbench.opt.wasm ${CMAKE_CURRENT_SOURCE_DIR}/build/codecbench.wasm
+                      -DWASI_SDK_PREFIX=${WASI_SDK_HOME}
+                      -DCMAKE_TOOLCHAIN_FILE=${WASI_SDK_HOME}/share/cmake/wasi-sdk.cmake
+                      -DCMAKE_SYSROOT=${WASI_SDK_HOME}/share/wasi-sysroot
+                      ${CMAKE_CURRENT_SOURCE_DIR}/meshoptimizer
+  BUILD_COMMAND  make codecbench
+  INSTALL_COMMAND ${CMAKE_COMMAND} -E copy ./codecbench.wasm ${CMAKE_CURRENT_SOURCE_DIR}/build/codecbench.wasm
 )

+ 1 - 0
samples/workload/meshoptimizer/build_workload.sh

@@ -0,0 +1 @@
+../docker/build_workload.sh

+ 73 - 2
samples/workload/meshoptimizer/codecbench.patch

@@ -1,8 +1,8 @@
 diff --git a/CMakeLists.txt b/CMakeLists.txt
-index ffdb4da..a397427 100644
+index f4378ce..9bc104b 100644
 --- a/CMakeLists.txt
 +++ b/CMakeLists.txt
-@@ -127,3 +127,43 @@ install(FILES
+@@ -129,3 +129,43 @@ install(FILES
      ${CMAKE_CURRENT_BINARY_DIR}/meshoptimizerConfig.cmake
      ${CMAKE_CURRENT_BINARY_DIR}/meshoptimizerConfigVersion.cmake
      DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/meshoptimizer)
@@ -46,3 +46,74 @@ index ffdb4da..a397427 100644
 +)
 +
 +add_dependencies(codecbench.opt codecbench)
+diff --git a/src/vertexcodec.cpp b/src/vertexcodec.cpp
+index 5f3ec20..b79bfad 100644
+--- a/src/vertexcodec.cpp
++++ b/src/vertexcodec.cpp
+@@ -81,13 +81,13 @@
+ #endif
+ 
+ #ifdef SIMD_WASM
+-#define wasmx_splat_v32x4(v, i) wasm_v32x4_shuffle(v, v, i, i, i, i)
+-#define wasmx_unpacklo_v8x16(a, b) wasm_v8x16_shuffle(a, b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23)
+-#define wasmx_unpackhi_v8x16(a, b) wasm_v8x16_shuffle(a, b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31)
+-#define wasmx_unpacklo_v16x8(a, b) wasm_v16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11)
+-#define wasmx_unpackhi_v16x8(a, b) wasm_v16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15)
+-#define wasmx_unpacklo_v64x2(a, b) wasm_v64x2_shuffle(a, b, 0, 2)
+-#define wasmx_unpackhi_v64x2(a, b) wasm_v64x2_shuffle(a, b, 1, 3)
++#define wasmx_splat_v32x4(v, i) wasm_i32x4_shuffle(v, v, i, i, i, i)
++#define wasmx_unpacklo_v8x16(a, b) wasm_i8x16_shuffle(a, b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23)
++#define wasmx_unpackhi_v8x16(a, b) wasm_i8x16_shuffle(a, b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31)
++#define wasmx_unpacklo_v16x8(a, b) wasm_i16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11)
++#define wasmx_unpackhi_v16x8(a, b) wasm_i16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15)
++#define wasmx_unpacklo_v64x2(a, b) wasm_i64x2_shuffle(a, b, 0, 2)
++#define wasmx_unpackhi_v64x2(a, b) wasm_i64x2_shuffle(a, b, 1, 3)
+ #endif
+ 
+ namespace meshopt
+@@ -700,7 +700,7 @@ static v128_t decodeShuffleMask(unsigned char mask0, unsigned char mask1)
+ 	v128_t sm1 = wasm_v128_load(&kDecodeBytesGroupShuffle[mask1]);
+ 
+ 	v128_t sm1off = wasm_v128_load(&kDecodeBytesGroupCount[mask0]);
+-	sm1off = wasm_v8x16_shuffle(sm1off, sm1off, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
++	sm1off = wasm_i8x16_shuffle(sm1off, sm1off, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ 
+ 	v128_t sm1r = wasm_i8x16_add(sm1, sm1off);
+ 
+@@ -751,7 +751,7 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi
+ 
+ 		v128_t shuf = decodeShuffleMask(mask0, mask1);
+ 
+-		v128_t result = wasm_v128_bitselect(wasm_v8x16_swizzle(rest, shuf), sel, mask);
++		v128_t result = wasm_v128_bitselect(wasm_i8x16_swizzle(rest, shuf), sel, mask);
+ 
+ 		wasm_v128_store(buffer, result);
+ 
+@@ -773,7 +773,7 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi
+ 
+ 		v128_t shuf = decodeShuffleMask(mask0, mask1);
+ 
+-		v128_t result = wasm_v128_bitselect(wasm_v8x16_swizzle(rest, shuf), sel, mask);
++		v128_t result = wasm_v128_bitselect(wasm_i8x16_swizzle(rest, shuf), sel, mask);
+ 
+ 		wasm_v128_store(buffer, result);
+ 
+diff --git a/src/vertexfilter.cpp b/src/vertexfilter.cpp
+index 023452c..2374cf7 100644
+--- a/src/vertexfilter.cpp
++++ b/src/vertexfilter.cpp
+@@ -56,10 +56,10 @@
+ #endif
+ 
+ #ifdef SIMD_WASM
+-#define wasmx_unpacklo_v16x8(a, b) wasm_v16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11)
+-#define wasmx_unpackhi_v16x8(a, b) wasm_v16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15)
+-#define wasmx_unziplo_v32x4(a, b) wasm_v32x4_shuffle(a, b, 0, 2, 4, 6)
+-#define wasmx_unziphi_v32x4(a, b) wasm_v32x4_shuffle(a, b, 1, 3, 5, 7)
++#define wasmx_unpacklo_v16x8(a, b) wasm_i16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11)
++#define wasmx_unpackhi_v16x8(a, b) wasm_i16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15)
++#define wasmx_unziplo_v32x4(a, b) wasm_i32x4_shuffle(a, b, 0, 2, 4, 6)
++#define wasmx_unziphi_v32x4(a, b) wasm_i32x4_shuffle(a, b, 1, 3, 5, 7)
+ #endif
+ 
+ namespace meshopt

+ 0 - 1
samples/workload/meshoptimizer/docker_build.sh

@@ -1 +0,0 @@
-../docker/docker_build.sh

+ 2 - 15
samples/workload/preparation.sh

@@ -5,8 +5,6 @@
 #
 
 readonly BUILD_CONTENT="/tmp/build_content"
-readonly WASI_SDK_VER=12
-readonly WASI_SDK_FILE="wasi-sdk-${WASI_SDK_VER}.0-linux.tar.gz"
 readonly WABT_VER=1.0.23
 readonly WABT_FILE="wabt-${WABT_VER}-ubuntu.tar.gz"
 readonly CMAKE_VER=3.16.2
@@ -28,17 +26,6 @@ function install_deps() {
       build-essential git tree zip unzip
 }
 
-#
-# install wasi-sdk
-function install_wasi-sdk() {
-  if [[ ! -f ${WASI_SDK_FILE} ]]; then
-    wget https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-${WASI_SDK_VER}/${WASI_SDK_FILE}
-  fi
-
-  tar zxf ${WASI_SDK_FILE} -C /opt
-  ln -sf /opt/wasi-sdk-${WASI_SDK_VER}.0 /opt/wasi-sdk
-}
-
 #
 # install wabt
 function install_wabt() {
@@ -70,8 +57,8 @@ function install_emsdk() {
   git clone https://github.com/emscripten-core/emsdk.git
   cd emsdk
   git pull
-  ./emsdk install 2.0.12
-  ./emsdk activate 2.0.12
+  ./emsdk install 2.0.26
+  ./emsdk activate 2.0.26
   echo "source /opt/emsdk/emsdk_env.sh" >> "${HOME}"/.bashrc
 }
 

+ 2 - 2
samples/workload/tensorflow/README.md

@@ -5,8 +5,8 @@ This sample demonstrates how to build [tensorflow](https://github.com/tensorflow
 ```bash
 git clone https://github.com/emscripten-core/emsdk.git
 cd emsdk
-./emsdk install 2.0.12
-./emsdk activate 2.0.12
+./emsdk install 2.0.26
+./emsdk activate 2.0.26
 ```
 And set up the emsdk environment:
 ```bash

+ 1 - 1
samples/workload/tensorflow/build.sh

@@ -17,7 +17,7 @@ fi
 
 set -xe
 
-EMSDK_WASM_DIR="${EMSDK}/upstream/emscripten/cache/wasm"
+EMSDK_WASM_DIR="${EMSDK}/upstream/emscripten/cache/sysroot/lib/wasm32-emscripten"
 BUILD_SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 OUT_DIR="${BUILD_SCRIPT_DIR}/out"
 TENSORFLOW_DIR="${BUILD_SCRIPT_DIR}/tensorflow"

+ 3 - 10
samples/workload/wasm-av1/CMakeLists.avx_wasm.txt

@@ -5,8 +5,10 @@ cmake_minimum_required (VERSION 2.8...3.16)
 
 project(testavx)
 
+include(${CMAKE_CURRENT_SOURCE_DIR}/../../cmake/preparation.cmake)
+
 # a workaround to let aom find our non-public headers
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../include/libc)
+include_directories(${WASI_SDK_HOME}/share/wasi-sysroot/include/libc/musl)
 
 ################  AOM  ################
 set(ENABLE_CCACHE ON)
@@ -58,15 +60,6 @@ target_link_libraries(${PROJECT_NAME}
 
 add_dependencies(${PROJECT_NAME} aom)
 
-find_program(WASM_OPT
-    NAMES wasm-opt
-    PATHS /opt/binaryen-version_97/bin /opt/binaryen/bin
-)
-
-if (NOT WASM_OPT)
-  message(WARNING "can not find wasm-opt and will not optimize any wasm module")
-endif()
-
 add_custom_target(${PROJECT_NAME}_opt ALL
   COMMAND
     ${WASM_OPT} -Oz --enable-simd -o ${PROJECT_NAME}.opt.wasm ${PROJECT_NAME}.wasm

+ 4 - 25
samples/workload/wasm-av1/CMakeLists.txt

@@ -10,27 +10,6 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/preparation.cmake)
 #######################################
 include(ExternalProject)
 
-################ HEADERS ################
-set(EMSDK_SYSTEM_HEADERS "$ENV{EMSDK}/upstream/emscripten/system/include")
-set(EMSDK_LIBC_HEADERS "${EMSDK_SYSTEM_HEADERS}/libc")
-ExternalProject_Add(headers_from_emcc
-   PREFIX headers
-   SOURCE_DIR "${EMSDK_SYSTEM_HEADERS}"
-   CONFIGURE_COMMAND ""
-   BUILD_COMMAND  ""
-   INSTALL_COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/sys
-                     && ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_SOURCE_DIR}/include/libc/bits
-                     # copy emscripten pthread related header files
-                     && ${CMAKE_COMMAND} -E copy ${EMSDK_LIBC_HEADERS}/pthread.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/
-                     && ${CMAKE_COMMAND} -E copy ${EMSDK_LIBC_HEADERS}/signal.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/
-                     && ${CMAKE_COMMAND} -E copy ${EMSDK_LIBC_HEADERS}/netdb.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/
-                     && ${CMAKE_COMMAND} -E copy ${EMSDK_LIBC_HEADERS}/sys/wait.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/sys/
-                     && ${CMAKE_COMMAND} -E copy ${EMSDK_LIBC_HEADERS}/sys/socket.h ${CMAKE_CURRENT_SOURCE_DIR}/include/pthread/sys/
-                     # copy emscripten setjmp headers
-                     && ${CMAKE_COMMAND} -E copy ${EMSDK_LIBC_HEADERS}/setjmp.h ${CMAKE_CURRENT_SOURCE_DIR}/include/libc/setjmp.h
-                     && ${CMAKE_COMMAND} -E copy ${EMSDK_SYSTEM_HEADERS}/../lib/libc/musl/arch/emscripten/bits/setjmp.h ${CMAKE_CURRENT_SOURCE_DIR}/include/libc/bits/setjmp.h
-)
-
 ################ av1 ################
 ExternalProject_Add(av1
   PREFIX av1
@@ -39,15 +18,15 @@ ExternalProject_Add(av1
   GIT_PROGRESS   ON
   GIT_SHALLOW    ON
   SOURCE_DIR     ${CMAKE_CURRENT_SOURCE_DIR}/av1
-  DEPENDS        headers_from_emcc
   UPDATE_COMMAND git clean -fd && git checkout -- *
                   && ${CMAKE_COMMAND} -E echo "Copying pre-installed CMakeLists.txt"
                   && ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.avx_wasm.txt CMakeLists.txt
                   && git apply ../av1-clang.patch
   CONFIGURE_COMMAND  ${CMAKE_COMMAND}
-                         -DWASI_SDK_PREFIX=${WASI_SDK_HOME}/wasi-sdk
-                         -DCMAKE_TOOLCHAIN_FILE=${WASI_SDK_HOME}/wasi-sdk/share/cmake/wasi-sdk.cmake
-                         ${CMAKE_CURRENT_SOURCE_DIR}/av1
+                      -DWASI_SDK_PREFIX=${WASI_SDK_HOME}
+                      -DCMAKE_TOOLCHAIN_FILE=${WASI_SDK_HOME}/share/cmake/wasi-sdk.cmake
+                      -DCMAKE_SYSROOT=${WASI_SDK_HOME}/share/wasi-sysroot
+                      ${CMAKE_CURRENT_SOURCE_DIR}/av1
   BUILD_COMMAND     make testavx_opt
   INSTALL_COMMAND   ${CMAKE_COMMAND} -E copy testavx.opt.wasm ${CMAKE_CURRENT_BINARY_DIR}/testavx.wasm
 )
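
With the customized wasi-sdk, the av1 sub-build is now configured against the SDK root directly, passing the toolchain file and sysroot explicitly. A hand-run equivalent of that CONFIGURE_COMMAND would look roughly like the sketch below; WASI_SDK_HOME stands for the value defined in ../cmake/preparation.cmake, which is not shown in this diff:

```bash
# Sketch of the configure step the ExternalProject runs for the av1 sub-build.
cmake -S av1 -B av1-build \
    -DWASI_SDK_PREFIX="${WASI_SDK_HOME}" \
    -DCMAKE_TOOLCHAIN_FILE="${WASI_SDK_HOME}/share/cmake/wasi-sdk.cmake" \
    -DCMAKE_SYSROOT="${WASI_SDK_HOME}/share/wasi-sysroot"
```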

+ 1 - 1
samples/workload/wasm-av1/build.sh

@@ -17,7 +17,7 @@ fi
 
 set -xe
 
-EMSDK_WASM_DIR="${EMSDK}/upstream/emscripten/cache/wasm"
+EMSDK_WASM_DIR="${EMSDK}/upstream/emscripten/cache/sysroot/lib/wasm32-emscripten"
 BUILD_SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 OUT_DIR="${BUILD_SCRIPT_DIR}/out"
 WASM_AV1_DIR="${BUILD_SCRIPT_DIR}/wasm-av1"

+ 1 - 0
samples/workload/wasm-av1/build_workload.sh

@@ -0,0 +1 @@
+../docker/build_workload.sh

+ 0 - 1
samples/workload/wasm-av1/docker_build.sh

@@ -1 +0,0 @@
-../docker/docker_build.sh

+ 1 - 0
test-tools/.gitignore

@@ -0,0 +1 @@
+/wasi-sdk

+ 299 - 0
test-tools/build-wasi-sdk/build_wasi_sdk.py

@@ -0,0 +1,299 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2019 Intel Corporation.  All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+
+"""
+The script operates on the following directories and files:
+|-- core
+|   `-- deps
+|       |-- emscripten
+|       `-- wasi-sdk
+|           `-- src
+|               |-- llvm-project
+|               `-- wasi-libc
+`-- test-tools
+    |-- build-wasi-sdk
+    |   |-- build_wasi_sdk.py
+    |   |-- include
+    |   `-- patches
+    `-- wasi-sdk
+        |-- bin
+        |-- lib
+        `-- share
+            `-- wasi-sysroot
+"""
+
+import hashlib
+import logging
+import os
+import pathlib
+import shlex
+import shutil
+import subprocess
+import sys
+import tarfile
+import tempfile
+import urllib
+import urllib.request
+
+logger = logging.getLogger("build_wasi_sdk")
+
+external_repos = {
+    "config": {
+        "sha256": "302e5e7f3c4996976c58efde8b2f28f71d51357e784330eeed738e129300dc33",
+        "store_dir": "core/deps/wasi-sdk/src/config",
+        "strip_prefix": "config-191bcb948f7191c36eefe634336f5fc5c0c4c2be",
+        "url": "https://git.savannah.gnu.org/cgit/config.git/snapshot/config-191bcb948f7191c36eefe634336f5fc5c0c4c2be.tar.gz",
+    },
+    "emscripten": {
+        "sha256": "0904a65379aea3ea94087b8c12985b2fee48599b473e3bef914fec2e3941532d",
+        "store_dir": "core/deps/emscripten",
+        "strip_prefix": "emscripten-2.0.28",
+        "url": "https://github.com/emscripten-core/emscripten/archive/refs/tags/2.0.28.tar.gz",
+    },
+    "llvm-project": {
+        "sha256": "dc5169e51919f2817d06615285e9da6a804f0f881dc55d6247baa25aed3cc143",
+        "store_dir": "core/deps/wasi-sdk/src/llvm-project",
+        "strip_prefix": "llvm-project-34ff6a75f58377f32a5046a29f55c4c0e58bee9e",
+        "url": "https://github.com/llvm/llvm-project/archive/34ff6a75f58377f32a5046a29f55c4c0e58bee9e.tar.gz",
+    },
+    "wasi-sdk": {
+        "sha256": "fc4fdb0e97b915241f32209492a7d0fab42c24216f87c1d5d75f46f7c70a553d",
+        "store_dir": "core/deps/wasi-sdk",
+        "strip_prefix": "wasi-sdk-1a953299860bbcc198ad8c12a21d1b2e2f738355",
+        "url": "https://github.com/WebAssembly/wasi-sdk/archive/1a953299860bbcc198ad8c12a21d1b2e2f738355.tar.gz",
+    },
+    "wasi-libc": {
+        "sha256": "f6316ca9479d3463eb1c4f6a1d1f659bf15f67cb3c1e2e83d9d11f188dccd864",
+        "store_dir": "core/deps/wasi-sdk/src/wasi-libc",
+        "strip_prefix": "wasi-libc-a78cd329aec717f149934d7362f57050c9401f60",
+        "url": "https://github.com/WebAssembly/wasi-libc/archive/a78cd329aec717f149934d7362f57050c9401f60.tar.gz",
+    },
+}
+
+# TODO: can we use headers from wasi-libc and clang directly?
+emscripten_headers_src_dst = [
+    ("include/compat/emmintrin.h", "sse/emmintrin.h"),
+    ("include/compat/immintrin.h", "sse/immintrin.h"),
+    ("include/compat/smmintrin.h", "sse/smmintrin.h"),
+    ("include/compat/xmmintrin.h", "sse/xmmintrin.h"),
+    ("lib/libc/musl/include/pthread.h", "libc/musl/pthread.h"),
+    ("lib/libc/musl/include/signal.h", "libc/musl/signal.h"),
+    ("lib/libc/musl/include/netdb.h", "libc/musl/netdb.h"),
+    ("lib/libc/musl/include/sys/wait.h", "libc/musl/sys/wait.h"),
+    ("lib/libc/musl/include/sys/socket.h", "libc/musl/sys/socket.h"),
+    ("lib/libc/musl/include/setjmp.h", "libc/musl/setjmp.h"),
+    ("lib/libc/musl/arch/emscripten/bits/setjmp.h", "libc/musl/bits/setjmp.h"),
+]
+
+
+def checksum(name, local_file):
+    sha256 = hashlib.sha256()
+    with open(local_file, "rb") as f:
+        bytes = f.read(4096)
+        while bytes:
+            sha256.update(bytes)
+            bytes = f.read(4096)
+
+    return sha256.hexdigest() == external_repos[name]["sha256"]
+
+
+def download(url, local_file):
+    logger.debug(f"download from {url}")
+    urllib.request.urlretrieve(url, local_file)
+    return local_file.exists()
+
+
+def unpack(tar_file, strip_prefix, dest_dir):
+    # extract the .tar.gz to /tmp, then move the contents back without the stripped prefix directory
+    with tempfile.TemporaryDirectory() as tmp:
+        with tarfile.open(tar_file) as tar:
+            logger.debug(f"extract to {tmp}")
+            tar.extractall(tmp)
+
+        strip_prefix_dir = (
+            pathlib.Path(tmp).joinpath(strip_prefix + os.path.sep).resolve()
+        )
+        if not strip_prefix_dir.exists():
+            logger.error(f"extract {tar_file.name} failed")
+            return False
+
+        # mv /tmp/${strip_prefix} dest_dir/*
+        logger.debug(f"move {strip_prefix_dir} to {dest_dir}")
+        shutil.copytree(
+            str(strip_prefix_dir),
+            str(dest_dir),
+            copy_function=shutil.move,
+            dirs_exist_ok=True,
+        )
+
+    return True
+
+
+def download_repo(name, root):
+    if not name in external_repos:
+        logger.error(f"{name} is not a known repository")
+        return False
+
+    store_dir = root.joinpath(f'{external_repos[name]["store_dir"]}').resolve()
+    download_flag = store_dir.joinpath("DOWNLOADED")
+    if store_dir.exists() and download_flag.exists():
+        logger.info(
+            f"keep using '{store_dir.relative_to(root)}'. Or to remove it and try again"
+        )
+        return True
+
+    # download only if the tarball is missing or the cached copy is broken
+    download_dir = pathlib.Path("/tmp/build_wasi_sdk/")
+    download_dir.mkdir(exist_ok=True)
+
+    tar_name = pathlib.Path(external_repos[name]["url"]).name
+    tar_file = download_dir.joinpath(tar_name)
+    if tar_file.exists():
+        if checksum(name, tar_file):
+            logger.debug(f"use pre-downloaded {tar_file}")
+        else:
+            logger.debug(f"{tar_file} is broken, remove it")
+            tar_file.unlink()
+
+    if not tar_file.exists():
+        if not download(external_repos[name]["url"], tar_file) or not checksum(
+            name, tar_file
+        ):
+            logger.error(f"download {name} failed")
+            return False
+
+    # unpack and strip the *strip_prefix* directory
+    if not unpack(tar_file, external_repos[name]["strip_prefix"], store_dir):
+        return False
+
+    # leave a flag file to mark the download as complete
+    download_flag.touch()
+
+    # leave the downloaded files in /tmp
+    return True
+
+
+def run_patch(patch_file, cwd):
+    if not patch_file.exists():
+        logger.error(f"{patch_file} not found")
+        return False
+
+    with open(patch_file, "r") as f:
+        try:
+            PATCH_DRY_RUN_CMD = "patch -f -p1 --dry-run"
+            if subprocess.check_call(shlex.split(PATCH_DRY_RUN_CMD), stdin=f, cwd=cwd):
+                logger.error(f"patch dry-run {cwd} failed")
+                return False
+
+            PATCH_CMD = "patch -f -p1"
+            f.seek(0)
+            if subprocess.check_call(shlex.split(PATCH_CMD), stdin=f, cwd=cwd):
+                logger.error(f"patch {cwd} failed")
+                return False
+        except subprocess.CalledProcessError:
+            logger.error(f"patch {cwd} failed")
+            return False
+    return True
+
+
+def build_and_install_wasi_sdk(root):
+    store_dir = root.joinpath(f'{external_repos["wasi-sdk"]["store_dir"]}').resolve()
+    if not store_dir.exists():
+        logger.error(f"{store_dir} does not found")
+        return False
+
+    # patch wasi-libc and wasi-sdk
+    patch_flag = store_dir.joinpath("PATCHED")
+    if not patch_flag.exists():
+        if not run_patch(
+            root.joinpath("test-tools/build-wasi-sdk/patches/wasi_libc.patch"),
+            store_dir.joinpath("src/wasi-libc"),
+        ):
+            return False
+
+        if not run_patch(
+            root.joinpath("test-tools/build-wasi-sdk/patches/wasi_sdk.patch"), store_dir
+        ):
+            return False
+
+        patch_flag.touch()
+    else:
+        logger.info("bypass the patch phase")
+
+    # build
+    build_flag = store_dir.joinpath("BUILDED")
+    if not build_flag.exists():
+        BUILD_CMD = "make build"
+        if subprocess.check_call(shlex.split(BUILD_CMD), cwd=store_dir):
+            logger.error(f"build wasi-sdk failed")
+            return False
+
+        build_flag.touch()
+    else:
+        logger.info("bypass the build phase")
+
+    # install
+    install_flag = store_dir.joinpath("INSTALLED")
+    binary_path = root.joinpath("test-tools").resolve()
+    if not install_flag.exists():
+        shutil.copytree(
+            str(store_dir.joinpath("build/install/opt").resolve()),
+            str(binary_path),
+            dirs_exist_ok=True,
+        )
+
+        # install headers
+        emscripten_headers = (
+            root.joinpath(external_repos["emscripten"]["store_dir"])
+            .joinpath("system")
+            .resolve()
+        )
+        wasi_sysroot_headers = binary_path.joinpath(
+            "wasi-sdk/share/wasi-sysroot/include"
+        ).resolve()
+        for (src, dst) in emscripten_headers_src_dst:
+            src = emscripten_headers.joinpath(src)
+            dst = wasi_sysroot_headers.joinpath(dst)
+            dst.parent.mkdir(parents=True, exist_ok=True)
+            shutil.copy(src, dst)
+
+        install_flag.touch()
+    else:
+        logger.info("bypass the install phase")
+
+    return True
+
+
+def main():
+    console = logging.StreamHandler()
+    console.setFormatter(logging.Formatter("%(asctime)s - %(message)s"))
+    logger.setLevel(logging.INFO)
+    logger.addHandler(console)
+    logger.propagate = False
+
+    # locate the root of WAMR
+    current_file = pathlib.Path(__file__)
+    if current_file.is_symlink():
+        current_file = pathlib.Path(os.readlink(current_file))
+    root = current_file.parent.joinpath("../..").resolve()
+    logger.info(f"The root of WAMR is {root}")
+
+    # download repos
+    for repo in external_repos.keys():
+        if not download_repo(repo, root):
+            return False
+
+    # build wasi_sdk and install
+    if not build_and_install_wasi_sdk(root):
+        return False
+
+    # TODO install headers from emscripten
+
+    return True
+
+
+if __name__ == "__main__":
+    sys.exit(0 if main() else 1)
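
The script is self-locating: it resolves the WAMR root from its own path, downloads the pinned tarballs into /tmp/build_wasi_sdk/, verifies their sha256 sums, and uses DOWNLOADED/PATCHED/BUILDED/INSTALLED flag files so re-runs skip completed phases. Typical use would be along the lines of the sketch below; the exact flow is not documented in this commit:

```bash
# Run from anywhere inside the WAMR checkout; the toolchain is installed
# under test-tools/wasi-sdk (ignored by the new test-tools/.gitignore).
python3 test-tools/build-wasi-sdk/build_wasi_sdk.py

# The emscripten compatibility headers are copied into the sysroot, e.g.:
ls test-tools/wasi-sdk/share/wasi-sysroot/include/sse
ls test-tools/wasi-sdk/share/wasi-sysroot/include/libc/musl
```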

+ 0 - 0
test-tools/build-wasi-sdk/include/.gitkeep


+ 13 - 0
test-tools/build-wasi-sdk/patches/wasi_libc.patch

@@ -0,0 +1,13 @@
+diff --git a/expected/wasm32-wasi/predefined-macros.txt b/expected/wasm32-wasi/predefined-macros.txt
+index c1bb19e..954f3b5 100644
+--- a/expected/wasm32-wasi/predefined-macros.txt
++++ b/expected/wasm32-wasi/predefined-macros.txt
+@@ -3002,6 +3002,8 @@
+ #define __alignof_is_defined 1
+ #define __bitop(x,i,o) ((x)[(i)/8] o (1<<(i)%8))
+ #define __bool_true_false_are_defined 1
++#define __clang_literal_encoding__ "UTF-8"
++#define __clang_wide_literal_encoding__ "UTF-32"
+ #define __inline inline
+ #define __restrict restrict
+ #define __tg_complex(fun,x) (__RETCAST_CX(x)( __FLTCX((x)+I) && __IS_FP(x) ? fun ## f (x) : __LDBLCX((x)+I) ? fun ## l (x) : fun(x) ))

+ 15 - 0
test-tools/build-wasi-sdk/patches/wasi_sdk.patch

@@ -0,0 +1,15 @@
+diff --git a/version.sh b/version.sh
+index 8e7c44c..ff0d3ba 100755
+--- a/version.sh
++++ b/version.sh
+@@ -1,5 +1,6 @@
+ #!/usr/bin/env bash
+-set -e
+-GIT_DESCR=$(git describe --long --candidates=999 --match='wasi-sdk-*' --dirty='+m' --abbrev=12)
+-GIT_PACKAGE_VERSION=$(echo $GIT_DESCR | perl -ne 'if(/^wasi-sdk-(\d+)-(\d+)-g([0-9a-f]{7,12})([+]m)?$/) { if($2 == 0) { print "$1.$2$4" } else { print "$1.$2g$3$4" } exit } else { print "could not parse git description"; exit 1 }';)
+-echo $GIT_PACKAGE_VERSION
++#set -e
++#GIT_DESCR=$(git describe --long --candidates=999 --match='wasi-sdk-*' --dirty='+m' --abbrev=12)
++#GIT_PACKAGE_VERSION=$(echo $GIT_DESCR | perl -ne 'if(/^wasi-sdk-(\d+)-(\d+)-g([0-9a-f]{7,12})([+]m)?$/) { if($2 == 0) { print "$1.$2$4" } else { print "$1.$2g$3$4" } exit } else { print "could not parse git description"; exit 1 }';)
++#echo $GIT_PACKAGE_VERSION
++echo wasi-sdk-13-eng
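
The version.sh patch is needed because the sources are fetched as plain tarballs rather than git clones (see download_repo above), so `git describe` has no repository to inspect; the script is reduced to printing a fixed identifier. With the patch applied, the expected behaviour is simply:

```bash
# Sketch: version.sh needs no git metadata after patching.
./version.sh    # prints: wasi-sdk-13-eng
```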

+ 7 - 91
wamr-compiler/build_llvm.py

@@ -1,98 +1,14 @@
+#!/usr/bin/env python3
 #
 # Copyright (C) 2019 Intel Corporation.  All rights reserved.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 #
 
-#!/usr/bin/env python3
-import os
+import pathlib
+import subprocess
 import sys
-from pathlib import Path
-
-def clone_llvm():
-    llvm_dir = Path("llvm")
-    if(llvm_dir.exists() == False):
-        print("Clone llvm to core/deps/ ..")
-        for line in os.popen("git clone --branch release/11.x https://github.com/llvm/llvm-project.git llvm"):
-            print(line)
-    else:
-        print("llvm source codes already existed")
-    return llvm_dir
-
-""" def detect_VS_version():
-    program_dirs = [os.environ['ProgramFiles(x86)'], os.environ['ProgramFiles']]
-    for dir in program_dirs:
-        vswhere  = Path("{}\\Microsoft Visual Studio\\Installer\\vswhere.exe".format(dir))
-        if (vswhere.exists()):
-            print('"{}" -version 14.0,16.0'.format(vswhere))
-            for line in os.popen('"{}" -version 14.0,16.0'.format(vswhere)):
-                keyvalue = line.split(':', maxsplit=1)
-                if(keyvalue[0] == "installationPath"):
-                    value = keyvalue[1].strip()
-                    for line in os.popen('"{}\\VC\\Auxiliary\\Build\\vcvars32.bat"'.format(value)):
-                        print(line)
-            break """
-
-
-def main():
-    current_os = sys.platform
-    print("current OS is ", current_os)
-
-    current_dir = Path.cwd()
-    deps_dir = current_dir.joinpath( "../core/deps")
-
-    os.chdir(deps_dir)
-    llvm_dir = clone_llvm()
-    os.chdir(llvm_dir)
-
-    if(current_os == "linux"):
-        build_dir_name = "build"
-        llvm_file = "bin/llvm-lto"
-       # generator = '"Unix Makefiles"'
-    elif(current_os == "win32"):
-        build_dir_name = "win32build"
-        llvm_file = "LLVM.sln"
-       # generator = '"Visual Studio 15 2017"'
-    else:
-        build_dir_name = "build"
-       # generator = '""'
-
-    Path(build_dir_name).mkdir(exist_ok = True)
-    build_dir = Path(build_dir_name)
-    os.chdir(build_dir)
-
-    if ( not Path(llvm_file).exists()):
-        core_number = os.cpu_count()
-        print("Build llvm with", core_number, " cores")
-        cmd = 'cmake ../llvm \
-                -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
-                -DCMAKE_BUILD_TYPE:STRING="Release" \
-                -DLLVM_TARGETS_TO_BUILD:STRING="X86;ARM;AArch64;Mips;RISCV" \
-                -DLLVM_INCLUDE_GO_TESTS=OFF \
-                -DLLVM_INCLUDE_TOOLS=OFF \
-                -DLLVM_INCLUDE_UTILS=OFF \
-                -DLLVM_ENABLE_TERMINFO=OFF \
-                -DLLVM_BUILD_LLVM_DYLIB:BOOL=OFF \
-                -DLLVM_OPTIMIZED_TABLEGEN:BOOL=ON \
-                -DLLVM_ENABLE_ZLIB:BOOL=OFF \
-                -DLLVM_INCLUDE_DOCS:BOOL=OFF \
-                -DLLVM_INCLUDE_EXAMPLES:BOOL=OFF \
-                -DLLVM_INCLUDE_TESTS:BOOL=OFF \
-                -DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF \
-                -DLLVM_APPEND_VC_REV:BOOL=OFF'
-        print(cmd)
-        for line in os.popen(cmd):
-            print(line)
-    else:
-        print("llvm has already been Cmaked")
-
-    if(current_os == "linux"):
-        for line in os.popen("make -j {}".format(core_number)):
-            print(line)
-    elif(current_os == "win32"):
-        print("Please open LLVM.sln in {} to build *Release* version".format(build_dir.absolute()))
-
-    os.chdir(current_dir)
-
 
-if __name__ == "__main__":
-    main()
+script = (
+    pathlib.Path(__file__).parent.joinpath("../build-scripts/build_llvm.py").resolve()
+)
+subprocess.check_call([sys.executable, script])

+ 1 - 41
wamr-compiler/build_llvm.sh

@@ -3,44 +3,4 @@
 # Copyright (C) 2020 Intel Corporation. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-DEPS_DIR=${PWD}/../core/deps
-
-cd ${DEPS_DIR}
-if [ ! -d "llvm" ]; then
-  echo "Clone llvm to core/deps/ .."
-  git clone --depth 1 --branch release/11.x https://github.com/llvm/llvm-project.git llvm
-fi
-
-cd llvm
-mkdir -p build
-cd build
-
-if [ ! -f bin/llvm-lto ]; then
-
-  CORE_NUM=$(nproc --all)
-  if [ -z "${CORE_NUM}" ]; then
-    CORE_NUM=1
-  fi
-
-  echo "Build llvm with" ${CORE_NUM} "cores"
-
-  cmake ../llvm \
-          -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
-          -DCMAKE_BUILD_TYPE:STRING="Release" \
-          -DLLVM_TARGETS_TO_BUILD:STRING="X86;ARM;AArch64;Mips;RISCV" \
-          -DLLVM_BUILD_LLVM_DYLIB:BOOL=OFF \
-          -DLLVM_OPTIMIZED_TABLEGEN:BOOL=ON \
-          -DLLVM_ENABLE_ZLIB:BOOL=OFF \
-          -DLLVM_INCLUDE_DOCS:BOOL=OFF \
-          -DLLVM_INCLUDE_EXAMPLES:BOOL=OFF \
-          -DLLVM_INCLUDE_TESTS:BOOL=OFF \
-          -DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF \
-          -DLLVM_APPEND_VC_REV:BOOL=OFF
-  make -j ${CORE_NUM}
-
-else
-  echo "llvm has already been built"
-fi
-
-cd ${PWD}
-
+/usr/bin/env python3 ../build-scripts/build_llvm.py

+ 1 - 42
wamr-compiler/build_llvm_arc.sh

@@ -3,45 +3,4 @@
 # Copyright (C) 2020 Intel Corporation. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-DEPS_DIR=${PWD}/../core/deps
-
-cd ${DEPS_DIR}
-if [ ! -d "llvm" ]; then
-  echo "Clone llvm to core/deps/ .."
-  git clone https://github.com/llvm/llvm-project.git llvm
-fi
-
-cd llvm
-mkdir -p build
-cd build
-
-if [ ! -f bin/llvm-lto ]; then
-
-  CORE_NUM=$(nproc --all)
-  if [ -z "${CORE_NUM}" ]; then
-    CORE_NUM=1
-  fi
-
-  echo "Build llvm with" ${CORE_NUM} "cores"
-
-  cmake ../llvm \
-          -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
-          -DCMAKE_BUILD_TYPE:STRING="Release" \
-          -DLLVM_TARGETS_TO_BUILD:STRING="X86" \
-          -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD:STRING="ARC" \
-          -DLLVM_BUILD_LLVM_DYLIB:BOOL=OFF \
-          -DLLVM_OPTIMIZED_TABLEGEN:BOOL=ON \
-          -DLLVM_ENABLE_ZLIB:BOOL=OFF \
-          -DLLVM_INCLUDE_DOCS:BOOL=OFF \
-          -DLLVM_INCLUDE_EXAMPLES:BOOL=OFF \
-          -DLLVM_INCLUDE_TESTS:BOOL=OFF \
-          -DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF \
-          -DLLVM_APPEND_VC_REV:BOOL=OFF
-  make -j ${CORE_NUM}
-
-else
-  echo "llvm has already been built"
-fi
-
-cd ${PWD}
-
+/usr/bin/env python3 ../build-scripts/build_llvm.py --platform arc

+ 2 - 43
wamr-compiler/build_llvm_xtensa.sh

@@ -1,47 +1,6 @@
 #!/bin/sh
 
-# Copyright (C) 2019 Intel Corporation. All rights reserved.
+# Copyright (C) 2020 Intel Corporation. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-DEPS_DIR=${PWD}/../core/deps
-
-cd ${DEPS_DIR}
-if [ ! -d "llvm" ]; then
-  echo "Clone llvm Xtensa to core/deps/ .."
-  git clone --depth 1 --branch xtensa_release_10.0.1 https://github.com/espressif/llvm-project.git llvm
-fi
-
-cd llvm
-mkdir -p build
-cd build
-
-if [ ! -f bin/llvm-lto ]; then
-
-  CORE_NUM=$(nproc --all)
-  if [ -z "${CORE_NUM}" ]; then
-    CORE_NUM=1
-  fi
-
-  echo "Build llvm with" ${CORE_NUM} "cores"
-
-  cmake ../llvm \
-          -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
-          -DCMAKE_BUILD_TYPE:STRING="Release" \
-          -DLLVM_TARGETS_TO_BUILD:STRING="X86;ARM;AArch64;Mips;RISCV" \
-          -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD:STRING="Xtensa" \
-          -DLLVM_BUILD_LLVM_DYLIB:BOOL=OFF \
-          -DLLVM_OPTIMIZED_TABLEGEN:BOOL=ON \
-          -DLLVM_ENABLE_ZLIB:BOOL=OFF \
-          -DLLVM_INCLUDE_DOCS:BOOL=OFF \
-          -DLLVM_INCLUDE_EXAMPLES:BOOL=OFF \
-          -DLLVM_INCLUDE_TESTS:BOOL=OFF \
-          -DLLVM_INCLUDE_BENCHMARKS:BOOL=OFF \
-          -DLLVM_APPEND_VC_REV:BOOL=OFF
-  make -j ${CORE_NUM}
-
-else
-  echo "llvm has already been built"
-fi
-
-cd ${PWD}
-
+/usr/bin/env python3 ../build-scripts/build_llvm.py --platform xtensa
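
Taken together, the wamr-compiler wrapper scripts are now thin shims over the shared build script, so it can also be invoked directly. The sketch below uses only the flags visible in this diff:

```bash
# Sketch of direct invocations of the consolidated LLVM build script.
python3 build-scripts/build_llvm.py                      # default build, as build_llvm.sh/.py do
python3 build-scripts/build_llvm.py --platform arc       # what build_llvm_arc.sh now runs
python3 build-scripts/build_llvm.py --platform xtensa    # what build_llvm_xtensa.sh now runs
```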

Some files were not shown because too many files changed in this diff